Defer guide OCR to generate progress
CI / Lint (push) Has been cancelled
CI / Type Check (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

This commit is contained in:
huanld
2026-05-28 21:05:39 +07:00
parent 0bd26eebf7
commit 6ebabbaaaa
5 changed files with 176 additions and 211 deletions
+2 -195
View File
@@ -1,11 +1,10 @@
import { type ChildProcessWithoutNullStreams, execFile, spawn } from "node:child_process";
import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process";
import { EventEmitter } from "node:events";
import { constants as fsConstants } from "node:fs";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath, pathToFileURL } from "node:url";
import { promisify } from "node:util";
import type { DesktopCapturerSource, Rectangle } from "electron";
import {
app,
@@ -18,7 +17,7 @@ import {
shell,
systemPreferences,
} from "electron";
import type { GuideEvent, GuideMarkerCapturedPayload } from "../../src/guide/contracts";
import type { GuideMarkerCapturedPayload } from "../../src/guide/contracts";
import type { NativeMacRecordingRequest } from "../../src/lib/nativeMacRecording";
import type { NativeWindowsRecordingRequest } from "../../src/lib/nativeWindowsRecording";
import {
@@ -57,7 +56,6 @@ const RECORDING_SESSION_SUFFIX = ".session.json";
const ALLOWED_IMPORT_VIDEO_EXTENSIONS = new Set([".webm", ".mp4", ".mov", ".avi", ".mkv"]);
const PREVIEW_AUDIO_DIR = path.join(app.getPath("userData"), "preview-audio");
const nativeMacCaptureEvents = new EventEmitter();
const execFileAsync = promisify(execFile);
/**
* Paths explicitly approved by the user via file picker dialogs or project loads.
@@ -456,7 +454,6 @@ let activeGuideHotkeyRecording: GuideHotkeyRecordingState | null = null;
let activeGuideHotkeySessionId: number | null = null;
let guideMarkerHotkeyRegistered = false;
let lastGuideHotkeyCaptureAtMs = 0;
const guideHotkeyBackgroundJobs = new Map<string, Promise<void>>();
const GUIDE_HOTKEY_CAPTURE_DEBOUNCE_MS = 250;
function normalizeCursorSample(sample: unknown): CursorRecordingSample | null {
@@ -811,195 +808,6 @@ function clampGuideHotkey01(value: number): number {
return Math.min(1, Math.max(0, value));
}
/**
 * Grabs a live screen thumbnail for a guide hotkey event and queues the
 * snapshot write plus OCR run on the recording's background job queue.
 *
 * Best-effort: failures are logged with a `[guide-hotkey]` warning and are
 * never rethrown to the caller. A failure to render the marked variant only
 * drops that variant; the plain snapshot is still written.
 *
 * @param guideStore - Store used to persist the snapshot and to run OCR.
 * @param event - Guide event the snapshot belongs to.
 * @param boundsInput - Capture bounds; passed through `sanitizeGuideHotkeyBounds` first.
 * @param point - Marker position, scaled by the thumbnail size to get pixel coordinates.
 */
async function captureGuideHotkeySnapshotAndRunOcr(
	guideStore: GuideStore,
	event: GuideEvent,
	boundsInput: GuideHotkeyBounds,
	point: { normalizedX: number; normalizedY: number },
) {
	try {
		const captureBounds = sanitizeGuideHotkeyBounds(boundsInput);
		// The thumbnail is requested at the capture size, never smaller than 1x1.
		const thumbnailSize = {
			width: Math.max(1, Math.round(captureBounds.width)),
			height: Math.max(1, Math.round(captureBounds.height)),
		};
		const screenSources = await desktopCapturer.getSources({
			types: ["screen"],
			thumbnailSize,
		});
		const screenSource = findScreenSourceForGuideBounds(screenSources, captureBounds);
		if (!screenSource || screenSource.thumbnail.isEmpty()) {
			console.warn("[guide-hotkey] no live screen thumbnail was available for OCR");
			return;
		}

		const rawPng = screenSource.thumbnail.toPNG();
		const capturedSize = screenSource.thumbnail.getSize();

		// The marked variant is optional: log and continue without it on failure.
		let markedPng: Buffer | undefined;
		try {
			markedPng = await createMarkedGuideSnapshotPng(rawPng, {
				width: capturedSize.width,
				height: capturedSize.height,
				x: point.normalizedX * capturedSize.width,
				y: point.normalizedY * capturedSize.height,
			});
		} catch (error) {
			console.warn("[guide-hotkey] failed to create marked live snapshot:", error);
			markedPng = undefined;
		}

		const persistAndRecognize = async () => {
			const updatedSession = await guideStore.writeSnapshot({
				recordingId: event.recordingId,
				eventId: event.id,
				timeMs: event.timeMs,
				offsetMs: 0,
				pngBytes: bufferToArrayBuffer(rawPng),
				markedPngBytes: markedPng ? bufferToArrayBuffer(markedPng) : undefined,
				width: capturedSize.width,
				height: capturedSize.height,
			});
			const writtenSnapshot = updatedSession.snapshots.find(
				(candidate) => candidate.eventId === event.id,
			);
			if (!writtenSnapshot) {
				return;
			}
			await guideStore.runOcr({
				recordingId: event.recordingId,
				snapshotIds: [writtenSnapshot.id],
			});
			console.info("[guide-hotkey] live snapshot OCR completed", {
				recordingId: event.recordingId,
				eventId: event.id,
				snapshotId: writtenSnapshot.id,
			});
		};
		enqueueGuideHotkeyBackgroundJob(event.recordingId, persistAndRecognize);
	} catch (error) {
		console.warn("[guide-hotkey] live snapshot OCR failed:", error);
	}
}
/**
 * Appends `job` to the background queue of the given recording so that jobs
 * for one recording run one after another.
 *
 * A failure of the previously queued job does not stop `job` from running,
 * and a failure of `job` itself is logged rather than propagated. The queue
 * entry is removed once the newest job has settled.
 *
 * @param recordingId - Key of the per-recording queue.
 * @param job - Async work to run after the currently queued job settles.
 */
function enqueueGuideHotkeyBackgroundJob(recordingId: string, job: () => Promise<void>) {
	const queuedJob = guideHotkeyBackgroundJobs.get(recordingId);
	// Swallow the previous job's rejection so it cannot block this one.
	const queueTail = queuedJob ? queuedJob.catch(() => undefined) : Promise.resolve();
	const chainedJob = queueTail
		.then(job)
		.catch((error) => {
			console.warn("[guide-hotkey] background OCR job failed:", error);
		})
		.finally(() => {
			// Only clear the slot if no newer job has been queued in the meantime.
			const latestJob = guideHotkeyBackgroundJobs.get(recordingId);
			if (latestJob === chainedJob) {
				guideHotkeyBackgroundJobs.delete(recordingId);
			}
		});
	guideHotkeyBackgroundJobs.set(recordingId, chainedJob);
}
/**
 * Picks the desktop capturer source that corresponds to the given bounds.
 *
 * Lookup order:
 * 1. the source whose `display_id` equals the id of the display with matching bounds,
 * 2. the source whose parsed screen index equals that display's position in
 *    `screen.getAllDisplays()`,
 * 3. the first source whose id starts with `screen:`,
 * 4. the first source of any kind.
 *
 * @param sources - Candidate sources returned by the desktop capturer.
 * @param bounds - Bounds used to identify the display.
 * @returns The selected source, or `undefined` when `sources` is empty.
 */
function findScreenSourceForGuideBounds(
	sources: DesktopCapturerSource[],
	bounds: GuideHotkeyBounds,
): DesktopCapturerSource | undefined {
	const allDisplays = screen.getAllDisplays();
	const matchedIndex = allDisplays.findIndex((candidate) =>
		rectMatchesGuideBounds(candidate.bounds, bounds),
	);
	const matchedDisplay = matchedIndex >= 0 ? allDisplays[matchedIndex] : undefined;

	const displaySource = matchedDisplay
		? (sources.find((candidate) => Number(candidate.display_id) === matchedDisplay.id) ??
			sources.find(
				(candidate) => parseDesktopCapturerScreenIndex(candidate.id) === matchedIndex,
			))
		: undefined;

	return (
		displaySource ??
		sources.find((candidate) => candidate.id.startsWith("screen:")) ??
		sources[0]
	);
}
/**
 * Tells whether a rectangle and guide bounds describe the same area once each
 * coordinate and dimension is rounded to the nearest integer.
 *
 * @param rect - Rectangle to compare.
 * @param bounds - Guide bounds to compare against.
 * @returns `true` when x, y, width and height all agree after rounding.
 */
function rectMatchesGuideBounds(rect: Rectangle, bounds: GuideHotkeyBounds): boolean {
	const sameWhenRounded = (left: number, right: number): boolean =>
		Math.round(left) === Math.round(right);

	if (!sameWhenRounded(rect.x, bounds.x)) {
		return false;
	}
	if (!sameWhenRounded(rect.y, bounds.y)) {
		return false;
	}
	if (!sameWhenRounded(rect.width, bounds.width)) {
		return false;
	}
	return sameWhenRounded(rect.height, bounds.height);
}
/**
 * Produces a copy of a PNG with a marker drawn on it by running a generated
 * PowerShell script against temporary files.
 *
 * The image is written to a fresh temp directory, `powershell.exe` is run
 * with the encoded script from `buildMarkerScript`, and the output file is
 * read back. The temp directory is removed afterwards whether or not the
 * script succeeded; cleanup errors are ignored.
 *
 * @param pngBuffer - Source PNG bytes.
 * @param marker - Image size and marker position forwarded to `buildMarkerScript`.
 * @returns The bytes of the marked PNG.
 */
async function createMarkedGuideSnapshotPng(
	pngBuffer: Buffer,
	marker: { width: number; height: number; x: number; y: number },
): Promise<Buffer> {
	const workDir = await fs.mkdtemp(path.join(os.tmpdir(), "openscreen-guide-marker-"));
	try {
		const inputFile = path.join(workDir, "source.png");
		const markedFile = path.join(workDir, "marked.png");
		await fs.writeFile(inputFile, pngBuffer);

		const encodedCommand = buildMarkerScript(inputFile, markedFile, marker);
		const powershellArgs = [
			"-NoProfile",
			"-ExecutionPolicy",
			"Bypass",
			"-EncodedCommand",
			encodedCommand,
		];
		await execFileAsync("powershell.exe", powershellArgs, {
			timeout: 30000,
			windowsHide: true,
			maxBuffer: 1024 * 1024,
		});

		return await fs.readFile(markedFile);
	} finally {
		await fs.rm(workDir, { recursive: true, force: true }).catch(() => undefined);
	}
}
/**
 * Builds the PowerShell script that draws a filled dot on an image and
 * returns it as base64 over UTF-16LE text.
 *
 * Both file paths are embedded as base64 of their UTF-8 bytes and decoded
 * inside the script, so the raw path text never appears in the script source.
 * The marker coordinates are embedded with four decimal places and clamped to
 * the image size by the script itself.
 *
 * @param sourcePath - Path of the PNG the script loads.
 * @param outputPath - Path the script saves the marked PNG to.
 * @param marker - Marker position; only `x` and `y` are read here.
 * @returns The encoded script text.
 */
function buildMarkerScript(
	sourcePath: string,
	outputPath: string,
	marker: { width: number; height: number; x: number; y: number },
): string {
	const encodePath = (filePath: string): string =>
		Buffer.from(filePath, "utf8").toString("base64");
	const encodedSource = encodePath(sourcePath);
	const encodedOutput = encodePath(outputPath);
	const markerX = marker.x.toFixed(4);
	const markerY = marker.y.toFixed(4);

	// The template body stays flush-left so the emitted script text is unchanged.
	const powershellSource = `
$ErrorActionPreference = "Stop"
$sourcePath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${encodedSource}"))
$outputPath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${encodedOutput}"))
Add-Type -AssemblyName System.Drawing
$source = [System.Drawing.Image]::FromFile($sourcePath)
$bitmap = [System.Drawing.Bitmap]::new($source.Width, $source.Height)
$graphics = [System.Drawing.Graphics]::FromImage($bitmap)
try {
$graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::AntiAlias
$graphics.DrawImage($source, 0, 0, $source.Width, $source.Height)
$shortSide = [Math]::Max(1, [Math]::Min($source.Width, $source.Height))
$dotRadius = [Math]::Min(7, [Math]::Max(4, [Math]::Round($shortSide * 0.005)))
$x = [Math]::Min($source.Width, [Math]::Max(0, ${markerX}))
$y = [Math]::Min($source.Height, [Math]::Max(0, ${markerY}))
$dotBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(235, 220, 38, 38))
try {
$graphics.FillEllipse($dotBrush, $x - $dotRadius, $y - $dotRadius, $dotRadius * 2, $dotRadius * 2)
} finally {
$dotBrush.Dispose()
}
$bitmap.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png)
} finally {
$graphics.Dispose()
$bitmap.Dispose()
$source.Dispose()
}
`;
	return Buffer.from(powershellSource, "utf16le").toString("base64");
}
/**
 * Copies the bytes viewed by a Node `Buffer` into a standalone `ArrayBuffer`.
 *
 * The backing store is sliced from the buffer's `byteOffset` for `byteLength`
 * bytes, so only the bytes the view covers are included.
 *
 * @param buffer - Buffer whose visible bytes should be copied.
 * @returns A new `ArrayBuffer` holding exactly those bytes.
 */
function bufferToArrayBuffer(buffer: Buffer): ArrayBuffer {
	const { buffer: backingStore, byteOffset, byteLength } = buffer;
	const endOffset = byteOffset + byteLength;
	return backingStore.slice(byteOffset, endOffset) as ArrayBuffer;
}
async function captureGuideHotkeyMarker(
guideStore: GuideStore,
trigger: GuideMarkerTrigger = "global-shortcut",
@@ -1046,7 +854,6 @@ async function captureGuideHotkeyMarker(
rawY: point.rawY,
bounds: point.bounds,
});
void captureGuideHotkeySnapshotAndRunOcr(guideStore, result.event, recording.bounds, point);
return { captured: true, ...result };
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
+2 -2
View File
@@ -1,12 +1,12 @@
{
"name": "openscreen",
"version": "1.4.9",
"version": "1.4.10",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "openscreen",
"version": "1.4.9",
"version": "1.4.10",
"dependencies": {
"@fix-webm-duration/fix": "^1.0.1",
"@pixi/filter-drop-shadow": "^5.2.0",
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "openscreen",
"private": true,
"version": "1.4.9",
"version": "1.4.10",
"type": "module",
"packageManager": "npm@10.9.4",
"engines": {
@@ -9,6 +9,7 @@ import type {
GuideLanguage,
GuideOcrProfile,
GuideSession,
GuideSnapshot,
} from "@/guide/contracts";
import { captureGuideSnapshots } from "@/guide/snapshot/extractGuideSnapshots";
@@ -21,6 +22,13 @@ interface GuidePanelProps {
type BusyAction = "load" | "generate";
/** State of the progress row rendered in the guide panel while a task runs. */
interface GuideProgressState {
/** Stage name shown on the left of the progress row. */
label: string;
/** Units finished so far; used for the bar width and the fallback counter text. */
current: number;
/** Units expected in total; a value of 0 or less renders the bar as complete. */
total: number;
/** Counter text shown on the right; when omitted, `current/total` is shown instead. */
detail?: string;
}
const COPY = {
en: {
title: "Guide",
@@ -63,6 +71,11 @@ const COPY = {
noEvents: "No click events were captured for this guide.",
ocrUnavailable: "Local OCR service is unavailable. You can still create a local draft.",
exported: "Guide exported",
progressPreparing: "Preparing events",
progressSnapshots: "Capturing snapshots",
progressOcr: "Running OCR",
progressDraft: "Writing draft",
progressExport: "Exporting files",
},
vi: {
title: "Hướng dẫn",
@@ -105,9 +118,32 @@ const COPY = {
noEvents: "Chưa ghi nhận click event nào cho guide này.",
ocrUnavailable: "OCR local chưa chạy. Vẫn có thể tạo draft local.",
exported: "Đã export hướng dẫn",
progressPreparing: "Đang chuẩn bị events",
progressSnapshots: "Đang chụp ảnh",
progressOcr: "Đang OCR",
progressDraft: "Đang tạo draft",
progressExport: "Đang export file",
},
} as const;
/**
 * Lists the snapshots of a session that still need an OCR pass.
 *
 * A snapshot counts as done when it has an `ocrCompletedAt` value or when any
 * OCR block in the session refers to its id.
 *
 * @param session - Guide session to inspect.
 * @returns The pending snapshots, in session order.
 */
function getPendingOcrSnapshots(session: GuideSession): GuideSnapshot[] {
	const recognizedSnapshotIds = new Set(session.ocrBlocks.map(({ snapshotId }) => snapshotId));
	const pending: GuideSnapshot[] = [];
	for (const candidate of session.snapshots) {
		const alreadyProcessed =
			Boolean(candidate.ocrCompletedAt) || recognizedSnapshotIds.has(candidate.id);
		if (!alreadyProcessed) {
			pending.push(candidate);
		}
	}
	return pending;
}
/**
 * Converts a progress state into the bar width percentage.
 *
 * - no progress state: 0
 * - `total` of 0 or less: 100
 * - otherwise the rounded ratio, capped at 100 and raised to a minimum of 8
 *   once something has completed (4 before that) so the bar stays visible.
 *
 * @param progress - Current progress state, or `null` when nothing is running.
 * @returns The percentage to use as the bar width.
 */
function getProgressPercent(progress: GuideProgressState | null): number {
	if (!progress) {
		return 0;
	}
	const { current, total } = progress;
	if (total <= 0) {
		return 100;
	}
	const minimumVisible = current > 0 ? 8 : 4;
	const rounded = Math.round((current / total) * 100);
	if (rounded < minimumVisible) {
		return minimumVisible;
	}
	return rounded > 100 ? 100 : rounded;
}
export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePanelProps) {
const { locale } = useI18n();
const copy = useMemo(() => (locale.startsWith("vi") ? COPY.vi : COPY.en), [locale]);
@@ -124,8 +160,10 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
const [ocrProfile, setOcrProfile] = useState<GuideOcrProfile>("vietnamese");
const [ocrLanguage, setOcrLanguage] = useState("vi,en");
const [message, setMessage] = useState<string | null>(null);
const [progress, setProgress] = useState<GuideProgressState | null>(null);
const isBusy = busyAction !== null;
const progressPercent = getProgressPercent(progress);
const canUseGuide = Boolean(recordingId && videoSourcePath && window.electronAPI?.guide);
const generatedSteps = session?.generatedGuide?.steps ?? [];
const statusLabel = useMemo(() => {
@@ -220,6 +258,15 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
}
let current = session;
const readResult = await window.electronAPI.guide.readSession(recordingId);
if (readResult.success) {
current = readResult.data;
} else if (readResult.code === "guide-session-not-found") {
current = null;
} else if (!current) {
throw new Error(readResult.error);
}
if (!current) {
const startResult = await window.electronAPI.guide.startSession(recordingId);
if (!startResult.success) {
@@ -251,6 +298,7 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
}
setBusyAction(action);
setMessage(null);
setProgress(null);
try {
await task();
} catch (error) {
@@ -355,25 +403,59 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
if (!videoPath) {
throw new Error("Video URL is not available.");
}
setProgress({
label: copy.progressPreparing,
current: 0,
total: 1,
detail: "0/1",
});
let current = await ensureEventsSession();
setProgress({
label: copy.progressPreparing,
current: 1,
total: 1,
detail: "1/1",
});
if (current.events.length === 0) {
throw new Error(copy.noEvents);
}
if (current.snapshots.length < current.events.length) {
const snapshotEventIds = new Set(current.snapshots.map((snapshot) => snapshot.eventId));
const pendingSnapshotTotal = current.events.filter(
(event) => !snapshotEventIds.has(event.id),
).length;
if (pendingSnapshotTotal > 0) {
setProgress({
label: copy.progressSnapshots,
current: 0,
total: pendingSnapshotTotal,
detail: `0/${pendingSnapshotTotal}`,
});
current = await captureGuideSnapshots({
session: current,
videoUrl: videoPath,
maxWidth: 1280,
onProgress: ({ completed, total }) => {
setProgress({
label: copy.progressSnapshots,
current: completed,
total,
detail: `${completed}/${total}`,
});
},
});
setSession(current);
}
const ocrCompletedSnapshotIds = new Set(current.ocrBlocks.map((block) => block.snapshotId));
const hasPendingOcr = current.snapshots.some(
(snapshot) => !snapshot.ocrCompletedAt && !ocrCompletedSnapshotIds.has(snapshot.id),
);
if (hasPendingOcr) {
const pendingOcrSnapshots = getPendingOcrSnapshots(current);
for (const [index, snapshot] of pendingOcrSnapshots.entries()) {
setProgress({
label: copy.progressOcr,
current: index,
total: pendingOcrSnapshots.length,
detail: `${index + 1}/${pendingOcrSnapshots.length}`,
});
const ocrResult = await window.electronAPI.guide.runOcr({
recordingId: current.recordingId,
snapshotIds: [snapshot.id],
});
if (!ocrResult.success) {
if (ocrResult.code === "guide-ocr-unavailable") {
@@ -383,7 +465,19 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
}
current = ocrResult.data;
setSession(current);
setProgress({
label: copy.progressOcr,
current: index + 1,
total: pendingOcrSnapshots.length,
detail: `${index + 1}/${pendingOcrSnapshots.length}`,
});
}
setProgress({
label: copy.progressDraft,
current: 0,
total: 1,
detail: "0/1",
});
const result = await window.electronAPI.guide.generateDraft({
recordingId: current.recordingId,
language: guideLanguage,
@@ -392,18 +486,44 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
if (!result.success) {
throw new Error(result.error);
}
current = result.data;
setSession(current);
setProgress({
label: copy.progressDraft,
current: 1,
total: 1,
detail: "1/1",
});
setProgress({
label: copy.progressExport,
current: 0,
total: 2,
detail: "0/2",
});
const markdownResult = await window.electronAPI.guide.exportMarkdown({
recordingId: current.recordingId,
});
if (!markdownResult.success) {
throw new Error(markdownResult.error);
}
setProgress({
label: copy.progressExport,
current: 1,
total: 2,
detail: "1/2",
});
const htmlResult = await window.electronAPI.guide.exportHtml({
recordingId: current.recordingId,
});
if (!htmlResult.success) {
throw new Error(htmlResult.error);
}
setProgress({
label: copy.progressExport,
current: 2,
total: 2,
detail: "2/2",
});
const revealResult = await window.electronAPI.revealInFolder(htmlResult.data.path);
if (!revealResult.success) {
toast.warning(revealResult.error ?? "Unable to open guide folder.");
@@ -419,6 +539,11 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
copy.keyMissing,
copy.noEvents,
copy.ocrUnavailable,
copy.progressDraft,
copy.progressExport,
copy.progressOcr,
copy.progressPreparing,
copy.progressSnapshots,
ensureEventsSession,
guideLanguage,
provider,
@@ -449,6 +574,24 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
{canUseGuide ? statusLabel : copy.noRecording}
</p>
{message && <p className="mb-2 text-[11px] leading-4 text-amber-300">{message}</p>}
{progress && (
<div className="mb-2 rounded-md border border-white/[0.07] bg-white/[0.035] px-2 py-1.5">
<div className="mb-1 flex items-center justify-between gap-2 text-[10px] leading-4">
<span className="min-w-0 truncate font-semibold text-slate-200">
{progress.label}
</span>
<span className="shrink-0 text-slate-500">
{progress.detail ?? `${progress.current}/${progress.total}`}
</span>
</div>
<div className="h-1.5 overflow-hidden rounded-full bg-white/[0.06]">
<div
className="h-full rounded-full bg-[#34B27B] transition-all duration-200"
style={{ width: `${progressPercent}%` }}
/>
</div>
</div>
)}
<div className="mb-2 flex items-center gap-1.5">
<select
+22 -7
View File
@@ -4,6 +4,13 @@ export interface CaptureGuideSnapshotsInput {
session: GuideSession;
videoUrl: string;
maxWidth?: number;
onProgress?: (progress: CaptureGuideSnapshotsProgress) => void;
}
/** Payload passed to `onProgress` each time a pending event's snapshot has been written. */
export interface CaptureGuideSnapshotsProgress {
/** The event whose snapshot was just written. */
event: GuideEvent;
/** Number of pending events captured so far in this call. */
completed: number;
/** Number of events that had no snapshot when the call started. */
total: number;
}
export async function captureGuideSnapshots(
@@ -13,6 +20,13 @@ export async function captureGuideSnapshots(
if (events.length === 0) {
return input.session;
}
const existingSnapshotsByEventId = new Set(
input.session.snapshots.map((snapshot) => snapshot.eventId),
);
const pendingEvents = events.filter((event) => !existingSnapshotsByEventId.has(event.id));
if (pendingEvents.length === 0) {
return input.session;
}
const video = document.createElement("video");
video.preload = "auto";
@@ -35,13 +49,8 @@ export async function captureGuideSnapshots(
canvas.height = Math.max(1, Math.round(sourceHeight * scale));
let latestSession = input.session;
const existingSnapshotsByEventId = new Set(
input.session.snapshots.map((snapshot) => snapshot.eventId),
);
for (const event of events) {
if (existingSnapshotsByEventId.has(event.id)) {
continue;
}
let completed = 0;
for (const event of pendingEvents) {
const offsetMs = event.screenshotOffsetMs ?? 500;
const timeMs = getSnapshotTimeMs(event, offsetMs, video.duration);
await seekVideo(video, timeMs / 1000);
@@ -65,6 +74,12 @@ export async function captureGuideSnapshots(
throw new Error(result.error);
}
latestSession = result.data;
completed += 1;
input.onProgress?.({
event,
completed,
total: pendingEvents.length,
});
}
return latestSession;