From 6ebabbaaaa1c8efe0731876249bb41d8d535f359 Mon Sep 17 00:00:00 2001
From: huanld
Date: Thu, 28 May 2026 21:05:39 +0700
Subject: [PATCH] Defer guide OCR to generate progress
---
electron/ipc/handlers.ts | 197 +-----------------
package-lock.json | 4 +-
package.json | 2 +-
.../video-editor/guide/GuidePanel.tsx | 155 +++++++++++++-
src/guide/snapshot/extractGuideSnapshots.ts | 29 ++-
5 files changed, 176 insertions(+), 211 deletions(-)
diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts
index 77b50f2..1eb88f8 100644
--- a/electron/ipc/handlers.ts
+++ b/electron/ipc/handlers.ts
@@ -1,11 +1,10 @@
-import { type ChildProcessWithoutNullStreams, execFile, spawn } from "node:child_process";
+import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process";
import { EventEmitter } from "node:events";
import { constants as fsConstants } from "node:fs";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath, pathToFileURL } from "node:url";
-import { promisify } from "node:util";
import type { DesktopCapturerSource, Rectangle } from "electron";
import {
app,
@@ -18,7 +17,7 @@ import {
shell,
systemPreferences,
} from "electron";
-import type { GuideEvent, GuideMarkerCapturedPayload } from "../../src/guide/contracts";
+import type { GuideMarkerCapturedPayload } from "../../src/guide/contracts";
import type { NativeMacRecordingRequest } from "../../src/lib/nativeMacRecording";
import type { NativeWindowsRecordingRequest } from "../../src/lib/nativeWindowsRecording";
import {
@@ -57,7 +56,6 @@ const RECORDING_SESSION_SUFFIX = ".session.json";
const ALLOWED_IMPORT_VIDEO_EXTENSIONS = new Set([".webm", ".mp4", ".mov", ".avi", ".mkv"]);
const PREVIEW_AUDIO_DIR = path.join(app.getPath("userData"), "preview-audio");
const nativeMacCaptureEvents = new EventEmitter();
-const execFileAsync = promisify(execFile);
/**
* Paths explicitly approved by the user via file picker dialogs or project loads.
@@ -456,7 +454,6 @@ let activeGuideHotkeyRecording: GuideHotkeyRecordingState | null = null;
let activeGuideHotkeySessionId: number | null = null;
let guideMarkerHotkeyRegistered = false;
let lastGuideHotkeyCaptureAtMs = 0;
-const guideHotkeyBackgroundJobs = new Map>();
const GUIDE_HOTKEY_CAPTURE_DEBOUNCE_MS = 250;
function normalizeCursorSample(sample: unknown): CursorRecordingSample | null {
@@ -811,195 +808,6 @@ function clampGuideHotkey01(value: number): number {
return Math.min(1, Math.max(0, value));
}
-async function captureGuideHotkeySnapshotAndRunOcr(
- guideStore: GuideStore,
- event: GuideEvent,
- boundsInput: GuideHotkeyBounds,
- point: { normalizedX: number; normalizedY: number },
-) {
- try {
- const bounds = sanitizeGuideHotkeyBounds(boundsInput);
- const sources = await desktopCapturer.getSources({
- types: ["screen"],
- thumbnailSize: {
- width: Math.max(1, Math.round(bounds.width)),
- height: Math.max(1, Math.round(bounds.height)),
- },
- });
- const source = findScreenSourceForGuideBounds(sources, bounds);
- if (!source || source.thumbnail.isEmpty()) {
- console.warn("[guide-hotkey] no live screen thumbnail was available for OCR");
- return;
- }
-
- const pngBuffer = source.thumbnail.toPNG();
- const imageSize = source.thumbnail.getSize();
- const markedPngBuffer = await createMarkedGuideSnapshotPng(pngBuffer, {
- width: imageSize.width,
- height: imageSize.height,
- x: point.normalizedX * imageSize.width,
- y: point.normalizedY * imageSize.height,
- }).catch((error) => {
- console.warn("[guide-hotkey] failed to create marked live snapshot:", error);
- return undefined;
- });
-
- enqueueGuideHotkeyBackgroundJob(event.recordingId, async () => {
- const session = await guideStore.writeSnapshot({
- recordingId: event.recordingId,
- eventId: event.id,
- timeMs: event.timeMs,
- offsetMs: 0,
- pngBytes: bufferToArrayBuffer(pngBuffer),
- markedPngBytes: markedPngBuffer ? bufferToArrayBuffer(markedPngBuffer) : undefined,
- width: imageSize.width,
- height: imageSize.height,
- });
- const snapshot = session.snapshots.find((item) => item.eventId === event.id);
- if (!snapshot) {
- return;
- }
-
- await guideStore.runOcr({
- recordingId: event.recordingId,
- snapshotIds: [snapshot.id],
- });
- console.info("[guide-hotkey] live snapshot OCR completed", {
- recordingId: event.recordingId,
- eventId: event.id,
- snapshotId: snapshot.id,
- });
- });
- } catch (error) {
- console.warn("[guide-hotkey] live snapshot OCR failed:", error);
- }
-}
-
-function enqueueGuideHotkeyBackgroundJob(recordingId: string, job: () => Promise) {
- const previousJob =
- guideHotkeyBackgroundJobs.get(recordingId)?.catch(() => undefined) ?? Promise.resolve();
- const nextJob = previousJob
- .then(job)
- .catch((error) => {
- console.warn("[guide-hotkey] background OCR job failed:", error);
- })
- .finally(() => {
- if (guideHotkeyBackgroundJobs.get(recordingId) === nextJob) {
- guideHotkeyBackgroundJobs.delete(recordingId);
- }
- });
- guideHotkeyBackgroundJobs.set(recordingId, nextJob);
-}
-
-function findScreenSourceForGuideBounds(
- sources: DesktopCapturerSource[],
- bounds: GuideHotkeyBounds,
-): DesktopCapturerSource | undefined {
- const displays = screen.getAllDisplays();
- const displayIndex = displays.findIndex((display) =>
- rectMatchesGuideBounds(display.bounds, bounds),
- );
- const display = displayIndex >= 0 ? displays[displayIndex] : undefined;
- if (display) {
- const byDisplayId = sources.find((source) => Number(source.display_id) === display.id);
- if (byDisplayId) {
- return byDisplayId;
- }
- const bySourceIndex = sources.find(
- (source) => parseDesktopCapturerScreenIndex(source.id) === displayIndex,
- );
- if (bySourceIndex) {
- return bySourceIndex;
- }
- }
- return sources.find((source) => source.id.startsWith("screen:")) ?? sources[0];
-}
-
-function rectMatchesGuideBounds(rect: Rectangle, bounds: GuideHotkeyBounds): boolean {
- return (
- Math.round(rect.x) === Math.round(bounds.x) &&
- Math.round(rect.y) === Math.round(bounds.y) &&
- Math.round(rect.width) === Math.round(bounds.width) &&
- Math.round(rect.height) === Math.round(bounds.height)
- );
-}
-
-async function createMarkedGuideSnapshotPng(
- pngBuffer: Buffer,
- marker: { width: number; height: number; x: number; y: number },
-): Promise {
- const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openscreen-guide-marker-"));
- const sourcePath = path.join(tempDir, "source.png");
- const outputPath = path.join(tempDir, "marked.png");
- try {
- await fs.writeFile(sourcePath, pngBuffer);
- await execFileAsync(
- "powershell.exe",
- [
- "-NoProfile",
- "-ExecutionPolicy",
- "Bypass",
- "-EncodedCommand",
- buildMarkerScript(sourcePath, outputPath, marker),
- ],
- {
- timeout: 30000,
- windowsHide: true,
- maxBuffer: 1024 * 1024,
- },
- );
- return await fs.readFile(outputPath);
- } finally {
- await fs.rm(tempDir, { recursive: true, force: true }).catch(() => undefined);
- }
-}
-
-function buildMarkerScript(
- sourcePath: string,
- outputPath: string,
- marker: { width: number; height: number; x: number; y: number },
-): string {
- const sourcePathBase64 = Buffer.from(sourcePath, "utf8").toString("base64");
- const outputPathBase64 = Buffer.from(outputPath, "utf8").toString("base64");
- const script = `
-$ErrorActionPreference = "Stop"
-$sourcePath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${sourcePathBase64}"))
-$outputPath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${outputPathBase64}"))
-Add-Type -AssemblyName System.Drawing
-
-$source = [System.Drawing.Image]::FromFile($sourcePath)
-$bitmap = [System.Drawing.Bitmap]::new($source.Width, $source.Height)
-$graphics = [System.Drawing.Graphics]::FromImage($bitmap)
-try {
- $graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::AntiAlias
- $graphics.DrawImage($source, 0, 0, $source.Width, $source.Height)
- $shortSide = [Math]::Max(1, [Math]::Min($source.Width, $source.Height))
- $dotRadius = [Math]::Min(7, [Math]::Max(4, [Math]::Round($shortSide * 0.005)))
- $x = [Math]::Min($source.Width, [Math]::Max(0, ${marker.x.toFixed(4)}))
- $y = [Math]::Min($source.Height, [Math]::Max(0, ${marker.y.toFixed(4)}))
- $dotBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(235, 220, 38, 38))
- try {
- $graphics.FillEllipse($dotBrush, $x - $dotRadius, $y - $dotRadius, $dotRadius * 2, $dotRadius * 2)
- } finally {
- $dotBrush.Dispose()
- }
- $bitmap.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png)
-} finally {
- $graphics.Dispose()
- $bitmap.Dispose()
- $source.Dispose()
-}
-`;
- return Buffer.from(script, "utf16le").toString("base64");
-}
-
-function bufferToArrayBuffer(buffer: Buffer): ArrayBuffer {
- return buffer.buffer.slice(
- buffer.byteOffset,
- buffer.byteOffset + buffer.byteLength,
- ) as ArrayBuffer;
-}
-
async function captureGuideHotkeyMarker(
guideStore: GuideStore,
trigger: GuideMarkerTrigger = "global-shortcut",
@@ -1046,7 +854,6 @@ async function captureGuideHotkeyMarker(
rawY: point.rawY,
bounds: point.bounds,
});
- void captureGuideHotkeySnapshotAndRunOcr(guideStore, result.event, recording.bounds, point);
return { captured: true, ...result };
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
diff --git a/package-lock.json b/package-lock.json
index 5206a55..e24c5f1 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "openscreen",
- "version": "1.4.9",
+ "version": "1.4.10",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "openscreen",
- "version": "1.4.9",
+ "version": "1.4.10",
"dependencies": {
"@fix-webm-duration/fix": "^1.0.1",
"@pixi/filter-drop-shadow": "^5.2.0",
diff --git a/package.json b/package.json
index 83bc5d2..bc1a9f3 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
{
"name": "openscreen",
"private": true,
- "version": "1.4.9",
+ "version": "1.4.10",
"type": "module",
"packageManager": "npm@10.9.4",
"engines": {
diff --git a/src/components/video-editor/guide/GuidePanel.tsx b/src/components/video-editor/guide/GuidePanel.tsx
index 2dd1173..e6a61e9 100644
--- a/src/components/video-editor/guide/GuidePanel.tsx
+++ b/src/components/video-editor/guide/GuidePanel.tsx
@@ -9,6 +9,7 @@ import type {
GuideLanguage,
GuideOcrProfile,
GuideSession,
+ GuideSnapshot,
} from "@/guide/contracts";
import { captureGuideSnapshots } from "@/guide/snapshot/extractGuideSnapshots";
@@ -21,6 +22,13 @@ interface GuidePanelProps {
type BusyAction = "load" | "generate";
+/** Progress readout for the guide-generation step that is currently running. */
+interface GuideProgressState {
+ /** Localized step name; callers pass one of the `progress*` strings from COPY. */
+ label: string;
+ /** Units finished so far within the current step. */
+ current: number;
+ /** Units the current step is expected to process. */
+ total: number;
+ /** Optional counter text; the panel falls back to `current/total` when it is omitted. */
+ detail?: string;
+}
+
const COPY = {
en: {
title: "Guide",
@@ -63,6 +71,11 @@ const COPY = {
noEvents: "No click events were captured for this guide.",
ocrUnavailable: "Local OCR service is unavailable. You can still create a local draft.",
exported: "Guide exported",
+ progressPreparing: "Preparing events",
+ progressSnapshots: "Capturing snapshots",
+ progressOcr: "Running OCR",
+ progressDraft: "Writing draft",
+ progressExport: "Exporting files",
},
vi: {
title: "Hướng dẫn",
@@ -105,9 +118,32 @@ const COPY = {
noEvents: "Chưa ghi nhận click event nào cho guide này.",
ocrUnavailable: "OCR local chưa chạy. Vẫn có thể tạo draft local.",
exported: "Đã export hướng dẫn",
+ progressPreparing: "Đang chuẩn bị events",
+ progressSnapshots: "Đang chụp ảnh",
+ progressOcr: "Đang OCR",
+ progressDraft: "Đang tạo draft",
+ progressExport: "Đang export file",
},
} as const;
+/**
+ * Lists the snapshots of `session` that have not been through OCR yet.
+ *
+ * A snapshot is treated as processed when its `ocrCompletedAt` is truthy or when at
+ * least one entry of `session.ocrBlocks` points at its id. Everything else is returned,
+ * in the same order as `session.snapshots`.
+ *
+ * @param session Guide session whose snapshots and OCR blocks are inspected.
+ * @returns The snapshots that still need an OCR pass.
+ */
+function getPendingOcrSnapshots(session: GuideSession): GuideSnapshot[] {
+  const snapshotIdsWithBlocks = new Set(session.ocrBlocks.map(({ snapshotId }) => snapshotId));
+  const isPending = (snapshot: GuideSnapshot): boolean => {
+    const alreadyProcessed =
+      Boolean(snapshot.ocrCompletedAt) || snapshotIdsWithBlocks.has(snapshot.id);
+    return !alreadyProcessed;
+  };
+  return session.snapshots.filter(isPending);
+}
+
+/**
+ * Converts a progress state into a whole-number percentage.
+ *
+ * - `null` progress yields 0.
+ * - A non-positive `total` yields 100.
+ * - Otherwise the rounded `current / total` ratio is capped at 100 and raised to a
+ *   floor of 8 once `current` is positive, or 4 while it is still zero or negative.
+ *
+ * NOTE(review): the floor presumably keeps the progress bar visibly non-empty; confirm
+ * against the markup that consumes `progressPercent`.
+ *
+ * @param progress Current progress state, or `null` when nothing is being reported.
+ * @returns A percentage between 0 and 100.
+ */
+function getProgressPercent(progress: GuideProgressState | null): number {
+  if (!progress) {
+    return 0;
+  }
+  const { current, total } = progress;
+  if (total <= 0) {
+    return 100;
+  }
+  const floor = current > 0 ? 8 : 4;
+  const rounded = Math.round((current / total) * 100);
+  return Math.min(100, Math.max(floor, rounded));
+}
+
export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePanelProps) {
const { locale } = useI18n();
const copy = useMemo(() => (locale.startsWith("vi") ? COPY.vi : COPY.en), [locale]);
@@ -124,8 +160,10 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
const [ocrProfile, setOcrProfile] = useState("vietnamese");
const [ocrLanguage, setOcrLanguage] = useState("vi,en");
const [message, setMessage] = useState(null);
+ // Progress of the running generate action; null means the progress readout is hidden.
+ // The explicit type argument is required: with a bare `useState(null)` the state type
+ // is inferred as `null` and the object updates passed to `setProgress` do not type-check.
+ const [progress, setProgress] = useState<GuideProgressState | null>(null);
const isBusy = busyAction !== null;
+ const progressPercent = getProgressPercent(progress);
const canUseGuide = Boolean(recordingId && videoSourcePath && window.electronAPI?.guide);
const generatedSteps = session?.generatedGuide?.steps ?? [];
const statusLabel = useMemo(() => {
@@ -220,6 +258,15 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
}
let current = session;
+ const readResult = await window.electronAPI.guide.readSession(recordingId);
+ if (readResult.success) {
+ current = readResult.data;
+ } else if (readResult.code === "guide-session-not-found") {
+ current = null;
+ } else if (!current) {
+ throw new Error(readResult.error);
+ }
+
if (!current) {
const startResult = await window.electronAPI.guide.startSession(recordingId);
if (!startResult.success) {
@@ -251,6 +298,7 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
}
setBusyAction(action);
setMessage(null);
+ setProgress(null);
try {
await task();
} catch (error) {
@@ -355,25 +403,59 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
if (!videoPath) {
throw new Error("Video URL is not available.");
}
+ setProgress({
+ label: copy.progressPreparing,
+ current: 0,
+ total: 1,
+ detail: "0/1",
+ });
let current = await ensureEventsSession();
+ setProgress({
+ label: copy.progressPreparing,
+ current: 1,
+ total: 1,
+ detail: "1/1",
+ });
if (current.events.length === 0) {
throw new Error(copy.noEvents);
}
- if (current.snapshots.length < current.events.length) {
+ const snapshotEventIds = new Set(current.snapshots.map((snapshot) => snapshot.eventId));
+ const pendingSnapshotTotal = current.events.filter(
+ (event) => !snapshotEventIds.has(event.id),
+ ).length;
+ if (pendingSnapshotTotal > 0) {
+ setProgress({
+ label: copy.progressSnapshots,
+ current: 0,
+ total: pendingSnapshotTotal,
+ detail: `0/${pendingSnapshotTotal}`,
+ });
current = await captureGuideSnapshots({
session: current,
videoUrl: videoPath,
maxWidth: 1280,
+ onProgress: ({ completed, total }) => {
+ setProgress({
+ label: copy.progressSnapshots,
+ current: completed,
+ total,
+ detail: `${completed}/${total}`,
+ });
+ },
});
setSession(current);
}
- const ocrCompletedSnapshotIds = new Set(current.ocrBlocks.map((block) => block.snapshotId));
- const hasPendingOcr = current.snapshots.some(
- (snapshot) => !snapshot.ocrCompletedAt && !ocrCompletedSnapshotIds.has(snapshot.id),
- );
- if (hasPendingOcr) {
+ const pendingOcrSnapshots = getPendingOcrSnapshots(current);
+ for (const [index, snapshot] of pendingOcrSnapshots.entries()) {
+ setProgress({
+ label: copy.progressOcr,
+ current: index,
+ total: pendingOcrSnapshots.length,
+ detail: `${index + 1}/${pendingOcrSnapshots.length}`,
+ });
const ocrResult = await window.electronAPI.guide.runOcr({
recordingId: current.recordingId,
+ snapshotIds: [snapshot.id],
});
if (!ocrResult.success) {
if (ocrResult.code === "guide-ocr-unavailable") {
@@ -383,7 +465,19 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
}
current = ocrResult.data;
setSession(current);
+ setProgress({
+ label: copy.progressOcr,
+ current: index + 1,
+ total: pendingOcrSnapshots.length,
+ detail: `${index + 1}/${pendingOcrSnapshots.length}`,
+ });
}
+ setProgress({
+ label: copy.progressDraft,
+ current: 0,
+ total: 1,
+ detail: "0/1",
+ });
const result = await window.electronAPI.guide.generateDraft({
recordingId: current.recordingId,
language: guideLanguage,
@@ -392,18 +486,44 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
if (!result.success) {
throw new Error(result.error);
}
+ current = result.data;
+ setSession(current);
+ setProgress({
+ label: copy.progressDraft,
+ current: 1,
+ total: 1,
+ detail: "1/1",
+ });
+ setProgress({
+ label: copy.progressExport,
+ current: 0,
+ total: 2,
+ detail: "0/2",
+ });
const markdownResult = await window.electronAPI.guide.exportMarkdown({
recordingId: current.recordingId,
});
if (!markdownResult.success) {
throw new Error(markdownResult.error);
}
+ setProgress({
+ label: copy.progressExport,
+ current: 1,
+ total: 2,
+ detail: "1/2",
+ });
const htmlResult = await window.electronAPI.guide.exportHtml({
recordingId: current.recordingId,
});
if (!htmlResult.success) {
throw new Error(htmlResult.error);
}
+ setProgress({
+ label: copy.progressExport,
+ current: 2,
+ total: 2,
+ detail: "2/2",
+ });
const revealResult = await window.electronAPI.revealInFolder(htmlResult.data.path);
if (!revealResult.success) {
toast.warning(revealResult.error ?? "Unable to open guide folder.");
@@ -419,6 +539,11 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
copy.keyMissing,
copy.noEvents,
copy.ocrUnavailable,
+ copy.progressDraft,
+ copy.progressExport,
+ copy.progressOcr,
+ copy.progressPreparing,
+ copy.progressSnapshots,
ensureEventsSession,
guideLanguage,
provider,
@@ -449,6 +574,24 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
{canUseGuide ? statusLabel : copy.noRecording}
{message && {message}
}
+ {progress && (
+
+
+
+ {progress.label}
+
+
+ {progress.detail ?? `${progress.current}/${progress.total}`}
+
+
+
+
+ )}