diff --git a/electron/guide/guideStore.test.ts b/electron/guide/guideStore.test.ts index a652117..43f16d6 100644 --- a/electron/guide/guideStore.test.ts +++ b/electron/guide/guideStore.test.ts @@ -232,6 +232,74 @@ describe("GuideStore", () => { await expect(fs.readFile(html.path, "utf-8")).resolves.toContain(""); }); + it("resumes OCR without reprocessing completed snapshots", async () => { + const recognizedSnapshotIds: string[] = []; + const store = new GuideStore(recordingsDir, { + ocrClient: { + recognize: async (snapshot) => { + recognizedSnapshotIds.push(snapshot.id); + return []; + }, + }, + }); + await store.startSession(115); + const firstMarker = await store.addMarker({ + recordingId: 115, + kind: "hotkey", + timeMs: 100, + label: "Ctrl+F12 marker", + normalizedX: 0.25, + normalizedY: 0.35, + }); + const secondMarker = await store.addMarker({ + recordingId: 115, + kind: "hotkey", + timeMs: 300, + label: "Ctrl+F12 marker", + normalizedX: 0.6, + normalizedY: 0.7, + }); + const firstEvent = firstMarker.event; + const secondEvent = secondMarker.event; + await store.writeSnapshot({ + recordingId: 115, + eventId: firstEvent?.id ?? "", + timeMs: 100, + offsetMs: 0, + width: 800, + height: 600, + pngBytes: new Uint8Array([1, 2, 3]).buffer, + }); + await store.writeSnapshot({ + recordingId: 115, + eventId: secondEvent?.id ?? "", + timeMs: 300, + offsetMs: 0, + width: 800, + height: 600, + pngBytes: new Uint8Array([4, 5, 6]).buffer, + }); + + await store.runOcr({ + recordingId: 115, + snapshotIds: [`snapshot-${firstEvent?.id}`], + }); + expect(recognizedSnapshotIds).toEqual([`snapshot-${firstEvent?.id}`]); + + const resumedSession = await store.runOcr({ recordingId: 115 }); + expect(recognizedSnapshotIds).toEqual([ + `snapshot-${firstEvent?.id}`, + `snapshot-${secondEvent?.id}`, + ]); + expect(resumedSession.snapshots.every((snapshot) => snapshot.ocrCompletedAt)).toBe(true); + + await store.runOcr({ recordingId: 115 }); + expect(recognizedSnapshotIds).toEqual([ + `snapshot-${firstEvent?.id}`, + `snapshot-${secondEvent?.id}`, + ]); + }); + it("repairs generic hotkey marker text and attaches AI draft artifacts", async () => { const store = new GuideStore(recordingsDir, { ocrClient: { diff --git a/electron/guide/guideStore.ts b/electron/guide/guideStore.ts index b41de17..89f87cd 100644 --- a/electron/guide/guideStore.ts +++ b/electron/guide/guideStore.ts @@ -58,6 +58,8 @@ const VALID_EVENT_SOURCES = new Set([ "review-ui", ]); +const guideOcrJobsByRecordingId = new Map>(); + export class GuideStoreError extends Error { constructor( readonly code: GuideErrorCode, @@ -259,50 +261,103 @@ export class GuideStore { } async runOcr(input: RunGuideOcrInput): Promise { - const session = await this.readSession(input.recordingId); - const requestedIds = new Set(input.snapshotIds ?? []); - const snapshots = - requestedIds.size > 0 - ? session.snapshots.filter((snapshot) => requestedIds.has(snapshot.id)) - : session.snapshots; - if (snapshots.length === 0) { - throw new GuideStoreError("guide-invalid-input", "No guide snapshots are available for OCR."); + const recordingId = normalizeGuideRecordingId(input.recordingId); + if (!recordingId) { + throw new GuideStoreError("guide-invalid-input", "OCR run is missing recordingId."); } - const ocrClient = - this.dependencies.ocrClient ?? - DefaultGuideOcrClient.fromConfig(await this.dependencies.ocrConfigProvider?.getOcrConfig()); - const shouldFocusOcrSnapshots = - this.dependencies.focusOcrSnapshots ?? this.dependencies.ocrClient === undefined; - const eventsById = new Map(session.events.map((event) => [event.id, event])); - const blocks: OcrBlock[] = []; - try { - for (const snapshot of snapshots) { - const focusedSnapshot = shouldFocusOcrSnapshots - ? await createFocusedOcrSnapshot({ - snapshot, - event: eventsById.get(snapshot.eventId), - outputDir: session.outputDir, - }) - : { snapshot }; - const recognizedBlocks = await ocrClient.recognize(focusedSnapshot.snapshot); - blocks.push(...remapFocusedOcrBlocks(recognizedBlocks, focusedSnapshot.transform)); + const previousJob = + guideOcrJobsByRecordingId.get(recordingId)?.catch(() => undefined) ?? Promise.resolve(); + const nextJob = previousJob.then(async () => { + let session = await this.readSession(recordingId); + const requestedIds = new Set(input.snapshotIds ?? []); + const snapshots = + requestedIds.size > 0 + ? session.snapshots.filter((snapshot) => requestedIds.has(snapshot.id)) + : session.snapshots; + if (snapshots.length === 0) { + throw new GuideStoreError( + "guide-invalid-input", + "No guide snapshots are available for OCR.", + ); } - } catch (error) { - throw new GuideStoreError( - "guide-ocr-unavailable", - error instanceof Error ? error.message : "OCR failed.", - true, - ); - } - const snapshotIds = new Set(snapshots.map((snapshot) => snapshot.id)); + const completedSnapshotIds = new Set( + session.snapshots + .filter((snapshot) => isSnapshotOcrCompleted(snapshot, session.ocrBlocks)) + .map((snapshot) => snapshot.id), + ); + const pendingSnapshots = snapshots.filter( + (snapshot) => !completedSnapshotIds.has(snapshot.id), + ); + if (pendingSnapshots.length === 0) { + if (session.status === "ocr-ready") { + return session; + } + const readySession = touchSession({ + ...session, + status: "ocr-ready", + candidates: buildGuideStepCandidates(session), + }); + await this.writeSession(readySession); + return readySession; + } + + const ocrClient = + this.dependencies.ocrClient ?? + DefaultGuideOcrClient.fromConfig(await this.dependencies.ocrConfigProvider?.getOcrConfig()); + const shouldFocusOcrSnapshots = + this.dependencies.focusOcrSnapshots ?? this.dependencies.ocrClient === undefined; + const eventsById = new Map(session.events.map((event) => [event.id, event])); + try { + for (const snapshot of pendingSnapshots) { + const focusedSnapshot = shouldFocusOcrSnapshots + ? await createFocusedOcrSnapshot({ + snapshot, + event: eventsById.get(snapshot.eventId), + outputDir: session.outputDir, + }) + : { snapshot }; + const recognizedBlocks = await ocrClient.recognize(focusedSnapshot.snapshot); + const blocks = remapFocusedOcrBlocks(recognizedBlocks, focusedSnapshot.transform); + session = await this.writeOcrSnapshotProgress(session, snapshot.id, blocks); + } + } catch (error) { + throw new GuideStoreError( + "guide-ocr-unavailable", + error instanceof Error ? error.message : "OCR failed.", + true, + ); + } + + return session; + }); + guideOcrJobsByRecordingId.set(recordingId, nextJob); + try { + return await nextJob; + } finally { + if (guideOcrJobsByRecordingId.get(recordingId) === nextJob) { + guideOcrJobsByRecordingId.delete(recordingId); + } + } + } + + private async writeOcrSnapshotProgress( + session: GuideSession, + snapshotId: string, + blocks: OcrBlock[], + ): Promise { const updatedOcrBlocks = [ - ...session.ocrBlocks.filter((block) => !snapshotIds.has(block.snapshotId)), + ...session.ocrBlocks.filter((block) => block.snapshotId !== snapshotId), ...blocks, ]; + const completedAt = new Date().toISOString(); + const updatedSnapshots = session.snapshots.map((snapshot) => + snapshot.id === snapshotId ? { ...snapshot, ocrCompletedAt: completedAt } : snapshot, + ); const draftSession = { ...session, + snapshots: updatedSnapshots, ocrBlocks: updatedOcrBlocks, }; const updatedSession = touchSession({ @@ -679,6 +734,7 @@ function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null { const eventId = normalizeString(input.eventId); const pathValue = normalizeString(input.path); const markedPath = normalizeOptionalString(input.markedPath); + const ocrCompletedAt = normalizeOptionalString(input.ocrCompletedAt); const timeMs = normalizeNonNegativeNumber(input.timeMs); const offsetMs = normalizeOptionalNumber(input.offsetMs); const width = normalizePositiveInteger(input.width); @@ -694,7 +750,23 @@ function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null { ) { return null; } - return { id, eventId, timeMs, offsetMs, path: pathValue, markedPath, width, height }; + return { + id, + eventId, + timeMs, + offsetMs, + path: pathValue, + markedPath, + ocrCompletedAt, + width, + height, + }; +} + +function isSnapshotOcrCompleted(snapshot: GuideSnapshot, ocrBlocks: OcrBlock[]): boolean { + return ( + Boolean(snapshot.ocrCompletedAt) || ocrBlocks.some((block) => block.snapshotId === snapshot.id) + ); } function normalizeOcrBlock(input: unknown): OcrBlock | null { diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 3b0abac..77b50f2 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -974,21 +974,13 @@ try { $graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::AntiAlias $graphics.DrawImage($source, 0, 0, $source.Width, $source.Height) $shortSide = [Math]::Max(1, [Math]::Min($source.Width, $source.Height)) - $haloRadius = [Math]::Min(14, [Math]::Max(8, [Math]::Round($shortSide * 0.012))) - $dotRadius = [Math]::Min(6, [Math]::Max(3, [Math]::Round($shortSide * 0.0045))) - $lineWidth = [Math]::Max(1, [Math]::Round($shortSide * 0.0015)) + $dotRadius = [Math]::Min(7, [Math]::Max(4, [Math]::Round($shortSide * 0.005))) $x = [Math]::Min($source.Width, [Math]::Max(0, ${marker.x.toFixed(4)})) $y = [Math]::Min($source.Height, [Math]::Max(0, ${marker.y.toFixed(4)})) - $haloBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(87, 250, 204, 21)) - $ringPen = [System.Drawing.Pen]::new([System.Drawing.Color]::FromArgb(184, 239, 68, 68), $lineWidth) $dotBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(235, 220, 38, 38)) try { - $graphics.FillEllipse($haloBrush, $x - $haloRadius, $y - $haloRadius, $haloRadius * 2, $haloRadius * 2) - $graphics.DrawEllipse($ringPen, $x - $haloRadius, $y - $haloRadius, $haloRadius * 2, $haloRadius * 2) $graphics.FillEllipse($dotBrush, $x - $dotRadius, $y - $dotRadius, $dotRadius * 2, $dotRadius * 2) } finally { - $haloBrush.Dispose() - $ringPen.Dispose() $dotBrush.Dispose() } $bitmap.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png) diff --git a/package-lock.json b/package-lock.json index 531c010..5206a55 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "openscreen", - "version": "1.4.8", + "version": "1.4.9", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "openscreen", - "version": "1.4.8", + "version": "1.4.9", "dependencies": { "@fix-webm-duration/fix": "^1.0.1", "@pixi/filter-drop-shadow": "^5.2.0", diff --git a/package.json b/package.json index 8c3229d..83bc5d2 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "openscreen", "private": true, - "version": "1.4.8", + "version": "1.4.9", "type": "module", "packageManager": "npm@10.9.4", "engines": { diff --git a/src/components/video-editor/guide/GuidePanel.tsx b/src/components/video-editor/guide/GuidePanel.tsx index ee9507f..2dd1173 100644 --- a/src/components/video-editor/guide/GuidePanel.tsx +++ b/src/components/video-editor/guide/GuidePanel.tsx @@ -367,7 +367,11 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan }); setSession(current); } - if (current.ocrBlocks.length === 0 && current.snapshots.length > 0) { + const ocrCompletedSnapshotIds = new Set(current.ocrBlocks.map((block) => block.snapshotId)); + const hasPendingOcr = current.snapshots.some( + (snapshot) => !snapshot.ocrCompletedAt && !ocrCompletedSnapshotIds.has(snapshot.id), + ); + if (hasPendingOcr) { const ocrResult = await window.electronAPI.guide.runOcr({ recordingId: current.recordingId, }); diff --git a/src/guide/contracts.ts b/src/guide/contracts.ts index 7eabea8..618e2e9 100644 --- a/src/guide/contracts.ts +++ b/src/guide/contracts.ts @@ -56,6 +56,7 @@ export interface GuideSnapshot { offsetMs: number; path: string; markedPath?: string; + ocrCompletedAt?: string; width: number; height: number; } diff --git a/src/guide/snapshot/extractGuideSnapshots.ts b/src/guide/snapshot/extractGuideSnapshots.ts index 592fb62..6736fee 100644 --- a/src/guide/snapshot/extractGuideSnapshots.ts +++ b/src/guide/snapshot/extractGuideSnapshots.ts @@ -177,17 +177,7 @@ function drawSnapshotMarker( point: { x: number; y: number }, ) { const shortSide = Math.max(1, Math.min(canvas.width, canvas.height)); - const haloRadius = clampNumber(Math.round(shortSide * 0.012), 8, 14); - const dotRadius = clampNumber(Math.round(shortSide * 0.0045), 3, 6); - const lineWidth = Math.max(1, Math.round(shortSide * 0.0015)); - - context.beginPath(); - context.arc(point.x, point.y, haloRadius, 0, Math.PI * 2); - context.fillStyle = "rgba(250, 204, 21, 0.34)"; - context.fill(); - context.lineWidth = lineWidth; - context.strokeStyle = "rgba(239, 68, 68, 0.72)"; - context.stroke(); + const dotRadius = clampNumber(Math.round(shortSide * 0.005), 4, 7); context.beginPath(); context.arc(point.x, point.y, dotRadius, 0, Math.PI * 2);