Track guide OCR snapshot progress

This commit is contained in:
huanld
2026-05-28 19:35:42 +07:00
parent cce81dd7c4
commit 0bd26eebf7
8 changed files with 187 additions and 60 deletions
+68
View File
@@ -232,6 +232,74 @@ describe("GuideStore", () => {
await expect(fs.readFile(html.path, "utf-8")).resolves.toContain("<!doctype html>"); await expect(fs.readFile(html.path, "utf-8")).resolves.toContain("<!doctype html>");
}); });
// Exercises three consecutive OCR runs against one recording and checks that
// each snapshot is handed to the OCR client exactly once:
//   1. a run restricted to the first snapshot,
//   2. an unrestricted run that should only pick up the second snapshot,
//   3. another unrestricted run that should recognize nothing new.
it("resumes OCR without reprocessing completed snapshots", async () => {
  const recordingId = 115;
  // Ids of every snapshot passed to the fake OCR client, in call order.
  const recognizedSnapshotIds: string[] = [];
  const store = new GuideStore(recordingsDir, {
    ocrClient: {
      // The fake client yields no OCR blocks, so completion cannot be
      // inferred from stored blocks in this test.
      recognize: async (snapshot) => {
        recognizedSnapshotIds.push(snapshot.id);
        return [];
      },
    },
  });
  await store.startSession(recordingId);

  const { event: firstEvent } = await store.addMarker({
    recordingId,
    kind: "hotkey",
    timeMs: 100,
    label: "Ctrl+F12 marker",
    normalizedX: 0.25,
    normalizedY: 0.35,
  });
  const { event: secondEvent } = await store.addMarker({
    recordingId,
    kind: "hotkey",
    timeMs: 300,
    label: "Ctrl+F12 marker",
    normalizedX: 0.6,
    normalizedY: 0.7,
  });

  // The test addresses snapshots by the `snapshot-<eventId>` naming scheme.
  const firstSnapshotId = `snapshot-${firstEvent?.id}`;
  const secondSnapshotId = `snapshot-${secondEvent?.id}`;

  await store.writeSnapshot({
    recordingId,
    eventId: firstEvent?.id ?? "",
    timeMs: 100,
    offsetMs: 0,
    width: 800,
    height: 600,
    pngBytes: new Uint8Array([1, 2, 3]).buffer,
  });
  await store.writeSnapshot({
    recordingId,
    eventId: secondEvent?.id ?? "",
    timeMs: 300,
    offsetMs: 0,
    width: 800,
    height: 600,
    pngBytes: new Uint8Array([4, 5, 6]).buffer,
  });

  // Run 1: only the first snapshot is requested.
  await store.runOcr({ recordingId, snapshotIds: [firstSnapshotId] });
  expect(recognizedSnapshotIds).toEqual([firstSnapshotId]);

  // Run 2: no filter; only the still-pending second snapshot should be recognized.
  const resumedSession = await store.runOcr({ recordingId });
  expect(recognizedSnapshotIds).toEqual([firstSnapshotId, secondSnapshotId]);
  const everySnapshotCompleted = resumedSession.snapshots.every(
    ({ ocrCompletedAt }) => ocrCompletedAt,
  );
  expect(everySnapshotCompleted).toBe(true);

  // Run 3: everything is complete, so the client must not be called again.
  await store.runOcr({ recordingId });
  expect(recognizedSnapshotIds).toEqual([firstSnapshotId, secondSnapshotId]);
});
it("repairs generic hotkey marker text and attaches AI draft artifacts", async () => { it("repairs generic hotkey marker text and attaches AI draft artifacts", async () => {
const store = new GuideStore(recordingsDir, { const store = new GuideStore(recordingsDir, {
ocrClient: { ocrClient: {
+108 -36
View File
@@ -58,6 +58,8 @@ const VALID_EVENT_SOURCES = new Set<GuideEventSource>([
"review-ui", "review-ui",
]); ]);
// Latest OCR job promise per normalized recording id. runOcr chains each new
// job after the stored one (ignoring its failure) and removes the entry once
// the job it registered settles and is still the latest for that recording.
const guideOcrJobsByRecordingId = new Map<string, Promise<GuideSession>>();
export class GuideStoreError extends Error { export class GuideStoreError extends Error {
constructor( constructor(
readonly code: GuideErrorCode, readonly code: GuideErrorCode,
@@ -259,50 +261,103 @@ export class GuideStore {
} }
async runOcr(input: RunGuideOcrInput): Promise<GuideSession> { async runOcr(input: RunGuideOcrInput): Promise<GuideSession> {
const session = await this.readSession(input.recordingId); const recordingId = normalizeGuideRecordingId(input.recordingId);
const requestedIds = new Set(input.snapshotIds ?? []); if (!recordingId) {
const snapshots = throw new GuideStoreError("guide-invalid-input", "OCR run is missing recordingId.");
requestedIds.size > 0
? session.snapshots.filter((snapshot) => requestedIds.has(snapshot.id))
: session.snapshots;
if (snapshots.length === 0) {
throw new GuideStoreError("guide-invalid-input", "No guide snapshots are available for OCR.");
} }
const ocrClient = const previousJob =
this.dependencies.ocrClient ?? guideOcrJobsByRecordingId.get(recordingId)?.catch(() => undefined) ?? Promise.resolve();
DefaultGuideOcrClient.fromConfig(await this.dependencies.ocrConfigProvider?.getOcrConfig()); const nextJob = previousJob.then(async () => {
const shouldFocusOcrSnapshots = let session = await this.readSession(recordingId);
this.dependencies.focusOcrSnapshots ?? this.dependencies.ocrClient === undefined; const requestedIds = new Set(input.snapshotIds ?? []);
const eventsById = new Map(session.events.map((event) => [event.id, event])); const snapshots =
const blocks: OcrBlock[] = []; requestedIds.size > 0
try { ? session.snapshots.filter((snapshot) => requestedIds.has(snapshot.id))
for (const snapshot of snapshots) { : session.snapshots;
const focusedSnapshot = shouldFocusOcrSnapshots if (snapshots.length === 0) {
? await createFocusedOcrSnapshot({ throw new GuideStoreError(
snapshot, "guide-invalid-input",
event: eventsById.get(snapshot.eventId), "No guide snapshots are available for OCR.",
outputDir: session.outputDir, );
})
: { snapshot };
const recognizedBlocks = await ocrClient.recognize(focusedSnapshot.snapshot);
blocks.push(...remapFocusedOcrBlocks(recognizedBlocks, focusedSnapshot.transform));
} }
} catch (error) {
throw new GuideStoreError(
"guide-ocr-unavailable",
error instanceof Error ? error.message : "OCR failed.",
true,
);
}
const snapshotIds = new Set(snapshots.map((snapshot) => snapshot.id)); const completedSnapshotIds = new Set(
session.snapshots
.filter((snapshot) => isSnapshotOcrCompleted(snapshot, session.ocrBlocks))
.map((snapshot) => snapshot.id),
);
const pendingSnapshots = snapshots.filter(
(snapshot) => !completedSnapshotIds.has(snapshot.id),
);
if (pendingSnapshots.length === 0) {
if (session.status === "ocr-ready") {
return session;
}
const readySession = touchSession({
...session,
status: "ocr-ready",
candidates: buildGuideStepCandidates(session),
});
await this.writeSession(readySession);
return readySession;
}
const ocrClient =
this.dependencies.ocrClient ??
DefaultGuideOcrClient.fromConfig(await this.dependencies.ocrConfigProvider?.getOcrConfig());
const shouldFocusOcrSnapshots =
this.dependencies.focusOcrSnapshots ?? this.dependencies.ocrClient === undefined;
const eventsById = new Map(session.events.map((event) => [event.id, event]));
try {
for (const snapshot of pendingSnapshots) {
const focusedSnapshot = shouldFocusOcrSnapshots
? await createFocusedOcrSnapshot({
snapshot,
event: eventsById.get(snapshot.eventId),
outputDir: session.outputDir,
})
: { snapshot };
const recognizedBlocks = await ocrClient.recognize(focusedSnapshot.snapshot);
const blocks = remapFocusedOcrBlocks(recognizedBlocks, focusedSnapshot.transform);
session = await this.writeOcrSnapshotProgress(session, snapshot.id, blocks);
}
} catch (error) {
throw new GuideStoreError(
"guide-ocr-unavailable",
error instanceof Error ? error.message : "OCR failed.",
true,
);
}
return session;
});
guideOcrJobsByRecordingId.set(recordingId, nextJob);
try {
return await nextJob;
} finally {
if (guideOcrJobsByRecordingId.get(recordingId) === nextJob) {
guideOcrJobsByRecordingId.delete(recordingId);
}
}
}
private async writeOcrSnapshotProgress(
session: GuideSession,
snapshotId: string,
blocks: OcrBlock[],
): Promise<GuideSession> {
const updatedOcrBlocks = [ const updatedOcrBlocks = [
...session.ocrBlocks.filter((block) => !snapshotIds.has(block.snapshotId)), ...session.ocrBlocks.filter((block) => block.snapshotId !== snapshotId),
...blocks, ...blocks,
]; ];
const completedAt = new Date().toISOString();
const updatedSnapshots = session.snapshots.map((snapshot) =>
snapshot.id === snapshotId ? { ...snapshot, ocrCompletedAt: completedAt } : snapshot,
);
const draftSession = { const draftSession = {
...session, ...session,
snapshots: updatedSnapshots,
ocrBlocks: updatedOcrBlocks, ocrBlocks: updatedOcrBlocks,
}; };
const updatedSession = touchSession({ const updatedSession = touchSession({
@@ -679,6 +734,7 @@ function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null {
const eventId = normalizeString(input.eventId); const eventId = normalizeString(input.eventId);
const pathValue = normalizeString(input.path); const pathValue = normalizeString(input.path);
const markedPath = normalizeOptionalString(input.markedPath); const markedPath = normalizeOptionalString(input.markedPath);
const ocrCompletedAt = normalizeOptionalString(input.ocrCompletedAt);
const timeMs = normalizeNonNegativeNumber(input.timeMs); const timeMs = normalizeNonNegativeNumber(input.timeMs);
const offsetMs = normalizeOptionalNumber(input.offsetMs); const offsetMs = normalizeOptionalNumber(input.offsetMs);
const width = normalizePositiveInteger(input.width); const width = normalizePositiveInteger(input.width);
@@ -694,7 +750,23 @@ function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null {
) { ) {
return null; return null;
} }
return { id, eventId, timeMs, offsetMs, path: pathValue, markedPath, width, height }; return {
id,
eventId,
timeMs,
offsetMs,
path: pathValue,
markedPath,
ocrCompletedAt,
width,
height,
};
}
/**
 * Reports whether OCR has already been performed for a snapshot.
 *
 * A snapshot counts as completed when it carries a truthy `ocrCompletedAt`
 * value, or when at least one of the given OCR blocks references its id.
 * A snapshot whose OCR produced no blocks is therefore only recognized as
 * completed through `ocrCompletedAt`.
 *
 * @param snapshot - Snapshot to check.
 * @param ocrBlocks - OCR blocks to search for a matching `snapshotId`.
 * @returns `true` when the snapshot is considered OCR-complete, otherwise `false`.
 */
function isSnapshotOcrCompleted(snapshot: GuideSnapshot, ocrBlocks: OcrBlock[]): boolean {
  // An explicit completion marker wins; an empty string is treated as "not set".
  if (snapshot.ocrCompletedAt) {
    return true;
  }
  // Otherwise fall back to the presence of OCR blocks for this snapshot.
  return ocrBlocks.some((block) => block.snapshotId === snapshot.id);
}
function normalizeOcrBlock(input: unknown): OcrBlock | null { function normalizeOcrBlock(input: unknown): OcrBlock | null {
+1 -9
View File
@@ -974,21 +974,13 @@ try {
$graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::AntiAlias $graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::AntiAlias
$graphics.DrawImage($source, 0, 0, $source.Width, $source.Height) $graphics.DrawImage($source, 0, 0, $source.Width, $source.Height)
$shortSide = [Math]::Max(1, [Math]::Min($source.Width, $source.Height)) $shortSide = [Math]::Max(1, [Math]::Min($source.Width, $source.Height))
$haloRadius = [Math]::Min(14, [Math]::Max(8, [Math]::Round($shortSide * 0.012))) $dotRadius = [Math]::Min(7, [Math]::Max(4, [Math]::Round($shortSide * 0.005)))
$dotRadius = [Math]::Min(6, [Math]::Max(3, [Math]::Round($shortSide * 0.0045)))
$lineWidth = [Math]::Max(1, [Math]::Round($shortSide * 0.0015))
$x = [Math]::Min($source.Width, [Math]::Max(0, ${marker.x.toFixed(4)})) $x = [Math]::Min($source.Width, [Math]::Max(0, ${marker.x.toFixed(4)}))
$y = [Math]::Min($source.Height, [Math]::Max(0, ${marker.y.toFixed(4)})) $y = [Math]::Min($source.Height, [Math]::Max(0, ${marker.y.toFixed(4)}))
$haloBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(87, 250, 204, 21))
$ringPen = [System.Drawing.Pen]::new([System.Drawing.Color]::FromArgb(184, 239, 68, 68), $lineWidth)
$dotBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(235, 220, 38, 38)) $dotBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(235, 220, 38, 38))
try { try {
$graphics.FillEllipse($haloBrush, $x - $haloRadius, $y - $haloRadius, $haloRadius * 2, $haloRadius * 2)
$graphics.DrawEllipse($ringPen, $x - $haloRadius, $y - $haloRadius, $haloRadius * 2, $haloRadius * 2)
$graphics.FillEllipse($dotBrush, $x - $dotRadius, $y - $dotRadius, $dotRadius * 2, $dotRadius * 2) $graphics.FillEllipse($dotBrush, $x - $dotRadius, $y - $dotRadius, $dotRadius * 2, $dotRadius * 2)
} finally { } finally {
$haloBrush.Dispose()
$ringPen.Dispose()
$dotBrush.Dispose() $dotBrush.Dispose()
} }
$bitmap.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png) $bitmap.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png)
+2 -2
View File
@@ -1,12 +1,12 @@
{ {
"name": "openscreen", "name": "openscreen",
"version": "1.4.8", "version": "1.4.9",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "openscreen", "name": "openscreen",
"version": "1.4.8", "version": "1.4.9",
"dependencies": { "dependencies": {
"@fix-webm-duration/fix": "^1.0.1", "@fix-webm-duration/fix": "^1.0.1",
"@pixi/filter-drop-shadow": "^5.2.0", "@pixi/filter-drop-shadow": "^5.2.0",
+1 -1
View File
@@ -1,7 +1,7 @@
{ {
"name": "openscreen", "name": "openscreen",
"private": true, "private": true,
"version": "1.4.8", "version": "1.4.9",
"type": "module", "type": "module",
"packageManager": "npm@10.9.4", "packageManager": "npm@10.9.4",
"engines": { "engines": {
@@ -367,7 +367,11 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
}); });
setSession(current); setSession(current);
} }
if (current.ocrBlocks.length === 0 && current.snapshots.length > 0) { const ocrCompletedSnapshotIds = new Set(current.ocrBlocks.map((block) => block.snapshotId));
const hasPendingOcr = current.snapshots.some(
(snapshot) => !snapshot.ocrCompletedAt && !ocrCompletedSnapshotIds.has(snapshot.id),
);
if (hasPendingOcr) {
const ocrResult = await window.electronAPI.guide.runOcr({ const ocrResult = await window.electronAPI.guide.runOcr({
recordingId: current.recordingId, recordingId: current.recordingId,
}); });
+1
View File
@@ -56,6 +56,7 @@ export interface GuideSnapshot {
offsetMs: number; offsetMs: number;
path: string; path: string;
markedPath?: string; markedPath?: string;
ocrCompletedAt?: string;
width: number; width: number;
height: number; height: number;
} }
+1 -11
View File
@@ -177,17 +177,7 @@ function drawSnapshotMarker(
point: { x: number; y: number }, point: { x: number; y: number },
) { ) {
const shortSide = Math.max(1, Math.min(canvas.width, canvas.height)); const shortSide = Math.max(1, Math.min(canvas.width, canvas.height));
const haloRadius = clampNumber(Math.round(shortSide * 0.012), 8, 14); const dotRadius = clampNumber(Math.round(shortSide * 0.005), 4, 7);
const dotRadius = clampNumber(Math.round(shortSide * 0.0045), 3, 6);
const lineWidth = Math.max(1, Math.round(shortSide * 0.0015));
context.beginPath();
context.arc(point.x, point.y, haloRadius, 0, Math.PI * 2);
context.fillStyle = "rgba(250, 204, 21, 0.34)";
context.fill();
context.lineWidth = lineWidth;
context.strokeStyle = "rgba(239, 68, 68, 0.72)";
context.stroke();
context.beginPath(); context.beginPath();
context.arc(point.x, point.y, dotRadius, 0, Math.PI * 2); context.arc(point.x, point.y, dotRadius, 0, Math.PI * 2);