Track guide OCR snapshot progress

This commit is contained in:
huanld
2026-05-28 19:35:42 +07:00
parent cce81dd7c4
commit 0bd26eebf7
8 changed files with 187 additions and 60 deletions
+68
View File
@@ -232,6 +232,74 @@ describe("GuideStore", () => {
await expect(fs.readFile(html.path, "utf-8")).resolves.toContain("<!doctype html>");
});
// Verifies that runOcr only sends snapshots to the OCR client that have not
// been processed yet, so a later run picks up where an earlier one stopped.
it("resumes OCR without reprocessing completed snapshots", async () => {
// Records, in call order, every snapshot id handed to the stub OCR client.
const recognizedSnapshotIds: string[] = [];
const store = new GuideStore(recordingsDir, {
ocrClient: {
// The stub returns an empty block list for every snapshot, so the
// assertions below depend on ocrCompletedAt rather than on stored blocks.
recognize: async (snapshot) => {
recognizedSnapshotIds.push(snapshot.id);
return [];
},
},
});
await store.startSession(115);
// Two hotkey markers, each of which gets its own snapshot below.
const firstMarker = await store.addMarker({
recordingId: 115,
kind: "hotkey",
timeMs: 100,
label: "Ctrl+F12 marker",
normalizedX: 0.25,
normalizedY: 0.35,
});
const secondMarker = await store.addMarker({
recordingId: 115,
kind: "hotkey",
timeMs: 300,
label: "Ctrl+F12 marker",
normalizedX: 0.6,
normalizedY: 0.7,
});
const firstEvent = firstMarker.event;
const secondEvent = secondMarker.event;
// One snapshot per marker event; the PNG payloads are placeholder bytes.
await store.writeSnapshot({
recordingId: 115,
eventId: firstEvent?.id ?? "",
timeMs: 100,
offsetMs: 0,
width: 800,
height: 600,
pngBytes: new Uint8Array([1, 2, 3]).buffer,
});
await store.writeSnapshot({
recordingId: 115,
eventId: secondEvent?.id ?? "",
timeMs: 300,
offsetMs: 0,
width: 800,
height: 600,
pngBytes: new Uint8Array([4, 5, 6]).buffer,
});
// First run: restricted to the first snapshot. The test expects snapshot ids
// of the form `snapshot-<eventId>`.
await store.runOcr({
recordingId: 115,
snapshotIds: [`snapshot-${firstEvent?.id}`],
});
expect(recognizedSnapshotIds).toEqual([`snapshot-${firstEvent?.id}`]);
// Second run: no snapshotIds filter. Only the second snapshot should reach
// the OCR client because the first one was already handled.
const resumedSession = await store.runOcr({ recordingId: 115 });
expect(recognizedSnapshotIds).toEqual([
`snapshot-${firstEvent?.id}`,
`snapshot-${secondEvent?.id}`,
]);
expect(resumedSession.snapshots.every((snapshot) => snapshot.ocrCompletedAt)).toBe(true);
// Third run: everything is complete, so the OCR client must not be called again.
await store.runOcr({ recordingId: 115 });
expect(recognizedSnapshotIds).toEqual([
`snapshot-${firstEvent?.id}`,
`snapshot-${secondEvent?.id}`,
]);
});
it("repairs generic hotkey marker text and attaches AI draft artifacts", async () => {
const store = new GuideStore(recordingsDir, {
ocrClient: {
+108 -36
View File
@@ -58,6 +58,8 @@ const VALID_EVENT_SOURCES = new Set<GuideEventSource>([
"review-ui",
]);
// Latest OCR job promise per normalized recording id. Declared at module
// scope, outside GuideStore. runOcr chains each new run onto the previous
// entry for the same recording and deletes the entry once the most recently
// registered job has settled.
const guideOcrJobsByRecordingId = new Map<string, Promise<GuideSession>>();
export class GuideStoreError extends Error {
constructor(
readonly code: GuideErrorCode,
@@ -259,50 +261,103 @@ export class GuideStore {
}
async runOcr(input: RunGuideOcrInput): Promise<GuideSession> {
const session = await this.readSession(input.recordingId);
const requestedIds = new Set(input.snapshotIds ?? []);
const snapshots =
requestedIds.size > 0
? session.snapshots.filter((snapshot) => requestedIds.has(snapshot.id))
: session.snapshots;
if (snapshots.length === 0) {
throw new GuideStoreError("guide-invalid-input", "No guide snapshots are available for OCR.");
const recordingId = normalizeGuideRecordingId(input.recordingId);
if (!recordingId) {
throw new GuideStoreError("guide-invalid-input", "OCR run is missing recordingId.");
}
const ocrClient =
this.dependencies.ocrClient ??
DefaultGuideOcrClient.fromConfig(await this.dependencies.ocrConfigProvider?.getOcrConfig());
const shouldFocusOcrSnapshots =
this.dependencies.focusOcrSnapshots ?? this.dependencies.ocrClient === undefined;
const eventsById = new Map(session.events.map((event) => [event.id, event]));
const blocks: OcrBlock[] = [];
try {
for (const snapshot of snapshots) {
const focusedSnapshot = shouldFocusOcrSnapshots
? await createFocusedOcrSnapshot({
snapshot,
event: eventsById.get(snapshot.eventId),
outputDir: session.outputDir,
})
: { snapshot };
const recognizedBlocks = await ocrClient.recognize(focusedSnapshot.snapshot);
blocks.push(...remapFocusedOcrBlocks(recognizedBlocks, focusedSnapshot.transform));
const previousJob =
guideOcrJobsByRecordingId.get(recordingId)?.catch(() => undefined) ?? Promise.resolve();
const nextJob = previousJob.then(async () => {
let session = await this.readSession(recordingId);
const requestedIds = new Set(input.snapshotIds ?? []);
const snapshots =
requestedIds.size > 0
? session.snapshots.filter((snapshot) => requestedIds.has(snapshot.id))
: session.snapshots;
if (snapshots.length === 0) {
throw new GuideStoreError(
"guide-invalid-input",
"No guide snapshots are available for OCR.",
);
}
} catch (error) {
throw new GuideStoreError(
"guide-ocr-unavailable",
error instanceof Error ? error.message : "OCR failed.",
true,
);
}
const snapshotIds = new Set(snapshots.map((snapshot) => snapshot.id));
const completedSnapshotIds = new Set(
session.snapshots
.filter((snapshot) => isSnapshotOcrCompleted(snapshot, session.ocrBlocks))
.map((snapshot) => snapshot.id),
);
const pendingSnapshots = snapshots.filter(
(snapshot) => !completedSnapshotIds.has(snapshot.id),
);
if (pendingSnapshots.length === 0) {
if (session.status === "ocr-ready") {
return session;
}
const readySession = touchSession({
...session,
status: "ocr-ready",
candidates: buildGuideStepCandidates(session),
});
await this.writeSession(readySession);
return readySession;
}
const ocrClient =
this.dependencies.ocrClient ??
DefaultGuideOcrClient.fromConfig(await this.dependencies.ocrConfigProvider?.getOcrConfig());
const shouldFocusOcrSnapshots =
this.dependencies.focusOcrSnapshots ?? this.dependencies.ocrClient === undefined;
const eventsById = new Map(session.events.map((event) => [event.id, event]));
try {
for (const snapshot of pendingSnapshots) {
const focusedSnapshot = shouldFocusOcrSnapshots
? await createFocusedOcrSnapshot({
snapshot,
event: eventsById.get(snapshot.eventId),
outputDir: session.outputDir,
})
: { snapshot };
const recognizedBlocks = await ocrClient.recognize(focusedSnapshot.snapshot);
const blocks = remapFocusedOcrBlocks(recognizedBlocks, focusedSnapshot.transform);
session = await this.writeOcrSnapshotProgress(session, snapshot.id, blocks);
}
} catch (error) {
throw new GuideStoreError(
"guide-ocr-unavailable",
error instanceof Error ? error.message : "OCR failed.",
true,
);
}
return session;
});
guideOcrJobsByRecordingId.set(recordingId, nextJob);
try {
return await nextJob;
} finally {
if (guideOcrJobsByRecordingId.get(recordingId) === nextJob) {
guideOcrJobsByRecordingId.delete(recordingId);
}
}
}
private async writeOcrSnapshotProgress(
session: GuideSession,
snapshotId: string,
blocks: OcrBlock[],
): Promise<GuideSession> {
const updatedOcrBlocks = [
...session.ocrBlocks.filter((block) => !snapshotIds.has(block.snapshotId)),
...session.ocrBlocks.filter((block) => block.snapshotId !== snapshotId),
...blocks,
];
const completedAt = new Date().toISOString();
const updatedSnapshots = session.snapshots.map((snapshot) =>
snapshot.id === snapshotId ? { ...snapshot, ocrCompletedAt: completedAt } : snapshot,
);
const draftSession = {
...session,
snapshots: updatedSnapshots,
ocrBlocks: updatedOcrBlocks,
};
const updatedSession = touchSession({
@@ -679,6 +734,7 @@ function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null {
const eventId = normalizeString(input.eventId);
const pathValue = normalizeString(input.path);
const markedPath = normalizeOptionalString(input.markedPath);
const ocrCompletedAt = normalizeOptionalString(input.ocrCompletedAt);
const timeMs = normalizeNonNegativeNumber(input.timeMs);
const offsetMs = normalizeOptionalNumber(input.offsetMs);
const width = normalizePositiveInteger(input.width);
@@ -694,7 +750,23 @@ function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null {
) {
return null;
}
return { id, eventId, timeMs, offsetMs, path: pathValue, markedPath, width, height };
return {
id,
eventId,
timeMs,
offsetMs,
path: pathValue,
markedPath,
ocrCompletedAt,
width,
height,
};
}
/**
 * Reports whether OCR has already been performed for a snapshot.
 *
 * A snapshot counts as completed when it carries a non-empty `ocrCompletedAt`
 * value, or when at least one OCR block refers to it by `snapshotId`.
 *
 * @param snapshot - Snapshot to check.
 * @param ocrBlocks - OCR blocks to search for a matching `snapshotId`.
 * @returns `true` when either condition holds, otherwise `false`.
 */
function isSnapshotOcrCompleted(snapshot: GuideSnapshot, ocrBlocks: OcrBlock[]): boolean {
  // An explicit completion stamp wins; no need to scan the blocks.
  if (snapshot.ocrCompletedAt) {
    return true;
  }
  // Otherwise any block that references this snapshot marks it as completed.
  for (const candidate of ocrBlocks) {
    if (candidate.snapshotId === snapshot.id) {
      return true;
    }
  }
  return false;
}
function normalizeOcrBlock(input: unknown): OcrBlock | null {
+1 -9
View File
@@ -974,21 +974,13 @@ try {
$graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::AntiAlias
$graphics.DrawImage($source, 0, 0, $source.Width, $source.Height)
$shortSide = [Math]::Max(1, [Math]::Min($source.Width, $source.Height))
$haloRadius = [Math]::Min(14, [Math]::Max(8, [Math]::Round($shortSide * 0.012)))
$dotRadius = [Math]::Min(6, [Math]::Max(3, [Math]::Round($shortSide * 0.0045)))
$lineWidth = [Math]::Max(1, [Math]::Round($shortSide * 0.0015))
$dotRadius = [Math]::Min(7, [Math]::Max(4, [Math]::Round($shortSide * 0.005)))
$x = [Math]::Min($source.Width, [Math]::Max(0, ${marker.x.toFixed(4)}))
$y = [Math]::Min($source.Height, [Math]::Max(0, ${marker.y.toFixed(4)}))
$haloBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(87, 250, 204, 21))
$ringPen = [System.Drawing.Pen]::new([System.Drawing.Color]::FromArgb(184, 239, 68, 68), $lineWidth)
$dotBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(235, 220, 38, 38))
try {
$graphics.FillEllipse($haloBrush, $x - $haloRadius, $y - $haloRadius, $haloRadius * 2, $haloRadius * 2)
$graphics.DrawEllipse($ringPen, $x - $haloRadius, $y - $haloRadius, $haloRadius * 2, $haloRadius * 2)
$graphics.FillEllipse($dotBrush, $x - $dotRadius, $y - $dotRadius, $dotRadius * 2, $dotRadius * 2)
} finally {
$haloBrush.Dispose()
$ringPen.Dispose()
$dotBrush.Dispose()
}
$bitmap.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png)
+2 -2
View File
@@ -1,12 +1,12 @@
{
"name": "openscreen",
"version": "1.4.8",
"version": "1.4.9",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "openscreen",
"version": "1.4.8",
"version": "1.4.9",
"dependencies": {
"@fix-webm-duration/fix": "^1.0.1",
"@pixi/filter-drop-shadow": "^5.2.0",
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "openscreen",
"private": true,
"version": "1.4.8",
"version": "1.4.9",
"type": "module",
"packageManager": "npm@10.9.4",
"engines": {
@@ -367,7 +367,11 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
});
setSession(current);
}
if (current.ocrBlocks.length === 0 && current.snapshots.length > 0) {
const ocrCompletedSnapshotIds = new Set(current.ocrBlocks.map((block) => block.snapshotId));
const hasPendingOcr = current.snapshots.some(
(snapshot) => !snapshot.ocrCompletedAt && !ocrCompletedSnapshotIds.has(snapshot.id),
);
if (hasPendingOcr) {
const ocrResult = await window.electronAPI.guide.runOcr({
recordingId: current.recordingId,
});
+1
View File
@@ -56,6 +56,7 @@ export interface GuideSnapshot {
offsetMs: number;
path: string;
markedPath?: string;
ocrCompletedAt?: string;
width: number;
height: number;
}
+1 -11
View File
@@ -177,17 +177,7 @@ function drawSnapshotMarker(
point: { x: number; y: number },
) {
const shortSide = Math.max(1, Math.min(canvas.width, canvas.height));
const haloRadius = clampNumber(Math.round(shortSide * 0.012), 8, 14);
const dotRadius = clampNumber(Math.round(shortSide * 0.0045), 3, 6);
const lineWidth = Math.max(1, Math.round(shortSide * 0.0015));
context.beginPath();
context.arc(point.x, point.y, haloRadius, 0, Math.PI * 2);
context.fillStyle = "rgba(250, 204, 21, 0.34)";
context.fill();
context.lineWidth = lineWidth;
context.strokeStyle = "rgba(239, 68, 68, 0.72)";
context.stroke();
const dotRadius = clampNumber(Math.round(shortSide * 0.005), 4, 7);
context.beginPath();
context.arc(point.x, point.y, dotRadius, 0, Math.PI * 2);