148 lines
3.4 KiB
TypeScript
148 lines
3.4 KiB
TypeScript
import fs from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
|
import type { GuideSnapshot, OcrBlock } from "../../../src/guide/contracts";
|
|
import {
|
|
DefaultGuideOcrClient,
|
|
normalizeOcrResponse,
|
|
PaddleOcrHttpClient,
|
|
parseWindowsOcrPayload,
|
|
} from "./paddleOcrClient";
|
|
|
|
const snapshot: GuideSnapshot = {
|
|
id: "snapshot-1",
|
|
eventId: "event-1",
|
|
timeMs: 1000,
|
|
offsetMs: 500,
|
|
path: "/tmp/step-001.png",
|
|
width: 1000,
|
|
height: 800,
|
|
};
|
|
|
|
afterEach(() => {
|
|
vi.unstubAllGlobals();
|
|
});
|
|
|
|
describe("normalizeOcrResponse", () => {
|
|
it("normalizes pixel boxes into guide OCR blocks", () => {
|
|
const blocks = normalizeOcrResponse(
|
|
{
|
|
blocks: [
|
|
{
|
|
text: "Save",
|
|
confidence: 92,
|
|
box: { x: 400, y: 320, width: 120, height: 40 },
|
|
},
|
|
],
|
|
},
|
|
snapshot,
|
|
);
|
|
|
|
expect(blocks).toEqual([
|
|
{
|
|
id: "ocr-snapshot-1-1",
|
|
snapshotId: "snapshot-1",
|
|
text: "Save",
|
|
confidence: 0.92,
|
|
box: { x: 0.4, y: 0.4, width: 0.12, height: 0.05 },
|
|
},
|
|
]);
|
|
});
|
|
|
|
it("normalizes polygon responses", () => {
|
|
const blocks = normalizeOcrResponse(
|
|
[
|
|
{
|
|
text: "Next",
|
|
score: 0.8,
|
|
bbox: [
|
|
[100, 200],
|
|
[300, 200],
|
|
[300, 260],
|
|
[100, 260],
|
|
],
|
|
},
|
|
],
|
|
snapshot,
|
|
);
|
|
|
|
expect(blocks[0]).toMatchObject({
|
|
text: "Next",
|
|
confidence: 0.8,
|
|
box: { x: 0.1, y: 0.25, width: 0.2, height: 0.075 },
|
|
});
|
|
});
|
|
});
|
|
|
|
describe("PaddleOcrHttpClient", () => {
|
|
it("sends the selected OCR profile to the local service", async () => {
|
|
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openscreen-ocr-client-"));
|
|
const imagePath = path.join(tempDir, "step.png");
|
|
await fs.writeFile(imagePath, Buffer.from([137, 80, 78, 71]));
|
|
const requests: unknown[] = [];
|
|
vi.stubGlobal(
|
|
"fetch",
|
|
vi.fn(async (_url: string, init?: RequestInit) => {
|
|
requests.push(JSON.parse(String(init?.body ?? "{}")));
|
|
return new Response(JSON.stringify({ blocks: [] }), {
|
|
status: 200,
|
|
headers: { "content-type": "application/json" },
|
|
});
|
|
}),
|
|
);
|
|
|
|
const client = new PaddleOcrHttpClient("https://ocr.example.test", "vi,en", "hybrid");
|
|
await client.recognize({ ...snapshot, path: imagePath });
|
|
|
|
expect(requests[0]).toMatchObject({
|
|
language: "vi,en",
|
|
profile: "hybrid",
|
|
path: imagePath,
|
|
});
|
|
await fs.rm(tempDir, { recursive: true, force: true });
|
|
});
|
|
});
|
|
|
|
describe("DefaultGuideOcrClient", () => {
|
|
it("falls back when the HTTP OCR service is unavailable", async () => {
|
|
const fallbackBlock: OcrBlock = {
|
|
id: "ocr-snapshot-1-1",
|
|
snapshotId: "snapshot-1",
|
|
text: "Save",
|
|
confidence: 0.75,
|
|
box: { x: 0.1, y: 0.2, width: 0.3, height: 0.4 },
|
|
};
|
|
const client = new DefaultGuideOcrClient(
|
|
{
|
|
recognize: async () => {
|
|
throw new Error("HTTP down");
|
|
},
|
|
},
|
|
{
|
|
recognize: async () => [fallbackBlock],
|
|
},
|
|
);
|
|
|
|
await expect(client.recognize(snapshot)).resolves.toEqual([fallbackBlock]);
|
|
});
|
|
});
|
|
|
|
describe("parseWindowsOcrPayload", () => {
|
|
it("recovers from raw control characters in OCR text", () => {
|
|
const payload = parseWindowsOcrPayload(
|
|
'{"blocks":[{"text":"Save\u0001now","confidence":0.75,"box":{"x":1,"y":2,"width":3,"height":4}}]}',
|
|
);
|
|
|
|
expect(payload).toEqual({
|
|
blocks: [
|
|
{
|
|
text: "Save now",
|
|
confidence: 0.75,
|
|
box: { x: 1, y: 2, width: 3, height: 4 },
|
|
},
|
|
],
|
|
});
|
|
});
|
|
});
|