Add auto guide generation with bundled OCR
This commit is contained in:
Vendored
+88
@@ -27,6 +27,94 @@ interface Window {
|
||||
invokeNativeBridge: <TData = unknown>(
|
||||
request: import("../src/native/contracts").NativeBridgeRequest,
|
||||
) => Promise<import("../src/native/contracts").NativeBridgeResponse<TData>>;
|
||||
/**
 * Guide-generation API surface. Every method returns a promise of a
 * `GuideIpcResult` envelope (declared in `src/guide/contracts`).
 * NOTE(review): presumably backed by the `guide:*` IPC handlers in the main
 * process — confirm against the preload script.
 */
guide: {
  /** Takes a recording id; the result carries a `GuideSession`. */
  startSession: (
    recordingId: import("../src/guide/contracts").GuideRecordingIdInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<import("../src/guide/contracts").GuideSession>
  >;
  /** Takes a recording id; the result carries a `GuideSession`. */
  readSession: (
    recordingId: import("../src/guide/contracts").GuideRecordingIdInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<import("../src/guide/contracts").GuideSession>
  >;
  /** The result carries both the session and the single `GuideEvent`. */
  addMarker: (
    input: import("../src/guide/contracts").AddGuideMarkerInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<{
      session: import("../src/guide/contracts").GuideSession;
      event: import("../src/guide/contracts").GuideEvent;
    }>
  >;
  finalizeEvents: (
    input: import("../src/guide/contracts").FinalizeGuideEventsInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<import("../src/guide/contracts").GuideSession>
  >;
  writeSnapshot: (
    input: import("../src/guide/contracts").WriteGuideSnapshotInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<import("../src/guide/contracts").GuideSession>
  >;
  runOcr: (
    input: import("../src/guide/contracts").RunGuideOcrInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<import("../src/guide/contracts").GuideSession>
  >;
  generateDraft: (
    input: import("../src/guide/contracts").GenerateGuideDraftInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<import("../src/guide/contracts").GuideSession>
  >;
  /** Takes no arguments; the result carries `GuideAiSettings`. */
  getAiSettings: () => Promise<
    import("../src/guide/contracts").GuideIpcResult<
      import("../src/guide/contracts").GuideAiSettings
    >
  >;
  saveAiSettings: (
    input: import("../src/guide/contracts").SaveGuideAiSettingsInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<
      import("../src/guide/contracts").GuideAiSettings
    >
  >;
  saveGuide: (
    input: import("../src/guide/contracts").SaveGuideInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<import("../src/guide/contracts").GuideSession>
  >;
  /** Shares its input and result types with `exportHtml`. */
  exportMarkdown: (
    input: import("../src/guide/contracts").ExportGuideInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<
      import("../src/guide/contracts").ExportGuideResult
    >
  >;
  /** Shares its input and result types with `exportMarkdown`. */
  exportHtml: (
    input: import("../src/guide/contracts").ExportGuideInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<
      import("../src/guide/contracts").ExportGuideResult
    >
  >;
  /** On success the payload is the literal `{ discarded: true }`. */
  discardSession: (
    input: import("../src/guide/contracts").DiscardGuideSessionInput,
  ) => Promise<
    import("../src/guide/contracts").GuideIpcResult<{
      discarded: true;
    }>
  >;
};
|
||||
getSources: (opts: Electron.SourcesOptions) => Promise<ProcessedDesktopSource[]>;
|
||||
switchToEditor: () => Promise<void>;
|
||||
switchToHud: () => Promise<void>;
|
||||
|
||||
@@ -0,0 +1,181 @@
|
||||
import type {
|
||||
GeneratedGuide,
|
||||
GuideLanguage,
|
||||
GuideSession,
|
||||
GuideStepCandidate,
|
||||
} from "../../../src/guide/contracts";
|
||||
import { buildGuideDraftPrompt } from "../../../src/guide/promptBuilder";
|
||||
import type { DeepSeekGuideConfigProvider } from "./deepseekSettingsStore";
|
||||
|
||||
/**
 * Contract for a backend that produces a `GeneratedGuide` from a guide
 * session, its step candidates, and a target language.
 */
export interface GuideDraftClient {
  /**
   * @param input - The session, the step candidates, and the guide language.
   * @returns A promise of the generated guide.
   */
  generate(input: {
    session: GuideSession;
    candidates: GuideStepCandidate[];
    language: GuideLanguage;
  }): Promise<GeneratedGuide>;
}
|
||||
|
||||
/**
 * Error raised by the DeepSeek guide client. Carries a machine-readable
 * `code` and a `retryable` hint alongside the usual message.
 */
export class DeepSeekGuideClientError extends Error {
  /** Failure category. */
  readonly code: "guide-ai-key-missing" | "guide-ai-request-failed" | "guide-ai-invalid-output";
  /** Whether the thrower considers a retry worthwhile; `false` unless stated. */
  readonly retryable: boolean;

  /**
   * @param code - Failure category.
   * @param message - Human-readable description, forwarded to `Error`.
   * @param retryable - Retry hint; defaults to `false`.
   */
  constructor(
    code: "guide-ai-key-missing" | "guide-ai-request-failed" | "guide-ai-invalid-output",
    message: string,
    retryable = false,
  ) {
    super(message);
    this.code = code;
    this.retryable = retryable;
    this.name = "DeepSeekGuideClientError";
  }
}
|
||||
|
||||
/**
 * The part of the chat-completions response body this module reads.
 * Every level is optional because the payload is untrusted JSON.
 */
interface DeepSeekChatResponse {
  choices?: {
    message?: {
      /** Text of the reply; expected to hold the guide JSON. */
      content?: string;
    };
  }[];
}
|
||||
|
||||
/**
 * `GuideDraftClient` that asks a DeepSeek chat-completions endpoint to write
 * the guide.
 *
 * Configuration comes from the injected `configProvider` when one is given.
 * Otherwise the constructor fallbacks are used, which default to the
 * `DEEPSEEK_API_KEY`, `DEEPSEEK_BASE_URL` and `DEEPSEEK_MODEL` environment
 * variables.
 */
export class DeepSeekGuideClient implements GuideDraftClient {
  constructor(
    private readonly configProvider?: DeepSeekGuideConfigProvider,
    private readonly fallbackApiKey = process.env.DEEPSEEK_API_KEY,
    private readonly fallbackBaseUrl = process.env.DEEPSEEK_BASE_URL ?? "https://api.deepseek.com",
    private readonly fallbackModel = process.env.DEEPSEEK_MODEL ?? "deepseek-chat",
  ) {}

  /**
   * Sends the guide prompt to `<baseUrl>/chat/completions` and parses the
   * first choice's message content as a `GeneratedGuide`.
   *
   * @param input - Session, step candidates and language for the prompt.
   * @returns The parsed guide.
   * @throws DeepSeekGuideClientError
   *   - `guide-ai-key-missing` when no API key is configured;
   *   - `guide-ai-request-failed` when the request cannot be completed or the
   *     server answers with a non-2xx status (retryable only for transport
   *     failures and for 408 / 429 / 5xx);
   *   - `guide-ai-invalid-output` when the body is not JSON, is empty, or does
   *     not describe a guide.
   */
  async generate(input: {
    session: GuideSession;
    candidates: GuideStepCandidate[];
    language: GuideLanguage;
  }): Promise<GeneratedGuide> {
    const config = await this.resolveConfig();
    if (!config.apiKey) {
      throw new DeepSeekGuideClientError(
        "guide-ai-key-missing",
        "DeepSeek API key is not configured.",
      );
    }

    let response: Response;
    try {
      response = await fetch(`${config.baseUrl.replace(/\/$/, "")}/chat/completions`, {
        method: "POST",
        headers: {
          "content-type": "application/json",
          authorization: `Bearer ${config.apiKey}`,
        },
        body: JSON.stringify({
          model: config.model,
          temperature: 0.2,
          response_format: { type: "json_object" },
          messages: [
            {
              role: "system",
              content:
                "You convert UI interaction telemetry into concise software user-guide steps.",
            },
            {
              role: "user",
              content: buildGuideDraftPrompt(input),
            },
          ],
        }),
      });
    } catch (error) {
      throw new DeepSeekGuideClientError(
        "guide-ai-request-failed",
        `DeepSeek request failed: ${error instanceof Error ? error.message : String(error)}`,
        true,
      );
    }

    if (!response.ok) {
      // Client errors such as 400/401/402 will not succeed on an identical
      // retry, so only transient statuses are flagged as retryable.
      throw new DeepSeekGuideClientError(
        "guide-ai-request-failed",
        `DeepSeek returned HTTP ${response.status}.`,
        DeepSeekGuideClient.isRetryableStatus(response.status),
      );
    }

    let payload: DeepSeekChatResponse | null;
    try {
      payload = (await response.json()) as DeepSeekChatResponse | null;
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      // A SyntaxError means the body arrived but is not JSON; anything else
      // means reading the body failed part-way, which is a transport problem.
      if (error instanceof SyntaxError) {
        throw new DeepSeekGuideClientError(
          "guide-ai-invalid-output",
          `DeepSeek returned a non-JSON response: ${detail}`,
        );
      }
      throw new DeepSeekGuideClientError(
        "guide-ai-request-failed",
        `DeepSeek request failed: ${detail}`,
        true,
      );
    }

    // `payload` may legitimately be `null` (the JSON literal), hence `?.`.
    const content = payload?.choices?.[0]?.message?.content;
    if (typeof content !== "string" || !content) {
      throw new DeepSeekGuideClientError(
        "guide-ai-invalid-output",
        "DeepSeek returned an empty response.",
      );
    }
    return parseGeneratedGuide(content);
  }

  /** True for HTTP statuses where repeating the same request may succeed. */
  private static isRetryableStatus(status: number): boolean {
    return status === 408 || status === 429 || status >= 500;
  }

  /** Returns the provider's configuration, or the constructor fallbacks. */
  private async resolveConfig(): Promise<{ apiKey?: string; baseUrl: string; model: string }> {
    if (this.configProvider) {
      return await this.configProvider.getDeepSeekConfig();
    }
    return {
      apiKey: this.fallbackApiKey,
      baseUrl: this.fallbackBaseUrl,
      model: this.fallbackModel,
    };
  }
}
|
||||
|
||||
/**
 * Turns the model's raw text reply into a `GeneratedGuide`.
 *
 * The text is unfenced, JSON-decoded and normalized. Any failure along the
 * way — including a decoded value with the wrong shape — surfaces as a
 * `DeepSeekGuideClientError` with code `guide-ai-invalid-output`.
 */
function parseGeneratedGuide(content: string): GeneratedGuide {
  let failure: unknown;
  try {
    const decoded = JSON.parse(stripCodeFence(content)) as unknown;
    const guide = normalizeGeneratedGuide(decoded);
    if (guide) {
      return guide;
    }
    failure = new Error("Unexpected guide JSON shape.");
  } catch (error) {
    failure = error;
  }

  const reason = failure instanceof Error ? failure.message : String(failure);
  throw new DeepSeekGuideClientError(
    "guide-ai-invalid-output",
    `DeepSeek response is not valid guide JSON: ${reason}`,
  );
}
|
||||
|
||||
/**
 * Removes a surrounding Markdown code fence (``` or ```json) from `content`
 * and trims the result.
 *
 * The text is trimmed before the fence patterns run: both patterns are
 * anchored to the very start / end of the string, so a reply with a leading
 * newline or trailing whitespace would otherwise keep its fence.
 *
 * @param content - Raw model reply.
 * @returns The reply without its outer fence and surrounding whitespace.
 */
function stripCodeFence(content: string): string {
  return content
    .trim()
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/, "")
    .trim();
}
|
||||
|
||||
/**
 * Validates decoded JSON and rebuilds it as a `GeneratedGuide`.
 *
 * Returns `null` unless `value` is an object with a string `title` and an
 * array `steps`. Steps that are not objects, or that lack a string `title`
 * or `instruction`, are dropped. For the remaining steps:
 *   - `order` is kept when it is a finite number, otherwise it becomes the
 *     step's 1-based position in the incoming array;
 *   - `id` is kept when it is a non-blank string, otherwise it becomes
 *     `guide-step-<order>`;
 *   - `screenshotPath` / `sourceCandidateId` are copied only when strings.
 */
function normalizeGeneratedGuide(value: unknown): GeneratedGuide | null {
  if (typeof value !== "object" || value === null) {
    return null;
  }

  const { title, summary, steps: incomingSteps } = value as Partial<GeneratedGuide>;
  if (typeof title !== "string" || !Array.isArray(incomingSteps)) {
    return null;
  }

  // flatMap lets an invalid entry contribute nothing instead of a null
  // placeholder that has to be filtered out afterwards.
  const steps = incomingSteps.flatMap((entry, index): GeneratedGuide["steps"] => {
    if (typeof entry !== "object" || entry === null) {
      return [];
    }
    const raw = entry as Partial<GeneratedGuide["steps"][number]>;
    if (typeof raw.title !== "string" || typeof raw.instruction !== "string") {
      return [];
    }

    const declaredOrder = raw.order;
    const order =
      typeof declaredOrder === "number" && Number.isFinite(declaredOrder)
        ? declaredOrder
        : index + 1;
    const id = typeof raw.id === "string" && raw.id.trim() ? raw.id : `guide-step-${order}`;

    return [
      {
        id,
        order,
        title: raw.title,
        instruction: raw.instruction,
        ...(typeof raw.screenshotPath === "string" ? { screenshotPath: raw.screenshotPath } : {}),
        ...(typeof raw.sourceCandidateId === "string"
          ? { sourceCandidateId: raw.sourceCandidateId }
          : {}),
      },
    ];
  });

  return {
    title,
    summary: typeof summary === "string" ? summary : undefined,
    steps,
  };
}
|
||||
@@ -0,0 +1,157 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type { GuideAiSettings, SaveGuideAiSettingsInput } from "../../../src/guide/contracts";
|
||||
|
||||
/** Connection settings handed to the DeepSeek guide client. */
export interface DeepSeekGuideConfig {
  /** API key; may be absent when none is configured. */
  apiKey?: string;
  /** Base URL of the API. */
  baseUrl: string;
  /** Model identifier. */
  model: string;
}
|
||||
|
||||
/** Source of `DeepSeekGuideConfig` values, resolved asynchronously. */
export interface DeepSeekGuideConfigProvider {
  /** Resolves the configuration to use. */
  getDeepSeekConfig(): Promise<DeepSeekGuideConfig>;
}
|
||||
|
||||
/**
 * On-disk shape of the guide AI settings file. It has no field for the API
 * key itself, only for the name of the environment variable holding it.
 */
interface PersistedGuideAiSettings {
  /** Only version 1 exists. */
  schemaVersion: 1;
  deepseek?: {
    /** Name of the environment variable the API key is read from. */
    apiKeyEnvName?: string;
    baseUrl?: string;
    model?: string;
    /** ISO timestamp of the last save. */
    updatedAt?: string;
  };
}
|
||||
|
||||
/** Environment variable consulted for the API key when none is configured. */
const DEFAULT_DEEPSEEK_API_KEY_ENV_NAME = "DEEPSEEK_API_KEY";

/** Base URL used when the configured one is missing or invalid. */
const DEFAULT_DEEPSEEK_BASE_URL = "https://api.deepseek.com";

/** Model used when none is configured. */
const DEFAULT_DEEPSEEK_MODEL = "deepseek-chat";
|
||||
|
||||
/**
 * File-backed store for the DeepSeek guide settings.
 *
 * Only the base URL, the model, and the *name* of the environment variable
 * holding the API key are persisted. The key itself is always read from
 * `process.env` at call time.
 */
export class DeepSeekSettingsStore implements DeepSeekGuideConfigProvider {
  /** @param filePath - Location of the JSON settings file. */
  constructor(private readonly filePath: string) {}

  /**
   * Reports the effective settings without exposing the key: `hasApiKey`
   * and `storage` only say whether the configured environment variable is
   * currently set.
   */
  async getStatus(): Promise<GuideAiSettings> {
    const raw = await this.readSettings();
    const apiKeyEnvName = normalizeEnvName(raw?.deepseek?.apiKeyEnvName);
    const activeApiKey = process.env[apiKeyEnvName];

    return {
      deepseek: {
        hasApiKey: Boolean(activeApiKey),
        apiKeyEnvName,
        baseUrl: normalizeBaseUrl(raw?.deepseek?.baseUrl ?? process.env.DEEPSEEK_BASE_URL),
        model: normalizeModel(raw?.deepseek?.model ?? process.env.DEEPSEEK_MODEL),
        storage: activeApiKey ? "environment" : "none",
        encryptionAvailable: false,
        updatedAt: raw?.deepseek?.updatedAt,
      },
    };
  }

  /**
   * Merges `input` into the persisted settings, stamps `updatedAt`, writes
   * the file, and returns the resulting status.
   *
   * `clearDeepseekApiKeyEnvName` takes precedence over
   * `deepseekApiKeyEnvName` when both are supplied.
   */
  async save(input: SaveGuideAiSettingsInput): Promise<GuideAiSettings> {
    const current = (await this.readSettings()) ?? { schemaVersion: 1 };
    const currentDeepSeek = current.deepseek ?? {};
    const nextDeepSeek = {
      ...currentDeepSeek,
      baseUrl: normalizeBaseUrl(input.baseUrl ?? currentDeepSeek.baseUrl),
      model: normalizeModel(input.model ?? currentDeepSeek.model),
      updatedAt: new Date().toISOString(),
    };

    if (input.clearDeepseekApiKeyEnvName) {
      delete nextDeepSeek.apiKeyEnvName;
    } else if (input.deepseekApiKeyEnvName !== undefined) {
      nextDeepSeek.apiKeyEnvName = normalizeEnvName(input.deepseekApiKeyEnvName);
    }

    await this.writeSettings({
      schemaVersion: 1,
      deepseek: nextDeepSeek,
    });
    return await this.getStatus();
  }

  /** Resolves the configuration for the DeepSeek client, key included. */
  async getDeepSeekConfig(): Promise<DeepSeekGuideConfig> {
    const raw = await this.readSettings();
    const apiKeyEnvName = normalizeEnvName(raw?.deepseek?.apiKeyEnvName);
    return {
      apiKey: process.env[apiKeyEnvName],
      baseUrl: normalizeBaseUrl(raw?.deepseek?.baseUrl ?? process.env.DEEPSEEK_BASE_URL),
      model: normalizeModel(raw?.deepseek?.model ?? process.env.DEEPSEEK_MODEL),
    };
  }

  /**
   * Loads and normalizes the settings file.
   *
   * Returns `null` when the file is missing, unreadable, not JSON, or not a
   * recognised schema. When the file still carries a legacy stored secret it
   * is rewritten without it; a failure of that rewrite is logged and does
   * NOT discard the settings that were read successfully.
   */
  private async readSettings(): Promise<PersistedGuideAiSettings | null> {
    let normalized: PersistedGuideAiSettings | null;
    let carriesLegacySecret: boolean;
    try {
      const content = await fs.readFile(this.filePath, "utf-8");
      const parsed = JSON.parse(content) as unknown;
      normalized = normalizePersistedSettings(parsed);
      carriesLegacySecret = hasLegacyStoredSecret(parsed);
    } catch {
      return null;
    }

    if (normalized && carriesLegacySecret) {
      try {
        await this.writeSettings(normalized);
      } catch (error) {
        // Best effort: the scrub is retried on the next read.
        console.warn("Failed to remove legacy stored secret from guide AI settings:", error);
      }
    }
    return normalized;
  }

  /**
   * Writes `settings` through a temp file plus rename so the settings file
   * is replaced in one step. The temp file is removed if either step fails.
   */
  private async writeSettings(settings: PersistedGuideAiSettings): Promise<void> {
    await fs.mkdir(path.dirname(this.filePath), { recursive: true });
    const tempPath = `${this.filePath}.${process.pid}.${Date.now()}.tmp`;
    try {
      await fs.writeFile(tempPath, JSON.stringify(settings, null, 2), "utf-8");
      await fs.rename(tempPath, this.filePath);
    } catch (error) {
      await fs.rm(tempPath, { force: true }).catch(() => undefined);
      throw error;
    }
  }
}
|
||||
|
||||
/**
 * Tells whether parsed settings JSON still carries a `deepseek.apiKey`
 * value from an older file format.
 *
 * Any present value counts, whatever its type — a plaintext string is as
 * much a stored secret as an object. `null` / `undefined` do not count, so
 * `apiKey: null` no longer triggers a rewrite just because
 * `typeof null === "object"`.
 *
 * @param input - Result of `JSON.parse` on the settings file.
 */
function hasLegacyStoredSecret(input: unknown): boolean {
  if (typeof input !== "object" || input === null) {
    return false;
  }
  const deepseek = (input as { deepseek?: unknown }).deepseek;
  if (typeof deepseek !== "object" || deepseek === null) {
    return false;
  }
  const apiKey = (deepseek as { apiKey?: unknown }).apiKey;
  return apiKey !== undefined && apiKey !== null;
}
|
||||
|
||||
/**
 * Validates parsed settings JSON and rebuilds it with only the known fields.
 *
 * Returns `null` unless `input` is an object with `schemaVersion === 1`.
 * Each `deepseek` field is kept only when it is a string. A hand-edited or
 * corrupted file with, say, a numeric `baseUrl` therefore degrades to
 * "unset" instead of reaching the `.trim()` calls in the `normalize*`
 * helpers and throwing.
 *
 * @param input - Result of `JSON.parse` on the settings file.
 */
function normalizePersistedSettings(input: unknown): PersistedGuideAiSettings | null {
  if (!input || typeof input !== "object") {
    return null;
  }
  const raw = input as { schemaVersion?: unknown; deepseek?: unknown };
  if (raw.schemaVersion !== 1) {
    return null;
  }

  const deepseek: Record<string, unknown> =
    raw.deepseek && typeof raw.deepseek === "object"
      ? (raw.deepseek as Record<string, unknown>)
      : {};
  const stringOrUndefined = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  return {
    schemaVersion: 1,
    deepseek: {
      apiKeyEnvName: normalizeEnvName(stringOrUndefined(deepseek.apiKeyEnvName)),
      baseUrl: stringOrUndefined(deepseek.baseUrl),
      model: stringOrUndefined(deepseek.model),
      updatedAt: stringOrUndefined(deepseek.updatedAt),
    },
  };
}
|
||||
|
||||
/**
 * Returns `value` trimmed when it is a valid environment-variable
 * identifier (a letter or underscore, then letters, digits or underscores).
 * Otherwise returns the default API-key variable name.
 */
function normalizeEnvName(value: string | undefined): string {
  const candidate = value?.trim() ?? "";
  const isIdentifier = candidate.length > 0 && /^[A-Za-z_][A-Za-z0-9_]*$/.test(candidate);
  return isIdentifier ? candidate : DEFAULT_DEEPSEEK_API_KEY_ENV_NAME;
}
|
||||
|
||||
/**
 * Returns a usable base URL: the trimmed `value` when it parses as an
 * http(s) URL, with one trailing slash removed; otherwise the default.
 */
function normalizeBaseUrl(value: string | undefined): string {
  const candidate = value?.trim() || DEFAULT_DEEPSEEK_BASE_URL;

  let parsed: URL;
  try {
    parsed = new URL(candidate);
  } catch {
    return DEFAULT_DEEPSEEK_BASE_URL;
  }

  const isHttp = parsed.protocol === "https:" || parsed.protocol === "http:";
  return isHttp ? parsed.toString().replace(/\/$/, "") : DEFAULT_DEEPSEEK_BASE_URL;
}
|
||||
|
||||
/** Returns `value` trimmed, or the default model when it is missing or blank. */
function normalizeModel(value: string | undefined): string {
  const trimmed = value?.trim();
  return trimmed ? trimmed : DEFAULT_DEEPSEEK_MODEL;
}
|
||||
@@ -0,0 +1,152 @@
|
||||
import type { IpcMain } from "electron";
|
||||
import type {
|
||||
AddGuideMarkerInput,
|
||||
DiscardGuideSessionInput,
|
||||
ExportGuideInput,
|
||||
ExportGuideResult,
|
||||
FinalizeGuideEventsInput,
|
||||
GenerateGuideDraftInput,
|
||||
GuideAiSettings,
|
||||
GuideEvent,
|
||||
GuideIpcResult,
|
||||
GuideSession,
|
||||
RunGuideOcrInput,
|
||||
SaveGuideAiSettingsInput,
|
||||
SaveGuideInput,
|
||||
WriteGuideSnapshotInput,
|
||||
} from "../../src/guide/contracts";
|
||||
import type { DeepSeekSettingsStore } from "./ai/deepseekSettingsStore";
|
||||
import { GuideStore, GuideStoreError } from "./guideStore";
|
||||
|
||||
/**
 * Registers one `ipcMain.handle` listener per `guide:*` channel.
 *
 * Each listener forwards its single argument to the matching `GuideStore`
 * method (or to the AI settings store for the two settings channels) and
 * returns the outcome through `toGuideResult`, so callers get a
 * `GuideIpcResult` envelope instead of a rejected promise.
 *
 * @param ipcMain - Electron main-process IPC registry.
 * @param store - Guide store backing most channels.
 * @param aiSettingsStore - Optional settings store; when absent the two
 *   settings channels answer with a `guide-internal-error` result.
 */
export function registerGuideIpcHandlers(
  ipcMain: IpcMain,
  store: GuideStore,
  aiSettingsStore?: DeepSeekSettingsStore,
): void {
  // One shared wrapper instead of fourteen near-identical listeners.
  const expose = <TInput, TData>(
    channel: string,
    run: (input: TInput) => Promise<TData>,
  ): void => {
    ipcMain.handle(
      channel,
      (_event, input: TInput): Promise<GuideIpcResult<TData>> => toGuideResult(() => run(input)),
    );
  };

  expose<Parameters<GuideStore["startSession"]>[0], GuideSession>(
    "guide:start-session",
    (recordingId) => store.startSession(recordingId),
  );
  expose<Parameters<GuideStore["readSession"]>[0], GuideSession>(
    "guide:read-session",
    (recordingId) => store.readSession(recordingId),
  );
  expose<AddGuideMarkerInput, { session: GuideSession; event: GuideEvent }>(
    "guide:add-marker",
    (input) => store.addMarker(input),
  );
  expose<FinalizeGuideEventsInput, GuideSession>("guide:finalize-events", (input) =>
    store.finalizeEvents(input),
  );
  expose<WriteGuideSnapshotInput, GuideSession>("guide:write-snapshot", (input) =>
    store.writeSnapshot(input),
  );
  expose<RunGuideOcrInput, GuideSession>("guide:run-ocr", (input) => store.runOcr(input));
  expose<GenerateGuideDraftInput, GuideSession>("guide:generate-draft", (input) =>
    store.generateDraft(input),
  );

  // The settings store is resolved inside the callback so that its absence
  // becomes a failure result rather than a throw at registration time.
  expose<void, GuideAiSettings>("guide:get-ai-settings", () =>
    requireAiSettingsStore(aiSettingsStore).getStatus(),
  );
  expose<SaveGuideAiSettingsInput, GuideAiSettings>("guide:save-ai-settings", (input) =>
    requireAiSettingsStore(aiSettingsStore).save(input),
  );

  expose<SaveGuideInput, GuideSession>("guide:save-guide", (input) => store.saveGuide(input));
  expose<ExportGuideInput, ExportGuideResult>("guide:export-markdown", (input) =>
    store.exportMarkdown(input),
  );
  expose<ExportGuideInput, ExportGuideResult>("guide:export-html", (input) =>
    store.exportHtml(input),
  );
  expose<DiscardGuideSessionInput, { discarded: true }>(
    "guide:discard-session",
    async (input) => {
      await store.discardSession(input);
      return { discarded: true };
    },
  );
}
|
||||
|
||||
/**
 * Returns `store`, or throws a `GuideStoreError` with code
 * `guide-internal-error` when no settings store was supplied.
 */
function requireAiSettingsStore(store: DeepSeekSettingsStore | undefined): DeepSeekSettingsStore {
  if (store) {
    return store;
  }
  throw new GuideStoreError("guide-internal-error", "Guide AI settings store is unavailable.");
}
|
||||
|
||||
/**
 * Runs `action` and converts its outcome into a `GuideIpcResult`.
 *
 * - Success → `{ success: true, data }`.
 * - `GuideStoreError` → failure carrying the error's own `code`, message and
 *   `retryable` flag.
 * - Anything else → logged with `console.error` and reported as a
 *   non-retryable `guide-internal-error`.
 *
 * The returned promise never rejects because of `action`.
 */
async function toGuideResult<TData>(action: () => Promise<TData>): Promise<GuideIpcResult<TData>> {
  let data: TData;
  try {
    data = await action();
  } catch (error) {
    if (!(error instanceof GuideStoreError)) {
      console.error("Guide IPC failed:", error);
      return {
        success: false,
        code: "guide-internal-error",
        error: error instanceof Error ? error.message : String(error),
        retryable: false,
      };
    }
    return {
      success: false,
      code: error.code,
      error: error.message,
      retryable: error.retryable,
    };
  }

  return {
    success: true,
    data,
  };
}
|
||||
@@ -0,0 +1,57 @@
|
||||
import path from "node:path";
|
||||
import type { GuideRecordingIdInput } from "../../src/guide/contracts";
|
||||
|
||||
/** Suffix appended to the base name to form the guide session file name. */
export const GUIDE_SESSION_SUFFIX = ".guide.json";

/** Suffix appended to the base name to form the guide output directory name. */
export const GUIDE_OUTPUT_DIR_SUFFIX = "-guide";
|
||||
|
||||
/** File-system locations derived for one recording's guide artifacts. */
export interface GuidePaths {
  /** Recording id after normalization (always a string). */
  recordingId: string;
  /** File stem shared by the guide session file and the output directory. */
  baseName: string;
  /** Directory that contains both artifacts. */
  baseDir: string;
  /** `<baseDir>/<baseName>.guide.json`. */
  guidePath: string;
  /** `<baseDir>/<baseName>-guide`. */
  outputDir: string;
}
|
||||
|
||||
/**
 * Converts a recording id to its canonical string form.
 *
 * - Finite numbers are truncated toward zero and stringified.
 * - Strings are trimmed; a blank string is rejected.
 * - Everything else (including `NaN` / `Infinity`) yields `null`.
 */
export function normalizeGuideRecordingId(recordingId: GuideRecordingIdInput): string | null {
  switch (typeof recordingId) {
    case "number":
      return Number.isFinite(recordingId) ? String(Math.trunc(recordingId)) : null;
    case "string": {
      const trimmed = recordingId.trim();
      return trimmed.length > 0 ? trimmed : null;
    }
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Derives where a recording's guide session file and output directory live.
 *
 * With a non-blank `videoPath`, both artifacts sit next to the video and
 * share its file stem. Without one, they sit in `recordingsDir` under a
 * `recording-<id>` stem built from the recording id.
 *
 * @param input.recordingsDir - Fallback directory when no video path is given.
 * @param input.recordingId - Recording id, number or string.
 * @param input.videoPath - Optional path of the recorded video.
 * @returns The derived paths, or `null` when the recording id is invalid or
 *   would produce a file stem containing a path separator.
 */
export function resolveGuidePaths(input: {
  recordingsDir: string;
  recordingId: GuideRecordingIdInput;
  videoPath?: string | null;
}): GuidePaths | null {
  const recordingId = normalizeGuideRecordingId(input.recordingId);
  if (!recordingId) {
    return null;
  }

  const normalizedVideoPath =
    typeof input.videoPath === "string" && input.videoPath.trim()
      ? path.resolve(input.videoPath.trim())
      : null;
  const parsedVideoPath = normalizedVideoPath ? path.parse(normalizedVideoPath) : null;
  const baseName = parsedVideoPath?.name ?? defaultGuideBaseName(recordingId);
  const baseDir = parsedVideoPath?.dir ?? path.resolve(input.recordingsDir);

  // A string recording id is caller-supplied. `path.join` would resolve any
  // separators or `..` segments in it, letting the artifacts land outside
  // `baseDir`, so such ids are rejected. A stem taken from `path.parse`
  // cannot contain separators and needs no check.
  if (!parsedVideoPath && /[\\/\0]/.test(baseName)) {
    return null;
  }

  return {
    recordingId,
    baseName,
    baseDir,
    guidePath: path.join(baseDir, `${baseName}${GUIDE_SESSION_SUFFIX}`),
    outputDir: path.join(baseDir, `${baseName}${GUIDE_OUTPUT_DIR_SUFFIX}`),
  };
}
|
||||
|
||||
/**
 * Builds the file stem used when no video path is available: the id itself
 * when it already starts with `recording-`, otherwise the id with that
 * prefix added.
 */
function defaultGuideBaseName(recordingId: string): string {
  const prefix = "recording-";
  if (recordingId.startsWith(prefix)) {
    return recordingId;
  }
  return prefix + recordingId;
}
|
||||
@@ -0,0 +1,233 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { GuideStore, GuideStoreError } from "./guideStore";
|
||||
|
||||
// Scratch recordings directory, recreated for every test.
let recordingsDir = "";

beforeEach(async () => {
  const prefix = path.join(os.tmpdir(), "openscreen-guide-");
  recordingsDir = await fs.mkdtemp(prefix);
});

afterEach(async () => {
  // Nothing to clean up if the directory was never created.
  if (!recordingsDir) {
    return;
  }
  await fs.rm(recordingsDir, { recursive: true, force: true });
});
|
||||
|
||||
// End-to-end tests for GuideStore against a real temporary directory.
describe("GuideStore", () => {
  it("creates and reads an empty guide session", async () => {
    const store = new GuideStore(recordingsDir);

    const session = await store.startSession(123);
    const readSession = await store.readSession(123);

    expect(session.recordingId).toBe("123");
    expect(session.status).toBe("recording");
    expect(session.guidePath).toBe(path.join(recordingsDir, "recording-123.guide.json"));
    expect(readSession).toEqual(session);
    // Call isDirectory() rather than checking that the method exists: every
    // Stats object has the method, so the old matcher also passed for a file.
    const outputDirStat = await fs.stat(session.outputDir);
    expect(outputDirStat.isDirectory()).toBe(true);
  });

  it("adds marker events in timeline order", async () => {
    const store = new GuideStore(recordingsDir);
    await store.startSession(456);

    // Inserted out of order on purpose: the later marker goes in first.
    await store.addMarker({ recordingId: 456, kind: "manual", timeMs: 2000, label: "Later" });
    const result = await store.addMarker({
      recordingId: 456,
      kind: "hotkey",
      timeMs: 500,
      label: "First",
    });

    expect(result.event.kind).toBe("hotkey");
    expect(result.session.events.map((event) => event.timeMs)).toEqual([500, 2000]);
    expect(result.session.events[0]?.source).toBe("guide-hotkey");
    expect(result.session.events[1]?.source).toBe("review-ui");
  });

  it("finalizes a session against the saved video path", async () => {
    const store = new GuideStore(recordingsDir);
    await store.startSession(789);
    const videoPath = path.join(recordingsDir, "recording-789.mp4");
    await fs.writeFile(videoPath, "");

    const session = await store.finalizeEvents({ recordingId: 789, videoPath });

    expect(session.status).toBe("events-ready");
    expect(session.videoPath).toBe(videoPath);
    expect(session.guidePath).toBe(path.join(recordingsDir, "recording-789.guide.json"));
  });

  it("adds cursor click events when finalizing a session", async () => {
    const store = new GuideStore(recordingsDir);
    await store.startSession(790);
    await store.addMarker({ recordingId: 790, kind: "manual", timeMs: 250, label: "Manual" });
    const videoPath = path.join(recordingsDir, "recording-790.mp4");
    await fs.writeFile(videoPath, "");
    // Two clicks 25 ms apart at almost the same spot, plus one move sample.
    await fs.writeFile(
      `${videoPath}.cursor.json`,
      JSON.stringify({
        version: 2,
        provider: "native",
        assets: [],
        samples: [
          { timeMs: 100, cx: 0.2, cy: 0.3, interactionType: "move" },
          { timeMs: 200, cx: 0.4, cy: 0.5, interactionType: "click" },
          { timeMs: 225, cx: 0.401, cy: 0.501, interactionType: "click" },
        ],
      }),
      "utf-8",
    );

    const session = await store.finalizeEvents({ recordingId: 790, videoPath });

    expect(session.cursorPath).toBe(`${videoPath}.cursor.json`);
    // Only one click event is expected alongside the manual marker.
    expect(session.events.map((event) => event.kind)).toEqual(["click", "manual"]);
    expect(session.events[0]).toMatchObject({
      timeMs: 200,
      normalizedX: 0.4,
      normalizedY: 0.5,
    });
  });

  it("rejects guide artifacts outside the recordings directory", async () => {
    const store = new GuideStore(recordingsDir);
    await store.startSession(321);
    const outsideVideoPath = path.join(path.dirname(recordingsDir), "outside.mp4");

    await expect(
      store.finalizeEvents({ recordingId: 321, videoPath: outsideVideoPath }),
    ).rejects.toMatchObject({
      code: "guide-invalid-input",
    });
  });

  it("rejects invalid guide session schema", async () => {
    const store = new GuideStore(recordingsDir);
    await fs.writeFile(
      path.join(recordingsDir, "recording-bad.guide.json"),
      JSON.stringify({ schemaVersion: 999 }),
      "utf-8",
    );

    await expect(store.readSession("bad")).rejects.toBeInstanceOf(GuideStoreError);
    await expect(store.readSession("bad")).rejects.toMatchObject({
      code: "guide-invalid-schema",
    });
  });

  it("saves a reviewed generated guide", async () => {
    const store = new GuideStore(recordingsDir);
    await store.startSession(654);

    const session = await store.saveGuide({
      recordingId: 654,
      generatedGuide: {
        title: "Huong dan thao tac",
        steps: [
          {
            id: "step-1",
            order: 1,
            title: "Mo cai dat",
            instruction: "Nhan nut Settings.",
          },
        ],
      },
    });

    expect(session.status).toBe("reviewed");
    expect(session.generatedGuide?.steps).toHaveLength(1);
  });

  it("writes snapshots and builds candidates without OCR", async () => {
    const store = new GuideStore(recordingsDir);
    await store.startSession(112);
    await store.addMarker({ recordingId: 112, kind: "manual", timeMs: 500, label: "Save" });
    const videoPath = path.join(recordingsDir, "recording-112.mp4");
    await fs.writeFile(videoPath, "");
    const eventsSession = await store.finalizeEvents({ recordingId: 112, videoPath });

    const session = await store.writeSnapshot({
      recordingId: 112,
      eventId: eventsSession.events[0]?.id ?? "",
      timeMs: 1000,
      offsetMs: 500,
      width: 800,
      height: 600,
      pngBytes: new Uint8Array([137, 80, 78, 71]).buffer,
    });

    expect(session.status).toBe("snapshots-ready");
    expect(session.snapshots).toHaveLength(1);
    expect(session.candidates[0]).toMatchObject({ targetText: "Save" });
    await expect(fs.readFile(session.snapshots[0]?.path ?? "")).resolves.toEqual(
      Buffer.from([137, 80, 78, 71]),
    );
  });

  it("runs OCR, generates a local draft, and exports files", async () => {
    // Stub OCR client: always reports one "Save" block around the click point.
    const store = new GuideStore(recordingsDir, {
      ocrClient: {
        recognize: async (snapshot) => [
          {
            id: `ocr-${snapshot.id}-1`,
            snapshotId: snapshot.id,
            text: "Save",
            confidence: 0.95,
            box: { x: 0.45, y: 0.45, width: 0.15, height: 0.08 },
          },
        ],
      },
    });
    await store.startSession(113);
    const videoPath = path.join(recordingsDir, "recording-113.mp4");
    await fs.writeFile(videoPath, "");
    await fs.writeFile(
      `${videoPath}.cursor.json`,
      JSON.stringify({
        samples: [{ timeMs: 200, cx: 0.5, cy: 0.5, interactionType: "click" }],
      }),
      "utf-8",
    );
    const eventsSession = await store.finalizeEvents({ recordingId: 113, videoPath });
    await store.writeSnapshot({
      recordingId: 113,
      eventId: eventsSession.events[0]?.id ?? "",
      timeMs: 700,
      offsetMs: 500,
      width: 800,
      height: 600,
      pngBytes: new Uint8Array([1, 2, 3]).buffer,
    });

    const ocrSession = await store.runOcr({ recordingId: 113 });
    const draftSession = await store.generateDraft({
      recordingId: 113,
      language: "en",
      provider: "local",
    });
    const markdown = await store.exportMarkdown({ recordingId: 113 });
    const html = await store.exportHtml({ recordingId: 113 });

    expect(ocrSession.candidates[0]).toMatchObject({ targetText: "Save" });
    expect(draftSession.generatedGuide?.steps[0]?.instruction).toBe('Click "Save".');
    await expect(fs.readFile(markdown.path, "utf-8")).resolves.toContain("# User guide");
    await expect(fs.readFile(html.path, "utf-8")).resolves.toContain("<!doctype html>");
  });

  it("discards a guide session and output directory", async () => {
    const store = new GuideStore(recordingsDir);
    const session = await store.startSession(111);
    await fs.writeFile(path.join(session.outputDir, "step-001.png"), "");

    await store.discardSession({ recordingId: 111 });

    await expect(fs.stat(session.guidePath)).rejects.toMatchObject({ code: "ENOENT" });
    await expect(fs.stat(session.outputDir)).rejects.toMatchObject({ code: "ENOENT" });
  });
});
|
||||
@@ -0,0 +1,824 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import {
|
||||
type AddGuideMarkerInput,
|
||||
type DiscardGuideSessionInput,
|
||||
type ExportGuideInput,
|
||||
type ExportGuideResult,
|
||||
type FinalizeGuideEventsInput,
|
||||
type GeneratedGuide,
|
||||
type GeneratedGuideStep,
|
||||
type GenerateGuideDraftInput,
|
||||
GUIDE_SCHEMA_VERSION,
|
||||
type GuideErrorCode,
|
||||
type GuideEvent,
|
||||
type GuideEventKind,
|
||||
type GuideEventSource,
|
||||
type GuideSession,
|
||||
type GuideSessionStatus,
|
||||
type GuideSnapshot,
|
||||
type GuideStepCandidate,
|
||||
type OcrBlock,
|
||||
type RunGuideOcrInput,
|
||||
type SaveGuideInput,
|
||||
type WriteGuideSnapshotInput,
|
||||
} from "../../src/guide/contracts";
|
||||
import { buildGuideEventsFromCursor, mergeGuideEvents } from "../../src/guide/eventBuilder";
|
||||
import { exportGuideToHtml, exportGuideToMarkdown } from "../../src/guide/exporters";
|
||||
import { buildLocalGuideDraft } from "../../src/guide/promptBuilder";
|
||||
import { buildGuideStepCandidates } from "../../src/guide/targetMapper";
|
||||
import type { CursorRecordingSample } from "../../src/native/contracts";
|
||||
import {
|
||||
DeepSeekGuideClient,
|
||||
DeepSeekGuideClientError,
|
||||
type GuideDraftClient,
|
||||
} from "./ai/deepseekGuideClient";
|
||||
import type { DeepSeekGuideConfigProvider } from "./ai/deepseekSettingsStore";
|
||||
import { type GuidePaths, normalizeGuideRecordingId, resolveGuidePaths } from "./guidePaths";
|
||||
import { createFocusedOcrSnapshot, remapFocusedOcrBlocks } from "./ocr/focusedOcrSnapshot";
|
||||
import { DefaultGuideOcrClient, type GuideOcrClient } from "./ocr/paddleOcrClient";
|
||||
|
||||
/** Session statuses accepted when a guide session is normalized. */
const VALID_SESSION_STATUSES = new Set<GuideSessionStatus>([
  "recording",
  "events-ready",
  "snapshots-ready",
  "ocr-ready",
  "draft-ready",
  "reviewed",
]);

/** Event kinds accepted when a guide event is normalized. */
const VALID_EVENT_KINDS = new Set<GuideEventKind>([
  "click",
  "hotkey",
  "manual",
]);

/** Event sources accepted when a guide event is normalized. */
const VALID_EVENT_SOURCES = new Set<GuideEventSource>(["cursor-recording", "guide-hotkey", "review-ui"]);
|
||||
|
||||
/**
 * Error raised by guide storage operations.
 *
 * Carries a machine-readable `code` and a `retryable` flag (false unless the
 * thrower says otherwise) alongside the human-readable message.
 */
export class GuideStoreError extends Error {
  readonly code: GuideErrorCode;
  readonly retryable: boolean;

  constructor(code: GuideErrorCode, message: string, retryable = false) {
    super(message);
    this.code = code;
    this.retryable = retryable;
    this.name = "GuideStoreError";
  }
}
|
||||
|
||||
/** Optional collaborators that callers (for example tests) may inject into a GuideStore. */
export interface GuideStoreDependencies {
  /** OCR backend used by `runOcr`; a `DefaultGuideOcrClient` is created when omitted. */
  ocrClient?: GuideOcrClient;

  /** Draft generator used for non-"local" providers; a `DeepSeekGuideClient` is created when omitted. */
  draftClient?: GuideDraftClient;

  /** Handed to the `DeepSeekGuideClient` constructor when no `draftClient` is injected. */
  deepSeekConfigProvider?: DeepSeekGuideConfigProvider;

  /**
   * Whether `runOcr` builds a focused snapshot before recognition.
   * When left undefined it is enabled only if no `ocrClient` was injected.
   */
  focusOcrSnapshots?: boolean;
}
|
||||
|
||||
export class GuideStore {
|
||||
private readonly recordingsDir: string;
private readonly dependencies: GuideStoreDependencies;

/**
 * @param recordingsDir Root directory; every guide artifact path is checked against it.
 * @param dependencies Optional collaborator overrides (defaults to none).
 */
constructor(recordingsDir: string, dependencies: GuideStoreDependencies = {}) {
  this.recordingsDir = recordingsDir;
  this.dependencies = dependencies;
}
|
||||
|
||||
/**
 * Creates an empty guide session in the "recording" state and persists it.
 *
 * @param recordingIdInput Recording identifier; resolved to guide paths under the recordings dir.
 * @returns The session that was written.
 * @throws GuideStoreError when the id is invalid or the resolved paths leave the recordings dir.
 */
async startSession(recordingIdInput: AddGuideMarkerInput["recordingId"]): Promise<GuideSession> {
  const { recordingId, guidePath, outputDir } = this.requireGuidePaths(recordingIdInput);
  const timestamp = new Date().toISOString();

  const freshSession: GuideSession = {
    schemaVersion: GUIDE_SCHEMA_VERSION,
    recordingId,
    // Left empty here; finalizeEvents stores the real video path.
    videoPath: "",
    guidePath,
    outputDir,
    status: "recording",
    events: [],
    snapshots: [],
    ocrBlocks: [],
    candidates: [],
    createdAt: timestamp,
    updatedAt: timestamp,
  };

  await this.writeSession(freshSession);
  return freshSession;
}
|
||||
|
||||
/**
 * Loads the persisted session for a recording from its default guide path.
 *
 * @throws GuideStoreError when the id is invalid, the file is missing, or its schema is invalid.
 */
async readSession(recordingIdInput: AddGuideMarkerInput["recordingId"]): Promise<GuideSession> {
  const { guidePath } = this.requireGuidePaths(recordingIdInput);
  const session = await this.readSessionAtPath(guidePath);
  return session;
}
|
||||
|
||||
/**
 * Appends a hotkey or manual marker event to an existing session.
 *
 * @param input Marker details; `kind` must be "hotkey" or "manual" and `timeMs` a finite value >= 0.
 * @returns The updated (persisted) session together with the newly created event.
 * @throws GuideStoreError with code "guide-invalid-input" when validation fails.
 */
async addMarker(
  input: AddGuideMarkerInput,
): Promise<{ session: GuideSession; event: GuideEvent }> {
  const recordingId = normalizeGuideRecordingId(input.recordingId);
  if (!recordingId) {
    throw new GuideStoreError("guide-invalid-input", "Guide marker is missing recordingId.");
  }

  const isHotkey = input.kind === "hotkey";
  if (!isHotkey && input.kind !== "manual") {
    throw new GuideStoreError("guide-invalid-input", "Guide marker kind is invalid.");
  }

  const hasValidTime = Number.isFinite(input.timeMs) && input.timeMs >= 0;
  if (!hasValidTime) {
    throw new GuideStoreError("guide-invalid-input", "Guide marker timeMs must be non-negative.");
  }

  const existing = await this.readSession(recordingId);

  const event: GuideEvent = {
    id: `guide-event-${randomUUID()}`,
    recordingId,
    kind: input.kind,
    // Hotkey markers are attributed to the hotkey; everything else to the review UI.
    source: isHotkey ? "guide-hotkey" : "review-ui",
    timeMs: Math.max(0, input.timeMs),
    label: normalizeOptionalString(input.label),
    screenshotOffsetMs: 500,
    createdAt: new Date().toISOString(),
  };

  // Keep the event list ordered by time after the insertion.
  const events = sortGuideEvents(existing.events.concat(event));
  const updated = touchSession({ ...existing, events });
  await this.writeSession(updated);

  return { session: updated, event };
}
|
||||
|
||||
/**
 * Attaches the recorded video to a session and rebuilds its event list.
 *
 * Cursor-derived events are regenerated from the cursor telemetry file (when one is
 * found); events from any other source are carried over. The session moves to
 * "events-ready", and if the guide file location changed the old file is removed.
 *
 * @throws GuideStoreError with code "guide-invalid-input" for a missing id or video path.
 */
async finalizeEvents(input: FinalizeGuideEventsInput): Promise<GuideSession> {
  const recordingId = normalizeGuideRecordingId(input.recordingId);
  if (!recordingId) {
    throw new GuideStoreError(
      "guide-invalid-input",
      "Guide finalization is missing recordingId.",
    );
  }

  const hasVideoPath = typeof input.videoPath === "string" && input.videoPath.trim().length > 0;
  if (!hasVideoPath) {
    throw new GuideStoreError("guide-invalid-input", "Guide finalization is missing videoPath.");
  }

  const videoPath = path.resolve(input.videoPath);
  const previous = await this.readSession(recordingId);
  const relocated = this.requireGuidePaths(recordingId, videoPath);
  const cursorPath = await this.resolveCursorPath(videoPath, input.cursorPath);

  let cursorEvents: GuideEvent[] = [];
  if (cursorPath) {
    cursorEvents = await this.readCursorGuideEvents(recordingId, cursorPath);
  }
  // Everything not derived from cursor telemetry (hotkey / review markers) is preserved.
  const keptEvents = previous.events.filter((event) => event.source !== "cursor-recording");

  const finalized = touchSession({
    ...previous,
    videoPath,
    cursorPath,
    guidePath: relocated.guidePath,
    outputDir: relocated.outputDir,
    status: "events-ready",
    events: mergeGuideEvents([...cursorEvents, ...keptEvents]),
  });

  // Write the new file first; only then drop the old one if the location changed.
  await this.writeSession(finalized);
  const guideFileMoved = path.resolve(previous.guidePath) !== path.resolve(finalized.guidePath);
  if (guideFileMoved) {
    try {
      await fs.unlink(previous.guidePath);
    } catch {
      // Best effort: a failed removal of the old file is ignored.
    }
  }

  return finalized;
}
|
||||
|
||||
/**
 * Stores a PNG snapshot for one guide event and records it on the session.
 *
 * The image is written as `step-NNN.png` (NNN = 1-based index of the event in the
 * session) inside the session output directory. Any earlier snapshot for the same
 * event is replaced, OCR blocks tied to the snapshot id are dropped, candidates are
 * rebuilt, the generated guide is cleared and the status becomes "snapshots-ready".
 *
 * All metadata is validated before anything is written: a non-finite `offsetMs` or a
 * dimension that rounds to zero would otherwise be discarded by session normalization
 * on save, leaving a PNG on disk that the persisted session does not reference.
 *
 * @param input Snapshot bytes plus event id, time, offset and pixel dimensions.
 * @returns The updated, persisted session.
 * @throws GuideStoreError with code "guide-invalid-input" when validation fails or the
 *   event does not exist in the session.
 */
async writeSnapshot(input: WriteGuideSnapshotInput): Promise<GuideSession> {
  const recordingId = normalizeGuideRecordingId(input.recordingId);
  if (!recordingId) {
    throw new GuideStoreError("guide-invalid-input", "Snapshot write is missing recordingId.");
  }
  if (
    !input.eventId ||
    !Number.isFinite(input.timeMs) ||
    input.timeMs < 0 ||
    !Number.isFinite(input.offsetMs)
  ) {
    throw new GuideStoreError("guide-invalid-input", "Snapshot metadata is invalid.");
  }
  if (!input.pngBytes || input.pngBytes.byteLength === 0) {
    throw new GuideStoreError("guide-invalid-input", "Snapshot PNG data is empty.");
  }

  // Validate the rounded values: those are what get persisted.
  const width = Math.round(input.width);
  const height = Math.round(input.height);
  if (
    !Number.isFinite(input.width) ||
    !Number.isFinite(input.height) ||
    width < 1 ||
    height < 1
  ) {
    throw new GuideStoreError("guide-invalid-input", "Snapshot dimensions are invalid.");
  }

  const session = await this.readSession(recordingId);
  const eventIndex = session.events.findIndex((event) => event.id === input.eventId);
  if (eventIndex === -1) {
    throw new GuideStoreError("guide-invalid-input", "Snapshot event does not exist.");
  }

  this.assertGuidePathIsAllowed(session.outputDir);
  await fs.mkdir(session.outputDir, { recursive: true });
  const fileName = `step-${String(eventIndex + 1).padStart(3, "0")}.png`;
  const snapshotPath = path.join(session.outputDir, fileName);
  this.assertGuidePathIsAllowed(snapshotPath);
  await fs.writeFile(snapshotPath, Buffer.from(new Uint8Array(input.pngBytes)));

  const snapshot: GuideSnapshot = {
    id: `snapshot-${input.eventId}`,
    eventId: input.eventId,
    timeMs: Math.max(0, input.timeMs),
    offsetMs: input.offsetMs,
    path: snapshotPath,
    width,
    height,
  };

  // Replace any previous snapshot of this event and keep the list ordered by time.
  const snapshots = [
    ...session.snapshots.filter((existing) => existing.eventId !== input.eventId),
    snapshot,
  ].sort((left, right) => left.timeMs - right.timeMs);
  // OCR results for the replaced image are stale; compute the remainder once.
  const ocrBlocks = session.ocrBlocks.filter((block) => block.snapshotId !== snapshot.id);

  const updatedSession = touchSession({
    ...session,
    status: "snapshots-ready",
    snapshots,
    ocrBlocks,
    candidates: buildGuideStepCandidates({ ...session, snapshots, ocrBlocks }),
    generatedGuide: undefined,
  });

  await this.writeSession(updatedSession);
  return updatedSession;
}
|
||||
|
||||
/**
 * Runs OCR over session snapshots and stores the recognized blocks.
 *
 * When `input.snapshotIds` is non-empty only those snapshots are processed; otherwise
 * all of them are. Blocks previously stored for the processed snapshots are replaced,
 * candidates are rebuilt, the generated guide is cleared and the status becomes "ocr-ready".
 *
 * @throws GuideStoreError "guide-invalid-input" when no snapshot matches, or
 *   "guide-ocr-unavailable" (retryable) when focusing or recognition fails.
 */
async runOcr(input: RunGuideOcrInput): Promise<GuideSession> {
  const session = await this.readSession(input.recordingId);

  const requestedIds = new Set(input.snapshotIds ?? []);
  let targets = session.snapshots;
  if (requestedIds.size > 0) {
    targets = session.snapshots.filter((snapshot) => requestedIds.has(snapshot.id));
  }
  if (targets.length === 0) {
    throw new GuideStoreError("guide-invalid-input", "No guide snapshots are available for OCR.");
  }

  const { ocrClient: injectedOcrClient, focusOcrSnapshots } = this.dependencies;
  const ocrClient = injectedOcrClient ?? new DefaultGuideOcrClient();
  // Unless configured explicitly, focusing is used only with the default OCR client.
  const shouldFocus = focusOcrSnapshots ?? injectedOcrClient === undefined;

  const eventsById = new Map<string, GuideEvent>();
  for (const event of session.events) {
    eventsById.set(event.id, event);
  }

  const recognized: OcrBlock[] = [];
  try {
    for (const snapshot of targets) {
      const focused = shouldFocus
        ? await createFocusedOcrSnapshot({
            snapshot,
            event: eventsById.get(snapshot.eventId),
            outputDir: session.outputDir,
          })
        : { snapshot };
      const rawBlocks = await ocrClient.recognize(focused.snapshot);
      recognized.push(...remapFocusedOcrBlocks(rawBlocks, focused.transform));
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : "OCR failed.";
    throw new GuideStoreError("guide-ocr-unavailable", message, true);
  }

  // Drop stale blocks of the snapshots just processed, then append the fresh ones.
  const processedIds = new Set(targets.map((snapshot) => snapshot.id));
  const ocrBlocks = session.ocrBlocks
    .filter((block) => !processedIds.has(block.snapshotId))
    .concat(recognized);
  const sessionWithOcr = { ...session, ocrBlocks };

  const updatedSession = touchSession({
    ...sessionWithOcr,
    status: "ocr-ready",
    candidates: buildGuideStepCandidates(sessionWithOcr),
    generatedGuide: undefined,
  });

  await this.writeSession(updatedSession);
  return updatedSession;
}
|
||||
|
||||
/**
 * Produces a guide draft for a session and stores it with status "draft-ready".
 *
 * Stored candidates are reused when present, otherwise they are rebuilt from the
 * session. The "local" provider builds the draft in-process; any other provider goes
 * through the injected draft client or a new DeepSeekGuideClient.
 *
 * @throws GuideStoreError "guide-invalid-input" when there are no candidates; client
 *   errors are rethrown as GuideStoreError (DeepSeek errors keep their own code and
 *   retryable flag, anything else becomes a retryable "guide-ai-request-failed").
 */
async generateDraft(input: GenerateGuideDraftInput): Promise<GuideSession> {
  const session = await this.readSession(input.recordingId);

  let candidates = session.candidates;
  if (candidates.length === 0) {
    candidates = buildGuideStepCandidates(session);
  }
  if (candidates.length === 0) {
    throw new GuideStoreError(
      "guide-invalid-input",
      "No guide events are available for drafting.",
    );
  }

  const draftGuide = async (): Promise<GeneratedGuide> => {
    if (input.provider === "local") {
      return buildLocalGuideDraft(session, candidates, input.language);
    }

    const draftClient =
      this.dependencies.draftClient ??
      new DeepSeekGuideClient(this.dependencies.deepSeekConfigProvider);
    try {
      return await draftClient.generate({ session, candidates, language: input.language });
    } catch (error) {
      if (error instanceof DeepSeekGuideClientError) {
        throw new GuideStoreError(error.code, error.message, error.retryable);
      }
      const message = error instanceof Error ? error.message : "Guide draft generation failed.";
      throw new GuideStoreError("guide-ai-request-failed", message, true);
    }
  };

  const generatedGuide = await draftGuide();
  const updatedSession = touchSession({
    ...session,
    candidates,
    // Store the normalized shape when it validates, otherwise the guide as produced.
    generatedGuide: normalizeGeneratedGuide(generatedGuide) ?? generatedGuide,
    status: "draft-ready",
  });

  await this.writeSession(updatedSession);
  return updatedSession;
}
|
||||
|
||||
/**
 * Stores a caller-supplied guide on the session and marks it "reviewed".
 *
 * @throws GuideStoreError "guide-invalid-input" when the guide fails normalization.
 */
async saveGuide(input: SaveGuideInput): Promise<GuideSession> {
  const session = await this.readSession(input.recordingId);

  const reviewedGuide = normalizeGeneratedGuide(input.generatedGuide);
  if (reviewedGuide === null) {
    throw new GuideStoreError("guide-invalid-input", "Generated guide shape is invalid.");
  }

  const updatedSession = touchSession({
    ...session,
    generatedGuide: reviewedGuide,
    status: "reviewed",
  });
  await this.writeSession(updatedSession);
  return updatedSession;
}
|
||||
|
||||
/** Renders the session guide as Markdown into `guide.md` in the session output directory. */
async exportMarkdown(input: ExportGuideInput): Promise<ExportGuideResult> {
  const session = await this.readSession(input.recordingId);
  const renderMarkdown = (): string => exportGuideToMarkdown(session);
  const result = await this.writeGuideExport(session, "guide.md", renderMarkdown);
  return result;
}
|
||||
|
||||
/** Renders the session guide as HTML into `guide.html` in the session output directory. */
async exportHtml(input: ExportGuideInput): Promise<ExportGuideResult> {
  const session = await this.readSession(input.recordingId);
  const renderHtml = (): string => exportGuideToHtml(session);
  const result = await this.writeGuideExport(session, "guide.html", renderHtml);
  return result;
}
|
||||
|
||||
/**
 * Deletes a session's guide file and its output directory.
 *
 * Paths come from the stored session when it can be read, otherwise from the default
 * paths for the recording id. A guide file that cannot be removed is ignored.
 *
 * @throws GuideStoreError when the id is invalid or a path leaves the recordings dir.
 */
async discardSession(input: DiscardGuideSessionInput): Promise<void> {
  const defaults = this.requireGuidePaths(input.recordingId);

  let stored: GuideSession | null;
  try {
    stored = await this.readSession(input.recordingId);
  } catch {
    // An unreadable session falls back to the default paths below.
    stored = null;
  }

  const guidePath = stored === null ? defaults.guidePath : stored.guidePath;
  const outputDir = stored === null ? defaults.outputDir : stored.outputDir;
  this.assertGuidePathIsAllowed(guidePath);
  this.assertGuidePathIsAllowed(outputDir);

  try {
    await fs.unlink(guidePath);
  } catch {
    // Best effort: failure to remove the guide file is ignored.
  }
  await fs.rm(outputDir, { recursive: true, force: true });
}
|
||||
|
||||
/**
 * Writes a rendered guide export into the session output directory.
 *
 * @param session Session whose guide is exported; must already have a generated guide.
 * @param fileName File name to create inside `session.outputDir`.
 * @param renderContent Produces the file content; invoked after the directory exists.
 * @returns The export path together with the session.
 * @throws GuideStoreError "guide-invalid-input" without a generated guide or for a
 *   disallowed path; "guide-export-failed" (retryable) when rendering or writing fails.
 */
private async writeGuideExport(
  session: GuideSession,
  fileName: string,
  renderContent: () => string,
): Promise<ExportGuideResult> {
  if (!session.generatedGuide) {
    throw new GuideStoreError("guide-invalid-input", "Generate a guide draft before exporting.");
  }

  const { outputDir } = session;
  const exportPath = path.join(outputDir, fileName);
  this.assertGuidePathIsAllowed(exportPath);

  try {
    await fs.mkdir(outputDir, { recursive: true });
    await fs.writeFile(exportPath, renderContent(), "utf-8");
  } catch (error) {
    const message = error instanceof Error ? error.message : "Guide export failed.";
    throw new GuideStoreError("guide-export-failed", message, true);
  }

  return { path: exportPath, session };
}
|
||||
|
||||
/**
 * Validates a session and writes its normalized form to `guidePath`.
 *
 * The guide file's parent directory and the output directory are created if needed.
 *
 * @throws GuideStoreError "guide-invalid-schema" when normalization rejects the session,
 *   or "guide-invalid-input" when a path leaves the recordings dir.
 */
async writeSession(session: GuideSession): Promise<void> {
  const normalized = normalizeGuideSession(session);
  if (normalized === null) {
    throw new GuideStoreError("guide-invalid-schema", "Guide session schema is invalid.");
  }

  const { guidePath, outputDir } = normalized;
  this.assertGuidePathIsAllowed(guidePath);
  this.assertGuidePathIsAllowed(outputDir);

  await fs.mkdir(path.dirname(guidePath), { recursive: true });
  await fs.mkdir(outputDir, { recursive: true });
  await atomicWriteJson(guidePath, normalized);
}
|
||||
|
||||
/**
 * Reads, parses and validates the guide session stored at `guidePath`.
 *
 * Every "the stored session is unusable" condition surfaces as a GuideStoreError so
 * callers can branch on `code` instead of seeing a raw SyntaxError.
 *
 * @param guidePath Location of the session JSON file; must be inside the recordings dir.
 * @returns The normalized session.
 * @throws GuideStoreError "guide-session-not-found" when the file does not exist,
 *   "guide-invalid-schema" when the content is not valid JSON or fails normalization,
 *   "guide-invalid-input" when the path is outside the recordings dir.
 *   Other read errors are rethrown unchanged.
 */
private async readSessionAtPath(guidePath: string): Promise<GuideSession> {
  this.assertGuidePathIsAllowed(guidePath);

  let content: string;
  try {
    content = await fs.readFile(guidePath, "utf-8");
  } catch (error) {
    // Narrow with a runtime check instead of asserting the error's type.
    if (isRecord(error) && error.code === "ENOENT") {
      throw new GuideStoreError("guide-session-not-found", "Guide session was not found.");
    }
    throw error;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    // A truncated or corrupted file is a schema problem, not an unexpected crash.
    throw new GuideStoreError("guide-invalid-schema", "Guide session file is not valid JSON.");
  }

  const session = normalizeGuideSession(parsed);
  if (!session) {
    throw new GuideStoreError("guide-invalid-schema", "Guide session schema is invalid.");
  }
  return session;
}
|
||||
|
||||
/**
 * Resolves the guide file and output directory for a recording, or throws.
 *
 * @param recordingIdInput Recording identifier to resolve.
 * @param videoPath Optional video path forwarded to `resolveGuidePaths`.
 * @throws GuideStoreError "guide-invalid-input" when resolution fails or a resolved
 *   path lies outside the recordings dir.
 */
private requireGuidePaths(
  recordingIdInput: AddGuideMarkerInput["recordingId"],
  videoPath?: string | null,
): GuidePaths {
  const resolved = resolveGuidePaths({
    recordingsDir: this.recordingsDir,
    recordingId: recordingIdInput,
    videoPath,
  });
  if (!resolved) {
    throw new GuideStoreError("guide-invalid-input", "Guide recordingId is invalid.");
  }

  for (const candidate of [resolved.guidePath, resolved.outputDir]) {
    this.assertGuidePathIsAllowed(candidate);
  }
  return resolved;
}
|
||||
|
||||
/**
 * Guards against guide artifacts being placed outside the recordings directory.
 *
 * @throws GuideStoreError "guide-invalid-input" when `targetPath` is not allowed.
 */
private assertGuidePathIsAllowed(targetPath: string): void {
  if (!this.isPathAllowed(targetPath)) {
    throw new GuideStoreError(
      "guide-invalid-input",
      "Guide artifacts must be stored inside the recordings directory.",
    );
  }
}
|
||||
|
||||
/**
 * Finds the cursor telemetry file for a video.
 *
 * Tries the explicit path first (when non-empty), then `<videoPath>.cursor.json`.
 * Candidates outside the recordings dir or not accessible are skipped.
 *
 * @returns The resolved path of the first usable candidate, or undefined when none is.
 */
private async resolveCursorPath(
  videoPath: string,
  explicitCursorPath?: string,
): Promise<string | undefined> {
  const candidates: string[] = [];
  const explicit = normalizeOptionalString(explicitCursorPath);
  if (explicit) {
    candidates.push(explicit);
  }
  candidates.push(`${videoPath}.cursor.json`);

  for (const candidate of candidates) {
    const resolved = path.resolve(candidate);
    if (this.isPathAllowed(resolved)) {
      try {
        await fs.access(resolved);
        return resolved;
      } catch {
        // Cursor telemetry is optional for guide sessions.
      }
    }
  }

  return undefined;
}
|
||||
|
||||
/**
 * Builds guide events from a cursor telemetry file.
 *
 * The file may be either an object with a `samples` array or a bare array; entries
 * that fail sample normalization are skipped. Any read or parse failure is logged
 * with `console.warn` and yields an empty list.
 */
private async readCursorGuideEvents(
  recordingId: string,
  cursorPath: string,
): Promise<GuideEvent[]> {
  try {
    const parsed: unknown = JSON.parse(await fs.readFile(cursorPath, "utf-8"));

    let rawSamples: unknown = parsed;
    if (isRecord(parsed) && Array.isArray(parsed.samples)) {
      rawSamples = parsed.samples;
    }

    // normalizeArray yields [] for non-arrays and drops entries normalized to null.
    const samples = normalizeArray(rawSamples, normalizeCursorSampleForGuide);
    return buildGuideEventsFromCursor({ recordingId, samples });
  } catch (error) {
    console.warn("Failed to read cursor telemetry for guide events:", error);
    return [];
  }
}
|
||||
|
||||
/**
 * Reports whether `targetPath` is the recordings directory itself or lies inside it.
 *
 * The containment test looks at the path relative to the recordings dir: it escapes
 * when that relative path is exactly ".." or begins with a ".." segment, or when it is
 * absolute (e.g. a different drive on Windows). Only the ".." *segment* is matched, so
 * entries whose name merely starts with two dots (such as "..cache") stay allowed.
 *
 * @param targetPath Absolute or relative path to check; resolved before comparison.
 */
private isPathAllowed(targetPath: string): boolean {
  const recordingsRoot = path.resolve(this.recordingsDir);
  const relative = path.relative(recordingsRoot, path.resolve(targetPath));
  if (relative === "") {
    return true;
  }

  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  return !escapesRoot && !path.isAbsolute(relative);
}
|
||||
}
|
||||
|
||||
/** Returns a shallow copy of the session with `updatedAt` set to the current time (ISO string). */
function touchSession(session: GuideSession): GuideSession {
  const touched = { ...session };
  touched.updatedAt = new Date().toISOString();
  return touched;
}
|
||||
|
||||
/** Returns a new array of the events ordered by ascending `timeMs`; the input is not modified. */
function sortGuideEvents(events: GuideEvent[]): GuideEvent[] {
  const ordered = events.slice();
  ordered.sort((earlier, later) => earlier.timeMs - later.timeMs);
  return ordered;
}
|
||||
|
||||
/**
 * Converts one raw telemetry entry into a cursor sample for guide event building.
 *
 * Requires a non-negative finite `timeMs` and finite `cx` / `cy`; returns null
 * otherwise. An `interactionType` other than "click", "mouseup" or "move" becomes "move".
 */
function normalizeCursorSampleForGuide(input: unknown): CursorRecordingSample | null {
  if (!isRecord(input)) {
    return null;
  }

  const timeMs = normalizeNonNegativeNumber(input.timeMs);
  const cx = normalizeOptionalNumber(input.cx);
  const cy = normalizeOptionalNumber(input.cy);
  if (timeMs === null || cx === undefined || cy === undefined) {
    return null;
  }

  const knownInteractions = ["click", "mouseup", "move"] as const;
  const interactionType =
    knownInteractions.find((known) => known === input.interactionType) ?? "move";

  return { timeMs, cx, cy, interactionType };
}
|
||||
|
||||
/**
 * Writes `value` as pretty-printed JSON to `filePath` via a temp file plus rename, so
 * readers never observe a half-written file.
 *
 * The temp name includes a random UUID so two writes to the same target started in the
 * same millisecond cannot share (and clobber) one temp file. If writing or renaming
 * fails, the temp file is removed before the original error is rethrown.
 *
 * @param filePath Final destination of the JSON document.
 * @param value Value to serialize with two-space indentation.
 * @throws Whatever `fs.writeFile` / `fs.rename` reject with.
 */
async function atomicWriteJson(filePath: string, value: unknown): Promise<void> {
  const tempPath = `${filePath}.${process.pid}.${randomUUID()}.tmp`;
  try {
    await fs.writeFile(tempPath, JSON.stringify(value, null, 2), "utf-8");
    await fs.rename(tempPath, filePath);
  } catch (error) {
    // Do not leave stray temp files next to the target; the cleanup itself is best effort.
    await fs.rm(tempPath, { force: true }).catch(() => undefined);
    throw error;
  }
}
|
||||
|
||||
/**
 * Validates an unknown value as a guide session and returns a clean copy.
 *
 * Returns null when the value is not an object, the schema version differs, a required
 * field is missing or invalid, or a present `generatedGuide` fails normalization.
 * `videoPath` may be an empty string; the other required strings may not. Invalid
 * entries inside the event / snapshot / OCR / candidate arrays are dropped.
 */
function normalizeGuideSession(input: unknown): GuideSession | null {
  if (!isRecord(input)) {
    return null;
  }
  if (input.schemaVersion !== GUIDE_SCHEMA_VERSION) {
    return null;
  }

  const recordingId = normalizeString(input.recordingId);
  const guidePath = normalizeString(input.guidePath);
  const outputDir = normalizeString(input.outputDir);
  if (!recordingId || !guidePath || !outputDir) {
    return null;
  }

  const videoPath = normalizeString(input.videoPath);
  const status = normalizeSessionStatus(input.status);
  const createdAt = normalizeString(input.createdAt);
  const updatedAt = normalizeString(input.updatedAt);
  if (videoPath === null || !status || !createdAt || !updatedAt) {
    return null;
  }

  // An absent guide is fine; a present but malformed one invalidates the whole session.
  let generatedGuide: GeneratedGuide | undefined;
  if (input.generatedGuide !== undefined) {
    const normalizedGuide = normalizeGeneratedGuide(input.generatedGuide);
    if (normalizedGuide === null) {
      return null;
    }
    generatedGuide = normalizedGuide;
  }

  return {
    schemaVersion: GUIDE_SCHEMA_VERSION,
    recordingId,
    videoPath,
    cursorPath: normalizeOptionalString(input.cursorPath),
    guidePath,
    outputDir,
    status,
    events: normalizeArray(input.events, normalizeGuideEvent),
    snapshots: normalizeArray(input.snapshots, normalizeGuideSnapshot),
    ocrBlocks: normalizeArray(input.ocrBlocks, normalizeOcrBlock),
    candidates: normalizeArray(input.candidates, normalizeGuideStepCandidate),
    generatedGuide,
    createdAt,
    updatedAt,
  };
}
|
||||
|
||||
/**
 * Validates an unknown value as a guide event.
 *
 * Requires non-empty `id`, `recordingId` and `createdAt`, a known kind and source, and
 * a non-negative finite `timeMs`; returns null otherwise. Optional numeric and string
 * fields that are malformed become undefined, as does an unrecognized `button`.
 */
function normalizeGuideEvent(input: unknown): GuideEvent | null {
  if (!isRecord(input)) {
    return null;
  }

  const id = normalizeString(input.id);
  const recordingId = normalizeString(input.recordingId);
  const createdAt = normalizeString(input.createdAt);
  const timeMs = normalizeNonNegativeNumber(input.timeMs);
  if (!id || !recordingId || !createdAt || timeMs === null) {
    return null;
  }

  const kind = input.kind as GuideEventKind;
  const source = input.source as GuideEventSource;
  if (!VALID_EVENT_KINDS.has(kind) || !VALID_EVENT_SOURCES.has(source)) {
    return null;
  }

  const knownButtons = ["left", "right", "middle", "unknown"] as const;

  return {
    id,
    recordingId,
    kind,
    source,
    timeMs,
    x: normalizeOptionalNumber(input.x),
    y: normalizeOptionalNumber(input.y),
    normalizedX: normalizeOptionalNumber(input.normalizedX),
    normalizedY: normalizeOptionalNumber(input.normalizedY),
    button: knownButtons.find((known) => known === input.button),
    label: normalizeOptionalString(input.label),
    screenshotOffsetMs: normalizeOptionalNumber(input.screenshotOffsetMs),
    createdAt,
  };
}
|
||||
|
||||
/**
 * Validates an unknown value as a guide snapshot.
 *
 * Every field is required: non-empty `id`, `eventId` and `path`, a non-negative finite
 * `timeMs`, a finite `offsetMs`, and positive `width` / `height` (stored rounded).
 * Returns null when any of them is missing or invalid.
 */
function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null {
  if (!isRecord(input)) {
    return null;
  }

  const id = normalizeString(input.id);
  const eventId = normalizeString(input.eventId);
  const snapshotPath = normalizeString(input.path);
  if (!id || !eventId || !snapshotPath) {
    return null;
  }

  const timeMs = normalizeNonNegativeNumber(input.timeMs);
  const offsetMs = normalizeOptionalNumber(input.offsetMs);
  if (timeMs === null || offsetMs === undefined) {
    return null;
  }

  const width = normalizePositiveInteger(input.width);
  const height = normalizePositiveInteger(input.height);
  if (width === null || height === null) {
    return null;
  }

  return { id, eventId, timeMs, offsetMs, path: snapshotPath, width, height };
}
|
||||
|
||||
/**
 * Validates an unknown value as an OCR block.
 *
 * Requires non-empty `id` and `snapshotId`, a string `text` (may be empty), a finite
 * `confidence`, and a `box` object with finite `x`, `y`, `width` and `height`.
 * Returns null otherwise.
 */
function normalizeOcrBlock(input: unknown): OcrBlock | null {
  if (!isRecord(input)) {
    return null;
  }
  const rawBox = input.box;
  if (!isRecord(rawBox)) {
    return null;
  }

  const id = normalizeString(input.id);
  const snapshotId = normalizeString(input.snapshotId);
  const text = normalizeString(input.text);
  const confidence = normalizeOptionalNumber(input.confidence);
  if (!id || !snapshotId || text === null || confidence === undefined) {
    return null;
  }

  const x = normalizeOptionalNumber(rawBox.x);
  const y = normalizeOptionalNumber(rawBox.y);
  const width = normalizeOptionalNumber(rawBox.width);
  const height = normalizeOptionalNumber(rawBox.height);
  if (x === undefined || y === undefined || width === undefined || height === undefined) {
    return null;
  }

  return { id, snapshotId, text, confidence, box: { x, y, width, height } };
}
|
||||
|
||||
/**
 * Validates an unknown value as a guide step candidate.
 *
 * Requires non-empty `id` and `eventId`, a non-negative finite `timeMs` and a finite
 * `confidence`; returns null otherwise. An unrecognized `action` becomes "manual", an
 * unrecognized `targetRole` becomes undefined, and non-string `nearbyText` entries are dropped.
 */
function normalizeGuideStepCandidate(input: unknown): GuideStepCandidate | null {
  if (!isRecord(input)) {
    return null;
  }

  const id = normalizeString(input.id);
  const eventId = normalizeString(input.eventId);
  const timeMs = normalizeNonNegativeNumber(input.timeMs);
  const confidence = normalizeOptionalNumber(input.confidence);
  if (!id || !eventId || timeMs === null || confidence === undefined) {
    return null;
  }

  const knownActions = ["click", "choose", "type", "wait", "manual"] as const;
  const knownRoles = ["button", "menu", "tab", "field", "link", "unknown"] as const;

  return {
    id,
    eventId,
    snapshotId: normalizeOptionalString(input.snapshotId),
    timeMs,
    action: knownActions.find((known) => known === input.action) ?? "manual",
    targetText: normalizeOptionalString(input.targetText),
    targetRole: knownRoles.find((known) => known === input.targetRole),
    nearbyText: normalizeArray(input.nearbyText, normalizeString),
    confidence,
  };
}
|
||||
|
||||
/**
 * Validates an unknown value as a generated guide.
 *
 * Requires a non-empty `title` and a `steps` array; returns null otherwise. Steps need
 * a non-empty `id`, `title` and `instruction` plus a positive `order`; steps that do
 * not qualify are dropped rather than failing the whole guide.
 */
function normalizeGeneratedGuide(input: unknown): GeneratedGuide | null {
  if (!isRecord(input)) {
    return null;
  }

  const title = normalizeString(input.title);
  const rawSteps = input.steps;
  if (!title || !Array.isArray(rawSteps)) {
    return null;
  }

  const toStep = (step: unknown): GeneratedGuideStep | null => {
    if (!isRecord(step)) {
      return null;
    }
    const id = normalizeString(step.id);
    const order = normalizePositiveInteger(step.order);
    const stepTitle = normalizeString(step.title);
    const instruction = normalizeString(step.instruction);
    if (!id || order === null || !stepTitle || !instruction) {
      return null;
    }
    return {
      id,
      order,
      title: stepTitle,
      instruction,
      screenshotPath: normalizeOptionalString(step.screenshotPath),
      sourceCandidateId: normalizeOptionalString(step.sourceCandidateId),
    };
  };

  return {
    title,
    summary: normalizeOptionalString(input.summary),
    steps: normalizeArray(rawSteps, toStep),
  };
}
|
||||
|
||||
/**
 * Normalizes each element of an array, keeping only the non-null results.
 *
 * @param input Value expected to be an array; anything else yields an empty array.
 * @param normalize Per-element normalizer; returning null drops the element.
 */
function normalizeArray<T>(input: unknown, normalize: (value: unknown) => T | null): T[] {
  if (!Array.isArray(input)) {
    return [];
  }

  const kept: T[] = [];
  input.forEach((raw) => {
    const normalized = normalize(raw);
    if (normalized !== null) {
      kept.push(normalized);
    }
  });
  return kept;
}
|
||||
|
||||
/** Returns the value when it is one of the valid session statuses, otherwise null. */
function normalizeSessionStatus(value: unknown): GuideSessionStatus | null {
  const candidate = value as GuideSessionStatus;
  if (VALID_SESSION_STATUSES.has(candidate)) {
    return candidate;
  }
  return null;
}
|
||||
|
||||
/** Returns the value when it is a string (including the empty string), otherwise null. */
function normalizeString(value: unknown): string | null {
  if (typeof value !== "string") {
    return null;
  }
  return value;
}
|
||||
|
||||
/** Returns the value when it is a non-empty string, otherwise undefined. */
function normalizeOptionalString(value: unknown): string | undefined {
  if (typeof value === "string" && value.length > 0) {
    return value;
  }
  return undefined;
}
|
||||
|
||||
/** Returns the value when it is a finite number >= 0, otherwise null. */
function normalizeNonNegativeNumber(value: unknown): number | null {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return null;
  }
  return value >= 0 ? value : null;
}
|
||||
|
||||
/** Returns the value when it is a finite number, otherwise undefined. */
function normalizeOptionalNumber(value: unknown): number | undefined {
  if (typeof value !== "number") {
    return undefined;
  }
  return Number.isFinite(value) ? value : undefined;
}
|
||||
|
||||
/**
 * Rounds a finite number to the nearest integer and returns it when the result is
 * positive; returns null for anything else.
 *
 * The positivity check runs on the rounded value: inputs in (0, 0.5) round to 0 and
 * must be rejected, otherwise callers would receive 0 from a "positive integer" helper.
 *
 * @param value Candidate value of unknown type.
 * @returns A positive integer, or null when the value is not a finite number or rounds
 *   to zero or below.
 */
function normalizePositiveInteger(value: unknown): number | null {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return null;
  }
  const rounded = Math.round(value);
  return rounded > 0 ? rounded : null;
}
|
||||
|
||||
/** Type guard: true for any non-null value whose `typeof` is "object" (arrays included). */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
|
||||
@@ -0,0 +1,232 @@
|
||||
import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { app } from "electron";
|
||||
|
||||
// --- OCR service endpoint -------------------------------------------------
/** Base URL used when the caller does not supply one. */
const DEFAULT_OCR_BASE_URL = "http://127.0.0.1:8866";
/** Port (as a string) that a base URL must use, or omit, for the service to be managed here. */
const DEFAULT_OCR_PORT = "8866";
/** File name of the bundled OCR service executable. */
const SERVICE_EXE_NAME = "openscreen-ocr-service.exe";

// --- Timeouts (milliseconds) ----------------------------------------------
/** Budget passed to the health probe made before deciding to start the service. */
const HEALTH_TIMEOUT_MS = 1000;
/** Budget passed to the health wait that follows a service start. */
const STARTUP_TIMEOUT_MS = 90000;

/** Model directories copied from the bundled PaddleX cache into the user cache. */
const PADDLEX_MODEL_NAMES = ["PP-OCRv5_mobile_det", "latin_PP-OCRv5_mobile_rec"];

// --- Module state ----------------------------------------------------------
/** Child process of the OCR service, or null when none has been recorded. */
let ocrProcess: ChildProcessWithoutNullStreams | null = null;
/** In-flight startup shared by concurrent callers; reset to null once it settles. */
let startupPromise: Promise<void> | null = null;
// NOTE(review): presumably guards one-time registration of an app-quit handler — confirm at its use site.
let quitHookRegistered = false;
|
||||
|
||||
/**
 * Makes sure the bundled OCR service is up for the given base URL.
 *
 * Does nothing when the URL is not one this module manages, when the service already
 * answers the health probe, or when no bundled executable can be found. Otherwise it
 * starts the service (sharing a single in-flight startup between concurrent callers)
 * and waits for it to become healthy.
 *
 * @param baseUrl OCR service base URL; defaults to the local default endpoint.
 */
export async function ensureBundledOcrServiceRunning(
  baseUrl = DEFAULT_OCR_BASE_URL,
): Promise<void> {
  if (!shouldManageOcrService(baseUrl)) {
    return;
  }

  const alreadyHealthy = await isOcrServiceHealthy(baseUrl, HEALTH_TIMEOUT_MS);
  if (alreadyHealthy) {
    return;
  }

  const executablePath = await findBundledOcrServiceExecutable();
  if (!executablePath) {
    return;
  }

  if (startupPromise === null) {
    const clearStartup = (): void => {
      startupPromise = null;
    };
    // Cleared once settled so a later call can attempt a fresh start.
    startupPromise = startAndWaitForOcrService(executablePath, baseUrl).finally(clearStartup);
  }
  await startupPromise;
}
|
||||
|
||||
/**
 * Decides whether `baseUrl` points at an OCR service this module should manage.
 *
 * True only for http/https URLs whose host is 127.0.0.1 or localhost and whose port is
 * either unspecified or the default OCR port. Unparseable URLs yield false.
 */
function shouldManageOcrService(baseUrl: string): boolean {
  let url: URL;
  try {
    url = new URL(baseUrl);
  } catch {
    return false;
  }

  if (url.protocol !== "http:" && url.protocol !== "https:") {
    return false;
  }

  const hostname = url.hostname.toLowerCase();
  if (hostname !== "127.0.0.1" && hostname !== "localhost") {
    return false;
  }

  return url.port === "" || url.port === DEFAULT_OCR_PORT;
}
|
||||
|
||||
/**
 * Locates the OCR service executable.
 *
 * Candidates are checked in order: the `OPENSCREEN_GUIDE_OCR_EXE` environment variable
 * (when non-empty), two locations under `process.resourcesPath/ocr-service`, and the
 * `tools/ocr/dist` build output under the current working directory.
 *
 * @returns The first candidate that exists as a regular file, or null when none does.
 */
async function findBundledOcrServiceExecutable(): Promise<string | null> {
  const resourcesOcrDir = path.join(process.resourcesPath, "ocr-service");

  const candidates: string[] = [];
  const override = process.env.OPENSCREEN_GUIDE_OCR_EXE;
  if (typeof override === "string" && override.length > 0) {
    candidates.push(override);
  }
  candidates.push(
    path.join(resourcesOcrDir, SERVICE_EXE_NAME),
    path.join(resourcesOcrDir, "openscreen-ocr-service", SERVICE_EXE_NAME),
    path.resolve(process.cwd(), "tools", "ocr", "dist", "openscreen-ocr-service", SERVICE_EXE_NAME),
  );

  for (const candidate of candidates) {
    // A failed stat just means this candidate is unusable; move on to the next one.
    const stats = await fs.stat(candidate).catch(() => null);
    if (stats?.isFile()) {
      return candidate;
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Prepares the runtime directories, spawns the service unless a live child is already
 * tracked, and then waits up to `STARTUP_TIMEOUT_MS` for the health check to pass.
 */
async function startAndWaitForOcrService(executablePath: string, baseUrl: string): Promise<void> {
	const runtimePaths = await prepareOcrRuntimePaths();

	// A tracked child counts as live only if it has neither exited nor been killed.
	const hasLiveProcess =
		ocrProcess !== null && ocrProcess.exitCode === null && !ocrProcess.killed;
	if (!hasLiveProcess) {
		startOcrServiceProcess(executablePath, runtimePaths);
	}

	await waitForOcrServiceHealth(baseUrl, STARTUP_TIMEOUT_MS);
}
|
||||
|
||||
/**
 * Resolves the per-user OCR cache directories and seeds the PaddleX cache from the bundled
 * models before returning.
 *
 * @returns `modelCachePath` (`<userData>/ocr-models`) and `paddlexCachePath`
 *   (`<userData>/ocr-models/paddlex`).
 */
async function prepareOcrRuntimePaths(): Promise<{
	modelCachePath: string;
	paddlexCachePath: string;
}> {
	const userDataDir = app.getPath("userData");
	const runtimePaths = {
		modelCachePath: path.join(userDataDir, "ocr-models"),
		paddlexCachePath: path.join(userDataDir, "ocr-models", "paddlex"),
	};
	await seedBundledPaddlexModels(runtimePaths.paddlexCachePath);
	return runtimePaths;
}
|
||||
|
||||
/**
 * Copies the bundled PaddleX models into `destinationCachePath/official_models`.
 *
 * Does nothing when no bundled cache is found. A model is copied only when it exists in
 * the bundle and its destination directory does not exist yet.
 */
async function seedBundledPaddlexModels(destinationCachePath: string): Promise<void> {
	const bundledCache = await findBundledPaddlexModelCache();
	if (!bundledCache) {
		return;
	}

	const bundledRoot = path.join(bundledCache, "official_models");
	const targetRoot = path.join(destinationCachePath, "official_models");
	await fs.mkdir(targetRoot, { recursive: true });

	for (const modelName of PADDLEX_MODEL_NAMES) {
		const from = path.join(bundledRoot, modelName);
		const to = path.join(targetRoot, modelName);
		// The destination is only probed when the bundled model is actually present.
		const needsCopy = (await pathExists(from)) && !(await pathExists(to));
		if (needsCopy) {
			await fs.cp(from, to, {
				recursive: true,
				errorOnExist: false,
				force: false,
			});
		}
	}
}
|
||||
|
||||
/**
 * Locates the bundled PaddleX model cache.
 *
 * Checks the application's resources directory first, then the development tree under the
 * current working directory.
 *
 * @returns The first candidate that exists and is a directory, or null when none does.
 */
async function findBundledPaddlexModelCache(): Promise<string | null> {
	const searchOrder = [
		path.join(process.resourcesPath, "ocr-models", "paddlex"),
		path.resolve(process.cwd(), "tools", "ocr", "models", "paddlex"),
	];

	for (const location of searchOrder) {
		try {
			if ((await fs.stat(location)).isDirectory()) {
				return location;
			}
		} catch {
			// Try the next candidate.
		}
	}
	return null;
}
|
||||
|
||||
/**
 * Reports whether `value` can be accessed on disk.
 *
 * @returns true when `fs.access` succeeds, false when it fails for any reason.
 */
async function pathExists(value: string): Promise<boolean> {
	return fs.access(value).then(
		() => true,
		() => false,
	);
}
|
||||
|
||||
/**
 * Spawns the bundled OCR service and records it in `ocrProcess`.
 *
 * The child runs from the executable's own directory and is bound to 127.0.0.1 on
 * `DEFAULT_OCR_PORT`. Paddle-related settings already present in the environment win over
 * the defaults supplied here; cache locations default to the prepared runtime paths. The
 * child's stdout and stderr are forwarded to the console.
 *
 * @param executablePath Absolute path of the service executable to launch.
 * @param runtimePaths Cache directories used as defaults for the Paddle environment.
 */
function startOcrServiceProcess(
	executablePath: string,
	runtimePaths: { modelCachePath: string; paddlexCachePath: string },
): void {
	registerQuitHook();

	const child = spawn(executablePath, [], {
		cwd: path.dirname(executablePath),
		env: {
			...process.env,
			OPENSCREEN_OCR_HOST: "127.0.0.1",
			OPENSCREEN_OCR_PORT: DEFAULT_OCR_PORT,
			PADDLEOCR_DEVICE: process.env.PADDLEOCR_DEVICE ?? "cpu",
			PADDLEOCR_ENABLE_MKLDNN: process.env.PADDLEOCR_ENABLE_MKLDNN ?? "0",
			PADDLEOCR_LANG: process.env.PADDLEOCR_LANG ?? "latin",
			PADDLEOCR_USE_MOBILE: process.env.PADDLEOCR_USE_MOBILE ?? "1",
			PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT: process.env.PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT ?? "False",
			PADDLE_PDX_CACHE_HOME: process.env.PADDLE_PDX_CACHE_HOME ?? runtimePaths.paddlexCachePath,
			PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK:
				process.env.PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK ?? "True",
			PADDLE_HOME: process.env.PADDLE_HOME ?? path.join(runtimePaths.modelCachePath, "paddle"),
			PADDLEOCR_HOME:
				process.env.PADDLEOCR_HOME ?? path.join(runtimePaths.modelCachePath, "paddleocr"),
			PYTHONUTF8: "1",
		},
		windowsHide: true,
	});
	ocrProcess = child;

	// Only forget the tracked process if it is still this child; a late event from an
	// older child must not drop a newer one.
	const forgetChild = (): void => {
		if (ocrProcess === child) {
			ocrProcess = null;
		}
	};

	child.stdout.on("data", (chunk) => {
		console.info(`[guide-ocr-service] ${chunk.toString().trim()}`);
	});
	child.stderr.on("data", (chunk) => {
		console.warn(`[guide-ocr-service] ${chunk.toString().trim()}`);
	});
	// Without a listener, a failed spawn (missing or non-executable file) surfaces as an
	// unhandled "error" event and takes down the main process.
	child.on("error", (error) => {
		console.warn("[guide-ocr-service] process error", error);
		forgetChild();
	});
	child.on("exit", (code, signal) => {
		console.info("[guide-ocr-service] exited", { code, signal });
		forgetChild();
	});
}
|
||||
|
||||
/**
 * Installs, at most once, a "before-quit" listener that stops the tracked OCR process.
 */
function registerQuitHook(): void {
	if (!quitHookRegistered) {
		quitHookRegistered = true;
		app.once("before-quit", () => {
			// Clear the module reference before killing the child.
			const runningProcess = ocrProcess;
			ocrProcess = null;
			if (runningProcess !== null) {
				runningProcess.kill();
			}
		});
	}
}
|
||||
|
||||
/**
 * Polls the service's health endpoint until it responds or `timeoutMs` elapses.
 *
 * The child tracked at the moment of the call is watched directly, so an early exit is
 * reported immediately instead of being waited out, even after the module-level
 * reference has been cleared.
 *
 * @param baseUrl Base URL of the OCR service being started.
 * @param timeoutMs Maximum time to keep polling, in milliseconds.
 * @throws Error when the watched child exits or is terminated by a signal, or when the
 *   timeout elapses without a successful health check.
 */
async function waitForOcrServiceHealth(baseUrl: string, timeoutMs: number): Promise<void> {
	// Keep our own reference: `ocrProcess` may be reset to null once the child exits.
	const watchedProcess = ocrProcess;
	const deadline = Date.now() + timeoutMs;

	while (Date.now() < deadline) {
		if (await isOcrServiceHealthy(baseUrl, HEALTH_TIMEOUT_MS)) {
			return;
		}
		if (watchedProcess !== null) {
			if (watchedProcess.exitCode !== null) {
				throw new Error(`Bundled OCR service exited with code ${watchedProcess.exitCode}.`);
			}
			if (watchedProcess.signalCode !== null) {
				throw new Error(
					`Bundled OCR service was terminated by signal ${watchedProcess.signalCode}.`,
				);
			}
		}
		await sleep(750);
	}

	throw new Error("Timed out waiting for bundled OCR service to start.");
}
|
||||
|
||||
/**
 * Probes `<baseUrl>/health` once.
 *
 * @param baseUrl Base URL of the service; a single trailing slash is ignored.
 * @param timeoutMs Time after which the request is aborted, in milliseconds.
 * @returns true for a successful (2xx) response; false on any other status, on a network
 *   error, or when the request is aborted by the timeout.
 */
async function isOcrServiceHealthy(baseUrl: string, timeoutMs: number): Promise<boolean> {
	const abort = new AbortController();
	const timer = setTimeout(() => {
		abort.abort();
	}, timeoutMs);

	try {
		const healthUrl = `${baseUrl.replace(/\/$/, "")}/health`;
		const { ok } = await fetch(healthUrl, { signal: abort.signal });
		return ok;
	} catch {
		return false;
	} finally {
		clearTimeout(timer);
	}
}
|
||||
|
||||
/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
	return new Promise<void>((done) => {
		setTimeout(done, ms);
	});
}
|
||||
@@ -0,0 +1,33 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { OcrBlock } from "../../../src/guide/contracts";
|
||||
import { remapFocusedOcrBlocks } from "./focusedOcrSnapshot";
|
||||
|
||||
describe("remapFocusedOcrBlocks", () => {
	it("maps boxes from a focused crop back to the original snapshot coordinates", () => {
		// A box expressed relative to the focused crop.
		const focusedBlock: OcrBlock = {
			id: "ocr-1",
			snapshotId: "snapshot-1",
			text: "Settings",
			confidence: 0.9,
			box: { x: 0.25, y: 0.5, width: 0.2, height: 0.1 },
		};
		// The crop is the centred quarter of a 1280x720 snapshot.
		const transform = {
			cropX: 320,
			cropY: 180,
			cropWidth: 640,
			cropHeight: 360,
			originalWidth: 1280,
			originalHeight: 720,
		};

		const [first] = remapFocusedOcrBlocks([focusedBlock], transform);

		expect(first?.box).toEqual({
			x: 0.375,
			y: 0.5,
			width: 0.1,
			height: 0.05,
		});
	});
});
|
||||
@@ -0,0 +1,225 @@
|
||||
import { execFile } from "node:child_process";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { promisify } from "node:util";
|
||||
import type { GuideEvent, GuideSnapshot, OcrBlock } from "../../../src/guide/contracts";
|
||||
|
||||
// Promise-returning form of child_process.execFile.
const execFileAsync = promisify(execFile);
|
||||
|
||||
/**
 * Describes the region of the original snapshot that a focused image was cut from.
 * All values are in pixels of the original snapshot.
 */
interface FocusTransform {
	/** Left edge of the crop. */
	cropX: number;
	/** Top edge of the crop. */
	cropY: number;
	/** Width of the crop. */
	cropWidth: number;
	/** Height of the crop. */
	cropHeight: number;
	/** Width of the snapshot the crop was taken from. */
	originalWidth: number;
	/** Height of the snapshot the crop was taken from. */
	originalHeight: number;
}
|
||||
|
||||
/** Result of preparing a snapshot for OCR. */
export interface FocusedOcrSnapshot {
	/** The image to run OCR on: either the focused crop or the untouched input snapshot. */
	snapshot: GuideSnapshot;
	/** Present only when `snapshot` is a focused crop; maps its boxes back to the original. */
	transform?: FocusTransform;
}
|
||||
|
||||
/**
 * Produces a zoomed crop of the snapshot around the event position for OCR.
 *
 * The input snapshot is returned unchanged (with no transform) when not running on
 * Windows, when the event yields no usable position, when no crop can be computed, when
 * the crop would cover the whole snapshot, or when writing the cropped image fails.
 * Otherwise the crop is written at 2x scale to `<outputDir>/ocr-focus/<name>-focus.png`.
 *
 * @param input.snapshot Snapshot to focus.
 * @param input.event Event whose position selects the crop centre.
 * @param input.outputDir Directory under which the `ocr-focus` folder is created.
 */
export async function createFocusedOcrSnapshot(input: {
	snapshot: GuideSnapshot;
	event?: GuideEvent;
	outputDir: string;
}): Promise<FocusedOcrSnapshot> {
	const { snapshot, event, outputDir } = input;
	const unfocused: FocusedOcrSnapshot = { snapshot };

	if (process.platform !== "win32") {
		return unfocused;
	}

	const click = getEventPoint(event, snapshot);
	if (!click) {
		return unfocused;
	}

	const crop = calculateFocusCrop(snapshot, click);
	if (!crop) {
		return unfocused;
	}
	const coversWholeSnapshot =
		crop.cropWidth === snapshot.width && crop.cropHeight === snapshot.height;
	if (coversWholeSnapshot) {
		return unfocused;
	}

	const focusDir = path.join(outputDir, "ocr-focus");
	await fs.mkdir(focusDir, { recursive: true });

	const zoom = 2;
	const focusPath = path.join(focusDir, `${path.parse(snapshot.path).name}-focus.png`);
	const outputWidth = crop.cropWidth * zoom;
	const outputHeight = crop.cropHeight * zoom;

	try {
		await writeFocusedPng({
			sourcePath: snapshot.path,
			outputPath: focusPath,
			cropX: crop.cropX,
			cropY: crop.cropY,
			cropWidth: crop.cropWidth,
			cropHeight: crop.cropHeight,
			outputWidth,
			outputHeight,
		});
	} catch {
		// Cropping is best-effort; fall back to the full snapshot.
		return unfocused;
	}

	return {
		snapshot: { ...snapshot, path: focusPath, width: outputWidth, height: outputHeight },
		transform: crop,
	};
}
|
||||
|
||||
/**
 * Converts OCR boxes measured on a focused crop back into coordinates of the original
 * snapshot.
 *
 * Box values are fractions of the image they were measured on; the result is clamped to
 * the 0..1 range.
 *
 * @param blocks Blocks recognised on the focused image.
 * @param transform Crop description; when absent, `blocks` is returned as-is.
 */
export function remapFocusedOcrBlocks(
	blocks: OcrBlock[],
	transform: FocusedOcrSnapshot["transform"],
): OcrBlock[] {
	if (!transform) {
		return blocks;
	}

	const { cropX, cropY, cropWidth, cropHeight, originalWidth, originalHeight } = transform;

	return blocks.map((block) => {
		const { x, y, width, height } = block.box;
		return {
			...block,
			box: {
				x: clamp01((cropX + x * cropWidth) / originalWidth),
				y: clamp01((cropY + y * cropHeight) / originalHeight),
				width: clamp01((width * cropWidth) / originalWidth),
				height: clamp01((height * cropHeight) / originalHeight),
			},
		};
	});
}
|
||||
|
||||
/**
 * Extracts the event position as fractions (0..1) of the snapshot size.
 *
 * Preference order: `normalizedX`/`normalizedY` when both are within 0..1; then `x`/`y`
 * when both are already within 0..1; then `x`/`y` treated as pixels when they fall inside
 * the snapshot bounds.
 *
 * @returns The position, or null when there is no event or no usable coordinates.
 */
function getEventPoint(
	event: GuideEvent | undefined,
	snapshot: GuideSnapshot,
): { x: number; y: number } | null {
	if (!event) {
		return null;
	}

	const { normalizedX, normalizedY, x, y } = event;
	if (isNormalizedNumber(normalizedX) && isNormalizedNumber(normalizedY)) {
		return { x: normalizedX, y: normalizedY };
	}

	// Both remaining interpretations need numeric x and y.
	if (typeof x !== "number" || typeof y !== "number") {
		return null;
	}
	if (isNormalizedNumber(x) && isNormalizedNumber(y)) {
		return { x, y };
	}

	const insideSnapshot = x >= 0 && y >= 0 && x <= snapshot.width && y <= snapshot.height;
	return insideSnapshot
		? { x: clamp01(x / snapshot.width), y: clamp01(y / snapshot.height) }
		: null;
}
|
||||
|
||||
/**
 * Computes the pixel rectangle to crop around a click.
 *
 * The crop is 42% of each snapshot dimension, bounded to 360..720 px wide and 240..520 px
 * high (never larger than the snapshot), centred on the click and shifted as needed to
 * stay inside the snapshot.
 *
 * @param click Click position as fractions of the snapshot size.
 * @returns The crop, or null when the snapshot has a non-positive dimension.
 */
function calculateFocusCrop(
	snapshot: GuideSnapshot,
	click: { x: number; y: number },
): FocusTransform | null {
	const { width, height } = snapshot;
	if (width <= 0 || height <= 0) {
		return null;
	}

	const cropWidth = clampInteger(
		Math.round(width * 0.42),
		Math.min(360, width),
		Math.min(720, width),
	);
	const cropHeight = clampInteger(
		Math.round(height * 0.42),
		Math.min(240, height),
		Math.min(520, height),
	);

	const centerX = Math.round(clamp01(click.x) * width);
	const centerY = Math.round(clamp01(click.y) * height);
	const left = clampInteger(Math.round(centerX - cropWidth / 2), 0, width - cropWidth);
	const top = clampInteger(Math.round(centerY - cropHeight / 2), 0, height - cropHeight);

	return {
		cropX: left,
		cropY: top,
		cropWidth,
		cropHeight,
		originalWidth: width,
		originalHeight: height,
	};
}
|
||||
|
||||
/**
 * Crops and rescales an image by running a generated PowerShell script.
 *
 * The script is passed via `-EncodedCommand` (UTF-16LE, base64). The call rejects when
 * PowerShell fails or exceeds the 30 second timeout.
 */
async function writeFocusedPng(input: {
	sourcePath: string;
	outputPath: string;
	cropX: number;
	cropY: number;
	cropWidth: number;
	cropHeight: number;
	outputWidth: number;
	outputHeight: number;
}): Promise<void> {
	const encodedCommand = Buffer.from(buildCropScript(input), "utf16le").toString("base64");
	const powershellArgs = [
		"-NoProfile",
		"-ExecutionPolicy",
		"Bypass",
		"-EncodedCommand",
		encodedCommand,
	];

	await execFileAsync("powershell.exe", powershellArgs, {
		timeout: 30000,
		maxBuffer: 1024 * 1024,
		windowsHide: true,
	});
}
|
||||
|
||||
/**
 * Builds the PowerShell script that crops `sourcePath` to the given rectangle, scales it
 * to `outputWidth` x `outputHeight`, and saves it as a PNG at `outputPath`.
 *
 * Both paths are embedded as base64 and decoded inside the script, so they never appear
 * as raw text in the PowerShell source.
 */
function buildCropScript(input: {
	sourcePath: string;
	outputPath: string;
	cropX: number;
	cropY: number;
	cropWidth: number;
	cropHeight: number;
	outputWidth: number;
	outputHeight: number;
}): string {
	const toBase64 = (text: string): string => Buffer.from(text, "utf8").toString("base64");
	const { cropX, cropY, cropWidth, cropHeight, outputWidth, outputHeight } = input;
	const sourcePathBase64 = toBase64(input.sourcePath);
	const outputPathBase64 = toBase64(input.outputPath);

	return `
$ErrorActionPreference = "Stop"
$sourcePath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${sourcePathBase64}"))
$outputPath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${outputPathBase64}"))
Add-Type -AssemblyName System.Drawing

$source = [System.Drawing.Image]::FromFile($sourcePath)
$target = [System.Drawing.Bitmap]::new(${outputWidth}, ${outputHeight})
$graphics = [System.Drawing.Graphics]::FromImage($target)
try {
$graphics.Clear([System.Drawing.Color]::White)
$graphics.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
$graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::HighQuality
$graphics.PixelOffsetMode = [System.Drawing.Drawing2D.PixelOffsetMode]::HighQuality
$sourceRect = [System.Drawing.Rectangle]::new(${cropX}, ${cropY}, ${cropWidth}, ${cropHeight})
$targetRect = [System.Drawing.Rectangle]::new(0, 0, ${outputWidth}, ${outputHeight})
$graphics.DrawImage($source, $targetRect, $sourceRect, [System.Drawing.GraphicsUnit]::Pixel)
$target.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png)
} finally {
$graphics.Dispose()
$target.Dispose()
$source.Dispose()
}
`;
}
|
||||
|
||||
/** Type guard: true only for finite numbers within the closed range 0..1. */
function isNormalizedNumber(value: unknown): value is number {
	if (typeof value !== "number") {
		return false;
	}
	return Number.isFinite(value) && value >= 0 && value <= 1;
}
|
||||
|
||||
/**
 * Limits `value` to the range `min..max` and rounds the result to the nearest integer.
 * When the range is inverted (`max < min`), `min` is returned unchanged.
 */
function clampInteger(value: number, min: number, max: number): number {
	if (min > max) {
		return min;
	}
	const atLeastMin = Math.max(min, value);
	const bounded = Math.min(max, atLeastMin);
	return Math.round(bounded);
}
|
||||
|
||||
/** Limits `value` to the range 0..1; non-finite input (NaN, ±Infinity) yields 0. */
function clamp01(value: number): number {
	return Number.isFinite(value) ? Math.min(1, Math.max(0, value)) : 0;
}
|
||||
@@ -0,0 +1,110 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { GuideSnapshot, OcrBlock } from "../../../src/guide/contracts";
|
||||
import {
|
||||
DefaultGuideOcrClient,
|
||||
normalizeOcrResponse,
|
||||
parseWindowsOcrPayload,
|
||||
} from "./paddleOcrClient";
|
||||
|
||||
// Shared fixture: a 1000x800 snapshot, so pixel boxes normalise to round fractions.
const snapshot: GuideSnapshot = {
	id: "snapshot-1",
	eventId: "event-1",
	timeMs: 1000,
	offsetMs: 500,
	path: "/tmp/step-001.png",
	width: 1000,
	height: 800,
};
|
||||
|
||||
describe("normalizeOcrResponse", () => {
	it("normalizes pixel boxes into guide OCR blocks", () => {
		// Pixel-space box with a percentage-style confidence.
		const payload = {
			blocks: [
				{
					text: "Save",
					confidence: 92,
					box: { x: 400, y: 320, width: 120, height: 40 },
				},
			],
		};

		const blocks = normalizeOcrResponse(payload, snapshot);

		expect(blocks).toEqual([
			{
				id: "ocr-snapshot-1-1",
				snapshotId: "snapshot-1",
				text: "Save",
				confidence: 0.92,
				box: { x: 0.4, y: 0.4, width: 0.12, height: 0.05 },
			},
		]);
	});

	it("normalizes polygon responses", () => {
		// Four corner points given as [x, y] pairs in pixels.
		const payload = [
			{
				text: "Next",
				score: 0.8,
				bbox: [
					[100, 200],
					[300, 200],
					[300, 260],
					[100, 260],
				],
			},
		];

		const [first] = normalizeOcrResponse(payload, snapshot);

		expect(first).toMatchObject({
			text: "Next",
			confidence: 0.8,
			box: { x: 0.1, y: 0.25, width: 0.2, height: 0.075 },
		});
	});
});
|
||||
|
||||
describe("DefaultGuideOcrClient", () => {
	it("falls back when the HTTP OCR service is unavailable", async () => {
		const fallbackBlock: OcrBlock = {
			id: "ocr-snapshot-1-1",
			snapshotId: "snapshot-1",
			text: "Save",
			confidence: 0.75,
			box: { x: 0.1, y: 0.2, width: 0.3, height: 0.4 },
		};
		// Primary client always fails; the secondary supplies the result.
		const failingHttpClient = {
			recognize: async () => {
				throw new Error("HTTP down");
			},
		};
		const fallbackClient = {
			recognize: async () => [fallbackBlock],
		};

		const client = new DefaultGuideOcrClient(failingHttpClient, fallbackClient);

		await expect(client.recognize(snapshot)).resolves.toEqual([fallbackBlock]);
	});
});
|
||||
|
||||
describe("parseWindowsOcrPayload", () => {
	it("recovers from raw control characters in OCR text", () => {
		// The literal contains a raw U+0001 inside a JSON string, which strict parsing rejects.
		const rawOutput =
			'{"blocks":[{"text":"Save\u0001now","confidence":0.75,"box":{"x":1,"y":2,"width":3,"height":4}}]}';

		const payload = parseWindowsOcrPayload(rawOutput);

		expect(payload).toEqual({
			blocks: [
				{
					text: "Save now",
					confidence: 0.75,
					box: { x: 1, y: 2, width: 3, height: 4 },
				},
			],
		});
	});
});
|
||||
@@ -0,0 +1,372 @@
|
||||
import { execFile } from "node:child_process";
|
||||
import fs from "node:fs/promises";
|
||||
import { promisify } from "node:util";
|
||||
import type { GuideSnapshot, OcrBlock } from "../../../src/guide/contracts";
|
||||
import { ensureBundledOcrServiceRunning } from "./bundledOcrService";
|
||||
|
||||
// Promise-returning form of child_process.execFile.
const execFileAsync = promisify(execFile);
|
||||
|
||||
/** Anything that can run OCR over a guide snapshot. */
export interface GuideOcrClient {
	/** Recognises text in the snapshot image and returns the blocks that were found. */
	recognize(snapshot: GuideSnapshot): Promise<OcrBlock[]>;
}
|
||||
|
||||
/**
 * One unvalidated entry of an OCR response. Every field is `unknown` because the payload
 * comes from an external process and is checked field by field during normalisation.
 */
interface PaddleOcrResponseBlock {
	/** Recognised text. */
	text?: unknown;
	/** Recognition confidence; used in preference to `score`. */
	confidence?: unknown;
	/** Alternative name for the confidence value. */
	score?: unknown;
	/** Bounding box; used in preference to `bbox`. */
	box?: unknown;
	/** Alternative name for the bounding box. */
	bbox?: unknown;
}
|
||||
|
||||
/**
 * OCR client that posts the snapshot image to an HTTP OCR service.
 *
 * The base URL and language default to `OPENSCREEN_GUIDE_OCR_URL` and
 * `OPENSCREEN_GUIDE_OCR_LANGUAGE`, falling back to `http://127.0.0.1:8866` and `vi,en`.
 */
export class PaddleOcrHttpClient implements GuideOcrClient {
	constructor(
		private readonly baseUrl = process.env.OPENSCREEN_GUIDE_OCR_URL ?? "http://127.0.0.1:8866",
		private readonly language = process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE ?? "vi,en",
	) {}

	/**
	 * Ensures the bundled service is running, then sends the image (base64) together with
	 * its path and the configured language to `<baseUrl>/ocr`.
	 *
	 * @throws Error when the request cannot be made or the service answers with a non-2xx
	 *   status.
	 */
	async recognize(snapshot: GuideSnapshot): Promise<OcrBlock[]> {
		await ensureBundledOcrServiceRunning(this.baseUrl);
		const imageBase64 = await fs.readFile(snapshot.path, "base64");

		let response: Response;
		try {
			const endpoint = `${this.baseUrl.replace(/\/$/, "")}/ocr`;
			const body = JSON.stringify({
				imageBase64,
				path: snapshot.path,
				language: this.language,
			});
			response = await fetch(endpoint, {
				method: "POST",
				headers: { "content-type": "application/json" },
				body,
			});
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			throw new Error(`OCR service is unavailable: ${reason}`);
		}

		if (!response.ok) {
			throw new Error(`OCR service returned HTTP ${response.status}.`);
		}

		const payload: unknown = await response.json();
		return normalizeOcrResponse(payload, snapshot);
	}
}
|
||||
|
||||
/**
 * OCR client backed by the Windows OCR engine, driven through a generated PowerShell
 * script. The language defaults to `OPENSCREEN_GUIDE_OCR_LANGUAGE`, or `vi,en`.
 */
export class WindowsOcrClient implements GuideOcrClient {
	constructor(private readonly language = process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE ?? "vi,en") {}

	/**
	 * Runs the OCR script on the snapshot image and normalises its JSON output.
	 *
	 * @throws Error when not running on Windows, when PowerShell fails or times out, or
	 *   when its output cannot be parsed as JSON.
	 */
	async recognize(snapshot: GuideSnapshot): Promise<OcrBlock[]> {
		if (process.platform !== "win32") {
			throw new Error("Windows OCR fallback is only available on Windows.");
		}

		const encodedCommand = Buffer.from(
			buildWindowsOcrScript(snapshot.path, this.language),
			"utf16le",
		).toString("base64");

		let stdout: string;
		try {
			({ stdout } = await execFileAsync(
				"powershell.exe",
				["-NoProfile", "-ExecutionPolicy", "Bypass", "-EncodedCommand", encodedCommand],
				{
					maxBuffer: 8 * 1024 * 1024,
					timeout: 30000,
					windowsHide: true,
				},
			));
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			throw new Error(`Windows OCR failed: ${reason}`);
		}

		let payload: unknown;
		try {
			payload = parseWindowsOcrPayload(stdout);
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			throw new Error(`Windows OCR returned invalid JSON: ${reason}`);
		}
		return normalizeOcrResponse(payload, snapshot);
	}
}
|
||||
|
||||
/**
 * OCR client that tries the HTTP service first and falls back to Windows OCR.
 *
 * Both collaborators are typed as `GuideOcrClient`, so any implementation (for example a
 * test double) can be injected; the defaults are the real HTTP and Windows clients.
 */
export class DefaultGuideOcrClient implements GuideOcrClient {
	constructor(
		private readonly httpClient: GuideOcrClient = new PaddleOcrHttpClient(),
		private readonly windowsClient: GuideOcrClient = new WindowsOcrClient(),
	) {}

	/**
	 * Recognises text with the primary client, or with the fallback when the primary fails.
	 *
	 * @throws Error carrying both failure messages, separated by a space, when both
	 *   clients fail.
	 */
	async recognize(snapshot: GuideSnapshot): Promise<OcrBlock[]> {
		try {
			return await this.httpClient.recognize(snapshot);
		} catch (httpError) {
			try {
				return await this.windowsClient.recognize(snapshot);
			} catch (fallbackError) {
				throw new Error(
					[
						httpError instanceof Error ? httpError.message : String(httpError),
						fallbackError instanceof Error ? fallbackError.message : String(fallbackError),
					].join(" "),
				);
			}
		}
	}
}
|
||||
|
||||
/**
 * Parses the JSON printed by the Windows OCR script.
 *
 * A leading byte-order mark and surrounding whitespace are removed first. If strict
 * parsing fails, raw control characters are replaced with spaces and parsing is retried.
 *
 * @throws SyntaxError when the output is still not valid JSON after the retry.
 */
export function parseWindowsOcrPayload(stdout: string): unknown {
	const json = stdout.replace(/^\uFEFF/, "").trim();
	let parsed: unknown;
	try {
		parsed = JSON.parse(json);
	} catch {
		parsed = JSON.parse(replaceRawJsonControlCharacters(json));
	}
	return parsed;
}
|
||||
|
||||
/**
 * Replaces every ASCII control character (code below 32, or 127) with a single space and
 * leaves all other characters untouched.
 */
function replaceRawJsonControlCharacters(value: string): string {
	return Array.from(value, (character) => {
		const code = character.charCodeAt(0);
		const isControl = code < 32 || code === 127;
		return isControl ? " " : character;
	}).join("");
}
|
||||
|
||||
/**
 * Converts a raw OCR response into guide OCR blocks for `snapshot`.
 *
 * Entries that cannot be normalised are dropped. Block ids are numbered by position in
 * the raw response, so dropped entries leave gaps in the numbering.
 */
export function normalizeOcrResponse(payload: unknown, snapshot: GuideSnapshot): OcrBlock[] {
	const normalized: OcrBlock[] = [];
	extractRawBlocks(payload).forEach((raw, index) => {
		const block = normalizeBlock(raw, snapshot, index);
		if (block !== null) {
			normalized.push(block);
		}
	});
	return normalized;
}
|
||||
|
||||
/**
 * Finds the list of raw blocks in a response.
 *
 * Accepts a bare array, or an object carrying the array under `blocks`, `results`, or
 * `data` (checked in that order). Anything else yields an empty list.
 */
function extractRawBlocks(payload: unknown): PaddleOcrResponseBlock[] {
	if (Array.isArray(payload)) {
		return payload as PaddleOcrResponseBlock[];
	}
	if (!isRecord(payload)) {
		return [];
	}

	for (const key of ["blocks", "results", "data"]) {
		const candidate = payload[key];
		if (Array.isArray(candidate)) {
			return candidate as PaddleOcrResponseBlock[];
		}
	}
	return [];
}
|
||||
|
||||
/**
 * Validates one raw entry and turns it into an `OcrBlock`.
 *
 * @param index Zero-based position in the raw response; the block id uses `index + 1`.
 * @returns The block, or null when the entry is not an object, has no non-blank text, or
 *   has no usable box.
 */
function normalizeBlock(
	raw: PaddleOcrResponseBlock,
	snapshot: GuideSnapshot,
	index: number,
): OcrBlock | null {
	if (!isRecord(raw)) {
		return null;
	}

	const text = typeof raw.text === "string" ? raw.text.trim() : "";
	if (text.length === 0) {
		return null;
	}

	const confidence = normalizeConfidence(raw.confidence ?? raw.score);
	const box = normalizeBox(raw.box ?? raw.bbox, snapshot);
	if (box === null) {
		return null;
	}

	const snapshotId = snapshot.id;
	return { id: `ocr-${snapshotId}-${index + 1}`, snapshotId, text, confidence, box };
}
|
||||
|
||||
/**
 * Maps a reported confidence onto 0..1.
 *
 * Values above 1 are treated as percentages and divided by 100; a missing or non-finite
 * value becomes 0.5.
 */
function normalizeConfidence(value: unknown): number {
	if (typeof value !== "number" || !Number.isFinite(value)) {
		return 0.5;
	}
	const fraction = value > 1 ? value / 100 : value;
	return clamp01(fraction);
}
|
||||
|
||||
/**
 * Normalises a raw bounding box into fractions of the snapshot size.
 *
 * Arrays are handled as coordinate lists. Objects must carry finite `x`, `y`, and either
 * `width`/`height` or their short forms `w`/`h`.
 *
 * @returns The normalised box, or null when the value has no usable shape.
 */
function normalizeBox(
	value: unknown,
	snapshot: GuideSnapshot,
): { x: number; y: number; width: number; height: number } | null {
	if (Array.isArray(value)) {
		return normalizeArrayBox(value, snapshot);
	}
	if (!isRecord(value)) {
		return null;
	}

	const raw = {
		x: normalizeNumber(value.x),
		y: normalizeNumber(value.y),
		width: normalizeNumber(value.width ?? value.w),
		height: normalizeNumber(value.height ?? value.h),
	};
	if (raw.x === null || raw.y === null || raw.width === null || raw.height === null) {
		return null;
	}
	return normalizeBoxDimensions(
		{ x: raw.x, y: raw.y, width: raw.width, height: raw.height },
		snapshot,
	);
}
|
||||
|
||||
/**
 * Normalises a box given as an array of numbers (flat or nested up to two levels).
 *
 * - Eight or more numbers are read as polygon points `x0, y0, x1, y1, ...`; the result is
 *   the axis-aligned bounding box of every complete point. A trailing unpaired number is
 *   ignored.
 * - Four to seven numbers are read as `x, y, width, height`, using the first four.
 * - Fewer than four numbers yield null.
 *
 * Non-numeric entries are skipped before counting.
 */
function normalizeArrayBox(
	value: unknown[],
	snapshot: GuideSnapshot,
): { x: number; y: number; width: number; height: number } | null {
	const numbers = value.flat(2).filter((item): item is number => typeof item === "number");

	if (numbers.length >= 8) {
		// Use every complete (x, y) pair so polygons with more than four points are
		// bounded correctly; four-point input gives the same result as before.
		const pairedLength = numbers.length - (numbers.length % 2);
		const coordinates = numbers.slice(0, pairedLength);
		const xs = coordinates.filter((_, position) => position % 2 === 0);
		const ys = coordinates.filter((_, position) => position % 2 === 1);
		const minX = Math.min(...xs);
		const maxX = Math.max(...xs);
		const minY = Math.min(...ys);
		const maxY = Math.max(...ys);
		return normalizeBoxDimensions(
			{ x: minX, y: minY, width: maxX - minX, height: maxY - minY },
			snapshot,
		);
	}

	if (numbers.length >= 4) {
		return normalizeBoxDimensions(
			{ x: numbers[0] ?? 0, y: numbers[1] ?? 0, width: numbers[2] ?? 0, height: numbers[3] ?? 0 },
			snapshot,
		);
	}

	return null;
}
|
||||
|
||||
/**
 * Scales a box into fractions of the snapshot size and clamps each value to 0..1.
 *
 * The box is treated as pixel-based when any coordinate, dimension, or far edge
 * (`x + width`, `y + height`) exceeds 1; otherwise it is assumed to be normalised already
 * and is only clamped.
 */
function normalizeBoxDimensions(
	box: { x: number; y: number; width: number; height: number },
	snapshot: GuideSnapshot,
): { x: number; y: number; width: number; height: number } {
	const { x, y, width, height } = box;
	const usesPixels = [x, y, width, height, x + width, y + height].some((extent) => extent > 1);
	const [scaleX, scaleY] = usesPixels ? [snapshot.width, snapshot.height] : [1, 1];

	return {
		x: clamp01(x / scaleX),
		y: clamp01(y / scaleY),
		width: clamp01(width / scaleX),
		height: clamp01(height / scaleY),
	};
}
|
||||
|
||||
/** Returns `value` when it is a finite number, otherwise null. */
function normalizeNumber(value: unknown): number | null {
	if (typeof value !== "number") {
		return null;
	}
	return Number.isFinite(value) ? value : null;
}
|
||||
|
||||
/** Limits `value` to the range 0..1; non-finite input (NaN, ±Infinity) yields 0. */
function clamp01(value: number): number {
	return Number.isFinite(value) ? Math.min(1, Math.max(0, value)) : 0;
}
|
||||
|
||||
/** Type guard: true for any non-null value whose `typeof` is "object" (arrays included). */
function isRecord(value: unknown): value is Record<string, unknown> {
	return value !== null && typeof value === "object";
}
|
||||
|
||||
/**
 * Builds the PowerShell script that runs Windows OCR on an image and prints the result as
 * compact JSON of the form `{ "blocks": [{ text, confidence, box }] }`, one block per
 * recognised word.
 *
 * The image path and the language setting are embedded as base64 and decoded inside the
 * script. The script tries each requested language in order (mapping `vi` to `vi-VN` and
 * `en` to `en-US`), then the user-profile languages, then `en-US`. Every block is given a
 * fixed confidence of 0.75, and control characters in recognised text are replaced with
 * spaces before output.
 *
 * @param imagePath Path of the image to recognise.
 * @param language Comma-separated language tags.
 */
function buildWindowsOcrScript(imagePath: string, language: string): string {
	const toBase64 = (text: string): string => Buffer.from(text, "utf8").toString("base64");
	const imagePathBase64 = toBase64(imagePath);
	const languageBase64 = toBase64(language);

	return `
$ErrorActionPreference = "Stop"
[Console]::OutputEncoding = [System.Text.UTF8Encoding]::new($false)
$OutputEncoding = [System.Text.UTF8Encoding]::new($false)
$imagePath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${imagePathBase64}"))
$languageSetting = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${languageBase64}"))

Add-Type -AssemblyName System.Runtime.WindowsRuntime
[void][Windows.Storage.StorageFile, Windows.Storage, ContentType=WindowsRuntime]
[void][Windows.Storage.FileAccessMode, Windows.Storage, ContentType=WindowsRuntime]
[void][Windows.Graphics.Imaging.BitmapDecoder, Windows.Graphics.Imaging, ContentType=WindowsRuntime]
[void][Windows.Graphics.Imaging.SoftwareBitmap, Windows.Graphics.Imaging, ContentType=WindowsRuntime]
[void][Windows.Media.Ocr.OcrEngine, Windows.Foundation, ContentType=WindowsRuntime]
[void][Windows.Globalization.Language, Windows.Globalization, ContentType=WindowsRuntime]

$asTaskGeneric = ([System.WindowsRuntimeSystemExtensions].GetMethods() | Where-Object {
$_.Name -eq "AsTask" -and $_.IsGenericMethodDefinition -and $_.GetParameters().Count -eq 1
})[0]

function Await-WinRt($operation, [Type]$resultType) {
$asTask = $asTaskGeneric.MakeGenericMethod($resultType)
$task = $asTask.Invoke($null, @($operation))
$task.Wait()
return $task.Result
}

function New-OcrEngine($languageSetting) {
$languageTags = @()
foreach ($item in $languageSetting.Split(",")) {
$tag = $item.Trim()
if ($tag -eq "vi") { $tag = "vi-VN" }
if ($tag -eq "en") { $tag = "en-US" }
if ($tag.Length -gt 0) { $languageTags += $tag }
}

foreach ($tag in $languageTags) {
try {
$language = [Windows.Globalization.Language]::new($tag)
$engine = [Windows.Media.Ocr.OcrEngine]::TryCreateFromLanguage($language)
if ($null -ne $engine) { return $engine }
} catch {}
}

$profileEngine = [Windows.Media.Ocr.OcrEngine]::TryCreateFromUserProfileLanguages()
if ($null -ne $profileEngine) { return $profileEngine }
return [Windows.Media.Ocr.OcrEngine]::TryCreateFromLanguage([Windows.Globalization.Language]::new("en-US"))
}

function Normalize-OcrText($value) {
if ($null -eq $value) { return "" }
$text = [string]$value
$text = [System.Text.RegularExpressions.Regex]::Replace($text, "[\\x00-\\x1F\\x7F]", " ")
return $text.Trim()
}

$file = Await-WinRt ([Windows.Storage.StorageFile]::GetFileFromPathAsync($imagePath)) ([Windows.Storage.StorageFile])
$stream = Await-WinRt ($file.OpenAsync([Windows.Storage.FileAccessMode]::Read)) ([Windows.Storage.Streams.IRandomAccessStream])
$decoder = Await-WinRt ([Windows.Graphics.Imaging.BitmapDecoder]::CreateAsync($stream)) ([Windows.Graphics.Imaging.BitmapDecoder])
$bitmap = Await-WinRt ($decoder.GetSoftwareBitmapAsync()) ([Windows.Graphics.Imaging.SoftwareBitmap])
$engine = New-OcrEngine $languageSetting
if ($null -eq $engine) { throw "No Windows OCR engine is available." }
$result = Await-WinRt ($engine.RecognizeAsync($bitmap)) ([Windows.Media.Ocr.OcrResult])

$blocks = @()
$index = 0
foreach ($line in $result.Lines) {
foreach ($word in $line.Words) {
$rect = $word.BoundingRect
$text = Normalize-OcrText $word.Text
if ($text.Length -gt 0) {
$index += 1
$blocks += [PSCustomObject]@{
text = $text
confidence = 0.75
box = [PSCustomObject]@{
x = [double]$rect.X
y = [double]$rect.Y
width = [double]$rect.Width
height = [double]$rect.Height
}
}
}
}
}

[PSCustomObject]@{ blocks = $blocks } | ConvertTo-Json -Depth 6 -Compress
`;
}
|
||||
@@ -35,6 +35,9 @@ import type {
|
||||
ProjectFileResult,
|
||||
ProjectPathResult,
|
||||
} from "../../src/native/contracts";
|
||||
import { DeepSeekSettingsStore } from "../guide/ai/deepseekSettingsStore";
|
||||
import { registerGuideIpcHandlers } from "../guide/guideIpc";
|
||||
import { GuideStore } from "../guide/guideStore";
|
||||
import { mainT } from "../i18n";
|
||||
import { RECORDINGS_DIR } from "../main";
|
||||
import { createCursorRecordingSession } from "../native-bridge/cursor/recording/factory";
|
||||
@@ -2172,6 +2175,14 @@ export function registerIpcHandlers(
|
||||
// never buffers the full video in memory (the #616 fix).
|
||||
const recordingStreams = new RecordingStreamRegistry();
|
||||
registerRecordingStreamHandlers(ipcMain, recordingStreams, resolveRecordingOutputPath);
|
||||
const guideAiSettingsStore = new DeepSeekSettingsStore(
|
||||
path.join(app.getPath("userData"), "guide-ai-settings.json"),
|
||||
);
|
||||
registerGuideIpcHandlers(
|
||||
ipcMain,
|
||||
new GuideStore(RECORDINGS_DIR, { deepSeekConfigProvider: guideAiSettingsStore }),
|
||||
guideAiSettingsStore,
|
||||
);
|
||||
|
||||
ipcMain.handle("store-recorded-session", async (_, payload: StoreRecordedSessionInput) => {
|
||||
try {
|
||||
|
||||
@@ -632,8 +632,8 @@ int main(int argc, char* argv[]) {
|
||||
(webcamOutputFrameIndex * 10'000'000ULL) / std::max(1, webcamCapture.fps()));
|
||||
if (!webcamEncoder.writeBgraFrame(webcamFrame, webcamTimestampHns)) {
|
||||
encodeFailed = true;
|
||||
stopRequested = true;
|
||||
cv.notify_all();
|
||||
control.stopRequested = true;
|
||||
control.cv.notify_all();
|
||||
return;
|
||||
}
|
||||
lastWrittenWebcamSequence = latestWebcamSequence;
|
||||
|
||||
@@ -1,4 +1,15 @@
|
||||
import { contextBridge, ipcRenderer } from "electron";
|
||||
import type {
|
||||
AddGuideMarkerInput,
|
||||
DiscardGuideSessionInput,
|
||||
ExportGuideInput,
|
||||
FinalizeGuideEventsInput,
|
||||
GenerateGuideDraftInput,
|
||||
RunGuideOcrInput,
|
||||
SaveGuideAiSettingsInput,
|
||||
SaveGuideInput,
|
||||
WriteGuideSnapshotInput,
|
||||
} from "../src/guide/contracts";
|
||||
import type { NativeMacRecordingRequest } from "../src/lib/nativeMacRecording";
|
||||
import type { NativeWindowsRecordingRequest } from "../src/lib/nativeWindowsRecording";
|
||||
import type { RecordingSession, StoreRecordedSessionInput } from "../src/lib/recordingSession";
|
||||
@@ -16,6 +27,47 @@ contextBridge.exposeInMainWorld("electronAPI", {
|
||||
invokeNativeBridge: <TData>(request: NativeBridgeRequest) => {
|
||||
return ipcRenderer.invoke(NATIVE_BRIDGE_CHANNEL, request) as Promise<TData>;
|
||||
},
|
||||
guide: {
	// Each method forwards its argument (if any) to the matching "guide:*"
	// IPC channel and returns the promise produced by ipcRenderer.invoke.
	startSession: (recordingId: string | number) =>
		ipcRenderer.invoke("guide:start-session", recordingId),
	readSession: (recordingId: string | number) =>
		ipcRenderer.invoke("guide:read-session", recordingId),
	addMarker: (input: AddGuideMarkerInput) => ipcRenderer.invoke("guide:add-marker", input),
	finalizeEvents: (input: FinalizeGuideEventsInput) =>
		ipcRenderer.invoke("guide:finalize-events", input),
	writeSnapshot: (input: WriteGuideSnapshotInput) =>
		ipcRenderer.invoke("guide:write-snapshot", input),
	runOcr: (input: RunGuideOcrInput) => ipcRenderer.invoke("guide:run-ocr", input),
	generateDraft: (input: GenerateGuideDraftInput) =>
		ipcRenderer.invoke("guide:generate-draft", input),
	getAiSettings: () => ipcRenderer.invoke("guide:get-ai-settings"),
	saveAiSettings: (input: SaveGuideAiSettingsInput) =>
		ipcRenderer.invoke("guide:save-ai-settings", input),
	saveGuide: (input: SaveGuideInput) => ipcRenderer.invoke("guide:save-guide", input),
	exportMarkdown: (input: ExportGuideInput) =>
		ipcRenderer.invoke("guide:export-markdown", input),
	exportHtml: (input: ExportGuideInput) => ipcRenderer.invoke("guide:export-html", input),
	discardSession: (input: DiscardGuideSessionInput) =>
		ipcRenderer.invoke("guide:discard-session", input),
},
|
||||
hudOverlayHide: () => {
|
||||
ipcRenderer.send("hud-overlay-hide");
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user