Add Windows OCR service installer

This commit is contained in:
huanld
2026-05-28 19:01:34 +07:00
parent 7823507a18
commit cce81dd7c4
16 changed files with 749 additions and 72 deletions
+17
View File
@@ -0,0 +1,17 @@
; customInstall: (re)registers and starts the "OpenScreenOCR" Windows service.
; NOTE(review): presumably invoked by the installer template that includes this
; script via the "nsis.include" setting - confirm against the packaging config.
; Each nsExec::ExecToLog call pushes the command's exit code on the NSIS stack;
; none of them is popped or checked here, so every step is best-effort.
!macro customInstall
DetailPrint "Installing OpenScreen OCR Windows service"
; Remove any previous registration first so that "create" below does not collide
; with an existing service of the same name.
nsExec::ExecToLog '"$SYSDIR\sc.exe" stop OpenScreenOCR'
nsExec::ExecToLog '"$SYSDIR\sc.exe" delete OpenScreenOCR'
; NOTE(review): the fixed one-second pause presumably gives the service control
; manager time to finish the stop/delete before re-creating - confirm it is enough.
Sleep 1000
; $0 = expanded per-machine data directory handed to the wrapper via --data.
ExpandEnvStrings $0 "%ProgramData%\OpenScreen\ocr-runtime"
CreateDirectory "$0"
; Register the wrapper executable as an auto-start service. The binPath value
; embeds escaped quotes so each path survives as a single argument when the
; service command line is parsed.
nsExec::ExecToLog '"$SYSDIR\sc.exe" create OpenScreenOCR binPath= "\"$INSTDIR\resources\electron\native\bin\win32-x64\openscreen-ocr-service-wrapper.exe\" --service --exe \"$INSTDIR\resources\ocr-service\openscreen-ocr-service.exe\" --resources \"$INSTDIR\resources\" --data \"$0\"" start= auto DisplayName= "OpenScreen OCR Service"'
nsExec::ExecToLog '"$SYSDIR\sc.exe" description OpenScreenOCR "Local OCR service used by OpenScreen guide capture."'
; Start immediately so the service is available without a reboot.
nsExec::ExecToLog '"$SYSDIR\sc.exe" start OpenScreenOCR'
!macroend
; customUnInstall: stops and unregisters the "OpenScreenOCR" Windows service.
; Exit codes pushed by nsExec::ExecToLog are not popped or checked, so a missing
; or already-stopped service does not abort the uninstall.
; NOTE(review): the %ProgramData% runtime directory created at install time is
; not removed here - confirm whether that is intentional.
!macro customUnInstall
DetailPrint "Removing OpenScreen OCR Windows service"
nsExec::ExecToLog '"$SYSDIR\sc.exe" stop OpenScreenOCR'
nsExec::ExecToLog '"$SYSDIR\sc.exe" delete OpenScreenOCR'
!macroend
+8 -5
View File
@@ -79,6 +79,7 @@
"nsis" "nsis"
], ],
"icon": "icons/icons/win/icon.ico", "icon": "icons/icons/win/icon.ico",
"requestedExecutionLevel": "requireAdministrator",
"signAndEditExecutable": false, "signAndEditExecutable": false,
"signExts": ["!.exe"], "signExts": ["!.exe"],
"extraResources": [ "extraResources": [
@@ -99,8 +100,10 @@
} }
] ]
}, },
"nsis": { "nsis": {
"oneClick": false, "oneClick": false,
"allowToChangeInstallationDirectory": true "allowToChangeInstallationDirectory": true,
} "perMachine": true,
} "include": "build/installer.nsh"
}
}
+4
View File
@@ -168,6 +168,7 @@ describe("GuideStore", () => {
width: 800, width: 800,
height: 600, height: 600,
pngBytes: new Uint8Array([137, 80, 78, 71]).buffer, pngBytes: new Uint8Array([137, 80, 78, 71]).buffer,
markedPngBytes: new Uint8Array([137, 80, 78, 71, 1]).buffer,
}); });
expect(session.status).toBe("snapshots-ready"); expect(session.status).toBe("snapshots-ready");
@@ -176,6 +177,9 @@ describe("GuideStore", () => {
await expect(fs.readFile(session.snapshots[0]?.path ?? "")).resolves.toEqual( await expect(fs.readFile(session.snapshots[0]?.path ?? "")).resolves.toEqual(
Buffer.from([137, 80, 78, 71]), Buffer.from([137, 80, 78, 71]),
); );
await expect(fs.readFile(session.snapshots[0]?.markedPath ?? "")).resolves.toEqual(
Buffer.from([137, 80, 78, 71, 1]),
);
}); });
it("runs OCR, generates a local draft, and exports files", async () => { it("runs OCR, generates a local draft, and exports files", async () => {
+13 -2
View File
@@ -213,10 +213,19 @@ export class GuideStore {
this.assertGuidePathIsAllowed(session.outputDir); this.assertGuidePathIsAllowed(session.outputDir);
await fs.mkdir(session.outputDir, { recursive: true }); await fs.mkdir(session.outputDir, { recursive: true });
const fileName = `step-${String(eventIndex + 1).padStart(3, "0")}.png`; const fileBaseName = `step-${String(eventIndex + 1).padStart(3, "0")}`;
const fileName = `${fileBaseName}.png`;
const snapshotPath = path.join(session.outputDir, fileName); const snapshotPath = path.join(session.outputDir, fileName);
const markedSnapshotPath = path.join(session.outputDir, `${fileBaseName}-marked.png`);
this.assertGuidePathIsAllowed(snapshotPath); this.assertGuidePathIsAllowed(snapshotPath);
this.assertGuidePathIsAllowed(markedSnapshotPath);
await fs.writeFile(snapshotPath, Buffer.from(new Uint8Array(input.pngBytes))); await fs.writeFile(snapshotPath, Buffer.from(new Uint8Array(input.pngBytes)));
const hasMarkedSnapshot = Boolean(input.markedPngBytes?.byteLength);
if (hasMarkedSnapshot && input.markedPngBytes) {
await fs.writeFile(markedSnapshotPath, Buffer.from(new Uint8Array(input.markedPngBytes)));
} else {
await fs.unlink(markedSnapshotPath).catch(() => undefined);
}
const snapshot: GuideSnapshot = { const snapshot: GuideSnapshot = {
id: `snapshot-${input.eventId}`, id: `snapshot-${input.eventId}`,
@@ -224,6 +233,7 @@ export class GuideStore {
timeMs: Math.max(0, input.timeMs), timeMs: Math.max(0, input.timeMs),
offsetMs: input.offsetMs, offsetMs: input.offsetMs,
path: snapshotPath, path: snapshotPath,
markedPath: hasMarkedSnapshot ? markedSnapshotPath : undefined,
width: Math.round(input.width), width: Math.round(input.width),
height: Math.round(input.height), height: Math.round(input.height),
}; };
@@ -668,6 +678,7 @@ function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null {
const id = normalizeString(input.id); const id = normalizeString(input.id);
const eventId = normalizeString(input.eventId); const eventId = normalizeString(input.eventId);
const pathValue = normalizeString(input.path); const pathValue = normalizeString(input.path);
const markedPath = normalizeOptionalString(input.markedPath);
const timeMs = normalizeNonNegativeNumber(input.timeMs); const timeMs = normalizeNonNegativeNumber(input.timeMs);
const offsetMs = normalizeOptionalNumber(input.offsetMs); const offsetMs = normalizeOptionalNumber(input.offsetMs);
const width = normalizePositiveInteger(input.width); const width = normalizePositiveInteger(input.width);
@@ -683,7 +694,7 @@ function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null {
) { ) {
return null; return null;
} }
return { id, eventId, timeMs, offsetMs, path: pathValue, width, height }; return { id, eventId, timeMs, offsetMs, path: pathValue, markedPath, width, height };
} }
function normalizeOcrBlock(input: unknown): OcrBlock | null { function normalizeOcrBlock(input: unknown): OcrBlock | null {
+43 -1
View File
@@ -1,14 +1,17 @@
import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process"; import { type ChildProcessWithoutNullStreams, execFile, spawn } from "node:child_process";
import fs from "node:fs/promises"; import fs from "node:fs/promises";
import path from "node:path"; import path from "node:path";
import { promisify } from "node:util";
import { app } from "electron"; import { app } from "electron";
const DEFAULT_OCR_BASE_URL = "http://127.0.0.1:8866"; const DEFAULT_OCR_BASE_URL = "http://127.0.0.1:8866";
const DEFAULT_OCR_PORT = "8866"; const DEFAULT_OCR_PORT = "8866";
const WINDOWS_SERVICE_NAME = "OpenScreenOCR";
const SERVICE_EXE_NAME = "openscreen-ocr-service.exe"; const SERVICE_EXE_NAME = "openscreen-ocr-service.exe";
const HEALTH_TIMEOUT_MS = 1000; const HEALTH_TIMEOUT_MS = 1000;
const STARTUP_TIMEOUT_MS = 90000; const STARTUP_TIMEOUT_MS = 90000;
const PADDLEX_MODEL_NAMES = ["PP-OCRv5_mobile_det", "latin_PP-OCRv5_mobile_rec"]; const PADDLEX_MODEL_NAMES = ["PP-OCRv5_mobile_det", "latin_PP-OCRv5_mobile_rec"];
const execFileAsync = promisify(execFile);
let ocrProcess: ChildProcessWithoutNullStreams | null = null; let ocrProcess: ChildProcessWithoutNullStreams | null = null;
let startupPromise: Promise<void> | null = null; let startupPromise: Promise<void> | null = null;
@@ -24,6 +27,11 @@ export async function ensureBundledOcrServiceRunning(
return; return;
} }
if (process.platform === "win32" && (await startInstalledWindowsOcrService())) {
await waitForOcrServiceHealth(baseUrl, STARTUP_TIMEOUT_MS);
return;
}
const executablePath = await findBundledOcrServiceExecutable(); const executablePath = await findBundledOcrServiceExecutable();
if (!executablePath) { if (!executablePath) {
return; return;
@@ -51,6 +59,39 @@ function shouldManageOcrService(baseUrl: string): boolean {
} }
} }
/**
 * Ensures the installed OCR Windows service is running, starting it when needed.
 *
 * @returns `false` when `sc query` fails (the service cannot be queried, e.g. it
 * is not installed); `true` when the service already reports RUNNING, when
 * `sc start` succeeds, or when `sc start` reports that an instance is already
 * running (error 1056).
 */
async function startInstalledWindowsOcrService(): Promise<boolean> {
	const { success: queried, output: queryOutput } = await runSc(["query", WINDOWS_SERVICE_NAME]);
	if (!queried) {
		return false;
	}

	const reportsRunning = /\bRUNNING\b/i.test(queryOutput);
	if (reportsRunning) {
		return true;
	}

	const startResult = await runSc(["start", WINDOWS_SERVICE_NAME]);
	if (startResult.success) {
		return true;
	}
	// A failed start still counts when the failure is "already running".
	const alreadyRunningPatterns = [/\b1056\b/, /already running/i];
	return alreadyRunningPatterns.some((pattern) => pattern.test(startResult.output));
}
/**
 * Runs `sc.exe` with the given arguments and never throws for a failed command.
 *
 * @param args Arguments passed verbatim to `sc.exe`.
 * @returns `success` is `true` when the process resolved, `false` when
 * `execFile` rejected (non-zero exit, timeout, spawn failure). `output` is
 * stdout and stderr joined by a newline; missing streams become empty strings.
 */
async function runSc(args: string[]): Promise<{ success: boolean; output: string }> {
	const combineStreams = (streams: { stdout?: string; stderr?: string }): string =>
		`${streams.stdout ?? ""}\n${streams.stderr ?? ""}`;

	try {
		const completed = await execFileAsync("sc.exe", args, {
			windowsHide: true,
			timeout: 10000,
			maxBuffer: 512 * 1024,
		});
		return { success: true, output: combineStreams(completed) };
	} catch (error) {
		// execFile rejections carry whatever was captured before the failure.
		const partial = error as { stdout?: string; stderr?: string };
		return { success: false, output: combineStreams(partial) };
	}
}
async function findBundledOcrServiceExecutable(): Promise<string | null> { async function findBundledOcrServiceExecutable(): Promise<string | null> {
const candidates = [ const candidates = [
process.env.OPENSCREEN_GUIDE_OCR_EXE, process.env.OPENSCREEN_GUIDE_OCR_EXE,
@@ -160,6 +201,7 @@ function startOcrServiceProcess(
PADDLEOCR_USE_MOBILE: process.env.PADDLEOCR_USE_MOBILE ?? "1", PADDLEOCR_USE_MOBILE: process.env.PADDLEOCR_USE_MOBILE ?? "1",
OPENSCREEN_OCR_PROFILE: OPENSCREEN_OCR_PROFILE:
process.env.OPENSCREEN_OCR_PROFILE ?? process.env.OPENSCREEN_GUIDE_OCR_PROFILE ?? "", process.env.OPENSCREEN_OCR_PROFILE ?? process.env.OPENSCREEN_GUIDE_OCR_PROFILE ?? "",
OPENSCREEN_OCR_WARMUP: process.env.OPENSCREEN_OCR_WARMUP ?? "1",
PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT: process.env.PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT ?? "False", PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT: process.env.PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT ?? "False",
PADDLE_PDX_CACHE_HOME: process.env.PADDLE_PDX_CACHE_HOME ?? runtimePaths.paddlexCachePath, PADDLE_PDX_CACHE_HOME: process.env.PADDLE_PDX_CACHE_HOME ?? runtimePaths.paddlexCachePath,
PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK: PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK:
+203 -2
View File
@@ -1,10 +1,11 @@
import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process"; import { type ChildProcessWithoutNullStreams, execFile, spawn } from "node:child_process";
import { EventEmitter } from "node:events"; import { EventEmitter } from "node:events";
import { constants as fsConstants } from "node:fs"; import { constants as fsConstants } from "node:fs";
import fs from "node:fs/promises"; import fs from "node:fs/promises";
import os from "node:os"; import os from "node:os";
import path from "node:path"; import path from "node:path";
import { fileURLToPath, pathToFileURL } from "node:url"; import { fileURLToPath, pathToFileURL } from "node:url";
import { promisify } from "node:util";
import type { DesktopCapturerSource, Rectangle } from "electron"; import type { DesktopCapturerSource, Rectangle } from "electron";
import { import {
app, app,
@@ -17,7 +18,7 @@ import {
shell, shell,
systemPreferences, systemPreferences,
} from "electron"; } from "electron";
import type { GuideMarkerCapturedPayload } from "../../src/guide/contracts"; import type { GuideEvent, GuideMarkerCapturedPayload } from "../../src/guide/contracts";
import type { NativeMacRecordingRequest } from "../../src/lib/nativeMacRecording"; import type { NativeMacRecordingRequest } from "../../src/lib/nativeMacRecording";
import type { NativeWindowsRecordingRequest } from "../../src/lib/nativeWindowsRecording"; import type { NativeWindowsRecordingRequest } from "../../src/lib/nativeWindowsRecording";
import { import {
@@ -56,6 +57,7 @@ const RECORDING_SESSION_SUFFIX = ".session.json";
const ALLOWED_IMPORT_VIDEO_EXTENSIONS = new Set([".webm", ".mp4", ".mov", ".avi", ".mkv"]); const ALLOWED_IMPORT_VIDEO_EXTENSIONS = new Set([".webm", ".mp4", ".mov", ".avi", ".mkv"]);
const PREVIEW_AUDIO_DIR = path.join(app.getPath("userData"), "preview-audio"); const PREVIEW_AUDIO_DIR = path.join(app.getPath("userData"), "preview-audio");
const nativeMacCaptureEvents = new EventEmitter(); const nativeMacCaptureEvents = new EventEmitter();
const execFileAsync = promisify(execFile);
/** /**
* Paths explicitly approved by the user via file picker dialogs or project loads. * Paths explicitly approved by the user via file picker dialogs or project loads.
@@ -454,6 +456,7 @@ let activeGuideHotkeyRecording: GuideHotkeyRecordingState | null = null;
let activeGuideHotkeySessionId: number | null = null; let activeGuideHotkeySessionId: number | null = null;
let guideMarkerHotkeyRegistered = false; let guideMarkerHotkeyRegistered = false;
let lastGuideHotkeyCaptureAtMs = 0; let lastGuideHotkeyCaptureAtMs = 0;
const guideHotkeyBackgroundJobs = new Map<string, Promise<void>>();
const GUIDE_HOTKEY_CAPTURE_DEBOUNCE_MS = 250; const GUIDE_HOTKEY_CAPTURE_DEBOUNCE_MS = 250;
function normalizeCursorSample(sample: unknown): CursorRecordingSample | null { function normalizeCursorSample(sample: unknown): CursorRecordingSample | null {
@@ -808,6 +811,203 @@ function clampGuideHotkey01(value: number): number {
return Math.min(1, Math.max(0, value)); return Math.min(1, Math.max(0, value));
} }
/**
 * Best-effort live capture for a guide hotkey event: grabs a screen thumbnail,
 * tries to render a marked copy, then queues a background job that stores the
 * snapshot and runs OCR on it.
 *
 * Never rejects: every failure is logged with `console.warn` and swallowed.
 *
 * @param guideStore Store that persists the snapshot and runs OCR.
 * @param event Guide event the snapshot belongs to.
 * @param boundsInput Capture bounds; sanitized before use.
 * @param point Normalized marker position, scaled by the thumbnail size.
 */
async function captureGuideHotkeySnapshotAndRunOcr(
	guideStore: GuideStore,
	event: GuideEvent,
	boundsInput: GuideHotkeyBounds,
	point: { normalizedX: number; normalizedY: number },
) {
	try {
		const captureBounds = sanitizeGuideHotkeyBounds(boundsInput);
		const thumbnailSize = {
			width: Math.max(1, Math.round(captureBounds.width)),
			height: Math.max(1, Math.round(captureBounds.height)),
		};
		const screenSources = await desktopCapturer.getSources({
			types: ["screen"],
			thumbnailSize,
		});
		const screenSource = findScreenSourceForGuideBounds(screenSources, captureBounds);
		if (!screenSource || screenSource.thumbnail.isEmpty()) {
			console.warn("[guide-hotkey] no live screen thumbnail was available for OCR");
			return;
		}

		const pngBuffer = screenSource.thumbnail.toPNG();
		const imageSize = screenSource.thumbnail.getSize();
		const markerPlacement = {
			width: imageSize.width,
			height: imageSize.height,
			x: point.normalizedX * imageSize.width,
			y: point.normalizedY * imageSize.height,
		};

		// The marked copy is optional: on failure the plain snapshot is still stored.
		let markedPngBuffer: Buffer | undefined;
		try {
			markedPngBuffer = await createMarkedGuideSnapshotPng(pngBuffer, markerPlacement);
		} catch (error) {
			console.warn("[guide-hotkey] failed to create marked live snapshot:", error);
		}

		const storeSnapshotAndRunOcr = async (): Promise<void> => {
			const updatedSession = await guideStore.writeSnapshot({
				recordingId: event.recordingId,
				eventId: event.id,
				timeMs: event.timeMs,
				offsetMs: 0,
				pngBytes: bufferToArrayBuffer(pngBuffer),
				markedPngBytes: markedPngBuffer ? bufferToArrayBuffer(markedPngBuffer) : undefined,
				width: imageSize.width,
				height: imageSize.height,
			});
			const storedSnapshot = updatedSession.snapshots.find(
				(candidate) => candidate.eventId === event.id,
			);
			if (!storedSnapshot) {
				return;
			}

			await guideStore.runOcr({
				recordingId: event.recordingId,
				snapshotIds: [storedSnapshot.id],
			});
			console.info("[guide-hotkey] live snapshot OCR completed", {
				recordingId: event.recordingId,
				eventId: event.id,
				snapshotId: storedSnapshot.id,
			});
		};

		// Jobs for the same recording are chained so they run one after another.
		enqueueGuideHotkeyBackgroundJob(event.recordingId, storeSnapshotAndRunOcr);
	} catch (error) {
		console.warn("[guide-hotkey] live snapshot OCR failed:", error);
	}
}
/**
 * Appends `job` to the per-recording chain of background jobs so that jobs for
 * the same `recordingId` run strictly one after another.
 *
 * A failing job is logged and does not prevent later jobs from running. The
 * map entry is removed once the tail of the chain settles, unless a newer job
 * has replaced it in the meantime.
 *
 * @param recordingId Key identifying the chain the job is appended to.
 * @param job Async work to run after the previously queued job settles.
 */
function enqueueGuideHotkeyBackgroundJob(recordingId: string, job: () => Promise<void>) {
	const queuedTail = guideHotkeyBackgroundJobs.get(recordingId);
	const predecessor: Promise<void> = queuedTail
		? queuedTail.catch(() => undefined)
		: Promise.resolve();

	const logFailure = (error: unknown): void => {
		console.warn("[guide-hotkey] background OCR job failed:", error);
	};
	const releaseIfStillTail = (): void => {
		// Only clear the entry when no newer job has been chained after this one.
		if (guideHotkeyBackgroundJobs.get(recordingId) === chainedJob) {
			guideHotkeyBackgroundJobs.delete(recordingId);
		}
	};

	const chainedJob = predecessor.then(job).catch(logFailure).finally(releaseIfStillTail);
	guideHotkeyBackgroundJobs.set(recordingId, chainedJob);
}
/**
 * Picks the desktop-capturer source that corresponds to the given capture bounds.
 *
 * Resolution order:
 * 1. the display whose bounds match `bounds`, matched to a source by `display_id`;
 * 2. that same display matched by the screen index parsed from the source id;
 * 3. the first source whose id starts with `"screen:"`;
 * 4. the first source of any kind.
 *
 * @returns The chosen source, or `undefined` when `sources` is empty.
 */
function findScreenSourceForGuideBounds(
	sources: DesktopCapturerSource[],
	bounds: GuideHotkeyBounds,
): DesktopCapturerSource | undefined {
	const allDisplays = screen.getAllDisplays();
	const matchedIndex = allDisplays.findIndex((candidate) =>
		rectMatchesGuideBounds(candidate.bounds, bounds),
	);
	const matchedDisplay = matchedIndex < 0 ? undefined : allDisplays[matchedIndex];

	if (matchedDisplay) {
		const sourceForDisplay =
			sources.find((candidate) => Number(candidate.display_id) === matchedDisplay.id) ??
			sources.find((candidate) => parseDesktopCapturerScreenIndex(candidate.id) === matchedIndex);
		if (sourceForDisplay) {
			return sourceForDisplay;
		}
	}

	const firstScreenSource = sources.find((candidate) => candidate.id.startsWith("screen:"));
	return firstScreenSource ?? sources[0];
}
/**
 * Reports whether a display rectangle and guide bounds describe the same area
 * once every coordinate is rounded to the nearest integer.
 */
function rectMatchesGuideBounds(rect: Rectangle, bounds: GuideHotkeyBounds): boolean {
	const comparedFields = ["x", "y", "width", "height"] as const;
	return comparedFields.every((field) => Math.round(rect[field]) === Math.round(bounds[field]));
}
/**
 * Produces a copy of a PNG screenshot with a click marker drawn on it.
 *
 * The drawing is done by a PowerShell script (System.Drawing) that reads the
 * source image from, and writes the result to, a private temporary directory.
 * The directory is always removed afterwards.
 *
 * @param pngBuffer PNG bytes of the unmarked screenshot.
 * @param marker Image size and marker centre, in image pixels.
 * @returns PNG bytes of the marked screenshot.
 * @throws Error when not running on Windows (checked up front so no temporary
 * files are written and no process is spawned), or when PowerShell fails,
 * times out, or produces no output file.
 */
async function createMarkedGuideSnapshotPng(
	pngBuffer: Buffer,
	marker: { width: number; height: number; x: number; y: number },
): Promise<Buffer> {
	// powershell.exe + System.Drawing only exist on Windows; fail fast elsewhere
	// instead of writing the screenshot to disk and attempting a doomed spawn.
	if (process.platform !== "win32") {
		throw new Error("Marked guide snapshots require Windows PowerShell (System.Drawing).");
	}

	const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openscreen-guide-marker-"));
	const sourcePath = path.join(tempDir, "source.png");
	const outputPath = path.join(tempDir, "marked.png");

	try {
		await fs.writeFile(sourcePath, pngBuffer);
		await execFileAsync(
			"powershell.exe",
			[
				"-NoProfile",
				"-ExecutionPolicy",
				"Bypass",
				"-EncodedCommand",
				buildMarkerScript(sourcePath, outputPath, marker),
			],
			{
				timeout: 30000,
				windowsHide: true,
				maxBuffer: 1024 * 1024,
			},
		);
		return await fs.readFile(outputPath);
	} finally {
		// Cleanup is best-effort; a leftover temp dir must not mask the real result.
		await fs.rm(tempDir, { recursive: true, force: true }).catch(() => undefined);
	}
}
/**
 * Builds the PowerShell script that draws a click marker onto a screenshot and
 * returns it encoded for `powershell.exe -EncodedCommand` (base64 of UTF-16LE).
 *
 * Both file paths are embedded as base64-encoded UTF-8 and decoded inside the
 * script, so no path character has to be escaped for PowerShell.
 *
 * @param sourcePath Path of the PNG the script reads.
 * @param outputPath Path the script writes the marked PNG to.
 * @param marker Marker centre in image pixels (`x`, `y`), embedded with four
 * decimals; `width` and `height` are not used by the script, which reads the
 * image's own size instead.
 * @returns The encoded script.
 */
function buildMarkerScript(
	sourcePath: string,
	outputPath: string,
	marker: { width: number; height: number; x: number; y: number },
): string {
	const encodePath = (filePath: string): string =>
		Buffer.from(filePath, "utf8").toString("base64");
	const sourcePathBase64 = encodePath(sourcePath);
	const outputPathBase64 = encodePath(outputPath);
	const markerX = marker.x.toFixed(4);
	const markerY = marker.y.toFixed(4);

	const script = `
$ErrorActionPreference = "Stop"
$sourcePath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${sourcePathBase64}"))
$outputPath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${outputPathBase64}"))
Add-Type -AssemblyName System.Drawing
$source = [System.Drawing.Image]::FromFile($sourcePath)
$bitmap = [System.Drawing.Bitmap]::new($source.Width, $source.Height)
$graphics = [System.Drawing.Graphics]::FromImage($bitmap)
try {
$graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::AntiAlias
$graphics.DrawImage($source, 0, 0, $source.Width, $source.Height)
$shortSide = [Math]::Max(1, [Math]::Min($source.Width, $source.Height))
$haloRadius = [Math]::Min(14, [Math]::Max(8, [Math]::Round($shortSide * 0.012)))
$dotRadius = [Math]::Min(6, [Math]::Max(3, [Math]::Round($shortSide * 0.0045)))
$lineWidth = [Math]::Max(1, [Math]::Round($shortSide * 0.0015))
$x = [Math]::Min($source.Width, [Math]::Max(0, ${markerX}))
$y = [Math]::Min($source.Height, [Math]::Max(0, ${markerY}))
$haloBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(87, 250, 204, 21))
$ringPen = [System.Drawing.Pen]::new([System.Drawing.Color]::FromArgb(184, 239, 68, 68), $lineWidth)
$dotBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(235, 220, 38, 38))
try {
$graphics.FillEllipse($haloBrush, $x - $haloRadius, $y - $haloRadius, $haloRadius * 2, $haloRadius * 2)
$graphics.DrawEllipse($ringPen, $x - $haloRadius, $y - $haloRadius, $haloRadius * 2, $haloRadius * 2)
$graphics.FillEllipse($dotBrush, $x - $dotRadius, $y - $dotRadius, $dotRadius * 2, $dotRadius * 2)
} finally {
$haloBrush.Dispose()
$ringPen.Dispose()
$dotBrush.Dispose()
}
$bitmap.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png)
} finally {
$graphics.Dispose()
$bitmap.Dispose()
$source.Dispose()
}
`;

	// -EncodedCommand expects the script as base64 over its UTF-16LE bytes.
	return Buffer.from(script, "utf16le").toString("base64");
}
/**
 * Copies exactly the bytes viewed by `buffer` into a standalone `ArrayBuffer`.
 *
 * A Node `Buffer` may be a window into a larger backing store, so the backing
 * store is sliced by the buffer's own offset and length rather than returned
 * as-is.
 */
function bufferToArrayBuffer(buffer: Buffer): ArrayBuffer {
	const { buffer: backingStore, byteOffset, byteLength } = buffer;
	const endOffset = byteOffset + byteLength;
	return backingStore.slice(byteOffset, endOffset) as ArrayBuffer;
}
async function captureGuideHotkeyMarker( async function captureGuideHotkeyMarker(
guideStore: GuideStore, guideStore: GuideStore,
trigger: GuideMarkerTrigger = "global-shortcut", trigger: GuideMarkerTrigger = "global-shortcut",
@@ -854,6 +1054,7 @@ async function captureGuideHotkeyMarker(
rawY: point.rawY, rawY: point.rawY,
bounds: point.bounds, bounds: point.bounds,
}); });
void captureGuideHotkeySnapshotAndRunOcr(guideStore, result.event, recording.bounds, point);
return { captured: true, ...result }; return { captured: true, ...result };
} catch (error) { } catch (error) {
const message = error instanceof Error ? error.message : String(error); const message = error instanceof Error ? error.message : String(error);
@@ -81,3 +81,21 @@ target_compile_options(guide-hotkey-listener PRIVATE /EHsc /W4 /utf-8)
target_link_libraries(guide-hotkey-listener PRIVATE target_link_libraries(guide-hotkey-listener PRIVATE
user32 user32
) )
# Windows service wrapper that launches the OCR service executable.
add_executable(openscreen-ocr-service-wrapper
src/ocr-service-wrapper.cpp
)
# NOMINMAX / WIN32_LEAN_AND_MEAN trim what <Windows.h> pulls in; UNICODE/_UNICODE
# select the wide-character Win32 APIs; _WIN32_WINNT=0x0A00 targets Windows 10.
target_compile_definitions(openscreen-ocr-service-wrapper PRIVATE
NOMINMAX
WIN32_LEAN_AND_MEAN
UNICODE
_UNICODE
_WIN32_WINNT=0x0A00
)
# MSVC flags: standard C++ exception handling, warning level 4, UTF-8 sources.
target_compile_options(openscreen-ocr-service-wrapper PRIVATE /EHsc /W4 /utf-8)
# advapi32 provides the service-control APIs used by the wrapper.
target_link_libraries(openscreen-ocr-service-wrapper PRIVATE
advapi32
)
@@ -0,0 +1,263 @@
#include <Windows.h>
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
namespace {
// Name used when registering the control handler and in the service table.
constexpr const wchar_t* SERVICE_NAME = L"OpenScreenOCR";
// Paths parsed from the command line (--exe, --resources, --data).
// Any field may be empty when its flag was not supplied.
struct ServiceConfig {
std::wstring exePath;        // OCR service executable to launch (--exe)
std::wstring resourcesPath;  // resources directory (--resources)
std::wstring dataPath;       // writable runtime/data directory (--data)
};
// Process-wide service state, written by wmain/serviceMain and read by the
// control handler.
SERVICE_STATUS_HANDLE g_statusHandle = nullptr;  // set by RegisterServiceCtrlHandlerExW
SERVICE_STATUS g_status{};                       // last status reported via SetServiceStatus
HANDLE g_stopEvent = nullptr;                    // signalled to request shutdown
PROCESS_INFORMATION g_childProcess{};            // handles of the launched OCR process
ServiceConfig g_config;                          // parsed once in wmain
// Wraps `value` in double quotes so it is read back as a single argument by
// the standard Windows command-line parsing rules.
//
// Under those rules a run of backslashes is literal unless it is followed by
// a double quote, in which case each backslash must be doubled. Therefore:
//   - backslashes directly before an embedded quote are doubled and the quote
//     itself is escaped (2n + 1 backslashes, then the quote);
//   - trailing backslashes are doubled so they do not escape the closing quote
//     (e.g. a directory path ending in '\');
//   - all other backslashes are copied unchanged.
std::wstring quoteArg(const std::wstring& value) {
    std::wstring result = L"\"";
    size_t pendingBackslashes = 0;
    for (wchar_t ch : value) {
        if (ch == L'\\') {
            // Defer: how to emit these depends on what follows the run.
            ++pendingBackslashes;
            continue;
        }
        if (ch == L'"') {
            result.append(pendingBackslashes * 2 + 1, L'\\');
        } else {
            result.append(pendingBackslashes, L'\\');
        }
        result.push_back(ch);
        pendingBackslashes = 0;
    }
    // A trailing run precedes the closing quote, so it must be doubled too.
    result.append(pendingBackslashes * 2, L'\\');
    result += L"\"";
    return result;
}
// Returns everything before the last '\' or '/' in `path`, or "." when the
// path contains no separator. The separator itself is not included.
std::wstring directoryName(const std::wstring& path) {
    const std::wstring::size_type lastSeparator = path.find_last_of(L"\\/");
    if (lastSeparator == std::wstring::npos) {
        return L".";
    }
    return std::wstring(path, 0, lastSeparator);
}
// Creates `path` and each of its parent directories, ignoring failures (for
// example when a directory already exists).
//
// Every prefix that ends in a separator is created in turn, except prefixes
// of three characters or fewer (such as a drive root like "C:\").
void createDirectoryRecursive(const std::wstring& path) {
    if (path.empty()) {
        return;
    }

    for (size_t index = 0; index < path.size(); ++index) {
        const wchar_t ch = path[index];
        const bool isSeparator = ch == L'\\' || ch == L'/';
        const size_t prefixLength = index + 1;
        if (isSeparator && prefixLength > 3) {
            const std::wstring prefix = path.substr(0, prefixLength);
            CreateDirectoryW(prefix.c_str(), nullptr);
        }
    }
    CreateDirectoryW(path.c_str(), nullptr);
}
// Sets environment variable `name` for this process. An empty `value` passes
// a null pointer to SetEnvironmentVariableW, which deletes the variable.
void setEnv(const wchar_t* name, const std::wstring& value) {
    const wchar_t* rawValue = nullptr;
    if (!value.empty()) {
        rawValue = value.c_str();
    }
    SetEnvironmentVariableW(name, rawValue);
}
// Reports `state` to the service control manager through g_statusHandle.
// Does nothing when no status handle has been registered yet.
//
// Stop and shutdown controls are accepted only while RUNNING. For the two
// pending states a monotonically increasing checkpoint is reported; for every
// other state the checkpoint is 0.
void setServiceStatus(DWORD state, DWORD win32ExitCode = NO_ERROR, DWORD waitHint = 0) {
    static DWORD checkpoint = 1;
    if (g_statusHandle == nullptr) {
        return;
    }

    const bool isPending = state == SERVICE_START_PENDING || state == SERVICE_STOP_PENDING;
    DWORD acceptedControls = 0;
    if (state == SERVICE_RUNNING) {
        acceptedControls = SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN;
    }

    g_status.dwServiceType = SERVICE_WIN32_OWN_PROCESS;
    g_status.dwCurrentState = state;
    g_status.dwWin32ExitCode = win32ExitCode;
    g_status.dwWaitHint = waitHint;
    g_status.dwControlsAccepted = acceptedControls;
    g_status.dwCheckPoint = isPending ? checkpoint++ : 0;

    SetServiceStatus(g_statusHandle, &g_status);
}
// Opens (creating if necessary) "<dataPath>\logs\ocr-service.log" for appending
// and returns an inheritable handle positioned at the end of the file.
// Returns INVALID_HANDLE_VALUE when the file cannot be opened.
HANDLE openServiceLog(const std::wstring& dataPath) {
    const std::wstring logDirectory = dataPath + L"\\logs";
    createDirectoryRecursive(logDirectory);

    // The handle must be inheritable so a child process can write to it.
    SECURITY_ATTRIBUTES inheritable{};
    inheritable.nLength = sizeof(inheritable);
    inheritable.bInheritHandle = TRUE;

    const std::wstring logFilePath = logDirectory + L"\\ocr-service.log";
    HANDLE logHandle = CreateFileW(
        logFilePath.c_str(),
        FILE_APPEND_DATA,
        FILE_SHARE_READ | FILE_SHARE_WRITE,
        &inheritable,
        OPEN_ALWAYS,
        FILE_ATTRIBUTE_NORMAL,
        nullptr);
    if (logHandle == INVALID_HANDLE_VALUE) {
        return logHandle;
    }

    SetFilePointer(logHandle, 0, nullptr, FILE_END);
    return logHandle;
}
// Launches the OCR executable described by `config` and stores its handles in
// g_childProcess.
//
// Returns false when no executable path is configured or CreateProcessW fails;
// true when the child process was created.
//
// Side effects: creates the data and model-cache directories, overwrites a set
// of environment variables in THIS process, and opens the service log file.
bool startOcrProcess(const ServiceConfig& config) {
if (config.exePath.empty()) {
return false;
}
// Fallbacks when flags were omitted: data lives next to the executable, and
// the resources directory is the executable's grandparent directory.
const std::wstring dataPath = config.dataPath.empty()
? directoryName(config.exePath) + L"\\ocr-runtime"
: config.dataPath;
const std::wstring resourcesPath = config.resourcesPath.empty()
? directoryName(directoryName(config.exePath))
: config.resourcesPath;
// Writable model cache lives under the data path; the PaddleX cache points
// into the resources directory instead.
const std::wstring modelCachePath = dataPath + L"\\ocr-models";
const std::wstring paddlexCachePath = resourcesPath + L"\\ocr-models\\paddlex";
createDirectoryRecursive(dataPath);
createDirectoryRecursive(modelCachePath);
// The child is created with a null environment block below, so it receives
// this process's environment, including everything set here. Values are
// fixed; unlike setEnv's empty-string case (PADDLEOCR_LANG, which is removed),
// existing values of the other variables are overwritten.
setEnv(L"OPENSCREEN_OCR_HOST", L"127.0.0.1");
setEnv(L"OPENSCREEN_OCR_PORT", L"8866");
setEnv(L"PADDLEOCR_DEVICE", L"cpu");
setEnv(L"PADDLEOCR_ENABLE_MKLDNN", L"0");
setEnv(L"PADDLEOCR_LANG", L"");
setEnv(L"PADDLEOCR_USE_MOBILE", L"1");
setEnv(L"OPENSCREEN_OCR_PROFILE", L"vietnamese");
setEnv(L"OPENSCREEN_OCR_WARMUP", L"1");
setEnv(L"PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT", L"False");
setEnv(L"PADDLE_PDX_CACHE_HOME", paddlexCachePath);
setEnv(L"PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", L"True");
setEnv(L"PADDLE_HOME", modelCachePath + L"\\paddle");
setEnv(L"PADDLEOCR_HOME", modelCachePath + L"\\paddleocr");
setEnv(L"PYTHONUTF8", L"1");
STARTUPINFOW startupInfo{};
startupInfo.cb = sizeof(startupInfo);
// Redirect the child's stdout/stderr to the log file when it could be opened;
// otherwise the child starts without redirected standard handles.
HANDLE logFile = openServiceLog(dataPath);
if (logFile != INVALID_HANDLE_VALUE) {
startupInfo.dwFlags |= STARTF_USESTDHANDLES;
startupInfo.hStdOutput = logFile;
startupInfo.hStdError = logFile;
startupInfo.hStdInput = GetStdHandle(STD_INPUT_HANDLE);
}
// CreateProcessW may modify the command-line buffer, hence a mutable copy.
std::wstring commandLine = quoteArg(config.exePath);
const std::wstring cwd = directoryName(config.exePath);
ZeroMemory(&g_childProcess, sizeof(g_childProcess));
// Handle inheritance is enabled (TRUE) so the log handle reaches the child.
const BOOL created = CreateProcessW(
config.exePath.c_str(),
commandLine.data(),
nullptr,
nullptr,
TRUE,
CREATE_NO_WINDOW,
nullptr,
cwd.c_str(),
&startupInfo,
&g_childProcess);
// The wrapper's own copy of the log handle is closed after the create attempt.
if (logFile != INVALID_HANDLE_VALUE) {
CloseHandle(logFile);
}
return created == TRUE;
}
void stopOcrProcess() {
if (g_childProcess.hProcess) {
TerminateProcess(g_childProcess.hProcess, 0);
WaitForSingleObject(g_childProcess.hProcess, 10000);
CloseHandle(g_childProcess.hProcess);
g_childProcess.hProcess = nullptr;
}
if (g_childProcess.hThread) {
CloseHandle(g_childProcess.hThread);
g_childProcess.hThread = nullptr;
}
}
// Control handler registered with the service control manager.
//
// STOP / SHUTDOWN: reports STOP_PENDING and signals g_stopEvent. The service
// main thread, which waits on that event, then terminates the child process
// and releases its handles. The child is deliberately NOT stopped from this
// thread while the event exists: doing so would race with the main thread's
// own cleanup of the same process handles. Only when no stop event exists is
// the child stopped directly.
//
// INTERROGATE: acknowledged with NO_ERROR.
// Any other control: ERROR_CALL_NOT_IMPLEMENTED, as the control is not handled.
DWORD WINAPI serviceControlHandler(DWORD control, DWORD, LPVOID, LPVOID) {
    switch (control) {
    case SERVICE_CONTROL_STOP:
    case SERVICE_CONTROL_SHUTDOWN:
        setServiceStatus(SERVICE_STOP_PENDING, NO_ERROR, 10000);
        if (g_stopEvent) {
            SetEvent(g_stopEvent);
        } else {
            // No waiter to hand the shutdown to; stop the child here instead.
            stopOcrProcess();
        }
        return NO_ERROR;
    case SERVICE_CONTROL_INTERROGATE:
        return NO_ERROR;
    default:
        return ERROR_CALL_NOT_IMPLEMENTED;
    }
}
// Service entry point: registers the control handler, starts the OCR child
// process, then blocks until either a stop is requested or the child exits.
// Reports STOPPED with ERROR_SERVICE_SPECIFIC_ERROR when the stop event cannot
// be created or the child cannot be started.
void WINAPI serviceMain(DWORD, LPWSTR*) {
    g_statusHandle = RegisterServiceCtrlHandlerExW(SERVICE_NAME, serviceControlHandler, nullptr);
    if (g_statusHandle == nullptr) {
        return;
    }

    setServiceStatus(SERVICE_START_PENDING, NO_ERROR, 30000);

    // Manual-reset event, initially unsignalled.
    g_stopEvent = CreateEventW(nullptr, TRUE, FALSE, nullptr);
    const bool started = g_stopEvent != nullptr && startOcrProcess(g_config);
    if (!started) {
        setServiceStatus(SERVICE_STOPPED, ERROR_SERVICE_SPECIFIC_ERROR);
        return;
    }

    setServiceStatus(SERVICE_RUNNING);

    // Wake on whichever happens first: a stop request or the child exiting.
    HANDLE waitHandles[2];
    waitHandles[0] = g_stopEvent;
    waitHandles[1] = g_childProcess.hProcess;
    WaitForMultipleObjects(2, waitHandles, FALSE, INFINITE);

    stopOcrProcess();
    if (g_stopEvent != nullptr) {
        CloseHandle(g_stopEvent);
        g_stopEvent = nullptr;
    }
    setServiceStatus(SERVICE_STOPPED);
}
// Extracts --exe, --resources and --data values from the command line.
//
// A recognised flag consumes the following argument as its value; a flag that
// is the last argument is ignored. Unrecognised arguments are skipped. When a
// flag is repeated, the last value wins.
ServiceConfig parseConfig(int argc, wchar_t* argv[]) {
    ServiceConfig config;
    int index = 1;
    while (index < argc) {
        const std::wstring flag = argv[index];

        std::wstring* destination = nullptr;
        if (flag == L"--exe") {
            destination = &config.exePath;
        } else if (flag == L"--resources") {
            destination = &config.resourcesPath;
        } else if (flag == L"--data") {
            destination = &config.dataPath;
        }

        const bool hasValue = index + 1 < argc;
        if (destination != nullptr && hasValue) {
            ++index;  // consume the value so it is not re-read as a flag
            *destination = argv[index];
        }
        ++index;
    }
    return config;
}
// Returns true when any argument after the program name is exactly "--service".
bool hasServiceFlag(int argc, wchar_t* argv[]) {
    if (argc <= 1) {
        return false;  // nothing after the program name to inspect
    }
    const std::wstring serviceFlag = L"--service";
    return std::any_of(argv + 1, argv + argc, [&serviceFlag](const wchar_t* argument) {
        return serviceFlag == argument;
    });
}
} // namespace
int wmain(int argc, wchar_t* argv[]) {
g_config = parseConfig(argc, argv);
if (hasServiceFlag(argc, argv)) {
SERVICE_TABLE_ENTRYW serviceTable[] = {
{const_cast<LPWSTR>(SERVICE_NAME), serviceMain},
{nullptr, nullptr},
};
return StartServiceCtrlDispatcherW(serviceTable) ? 0 : 1;
}
if (!startOcrProcess(g_config)) {
std::wcerr << L"Failed to start OCR service process." << std::endl;
return 1;
}
WaitForSingleObject(g_childProcess.hProcess, INFINITE);
stopOcrProcess();
return 0;
}
+2 -2
View File
@@ -1,12 +1,12 @@
{ {
"name": "openscreen", "name": "openscreen",
"version": "1.4.6", "version": "1.4.8",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "openscreen", "name": "openscreen",
"version": "1.4.6", "version": "1.4.8",
"dependencies": { "dependencies": {
"@fix-webm-duration/fix": "^1.0.1", "@fix-webm-duration/fix": "^1.0.1",
"@pixi/filter-drop-shadow": "^5.2.0", "@pixi/filter-drop-shadow": "^5.2.0",
+1 -1
View File
@@ -1,7 +1,7 @@
{ {
"name": "openscreen", "name": "openscreen",
"private": true, "private": true,
"version": "1.4.6", "version": "1.4.8",
"type": "module", "type": "module",
"packageManager": "npm@10.9.4", "packageManager": "npm@10.9.4",
"engines": { "engines": {
+10
View File
@@ -131,6 +131,11 @@ if (!fs.existsSync(guideHotkeyListenerOutputPath)) {
throw new Error(`WGC helper build completed but ${guideHotkeyListenerOutputPath} was not found.`); throw new Error(`WGC helper build completed but ${guideHotkeyListenerOutputPath} was not found.`);
} }
const ocrServiceWrapperOutputPath = path.join(BUILD_DIR, "openscreen-ocr-service-wrapper.exe");
if (!fs.existsSync(ocrServiceWrapperOutputPath)) {
throw new Error(`WGC helper build completed but ${ocrServiceWrapperOutputPath} was not found.`);
}
fs.mkdirSync(BIN_DIR, { recursive: true }); fs.mkdirSync(BIN_DIR, { recursive: true });
const distributablePath = path.join(BIN_DIR, "wgc-capture.exe"); const distributablePath = path.join(BIN_DIR, "wgc-capture.exe");
fs.copyFileSync(outputPath, distributablePath); fs.copyFileSync(outputPath, distributablePath);
@@ -141,9 +146,14 @@ fs.copyFileSync(cursorSamplerOutputPath, cursorSamplerDistributablePath);
const guideHotkeyListenerDistributablePath = path.join(BIN_DIR, "guide-hotkey-listener.exe"); const guideHotkeyListenerDistributablePath = path.join(BIN_DIR, "guide-hotkey-listener.exe");
fs.copyFileSync(guideHotkeyListenerOutputPath, guideHotkeyListenerDistributablePath); fs.copyFileSync(guideHotkeyListenerOutputPath, guideHotkeyListenerDistributablePath);
const ocrServiceWrapperDistributablePath = path.join(BIN_DIR, "openscreen-ocr-service-wrapper.exe");
fs.copyFileSync(ocrServiceWrapperOutputPath, ocrServiceWrapperDistributablePath);
console.log(`Built ${outputPath}`); console.log(`Built ${outputPath}`);
console.log(`Copied ${distributablePath}`); console.log(`Copied ${distributablePath}`);
console.log(`Built ${cursorSamplerOutputPath}`); console.log(`Built ${cursorSamplerOutputPath}`);
console.log(`Copied ${cursorSamplerDistributablePath}`); console.log(`Copied ${cursorSamplerDistributablePath}`);
console.log(`Built ${guideHotkeyListenerOutputPath}`); console.log(`Built ${guideHotkeyListenerOutputPath}`);
console.log(`Copied ${guideHotkeyListenerDistributablePath}`); console.log(`Copied ${guideHotkeyListenerDistributablePath}`);
console.log(`Built ${ocrServiceWrapperOutputPath}`);
console.log(`Copied ${ocrServiceWrapperDistributablePath}`);
+2
View File
@@ -55,6 +55,7 @@ export interface GuideSnapshot {
timeMs: number; timeMs: number;
offsetMs: number; offsetMs: number;
path: string; path: string;
markedPath?: string;
width: number; width: number;
height: number; height: number;
} }
@@ -163,6 +164,7 @@ export interface WriteGuideSnapshotInput {
timeMs: number; timeMs: number;
offsetMs: number; offsetMs: number;
pngBytes: ArrayBuffer; pngBytes: ArrayBuffer;
markedPngBytes?: ArrayBuffer;
width: number; width: number;
height: number; height: number;
} }
+21 -7
View File
@@ -29,6 +29,7 @@ const session: GuideSession = {
timeMs: 1500, timeMs: 1500,
offsetMs: 500, offsetMs: 500,
path: "/tmp/recording-guide/step-001.png", path: "/tmp/recording-guide/step-001.png",
markedPath: "/tmp/recording-guide/step-001-marked.png",
width: 1280, width: 1280,
height: 720, height: 720,
}, },
@@ -71,7 +72,7 @@ describe("guide exporters", () => {
expect(markdown).toContain("# User guide"); expect(markdown).toContain("# User guide");
expect(markdown).toContain("## 1. Open Settings"); expect(markdown).toContain("## 1. Open Settings");
expect(markdown).toContain("](step-001.png)"); expect(markdown).toContain("](step-001-marked.png)");
}); });
it("exports escaped HTML", () => { it("exports escaped HTML", () => {
@@ -79,12 +80,11 @@ describe("guide exporters", () => {
expect(html).toContain("<!doctype html>"); expect(html).toContain("<!doctype html>");
expect(html).toContain("<h1>User guide</h1>"); expect(html).toContain("<h1>User guide</h1>");
expect(html).toContain('src="step-001.png"'); expect(html).toContain('src="step-001-marked.png"');
expect(html).toContain("click-marker"); expect(html).not.toContain("click-marker");
expect(html).toContain("left: 25.00%; top: 75.00%;");
}); });
it("draws click markers for hotkey events with coordinates", () => { it("uses marker snapshots for hotkey events with coordinates", () => {
const hotkeySession: GuideSession = { const hotkeySession: GuideSession = {
...session, ...session,
events: [ events: [
@@ -98,7 +98,21 @@ describe("guide exporters", () => {
const html = exportGuideToHtml(hotkeySession); const html = exportGuideToHtml(hotkeySession);
expect(html).toContain("click-marker"); expect(html).toContain('src="step-001-marked.png"');
expect(html).toContain("left: 25.00%; top: 75.00%;"); expect(html).not.toContain("click-marker");
});
it("falls back to the unmarked screenshot when no marker snapshot exists", () => {
const unmarkedSession: GuideSession = {
...session,
snapshots: session.snapshots.map((snapshot) => ({
...snapshot,
markedPath: undefined,
})),
};
const markdown = exportGuideToMarkdown(unmarkedSession);
expect(markdown).toContain("](step-001.png)");
}); });
}); });
+25 -51
View File
@@ -10,8 +10,9 @@ export function exportGuideToMarkdown(session: GuideSession): string {
for (const step of guide.steps) { for (const step of guide.steps) {
lines.push(`## ${step.order}. ${step.title}`, "", step.instruction, ""); lines.push(`## ${step.order}. ${step.title}`, "", step.instruction, "");
if (step.screenshotPath) { const screenshotPath = resolveStepScreenshotPath(step, session);
lines.push(`![${escapeMarkdownAlt(step.title)}](${path.basename(step.screenshotPath)})`, ""); if (screenshotPath) {
lines.push(`![${escapeMarkdownAlt(step.title)}](${path.basename(screenshotPath)})`, "");
} }
} }
@@ -36,10 +37,8 @@ export function exportGuideToHtml(session: GuideSession): string {
.step { border-top: 1px solid #e5e7eb; padding: 22px 0; } .step { border-top: 1px solid #e5e7eb; padding: 22px 0; }
.step h2 { font-size: 18px; margin: 0 0 8px; } .step h2 { font-size: 18px; margin: 0 0 8px; }
.step p { margin: 0 0 12px; } .step p { margin: 0 0 12px; }
.shot { display: inline-block; position: relative; max-width: 100%; margin: 0; } .shot { display: inline-block; max-width: 100%; margin: 0; }
img { display: block; max-width: 100%; border: 1px solid #e5e7eb; border-radius: 6px; } img { display: block; max-width: 100%; border: 1px solid #e5e7eb; border-radius: 6px; }
.click-marker { position: absolute; width: 26px; height: 26px; border: 3px solid #ef4444; border-radius: 999px; box-shadow: 0 0 0 4px rgba(239, 68, 68, 0.18), 0 2px 8px rgba(17, 24, 39, 0.28); transform: translate(-50%, -50%); pointer-events: none; }
.click-marker::after { content: ""; position: absolute; left: 50%; top: 50%; width: 6px; height: 6px; border-radius: 999px; background: #ef4444; transform: translate(-50%, -50%); }
</style> </style>
</head> </head>
<body> <body>
@@ -54,12 +53,9 @@ export function exportGuideToHtml(session: GuideSession): string {
} }
function renderStepHtml(step: GeneratedGuideStep, session: GuideSession): string { function renderStepHtml(step: GeneratedGuideStep, session: GuideSession): string {
const clickPoint = resolveStepClickPoint(step, session); const screenshotPath = resolveStepScreenshotPath(step, session);
const marker = clickPoint const image = screenshotPath
? `<span class="click-marker" style="left: ${formatPercent(clickPoint.x)}%; top: ${formatPercent(clickPoint.y)}%;" aria-label="Click position"></span>` ? `<figure class="shot"><img src="${escapeHtml(path.basename(screenshotPath))}" alt="${escapeHtml(step.title)}"></figure>`
: "";
const image = step.screenshotPath
? `<figure class="shot"><img src="${escapeHtml(path.basename(step.screenshotPath))}" alt="${escapeHtml(step.title)}">${marker}</figure>`
: ""; : "";
return `<section class="step"> return `<section class="step">
<h2>${step.order}. ${escapeHtml(step.title)}</h2> <h2>${step.order}. ${escapeHtml(step.title)}</h2>
@@ -88,54 +84,32 @@ function escapeHtml(value: string): string {
.replace(/'/g, "&#39;"); .replace(/'/g, "&#39;");
} }
function resolveStepClickPoint( function resolveStepScreenshotPath(
step: GeneratedGuideStep, step: GeneratedGuideStep,
session: GuideSession, session: GuideSession,
): { x: number; y: number } | null { ): string | undefined {
const snapshot = resolveStepSnapshot(step, session);
return snapshot?.markedPath ?? step.screenshotPath ?? snapshot?.path;
}
function resolveStepSnapshot(step: GeneratedGuideStep, session: GuideSession) {
const candidate = step.sourceCandidateId const candidate = step.sourceCandidateId
? session.candidates.find((item) => item.id === step.sourceCandidateId) ? session.candidates.find((item) => item.id === step.sourceCandidateId)
: undefined; : undefined;
const eventId = candidate?.eventId;
const event = eventId ? session.events.find((item) => item.id === eventId) : undefined;
if (!event || (event.kind !== "click" && event.kind !== "hotkey")) {
return null;
}
if (isNormalizedNumber(event.normalizedX) && isNormalizedNumber(event.normalizedY)) {
return { x: clamp01(event.normalizedX), y: clamp01(event.normalizedY) };
}
const screenshotFileName = step.screenshotPath ? path.basename(step.screenshotPath) : undefined; const screenshotFileName = step.screenshotPath ? path.basename(step.screenshotPath) : undefined;
const snapshot = return (
(candidate?.snapshotId (candidate?.snapshotId
? session.snapshots.find((item) => item.id === candidate.snapshotId) ? session.snapshots.find((item) => item.id === candidate.snapshotId)
: undefined) ?? : undefined) ??
(candidate?.eventId
? session.snapshots.find((item) => item.eventId === candidate.eventId)
: undefined) ??
(screenshotFileName (screenshotFileName
? session.snapshots.find((item) => path.basename(item.path) === screenshotFileName) ? session.snapshots.find(
: undefined); (item) =>
if ( path.basename(item.path) === screenshotFileName ||
!snapshot || (item.markedPath ? path.basename(item.markedPath) === screenshotFileName : false),
typeof event.x !== "number" || )
typeof event.y !== "number" || : undefined)
snapshot.width <= 0 || );
snapshot.height <= 0
) {
return null;
}
return {
x: clamp01(event.x / snapshot.width),
y: clamp01(event.y / snapshot.height),
};
}
function formatPercent(value: number): string {
return (clamp01(value) * 100).toFixed(2);
}
function isNormalizedNumber(value: unknown): value is number {
return typeof value === "number" && Number.isFinite(value) && value >= 0 && value <= 1;
}
function clamp01(value: number): number {
return Math.min(1, Math.max(0, value));
} }
@@ -35,18 +35,29 @@ export async function captureGuideSnapshots(
canvas.height = Math.max(1, Math.round(sourceHeight * scale)); canvas.height = Math.max(1, Math.round(sourceHeight * scale));
let latestSession = input.session; let latestSession = input.session;
const existingSnapshotsByEventId = new Set(
input.session.snapshots.map((snapshot) => snapshot.eventId),
);
for (const event of events) { for (const event of events) {
if (existingSnapshotsByEventId.has(event.id)) {
continue;
}
const offsetMs = event.screenshotOffsetMs ?? 500; const offsetMs = event.screenshotOffsetMs ?? 500;
const timeMs = getSnapshotTimeMs(event, offsetMs, video.duration); const timeMs = getSnapshotTimeMs(event, offsetMs, video.duration);
await seekVideo(video, timeMs / 1000); await seekVideo(video, timeMs / 1000);
context.drawImage(video, 0, 0, canvas.width, canvas.height); context.drawImage(video, 0, 0, canvas.width, canvas.height);
const pngBytes = await canvasToPngBytes(canvas); const pngBytes = await canvasToPngBytes(canvas);
const markerPoint = getSnapshotMarkerPoint(event, canvas.width, canvas.height);
const markedPngBytes = markerPoint
? await canvasToMarkedPngBytes(canvas, markerPoint)
: undefined;
const result = await window.electronAPI.guide.writeSnapshot({ const result = await window.electronAPI.guide.writeSnapshot({
recordingId: input.session.recordingId, recordingId: input.session.recordingId,
eventId: event.id, eventId: event.id,
timeMs, timeMs,
offsetMs, offsetMs,
pngBytes, pngBytes,
markedPngBytes,
width: canvas.width, width: canvas.width,
height: canvas.height, height: canvas.height,
}); });
@@ -143,3 +154,85 @@ function canvasToPngBytes(canvas: HTMLCanvasElement): Promise<ArrayBuffer> {
}, "image/png"); }, "image/png");
}); });
} }
/**
 * Encodes a marker-annotated copy of `canvas` as PNG.
 *
 * The frame is copied onto a scratch canvas of the same dimensions and the
 * marker is painted on that copy, so the caller's canvas is not drawn on.
 *
 * @param canvas - Canvas holding the frame to annotate.
 * @param point - Marker centre, in the pixel coordinates of `canvas`.
 * @returns PNG bytes of the annotated copy.
 * @throws Error if no 2D context can be obtained for the scratch canvas.
 */
async function canvasToMarkedPngBytes(
  canvas: HTMLCanvasElement,
  point: { x: number; y: number },
): Promise<ArrayBuffer> {
  const scratchCanvas = document.createElement("canvas");
  const { width, height } = canvas;
  scratchCanvas.width = width;
  scratchCanvas.height = height;

  const scratchContext = scratchCanvas.getContext("2d");
  if (!scratchContext) {
    throw new Error("Canvas 2D context is unavailable.");
  }

  // Copy the frame first, then paint the marker on top of the copy.
  scratchContext.drawImage(canvas, 0, 0);
  drawSnapshotMarker(scratchContext, scratchCanvas, point);

  const annotatedPngBytes = await canvasToPngBytes(scratchCanvas);
  return annotatedPngBytes;
}
/**
 * Paints the snapshot marker at `point`: a translucent amber halo with a red
 * outline, and a smaller, nearly opaque red dot at the centre.
 *
 * All sizes are derived from the shorter side of `canvas` and then bounded:
 * halo radius 8-14 px, dot radius 3-6 px, outline at least 1 px wide.
 *
 * @param context - 2D context to draw into.
 * @param canvas - Canvas whose dimensions determine the marker size.
 * @param point - Marker centre in canvas pixel coordinates.
 */
function drawSnapshotMarker(
  context: CanvasRenderingContext2D,
  canvas: HTMLCanvasElement,
  point: { x: number; y: number },
) {
  const { x: centerX, y: centerY } = point;
  const shortestSide = Math.max(1, Math.min(canvas.width, canvas.height));
  const outlineWidth = Math.max(1, Math.round(shortestSide * 0.0015));
  const centerDotRadius = clampNumber(Math.round(shortestSide * 0.0045), 3, 6);
  const outerHaloRadius = clampNumber(Math.round(shortestSide * 0.012), 8, 14);

  // Starts a fresh path containing one full circle and fills it.
  const fillDisc = (radius: number, fill: string): void => {
    context.beginPath();
    context.arc(centerX, centerY, radius, 0, Math.PI * 2);
    context.fillStyle = fill;
    context.fill();
  };

  // Halo: fill first, then stroke the same (still current) path for the outline.
  fillDisc(outerHaloRadius, "rgba(250, 204, 21, 0.34)");
  context.lineWidth = outlineWidth;
  context.strokeStyle = "rgba(239, 68, 68, 0.72)";
  context.stroke();

  // Centre dot, painted last so it sits on top of the halo.
  fillDisc(centerDotRadius, "rgba(220, 38, 38, 0.92)");
}
/**
 * Resolves where the marker for `event` belongs on a `width` x `height` snapshot.
 *
 * Only "click" and "hotkey" events get a marker. Coordinates are tried in order:
 *  1. `normalizedX` / `normalizedY` when both are finite and within [0, 1];
 *  2. `x` / `y` when both are finite and within [0, 1], scaled like normalized values;
 *  3. `x` / `y` as-is when both are finite numbers.
 * The chosen point is always clamped to `[0, width]` x `[0, height]`.
 *
 * @param event - Guide event to locate.
 * @param width - Snapshot width in pixels.
 * @param height - Snapshot height in pixels.
 * @returns The marker centre in snapshot pixels, or `null` when the event kind
 *   is not eligible or no usable coordinates are present.
 */
function getSnapshotMarkerPoint(
  event: GuideEvent,
  width: number,
  height: number,
): { x: number; y: number } | null {
  if (!(event.kind === "click" || event.kind === "hotkey")) {
    return null;
  }

  // Keeps every candidate inside the snapshot bounds.
  const toSnapshotPoint = (px: number, py: number): { x: number; y: number } => ({
    x: clampNumber(px, 0, width),
    y: clampNumber(py, 0, height),
  });

  if (isNormalizedNumber(event.normalizedX) && isNormalizedNumber(event.normalizedY)) {
    return toSnapshotPoint(event.normalizedX * width, event.normalizedY * height);
  }

  // Raw x/y that both fall within [0, 1] are scaled the same way as normalized values.
  if (isNormalizedNumber(event.x) && isNormalizedNumber(event.y)) {
    return toSnapshotPoint(event.x * width, event.y * height);
  }

  if (
    typeof event.x !== "number" ||
    typeof event.y !== "number" ||
    !Number.isFinite(event.x) ||
    !Number.isFinite(event.y)
  ) {
    return null;
  }

  return toSnapshotPoint(event.x, event.y);
}
/**
 * Type guard for a finite number inside the closed interval [0, 1].
 *
 * @param value - Any value.
 * @returns `true` only for finite numbers `v` with `0 <= v <= 1`; `NaN`,
 *   infinities and non-number values yield `false`.
 */
function isNormalizedNumber(value: unknown): value is number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return false;
  }
  return 0 <= value && value <= 1;
}
/**
 * Restricts `value` to the range `[min, max]`.
 *
 * The lower bound is applied first and the upper bound second, so when
 * `min > max` the result is `max`. A `NaN` input yields `NaN`.
 *
 * @param value - Number to clamp.
 * @param min - Lower bound; defaults to 0.
 * @param max - Upper bound; defaults to positive infinity (no upper bound).
 * @returns The clamped number.
 */
function clampNumber(value: number, min = 0, max = Number.POSITIVE_INFINITY): number {
  const raisedToMin = Math.max(min, value);
  return Math.min(max, raisedToMin);
}
+26 -1
View File
@@ -7,7 +7,7 @@ import sys
import tempfile import tempfile
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from threading import Lock from threading import Lock, Thread
from typing import Any from typing import Any
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
@@ -18,6 +18,8 @@ app = FastAPI(title="OpenScreen PaddleOCR service")
_engines: dict[str, Any] = {} _engines: dict[str, Any] = {}
_engine_lock = Lock() _engine_lock = Lock()
_warmup_lock = Lock()
_warmup_started = False
_LATIN_RECOGNITION_LANGS = { _LATIN_RECOGNITION_LANGS = {
"af", "af",
"az", "az",
@@ -87,6 +89,20 @@ class OcrRequest(BaseModel):
profile: str | None = None profile: str | None = None
@app.on_event("startup")
def start_ocr_warmup() -> None:
    """Launch the OCR warmup thread on application startup.

    Does nothing unless the ``OPENSCREEN_OCR_WARMUP`` environment variable is
    exactly ``"1"``. The module-level ``_warmup_started`` flag is read and set
    under ``_warmup_lock`` so the warmup thread is started at most once.
    """
    global _warmup_started

    warmup_enabled = os.getenv("OPENSCREEN_OCR_WARMUP", "0") == "1"
    if not warmup_enabled:
        return

    # Test-and-set under the lock; only the caller that flips the flag proceeds.
    with _warmup_lock:
        already_started = _warmup_started
        _warmup_started = True
    if already_started:
        return

    warmup_thread = Thread(
        target=_warmup_default_engines,
        name="openscreen-ocr-warmup",
        daemon=True,
    )
    warmup_thread.start()
@app.get("/health") @app.get("/health")
def health() -> dict[str, Any]: def health() -> dict[str, Any]:
return { return {
@@ -100,6 +116,15 @@ def health() -> dict[str, Any]:
} }
def _warmup_default_engines() -> None:
    """Call ``_get_engine`` for every language of the default OCR profile.

    The profile and its languages are resolved with ``None`` arguments, i.e.
    without request-supplied values. Presumably this pre-creates the engines so
    the first request does not pay for it; confirm against ``_get_engine``.

    Best-effort: any ``Exception`` is written to stderr and not re-raised.
    """
    try:
        default_profile = _resolve_ocr_profile(None)
        default_languages = _resolve_paddle_languages(None, default_profile)
        for language in default_languages:
            _get_engine(language)
    except Exception as warmup_error:
        print(f"OpenScreen OCR warmup failed: {warmup_error}", file=sys.stderr, flush=True)
@app.post("/ocr") @app.post("/ocr")
async def ocr(request: OcrRequest) -> dict[str, Any]: async def ocr(request: OcrRequest) -> dict[str, Any]:
image_path, should_delete = _resolve_image_path(request) image_path, should_delete = _resolve_image_path(request)