diff --git a/build/installer.nsh b/build/installer.nsh new file mode 100644 index 0000000..4840e53 --- /dev/null +++ b/build/installer.nsh @@ -0,0 +1,17 @@ +!macro customInstall + DetailPrint "Installing OpenScreen OCR Windows service" + nsExec::ExecToLog '"$SYSDIR\sc.exe" stop OpenScreenOCR' + nsExec::ExecToLog '"$SYSDIR\sc.exe" delete OpenScreenOCR' + Sleep 1000 + ExpandEnvStrings $0 "%ProgramData%\OpenScreen\ocr-runtime" + CreateDirectory "$0" + nsExec::ExecToLog '"$SYSDIR\sc.exe" create OpenScreenOCR binPath= "\"$INSTDIR\resources\electron\native\bin\win32-x64\openscreen-ocr-service-wrapper.exe\" --service --exe \"$INSTDIR\resources\ocr-service\openscreen-ocr-service.exe\" --resources \"$INSTDIR\resources\" --data \"$0\"" start= auto DisplayName= "OpenScreen OCR Service"' + nsExec::ExecToLog '"$SYSDIR\sc.exe" description OpenScreenOCR "Local OCR service used by OpenScreen guide capture."' + nsExec::ExecToLog '"$SYSDIR\sc.exe" start OpenScreenOCR' +!macroend + +!macro customUnInstall + DetailPrint "Removing OpenScreen OCR Windows service" + nsExec::ExecToLog '"$SYSDIR\sc.exe" stop OpenScreenOCR' + nsExec::ExecToLog '"$SYSDIR\sc.exe" delete OpenScreenOCR' +!macroend diff --git a/electron-builder.json5 b/electron-builder.json5 index c58ba88..df38f5a 100644 --- a/electron-builder.json5 +++ b/electron-builder.json5 @@ -79,6 +79,7 @@ "nsis" ], "icon": "icons/icons/win/icon.ico", + "requestedExecutionLevel": "requireAdministrator", "signAndEditExecutable": false, "signExts": ["!.exe"], "extraResources": [ @@ -99,8 +100,10 @@ } ] }, - "nsis": { - "oneClick": false, - "allowToChangeInstallationDirectory": true - } -} + "nsis": { + "oneClick": false, + "allowToChangeInstallationDirectory": true, + "perMachine": true, + "include": "build/installer.nsh" + } +} diff --git a/electron/guide/guideStore.test.ts b/electron/guide/guideStore.test.ts index 9fe10d9..a652117 100644 --- a/electron/guide/guideStore.test.ts +++ b/electron/guide/guideStore.test.ts @@ -168,6 +168,7 @@ describe("GuideStore", () => { width: 800, height: 600, pngBytes: new Uint8Array([137, 80, 78, 71]).buffer, + markedPngBytes: new Uint8Array([137, 80, 78, 71, 1]).buffer, }); expect(session.status).toBe("snapshots-ready"); @@ -176,6 +177,9 @@ describe("GuideStore", () => { await expect(fs.readFile(session.snapshots[0]?.path ?? "")).resolves.toEqual( Buffer.from([137, 80, 78, 71]), ); + await expect(fs.readFile(session.snapshots[0]?.markedPath ?? "")).resolves.toEqual( + Buffer.from([137, 80, 78, 71, 1]), + ); }); it("runs OCR, generates a local draft, and exports files", async () => { diff --git a/electron/guide/guideStore.ts b/electron/guide/guideStore.ts index 57f9709..b41de17 100644 --- a/electron/guide/guideStore.ts +++ b/electron/guide/guideStore.ts @@ -213,10 +213,19 @@ export class GuideStore { this.assertGuidePathIsAllowed(session.outputDir); await fs.mkdir(session.outputDir, { recursive: true }); - const fileName = `step-${String(eventIndex + 1).padStart(3, "0")}.png`; + const fileBaseName = `step-${String(eventIndex + 1).padStart(3, "0")}`; + const fileName = `${fileBaseName}.png`; const snapshotPath = path.join(session.outputDir, fileName); + const markedSnapshotPath = path.join(session.outputDir, `${fileBaseName}-marked.png`); this.assertGuidePathIsAllowed(snapshotPath); + this.assertGuidePathIsAllowed(markedSnapshotPath); await fs.writeFile(snapshotPath, Buffer.from(new Uint8Array(input.pngBytes))); + const hasMarkedSnapshot = Boolean(input.markedPngBytes?.byteLength); + if (hasMarkedSnapshot && input.markedPngBytes) { + await fs.writeFile(markedSnapshotPath, Buffer.from(new Uint8Array(input.markedPngBytes))); + } else { + await fs.unlink(markedSnapshotPath).catch(() => undefined); + } const snapshot: GuideSnapshot = { id: `snapshot-${input.eventId}`, @@ -224,6 +233,7 @@ export class GuideStore { timeMs: Math.max(0, input.timeMs), offsetMs: input.offsetMs, path: snapshotPath, + markedPath: hasMarkedSnapshot ? markedSnapshotPath : undefined, width: Math.round(input.width), height: Math.round(input.height), }; @@ -668,6 +678,7 @@ function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null { const id = normalizeString(input.id); const eventId = normalizeString(input.eventId); const pathValue = normalizeString(input.path); + const markedPath = normalizeOptionalString(input.markedPath); const timeMs = normalizeNonNegativeNumber(input.timeMs); const offsetMs = normalizeOptionalNumber(input.offsetMs); const width = normalizePositiveInteger(input.width); @@ -683,7 +694,7 @@ function normalizeGuideSnapshot(input: unknown): GuideSnapshot | null { ) { return null; } - return { id, eventId, timeMs, offsetMs, path: pathValue, width, height }; + return { id, eventId, timeMs, offsetMs, path: pathValue, markedPath, width, height }; } function normalizeOcrBlock(input: unknown): OcrBlock | null { diff --git a/electron/guide/ocr/bundledOcrService.ts b/electron/guide/ocr/bundledOcrService.ts index d278b81..207592f 100644 --- a/electron/guide/ocr/bundledOcrService.ts +++ b/electron/guide/ocr/bundledOcrService.ts @@ -1,14 +1,17 @@ -import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process"; +import { type ChildProcessWithoutNullStreams, execFile, spawn } from "node:child_process"; import fs from "node:fs/promises"; import path from "node:path"; +import { promisify } from "node:util"; import { app } from "electron"; const DEFAULT_OCR_BASE_URL = "http://127.0.0.1:8866"; const DEFAULT_OCR_PORT = "8866"; +const WINDOWS_SERVICE_NAME = "OpenScreenOCR"; const SERVICE_EXE_NAME = "openscreen-ocr-service.exe"; const HEALTH_TIMEOUT_MS = 1000; const STARTUP_TIMEOUT_MS = 90000; const PADDLEX_MODEL_NAMES = ["PP-OCRv5_mobile_det", "latin_PP-OCRv5_mobile_rec"]; +const execFileAsync = promisify(execFile); let ocrProcess: ChildProcessWithoutNullStreams | null = null; let startupPromise: Promise | null = null; @@ -24,6 +27,11 @@ export async function ensureBundledOcrServiceRunning( return; } + if (process.platform === "win32" && (await startInstalledWindowsOcrService())) { + await waitForOcrServiceHealth(baseUrl, STARTUP_TIMEOUT_MS); + return; + } + const executablePath = await findBundledOcrServiceExecutable(); if (!executablePath) { return; @@ -51,6 +59,39 @@ function shouldManageOcrService(baseUrl: string): boolean { } } +async function startInstalledWindowsOcrService(): Promise { + const query = await runSc(["query", WINDOWS_SERVICE_NAME]); + if (!query.success) { + return false; + } + if (/\bRUNNING\b/i.test(query.output)) { + return true; + } + + const start = await runSc(["start", WINDOWS_SERVICE_NAME]); + return start.success || /\b1056\b/.test(start.output) || /already running/i.test(start.output); +} + +async function runSc(args: string[]): Promise<{ success: boolean; output: string }> { + try { + const result = await execFileAsync("sc.exe", args, { + windowsHide: true, + timeout: 10000, + maxBuffer: 512 * 1024, + }); + return { + success: true, + output: `${result.stdout ?? ""}\n${result.stderr ?? ""}`, + }; + } catch (error) { + const failed = error as { stdout?: string; stderr?: string }; + return { + success: false, + output: `${failed.stdout ?? ""}\n${failed.stderr ?? ""}`, + }; + } +} + async function findBundledOcrServiceExecutable(): Promise { const candidates = [ process.env.OPENSCREEN_GUIDE_OCR_EXE, @@ -160,6 +201,7 @@ function startOcrServiceProcess( PADDLEOCR_USE_MOBILE: process.env.PADDLEOCR_USE_MOBILE ?? "1", OPENSCREEN_OCR_PROFILE: process.env.OPENSCREEN_OCR_PROFILE ?? process.env.OPENSCREEN_GUIDE_OCR_PROFILE ?? "", + OPENSCREEN_OCR_WARMUP: process.env.OPENSCREEN_OCR_WARMUP ?? "1", PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT: process.env.PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT ?? "False", PADDLE_PDX_CACHE_HOME: process.env.PADDLE_PDX_CACHE_HOME ?? runtimePaths.paddlexCachePath, PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK: diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 1eb88f8..3b0abac 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -1,10 +1,11 @@ -import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process"; +import { type ChildProcessWithoutNullStreams, execFile, spawn } from "node:child_process"; import { EventEmitter } from "node:events"; import { constants as fsConstants } from "node:fs"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { fileURLToPath, pathToFileURL } from "node:url"; +import { promisify } from "node:util"; import type { DesktopCapturerSource, Rectangle } from "electron"; import { app, @@ -17,7 +18,7 @@ import { shell, systemPreferences, } from "electron"; -import type { GuideMarkerCapturedPayload } from "../../src/guide/contracts"; +import type { GuideEvent, GuideMarkerCapturedPayload } from "../../src/guide/contracts"; import type { NativeMacRecordingRequest } from "../../src/lib/nativeMacRecording"; import type { NativeWindowsRecordingRequest } from "../../src/lib/nativeWindowsRecording"; import { @@ -56,6 +57,7 @@ const RECORDING_SESSION_SUFFIX = ".session.json"; const ALLOWED_IMPORT_VIDEO_EXTENSIONS = new Set([".webm", ".mp4", ".mov", ".avi", ".mkv"]); const PREVIEW_AUDIO_DIR = path.join(app.getPath("userData"), "preview-audio"); const nativeMacCaptureEvents = new EventEmitter(); +const execFileAsync = promisify(execFile); /** * Paths explicitly approved by the user via file picker dialogs or project loads. @@ -454,6 +456,7 @@ let activeGuideHotkeyRecording: GuideHotkeyRecordingState | null = null; let activeGuideHotkeySessionId: number | null = null; let guideMarkerHotkeyRegistered = false; let lastGuideHotkeyCaptureAtMs = 0; +const guideHotkeyBackgroundJobs = new Map>(); const GUIDE_HOTKEY_CAPTURE_DEBOUNCE_MS = 250; function normalizeCursorSample(sample: unknown): CursorRecordingSample | null { @@ -808,6 +811,203 @@ function clampGuideHotkey01(value: number): number { return Math.min(1, Math.max(0, value)); } +async function captureGuideHotkeySnapshotAndRunOcr( + guideStore: GuideStore, + event: GuideEvent, + boundsInput: GuideHotkeyBounds, + point: { normalizedX: number; normalizedY: number }, +) { + try { + const bounds = sanitizeGuideHotkeyBounds(boundsInput); + const sources = await desktopCapturer.getSources({ + types: ["screen"], + thumbnailSize: { + width: Math.max(1, Math.round(bounds.width)), + height: Math.max(1, Math.round(bounds.height)), + }, + }); + const source = findScreenSourceForGuideBounds(sources, bounds); + if (!source || source.thumbnail.isEmpty()) { + console.warn("[guide-hotkey] no live screen thumbnail was available for OCR"); + return; + } + + const pngBuffer = source.thumbnail.toPNG(); + const imageSize = source.thumbnail.getSize(); + const markedPngBuffer = await createMarkedGuideSnapshotPng(pngBuffer, { + width: imageSize.width, + height: imageSize.height, + x: point.normalizedX * imageSize.width, + y: point.normalizedY * imageSize.height, + }).catch((error) => { + console.warn("[guide-hotkey] failed to create marked live snapshot:", error); + return undefined; + }); + + enqueueGuideHotkeyBackgroundJob(event.recordingId, async () => { + const session = await guideStore.writeSnapshot({ + recordingId: event.recordingId, + eventId: event.id, + timeMs: event.timeMs, + offsetMs: 0, + pngBytes: bufferToArrayBuffer(pngBuffer), + markedPngBytes: markedPngBuffer ? bufferToArrayBuffer(markedPngBuffer) : undefined, + width: imageSize.width, + height: imageSize.height, + }); + const snapshot = session.snapshots.find((item) => item.eventId === event.id); + if (!snapshot) { + return; + } + + await guideStore.runOcr({ + recordingId: event.recordingId, + snapshotIds: [snapshot.id], + }); + console.info("[guide-hotkey] live snapshot OCR completed", { + recordingId: event.recordingId, + eventId: event.id, + snapshotId: snapshot.id, + }); + }); + } catch (error) { + console.warn("[guide-hotkey] live snapshot OCR failed:", error); + } +} + +function enqueueGuideHotkeyBackgroundJob(recordingId: string, job: () => Promise) { + const previousJob = + guideHotkeyBackgroundJobs.get(recordingId)?.catch(() => undefined) ?? Promise.resolve(); + const nextJob = previousJob + .then(job) + .catch((error) => { + console.warn("[guide-hotkey] background OCR job failed:", error); + }) + .finally(() => { + if (guideHotkeyBackgroundJobs.get(recordingId) === nextJob) { + guideHotkeyBackgroundJobs.delete(recordingId); + } + }); + guideHotkeyBackgroundJobs.set(recordingId, nextJob); +} + +function findScreenSourceForGuideBounds( + sources: DesktopCapturerSource[], + bounds: GuideHotkeyBounds, +): DesktopCapturerSource | undefined { + const displays = screen.getAllDisplays(); + const displayIndex = displays.findIndex((display) => + rectMatchesGuideBounds(display.bounds, bounds), + ); + const display = displayIndex >= 0 ? displays[displayIndex] : undefined; + if (display) { + const byDisplayId = sources.find((source) => Number(source.display_id) === display.id); + if (byDisplayId) { + return byDisplayId; + } + const bySourceIndex = sources.find( + (source) => parseDesktopCapturerScreenIndex(source.id) === displayIndex, + ); + if (bySourceIndex) { + return bySourceIndex; + } + } + return sources.find((source) => source.id.startsWith("screen:")) ?? sources[0]; +} + +function rectMatchesGuideBounds(rect: Rectangle, bounds: GuideHotkeyBounds): boolean { + return ( + Math.round(rect.x) === Math.round(bounds.x) && + Math.round(rect.y) === Math.round(bounds.y) && + Math.round(rect.width) === Math.round(bounds.width) && + Math.round(rect.height) === Math.round(bounds.height) + ); +} + +async function createMarkedGuideSnapshotPng( + pngBuffer: Buffer, + marker: { width: number; height: number; x: number; y: number }, +): Promise { + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openscreen-guide-marker-")); + const sourcePath = path.join(tempDir, "source.png"); + const outputPath = path.join(tempDir, "marked.png"); + try { + await fs.writeFile(sourcePath, pngBuffer); + await execFileAsync( + "powershell.exe", + [ + "-NoProfile", + "-ExecutionPolicy", + "Bypass", + "-EncodedCommand", + buildMarkerScript(sourcePath, outputPath, marker), + ], + { + timeout: 30000, + windowsHide: true, + maxBuffer: 1024 * 1024, + }, + ); + return await fs.readFile(outputPath); + } finally { + await fs.rm(tempDir, { recursive: true, force: true }).catch(() => undefined); + } +} + +function buildMarkerScript( + sourcePath: string, + outputPath: string, + marker: { width: number; height: number; x: number; y: number }, +): string { + const sourcePathBase64 = Buffer.from(sourcePath, "utf8").toString("base64"); + const outputPathBase64 = Buffer.from(outputPath, "utf8").toString("base64"); + const script = ` +$ErrorActionPreference = "Stop" +$sourcePath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${sourcePathBase64}")) +$outputPath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${outputPathBase64}")) +Add-Type -AssemblyName System.Drawing + +$source = [System.Drawing.Image]::FromFile($sourcePath) +$bitmap = [System.Drawing.Bitmap]::new($source.Width, $source.Height) +$graphics = [System.Drawing.Graphics]::FromImage($bitmap) +try { + $graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::AntiAlias + $graphics.DrawImage($source, 0, 0, $source.Width, $source.Height) + $shortSide = [Math]::Max(1, [Math]::Min($source.Width, $source.Height)) + $haloRadius = [Math]::Min(14, [Math]::Max(8, [Math]::Round($shortSide * 0.012))) + $dotRadius = [Math]::Min(6, [Math]::Max(3, [Math]::Round($shortSide * 0.0045))) + $lineWidth = [Math]::Max(1, [Math]::Round($shortSide * 0.0015)) + $x = [Math]::Min($source.Width, [Math]::Max(0, ${marker.x.toFixed(4)})) + $y = [Math]::Min($source.Height, [Math]::Max(0, ${marker.y.toFixed(4)})) + $haloBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(87, 250, 204, 21)) + $ringPen = [System.Drawing.Pen]::new([System.Drawing.Color]::FromArgb(184, 239, 68, 68), $lineWidth) + $dotBrush = [System.Drawing.SolidBrush]::new([System.Drawing.Color]::FromArgb(235, 220, 38, 38)) + try { + $graphics.FillEllipse($haloBrush, $x - $haloRadius, $y - $haloRadius, $haloRadius * 2, $haloRadius * 2) + $graphics.DrawEllipse($ringPen, $x - $haloRadius, $y - $haloRadius, $haloRadius * 2, $haloRadius * 2) + $graphics.FillEllipse($dotBrush, $x - $dotRadius, $y - $dotRadius, $dotRadius * 2, $dotRadius * 2) + } finally { + $haloBrush.Dispose() + $ringPen.Dispose() + $dotBrush.Dispose() + } + $bitmap.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png) +} finally { + $graphics.Dispose() + $bitmap.Dispose() + $source.Dispose() +} +`; + return Buffer.from(script, "utf16le").toString("base64"); +} + +function bufferToArrayBuffer(buffer: Buffer): ArrayBuffer { + return buffer.buffer.slice( + buffer.byteOffset, + buffer.byteOffset + buffer.byteLength, + ) as ArrayBuffer; +} + async function captureGuideHotkeyMarker( guideStore: GuideStore, trigger: GuideMarkerTrigger = "global-shortcut", @@ -854,6 +1054,7 @@ async function captureGuideHotkeyMarker( rawY: point.rawY, bounds: point.bounds, }); + void captureGuideHotkeySnapshotAndRunOcr(guideStore, result.event, recording.bounds, point); return { captured: true, ...result }; } catch (error) { const message = error instanceof Error ? error.message : String(error); diff --git a/electron/native/wgc-capture/CMakeLists.txt b/electron/native/wgc-capture/CMakeLists.txt index 68b97a2..19820a0 100644 --- a/electron/native/wgc-capture/CMakeLists.txt +++ b/electron/native/wgc-capture/CMakeLists.txt @@ -81,3 +81,21 @@ target_compile_options(guide-hotkey-listener PRIVATE /EHsc /W4 /utf-8) target_link_libraries(guide-hotkey-listener PRIVATE user32 ) + +add_executable(openscreen-ocr-service-wrapper + src/ocr-service-wrapper.cpp +) + +target_compile_definitions(openscreen-ocr-service-wrapper PRIVATE + NOMINMAX + WIN32_LEAN_AND_MEAN + UNICODE + _UNICODE + _WIN32_WINNT=0x0A00 +) + +target_compile_options(openscreen-ocr-service-wrapper PRIVATE /EHsc /W4 /utf-8) + +target_link_libraries(openscreen-ocr-service-wrapper PRIVATE + advapi32 +) diff --git a/electron/native/wgc-capture/src/ocr-service-wrapper.cpp b/electron/native/wgc-capture/src/ocr-service-wrapper.cpp new file mode 100644 index 0000000..1b58c0e --- /dev/null +++ b/electron/native/wgc-capture/src/ocr-service-wrapper.cpp @@ -0,0 +1,263 @@ +#include + +#include +#include +#include +#include + +namespace { + +constexpr const wchar_t* SERVICE_NAME = L"OpenScreenOCR"; + +struct ServiceConfig { + std::wstring exePath; + std::wstring resourcesPath; + std::wstring dataPath; +}; + +SERVICE_STATUS_HANDLE g_statusHandle = nullptr; +SERVICE_STATUS g_status{}; +HANDLE g_stopEvent = nullptr; +PROCESS_INFORMATION g_childProcess{}; +ServiceConfig g_config; + +std::wstring quoteArg(const std::wstring& value) { + std::wstring result = L"\""; + for (wchar_t ch : value) { + if (ch == L'"') { + result += L"\\\""; + } else { + result.push_back(ch); + } + } + result += L"\""; + return result; +} + +std::wstring directoryName(const std::wstring& path) { + const size_t slash = path.find_last_of(L"\\/"); + return slash == std::wstring::npos ? L"." : path.substr(0, slash); +} + +void createDirectoryRecursive(const std::wstring& path) { + if (path.empty()) { + return; + } + + std::wstring current; + for (size_t i = 0; i < path.size(); ++i) { + current.push_back(path[i]); + if (path[i] != L'\\' && path[i] != L'/') { + continue; + } + if (current.size() > 3) { + CreateDirectoryW(current.c_str(), nullptr); + } + } + CreateDirectoryW(path.c_str(), nullptr); +} + +void setEnv(const wchar_t* name, const std::wstring& value) { + SetEnvironmentVariableW(name, value.empty() ? nullptr : value.c_str()); +} + +void setServiceStatus(DWORD state, DWORD win32ExitCode = NO_ERROR, DWORD waitHint = 0) { + if (!g_statusHandle) { + return; + } + + g_status.dwServiceType = SERVICE_WIN32_OWN_PROCESS; + g_status.dwCurrentState = state; + g_status.dwWin32ExitCode = win32ExitCode; + g_status.dwWaitHint = waitHint; + g_status.dwControlsAccepted = + state == SERVICE_RUNNING ? SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN : 0; + static DWORD checkpoint = 1; + g_status.dwCheckPoint = + state == SERVICE_START_PENDING || state == SERVICE_STOP_PENDING ? checkpoint++ : 0; + SetServiceStatus(g_statusHandle, &g_status); +} + +HANDLE openServiceLog(const std::wstring& dataPath) { + const std::wstring logDir = dataPath + L"\\logs"; + createDirectoryRecursive(logDir); + const std::wstring logPath = logDir + L"\\ocr-service.log"; + SECURITY_ATTRIBUTES securityAttributes{}; + securityAttributes.nLength = sizeof(securityAttributes); + securityAttributes.bInheritHandle = TRUE; + HANDLE file = CreateFileW( + logPath.c_str(), + FILE_APPEND_DATA, + FILE_SHARE_READ | FILE_SHARE_WRITE, + &securityAttributes, + OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL, + nullptr); + if (file != INVALID_HANDLE_VALUE) { + SetFilePointer(file, 0, nullptr, FILE_END); + } + return file; +} + +bool startOcrProcess(const ServiceConfig& config) { + if (config.exePath.empty()) { + return false; + } + + const std::wstring dataPath = config.dataPath.empty() + ? directoryName(config.exePath) + L"\\ocr-runtime" + : config.dataPath; + const std::wstring resourcesPath = config.resourcesPath.empty() + ? directoryName(directoryName(config.exePath)) + : config.resourcesPath; + const std::wstring modelCachePath = dataPath + L"\\ocr-models"; + const std::wstring paddlexCachePath = resourcesPath + L"\\ocr-models\\paddlex"; + + createDirectoryRecursive(dataPath); + createDirectoryRecursive(modelCachePath); + + setEnv(L"OPENSCREEN_OCR_HOST", L"127.0.0.1"); + setEnv(L"OPENSCREEN_OCR_PORT", L"8866"); + setEnv(L"PADDLEOCR_DEVICE", L"cpu"); + setEnv(L"PADDLEOCR_ENABLE_MKLDNN", L"0"); + setEnv(L"PADDLEOCR_LANG", L""); + setEnv(L"PADDLEOCR_USE_MOBILE", L"1"); + setEnv(L"OPENSCREEN_OCR_PROFILE", L"vietnamese"); + setEnv(L"OPENSCREEN_OCR_WARMUP", L"1"); + setEnv(L"PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT", L"False"); + setEnv(L"PADDLE_PDX_CACHE_HOME", paddlexCachePath); + setEnv(L"PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", L"True"); + setEnv(L"PADDLE_HOME", modelCachePath + L"\\paddle"); + setEnv(L"PADDLEOCR_HOME", modelCachePath + L"\\paddleocr"); + setEnv(L"PYTHONUTF8", L"1"); + + STARTUPINFOW startupInfo{}; + startupInfo.cb = sizeof(startupInfo); + HANDLE logFile = openServiceLog(dataPath); + if (logFile != INVALID_HANDLE_VALUE) { + startupInfo.dwFlags |= STARTF_USESTDHANDLES; + startupInfo.hStdOutput = logFile; + startupInfo.hStdError = logFile; + startupInfo.hStdInput = GetStdHandle(STD_INPUT_HANDLE); + } + + std::wstring commandLine = quoteArg(config.exePath); + const std::wstring cwd = directoryName(config.exePath); + ZeroMemory(&g_childProcess, sizeof(g_childProcess)); + const BOOL created = CreateProcessW( + config.exePath.c_str(), + commandLine.data(), + nullptr, + nullptr, + TRUE, + CREATE_NO_WINDOW, + nullptr, + cwd.c_str(), + &startupInfo, + &g_childProcess); + + if (logFile != INVALID_HANDLE_VALUE) { + CloseHandle(logFile); + } + return created == TRUE; +} + +void stopOcrProcess() { + if (g_childProcess.hProcess) { + TerminateProcess(g_childProcess.hProcess, 0); + WaitForSingleObject(g_childProcess.hProcess, 10000); + CloseHandle(g_childProcess.hProcess); + g_childProcess.hProcess = nullptr; + } + if (g_childProcess.hThread) { + CloseHandle(g_childProcess.hThread); + g_childProcess.hThread = nullptr; + } +} + +DWORD WINAPI serviceControlHandler(DWORD control, DWORD, LPVOID, LPVOID) { + if (control == SERVICE_CONTROL_STOP || control == SERVICE_CONTROL_SHUTDOWN) { + setServiceStatus(SERVICE_STOP_PENDING, NO_ERROR, 10000); + if (g_stopEvent) { + SetEvent(g_stopEvent); + } + stopOcrProcess(); + return NO_ERROR; + } + return NO_ERROR; +} + +void WINAPI serviceMain(DWORD, LPWSTR*) { + g_statusHandle = RegisterServiceCtrlHandlerExW(SERVICE_NAME, serviceControlHandler, nullptr); + if (!g_statusHandle) { + return; + } + + setServiceStatus(SERVICE_START_PENDING, NO_ERROR, 30000); + g_stopEvent = CreateEventW(nullptr, TRUE, FALSE, nullptr); + if (!g_stopEvent || !startOcrProcess(g_config)) { + setServiceStatus(SERVICE_STOPPED, ERROR_SERVICE_SPECIFIC_ERROR); + return; + } + + setServiceStatus(SERVICE_RUNNING); + HANDLE waitHandles[] = {g_stopEvent, g_childProcess.hProcess}; + WaitForMultipleObjects(2, waitHandles, FALSE, INFINITE); + stopOcrProcess(); + if (g_stopEvent) { + CloseHandle(g_stopEvent); + g_stopEvent = nullptr; + } + setServiceStatus(SERVICE_STOPPED); +} + +ServiceConfig parseConfig(int argc, wchar_t* argv[]) { + ServiceConfig config; + for (int i = 1; i < argc; ++i) { + const std::wstring arg = argv[i]; + auto readNext = [&](std::wstring& target) { + if (i + 1 < argc) { + target = argv[++i]; + } + }; + if (arg == L"--exe") { + readNext(config.exePath); + } else if (arg == L"--resources") { + readNext(config.resourcesPath); + } else if (arg == L"--data") { + readNext(config.dataPath); + } + } + return config; +} + +bool hasServiceFlag(int argc, wchar_t* argv[]) { + for (int i = 1; i < argc; ++i) { + if (std::wstring(argv[i]) == L"--service") { + return true; + } + } + return false; +} + +} // namespace + +int wmain(int argc, wchar_t* argv[]) { + g_config = parseConfig(argc, argv); + + if (hasServiceFlag(argc, argv)) { + SERVICE_TABLE_ENTRYW serviceTable[] = { + {const_cast(SERVICE_NAME), serviceMain}, + {nullptr, nullptr}, + }; + return StartServiceCtrlDispatcherW(serviceTable) ? 0 : 1; + } + + if (!startOcrProcess(g_config)) { + std::wcerr << L"Failed to start OCR service process." << std::endl; + return 1; + } + WaitForSingleObject(g_childProcess.hProcess, INFINITE); + stopOcrProcess(); + return 0; +} diff --git a/package-lock.json b/package-lock.json index 421d538..531c010 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "openscreen", - "version": "1.4.6", + "version": "1.4.8", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "openscreen", - "version": "1.4.6", + "version": "1.4.8", "dependencies": { "@fix-webm-duration/fix": "^1.0.1", "@pixi/filter-drop-shadow": "^5.2.0", diff --git a/package.json b/package.json index b656c32..8c3229d 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "openscreen", "private": true, - "version": "1.4.6", + "version": "1.4.8", "type": "module", "packageManager": "npm@10.9.4", "engines": { diff --git a/scripts/build-windows-wgc-helper.mjs b/scripts/build-windows-wgc-helper.mjs index 5b378fc..9eb50d1 100644 --- a/scripts/build-windows-wgc-helper.mjs +++ b/scripts/build-windows-wgc-helper.mjs @@ -131,6 +131,11 @@ if (!fs.existsSync(guideHotkeyListenerOutputPath)) { throw new Error(`WGC helper build completed but ${guideHotkeyListenerOutputPath} was not found.`); } +const ocrServiceWrapperOutputPath = path.join(BUILD_DIR, "openscreen-ocr-service-wrapper.exe"); +if (!fs.existsSync(ocrServiceWrapperOutputPath)) { + throw new Error(`WGC helper build completed but ${ocrServiceWrapperOutputPath} was not found.`); +} + fs.mkdirSync(BIN_DIR, { recursive: true }); const distributablePath = path.join(BIN_DIR, "wgc-capture.exe"); fs.copyFileSync(outputPath, distributablePath); @@ -141,9 +146,14 @@ fs.copyFileSync(cursorSamplerOutputPath, cursorSamplerDistributablePath); const guideHotkeyListenerDistributablePath = path.join(BIN_DIR, "guide-hotkey-listener.exe"); fs.copyFileSync(guideHotkeyListenerOutputPath, guideHotkeyListenerDistributablePath); +const ocrServiceWrapperDistributablePath = path.join(BIN_DIR, "openscreen-ocr-service-wrapper.exe"); +fs.copyFileSync(ocrServiceWrapperOutputPath, ocrServiceWrapperDistributablePath); + console.log(`Built ${outputPath}`); console.log(`Copied ${distributablePath}`); console.log(`Built ${cursorSamplerOutputPath}`); console.log(`Copied ${cursorSamplerDistributablePath}`); console.log(`Built ${guideHotkeyListenerOutputPath}`); console.log(`Copied ${guideHotkeyListenerDistributablePath}`); +console.log(`Built ${ocrServiceWrapperOutputPath}`); +console.log(`Copied ${ocrServiceWrapperDistributablePath}`); diff --git a/src/guide/contracts.ts b/src/guide/contracts.ts index 3f8c8ea..7eabea8 100644 --- a/src/guide/contracts.ts +++ b/src/guide/contracts.ts @@ -55,6 +55,7 @@ export interface GuideSnapshot { timeMs: number; offsetMs: number; path: string; + markedPath?: string; width: number; height: number; } @@ -163,6 +164,7 @@ export interface WriteGuideSnapshotInput { timeMs: number; offsetMs: number; pngBytes: ArrayBuffer; + markedPngBytes?: ArrayBuffer; width: number; height: number; } diff --git a/src/guide/exporters.test.ts b/src/guide/exporters.test.ts index b10d41c..50aeeee 100644 --- a/src/guide/exporters.test.ts +++ b/src/guide/exporters.test.ts @@ -29,6 +29,7 @@ const session: GuideSession = { timeMs: 1500, offsetMs: 500, path: "/tmp/recording-guide/step-001.png", + markedPath: "/tmp/recording-guide/step-001-marked.png", width: 1280, height: 720, }, @@ -71,7 +72,7 @@ describe("guide exporters", () => { expect(markdown).toContain("# User guide"); expect(markdown).toContain("## 1. Open Settings"); - expect(markdown).toContain("](step-001.png)"); + expect(markdown).toContain("](step-001-marked.png)"); }); it("exports escaped HTML", () => { @@ -79,12 +80,11 @@ describe("guide exporters", () => { expect(html).toContain(""); expect(html).toContain("

User guide

"); - expect(html).toContain('src="step-001.png"'); - expect(html).toContain("click-marker"); - expect(html).toContain("left: 25.00%; top: 75.00%;"); + expect(html).toContain('src="step-001-marked.png"'); + expect(html).not.toContain("click-marker"); }); - it("draws click markers for hotkey events with coordinates", () => { + it("uses marker snapshots for hotkey events with coordinates", () => { const hotkeySession: GuideSession = { ...session, events: [ @@ -98,7 +98,21 @@ describe("guide exporters", () => { const html = exportGuideToHtml(hotkeySession); - expect(html).toContain("click-marker"); - expect(html).toContain("left: 25.00%; top: 75.00%;"); + expect(html).toContain('src="step-001-marked.png"'); + expect(html).not.toContain("click-marker"); + }); + + it("falls back to the unmarked screenshot when no marker snapshot exists", () => { + const unmarkedSession: GuideSession = { + ...session, + snapshots: session.snapshots.map((snapshot) => ({ + ...snapshot, + markedPath: undefined, + })), + }; + + const markdown = exportGuideToMarkdown(unmarkedSession); + + expect(markdown).toContain("](step-001.png)"); }); }); diff --git a/src/guide/exporters.ts b/src/guide/exporters.ts index e84f86e..7c57131 100644 --- a/src/guide/exporters.ts +++ b/src/guide/exporters.ts @@ -10,8 +10,9 @@ export function exportGuideToMarkdown(session: GuideSession): string { for (const step of guide.steps) { lines.push(`## ${step.order}. ${step.title}`, "", step.instruction, ""); - if (step.screenshotPath) { - lines.push(`![${escapeMarkdownAlt(step.title)}](${path.basename(step.screenshotPath)})`, ""); + const screenshotPath = resolveStepScreenshotPath(step, session); + if (screenshotPath) { + lines.push(`![${escapeMarkdownAlt(step.title)}](${path.basename(screenshotPath)})`, ""); } } @@ -36,10 +37,8 @@ export function exportGuideToHtml(session: GuideSession): string { .step { border-top: 1px solid #e5e7eb; padding: 22px 0; } .step h2 { font-size: 18px; margin: 0 0 8px; } .step p { margin: 0 0 12px; } - .shot { display: inline-block; position: relative; max-width: 100%; margin: 0; } + .shot { display: inline-block; max-width: 100%; margin: 0; } img { display: block; max-width: 100%; border: 1px solid #e5e7eb; border-radius: 6px; } - .click-marker { position: absolute; width: 26px; height: 26px; border: 3px solid #ef4444; border-radius: 999px; box-shadow: 0 0 0 4px rgba(239, 68, 68, 0.18), 0 2px 8px rgba(17, 24, 39, 0.28); transform: translate(-50%, -50%); pointer-events: none; } - .click-marker::after { content: ""; position: absolute; left: 50%; top: 50%; width: 6px; height: 6px; border-radius: 999px; background: #ef4444; transform: translate(-50%, -50%); } @@ -54,12 +53,9 @@ export function exportGuideToHtml(session: GuideSession): string { } function renderStepHtml(step: GeneratedGuideStep, session: GuideSession): string { - const clickPoint = resolveStepClickPoint(step, session); - const marker = clickPoint - ? `` - : ""; - const image = step.screenshotPath - ? `
${escapeHtml(step.title)}${marker}
` + const screenshotPath = resolveStepScreenshotPath(step, session); + const image = screenshotPath + ? `
${escapeHtml(step.title)}
` : ""; return `

${step.order}. ${escapeHtml(step.title)}

@@ -88,54 +84,32 @@ function escapeHtml(value: string): string { .replace(/'/g, "'"); } -function resolveStepClickPoint( +function resolveStepScreenshotPath( step: GeneratedGuideStep, session: GuideSession, -): { x: number; y: number } | null { +): string | undefined { + const snapshot = resolveStepSnapshot(step, session); + return snapshot?.markedPath ?? step.screenshotPath ?? snapshot?.path; +} + +function resolveStepSnapshot(step: GeneratedGuideStep, session: GuideSession) { const candidate = step.sourceCandidateId ? session.candidates.find((item) => item.id === step.sourceCandidateId) : undefined; - const eventId = candidate?.eventId; - const event = eventId ? session.events.find((item) => item.id === eventId) : undefined; - if (!event || (event.kind !== "click" && event.kind !== "hotkey")) { - return null; - } - if (isNormalizedNumber(event.normalizedX) && isNormalizedNumber(event.normalizedY)) { - return { x: clamp01(event.normalizedX), y: clamp01(event.normalizedY) }; - } - const screenshotFileName = step.screenshotPath ? path.basename(step.screenshotPath) : undefined; - const snapshot = + return ( (candidate?.snapshotId ? session.snapshots.find((item) => item.id === candidate.snapshotId) : undefined) ?? + (candidate?.eventId + ? session.snapshots.find((item) => item.eventId === candidate.eventId) + : undefined) ?? (screenshotFileName - ? session.snapshots.find((item) => path.basename(item.path) === screenshotFileName) - : undefined); - if ( - !snapshot || - typeof event.x !== "number" || - typeof event.y !== "number" || - snapshot.width <= 0 || - snapshot.height <= 0 - ) { - return null; - } - - return { - x: clamp01(event.x / snapshot.width), - y: clamp01(event.y / snapshot.height), - }; -} - -function formatPercent(value: number): string { - return (clamp01(value) * 100).toFixed(2); -} - -function isNormalizedNumber(value: unknown): value is number { - return typeof value === "number" && Number.isFinite(value) && value >= 0 && value <= 1; -} - -function clamp01(value: number): number { - return Math.min(1, Math.max(0, value)); + ? session.snapshots.find( + (item) => + path.basename(item.path) === screenshotFileName || + (item.markedPath ? path.basename(item.markedPath) === screenshotFileName : false), + ) + : undefined) + ); } diff --git a/src/guide/snapshot/extractGuideSnapshots.ts b/src/guide/snapshot/extractGuideSnapshots.ts index 41a412c..592fb62 100644 --- a/src/guide/snapshot/extractGuideSnapshots.ts +++ b/src/guide/snapshot/extractGuideSnapshots.ts @@ -35,18 +35,29 @@ export async function captureGuideSnapshots( canvas.height = Math.max(1, Math.round(sourceHeight * scale)); let latestSession = input.session; + const existingSnapshotsByEventId = new Set( + input.session.snapshots.map((snapshot) => snapshot.eventId), + ); for (const event of events) { + if (existingSnapshotsByEventId.has(event.id)) { + continue; + } const offsetMs = event.screenshotOffsetMs ?? 500; const timeMs = getSnapshotTimeMs(event, offsetMs, video.duration); await seekVideo(video, timeMs / 1000); context.drawImage(video, 0, 0, canvas.width, canvas.height); const pngBytes = await canvasToPngBytes(canvas); + const markerPoint = getSnapshotMarkerPoint(event, canvas.width, canvas.height); + const markedPngBytes = markerPoint + ? await canvasToMarkedPngBytes(canvas, markerPoint) + : undefined; const result = await window.electronAPI.guide.writeSnapshot({ recordingId: input.session.recordingId, eventId: event.id, timeMs, offsetMs, pngBytes, + markedPngBytes, width: canvas.width, height: canvas.height, }); @@ -143,3 +154,85 @@ function canvasToPngBytes(canvas: HTMLCanvasElement): Promise { }, "image/png"); }); } + +async function canvasToMarkedPngBytes( + canvas: HTMLCanvasElement, + point: { x: number; y: number }, +): Promise { + const markedCanvas = document.createElement("canvas"); + markedCanvas.width = canvas.width; + markedCanvas.height = canvas.height; + const markedContext = markedCanvas.getContext("2d"); + if (!markedContext) { + throw new Error("Canvas 2D context is unavailable."); + } + markedContext.drawImage(canvas, 0, 0); + drawSnapshotMarker(markedContext, markedCanvas, point); + return await canvasToPngBytes(markedCanvas); +} + +function drawSnapshotMarker( + context: CanvasRenderingContext2D, + canvas: HTMLCanvasElement, + point: { x: number; y: number }, +) { + const shortSide = Math.max(1, Math.min(canvas.width, canvas.height)); + const haloRadius = clampNumber(Math.round(shortSide * 0.012), 8, 14); + const dotRadius = clampNumber(Math.round(shortSide * 0.0045), 3, 6); + const lineWidth = Math.max(1, Math.round(shortSide * 0.0015)); + + context.beginPath(); + context.arc(point.x, point.y, haloRadius, 0, Math.PI * 2); + context.fillStyle = "rgba(250, 204, 21, 0.34)"; + context.fill(); + context.lineWidth = lineWidth; + context.strokeStyle = "rgba(239, 68, 68, 0.72)"; + context.stroke(); + + context.beginPath(); + context.arc(point.x, point.y, dotRadius, 0, Math.PI * 2); + context.fillStyle = "rgba(220, 38, 38, 0.92)"; + context.fill(); +} + +function getSnapshotMarkerPoint( + event: GuideEvent, + width: number, + height: number, +): { x: number; y: number } | null { + if (event.kind !== "click" && event.kind !== "hotkey") { + return null; + } + if (isNormalizedNumber(event.normalizedX) && isNormalizedNumber(event.normalizedY)) { + return { + x: clampNumber(event.normalizedX * width, 0, width), + y: clampNumber(event.normalizedY * height, 0, height), + }; + } + if (isNormalizedNumber(event.x) && isNormalizedNumber(event.y)) { + return { + x: clampNumber(event.x * width, 0, width), + y: clampNumber(event.y * height, 0, height), + }; + } + if ( + typeof event.x === "number" && + typeof event.y === "number" && + Number.isFinite(event.x) && + Number.isFinite(event.y) + ) { + return { + x: clampNumber(event.x, 0, width), + y: clampNumber(event.y, 0, height), + }; + } + return null; +} + +function isNormalizedNumber(value: unknown): value is number { + return typeof value === "number" && Number.isFinite(value) && value >= 0 && value <= 1; +} + +function clampNumber(value: number, min = 0, max = Number.POSITIVE_INFINITY): number { + return Math.min(max, Math.max(min, value)); +} diff --git a/tools/ocr/paddle_ocr_service.py b/tools/ocr/paddle_ocr_service.py index 351096e..6fbc264 100644 --- a/tools/ocr/paddle_ocr_service.py +++ b/tools/ocr/paddle_ocr_service.py @@ -7,7 +7,7 @@ import sys import tempfile from dataclasses import dataclass from pathlib import Path -from threading import Lock +from threading import Lock, Thread from typing import Any from fastapi import FastAPI, HTTPException @@ -18,6 +18,8 @@ app = FastAPI(title="OpenScreen PaddleOCR service") _engines: dict[str, Any] = {} _engine_lock = Lock() +_warmup_lock = Lock() +_warmup_started = False _LATIN_RECOGNITION_LANGS = { "af", "az", @@ -87,6 +89,20 @@ class OcrRequest(BaseModel): profile: str | None = None +@app.on_event("startup") +def start_ocr_warmup() -> None: + if os.getenv("OPENSCREEN_OCR_WARMUP", "0") != "1": + return + + global _warmup_started + with _warmup_lock: + if _warmup_started: + return + _warmup_started = True + + Thread(target=_warmup_default_engines, name="openscreen-ocr-warmup", daemon=True).start() + + @app.get("/health") def health() -> dict[str, Any]: return { @@ -100,6 +116,15 @@ def health() -> dict[str, Any]: } +def _warmup_default_engines() -> None: + try: + profile = _resolve_ocr_profile(None) + for paddle_lang in _resolve_paddle_languages(None, profile): + _get_engine(paddle_lang) + except Exception as error: + print(f"OpenScreen OCR warmup failed: {error}", file=sys.stderr, flush=True) + + @app.post("/ocr") async def ocr(request: OcrRequest) -> dict[str, Any]: image_path, should_delete = _resolve_image_path(request)