From 198dc022b087e48533295fd4e830a36d175cf7f9 Mon Sep 17 00:00:00 2001 From: huanld Date: Thu, 28 May 2026 10:01:22 +0700 Subject: [PATCH] Release OpenScreen 1.4.2 --- electron/electron-env.d.ts | 13 + electron/guide/guideStore.test.ts | 68 ++++ electron/guide/guideStore.ts | 128 ++++++- electron/ipc/handlers.ts | 329 ++++++++++++++++-- electron/native/README.md | 2 +- electron/native/wgc-capture/CMakeLists.txt | 16 + .../wgc-capture/src/guide-hotkey-listener.cpp | 91 +++++ electron/native/wgc-capture/src/main.cpp | 1 + .../native/wgc-capture/src/monitor_utils.cpp | 35 +- .../native/wgc-capture/src/monitor_utils.h | 6 +- electron/preload.ts | 8 + package-lock.json | 4 +- package.json | 2 +- scripts/build-windows-wgc-helper.mjs | 10 + src/components/launch/LaunchWindow.tsx | 60 +--- src/components/launch/SourceSelector.tsx | 24 +- src/guide/contracts.ts | 18 + src/guide/exporters.test.ts | 18 + src/guide/exporters.ts | 2 +- src/guide/promptBuilder.test.ts | 9 + src/guide/promptBuilder.ts | 12 +- src/guide/targetMapper.test.ts | 7 + src/guide/targetMapper.ts | 46 ++- src/hooks/useScreenRecorder.ts | 11 +- src/lib/nativeWindowsRecording.ts | 6 + 25 files changed, 844 insertions(+), 82 deletions(-) create mode 100644 electron/native/wgc-capture/src/guide-hotkey-listener.cpp diff --git a/electron/electron-env.d.ts b/electron/electron-env.d.ts index 19e878c..e55f18d 100644 --- a/electron/electron-env.d.ts +++ b/electron/electron-env.d.ts @@ -53,6 +53,9 @@ interface Window { import("../src/guide/contracts").CaptureGuidePointerMarkerResult > >; + onMarkerCaptured: ( + callback: (payload: import("../src/guide/contracts").GuideMarkerCapturedPayload) => void, + ) => () => void; finalizeEvents: ( input: import("../src/guide/contracts").FinalizeGuideEventsInput, ) => Promise< @@ -376,6 +379,16 @@ interface ProcessedDesktopSource { display_id: string; thumbnail: string | null; appIcon: string | null; + displayId?: number; + displayIndex?: number; + screenIndex?: number; + displayLabel?: string; + bounds?: { + x: number; + y: number; + width: number; + height: number; + }; } interface CursorTelemetryPoint { diff --git a/electron/guide/guideStore.test.ts b/electron/guide/guideStore.test.ts index 27272f9..9fe10d9 100644 --- a/electron/guide/guideStore.test.ts +++ b/electron/guide/guideStore.test.ts @@ -228,6 +228,74 @@ describe("GuideStore", () => { await expect(fs.readFile(html.path, "utf-8")).resolves.toContain(""); }); + it("repairs generic hotkey marker text and attaches AI draft artifacts", async () => { + const store = new GuideStore(recordingsDir, { + ocrClient: { + recognize: async (snapshot) => [ + { + id: `ocr-${snapshot.id}-1`, + snapshotId: snapshot.id, + text: "Save", + confidence: 0.95, + box: { x: 0.45, y: 0.45, width: 0.15, height: 0.08 }, + }, + ], + }, + draftClient: { + generate: async () => ({ + title: "Guide", + steps: [ + { + id: "guide-step-1", + order: 1, + title: "Step 1: Click Ctrl+F12 marker", + instruction: "Click Ctrl+F12 marker.", + }, + ], + }), + }, + }); + await store.startSession(114); + await store.addMarker({ + recordingId: 114, + kind: "hotkey", + timeMs: 200, + label: "Ctrl+F12 marker", + normalizedX: 0.5, + normalizedY: 0.5, + }); + const videoPath = path.join(recordingsDir, "recording-114.mp4"); + await fs.writeFile(videoPath, ""); + const eventsSession = await store.finalizeEvents({ recordingId: 114, videoPath }); + await store.writeSnapshot({ + recordingId: 114, + eventId: eventsSession.events[0]?.id ?? "", + timeMs: 700, + offsetMs: 500, + width: 800, + height: 600, + pngBytes: new Uint8Array([1, 2, 3]).buffer, + }); + await store.runOcr({ recordingId: 114 }); + + const draftSession = await store.generateDraft({ + recordingId: 114, + language: "en", + provider: "deepseek", + }); + + expect(draftSession.candidates[0]).toMatchObject({ + targetText: "Save", + position: { xPercent: 50, yPercent: 50 }, + }); + expect(draftSession.generatedGuide?.steps[0]).toMatchObject({ + title: "Step 1: Save", + instruction: 'Click "Save".', + sourceCandidateId: draftSession.candidates[0]?.id, + screenshotPath: draftSession.snapshots[0]?.path, + }); + }); + it("discards a guide session and output directory", async () => { const store = new GuideStore(recordingsDir); const session = await store.startSession(111); diff --git a/electron/guide/guideStore.ts b/electron/guide/guideStore.ts index 39946c7..b2d8725 100644 --- a/electron/guide/guideStore.ts +++ b/electron/guide/guideStore.ts @@ -336,10 +336,11 @@ export class GuideStore { } } + const normalizedGuide = normalizeGeneratedGuide(generatedGuide) ?? generatedGuide; const updatedSession = touchSession({ ...session, candidates, - generatedGuide: normalizeGeneratedGuide(generatedGuide) ?? generatedGuide, + generatedGuide: enrichGeneratedGuide(normalizedGuide, session, candidates, input.language), status: "draft-ready", }); await this.writeSession(updatedSession); @@ -743,11 +744,41 @@ function normalizeGuideStepCandidate(input: unknown): GuideStepCandidate | null input.targetRole === "unknown" ? input.targetRole : undefined, + position: normalizeGuideCandidatePosition(input.position), nearbyText, confidence, }; } +function normalizeGuideCandidatePosition( + input: unknown, +): GuideStepCandidate["position"] | undefined { + if (!isRecord(input)) { + return undefined; + } + const normalizedX = normalizeOptionalNormalizedNumber(input.normalizedX); + const normalizedY = normalizeOptionalNormalizedNumber(input.normalizedY); + const xPercent = normalizeOptionalNumber(input.xPercent); + const yPercent = normalizeOptionalNumber(input.yPercent); + const description = normalizeOptionalString(input.description); + if ( + normalizedX === undefined || + normalizedY === undefined || + xPercent === undefined || + yPercent === undefined || + !description + ) { + return undefined; + } + return { + normalizedX, + normalizedY, + xPercent, + yPercent, + description, + }; +} + function normalizeGeneratedGuide(input: unknown): GeneratedGuide | null { if (!isRecord(input)) { return null; @@ -785,6 +816,101 @@ function normalizeGeneratedGuide(input: unknown): GeneratedGuide | null { }; } +function enrichGeneratedGuide( + guide: GeneratedGuide, + session: GuideSession, + candidates: GuideStepCandidate[], + language: GenerateGuideDraftInput["language"], +): GeneratedGuide { + const sortedCandidates = [...candidates].sort((left, right) => left.timeMs - right.timeMs); + const candidatesById = new Map(candidates.map((candidate) => [candidate.id, candidate])); + const snapshotsById = new Map(session.snapshots.map((snapshot) => [snapshot.id, snapshot])); + const snapshotsByEventId = new Map( + session.snapshots.map((snapshot) => [snapshot.eventId, snapshot]), + ); + + return { + ...guide, + steps: guide.steps.map((step, index) => { + const candidate = + (step.sourceCandidateId ? candidatesById.get(step.sourceCandidateId) : undefined) ?? + sortedCandidates[index]; + const snapshot = candidate + ? ((candidate.snapshotId ? snapshotsById.get(candidate.snapshotId) : undefined) ?? + snapshotsByEventId.get(candidate.eventId)) + : undefined; + const repairedStep = repairGenericMarkerStep(step, candidate, language); + return { + ...repairedStep, + sourceCandidateId: candidate?.id ?? repairedStep.sourceCandidateId, + screenshotPath: repairedStep.screenshotPath ?? snapshot?.path, + }; + }), + }; +} + +function repairGenericMarkerStep( + step: GeneratedGuideStep, + candidate: GuideStepCandidate | undefined, + language: GenerateGuideDraftInput["language"], +): GeneratedGuideStep { + if ( + !candidate || + (!containsGenericMarkerText(step.title) && !containsGenericMarkerText(step.instruction)) + ) { + return step; + } + + return { + ...step, + title: buildRepairedStepTitle(candidate, step.order, language), + instruction: buildRepairedStepInstruction(candidate, language), + }; +} + +function containsGenericMarkerText(value: string): boolean { + return /\b(?:ctrl|control)(?:\s*\+\s*f12)?\s+marker\b/i.test(value); +} + +function buildRepairedStepTitle( + candidate: GuideStepCandidate, + order: number, + language: GenerateGuideDraftInput["language"], +): string { + if (candidate.targetText) { + return language === "vi" + ? `Bước ${order}: ${candidate.targetText}` + : `Step ${order}: ${candidate.targetText}`; + } + if (candidate.position) { + return language === "vi" + ? `Bước ${order}: Vị trí x ${candidate.position.xPercent}%, y ${candidate.position.yPercent}%` + : `Step ${order}: Position x ${candidate.position.xPercent}%, y ${candidate.position.yPercent}%`; + } + return stepTitleFallback(order, language); +} + +function buildRepairedStepInstruction( + candidate: GuideStepCandidate, + language: GenerateGuideDraftInput["language"], +): string { + if (candidate.targetText) { + return language === "vi" + ? `Nhấn vào "${candidate.targetText}".` + : `Click "${candidate.targetText}".`; + } + if (candidate.position) { + return language === "vi" + ? `Nhấn tại vùng ${candidate.position.description} (x ${candidate.position.xPercent}%, y ${candidate.position.yPercent}%).` + : `Click the ${candidate.position.description} area (x ${candidate.position.xPercent}%, y ${candidate.position.yPercent}%).`; + } + return language === "vi" ? "Thực hiện thao tác tại mốc đã ghi." : "Perform the recorded action."; +} + +function stepTitleFallback(order: number, language: GenerateGuideDraftInput["language"]): string { + return language === "vi" ? `Bước ${order}` : `Step ${order}`; +} + function normalizeArray(input: unknown, normalize: (value: unknown) => T | null): T[] { return Array.isArray(input) ? input.map((value) => normalize(value)).filter((value): value is T => value !== null) diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 8643b6e..551109c 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -5,7 +5,7 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { fileURLToPath, pathToFileURL } from "node:url"; -import type { DesktopCapturerSource } from "electron"; +import type { DesktopCapturerSource, Rectangle } from "electron"; import { app, BrowserWindow, @@ -17,6 +17,7 @@ import { shell, systemPreferences, } from "electron"; +import type { GuideMarkerCapturedPayload } from "../../src/guide/contracts"; import type { NativeMacRecordingRequest } from "../../src/lib/nativeMacRecording"; import type { NativeWindowsRecordingRequest } from "../../src/lib/nativeWindowsRecording"; import { @@ -344,9 +345,16 @@ type SelectedSource = { name: string; id?: string; display_id?: string; + displayId?: number; + displayIndex?: number; + screenIndex?: number; + displayLabel?: string; + bounds?: SourceBounds; [key: string]: unknown; }; +type SourceBounds = { x: number; y: number; width: number; height: number }; + type AttachNativeMacWebcamRecordingInput = { screenVideoPath?: string; recordingId?: number; @@ -429,8 +437,10 @@ let nativeMacCursorRecordingStartMs = 0; let nativeMacPauseStartedAtMs: number | null = null; let nativeMacPauseRanges: Array<{ startMs: number; endMs: number }> = []; let nativeMacIsPaused = false; +let guideHotkeyListenerProcess: ChildProcessWithoutNullStreams | null = null; const GUIDE_MARKER_HOTKEY = "Control+F12"; const GUIDE_MARKER_HOTKEY_LABEL = "Ctrl+F12"; +type GuideMarkerTrigger = GuideMarkerCapturedPayload["trigger"]; type GuideHotkeyBounds = { x: number; y: number; width: number; height: number }; type GuideHotkeyRecordingState = { recordingId: number; @@ -442,6 +452,8 @@ type GuideHotkeyRecordingState = { let activeGuideHotkeyRecording: GuideHotkeyRecordingState | null = null; let activeGuideHotkeySessionId: number | null = null; let guideMarkerHotkeyRegistered = false; +let lastGuideHotkeyCaptureAtMs = 0; +const GUIDE_HOTKEY_CAPTURE_DEBOUNCE_MS = 250; function normalizeCursorSample(sample: unknown): CursorRecordingSample | null { if (!sample || typeof sample !== "object") { @@ -590,12 +602,109 @@ function resolveAssetBasePath() { } } +function parseDesktopCapturerScreenIndex(sourceId?: string | null): number | null { + if (!sourceId?.startsWith("screen:")) { + return null; + } + const indexPart = sourceId.split(":")[1]; + if (!indexPart || !/^\d+$/.test(indexPart)) { + return null; + } + const index = Number(indexPart); + return Number.isInteger(index) && index >= 0 ? index : null; +} + +function normalizeSourceBounds(input: unknown): SourceBounds | undefined { + if (!input || typeof input !== "object") { + return undefined; + } + const bounds = input as Partial; + const x = Number(bounds.x); + const y = Number(bounds.y); + const width = Number(bounds.width); + const height = Number(bounds.height); + if ( + !Number.isFinite(x) || + !Number.isFinite(y) || + !Number.isFinite(width) || + !Number.isFinite(height) || + width <= 0 || + height <= 0 + ) { + return undefined; + } + return { + x: Math.round(x), + y: Math.round(y), + width: Math.round(width), + height: Math.round(height), + }; +} + +function toSourceBounds(bounds: Rectangle): SourceBounds { + return { + x: Math.round(bounds.x), + y: Math.round(bounds.y), + width: Math.round(bounds.width), + height: Math.round(bounds.height), + }; +} + +function findDisplayForSource( + source: Pick, + screenSourceIndex?: number, +) { + const displays = screen.getAllDisplays(); + const displayId = Number(source.display_id); + const displayById = Number.isFinite(displayId) + ? displays.find((display) => display.id === displayId) + : undefined; + if (displayById) { + return { display: displayById, displayIndex: displays.indexOf(displayById) }; + } + + const sourceIndex = parseDesktopCapturerScreenIndex(source.id) ?? screenSourceIndex; + if (sourceIndex !== null && sourceIndex !== undefined && sourceIndex < displays.length) { + return { display: displays[sourceIndex], displayIndex: sourceIndex }; + } + + return { display: null, displayIndex: undefined }; +} + +function getSelectedSourceDisplay() { + const displays = screen.getAllDisplays(); + const explicitDisplayId = + typeof selectedSource?.displayId === "number" + ? selectedSource.displayId + : Number(selectedSource?.display_id); + const displayById = Number.isFinite(explicitDisplayId) + ? displays.find((display) => display.id === explicitDisplayId) + : undefined; + if (displayById) { + return displayById; + } + + const sourceIndex = + typeof selectedSource?.displayIndex === "number" + ? selectedSource.displayIndex + : typeof selectedSource?.screenIndex === "number" + ? selectedSource.screenIndex + : parseDesktopCapturerScreenIndex(selectedSource?.id); + if (sourceIndex !== null && sourceIndex !== undefined && sourceIndex < displays.length) { + return displays[sourceIndex]; + } + + return null; +} + function getSelectedSourceBounds() { const cursor = screen.getCursorScreenPoint(); - const sourceDisplayId = Number(selectedSource?.display_id); - const sourceDisplay = Number.isFinite(sourceDisplayId) - ? (screen.getAllDisplays().find((display) => display.id === sourceDisplayId) ?? null) - : null; + const selectedBounds = normalizeSourceBounds(selectedSource?.bounds); + if (selectedBounds) { + return selectedBounds; + } + + const sourceDisplay = getSelectedSourceDisplay(); return (sourceDisplay ?? screen.getDisplayNearestPoint(cursor)).bounds; } @@ -698,12 +807,21 @@ function clampGuideHotkey01(value: number): number { return Math.min(1, Math.max(0, value)); } -async function captureGuideHotkeyMarker(guideStore: GuideStore) { +async function captureGuideHotkeyMarker( + guideStore: GuideStore, + trigger: GuideMarkerTrigger = "global-shortcut", +) { const recording = activeGuideHotkeyRecording; if (!recording || activeGuideHotkeySessionId !== recording.recordingId) { return { captured: false }; } + const captureRequestedAtMs = Date.now(); + if (captureRequestedAtMs - lastGuideHotkeyCaptureAtMs < GUIDE_HOTKEY_CAPTURE_DEBOUNCE_MS) { + return { captured: false }; + } + lastGuideHotkeyCaptureAtMs = captureRequestedAtMs; + const point = getGuideHotkeyPoint(recording.bounds); try { const result = await guideStore.addMarker({ @@ -714,11 +832,21 @@ async function captureGuideHotkeyMarker(guideStore: GuideStore) { y: point.normalizedY, normalizedX: point.normalizedX, normalizedY: point.normalizedY, - label: `${GUIDE_MARKER_HOTKEY_LABEL} marker`, + }); + notifyGuideMarkerCaptured({ + recordingId: result.event.recordingId, + eventId: result.event.id, + timeMs: result.event.timeMs, + trigger, + normalizedX: result.event.normalizedX, + normalizedY: result.event.normalizedY, + rawX: point.rawX, + rawY: point.rawY, }); console.info("[guide-hotkey] marker captured", { recordingId: recording.recordingId, timeMs: result.event.timeMs, + trigger, normalizedX: result.event.normalizedX, normalizedY: result.event.normalizedY, rawX: point.rawX, @@ -733,13 +861,110 @@ async function captureGuideHotkeyMarker(guideStore: GuideStore) { } } +function notifyGuideMarkerCaptured(payload: GuideMarkerCapturedPayload) { + for (const window of BrowserWindow.getAllWindows()) { + if (!window.isDestroyed()) { + window.webContents.send("guide:marker-captured", payload); + } + } +} + +function handleGuideHotkeyListenerLine(line: string, guideStore: GuideStore) { + const text = line.trim(); + if (!text) { + return; + } + + try { + const event = JSON.parse(text) as { + event?: unknown; + key?: unknown; + state?: unknown; + }; + if (event.event === "ready") { + console.info("[guide-hotkey] native Ctrl listener ready"); + return; + } + if (event.event === "guide-hotkey" && event.key === "control" && event.state === "down") { + void captureGuideHotkeyMarker(guideStore, "global-control"); + return; + } + } catch { + console.warn("[guide-hotkey] native listener emitted invalid JSON:", text); + } +} + +async function startNativeGuideHotkeyListener(guideStore: GuideStore) { + if (process.platform !== "win32" || guideHotkeyListenerProcess) { + return; + } + + const helperPath = await findNativeGuideHotkeyListenerPath(); + if (!helperPath) { + console.warn("[guide-hotkey] native Ctrl listener is unavailable"); + return; + } + + const proc = spawn(helperPath, [], { + cwd: path.dirname(helperPath), + stdio: ["pipe", "pipe", "pipe"], + windowsHide: true, + }); + proc.stdin.end(); + guideHotkeyListenerProcess = proc; + + let stdoutBuffer = ""; + proc.stdout.setEncoding("utf-8"); + proc.stdout.on("data", (chunk: string) => { + stdoutBuffer += chunk; + const lines = stdoutBuffer.split(/\r?\n/); + stdoutBuffer = lines.pop() ?? ""; + for (const line of lines) { + handleGuideHotkeyListenerLine(line, guideStore); + } + }); + + proc.stderr.setEncoding("utf-8"); + proc.stderr.on("data", (chunk: string) => { + const message = chunk.trim(); + if (message) { + console.warn("[guide-hotkey] native listener:", message); + } + }); + + proc.once("error", (error) => { + console.warn("[guide-hotkey] failed to start native Ctrl listener:", error); + if (guideHotkeyListenerProcess === proc) { + guideHotkeyListenerProcess = null; + } + }); + proc.once("exit", (code, signal) => { + if (guideHotkeyListenerProcess === proc) { + guideHotkeyListenerProcess = null; + } + if (code !== 0 && code !== null) { + console.warn("[guide-hotkey] native Ctrl listener exited", { code, signal }); + } + }); +} + +function stopNativeGuideHotkeyListener() { + const proc = guideHotkeyListenerProcess; + guideHotkeyListenerProcess = null; + if (proc && !proc.killed) { + proc.kill(); + } +} + function registerGuideMarkerHotkey(guideStore: GuideStore) { if (guideMarkerHotkeyRegistered) { return; } + void startNativeGuideHotkeyListener(guideStore); + guideMarkerHotkeyRegistered = globalShortcut.register(GUIDE_MARKER_HOTKEY, () => { - void captureGuideHotkeyMarker(guideStore); + void captureGuideHotkeyMarker(guideStore, "global-shortcut"); }); if (!guideMarkerHotkeyRegistered) { @@ -749,6 +974,7 @@ function registerGuideMarkerHotkey(guideStore: GuideStore) { app.once("will-quit", () => { globalShortcut.unregister(GUIDE_MARKER_HOTKEY); + stopNativeGuideHotkeyListener(); guideMarkerHotkeyRegistered = false; }); } @@ -758,12 +984,7 @@ function getSelectedSourceId() { } function getSelectedDisplay() { - const sourceDisplayId = Number(selectedSource?.display_id); - if (!Number.isFinite(sourceDisplayId)) { - return null; - } - - return screen.getAllDisplays().find((display) => display.id === sourceDisplayId) ?? null; + return getSelectedSourceDisplay(); } function resolveUnpackedAppPath(...segments: string[]) { @@ -802,6 +1023,19 @@ function getNativeWindowsCaptureHelperCandidates() { ].filter((candidate): candidate is string => Boolean(candidate)); } +function getNativeGuideHotkeyListenerCandidates() { + const envPath = process.env.OPENSCREEN_GUIDE_HOTKEY_LISTENER_EXE?.trim(); + const archTag = process.arch === "arm64" ? "win32-arm64" : "win32-x64"; + const helperName = "guide-hotkey-listener.exe"; + return [ + envPath, + resolveUnpackedAppPath("electron", "native", "wgc-capture", "build", "Release", helperName), + resolveUnpackedAppPath("electron", "native", "wgc-capture", "build", helperName), + resolveUnpackedAppPath("electron", "native", "bin", archTag, helperName), + resolvePackagedResourcePath("electron", "native", "bin", archTag, helperName), + ].filter((candidate): candidate is string => Boolean(candidate)); +} + async function findNativeWindowsCaptureHelperPath() { if (process.platform !== "win32") { return null; @@ -819,6 +1053,23 @@ async function findNativeWindowsCaptureHelperPath() { return null; } +async function findNativeGuideHotkeyListenerPath() { + if (process.platform !== "win32") { + return null; + } + + for (const candidate of getNativeGuideHotkeyListenerCandidates()) { + try { + await fs.access(candidate, fsConstants.X_OK); + return candidate; + } catch { + // Try the next configured helper location. + } + } + + return null; +} + function getNativeMacCaptureHelperCandidates() { const envPath = process.env.OPENSCREEN_SCK_CAPTURE_EXE?.trim(); const archTag = process.arch === "arm64" ? "darwin-arm64" : "darwin-x64"; @@ -1480,17 +1731,42 @@ export function registerIpcHandlers( ipcMain.handle("get-sources", async (_, opts) => { const sources = await desktopCapturer.getSources(opts); lastEnumeratedSources = new Map(sources.map((source) => [source.id, source])); - return sources.map((source) => ({ - id: source.id, - name: source.name, - display_id: source.display_id, - thumbnail: source.thumbnail ? source.thumbnail.toDataURL() : null, - appIcon: source.appIcon ? source.appIcon.toDataURL() : null, - })); + let screenSourceIndex = 0; + return sources.map((source) => { + const isScreenSource = source.id.startsWith("screen:"); + const sourceIndex = isScreenSource + ? (parseDesktopCapturerScreenIndex(source.id) ?? screenSourceIndex) + : undefined; + const { display, displayIndex } = isScreenSource + ? findDisplayForSource(source, screenSourceIndex) + : { display: null, displayIndex: undefined }; + if (isScreenSource) { + screenSourceIndex += 1; + } + const bounds = display ? toSourceBounds(display.bounds) : undefined; + const displayLabel = bounds + ? `Display ${(displayIndex ?? sourceIndex ?? 0) + 1} - ${bounds.width}x${bounds.height} @ ${bounds.x},${bounds.y}` + : undefined; + return { + id: source.id, + name: source.name, + display_id: source.display_id, + thumbnail: source.thumbnail ? source.thumbnail.toDataURL() : null, + appIcon: source.appIcon ? source.appIcon.toDataURL() : null, + displayId: display?.id, + displayIndex, + screenIndex: sourceIndex, + displayLabel, + bounds, + }; + }); }); ipcMain.handle("select-source", async (_, source: SelectedSource) => { - selectedSource = source; + selectedSource = { + ...source, + bounds: normalizeSourceBounds(source.bounds), + }; // Reuse the exact source object returned during enumeration to avoid // Windows window-source id mismatches across separate getSources() calls. selectedDesktopSource = @@ -1713,16 +1989,19 @@ export function registerIpcHandlers( RECORDINGS_DIR, `${RECORDING_FILE_PREFIX}${recordingId}-webcam.mp4`, ); + const requestBounds = normalizeSourceBounds(request.source.bounds); const sourceDisplay = request.source.type === "display" && typeof request.source.displayId === "number" ? (screen.getAllDisplays().find((display) => display.id === request.source.displayId) ?? null) : getSelectedDisplay(); - const bounds = sourceDisplay?.bounds ?? getSelectedSourceBounds(); + const bounds = requestBounds ?? sourceDisplay?.bounds ?? getSelectedSourceBounds(); const displayId = typeof request.source.displayId === "number" && Number.isFinite(request.source.displayId) ? request.source.displayId - : Number(selectedSource?.display_id); + : typeof selectedSource?.displayId === "number" + ? selectedSource.displayId + : Number(selectedSource?.display_id); const webcamDirectShowClsid = request.webcam.enabled ? await resolveDirectShowWebcamClsid(request.webcam.deviceName) : null; @@ -2365,7 +2644,7 @@ export function registerIpcHandlers( onSessionEnded: (recordingId) => deactivateGuideHotkeySession(recordingId), }); ipcMain.handle("guide:capture-pointer-marker", async () => { - const result = await captureGuideHotkeyMarker(guideStore); + const result = await captureGuideHotkeyMarker(guideStore, "button"); if (result.error) { return { success: false, diff --git a/electron/native/README.md b/electron/native/README.md index 59930ba..f402fb2 100644 --- a/electron/native/README.md +++ b/electron/native/README.md @@ -46,7 +46,7 @@ Build the Windows helper with: npm run build:native:win ``` -The build writes the CMake output to `electron/native/wgc-capture/build/wgc-capture.exe` and copies the redistributable binary to `electron/native/bin/win32-x64/wgc-capture.exe`. +The build writes the CMake output to `electron/native/wgc-capture/build/wgc-capture.exe` and copies the redistributable binary to `electron/native/bin/win32-x64/wgc-capture.exe`. It also builds `cursor-sampler.exe` for editable cursor telemetry and `guide-hotkey-listener.exe` for the Guide Mode global Ctrl capture hook. The helper contract is process-based: the app starts the process with one JSON argument and sends commands on stdin. `stop\n` finalizes the recording. During migration the helper prints both newline-delimited JSON events and the legacy text messages `Recording started` / `Recording stopped. Output path: `. diff --git a/electron/native/wgc-capture/CMakeLists.txt b/electron/native/wgc-capture/CMakeLists.txt index 32c5d6e..68b97a2 100644 --- a/electron/native/wgc-capture/CMakeLists.txt +++ b/electron/native/wgc-capture/CMakeLists.txt @@ -65,3 +65,19 @@ target_link_libraries(cursor-sampler PRIVATE gdi32 gdiplus ) + +add_executable(guide-hotkey-listener + src/guide-hotkey-listener.cpp +) + +target_compile_definitions(guide-hotkey-listener PRIVATE + NOMINMAX + WIN32_LEAN_AND_MEAN + _WIN32_WINNT=0x0A00 +) + +target_compile_options(guide-hotkey-listener PRIVATE /EHsc /W4 /utf-8) + +target_link_libraries(guide-hotkey-listener PRIVATE + user32 +) diff --git a/electron/native/wgc-capture/src/guide-hotkey-listener.cpp b/electron/native/wgc-capture/src/guide-hotkey-listener.cpp new file mode 100644 index 0000000..fa6fa68 --- /dev/null +++ b/electron/native/wgc-capture/src/guide-hotkey-listener.cpp @@ -0,0 +1,91 @@ +#include + +#include +#include +#include +#include +#include +#include + +static HHOOK g_keyboardHook = nullptr; +static DWORD g_mainThreadId = 0; +static std::atomic g_ctrlDown{false}; +static std::mutex g_stdoutMutex; + +static int64_t nowMs() { + return static_cast( + std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); +} + +static void writeJsonLine(const std::string& json) { + std::lock_guard lock(g_stdoutMutex); + std::cout << json << '\n'; + std::cout.flush(); +} + +static bool isCtrlKey(DWORD vkCode) { + return vkCode == VK_CONTROL || vkCode == VK_LCONTROL || vkCode == VK_RCONTROL; +} + +static LRESULT CALLBACK LowLevelKeyboardProc(int nCode, WPARAM wParam, LPARAM lParam) { + if (nCode >= 0) { + const auto* event = reinterpret_cast(lParam); + if (event && isCtrlKey(event->vkCode)) { + if (wParam == WM_KEYDOWN || wParam == WM_SYSKEYDOWN) { + const bool wasDown = g_ctrlDown.exchange(true, std::memory_order_acq_rel); + if (!wasDown) { + writeJsonLine( + "{\"event\":\"guide-hotkey\",\"key\":\"control\",\"state\":\"down\",\"timeMs\":" + + std::to_string(nowMs()) + "}"); + } + } else if (wParam == WM_KEYUP || wParam == WM_SYSKEYUP) { + g_ctrlDown.store(false, std::memory_order_release); + } + } + } + + return CallNextHookEx(g_keyboardHook, nCode, wParam, lParam); +} + +static BOOL WINAPI consoleCtrlHandler(DWORD signal) { + if ( + signal == CTRL_C_EVENT || + signal == CTRL_BREAK_EVENT || + signal == CTRL_CLOSE_EVENT || + signal == CTRL_LOGOFF_EVENT || + signal == CTRL_SHUTDOWN_EVENT + ) { + PostThreadMessage(g_mainThreadId, WM_QUIT, 0, 0); + return TRUE; + } + + return FALSE; +} + +int main() { + g_mainThreadId = GetCurrentThreadId(); + SetConsoleCtrlHandler(consoleCtrlHandler, TRUE); + + g_keyboardHook = SetWindowsHookExW(WH_KEYBOARD_LL, LowLevelKeyboardProc, GetModuleHandleW(nullptr), 0); + if (!g_keyboardHook) { + std::cerr << "Failed to install guide hotkey keyboard hook. error=" << GetLastError() << std::endl; + return 1; + } + + writeJsonLine("{\"event\":\"ready\"}"); + + MSG msg{}; + while (GetMessageW(&msg, nullptr, 0, 0) > 0) { + TranslateMessage(&msg); + DispatchMessageW(&msg); + } + + if (g_keyboardHook) { + UnhookWindowsHookEx(g_keyboardHook); + g_keyboardHook = nullptr; + } + + return 0; +} diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index 9ea4e59..caf6c33 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ b/electron/native/wgc-capture/src/main.cpp @@ -400,6 +400,7 @@ int main(int argc, char* argv[]) { if (config.sourceType == "display") { HMONITOR monitor = findMonitorForCapture( config.displayId, + config.sourceId, config.hasDisplayBounds ? &config.bounds : nullptr); if (!monitor) { std::cerr << "ERROR: Could not resolve monitor" << std::endl; diff --git a/electron/native/wgc-capture/src/monitor_utils.cpp b/electron/native/wgc-capture/src/monitor_utils.cpp index f83e77d..aade2e0 100644 --- a/electron/native/wgc-capture/src/monitor_utils.cpp +++ b/electron/native/wgc-capture/src/monitor_utils.cpp @@ -2,6 +2,7 @@ #include #include +#include #include namespace { @@ -43,9 +44,36 @@ int64_t overlapArea(const RECT& rect, const MonitorBounds& bounds) { return static_cast(right - left) * static_cast(bottom - top); } +int parseScreenSourceIndex(const std::string& sourceId) { + constexpr char prefix[] = "screen:"; + if (sourceId.rfind(prefix, 0) != 0) { + return -1; + } + + const size_t start = sizeof(prefix) - 1; + const size_t end = sourceId.find(':', start); + const std::string indexText = sourceId.substr( + start, + end == std::string::npos ? std::string::npos : end - start); + if (indexText.empty()) { + return -1; + } + + try { + size_t parsed = 0; + const int index = std::stoi(indexText, &parsed, 10); + return parsed == indexText.size() && index >= 0 ? index : -1; + } catch (...) { + return -1; + } +} + } // namespace -HMONITOR findMonitorForCapture(int64_t displayId, const MonitorBounds* bounds) { +HMONITOR findMonitorForCapture( + int64_t displayId, + const std::string& sourceId, + const MonitorBounds* bounds) { const auto monitors = enumerateMonitors(); if (monitors.empty()) { return MonitorFromPoint({0, 0}, MONITOR_DEFAULTTOPRIMARY); @@ -84,5 +112,10 @@ HMONITOR findMonitorForCapture(int64_t displayId, const MonitorBounds* bounds) { } } + const int sourceIndex = parseScreenSourceIndex(sourceId); + if (sourceIndex >= 0 && static_cast(sourceIndex) < monitors.size()) { + return monitors[static_cast(sourceIndex)].monitor; + } + return MonitorFromPoint({0, 0}, MONITOR_DEFAULTTOPRIMARY); } diff --git a/electron/native/wgc-capture/src/monitor_utils.h b/electron/native/wgc-capture/src/monitor_utils.h index 11d5d83..d1d1daa 100644 --- a/electron/native/wgc-capture/src/monitor_utils.h +++ b/electron/native/wgc-capture/src/monitor_utils.h @@ -3,6 +3,7 @@ #include #include +#include struct MonitorBounds { int x = 0; @@ -11,4 +12,7 @@ struct MonitorBounds { int height = 0; }; -HMONITOR findMonitorForCapture(int64_t displayId, const MonitorBounds* bounds); +HMONITOR findMonitorForCapture( + int64_t displayId, + const std::string& sourceId, + const MonitorBounds* bounds); diff --git a/electron/preload.ts b/electron/preload.ts index cd72611..ef435b7 100644 --- a/electron/preload.ts +++ b/electron/preload.ts @@ -6,6 +6,7 @@ import type { ExportGuideInput, FinalizeGuideEventsInput, GenerateGuideDraftInput, + GuideMarkerCapturedPayload, RunGuideOcrInput, SaveGuideAiSettingsInput, SaveGuideInput, @@ -43,6 +44,13 @@ contextBridge.exposeInMainWorld("electronAPI", { import("../src/guide/contracts").GuideIpcResult >; }, + onMarkerCaptured: (callback: (payload: GuideMarkerCapturedPayload) => void) => { + const listener = (_event: Electron.IpcRendererEvent, payload: GuideMarkerCapturedPayload) => { + callback(payload); + }; + ipcRenderer.on("guide:marker-captured", listener); + return () => ipcRenderer.removeListener("guide:marker-captured", listener); + }, finalizeEvents: (input: FinalizeGuideEventsInput) => { return ipcRenderer.invoke("guide:finalize-events", input); }, diff --git a/package-lock.json b/package-lock.json index 5f0a3d7..749fe7b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "openscreen", - "version": "1.4.1", + "version": "1.4.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "openscreen", - "version": "1.4.1", + "version": "1.4.2", "dependencies": { "@fix-webm-duration/fix": "^1.0.1", "@pixi/filter-drop-shadow": "^5.2.0", diff --git a/package.json b/package.json index 801bd53..2c15f2d 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "openscreen", "private": true, - "version": "1.4.1", + "version": "1.4.2", "type": "module", "packageManager": "npm@10.9.4", "engines": { diff --git a/scripts/build-windows-wgc-helper.mjs b/scripts/build-windows-wgc-helper.mjs index 29df4d8..5b378fc 100644 --- a/scripts/build-windows-wgc-helper.mjs +++ b/scripts/build-windows-wgc-helper.mjs @@ -126,6 +126,11 @@ if (!fs.existsSync(cursorSamplerOutputPath)) { throw new Error(`WGC helper build completed but ${cursorSamplerOutputPath} was not found.`); } +const guideHotkeyListenerOutputPath = path.join(BUILD_DIR, "guide-hotkey-listener.exe"); +if (!fs.existsSync(guideHotkeyListenerOutputPath)) { + throw new Error(`WGC helper build completed but ${guideHotkeyListenerOutputPath} was not found.`); +} + fs.mkdirSync(BIN_DIR, { recursive: true }); const distributablePath = path.join(BIN_DIR, "wgc-capture.exe"); fs.copyFileSync(outputPath, distributablePath); @@ -133,7 +138,12 @@ fs.copyFileSync(outputPath, distributablePath); const cursorSamplerDistributablePath = path.join(BIN_DIR, "cursor-sampler.exe"); fs.copyFileSync(cursorSamplerOutputPath, cursorSamplerDistributablePath); +const guideHotkeyListenerDistributablePath = path.join(BIN_DIR, "guide-hotkey-listener.exe"); +fs.copyFileSync(guideHotkeyListenerOutputPath, guideHotkeyListenerDistributablePath); + console.log(`Built ${outputPath}`); console.log(`Copied ${distributablePath}`); console.log(`Built ${cursorSamplerOutputPath}`); console.log(`Copied ${cursorSamplerDistributablePath}`); +console.log(`Built ${guideHotkeyListenerOutputPath}`); +console.log(`Copied ${guideHotkeyListenerDistributablePath}`); diff --git a/src/components/launch/LaunchWindow.tsx b/src/components/launch/LaunchWindow.tsx index 9d8b8c3..76eb995 100644 --- a/src/components/launch/LaunchWindow.tsx +++ b/src/components/launch/LaunchWindow.tsx @@ -19,6 +19,7 @@ import { MdVolumeUp, } from "react-icons/md"; import { RxDragHandleDots2 } from "react-icons/rx"; +import { toast } from "sonner"; import { useI18n, useScopedT } from "@/contexts/I18nContext"; import { getAvailableLocales, getLocaleName } from "@/i18n/loader"; import { nativeBridgeClient } from "@/native"; @@ -143,7 +144,6 @@ export function LaunchWindow() { top: 12, maxHeight: 240, }); - const guideCtrlMarkerArmedRef = useRef(false); const { devices: micDevices, @@ -248,47 +248,6 @@ export function LaunchWindow() { }; }, [isLanguageMenuOpen]); - useEffect(() => { - if (!recording || !guideModeEnabled) { - guideCtrlMarkerArmedRef.current = false; - return; - } - - const isCtrlKey = (event: KeyboardEvent) => - event.key === "Control" || event.code === "ControlLeft" || event.code === "ControlRight"; - - const handleKeyDown = (event: KeyboardEvent) => { - if (!isCtrlKey(event) || event.repeat || guideCtrlMarkerArmedRef.current) { - return; - } - - guideCtrlMarkerArmedRef.current = true; - event.preventDefault(); - event.stopPropagation(); - addGuideMarker(); - }; - - const releaseCtrlMarker = (event?: KeyboardEvent) => { - if (event && !isCtrlKey(event)) { - return; - } - guideCtrlMarkerArmedRef.current = false; - }; - const handleWindowBlur = () => { - guideCtrlMarkerArmedRef.current = false; - }; - - window.addEventListener("keydown", handleKeyDown, { capture: true }); - window.addEventListener("keyup", releaseCtrlMarker, { capture: true }); - window.addEventListener("blur", handleWindowBlur); - - return () => { - window.removeEventListener("keydown", handleKeyDown, { capture: true }); - window.removeEventListener("keyup", releaseCtrlMarker, { capture: true }); - window.removeEventListener("blur", handleWindowBlur); - }; - }, [addGuideMarker, guideModeEnabled, recording]); - useEffect(() => { if (!isLanguageMenuOpen || !languageTriggerRef.current) return; @@ -347,6 +306,23 @@ export function LaunchWindow() { setHudMouseEventsEnabled(isLanguageMenuOpen); }, [isLanguageMenuOpen, setHudMouseEventsEnabled]); + useEffect(() => { + const unsubscribe = window.electronAPI?.guide.onMarkerCaptured?.((payload) => { + const position = + typeof payload.normalizedX === "number" && typeof payload.normalizedY === "number" + ? `x ${Math.round(payload.normalizedX * 100)}%, y ${Math.round(payload.normalizedY * 100)}%` + : undefined; + toast.success("Guide event captured", { + id: `guide-marker-${payload.eventId}`, + description: position, + duration: 1400, + }); + }); + return () => { + unsubscribe?.(); + }; + }, []); + const [selectedSource, setSelectedSource] = useState("Screen"); const [hasSelectedSource, setHasSelectedSource] = useState(false); const [, setRecordPointerDownCount] = useState(0); diff --git a/src/components/launch/SourceSelector.tsx b/src/components/launch/SourceSelector.tsx index f3ae989..495577c 100644 --- a/src/components/launch/SourceSelector.tsx +++ b/src/components/launch/SourceSelector.tsx @@ -11,6 +11,16 @@ interface DesktopSource { thumbnail: string | null; display_id: string; appIcon: string | null; + displayId?: number; + displayIndex?: number; + screenIndex?: number; + displayLabel?: string; + bounds?: { + x: number; + y: number; + width: number; + height: number; + }; } export function SourceSelector() { @@ -39,6 +49,11 @@ export function SourceSelector() { thumbnail: source.thumbnail, display_id: source.display_id, appIcon: source.appIcon, + displayId: source.displayId, + displayIndex: source.displayIndex, + screenIndex: source.screenIndex, + displayLabel: source.displayLabel, + bounds: source.bounds, })), ); } catch (error) { @@ -98,7 +113,14 @@ export function SourceSelector() { {source.appIcon && ( )} -
{source.name}
+
+
{source.name}
+ {source.displayLabel && ( +
+ {source.displayLabel} +
+ )} +
); diff --git a/src/guide/contracts.ts b/src/guide/contracts.ts index 6f162cb..4886edb 100644 --- a/src/guide/contracts.ts +++ b/src/guide/contracts.ts @@ -79,10 +79,28 @@ export interface GuideStepCandidate { action: GuideAction; targetText?: string; targetRole?: GuideTargetRole; + position?: { + normalizedX: number; + normalizedY: number; + xPercent: number; + yPercent: number; + description: string; + }; nearbyText: string[]; confidence: number; } +export interface GuideMarkerCapturedPayload { + recordingId: string; + eventId: string; + timeMs: number; + trigger: "button" | "global-control" | "global-shortcut"; + normalizedX?: number; + normalizedY?: number; + rawX?: number; + rawY?: number; +} + export interface GeneratedGuideStep { id: string; order: number; diff --git a/src/guide/exporters.test.ts b/src/guide/exporters.test.ts index 9c4d120..b10d41c 100644 --- a/src/guide/exporters.test.ts +++ b/src/guide/exporters.test.ts @@ -83,4 +83,22 @@ describe("guide exporters", () => { expect(html).toContain("click-marker"); expect(html).toContain("left: 25.00%; top: 75.00%;"); }); + + it("draws click markers for hotkey events with coordinates", () => { + const hotkeySession: GuideSession = { + ...session, + events: [ + { + ...session.events[0], + kind: "hotkey", + source: "guide-hotkey", + }, + ], + }; + + const html = exportGuideToHtml(hotkeySession); + + expect(html).toContain("click-marker"); + expect(html).toContain("left: 25.00%; top: 75.00%;"); + }); }); diff --git a/src/guide/exporters.ts b/src/guide/exporters.ts index bf98191..e84f86e 100644 --- a/src/guide/exporters.ts +++ b/src/guide/exporters.ts @@ -97,7 +97,7 @@ function resolveStepClickPoint( : undefined; const eventId = candidate?.eventId; const event = eventId ? session.events.find((item) => item.id === eventId) : undefined; - if (!event || event.kind !== "click") { + if (!event || (event.kind !== "click" && event.kind !== "hotkey")) { return null; } if (isNormalizedNumber(event.normalizedX) && isNormalizedNumber(event.normalizedY)) { diff --git a/src/guide/promptBuilder.test.ts b/src/guide/promptBuilder.test.ts index 0d973cd..59ed239 100644 --- a/src/guide/promptBuilder.test.ts +++ b/src/guide/promptBuilder.test.ts @@ -36,6 +36,13 @@ const candidates: GuideStepCandidate[] = [ action: "click", targetText: "Save", targetRole: "button", + position: { + normalizedX: 0.5, + normalizedY: 0.5, + xPercent: 50, + yPercent: 50, + description: "center", + }, nearbyText: ["Save"], confidence: 0.9, }, @@ -46,7 +53,9 @@ describe("guide draft helpers", () => { const prompt = buildGuideDraftPrompt({ session, candidates, language: "en" }); expect(prompt).toContain("Return JSON only"); + expect(prompt).toContain('"sourceCandidateId": "candidate-1"'); expect(prompt).toContain('"targetText": "Save"'); + expect(prompt).toContain('"xPercent": 50'); expect(prompt).toContain('"id":"guide-step-1"'); }); diff --git a/src/guide/promptBuilder.ts b/src/guide/promptBuilder.ts index 8ee175a..3cd3368 100644 --- a/src/guide/promptBuilder.ts +++ b/src/guide/promptBuilder.ts @@ -17,10 +17,12 @@ export function buildGuideDraftPrompt(input: GuidePromptInput): string { const candidatesJson = JSON.stringify( input.candidates.map((candidate, index) => ({ order: index + 1, + sourceCandidateId: candidate.id, timeMs: Math.round(candidate.timeMs), action: candidate.action, targetText: candidate.targetText, targetRole: candidate.targetRole, + position: candidate.position, nearbyText: candidate.nearbyText, confidence: candidate.confidence, })), @@ -36,8 +38,10 @@ export function buildGuideDraftPrompt(input: GuidePromptInput): string { "Rules:", "- Use short, explicit step instructions.", "- Prefer visible target text from OCR when it is available.", + "- Return sourceCandidateId exactly from the chosen candidate.", + "- Never use generic marker text such as Ctrl+F12 marker or Ctrl marker as a UI target.", "- Do not invent buttons or screens that are not in the candidates.", - "- If a target is unclear, describe the action by screen position or timestamp.", + "- If a target is unclear, describe the action by the candidate position and include the x/y percentages.", "", "Candidates:", candidatesJson, @@ -92,12 +96,18 @@ function buildInstruction(candidate: GuideStepCandidate, language: GuideLanguage if (target) { return `${candidate.action === "click" ? "Nhấn" : "Thực hiện thao tác"} vào "${target}".`; } + if (candidate.position) { + return `Nhấn tại vùng ${candidate.position.description} (x ${candidate.position.xPercent}%, y ${candidate.position.yPercent}%).`; + } return `Thực hiện thao tác tại mốc ${formatTimestamp(candidate.timeMs)}.`; } if (target) { return `${candidate.action === "click" ? "Click" : "Use"} "${target}".`; } + if (candidate.position) { + return `Click the ${candidate.position.description} area (x ${candidate.position.xPercent}%, y ${candidate.position.yPercent}%).`; + } return `Perform the action at ${formatTimestamp(candidate.timeMs)}.`; } diff --git a/src/guide/targetMapper.test.ts b/src/guide/targetMapper.test.ts index f89ad9d..bec6936 100644 --- a/src/guide/targetMapper.test.ts +++ b/src/guide/targetMapper.test.ts @@ -98,6 +98,7 @@ describe("buildGuideStepCandidates", () => { source: "guide-hotkey", normalizedX: 0.5, normalizedY: 0.5, + label: "Ctrl+F12 marker", }; const candidates = buildGuideStepCandidates(session); @@ -106,6 +107,12 @@ describe("buildGuideStepCandidates", () => { action: "click", targetText: "Save", targetRole: "button", + position: { + normalizedX: 0.5, + normalizedY: 0.5, + xPercent: 50, + yPercent: 50, + }, }); }); diff --git a/src/guide/targetMapper.ts b/src/guide/targetMapper.ts index 837b283..52eaeb7 100644 --- a/src/guide/targetMapper.ts +++ b/src/guide/targetMapper.ts @@ -46,8 +46,11 @@ export function buildGuideStepCandidates( 0, maxNearbyText, ); - const label = normalizeText(event.label); - const targetText = label ?? normalizeText(targetRegion?.text); + const label = normalizeEventLabelForTarget(event); + const point = getEventPoint(event); + const targetText = point + ? (normalizeText(targetRegion?.text) ?? label) + : (label ?? normalizeText(targetRegion?.text)); return { id: `candidate-${event.id}`, @@ -57,6 +60,7 @@ export function buildGuideStepCandidates( action: inferAction(event), targetText, targetRole: inferTargetRole(targetText), + position: point ? describeEventPosition(point) : undefined, nearbyText, confidence: calculateCandidateConfidence(event, targetRegion, rankedRegions[0]?.score), }; @@ -275,7 +279,7 @@ function calculateCandidateConfidence( 0.45 + clamp01(targetRegion.confidence) * 0.25 + clamp01(score ?? 0) * 0.3, ); } - if (event.label) { + if (normalizeEventLabelForTarget(event)) { return 0.75; } if (getEventPoint(event)) { @@ -307,6 +311,38 @@ function normalizeText(value: string | undefined): string | undefined { return text ? text : undefined; } +function normalizeEventLabelForTarget(event: GuideEvent): string | undefined { + const label = normalizeText(event.label); + if (!label) { + return undefined; + } + if (/^(?:ctrl(?:\s*\+\s*f12)?|control)\s+marker$/i.test(label)) { + return undefined; + } + if (/^manual\s+marker$/i.test(label)) { + return undefined; + } + return label; +} + +function describeEventPosition(point: { x: number; y: number }): GuideStepCandidate["position"] { + const normalizedX = clamp01(point.x); + const normalizedY = clamp01(point.y); + return { + normalizedX: roundPosition(normalizedX), + normalizedY: roundPosition(normalizedY), + xPercent: Math.round(normalizedX * 100), + yPercent: Math.round(normalizedY * 100), + description: describeScreenRegion(normalizedX, normalizedY), + }; +} + +function describeScreenRegion(x: number, y: number): string { + const vertical = y < 0.33 ? "top" : y > 0.66 ? "bottom" : "middle"; + const horizontal = x < 0.33 ? "left" : x > 0.66 ? "right" : "center"; + return vertical === "middle" && horizontal === "center" ? "center" : `${vertical} ${horizontal}`; +} + function isUsefulOcrText(text: string): boolean { if (!/[A-Za-z0-9À-ỹ]/.test(text)) { return false; @@ -346,6 +382,10 @@ function roundConfidence(value: number): number { return Math.round(clamp01(value) * 100) / 100; } +function roundPosition(value: number): number { + return Math.round(clamp01(value) * 1000) / 1000; +} + function clamp01(value: number): number { if (!Number.isFinite(value)) { return 0; diff --git a/src/hooks/useScreenRecorder.ts b/src/hooks/useScreenRecorder.ts index b45566a..8780bb7 100644 --- a/src/hooks/useScreenRecorder.ts +++ b/src/hooks/useScreenRecorder.ts @@ -921,7 +921,10 @@ export function useScreenRecorder(): UseScreenRecorderReturn { } const activeRecordingId = Date.now(); - const displayId = Number(selectedSource.display_id); + const displayId = + typeof selectedSource.displayId === "number" + ? selectedSource.displayId + : Number(selectedSource.display_id); const sourceType = selectedSource.id.startsWith("window:") ? "window" : "display"; const windowHandle = parseWindowHandleFromSourceId(selectedSource.id); if (webcamEnabled) { @@ -946,6 +949,7 @@ export function useScreenRecorder(): UseScreenRecorderReturn { type: sourceType, sourceId: selectedSource.id, ...(Number.isFinite(displayId) ? { displayId } : {}), + ...(selectedSource.bounds ? { bounds: selectedSource.bounds } : {}), ...(windowHandle ? { windowHandle } : {}), }, video: { @@ -1039,7 +1043,9 @@ export function useScreenRecorder(): UseScreenRecorderReturn { const activeRecordingId = Date.now(); const sourceType = selectedSource.id.startsWith("window:") ? "window" : "display"; const displayId = - Number(selectedSource.display_id) || parseMacDisplayIdFromSourceId(selectedSource.id); + typeof selectedSource.displayId === "number" + ? selectedSource.displayId + : Number(selectedSource.display_id) || parseMacDisplayIdFromSourceId(selectedSource.id); const windowId = parseMacWindowIdFromSourceId(selectedSource.id); let nativeWebcamRecorder: RecorderHandle | null = null; if (webcamEnabled) { @@ -1083,6 +1089,7 @@ export function useScreenRecorder(): UseScreenRecorderReturn { type: sourceType, sourceId: selectedSource.id, ...(displayId ? { displayId } : {}), + ...(selectedSource.bounds ? { bounds: selectedSource.bounds } : {}), ...(windowId ? { windowId } : {}), }, video: { diff --git a/src/lib/nativeWindowsRecording.ts b/src/lib/nativeWindowsRecording.ts index 5e06851..58ce6d8 100644 --- a/src/lib/nativeWindowsRecording.ts +++ b/src/lib/nativeWindowsRecording.ts @@ -6,6 +6,12 @@ export type NativeWindowsRecordingRequest = { type: NativeWindowsSourceType; sourceId: string; displayId?: number; + bounds?: { + x: number; + y: number; + width: number; + height: number; + }; windowHandle?: string; }; video: {