From 6f099b3483137fe2e3482206a6f41a844f361d66 Mon Sep 17 00:00:00 2001 From: Etienne Lescot Date: Sun, 15 Mar 2026 10:46:37 +0100 Subject: [PATCH 01/43] feat: add cursor overlay pipeline --- src/assets/cursors/Cursor=Beachball.svg | 46 ++ src/assets/cursors/Cursor=Cross.svg | 5 + src/assets/cursors/Cursor=Default.svg | 5 + src/assets/cursors/Cursor=Hand-(Grabbing).svg | 5 + src/assets/cursors/Cursor=Hand-(Open).svg | 5 + src/assets/cursors/Cursor=Hand-(Pointing).svg | 5 + src/assets/cursors/Cursor=Menu.svg | 18 + src/assets/cursors/Cursor=Move.svg | 5 + src/assets/cursors/Cursor=Resize-(Down).svg | 5 + src/assets/cursors/Cursor=Resize-(Left).svg | 5 + .../cursors/Cursor=Resize-(Left-Right).svg | 5 + src/assets/cursors/Cursor=Resize-(Right).svg | 5 + src/assets/cursors/Cursor=Resize-(Up).svg | 5 + .../cursors/Cursor=Resize-(Up-Down).svg | 5 + .../Cursor=Resize-North-East-South-West.svg | 5 + .../cursors/Cursor=Resize-North-South.svg | 5 + .../Cursor=Resize-North-West-South-East.svg | 5 + .../cursors/Cursor=Resize-West-East.svg | 5 + src/assets/cursors/Cursor=Text-Cursor.svg | 5 + src/assets/cursors/Cursor=Zoom-In.svg | 8 + src/assets/cursors/Cursor=Zoom-Out.svg | 6 + src/components/video-editor/SettingsPanel.tsx | 23 +- src/components/video-editor/VideoEditor.tsx | 26 +- src/components/video-editor/VideoPlayback.tsx | 109 ++- .../video-editor/projectPersistence.ts | 47 +- src/components/video-editor/types.ts | 24 + .../videoPlayback/cursorRenderer.ts | 766 ++++++++++++++++++ .../videoPlayback/motionSmoothing.ts | 149 ++++ .../videoPlayback/uploadedCursorAssets.ts | 70 ++ 29 files changed, 1303 insertions(+), 74 deletions(-) create mode 100644 src/assets/cursors/Cursor=Beachball.svg create mode 100644 src/assets/cursors/Cursor=Cross.svg create mode 100644 src/assets/cursors/Cursor=Default.svg create mode 100644 src/assets/cursors/Cursor=Hand-(Grabbing).svg create mode 100644 src/assets/cursors/Cursor=Hand-(Open).svg create mode 100644 
src/assets/cursors/Cursor=Hand-(Pointing).svg create mode 100644 src/assets/cursors/Cursor=Menu.svg create mode 100644 src/assets/cursors/Cursor=Move.svg create mode 100644 src/assets/cursors/Cursor=Resize-(Down).svg create mode 100644 src/assets/cursors/Cursor=Resize-(Left).svg create mode 100644 src/assets/cursors/Cursor=Resize-(Left-Right).svg create mode 100644 src/assets/cursors/Cursor=Resize-(Right).svg create mode 100644 src/assets/cursors/Cursor=Resize-(Up).svg create mode 100644 src/assets/cursors/Cursor=Resize-(Up-Down).svg create mode 100644 src/assets/cursors/Cursor=Resize-North-East-South-West.svg create mode 100644 src/assets/cursors/Cursor=Resize-North-South.svg create mode 100644 src/assets/cursors/Cursor=Resize-North-West-South-East.svg create mode 100644 src/assets/cursors/Cursor=Resize-West-East.svg create mode 100644 src/assets/cursors/Cursor=Text-Cursor.svg create mode 100644 src/assets/cursors/Cursor=Zoom-In.svg create mode 100644 src/assets/cursors/Cursor=Zoom-Out.svg create mode 100644 src/components/video-editor/videoPlayback/cursorRenderer.ts create mode 100644 src/components/video-editor/videoPlayback/motionSmoothing.ts create mode 100644 src/components/video-editor/videoPlayback/uploadedCursorAssets.ts diff --git a/src/assets/cursors/Cursor=Beachball.svg b/src/assets/cursors/Cursor=Beachball.svg new file mode 100644 index 0000000..30bdbe5 --- /dev/null +++ b/src/assets/cursors/Cursor=Beachball.svg @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/assets/cursors/Cursor=Cross.svg b/src/assets/cursors/Cursor=Cross.svg new file mode 100644 index 0000000..b404553 --- /dev/null +++ b/src/assets/cursors/Cursor=Cross.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Default.svg b/src/assets/cursors/Cursor=Default.svg new file mode 100644 index 0000000..f76f31f --- /dev/null +++ b/src/assets/cursors/Cursor=Default.svg @@ -0,0 +1,5 @@ + + + + + diff 
--git a/src/assets/cursors/Cursor=Hand-(Grabbing).svg b/src/assets/cursors/Cursor=Hand-(Grabbing).svg new file mode 100644 index 0000000..0827867 --- /dev/null +++ b/src/assets/cursors/Cursor=Hand-(Grabbing).svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Hand-(Open).svg b/src/assets/cursors/Cursor=Hand-(Open).svg new file mode 100644 index 0000000..4ceafb0 --- /dev/null +++ b/src/assets/cursors/Cursor=Hand-(Open).svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Hand-(Pointing).svg b/src/assets/cursors/Cursor=Hand-(Pointing).svg new file mode 100644 index 0000000..19a70a6 --- /dev/null +++ b/src/assets/cursors/Cursor=Hand-(Pointing).svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Menu.svg b/src/assets/cursors/Cursor=Menu.svg new file mode 100644 index 0000000..3489257 --- /dev/null +++ b/src/assets/cursors/Cursor=Menu.svg @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + + diff --git a/src/assets/cursors/Cursor=Move.svg b/src/assets/cursors/Cursor=Move.svg new file mode 100644 index 0000000..50e56b7 --- /dev/null +++ b/src/assets/cursors/Cursor=Move.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Resize-(Down).svg b/src/assets/cursors/Cursor=Resize-(Down).svg new file mode 100644 index 0000000..fba3672 --- /dev/null +++ b/src/assets/cursors/Cursor=Resize-(Down).svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Resize-(Left).svg b/src/assets/cursors/Cursor=Resize-(Left).svg new file mode 100644 index 0000000..6e21fb7 --- /dev/null +++ b/src/assets/cursors/Cursor=Resize-(Left).svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Resize-(Left-Right).svg b/src/assets/cursors/Cursor=Resize-(Left-Right).svg new file mode 100644 index 0000000..7021d22 --- /dev/null +++ b/src/assets/cursors/Cursor=Resize-(Left-Right).svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Resize-(Right).svg b/src/assets/cursors/Cursor=Resize-(Right).svg new 
file mode 100644 index 0000000..1ce801c --- /dev/null +++ b/src/assets/cursors/Cursor=Resize-(Right).svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Resize-(Up).svg b/src/assets/cursors/Cursor=Resize-(Up).svg new file mode 100644 index 0000000..9c4ac0f --- /dev/null +++ b/src/assets/cursors/Cursor=Resize-(Up).svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Resize-(Up-Down).svg b/src/assets/cursors/Cursor=Resize-(Up-Down).svg new file mode 100644 index 0000000..b01a40e --- /dev/null +++ b/src/assets/cursors/Cursor=Resize-(Up-Down).svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Resize-North-East-South-West.svg b/src/assets/cursors/Cursor=Resize-North-East-South-West.svg new file mode 100644 index 0000000..1185c1f --- /dev/null +++ b/src/assets/cursors/Cursor=Resize-North-East-South-West.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Resize-North-South.svg b/src/assets/cursors/Cursor=Resize-North-South.svg new file mode 100644 index 0000000..57eaa05 --- /dev/null +++ b/src/assets/cursors/Cursor=Resize-North-South.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Resize-North-West-South-East.svg b/src/assets/cursors/Cursor=Resize-North-West-South-East.svg new file mode 100644 index 0000000..f00fc87 --- /dev/null +++ b/src/assets/cursors/Cursor=Resize-North-West-South-East.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Resize-West-East.svg b/src/assets/cursors/Cursor=Resize-West-East.svg new file mode 100644 index 0000000..ef1929f --- /dev/null +++ b/src/assets/cursors/Cursor=Resize-West-East.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Text-Cursor.svg b/src/assets/cursors/Cursor=Text-Cursor.svg new file mode 100644 index 0000000..1bfd080 --- /dev/null +++ b/src/assets/cursors/Cursor=Text-Cursor.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/src/assets/cursors/Cursor=Zoom-In.svg b/src/assets/cursors/Cursor=Zoom-In.svg new 
file mode 100644 index 0000000..8ec9b3c --- /dev/null +++ b/src/assets/cursors/Cursor=Zoom-In.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/src/assets/cursors/Cursor=Zoom-Out.svg b/src/assets/cursors/Cursor=Zoom-Out.svg new file mode 100644 index 0000000..810878b --- /dev/null +++ b/src/assets/cursors/Cursor=Zoom-Out.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/components/video-editor/SettingsPanel.tsx b/src/components/video-editor/SettingsPanel.tsx index 9ef66b1..110f025 100644 --- a/src/components/video-editor/SettingsPanel.tsx +++ b/src/components/video-editor/SettingsPanel.tsx @@ -537,6 +537,7 @@ export function SettingsPanel({ }, [cropRegion, videoWidth, videoHeight], ); + const [showCropDropdown, setShowCropDropdown] = useState(false); const zoomEnabled = Boolean(selectedZoomDepth); const trimEnabled = Boolean(selectedTrimId); @@ -625,20 +626,6 @@ export function SettingsPanel({ } }; - const handleCropToggle = () => { - if (!showCropModal && cropRegion) { - cropSnapshotRef.current = { ...cropRegion }; - } - setShowCropModal(!showCropModal); - }; - - const handleCropCancel = () => { - if (cropSnapshotRef.current && onCropChange) { - onCropChange(cropSnapshotRef.current); - } - setShowCropModal(false); - }; - // Find selected annotation const selectedAnnotation = selectedAnnotationId ? annotationRegions.find((a) => a.id === selectedAnnotationId) @@ -1745,11 +1732,11 @@ export function SettingsPanel({ - {showCropModal && cropRegion && onCropChange && ( + {showCropDropdown && cropRegion && onCropChange && ( <>
setShowCropDropdown(false)} />
@@ -1760,7 +1747,7 @@ export function SettingsPanel({
-

{t("emptyState.noVideo")}

-

{t("emptyState.dragAndDrop")}

+

+ {hasVideoSource ? "Loading Timeline" : "No Video Loaded"} +

+

+ {hasVideoSource + ? "Video opened, waiting for duration metadata" + : "Drag and drop a video to start editing"} +

); diff --git a/src/lib/cursor/nativeCursor.ts b/src/lib/cursor/nativeCursor.ts new file mode 100644 index 0000000..23e187b --- /dev/null +++ b/src/lib/cursor/nativeCursor.ts @@ -0,0 +1,101 @@ +import { type Container, Point } from "pixi.js"; +import type { CropRegion } from "@/components/video-editor/types"; +import type { + CursorRecordingData, + CursorRecordingSample, + NativeCursorAsset, +} from "@/native/contracts"; + +export interface ActiveNativeCursorFrame { + asset: NativeCursorAsset; + sample: CursorRecordingSample; +} + +interface ProjectNativeCursorOptions { + cameraContainer: Container; + cropRegion: CropRegion; + maskRect: { width: number; height: number }; + videoContainerPosition: { x: number; y: number }; + sample: CursorRecordingSample; +} + +function clamp(value: number, min: number, max: number) { + return Math.min(max, Math.max(min, value)); +} + +function getCroppedCursorPosition(sample: CursorRecordingSample, cropRegion: CropRegion) { + if (cropRegion.width <= 0 || cropRegion.height <= 0) { + return null; + } + + const croppedCx = (sample.cx - cropRegion.x) / cropRegion.width; + const croppedCy = (sample.cy - cropRegion.y) / cropRegion.height; + + if (croppedCx < 0 || croppedCx > 1 || croppedCy < 0 || croppedCy > 1) { + return null; + } + + return { + cx: clamp(croppedCx, 0, 1), + cy: clamp(croppedCy, 0, 1), + }; +} + +export function resolveActiveNativeCursorFrame( + recordingData: CursorRecordingData | null | undefined, + timeMs: number, +): ActiveNativeCursorFrame | null { + if (!recordingData || recordingData.provider !== "native" || recordingData.assets.length === 0) { + return null; + } + + for (let index = recordingData.samples.length - 1; index >= 0; index -= 1) { + const sample = recordingData.samples[index]; + if (sample.timeMs > timeMs) { + continue; + } + + if (sample.visible === false || !sample.assetId) { + return null; + } + + const asset = recordingData.assets.find((candidate) => candidate.id === sample.assetId); + if (!asset) 
{ + return null; + } + + return { sample, asset }; + } + + return null; +} + +export function projectNativeCursorToStage({ + cameraContainer, + cropRegion, + maskRect, + videoContainerPosition, + sample, +}: ProjectNativeCursorOptions) { + const croppedPosition = getCroppedCursorPosition(sample, cropRegion); + if (!croppedPosition) { + return null; + } + + const localPoint = new Point( + videoContainerPosition.x + croppedPosition.cx * maskRect.width, + videoContainerPosition.y + croppedPosition.cy * maskRect.height, + ); + + return cameraContainer.toGlobal(localPoint); +} + +export function getNativeCursorDisplayMetrics(asset: NativeCursorAsset, deviceScaleFactor: number) { + const scaleFactor = asset.scaleFactor ?? deviceScaleFactor ?? 1; + return { + width: asset.width / scaleFactor, + height: asset.height / scaleFactor, + hotspotX: asset.hotspotX / scaleFactor, + hotspotY: asset.hotspotY / scaleFactor, + }; +} diff --git a/src/lib/exporter/frameRenderer.ts b/src/lib/exporter/frameRenderer.ts index 017af83..f13735d 100644 --- a/src/lib/exporter/frameRenderer.ts +++ b/src/lib/exporter/frameRenderer.ts @@ -56,8 +56,14 @@ import { type Size, type StyledRenderRect, } from "@/lib/compositeLayout"; +import { + getNativeCursorDisplayMetrics, + projectNativeCursorToStage, + resolveActiveNativeCursorFrame, +} from "@/lib/cursor/nativeCursor"; import { BackgroundLoadError, classifyWallpaper, resolveImageWallpaperUrl } from "@/lib/wallpaper"; import { drawCanvasClipPath } from "@/lib/webcamMaskShapes"; +import type { CursorRecordingData, NativeCursorAsset } from "@/native/contracts"; import { renderAnnotations } from "./annotationRenderer"; import { getLinearGradientPoints, @@ -79,6 +85,7 @@ interface FrameRenderConfig { borderRadius?: number; padding?: number; cropRegion: CropRegion; + cursorRecordingData?: CursorRecordingData | null; videoWidth: number; videoHeight: number; webcamSize?: Size | null; @@ -136,6 +143,7 @@ export class FrameRenderer { private rasterCtx: 
CanvasRenderingContext2D | null = null; private threeDPass: ThreeDPass | null = null; private currentRotation3D: Rotation3D = { ...DEFAULT_ROTATION_3D }; + private cursorImageCache = new Map(); private config: FrameRenderConfig; private animationState: AnimationState; private layoutCache: LayoutCache | null = null; @@ -468,6 +476,8 @@ export class FrameRenderer { } } + await this.drawNativeCursor(timeMs); + // Render annotations on top of foreground (so they rotate with recording). if ( this.config.annotationRegions && @@ -543,7 +553,63 @@ export class FrameRenderer { } } - private updateLayout(webcamFrame?: VideoFrame | null): void { + private async drawNativeCursor(timeMs: number) { + if (!this.compositeCtx || !this.cameraContainer || !this.videoContainer || !this.layoutCache) { + return; + } + + const activeNativeCursor = resolveActiveNativeCursorFrame( + this.config.cursorRecordingData, + timeMs, + ); + if (!activeNativeCursor) { + return; + } + + const projectedPoint = projectNativeCursorToStage({ + cameraContainer: this.cameraContainer, + cropRegion: this.config.cropRegion, + maskRect: this.layoutCache.maskRect, + videoContainerPosition: { + x: this.videoContainer.x, + y: this.videoContainer.y, + }, + sample: activeNativeCursor.sample, + }); + if (!projectedPoint) { + return; + } + + const image = await this.getCursorImage(activeNativeCursor.asset); + const metrics = getNativeCursorDisplayMetrics(activeNativeCursor.asset, 1); + + this.compositeCtx.drawImage( + image, + projectedPoint.x - metrics.hotspotX, + projectedPoint.y - metrics.hotspotY, + metrics.width, + metrics.height, + ); + } + + private async getCursorImage(asset: NativeCursorAsset) { + const cachedImage = this.cursorImageCache.get(asset.id); + if (cachedImage) { + return cachedImage; + } + + const image = new Image(); + await new Promise((resolve, reject) => { + image.onload = () => resolve(); + image.onerror = () => reject(new Error(`Failed to load cursor asset ${asset.id}`)); + image.src = 
asset.imageDataUrl; + }); + + this.cursorImageCache.set(asset.id, image); + return image; + } + + private updateLayout(): void { if (!this.app || !this.videoSprite || !this.maskGraphics || !this.videoContainer) return; const { width, height } = this.config; @@ -999,5 +1065,6 @@ export class FrameRenderer { this.threeDPass.destroy(); this.threeDPass = null; } + this.cursorImageCache.clear(); } } diff --git a/src/lib/exporter/gifExporter.ts b/src/lib/exporter/gifExporter.ts index 0d7a432..02564db 100644 --- a/src/lib/exporter/gifExporter.ts +++ b/src/lib/exporter/gifExporter.ts @@ -11,6 +11,7 @@ import type { import { BackgroundLoadError } from "@/lib/wallpaper"; import { getPlatform } from "@/utils/platformUtils"; import { AsyncVideoFrameQueue } from "./asyncVideoFrameQueue"; +import type { CursorRecordingData } from "@/native/contracts"; import { FrameRenderer } from "./frameRenderer"; import { StreamingVideoDecoder } from "./streamingDecoder"; import type { @@ -47,6 +48,7 @@ interface GifExporterConfig { webcamMaskShape?: import("@/components/video-editor/types").WebcamMaskShape; webcamSizePreset?: WebcamSizePreset; webcamPosition?: { cx: number; cy: number } | null; + cursorRecordingData?: CursorRecordingData | null; annotationRegions?: AnnotationRegion[]; previewWidth?: number; previewHeight?: number; @@ -151,6 +153,7 @@ export class GifExporter { borderRadius: this.config.borderRadius, padding: this.config.padding, cropRegion: this.config.cropRegion, + cursorRecordingData: this.config.cursorRecordingData, videoWidth: videoInfo.width, videoHeight: videoInfo.height, webcamSize: webcamInfo ? 
{ width: webcamInfo.width, height: webcamInfo.height } : null, diff --git a/src/lib/exporter/videoExporter.ts b/src/lib/exporter/videoExporter.ts index e064ba7..edddd05 100644 --- a/src/lib/exporter/videoExporter.ts +++ b/src/lib/exporter/videoExporter.ts @@ -10,6 +10,7 @@ import type { import { BackgroundLoadError } from "@/lib/wallpaper"; import { getPlatform } from "@/utils/platformUtils"; import { AsyncVideoFrameQueue } from "./asyncVideoFrameQueue"; +import type { CursorRecordingData } from "@/native/contracts"; import { AudioProcessor } from "./audioEncoder"; import { FrameRenderer } from "./frameRenderer"; import { VideoMuxer } from "./muxer"; @@ -38,6 +39,7 @@ interface VideoExporterConfig extends ExportConfig { webcamMaskShape?: import("@/components/video-editor/types").WebcamMaskShape; webcamSizePreset?: WebcamSizePreset; webcamPosition?: { cx: number; cy: number } | null; + cursorRecordingData?: CursorRecordingData | null; annotationRegions?: AnnotationRegion[]; previewWidth?: number; previewHeight?: number; @@ -146,6 +148,7 @@ export class VideoExporter { borderRadius: this.config.borderRadius, padding: this.config.padding, cropRegion: this.config.cropRegion, + cursorRecordingData: this.config.cursorRecordingData, videoWidth: videoInfo.width, videoHeight: videoInfo.height, webcamSize: webcamInfo ? 
{ width: webcamInfo.width, height: webcamInfo.height } : null, From e9650225bade7f4f2eca86f29c5f5f03145492df Mon Sep 17 00:00:00 2001 From: Etienne Lescot Date: Thu, 26 Mar 2026 11:16:41 +0100 Subject: [PATCH 04/43] feat: add cursor overlay pipeline for high-fidelity cursor recording and playback - Implement native bridge for Windows cursor capture via PowerShell/C# - Add cursor-free capture using getDisplayMedia with setDisplayMediaRequestHandler - Update video player and exporters to support native cursor telemetry - Enable system audio capture on Windows via WASAPI loopback - Add interpolation for smoother cursor movement in playback and export - Improve cursor scaling and visibility handling in editor and playback --- electron/ipc/handlers.ts | 123 ++++-- electron/main.ts | 2 +- .../native-bridge/cursor/recording/factory.ts | 2 + .../windowsNativeRecordingSession.script.ts | 216 +++++++++++ .../windowsNativeRecordingSession.ts | 352 ++++++++---------- .../windowsNativeRecordingSession.types.ts | 49 +++ src/components/launch/LaunchWindow.tsx | 5 + src/components/video-editor/VideoEditor.tsx | 2 + src/components/video-editor/VideoPlayback.tsx | 81 +++- src/hooks/useScreenRecorder.ts | 50 +-- src/lib/cursor/nativeCursor.ts | 78 +++- src/lib/exporter/frameRenderer.ts | 19 +- src/lib/exporter/gifExporter.ts | 2 + src/lib/exporter/videoExporter.ts | 2 + 14 files changed, 686 insertions(+), 297 deletions(-) create mode 100644 electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts create mode 100644 electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 1d73a9b..d50cab3 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -4,8 +4,6 @@ import os from "node:os"; import path from "node:path"; import { fileURLToPath, pathToFileURL } from "node:url"; -const nodeRequire = createRequire(import.meta.url); - import { app, BrowserWindow, @@ 
-16,10 +14,7 @@ import { shell, systemPreferences, } from "electron"; -import { - type CursorTelemetryPoint, - createCursorTelemetryBuffer, -} from "../../src/lib/cursorTelemetryBuffer"; +import type { DesktopCapturerSource } from "electron"; import { normalizeProjectMedia, normalizeRecordingSession, @@ -198,11 +193,24 @@ async function getApprovedProjectSession( type SelectedSource = { name: string; + id?: string; + display_id?: string; [key: string]: unknown; }; let selectedSource: SelectedSource | null = null; +let selectedDesktopSource: DesktopCapturerSource | null = null; +let lastEnumeratedSources = new Map(); let currentProjectPath: string | null = null; +let currentRecordingSession: RecordingSession | null = null; + +/** + * Returns the cached DesktopCapturerSource set when the user picked a source. + * Used by setDisplayMediaRequestHandler in main.ts for cursor-free capture. + */ +export function getSelectedDesktopSource(): DesktopCapturerSource | null { + return selectedDesktopSource; +} let currentVideoPath: string | null = null; function normalizePath(filePath: string) { @@ -238,16 +246,12 @@ function isTrustedProjectPath(filePath?: string | null) { } const CURSOR_TELEMETRY_VERSION = 2; -const CURSOR_SAMPLE_INTERVAL_MS = 100; -const MAX_CURSOR_SAMPLES = 60 * 60 * 10; // 1 hour @ 10Hz +const CURSOR_SAMPLE_INTERVAL_MS = 33; +const MAX_CURSOR_SAMPLES = 60 * 60 * 30; // 1 hour @ 30Hz let cursorRecordingSession: CursorRecordingSession | null = null; let pendingCursorRecordingData: CursorRecordingData | null = null; -function clamp(value: number, min: number, max: number) { - return Math.min(max, Math.max(min, value)); -} - function normalizeCursorSample(sample: unknown): CursorRecordingSample | null { if (!sample || typeof sample !== "object") { return null; @@ -259,8 +263,8 @@ function normalizeCursorSample(sample: unknown): CursorRecordingSample | null { typeof point.timeMs === "number" && Number.isFinite(point.timeMs) ? 
Math.max(0, point.timeMs) : 0, - cx: typeof point.cx === "number" && Number.isFinite(point.cx) ? clamp(point.cx, 0, 1) : 0.5, - cy: typeof point.cy === "number" && Number.isFinite(point.cy) ? clamp(point.cy, 0, 1) : 0.5, + cx: typeof point.cx === "number" && Number.isFinite(point.cx) ? point.cx : 0.5, + cy: typeof point.cy === "number" && Number.isFinite(point.cy) ? point.cy : 0.5, assetId: typeof point.assetId === "string" ? point.assetId : null, visible: typeof point.visible === "boolean" ? point.visible : true, }; @@ -395,6 +399,55 @@ function getSelectedSourceBounds() { return (sourceDisplay ?? screen.getDisplayNearestPoint(cursor)).bounds; } +function getSelectedSourceId() { + return typeof selectedSource?.id === "string" ? selectedSource.id : null; +} + +function setCurrentRecordingSessionState(session: RecordingSession | null) { + currentRecordingSession = session; + currentVideoPath = session?.screenVideoPath ?? null; +} + +async function storeRecordedSessionFiles(payload: StoreRecordedSessionInput) { + const createdAt = + typeof payload.createdAt === "number" && Number.isFinite(payload.createdAt) + ? payload.createdAt + : Date.now(); + const screenVideoPath = resolveRecordingOutputPath(payload.screen.fileName); + await fs.writeFile(screenVideoPath, Buffer.from(payload.screen.videoData)); + + let webcamVideoPath: string | undefined; + if (payload.webcam) { + webcamVideoPath = resolveRecordingOutputPath(payload.webcam.fileName); + await fs.writeFile(webcamVideoPath, Buffer.from(payload.webcam.videoData)); + } + + const session: RecordingSession = webcamVideoPath + ? 
{ screenVideoPath, webcamVideoPath, createdAt } + : { screenVideoPath, createdAt }; + setCurrentRecordingSessionState(session); + currentProjectPath = null; + + const telemetryPath = `${screenVideoPath}.cursor.json`; + if (pendingCursorRecordingData && pendingCursorRecordingData.samples.length > 0) { + await fs.writeFile(telemetryPath, JSON.stringify(pendingCursorRecordingData, null, 2), "utf-8"); + } + pendingCursorRecordingData = null; + + const sessionManifestPath = path.join( + RECORDINGS_DIR, + `${path.parse(payload.screen.fileName).name}${RECORDING_SESSION_SUFFIX}`, + ); + await fs.writeFile(sessionManifestPath, JSON.stringify(session, null, 2), "utf-8"); + + return { + success: true, + path: screenVideoPath, + session, + message: "Recording session stored successfully", + }; +} + export function registerIpcHandlers( createEditorWindow: () => void, createSourceSelectorWindow: () => BrowserWindow, @@ -404,6 +457,7 @@ export function registerIpcHandlers( ) { ipcMain.handle("get-sources", async (_, opts) => { const sources = await desktopCapturer.getSources(opts); + lastEnumeratedSources = new Map(sources.map((source) => [source.id, source])); return sources.map((source) => ({ id: source.id, name: source.name, @@ -413,8 +467,26 @@ export function registerIpcHandlers( })); }); - ipcMain.handle("select-source", (_, source: SelectedSource) => { + ipcMain.handle("select-source", async (_, source: SelectedSource) => { selectedSource = source; + // Reuse the exact source object returned during enumeration to avoid + // Windows window-source id mismatches across separate getSources() calls. + selectedDesktopSource = + typeof source.id === "string" ? lastEnumeratedSources.get(source.id) ?? 
null : null; + + if (!selectedDesktopSource && typeof source.id === "string") { + try { + const sources = await desktopCapturer.getSources({ + types: ["screen", "window"], + thumbnailSize: { width: 0, height: 0 }, + fetchWindowIcons: true, + }); + lastEnumeratedSources = new Map(sources.map((candidate) => [candidate.id, candidate])); + selectedDesktopSource = lastEnumeratedSources.get(source.id) ?? null; + } catch { + selectedDesktopSource = null; + } + } const sourceSelectorWin = getSourceSelectorWindow(); if (sourceSelectorWin) { sourceSelectorWin.close(); @@ -519,25 +591,7 @@ export function registerIpcHandlers( ipcMain.handle("store-recorded-session", async (_, payload: StoreRecordedSessionInput) => { try { - const videoPath = path.join(RECORDINGS_DIR, fileName); - await fs.writeFile(videoPath, Buffer.from(videoData)); - currentProjectPath = null; - - const telemetryPath = `${videoPath}.cursor.json`; - if (pendingCursorRecordingData && pendingCursorRecordingData.samples.length > 0) { - await fs.writeFile( - telemetryPath, - JSON.stringify(pendingCursorRecordingData, null, 2), - "utf-8", - ); - } - pendingCursorRecordingData = null; - - return { - success: true, - path: videoPath, - message: "Video stored successfully", - }; + return await storeRecordedSessionFiles(payload); } catch (error) { console.error("Failed to store recording session:", error); return { @@ -602,6 +656,7 @@ export function registerIpcHandlers( maxSamples: MAX_CURSOR_SAMPLES, platform: process.platform, sampleIntervalMs: CURSOR_SAMPLE_INTERVAL_MS, + sourceId: getSelectedSourceId(), }); try { diff --git a/electron/main.ts b/electron/main.ts index 4e443a9..1db4740 100644 --- a/electron/main.ts +++ b/electron/main.ts @@ -13,7 +13,7 @@ import { Tray, } from "electron"; import { mainT, setMainLocale } from "./i18n"; -import { registerIpcHandlers } from "./ipc/handlers"; +import { getSelectedDesktopSource, registerIpcHandlers } from "./ipc/handlers"; import { createCountdownOverlayWindow, 
createEditorWindow, diff --git a/electron/native-bridge/cursor/recording/factory.ts b/electron/native-bridge/cursor/recording/factory.ts index fe92991..4e0f75c 100644 --- a/electron/native-bridge/cursor/recording/factory.ts +++ b/electron/native-bridge/cursor/recording/factory.ts @@ -8,6 +8,7 @@ interface CreateCursorRecordingSessionOptions { maxSamples: number; platform: NodeJS.Platform; sampleIntervalMs: number; + sourceId?: string | null; } export function createCursorRecordingSession( @@ -18,6 +19,7 @@ export function createCursorRecordingSession( getDisplayBounds: options.getDisplayBounds, maxSamples: options.maxSamples, sampleIntervalMs: options.sampleIntervalMs, + sourceId: options.sourceId, }); } diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts new file mode 100644 index 0000000..b7a11cb --- /dev/null +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts @@ -0,0 +1,216 @@ +export function parseWindowHandleFromSourceId(sourceId?: string | null) { + if (!sourceId?.startsWith("window:")) { + return null; + } + + const handlePart = sourceId.split(":")[1]; + if (!handlePart || !/^\d+$/.test(handlePart)) { + return null; + } + + return handlePart; +} + +export function buildPowerShellCommand(sampleIntervalMs: number, windowHandle?: string | null) { + const script = String.raw` +$ErrorActionPreference = 'Stop' +Add-Type -AssemblyName System.Drawing + +$targetWindowHandle = ${windowHandle ? 
`'${windowHandle}'` : '$null'} + +$source = @" +using System; +using System.Runtime.InteropServices; + +public static class OpenScreenCursorInterop { + [StructLayout(LayoutKind.Sequential)] + public struct POINT { + public int X; + public int Y; + } + + [StructLayout(LayoutKind.Sequential)] + public struct CURSORINFO { + public int cbSize; + public int flags; + public IntPtr hCursor; + public POINT ptScreenPos; + } + + [StructLayout(LayoutKind.Sequential)] + public struct ICONINFO { + [MarshalAs(UnmanagedType.Bool)] + public bool fIcon; + public int xHotspot; + public int yHotspot; + public IntPtr hbmMask; + public IntPtr hbmColor; + } + + [StructLayout(LayoutKind.Sequential)] + public struct RECT { + public int Left; + public int Top; + public int Right; + public int Bottom; + } + + [DllImport("user32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool GetCursorInfo(ref CURSORINFO pci); + + [DllImport("user32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool GetWindowRect(IntPtr hWnd, out RECT lpRect); + + [DllImport("user32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool IsWindow(IntPtr hWnd); + + [DllImport("user32.dll", SetLastError = true)] + public static extern IntPtr CopyIcon(IntPtr hIcon); + + [DllImport("user32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool DestroyIcon(IntPtr hIcon); + + [DllImport("user32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool GetIconInfo(IntPtr hIcon, out ICONINFO piconinfo); + + [DllImport("gdi32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool DeleteObject(IntPtr hObject); +} +"@ + +Add-Type -TypeDefinition $source + +function Write-JsonLine($payload) { + [Console]::Out.WriteLine(($payload | ConvertTo-Json -Compress -Depth 6)) +} + 
+function Get-TargetBounds() { + if ([string]::IsNullOrWhiteSpace($targetWindowHandle)) { + return $null + } + + try { + $handleValue = [int64]::Parse($targetWindowHandle) + $windowHandle = [IntPtr]::new($handleValue) + if (-not [OpenScreenCursorInterop]::IsWindow($windowHandle)) { + return $null + } + + $rect = New-Object OpenScreenCursorInterop+RECT + if (-not [OpenScreenCursorInterop]::GetWindowRect($windowHandle, [ref]$rect)) { + return $null + } + + $width = $rect.Right - $rect.Left + $height = $rect.Bottom - $rect.Top + if ($width -le 0 -or $height -le 0) { + return $null + } + + return @{ + x = $rect.Left + y = $rect.Top + width = $width + height = $height + } + } + catch { + return $null + } +} + +function Get-CursorAsset($cursorHandle, $cursorId) { + $copiedHandle = [OpenScreenCursorInterop]::CopyIcon($cursorHandle) + if ($copiedHandle -eq [IntPtr]::Zero) { + return $null + } + + $iconInfo = New-Object OpenScreenCursorInterop+ICONINFO + $hasIconInfo = [OpenScreenCursorInterop]::GetIconInfo($copiedHandle, [ref]$iconInfo) + + try { + $icon = [System.Drawing.Icon]::FromHandle($copiedHandle) + $bitmap = New-Object System.Drawing.Bitmap $icon.Width, $icon.Height, ([System.Drawing.Imaging.PixelFormat]::Format32bppArgb) + $graphics = [System.Drawing.Graphics]::FromImage($bitmap) + $memoryStream = New-Object System.IO.MemoryStream + + try { + $graphics.Clear([System.Drawing.Color]::Transparent) + $graphics.DrawIcon($icon, 0, 0) + $bitmap.Save($memoryStream, [System.Drawing.Imaging.ImageFormat]::Png) + $base64 = [System.Convert]::ToBase64String($memoryStream.ToArray()) + + return @{ + id = $cursorId + imageDataUrl = "data:image/png;base64,$base64" + width = $bitmap.Width + height = $bitmap.Height + hotspotX = if ($hasIconInfo) { $iconInfo.xHotspot } else { 0 } + hotspotY = if ($hasIconInfo) { $iconInfo.yHotspot } else { 0 } + } + } + finally { + $memoryStream.Dispose() + $graphics.Dispose() + $bitmap.Dispose() + $icon.Dispose() + } + } + finally { + if 
($hasIconInfo) { + if ($iconInfo.hbmMask -ne [IntPtr]::Zero) { + [OpenScreenCursorInterop]::DeleteObject($iconInfo.hbmMask) | Out-Null + } + if ($iconInfo.hbmColor -ne [IntPtr]::Zero) { + [OpenScreenCursorInterop]::DeleteObject($iconInfo.hbmColor) | Out-Null + } + } + [OpenScreenCursorInterop]::DestroyIcon($copiedHandle) | Out-Null + } +} + +Write-JsonLine @{ type = 'ready'; timestampMs = [DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds() } + +$lastCursorId = $null +while ($true) { + $cursorInfo = New-Object OpenScreenCursorInterop+CURSORINFO + $cursorInfo.cbSize = [Runtime.InteropServices.Marshal]::SizeOf([type][OpenScreenCursorInterop+CURSORINFO]) + + if (-not [OpenScreenCursorInterop]::GetCursorInfo([ref]$cursorInfo)) { + Write-JsonLine @{ type = 'error'; timestampMs = [DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds(); message = 'GetCursorInfo failed' } + Start-Sleep -Milliseconds ${sampleIntervalMs} + continue + } + + $visible = ($cursorInfo.flags -band 1) -ne 0 + $cursorId = if ($cursorInfo.hCursor -eq [IntPtr]::Zero) { $null } else { ('0x{0:X}' -f $cursorInfo.hCursor.ToInt64()) } + $asset = $null + + if ($visible -and $cursorId -and $cursorId -ne $lastCursorId) { + $asset = Get-CursorAsset -cursorHandle $cursorInfo.hCursor -cursorId $cursorId + $lastCursorId = $cursorId + } + + Write-JsonLine @{ + type = 'sample' + timestampMs = [DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds() + x = $cursorInfo.ptScreenPos.X + y = $cursorInfo.ptScreenPos.Y + visible = $visible + handle = $cursorId + bounds = Get-TargetBounds + asset = $asset + } + + Start-Sleep -Milliseconds ${sampleIntervalMs} +} +`; + + return Buffer.from(script, "utf16le").toString("base64"); +} diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts index a0540ed..d5e43d7 100644 --- a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts +++ 
b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts @@ -1,206 +1,23 @@ import { type ChildProcessByStdio, spawn } from "node:child_process"; import type { Readable } from "node:stream"; -import { type Rectangle, screen } from "electron"; +import { screen } from "electron"; import type { CursorRecordingData, CursorRecordingSample, NativeCursorAsset, } from "../../../../src/native/contracts"; import type { CursorRecordingSession } from "./session"; +import { buildPowerShellCommand, parseWindowHandleFromSourceId } from "./windowsNativeRecordingSession.script"; +import type { + WindowsCursorEvent, + WindowsNativeRecordingSessionOptions, +} from "./windowsNativeRecordingSession.types"; -interface WindowsCursorSampleEvent { - type: "sample"; - timestampMs: number; - x: number; - y: number; - visible: boolean; - handle: string | null; - asset?: WindowsCursorAssetPayload; -} +const READY_TIMEOUT_MS = 5_000; -interface WindowsCursorReadyEvent { - type: "ready"; - timestampMs: number; -} - -interface WindowsCursorErrorEvent { - type: "error"; - timestampMs: number; - message: string; -} - -interface WindowsCursorAssetPayload { - id: string; - imageDataUrl: string; - width: number; - height: number; - hotspotX: number; - hotspotY: number; -} - -type WindowsCursorEvent = - | WindowsCursorSampleEvent - | WindowsCursorReadyEvent - | WindowsCursorErrorEvent; - -interface WindowsNativeRecordingSessionOptions { - getDisplayBounds: () => Rectangle | null; - maxSamples: number; - sampleIntervalMs: number; -} - -function clamp(value: number, min: number, max: number) { - return Math.min(max, Math.max(min, value)); -} - -function buildPowerShellCommand(sampleIntervalMs: number) { - const script = String.raw` -$ErrorActionPreference = 'Stop' -Add-Type -AssemblyName System.Drawing - -$source = @" -using System; -using System.Runtime.InteropServices; - -public static class OpenScreenCursorInterop { - [StructLayout(LayoutKind.Sequential)] - public struct POINT { - 
public int X; - public int Y; - } - - [StructLayout(LayoutKind.Sequential)] - public struct CURSORINFO { - public int cbSize; - public int flags; - public IntPtr hCursor; - public POINT ptScreenPos; - } - - [StructLayout(LayoutKind.Sequential)] - public struct ICONINFO { - [MarshalAs(UnmanagedType.Bool)] - public bool fIcon; - public int xHotspot; - public int yHotspot; - public IntPtr hbmMask; - public IntPtr hbmColor; - } - - [DllImport("user32.dll", SetLastError = true)] - [return: MarshalAs(UnmanagedType.Bool)] - public static extern bool GetCursorInfo(ref CURSORINFO pci); - - [DllImport("user32.dll", SetLastError = true)] - public static extern IntPtr CopyIcon(IntPtr hIcon); - - [DllImport("user32.dll", SetLastError = true)] - [return: MarshalAs(UnmanagedType.Bool)] - public static extern bool DestroyIcon(IntPtr hIcon); - - [DllImport("user32.dll", SetLastError = true)] - [return: MarshalAs(UnmanagedType.Bool)] - public static extern bool GetIconInfo(IntPtr hIcon, out ICONINFO piconinfo); - - [DllImport("gdi32.dll", SetLastError = true)] - [return: MarshalAs(UnmanagedType.Bool)] - public static extern bool DeleteObject(IntPtr hObject); -} -"@ - -Add-Type -TypeDefinition $source - -function Write-JsonLine($payload) { - [Console]::Out.WriteLine(($payload | ConvertTo-Json -Compress -Depth 6)) -} - -function Get-CursorAsset($cursorHandle, $cursorId) { - $copiedHandle = [OpenScreenCursorInterop]::CopyIcon($cursorHandle) - if ($copiedHandle -eq [IntPtr]::Zero) { - return $null - } - - $iconInfo = New-Object OpenScreenCursorInterop+ICONINFO - $hasIconInfo = [OpenScreenCursorInterop]::GetIconInfo($copiedHandle, [ref]$iconInfo) - - try { - $icon = [System.Drawing.Icon]::FromHandle($copiedHandle) - $bitmap = New-Object System.Drawing.Bitmap $icon.Width, $icon.Height, ([System.Drawing.Imaging.PixelFormat]::Format32bppArgb) - $graphics = [System.Drawing.Graphics]::FromImage($bitmap) - $memoryStream = New-Object System.IO.MemoryStream - - try { - 
$graphics.Clear([System.Drawing.Color]::Transparent) - $graphics.DrawIcon($icon, 0, 0) - $bitmap.Save($memoryStream, [System.Drawing.Imaging.ImageFormat]::Png) - $base64 = [System.Convert]::ToBase64String($memoryStream.ToArray()) - - return @{ - id = $cursorId - imageDataUrl = "data:image/png;base64,$base64" - width = $bitmap.Width - height = $bitmap.Height - hotspotX = if ($hasIconInfo) { $iconInfo.xHotspot } else { 0 } - hotspotY = if ($hasIconInfo) { $iconInfo.yHotspot } else { 0 } - } - } - finally { - $memoryStream.Dispose() - $graphics.Dispose() - $bitmap.Dispose() - $icon.Dispose() - } - } - finally { - if ($hasIconInfo) { - if ($iconInfo.hbmMask -ne [IntPtr]::Zero) { - [OpenScreenCursorInterop]::DeleteObject($iconInfo.hbmMask) | Out-Null - } - if ($iconInfo.hbmColor -ne [IntPtr]::Zero) { - [OpenScreenCursorInterop]::DeleteObject($iconInfo.hbmColor) | Out-Null - } - } - [OpenScreenCursorInterop]::DestroyIcon($copiedHandle) | Out-Null - } -} - -Write-JsonLine @{ type = 'ready'; timestampMs = [DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds() } - -$lastCursorId = $null -while ($true) { - $cursorInfo = New-Object OpenScreenCursorInterop+CURSORINFO - $cursorInfo.cbSize = [Runtime.InteropServices.Marshal]::SizeOf([type][OpenScreenCursorInterop+CURSORINFO]) - - if (-not [OpenScreenCursorInterop]::GetCursorInfo([ref]$cursorInfo)) { - Write-JsonLine @{ type = 'error'; timestampMs = [DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds(); message = 'GetCursorInfo failed' } - Start-Sleep -Milliseconds ${sampleIntervalMs} - continue - } - - $visible = ($cursorInfo.flags -band 1) -ne 0 - $cursorId = if ($cursorInfo.hCursor -eq [IntPtr]::Zero) { $null } else { ('0x{0:X}' -f $cursorInfo.hCursor.ToInt64()) } - $asset = $null - - if ($visible -and $cursorId -and $cursorId -ne $lastCursorId) { - $asset = Get-CursorAsset -cursorHandle $cursorInfo.hCursor -cursorId $cursorId - $lastCursorId = $cursorId - } - - Write-JsonLine @{ - type = 'sample' - timestampMs = 
[DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds() - x = $cursorInfo.ptScreenPos.X - y = $cursorInfo.ptScreenPos.Y - visible = $visible - handle = $cursorId - asset = $asset - } - - Start-Sleep -Milliseconds ${sampleIntervalMs} -} -`; - - return Buffer.from(script, "utf16le").toString("base64"); +interface NormalizedSample { + sample: CursorRecordingSample; + withinBounds: boolean; } export class WindowsNativeRecordingSession implements CursorRecordingSession { @@ -209,6 +26,11 @@ export class WindowsNativeRecordingSession implements CursorRecordingSession { private process: ChildProcessByStdio | null = null; private lineBuffer = ""; private startTimeMs = 0; + private readyResolve: (() => void) | null = null; + private readyReject: ((error: Error) => void) | null = null; + private readyTimer: NodeJS.Timeout | null = null; + private sampleCount = 0; + private outOfBoundsSampleCount = 0; constructor(private readonly options: WindowsNativeRecordingSessionOptions) {} @@ -217,8 +39,13 @@ export class WindowsNativeRecordingSession implements CursorRecordingSession { this.samples = []; this.lineBuffer = ""; this.startTimeMs = Date.now(); + this.sampleCount = 0; + this.outOfBoundsSampleCount = 0; - const encodedCommand = buildPowerShellCommand(this.options.sampleIntervalMs); + const encodedCommand = buildPowerShellCommand( + this.options.sampleIntervalMs, + parseWindowHandleFromSourceId(this.options.sourceId), + ); const child = spawn( "powershell.exe", [ @@ -237,24 +64,58 @@ export class WindowsNativeRecordingSession implements CursorRecordingSession { ); this.process = child; + this.logDiagnostic("spawn", { + pid: child.pid ?? null, + sampleIntervalMs: this.options.sampleIntervalMs, + sourceId: this.options.sourceId ?? 
null, + windowHandle: parseWindowHandleFromSourceId(this.options.sourceId), + }); + child.stdout.setEncoding("utf8"); child.stdout.on("data", (chunk: string) => { this.handleStdoutChunk(chunk); }); child.stderr.setEncoding("utf8"); child.stderr.on("data", (chunk: string) => { - console.error("[cursor-native]", chunk.trim()); + const message = chunk.trim(); + if (message) { + this.logDiagnostic("stderr", { message }); + } + console.error("[cursor-native]", message); }); + child.once("exit", (code, signal) => { + this.logDiagnostic("exit", { + code, + signal, + sampleCount: this.sampleCount, + assetCount: this.assets.size, + outOfBoundsSampleCount: this.outOfBoundsSampleCount, + }); + this.rejectReady(new Error(`Windows cursor helper exited before ready (code=${code}, signal=${signal})`)); + }); + child.once("error", (error) => { + this.logDiagnostic("process-error", { message: error.message }); + this.rejectReady(error); + }); + + await this.waitUntilReady(); } async stop(): Promise { const child = this.process; this.process = null; + this.clearReadyState(); if (child && !child.killed) { child.kill(); } + this.logDiagnostic("stop", { + sampleCount: this.sampleCount, + assetCount: this.assets.size, + outOfBoundsSampleCount: this.outOfBoundsSampleCount, + }); + return { version: 2, provider: this.assets.size > 0 ? 
"native" : "none", @@ -285,11 +146,14 @@ export class WindowsNativeRecordingSession implements CursorRecordingSession { private handleEvent(payload: WindowsCursorEvent) { if (payload.type === "error") { + this.logDiagnostic("helper-error", { message: payload.message }); console.error("Windows cursor helper error:", payload.message); return; } if (payload.type === "ready") { + this.logDiagnostic("ready", { timestampMs: payload.timestampMs }); + this.resolveReady(); return; } @@ -305,22 +169,100 @@ export class WindowsNativeRecordingSession implements CursorRecordingSession { hotspotY: payload.asset.hotspotY, scaleFactor: assetDisplay.scaleFactor, }); + this.logDiagnostic("asset", { + id: payload.asset.id, + width: payload.asset.width, + height: payload.asset.height, + hotspotX: payload.asset.hotspotX, + hotspotY: payload.asset.hotspotY, + scaleFactor: assetDisplay.scaleFactor, + }); } - const bounds = this.options.getDisplayBounds() ?? screen.getPrimaryDisplay().bounds; - const width = Math.max(1, bounds.width); - const height = Math.max(1, bounds.height); + const normalized = this.normalizeSample(payload); + this.sampleCount += 1; + if (!normalized.withinBounds) { + this.outOfBoundsSampleCount += 1; + } - this.samples.push({ - timeMs: Math.max(0, payload.timestampMs - this.startTimeMs), - cx: clamp((payload.x - bounds.x) / width, 0, 1), - cy: clamp((payload.y - bounds.y) / height, 0, 1), - assetId: payload.handle, - visible: payload.visible, - }); + this.samples.push(normalized.sample); if (this.samples.length > this.options.maxSamples) { this.samples.shift(); } } + + private normalizeSample(payload: Extract): NormalizedSample { + const bounds = payload.bounds ?? this.options.getDisplayBounds() ?? 
screen.getPrimaryDisplay().bounds; + const width = Math.max(1, bounds.width); + const height = Math.max(1, bounds.height); + const normalizedX = (payload.x - bounds.x) / width; + const normalizedY = (payload.y - bounds.y) / height; + const withinBounds = normalizedX >= 0 && normalizedX <= 1 && normalizedY >= 0 && normalizedY <= 1; + + if (this.sampleCount === 0 || (!withinBounds && this.outOfBoundsSampleCount === 0)) { + this.logDiagnostic("sample", { + rawX: payload.x, + rawY: payload.y, + normalizedX, + normalizedY, + visible: payload.visible, + withinBounds, + bounds, + handle: payload.handle, + }); + } + + return { + withinBounds, + sample: { + timeMs: Math.max(0, payload.timestampMs - this.startTimeMs), + cx: normalizedX, + cy: normalizedY, + assetId: payload.handle, + visible: payload.visible && withinBounds, + }, + }; + } + + private waitUntilReady() { + return new Promise((resolve, reject) => { + this.readyResolve = resolve; + this.readyReject = reject; + this.readyTimer = setTimeout(() => { + this.rejectReady(new Error("Timed out waiting for Windows cursor helper readiness")); + }, READY_TIMEOUT_MS); + }); + } + + private resolveReady() { + const resolve = this.readyResolve; + this.clearReadyState(); + resolve?.(); + } + + private rejectReady(error: Error) { + const reject = this.readyReject; + this.clearReadyState(); + reject?.(error); + } + + private clearReadyState() { + if (this.readyTimer) { + clearTimeout(this.readyTimer); + this.readyTimer = null; + } + this.readyResolve = null; + this.readyReject = null; + } + + private logDiagnostic(event: string, data: Record) { + console.info( + "[cursor-native][win32]", + JSON.stringify({ + event, + ...data, + }), + ); + } } diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts new file mode 100644 index 0000000..6efd59d --- /dev/null +++ 
b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts @@ -0,0 +1,49 @@ +import type { Rectangle } from "electron"; + +export interface WindowsCursorSampleEvent { + type: "sample"; + timestampMs: number; + x: number; + y: number; + visible: boolean; + handle: string | null; + bounds?: { + x: number; + y: number; + width: number; + height: number; + } | null; + asset?: WindowsCursorAssetPayload; +} + +export interface WindowsCursorReadyEvent { + type: "ready"; + timestampMs: number; +} + +export interface WindowsCursorErrorEvent { + type: "error"; + timestampMs: number; + message: string; +} + +export interface WindowsCursorAssetPayload { + id: string; + imageDataUrl: string; + width: number; + height: number; + hotspotX: number; + hotspotY: number; +} + +export type WindowsCursorEvent = + | WindowsCursorSampleEvent + | WindowsCursorReadyEvent + | WindowsCursorErrorEvent; + +export interface WindowsNativeRecordingSessionOptions { + getDisplayBounds: () => Rectangle | null; + maxSamples: number; + sampleIntervalMs: number; + sourceId?: string | null; +} diff --git a/src/components/launch/LaunchWindow.tsx b/src/components/launch/LaunchWindow.tsx index 260f4cb..57f79b3 100644 --- a/src/components/launch/LaunchWindow.tsx +++ b/src/components/launch/LaunchWindow.tsx @@ -259,6 +259,8 @@ export function LaunchWindow() { const [selectedSource, setSelectedSource] = useState("Screen"); const [hasSelectedSource, setHasSelectedSource] = useState(false); + const [, setHudPointerDownCount] = useState(0); + const [, setRecordPointerDownCount] = useState(0); useEffect(() => { const checkSelectedSource = async () => { @@ -541,6 +543,9 @@ export function LaunchWindow() { onClick={toggleMicrophone} disabled={recording} title={microphoneEnabled ? t("audio.disableMicrophone") : t("audio.enableMicrophone")} + onPointerDown={() => { + setRecordPointerDownCount((count) => count + 1); + }} > {microphoneEnabled ? 
getIcon("micOn", "text-green-400") diff --git a/src/components/video-editor/VideoEditor.tsx b/src/components/video-editor/VideoEditor.tsx index 1ed8938..842767a 100644 --- a/src/components/video-editor/VideoEditor.tsx +++ b/src/components/video-editor/VideoEditor.tsx @@ -1477,6 +1477,7 @@ export default function VideoEditor() { videoPadding: padding, cropRegion, cursorRecordingData, + cursorScale: showCursor ? cursorSize : 0, annotationRegions, webcamLayoutPreset, webcamMaskShape, @@ -1619,6 +1620,7 @@ export default function VideoEditor() { padding, cropRegion, cursorRecordingData, + cursorScale: showCursor ? cursorSize : 0, annotationRegions, webcamLayoutPreset, webcamMaskShape, diff --git a/src/components/video-editor/VideoPlayback.tsx b/src/components/video-editor/VideoPlayback.tsx index 12b1c25..32db9d2 100644 --- a/src/components/video-editor/VideoPlayback.tsx +++ b/src/components/video-editor/VideoPlayback.tsx @@ -29,8 +29,9 @@ import { classifyWallpaper, DEFAULT_WALLPAPER, resolveImageWallpaperUrl } from " import { getCssClipPath } from "@/lib/webcamMaskShapes"; import { getNativeCursorDisplayMetrics, + hasNativeCursorRecordingData, projectNativeCursorToStage, - resolveActiveNativeCursorFrame, + resolveInterpolatedNativeCursorFrame, } from "@/lib/cursor/nativeCursor"; import type { CursorRecordingData } from "@/native/contracts"; import { @@ -635,6 +636,18 @@ const VideoPlayback = forwardRef( showCursorRef.current = showCursor; }, [showCursor]); + useEffect(() => { + hasNativeCursorRecordingRef.current = hasNativeCursorRecording; + }, [hasNativeCursorRecording]); + + useEffect(() => { + cursorRecordingDataRef.current = cursorRecordingData; + }, [cursorRecordingData]); + + useEffect(() => { + cropRegionRef.current = cropRegion; + }, [cropRegion]); + useEffect(() => { cursorSizeRef.current = cursorSize; }, [cursorSize]); @@ -1273,16 +1286,69 @@ const VideoPlayback = forwardRef( // Update cursor overlay const cursorOverlay = cursorOverlayRef.current; if 
(cursorOverlay) { - const timeMs = currentTimeRef.current; + const timeMs = currentTimeRef.current; // already in ms cursorOverlay.update( cursorTelemetryRef.current, timeMs, baseMaskRef.current, - showCursorRef.current, + showCursorRef.current && !hasNativeCursorRecordingRef.current, !isPlayingRef.current || isSeekingRef.current, ); } + // Update native cursor image position at ticker rate (60fps) + const nativeCursorImg = nativeCursorImgRef.current; + if (nativeCursorImg) { + const cameraContainerRc = cameraContainerRef.current; + const videoContainerRc = videoContainerRef.current; + if ( + hasNativeCursorRecordingRef.current && + showCursorRef.current && + cameraContainerRc && + videoContainerRc + ) { + const timeMs = currentTimeRef.current; // already in ms + const frame = resolveInterpolatedNativeCursorFrame( + cursorRecordingDataRef.current, + timeMs, + ); + if (frame) { + const projectedPoint = projectNativeCursorToStage({ + cameraContainer: cameraContainerRc, + cropRegion: cropRegionRef.current ?? 
{ x: 0, y: 0, width: 1, height: 1 }, + maskRect: baseMaskRef.current, + videoContainerPosition: { + x: videoContainerRc.x, + y: videoContainerRc.y, + }, + sample: frame.sample, + }); + if (projectedPoint) { + const metrics = getNativeCursorDisplayMetrics( + frame.asset, + window.devicePixelRatio || 1, + ); + const scale = Math.max(0, cursorSizeRef.current); + if (nativeCursorImg.dataset.cursorId !== frame.asset.id) { + nativeCursorImg.src = frame.asset.imageDataUrl; + nativeCursorImg.dataset.cursorId = frame.asset.id; + } + nativeCursorImg.style.left = `${projectedPoint.x - metrics.hotspotX * scale}px`; + nativeCursorImg.style.top = `${projectedPoint.y - metrics.hotspotY * scale}px`; + nativeCursorImg.style.width = `${metrics.width * scale}px`; + nativeCursorImg.style.height = `${metrics.height * scale}px`; + nativeCursorImg.style.display = "block"; + } else { + nativeCursorImg.style.display = "none"; + } + } else { + nativeCursorImg.style.display = "none"; + } + } else { + nativeCursorImg.style.display = "none"; + } + } + const composite3D = composite3DRef.current; const outerWrapper = outerWrapperRef.current; if (composite3D && outerWrapper) { @@ -1571,17 +1637,14 @@ const VideoPlayback = forwardRef( className="absolute rounded-md border border-[#34B27B]/80 bg-[#34B27B]/20 shadow-[0_0_0_1px_rgba(52,178,123,0.35)]" style={{ display: "none", pointerEvents: "none" }} /> - {activeNativeCursor && nativeCursorStyle ? ( + {hasNativeCursorRecording ? 
( 0 && + recordingData.assets.length > 0, + ); +} + function getCroppedCursorPosition(sample: CursorRecordingSample, cropRegion: CropRegion) { if (cropRegion.width <= 0 || cropRegion.height <= 0) { return null; @@ -45,7 +56,7 @@ export function resolveActiveNativeCursorFrame( recordingData: CursorRecordingData | null | undefined, timeMs: number, ): ActiveNativeCursorFrame | null { - if (!recordingData || recordingData.provider !== "native" || recordingData.assets.length === 0) { + if (!hasNativeCursorRecordingData(recordingData)) { return null; } @@ -70,6 +81,65 @@ export function resolveActiveNativeCursorFrame( return null; } +export function resolveInterpolatedNativeCursorFrame( + recordingData: CursorRecordingData | null | undefined, + timeMs: number, +): ActiveNativeCursorFrame | null { + if (!hasNativeCursorRecordingData(recordingData)) { + return null; + } + + const samples = recordingData.samples; + let activeIndex = -1; + + for (let index = samples.length - 1; index >= 0; index -= 1) { + if (samples[index].timeMs <= timeMs) { + activeIndex = index; + break; + } + } + + if (activeIndex < 0) { + return null; + } + + const activeSample = samples[activeIndex]; + if (activeSample.visible === false || !activeSample.assetId) { + return null; + } + + const asset = recordingData.assets.find((candidate) => candidate.id === activeSample.assetId); + if (!asset) { + return null; + } + + const nextSample = samples[activeIndex + 1]; + if ( + !nextSample || + nextSample.timeMs <= activeSample.timeMs || + nextSample.visible === false || + nextSample.assetId !== activeSample.assetId || + timeMs <= activeSample.timeMs + ) { + return { asset, sample: activeSample }; + } + + const interpolation = clamp( + (timeMs - activeSample.timeMs) / (nextSample.timeMs - activeSample.timeMs), + 0, + 1, + ); + + return { + asset, + sample: { + ...activeSample, + cx: activeSample.cx + (nextSample.cx - activeSample.cx) * interpolation, + cy: activeSample.cy + (nextSample.cy - activeSample.cy) 
* interpolation, + }, + }; +} + export function projectNativeCursorToStage({ cameraContainer, cropRegion, @@ -83,8 +153,8 @@ export function projectNativeCursorToStage({ } const localPoint = new Point( - videoContainerPosition.x + croppedPosition.cx * maskRect.width, - videoContainerPosition.y + croppedPosition.cy * maskRect.height, + videoContainerPosition.x + maskRect.x + croppedPosition.cx * maskRect.width, + videoContainerPosition.y + maskRect.y + croppedPosition.cy * maskRect.height, ); return cameraContainer.toGlobal(localPoint); diff --git a/src/lib/exporter/frameRenderer.ts b/src/lib/exporter/frameRenderer.ts index f13735d..6f605b0 100644 --- a/src/lib/exporter/frameRenderer.ts +++ b/src/lib/exporter/frameRenderer.ts @@ -59,7 +59,7 @@ import { import { getNativeCursorDisplayMetrics, projectNativeCursorToStage, - resolveActiveNativeCursorFrame, + resolveInterpolatedNativeCursorFrame, } from "@/lib/cursor/nativeCursor"; import { BackgroundLoadError, classifyWallpaper, resolveImageWallpaperUrl } from "@/lib/wallpaper"; import { drawCanvasClipPath } from "@/lib/webcamMaskShapes"; @@ -86,6 +86,7 @@ interface FrameRenderConfig { padding?: number; cropRegion: CropRegion; cursorRecordingData?: CursorRecordingData | null; + cursorScale?: number; videoWidth: number; videoHeight: number; webcamSize?: Size | null; @@ -558,7 +559,11 @@ export class FrameRenderer { return; } - const activeNativeCursor = resolveActiveNativeCursorFrame( + if ((this.config.cursorScale ?? 1) <= 0) { + return; + } + + const activeNativeCursor = resolveInterpolatedNativeCursorFrame( this.config.cursorRecordingData, timeMs, ); @@ -582,13 +587,13 @@ export class FrameRenderer { const image = await this.getCursorImage(activeNativeCursor.asset); const metrics = getNativeCursorDisplayMetrics(activeNativeCursor.asset, 1); - + const scale = Math.max(0, this.config.cursorScale ?? 
1); this.compositeCtx.drawImage( image, - projectedPoint.x - metrics.hotspotX, - projectedPoint.y - metrics.hotspotY, - metrics.width, - metrics.height, + projectedPoint.x - metrics.hotspotX * scale, + projectedPoint.y - metrics.hotspotY * scale, + metrics.width * scale, + metrics.height * scale, ); } diff --git a/src/lib/exporter/gifExporter.ts b/src/lib/exporter/gifExporter.ts index 02564db..c1120af 100644 --- a/src/lib/exporter/gifExporter.ts +++ b/src/lib/exporter/gifExporter.ts @@ -49,6 +49,7 @@ interface GifExporterConfig { webcamSizePreset?: WebcamSizePreset; webcamPosition?: { cx: number; cy: number } | null; cursorRecordingData?: CursorRecordingData | null; + cursorScale?: number; annotationRegions?: AnnotationRegion[]; previewWidth?: number; previewHeight?: number; @@ -154,6 +155,7 @@ export class GifExporter { padding: this.config.padding, cropRegion: this.config.cropRegion, cursorRecordingData: this.config.cursorRecordingData, + cursorScale: this.config.cursorScale, videoWidth: videoInfo.width, videoHeight: videoInfo.height, webcamSize: webcamInfo ? { width: webcamInfo.width, height: webcamInfo.height } : null, diff --git a/src/lib/exporter/videoExporter.ts b/src/lib/exporter/videoExporter.ts index edddd05..fb38611 100644 --- a/src/lib/exporter/videoExporter.ts +++ b/src/lib/exporter/videoExporter.ts @@ -40,6 +40,7 @@ interface VideoExporterConfig extends ExportConfig { webcamSizePreset?: WebcamSizePreset; webcamPosition?: { cx: number; cy: number } | null; cursorRecordingData?: CursorRecordingData | null; + cursorScale?: number; annotationRegions?: AnnotationRegion[]; previewWidth?: number; previewHeight?: number; @@ -149,6 +150,7 @@ export class VideoExporter { padding: this.config.padding, cropRegion: this.config.cropRegion, cursorRecordingData: this.config.cursorRecordingData, + cursorScale: this.config.cursorScale, videoWidth: videoInfo.width, videoHeight: videoInfo.height, webcamSize: webcamInfo ? 
{ width: webcamInfo.width, height: webcamInfo.height } : null, From 28ff0fb7bf41aa43b4badd093cacb5cc071f281e Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Sun, 3 May 2026 16:53:04 +0200 Subject: [PATCH 05/43] fix: restore cursor pipeline build after rebase --- .gitignore | 3 ++ electron/ipc/handlers.ts | 44 ++++++++++++++++++- electron/preload.ts | 1 + src/components/video-editor/SettingsPanel.tsx | 2 - src/components/video-editor/VideoEditor.tsx | 11 +++-- src/components/video-editor/VideoPlayback.tsx | 4 +- src/hooks/useScreenRecorder.ts | 3 +- src/lib/exporter/frameRenderer.ts | 2 +- 8 files changed, 60 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 771c4bd..494da30 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,6 @@ __screenshots__/ result result-* .direnv/ + +#kilocode +.kilo/ \ No newline at end of file diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index d50cab3..3a4dd3b 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -451,9 +451,12 @@ async function storeRecordedSessionFiles(payload: StoreRecordedSessionInput) { export function registerIpcHandlers( createEditorWindow: () => void, createSourceSelectorWindow: () => BrowserWindow, + _createCountdownOverlayWindow: () => BrowserWindow, getMainWindow: () => BrowserWindow | null, getSourceSelectorWindow: () => BrowserWindow | null, + _getCountdownOverlayWindow?: () => BrowserWindow | null, onRecordingStateChange?: (recording: boolean, sourceName: string) => void, + _switchToHud?: () => void, ) { ipcMain.handle("get-sources", async (_, opts) => { const sources = await desktopCapturer.getSources(opts); @@ -472,7 +475,7 @@ export function registerIpcHandlers( // Reuse the exact source object returned during enumeration to avoid // Windows window-source id mismatches across separate getSources() calls. selectedDesktopSource = - typeof source.id === "string" ? lastEnumeratedSources.get(source.id) ?? 
null : null; + typeof source.id === "string" ? (lastEnumeratedSources.get(source.id) ?? null) : null; if (!selectedDesktopSource && typeof source.id === "string") { try { @@ -602,6 +605,45 @@ export function registerIpcHandlers( } }); + async function storeRecordedSessionFiles(payload: StoreRecordedSessionInput) { + const createdAt = + typeof payload.createdAt === "number" && Number.isFinite(payload.createdAt) + ? payload.createdAt + : Date.now(); + const screenVideoPath = path.join(RECORDINGS_DIR, payload.screen.fileName); + await fs.writeFile(screenVideoPath, Buffer.from(payload.screen.videoData)); + + let webcamVideoPath: string | undefined; + if (payload.webcam) { + webcamVideoPath = path.join(RECORDINGS_DIR, payload.webcam.fileName); + await fs.writeFile(webcamVideoPath, Buffer.from(payload.webcam.videoData)); + } + + const session: RecordingSession = webcamVideoPath + ? { screenVideoPath, webcamVideoPath, createdAt } + : { screenVideoPath, createdAt }; + setCurrentRecordingSessionState(session); + currentVideoPath = screenVideoPath; + currentProjectPath = null; + + const telemetryPath = `${screenVideoPath}.cursor.json`; + if (pendingCursorRecordingData && pendingCursorRecordingData.samples.length > 0) { + await fs.writeFile( + telemetryPath, + JSON.stringify(pendingCursorRecordingData, null, 2), + "utf-8", + ); + } + pendingCursorRecordingData = null; + + return { + success: true, + path: screenVideoPath, + session, + message: "Recording session stored successfully", + }; + } + ipcMain.handle("store-recorded-video", async (_, videoData: ArrayBuffer, fileName: string) => { try { return await storeRecordedSessionFiles({ diff --git a/electron/preload.ts b/electron/preload.ts index 5ff110d..5345aab 100644 --- a/electron/preload.ts +++ b/electron/preload.ts @@ -1,4 +1,5 @@ import { contextBridge, ipcRenderer } from "electron"; +import type { RecordingSession, StoreRecordedSessionInput } from "../src/lib/recordingSession"; import { NATIVE_BRIDGE_CHANNEL, type 
NativeBridgeRequest } from "../src/native/contracts"; // Asset base URL is passed from the main process via webPreferences.additionalArguments diff --git a/src/components/video-editor/SettingsPanel.tsx b/src/components/video-editor/SettingsPanel.tsx index 110f025..ad74239 100644 --- a/src/components/video-editor/SettingsPanel.tsx +++ b/src/components/video-editor/SettingsPanel.tsx @@ -435,8 +435,6 @@ export function SettingsPanel({ const [selectedColor, setSelectedColor] = useState("#ADADAD"); const [gradient, setGradient] = useState(GRADIENTS[0]); - const [showCropModal, setShowCropModal] = useState(false); - const cropSnapshotRef = useRef(null); const [cropAspectLocked, setCropAspectLocked] = useState(false); const [cropAspectRatio, setCropAspectRatio] = useState(""); const isPortraitCanvas = isPortraitAspectRatio(aspectRatio); diff --git a/src/components/video-editor/VideoEditor.tsx b/src/components/video-editor/VideoEditor.tsx index 842767a..5394eee 100644 --- a/src/components/video-editor/VideoEditor.tsx +++ b/src/components/video-editor/VideoEditor.tsx @@ -38,12 +38,12 @@ import { saveUserPreferences, } from "@/lib/userPreferences"; import { BackgroundLoadError } from "@/lib/wallpaper"; +import { nativeBridgeClient, useCursorRecordingData, useCursorTelemetry } from "@/native"; import { getAspectRatioValue, getNativeAspectRatioValue, isPortraitAspectRatio, } from "@/utils/aspectRatioUtils"; -import { nativeBridgeClient, useCursorRecordingData, useCursorTelemetry } from "@/native"; import { ExportDialog } from "./ExportDialog"; import PlaybackControls from "./PlaybackControls"; import { @@ -216,7 +216,12 @@ export default function VideoEditor() { } const project = candidate; - const sourcePath = project.videoPath; + const projectMedia = resolveProjectMedia(project); + if (!projectMedia) { + return false; + } + const sourcePath = projectMedia.screenVideoPath; + const webcamSourcePath = projectMedia.webcamVideoPath ?? 
null; const normalizedEditor = normalizeProjectEditor(project.editor); const inferredDurationMs = Math.max( 0, @@ -405,7 +410,7 @@ export default function VideoEditor() { setVideoPath(toFileUrl(result.path)); setCurrentProjectPath(null); setLastSavedSnapshot( - createProjectSnapshot({ screenVideoPath: sourcePath }, INITIAL_EDITOR_STATE), + createProjectSnapshot({ screenVideoPath: result.path }, INITIAL_EDITOR_STATE), ); } else { setError("No video to load. Please record or select a video."); diff --git a/src/components/video-editor/VideoPlayback.tsx b/src/components/video-editor/VideoPlayback.tsx index 32db9d2..d7b3836 100644 --- a/src/components/video-editor/VideoPlayback.tsx +++ b/src/components/video-editor/VideoPlayback.tsx @@ -25,14 +25,14 @@ import { type WebcamLayoutPreset, type WebcamSizePreset, } from "@/lib/compositeLayout"; -import { classifyWallpaper, DEFAULT_WALLPAPER, resolveImageWallpaperUrl } from "@/lib/wallpaper"; -import { getCssClipPath } from "@/lib/webcamMaskShapes"; import { getNativeCursorDisplayMetrics, hasNativeCursorRecordingData, projectNativeCursorToStage, resolveInterpolatedNativeCursorFrame, } from "@/lib/cursor/nativeCursor"; +import { classifyWallpaper, DEFAULT_WALLPAPER, resolveImageWallpaperUrl } from "@/lib/wallpaper"; +import { getCssClipPath } from "@/lib/webcamMaskShapes"; import type { CursorRecordingData } from "@/native/contracts"; import { type AspectRatio, diff --git a/src/hooks/useScreenRecorder.ts b/src/hooks/useScreenRecorder.ts index 67b0d96..8aa673e 100644 --- a/src/hooks/useScreenRecorder.ts +++ b/src/hooks/useScreenRecorder.ts @@ -1,7 +1,8 @@ import { fixWebmDuration } from "@fix-webm-duration/fix"; import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; -import { nativeBridgeClient } from "@/native"; +import { useScopedT } from "@/contexts/I18nContext"; +import { requestCameraAccess } from "@/lib/requestCameraAccess"; const TARGET_FRAME_RATE = 60; const MIN_FRAME_RATE = 
30; diff --git a/src/lib/exporter/frameRenderer.ts b/src/lib/exporter/frameRenderer.ts index 6f605b0..c43908f 100644 --- a/src/lib/exporter/frameRenderer.ts +++ b/src/lib/exporter/frameRenderer.ts @@ -614,7 +614,7 @@ export class FrameRenderer { return image; } - private updateLayout(): void { + private updateLayout(webcamFrame?: VideoFrame | null): void { if (!this.app || !this.videoSprite || !this.maskGraphics || !this.videoContainer) return; const { width, height } = this.config; From bb0dec73443064856b366aa499dd1ef4ed78aab8 Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 10:16:01 +0200 Subject: [PATCH 06/43] feat: add windows cursor preview diagnostics --- README.md | 3 + docs/testing/windows-native-cursor.md | 85 ++ electron/ipc/handlers.ts | 16 + .../windowsNativeRecordingSession.script.ts | 41 +- .../windowsNativeRecordingSession.ts | 21 +- .../windowsNativeRecordingSession.types.ts | 3 + package.json | 12 +- scripts/capture-openscreen-preview.mjs | 262 ++++ scripts/test-windows-native-cursor.mjs | 1113 +++++++++++++++++ src/components/video-editor/VideoPlayback.tsx | 19 +- src/lib/cursor/nativeCursor.ts | 128 ++ src/lib/exporter/frameRenderer.ts | 22 +- src/native/contracts.ts | 17 + 13 files changed, 1713 insertions(+), 29 deletions(-) create mode 100644 docs/testing/windows-native-cursor.md create mode 100644 scripts/capture-openscreen-preview.mjs create mode 100644 scripts/test-windows-native-cursor.mjs diff --git a/README.md b/README.md index 7009a22..0403afe 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,9 @@ See the documentation here: [OpenScreen Docs](https://deepwiki.com/siddharthvaddem/openscreen) Refresh if outdated. +Developer notes: +- [Windows native cursor test pipeline](docs/testing/windows-native-cursor.md) + ## Contributing Contributions are welcome - please **include screenshots or a short video** for any UI change or new user-facing feature. If it touches what users see or do, show it. 
Skip only when it genuinely doesn't apply. PRs that don't follow this will be closed. diff --git a/docs/testing/windows-native-cursor.md b/docs/testing/windows-native-cursor.md new file mode 100644 index 0000000..1abe04e --- /dev/null +++ b/docs/testing/windows-native-cursor.md @@ -0,0 +1,85 @@ +# Windows native cursor test pipeline + +This branch includes two Windows-focused diagnostics for fast iteration on native cursor capture and rendering. They are intentionally local developer tools: they create short videos and JSON reports so cursor changes can be inspected without doing a full manual record/edit/export cycle. + +## Native sampler diagnostic + +```powershell +npm run test:cursor-native:win +``` + +This script does not launch OpenScreen. It: + +- starts a Windows `GetCursorInfo` sampler +- moves the real OS pointer with `SetCursorPos` +- captures native cursor handles, hotspots, assets, and standard `IDC_*` cursor types +- writes normalized `CursorRecordingData` +- generates an abstract preview video +- generates a real-screen preview video using screenshots of the current desktop + +The output directory is printed in the command result, for example: + +```text +C:\Users\<user>\AppData\Local\Temp\openscreen-cursor-native-... 
+``` + +Useful files: + +- `report.json`: sample counts, asset counts, cursor handles, and generated artifact paths +- `cursor-recording-data.json`: sidecar-compatible cursor data +- `preview.webm`: abstract path/asset/hotspot preview +- `real-capture-preview.webm`: real desktop screenshot background with reconstructed cursor overlay +- `assets/*.png`: raw cursor bitmaps captured from Windows + +Environment overrides: + +```powershell +$env:CURSOR_TEST_DURATION_MS = "3000" +$env:CURSOR_TEST_SAMPLE_INTERVAL_MS = "16" +$env:CURSOR_TEST_SCREEN_FRAME_INTERVAL_MS = "80" +$env:CURSOR_TEST_OUTPUT_DIR = "C:\temp\openscreen-cursor-test" +npm run test:cursor-native:win +``` + +## OpenScreen preview capture + +```powershell +npm run capture:openscreen-preview +``` + +This script launches the real Electron app, injects a fixture video plus cursor sidecar data, opens the editor, captures frames from the actual OpenScreen preview UI, and encodes them into a WebM. + +By default it uses the latest `cursor-recording-data.json` generated by `npm run test:cursor-native:win`. 
To force a specific sidecar: + +```powershell +$env:CURSOR_RECORDING_DATA_PATH = "C:\path\to\cursor-recording-data.json" +npm run capture:openscreen-preview +``` + +Useful environment overrides: + +```powershell +$env:OPENSCREEN_PREVIEW_SKIP_BUILD = "true" +$env:OPENSCREEN_PREVIEW_FRAME_COUNT = "120" +$env:OPENSCREEN_PREVIEW_FPS = "30" +$env:OPENSCREEN_PREVIEW_OUTPUT_DIR = "C:\temp\openscreen-preview" +npm run capture:openscreen-preview +``` + +Useful files: + +- `openscreen-preview.webm`: video of the real OpenScreen editor preview +- `frames/*.png`: captured preview frames +- `report.json`: fixture paths, source sidecar, frame count, and output path + +## What these tests validate + +Together, the scripts make it quick to inspect: + +- whether Windows cursor samples are visible and continuous +- whether native hotspots stay anchored when scaling to `3x` +- whether standard Windows cursors are recognized via `IDC_*` +- whether high-quality SVG cursor replacements follow the native hotspot +- whether the real OpenScreen preview renders the same cursor behavior as the diagnostic pipeline + +They are not a full substitute for an end-to-end manual recording pass. Before shipping cursor changes, also test a real capture session and export from the packaged app. diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 3a4dd3b..c5a1269 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -267,6 +267,7 @@ function normalizeCursorSample(sample: unknown): CursorRecordingSample | null { cy: typeof point.cy === "number" && Number.isFinite(point.cy) ? point.cy : 0.5, assetId: typeof point.assetId === "string" ? point.assetId : null, visible: typeof point.visible === "boolean" ? point.visible : true, + cursorType: typeof point.cursorType === "string" ? 
point.cursorType : null, }; } @@ -305,6 +306,7 @@ function normalizeCursorAsset(asset: unknown): NativeCursorAsset | null { typeof candidate.scaleFactor === "number" && Number.isFinite(candidate.scaleFactor) ? Math.max(0.1, candidate.scaleFactor) : undefined, + cursorType: typeof candidate.cursorType === "string" ? candidate.cursorType : null, }; } @@ -1079,6 +1081,20 @@ export function registerIpcHandlers( return setCurrentVideoPath(path); }); + ipcMain.handle("set-current-recording-session", (_, session: RecordingSession | null) => { + const normalizedSession = normalizeRecordingSession(session); + setCurrentRecordingSessionState(normalizedSession); + currentVideoPath = normalizedSession?.screenVideoPath ?? null; + currentProjectPath = null; + return { success: true, session: currentRecordingSession }; + }); + + ipcMain.handle("get-current-recording-session", () => { + return currentRecordingSession + ? { success: true, session: currentRecordingSession } + : { success: false }; + }); + function setCurrentVideoPath(path: string): ProjectPathResult { currentVideoPath = normalizeVideoSourcePath(path) ?? path; currentProjectPath = null; diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts index b7a11cb..5607134 100644 --- a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts @@ -16,7 +16,7 @@ export function buildPowerShellCommand(sampleIntervalMs: number, windowHandle?: $ErrorActionPreference = 'Stop' Add-Type -AssemblyName System.Drawing -$targetWindowHandle = ${windowHandle ? `'${windowHandle}'` : '$null'} +$targetWindowHandle = ${windowHandle ? 
`'${windowHandle}'` : "$null"} $source = @" using System; @@ -59,6 +59,9 @@ public static class OpenScreenCursorInterop { [return: MarshalAs(UnmanagedType.Bool)] public static extern bool GetCursorInfo(ref CURSORINFO pci); + [DllImport("user32.dll", SetLastError = true)] + public static extern IntPtr LoadCursor(IntPtr hInstance, IntPtr lpCursorName); + [DllImport("user32.dll", SetLastError = true)] [return: MarshalAs(UnmanagedType.Bool)] public static extern bool GetWindowRect(IntPtr hWnd, out RECT lpRect); @@ -86,6 +89,37 @@ public static class OpenScreenCursorInterop { Add-Type -TypeDefinition $source +$standardCursors = @{ + arrow = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32512)) + text = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32513)) + wait = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32514)) + crosshair = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32515)) + 'up-arrow' = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32516)) + 'resize-nwse' = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32642)) + 'resize-nesw' = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32643)) + 'resize-ew' = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32644)) + 'resize-ns' = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32645)) + move = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32646)) + 'not-allowed' = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32648)) + pointer = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32649)) + 'app-starting' = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32650)) + help = [OpenScreenCursorInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32651)) +} + +function Get-StandardCursorType($cursorHandle) { + if ($cursorHandle -eq [IntPtr]::Zero) { + return 
$null + } + + foreach ($entry in $standardCursors.GetEnumerator()) { + if ($entry.Value -eq $cursorHandle) { + return $entry.Key + } + } + + return $null +} + function Write-JsonLine($payload) { [Console]::Out.WriteLine(($payload | ConvertTo-Json -Compress -Depth 6)) } @@ -190,10 +224,14 @@ while ($true) { $visible = ($cursorInfo.flags -band 1) -ne 0 $cursorId = if ($cursorInfo.hCursor -eq [IntPtr]::Zero) { $null } else { ('0x{0:X}' -f $cursorInfo.hCursor.ToInt64()) } + $cursorType = Get-StandardCursorType $cursorInfo.hCursor $asset = $null if ($visible -and $cursorId -and $cursorId -ne $lastCursorId) { $asset = Get-CursorAsset -cursorHandle $cursorInfo.hCursor -cursorId $cursorId + if ($asset -and $cursorType) { + $asset.cursorType = $cursorType + } $lastCursorId = $cursorId } @@ -204,6 +242,7 @@ while ($true) { y = $cursorInfo.ptScreenPos.Y visible = $visible handle = $cursorId + cursorType = $cursorType bounds = Get-TargetBounds asset = $asset } diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts index d5e43d7..632a74d 100644 --- a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts @@ -7,7 +7,10 @@ import type { NativeCursorAsset, } from "../../../../src/native/contracts"; import type { CursorRecordingSession } from "./session"; -import { buildPowerShellCommand, parseWindowHandleFromSourceId } from "./windowsNativeRecordingSession.script"; +import { + buildPowerShellCommand, + parseWindowHandleFromSourceId, +} from "./windowsNativeRecordingSession.script"; import type { WindowsCursorEvent, WindowsNativeRecordingSessionOptions, @@ -91,7 +94,9 @@ export class WindowsNativeRecordingSession implements CursorRecordingSession { assetCount: this.assets.size, outOfBoundsSampleCount: this.outOfBoundsSampleCount, }); - this.rejectReady(new Error(`Windows cursor 
helper exited before ready (code=${code}, signal=${signal})`)); + this.rejectReady( + new Error(`Windows cursor helper exited before ready (code=${code}, signal=${signal})`), + ); }); child.once("error", (error) => { this.logDiagnostic("process-error", { message: error.message }); @@ -168,6 +173,7 @@ export class WindowsNativeRecordingSession implements CursorRecordingSession { hotspotX: payload.asset.hotspotX, hotspotY: payload.asset.hotspotY, scaleFactor: assetDisplay.scaleFactor, + cursorType: payload.asset.cursorType ?? payload.cursorType ?? null, }); this.logDiagnostic("asset", { id: payload.asset.id, @@ -192,13 +198,17 @@ export class WindowsNativeRecordingSession implements CursorRecordingSession { } } - private normalizeSample(payload: Extract): NormalizedSample { - const bounds = payload.bounds ?? this.options.getDisplayBounds() ?? screen.getPrimaryDisplay().bounds; + private normalizeSample( + payload: Extract, + ): NormalizedSample { + const bounds = + payload.bounds ?? this.options.getDisplayBounds() ?? screen.getPrimaryDisplay().bounds; const width = Math.max(1, bounds.width); const height = Math.max(1, bounds.height); const normalizedX = (payload.x - bounds.x) / width; const normalizedY = (payload.y - bounds.y) / height; - const withinBounds = normalizedX >= 0 && normalizedX <= 1 && normalizedY >= 0 && normalizedY <= 1; + const withinBounds = + normalizedX >= 0 && normalizedX <= 1 && normalizedY >= 0 && normalizedY <= 1; if (this.sampleCount === 0 || (!withinBounds && this.outOfBoundsSampleCount === 0)) { this.logDiagnostic("sample", { @@ -221,6 +231,7 @@ export class WindowsNativeRecordingSession implements CursorRecordingSession { cy: normalizedY, assetId: payload.handle, visible: payload.visible && withinBounds, + cursorType: payload.cursorType ?? payload.asset?.cursorType ?? 
null, }, }; } diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts index 6efd59d..fdc4ab9 100644 --- a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts @@ -1,4 +1,5 @@ import type { Rectangle } from "electron"; +import type { NativeCursorType } from "../../../../src/native/contracts"; export interface WindowsCursorSampleEvent { type: "sample"; @@ -7,6 +8,7 @@ export interface WindowsCursorSampleEvent { y: number; visible: boolean; handle: string | null; + cursorType?: NativeCursorType | null; bounds?: { x: number; y: number; @@ -34,6 +36,7 @@ export interface WindowsCursorAssetPayload { height: number; hotspotX: number; hotspotY: number; + cursorType?: NativeCursorType | null; } export type WindowsCursorEvent = diff --git a/package.json b/package.json index 2ccb0b3..f81d99b 100644 --- a/package.json +++ b/package.json @@ -19,12 +19,14 @@ "lint:fix": "biome check --write .", "format": "biome format --write .", "i18n:check": "node scripts/i18n-check.mjs", - "preview": "vite preview", - "build:mac": "tsc && vite build && electron-builder --mac", - "build:win": "tsc && vite build && electron-builder --win --config.npmRebuild=false", - "build:linux": "tsc && vite build && electron-builder --linux AppImage deb pacman --config.npmRebuild=false", - "test": "vitest --run", + "preview": "vite preview", + "build:mac": "tsc && vite build && electron-builder --mac", + "build:win": "tsc && vite build && electron-builder --win --config.npmRebuild=false", + "build:linux": "tsc && vite build && electron-builder --linux AppImage deb pacman --config.npmRebuild=false", + "test": "vitest --run", "test:watch": "vitest", + "test:cursor-native:win": "node scripts/test-windows-native-cursor.mjs", + "capture:openscreen-preview": "node 
scripts/capture-openscreen-preview.mjs", "build-vite": "tsc && vite build", "test:browser": "vitest --config vitest.browser.config.ts --run", "test:browser:install": "playwright install --with-deps chromium-headless-shell", diff --git a/scripts/capture-openscreen-preview.mjs b/scripts/capture-openscreen-preview.mjs new file mode 100644 index 0000000..6c9b6eb --- /dev/null +++ b/scripts/capture-openscreen-preview.mjs @@ -0,0 +1,262 @@ +import { spawn } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { chromium, _electron as electron } from "@playwright/test"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const ROOT = path.join(__dirname, ".."); +const MAIN_JS = path.join(ROOT, "dist-electron", "main.js"); +const TEST_VIDEO = path.join(ROOT, "tests", "fixtures", "sample.webm"); +const OUTPUT_DIR = + process.env.OPENSCREEN_PREVIEW_OUTPUT_DIR ?? + path.join(os.tmpdir(), `openscreen-real-preview-${Date.now()}`); +const FRAME_COUNT = Number(process.env.OPENSCREEN_PREVIEW_FRAME_COUNT ?? 90); +const FPS = Number(process.env.OPENSCREEN_PREVIEW_FPS ?? 
30); + +function findLatestCursorRecordingData() { + const explicit = process.env.CURSOR_RECORDING_DATA_PATH; + if (explicit) { + if (!fs.existsSync(explicit)) { + throw new Error(`CURSOR_RECORDING_DATA_PATH does not exist: ${explicit}`); + } + return explicit; + } + + const tempDir = os.tmpdir(); + const candidates = fs + .readdirSync(tempDir, { withFileTypes: true }) + .filter((entry) => entry.isDirectory() && entry.name.startsWith("openscreen-cursor-native-")) + .map((entry) => path.join(tempDir, entry.name, "cursor-recording-data.json")) + .filter((candidate) => fs.existsSync(candidate)) + .map((candidate) => ({ path: candidate, mtimeMs: fs.statSync(candidate).mtimeMs })) + .sort((a, b) => b.mtimeMs - a.mtimeMs); + + if (!candidates[0]) { + throw new Error( + "No cursor-recording-data.json found. Run npm run test:cursor-native:win first.", + ); + } + + return candidates[0].path; +} + +function findPlaywrightChromiumExecutable(defaultPath) { + if (fs.existsSync(defaultPath)) { + return defaultPath; + } + + const baseDir = path.join(process.env.LOCALAPPDATA ?? "", "ms-playwright"); + if (!baseDir || !fs.existsSync(baseDir)) { + return defaultPath; + } + + const candidates = fs + .readdirSync(baseDir, { withFileTypes: true }) + .filter((entry) => entry.isDirectory() && entry.name.startsWith("chromium-")) + .map((entry) => path.join(baseDir, entry.name, "chrome-win64", "chrome.exe")) + .filter((candidate) => fs.existsSync(candidate)) + .sort() + .reverse(); + + return candidates[0] ?? defaultPath; +} + +function ensureBuildExists() { + if (!fs.existsSync(MAIN_JS)) { + throw new Error(`Missing ${MAIN_JS}. Run npm run build-vite first.`); + } + if (!fs.existsSync(path.join(ROOT, "dist", "index.html"))) { + throw new Error(`Missing renderer build. 
Run npm run build-vite first.`); + } +} + +function runNpmBuildViteIfRequested() { + if (process.env.OPENSCREEN_PREVIEW_SKIP_BUILD === "true") { + ensureBuildExists(); + return Promise.resolve(); + } + + return new Promise((resolve, reject) => { + const child = spawn("cmd.exe", ["/d", "/s", "/c", "npm run build-vite"], { + cwd: ROOT, + stdio: "inherit", + }); + child.once("error", reject); + child.once("exit", (code) => { + if (code === 0) resolve(); + else reject(new Error(`npm run build-vite failed with code ${code}`)); + }); + }); +} + +async function encodeFramesToWebm(framePaths, outputPath) { + const frameData = framePaths.map((framePath) => ({ + src: `data:image/png;base64,${fs.readFileSync(framePath).toString("base64")}`, + })); + const html = ` + + + + + +`; + + const browser = await chromium.launch({ + executablePath: findPlaywrightChromiumExecutable(chromium.executablePath()), + headless: true, + }); + try { + const page = await browser.newPage(); + await page.setContent(html); + const base64 = await page.evaluate(() => window.__encode()); + fs.writeFileSync(outputPath, Buffer.from(base64, "base64")); + } finally { + await browser.close(); + } +} + +fs.mkdirSync(OUTPUT_DIR, { recursive: true }); +const cursorRecordingDataPath = findLatestCursorRecordingData(); +const fixtureVideoPath = path.join(OUTPUT_DIR, "openscreen-preview-fixture.webm"); +const outputVideoPath = path.join(OUTPUT_DIR, "openscreen-preview.webm"); +fs.copyFileSync(TEST_VIDEO, fixtureVideoPath); +fs.copyFileSync(cursorRecordingDataPath, `${fixtureVideoPath}.cursor.json`); + +await runNpmBuildViteIfRequested(); + +const app = await electron.launch({ + args: [MAIN_JS, "--no-sandbox", "--enable-unsafe-swiftshader"], + env: { + ...process.env, + HEADLESS: "false", + }, +}); + +app.process().stdout?.on("data", (data) => process.stdout.write(`[electron] ${data}`)); +app.process().stderr?.on("data", (data) => process.stderr.write(`[electron] ${data}`)); + +const framesDir = 
path.join(OUTPUT_DIR, "frames"); +fs.mkdirSync(framesDir, { recursive: true }); + +try { + const hudWindow = await app.firstWindow({ timeout: 60_000 }); + await hudWindow.waitForLoadState("domcontentloaded"); + await hudWindow.evaluate(async () => { + for (let attempt = 0; attempt < 100; attempt += 1) { + try { + await window.electronAPI.getCurrentRecordingSession(); + await window.electronAPI.getCurrentVideoPath(); + return; + } catch { + await new Promise((resolve) => setTimeout(resolve, 100)); + } + } + throw new Error("Timed out waiting for OpenScreen IPC handlers."); + }); + + try { + await hudWindow.evaluate(async (videoPath) => { + await window.electronAPI.setCurrentVideoPath(videoPath); + await window.electronAPI.switchToEditor(); + }, fixtureVideoPath); + } catch { + // switchToEditor closes the HUD page before the evaluate promise can always resolve. + } + + const editorWindow = await app.waitForEvent("window", { + predicate: (window) => window.url().includes("windowType=editor"), + timeout: 30_000, + }); + await editorWindow.waitForLoadState("domcontentloaded"); + await editorWindow.waitForSelector("video", { state: "attached", timeout: 30_000 }); + await editorWindow.waitForSelector("canvas", { state: "attached", timeout: 30_000 }); + await editorWindow.waitForSelector('img[aria-hidden="true"]', { + state: "attached", + timeout: 30_000, + }); + + await editorWindow.setViewportSize({ width: 1280, height: 800 }); + await editorWindow.evaluate(async () => { + await document.fonts.ready; + for (const video of [...document.querySelectorAll("video")]) { + video.muted = true; + video.currentTime = 0; + video.dispatchEvent(new Event("timeupdate")); + } + }); + await editorWindow.waitForTimeout(1000); + + const framePaths = []; + for (let index = 0; index < FRAME_COUNT; index += 1) { + const timeSec = index / FPS; + await editorWindow.evaluate((time) => { + for (const video of [...document.querySelectorAll("video")]) { + video.currentTime = Math.min(time, 
Math.max(0, video.duration || time)); + video.dispatchEvent(new Event("timeupdate")); + } + }, timeSec); + await editorWindow.waitForTimeout(40); + const framePath = path.join(framesDir, `frame-${String(index).padStart(4, "0")}.png`); + await editorWindow.screenshot({ path: framePath }); + framePaths.push(framePath); + } + + await encodeFramesToWebm(framePaths, outputVideoPath); + + const report = { + outputDir: OUTPUT_DIR, + sourceCursorRecordingDataPath: cursorRecordingDataPath, + fixtureVideoPath, + outputVideoPath, + frameCount: framePaths.length, + fps: FPS, + }; + fs.writeFileSync(path.join(OUTPUT_DIR, "report.json"), JSON.stringify(report, null, 2)); + console.log(JSON.stringify(report, null, 2)); +} finally { + await app.close(); +} diff --git a/scripts/test-windows-native-cursor.mjs b/scripts/test-windows-native-cursor.mjs new file mode 100644 index 0000000..2a8b34c --- /dev/null +++ b/scripts/test-windows-native-cursor.mjs @@ -0,0 +1,1113 @@ +import { spawn } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const SAMPLE_INTERVAL_MS = Number(process.env.CURSOR_TEST_SAMPLE_INTERVAL_MS ?? 25); +const DURATION_MS = Number(process.env.CURSOR_TEST_DURATION_MS ?? 1800); +const SCREEN_FRAME_INTERVAL_MS = Number(process.env.CURSOR_TEST_SCREEN_FRAME_INTERVAL_MS ?? 100); +const READY_TIMEOUT_MS = 5000; +const OUTPUT_DIR = + process.env.CURSOR_TEST_OUTPUT_DIR ?? 
+ path.join(os.tmpdir(), `openscreen-cursor-native-${Date.now()}`); + +if (process.platform !== "win32") { + console.error("This diagnostic is Windows-only."); + process.exit(1); +} + +function encodePowerShell(script) { + return Buffer.from(script, "utf16le").toString("base64"); +} + +function quotePowerShellString(value) { + return `'${String(value).replaceAll("'", "''")}'`; +} + +function runPowerShell(script) { + return new Promise((resolve, reject) => { + const child = spawn( + "powershell.exe", + [ + "-NoLogo", + "-NoProfile", + "-NonInteractive", + "-ExecutionPolicy", + "Bypass", + "-EncodedCommand", + encodePowerShell(script), + ], + { stdio: ["ignore", "pipe", "pipe"], windowsHide: true }, + ); + + let stdout = ""; + let stderr = ""; + child.stdout.setEncoding("utf8"); + child.stderr.setEncoding("utf8"); + child.stdout.on("data", (chunk) => { + stdout += chunk; + }); + child.stderr.on("data", (chunk) => { + stderr += chunk; + }); + child.once("error", reject); + child.once("exit", (code, signal) => { + if (code === 0) { + resolve(stdout); + return; + } + + reject( + new Error(`PowerShell command failed (code=${code}, signal=${signal}): ${stderr.trim()}`), + ); + }); + }); +} + +function spawnPowerShell(script, { onStdout, onStderr } = {}) { + const child = spawn( + "powershell.exe", + [ + "-NoLogo", + "-NoProfile", + "-NonInteractive", + "-ExecutionPolicy", + "Bypass", + "-EncodedCommand", + encodePowerShell(script), + ], + { stdio: ["ignore", "pipe", "pipe"], windowsHide: true }, + ); + + child.stdout.setEncoding("utf8"); + child.stderr.setEncoding("utf8"); + child.stdout.on("data", (chunk) => onStdout?.(chunk)); + child.stderr.on("data", (chunk) => onStderr?.(chunk)); + + const done = new Promise((resolve, reject) => { + child.once("error", reject); + child.once("exit", (code, signal) => { + if (code === 0 || child.killed) { + resolve({ code, signal }); + return; + } + + reject(new Error(`PowerShell process failed (code=${code}, signal=${signal})`)); + 
}); + }); + + return { child, done }; +} + +function buildSamplerScript() { + return String.raw` +$ErrorActionPreference = 'Stop' +Add-Type -AssemblyName System.Drawing +Add-Type -AssemblyName System.Windows.Forms + +$source = @" +using System; +using System.Runtime.InteropServices; + +public static class OpenScreenCursorDiagnosticInterop { + [StructLayout(LayoutKind.Sequential)] + public struct POINT { + public int X; + public int Y; + } + + [StructLayout(LayoutKind.Sequential)] + public struct CURSORINFO { + public int cbSize; + public int flags; + public IntPtr hCursor; + public POINT ptScreenPos; + } + + [StructLayout(LayoutKind.Sequential)] + public struct ICONINFO { + [MarshalAs(UnmanagedType.Bool)] + public bool fIcon; + public int xHotspot; + public int yHotspot; + public IntPtr hbmMask; + public IntPtr hbmColor; + } + + [DllImport("user32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool GetCursorInfo(ref CURSORINFO pci); + + [DllImport("user32.dll", SetLastError = true)] + public static extern IntPtr LoadCursor(IntPtr hInstance, IntPtr lpCursorName); + + [DllImport("user32.dll", SetLastError = true)] + public static extern IntPtr CopyIcon(IntPtr hIcon); + + [DllImport("user32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool DestroyIcon(IntPtr hIcon); + + [DllImport("user32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool GetIconInfo(IntPtr hIcon, out ICONINFO piconinfo); + + [DllImport("gdi32.dll", SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool DeleteObject(IntPtr hObject); +} +"@ + +Add-Type -TypeDefinition $source + +$standardCursors = @{ + arrow = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32512)) + text = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32513)) + wait = 
[OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32514)) + crosshair = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32515)) + 'up-arrow' = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32516)) + 'resize-nwse' = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32642)) + 'resize-nesw' = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32643)) + 'resize-ew' = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32644)) + 'resize-ns' = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32645)) + move = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32646)) + 'not-allowed' = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32648)) + pointer = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32649)) + 'app-starting' = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32650)) + help = [OpenScreenCursorDiagnosticInterop]::LoadCursor([IntPtr]::Zero, [IntPtr]::new(32651)) +} + +function Get-StandardCursorType($cursorHandle) { + if ($cursorHandle -eq [IntPtr]::Zero) { + return $null + } + + foreach ($entry in $standardCursors.GetEnumerator()) { + if ($entry.Value -eq $cursorHandle) { + return $entry.Key + } + } + + return $null +} + +function Write-JsonLine($payload) { + [Console]::Out.WriteLine(($payload | ConvertTo-Json -Compress -Depth 6)) +} + +function Get-CursorAsset($cursorHandle, $cursorId) { + $copiedHandle = [OpenScreenCursorDiagnosticInterop]::CopyIcon($cursorHandle) + if ($copiedHandle -eq [IntPtr]::Zero) { + return $null + } + + $iconInfo = New-Object OpenScreenCursorDiagnosticInterop+ICONINFO + $hasIconInfo = [OpenScreenCursorDiagnosticInterop]::GetIconInfo($copiedHandle, [ref]$iconInfo) + + try { + $icon = 
[System.Drawing.Icon]::FromHandle($copiedHandle) + $bitmap = New-Object System.Drawing.Bitmap $icon.Width, $icon.Height, ([System.Drawing.Imaging.PixelFormat]::Format32bppArgb) + $graphics = [System.Drawing.Graphics]::FromImage($bitmap) + $memoryStream = New-Object System.IO.MemoryStream + + try { + $graphics.Clear([System.Drawing.Color]::Transparent) + $graphics.DrawIcon($icon, 0, 0) + $bitmap.Save($memoryStream, [System.Drawing.Imaging.ImageFormat]::Png) + $base64 = [System.Convert]::ToBase64String($memoryStream.ToArray()) + + return @{ + id = $cursorId + imageDataUrl = "data:image/png;base64,$base64" + width = $bitmap.Width + height = $bitmap.Height + hotspotX = if ($hasIconInfo) { $iconInfo.xHotspot } else { 0 } + hotspotY = if ($hasIconInfo) { $iconInfo.yHotspot } else { 0 } + } + } + finally { + $memoryStream.Dispose() + $graphics.Dispose() + $bitmap.Dispose() + $icon.Dispose() + } + } + finally { + if ($hasIconInfo) { + if ($iconInfo.hbmMask -ne [IntPtr]::Zero) { + [OpenScreenCursorDiagnosticInterop]::DeleteObject($iconInfo.hbmMask) | Out-Null + } + if ($iconInfo.hbmColor -ne [IntPtr]::Zero) { + [OpenScreenCursorDiagnosticInterop]::DeleteObject($iconInfo.hbmColor) | Out-Null + } + } + [OpenScreenCursorDiagnosticInterop]::DestroyIcon($copiedHandle) | Out-Null + } +} + +Write-JsonLine @{ type = 'ready'; timestampMs = [DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds() } + +$lastCursorId = $null +$screenBounds = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds +while ($true) { + $cursorInfo = New-Object OpenScreenCursorDiagnosticInterop+CURSORINFO + $cursorInfo.cbSize = [Runtime.InteropServices.Marshal]::SizeOf([type][OpenScreenCursorDiagnosticInterop+CURSORINFO]) + + if (-not [OpenScreenCursorDiagnosticInterop]::GetCursorInfo([ref]$cursorInfo)) { + Write-JsonLine @{ type = 'error'; timestampMs = [DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds(); message = 'GetCursorInfo failed' } + Start-Sleep -Milliseconds ${SAMPLE_INTERVAL_MS} + continue + } + + $visible = 
($cursorInfo.flags -band 1) -ne 0 + $cursorId = if ($cursorInfo.hCursor -eq [IntPtr]::Zero) { $null } else { ('0x{0:X}' -f $cursorInfo.hCursor.ToInt64()) } + $cursorType = Get-StandardCursorType $cursorInfo.hCursor + $asset = $null + + if ($visible -and $cursorId -and $cursorId -ne $lastCursorId) { + $asset = Get-CursorAsset -cursorHandle $cursorInfo.hCursor -cursorId $cursorId + if ($asset -and $cursorType) { + $asset.cursorType = $cursorType + } + $lastCursorId = $cursorId + } + + Write-JsonLine @{ + type = 'sample' + timestampMs = [DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds() + x = $cursorInfo.ptScreenPos.X + y = $cursorInfo.ptScreenPos.Y + visible = $visible + handle = $cursorId + cursorType = $cursorType + bounds = @{ + x = $screenBounds.Left + y = $screenBounds.Top + width = $screenBounds.Width + height = $screenBounds.Height + } + asset = $asset + } + + Start-Sleep -Milliseconds ${SAMPLE_INTERVAL_MS} +} +`; +} + +function buildMousePathScript(durationMs) { + const stepMs = 120; + const steps = Math.max(8, Math.floor(durationMs / stepMs)); + + return String.raw` +$ErrorActionPreference = 'Stop' +Add-Type -AssemblyName System.Windows.Forms + +$source = @" +using System.Runtime.InteropServices; + +public static class OpenScreenMouseDiagnosticInterop { + [DllImport("user32.dll")] + [return: MarshalAs(UnmanagedType.Bool)] + public static extern bool SetCursorPos(int X, int Y); +} +"@ + +Add-Type -TypeDefinition $source + +$bounds = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds +$points = @() +for ($i = 0; $i -lt ${steps}; $i++) { + $t = if (${steps} -le 1) { 0 } else { $i / (${steps} - 1) } + $x = [int]($bounds.Left + 80 + (($bounds.Width - 160) * $t)) + $wave = [Math]::Sin($t * [Math]::PI * 2) + $y = [int]($bounds.Top + ($bounds.Height / 2) + ($wave * [Math]::Min(180, $bounds.Height / 4))) + $points += @{ x = $x; y = $y } +} + +foreach ($point in $points) { + [OpenScreenMouseDiagnosticInterop]::SetCursorPos($point.x, $point.y) | Out-Null + Start-Sleep 
-Milliseconds ${stepMs} +} +`; +} + +function buildScreenRecorderScript(outputDir, durationMs) { + const framesDir = path.join(outputDir, "screen-frames"); + + return String.raw` +$ErrorActionPreference = 'Stop' +Add-Type -AssemblyName System.Drawing +Add-Type -AssemblyName System.Windows.Forms + +$framesDir = ${quotePowerShellString(framesDir)} +New-Item -ItemType Directory -Force -Path $framesDir | Out-Null + +$bounds = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds +$targetWidth = 960 +$targetHeight = [int]([Math]::Round($targetWidth * ($bounds.Height / $bounds.Width))) +$frames = New-Object System.Collections.Generic.List[object] +$stopwatch = [System.Diagnostics.Stopwatch]::StartNew() +$index = 0 + +while ($stopwatch.ElapsedMilliseconds -le ${durationMs + 700}) { + $sourceBitmap = New-Object System.Drawing.Bitmap $bounds.Width, $bounds.Height, ([System.Drawing.Imaging.PixelFormat]::Format32bppArgb) + $graphics = [System.Drawing.Graphics]::FromImage($sourceBitmap) + $scaledBitmap = New-Object System.Drawing.Bitmap $targetWidth, $targetHeight, ([System.Drawing.Imaging.PixelFormat]::Format32bppArgb) + $scaledGraphics = [System.Drawing.Graphics]::FromImage($scaledBitmap) + $timestampMs = [DateTimeOffset]::UtcNow.ToUnixTimeMilliseconds() + $fileName = ('frame_{0:D4}.png' -f $index) + $path = Join-Path $framesDir $fileName + + try { + $graphics.CopyFromScreen($bounds.Left, $bounds.Top, 0, 0, $bounds.Size) + $scaledGraphics.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic + $scaledGraphics.DrawImage($sourceBitmap, 0, 0, $targetWidth, $targetHeight) + $scaledBitmap.Save($path, [System.Drawing.Imaging.ImageFormat]::Png) + $frames.Add(@{ + index = $index + timestampMs = $timestampMs + path = $path + width = $targetWidth + height = $targetHeight + bounds = @{ + x = $bounds.Left + y = $bounds.Top + width = $bounds.Width + height = $bounds.Height + } + }) | Out-Null + } + finally { + $scaledGraphics.Dispose() + 
$scaledBitmap.Dispose() + $graphics.Dispose() + $sourceBitmap.Dispose() + } + + $index += 1 + Start-Sleep -Milliseconds ${SCREEN_FRAME_INTERVAL_MS} +} + +($frames | ConvertTo-Json -Depth 6) | Set-Content -Path (Join-Path $framesDir 'frames.json') -Encoding UTF8 +`; +} + +function waitForReady(events) { + return new Promise((resolve, reject) => { + const startedAt = Date.now(); + const timer = setInterval(() => { + if (events.some((event) => event.type === "ready")) { + clearInterval(timer); + resolve(); + return; + } + + if (Date.now() - startedAt > READY_TIMEOUT_MS) { + clearInterval(timer); + reject(new Error("Timed out waiting for cursor sampler readiness.")); + } + }, 25); + }); +} + +function writeAssets(assets, outputDir) { + const assetDir = path.join(outputDir, "assets"); + fs.mkdirSync(assetDir, { recursive: true }); + + for (const asset of assets.values()) { + const base64 = asset.imageDataUrl?.replace(/^data:image\/png;base64,/, ""); + if (!base64) { + continue; + } + + const safeId = String(asset.id).replace(/[^a-zA-Z0-9_-]/g, "_"); + fs.writeFileSync(path.join(assetDir, `${safeId}.png`), Buffer.from(base64, "base64")); + } +} + +function toRecordingData(samples, assets) { + const firstTimestampMs = samples[0]?.timestampMs ?? Date.now(); + const normalizedSamples = samples.flatMap((sample) => { + const bounds = sample.bounds; + if (!bounds || bounds.width <= 0 || bounds.height <= 0) { + return []; + } + + return [ + { + timeMs: Math.max(0, sample.timestampMs - firstTimestampMs), + cx: (sample.x - bounds.x) / bounds.width, + cy: (sample.y - bounds.y) / bounds.height, + assetId: sample.handle, + visible: Boolean(sample.visible), + cursorType: sample.cursorType ?? null, + }, + ]; + }); + + return { + version: 2, + provider: assets.size > 0 ? 
"native" : "none", + samples: normalizedSamples, + assets: [...assets.values()].map((asset) => ({ + id: asset.id, + platform: "win32", + imageDataUrl: asset.imageDataUrl, + width: asset.width, + height: asset.height, + hotspotX: asset.hotspotX, + hotspotY: asset.hotspotY, + scaleFactor: 1, + cursorType: asset.cursorType ?? null, + })), + }; +} + +function escapeScriptJson(value) { + return JSON.stringify(value).replace(/ + + + + +OpenScreen native cursor diagnostic + + + +
+

OpenScreen native cursor diagnostic

+
+
${report.sampleCount}samples
+
${report.assetCount}assets
+
${report.uniquePositionCount}positions
+
${report.errorCount}errors
+
+

The red cross is the captured native hotspot. Native bitmaps are drawn at 1x, 2x, and 3x. The last cursor is a crisp vector 3x replacement anchored on the same hotspot.

+ +
+
+ + + +`; +} + +function readScreenFrames(outputDir, recordingStartTimestampMs) { + const framesJsonPath = path.join(outputDir, "screen-frames", "frames.json"); + if (!fs.existsSync(framesJsonPath)) { + return []; + } + + const rawFrames = JSON.parse(fs.readFileSync(framesJsonPath, "utf8").replace(/^\uFEFF/, "")); + const frames = Array.isArray(rawFrames) ? rawFrames : [rawFrames]; + + return frames + .filter((frame) => frame?.path && fs.existsSync(frame.path)) + .map((frame) => ({ + ...frame, + timeMs: Math.max(0, frame.timestampMs - recordingStartTimestampMs), + imageDataUrl: `data:image/png;base64,${fs.readFileSync(frame.path).toString("base64")}`, + })); +} + +function buildRealCaptureHtml(report, recordingData, screenFrames) { + return ` + + + + +OpenScreen native cursor real capture diagnostic + + + +
+

Real screen capture + reconstructed native cursor

+

Background frames are real Windows screenshots. Native bitmaps are reconstructed at 1x, 2x, and 3x; the last cursor is a crisp vector 3x replacement. The red cross marks the recorded hotspot.

+ +
+ + + + +`; +} + +function findPlaywrightChromiumExecutable(defaultPath) { + if (fs.existsSync(defaultPath)) { + return defaultPath; + } + + const baseDir = path.join(process.env.LOCALAPPDATA ?? "", "ms-playwright"); + if (!baseDir || !fs.existsSync(baseDir)) { + return defaultPath; + } + + const candidates = fs + .readdirSync(baseDir, { withFileTypes: true }) + .filter((entry) => entry.isDirectory() && entry.name.startsWith("chromium-")) + .map((entry) => path.join(baseDir, entry.name, "chrome-win64", "chrome.exe")) + .filter((candidate) => fs.existsSync(candidate)) + .sort() + .reverse(); + + return candidates[0] ?? defaultPath; +} + +async function writePreviewVideo(reportPath, outputPath) { + const { chromium } = await import("playwright"); + const browser = await chromium.launch({ + executablePath: findPlaywrightChromiumExecutable(chromium.executablePath()), + headless: true, + }); + try { + const page = await browser.newPage({ viewport: { width: 1180, height: 760 } }); + await page.goto(`file:///${reportPath.replaceAll("\\", "/")}`); + const base64 = await page.evaluate(() => window.__exportWebm()); + fs.writeFileSync(outputPath, Buffer.from(base64, "base64")); + } finally { + await browser.close(); + } +} + +function assertReport(report) { + const failures = []; + if (report.sampleCount < Math.floor(DURATION_MS / SAMPLE_INTERVAL_MS / 3)) { + failures.push(`Too few samples: ${report.sampleCount}.`); + } + if (report.visibleSampleCount === 0) { + failures.push("No visible cursor samples were captured."); + } + if (report.assetCount === 0) { + failures.push("No cursor asset PNG was captured."); + } + if (report.uniquePositionCount < 4) { + failures.push(`Cursor movement was not observed enough times: ${report.uniquePositionCount}.`); + } + if (report.errorCount > 0) { + failures.push(`Sampler reported ${report.errorCount} error event(s).`); + } + + if (failures.length > 0) { + throw new Error(failures.join(" ")); + } +} + +fs.mkdirSync(OUTPUT_DIR, { recursive: 
true }); + +const events = []; +const assets = new Map(); +let lineBuffer = ""; +let stoppingSampler = false; +const sampler = spawnPowerShell(buildSamplerScript(), { + onStdout: (chunk) => { + lineBuffer += chunk; + const lines = lineBuffer.split(/\r?\n/); + lineBuffer = lines.pop() ?? ""; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) { + continue; + } + + const event = JSON.parse(trimmed); + events.push(event); + if (event.asset?.id && !assets.has(event.asset.id)) { + assets.set(event.asset.id, event.asset); + } + } + }, + onStderr: (chunk) => { + if (!stoppingSampler && !chunk.startsWith("#< CLIXML")) { + process.stderr.write(`[cursor-native-test] ${chunk}`); + } + }, +}); +let screenRecorder = null; + +try { + await waitForReady(events); + screenRecorder = spawnPowerShell(buildScreenRecorderScript(OUTPUT_DIR, DURATION_MS), { + onStderr: (chunk) => { + if (!chunk.startsWith("#< CLIXML") && !chunk.startsWith(" setTimeout(resolve, 150)); + await runPowerShell(buildMousePathScript(DURATION_MS)); + await new Promise((resolve) => setTimeout(resolve, Math.max(250, SAMPLE_INTERVAL_MS * 3))); + await screenRecorder.done; +} finally { + if (!sampler.child.killed) { + stoppingSampler = true; + sampler.child.kill(); + } + if (screenRecorder && !screenRecorder.child.killed) { + screenRecorder.child.kill(); + } +} + +const samples = events.filter((event) => event.type === "sample"); +const errors = events.filter((event) => event.type === "error"); +const recordingStartTimestampMs = samples[0]?.timestampMs ?? 
Date.now(); +const uniquePositions = new Set(samples.map((sample) => `${sample.x},${sample.y}`)); +const report = { + outputDir: OUTPUT_DIR, + sampleIntervalMs: SAMPLE_INTERVAL_MS, + durationMs: DURATION_MS, + eventCount: events.length, + sampleCount: samples.length, + visibleSampleCount: samples.filter((sample) => sample.visible).length, + assetCount: assets.size, + uniqueCursorHandleCount: new Set(samples.map((sample) => sample.handle).filter(Boolean)).size, + uniquePositionCount: uniquePositions.size, + errorCount: errors.length, + firstSample: samples[0] ?? null, + lastSample: samples.at(-1) ?? null, + assets: [...assets.values()].map((asset) => ({ + id: asset.id, + width: asset.width, + height: asset.height, + hotspotX: asset.hotspotX, + hotspotY: asset.hotspotY, + })), +}; +const recordingData = toRecordingData(samples, assets); +const screenFrames = readScreenFrames(OUTPUT_DIR, recordingStartTimestampMs); +const reportHtmlPath = path.join(OUTPUT_DIR, "report.html"); +const previewVideoPath = path.join(OUTPUT_DIR, "preview.webm"); +const realCaptureHtmlPath = path.join(OUTPUT_DIR, "real-capture-report.html"); +const realCaptureVideoPath = path.join(OUTPUT_DIR, "real-capture-preview.webm"); + +writeAssets(assets, OUTPUT_DIR); +fs.writeFileSync(path.join(OUTPUT_DIR, "events.json"), JSON.stringify(events, null, 2)); +fs.writeFileSync( + path.join(OUTPUT_DIR, "cursor-recording-data.json"), + JSON.stringify(recordingData, null, 2), +); +fs.writeFileSync(path.join(OUTPUT_DIR, "report.json"), JSON.stringify(report, null, 2)); +fs.writeFileSync(reportHtmlPath, buildVisualReportHtml(report, recordingData)); +if (screenFrames.length > 0) { + fs.writeFileSync(realCaptureHtmlPath, buildRealCaptureHtml(report, recordingData, screenFrames)); + report.screenFrameCount = screenFrames.length; +} + +try { + await writePreviewVideo(reportHtmlPath, previewVideoPath); + report.previewVideoPath = previewVideoPath; +} catch (error) { + report.previewVideoError = error instanceof 
Error ? error.message : String(error); +} + +if (screenFrames.length > 0) { + try { + await writePreviewVideo(realCaptureHtmlPath, realCaptureVideoPath); + report.realCaptureVideoPath = realCaptureVideoPath; + } catch (error) { + report.realCaptureVideoError = error instanceof Error ? error.message : String(error); + } +} + +fs.writeFileSync(path.join(OUTPUT_DIR, "report.json"), JSON.stringify(report, null, 2)); + +assertReport(report); + +console.log(JSON.stringify(report, null, 2)); diff --git a/src/components/video-editor/VideoPlayback.tsx b/src/components/video-editor/VideoPlayback.tsx index d7b3836..0586e54 100644 --- a/src/components/video-editor/VideoPlayback.tsx +++ b/src/components/video-editor/VideoPlayback.tsx @@ -26,10 +26,10 @@ import { type WebcamSizePreset, } from "@/lib/compositeLayout"; import { - getNativeCursorDisplayMetrics, hasNativeCursorRecordingData, projectNativeCursorToStage, resolveInterpolatedNativeCursorFrame, + resolveNativeCursorRenderAsset, } from "@/lib/cursor/nativeCursor"; import { classifyWallpaper, DEFAULT_WALLPAPER, resolveImageWallpaperUrl } from "@/lib/wallpaper"; import { getCssClipPath } from "@/lib/webcamMaskShapes"; @@ -1324,19 +1324,20 @@ const VideoPlayback = forwardRef( sample: frame.sample, }); if (projectedPoint) { - const metrics = getNativeCursorDisplayMetrics( + const renderAsset = resolveNativeCursorRenderAsset( frame.asset, window.devicePixelRatio || 1, + frame.sample, ); const scale = Math.max(0, cursorSizeRef.current); - if (nativeCursorImg.dataset.cursorId !== frame.asset.id) { - nativeCursorImg.src = frame.asset.imageDataUrl; - nativeCursorImg.dataset.cursorId = frame.asset.id; + if (nativeCursorImg.dataset.cursorId !== renderAsset.id) { + nativeCursorImg.src = renderAsset.imageDataUrl; + nativeCursorImg.dataset.cursorId = renderAsset.id; } - nativeCursorImg.style.left = `${projectedPoint.x - metrics.hotspotX * scale}px`; - nativeCursorImg.style.top = `${projectedPoint.y - metrics.hotspotY * scale}px`; - 
nativeCursorImg.style.width = `${metrics.width * scale}px`; - nativeCursorImg.style.height = `${metrics.height * scale}px`; + nativeCursorImg.style.left = `${projectedPoint.x - renderAsset.hotspotX * scale}px`; + nativeCursorImg.style.top = `${projectedPoint.y - renderAsset.hotspotY * scale}px`; + nativeCursorImg.style.width = `${renderAsset.width * scale}px`; + nativeCursorImg.style.height = `${renderAsset.height * scale}px`; nativeCursorImg.style.display = "block"; } else { nativeCursorImg.style.display = "none"; diff --git a/src/lib/cursor/nativeCursor.ts b/src/lib/cursor/nativeCursor.ts index b32bd9e..d6ae220 100644 --- a/src/lib/cursor/nativeCursor.ts +++ b/src/lib/cursor/nativeCursor.ts @@ -1,9 +1,20 @@ import { type Container, Point } from "pixi.js"; +import crosshairUrl from "@/assets/cursors/Cursor=Cross.svg"; +import arrowUrl from "@/assets/cursors/Cursor=Default.svg"; +import pointerUrl from "@/assets/cursors/Cursor=Hand-(Pointing).svg"; +import notAllowedUrl from "@/assets/cursors/Cursor=Menu.svg"; +import moveUrl from "@/assets/cursors/Cursor=Move.svg"; +import resizeNeswUrl from "@/assets/cursors/Cursor=Resize-North-East-South-West.svg"; +import resizeNsUrl from "@/assets/cursors/Cursor=Resize-North-South.svg"; +import resizeNwseUrl from "@/assets/cursors/Cursor=Resize-North-West-South-East.svg"; +import resizeEwUrl from "@/assets/cursors/Cursor=Resize-West-East.svg"; +import textUrl from "@/assets/cursors/Cursor=Text-Cursor.svg"; import type { CropRegion } from "@/components/video-editor/types"; import type { CursorRecordingData, CursorRecordingSample, NativeCursorAsset, + NativeCursorType, } from "@/native/contracts"; export interface ActiveNativeCursorFrame { @@ -23,6 +34,87 @@ function clamp(value: number, min: number, max: number) { return Math.min(max, Math.max(min, value)); } +interface PrettyNativeCursorAsset { + imageDataUrl: string; + width: number; + height: number; + hotspotX: number; + hotspotY: number; +} + +const 
PRETTY_NATIVE_CURSOR_ASSETS: Partial> = { + arrow: { + imageDataUrl: arrowUrl, + width: 32, + height: 32, + hotspotX: 5.8, + hotspotY: 3.2, + }, + text: { + imageDataUrl: textUrl, + width: 32, + height: 32, + hotspotX: 16, + hotspotY: 16, + }, + pointer: { + imageDataUrl: pointerUrl, + width: 32, + height: 32, + hotspotX: 11.8, + hotspotY: 2.6, + }, + crosshair: { + imageDataUrl: crosshairUrl, + width: 32, + height: 32, + hotspotX: 16, + hotspotY: 16, + }, + "resize-ew": { + imageDataUrl: resizeEwUrl, + width: 32, + height: 32, + hotspotX: 16, + hotspotY: 16, + }, + "resize-ns": { + imageDataUrl: resizeNsUrl, + width: 32, + height: 32, + hotspotX: 16, + hotspotY: 16, + }, + "resize-nesw": { + imageDataUrl: resizeNeswUrl, + width: 32, + height: 32, + hotspotX: 16, + hotspotY: 16, + }, + "resize-nwse": { + imageDataUrl: resizeNwseUrl, + width: 32, + height: 32, + hotspotX: 16, + hotspotY: 16, + }, + move: { + imageDataUrl: moveUrl, + width: 32, + height: 32, + hotspotX: 16, + hotspotY: 16, + }, + "not-allowed": { + imageDataUrl: notAllowedUrl, + width: 32, + height: 32, + hotspotX: 16, + hotspotY: 16, + }, +}; + export function hasNativeCursorRecordingData( recordingData: CursorRecordingData | null | undefined, ): recordingData is CursorRecordingData { @@ -169,3 +261,39 @@ export function getNativeCursorDisplayMetrics(asset: NativeCursorAsset, deviceSc hotspotY: asset.hotspotY / scaleFactor, }; } + +export function resolvePrettyNativeCursorAsset( + asset: NativeCursorAsset, + sample?: CursorRecordingSample, +) { + const cursorType = sample?.cursorType ?? asset.cursorType ?? null; + return cursorType ? (PRETTY_NATIVE_CURSOR_ASSETS[cursorType] ?? null) : null; +} + +export function resolveNativeCursorRenderAsset( + asset: NativeCursorAsset, + deviceScaleFactor: number, + sample?: CursorRecordingSample, +) { + const prettyAsset = resolvePrettyNativeCursorAsset(asset, sample); + if (prettyAsset) { + return { + id: `pretty:${sample?.cursorType ?? 
asset.cursorType}`, + imageDataUrl: prettyAsset.imageDataUrl, + width: prettyAsset.width, + height: prettyAsset.height, + hotspotX: prettyAsset.hotspotX, + hotspotY: prettyAsset.hotspotY, + }; + } + + const metrics = getNativeCursorDisplayMetrics(asset, deviceScaleFactor); + return { + id: asset.id, + imageDataUrl: asset.imageDataUrl, + width: metrics.width, + height: metrics.height, + hotspotX: metrics.hotspotX, + hotspotY: metrics.hotspotY, + }; +} diff --git a/src/lib/exporter/frameRenderer.ts b/src/lib/exporter/frameRenderer.ts index c43908f..a1e20cc 100644 --- a/src/lib/exporter/frameRenderer.ts +++ b/src/lib/exporter/frameRenderer.ts @@ -57,13 +57,13 @@ import { type StyledRenderRect, } from "@/lib/compositeLayout"; import { - getNativeCursorDisplayMetrics, projectNativeCursorToStage, resolveInterpolatedNativeCursorFrame, + resolveNativeCursorRenderAsset, } from "@/lib/cursor/nativeCursor"; import { BackgroundLoadError, classifyWallpaper, resolveImageWallpaperUrl } from "@/lib/wallpaper"; import { drawCanvasClipPath } from "@/lib/webcamMaskShapes"; -import type { CursorRecordingData, NativeCursorAsset } from "@/native/contracts"; +import type { CursorRecordingData } from "@/native/contracts"; import { renderAnnotations } from "./annotationRenderer"; import { getLinearGradientPoints, @@ -585,19 +585,23 @@ export class FrameRenderer { return; } - const image = await this.getCursorImage(activeNativeCursor.asset); - const metrics = getNativeCursorDisplayMetrics(activeNativeCursor.asset, 1); + const renderAsset = resolveNativeCursorRenderAsset( + activeNativeCursor.asset, + 1, + activeNativeCursor.sample, + ); + const image = await this.getCursorImage(renderAsset); const scale = Math.max(0, this.config.cursorScale ?? 
1); this.compositeCtx.drawImage( image, - projectedPoint.x - metrics.hotspotX * scale, - projectedPoint.y - metrics.hotspotY * scale, - metrics.width * scale, - metrics.height * scale, + projectedPoint.x - renderAsset.hotspotX * scale, + projectedPoint.y - renderAsset.hotspotY * scale, + renderAsset.width * scale, + renderAsset.height * scale, ); } - private async getCursorImage(asset: NativeCursorAsset) { + private async getCursorImage(asset: { id: string; imageDataUrl: string }) { const cachedImage = this.cursorImageCache.get(asset.id); if (cachedImage) { return cachedImage; diff --git a/src/native/contracts.ts b/src/native/contracts.ts index 73d53db..a3c9087 100644 --- a/src/native/contracts.ts +++ b/src/native/contracts.ts @@ -3,6 +3,21 @@ export const NATIVE_BRIDGE_VERSION = 1; export type NativePlatform = "darwin" | "win32" | "linux"; export type CursorProviderKind = "native" | "none"; +export type NativeCursorType = + | "arrow" + | "text" + | "pointer" + | "crosshair" + | "resize-ew" + | "resize-ns" + | "resize-nesw" + | "resize-nwse" + | "move" + | "not-allowed" + | "wait" + | "app-starting" + | "help" + | "up-arrow"; export interface CursorTelemetryPoint { timeMs: number; @@ -13,6 +28,7 @@ export interface CursorTelemetryPoint { export interface CursorRecordingSample extends CursorTelemetryPoint { assetId?: string | null; visible?: boolean; + cursorType?: NativeCursorType | null; } export interface NativeCursorAsset { @@ -24,6 +40,7 @@ export interface NativeCursorAsset { hotspotX: number; hotspotY: number; scaleFactor?: number; + cursorType?: NativeCursorType | null; } export interface CursorRecordingData { From ef36da4a4f0f859eb0b4d451f05011a6ee4c21e4 Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 10:41:34 +0200 Subject: [PATCH 07/43] feat: complete windows cursor assets --- electron/ipc/handlers.ts | 80 +++------------------- src/assets/cursors/Cursor=App-Starting.svg | 8 +++ src/assets/cursors/Cursor=Help.svg | 8 +++ 
src/assets/cursors/Cursor=Not-Allowed.svg | 7 ++ src/assets/cursors/Cursor=Up-Arrow.svg | 4 ++ src/assets/cursors/Cursor=Wait.svg | 6 ++ src/lib/cursor/nativeCursor.ts | 34 ++++++++- 7 files changed, 77 insertions(+), 70 deletions(-) create mode 100644 src/assets/cursors/Cursor=App-Starting.svg create mode 100644 src/assets/cursors/Cursor=Help.svg create mode 100644 src/assets/cursors/Cursor=Not-Allowed.svg create mode 100644 src/assets/cursors/Cursor=Up-Arrow.svg create mode 100644 src/assets/cursors/Cursor=Wait.svg diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index c5a1269..7b16f2f 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -3,7 +3,7 @@ import { createRequire } from "node:module"; import os from "node:os"; import path from "node:path"; import { fileURLToPath, pathToFileURL } from "node:url"; - +import type { DesktopCapturerSource } from "electron"; import { app, BrowserWindow, @@ -14,7 +14,6 @@ import { shell, systemPreferences, } from "electron"; -import type { DesktopCapturerSource } from "electron"; import { normalizeProjectMedia, normalizeRecordingSession, @@ -410,46 +409,6 @@ function setCurrentRecordingSessionState(session: RecordingSession | null) { currentVideoPath = session?.screenVideoPath ?? null; } -async function storeRecordedSessionFiles(payload: StoreRecordedSessionInput) { - const createdAt = - typeof payload.createdAt === "number" && Number.isFinite(payload.createdAt) - ? payload.createdAt - : Date.now(); - const screenVideoPath = resolveRecordingOutputPath(payload.screen.fileName); - await fs.writeFile(screenVideoPath, Buffer.from(payload.screen.videoData)); - - let webcamVideoPath: string | undefined; - if (payload.webcam) { - webcamVideoPath = resolveRecordingOutputPath(payload.webcam.fileName); - await fs.writeFile(webcamVideoPath, Buffer.from(payload.webcam.videoData)); - } - - const session: RecordingSession = webcamVideoPath - ? 
{ screenVideoPath, webcamVideoPath, createdAt } - : { screenVideoPath, createdAt }; - setCurrentRecordingSessionState(session); - currentProjectPath = null; - - const telemetryPath = `${screenVideoPath}.cursor.json`; - if (pendingCursorRecordingData && pendingCursorRecordingData.samples.length > 0) { - await fs.writeFile(telemetryPath, JSON.stringify(pendingCursorRecordingData, null, 2), "utf-8"); - } - pendingCursorRecordingData = null; - - const sessionManifestPath = path.join( - RECORDINGS_DIR, - `${path.parse(payload.screen.fileName).name}${RECORDING_SESSION_SUFFIX}`, - ); - await fs.writeFile(sessionManifestPath, JSON.stringify(session, null, 2), "utf-8"); - - return { - success: true, - path: screenVideoPath, - session, - message: "Recording session stored successfully", - }; -} - export function registerIpcHandlers( createEditorWindow: () => void, createSourceSelectorWindow: () => BrowserWindow, @@ -612,12 +571,12 @@ export function registerIpcHandlers( typeof payload.createdAt === "number" && Number.isFinite(payload.createdAt) ? payload.createdAt : Date.now(); - const screenVideoPath = path.join(RECORDINGS_DIR, payload.screen.fileName); + const screenVideoPath = resolveRecordingOutputPath(payload.screen.fileName); await fs.writeFile(screenVideoPath, Buffer.from(payload.screen.videoData)); let webcamVideoPath: string | undefined; if (payload.webcam) { - webcamVideoPath = path.join(RECORDINGS_DIR, payload.webcam.fileName); + webcamVideoPath = resolveRecordingOutputPath(payload.webcam.fileName); await fs.writeFile(webcamVideoPath, Buffer.from(payload.webcam.videoData)); } @@ -625,7 +584,6 @@ export function registerIpcHandlers( ? 
{ screenVideoPath, webcamVideoPath, createdAt } : { screenVideoPath, createdAt }; setCurrentRecordingSessionState(session); - currentVideoPath = screenVideoPath; currentProjectPath = null; const telemetryPath = `${screenVideoPath}.cursor.json`; @@ -638,6 +596,12 @@ export function registerIpcHandlers( } pendingCursorRecordingData = null; + const sessionManifestPath = path.join( + RECORDINGS_DIR, + `${path.parse(payload.screen.fileName).name}${RECORDING_SESSION_SUFFIX}`, + ); + await fs.writeFile(sessionManifestPath, JSON.stringify(session, null, 2), "utf-8"); + return { success: true, path: screenVideoPath, @@ -1010,18 +974,7 @@ export function registerIpcHandlers( const content = await fs.readFile(filePath, "utf-8"); const project = JSON.parse(content); currentProjectPath = filePath; - if (project && typeof project === "object") { - const rawProject = project as { media?: unknown; videoPath?: unknown }; - const media = - normalizeProjectMedia(rawProject.media) ?? - (typeof rawProject.videoPath === "string" - ? { - screenVideoPath: - normalizeVideoSourcePath(rawProject.videoPath) ?? rawProject.videoPath, - } - : null); - setCurrentRecordingSessionState(media ? { ...media, createdAt: Date.now() } : null); - } + setCurrentRecordingSessionState(await getApprovedProjectSession(project, filePath)); return { success: true, @@ -1050,18 +1003,7 @@ export function registerIpcHandlers( const content = await fs.readFile(currentProjectPath, "utf-8"); const project = JSON.parse(content); - if (project && typeof project === "object") { - const rawProject = project as { media?: unknown; videoPath?: unknown }; - const media = - normalizeProjectMedia(rawProject.media) ?? - (typeof rawProject.videoPath === "string" - ? { - screenVideoPath: - normalizeVideoSourcePath(rawProject.videoPath) ?? rawProject.videoPath, - } - : null); - setCurrentRecordingSessionState(media ? 
{ ...media, createdAt: Date.now() } : null); - } + setCurrentRecordingSessionState(await getApprovedProjectSession(project, currentProjectPath)); return { success: true, path: currentProjectPath, diff --git a/src/assets/cursors/Cursor=App-Starting.svg b/src/assets/cursors/Cursor=App-Starting.svg new file mode 100644 index 0000000..7a10d40 --- /dev/null +++ b/src/assets/cursors/Cursor=App-Starting.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/src/assets/cursors/Cursor=Help.svg b/src/assets/cursors/Cursor=Help.svg new file mode 100644 index 0000000..d187c52 --- /dev/null +++ b/src/assets/cursors/Cursor=Help.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/src/assets/cursors/Cursor=Not-Allowed.svg b/src/assets/cursors/Cursor=Not-Allowed.svg new file mode 100644 index 0000000..8b2c3f8 --- /dev/null +++ b/src/assets/cursors/Cursor=Not-Allowed.svg @@ -0,0 +1,7 @@ + + + + + + + diff --git a/src/assets/cursors/Cursor=Up-Arrow.svg b/src/assets/cursors/Cursor=Up-Arrow.svg new file mode 100644 index 0000000..b742e70 --- /dev/null +++ b/src/assets/cursors/Cursor=Up-Arrow.svg @@ -0,0 +1,4 @@ + + + + diff --git a/src/assets/cursors/Cursor=Wait.svg b/src/assets/cursors/Cursor=Wait.svg new file mode 100644 index 0000000..2b56934 --- /dev/null +++ b/src/assets/cursors/Cursor=Wait.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/lib/cursor/nativeCursor.ts b/src/lib/cursor/nativeCursor.ts index d6ae220..30f7a46 100644 --- a/src/lib/cursor/nativeCursor.ts +++ b/src/lib/cursor/nativeCursor.ts @@ -1,14 +1,18 @@ import { type Container, Point } from "pixi.js"; +import appStartingUrl from "@/assets/cursors/Cursor=App-Starting.svg"; import crosshairUrl from "@/assets/cursors/Cursor=Cross.svg"; import arrowUrl from "@/assets/cursors/Cursor=Default.svg"; import pointerUrl from "@/assets/cursors/Cursor=Hand-(Pointing).svg"; -import notAllowedUrl from "@/assets/cursors/Cursor=Menu.svg"; +import helpUrl from "@/assets/cursors/Cursor=Help.svg"; import moveUrl from 
"@/assets/cursors/Cursor=Move.svg"; +import notAllowedUrl from "@/assets/cursors/Cursor=Not-Allowed.svg"; import resizeNeswUrl from "@/assets/cursors/Cursor=Resize-North-East-South-West.svg"; import resizeNsUrl from "@/assets/cursors/Cursor=Resize-North-South.svg"; import resizeNwseUrl from "@/assets/cursors/Cursor=Resize-North-West-South-East.svg"; import resizeEwUrl from "@/assets/cursors/Cursor=Resize-West-East.svg"; import textUrl from "@/assets/cursors/Cursor=Text-Cursor.svg"; +import upArrowUrl from "@/assets/cursors/Cursor=Up-Arrow.svg"; +import waitUrl from "@/assets/cursors/Cursor=Wait.svg"; import type { CropRegion } from "@/components/video-editor/types"; import type { CursorRecordingData, @@ -113,6 +117,34 @@ const PRETTY_NATIVE_CURSOR_ASSETS: Partial Date: Tue, 5 May 2026 10:50:20 +0200 Subject: [PATCH 08/43] fix: avoid unsupported display media min constraint --- src/hooks/useScreenRecorder.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hooks/useScreenRecorder.ts b/src/hooks/useScreenRecorder.ts index 8aa673e..7cd86a7 100644 --- a/src/hooks/useScreenRecorder.ts +++ b/src/hooks/useScreenRecorder.ts @@ -5,7 +5,6 @@ import { useScopedT } from "@/contexts/I18nContext"; import { requestCameraAccess } from "@/lib/requestCameraAccess"; const TARGET_FRAME_RATE = 60; -const MIN_FRAME_RATE = 30; const TARGET_WIDTH = 3840; const TARGET_HEIGHT = 2160; const FOUR_K_PIXELS = TARGET_WIDTH * TARGET_HEIGHT; @@ -585,7 +584,7 @@ export function useScreenRecorder(): UseScreenRecorderReturn { cursor: "never", width: { max: TARGET_WIDTH }, height: { max: TARGET_HEIGHT }, - frameRate: { ideal: TARGET_FRAME_RATE, min: MIN_FRAME_RATE }, + frameRate: { ideal: TARGET_FRAME_RATE }, } as MediaTrackConstraints, audio: systemAudioEnabled, } as DisplayMediaStreamOptions); From 87240a919ee1aa25426e0795fde2dee4c9586ab0 Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 11:04:08 +0200 Subject: [PATCH 09/43] fix: align native cursor preview 
and export --- electron/ipc/handlers.ts | 31 ++++++++- .../native-bridge/cursor/recording/factory.ts | 3 + .../recording/telemetryRecordingSession.ts | 3 +- .../windowsNativeRecordingSession.ts | 2 +- .../windowsNativeRecordingSession.types.ts | 1 + scripts/capture-openscreen-preview.mjs | 4 -- src/components/video-editor/VideoPlayback.tsx | 64 ++++++++----------- src/hooks/useScreenRecorder.ts | 6 +- src/lib/cursor/nativeCursor.ts | 19 ++++-- 9 files changed, 82 insertions(+), 51 deletions(-) diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 7b16f2f..4c306ee 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -651,7 +651,7 @@ export function registerIpcHandlers( } }); - ipcMain.handle("set-recording-state", async (_, recording: boolean) => { + ipcMain.handle("set-recording-state", async (_, recording: boolean, recordingId?: number) => { if (recording) { if (cursorRecordingSession) { pendingCursorRecordingData = await cursorRecordingSession.stop(); @@ -665,6 +665,8 @@ export function registerIpcHandlers( platform: process.platform, sampleIntervalMs: CURSOR_SAMPLE_INTERVAL_MS, sourceId: getSelectedSourceId(), + startTimeMs: + typeof recordingId === "number" && Number.isFinite(recordingId) ? 
recordingId : undefined, }); try { @@ -824,6 +826,7 @@ export function registerIpcHandlers( return { success: false, canceled: true }; } + approveFilePath(result.filePaths[0]); currentProjectPath = null; return { success: true, @@ -863,6 +866,32 @@ export function registerIpcHandlers( } }); + ipcMain.handle("read-binary-file", async (_, filePath: string) => { + try { + const normalizedPath = await approveReadableVideoPath(filePath); + if (!normalizedPath) { + return { + success: false, + message: "File path is not approved or is not a supported video file", + }; + } + + const data = await fs.readFile(normalizedPath); + return { + success: true, + data: data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength), + path: normalizedPath, + }; + } catch (error) { + console.error("Failed to read binary file:", error); + return { + success: false, + message: "Failed to read binary file", + error: String(error), + }; + } + }); + ipcMain.handle( "save-project-file", async (_, projectData: unknown, suggestedName?: string, existingProjectPath?: string) => { diff --git a/electron/native-bridge/cursor/recording/factory.ts b/electron/native-bridge/cursor/recording/factory.ts index 4e0f75c..52d6079 100644 --- a/electron/native-bridge/cursor/recording/factory.ts +++ b/electron/native-bridge/cursor/recording/factory.ts @@ -9,6 +9,7 @@ interface CreateCursorRecordingSessionOptions { platform: NodeJS.Platform; sampleIntervalMs: number; sourceId?: string | null; + startTimeMs?: number; } export function createCursorRecordingSession( @@ -20,6 +21,7 @@ export function createCursorRecordingSession( maxSamples: options.maxSamples, sampleIntervalMs: options.sampleIntervalMs, sourceId: options.sourceId, + startTimeMs: options.startTimeMs, }); } @@ -27,5 +29,6 @@ export function createCursorRecordingSession( getDisplayBounds: options.getDisplayBounds, maxSamples: options.maxSamples, sampleIntervalMs: options.sampleIntervalMs, + startTimeMs: options.startTimeMs, }); } diff --git 
a/electron/native-bridge/cursor/recording/telemetryRecordingSession.ts b/electron/native-bridge/cursor/recording/telemetryRecordingSession.ts index dd42871..e719d8e 100644 --- a/electron/native-bridge/cursor/recording/telemetryRecordingSession.ts +++ b/electron/native-bridge/cursor/recording/telemetryRecordingSession.ts @@ -6,6 +6,7 @@ interface TelemetryRecordingSessionOptions { getDisplayBounds: () => Rectangle | null; maxSamples: number; sampleIntervalMs: number; + startTimeMs?: number; } function clamp(value: number, min: number, max: number) { @@ -21,7 +22,7 @@ export class TelemetryRecordingSession implements CursorRecordingSession { async start(): Promise { this.samples = []; - this.startTimeMs = Date.now(); + this.startTimeMs = this.options.startTimeMs ?? Date.now(); this.captureSample(); this.interval = setInterval(() => { this.captureSample(); diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts index 632a74d..8075fe3 100644 --- a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts @@ -41,7 +41,7 @@ export class WindowsNativeRecordingSession implements CursorRecordingSession { this.assets.clear(); this.samples = []; this.lineBuffer = ""; - this.startTimeMs = Date.now(); + this.startTimeMs = this.options.startTimeMs ?? 
Date.now(); this.sampleCount = 0; this.outOfBoundsSampleCount = 0; diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts index fdc4ab9..5afc012 100644 --- a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.types.ts @@ -49,4 +49,5 @@ export interface WindowsNativeRecordingSessionOptions { maxSamples: number; sampleIntervalMs: number; sourceId?: string | null; + startTimeMs?: number; } diff --git a/scripts/capture-openscreen-preview.mjs b/scripts/capture-openscreen-preview.mjs index 6c9b6eb..25f86db 100644 --- a/scripts/capture-openscreen-preview.mjs +++ b/scripts/capture-openscreen-preview.mjs @@ -214,10 +214,6 @@ try { await editorWindow.waitForLoadState("domcontentloaded"); await editorWindow.waitForSelector("video", { state: "attached", timeout: 30_000 }); await editorWindow.waitForSelector("canvas", { state: "attached", timeout: 30_000 }); - await editorWindow.waitForSelector('img[aria-hidden="true"]', { - state: "attached", - timeout: 30_000, - }); await editorWindow.setViewportSize({ width: 1280, height: 800 }); await editorWindow.evaluate(async () => { diff --git a/src/components/video-editor/VideoPlayback.tsx b/src/components/video-editor/VideoPlayback.tsx index 0586e54..840101d 100644 --- a/src/components/video-editor/VideoPlayback.tsx +++ b/src/components/video-editor/VideoPlayback.tsx @@ -27,7 +27,7 @@ import { } from "@/lib/compositeLayout"; import { hasNativeCursorRecordingData, - projectNativeCursorToStage, + projectNativeCursorToLocal, resolveInterpolatedNativeCursorFrame, resolveNativeCursorRenderAsset, } from "@/lib/cursor/nativeCursor"; @@ -841,6 +841,12 @@ const VideoPlayback = forwardRef( cursorContainerRef.current = cursorContainer; cameraContainer.addChild(cursorContainer); + const nativeCursorSprite = new 
Sprite(Texture.EMPTY); + nativeCursorSprite.visible = false; + nativeCursorSprite.eventMode = "none"; + nativeCursorSpriteRef.current = nativeCursorSprite; + cursorContainer.addChild(nativeCursorSprite); + // Cursor overlay - rendered above the masked video if (cursorOverlayEnabled) { const cursorOverlay = new PixiCursorOverlay({ @@ -863,6 +869,8 @@ const VideoPlayback = forwardRef( cursorOverlayRef.current.destroy(); cursorOverlayRef.current = null; } + nativeCursorSpriteRef.current = null; + nativeCursorTextureIdRef.current = null; if (app && app.renderer) { app.destroy(true, { children: true, @@ -1296,25 +1304,18 @@ const VideoPlayback = forwardRef( ); } - // Update native cursor image position at ticker rate (60fps) - const nativeCursorImg = nativeCursorImgRef.current; - if (nativeCursorImg) { - const cameraContainerRc = cameraContainerRef.current; + // Update native cursor sprite in the same PIXI coordinate space as the video. + const nativeCursorSprite = nativeCursorSpriteRef.current; + if (nativeCursorSprite) { const videoContainerRc = videoContainerRef.current; - if ( - hasNativeCursorRecordingRef.current && - showCursorRef.current && - cameraContainerRc && - videoContainerRc - ) { + if (hasNativeCursorRecordingRef.current && showCursorRef.current && videoContainerRc) { const timeMs = currentTimeRef.current; // already in ms const frame = resolveInterpolatedNativeCursorFrame( cursorRecordingDataRef.current, timeMs, ); if (frame) { - const projectedPoint = projectNativeCursorToStage({ - cameraContainer: cameraContainerRc, + const projectedPoint = projectNativeCursorToLocal({ cropRegion: cropRegionRef.current ?? 
{ x: 0, y: 0, width: 1, height: 1 }, maskRect: baseMaskRef.current, videoContainerPosition: { @@ -1330,23 +1331,25 @@ const VideoPlayback = forwardRef( frame.sample, ); const scale = Math.max(0, cursorSizeRef.current); - if (nativeCursorImg.dataset.cursorId !== renderAsset.id) { - nativeCursorImg.src = renderAsset.imageDataUrl; - nativeCursorImg.dataset.cursorId = renderAsset.id; + if (nativeCursorTextureIdRef.current !== renderAsset.id) { + nativeCursorSprite.texture = Texture.from(renderAsset.imageDataUrl); + nativeCursorTextureIdRef.current = renderAsset.id; } - nativeCursorImg.style.left = `${projectedPoint.x - renderAsset.hotspotX * scale}px`; - nativeCursorImg.style.top = `${projectedPoint.y - renderAsset.hotspotY * scale}px`; - nativeCursorImg.style.width = `${renderAsset.width * scale}px`; - nativeCursorImg.style.height = `${renderAsset.height * scale}px`; - nativeCursorImg.style.display = "block"; + nativeCursorSprite.position.set( + projectedPoint.x - renderAsset.hotspotX * scale, + projectedPoint.y - renderAsset.hotspotY * scale, + ); + nativeCursorSprite.width = renderAsset.width * scale; + nativeCursorSprite.height = renderAsset.height * scale; + nativeCursorSprite.visible = true; } else { - nativeCursorImg.style.display = "none"; + nativeCursorSprite.visible = false; } } else { - nativeCursorImg.style.display = "none"; + nativeCursorSprite.visible = false; } } else { - nativeCursorImg.style.display = "none"; + nativeCursorSprite.visible = false; } } @@ -1638,19 +1641,6 @@ const VideoPlayback = forwardRef( className="absolute rounded-md border border-[#34B27B]/80 bg-[#34B27B]/20 shadow-[0_0_0_1px_rgba(52,178,123,0.35)]" style={{ display: "none", pointerEvents: "none" }} /> - {hasNativeCursorRecording ? 
( - - ) : null} {(() => { const filteredAnnotations = (annotationRegions || []).filter((annotation) => { if ( diff --git a/src/hooks/useScreenRecorder.ts b/src/hooks/useScreenRecorder.ts index 7cd86a7..717a6cd 100644 --- a/src/hooks/useScreenRecorder.ts +++ b/src/hooks/useScreenRecorder.ts @@ -719,6 +719,8 @@ export function useScreenRecorder(): UseScreenRecorderReturn { return; } + recordingId.current = Date.now(); + const activeRecordingId = recordingId.current; screenRecorder.current = createRecorderHandle(stream.current, { mimeType, videoBitsPerSecond, @@ -741,9 +743,8 @@ export function useScreenRecorder(): UseScreenRecorderReturn { }); } - recordingId.current = Date.now(); accumulatedDurationMs.current = 0; - segmentStartedAt.current = Date.now(); + segmentStartedAt.current = activeRecordingId; allowAutoFinalize.current = true; setRecording(true); setPaused(false); @@ -752,7 +753,6 @@ export function useScreenRecorder(): UseScreenRecorderReturn { const activeScreenRecorder = screenRecorder.current; const activeWebcamRecorder = webcamRecorder.current; - const activeRecordingId = recordingId.current; if (activeScreenRecorder) { activeScreenRecorder.recorder.addEventListener( "stop", diff --git a/src/lib/cursor/nativeCursor.ts b/src/lib/cursor/nativeCursor.ts index 30f7a46..5c38c4d 100644 --- a/src/lib/cursor/nativeCursor.ts +++ b/src/lib/cursor/nativeCursor.ts @@ -27,13 +27,16 @@ export interface ActiveNativeCursorFrame { } interface ProjectNativeCursorOptions { - cameraContainer: Container; cropRegion: CropRegion; maskRect: { x: number; y: number; width: number; height: number }; videoContainerPosition: { x: number; y: number }; sample: CursorRecordingSample; } +interface ProjectNativeCursorToStageOptions extends ProjectNativeCursorOptions { + cameraContainer: Container; +} + function clamp(value: number, min: number, max: number) { return Math.min(max, Math.max(min, value)); } @@ -264,8 +267,7 @@ export function resolveInterpolatedNativeCursorFrame( }; } 
-export function projectNativeCursorToStage({ - cameraContainer, +export function projectNativeCursorToLocal({ cropRegion, maskRect, videoContainerPosition, @@ -276,11 +278,20 @@ export function projectNativeCursorToStage({ return null; } - const localPoint = new Point( + return new Point( videoContainerPosition.x + maskRect.x + croppedPosition.cx * maskRect.width, videoContainerPosition.y + maskRect.y + croppedPosition.cy * maskRect.height, ); +} +export function projectNativeCursorToStage({ + cameraContainer, + ...options +}: ProjectNativeCursorToStageOptions) { + const localPoint = projectNativeCursorToLocal(options); + if (!localPoint) { + return null; + } return cameraContainer.toGlobal(localPoint); } From d21e5eb34c6387ed9185503e6606d7f63b748f41 Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 11:38:50 +0200 Subject: [PATCH 10/43] fix: restore native cursor preview and export --- src/components/video-editor/VideoPlayback.tsx | 127 ++++++++++++------ src/lib/cursor/nativeCursor.ts | 26 +++- src/lib/exporter/frameRenderer.ts | 24 ++-- 3 files changed, 116 insertions(+), 61 deletions(-) diff --git a/src/components/video-editor/VideoPlayback.tsx b/src/components/video-editor/VideoPlayback.tsx index 840101d..432958a 100644 --- a/src/components/video-editor/VideoPlayback.tsx +++ b/src/components/video-editor/VideoPlayback.tsx @@ -28,6 +28,7 @@ import { import { hasNativeCursorRecordingData, projectNativeCursorToLocal, + projectNativeCursorToStage, resolveInterpolatedNativeCursorFrame, resolveNativeCursorRenderAsset, } from "@/lib/cursor/nativeCursor"; @@ -243,7 +244,6 @@ const VideoPlayback = forwardRef( const videoSpriteRef = useRef(null); const videoContainerRef = useRef(null); const cameraContainerRef = useRef(null); - const cursorContainerRef = useRef(null); const timeUpdateAnimationRef = useRef(null); const [pixiReady, setPixiReady] = useState(false); const [videoReady, setVideoReady] = useState(false); @@ -836,17 +836,6 @@ const 
VideoPlayback = forwardRef( videoContainerRef.current = videoContainer; cameraContainer.addChild(videoContainer); - // Cursor container - rendered above video - const cursorContainer = new Container(); - cursorContainerRef.current = cursorContainer; - cameraContainer.addChild(cursorContainer); - - const nativeCursorSprite = new Sprite(Texture.EMPTY); - nativeCursorSprite.visible = false; - nativeCursorSprite.eventMode = "none"; - nativeCursorSpriteRef.current = nativeCursorSprite; - cursorContainer.addChild(nativeCursorSprite); - // Cursor overlay - rendered above the masked video if (cursorOverlayEnabled) { const cursorOverlay = new PixiCursorOverlay({ @@ -856,7 +845,6 @@ const VideoPlayback = forwardRef( clickBounce: cursorClickBounceRef.current, }); cursorOverlayRef.current = cursorOverlay; - cursorContainer.addChild(cursorOverlay.container); } setPixiReady(true); @@ -871,6 +859,7 @@ const VideoPlayback = forwardRef( } nativeCursorSpriteRef.current = null; nativeCursorTextureIdRef.current = null; + nativeCursorImageIdRef.current = null; if (app && app.renderer) { app.destroy(true, { children: true, @@ -881,7 +870,6 @@ const VideoPlayback = forwardRef( appRef.current = null; cameraContainerRef.current = null; videoContainerRef.current = null; - cursorContainerRef.current = null; videoSpriteRef.current = null; }; }, []); @@ -920,9 +908,8 @@ const VideoPlayback = forwardRef( const video = videoRef.current; const app = appRef.current; const videoContainer = videoContainerRef.current; - const cursorContainer = cursorContainerRef.current; - if (!video || !app || !videoContainer || !cursorContainer) return; + if (!video || !app || !videoContainer) return; if (video.videoWidth === 0 || video.videoHeight === 0) return; const source = VideoSource.from(video); @@ -942,8 +929,12 @@ const VideoPlayback = forwardRef( videoContainer.addChild(maskGraphics); videoContainer.mask = maskGraphics; maskGraphicsRef.current = maskGraphics; + const nativeCursorSprite = new 
Sprite(Texture.EMPTY); + nativeCursorSprite.visible = false; + nativeCursorSprite.eventMode = "none"; + nativeCursorSpriteRef.current = nativeCursorSprite; if (cursorOverlayRef.current) { - cursorContainer.addChild(cursorOverlayRef.current.container); + videoContainer.addChild(cursorOverlayRef.current.container); } const cursorHighlightGraphics = new Graphics(); @@ -951,6 +942,7 @@ const VideoPlayback = forwardRef( videoContainer.addChild(cursorHighlightGraphics); cursorHighlightGraphicsRef.current = cursorHighlightGraphics; drawCursorHighlightGraphics(cursorHighlightGraphics, cursorHighlightRef.current); + videoContainer.addChild(nativeCursorSprite); animationStateRef.current = { scale: 1, @@ -1019,6 +1011,12 @@ const VideoPlayback = forwardRef( cursorHighlightGraphicsRef.current.destroy(); cursorHighlightGraphicsRef.current = null; } + if (nativeCursorSpriteRef.current) { + videoContainer.removeChild(nativeCursorSpriteRef.current); + nativeCursorSpriteRef.current.destroy(); + nativeCursorSpriteRef.current = null; + nativeCursorTextureIdRef.current = null; + } videoContainer.mask = null; maskGraphicsRef.current = null; if (blurFilterRef.current) { @@ -1304,53 +1302,88 @@ const VideoPlayback = forwardRef( ); } - // Update native cursor sprite in the same PIXI coordinate space as the video. + // Keep the native cursor preview in the same transformed coordinate space as PIXI. 
const nativeCursorSprite = nativeCursorSpriteRef.current; - if (nativeCursorSprite) { - const videoContainerRc = videoContainerRef.current; - if (hasNativeCursorRecordingRef.current && showCursorRef.current && videoContainerRc) { + const nativeCursorImage = nativeCursorImageRef.current; + const hideNativeCursorPreview = () => { + if (nativeCursorSprite) { + nativeCursorSprite.visible = false; + } + if (nativeCursorImage) { + nativeCursorImage.style.display = "none"; + } + }; + if (nativeCursorImage) { + if (hasNativeCursorRecordingRef.current && showCursorRef.current) { const timeMs = currentTimeRef.current; // already in ms const frame = resolveInterpolatedNativeCursorFrame( cursorRecordingDataRef.current, timeMs, ); if (frame) { - const projectedPoint = projectNativeCursorToLocal({ - cropRegion: cropRegionRef.current ?? { x: 0, y: 0, width: 1, height: 1 }, + const cameraContainer = cameraContainerRef.current; + const videoContainer = videoContainerRef.current; + const cropRegionValue = cropRegionRef.current ?? { x: 0, y: 0, width: 1, height: 1 }; + const projectedLocalPoint = projectNativeCursorToLocal({ + cropRegion: cropRegionValue, maskRect: baseMaskRef.current, - videoContainerPosition: { - x: videoContainerRc.x, - y: videoContainerRc.y, - }, sample: frame.sample, }); - if (projectedPoint) { + const projectedStagePoint = + cameraContainer && videoContainer + ? 
projectNativeCursorToStage({ + cameraContainer, + cropRegion: cropRegionValue, + maskRect: baseMaskRef.current, + videoContainerPosition: { + x: videoContainer.x, + y: videoContainer.y, + }, + sample: frame.sample, + }) + : null; + if (projectedLocalPoint && projectedStagePoint) { const renderAsset = resolveNativeCursorRenderAsset( frame.asset, window.devicePixelRatio || 1, frame.sample, ); const scale = Math.max(0, cursorSizeRef.current); - if (nativeCursorTextureIdRef.current !== renderAsset.id) { - nativeCursorSprite.texture = Texture.from(renderAsset.imageDataUrl); - nativeCursorTextureIdRef.current = renderAsset.id; + const transformedScale = scale * Math.abs(cameraContainer?.scale.x || 1); + if (nativeCursorImageIdRef.current !== renderAsset.id) { + nativeCursorImage.src = renderAsset.imageDataUrl; + nativeCursorImageIdRef.current = renderAsset.id; + } + nativeCursorImage.style.display = "block"; + nativeCursorImage.style.width = `${renderAsset.width * transformedScale}px`; + nativeCursorImage.style.height = `${renderAsset.height * transformedScale}px`; + nativeCursorImage.style.transform = `translate3d(${ + projectedStagePoint.x - renderAsset.hotspotX * transformedScale + }px, ${projectedStagePoint.y - renderAsset.hotspotY * transformedScale}px, 0)`; + if (nativeCursorSprite) { + nativeCursorSprite.visible = false; + if (nativeCursorTextureIdRef.current !== renderAsset.id) { + nativeCursorSprite.texture = Texture.from(renderAsset.imageDataUrl); + nativeCursorTextureIdRef.current = renderAsset.id; + } + nativeCursorSprite.position.set( + projectedLocalPoint.x - renderAsset.hotspotX * scale, + projectedLocalPoint.y - renderAsset.hotspotY * scale, + ); + nativeCursorSprite.width = renderAsset.width * scale; + nativeCursorSprite.height = renderAsset.height * scale; } - nativeCursorSprite.position.set( - projectedPoint.x - renderAsset.hotspotX * scale, - projectedPoint.y - renderAsset.hotspotY * scale, - ); - nativeCursorSprite.width = renderAsset.width * scale; 
- nativeCursorSprite.height = renderAsset.height * scale; - nativeCursorSprite.visible = true; } else { - nativeCursorSprite.visible = false; + hideNativeCursorPreview(); } } else { - nativeCursorSprite.visible = false; + hideNativeCursorPreview(); } } else { - nativeCursorSprite.visible = false; + hideNativeCursorPreview(); } + } else { + hideNativeCursorPreview(); } const composite3D = composite3DRef.current; @@ -1584,6 +1617,18 @@ const VideoPlayback = forwardRef( : "none", }} /> + {webcamVideoPath && (() => { const clipPath = getCssClipPath(webcamLayout?.maskShape ?? "rectangle"); diff --git a/src/lib/cursor/nativeCursor.ts b/src/lib/cursor/nativeCursor.ts index 5c38c4d..3b62cb8 100644 --- a/src/lib/cursor/nativeCursor.ts +++ b/src/lib/cursor/nativeCursor.ts @@ -29,12 +29,12 @@ export interface ActiveNativeCursorFrame { interface ProjectNativeCursorOptions { cropRegion: CropRegion; maskRect: { x: number; y: number; width: number; height: number }; - videoContainerPosition: { x: number; y: number }; sample: CursorRecordingSample; } interface ProjectNativeCursorToStageOptions extends ProjectNativeCursorOptions { cameraContainer: Container; + videoContainerPosition: { x: number; y: number }; } function clamp(value: number, min: number, max: number) { @@ -179,6 +179,15 @@ function getCroppedCursorPosition(sample: CursorRecordingSample, cropRegion: Cro }; } +function getNativeCursorMaskPoint(sample: CursorRecordingSample, cropRegion: CropRegion) { + const croppedPosition = getCroppedCursorPosition(sample, cropRegion); + if (!croppedPosition) { + return null; + } + + return new Point(croppedPosition.cx, croppedPosition.cy); +} + export function resolveActiveNativeCursorFrame( recordingData: CursorRecordingData | null | undefined, timeMs: number, @@ -270,29 +279,32 @@ export function resolveInterpolatedNativeCursorFrame( export function projectNativeCursorToLocal({ cropRegion, maskRect, - videoContainerPosition, sample, }: ProjectNativeCursorOptions) { - const 
croppedPosition = getCroppedCursorPosition(sample, cropRegion); - if (!croppedPosition) { + const maskPoint = getNativeCursorMaskPoint(sample, cropRegion); + if (!maskPoint) { return null; } return new Point( - videoContainerPosition.x + maskRect.x + croppedPosition.cx * maskRect.width, - videoContainerPosition.y + maskRect.y + croppedPosition.cy * maskRect.height, + maskRect.x + maskPoint.x * maskRect.width, + maskRect.y + maskPoint.y * maskRect.height, ); } export function projectNativeCursorToStage({ cameraContainer, + videoContainerPosition, ...options }: ProjectNativeCursorToStageOptions) { const localPoint = projectNativeCursorToLocal(options); if (!localPoint) { return null; } - return cameraContainer.toGlobal(localPoint); + + return cameraContainer.toGlobal( + new Point(localPoint.x + videoContainerPosition.x, localPoint.y + videoContainerPosition.y), + ); } export function getNativeCursorDisplayMetrics(asset: NativeCursorAsset, deviceScaleFactor: number) { diff --git a/src/lib/exporter/frameRenderer.ts b/src/lib/exporter/frameRenderer.ts index a1e20cc..7f7513c 100644 --- a/src/lib/exporter/frameRenderer.ts +++ b/src/lib/exporter/frameRenderer.ts @@ -57,7 +57,7 @@ import { type StyledRenderRect, } from "@/lib/compositeLayout"; import { - projectNativeCursorToStage, + projectNativeCursorToLocal, resolveInterpolatedNativeCursorFrame, resolveNativeCursorRenderAsset, } from "@/lib/cursor/nativeCursor"; @@ -555,7 +555,7 @@ export class FrameRenderer { } private async drawNativeCursor(timeMs: number) { - if (!this.compositeCtx || !this.cameraContainer || !this.videoContainer || !this.layoutCache) { + if (!this.foregroundCtx || !this.layoutCache) { return; } @@ -571,14 +571,9 @@ export class FrameRenderer { return; } - const projectedPoint = projectNativeCursorToStage({ - cameraContainer: this.cameraContainer, + const projectedPoint = projectNativeCursorToLocal({ cropRegion: this.config.cropRegion, maskRect: this.layoutCache.maskRect, - videoContainerPosition: { - 
x: this.videoContainer.x, - y: this.videoContainer.y, - }, sample: activeNativeCursor.sample, }); if (!projectedPoint) { @@ -592,12 +587,15 @@ export class FrameRenderer { ); const image = await this.getCursorImage(renderAsset); const scale = Math.max(0, this.config.cursorScale ?? 1); - this.compositeCtx.drawImage( + const appliedScale = this.animationState.appliedScale; + const canvasX = projectedPoint.x * appliedScale + this.animationState.x; + const canvasY = projectedPoint.y * appliedScale + this.animationState.y; + this.foregroundCtx.drawImage( image, - projectedPoint.x - renderAsset.hotspotX * scale, - projectedPoint.y - renderAsset.hotspotY * scale, - renderAsset.width * scale, - renderAsset.height * scale, + canvasX - renderAsset.hotspotX * scale * appliedScale, + canvasY - renderAsset.hotspotY * scale * appliedScale, + renderAsset.width * scale * appliedScale, + renderAsset.height * scale * appliedScale, ); } From 062cf2a87c41dca6ad7ecc0e114fc8347ebcb3fb Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 16:07:07 +0200 Subject: [PATCH 11/43] feat: add native Windows recorder helper --- .gitignore | 98 ++-- .../windows-native-recorder-roadmap.md | 202 +++++++ docs/testing/windows-native-cursor.md | 34 ++ electron-builder.json5 | 18 +- electron/electron-env.d.ts | 18 + electron/ipc/handlers.ts | 543 ++++++++++++++++-- electron/native/README.md | 50 ++ electron/native/wgc-capture/CMakeLists.txt | 45 ++ electron/native/wgc-capture/src/main.cpp | 433 ++++++++++++++ .../native/wgc-capture/src/mf_encoder.cpp | 317 ++++++++++ electron/native/wgc-capture/src/mf_encoder.h | 63 ++ .../native/wgc-capture/src/monitor_utils.cpp | 88 +++ .../native/wgc-capture/src/monitor_utils.h | 14 + .../src/wasapi_loopback_capture.cpp | 205 +++++++ .../wgc-capture/src/wasapi_loopback_capture.h | 47 ++ .../native/wgc-capture/src/wgc_session.cpp | 223 +++++++ electron/native/wgc-capture/src/wgc_session.h | 55 ++ electron/preload.ts | 10 + package.json | 13 +- 
scripts/build-windows-wgc-helper.mjs | 112 ++++ scripts/test-windows-wgc-helper.mjs | 167 ++++++ src/components/video-editor/VideoPlayback.tsx | 1 - src/hooks/useScreenRecorder.ts | 167 +++++- src/lib/cursor/nativeCursor.ts | 18 +- src/lib/exporter/audioEncoder.ts | 28 +- src/lib/exporter/muxer.ts | 2 +- src/lib/nativeWindowsRecording.ts | 41 ++ 27 files changed, 2873 insertions(+), 139 deletions(-) create mode 100644 docs/engineering/windows-native-recorder-roadmap.md create mode 100644 electron/native/README.md create mode 100644 electron/native/wgc-capture/CMakeLists.txt create mode 100644 electron/native/wgc-capture/src/main.cpp create mode 100644 electron/native/wgc-capture/src/mf_encoder.cpp create mode 100644 electron/native/wgc-capture/src/mf_encoder.h create mode 100644 electron/native/wgc-capture/src/monitor_utils.cpp create mode 100644 electron/native/wgc-capture/src/monitor_utils.h create mode 100644 electron/native/wgc-capture/src/wasapi_loopback_capture.cpp create mode 100644 electron/native/wgc-capture/src/wasapi_loopback_capture.h create mode 100644 electron/native/wgc-capture/src/wgc_session.cpp create mode 100644 electron/native/wgc-capture/src/wgc_session.h create mode 100644 scripts/build-windows-wgc-helper.mjs create mode 100644 scripts/test-windows-wgc-helper.mjs create mode 100644 src/lib/nativeWindowsRecording.ts diff --git a/.gitignore b/.gitignore index 494da30..84a5a1c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,48 +1,52 @@ -# Logs -logs -*.log -npm-debug.log* -yarn-debug.log* -yarn-error.log* -pnpm-debug.log* -lerna-debug.log* - -node_modules -dist -dist-electron -dist-ssr -*.local -.env - -# Editor directories and files -.vscode/* -.zed/ -!.vscode/extensions.json -.idea -.DS_Store -*.suo -*.ntvs* -*.njsproj -*.sln -*.sw? 
-release/** -*.kiro/ -.claude/ -# npx electron-builder --mac --win - -# Playwright -test-results -playwright-report/ - -# Vitest browser mode screenshots -__screenshots__/ - -# shell files -/shell.sh -# Nix -result -result-* -.direnv/ - -#kilocode +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-electron +dist-ssr +*.local +.env + +# Native helper build outputs +/electron/native/wgc-capture/build/ +/electron/native/bin/ + +# Editor directories and files +.vscode/* +.zed/ +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? +release/** +*.kiro/ +.claude/ +# npx electron-builder --mac --win + +# Playwright +test-results +playwright-report/ + +# Vitest browser mode screenshots +__screenshots__/ + +# shell files +/shell.sh +# Nix +result +result-* +.direnv/ + +#kilocode .kilo/ \ No newline at end of file diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md new file mode 100644 index 0000000..c6a0a06 --- /dev/null +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -0,0 +1,202 @@ +# Windows Native Recorder Roadmap + +OpenScreen's Windows recorder should be owned by one native backend. Electron capture can remain available for non-Windows platforms and temporary developer diagnostics, but Windows production recording should not silently fall back to `getDisplayMedia` / `MediaRecorder`. + +## Goals + +- Capture displays and windows through Windows Graphics Capture (WGC). +- Render the native Windows cursor as OpenScreen's high-quality scalable cursor overlay. +- Capture system audio through WASAPI loopback. +- Capture microphone audio through WASAPI. +- Mix system audio and microphone audio into the primary screen recording. +- Capture webcam video natively and keep it as a separate editable OpenScreen media stream. 
+- Keep preview/export aligned because screen video, audio, webcam, and cursor share one native timing origin. +- Keep exported MP4s Windows-friendly: H.264 video plus AAC audio. Opus-in-MP4 is not an acceptable Windows export target. +- Package the native helper with the Windows app. + +## Non-Goals + +- Replacing the editor/export pipeline. +- Flattening webcam into the screen recording. The editor currently treats webcam as editable picture-in-picture media, so the native recorder should preserve a separate `webcamVideoPath`. +- Adding a native fallback for macOS or Linux in this branch. + +## Target Architecture + +The renderer keeps the existing recording controls. On Windows, `useScreenRecorder` sends a complete recording request to Electron and does not assemble Windows `MediaStream` tracks with `MediaRecorder`. + +Electron owns the native recording session: + +- resolves the selected source; +- resolves output paths; +- starts cursor sampling; +- starts the helper process; +- sends pause/resume/stop/cancel commands; +- writes `RecordingSession` manifests; +- reports explicit errors when a Windows-native capability is unavailable. + +The helper owns Windows media capture: + +- WGC screen/window frames; +- WASAPI system loopback; +- WASAPI microphone input; +- Media Foundation webcam capture; +- Media Foundation encoding/muxing; +- stream timestamp normalization. 
+ +## Helper Contract V2 + +The helper receives a single JSON argument: + +```json +{ + "schemaVersion": 2, + "recordingId": 1234567890, + "source": { + "type": "display", + "sourceId": "screen:0:0", + "displayId": 123, + "windowHandle": null, + "bounds": { "x": 0, "y": 0, "width": 1920, "height": 1080 } + }, + "video": { + "fps": 60, + "width": 1920, + "height": 1080, + "bitrate": 18000000 + }, + "audio": { + "system": { "enabled": true }, + "microphone": { "enabled": true, "deviceId": "default", "gain": 1.4 } + }, + "webcam": { + "enabled": true, + "deviceId": "default", + "width": 1280, + "height": 720, + "fps": 30, + "bitrate": 18000000 + }, + "outputs": { + "screenPath": "C:\\Users\\me\\recording-123.mp4", + "webcamPath": "C:\\Users\\me\\recording-123-webcam.mp4", + "manifestPath": "C:\\Users\\me\\recording-123.session.json" + } +} +``` + +The helper emits newline-delimited JSON events to stdout: + +```json +{ "event": "ready", "schemaVersion": 2 } +{ "event": "recording-started", "timestampMs": 1234567890 } +{ "event": "warning", "code": "audio-device-unavailable", "message": "..." } +{ "event": "recording-stopped", "screenPath": "...", "webcamPath": "..." } +{ "event": "error", "code": "unsupported-window-source", "message": "..." } +``` + +During migration, Electron also accepts the current textual helper messages so existing display-only smoke tests keep working. + +## Implementation Phases + +### 1. Native Session Boundary + +- Add a structured Windows native recording request type. +- Pass source kind, audio flags, microphone device, webcam flags, and output paths into the helper. +- On Windows, do not silently fall back to Electron capture. If the helper is unavailable or a native feature is missing, show a clear error. +- Keep Electron fallback only for non-Windows and optional developer diagnostics. + +Acceptance: + +- Display-only recording still works. 
+- Enabling an unsupported native feature returns an explicit native error instead of recording through Electron. + +### 2. WASAPI System Audio + +Status: initial implementation landed. The helper captures the default render endpoint with WASAPI loopback, passes the runtime mix format into `MFEncoder`, and muxes AAC audio into the primary MP4. Long-run drift correction and explicit silence insertion remain follow-up hardening work. + +- Add `WasapiLoopbackCapture`. +- Capture the default render endpoint in shared loopback mode. +- Keep `WasapiLoopbackCapture` responsible only for device activation, packet capture, and packet timestamps. +- Keep `MFEncoder` responsible for all Media Foundation stream definitions and muxing. +- Feed the endpoint mix format into `MFEncoder` as the single source of truth for audio stream shape: sample rate, channel count, bits per sample, block alignment, average bytes/sec, and subtype (`PCM` or `Float`). +- Encode the primary screen MP4 with H.264 video and AAC audio through one `IMFSinkWriter`. +- Timestamp audio from the captured frame count in 100ns units. The first implementation uses the WASAPI packet timeline; later drift correction will add explicit silence or resampling if long recordings show measurable clock skew. +- Treat microphone mixing as a later phase. System loopback must land first without introducing renderer-side audio code. + +Acceptance: + +- Screen MP4 has an AAC audio track when system audio is enabled. +- A 5-minute recording has audio/video duration drift below one frame. + +SSOT rules for this phase: + +- `src/lib/nativeWindowsRecording.ts` is the renderer/main TypeScript request contract. +- `docs/engineering/windows-native-recorder-roadmap.md` is the feature-level contract and phase checklist. +- `WgcSession::captureWidth()/captureHeight()` is the encoded screen frame size until a dedicated native scaling stage exists. 
+- `WasapiLoopbackCapture::inputFormat()` is the runtime audio format source used by `MFEncoder`. +- No duplicated hard-coded audio format assumptions in `main.cpp`. + +### 3. WASAPI Microphone + +- Add microphone device enumeration and stable device-id mapping. +- Capture selected/default microphone through WASAPI. +- Apply OpenScreen's current mic gain policy. +- Mix microphone and system audio before AAC encoding. + +Acceptance: + +- Mic-only, system-only, and mixed audio recordings produce a valid AAC track. +- Device unplug/permission failure produces an explicit error or warning. + +### 4. Webcam Capture + +- Add Media Foundation webcam source reader. +- Select 1280x720/30fps or nearest supported format. +- Encode webcam to `recording-<recordingId>-webcam.mp4`. +- Synchronize webcam timestamps to the native session clock. +- Store `webcamVideoPath` in the OpenScreen session manifest. + +Acceptance: + +- Editor loads the native screen recording and the native webcam recording. +- Webcam layout controls behave the same as today. + +### 5. Native Window Capture + +- Resolve Electron `window:*` selections to an `HWND`. +- Use WGC `CreateForWindow(HWND)`. +- Handle window close, minimize, resize, DPI scaling, and monitor moves. +- Return clear errors for unsupported protected windows. + +Acceptance: + +- Capturing a normal app window works with cursor/audio/mic/webcam. +- Window resize and movement do not corrupt the recording. + +### 6. Runtime Controls + +- Add pause/resume commands to the helper. +- Add cancel command that removes partial screen/webcam outputs. +- Keep restart as stop-discard-start from Electron until the helper supports a native restart event. + +Acceptance: + +- Pause/resume keeps preview duration coherent. +- Cancel leaves no stale media/session/cursor files. + +### 7. Test Pipeline + +- `npm run test:wgc-helper:win`: display-only helper smoke test. +- `npm run test:wgc-audio:win`: validates AAC track presence and duration. 
+- `npm run test:wgc-window:win`: captures a fixture window by HWND. +- `npm run test:wgc-webcam:win`: validates webcam output when a webcam is available, otherwise skips explicitly. +- Packaging check: confirms the helper is in `app.asar.unpacked`. +- Export check: exported MP4s generated from native recordings keep an AAC audio track when the source has audio. + +## Ship Criteria + +- Windows display capture works with cursor, system audio, microphone, and webcam. +- Windows window capture works with cursor, system audio, microphone, and webcam. +- Preview and export show no cursor position drift. +- Preview and export show no measurable audio/video/webcam drift. +- Windows production builds do not depend on Electron capture fallback. diff --git a/docs/testing/windows-native-cursor.md b/docs/testing/windows-native-cursor.md index 1abe04e..23c57a9 100644 --- a/docs/testing/windows-native-cursor.md +++ b/docs/testing/windows-native-cursor.md @@ -83,3 +83,37 @@ Together, the scripts make it quick to inspect: - whether the real OpenScreen preview renders the same cursor behavior as the diagnostic pipeline They are not a full substitute for an end-to-end manual recording pass. Before shipping cursor changes, also test a real capture session and export from the packaged app. + +## Native Windows capture backend + +The app now routes Windows recordings through an external WGC helper instead of Electron `getDisplayMedia`. This is meant to remove the coordinate and clock split that made the reconstructed cursor drift in the preview/export path. + +Current native availability rules: + +- Windows 10 build 19041 or newer +- a helper executable is available + +The helper currently implements display video capture and system audio loopback. Window capture, microphone audio, and webcam capture are part of the native recorder roadmap and fail explicitly instead of silently falling back to Electron capture on Windows. 
+ +Build OpenScreen's helper locally: + +```powershell +npm run build:native:win +``` + +Smoke-test the helper directly: + +```powershell +npm run test:wgc-helper:win +npm run test:wgc-audio:win +``` + +For local diagnostics with another compatible helper, point OpenScreen at that executable: + +```powershell +$env:OPENSCREEN_WGC_CAPTURE_EXE = "C:\path\to\wgc-capture.exe" +npm run build-vite +npm run dev +``` + +The helper receives one JSON config argument, emits JSON lifecycle events, prints the legacy `Recording started` marker, accepts `stop` on stdin, and prints `Recording stopped. Output path: <path>`. See `electron/native/README.md` for the exact contract and build output paths. diff --git a/electron-builder.json5 b/electron-builder.json5 index 372cdf7..c11dc56 100644 --- a/electron-builder.json5 +++ b/electron-builder.json5 @@ -4,10 +4,11 @@ "appId": "com.siddharthvaddem.openscreen", "asar": true, // .node binaries can't be dlopen'd from inside an asar — must live unpacked. - "asarUnpack": [ - "node_modules/uiohook-napi/**/*", - "**/*.node" - ], + "asarUnpack": [ + "node_modules/uiohook-napi/**/*", + "**/*.node", + "electron/native/bin/**" + ], "productName": "Openscreen", "npmRebuild": true, "buildDependenciesFromSource": true, @@ -15,10 +16,11 @@ "directories": { "output": "release/${version}" }, - "files": [ - "dist", - "dist-electron", - "!*.png", + "files": [ + "dist", + "dist-electron", + "electron/native/bin/**/*", + "!*.png", "!preview*.png", "!*.md", "!README.md", diff --git a/electron/electron-env.d.ts b/electron/electron-env.d.ts index 686575d..eb28420 100644 --- a/electron/electron-env.d.ts +++ b/electron/electron-env.d.ts @@ -72,6 +72,24 @@ interface Window { error?: string; }>; setRecordingState: (recording: boolean, recordingId?: number) => Promise; + isNativeWindowsCaptureAvailable: () => Promise<{ + success: boolean; + available: boolean; + helperPath?: string; + reason?: string; + error?: string; + }>; + startNativeWindowsRecording: ( + request: 
import("../src/lib/nativeWindowsRecording").NativeWindowsRecordingRequest, + ) => Promise; + stopNativeWindowsRecording: (discard?: boolean) => Promise<{ + success: boolean; + path?: string; + session?: import("../src/lib/recordingSession").RecordingSession; + message?: string; + discarded?: boolean; + error?: string; + }>; discardCursorTelemetry: (recordingId: number) => Promise; getCursorTelemetry: (videoPath?: string) => Promise<{ success: boolean; diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 4c306ee..de3b6b5 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -1,3 +1,5 @@ +import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process"; +import { constants as fsConstants } from "node:fs"; import fs from "node:fs/promises"; import { createRequire } from "node:module"; import os from "node:os"; @@ -14,6 +16,7 @@ import { shell, systemPreferences, } from "electron"; +import type { NativeWindowsRecordingRequest } from "../../src/lib/nativeWindowsRecording"; import { normalizeProjectMedia, normalizeRecordingSession, @@ -36,6 +39,7 @@ import { registerNativeBridgeHandlers } from "./nativeBridge"; const PROJECT_FILE_EXTENSION = "openscreen"; const SHORTCUTS_FILE = path.join(app.getPath("userData"), "shortcuts.json"); +const RECORDING_FILE_PREFIX = "recording-"; const RECORDING_SESSION_SUFFIX = ".session.json"; const ALLOWED_IMPORT_VIDEO_EXTENSIONS = new Set([".webm", ".mp4", ".mov", ".avi", ".mkv"]); @@ -250,6 +254,12 @@ const MAX_CURSOR_SAMPLES = 60 * 60 * 30; // 1 hour @ 30Hz let cursorRecordingSession: CursorRecordingSession | null = null; let pendingCursorRecordingData: CursorRecordingData | null = null; +let nativeWindowsCaptureProcess: ChildProcessWithoutNullStreams | null = null; +let nativeWindowsCaptureOutput = ""; +let nativeWindowsCaptureTargetPath: string | null = null; +let nativeWindowsCaptureWebcamTargetPath: string | null = null; +let nativeWindowsCaptureRecordingId: number | null = null; 
+let nativeWindowsCursorOffsetMs = 0; function normalizeCursorSample(sample: unknown): CursorRecordingSample | null { if (!sample || typeof sample !== "object") { @@ -404,6 +414,215 @@ function getSelectedSourceId() { return typeof selectedSource?.id === "string" ? selectedSource.id : null; } +function getSelectedDisplay() { + const sourceDisplayId = Number(selectedSource?.display_id); + if (!Number.isFinite(sourceDisplayId)) { + return null; + } + + return screen.getAllDisplays().find((display) => display.id === sourceDisplayId) ?? null; +} + +function resolveUnpackedAppPath(...segments: string[]) { + const resolved = path.join(app.getAppPath(), ...segments); + if (app.isPackaged) { + return resolved.replace(/\.asar([/\\])/, ".asar.unpacked$1"); + } + + return resolved; +} + +function getNativeWindowsCaptureHelperCandidates() { + const envPath = process.env.OPENSCREEN_WGC_CAPTURE_EXE?.trim(); + const archTag = process.arch === "arm64" ? "win32-arm64" : "win32-x64"; + return [ + envPath, + resolveUnpackedAppPath( + "electron", + "native", + "wgc-capture", + "build", + "Release", + "wgc-capture.exe", + ), + resolveUnpackedAppPath("electron", "native", "wgc-capture", "build", "wgc-capture.exe"), + resolveUnpackedAppPath("electron", "native", "bin", archTag, "wgc-capture.exe"), + ].filter((candidate): candidate is string => Boolean(candidate)); +} + +async function findNativeWindowsCaptureHelperPath() { + if (process.platform !== "win32") { + return null; + } + + for (const candidate of getNativeWindowsCaptureHelperCandidates()) { + try { + await fs.access(candidate, fsConstants.X_OK); + return candidate; + } catch { + // Try the next configured helper location. 
+ } + } + + return null; +} + +function isWindowsGraphicsCaptureOsSupported() { + if (process.platform !== "win32") { + return false; + } + + const [, , build] = process.getSystemVersion().split(".").map(Number); + return Number.isFinite(build) && build >= 19041; +} + +async function startCursorRecording(recordingId?: number) { + if (cursorRecordingSession) { + pendingCursorRecordingData = await cursorRecordingSession.stop(); + cursorRecordingSession = null; + } + + pendingCursorRecordingData = null; + cursorRecordingSession = createCursorRecordingSession({ + getDisplayBounds: getSelectedSourceBounds, + maxSamples: MAX_CURSOR_SAMPLES, + platform: process.platform, + sampleIntervalMs: CURSOR_SAMPLE_INTERVAL_MS, + sourceId: getSelectedSourceId(), + startTimeMs: + typeof recordingId === "number" && Number.isFinite(recordingId) ? recordingId : undefined, + }); + + try { + await cursorRecordingSession.start(); + } catch (error) { + console.error("Failed to start cursor recording session:", error); + cursorRecordingSession = null; + } +} + +async function stopCursorRecording() { + if (!cursorRecordingSession) { + return; + } + + try { + pendingCursorRecordingData = await cursorRecordingSession.stop(); + } catch (error) { + console.error("Failed to stop cursor recording session:", error); + pendingCursorRecordingData = null; + } finally { + cursorRecordingSession = null; + } +} + +async function writePendingCursorTelemetry(videoPath: string) { + const telemetryPath = `${videoPath}.cursor.json`; + if (pendingCursorRecordingData && pendingCursorRecordingData.samples.length > 0) { + await fs.writeFile(telemetryPath, JSON.stringify(pendingCursorRecordingData, null, 2), "utf-8"); + } + pendingCursorRecordingData = null; +} + +function shiftPendingCursorTelemetry(offsetMs: number) { + if (!pendingCursorRecordingData || !Number.isFinite(offsetMs) || offsetMs <= 0) { + return; + } + + pendingCursorRecordingData = { + ...pendingCursorRecordingData, + samples: 
pendingCursorRecordingData.samples + .map((sample) => ({ + ...sample, + timeMs: Math.max(0, sample.timeMs - offsetMs), + })) + .sort((a, b) => a.timeMs - b.timeMs), + }; +} + +function waitForNativeWindowsCaptureStart(proc: ChildProcessWithoutNullStreams) { + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + cleanup(); + reject(new Error("Timed out waiting for native Windows capture to start")); + }, 12000); + + const onOutput = (chunk: Buffer) => { + nativeWindowsCaptureOutput += chunk.toString(); + if (nativeWindowsCaptureOutput.includes("Recording started")) { + cleanup(); + resolve(); + } + }; + const onError = (error: Error) => { + cleanup(); + reject(error); + }; + const onExit = (code: number | null) => { + cleanup(); + reject( + new Error( + nativeWindowsCaptureOutput.trim() || + `Native Windows capture exited before recording started (code=${code ?? "unknown"})`, + ), + ); + }; + const cleanup = () => { + clearTimeout(timer); + proc.stdout.off("data", onOutput); + proc.stderr.off("data", onOutput); + proc.off("error", onError); + proc.off("exit", onExit); + }; + + proc.stdout.on("data", onOutput); + proc.stderr.on("data", onOutput); + proc.once("error", onError); + proc.once("exit", onExit); + }); +} + +function waitForNativeWindowsCaptureStop(proc: ChildProcessWithoutNullStreams) { + return new Promise((resolve, reject) => { + const onOutput = (chunk: Buffer) => { + nativeWindowsCaptureOutput += chunk.toString(); + }; + const onClose = (code: number | null) => { + cleanup(); + const match = nativeWindowsCaptureOutput.match(/Recording stopped\. Output path: (.+)/); + if (match?.[1]) { + resolve(match[1].trim()); + return; + } + if (code === 0 && nativeWindowsCaptureTargetPath) { + resolve(nativeWindowsCaptureTargetPath); + return; + } + reject( + new Error( + nativeWindowsCaptureOutput.trim() || + `Native Windows capture exited with code=${code ?? 
"unknown"}`, + ), + ); + }; + const onError = (error: Error) => { + cleanup(); + reject(error); + }; + const cleanup = () => { + proc.stdout.off("data", onOutput); + proc.stderr.off("data", onOutput); + proc.off("close", onClose); + proc.off("error", onError); + }; + + proc.stdout.on("data", onOutput); + proc.stderr.on("data", onOutput); + proc.once("close", onClose); + proc.once("error", onError); + }); +} + function setCurrentRecordingSessionState(session: RecordingSession | null) { currentRecordingSession = session; currentVideoPath = session?.screenVideoPath ?? null; @@ -412,10 +631,10 @@ function setCurrentRecordingSessionState(session: RecordingSession | null) { export function registerIpcHandlers( createEditorWindow: () => void, createSourceSelectorWindow: () => BrowserWindow, - _createCountdownOverlayWindow: () => BrowserWindow, + createCountdownOverlayWindow: () => BrowserWindow, getMainWindow: () => BrowserWindow | null, getSourceSelectorWindow: () => BrowserWindow | null, - _getCountdownOverlayWindow?: () => BrowserWindow | null, + getCountdownOverlayWindow?: () => BrowserWindow | null, onRecordingStateChange?: (recording: boolean, sourceName: string) => void, _switchToHud?: () => void, ) { @@ -553,6 +772,282 @@ export function registerIpcHandlers( createEditorWindow(); }); + ipcMain.handle("countdown-overlay-show", async (_, value: number, runId: number) => { + const overlayWindow = getCountdownOverlayWindow?.() ?? 
createCountdownOverlayWindow(); + if (overlayWindow.isDestroyed()) { + return; + } + + if (!overlayWindow.isVisible()) { + overlayWindow.showInactive(); + } + + if (overlayWindow.webContents.isLoading()) { + await new Promise((resolve) => { + overlayWindow.webContents.once("did-finish-load", () => resolve()); + }); + } + + overlayWindow.webContents.send("countdown-overlay-value", value, runId); + }); + + ipcMain.handle("countdown-overlay-set-value", (_, value: number, runId: number) => { + const overlayWindow = getCountdownOverlayWindow?.(); + if (!overlayWindow || overlayWindow.isDestroyed()) { + return; + } + + overlayWindow.webContents.send("countdown-overlay-value", value, runId); + }); + + ipcMain.handle("countdown-overlay-hide", (_, runId: number) => { + const overlayWindow = getCountdownOverlayWindow?.(); + if (!overlayWindow || overlayWindow.isDestroyed()) { + return; + } + + overlayWindow.webContents.send("countdown-overlay-value", null, runId); + overlayWindow.hide(); + }); + + ipcMain.handle("is-native-windows-capture-available", async () => { + if (!isWindowsGraphicsCaptureOsSupported()) { + return { success: true, available: false, reason: "unsupported-os" }; + } + + const helperPath = await findNativeWindowsCaptureHelperPath(); + return helperPath + ? { success: true, available: true, helperPath } + : { success: true, available: false, reason: "missing-helper" }; + }); + + ipcMain.handle( + "start-native-windows-recording", + async (_, request: NativeWindowsRecordingRequest) => { + try { + if (!isWindowsGraphicsCaptureOsSupported()) { + return { + success: false, + error: "Windows Graphics Capture requires Windows 10 build 19041 or newer.", + }; + } + if (nativeWindowsCaptureProcess) { + return { success: false, error: "Native Windows capture is already running." }; + } + + const helperPath = await findNativeWindowsCaptureHelperPath(); + if (!helperPath) { + return { success: false, error: "Native Windows capture helper is not available." 
}; + } + + if (!request?.source?.sourceId) { + return { + success: false, + error: "Native Windows capture request is missing a source.", + }; + } + + const recordingId = + typeof request.recordingId === "number" && Number.isFinite(request.recordingId) + ? request.recordingId + : Date.now(); + const outputPath = path.join(RECORDINGS_DIR, `${RECORDING_FILE_PREFIX}${recordingId}.mp4`); + const webcamOutputPath = path.join( + RECORDINGS_DIR, + `${RECORDING_FILE_PREFIX}${recordingId}-webcam.mp4`, + ); + const sourceDisplay = + request.source.type === "display" && typeof request.source.displayId === "number" + ? (screen.getAllDisplays().find((display) => display.id === request.source.displayId) ?? + null) + : getSelectedDisplay(); + const bounds = sourceDisplay?.bounds ?? getSelectedSourceBounds(); + const displayId = + typeof request.source.displayId === "number" && Number.isFinite(request.source.displayId) + ? request.source.displayId + : Number(selectedSource?.display_id); + const config = { + schemaVersion: 2, + recordingId, + outputPath, + sourceType: request.source.type, + sourceId: request.source.sourceId, + displayId: Number.isFinite(displayId) ? displayId : 0, + windowHandle: request.source.windowHandle ?? null, + fps: request.video.fps, + videoWidth: request.video.width, + videoHeight: request.video.height, + displayX: bounds.x, + displayY: bounds.y, + displayW: bounds.width, + displayH: bounds.height, + hasDisplayBounds: true, + captureSystemAudio: request.audio.system.enabled, + captureMic: request.audio.microphone.enabled, + microphoneDeviceId: request.audio.microphone.deviceId ?? null, + microphoneGain: request.audio.microphone.gain, + webcamEnabled: request.webcam.enabled, + webcamDeviceId: request.webcam.deviceId ?? 
null, + webcamWidth: request.webcam.width, + webcamHeight: request.webcam.height, + webcamFps: request.webcam.fps, + outputs: { + screenPath: outputPath, + webcamPath: webcamOutputPath, + }, + source: { + type: request.source.type, + sourceId: request.source.sourceId, + displayId: Number.isFinite(displayId) ? displayId : null, + windowHandle: request.source.windowHandle ?? null, + bounds, + }, + video: request.video, + audio: request.audio, + webcam: request.webcam, + }; + + console.info("[native-wgc] starting Windows capture", { + helperPath, + source: request.source, + audio: request.audio, + webcam: request.webcam, + bounds, + sourceId: selectedSource?.id ?? null, + usedDisplayMatch: Boolean(sourceDisplay), + outputPath, + }); + + await fs.mkdir(RECORDINGS_DIR, { recursive: true }); + nativeWindowsCaptureOutput = ""; + nativeWindowsCaptureTargetPath = outputPath; + nativeWindowsCaptureWebcamTargetPath = request.webcam.enabled ? webcamOutputPath : null; + nativeWindowsCaptureRecordingId = recordingId; + nativeWindowsCursorOffsetMs = 0; + + const cursorStartTimeMs = Date.now(); + await startCursorRecording(cursorStartTimeMs); + console.info("[native-wgc] cursor sampler ready", { + cursorStartTimeMs, + warmupMs: Date.now() - cursorStartTimeMs, + }); + + const proc = spawn(helperPath, [JSON.stringify(config)], { + cwd: RECORDINGS_DIR, + stdio: ["pipe", "pipe", "pipe"], + windowsHide: true, + }); + nativeWindowsCaptureProcess = proc; + + await waitForNativeWindowsCaptureStart(proc); + const captureStartedAtMs = Date.now(); + nativeWindowsCursorOffsetMs = Math.max(0, captureStartedAtMs - cursorStartTimeMs); + console.info("[native-wgc] capture started", { + captureStartedAtMs, + cursorOffsetMs: nativeWindowsCursorOffsetMs, + }); + + const source = selectedSource || { name: "Screen" }; + if (onRecordingStateChange) { + onRecordingStateChange(true, source.name); + } + + return { + success: true, + recordingId, + path: outputPath, + helperPath, + }; + } catch (error) { + 
console.error("Failed to start native Windows recording:", error); + nativeWindowsCaptureProcess?.kill(); + nativeWindowsCaptureProcess = null; + nativeWindowsCaptureTargetPath = null; + nativeWindowsCaptureWebcamTargetPath = null; + nativeWindowsCaptureRecordingId = null; + nativeWindowsCursorOffsetMs = 0; + await stopCursorRecording(); + return { success: false, error: String(error) }; + } + }, + ); + + ipcMain.handle("stop-native-windows-recording", async (_, discard?: boolean) => { + const proc = nativeWindowsCaptureProcess; + const preferredPath = nativeWindowsCaptureTargetPath; + const preferredWebcamPath = nativeWindowsCaptureWebcamTargetPath; + const recordingId = nativeWindowsCaptureRecordingId ?? Date.now(); + + if (!proc) { + return { success: false, error: "Native Windows capture is not running." }; + } + + try { + const stoppedPathPromise = waitForNativeWindowsCaptureStop(proc); + proc.stdin.write("stop\n"); + const stoppedPath = await stoppedPathPromise; + const screenVideoPath = stoppedPath || preferredPath; + if (!screenVideoPath) { + throw new Error("Native Windows capture did not return an output path."); + } + + await stopCursorRecording(); + if (discard) { + pendingCursorRecordingData = null; + await Promise.all([ + fs.rm(screenVideoPath, { force: true }), + preferredWebcamPath ? fs.rm(preferredWebcamPath, { force: true }) : Promise.resolve(), + fs.rm(`${screenVideoPath}.cursor.json`, { force: true }), + ]); + return { success: true, discarded: true }; + } + + shiftPendingCursorTelemetry(nativeWindowsCursorOffsetMs); + await writePendingCursorTelemetry(screenVideoPath); + let webcamVideoPath: string | undefined; + if (preferredWebcamPath) { + try { + await fs.access(preferredWebcamPath, fsConstants.R_OK); + webcamVideoPath = preferredWebcamPath; + } catch { + webcamVideoPath = undefined; + } + } + const session: RecordingSession = webcamVideoPath + ? 
{ screenVideoPath, webcamVideoPath, createdAt: recordingId } + : { screenVideoPath, createdAt: recordingId }; + setCurrentRecordingSessionState(session); + currentProjectPath = null; + + const sessionManifestPath = path.join( + RECORDINGS_DIR, + `${path.parse(screenVideoPath).name}${RECORDING_SESSION_SUFFIX}`, + ); + await fs.writeFile(sessionManifestPath, JSON.stringify(session, null, 2), "utf-8"); + + return { + success: true, + path: screenVideoPath, + session, + message: "Native Windows recording session stored successfully", + }; + } catch (error) { + console.error("Failed to stop native Windows recording:", error); + await stopCursorRecording(); + return { success: false, error: String(error) }; + } finally { + nativeWindowsCaptureProcess = null; + nativeWindowsCaptureTargetPath = null; + nativeWindowsCaptureWebcamTargetPath = null; + nativeWindowsCaptureRecordingId = null; + nativeWindowsCursorOffsetMs = 0; + const source = selectedSource || { name: "Screen" }; + if (onRecordingStateChange) { + onRecordingStateChange(false, source.name); + } + } + }); + ipcMain.handle("store-recorded-session", async (_, payload: StoreRecordedSessionInput) => { try { return await storeRecordedSessionFiles(payload); @@ -586,15 +1081,7 @@ export function registerIpcHandlers( setCurrentRecordingSessionState(session); currentProjectPath = null; - const telemetryPath = `${screenVideoPath}.cursor.json`; - if (pendingCursorRecordingData && pendingCursorRecordingData.samples.length > 0) { - await fs.writeFile( - telemetryPath, - JSON.stringify(pendingCursorRecordingData, null, 2), - "utf-8", - ); - } - pendingCursorRecordingData = null; + await writePendingCursorTelemetry(screenVideoPath); const sessionManifestPath = path.join( RECORDINGS_DIR, @@ -653,39 +1140,9 @@ export function registerIpcHandlers( ipcMain.handle("set-recording-state", async (_, recording: boolean, recordingId?: number) => { if (recording) { - if (cursorRecordingSession) { - pendingCursorRecordingData = await 
cursorRecordingSession.stop(); - cursorRecordingSession = null; - } - - pendingCursorRecordingData = null; - cursorRecordingSession = createCursorRecordingSession({ - getDisplayBounds: getSelectedSourceBounds, - maxSamples: MAX_CURSOR_SAMPLES, - platform: process.platform, - sampleIntervalMs: CURSOR_SAMPLE_INTERVAL_MS, - sourceId: getSelectedSourceId(), - startTimeMs: - typeof recordingId === "number" && Number.isFinite(recordingId) ? recordingId : undefined, - }); - - try { - await cursorRecordingSession.start(); - } catch (error) { - console.error("Failed to start cursor recording session:", error); - cursorRecordingSession = null; - } + await startCursorRecording(recordingId); } else { - if (cursorRecordingSession) { - try { - pendingCursorRecordingData = await cursorRecordingSession.stop(); - } catch (error) { - console.error("Failed to stop cursor recording session:", error); - pendingCursorRecordingData = null; - } finally { - cursorRecordingSession = null; - } - } + await stopCursorRecording(); } const source = selectedSource || { name: "Screen" }; diff --git a/electron/native/README.md b/electron/native/README.md new file mode 100644 index 0000000..512517b --- /dev/null +++ b/electron/native/README.md @@ -0,0 +1,50 @@ +# Native capture helpers + +Windows native recording uses the first of these locations that exists, checked in this order: + +1. `OPENSCREEN_WGC_CAPTURE_EXE`, for local development and diagnostics. +2. `electron/native/wgc-capture/build/Release/wgc-capture.exe`, for a locally built multi-config helper. +3. `electron/native/wgc-capture/build/wgc-capture.exe`, for a locally built Ninja helper. +4. `electron/native/bin/win32-x64/wgc-capture.exe` or `electron/native/bin/win32-arm64/wgc-capture.exe`, for packaged prebuilt helpers. 
+ +Build the Windows helper with: + +```powershell +npm run build:native:win +``` + +The build writes the CMake output to `electron/native/wgc-capture/build/wgc-capture.exe` and copies the redistributable binary to `electron/native/bin/win32-x64/wgc-capture.exe`. + +The helper contract is process-based: the app starts the process with one JSON argument and sends commands on stdin. `stop\n` finalizes the recording. During migration the helper prints both newline-delimited JSON events and the legacy text messages `Recording started` / `Recording stopped. Output path: <path>`. + +Current V2 JSON shape: + +```json +{ + "schemaVersion": 2, + "recordingId": 123, + "sourceType": "display", + "sourceId": "screen:0:0", + "displayId": 1, + "outputPath": "C:\\path\\recording-123.mp4", + "videoWidth": 1920, + "videoHeight": 1080, + "fps": 60, + "captureSystemAudio": false, + "captureMic": false, + "webcamEnabled": false, + "outputs": { + "screenPath": "C:\\path\\recording-123.mp4", + "webcamPath": "C:\\path\\recording-123-webcam.mp4" + } +} +``` + +The current helper implementation supports display video capture and system audio loopback. Microphone, webcam, and window capture now fail explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan. + +Smoke-test the helper with: + +```powershell +npm run test:wgc-helper:win +npm run test:wgc-audio:win +``` diff --git a/electron/native/wgc-capture/CMakeLists.txt b/electron/native/wgc-capture/CMakeLists.txt new file mode 100644 index 0000000..76999f7 --- /dev/null +++ b/electron/native/wgc-capture/CMakeLists.txt @@ -0,0 +1,45 @@ +cmake_minimum_required(VERSION 3.20) + +# The local Windows SDK image used by some contributors can miss gdi32.lib, +# while CMake's default MSVC console template links it unconditionally. This +# helper does not use GDI, so keep the standard library set minimal and explicit. 
+set(CMAKE_CXX_STANDARD_LIBRARIES + "kernel32.lib user32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib" + CACHE STRING "" FORCE) + +project(openscreen-wgc-capture LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +add_executable(wgc-capture + src/main.cpp + src/mf_encoder.cpp + src/mf_encoder.h + src/monitor_utils.cpp + src/monitor_utils.h + src/wasapi_loopback_capture.cpp + src/wasapi_loopback_capture.h + src/wgc_session.cpp + src/wgc_session.h +) + +target_compile_definitions(wgc-capture PRIVATE + NOMINMAX + WIN32_LEAN_AND_MEAN + _WIN32_WINNT=0x0A00 +) + +target_compile_options(wgc-capture PRIVATE /EHsc /W4 /utf-8) + +target_link_libraries(wgc-capture PRIVATE + d3d11 + dxgi + mf + mfplat + mfreadwrite + mfuuid + runtimeobject + windowsapp +) diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp new file mode 100644 index 0000000..39d5c62 --- /dev/null +++ b/electron/native/wgc-capture/src/main.cpp @@ -0,0 +1,433 @@ +#include "mf_encoder.h" +#include "monitor_utils.h" +#include "wasapi_loopback_capture.h" +#include "wgc_session.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +struct CaptureConfig { + int schemaVersion = 1; + int64_t displayId = 0; + int64_t recordingId = 0; + std::string sourceType = "display"; + std::string sourceId; + std::string windowHandle; + std::string outputPath; + int fps = 60; + int width = 0; + int height = 0; + MonitorBounds bounds{}; + bool hasDisplayBounds = false; + bool captureSystemAudio = false; + bool captureMic = false; + bool webcamEnabled = false; + std::string microphoneDeviceId; + double microphoneGain = 1.0; + std::string webcamDeviceId; + int webcamWidth = 0; + int webcamHeight = 0; + int webcamFps = 0; +}; + +std::wstring utf8ToWide(const std::string& value) { + if (value.empty()) { + return {}; + } + + 
// Appends `codePoint` to `out` encoded as UTF-8 (one to four bytes).
void appendUtf8(std::string& out, uint32_t codePoint) {
    if (codePoint < 0x80) {
        out.push_back(static_cast<char>(codePoint));
    } else if (codePoint < 0x800) {
        out.push_back(static_cast<char>(0xC0 | (codePoint >> 6)));
        out.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
    } else if (codePoint < 0x10000) {
        out.push_back(static_cast<char>(0xE0 | (codePoint >> 12)));
        out.push_back(static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
    } else {
        out.push_back(static_cast<char>(0xF0 | (codePoint >> 18)));
        out.push_back(static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
    }
}

// Reads exactly four hexadecimal digits starting at `text[pos]` into `value`.
// Returns false (leaving `value` untouched) when the input is truncated or
// contains a non-hex character.
bool parseHex4(const std::string& text, std::size_t pos, uint32_t& value) {
    if (pos > text.size() || text.size() - pos < 4) {
        return false;
    }
    uint32_t parsed = 0;
    for (std::size_t i = 0; i < 4; i += 1) {
        const char c = text[pos + i];
        uint32_t digit = 0;
        if (c >= '0' && c <= '9') {
            digit = static_cast<uint32_t>(c - '0');
        } else if (c >= 'a' && c <= 'f') {
            digit = static_cast<uint32_t>(c - 'a') + 10;
        } else if (c >= 'A' && c <= 'F') {
            digit = static_cast<uint32_t>(c - 'A') + 10;
        } else {
            return false;
        }
        parsed = (parsed << 4) | digit;
    }
    value = parsed;
    return true;
}

// Locates the value that follows `"key":` and returns the offset of its first
// non-whitespace character, or std::string::npos when the key (or the colon
// after it) is absent. The returned offset may equal json.size().
//
// This is a flat substring scan, not a JSON parser: the first occurrence of
// the quoted key wins, wherever it appears in the text.
std::size_t findValueStart(const std::string& json, const std::string& key) {
    auto pos = json.find("\"" + key + "\"");
    if (pos == std::string::npos) {
        return std::string::npos;
    }
    pos = json.find(':', pos);
    if (pos == std::string::npos) {
        return std::string::npos;
    }
    pos += 1;
    while (pos < json.size() && std::isspace(static_cast<unsigned char>(json[pos]))) {
        pos += 1;
    }
    return pos;
}

// Escapes `value` so it can be embedded between double quotes in a JSON
// document. Quotes, backslashes and every control character below 0x20 are
// escaped (JSON forbids raw control characters inside strings); all other
// bytes, including UTF-8 multi-byte sequences, are copied through unchanged.
std::string jsonEscape(const std::string& value) {
    static constexpr char hexDigits[] = "0123456789abcdef";

    std::string result;
    result.reserve(value.size());
    for (const char c : value) {
        switch (c) {
        case '\\':
            result += "\\\\";
            break;
        case '"':
            result += "\\\"";
            break;
        case '\b':
            result += "\\b";
            break;
        case '\f':
            result += "\\f";
            break;
        case '\n':
            result += "\\n";
            break;
        case '\r':
            result += "\\r";
            break;
        case '\t':
            result += "\\t";
            break;
        default: {
            const auto byte = static_cast<unsigned char>(c);
            if (byte < 0x20) {
                // Remaining control characters have no short escape.
                result += "\\u00";
                result.push_back(hexDigits[(byte >> 4) & 0x0F]);
                result.push_back(hexDigits[byte & 0x0F]);
            } else {
                result.push_back(c);
            }
            break;
        }
        }
    }
    return result;
}

// Returns the boolean stored under `key`, or `fallback` when the key is
// missing or its value does not start with `true` / `false`.
bool findBool(const std::string& json, const std::string& key, bool fallback) {
    const std::size_t pos = findValueStart(json, key);
    if (pos == std::string::npos) {
        return fallback;
    }
    if (json.compare(pos, 4, "true") == 0) {
        return true;
    }
    if (json.compare(pos, 5, "false") == 0) {
        return false;
    }
    return fallback;
}

// Returns the integer stored under `key`, or `fallback` when the key is
// missing or the value cannot be parsed as a 64-bit integer.
int64_t findInt64(const std::string& json, const std::string& key, int64_t fallback) {
    const std::size_t pos = findValueStart(json, key);
    if (pos == std::string::npos) {
        return fallback;
    }
    try {
        return std::stoll(json.substr(pos));
    } catch (...) {
        // std::stoll throws on non-numeric or out-of-range input.
        return fallback;
    }
}

// Same as findInt64, narrowed to `int`.
int findInt(const std::string& json, const std::string& key, int fallback) {
    return static_cast<int>(findInt64(json, key, fallback));
}

// Returns the floating-point number stored under `key`, or `fallback` when the
// key is missing or the value cannot be parsed.
double findDouble(const std::string& json, const std::string& key, double fallback) {
    const std::size_t pos = findValueStart(json, key);
    if (pos == std::string::npos) {
        return fallback;
    }
    try {
        return std::stod(json.substr(pos));
    } catch (...) {
        return fallback;
    }
}

// Returns the decoded string stored under `key`, or an empty string when the
// key is missing or its value is not a JSON string.
//
// All JSON escapes are decoded: the two-character escapes as well as
// `\uXXXX`, including UTF-16 surrogate pairs, which are emitted as UTF-8. An
// unpaired surrogate becomes U+FFFD. A malformed `\u` sequence falls back to
// copying the escaped character verbatim, as unknown escapes always have.
std::string findString(const std::string& json, const std::string& key) {
    std::size_t pos = findValueStart(json, key);
    if (pos == std::string::npos || pos >= json.size() || json[pos] != '"') {
        return {};
    }
    pos += 1;

    std::string result;
    while (pos < json.size()) {
        const char c = json[pos++];
        if (c == '"') {
            break;
        }
        if (c != '\\' || pos >= json.size()) {
            result.push_back(c);
            continue;
        }

        const char escaped = json[pos++];
        switch (escaped) {
        case 'b':
            result.push_back('\b');
            break;
        case 'f':
            result.push_back('\f');
            break;
        case 'n':
            result.push_back('\n');
            break;
        case 'r':
            result.push_back('\r');
            break;
        case 't':
            result.push_back('\t');
            break;
        case 'u': {
            uint32_t codePoint = 0;
            if (!parseHex4(json, pos, codePoint)) {
                result.push_back(escaped);
                break;
            }
            pos += 4;

            // A high surrogate must be followed by `\uDC00`..`\uDFFF`.
            const bool isHighSurrogate = codePoint >= 0xD800 && codePoint <= 0xDBFF;
            if (isHighSurrogate && pos + 1 < json.size() && json[pos] == '\\' &&
                json[pos + 1] == 'u') {
                uint32_t low = 0;
                if (parseHex4(json, pos + 2, low) && low >= 0xDC00 && low <= 0xDFFF) {
                    codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
                    pos += 6;
                }
            }
            if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
                codePoint = 0xFFFD;
            }
            appendUtf8(result, codePoint);
            break;
        }
        default:
            // Covers `\\`, `\"`, `\/` and any unknown escape.
            result.push_back(escaped);
            break;
        }
    }
    return result;
}
config.sourceType = findString(json, "sourceType"); + if (config.sourceType.empty()) { + config.sourceType = "display"; + } + config.sourceId = findString(json, "sourceId"); + config.windowHandle = findString(json, "windowHandle"); + config.displayId = findInt64(json, "displayId", 0); + config.fps = std::clamp(findInt(json, "fps", 60), 1, 120); + config.width = findInt(json, "videoWidth", findInt(json, "width", 0)); + config.height = findInt(json, "videoHeight", findInt(json, "height", 0)); + config.bounds.x = findInt(json, "displayX", 0); + config.bounds.y = findInt(json, "displayY", 0); + config.bounds.width = findInt(json, "displayW", 0); + config.bounds.height = findInt(json, "displayH", 0); + config.hasDisplayBounds = findBool(json, "hasDisplayBounds", false); + config.captureSystemAudio = findBool(json, "captureSystemAudio", false); + config.captureMic = findBool(json, "captureMic", false); + config.webcamEnabled = findBool(json, "webcamEnabled", false); + config.microphoneDeviceId = findString(json, "microphoneDeviceId"); + config.microphoneGain = findDouble(json, "microphoneGain", 1.0); + config.webcamDeviceId = findString(json, "webcamDeviceId"); + config.webcamWidth = findInt(json, "webcamWidth", 0); + config.webcamHeight = findInt(json, "webcamHeight", 0); + config.webcamFps = findInt(json, "webcamFps", 0); + return true; +} + +void readStopCommands(std::atomic& stopRequested, std::condition_variable& cv) { + std::string line; + while (std::getline(std::cin, line)) { + if (line == "stop" || line == "q" || line == "quit") { + stopRequested = true; + cv.notify_all(); + return; + } + } + stopRequested = true; + cv.notify_all(); +} + +} // namespace + +int main(int argc, char* argv[]) { + if (argc < 2) { + std::cerr << "ERROR: Missing JSON config argument" << std::endl; + return 1; + } + + winrt::init_apartment(winrt::apartment_type::multi_threaded); + + CaptureConfig config; + if (!parseConfig(argv[1], config)) { + std::cerr << "ERROR: Failed to parse 
config JSON" << std::endl; + return 1; + } + + std::cout << "{\"event\":\"ready\",\"schemaVersion\":2}" << std::endl; + + if (config.sourceType != "display") { + std::cerr << "ERROR: Native window capture is not implemented yet" << std::endl; + return 1; + } + + if (config.captureMic) { + std::cerr << "ERROR: Microphone capture is not implemented in this helper yet" << std::endl; + return 1; + } + + if (config.webcamEnabled) { + std::cerr << "ERROR: Native webcam capture is not implemented in this helper yet" << std::endl; + return 1; + } + + HMONITOR monitor = findMonitorForCapture( + config.displayId, + config.hasDisplayBounds ? &config.bounds : nullptr); + if (!monitor) { + std::cerr << "ERROR: Could not resolve monitor" << std::endl; + return 1; + } + + WgcSession session; + if (!session.initialize(monitor, config.fps)) { + std::cerr << "ERROR: Failed to initialize WGC session" << std::endl; + return 1; + } + + // WGC owns the captured texture size. Encoding must use that exact size + // until a dedicated GPU scaling pass is introduced; CopyResource requires + // matching resource dimensions. + int width = session.captureWidth(); + int height = session.captureHeight(); + width = (std::max(2, width) / 2) * 2; + height = (std::max(2, height) / 2) * 2; + + const int pixels = width * height; + const int bitrate = pixels >= 3840 * 2160 ? 45'000'000 : pixels >= 2560 * 1440 ? 
28'000'000 : 18'000'000; + + WasapiLoopbackCapture loopbackCapture; + const AudioInputFormat* audioFormat = nullptr; + if (config.captureSystemAudio) { + if (!loopbackCapture.initialize()) { + std::cerr << "ERROR: Failed to initialize WASAPI loopback capture" << std::endl; + return 1; + } + audioFormat = &loopbackCapture.inputFormat(); + std::cout << "{\"event\":\"audio-format\",\"schemaVersion\":2,\"sampleRate\":" + << audioFormat->sampleRate << ",\"channels\":" << audioFormat->channels + << ",\"bitsPerSample\":" << audioFormat->bitsPerSample << "}" << std::endl; + } + + MFEncoder encoder; + if (!encoder.initialize( + utf8ToWide(config.outputPath), + width, + height, + config.fps, + bitrate, + session.device(), + session.context(), + audioFormat)) { + std::cerr << "ERROR: Failed to initialize Media Foundation encoder" << std::endl; + return 1; + } + + std::mutex mutex; + std::condition_variable cv; + std::atomic stopRequested = false; + std::atomic firstFrameWritten = false; + std::atomic encodeFailed = false; + + session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) { + if (stopRequested) { + return; + } + + std::scoped_lock lock(mutex); + if (!encoder.writeFrame(texture, timestampHns)) { + encodeFailed = true; + stopRequested = true; + cv.notify_all(); + return; + } + if (!firstFrameWritten.exchange(true)) { + cv.notify_all(); + } + }); + + if (config.captureSystemAudio) { + if (!loopbackCapture.start([&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) { + if (stopRequested) { + return; + } + + if (!encoder.writeAudio(data, byteCount, timestampHns, durationHns)) { + encodeFailed = true; + stopRequested = true; + cv.notify_all(); + } + })) { + std::cerr << "ERROR: Failed to start WASAPI loopback capture" << std::endl; + return 1; + } + } + + if (!session.start()) { + loopbackCapture.stop(); + std::cerr << "ERROR: Failed to start WGC session" << std::endl; + return 1; + } + + std::thread 
stdinThread(readStopCommands, std::ref(stopRequested), std::ref(cv)); + + { + std::unique_lock lock(mutex); + const bool started = cv.wait_for(lock, std::chrono::seconds(10), [&] { + return firstFrameWritten.load() || stopRequested.load(); + }); + if (!started || !firstFrameWritten) { + stopRequested = true; + cv.notify_all(); + if (stdinThread.joinable()) { + stdinThread.detach(); + } + loopbackCapture.stop(); + std::cerr << "ERROR: Timed out waiting for first WGC frame" << std::endl; + return 1; + } + } + + std::cout << "{\"event\":\"recording-started\",\"schemaVersion\":2}" << std::endl; + std::cout << "Recording started" << std::endl; + + { + std::unique_lock lock(mutex); + cv.wait(lock, [&] { + return stopRequested.load(); + }); + } + + loopbackCapture.stop(); + session.stop(); + { + std::scoped_lock lock(mutex); + encoder.finalize(); + } + + if (stdinThread.joinable()) { + stdinThread.join(); + } + + if (encodeFailed) { + std::cerr << "ERROR: Failed to encode WGC frame" << std::endl; + return 1; + } + + std::cout << "{\"event\":\"recording-stopped\",\"schemaVersion\":2,\"screenPath\":\"" + << jsonEscape(config.outputPath) << "\"}" << std::endl; + std::cout << "Recording stopped. 
Output path: " << config.outputPath << std::endl; + return 0; +} diff --git a/electron/native/wgc-capture/src/mf_encoder.cpp b/electron/native/wgc-capture/src/mf_encoder.cpp new file mode 100644 index 0000000..fc95fc2 --- /dev/null +++ b/electron/native/wgc-capture/src/mf_encoder.cpp @@ -0,0 +1,317 @@ +#include "mf_encoder.h" + +#include +#include +#include + +#include +#include +#include + +namespace { + +bool succeeded(HRESULT hr, const char* label) { + if (SUCCEEDED(hr)) { + return true; + } + + std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")" + << std::endl; + return false; +} + +void setFrameSize(IMFMediaType* type, UINT32 width, UINT32 height) { + MFSetAttributeSize(type, MF_MT_FRAME_SIZE, width, height); +} + +void setFrameRate(IMFMediaType* type, UINT32 fps) { + MFSetAttributeRatio(type, MF_MT_FRAME_RATE, fps, 1); +} + +void setPixelAspectRatio(IMFMediaType* type) { + MFSetAttributeRatio(type, MF_MT_PIXEL_ASPECT_RATIO, 1, 1); +} + +void setAudioFormat(IMFMediaType* type, UINT32 channels, UINT32 sampleRate, UINT32 bitsPerSample) { + type->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, channels); + type->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, sampleRate); + type->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitsPerSample); +} + +} // namespace + +MFEncoder::~MFEncoder() { + finalize(); +} + +bool MFEncoder::initialize( + const std::wstring& outputPath, + int width, + int height, + int fps, + int bitrate, + ID3D11Device* device, + ID3D11DeviceContext* context, + const AudioInputFormat* audioFormat) { + width_ = (std::max(2, width) / 2) * 2; + height_ = (std::max(2, height) / 2) * 2; + fps_ = std::max(1, fps); + device_ = device; + context_ = context; + + if (!succeeded(MFStartup(MF_VERSION), "MFStartup")) { + return false; + } + + Microsoft::WRL::ComPtr outputType; + if (!succeeded(MFCreateMediaType(&outputType), "MFCreateMediaType(output)")) { + return false; + } + outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); + 
outputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264); + outputType->SetUINT32(MF_MT_AVG_BITRATE, static_cast(std::max(1, bitrate))); + outputType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive); + setFrameSize(outputType.Get(), static_cast(width_), static_cast(height_)); + setFrameRate(outputType.Get(), static_cast(fps_)); + setPixelAspectRatio(outputType.Get()); + + if (!succeeded(MFCreateSinkWriterFromURL(outputPath.c_str(), nullptr, nullptr, &sinkWriter_), + "MFCreateSinkWriterFromURL")) { + return false; + } + if (!succeeded(sinkWriter_->AddStream(outputType.Get(), &videoStreamIndex_), "AddStream")) { + return false; + } + + if (audioFormat && !configureAudioStream(*audioFormat)) { + return false; + } + + Microsoft::WRL::ComPtr inputType; + if (!succeeded(MFCreateMediaType(&inputType), "MFCreateMediaType(input)")) { + return false; + } + inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); + inputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32); + inputType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive); + inputType->SetUINT32(MF_MT_DEFAULT_STRIDE, static_cast(width_ * 4)); + setFrameSize(inputType.Get(), static_cast(width_), static_cast(height_)); + setFrameRate(inputType.Get(), static_cast(fps_)); + setPixelAspectRatio(inputType.Get()); + + if (!succeeded(sinkWriter_->SetInputMediaType(videoStreamIndex_, inputType.Get(), nullptr), + "SetInputMediaType")) { + return false; + } + if (!succeeded(sinkWriter_->BeginWriting(), "BeginWriting")) { + return false; + } + + return true; +} + +bool MFEncoder::configureAudioStream(const AudioInputFormat& audioFormat) { + if (!sinkWriter_) { + return false; + } + if (audioFormat.sampleRate == 0 || audioFormat.channels == 0 || audioFormat.blockAlign == 0) { + std::cerr << "ERROR: Invalid audio input format" << std::endl; + return false; + } + + const UINT32 bitsPerSample = std::max(8, audioFormat.bitsPerSample); + const UINT32 aacBytesPerSecond = 24'000; + + Microsoft::WRL::ComPtr 
outputType; + if (!succeeded(MFCreateMediaType(&outputType), "MFCreateMediaType(audio output)")) { + return false; + } + outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio); + outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_AAC); + setAudioFormat(outputType.Get(), audioFormat.channels, audioFormat.sampleRate, 16); + outputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, aacBytesPerSecond); + outputType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, 0); + + if (!succeeded(sinkWriter_->AddStream(outputType.Get(), &audioStreamIndex_), "AddStream(audio)")) { + return false; + } + + Microsoft::WRL::ComPtr inputType; + if (!succeeded(MFCreateMediaType(&inputType), "MFCreateMediaType(audio input)")) { + return false; + } + inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio); + inputType->SetGUID(MF_MT_SUBTYPE, audioFormat.subtype); + setAudioFormat(inputType.Get(), audioFormat.channels, audioFormat.sampleRate, bitsPerSample); + inputType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, audioFormat.blockAlign); + inputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, audioFormat.avgBytesPerSec); + inputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE); + + if (!succeeded(sinkWriter_->SetInputMediaType(audioStreamIndex_, inputType.Get(), nullptr), + "SetInputMediaType(audio)")) { + return false; + } + + hasAudioStream_ = true; + return true; +} + +bool MFEncoder::ensureStagingTexture(ID3D11Texture2D* texture) { + if (stagingTexture_) { + return true; + } + + D3D11_TEXTURE2D_DESC desc{}; + texture->GetDesc(&desc); + desc.Width = static_cast(width_); + desc.Height = static_cast(height_); + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D11_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc.MiscFlags = 0; + + return succeeded(device_->CreateTexture2D(&desc, nullptr, &stagingTexture_), + "CreateTexture2D(staging)"); +} + +bool 
MFEncoder::copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, DWORD destinationSize) { + if (!ensureStagingTexture(texture)) { + return false; + } + + context_->CopyResource(stagingTexture_.Get(), texture); + + D3D11_MAPPED_SUBRESOURCE mapped{}; + if (!succeeded(context_->Map(stagingTexture_.Get(), 0, D3D11_MAP_READ, 0, &mapped), "Map")) { + return false; + } + + const DWORD rowBytes = static_cast(width_ * 4); + const DWORD requiredBytes = rowBytes * static_cast(height_); + if (destinationSize < requiredBytes) { + context_->Unmap(stagingTexture_.Get(), 0); + std::cerr << "ERROR: Media Foundation buffer is too small" << std::endl; + return false; + } + + auto* source = static_cast(mapped.pData); + for (int y = 0; y < height_; y += 1) { + std::memcpy(destination + rowBytes * y, source + mapped.RowPitch * y, rowBytes); + } + + context_->Unmap(stagingTexture_.Get(), 0); + return true; +} + +bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns) { + std::scoped_lock writerLock(writerMutex_); + if (!sinkWriter_ || finalized_) { + return false; + } + + if (firstTimestampHns_ < 0) { + firstTimestampHns_ = timestampHns; + } + + int64_t sampleTime = timestampHns - firstTimestampHns_; + if (sampleTime <= lastTimestampHns_) { + sampleTime = lastTimestampHns_ + (10'000'000LL / fps_); + } + const int64_t sampleDuration = 10'000'000LL / fps_; + lastTimestampHns_ = sampleTime; + + Microsoft::WRL::ComPtr buffer; + const DWORD frameBytes = static_cast(width_ * height_ * 4); + if (!succeeded(MFCreateMemoryBuffer(frameBytes, &buffer), "MFCreateMemoryBuffer")) { + return false; + } + + BYTE* data = nullptr; + DWORD maxLength = 0; + DWORD currentLength = 0; + if (!succeeded(buffer->Lock(&data, &maxLength, ¤tLength), "IMFMediaBuffer::Lock")) { + return false; + } + + const bool copied = copyFrameToBuffer(texture, data, maxLength); + buffer->Unlock(); + if (!copied) { + return false; + } + buffer->SetCurrentLength(frameBytes); + + Microsoft::WRL::ComPtr 
sample; + if (!succeeded(MFCreateSample(&sample), "MFCreateSample")) { + return false; + } + sample->AddBuffer(buffer.Get()); + sample->SetSampleTime(sampleTime); + sample->SetSampleDuration(sampleDuration); + + return succeeded(sinkWriter_->WriteSample(videoStreamIndex_, sample.Get()), "WriteSample"); +} + +bool MFEncoder::writeAudio(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) { + std::scoped_lock writerLock(writerMutex_); + if (!sinkWriter_ || finalized_ || !hasAudioStream_) { + return false; + } + if (!data || byteCount == 0 || durationHns <= 0) { + return true; + } + + Microsoft::WRL::ComPtr buffer; + if (!succeeded(MFCreateMemoryBuffer(byteCount, &buffer), "MFCreateMemoryBuffer(audio)")) { + return false; + } + + BYTE* destination = nullptr; + DWORD maxLength = 0; + DWORD currentLength = 0; + if (!succeeded(buffer->Lock(&destination, &maxLength, ¤tLength), + "IMFMediaBuffer::Lock(audio)")) { + return false; + } + if (maxLength < byteCount) { + buffer->Unlock(); + std::cerr << "ERROR: Media Foundation audio buffer is too small" << std::endl; + return false; + } + std::memcpy(destination, data, byteCount); + buffer->Unlock(); + buffer->SetCurrentLength(byteCount); + + Microsoft::WRL::ComPtr sample; + if (!succeeded(MFCreateSample(&sample), "MFCreateSample(audio)")) { + return false; + } + sample->AddBuffer(buffer.Get()); + sample->SetSampleTime(std::max(0, timestampHns)); + sample->SetSampleDuration(durationHns); + + return succeeded(sinkWriter_->WriteSample(audioStreamIndex_, sample.Get()), "WriteSample(audio)"); +} + +bool MFEncoder::finalize() { + std::scoped_lock writerLock(writerMutex_); + if (finalized_) { + return true; + } + + finalized_ = true; + bool ok = true; + if (sinkWriter_) { + ok = succeeded(sinkWriter_->Finalize(), "SinkWriter::Finalize"); + sinkWriter_.Reset(); + } + stagingTexture_.Reset(); + context_.Reset(); + device_.Reset(); + MFShutdown(); + return ok; +} diff --git 
a/electron/native/wgc-capture/src/mf_encoder.h b/electron/native/wgc-capture/src/mf_encoder.h new file mode 100644 index 0000000..b6db685 --- /dev/null +++ b/electron/native/wgc-capture/src/mf_encoder.h @@ -0,0 +1,63 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +struct AudioInputFormat { + GUID subtype = MFAudioFormat_PCM; + UINT32 sampleRate = 0; + UINT32 channels = 0; + UINT32 bitsPerSample = 0; + UINT32 blockAlign = 0; + UINT32 avgBytesPerSec = 0; +}; + +class MFEncoder { +public: + MFEncoder() = default; + ~MFEncoder(); + + MFEncoder(const MFEncoder&) = delete; + MFEncoder& operator=(const MFEncoder&) = delete; + + bool initialize( + const std::wstring& outputPath, + int width, + int height, + int fps, + int bitrate, + ID3D11Device* device, + ID3D11DeviceContext* context, + const AudioInputFormat* audioFormat = nullptr); + bool writeFrame(ID3D11Texture2D* texture, int64_t timestampHns); + bool writeAudio(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns); + bool finalize(); + +private: + bool ensureStagingTexture(ID3D11Texture2D* texture); + bool copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, DWORD destinationSize); + bool configureAudioStream(const AudioInputFormat& audioFormat); + + Microsoft::WRL::ComPtr sinkWriter_; + Microsoft::WRL::ComPtr device_; + Microsoft::WRL::ComPtr context_; + Microsoft::WRL::ComPtr stagingTexture_; + std::mutex writerMutex_; + DWORD videoStreamIndex_ = 0; + DWORD audioStreamIndex_ = 0; + bool hasAudioStream_ = false; + int width_ = 0; + int height_ = 0; + int fps_ = 60; + int64_t firstTimestampHns_ = -1; + int64_t lastTimestampHns_ = -1; + bool finalized_ = false; +}; diff --git a/electron/native/wgc-capture/src/monitor_utils.cpp b/electron/native/wgc-capture/src/monitor_utils.cpp new file mode 100644 index 0000000..f83e77d --- /dev/null +++ b/electron/native/wgc-capture/src/monitor_utils.cpp @@ -0,0 +1,88 @@ +#include 
"monitor_utils.h" + +#include +#include +#include + +namespace { + +struct MonitorCandidate { + HMONITOR monitor = nullptr; + RECT rect{}; +}; + +std::vector enumerateMonitors() { + std::vector monitors; + EnumDisplayMonitors( + nullptr, + nullptr, + [](HMONITOR monitor, HDC, LPRECT rect, LPARAM userData) -> BOOL { + auto* result = reinterpret_cast*>(userData); + result->push_back({monitor, *rect}); + return TRUE; + }, + reinterpret_cast(&monitors)); + return monitors; +} + +bool rectMatchesBounds(const RECT& rect, const MonitorBounds& bounds) { + return rect.left == bounds.x && + rect.top == bounds.y && + (rect.right - rect.left) == bounds.width && + (rect.bottom - rect.top) == bounds.height; +} + +int64_t overlapArea(const RECT& rect, const MonitorBounds& bounds) { + const LONG left = std::max(rect.left, bounds.x); + const LONG top = std::max(rect.top, bounds.y); + const LONG right = std::min(rect.right, bounds.x + bounds.width); + const LONG bottom = std::min(rect.bottom, bounds.y + bounds.height); + if (right <= left || bottom <= top) { + return 0; + } + return static_cast(right - left) * static_cast(bottom - top); +} + +} // namespace + +HMONITOR findMonitorForCapture(int64_t displayId, const MonitorBounds* bounds) { + const auto monitors = enumerateMonitors(); + if (monitors.empty()) { + return MonitorFromPoint({0, 0}, MONITOR_DEFAULTTOPRIMARY); + } + + // Electron's display_id is not stable across all Windows capture backends. + // Bounds are the most reliable contract because they come from Electron's + // selected display and match the WGC monitor coordinate space. 
+ if (bounds && bounds->width > 0 && bounds->height > 0) { + for (const auto& candidate : monitors) { + if (rectMatchesBounds(candidate.rect, *bounds)) { + return candidate.monitor; + } + } + + HMONITOR bestMonitor = nullptr; + int64_t bestArea = 0; + for (const auto& candidate : monitors) { + const int64_t area = overlapArea(candidate.rect, *bounds); + if (area > bestArea) { + bestArea = area; + bestMonitor = candidate.monitor; + } + } + if (bestMonitor) { + return bestMonitor; + } + } + + // Best-effort fallback for helpers invoked without bounds. Some callers pass + // zero-based ids while Win32 monitor handles are pointer values, so only use + // this when it exactly matches the HMONITOR value. + for (const auto& candidate : monitors) { + if (reinterpret_cast(candidate.monitor) == displayId) { + return candidate.monitor; + } + } + + return MonitorFromPoint({0, 0}, MONITOR_DEFAULTTOPRIMARY); +} diff --git a/electron/native/wgc-capture/src/monitor_utils.h b/electron/native/wgc-capture/src/monitor_utils.h new file mode 100644 index 0000000..11d5d83 --- /dev/null +++ b/electron/native/wgc-capture/src/monitor_utils.h @@ -0,0 +1,14 @@ +#pragma once + +#include + +#include + +struct MonitorBounds { + int x = 0; + int y = 0; + int width = 0; + int height = 0; +}; + +HMONITOR findMonitorForCapture(int64_t displayId, const MonitorBounds* bounds); diff --git a/electron/native/wgc-capture/src/wasapi_loopback_capture.cpp b/electron/native/wgc-capture/src/wasapi_loopback_capture.cpp new file mode 100644 index 0000000..e4f254e --- /dev/null +++ b/electron/native/wgc-capture/src/wasapi_loopback_capture.cpp @@ -0,0 +1,205 @@ +#include "wasapi_loopback_capture.h" + +#include + +#include +#include +#include + +namespace { + +constexpr REFERENCE_TIME BufferDurationHns = 10'000'000; +constexpr int64_t HnsPerSecond = 10'000'000; + +bool succeeded(HRESULT hr, const char* label) { + if (SUCCEEDED(hr)) { + return true; + } + + std::cerr << "ERROR: " << label << " failed (hr=0x" << 
std::hex << hr << std::dec << ")" + << std::endl; + return false; +} + +GUID audioSubtypeFromFormat(WAVEFORMATEX* format) { + if (format->wFormatTag == WAVE_FORMAT_IEEE_FLOAT) { + return MFAudioFormat_Float; + } + if (format->wFormatTag == WAVE_FORMAT_PCM) { + return MFAudioFormat_PCM; + } + if (format->wFormatTag == WAVE_FORMAT_EXTENSIBLE && + format->cbSize >= sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX)) { + auto* extensible = reinterpret_cast(format); + if (extensible->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) { + return MFAudioFormat_Float; + } + if (extensible->SubFormat == KSDATAFORMAT_SUBTYPE_PCM) { + return MFAudioFormat_PCM; + } + } + return GUID_NULL; +} + +} // namespace + +WasapiLoopbackCapture::~WasapiLoopbackCapture() { + stop(); + if (mixFormat_) { + CoTaskMemFree(mixFormat_); + mixFormat_ = nullptr; + } +} + +bool WasapiLoopbackCapture::initialize() { + HRESULT hr = CoCreateInstance( + __uuidof(MMDeviceEnumerator), + nullptr, + CLSCTX_ALL, + IID_PPV_ARGS(&deviceEnumerator_)); + if (!succeeded(hr, "CoCreateInstance(MMDeviceEnumerator)")) { + return false; + } + + hr = deviceEnumerator_->GetDefaultAudioEndpoint(eRender, eConsole, &device_); + if (!succeeded(hr, "GetDefaultAudioEndpoint(render)")) { + return false; + } + + hr = device_->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, &audioClient_); + if (!succeeded(hr, "IMMDevice::Activate(IAudioClient)")) { + return false; + } + + hr = audioClient_->GetMixFormat(&mixFormat_); + if (!succeeded(hr, "IAudioClient::GetMixFormat") || !mixFormat_) { + return false; + } + + if (!resolveInputFormat(mixFormat_)) { + std::cerr << "ERROR: Unsupported WASAPI loopback mix format" << std::endl; + return false; + } + + hr = audioClient_->Initialize( + AUDCLNT_SHAREMODE_SHARED, + AUDCLNT_STREAMFLAGS_LOOPBACK, + BufferDurationHns, + 0, + mixFormat_, + nullptr); + if (!succeeded(hr, "IAudioClient::Initialize(loopback)")) { + return false; + } + + hr = 
audioClient_->GetService(IID_PPV_ARGS(&captureClient_)); + if (!succeeded(hr, "IAudioClient::GetService(IAudioCaptureClient)")) { + return false; + } + + return true; +} + +bool WasapiLoopbackCapture::resolveInputFormat(WAVEFORMATEX* mixFormat) { + const GUID subtype = audioSubtypeFromFormat(mixFormat); + if (subtype == GUID_NULL) { + return false; + } + + inputFormat_.subtype = subtype; + inputFormat_.sampleRate = mixFormat->nSamplesPerSec; + inputFormat_.channels = mixFormat->nChannels; + inputFormat_.bitsPerSample = mixFormat->wBitsPerSample; + inputFormat_.blockAlign = mixFormat->nBlockAlign; + inputFormat_.avgBytesPerSec = mixFormat->nAvgBytesPerSec; + return inputFormat_.sampleRate > 0 && inputFormat_.channels > 0 && inputFormat_.blockAlign > 0; +} + +bool WasapiLoopbackCapture::start(AudioCallback callback) { + if (!audioClient_ || !captureClient_ || !callback) { + return false; + } + + callback_ = std::move(callback); + stopRequested_ = false; + writtenFrames_ = 0; + + HRESULT hr = audioClient_->Start(); + if (!succeeded(hr, "IAudioClient::Start")) { + return false; + } + + thread_ = std::thread([this] { + captureLoop(); + }); + return true; +} + +void WasapiLoopbackCapture::stop() { + stopRequested_ = true; + if (thread_.joinable()) { + thread_.join(); + } + if (audioClient_) { + audioClient_->Stop(); + } +} + +const AudioInputFormat& WasapiLoopbackCapture::inputFormat() const { + return inputFormat_; +} + +void WasapiLoopbackCapture::captureLoop() { + while (!stopRequested_) { + UINT32 packetFrames = 0; + HRESULT hr = captureClient_->GetNextPacketSize(&packetFrames); + if (FAILED(hr)) { + std::cerr << "ERROR: IAudioCaptureClient::GetNextPacketSize failed (hr=0x" << std::hex + << hr << std::dec << ")" << std::endl; + break; + } + + while (packetFrames > 0 && !stopRequested_) { + BYTE* data = nullptr; + UINT32 framesAvailable = 0; + DWORD flags = 0; + + hr = captureClient_->GetBuffer(&data, &framesAvailable, &flags, nullptr, nullptr); + if (FAILED(hr)) { + 
std::cerr << "ERROR: IAudioCaptureClient::GetBuffer failed (hr=0x" << std::hex + << hr << std::dec << ")" << std::endl; + break; + } + + const DWORD byteCount = framesAvailable * inputFormat_.blockAlign; + const int64_t timestampHns = + static_cast((writtenFrames_ * HnsPerSecond) / inputFormat_.sampleRate); + const int64_t durationHns = + static_cast((static_cast(framesAvailable) * HnsPerSecond) / + inputFormat_.sampleRate); + + if (byteCount > 0) { + if ((flags & AUDCLNT_BUFFERFLAGS_SILENT) != 0 || !data) { + silenceBuffer_.assign(byteCount, 0); + callback_(silenceBuffer_.data(), byteCount, timestampHns, durationHns); + } else { + callback_(data, byteCount, timestampHns, durationHns); + } + } + + writtenFrames_ += framesAvailable; + captureClient_->ReleaseBuffer(framesAvailable); + + hr = captureClient_->GetNextPacketSize(&packetFrames); + if (FAILED(hr)) { + std::cerr << "ERROR: IAudioCaptureClient::GetNextPacketSize failed (hr=0x" + << std::hex << hr << std::dec << ")" << std::endl; + packetFrames = 0; + break; + } + } + + std::this_thread::sleep_for(std::chrono::milliseconds(5)); + } + +} diff --git a/electron/native/wgc-capture/src/wasapi_loopback_capture.h b/electron/native/wgc-capture/src/wasapi_loopback_capture.h new file mode 100644 index 0000000..e6fb7e8 --- /dev/null +++ b/electron/native/wgc-capture/src/wasapi_loopback_capture.h @@ -0,0 +1,47 @@ +#pragma once + +#include "mf_encoder.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +class WasapiLoopbackCapture { +public: + using AudioCallback = std::function; + + WasapiLoopbackCapture() = default; + ~WasapiLoopbackCapture(); + + WasapiLoopbackCapture(const WasapiLoopbackCapture&) = delete; + WasapiLoopbackCapture& operator=(const WasapiLoopbackCapture&) = delete; + + bool initialize(); + bool start(AudioCallback callback); + void stop(); + + const AudioInputFormat& inputFormat() const; + +private: + void captureLoop(); + bool 
resolveInputFormat(WAVEFORMATEX* mixFormat); + + Microsoft::WRL::ComPtr deviceEnumerator_; + Microsoft::WRL::ComPtr device_; + Microsoft::WRL::ComPtr audioClient_; + Microsoft::WRL::ComPtr captureClient_; + WAVEFORMATEX* mixFormat_ = nullptr; + AudioInputFormat inputFormat_{}; + AudioCallback callback_; + std::thread thread_; + std::atomic stopRequested_ = false; + std::vector silenceBuffer_; + uint64_t writtenFrames_ = 0; +}; diff --git a/electron/native/wgc-capture/src/wgc_session.cpp b/electron/native/wgc-capture/src/wgc_session.cpp new file mode 100644 index 0000000..c25444e --- /dev/null +++ b/electron/native/wgc-capture/src/wgc_session.cpp @@ -0,0 +1,223 @@ +#include "wgc_session.h" + +#include +#include +#include +#include + +#include + +namespace wf = winrt::Windows::Foundation; +namespace wgcap = winrt::Windows::Graphics::Capture; +namespace wgdx = winrt::Windows::Graphics::DirectX; +namespace wgd3d = winrt::Windows::Graphics::DirectX::Direct3D11; + +extern "C" HRESULT __stdcall CreateDirect3D11DeviceFromDXGIDevice( + ::IDXGIDevice* dxgiDevice, + ::IInspectable** graphicsDevice); + +namespace { + +bool succeeded(HRESULT hr, const char* label) { + if (SUCCEEDED(hr)) { + return true; + } + + std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")" + << std::endl; + return false; +} + +int64_t timeSpanToHns(wf::TimeSpan const& value) { + return value.count(); +} + +} // namespace + +WgcSession::~WgcSession() { + stop(); +} + +bool WgcSession::createD3DDevice() { + UINT flags = D3D11_CREATE_DEVICE_BGRA_SUPPORT; +#if defined(_DEBUG) + flags |= D3D11_CREATE_DEVICE_DEBUG; +#endif + + D3D_FEATURE_LEVEL featureLevels[] = { + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + }; + D3D_FEATURE_LEVEL featureLevel{}; + + HRESULT hr = D3D11CreateDevice( + nullptr, + D3D_DRIVER_TYPE_HARDWARE, + nullptr, + flags, + featureLevels, + ARRAYSIZE(featureLevels), + D3D11_SDK_VERSION, + 
&d3dDevice_, + &featureLevel, + &d3dContext_); + +#if defined(_DEBUG) + if (FAILED(hr)) { + flags &= ~D3D11_CREATE_DEVICE_DEBUG; + hr = D3D11CreateDevice( + nullptr, + D3D_DRIVER_TYPE_HARDWARE, + nullptr, + flags, + featureLevels, + ARRAYSIZE(featureLevels), + D3D11_SDK_VERSION, + &d3dDevice_, + &featureLevel, + &d3dContext_); + } +#endif + + if (!succeeded(hr, "D3D11CreateDevice")) { + return false; + } + + Microsoft::WRL::ComPtr dxgiDevice; + if (!succeeded(d3dDevice_.As(&dxgiDevice), "Query IDXGIDevice")) { + return false; + } + + winrt::com_ptr<::IInspectable> inspectableDevice; + if (!succeeded(CreateDirect3D11DeviceFromDXGIDevice(dxgiDevice.Get(), inspectableDevice.put()), + "CreateDirect3D11DeviceFromDXGIDevice")) { + return false; + } + + winrtDevice_ = inspectableDevice.as(); + return true; +} + +bool WgcSession::createCaptureItem(HMONITOR monitor) { + auto factory = winrt::get_activation_factory(); + auto interop = factory.as(); + + wgcap::GraphicsCaptureItem item{nullptr}; + HRESULT hr = interop->CreateForMonitor( + monitor, + winrt::guid_of(), + reinterpret_cast(winrt::put_abi(item))); + if (!succeeded(hr, "CreateForMonitor")) { + return false; + } + + item_ = item; + const auto size = item_.Size(); + width_ = static_cast(size.Width); + height_ = static_cast(size.Height); + return width_ > 0 && height_ > 0; +} + +bool WgcSession::initialize(HMONITOR monitor, int fps) { + fps_ = fps > 0 ? fps : 60; + if (!createD3DDevice()) { + return false; + } + if (!createCaptureItem(monitor)) { + return false; + } + + framePool_ = wgcap::Direct3D11CaptureFramePool::CreateFreeThreaded( + winrtDevice_, + wgdx::DirectXPixelFormat::B8G8R8A8UIntNormalized, + 2, + item_.Size()); + session_ = framePool_.CreateCaptureSession(item_); + + try { + session_.IsCursorCaptureEnabled(false); + } catch (...) { + // Older WGC builds can omit this property; callers still overlay their own cursor. 
+ } + + frameArrivedToken_ = framePool_.FrameArrived({this, &WgcSession::onFrameArrived}); + return true; +} + +void WgcSession::setFrameCallback(FrameCallback callback) { + std::scoped_lock lock(callbackMutex_); + frameCallback_ = std::move(callback); +} + +bool WgcSession::start() { + if (!session_) { + return false; + } + session_.StartCapture(); + started_ = true; + return true; +} + +void WgcSession::stop() { + if (framePool_) { + framePool_.FrameArrived(frameArrivedToken_); + } + if (session_) { + session_.Close(); + session_ = nullptr; + } + if (framePool_) { + framePool_.Close(); + framePool_ = nullptr; + } + item_ = nullptr; + winrtDevice_ = nullptr; + d3dContext_.Reset(); + d3dDevice_.Reset(); + started_ = false; +} + +void WgcSession::onFrameArrived( + wgcap::Direct3D11CaptureFramePool const& sender, + wf::IInspectable const&) { + auto frame = sender.TryGetNextFrame(); + if (!frame) { + return; + } + + auto surface = frame.Surface(); + auto access = surface.as<::Windows::Graphics::DirectX::Direct3D11::IDirect3DDxgiInterfaceAccess>(); + Microsoft::WRL::ComPtr texture; + HRESULT hr = access->GetInterface(__uuidof(ID3D11Texture2D), reinterpret_cast(texture.GetAddressOf())); + if (FAILED(hr) || !texture) { + return; + } + + FrameCallback callback; + { + std::scoped_lock lock(callbackMutex_); + callback = frameCallback_; + } + + if (callback) { + callback(texture.Get(), timeSpanToHns(frame.SystemRelativeTime())); + } +} + +int WgcSession::captureWidth() const { + return width_; +} + +int WgcSession::captureHeight() const { + return height_; +} + +ID3D11Device* WgcSession::device() const { + return d3dDevice_.Get(); +} + +ID3D11DeviceContext* WgcSession::context() const { + return d3dContext_.Get(); +} diff --git a/electron/native/wgc-capture/src/wgc_session.h b/electron/native/wgc-capture/src/wgc_session.h new file mode 100644 index 0000000..8cfb050 --- /dev/null +++ b/electron/native/wgc-capture/src/wgc_session.h @@ -0,0 +1,55 @@ +#pragma once + +#include 
+#include +#include +#include +#include +#include +#include +#include + +#include +#include + +class WgcSession { +public: + using FrameCallback = std::function; + + WgcSession() = default; + ~WgcSession(); + + WgcSession(const WgcSession&) = delete; + WgcSession& operator=(const WgcSession&) = delete; + + bool initialize(HMONITOR monitor, int fps); + void setFrameCallback(FrameCallback callback); + bool start(); + void stop(); + + int captureWidth() const; + int captureHeight() const; + ID3D11Device* device() const; + ID3D11DeviceContext* context() const; + +private: + bool createD3DDevice(); + bool createCaptureItem(HMONITOR monitor); + void onFrameArrived( + winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool const& sender, + winrt::Windows::Foundation::IInspectable const&); + + Microsoft::WRL::ComPtr d3dDevice_; + Microsoft::WRL::ComPtr d3dContext_; + winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice winrtDevice_{nullptr}; + winrt::Windows::Graphics::Capture::GraphicsCaptureItem item_{nullptr}; + winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool framePool_{nullptr}; + winrt::Windows::Graphics::Capture::GraphicsCaptureSession session_{nullptr}; + winrt::event_token frameArrivedToken_{}; + FrameCallback frameCallback_; + std::mutex callbackMutex_; + int width_ = 0; + int height_ = 0; + int fps_ = 60; + bool started_ = false; +}; diff --git a/electron/preload.ts b/electron/preload.ts index 5345aab..2f9059f 100644 --- a/electron/preload.ts +++ b/electron/preload.ts @@ -1,4 +1,5 @@ import { contextBridge, ipcRenderer } from "electron"; +import type { NativeWindowsRecordingRequest } from "../src/lib/nativeWindowsRecording"; import type { RecordingSession, StoreRecordedSessionInput } from "../src/lib/recordingSession"; import { NATIVE_BRIDGE_CHANNEL, type NativeBridgeRequest } from "../src/native/contracts"; @@ -64,6 +65,15 @@ contextBridge.exposeInMainWorld("electronAPI", { setRecordingState: (recording: boolean, recordingId?: 
number) => { return ipcRenderer.invoke("set-recording-state", recording, recordingId); }, + isNativeWindowsCaptureAvailable: () => { + return ipcRenderer.invoke("is-native-windows-capture-available"); + }, + startNativeWindowsRecording: (request: NativeWindowsRecordingRequest) => { + return ipcRenderer.invoke("start-native-windows-recording", request); + }, + stopNativeWindowsRecording: (discard?: boolean) => { + return ipcRenderer.invoke("stop-native-windows-recording", discard); + }, getCursorTelemetry: (videoPath?: string) => { return ipcRenderer.invoke("get-cursor-telemetry", videoPath); }, diff --git a/package.json b/package.json index f81d99b..4311f6e 100644 --- a/package.json +++ b/package.json @@ -19,13 +19,16 @@ "lint:fix": "biome check --write .", "format": "biome format --write .", "i18n:check": "node scripts/i18n-check.mjs", - "preview": "vite preview", - "build:mac": "tsc && vite build && electron-builder --mac", - "build:win": "tsc && vite build && electron-builder --win --config.npmRebuild=false", - "build:linux": "tsc && vite build && electron-builder --linux AppImage deb pacman --config.npmRebuild=false", - "test": "vitest --run", + "preview": "vite preview", + "build:mac": "tsc && vite build && electron-builder --mac", + "build:native:win": "node scripts/build-windows-wgc-helper.mjs", + "build:win": "npm run build:native:win && tsc && vite build && electron-builder --win --config.npmRebuild=false", + "build:linux": "tsc && vite build && electron-builder --linux AppImage deb pacman --config.npmRebuild=false", + "test": "vitest --run", "test:watch": "vitest", "test:cursor-native:win": "node scripts/test-windows-native-cursor.mjs", + "test:wgc-helper:win": "node scripts/test-windows-wgc-helper.mjs", + "test:wgc-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio", "capture:openscreen-preview": "node scripts/capture-openscreen-preview.mjs", "build-vite": "tsc && vite build", "test:browser": "vitest --config vitest.browser.config.ts 
--run", diff --git a/scripts/build-windows-wgc-helper.mjs b/scripts/build-windows-wgc-helper.mjs new file mode 100644 index 0000000..85da01e --- /dev/null +++ b/scripts/build-windows-wgc-helper.mjs @@ -0,0 +1,112 @@ +import { spawn } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const ROOT = path.join(__dirname, ".."); +const SOURCE_DIR = path.join(ROOT, "electron", "native", "wgc-capture"); +const BUILD_DIR = path.join(SOURCE_DIR, "build"); +const COMPAT_LIB_DIR = path.join(BUILD_DIR, "compat-libs"); +const BIN_DIR = path.join(ROOT, "electron", "native", "bin", "win32-x64"); +const CMAKE = process.env.CMAKE_EXE ?? "cmake"; + +function findVcVarsAll() { + const explicit = process.env.VCVARSALL; + if (explicit && fs.existsSync(explicit)) { + return explicit; + } + + const roots = [ + process.env.VSINSTALLDIR, + "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community", + "C:\\Program Files\\Microsoft Visual Studio\\2022\\Professional", + "C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise", + "C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools", + "C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\Community", + ]; + + for (const root of roots.filter(Boolean)) { + const candidate = path.join(root, "VC", "Auxiliary", "Build", "vcvarsall.bat"); + if (fs.existsSync(candidate)) { + return candidate; + } + } + + return null; +} + +function run(command, args, options = {}) { + return new Promise((resolve, reject) => { + const child = spawn(command, args, { + cwd: ROOT, + stdio: "inherit", + windowsHide: true, + ...options, + }); + child.once("error", reject); + child.once("exit", (code) => { + if (code === 0) { + resolve(); + } else { + reject(new Error(`${command} ${args.join(" ")} failed with code ${code}`)); + } + }); + }); +} + +async function 
runInVsEnv(command) { + const vcvarsAll = findVcVarsAll(); + if (!vcvarsAll) { + throw new Error( + "Could not find Visual Studio vcvarsall.bat. Install Visual Studio Build Tools with C++.", + ); + } + + const cmdPath = path.join(os.tmpdir(), `openscreen-build-wgc-${process.pid}-${Date.now()}.cmd`); + fs.writeFileSync( + cmdPath, + [ + "@echo off", + `call "${vcvarsAll}" x64`, + "if errorlevel 1 exit /b %errorlevel%", + `if not exist "${COMPAT_LIB_DIR}" mkdir "${COMPAT_LIB_DIR}"`, + `for %%L in (gdi32.lib winspool.lib shell32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib) do if not exist "%WindowsSdkDir%Lib\\%WindowsSDKLibVersion%um\\x64\\%%L" copy /Y "%WindowsSdkDir%Lib\\%WindowsSDKLibVersion%um\\x64\\kernel32.Lib" "${COMPAT_LIB_DIR}\\%%L" >nul`, + "if errorlevel 1 exit /b %errorlevel%", + `set "LIB=${COMPAT_LIB_DIR};%LIB%"`, + command, + "exit /b %errorlevel%", + "", + ].join("\r\n"), + ); + try { + await run("cmd.exe", ["/d", "/c", cmdPath]); + } finally { + fs.rmSync(cmdPath, { force: true }); + } +} + +if (process.platform !== "win32") { + console.log("Skipping WGC helper build: Windows-only."); + process.exit(0); +} + +fs.mkdirSync(BUILD_DIR, { recursive: true }); + +await runInVsEnv( + `"${CMAKE}" -S "${SOURCE_DIR}" -B "${BUILD_DIR}" -G Ninja -DCMAKE_BUILD_TYPE=Release`, +); +await runInVsEnv(`"${CMAKE}" --build "${BUILD_DIR}" --config Release`); + +const outputPath = path.join(BUILD_DIR, "wgc-capture.exe"); +if (!fs.existsSync(outputPath)) { + throw new Error(`WGC helper build completed but ${outputPath} was not found.`); +} + +fs.mkdirSync(BIN_DIR, { recursive: true }); +const distributablePath = path.join(BIN_DIR, "wgc-capture.exe"); +fs.copyFileSync(outputPath, distributablePath); + +console.log(`Built ${outputPath}`); +console.log(`Copied ${distributablePath}`); diff --git a/scripts/test-windows-wgc-helper.mjs b/scripts/test-windows-wgc-helper.mjs new file mode 100644 index 0000000..627c0ca --- /dev/null +++ b/scripts/test-windows-wgc-helper.mjs 
@@ -0,0 +1,167 @@ +import { spawn, spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const ROOT = path.join(__dirname, ".."); +const HELPER_PATH = + process.env.OPENSCREEN_WGC_CAPTURE_EXE ?? + path.join(ROOT, "electron", "native", "bin", "win32-x64", "wgc-capture.exe"); + +const DURATION_MS = Number(process.env.OPENSCREEN_WGC_TEST_DURATION_MS ?? 5000); +const WITH_SYSTEM_AUDIO = + process.env.OPENSCREEN_WGC_TEST_SYSTEM_AUDIO === "true" || + process.argv.includes("--system-audio"); + +function runHelper(config) { + return new Promise((resolve, reject) => { + const child = spawn(HELPER_PATH, [JSON.stringify(config)], { + stdio: ["pipe", "pipe", "pipe"], + windowsHide: true, + }); + + let stdout = ""; + let stderr = ""; + + child.stdout.on("data", (chunk) => { + stdout += chunk.toString(); + }); + child.stderr.on("data", (chunk) => { + stderr += chunk.toString(); + }); + child.once("error", reject); + child.once("exit", (code) => { + resolve({ code, stdout, stderr }); + }); + + setTimeout(() => { + child.stdin.write("stop\n"); + }, DURATION_MS); + }); +} + +function probeStreams(outputPath) { + const ffprobe = spawnSync( + "ffprobe", + ["-v", "error", "-show_streams", "-of", "json", outputPath], + { encoding: "utf8", windowsHide: true }, + ); + if (ffprobe.status !== 0) { + throw new Error(`ffprobe failed: ${ffprobe.stderr || ffprobe.stdout}`); + } + return JSON.parse(ffprobe.stdout).streams ?? []; +} + +function measureFirstFrameLuma(outputPath) { + const ffmpeg = spawnSync( + "ffmpeg", + [ + "-v", + "error", + "-i", + outputPath, + "-frames:v", + "1", + "-f", + "rawvideo", + "-pix_fmt", + "gray", + "pipe:1", + ], + { windowsHide: true, maxBuffer: 64 * 1024 * 1024 }, + ); + if (ffmpeg.status !== 0) { + throw new Error(`ffmpeg frame extraction failed: ${ffmpeg.stderr?.toString() ?? 
""}`); + } + const data = ffmpeg.stdout; + if (!data || data.length === 0) { + throw new Error(`ffmpeg did not return frame data for ${outputPath}`); + } + let sum = 0; + let max = 0; + for (const value of data) { + sum += value; + if (value > max) { + max = value; + } + } + return { average: sum / data.length, max }; +} + +if (process.platform !== "win32") { + console.log("Skipping WGC helper smoke test: Windows-only."); + process.exit(0); +} + +if (!fs.existsSync(HELPER_PATH)) { + throw new Error(`WGC helper not found at ${HELPER_PATH}. Run npm run build:native:win first.`); +} + +const outputPath = path.join( + os.tmpdir(), + `openscreen-wgc-helper-${WITH_SYSTEM_AUDIO ? "audio" : "video"}-${Date.now()}.mp4`, +); + +const config = { + schemaVersion: 2, + recordingId: Date.now(), + outputPath, + sourceType: "display", + sourceId: "screen:0:0", + displayId: 0, + fps: 30, + videoWidth: 1280, + videoHeight: 720, + displayX: 0, + displayY: 0, + displayW: 1920, + displayH: 1080, + hasDisplayBounds: true, + captureSystemAudio: WITH_SYSTEM_AUDIO, + captureMic: false, + webcamEnabled: false, + outputs: { screenPath: outputPath }, +}; + +const result = await runHelper(config); +if (result.code !== 0) { + throw new Error(`WGC helper exited with ${result.code}\n${result.stdout}\n${result.stderr}`); +} +if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) { + throw new Error(`WGC helper did not produce a video at ${outputPath}`); +} + +const streams = probeStreams(outputPath); +const hasVideo = streams.some((stream) => stream.codec_type === "video"); +const hasAudio = streams.some((stream) => stream.codec_type === "audio"); +if (!hasVideo) { + throw new Error(`WGC helper output has no video stream: ${outputPath}`); +} +if (WITH_SYSTEM_AUDIO && !hasAudio) { + throw new Error(`WGC helper output has no audio stream: ${outputPath}`); +} +const frameLuma = measureFirstFrameLuma(outputPath); +if (frameLuma.average < 1 && frameLuma.max < 5) { + throw new Error(`WGC 
helper output first frame is black: ${outputPath}`); +} + +console.log( + JSON.stringify( + { + success: true, + outputPath, + bytes: fs.statSync(outputPath).size, + streams: streams.map((stream) => ({ + index: stream.index, + codecType: stream.codec_type, + codecName: stream.codec_name, + duration: stream.duration, + })), + firstFrameLuma: frameLuma, + }, + null, + 2, + ), +); diff --git a/src/components/video-editor/VideoPlayback.tsx b/src/components/video-editor/VideoPlayback.tsx index 432958a..3e87660 100644 --- a/src/components/video-editor/VideoPlayback.tsx +++ b/src/components/video-editor/VideoPlayback.tsx @@ -1832,7 +1832,6 @@ const VideoPlayback = forwardRef( src={videoPath} className="hidden" preload="auto" - muted playsInline onLoadedMetadata={handleLoadedMetadata} onDurationChange={(e) => { diff --git a/src/hooks/useScreenRecorder.ts b/src/hooks/useScreenRecorder.ts index 717a6cd..1ae9d22 100644 --- a/src/hooks/useScreenRecorder.ts +++ b/src/hooks/useScreenRecorder.ts @@ -2,6 +2,7 @@ import { fixWebmDuration } from "@fix-webm-duration/fix"; import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { useScopedT } from "@/contexts/I18nContext"; +import type { NativeWindowsRecordingRequest } from "@/lib/nativeWindowsRecording"; import { requestCameraAccess } from "@/lib/requestCameraAccess"; const TARGET_FRAME_RATE = 60; @@ -62,6 +63,11 @@ type RecorderHandle = { recordedBlobPromise: Promise; }; +type NativeWindowsRecordingHandle = { + recordingId: number; + finalizing: boolean; +}; + function createRecorderHandle(stream: MediaStream, options: MediaRecorderOptions): RecorderHandle { const recorder = new MediaRecorder(stream, options); const chunks: Blob[] = []; @@ -96,6 +102,7 @@ export function useScreenRecorder(): UseScreenRecorderReturn { const [webcamEnabled, setWebcamEnabledState] = useState(false); const screenRecorder = useRef(null); const webcamRecorder = useRef(null); + const nativeWindowsRecording = 
useRef(null); const stream = useRef(null); const screenStream = useRef(null); const microphoneStream = useRef(null); @@ -365,7 +372,58 @@ export function useScreenRecorder(): UseScreenRecorderReturn { [teardownMedia], ); + const finalizeNativeWindowsRecording = useCallback(async (discard = false) => { + const activeNativeRecording = nativeWindowsRecording.current; + if (!activeNativeRecording || activeNativeRecording.finalizing) { + return false; + } + + activeNativeRecording.finalizing = true; + nativeWindowsRecording.current = null; + setRecording(false); + setPaused(false); + setElapsedSeconds(0); + accumulatedDurationMs.current = 0; + segmentStartedAt.current = null; + + try { + const result = await window.electronAPI.stopNativeWindowsRecording(discard); + if (discard || result.discarded) { + return true; + } + if (!result.success) { + console.error("Failed to stop native Windows recording:", result.error); + toast.error(result.error ?? "Failed to stop native Windows recording"); + return true; + } + + if (result.session) { + await window.electronAPI.setCurrentRecordingSession(result.session); + } else if (result.path) { + await window.electronAPI.setCurrentVideoPath(result.path); + } + + await window.electronAPI.switchToEditor(); + return true; + } catch (error) { + console.error("Error saving native Windows recording:", error); + toast.error( + error instanceof Error ? 
error.message : "Failed to save native Windows recording", + ); + return true; + } finally { + if (discardRecordingId.current === activeNativeRecording.recordingId) { + discardRecordingId.current = null; + } + } + }, []); + const stopRecording = useRef(() => { + if (nativeWindowsRecording.current) { + void finalizeNativeWindowsRecording(false); + return; + } + const activeScreenRecorder = screenRecorder.current; if (!activeScreenRecorder) { return; @@ -431,6 +489,9 @@ export function useScreenRecorder(): UseScreenRecorderReturn { allowAutoFinalize.current = false; restarting.current = false; discardRecordingId.current = null; + if (nativeWindowsRecording.current) { + void finalizeNativeWindowsRecording(true); + } if ( screenRecorder.current?.recorder.state === "recording" || @@ -456,7 +517,7 @@ export function useScreenRecorder(): UseScreenRecorderReturn { webcamRecorder.current = null; teardownMedia(); }; - }, [teardownMedia, safeHideCountdownOverlay]); + }, [teardownMedia, safeHideCountdownOverlay, finalizeNativeWindowsRecording]); const safeShowCountdownOverlay = async (value: number, runId: number) => { try { @@ -486,6 +547,85 @@ export function useScreenRecorder(): UseScreenRecorderReturn { const isCountdownRunActive = (runId?: number) => runId === undefined || countdownRunId.current === runId; + const startNativeWindowsRecordingIfAvailable = async ( + selectedSource: ProcessedDesktopSource, + countdownRunToken?: number, + ) => { + try { + const platform = await window.electronAPI.getPlatform(); + if (platform !== "win32") { + return false; + } + + const availability = await window.electronAPI.isNativeWindowsCaptureAvailable(); + if (!availability.success || !availability.available) { + throw new Error( + availability.reason === "missing-helper" + ? "Native Windows capture helper is not available." + : (availability.error ?? 
"Native Windows capture is not available."), + ); + } + + if (!isCountdownRunActive(countdownRunToken)) { + return true; + } + + const activeRecordingId = Date.now(); + const displayId = Number(selectedSource.display_id); + const sourceType = selectedSource.id.startsWith("window:") ? "window" : "display"; + const request: NativeWindowsRecordingRequest = { + recordingId: activeRecordingId, + source: { + type: sourceType, + sourceId: selectedSource.id, + ...(Number.isFinite(displayId) ? { displayId } : {}), + }, + video: { + fps: TARGET_FRAME_RATE, + width: TARGET_WIDTH, + height: TARGET_HEIGHT, + }, + audio: { + system: { + enabled: systemAudioEnabled, + }, + microphone: { + enabled: microphoneEnabled, + deviceId: microphoneDeviceId, + gain: MIC_GAIN_BOOST, + }, + }, + webcam: { + enabled: webcamEnabled, + deviceId: webcamDeviceId, + width: WEBCAM_TARGET_WIDTH, + height: WEBCAM_TARGET_HEIGHT, + fps: WEBCAM_TARGET_FRAME_RATE, + }, + }; + const result = await window.electronAPI.startNativeWindowsRecording(request); + if (!result.success || !result.recordingId) { + throw new Error(result.error ?? 
"Native Windows capture failed."); + } + + recordingId.current = result.recordingId; + nativeWindowsRecording.current = { + recordingId: result.recordingId, + finalizing: false, + }; + accumulatedDurationMs.current = 0; + segmentStartedAt.current = result.recordingId; + allowAutoFinalize.current = true; + setRecording(true); + setPaused(false); + setElapsedSeconds(0); + return true; + } catch (error) { + console.error("Native Windows capture failed:", error); + throw error; + } + }; + const startRecordCountdown = async () => { if (countdownActive || recording) { return; @@ -573,6 +713,10 @@ export function useScreenRecorder(): UseScreenRecorderReturn { return; } + if (await startNativeWindowsRecordingIfAvailable(selectedSource, countdownRunToken)) { + return; + } + let screenMediaStream: MediaStream; // getDisplayMedia + setDisplayMediaRequestHandler (main.ts) supplies the @@ -846,6 +990,19 @@ export function useScreenRecorder(): UseScreenRecorderReturn { const restartRecording = async () => { if (restarting.current) return; + if (nativeWindowsRecording.current) { + const activeRecordingId = recordingId.current; + restarting.current = true; + discardRecordingId.current = activeRecordingId; + try { + await finalizeNativeWindowsRecording(true); + await startRecording(); + } finally { + restarting.current = false; + } + return; + } + const activeScreenRecorder = screenRecorder.current; if (!activeScreenRecorder || activeScreenRecorder.recorder.state === "inactive") return; @@ -903,6 +1060,14 @@ export function useScreenRecorder(): UseScreenRecorderReturn { }, [getRecordingDurationMs, paused, recording]); const cancelRecording = () => { + if (nativeWindowsRecording.current) { + const activeRecordingId = recordingId.current; + discardRecordingId.current = activeRecordingId; + allowAutoFinalize.current = false; + void finalizeNativeWindowsRecording(true); + return; + } + const activeScreenRecorder = screenRecorder.current; if ( activeScreenRecorder?.recorder.state === 
"recording" || diff --git a/src/lib/cursor/nativeCursor.ts b/src/lib/cursor/nativeCursor.ts index 3b62cb8..04ebccd 100644 --- a/src/lib/cursor/nativeCursor.ts +++ b/src/lib/cursor/nativeCursor.ts @@ -54,8 +54,8 @@ const PRETTY_NATIVE_CURSOR_ASSETS: Partial Date: Tue, 5 May 2026 16:19:34 +0200 Subject: [PATCH 12/43] feat: add native Windows microphone capture --- .../windows-native-recorder-roadmap.md | 4 + electron/native/README.md | 6 +- electron/native/wgc-capture/CMakeLists.txt | 2 + .../wgc-capture/src/audio_sample_utils.cpp | 128 ++++++++++++++++++ .../wgc-capture/src/audio_sample_utils.h | 20 +++ electron/native/wgc-capture/src/main.cpp | 93 +++++++++++-- .../src/wasapi_loopback_capture.cpp | 33 ++++- .../wgc-capture/src/wasapi_loopback_capture.h | 10 +- package.json | 2 + scripts/test-windows-wgc-helper.mjs | 12 +- 10 files changed, 290 insertions(+), 20 deletions(-) create mode 100644 electron/native/wgc-capture/src/audio_sample_utils.cpp create mode 100644 electron/native/wgc-capture/src/audio_sample_utils.h diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md index c6a0a06..2fb6ab5 100644 --- a/docs/engineering/windows-native-recorder-roadmap.md +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -138,6 +138,8 @@ SSOT rules for this phase: ### 3. WASAPI Microphone +Status: initial implementation in progress. The helper can open the default WASAPI capture endpoint, apply the OpenScreen microphone gain, encode mic-only audio, and mix mic into system-loopback packets when both endpoints expose the same runtime format. Browser `deviceId` to MMDevice id mapping, resampling between mismatched endpoint formats, and drift correction remain follow-up hardening work. + - Add microphone device enumeration and stable device-id mapping. - Capture selected/default microphone through WASAPI. - Apply OpenScreen's current mic gain policy. 
@@ -192,6 +194,8 @@ Acceptance: - `npm run test:wgc-webcam:win`: validates webcam output when a webcam is available, otherwise skips explicitly. - Packaging check: confirms the helper is in `app.asar.unpacked`. - Export check: exported MP4s generated from native recordings keep an AAC audio track when the source has audio. +- `npm run test:wgc-mic:win`: validates default-microphone capture writes an AAC track when an input endpoint is available. +- `npm run test:wgc-mixed-audio:win`: validates system loopback plus microphone writes one mixed AAC track when endpoint formats are compatible. ## Ship Criteria diff --git a/electron/native/README.md b/electron/native/README.md index 512517b..5df7290 100644 --- a/electron/native/README.md +++ b/electron/native/README.md @@ -32,6 +32,8 @@ Current V2 JSON shape: "fps": 60, "captureSystemAudio": false, "captureMic": false, + "microphoneDeviceId": "default", + "microphoneGain": 1.4, "webcamEnabled": false, "outputs": { "screenPath": "C:\\path\\recording-123.mp4", @@ -40,11 +42,13 @@ Current V2 JSON shape: } ``` -The current helper implementation supports display video capture and system audio loopback. Microphone, webcam, and window capture now fail explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan. +The current helper implementation supports display video capture, system audio loopback, and initial default-microphone capture. Webcam and window capture now fail explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan. 
Smoke-test the helper with: ```powershell npm run test:wgc-helper:win npm run test:wgc-audio:win +npm run test:wgc-mic:win +npm run test:wgc-mixed-audio:win ``` diff --git a/electron/native/wgc-capture/CMakeLists.txt b/electron/native/wgc-capture/CMakeLists.txt index 76999f7..b21fd66 100644 --- a/electron/native/wgc-capture/CMakeLists.txt +++ b/electron/native/wgc-capture/CMakeLists.txt @@ -14,6 +14,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) add_executable(wgc-capture + src/audio_sample_utils.cpp + src/audio_sample_utils.h src/main.cpp src/mf_encoder.cpp src/mf_encoder.h diff --git a/electron/native/wgc-capture/src/audio_sample_utils.cpp b/electron/native/wgc-capture/src/audio_sample_utils.cpp new file mode 100644 index 0000000..6537d8e --- /dev/null +++ b/electron/native/wgc-capture/src/audio_sample_utils.cpp @@ -0,0 +1,128 @@ +#include "audio_sample_utils.h" + +#include + +#include +#include +#include +#include + +namespace { + +bool isFloatFormat(const AudioInputFormat& format) { + return format.subtype == MFAudioFormat_Float && format.bitsPerSample == 32; +} + +bool isPcmFormat(const AudioInputFormat& format, UINT32 bitsPerSample) { + return format.subtype == MFAudioFormat_PCM && format.bitsPerSample == bitsPerSample; +} + +template +T clampTo(double value) { + const double minValue = static_cast(std::numeric_limits::min()); + const double maxValue = static_cast(std::numeric_limits::max()); + return static_cast(std::clamp(std::round(value), minValue, maxValue)); +} + +} // namespace + +bool sameAudioFormatForMixing(const AudioInputFormat& left, const AudioInputFormat& right) { + return left.subtype == right.subtype && + left.sampleRate == right.sampleRate && + left.channels == right.channels && + left.bitsPerSample == right.bitsPerSample && + left.blockAlign == right.blockAlign && + left.avgBytesPerSec == right.avgBytesPerSec; +} + +void copyAudioWithGain( + const BYTE* source, + DWORD byteCount, + const AudioInputFormat& format, + 
double gain, + std::vector& destination) { + destination.resize(byteCount); + if (!source || byteCount == 0) { + return; + } + + if (std::abs(gain - 1.0) < 0.0001) { + std::memcpy(destination.data(), source, byteCount); + return; + } + + if (isFloatFormat(format)) { + const auto* input = reinterpret_cast(source); + auto* output = reinterpret_cast(destination.data()); + const size_t sampleCount = byteCount / sizeof(float); + for (size_t index = 0; index < sampleCount; index += 1) { + output[index] = static_cast(std::clamp(input[index] * gain, -1.0, 1.0)); + } + return; + } + + if (isPcmFormat(format, 16)) { + const auto* input = reinterpret_cast(source); + auto* output = reinterpret_cast(destination.data()); + const size_t sampleCount = byteCount / sizeof(int16_t); + for (size_t index = 0; index < sampleCount; index += 1) { + output[index] = clampTo(static_cast(input[index]) * gain); + } + return; + } + + if (isPcmFormat(format, 32)) { + const auto* input = reinterpret_cast(source); + auto* output = reinterpret_cast(destination.data()); + const size_t sampleCount = byteCount / sizeof(int32_t); + for (size_t index = 0; index < sampleCount; index += 1) { + output[index] = clampTo(static_cast(input[index]) * gain); + } + return; + } + + std::memcpy(destination.data(), source, byteCount); +} + +void mixAudioInPlace( + std::vector& destination, + const BYTE* source, + DWORD byteCount, + const AudioInputFormat& format) { + if (!source || byteCount == 0 || destination.empty()) { + return; + } + + const size_t mixByteCount = std::min(destination.size(), static_cast(byteCount)); + + if (isFloatFormat(format)) { + auto* output = reinterpret_cast(destination.data()); + const auto* input = reinterpret_cast(source); + const size_t sampleCount = mixByteCount / sizeof(float); + for (size_t index = 0; index < sampleCount; index += 1) { + output[index] = static_cast(std::clamp(output[index] + input[index], -1.0f, 1.0f)); + } + return; + } + + if (isPcmFormat(format, 16)) { + auto* 
output = reinterpret_cast(destination.data()); + const auto* input = reinterpret_cast(source); + const size_t sampleCount = mixByteCount / sizeof(int16_t); + for (size_t index = 0; index < sampleCount; index += 1) { + output[index] = clampTo( + static_cast(output[index]) + static_cast(input[index])); + } + return; + } + + if (isPcmFormat(format, 32)) { + auto* output = reinterpret_cast(destination.data()); + const auto* input = reinterpret_cast(source); + const size_t sampleCount = mixByteCount / sizeof(int32_t); + for (size_t index = 0; index < sampleCount; index += 1) { + output[index] = clampTo( + static_cast(output[index]) + static_cast(input[index])); + } + } +} diff --git a/electron/native/wgc-capture/src/audio_sample_utils.h b/electron/native/wgc-capture/src/audio_sample_utils.h new file mode 100644 index 0000000..8022ae3 --- /dev/null +++ b/electron/native/wgc-capture/src/audio_sample_utils.h @@ -0,0 +1,20 @@ +#pragma once + +#include "mf_encoder.h" + +#include + +#include + +bool sameAudioFormatForMixing(const AudioInputFormat& left, const AudioInputFormat& right); +void copyAudioWithGain( + const BYTE* source, + DWORD byteCount, + const AudioInputFormat& format, + double gain, + std::vector& destination); +void mixAudioInPlace( + std::vector& destination, + const BYTE* source, + DWORD byteCount, + const AudioInputFormat& format); diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index 39d5c62..603fda3 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ b/electron/native/wgc-capture/src/main.cpp @@ -1,3 +1,4 @@ +#include "audio_sample_utils.h" #include "mf_encoder.h" #include "monitor_utils.h" #include "wasapi_loopback_capture.h" @@ -273,11 +274,6 @@ int main(int argc, char* argv[]) { return 1; } - if (config.captureMic) { - std::cerr << "ERROR: Microphone capture is not implemented in this helper yet" << std::endl; - return 1; - } - if (config.webcamEnabled) { std::cerr << "ERROR: Native webcam capture 
is not implemented in this helper yet" << std::endl; return 1; @@ -309,16 +305,34 @@ int main(int argc, char* argv[]) { const int bitrate = pixels >= 3840 * 2160 ? 45'000'000 : pixels >= 2560 * 1440 ? 28'000'000 : 18'000'000; WasapiLoopbackCapture loopbackCapture; + WasapiLoopbackCapture microphoneCapture; const AudioInputFormat* audioFormat = nullptr; if (config.captureSystemAudio) { - if (!loopbackCapture.initialize()) { + if (!loopbackCapture.initializeSystemLoopback()) { std::cerr << "ERROR: Failed to initialize WASAPI loopback capture" << std::endl; return 1; } audioFormat = &loopbackCapture.inputFormat(); - std::cout << "{\"event\":\"audio-format\",\"schemaVersion\":2,\"sampleRate\":" - << audioFormat->sampleRate << ",\"channels\":" << audioFormat->channels - << ",\"bitsPerSample\":" << audioFormat->bitsPerSample << "}" << std::endl; + } + if (config.captureMic) { + if (!microphoneCapture.initializeMicrophone(utf8ToWide(config.microphoneDeviceId))) { + std::cerr << "ERROR: Failed to initialize WASAPI microphone capture" << std::endl; + return 1; + } + if (!audioFormat) { + audioFormat = &microphoneCapture.inputFormat(); + } else if (!sameAudioFormatForMixing(*audioFormat, microphoneCapture.inputFormat())) { + std::cerr << "ERROR: System audio and microphone formats differ; native mixing is not supported yet" + << std::endl; + return 1; + } + } + if (audioFormat) { + std::cout << "{\"event\":\"audio-format\",\"schemaVersion\":2,\"sampleRate\":" << audioFormat->sampleRate + << ",\"channels\":" << audioFormat->channels + << ",\"bitsPerSample\":" << audioFormat->bitsPerSample + << ",\"system\":" << (config.captureSystemAudio ? "true" : "false") + << ",\"microphone\":" << (config.captureMic ? 
"true" : "false") << "}" << std::endl; } MFEncoder encoder; @@ -358,24 +372,81 @@ int main(int argc, char* argv[]) { } }); + std::mutex microphoneAudioMutex; + std::vector latestMicrophoneAudio; + std::vector mixedAudioBuffer; + std::vector microphoneGainBuffer; + + if (config.captureMic) { + if (!microphoneCapture.start([&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) { + if (stopRequested || !audioFormat) { + return; + } + + copyAudioWithGain( + data, + byteCount, + microphoneCapture.inputFormat(), + config.microphoneGain, + microphoneGainBuffer); + + if (config.captureSystemAudio) { + std::scoped_lock lock(microphoneAudioMutex); + latestMicrophoneAudio = microphoneGainBuffer; + return; + } + + if (!encoder.writeAudio( + microphoneGainBuffer.data(), + static_cast(microphoneGainBuffer.size()), + timestampHns, + durationHns)) { + encodeFailed = true; + stopRequested = true; + cv.notify_all(); + } + })) { + std::cerr << "ERROR: Failed to start WASAPI microphone capture" << std::endl; + return 1; + } + } + if (config.captureSystemAudio) { if (!loopbackCapture.start([&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) { if (stopRequested) { return; } - if (!encoder.writeAudio(data, byteCount, timestampHns, durationHns)) { + const BYTE* encodedData = data; + DWORD encodedByteCount = byteCount; + if (config.captureMic && audioFormat) { + mixedAudioBuffer.assign(data, data + byteCount); + { + std::scoped_lock lock(microphoneAudioMutex); + mixAudioInPlace( + mixedAudioBuffer, + latestMicrophoneAudio.data(), + static_cast(latestMicrophoneAudio.size()), + *audioFormat); + } + encodedData = mixedAudioBuffer.data(); + encodedByteCount = static_cast(mixedAudioBuffer.size()); + } + + if (!encoder.writeAudio(encodedData, encodedByteCount, timestampHns, durationHns)) { encodeFailed = true; stopRequested = true; cv.notify_all(); } })) { std::cerr << "ERROR: Failed to start WASAPI loopback capture" << std::endl; + 
microphoneCapture.stop(); return 1; } } if (!session.start()) { + microphoneCapture.stop(); loopbackCapture.stop(); std::cerr << "ERROR: Failed to start WGC session" << std::endl; return 1; @@ -394,6 +465,7 @@ int main(int argc, char* argv[]) { if (stdinThread.joinable()) { stdinThread.detach(); } + microphoneCapture.stop(); loopbackCapture.stop(); std::cerr << "ERROR: Timed out waiting for first WGC frame" << std::endl; return 1; @@ -410,6 +482,7 @@ int main(int argc, char* argv[]) { }); } + microphoneCapture.stop(); loopbackCapture.stop(); session.stop(); { diff --git a/electron/native/wgc-capture/src/wasapi_loopback_capture.cpp b/electron/native/wgc-capture/src/wasapi_loopback_capture.cpp index e4f254e..4e350a2 100644 --- a/electron/native/wgc-capture/src/wasapi_loopback_capture.cpp +++ b/electron/native/wgc-capture/src/wasapi_loopback_capture.cpp @@ -51,7 +51,15 @@ WasapiLoopbackCapture::~WasapiLoopbackCapture() { } } -bool WasapiLoopbackCapture::initialize() { +bool WasapiLoopbackCapture::initializeSystemLoopback() { + return initialize(WasapiCaptureEndpoint::SystemLoopback, {}); +} + +bool WasapiLoopbackCapture::initializeMicrophone(const std::wstring& deviceId) { + return initialize(WasapiCaptureEndpoint::Microphone, deviceId); +} + +bool WasapiLoopbackCapture::initialize(WasapiCaptureEndpoint endpoint, const std::wstring& deviceId) { HRESULT hr = CoCreateInstance( __uuidof(MMDeviceEnumerator), nullptr, @@ -61,9 +69,22 @@ bool WasapiLoopbackCapture::initialize() { return false; } - hr = deviceEnumerator_->GetDefaultAudioEndpoint(eRender, eConsole, &device_); - if (!succeeded(hr, "GetDefaultAudioEndpoint(render)")) { - return false; + if (endpoint == WasapiCaptureEndpoint::Microphone && !deviceId.empty() && deviceId != L"default") { + hr = deviceEnumerator_->GetDevice(deviceId.c_str(), &device_); + if (FAILED(hr)) { + std::wcerr << L"WARNING: Could not resolve microphone device id; using default capture endpoint" + << std::endl; + device_.Reset(); + } + } + + 
if (!device_) { + const EDataFlow flow = + endpoint == WasapiCaptureEndpoint::SystemLoopback ? eRender : eCapture; + hr = deviceEnumerator_->GetDefaultAudioEndpoint(flow, eConsole, &device_); + if (!succeeded(hr, "GetDefaultAudioEndpoint")) { + return false; + } } hr = device_->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, &audioClient_); @@ -81,9 +102,11 @@ bool WasapiLoopbackCapture::initialize() { return false; } + const DWORD streamFlags = + endpoint == WasapiCaptureEndpoint::SystemLoopback ? AUDCLNT_STREAMFLAGS_LOOPBACK : 0; hr = audioClient_->Initialize( AUDCLNT_SHAREMODE_SHARED, - AUDCLNT_STREAMFLAGS_LOOPBACK, + streamFlags, BufferDurationHns, 0, mixFormat_, diff --git a/electron/native/wgc-capture/src/wasapi_loopback_capture.h b/electron/native/wgc-capture/src/wasapi_loopback_capture.h index e6fb7e8..8d2dbb9 100644 --- a/electron/native/wgc-capture/src/wasapi_loopback_capture.h +++ b/electron/native/wgc-capture/src/wasapi_loopback_capture.h @@ -10,9 +10,15 @@ #include #include #include +#include #include #include +enum class WasapiCaptureEndpoint { + SystemLoopback, + Microphone, +}; + class WasapiLoopbackCapture { public: using AudioCallback = std::function; @@ -23,13 +29,15 @@ public: WasapiLoopbackCapture(const WasapiLoopbackCapture&) = delete; WasapiLoopbackCapture& operator=(const WasapiLoopbackCapture&) = delete; - bool initialize(); + bool initializeSystemLoopback(); + bool initializeMicrophone(const std::wstring& deviceId); bool start(AudioCallback callback); void stop(); const AudioInputFormat& inputFormat() const; private: + bool initialize(WasapiCaptureEndpoint endpoint, const std::wstring& deviceId); void captureLoop(); bool resolveInputFormat(WAVEFORMATEX* mixFormat); diff --git a/package.json b/package.json index 4311f6e..9114207 100644 --- a/package.json +++ b/package.json @@ -29,6 +29,8 @@ "test:cursor-native:win": "node scripts/test-windows-native-cursor.mjs", "test:wgc-helper:win": "node scripts/test-windows-wgc-helper.mjs", 
"test:wgc-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio", + "test:wgc-mic:win": "node scripts/test-windows-wgc-helper.mjs --microphone", + "test:wgc-mixed-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio --microphone", "capture:openscreen-preview": "node scripts/capture-openscreen-preview.mjs", "build-vite": "tsc && vite build", "test:browser": "vitest --config vitest.browser.config.ts --run", diff --git a/scripts/test-windows-wgc-helper.mjs b/scripts/test-windows-wgc-helper.mjs index 627c0ca..45dab7d 100644 --- a/scripts/test-windows-wgc-helper.mjs +++ b/scripts/test-windows-wgc-helper.mjs @@ -14,6 +14,10 @@ const DURATION_MS = Number(process.env.OPENSCREEN_WGC_TEST_DURATION_MS ?? 5000); const WITH_SYSTEM_AUDIO = process.env.OPENSCREEN_WGC_TEST_SYSTEM_AUDIO === "true" || process.argv.includes("--system-audio"); +const WITH_MICROPHONE = + process.env.OPENSCREEN_WGC_TEST_MICROPHONE === "true" || + process.argv.includes("--microphone") || + process.argv.includes("--mic"); function runHelper(config) { return new Promise((resolve, reject) => { @@ -101,7 +105,7 @@ if (!fs.existsSync(HELPER_PATH)) { const outputPath = path.join( os.tmpdir(), - `openscreen-wgc-helper-${WITH_SYSTEM_AUDIO ? "audio" : "video"}-${Date.now()}.mp4`, + `openscreen-wgc-helper-${WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? 
"audio" : "video"}-${Date.now()}.mp4`, ); const config = { @@ -120,7 +124,9 @@ const config = { displayH: 1080, hasDisplayBounds: true, captureSystemAudio: WITH_SYSTEM_AUDIO, - captureMic: false, + captureMic: WITH_MICROPHONE, + microphoneDeviceId: "default", + microphoneGain: 1.4, webcamEnabled: false, outputs: { screenPath: outputPath }, }; @@ -139,7 +145,7 @@ const hasAudio = streams.some((stream) => stream.codec_type === "audio"); if (!hasVideo) { throw new Error(`WGC helper output has no video stream: ${outputPath}`); } -if (WITH_SYSTEM_AUDIO && !hasAudio) { +if ((WITH_SYSTEM_AUDIO || WITH_MICROPHONE) && !hasAudio) { throw new Error(`WGC helper output has no audio stream: ${outputPath}`); } const frameLuma = measureFirstFrameLuma(outputPath); From 7929aea908c1b7dec70fe748a4905fe606492b77 Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 16:49:07 +0200 Subject: [PATCH 13/43] fix: align native mixed audio timeline --- .../windows-native-recorder-roadmap.md | 2 +- .../wgc-capture/src/audio_sample_utils.cpp | 163 ++++++++++++++++++ .../wgc-capture/src/audio_sample_utils.h | 48 ++++++ electron/native/wgc-capture/src/main.cpp | 130 +++++++------- scripts/test-windows-wgc-helper.mjs | 3 +- 5 files changed, 283 insertions(+), 63 deletions(-) diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md index 2fb6ab5..12c6d49 100644 --- a/docs/engineering/windows-native-recorder-roadmap.md +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -138,7 +138,7 @@ SSOT rules for this phase: ### 3. WASAPI Microphone -Status: initial implementation in progress. The helper can open the default WASAPI capture endpoint, apply the OpenScreen microphone gain, encode mic-only audio, and mix mic into system-loopback packets when both endpoints expose the same runtime format. 
Browser `deviceId` to MMDevice id mapping, resampling between mismatched endpoint formats, and drift correction remain follow-up hardening work. +Status: initial implementation in progress. The helper can open the default WASAPI capture endpoint, apply the OpenScreen microphone gain, encode mic-only audio, and mix system loopback plus microphone through a single queued `AudioMixer` timeline when both endpoints expose the same runtime format. Audio endpoints are warmed before WGC starts, the mixer drops pre-roll and begins its paced timeline on the first encoded video frame, then cuts queued tail audio on stop so the MP4 does not drift past the video. Browser `deviceId` to MMDevice id mapping, resampling between mismatched endpoint formats, and drift correction remain follow-up hardening work. - Add microphone device enumeration and stable device-id mapping. - Capture selected/default microphone through WASAPI. diff --git a/electron/native/wgc-capture/src/audio_sample_utils.cpp b/electron/native/wgc-capture/src/audio_sample_utils.cpp index 6537d8e..1e5e1bb 100644 --- a/electron/native/wgc-capture/src/audio_sample_utils.cpp +++ b/electron/native/wgc-capture/src/audio_sample_utils.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -26,6 +27,8 @@ T clampTo(double value) { } // namespace +constexpr int64_t HnsPerSecond = 10'000'000; + bool sameAudioFormatForMixing(const AudioInputFormat& left, const AudioInputFormat& right) { return left.subtype == right.subtype && left.sampleRate == right.sampleRate && @@ -43,6 +46,7 @@ void copyAudioWithGain( std::vector& destination) { destination.resize(byteCount); if (!source || byteCount == 0) { + std::fill(destination.begin(), destination.end(), static_cast(0)); return; } @@ -126,3 +130,162 @@ void mixAudioInPlace( } } } + +AudioMixer::AudioMixer( + const AudioInputFormat& format, + bool includeSystem, + bool includeMicrophone, + double microphoneGain, + OutputCallback output) + : format_(format), + 
includeSystem_(includeSystem), + includeMicrophone_(includeMicrophone), + microphoneGain_(microphoneGain), + output_(std::move(output)) {} + +AudioMixer::~AudioMixer() { + stop(); +} + +bool AudioMixer::start() { + if (!output_ || format_.sampleRate == 0 || format_.blockAlign == 0) { + return false; + } + + stopRequested_ = false; + emittedFrames_ = 0; + timelineStarted_ = false; + thread_ = std::thread([this] { + mixLoop(); + }); + return true; +} + +void AudioMixer::beginTimeline() { + { + std::scoped_lock lock(mutex_); + systemQueue_.clear(); + microphoneQueue_.clear(); + emittedFrames_ = 0; + timelineStarted_ = true; + } + cv_.notify_all(); +} + +void AudioMixer::stop() { + stopRequested_ = true; + cv_.notify_all(); + if (thread_.joinable()) { + thread_.join(); + } +} + +void AudioMixer::pushSystem(const BYTE* data, DWORD byteCount) { + if (!includeSystem_ || stopRequested_) { + return; + } + + { + std::scoped_lock lock(mutex_); + append(systemQueue_, data, byteCount, 1.0); + } + cv_.notify_all(); +} + +void AudioMixer::pushMicrophone(const BYTE* data, DWORD byteCount) { + if (!includeMicrophone_ || stopRequested_) { + return; + } + + { + std::scoped_lock lock(mutex_); + append(microphoneQueue_, data, byteCount, microphoneGain_); + } + cv_.notify_all(); +} + +void AudioMixer::append(std::vector& queue, const BYTE* data, DWORD byteCount, double gain) { + if (!data || byteCount == 0) { + return; + } + + copyAudioWithGain(data, byteCount, format_, gain, gainBuffer_); + queue.insert(queue.end(), gainBuffer_.begin(), gainBuffer_.end()); +} + +bool AudioMixer::pop(std::vector& queue, std::vector& chunk, size_t byteCount) { + if (queue.empty()) { + chunk.assign(byteCount, 0); + return false; + } + + chunk.assign(byteCount, 0); + const size_t copiedBytes = std::min(byteCount, queue.size()); + std::memcpy(chunk.data(), queue.data(), copiedBytes); + queue.erase(queue.begin(), queue.begin() + static_cast(copiedBytes)); + return copiedBytes > 0; +} + +void 
AudioMixer::mixLoop() { + const uint32_t chunkFrames = std::max(1, format_.sampleRate / 100); + const size_t chunkBytes = static_cast(chunkFrames) * format_.blockAlign; + std::vector mixedChunk; + std::vector sourceChunk; + std::chrono::steady_clock::time_point audioClockStart; + bool audioClockStarted = false; + + while (true) { + { + std::unique_lock lock(mutex_); + cv_.wait_for(lock, std::chrono::milliseconds(20), [&] { + const bool hasSystem = !includeSystem_ || systemQueue_.size() >= chunkBytes; + const bool hasMicrophone = !includeMicrophone_ || microphoneQueue_.size() >= chunkBytes; + const bool hasAnySource = !systemQueue_.empty() || !microphoneQueue_.empty(); + return stopRequested_.load() || + (timelineStarted_ && (hasSystem || hasMicrophone) && hasAnySource); + }); + + if (stopRequested_) { + break; + } + if (!timelineStarted_) { + continue; + } + + const bool hasAnyQueuedAudio = !systemQueue_.empty() || !microphoneQueue_.empty(); + if (!hasAnyQueuedAudio) { + continue; + } + + mixedChunk.assign(chunkBytes, 0); + if (includeSystem_) { + pop(systemQueue_, sourceChunk, chunkBytes); + mixAudioInPlace(mixedChunk, sourceChunk.data(), static_cast(sourceChunk.size()), format_); + } + if (includeMicrophone_) { + pop(microphoneQueue_, sourceChunk, chunkBytes); + mixAudioInPlace(mixedChunk, sourceChunk.data(), static_cast(sourceChunk.size()), format_); + } + } + + if (!audioClockStarted) { + audioClockStart = std::chrono::steady_clock::now(); + audioClockStarted = true; + } + + const int64_t timestampHns = + static_cast((emittedFrames_ * HnsPerSecond) / format_.sampleRate); + const int64_t durationHns = + static_cast((static_cast(chunkFrames) * HnsPerSecond) / format_.sampleRate); + if (!output_(mixedChunk.data(), static_cast(mixedChunk.size()), timestampHns, durationHns)) { + stopRequested_ = true; + break; + } + emittedFrames_ += chunkFrames; + + const auto nextDeadline = audioClockStart + + std::chrono::duration_cast( + 
std::chrono::duration(static_cast(emittedFrames_) / format_.sampleRate)); + std::this_thread::sleep_until(nextDeadline); + } +} diff --git a/electron/native/wgc-capture/src/audio_sample_utils.h b/electron/native/wgc-capture/src/audio_sample_utils.h index 8022ae3..b2b6821 100644 --- a/electron/native/wgc-capture/src/audio_sample_utils.h +++ b/electron/native/wgc-capture/src/audio_sample_utils.h @@ -4,6 +4,12 @@ #include +#include +#include +#include +#include +#include +#include #include bool sameAudioFormatForMixing(const AudioInputFormat& left, const AudioInputFormat& right); @@ -18,3 +24,45 @@ void mixAudioInPlace( const BYTE* source, DWORD byteCount, const AudioInputFormat& format); + +class AudioMixer { +public: + using OutputCallback = std::function; + + AudioMixer( + const AudioInputFormat& format, + bool includeSystem, + bool includeMicrophone, + double microphoneGain, + OutputCallback output); + ~AudioMixer(); + + AudioMixer(const AudioMixer&) = delete; + AudioMixer& operator=(const AudioMixer&) = delete; + + bool start(); + void beginTimeline(); + void stop(); + void pushSystem(const BYTE* data, DWORD byteCount); + void pushMicrophone(const BYTE* data, DWORD byteCount); + +private: + void append(std::vector& queue, const BYTE* data, DWORD byteCount, double gain); + bool pop(std::vector& queue, std::vector& chunk, size_t byteCount); + void mixLoop(); + + AudioInputFormat format_{}; + bool includeSystem_ = false; + bool includeMicrophone_ = false; + double microphoneGain_ = 1.0; + OutputCallback output_; + std::mutex mutex_; + std::condition_variable cv_; + std::vector systemQueue_; + std::vector microphoneQueue_; + std::vector gainBuffer_; + std::thread thread_; + std::atomic stopRequested_ = false; + bool timelineStarted_ = false; + uint64_t emittedFrames_ = 0; +}; diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index 603fda3..50e55ef 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ 
b/electron/native/wgc-capture/src/main.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -372,82 +373,78 @@ int main(int argc, char* argv[]) { } }); - std::mutex microphoneAudioMutex; - std::vector latestMicrophoneAudio; - std::vector mixedAudioBuffer; - std::vector microphoneGainBuffer; + std::unique_ptr audioMixer; + auto startAudioCaptures = [&]() -> bool { + if (!audioFormat) { + return true; + } - if (config.captureMic) { - if (!microphoneCapture.start([&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) { - if (stopRequested || !audioFormat) { - return; - } - - copyAudioWithGain( - data, - byteCount, - microphoneCapture.inputFormat(), - config.microphoneGain, - microphoneGainBuffer); - - if (config.captureSystemAudio) { - std::scoped_lock lock(microphoneAudioMutex); - latestMicrophoneAudio = microphoneGainBuffer; - return; - } - - if (!encoder.writeAudio( - microphoneGainBuffer.data(), - static_cast(microphoneGainBuffer.size()), - timestampHns, - durationHns)) { + audioMixer = std::make_unique( + *audioFormat, + config.captureSystemAudio, + config.captureMic, + config.microphoneGain, + [&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) { + if (!encoder.writeAudio(data, byteCount, timestampHns, durationHns)) { encodeFailed = true; stopRequested = true; cv.notify_all(); + return false; } - })) { - std::cerr << "ERROR: Failed to start WASAPI microphone capture" << std::endl; - return 1; + return true; + }); + + if (!audioMixer->start()) { + std::cerr << "ERROR: Failed to start native audio mixer" << std::endl; + return false; } - } - if (config.captureSystemAudio) { - if (!loopbackCapture.start([&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) { - if (stopRequested) { - return; - } - - const BYTE* encodedData = data; - DWORD encodedByteCount = byteCount; - if (config.captureMic && audioFormat) { - mixedAudioBuffer.assign(data, data + 
byteCount); - { - std::scoped_lock lock(microphoneAudioMutex); - mixAudioInPlace( - mixedAudioBuffer, - latestMicrophoneAudio.data(), - static_cast(latestMicrophoneAudio.size()), - *audioFormat); + if (config.captureMic) { + if (!microphoneCapture.start([&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) { + (void)timestampHns; + (void)durationHns; + if (stopRequested || !audioMixer) { + return; } - encodedData = mixedAudioBuffer.data(); - encodedByteCount = static_cast(mixedAudioBuffer.size()); - } - if (!encoder.writeAudio(encodedData, encodedByteCount, timestampHns, durationHns)) { - encodeFailed = true; - stopRequested = true; - cv.notify_all(); - } - })) { - std::cerr << "ERROR: Failed to start WASAPI loopback capture" << std::endl; - microphoneCapture.stop(); - return 1; + audioMixer->pushMicrophone(data, byteCount); + })) { + std::cerr << "ERROR: Failed to start WASAPI microphone capture" << std::endl; + audioMixer->stop(); + return false; + } } + + if (config.captureSystemAudio) { + if (!loopbackCapture.start([&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) { + (void)timestampHns; + (void)durationHns; + if (stopRequested || !audioMixer) { + return; + } + + audioMixer->pushSystem(data, byteCount); + })) { + std::cerr << "ERROR: Failed to start WASAPI loopback capture" << std::endl; + microphoneCapture.stop(); + audioMixer->stop(); + return false; + } + } + + return true; + }; + + if (!startAudioCaptures()) { + return 1; } if (!session.start()) { microphoneCapture.stop(); loopbackCapture.stop(); + if (audioMixer) { + audioMixer->stop(); + } std::cerr << "ERROR: Failed to start WGC session" << std::endl; return 1; } @@ -467,11 +464,19 @@ int main(int argc, char* argv[]) { } microphoneCapture.stop(); loopbackCapture.stop(); + if (audioMixer) { + audioMixer->stop(); + } + session.stop(); std::cerr << "ERROR: Timed out waiting for first WGC frame" << std::endl; return 1; } } + if (audioMixer) { + 
audioMixer->beginTimeline(); + } + std::cout << "{\"event\":\"recording-started\",\"schemaVersion\":2}" << std::endl; std::cout << "Recording started" << std::endl; @@ -484,6 +489,9 @@ int main(int argc, char* argv[]) { microphoneCapture.stop(); loopbackCapture.stop(); + if (audioMixer) { + audioMixer->stop(); + } session.stop(); { std::scoped_lock lock(mutex); diff --git a/scripts/test-windows-wgc-helper.mjs b/scripts/test-windows-wgc-helper.mjs index 45dab7d..bb69819 100644 --- a/scripts/test-windows-wgc-helper.mjs +++ b/scripts/test-windows-wgc-helper.mjs @@ -1,4 +1,5 @@ import { spawn, spawnSync } from "node:child_process"; +import { randomUUID } from "node:crypto"; import fs from "node:fs"; import os from "node:os"; import path from "node:path"; @@ -105,7 +106,7 @@ if (!fs.existsSync(HELPER_PATH)) { const outputPath = path.join( os.tmpdir(), - `openscreen-wgc-helper-${WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${Date.now()}.mp4`, + `openscreen-wgc-helper-${WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? 
"audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`, ); const config = { From 048189da725c1b01468fe320944cafc7c75a00da Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 17:23:49 +0200 Subject: [PATCH 14/43] feat: add native Windows window capture --- .../windows-native-recorder-roadmap.md | 2 + .../windowsNativeRecordingSession.script.ts | 13 -- .../windowsNativeRecordingSession.ts | 6 +- electron/native/README.md | 4 +- electron/native/wgc-capture/src/main.cpp | 132 +++++++++++++++--- .../native/wgc-capture/src/wgc_session.cpp | 59 ++++++++ electron/native/wgc-capture/src/wgc_session.h | 2 + package.json | 1 + scripts/test-windows-wgc-helper.mjs | 64 ++++++++- src/hooks/useScreenRecorder.ts | 7 +- src/lib/nativeWindowsRecording.ts | 13 ++ 11 files changed, 259 insertions(+), 44 deletions(-) diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md index 12c6d49..ac309f9 100644 --- a/docs/engineering/windows-native-recorder-roadmap.md +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -165,6 +165,8 @@ Acceptance: ### 5. Native Window Capture +Status: initial implementation in progress. Electron parses the `window::...` desktop source id through the shared native Windows recording contract and passes `windowHandle` to the helper. The helper resolves the `HWND`, validates it with `IsWindow`, and creates the WGC item with `CreateForWindow(HWND)`. Resize/minimize/move hardening and protected-window diagnostics remain follow-up work. + - Resolve Electron `window:*` selections to an `HWND`. - Use WGC `CreateForWindow(HWND)`. - Handle window close, minimize, resize, DPI scaling, and monitor moves. 
diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts index 5607134..2ad9bbe 100644 --- a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts @@ -1,16 +1,3 @@ -export function parseWindowHandleFromSourceId(sourceId?: string | null) { - if (!sourceId?.startsWith("window:")) { - return null; - } - - const handlePart = sourceId.split(":")[1]; - if (!handlePart || !/^\d+$/.test(handlePart)) { - return null; - } - - return handlePart; -} - export function buildPowerShellCommand(sampleIntervalMs: number, windowHandle?: string | null) { const script = String.raw` $ErrorActionPreference = 'Stop' diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts index 8075fe3..6edee5a 100644 --- a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts @@ -1,16 +1,14 @@ import { type ChildProcessByStdio, spawn } from "node:child_process"; import type { Readable } from "node:stream"; import { screen } from "electron"; +import { parseWindowHandleFromSourceId } from "../../../../src/lib/nativeWindowsRecording"; import type { CursorRecordingData, CursorRecordingSample, NativeCursorAsset, } from "../../../../src/native/contracts"; import type { CursorRecordingSession } from "./session"; -import { - buildPowerShellCommand, - parseWindowHandleFromSourceId, -} from "./windowsNativeRecordingSession.script"; +import { buildPowerShellCommand } from "./windowsNativeRecordingSession.script"; import type { WindowsCursorEvent, WindowsNativeRecordingSessionOptions, diff --git a/electron/native/README.md b/electron/native/README.md index 5df7290..b366a1b 100644 --- 
a/electron/native/README.md +++ b/electron/native/README.md @@ -26,6 +26,7 @@ Current V2 JSON shape: "sourceType": "display", "sourceId": "screen:0:0", "displayId": 1, + "windowHandle": null, "outputPath": "C:\\path\\recording-123.mp4", "videoWidth": 1920, "videoHeight": 1080, @@ -42,12 +43,13 @@ Current V2 JSON shape: } ``` -The current helper implementation supports display video capture, system audio loopback, and initial default-microphone capture. Webcam and window capture now fail explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan. +The current helper implementation supports display/window video capture, system audio loopback, and initial default-microphone capture. Webcam capture now fails explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan. Smoke-test the helper with: ```powershell npm run test:wgc-helper:win +npm run test:wgc-window:win npm run test:wgc-audio:win npm run test:wgc-mic:win npm run test:wgc-mixed-audio:win diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index 50e55ef..86f032e 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ b/electron/native/wgc-capture/src/main.cpp @@ -201,6 +201,36 @@ std::string findString(const std::string& json, const std::string& key) { return result; } +std::string parseWindowHandleFromSourceId(const std::string& sourceId) { + constexpr char prefix[] = "window:"; + if (sourceId.rfind(prefix, 0) != 0) { + return {}; + } + + const size_t start = sizeof(prefix) - 1; + const size_t end = sourceId.find(':', start); + const std::string handle = sourceId.substr(start, end == std::string::npos ? std::string::npos : end - start); + return handle.empty() ? 
std::string{} : handle; +} + +HWND parseWindowHandle(const std::string& value) { + if (value.empty()) { + return nullptr; + } + + try { + size_t parsed = 0; + const int base = value.rfind("0x", 0) == 0 || value.rfind("0X", 0) == 0 ? 16 : 10; + const uint64_t handleValue = std::stoull(value, &parsed, base); + if (parsed != value.size() || handleValue == 0) { + return nullptr; + } + return reinterpret_cast(static_cast(handleValue)); + } catch (...) { + return nullptr; + } +} + bool parseConfig(const std::string& json, CaptureConfig& config) { config.schemaVersion = findInt(json, "schemaVersion", 1); config.outputPath = findString(json, "screenPath"); @@ -218,6 +248,9 @@ bool parseConfig(const std::string& json, CaptureConfig& config) { } config.sourceId = findString(json, "sourceId"); config.windowHandle = findString(json, "windowHandle"); + if (config.windowHandle.empty()) { + config.windowHandle = parseWindowHandleFromSourceId(config.sourceId); + } config.displayId = findInt64(json, "displayId", 0); config.fps = std::clamp(findInt(json, "fps", 60), 1, 120); config.width = findInt(json, "videoWidth", findInt(json, "width", 0)); @@ -270,27 +303,36 @@ int main(int argc, char* argv[]) { std::cout << "{\"event\":\"ready\",\"schemaVersion\":2}" << std::endl; - if (config.sourceType != "display") { - std::cerr << "ERROR: Native window capture is not implemented yet" << std::endl; - return 1; - } - if (config.webcamEnabled) { std::cerr << "ERROR: Native webcam capture is not implemented in this helper yet" << std::endl; return 1; } - HMONITOR monitor = findMonitorForCapture( - config.displayId, - config.hasDisplayBounds ? 
&config.bounds : nullptr); - if (!monitor) { - std::cerr << "ERROR: Could not resolve monitor" << std::endl; - return 1; - } - WgcSession session; - if (!session.initialize(monitor, config.fps)) { - std::cerr << "ERROR: Failed to initialize WGC session" << std::endl; + if (config.sourceType == "display") { + HMONITOR monitor = findMonitorForCapture( + config.displayId, + config.hasDisplayBounds ? &config.bounds : nullptr); + if (!monitor) { + std::cerr << "ERROR: Could not resolve monitor" << std::endl; + return 1; + } + if (!session.initialize(monitor, config.fps)) { + std::cerr << "ERROR: Failed to initialize WGC display session" << std::endl; + return 1; + } + } else if (config.sourceType == "window") { + HWND window = parseWindowHandle(config.windowHandle); + if (!window || !IsWindow(window)) { + std::cerr << "ERROR: Native window capture requires a valid HWND" << std::endl; + return 1; + } + if (!session.initialize(window, config.fps)) { + std::cerr << "ERROR: Failed to initialize WGC window session" << std::endl; + return 1; + } + } else { + std::cerr << "ERROR: Unsupported native capture source type: " << config.sourceType << std::endl; return 1; } @@ -355,24 +397,72 @@ int main(int argc, char* argv[]) { std::atomic stopRequested = false; std::atomic firstFrameWritten = false; std::atomic encodeFailed = false; + Microsoft::WRL::ComPtr latestFrameTexture; session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) { + (void)timestampHns; if (stopRequested) { return; } std::scoped_lock lock(mutex); - if (!encoder.writeFrame(texture, timestampHns)) { - encodeFailed = true; - stopRequested = true; - cv.notify_all(); - return; + if (!latestFrameTexture) { + D3D11_TEXTURE2D_DESC desc{}; + texture->GetDesc(&desc); + desc.BindFlags = 0; + desc.CPUAccessFlags = 0; + desc.MiscFlags = 0; + if (FAILED(session.device()->CreateTexture2D(&desc, nullptr, &latestFrameTexture))) { + encodeFailed = true; + stopRequested = true; + cv.notify_all(); + return; + } 
} + + session.context()->CopyResource(latestFrameTexture.Get(), texture); if (!firstFrameWritten.exchange(true)) { cv.notify_all(); } }); + auto writeVideoFrames = [&]() { + const auto startedAt = std::chrono::steady_clock::now(); + uint64_t frameIndex = 0; + + while (!stopRequested && !encodeFailed) { + { + std::scoped_lock lock(mutex); + if (latestFrameTexture && !encoder.writeFrame( + latestFrameTexture.Get(), + static_cast((frameIndex * 10'000'000ULL) / config.fps))) { + encodeFailed = true; + stopRequested = true; + cv.notify_all(); + return; + } + } + + frameIndex += 1; + const auto nextDeadline = startedAt + + std::chrono::duration_cast( + std::chrono::duration(static_cast(frameIndex) / config.fps)); + std::this_thread::sleep_until(nextDeadline); + } + }; + + std::thread videoWriterThread; + + auto stopVideoWriter = [&]() { + if (videoWriterThread.joinable()) { + videoWriterThread.join(); + } + }; + + auto startVideoWriter = [&]() { + videoWriterThread = std::thread(writeVideoFrames); + }; + std::unique_ptr audioMixer; auto startAudioCaptures = [&]() -> bool { if (!audioFormat) { @@ -476,6 +566,7 @@ int main(int argc, char* argv[]) { if (audioMixer) { audioMixer->beginTimeline(); } + startVideoWriter(); std::cout << "{\"event\":\"recording-started\",\"schemaVersion\":2}" << std::endl; std::cout << "Recording started" << std::endl; @@ -492,6 +583,7 @@ int main(int argc, char* argv[]) { if (audioMixer) { audioMixer->stop(); } + stopVideoWriter(); session.stop(); { std::scoped_lock lock(mutex); diff --git a/electron/native/wgc-capture/src/wgc_session.cpp b/electron/native/wgc-capture/src/wgc_session.cpp index c25444e..ab7e9e3 100644 --- a/electron/native/wgc-capture/src/wgc_session.cpp +++ b/electron/native/wgc-capture/src/wgc_session.cpp @@ -120,6 +120,26 @@ bool WgcSession::createCaptureItem(HMONITOR monitor) { return width_ > 0 && height_ > 0; } +bool WgcSession::createCaptureItem(HWND window) { + auto factory = winrt::get_activation_factory(); + auto 
interop = factory.as(); + + wgcap::GraphicsCaptureItem item{nullptr}; + HRESULT hr = interop->CreateForWindow( + window, + winrt::guid_of(), + reinterpret_cast(winrt::put_abi(item))); + if (!succeeded(hr, "CreateForWindow")) { + return false; + } + + item_ = item; + const auto size = item_.Size(); + width_ = static_cast(size.Width); + height_ = static_cast(size.Height); + return width_ > 0 && height_ > 0; +} + bool WgcSession::initialize(HMONITOR monitor, int fps) { fps_ = fps > 0 ? fps : 60; if (!createD3DDevice()) { @@ -142,6 +162,44 @@ bool WgcSession::initialize(HMONITOR monitor, int fps) { // Older WGC builds can omit this property; callers still overlay their own cursor. } + try { + session_.IsBorderRequired(false); + } catch (...) { + // IsBorderRequired is Windows 11-only. Ignore it on older builds. + } + + frameArrivedToken_ = framePool_.FrameArrived({this, &WgcSession::onFrameArrived}); + return true; +} + +bool WgcSession::initialize(HWND window, int fps) { + fps_ = fps > 0 ? fps : 60; + if (!createD3DDevice()) { + return false; + } + if (!createCaptureItem(window)) { + return false; + } + + framePool_ = wgcap::Direct3D11CaptureFramePool::CreateFreeThreaded( + winrtDevice_, + wgdx::DirectXPixelFormat::B8G8R8A8UIntNormalized, + 2, + item_.Size()); + session_ = framePool_.CreateCaptureSession(item_); + + try { + session_.IsCursorCaptureEnabled(false); + } catch (...) { + // Older WGC builds can omit this property; callers still overlay their own cursor. + } + + try { + session_.IsBorderRequired(false); + } catch (...) { + // IsBorderRequired is Windows 11-only. Ignore it on older builds. 
+ } + frameArrivedToken_ = framePool_.FrameArrived({this, &WgcSession::onFrameArrived}); return true; } @@ -204,6 +262,7 @@ void WgcSession::onFrameArrived( if (callback) { callback(texture.Get(), timeSpanToHns(frame.SystemRelativeTime())); } + frame.Close(); } int WgcSession::captureWidth() const { diff --git a/electron/native/wgc-capture/src/wgc_session.h b/electron/native/wgc-capture/src/wgc_session.h index 8cfb050..34ad3f5 100644 --- a/electron/native/wgc-capture/src/wgc_session.h +++ b/electron/native/wgc-capture/src/wgc_session.h @@ -23,6 +23,7 @@ public: WgcSession& operator=(const WgcSession&) = delete; bool initialize(HMONITOR monitor, int fps); + bool initialize(HWND window, int fps); void setFrameCallback(FrameCallback callback); bool start(); void stop(); @@ -35,6 +36,7 @@ public: private: bool createD3DDevice(); bool createCaptureItem(HMONITOR monitor); + bool createCaptureItem(HWND window); void onFrameArrived( winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool const& sender, winrt::Windows::Foundation::IInspectable const&); diff --git a/package.json b/package.json index 9114207..8ff2cb5 100644 --- a/package.json +++ b/package.json @@ -28,6 +28,7 @@ "test:watch": "vitest", "test:cursor-native:win": "node scripts/test-windows-native-cursor.mjs", "test:wgc-helper:win": "node scripts/test-windows-wgc-helper.mjs", + "test:wgc-window:win": "node scripts/test-windows-wgc-helper.mjs --window", "test:wgc-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio", "test:wgc-mic:win": "node scripts/test-windows-wgc-helper.mjs --microphone", "test:wgc-mixed-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio --microphone", diff --git a/scripts/test-windows-wgc-helper.mjs b/scripts/test-windows-wgc-helper.mjs index bb69819..6b5a626 100644 --- a/scripts/test-windows-wgc-helper.mjs +++ b/scripts/test-windows-wgc-helper.mjs @@ -19,6 +19,8 @@ const WITH_MICROPHONE = process.env.OPENSCREEN_WGC_TEST_MICROPHONE === "true" || 
process.argv.includes("--microphone") || process.argv.includes("--mic"); +const WITH_WINDOW = + process.env.OPENSCREEN_WGC_TEST_WINDOW === "true" || process.argv.includes("--window"); function runHelper(config) { return new Promise((resolve, reject) => { @@ -47,6 +49,47 @@ function runHelper(config) { }); } +function startFixtureWindow() { + return new Promise((resolve, reject) => { + const child = spawn("mspaint.exe", [], { + stdio: ["ignore", "ignore", "ignore"], + windowsHide: false, + }); + + const poll = setInterval(() => { + const lookup = spawnSync( + "powershell", + [ + "-NoProfile", + "-Command", + `(Get-Process -Id ${child.pid} -ErrorAction SilentlyContinue).MainWindowHandle`, + ], + { encoding: "utf8", windowsHide: true }, + ); + const handle = lookup.stdout + .trim() + .split(/\r?\n/) + .find((line) => /^\d+$/.test(line.trim())); + if (handle && handle !== "0") { + clearInterval(poll); + clearTimeout(timer); + resolve({ child, sourceId: `window:${handle.trim()}:0` }); + } + }, 250); + + const timer = setTimeout(() => { + clearInterval(poll); + child.kill(); + reject(new Error("Timed out waiting for fixture window handle")); + }, 10_000); + child.once("error", (error) => { + clearInterval(poll); + clearTimeout(timer); + reject(error); + }); + }); +} + function probeStreams(outputPath) { const ffprobe = spawnSync( "ffprobe", @@ -106,15 +149,17 @@ if (!fs.existsSync(HELPER_PATH)) { const outputPath = path.join( os.tmpdir(), - `openscreen-wgc-helper-${WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`, + `openscreen-wgc-helper-${WITH_WINDOW ? "window" : WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`, ); +const fixtureWindow = WITH_WINDOW ? await startFixtureWindow() : null; + const config = { schemaVersion: 2, recordingId: Date.now(), outputPath, - sourceType: "display", - sourceId: "screen:0:0", + sourceType: fixtureWindow ? 
"window" : "display", + sourceId: fixtureWindow ? fixtureWindow.sourceId : "screen:0:0", displayId: 0, fps: 30, videoWidth: 1280, @@ -132,7 +177,14 @@ const config = { outputs: { screenPath: outputPath }, }; -const result = await runHelper(config); +let result; +try { + result = await runHelper(config); +} finally { + if (fixtureWindow) { + fixtureWindow.child.kill(); + } +} if (result.code !== 0) { throw new Error(`WGC helper exited with ${result.code}\n${result.stdout}\n${result.stderr}`); } @@ -151,7 +203,9 @@ if ((WITH_SYSTEM_AUDIO || WITH_MICROPHONE) && !hasAudio) { } const frameLuma = measureFirstFrameLuma(outputPath); if (frameLuma.average < 1 && frameLuma.max < 5) { - throw new Error(`WGC helper output first frame is black: ${outputPath}`); + throw new Error( + `WGC helper output first frame is black: ${outputPath}\n${result.stdout}\n${result.stderr}`, + ); } console.log( diff --git a/src/hooks/useScreenRecorder.ts b/src/hooks/useScreenRecorder.ts index 1ae9d22..88ba90a 100644 --- a/src/hooks/useScreenRecorder.ts +++ b/src/hooks/useScreenRecorder.ts @@ -2,7 +2,10 @@ import { fixWebmDuration } from "@fix-webm-duration/fix"; import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { useScopedT } from "@/contexts/I18nContext"; -import type { NativeWindowsRecordingRequest } from "@/lib/nativeWindowsRecording"; +import { + type NativeWindowsRecordingRequest, + parseWindowHandleFromSourceId, +} from "@/lib/nativeWindowsRecording"; import { requestCameraAccess } from "@/lib/requestCameraAccess"; const TARGET_FRAME_RATE = 60; @@ -573,12 +576,14 @@ export function useScreenRecorder(): UseScreenRecorderReturn { const activeRecordingId = Date.now(); const displayId = Number(selectedSource.display_id); const sourceType = selectedSource.id.startsWith("window:") ? 
"window" : "display"; + const windowHandle = parseWindowHandleFromSourceId(selectedSource.id); const request: NativeWindowsRecordingRequest = { recordingId: activeRecordingId, source: { type: sourceType, sourceId: selectedSource.id, ...(Number.isFinite(displayId) ? { displayId } : {}), + ...(windowHandle ? { windowHandle } : {}), }, video: { fps: TARGET_FRAME_RATE, diff --git a/src/lib/nativeWindowsRecording.ts b/src/lib/nativeWindowsRecording.ts index d30ef17..7e2f0ba 100644 --- a/src/lib/nativeWindowsRecording.ts +++ b/src/lib/nativeWindowsRecording.ts @@ -39,3 +39,16 @@ export type NativeWindowsRecordingStartResult = { helperPath?: string; error?: string; }; + +export function parseWindowHandleFromSourceId(sourceId?: string | null) { + if (!sourceId?.startsWith("window:")) { + return null; + } + + const handlePart = sourceId.split(":")[1]; + if (!handlePart || !/^\d+$/.test(handlePart)) { + return null; + } + + return handlePart; +} From fb85f6687511b21f36198881127ce0fe5ed52d5d Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 17:50:22 +0200 Subject: [PATCH 15/43] feat: add native Windows webcam composition --- .../windows-native-recorder-roadmap.md | 20 +- electron/native/README.md | 12 +- electron/native/wgc-capture/CMakeLists.txt | 2 + electron/native/wgc-capture/src/main.cpp | 60 +++- .../native/wgc-capture/src/mf_encoder.cpp | 50 +++- electron/native/wgc-capture/src/mf_encoder.h | 14 +- .../native/wgc-capture/src/webcam_capture.cpp | 275 ++++++++++++++++++ .../native/wgc-capture/src/webcam_capture.h | 49 ++++ package.json | 2 + scripts/test-windows-wgc-helper.mjs | 40 ++- src/hooks/useScreenRecorder.ts | 17 ++ 11 files changed, 510 insertions(+), 31 deletions(-) create mode 100644 electron/native/wgc-capture/src/webcam_capture.cpp create mode 100644 electron/native/wgc-capture/src/webcam_capture.h diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md index ac309f9..63abd1d 
100644 --- a/docs/engineering/windows-native-recorder-roadmap.md +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -9,7 +9,7 @@ OpenScreen's Windows recorder should be owned by one native backend. Electron ca - Capture system audio through WASAPI loopback. - Capture microphone audio through WASAPI. - Mix system audio and microphone audio into the primary screen recording. -- Capture webcam video natively and keep it as a separate editable OpenScreen media stream. +- Capture webcam video natively and compose it into the Windows helper MP4 during the native-recording migration. - Keep preview/export aligned because screen video, audio, webcam, and cursor share one native timing origin. - Keep exported MP4s Windows-friendly: H.264 video plus AAC audio. Opus-in-MP4 is not an acceptable Windows export target. - Package the native helper with the Windows app. @@ -17,7 +17,7 @@ OpenScreen's Windows recorder should be owned by one native backend. Electron ca ## Non-Goals - Replacing the editor/export pipeline. -- Flattening webcam into the screen recording. The editor currently treats webcam as editable picture-in-picture media, so the native recorder should preserve a separate `webcamVideoPath`. +- Keeping webcam as a separate editable media stream in this milestone. A later pass can reintroduce a separate editable native `webcamVideoPath`; the current Windows-native milestone prioritizes a helper-owned multi-stream MP4 with deterministic screen/audio/mic/webcam sync. - Adding a native fallback for macOS or Linux in this branch. 
## Target Architecture @@ -78,7 +78,6 @@ The helper receives a single JSON argument: }, "outputs": { "screenPath": "C:\\Users\\me\\recording-123.mp4", - "webcamPath": "C:\\Users\\me\\recording-123-webcam.mp4", "manifestPath": "C:\\Users\\me\\recording-123.session.json" } } @@ -90,7 +89,7 @@ The helper emits newline-delimited JSON events to stdout: { "event": "ready", "schemaVersion": 2 } { "event": "recording-started", "timestampMs": 1234567890 } { "event": "warning", "code": "audio-device-unavailable", "message": "..." } -{ "event": "recording-stopped", "screenPath": "...", "webcamPath": "..." } +{ "event": "recording-stopped", "screenPath": "..." } { "event": "error", "code": "unsupported-window-source", "message": "..." } ``` @@ -153,15 +152,16 @@ Acceptance: ### 4. Webcam Capture - Add Media Foundation webcam source reader. -- Select 1280x720/30fps or nearest supported format. -- Encode webcam to `recording--webcam.mp4`. -- Synchronize webcam timestamps to the native session clock. -- Store `webcamVideoPath` in the OpenScreen session manifest. +- Select requested dimensions/fps or the nearest format accepted by Media Foundation. +- Convert webcam samples to BGRA and compose them into the primary helper MP4 as an initial bottom-right picture-in-picture overlay. +- Keep the helper process as the SSOT for screen/window, WASAPI system audio, microphone, webcam, and mux timing. +- Later: promote the same webcam capture source to a separate editable native `webcamVideoPath` if product requirements need post-recording layout edits. Acceptance: -- Editor loads the native screen recording and the native webcam recording. -- Webcam layout controls behave the same as today. +- Native display/window recordings can include webcam without returning to Electron capture. +- `npm run test:wgc-webcam:win` validates the helper path when a webcam is available and skips explicitly when no webcam device exists. 
+- Combined webcam + system audio + microphone produces one MP4 with H.264 video and AAC audio. ### 5. Native Window Capture diff --git a/electron/native/README.md b/electron/native/README.md index b366a1b..037b040 100644 --- a/electron/native/README.md +++ b/electron/native/README.md @@ -35,15 +35,18 @@ Current V2 JSON shape: "captureMic": false, "microphoneDeviceId": "default", "microphoneGain": 1.4, - "webcamEnabled": false, + "webcamEnabled": true, + "webcamDeviceId": "default", + "webcamWidth": 1280, + "webcamHeight": 720, + "webcamFps": 30, "outputs": { - "screenPath": "C:\\path\\recording-123.mp4", - "webcamPath": "C:\\path\\recording-123-webcam.mp4" + "screenPath": "C:\\path\\recording-123.mp4" } } ``` -The current helper implementation supports display/window video capture, system audio loopback, and initial default-microphone capture. Webcam capture now fails explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan. +The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, and Media Foundation webcam capture. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links; when the requested webcam is not matched, the helper logs a warning and uses the default webcam. 
Smoke-test the helper with: @@ -53,4 +56,5 @@ npm run test:wgc-window:win npm run test:wgc-audio:win npm run test:wgc-mic:win npm run test:wgc-mixed-audio:win +npm run test:wgc-webcam:win ``` diff --git a/electron/native/wgc-capture/CMakeLists.txt b/electron/native/wgc-capture/CMakeLists.txt index b21fd66..92b9335 100644 --- a/electron/native/wgc-capture/CMakeLists.txt +++ b/electron/native/wgc-capture/CMakeLists.txt @@ -23,6 +23,8 @@ add_executable(wgc-capture src/monitor_utils.h src/wasapi_loopback_capture.cpp src/wasapi_loopback_capture.h + src/webcam_capture.cpp + src/webcam_capture.h src/wgc_session.cpp src/wgc_session.h ) diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index 86f032e..bc82b22 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ b/electron/native/wgc-capture/src/main.cpp @@ -2,6 +2,7 @@ #include "mf_encoder.h" #include "monitor_utils.h" #include "wasapi_loopback_capture.h" +#include "webcam_capture.h" #include "wgc_session.h" #include @@ -303,11 +304,6 @@ int main(int argc, char* argv[]) { std::cout << "{\"event\":\"ready\",\"schemaVersion\":2}" << std::endl; - if (config.webcamEnabled) { - std::cerr << "ERROR: Native webcam capture is not implemented in this helper yet" << std::endl; - return 1; - } - WgcSession session; if (config.sourceType == "display") { HMONITOR monitor = findMonitorForCapture( @@ -347,6 +343,22 @@ int main(int argc, char* argv[]) { const int pixels = width * height; const int bitrate = pixels >= 3840 * 2160 ? 45'000'000 : pixels >= 2560 * 1440 ? 28'000'000 : 18'000'000; + WebcamCapture webcamCapture; + bool webcamActive = false; + if (config.webcamEnabled) { + if (!webcamCapture.initialize( + utf8ToWide(config.webcamDeviceId), + config.webcamWidth, + config.webcamHeight, + config.webcamFps > 0 ? 
config.webcamFps : config.fps)) { + std::cerr << "ERROR: Failed to initialize native webcam capture" << std::endl; + return 1; + } + std::cout << "{\"event\":\"webcam-format\",\"schemaVersion\":2,\"width\":" << webcamCapture.width() + << ",\"height\":" << webcamCapture.height() + << ",\"fps\":" << webcamCapture.fps() << "}" << std::endl; + } + WasapiLoopbackCapture loopbackCapture; WasapiLoopbackCapture microphoneCapture; const AudioInputFormat* audioFormat = nullptr; @@ -398,6 +410,9 @@ int main(int argc, char* argv[]) { std::atomic firstFrameWritten = false; std::atomic encodeFailed = false; Microsoft::WRL::ComPtr latestFrameTexture; + std::vector latestWebcamFrame; + int latestWebcamWidth = 0; + int latestWebcamHeight = 0; session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) { (void)timestampHns; @@ -433,9 +448,18 @@ int main(int argc, char* argv[]) { while (!stopRequested && !encodeFailed) { { std::scoped_lock lock(mutex); + if (webcamActive) { + webcamCapture.copyLatestFrame(latestWebcamFrame, latestWebcamWidth, latestWebcamHeight); + } + const BgraFrameView webcamFrame{ + latestWebcamFrame.empty() ? nullptr : latestWebcamFrame.data(), + latestWebcamWidth, + latestWebcamHeight, + }; if (latestFrameTexture && !encoder.writeFrame( latestFrameTexture.Get(), - static_cast((frameIndex * 10'000'000ULL) / config.fps))) { + static_cast((frameIndex * 10'000'000ULL) / config.fps), + webcamFrame.data ? 
&webcamFrame : nullptr)) { encodeFailed = true; stopRequested = true; cv.notify_all(); @@ -528,8 +552,30 @@ int main(int argc, char* argv[]) { if (!startAudioCaptures()) { return 1; } + if (config.webcamEnabled) { + if (!webcamCapture.start()) { + microphoneCapture.stop(); + loopbackCapture.stop(); + if (audioMixer) { + audioMixer->stop(); + } + std::cerr << "ERROR: Failed to start native webcam capture" << std::endl; + return 1; + } + webcamActive = true; + const auto webcamDeadline = std::chrono::steady_clock::now() + std::chrono::seconds(3); + while (std::chrono::steady_clock::now() < webcamDeadline && + !webcamCapture.copyLatestFrame(latestWebcamFrame, latestWebcamWidth, latestWebcamHeight)) { + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + if (latestWebcamFrame.empty()) { + std::cerr << "WARNING: Native webcam started but no frame was available before screen capture" + << std::endl; + } + } if (!session.start()) { + webcamCapture.stop(); microphoneCapture.stop(); loopbackCapture.stop(); if (audioMixer) { @@ -554,6 +600,7 @@ int main(int argc, char* argv[]) { } microphoneCapture.stop(); loopbackCapture.stop(); + webcamCapture.stop(); if (audioMixer) { audioMixer->stop(); } @@ -580,6 +627,7 @@ int main(int argc, char* argv[]) { microphoneCapture.stop(); loopbackCapture.stop(); + webcamCapture.stop(); if (audioMixer) { audioMixer->stop(); } diff --git a/electron/native/wgc-capture/src/mf_encoder.cpp b/electron/native/wgc-capture/src/mf_encoder.cpp index fc95fc2..de9220f 100644 --- a/electron/native/wgc-capture/src/mf_encoder.cpp +++ b/electron/native/wgc-capture/src/mf_encoder.cpp @@ -38,6 +38,43 @@ void setAudioFormat(IMFMediaType* type, UINT32 channels, UINT32 sampleRate, UINT type->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitsPerSample); } +void compositeWebcam(BYTE* destination, int width, int height, const BgraFrameView& webcamFrame) { + if (!webcamFrame.data || webcamFrame.width <= 0 || webcamFrame.height <= 0 || width <= 0 || height <= 0) 
{ + return; + } + + const int margin = std::max(16, std::min(width, height) / 60); + const int maxOverlayWidth = std::max(2, width / 4); + int overlayWidth = maxOverlayWidth; + int overlayHeight = static_cast( + (static_cast(overlayWidth) * webcamFrame.height) / std::max(1, webcamFrame.width)); + const int maxOverlayHeight = std::max(2, height / 3); + if (overlayHeight > maxOverlayHeight) { + overlayHeight = maxOverlayHeight; + overlayWidth = static_cast( + (static_cast(overlayHeight) * webcamFrame.width) / std::max(1, webcamFrame.height)); + } + + overlayWidth = std::max(2, std::min(overlayWidth, width - margin * 2)); + overlayHeight = std::max(2, std::min(overlayHeight, height - margin * 2)); + const int originX = std::max(0, width - overlayWidth - margin); + const int originY = std::max(0, height - overlayHeight - margin); + + for (int y = 0; y < overlayHeight; y += 1) { + const int sourceY = static_cast((static_cast(y) * webcamFrame.height) / overlayHeight); + BYTE* destinationRow = destination + ((originY + y) * width + originX) * 4; + for (int x = 0; x < overlayWidth; x += 1) { + const int sourceX = static_cast((static_cast(x) * webcamFrame.width) / overlayWidth); + const BYTE* source = webcamFrame.data + (sourceY * webcamFrame.width + sourceX) * 4; + BYTE* target = destinationRow + x * 4; + target[0] = source[0]; + target[1] = source[1]; + target[2] = source[2]; + target[3] = 255; + } + } +} + } // namespace MFEncoder::~MFEncoder() { @@ -179,7 +216,11 @@ bool MFEncoder::ensureStagingTexture(ID3D11Texture2D* texture) { "CreateTexture2D(staging)"); } -bool MFEncoder::copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, DWORD destinationSize) { +bool MFEncoder::copyFrameToBuffer( + ID3D11Texture2D* texture, + BYTE* destination, + DWORD destinationSize, + const BgraFrameView* webcamFrame) { if (!ensureStagingTexture(texture)) { return false; } @@ -203,12 +244,15 @@ bool MFEncoder::copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, D for 
(int y = 0; y < height_; y += 1) { std::memcpy(destination + rowBytes * y, source + mapped.RowPitch * y, rowBytes); } + if (webcamFrame) { + compositeWebcam(destination, width_, height_, *webcamFrame); + } context_->Unmap(stagingTexture_.Get(), 0); return true; } -bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns) { +bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const BgraFrameView* webcamFrame) { std::scoped_lock writerLock(writerMutex_); if (!sinkWriter_ || finalized_) { return false; @@ -238,7 +282,7 @@ bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns) { return false; } - const bool copied = copyFrameToBuffer(texture, data, maxLength); + const bool copied = copyFrameToBuffer(texture, data, maxLength, webcamFrame); buffer->Unlock(); if (!copied) { return false; diff --git a/electron/native/wgc-capture/src/mf_encoder.h b/electron/native/wgc-capture/src/mf_encoder.h index b6db685..a82a940 100644 --- a/electron/native/wgc-capture/src/mf_encoder.h +++ b/electron/native/wgc-capture/src/mf_encoder.h @@ -11,6 +11,12 @@ #include #include +struct BgraFrameView { + const BYTE* data = nullptr; + int width = 0; + int height = 0; +}; + struct AudioInputFormat { GUID subtype = MFAudioFormat_PCM; UINT32 sampleRate = 0; @@ -37,13 +43,17 @@ public: ID3D11Device* device, ID3D11DeviceContext* context, const AudioInputFormat* audioFormat = nullptr); - bool writeFrame(ID3D11Texture2D* texture, int64_t timestampHns); + bool writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const BgraFrameView* webcamFrame = nullptr); bool writeAudio(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns); bool finalize(); private: bool ensureStagingTexture(ID3D11Texture2D* texture); - bool copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, DWORD destinationSize); + bool copyFrameToBuffer( + ID3D11Texture2D* texture, + BYTE* destination, + DWORD destinationSize, + const 
BgraFrameView* webcamFrame); bool configureAudioStream(const AudioInputFormat& audioFormat); Microsoft::WRL::ComPtr sinkWriter_; diff --git a/electron/native/wgc-capture/src/webcam_capture.cpp b/electron/native/wgc-capture/src/webcam_capture.cpp new file mode 100644 index 0000000..6b34a35 --- /dev/null +++ b/electron/native/wgc-capture/src/webcam_capture.cpp @@ -0,0 +1,275 @@ +#include "webcam_capture.h" + +#include +#include +#include + +#include +#include +#include + +namespace { + +bool succeeded(HRESULT hr, const char* label) { + if (SUCCEEDED(hr)) { + return true; + } + + std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")" + << std::endl; + return false; +} + +std::wstring readAllocatedString(IMFActivate* activate, REFGUID key) { + WCHAR* value = nullptr; + UINT32 length = 0; + if (FAILED(activate->GetAllocatedString(key, &value, &length)) || !value) { + return {}; + } + + std::wstring result(value, value + length); + CoTaskMemFree(value); + return result; +} + +bool containsInsensitive(const std::wstring& haystack, const std::wstring& needle) { + if (haystack.empty() || needle.empty()) { + return false; + } + + std::wstring lowerHaystack = haystack; + std::wstring lowerNeedle = needle; + std::transform(lowerHaystack.begin(), lowerHaystack.end(), lowerHaystack.begin(), ::towlower); + std::transform(lowerNeedle.begin(), lowerNeedle.end(), lowerNeedle.begin(), ::towlower); + return lowerHaystack.find(lowerNeedle) != std::wstring::npos || + lowerNeedle.find(lowerHaystack) != std::wstring::npos; +} + +} // namespace + +WebcamCapture::~WebcamCapture() { + stop(); +} + +bool WebcamCapture::initialize(const std::wstring& deviceId, int requestedWidth, int requestedHeight, int requestedFps) { + fps_ = std::clamp(requestedFps > 0 ? 
requestedFps : 30, 1, 60); + if (!succeeded(MFStartup(MF_VERSION), "MFStartup(webcam)")) { + return false; + } + mfStarted_ = true; + if (!selectDevice(deviceId)) { + return false; + } + + return configureReader(requestedWidth, requestedHeight, fps_); +} + +bool WebcamCapture::selectDevice(const std::wstring& deviceId) { + Microsoft::WRL::ComPtr attributes; + if (!succeeded(MFCreateAttributes(&attributes, 1), "MFCreateAttributes(webcam enumeration)")) { + return false; + } + if (!succeeded(attributes->SetGUID( + MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, + MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID), + "SetGUID(webcam source type)")) { + return false; + } + + IMFActivate** devices = nullptr; + UINT32 deviceCount = 0; + HRESULT hr = MFEnumDeviceSources(attributes.Get(), &devices, &deviceCount); + if (!succeeded(hr, "MFEnumDeviceSources") || deviceCount == 0) { + if (devices) { + CoTaskMemFree(devices); + } + std::cerr << "ERROR: No native Windows webcam devices were found" << std::endl; + return false; + } + + UINT32 selectedIndex = 0; + for (UINT32 index = 0; index < deviceCount; index += 1) { + const std::wstring name = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); + const std::wstring symbolicLink = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK); + if (!deviceId.empty() && (containsInsensitive(symbolicLink, deviceId) || containsInsensitive(name, deviceId))) { + selectedIndex = index; + break; + } + } + + if (!deviceId.empty() && selectedIndex == 0) { + const std::wstring firstName = readAllocatedString(devices[0], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); + const std::wstring firstLink = readAllocatedString(devices[0], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK); + if (!containsInsensitive(firstLink, deviceId) && !containsInsensitive(firstName, deviceId)) { + std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; using default webcam" + << std::endl; + } + 
} + + selectedDeviceName_ = readAllocatedString(devices[selectedIndex], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); + hr = devices[selectedIndex]->ActivateObject(IID_PPV_ARGS(&mediaSource_)); + + for (UINT32 index = 0; index < deviceCount; index += 1) { + devices[index]->Release(); + } + CoTaskMemFree(devices); + + return succeeded(hr, "ActivateObject(webcam)"); +} + +bool WebcamCapture::configureReader(int requestedWidth, int requestedHeight, int requestedFps) { + Microsoft::WRL::ComPtr attributes; + if (!succeeded(MFCreateAttributes(&attributes, 2), "MFCreateAttributes(webcam reader)")) { + return false; + } + attributes->SetUINT32(MF_SOURCE_READER_ENABLE_VIDEO_PROCESSING, TRUE); + attributes->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, FALSE); + + if (!succeeded(MFCreateSourceReaderFromMediaSource(mediaSource_.Get(), attributes.Get(), &sourceReader_), + "MFCreateSourceReaderFromMediaSource(webcam)")) { + return false; + } + + Microsoft::WRL::ComPtr mediaType; + if (!succeeded(MFCreateMediaType(&mediaType), "MFCreateMediaType(webcam output)")) { + return false; + } + mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); + mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32); + if (requestedWidth > 0 && requestedHeight > 0) { + MFSetAttributeSize(mediaType.Get(), MF_MT_FRAME_SIZE, static_cast(requestedWidth), static_cast(requestedHeight)); + } + MFSetAttributeRatio(mediaType.Get(), MF_MT_FRAME_RATE, static_cast(std::max(1, requestedFps)), 1); + + if (!succeeded(sourceReader_->SetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, nullptr, mediaType.Get()), + "SetCurrentMediaType(webcam RGB32)")) { + return false; + } + sourceReader_->SetStreamSelection(MF_SOURCE_READER_ALL_STREAMS, FALSE); + sourceReader_->SetStreamSelection(MF_SOURCE_READER_FIRST_VIDEO_STREAM, TRUE); + + Microsoft::WRL::ComPtr currentType; + if (!succeeded(sourceReader_->GetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, ¤tType), + "GetCurrentMediaType(webcam)")) { + return false; + } + + 
UINT32 width = 0; + UINT32 height = 0; + if (FAILED(MFGetAttributeSize(currentType.Get(), MF_MT_FRAME_SIZE, &width, &height)) || width == 0 || height == 0) { + width = static_cast(requestedWidth > 0 ? requestedWidth : 1280); + height = static_cast(requestedHeight > 0 ? requestedHeight : 720); + } + width_ = static_cast(width); + height_ = static_cast(height); + return true; +} + +bool WebcamCapture::start() { + if (!sourceReader_ || thread_.joinable()) { + return false; + } + + stopRequested_ = false; + thread_ = std::thread(&WebcamCapture::captureLoop, this); + return true; +} + +void WebcamCapture::stop() { + stopRequested_ = true; + if (thread_.joinable()) { + thread_.join(); + } + if (mediaSource_) { + mediaSource_->Shutdown(); + } + sourceReader_.Reset(); + mediaSource_.Reset(); + if (mfStarted_) { + MFShutdown(); + mfStarted_ = false; + } +} + +void WebcamCapture::captureLoop() { + CoInitializeEx(nullptr, COINIT_MULTITHREADED); + + while (!stopRequested_) { + DWORD streamIndex = 0; + DWORD flags = 0; + LONGLONG timestamp = 0; + Microsoft::WRL::ComPtr sample; + HRESULT hr = sourceReader_->ReadSample( + MF_SOURCE_READER_FIRST_VIDEO_STREAM, + 0, + &streamIndex, + &flags, + ×tamp, + &sample); + (void)streamIndex; + (void)timestamp; + + if (FAILED(hr)) { + std::cerr << "WARNING: Failed to read webcam sample (hr=0x" << std::hex << hr << std::dec << ")" + << std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + continue; + } + if ((flags & MF_SOURCE_READERF_ENDOFSTREAM) != 0) { + break; + } + if (!sample) { + continue; + } + + Microsoft::WRL::ComPtr buffer; + if (FAILED(sample->ConvertToContiguousBuffer(&buffer)) || !buffer) { + continue; + } + + BYTE* data = nullptr; + DWORD maxLength = 0; + DWORD currentLength = 0; + if (FAILED(buffer->Lock(&data, &maxLength, ¤tLength)) || !data) { + continue; + } + + const DWORD expectedLength = static_cast(std::max(0, width_) * std::max(0, height_) * 4); + if (currentLength >= expectedLength && 
expectedLength > 0) { + std::scoped_lock lock(frameMutex_); + latestFrame_.assign(data, data + expectedLength); + } + + buffer->Unlock(); + } + + CoUninitialize(); +} + +bool WebcamCapture::copyLatestFrame(std::vector& destination, int& width, int& height) { + std::scoped_lock lock(frameMutex_); + if (latestFrame_.empty() || width_ <= 0 || height_ <= 0) { + return false; + } + + destination = latestFrame_; + width = width_; + height = height_; + return true; +} + +int WebcamCapture::width() const { + return width_; +} + +int WebcamCapture::height() const { + return height_; +} + +int WebcamCapture::fps() const { + return fps_; +} + +const std::wstring& WebcamCapture::selectedDeviceName() const { + return selectedDeviceName_; +} diff --git a/electron/native/wgc-capture/src/webcam_capture.h b/electron/native/wgc-capture/src/webcam_capture.h new file mode 100644 index 0000000..7d5f904 --- /dev/null +++ b/electron/native/wgc-capture/src/webcam_capture.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +class WebcamCapture { +public: + WebcamCapture() = default; + ~WebcamCapture(); + + WebcamCapture(const WebcamCapture&) = delete; + WebcamCapture& operator=(const WebcamCapture&) = delete; + + bool initialize(const std::wstring& deviceId, int requestedWidth, int requestedHeight, int requestedFps); + bool start(); + void stop(); + bool copyLatestFrame(std::vector& destination, int& width, int& height); + + int width() const; + int height() const; + int fps() const; + const std::wstring& selectedDeviceName() const; + +private: + bool selectDevice(const std::wstring& deviceId); + bool configureReader(int requestedWidth, int requestedHeight, int requestedFps); + void captureLoop(); + + Microsoft::WRL::ComPtr mediaSource_; + Microsoft::WRL::ComPtr sourceReader_; + std::thread thread_; + std::atomic stopRequested_ = false; + std::mutex frameMutex_; + std::vector latestFrame_; + int width_ = 
0; + int height_ = 0; + int fps_ = 30; + bool mfStarted_ = false; + std::wstring selectedDeviceName_; +}; diff --git a/package.json b/package.json index 8ff2cb5..0d64e14 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,8 @@ "test:wgc-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio", "test:wgc-mic:win": "node scripts/test-windows-wgc-helper.mjs --microphone", "test:wgc-mixed-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio --microphone", + "test:wgc-webcam:win": "node scripts/test-windows-wgc-helper.mjs --webcam", + "test:wgc-full:win": "node scripts/test-windows-wgc-helper.mjs --webcam --system-audio --microphone", "capture:openscreen-preview": "node scripts/capture-openscreen-preview.mjs", "build-vite": "tsc && vite build", "test:browser": "vitest --config vitest.browser.config.ts --run", diff --git a/scripts/test-windows-wgc-helper.mjs b/scripts/test-windows-wgc-helper.mjs index 6b5a626..3bdba57 100644 --- a/scripts/test-windows-wgc-helper.mjs +++ b/scripts/test-windows-wgc-helper.mjs @@ -21,6 +21,8 @@ const WITH_MICROPHONE = process.argv.includes("--mic"); const WITH_WINDOW = process.env.OPENSCREEN_WGC_TEST_WINDOW === "true" || process.argv.includes("--window"); +const WITH_WEBCAM = + process.env.OPENSCREEN_WGC_TEST_WEBCAM === "true" || process.argv.includes("--webcam"); function runHelper(config) { return new Promise((resolve, reject) => { @@ -31,21 +33,34 @@ function runHelper(config) { let stdout = ""; let stderr = ""; + let stopTimer = null; + const scheduleStop = () => { + if (stopTimer) { + return; + } + stopTimer = setTimeout(() => { + child.stdin.write("stop\n"); + }, DURATION_MS); + }; + const fallbackTimer = setTimeout(scheduleStop, 15_000); child.stdout.on("data", (chunk) => { stdout += chunk.toString(); + if (stdout.includes('"recording-started"') || stdout.includes("Recording started")) { + scheduleStop(); + } }); child.stderr.on("data", (chunk) => { stderr += chunk.toString(); }); 
child.once("error", reject); child.once("exit", (code) => { + clearTimeout(fallbackTimer); + if (stopTimer) { + clearTimeout(stopTimer); + } resolve({ code, stdout, stderr }); }); - - setTimeout(() => { - child.stdin.write("stop\n"); - }, DURATION_MS); }); } @@ -149,7 +164,7 @@ if (!fs.existsSync(HELPER_PATH)) { const outputPath = path.join( os.tmpdir(), - `openscreen-wgc-helper-${WITH_WINDOW ? "window" : WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`, + `openscreen-wgc-helper-${WITH_WEBCAM ? "webcam" : WITH_WINDOW ? "window" : WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`, ); const fixtureWindow = WITH_WINDOW ? await startFixtureWindow() : null; @@ -173,7 +188,11 @@ const config = { captureMic: WITH_MICROPHONE, microphoneDeviceId: "default", microphoneGain: 1.4, - webcamEnabled: false, + webcamEnabled: WITH_WEBCAM, + webcamDeviceId: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_ID ?? 
"", + webcamWidth: 640, + webcamHeight: 360, + webcamFps: 30, outputs: { screenPath: outputPath }, }; @@ -186,6 +205,15 @@ try { } } if (result.code !== 0) { + if ( + WITH_WEBCAM && + /No native Windows webcam devices were found|Failed to initialize native webcam/.test( + result.stderr, + ) + ) { + console.log("Skipping WGC webcam smoke test: no native Windows webcam device is available."); + process.exit(0); + } throw new Error(`WGC helper exited with ${result.code}\n${result.stdout}\n${result.stderr}`); } if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) { diff --git a/src/hooks/useScreenRecorder.ts b/src/hooks/useScreenRecorder.ts index 88ba90a..3947954 100644 --- a/src/hooks/useScreenRecorder.ts +++ b/src/hooks/useScreenRecorder.ts @@ -182,6 +182,20 @@ export function useScreenRecorder(): UseScreenRecorderReturn { } }, []); + const stopWebcamPreviewStream = useCallback(() => { + if (!webcamStream.current) { + return; + } + + webcamAcquireId.current++; + webcamStream.current.getTracks().forEach((track) => { + track.onended = null; + track.stop(); + }); + webcamStream.current = null; + webcamReady.current = true; + }, []); + const setWebcamEnabled = useCallback( async (enabled: boolean) => { if (!enabled) { @@ -577,6 +591,9 @@ export function useScreenRecorder(): UseScreenRecorderReturn { const displayId = Number(selectedSource.display_id); const sourceType = selectedSource.id.startsWith("window:") ? 
"window" : "display"; const windowHandle = parseWindowHandleFromSourceId(selectedSource.id); + if (webcamEnabled) { + stopWebcamPreviewStream(); + } const request: NativeWindowsRecordingRequest = { recordingId: activeRecordingId, source: { From fdcd8820582de05659277ef222526ad0d9343f46 Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 18:06:43 +0200 Subject: [PATCH 16/43] fix: honor selected native Windows webcam --- .../windows-native-recorder-roadmap.md | 3 ++ electron/ipc/handlers.ts | 1 + electron/native/README.md | 11 +++++- electron/native/wgc-capture/src/main.cpp | 18 ++++++++- .../native/wgc-capture/src/webcam_capture.cpp | 37 +++++++++++++------ .../native/wgc-capture/src/webcam_capture.h | 9 ++++- scripts/test-windows-wgc-helper.mjs | 1 + src/components/launch/LaunchWindow.tsx | 16 ++++++-- src/hooks/useScreenRecorder.ts | 6 +++ src/lib/nativeWindowsRecording.ts | 1 + 10 files changed, 85 insertions(+), 18 deletions(-) diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md index 63abd1d..12c9502 100644 --- a/docs/engineering/windows-native-recorder-roadmap.md +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -71,6 +71,7 @@ The helper receives a single JSON argument: "webcam": { "enabled": true, "deviceId": "default", + "deviceName": "Camera (NVIDIA Broadcast)", "width": 1280, "height": 720, "fps": 30, @@ -133,6 +134,7 @@ SSOT rules for this phase: - `docs/engineering/windows-native-recorder-roadmap.md` is the feature-level contract and phase checklist. - `WgcSession::captureWidth()/captureHeight()` is the encoded screen frame size until a dedicated native scaling stage exists. - `WasapiLoopbackCapture::inputFormat()` is the runtime audio format source used by `MFEncoder`. 
+- The renderer passes both the browser webcam `deviceId` and selected display label as `deviceName`; `electron/native/wgc-capture/src/webcam_capture.*` is the only place that maps those values to Media Foundation devices. - No duplicated hard-coded audio format assumptions in `main.cpp`. ### 3. WASAPI Microphone @@ -155,6 +157,7 @@ Acceptance: - Select requested dimensions/fps or the nearest format accepted by Media Foundation. - Convert webcam samples to BGRA and compose them into the primary helper MP4 as an initial bottom-right picture-in-picture overlay. - Keep the helper process as the SSOT for screen/window, WASAPI system audio, microphone, webcam, and mux timing. +- Match the requested webcam through Media Foundation friendly names first, then browser device ids/symbolic links, so UI selection remains stable across Chromium and Windows native device namespaces. - Later: promote the same webcam capture source to a separate editable native `webcamVideoPath` if product requirements need post-recording layout edits. Acceptance: diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index de3b6b5..46b4dc7 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -888,6 +888,7 @@ export function registerIpcHandlers( microphoneGain: request.audio.microphone.gain, webcamEnabled: request.webcam.enabled, webcamDeviceId: request.webcam.deviceId ?? null, + webcamDeviceName: request.webcam.deviceName ?? 
null, webcamWidth: request.webcam.width, webcamHeight: request.webcam.height, webcamFps: request.webcam.fps, diff --git a/electron/native/README.md b/electron/native/README.md index 037b040..844f79c 100644 --- a/electron/native/README.md +++ b/electron/native/README.md @@ -37,6 +37,7 @@ Current V2 JSON shape: "microphoneGain": 1.4, "webcamEnabled": true, "webcamDeviceId": "default", + "webcamDeviceName": "Camera (NVIDIA Broadcast)", "webcamWidth": 1280, "webcamHeight": 720, "webcamFps": 30, @@ -46,7 +47,7 @@ Current V2 JSON shape: } ``` -The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, and Media Foundation webcam capture. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links; when the requested webcam is not matched, the helper logs a warning and uses the default webcam. +The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, and Media Foundation webcam capture. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links, so the renderer passes both `webcamDeviceId` and `webcamDeviceName`. The helper treats the Media Foundation friendly name as the preferred stable selector, then tries the browser id, and only falls back to the default webcam with an explicit warning when no requested device matches. 
Smoke-test the helper with: @@ -58,3 +59,11 @@ npm run test:wgc-mic:win npm run test:wgc-mixed-audio:win npm run test:wgc-webcam:win ``` + +To validate a specific native webcam manually: + +```powershell +$env:OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME = "NVIDIA Broadcast" +npm run test:wgc-webcam:win +Remove-Item Env:OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME +``` diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index bc82b22..56c34be 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ b/electron/native/wgc-capture/src/main.cpp @@ -40,6 +40,7 @@ struct CaptureConfig { std::string microphoneDeviceId; double microphoneGain = 1.0; std::string webcamDeviceId; + std::string webcamDeviceName; int webcamWidth = 0; int webcamHeight = 0; int webcamFps = 0; @@ -56,6 +57,17 @@ std::wstring utf8ToWide(const std::string& value) { return result; } +std::string wideToUtf8(const std::wstring& value) { + if (value.empty()) { + return {}; + } + + const int size = WideCharToMultiByte(CP_UTF8, 0, value.data(), static_cast(value.size()), nullptr, 0, nullptr, nullptr); + std::string result(static_cast(size), '\0'); + WideCharToMultiByte(CP_UTF8, 0, value.data(), static_cast(value.size()), result.data(), size, nullptr, nullptr); + return result; +} + std::string jsonEscape(const std::string& value) { std::string result; result.reserve(value.size()); @@ -267,6 +279,7 @@ bool parseConfig(const std::string& json, CaptureConfig& config) { config.microphoneDeviceId = findString(json, "microphoneDeviceId"); config.microphoneGain = findDouble(json, "microphoneGain", 1.0); config.webcamDeviceId = findString(json, "webcamDeviceId"); + config.webcamDeviceName = findString(json, "webcamDeviceName"); config.webcamWidth = findInt(json, "webcamWidth", 0); config.webcamHeight = findInt(json, "webcamHeight", 0); config.webcamFps = findInt(json, "webcamFps", 0); @@ -348,6 +361,7 @@ int main(int argc, char* argv[]) { if (config.webcamEnabled) { if 
(!webcamCapture.initialize( utf8ToWide(config.webcamDeviceId), + utf8ToWide(config.webcamDeviceName), config.webcamWidth, config.webcamHeight, config.webcamFps > 0 ? config.webcamFps : config.fps)) { @@ -356,7 +370,9 @@ int main(int argc, char* argv[]) { } std::cout << "{\"event\":\"webcam-format\",\"schemaVersion\":2,\"width\":" << webcamCapture.width() << ",\"height\":" << webcamCapture.height() - << ",\"fps\":" << webcamCapture.fps() << "}" << std::endl; + << ",\"fps\":" << webcamCapture.fps() + << ",\"deviceName\":\"" << jsonEscape(wideToUtf8(webcamCapture.selectedDeviceName())) + << "\"}" << std::endl; } WasapiLoopbackCapture loopbackCapture; diff --git a/electron/native/wgc-capture/src/webcam_capture.cpp b/electron/native/wgc-capture/src/webcam_capture.cpp index 6b34a35..708d72c 100644 --- a/electron/native/wgc-capture/src/webcam_capture.cpp +++ b/electron/native/wgc-capture/src/webcam_capture.cpp @@ -51,20 +51,25 @@ WebcamCapture::~WebcamCapture() { stop(); } -bool WebcamCapture::initialize(const std::wstring& deviceId, int requestedWidth, int requestedHeight, int requestedFps) { +bool WebcamCapture::initialize( + const std::wstring& deviceId, + const std::wstring& deviceName, + int requestedWidth, + int requestedHeight, + int requestedFps) { fps_ = std::clamp(requestedFps > 0 ? 
requestedFps : 30, 1, 60); if (!succeeded(MFStartup(MF_VERSION), "MFStartup(webcam)")) { return false; } mfStarted_ = true; - if (!selectDevice(deviceId)) { + if (!selectDevice(deviceId, deviceName)) { return false; } return configureReader(requestedWidth, requestedHeight, fps_); } -bool WebcamCapture::selectDevice(const std::wstring& deviceId) { +bool WebcamCapture::selectDevice(const std::wstring& deviceId, const std::wstring& deviceName) { Microsoft::WRL::ComPtr attributes; if (!succeeded(MFCreateAttributes(&attributes, 1), "MFCreateAttributes(webcam enumeration)")) { return false; @@ -88,22 +93,32 @@ bool WebcamCapture::selectDevice(const std::wstring& deviceId) { } UINT32 selectedIndex = 0; + bool matched = false; + auto matchesRequestedDevice = [&](const std::wstring& name, const std::wstring& symbolicLink) { + if (!deviceName.empty() && + (containsInsensitive(name, deviceName) || containsInsensitive(symbolicLink, deviceName))) { + return true; + } + if (!deviceId.empty() && + (containsInsensitive(symbolicLink, deviceId) || containsInsensitive(name, deviceId))) { + return true; + } + return false; + }; + for (UINT32 index = 0; index < deviceCount; index += 1) { const std::wstring name = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); const std::wstring symbolicLink = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK); - if (!deviceId.empty() && (containsInsensitive(symbolicLink, deviceId) || containsInsensitive(name, deviceId))) { + if (matchesRequestedDevice(name, symbolicLink)) { selectedIndex = index; + matched = true; break; } } - if (!deviceId.empty() && selectedIndex == 0) { - const std::wstring firstName = readAllocatedString(devices[0], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); - const std::wstring firstLink = readAllocatedString(devices[0], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK); - if (!containsInsensitive(firstLink, deviceId) && !containsInsensitive(firstName, 
deviceId)) { - std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; using default webcam" - << std::endl; - } + if ((!deviceId.empty() || !deviceName.empty()) && !matched) { + std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; using default webcam" + << std::endl; } selectedDeviceName_ = readAllocatedString(devices[selectedIndex], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); diff --git a/electron/native/wgc-capture/src/webcam_capture.h b/electron/native/wgc-capture/src/webcam_capture.h index 7d5f904..201db25 100644 --- a/electron/native/wgc-capture/src/webcam_capture.h +++ b/electron/native/wgc-capture/src/webcam_capture.h @@ -20,7 +20,12 @@ public: WebcamCapture(const WebcamCapture&) = delete; WebcamCapture& operator=(const WebcamCapture&) = delete; - bool initialize(const std::wstring& deviceId, int requestedWidth, int requestedHeight, int requestedFps); + bool initialize( + const std::wstring& deviceId, + const std::wstring& deviceName, + int requestedWidth, + int requestedHeight, + int requestedFps); bool start(); void stop(); bool copyLatestFrame(std::vector& destination, int& width, int& height); @@ -31,7 +36,7 @@ public: const std::wstring& selectedDeviceName() const; private: - bool selectDevice(const std::wstring& deviceId); + bool selectDevice(const std::wstring& deviceId, const std::wstring& deviceName); bool configureReader(int requestedWidth, int requestedHeight, int requestedFps); void captureLoop(); diff --git a/scripts/test-windows-wgc-helper.mjs b/scripts/test-windows-wgc-helper.mjs index 3bdba57..3f9fb93 100644 --- a/scripts/test-windows-wgc-helper.mjs +++ b/scripts/test-windows-wgc-helper.mjs @@ -190,6 +190,7 @@ const config = { microphoneGain: 1.4, webcamEnabled: WITH_WEBCAM, webcamDeviceId: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_ID ?? "", + webcamDeviceName: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME ?? 
"", webcamWidth: 640, webcamHeight: 360, webcamFps: 30, diff --git a/src/components/launch/LaunchWindow.tsx b/src/components/launch/LaunchWindow.tsx index 57f79b3..e4c23d7 100644 --- a/src/components/launch/LaunchWindow.tsx +++ b/src/components/launch/LaunchWindow.tsx @@ -108,6 +108,7 @@ export function LaunchWindow() { setWebcamEnabled, webcamDeviceId, setWebcamDeviceId, + setWebcamDeviceName, } = useScreenRecorder(); const showMicControls = microphoneEnabled && !recording; @@ -149,14 +150,16 @@ export function LaunchWindow() { const selectedMicLabel = micDevices.find((d) => d.deviceId === (microphoneDeviceId || selectedMicId))?.label || t("audio.defaultMicrophone"); + const selectedCameraDevice = cameraDevices.find( + (d) => d.deviceId === (webcamDeviceId || selectedCameraId), + ); const selectedCameraLabel = isCameraDevicesLoading ? t("webcam.searching") : cameraDevicesError ? t("webcam.unavailable") : cameraDevices.length === 0 ? t("webcam.noneFound") - : cameraDevices.find((d) => d.deviceId === (webcamDeviceId || selectedCameraId))?.label || - t("webcam.defaultCamera"); + : selectedCameraDevice?.label || t("webcam.defaultCamera"); const { level } = useAudioLevelMeter({ enabled: showMicControls, @@ -172,8 +175,9 @@ export function LaunchWindow() { useEffect(() => { if (selectedCameraId) { setWebcamDeviceId(selectedCameraId); + setWebcamDeviceName(cameraDevices.find((d) => d.deviceId === selectedCameraId)?.label); } - }, [selectedCameraId, setWebcamDeviceId]); + }, [selectedCameraId, cameraDevices, setWebcamDeviceId, setWebcamDeviceName]); useEffect(() => { if (!import.meta.env.DEV) { @@ -458,8 +462,12 @@ export function LaunchWindow() { { + const device = cameraDevices.find((item) => item.deviceId === e.target.value); setSelectedCameraId(e.target.value); setWebcamDeviceId(e.target.value); + setWebcamDeviceName(device?.label); }} className="sr-only" > diff --git a/src/hooks/useScreenRecorder.ts b/src/hooks/useScreenRecorder.ts index 3947954..1496acd 100644 --- 
a/src/hooks/useScreenRecorder.ts +++ b/src/hooks/useScreenRecorder.ts @@ -55,6 +55,8 @@ type UseScreenRecorderReturn = { setMicrophoneDeviceId: (deviceId: string | undefined) => void; webcamDeviceId: string | undefined; setWebcamDeviceId: (deviceId: string | undefined) => void; + webcamDeviceName: string | undefined; + setWebcamDeviceName: (deviceName: string | undefined) => void; systemAudioEnabled: boolean; setSystemAudioEnabled: (enabled: boolean) => void; webcamEnabled: boolean; @@ -101,6 +103,7 @@ export function useScreenRecorder(): UseScreenRecorderReturn { const [microphoneEnabled, setMicrophoneEnabled] = useState(false); const [microphoneDeviceId, setMicrophoneDeviceId] = useState(undefined); const [webcamDeviceId, setWebcamDeviceId] = useState(undefined); + const [webcamDeviceName, setWebcamDeviceName] = useState(undefined); const [systemAudioEnabled, setSystemAudioEnabled] = useState(false); const [webcamEnabled, setWebcamEnabledState] = useState(false); const screenRecorder = useRef(null); @@ -620,6 +623,7 @@ export function useScreenRecorder(): UseScreenRecorderReturn { webcam: { enabled: webcamEnabled, deviceId: webcamDeviceId, + deviceName: webcamDeviceName, width: WEBCAM_TARGET_WIDTH, height: WEBCAM_TARGET_HEIGHT, fps: WEBCAM_TARGET_FRAME_RATE, @@ -1123,6 +1127,8 @@ export function useScreenRecorder(): UseScreenRecorderReturn { setMicrophoneDeviceId, webcamDeviceId, setWebcamDeviceId, + webcamDeviceName, + setWebcamDeviceName, systemAudioEnabled, setSystemAudioEnabled, webcamEnabled, diff --git a/src/lib/nativeWindowsRecording.ts b/src/lib/nativeWindowsRecording.ts index 7e2f0ba..59de0be 100644 --- a/src/lib/nativeWindowsRecording.ts +++ b/src/lib/nativeWindowsRecording.ts @@ -26,6 +26,7 @@ export type NativeWindowsRecordingRequest = { webcam: { enabled: boolean; deviceId?: string; + deviceName?: string; width: number; height: number; fps: number; From 84484d616732fc4df38c310ecafe3151c0ec79a1 Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 
May 2026 18:33:48 +0200 Subject: [PATCH 17/43] fix: support DirectShow virtual webcams --- .../windows-native-recorder-roadmap.md | 3 + electron/ipc/handlers.ts | 122 +++++ electron/native/README.md | 2 +- electron/native/wgc-capture/CMakeLists.txt | 2 + .../wgc-capture/src/dshow_webcam_capture.cpp | 469 ++++++++++++++++++ .../wgc-capture/src/dshow_webcam_capture.h | 50 ++ electron/native/wgc-capture/src/main.cpp | 3 + .../native/wgc-capture/src/webcam_capture.cpp | 163 +++++- .../native/wgc-capture/src/webcam_capture.h | 6 + scripts/test-windows-wgc-helper.mjs | 73 +++ src/lib/nativeWindowsRecording.ts | 1 + 11 files changed, 875 insertions(+), 19 deletions(-) create mode 100644 electron/native/wgc-capture/src/dshow_webcam_capture.cpp create mode 100644 electron/native/wgc-capture/src/dshow_webcam_capture.h diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md index 12c9502..5129153 100644 --- a/docs/engineering/windows-native-recorder-roadmap.md +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -40,6 +40,7 @@ The helper owns Windows media capture: - WASAPI system loopback; - WASAPI microphone input; - Media Foundation webcam capture; +- DirectShow webcam fallback for virtual cameras not visible to Media Foundation; - Media Foundation encoding/muxing; - stream timestamp normalization. @@ -135,6 +136,7 @@ SSOT rules for this phase: - `WgcSession::captureWidth()/captureHeight()` is the encoded screen frame size until a dedicated native scaling stage exists. - `WasapiLoopbackCapture::inputFormat()` is the runtime audio format source used by `MFEncoder`. - The renderer passes both the browser webcam `deviceId` and selected display label as `deviceName`; `electron/native/wgc-capture/src/webcam_capture.*` is the only place that maps those values to Media Foundation devices. 
+- Electron resolves the selected label to a DirectShow filter CLSID once and passes it as `webcamDirectShowClsid`; the helper must not independently guess among DirectShow filters. - No duplicated hard-coded audio format assumptions in `main.cpp`. ### 3. WASAPI Microphone @@ -158,6 +160,7 @@ Acceptance: - Convert webcam samples to BGRA and compose them into the primary helper MP4 as an initial bottom-right picture-in-picture overlay. - Keep the helper process as the SSOT for screen/window, WASAPI system audio, microphone, webcam, and mux timing. - Match the requested webcam through Media Foundation friendly names first, then browser device ids/symbolic links, so UI selection remains stable across Chromium and Windows native device namespaces. +- Use the Electron-resolved DirectShow CLSID when the selected virtual camera, for example NVIDIA Broadcast, is registered for DirectShow but absent from Media Foundation enumeration. - Later: promote the same webcam capture source to a separate editable native `webcamVideoPath` if product requirements need post-recording layout edits. Acceptance: diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 46b4dc7..1033095 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -476,6 +476,103 @@ function isWindowsGraphicsCaptureOsSupported() { return Number.isFinite(build) && build >= 19041; } +function normalizeNativeDeviceName(value: string) { + return value + .toLowerCase() + .replace(/[^a-z0-9]+/g, " ") + .trim(); +} + +function scoreNativeDeviceName(candidateName: string, candidateId: string, requestedName?: string) { + const candidate = normalizeNativeDeviceName(candidateName); + const id = normalizeNativeDeviceName(candidateId); + const requested = normalizeNativeDeviceName(requestedName ?? 
""); + if (!requested) { + return 0; + } + if (candidate === requested) { + return 1000; + } + if (candidate.includes(requested) || requested.includes(candidate)) { + return 900; + } + if (id.includes(requested) || requested.includes(id)) { + return 800; + } + + return requested + .split(/\s+/) + .filter((word) => word.length > 1 && !["camera", "webcam", "video", "input"].includes(word)) + .reduce((score, word) => { + if (candidate.includes(word)) return score + 100; + if (id.includes(word)) return score + 50; + return score; + }, 0); +} + +function queryDirectShowVideoInputRegistry() { + return new Promise((resolve) => { + const proc = spawn( + "reg.exe", + ["query", "HKCR\\CLSID\\{860BB310-5D01-11D0-BD3B-00A0C911CE86}\\Instance", "/s"], + { windowsHide: true }, + ); + let stdout = ""; + proc.stdout.on("data", (chunk: Buffer) => { + stdout += chunk.toString("utf16le").includes("\u0000") + ? chunk.toString("utf16le") + : chunk.toString(); + }); + proc.on("close", () => resolve(stdout)); + proc.on("error", () => resolve("")); + }); +} + +async function resolveDirectShowWebcamClsid(deviceName?: string) { + if (process.platform !== "win32" || !deviceName?.trim()) { + return null; + } + + const output = await queryDirectShowVideoInputRegistry(); + let current: { friendlyName?: string; clsid?: string } = {}; + const entries: Array<{ friendlyName?: string; clsid?: string }> = []; + for (const rawLine of output.split(/\r?\n/)) { + const line = rawLine.trim(); + if (!line) continue; + if (/^HKEY_/i.test(line)) { + if (current.friendlyName || current.clsid) entries.push(current); + current = {}; + continue; + } + const match = line.match(/^(\S+)\s+REG_SZ\s+(.+)$/); + if (!match) continue; + if (match[1] === "FriendlyName") current.friendlyName = match[2].trim(); + if (match[1] === "CLSID") current.clsid = match[2].trim(); + } + if (current.friendlyName || current.clsid) entries.push(current); + + let best: { clsid: string; friendlyName?: string; score: number } | null = 
null; + for (const entry of entries) { + if (!entry.clsid) continue; + const score = scoreNativeDeviceName(entry.friendlyName ?? "", entry.clsid, deviceName); + if (!best || score > best.score) { + best = { clsid: entry.clsid, friendlyName: entry.friendlyName, score }; + } + } + + if (!best || best.score <= 0) { + return null; + } + + console.info("[native-wgc] resolved DirectShow webcam filter", { + requestedName: deviceName, + filterName: best.friendlyName, + clsid: best.clsid, + score: best.score, + }); + return best.clsid; +} + async function startCursorRecording(recordingId?: number) { if (cursorRecordingSession) { pendingCursorRecordingData = await cursorRecordingSession.stop(); @@ -623,6 +720,25 @@ function waitForNativeWindowsCaptureStop(proc: ChildProcessWithoutNullStreams) { }); } +function readNativeWindowsWebcamFormat(output: string) { + const lines = output.split(/\r?\n/).filter((line) => line.includes('"event":"webcam-format"')); + const lastLine = lines.at(-1); + if (!lastLine) { + return null; + } + + try { + return JSON.parse(lastLine) as { + width?: number; + height?: number; + fps?: number; + deviceName?: string; + }; + } catch { + return null; + } +} + function setCurrentRecordingSessionState(session: RecordingSession | null) { currentRecordingSession = session; currentVideoPath = session?.screenVideoPath ?? null; @@ -866,6 +982,9 @@ export function registerIpcHandlers( typeof request.source.displayId === "number" && Number.isFinite(request.source.displayId) ? request.source.displayId : Number(selectedSource?.display_id); + const webcamDirectShowClsid = request.webcam.enabled + ? await resolveDirectShowWebcamClsid(request.webcam.deviceName) + : null; const config = { schemaVersion: 2, recordingId, @@ -889,6 +1008,7 @@ export function registerIpcHandlers( webcamEnabled: request.webcam.enabled, webcamDeviceId: request.webcam.deviceId ?? null, webcamDeviceName: request.webcam.deviceName ?? 
null, + webcamDirectShowClsid, webcamWidth: request.webcam.width, webcamHeight: request.webcam.height, webcamFps: request.webcam.fps, @@ -943,9 +1063,11 @@ export function registerIpcHandlers( await waitForNativeWindowsCaptureStart(proc); const captureStartedAtMs = Date.now(); nativeWindowsCursorOffsetMs = Math.max(0, captureStartedAtMs - cursorStartTimeMs); + const webcamFormat = readNativeWindowsWebcamFormat(nativeWindowsCaptureOutput); console.info("[native-wgc] capture started", { captureStartedAtMs, cursorOffsetMs: nativeWindowsCursorOffsetMs, + webcamFormat, }); const source = selectedSource || { name: "Screen" }; diff --git a/electron/native/README.md b/electron/native/README.md index 844f79c..8b3590d 100644 --- a/electron/native/README.md +++ b/electron/native/README.md @@ -47,7 +47,7 @@ Current V2 JSON shape: } ``` -The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, and Media Foundation webcam capture. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links, so the renderer passes both `webcamDeviceId` and `webcamDeviceName`. The helper treats the Media Foundation friendly name as the preferred stable selector, then tries the browser id, and only falls back to the default webcam with an explicit warning when no requested device matches. +The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, Media Foundation webcam capture, and a DirectShow webcam fallback for virtual cameras that are not exposed through Media Foundation. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links, so the renderer passes both `webcamDeviceId` and `webcamDeviceName`. 
Electron resolves a matching DirectShow filter CLSID for the selected label; the helper uses Media Foundation first, then that exact DirectShow filter when the requested camera is absent from Media Foundation. Smoke-test the helper with: diff --git a/electron/native/wgc-capture/CMakeLists.txt b/electron/native/wgc-capture/CMakeLists.txt index 92b9335..7503658 100644 --- a/electron/native/wgc-capture/CMakeLists.txt +++ b/electron/native/wgc-capture/CMakeLists.txt @@ -16,6 +16,8 @@ set(CMAKE_CXX_EXTENSIONS OFF) add_executable(wgc-capture src/audio_sample_utils.cpp src/audio_sample_utils.h + src/dshow_webcam_capture.cpp + src/dshow_webcam_capture.h src/main.cpp src/mf_encoder.cpp src/mf_encoder.h diff --git a/electron/native/wgc-capture/src/dshow_webcam_capture.cpp b/electron/native/wgc-capture/src/dshow_webcam_capture.cpp new file mode 100644 index 0000000..535bf7e --- /dev/null +++ b/electron/native/wgc-capture/src/dshow_webcam_capture.cpp @@ -0,0 +1,469 @@ +#include "dshow_webcam_capture.h" + +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +const CLSID CLSID_SampleGrabberLocal = {0xC1F400A0, 0x3F08, 0x11D3, {0x9F, 0x0B, 0x00, 0x60, 0x08, 0x03, 0x9E, 0x37}}; +const CLSID CLSID_NullRendererLocal = {0xC1F400A4, 0x3F08, 0x11D3, {0x9F, 0x0B, 0x00, 0x60, 0x08, 0x03, 0x9E, 0x37}}; + +MIDL_INTERFACE("0579154A-2B53-4994-B0D0-E773148EFF85") +ISampleGrabberCB : public IUnknown { +public: + virtual HRESULT STDMETHODCALLTYPE SampleCB(double sampleTime, IMediaSample* sample) = 0; + virtual HRESULT STDMETHODCALLTYPE BufferCB(double sampleTime, BYTE* buffer, long bufferLength) = 0; +}; + +MIDL_INTERFACE("6B652FFF-11FE-4FCE-92AD-0266B5D7C78F") +ISampleGrabber : public IUnknown { +public: + virtual HRESULT STDMETHODCALLTYPE SetOneShot(BOOL oneShot) = 0; + virtual HRESULT STDMETHODCALLTYPE SetMediaType(const AM_MEDIA_TYPE* type) = 0; + virtual HRESULT STDMETHODCALLTYPE GetConnectedMediaType(AM_MEDIA_TYPE* type) = 0; + virtual HRESULT 
STDMETHODCALLTYPE SetBufferSamples(BOOL bufferThem) = 0; + virtual HRESULT STDMETHODCALLTYPE GetCurrentBuffer(long* bufferSize, long* buffer) = 0; + virtual HRESULT STDMETHODCALLTYPE GetCurrentSample(IMediaSample** sample) = 0; + virtual HRESULT STDMETHODCALLTYPE SetCallback(ISampleGrabberCB* callback, long whichMethodToCallback) = 0; +}; + +bool succeeded(HRESULT hr, const char* label) { + if (SUCCEEDED(hr)) { + return true; + } + + std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")" + << std::endl; + return false; +} + +std::wstring readPropertyString(IPropertyBag* bag, LPCOLESTR key) { + VARIANT value; + VariantInit(&value); + if (FAILED(bag->Read(key, &value, nullptr)) || value.vt != VT_BSTR || !value.bstrVal) { + VariantClear(&value); + return {}; + } + + std::wstring result(value.bstrVal); + VariantClear(&value); + return result; +} + +bool containsInsensitive(const std::wstring& haystack, const std::wstring& needle) { + if (haystack.empty() || needle.empty()) { + return false; + } + + std::wstring lowerHaystack = haystack; + std::wstring lowerNeedle = needle; + std::transform(lowerHaystack.begin(), lowerHaystack.end(), lowerHaystack.begin(), ::towlower); + std::transform(lowerNeedle.begin(), lowerNeedle.end(), lowerNeedle.begin(), ::towlower); + return lowerHaystack.find(lowerNeedle) != std::wstring::npos || + lowerNeedle.find(lowerHaystack) != std::wstring::npos; +} + +std::wstring normalizeDeviceName(const std::wstring& value) { + std::wstring normalized; + normalized.reserve(value.size()); + bool lastWasSpace = true; + for (const wchar_t ch : value) { + if (std::iswalnum(ch)) { + normalized.push_back(static_cast(std::towlower(ch))); + lastWasSpace = false; + continue; + } + if (!lastWasSpace) { + normalized.push_back(L' '); + lastWasSpace = true; + } + } + while (!normalized.empty() && normalized.back() == L' ') { + normalized.pop_back(); + } + return normalized; +} + +std::vector splitWords(const std::wstring& value) 
{ + std::vector words; + size_t start = 0; + while (start < value.size()) { + const size_t end = value.find(L' ', start); + const auto word = value.substr(start, end == std::wstring::npos ? std::wstring::npos : end - start); + if (word.size() > 1 && word != L"camera" && word != L"webcam" && word != L"video" && word != L"input") { + words.push_back(word); + } + if (end == std::wstring::npos) { + break; + } + start = end + 1; + } + return words; +} + +int deviceMatchScore( + const std::wstring& candidateName, + const std::wstring& candidatePath, + const std::wstring& requestedName, + const std::wstring& requestedId) { + int score = 0; + const auto normalizedName = normalizeDeviceName(candidateName); + const auto normalizedPath = normalizeDeviceName(candidatePath); + const auto normalizedRequestedName = normalizeDeviceName(requestedName); + const auto normalizedRequestedId = normalizeDeviceName(requestedId); + + if (!normalizedRequestedName.empty()) { + if (normalizedName == normalizedRequestedName) { + score = std::max(score, 1000); + } + if (containsInsensitive(normalizedName, normalizedRequestedName)) { + score = std::max(score, 900); + } + if (containsInsensitive(normalizedPath, normalizedRequestedName)) { + score = std::max(score, 800); + } + + int wordScore = 0; + for (const auto& word : splitWords(normalizedRequestedName)) { + if (normalizedName.find(word) != std::wstring::npos) { + wordScore += 100; + } else if (normalizedPath.find(word) != std::wstring::npos) { + wordScore += 50; + } + } + score = std::max(score, wordScore); + } + + if (!normalizedRequestedId.empty()) { + if (containsInsensitive(normalizedPath, normalizedRequestedId)) { + score = std::max(score, 700); + } + if (containsInsensitive(normalizedName, normalizedRequestedId)) { + score = std::max(score, 600); + } + } + + return score; +} + +void freeMediaType(AM_MEDIA_TYPE& type) { + if (type.cbFormat != 0) { + CoTaskMemFree(type.pbFormat); + type.cbFormat = 0; + type.pbFormat = nullptr; + } + if 
(type.pUnk) { + type.pUnk->Release(); + type.pUnk = nullptr; + } +} + +bool readRegistryString(HKEY key, const wchar_t* valueName, std::wstring& value) { + DWORD type = 0; + DWORD size = 0; + if (RegGetValueW(key, nullptr, valueName, RRF_RT_REG_SZ, &type, nullptr, &size) != ERROR_SUCCESS || size == 0) { + return false; + } + + std::wstring buffer(size / sizeof(wchar_t), L'\0'); + if (RegGetValueW(key, nullptr, valueName, RRF_RT_REG_SZ, &type, buffer.data(), &size) != ERROR_SUCCESS) { + return false; + } + while (!buffer.empty() && buffer.back() == L'\0') { + buffer.pop_back(); + } + value = buffer; + return true; +} + +bool findRegisteredVideoInput( + const std::wstring& deviceId, + const std::wstring& deviceName, + CLSID& selectedClsid, + std::wstring& selectedName, + int& bestScore) { + HKEY instanceKey = nullptr; + if (RegOpenKeyExW( + HKEY_CLASSES_ROOT, + L"CLSID\\{860BB310-5D01-11D0-BD3B-00A0C911CE86}\\Instance", + 0, + KEY_READ, + &instanceKey) != ERROR_SUCCESS) { + return false; + } + + DWORD index = 0; + wchar_t subkeyName[128]; + DWORD subkeyNameLength = ARRAYSIZE(subkeyName); + bool found = false; + while (RegEnumKeyExW(instanceKey, index, subkeyName, &subkeyNameLength, nullptr, nullptr, nullptr, nullptr) == ERROR_SUCCESS) { + HKEY filterKey = nullptr; + if (RegOpenKeyExW(instanceKey, subkeyName, 0, KEY_READ, &filterKey) == ERROR_SUCCESS) { + std::wstring friendlyName; + std::wstring clsidText; + readRegistryString(filterKey, L"FriendlyName", friendlyName); + readRegistryString(filterKey, L"CLSID", clsidText); + const int score = deviceMatchScore(friendlyName, clsidText, deviceName, deviceId); + std::wcerr << L"INFO: Registered DirectShow webcam candidate name=\"" << friendlyName << L"\" score=" << score << std::endl; + CLSID clsid{}; + if (!clsidText.empty() && SUCCEEDED(CLSIDFromString(clsidText.c_str(), &clsid)) && (!found || score > bestScore)) { + selectedClsid = clsid; + selectedName = friendlyName; + bestScore = score; + found = true; + } + 
RegCloseKey(filterKey); + } + index += 1; + subkeyNameLength = ARRAYSIZE(subkeyName); + } + + RegCloseKey(instanceKey); + return found; +} + +} // namespace + +struct DirectShowWebcamCapture::Impl { + Microsoft::WRL::ComPtr graph; + Microsoft::WRL::ComPtr captureGraph; + Microsoft::WRL::ComPtr captureFilter; + Microsoft::WRL::ComPtr sampleGrabberFilter; + Microsoft::WRL::ComPtr sampleGrabber; + Microsoft::WRL::ComPtr nullRenderer; + Microsoft::WRL::ComPtr mediaControl; + bool comInitialized = false; + bool running = false; +}; + +DirectShowWebcamCapture::~DirectShowWebcamCapture() { + stop(); + delete impl_; +} + +bool DirectShowWebcamCapture::initialize( + const std::wstring& deviceId, + const std::wstring& deviceName, + const std::wstring& directShowClsid, + int requestedWidth, + int requestedHeight, + int requestedFps) { + stop(); + delete impl_; + impl_ = new Impl(); + fps_ = std::clamp(requestedFps > 0 ? requestedFps : 30, 1, 60); + + HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED); + if (SUCCEEDED(hr)) { + impl_->comInitialized = true; + } else if (hr != RPC_E_CHANGED_MODE) { + return succeeded(hr, "CoInitializeEx(DirectShow webcam)"); + } + + if (directShowClsid.empty()) { + std::cerr << "ERROR: DirectShow webcam fallback requires a resolved filter CLSID" << std::endl; + return false; + } + + CLSID selectedClsid{}; + if (FAILED(CLSIDFromString(directShowClsid.c_str(), &selectedClsid))) { + std::cerr << "ERROR: DirectShow webcam fallback received an invalid filter CLSID" << std::endl; + return false; + } + selectedDeviceName_ = deviceName.empty() ? 
directShowClsid : deviceName; + + if (!succeeded(CoCreateInstance(selectedClsid, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->captureFilter)), + "CoCreateInstance(DirectShow webcam filter)")) { + return false; + } + if (!succeeded(CoCreateInstance(CLSID_FilterGraph, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->graph)), + "CoCreateInstance(FilterGraph)")) { + return false; + } + if (!succeeded(CoCreateInstance(CLSID_CaptureGraphBuilder2, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->captureGraph)), + "CoCreateInstance(CaptureGraphBuilder2)")) { + return false; + } + if (!succeeded(impl_->captureGraph->SetFiltergraph(impl_->graph.Get()), "SetFiltergraph(DirectShow webcam)")) { + return false; + } + if (!succeeded(impl_->graph->AddFilter(impl_->captureFilter.Get(), L"OpenScreen Webcam Source"), + "AddFilter(DirectShow webcam source)")) { + return false; + } + + if (!succeeded(CoCreateInstance(CLSID_SampleGrabberLocal, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->sampleGrabberFilter)), + "CoCreateInstance(SampleGrabber)")) { + return false; + } + if (!succeeded(impl_->sampleGrabberFilter.As(&impl_->sampleGrabber), "QueryInterface(ISampleGrabber)")) { + return false; + } + + AM_MEDIA_TYPE requestedType{}; + requestedType.majortype = MEDIATYPE_Video; + requestedType.subtype = MEDIASUBTYPE_RGB32; + requestedType.formattype = FORMAT_VideoInfo; + if (!succeeded(impl_->sampleGrabber->SetMediaType(&requestedType), "SetMediaType(DirectShow RGB32)")) { + return false; + } + + if (!succeeded(impl_->graph->AddFilter(impl_->sampleGrabberFilter.Get(), L"OpenScreen Webcam Sample Grabber"), + "AddFilter(SampleGrabber)")) { + return false; + } + if (!succeeded(CoCreateInstance(CLSID_NullRendererLocal, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->nullRenderer)), + "CoCreateInstance(NullRenderer)")) { + return false; + } + if (!succeeded(impl_->graph->AddFilter(impl_->nullRenderer.Get(), L"OpenScreen Webcam Null Renderer"), + "AddFilter(NullRenderer)")) 
{ + return false; + } + + if (!succeeded(impl_->captureGraph->RenderStream( + &PIN_CATEGORY_CAPTURE, + &MEDIATYPE_Video, + impl_->captureFilter.Get(), + impl_->sampleGrabberFilter.Get(), + impl_->nullRenderer.Get()), + "RenderStream(DirectShow webcam)")) { + return false; + } + + AM_MEDIA_TYPE connectedType{}; + if (!succeeded(impl_->sampleGrabber->GetConnectedMediaType(&connectedType), "GetConnectedMediaType(DirectShow webcam)")) { + return false; + } + if (connectedType.formattype == FORMAT_VideoInfo && connectedType.pbFormat) { + const auto* videoInfo = reinterpret_cast(connectedType.pbFormat); + width_ = std::abs(videoInfo->bmiHeader.biWidth); + height_ = std::abs(videoInfo->bmiHeader.biHeight); + sourceTopDown_ = videoInfo->bmiHeader.biHeight < 0; + } + freeMediaType(connectedType); + if (width_ <= 0 || height_ <= 0) { + width_ = requestedWidth > 0 ? requestedWidth : 1280; + height_ = requestedHeight > 0 ? requestedHeight : 720; + } + + impl_->sampleGrabber->SetBufferSamples(TRUE); + impl_->sampleGrabber->SetOneShot(FALSE); + if (!succeeded(impl_->graph.As(&impl_->mediaControl), "QueryInterface(IMediaControl)")) { + return false; + } + + return true; +} + +bool DirectShowWebcamCapture::start() { + if (!impl_ || !impl_->mediaControl || impl_->running) { + return false; + } + HRESULT hr = impl_->mediaControl->Run(); + if (!succeeded(hr, "Run(DirectShow webcam)")) { + return false; + } + impl_->running = true; + stopRequested_ = false; + thread_ = std::thread(&DirectShowWebcamCapture::captureLoop, this); + return true; +} + +void DirectShowWebcamCapture::stop() { + stopRequested_ = true; + if (thread_.joinable()) { + thread_.join(); + } + if (!impl_) { + return; + } + if (impl_->mediaControl && impl_->running) { + impl_->mediaControl->Stop(); + } + impl_->running = false; + impl_->mediaControl.Reset(); + impl_->nullRenderer.Reset(); + impl_->sampleGrabber.Reset(); + impl_->sampleGrabberFilter.Reset(); + impl_->captureFilter.Reset(); + impl_->captureGraph.Reset(); 
+ impl_->graph.Reset(); + if (impl_->comInitialized) { + CoUninitialize(); + impl_->comInitialized = false; + } +} + +void DirectShowWebcamCapture::captureLoop() { + CoInitializeEx(nullptr, COINIT_MULTITHREADED); + while (!stopRequested_ && impl_ && impl_->sampleGrabber) { + long bufferSize = 0; + HRESULT hr = impl_->sampleGrabber->GetCurrentBuffer(&bufferSize, nullptr); + if (SUCCEEDED(hr) && bufferSize > 0) { + std::vector buffer(static_cast(bufferSize)); + hr = impl_->sampleGrabber->GetCurrentBuffer(&bufferSize, reinterpret_cast(buffer.data())); + if (SUCCEEDED(hr)) { + storeFrame(buffer.data(), bufferSize); + } + } + std::this_thread::sleep_for(std::chrono::milliseconds(1000 / std::max(1, fps_))); + } + CoUninitialize(); +} + +void DirectShowWebcamCapture::storeFrame(const BYTE* buffer, long length) { + const int stride = width_ * 4; + const int expectedLength = stride * height_; + if (!buffer || length < expectedLength || width_ <= 0 || height_ <= 0) { + return; + } + + std::vector frame(static_cast(expectedLength)); + for (int y = 0; y < height_; y += 1) { + const int sourceY = sourceTopDown_ ? 
y : height_ - 1 - y; + const BYTE* source = buffer + sourceY * stride; + BYTE* destination = frame.data() + y * stride; + std::copy(source, source + stride, destination); + for (int x = 0; x < width_; x += 1) { + destination[x * 4 + 3] = 255; + } + } + + std::scoped_lock lock(frameMutex_); + latestFrame_ = std::move(frame); +} + +bool DirectShowWebcamCapture::copyLatestFrame(std::vector& destination, int& width, int& height) { + std::scoped_lock lock(frameMutex_); + if (latestFrame_.empty() || width_ <= 0 || height_ <= 0) { + return false; + } + + destination = latestFrame_; + width = width_; + height = height_; + return true; +} + +int DirectShowWebcamCapture::width() const { + return width_; +} + +int DirectShowWebcamCapture::height() const { + return height_; +} + +int DirectShowWebcamCapture::fps() const { + return fps_; +} + +const std::wstring& DirectShowWebcamCapture::selectedDeviceName() const { + return selectedDeviceName_; +} diff --git a/electron/native/wgc-capture/src/dshow_webcam_capture.h b/electron/native/wgc-capture/src/dshow_webcam_capture.h new file mode 100644 index 0000000..906da8f --- /dev/null +++ b/electron/native/wgc-capture/src/dshow_webcam_capture.h @@ -0,0 +1,50 @@ +#pragma once + +#include + +#include +#include +#include +#include +#include + +class DirectShowWebcamCapture { +public: + DirectShowWebcamCapture() = default; + ~DirectShowWebcamCapture(); + + DirectShowWebcamCapture(const DirectShowWebcamCapture&) = delete; + DirectShowWebcamCapture& operator=(const DirectShowWebcamCapture&) = delete; + + bool initialize( + const std::wstring& deviceId, + const std::wstring& deviceName, + const std::wstring& directShowClsid, + int requestedWidth, + int requestedHeight, + int requestedFps); + bool start(); + void stop(); + bool copyLatestFrame(std::vector& destination, int& width, int& height); + + int width() const; + int height() const; + int fps() const; + const std::wstring& selectedDeviceName() const; + void storeFrame(const BYTE* 
buffer, long length); + +private: + struct Impl; + void captureLoop(); + + Impl* impl_ = nullptr; + std::thread thread_; + std::atomic stopRequested_ = false; + std::mutex frameMutex_; + std::vector latestFrame_; + int width_ = 0; + int height_ = 0; + int fps_ = 30; + bool sourceTopDown_ = false; + std::wstring selectedDeviceName_; +}; diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index 56c34be..c58f092 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ b/electron/native/wgc-capture/src/main.cpp @@ -41,6 +41,7 @@ struct CaptureConfig { double microphoneGain = 1.0; std::string webcamDeviceId; std::string webcamDeviceName; + std::string webcamDirectShowClsid; int webcamWidth = 0; int webcamHeight = 0; int webcamFps = 0; @@ -280,6 +281,7 @@ bool parseConfig(const std::string& json, CaptureConfig& config) { config.microphoneGain = findDouble(json, "microphoneGain", 1.0); config.webcamDeviceId = findString(json, "webcamDeviceId"); config.webcamDeviceName = findString(json, "webcamDeviceName"); + config.webcamDirectShowClsid = findString(json, "webcamDirectShowClsid"); config.webcamWidth = findInt(json, "webcamWidth", 0); config.webcamHeight = findInt(json, "webcamHeight", 0); config.webcamFps = findInt(json, "webcamFps", 0); @@ -362,6 +364,7 @@ int main(int argc, char* argv[]) { if (!webcamCapture.initialize( utf8ToWide(config.webcamDeviceId), utf8ToWide(config.webcamDeviceName), + utf8ToWide(config.webcamDirectShowClsid), config.webcamWidth, config.webcamHeight, config.webcamFps > 0 ? 
config.webcamFps : config.fps)) { diff --git a/electron/native/wgc-capture/src/webcam_capture.cpp b/electron/native/wgc-capture/src/webcam_capture.cpp index 708d72c..aff9fdb 100644 --- a/electron/native/wgc-capture/src/webcam_capture.cpp +++ b/electron/native/wgc-capture/src/webcam_capture.cpp @@ -6,6 +6,7 @@ #include #include +#include #include namespace { @@ -45,6 +46,89 @@ bool containsInsensitive(const std::wstring& haystack, const std::wstring& needl lowerNeedle.find(lowerHaystack) != std::wstring::npos; } +std::wstring normalizeDeviceName(const std::wstring& value) { + std::wstring normalized; + normalized.reserve(value.size()); + bool lastWasSpace = true; + for (const wchar_t ch : value) { + if (std::iswalnum(ch)) { + normalized.push_back(static_cast(std::towlower(ch))); + lastWasSpace = false; + continue; + } + if (!lastWasSpace) { + normalized.push_back(L' '); + lastWasSpace = true; + } + } + while (!normalized.empty() && normalized.back() == L' ') { + normalized.pop_back(); + } + return normalized; +} + +std::vector splitWords(const std::wstring& value) { + std::vector words; + size_t start = 0; + while (start < value.size()) { + const size_t end = value.find(L' ', start); + const auto word = value.substr(start, end == std::wstring::npos ? 
std::wstring::npos : end - start); + if (word.size() > 1 && word != L"camera" && word != L"webcam" && word != L"video" && word != L"input") { + words.push_back(word); + } + if (end == std::wstring::npos) { + break; + } + start = end + 1; + } + return words; +} + +int deviceMatchScore( + const std::wstring& candidateName, + const std::wstring& candidateLink, + const std::wstring& requestedName, + const std::wstring& requestedId) { + int score = 0; + const auto normalizedName = normalizeDeviceName(candidateName); + const auto normalizedLink = normalizeDeviceName(candidateLink); + const auto normalizedRequestedName = normalizeDeviceName(requestedName); + const auto normalizedRequestedId = normalizeDeviceName(requestedId); + + if (!normalizedRequestedName.empty()) { + if (normalizedName == normalizedRequestedName) { + score = std::max(score, 1000); + } + if (containsInsensitive(normalizedName, normalizedRequestedName)) { + score = std::max(score, 900); + } + if (containsInsensitive(normalizedLink, normalizedRequestedName)) { + score = std::max(score, 800); + } + + int wordScore = 0; + for (const auto& word : splitWords(normalizedRequestedName)) { + if (normalizedName.find(word) != std::wstring::npos) { + wordScore += 100; + } else if (normalizedLink.find(word) != std::wstring::npos) { + wordScore += 50; + } + } + score = std::max(score, wordScore); + } + + if (!normalizedRequestedId.empty()) { + if (containsInsensitive(normalizedLink, normalizedRequestedId)) { + score = std::max(score, 700); + } + if (containsInsensitive(normalizedName, normalizedRequestedId)) { + score = std::max(score, 600); + } + } + + return score; +} + } // namespace WebcamCapture::~WebcamCapture() { @@ -54,15 +138,49 @@ WebcamCapture::~WebcamCapture() { bool WebcamCapture::initialize( const std::wstring& deviceId, const std::wstring& deviceName, + const std::wstring& directShowClsid, int requestedWidth, int requestedHeight, int requestedFps) { fps_ = std::clamp(requestedFps > 0 ? 
requestedFps : 30, 1, 60); + usingDirectShow_ = false; + selectedMatchScore_ = 0; if (!succeeded(MFStartup(MF_VERSION), "MFStartup(webcam)")) { + if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) { + usingDirectShow_ = true; + return true; + } return false; } mfStarted_ = true; if (!selectDevice(deviceId, deviceName)) { + if (mfStarted_) { + MFShutdown(); + mfStarted_ = false; + } + if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) { + usingDirectShow_ = true; + return true; + } + return false; + } + + if ((!deviceId.empty() || !deviceName.empty()) && selectedMatchScore_ <= 0) { + if (mediaSource_) { + mediaSource_->Shutdown(); + } + sourceReader_.Reset(); + mediaSource_.Reset(); + if (mfStarted_) { + MFShutdown(); + mfStarted_ = false; + } + if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) { + usingDirectShow_ = true; + return true; + } + std::cerr << "ERROR: Requested webcam device was not found by native Windows webcam providers" + << std::endl; return false; } @@ -93,34 +211,24 @@ bool WebcamCapture::selectDevice(const std::wstring& deviceId, const std::wstrin } UINT32 selectedIndex = 0; - bool matched = false; - auto matchesRequestedDevice = [&](const std::wstring& name, const std::wstring& symbolicLink) { - if (!deviceName.empty() && - (containsInsensitive(name, deviceName) || containsInsensitive(symbolicLink, deviceName))) { - return true; - } - if (!deviceId.empty() && - (containsInsensitive(symbolicLink, deviceId) || containsInsensitive(name, deviceId))) { - return true; - } - return false; - }; - + int bestScore = 0; for (UINT32 index = 0; index < deviceCount; index += 1) { const std::wstring name = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); const std::wstring symbolicLink = readAllocatedString(devices[index], 
MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK); - if (matchesRequestedDevice(name, symbolicLink)) { + const int score = deviceMatchScore(name, symbolicLink, deviceName, deviceId); + std::wcerr << L"INFO: Native webcam candidate [" << index << L"] name=\"" << name << L"\" score=" << score << std::endl; + if (score > bestScore) { selectedIndex = index; - matched = true; - break; + bestScore = score; } } - if ((!deviceId.empty() || !deviceName.empty()) && !matched) { - std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; using default webcam" + if ((!deviceId.empty() || !deviceName.empty()) && bestScore <= 0) { + std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; trying DirectShow" << std::endl; } + selectedMatchScore_ = bestScore; selectedDeviceName_ = readAllocatedString(devices[selectedIndex], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); hr = devices[selectedIndex]->ActivateObject(IID_PPV_ARGS(&mediaSource_)); @@ -181,6 +289,9 @@ bool WebcamCapture::configureReader(int requestedWidth, int requestedHeight, int } bool WebcamCapture::start() { + if (usingDirectShow_) { + return directShowCapture_.start(); + } if (!sourceReader_ || thread_.joinable()) { return false; } @@ -191,6 +302,7 @@ bool WebcamCapture::start() { } void WebcamCapture::stop() { + directShowCapture_.stop(); stopRequested_ = true; if (thread_.joinable()) { thread_.join(); @@ -262,6 +374,9 @@ void WebcamCapture::captureLoop() { } bool WebcamCapture::copyLatestFrame(std::vector& destination, int& width, int& height) { + if (usingDirectShow_) { + return directShowCapture_.copyLatestFrame(destination, width, height); + } std::scoped_lock lock(frameMutex_); if (latestFrame_.empty() || width_ <= 0 || height_ <= 0) { return false; @@ -274,17 +389,29 @@ bool WebcamCapture::copyLatestFrame(std::vector& destination, int& width, } int WebcamCapture::width() const { + if (usingDirectShow_) { + return directShowCapture_.width(); + } return width_; } 
int WebcamCapture::height() const { + if (usingDirectShow_) { + return directShowCapture_.height(); + } return height_; } int WebcamCapture::fps() const { + if (usingDirectShow_) { + return directShowCapture_.fps(); + } return fps_; } const std::wstring& WebcamCapture::selectedDeviceName() const { + if (usingDirectShow_) { + return directShowCapture_.selectedDeviceName(); + } return selectedDeviceName_; } diff --git a/electron/native/wgc-capture/src/webcam_capture.h b/electron/native/wgc-capture/src/webcam_capture.h index 201db25..c539d02 100644 --- a/electron/native/wgc-capture/src/webcam_capture.h +++ b/electron/native/wgc-capture/src/webcam_capture.h @@ -1,5 +1,7 @@ #pragma once +#include "dshow_webcam_capture.h" + #include #include #include @@ -23,6 +25,7 @@ public: bool initialize( const std::wstring& deviceId, const std::wstring& deviceName, + const std::wstring& directShowClsid, int requestedWidth, int requestedHeight, int requestedFps); @@ -42,6 +45,7 @@ private: Microsoft::WRL::ComPtr mediaSource_; Microsoft::WRL::ComPtr sourceReader_; + DirectShowWebcamCapture directShowCapture_; std::thread thread_; std::atomic stopRequested_ = false; std::mutex frameMutex_; @@ -50,5 +54,7 @@ private: int height_ = 0; int fps_ = 30; bool mfStarted_ = false; + bool usingDirectShow_ = false; + int selectedMatchScore_ = 0; std::wstring selectedDeviceName_; }; diff --git a/scripts/test-windows-wgc-helper.mjs b/scripts/test-windows-wgc-helper.mjs index 3f9fb93..65b192e 100644 --- a/scripts/test-windows-wgc-helper.mjs +++ b/scripts/test-windows-wgc-helper.mjs @@ -105,6 +105,67 @@ function startFixtureWindow() { }); } +function normalizeDeviceName(value) { + return value + .toLowerCase() + .replace(/[^a-z0-9]+/g, " ") + .trim(); +} + +function scoreDeviceName(candidateName, candidateId, requestedName) { + const candidate = normalizeDeviceName(candidateName ?? ""); + const id = normalizeDeviceName(candidateId ?? ""); + const requested = normalizeDeviceName(requestedName ?? 
""); + if (!requested) return 0; + if (candidate === requested) return 1000; + if (candidate.includes(requested) || requested.includes(candidate)) return 900; + if (id.includes(requested) || requested.includes(id)) return 800; + return requested + .split(/\s+/) + .filter((word) => word.length > 1 && !["camera", "webcam", "video", "input"].includes(word)) + .reduce((score, word) => { + if (candidate.includes(word)) return score + 100; + if (id.includes(word)) return score + 50; + return score; + }, 0); +} + +function resolveDirectShowWebcamClsid(requestedName) { + if (!requestedName) return ""; + const query = spawnSync( + "reg.exe", + ["query", "HKCR\\CLSID\\{860BB310-5D01-11D0-BD3B-00A0C911CE86}\\Instance", "/s"], + { encoding: "utf8", windowsHide: true }, + ); + if (query.status !== 0) return ""; + const entries = []; + let current = {}; + for (const rawLine of query.stdout.split(/\r?\n/)) { + const line = rawLine.trim(); + if (!line) continue; + if (/^HKEY_/i.test(line)) { + if (current.friendlyName || current.clsid) entries.push(current); + current = {}; + continue; + } + const match = line.match(/^(\S+)\s+REG_SZ\s+(.+)$/); + if (!match) continue; + if (match[1] === "FriendlyName") current.friendlyName = match[2].trim(); + if (match[1] === "CLSID") current.clsid = match[2].trim(); + } + if (current.friendlyName || current.clsid) entries.push(current); + + let best = null; + for (const entry of entries) { + if (!entry.clsid) continue; + const score = scoreDeviceName(entry.friendlyName, entry.clsid, requestedName); + if (!best || score > best.score) { + best = { ...entry, score }; + } + } + return best && best.score > 0 ? best.clsid : ""; +} + function probeStreams(outputPath) { const ffprobe = spawnSync( "ffprobe", @@ -191,6 +252,9 @@ const config = { webcamEnabled: WITH_WEBCAM, webcamDeviceId: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_ID ?? "", webcamDeviceName: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME ?? 
"", + webcamDirectShowClsid: resolveDirectShowWebcamClsid( + process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME ?? "", + ), webcamWidth: 640, webcamHeight: 360, webcamFps: 30, @@ -224,6 +288,13 @@ if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) { const streams = probeStreams(outputPath); const hasVideo = streams.some((stream) => stream.codec_type === "video"); const hasAudio = streams.some((stream) => stream.codec_type === "audio"); +const webcamFormatLine = result.stdout + .split(/\r?\n/) + .find((line) => line.includes('"event":"webcam-format"')); +const webcamFormat = webcamFormatLine ? JSON.parse(webcamFormatLine) : null; +const nativeWebcamDiagnostics = result.stderr + .split(/\r?\n/) + .filter((line) => line.includes("Native webcam candidate")); if (!hasVideo) { throw new Error(`WGC helper output has no video stream: ${outputPath}`); } @@ -249,6 +320,8 @@ console.log( codecName: stream.codec_name, duration: stream.duration, })), + selectedWebcamDeviceName: webcamFormat?.deviceName, + nativeWebcamDiagnostics, firstFrameLuma: frameLuma, }, null, diff --git a/src/lib/nativeWindowsRecording.ts b/src/lib/nativeWindowsRecording.ts index 59de0be..ee8ecc8 100644 --- a/src/lib/nativeWindowsRecording.ts +++ b/src/lib/nativeWindowsRecording.ts @@ -27,6 +27,7 @@ export type NativeWindowsRecordingRequest = { enabled: boolean; deviceId?: string; deviceName?: string; + directShowClsid?: string; width: number; height: number; fps: number; From 38d727eb8e51275bb10ddca590f8709d02c50a12 Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 18:51:08 +0200 Subject: [PATCH 18/43] fix: skip black webcam warmup frames --- .../windows-native-recorder-roadmap.md | 1 + docs/testing/windows-native-cursor.md | 6 +- electron/native/wgc-capture/src/main.cpp | 57 +++++++++++++++++-- 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md index 
5129153..29986db 100644 --- a/docs/engineering/windows-native-recorder-roadmap.md +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -158,6 +158,7 @@ Acceptance: - Add Media Foundation webcam source reader. - Select requested dimensions/fps or the nearest format accepted by Media Foundation. - Convert webcam samples to BGRA and compose them into the primary helper MP4 as an initial bottom-right picture-in-picture overlay. +- Ignore black webcam warmup frames and keep the overlay hidden until the first visible frame is available, so virtual cameras do not flash a black picture-in-picture rectangle at recording start. - Keep the helper process as the SSOT for screen/window, WASAPI system audio, microphone, webcam, and mux timing. - Match the requested webcam through Media Foundation friendly names first, then browser device ids/symbolic links, so UI selection remains stable across Chromium and Windows native device namespaces. - Use the Electron-resolved DirectShow CLSID when the selected virtual camera, for example NVIDIA Broadcast, is registered for DirectShow but absent from Media Foundation enumeration. diff --git a/docs/testing/windows-native-cursor.md b/docs/testing/windows-native-cursor.md index 23c57a9..4c7da94 100644 --- a/docs/testing/windows-native-cursor.md +++ b/docs/testing/windows-native-cursor.md @@ -93,7 +93,7 @@ Current native availability rules: - Windows 10 build 19041 or newer - a helper executable is available -The helper currently implements display video capture and system audio loopback. Window capture, microphone audio, and webcam capture are part of the native recorder roadmap and fail explicitly instead of silently falling back to Electron capture on Windows. +The helper currently implements display/window video capture, system audio loopback, default microphone capture, Media Foundation webcam capture, and DirectShow fallback for selected virtual cameras such as NVIDIA Broadcast. 
Webcam frames are composed into the primary MP4 as a bottom-right picture-in-picture overlay, and black webcam warmup frames are ignored until the first visible frame is available. Build OpenScreen's helper locally: @@ -105,7 +105,11 @@ Smoke-test the helper directly: ```powershell npm run test:wgc-helper:win +npm run test:wgc-window:win npm run test:wgc-audio:win +npm run test:wgc-mic:win +npm run test:wgc-mixed-audio:win +npm run test:wgc-webcam:win ``` For local diagnostics with another compatible helper, point OpenScreen at that executable: diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index c58f092..6543d83 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ b/electron/native/wgc-capture/src/main.cpp @@ -97,6 +97,31 @@ std::string jsonEscape(const std::string& value) { return result; } +bool hasVisibleBgraContent(const std::vector& frame) { + if (frame.size() < 4) { + return false; + } + + uint64_t lumaTotal = 0; + BYTE maxLuma = 0; + const size_t pixelCount = frame.size() / 4; + const size_t step = std::max(1, pixelCount / 4096); + size_t sampledPixels = 0; + for (size_t pixel = 0; pixel < pixelCount; pixel += step) { + const size_t offset = pixel * 4; + const BYTE b = frame[offset + 0]; + const BYTE g = frame[offset + 1]; + const BYTE r = frame[offset + 2]; + const BYTE luma = static_cast((static_cast(r) * 54 + static_cast(g) * 183 + static_cast(b) * 19) >> 8); + lumaTotal += luma; + maxLuma = std::max(maxLuma, luma); + sampledPixels += 1; + } + + const uint64_t averageLuma = sampledPixels > 0 ? 
lumaTotal / sampledPixels : 0; + return maxLuma > 24 || averageLuma > 4; +} + bool findBool(const std::string& json, const std::string& key, bool fallback) { auto pos = json.find("\"" + key + "\""); if (pos == std::string::npos) { @@ -432,6 +457,7 @@ int main(int argc, char* argv[]) { std::vector latestWebcamFrame; int latestWebcamWidth = 0; int latestWebcamHeight = 0; + bool hasVisibleWebcamFrame = false; session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) { (void)timestampHns; @@ -468,10 +494,19 @@ int main(int argc, char* argv[]) { { std::scoped_lock lock(mutex); if (webcamActive) { - webcamCapture.copyLatestFrame(latestWebcamFrame, latestWebcamWidth, latestWebcamHeight); + std::vector candidateWebcamFrame; + int candidateWebcamWidth = 0; + int candidateWebcamHeight = 0; + if (webcamCapture.copyLatestFrame(candidateWebcamFrame, candidateWebcamWidth, candidateWebcamHeight) && + hasVisibleBgraContent(candidateWebcamFrame)) { + latestWebcamFrame = std::move(candidateWebcamFrame); + latestWebcamWidth = candidateWebcamWidth; + latestWebcamHeight = candidateWebcamHeight; + hasVisibleWebcamFrame = true; + } } const BgraFrameView webcamFrame{ - latestWebcamFrame.empty() ? nullptr : latestWebcamFrame.data(), + hasVisibleWebcamFrame && !latestWebcamFrame.empty() ? 
latestWebcamFrame.data() : nullptr, latestWebcamWidth, latestWebcamHeight, }; @@ -583,12 +618,22 @@ int main(int argc, char* argv[]) { } webcamActive = true; const auto webcamDeadline = std::chrono::steady_clock::now() + std::chrono::seconds(3); - while (std::chrono::steady_clock::now() < webcamDeadline && - !webcamCapture.copyLatestFrame(latestWebcamFrame, latestWebcamWidth, latestWebcamHeight)) { + while (std::chrono::steady_clock::now() < webcamDeadline && !hasVisibleWebcamFrame) { + std::vector candidateWebcamFrame; + int candidateWebcamWidth = 0; + int candidateWebcamHeight = 0; + if (webcamCapture.copyLatestFrame(candidateWebcamFrame, candidateWebcamWidth, candidateWebcamHeight) && + hasVisibleBgraContent(candidateWebcamFrame)) { + latestWebcamFrame = std::move(candidateWebcamFrame); + latestWebcamWidth = candidateWebcamWidth; + latestWebcamHeight = candidateWebcamHeight; + hasVisibleWebcamFrame = true; + break; + } std::this_thread::sleep_for(std::chrono::milliseconds(20)); } - if (latestWebcamFrame.empty()) { - std::cerr << "WARNING: Native webcam started but no frame was available before screen capture" + if (!hasVisibleWebcamFrame) { + std::cerr << "WARNING: Native webcam started but no visible frame was available before screen capture" << std::endl; } } From c0deb0341498299f123b3308849f4a36ff7f165f Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 19:24:32 +0200 Subject: [PATCH 19/43] fix: gate Windows cursor settings --- .../windowsNativeRecordingSession.script.ts | 66 +++++++++++++++++- scripts/test-windows-native-cursor.mjs | 67 ++++++++++++++++++- src/components/video-editor/VideoEditor.tsx | 26 +++++++ src/lib/cursor/nativeCursor.ts | 36 +++++++++- src/native/contracts.ts | 2 + 5 files changed, 192 insertions(+), 5 deletions(-) diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts index 2ad9bbe..f97105e 100644 --- 
a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts @@ -111,6 +111,62 @@ function Write-JsonLine($payload) { [Console]::Out.WriteLine(($payload | ConvertTo-Json -Compress -Depth 6)) } +function Get-CustomCursorType($bitmap, $hotspotX, $hotspotY) { + if ($bitmap.Width -lt 24 -or $bitmap.Height -lt 24 -or $bitmap.Width -gt 64 -or $bitmap.Height -gt 64) { + return $null + } + + if ($hotspotX -lt ($bitmap.Width * 0.25) -or $hotspotX -gt ($bitmap.Width * 0.75) -or + $hotspotY -lt ($bitmap.Height * 0.15) -or $hotspotY -gt ($bitmap.Height * 0.55)) { + return $null + } + + $opaquePixels = 0 + $topHalfOpaquePixels = 0 + $left = $bitmap.Width + $top = $bitmap.Height + $right = -1 + $bottom = -1 + + for ($y = 0; $y -lt $bitmap.Height; $y++) { + for ($x = 0; $x -lt $bitmap.Width; $x++) { + if ($bitmap.GetPixel($x, $y).A -le 32) { + continue + } + + $opaquePixels += 1 + if ($y -lt ($bitmap.Height / 2)) { + $topHalfOpaquePixels += 1 + } + if ($x -lt $left) { $left = $x } + if ($x -gt $right) { $right = $x } + if ($y -lt $top) { $top = $y } + if ($y -gt $bottom) { $bottom = $y } + } + } + + if ($opaquePixels -lt 90 -or $right -lt $left -or $bottom -lt $top) { + return $null + } + + $opaqueWidth = $right - $left + 1 + $opaqueHeight = $bottom - $top + 1 + if ($opaqueWidth -lt ($bitmap.Width * 0.35) -or $opaqueWidth -gt ($bitmap.Width * 0.9) -or + $opaqueHeight -lt ($bitmap.Height * 0.45) -or $opaqueHeight -gt $bitmap.Height) { + return $null + } + + if ($top -gt ($bitmap.Height * 0.45) -or $bottom -lt ($bitmap.Height * 0.65)) { + return $null + } + + if ($topHalfOpaquePixels -gt ($opaquePixels * 0.55)) { + return 'closed-hand' + } + + return 'open-hand' +} + function Get-TargetBounds() { if ([string]::IsNullOrWhiteSpace($targetWindowHandle)) { return $null @@ -164,6 +220,9 @@ function Get-CursorAsset($cursorHandle, $cursorId) { try { 
$graphics.Clear([System.Drawing.Color]::Transparent) $graphics.DrawIcon($icon, 0, 0) + $hotspotX = if ($hasIconInfo) { $iconInfo.xHotspot } else { 0 } + $hotspotY = if ($hasIconInfo) { $iconInfo.yHotspot } else { 0 } + $customCursorType = Get-CustomCursorType -bitmap $bitmap -hotspotX $hotspotX -hotspotY $hotspotY $bitmap.Save($memoryStream, [System.Drawing.Imaging.ImageFormat]::Png) $base64 = [System.Convert]::ToBase64String($memoryStream.ToArray()) @@ -172,8 +231,9 @@ function Get-CursorAsset($cursorHandle, $cursorId) { imageDataUrl = "data:image/png;base64,$base64" width = $bitmap.Width height = $bitmap.Height - hotspotX = if ($hasIconInfo) { $iconInfo.xHotspot } else { 0 } - hotspotY = if ($hasIconInfo) { $iconInfo.yHotspot } else { 0 } + hotspotX = $hotspotX + hotspotY = $hotspotY + cursorType = $customCursorType } } finally { @@ -218,6 +278,8 @@ while ($true) { $asset = Get-CursorAsset -cursorHandle $cursorInfo.hCursor -cursorId $cursorId if ($asset -and $cursorType) { $asset.cursorType = $cursorType + } elseif ($asset -and $asset.cursorType) { + $cursorType = $asset.cursorType } $lastCursorId = $cursorId } diff --git a/scripts/test-windows-native-cursor.mjs b/scripts/test-windows-native-cursor.mjs index 2a8b34c..7d7ea45 100644 --- a/scripts/test-windows-native-cursor.mjs +++ b/scripts/test-windows-native-cursor.mjs @@ -195,6 +195,62 @@ function Write-JsonLine($payload) { [Console]::Out.WriteLine(($payload | ConvertTo-Json -Compress -Depth 6)) } +function Get-CustomCursorType($bitmap, $hotspotX, $hotspotY) { + if ($bitmap.Width -lt 24 -or $bitmap.Height -lt 24 -or $bitmap.Width -gt 64 -or $bitmap.Height -gt 64) { + return $null + } + + if ($hotspotX -lt ($bitmap.Width * 0.25) -or $hotspotX -gt ($bitmap.Width * 0.75) -or + $hotspotY -lt ($bitmap.Height * 0.15) -or $hotspotY -gt ($bitmap.Height * 0.55)) { + return $null + } + + $opaquePixels = 0 + $topHalfOpaquePixels = 0 + $left = $bitmap.Width + $top = $bitmap.Height + $right = -1 + $bottom = -1 + + for ($y = 
0; $y -lt $bitmap.Height; $y++) { + for ($x = 0; $x -lt $bitmap.Width; $x++) { + if ($bitmap.GetPixel($x, $y).A -le 32) { + continue + } + + $opaquePixels += 1 + if ($y -lt ($bitmap.Height / 2)) { + $topHalfOpaquePixels += 1 + } + if ($x -lt $left) { $left = $x } + if ($x -gt $right) { $right = $x } + if ($y -lt $top) { $top = $y } + if ($y -gt $bottom) { $bottom = $y } + } + } + + if ($opaquePixels -lt 90 -or $right -lt $left -or $bottom -lt $top) { + return $null + } + + $opaqueWidth = $right - $left + 1 + $opaqueHeight = $bottom - $top + 1 + if ($opaqueWidth -lt ($bitmap.Width * 0.35) -or $opaqueWidth -gt ($bitmap.Width * 0.9) -or + $opaqueHeight -lt ($bitmap.Height * 0.45) -or $opaqueHeight -gt $bitmap.Height) { + return $null + } + + if ($top -gt ($bitmap.Height * 0.45) -or $bottom -lt ($bitmap.Height * 0.65)) { + return $null + } + + if ($topHalfOpaquePixels -gt ($opaquePixels * 0.55)) { + return 'closed-hand' + } + + return 'open-hand' +} + function Get-CursorAsset($cursorHandle, $cursorId) { $copiedHandle = [OpenScreenCursorDiagnosticInterop]::CopyIcon($cursorHandle) if ($copiedHandle -eq [IntPtr]::Zero) { @@ -213,6 +269,9 @@ function Get-CursorAsset($cursorHandle, $cursorId) { try { $graphics.Clear([System.Drawing.Color]::Transparent) $graphics.DrawIcon($icon, 0, 0) + $hotspotX = if ($hasIconInfo) { $iconInfo.xHotspot } else { 0 } + $hotspotY = if ($hasIconInfo) { $iconInfo.yHotspot } else { 0 } + $customCursorType = Get-CustomCursorType -bitmap $bitmap -hotspotX $hotspotX -hotspotY $hotspotY $bitmap.Save($memoryStream, [System.Drawing.Imaging.ImageFormat]::Png) $base64 = [System.Convert]::ToBase64String($memoryStream.ToArray()) @@ -221,8 +280,9 @@ function Get-CursorAsset($cursorHandle, $cursorId) { imageDataUrl = "data:image/png;base64,$base64" width = $bitmap.Width height = $bitmap.Height - hotspotX = if ($hasIconInfo) { $iconInfo.xHotspot } else { 0 } - hotspotY = if ($hasIconInfo) { $iconInfo.yHotspot } else { 0 } + hotspotX = $hotspotX + hotspotY = 
$hotspotY + cursorType = $customCursorType } } finally { @@ -268,6 +328,8 @@ while ($true) { $asset = Get-CursorAsset -cursorHandle $cursorInfo.hCursor -cursorId $cursorId if ($asset -and $cursorType) { $asset.cursorType = $cursorType + } elseif ($asset -and $asset.cursorType) { + $cursorType = $asset.cursorType } $lastCursorId = $cursorId } @@ -1068,6 +1130,7 @@ const report = { height: asset.height, hotspotX: asset.hotspotX, hotspotY: asset.hotspotY, + cursorType: asset.cursorType ?? null, })), }; const recordingData = toRecordingData(samples, assets); diff --git a/src/components/video-editor/VideoEditor.tsx b/src/components/video-editor/VideoEditor.tsx index 5394eee..90390d2 100644 --- a/src/components/video-editor/VideoEditor.tsx +++ b/src/components/video-editor/VideoEditor.tsx @@ -39,6 +39,7 @@ import { } from "@/lib/userPreferences"; import { BackgroundLoadError } from "@/lib/wallpaper"; import { nativeBridgeClient, useCursorRecordingData, useCursorTelemetry } from "@/native"; +import type { NativePlatform } from "@/native/contracts"; import { getAspectRatioValue, getNativeAspectRatioValue, @@ -164,6 +165,7 @@ export default function VideoEditor() { const [cursorSmoothing, setCursorSmoothing] = useState(DEFAULT_CURSOR_SMOOTHING); const [cursorMotionBlur, setCursorMotionBlur] = useState(DEFAULT_CURSOR_MOTION_BLUR); const [cursorClickBounce, setCursorClickBounce] = useState(DEFAULT_CURSOR_CLICK_BOUNCE); + const [nativePlatform, setNativePlatform] = useState(null); const videoPlaybackRef = useRef(null); @@ -172,6 +174,7 @@ export default function VideoEditor() { const nextSpeedIdRef = useRef(1); const { shortcuts, isMac } = useShortcuts(); + const showCursorSettings = nativePlatform === "win32"; // Off-Mac doesn't have click telemetry, so force `onlyOnClicks` off for // renderers while keeping the persisted value intact for round-tripping. 
const effectiveCursorHighlight = useMemo( @@ -631,6 +634,27 @@ export default function VideoEditor() { }; }, [handleLoadProject, handleSaveProject, handleSaveProjectAs]); + useEffect(() => { + let canceled = false; + nativeBridgeClient.system + .getPlatform() + .then((platform) => { + if (!canceled) { + setNativePlatform(platform); + } + }) + .catch((error) => { + console.warn("Unable to resolve native platform for cursor settings:", error); + if (!canceled) { + setNativePlatform(null); + } + }); + + return () => { + canceled = true; + }; + }, []); + useEffect(() => { if (cursorTelemetryError) { console.warn("Unable to load cursor telemetry:", cursorTelemetryError); @@ -1718,6 +1742,8 @@ export default function VideoEditor() { cursorTelemetry, cursorClickTimestamps, effectiveCursorHighlight, + showCursor, + cursorSize, t, ], ); diff --git a/src/lib/cursor/nativeCursor.ts b/src/lib/cursor/nativeCursor.ts index 04ebccd..6f82e0b 100644 --- a/src/lib/cursor/nativeCursor.ts +++ b/src/lib/cursor/nativeCursor.ts @@ -2,6 +2,8 @@ import { type Container, Point } from "pixi.js"; import appStartingUrl from "@/assets/cursors/Cursor=App-Starting.svg"; import crosshairUrl from "@/assets/cursors/Cursor=Cross.svg"; import arrowUrl from "@/assets/cursors/Cursor=Default.svg"; +import closedHandUrl from "@/assets/cursors/Cursor=Hand-(Grabbing).svg"; +import openHandUrl from "@/assets/cursors/Cursor=Hand-(Open).svg"; import pointerUrl from "@/assets/cursors/Cursor=Hand-(Pointing).svg"; import helpUrl from "@/assets/cursors/Cursor=Help.svg"; import moveUrl from "@/assets/cursors/Cursor=Move.svg"; @@ -78,6 +80,20 @@ const PRETTY_NATIVE_CURSOR_ASSETS: Partial 64 || asset.height < 24 || asset.height > 64) { + return null; + } + + const hotspotXNorm = asset.hotspotX / asset.width; + const hotspotYNorm = asset.hotspotY / asset.height; + const looksLikeChromiumGrabCursor = + hotspotXNorm >= 0.22 && + hotspotXNorm <= 0.55 && + hotspotYNorm >= 0.2 && + hotspotYNorm <= 0.45; + + return 
looksLikeChromiumGrabCursor ? (PRETTY_NATIVE_CURSOR_ASSETS["open-hand"] ?? null) : null; +} + export function hasNativeCursorRecordingData( recordingData: CursorRecordingData | null | undefined, ): recordingData is CursorRecordingData { @@ -322,7 +354,9 @@ export function resolvePrettyNativeCursorAsset( sample?: CursorRecordingSample, ) { const cursorType = sample?.cursorType ?? asset.cursorType ?? null; - return cursorType ? (PRETTY_NATIVE_CURSOR_ASSETS[cursorType] ?? null) : null; + return cursorType + ? (PRETTY_NATIVE_CURSOR_ASSETS[cursorType] ?? null) + : resolveUntypedPrettyNativeCursorAsset(asset); } export function resolveNativeCursorRenderAsset( diff --git a/src/native/contracts.ts b/src/native/contracts.ts index a3c9087..ef45336 100644 --- a/src/native/contracts.ts +++ b/src/native/contracts.ts @@ -8,6 +8,8 @@ export type NativeCursorType = | "text" | "pointer" | "crosshair" + | "open-hand" + | "closed-hand" | "resize-ew" | "resize-ns" | "resize-nesw" From 0ebf5c143b45e1336871a05462f42445a39c9700 Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 20:10:09 +0200 Subject: [PATCH 20/43] test: add Windows native checklist smoke test --- package.json | 1 + src/components/launch/LaunchWindow.tsx | 6 + src/components/launch/SourceSelector.tsx | 2 + tests/e2e/windows-native-checklist.spec.ts | 306 +++++++++++++++++++++ 4 files changed, 315 insertions(+) create mode 100644 tests/e2e/windows-native-checklist.spec.ts diff --git a/package.json b/package.json index 0d64e14..dd65ebe 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ "test:browser": "vitest --config vitest.browser.config.ts --run", "test:browser:install": "playwright install --with-deps chromium-headless-shell", "test:e2e": "playwright test", + "test:e2e:windows-native-checklist": "playwright test tests/e2e/windows-native-checklist.spec.ts", "prepare": "husky", "rebuild:native": "node ./scripts/rebuild-native.mjs", "postinstall": "npm run rebuild:native" diff --git 
a/src/components/launch/LaunchWindow.tsx b/src/components/launch/LaunchWindow.tsx index e4c23d7..992ec6b 100644 --- a/src/components/launch/LaunchWindow.tsx +++ b/src/components/launch/LaunchWindow.tsx @@ -537,6 +537,7 @@ export function LaunchWindow() { {/* Audio controls group */}