From 048189da725c1b01468fe320944cafc7c75a00da Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 17:23:49 +0200 Subject: [PATCH] feat: add native Windows window capture --- .../windows-native-recorder-roadmap.md | 2 + .../windowsNativeRecordingSession.script.ts | 13 -- .../windowsNativeRecordingSession.ts | 6 +- electron/native/README.md | 4 +- electron/native/wgc-capture/src/main.cpp | 132 +++++++++++++++--- .../native/wgc-capture/src/wgc_session.cpp | 59 ++++++++ electron/native/wgc-capture/src/wgc_session.h | 2 + package.json | 1 + scripts/test-windows-wgc-helper.mjs | 64 ++++++++- src/hooks/useScreenRecorder.ts | 7 +- src/lib/nativeWindowsRecording.ts | 13 ++ 11 files changed, 259 insertions(+), 44 deletions(-) diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md index 12c6d49..ac309f9 100644 --- a/docs/engineering/windows-native-recorder-roadmap.md +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -165,6 +165,8 @@ Acceptance: ### 5. Native Window Capture +Status: initial implementation in progress. Electron parses the `window:<hwnd>:...` desktop source id through the shared native Windows recording contract and passes `windowHandle` to the helper. The helper resolves the `HWND`, validates it with `IsWindow`, and creates the WGC item with `CreateForWindow(HWND)`. Resize/minimize/move hardening and protected-window diagnostics remain follow-up work. + - Resolve Electron `window:*` selections to an `HWND`. - Use WGC `CreateForWindow(HWND)`. - Handle window close, minimize, resize, DPI scaling, and monitor moves. 
diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts index 5607134..2ad9bbe 100644 --- a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.script.ts @@ -1,16 +1,3 @@ -export function parseWindowHandleFromSourceId(sourceId?: string | null) { - if (!sourceId?.startsWith("window:")) { - return null; - } - - const handlePart = sourceId.split(":")[1]; - if (!handlePart || !/^\d+$/.test(handlePart)) { - return null; - } - - return handlePart; -} - export function buildPowerShellCommand(sampleIntervalMs: number, windowHandle?: string | null) { const script = String.raw` $ErrorActionPreference = 'Stop' diff --git a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts index 8075fe3..6edee5a 100644 --- a/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts +++ b/electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts @@ -1,16 +1,14 @@ import { type ChildProcessByStdio, spawn } from "node:child_process"; import type { Readable } from "node:stream"; import { screen } from "electron"; +import { parseWindowHandleFromSourceId } from "../../../../src/lib/nativeWindowsRecording"; import type { CursorRecordingData, CursorRecordingSample, NativeCursorAsset, } from "../../../../src/native/contracts"; import type { CursorRecordingSession } from "./session"; -import { - buildPowerShellCommand, - parseWindowHandleFromSourceId, -} from "./windowsNativeRecordingSession.script"; +import { buildPowerShellCommand } from "./windowsNativeRecordingSession.script"; import type { WindowsCursorEvent, WindowsNativeRecordingSessionOptions, diff --git a/electron/native/README.md b/electron/native/README.md index 5df7290..b366a1b 100644 --- 
a/electron/native/README.md +++ b/electron/native/README.md @@ -26,6 +26,7 @@ Current V2 JSON shape: "sourceType": "display", "sourceId": "screen:0:0", "displayId": 1, + "windowHandle": null, "outputPath": "C:\\path\\recording-123.mp4", "videoWidth": 1920, "videoHeight": 1080, @@ -42,12 +43,13 @@ Current V2 JSON shape: } ``` -The current helper implementation supports display video capture, system audio loopback, and initial default-microphone capture. Webcam and window capture now fail explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan. +The current helper implementation supports display/window video capture, system audio loopback, and initial default-microphone capture. Webcam capture now fails explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan. Smoke-test the helper with: ```powershell npm run test:wgc-helper:win +npm run test:wgc-window:win npm run test:wgc-audio:win npm run test:wgc-mic:win npm run test:wgc-mixed-audio:win diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index 50e55ef..86f032e 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ b/electron/native/wgc-capture/src/main.cpp @@ -201,6 +201,36 @@ std::string findString(const std::string& json, const std::string& key) { return result; } +std::string parseWindowHandleFromSourceId(const std::string& sourceId) { + constexpr char prefix[] = "window:"; + if (sourceId.rfind(prefix, 0) != 0) { + return {}; + } + + const size_t start = sizeof(prefix) - 1; + const size_t end = sourceId.find(':', start); + const std::string handle = sourceId.substr(start, end == std::string::npos ? std::string::npos : end - start); + return handle.empty() ? 
std::string{} : handle; +} + +HWND parseWindowHandle(const std::string& value) { + if (value.empty()) { + return nullptr; + } + + try { + size_t parsed = 0; + const int base = value.rfind("0x", 0) == 0 || value.rfind("0X", 0) == 0 ? 16 : 10; + const uint64_t handleValue = std::stoull(value, &parsed, base); + if (parsed != value.size() || handleValue == 0) { + return nullptr; + } + return reinterpret_cast(static_cast(handleValue)); + } catch (...) { + return nullptr; + } +} + bool parseConfig(const std::string& json, CaptureConfig& config) { config.schemaVersion = findInt(json, "schemaVersion", 1); config.outputPath = findString(json, "screenPath"); @@ -218,6 +248,9 @@ bool parseConfig(const std::string& json, CaptureConfig& config) { } config.sourceId = findString(json, "sourceId"); config.windowHandle = findString(json, "windowHandle"); + if (config.windowHandle.empty()) { + config.windowHandle = parseWindowHandleFromSourceId(config.sourceId); + } config.displayId = findInt64(json, "displayId", 0); config.fps = std::clamp(findInt(json, "fps", 60), 1, 120); config.width = findInt(json, "videoWidth", findInt(json, "width", 0)); @@ -270,27 +303,36 @@ int main(int argc, char* argv[]) { std::cout << "{\"event\":\"ready\",\"schemaVersion\":2}" << std::endl; - if (config.sourceType != "display") { - std::cerr << "ERROR: Native window capture is not implemented yet" << std::endl; - return 1; - } - if (config.webcamEnabled) { std::cerr << "ERROR: Native webcam capture is not implemented in this helper yet" << std::endl; return 1; } - HMONITOR monitor = findMonitorForCapture( - config.displayId, - config.hasDisplayBounds ? 
&config.bounds : nullptr); - if (!monitor) { - std::cerr << "ERROR: Could not resolve monitor" << std::endl; - return 1; - } - WgcSession session; - if (!session.initialize(monitor, config.fps)) { - std::cerr << "ERROR: Failed to initialize WGC session" << std::endl; + if (config.sourceType == "display") { + HMONITOR monitor = findMonitorForCapture( + config.displayId, + config.hasDisplayBounds ? &config.bounds : nullptr); + if (!monitor) { + std::cerr << "ERROR: Could not resolve monitor" << std::endl; + return 1; + } + if (!session.initialize(monitor, config.fps)) { + std::cerr << "ERROR: Failed to initialize WGC display session" << std::endl; + return 1; + } + } else if (config.sourceType == "window") { + HWND window = parseWindowHandle(config.windowHandle); + if (!window || !IsWindow(window)) { + std::cerr << "ERROR: Native window capture requires a valid HWND" << std::endl; + return 1; + } + if (!session.initialize(window, config.fps)) { + std::cerr << "ERROR: Failed to initialize WGC window session" << std::endl; + return 1; + } + } else { + std::cerr << "ERROR: Unsupported native capture source type: " << config.sourceType << std::endl; return 1; } @@ -355,24 +397,72 @@ int main(int argc, char* argv[]) { std::atomic stopRequested = false; std::atomic firstFrameWritten = false; std::atomic encodeFailed = false; + Microsoft::WRL::ComPtr latestFrameTexture; session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) { + (void)timestampHns; if (stopRequested) { return; } std::scoped_lock lock(mutex); - if (!encoder.writeFrame(texture, timestampHns)) { - encodeFailed = true; - stopRequested = true; - cv.notify_all(); - return; + if (!latestFrameTexture) { + D3D11_TEXTURE2D_DESC desc{}; + texture->GetDesc(&desc); + desc.BindFlags = 0; + desc.CPUAccessFlags = 0; + desc.MiscFlags = 0; + if (FAILED(session.device()->CreateTexture2D(&desc, nullptr, &latestFrameTexture))) { + encodeFailed = true; + stopRequested = true; + cv.notify_all(); + return; + } 
} + + session.context()->CopyResource(latestFrameTexture.Get(), texture); if (!firstFrameWritten.exchange(true)) { cv.notify_all(); } }); + auto writeVideoFrames = [&]() { + const auto startedAt = std::chrono::steady_clock::now(); + uint64_t frameIndex = 0; + + while (!stopRequested && !encodeFailed) { + { + std::scoped_lock lock(mutex); + if (latestFrameTexture && !encoder.writeFrame( + latestFrameTexture.Get(), + static_cast((frameIndex * 10'000'000ULL) / config.fps))) { + encodeFailed = true; + stopRequested = true; + cv.notify_all(); + return; + } + } + + frameIndex += 1; + const auto nextDeadline = startedAt + + std::chrono::duration_cast( + std::chrono::duration(static_cast(frameIndex) / config.fps)); + std::this_thread::sleep_until(nextDeadline); + } + }; + + std::thread videoWriterThread; + + auto stopVideoWriter = [&]() { + if (videoWriterThread.joinable()) { + videoWriterThread.join(); + } + }; + + auto startVideoWriter = [&]() { + videoWriterThread = std::thread(writeVideoFrames); + }; + std::unique_ptr audioMixer; auto startAudioCaptures = [&]() -> bool { if (!audioFormat) { @@ -476,6 +566,7 @@ int main(int argc, char* argv[]) { if (audioMixer) { audioMixer->beginTimeline(); } + startVideoWriter(); std::cout << "{\"event\":\"recording-started\",\"schemaVersion\":2}" << std::endl; std::cout << "Recording started" << std::endl; @@ -492,6 +583,7 @@ int main(int argc, char* argv[]) { if (audioMixer) { audioMixer->stop(); } + stopVideoWriter(); session.stop(); { std::scoped_lock lock(mutex); diff --git a/electron/native/wgc-capture/src/wgc_session.cpp b/electron/native/wgc-capture/src/wgc_session.cpp index c25444e..ab7e9e3 100644 --- a/electron/native/wgc-capture/src/wgc_session.cpp +++ b/electron/native/wgc-capture/src/wgc_session.cpp @@ -120,6 +120,26 @@ bool WgcSession::createCaptureItem(HMONITOR monitor) { return width_ > 0 && height_ > 0; } +bool WgcSession::createCaptureItem(HWND window) { + auto factory = winrt::get_activation_factory(); + auto 
interop = factory.as(); + + wgcap::GraphicsCaptureItem item{nullptr}; + HRESULT hr = interop->CreateForWindow( + window, + winrt::guid_of(), + reinterpret_cast(winrt::put_abi(item))); + if (!succeeded(hr, "CreateForWindow")) { + return false; + } + + item_ = item; + const auto size = item_.Size(); + width_ = static_cast(size.Width); + height_ = static_cast(size.Height); + return width_ > 0 && height_ > 0; +} + bool WgcSession::initialize(HMONITOR monitor, int fps) { fps_ = fps > 0 ? fps : 60; if (!createD3DDevice()) { @@ -142,6 +162,44 @@ bool WgcSession::initialize(HMONITOR monitor, int fps) { // Older WGC builds can omit this property; callers still overlay their own cursor. } + try { + session_.IsBorderRequired(false); + } catch (...) { + // IsBorderRequired is Windows 11-only. Ignore it on older builds. + } + + frameArrivedToken_ = framePool_.FrameArrived({this, &WgcSession::onFrameArrived}); + return true; +} + +bool WgcSession::initialize(HWND window, int fps) { + fps_ = fps > 0 ? fps : 60; + if (!createD3DDevice()) { + return false; + } + if (!createCaptureItem(window)) { + return false; + } + + framePool_ = wgcap::Direct3D11CaptureFramePool::CreateFreeThreaded( + winrtDevice_, + wgdx::DirectXPixelFormat::B8G8R8A8UIntNormalized, + 2, + item_.Size()); + session_ = framePool_.CreateCaptureSession(item_); + + try { + session_.IsCursorCaptureEnabled(false); + } catch (...) { + // Older WGC builds can omit this property; callers still overlay their own cursor. + } + + try { + session_.IsBorderRequired(false); + } catch (...) { + // IsBorderRequired is Windows 11-only. Ignore it on older builds. 
+ } + frameArrivedToken_ = framePool_.FrameArrived({this, &WgcSession::onFrameArrived}); return true; } @@ -204,6 +262,7 @@ void WgcSession::onFrameArrived( if (callback) { callback(texture.Get(), timeSpanToHns(frame.SystemRelativeTime())); } + frame.Close(); } int WgcSession::captureWidth() const { diff --git a/electron/native/wgc-capture/src/wgc_session.h b/electron/native/wgc-capture/src/wgc_session.h index 8cfb050..34ad3f5 100644 --- a/electron/native/wgc-capture/src/wgc_session.h +++ b/electron/native/wgc-capture/src/wgc_session.h @@ -23,6 +23,7 @@ public: WgcSession& operator=(const WgcSession&) = delete; bool initialize(HMONITOR monitor, int fps); + bool initialize(HWND window, int fps); void setFrameCallback(FrameCallback callback); bool start(); void stop(); @@ -35,6 +36,7 @@ public: private: bool createD3DDevice(); bool createCaptureItem(HMONITOR monitor); + bool createCaptureItem(HWND window); void onFrameArrived( winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool const& sender, winrt::Windows::Foundation::IInspectable const&); diff --git a/package.json b/package.json index 9114207..8ff2cb5 100644 --- a/package.json +++ b/package.json @@ -28,6 +28,7 @@ "test:watch": "vitest", "test:cursor-native:win": "node scripts/test-windows-native-cursor.mjs", "test:wgc-helper:win": "node scripts/test-windows-wgc-helper.mjs", + "test:wgc-window:win": "node scripts/test-windows-wgc-helper.mjs --window", "test:wgc-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio", "test:wgc-mic:win": "node scripts/test-windows-wgc-helper.mjs --microphone", "test:wgc-mixed-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio --microphone", diff --git a/scripts/test-windows-wgc-helper.mjs b/scripts/test-windows-wgc-helper.mjs index bb69819..6b5a626 100644 --- a/scripts/test-windows-wgc-helper.mjs +++ b/scripts/test-windows-wgc-helper.mjs @@ -19,6 +19,8 @@ const WITH_MICROPHONE = process.env.OPENSCREEN_WGC_TEST_MICROPHONE === "true" || 
process.argv.includes("--microphone") || process.argv.includes("--mic"); +const WITH_WINDOW = + process.env.OPENSCREEN_WGC_TEST_WINDOW === "true" || process.argv.includes("--window"); function runHelper(config) { return new Promise((resolve, reject) => { @@ -47,6 +49,47 @@ function runHelper(config) { }); } +function startFixtureWindow() { + return new Promise((resolve, reject) => { + const child = spawn("mspaint.exe", [], { + stdio: ["ignore", "ignore", "ignore"], + windowsHide: false, + }); + + const poll = setInterval(() => { + const lookup = spawnSync( + "powershell", + [ + "-NoProfile", + "-Command", + `(Get-Process -Id ${child.pid} -ErrorAction SilentlyContinue).MainWindowHandle`, + ], + { encoding: "utf8", windowsHide: true }, + ); + const handle = lookup.stdout + .trim() + .split(/\r?\n/) + .find((line) => /^\d+$/.test(line.trim())); + if (handle && handle !== "0") { + clearInterval(poll); + clearTimeout(timer); + resolve({ child, sourceId: `window:${handle.trim()}:0` }); + } + }, 250); + + const timer = setTimeout(() => { + clearInterval(poll); + child.kill(); + reject(new Error("Timed out waiting for fixture window handle")); + }, 10_000); + child.once("error", (error) => { + clearInterval(poll); + clearTimeout(timer); + reject(error); + }); + }); +} + function probeStreams(outputPath) { const ffprobe = spawnSync( "ffprobe", @@ -106,15 +149,17 @@ if (!fs.existsSync(HELPER_PATH)) { const outputPath = path.join( os.tmpdir(), - `openscreen-wgc-helper-${WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`, + `openscreen-wgc-helper-${WITH_WINDOW ? "window" : WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`, ); +const fixtureWindow = WITH_WINDOW ? await startFixtureWindow() : null; + const config = { schemaVersion: 2, recordingId: Date.now(), outputPath, - sourceType: "display", - sourceId: "screen:0:0", + sourceType: fixtureWindow ? 
"window" : "display", + sourceId: fixtureWindow ? fixtureWindow.sourceId : "screen:0:0", displayId: 0, fps: 30, videoWidth: 1280, @@ -132,7 +177,14 @@ const config = { outputs: { screenPath: outputPath }, }; -const result = await runHelper(config); +let result; +try { + result = await runHelper(config); +} finally { + if (fixtureWindow) { + fixtureWindow.child.kill(); + } +} if (result.code !== 0) { throw new Error(`WGC helper exited with ${result.code}\n${result.stdout}\n${result.stderr}`); } @@ -151,7 +203,9 @@ if ((WITH_SYSTEM_AUDIO || WITH_MICROPHONE) && !hasAudio) { } const frameLuma = measureFirstFrameLuma(outputPath); if (frameLuma.average < 1 && frameLuma.max < 5) { - throw new Error(`WGC helper output first frame is black: ${outputPath}`); + throw new Error( + `WGC helper output first frame is black: ${outputPath}\n${result.stdout}\n${result.stderr}`, + ); } console.log( diff --git a/src/hooks/useScreenRecorder.ts b/src/hooks/useScreenRecorder.ts index 1ae9d22..88ba90a 100644 --- a/src/hooks/useScreenRecorder.ts +++ b/src/hooks/useScreenRecorder.ts @@ -2,7 +2,10 @@ import { fixWebmDuration } from "@fix-webm-duration/fix"; import { useCallback, useEffect, useRef, useState } from "react"; import { toast } from "sonner"; import { useScopedT } from "@/contexts/I18nContext"; -import type { NativeWindowsRecordingRequest } from "@/lib/nativeWindowsRecording"; +import { + type NativeWindowsRecordingRequest, + parseWindowHandleFromSourceId, +} from "@/lib/nativeWindowsRecording"; import { requestCameraAccess } from "@/lib/requestCameraAccess"; const TARGET_FRAME_RATE = 60; @@ -573,12 +576,14 @@ export function useScreenRecorder(): UseScreenRecorderReturn { const activeRecordingId = Date.now(); const displayId = Number(selectedSource.display_id); const sourceType = selectedSource.id.startsWith("window:") ? 
"window" : "display"; + const windowHandle = parseWindowHandleFromSourceId(selectedSource.id); const request: NativeWindowsRecordingRequest = { recordingId: activeRecordingId, source: { type: sourceType, sourceId: selectedSource.id, ...(Number.isFinite(displayId) ? { displayId } : {}), + ...(windowHandle ? { windowHandle } : {}), }, video: { fps: TARGET_FRAME_RATE, diff --git a/src/lib/nativeWindowsRecording.ts b/src/lib/nativeWindowsRecording.ts index d30ef17..7e2f0ba 100644 --- a/src/lib/nativeWindowsRecording.ts +++ b/src/lib/nativeWindowsRecording.ts @@ -39,3 +39,16 @@ export type NativeWindowsRecordingStartResult = { helperPath?: string; error?: string; }; + +export function parseWindowHandleFromSourceId(sourceId?: string | null) { + if (!sourceId?.startsWith("window:")) { + return null; + } + + const handlePart = sourceId.split(":")[1]; + if (!handlePart || !/^\d+$/.test(handlePart)) { + return null; + } + + return handlePart; +}