feat: add native Windows window capture

This commit is contained in:
EtienneLescot
2026-05-05 17:23:49 +02:00
parent 7929aea908
commit 048189da72
11 changed files with 259 additions and 44 deletions
@@ -165,6 +165,8 @@ Acceptance:
### 5. Native Window Capture
Status: initial implementation in progress. Electron parses the `window:<HWND>:...` desktop source id through the shared native Windows recording contract and passes `windowHandle` to the helper. The helper resolves the `HWND`, validates it with `IsWindow`, and creates the WGC item with `CreateForWindow(HWND)`. Resize/minimize/move hardening and protected-window diagnostics remain follow-up work.
- Resolve Electron `window:*` selections to an `HWND`.
- Use WGC `CreateForWindow(HWND)`.
- Handle window close, minimize, resize, DPI scaling, and monitor moves.
@@ -1,16 +1,3 @@
/**
 * Pulls the window handle out of a `window:<handle>:...` desktop source id.
 *
 * The handle is the second colon-separated segment and must consist solely of
 * decimal digits.
 *
 * @param sourceId - Desktop source id; may be missing.
 * @returns The handle digits as a string, or `null` when the id is absent, is
 *   not a `window:` id, or its handle segment is empty / non-numeric.
 */
export function parseWindowHandleFromSourceId(sourceId?: string | null) {
  if (sourceId == null || !sourceId.startsWith("window:")) {
    return null;
  }

  const [, candidate] = sourceId.split(":");
  return candidate !== undefined && /^\d+$/.test(candidate) ? candidate : null;
}
export function buildPowerShellCommand(sampleIntervalMs: number, windowHandle?: string | null) {
const script = String.raw`
$ErrorActionPreference = 'Stop'
@@ -1,16 +1,14 @@
import { type ChildProcessByStdio, spawn } from "node:child_process";
import type { Readable } from "node:stream";
import { screen } from "electron";
import { parseWindowHandleFromSourceId } from "../../../../src/lib/nativeWindowsRecording";
import type {
CursorRecordingData,
CursorRecordingSample,
NativeCursorAsset,
} from "../../../../src/native/contracts";
import type { CursorRecordingSession } from "./session";
import {
buildPowerShellCommand,
parseWindowHandleFromSourceId,
} from "./windowsNativeRecordingSession.script";
import { buildPowerShellCommand } from "./windowsNativeRecordingSession.script";
import type {
WindowsCursorEvent,
WindowsNativeRecordingSessionOptions,
+3 -1
View File
@@ -26,6 +26,7 @@ Current V2 JSON shape:
"sourceType": "display",
"sourceId": "screen:0:0",
"displayId": 1,
"windowHandle": null,
"outputPath": "C:\\path\\recording-123.mp4",
"videoWidth": 1920,
"videoHeight": 1080,
@@ -42,12 +43,13 @@ Current V2 JSON shape:
}
```
The current helper implementation supports display video capture, system audio loopback, and initial default-microphone capture. Webcam and window capture now fail explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan.
The current helper implementation supports display/window video capture, system audio loopback, and initial default-microphone capture. Webcam capture now fails explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan.
Smoke-test the helper with:
```powershell
npm run test:wgc-helper:win
npm run test:wgc-window:win
npm run test:wgc-audio:win
npm run test:wgc-mic:win
npm run test:wgc-mixed-audio:win
+112 -20
View File
@@ -201,6 +201,36 @@ std::string findString(const std::string& json, const std::string& key) {
return result;
}
// Extracts the handle segment from a "window:<handle>[:...]" source id.
// Returns an empty string when the id does not begin with "window:" or when
// nothing sits between the prefix and the next ':' (or the end of the id).
std::string parseWindowHandleFromSourceId(const std::string& sourceId) {
    constexpr char kWindowPrefix[] = "window:";
    constexpr size_t kPrefixLength = sizeof(kWindowPrefix) - 1;  // drop the trailing NUL

    if (sourceId.compare(0, kPrefixLength, kWindowPrefix) != 0) {
        return std::string();
    }

    const size_t separator = sourceId.find(':', kPrefixLength);
    if (separator == std::string::npos) {
        // No trailing segment: everything after the prefix is the handle.
        return sourceId.substr(kPrefixLength);
    }
    return sourceId.substr(kPrefixLength, separator - kPrefixLength);
}
// Converts a textual window handle into an HWND.
//
// Accepts either plain decimal digits or a "0x"/"0X"-prefixed hexadecimal
// value. Returns nullptr when the text is empty, contains anything other than
// digits of the selected base, is zero, or does not fit in a pointer-sized
// integer on this build.
HWND parseWindowHandle(const std::string& value) {
    if (value.empty()) {
        return nullptr;
    }

    const bool isHex = value.rfind("0x", 0) == 0 || value.rfind("0X", 0) == 0;
    const size_t digitsStart = isHex ? 2 : 0;
    const char* const allowedDigits = isHex ? "0123456789abcdefABCDEF" : "0123456789";

    // std::stoull skips leading whitespace and accepts a sign ("-1" wraps to a
    // huge unsigned value), so validate the characters before converting.
    if (digitsStart >= value.size() ||
        value.find_first_not_of(allowedDigits, digitsStart) != std::string::npos) {
        return nullptr;
    }

    try {
        size_t parsed = 0;
        const uint64_t handleValue = std::stoull(value, &parsed, isHex ? 16 : 10);
        if (parsed != value.size() || handleValue == 0) {
            return nullptr;
        }

        // Reject values that would be silently truncated when uintptr_t is
        // narrower than 64 bits.
        const uintptr_t pointerValue = static_cast<uintptr_t>(handleValue);
        if (static_cast<uint64_t>(pointerValue) != handleValue) {
            return nullptr;
        }
        return reinterpret_cast<HWND>(pointerValue);
    } catch (...) {
        // std::stoull throws on out-of-range input; treat that as "no handle".
        return nullptr;
    }
}
bool parseConfig(const std::string& json, CaptureConfig& config) {
config.schemaVersion = findInt(json, "schemaVersion", 1);
config.outputPath = findString(json, "screenPath");
@@ -218,6 +248,9 @@ bool parseConfig(const std::string& json, CaptureConfig& config) {
}
config.sourceId = findString(json, "sourceId");
config.windowHandle = findString(json, "windowHandle");
if (config.windowHandle.empty()) {
config.windowHandle = parseWindowHandleFromSourceId(config.sourceId);
}
config.displayId = findInt64(json, "displayId", 0);
config.fps = std::clamp(findInt(json, "fps", 60), 1, 120);
config.width = findInt(json, "videoWidth", findInt(json, "width", 0));
@@ -270,27 +303,36 @@ int main(int argc, char* argv[]) {
std::cout << "{\"event\":\"ready\",\"schemaVersion\":2}" << std::endl;
if (config.sourceType != "display") {
std::cerr << "ERROR: Native window capture is not implemented yet" << std::endl;
return 1;
}
if (config.webcamEnabled) {
std::cerr << "ERROR: Native webcam capture is not implemented in this helper yet" << std::endl;
return 1;
}
HMONITOR monitor = findMonitorForCapture(
config.displayId,
config.hasDisplayBounds ? &config.bounds : nullptr);
if (!monitor) {
std::cerr << "ERROR: Could not resolve monitor" << std::endl;
return 1;
}
WgcSession session;
if (!session.initialize(monitor, config.fps)) {
std::cerr << "ERROR: Failed to initialize WGC session" << std::endl;
if (config.sourceType == "display") {
HMONITOR monitor = findMonitorForCapture(
config.displayId,
config.hasDisplayBounds ? &config.bounds : nullptr);
if (!monitor) {
std::cerr << "ERROR: Could not resolve monitor" << std::endl;
return 1;
}
if (!session.initialize(monitor, config.fps)) {
std::cerr << "ERROR: Failed to initialize WGC display session" << std::endl;
return 1;
}
} else if (config.sourceType == "window") {
HWND window = parseWindowHandle(config.windowHandle);
if (!window || !IsWindow(window)) {
std::cerr << "ERROR: Native window capture requires a valid HWND" << std::endl;
return 1;
}
if (!session.initialize(window, config.fps)) {
std::cerr << "ERROR: Failed to initialize WGC window session" << std::endl;
return 1;
}
} else {
std::cerr << "ERROR: Unsupported native capture source type: " << config.sourceType << std::endl;
return 1;
}
@@ -355,24 +397,72 @@ int main(int argc, char* argv[]) {
std::atomic<bool> stopRequested = false;
std::atomic<bool> firstFrameWritten = false;
std::atomic<bool> encodeFailed = false;
Microsoft::WRL::ComPtr<ID3D11Texture2D> latestFrameTexture;
session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) {
(void)timestampHns;
if (stopRequested) {
return;
}
std::scoped_lock lock(mutex);
if (!encoder.writeFrame(texture, timestampHns)) {
encodeFailed = true;
stopRequested = true;
cv.notify_all();
return;
if (!latestFrameTexture) {
D3D11_TEXTURE2D_DESC desc{};
texture->GetDesc(&desc);
desc.BindFlags = 0;
desc.CPUAccessFlags = 0;
desc.MiscFlags = 0;
if (FAILED(session.device()->CreateTexture2D(&desc, nullptr, &latestFrameTexture))) {
encodeFailed = true;
stopRequested = true;
cv.notify_all();
return;
}
}
session.context()->CopyResource(latestFrameTexture.Get(), texture);
if (!firstFrameWritten.exchange(true)) {
cv.notify_all();
}
});
// Writer loop: once per frame interval (1 / config.fps seconds) it hands the
// current latestFrameTexture to the encoder, until stopRequested or
// encodeFailed becomes true. Frames are stamped from the loop's own frame
// counter, not from the capture timestamp.
auto writeVideoFrames = [&]() {
const auto startedAt = std::chrono::steady_clock::now();
uint64_t frameIndex = 0;
while (!stopRequested && !encodeFailed) {
{
// Hold the mutex while touching latestFrameTexture and the encoder.
std::scoped_lock lock(mutex);
// When no frame has been copied yet (null texture) nothing is written, but
// frameIndex still advances below. The timestamp is frameIndex scaled by
// 10,000,000 per second — presumably 100 ns units, matching the
// `timestampHns` naming used by the capture callback; confirm against
// encoder.writeFrame.
if (latestFrameTexture && !encoder.writeFrame(
latestFrameTexture.Get(),
static_cast<int64_t>((frameIndex * 10'000'000ULL) / config.fps))) {
// Encoder failure: flag it, request stop, and wake anything waiting on cv.
encodeFailed = true;
stopRequested = true;
cv.notify_all();
return;
}
}
frameIndex += 1;
// Deadlines are computed from startedAt rather than from "now", so a slow
// iteration does not push every later frame back.
const auto nextDeadline = startedAt +
std::chrono::duration_cast<std::chrono::steady_clock::duration>(
std::chrono::duration<double>(static_cast<double>(frameIndex) / config.fps));
std::this_thread::sleep_until(nextDeadline);
}
};
// Thread that runs writeVideoFrames; default-constructed (not joinable) until
// startVideoWriter is called.
std::thread videoWriterThread;
// Joins the writer thread if it was started. This only waits: the loop exits
// when stopRequested or encodeFailed is set, which must happen elsewhere.
auto stopVideoWriter = [&]() {
if (videoWriterThread.joinable()) {
videoWriterThread.join();
}
};
// Launches writeVideoFrames on videoWriterThread.
auto startVideoWriter = [&]() {
videoWriterThread = std::thread(writeVideoFrames);
};
std::unique_ptr<AudioMixer> audioMixer;
auto startAudioCaptures = [&]() -> bool {
if (!audioFormat) {
@@ -476,6 +566,7 @@ int main(int argc, char* argv[]) {
if (audioMixer) {
audioMixer->beginTimeline();
}
startVideoWriter();
std::cout << "{\"event\":\"recording-started\",\"schemaVersion\":2}" << std::endl;
std::cout << "Recording started" << std::endl;
@@ -492,6 +583,7 @@ int main(int argc, char* argv[]) {
if (audioMixer) {
audioMixer->stop();
}
stopVideoWriter();
session.stop();
{
std::scoped_lock lock(mutex);
@@ -120,6 +120,26 @@ bool WgcSession::createCaptureItem(HMONITOR monitor) {
return width_ > 0 && height_ > 0;
}
// Creates the capture item for `window` through IGraphicsCaptureItemInterop
// and records the item's size in width_ / height_.
// Returns false when CreateForWindow fails or the reported size is not
// strictly positive in both dimensions.
bool WgcSession::createCaptureItem(HWND window) {
    auto interopFactory =
        winrt::get_activation_factory<wgcap::GraphicsCaptureItem>().as<IGraphicsCaptureItemInterop>();

    wgcap::GraphicsCaptureItem windowItem{nullptr};
    const HRESULT createResult = interopFactory->CreateForWindow(
        window,
        winrt::guid_of<wgcap::GraphicsCaptureItem>(),
        reinterpret_cast<void**>(winrt::put_abi(windowItem)));
    if (!succeeded(createResult, "CreateForWindow")) {
        return false;
    }

    item_ = windowItem;

    const auto itemSize = item_.Size();
    width_ = static_cast<int>(itemSize.Width);
    height_ = static_cast<int>(itemSize.Height);

    if (width_ <= 0 || height_ <= 0) {
        return false;
    }
    return true;
}
bool WgcSession::initialize(HMONITOR monitor, int fps) {
fps_ = fps > 0 ? fps : 60;
if (!createD3DDevice()) {
@@ -142,6 +162,44 @@ bool WgcSession::initialize(HMONITOR monitor, int fps) {
// Older WGC builds can omit this property; callers still overlay their own cursor.
}
try {
session_.IsBorderRequired(false);
} catch (...) {
// IsBorderRequired is Windows 11-only. Ignore it on older builds.
}
frameArrivedToken_ = framePool_.FrameArrived({this, &WgcSession::onFrameArrived});
return true;
}
// Prepares a capture session for a window: creates the D3D device and the
// window capture item, then a free-threaded BGRA8 frame pool with two buffers
// sized to the item, and finally the capture session itself.
// Cursor capture and the capture border are switched off on a best-effort
// basis. A non-positive `fps` falls back to 60.
// Returns false if the device or the capture item cannot be created.
bool WgcSession::initialize(HWND window, int fps) {
    fps_ = (fps <= 0) ? 60 : fps;

    if (!createD3DDevice() || !createCaptureItem(window)) {
        return false;
    }

    constexpr int kFrameBufferCount = 2;
    const auto captureSize = item_.Size();
    framePool_ = wgcap::Direct3D11CaptureFramePool::CreateFreeThreaded(
        winrtDevice_,
        wgdx::DirectXPixelFormat::B8G8R8A8UIntNormalized,
        kFrameBufferCount,
        captureSize);
    session_ = framePool_.CreateCaptureSession(item_);

    // Older WGC builds can omit this property; callers still overlay their own cursor.
    try {
        session_.IsCursorCaptureEnabled(false);
    } catch (...) {
    }

    // IsBorderRequired is Windows 11-only. Ignore it on older builds.
    try {
        session_.IsBorderRequired(false);
    } catch (...) {
    }

    frameArrivedToken_ = framePool_.FrameArrived({this, &WgcSession::onFrameArrived});
    return true;
}
@@ -204,6 +262,7 @@ void WgcSession::onFrameArrived(
if (callback) {
callback(texture.Get(), timeSpanToHns(frame.SystemRelativeTime()));
}
frame.Close();
}
int WgcSession::captureWidth() const {
@@ -23,6 +23,7 @@ public:
WgcSession& operator=(const WgcSession&) = delete;
bool initialize(HMONITOR monitor, int fps);
bool initialize(HWND window, int fps);
void setFrameCallback(FrameCallback callback);
bool start();
void stop();
@@ -35,6 +36,7 @@ public:
private:
bool createD3DDevice();
bool createCaptureItem(HMONITOR monitor);
bool createCaptureItem(HWND window);
void onFrameArrived(
winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool const& sender,
winrt::Windows::Foundation::IInspectable const&);
+1
View File
@@ -28,6 +28,7 @@
"test:watch": "vitest",
"test:cursor-native:win": "node scripts/test-windows-native-cursor.mjs",
"test:wgc-helper:win": "node scripts/test-windows-wgc-helper.mjs",
"test:wgc-window:win": "node scripts/test-windows-wgc-helper.mjs --window",
"test:wgc-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio",
"test:wgc-mic:win": "node scripts/test-windows-wgc-helper.mjs --microphone",
"test:wgc-mixed-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio --microphone",
+59 -5
View File
@@ -19,6 +19,8 @@ const WITH_MICROPHONE =
process.env.OPENSCREEN_WGC_TEST_MICROPHONE === "true" ||
process.argv.includes("--microphone") ||
process.argv.includes("--mic");
const WITH_WINDOW =
process.env.OPENSCREEN_WGC_TEST_WINDOW === "true" || process.argv.includes("--window");
function runHelper(config) {
return new Promise((resolve, reject) => {
@@ -47,6 +49,47 @@ function runHelper(config) {
});
}
/**
 * Launches `mspaint.exe` as a throwaway capture target and waits for its main
 * window handle to become available.
 *
 * The handle is polled every 250 ms by asking PowerShell for the process's
 * `MainWindowHandle`. Polling stops as soon as a non-zero numeric handle is
 * reported, the child emits `error`, or 10 seconds elapse.
 *
 * @returns {Promise<{ child: import("node:child_process").ChildProcess, sourceId: string }>}
 *   Resolves with the spawned process and a `window:<handle>:0` source id.
 *   Rejects if the process cannot be spawned or no handle appears in time
 *   (the child is killed on timeout).
 */
function startFixtureWindow() {
	return new Promise((resolve, reject) => {
		const child = spawn("mspaint.exe", [], {
			stdio: ["ignore", "ignore", "ignore"],
			windowsHide: false,
		});

		const poll = setInterval(() => {
			const lookup = spawnSync(
				"powershell",
				[
					"-NoProfile",
					"-Command",
					`(Get-Process -Id ${child.pid} -ErrorAction SilentlyContinue).MainWindowHandle`,
				],
				{ encoding: "utf8", windowsHide: true },
			);
			// When PowerShell itself cannot be launched, spawnSync leaves stdout null.
			// Treat that as "no handle yet" so the timeout below reports the failure
			// instead of an uncaught TypeError escaping the interval callback.
			const handle = (lookup.stdout ?? "")
				.trim()
				.split(/\r?\n/)
				.find((line) => /^\d+$/.test(line.trim()));
			// A handle of 0 means the process has no main window yet; keep polling.
			if (handle && handle !== "0") {
				clearInterval(poll);
				clearTimeout(timer);
				resolve({ child, sourceId: `window:${handle.trim()}:0` });
			}
		}, 250);

		const timer = setTimeout(() => {
			clearInterval(poll);
			child.kill();
			reject(new Error("Timed out waiting for fixture window handle"));
		}, 10_000);

		child.once("error", (error) => {
			clearInterval(poll);
			clearTimeout(timer);
			reject(error);
		});
	});
}
function probeStreams(outputPath) {
const ffprobe = spawnSync(
"ffprobe",
@@ -106,15 +149,17 @@ if (!fs.existsSync(HELPER_PATH)) {
const outputPath = path.join(
os.tmpdir(),
`openscreen-wgc-helper-${WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`,
`openscreen-wgc-helper-${WITH_WINDOW ? "window" : WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`,
);
const fixtureWindow = WITH_WINDOW ? await startFixtureWindow() : null;
const config = {
schemaVersion: 2,
recordingId: Date.now(),
outputPath,
sourceType: "display",
sourceId: "screen:0:0",
sourceType: fixtureWindow ? "window" : "display",
sourceId: fixtureWindow ? fixtureWindow.sourceId : "screen:0:0",
displayId: 0,
fps: 30,
videoWidth: 1280,
@@ -132,7 +177,14 @@ const config = {
outputs: { screenPath: outputPath },
};
const result = await runHelper(config);
let result;
try {
result = await runHelper(config);
} finally {
if (fixtureWindow) {
fixtureWindow.child.kill();
}
}
if (result.code !== 0) {
throw new Error(`WGC helper exited with ${result.code}\n${result.stdout}\n${result.stderr}`);
}
@@ -151,7 +203,9 @@ if ((WITH_SYSTEM_AUDIO || WITH_MICROPHONE) && !hasAudio) {
}
const frameLuma = measureFirstFrameLuma(outputPath);
if (frameLuma.average < 1 && frameLuma.max < 5) {
throw new Error(`WGC helper output first frame is black: ${outputPath}`);
throw new Error(
`WGC helper output first frame is black: ${outputPath}\n${result.stdout}\n${result.stderr}`,
);
}
console.log(
+6 -1
View File
@@ -2,7 +2,10 @@ import { fixWebmDuration } from "@fix-webm-duration/fix";
import { useCallback, useEffect, useRef, useState } from "react";
import { toast } from "sonner";
import { useScopedT } from "@/contexts/I18nContext";
import type { NativeWindowsRecordingRequest } from "@/lib/nativeWindowsRecording";
import {
type NativeWindowsRecordingRequest,
parseWindowHandleFromSourceId,
} from "@/lib/nativeWindowsRecording";
import { requestCameraAccess } from "@/lib/requestCameraAccess";
const TARGET_FRAME_RATE = 60;
@@ -573,12 +576,14 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
const activeRecordingId = Date.now();
const displayId = Number(selectedSource.display_id);
const sourceType = selectedSource.id.startsWith("window:") ? "window" : "display";
const windowHandle = parseWindowHandleFromSourceId(selectedSource.id);
const request: NativeWindowsRecordingRequest = {
recordingId: activeRecordingId,
source: {
type: sourceType,
sourceId: selectedSource.id,
...(Number.isFinite(displayId) ? { displayId } : {}),
...(windowHandle ? { windowHandle } : {}),
},
video: {
fps: TARGET_FRAME_RATE,
+13
View File
@@ -39,3 +39,16 @@ export type NativeWindowsRecordingStartResult = {
helperPath?: string;
error?: string;
};
/**
 * Reads the window handle embedded in a `window:<handle>:...` source id.
 *
 * Only ids that start with `window:` and whose next colon-delimited segment is
 * made up entirely of decimal digits yield a handle.
 *
 * @param sourceId - Desktop source id; may be `null` or omitted.
 * @returns The handle as a digit string, or `null` if none can be extracted.
 */
export function parseWindowHandleFromSourceId(sourceId?: string | null) {
  // The capture group is the segment right after "window:"; it must be followed
  // by either another ":" or the end of the id.
  const match = /^window:(\d+)(?::|$)/.exec(sourceId ?? "");
  return match?.[1] ?? null;
}