From 38d727eb8e51275bb10ddca590f8709d02c50a12 Mon Sep 17 00:00:00 2001 From: EtienneLescot Date: Tue, 5 May 2026 18:51:08 +0200 Subject: [PATCH] fix: skip black webcam warmup frames --- .../windows-native-recorder-roadmap.md | 1 + docs/testing/windows-native-cursor.md | 6 +- electron/native/wgc-capture/src/main.cpp | 57 +++++++++++++++++-- 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md index 5129153..29986db 100644 --- a/docs/engineering/windows-native-recorder-roadmap.md +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -158,6 +158,7 @@ Acceptance: - Add Media Foundation webcam source reader. - Select requested dimensions/fps or the nearest format accepted by Media Foundation. - Convert webcam samples to BGRA and compose them into the primary helper MP4 as an initial bottom-right picture-in-picture overlay. +- Ignore black webcam warmup frames and keep the overlay hidden until the first visible frame is available, so virtual cameras do not flash a black picture-in-picture rectangle at recording start. - Keep the helper process as the SSOT for screen/window, WASAPI system audio, microphone, webcam, and mux timing. - Match the requested webcam through Media Foundation friendly names first, then browser device ids/symbolic links, so UI selection remains stable across Chromium and Windows native device namespaces. - Use the Electron-resolved DirectShow CLSID when the selected virtual camera, for example NVIDIA Broadcast, is registered for DirectShow but absent from Media Foundation enumeration. diff --git a/docs/testing/windows-native-cursor.md b/docs/testing/windows-native-cursor.md index 23c57a9..4c7da94 100644 --- a/docs/testing/windows-native-cursor.md +++ b/docs/testing/windows-native-cursor.md @@ -93,7 +93,7 @@ Current native availability rules: - Windows 10 build 19041 or newer - a helper executable is available -The helper currently implements display video capture and system audio loopback. Window capture, microphone audio, and webcam capture are part of the native recorder roadmap and fail explicitly instead of silently falling back to Electron capture on Windows. +The helper currently implements display/window video capture, system audio loopback, default microphone capture, Media Foundation webcam capture, and DirectShow fallback for selected virtual cameras such as NVIDIA Broadcast. Webcam frames are composed into the primary MP4 as a bottom-right picture-in-picture overlay, and black webcam warmup frames are ignored until the first visible frame is available. Build OpenScreen's helper locally: @@ -105,7 +105,11 @@ Smoke-test the helper directly: ```powershell npm run test:wgc-helper:win +npm run test:wgc-window:win npm run test:wgc-audio:win +npm run test:wgc-mic:win +npm run test:wgc-mixed-audio:win +npm run test:wgc-webcam:win ``` For local diagnostics with another compatible helper, point OpenScreen at that executable: diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index c58f092..6543d83 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ b/electron/native/wgc-capture/src/main.cpp @@ -97,6 +97,31 @@ std::string jsonEscape(const std::string& value) { return result; } +bool hasVisibleBgraContent(const std::vector& frame) { + if (frame.size() < 4) { + return false; + } + + uint64_t lumaTotal = 0; + BYTE maxLuma = 0; + const size_t pixelCount = frame.size() / 4; + const size_t step = std::max(1, pixelCount / 4096); + size_t sampledPixels = 0; + for (size_t pixel = 0; pixel < pixelCount; pixel += step) { + const size_t offset = pixel * 4; + const BYTE b = frame[offset + 0]; + const BYTE g = frame[offset + 1]; + const BYTE r = frame[offset + 2]; + const BYTE luma = static_cast((static_cast(r) * 54 + static_cast(g) * 183 + static_cast(b) * 19) >> 8); + lumaTotal += luma; + maxLuma = std::max(maxLuma, luma); + sampledPixels += 1; + } + + const uint64_t averageLuma = sampledPixels > 0 ? lumaTotal / sampledPixels : 0; + return maxLuma > 24 || averageLuma > 4; +} + bool findBool(const std::string& json, const std::string& key, bool fallback) { auto pos = json.find("\"" + key + "\""); if (pos == std::string::npos) { @@ -432,6 +457,7 @@ int main(int argc, char* argv[]) { std::vector latestWebcamFrame; int latestWebcamWidth = 0; int latestWebcamHeight = 0; + bool hasVisibleWebcamFrame = false; session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) { (void)timestampHns; @@ -468,10 +494,19 @@ int main(int argc, char* argv[]) { { std::scoped_lock lock(mutex); if (webcamActive) { - webcamCapture.copyLatestFrame(latestWebcamFrame, latestWebcamWidth, latestWebcamHeight); + std::vector candidateWebcamFrame; + int candidateWebcamWidth = 0; + int candidateWebcamHeight = 0; + if (webcamCapture.copyLatestFrame(candidateWebcamFrame, candidateWebcamWidth, candidateWebcamHeight) && + hasVisibleBgraContent(candidateWebcamFrame)) { + latestWebcamFrame = std::move(candidateWebcamFrame); + latestWebcamWidth = candidateWebcamWidth; + latestWebcamHeight = candidateWebcamHeight; + hasVisibleWebcamFrame = true; + } } const BgraFrameView webcamFrame{ - latestWebcamFrame.empty() ? nullptr : latestWebcamFrame.data(), + hasVisibleWebcamFrame && !latestWebcamFrame.empty() ? latestWebcamFrame.data() : nullptr, latestWebcamWidth, latestWebcamHeight, }; @@ -583,12 +618,22 @@ int main(int argc, char* argv[]) { } webcamActive = true; const auto webcamDeadline = std::chrono::steady_clock::now() + std::chrono::seconds(3); - while (std::chrono::steady_clock::now() < webcamDeadline && - !webcamCapture.copyLatestFrame(latestWebcamFrame, latestWebcamWidth, latestWebcamHeight)) { + while (std::chrono::steady_clock::now() < webcamDeadline && !hasVisibleWebcamFrame) { + std::vector candidateWebcamFrame; + int candidateWebcamWidth = 0; + int candidateWebcamHeight = 0; + if (webcamCapture.copyLatestFrame(candidateWebcamFrame, candidateWebcamWidth, candidateWebcamHeight) && + hasVisibleBgraContent(candidateWebcamFrame)) { + latestWebcamFrame = std::move(candidateWebcamFrame); + latestWebcamWidth = candidateWebcamWidth; + latestWebcamHeight = candidateWebcamHeight; + hasVisibleWebcamFrame = true; + break; + } std::this_thread::sleep_for(std::chrono::milliseconds(20)); } - if (latestWebcamFrame.empty()) { - std::cerr << "WARNING: Native webcam started but no frame was available before screen capture" + if (!hasVisibleWebcamFrame) { + std::cerr << "WARNING: Native webcam started but no visible frame was available before screen capture" << std::endl; } }