diff --git a/docs/engineering/windows-native-recorder-roadmap.md b/docs/engineering/windows-native-recorder-roadmap.md index ac309f9..63abd1d 100644 --- a/docs/engineering/windows-native-recorder-roadmap.md +++ b/docs/engineering/windows-native-recorder-roadmap.md @@ -9,7 +9,7 @@ OpenScreen's Windows recorder should be owned by one native backend. Electron ca - Capture system audio through WASAPI loopback. - Capture microphone audio through WASAPI. - Mix system audio and microphone audio into the primary screen recording. -- Capture webcam video natively and keep it as a separate editable OpenScreen media stream. +- Capture webcam video natively and compose it into the Windows helper MP4 during the native-recording migration. - Keep preview/export aligned because screen video, audio, webcam, and cursor share one native timing origin. - Keep exported MP4s Windows-friendly: H.264 video plus AAC audio. Opus-in-MP4 is not an acceptable Windows export target. - Package the native helper with the Windows app. @@ -17,7 +17,7 @@ OpenScreen's Windows recorder should be owned by one native backend. Electron ca ## Non-Goals - Replacing the editor/export pipeline. -- Flattening webcam into the screen recording. The editor currently treats webcam as editable picture-in-picture media, so the native recorder should preserve a separate `webcamVideoPath`. +- Shipping webcam as a separate editable media stream in this milestone. A later pass can reintroduce a separate editable native `webcamVideoPath`; the current Windows-native milestone prioritizes a helper-owned multi-source MP4 with deterministic screen/audio/mic/webcam sync. - Adding a native fallback for macOS or Linux in this branch. 
## Target Architecture @@ -78,7 +78,6 @@ The helper receives a single JSON argument: }, "outputs": { "screenPath": "C:\\Users\\me\\recording-123.mp4", - "webcamPath": "C:\\Users\\me\\recording-123-webcam.mp4", "manifestPath": "C:\\Users\\me\\recording-123.session.json" } } @@ -90,7 +89,7 @@ The helper emits newline-delimited JSON events to stdout: { "event": "ready", "schemaVersion": 2 } { "event": "recording-started", "timestampMs": 1234567890 } { "event": "warning", "code": "audio-device-unavailable", "message": "..." } -{ "event": "recording-stopped", "screenPath": "...", "webcamPath": "..." } +{ "event": "recording-stopped", "screenPath": "..." } { "event": "error", "code": "unsupported-window-source", "message": "..." } ``` @@ -153,15 +152,16 @@ Acceptance: ### 4. Webcam Capture - Add Media Foundation webcam source reader. -- Select 1280x720/30fps or nearest supported format. -- Encode webcam to `recording--webcam.mp4`. -- Synchronize webcam timestamps to the native session clock. -- Store `webcamVideoPath` in the OpenScreen session manifest. +- Select requested dimensions/fps or the nearest format accepted by Media Foundation. +- Convert webcam samples to BGRA and compose them into the primary helper MP4 as an initial bottom-right picture-in-picture overlay. +- Keep the helper process as the single source of truth (SSOT) for screen/window, WASAPI system audio, microphone, webcam, and mux timing. +- Later: promote the same webcam capture source to a separate editable native `webcamVideoPath` if product requirements need post-recording layout edits. Acceptance: -- Editor loads the native screen recording and the native webcam recording. -- Webcam layout controls behave the same as today. +- Native display/window recordings can include webcam without returning to Electron capture. +- `npm run test:wgc-webcam:win` validates the helper path when a webcam is available and skips explicitly when no webcam device exists. 
+- Combined webcam + system audio + microphone produces one MP4 with H.264 video and AAC audio. ### 5. Native Window Capture diff --git a/electron/native/README.md b/electron/native/README.md index b366a1b..037b040 100644 --- a/electron/native/README.md +++ b/electron/native/README.md @@ -35,15 +35,18 @@ Current V2 JSON shape: "captureMic": false, "microphoneDeviceId": "default", "microphoneGain": 1.4, - "webcamEnabled": false, + "webcamEnabled": true, + "webcamDeviceId": "default", + "webcamWidth": 1280, + "webcamHeight": 720, + "webcamFps": 30, "outputs": { - "screenPath": "C:\\path\\recording-123.mp4", - "webcamPath": "C:\\path\\recording-123-webcam.mp4" + "screenPath": "C:\\path\\recording-123.mp4" } } ``` -The current helper implementation supports display/window video capture, system audio loopback, and initial default-microphone capture. Webcam capture now fails explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan. +The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, and Media Foundation webcam capture. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links; when the requested webcam is not matched, the helper logs a warning and uses the default webcam. 
Smoke-test the helper with: @@ -53,4 +56,5 @@ npm run test:wgc-window:win npm run test:wgc-audio:win npm run test:wgc-mic:win npm run test:wgc-mixed-audio:win +npm run test:wgc-webcam:win ``` diff --git a/electron/native/wgc-capture/CMakeLists.txt b/electron/native/wgc-capture/CMakeLists.txt index b21fd66..92b9335 100644 --- a/electron/native/wgc-capture/CMakeLists.txt +++ b/electron/native/wgc-capture/CMakeLists.txt @@ -23,6 +23,8 @@ add_executable(wgc-capture src/monitor_utils.h src/wasapi_loopback_capture.cpp src/wasapi_loopback_capture.h + src/webcam_capture.cpp + src/webcam_capture.h src/wgc_session.cpp src/wgc_session.h ) diff --git a/electron/native/wgc-capture/src/main.cpp b/electron/native/wgc-capture/src/main.cpp index 86f032e..bc82b22 100644 --- a/electron/native/wgc-capture/src/main.cpp +++ b/electron/native/wgc-capture/src/main.cpp @@ -2,6 +2,7 @@ #include "mf_encoder.h" #include "monitor_utils.h" #include "wasapi_loopback_capture.h" +#include "webcam_capture.h" #include "wgc_session.h" #include @@ -303,11 +304,6 @@ int main(int argc, char* argv[]) { std::cout << "{\"event\":\"ready\",\"schemaVersion\":2}" << std::endl; - if (config.webcamEnabled) { - std::cerr << "ERROR: Native webcam capture is not implemented in this helper yet" << std::endl; - return 1; - } - WgcSession session; if (config.sourceType == "display") { HMONITOR monitor = findMonitorForCapture( @@ -347,6 +343,22 @@ int main(int argc, char* argv[]) { const int pixels = width * height; const int bitrate = pixels >= 3840 * 2160 ? 45'000'000 : pixels >= 2560 * 1440 ? 28'000'000 : 18'000'000; + WebcamCapture webcamCapture; + bool webcamActive = false; + if (config.webcamEnabled) { + if (!webcamCapture.initialize( + utf8ToWide(config.webcamDeviceId), + config.webcamWidth, + config.webcamHeight, + config.webcamFps > 0 ? 
config.webcamFps : config.fps)) { + std::cerr << "ERROR: Failed to initialize native webcam capture" << std::endl; + return 1; + } + std::cout << "{\"event\":\"webcam-format\",\"schemaVersion\":2,\"width\":" << webcamCapture.width() + << ",\"height\":" << webcamCapture.height() + << ",\"fps\":" << webcamCapture.fps() << "}" << std::endl; + } + WasapiLoopbackCapture loopbackCapture; WasapiLoopbackCapture microphoneCapture; const AudioInputFormat* audioFormat = nullptr; @@ -398,6 +410,9 @@ int main(int argc, char* argv[]) { std::atomic firstFrameWritten = false; std::atomic encodeFailed = false; Microsoft::WRL::ComPtr latestFrameTexture; + std::vector latestWebcamFrame; + int latestWebcamWidth = 0; + int latestWebcamHeight = 0; session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) { (void)timestampHns; @@ -433,9 +448,18 @@ int main(int argc, char* argv[]) { while (!stopRequested && !encodeFailed) { { std::scoped_lock lock(mutex); + if (webcamActive) { + webcamCapture.copyLatestFrame(latestWebcamFrame, latestWebcamWidth, latestWebcamHeight); + } + const BgraFrameView webcamFrame{ + latestWebcamFrame.empty() ? nullptr : latestWebcamFrame.data(), + latestWebcamWidth, + latestWebcamHeight, + }; if (latestFrameTexture && !encoder.writeFrame( latestFrameTexture.Get(), - static_cast((frameIndex * 10'000'000ULL) / config.fps))) { + static_cast((frameIndex * 10'000'000ULL) / config.fps), + webcamFrame.data ? 
&webcamFrame : nullptr)) { encodeFailed = true; stopRequested = true; cv.notify_all(); @@ -528,8 +552,30 @@ int main(int argc, char* argv[]) { if (!startAudioCaptures()) { return 1; } + if (config.webcamEnabled) { + if (!webcamCapture.start()) { + microphoneCapture.stop(); + loopbackCapture.stop(); + if (audioMixer) { + audioMixer->stop(); + } + std::cerr << "ERROR: Failed to start native webcam capture" << std::endl; + return 1; + } + webcamActive = true; + const auto webcamDeadline = std::chrono::steady_clock::now() + std::chrono::seconds(3); + while (std::chrono::steady_clock::now() < webcamDeadline && + !webcamCapture.copyLatestFrame(latestWebcamFrame, latestWebcamWidth, latestWebcamHeight)) { + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + } + if (latestWebcamFrame.empty()) { + std::cerr << "WARNING: Native webcam started but no frame was available before screen capture" + << std::endl; + } + } if (!session.start()) { + webcamCapture.stop(); microphoneCapture.stop(); loopbackCapture.stop(); if (audioMixer) { @@ -554,6 +600,7 @@ int main(int argc, char* argv[]) { } microphoneCapture.stop(); loopbackCapture.stop(); + webcamCapture.stop(); if (audioMixer) { audioMixer->stop(); } @@ -580,6 +627,7 @@ int main(int argc, char* argv[]) { microphoneCapture.stop(); loopbackCapture.stop(); + webcamCapture.stop(); if (audioMixer) { audioMixer->stop(); } diff --git a/electron/native/wgc-capture/src/mf_encoder.cpp b/electron/native/wgc-capture/src/mf_encoder.cpp index fc95fc2..de9220f 100644 --- a/electron/native/wgc-capture/src/mf_encoder.cpp +++ b/electron/native/wgc-capture/src/mf_encoder.cpp @@ -38,6 +38,43 @@ void setAudioFormat(IMFMediaType* type, UINT32 channels, UINT32 sampleRate, UINT type->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitsPerSample); } +void compositeWebcam(BYTE* destination, int width, int height, const BgraFrameView& webcamFrame) { + if (!webcamFrame.data || webcamFrame.width <= 0 || webcamFrame.height <= 0 || width <= 0 || height <= 0) 
{ + return; + } + + const int margin = std::max(16, std::min(width, height) / 60); + const int maxOverlayWidth = std::max(2, width / 4); + int overlayWidth = maxOverlayWidth; + int overlayHeight = static_cast( + (static_cast(overlayWidth) * webcamFrame.height) / std::max(1, webcamFrame.width)); + const int maxOverlayHeight = std::max(2, height / 3); + if (overlayHeight > maxOverlayHeight) { + overlayHeight = maxOverlayHeight; + overlayWidth = static_cast( + (static_cast(overlayHeight) * webcamFrame.width) / std::max(1, webcamFrame.height)); + } + + overlayWidth = std::max(2, std::min(overlayWidth, width - margin * 2)); + overlayHeight = std::max(2, std::min(overlayHeight, height - margin * 2)); + const int originX = std::max(0, width - overlayWidth - margin); + const int originY = std::max(0, height - overlayHeight - margin); + + for (int y = 0; y < overlayHeight; y += 1) { + const int sourceY = static_cast((static_cast(y) * webcamFrame.height) / overlayHeight); + BYTE* destinationRow = destination + ((originY + y) * width + originX) * 4; + for (int x = 0; x < overlayWidth; x += 1) { + const int sourceX = static_cast((static_cast(x) * webcamFrame.width) / overlayWidth); + const BYTE* source = webcamFrame.data + (sourceY * webcamFrame.width + sourceX) * 4; + BYTE* target = destinationRow + x * 4; + target[0] = source[0]; + target[1] = source[1]; + target[2] = source[2]; + target[3] = 255; + } + } +} + } // namespace MFEncoder::~MFEncoder() { @@ -179,7 +216,11 @@ bool MFEncoder::ensureStagingTexture(ID3D11Texture2D* texture) { "CreateTexture2D(staging)"); } -bool MFEncoder::copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, DWORD destinationSize) { +bool MFEncoder::copyFrameToBuffer( + ID3D11Texture2D* texture, + BYTE* destination, + DWORD destinationSize, + const BgraFrameView* webcamFrame) { if (!ensureStagingTexture(texture)) { return false; } @@ -203,12 +244,15 @@ bool MFEncoder::copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, D for 
(int y = 0; y < height_; y += 1) { std::memcpy(destination + rowBytes * y, source + mapped.RowPitch * y, rowBytes); } + if (webcamFrame) { + compositeWebcam(destination, width_, height_, *webcamFrame); + } context_->Unmap(stagingTexture_.Get(), 0); return true; } -bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns) { +bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const BgraFrameView* webcamFrame) { std::scoped_lock writerLock(writerMutex_); if (!sinkWriter_ || finalized_) { return false; @@ -238,7 +282,7 @@ bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns) { return false; } - const bool copied = copyFrameToBuffer(texture, data, maxLength); + const bool copied = copyFrameToBuffer(texture, data, maxLength, webcamFrame); buffer->Unlock(); if (!copied) { return false; diff --git a/electron/native/wgc-capture/src/mf_encoder.h b/electron/native/wgc-capture/src/mf_encoder.h index b6db685..a82a940 100644 --- a/electron/native/wgc-capture/src/mf_encoder.h +++ b/electron/native/wgc-capture/src/mf_encoder.h @@ -11,6 +11,12 @@ #include #include +struct BgraFrameView { + const BYTE* data = nullptr; + int width = 0; + int height = 0; +}; + struct AudioInputFormat { GUID subtype = MFAudioFormat_PCM; UINT32 sampleRate = 0; @@ -37,13 +43,17 @@ public: ID3D11Device* device, ID3D11DeviceContext* context, const AudioInputFormat* audioFormat = nullptr); - bool writeFrame(ID3D11Texture2D* texture, int64_t timestampHns); + bool writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const BgraFrameView* webcamFrame = nullptr); bool writeAudio(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns); bool finalize(); private: bool ensureStagingTexture(ID3D11Texture2D* texture); - bool copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, DWORD destinationSize); + bool copyFrameToBuffer( + ID3D11Texture2D* texture, + BYTE* destination, + DWORD destinationSize, + const 
BgraFrameView* webcamFrame); bool configureAudioStream(const AudioInputFormat& audioFormat); Microsoft::WRL::ComPtr sinkWriter_; diff --git a/electron/native/wgc-capture/src/webcam_capture.cpp b/electron/native/wgc-capture/src/webcam_capture.cpp new file mode 100644 index 0000000..6b34a35 --- /dev/null +++ b/electron/native/wgc-capture/src/webcam_capture.cpp @@ -0,0 +1,275 @@ +#include "webcam_capture.h" + +#include +#include +#include + +#include +#include +#include + +namespace { + +bool succeeded(HRESULT hr, const char* label) { + if (SUCCEEDED(hr)) { + return true; + } + + std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")" + << std::endl; + return false; +} + +std::wstring readAllocatedString(IMFActivate* activate, REFGUID key) { + WCHAR* value = nullptr; + UINT32 length = 0; + if (FAILED(activate->GetAllocatedString(key, &value, &length)) || !value) { + return {}; + } + + std::wstring result(value, value + length); + CoTaskMemFree(value); + return result; +} + +bool containsInsensitive(const std::wstring& haystack, const std::wstring& needle) { + if (haystack.empty() || needle.empty()) { + return false; + } + + std::wstring lowerHaystack = haystack; + std::wstring lowerNeedle = needle; + std::transform(lowerHaystack.begin(), lowerHaystack.end(), lowerHaystack.begin(), ::towlower); + std::transform(lowerNeedle.begin(), lowerNeedle.end(), lowerNeedle.begin(), ::towlower); + return lowerHaystack.find(lowerNeedle) != std::wstring::npos || + lowerNeedle.find(lowerHaystack) != std::wstring::npos; +} + +} // namespace + +WebcamCapture::~WebcamCapture() { + stop(); +} + +bool WebcamCapture::initialize(const std::wstring& deviceId, int requestedWidth, int requestedHeight, int requestedFps) { + fps_ = std::clamp(requestedFps > 0 ? 
requestedFps : 30, 1, 60); + if (!succeeded(MFStartup(MF_VERSION), "MFStartup(webcam)")) { + return false; + } + mfStarted_ = true; + if (!selectDevice(deviceId)) { + return false; + } + + return configureReader(requestedWidth, requestedHeight, fps_); +} + +bool WebcamCapture::selectDevice(const std::wstring& deviceId) { + Microsoft::WRL::ComPtr attributes; + if (!succeeded(MFCreateAttributes(&attributes, 1), "MFCreateAttributes(webcam enumeration)")) { + return false; + } + if (!succeeded(attributes->SetGUID( + MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, + MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID), + "SetGUID(webcam source type)")) { + return false; + } + + IMFActivate** devices = nullptr; + UINT32 deviceCount = 0; + HRESULT hr = MFEnumDeviceSources(attributes.Get(), &devices, &deviceCount); + if (!succeeded(hr, "MFEnumDeviceSources") || deviceCount == 0) { + if (devices) { + CoTaskMemFree(devices); + } + std::cerr << "ERROR: No native Windows webcam devices were found" << std::endl; + return false; + } + + UINT32 selectedIndex = 0; + for (UINT32 index = 0; index < deviceCount; index += 1) { + const std::wstring name = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); + const std::wstring symbolicLink = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK); + if (!deviceId.empty() && (containsInsensitive(symbolicLink, deviceId) || containsInsensitive(name, deviceId))) { + selectedIndex = index; + break; + } + } + + if (!deviceId.empty() && selectedIndex == 0) { + const std::wstring firstName = readAllocatedString(devices[0], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); + const std::wstring firstLink = readAllocatedString(devices[0], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK); + if (!containsInsensitive(firstLink, deviceId) && !containsInsensitive(firstName, deviceId)) { + std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; using default webcam" + << std::endl; + } + 
} + + selectedDeviceName_ = readAllocatedString(devices[selectedIndex], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME); + hr = devices[selectedIndex]->ActivateObject(IID_PPV_ARGS(&mediaSource_)); + + for (UINT32 index = 0; index < deviceCount; index += 1) { + devices[index]->Release(); + } + CoTaskMemFree(devices); + + return succeeded(hr, "ActivateObject(webcam)"); +} + +bool WebcamCapture::configureReader(int requestedWidth, int requestedHeight, int requestedFps) { + Microsoft::WRL::ComPtr attributes; + if (!succeeded(MFCreateAttributes(&attributes, 2), "MFCreateAttributes(webcam reader)")) { + return false; + } + attributes->SetUINT32(MF_SOURCE_READER_ENABLE_VIDEO_PROCESSING, TRUE); + attributes->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, FALSE); + + if (!succeeded(MFCreateSourceReaderFromMediaSource(mediaSource_.Get(), attributes.Get(), &sourceReader_), + "MFCreateSourceReaderFromMediaSource(webcam)")) { + return false; + } + + Microsoft::WRL::ComPtr mediaType; + if (!succeeded(MFCreateMediaType(&mediaType), "MFCreateMediaType(webcam output)")) { + return false; + } + mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video); + mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32); + if (requestedWidth > 0 && requestedHeight > 0) { + MFSetAttributeSize(mediaType.Get(), MF_MT_FRAME_SIZE, static_cast(requestedWidth), static_cast(requestedHeight)); + } + MFSetAttributeRatio(mediaType.Get(), MF_MT_FRAME_RATE, static_cast(std::max(1, requestedFps)), 1); + + if (!succeeded(sourceReader_->SetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, nullptr, mediaType.Get()), + "SetCurrentMediaType(webcam RGB32)")) { + return false; + } + sourceReader_->SetStreamSelection(MF_SOURCE_READER_ALL_STREAMS, FALSE); + sourceReader_->SetStreamSelection(MF_SOURCE_READER_FIRST_VIDEO_STREAM, TRUE); + + Microsoft::WRL::ComPtr currentType; + if (!succeeded(sourceReader_->GetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, &currentType), + "GetCurrentMediaType(webcam)")) { + return false; + } + + 
UINT32 width = 0; + UINT32 height = 0; + if (FAILED(MFGetAttributeSize(currentType.Get(), MF_MT_FRAME_SIZE, &width, &height)) || width == 0 || height == 0) { + width = static_cast(requestedWidth > 0 ? requestedWidth : 1280); + height = static_cast(requestedHeight > 0 ? requestedHeight : 720); + } + width_ = static_cast(width); + height_ = static_cast(height); + return true; +} + +bool WebcamCapture::start() { + if (!sourceReader_ || thread_.joinable()) { + return false; + } + + stopRequested_ = false; + thread_ = std::thread(&WebcamCapture::captureLoop, this); + return true; +} + +void WebcamCapture::stop() { + stopRequested_ = true; + if (thread_.joinable()) { + thread_.join(); + } + if (mediaSource_) { + mediaSource_->Shutdown(); + } + sourceReader_.Reset(); + mediaSource_.Reset(); + if (mfStarted_) { + MFShutdown(); + mfStarted_ = false; + } +} + +void WebcamCapture::captureLoop() { + CoInitializeEx(nullptr, COINIT_MULTITHREADED); + + while (!stopRequested_) { + DWORD streamIndex = 0; + DWORD flags = 0; + LONGLONG timestamp = 0; + Microsoft::WRL::ComPtr sample; + HRESULT hr = sourceReader_->ReadSample( + MF_SOURCE_READER_FIRST_VIDEO_STREAM, + 0, + &streamIndex, + &flags, + &timestamp, + &sample); + (void)streamIndex; + (void)timestamp; + + if (FAILED(hr)) { + std::cerr << "WARNING: Failed to read webcam sample (hr=0x" << std::hex << hr << std::dec << ")" + << std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds(20)); + continue; + } + if ((flags & MF_SOURCE_READERF_ENDOFSTREAM) != 0) { + break; + } + if (!sample) { + continue; + } + + Microsoft::WRL::ComPtr buffer; + if (FAILED(sample->ConvertToContiguousBuffer(&buffer)) || !buffer) { + continue; + } + + BYTE* data = nullptr; + DWORD maxLength = 0; + DWORD currentLength = 0; + if (FAILED(buffer->Lock(&data, &maxLength, &currentLength)) || !data) { + continue; + } + + const DWORD expectedLength = static_cast(std::max(0, width_) * std::max(0, height_) * 4); + if (currentLength >= expectedLength && 
expectedLength > 0) { + std::scoped_lock lock(frameMutex_); + latestFrame_.assign(data, data + expectedLength); + } + + buffer->Unlock(); + } + + CoUninitialize(); +} + +bool WebcamCapture::copyLatestFrame(std::vector& destination, int& width, int& height) { + std::scoped_lock lock(frameMutex_); + if (latestFrame_.empty() || width_ <= 0 || height_ <= 0) { + return false; + } + + destination = latestFrame_; + width = width_; + height = height_; + return true; +} + +int WebcamCapture::width() const { + return width_; +} + +int WebcamCapture::height() const { + return height_; +} + +int WebcamCapture::fps() const { + return fps_; +} + +const std::wstring& WebcamCapture::selectedDeviceName() const { + return selectedDeviceName_; +} diff --git a/electron/native/wgc-capture/src/webcam_capture.h b/electron/native/wgc-capture/src/webcam_capture.h new file mode 100644 index 0000000..7d5f904 --- /dev/null +++ b/electron/native/wgc-capture/src/webcam_capture.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +class WebcamCapture { +public: + WebcamCapture() = default; + ~WebcamCapture(); + + WebcamCapture(const WebcamCapture&) = delete; + WebcamCapture& operator=(const WebcamCapture&) = delete; + + bool initialize(const std::wstring& deviceId, int requestedWidth, int requestedHeight, int requestedFps); + bool start(); + void stop(); + bool copyLatestFrame(std::vector& destination, int& width, int& height); + + int width() const; + int height() const; + int fps() const; + const std::wstring& selectedDeviceName() const; + +private: + bool selectDevice(const std::wstring& deviceId); + bool configureReader(int requestedWidth, int requestedHeight, int requestedFps); + void captureLoop(); + + Microsoft::WRL::ComPtr mediaSource_; + Microsoft::WRL::ComPtr sourceReader_; + std::thread thread_; + std::atomic stopRequested_ = false; + std::mutex frameMutex_; + std::vector latestFrame_; + int width_ = 
0; + int height_ = 0; + int fps_ = 30; + bool mfStarted_ = false; + std::wstring selectedDeviceName_; +}; diff --git a/package.json b/package.json index 8ff2cb5..0d64e14 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,8 @@ "test:wgc-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio", "test:wgc-mic:win": "node scripts/test-windows-wgc-helper.mjs --microphone", "test:wgc-mixed-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio --microphone", + "test:wgc-webcam:win": "node scripts/test-windows-wgc-helper.mjs --webcam", + "test:wgc-full:win": "node scripts/test-windows-wgc-helper.mjs --webcam --system-audio --microphone", "capture:openscreen-preview": "node scripts/capture-openscreen-preview.mjs", "build-vite": "tsc && vite build", "test:browser": "vitest --config vitest.browser.config.ts --run", diff --git a/scripts/test-windows-wgc-helper.mjs b/scripts/test-windows-wgc-helper.mjs index 6b5a626..3bdba57 100644 --- a/scripts/test-windows-wgc-helper.mjs +++ b/scripts/test-windows-wgc-helper.mjs @@ -21,6 +21,8 @@ const WITH_MICROPHONE = process.argv.includes("--mic"); const WITH_WINDOW = process.env.OPENSCREEN_WGC_TEST_WINDOW === "true" || process.argv.includes("--window"); +const WITH_WEBCAM = + process.env.OPENSCREEN_WGC_TEST_WEBCAM === "true" || process.argv.includes("--webcam"); function runHelper(config) { return new Promise((resolve, reject) => { @@ -31,21 +33,34 @@ function runHelper(config) { let stdout = ""; let stderr = ""; + let stopTimer = null; + const scheduleStop = () => { + if (stopTimer) { + return; + } + stopTimer = setTimeout(() => { + child.stdin.write("stop\n"); + }, DURATION_MS); + }; + const fallbackTimer = setTimeout(scheduleStop, 15_000); child.stdout.on("data", (chunk) => { stdout += chunk.toString(); + if (stdout.includes('"recording-started"') || stdout.includes("Recording started")) { + scheduleStop(); + } }); child.stderr.on("data", (chunk) => { stderr += chunk.toString(); }); 
child.once("error", reject); child.once("exit", (code) => { + clearTimeout(fallbackTimer); + if (stopTimer) { + clearTimeout(stopTimer); + } resolve({ code, stdout, stderr }); }); - - setTimeout(() => { - child.stdin.write("stop\n"); - }, DURATION_MS); }); } @@ -149,7 +164,7 @@ if (!fs.existsSync(HELPER_PATH)) { const outputPath = path.join( os.tmpdir(), - `openscreen-wgc-helper-${WITH_WINDOW ? "window" : WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`, + `openscreen-wgc-helper-${WITH_WEBCAM ? "webcam" : WITH_WINDOW ? "window" : WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`, ); const fixtureWindow = WITH_WINDOW ? await startFixtureWindow() : null; @@ -173,7 +188,11 @@ const config = { captureMic: WITH_MICROPHONE, microphoneDeviceId: "default", microphoneGain: 1.4, - webcamEnabled: false, + webcamEnabled: WITH_WEBCAM, + webcamDeviceId: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_ID ?? 
"", + webcamWidth: 640, + webcamHeight: 360, + webcamFps: 30, outputs: { screenPath: outputPath }, }; @@ -186,6 +205,15 @@ try { } } if (result.code !== 0) { + if ( + WITH_WEBCAM && + /No native Windows webcam devices were found|Failed to initialize native webcam/.test( + result.stderr, + ) + ) { + console.log("Skipping WGC webcam smoke test: no native Windows webcam device is available."); + process.exit(0); + } throw new Error(`WGC helper exited with ${result.code}\n${result.stdout}\n${result.stderr}`); } if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) { diff --git a/src/hooks/useScreenRecorder.ts b/src/hooks/useScreenRecorder.ts index 88ba90a..3947954 100644 --- a/src/hooks/useScreenRecorder.ts +++ b/src/hooks/useScreenRecorder.ts @@ -182,6 +182,20 @@ export function useScreenRecorder(): UseScreenRecorderReturn { } }, []); + const stopWebcamPreviewStream = useCallback(() => { + if (!webcamStream.current) { + return; + } + + webcamAcquireId.current++; + webcamStream.current.getTracks().forEach((track) => { + track.onended = null; + track.stop(); + }); + webcamStream.current = null; + webcamReady.current = true; + }, []); + const setWebcamEnabled = useCallback( async (enabled: boolean) => { if (!enabled) { @@ -577,6 +591,9 @@ export function useScreenRecorder(): UseScreenRecorderReturn { const displayId = Number(selectedSource.display_id); const sourceType = selectedSource.id.startsWith("window:") ? "window" : "display"; const windowHandle = parseWindowHandleFromSourceId(selectedSource.id); + if (webcamEnabled) { + stopWebcamPreviewStream(); + } const request: NativeWindowsRecordingRequest = { recordingId: activeRecordingId, source: {