feat: add native Windows webcam composition
This commit is contained in:
@@ -9,7 +9,7 @@ OpenScreen's Windows recorder should be owned by one native backend. Electron ca
|
||||
- Capture system audio through WASAPI loopback.
|
||||
- Capture microphone audio through WASAPI.
|
||||
- Mix system audio and microphone audio into the primary screen recording.
|
||||
- Capture webcam video natively and keep it as a separate editable OpenScreen media stream.
|
||||
- Capture webcam video natively and compose it into the Windows helper MP4 during the native-recording migration.
|
||||
- Keep preview/export aligned because screen video, audio, webcam, and cursor share one native timing origin.
|
||||
- Keep exported MP4s Windows-friendly: H.264 video plus AAC audio. Opus-in-MP4 is not an acceptable Windows export target.
|
||||
- Package the native helper with the Windows app.
|
||||
@@ -17,7 +17,7 @@ OpenScreen's Windows recorder should be owned by one native backend. Electron ca
|
||||
## Non-Goals
|
||||
|
||||
- Replacing the editor/export pipeline.
|
||||
- Flattening webcam into the screen recording. The editor currently treats webcam as editable picture-in-picture media, so the native recorder should preserve a separate `webcamVideoPath`.
|
||||
- Replacing the editor/export pipeline. A later pass can reintroduce a separate editable native `webcamVideoPath`; the current Windows-native milestone prioritizes a single helper-owned MP4 that combines all capture streams with deterministic screen/audio/mic/webcam sync.
|
||||
- Adding a native fallback for macOS or Linux in this branch.
|
||||
|
||||
## Target Architecture
|
||||
@@ -78,7 +78,6 @@ The helper receives a single JSON argument:
|
||||
},
|
||||
"outputs": {
|
||||
"screenPath": "C:\\Users\\me\\recording-123.mp4",
|
||||
"webcamPath": "C:\\Users\\me\\recording-123-webcam.mp4",
|
||||
"manifestPath": "C:\\Users\\me\\recording-123.session.json"
|
||||
}
|
||||
}
|
||||
@@ -90,7 +89,7 @@ The helper emits newline-delimited JSON events to stdout:
|
||||
{ "event": "ready", "schemaVersion": 2 }
|
||||
{ "event": "recording-started", "timestampMs": 1234567890 }
|
||||
{ "event": "warning", "code": "audio-device-unavailable", "message": "..." }
|
||||
{ "event": "recording-stopped", "screenPath": "...", "webcamPath": "..." }
|
||||
{ "event": "recording-stopped", "screenPath": "..." }
|
||||
{ "event": "error", "code": "unsupported-window-source", "message": "..." }
|
||||
```
|
||||
|
||||
@@ -153,15 +152,16 @@ Acceptance:
|
||||
### 4. Webcam Capture
|
||||
|
||||
- Add Media Foundation webcam source reader.
|
||||
- Select 1280x720/30fps or nearest supported format.
|
||||
- Encode webcam to `recording-<id>-webcam.mp4`.
|
||||
- Synchronize webcam timestamps to the native session clock.
|
||||
- Store `webcamVideoPath` in the OpenScreen session manifest.
|
||||
- Select requested dimensions/fps or the nearest format accepted by Media Foundation.
|
||||
- Convert webcam samples to BGRA and compose them into the primary helper MP4 as an initial bottom-right picture-in-picture overlay.
|
||||
- Keep the helper process as the single source of truth (SSOT) for screen/window, WASAPI system audio, microphone, webcam, and mux timing.
|
||||
- Later: promote the same webcam capture source to a separate editable native `webcamVideoPath` if product requirements need post-recording layout edits.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Editor loads the native screen recording and the native webcam recording.
|
||||
- Webcam layout controls behave the same as today.
|
||||
- Native display/window recordings can include webcam without returning to Electron capture.
|
||||
- `npm run test:wgc-webcam:win` validates the helper path when a webcam is available and skips explicitly when no webcam device exists.
|
||||
- Combined webcam + system audio + microphone produces one MP4 with H.264 video and AAC audio.
|
||||
|
||||
### 5. Native Window Capture
|
||||
|
||||
|
||||
@@ -35,15 +35,18 @@ Current V2 JSON shape:
|
||||
"captureMic": false,
|
||||
"microphoneDeviceId": "default",
|
||||
"microphoneGain": 1.4,
|
||||
"webcamEnabled": false,
|
||||
"webcamEnabled": true,
|
||||
"webcamDeviceId": "default",
|
||||
"webcamWidth": 1280,
|
||||
"webcamHeight": 720,
|
||||
"webcamFps": 30,
|
||||
"outputs": {
|
||||
"screenPath": "C:\\path\\recording-123.mp4",
|
||||
"webcamPath": "C:\\path\\recording-123-webcam.mp4"
|
||||
"screenPath": "C:\\path\\recording-123.mp4"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The current helper implementation supports display/window video capture, system audio loopback, and initial default-microphone capture. Webcam capture now fails explicitly in the helper rather than silently falling back to Electron capture on Windows. See `docs/engineering/windows-native-recorder-roadmap.md` for the phased implementation plan.
|
||||
The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, and Media Foundation webcam capture. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links; when the requested webcam is not matched, the helper logs a warning and uses the default webcam.
|
||||
|
||||
Smoke-test the helper with:
|
||||
|
||||
@@ -53,4 +56,5 @@ npm run test:wgc-window:win
|
||||
npm run test:wgc-audio:win
|
||||
npm run test:wgc-mic:win
|
||||
npm run test:wgc-mixed-audio:win
|
||||
npm run test:wgc-webcam:win
|
||||
```
|
||||
|
||||
@@ -23,6 +23,8 @@ add_executable(wgc-capture
|
||||
src/monitor_utils.h
|
||||
src/wasapi_loopback_capture.cpp
|
||||
src/wasapi_loopback_capture.h
|
||||
src/webcam_capture.cpp
|
||||
src/webcam_capture.h
|
||||
src/wgc_session.cpp
|
||||
src/wgc_session.h
|
||||
)
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#include "mf_encoder.h"
|
||||
#include "monitor_utils.h"
|
||||
#include "wasapi_loopback_capture.h"
|
||||
#include "webcam_capture.h"
|
||||
#include "wgc_session.h"
|
||||
|
||||
#include <winrt/Windows.Foundation.h>
|
||||
@@ -303,11 +304,6 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
std::cout << "{\"event\":\"ready\",\"schemaVersion\":2}" << std::endl;
|
||||
|
||||
if (config.webcamEnabled) {
|
||||
std::cerr << "ERROR: Native webcam capture is not implemented in this helper yet" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
WgcSession session;
|
||||
if (config.sourceType == "display") {
|
||||
HMONITOR monitor = findMonitorForCapture(
|
||||
@@ -347,6 +343,22 @@ int main(int argc, char* argv[]) {
|
||||
const int pixels = width * height;
|
||||
const int bitrate = pixels >= 3840 * 2160 ? 45'000'000 : pixels >= 2560 * 1440 ? 28'000'000 : 18'000'000;
|
||||
|
||||
WebcamCapture webcamCapture;
|
||||
bool webcamActive = false;
|
||||
if (config.webcamEnabled) {
|
||||
if (!webcamCapture.initialize(
|
||||
utf8ToWide(config.webcamDeviceId),
|
||||
config.webcamWidth,
|
||||
config.webcamHeight,
|
||||
config.webcamFps > 0 ? config.webcamFps : config.fps)) {
|
||||
std::cerr << "ERROR: Failed to initialize native webcam capture" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::cout << "{\"event\":\"webcam-format\",\"schemaVersion\":2,\"width\":" << webcamCapture.width()
|
||||
<< ",\"height\":" << webcamCapture.height()
|
||||
<< ",\"fps\":" << webcamCapture.fps() << "}" << std::endl;
|
||||
}
|
||||
|
||||
WasapiLoopbackCapture loopbackCapture;
|
||||
WasapiLoopbackCapture microphoneCapture;
|
||||
const AudioInputFormat* audioFormat = nullptr;
|
||||
@@ -398,6 +410,9 @@ int main(int argc, char* argv[]) {
|
||||
std::atomic<bool> firstFrameWritten = false;
|
||||
std::atomic<bool> encodeFailed = false;
|
||||
Microsoft::WRL::ComPtr<ID3D11Texture2D> latestFrameTexture;
|
||||
std::vector<BYTE> latestWebcamFrame;
|
||||
int latestWebcamWidth = 0;
|
||||
int latestWebcamHeight = 0;
|
||||
|
||||
session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) {
|
||||
(void)timestampHns;
|
||||
@@ -433,9 +448,18 @@ int main(int argc, char* argv[]) {
|
||||
while (!stopRequested && !encodeFailed) {
|
||||
{
|
||||
std::scoped_lock lock(mutex);
|
||||
if (webcamActive) {
|
||||
webcamCapture.copyLatestFrame(latestWebcamFrame, latestWebcamWidth, latestWebcamHeight);
|
||||
}
|
||||
const BgraFrameView webcamFrame{
|
||||
latestWebcamFrame.empty() ? nullptr : latestWebcamFrame.data(),
|
||||
latestWebcamWidth,
|
||||
latestWebcamHeight,
|
||||
};
|
||||
if (latestFrameTexture && !encoder.writeFrame(
|
||||
latestFrameTexture.Get(),
|
||||
static_cast<int64_t>((frameIndex * 10'000'000ULL) / config.fps))) {
|
||||
static_cast<int64_t>((frameIndex * 10'000'000ULL) / config.fps),
|
||||
webcamFrame.data ? &webcamFrame : nullptr)) {
|
||||
encodeFailed = true;
|
||||
stopRequested = true;
|
||||
cv.notify_all();
|
||||
@@ -528,8 +552,30 @@ int main(int argc, char* argv[]) {
|
||||
if (!startAudioCaptures()) {
|
||||
return 1;
|
||||
}
|
||||
if (config.webcamEnabled) {
|
||||
if (!webcamCapture.start()) {
|
||||
microphoneCapture.stop();
|
||||
loopbackCapture.stop();
|
||||
if (audioMixer) {
|
||||
audioMixer->stop();
|
||||
}
|
||||
std::cerr << "ERROR: Failed to start native webcam capture" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
webcamActive = true;
|
||||
const auto webcamDeadline = std::chrono::steady_clock::now() + std::chrono::seconds(3);
|
||||
while (std::chrono::steady_clock::now() < webcamDeadline &&
|
||||
!webcamCapture.copyLatestFrame(latestWebcamFrame, latestWebcamWidth, latestWebcamHeight)) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(20));
|
||||
}
|
||||
if (latestWebcamFrame.empty()) {
|
||||
std::cerr << "WARNING: Native webcam started but no frame was available before screen capture"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (!session.start()) {
|
||||
webcamCapture.stop();
|
||||
microphoneCapture.stop();
|
||||
loopbackCapture.stop();
|
||||
if (audioMixer) {
|
||||
@@ -554,6 +600,7 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
microphoneCapture.stop();
|
||||
loopbackCapture.stop();
|
||||
webcamCapture.stop();
|
||||
if (audioMixer) {
|
||||
audioMixer->stop();
|
||||
}
|
||||
@@ -580,6 +627,7 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
microphoneCapture.stop();
|
||||
loopbackCapture.stop();
|
||||
webcamCapture.stop();
|
||||
if (audioMixer) {
|
||||
audioMixer->stop();
|
||||
}
|
||||
|
||||
@@ -38,6 +38,43 @@ void setAudioFormat(IMFMediaType* type, UINT32 channels, UINT32 sampleRate, UINT
|
||||
type->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitsPerSample);
|
||||
}
|
||||
|
||||
// Draws the webcam frame as a picture-in-picture overlay near the bottom-right
// corner of a BGRA destination image.
//
// destination  Destination pixels, indexed as `width * 4` bytes per row.
// width/height Destination size in pixels.
// webcamFrame  Source BGRA frame, indexed as `webcamFrame.width * 4` bytes per row.
//
// The overlay is at most a quarter of the destination width and a third of its
// height, keeps the webcam aspect ratio while it fits those limits, and is
// scaled with nearest-neighbour sampling. Alpha is forced to opaque. The call
// is a no-op when any pointer is null or any dimension is non-positive.
void compositeWebcam(BYTE* destination, int width, int height, const BgraFrameView& webcamFrame) {
    if (!destination || !webcamFrame.data || webcamFrame.width <= 0 || webcamFrame.height <= 0 || width <= 0 ||
        height <= 0) {
        return;
    }

    const int margin = std::max(16, std::min(width, height) / 60);
    const int maxOverlayWidth = std::max(2, width / 4);
    const int maxOverlayHeight = std::max(2, height / 3);

    // Start from the widest allowed overlay and derive the height from the
    // webcam aspect ratio; if that is too tall, pin the height and re-derive
    // the width instead.
    int overlayWidth = maxOverlayWidth;
    int overlayHeight = static_cast<int>(
        (static_cast<int64_t>(overlayWidth) * webcamFrame.height) / webcamFrame.width);
    if (overlayHeight > maxOverlayHeight) {
        overlayHeight = maxOverlayHeight;
        overlayWidth = static_cast<int>(
            (static_cast<int64_t>(overlayHeight) * webcamFrame.width) / webcamFrame.height);
    }

    overlayWidth = std::max(2, std::min(overlayWidth, width - margin * 2));
    overlayHeight = std::max(2, std::min(overlayHeight, height - margin * 2));

    // The 2-pixel floor above can exceed a destination that is narrower or
    // shorter than 2 pixels; clamp so no write lands outside the image.
    overlayWidth = std::min(overlayWidth, width);
    overlayHeight = std::min(overlayHeight, height);

    const int originX = std::max(0, width - overlayWidth - margin);
    const int originY = std::max(0, height - overlayHeight - margin);

    for (int y = 0; y < overlayHeight; y += 1) {
        const int sourceY = static_cast<int>((static_cast<int64_t>(y) * webcamFrame.height) / overlayHeight);
        // Offsets are computed in size_t so large frames cannot overflow int.
        const BYTE* sourceRow = webcamFrame.data + static_cast<size_t>(sourceY) * webcamFrame.width * 4;
        BYTE* destinationRow = destination + (static_cast<size_t>(originY + y) * width + originX) * 4;
        for (int x = 0; x < overlayWidth; x += 1) {
            const int sourceX = static_cast<int>((static_cast<int64_t>(x) * webcamFrame.width) / overlayWidth);
            const BYTE* source = sourceRow + static_cast<size_t>(sourceX) * 4;
            BYTE* target = destinationRow + static_cast<size_t>(x) * 4;
            target[0] = source[0];
            target[1] = source[1];
            target[2] = source[2];
            target[3] = 255;  // overlay is always fully opaque
        }
    }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
MFEncoder::~MFEncoder() {
|
||||
@@ -179,7 +216,11 @@ bool MFEncoder::ensureStagingTexture(ID3D11Texture2D* texture) {
|
||||
"CreateTexture2D(staging)");
|
||||
}
|
||||
|
||||
bool MFEncoder::copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, DWORD destinationSize) {
|
||||
bool MFEncoder::copyFrameToBuffer(
|
||||
ID3D11Texture2D* texture,
|
||||
BYTE* destination,
|
||||
DWORD destinationSize,
|
||||
const BgraFrameView* webcamFrame) {
|
||||
if (!ensureStagingTexture(texture)) {
|
||||
return false;
|
||||
}
|
||||
@@ -203,12 +244,15 @@ bool MFEncoder::copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, D
|
||||
for (int y = 0; y < height_; y += 1) {
|
||||
std::memcpy(destination + rowBytes * y, source + mapped.RowPitch * y, rowBytes);
|
||||
}
|
||||
if (webcamFrame) {
|
||||
compositeWebcam(destination, width_, height_, *webcamFrame);
|
||||
}
|
||||
|
||||
context_->Unmap(stagingTexture_.Get(), 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns) {
|
||||
bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const BgraFrameView* webcamFrame) {
|
||||
std::scoped_lock writerLock(writerMutex_);
|
||||
if (!sinkWriter_ || finalized_) {
|
||||
return false;
|
||||
@@ -238,7 +282,7 @@ bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool copied = copyFrameToBuffer(texture, data, maxLength);
|
||||
const bool copied = copyFrameToBuffer(texture, data, maxLength, webcamFrame);
|
||||
buffer->Unlock();
|
||||
if (!copied) {
|
||||
return false;
|
||||
|
||||
@@ -11,6 +11,12 @@
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
struct BgraFrameView {
|
||||
const BYTE* data = nullptr;
|
||||
int width = 0;
|
||||
int height = 0;
|
||||
};
|
||||
|
||||
struct AudioInputFormat {
|
||||
GUID subtype = MFAudioFormat_PCM;
|
||||
UINT32 sampleRate = 0;
|
||||
@@ -37,13 +43,17 @@ public:
|
||||
ID3D11Device* device,
|
||||
ID3D11DeviceContext* context,
|
||||
const AudioInputFormat* audioFormat = nullptr);
|
||||
bool writeFrame(ID3D11Texture2D* texture, int64_t timestampHns);
|
||||
bool writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const BgraFrameView* webcamFrame = nullptr);
|
||||
bool writeAudio(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns);
|
||||
bool finalize();
|
||||
|
||||
private:
|
||||
bool ensureStagingTexture(ID3D11Texture2D* texture);
|
||||
bool copyFrameToBuffer(ID3D11Texture2D* texture, BYTE* destination, DWORD destinationSize);
|
||||
bool copyFrameToBuffer(
|
||||
ID3D11Texture2D* texture,
|
||||
BYTE* destination,
|
||||
DWORD destinationSize,
|
||||
const BgraFrameView* webcamFrame);
|
||||
bool configureAudioStream(const AudioInputFormat& audioFormat);
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFSinkWriter> sinkWriter_;
|
||||
|
||||
@@ -0,0 +1,275 @@
|
||||
#include "webcam_capture.h"
|
||||
|
||||
#include <mfapi.h>
|
||||
#include <mferror.h>
|
||||
#include <propvarutil.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns true when `hr` is a success code. On failure, writes an error line
// containing `label` and the HRESULT in hexadecimal to stderr and returns false.
bool succeeded(HRESULT hr, const char* label) {
    if (FAILED(hr)) {
        std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")"
                  << std::endl;
        return false;
    }

    return true;
}
|
||||
|
||||
// Reads a string attribute from a device activation object and returns it as
// a std::wstring. Returns an empty string when the attribute cannot be read.
// The buffer handed back by GetAllocatedString is released with CoTaskMemFree.
std::wstring readAllocatedString(IMFActivate* activate, REFGUID key) {
    WCHAR* allocated = nullptr;
    UINT32 characterCount = 0;
    const bool readFailed = FAILED(activate->GetAllocatedString(key, &allocated, &characterCount));
    if (readFailed || !allocated) {
        return {};
    }

    std::wstring text(allocated, allocated + characterCount);
    CoTaskMemFree(allocated);
    return text;
}
|
||||
|
||||
// Case-insensitive, bidirectional substring test: true when either string
// contains the other after lower-casing. Two empty inputs, or one empty input,
// never match.
bool containsInsensitive(const std::wstring& haystack, const std::wstring& needle) {
    if (haystack.empty() || needle.empty()) {
        return false;
    }

    const auto toLower = [](std::wstring text) {
        std::transform(text.begin(), text.end(), text.begin(), ::towlower);
        return text;
    };
    const std::wstring foldedHaystack = toLower(haystack);
    const std::wstring foldedNeedle = toLower(needle);

    if (foldedHaystack.find(foldedNeedle) != std::wstring::npos) {
        return true;
    }
    return foldedNeedle.find(foldedHaystack) != std::wstring::npos;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stops capture on destruction: stop() joins the reader thread and releases
// the Media Foundation objects, so the thread never outlives this object.
WebcamCapture::~WebcamCapture() {
    stop();
}
|
||||
|
||||
bool WebcamCapture::initialize(const std::wstring& deviceId, int requestedWidth, int requestedHeight, int requestedFps) {
|
||||
fps_ = std::clamp(requestedFps > 0 ? requestedFps : 30, 1, 60);
|
||||
if (!succeeded(MFStartup(MF_VERSION), "MFStartup(webcam)")) {
|
||||
return false;
|
||||
}
|
||||
mfStarted_ = true;
|
||||
if (!selectDevice(deviceId)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return configureReader(requestedWidth, requestedHeight, fps_);
|
||||
}
|
||||
|
||||
bool WebcamCapture::selectDevice(const std::wstring& deviceId) {
|
||||
Microsoft::WRL::ComPtr<IMFAttributes> attributes;
|
||||
if (!succeeded(MFCreateAttributes(&attributes, 1), "MFCreateAttributes(webcam enumeration)")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(attributes->SetGUID(
|
||||
MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE,
|
||||
MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID),
|
||||
"SetGUID(webcam source type)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
IMFActivate** devices = nullptr;
|
||||
UINT32 deviceCount = 0;
|
||||
HRESULT hr = MFEnumDeviceSources(attributes.Get(), &devices, &deviceCount);
|
||||
if (!succeeded(hr, "MFEnumDeviceSources") || deviceCount == 0) {
|
||||
if (devices) {
|
||||
CoTaskMemFree(devices);
|
||||
}
|
||||
std::cerr << "ERROR: No native Windows webcam devices were found" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT32 selectedIndex = 0;
|
||||
for (UINT32 index = 0; index < deviceCount; index += 1) {
|
||||
const std::wstring name = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
|
||||
const std::wstring symbolicLink = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK);
|
||||
if (!deviceId.empty() && (containsInsensitive(symbolicLink, deviceId) || containsInsensitive(name, deviceId))) {
|
||||
selectedIndex = index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!deviceId.empty() && selectedIndex == 0) {
|
||||
const std::wstring firstName = readAllocatedString(devices[0], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
|
||||
const std::wstring firstLink = readAllocatedString(devices[0], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK);
|
||||
if (!containsInsensitive(firstLink, deviceId) && !containsInsensitive(firstName, deviceId)) {
|
||||
std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; using default webcam"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
selectedDeviceName_ = readAllocatedString(devices[selectedIndex], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
|
||||
hr = devices[selectedIndex]->ActivateObject(IID_PPV_ARGS(&mediaSource_));
|
||||
|
||||
for (UINT32 index = 0; index < deviceCount; index += 1) {
|
||||
devices[index]->Release();
|
||||
}
|
||||
CoTaskMemFree(devices);
|
||||
|
||||
return succeeded(hr, "ActivateObject(webcam)");
|
||||
}
|
||||
|
||||
bool WebcamCapture::configureReader(int requestedWidth, int requestedHeight, int requestedFps) {
|
||||
Microsoft::WRL::ComPtr<IMFAttributes> attributes;
|
||||
if (!succeeded(MFCreateAttributes(&attributes, 2), "MFCreateAttributes(webcam reader)")) {
|
||||
return false;
|
||||
}
|
||||
attributes->SetUINT32(MF_SOURCE_READER_ENABLE_VIDEO_PROCESSING, TRUE);
|
||||
attributes->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, FALSE);
|
||||
|
||||
if (!succeeded(MFCreateSourceReaderFromMediaSource(mediaSource_.Get(), attributes.Get(), &sourceReader_),
|
||||
"MFCreateSourceReaderFromMediaSource(webcam)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaType> mediaType;
|
||||
if (!succeeded(MFCreateMediaType(&mediaType), "MFCreateMediaType(webcam output)")) {
|
||||
return false;
|
||||
}
|
||||
mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
|
||||
mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32);
|
||||
if (requestedWidth > 0 && requestedHeight > 0) {
|
||||
MFSetAttributeSize(mediaType.Get(), MF_MT_FRAME_SIZE, static_cast<UINT32>(requestedWidth), static_cast<UINT32>(requestedHeight));
|
||||
}
|
||||
MFSetAttributeRatio(mediaType.Get(), MF_MT_FRAME_RATE, static_cast<UINT32>(std::max(1, requestedFps)), 1);
|
||||
|
||||
if (!succeeded(sourceReader_->SetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, nullptr, mediaType.Get()),
|
||||
"SetCurrentMediaType(webcam RGB32)")) {
|
||||
return false;
|
||||
}
|
||||
sourceReader_->SetStreamSelection(MF_SOURCE_READER_ALL_STREAMS, FALSE);
|
||||
sourceReader_->SetStreamSelection(MF_SOURCE_READER_FIRST_VIDEO_STREAM, TRUE);
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaType> currentType;
|
||||
if (!succeeded(sourceReader_->GetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, ¤tType),
|
||||
"GetCurrentMediaType(webcam)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT32 width = 0;
|
||||
UINT32 height = 0;
|
||||
if (FAILED(MFGetAttributeSize(currentType.Get(), MF_MT_FRAME_SIZE, &width, &height)) || width == 0 || height == 0) {
|
||||
width = static_cast<UINT32>(requestedWidth > 0 ? requestedWidth : 1280);
|
||||
height = static_cast<UINT32>(requestedHeight > 0 ? requestedHeight : 720);
|
||||
}
|
||||
width_ = static_cast<int>(width);
|
||||
height_ = static_cast<int>(height);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool WebcamCapture::start() {
|
||||
if (!sourceReader_ || thread_.joinable()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
stopRequested_ = false;
|
||||
thread_ = std::thread(&WebcamCapture::captureLoop, this);
|
||||
return true;
|
||||
}
|
||||
|
||||
void WebcamCapture::stop() {
|
||||
stopRequested_ = true;
|
||||
if (thread_.joinable()) {
|
||||
thread_.join();
|
||||
}
|
||||
if (mediaSource_) {
|
||||
mediaSource_->Shutdown();
|
||||
}
|
||||
sourceReader_.Reset();
|
||||
mediaSource_.Reset();
|
||||
if (mfStarted_) {
|
||||
MFShutdown();
|
||||
mfStarted_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void WebcamCapture::captureLoop() {
|
||||
CoInitializeEx(nullptr, COINIT_MULTITHREADED);
|
||||
|
||||
while (!stopRequested_) {
|
||||
DWORD streamIndex = 0;
|
||||
DWORD flags = 0;
|
||||
LONGLONG timestamp = 0;
|
||||
Microsoft::WRL::ComPtr<IMFSample> sample;
|
||||
HRESULT hr = sourceReader_->ReadSample(
|
||||
MF_SOURCE_READER_FIRST_VIDEO_STREAM,
|
||||
0,
|
||||
&streamIndex,
|
||||
&flags,
|
||||
×tamp,
|
||||
&sample);
|
||||
(void)streamIndex;
|
||||
(void)timestamp;
|
||||
|
||||
if (FAILED(hr)) {
|
||||
std::cerr << "WARNING: Failed to read webcam sample (hr=0x" << std::hex << hr << std::dec << ")"
|
||||
<< std::endl;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(20));
|
||||
continue;
|
||||
}
|
||||
if ((flags & MF_SOURCE_READERF_ENDOFSTREAM) != 0) {
|
||||
break;
|
||||
}
|
||||
if (!sample) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaBuffer> buffer;
|
||||
if (FAILED(sample->ConvertToContiguousBuffer(&buffer)) || !buffer) {
|
||||
continue;
|
||||
}
|
||||
|
||||
BYTE* data = nullptr;
|
||||
DWORD maxLength = 0;
|
||||
DWORD currentLength = 0;
|
||||
if (FAILED(buffer->Lock(&data, &maxLength, ¤tLength)) || !data) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const DWORD expectedLength = static_cast<DWORD>(std::max(0, width_) * std::max(0, height_) * 4);
|
||||
if (currentLength >= expectedLength && expectedLength > 0) {
|
||||
std::scoped_lock lock(frameMutex_);
|
||||
latestFrame_.assign(data, data + expectedLength);
|
||||
}
|
||||
|
||||
buffer->Unlock();
|
||||
}
|
||||
|
||||
CoUninitialize();
|
||||
}
|
||||
|
||||
bool WebcamCapture::copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height) {
|
||||
std::scoped_lock lock(frameMutex_);
|
||||
if (latestFrame_.empty() || width_ <= 0 || height_ <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
destination = latestFrame_;
|
||||
width = width_;
|
||||
height = height_;
|
||||
return true;
|
||||
}
|
||||
|
||||
int WebcamCapture::width() const {
|
||||
return width_;
|
||||
}
|
||||
|
||||
int WebcamCapture::height() const {
|
||||
return height_;
|
||||
}
|
||||
|
||||
int WebcamCapture::fps() const {
|
||||
return fps_;
|
||||
}
|
||||
|
||||
const std::wstring& WebcamCapture::selectedDeviceName() const {
|
||||
return selectedDeviceName_;
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
|
||||
#include <Windows.h>
|
||||
#include <mfidl.h>
|
||||
#include <mfreadwrite.h>
|
||||
#include <wrl/client.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
class WebcamCapture {
|
||||
public:
|
||||
WebcamCapture() = default;
|
||||
~WebcamCapture();
|
||||
|
||||
WebcamCapture(const WebcamCapture&) = delete;
|
||||
WebcamCapture& operator=(const WebcamCapture&) = delete;
|
||||
|
||||
bool initialize(const std::wstring& deviceId, int requestedWidth, int requestedHeight, int requestedFps);
|
||||
bool start();
|
||||
void stop();
|
||||
bool copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height);
|
||||
|
||||
int width() const;
|
||||
int height() const;
|
||||
int fps() const;
|
||||
const std::wstring& selectedDeviceName() const;
|
||||
|
||||
private:
|
||||
bool selectDevice(const std::wstring& deviceId);
|
||||
bool configureReader(int requestedWidth, int requestedHeight, int requestedFps);
|
||||
void captureLoop();
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaSource> mediaSource_;
|
||||
Microsoft::WRL::ComPtr<IMFSourceReader> sourceReader_;
|
||||
std::thread thread_;
|
||||
std::atomic<bool> stopRequested_ = false;
|
||||
std::mutex frameMutex_;
|
||||
std::vector<BYTE> latestFrame_;
|
||||
int width_ = 0;
|
||||
int height_ = 0;
|
||||
int fps_ = 30;
|
||||
bool mfStarted_ = false;
|
||||
std::wstring selectedDeviceName_;
|
||||
};
|
||||
@@ -32,6 +32,8 @@
|
||||
"test:wgc-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio",
|
||||
"test:wgc-mic:win": "node scripts/test-windows-wgc-helper.mjs --microphone",
|
||||
"test:wgc-mixed-audio:win": "node scripts/test-windows-wgc-helper.mjs --system-audio --microphone",
|
||||
"test:wgc-webcam:win": "node scripts/test-windows-wgc-helper.mjs --webcam",
|
||||
"test:wgc-full:win": "node scripts/test-windows-wgc-helper.mjs --webcam --system-audio --microphone",
|
||||
"capture:openscreen-preview": "node scripts/capture-openscreen-preview.mjs",
|
||||
"build-vite": "tsc && vite build",
|
||||
"test:browser": "vitest --config vitest.browser.config.ts --run",
|
||||
|
||||
@@ -21,6 +21,8 @@ const WITH_MICROPHONE =
|
||||
process.argv.includes("--mic");
|
||||
const WITH_WINDOW =
|
||||
process.env.OPENSCREEN_WGC_TEST_WINDOW === "true" || process.argv.includes("--window");
|
||||
const WITH_WEBCAM =
|
||||
process.env.OPENSCREEN_WGC_TEST_WEBCAM === "true" || process.argv.includes("--webcam");
|
||||
|
||||
function runHelper(config) {
|
||||
return new Promise((resolve, reject) => {
|
||||
@@ -31,21 +33,34 @@ function runHelper(config) {
|
||||
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
let stopTimer = null;
|
||||
const scheduleStop = () => {
|
||||
if (stopTimer) {
|
||||
return;
|
||||
}
|
||||
stopTimer = setTimeout(() => {
|
||||
child.stdin.write("stop\n");
|
||||
}, DURATION_MS);
|
||||
};
|
||||
const fallbackTimer = setTimeout(scheduleStop, 15_000);
|
||||
|
||||
child.stdout.on("data", (chunk) => {
|
||||
stdout += chunk.toString();
|
||||
if (stdout.includes('"recording-started"') || stdout.includes("Recording started")) {
|
||||
scheduleStop();
|
||||
}
|
||||
});
|
||||
child.stderr.on("data", (chunk) => {
|
||||
stderr += chunk.toString();
|
||||
});
|
||||
child.once("error", reject);
|
||||
child.once("exit", (code) => {
|
||||
clearTimeout(fallbackTimer);
|
||||
if (stopTimer) {
|
||||
clearTimeout(stopTimer);
|
||||
}
|
||||
resolve({ code, stdout, stderr });
|
||||
});
|
||||
|
||||
setTimeout(() => {
|
||||
child.stdin.write("stop\n");
|
||||
}, DURATION_MS);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -149,7 +164,7 @@ if (!fs.existsSync(HELPER_PATH)) {
|
||||
|
||||
const outputPath = path.join(
|
||||
os.tmpdir(),
|
||||
`openscreen-wgc-helper-${WITH_WINDOW ? "window" : WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`,
|
||||
`openscreen-wgc-helper-${WITH_WEBCAM ? "webcam" : WITH_WINDOW ? "window" : WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`,
|
||||
);
|
||||
|
||||
const fixtureWindow = WITH_WINDOW ? await startFixtureWindow() : null;
|
||||
@@ -173,7 +188,11 @@ const config = {
|
||||
captureMic: WITH_MICROPHONE,
|
||||
microphoneDeviceId: "default",
|
||||
microphoneGain: 1.4,
|
||||
webcamEnabled: false,
|
||||
webcamEnabled: WITH_WEBCAM,
|
||||
webcamDeviceId: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_ID ?? "",
|
||||
webcamWidth: 640,
|
||||
webcamHeight: 360,
|
||||
webcamFps: 30,
|
||||
outputs: { screenPath: outputPath },
|
||||
};
|
||||
|
||||
@@ -186,6 +205,15 @@ try {
|
||||
}
|
||||
}
|
||||
if (result.code !== 0) {
|
||||
if (
|
||||
WITH_WEBCAM &&
|
||||
/No native Windows webcam devices were found|Failed to initialize native webcam/.test(
|
||||
result.stderr,
|
||||
)
|
||||
) {
|
||||
console.log("Skipping WGC webcam smoke test: no native Windows webcam device is available.");
|
||||
process.exit(0);
|
||||
}
|
||||
throw new Error(`WGC helper exited with ${result.code}\n${result.stdout}\n${result.stderr}`);
|
||||
}
|
||||
if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) {
|
||||
|
||||
@@ -182,6 +182,20 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
|
||||
}
|
||||
}, []);
|
||||
|
||||
/**
 * Stops the in-browser webcam preview stream, if one is active.
 *
 * Every track is stopped with its `onended` handler detached first, the stream
 * ref is cleared, and `webcamReady` is set to true. Does nothing when no
 * preview stream exists.
 *
 * NOTE(review): bumping `webcamAcquireId` presumably invalidates any in-flight
 * webcam acquisition — confirm against the acquisition code.
 */
const stopWebcamPreviewStream = useCallback(() => {
	const previewStream = webcamStream.current;
	if (!previewStream) {
		return;
	}

	webcamAcquireId.current++;
	for (const track of previewStream.getTracks()) {
		// Detach the handler before stopping so it is not left attached to a dead track.
		track.onended = null;
		track.stop();
	}
	webcamStream.current = null;
	webcamReady.current = true;
}, []);
|
||||
|
||||
const setWebcamEnabled = useCallback(
|
||||
async (enabled: boolean) => {
|
||||
if (!enabled) {
|
||||
@@ -577,6 +591,9 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
|
||||
const displayId = Number(selectedSource.display_id);
|
||||
const sourceType = selectedSource.id.startsWith("window:") ? "window" : "display";
|
||||
const windowHandle = parseWindowHandleFromSourceId(selectedSource.id);
|
||||
if (webcamEnabled) {
|
||||
stopWebcamPreviewStream();
|
||||
}
|
||||
const request: NativeWindowsRecordingRequest = {
|
||||
recordingId: activeRecordingId,
|
||||
source: {
|
||||
|
||||
Reference in New Issue
Block a user