fix: support DirectShow virtual webcams

This commit is contained in:
EtienneLescot
2026-05-05 18:33:48 +02:00
parent fdcd882058
commit 84484d6167
11 changed files with 875 additions and 19 deletions
@@ -40,6 +40,7 @@ The helper owns Windows media capture:
- WASAPI system loopback;
- WASAPI microphone input;
- Media Foundation webcam capture;
- DirectShow webcam fallback for virtual cameras not visible to Media Foundation;
- Media Foundation encoding/muxing;
- stream timestamp normalization.
@@ -135,6 +136,7 @@ SSOT rules for this phase:
- `WgcSession::captureWidth()/captureHeight()` is the encoded screen frame size until a dedicated native scaling stage exists.
- `WasapiLoopbackCapture::inputFormat()` is the runtime audio format source used by `MFEncoder`.
- The renderer passes both the browser webcam `deviceId` and selected display label as `deviceName`; `electron/native/wgc-capture/src/webcam_capture.*` is the only place that maps those values to Media Foundation devices.
- Electron resolves the selected label to a DirectShow filter CLSID once and passes it as `webcamDirectShowClsid`; the helper must not independently guess among DirectShow filters.
- No duplicated hard-coded audio format assumptions in `main.cpp`.
### 3. WASAPI Microphone
@@ -158,6 +160,7 @@ Acceptance:
- Convert webcam samples to BGRA and compose them into the primary helper MP4 as an initial bottom-right picture-in-picture overlay.
- Keep the helper process as the SSOT for screen/window, WASAPI system audio, microphone, webcam, and mux timing.
- Match the requested webcam through Media Foundation friendly names first, then browser device ids/symbolic links, so UI selection remains stable across Chromium and Windows native device namespaces.
- Use the Electron-resolved DirectShow CLSID when the selected virtual camera, for example NVIDIA Broadcast, is registered for DirectShow but absent from Media Foundation enumeration.
- Later: promote the same webcam capture source to a separate editable native `webcamVideoPath` if product requirements need post-recording layout edits.
Acceptance:
+122
View File
@@ -476,6 +476,103 @@ function isWindowsGraphicsCaptureOsSupported() {
return Number.isFinite(build) && build >= 19041;
}
/**
 * Canonicalizes a device label for fuzzy comparison: lowercases it, collapses every
 * run of characters outside `a-z0-9` into a single space, and trims the result.
 */
function normalizeNativeDeviceName(value: string) {
	return value
		.toLowerCase()
		.replace(/[^a-z0-9]+/g, " ")
		.trim();
}

/**
 * Scores how well a native device (friendly name + identifier) matches the label the
 * user selected. Higher is better; 0 means "no evidence of a match".
 *
 * - 1000: normalized names are identical
 * - 900: one normalized name contains the other
 * - 800: the normalized id and the requested name contain one another
 * - otherwise: +100 per requested word found in the name, +50 per word found only in the id
 *   (single-character words and the generic words camera/webcam/video/input are ignored)
 *
 * @param candidateName Friendly name of the native device.
 * @param candidateId Identifier of the native device (for example a CLSID string).
 * @param requestedName Label selected in the UI; an empty or missing label scores 0.
 * @returns The match score.
 */
function scoreNativeDeviceName(candidateName: string, candidateId: string, requestedName?: string) {
	const candidate = normalizeNativeDeviceName(candidateName);
	const id = normalizeNativeDeviceName(candidateId);
	const requested = normalizeNativeDeviceName(requestedName ?? "");
	if (!requested) {
		return 0;
	}
	if (candidate === requested) {
		return 1000;
	}
	// Every string "includes" the empty string, so an entry without a usable name or id
	// must not take the containment shortcuts; otherwise nameless filters score 900/800
	// for any request and can outrank genuine partial matches.
	if (candidate && (candidate.includes(requested) || requested.includes(candidate))) {
		return 900;
	}
	if (id && (id.includes(requested) || requested.includes(id))) {
		return 800;
	}
	return requested
		.split(/\s+/)
		.filter((word) => word.length > 1 && !["camera", "webcam", "video", "input"].includes(word))
		.reduce((score, word) => {
			if (candidate.includes(word)) return score + 100;
			if (id.includes(word)) return score + 50;
			return score;
		}, 0);
}
/**
 * Runs `reg.exe query … /s` on the `Instance` key of the DirectShow video input
 * category and resolves with everything the process wrote to stdout, decoded as text.
 *
 * The promise never rejects: if the process cannot be spawned it resolves with "".
 */
function queryDirectShowVideoInputRegistry() {
	return new Promise<string>((resolve) => {
		const proc = spawn(
			"reg.exe",
			["query", "HKCR\\CLSID\\{860BB310-5D01-11D0-BD3B-00A0C911CE86}\\Instance", "/s"],
			{ windowsHide: true },
		);
		// Keep the raw bytes and decode once at the end: decoding chunk by chunk can split
		// a multi-byte character across two chunks and corrupt it.
		const chunks: Buffer[] = [];
		proc.stdout.on("data", (chunk: Buffer) => {
			chunks.push(chunk);
		});
		proc.on("close", () => {
			const raw = Buffer.concat(chunks);
			// Registry text encoded as UTF-16LE carries NUL bytes (the high byte of every
			// ASCII character); single-byte/UTF-8 text output never contains NUL.
			resolve(raw.includes(0) ? raw.toString("utf16le") : raw.toString());
		});
		proc.on("error", () => resolve(""));
	});
}
/**
 * Picks the registered DirectShow video input filter whose friendly name best matches
 * the webcam label selected in the UI and returns that filter's CLSID string.
 *
 * Returns `null` when not running on Windows, when the label is blank, or when no
 * registry entry with a CLSID scores above 0. On a tie the first entry listed wins.
 *
 * @param deviceName Label of the webcam selected by the user.
 */
async function resolveDirectShowWebcamClsid(deviceName?: string) {
	const isWindows = process.platform === "win32";
	if (!isWindows || !deviceName || !deviceName.trim()) {
		return null;
	}

	type RegistryFilter = { friendlyName?: string; clsid?: string };
	const registryDump = await queryDirectShowVideoInputRegistry();
	const filters: RegistryFilter[] = [];
	let pending: RegistryFilter = {};
	// Each "HKEY_…" line starts a new registry key; flush what was collected for the previous one.
	const flushPending = () => {
		if (pending.friendlyName || pending.clsid) filters.push(pending);
		pending = {};
	};

	for (const rawLine of registryDump.split(/\r?\n/)) {
		const text = rawLine.trim();
		if (text === "") continue;
		if (/^HKEY_/i.test(text)) {
			flushPending();
			continue;
		}
		// Value lines look like: <name>  REG_SZ  <data>
		const parsed = /^(\S+)\s+REG_SZ\s+(.+)$/.exec(text);
		if (parsed === null) continue;
		if (parsed[1] === "FriendlyName") pending.friendlyName = parsed[2].trim();
		if (parsed[1] === "CLSID") pending.clsid = parsed[2].trim();
	}
	flushPending();

	let winner: { clsid: string; friendlyName?: string; score: number } | null = null;
	for (const filter of filters) {
		const { clsid, friendlyName } = filter;
		if (!clsid) continue;
		const score = scoreNativeDeviceName(friendlyName ?? "", clsid, deviceName);
		// Only a strictly higher score replaces the current winner.
		if (winner !== null && score <= winner.score) continue;
		winner = { clsid, friendlyName, score };
	}

	if (winner === null || winner.score <= 0) {
		return null;
	}

	console.info("[native-wgc] resolved DirectShow webcam filter", {
		requestedName: deviceName,
		filterName: winner.friendlyName,
		clsid: winner.clsid,
		score: winner.score,
	});
	return winner.clsid;
}
async function startCursorRecording(recordingId?: number) {
if (cursorRecordingSession) {
pendingCursorRecordingData = await cursorRecordingSession.stop();
@@ -623,6 +720,25 @@ function waitForNativeWindowsCaptureStop(proc: ChildProcessWithoutNullStreams) {
});
}
/**
 * Extracts the most recent `webcam-format` event from the helper's accumulated output.
 *
 * Scans the output line by line, keeps the last line containing the
 * `"event":"webcam-format"` marker, and parses that line as JSON.
 *
 * @param output Text captured from the native helper so far.
 * @returns The parsed event, or `null` when no such line exists or it is not valid JSON.
 */
function readNativeWindowsWebcamFormat(output: string) {
	const marker = '"event":"webcam-format"';
	let newest: string | undefined;
	for (const line of output.split(/\r?\n/)) {
		if (line.includes(marker)) {
			newest = line;
		}
	}
	if (!newest) {
		return null;
	}
	try {
		return JSON.parse(newest) as {
			width?: number;
			height?: number;
			fps?: number;
			deviceName?: string;
		};
	} catch {
		// A truncated or malformed line is treated the same as "no event seen".
		return null;
	}
}
function setCurrentRecordingSessionState(session: RecordingSession | null) {
currentRecordingSession = session;
currentVideoPath = session?.screenVideoPath ?? null;
@@ -866,6 +982,9 @@ export function registerIpcHandlers(
typeof request.source.displayId === "number" && Number.isFinite(request.source.displayId)
? request.source.displayId
: Number(selectedSource?.display_id);
const webcamDirectShowClsid = request.webcam.enabled
? await resolveDirectShowWebcamClsid(request.webcam.deviceName)
: null;
const config = {
schemaVersion: 2,
recordingId,
@@ -889,6 +1008,7 @@ export function registerIpcHandlers(
webcamEnabled: request.webcam.enabled,
webcamDeviceId: request.webcam.deviceId ?? null,
webcamDeviceName: request.webcam.deviceName ?? null,
webcamDirectShowClsid,
webcamWidth: request.webcam.width,
webcamHeight: request.webcam.height,
webcamFps: request.webcam.fps,
@@ -943,9 +1063,11 @@ export function registerIpcHandlers(
await waitForNativeWindowsCaptureStart(proc);
const captureStartedAtMs = Date.now();
nativeWindowsCursorOffsetMs = Math.max(0, captureStartedAtMs - cursorStartTimeMs);
const webcamFormat = readNativeWindowsWebcamFormat(nativeWindowsCaptureOutput);
console.info("[native-wgc] capture started", {
captureStartedAtMs,
cursorOffsetMs: nativeWindowsCursorOffsetMs,
webcamFormat,
});
const source = selectedSource || { name: "Screen" };
+1 -1
View File
@@ -47,7 +47,7 @@ Current V2 JSON shape:
}
```
The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, and Media Foundation webcam capture. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links, so the renderer passes both `webcamDeviceId` and `webcamDeviceName`. The helper treats the Media Foundation friendly name as the preferred stable selector, then tries the browser id, and only falls back to the default webcam with an explicit warning when no requested device matches.
The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, Media Foundation webcam capture, and a DirectShow webcam fallback for virtual cameras that are not exposed through Media Foundation. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links, so the renderer passes both `webcamDeviceId` and `webcamDeviceName`. Electron resolves a matching DirectShow filter CLSID for the selected label; the helper uses Media Foundation first, then that exact DirectShow filter when the requested camera is absent from Media Foundation.
Smoke-test the helper with:
@@ -16,6 +16,8 @@ set(CMAKE_CXX_EXTENSIONS OFF)
add_executable(wgc-capture
src/audio_sample_utils.cpp
src/audio_sample_utils.h
src/dshow_webcam_capture.cpp
src/dshow_webcam_capture.h
src/main.cpp
src/mf_encoder.cpp
src/mf_encoder.h
@@ -0,0 +1,469 @@
#include "dshow_webcam_capture.h"
#include <initguid.h>
#include <dshow.h>
#include <wrl/client.h>
#include <algorithm>
#include <chrono>
#include <cwctype>
#include <iostream>
namespace {
// Class IDs passed to CoCreateInstance in DirectShowWebcamCapture::initialize to create the
// sample grabber and null renderer filters. They are defined locally in this file (hence the
// "Local" suffix) instead of being taken from a header.
const CLSID CLSID_SampleGrabberLocal = {0xC1F400A0, 0x3F08, 0x11D3, {0x9F, 0x0B, 0x00, 0x60, 0x08, 0x03, 0x9E, 0x37}};
const CLSID CLSID_NullRendererLocal = {0xC1F400A4, 0x3F08, 0x11D3, {0x9F, 0x0B, 0x00, 0x60, 0x08, 0x03, 0x9E, 0x37}};
// Callback interface accepted by ISampleGrabber::SetCallback, declared locally together with
// its interface ID. Nothing in this file implements it or calls SetCallback; frames are polled
// through ISampleGrabber::GetCurrentBuffer instead.
MIDL_INTERFACE("0579154A-2B53-4994-B0D0-E773148EFF85")
ISampleGrabberCB : public IUnknown {
public:
virtual HRESULT STDMETHODCALLTYPE SampleCB(double sampleTime, IMediaSample* sample) = 0;
virtual HRESULT STDMETHODCALLTYPE BufferCB(double sampleTime, BYTE* buffer, long bufferLength) = 0;
};
// Local declaration of the sample grabber's control interface, obtained from the filter created
// with CLSID_SampleGrabberLocal. This file calls SetMediaType, GetConnectedMediaType,
// SetBufferSamples, SetOneShot and GetCurrentBuffer.
// NOTE(review): as a COM interface declaration the method order presumably has to match the
// system's definition of ISampleGrabber - do not reorder or remove methods.
MIDL_INTERFACE("6B652FFF-11FE-4FCE-92AD-0266B5D7C78F")
ISampleGrabber : public IUnknown {
public:
virtual HRESULT STDMETHODCALLTYPE SetOneShot(BOOL oneShot) = 0;
virtual HRESULT STDMETHODCALLTYPE SetMediaType(const AM_MEDIA_TYPE* type) = 0;
virtual HRESULT STDMETHODCALLTYPE GetConnectedMediaType(AM_MEDIA_TYPE* type) = 0;
virtual HRESULT STDMETHODCALLTYPE SetBufferSamples(BOOL bufferThem) = 0;
// Called twice per poll in captureLoop: first with a null buffer to query the size, then with storage.
virtual HRESULT STDMETHODCALLTYPE GetCurrentBuffer(long* bufferSize, long* buffer) = 0;
virtual HRESULT STDMETHODCALLTYPE GetCurrentSample(IMediaSample** sample) = 0;
virtual HRESULT STDMETHODCALLTYPE SetCallback(ISampleGrabberCB* callback, long whichMethodToCallback) = 0;
};
// Reports whether `hr` is a success code. On failure, writes one "ERROR: <label> failed" line
// with the HRESULT in hexadecimal to stderr so call sites can simply `return false`.
bool succeeded(HRESULT hr, const char* label) {
    const bool ok = SUCCEEDED(hr);
    if (!ok) {
        std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")"
                  << std::endl;
    }
    return ok;
}
// Reads the property `key` from `bag` and returns it as a wide string. Returns an empty string
// when the read fails, the value is not a BSTR, or the BSTR pointer is null.
std::wstring readPropertyString(IPropertyBag* bag, LPCOLESTR key) {
    VARIANT value;
    VariantInit(&value);

    std::wstring text;
    const HRESULT hr = bag->Read(key, &value, nullptr);
    if (SUCCEEDED(hr) && value.vt == VT_BSTR && value.bstrVal != nullptr) {
        text.assign(value.bstrVal);
    }

    // The variant is cleared on every path, whether or not a string was extracted.
    VariantClear(&value);
    return text;
}
// Case-insensitive, symmetric containment test: true when either string occurs inside the other
// after lowercasing both. Two empty inputs, or one empty input, never match.
bool containsInsensitive(const std::wstring& haystack, const std::wstring& needle) {
    if (haystack.empty() || needle.empty()) {
        return false;
    }

    const auto lowered = [](std::wstring text) {
        for (wchar_t& ch : text) {
            ch = static_cast<wchar_t>(::towlower(ch));
        }
        return text;
    };

    const std::wstring first = lowered(haystack);
    const std::wstring second = lowered(needle);
    const bool firstHoldsSecond = first.find(second) != std::wstring::npos;
    return firstHoldsSecond || second.find(first) != std::wstring::npos;
}
// Canonicalizes a device label for fuzzy matching: alphanumeric characters are lowercased and
// kept, every run of other characters becomes one space, and the result has no leading or
// trailing space.
std::wstring normalizeDeviceName(const std::wstring& value) {
    std::wstring out;
    out.reserve(value.size());

    // A separator is only written once the next alphanumeric character arrives, so leading
    // and trailing separators never reach the output.
    bool separatorPending = false;
    for (std::size_t i = 0; i < value.size(); ++i) {
        const wchar_t ch = value[i];
        if (!std::iswalnum(ch)) {
            separatorPending = !out.empty();
            continue;
        }
        if (separatorPending) {
            out.push_back(L' ');
            separatorPending = false;
        }
        out.push_back(static_cast<wchar_t>(std::towlower(ch)));
    }
    return out;
}
// Splits `value` on single spaces and returns the words worth scoring: words of one character
// or less and the generic words camera/webcam/video/input are dropped.
std::vector<std::wstring> splitWords(const std::wstring& value) {
    static const wchar_t* const kIgnoredWords[] = {L"camera", L"webcam", L"video", L"input"};

    std::vector<std::wstring> kept;
    for (std::size_t begin = 0; begin < value.size();) {
        const std::size_t space = value.find(L' ', begin);
        const bool isLastToken = space == std::wstring::npos;
        std::wstring token = value.substr(begin, isLastToken ? std::wstring::npos : space - begin);

        bool ignored = false;
        for (const wchar_t* stopWord : kIgnoredWords) {
            ignored = ignored || token == stopWord;
        }
        if (token.size() > 1 && !ignored) {
            kept.push_back(std::move(token));
        }

        if (isLastToken) {
            break;
        }
        begin = space + 1;
    }
    return kept;
}
int deviceMatchScore(
const std::wstring& candidateName,
const std::wstring& candidatePath,
const std::wstring& requestedName,
const std::wstring& requestedId) {
int score = 0;
const auto normalizedName = normalizeDeviceName(candidateName);
const auto normalizedPath = normalizeDeviceName(candidatePath);
const auto normalizedRequestedName = normalizeDeviceName(requestedName);
const auto normalizedRequestedId = normalizeDeviceName(requestedId);
if (!normalizedRequestedName.empty()) {
if (normalizedName == normalizedRequestedName) {
score = std::max(score, 1000);
}
if (containsInsensitive(normalizedName, normalizedRequestedName)) {
score = std::max(score, 900);
}
if (containsInsensitive(normalizedPath, normalizedRequestedName)) {
score = std::max(score, 800);
}
int wordScore = 0;
for (const auto& word : splitWords(normalizedRequestedName)) {
if (normalizedName.find(word) != std::wstring::npos) {
wordScore += 100;
} else if (normalizedPath.find(word) != std::wstring::npos) {
wordScore += 50;
}
}
score = std::max(score, wordScore);
}
if (!normalizedRequestedId.empty()) {
if (containsInsensitive(normalizedPath, normalizedRequestedId)) {
score = std::max(score, 700);
}
if (containsInsensitive(normalizedName, normalizedRequestedId)) {
score = std::max(score, 600);
}
}
return score;
}
// Releases what an AM_MEDIA_TYPE owns: the format block (freed with CoTaskMemFree when
// cbFormat is non-zero) and the pUnk reference. The fields are reset afterwards.
void freeMediaType(AM_MEDIA_TYPE& type) {
    if (type.cbFormat != 0) {
        CoTaskMemFree(type.pbFormat);
        type.pbFormat = nullptr;
        type.cbFormat = 0;
    }
    if (IUnknown* owner = type.pUnk) {
        type.pUnk = nullptr;
        owner->Release();
    }
}
// Reads the REG_SZ value `valueName` directly under `key` into `value`, with trailing NUL
// characters stripped. Returns false (leaving `value` untouched) when the value is missing,
// empty, not a string, or cannot be read.
bool readRegistryString(HKEY key, const wchar_t* valueName, std::wstring& value) {
    DWORD type = 0;
    DWORD byteCount = 0;

    // First call: no buffer, only ask how many bytes the value occupies.
    const auto probe = RegGetValueW(key, nullptr, valueName, RRF_RT_REG_SZ, &type, nullptr, &byteCount);
    if (probe != ERROR_SUCCESS || byteCount == 0) {
        return false;
    }

    std::wstring text(byteCount / sizeof(wchar_t), L'\0');
    const auto fetch = RegGetValueW(key, nullptr, valueName, RRF_RT_REG_SZ, &type, text.data(), &byteCount);
    if (fetch != ERROR_SUCCESS) {
        return false;
    }

    // Drop the terminator(s) that were counted in the reported size.
    const std::size_t lastChar = text.find_last_not_of(L'\0');
    text.erase(lastChar == std::wstring::npos ? 0 : lastChar + 1);

    value = std::move(text);
    return true;
}
bool findRegisteredVideoInput(
const std::wstring& deviceId,
const std::wstring& deviceName,
CLSID& selectedClsid,
std::wstring& selectedName,
int& bestScore) {
HKEY instanceKey = nullptr;
if (RegOpenKeyExW(
HKEY_CLASSES_ROOT,
L"CLSID\\{860BB310-5D01-11D0-BD3B-00A0C911CE86}\\Instance",
0,
KEY_READ,
&instanceKey) != ERROR_SUCCESS) {
return false;
}
DWORD index = 0;
wchar_t subkeyName[128];
DWORD subkeyNameLength = ARRAYSIZE(subkeyName);
bool found = false;
while (RegEnumKeyExW(instanceKey, index, subkeyName, &subkeyNameLength, nullptr, nullptr, nullptr, nullptr) == ERROR_SUCCESS) {
HKEY filterKey = nullptr;
if (RegOpenKeyExW(instanceKey, subkeyName, 0, KEY_READ, &filterKey) == ERROR_SUCCESS) {
std::wstring friendlyName;
std::wstring clsidText;
readRegistryString(filterKey, L"FriendlyName", friendlyName);
readRegistryString(filterKey, L"CLSID", clsidText);
const int score = deviceMatchScore(friendlyName, clsidText, deviceName, deviceId);
std::wcerr << L"INFO: Registered DirectShow webcam candidate name=\"" << friendlyName << L"\" score=" << score << std::endl;
CLSID clsid{};
if (!clsidText.empty() && SUCCEEDED(CLSIDFromString(clsidText.c_str(), &clsid)) && (!found || score > bestScore)) {
selectedClsid = clsid;
selectedName = friendlyName;
bestScore = score;
found = true;
}
RegCloseKey(filterKey);
}
index += 1;
subkeyNameLength = ARRAYSIZE(subkeyName);
}
RegCloseKey(instanceKey);
return found;
}
} // namespace
// COM state of one capture graph, kept out of the header. All interface pointers are created in
// initialize() and released in stop().
struct DirectShowWebcamCapture::Impl {
// Filter graph that hosts the source, sample grabber and null renderer filters.
Microsoft::WRL::ComPtr<IGraphBuilder> graph;
// Capture graph builder used to connect the filters (RenderStream).
Microsoft::WRL::ComPtr<ICaptureGraphBuilder2> captureGraph;
// Webcam source filter instantiated from the CLSID passed to initialize().
Microsoft::WRL::ComPtr<IBaseFilter> captureFilter;
// Sample grabber as a graph filter, and the same object's control interface.
Microsoft::WRL::ComPtr<IBaseFilter> sampleGrabberFilter;
Microsoft::WRL::ComPtr<ISampleGrabber> sampleGrabber;
// Terminates the stream; passed to RenderStream as the final filter.
Microsoft::WRL::ComPtr<IBaseFilter> nullRenderer;
// Run/Stop control queried from the filter graph.
Microsoft::WRL::ComPtr<IMediaControl> mediaControl;
// True when initialize()'s CoInitializeEx call succeeded and stop() must call CoUninitialize.
bool comInitialized = false;
// True between a successful Run() in start() and stop().
bool running = false;
};
// Stops the capture thread and releases the graph before freeing the Impl that owns it.
DirectShowWebcamCapture::~DirectShowWebcamCapture() {
// stop() joins the polling thread, so nothing touches impl_ once it returns.
stop();
delete impl_;
}
bool DirectShowWebcamCapture::initialize(
const std::wstring& deviceId,
const std::wstring& deviceName,
const std::wstring& directShowClsid,
int requestedWidth,
int requestedHeight,
int requestedFps) {
stop();
delete impl_;
impl_ = new Impl();
fps_ = std::clamp(requestedFps > 0 ? requestedFps : 30, 1, 60);
HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
if (SUCCEEDED(hr)) {
impl_->comInitialized = true;
} else if (hr != RPC_E_CHANGED_MODE) {
return succeeded(hr, "CoInitializeEx(DirectShow webcam)");
}
if (directShowClsid.empty()) {
std::cerr << "ERROR: DirectShow webcam fallback requires a resolved filter CLSID" << std::endl;
return false;
}
CLSID selectedClsid{};
if (FAILED(CLSIDFromString(directShowClsid.c_str(), &selectedClsid))) {
std::cerr << "ERROR: DirectShow webcam fallback received an invalid filter CLSID" << std::endl;
return false;
}
selectedDeviceName_ = deviceName.empty() ? directShowClsid : deviceName;
if (!succeeded(CoCreateInstance(selectedClsid, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->captureFilter)),
"CoCreateInstance(DirectShow webcam filter)")) {
return false;
}
if (!succeeded(CoCreateInstance(CLSID_FilterGraph, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->graph)),
"CoCreateInstance(FilterGraph)")) {
return false;
}
if (!succeeded(CoCreateInstance(CLSID_CaptureGraphBuilder2, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->captureGraph)),
"CoCreateInstance(CaptureGraphBuilder2)")) {
return false;
}
if (!succeeded(impl_->captureGraph->SetFiltergraph(impl_->graph.Get()), "SetFiltergraph(DirectShow webcam)")) {
return false;
}
if (!succeeded(impl_->graph->AddFilter(impl_->captureFilter.Get(), L"OpenScreen Webcam Source"),
"AddFilter(DirectShow webcam source)")) {
return false;
}
if (!succeeded(CoCreateInstance(CLSID_SampleGrabberLocal, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->sampleGrabberFilter)),
"CoCreateInstance(SampleGrabber)")) {
return false;
}
if (!succeeded(impl_->sampleGrabberFilter.As(&impl_->sampleGrabber), "QueryInterface(ISampleGrabber)")) {
return false;
}
AM_MEDIA_TYPE requestedType{};
requestedType.majortype = MEDIATYPE_Video;
requestedType.subtype = MEDIASUBTYPE_RGB32;
requestedType.formattype = FORMAT_VideoInfo;
if (!succeeded(impl_->sampleGrabber->SetMediaType(&requestedType), "SetMediaType(DirectShow RGB32)")) {
return false;
}
if (!succeeded(impl_->graph->AddFilter(impl_->sampleGrabberFilter.Get(), L"OpenScreen Webcam Sample Grabber"),
"AddFilter(SampleGrabber)")) {
return false;
}
if (!succeeded(CoCreateInstance(CLSID_NullRendererLocal, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->nullRenderer)),
"CoCreateInstance(NullRenderer)")) {
return false;
}
if (!succeeded(impl_->graph->AddFilter(impl_->nullRenderer.Get(), L"OpenScreen Webcam Null Renderer"),
"AddFilter(NullRenderer)")) {
return false;
}
if (!succeeded(impl_->captureGraph->RenderStream(
&PIN_CATEGORY_CAPTURE,
&MEDIATYPE_Video,
impl_->captureFilter.Get(),
impl_->sampleGrabberFilter.Get(),
impl_->nullRenderer.Get()),
"RenderStream(DirectShow webcam)")) {
return false;
}
AM_MEDIA_TYPE connectedType{};
if (!succeeded(impl_->sampleGrabber->GetConnectedMediaType(&connectedType), "GetConnectedMediaType(DirectShow webcam)")) {
return false;
}
if (connectedType.formattype == FORMAT_VideoInfo && connectedType.pbFormat) {
const auto* videoInfo = reinterpret_cast<VIDEOINFOHEADER*>(connectedType.pbFormat);
width_ = std::abs(videoInfo->bmiHeader.biWidth);
height_ = std::abs(videoInfo->bmiHeader.biHeight);
sourceTopDown_ = videoInfo->bmiHeader.biHeight < 0;
}
freeMediaType(connectedType);
if (width_ <= 0 || height_ <= 0) {
width_ = requestedWidth > 0 ? requestedWidth : 1280;
height_ = requestedHeight > 0 ? requestedHeight : 720;
}
impl_->sampleGrabber->SetBufferSamples(TRUE);
impl_->sampleGrabber->SetOneShot(FALSE);
if (!succeeded(impl_->graph.As(&impl_->mediaControl), "QueryInterface(IMediaControl)")) {
return false;
}
return true;
}
// Runs the graph built by initialize() and launches the polling thread. Returns false when the
// capture is not initialized, is already running, or the graph refuses to run.
bool DirectShowWebcamCapture::start() {
    if (!impl_ || !impl_->mediaControl || impl_->running) {
        return false;
    }

    if (!succeeded(impl_->mediaControl->Run(), "Run(DirectShow webcam)")) {
        return false;
    }

    // Clear the stop flag before the thread exists so its first loop check sees it.
    stopRequested_ = false;
    impl_->running = true;
    thread_ = std::thread(&DirectShowWebcamCapture::captureLoop, this);
    return true;
}
// Stops polling, stops the graph and releases every COM object. Safe to call repeatedly and
// before initialize(); initialize() and the destructor both call it.
void DirectShowWebcamCapture::stop() {
// Ask the polling thread to exit and wait for it, so nothing uses the grabber while it is released.
stopRequested_ = true;
if (thread_.joinable()) {
thread_.join();
}
if (!impl_) {
return;
}
if (impl_->mediaControl && impl_->running) {
impl_->mediaControl->Stop();
}
impl_->running = false;
// Release all interface pointers before COM is uninitialized below.
impl_->mediaControl.Reset();
impl_->nullRenderer.Reset();
impl_->sampleGrabber.Reset();
impl_->sampleGrabberFilter.Reset();
impl_->captureFilter.Reset();
impl_->captureGraph.Reset();
impl_->graph.Reset();
// Balances the successful CoInitializeEx recorded by initialize().
if (impl_->comInitialized) {
CoUninitialize();
impl_->comInitialized = false;
}
}
void DirectShowWebcamCapture::captureLoop() {
CoInitializeEx(nullptr, COINIT_MULTITHREADED);
while (!stopRequested_ && impl_ && impl_->sampleGrabber) {
long bufferSize = 0;
HRESULT hr = impl_->sampleGrabber->GetCurrentBuffer(&bufferSize, nullptr);
if (SUCCEEDED(hr) && bufferSize > 0) {
std::vector<BYTE> buffer(static_cast<size_t>(bufferSize));
hr = impl_->sampleGrabber->GetCurrentBuffer(&bufferSize, reinterpret_cast<long*>(buffer.data()));
if (SUCCEEDED(hr)) {
storeFrame(buffer.data(), bufferSize);
}
}
std::this_thread::sleep_for(std::chrono::milliseconds(1000 / std::max(1, fps_)));
}
CoUninitialize();
}
void DirectShowWebcamCapture::storeFrame(const BYTE* buffer, long length) {
const int stride = width_ * 4;
const int expectedLength = stride * height_;
if (!buffer || length < expectedLength || width_ <= 0 || height_ <= 0) {
return;
}
std::vector<BYTE> frame(static_cast<size_t>(expectedLength));
for (int y = 0; y < height_; y += 1) {
const int sourceY = sourceTopDown_ ? y : height_ - 1 - y;
const BYTE* source = buffer + sourceY * stride;
BYTE* destination = frame.data() + y * stride;
std::copy(source, source + stride, destination);
for (int x = 0; x < width_; x += 1) {
destination[x * 4 + 3] = 255;
}
}
std::scoped_lock lock(frameMutex_);
latestFrame_ = std::move(frame);
}
bool DirectShowWebcamCapture::copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height) {
std::scoped_lock lock(frameMutex_);
if (latestFrame_.empty() || width_ <= 0 || height_ <= 0) {
return false;
}
destination = latestFrame_;
width = width_;
height = height_;
return true;
}
// Frame width in pixels as determined by initialize().
int DirectShowWebcamCapture::width() const {
return width_;
}
// Frame height in pixels as determined by initialize().
int DirectShowWebcamCapture::height() const {
return height_;
}
// Polling rate chosen by initialize(): the requested rate clamped to 1..60.
int DirectShowWebcamCapture::fps() const {
return fps_;
}
// Label recorded by initialize(): the requested device name, or the CLSID text when no name was given.
const std::wstring& DirectShowWebcamCapture::selectedDeviceName() const {
return selectedDeviceName_;
}
@@ -0,0 +1,50 @@
#pragma once
#include <Windows.h>
#include <atomic>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
// Webcam capture through a DirectShow filter graph, used as a fallback for cameras that are
// registered for DirectShow. Frames are polled on a background thread and exposed as 32-bit
// buffers through copyLatestFrame(). Not copyable.
class DirectShowWebcamCapture {
public:
DirectShowWebcamCapture() = default;
~DirectShowWebcamCapture();
DirectShowWebcamCapture(const DirectShowWebcamCapture&) = delete;
DirectShowWebcamCapture& operator=(const DirectShowWebcamCapture&) = delete;
// Builds the capture graph for the filter identified by `directShowClsid`; returns false on failure.
bool initialize(
const std::wstring& deviceId,
const std::wstring& deviceName,
const std::wstring& directShowClsid,
int requestedWidth,
int requestedHeight,
int requestedFps);
// Runs the graph and starts the polling thread.
bool start();
// Stops the thread and releases the graph; safe to call at any time.
void stop();
// Copies the latest frame and its size; returns false while no frame is available.
bool copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height);
int width() const;
int height() const;
int fps() const;
const std::wstring& selectedDeviceName() const;
// Converts and stores one grabbed frame; called from the polling thread.
void storeFrame(const BYTE* buffer, long length);
private:
// COM objects live in Impl, defined in the .cpp file.
struct Impl;
void captureLoop();
Impl* impl_ = nullptr;
std::thread thread_;
std::atomic<bool> stopRequested_ = false;
// Guards latestFrame_.
std::mutex frameMutex_;
std::vector<BYTE> latestFrame_;
int width_ = 0;
int height_ = 0;
int fps_ = 30;
// True when the source delivers rows top-down (negative bitmap height).
bool sourceTopDown_ = false;
std::wstring selectedDeviceName_;
};
+3
View File
@@ -41,6 +41,7 @@ struct CaptureConfig {
double microphoneGain = 1.0;
std::string webcamDeviceId;
std::string webcamDeviceName;
std::string webcamDirectShowClsid;
int webcamWidth = 0;
int webcamHeight = 0;
int webcamFps = 0;
@@ -280,6 +281,7 @@ bool parseConfig(const std::string& json, CaptureConfig& config) {
config.microphoneGain = findDouble(json, "microphoneGain", 1.0);
config.webcamDeviceId = findString(json, "webcamDeviceId");
config.webcamDeviceName = findString(json, "webcamDeviceName");
config.webcamDirectShowClsid = findString(json, "webcamDirectShowClsid");
config.webcamWidth = findInt(json, "webcamWidth", 0);
config.webcamHeight = findInt(json, "webcamHeight", 0);
config.webcamFps = findInt(json, "webcamFps", 0);
@@ -362,6 +364,7 @@ int main(int argc, char* argv[]) {
if (!webcamCapture.initialize(
utf8ToWide(config.webcamDeviceId),
utf8ToWide(config.webcamDeviceName),
utf8ToWide(config.webcamDirectShowClsid),
config.webcamWidth,
config.webcamHeight,
config.webcamFps > 0 ? config.webcamFps : config.fps)) {
@@ -6,6 +6,7 @@
#include <algorithm>
#include <chrono>
#include <cwctype>
#include <iostream>
namespace {
@@ -45,6 +46,89 @@ bool containsInsensitive(const std::wstring& haystack, const std::wstring& needl
lowerNeedle.find(lowerHaystack) != std::wstring::npos;
}
// Canonicalizes a device label for fuzzy matching: alphanumeric characters are lowercased and
// kept, every run of other characters becomes one space, and the result has no leading or
// trailing space.
std::wstring normalizeDeviceName(const std::wstring& value) {
    std::wstring out;
    out.reserve(value.size());

    // A separator is only written once the next alphanumeric character arrives, so leading
    // and trailing separators never reach the output.
    bool separatorPending = false;
    for (std::size_t i = 0; i < value.size(); ++i) {
        const wchar_t ch = value[i];
        if (!std::iswalnum(ch)) {
            separatorPending = !out.empty();
            continue;
        }
        if (separatorPending) {
            out.push_back(L' ');
            separatorPending = false;
        }
        out.push_back(static_cast<wchar_t>(std::towlower(ch)));
    }
    return out;
}
// Splits `value` on single spaces and returns the words worth scoring: words of one character
// or less and the generic words camera/webcam/video/input are dropped.
std::vector<std::wstring> splitWords(const std::wstring& value) {
    static const wchar_t* const kIgnoredWords[] = {L"camera", L"webcam", L"video", L"input"};

    std::vector<std::wstring> kept;
    for (std::size_t begin = 0; begin < value.size();) {
        const std::size_t space = value.find(L' ', begin);
        const bool isLastToken = space == std::wstring::npos;
        std::wstring token = value.substr(begin, isLastToken ? std::wstring::npos : space - begin);

        bool ignored = false;
        for (const wchar_t* stopWord : kIgnoredWords) {
            ignored = ignored || token == stopWord;
        }
        if (token.size() > 1 && !ignored) {
            kept.push_back(std::move(token));
        }

        if (isLastToken) {
            break;
        }
        begin = space + 1;
    }
    return kept;
}
int deviceMatchScore(
const std::wstring& candidateName,
const std::wstring& candidateLink,
const std::wstring& requestedName,
const std::wstring& requestedId) {
int score = 0;
const auto normalizedName = normalizeDeviceName(candidateName);
const auto normalizedLink = normalizeDeviceName(candidateLink);
const auto normalizedRequestedName = normalizeDeviceName(requestedName);
const auto normalizedRequestedId = normalizeDeviceName(requestedId);
if (!normalizedRequestedName.empty()) {
if (normalizedName == normalizedRequestedName) {
score = std::max(score, 1000);
}
if (containsInsensitive(normalizedName, normalizedRequestedName)) {
score = std::max(score, 900);
}
if (containsInsensitive(normalizedLink, normalizedRequestedName)) {
score = std::max(score, 800);
}
int wordScore = 0;
for (const auto& word : splitWords(normalizedRequestedName)) {
if (normalizedName.find(word) != std::wstring::npos) {
wordScore += 100;
} else if (normalizedLink.find(word) != std::wstring::npos) {
wordScore += 50;
}
}
score = std::max(score, wordScore);
}
if (!normalizedRequestedId.empty()) {
if (containsInsensitive(normalizedLink, normalizedRequestedId)) {
score = std::max(score, 700);
}
if (containsInsensitive(normalizedName, normalizedRequestedId)) {
score = std::max(score, 600);
}
}
return score;
}
} // namespace
WebcamCapture::~WebcamCapture() {
@@ -54,15 +138,49 @@ WebcamCapture::~WebcamCapture() {
bool WebcamCapture::initialize(
const std::wstring& deviceId,
const std::wstring& deviceName,
const std::wstring& directShowClsid,
int requestedWidth,
int requestedHeight,
int requestedFps) {
fps_ = std::clamp(requestedFps > 0 ? requestedFps : 30, 1, 60);
usingDirectShow_ = false;
selectedMatchScore_ = 0;
if (!succeeded(MFStartup(MF_VERSION), "MFStartup(webcam)")) {
if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) {
usingDirectShow_ = true;
return true;
}
return false;
}
mfStarted_ = true;
if (!selectDevice(deviceId, deviceName)) {
if (mfStarted_) {
MFShutdown();
mfStarted_ = false;
}
if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) {
usingDirectShow_ = true;
return true;
}
return false;
}
if ((!deviceId.empty() || !deviceName.empty()) && selectedMatchScore_ <= 0) {
if (mediaSource_) {
mediaSource_->Shutdown();
}
sourceReader_.Reset();
mediaSource_.Reset();
if (mfStarted_) {
MFShutdown();
mfStarted_ = false;
}
if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) {
usingDirectShow_ = true;
return true;
}
std::cerr << "ERROR: Requested webcam device was not found by native Windows webcam providers"
<< std::endl;
return false;
}
@@ -93,34 +211,24 @@ bool WebcamCapture::selectDevice(const std::wstring& deviceId, const std::wstrin
}
UINT32 selectedIndex = 0;
bool matched = false;
auto matchesRequestedDevice = [&](const std::wstring& name, const std::wstring& symbolicLink) {
if (!deviceName.empty() &&
(containsInsensitive(name, deviceName) || containsInsensitive(symbolicLink, deviceName))) {
return true;
}
if (!deviceId.empty() &&
(containsInsensitive(symbolicLink, deviceId) || containsInsensitive(name, deviceId))) {
return true;
}
return false;
};
int bestScore = 0;
for (UINT32 index = 0; index < deviceCount; index += 1) {
const std::wstring name = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
const std::wstring symbolicLink = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK);
if (matchesRequestedDevice(name, symbolicLink)) {
const int score = deviceMatchScore(name, symbolicLink, deviceName, deviceId);
std::wcerr << L"INFO: Native webcam candidate [" << index << L"] name=\"" << name << L"\" score=" << score << std::endl;
if (score > bestScore) {
selectedIndex = index;
matched = true;
break;
bestScore = score;
}
}
if ((!deviceId.empty() || !deviceName.empty()) && !matched) {
std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; using default webcam"
if ((!deviceId.empty() || !deviceName.empty()) && bestScore <= 0) {
std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; trying DirectShow"
<< std::endl;
}
selectedMatchScore_ = bestScore;
selectedDeviceName_ = readAllocatedString(devices[selectedIndex], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
hr = devices[selectedIndex]->ActivateObject(IID_PPV_ARGS(&mediaSource_));
@@ -181,6 +289,9 @@ bool WebcamCapture::configureReader(int requestedWidth, int requestedHeight, int
}
bool WebcamCapture::start() {
if (usingDirectShow_) {
return directShowCapture_.start();
}
if (!sourceReader_ || thread_.joinable()) {
return false;
}
@@ -191,6 +302,7 @@ bool WebcamCapture::start() {
}
void WebcamCapture::stop() {
directShowCapture_.stop();
stopRequested_ = true;
if (thread_.joinable()) {
thread_.join();
@@ -262,6 +374,9 @@ void WebcamCapture::captureLoop() {
}
bool WebcamCapture::copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height) {
if (usingDirectShow_) {
return directShowCapture_.copyLatestFrame(destination, width, height);
}
std::scoped_lock lock(frameMutex_);
if (latestFrame_.empty() || width_ <= 0 || height_ <= 0) {
return false;
@@ -274,17 +389,29 @@ bool WebcamCapture::copyLatestFrame(std::vector<BYTE>& destination, int& width,
}
// Frame width in pixels, taken from the DirectShow fallback when it is the active backend.
int WebcamCapture::width() const {
    return usingDirectShow_ ? directShowCapture_.width() : width_;
}
// Frame height in pixels, taken from the DirectShow fallback when it is the active backend.
int WebcamCapture::height() const {
    return usingDirectShow_ ? directShowCapture_.height() : height_;
}
// Capture rate, taken from the DirectShow fallback when it is the active backend.
int WebcamCapture::fps() const {
    return usingDirectShow_ ? directShowCapture_.fps() : fps_;
}
// Name of the device actually in use, taken from the DirectShow fallback when it is the active
// backend. Both operands are const lvalues, so the returned reference stays valid.
const std::wstring& WebcamCapture::selectedDeviceName() const {
    return usingDirectShow_ ? directShowCapture_.selectedDeviceName() : selectedDeviceName_;
}
@@ -1,5 +1,7 @@
#pragma once
#include "dshow_webcam_capture.h"
#include <Windows.h>
#include <mfidl.h>
#include <mfreadwrite.h>
@@ -23,6 +25,7 @@ public:
// Prepares webcam capture for the requested device and format.
//   deviceId / deviceName: per the design notes, the browser webcam device id and
//     the selected display label passed down by the renderer.
//   directShowClsid: per the design notes, the DirectShow filter CLSID that
//     Electron resolved from the selected label; presumably empty when no
//     DirectShow fallback applies -- TODO confirm against the implementation.
//   requestedWidth / requestedHeight / requestedFps: desired capture format.
// NOTE(review): presumably returns true on success -- confirm in webcam_capture.cpp.
bool initialize(
const std::wstring& deviceId,
const std::wstring& deviceName,
const std::wstring& directShowClsid,
int requestedWidth,
int requestedHeight,
int requestedFps);
@@ -42,6 +45,7 @@ private:
Microsoft::WRL::ComPtr<IMFMediaSource> mediaSource_;
Microsoft::WRL::ComPtr<IMFSourceReader> sourceReader_;
DirectShowWebcamCapture directShowCapture_;
std::thread thread_;
std::atomic<bool> stopRequested_ = false;
std::mutex frameMutex_;
@@ -50,5 +54,7 @@ private:
int height_ = 0;
int fps_ = 30;
bool mfStarted_ = false;
bool usingDirectShow_ = false;
int selectedMatchScore_ = 0;
std::wstring selectedDeviceName_;
};
+73
View File
@@ -105,6 +105,67 @@ function startFixtureWindow() {
});
}
/**
 * Canonicalizes a device label for fuzzy comparison: lowercases it, collapses
 * every run of characters outside [a-z0-9] into a single space, and trims the
 * result.
 *
 * @param value Raw device label or identifier (must be a string).
 * @returns The normalized, space-separated form.
 */
function normalizeDeviceName(value) {
	return value
		.toLowerCase()
		.replace(/[^a-z0-9]+/g, " ")
		.trim();
}

/**
 * Scores how well a candidate device matches the requested device name.
 *
 * Tiers (highest wins, first hit returns):
 *   1000 - normalized candidate name equals the normalized request;
 *    900 - one normalized name contains the other;
 *    800 - the normalized id and the request contain one another;
 *   else - 100 per significant request word found in the candidate name,
 *          or 50 if the word is only found in the id.
 *
 * @param candidateName Friendly name of the candidate (may be null/undefined).
 * @param candidateId Identifier of the candidate (may be null/undefined).
 * @param requestedName Name the caller is looking for (may be null/undefined).
 * @returns A non-negative score; 0 means "no evidence of a match".
 */
function scoreDeviceName(candidateName, candidateId, requestedName) {
	const candidate = normalizeDeviceName(candidateName ?? "");
	const id = normalizeDeviceName(candidateId ?? "");
	const requested = normalizeDeviceName(requestedName ?? "");
	if (!requested) return 0;

	// Every string contains the empty string, so the containment tiers must be
	// skipped for an empty name or id; otherwise a candidate with no usable
	// name would be reported as a near-perfect match.
	if (candidate) {
		if (candidate === requested) return 1000;
		if (candidate.includes(requested) || requested.includes(candidate)) return 900;
	}
	if (id && (id.includes(requested) || requested.includes(id))) return 800;

	// Generic words carry no identifying information and are ignored.
	const genericWords = ["camera", "webcam", "video", "input"];
	return requested
		.split(/\s+/)
		.filter((word) => word.length > 1 && !genericWords.includes(word))
		.reduce((score, word) => {
			if (candidate.includes(word)) return score + 100;
			if (id.includes(word)) return score + 50;
			return score;
		}, 0);
}
/**
 * Looks up the DirectShow filter CLSID whose registered FriendlyName best
 * matches `requestedName`.
 *
 * Runs `reg.exe query ... /s` on the Instance key of the DirectShow video
 * input device category, groups the REG_SZ values under each registry key
 * into one entry, scores every entry that has a CLSID with scoreDeviceName,
 * and keeps the first entry that reaches the highest score.
 *
 * @param requestedName Device label to resolve.
 * @returns The matching CLSID string, or "" when the name is empty, the
 *   registry query does not exit with status 0, or no entry scores above 0.
 */
function resolveDirectShowWebcamClsid(requestedName) {
	if (!requestedName) {
		return "";
	}

	const regQuery = spawnSync(
		"reg.exe",
		["query", "HKCR\\CLSID\\{860BB310-5D01-11D0-BD3B-00A0C911CE86}\\Instance", "/s"],
		{ encoding: "utf8", windowsHide: true },
	);
	if (regQuery.status !== 0) {
		return "";
	}

	// Group values by registry key: each "HKEY_..." line starts a new entry.
	const filters = [];
	let pending = {};
	const flushPending = () => {
		if (pending.friendlyName || pending.clsid) {
			filters.push(pending);
		}
		pending = {};
	};
	for (const rawLine of regQuery.stdout.split(/\r?\n/)) {
		const text = rawLine.trim();
		if (!text) {
			continue;
		}
		if (/^HKEY_/i.test(text)) {
			flushPending();
			continue;
		}
		const valueMatch = text.match(/^(\S+)\s+REG_SZ\s+(.+)$/);
		if (!valueMatch) {
			continue;
		}
		const valueName = valueMatch[1];
		const valueData = valueMatch[2].trim();
		if (valueName === "FriendlyName") {
			pending.friendlyName = valueData;
		}
		if (valueName === "CLSID") {
			pending.clsid = valueData;
		}
	}
	flushPending();

	// Keep the first entry with the strictly highest positive score.
	let topClsid = "";
	let topScore = 0;
	for (const filter of filters) {
		if (!filter.clsid) {
			continue;
		}
		const score = scoreDeviceName(filter.friendlyName, filter.clsid, requestedName);
		if (score > topScore) {
			topScore = score;
			topClsid = filter.clsid;
		}
	}
	return topClsid;
}
function probeStreams(outputPath) {
const ffprobe = spawnSync(
"ffprobe",
@@ -191,6 +252,9 @@ const config = {
webcamEnabled: WITH_WEBCAM,
webcamDeviceId: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_ID ?? "",
webcamDeviceName: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME ?? "",
webcamDirectShowClsid: resolveDirectShowWebcamClsid(
process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME ?? "",
),
webcamWidth: 640,
webcamHeight: 360,
webcamFps: 30,
@@ -224,6 +288,13 @@ if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) {
const streams = probeStreams(outputPath);
const hasVideo = streams.some((stream) => stream.codec_type === "video");
const hasAudio = streams.some((stream) => stream.codec_type === "audio");
const webcamFormatLine = result.stdout
.split(/\r?\n/)
.find((line) => line.includes('"event":"webcam-format"'));
const webcamFormat = webcamFormatLine ? JSON.parse(webcamFormatLine) : null;
const nativeWebcamDiagnostics = result.stderr
.split(/\r?\n/)
.filter((line) => line.includes("Native webcam candidate"));
if (!hasVideo) {
throw new Error(`WGC helper output has no video stream: ${outputPath}`);
}
@@ -249,6 +320,8 @@ console.log(
codecName: stream.codec_name,
duration: stream.duration,
})),
selectedWebcamDeviceName: webcamFormat?.deviceName,
nativeWebcamDiagnostics,
firstFrameLuma: frameLuma,
},
null,
+1
View File
@@ -27,6 +27,7 @@ export type NativeWindowsRecordingRequest = {
enabled: boolean;
deviceId?: string;
deviceName?: string;
directShowClsid?: string;
width: number;
height: number;
fps: number;