fix: support DirectShow virtual webcams

This commit is contained in:
EtienneLescot
2026-05-05 18:33:48 +02:00
parent fdcd882058
commit 84484d6167
11 changed files with 875 additions and 19 deletions
+122
View File
@@ -476,6 +476,103 @@ function isWindowsGraphicsCaptureOsSupported() {
return Number.isFinite(build) && build >= 19041;
}
/**
 * Canonicalizes a device label for fuzzy comparison.
 *
 * The label is lower-cased and reduced to its runs of `a-z0-9` characters,
 * which are joined by single spaces; everything else (punctuation, braces,
 * whitespace, non-ASCII letters) acts as a separator.
 */
function normalizeNativeDeviceName(value: string) {
  const alphanumericRuns = value
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((run) => run.length > 0);
  return alphanumericRuns.join(" ");
}
/**
 * Scores how well a registered capture filter matches the requested camera label.
 *
 * All three inputs are normalized with `normalizeNativeDeviceName` first.
 *
 * @param candidateName Friendly name of the candidate filter (may be empty).
 * @param candidateId Identifier of the candidate filter, e.g. its CLSID text.
 * @param requestedName Label the user selected; an empty/missing label scores 0.
 * @returns 1000 for an exact name match, 900 when one name contains the other,
 *   800 when the id and the requested label contain one another, otherwise
 *   100 per significant requested word found in the name plus 50 per word found
 *   only in the id (0 when nothing matches).
 */
function scoreNativeDeviceName(candidateName: string, candidateId: string, requestedName?: string) {
  const candidate = normalizeNativeDeviceName(candidateName);
  const id = normalizeNativeDeviceName(candidateId);
  const requested = normalizeNativeDeviceName(requestedName ?? "");
  if (!requested) {
    return 0;
  }
  if (candidate === requested) {
    return 1000;
  }
  // `"anything".includes("")` is always true, so an empty candidate name or id
  // must be excluded explicitly; otherwise nameless filters outrank real matches.
  if (candidate && (candidate.includes(requested) || requested.includes(candidate))) {
    return 900;
  }
  if (id && (id.includes(requested) || requested.includes(id))) {
    return 800;
  }
  // Generic words appear in almost every camera label and carry no signal.
  const genericWords = new Set(["camera", "webcam", "video", "input"]);
  return requested
    .split(/\s+/)
    .filter((word) => word.length > 1 && !genericWords.has(word))
    .reduce((score, word) => {
      if (candidate.includes(word)) return score + 100;
      if (id.includes(word)) return score + 50;
      return score;
    }, 0);
}
/**
 * Decodes the complete stdout of `reg.exe` into text.
 *
 * The output is decoded once, after all chunks have been joined, so a character
 * can never be split across a chunk boundary. Registry dumps are mostly ASCII,
 * which means UTF-16LE output necessarily contains NUL bytes; their presence
 * (or a UTF-16LE byte-order mark) selects the UTF-16LE decoder, otherwise the
 * bytes are decoded as UTF-8.
 */
function decodeRegQueryOutput(raw: Buffer) {
  if (raw.length === 0) {
    return "";
  }
  const hasUtf16LeBom = raw.length >= 2 && raw[0] === 0xff && raw[1] === 0xfe;
  if (hasUtf16LeBom) {
    return raw.subarray(2).toString("utf16le");
  }
  return raw.includes(0) ? raw.toString("utf16le") : raw.toString();
}

/**
 * Dumps the DirectShow "video input device" category from the registry by
 * running `reg.exe query ... /s`.
 *
 * @returns The textual output of `reg.exe`, or an empty string when the process
 *   cannot be spawned. The promise never rejects.
 */
function queryDirectShowVideoInputRegistry() {
  return new Promise<string>((resolve) => {
    const proc = spawn(
      "reg.exe",
      ["query", "HKCR\\CLSID\\{860BB310-5D01-11D0-BD3B-00A0C911CE86}\\Instance", "/s"],
      { windowsHide: true },
    );
    const chunks: Buffer[] = [];
    proc.stdout.on("data", (chunk: Buffer) => {
      chunks.push(chunk);
    });
    proc.on("close", () => resolve(decodeRegQueryOutput(Buffer.concat(chunks))));
    // Best effort: a missing or failing reg.exe simply yields no candidates.
    proc.on("error", () => resolve(""));
  });
}
/**
 * Finds the DirectShow video-input filter whose friendly name best matches the
 * requested camera label.
 *
 * The registry dump is split into one record per `HKEY_...` section; each record
 * collects the `FriendlyName` and `CLSID` string values of that section. Records
 * with a CLSID are scored with `scoreNativeDeviceName`, and the first record with
 * the highest score wins.
 *
 * @param deviceName Camera label selected by the user.
 * @returns The CLSID text of the best match, or `null` when not on Windows, when
 *   the label is blank, or when no record scores above zero.
 */
async function resolveDirectShowWebcamClsid(deviceName?: string) {
  const hasRequestedName = Boolean(deviceName?.trim());
  if (process.platform !== "win32" || !hasRequestedName) {
    return null;
  }

  type RegistryFilter = { friendlyName?: string; clsid?: string };
  const filters: RegistryFilter[] = [];
  let pending: RegistryFilter = {};
  // Commits the record being built (if it holds anything) and starts a new one.
  const flushPending = () => {
    if (pending.friendlyName || pending.clsid) {
      filters.push(pending);
    }
    pending = {};
  };

  const registryDump = await queryDirectShowVideoInputRegistry();
  for (const line of registryDump.split(/\r?\n/).map((raw) => raw.trim())) {
    if (line === "") {
      continue;
    }
    if (/^HKEY_/i.test(line)) {
      // A key header starts a new registry section.
      flushPending();
      continue;
    }
    const parsed = /^(\S+)\s+REG_SZ\s+(.+)$/.exec(line);
    if (parsed === null) {
      continue;
    }
    const [, valueName, valueData] = parsed;
    switch (valueName) {
      case "FriendlyName":
        pending.friendlyName = valueData.trim();
        break;
      case "CLSID":
        pending.clsid = valueData.trim();
        break;
      default:
        break;
    }
  }
  flushPending();

  let winner: { clsid: string; friendlyName?: string; score: number } | null = null;
  for (const { clsid, friendlyName } of filters) {
    if (!clsid) {
      continue;
    }
    const score = scoreNativeDeviceName(friendlyName ?? "", clsid, deviceName);
    // Strict comparison keeps the earliest record on ties.
    if (winner === null || score > winner.score) {
      winner = { clsid, friendlyName, score };
    }
  }

  if (winner === null || winner.score <= 0) {
    return null;
  }

  console.info("[native-wgc] resolved DirectShow webcam filter", {
    requestedName: deviceName,
    filterName: winner.friendlyName,
    clsid: winner.clsid,
    score: winner.score,
  });
  return winner.clsid;
}
async function startCursorRecording(recordingId?: number) {
if (cursorRecordingSession) {
pendingCursorRecordingData = await cursorRecordingSession.stop();
@@ -623,6 +720,25 @@ function waitForNativeWindowsCaptureStop(proc: ChildProcessWithoutNullStreams) {
});
}
/**
 * Extracts the most recent `webcam-format` event from the helper's output.
 *
 * Only the last line containing the literal `"event":"webcam-format"` is
 * considered; it must be valid JSON on its own.
 *
 * @param output Accumulated output text of the native capture helper.
 * @returns The parsed event, or `null` when there is no such line or the last
 *   one is not parseable JSON.
 */
function readNativeWindowsWebcamFormat(output: string) {
  const marker = '"event":"webcam-format"';
  const outputLines = output.split(/\r?\n/);

  // Walk backwards so the newest event is found first.
  let newestEventLine: string | undefined;
  for (let index = outputLines.length - 1; index >= 0; index -= 1) {
    if (outputLines[index].includes(marker)) {
      newestEventLine = outputLines[index];
      break;
    }
  }
  if (!newestEventLine) {
    return null;
  }

  let parsedEvent: {
    width?: number;
    height?: number;
    fps?: number;
    deviceName?: string;
  };
  try {
    parsedEvent = JSON.parse(newestEventLine) as {
      width?: number;
      height?: number;
      fps?: number;
      deviceName?: string;
    };
  } catch {
    return null;
  }
  return parsedEvent;
}
function setCurrentRecordingSessionState(session: RecordingSession | null) {
currentRecordingSession = session;
currentVideoPath = session?.screenVideoPath ?? null;
@@ -866,6 +982,9 @@ export function registerIpcHandlers(
typeof request.source.displayId === "number" && Number.isFinite(request.source.displayId)
? request.source.displayId
: Number(selectedSource?.display_id);
const webcamDirectShowClsid = request.webcam.enabled
? await resolveDirectShowWebcamClsid(request.webcam.deviceName)
: null;
const config = {
schemaVersion: 2,
recordingId,
@@ -889,6 +1008,7 @@ export function registerIpcHandlers(
webcamEnabled: request.webcam.enabled,
webcamDeviceId: request.webcam.deviceId ?? null,
webcamDeviceName: request.webcam.deviceName ?? null,
webcamDirectShowClsid,
webcamWidth: request.webcam.width,
webcamHeight: request.webcam.height,
webcamFps: request.webcam.fps,
@@ -943,9 +1063,11 @@ export function registerIpcHandlers(
await waitForNativeWindowsCaptureStart(proc);
const captureStartedAtMs = Date.now();
nativeWindowsCursorOffsetMs = Math.max(0, captureStartedAtMs - cursorStartTimeMs);
const webcamFormat = readNativeWindowsWebcamFormat(nativeWindowsCaptureOutput);
console.info("[native-wgc] capture started", {
captureStartedAtMs,
cursorOffsetMs: nativeWindowsCursorOffsetMs,
webcamFormat,
});
const source = selectedSource || { name: "Screen" };
+1 -1
View File
@@ -47,7 +47,7 @@ Current V2 JSON shape:
}
```
The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, and Media Foundation webcam capture. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links, so the renderer passes both `webcamDeviceId` and `webcamDeviceName`. The helper treats the Media Foundation friendly name as the preferred stable selector, then tries the browser id, and only falls back to the default webcam with an explicit warning when no requested device matches.
The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, Media Foundation webcam capture, and a DirectShow webcam fallback for virtual cameras that are not exposed through Media Foundation. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links, so the renderer passes both `webcamDeviceId` and `webcamDeviceName`. Electron resolves a matching DirectShow filter CLSID for the selected label; the helper uses Media Foundation first, then that exact DirectShow filter when the requested camera is absent from Media Foundation.
Smoke-test the helper with:
@@ -16,6 +16,8 @@ set(CMAKE_CXX_EXTENSIONS OFF)
add_executable(wgc-capture
src/audio_sample_utils.cpp
src/audio_sample_utils.h
src/dshow_webcam_capture.cpp
src/dshow_webcam_capture.h
src/main.cpp
src/mf_encoder.cpp
src/mf_encoder.h
@@ -0,0 +1,469 @@
#include "dshow_webcam_capture.h"
#include <initguid.h>
#include <dshow.h>
#include <wrl/client.h>
#include <algorithm>
#include <chrono>
#include <cwctype>
#include <iostream>
namespace {
// Class IDs used with CoCreateInstance below to create the sample grabber and
// null renderer filters of the capture graph.
// NOTE(review): declared locally, presumably because the SDK header that used to
// declare them (qedit.h) is not available to this build — confirm.
const CLSID CLSID_SampleGrabberLocal = {0xC1F400A0, 0x3F08, 0x11D3, {0x9F, 0x0B, 0x00, 0x60, 0x08, 0x03, 0x9E, 0x37}};
const CLSID CLSID_NullRendererLocal = {0xC1F400A4, 0x3F08, 0x11D3, {0x9F, 0x0B, 0x00, 0x60, 0x08, 0x03, 0x9E, 0x37}};
// Local declaration of the sample grabber callback interface. It is only needed
// here as the parameter type of ISampleGrabber::SetCallback; this file never
// installs a callback.
// NOTE(review): the method order defines the COM vtable layout — do not reorder.
MIDL_INTERFACE("0579154A-2B53-4994-B0D0-E773148EFF85")
ISampleGrabberCB : public IUnknown {
public:
virtual HRESULT STDMETHODCALLTYPE SampleCB(double sampleTime, IMediaSample* sample) = 0;
virtual HRESULT STDMETHODCALLTYPE BufferCB(double sampleTime, BYTE* buffer, long bufferLength) = 0;
};
// Local declaration of the sample grabber control interface. This file uses
// SetMediaType, GetConnectedMediaType, SetBufferSamples, SetOneShot and
// GetCurrentBuffer; the remaining methods are declared but not called here.
// NOTE(review): the method order defines the COM vtable layout — do not reorder
// or remove methods, even unused ones.
MIDL_INTERFACE("6B652FFF-11FE-4FCE-92AD-0266B5D7C78F")
ISampleGrabber : public IUnknown {
public:
virtual HRESULT STDMETHODCALLTYPE SetOneShot(BOOL oneShot) = 0;
virtual HRESULT STDMETHODCALLTYPE SetMediaType(const AM_MEDIA_TYPE* type) = 0;
virtual HRESULT STDMETHODCALLTYPE GetConnectedMediaType(AM_MEDIA_TYPE* type) = 0;
virtual HRESULT STDMETHODCALLTYPE SetBufferSamples(BOOL bufferThem) = 0;
// The buffer parameter is typed long* although callers pass a byte buffer.
virtual HRESULT STDMETHODCALLTYPE GetCurrentBuffer(long* bufferSize, long* buffer) = 0;
virtual HRESULT STDMETHODCALLTYPE GetCurrentSample(IMediaSample** sample) = 0;
virtual HRESULT STDMETHODCALLTYPE SetCallback(ISampleGrabberCB* callback, long whichMethodToCallback) = 0;
};
// Returns whether `hr` is a success code. On failure, writes one line to stderr
// naming the failed step (`label`) and the HRESULT in hexadecimal.
bool succeeded(HRESULT hr, const char* label) {
    const bool ok = SUCCEEDED(hr);
    if (!ok) {
        std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")"
                  << std::endl;
    }
    return ok;
}
// Reads the property `key` from `bag` and returns it as a wide string.
// Returns an empty string when the read fails or the value is not a non-null BSTR.
std::wstring readPropertyString(IPropertyBag* bag, LPCOLESTR key) {
    VARIANT value;
    VariantInit(&value);

    std::wstring text;
    const HRESULT hr = bag->Read(key, &value, nullptr);
    if (SUCCEEDED(hr) && value.vt == VT_BSTR && value.bstrVal != nullptr) {
        text.assign(value.bstrVal);
    }

    // The variant is cleared on every path, whether or not the read succeeded.
    VariantClear(&value);
    return text;
}
// Case-insensitive, symmetric containment test: true when either string contains
// the other after lower-casing. An empty argument never matches.
bool containsInsensitive(const std::wstring& haystack, const std::wstring& needle) {
    if (haystack.empty() || needle.empty()) {
        return false;
    }

    const auto lowered = [](std::wstring text) {
        for (wchar_t& ch : text) {
            ch = static_cast<wchar_t>(::towlower(ch));
        }
        return text;
    };

    const std::wstring left = lowered(haystack);
    const std::wstring right = lowered(needle);
    if (left.find(right) != std::wstring::npos) {
        return true;
    }
    return right.find(left) != std::wstring::npos;
}
// Canonicalizes a device label for fuzzy matching: alphanumeric characters are
// lower-cased and kept, and every run of other characters between two kept
// characters becomes a single space. The result has no leading or trailing space.
std::wstring normalizeDeviceName(const std::wstring& value) {
    std::wstring out;
    out.reserve(value.size());

    // Set when a separator run follows kept text; the space is only written once
    // the next alphanumeric character arrives, so nothing has to be trimmed.
    bool separatorPending = false;
    for (std::size_t i = 0; i < value.size(); ++i) {
        const wchar_t ch = value[i];
        if (!std::iswalnum(ch)) {
            separatorPending = !out.empty();
            continue;
        }
        if (separatorPending) {
            out.push_back(L' ');
            separatorPending = false;
        }
        out.push_back(static_cast<wchar_t>(std::towlower(ch)));
    }
    return out;
}
// Splits `value` on single spaces and returns the significant words: tokens
// longer than one character that are not one of the generic terms
// "camera", "webcam", "video" or "input".
std::vector<std::wstring> splitWords(const std::wstring& value) {
    static const std::wstring kGenericWords[] = {L"camera", L"webcam", L"video", L"input"};

    std::vector<std::wstring> significant;
    std::size_t cursor = 0;
    while (cursor < value.size()) {
        std::size_t boundary = value.find(L' ', cursor);
        if (boundary == std::wstring::npos) {
            boundary = value.size();
        }

        std::wstring token = value.substr(cursor, boundary - cursor);
        const bool generic =
            std::find(std::begin(kGenericWords), std::end(kGenericWords), token) != std::end(kGenericWords);
        if (token.size() > 1 && !generic) {
            significant.push_back(token);
        }

        // Step past the space; after the final token this leaves the loop.
        cursor = boundary + 1;
    }
    return significant;
}
int deviceMatchScore(
const std::wstring& candidateName,
const std::wstring& candidatePath,
const std::wstring& requestedName,
const std::wstring& requestedId) {
int score = 0;
const auto normalizedName = normalizeDeviceName(candidateName);
const auto normalizedPath = normalizeDeviceName(candidatePath);
const auto normalizedRequestedName = normalizeDeviceName(requestedName);
const auto normalizedRequestedId = normalizeDeviceName(requestedId);
if (!normalizedRequestedName.empty()) {
if (normalizedName == normalizedRequestedName) {
score = std::max(score, 1000);
}
if (containsInsensitive(normalizedName, normalizedRequestedName)) {
score = std::max(score, 900);
}
if (containsInsensitive(normalizedPath, normalizedRequestedName)) {
score = std::max(score, 800);
}
int wordScore = 0;
for (const auto& word : splitWords(normalizedRequestedName)) {
if (normalizedName.find(word) != std::wstring::npos) {
wordScore += 100;
} else if (normalizedPath.find(word) != std::wstring::npos) {
wordScore += 50;
}
}
score = std::max(score, wordScore);
}
if (!normalizedRequestedId.empty()) {
if (containsInsensitive(normalizedPath, normalizedRequestedId)) {
score = std::max(score, 700);
}
if (containsInsensitive(normalizedName, normalizedRequestedId)) {
score = std::max(score, 600);
}
}
return score;
}
// Releases the resources owned by an AM_MEDIA_TYPE (its format block and its
// pUnk reference) and resets the corresponding fields. The structure itself is
// not freed.
void freeMediaType(AM_MEDIA_TYPE& type) {
    if (type.cbFormat > 0) {
        CoTaskMemFree(type.pbFormat);
        type.pbFormat = nullptr;
        type.cbFormat = 0;
    }

    if (IUnknown* owner = type.pUnk) {
        type.pUnk = nullptr;
        owner->Release();
    }
}
// Reads the REG_SZ value `valueName` directly under `key` into `value`.
// Returns false (leaving `value` untouched) when the value is missing, is not a
// string, or is empty; trailing NUL characters are removed from the result.
bool readRegistryString(HKEY key, const wchar_t* valueName, std::wstring& value) {
    DWORD valueType = 0;
    DWORD byteCount = 0;

    // First call: no buffer, only ask for the required size in bytes.
    LSTATUS status = RegGetValueW(key, nullptr, valueName, RRF_RT_REG_SZ, &valueType, nullptr, &byteCount);
    if (status != ERROR_SUCCESS || byteCount == 0) {
        return false;
    }

    std::wstring text(byteCount / sizeof(wchar_t), L'\0');
    status = RegGetValueW(key, nullptr, valueName, RRF_RT_REG_SZ, &valueType, text.data(), &byteCount);
    if (status != ERROR_SUCCESS) {
        return false;
    }

    // Drop the terminator(s) that were counted in the reported size.
    const std::size_t lastChar = text.find_last_not_of(L'\0');
    text.erase(lastChar == std::wstring::npos ? 0 : lastChar + 1);

    value = text;
    return true;
}
bool findRegisteredVideoInput(
const std::wstring& deviceId,
const std::wstring& deviceName,
CLSID& selectedClsid,
std::wstring& selectedName,
int& bestScore) {
HKEY instanceKey = nullptr;
if (RegOpenKeyExW(
HKEY_CLASSES_ROOT,
L"CLSID\\{860BB310-5D01-11D0-BD3B-00A0C911CE86}\\Instance",
0,
KEY_READ,
&instanceKey) != ERROR_SUCCESS) {
return false;
}
DWORD index = 0;
wchar_t subkeyName[128];
DWORD subkeyNameLength = ARRAYSIZE(subkeyName);
bool found = false;
while (RegEnumKeyExW(instanceKey, index, subkeyName, &subkeyNameLength, nullptr, nullptr, nullptr, nullptr) == ERROR_SUCCESS) {
HKEY filterKey = nullptr;
if (RegOpenKeyExW(instanceKey, subkeyName, 0, KEY_READ, &filterKey) == ERROR_SUCCESS) {
std::wstring friendlyName;
std::wstring clsidText;
readRegistryString(filterKey, L"FriendlyName", friendlyName);
readRegistryString(filterKey, L"CLSID", clsidText);
const int score = deviceMatchScore(friendlyName, clsidText, deviceName, deviceId);
std::wcerr << L"INFO: Registered DirectShow webcam candidate name=\"" << friendlyName << L"\" score=" << score << std::endl;
CLSID clsid{};
if (!clsidText.empty() && SUCCEEDED(CLSIDFromString(clsidText.c_str(), &clsid)) && (!found || score > bestScore)) {
selectedClsid = clsid;
selectedName = friendlyName;
bestScore = score;
found = true;
}
RegCloseKey(filterKey);
}
index += 1;
subkeyNameLength = ARRAYSIZE(subkeyName);
}
RegCloseKey(instanceKey);
return found;
}
} // namespace
// Private state of DirectShowWebcamCapture: the COM objects that make up the
// capture graph plus two bookkeeping flags. Kept out of the header so that the
// header does not need the DirectShow includes.
struct DirectShowWebcamCapture::Impl {
// Filter graph and the capture-graph builder attached to it.
Microsoft::WRL::ComPtr<IGraphBuilder> graph;
Microsoft::WRL::ComPtr<ICaptureGraphBuilder2> captureGraph;
// Source filter created from the resolved DirectShow CLSID.
Microsoft::WRL::ComPtr<IBaseFilter> captureFilter;
// Sample grabber, held both as a graph filter and as its control interface.
Microsoft::WRL::ComPtr<IBaseFilter> sampleGrabberFilter;
Microsoft::WRL::ComPtr<ISampleGrabber> sampleGrabber;
// Renderer passed to RenderStream as the end of the chain.
Microsoft::WRL::ComPtr<IBaseFilter> nullRenderer;
// Run/Stop control obtained from the graph.
Microsoft::WRL::ComPtr<IMediaControl> mediaControl;
// True when initialize() made a successful CoInitializeEx call that stop() must balance.
bool comInitialized = false;
// True between a successful start() and the next stop().
bool running = false;
};
// Stops capture (joining the worker thread and releasing the COM objects) before
// freeing the private state.
DirectShowWebcamCapture::~DirectShowWebcamCapture() {
stop();
delete impl_;
}
// Builds (but does not run) a DirectShow capture graph for the filter identified
// by `directShowClsid`:  source filter -> sample grabber (RGB32) -> null renderer.
//
// Parameters:
//   deviceId         not referenced by this function.
//   deviceName       used only as the reported device name (falls back to the
//                    CLSID text when empty).
//   directShowClsid  CLSID text of the capture filter; required.
//   requestedWidth/requestedHeight
//                    used only when the connected media type does not provide
//                    dimensions (defaults 1280x720 when not positive).
//   requestedFps     polling rate; values <= 0 become 30, then clamped to 1..60.
//
// Returns true when the graph was built and IMediaControl was obtained. On
// failure an error is written to stderr and false is returned; objects created
// so far stay in impl_ until stop() or the destructor releases them.
bool DirectShowWebcamCapture::initialize(
const std::wstring& deviceId,
const std::wstring& deviceName,
const std::wstring& directShowClsid,
int requestedWidth,
int requestedHeight,
int requestedFps) {
// Tear down any previous session before starting from a fresh Impl.
stop();
delete impl_;
impl_ = new Impl();
fps_ = std::clamp(requestedFps > 0 ? requestedFps : 30, 1, 60);
HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
if (SUCCEEDED(hr)) {
// Remember that stop() has to balance this call with CoUninitialize.
impl_->comInitialized = true;
} else if (hr != RPC_E_CHANGED_MODE) {
return succeeded(hr, "CoInitializeEx(DirectShow webcam)");
}
// RPC_E_CHANGED_MODE falls through: initialization continues without marking
// comInitialized, so stop() will not call CoUninitialize for it.
if (directShowClsid.empty()) {
std::cerr << "ERROR: DirectShow webcam fallback requires a resolved filter CLSID" << std::endl;
return false;
}
CLSID selectedClsid{};
if (FAILED(CLSIDFromString(directShowClsid.c_str(), &selectedClsid))) {
std::cerr << "ERROR: DirectShow webcam fallback received an invalid filter CLSID" << std::endl;
return false;
}
selectedDeviceName_ = deviceName.empty() ? directShowClsid : deviceName;
// Create the source filter, the graph and the capture-graph builder.
if (!succeeded(CoCreateInstance(selectedClsid, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->captureFilter)),
"CoCreateInstance(DirectShow webcam filter)")) {
return false;
}
if (!succeeded(CoCreateInstance(CLSID_FilterGraph, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->graph)),
"CoCreateInstance(FilterGraph)")) {
return false;
}
if (!succeeded(CoCreateInstance(CLSID_CaptureGraphBuilder2, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->captureGraph)),
"CoCreateInstance(CaptureGraphBuilder2)")) {
return false;
}
if (!succeeded(impl_->captureGraph->SetFiltergraph(impl_->graph.Get()), "SetFiltergraph(DirectShow webcam)")) {
return false;
}
if (!succeeded(impl_->graph->AddFilter(impl_->captureFilter.Get(), L"OpenScreen Webcam Source"),
"AddFilter(DirectShow webcam source)")) {
return false;
}
// Create the sample grabber and restrict it to RGB32 video before it is added
// to the graph and connected.
if (!succeeded(CoCreateInstance(CLSID_SampleGrabberLocal, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->sampleGrabberFilter)),
"CoCreateInstance(SampleGrabber)")) {
return false;
}
if (!succeeded(impl_->sampleGrabberFilter.As(&impl_->sampleGrabber), "QueryInterface(ISampleGrabber)")) {
return false;
}
AM_MEDIA_TYPE requestedType{};
requestedType.majortype = MEDIATYPE_Video;
requestedType.subtype = MEDIASUBTYPE_RGB32;
requestedType.formattype = FORMAT_VideoInfo;
if (!succeeded(impl_->sampleGrabber->SetMediaType(&requestedType), "SetMediaType(DirectShow RGB32)")) {
return false;
}
if (!succeeded(impl_->graph->AddFilter(impl_->sampleGrabberFilter.Get(), L"OpenScreen Webcam Sample Grabber"),
"AddFilter(SampleGrabber)")) {
return false;
}
if (!succeeded(CoCreateInstance(CLSID_NullRendererLocal, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->nullRenderer)),
"CoCreateInstance(NullRenderer)")) {
return false;
}
if (!succeeded(impl_->graph->AddFilter(impl_->nullRenderer.Get(), L"OpenScreen Webcam Null Renderer"),
"AddFilter(NullRenderer)")) {
return false;
}
// Connect source -> sample grabber -> null renderer on the capture pin category.
if (!succeeded(impl_->captureGraph->RenderStream(
&PIN_CATEGORY_CAPTURE,
&MEDIATYPE_Video,
impl_->captureFilter.Get(),
impl_->sampleGrabberFilter.Get(),
impl_->nullRenderer.Get()),
"RenderStream(DirectShow webcam)")) {
return false;
}
// Read the frame geometry from the media type the grabber actually connected with.
AM_MEDIA_TYPE connectedType{};
if (!succeeded(impl_->sampleGrabber->GetConnectedMediaType(&connectedType), "GetConnectedMediaType(DirectShow webcam)")) {
return false;
}
if (connectedType.formattype == FORMAT_VideoInfo && connectedType.pbFormat) {
const auto* videoInfo = reinterpret_cast<VIDEOINFOHEADER*>(connectedType.pbFormat);
width_ = std::abs(videoInfo->bmiHeader.biWidth);
height_ = std::abs(videoInfo->bmiHeader.biHeight);
// A negative biHeight is recorded as a top-down source; storeFrame() flips
// rows only when this flag is false.
sourceTopDown_ = videoInfo->bmiHeader.biHeight < 0;
}
freeMediaType(connectedType);
if (width_ <= 0 || height_ <= 0) {
// No usable dimensions from the connection: fall back to the request or 1280x720.
width_ = requestedWidth > 0 ? requestedWidth : 1280;
height_ = requestedHeight > 0 ? requestedHeight : 720;
}
// Keep the latest sample buffered (captureLoop() polls it with GetCurrentBuffer)
// and keep the graph running past the first sample. Return values are ignored.
impl_->sampleGrabber->SetBufferSamples(TRUE);
impl_->sampleGrabber->SetOneShot(FALSE);
if (!succeeded(impl_->graph.As(&impl_->mediaControl), "QueryInterface(IMediaControl)")) {
return false;
}
return true;
}
// Runs the capture graph and launches the polling thread.
// Returns false when initialize() has not produced a media control, when
// capture is already running, or when Run() fails.
bool DirectShowWebcamCapture::start() {
    const bool ready = impl_ != nullptr && impl_->mediaControl && !impl_->running;
    if (!ready) {
        return false;
    }

    if (!succeeded(impl_->mediaControl->Run(), "Run(DirectShow webcam)")) {
        return false;
    }

    impl_->running = true;
    // Clear the stop flag before the worker starts checking it.
    stopRequested_ = false;
    thread_ = std::thread(&DirectShowWebcamCapture::captureLoop, this);
    return true;
}
// Stops capture and releases the graph. Safe to call when nothing was
// initialized or started (initialize() and the destructor both call it
// unconditionally).
void DirectShowWebcamCapture::stop() {
// Ask the polling thread to exit and wait for it before touching impl_, which
// the thread reads.
stopRequested_ = true;
if (thread_.joinable()) {
thread_.join();
}
if (!impl_) {
return;
}
if (impl_->mediaControl && impl_->running) {
impl_->mediaControl->Stop();
}
impl_->running = false;
// Release every COM reference before COM itself is uninitialized below.
impl_->mediaControl.Reset();
impl_->nullRenderer.Reset();
impl_->sampleGrabber.Reset();
impl_->sampleGrabberFilter.Reset();
impl_->captureFilter.Reset();
impl_->captureGraph.Reset();
impl_->graph.Reset();
if (impl_->comInitialized) {
// Balances the successful CoInitializeEx made in initialize().
CoUninitialize();
impl_->comInitialized = false;
}
}
void DirectShowWebcamCapture::captureLoop() {
CoInitializeEx(nullptr, COINIT_MULTITHREADED);
while (!stopRequested_ && impl_ && impl_->sampleGrabber) {
long bufferSize = 0;
HRESULT hr = impl_->sampleGrabber->GetCurrentBuffer(&bufferSize, nullptr);
if (SUCCEEDED(hr) && bufferSize > 0) {
std::vector<BYTE> buffer(static_cast<size_t>(bufferSize));
hr = impl_->sampleGrabber->GetCurrentBuffer(&bufferSize, reinterpret_cast<long*>(buffer.data()));
if (SUCCEEDED(hr)) {
storeFrame(buffer.data(), bufferSize);
}
}
std::this_thread::sleep_for(std::chrono::milliseconds(1000 / std::max(1, fps_)));
}
CoUninitialize();
}
void DirectShowWebcamCapture::storeFrame(const BYTE* buffer, long length) {
const int stride = width_ * 4;
const int expectedLength = stride * height_;
if (!buffer || length < expectedLength || width_ <= 0 || height_ <= 0) {
return;
}
std::vector<BYTE> frame(static_cast<size_t>(expectedLength));
for (int y = 0; y < height_; y += 1) {
const int sourceY = sourceTopDown_ ? y : height_ - 1 - y;
const BYTE* source = buffer + sourceY * stride;
BYTE* destination = frame.data() + y * stride;
std::copy(source, source + stride, destination);
for (int x = 0; x < width_; x += 1) {
destination[x * 4 + 3] = 255;
}
}
std::scoped_lock lock(frameMutex_);
latestFrame_ = std::move(frame);
}
// Copies the most recently stored frame and its dimensions into the output
// arguments. Returns false, leaving the outputs untouched, when no frame has
// been stored yet or the dimensions are not positive.
bool DirectShowWebcamCapture::copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height) {
    const std::lock_guard<std::mutex> guard(frameMutex_);
    const bool frameAvailable = !latestFrame_.empty() && width_ > 0 && height_ > 0;
    if (frameAvailable) {
        destination = latestFrame_;
        width = width_;
        height = height_;
    }
    return frameAvailable;
}
// Frame width chosen by initialize() (0 before initialization).
int DirectShowWebcamCapture::width() const {
return width_;
}
// Frame height chosen by initialize() (0 before initialization).
int DirectShowWebcamCapture::height() const {
return height_;
}
// Polling rate used by the capture thread; initialize() clamps it to 1..60.
int DirectShowWebcamCapture::fps() const {
return fps_;
}
// Name recorded by initialize(): the requested device name, or the CLSID text
// when no name was supplied.
const std::wstring& DirectShowWebcamCapture::selectedDeviceName() const {
return selectedDeviceName_;
}
@@ -0,0 +1,50 @@
#pragma once
#include <Windows.h>
#include <atomic>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
// Webcam capture backend built on a DirectShow filter graph. The filter is
// identified by a CLSID string passed to initialize(); frames are polled on a
// worker thread and the latest one can be fetched with copyLatestFrame().
// Typical order of use: initialize() -> start() -> copyLatestFrame()... -> stop().
class DirectShowWebcamCapture {
public:
DirectShowWebcamCapture() = default;
~DirectShowWebcamCapture();
// Non-copyable: the object owns a thread and a raw Impl pointer.
DirectShowWebcamCapture(const DirectShowWebcamCapture&) = delete;
DirectShowWebcamCapture& operator=(const DirectShowWebcamCapture&) = delete;
// Builds the capture graph for the filter named by directShowClsid.
// Returns false on any failure.
bool initialize(
const std::wstring& deviceId,
const std::wstring& deviceName,
const std::wstring& directShowClsid,
int requestedWidth,
int requestedHeight,
int requestedFps);
// Runs the graph and starts the polling thread; false if not initialized or already running.
bool start();
// Joins the polling thread and releases the graph; safe to call repeatedly.
void stop();
// Copies the newest frame and its dimensions; false when no frame is available yet.
bool copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height);
int width() const;
int height() const;
int fps() const;
const std::wstring& selectedDeviceName() const;
// Converts and publishes one raw frame. Called by the polling thread.
// NOTE(review): public although only captureLoop() calls it in the visible code — confirm whether it needs to be.
void storeFrame(const BYTE* buffer, long length);
private:
// Defined in the .cpp; holds the DirectShow COM objects.
struct Impl;
void captureLoop();
Impl* impl_ = nullptr;
std::thread thread_;
// Set by stop() to end captureLoop(); cleared by start().
std::atomic<bool> stopRequested_ = false;
// Guards latestFrame_.
std::mutex frameMutex_;
std::vector<BYTE> latestFrame_;
int width_ = 0;
int height_ = 0;
int fps_ = 30;
// True when the connected format reported a negative height (rows stored top-down).
bool sourceTopDown_ = false;
std::wstring selectedDeviceName_;
};
+3
View File
@@ -41,6 +41,7 @@ struct CaptureConfig {
double microphoneGain = 1.0;
std::string webcamDeviceId;
std::string webcamDeviceName;
std::string webcamDirectShowClsid;
int webcamWidth = 0;
int webcamHeight = 0;
int webcamFps = 0;
@@ -280,6 +281,7 @@ bool parseConfig(const std::string& json, CaptureConfig& config) {
config.microphoneGain = findDouble(json, "microphoneGain", 1.0);
config.webcamDeviceId = findString(json, "webcamDeviceId");
config.webcamDeviceName = findString(json, "webcamDeviceName");
config.webcamDirectShowClsid = findString(json, "webcamDirectShowClsid");
config.webcamWidth = findInt(json, "webcamWidth", 0);
config.webcamHeight = findInt(json, "webcamHeight", 0);
config.webcamFps = findInt(json, "webcamFps", 0);
@@ -362,6 +364,7 @@ int main(int argc, char* argv[]) {
if (!webcamCapture.initialize(
utf8ToWide(config.webcamDeviceId),
utf8ToWide(config.webcamDeviceName),
utf8ToWide(config.webcamDirectShowClsid),
config.webcamWidth,
config.webcamHeight,
config.webcamFps > 0 ? config.webcamFps : config.fps)) {
@@ -6,6 +6,7 @@
#include <algorithm>
#include <chrono>
#include <cwctype>
#include <iostream>
namespace {
@@ -45,6 +46,89 @@ bool containsInsensitive(const std::wstring& haystack, const std::wstring& needl
lowerNeedle.find(lowerHaystack) != std::wstring::npos;
}
// Produces the comparison form of a device label: only alphanumeric characters
// survive (lower-cased), and each run of anything else that sits between two
// surviving characters collapses into one space. Leading and trailing
// separators vanish.
std::wstring normalizeDeviceName(const std::wstring& value) {
    std::wstring out;
    out.reserve(value.size());

    // A space is owed once a separator follows kept text; it is emitted lazily
    // with the next alphanumeric character, so no trailing trim is needed.
    bool separatorPending = false;
    for (std::size_t i = 0; i < value.size(); ++i) {
        const wchar_t ch = value[i];
        if (!std::iswalnum(ch)) {
            separatorPending = !out.empty();
            continue;
        }
        if (separatorPending) {
            out.push_back(L' ');
            separatorPending = false;
        }
        out.push_back(static_cast<wchar_t>(std::towlower(ch)));
    }
    return out;
}
// Tokenizes `value` at single spaces and keeps the tokens that are longer than
// one character and are not one of the generic terms "camera", "webcam",
// "video" or "input".
std::vector<std::wstring> splitWords(const std::wstring& value) {
    static const std::wstring kGenericWords[] = {L"camera", L"webcam", L"video", L"input"};

    std::vector<std::wstring> significant;
    std::size_t cursor = 0;
    while (cursor < value.size()) {
        std::size_t boundary = value.find(L' ', cursor);
        if (boundary == std::wstring::npos) {
            boundary = value.size();
        }

        std::wstring token = value.substr(cursor, boundary - cursor);
        const bool generic =
            std::find(std::begin(kGenericWords), std::end(kGenericWords), token) != std::end(kGenericWords);
        if (token.size() > 1 && !generic) {
            significant.push_back(token);
        }

        // Move past the space; after the last token this terminates the loop.
        cursor = boundary + 1;
    }
    return significant;
}
int deviceMatchScore(
const std::wstring& candidateName,
const std::wstring& candidateLink,
const std::wstring& requestedName,
const std::wstring& requestedId) {
int score = 0;
const auto normalizedName = normalizeDeviceName(candidateName);
const auto normalizedLink = normalizeDeviceName(candidateLink);
const auto normalizedRequestedName = normalizeDeviceName(requestedName);
const auto normalizedRequestedId = normalizeDeviceName(requestedId);
if (!normalizedRequestedName.empty()) {
if (normalizedName == normalizedRequestedName) {
score = std::max(score, 1000);
}
if (containsInsensitive(normalizedName, normalizedRequestedName)) {
score = std::max(score, 900);
}
if (containsInsensitive(normalizedLink, normalizedRequestedName)) {
score = std::max(score, 800);
}
int wordScore = 0;
for (const auto& word : splitWords(normalizedRequestedName)) {
if (normalizedName.find(word) != std::wstring::npos) {
wordScore += 100;
} else if (normalizedLink.find(word) != std::wstring::npos) {
wordScore += 50;
}
}
score = std::max(score, wordScore);
}
if (!normalizedRequestedId.empty()) {
if (containsInsensitive(normalizedLink, normalizedRequestedId)) {
score = std::max(score, 700);
}
if (containsInsensitive(normalizedName, normalizedRequestedId)) {
score = std::max(score, 600);
}
}
return score;
}
} // namespace
WebcamCapture::~WebcamCapture() {
@@ -54,15 +138,49 @@ WebcamCapture::~WebcamCapture() {
bool WebcamCapture::initialize(
const std::wstring& deviceId,
const std::wstring& deviceName,
const std::wstring& directShowClsid,
int requestedWidth,
int requestedHeight,
int requestedFps) {
fps_ = std::clamp(requestedFps > 0 ? requestedFps : 30, 1, 60);
usingDirectShow_ = false;
selectedMatchScore_ = 0;
if (!succeeded(MFStartup(MF_VERSION), "MFStartup(webcam)")) {
if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) {
usingDirectShow_ = true;
return true;
}
return false;
}
mfStarted_ = true;
if (!selectDevice(deviceId, deviceName)) {
if (mfStarted_) {
MFShutdown();
mfStarted_ = false;
}
if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) {
usingDirectShow_ = true;
return true;
}
return false;
}
if ((!deviceId.empty() || !deviceName.empty()) && selectedMatchScore_ <= 0) {
if (mediaSource_) {
mediaSource_->Shutdown();
}
sourceReader_.Reset();
mediaSource_.Reset();
if (mfStarted_) {
MFShutdown();
mfStarted_ = false;
}
if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) {
usingDirectShow_ = true;
return true;
}
std::cerr << "ERROR: Requested webcam device was not found by native Windows webcam providers"
<< std::endl;
return false;
}
@@ -93,34 +211,24 @@ bool WebcamCapture::selectDevice(const std::wstring& deviceId, const std::wstrin
}
UINT32 selectedIndex = 0;
bool matched = false;
auto matchesRequestedDevice = [&](const std::wstring& name, const std::wstring& symbolicLink) {
if (!deviceName.empty() &&
(containsInsensitive(name, deviceName) || containsInsensitive(symbolicLink, deviceName))) {
return true;
}
if (!deviceId.empty() &&
(containsInsensitive(symbolicLink, deviceId) || containsInsensitive(name, deviceId))) {
return true;
}
return false;
};
int bestScore = 0;
for (UINT32 index = 0; index < deviceCount; index += 1) {
const std::wstring name = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
const std::wstring symbolicLink = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK);
if (matchesRequestedDevice(name, symbolicLink)) {
const int score = deviceMatchScore(name, symbolicLink, deviceName, deviceId);
std::wcerr << L"INFO: Native webcam candidate [" << index << L"] name=\"" << name << L"\" score=" << score << std::endl;
if (score > bestScore) {
selectedIndex = index;
matched = true;
break;
bestScore = score;
}
}
if ((!deviceId.empty() || !deviceName.empty()) && !matched) {
std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; using default webcam"
if ((!deviceId.empty() || !deviceName.empty()) && bestScore <= 0) {
std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; trying DirectShow"
<< std::endl;
}
selectedMatchScore_ = bestScore;
selectedDeviceName_ = readAllocatedString(devices[selectedIndex], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
hr = devices[selectedIndex]->ActivateObject(IID_PPV_ARGS(&mediaSource_));
@@ -181,6 +289,9 @@ bool WebcamCapture::configureReader(int requestedWidth, int requestedHeight, int
}
bool WebcamCapture::start() {
if (usingDirectShow_) {
return directShowCapture_.start();
}
if (!sourceReader_ || thread_.joinable()) {
return false;
}
@@ -191,6 +302,7 @@ bool WebcamCapture::start() {
}
void WebcamCapture::stop() {
directShowCapture_.stop();
stopRequested_ = true;
if (thread_.joinable()) {
thread_.join();
@@ -262,6 +374,9 @@ void WebcamCapture::captureLoop() {
}
bool WebcamCapture::copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height) {
if (usingDirectShow_) {
return directShowCapture_.copyLatestFrame(destination, width, height);
}
std::scoped_lock lock(frameMutex_);
if (latestFrame_.empty() || width_ <= 0 || height_ <= 0) {
return false;
@@ -274,17 +389,29 @@ bool WebcamCapture::copyLatestFrame(std::vector<BYTE>& destination, int& width,
}
// Frame width of the active backend: the DirectShow fallback when it is in use,
// otherwise this object's own value.
int WebcamCapture::width() const {
    return usingDirectShow_ ? directShowCapture_.width() : width_;
}
// Frame height of the active backend: the DirectShow fallback when it is in use,
// otherwise this object's own value.
int WebcamCapture::height() const {
    return usingDirectShow_ ? directShowCapture_.height() : height_;
}
// Frame rate of the active backend: the DirectShow fallback when it is in use,
// otherwise this object's own value.
int WebcamCapture::fps() const {
    return usingDirectShow_ ? directShowCapture_.fps() : fps_;
}
// Name of the device actually selected, taken from the DirectShow fallback when
// it is in use. Both operands are lvalues of the same type, so the conditional
// yields a reference and no copy is made.
const std::wstring& WebcamCapture::selectedDeviceName() const {
    return usingDirectShow_ ? directShowCapture_.selectedDeviceName() : selectedDeviceName_;
}
@@ -1,5 +1,7 @@
#pragma once
#include "dshow_webcam_capture.h"
#include <Windows.h>
#include <mfidl.h>
#include <mfreadwrite.h>
@@ -23,6 +25,7 @@ public:
bool initialize(
const std::wstring& deviceId,
const std::wstring& deviceName,
const std::wstring& directShowClsid,
int requestedWidth,
int requestedHeight,
int requestedFps);
@@ -42,6 +45,7 @@ private:
Microsoft::WRL::ComPtr<IMFMediaSource> mediaSource_;
Microsoft::WRL::ComPtr<IMFSourceReader> sourceReader_;
DirectShowWebcamCapture directShowCapture_;
std::thread thread_;
std::atomic<bool> stopRequested_ = false;
std::mutex frameMutex_;
@@ -50,5 +54,7 @@ private:
int height_ = 0;
int fps_ = 30;
bool mfStarted_ = false;
bool usingDirectShow_ = false;
int selectedMatchScore_ = 0;
std::wstring selectedDeviceName_;
};