fix: honor selected native Windows webcam

This commit is contained in:
EtienneLescot
2026-05-05 18:06:43 +02:00
parent fb85f66875
commit fdcd882058
10 changed files with 85 additions and 18 deletions
@@ -71,6 +71,7 @@ The helper receives a single JSON argument:
"webcam": {
"enabled": true,
"deviceId": "default",
"deviceName": "Camera (NVIDIA Broadcast)",
"width": 1280,
"height": 720,
"fps": 30,
@@ -133,6 +134,7 @@ SSOT rules for this phase:
- `docs/engineering/windows-native-recorder-roadmap.md` is the feature-level contract and phase checklist.
- `WgcSession::captureWidth()/captureHeight()` is the encoded screen frame size until a dedicated native scaling stage exists.
- `WasapiLoopbackCapture::inputFormat()` is the runtime audio format source used by `MFEncoder`.
- The renderer passes both the browser webcam `deviceId` and the selected device's display label (as `deviceName`); `electron/native/wgc-capture/src/webcam_capture.*` is the only place that maps those values to Media Foundation devices.
- No duplicated hard-coded audio format assumptions in `main.cpp`.
### 3. WASAPI Microphone
@@ -155,6 +157,7 @@ Acceptance:
- Select requested dimensions/fps or the nearest format accepted by Media Foundation.
- Convert webcam samples to BGRA and compose them into the primary helper MP4 as an initial bottom-right picture-in-picture overlay.
- Keep the helper process as the SSOT for screen/window, WASAPI system audio, microphone, webcam, and mux timing.
- Match the requested webcam through Media Foundation friendly names first, then browser device ids/symbolic links, so UI selection remains stable across Chromium and Windows native device namespaces.
- Later: promote the same webcam capture source to a separate editable native `webcamVideoPath` if product requirements need post-recording layout edits.
Acceptance:
+1
View File
@@ -888,6 +888,7 @@ export function registerIpcHandlers(
microphoneGain: request.audio.microphone.gain,
webcamEnabled: request.webcam.enabled,
webcamDeviceId: request.webcam.deviceId ?? null,
webcamDeviceName: request.webcam.deviceName ?? null,
webcamWidth: request.webcam.width,
webcamHeight: request.webcam.height,
webcamFps: request.webcam.fps,
+10 -1
View File
@@ -37,6 +37,7 @@ Current V2 JSON shape:
"microphoneGain": 1.4,
"webcamEnabled": true,
"webcamDeviceId": "default",
"webcamDeviceName": "Camera (NVIDIA Broadcast)",
"webcamWidth": 1280,
"webcamHeight": 720,
"webcamFps": 30,
@@ -46,7 +47,7 @@ Current V2 JSON shape:
}
```
The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, and Media Foundation webcam capture. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links; when the requested webcam is not matched, the helper logs a warning and uses the default webcam.
The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, and Media Foundation webcam capture. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links, so the renderer passes both `webcamDeviceId` and `webcamDeviceName`. The helper treats the Media Foundation friendly name as the preferred stable selector, then tries the browser id, and only falls back to the default webcam with an explicit warning when no requested device matches.
Smoke-test the helper with:
@@ -58,3 +59,11 @@ npm run test:wgc-mic:win
npm run test:wgc-mixed-audio:win
npm run test:wgc-webcam:win
```
To validate a specific native webcam manually (a distinctive part of the device name, as in the example below, is enough):
```powershell
$env:OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME = "NVIDIA Broadcast"
npm run test:wgc-webcam:win
Remove-Item Env:OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME
```
+17 -1
View File
@@ -40,6 +40,7 @@ struct CaptureConfig {
std::string microphoneDeviceId;
double microphoneGain = 1.0;
std::string webcamDeviceId;
std::string webcamDeviceName;
int webcamWidth = 0;
int webcamHeight = 0;
int webcamFps = 0;
@@ -56,6 +57,17 @@ std::wstring utf8ToWide(const std::string& value) {
return result;
}
// Converts a wide string to a UTF-8 encoded std::string using the Win32
// WideCharToMultiByte API. An empty input yields an empty string without
// calling into the API.
std::string wideToUtf8(const std::wstring& value) {
    if (value.empty()) {
        return {};
    }

    const wchar_t* const source = value.data();
    const int sourceLength = static_cast<int>(value.size());

    // First pass: a null output buffer asks the API for the required byte count.
    const int byteCount = WideCharToMultiByte(CP_UTF8, 0, source, sourceLength, nullptr, 0, nullptr, nullptr);

    // Second pass: convert into a buffer of exactly that size.
    std::string utf8(static_cast<size_t>(byteCount), '\0');
    WideCharToMultiByte(CP_UTF8, 0, source, sourceLength, utf8.data(), byteCount, nullptr, nullptr);
    return utf8;
}
std::string jsonEscape(const std::string& value) {
std::string result;
result.reserve(value.size());
@@ -267,6 +279,7 @@ bool parseConfig(const std::string& json, CaptureConfig& config) {
config.microphoneDeviceId = findString(json, "microphoneDeviceId");
config.microphoneGain = findDouble(json, "microphoneGain", 1.0);
config.webcamDeviceId = findString(json, "webcamDeviceId");
config.webcamDeviceName = findString(json, "webcamDeviceName");
config.webcamWidth = findInt(json, "webcamWidth", 0);
config.webcamHeight = findInt(json, "webcamHeight", 0);
config.webcamFps = findInt(json, "webcamFps", 0);
@@ -348,6 +361,7 @@ int main(int argc, char* argv[]) {
if (config.webcamEnabled) {
if (!webcamCapture.initialize(
utf8ToWide(config.webcamDeviceId),
utf8ToWide(config.webcamDeviceName),
config.webcamWidth,
config.webcamHeight,
config.webcamFps > 0 ? config.webcamFps : config.fps)) {
@@ -356,7 +370,9 @@ int main(int argc, char* argv[]) {
}
std::cout << "{\"event\":\"webcam-format\",\"schemaVersion\":2,\"width\":" << webcamCapture.width()
<< ",\"height\":" << webcamCapture.height()
<< ",\"fps\":" << webcamCapture.fps() << "}" << std::endl;
<< ",\"fps\":" << webcamCapture.fps()
<< ",\"deviceName\":\"" << jsonEscape(wideToUtf8(webcamCapture.selectedDeviceName()))
<< "\"}" << std::endl;
}
WasapiLoopbackCapture loopbackCapture;
@@ -51,20 +51,25 @@ WebcamCapture::~WebcamCapture() {
stop();
}
bool WebcamCapture::initialize(const std::wstring& deviceId, int requestedWidth, int requestedHeight, int requestedFps) {
bool WebcamCapture::initialize(
const std::wstring& deviceId,
const std::wstring& deviceName,
int requestedWidth,
int requestedHeight,
int requestedFps) {
fps_ = std::clamp(requestedFps > 0 ? requestedFps : 30, 1, 60);
if (!succeeded(MFStartup(MF_VERSION), "MFStartup(webcam)")) {
return false;
}
mfStarted_ = true;
if (!selectDevice(deviceId)) {
if (!selectDevice(deviceId, deviceName)) {
return false;
}
return configureReader(requestedWidth, requestedHeight, fps_);
}
bool WebcamCapture::selectDevice(const std::wstring& deviceId) {
bool WebcamCapture::selectDevice(const std::wstring& deviceId, const std::wstring& deviceName) {
Microsoft::WRL::ComPtr<IMFAttributes> attributes;
if (!succeeded(MFCreateAttributes(&attributes, 1), "MFCreateAttributes(webcam enumeration)")) {
return false;
@@ -88,22 +93,32 @@ bool WebcamCapture::selectDevice(const std::wstring& deviceId) {
}
UINT32 selectedIndex = 0;
bool matched = false;
auto matchesRequestedDevice = [&](const std::wstring& name, const std::wstring& symbolicLink) {
if (!deviceName.empty() &&
(containsInsensitive(name, deviceName) || containsInsensitive(symbolicLink, deviceName))) {
return true;
}
if (!deviceId.empty() &&
(containsInsensitive(symbolicLink, deviceId) || containsInsensitive(name, deviceId))) {
return true;
}
return false;
};
for (UINT32 index = 0; index < deviceCount; index += 1) {
const std::wstring name = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
const std::wstring symbolicLink = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK);
if (!deviceId.empty() && (containsInsensitive(symbolicLink, deviceId) || containsInsensitive(name, deviceId))) {
if (matchesRequestedDevice(name, symbolicLink)) {
selectedIndex = index;
matched = true;
break;
}
}
if (!deviceId.empty() && selectedIndex == 0) {
const std::wstring firstName = readAllocatedString(devices[0], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
const std::wstring firstLink = readAllocatedString(devices[0], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK);
if (!containsInsensitive(firstLink, deviceId) && !containsInsensitive(firstName, deviceId)) {
std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; using default webcam"
<< std::endl;
}
if ((!deviceId.empty() || !deviceName.empty()) && !matched) {
std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; using default webcam"
<< std::endl;
}
selectedDeviceName_ = readAllocatedString(devices[selectedIndex], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
@@ -20,7 +20,12 @@ public:
WebcamCapture(const WebcamCapture&) = delete;
WebcamCapture& operator=(const WebcamCapture&) = delete;
bool initialize(const std::wstring& deviceId, int requestedWidth, int requestedHeight, int requestedFps);
bool initialize(
const std::wstring& deviceId,
const std::wstring& deviceName,
int requestedWidth,
int requestedHeight,
int requestedFps);
bool start();
void stop();
bool copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height);
@@ -31,7 +36,7 @@ public:
const std::wstring& selectedDeviceName() const;
private:
bool selectDevice(const std::wstring& deviceId);
bool selectDevice(const std::wstring& deviceId, const std::wstring& deviceName);
bool configureReader(int requestedWidth, int requestedHeight, int requestedFps);
void captureLoop();
+1
View File
@@ -190,6 +190,7 @@ const config = {
microphoneGain: 1.4,
webcamEnabled: WITH_WEBCAM,
webcamDeviceId: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_ID ?? "",
webcamDeviceName: process.env.OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME ?? "",
webcamWidth: 640,
webcamHeight: 360,
webcamFps: 30,
+13 -3
View File
@@ -108,6 +108,7 @@ export function LaunchWindow() {
setWebcamEnabled,
webcamDeviceId,
setWebcamDeviceId,
setWebcamDeviceName,
} = useScreenRecorder();
const showMicControls = microphoneEnabled && !recording;
@@ -149,14 +150,16 @@ export function LaunchWindow() {
const selectedMicLabel =
micDevices.find((d) => d.deviceId === (microphoneDeviceId || selectedMicId))?.label ||
t("audio.defaultMicrophone");
const selectedCameraDevice = cameraDevices.find(
(d) => d.deviceId === (webcamDeviceId || selectedCameraId),
);
const selectedCameraLabel = isCameraDevicesLoading
? t("webcam.searching")
: cameraDevicesError
? t("webcam.unavailable")
: cameraDevices.length === 0
? t("webcam.noneFound")
: cameraDevices.find((d) => d.deviceId === (webcamDeviceId || selectedCameraId))?.label ||
t("webcam.defaultCamera");
: selectedCameraDevice?.label || t("webcam.defaultCamera");
const { level } = useAudioLevelMeter({
enabled: showMicControls,
@@ -172,8 +175,9 @@ export function LaunchWindow() {
useEffect(() => {
if (selectedCameraId) {
setWebcamDeviceId(selectedCameraId);
setWebcamDeviceName(cameraDevices.find((d) => d.deviceId === selectedCameraId)?.label);
}
}, [selectedCameraId, setWebcamDeviceId]);
}, [selectedCameraId, cameraDevices, setWebcamDeviceId, setWebcamDeviceName]);
useEffect(() => {
if (!import.meta.env.DEV) {
@@ -458,8 +462,12 @@ export function LaunchWindow() {
<select
value={webcamDeviceId || selectedCameraId}
onChange={(e) => {
const device = cameraDevices.find(
(item) => item.deviceId === e.target.value,
);
setSelectedCameraId(e.target.value);
setWebcamDeviceId(e.target.value);
setWebcamDeviceName(device?.label);
}}
className="w-full appearance-none bg-white/5 text-white text-[11px] rounded-lg pl-2 pr-6 py-1 border border-white/10 outline-none hover:bg-white/10 transition-colors cursor-pointer"
>
@@ -483,8 +491,10 @@ export function LaunchWindow() {
<select
value={webcamDeviceId || selectedCameraId}
onChange={(e) => {
const device = cameraDevices.find((item) => item.deviceId === e.target.value);
setSelectedCameraId(e.target.value);
setWebcamDeviceId(e.target.value);
setWebcamDeviceName(device?.label);
}}
className="sr-only"
>
+6
View File
@@ -55,6 +55,8 @@ type UseScreenRecorderReturn = {
setMicrophoneDeviceId: (deviceId: string | undefined) => void;
webcamDeviceId: string | undefined;
setWebcamDeviceId: (deviceId: string | undefined) => void;
webcamDeviceName: string | undefined;
setWebcamDeviceName: (deviceName: string | undefined) => void;
systemAudioEnabled: boolean;
setSystemAudioEnabled: (enabled: boolean) => void;
webcamEnabled: boolean;
@@ -101,6 +103,7 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
const [microphoneEnabled, setMicrophoneEnabled] = useState(false);
const [microphoneDeviceId, setMicrophoneDeviceId] = useState<string | undefined>(undefined);
const [webcamDeviceId, setWebcamDeviceId] = useState<string | undefined>(undefined);
const [webcamDeviceName, setWebcamDeviceName] = useState<string | undefined>(undefined);
const [systemAudioEnabled, setSystemAudioEnabled] = useState(false);
const [webcamEnabled, setWebcamEnabledState] = useState(false);
const screenRecorder = useRef<RecorderHandle | null>(null);
@@ -620,6 +623,7 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
webcam: {
enabled: webcamEnabled,
deviceId: webcamDeviceId,
deviceName: webcamDeviceName,
width: WEBCAM_TARGET_WIDTH,
height: WEBCAM_TARGET_HEIGHT,
fps: WEBCAM_TARGET_FRAME_RATE,
@@ -1123,6 +1127,8 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
setMicrophoneDeviceId,
webcamDeviceId,
setWebcamDeviceId,
webcamDeviceName,
setWebcamDeviceName,
systemAudioEnabled,
setSystemAudioEnabled,
webcamEnabled,
+1
View File
@@ -26,6 +26,7 @@ export type NativeWindowsRecordingRequest = {
webcam: {
enabled: boolean;
deviceId?: string;
deviceName?: string;
width: number;
height: number;
fps: number;