fix: resolve selected Windows microphone
This commit is contained in:
@@ -34,6 +34,7 @@ Current V2 JSON shape:
|
||||
"captureSystemAudio": false,
|
||||
"captureMic": false,
|
||||
"microphoneDeviceId": "default",
|
||||
"microphoneDeviceName": "Microphone (NVIDIA Broadcast)",
|
||||
"microphoneGain": 1.4,
|
||||
"webcamEnabled": true,
|
||||
"webcamDeviceId": "default",
|
||||
@@ -47,7 +48,7 @@ Current V2 JSON shape:
|
||||
}
|
||||
```
|
||||
|
||||
The current helper implementation supports display/window video capture, system audio loopback, default-microphone capture, Media Foundation webcam capture, and a DirectShow webcam fallback for virtual cameras that are not exposed through Media Foundation. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links, so the renderer passes both `webcamDeviceId` and `webcamDeviceName`. Electron resolves a matching DirectShow filter CLSID for the selected label; the helper uses Media Foundation first, then that exact DirectShow filter when the requested camera is absent from Media Foundation.
|
||||
The current helper implementation supports display/window video capture, system audio loopback, selected-microphone capture, Media Foundation webcam capture, and a DirectShow webcam fallback for virtual cameras that are not exposed through Media Foundation. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links or WASAPI endpoint IDs, so the renderer passes both browser IDs and user-visible device names. For microphones, the helper tries the requested WASAPI endpoint ID first, then resolves an active capture endpoint by `microphoneDeviceName`, then falls back to the default endpoint. For webcams, Electron resolves a matching DirectShow filter CLSID for the selected label; the helper uses Media Foundation first, then that exact DirectShow filter when the requested camera is absent from Media Foundation.
|
||||
|
||||
Smoke-test the helper with:
|
||||
|
||||
@@ -67,3 +68,11 @@ $env:OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME = "NVIDIA Broadcast"
|
||||
npm run test:wgc-webcam:win
|
||||
Remove-Item Env:OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME
|
||||
```
|
||||
|
||||
To validate a specific native microphone manually:
|
||||
|
||||
```powershell
|
||||
$env:OPENSCREEN_WGC_TEST_MICROPHONE_DEVICE_NAME = "Microphone (NVIDIA Broadcast)"
|
||||
npm run test:wgc-mic:win
|
||||
Remove-Item Env:OPENSCREEN_WGC_TEST_MICROPHONE_DEVICE_NAME
|
||||
```
|
||||
|
||||
@@ -25,6 +25,68 @@ T clampTo(double value) {
|
||||
return static_cast<T>(std::clamp(std::round(value), minValue, maxValue));
|
||||
}
|
||||
|
||||
size_t bytesPerSample(const AudioInputFormat& format) {
|
||||
return format.bitsPerSample / 8;
|
||||
}
|
||||
|
||||
double readSampleAsDouble(const BYTE* source, const AudioInputFormat& format, size_t frameIndex, UINT32 channelIndex) {
|
||||
if (!source || format.blockAlign == 0 || channelIndex >= format.channels) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
const size_t offset = frameIndex * format.blockAlign + channelIndex * bytesPerSample(format);
|
||||
if (isFloatFormat(format)) {
|
||||
return static_cast<double>(*reinterpret_cast<const float*>(source + offset));
|
||||
}
|
||||
if (isPcmFormat(format, 16)) {
|
||||
return static_cast<double>(*reinterpret_cast<const int16_t*>(source + offset)) / 32768.0;
|
||||
}
|
||||
if (isPcmFormat(format, 32)) {
|
||||
return static_cast<double>(*reinterpret_cast<const int32_t*>(source + offset)) / 2147483648.0;
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// Writes one sample into an interleaved audio buffer, converting from double.
//
// destination  - start of the interleaved buffer; the caller must ensure it
//                covers the addressed frame (no bounds check is done here).
// format       - layout of `destination` (blockAlign, channels, bit depth, encoding).
// frameIndex   - zero-based frame to write.
// channelIndex - zero-based channel within that frame.
// value        - sample value; clamped to [-1.0, 1.0] before conversion.
//
// Float formats store the clamped value directly; 16-bit PCM scales by 32767
// and 32-bit PCM by 2147483647. Nothing is written when `destination` is null,
// `blockAlign` is zero, `channelIndex` is out of range, or the format is none
// of the three above.
void writeSampleFromDouble(BYTE* destination, const AudioInputFormat& format, size_t frameIndex, UINT32 channelIndex, double value) {
    if (!destination || format.blockAlign == 0 || channelIndex >= format.channels) {
        return;
    }

    // NaN passes through std::clamp unchanged and would then reach the integer
    // static_cast inside clampTo, which is undefined; write silence instead.
    const double finite = std::isnan(value) ? 0.0 : value;
    const double clamped = std::clamp(finite, -1.0, 1.0);
    const size_t offset = frameIndex * format.blockAlign + channelIndex * bytesPerSample(format);
    BYTE* const sampleBytes = destination + offset;

    // Store through memcpy rather than a casted pointer: the byte buffer has no
    // alignment guarantee for the sample type.
    if (isFloatFormat(format)) {
        const float sample = static_cast<float>(clamped);
        std::memcpy(sampleBytes, &sample, sizeof(sample));
        return;
    }
    if (isPcmFormat(format, 16)) {
        const int16_t sample = clampTo<int16_t>(clamped * 32767.0);
        std::memcpy(sampleBytes, &sample, sizeof(sample));
        return;
    }
    if (isPcmFormat(format, 32)) {
        const int32_t sample = clampTo<int32_t>(clamped * 2147483647.0);
        std::memcpy(sampleBytes, &sample, sizeof(sample));
    }
}
|
||||
|
||||
double readMappedChannel(const BYTE* source, const AudioInputFormat& format, size_t frameIndex, UINT32 targetChannel, UINT32 targetChannels) {
|
||||
if (format.channels == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
if (format.channels == targetChannels && targetChannel < format.channels) {
|
||||
return readSampleAsDouble(source, format, frameIndex, targetChannel);
|
||||
}
|
||||
if (format.channels == 1) {
|
||||
return readSampleAsDouble(source, format, frameIndex, 0);
|
||||
}
|
||||
if (targetChannels == 1) {
|
||||
double sum = 0.0;
|
||||
for (UINT32 channel = 0; channel < format.channels; ++channel) {
|
||||
sum += readSampleAsDouble(source, format, frameIndex, channel);
|
||||
}
|
||||
return sum / static_cast<double>(format.channels);
|
||||
}
|
||||
return readSampleAsDouble(source, format, frameIndex, std::min(targetChannel, format.channels - 1));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
constexpr int64_t HnsPerSecond = 10'000'000;
|
||||
@@ -88,6 +150,53 @@ void copyAudioWithGain(
|
||||
std::memcpy(destination.data(), source, byteCount);
|
||||
}
|
||||
|
||||
void convertAudioWithGain(
|
||||
const BYTE* source,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& sourceFormat,
|
||||
const AudioInputFormat& targetFormat,
|
||||
double gain,
|
||||
std::vector<BYTE>& destination) {
|
||||
if (!source || byteCount == 0 || sourceFormat.blockAlign == 0 || targetFormat.blockAlign == 0 ||
|
||||
sourceFormat.sampleRate == 0 || targetFormat.sampleRate == 0 || sourceFormat.channels == 0 ||
|
||||
targetFormat.channels == 0) {
|
||||
destination.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if (sameAudioFormatForMixing(sourceFormat, targetFormat)) {
|
||||
copyAudioWithGain(source, byteCount, targetFormat, gain, destination);
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t sourceFrames = byteCount / sourceFormat.blockAlign;
|
||||
if (sourceFrames == 0) {
|
||||
destination.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
const double rateRatio = static_cast<double>(targetFormat.sampleRate) /
|
||||
static_cast<double>(sourceFormat.sampleRate);
|
||||
const size_t targetFrames = std::max<size_t>(1, static_cast<size_t>(std::llround(sourceFrames * rateRatio)));
|
||||
destination.assign(targetFrames * targetFormat.blockAlign, 0);
|
||||
|
||||
for (size_t targetFrame = 0; targetFrame < targetFrames; ++targetFrame) {
|
||||
const double sourcePosition = static_cast<double>(targetFrame) / rateRatio;
|
||||
const size_t sourceFrame = std::min(
|
||||
sourceFrames - 1,
|
||||
static_cast<size_t>(std::llround(sourcePosition)));
|
||||
for (UINT32 channel = 0; channel < targetFormat.channels; ++channel) {
|
||||
const double sample = readMappedChannel(
|
||||
source,
|
||||
sourceFormat,
|
||||
sourceFrame,
|
||||
channel,
|
||||
targetFormat.channels);
|
||||
writeSampleFromDouble(destination.data(), targetFormat, targetFrame, channel, sample * gain);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mixAudioInPlace(
|
||||
std::vector<BYTE>& destination,
|
||||
const BYTE* source,
|
||||
@@ -133,11 +242,15 @@ void mixAudioInPlace(
|
||||
|
||||
AudioMixer::AudioMixer(
|
||||
const AudioInputFormat& format,
|
||||
const AudioInputFormat& systemFormat,
|
||||
const AudioInputFormat& microphoneFormat,
|
||||
bool includeSystem,
|
||||
bool includeMicrophone,
|
||||
double microphoneGain,
|
||||
OutputCallback output)
|
||||
: format_(format),
|
||||
systemFormat_(systemFormat),
|
||||
microphoneFormat_(microphoneFormat),
|
||||
includeSystem_(includeSystem),
|
||||
includeMicrophone_(includeMicrophone),
|
||||
microphoneGain_(microphoneGain),
|
||||
@@ -187,7 +300,7 @@ void AudioMixer::pushSystem(const BYTE* data, DWORD byteCount) {
|
||||
|
||||
{
|
||||
std::scoped_lock lock(mutex_);
|
||||
append(systemQueue_, data, byteCount, 1.0);
|
||||
append(systemQueue_, data, byteCount, systemFormat_, 1.0);
|
||||
}
|
||||
cv_.notify_all();
|
||||
}
|
||||
@@ -199,17 +312,22 @@ void AudioMixer::pushMicrophone(const BYTE* data, DWORD byteCount) {
|
||||
|
||||
{
|
||||
std::scoped_lock lock(mutex_);
|
||||
append(microphoneQueue_, data, byteCount, microphoneGain_);
|
||||
append(microphoneQueue_, data, byteCount, microphoneFormat_, microphoneGain_);
|
||||
}
|
||||
cv_.notify_all();
|
||||
}
|
||||
|
||||
void AudioMixer::append(std::vector<BYTE>& queue, const BYTE* data, DWORD byteCount, double gain) {
|
||||
void AudioMixer::append(
|
||||
std::vector<BYTE>& queue,
|
||||
const BYTE* data,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& sourceFormat,
|
||||
double gain) {
|
||||
if (!data || byteCount == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
copyAudioWithGain(data, byteCount, format_, gain, gainBuffer_);
|
||||
convertAudioWithGain(data, byteCount, sourceFormat, format_, gain, gainBuffer_);
|
||||
queue.insert(queue.end(), gainBuffer_.begin(), gainBuffer_.end());
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,13 @@ void copyAudioWithGain(
|
||||
const AudioInputFormat& format,
|
||||
double gain,
|
||||
std::vector<BYTE>& destination);
|
||||
void convertAudioWithGain(
|
||||
const BYTE* source,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& sourceFormat,
|
||||
const AudioInputFormat& targetFormat,
|
||||
double gain,
|
||||
std::vector<BYTE>& destination);
|
||||
void mixAudioInPlace(
|
||||
std::vector<BYTE>& destination,
|
||||
const BYTE* source,
|
||||
@@ -31,6 +38,8 @@ public:
|
||||
|
||||
AudioMixer(
|
||||
const AudioInputFormat& format,
|
||||
const AudioInputFormat& systemFormat,
|
||||
const AudioInputFormat& microphoneFormat,
|
||||
bool includeSystem,
|
||||
bool includeMicrophone,
|
||||
double microphoneGain,
|
||||
@@ -47,11 +56,18 @@ public:
|
||||
void pushMicrophone(const BYTE* data, DWORD byteCount);
|
||||
|
||||
private:
|
||||
void append(std::vector<BYTE>& queue, const BYTE* data, DWORD byteCount, double gain);
|
||||
void append(
|
||||
std::vector<BYTE>& queue,
|
||||
const BYTE* data,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& sourceFormat,
|
||||
double gain);
|
||||
bool pop(std::vector<BYTE>& queue, std::vector<BYTE>& chunk, size_t byteCount);
|
||||
void mixLoop();
|
||||
|
||||
AudioInputFormat format_{};
|
||||
AudioInputFormat systemFormat_{};
|
||||
AudioInputFormat microphoneFormat_{};
|
||||
bool includeSystem_ = false;
|
||||
bool includeMicrophone_ = false;
|
||||
double microphoneGain_ = 1.0;
|
||||
|
||||
@@ -38,6 +38,7 @@ struct CaptureConfig {
|
||||
bool captureMic = false;
|
||||
bool webcamEnabled = false;
|
||||
std::string microphoneDeviceId;
|
||||
std::string microphoneDeviceName;
|
||||
double microphoneGain = 1.0;
|
||||
std::string webcamDeviceId;
|
||||
std::string webcamDeviceName;
|
||||
@@ -303,6 +304,7 @@ bool parseConfig(const std::string& json, CaptureConfig& config) {
|
||||
config.captureMic = findBool(json, "captureMic", false);
|
||||
config.webcamEnabled = findBool(json, "webcamEnabled", false);
|
||||
config.microphoneDeviceId = findString(json, "microphoneDeviceId");
|
||||
config.microphoneDeviceName = findString(json, "microphoneDeviceName");
|
||||
config.microphoneGain = findDouble(json, "microphoneGain", 1.0);
|
||||
config.webcamDeviceId = findString(json, "webcamDeviceId");
|
||||
config.webcamDeviceName = findString(json, "webcamDeviceName");
|
||||
@@ -406,24 +408,26 @@ int main(int argc, char* argv[]) {
|
||||
WasapiLoopbackCapture loopbackCapture;
|
||||
WasapiLoopbackCapture microphoneCapture;
|
||||
const AudioInputFormat* audioFormat = nullptr;
|
||||
AudioInputFormat systemAudioFormat{};
|
||||
AudioInputFormat microphoneAudioFormat{};
|
||||
if (config.captureSystemAudio) {
|
||||
if (!loopbackCapture.initializeSystemLoopback()) {
|
||||
std::cerr << "ERROR: Failed to initialize WASAPI loopback capture" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
systemAudioFormat = loopbackCapture.inputFormat();
|
||||
audioFormat = &loopbackCapture.inputFormat();
|
||||
}
|
||||
if (config.captureMic) {
|
||||
if (!microphoneCapture.initializeMicrophone(utf8ToWide(config.microphoneDeviceId))) {
|
||||
if (!microphoneCapture.initializeMicrophone(
|
||||
utf8ToWide(config.microphoneDeviceId),
|
||||
utf8ToWide(config.microphoneDeviceName))) {
|
||||
std::cerr << "ERROR: Failed to initialize WASAPI microphone capture" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
microphoneAudioFormat = microphoneCapture.inputFormat();
|
||||
if (!audioFormat) {
|
||||
audioFormat = &microphoneCapture.inputFormat();
|
||||
} else if (!sameAudioFormatForMixing(*audioFormat, microphoneCapture.inputFormat())) {
|
||||
std::cerr << "ERROR: System audio and microphone formats differ; native mixing is not supported yet"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (audioFormat) {
|
||||
@@ -431,7 +435,12 @@ int main(int argc, char* argv[]) {
|
||||
<< ",\"channels\":" << audioFormat->channels
|
||||
<< ",\"bitsPerSample\":" << audioFormat->bitsPerSample
|
||||
<< ",\"system\":" << (config.captureSystemAudio ? "true" : "false")
|
||||
<< ",\"microphone\":" << (config.captureMic ? "true" : "false") << "}" << std::endl;
|
||||
<< ",\"microphone\":" << (config.captureMic ? "true" : "false");
|
||||
if (config.captureMic) {
|
||||
std::cout << ",\"microphoneDeviceName\":\""
|
||||
<< jsonEscape(wideToUtf8(microphoneCapture.selectedDeviceName())) << "\"";
|
||||
}
|
||||
std::cout << "}" << std::endl;
|
||||
}
|
||||
|
||||
MFEncoder encoder;
|
||||
@@ -549,6 +558,8 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
audioMixer = std::make_unique<AudioMixer>(
|
||||
*audioFormat,
|
||||
config.captureSystemAudio ? systemAudioFormat : *audioFormat,
|
||||
config.captureMic ? microphoneAudioFormat : *audioFormat,
|
||||
config.captureSystemAudio,
|
||||
config.captureMic,
|
||||
config.microphoneGain,
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
#include "wasapi_loopback_capture.h"
|
||||
|
||||
#include <Functiondiscoverykeys_devpkey.h>
|
||||
#include <ksmedia.h>
|
||||
#include <propvarutil.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cwctype>
|
||||
#include <iostream>
|
||||
|
||||
namespace {
|
||||
@@ -41,6 +44,86 @@ GUID audioSubtypeFromFormat(WAVEFORMATEX* format) {
|
||||
return GUID_NULL;
|
||||
}
|
||||
|
||||
// Produces a comparison-friendly form of a device name: alphanumeric
// characters are lower-cased, every run of other characters becomes a single
// space, and no space appears at the start or end of the result.
std::wstring normalizeDeviceName(const std::wstring& value) {
    std::wstring normalized;
    normalized.reserve(value.size());

    // A separator is only emitted once the next alphanumeric character shows
    // up, so leading and trailing runs of punctuation never produce a space.
    bool separatorPending = false;
    for (const wchar_t ch : value) {
        if (!std::iswalnum(ch)) {
            separatorPending = !normalized.empty();
            continue;
        }
        if (separatorPending) {
            normalized.push_back(L' ');
            separatorPending = false;
        }
        normalized.push_back(static_cast<wchar_t>(std::towlower(ch)));
    }
    return normalized;
}
|
||||
|
||||
int scoreDeviceName(const std::wstring& candidateName, const std::wstring& candidateId, const std::wstring& requestedName) {
|
||||
const std::wstring candidate = normalizeDeviceName(candidateName);
|
||||
const std::wstring id = normalizeDeviceName(candidateId);
|
||||
const std::wstring requested = normalizeDeviceName(requestedName);
|
||||
if (requested.empty()) {
|
||||
return 0;
|
||||
}
|
||||
if (candidate == requested) {
|
||||
return 1000;
|
||||
}
|
||||
if (!candidate.empty() && (candidate.find(requested) != std::wstring::npos || requested.find(candidate) != std::wstring::npos)) {
|
||||
return 900;
|
||||
}
|
||||
if (!id.empty() && (id.find(requested) != std::wstring::npos || requested.find(id) != std::wstring::npos)) {
|
||||
return 800;
|
||||
}
|
||||
|
||||
int score = 0;
|
||||
size_t pos = 0;
|
||||
while (pos < requested.size()) {
|
||||
const size_t end = requested.find(L' ', pos);
|
||||
const std::wstring word = requested.substr(pos, end == std::wstring::npos ? std::wstring::npos : end - pos);
|
||||
if (word.size() > 1 && word != L"microphone" && word != L"mic" && word != L"audio" && word != L"input") {
|
||||
if (candidate.find(word) != std::wstring::npos) {
|
||||
score += 100;
|
||||
} else if (id.find(word) != std::wstring::npos) {
|
||||
score += 50;
|
||||
}
|
||||
}
|
||||
if (end == std::wstring::npos) {
|
||||
break;
|
||||
}
|
||||
pos = end + 1;
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
// Looks up PKEY_Device_FriendlyName on an audio endpoint.
// Returns an empty string when `device` is null, its property store cannot be
// opened, or the property is not a non-null wide string (VT_LPWSTR).
std::wstring getDeviceFriendlyName(IMMDevice* device) {
    std::wstring friendlyName;
    if (device == nullptr) {
        return friendlyName;
    }

    Microsoft::WRL::ComPtr<IPropertyStore> store;
    const HRESULT openResult = device->OpenPropertyStore(STGM_READ, &store);
    if (FAILED(openResult) || !store) {
        return friendlyName;
    }

    PROPVARIANT property;
    PropVariantInit(&property);
    const HRESULT readResult = store->GetValue(PKEY_Device_FriendlyName, &property);
    const bool hasText = SUCCEEDED(readResult) && property.vt == VT_LPWSTR && property.pwszVal;
    if (hasText) {
        friendlyName = property.pwszVal;
    }
    // Always release whatever GetValue stored in the variant.
    PropVariantClear(&property);
    return friendlyName;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
WasapiLoopbackCapture::~WasapiLoopbackCapture() {
|
||||
@@ -52,14 +135,14 @@ WasapiLoopbackCapture::~WasapiLoopbackCapture() {
|
||||
}
|
||||
|
||||
bool WasapiLoopbackCapture::initializeSystemLoopback() {
|
||||
return initialize(WasapiCaptureEndpoint::SystemLoopback, {});
|
||||
return initialize(WasapiCaptureEndpoint::SystemLoopback, {}, {});
|
||||
}
|
||||
|
||||
bool WasapiLoopbackCapture::initializeMicrophone(const std::wstring& deviceId) {
|
||||
return initialize(WasapiCaptureEndpoint::Microphone, deviceId);
|
||||
bool WasapiLoopbackCapture::initializeMicrophone(const std::wstring& deviceId, const std::wstring& deviceName) {
|
||||
return initialize(WasapiCaptureEndpoint::Microphone, deviceId, deviceName);
|
||||
}
|
||||
|
||||
bool WasapiLoopbackCapture::initialize(WasapiCaptureEndpoint endpoint, const std::wstring& deviceId) {
|
||||
bool WasapiLoopbackCapture::initialize(WasapiCaptureEndpoint endpoint, const std::wstring& deviceId, const std::wstring& deviceName) {
|
||||
HRESULT hr = CoCreateInstance(
|
||||
__uuidof(MMDeviceEnumerator),
|
||||
nullptr,
|
||||
@@ -72,12 +155,19 @@ bool WasapiLoopbackCapture::initialize(WasapiCaptureEndpoint endpoint, const std
|
||||
if (endpoint == WasapiCaptureEndpoint::Microphone && !deviceId.empty() && deviceId != L"default") {
|
||||
hr = deviceEnumerator_->GetDevice(deviceId.c_str(), &device_);
|
||||
if (FAILED(hr)) {
|
||||
std::wcerr << L"WARNING: Could not resolve microphone device id; using default capture endpoint"
|
||||
std::wcerr << L"WARNING: Could not resolve microphone device id directly"
|
||||
<< std::endl;
|
||||
device_.Reset();
|
||||
}
|
||||
}
|
||||
|
||||
if (endpoint == WasapiCaptureEndpoint::Microphone && !device_ && !deviceName.empty()) {
|
||||
if (!resolveMicrophoneByName(deviceName)) {
|
||||
std::wcerr << L"WARNING: Could not resolve microphone by name; using default capture endpoint"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (!device_) {
|
||||
const EDataFlow flow =
|
||||
endpoint == WasapiCaptureEndpoint::SystemLoopback ? eRender : eCapture;
|
||||
@@ -87,6 +177,8 @@ bool WasapiLoopbackCapture::initialize(WasapiCaptureEndpoint endpoint, const std
|
||||
}
|
||||
}
|
||||
|
||||
selectedDeviceName_ = getDeviceFriendlyName(device_.Get());
|
||||
|
||||
hr = device_->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, &audioClient_);
|
||||
if (!succeeded(hr, "IMMDevice::Activate(IAudioClient)")) {
|
||||
return false;
|
||||
@@ -123,6 +215,61 @@ bool WasapiLoopbackCapture::initialize(WasapiCaptureEndpoint endpoint, const std
|
||||
return true;
|
||||
}
|
||||
|
||||
bool WasapiLoopbackCapture::resolveMicrophoneByName(const std::wstring& deviceName) {
|
||||
if (!deviceEnumerator_ || deviceName.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMMDeviceCollection> devices;
|
||||
HRESULT hr = deviceEnumerator_->EnumAudioEndpoints(eCapture, DEVICE_STATE_ACTIVE, &devices);
|
||||
if (!succeeded(hr, "IMMDeviceEnumerator::EnumAudioEndpoints(eCapture)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT count = 0;
|
||||
hr = devices->GetCount(&count);
|
||||
if (!succeeded(hr, "IMMDeviceCollection::GetCount")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMMDevice> bestDevice;
|
||||
std::wstring bestId;
|
||||
std::wstring bestName;
|
||||
int bestScore = 0;
|
||||
for (UINT i = 0; i < count; ++i) {
|
||||
Microsoft::WRL::ComPtr<IMMDevice> candidate;
|
||||
hr = devices->Item(i, &candidate);
|
||||
if (FAILED(hr) || !candidate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
LPWSTR rawId = nullptr;
|
||||
std::wstring candidateId;
|
||||
if (SUCCEEDED(candidate->GetId(&rawId)) && rawId) {
|
||||
candidateId = rawId;
|
||||
CoTaskMemFree(rawId);
|
||||
}
|
||||
|
||||
const std::wstring candidateName = getDeviceFriendlyName(candidate.Get());
|
||||
const int score = scoreDeviceName(candidateName, candidateId, deviceName);
|
||||
std::wcerr << L"Native microphone candidate: " << candidateName << L" score=" << score << std::endl;
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
bestDevice = candidate;
|
||||
bestId = candidateId;
|
||||
bestName = candidateName;
|
||||
}
|
||||
}
|
||||
|
||||
if (!bestDevice || bestScore <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
device_ = bestDevice;
|
||||
std::wcerr << L"Selected native microphone endpoint: " << bestName << L" id=" << bestId << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool WasapiLoopbackCapture::resolveInputFormat(WAVEFORMATEX* mixFormat) {
|
||||
const GUID subtype = audioSubtypeFromFormat(mixFormat);
|
||||
if (subtype == GUID_NULL) {
|
||||
@@ -172,6 +319,10 @@ const AudioInputFormat& WasapiLoopbackCapture::inputFormat() const {
|
||||
return inputFormat_;
|
||||
}
|
||||
|
||||
const std::wstring& WasapiLoopbackCapture::selectedDeviceName() const {
|
||||
return selectedDeviceName_;
|
||||
}
|
||||
|
||||
void WasapiLoopbackCapture::captureLoop() {
|
||||
while (!stopRequested_) {
|
||||
UINT32 packetFrames = 0;
|
||||
|
||||
@@ -30,14 +30,16 @@ public:
|
||||
WasapiLoopbackCapture& operator=(const WasapiLoopbackCapture&) = delete;
|
||||
|
||||
bool initializeSystemLoopback();
|
||||
bool initializeMicrophone(const std::wstring& deviceId);
|
||||
bool initializeMicrophone(const std::wstring& deviceId, const std::wstring& deviceName);
|
||||
bool start(AudioCallback callback);
|
||||
void stop();
|
||||
|
||||
const AudioInputFormat& inputFormat() const;
|
||||
const std::wstring& selectedDeviceName() const;
|
||||
|
||||
private:
|
||||
bool initialize(WasapiCaptureEndpoint endpoint, const std::wstring& deviceId);
|
||||
bool initialize(WasapiCaptureEndpoint endpoint, const std::wstring& deviceId, const std::wstring& deviceName);
|
||||
bool resolveMicrophoneByName(const std::wstring& deviceName);
|
||||
void captureLoop();
|
||||
bool resolveInputFormat(WAVEFORMATEX* mixFormat);
|
||||
|
||||
@@ -47,6 +49,7 @@ private:
|
||||
Microsoft::WRL::ComPtr<IAudioCaptureClient> captureClient_;
|
||||
WAVEFORMATEX* mixFormat_ = nullptr;
|
||||
AudioInputFormat inputFormat_{};
|
||||
std::wstring selectedDeviceName_;
|
||||
AudioCallback callback_;
|
||||
std::thread thread_;
|
||||
std::atomic<bool> stopRequested_ = false;
|
||||
|
||||
Reference in New Issue
Block a user