fix: normalize native Windows audio for AAC
This commit is contained in:
@@ -100,6 +100,17 @@ bool sameAudioFormatForMixing(const AudioInputFormat& left, const AudioInputForm
|
||||
left.avgBytesPerSec == right.avgBytesPerSec;
|
||||
}
|
||||
|
||||
/// Builds the PCM format handed to the AAC encoding path from a captured format.
///
/// Only the sample rate is taken from `source`; the subtype, channel count and
/// bit depth are fixed, and the block alignment / byte rate are recomputed so
/// they stay consistent with those fixed values.
///
/// @param source Captured audio format; only `sampleRate` is read.
/// @return A 2-channel, 16-bit `MFAudioFormat_PCM` format at `source.sampleRate`,
///         or at 48000 Hz when the source rate is not positive.
AudioInputFormat makeAacCompatibleAudioFormat(const AudioInputFormat& source) {
    AudioInputFormat aacInput{};

    // Fixed layout: 16-bit stereo PCM regardless of what the device delivers.
    aacInput.subtype = MFAudioFormat_PCM;
    aacInput.channels = 2;
    aacInput.bitsPerSample = 16;

    // NOTE(review): the source rate is passed through unchanged. Microsoft's AAC
    // encoder documentation lists only 44.1 kHz and 48 kHz input — confirm that
    // other device rates are handled elsewhere.
    if (source.sampleRate > 0) {
        aacInput.sampleRate = source.sampleRate;
    } else {
        aacInput.sampleRate = 48000;
    }

    // Derived fields: bytes per frame, then bytes per second.
    const auto bytesPerSample = aacInput.bitsPerSample / 8;
    aacInput.blockAlign = aacInput.channels * bytesPerSample;
    aacInput.avgBytesPerSec = aacInput.sampleRate * aacInput.blockAlign;

    return aacInput;
}
|
||||
|
||||
void copyAudioWithGain(
|
||||
const BYTE* source,
|
||||
DWORD byteCount,
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <vector>
|
||||
|
||||
bool sameAudioFormatForMixing(const AudioInputFormat& left, const AudioInputFormat& right);
|
||||
// Returns a 2-channel, 16-bit PCM format that keeps `source.sampleRate`
// (falling back to 48000 when that rate is not positive), with block
// alignment and average bytes per second recomputed for that layout.
AudioInputFormat makeAacCompatibleAudioFormat(const AudioInputFormat& source);
|
||||
void copyAudioWithGain(
|
||||
const BYTE* source,
|
||||
DWORD byteCount,
|
||||
|
||||
@@ -410,6 +410,7 @@ int main(int argc, char* argv[]) {
|
||||
WasapiLoopbackCapture loopbackCapture;
|
||||
WasapiLoopbackCapture microphoneCapture;
|
||||
const AudioInputFormat* audioFormat = nullptr;
|
||||
AudioInputFormat encoderAudioFormat{};
|
||||
AudioInputFormat systemAudioFormat{};
|
||||
AudioInputFormat microphoneAudioFormat{};
|
||||
if (config.captureSystemAudio) {
|
||||
@@ -443,6 +444,12 @@ int main(int argc, char* argv[]) {
|
||||
<< jsonEscape(wideToUtf8(microphoneCapture.selectedDeviceName())) << "\"";
|
||||
}
|
||||
std::cout << "}" << std::endl;
|
||||
encoderAudioFormat = makeAacCompatibleAudioFormat(*audioFormat);
|
||||
std::cout << "{\"event\":\"encoder-audio-format\",\"schemaVersion\":2,\"sampleRate\":"
|
||||
<< encoderAudioFormat.sampleRate
|
||||
<< ",\"channels\":" << encoderAudioFormat.channels
|
||||
<< ",\"bitsPerSample\":" << encoderAudioFormat.bitsPerSample
|
||||
<< "}" << std::endl;
|
||||
}
|
||||
|
||||
MFEncoder encoder;
|
||||
@@ -454,7 +461,7 @@ int main(int argc, char* argv[]) {
|
||||
bitrate,
|
||||
session.device(),
|
||||
session.context(),
|
||||
audioFormat)) {
|
||||
audioFormat ? &encoderAudioFormat : nullptr)) {
|
||||
std::cerr << "ERROR: Failed to initialize Media Foundation encoder" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
@@ -579,9 +586,9 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
|
||||
audioMixer = std::make_unique<AudioMixer>(
|
||||
*audioFormat,
|
||||
config.captureSystemAudio ? systemAudioFormat : *audioFormat,
|
||||
config.captureMic ? microphoneAudioFormat : *audioFormat,
|
||||
encoderAudioFormat,
|
||||
config.captureSystemAudio ? systemAudioFormat : encoderAudioFormat,
|
||||
config.captureMic ? microphoneAudioFormat : encoderAudioFormat,
|
||||
config.captureSystemAudio,
|
||||
config.captureMic,
|
||||
config.microphoneGain,
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
#include "mf_encoder.h"
|
||||
|
||||
#include "audio_sample_utils.h"
|
||||
|
||||
#include <mfapi.h>
|
||||
#include <mferror.h>
|
||||
#include <propvarutil.h>
|
||||
@@ -156,7 +158,7 @@ bool MFEncoder::configureAudioStream(const AudioInputFormat& audioFormat) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const UINT32 bitsPerSample = std::max<UINT32>(8, audioFormat.bitsPerSample);
|
||||
const AudioInputFormat encoderFormat = makeAacCompatibleAudioFormat(audioFormat);
|
||||
const UINT32 aacBytesPerSecond = 24'000;
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaType> outputType;
|
||||
@@ -165,7 +167,7 @@ bool MFEncoder::configureAudioStream(const AudioInputFormat& audioFormat) {
|
||||
}
|
||||
outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
|
||||
outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_AAC);
|
||||
setAudioFormat(outputType.Get(), audioFormat.channels, audioFormat.sampleRate, 16);
|
||||
setAudioFormat(outputType.Get(), encoderFormat.channels, encoderFormat.sampleRate, 16);
|
||||
outputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, aacBytesPerSecond);
|
||||
outputType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, 0);
|
||||
|
||||
@@ -178,10 +180,10 @@ bool MFEncoder::configureAudioStream(const AudioInputFormat& audioFormat) {
|
||||
return false;
|
||||
}
|
||||
inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
|
||||
inputType->SetGUID(MF_MT_SUBTYPE, audioFormat.subtype);
|
||||
setAudioFormat(inputType.Get(), audioFormat.channels, audioFormat.sampleRate, bitsPerSample);
|
||||
inputType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, audioFormat.blockAlign);
|
||||
inputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, audioFormat.avgBytesPerSec);
|
||||
inputType->SetGUID(MF_MT_SUBTYPE, encoderFormat.subtype);
|
||||
setAudioFormat(inputType.Get(), encoderFormat.channels, encoderFormat.sampleRate, encoderFormat.bitsPerSample);
|
||||
inputType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, encoderFormat.blockAlign);
|
||||
inputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, encoderFormat.avgBytesPerSec);
|
||||
inputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);
|
||||
|
||||
if (!succeeded(sinkWriter_->SetInputMediaType(audioStreamIndex_, inputType.Get(), nullptr),
|
||||
|
||||
Reference in New Issue
Block a user