fix: downmix multichannel export audio

This commit is contained in:
EtienneLescot
2026-05-06 14:58:37 +02:00
parent 238fc97c6d
commit b349c0a27c
2 changed files with 133 additions and 17 deletions
+40
View File
@@ -0,0 +1,40 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { AudioProcessor } from "./audioEncoder";
describe("AudioProcessor.selectSupportedExportCodec", () => {
afterEach(() => {
vi.unstubAllGlobals();
});
it("falls back to stereo when the source channel count cannot be encoded", async () => {
const isConfigSupported = vi.fn(async (config: AudioEncoderConfig) => ({
config,
supported:
config.codec === "mp4a.40.2" &&
config.sampleRate === 44100 &&
config.numberOfChannels === 2,
}));
vi.stubGlobal("AudioEncoder", { isConfigSupported });
const codec = await AudioProcessor.selectSupportedExportCodec(44100, 8);
expect(codec).toMatchObject({
encoderCodec: "mp4a.40.2",
muxerCodec: "aac",
sampleRate: 44100,
numberOfChannels: 2,
});
expect(isConfigSupported).toHaveBeenCalledWith({
codec: "mp4a.40.2",
sampleRate: 44100,
numberOfChannels: 8,
bitrate: 128000,
});
expect(isConfigSupported).toHaveBeenCalledWith({
codec: "mp4a.40.2",
sampleRate: 44100,
numberOfChannels: 2,
bitrate: 128000,
});
});
});
+93 -17
View File
@@ -11,11 +11,15 @@ export interface ExportAudioCodec {
encoderCodec: string;
muxerCodec: ExportAudioMuxerCodec;
label: string;
sampleRate: number;
numberOfChannels: number;
}
const EXPORT_AUDIO_CODECS: ExportAudioCodec[] = [
{ encoderCodec: "opus", muxerCodec: "opus", label: "Opus" },
type ExportAudioCodecCandidate = Omit<ExportAudioCodec, "sampleRate" | "numberOfChannels">;
const EXPORT_AUDIO_CODECS: ExportAudioCodecCandidate[] = [
{ encoderCodec: "mp4a.40.2", muxerCodec: "aac", label: "AAC" },
{ encoderCodec: "opus", muxerCodec: "opus", label: "Opus" },
];
export class AudioProcessor {
@@ -25,15 +29,26 @@ export class AudioProcessor {
sampleRate: number,
numberOfChannels: number,
): Promise<ExportAudioCodec | null> {
const channelOptions = [numberOfChannels];
if (numberOfChannels > 2) {
channelOptions.push(2);
}
if (!channelOptions.includes(1)) {
channelOptions.push(1);
}
for (const codec of EXPORT_AUDIO_CODECS) {
const support = await AudioEncoder.isConfigSupported({
codec: codec.encoderCodec,
sampleRate,
numberOfChannels,
bitrate: AUDIO_BITRATE,
});
if (support.supported) {
return codec;
for (const channels of channelOptions) {
const support = await AudioEncoder.isConfigSupported({
codec: codec.encoderCodec,
sampleRate,
numberOfChannels: channels,
bitrate: AUDIO_BITRATE,
});
if (support.supported) {
return { ...codec, sampleRate, numberOfChannels: channels };
}
}
}
@@ -56,9 +71,10 @@ export class AudioProcessor {
return null;
}
const sampleRate = audioConfig.sampleRate || 48000;
const channels = audioConfig.numberOfChannels || 2;
return AudioProcessor.selectSupportedExportCodec(sampleRate, channels);
return AudioProcessor.selectSupportedExportCodec(
audioConfig.sampleRate || 48000,
audioConfig.numberOfChannels || 2,
);
}
/**
@@ -197,10 +213,12 @@ export class AudioProcessor {
return;
}
const outputSampleRate = selectedCodec.sampleRate || sampleRate;
const outputChannels = selectedCodec.numberOfChannels || channels;
const encodeConfig: AudioEncoderConfig = {
codec: selectedCodec.encoderCodec,
sampleRate,
numberOfChannels: channels,
sampleRate: outputSampleRate,
numberOfChannels: outputChannels,
bitrate: AUDIO_BITRATE,
};
@@ -225,7 +243,11 @@ export class AudioProcessor {
const trimOffsetMs = this.computeTrimOffset(timestampMs, sortedTrims);
const adjustedTimestampUs = audioData.timestamp - trimOffsetMs * 1000;
const adjusted = this.cloneWithTimestamp(audioData, Math.max(0, adjustedTimestampUs));
const adjusted = this.cloneForEncoding(
audioData,
Math.max(0, adjustedTimestampUs),
outputChannels,
);
audioData.close();
encoder.encode(adjusted);
@@ -586,7 +608,15 @@ export class AudioProcessor {
);
}
private cloneWithTimestamp(src: AudioData, newTimestamp: number): AudioData {
private cloneForEncoding(
src: AudioData,
newTimestamp: number,
targetChannels: number,
): AudioData {
if (targetChannels !== src.numberOfChannels) {
return this.downmixWithTimestamp(src, newTimestamp, targetChannels);
}
if (!src.format) {
throw new Error("AudioData format is required for cloning");
}
@@ -616,6 +646,52 @@ export class AudioProcessor {
});
}
private downmixWithTimestamp(
src: AudioData,
newTimestamp: number,
targetChannels: number,
): AudioData {
const sourceChannels = src.numberOfChannels;
const frameCount = src.numberOfFrames;
if (targetChannels < 1 || targetChannels > 2) {
throw new Error(`Unsupported target channel count: ${targetChannels}`);
}
const sourcePlanes = Array.from({ length: sourceChannels }, () => new Float32Array(frameCount));
for (let channel = 0; channel < sourceChannels; channel++) {
src.copyTo(sourcePlanes[channel], {
format: "f32-planar",
planeIndex: channel,
});
}
const output = new Float32Array(frameCount * targetChannels);
if (targetChannels === 1) {
for (let frame = 0; frame < frameCount; frame++) {
let mixed = 0;
for (let channel = 0; channel < sourceChannels; channel++) {
mixed += sourcePlanes[channel][frame];
}
output[frame] = mixed / sourceChannels;
}
} else if (sourceChannels === 1) {
output.set(sourcePlanes[0], 0);
output.set(sourcePlanes[0], frameCount);
} else {
output.set(sourcePlanes[0], 0);
output.set(sourcePlanes[1], frameCount);
}
return new AudioData({
format: "f32-planar",
sampleRate: src.sampleRate,
numberOfFrames: frameCount,
numberOfChannels: targetChannels,
timestamp: newTimestamp,
data: output,
});
}
private isInTrimRegion(timestampMs: number, trims: TrimRegion[]): boolean {
return trims.some((trim) => timestampMs >= trim.startMs && timestampMs < trim.endMs);
}