From b349c0a27c6547f1a018e39d51875150df486186 Mon Sep 17 00:00:00 2001
From: EtienneLescot
Date: Wed, 6 May 2026 14:58:37 +0200
Subject: [PATCH] fix: downmix multichannel export audio

---
Review notes (text between the "---" line above and the first "diff --git"
line is commentary and is not part of the commit).

What this patch does
- ExportAudioCodec gains required `sampleRate` and `numberOfChannels` fields;
  the static candidate list is typed as ExportAudioCodecCandidate, which
  omits those two fields.
- The candidate list now names AAC before Opus.
- selectSupportedExportCodec probes each codec with the source channel count,
  then with 2 channels (only when the source has more than 2), then with 1
  channel, and returns the first configuration that
  AudioEncoder.isConfigSupported reports as supported. The loops are
  codec-major: every channel option is tried for AAC before Opus is tried.
- The encoder config uses the selected codec's sample rate and channel count.
- cloneWithTimestamp becomes cloneForEncoding; it hands any AudioData whose
  channel count differs from the target to downmixWithTimestamp.
- downmixWithTimestamp emits f32-planar data. A mono target is the arithmetic
  mean of all source channels. A stereo target duplicates a mono source,
  otherwise it copies source planes 0 and 1. Any target channel count other
  than 1 or 2 throws.

Points to confirm before merging
- Stereo target with more than two source channels: planes 2 and up are
  copied out of the source but never mixed into the output, so their content
  is dropped. Whether that is acceptable depends on the source channel
  layout, which this patch does not inspect.
- The type arguments of `Omit<...>` and `Promise<...>` below were rebuilt from
  the surrounding code. `Promise<ExportAudioCodec | null>` sits on a context
  line and must match the file in the repository.
- Leading whitespace below is written as tabs; verify it matches the
  repository's formatting, otherwise the hunks will not apply cleanly.
- The From: header carries no e-mail address.

 src/lib/exporter/audioEncoder.test.ts |  40 ++++++++++
 src/lib/exporter/audioEncoder.ts      | 110 ++++++++++++++++++++++----
 2 files changed, 133 insertions(+), 17 deletions(-)
 create mode 100644 src/lib/exporter/audioEncoder.test.ts

diff --git a/src/lib/exporter/audioEncoder.test.ts b/src/lib/exporter/audioEncoder.test.ts
new file mode 100644
index 0000000..5d66989
--- /dev/null
+++ b/src/lib/exporter/audioEncoder.test.ts
@@ -0,0 +1,40 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { AudioProcessor } from "./audioEncoder";
+
+describe("AudioProcessor.selectSupportedExportCodec", () => {
+	afterEach(() => {
+		vi.unstubAllGlobals();
+	});
+
+	it("falls back to stereo when the source channel count cannot be encoded", async () => {
+		const isConfigSupported = vi.fn(async (config: AudioEncoderConfig) => ({
+			config,
+			supported:
+				config.codec === "mp4a.40.2" &&
+				config.sampleRate === 44100 &&
+				config.numberOfChannels === 2,
+		}));
+		vi.stubGlobal("AudioEncoder", { isConfigSupported });
+
+		const codec = await AudioProcessor.selectSupportedExportCodec(44100, 8);
+
+		expect(codec).toMatchObject({
+			encoderCodec: "mp4a.40.2",
+			muxerCodec: "aac",
+			sampleRate: 44100,
+			numberOfChannels: 2,
+		});
+		expect(isConfigSupported).toHaveBeenCalledWith({
+			codec: "mp4a.40.2",
+			sampleRate: 44100,
+			numberOfChannels: 8,
+			bitrate: 128000,
+		});
+		expect(isConfigSupported).toHaveBeenCalledWith({
+			codec: "mp4a.40.2",
+			sampleRate: 44100,
+			numberOfChannels: 2,
+			bitrate: 128000,
+		});
+	});
+});
diff --git a/src/lib/exporter/audioEncoder.ts b/src/lib/exporter/audioEncoder.ts
index 18dac62..0d6a622 100644
--- a/src/lib/exporter/audioEncoder.ts
+++ b/src/lib/exporter/audioEncoder.ts
@@ -11,11 +11,15 @@ export interface ExportAudioCodec {
 	encoderCodec: string;
 	muxerCodec: ExportAudioMuxerCodec;
 	label: string;
+	sampleRate: number;
+	numberOfChannels: number;
 }
 
-const EXPORT_AUDIO_CODECS: ExportAudioCodec[] = [
-	{ encoderCodec: "opus", muxerCodec: "opus", label: "Opus" },
+type ExportAudioCodecCandidate = Omit<ExportAudioCodec, "sampleRate" | "numberOfChannels">;
+
+const EXPORT_AUDIO_CODECS: ExportAudioCodecCandidate[] = [
 	{ encoderCodec: "mp4a.40.2", muxerCodec: "aac", label: "AAC" },
+	{ encoderCodec: "opus", muxerCodec: "opus", label: "Opus" },
 ];
 
 export class AudioProcessor {
@@ -25,15 +29,26 @@ export class AudioProcessor {
 		sampleRate: number,
 		numberOfChannels: number,
 	): Promise<ExportAudioCodec | null> {
+		const channelOptions = [numberOfChannels];
+		if (numberOfChannels > 2) {
+			channelOptions.push(2);
+		}
+
+		if (!channelOptions.includes(1)) {
+			channelOptions.push(1);
+		}
+
 		for (const codec of EXPORT_AUDIO_CODECS) {
-			const support = await AudioEncoder.isConfigSupported({
-				codec: codec.encoderCodec,
-				sampleRate,
-				numberOfChannels,
-				bitrate: AUDIO_BITRATE,
-			});
-			if (support.supported) {
-				return codec;
+			for (const channels of channelOptions) {
+				const support = await AudioEncoder.isConfigSupported({
+					codec: codec.encoderCodec,
+					sampleRate,
+					numberOfChannels: channels,
+					bitrate: AUDIO_BITRATE,
+				});
+				if (support.supported) {
+					return { ...codec, sampleRate, numberOfChannels: channels };
+				}
 			}
 		}
 
@@ -56,9 +71,10 @@ export class AudioProcessor {
 			return null;
 		}
 
-		const sampleRate = audioConfig.sampleRate || 48000;
-		const channels = audioConfig.numberOfChannels || 2;
-		return AudioProcessor.selectSupportedExportCodec(sampleRate, channels);
+		return AudioProcessor.selectSupportedExportCodec(
+			audioConfig.sampleRate || 48000,
+			audioConfig.numberOfChannels || 2,
+		);
 	}
 
 	/**
@@ -197,10 +213,12 @@ export class AudioProcessor {
 			return;
 		}
 
+		const outputSampleRate = selectedCodec.sampleRate || sampleRate;
+		const outputChannels = selectedCodec.numberOfChannels || channels;
 		const encodeConfig: AudioEncoderConfig = {
 			codec: selectedCodec.encoderCodec,
-			sampleRate,
-			numberOfChannels: channels,
+			sampleRate: outputSampleRate,
+			numberOfChannels: outputChannels,
 			bitrate: AUDIO_BITRATE,
 		};
 
@@ -225,7 +243,11 @@ export class AudioProcessor {
 				const trimOffsetMs = this.computeTrimOffset(timestampMs, sortedTrims);
 				const adjustedTimestampUs = audioData.timestamp - trimOffsetMs * 1000;
 
-				const adjusted = this.cloneWithTimestamp(audioData, Math.max(0, adjustedTimestampUs));
+				const adjusted = this.cloneForEncoding(
+					audioData,
+					Math.max(0, adjustedTimestampUs),
+					outputChannels,
+				);
 				audioData.close();
 
 				encoder.encode(adjusted);
@@ -586,7 +608,15 @@ export class AudioProcessor {
 		);
 	}
 
-	private cloneWithTimestamp(src: AudioData, newTimestamp: number): AudioData {
+	private cloneForEncoding(
+		src: AudioData,
+		newTimestamp: number,
+		targetChannels: number,
+	): AudioData {
+		if (targetChannels !== src.numberOfChannels) {
+			return this.downmixWithTimestamp(src, newTimestamp, targetChannels);
+		}
+
 		if (!src.format) {
 			throw new Error("AudioData format is required for cloning");
 		}
@@ -616,6 +646,52 @@ export class AudioProcessor {
 		});
 	}
 
+	private downmixWithTimestamp(
+		src: AudioData,
+		newTimestamp: number,
+		targetChannels: number,
+	): AudioData {
+		const sourceChannels = src.numberOfChannels;
+		const frameCount = src.numberOfFrames;
+		if (targetChannels < 1 || targetChannels > 2) {
+			throw new Error(`Unsupported target channel count: ${targetChannels}`);
+		}
+
+		const sourcePlanes = Array.from({ length: sourceChannels }, () => new Float32Array(frameCount));
+		for (let channel = 0; channel < sourceChannels; channel++) {
+			src.copyTo(sourcePlanes[channel], {
+				format: "f32-planar",
+				planeIndex: channel,
+			});
+		}
+
+		const output = new Float32Array(frameCount * targetChannels);
+		if (targetChannels === 1) {
+			for (let frame = 0; frame < frameCount; frame++) {
+				let mixed = 0;
+				for (let channel = 0; channel < sourceChannels; channel++) {
+					mixed += sourcePlanes[channel][frame];
+				}
+				output[frame] = mixed / sourceChannels;
+			}
+		} else if (sourceChannels === 1) {
+			output.set(sourcePlanes[0], 0);
+			output.set(sourcePlanes[0], frameCount);
+		} else {
+			output.set(sourcePlanes[0], 0);
+			output.set(sourcePlanes[1], frameCount);
+		}
+
+		return new AudioData({
+			format: "f32-planar",
+			sampleRate: src.sampleRate,
+			numberOfFrames: frameCount,
+			numberOfChannels: targetChannels,
+			timestamp: newTimestamp,
+			data: output,
+		});
+	}
+
 	private isInTrimRegion(timestampMs: number, trims: TrimRegion[]): boolean {
 		return trims.some((trim) => timestampMs >= trim.startMs && timestampMs < trim.endMs);
 	}