174 lines
5.4 KiB
TypeScript
174 lines
5.4 KiB
TypeScript
import type { WebDemuxer } from 'web-demuxer';
|
|
import type { TrimRegion } from '@/components/video-editor/types';
|
|
import type { VideoMuxer } from './muxer';
|
|
|
|
const AUDIO_BITRATE = 128_000;
|
|
const DECODE_BACKPRESSURE_LIMIT = 20;
|
|
|
|
export class AudioProcessor {
|
|
private cancelled = false;
|
|
|
|
async process(
|
|
demuxer: WebDemuxer,
|
|
muxer: VideoMuxer,
|
|
trimRegions?: TrimRegion[],
|
|
): Promise<void> {
|
|
let audioConfig: AudioDecoderConfig;
|
|
try {
|
|
audioConfig = await demuxer.getDecoderConfig('audio') as AudioDecoderConfig;
|
|
} catch {
|
|
console.warn('[AudioProcessor] No audio track found, skipping');
|
|
return;
|
|
}
|
|
|
|
const codecCheck = await AudioDecoder.isConfigSupported(audioConfig);
|
|
if (!codecCheck.supported) {
|
|
console.warn('[AudioProcessor] Audio codec not supported:', audioConfig.codec);
|
|
return;
|
|
}
|
|
|
|
const sortedTrims = trimRegions
|
|
? [...trimRegions].sort((a, b) => a.startMs - b.startMs)
|
|
: [];
|
|
|
|
// Phase 1: Decode audio from source, skipping trimmed regions
|
|
const decodedFrames: AudioData[] = [];
|
|
|
|
const decoder = new AudioDecoder({
|
|
output: (data: AudioData) => decodedFrames.push(data),
|
|
error: (e: DOMException) => console.error('[AudioProcessor] Decode error:', e),
|
|
});
|
|
decoder.configure(audioConfig);
|
|
|
|
const reader = (demuxer.read('audio') as ReadableStream<EncodedAudioChunk>).getReader();
|
|
|
|
while (!this.cancelled) {
|
|
const { done, value: chunk } = await reader.read();
|
|
if (done || !chunk) break;
|
|
|
|
const timestampMs = chunk.timestamp / 1000;
|
|
if (this.isInTrimRegion(timestampMs, sortedTrims)) continue;
|
|
|
|
decoder.decode(chunk);
|
|
|
|
while (decoder.decodeQueueSize > DECODE_BACKPRESSURE_LIMIT && !this.cancelled) {
|
|
await new Promise(resolve => setTimeout(resolve, 1));
|
|
}
|
|
}
|
|
|
|
if (decoder.state === 'configured') {
|
|
await decoder.flush();
|
|
decoder.close();
|
|
}
|
|
|
|
if (this.cancelled || decodedFrames.length === 0) {
|
|
for (const f of decodedFrames) f.close();
|
|
return;
|
|
}
|
|
|
|
// Phase 2: Re-encode with timestamps adjusted for trim gaps
|
|
const encodedChunks: { chunk: EncodedAudioChunk; meta?: EncodedAudioChunkMetadata }[] = [];
|
|
|
|
const encoder = new AudioEncoder({
|
|
output: (chunk: EncodedAudioChunk, meta?: EncodedAudioChunkMetadata) => {
|
|
encodedChunks.push({ chunk, meta });
|
|
},
|
|
error: (e: DOMException) => console.error('[AudioProcessor] Encode error:', e),
|
|
});
|
|
|
|
const sampleRate = audioConfig.sampleRate || 48000;
|
|
const channels = audioConfig.numberOfChannels || 2;
|
|
|
|
const encodeConfig: AudioEncoderConfig = {
|
|
codec: 'opus',
|
|
sampleRate,
|
|
numberOfChannels: channels,
|
|
bitrate: AUDIO_BITRATE,
|
|
};
|
|
|
|
const encodeSupport = await AudioEncoder.isConfigSupported(encodeConfig);
|
|
if (!encodeSupport.supported) {
|
|
console.warn('[AudioProcessor] Opus encoding not supported, skipping audio');
|
|
for (const f of decodedFrames) f.close();
|
|
return;
|
|
}
|
|
|
|
encoder.configure(encodeConfig);
|
|
|
|
for (const audioData of decodedFrames) {
|
|
if (this.cancelled) {
|
|
audioData.close();
|
|
continue;
|
|
}
|
|
|
|
const timestampMs = audioData.timestamp / 1000;
|
|
const trimOffsetMs = this.computeTrimOffset(timestampMs, sortedTrims);
|
|
const adjustedTimestampUs = audioData.timestamp - trimOffsetMs * 1000;
|
|
|
|
const adjusted = this.cloneWithTimestamp(audioData, Math.max(0, adjustedTimestampUs));
|
|
audioData.close();
|
|
|
|
encoder.encode(adjusted);
|
|
adjusted.close();
|
|
}
|
|
|
|
if (encoder.state === 'configured') {
|
|
await encoder.flush();
|
|
encoder.close();
|
|
}
|
|
|
|
// Phase 3: Flush encoded chunks to muxer
|
|
for (const { chunk, meta } of encodedChunks) {
|
|
if (this.cancelled) break;
|
|
await muxer.addAudioChunk(chunk, meta);
|
|
}
|
|
|
|
console.log(`[AudioProcessor] Processed ${decodedFrames.length} audio frames, encoded ${encodedChunks.length} chunks`);
|
|
}
|
|
|
|
private cloneWithTimestamp(src: AudioData, newTimestamp: number): AudioData {
|
|
const isPlanar = src.format?.includes('planar') ?? false;
|
|
const numPlanes = isPlanar ? src.numberOfChannels : 1;
|
|
|
|
let totalSize = 0;
|
|
for (let p = 0; p < numPlanes; p++) {
|
|
totalSize += src.allocationSize({ planeIndex: p });
|
|
}
|
|
|
|
const buffer = new ArrayBuffer(totalSize);
|
|
let offset = 0;
|
|
for (let p = 0; p < numPlanes; p++) {
|
|
const planeSize = src.allocationSize({ planeIndex: p });
|
|
src.copyTo(new Uint8Array(buffer, offset, planeSize), { planeIndex: p });
|
|
offset += planeSize;
|
|
}
|
|
|
|
return new AudioData({
|
|
format: src.format!,
|
|
sampleRate: src.sampleRate,
|
|
numberOfFrames: src.numberOfFrames,
|
|
numberOfChannels: src.numberOfChannels,
|
|
timestamp: newTimestamp,
|
|
data: buffer,
|
|
});
|
|
}
|
|
|
|
private isInTrimRegion(timestampMs: number, trims: TrimRegion[]): boolean {
|
|
return trims.some(t => timestampMs >= t.startMs && timestampMs < t.endMs);
|
|
}
|
|
|
|
private computeTrimOffset(timestampMs: number, trims: TrimRegion[]): number {
|
|
let offset = 0;
|
|
for (const trim of trims) {
|
|
if (trim.endMs <= timestampMs) {
|
|
offset += trim.endMs - trim.startMs;
|
|
}
|
|
}
|
|
return offset;
|
|
}
|
|
|
|
cancel(): void {
|
|
this.cancelled = true;
|
|
}
|
|
}
|