From 16dea49fa82ca569f1c00289dcc65cf3851fb154 Mon Sep 17 00:00:00 2001
From: Siddharth
Date: Sat, 14 Mar 2026 11:58:43 -0700
Subject: [PATCH] fix audio desync and speed issue

---
 src/lib/exporter/audioEncoder.ts  | 387 ++++++++++++++++++++++++++++--
 src/lib/exporter/videoExporter.ts |   2 +
 2 files changed, 377 insertions(+), 12 deletions(-)

diff --git a/src/lib/exporter/audioEncoder.ts b/src/lib/exporter/audioEncoder.ts
index 815c8d8..490eed2 100644
--- a/src/lib/exporter/audioEncoder.ts
+++ b/src/lib/exporter/audioEncoder.ts
@@ -1,17 +1,75 @@
-import type { WebDemuxer } from "web-demuxer";
-import type { TrimRegion } from "@/components/video-editor/types";
+import { WebDemuxer } from "web-demuxer";
+import type { SpeedRegion, TrimRegion } from "@/components/video-editor/types";
 import type { VideoMuxer } from "./muxer";
 
 const AUDIO_BITRATE = 128_000;
 const DECODE_BACKPRESSURE_LIMIT = 20;
+const MIN_SPEED_REGION_DELTA_MS = 0.0001;
 
 export class AudioProcessor {
 	private cancelled = false;
 
+	/**
+	 * Processes the source audio and adds the encoded result to `muxer`.
+	 *
+	 * Audio export has two modes:
+	 * 1) no speed regions -> fast WebCodecs trim-only pipeline
+	 * 2) speed regions present -> pitch-preserving rendered timeline pipeline
+	 *
+	 * @param demuxer Source demuxer; only read by the trim-only pipeline.
+	 * @param muxer Destination muxer that receives the audio chunks.
+	 * @param videoUrl Source media URL; only played by the speed-aware pipeline.
+	 * @param trimRegions Source-time ranges to drop (sorted by `startMs` internally).
+	 * @param speedRegions Source-time ranges with a playback `speed`. Regions that are
+	 *   (near) zero-length or whose speed is not a positive finite number are ignored.
+	 * @param readEndSec Forwarded to the trim-only pipeline only.
+	 */
 	async process(
 		demuxer: WebDemuxer,
 		muxer: VideoMuxer,
+		videoUrl: string,
 		trimRegions?: TrimRegion[],
+		speedRegions?: SpeedRegion[],
+		readEndSec?: number,
+	): Promise<void> {
+		const sortedTrims = trimRegions ? [...trimRegions].sort((a, b) => a.startMs - b.startMs) : [];
+		const sortedSpeedRegions = speedRegions
+			? [...speedRegions]
+					.filter(
+						(region) =>
+							region.endMs - region.startMs > MIN_SPEED_REGION_DELTA_MS &&
+							// A playbackRate of 0 never reaches "ended" (the render loop would hang)
+							// and negative rates are not supported for playback, so drop them.
+							Number.isFinite(region.speed) &&
+							region.speed > 0,
+					)
+					.sort((a, b) => a.startMs - b.startMs)
+			: [];
+
+		// Speed edits must use timeline playback to preserve pitch
+		if (sortedSpeedRegions.length > 0) {
+			const renderedAudioBlob = await this.renderPitchPreservedTimelineAudio(
+				videoUrl,
+				sortedTrims,
+				sortedSpeedRegions,
+			);
+			if (!this.cancelled) {
+				await this.muxRenderedAudioBlob(renderedAudioBlob, muxer);
+			}
+			// Never fall through: the trim-only pipeline below would process the audio
+			// without the speed edits.
+			return;
+		}
+
+		// No speed edits: keep the original demux/decode/encode path with trim timestamp remap.
+		await this.processTrimOnlyAudio(demuxer, muxer, sortedTrims, readEndSec);
+	}
+
+	// Legacy trim-only path. This is still used for projects without speed regions.
+	private async processTrimOnlyAudio(
+		demuxer: WebDemuxer,
+		muxer: VideoMuxer,
+		sortedTrims: TrimRegion[],
 		readEndSec?: number,
 	): Promise<void> {
 		let audioConfig: AudioDecoderConfig;
@@ -28,8 +86,6 @@ export class AudioProcessor {
 			return;
 		}
 
-		const sortedTrims = trimRegions ? [...trimRegions].sort((a, b) => a.startMs - b.startMs) : [];
-
 		// Phase 1: Decode audio from source, skipping trimmed regions
 		const decodedFrames: AudioData[] = [];
 
@@ -78,7 +134,7 @@ export class AudioProcessor {
 		}
 
 		if (this.cancelled || decodedFrames.length === 0) {
-			for (const f of decodedFrames) f.close();
+			for (const frame of decodedFrames) frame.close();
 			return;
 		}
 
@@ -105,7 +161,7 @@ export class AudioProcessor {
 		const encodeSupport = await AudioEncoder.isConfigSupported(encodeConfig);
 		if (!encodeSupport.supported) {
 			console.warn("[AudioProcessor] Opus encoding not supported, skipping audio");
-			for (const f of decodedFrames) f.close();
+			for (const frame of decodedFrames) frame.close();
 			return;
 		}
 
@@ -144,20 +200,327 @@ export class AudioProcessor {
 		);
 	}
 
+	/**
+	 * Speed-aware path that mirrors preview semantics (trim skipping + playbackRate regions).
+	 * The source is played through a media element with pitch preservation enabled and the
+	 * output is recorded, which avoids the chipmunk effect in sped-up regions.
+	 *
+	 * Playback happens in real time, so this takes as long as the rendered timeline.
+	 *
+	 * @param videoUrl URL of the source media to play back.
+	 * @param trimRegions Source-time ranges (ms) that are skipped by seeking past them.
+	 * @param speedRegions Source-time ranges (ms) whose `speed` is used as playbackRate.
+	 * @returns The recorded audio, in the container MediaRecorder was started with.
+	 * @throws Error "Export cancelled" when the export is cancelled during rendering.
+	 */
+	private async renderPitchPreservedTimelineAudio(
+		videoUrl: string,
+		trimRegions: TrimRegion[],
+		speedRegions: SpeedRegion[],
+	): Promise<Blob> {
+		const media = document.createElement("audio");
+		media.src = videoUrl;
+		media.preload = "auto";
+
+		const pitchMedia = media as HTMLMediaElement & {
+			preservesPitch?: boolean;
+			mozPreservesPitch?: boolean;
+			webkitPreservesPitch?: boolean;
+		};
+		pitchMedia.preservesPitch = true;
+		pitchMedia.mozPreservesPitch = true;
+		pitchMedia.webkitPreservesPitch = true;
+
+		await this.waitForLoadedMetadata(media);
+		if (this.cancelled) {
+			throw new Error("Export cancelled");
+		}
+
+		const audioContext = new AudioContext();
+		const sourceNode = audioContext.createMediaElementSource(media);
+		const destinationNode = audioContext.createMediaStreamDestination();
+		sourceNode.connect(destinationNode);
+
+		const { recorder, recordedBlobPromise } = this.startAudioRecording(destinationNode.stream);
+		let rafId: number | null = null;
+
+		try {
+			if (audioContext.state === "suspended") {
+				await audioContext.resume();
+			}
+
+			await this.seekTo(media, 0);
+			await media.play();
+
+			await new Promise<void>((resolve, reject) => {
+				const cleanup = () => {
+					if (rafId !== null) {
+						cancelAnimationFrame(rafId);
+						rafId = null;
+					}
+					media.removeEventListener("error", onError);
+					media.removeEventListener("ended", onEnded);
+				};
+
+				const onError = () => {
+					cleanup();
+					reject(new Error("Failed while rendering speed-adjusted audio timeline"));
+				};
+
+				const onEnded = () => {
+					cleanup();
+					resolve();
+				};
+
+				// NOTE(review): requestAnimationFrame callbacks are paused while the page is
+				// hidden, so trim skips and rate changes are applied late in background tabs.
+				const tick = () => {
+					if (this.cancelled) {
+						cleanup();
+						resolve();
+						return;
+					}
+
+					const currentTimeMs = media.currentTime * 1000;
+					const activeTrimRegion = this.findActiveTrimRegion(currentTimeMs, trimRegions);
+
+					if (activeTrimRegion && !media.paused && !media.ended) {
+						const skipToTime = activeTrimRegion.endMs / 1000;
+						if (skipToTime >= media.duration) {
+							media.pause();
+							cleanup();
+							resolve();
+							return;
+						}
+						media.currentTime = skipToTime;
+					} else {
+						const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions);
+						const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1;
+						if (Math.abs(media.playbackRate - playbackRate) > 0.0001) {
+							media.playbackRate = playbackRate;
+						}
+					}
+
+					if (!media.paused && !media.ended) {
+						rafId = requestAnimationFrame(tick);
+					} else {
+						cleanup();
+						resolve();
+					}
+				};
+
+				media.addEventListener("error", onError, { once: true });
+				media.addEventListener("ended", onEnded, { once: true });
+				rafId = requestAnimationFrame(tick);
+			});
+		} finally {
+			if (rafId !== null) {
+				cancelAnimationFrame(rafId);
+			}
+			media.pause();
+			if (recorder.state !== "inactive") {
+				recorder.stop();
+			}
+			destinationNode.stream.getTracks().forEach((track) => track.stop());
+			sourceNode.disconnect();
+			destinationNode.disconnect();
+			await audioContext.close();
+			media.src = "";
+			media.load();
+		}
+
+		const recordedBlob = await recordedBlobPromise;
+		if (this.cancelled) {
+			throw new Error("Export cancelled");
+		}
+		return recordedBlob;
+	}
+
+	/**
+	 * Demuxes the rendered speed-adjusted blob and feeds encoded chunks into the MP4 muxer.
+	 * The decoder config is passed along with the first chunk only.
+	 *
+	 * @param blob Recorded audio returned by the speed-aware render pass.
+	 * @param muxer Destination muxer.
+	 */
+	private async muxRenderedAudioBlob(blob: Blob, muxer: VideoMuxer): Promise<void> {
+		if (this.cancelled) return;
+
+		const file = new File([blob], "speed-audio.webm", { type: blob.type || "audio/webm" });
+		const wasmUrl = new URL("./wasm/web-demuxer.wasm", window.location.href).href;
+		const demuxer = new WebDemuxer({ wasmFilePath: wasmUrl });
+
+		try {
+			await demuxer.load(file);
+			const audioConfig = (await demuxer.getDecoderConfig("audio")) as AudioDecoderConfig;
+			const reader = (demuxer.read("audio") as ReadableStream<EncodedAudioChunk>).getReader();
+			let isFirstChunk = true;
+
+			try {
+				while (!this.cancelled) {
+					const { done, value: chunk } = await reader.read();
+					if (done || !chunk) break;
+					if (isFirstChunk) {
+						await muxer.addAudioChunk(chunk, { decoderConfig: audioConfig });
+						isFirstChunk = false;
+					} else {
+						await muxer.addAudioChunk(chunk);
+					}
+				}
+			} finally {
+				try {
+					await reader.cancel();
+				} catch {
+					/* reader already closed */
+				}
+			}
+		} finally {
+			try {
+				demuxer.destroy();
+			} catch {
+				/* ignore */
+			}
+		}
+	}
+
+	/**
+	 * Starts a MediaRecorder on `stream`.
+	 *
+	 * @returns The recorder and a promise that resolves with everything recorded once the
+	 *   recorder stops, or rejects when the recorder reports an error.
+	 */
+	private startAudioRecording(stream: MediaStream): {
+		recorder: MediaRecorder;
+		recordedBlobPromise: Promise<Blob>;
+	} {
+		const mimeType = this.getSupportedAudioMimeType();
+		const options: MediaRecorderOptions = {
+			audioBitsPerSecond: AUDIO_BITRATE,
+			...(mimeType ? { mimeType } : {}),
+		};
+
+		const recorder = new MediaRecorder(stream, options);
+		const chunks: Blob[] = [];
+
+		const recordedBlobPromise = new Promise<Blob>((resolve, reject) => {
+			recorder.ondataavailable = (event: BlobEvent) => {
+				if (event.data && event.data.size > 0) {
+					chunks.push(event.data);
+				}
+			};
+			recorder.onerror = () => {
+				reject(new Error("MediaRecorder failed while capturing speed-adjusted audio"));
+			};
+			recorder.onstop = () => {
+				const type = mimeType || chunks[0]?.type || "audio/webm";
+				resolve(new Blob(chunks, { type }));
+			};
+		});
+
+		recorder.start();
+		return { recorder, recordedBlobPromise };
+	}
+
+	/** Picks the first WebM audio type MediaRecorder supports; undefined if none is. */
+	private getSupportedAudioMimeType(): string | undefined {
+		const candidates = ["audio/webm;codecs=opus", "audio/webm"];
+		for (const candidate of candidates) {
+			if (MediaRecorder.isTypeSupported(candidate)) {
+				return candidate;
+			}
+		}
+		return undefined;
+	}
+
+	/** Resolves once `media` has its metadata loaded; rejects on a media `error` event. */
+	private waitForLoadedMetadata(media: HTMLMediaElement): Promise<void> {
+		if (Number.isFinite(media.duration) && media.readyState >= HTMLMediaElement.HAVE_METADATA) {
+			return Promise.resolve();
+		}
+
+		return new Promise<void>((resolve, reject) => {
+			const onLoaded = () => {
+				cleanup();
+				resolve();
+			};
+			const onError = () => {
+				cleanup();
+				reject(new Error("Failed to load media metadata for speed-adjusted audio"));
+			};
+			const cleanup = () => {
+				media.removeEventListener("loadedmetadata", onLoaded);
+				media.removeEventListener("error", onError);
+			};
+
+			media.addEventListener("loadedmetadata", onLoaded, { once: true });
+			media.addEventListener("error", onError, { once: true });
+		});
+	}
+
+	/** Seeks `media` to `targetSec`; resolves on `seeked` or at once if already there. */
+	private seekTo(media: HTMLMediaElement, targetSec: number): Promise<void> {
+		if (Math.abs(media.currentTime - targetSec) < 0.0001) {
+			return Promise.resolve();
+		}
+
+		return new Promise<void>((resolve, reject) => {
+			const onSeeked = () => {
+				cleanup();
+				resolve();
+			};
+			const onError = () => {
+				cleanup();
+				reject(new Error("Failed to seek media for speed-adjusted audio"));
+			};
+			const cleanup = () => {
+				media.removeEventListener("seeked", onSeeked);
+				media.removeEventListener("error", onError);
+			};
+
+			media.addEventListener("seeked", onSeeked, { once: true });
+			media.addEventListener("error", onError, { once: true });
+			media.currentTime = targetSec;
+		});
+	}
+
+	/** Returns the first trim region with startMs <= currentTimeMs < endMs, or null. */
+	private findActiveTrimRegion(
+		currentTimeMs: number,
+		trimRegions: TrimRegion[],
+	): TrimRegion | null {
+		return (
+			trimRegions.find(
+				(region) => currentTimeMs >= region.startMs && currentTimeMs < region.endMs,
+			) ?? null
+		);
+	}
+
+	/** Returns the first speed region with startMs <= currentTimeMs < endMs, or null. */
+	private findActiveSpeedRegion(
+		currentTimeMs: number,
+		speedRegions: SpeedRegion[],
+	): SpeedRegion | null {
+		return (
+			speedRegions.find(
+				(region) => currentTimeMs >= region.startMs && currentTimeMs < region.endMs,
+			) ?? null
+		);
+	}
+
 	private cloneWithTimestamp(src: AudioData, newTimestamp: number): AudioData {
 		const isPlanar = src.format?.includes("planar") ?? false;
 		const numPlanes = isPlanar ? src.numberOfChannels : 1;
 
 		let totalSize = 0;
-		for (let p = 0; p < numPlanes; p++) {
-			totalSize += src.allocationSize({ planeIndex: p });
+		for (let planeIndex = 0; planeIndex < numPlanes; planeIndex++) {
+			totalSize += src.allocationSize({ planeIndex });
 		}
 
 		const buffer = new ArrayBuffer(totalSize);
 		let offset = 0;
-		for (let p = 0; p < numPlanes; p++) {
-			const planeSize = src.allocationSize({ planeIndex: p });
-			src.copyTo(new Uint8Array(buffer, offset, planeSize), { planeIndex: p });
+		for (let planeIndex = 0; planeIndex < numPlanes; planeIndex++) {
+			const planeSize = src.allocationSize({ planeIndex });
+			src.copyTo(new Uint8Array(buffer, offset, planeSize), { planeIndex });
 			offset += planeSize;
 		}
 
@@ -172,7 +535,7 @@ export class AudioProcessor {
 	}
 
 	private isInTrimRegion(timestampMs: number, trims: TrimRegion[]): boolean {
-		return trims.some((t) => timestampMs >= t.startMs && timestampMs < t.endMs);
+		return trims.some((trim) => timestampMs >= trim.startMs && timestampMs < trim.endMs);
 	}
 
 	private computeTrimOffset(timestampMs: number, trims: TrimRegion[]): number {
diff --git a/src/lib/exporter/videoExporter.ts b/src/lib/exporter/videoExporter.ts
index 060c9b5..ef01001 100644
--- a/src/lib/exporter/videoExporter.ts
+++ b/src/lib/exporter/videoExporter.ts
@@ -203,7 +203,9 @@ export class VideoExporter {
 					await this.audioProcessor.process(
 						demuxer,
 						this.muxer!,
+						this.config.videoUrl,
 						this.config.trimRegions,
+						this.config.speedRegions,
 						readEndSec,
 					);
 				}