fix audio desync and speed issue
This commit is contained in:
@@ -1,17 +1,56 @@
|
||||
import type { WebDemuxer } from "web-demuxer";
|
||||
import type { TrimRegion } from "@/components/video-editor/types";
|
||||
import { WebDemuxer } from "web-demuxer";
|
||||
import type { SpeedRegion, TrimRegion } from "@/components/video-editor/types";
|
||||
import type { VideoMuxer } from "./muxer";
|
||||
|
||||
// Target audio bitrate in bits per second; passed as `audioBitsPerSecond` to MediaRecorder in this file.
const AUDIO_BITRATE = 128_000;
|
||||
// NOTE(review): not referenced in the visible part of this file; presumably caps the number of
// pending decode requests in the trim-only pipeline — confirm against the decode loop.
const DECODE_BACKPRESSURE_LIMIT = 20;
|
||||
// Speed regions whose length (endMs - startMs) is not greater than this value are discarded.
const MIN_SPEED_REGION_DELTA_MS = 0.0001;
|
||||
|
||||
export class AudioProcessor {
|
||||
private cancelled = false;
|
||||
|
||||
/**
 * Exports the audio track into `muxer` using one of two mutually exclusive pipelines:
 *  1) no speed regions      -> fast WebCodecs trim-only pipeline
 *  2) speed regions present -> pitch-preserving rendered timeline pipeline
 *
 * @param demuxer Demuxer for the source media; only used by the trim-only pipeline.
 * @param muxer Muxer that receives the resulting audio chunks.
 * @param videoUrl URL of the source media; only used by the speed-aware pipeline.
 * @param trimRegions Regions to cut out. Sorted by `startMs` on a copy; the caller's array is not mutated.
 * @param speedRegions Regions with a custom playback speed. Regions whose length is not greater
 *   than MIN_SPEED_REGION_DELTA_MS are ignored. Sorted by `startMs` on a copy.
 * @param readEndSec Forwarded to the trim-only pipeline; not used by the speed-aware pipeline.
 * @throws Whatever the selected pipeline throws; the speed-aware pipeline throws
 *   `Error("Export cancelled")` when the export is cancelled while rendering.
 */
async process(
	demuxer: WebDemuxer,
	muxer: VideoMuxer,
	videoUrl: string,
	trimRegions?: TrimRegion[],
	speedRegions?: SpeedRegion[],
	readEndSec?: number,
): Promise<void> {
	const sortedTrims = [...(trimRegions ?? [])].sort((a, b) => a.startMs - b.startMs);
	// `filter` already returns a fresh array, so sorting it does not touch the caller's array.
	const sortedSpeedRegions = (speedRegions ?? [])
		.filter((region) => region.endMs - region.startMs > MIN_SPEED_REGION_DELTA_MS)
		.sort((a, b) => a.startMs - b.startMs);

	// No speed edits: keep the original demux/decode/encode path with trim timestamp remap.
	if (sortedSpeedRegions.length === 0) {
		await this.processTrimOnlyAudio(demuxer, muxer, sortedTrims, readEndSec);
		return;
	}

	// Speed edits must use timeline playback to preserve pitch.
	const renderedAudioBlob = await this.renderPitchPreservedTimelineAudio(
		videoUrl,
		sortedTrims,
		sortedSpeedRegions,
	);

	// Never fall through to the trim-only pipeline from here: it would emit audio that
	// ignores the speed regions. A cancelled export simply produces no audio.
	if (this.cancelled) {
		return;
	}
	await this.muxRenderedAudioBlob(renderedAudioBlob, muxer);
}
|
||||
|
||||
// Legacy trim-only path. This is still used for projects without speed regions.
|
||||
private async processTrimOnlyAudio(
|
||||
demuxer: WebDemuxer,
|
||||
muxer: VideoMuxer,
|
||||
sortedTrims: TrimRegion[],
|
||||
readEndSec?: number,
|
||||
): Promise<void> {
|
||||
let audioConfig: AudioDecoderConfig;
|
||||
@@ -28,8 +67,6 @@ export class AudioProcessor {
|
||||
return;
|
||||
}
|
||||
|
||||
const sortedTrims = trimRegions ? [...trimRegions].sort((a, b) => a.startMs - b.startMs) : [];
|
||||
|
||||
// Phase 1: Decode audio from source, skipping trimmed regions
|
||||
const decodedFrames: AudioData[] = [];
|
||||
|
||||
@@ -78,7 +115,7 @@ export class AudioProcessor {
|
||||
}
|
||||
|
||||
if (this.cancelled || decodedFrames.length === 0) {
|
||||
for (const f of decodedFrames) f.close();
|
||||
for (const frame of decodedFrames) frame.close();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -105,7 +142,7 @@ export class AudioProcessor {
|
||||
const encodeSupport = await AudioEncoder.isConfigSupported(encodeConfig);
|
||||
if (!encodeSupport.supported) {
|
||||
console.warn("[AudioProcessor] Opus encoding not supported, skipping audio");
|
||||
for (const f of decodedFrames) f.close();
|
||||
for (const frame of decodedFrames) frame.close();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -144,20 +181,297 @@ export class AudioProcessor {
|
||||
);
|
||||
}
|
||||
|
||||
// Speed-aware path that mirrors preview semantics (trim skipping + playbackRate regions).
// Pitch is preserved by relying on the browser's own media playback (`preservesPitch`),
// which avoids the "chipmunk" effect.
//
// The source is played through an <audio> element routed into a MediaStreamDestination and
// captured with MediaRecorder. While it plays, a requestAnimationFrame loop jumps over trim
// regions and applies the playback rate of the speed region under the playhead.
//
// Resolves with the recorded blob. Throws Error("Export cancelled") when the export is
// cancelled, and rethrows metadata/seek/playback failures. Every resource acquired here
// (media element source, AudioContext, recorder, stream tracks) is released on all paths.
//
// NOTE(review): requestAnimationFrame callbacks are paused while the tab is hidden, so trim
// skipping and rate changes are not applied if the export runs in a background tab.
private async renderPitchPreservedTimelineAudio(
	videoUrl: string,
	trimRegions: TrimRegion[],
	speedRegions: SpeedRegion[],
): Promise<Blob> {
	const media = document.createElement("audio");
	media.src = videoUrl;
	media.preload = "auto";

	// Set the standard flag plus the vendor-prefixed variants.
	const pitchMedia = media as HTMLMediaElement & {
		preservesPitch?: boolean;
		mozPreservesPitch?: boolean;
		webkitPreservesPitch?: boolean;
	};
	pitchMedia.preservesPitch = true;
	pitchMedia.mozPreservesPitch = true;
	pitchMedia.webkitPreservesPitch = true;

	try {
		await this.waitForLoadedMetadata(media);
		if (this.cancelled) {
			throw new Error("Export cancelled");
		}

		const audioContext = new AudioContext();
		try {
			const sourceNode = audioContext.createMediaElementSource(media);
			const destinationNode = audioContext.createMediaStreamDestination();
			sourceNode.connect(destinationNode);

			const { recorder, recordedBlobPromise } = this.startAudioRecording(destinationNode.stream);
			let rafId: number | null = null;

			try {
				if (audioContext.state === "suspended") {
					await audioContext.resume();
				}

				await this.seekTo(media, 0);
				await media.play();

				await new Promise<void>((resolve, reject) => {
					const cleanup = () => {
						if (rafId !== null) {
							cancelAnimationFrame(rafId);
							rafId = null;
						}
						media.removeEventListener("error", onError);
						media.removeEventListener("ended", onEnded);
					};

					const onError = () => {
						cleanup();
						reject(new Error("Failed while rendering speed-adjusted audio timeline"));
					};

					const onEnded = () => {
						cleanup();
						resolve();
					};

					const tick = () => {
						// Cancellation resolves here; the caller-visible error is raised after cleanup.
						if (this.cancelled) {
							cleanup();
							resolve();
							return;
						}

						const currentTimeMs = media.currentTime * 1000;
						const activeTrimRegion = this.findActiveTrimRegion(currentTimeMs, trimRegions);

						if (activeTrimRegion && !media.paused && !media.ended) {
							const skipToTime = activeTrimRegion.endMs / 1000;
							// A trim that runs to (or past) the end of the media finishes the render.
							if (skipToTime >= media.duration) {
								media.pause();
								cleanup();
								resolve();
								return;
							}
							media.currentTime = skipToTime;
						} else {
							const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions);
							const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1;
							// Only touch playbackRate when it actually changes.
							if (Math.abs(media.playbackRate - playbackRate) > 0.0001) {
								media.playbackRate = playbackRate;
							}
						}

						if (!media.paused && !media.ended) {
							rafId = requestAnimationFrame(tick);
						} else {
							cleanup();
							resolve();
						}
					};

					media.addEventListener("error", onError, { once: true });
					media.addEventListener("ended", onEnded, { once: true });
					rafId = requestAnimationFrame(tick);
				});
			} finally {
				if (rafId !== null) {
					cancelAnimationFrame(rafId);
				}
				media.pause();
				// Stopping the recorder is what lets `recordedBlobPromise` resolve.
				if (recorder.state !== "inactive") {
					recorder.stop();
				}
				destinationNode.stream.getTracks().forEach((track) => track.stop());
				sourceNode.disconnect();
				destinationNode.disconnect();
			}

			const recordedBlob = await recordedBlobPromise;
			if (this.cancelled) {
				throw new Error("Export cancelled");
			}
			return recordedBlob;
		} finally {
			// Runs even when building the audio graph or the recorder fails.
			await audioContext.close();
		}
	} finally {
		// Runs even when metadata loading fails or the export is cancelled early,
		// so the element never keeps the source loaded.
		media.src = "";
		media.load();
	}
}
|
||||
|
||||
// Demuxes the recorded speed-adjusted blob and forwards its encoded audio chunks to the muxer.
// The decoder config is attached to the first chunk only. Does nothing once the export is
// cancelled; the reader and the temporary demuxer are always released.
private async muxRenderedAudioBlob(blob: Blob, muxer: VideoMuxer): Promise<void> {
	if (this.cancelled) {
		return;
	}

	const renderedFile = new File([blob], "speed-audio.webm", { type: blob.type || "audio/webm" });
	const wasmFilePath = new URL("./wasm/web-demuxer.wasm", window.location.href).href;
	const blobDemuxer = new WebDemuxer({ wasmFilePath });

	try {
		await blobDemuxer.load(renderedFile);
		const decoderConfig = (await blobDemuxer.getDecoderConfig("audio")) as AudioDecoderConfig;
		const chunkStream = blobDemuxer.read("audio") as ReadableStream<EncodedAudioChunk>;
		const chunkReader = chunkStream.getReader();
		let configSent = false;

		try {
			for (;;) {
				if (this.cancelled) {
					break;
				}
				const next = await chunkReader.read();
				if (next.done || !next.value) {
					break;
				}
				if (configSent) {
					await muxer.addAudioChunk(next.value);
				} else {
					await muxer.addAudioChunk(next.value, { decoderConfig });
					configSent = true;
				}
			}
		} finally {
			try {
				await chunkReader.cancel();
			} catch {
				/* reader already closed */
			}
		}
	} finally {
		try {
			blobDemuxer.destroy();
		} catch {
			/* ignore */
		}
	}
}
|
||||
|
||||
// Creates a MediaRecorder for `stream`, starts it, and returns it together with a promise
// for the finished recording. The promise resolves when the recorder stops and rejects when
// the recorder reports an error.
private startAudioRecording(stream: MediaStream): {
	recorder: MediaRecorder;
	recordedBlobPromise: Promise<Blob>;
} {
	const preferredMimeType = this.getSupportedAudioMimeType();
	const recorderOptions: MediaRecorderOptions = { audioBitsPerSecond: AUDIO_BITRATE };
	if (preferredMimeType) {
		recorderOptions.mimeType = preferredMimeType;
	}

	const recorder = new MediaRecorder(stream, recorderOptions);
	const recordedParts: Blob[] = [];

	const recordedBlobPromise = new Promise<Blob>((resolve, reject) => {
		recorder.ondataavailable = ({ data }: BlobEvent) => {
			// Skip empty payloads.
			if (!data || !(data.size > 0)) {
				return;
			}
			recordedParts.push(data);
		};
		recorder.onerror = () => {
			reject(new Error("MediaRecorder failed while capturing speed-adjusted audio"));
		};
		recorder.onstop = () => {
			// Prefer the requested type, then whatever the recorder produced, then a default.
			const blobType = preferredMimeType || recordedParts[0]?.type || "audio/webm";
			resolve(new Blob(recordedParts, { type: blobType }));
		};
	});

	recorder.start();
	return { recorder, recordedBlobPromise };
}
|
||||
|
||||
// Returns the first recording MIME type this browser's MediaRecorder supports, in order of
// preference, or undefined when none of the candidates is supported.
private getSupportedAudioMimeType(): string | undefined {
	const preferenceOrder = ["audio/webm;codecs=opus", "audio/webm"];
	return preferenceOrder.find((mimeType) => MediaRecorder.isTypeSupported(mimeType));
}
|
||||
|
||||
// Resolves once `media` has its metadata available; rejects when the element reports an error.
//
// Settles immediately when the outcome is already known. Waiting for `loadedmetadata` after
// it has fired, or for `error` after the element has already failed, would never settle.
// A non-finite duration (e.g. Infinity for media without a known length) is not treated as
// "metadata missing": readyState alone decides.
private waitForLoadedMetadata(media: HTMLMediaElement): Promise<void> {
	if (media.error) {
		return Promise.reject(new Error("Failed to load media metadata for speed-adjusted audio"));
	}
	if (media.readyState >= HTMLMediaElement.HAVE_METADATA) {
		return Promise.resolve();
	}

	return new Promise<void>((resolve, reject) => {
		const cleanup = () => {
			media.removeEventListener("loadedmetadata", onLoaded);
			media.removeEventListener("error", onError);
		};
		const onLoaded = () => {
			cleanup();
			resolve();
		};
		const onError = () => {
			cleanup();
			reject(new Error("Failed to load media metadata for speed-adjusted audio"));
		};

		media.addEventListener("loadedmetadata", onLoaded, { once: true });
		media.addEventListener("error", onError, { once: true });
	});
}
|
||||
|
||||
// Moves the playhead of `media` to `targetSec` (seconds) and resolves when the element fires
// `seeked`. Resolves immediately when the playhead is already within 0.0001 s of the target;
// rejects when the element fires `error` first.
private seekTo(media: HTMLMediaElement, targetSec: number): Promise<void> {
	const alreadyAtTarget = Math.abs(media.currentTime - targetSec) < 0.0001;
	if (alreadyAtTarget) {
		return Promise.resolve();
	}

	return new Promise<void>((resolve, reject) => {
		function detachListeners(): void {
			media.removeEventListener("seeked", handleSeeked);
			media.removeEventListener("error", handleError);
		}

		function handleSeeked(): void {
			detachListeners();
			resolve();
		}

		function handleError(): void {
			detachListeners();
			reject(new Error("Failed to seek media for speed-adjusted audio"));
		}

		media.addEventListener("seeked", handleSeeked, { once: true });
		media.addEventListener("error", handleError, { once: true });
		// Listeners are attached first so the resulting `seeked` event cannot be missed.
		media.currentTime = targetSec;
	});
}
|
||||
|
||||
// Returns the first trim region containing `currentTimeMs` (start inclusive, end exclusive),
// or null when the time lies outside every region.
private findActiveTrimRegion(
	currentTimeMs: number,
	trimRegions: TrimRegion[],
): TrimRegion | null {
	for (const candidate of trimRegions) {
		const hasStarted = currentTimeMs >= candidate.startMs;
		if (hasStarted && currentTimeMs < candidate.endMs) {
			return candidate;
		}
	}
	return null;
}
|
||||
|
||||
// Returns the first speed region containing `currentTimeMs` (start inclusive, end exclusive),
// or null when no speed region applies at that time.
private findActiveSpeedRegion(
	currentTimeMs: number,
	speedRegions: SpeedRegion[],
): SpeedRegion | null {
	const containsPlayhead = (region: SpeedRegion): boolean => {
		if (currentTimeMs < region.startMs) {
			return false;
		}
		return currentTimeMs < region.endMs;
	};
	const match = speedRegions.find(containsPlayhead);
	return match ? match : null;
}
|
||||
|
||||
private cloneWithTimestamp(src: AudioData, newTimestamp: number): AudioData {
|
||||
const isPlanar = src.format?.includes("planar") ?? false;
|
||||
const numPlanes = isPlanar ? src.numberOfChannels : 1;
|
||||
|
||||
let totalSize = 0;
|
||||
for (let p = 0; p < numPlanes; p++) {
|
||||
totalSize += src.allocationSize({ planeIndex: p });
|
||||
for (let planeIndex = 0; planeIndex < numPlanes; planeIndex++) {
|
||||
totalSize += src.allocationSize({ planeIndex });
|
||||
}
|
||||
|
||||
const buffer = new ArrayBuffer(totalSize);
|
||||
let offset = 0;
|
||||
for (let p = 0; p < numPlanes; p++) {
|
||||
const planeSize = src.allocationSize({ planeIndex: p });
|
||||
src.copyTo(new Uint8Array(buffer, offset, planeSize), { planeIndex: p });
|
||||
for (let planeIndex = 0; planeIndex < numPlanes; planeIndex++) {
|
||||
const planeSize = src.allocationSize({ planeIndex });
|
||||
src.copyTo(new Uint8Array(buffer, offset, planeSize), { planeIndex });
|
||||
offset += planeSize;
|
||||
}
|
||||
|
||||
@@ -172,7 +486,7 @@ export class AudioProcessor {
|
||||
}
|
||||
|
||||
// Returns true when `timestampMs` falls inside any trim region (start inclusive, end exclusive).
private isInTrimRegion(timestampMs: number, trims: TrimRegion[]): boolean {
	return trims.some((trim) => timestampMs >= trim.startMs && timestampMs < trim.endMs);
}
|
||||
|
||||
private computeTrimOffset(timestampMs: number, trims: TrimRegion[]): number {
|
||||
|
||||
@@ -203,7 +203,9 @@ export class VideoExporter {
|
||||
await this.audioProcessor.process(
|
||||
demuxer,
|
||||
this.muxer!,
|
||||
this.config.videoUrl,
|
||||
this.config.trimRegions,
|
||||
this.config.speedRegions,
|
||||
readEndSec,
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user