fix: validate export duration and fix audio trim in speed-aware path

Two bugs in the export pipeline:

1. Container duration from WebM metadata can be unreliable (Chromium bug
   on Linux — reports Infinity, 0, or inflated values). The pipeline
   trusted this value, causing inflated exports, frozen video, and
   "decode ended early" errors.

   Fix: scan actual packet timestamps in loadMetadata() and compare
   against container duration. Use packet-based ground truth when they
   diverge.

2. The speed-aware audio path (renderPitchPreservedTimelineAudio)
   recorded in real-time via MediaRecorder but never paused recording
   during trim-region seeks. Seek dead time was captured as audio,
   inflating the audio track beyond the video duration.

   Fix: pause MediaRecorder during trim seeks, skip past initial trim
   before recording starts, wait for seek completion before resuming.

Fixes #276, #433. Partially addresses #428.
This commit is contained in:
Enriquefft
2026-04-12 17:54:43 -05:00
parent 6d449a46c4
commit 5e62ad3215
4 changed files with 180 additions and 23 deletions
+94 -12
View File
@@ -5,6 +5,7 @@ import type { VideoMuxer } from "./muxer";
const AUDIO_BITRATE = 128_000;
const DECODE_BACKPRESSURE_LIMIT = 20;
const MIN_SPEED_REGION_DELTA_MS = 0.0001;
const SEEK_TIMEOUT_MS = 5_000;
export class AudioProcessor {
private cancelled = false;
@@ -20,7 +21,7 @@ export class AudioProcessor {
videoUrl: string,
trimRegions?: TrimRegion[],
speedRegions?: SpeedRegion[],
readEndSec?: number,
validatedDurationSec?: number,
): Promise<void> {
const sortedTrims = trimRegions ? [...trimRegions].sort((a, b) => a.startMs - b.startMs) : [];
const sortedSpeedRegions = speedRegions
@@ -35,14 +36,20 @@ export class AudioProcessor {
videoUrl,
sortedTrims,
sortedSpeedRegions,
validatedDurationSec,
);
if (!this.cancelled) {
if (!this.cancelled && renderedAudioBlob.size > 0) {
await this.muxRenderedAudioBlob(renderedAudioBlob, muxer);
return;
}
return;
}
// No speed edits: keep the original demux/decode/encode path with trim timestamp remap.
const readEndSec =
typeof validatedDurationSec === "number" && Number.isFinite(validatedDurationSec)
? validatedDurationSec + 0.5
: undefined;
await this.processTrimOnlyAudio(demuxer, muxer, sortedTrims, readEndSec);
}
@@ -187,6 +194,7 @@ export class AudioProcessor {
videoUrl: string,
trimRegions: TrimRegion[],
speedRegions: SpeedRegion[],
validatedDurationSec?: number,
): Promise<Blob> {
const media = document.createElement("audio");
media.src = videoUrl;
@@ -211,15 +219,41 @@ export class AudioProcessor {
const destinationNode = audioContext.createMediaStreamDestination();
sourceNode.connect(destinationNode);
const { recorder, recordedBlobPromise } = this.startAudioRecording(destinationNode.stream);
let rafId: number | null = null;
let recorder: MediaRecorder | null = null;
let recordedBlobPromise: Promise<Blob> | null = null;
try {
if (audioContext.state === "suspended") {
await audioContext.resume();
}
await this.seekTo(media, 0);
// Skip past any initial trim region before recording starts
// to avoid capturing trimmed audio during the first frames.
let startPosition = 0;
const initialTrim = this.findActiveTrimRegion(0, trimRegions);
if (initialTrim) {
startPosition = initialTrim.endMs / 1000;
}
const effectiveEnd = validatedDurationSec ?? media.duration;
if (startPosition >= effectiveEnd) {
// All content is trimmed — return silent blob
return new Blob([], { type: "audio/webm" });
}
await this.seekTo(media, startPosition);
// Set initial playback rate for the starting position
const initialSpeedRegion = this.findActiveSpeedRegion(startPosition * 1000, speedRegions);
if (initialSpeedRegion) {
media.playbackRate = initialSpeedRegion.speed;
}
// Start recording only AFTER seeking past trims
const recording = this.startAudioRecording(destinationNode.stream);
recorder = recording.recorder;
recordedBlobPromise = recording.recordedBlobPromise;
await media.play();
await new Promise<void>((resolve, reject) => {
@@ -249,24 +283,69 @@ export class AudioProcessor {
return;
}
// Stop playback at validated duration — browser's media.duration
// may be inflated from bad container metadata.
if (validatedDurationSec !== undefined && media.currentTime >= validatedDurationSec) {
media.pause();
cleanup();
resolve();
return;
}
const currentTimeMs = media.currentTime * 1000;
const activeTrimRegion = this.findActiveTrimRegion(currentTimeMs, trimRegions);
if (activeTrimRegion && !media.paused && !media.ended) {
const skipToTime = activeTrimRegion.endMs / 1000;
if (skipToTime >= media.duration) {
if (
skipToTime >= media.duration ||
(validatedDurationSec !== undefined && skipToTime >= validatedDurationSec)
) {
media.pause();
cleanup();
resolve();
return;
}
// Pause recording during trim seek to prevent capturing
// silence/noise as the audio element seeks.
media.pause();
if (recorder?.state === "recording") recorder.pause();
const onSeeked = () => {
clearTimeout(seekTimer);
if (this.cancelled) {
cleanup();
resolve();
return;
}
if (recorder?.state === "paused") recorder.resume();
media
.play()
.then(() => {
if (!this.cancelled) rafId = requestAnimationFrame(tick);
})
.catch((err) => {
cleanup();
reject(
new Error(
`Failed to resume playback after trim seek: ${err instanceof Error ? err.message : String(err)}`,
),
);
});
};
const seekTimer = window.setTimeout(() => {
media.removeEventListener("seeked", onSeeked);
cleanup();
reject(new Error("Audio seek timed out while skipping trim region"));
}, SEEK_TIMEOUT_MS);
media.addEventListener("seeked", onSeeked, { once: true });
media.currentTime = skipToTime;
} else {
const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions);
const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1;
if (Math.abs(media.playbackRate - playbackRate) > 0.0001) {
media.playbackRate = playbackRate;
}
return;
}
const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions);
const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1;
if (Math.abs(media.playbackRate - playbackRate) > 0.0001) {
media.playbackRate = playbackRate;
}
if (!media.paused && !media.ended) {
@@ -286,7 +365,7 @@ export class AudioProcessor {
cancelAnimationFrame(rafId);
}
media.pause();
if (recorder.state !== "inactive") {
if (recorder && recorder.state !== "inactive") {
recorder.stop();
}
destinationNode.stream.getTracks().forEach((track) => track.stop());
@@ -297,6 +376,9 @@ export class AudioProcessor {
media.load();
}
if (!recordedBlobPromise) {
return new Blob([], { type: "audio/webm" });
}
const recordedBlob = await recordedBlobPromise;
if (this.cancelled) {
throw new Error("Export cancelled");
+32 -1
View File
@@ -1,5 +1,36 @@
import { describe, expect, it } from "vitest";
import { shouldFailDecodeEndedEarly } from "./streamingDecoder";
import { shouldFailDecodeEndedEarly, validateDuration } from "./streamingDecoder";
// Table-driven spec for validateDuration: each row names the scenario, gives the
// container-reported and packet-scanned durations, and the duration we expect back.
describe("validateDuration", () => {
  const scenarios: Array<{
    title: string;
    container: number;
    scanned: number;
    expected: number;
  }> = [
    {
      title: "returns scanned duration when container reports Infinity",
      container: Infinity,
      scanned: 15.3,
      expected: 15.3,
    },
    {
      title: "returns scanned duration when container reports 0",
      container: 0,
      scanned: 15.3,
      expected: 15.3,
    },
    {
      title: "returns scanned duration when container reports NaN",
      container: NaN,
      scanned: 15.3,
      expected: 15.3,
    },
    {
      title: "returns scanned duration when container is inflated beyond threshold",
      container: 42,
      scanned: 15.3,
      expected: 15.3,
    },
    {
      title: "returns container duration when values are close",
      container: 15.5,
      scanned: 15.3,
      expected: 15.5,
    },
    {
      // container < scanned (scanned overshoot from last frame duration)
      title: "returns container duration when scanned is slightly higher",
      container: 15.0,
      scanned: 15.3,
      expected: 15.0,
    },
    {
      title: "returns container duration when scanned is zero (corrupted/empty file)",
      container: 10,
      scanned: 0,
      expected: 10,
    },
  ];

  for (const { title, container, scanned, expected } of scenarios) {
    it(title, () => {
      expect(validateDuration(container, scanned)).toBe(expected);
    });
  }
});
describe("shouldFailDecodeEndedEarly", () => {
it("does not fail once every segment has been satisfied", () => {
+52 -2
View File
@@ -70,6 +70,32 @@ type EarlyDecodeEndCheck = {
const EARLY_DECODE_END_THRESHOLD_SEC = 1;
const METADATA_TAIL_TOLERANCE_SEC = 1.5;
const STREAM_DURATION_MATCH_TOLERANCE_SEC = 0.25;
const DURATION_DIVERGENCE_THRESHOLD_SEC = 1.5;

/**
 * Validate container duration against actual packet timestamps.
 *
 * Chrome/Electron's MediaRecorder writes WebM containers with unreliable
 * Duration fields (often Infinity, 0, or inflated) especially on Linux.
 * This function picks the most trustworthy duration value and never returns
 * NaN, Infinity, or a negative number.
 *
 * Selection rules, in order:
 *  1. If the scanned duration is unusable (non-finite or <= 0), fall back to the
 *     container duration when that one is usable, otherwise 0.
 *  2. If the container duration is unusable, use the scanned duration.
 *  3. If the container exceeds the scanned duration by more than
 *     DURATION_DIVERGENCE_THRESHOLD_SEC, treat it as inflated and use the
 *     scanned duration.
 *  4. Otherwise keep the container duration.
 *
 * @param containerDuration Duration from the container-level metadata, in seconds
 * @param scannedDuration Duration derived from actual packet timestamps (ground truth), in seconds
 * @returns A finite, non-negative duration in seconds
 */
export function validateDuration(containerDuration: number, scannedDuration: number): number {
  const containerUsable = Number.isFinite(containerDuration) && containerDuration > 0;
  const scannedUsable = Number.isFinite(scannedDuration) && scannedDuration > 0;

  if (!scannedUsable) {
    // No trustworthy packet data (corrupted/empty file). Only fall back to the
    // container value when it is itself sane: Math.max(NaN, 0) is NaN and
    // Math.max(Infinity, 0) is Infinity, so a plain clamp would leak the very
    // values this function is meant to filter out.
    return containerUsable ? containerDuration : 0;
  }

  if (!containerUsable) {
    return scannedDuration;
  }

  // Only an *inflated* container is overridden; a container slightly shorter
  // than the scan is kept as-is.
  return containerDuration - scannedDuration > DURATION_DIVERGENCE_THRESHOLD_SEC
    ? scannedDuration
    : containerDuration;
}
export function shouldFailDecodeEndedEarly({
cancelled,
@@ -201,10 +227,34 @@ export class StreamingVideoDecoder {
const audioStream = mediaInfo.streams.find((s) => s.codec_type_string === "audio");
// Scan video packets to find the true content boundary.
// MediaRecorder (especially on Linux) writes unreliable container durations.
// Packet timestamps are ground truth — no decode needed, just timestamp reads.
let maxPacketEndUs = 0;
const scanReader = (
this.demuxer.read("video") as ReadableStream<EncodedVideoChunk>
).getReader();
try {
while (true) {
const { done, value } = await scanReader.read();
if (done || !value) break;
const endUs = value.timestamp + (value.duration ?? 0);
if (endUs > maxPacketEndUs) maxPacketEndUs = endUs;
}
} finally {
try {
await scanReader.cancel();
} catch {
/* already closed */
}
}
const scannedDuration = maxPacketEndUs / 1_000_000;
const validatedDuration = validateDuration(mediaInfo.duration, scannedDuration);
this.metadata = {
width: videoStream?.width || 1920,
height: videoStream?.height || 1080,
duration: mediaInfo.duration,
duration: validatedDuration,
streamDuration:
typeof videoStream?.duration === "number" && Number.isFinite(videoStream.duration)
? videoStream.duration
@@ -305,7 +355,7 @@ export class StreamingVideoDecoder {
// One forward stream through the whole file.
// Pass explicit range because some containers are truncated when no end is provided.
const readEndSec = Math.max(this.metadata.duration, this.metadata.streamDuration ?? 0) + 0.5;
const readEndSec = this.metadata.duration + 0.5;
const reader = this.demuxer.read("video", 0, readEndSec).getReader();
// Feed chunks to decoder in background with backpressure
+2 -8
View File
@@ -157,17 +157,11 @@ export class VideoExporter {
this.muxer = muxer;
await muxer.initialize();
const { effectiveDuration, totalFrames } = streamingDecoder.getExportMetrics(
const { totalFrames } = streamingDecoder.getExportMetrics(
this.config.frameRate,
this.config.trimRegions,
this.config.speedRegions,
);
const readEndSec = Math.max(videoInfo.duration, videoInfo.streamDuration ?? 0) + 0.5;
console.log("[VideoExporter] Original duration:", videoInfo.duration, "s");
console.log("[VideoExporter] Effective duration:", effectiveDuration, "s");
console.log("[VideoExporter] Total frames to export:", totalFrames);
console.log("[VideoExporter] Using streaming decode (web-demuxer + VideoDecoder)");
const frameDuration = 1_000_000 / this.config.frameRate;
let frameIndex = 0;
@@ -346,7 +340,7 @@ export class VideoExporter {
this.config.videoUrl,
this.config.trimRegions,
this.config.speedRegions,
readEndSec,
videoInfo.duration,
);
}
}