fix: validate export duration and fix audio trim in speed-aware path
Two bugs in the export pipeline:

1. Container duration from WebM metadata can be unreliable (Chromium bug on Linux — reports Infinity, 0, or inflated values). The pipeline trusted this value, causing inflated exports, frozen video, and "decode ended early" errors.
   Fix: scan actual packet timestamps in loadMetadata() and compare them against the container duration. Use the packet-based ground truth when they diverge.

2. The speed-aware audio path (renderPitchPreservedTimelineAudio) recorded in real time via MediaRecorder but never paused recording during trim-region seeks. Seek dead time was captured as audio, inflating the audio track beyond the video duration.
   Fix: pause MediaRecorder during trim seeks, skip past any initial trim before recording starts, and wait for seek completion before resuming.

Fixes #276, #433. Partially addresses #428.
This commit is contained in:
@@ -5,6 +5,7 @@ import type { VideoMuxer } from "./muxer";
|
||||
const AUDIO_BITRATE = 128_000;
|
||||
const DECODE_BACKPRESSURE_LIMIT = 20;
|
||||
const MIN_SPEED_REGION_DELTA_MS = 0.0001;
|
||||
const SEEK_TIMEOUT_MS = 5_000;
|
||||
|
||||
export class AudioProcessor {
|
||||
private cancelled = false;
|
||||
@@ -20,7 +21,7 @@ export class AudioProcessor {
|
||||
videoUrl: string,
|
||||
trimRegions?: TrimRegion[],
|
||||
speedRegions?: SpeedRegion[],
|
||||
readEndSec?: number,
|
||||
validatedDurationSec?: number,
|
||||
): Promise<void> {
|
||||
const sortedTrims = trimRegions ? [...trimRegions].sort((a, b) => a.startMs - b.startMs) : [];
|
||||
const sortedSpeedRegions = speedRegions
|
||||
@@ -35,14 +36,20 @@ export class AudioProcessor {
|
||||
videoUrl,
|
||||
sortedTrims,
|
||||
sortedSpeedRegions,
|
||||
validatedDurationSec,
|
||||
);
|
||||
if (!this.cancelled) {
|
||||
if (!this.cancelled && renderedAudioBlob.size > 0) {
|
||||
await this.muxRenderedAudioBlob(renderedAudioBlob, muxer);
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// No speed edits: keep the original demux/decode/encode path with trim timestamp remap.
|
||||
const readEndSec =
|
||||
typeof validatedDurationSec === "number" && Number.isFinite(validatedDurationSec)
|
||||
? validatedDurationSec + 0.5
|
||||
: undefined;
|
||||
await this.processTrimOnlyAudio(demuxer, muxer, sortedTrims, readEndSec);
|
||||
}
|
||||
|
||||
@@ -187,6 +194,7 @@ export class AudioProcessor {
|
||||
videoUrl: string,
|
||||
trimRegions: TrimRegion[],
|
||||
speedRegions: SpeedRegion[],
|
||||
validatedDurationSec?: number,
|
||||
): Promise<Blob> {
|
||||
const media = document.createElement("audio");
|
||||
media.src = videoUrl;
|
||||
@@ -211,15 +219,41 @@ export class AudioProcessor {
|
||||
const destinationNode = audioContext.createMediaStreamDestination();
|
||||
sourceNode.connect(destinationNode);
|
||||
|
||||
const { recorder, recordedBlobPromise } = this.startAudioRecording(destinationNode.stream);
|
||||
let rafId: number | null = null;
|
||||
let recorder: MediaRecorder | null = null;
|
||||
let recordedBlobPromise: Promise<Blob> | null = null;
|
||||
|
||||
try {
|
||||
if (audioContext.state === "suspended") {
|
||||
await audioContext.resume();
|
||||
}
|
||||
|
||||
await this.seekTo(media, 0);
|
||||
// Skip past any initial trim region before recording starts
|
||||
// to avoid capturing trimmed audio during the first frames.
|
||||
let startPosition = 0;
|
||||
const initialTrim = this.findActiveTrimRegion(0, trimRegions);
|
||||
if (initialTrim) {
|
||||
startPosition = initialTrim.endMs / 1000;
|
||||
}
|
||||
|
||||
const effectiveEnd = validatedDurationSec ?? media.duration;
|
||||
if (startPosition >= effectiveEnd) {
|
||||
// All content is trimmed — return silent blob
|
||||
return new Blob([], { type: "audio/webm" });
|
||||
}
|
||||
|
||||
await this.seekTo(media, startPosition);
|
||||
|
||||
// Set initial playback rate for the starting position
|
||||
const initialSpeedRegion = this.findActiveSpeedRegion(startPosition * 1000, speedRegions);
|
||||
if (initialSpeedRegion) {
|
||||
media.playbackRate = initialSpeedRegion.speed;
|
||||
}
|
||||
|
||||
// Start recording only AFTER seeking past trims
|
||||
const recording = this.startAudioRecording(destinationNode.stream);
|
||||
recorder = recording.recorder;
|
||||
recordedBlobPromise = recording.recordedBlobPromise;
|
||||
await media.play();
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
@@ -249,24 +283,69 @@ export class AudioProcessor {
|
||||
return;
|
||||
}
|
||||
|
||||
// Stop playback at validated duration — browser's media.duration
|
||||
// may be inflated from bad container metadata.
|
||||
if (validatedDurationSec !== undefined && media.currentTime >= validatedDurationSec) {
|
||||
media.pause();
|
||||
cleanup();
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
|
||||
const currentTimeMs = media.currentTime * 1000;
|
||||
const activeTrimRegion = this.findActiveTrimRegion(currentTimeMs, trimRegions);
|
||||
|
||||
if (activeTrimRegion && !media.paused && !media.ended) {
|
||||
const skipToTime = activeTrimRegion.endMs / 1000;
|
||||
if (skipToTime >= media.duration) {
|
||||
if (
|
||||
skipToTime >= media.duration ||
|
||||
(validatedDurationSec !== undefined && skipToTime >= validatedDurationSec)
|
||||
) {
|
||||
media.pause();
|
||||
cleanup();
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
// Pause recording during trim seek to prevent capturing
|
||||
// silence/noise as the audio element seeks.
|
||||
media.pause();
|
||||
if (recorder?.state === "recording") recorder.pause();
|
||||
const onSeeked = () => {
|
||||
clearTimeout(seekTimer);
|
||||
if (this.cancelled) {
|
||||
cleanup();
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
if (recorder?.state === "paused") recorder.resume();
|
||||
media
|
||||
.play()
|
||||
.then(() => {
|
||||
if (!this.cancelled) rafId = requestAnimationFrame(tick);
|
||||
})
|
||||
.catch((err) => {
|
||||
cleanup();
|
||||
reject(
|
||||
new Error(
|
||||
`Failed to resume playback after trim seek: ${err instanceof Error ? err.message : String(err)}`,
|
||||
),
|
||||
);
|
||||
});
|
||||
};
|
||||
const seekTimer = window.setTimeout(() => {
|
||||
media.removeEventListener("seeked", onSeeked);
|
||||
cleanup();
|
||||
reject(new Error("Audio seek timed out while skipping trim region"));
|
||||
}, SEEK_TIMEOUT_MS);
|
||||
media.addEventListener("seeked", onSeeked, { once: true });
|
||||
media.currentTime = skipToTime;
|
||||
} else {
|
||||
const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions);
|
||||
const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1;
|
||||
if (Math.abs(media.playbackRate - playbackRate) > 0.0001) {
|
||||
media.playbackRate = playbackRate;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions);
|
||||
const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1;
|
||||
if (Math.abs(media.playbackRate - playbackRate) > 0.0001) {
|
||||
media.playbackRate = playbackRate;
|
||||
}
|
||||
|
||||
if (!media.paused && !media.ended) {
|
||||
@@ -286,7 +365,7 @@ export class AudioProcessor {
|
||||
cancelAnimationFrame(rafId);
|
||||
}
|
||||
media.pause();
|
||||
if (recorder.state !== "inactive") {
|
||||
if (recorder && recorder.state !== "inactive") {
|
||||
recorder.stop();
|
||||
}
|
||||
destinationNode.stream.getTracks().forEach((track) => track.stop());
|
||||
@@ -297,6 +376,9 @@ export class AudioProcessor {
|
||||
media.load();
|
||||
}
|
||||
|
||||
if (!recordedBlobPromise) {
|
||||
return new Blob([], { type: "audio/webm" });
|
||||
}
|
||||
const recordedBlob = await recordedBlobPromise;
|
||||
if (this.cancelled) {
|
||||
throw new Error("Export cancelled");
|
||||
|
||||
@@ -1,5 +1,36 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { shouldFailDecodeEndedEarly } from "./streamingDecoder";
|
||||
import { shouldFailDecodeEndedEarly, validateDuration } from "./streamingDecoder";
|
||||
|
||||
describe("validateDuration", () => {
	// Table of scenarios: [test title, container-reported duration (s),
	// packet-scanned duration (s), value validateDuration must return].
	const scenarios: ReadonlyArray<readonly [string, number, number, number]> = [
		// Unusable container metadata: the scanned value wins.
		["returns scanned duration when container reports Infinity", Infinity, 15.3, 15.3],
		["returns scanned duration when container reports 0", 0, 15.3, 15.3],
		["returns scanned duration when container reports NaN", NaN, 15.3, 15.3],
		// Container overshoots the scanned value by more than the divergence threshold.
		["returns scanned duration when container is inflated beyond threshold", 42, 15.3, 15.3],
		// Container and scanned values agree closely: the container value is kept.
		["returns container duration when values are close", 15.5, 15.3, 15.5],
		// container < scanned (scanned overshoot from last frame duration)
		["returns container duration when scanned is slightly higher", 15.0, 15.3, 15.0],
		// Nothing could be scanned: fall back to the container value.
		["returns container duration when scanned is zero (corrupted/empty file)", 10, 0, 10],
	];

	for (const [title, containerDuration, scannedDuration, expected] of scenarios) {
		it(title, () => {
			expect(validateDuration(containerDuration, scannedDuration)).toBe(expected);
		});
	}
});
|
||||
|
||||
describe("shouldFailDecodeEndedEarly", () => {
|
||||
it("does not fail once every segment has been satisfied", () => {
|
||||
|
||||
@@ -70,6 +70,32 @@ type EarlyDecodeEndCheck = {
|
||||
const EARLY_DECODE_END_THRESHOLD_SEC = 1;
|
||||
const METADATA_TAIL_TOLERANCE_SEC = 1.5;
|
||||
const STREAM_DURATION_MATCH_TOLERANCE_SEC = 0.25;
|
||||
/** Maximum amount (seconds) the container duration may exceed the scanned duration before it is distrusted. */
const DURATION_DIVERGENCE_THRESHOLD_SEC = 1.5;

/**
 * Validate container duration against actual packet timestamps.
 *
 * Chrome/Electron's MediaRecorder writes WebM containers with unreliable
 * Duration fields (often Infinity, 0, or inflated) — especially on Linux.
 * This function picks the most trustworthy duration value and never returns
 * NaN, Infinity, or a negative number.
 *
 * Selection rules, in order:
 * 1. Scanned duration unusable (non-finite or <= 0): return the container
 *    duration if it is finite and positive, otherwise 0.
 * 2. Container duration unusable (non-finite or <= 0): return the scanned duration.
 * 3. Container exceeds scanned by more than DURATION_DIVERGENCE_THRESHOLD_SEC:
 *    return the scanned duration.
 * 4. Otherwise return the container duration.
 *
 * @param containerDuration Duration from the container-level metadata, in seconds
 * @param scannedDuration Duration derived from actual packet timestamps (ground truth), in seconds
 * @returns A finite, non-negative duration in seconds
 */
export function validateDuration(containerDuration: number, scannedDuration: number): number {
	const containerUsable = Number.isFinite(containerDuration) && containerDuration > 0;
	const scannedUsable = Number.isFinite(scannedDuration) && scannedDuration > 0;

	if (!scannedUsable) {
		// No usable packet-derived duration (corrupted/empty file) — fall back to the
		// container value, but only when it is itself sane. Passing NaN/Infinity through
		// here would re-introduce the very metadata values this function filters out.
		// (The original code notes that downstream shouldFailDecodeEndedEarly is
		// expected to catch truly empty files.)
		return containerUsable ? containerDuration : 0;
	}

	if (!containerUsable) {
		return scannedDuration;
	}

	// Only an inflated container is corrected; a container slightly shorter than the
	// scanned value is kept as-is.
	const containerInflated =
		containerDuration - scannedDuration > DURATION_DIVERGENCE_THRESHOLD_SEC;
	return containerInflated ? scannedDuration : containerDuration;
}
|
||||
|
||||
export function shouldFailDecodeEndedEarly({
|
||||
cancelled,
|
||||
@@ -201,10 +227,34 @@ export class StreamingVideoDecoder {
|
||||
|
||||
const audioStream = mediaInfo.streams.find((s) => s.codec_type_string === "audio");
|
||||
|
||||
// Scan video packets to find the true content boundary.
|
||||
// MediaRecorder (especially on Linux) writes unreliable container durations.
|
||||
// Packet timestamps are ground truth — no decode needed, just timestamp reads.
|
||||
let maxPacketEndUs = 0;
|
||||
const scanReader = (
|
||||
this.demuxer.read("video") as ReadableStream<EncodedVideoChunk>
|
||||
).getReader();
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await scanReader.read();
|
||||
if (done || !value) break;
|
||||
const endUs = value.timestamp + (value.duration ?? 0);
|
||||
if (endUs > maxPacketEndUs) maxPacketEndUs = endUs;
|
||||
}
|
||||
} finally {
|
||||
try {
|
||||
await scanReader.cancel();
|
||||
} catch {
|
||||
/* already closed */
|
||||
}
|
||||
}
|
||||
const scannedDuration = maxPacketEndUs / 1_000_000;
|
||||
const validatedDuration = validateDuration(mediaInfo.duration, scannedDuration);
|
||||
|
||||
this.metadata = {
|
||||
width: videoStream?.width || 1920,
|
||||
height: videoStream?.height || 1080,
|
||||
duration: mediaInfo.duration,
|
||||
duration: validatedDuration,
|
||||
streamDuration:
|
||||
typeof videoStream?.duration === "number" && Number.isFinite(videoStream.duration)
|
||||
? videoStream.duration
|
||||
@@ -305,7 +355,7 @@ export class StreamingVideoDecoder {
|
||||
|
||||
// One forward stream through the whole file.
|
||||
// Pass explicit range because some containers are truncated when no end is provided.
|
||||
const readEndSec = Math.max(this.metadata.duration, this.metadata.streamDuration ?? 0) + 0.5;
|
||||
const readEndSec = this.metadata.duration + 0.5;
|
||||
const reader = this.demuxer.read("video", 0, readEndSec).getReader();
|
||||
|
||||
// Feed chunks to decoder in background with backpressure
|
||||
|
||||
@@ -157,17 +157,11 @@ export class VideoExporter {
|
||||
this.muxer = muxer;
|
||||
await muxer.initialize();
|
||||
|
||||
const { effectiveDuration, totalFrames } = streamingDecoder.getExportMetrics(
|
||||
const { totalFrames } = streamingDecoder.getExportMetrics(
|
||||
this.config.frameRate,
|
||||
this.config.trimRegions,
|
||||
this.config.speedRegions,
|
||||
);
|
||||
const readEndSec = Math.max(videoInfo.duration, videoInfo.streamDuration ?? 0) + 0.5;
|
||||
|
||||
console.log("[VideoExporter] Original duration:", videoInfo.duration, "s");
|
||||
console.log("[VideoExporter] Effective duration:", effectiveDuration, "s");
|
||||
console.log("[VideoExporter] Total frames to export:", totalFrames);
|
||||
console.log("[VideoExporter] Using streaming decode (web-demuxer + VideoDecoder)");
|
||||
|
||||
const frameDuration = 1_000_000 / this.config.frameRate;
|
||||
let frameIndex = 0;
|
||||
@@ -346,7 +340,7 @@ export class VideoExporter {
|
||||
this.config.videoUrl,
|
||||
this.config.trimRegions,
|
||||
this.config.speedRegions,
|
||||
readEndSec,
|
||||
videoInfo.duration,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user