Merge pull request #434 from Enriquefft/fix/export-audio-duration-validation
fix: validate export duration and fix audio trim in speed-aware path
This commit is contained in:
@@ -5,6 +5,7 @@ import type { VideoMuxer } from "./muxer";
|
||||
const AUDIO_BITRATE = 128_000;
|
||||
const DECODE_BACKPRESSURE_LIMIT = 20;
|
||||
const MIN_SPEED_REGION_DELTA_MS = 0.0001;
|
||||
const SEEK_TIMEOUT_MS = 5_000;
|
||||
|
||||
export class AudioProcessor {
|
||||
private cancelled = false;
|
||||
@@ -18,9 +19,9 @@ export class AudioProcessor {
|
||||
demuxer: WebDemuxer,
|
||||
muxer: VideoMuxer,
|
||||
videoUrl: string,
|
||||
trimRegions?: TrimRegion[],
|
||||
speedRegions?: SpeedRegion[],
|
||||
readEndSec?: number,
|
||||
trimRegions: TrimRegion[] | undefined,
|
||||
speedRegions: SpeedRegion[] | undefined,
|
||||
validatedDurationSec: number,
|
||||
): Promise<void> {
|
||||
const sortedTrims = trimRegions ? [...trimRegions].sort((a, b) => a.startMs - b.startMs) : [];
|
||||
const sortedSpeedRegions = speedRegions
|
||||
@@ -35,14 +36,19 @@ export class AudioProcessor {
|
||||
videoUrl,
|
||||
sortedTrims,
|
||||
sortedSpeedRegions,
|
||||
validatedDurationSec,
|
||||
);
|
||||
if (!this.cancelled) {
|
||||
if (!this.cancelled && renderedAudioBlob.size > 0) {
|
||||
await this.muxRenderedAudioBlob(renderedAudioBlob, muxer);
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// No speed edits: keep the original demux/decode/encode path with trim timestamp remap.
|
||||
// The +0.5s buffer mirrors streamingDecoder.decodeAll's read window so the trim-only
|
||||
// and speed-aware paths agree on how far to read past the validated duration boundary.
|
||||
const readEndSec = validatedDurationSec + 0.5;
|
||||
await this.processTrimOnlyAudio(demuxer, muxer, sortedTrims, readEndSec);
|
||||
}
|
||||
|
||||
@@ -55,7 +61,7 @@ export class AudioProcessor {
|
||||
): Promise<void> {
|
||||
let audioConfig: AudioDecoderConfig;
|
||||
try {
|
||||
audioConfig = (await demuxer.getDecoderConfig("audio")) as AudioDecoderConfig;
|
||||
audioConfig = await demuxer.getDecoderConfig("audio");
|
||||
} catch {
|
||||
console.warn("[AudioProcessor] No audio track found, skipping");
|
||||
return;
|
||||
@@ -80,11 +86,10 @@ export class AudioProcessor {
|
||||
typeof readEndSec === "number" && Number.isFinite(readEndSec)
|
||||
? Math.max(0, readEndSec)
|
||||
: undefined;
|
||||
const audioStream = (
|
||||
const audioStream =
|
||||
safeReadEndSec !== undefined
|
||||
? demuxer.read("audio", 0, safeReadEndSec)
|
||||
: demuxer.read("audio")
|
||||
) as ReadableStream<EncodedAudioChunk>;
|
||||
: demuxer.read("audio");
|
||||
const reader = audioStream.getReader();
|
||||
|
||||
try {
|
||||
@@ -187,6 +192,7 @@ export class AudioProcessor {
|
||||
videoUrl: string,
|
||||
trimRegions: TrimRegion[],
|
||||
speedRegions: SpeedRegion[],
|
||||
validatedDurationSec: number,
|
||||
): Promise<Blob> {
|
||||
const media = document.createElement("audio");
|
||||
media.src = videoUrl;
|
||||
@@ -211,15 +217,44 @@ export class AudioProcessor {
|
||||
const destinationNode = audioContext.createMediaStreamDestination();
|
||||
sourceNode.connect(destinationNode);
|
||||
|
||||
const { recorder, recordedBlobPromise } = this.startAudioRecording(destinationNode.stream);
|
||||
let rafId: number | null = null;
|
||||
let recorder: MediaRecorder | null = null;
|
||||
let recordedBlobPromise: Promise<Blob> | null = null;
|
||||
|
||||
try {
|
||||
if (audioContext.state === "suspended") {
|
||||
await audioContext.resume();
|
||||
}
|
||||
|
||||
await this.seekTo(media, 0);
|
||||
// Skip past any initial trim region(s) before recording starts to avoid
|
||||
// capturing trimmed audio during the first rAF frames of playback.
|
||||
// Loops to handle back-to-back or overlapping trims at t=0.
|
||||
const effectiveEnd = validatedDurationSec;
|
||||
let startPosition = 0;
|
||||
for (let i = 0; i <= trimRegions.length; i++) {
|
||||
const activeTrim = this.findActiveTrimRegion(startPosition * 1000, trimRegions);
|
||||
if (!activeTrim) break;
|
||||
startPosition = activeTrim.endMs / 1000;
|
||||
if (startPosition >= effectiveEnd) break;
|
||||
}
|
||||
|
||||
if (startPosition >= effectiveEnd) {
|
||||
// All content is trimmed — return silent blob
|
||||
return new Blob([], { type: "audio/webm" });
|
||||
}
|
||||
|
||||
await this.seekTo(media, startPosition);
|
||||
|
||||
// Set initial playback rate for the starting position
|
||||
const initialSpeedRegion = this.findActiveSpeedRegion(startPosition * 1000, speedRegions);
|
||||
if (initialSpeedRegion) {
|
||||
media.playbackRate = initialSpeedRegion.speed;
|
||||
}
|
||||
|
||||
// Start recording only AFTER seeking past trims
|
||||
const recording = this.startAudioRecording(destinationNode.stream);
|
||||
recorder = recording.recorder;
|
||||
recordedBlobPromise = recording.recordedBlobPromise;
|
||||
await media.play();
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
@@ -249,24 +284,66 @@ export class AudioProcessor {
|
||||
return;
|
||||
}
|
||||
|
||||
// Stop playback at validated duration — browser's media.duration
|
||||
// may be inflated from bad container metadata.
|
||||
if (media.currentTime >= validatedDurationSec) {
|
||||
media.pause();
|
||||
cleanup();
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
|
||||
const currentTimeMs = media.currentTime * 1000;
|
||||
const activeTrimRegion = this.findActiveTrimRegion(currentTimeMs, trimRegions);
|
||||
|
||||
if (activeTrimRegion && !media.paused && !media.ended) {
|
||||
const skipToTime = activeTrimRegion.endMs / 1000;
|
||||
if (skipToTime >= media.duration) {
|
||||
if (skipToTime >= media.duration || skipToTime >= validatedDurationSec) {
|
||||
media.pause();
|
||||
cleanup();
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
// Pause recording during trim seek to prevent capturing
|
||||
// silence/noise as the audio element seeks.
|
||||
media.pause();
|
||||
if (recorder?.state === "recording") recorder.pause();
|
||||
const onSeeked = () => {
|
||||
clearTimeout(seekTimer);
|
||||
if (this.cancelled) {
|
||||
cleanup();
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
if (recorder?.state === "paused") recorder.resume();
|
||||
media
|
||||
.play()
|
||||
.then(() => {
|
||||
if (!this.cancelled) rafId = requestAnimationFrame(tick);
|
||||
})
|
||||
.catch((err) => {
|
||||
cleanup();
|
||||
reject(
|
||||
new Error(
|
||||
`Failed to resume playback after trim seek: ${err instanceof Error ? err.message : String(err)}`,
|
||||
),
|
||||
);
|
||||
});
|
||||
};
|
||||
const seekTimer = window.setTimeout(() => {
|
||||
media.removeEventListener("seeked", onSeeked);
|
||||
cleanup();
|
||||
reject(new Error("Audio seek timed out while skipping trim region"));
|
||||
}, SEEK_TIMEOUT_MS);
|
||||
media.addEventListener("seeked", onSeeked, { once: true });
|
||||
media.currentTime = skipToTime;
|
||||
} else {
|
||||
const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions);
|
||||
const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1;
|
||||
if (Math.abs(media.playbackRate - playbackRate) > 0.0001) {
|
||||
media.playbackRate = playbackRate;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions);
|
||||
const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1;
|
||||
if (Math.abs(media.playbackRate - playbackRate) > 0.0001) {
|
||||
media.playbackRate = playbackRate;
|
||||
}
|
||||
|
||||
if (!media.paused && !media.ended) {
|
||||
@@ -286,7 +363,7 @@ export class AudioProcessor {
|
||||
cancelAnimationFrame(rafId);
|
||||
}
|
||||
media.pause();
|
||||
if (recorder.state !== "inactive") {
|
||||
if (recorder && recorder.state !== "inactive") {
|
||||
recorder.stop();
|
||||
}
|
||||
destinationNode.stream.getTracks().forEach((track) => track.stop());
|
||||
@@ -297,6 +374,12 @@ export class AudioProcessor {
|
||||
media.load();
|
||||
}
|
||||
|
||||
if (!recordedBlobPromise) {
|
||||
// Invariant: either an early return above fires, or startAudioRecording ran and
|
||||
// populated recordedBlobPromise before the playback Promise resolved. Reaching
|
||||
// here means that contract was broken — fail loud instead of returning silence.
|
||||
throw new Error("Audio recorder finished without assigning recordedBlobPromise");
|
||||
}
|
||||
const recordedBlob = await recordedBlobPromise;
|
||||
if (this.cancelled) {
|
||||
throw new Error("Export cancelled");
|
||||
@@ -314,8 +397,8 @@ export class AudioProcessor {
|
||||
|
||||
try {
|
||||
await demuxer.load(file);
|
||||
const audioConfig = (await demuxer.getDecoderConfig("audio")) as AudioDecoderConfig;
|
||||
const reader = (demuxer.read("audio") as ReadableStream<EncodedAudioChunk>).getReader();
|
||||
const audioConfig = await demuxer.getDecoderConfig("audio");
|
||||
const reader = demuxer.read("audio").getReader();
|
||||
let isFirstChunk = true;
|
||||
|
||||
try {
|
||||
|
||||
@@ -1,5 +1,44 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { shouldFailDecodeEndedEarly } from "./streamingDecoder";
|
||||
import { shouldFailDecodeEndedEarly, validateDuration } from "./streamingDecoder";
|
||||
|
||||
// Unit tests for validateDuration(containerDuration, scannedDuration).
// Cases covered, in order:
//   - unusable container values (Infinity, 0, NaN) -> the scanned duration is returned
//   - container and scanned values far apart (42 vs 15.3, 10 vs 15.3) -> the scanned
//     duration is returned
//   - container and scanned values close together (15.5 vs 15.3, 15.0 vs 15.3) -> the
//     container duration is returned
//   - scanned duration of zero -> the container duration is returned when it is a
//     usable number, otherwise 0
describe("validateDuration", () => {
|
||||
it("returns scanned duration when container reports Infinity", () => {
|
||||
expect(validateDuration(Infinity, 15.3)).toBe(15.3);
|
||||
});
|
||||
|
||||
it("returns scanned duration when container reports 0", () => {
|
||||
expect(validateDuration(0, 15.3)).toBe(15.3);
|
||||
});
|
||||
|
||||
it("returns scanned duration when container reports NaN", () => {
|
||||
expect(validateDuration(NaN, 15.3)).toBe(15.3);
|
||||
});
|
||||
|
||||
it("returns scanned duration when container is inflated beyond threshold", () => {
|
||||
expect(validateDuration(42, 15.3)).toBe(15.3);
|
||||
});
|
||||
|
||||
it("returns container duration when values are close", () => {
|
||||
expect(validateDuration(15.5, 15.3)).toBe(15.5);
|
||||
});
|
||||
|
||||
it("returns container duration when scanned is slightly higher", () => {
|
||||
// container < scanned (scanned overshoot from last frame duration)
|
||||
expect(validateDuration(15.0, 15.3)).toBe(15.0);
|
||||
});
|
||||
|
||||
it("returns scanned duration when container under-reports beyond threshold", () => {
|
||||
expect(validateDuration(10, 15.3)).toBe(15.3);
|
||||
});
|
||||
|
||||
it("returns container duration when scanned is zero (corrupted/empty file)", () => {
|
||||
expect(validateDuration(10, 0)).toBe(10);
|
||||
});
|
||||
|
||||
it("returns 0 when both container is NaN and scanned is zero", () => {
|
||||
expect(validateDuration(NaN, 0)).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("shouldFailDecodeEndedEarly", () => {
|
||||
it("does not fail once every segment has been satisfied", () => {
|
||||
|
||||
@@ -70,6 +70,37 @@ type EarlyDecodeEndCheck = {
|
||||
const EARLY_DECODE_END_THRESHOLD_SEC = 1;
|
||||
const METADATA_TAIL_TOLERANCE_SEC = 1.5;
|
||||
const STREAM_DURATION_MATCH_TOLERANCE_SEC = 0.25;
|
||||
// Maximum allowed gap (seconds) between the container-reported duration and the
// packet-scanned duration before the container value is considered untrustworthy.
const DURATION_DIVERGENCE_THRESHOLD_SEC = 1.5;
// Fallback upper bound for the packet scan when no reliable duration hint is
// available. An explicit end is required (some containers are truncated without
// one), but a hint-derived bound would cap the scan prematurely when the
// container/stream durations are missing or corrupt.
const SCAN_UNBOUNDED_FALLBACK_SEC = 24 * 60 * 60;

/**
 * Validate container duration against actual packet timestamps.
 *
 * Chrome/Electron's MediaRecorder writes WebM containers with unreliable
 * Duration fields (often Infinity, 0, or inflated) — especially on Linux.
 * This function picks the most trustworthy duration value.
 *
 * Decision order:
 * 1. If the scan produced no usable value (non-finite or `<= 0`), fall back to
 *    the container duration when it is a finite positive number, else `0`.
 * 2. If the container duration is unusable (non-finite or `<= 0`), use the
 *    scanned duration.
 * 3. If the two differ by more than `DURATION_DIVERGENCE_THRESHOLD_SEC`, use
 *    the scanned duration.
 * 4. Otherwise keep the container duration.
 *
 * The result is always a finite, non-negative number, so it is safe to use as
 * a read-window end bound.
 *
 * @param containerDuration Duration from the container-level metadata
 * @param scannedDuration Duration derived from actual packet timestamps (ground truth)
 * @returns The duration, in seconds, that should be treated as authoritative
 */
export function validateDuration(containerDuration: number, scannedDuration: number): number {
	const containerIsUsable = Number.isFinite(containerDuration) && containerDuration > 0;

	// A NaN/Infinity scan result is as useless as an empty one. Without this
	// guard, NaN would fall through every comparison below and be returned.
	if (!Number.isFinite(scannedDuration) || scannedDuration <= 0) {
		// No usable scan means corrupted/empty file — fall back to container
		// (downstream shouldFailDecodeEndedEarly will catch truly empty files)
		return containerIsUsable ? containerDuration : 0;
	}

	if (!containerIsUsable) {
		return scannedDuration;
	}

	// Both values are finite and positive here: trust the container only when it
	// agrees with the packet timestamps to within the threshold.
	const divergenceSec = Math.abs(containerDuration - scannedDuration);
	return divergenceSec > DURATION_DIVERGENCE_THRESHOLD_SEC ? scannedDuration : containerDuration;
}
|
||||
|
||||
export function shouldFailDecodeEndedEarly({
|
||||
cancelled,
|
||||
@@ -201,10 +232,43 @@ export class StreamingVideoDecoder {
|
||||
|
||||
const audioStream = mediaInfo.streams.find((s) => s.codec_type_string === "audio");
|
||||
|
||||
// Scan video packets to find the true content boundary.
|
||||
// MediaRecorder (especially on Linux) writes unreliable container durations.
|
||||
// Packet timestamps are ground truth — no decode needed, just timestamp reads.
|
||||
// Pass explicit range because some containers are truncated without one.
|
||||
// Sanitize because mediaInfo.duration can be NaN/Infinity (Chromium Linux bug),
|
||||
// which would propagate into demuxer.read() as an invalid endpoint.
|
||||
const containerDurationSec = Number.isFinite(mediaInfo.duration) ? mediaInfo.duration : 0;
|
||||
const streamDurationSec =
|
||||
typeof videoStream?.duration === "number" && Number.isFinite(videoStream.duration)
|
||||
? videoStream.duration
|
||||
: 0;
|
||||
const hintedDurationSec = Math.max(containerDurationSec, streamDurationSec, 0);
|
||||
const scanEndSec =
|
||||
hintedDurationSec > 0 ? hintedDurationSec + 0.5 : SCAN_UNBOUNDED_FALLBACK_SEC;
|
||||
let maxPacketEndUs = 0;
|
||||
const scanReader = this.demuxer.read("video", 0, scanEndSec).getReader();
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await scanReader.read();
|
||||
if (done || !value) break;
|
||||
const endUs = value.timestamp + (value.duration ?? 0);
|
||||
if (endUs > maxPacketEndUs) maxPacketEndUs = endUs;
|
||||
}
|
||||
} finally {
|
||||
try {
|
||||
await scanReader.cancel();
|
||||
} catch {
|
||||
/* already closed */
|
||||
}
|
||||
}
|
||||
const scannedDuration = maxPacketEndUs / 1_000_000;
|
||||
const validatedDuration = validateDuration(mediaInfo.duration, scannedDuration);
|
||||
|
||||
this.metadata = {
|
||||
width: videoStream?.width || 1920,
|
||||
height: videoStream?.height || 1080,
|
||||
duration: mediaInfo.duration,
|
||||
duration: validatedDuration,
|
||||
streamDuration:
|
||||
typeof videoStream?.duration === "number" && Number.isFinite(videoStream.duration)
|
||||
? videoStream.duration
|
||||
@@ -305,7 +369,7 @@ export class StreamingVideoDecoder {
|
||||
|
||||
// One forward stream through the whole file.
|
||||
// Pass explicit range because some containers are truncated when no end is provided.
|
||||
const readEndSec = Math.max(this.metadata.duration, this.metadata.streamDuration ?? 0) + 0.5;
|
||||
const readEndSec = this.metadata.duration + 0.5;
|
||||
const reader = this.demuxer.read("video", 0, readEndSec).getReader();
|
||||
|
||||
// Feed chunks to decoder in background with backpressure
|
||||
|
||||
@@ -157,17 +157,11 @@ export class VideoExporter {
|
||||
this.muxer = muxer;
|
||||
await muxer.initialize();
|
||||
|
||||
const { effectiveDuration, totalFrames } = streamingDecoder.getExportMetrics(
|
||||
const { totalFrames } = streamingDecoder.getExportMetrics(
|
||||
this.config.frameRate,
|
||||
this.config.trimRegions,
|
||||
this.config.speedRegions,
|
||||
);
|
||||
const readEndSec = Math.max(videoInfo.duration, videoInfo.streamDuration ?? 0) + 0.5;
|
||||
|
||||
console.log("[VideoExporter] Original duration:", videoInfo.duration, "s");
|
||||
console.log("[VideoExporter] Effective duration:", effectiveDuration, "s");
|
||||
console.log("[VideoExporter] Total frames to export:", totalFrames);
|
||||
console.log("[VideoExporter] Using streaming decode (web-demuxer + VideoDecoder)");
|
||||
|
||||
const frameDuration = 1_000_000 / this.config.frameRate;
|
||||
let frameIndex = 0;
|
||||
@@ -346,7 +340,7 @@ export class VideoExporter {
|
||||
this.config.videoUrl,
|
||||
this.config.trimRegions,
|
||||
this.config.speedRegions,
|
||||
readEndSec,
|
||||
videoInfo.duration,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user