// Patch overview (whitespace-collapsed unified diff touching four TypeScript files under src/lib/exporter/):
// - audioEncoder.ts: audio export takes a required validatedDurationSec, skips leading trim regions before
//   recording starts, and pauses the recorder across trim seeks with a SEEK_TIMEOUT_MS guard.
// - streamingDecoder.ts (+ test): adds validateDuration(), which reconciles the container duration with a
//   packet-timestamp scan; metadata.duration now holds the validated value.
// - videoExporter.ts: passes videoInfo.duration to the audio processor instead of a locally computed
//   readEndSec, and removes the debug console.log calls.
diff --git a/src/lib/exporter/audioEncoder.ts b/src/lib/exporter/audioEncoder.ts index 490eed2..08cdaf1 100644 --- a/src/lib/exporter/audioEncoder.ts +++ b/src/lib/exporter/audioEncoder.ts @@ -5,6 +5,7 @@ import type { VideoMuxer } from "./muxer"; const AUDIO_BITRATE = 128_000; const DECODE_BACKPRESSURE_LIMIT = 20; const MIN_SPEED_REGION_DELTA_MS = 0.0001; +const SEEK_TIMEOUT_MS = 5_000; export class AudioProcessor { private cancelled = false; @@ -18,9 +19,9 @@ export class AudioProcessor { demuxer: WebDemuxer, muxer: VideoMuxer, videoUrl: string, - trimRegions?: TrimRegion[], - speedRegions?: SpeedRegion[], - readEndSec?: number, + trimRegions: TrimRegion[] | undefined, + speedRegions: SpeedRegion[] | undefined, + validatedDurationSec: number, ): Promise { const sortedTrims = trimRegions ? [...trimRegions].sort((a, b) => a.startMs - b.startMs) : []; const sortedSpeedRegions = speedRegions @@ -35,14 +36,19 @@ export class AudioProcessor { videoUrl, sortedTrims, sortedSpeedRegions, + validatedDurationSec, ); - if (!this.cancelled) { + if (!this.cancelled && renderedAudioBlob.size > 0) { await this.muxRenderedAudioBlob(renderedAudioBlob, muxer); return; } + return; } // No speed edits: keep the original demux/decode/encode path with trim timestamp remap. + // The +0.5s buffer mirrors streamingDecoder.decodeAll's read window (metadata.duration + 0.5) so the + // trim-only audio path reads as far past the validated duration boundary as the video decode does. 
+ const readEndSec = validatedDurationSec + 0.5; await this.processTrimOnlyAudio(demuxer, muxer, sortedTrims, readEndSec); } @@ -55,7 +61,7 @@ export class AudioProcessor { ): Promise { let audioConfig: AudioDecoderConfig; try { - audioConfig = (await demuxer.getDecoderConfig("audio")) as AudioDecoderConfig; + audioConfig = await demuxer.getDecoderConfig("audio"); } catch { console.warn("[AudioProcessor] No audio track found, skipping"); return; @@ -80,11 +86,10 @@ export class AudioProcessor { typeof readEndSec === "number" && Number.isFinite(readEndSec) ? Math.max(0, readEndSec) : undefined; - const audioStream = ( + const audioStream = safeReadEndSec !== undefined ? demuxer.read("audio", 0, safeReadEndSec) - : demuxer.read("audio") - ) as ReadableStream; + : demuxer.read("audio"); const reader = audioStream.getReader(); try { @@ -187,6 +192,7 @@ export class AudioProcessor { videoUrl: string, trimRegions: TrimRegion[], speedRegions: SpeedRegion[], + validatedDurationSec: number, ): Promise { const media = document.createElement("audio"); media.src = videoUrl; @@ -211,15 +217,44 @@ export class AudioProcessor { const destinationNode = audioContext.createMediaStreamDestination(); sourceNode.connect(destinationNode); - const { recorder, recordedBlobPromise } = this.startAudioRecording(destinationNode.stream); let rafId: number | null = null; + let recorder: MediaRecorder | null = null; + let recordedBlobPromise: Promise | null = null; try { if (audioContext.state === "suspended") { await audioContext.resume(); } - await this.seekTo(media, 0); + // Skip past any initial trim region(s) before recording starts to avoid + // capturing trimmed audio during the first rAF frames of playback. + // Loops to handle back-to-back or overlapping trims at t=0. 
+ const effectiveEnd = validatedDurationSec; + let startPosition = 0; + for (let i = 0; i <= trimRegions.length; i++) { + const activeTrim = this.findActiveTrimRegion(startPosition * 1000, trimRegions); + if (!activeTrim) break; + startPosition = activeTrim.endMs / 1000; + if (startPosition >= effectiveEnd) break; + } + + if (startPosition >= effectiveEnd) { + // All content is trimmed — return an empty (zero-byte) blob; the caller only muxes when size > 0 + return new Blob([], { type: "audio/webm" }); + } + + await this.seekTo(media, startPosition); + + // Set initial playback rate for the starting position + const initialSpeedRegion = this.findActiveSpeedRegion(startPosition * 1000, speedRegions); + if (initialSpeedRegion) { + media.playbackRate = initialSpeedRegion.speed; + } + + // Start recording only AFTER seeking past trims + const recording = this.startAudioRecording(destinationNode.stream); + recorder = recording.recorder; + recordedBlobPromise = recording.recordedBlobPromise; await media.play(); await new Promise((resolve, reject) => { @@ -249,24 +284,66 @@ export class AudioProcessor { return; } + // Stop playback at validated duration — browser's media.duration + // may be inflated from bad container metadata. + if (media.currentTime >= validatedDurationSec) { + media.pause(); + cleanup(); + resolve(); + return; + } + const currentTimeMs = media.currentTime * 1000; const activeTrimRegion = this.findActiveTrimRegion(currentTimeMs, trimRegions); if (activeTrimRegion && !media.paused && !media.ended) { const skipToTime = activeTrimRegion.endMs / 1000; - if (skipToTime >= media.duration) { + if (skipToTime >= media.duration || skipToTime >= validatedDurationSec) { media.pause(); cleanup(); resolve(); return; } + // Pause both media and recorder during the trim seek so seek silence/noise is not captured. + // NOTE(review): if cancellation lands while play() is pending after the seek, the .then() below schedules no tick and settles nothing — confirm another path resolves this promise. 
+ media.pause(); + if (recorder?.state === "recording") recorder.pause(); + const onSeeked = () => { + clearTimeout(seekTimer); + if (this.cancelled) { + cleanup(); + resolve(); + return; + } + if (recorder?.state === "paused") recorder.resume(); + media + .play() + .then(() => { + if (!this.cancelled) rafId = requestAnimationFrame(tick); + }) + .catch((err) => { + cleanup(); + reject( + new Error( + `Failed to resume playback after trim seek: ${err instanceof Error ? err.message : String(err)}`, + ), + ); + }); + }; + const seekTimer = window.setTimeout(() => { + media.removeEventListener("seeked", onSeeked); + cleanup(); + reject(new Error("Audio seek timed out while skipping trim region")); + }, SEEK_TIMEOUT_MS); + media.addEventListener("seeked", onSeeked, { once: true }); media.currentTime = skipToTime; - } else { - const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions); - const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1; - if (Math.abs(media.playbackRate - playbackRate) > 0.0001) { - media.playbackRate = playbackRate; - } + return; + } + + const activeSpeedRegion = this.findActiveSpeedRegion(currentTimeMs, speedRegions); + const playbackRate = activeSpeedRegion ? activeSpeedRegion.speed : 1; + if (Math.abs(media.playbackRate - playbackRate) > 0.0001) { + media.playbackRate = playbackRate; } if (!media.paused && !media.ended) { @@ -286,7 +363,7 @@ export class AudioProcessor { cancelAnimationFrame(rafId); } media.pause(); - if (recorder.state !== "inactive") { + if (recorder && recorder.state !== "inactive") { recorder.stop(); } destinationNode.stream.getTracks().forEach((track) => track.stop()); @@ -297,6 +374,12 @@ export class AudioProcessor { media.load(); } + if (!recordedBlobPromise) { + // Invariant: either an early return above fires, or startAudioRecording ran and + // populated recordedBlobPromise before the playback Promise resolved. 
Reaching + // here means that contract was broken — fail loud instead of returning silence. + throw new Error("Audio recorder finished without assigning recordedBlobPromise"); + } const recordedBlob = await recordedBlobPromise; if (this.cancelled) { throw new Error("Export cancelled"); @@ -314,8 +397,8 @@ export class AudioProcessor { try { await demuxer.load(file); - const audioConfig = (await demuxer.getDecoderConfig("audio")) as AudioDecoderConfig; - const reader = (demuxer.read("audio") as ReadableStream).getReader(); + const audioConfig = await demuxer.getDecoderConfig("audio"); + const reader = demuxer.read("audio").getReader(); let isFirstChunk = true; try { diff --git a/src/lib/exporter/streamingDecoder.test.ts b/src/lib/exporter/streamingDecoder.test.ts index 1969c84..55b9123 100644 --- a/src/lib/exporter/streamingDecoder.test.ts +++ b/src/lib/exporter/streamingDecoder.test.ts @@ -1,5 +1,44 @@ import { describe, expect, it } from "vitest"; -import { shouldFailDecodeEndedEarly } from "./streamingDecoder"; +import { shouldFailDecodeEndedEarly, validateDuration } from "./streamingDecoder"; + +describe("validateDuration", () => { + it("returns scanned duration when container reports Infinity", () => { + expect(validateDuration(Infinity, 15.3)).toBe(15.3); + }); + + it("returns scanned duration when container reports 0", () => { + expect(validateDuration(0, 15.3)).toBe(15.3); + }); + + it("returns scanned duration when container reports NaN", () => { + expect(validateDuration(NaN, 15.3)).toBe(15.3); + }); + + it("returns scanned duration when container is inflated beyond threshold", () => { + expect(validateDuration(42, 15.3)).toBe(15.3); + }); + + it("returns container duration when values are close", () => { + expect(validateDuration(15.5, 15.3)).toBe(15.5); + }); + + it("returns container duration when scanned is slightly higher", () => { + // container < scanned (scanned overshoot from last frame duration) + expect(validateDuration(15.0, 15.3)).toBe(15.0); 
+ }); + + it("returns scanned duration when container under-reports beyond threshold", () => { + expect(validateDuration(10, 15.3)).toBe(15.3); + }); + + it("returns container duration when scanned is zero (corrupted/empty file)", () => { + expect(validateDuration(10, 0)).toBe(10); + }); + + it("returns 0 when both container is NaN and scanned is zero", () => { + expect(validateDuration(NaN, 0)).toBe(0); + }); +}); describe("shouldFailDecodeEndedEarly", () => { it("does not fail once every segment has been satisfied", () => { diff --git a/src/lib/exporter/streamingDecoder.ts b/src/lib/exporter/streamingDecoder.ts index 651a557..00d9f0b 100644 --- a/src/lib/exporter/streamingDecoder.ts +++ b/src/lib/exporter/streamingDecoder.ts @@ -70,6 +70,37 @@ type EarlyDecodeEndCheck = { const EARLY_DECODE_END_THRESHOLD_SEC = 1; const METADATA_TAIL_TOLERANCE_SEC = 1.5; const STREAM_DURATION_MATCH_TOLERANCE_SEC = 0.25; +const DURATION_DIVERGENCE_THRESHOLD_SEC = 1.5; +// Fallback upper bound for the packet scan when no reliable duration hint is +// available. Explicit end is required (some containers are truncated without +// one), but the hint-derived bound would cap the scan prematurely when +// container/stream duration are missing or corrupt. +const SCAN_UNBOUNDED_FALLBACK_SEC = 24 * 60 * 60; + +/** + * Validate container duration against actual packet timestamps. + * + * Chrome/Electron's MediaRecorder writes WebM containers with unreliable + * Duration fields (often Infinity, 0, or inflated) — especially on Linux. + * This function picks the most trustworthy duration value. 
+ * + * @param containerDuration Duration from the container-level metadata + * @param scannedDuration Duration derived from actual packet timestamps (ground truth) + */ +export function validateDuration(containerDuration: number, scannedDuration: number): number { + if (scannedDuration <= 0) { + // Zero scanned duration means corrupted/empty file — fall back to container + // (downstream shouldFailDecodeEndedEarly will catch truly empty files) + return Number.isFinite(containerDuration) ? Math.max(containerDuration, 0) : 0; + } + if (!Number.isFinite(containerDuration) || containerDuration <= 0) { + return scannedDuration; + } + if (Math.abs(containerDuration - scannedDuration) > DURATION_DIVERGENCE_THRESHOLD_SEC) { + return scannedDuration; + } + return containerDuration; +} export function shouldFailDecodeEndedEarly({ cancelled, @@ -201,10 +232,43 @@ export class StreamingVideoDecoder { const audioStream = mediaInfo.streams.find((s) => s.codec_type_string === "audio"); + // Scan video packets to find the true content boundary. + // MediaRecorder (especially on Linux) writes unreliable container durations. + // Packet timestamps are ground truth — no decode needed, just timestamp reads. + // Pass explicit range because some containers are truncated without one. + // Sanitize because mediaInfo.duration can be NaN/Infinity (Chromium Linux bug), + // which would propagate into demuxer.read() as an invalid endpoint. + const containerDurationSec = Number.isFinite(mediaInfo.duration) ? mediaInfo.duration : 0; + const streamDurationSec = + typeof videoStream?.duration === "number" && Number.isFinite(videoStream.duration) + ? videoStream.duration + : 0; + const hintedDurationSec = Math.max(containerDurationSec, streamDurationSec, 0); + const scanEndSec = + hintedDurationSec > 0 ? 
hintedDurationSec + 0.5 : SCAN_UNBOUNDED_FALLBACK_SEC; + let maxPacketEndUs = 0; + const scanReader = this.demuxer.read("video", 0, scanEndSec).getReader(); + try { + while (true) { + const { done, value } = await scanReader.read(); + if (done || !value) break; + const endUs = value.timestamp + (value.duration ?? 0); + if (endUs > maxPacketEndUs) maxPacketEndUs = endUs; + } + } finally { + try { + await scanReader.cancel(); + } catch { + /* already closed */ + } + } + const scannedDuration = maxPacketEndUs / 1_000_000; + const validatedDuration = validateDuration(mediaInfo.duration, scannedDuration); + this.metadata = { width: videoStream?.width || 1920, height: videoStream?.height || 1080, - duration: mediaInfo.duration, + duration: validatedDuration, streamDuration: typeof videoStream?.duration === "number" && Number.isFinite(videoStream.duration) ? videoStream.duration @@ -305,7 +369,7 @@ export class StreamingVideoDecoder { // One forward stream through the whole file. // Pass explicit range because some containers are truncated when no end is provided. - const readEndSec = Math.max(this.metadata.duration, this.metadata.streamDuration ?? 0) + 0.5; + const readEndSec = this.metadata.duration + 0.5; const reader = this.demuxer.read("video", 0, readEndSec).getReader(); // Feed chunks to decoder in background with backpressure diff --git a/src/lib/exporter/videoExporter.ts b/src/lib/exporter/videoExporter.ts index dcfcc3e..d007b30 100644 --- a/src/lib/exporter/videoExporter.ts +++ b/src/lib/exporter/videoExporter.ts @@ -157,17 +157,11 @@ export class VideoExporter { this.muxer = muxer; await muxer.initialize(); - const { effectiveDuration, totalFrames } = streamingDecoder.getExportMetrics( + const { totalFrames } = streamingDecoder.getExportMetrics( this.config.frameRate, this.config.trimRegions, this.config.speedRegions, ); - const readEndSec = Math.max(videoInfo.duration, videoInfo.streamDuration ?? 
0) + 0.5; - - console.log("[VideoExporter] Original duration:", videoInfo.duration, "s"); - console.log("[VideoExporter] Effective duration:", effectiveDuration, "s"); - console.log("[VideoExporter] Total frames to export:", totalFrames); - console.log("[VideoExporter] Using streaming decode (web-demuxer + VideoDecoder)"); const frameDuration = 1_000_000 / this.config.frameRate; let frameIndex = 0; @@ -346,7 +340,7 @@ export class VideoExporter { this.config.videoUrl, this.config.trimRegions, this.config.speedRegions, - readEndSec, + videoInfo.duration, ); } }