372 lines
10 KiB
TypeScript
372 lines
10 KiB
TypeScript
import { WebDemuxer } from "web-demuxer";
|
|
import type { SpeedRegion, TrimRegion } from "@/components/video-editor/types";
|
|
|
|
export interface DecodedVideoInfo {
|
|
width: number;
|
|
height: number;
|
|
duration: number; // seconds
|
|
frameRate: number;
|
|
codec: string;
|
|
hasAudio: boolean;
|
|
audioCodec?: string;
|
|
}
|
|
|
|
/** Caller must close the VideoFrame after use. */
|
|
type OnFrameCallback = (
|
|
frame: VideoFrame,
|
|
exportTimestampUs: number,
|
|
sourceTimestampMs: number,
|
|
) => Promise<void>;
|
|
|
|
/**
|
|
* Decodes video frames via web-demuxer + VideoDecoder in a single forward pass.
|
|
* Way faster than seeking an HTMLVideoElement per frame.
|
|
*
|
|
* Frames in trimmed regions are decoded (needed for P/B-frame state) but discarded.
|
|
* Non-trimmed frames get buffered per segment and resampled to the target frame rate.
|
|
*/
|
|
export class StreamingVideoDecoder {
|
|
private demuxer: WebDemuxer | null = null;
|
|
private decoder: VideoDecoder | null = null;
|
|
private cancelled = false;
|
|
private metadata: DecodedVideoInfo | null = null;
|
|
|
|
async loadMetadata(videoUrl: string): Promise<DecodedVideoInfo> {
|
|
const response = await fetch(videoUrl);
|
|
const blob = await response.blob();
|
|
const filename = videoUrl.split("/").pop() || "video";
|
|
const file = new File([blob], filename, { type: blob.type });
|
|
|
|
// Relative URL so it resolves correctly in both dev (http) and packaged (file://) builds
|
|
const wasmUrl = new URL("./wasm/web-demuxer.wasm", window.location.href).href;
|
|
this.demuxer = new WebDemuxer({ wasmFilePath: wasmUrl });
|
|
await this.demuxer.load(file);
|
|
|
|
const mediaInfo = await this.demuxer.getMediaInfo();
|
|
const videoStream = mediaInfo.streams.find((s) => s.codec_type_string === "video");
|
|
|
|
let frameRate = 60;
|
|
if (videoStream?.avg_frame_rate) {
|
|
const parts = videoStream.avg_frame_rate.split("/");
|
|
if (parts.length === 2) {
|
|
const num = parseInt(parts[0], 10);
|
|
const den = parseInt(parts[1], 10);
|
|
if (den > 0 && num > 0) frameRate = num / den;
|
|
}
|
|
}
|
|
|
|
const audioStream = mediaInfo.streams.find((s) => s.codec_type_string === "audio");
|
|
|
|
this.metadata = {
|
|
width: videoStream?.width || 1920,
|
|
height: videoStream?.height || 1080,
|
|
duration: mediaInfo.duration,
|
|
frameRate,
|
|
codec: videoStream?.codec_string || "unknown",
|
|
hasAudio: !!audioStream,
|
|
audioCodec: audioStream?.codec_string,
|
|
};
|
|
|
|
return this.metadata;
|
|
}
|
|
|
|
async decodeAll(
|
|
targetFrameRate: number,
|
|
trimRegions: TrimRegion[] | undefined,
|
|
speedRegions: SpeedRegion[] | undefined,
|
|
onFrame: OnFrameCallback,
|
|
): Promise<void> {
|
|
if (!this.demuxer || !this.metadata) {
|
|
throw new Error("Must call loadMetadata() before decodeAll()");
|
|
}
|
|
|
|
const decoderConfig = await this.demuxer.getDecoderConfig("video");
|
|
const segments = this.splitBySpeed(
|
|
this.computeSegments(this.metadata.duration, trimRegions),
|
|
speedRegions,
|
|
);
|
|
const frameDurationUs = 1_000_000 / targetFrameRate;
|
|
|
|
// Async frame queue — decoder pushes, consumer pulls
|
|
const pendingFrames: VideoFrame[] = [];
|
|
let frameResolve: ((frame: VideoFrame | null) => void) | null = null;
|
|
let decodeError: Error | null = null;
|
|
let decodeDone = false;
|
|
|
|
this.decoder = new VideoDecoder({
|
|
output: (frame: VideoFrame) => {
|
|
if (frameResolve) {
|
|
const resolve = frameResolve;
|
|
frameResolve = null;
|
|
resolve(frame);
|
|
} else {
|
|
pendingFrames.push(frame);
|
|
}
|
|
},
|
|
error: (e: DOMException) => {
|
|
decodeError = new Error(`VideoDecoder error: ${e.message}`);
|
|
if (frameResolve) {
|
|
const resolve = frameResolve;
|
|
frameResolve = null;
|
|
resolve(null);
|
|
}
|
|
},
|
|
});
|
|
this.decoder.configure(decoderConfig);
|
|
|
|
const getNextFrame = (): Promise<VideoFrame | null> => {
|
|
if (decodeError) throw decodeError;
|
|
if (pendingFrames.length > 0) return Promise.resolve(pendingFrames.shift()!);
|
|
if (decodeDone) return Promise.resolve(null);
|
|
return new Promise((resolve) => {
|
|
frameResolve = resolve;
|
|
});
|
|
};
|
|
|
|
// One forward stream through the whole file
|
|
const reader = this.demuxer.read("video").getReader();
|
|
|
|
// Feed chunks to decoder in background with backpressure
|
|
const feedPromise = (async () => {
|
|
try {
|
|
while (!this.cancelled) {
|
|
const { done, value: chunk } = await reader.read();
|
|
if (done || !chunk) break;
|
|
|
|
while (this.decoder!.decodeQueueSize > 10 && !this.cancelled) {
|
|
await new Promise((resolve) => setTimeout(resolve, 1));
|
|
}
|
|
if (this.cancelled) break;
|
|
|
|
this.decoder!.decode(chunk);
|
|
}
|
|
|
|
if (!this.cancelled && this.decoder!.state === "configured") {
|
|
await this.decoder!.flush();
|
|
}
|
|
} catch (e) {
|
|
decodeError = e instanceof Error ? e : new Error(String(e));
|
|
} finally {
|
|
decodeDone = true;
|
|
if (frameResolve) {
|
|
const resolve = frameResolve;
|
|
frameResolve = null;
|
|
resolve(null);
|
|
}
|
|
}
|
|
})();
|
|
|
|
// Route decoded frames into segments by timestamp, then deliver with VFR→CFR resampling
|
|
let segmentIdx = 0;
|
|
let exportFrameIndex = 0;
|
|
let segmentBuffer: VideoFrame[] = [];
|
|
|
|
while (!this.cancelled && segmentIdx < segments.length) {
|
|
const frame = await getNextFrame();
|
|
if (!frame) break;
|
|
|
|
const frameTimeSec = frame.timestamp / 1_000_000;
|
|
const currentSegment = segments[segmentIdx];
|
|
|
|
// Before current segment — trimmed or pre-video
|
|
if (frameTimeSec < currentSegment.startSec - 0.001) {
|
|
frame.close();
|
|
continue;
|
|
}
|
|
|
|
// Past current segment — flush buffer and advance
|
|
if (frameTimeSec >= currentSegment.endSec - 0.001) {
|
|
exportFrameIndex = await this.deliverSegment(
|
|
segmentBuffer,
|
|
currentSegment,
|
|
targetFrameRate,
|
|
frameDurationUs,
|
|
exportFrameIndex,
|
|
onFrame,
|
|
);
|
|
for (const f of segmentBuffer) f.close();
|
|
segmentBuffer = [];
|
|
|
|
segmentIdx++;
|
|
while (
|
|
segmentIdx < segments.length &&
|
|
frameTimeSec >= segments[segmentIdx].endSec - 0.001
|
|
) {
|
|
segmentIdx++;
|
|
}
|
|
|
|
if (segmentIdx < segments.length && frameTimeSec >= segments[segmentIdx].startSec - 0.001) {
|
|
segmentBuffer.push(frame);
|
|
} else {
|
|
frame.close();
|
|
}
|
|
continue;
|
|
}
|
|
|
|
segmentBuffer.push(frame);
|
|
}
|
|
|
|
// Flush last segment
|
|
if (segmentBuffer.length > 0 && segmentIdx < segments.length) {
|
|
exportFrameIndex = await this.deliverSegment(
|
|
segmentBuffer,
|
|
segments[segmentIdx],
|
|
targetFrameRate,
|
|
frameDurationUs,
|
|
exportFrameIndex,
|
|
onFrame,
|
|
);
|
|
for (const f of segmentBuffer) f.close();
|
|
}
|
|
|
|
// Drain leftover decoded frames
|
|
while (!decodeDone) {
|
|
const frame = await getNextFrame();
|
|
if (!frame) break;
|
|
frame.close();
|
|
}
|
|
|
|
try {
|
|
reader.cancel();
|
|
} catch {
|
|
/* already closed */
|
|
}
|
|
await feedPromise;
|
|
for (const f of pendingFrames) f.close();
|
|
pendingFrames.length = 0;
|
|
|
|
if (this.decoder?.state === "configured") {
|
|
this.decoder.close();
|
|
}
|
|
this.decoder = null;
|
|
}
|
|
|
|
/**
|
|
* Resample buffered frames to fill the target frame count for this segment.
|
|
* Handles VFR sources by duplicating/decimating as needed.
|
|
*/
|
|
private async deliverSegment(
|
|
frames: VideoFrame[],
|
|
segment: { startSec: number; endSec: number; speed: number },
|
|
targetFrameRate: number,
|
|
frameDurationUs: number,
|
|
startExportFrameIndex: number,
|
|
onFrame: OnFrameCallback,
|
|
): Promise<number> {
|
|
if (frames.length === 0) return startExportFrameIndex;
|
|
|
|
const segmentFrameCount = Math.ceil(
|
|
((segment.endSec - segment.startSec) / segment.speed) * targetFrameRate,
|
|
);
|
|
let exportFrameIndex = startExportFrameIndex;
|
|
|
|
for (let i = 0; i < segmentFrameCount && !this.cancelled; i++) {
|
|
const sourceIdx = Math.min(
|
|
Math.floor((i * frames.length) / segmentFrameCount),
|
|
frames.length - 1,
|
|
);
|
|
const sourceFrame = frames[sourceIdx];
|
|
const clone = new VideoFrame(sourceFrame, { timestamp: sourceFrame.timestamp });
|
|
await onFrame(clone, exportFrameIndex * frameDurationUs, sourceFrame.timestamp / 1000);
|
|
exportFrameIndex++;
|
|
}
|
|
|
|
return exportFrameIndex;
|
|
}
|
|
|
|
private computeSegments(
|
|
totalDuration: number,
|
|
trimRegions?: TrimRegion[],
|
|
): Array<{ startSec: number; endSec: number }> {
|
|
if (!trimRegions || trimRegions.length === 0) {
|
|
return [{ startSec: 0, endSec: totalDuration }];
|
|
}
|
|
|
|
const sorted = [...trimRegions].sort((a, b) => a.startMs - b.startMs);
|
|
const segments: Array<{ startSec: number; endSec: number }> = [];
|
|
let cursor = 0;
|
|
|
|
for (const trim of sorted) {
|
|
const trimStart = trim.startMs / 1000;
|
|
const trimEnd = trim.endMs / 1000;
|
|
if (cursor < trimStart) {
|
|
segments.push({ startSec: cursor, endSec: trimStart });
|
|
}
|
|
cursor = trimEnd;
|
|
}
|
|
|
|
if (cursor < totalDuration) {
|
|
segments.push({ startSec: cursor, endSec: totalDuration });
|
|
}
|
|
|
|
return segments;
|
|
}
|
|
|
|
getEffectiveDuration(trimRegions?: TrimRegion[], speedRegions?: SpeedRegion[]): number {
|
|
if (!this.metadata) throw new Error("Must call loadMetadata() first");
|
|
const trimSegments = this.computeSegments(this.metadata.duration, trimRegions);
|
|
const speedSegments = this.splitBySpeed(trimSegments, speedRegions);
|
|
return speedSegments.reduce((sum, seg) => sum + (seg.endSec - seg.startSec) / seg.speed, 0);
|
|
}
|
|
|
|
private splitBySpeed(
|
|
segments: Array<{ startSec: number; endSec: number }>,
|
|
speedRegions?: SpeedRegion[],
|
|
): Array<{ startSec: number; endSec: number; speed: number }> {
|
|
if (!speedRegions || speedRegions.length === 0)
|
|
return segments.map((s) => ({ ...s, speed: 1 }));
|
|
|
|
const result: Array<{ startSec: number; endSec: number; speed: number }> = [];
|
|
for (const segment of segments) {
|
|
const overlapping = speedRegions
|
|
.filter((sr) => sr.startMs / 1000 < segment.endSec && sr.endMs / 1000 > segment.startSec)
|
|
.sort((a, b) => a.startMs - b.startMs);
|
|
|
|
if (overlapping.length === 0) {
|
|
result.push({ ...segment, speed: 1 });
|
|
continue;
|
|
}
|
|
|
|
let cursor = segment.startSec;
|
|
for (const sr of overlapping) {
|
|
const srStart = Math.max(sr.startMs / 1000, segment.startSec);
|
|
const srEnd = Math.min(sr.endMs / 1000, segment.endSec);
|
|
if (cursor < srStart) result.push({ startSec: cursor, endSec: srStart, speed: 1 });
|
|
result.push({ startSec: srStart, endSec: srEnd, speed: sr.speed });
|
|
cursor = srEnd;
|
|
}
|
|
if (cursor < segment.endSec)
|
|
result.push({ startSec: cursor, endSec: segment.endSec, speed: 1 });
|
|
}
|
|
return result.filter((s) => s.endSec - s.startSec > 0.0001);
|
|
}
|
|
|
|
getDemuxer(): WebDemuxer | null {
|
|
return this.demuxer;
|
|
}
|
|
|
|
cancel(): void {
|
|
this.cancelled = true;
|
|
}
|
|
|
|
destroy(): void {
|
|
this.cancelled = true;
|
|
|
|
if (this.decoder) {
|
|
try {
|
|
if (this.decoder.state === "configured") this.decoder.close();
|
|
} catch {
|
|
/* ignore */
|
|
}
|
|
this.decoder = null;
|
|
}
|
|
|
|
if (this.demuxer) {
|
|
try {
|
|
this.demuxer.destroy();
|
|
} catch {}
|
|
this.demuxer = null;
|
|
}
|
|
}
|
|
}
|