Fix native Windows webcam sidecar capture

Record browser webcam sidecar when native Windows capture is active.

Add native webcam sidecar output and DirectShow NV12/YUY2 fallback.

Sample exported webcam frames by source timestamp.
This commit is contained in:
EtienneLescot
2026-05-22 11:20:35 +02:00
parent 9f7f498e22
commit ef5855f1f4
13 changed files with 618 additions and 111 deletions
@@ -5,22 +5,18 @@
#include <wrl/client.h>
#include <algorithm>
#include <array>
#include <chrono>
#include <exception>
#include <iomanip>
#include <iostream>
#include <sstream>
namespace {
const CLSID CLSID_SampleGrabberLocal = {0xC1F400A0, 0x3F08, 0x11D3, {0x9F, 0x0B, 0x00, 0x60, 0x08, 0x03, 0x9E, 0x37}};
const CLSID CLSID_NullRendererLocal = {0xC1F400A4, 0x3F08, 0x11D3, {0x9F, 0x0B, 0x00, 0x60, 0x08, 0x03, 0x9E, 0x37}};
// Locally declared COM callback interface that a sample grabber invokes for each
// media sample. NOTE(review): presumably mirrors DirectShow's ISampleGrabberCB
// (qedit.h) — confirm the IID and method order against that header.
// The declaration order of the virtual methods fixes the vtable layout, so the
// methods must not be reordered.
MIDL_INTERFACE("0579154A-2B53-4994-B0D0-E773148EFF85")
ISampleGrabberCB : public IUnknown {
public:
// Callback variant that receives the media sample object itself.
virtual HRESULT STDMETHODCALLTYPE SampleCB(double sampleTime, IMediaSample* sample) = 0;
// Callback variant that receives a raw byte buffer and its length.
virtual HRESULT STDMETHODCALLTYPE BufferCB(double sampleTime, BYTE* buffer, long bufferLength) = 0;
};
MIDL_INTERFACE("6B652FFF-11FE-4FCE-92AD-0266B5D7C78F")
ISampleGrabber : public IUnknown {
public:
@@ -30,7 +26,7 @@ public:
virtual HRESULT STDMETHODCALLTYPE SetBufferSamples(BOOL bufferThem) = 0;
virtual HRESULT STDMETHODCALLTYPE GetCurrentBuffer(long* bufferSize, long* buffer) = 0;
virtual HRESULT STDMETHODCALLTYPE GetCurrentSample(IMediaSample** sample) = 0;
virtual HRESULT STDMETHODCALLTYPE SetCallback(ISampleGrabberCB* callback, long whichMethodToCallback) = 0;
virtual HRESULT STDMETHODCALLTYPE SetCallback(IUnknown* callback, long whichMethodToCallback) = 0;
};
bool succeeded(HRESULT hr, const char* label) {
@@ -43,6 +39,34 @@ bool succeeded(HRESULT hr, const char* label) {
return false;
}
std::string guidToString(const GUID& guid) {
if (guid == MEDIASUBTYPE_RGB32) {
return "RGB32";
}
if (guid == MEDIASUBTYPE_YUY2) {
return "YUY2";
}
if (guid == MEDIASUBTYPE_NV12) {
return "NV12";
}
std::ostringstream stream;
stream << std::hex << std::setfill('0')
<< '{' << std::setw(8) << guid.Data1
<< '-' << std::setw(4) << guid.Data2
<< '-' << std::setw(4) << guid.Data3
<< '-';
for (int index = 0; index < 2; index += 1) {
stream << std::setw(2) << static_cast<int>(guid.Data4[index]);
}
stream << '-';
for (int index = 2; index < 8; index += 1) {
stream << std::setw(2) << static_cast<int>(guid.Data4[index]);
}
stream << '}';
return stream.str();
}
void freeMediaType(AM_MEDIA_TYPE& type) {
if (type.cbFormat != 0) {
CoTaskMemFree(type.pbFormat);
@@ -55,6 +79,20 @@ void freeMediaType(AM_MEDIA_TYPE& type) {
}
}
// Saturates an integer to the 0..255 range and narrows it to a byte.
BYTE clampToByte(int value) {
    if (value < 0) {
        return static_cast<BYTE>(0);
    }
    if (value > 255) {
        return static_cast<BYTE>(255);
    }
    return static_cast<BYTE>(value);
}
std::array<BYTE, 3> yuvToBgr(int y, int u, int v) {
const int c = y - 16;
const int d = u - 128;
const int e = v - 128;
const int blue = (298 * c + 516 * d + 128) >> 8;
const int green = (298 * c - 100 * d - 208 * e + 128) >> 8;
const int red = (298 * c + 409 * e + 128) >> 8;
return {clampToByte(blue), clampToByte(green), clampToByte(red)};
}
} // namespace
struct DirectShowWebcamCapture::Impl {
@@ -137,9 +175,8 @@ bool DirectShowWebcamCapture::initialize(
AM_MEDIA_TYPE requestedType{};
requestedType.majortype = MEDIATYPE_Video;
requestedType.subtype = MEDIASUBTYPE_RGB32;
requestedType.formattype = FORMAT_VideoInfo;
if (!succeeded(impl_->sampleGrabber->SetMediaType(&requestedType), "SetMediaType(DirectShow RGB32)")) {
if (!succeeded(impl_->sampleGrabber->SetMediaType(&requestedType), "SetMediaType(DirectShow video)")) {
return false;
}
@@ -170,17 +207,40 @@ bool DirectShowWebcamCapture::initialize(
if (!succeeded(impl_->sampleGrabber->GetConnectedMediaType(&connectedType), "GetConnectedMediaType(DirectShow webcam)")) {
return false;
}
if (connectedType.subtype == MEDIASUBTYPE_YUY2) {
pixelFormat_ = PixelFormat::Yuy2;
} else if (connectedType.subtype == MEDIASUBTYPE_NV12) {
pixelFormat_ = PixelFormat::Nv12;
} else if (connectedType.subtype == MEDIASUBTYPE_RGB32) {
pixelFormat_ = PixelFormat::Bgra;
} else {
std::cerr << "ERROR: Unsupported DirectShow webcam media subtype "
<< guidToString(connectedType.subtype) << std::endl;
freeMediaType(connectedType);
return false;
}
if (connectedType.formattype == FORMAT_VideoInfo && connectedType.pbFormat) {
const auto* videoInfo = reinterpret_cast<VIDEOINFOHEADER*>(connectedType.pbFormat);
width_ = std::abs(videoInfo->bmiHeader.biWidth);
height_ = std::abs(videoInfo->bmiHeader.biHeight);
sourceTopDown_ = videoInfo->bmiHeader.biHeight < 0;
const int bitsPerPixel = videoInfo->bmiHeader.biBitCount > 0 ? videoInfo->bmiHeader.biBitCount : 16;
if (pixelFormat_ == PixelFormat::Nv12) {
sourceStride_ = ((width_ + 3) / 4) * 4;
} else {
sourceStride_ = ((width_ * bitsPerPixel + 31) / 32) * 4;
}
sourceTopDown_ = pixelFormat_ != PixelFormat::Bgra || videoInfo->bmiHeader.biHeight < 0;
}
std::cerr << "INFO: DirectShow webcam connected subtype " << guidToString(connectedType.subtype)
<< " " << width_ << "x" << height_ << " stride=" << sourceStride_ << std::endl;
freeMediaType(connectedType);
if (width_ <= 0 || height_ <= 0) {
width_ = requestedWidth > 0 ? requestedWidth : 1280;
height_ = requestedHeight > 0 ? requestedHeight : 720;
}
if (sourceStride_ <= 0) {
sourceStride_ = pixelFormat_ == PixelFormat::Bgra ? width_ * 4 : ((width_ + 3) / 4) * 4;
}
impl_->sampleGrabber->SetBufferSamples(TRUE);
impl_->sampleGrabber->SetOneShot(FALSE);
@@ -262,36 +322,88 @@ void DirectShowWebcamCapture::captureLoop() {
}
void DirectShowWebcamCapture::storeFrame(const BYTE* buffer, long length) {
const int stride = width_ * 4;
const int expectedLength = stride * height_;
const int destinationStride = width_ * 4;
const int sourceStride = sourceStride_ > 0 ? sourceStride_ : destinationStride;
const int expectedLength = pixelFormat_ == PixelFormat::Nv12
? sourceStride * height_ + sourceStride * ((height_ + 1) / 2)
: sourceStride * height_;
if (!buffer || length < expectedLength || width_ <= 0 || height_ <= 0) {
return;
}
std::vector<BYTE> frame(static_cast<size_t>(expectedLength));
std::vector<BYTE> frame(static_cast<size_t>(destinationStride * height_));
for (int y = 0; y < height_; y += 1) {
const int sourceY = sourceTopDown_ ? y : height_ - 1 - y;
const BYTE* source = buffer + sourceY * stride;
BYTE* destination = frame.data() + y * stride;
std::copy(source, source + stride, destination);
for (int x = 0; x < width_; x += 1) {
destination[x * 4 + 3] = 255;
const BYTE* source = buffer + sourceY * sourceStride;
BYTE* destination = frame.data() + y * destinationStride;
if (pixelFormat_ == PixelFormat::Bgra) {
std::copy(source, source + destinationStride, destination);
for (int x = 0; x < width_; x += 1) {
destination[x * 4 + 3] = 255;
}
continue;
}
if (pixelFormat_ == PixelFormat::Nv12) {
const BYTE* yPlane = buffer + sourceY * sourceStride;
const BYTE* uvPlane = buffer + sourceStride * height_ + (sourceY / 2) * sourceStride;
for (int x = 0; x < width_; x += 1) {
const int uvX = (x / 2) * 2;
const auto color = yuvToBgr(yPlane[x], uvPlane[uvX], uvPlane[uvX + 1]);
BYTE* pixel = destination + x * 4;
pixel[0] = color[0];
pixel[1] = color[1];
pixel[2] = color[2];
pixel[3] = 255;
}
continue;
}
for (int x = 0; x + 1 < width_; x += 2) {
const BYTE y0 = source[x * 2];
const BYTE u = source[x * 2 + 1];
const BYTE y1 = source[x * 2 + 2];
const BYTE v = source[x * 2 + 3];
const auto first = yuvToBgr(y0, u, v);
const auto second = yuvToBgr(y1, u, v);
BYTE* firstPixel = destination + x * 4;
BYTE* secondPixel = firstPixel + 4;
firstPixel[0] = first[0];
firstPixel[1] = first[1];
firstPixel[2] = first[2];
firstPixel[3] = 255;
secondPixel[0] = second[0];
secondPixel[1] = second[1];
secondPixel[2] = second[2];
secondPixel[3] = 255;
}
if (width_ % 2 == 1) {
const int x = width_ - 1;
const BYTE* pair = source + (x - 1) * 2;
const auto color = yuvToBgr(pair[2], pair[1], pair[3]);
BYTE* pixel = destination + x * 4;
pixel[0] = color[0];
pixel[1] = color[1];
pixel[2] = color[2];
pixel[3] = 255;
}
}
std::scoped_lock lock(frameMutex_);
latestFrame_ = std::move(frame);
latestFrameSequence_ += 1;
}
bool DirectShowWebcamCapture::copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height) {
bool DirectShowWebcamCapture::copyLatestFrame(WebcamFrameSnapshot& destination) {
std::scoped_lock lock(frameMutex_);
if (latestFrame_.empty() || width_ <= 0 || height_ <= 0) {
return false;
}
destination = latestFrame_;
width = width_;
height = height_;
destination.data = latestFrame_;
destination.width = width_;
destination.height = height_;
destination.sequence = latestFrameSequence_;
return true;
}
@@ -3,11 +3,19 @@
#include <Windows.h>
#include <atomic>
#include <cstdint>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
// Copy of the most recent webcam frame handed out by copyLatestFrame().
struct WebcamFrameSnapshot {
    // Pixel bytes of the frame.
    std::vector<BYTE> data{};
    // Frame dimensions in pixels; 0 until a frame has been copied in.
    int width{0};
    int height{0};
    // Value of the producer's frame counter when the snapshot was taken, so
    // consumers can tell whether a new frame has arrived since the last copy.
    uint64_t sequence{0};
};
class DirectShowWebcamCapture {
public:
DirectShowWebcamCapture() = default;
@@ -25,7 +33,7 @@ public:
int requestedFps);
bool start();
void stop();
bool copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height);
bool copyLatestFrame(WebcamFrameSnapshot& destination);
int width() const;
int height() const;
@@ -34,6 +42,12 @@ public:
void storeFrame(const BYTE* buffer, long length);
private:
enum class PixelFormat {
Bgra,
Nv12,
Yuy2,
};
struct Impl;
void captureLoop();
@@ -42,9 +56,12 @@ private:
std::atomic<bool> stopRequested_ = false;
std::mutex frameMutex_;
std::vector<BYTE> latestFrame_;
uint64_t latestFrameSequence_ = 0;
int width_ = 0;
int height_ = 0;
int fps_ = 30;
int sourceStride_ = 0;
bool sourceTopDown_ = false;
PixelFormat pixelFormat_ = PixelFormat::Bgra;
std::wstring selectedDeviceName_;
};
+62 -18
View File
@@ -29,6 +29,7 @@ struct CaptureConfig {
std::string sourceId;
std::string windowHandle;
std::string outputPath;
std::string webcamOutputPath;
int fps = 60;
int width = 0;
int height = 0;
@@ -311,6 +312,7 @@ bool parseConfig(const std::string& json, CaptureConfig& config) {
config.webcamDeviceId = findString(json, "webcamDeviceId");
config.webcamDeviceName = findString(json, "webcamDeviceName");
config.webcamDirectShowClsid = findString(json, "webcamDirectShowClsid");
config.webcamOutputPath = findString(json, "webcamPath");
config.webcamWidth = findInt(json, "webcamWidth", 0);
config.webcamHeight = findInt(json, "webcamHeight", 0);
config.webcamFps = findInt(json, "webcamFps", 0);
@@ -389,6 +391,7 @@ int main(int argc, char* argv[]) {
WebcamCapture webcamCapture;
bool webcamActive = false;
bool writeSeparateWebcam = false;
if (config.webcamEnabled) {
if (!webcamCapture.initialize(
utf8ToWide(config.webcamDeviceId),
@@ -405,6 +408,7 @@ int main(int argc, char* argv[]) {
<< ",\"fps\":" << webcamCapture.fps()
<< ",\"deviceName\":\"" << jsonEscape(wideToUtf8(webcamCapture.selectedDeviceName()))
<< "\"}" << std::endl;
writeSeparateWebcam = !config.webcamOutputPath.empty();
}
WasapiLoopbackCapture loopbackCapture;
@@ -466,6 +470,24 @@ int main(int argc, char* argv[]) {
return 1;
}
MFEncoder webcamEncoder;
if (writeSeparateWebcam) {
const int webcamPixels = std::max(1, webcamCapture.width()) * std::max(1, webcamCapture.height());
const int webcamBitrate = webcamPixels >= 1280 * 720 ? 8'000'000 : 4'000'000;
if (!webcamEncoder.initialize(
utf8ToWide(config.webcamOutputPath),
webcamCapture.width(),
webcamCapture.height(),
webcamCapture.fps(),
webcamBitrate,
session.device(),
session.context(),
nullptr)) {
std::cerr << "ERROR: Failed to initialize native webcam encoder" << std::endl;
return 1;
}
}
std::mutex mutex;
std::condition_variable cv;
std::atomic<bool> stopRequested = false;
@@ -477,6 +499,7 @@ int main(int argc, char* argv[]) {
std::vector<BYTE> latestWebcamFrame;
int latestWebcamWidth = 0;
int latestWebcamHeight = 0;
uint64_t latestWebcamSequence = 0;
bool hasVisibleWebcamFrame = false;
session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) {
@@ -509,20 +532,22 @@ int main(int argc, char* argv[]) {
auto writeVideoFrames = [&]() {
const auto startedAt = std::chrono::steady_clock::now();
uint64_t frameIndex = 0;
uint64_t lastWrittenWebcamSequence = 0;
uint64_t webcamOutputFrameIndex = 0;
int64_t lastEncodedVideoTimestampHns = -1;
while (!stopRequested && !encodeFailed) {
{
std::scoped_lock lock(mutex);
if (webcamActive) {
std::vector<BYTE> candidateWebcamFrame;
int candidateWebcamWidth = 0;
int candidateWebcamHeight = 0;
if (webcamCapture.copyLatestFrame(candidateWebcamFrame, candidateWebcamWidth, candidateWebcamHeight) &&
hasVisibleBgraContent(candidateWebcamFrame)) {
latestWebcamFrame = std::move(candidateWebcamFrame);
latestWebcamWidth = candidateWebcamWidth;
latestWebcamHeight = candidateWebcamHeight;
WebcamFrameSnapshot candidateWebcamFrame;
if (webcamCapture.copyLatestFrame(candidateWebcamFrame) &&
candidateWebcamFrame.sequence != latestWebcamSequence &&
hasVisibleBgraContent(candidateWebcamFrame.data)) {
latestWebcamFrame = std::move(candidateWebcamFrame.data);
latestWebcamWidth = candidateWebcamFrame.width;
latestWebcamHeight = candidateWebcamFrame.height;
latestWebcamSequence = candidateWebcamFrame.sequence;
hasVisibleWebcamFrame = true;
}
}
@@ -545,10 +570,23 @@ int main(int argc, char* argv[]) {
frameTimestampHns =
lastEncodedVideoTimestampHns + static_cast<int64_t>(10'000'000ULL / config.fps);
}
if (writeSeparateWebcam && webcamFrame.data &&
latestWebcamSequence != lastWrittenWebcamSequence) {
const int64_t webcamTimestampHns = static_cast<int64_t>(
(webcamOutputFrameIndex * 10'000'000ULL) / std::max(1, webcamCapture.fps()));
if (!webcamEncoder.writeBgraFrame(webcamFrame, webcamTimestampHns)) {
encodeFailed = true;
stopRequested = true;
cv.notify_all();
return;
}
lastWrittenWebcamSequence = latestWebcamSequence;
webcamOutputFrameIndex += 1;
}
if (latestFrameTexture && !encoder.writeFrame(
latestFrameTexture.Get(),
frameTimestampHns,
webcamFrame.data ? &webcamFrame : nullptr)) {
!writeSeparateWebcam && webcamFrame.data ? &webcamFrame : nullptr)) {
encodeFailed = true;
stopRequested = true;
cv.notify_all();
@@ -659,14 +697,13 @@ int main(int argc, char* argv[]) {
webcamActive = true;
const auto webcamDeadline = std::chrono::steady_clock::now() + std::chrono::seconds(3);
while (std::chrono::steady_clock::now() < webcamDeadline && !hasVisibleWebcamFrame) {
std::vector<BYTE> candidateWebcamFrame;
int candidateWebcamWidth = 0;
int candidateWebcamHeight = 0;
if (webcamCapture.copyLatestFrame(candidateWebcamFrame, candidateWebcamWidth, candidateWebcamHeight) &&
hasVisibleBgraContent(candidateWebcamFrame)) {
latestWebcamFrame = std::move(candidateWebcamFrame);
latestWebcamWidth = candidateWebcamWidth;
latestWebcamHeight = candidateWebcamHeight;
WebcamFrameSnapshot candidateWebcamFrame;
if (webcamCapture.copyLatestFrame(candidateWebcamFrame) &&
hasVisibleBgraContent(candidateWebcamFrame.data)) {
latestWebcamFrame = std::move(candidateWebcamFrame.data);
latestWebcamWidth = candidateWebcamFrame.width;
latestWebcamHeight = candidateWebcamFrame.height;
latestWebcamSequence = candidateWebcamFrame.sequence;
hasVisibleWebcamFrame = true;
break;
}
@@ -740,6 +777,9 @@ int main(int argc, char* argv[]) {
{
std::scoped_lock lock(mutex);
encoder.finalize();
if (writeSeparateWebcam) {
webcamEncoder.finalize();
}
}
if (stdinThread.joinable()) {
@@ -752,7 +792,11 @@ int main(int argc, char* argv[]) {
}
std::cout << "{\"event\":\"recording-stopped\",\"schemaVersion\":2,\"screenPath\":\""
<< jsonEscape(config.outputPath) << "\"}" << std::endl;
<< jsonEscape(config.outputPath) << "\"";
if (writeSeparateWebcam) {
std::cout << ",\"webcamPath\":\"" << jsonEscape(config.webcamOutputPath) << "\"";
}
std::cout << "}" << std::endl;
std::cout << "Recording stopped. Output path: " << config.outputPath << std::endl;
return 0;
}
@@ -254,6 +254,40 @@ bool MFEncoder::copyFrameToBuffer(
return true;
}
bool MFEncoder::copyBgraFrameToBuffer(const BgraFrameView& frame, BYTE* destination, DWORD destinationSize) {
if (!frame.data || frame.width <= 0 || frame.height <= 0) {
return false;
}
const DWORD rowBytes = static_cast<DWORD>(width_ * 4);
const DWORD requiredBytes = rowBytes * static_cast<DWORD>(height_);
if (destinationSize < requiredBytes) {
std::cerr << "ERROR: Media Foundation webcam buffer is too small" << std::endl;
return false;
}
if (frame.width == width_ && frame.height == height_) {
std::memcpy(destination, frame.data, requiredBytes);
return true;
}
for (int y = 0; y < height_; y += 1) {
const int sourceY = static_cast<int>((static_cast<int64_t>(y) * frame.height) / height_);
BYTE* destinationRow = destination + rowBytes * y;
for (int x = 0; x < width_; x += 1) {
const int sourceX = static_cast<int>((static_cast<int64_t>(x) * frame.width) / width_);
const BYTE* source = frame.data + (sourceY * frame.width + sourceX) * 4;
BYTE* target = destinationRow + x * 4;
target[0] = source[0];
target[1] = source[1];
target[2] = source[2];
target[3] = 255;
}
}
return true;
}
bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const BgraFrameView* webcamFrame) {
std::scoped_lock writerLock(writerMutex_);
if (!sinkWriter_ || finalized_) {
@@ -302,6 +336,54 @@ bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const
return succeeded(sinkWriter_->WriteSample(videoStreamIndex_, sample.Get()), "WriteSample");
}
// Encodes one CPU-side BGRA frame as a video sample on this encoder's stream.
//
// frame:        source pixels; resized to width_ x height_ by copyBgraFrameToBuffer.
// timestampHns: presentation time in 100 ns units. The first timestamp seen
//               becomes the stream origin, and a timestamp that does not advance
//               past the previous sample is pushed forward by one frame duration.
//
// Returns false when the writer is missing or finalized, or when any Media
// Foundation call fails; no sample is submitted in that case.
bool MFEncoder::writeBgraFrame(const BgraFrameView& frame, int64_t timestampHns) {
    std::scoped_lock writerLock(writerMutex_);
    if (!sinkWriter_ || finalized_) {
        return false;
    }

    if (firstTimestampHns_ < 0) {
        firstTimestampHns_ = timestampHns;
    }

    const int64_t sampleDuration = 10'000'000LL / fps_;
    int64_t sampleTime = timestampHns - firstTimestampHns_;
    if (sampleTime <= lastTimestampHns_) {
        sampleTime = lastTimestampHns_ + sampleDuration;
    }
    lastTimestampHns_ = sampleTime;

    Microsoft::WRL::ComPtr<IMFMediaBuffer> buffer;
    const DWORD frameBytes = static_cast<DWORD>(width_ * height_ * 4);
    if (!succeeded(MFCreateMemoryBuffer(frameBytes, &buffer), "MFCreateMemoryBuffer(webcam)")) {
        return false;
    }

    BYTE* data = nullptr;
    DWORD maxLength = 0;
    DWORD currentLength = 0;
    if (!succeeded(buffer->Lock(&data, &maxLength, &currentLength), "IMFMediaBuffer::Lock(webcam)")) {
        return false;
    }

    // Always unlock, even when the copy failed, before deciding the outcome.
    const bool copied = copyBgraFrameToBuffer(frame, data, maxLength);
    const bool unlocked = succeeded(buffer->Unlock(), "IMFMediaBuffer::Unlock(webcam)");
    if (!copied || !unlocked) {
        return false;
    }

    if (!succeeded(buffer->SetCurrentLength(frameBytes), "IMFMediaBuffer::SetCurrentLength(webcam)")) {
        return false;
    }

    Microsoft::WRL::ComPtr<IMFSample> sample;
    if (!succeeded(MFCreateSample(&sample), "MFCreateSample(webcam)")) {
        return false;
    }

    // A sample without its buffer or timing must not reach the sink writer, so
    // each setup step is checked instead of being fired and forgotten.
    if (!succeeded(sample->AddBuffer(buffer.Get()), "IMFSample::AddBuffer(webcam)") ||
        !succeeded(sample->SetSampleTime(sampleTime), "IMFSample::SetSampleTime(webcam)") ||
        !succeeded(sample->SetSampleDuration(sampleDuration), "IMFSample::SetSampleDuration(webcam)")) {
        return false;
    }

    return succeeded(sinkWriter_->WriteSample(videoStreamIndex_, sample.Get()), "WriteSample(webcam)");
}
bool MFEncoder::writeAudio(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) {
std::scoped_lock writerLock(writerMutex_);
if (!sinkWriter_ || finalized_ || !hasAudioStream_) {
@@ -44,6 +44,7 @@ public:
ID3D11DeviceContext* context,
const AudioInputFormat* audioFormat = nullptr);
bool writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const BgraFrameView* webcamFrame = nullptr);
bool writeBgraFrame(const BgraFrameView& frame, int64_t timestampHns);
bool writeAudio(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns);
bool finalize();
@@ -54,6 +55,7 @@ private:
BYTE* destination,
DWORD destinationSize,
const BgraFrameView* webcamFrame);
bool copyBgraFrameToBuffer(const BgraFrameView& frame, BYTE* destination, DWORD destinationSize);
bool configureAudioStream(const AudioInputFormat& audioFormat);
Microsoft::WRL::ComPtr<IMFSinkWriter> sinkWriter_;
@@ -365,6 +365,7 @@ void WebcamCapture::captureLoop() {
if (currentLength >= expectedLength && expectedLength > 0) {
std::scoped_lock lock(frameMutex_);
latestFrame_.assign(data, data + expectedLength);
latestFrameSequence_ += 1;
}
buffer->Unlock();
@@ -373,18 +374,19 @@ void WebcamCapture::captureLoop() {
CoUninitialize();
}
bool WebcamCapture::copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height) {
bool WebcamCapture::copyLatestFrame(WebcamFrameSnapshot& destination) {
if (usingDirectShow_) {
return directShowCapture_.copyLatestFrame(destination, width, height);
return directShowCapture_.copyLatestFrame(destination);
}
std::scoped_lock lock(frameMutex_);
if (latestFrame_.empty() || width_ <= 0 || height_ <= 0) {
return false;
}
destination = latestFrame_;
width = width_;
height = height_;
destination.data = latestFrame_;
destination.width = width_;
destination.height = height_;
destination.sequence = latestFrameSequence_;
return true;
}
@@ -31,7 +31,7 @@ public:
int requestedFps);
bool start();
void stop();
bool copyLatestFrame(std::vector<BYTE>& destination, int& width, int& height);
bool copyLatestFrame(WebcamFrameSnapshot& destination);
int width() const;
int height() const;
@@ -50,6 +50,7 @@ private:
std::atomic<bool> stopRequested_ = false;
std::mutex frameMutex_;
std::vector<BYTE> latestFrame_;
uint64_t latestFrameSequence_ = 0;
int width_ = 0;
int height_ = 0;
int fps_ = 30;
+26 -1
View File
@@ -230,6 +230,7 @@ const outputPath = path.join(
os.tmpdir(),
`openscreen-wgc-helper-${WITH_WEBCAM ? "webcam" : WITH_WINDOW ? "window" : WITH_SYSTEM_AUDIO || WITH_MICROPHONE ? "audio" : "video"}-${process.pid}-${Date.now()}-${randomUUID()}.mp4`,
);
const webcamOutputPath = WITH_WEBCAM ? outputPath.replace(/\.mp4$/i, "-webcam.mp4") : null;
const fixtureWindow = WITH_WINDOW ? await startFixtureWindow() : null;
@@ -263,7 +264,10 @@ const config = {
webcamWidth: 640,
webcamHeight: 360,
webcamFps: 30,
outputs: { screenPath: outputPath },
outputs: {
screenPath: outputPath,
...(webcamOutputPath ? { webcamPath: webcamOutputPath } : {}),
},
};
let result;
@@ -289,8 +293,13 @@ if (result.code !== 0) {
if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) {
throw new Error(`WGC helper did not produce a video at ${outputPath}`);
}
if (WITH_WEBCAM && (!fs.existsSync(webcamOutputPath) || fs.statSync(webcamOutputPath).size === 0)) {
throw new Error(`WGC helper did not produce a webcam video at ${webcamOutputPath}`);
}
const streams = probeStreams(outputPath);
const webcamStreams =
webcamOutputPath && fs.existsSync(webcamOutputPath) ? probeStreams(webcamOutputPath) : [];
const hasVideo = streams.some((stream) => stream.codec_type === "video");
const hasAudio = streams.some((stream) => stream.codec_type === "audio");
const webcamFormatLine = result.stdout
@@ -318,6 +327,9 @@ const nativeMicrophoneDiagnostics = result.stderr
if (!hasVideo) {
throw new Error(`WGC helper output has no video stream: ${outputPath}`);
}
if (WITH_WEBCAM && !webcamStreams.some((stream) => stream.codec_type === "video")) {
throw new Error(`WGC helper webcam output has no video stream: ${webcamOutputPath}`);
}
if (
(CAPTURE_CURSOR && !cursorCapture) ||
(cursorCapture &&
@@ -342,13 +354,26 @@ console.log(
{
success: true,
outputPath,
webcamOutputPath,
bytes: fs.statSync(outputPath).size,
webcamBytes:
webcamOutputPath && fs.existsSync(webcamOutputPath)
? fs.statSync(webcamOutputPath).size
: undefined,
streams: streams.map((stream) => ({
index: stream.index,
codecType: stream.codec_type,
codecName: stream.codec_name,
duration: stream.duration,
})),
webcamStreams: webcamStreams.map((stream) => ({
index: stream.index,
codecType: stream.codec_type,
codecName: stream.codec_name,
width: stream.width,
height: stream.height,
duration: stream.duration,
})),
cursorCapture,
selectedMicrophoneDeviceName: audioFormat?.microphoneDeviceName,
selectedWebcamDeviceName: webcamFormat?.deviceName,
+113 -50
View File
@@ -84,6 +84,7 @@ type RecorderHandle = {
type NativeWindowsRecordingHandle = {
recordingId: number;
finalizing: boolean;
webcamRecorder: RecorderHandle | null;
};
type NativeMacRecordingHandle = {
@@ -422,58 +423,105 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
[cursorCaptureMode, teardownMedia],
);
const finalizeNativeWindowsRecording = useCallback(async (discard = false) => {
const activeNativeRecording = nativeWindowsRecording.current;
if (!activeNativeRecording || activeNativeRecording.finalizing) {
return false;
}
activeNativeRecording.finalizing = true;
const clearNativeRecordingState = () => {
nativeWindowsRecording.current = null;
setRecording(false);
setPaused(false);
setElapsedSeconds(0);
accumulatedDurationMs.current = 0;
segmentStartedAt.current = null;
};
try {
const result = await window.electronAPI.stopNativeWindowsRecording(discard);
if (discard || result.discarded) {
clearNativeRecordingState();
return true;
const finalizeNativeWindowsRecording = useCallback(
async (discard = false) => {
const activeNativeRecording = nativeWindowsRecording.current;
if (!activeNativeRecording || activeNativeRecording.finalizing) {
return false;
}
if (!result.success) {
console.error("Failed to stop native Windows recording:", result.error);
toast.error(result.error ?? "Failed to stop native Windows recording");
activeNativeRecording.finalizing = true;
const activeWebcamRecorder = activeNativeRecording.webcamRecorder;
const duration = Math.max(0, getRecordingDurationMs());
if (
activeWebcamRecorder?.recorder.state === "recording" ||
activeWebcamRecorder?.recorder.state === "paused"
) {
try {
activeWebcamRecorder.recorder.stop();
} catch {
// Recorder may already be stopping.
}
}
if (activeWebcamRecorder && webcamRecorder.current === activeWebcamRecorder) {
webcamRecorder.current = null;
}
const clearNativeRecordingState = () => {
nativeWindowsRecording.current = null;
setRecording(false);
setPaused(false);
setElapsedSeconds(0);
accumulatedDurationMs.current = 0;
segmentStartedAt.current = null;
};
try {
const result = await window.electronAPI.stopNativeWindowsRecording(discard);
if (discard || result.discarded) {
clearNativeRecordingState();
return true;
}
if (!result.success) {
console.error("Failed to stop native Windows recording:", result.error);
toast.error(result.error ?? "Failed to stop native Windows recording");
activeNativeRecording.finalizing = false;
return true;
}
const nativeScreenPath = result.session?.screenVideoPath ?? result.path;
let storedSession = result.session;
if (activeWebcamRecorder && nativeScreenPath) {
const webcamBlob = await activeWebcamRecorder.recordedBlobPromise.catch(() => null);
const screenRead = await window.electronAPI.readBinaryFile(nativeScreenPath);
if (webcamBlob && webcamBlob.size > 0 && screenRead.success && screenRead.data) {
const fixedWebcamBlob = await fixWebmDuration(webcamBlob, duration);
const nativeScreenFileName =
nativeScreenPath.split(/[\\/]/).pop() ??
`${RECORDING_FILE_PREFIX}${activeNativeRecording.recordingId}.mp4`;
const webcamFileName = `${RECORDING_FILE_PREFIX}${activeNativeRecording.recordingId}${WEBCAM_FILE_SUFFIX}${VIDEO_FILE_EXTENSION}`;
const stored = await window.electronAPI.storeRecordedSession({
screen: {
videoData: screenRead.data,
fileName: nativeScreenFileName,
},
webcam: {
videoData: await fixedWebcamBlob.arrayBuffer(),
fileName: webcamFileName,
},
createdAt: activeNativeRecording.recordingId,
cursorCaptureMode,
});
if (stored.success && stored.session) {
storedSession = stored.session;
}
}
}
clearNativeRecordingState();
if (storedSession) {
await window.electronAPI.setCurrentRecordingSession(storedSession);
} else if (result.path) {
await window.electronAPI.setCurrentVideoPath(result.path);
}
await window.electronAPI.switchToEditor();
return true;
} catch (error) {
console.error("Error saving native Windows recording:", error);
toast.error(
error instanceof Error ? error.message : "Failed to save native Windows recording",
);
activeNativeRecording.finalizing = false;
return true;
} finally {
if (discardRecordingId.current === activeNativeRecording.recordingId) {
discardRecordingId.current = null;
}
}
clearNativeRecordingState();
if (result.session) {
await window.electronAPI.setCurrentRecordingSession(result.session);
} else if (result.path) {
await window.electronAPI.setCurrentVideoPath(result.path);
}
await window.electronAPI.switchToEditor();
return true;
} catch (error) {
console.error("Error saving native Windows recording:", error);
toast.error(
error instanceof Error ? error.message : "Failed to save native Windows recording",
);
activeNativeRecording.finalizing = false;
return true;
} finally {
if (discardRecordingId.current === activeNativeRecording.recordingId) {
discardRecordingId.current = null;
}
}
}, []);
},
[cursorCaptureMode, getRecordingDurationMs],
);
const finalizeNativeMacRecording = useCallback(
async (discard = false) => {
@@ -747,7 +795,14 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
const displayId = Number(selectedSource.display_id);
const sourceType = selectedSource.id.startsWith("window:") ? "window" : "display";
const windowHandle = parseWindowHandleFromSourceId(selectedSource.id);
if (webcamEnabled) {
const browserWebcamRecorder =
webcamEnabled && webcamStream.current
? createRecorderHandle(webcamStream.current, {
mimeType: selectMimeType(),
videoBitsPerSecond: BITRATE_BASE,
})
: null;
if (webcamEnabled && !browserWebcamRecorder) {
stopWebcamPreviewStream();
}
const request: NativeWindowsRecordingRequest = {
@@ -775,7 +830,7 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
},
},
webcam: {
enabled: webcamEnabled,
enabled: webcamEnabled && !browserWebcamRecorder,
deviceId: webcamDeviceId,
deviceName: webcamDeviceName,
width: WEBCAM_TARGET_WIDTH,
@@ -788,6 +843,12 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
};
const result = await window.electronAPI.startNativeWindowsRecording(request);
if (!result.success || !result.recordingId) {
if (
browserWebcamRecorder?.recorder.state === "recording" ||
browserWebcamRecorder?.recorder.state === "paused"
) {
browserWebcamRecorder.recorder.stop();
}
throw new Error(result.error ?? "Native Windows capture failed.");
}
@@ -795,7 +856,9 @@ export function useScreenRecorder(): UseScreenRecorderReturn {
nativeWindowsRecording.current = {
recordingId: result.recordingId,
finalizing: false,
webcamRecorder: browserWebcamRecorder,
};
webcamRecorder.current = browserWebcamRecorder;
accumulatedDurationMs.current = 0;
segmentStartedAt.current = Date.now();
allowAutoFinalize.current = true;
+8 -6
View File
@@ -11,9 +11,9 @@ import type {
import { BackgroundLoadError } from "@/lib/wallpaper";
import type { CursorRecordingData } from "@/native/contracts";
import { getPlatform } from "@/utils/platformUtils";
import { AsyncVideoFrameQueue } from "./asyncVideoFrameQueue";
import { FrameRenderer } from "./frameRenderer";
import { StreamingVideoDecoder } from "./streamingDecoder";
import { TimestampedVideoFrameQueue } from "./timestampedVideoFrameQueue";
import type {
ExportProgress,
ExportResult,
@@ -124,7 +124,7 @@ export class GifExporter {
}
async export(): Promise<ExportResult> {
let webcamFrameQueue: AsyncVideoFrameQueue | null = null;
let webcamFrameQueue: TimestampedVideoFrameQueue | null = null;
const warnings: string[] = [];
const onWarning = (message: string) => warnings.push(message);
@@ -216,7 +216,7 @@ export class GifExporter {
console.log("[GifExporter] Using streaming decode (web-demuxer + VideoDecoder)");
let frameIndex = 0;
webcamFrameQueue = this.config.webcamVideoUrl ? new AsyncVideoFrameQueue() : null;
webcamFrameQueue = this.config.webcamVideoUrl ? new TimestampedVideoFrameQueue() : null;
let stopWebcamDecode = false;
let webcamDecodeError: Error | null = null;
const webcamDecodePromise =
@@ -228,7 +228,7 @@ export class GifExporter {
this.config.frameRate,
this.config.trimRegions,
this.config.speedRegions,
async (webcamFrame) => {
async (webcamFrame, _exportTimestampUs, webcamSourceTimestampMs) => {
while (queue.length >= 12 && !this.cancelled && !stopWebcamDecode) {
await new Promise((resolve) => setTimeout(resolve, 2));
}
@@ -236,7 +236,7 @@ export class GifExporter {
webcamFrame.close();
return;
}
queue.enqueue(webcamFrame);
queue.enqueue(webcamFrame, webcamSourceTimestampMs);
},
onWarning,
)
@@ -266,7 +266,9 @@ export class GifExporter {
return;
}
webcamFrame = webcamFrameQueue ? await webcamFrameQueue.dequeue() : null;
webcamFrame = webcamFrameQueue
? await webcamFrameQueue.frameAt(sourceTimestampMs)
: null;
const renderer = this.renderer;
if (this.cancelled || !renderer) {
return;
@@ -0,0 +1,50 @@
import { describe, expect, it, vi } from "vitest";
import { TimestampedVideoFrameQueue } from "./timestampedVideoFrameQueue";
/**
 * Minimal stand-in for the WebCodecs `VideoFrame` used by the queue tests.
 *
 * It can be built either from a raw timestamp or from another mock (copying
 * that mock's timestamp), and records whether `close()` has been called.
 */
class MockVideoFrame {
	timestamp: number;
	closed = false;

	constructor(source: MockVideoFrame | number) {
		if (typeof source === "number") {
			this.timestamp = source;
		} else {
			// Copy construction: only the timestamp is carried over.
			this.timestamp = source.timestamp;
		}
	}

	/** Marks this mock as closed so tests can observe ownership hand-off. */
	close() {
		this.closed = true;
	}
}
describe("TimestampedVideoFrameQueue", () => {
	it("samples the latest webcam frame at or before the requested source timestamp", async () => {
		// frameAt() clones the held frame through the global VideoFrame
		// constructor, so the mock has to be installed globally for the test.
		vi.stubGlobal("VideoFrame", MockVideoFrame);

		try {
			const queue = new TimestampedVideoFrameQueue();
			const source0 = new MockVideoFrame(0);
			const source33 = new MockVideoFrame(33_000);
			const source66 = new MockVideoFrame(66_000);

			queue.enqueue(source0 as unknown as VideoFrame, 0);
			queue.enqueue(source33 as unknown as VideoFrame, 33);
			queue.enqueue(source66 as unknown as VideoFrame, 66);

			const sampled0 = await queue.frameAt(0);
			const sampled20 = await queue.frameAt(20);
			const sampled40 = await queue.frameAt(40);
			const sampled80 = await queue.frameAt(80);

			expect(sampled0?.timestamp).toBe(0);
			expect(sampled20?.timestamp).toBe(0);
			expect(sampled40?.timestamp).toBe(33_000);
			expect(sampled80?.timestamp).toBe(66_000);

			// Callers receive clones, never the frame object the queue owns.
			expect(sampled0).not.toBe(source0);
			expect(sampled80).not.toBe(source66);

			// Frames superseded by a later sample are closed by the queue; the
			// currently held frame stays open until the queue is destroyed.
			expect(source0.closed).toBe(true);
			expect(source33.closed).toBe(true);
			expect(source66.closed).toBe(false);

			sampled0?.close();
			sampled20?.close();
			sampled40?.close();
			sampled80?.close();
			queue.destroy();

			expect(source66.closed).toBe(true);
		} finally {
			// Restore the real global. Re-stubbing with the captured original
			// would leave `VideoFrame` defined as `undefined` in environments
			// that never had it.
			vi.unstubAllGlobals();
		}
	});
});
@@ -0,0 +1,105 @@
// A frame together with the source timestamp it was enqueued with.
type TimestampedVideoFrame = {
// Frame object owned by the queue until it is closed.
frame: VideoFrame;
// Timestamp passed to enqueue(); presumably milliseconds on the source
// video's timeline, per the name — confirm against the decoder callback.
sourceTimestampMs: number;
};
// Settlement callbacks for a frameAt() call that is parked waiting for the
// queue state to change.
type PendingConsumer = {
// Called on enqueue() and close() so the waiter re-evaluates the queue.
resolve: () => void;
// Called by fail() to propagate the failure to the waiter.
reject: (error: Error) => void;
};
// Tolerance added to the requested timestamp in frameAt(): a queued frame whose
// timestamp exceeds the request by at most this much still counts as
// "at or before" it.
const TIMESTAMP_EPSILON_MS = 0.5;
/**
 * Queue of video frames that is sampled by source timestamp.
 *
 * A producer pushes frames with `enqueue()`. A consumer asks for the frame
 * that applies at a given timestamp with `frameAt()`. The queue keeps the most
 * recently selected frame as the "held" frame and hands out copies of it, so
 * the same frame can be returned for several consecutive requests.
 *
 * The queue closes every frame it still owns when it is failed or destroyed,
 * and closes a held frame when a newer one replaces it.
 */
export class TimestampedVideoFrameQueue {
	private frames: TimestampedVideoFrame[] = [];
	private consumers: PendingConsumer[] = [];
	private error: Error | null = null;
	private closed = false;
	private heldFrame: TimestampedVideoFrame | null = null;

	/** Number of enqueued frames that have not yet been promoted to the held frame. */
	get length() {
		return this.frames.length;
	}

	/**
	 * Appends a frame and wakes every waiting `frameAt()` call.
	 * If the queue is already closed the frame is closed immediately and dropped.
	 */
	enqueue(frame: VideoFrame, sourceTimestampMs: number) {
		if (!this.closed) {
			this.frames.push({ frame, sourceTimestampMs });
			for (const waiter of this.takeWaiters()) {
				waiter.resolve();
			}
			return;
		}

		frame.close();
	}

	/**
	 * Puts the queue into a failed state: waiting `frameAt()` calls reject with
	 * `error`, later calls throw it, and all owned frames are closed.
	 */
	fail(error: Error) {
		this.error = error;
		this.closed = true;
		for (const waiter of this.takeWaiters()) {
			waiter.reject(error);
		}
		this.closeOwnedFrames();
	}

	/**
	 * Marks the end of input. Frames already queued remain available; waiting
	 * `frameAt()` calls are woken so they can finish.
	 */
	close() {
		this.closed = true;
		for (const waiter of this.takeWaiters()) {
			waiter.resolve();
		}
	}

	/**
	 * Resolves the frame to show at `sourceTimestampMs`.
	 *
	 * Every queued frame whose timestamp is at or before the request (within
	 * `TIMESTAMP_EPSILON_MS`) is consumed, the last of them becoming the held
	 * frame. The result is a new `VideoFrame` built from the held frame; the
	 * tests close it after use, so the caller presumably owns it.
	 *
	 * Returns `null` when there is no held frame and either the next queued
	 * frame is later than the request or the queue is closed. With no held
	 * frame, an empty queue and an open queue, the call waits for the producer.
	 *
	 * Note: once a held frame exists it is returned immediately even if the
	 * queue is empty and still open; frames not yet enqueued are not waited for.
	 *
	 * @throws the error passed to `fail()`, if the queue has failed.
	 */
	async frameAt(sourceTimestampMs: number): Promise<VideoFrame | null> {
		const cutoffMs = sourceTimestampMs + TIMESTAMP_EPSILON_MS;

		for (;;) {
			if (this.error) {
				throw this.error;
			}

			// Promote queued frames up to the cutoff; each promotion closes the
			// frame that was held before it.
			let head = this.frames[0];
			while (head !== undefined && head.sourceTimestampMs <= cutoffMs) {
				this.frames.shift();
				this.replaceHeldFrame(head);
				head = this.frames[0];
			}

			const held = this.heldFrame;
			if (held) {
				return new VideoFrame(held.frame, {
					timestamp: held.frame.timestamp,
				});
			}

			// Nothing held: a queued frame here is necessarily later than the
			// request, and a closed queue will never produce more frames.
			if (head !== undefined || this.closed) {
				return null;
			}

			await new Promise<void>((resolve, reject) => {
				this.consumers.push({ resolve, reject });
			});
		}
	}

	/** Closes the queue and releases every frame it still owns. */
	destroy() {
		this.close();
		this.closeOwnedFrames();
	}

	/** Detaches and returns all currently waiting consumers. */
	private takeWaiters(): PendingConsumer[] {
		return this.consumers.splice(0);
	}

	/** Swaps the held frame, closing the one being replaced (if any). */
	private replaceHeldFrame(frame: TimestampedVideoFrame | null) {
		this.heldFrame?.frame.close();
		this.heldFrame = frame;
	}

	/** Closes the held frame and all queued frames, leaving the queue empty. */
	private closeOwnedFrames() {
		const held = this.heldFrame;
		if (held) {
			held.frame.close();
			this.heldFrame = null;
		}

		for (const { frame } of this.frames) {
			frame.close();
		}
		this.frames = [];
	}
}
+8 -6
View File
@@ -10,11 +10,11 @@ import type {
import { BackgroundLoadError } from "@/lib/wallpaper";
import type { CursorRecordingData } from "@/native/contracts";
import { getPlatform } from "@/utils/platformUtils";
import { AsyncVideoFrameQueue } from "./asyncVideoFrameQueue";
import { AudioProcessor } from "./audioEncoder";
import { FrameRenderer } from "./frameRenderer";
import { VideoMuxer } from "./muxer";
import { StreamingVideoDecoder } from "./streamingDecoder";
import { TimestampedVideoFrameQueue } from "./timestampedVideoFrameQueue";
import type { ExportConfig, ExportProgress, ExportResult } from "./types";
const ENCODER_STALL_TIMEOUT_MS = 15_000;
@@ -195,7 +195,7 @@ export class VideoExporter {
private async exportWithEncoderPreference(
encoderPreference: HardwareAcceleration,
): Promise<ExportResult> {
let webcamFrameQueue: AsyncVideoFrameQueue | null = null;
let webcamFrameQueue: TimestampedVideoFrameQueue | null = null;
let stopWebcamDecode = false;
let webcamDecodeError: Error | null = null;
let webcamDecodePromise: Promise<void> | null = null;
@@ -290,7 +290,7 @@ export class VideoExporter {
? Math.min(this.MAX_ENCODE_QUEUE, 32)
: this.MAX_ENCODE_QUEUE;
webcamFrameQueue = this.config.webcamVideoUrl ? new AsyncVideoFrameQueue() : null;
webcamFrameQueue = this.config.webcamVideoUrl ? new TimestampedVideoFrameQueue() : null;
webcamDecodePromise =
webcamDecoder && webcamFrameQueue
? (() => {
@@ -300,7 +300,7 @@ export class VideoExporter {
this.config.frameRate,
this.config.trimRegions,
this.config.speedRegions,
async (webcamFrame) => {
async (webcamFrame, _exportTimestampUs, webcamSourceTimestampMs) => {
while (queue.length >= 12 && !this.cancelled && !stopWebcamDecode) {
await new Promise((resolve) => setTimeout(resolve, 2));
}
@@ -308,7 +308,7 @@ export class VideoExporter {
webcamFrame.close();
return;
}
queue.enqueue(webcamFrame);
queue.enqueue(webcamFrame, webcamSourceTimestampMs);
},
onWarning,
)
@@ -342,7 +342,9 @@ export class VideoExporter {
}
const timestamp = frameIndex * frameDuration;
webcamFrame = webcamFrameQueue ? await webcamFrameQueue.dequeue() : null;
webcamFrame = webcamFrameQueue
? await webcamFrameQueue.frameAt(sourceTimestampMs)
: null;
if (this.cancelled) {
return;
}