feat: compose mac native capture with media

This commit is contained in:
Etienne
2026-05-12 09:32:14 +02:00
committed by Etienne Lescot
parent b9e2134749
commit 6a4ddc5dad
7 changed files with 453 additions and 81 deletions
+12
View File
@@ -107,6 +107,18 @@ interface Window {
discarded?: boolean;
error?: string;
}>;
/**
 * Renderer-facing bridge method that sends a webcam recording, together with
 * the path and id of a screen recording, to the main process.
 *
 * Resolves to a result object: `success` is always present; `path`, `session`,
 * `message` and `error` are optional and depend on the outcome.
 */
attachNativeMacWebcamRecording: (payload: {
screenVideoPath: string;
recordingId: number;
webcam: import("../src/lib/recordingSession").RecordedVideoAssetInput;
cursorCaptureMode?: import("../src/lib/recordingSession").CursorCaptureMode;
}) => Promise<{
success: boolean;
path?: string;
session?: import("../src/lib/recordingSession").RecordingSession;
message?: string;
error?: string;
}>;
discardCursorTelemetry: (recordingId: number) => Promise<void>;
getCursorTelemetry: (videoPath?: string) => Promise<{
success: boolean;
+83
View File
@@ -23,6 +23,7 @@ import {
normalizeProjectMedia,
normalizeRecordingSession,
type ProjectMedia,
type RecordedVideoAssetInput,
type RecordingSession,
type StoreRecordedSessionInput,
} from "../../src/lib/recordingSession";
@@ -216,6 +217,13 @@ type SelectedSource = {
[key: string]: unknown;
};
/**
 * Payload shape accepted by the "attach-native-mac-webcam-recording" IPC handler.
 * Every field is optional at the type level because the payload crosses the IPC
 * boundary; the handler checks the fields it needs at runtime.
 */
type AttachNativeMacWebcamRecordingInput = {
// Path of the already-recorded screen video the webcam file is attached to.
screenVideoPath?: string;
// Used as the session's `createdAt` when it is a finite number.
recordingId?: number;
// Webcam asset; the handler reads `fileName` and `videoData` from it.
webcam?: RecordedVideoAssetInput;
cursorCaptureMode?: CursorCaptureMode;
};
let selectedSource: SelectedSource | null = null;
let selectedDesktopSource: DesktopCapturerSource | null = null;
let lastEnumeratedSources = new Map<string, DesktopCapturerSource>();
@@ -1417,6 +1425,20 @@ export function registerIpcHandlers(
const outputPath = path.join(RECORDINGS_DIR, `${RECORDING_FILE_PREFIX}${recordingId}.mp4`);
const cursorCaptureMode =
normalizeCursorCaptureMode(request.cursor?.mode) ?? "editable-overlay";
try {
await desktopCapturer.getSources({
types: ["screen"],
thumbnailSize: { width: 1, height: 1 },
});
} catch {
// The helper reports the final ScreenCaptureKit permission status.
}
if (request.audio?.microphone?.enabled) {
const micStatus = systemPreferences.getMediaAccessStatus("microphone");
if (micStatus !== "granted") {
await systemPreferences.askForMediaAccess("microphone");
}
}
const sourceDisplay =
request.source.type === "display" && typeof request.source.displayId === "number"
? (screen.getAllDisplays().find((display) => display.id === request.source.displayId) ??
@@ -1435,6 +1457,10 @@ export function registerIpcHandlers(
...request.video,
hideSystemCursor: cursorCaptureMode === "editable-overlay",
},
webcam: {
...request.webcam,
enabled: false,
},
cursor: {
mode: cursorCaptureMode,
},
@@ -1666,6 +1692,63 @@ export function registerIpcHandlers(
}
});
// IPC handler: attaches a webcam recording (sent by the renderer as raw bytes)
// to an existing native macOS screen recording.
//
// Steps:
//   1. Validate that the screen video path resolves inside RECORDINGS_DIR and is readable.
//   2. Write the webcam bytes next to it.
//   3. Persist a session manifest named after the screen video.
//   4. Only then switch the in-memory "current session" to the new session.
//
// Returns `{ success: true, path, session, message }` on success, or
// `{ success: false, error }` on validation failure or any thrown error.
ipcMain.handle(
  "attach-native-mac-webcam-recording",
  async (_, payload: AttachNativeMacWebcamRecordingInput) => {
    try {
      const screenVideoPath = normalizeVideoSourcePath(payload.screenVideoPath);
      if (!screenVideoPath || !isPathWithinDir(screenVideoPath, RECORDINGS_DIR)) {
        return {
          success: false,
          error: "Native macOS webcam attachment requires a recording output path.",
        };
      }

      // Throws (and is reported through the catch below) when the screen video is missing/unreadable.
      await fs.access(screenVideoPath, fsConstants.R_OK);

      if (!payload.webcam?.fileName || !payload.webcam.videoData) {
        return { success: false, error: "Native macOS webcam attachment is missing video data." };
      }

      const webcamVideoPath = resolveRecordingOutputPath(payload.webcam.fileName);
      await fs.writeFile(webcamVideoPath, Buffer.from(payload.webcam.videoData));

      // Fall back to "now" when the renderer did not supply a usable recording id.
      const createdAt =
        typeof payload.recordingId === "number" && Number.isFinite(payload.recordingId)
          ? payload.recordingId
          : Date.now();
      const cursorCaptureMode = normalizeCursorCaptureMode(payload.cursorCaptureMode);
      const session: RecordingSession = {
        screenVideoPath,
        webcamVideoPath,
        createdAt,
        ...(cursorCaptureMode ? { cursorCaptureMode } : {}),
      };

      const sessionManifestPath = path.join(
        RECORDINGS_DIR,
        `${path.parse(screenVideoPath).name}${RECORDING_SESSION_SUFFIX}`,
      );
      // Persist the manifest BEFORE touching in-memory state: if this write fails the
      // handler reports `success: false`, and the current session / project path must
      // not have been switched to a session that was never stored.
      await fs.writeFile(sessionManifestPath, JSON.stringify(session, null, 2), "utf-8");

      setCurrentRecordingSessionState(session);
      currentProjectPath = null;

      return {
        success: true,
        path: screenVideoPath,
        session,
        message: "Native macOS webcam recording attached successfully",
      };
    } catch (error) {
      console.error("Failed to attach native macOS webcam recording:", error);
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
      };
    }
  },
);
ipcMain.handle("store-recorded-session", async (_, payload: StoreRecordedSessionInput) => {
try {
return await storeRecordedSessionFiles(payload);
+2 -2
View File
@@ -23,9 +23,9 @@ npm run build:native:mac
On non-macOS hosts this command exits successfully and does not affect Windows/Linux development. On macOS it builds the Swift package at `electron/native/screencapturekit`, writes the development binary to `electron/native/screencapturekit/build/openscreen-screencapturekit-helper`, and copies the redistributable binary to `electron/native/bin/darwin-${arch}/openscreen-screencapturekit-helper`.
The current helper implementation supports the first native media slice: display/window ScreenCaptureKit video capture, cursor exclusion through `SCStreamConfiguration.showsCursor`, H.264 encoding, and MP4 muxing. System audio, microphone capture, webcam composition, and runtime controls are intentionally left as explicit roadmap phases.
The current helper implementation supports display/window ScreenCaptureKit video capture, cursor exclusion through `SCStreamConfiguration.showsCursor`, H.264 encoding, MP4 muxing, and ScreenCaptureKit system audio. It also attempts native ScreenCaptureKit microphone capture when the running macOS version exposes that capability. Webcam recording currently stays as an Electron sidecar and is attached to the same recording session after the native screen capture stops.
Electron exposes `is-native-mac-capture-available` for capability probing. It resolves the same helper locations listed above and reports `missing-helper` until a Swift helper binary is present; production recording is not routed through the macOS helper yet.
Electron exposes `is-native-mac-capture-available` for capability probing. It resolves the same helper locations listed above and reports `missing-helper` until a Swift helper binary is present. When available, macOS recording routes screen/window capture through the native helper so editable cursor recordings do not bake the system cursor into the video.
See `docs/engineering/macos-native-recorder-roadmap.md` for the contract, rollout phases, and SSOT rules.
@@ -1,4 +1,5 @@
import AVFoundation
import CoreGraphics
import CoreMedia
import Foundation
import ScreenCaptureKit
@@ -77,6 +78,7 @@ enum HelperError: Error, CustomStringConvertible {
case unsupportedFeature(String)
case sourceNotFound(String)
case invalidSourceType(String)
case permissionDenied(String)
case writerSetupFailed(String)
var description: String {
@@ -91,6 +93,8 @@ enum HelperError: Error, CustomStringConvertible {
return message
case .invalidSourceType(let sourceType):
return "Unsupported source type: \(sourceType)."
case .permissionDenied(let message):
return message
case .writerSetupFailed(let message):
return message
}
@@ -116,40 +120,62 @@ func emitError(code: String, message: String) {
@available(macOS 13.0, *)
final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
/// A resolved capture source: the ScreenCaptureKit content filter plus the
/// pixel dimensions chosen for it.
private struct CaptureTarget {
/// Filter handed to `SCStream` when the stream is created.
let filter: SCContentFilter
/// Output width; copied into the recorder's `outputWidth` by `start()`.
let width: Int
/// Output height; copied into the recorder's `outputHeight` by `start()`.
let height: Int
}
/// The recording request this recorder was created for.
private let request: RecordingRequest
/// Queue on which ScreenCaptureKit delivers sample buffers (passed as `sampleHandlerQueue`).
private let sampleQueue = DispatchQueue(label: "app.openscreen.sck-helper.samples")
private let stateQueue = DispatchQueue(label: "app.openscreen.sck-helper.state")
private var stream: SCStream?
private var writer: AVAssetWriter?
private var videoInput: AVAssetWriterInput?
/// Writer input for system audio; only created when `request.audio.system.enabled`.
private var systemAudioInput: AVAssetWriterInput?
/// Writer input for microphone audio; only created when `nativeMicrophoneEnabled`.
private var microphoneAudioInput: AVAssetWriterInput?
/// Audio samples are ignored until this is true (see `appendAudioSampleBuffer`).
private var didStartWriting = false
/// Ensures the "recording-started" event from the sample handler is emitted once.
private var didEmitRecordingStarted = false
private var isStopping = false
/// Set by `makeStreamConfiguration()`; true only when microphone capture was
/// requested and the running system passes the capability probe.
private var nativeMicrophoneEnabled = false
/// Defaults; overwritten in `start()` with the resolved capture target size.
private var outputWidth = 1920
private var outputHeight = 1080
/// Raw value used to build/compare the microphone `SCStreamOutputType` without
/// naming the enum case directly.
/// NOTE(review): presumably matches the SDK's microphone output type — confirm against the SDK headers.
private let microphoneOutputTypeRawValue = 2
/// Creates a recorder for `request`; no capture work happens until `start()`.
init(request: RecordingRequest) {
self.request = request
}
/// Resolves the capture source, builds the stream and writer, and starts capture.
///
/// Emits a "ready" event before `startCapture()` is awaited.
///
/// NOTE(review): this span appears to contain both pre-change and post-change
/// lines of a diff (two `let stream` declarations, `filter` next to
/// `target.filter`, `rejectUnsupportedPhaseFeatures()` next to
/// `ensureRequestedPermissions()`, and a "recording-started" emit that the
/// sample handler also sends). Confirm against the committed file before
/// relying on any single line here.
///
/// - Throws: whatever the permission check, source resolution, stream output
///   registration, writer setup or `startCapture()` throw.
func start() async throws {
try rejectUnsupportedPhaseFeatures()
try ensureRequestedPermissions()
let content = try await SCShareableContent.excludingDesktopWindows(
false,
onScreenWindowsOnly: true
)
let filter = try makeContentFilter(from: content)
let target = try makeCaptureTarget(from: content)
// The resolved size must be stored before the configuration and writer are built,
// because both read outputWidth/outputHeight.
outputWidth = target.width
outputHeight = target.height
// Also decides `nativeMicrophoneEnabled`, which the code below depends on.
let configuration = makeStreamConfiguration()
let stream = SCStream(filter: filter, configuration: configuration, delegate: self)
let stream = SCStream(filter: target.filter, configuration: configuration, delegate: self)
try stream.addStreamOutput(self, type: .screen, sampleHandlerQueue: sampleQueue)
if request.audio.system.enabled {
try stream.addStreamOutput(self, type: .audio, sampleHandlerQueue: sampleQueue)
}
if nativeMicrophoneEnabled {
// The microphone output type is built from its raw value rather than referenced by name.
guard let microphoneOutputType = SCStreamOutputType(rawValue: microphoneOutputTypeRawValue) else {
throw HelperError.unsupportedFeature(
"Native microphone capture requires a macOS version with ScreenCaptureKit microphone output."
)
}
try stream.addStreamOutput(self, type: microphoneOutputType, sampleHandlerQueue: sampleQueue)
}
try setupWriter()
self.stream = stream
emit(["event": "ready", "schemaVersion": 1])
try await stream.startCapture()
emit([
"event": "recording-started",
"timestampMs": Int(Date().timeIntervalSince1970 * 1000),
])
}
func stop() async {
@@ -185,16 +211,29 @@ final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
}
func stream(_ stream: SCStream, didOutputSampleBuffer sampleBuffer: CMSampleBuffer, of type: SCStreamOutputType) {
guard CMSampleBufferDataIsReady(sampleBuffer) else {
return
}
if type == .audio {
appendAudioSampleBuffer(sampleBuffer, to: systemAudioInput)
return
}
if type.rawValue == microphoneOutputTypeRawValue {
appendAudioSampleBuffer(sampleBuffer, to: microphoneAudioInput)
return
}
guard type == .screen else {
return
}
guard CMSampleBufferDataIsReady(sampleBuffer) else {
guard isCompleteFrame(sampleBuffer) else {
return
}
guard let videoInput, let writer else {
return
}
let presentationTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
if !didStartWriting {
writer.startWriting()
@@ -203,29 +242,46 @@ final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
}
if videoInput.isReadyForMoreMediaData {
videoInput.append(sampleBuffer)
if videoInput.append(sampleBuffer), !didEmitRecordingStarted {
didEmitRecordingStarted = true
emit([
"event": "recording-started",
"timestampMs": Int(Date().timeIntervalSince1970 * 1000),
"width": outputWidth,
"height": outputHeight,
])
}
}
}
private func rejectUnsupportedPhaseFeatures() throws {
if request.audio.system.enabled {
throw HelperError.unsupportedFeature(
"System audio capture is planned for the roadmap system-audio phase."
)
private func ensureRequestedPermissions() throws {
if !CGPreflightScreenCaptureAccess() {
let granted = CGRequestScreenCaptureAccess()
if !granted {
throw HelperError.permissionDenied("Screen recording permission is required for ScreenCaptureKit capture.")
}
}
if request.audio.microphone.enabled {
throw HelperError.unsupportedFeature(
"Microphone capture is planned for the roadmap microphone phase."
)
}
if request.webcam.enabled {
throw HelperError.unsupportedFeature(
"Webcam composition is planned for the roadmap webcam phase."
)
switch AVCaptureDevice.authorizationStatus(for: .audio) {
case .authorized:
break
case .notDetermined:
let semaphore = DispatchSemaphore(value: 0)
AVCaptureDevice.requestAccess(for: .audio) { _ in
semaphore.signal()
}
semaphore.wait()
if AVCaptureDevice.authorizationStatus(for: .audio) != .authorized {
throw HelperError.permissionDenied("Microphone permission is required for native microphone capture.")
}
default:
throw HelperError.permissionDenied("Microphone permission is required for native microphone capture.")
}
}
}
private func makeContentFilter(from content: SCShareableContent) throws -> SCContentFilter {
private func makeCaptureTarget(from content: SCShareableContent) throws -> CaptureTarget {
switch request.source.type {
case "display":
guard let displayId = request.source.displayId else {
@@ -234,7 +290,13 @@ final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
guard let display = content.displays.first(where: { $0.displayID == displayId }) else {
throw HelperError.sourceNotFound("No ScreenCaptureKit display found for id \(displayId).")
}
return SCContentFilter(display: display, excludingWindows: [])
let width = Int(CGDisplayPixelsWide(display.displayID))
let height = Int(CGDisplayPixelsHigh(display.displayID))
return CaptureTarget(
filter: SCContentFilter(display: display, excludingWindows: []),
width: clampCaptureDimension(width, fallback: request.video.width),
height: clampCaptureDimension(height, fallback: request.video.height)
)
case "window":
guard let windowId = request.source.windowId else {
throw HelperError.sourceNotFound("Window capture requires source.windowId.")
@@ -242,7 +304,17 @@ final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
guard let window = content.windows.first(where: { $0.windowID == windowId }) else {
throw HelperError.sourceNotFound("No ScreenCaptureKit window found for id \(windowId).")
}
return SCContentFilter(desktopIndependentWindow: window)
let candidateDisplay = content.displays.first {
$0.frame.intersects(window.frame) || $0.frame.contains(CGPoint(x: window.frame.midX, y: window.frame.midY))
}
let scaleFactor = Self.scaleFactor(for: candidateDisplay?.displayID ?? CGMainDisplayID())
let width = Int(window.frame.width) * scaleFactor
let height = Int(window.frame.height) * scaleFactor
return CaptureTarget(
filter: SCContentFilter(desktopIndependentWindow: window),
width: clampCaptureDimension(width, fallback: request.video.width),
height: clampCaptureDimension(height, fallback: request.video.height)
)
default:
throw HelperError.invalidSourceType(request.source.type)
}
@@ -250,12 +322,37 @@ final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
/// Builds the `SCStreamConfiguration` for this recording.
///
/// Side effect: sets `nativeMicrophoneEnabled`. It becomes true only when the
/// request enables the microphone and `supportsNativeMicrophoneCapture` passes;
/// otherwise it is false. When the microphone was requested but is unsupported,
/// a "warning" event with code "microphone-unavailable" is emitted and the
/// configuration is returned without microphone settings.
///
/// - Returns: the configured stream configuration.
private func makeStreamConfiguration() -> SCStreamConfiguration {
let configuration = SCStreamConfiguration()
// NOTE(review): the next two assignments are immediately overwritten by the
// outputWidth/outputHeight assignments below — they look like leftover
// pre-change lines; confirm against the committed file.
configuration.width = request.video.width
configuration.height = request.video.height
configuration.width = outputWidth
configuration.height = outputHeight
// One frame per 1/fps seconds; fps is floored at 1 to avoid a zero timescale.
configuration.minimumFrameInterval = CMTime(value: 1, timescale: CMTimeScale(max(1, request.video.fps)))
configuration.queueDepth = 6
configuration.showsCursor = !request.video.hideSystemCursor
configuration.pixelFormat = kCVPixelFormatType_32BGRA
// Audio format matches the AAC writer inputs (48 kHz, 2 channels).
configuration.sampleRate = 48_000
configuration.channelCount = 2
configuration.excludesCurrentProcessAudio = true
configuration.capturesAudio = request.audio.system.enabled
if request.audio.microphone.enabled {
guard supportsNativeMicrophoneCapture(streamConfig: configuration) else {
nativeMicrophoneEnabled = false
emit([
"event": "warning",
"code": "microphone-unavailable",
"message": "Native microphone capture requires ScreenCaptureKit microphone support on this macOS version.",
])
return configuration
}
nativeMicrophoneEnabled = true
// Audio capture is switched on here even when system audio was not requested.
configuration.capturesAudio = true
// Microphone properties are set through key-value coding rather than direct
// property access; the capability probe above checks that the setters exist.
configuration.setValue(true, forKey: "captureMicrophone")
if let deviceId = resolveMicrophoneCaptureDeviceID() {
configuration.setValue(deviceId, forKey: "microphoneCaptureDeviceID")
}
} else {
nativeMicrophoneEnabled = false
}
return configuration
}
@@ -270,8 +367,8 @@ final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
let writer = try AVAssetWriter(outputURL: outputUrl, fileType: .mp4)
let settings: [String: Any] = [
AVVideoCodecKey: AVVideoCodecType.h264,
AVVideoWidthKey: request.video.width,
AVVideoHeightKey: request.video.height,
AVVideoWidthKey: outputWidth,
AVVideoHeightKey: outputHeight,
AVVideoCompressionPropertiesKey: [
AVVideoAverageBitRateKey: request.video.bitrate ?? 18_000_000,
AVVideoExpectedSourceFrameRateKey: request.video.fps,
@@ -287,6 +384,13 @@ final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
writer.add(input)
self.writer = writer
self.videoInput = input
if request.audio.system.enabled {
systemAudioInput = try addAudioInput(to: writer, bitRate: 192_000)
}
if nativeMicrophoneEnabled {
microphoneAudioInput = try addAudioInput(to: writer, bitRate: 128_000)
}
}
private func finishWriter() async {
@@ -295,6 +399,8 @@ final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
}
videoInput?.markAsFinished()
systemAudioInput?.markAsFinished()
microphoneAudioInput?.markAsFinished()
await withCheckedContinuation { continuation in
writer.finishWriting {
@@ -314,6 +420,91 @@ final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
)
}
}
/// Creates a real-time AAC audio input (48 kHz, 2 channels) and attaches it to `writer`.
///
/// - Parameters:
///   - writer: The asset writer that receives the new input.
///   - bitRate: Encoder bit rate, passed as `AVEncoderBitRateKey`.
/// - Returns: The input that was added to the writer.
/// - Throws: `HelperError.writerSetupFailed` when the writer cannot accept the input.
private func addAudioInput(to writer: AVAssetWriter, bitRate: Int) throws -> AVAssetWriterInput {
    let aacSettings: [String: Any] = [
        AVFormatIDKey: kAudioFormatMPEG4AAC,
        AVSampleRateKey: 48_000,
        AVNumberOfChannelsKey: 2,
        AVEncoderBitRateKey: bitRate,
    ]

    let audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: aacSettings)
    // Samples arrive live from the capture stream.
    audioInput.expectsMediaDataInRealTime = true

    if !writer.canAdd(audioInput) {
        throw HelperError.writerSetupFailed("Unable to add AAC audio input to AVAssetWriter.")
    }

    writer.add(audioInput)
    return audioInput
}
/// Appends an audio sample to `input`, silently dropping it when it cannot be written.
///
/// A sample is dropped when writing has not started yet (`didStartWriting` is false),
/// when `input` is nil, or when the input is not ready for more data.
private func appendAudioSampleBuffer(_ sampleBuffer: CMSampleBuffer, to input: AVAssetWriterInput?) {
    guard didStartWriting,
          let audioInput = input,
          audioInput.isReadyForMoreMediaData
    else { return }

    audioInput.append(sampleBuffer)
}
/// Reports whether a screen sample carries a complete frame.
///
/// Reads the `SCStreamFrameInfo.status` attachment of the first sample attachment
/// dictionary. When no attachment or no decodable status is present the frame is
/// treated as complete, so such buffers are not filtered out.
private func isCompleteFrame(_ sampleBuffer: CMSampleBuffer) -> Bool {
    let rawAttachments = CMSampleBufferGetSampleAttachmentsArray(
        sampleBuffer,
        createIfNecessary: false
    )

    guard let frameInfos = rawAttachments as? [[SCStreamFrameInfo: Any]],
          let frameInfo = frameInfos.first,
          let rawStatus = frameInfo[SCStreamFrameInfo.status] as? Int,
          let frameStatus = SCFrameStatus(rawValue: rawStatus)
    else {
        // No readable status: do not discard the frame.
        return true
    }

    return frameStatus == .complete
}
/// Picks an even capture dimension of at least 2 that never exceeds the requested size.
///
/// - Parameters:
///   - value: Measured dimension; non-positive values mean "unknown".
///   - fallback: Requested dimension; acts as the upper bound (floored at 2) and
///     as the result when `value` is unknown.
/// - Returns: `value` limited to the bound, rounded down to an even number, minimum 2.
private func clampCaptureDimension(_ value: Int, fallback: Int) -> Int {
    let upperBound = fallback < 2 ? 2 : fallback

    // Start from the bound and only go lower when a usable, smaller measurement exists.
    var dimension = upperBound
    if value > 0, value < upperBound {
        dimension = value
    }

    // Round odd sizes down to the nearest even number.
    if dimension % 2 != 0 {
        dimension -= 1
    }

    return dimension < 2 ? 2 : dimension
}
/// Integer ratio of a display mode's pixel width to its width, never below 1.
///
/// - Parameter displayId: Display whose current mode is inspected.
/// - Returns: `pixelWidth / width` using integer division, or 1 when the display
///   mode cannot be read.
private static func scaleFactor(for displayId: CGDirectDisplayID) -> Int {
    if let displayMode = CGDisplayCopyDisplayMode(displayId) {
        // Guard the divisor so a zero-width mode cannot trap.
        let divisor = max(1, displayMode.width)
        return max(1, displayMode.pixelWidth / divisor)
    }
    return 1
}
/// Runtime probe for microphone support on the given stream configuration.
///
/// Passes only when the configuration object responds to both microphone setters
/// and a stream output type can be built from `microphoneOutputTypeRawValue`.
///
/// NOTE(review): for enums imported from Objective-C, `init(rawValue:)` may not
/// reject unknown raw values — confirm the last check adds anything beyond the
/// selector probes.
private func supportsNativeMicrophoneCapture(streamConfig: SCStreamConfiguration) -> Bool {
    let requiredSetters = ["setCaptureMicrophone:", "setMicrophoneCaptureDeviceID:"]

    // Checked in order; stops at the first setter that is missing.
    guard requiredSetters.allSatisfy({ streamConfig.responds(to: Selector($0)) }) else {
        return false
    }

    return SCStreamOutputType(rawValue: microphoneOutputTypeRawValue) != nil
}
/// Maps the requested microphone to the unique ID of an available audio capture device.
///
/// Lookup order:
///   1. A device whose `localizedName` equals the trimmed requested device name.
///   2. The trimmed requested device id, if some device has it as its `uniqueID`.
///
/// - Returns: The matching unique ID, or nil when neither lookup succeeds.
private func resolveMicrophoneCaptureDeviceID() -> String? {
    let audioDevices = AVCaptureDevice.devices(for: .audio)
    let microphone = request.audio.microphone

    // The name match takes priority over the id match.
    let requestedName = microphone.deviceName?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    if !requestedName.isEmpty {
        if let namedDevice = audioDevices.first(where: { $0.localizedName == requestedName }) {
            return namedDevice.uniqueID
        }
    }

    let requestedID = microphone.deviceId?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    if !requestedID.isEmpty, audioDevices.contains(where: { $0.uniqueID == requestedID }) {
        return requestedID
    }

    return nil
}
}
@main
+8
View File
@@ -84,6 +84,14 @@ contextBridge.exposeInMainWorld("electronAPI", {
stopNativeMacRecording: (discard?: boolean) => {
return ipcRenderer.invoke("stop-native-mac-recording", discard);
},
// Forwards the webcam attachment payload to the main process over the
// "attach-native-mac-webcam-recording" channel and returns the invoke promise.
attachNativeMacWebcamRecording: (payload: {
  screenVideoPath: string;
  recordingId: number;
  webcam: { fileName: string; videoData: ArrayBuffer };
  cursorCaptureMode?: import("../src/lib/recordingSession").CursorCaptureMode;
}) => ipcRenderer.invoke("attach-native-mac-webcam-recording", payload),
getCursorTelemetry: (videoPath?: string) => {
return ipcRenderer.invoke("get-cursor-telemetry", videoPath);
},