Initial OpenScreen import
CI / Lint (push) Has been cancelled
CI / Type Check (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled
Bump Nix package on release / bump (release) Has been cancelled
Update Homebrew Cask / update-cask (release) Has been cancelled
@@ -0,0 +1,14 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = tab
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
|
||||
[*.{json,yml,yaml}]
|
||||
indent_size = 2
|
||||
|
||||
[*.md]
|
||||
trim_trailing_whitespace = false
|
||||
@@ -0,0 +1,10 @@
|
||||
APP_NAME=Openscreen
|
||||
BUNDLE_ID=com.siddharthvaddem.openscreen
|
||||
|
||||
APPLE_ID=
|
||||
TEAM_ID=
|
||||
SIGN_IDENTITY="Developer ID Application: Samir Patil ()"
|
||||
CSC_NAME="Samir Patil ()"
|
||||
|
||||
NOTARY_PROFILE=OpenScreen-notary
|
||||
APPLE_APP_SPECIFIC_PASSWORD=
|
||||
@@ -0,0 +1,8 @@
|
||||
# Copy to .env.signing.local for a local signing machine. Do not commit real values.
|
||||
AZURE_TENANT_ID=
|
||||
AZURE_CLIENT_ID=
|
||||
AZURE_CLIENT_SECRET=
|
||||
AZURE_TRUSTED_SIGNING_ENDPOINT=https://<region>.codesigning.azure.net/
|
||||
AZURE_TRUSTED_SIGNING_ACCOUNT_NAME=
|
||||
AZURE_TRUSTED_SIGNING_CERTIFICATE_PROFILE_NAME=
|
||||
AZURE_TRUSTED_SIGNING_PUBLISHER_NAME=
|
||||
@@ -0,0 +1 @@
|
||||
* @siddharthvaddem
|
||||
@@ -0,0 +1,149 @@
|
||||
name: Bug Report
|
||||
description: Create a report to help us improve
|
||||
title: "[Bug]: "
|
||||
labels: ["bug", "triage"]
|
||||
body:
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Search existing issues
|
||||
description: Please search to see if an issue already exists for the bug you encountered.
|
||||
options:
|
||||
- label: I have searched the existing issues
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: bug-description
|
||||
attributes:
|
||||
label: Describe the bug
|
||||
description: A clear and concise description of what the bug is.
|
||||
placeholder: e.g., When I click submit, nothing happens...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: expected-behavior
|
||||
attributes:
|
||||
label: Expected behavior
|
||||
description: A clear and concise description of what you expected to happen.
|
||||
placeholder: e.g., The form should submit and show a success message
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: steps-to-reproduce
|
||||
attributes:
|
||||
label: To Reproduce
|
||||
description: Steps to reproduce the behavior.
|
||||
placeholder: |
|
||||
1. Go to '...'
|
||||
2. Click on '....'
|
||||
3. Scroll down to '....'
|
||||
4. See error
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: screenshots
|
||||
attributes:
|
||||
label: Screenshots
|
||||
description: If applicable, add screenshots to help explain your problem.
|
||||
placeholder: Drag and drop images here or paste them
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: dropdown
|
||||
id: os-type
|
||||
attributes:
|
||||
label: OS
|
||||
description: Operating system
|
||||
options:
|
||||
- Windows
|
||||
- macOS
|
||||
- Linux
|
||||
- iOS
|
||||
- Android
|
||||
- Other
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: input
|
||||
id: os-version
|
||||
attributes:
|
||||
label: OS Version
|
||||
description: Please specify your OS version
|
||||
placeholder: e.g., Windows 11, macOS Sonoma, Ubuntu 22.04
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: input
|
||||
id: os-other
|
||||
attributes:
|
||||
label: Other OS
|
||||
description: If you selected "Other" for OS, please specify your operating system
|
||||
placeholder: e.g., FreeBSD, Solaris
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: dropdown
|
||||
id: browser
|
||||
attributes:
|
||||
label: Browser
|
||||
description: What browser are you using?
|
||||
options:
|
||||
- Chrome
|
||||
- Firefox
|
||||
- Safari
|
||||
- Edge
|
||||
- Other
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: input
|
||||
id: browser-version
|
||||
attributes:
|
||||
label: Browser Version
|
||||
description: Please specify your browser version
|
||||
placeholder: e.g., 120.0, 121.0.1
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: input
|
||||
id: browser-other
|
||||
attributes:
|
||||
label: Other Browser
|
||||
description: If you selected "Other" for Browser, please specify your browser
|
||||
placeholder: e.g., Brave, Vivaldi, Opera
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: dropdown
|
||||
id: device-type
|
||||
attributes:
|
||||
label: Device Type
|
||||
description: Device category
|
||||
options:
|
||||
- Desktop
|
||||
- Laptop
|
||||
- Tablet
|
||||
- Mobile
|
||||
- Other
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: input
|
||||
id: device-other
|
||||
attributes:
|
||||
label: Other Device
|
||||
description: If you selected "Other" for Device Type, please specify your device
|
||||
placeholder: e.g., Smart TV, IoT device
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: additional-context
|
||||
attributes:
|
||||
label: Additional context
|
||||
description: Add any other context about the problem here.
|
||||
placeholder: Links, references, or any additional information
|
||||
validations:
|
||||
required: false
|
||||
@@ -0,0 +1,48 @@
|
||||
name: Feature Request
|
||||
description: Suggest an idea for this project
|
||||
title: "[Feature]: "
|
||||
labels: ["enhancement", "feature-request"]
|
||||
body:
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Search existing issues
|
||||
description: Please search to see if an issue already exists for this feature request.
|
||||
options:
|
||||
- label: I have searched the existing issues
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: problem-description
|
||||
attributes:
|
||||
label: Is your feature request related to a problem?
|
||||
description: A clear and concise description of what the problem is.
|
||||
placeholder: e.g., I'm always frustrated when I have to...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: solution-description
|
||||
attributes:
|
||||
label: Describe the solution you'd like
|
||||
description: A clear and concise description of what you want to happen.
|
||||
placeholder: Describe the feature or change you're proposing
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: alternatives
|
||||
attributes:
|
||||
label: Describe alternatives you've considered
|
||||
description: A clear and concise description of any alternative solutions or features you've considered.
|
||||
placeholder: Have you considered any workarounds or alternative approaches?
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
id: additional-context
|
||||
attributes:
|
||||
label: Additional context
|
||||
description: Add any other context or screenshots about the feature request here.
|
||||
placeholder: Links, mockups, or any additional information
|
||||
validations:
|
||||
required: false
|
||||
@@ -0,0 +1,43 @@
|
||||
# Pull Request Template
|
||||
|
||||
## Description
|
||||
<!-- Briefly describe the purpose of this PR. -->
|
||||
|
||||
## Motivation
|
||||
<!-- Explain why this change is needed. What problem does it solve? -->
|
||||
|
||||
## Type of Change
|
||||
- [ ] New Feature
|
||||
- [ ] Bug Fix
|
||||
- [ ] Refactor / Code Cleanup
|
||||
- [ ] Documentation Update
|
||||
- [ ] Other (please specify)
|
||||
|
||||
## Related Issue(s)
|
||||
<!-- Link to any related issue(s) (e.g., #123) -->
|
||||
|
||||
## Screenshots / Video
|
||||
<!-- Include screenshots or a short video demonstrating the change. If the change adds a new UI feature, attach an image. If it adds functionality best shown via video, embed a video. -->
|
||||
|
||||
**Screenshot** (if applicable):
|
||||
|
||||
```markdown
![Screenshot description](path/to/screenshot.png)
```
|
||||
|
||||
**Video** (if applicable):
|
||||
|
||||
```html
|
||||
<video src="path/to/video.mp4" controls width="600"></video>
|
||||
```
|
||||
|
||||
## Testing
|
||||
<!-- Describe how reviewers can test the changes. Include steps, commands, or environment setup. -->
|
||||
|
||||
## Checklist
|
||||
- [ ] I have performed a self-review of my code.
|
||||
- [ ] I have added any necessary screenshots or videos.
|
||||
- [ ] I have linked related issue(s) and updated the changelog if applicable.
|
||||
|
||||
---
|
||||
*Thank you for contributing!*
|
||||
@@ -0,0 +1,253 @@
|
||||
|
||||
name: Build Electron App
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
arch:
|
||||
description: 'Architecture to build'
|
||||
required: true
|
||||
default: 'both'
|
||||
type: choice
|
||||
options:
|
||||
- arm64
|
||||
- x64
|
||||
- both
|
||||
|
||||
jobs:
|
||||
build-windows:
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22'
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Build Windows app
|
||||
run: npm run build:win
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Upload Windows build
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: windows-installer
|
||||
path: release/**/*.exe
|
||||
retention-days: 30
|
||||
|
||||
build-macos:
|
||||
runs-on: macos-latest
|
||||
strategy:
|
||||
matrix:
|
||||
arch: ${{ github.event.inputs.arch == 'both' && fromJSON('["arm64", "x64"]') || fromJSON(format('["{0}"]', github.event.inputs.arch)) }}
|
||||
|
||||
steps:
|
||||
# ─── Checkout ─────────────────────────────────────────────
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
# ─── Setup Node.js ────────────────────────────────────────
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
|
||||
# ─── Setup Python (needed by some native deps) ────────────
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
# ─── Install Dependencies ─────────────────────────────────
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
# ─── Import Code Signing Certificate ──────────────────────
|
||||
# This is the KEY step that makes CI signing work.
|
||||
# We create a temporary keychain, import the .p12 cert into it,
|
||||
# and set it as the default so codesign can find it.
|
||||
- name: Import code signing certificate
|
||||
env:
|
||||
MAC_CERTIFICATE_P12: ${{ secrets.MAC_CERTIFICATE_P12 }}
|
||||
MAC_CERTIFICATE_PASSWORD: ${{ secrets.MAC_CERTIFICATE_PASSWORD }}
|
||||
run: |
|
||||
# Create a temporary keychain
|
||||
KEYCHAIN_PATH=$RUNNER_TEMP/build.keychain-db
|
||||
KEYCHAIN_PASSWORD=$(openssl rand -base64 32)
|
||||
|
||||
# Create and configure keychain
|
||||
security create-keychain -p "$KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH"
|
||||
security set-keychain-settings -lut 21600 "$KEYCHAIN_PATH"
|
||||
security unlock-keychain -p "$KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH"
|
||||
|
||||
# Decode and import certificate
|
||||
echo "$MAC_CERTIFICATE_P12" | base64 --decode > $RUNNER_TEMP/certificate.p12
|
||||
security import $RUNNER_TEMP/certificate.p12 \
|
||||
-k "$KEYCHAIN_PATH" \
|
||||
-P "$MAC_CERTIFICATE_PASSWORD" \
|
||||
-T /usr/bin/codesign \
|
||||
-T /usr/bin/security
|
||||
|
||||
# Allow codesign to access the keychain without UI prompt
|
||||
security set-key-partition-list -S apple-tool:,apple: -k "$KEYCHAIN_PASSWORD" "$KEYCHAIN_PATH"
|
||||
|
||||
# Add to keychain search path (makes it the default)
|
||||
security list-keychains -d user -s "$KEYCHAIN_PATH" $(security list-keychains -d user | tr -d '"')
|
||||
|
||||
# Verify the identity is available
|
||||
security find-identity -v -p codesigning "$KEYCHAIN_PATH"
|
||||
|
||||
# Clean up the .p12 file
|
||||
rm -f $RUNNER_TEMP/certificate.p12
|
||||
|
||||
# ─── Build Vite + Electron ────────────────────────────────
|
||||
- name: Build Vite + Electron
|
||||
run: npx tsc && npx vite build
|
||||
|
||||
# ─── Package with electron-builder ────────────────────────
|
||||
# electron-builder handles deep codesigning the .app bundle
|
||||
# "notarize: false" in electron-builder.json5 prevents it from
|
||||
# trying its own notarization flow
|
||||
- name: Package .app bundle
|
||||
run: npx electron-builder --mac --${{ matrix.arch }} --dir
|
||||
env:
|
||||
CSC_NAME: "Samir Patil (N26FZ4GW28)"
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# ─── Read version from package.json ───────────────────────
|
||||
- name: Get version
|
||||
id: version
|
||||
# Double quotes around the JS and single quotes inside it: a backslash-escaped
# quote inside a single-quoted shell string would reach node literally and fail to parse.
run: echo "version=$(node -p "require('./package.json').version")" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# ─── Locate the .app bundle ───────────────────────────────
|
||||
- name: Find .app bundle
|
||||
id: find_app
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
echo "=== Release directory contents ==="
|
||||
ls -laR "release/${VERSION}/" || echo "release/${VERSION}/ not found"
|
||||
echo "=== Searching for .app bundle ==="
|
||||
APP_BUNDLE=$(find "release/${VERSION}" -maxdepth 4 -name "*.app" -type d | head -n1)
|
||||
if [ -z "$APP_BUNDLE" ]; then
|
||||
echo "::error::No .app bundle found in release/${VERSION}/"
|
||||
exit 1
|
||||
fi
|
||||
echo "app_bundle=$APP_BUNDLE" >> $GITHUB_OUTPUT
|
||||
echo "Found: $APP_BUNDLE"
|
||||
|
||||
# ─── Verify .app signature ────────────────────────────────
|
||||
- name: Verify .app code signature
|
||||
run: codesign --verify --deep --strict "${{ steps.find_app.outputs.app_bundle }}"
|
||||
|
||||
# ─── Create DMG ───────────────────────────────────────────
|
||||
- name: Create DMG
|
||||
id: dmg
|
||||
run: |
|
||||
VERSION="${{ steps.version.outputs.version }}"
|
||||
ARCH="${{ matrix.arch }}"
|
||||
DMG_NAME="Openscreen-Mac-${ARCH}-${VERSION}.dmg"
|
||||
RELEASE_DIR="release/${VERSION}"
|
||||
DMG_OUTPUT="${RELEASE_DIR}/${DMG_NAME}"
|
||||
STAGING="${RELEASE_DIR}/dmg-staging"
|
||||
|
||||
mkdir -p "$STAGING"
|
||||
cp -R "${{ steps.find_app.outputs.app_bundle }}" "$STAGING/"
|
||||
ln -s /Applications "$STAGING/Applications"
|
||||
|
||||
hdiutil create \
|
||||
-srcfolder "$STAGING" \
|
||||
-volname "Openscreen" \
|
||||
-fs HFS+ \
|
||||
-fsargs "-c c=64,a=16,e=16" \
|
||||
-format UDBZ \
|
||||
"$DMG_OUTPUT"
|
||||
|
||||
rm -rf "$STAGING"
|
||||
|
||||
echo "dmg_path=$DMG_OUTPUT" >> $GITHUB_OUTPUT
|
||||
echo "dmg_name=$DMG_NAME" >> $GITHUB_OUTPUT
|
||||
|
||||
# ─── Sign DMG ─────────────────────────────────────────────
|
||||
- name: Sign DMG
|
||||
run: |
|
||||
codesign --force \
|
||||
--sign "Developer ID Application: Samir Patil (N26FZ4GW28)" \
|
||||
--timestamp \
|
||||
"${{ steps.dmg.outputs.dmg_path }}"
|
||||
|
||||
# ─── Notarize DMG ────────────────────────────────────────
|
||||
# On CI we can't use keychain profiles for notarytool, so we
|
||||
# pass credentials directly via env vars / flags
|
||||
- name: Notarize DMG
  # Credentials and the DMG path are handed to the script through env rather
  # than being interpolated into the script text, so quotes, `$`, or backticks
  # inside a secret cannot break or alter the command line.
  env:
    APPLE_ID: ${{ secrets.APPLE_ID }}
    APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
    APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
    DMG_PATH: ${{ steps.dmg.outputs.dmg_path }}
  run: |
    xcrun notarytool submit "$DMG_PATH" \
      --apple-id "$APPLE_ID" \
      --team-id "$APPLE_TEAM_ID" \
      --password "$APPLE_APP_SPECIFIC_PASSWORD" \
      --wait
  timeout-minutes: 15
|
||||
|
||||
# ─── Staple ───────────────────────────────────────────────
|
||||
- name: Staple notarization ticket
|
||||
run: xcrun stapler staple "${{ steps.dmg.outputs.dmg_path }}"
|
||||
|
||||
# ─── Validate ─────────────────────────────────────────────
|
||||
- name: Validate stapled DMG
|
||||
run: |
|
||||
xcrun stapler validate "${{ steps.dmg.outputs.dmg_path }}"
|
||||
spctl -a -vv -t install "${{ steps.dmg.outputs.dmg_path }}"
|
||||
|
||||
# ─── Upload Artifact ──────────────────────────────────────
|
||||
- name: Upload notarized DMG
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: openscreen-mac-${{ matrix.arch }}
|
||||
path: ${{ steps.dmg.outputs.dmg_path }}
|
||||
retention-days: 30
|
||||
|
||||
# ─── Cleanup Keychain ─────────────────────────────────────
|
||||
- name: Cleanup keychain
|
||||
if: always()
|
||||
run: security delete-keychain $RUNNER_TEMP/build.keychain-db || true
|
||||
|
||||
build-linux:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '22'
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
# bsdtar (from libarchive-tools) is required by fpm to build pacman
|
||||
# packages. AppImage and deb don't need it; ubuntu-latest doesn't ship it.
|
||||
- name: Install pacman build dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y libarchive-tools
|
||||
|
||||
- name: Build Linux app
|
||||
run: npm run build:linux
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Upload Linux build
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: linux-installer
|
||||
path: |
|
||||
release/**/*.AppImage
|
||||
release/**/*.zsync
|
||||
release/**/*.deb
|
||||
release/**/*.pacman
|
||||
retention-days: 30
|
||||
@@ -0,0 +1,118 @@
|
||||
name: Bump Nix package on release
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: "Release tag to bump (e.g. v1.5.0)"
|
||||
required: true
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
bump:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' || !github.event.release.prerelease
|
||||
steps:
|
||||
- name: Resolve tag and version
|
||||
id: meta
|
||||
env:
|
||||
GH_EVENT_TAG: ${{ github.event.release.tag_name }}
|
||||
INPUT_TAG: ${{ inputs.tag }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
TAG="${GH_EVENT_TAG:-$INPUT_TAG}"
|
||||
if [[ -z "$TAG" ]]; then
|
||||
echo "::error::No tag resolved from release event or workflow input"
|
||||
exit 1
|
||||
fi
|
||||
VERSION="${TAG#v}"
|
||||
BRANCH="chore/bump-nix-${VERSION}"
|
||||
echo "tag=$TAG" >> "$GITHUB_OUTPUT"
|
||||
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
||||
echo "branch=$BRANCH" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Checkout main
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: main
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install Nix
|
||||
uses: cachix/install-nix-action@v27
|
||||
with:
|
||||
nix_path: nixpkgs=channel:nixos-unstable
|
||||
extra_nix_config: |
|
||||
experimental-features = nix-command flakes
|
||||
|
||||
- name: Compute npmDepsHash
|
||||
id: hash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
HASH=$(nix run nixpkgs#prefetch-npm-deps -- package-lock.json)
|
||||
if [[ -z "$HASH" ]]; then
|
||||
echo "::error::prefetch-npm-deps returned an empty hash"
|
||||
exit 1
|
||||
fi
|
||||
echo "hash=$HASH" >> "$GITHUB_OUTPUT"
|
||||
echo "Computed npmDepsHash: $HASH"
|
||||
|
||||
- name: Update nix/package.nix
|
||||
env:
|
||||
VERSION: ${{ steps.meta.outputs.version }}
|
||||
HASH: ${{ steps.hash.outputs.hash }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Update version line: ` version = "<anything>";`
|
||||
sed -i -E "s|^([[:space:]]*version[[:space:]]*=[[:space:]]*)\"[^\"]*\";|\1\"${VERSION}\";|" nix/package.nix
|
||||
# Update npmDepsHash line: ` npmDepsHash = "<anything>";`
|
||||
sed -i -E "s|^([[:space:]]*npmDepsHash[[:space:]]*=[[:space:]]*)\"[^\"]*\";|\1\"${HASH}\";|" nix/package.nix
|
||||
|
||||
echo "=== diff ==="
|
||||
git --no-pager diff nix/package.nix || true
|
||||
|
||||
- name: Create PR
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
VERSION: ${{ steps.meta.outputs.version }}
|
||||
HASH: ${{ steps.hash.outputs.hash }}
|
||||
BRANCH: ${{ steps.meta.outputs.branch }}
|
||||
TAG: ${{ steps.meta.outputs.tag }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
if git diff --quiet -- nix/package.nix; then
|
||||
echo "nix/package.nix already at v${VERSION} with this hash — nothing to do."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
|
||||
# Replace any prior bump branch to keep the workflow idempotent.
|
||||
git push origin --delete "$BRANCH" 2>/dev/null || true
|
||||
git checkout -b "$BRANCH"
|
||||
git add nix/package.nix
|
||||
git commit -m "chore: bump nix package to v${VERSION}"
|
||||
git push -u origin "$BRANCH"
|
||||
|
||||
gh pr create \
|
||||
--title "chore: bump nix package to v${VERSION}" \
|
||||
--base main \
|
||||
--head "$BRANCH" \
|
||||
--body "$(cat <<EOF
|
||||
Automated bump triggered by release \`${TAG}\`.
|
||||
|
||||
- \`version\` → \`${VERSION}\`
|
||||
- \`npmDepsHash\` → \`${HASH}\` (computed via \`prefetch-npm-deps package-lock.json\`)
|
||||
|
||||
Merge this so Nix users (NixOS, Home Manager, \`nix run github:siddharthvaddem/openscreen\`) pick up the new release.
|
||||
|
||||
> Note: PRs opened by \`GITHUB_TOKEN\` don't auto-trigger CI. The diff is two lines — review the change here, then merge. If you want CI to run, push an empty commit to this branch or close-and-reopen the PR.
|
||||
EOF
|
||||
)"
|
||||
@@ -0,0 +1,58 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
name: Lint
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
- run: npm run lint
|
||||
|
||||
typecheck:
|
||||
name: Type Check
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
- run: npx tsc --noEmit
|
||||
|
||||
test:
|
||||
name: Test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
- run: npm run test
|
||||
- run: npm run test:browser:install
|
||||
- run: npm run test:browser
|
||||
|
||||
build:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: npm
|
||||
- run: npm ci
|
||||
- run: npx vite build
|
||||
@@ -0,0 +1,519 @@
|
||||
name: PR to Discord Forum
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, reopened, ready_for_review, converted_to_draft, synchronize, edited, labeled, unlabeled, closed]
|
||||
pull_request_review:
|
||||
types: [submitted]
|
||||
issue_comment:
|
||||
types: [created]
|
||||
schedule:
|
||||
- cron: "0 12 * * 1"
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
issues: read
|
||||
|
||||
jobs:
|
||||
notify:
|
||||
if: github.event_name != 'schedule' && github.actor != 'github-actions[bot]'
|
||||
concurrency:
|
||||
group: discord-pr-sync-${{ github.repository }}-${{ github.event.pull_request.number || github.event.issue.number || github.run_id }}
|
||||
cancel-in-progress: false
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Sync PR activity to Discord forum thread
|
||||
id: sync
|
||||
uses: actions/github-script@v7
|
||||
env:
|
||||
DISCORD_WEBHOOK_URL: ${{ secrets.DISCORD_WEBHOOK_URL }}
|
||||
DISCORD_PR_FORUM_WEBHOOK: ${{ secrets.DISCORD_PR_FORUM_WEBHOOK }}
|
||||
DISCORD_WEBHOOK_USERNAME: ${{ secrets.DISCORD_WEBHOOK_USERNAME }}
|
||||
DISCORD_WEBHOOK_AVATAR_URL: ${{ secrets.DISCORD_WEBHOOK_AVATAR_URL }}
|
||||
DISCORD_BOT_TOKEN: ${{ secrets.DISCORD_BOT_TOKEN }}
|
||||
DISCORD_REVIEWER_ROLE_ID: ${{ secrets.DISCORD_REVIEWER_ROLE_ID }}
|
||||
DISCORD_ALERT_WEBHOOK_URL: ${{ secrets.DISCORD_ALERT_WEBHOOK_URL }}
|
||||
with:
|
||||
script: |
|
||||
// ── Settings read from the step's `env` block (unset values become "") ──

// Display identity used for webhook posts.
const WEBHOOK_USERNAME = (process.env.DISCORD_WEBHOOK_USERNAME || "OpenScreen").trim();
const WEBHOOK_AVATAR = (process.env.DISCORD_WEBHOOK_AVATAR_URL || "").trim();

// Hidden HTML comment that stores the Discord thread id; capture group 1 is the id.
const THREAD_MARKER_REGEX = /<!--\s*discord-thread-id:(\d+)\s*-->/i;

// Forum webhook: DISCORD_WEBHOOK_URL takes precedence, DISCORD_PR_FORUM_WEBHOOK is the fallback.
const webhookUrl = (
  process.env.DISCORD_WEBHOOK_URL ||
  process.env.DISCORD_PR_FORUM_WEBHOOK ||
  ""
).trim();
const botToken = (process.env.DISCORD_BOT_TOKEN || "").trim();
const reviewerRoleId = (process.env.DISCORD_REVIEWER_ROLE_ID || "").trim();
const alertWebhookUrl = (process.env.DISCORD_ALERT_WEBHOOK_URL || "").trim();

// Tag ids keyed by PR status; one of these is chosen by desiredStatusTag().
const TAGS = {
  open: "1493976692967080096",
  draft: "1493976782028935279",
  ready: "1493976833626996756",
  changes: "1493976909875515564",
  approved: "1493976951038152764",
  merged: "1493977049709281320",
  closed: "1493977108102516786",
};

// Tag ids keyed by GitHub label name (looked up lower-cased first by tagIdsFromLabels()).
const labelTagMap = {
  bug: "1493977562773458975",
  enhancement: "1493977619216207993",
  documentation: "1493978565153394830",
};
|
||||
|
||||
/**
 * Prepare a PR body for use as an embed description.
 *
 * Converts CRLF to LF, collapses runs of three or more newlines to a single
 * blank line, trims the result, and truncates it to at most `maxLen` UTF-16
 * code units (the last one being an ellipsis).
 *
 * @param {string|null|undefined} text - Raw description; falsy values yield a placeholder.
 * @param {number} [maxLen=3500] - Maximum length of the returned string.
 * @returns {string} The cleaned description, or "No description provided.".
 */
function cleanDescription(text, maxLen = 3500) {
  if (!text) return "No description provided.";

  const normalized = text
    .replace(/\r\n/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
  if (normalized.length <= maxLen) return normalized;

  let head = normalized.slice(0, maxLen - 1);
  // Cutting by code-unit index can split an astral character (e.g. an emoji);
  // drop a dangling high surrogate so the output stays well-formed text.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return `${head}…`;
}
|
||||
|
||||
/**
 * Cap a thread name at 95 UTF-16 code units.
 *
 * @param {string} name - Proposed thread name.
 * @returns {string} `name` unchanged when short enough, otherwise its leading part.
 */
function trimThreadName(name) {
  if (name.length <= 95) return name;

  let cut = name.slice(0, 95);
  // Do not leave half of a surrogate pair (e.g. a split emoji) at the end.
  const lastUnit = cut.charCodeAt(cut.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cut = cut.slice(0, -1);
  }
  return cut;
}
|
||||
|
||||
/**
 * Read the Discord thread id stored in a PR body marker.
 *
 * @param {string|null|undefined} body - PR body text.
 * @returns {string|null} The digits captured by THREAD_MARKER_REGEX, or null
 *   when the body is empty or has no marker.
 */
function extractThreadId(body) {
  const found = body ? body.match(THREAD_MARKER_REGEX) : null;
  return found ? found[1] : null;
}
|
||||
|
||||
/**
 * Return the PR body with the thread marker appended at the end.
 *
 * The first existing marker (if any) is removed before the new one is added.
 *
 * @param {string|null|undefined} body - Current PR body.
 * @param {string|number} threadId - Thread id to embed in the marker.
 * @returns {string} Trimmed body followed by a blank line and the marker.
 */
function upsertThreadMarker(body, threadId) {
  const withoutMarker = (body || "").replace(THREAD_MARKER_REGEX, "").trim();
  const marker = `<!-- discord-thread-id:${threadId} -->`;
  return `${withoutMarker}\n\n${marker}`.trim();
}
|
||||
|
||||
/**
 * POST a message to the configured webhook (`webhookUrl`).
 *
 * The request always carries `wait=true`, and `thread_id` when
 * `options.threadId` is given. Default username, avatar, and an empty
 * `allowed_mentions.parse` list are applied first; keys in `payload` override them.
 *
 * @param {object} payload - Webhook message fields.
 * @param {{threadId?: string|number}} [options] - Optional target thread.
 * @returns {Promise<object>} Parsed JSON response, or `{}` for an empty or non-JSON body.
 * @throws {Error} When the response status is not OK.
 */
async function discordPost(payload, options = {}) {
  const target = new URL(webhookUrl);
  target.searchParams.set("wait", "true");
  if (options.threadId) {
    target.searchParams.set("thread_id", String(options.threadId));
  }

  const message = {
    username: WEBHOOK_USERNAME,
    avatar_url: WEBHOOK_AVATAR,
    allowed_mentions: { parse: [] },
    ...payload,
  };

  const res = await fetch(target.toString(), {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(message),
  });

  const contentType = (res.headers.get("content-type") || "").toLowerCase();
  const raw = await res.text();

  if (!res.ok) {
    throw new Error(`Discord API error ${res.status}: ${raw}`);
  }

  if (!raw) {
    return {};
  }
  if (contentType.includes("application/json")) {
    return JSON.parse(raw);
  }

  // A 2xx answer that is not JSON (e.g. HTML from an intermediary) is tolerated
  // rather than crashing on JSON.parse.
  core.warning(`Discord webhook returned non-JSON response (content-type: ${contentType || "unknown"}).`);
  return {};
}
|
||||
|
||||
/**
 * PATCH a Discord channel/thread using the bot token.
 *
 * Does nothing when no bot token is configured or `threadId` is falsy.
 * A non-OK response is reported with `core.warning` and does not throw.
 *
 * @param {string} threadId - Id used in the `/channels/{id}` URL.
 * @param {object} patchBody - JSON body of the PATCH request.
 * @returns {Promise<void>}
 */
async function patchDiscordThread(threadId, patchBody) {
  const canPatch = Boolean(botToken) && Boolean(threadId);
  if (!canPatch) return;

  const requestInit = {
    method: "PATCH",
    headers: {
      "Authorization": `Bot ${botToken}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(patchBody),
  };
  const res = await fetch(`https://discord.com/api/v10/channels/${threadId}`, requestInit);
  if (res.ok) return;

  const detail = await res.text();
  core.warning(`Discord thread patch failed (${res.status}): ${detail}`);
}
|
||||
|
||||
/**
 * Pick the status tag id for a PR.
 *
 * Rules are checked in priority order: merged, closed (unmerged), changes
 * requested, approved, draft, ready. A rule only wins when its tag id in
 * `TAGS` is set; otherwise the next rule is tried.
 *
 * @param {{merged?: boolean, closed?: boolean, draft?: boolean, reviewState?: string}} prState
 * @returns {string|null} The chosen tag id, `TAGS.open` as fallback, or null.
 */
function desiredStatusTag(prState) {
  const rules = [
    [prState.merged, TAGS.merged],
    [prState.closed && !prState.merged, TAGS.closed],
    [prState.reviewState === "CHANGES_REQUESTED", TAGS.changes],
    [prState.reviewState === "APPROVED", TAGS.approved],
    [prState.draft, TAGS.draft],
    [!prState.draft, TAGS.ready],
  ];

  for (const [applies, tag] of rules) {
    if (applies && tag) return tag;
  }
  return TAGS.open || null;
}
|
||||
|
||||
/**
 * Map GitHub label names to the tag ids configured in `labelTagMap`.
 *
 * Each label is looked up lower-cased first, then verbatim. Only the map's own
 * keys are considered, so a label named like an inherited object member
 * (e.g. "constructor", "toString") cannot yield a bogus tag id.
 *
 * @param {string[]} labels - Label names from the PR.
 * @returns {string[]} Tag ids (as strings) for the labels that have a mapping.
 */
function tagIdsFromLabels(labels) {
  const lookup = (key) =>
    Object.prototype.hasOwnProperty.call(labelTagMap, key) ? labelTagMap[key] : undefined;

  const out = [];
  for (const label of labels) {
    const mapped = lookup(label.toLowerCase()) || lookup(label);
    if (mapped) out.push(String(mapped));
  }
  return out;
}
|
||||
|
||||
/**
 * Resolve the pull request the current event refers to.
 *
 * - `pull_request_target` / `pull_request_review`: taken from the event payload.
 * - `issue_comment`: fetched through the REST API, but only when the commented
 *   issue is a pull request.
 * - any other event: null.
 *
 * @returns {Promise<object|null>} The pull request object, or null when there is none.
 */
async function getPullRequest() {
  switch (context.eventName) {
    case "pull_request_target":
    case "pull_request_review":
      return context.payload.pull_request || null;

    case "issue_comment": {
      const issue = context.payload.issue;
      if (!issue?.pull_request) return null;

      const response = await github.rest.pulls.get({
        owner: context.repo.owner,
        repo: context.repo.repo,
        pull_number: issue.number,
      });
      return response.data;
    }

    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Summarize the review state of a pull request.
 *
 * Only each reviewer's most recent APPROVED / CHANGES_REQUESTED review counts,
 * so a reviewer who requested changes and later approved is treated as
 * approving. Other review states (COMMENTED, DISMISSED, PENDING) never replace
 * a reviewer's earlier verdict. All pages of reviews are read.
 *
 * NOTE(review): relies on the API listing reviews oldest-first — confirm
 * against the "List reviews for a pull request" documentation.
 *
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {number} pullNumber - Pull request number.
 * @returns {Promise<"CHANGES_REQUESTED"|"APPROVED"|"NONE">}
 */
async function getReviewState(owner, repo, pullNumber) {
  const reviews = await github.paginate(github.rest.pulls.listReviews, {
    owner,
    repo,
    pull_number: pullNumber,
    per_page: 100,
  });

  // reviewer login -> latest decisive state; later reviews overwrite earlier ones.
  const latestByReviewer = new Map();
  for (const review of reviews) {
    const state = (review.state || "").toUpperCase();
    if (state !== "CHANGES_REQUESTED" && state !== "APPROVED") continue;
    // `user` can be null (e.g. deleted account); group those under one key.
    const reviewer = review.user?.login || "";
    latestByReviewer.set(reviewer, state);
  }

  const verdicts = [...latestByReviewer.values()];
  if (verdicts.includes("CHANGES_REQUESTED")) return "CHANGES_REQUESTED";
  if (verdicts.includes("APPROVED")) return "APPROVED";
  return "NONE";
}
|
||||
|
||||
/**
 * Best-effort failure notification to the alert webhook.
 *
 * Skipped when no alert webhook is configured. The message includes a link to
 * the current workflow run. Any error thrown while building or sending the
 * request is downgraded to a warning; the HTTP status is not inspected.
 *
 * @param {string} message - Human-readable failure description.
 * @returns {Promise<void>}
 */
async function sendFailureAlert(message) {
  if (!alertWebhookUrl) {
    return;
  }

  try {
    const alert = {
      username: "OpenScreen",
      avatar_url: WEBHOOK_AVATAR,
      content: `⚠️ PR Discord sync failed\n${message}\nRun: ${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
      allowed_mentions: { parse: [] },
    };
    await fetch(alertWebhookUrl, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(alert),
    });
  } catch {
    core.warning("Failed to send failure alert webhook.");
  }
}
|
||||
|
||||
try {
|
||||
const pr = await getPullRequest();
|
||||
if (!pr) {
|
||||
core.info("No PR context found. Skipping.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!webhookUrl) {
|
||||
const strictEvents = new Set(["pull_request_target", "workflow_dispatch"]);
|
||||
const msg =
|
||||
`Discord sync skipped: webhook secret unavailable for event '${context.eventName}'. ` +
|
||||
"Set either DISCORD_WEBHOOK_URL or DISCORD_PR_FORUM_WEBHOOK in repository secrets.";
|
||||
if (strictEvents.has(context.eventName)) {
|
||||
core.setFailed(msg);
|
||||
} else {
|
||||
core.warning(msg);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const action = context.payload.action || "";
|
||||
const owner = context.repo.owner;
|
||||
const repo = context.repo.repo;
|
||||
const number = pr.number;
|
||||
const title = pr.title;
|
||||
const author = pr.user?.login || "unknown";
|
||||
const url = pr.html_url;
|
||||
const authorUrl = pr.user?.html_url || "";
|
||||
const authorAvatar = pr.user?.avatar_url || "";
|
||||
const base = pr.base?.ref || "";
|
||||
const head = pr.head?.ref || "";
|
||||
const repoFullName = pr.base?.repo?.full_name || `${owner}/${repo}`;
|
||||
const labels = (pr.labels || []).map((l) => l.name);
|
||||
const body = (pr.body || "").trim();
|
||||
const reviewState = await getReviewState(owner, repo, number);
|
||||
|
||||
let threadId = extractThreadId(body);
|
||||
const shouldCreateThread =
|
||||
context.eventName === "pull_request_target" &&
|
||||
["opened", "reopened", "ready_for_review"].includes(action) &&
|
||||
!threadId;
|
||||
|
||||
if (shouldCreateThread) {
|
||||
const fields = [
|
||||
{ name: "PR", value: `[#${number}](${url})`, inline: true },
|
||||
{ name: "Author", value: `[${author}](${authorUrl || url})`, inline: true },
|
||||
{ name: "Status", value: pr.draft ? "Draft" : "Open", inline: true },
|
||||
{ name: "Branches", value: `\`${head}\` -> \`${base}\``, inline: true },
|
||||
{ name: "Changes", value: `+${pr.additions} / -${pr.deletions}`, inline: true },
|
||||
{ name: "Files Changed", value: String(pr.changed_files), inline: true }
|
||||
];
|
||||
|
||||
if (labels.length) {
|
||||
fields.push({
|
||||
name: "Labels",
|
||||
value: labels.map((l) => `\`${l}\``).join(" "),
|
||||
inline: false,
|
||||
});
|
||||
}
|
||||
|
||||
const statusTag = desiredStatusTag({ draft: pr.draft, reviewState, merged: false, closed: false });
|
||||
const mappedLabelTags = tagIdsFromLabels(labels);
|
||||
const appliedTags = [...new Set([statusTag, ...mappedLabelTags].filter(Boolean))];
|
||||
|
||||
const createPayload = {
|
||||
content: action === "ready_for_review" ? "🔔 PR is now ready for review" : "🔔 New pull request opened",
|
||||
thread_name: trimThreadName(`PR #${number} - ${title}`),
|
||||
applied_tags: appliedTags,
|
||||
embeds: [
|
||||
{
|
||||
title: `PR #${number}: ${title}`,
|
||||
url,
|
||||
description: cleanDescription(body),
|
||||
color: pr.draft ? 15105570 : 1998671,
|
||||
author: {
|
||||
name: author,
|
||||
url: authorUrl || undefined,
|
||||
icon_url: authorAvatar || undefined,
|
||||
},
|
||||
fields,
|
||||
footer: { text: repoFullName },
|
||||
timestamp: new Date().toISOString(),
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const result = await discordPost(createPayload);
|
||||
const createdThreadId = result.channel_id || null;
|
||||
if (createdThreadId) {
|
||||
const updatedBody = upsertThreadMarker(body, createdThreadId);
|
||||
await github.rest.pulls.update({ owner, repo, pull_number: number, body: updatedBody });
|
||||
core.info(`Created Discord thread ${createdThreadId} and stored mapping.`);
|
||||
} else {
|
||||
core.warning("Discord thread created but channel_id missing in response.");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (!threadId) {
|
||||
core.info("No mapped Discord thread ID found; skipping update event.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (context.eventName === "pull_request_target" && ["edited", "labeled", "unlabeled", "ready_for_review", "converted_to_draft"].includes(action)) {
|
||||
const statusTag = desiredStatusTag({
|
||||
draft: action === "converted_to_draft" ? true : pr.draft,
|
||||
reviewState,
|
||||
merged: false,
|
||||
closed: false,
|
||||
});
|
||||
const mappedLabelTags = tagIdsFromLabels(labels);
|
||||
const appliedTags = [...new Set([statusTag, ...mappedLabelTags].filter(Boolean))];
|
||||
await patchDiscordThread(threadId, {
|
||||
name: trimThreadName(`PR #${number} - ${title}`),
|
||||
...(appliedTags.length ? { applied_tags: appliedTags } : {}),
|
||||
});
|
||||
}
|
||||
|
||||
let updateMessage = null;
|
||||
let updateEmbed = null;
|
||||
|
||||
if (context.eventName === "pull_request_target") {
|
||||
if (action === "synchronize") {
|
||||
const { data: commits } = await github.rest.pulls.listCommits({ owner, repo, pull_number: number, per_page: 5 });
|
||||
const list = commits.map((c) => `- \`${c.sha.slice(0, 7)}\` ${c.commit.message.split("\n")[0]}`).join("\n") || "- No commit details";
|
||||
updateMessage = `🧩 New commits pushed to PR #${number}`;
|
||||
updateEmbed = {
|
||||
title: `Commit Update • PR #${number}`,
|
||||
url: `${url}/files`,
|
||||
description: `${list}`,
|
||||
color: 1998671,
|
||||
footer: { text: repoFullName },
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
} else if (action === "edited") {
|
||||
updateMessage = `✏️ PR #${number} details were edited`;
|
||||
updateEmbed = {
|
||||
title: `PR Updated • #${number}`,
|
||||
url,
|
||||
description: cleanDescription(body, 1200),
|
||||
color: 1998671,
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
} else if (action === "closed") {
|
||||
const isMerged = !!pr.merged;
|
||||
const statusTag = desiredStatusTag({ draft: false, reviewState, merged: isMerged, closed: true });
|
||||
const mappedLabelTags = tagIdsFromLabels(labels);
|
||||
const appliedTags = [...new Set([statusTag, ...mappedLabelTags].filter(Boolean))];
|
||||
await patchDiscordThread(threadId, {
|
||||
...(appliedTags.length ? { applied_tags: appliedTags } : {}),
|
||||
...(isMerged ? { archived: true, locked: true } : {}),
|
||||
});
|
||||
|
||||
updateMessage = isMerged
|
||||
? `✅ PR #${number} was merged`
|
||||
: `🛑 PR #${number} was closed without merge`;
|
||||
updateEmbed = {
|
||||
title: isMerged ? `Merged • PR #${number}` : `Closed • PR #${number}`,
|
||||
url,
|
||||
description: isMerged ? "This PR has been merged into the base branch." : "This PR was closed before merge.",
|
||||
color: isMerged ? 5763719 : 15158332,
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
} else if (action === "ready_for_review") {
|
||||
updateMessage = `🚀 PR #${number} moved from draft to ready for review`;
|
||||
if (reviewerRoleId) updateMessage += ` <@&${reviewerRoleId}>`;
|
||||
} else if (action === "converted_to_draft") {
|
||||
updateMessage = `📝 PR #${number} converted to draft`;
|
||||
}
|
||||
} else if (context.eventName === "pull_request_review") {
|
||||
const review = context.payload.review;
|
||||
if (review) {
|
||||
const state = (review.state || "commented").toUpperCase();
|
||||
const reviewer = review.user?.login || "reviewer";
|
||||
updateMessage = `🧪 Review ${state} by **${reviewer}** on PR #${number}`;
|
||||
if (state === "CHANGES_REQUESTED" && reviewerRoleId) updateMessage += ` <@&${reviewerRoleId}>`;
|
||||
updateEmbed = {
|
||||
title: `Review ${state} • PR #${number}`,
|
||||
url: review.html_url || url,
|
||||
description: cleanDescription(review.body || "No review note.", 1000),
|
||||
color: state === "APPROVED" ? 5763719 : state === "CHANGES_REQUESTED" ? 15158332 : 1998671,
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
|
||||
if (state === "CHANGES_REQUESTED" || state === "APPROVED") {
|
||||
const statusTag = desiredStatusTag({ draft: pr.draft, reviewState: state, merged: false, closed: false });
|
||||
const mappedLabelTags = tagIdsFromLabels(labels);
|
||||
const appliedTags = [...new Set([statusTag, ...mappedLabelTags].filter(Boolean))];
|
||||
await patchDiscordThread(threadId, {
|
||||
...(appliedTags.length ? { applied_tags: appliedTags } : {}),
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (context.eventName === "issue_comment") {
|
||||
const comment = context.payload.comment;
|
||||
if (comment) {
|
||||
const commenter = comment.user?.login || "user";
|
||||
updateMessage = `💬 New comment by **${commenter}** on PR #${number}`;
|
||||
updateEmbed = {
|
||||
title: `New PR Comment • #${number}`,
|
||||
url: comment.html_url || url,
|
||||
description: cleanDescription(comment.body || "No comment body.", 1000),
|
||||
color: 1998671,
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (!updateMessage && !updateEmbed) {
|
||||
core.info("No Discord update message for this event/action. Skipping.");
|
||||
return;
|
||||
}
|
||||
|
||||
const payload = { content: updateMessage || "" };
|
||||
if (updateEmbed) payload.embeds = [updateEmbed];
|
||||
await discordPost(payload, { threadId });
|
||||
core.info(`Posted update to Discord thread ${threadId}.`);
|
||||
} catch (err) {
|
||||
const msg = err && err.message ? err.message : String(err);
|
||||
core.setFailed(msg);
|
||||
|
||||
const alertWebhook = process.env.DISCORD_ALERT_WEBHOOK_URL;
|
||||
if (alertWebhook) {
|
||||
try {
|
||||
await fetch(alertWebhook, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
username: "OpenScreen",
|
||||
avatar_url: WEBHOOK_AVATAR,
|
||||
content: `⚠️ PR->Discord sync failed\n${msg}\nRun: ${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
|
||||
allowed_mentions: { parse: [] }
|
||||
})
|
||||
});
|
||||
} catch {
|
||||
core.warning("Failed to send alert webhook.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
weekly-contributor-leaderboard:
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Post weekly contributor leaderboard
|
||||
uses: actions/github-script@v7
|
||||
env:
|
||||
DISCORD_SPOTLIGHT_WEBHOOK_URL: ${{ secrets.DISCORD_SPOTLIGHT_WEBHOOK_URL }}
|
||||
DISCORD_WEBHOOK_USERNAME: ${{ secrets.DISCORD_WEBHOOK_USERNAME }}
|
||||
DISCORD_WEBHOOK_AVATAR_URL: ${{ secrets.DISCORD_WEBHOOK_AVATAR_URL }}
|
||||
with:
|
||||
script: |
|
||||
const spotlightWebhook = (process.env.DISCORD_SPOTLIGHT_WEBHOOK_URL || "").trim();
|
||||
const webhookUsername = (process.env.DISCORD_WEBHOOK_USERNAME || "OpenScreen").trim();
|
||||
const webhookAvatar = (process.env.DISCORD_WEBHOOK_AVATAR_URL || "").trim();
|
||||
if (!spotlightWebhook) {
|
||||
core.info("DISCORD_SPOTLIGHT_WEBHOOK_URL missing. Skipping leaderboard post.");
|
||||
return;
|
||||
}
|
||||
|
||||
const since = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
|
||||
const owner = context.repo.owner;
|
||||
const repo = context.repo.repo;
|
||||
|
||||
const q = `repo:${owner}/${repo} is:pr is:merged merged:>=${since.substring(0, 10)}`;
|
||||
const search = await github.rest.search.issuesAndPullRequests({
|
||||
q,
|
||||
per_page: 100,
|
||||
});
|
||||
|
||||
const counter = new Map();
|
||||
for (const item of search.data.items) {
|
||||
const login = item.user?.login;
|
||||
if (!login) continue;
|
||||
counter.set(login, (counter.get(login) || 0) + 1);
|
||||
}
|
||||
|
||||
const ranked = [...counter.entries()]
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.slice(0, 10);
|
||||
|
||||
const totalMerged = search.data.items.length;
|
||||
const lines = ranked.length
|
||||
? ranked.map(([user, count], idx) => `${idx + 1}. **${user}** - ${count} merged PR(s)`).join("\n")
|
||||
: "No merged PRs this week.";
|
||||
|
||||
const payload = {
|
||||
username: webhookUsername,
|
||||
...(webhookAvatar ? { avatar_url: webhookAvatar } : {}),
|
||||
embeds: [
|
||||
{
|
||||
title: "🌟 Weekly Contributor Leaderboard",
|
||||
description: lines,
|
||||
color: 1998671,
|
||||
fields: [
|
||||
{ name: "Merged PRs (7d)", value: String(totalMerged), inline: true },
|
||||
{ name: "Repository", value: `${owner}/${repo}`, inline: true },
|
||||
{ name: "Period", value: "Last 7 days", inline: true }
|
||||
],
|
||||
timestamp: new Date().toISOString()
|
||||
}
|
||||
],
|
||||
allowed_mentions: { parse: [] }
|
||||
};
|
||||
|
||||
const res = await fetch(`${spotlightWebhook}?wait=true`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify(payload)
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
const txt = await res.text();
|
||||
core.setFailed(`Leaderboard post failed ${res.status}: ${txt}`);
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
name: Publish release to WinGet
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [released]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: "Release tag to publish to winget (e.g. v1.4.0)"
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: windows-latest
|
||||
if: github.event_name == 'workflow_dispatch' || !github.event.release.prerelease
|
||||
steps:
|
||||
- uses: vedantmgoyal9/winget-releaser@v2
|
||||
with:
|
||||
identifier: SiddharthVaddem.OpenScreen
|
||||
# Match the Windows installer asset attached to each release.
|
||||
# Today: "Openscreen.Setup.latest.exe". Adjust this regex if you
|
||||
# ever rename the installer to include a version (e.g. "Setup\.\d+\.\d+\.\d+\.exe").
|
||||
installers-regex: 'Setup\..*\.exe$'
|
||||
release-tag: ${{ inputs.tag || github.event.release.tag_name }}
|
||||
token: ${{ secrets.WINGET_ACC_TOKEN }}
|
||||
@@ -0,0 +1,168 @@
|
||||
name: Update Homebrew Cask
|
||||
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: "Release tag to publish to the tap (e.g. v1.4.0)"
|
||||
required: true
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
update-cask:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' || !github.event.release.prerelease
|
||||
env:
|
||||
TAP_OWNER: siddharthvaddem
|
||||
TAP_REPO: homebrew-openscreen
|
||||
CASK_NAME: openscreen
|
||||
steps:
|
||||
- name: Resolve tag and version
|
||||
id: meta
|
||||
env:
|
||||
GH_EVENT_TAG: ${{ github.event.release.tag_name }}
|
||||
INPUT_TAG: ${{ inputs.tag }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
TAG="${GH_EVENT_TAG:-$INPUT_TAG}"
|
||||
if [[ -z "$TAG" ]]; then
|
||||
echo "::error::No tag resolved from release event or workflow input"
|
||||
exit 1
|
||||
fi
|
||||
VERSION="${TAG#v}"
|
||||
echo "tag=$TAG" >> "$GITHUB_OUTPUT"
|
||||
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Find macOS DMG assets
|
||||
id: assets
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
TAG: ${{ steps.meta.outputs.tag }}
|
||||
REPO: ${{ github.repository }}
|
||||
run: |
|
||||
set -euo pipefail

# Resolve the arm64 and x64 DMG asset names of release $TAG and export them as
# step outputs `arm_name` / `x64_name`. Fails when either one cannot be found.
NAMES=$(gh release view "$TAG" --repo "$REPO" --json assets --jq '.assets[].name')
DMGS=$(echo "$NAMES" | grep -iE '\.dmg$' || true)

# x64 DMG
X64_PATTERN='(x64|x86[-_]?64|intel)'
X64_NAME=$(echo "$DMGS" | grep -iE "$X64_PATTERN" | head -n1 || true)

# arm64 DMG: explicit "arm64" / "apple silicon" marker first.
ARM_NAME=$(echo "$DMGS" | grep -iE '(arm64|apple[-_. ]?silicon)' | head -n1 || true)
if [[ -z "$ARM_NAME" ]]; then
  # Fallback: any .dmg that is neither an Intel build nor tagged for another
  # platform. "win" is matched only as its own token so that names containing
  # "darwin" are not discarded, and the Intel exclusion uses the same pattern
  # as the x64 lookup so one file can never be picked for both architectures.
  ARM_NAME=$(echo "$DMGS" \
    | grep -ivE "$X64_PATTERN" \
    | grep -iv 'linux' \
    | grep -ivE '(^|[^[:alpha:]])win(dows|32|64)?([^[:alpha:]]|$)' \
    | head -n1 || true)
fi

if [[ -z "$ARM_NAME" || -z "$X64_NAME" ]]; then
  echo "::error::Could not locate both arm64 and x64 DMGs in release assets"
  echo "Available assets:"
  echo "$NAMES"
  exit 1
fi

echo "arm_name=$ARM_NAME" >> "$GITHUB_OUTPUT"
echo "x64_name=$X64_NAME" >> "$GITHUB_OUTPUT"
echo "Found arm64 asset: $ARM_NAME"
echo "Found x64 asset: $X64_NAME"
|
||||
|
||||
- name: Download DMGs and compute sha256
|
||||
id: shas
|
||||
env:
|
||||
REPO: ${{ github.repository }}
|
||||
TAG: ${{ steps.meta.outputs.tag }}
|
||||
ARM_NAME: ${{ steps.assets.outputs.arm_name }}
|
||||
X64_NAME: ${{ steps.assets.outputs.x64_name }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
BASE="https://github.com/${REPO}/releases/download/${TAG}"
|
||||
curl -fsSL --retry 3 -o /tmp/arm.dmg "${BASE}/${ARM_NAME}"
|
||||
curl -fsSL --retry 3 -o /tmp/x64.dmg "${BASE}/${X64_NAME}"
|
||||
ARM_SHA=$(sha256sum /tmp/arm.dmg | awk '{print $1}')
|
||||
X64_SHA=$(sha256sum /tmp/x64.dmg | awk '{print $1}')
|
||||
echo "arm_sha=$ARM_SHA" >> "$GITHUB_OUTPUT"
|
||||
echo "x64_sha=$X64_SHA" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Checkout tap
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: ${{ env.TAP_OWNER }}/${{ env.TAP_REPO }}
|
||||
token: ${{ secrets.HOMEBREW_TAP_TOKEN }}
|
||||
path: tap
|
||||
|
||||
- name: Write cask file
|
||||
env:
|
||||
REPO: ${{ github.repository }}
|
||||
TAG: ${{ steps.meta.outputs.tag }}
|
||||
VERSION: ${{ steps.meta.outputs.version }}
|
||||
ARM_NAME: ${{ steps.assets.outputs.arm_name }}
|
||||
X64_NAME: ${{ steps.assets.outputs.x64_name }}
|
||||
ARM_SHA: ${{ steps.shas.outputs.arm_sha }}
|
||||
X64_SHA: ${{ steps.shas.outputs.x64_sha }}
|
||||
run: |
|
||||
set -euo pipefail
mkdir -p tap/Casks

# VERSION is TAG with an optional leading "v" removed, so the tag is
# "<prefix><version>" with a prefix of "v" or "". Recover the prefix instead of
# hard-coding "v", otherwise tags without it produce a broken download URL.
TAG_PREFIX="${TAG%"$VERSION"}"

# #{version} is Ruby interpolation written literally to the cask file (the
# unquoted heredoc leaves "#{...}" alone), while ${VERSION}, ${ARM_SHA}, etc.
# are bash variables expanded by the heredoc. Keeping #{version} in the URL
# fixes Homebrew's "URL is unversioned" audit warning by making the version
# string statically detectable.
cat > "tap/Casks/${CASK_NAME}.rb" <<EOF
cask "${CASK_NAME}" do
  version "${VERSION}"

  on_arm do
    sha256 "${ARM_SHA}"

    url "https://github.com/${REPO}/releases/download/${TAG_PREFIX}#{version}/${ARM_NAME}"
  end
  on_intel do
    sha256 "${X64_SHA}"

    url "https://github.com/${REPO}/releases/download/${TAG_PREFIX}#{version}/${X64_NAME}"
  end

  name "Openscreen"
  desc "Screen recorder and video editor"
  homepage "https://github.com/${REPO}"

  auto_updates false
  depends_on macos: ">= :big_sur"

  app "Openscreen.app"

  zap trash: [
    "~/Library/Application Support/Openscreen",
    "~/Library/Caches/com.siddharthvaddem.openscreen",
    "~/Library/Logs/Openscreen",
    "~/Library/Preferences/com.siddharthvaddem.openscreen.plist",
    "~/Library/Saved Application State/com.siddharthvaddem.openscreen.savedState",
  ]
end
EOF
|
||||
|
||||
- name: Commit and push to tap
|
||||
working-directory: tap
|
||||
env:
|
||||
VERSION: ${{ steps.meta.outputs.version }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git add "Casks/${CASK_NAME}.rb"
|
||||
if git diff --cached --quiet; then
|
||||
echo "Cask already up to date for ${VERSION} — nothing to commit."
|
||||
exit 0
|
||||
fi
|
||||
git commit -m "Bump ${CASK_NAME} to ${VERSION}"
|
||||
git push
|
||||
@@ -0,0 +1,77 @@
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
pnpm-debug.log*
|
||||
lerna-debug.log*
|
||||
|
||||
node_modules
|
||||
dist
|
||||
dist-electron
|
||||
dist-ssr
|
||||
*.local
|
||||
.env
|
||||
.env.signing.local
|
||||
|
||||
# Native helper build outputs
|
||||
/electron/native/wgc-capture/build/
|
||||
/electron/native/screencapturekit/build/
|
||||
/electron/native/screencapturekit/.build/
|
||||
/electron/native/screencapturekit/.swiftpm/
|
||||
/electron/native/bin/
|
||||
/tools/ocr/build/
|
||||
/tools/ocr/dist/
|
||||
/tools/ocr/models/**/.gitattributes
|
||||
/tools/ocr/models/**/README.md
|
||||
|
||||
# Native macOS generated files
|
||||
DerivedData/
|
||||
*.xcuserstate
|
||||
xcuserdata/
|
||||
|
||||
# Editor directories and files
|
||||
.vscode/*
|
||||
.zed/
|
||||
!.vscode/extensions.json
|
||||
.idea
|
||||
.DS_Store
|
||||
*.suo
|
||||
*.ntvs*
|
||||
*.njsproj
|
||||
*.sln
|
||||
*.sw?
|
||||
release/**
|
||||
*.kiro/
|
||||
.claude/
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
# npx electron-builder --mac --win
|
||||
|
||||
# Playwright
|
||||
test-results
|
||||
playwright-report/
|
||||
|
||||
|
||||
# Vitest browser mode screenshots
|
||||
__screenshots__/
|
||||
|
||||
# shell files
|
||||
/shell.sh
|
||||
# Nix
|
||||
result
|
||||
result-*
|
||||
.direnv/
|
||||
|
||||
#kilocode
|
||||
.kilo/
|
||||
|
||||
#others
|
||||
|
||||
**/*.import
|
||||
|
||||
# Local agent/tooling state
|
||||
/.agent/
|
||||
/.serena/
|
||||
/.venv-ocr-build/
|
||||
@@ -0,0 +1 @@
|
||||
npx lint-staged
|
||||
@@ -0,0 +1,57 @@
|
||||
# Contribution Guidelines
|
||||
|
||||
Thank you for considering contributing to this project! By contributing, you help make this project better for everyone. Please take a moment to review these guidelines to ensure a smooth contribution process.
|
||||
|
||||
## How to Contribute
|
||||
|
||||
1. **Fork the Repository**
|
||||
- Click the "Fork" button at the top right of this repository to create your own copy.
|
||||
|
||||
2. **Clone Your Fork**
|
||||
- Clone your forked repository to your local machine:
|
||||
```bash
|
||||
git clone https://github.com/your-username/openscreen.git
|
||||
```
|
||||
|
||||
3. **Create a New Branch**
|
||||
- Create a branch for your feature or bug fix:
|
||||
```bash
|
||||
git checkout -b feature/your-feature-name
|
||||
```
|
||||
|
||||
4. **Make Changes**
|
||||
- Make your changes.
|
||||
|
||||
5. **Test Your Changes**
|
||||
- Test your changes thoroughly to ensure they work as expected and do not break existing functionality.
|
||||
|
||||
6. **Commit Your Changes**
|
||||
- Commit your changes with a clear and concise commit message:
|
||||
```bash
|
||||
git add .
|
||||
git commit -m "Add a brief description of your changes"
|
||||
```
|
||||
|
||||
7. **Push Your Changes**
|
||||
- Push your branch to your forked repository:
|
||||
```bash
|
||||
git push origin feature/your-feature-name
|
||||
```
|
||||
|
||||
8. **Open a Pull Request**
|
||||
- Go to the original repository and open a pull request from your branch. Provide a clear description of your changes and the problem they solve.
|
||||
|
||||
## Reporting Issues
|
||||
|
||||
If you encounter a bug or have a feature request, please open an issue in the [Issues](https://github.com/siddharthvaddem/openscreen/issues) section of this repository. Provide as much detail as possible to help us address the issue effectively.
|
||||
|
||||
## Style Guide
|
||||
|
||||
- Write clear, concise, and descriptive commit messages.
|
||||
- Include comments where necessary to explain complex code.
|
||||
|
||||
## License
|
||||
|
||||
By contributing to this project, you agree that your contributions will be licensed under the [MIT License](./LICENSE).
|
||||
|
||||
Thank you for your contributions!
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Siddharth Vaddem
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -0,0 +1,190 @@
|
||||
> [!WARNING]
|
||||
> This started as a side project that took off — it's not production grade and you'll hit bugs, but hopefully it covers what you need.
|
||||
|
||||
<p align="center">
|
||||
<img src="public/openscreen.png" alt="OpenScreen Logo" width="64" />
|
||||
<br />
|
||||
<br />
|
||||
<a href="https://trendshift.io/repositories/17427" target="_blank"><img src="https://trendshift.io/api/badge/repositories/17427" alt="siddharthvaddem%2Fopenscreen | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
||||
<br />
|
||||
<br />
|
||||
<a href="https://deepwiki.com/siddharthvaddem/openscreen">
|
||||
<img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki" />
|
||||
</a>
|
||||
|
||||
<a href="https://discord.gg/yAQQhRaEeg">
|
||||
<img src="https://dcbadge.limes.pink/api/server/https://discord.gg/yAQQhRaEeg?style=flat" alt="Join Discord" />
|
||||
</a>
|
||||
</p>
|
||||
|
||||
# <p align="center">OpenScreen</p>
|
||||
|
||||
<p align="center"><strong>OpenScreen is your free, open-source alternative to Screen Studio (sort of).</strong></p>
|
||||
|
||||
If you don't want to pay $29/month for Screen Studio but want a much simpler version that does what most people seem to need — quick, polished product demos and walkthroughs you'd post on X or Reddit — OpenScreen is for you. It does not offer all Screen Studio features, but it covers the basics well!
|
||||
|
||||
Screen Studio is an awesome product and this is definitely not a 1:1 clone. OpenScreen is a much simpler take, just the basics for folks who want control and don't want to pay. If you need all the fancy features, your best bet is to support Screen Studio (they really do a great job, haha). But if you just want something free (no gotchas) and open, this project does the job!
|
||||
|
||||
**100% free** for both **personal** and **commercial** use. Use it, modify it, distribute it — just be cool 😁 and shout out the project if you feel like it.
|
||||
|
||||
<p align="center">
|
||||
<img src="public/preview3.png" alt="OpenScreen App Preview 3" style="height: 0.2467; margin-right: 12px;" />
|
||||
<img src="public/preview4.png" alt="OpenScreen App Preview 4" style="height: 0.1678; margin-right: 12px;" />
|
||||
</p>
|
||||
|
||||
## Core Features
|
||||
- Record a specific window, region, or your whole screen.
|
||||
- Record microphone and system audio.
|
||||
- Webcam overlay with picture-in-picture, drag-to-position, and shape options.
|
||||
- Auto or manual zooms with adjustable depth, duration, easing, and pixel-precise position.
|
||||
- Wallpapers, solid colors, gradients, or a custom background.
|
||||
- Motion blur for smoother pan and zoom transitions.
|
||||
- Crop, trim, and per-segment speed control on the timeline.
|
||||
- Blur effects to hide sensitive parts of the screen.
|
||||
- Cursor and click highlighting.
|
||||
- Text, arrow, and image annotations.
|
||||
- Save and reopen projects without re-recording.
|
||||
- Export to MP4 or GIF in multiple aspect ratios and resolutions.
|
||||
- Translated into Arabic, English, Spanish, French, Japanese, Korean, Russian, Turkish, Vietnamese, Simplified Chinese, and Traditional Chinese.
|
||||
|
||||
## Installation
|
||||
|
||||
Download the latest installer for your platform from the [GitHub Releases](https://github.com/siddharthvaddem/openscreen/releases) page.
|
||||
|
||||
### macOS
|
||||
|
||||
The easiest way to install on macOS is via [Homebrew](https://brew.sh):
|
||||
|
||||
```bash
|
||||
brew install --cask siddharthvaddem/openscreen/openscreen
|
||||
```
|
||||
|
||||
Brew automatically picks the right build for Apple Silicon or Intel, and verifies the download against a notarized signature so Gatekeeper won't block it.
|
||||
|
||||
To update later: `brew upgrade --cask openscreen`
|
||||
To uninstall: `brew uninstall --cask openscreen` (add `--zap` to also remove app data)
|
||||
|
||||
#### Manual install (if you prefer)
|
||||
|
||||
If you'd rather grab the `.dmg` directly from the [Releases page](https://github.com/siddharthvaddem/openscreen/releases) and encounter Gatekeeper blocking the app, you can bypass it by running the following command in your terminal after installation:
|
||||
|
||||
```bash
|
||||
xattr -rd com.apple.quarantine /Applications/Openscreen.app
|
||||
```
|
||||
|
||||
Note: You may first need to give your terminal Full Disk Access in **System Settings > Privacy & Security**; once access is granted, run the command above.
|
||||
|
||||
After running this command, open **System Settings > Privacy & Security** (**System Preferences > Security & Privacy** on macOS 12 and earlier) and grant the "Screen Recording" and "Accessibility" permissions. Once permissions are granted, you can launch the app.
|
||||
|
||||
### Windows
|
||||
|
||||
Install via [winget](https://learn.microsoft.com/en-us/windows/package-manager/winget/):
|
||||
|
||||
```bash
|
||||
winget install SiddharthVaddem.OpenScreen
|
||||
```
|
||||
|
||||
To update later: `winget upgrade SiddharthVaddem.OpenScreen`
|
||||
To uninstall: `winget uninstall SiddharthVaddem.OpenScreen`
|
||||
|
||||
If you'd rather grab the `.exe` installer directly, download it from the [Releases page](https://github.com/siddharthvaddem/openscreen/releases).
|
||||
|
||||
### Linux
|
||||
|
||||
Three packages are published to the [Releases page](https://github.com/siddharthvaddem/openscreen/releases) for each version. Pick the one that matches your distro:
|
||||
|
||||
**Debian / Ubuntu / Pop!_OS (`.deb`)**
|
||||
```bash
|
||||
sudo apt install ./Openscreen-Linux-latest.deb
|
||||
```
|
||||
|
||||
**Arch / Manjaro (`.pacman`)**
|
||||
```bash
|
||||
sudo pacman -U Openscreen-Linux-latest.pacman
|
||||
```
|
||||
|
||||
**Any distro (`.AppImage`)**
|
||||
```bash
|
||||
chmod +x Openscreen-Linux-*.AppImage
|
||||
./Openscreen-Linux-*.AppImage
|
||||
```
|
||||
|
||||
**NixOS / Nix (flake)**
|
||||
|
||||
Try without installing:
|
||||
```bash
|
||||
nix run github:siddharthvaddem/openscreen
|
||||
```
|
||||
|
||||
Install into your user profile:
|
||||
```bash
|
||||
nix profile install github:siddharthvaddem/openscreen
|
||||
```
|
||||
|
||||
For a NixOS system config (flake):
|
||||
```nix
|
||||
{
|
||||
inputs.openscreen.url = "github:siddharthvaddem/openscreen";
|
||||
|
||||
outputs = { nixpkgs, openscreen, ... }: {
|
||||
nixosConfigurations.<host> = nixpkgs.lib.nixosSystem {
|
||||
modules = [
|
||||
openscreen.nixosModules.default
|
||||
{ programs.openscreen.enable = true; }
|
||||
];
|
||||
};
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
For Home Manager, use `openscreen.homeManagerModules.default` with the same `programs.openscreen.enable = true;`.
|
||||
|
||||
You may need to grant screen recording permissions depending on your desktop environment.
|
||||
|
||||
**Sandbox error:** If the AppImage fails to launch with a "sandbox" error, run it with `--no-sandbox`:
|
||||
```bash
|
||||
./Openscreen-Linux-*.AppImage --no-sandbox
|
||||
```
|
||||
|
||||
### Limitations
|
||||
|
||||
System audio capture relies on Electron's [desktopCapturer](https://www.electronjs.org/docs/latest/api/desktop-capturer) and has some platform-specific quirks:
|
||||
|
||||
- **macOS**: Requires macOS 13+. On macOS 14.2+ you'll be prompted to grant audio capture permission. macOS 12 and below do not support system audio (mic still works).
|
||||
- **Windows**: Works out of the box.
|
||||
- **Linux**: Needs PipeWire (default on Ubuntu 22.04+, Fedora 34+). Older PulseAudio-only setups may not support system audio (mic should still work).
|
||||
|
||||
## Built with
|
||||
- Electron
|
||||
- React
|
||||
- TypeScript
|
||||
- Vite
|
||||
- PixiJS
|
||||
- dnd-timeline
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Documentation
|
||||
|
||||
See the documentation here:
|
||||
[OpenScreen Docs](https://deepwiki.com/siddharthvaddem/openscreen)
|
||||
Refresh if outdated.
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome - please **include screenshots or a short video** for any UI change or new user-facing feature. If it touches what users see or do, show it. Skip only when it genuinely doesn't apply. PRs that don't follow this will be closed.
|
||||
|
||||
## Star History
|
||||
|
||||
<a href="https://www.star-history.com/?repos=siddharthvaddem%2Fopenscreen&type=date&legend=top-left">
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/chart?repos=siddharthvaddem/openscreen&type=date&theme=dark&legend=top-left" />
|
||||
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/chart?repos=siddharthvaddem/openscreen&type=date&legend=top-left" />
|
||||
<img alt="Star History Chart" src="https://api.star-history.com/chart?repos=siddharthvaddem/openscreen&type=date&legend=top-left" />
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the [MIT License](./LICENSE). By using this software, you agree that the authors are not liable for any issues, damages, or claims arising from its use.
|
||||
@@ -0,0 +1,134 @@
|
||||
{
|
||||
"$schema": "https://biomejs.dev/schemas/2.4.12/schema.json",
|
||||
"vcs": { "enabled": true, "clientKind": "git", "useIgnoreFile": true },
|
||||
"files": { "ignoreUnknown": false, "includes": ["**", "!**/*.css"] },
|
||||
"formatter": {
|
||||
"enabled": true,
|
||||
"indentStyle": "tab",
|
||||
"formatWithErrors": true,
|
||||
"lineEnding": "lf",
|
||||
"lineWidth": 100,
|
||||
"attributePosition": "auto"
|
||||
},
|
||||
"linter": {
|
||||
"enabled": true,
|
||||
"rules": {
|
||||
"recommended": false,
|
||||
"complexity": {
|
||||
"noAdjacentSpacesInRegex": "error",
|
||||
"noBannedTypes": "error",
|
||||
"noExtraBooleanCast": "error",
|
||||
"noUselessCatch": "error",
|
||||
"noUselessEscapeInRegex": "error",
|
||||
"noUselessThisAlias": "error",
|
||||
"noUselessTypeConstraint": "error"
|
||||
},
|
||||
"correctness": {
|
||||
"noConstAssign": "error",
|
||||
"noConstantCondition": "error",
|
||||
"noEmptyCharacterClassInRegex": "error",
|
||||
"noEmptyPattern": "error",
|
||||
"noGlobalObjectCalls": "error",
|
||||
"noInnerDeclarations": "error",
|
||||
"noInvalidConstructorSuper": "error",
|
||||
"noNonoctalDecimalEscape": "error",
|
||||
"noPrecisionLoss": "error",
|
||||
"noSelfAssign": "error",
|
||||
"noSetterReturn": "error",
|
||||
"noSwitchDeclarations": "error",
|
||||
"noUndeclaredVariables": "error",
|
||||
"noUnreachable": "error",
|
||||
"noUnreachableSuper": "error",
|
||||
"noUnsafeFinally": "error",
|
||||
"noUnsafeOptionalChaining": "error",
|
||||
"noUnusedLabels": "error",
|
||||
"noUnusedVariables": "error",
|
||||
"useExhaustiveDependencies": "warn",
|
||||
"useHookAtTopLevel": "error",
|
||||
"useIsNan": "error",
|
||||
"useValidForDirection": "error",
|
||||
"useValidTypeof": "error",
|
||||
"useYield": "error"
|
||||
},
|
||||
"style": {
|
||||
"noNamespace": "off",
|
||||
"useArrayLiterals": "error",
|
||||
"useAsConstAssertion": "error",
|
||||
"useComponentExportOnlyModules": "off"
|
||||
},
|
||||
"suspicious": {
|
||||
"noAssignInExpressions": "error",
|
||||
"noAsyncPromiseExecutor": "error",
|
||||
"noCatchAssign": "error",
|
||||
"noClassAssign": "error",
|
||||
"noCompareNegZero": "error",
|
||||
"noControlCharactersInRegex": "error",
|
||||
"noDebugger": "error",
|
||||
"noDuplicateCase": "error",
|
||||
"noDuplicateClassMembers": "error",
|
||||
"noDuplicateElseIf": "error",
|
||||
"noDuplicateObjectKeys": "error",
|
||||
"noDuplicateParameters": "error",
|
||||
"noEmptyBlockStatements": "warn",
|
||||
"noExplicitAny": "warn",
|
||||
"noExtraNonNullAssertion": "error",
|
||||
"noFallthroughSwitchClause": "error",
|
||||
"noFunctionAssign": "error",
|
||||
"noGlobalAssign": "error",
|
||||
"noImportAssign": "error",
|
||||
"noIrregularWhitespace": "error",
|
||||
"noMisleadingCharacterClass": "error",
|
||||
"noMisleadingInstantiator": "error",
|
||||
"noNonNullAssertedOptionalChain": "error",
|
||||
"noPrototypeBuiltins": "error",
|
||||
"noRedeclare": "error",
|
||||
"noShadowRestrictedNames": "error",
|
||||
"noSparseArray": "error",
|
||||
"noTsIgnore": "error",
|
||||
"noUnsafeDeclarationMerging": "error",
|
||||
"noUnsafeNegation": "error",
|
||||
"noUselessRegexBackrefs": "error",
|
||||
"noWith": "error",
|
||||
"useGetterReturn": "error"
|
||||
}
|
||||
},
|
||||
"includes": ["**", "!**/dist", "!**/.eslintrc.cjs", "!**/*.css"]
|
||||
},
|
||||
"javascript": { "formatter": { "quoteStyle": "double" } },
|
||||
"overrides": [
|
||||
{
|
||||
"includes": ["**/*.ts", "**/*.tsx", "**/*.mts", "**/*.cts"],
|
||||
"linter": {
|
||||
"rules": {
|
||||
"complexity": { "noArguments": "error" },
|
||||
"correctness": {
|
||||
"noConstAssign": "off",
|
||||
"noGlobalObjectCalls": "off",
|
||||
"noInvalidBuiltinInstantiation": "off",
|
||||
"noInvalidConstructorSuper": "off",
|
||||
"noSetterReturn": "off",
|
||||
"noUndeclaredVariables": "off",
|
||||
"noUnreachable": "off",
|
||||
"noUnreachableSuper": "off"
|
||||
},
|
||||
"style": { "useConst": "error" },
|
||||
"suspicious": {
|
||||
"noDuplicateClassMembers": "off",
|
||||
"noDuplicateObjectKeys": "off",
|
||||
"noDuplicateParameters": "off",
|
||||
"noFunctionAssign": "off",
|
||||
"noImportAssign": "off",
|
||||
"noRedeclare": "off",
|
||||
"noUnsafeNegation": "off",
|
||||
"noVar": "error",
|
||||
"useGetterReturn": "off"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"assist": {
|
||||
"enabled": true,
|
||||
"actions": { "source": { "organizeImports": "on" } }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
; customInstall: (re)registers the OpenScreen OCR helper as an auto-start
; Windows service pointing at the binaries under $INSTDIR.
; Any existing registration is stopped and deleted first.
; Registers used: $0 = OCR data directory, $1 = scratch for nsExec exit codes.
; Both are saved on entry and restored on exit.
!macro customInstall
  Push $0
  Push $1

  DetailPrint "Installing OpenScreen OCR Windows service"

  ; nsExec pushes the exit code (or "error"/"timeout") onto the stack after
  ; every call; pop it each time so the stack stays balanced.
  ; stop/delete are best-effort: they fail harmlessly on a first install.
  nsExec::ExecToLog '"$SYSDIR\sc.exe" stop OpenScreenOCR'
  Pop $1
  nsExec::ExecToLog '"$SYSDIR\sc.exe" delete OpenScreenOCR'
  Pop $1

  ; Pause between deleting the old registration and creating the new one.
  Sleep 1000

  ExpandEnvStrings $0 "%ProgramData%\OpenScreen\ocr-runtime"
  CreateDirectory "$0"

  nsExec::ExecToLog '"$SYSDIR\sc.exe" create OpenScreenOCR binPath= "\"$INSTDIR\resources\electron\native\bin\win32-x64\openscreen-ocr-service-wrapper.exe\" --service --exe \"$INSTDIR\resources\ocr-service\openscreen-ocr-service.exe\" --resources \"$INSTDIR\resources\" --data \"$0\"" start= auto DisplayName= "OpenScreen OCR Service"'
  Pop $1
  ; Surface a failed registration in the install log instead of ignoring it.
  StrCmp $1 "0" +2
  DetailPrint "OpenScreen OCR service registration failed (sc.exe create returned $1)"

  nsExec::ExecToLog '"$SYSDIR\sc.exe" description OpenScreenOCR "Local OCR service used by OpenScreen guide capture."'
  Pop $1
  nsExec::ExecToLog '"$SYSDIR\sc.exe" start OpenScreenOCR'
  Pop $1

  Pop $1
  Pop $0
!macroend
|
||||
|
||||
; customUnInstall: stops and unregisters the OpenScreen OCR Windows service.
; Both sc.exe calls are best-effort; their exit codes are discarded.
!macro customUnInstall
  Push $0 ; preserve the caller's $0; used below as scratch

  DetailPrint "Removing OpenScreen OCR Windows service"

  ; nsExec pushes the exit code onto the stack after each call; pop it so the
  ; uninstaller's stack stays balanced.
  nsExec::ExecToLog '"$SYSDIR\sc.exe" stop OpenScreenOCR'
  Pop $0
  nsExec::ExecToLog '"$SYSDIR\sc.exe" delete OpenScreenOCR'
  Pop $0

  Pop $0 ; restore the caller's $0
!macroend
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"$schema": "https://ui.shadcn.com/schema.json",
|
||||
"style": "new-york",
|
||||
"rsc": false,
|
||||
"tsx": true,
|
||||
"tailwind": {
|
||||
"config": "tailwind.config.cjs",
|
||||
"css": "src/index.css",
|
||||
"baseColor": "stone",
|
||||
"cssVariables": true,
|
||||
"prefix": ""
|
||||
},
|
||||
"iconLibrary": "lucide",
|
||||
"aliases": {
|
||||
"components": "@/components",
|
||||
"utils": "@/lib/utils",
|
||||
"ui": "@/components/ui",
|
||||
"lib": "@/lib",
|
||||
"hooks": "@/hooks"
|
||||
},
|
||||
"registries": {}
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
# Native Bridge Architecture
|
||||
|
||||
## Goal
|
||||
|
||||
Provide a single, resilient source of truth for platform-native capabilities while keeping Electron transport thin and renderer APIs unified.
|
||||
|
||||
## Layers
|
||||
|
||||
1. Native adapters
|
||||
Platform-specific providers implement stable domain interfaces such as cursor telemetry or system asset discovery.
|
||||
|
||||
2. Main-process services
|
||||
Services orchestrate adapters, own runtime state, and expose domain-level operations.
|
||||
|
||||
3. Unified IPC transport
|
||||
Renderer code talks to a single `native-bridge:invoke` channel using versioned contracts.
|
||||
|
||||
4. Renderer client
|
||||
React code should consume `src/native/client.ts` rather than binding directly to ad hoc Electron APIs.
|
||||
|
||||
## Principles
|
||||
|
||||
- Single source of truth: runtime-native state lives in the Electron main process.
|
||||
- Capability-first: renderer can query support before attempting native behavior.
|
||||
- Versioned contracts: requests and responses are explicit and evolve predictably.
|
||||
- Resilience: every response uses a consistent result envelope with stable error codes.
|
||||
|
||||
## Current rollout
|
||||
|
||||
This repository now contains the initial scaffold:
|
||||
|
||||
- shared contracts in `src/native/contracts.ts`
|
||||
- renderer SDK in `src/native/client.ts`
|
||||
- main-process state store in `electron/native-bridge/store.ts`
|
||||
- cursor telemetry adapter in `electron/native-bridge/cursor/telemetryCursorAdapter.ts`
|
||||
- domain services in `electron/native-bridge/services/*`
|
||||
- unified handler registration in `electron/ipc/nativeBridge.ts`
|
||||
|
||||
The legacy `window.electronAPI` surface still exists for backward compatibility. New native-facing features should prefer the unified bridge client.
|
||||
@@ -0,0 +1,935 @@
|
||||
# Quy trình triển khai Auto User Guide Generation
|
||||
|
||||
Mục tiêu của tính năng này là biến OpenScreen từ công cụ quay màn hình thành công cụ tự tạo tài liệu hướng dẫn sử dụng phần mềm. Người dùng bật Guide Mode, quay thao tác như bình thường, hệ thống ghi lại thời điểm click hoặc hotkey, trích ảnh từ video sau khi quay xong, chạy OCR local để đọc chữ trên giao diện, sau đó dùng AI tạo bản nháp hướng dẫn từng bước.
|
||||
|
||||
Tài liệu này được viết để có thể bắt đầu coding ngay: có kiến trúc, schema, file cần thêm/sửa, thứ tự task, tiêu chí test và định nghĩa MVP.
|
||||
|
||||
## Trạng Thái MVP Hiện Tại
|
||||
|
||||
- Đã có Guide Mode trong HUD, ghi click/marker vào `.guide.json`.
|
||||
- Đã có GuidePanel trong editor để chạy: prepare events, capture snapshots, OCR, generate draft, export Markdown/HTML.
|
||||
- Đã có local deterministic draft để test không cần DeepSeek key.
|
||||
- DeepSeek được gọi khi chọn provider `DeepSeek` và có `DEEPSEEK_API_KEY`.
|
||||
- OCR local mặc định gọi `OPENSCREEN_GUIDE_OCR_URL` hoặc `http://127.0.0.1:8866/ocr`.
|
||||
- Verification hiện tại: targeted guide tests pass, `npm test` pass, `npm run build-vite` pass, `npm run i18n:check` pass.
|
||||
|
||||
## Mục Tiêu Sản Phẩm
|
||||
|
||||
Flow người dùng:
|
||||
|
||||
1. Bật Guide Mode.
|
||||
2. Quay màn hình phần mềm cần hướng dẫn.
|
||||
3. Trong lúc quay, hệ thống tự ghi timestamp các click chuột.
|
||||
4. Người dùng có thể bấm một hotkey/nút marker nếu muốn đánh dấu bước thủ công.
|
||||
5. Sau khi dừng quay, hệ thống trích ảnh màn hình từ video tại các timestamp đó.
|
||||
6. OCR local đọc text trên ảnh giao diện.
|
||||
7. Hệ thống map vị trí click tới text/control gần nhất.
|
||||
8. AI Agent tạo tài liệu dạng từng bước.
|
||||
9. Người dùng review, sửa nội dung, export Markdown/HTML.
|
||||
|
||||
Ví dụ output:
|
||||
|
||||
```md
|
||||
# Hướng dẫn xuất báo cáo
|
||||
|
||||
## Bước 1: Mở phần cài đặt
|
||||
|
||||
Nhấn nút **Settings** ở thanh điều hướng bên trái.
|
||||
|
||||
## Bước 2: Chọn Export
|
||||
|
||||
Trong màn hình Settings, chọn **Export report**.
|
||||
```
|
||||
|
||||
## Phạm Vi MVP
|
||||
|
||||
MVP cần làm:
|
||||
|
||||
- Bật/tắt Guide Mode trước khi quay.
|
||||
- Tận dụng recorder hiện tại, không viết recorder mới.
|
||||
- Tận dụng `.cursor.json` hiện tại để lấy click timestamp.
|
||||
- Thêm marker bằng hotkey hoặc nút trên HUD.
|
||||
- Tạo sidecar `.guide.json` riêng cho guide.
|
||||
- Trích screenshot sau khi quay xong, từ video đã lưu.
|
||||
- OCR local bằng PaddleOCR service.
|
||||
- Tạo step candidate từ click position + OCR blocks.
|
||||
- Gọi DeepSeek bằng text metadata, không gửi ảnh mặc định.
|
||||
- Có panel review trong editor.
|
||||
- Export Markdown và HTML.
|
||||
|
||||
Không làm trong MVP:
|
||||
|
||||
- Không chụp screenshot realtime trong lúc quay nếu chưa có benchmark cần thiết.
|
||||
- Không gửi raw screenshot lên cloud AI mặc định.
|
||||
- Không sửa schema `.cursor.json` nếu không bắt buộc.
|
||||
- Không build full UI automation engine.
|
||||
- Không làm PDF/DOCX ngay.
|
||||
- Không bundle OCR runtime vào app packaged ngay.
|
||||
|
||||
## Code Hiện Có Cần Tận Dụng
|
||||
|
||||
Các điểm đã có trong codebase:
|
||||
|
||||
- Recording orchestration: `src/hooks/useScreenRecorder.ts`
|
||||
- Launch/HUD UI: `src/components/launch/LaunchWindow.tsx`
|
||||
- Source selection: `src/components/launch/SourceSelector.tsx`
|
||||
- Editor chính: `src/components/video-editor/VideoEditor.tsx`
|
||||
- Project/session persistence: `src/components/video-editor/projectPersistence.ts`
|
||||
- Cursor contracts: `src/native/contracts.ts`
|
||||
- Hook đọc cursor data: `src/native/hooks/useCursorRecordingData.ts`
|
||||
- IPC main handlers: `electron/ipc/handlers.ts`
|
||||
- Native bridge: `electron/ipc/nativeBridge.ts`
|
||||
- Cursor service: `electron/native-bridge/services/cursorService.ts`
|
||||
- Windows cursor recording: `electron/native-bridge/cursor/recording/windowsNativeRecordingSession.ts`
|
||||
- macOS cursor recording: `electron/native-bridge/cursor/recording/macNativeCursorRecordingSession.ts`
|
||||
- Frame/export primitives: `src/lib/exporter/frameRenderer.ts`
|
||||
|
||||
Nhận định kỹ thuật:
|
||||
|
||||
- Windows/macOS native cursor recording đã có dữ liệu click.
|
||||
- Cursor sample hiện có thể có `interactionType: "click" | "mouseup" | "move"`.
|
||||
- Editor hiện đã dùng click timestamp để render hiệu ứng click.
|
||||
- Vì schema cursor đang được nhiều nơi dùng, MVP nên tạo `.guide.json` riêng thay vì mở rộng `.cursor.json`.
|
||||
|
||||
## Kiến Trúc Tổng Thể
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A["User bật Guide Mode"] --> B["Quay video bằng recorder hiện tại"]
|
||||
B --> C["Cursor recorder ghi click timestamp"]
|
||||
B --> D["Hotkey/HUD marker ghi manual event"]
|
||||
C --> E["Dừng quay"]
|
||||
D --> E
|
||||
E --> F["Guide assembler tạo .guide.json"]
|
||||
F --> G["Snapshot extractor seek video và xuất PNG"]
|
||||
G --> H["PaddleOCR local đọc text + bounding boxes"]
|
||||
H --> I["Target mapper map click tới OCR text/control"]
|
||||
I --> J["DeepSeek/local LLM viết draft guide"]
|
||||
J --> K["GuidePanel cho user review/sửa"]
|
||||
K --> L["Export Markdown/HTML"]
|
||||
```
|
||||
|
||||
Quyết định chính:
|
||||
|
||||
- Realtime recording chỉ ghi event/timestamp, không xử lý OCR/AI.
|
||||
- Screenshot được trích từ video sau khi quay, tránh ảnh hưởng performance recorder.
|
||||
- OCR chạy local-first.
|
||||
- DeepSeek chỉ nhận text metadata trừ khi user opt-in gửi ảnh.
|
||||
- Guide data nằm cạnh recording artifact.
|
||||
|
||||
## File Cần Thêm
|
||||
|
||||
```text
|
||||
src/guide/
|
||||
contracts.ts
|
||||
eventBuilder.ts
|
||||
targetMapper.ts
|
||||
promptBuilder.ts
|
||||
generatedGuideSchema.ts
|
||||
snapshot/
|
||||
extractGuideSnapshots.ts
|
||||
export/
|
||||
markdownExporter.ts
|
||||
htmlExporter.ts
|
||||
__tests__/
|
||||
eventBuilder.test.ts
|
||||
targetMapper.test.ts
|
||||
promptBuilder.test.ts
|
||||
markdownExporter.test.ts
|
||||
|
||||
src/components/video-editor/guide/
|
||||
GuidePanel.tsx
|
||||
GuideStepList.tsx
|
||||
GuideStepEditor.tsx
|
||||
GuideSnapshotPreview.tsx
|
||||
|
||||
electron/guide/
|
||||
guideStore.ts
|
||||
guidePaths.ts
|
||||
guideIpc.ts
|
||||
ocr/
|
||||
paddleOcrClient.ts
|
||||
ai/
|
||||
deepseekGuideClient.ts
|
||||
```
|
||||
|
||||
File hiện có khả năng phải sửa:
|
||||
|
||||
- `src/hooks/useScreenRecorder.ts`
|
||||
- `src/components/launch/LaunchWindow.tsx`
|
||||
- `src/components/video-editor/VideoEditor.tsx`
|
||||
- `electron/ipc/handlers.ts`
|
||||
- `electron/preload.ts`
|
||||
- file khai báo type cho `window.electronAPI`
|
||||
- `package.json` nếu thêm script test hoặc dependency nhỏ
|
||||
|
||||
## Artifact Đầu Ra
|
||||
|
||||
Với video `recording-123.mp4`, hệ thống tạo:
|
||||
|
||||
```text
|
||||
recording-123.mp4
|
||||
recording-123.cursor.json
|
||||
recording-123.guide.json
|
||||
recording-123-guide/
|
||||
step-001.png
|
||||
step-002.png
|
||||
ocr.json
|
||||
guide.md
|
||||
guide.html
|
||||
```
|
||||
|
||||
Quy tắc:
|
||||
|
||||
- `.cursor.json` vẫn là dữ liệu cursor gốc.
|
||||
- `.guide.json` là source of truth cho guide workflow.
|
||||
- Folder `recording-123-guide/` chứa file phát sinh từ guide.
|
||||
- `guide.md` và `guide.html` có thể được tạo lại từ `.guide.json`.
|
||||
|
||||
## Contract Chính
|
||||
|
||||
Tạo `src/guide/contracts.ts`.
|
||||
|
||||
```ts
|
||||
export type GuideEventKind = "click" | "hotkey" | "manual";
|
||||
|
||||
export type GuideEventSource =
|
||||
| "cursor-recording"
|
||||
| "guide-hotkey"
|
||||
| "review-ui";
|
||||
|
||||
export interface GuideEvent {
|
||||
id: string;
|
||||
recordingId: string;
|
||||
kind: GuideEventKind;
|
||||
source: GuideEventSource;
|
||||
timeMs: number;
|
||||
x?: number;
|
||||
y?: number;
|
||||
normalizedX?: number;
|
||||
normalizedY?: number;
|
||||
button?: "left" | "right" | "middle" | "unknown";
|
||||
label?: string;
|
||||
screenshotOffsetMs?: number;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
export interface GuideSnapshot {
|
||||
id: string;
|
||||
eventId: string;
|
||||
timeMs: number;
|
||||
offsetMs: number;
|
||||
path: string;
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
export interface OcrBlock {
|
||||
id: string;
|
||||
snapshotId: string;
|
||||
text: string;
|
||||
confidence: number;
|
||||
box: {
|
||||
x: number;
|
||||
y: number;
|
||||
width: number;
|
||||
height: number;
|
||||
};
|
||||
}
|
||||
|
||||
export interface GuideStepCandidate {
|
||||
id: string;
|
||||
eventId: string;
|
||||
snapshotId?: string;
|
||||
timeMs: number;
|
||||
action: "click" | "choose" | "type" | "wait" | "manual";
|
||||
targetText?: string;
|
||||
targetRole?: "button" | "menu" | "tab" | "field" | "link" | "unknown";
|
||||
nearbyText: string[];
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
export interface GeneratedGuideStep {
|
||||
id: string;
|
||||
order: number;
|
||||
title: string;
|
||||
instruction: string;
|
||||
screenshotPath?: string;
|
||||
sourceCandidateId?: string;
|
||||
}
|
||||
|
||||
export interface GeneratedGuide {
|
||||
title: string;
|
||||
summary?: string;
|
||||
steps: GeneratedGuideStep[];
|
||||
}
|
||||
|
||||
export interface GuideSession {
|
||||
schemaVersion: 1;
|
||||
recordingId: string;
|
||||
videoPath: string;
|
||||
cursorPath?: string;
|
||||
guidePath: string;
|
||||
outputDir: string;
|
||||
status:
|
||||
| "recording"
|
||||
| "events-ready"
|
||||
| "snapshots-ready"
|
||||
| "ocr-ready"
|
||||
| "draft-ready"
|
||||
| "reviewed";
|
||||
events: GuideEvent[];
|
||||
snapshots: GuideSnapshot[];
|
||||
ocrBlocks: OcrBlock[];
|
||||
candidates: GuideStepCandidate[];
|
||||
generatedGuide?: GeneratedGuide;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
}
|
||||
```
|
||||
|
||||
Quy tắc dữ liệu:
|
||||
|
||||
- `timeMs` luôn tính theo timeline video cuối cùng.
|
||||
- `x/y` là tọa độ pixel nếu có.
|
||||
- `normalizedX/Y` dùng để chống lệch khi video scale.
|
||||
- `screenshotOffsetMs` mặc định `500`, nghĩa là lấy ảnh sau click 0.5 giây để bắt trạng thái UI sau thao tác.
|
||||
- AI output chỉ là draft, user edit mới là nội dung cuối.
|
||||
|
||||
## IPC Cần Thêm
|
||||
|
||||
MVP dùng app-level Electron IPC, không cần đưa vào native bridge vì đây là workflow cấp ứng dụng.
|
||||
|
||||
Preload API đề xuất:
|
||||
|
||||
```ts
|
||||
window.electronAPI.guide = {
|
||||
startSession(recordingId: string): Promise<GuideSession>;
|
||||
addMarker(input: AddGuideMarkerInput): Promise<GuideEvent>;
|
||||
finalizeEvents(input: FinalizeGuideEventsInput): Promise<GuideSession>;
|
||||
writeSnapshot(input: WriteGuideSnapshotInput): Promise<GuideSnapshot>;
|
||||
runOcr(input: RunGuideOcrInput): Promise<GuideSession>;
|
||||
generateDraft(input: GenerateGuideDraftInput): Promise<GuideSession>;
|
||||
saveGuide(input: SaveGuideInput): Promise<GuideSession>;
|
||||
exportMarkdown(input: ExportGuideInput): Promise<{ path: string }>;
|
||||
exportHtml(input: ExportGuideInput): Promise<{ path: string }>;
|
||||
};
|
||||
```
|
||||
|
||||
Input types:
|
||||
|
||||
```ts
|
||||
export interface AddGuideMarkerInput {
|
||||
recordingId: string;
|
||||
timeMs: number;
|
||||
kind: "hotkey" | "manual";
|
||||
label?: string;
|
||||
}
|
||||
|
||||
export interface FinalizeGuideEventsInput {
|
||||
recordingId: string;
|
||||
videoPath: string;
|
||||
cursorPath?: string;
|
||||
}
|
||||
|
||||
export interface WriteGuideSnapshotInput {
|
||||
recordingId: string;
|
||||
eventId: string;
|
||||
timeMs: number;
|
||||
offsetMs: number;
|
||||
pngBytes: ArrayBuffer;
|
||||
width: number;
|
||||
height: number;
|
||||
}
|
||||
|
||||
export interface RunGuideOcrInput {
|
||||
recordingId: string;
|
||||
snapshotIds?: string[];
|
||||
}
|
||||
|
||||
export interface GenerateGuideDraftInput {
|
||||
recordingId: string;
|
||||
language: "vi" | "en";
|
||||
provider: "deepseek" | "local";
|
||||
}
|
||||
|
||||
export interface SaveGuideInput {
|
||||
recordingId: string;
|
||||
generatedGuide: GeneratedGuide;
|
||||
}
|
||||
|
||||
export interface ExportGuideInput {
|
||||
recordingId: string;
|
||||
}
|
||||
```
|
||||
|
||||
## Phase 1: Contracts, Store, IPC
|
||||
|
||||
Mục tiêu: tạo khung lưu trữ `.guide.json` mà chưa đụng recorder.
|
||||
|
||||
Task coding:
|
||||
|
||||
1. Tạo `src/guide/contracts.ts`.
|
||||
2. Tạo `electron/guide/guidePaths.ts`.
|
||||
3. Tạo `electron/guide/guideStore.ts`.
|
||||
4. Tạo `electron/guide/guideIpc.ts`.
|
||||
5. Register guide IPC trong `electron/ipc/handlers.ts`.
|
||||
6. Expose API trong `electron/preload.ts`.
|
||||
7. Bổ sung type cho `window.electronAPI.guide`.
|
||||
|
||||
Yêu cầu kỹ thuật:
|
||||
|
||||
- Ghi file atomically: write temp file rồi rename.
|
||||
- Validate `schemaVersion`.
|
||||
- Không throw raw error ra renderer, trả error code ổn định.
|
||||
- Không yêu cầu AI/OCR trong phase này.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Tạo được guide session fake bằng IPC.
|
||||
- Đọc/ghi `.guide.json` round-trip không mất dữ liệu.
|
||||
- Input thiếu `recordingId` hoặc `videoPath` bị reject rõ ràng.
|
||||
|
||||
Test:
|
||||
|
||||
- `guideStore` tạo path đúng.
|
||||
- `guideStore` đọc file lỗi schema và trả error.
|
||||
- IPC handler reject input thiếu field.
|
||||
|
||||
## Phase 2: Build Event Từ Cursor Click
|
||||
|
||||
Mục tiêu: lấy click event từ `.cursor.json` hiện tại.
|
||||
|
||||
Task coding:
|
||||
|
||||
1. Tạo `src/guide/eventBuilder.ts`.
|
||||
2. Thêm hàm `buildGuideEventsFromCursor`.
|
||||
3. Lọc sample có `interactionType === "click"`.
|
||||
4. Convert sang `GuideEvent`.
|
||||
5. De-duplicate click trong cửa sổ `250ms`.
|
||||
6. Sort theo `timeMs`.
|
||||
7. Merge với marker thủ công nếu có.
|
||||
|
||||
Pseudo-code:
|
||||
|
||||
```ts
|
||||
export function buildGuideEventsFromCursor(input: {
|
||||
recordingId: string;
|
||||
samples: CursorRecordingSample[];
|
||||
videoWidth?: number;
|
||||
videoHeight?: number;
|
||||
}): GuideEvent[] {
|
||||
const events = input.samples
|
||||
.filter((sample) => sample.interactionType === "click")
|
||||
.map((sample) => ({
|
||||
id: createGuideEventId(input.recordingId, sample.timeMs),
|
||||
recordingId: input.recordingId,
|
||||
kind: "click" as const,
|
||||
source: "cursor-recording" as const,
|
||||
timeMs: sample.timeMs,
|
||||
x: sample.cx,
|
||||
y: sample.cy,
|
||||
normalizedX: normalize(sample.cx, input.videoWidth),
|
||||
normalizedY: normalize(sample.cy, input.videoHeight),
|
||||
button: "left" as const,
|
||||
screenshotOffsetMs: 500,
|
||||
createdAt: new Date().toISOString(),
|
||||
}));
|
||||
|
||||
return sortGuideEvents(dedupeGuideEvents(events));
|
||||
}
|
||||
```
|
||||
|
||||
Acceptance:
|
||||
|
||||
- 5 click samples tạo 5 guide events.
|
||||
- `move` và `mouseup` không tạo step.
|
||||
- Double click hoặc click bounce không tạo quá nhiều step nếu nằm trong dedupe window.
|
||||
- Không có cursor click thì vẫn dùng được manual marker.
|
||||
|
||||
Test:
|
||||
|
||||
- convert click sample.
|
||||
- bỏ qua move/mouseup.
|
||||
- dedupe theo thời gian.
|
||||
- sort đúng thứ tự.
|
||||
- xử lý sample thiếu tọa độ.
|
||||
|
||||
## Phase 3: Guide Mode UI Và Manual Marker
|
||||
|
||||
Mục tiêu: user bật được Guide Mode và đánh dấu bước thủ công.
|
||||
|
||||
Task coding:
|
||||
|
||||
1. Thêm Guide Mode toggle trong `LaunchWindow.tsx`.
|
||||
2. Truyền trạng thái guide vào flow recording trong `useScreenRecorder.ts`.
|
||||
3. Khi start recording và Guide Mode on, gọi `guide.startSession(recordingId)`.
|
||||
4. Thêm nút marker trong HUD.
|
||||
5. Thêm global hotkey ở Electron main, ví dụ `CommandOrControl+Shift+G`.
|
||||
6. Khi bấm marker/hotkey, gọi `guide.addMarker`.
|
||||
7. Khi stop recording, gọi `guide.finalizeEvents`.
|
||||
|
||||
Lưu ý:
|
||||
|
||||
- Global hotkey phải nằm ở Electron main vì app đang được quay có thể đang focus.
|
||||
- Nếu register hotkey fail, UI vẫn dùng nút marker.
|
||||
- Không làm thay đổi behavior khi Guide Mode off.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Guide Mode off: quay/sửa/export vẫn như cũ.
|
||||
- Guide Mode on: stop recording tạo `.guide.json`.
|
||||
- Hotkey tạo event đúng timestamp.
|
||||
- Cancel recording không để lại guide artifact rác.
|
||||
|
||||
## Phase 4: Snapshot Extraction
|
||||
|
||||
Mục tiêu: trích ảnh PNG cho từng event sau khi quay xong.
|
||||
|
||||
Quyết định MVP:
|
||||
|
||||
- Không chụp realtime trong lúc quay.
|
||||
- Dùng video đã lưu, seek tới timestamp cần lấy.
|
||||
- Thực hiện trong renderer/editor bằng hidden `<video>` + `<canvas>`.
|
||||
- Persist PNG qua IPC.
|
||||
|
||||
Task coding:
|
||||
|
||||
1. Tạo `src/guide/snapshot/extractGuideSnapshots.ts`.
|
||||
2. Nhận `GuideSession` + `videoPath`.
|
||||
3. Với mỗi event, lấy timestamp `event.timeMs + screenshotOffsetMs`.
|
||||
4. Clamp timestamp vào duration video.
|
||||
5. Seek hidden video tới timestamp.
|
||||
6. Draw frame vào canvas.
|
||||
7. Convert canvas thành PNG bytes.
|
||||
8. Gọi `guide.writeSnapshot`.
|
||||
9. Update `.guide.json` với danh sách snapshots.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Mỗi event có một ảnh `step-xxx.png`.
|
||||
- Nếu một event fail snapshot, các event khác vẫn chạy.
|
||||
- Ảnh được lưu trong `recording-123-guide/`.
|
||||
- UI báo lỗi recoverable, không crash editor.
|
||||
|
||||
Test:
|
||||
|
||||
- clamp timestamp.
|
||||
- tên file đúng thứ tự.
|
||||
- handle seek timeout.
|
||||
- không abort toàn bộ batch khi một frame lỗi.
|
||||
|
||||
## Phase 5: OCR Local
|
||||
|
||||
Mục tiêu: đọc text trên giao diện phần mềm từ screenshot.
|
||||
|
||||
Khuyến nghị:
|
||||
|
||||
- Dùng PaddleOCR làm OCR chính.
|
||||
- Tesseract chỉ nên là fallback đơn giản.
|
||||
- VLM local như Gemma 3 4B/MiniCPM/Qwen-VL chỉ dùng cho trường hợp icon/no-text khó, không dùng làm OCR chính.
|
||||
|
||||
Kiến trúc MVP:
|
||||
|
||||
- Chạy PaddleOCR như local HTTP service tại `127.0.0.1:8866`.
|
||||
- Electron main gọi OCR service.
|
||||
- Renderer không gọi OCR trực tiếp.
|
||||
|
||||
API local OCR đề xuất:
|
||||
|
||||
```http
|
||||
GET /health
|
||||
POST /ocr
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"imagePath": "D:\\Code\\OpenScreen\\recording-123-guide\\step-001.png",
|
||||
"language": "vi,en"
|
||||
}
|
||||
```
|
||||
|
||||
Response:
|
||||
|
||||
```json
|
||||
{
|
||||
"blocks": [
|
||||
{
|
||||
"text": "Settings",
|
||||
"confidence": 0.97,
|
||||
"box": { "x": 120, "y": 80, "width": 90, "height": 24 }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Task coding:
|
||||
|
||||
1. Tạo `electron/guide/ocr/paddleOcrClient.ts`.
|
||||
2. Thêm health check.
|
||||
3. Gọi OCR cho từng snapshot.
|
||||
4. Convert output sang `OcrBlock`.
|
||||
5. Ghi `ocrBlocks` vào `.guide.json`.
|
||||
6. Ghi bản tổng hợp vào `recording-123-guide/ocr.json`.
|
||||
|
||||
Config đề xuất:
|
||||
|
||||
```ts
|
||||
export interface GuideOcrConfig {
|
||||
provider: "paddleocr";
|
||||
baseUrl: string; // default http://127.0.0.1:8866
|
||||
language: string; // default vi,en
|
||||
timeoutMs: number; // default 30000
|
||||
}
|
||||
```
|
||||
|
||||
Acceptance:
|
||||
|
||||
- OCR service offline thì UI báo lỗi rõ ràng.
|
||||
- OCR fail không xóa snapshots.
|
||||
- OCR result có text, confidence, bounding box.
|
||||
- Guide vẫn export thủ công được nếu OCR không chạy.
|
||||
|
||||
## Phase 6: Target Mapper
|
||||
|
||||
Mục tiêu: xác định user đã click vào nút/menu/field nào dựa trên tọa độ click và OCR.
|
||||
|
||||
Task coding:
|
||||
|
||||
1. Tạo `src/guide/targetMapper.ts`.
|
||||
2. Với mỗi `GuideEvent`, lấy snapshot tương ứng.
|
||||
3. Lấy OCR blocks của snapshot đó.
|
||||
4. Score từng OCR block.
|
||||
5. Chọn target tốt nhất.
|
||||
6. Sinh `GuideStepCandidate`.
|
||||
|
||||
Scoring đề xuất:
|
||||
|
||||
- `+100` nếu click nằm trong OCR box.
|
||||
- Điểm cao hơn nếu box center gần click hơn.
|
||||
- Cộng điểm nếu text ngắn, giống label button/menu.
|
||||
- Trừ điểm nếu confidence thấp.
|
||||
- Nếu không có block đủ tốt, để `targetRole: "unknown"`.
|
||||
|
||||
Role heuristic:
|
||||
|
||||
- `button`: click vào/near text dạng action label.
|
||||
- `menu`: text nằm trong danh sách dọc.
|
||||
- `tab`: text nằm trong hàng ngang gần đầu giao diện.
|
||||
- `field`: click vào vùng giống input.
|
||||
- `unknown`: không đủ tự tin.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Click trực tiếp vào nút text map đúng target text.
|
||||
- Click gần label map được OCR block gần nhất.
|
||||
- Click vùng icon/no-text tạo candidate confidence thấp để user review.
|
||||
|
||||
Test:
|
||||
|
||||
- click inside box.
|
||||
- nearest box.
|
||||
- low-confidence penalty.
|
||||
- no OCR fallback.
|
||||
|
||||
## Phase 7: AI Draft Generation
|
||||
|
||||
Mục tiêu: tạo bản nháp hướng dẫn từ candidate metadata.
|
||||
|
||||
Provider MVP:
|
||||
|
||||
- DeepSeek API cho cloud text generation.
|
||||
- Local LLM có thể thêm sau qua cùng prompt contract.
|
||||
- Không gửi ảnh lên DeepSeek mặc định.
|
||||
|
||||
Task coding:
|
||||
|
||||
1. Tạo `src/guide/promptBuilder.ts`.
|
||||
2. Tạo `electron/guide/ai/deepseekGuideClient.ts`.
|
||||
3. Đọc API key ở Electron main qua env/config.
|
||||
4. Build prompt từ candidates + OCR nearby text.
|
||||
5. Yêu cầu output JSON.
|
||||
6. Validate output.
|
||||
7. Ghi `generatedGuide` vào `.guide.json`.
|
||||
|
||||
Env:
|
||||
|
||||
```powershell
|
||||
$env:DEEPSEEK_API_KEY="..."
|
||||
$env:DEEPSEEK_BASE_URL="https://api.deepseek.com"
|
||||
$env:DEEPSEEK_MODEL="deepseek-v4-flash"
|
||||
```
|
||||
|
||||
Prompt input:
|
||||
|
||||
```json
|
||||
{
|
||||
"language": "vi",
|
||||
"softwareContext": {
|
||||
"recordingName": "recording-123",
|
||||
"userGoal": "Tạo báo cáo"
|
||||
},
|
||||
"steps": [
|
||||
{
|
||||
"order": 1,
|
||||
"eventKind": "click",
|
||||
"targetText": "Settings",
|
||||
"targetRole": "button",
|
||||
"nearbyText": ["Home", "Settings", "Account"],
|
||||
"confidence": 0.91
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Expected AI output:
|
||||
|
||||
```json
|
||||
{
|
||||
"title": "Hướng dẫn thao tác",
|
||||
"summary": "Tài liệu này mô tả các bước thực hiện thao tác đã ghi hình.",
|
||||
"steps": [
|
||||
{
|
||||
"order": 1,
|
||||
"title": "Mở phần cài đặt",
|
||||
"instruction": "Nhấn nút Settings ở thanh điều hướng bên trái."
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Thiếu API key thì UI báo lỗi rõ ràng.
|
||||
- AI trả invalid JSON thì reject và cho retry.
|
||||
- Output được validate trước khi lưu.
|
||||
- Có thể generate tiếng Việt.
|
||||
|
||||
Test:
|
||||
|
||||
- promptBuilder không đưa raw image vào prompt.
|
||||
- parser reject JSON sai schema.
|
||||
- DeepSeek client handle timeout/401/rate limit.
|
||||
|
||||
## Phase 8: GuidePanel Review UI
|
||||
|
||||
Mục tiêu: người dùng sửa được guide trước khi export.
|
||||
|
||||
Task coding:
|
||||
|
||||
1. Tạo `src/components/video-editor/guide/GuidePanel.tsx`.
|
||||
2. Tạo `GuideStepList.tsx`.
|
||||
3. Tạo `GuideStepEditor.tsx`.
|
||||
4. Tạo `GuideSnapshotPreview.tsx`.
|
||||
5. Mount panel trong `VideoEditor.tsx`.
|
||||
6. Load `.guide.json` khi mở video có guide sidecar.
|
||||
7. Thêm action:
|
||||
- Generate snapshots.
|
||||
- Run OCR.
|
||||
- Generate AI draft.
|
||||
- Save edits.
|
||||
- Export Markdown.
|
||||
- Export HTML.
|
||||
|
||||
UX:
|
||||
|
||||
- AI output là draft, không khóa nội dung.
|
||||
- User sửa title/instruction từng step.
|
||||
- User xóa step nhiễu.
|
||||
- User merge step sau nếu cần.
|
||||
- Confidence hiển thị nhỏ, không làm UI rối.
|
||||
- Guide fail không ảnh hưởng video editing.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- User sửa step và save được.
|
||||
- User xóa step được.
|
||||
- Regenerate cần confirm nếu đang có manual edits.
|
||||
- Export dùng nội dung đã sửa, không dùng lại AI raw output.
|
||||
|
||||
## Phase 9: Export Markdown/HTML
|
||||
|
||||
Mục tiêu: tạo tài liệu dùng được ngay.
|
||||
|
||||
Task coding:
|
||||
|
||||
1. Tạo `src/guide/export/markdownExporter.ts`.
|
||||
2. Tạo `src/guide/export/htmlExporter.ts`.
|
||||
3. Gọi exporter từ Electron IPC.
|
||||
4. Ghi file vào `recording-123-guide/guide.md`.
|
||||
5. Ghi file vào `recording-123-guide/guide.html`.
|
||||
6. Dùng relative screenshot link.
|
||||
|
||||
Markdown format:
|
||||
|
||||
```md
|
||||
# Hướng dẫn thao tác
|
||||
|
||||
Tài liệu này mô tả các bước thực hiện thao tác đã ghi hình.
|
||||
|
||||
## Bước 1: Mở phần cài đặt
|
||||
|
||||
Nhấn nút **Settings** ở thanh điều hướng bên trái.
|
||||
|
||||

|
||||
```
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Markdown mở được và thấy ảnh local.
|
||||
- HTML mở được bằng browser.
|
||||
- Export vẫn chạy nếu guide được viết thủ công, không cần AI.
|
||||
|
||||
## Thứ Tự Coding Ngay
|
||||
|
||||
Nên làm theo thứ tự này để giảm rủi ro:
|
||||
|
||||
1. Tạo `src/guide/contracts.ts`.
|
||||
2. Tạo `electron/guide/guidePaths.ts`.
|
||||
3. Tạo `electron/guide/guideStore.ts`.
|
||||
4. Tạo `electron/guide/guideIpc.ts`.
|
||||
5. Expose `window.electronAPI.guide`.
|
||||
6. Viết unit test cho guide store.
|
||||
7. Tạo `src/guide/eventBuilder.ts`.
|
||||
8. Viết unit test convert cursor samples sang guide events.
|
||||
9. Thêm Guide Mode toggle vào launch UI.
|
||||
10. Gọi `startSession` khi bắt đầu quay.
|
||||
11. Gọi `finalizeEvents` khi dừng quay.
|
||||
12. Tạo snapshot extractor trong renderer.
|
||||
13. Tạo `paddleOcrClient`.
|
||||
14. Tạo `targetMapper`.
|
||||
15. Tạo `promptBuilder`.
|
||||
16. Tạo `deepseekGuideClient`.
|
||||
17. Tạo `GuidePanel`.
|
||||
18. Tạo Markdown/HTML exporters.
|
||||
19. Chạy lint/test/build.
|
||||
20. Test thủ công flow đầy đủ.
|
||||
|
||||
Chia PR đề xuất:
|
||||
|
||||
- PR 1: contracts, store, IPC, unit tests.
|
||||
- PR 2: cursor-click event builder, Guide Mode toggle, manual marker.
|
||||
- PR 3: snapshot extraction và GuidePanel shell.
|
||||
- PR 4: PaddleOCR integration và target mapping.
|
||||
- PR 5: DeepSeek generation, review UI, Markdown/HTML export.
|
||||
|
||||
## Error Codes
|
||||
|
||||
Dùng error code ổn định để UI xử lý:
|
||||
|
||||
```ts
|
||||
export type GuideErrorCode =
|
||||
| "guide-session-not-found"
|
||||
| "guide-invalid-schema"
|
||||
| "guide-video-load-failed"
|
||||
| "guide-snapshot-failed"
|
||||
| "guide-ocr-unavailable"
|
||||
| "guide-ocr-failed"
|
||||
| "guide-ai-key-missing"
|
||||
| "guide-ai-request-failed"
|
||||
| "guide-ai-invalid-output"
|
||||
| "guide-export-failed";
|
||||
```
|
||||
|
||||
Quy tắc:
|
||||
|
||||
- IPC không throw raw provider error ra renderer.
|
||||
- OCR fail là recoverable.
|
||||
- AI fail là recoverable.
|
||||
- Export fail phải giữ nguyên `.guide.json`.
|
||||
|
||||
## Local Development
|
||||
|
||||
Baseline:
|
||||
|
||||
```powershell
|
||||
npm install
|
||||
npm run lint
|
||||
npm test
|
||||
npm run build-vite
|
||||
```
|
||||
|
||||
OCR service dev:
|
||||
|
||||
```powershell
|
||||
python -m venv .venv-ocr
|
||||
.\.venv-ocr\Scripts\Activate.ps1
|
||||
pip install paddleocr fastapi uvicorn
|
||||
uvicorn local_ocr_service:app --host 127.0.0.1 --port 8866
|
||||
```
|
||||
|
||||
DeepSeek env:
|
||||
|
||||
```powershell
|
||||
$env:DEEPSEEK_API_KEY="..."
|
||||
$env:DEEPSEEK_BASE_URL="https://api.deepseek.com"
|
||||
$env:DEEPSEEK_MODEL="deepseek-v4-flash"
|
||||
```
|
||||
|
||||
Không commit API key.
|
||||
|
||||
## Testing Matrix
|
||||
|
||||
Unit tests:
|
||||
|
||||
- `eventBuilder`: cursor sample -> guide events.
|
||||
- `targetMapper`: OCR blocks -> step candidates.
|
||||
- `promptBuilder`: candidates -> AI prompt.
|
||||
- `markdownExporter`: generated guide -> Markdown.
|
||||
- `htmlExporter`: generated guide -> HTML.
|
||||
|
||||
Renderer/browser tests:
|
||||
|
||||
- snapshot extractor seek video fixture.
|
||||
- GuidePanel edit/delete/save step.
|
||||
|
||||
Manual integration:
|
||||
|
||||
1. Quay với Guide Mode off, xác nhận behavior cũ không đổi.
|
||||
2. Quay với Guide Mode on và 3 click.
|
||||
3. Kiểm tra `.guide.json` có 3 click events.
|
||||
4. Generate snapshots.
|
||||
5. Run OCR local.
|
||||
6. Generate Vietnamese draft.
|
||||
7. Sửa một step.
|
||||
8. Export Markdown và HTML.
|
||||
9. Mở Markdown/HTML xem ảnh local.
|
||||
10. Tắt OCR service và test lỗi recoverable.
|
||||
11. Xóa DeepSeek key và test lỗi recoverable.
|
||||
|
||||
Lệnh trước khi merge:
|
||||
|
||||
```powershell
|
||||
npm run lint
|
||||
npm test
|
||||
npm run build-vite
|
||||
```
|
||||
|
||||
Nếu phase không sửa native recorder thì chưa cần chạy native helper tests.
|
||||
|
||||
## Definition Of Done Cho MVP
|
||||
|
||||
MVP được xem là xong khi:
|
||||
|
||||
- Guide Mode bật/tắt được.
|
||||
- Guide Mode off không ảnh hưởng recording hiện tại.
|
||||
- Click events lấy được từ cursor telemetry hiện có.
|
||||
- Hotkey/HUD marker tạo event thủ công.
|
||||
- `.guide.json` được tạo cạnh recording.
|
||||
- Snapshot PNG được trích từ final video.
|
||||
- PaddleOCR local đọc được text và bounding boxes.
|
||||
- Target mapper tạo step candidates.
|
||||
- DeepSeek tạo được draft tiếng Việt từ text metadata.
|
||||
- User review/sửa/xóa step được.
|
||||
- Export Markdown/HTML dùng nội dung đã review.
|
||||
- Lint/test/build pass.
|
||||
|
||||
## Nâng Cấp Sau MVP
|
||||
|
||||
- Export PDF/DOCX.
|
||||
- Bundle local OCR runtime vào packaged app.
|
||||
- Thêm local VLM fallback cho icon-only control.
|
||||
- Cho phép user opt-in gửi crop ảnh lên remote vision model.
|
||||
- Merge/dedupe step thông minh hơn cho double click/menu navigation.
|
||||
- Dùng transcript giọng nói làm ngữ cảnh thêm.
|
||||
- Template theo từng loại phần mềm.
|
||||
- Computer vision detect UI element ngoài OCR.
|
||||
@@ -0,0 +1,210 @@
|
||||
# macOS Native Recorder Roadmap
|
||||
|
||||
OpenScreen's macOS recorder should follow the same architecture boundaries as the Windows native recorder: Electron owns session orchestration and persistence, while a platform-native helper owns capture, timing, encoding, and platform-specific permissions.
|
||||
|
||||
This work is intentionally scoped as a macOS-only port. Windows native capture remains owned by the WGC helper, and Linux remains on the existing Electron path.
|
||||
|
||||
## Goals
|
||||
|
||||
- Capture displays and windows through ScreenCaptureKit.
|
||||
- Exclude the real system cursor during capture when using the editable OpenScreen cursor overlay.
|
||||
- Preserve the current high-quality cursor overlay path in preview and export.
|
||||
- Capture macOS system audio through ScreenCaptureKit on supported macOS versions.
|
||||
- Capture microphone audio through the same native timing domain where the OS supports it, or through an explicit companion path until it can be moved into the helper.
|
||||
- Mix system audio and microphone audio into the primary MP4 without renderer-side track assembly.
|
||||
- Capture webcam video natively and compose it into the helper-owned MP4 during the native-recording migration.
|
||||
- Keep screen video, audio, webcam, and cursor aligned to one native timing origin.
|
||||
- Package per-architecture helper binaries with macOS builds.
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Replacing the editor/export pipeline.
|
||||
- Changing Windows native capture behavior.
|
||||
- Adding Linux native capture.
|
||||
- Shipping a silent fallback from native macOS capture to Electron capture when the user explicitly requested a native-only feature.
|
||||
|
||||
## Architecture
|
||||
|
||||
The renderer keeps the existing recording controls. On macOS, `useScreenRecorder` should eventually send a complete recording request to Electron instead of assembling display, audio, microphone, webcam, and cursor streams in the browser.
|
||||
|
||||
Electron owns the native recording session:
|
||||
|
||||
- resolves the selected display/window source;
|
||||
- resolves output paths;
|
||||
- starts cursor telemetry capture when editable cursor mode is selected;
|
||||
- starts the ScreenCaptureKit helper process;
|
||||
- sends pause/resume/stop/cancel commands;
|
||||
- writes `RecordingSession` manifests;
|
||||
- reports explicit errors when a macOS-native capability is unavailable.
|
||||
|
||||
The helper owns macOS media capture:
|
||||
|
||||
- ScreenCaptureKit display/window frames;
|
||||
- ScreenCaptureKit system audio where supported;
|
||||
- microphone capture or helper-owned companion audio capture;
|
||||
- webcam capture and initial picture-in-picture composition;
|
||||
- AVFoundation/VideoToolbox encoding and muxing;
|
||||
- stream timestamp normalization.
|
||||
|
||||
## Helper Contract V1
|
||||
|
||||
The helper receives a single JSON argument:
|
||||
|
||||
```json
|
||||
{
|
||||
"schemaVersion": 1,
|
||||
"recordingId": 1234567890,
|
||||
"source": {
|
||||
"type": "display",
|
||||
"sourceId": "screen:0:0",
|
||||
"displayId": 1,
|
||||
"windowId": null,
|
||||
"bounds": { "x": 0, "y": 0, "width": 1920, "height": 1080 }
|
||||
},
|
||||
"video": {
|
||||
"fps": 60,
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
"bitrate": 18000000,
|
||||
"hideSystemCursor": true
|
||||
},
|
||||
"audio": {
|
||||
"system": { "enabled": true },
|
||||
"microphone": {
|
||||
"enabled": true,
|
||||
"deviceId": "default",
|
||||
"deviceName": "MacBook Pro Microphone",
|
||||
"gain": 1.4
|
||||
}
|
||||
},
|
||||
"webcam": {
|
||||
"enabled": true,
|
||||
"deviceId": "default",
|
||||
"deviceName": "FaceTime HD Camera",
|
||||
"width": 1280,
|
||||
"height": 720,
|
||||
"fps": 30
|
||||
},
|
||||
"cursor": {
|
||||
"mode": "editable-overlay"
|
||||
},
|
||||
"outputs": {
|
||||
"screenPath": "/Users/me/Library/Application Support/openscreen/recordings/recording-123.mp4",
|
||||
"manifestPath": "/Users/me/Library/Application Support/openscreen/recordings/recording-123.session.json"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The helper emits newline-delimited JSON events to stdout:
|
||||
|
||||
```json
|
||||
{ "event": "ready", "schemaVersion": 1 }
|
||||
{ "event": "recording-started", "timestampMs": 1234567890 }
|
||||
{ "event": "warning", "code": "microphone-unavailable", "message": "..." }
|
||||
{ "event": "recording-stopped", "screenPath": "..." }
|
||||
{ "event": "error", "code": "screen-permission-denied", "message": "..." }
|
||||
```
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
Current PR status: macOS screen/window capture routes through the ScreenCaptureKit helper when it is available so editable-cursor recordings can hide the system cursor. The helper now writes ScreenCaptureKit system audio into the primary MP4 and attempts runtime-gated native microphone capture on macOS versions that expose ScreenCaptureKit microphone output. Webcam capture is currently an Electron-recorded sidecar attached to the same recording session; native AVFoundation webcam composition remains the target end state.
|
||||
|
||||
### 1. Native Session Boundary
|
||||
|
||||
- Add a structured macOS native recording request type.
|
||||
- Add a macOS helper resolver and build script placeholders.
|
||||
- Keep the helper contract process-based, matching the Windows helper boundary.
|
||||
- Do not route production macOS recording through this helper until the helper is available and validated.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- TypeScript build passes.
|
||||
- The macOS helper path and request contract are documented and testable without affecting Windows/Linux behavior.
|
||||
|
||||
### 2. ScreenCaptureKit Display Capture
|
||||
|
||||
- Implement a Swift helper using ScreenCaptureKit.
|
||||
- Select display captures by `displayId`.
|
||||
- Encode H.264 MP4 through AVFoundation/VideoToolbox.
|
||||
- Set `showsCursor = false` when editable cursor overlay mode is selected.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Display-only recording produces a valid MP4.
|
||||
- The real cursor is not baked into editable-cursor recordings.
|
||||
|
||||
### 3. ScreenCaptureKit Window Capture
|
||||
|
||||
- Resolve Electron `window:*` selections to ScreenCaptureKit window ids.
|
||||
- Capture `SCContentFilter(desktopIndependentWindow:)`.
|
||||
- Handle closed/minimized/protected windows with explicit errors.
|
||||
- Keep window selection and capture source resolution in Electron/main, not the renderer.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Capturing a normal app window works with cursor/audio/webcam disabled.
|
||||
- Unsupported windows return clear native errors.
|
||||
|
||||
### 4. System Audio
|
||||
|
||||
- Enable ScreenCaptureKit system audio on supported macOS versions.
|
||||
- Keep audio format and timing owned by the helper.
|
||||
- Encode or mux AAC audio into the primary MP4.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- System-audio-only recordings produce a valid AAC track.
|
||||
- Unsupported macOS versions return an explicit capability error.
|
||||
|
||||
### 5. Microphone
|
||||
|
||||
- Resolve the selected microphone device from the renderer-provided browser `deviceId` and user-visible label.
|
||||
- Capture microphone audio in the helper timing domain.
|
||||
- Apply OpenScreen microphone gain policy.
|
||||
- Mix system and microphone audio before final AAC output.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Mic-only and mic-plus-system recordings produce a valid, balanced AAC track.
|
||||
- Device selection honors the selected microphone, not only the default device.
|
||||
|
||||
### 6. Webcam Composition
|
||||
|
||||
- Capture the selected camera natively through AVFoundation.
|
||||
- Match browser device id first where possible, then user-visible label.
|
||||
- Compose an initial picture-in-picture overlay into the primary MP4.
|
||||
- Hide webcam output until the first usable frame to avoid black startup flashes.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Native display/window recordings can include webcam without returning to Electron capture.
|
||||
- Selected camera is honored.
|
||||
|
||||
### 7. Runtime Controls
|
||||
|
||||
- Add pause/resume commands to the helper.
|
||||
- Add cancel command that removes partial outputs.
|
||||
- Keep restart as stop-discard-start until the helper exposes a native restart operation.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Pause/resume keeps output duration coherent.
|
||||
- Cancel leaves no stale media/session files.
|
||||
|
||||
### 8. Test Pipeline
|
||||
|
||||
- `npm run build:native:mac`: builds Swift helper binaries on macOS.
|
||||
- `npm run test:sck-helper:mac`: display-only helper smoke test.
|
||||
- `npm run test:sck-window:mac`: window capture smoke test.
|
||||
- `npm run test:sck-audio:mac`: system audio smoke test when supported.
|
||||
- `npm run test:sck-mic:mac`: microphone smoke test.
|
||||
- `npm run test:sck-webcam:mac`: webcam smoke test when a webcam is available.
|
||||
- Packaging check: confirms helpers are available under `electron/native/bin/darwin-${arch}` in packaged builds.
|
||||
|
||||
## SSOT Rules
|
||||
|
||||
- `src/lib/nativeMacRecording.ts` is the renderer/main TypeScript request contract.
|
||||
- This document is the feature-level contract and phase checklist.
|
||||
- The Swift helper owns ScreenCaptureKit/AVFoundation media timing.
|
||||
- Electron owns output paths, session manifests, and selected source/device resolution.
|
||||
- Renderer code must use existing hooks/client APIs and should not bind directly to helper process details.
|
||||
@@ -0,0 +1,77 @@
|
||||
# PaddleOCR Local Service
|
||||
|
||||
OpenScreen calls OCR through a local HTTP service. The default endpoint is:
|
||||
|
||||
```text
|
||||
http://127.0.0.1:8866/ocr
|
||||
```
|
||||
|
||||
The app sends either `imageBase64` or `path`, plus optional `language` and `profile`, and expects OCR blocks:
|
||||
|
||||
```json
|
||||
{
|
||||
"blocks": [
|
||||
{
|
||||
"text": "Settings",
|
||||
"confidence": 0.97,
|
||||
"box": { "x": 120, "y": 80, "width": 90, "height": 24 }
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Install
|
||||
|
||||
Use a separate virtual environment because PaddleOCR and PaddlePaddle are large dependencies.
|
||||
|
||||
```powershell
|
||||
python -m venv .venv-ocr
|
||||
.\.venv-ocr\Scripts\Activate.ps1
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install -r tools\ocr\requirements.txt
|
||||
```
|
||||
|
||||
If `paddle` is still missing after installing `paddleocr`, install the CPU PaddlePaddle wheel that matches your Python and OS from the official PaddlePaddle install guide.
|
||||
|
||||
## Run
|
||||
|
||||
```powershell
|
||||
.\.venv-ocr\Scripts\Activate.ps1
|
||||
$env:PADDLEOCR_DEVICE="cpu"
|
||||
$env:OPENSCREEN_OCR_PROFILE="vietnamese"
|
||||
npm run ocr:paddle
|
||||
```
|
||||
|
||||
Keep this terminal open while using the Guide OCR step in OpenScreen.
|
||||
|
||||
## Verify
|
||||
|
||||
```powershell
|
||||
Invoke-WebRequest http://127.0.0.1:8866/health -UseBasicParsing
|
||||
```
|
||||
|
||||
Expected response from a healthy environment:
|
||||
|
||||
```json
|
||||
{
|
||||
"ok": true,
|
||||
"paddleocrInstalled": true,
|
||||
"paddleInstalled": true,
|
||||
"engineReady": false,
|
||||
"defaultLanguage": "vi,en",
|
||||
"defaultProfile": "vietnamese"
|
||||
}
|
||||
```
|
||||
|
||||
`engineReady` becomes `true` after the first OCR request. The first request can be slow because PaddleOCR downloads and loads models.
|
||||
|
||||
## Configuration
|
||||
|
||||
- `PADDLEOCR_DEVICE`: `cpu`, `gpu:0`, or another PaddleOCR device string.
|
||||
- `OPENSCREEN_OCR_PROFILE`: `fast`, `vietnamese`, or `hybrid`. The default `vietnamese` profile upscales and sharpens focused UI screenshots before OCR.
|
||||
- `OPENSCREEN_GUIDE_OCR_LANGUAGE`: defaults to `vi,en`.
|
||||
- `PADDLEOCR_LANG`: optional hard override. Leave unset for the app profile/language settings to work.
|
||||
- `PADDLEOCR_VERSION`: defaults to `PP-OCRv5`.
|
||||
- `PADDLEOCR_USE_MOBILE`: defaults to `1`; set to `0` to use the default/server models.
|
||||
- `PADDLEOCR_REC_MODEL`: optional recognizer model override. The bundled profile uses `latin_PP-OCRv5_mobile_rec`, which supports Vietnamese Latin-script text.
|
||||
- `OPENSCREEN_GUIDE_OCR_URL`: OpenScreen OCR endpoint override; defaults to `http://127.0.0.1:8866`.
|
||||
@@ -0,0 +1,248 @@
|
||||
# Windows Native Recorder Roadmap
|
||||
|
||||
OpenScreen's Windows recorder should be owned by one native backend. Electron capture can remain available for non-Windows platforms and temporary developer diagnostics, but Windows production recording should not silently fall back to `getDisplayMedia` / `MediaRecorder`.
|
||||
|
||||
## Goals
|
||||
|
||||
- Capture displays and windows through Windows Graphics Capture (WGC).
|
||||
- Render the native Windows cursor as OpenScreen's high-quality scalable cursor overlay.
|
||||
- Capture system audio through WASAPI loopback.
|
||||
- Capture microphone audio through WASAPI.
|
||||
- Mix system audio and microphone audio into the primary screen recording.
|
||||
- Capture webcam video natively and compose it into the Windows helper MP4 during the native-recording migration.
|
||||
- Keep preview/export aligned because screen video, audio, webcam, and cursor share one native timing origin.
|
||||
- Keep exported MP4s Windows-friendly: H.264 video plus AAC audio. Opus-in-MP4 is not an acceptable Windows export target.
|
||||
- Package the native helper with the Windows app.
|
||||
|
||||
## Non-Goals
|
||||
|
||||
- Replacing the editor/export pipeline.
|
||||
- Producing a separate, editable webcam recording in this milestone. A later pass can reintroduce a separate editable native `webcamVideoPath`; the current Windows-native milestone prioritizes a helper-owned multi-stream MP4 with deterministic screen/audio/mic/webcam sync.
|
||||
- Adding a native capture backend for macOS or Linux in this branch.
|
||||
|
||||
## Target Architecture
|
||||
|
||||
The renderer keeps the existing recording controls. On Windows, `useScreenRecorder` sends a complete recording request to Electron and does not assemble Windows `MediaStream` tracks with `MediaRecorder`.
|
||||
|
||||
Electron owns the native recording session:
|
||||
|
||||
- resolves the selected source;
|
||||
- resolves output paths;
|
||||
- starts cursor sampling;
|
||||
- starts the helper process;
|
||||
- sends pause/resume/stop/cancel commands;
|
||||
- writes `RecordingSession` manifests;
|
||||
- reports explicit errors when a Windows-native capability is unavailable.
|
||||
|
||||
The helper owns Windows media capture:
|
||||
|
||||
- WGC screen/window frames;
|
||||
- WASAPI system loopback;
|
||||
- WASAPI microphone input;
|
||||
- Media Foundation webcam capture;
|
||||
- DirectShow webcam fallback for virtual cameras not visible to Media Foundation;
|
||||
- Media Foundation encoding/muxing;
|
||||
- stream timestamp normalization.
|
||||
|
||||
## Helper Contract V2
|
||||
|
||||
The helper receives a single JSON argument:
|
||||
|
||||
```json
|
||||
{
|
||||
"schemaVersion": 2,
|
||||
"recordingId": 1234567890,
|
||||
"source": {
|
||||
"type": "display",
|
||||
"sourceId": "screen:0:0",
|
||||
"displayId": 123,
|
||||
"windowHandle": null,
|
||||
"bounds": { "x": 0, "y": 0, "width": 1920, "height": 1080 }
|
||||
},
|
||||
"video": {
|
||||
"fps": 60,
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
"bitrate": 18000000
|
||||
},
|
||||
"audio": {
|
||||
"system": { "enabled": true },
|
||||
"microphone": { "enabled": true, "deviceId": "default", "gain": 1.4 }
|
||||
},
|
||||
"webcam": {
|
||||
"enabled": true,
|
||||
"deviceId": "default",
|
||||
"deviceName": "Camera (NVIDIA Broadcast)",
|
||||
"width": 1280,
|
||||
"height": 720,
|
||||
"fps": 30,
|
||||
"bitrate": 18000000
|
||||
},
|
||||
"outputs": {
|
||||
"screenPath": "C:\\Users\\me\\recording-123.mp4",
|
||||
"manifestPath": "C:\\Users\\me\\recording-123.session.json"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The helper emits newline-delimited JSON events to stdout:
|
||||
|
||||
```json
|
||||
{ "event": "ready", "schemaVersion": 2 }
|
||||
{ "event": "recording-started", "timestampMs": 1234567890 }
|
||||
{ "event": "warning", "code": "audio-device-unavailable", "message": "..." }
|
||||
{ "event": "recording-stopped", "screenPath": "..." }
|
||||
{ "event": "error", "code": "unsupported-window-source", "message": "..." }
|
||||
```
|
||||
|
||||
During migration, Electron also accepts the current textual helper messages so existing display-only smoke tests keep working.
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### 1. Native Session Boundary
|
||||
|
||||
- Add a structured Windows native recording request type.
|
||||
- Pass source kind, audio flags, microphone device, webcam flags, and output paths into the helper.
|
||||
- On Windows, do not silently fall back to Electron capture. If the helper is unavailable or a native feature is missing, show a clear error.
|
||||
- Keep Electron fallback only for non-Windows and optional developer diagnostics.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Display-only recording still works.
|
||||
- Enabling an unsupported native feature returns an explicit native error instead of recording through Electron.
|
||||
|
||||
### 2. WASAPI System Audio
|
||||
|
||||
Status: initial implementation landed. The helper captures the default render endpoint with WASAPI loopback, passes the runtime mix format into `MFEncoder`, and muxes AAC audio into the primary MP4. Long-run drift correction and explicit silence insertion remain follow-up hardening work.
|
||||
|
||||
- Add `WasapiLoopbackCapture`.
|
||||
- Capture the default render endpoint in shared loopback mode.
|
||||
- Keep `WasapiLoopbackCapture` responsible only for device activation, packet capture, and packet timestamps.
|
||||
- Keep `MFEncoder` responsible for all Media Foundation stream definitions and muxing.
|
||||
- Feed the endpoint mix format into `MFEncoder` as the single source of truth for audio stream shape: sample rate, channel count, bits per sample, block alignment, average bytes/sec, and subtype (`PCM` or `Float`).
|
||||
- Encode the primary screen MP4 with H.264 video and AAC audio through one `IMFSinkWriter`.
|
||||
- Timestamp audio from the captured frame count in 100ns units. The first implementation uses the WASAPI packet timeline; later drift correction will add explicit silence or resampling if long recordings show measurable clock skew.
|
||||
- Treat microphone mixing as a later phase. System loopback must land first without introducing renderer-side audio code.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Screen MP4 has an AAC audio track when system audio is enabled.
|
||||
- A 5-minute recording has audio/video duration drift below one frame.
|
||||
|
||||
SSOT rules for this phase:
|
||||
|
||||
- `src/lib/nativeWindowsRecording.ts` is the renderer/main TypeScript request contract.
|
||||
- `docs/engineering/windows-native-recorder-roadmap.md` is the feature-level contract and phase checklist.
|
||||
- `WgcSession::captureWidth()/captureHeight()` is the encoded screen frame size until a dedicated native scaling stage exists.
|
||||
- `WasapiLoopbackCapture::inputFormat()` is the runtime audio format source used by `MFEncoder`.
|
||||
- The renderer passes both the browser webcam `deviceId` and the selected camera's user-visible label (as `deviceName`); `electron/native/wgc-capture/src/webcam_capture.*` is the only place that maps those values to Media Foundation devices.
|
||||
- Electron resolves the selected label to a DirectShow filter CLSID once and passes it as `webcamDirectShowClsid`; the helper must not independently guess among DirectShow filters.
|
||||
- No duplicated hard-coded audio format assumptions in `main.cpp`.
|
||||
|
||||
### 3. WASAPI Microphone
|
||||
|
||||
Status: initial implementation in progress. The helper can open the default WASAPI capture endpoint, apply the OpenScreen microphone gain, encode mic-only audio, and mix system loopback plus microphone through a single queued `AudioMixer` timeline when both endpoints expose the same runtime format. Audio endpoints are warmed before WGC starts, the mixer drops pre-roll and begins its paced timeline on the first encoded video frame, then cuts queued tail audio on stop so the MP4 does not drift past the video. Browser `deviceId` to MMDevice id mapping, resampling between mismatched endpoint formats, and drift correction remain follow-up hardening work.
|
||||
|
||||
- Add microphone device enumeration and stable device-id mapping.
|
||||
- Capture selected/default microphone through WASAPI.
|
||||
- Apply OpenScreen's current mic gain policy.
|
||||
- Mix microphone and system audio before AAC encoding.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Mic-only, system-only, and mixed audio recordings produce a valid AAC track.
|
||||
- Device unplug/permission failure produces an explicit error or warning.
|
||||
|
||||
### 4. Webcam Capture
|
||||
|
||||
- Add Media Foundation webcam source reader.
|
||||
- Select requested dimensions/fps or the nearest format accepted by Media Foundation.
|
||||
- Convert webcam samples to BGRA and compose them into the primary helper MP4 as an initial bottom-right picture-in-picture overlay.
|
||||
- Ignore black webcam warmup frames and keep the overlay hidden until the first visible frame is available, so virtual cameras do not flash a black picture-in-picture rectangle at recording start.
|
||||
- Keep the helper process as the SSOT for screen/window, WASAPI system audio, microphone, webcam, and mux timing.
|
||||
- Match the requested webcam through Media Foundation friendly names first, then browser device ids/symbolic links, so UI selection remains stable across Chromium and Windows native device namespaces.
|
||||
- Use the Electron-resolved DirectShow CLSID when the selected virtual camera, for example NVIDIA Broadcast, is registered for DirectShow but absent from Media Foundation enumeration.
|
||||
- Later: promote the same webcam capture source to a separate editable native `webcamVideoPath` if product requirements need post-recording layout edits.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Native display/window recordings can include webcam without returning to Electron capture.
|
||||
- `npm run test:wgc-webcam:win` validates the helper path when a webcam is available and skips explicitly when no webcam device exists.
|
||||
- Combined webcam + system audio + microphone produces one MP4 with H.264 video and AAC audio.
|
||||
|
||||
### 5. Native Window Capture
|
||||
|
||||
Status: initial implementation in progress. Electron parses the `window:<HWND>:...` desktop source id through the shared native Windows recording contract and passes `windowHandle` to the helper. The helper resolves the `HWND`, validates it with `IsWindow`, and creates the WGC item with `CreateForWindow(HWND)`. Resize/minimize/move hardening and protected-window diagnostics remain follow-up work.
|
||||
|
||||
- Resolve Electron `window:*` selections to an `HWND`.
|
||||
- Use WGC `CreateForWindow(HWND)`.
|
||||
- Handle window close, minimize, resize, DPI scaling, and monitor moves.
|
||||
- Return clear errors for unsupported protected windows.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Capturing a normal app window works with cursor/audio/mic/webcam.
|
||||
- Window resize and movement do not corrupt the recording.
|
||||
|
||||
### 6. Runtime Controls
|
||||
|
||||
- Add pause/resume commands to the helper.
|
||||
- Add cancel command that removes partial screen/webcam outputs.
|
||||
- Keep restart as stop-discard-start from Electron until the helper supports a native restart event.
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Pause/resume keeps preview duration coherent.
|
||||
- Cancel leaves no stale media/session/cursor files.
|
||||
|
||||
### 7. Test Pipeline
|
||||
|
||||
- `npm run test:wgc-helper:win`: display-only helper smoke test.
|
||||
- `npm run test:wgc-audio:win`: validates AAC track presence and duration.
|
||||
- `npm run test:wgc-window:win`: captures a fixture window by HWND.
|
||||
- `npm run test:wgc-webcam:win`: validates webcam output when a webcam is available, otherwise skips explicitly.
|
||||
- Packaging check: confirms the helper is in `app.asar.unpacked`.
|
||||
- Export check: exported MP4s generated from native recordings keep an AAC audio track when the source has audio.
|
||||
- `npm run test:wgc-mic:win`: validates default-microphone capture writes an AAC track when an input endpoint is available.
|
||||
- `npm run test:wgc-mixed-audio:win`: validates system loopback plus microphone writes one mixed AAC track when endpoint formats are compatible.
|
||||
|
||||
## Backlog
|
||||
|
||||
### Native Cursor Click Bounce Is Not Visibly Applied
|
||||
|
||||
Status: open. Do not treat Windows native cursor `Click Bounce` as shipped.
|
||||
|
||||
Problem:
|
||||
|
||||
- The cursor settings UI exposes `Size`, `Smoothing`, `Motion Blur`, and `Click Bounce`.
|
||||
- On Windows native cursor recordings, `Size`, `Smoothing`, and `Motion Blur` are visibly applied in preview/export.
|
||||
- `Click Bounce` still has no visible effect in manual packaged-app testing, even after adding click-related sample metadata.
|
||||
|
||||
What has already been tried:
|
||||
|
||||
- Added `interactionType: "click" | "mouseup" | "move"` to native cursor samples.
|
||||
- Added polling-based left-button state through `GetAsyncKeyState`.
|
||||
- Added the `GetAsyncKeyState` low-bit path to catch quick clicks between samples.
|
||||
- Added a PowerShell/C# `WH_MOUSE_LL` mouse hook experiment and launched the sampler through a temporary `.ps1` file to avoid Windows command-line length limits.
|
||||
- Updated `npm run test:cursor-native:win` so the diagnostic can observe a synthetic short click and emit `clickSampleCount`.
|
||||
|
||||
Current diagnosis:
|
||||
|
||||
- The diagnostic can observe synthetic click events, but this has not translated into a visible `Click Bounce` effect in the real packaged app.
|
||||
- The test currently proves that some click metadata can be recorded, not that the full OpenScreen record -> preview -> export path displays a bounce at the expected time.
|
||||
- The current native implementation may be animating from metadata that is not present in the real recording session, may be using the wrong timestamp origin, or may be applying a scale change too subtle to notice on the DOM/native cursor path.
|
||||
|
||||
Next investigation when resumed:
|
||||
|
||||
- Inspect the actual `.cursor.json`/session sidecar generated by a packaged-app manual recording and confirm whether real clicks produce `interactionType: "click"` at the right `timeMs`.
|
||||
- Add a targeted end-to-end fixture that records a known click, loads the generated project, and asserts the preview/export cursor scale changes across adjacent frames.
|
||||
- Compare the native DOM cursor path against the older `PixiCursorOverlay` click visual state and decide whether native cursor bounce should be a scale-only animation, an additional click ring, or a short explicit keyframe animation independent of sample cadence.
|
||||
- If event capture remains unreliable in the PowerShell sampler, move click events into a small native cursor helper instead of PowerShell/C# script injection.
|
||||
|
||||
## Ship Criteria
|
||||
|
||||
- Windows display capture works with cursor, system audio, microphone, and webcam.
|
||||
- Windows window capture works with cursor, system audio, microphone, and webcam.
|
||||
- Preview and export show no cursor position drift.
|
||||
- Preview and export show no measurable audio/video/webcam drift.
|
||||
- Windows production builds do not depend on Electron capture fallback.
|
||||
@@ -0,0 +1,84 @@
|
||||
# Windows Private Trust Signing
|
||||
|
||||
OpenScreen supports Microsoft Trusted Signing private trust profiles for Windows
|
||||
builds. Secrets and signing resource names are read from environment variables;
|
||||
no certificate, client secret, or API key should be committed.
|
||||
|
||||
For a local signing machine, copy `.env.signing.example` to
|
||||
`.env.signing.local` and fill in values there. `.env.signing.local` is ignored
|
||||
by Git. Explicit shell environment variables override values in that local file.
|
||||
|
||||
## Required Azure Resource Variables
|
||||
|
||||
Set these values for the Trusted Signing account and certificate profile:
|
||||
|
||||
```powershell
|
||||
$env:AZURE_TRUSTED_SIGNING_ENDPOINT = "https://<region>.codesigning.azure.net/"
|
||||
$env:AZURE_TRUSTED_SIGNING_ACCOUNT_NAME = "<trusted-signing-account-name>"
|
||||
$env:AZURE_TRUSTED_SIGNING_CERTIFICATE_PROFILE_NAME = "<private-trust-profile-name>"
|
||||
$env:AZURE_TRUSTED_SIGNING_PUBLISHER_NAME = "<certificate-common-name>"
|
||||
```
|
||||
|
||||
`AZURE_TRUSTED_SIGNING_CERTIFICATE_PROFILE_NAME` must point to a certificate
|
||||
profile created with the `PrivateTrust` profile type.
|
||||
|
||||
## Required Azure Auth Variables
|
||||
|
||||
Electron Builder uses Azure environment credentials. Set the tenant and client:
|
||||
|
||||
```powershell
|
||||
$env:AZURE_TENANT_ID = "<tenant-id>"
|
||||
$env:AZURE_CLIENT_ID = "<app-registration-client-id>"
|
||||
```
|
||||
|
||||
Then set one authentication mode. A service principal secret is the simplest for
|
||||
local signing:
|
||||
|
||||
```powershell
|
||||
$env:AZURE_CLIENT_SECRET = "<client-secret>"
|
||||
```
|
||||
|
||||
Certificate auth is also supported:
|
||||
|
||||
```powershell
|
||||
$env:AZURE_CLIENT_CERTIFICATE_PATH = "C:\secure\signing-auth.pfx"
|
||||
$env:AZURE_CLIENT_CERTIFICATE_PASSWORD = "<pfx-password>"
|
||||
```
|
||||
|
||||
## Sign Existing Installer
|
||||
|
||||
This signs the installer already built at
|
||||
`release/<version>/Openscreen Setup <version>.exe`:
|
||||
|
||||
```powershell
|
||||
npm run sign:win:private-trust
|
||||
```
|
||||
|
||||
To sign a specific file:
|
||||
|
||||
```powershell
|
||||
npm run sign:win:private-trust -- --file "D:\Code\OpenScreen\release\1.4.0\Openscreen Setup 1.4.0.exe"
|
||||
```
|
||||
|
||||
## Build And Sign
|
||||
|
||||
This signs the packaged app executable, bundled OCR service executable, and NSIS
|
||||
installer during the Windows build:
|
||||
|
||||
```powershell
|
||||
npm run build:win:private-trust
|
||||
```
|
||||
|
||||
The regular `npm run build:win` remains unsigned for local development builds.
|
||||
|
||||
## Verification
|
||||
|
||||
After signing:
|
||||
|
||||
```powershell
|
||||
Get-AuthenticodeSignature "release\1.4.0\Openscreen Setup 1.4.0.exe" | Format-List
|
||||
```
|
||||
|
||||
Private trust signatures are valid only on machines that trust the private trust
|
||||
certificate chain/publisher. For public downloads that must be trusted on any
|
||||
Windows machine, use a public trust certificate profile instead.
|
||||
@@ -0,0 +1,130 @@
|
||||
# Windows native cursor test pipeline
|
||||
|
||||
This branch includes two Windows-focused diagnostics for fast iteration on native cursor capture and rendering. They are intentionally local developer tools: they create short videos and JSON reports so cursor changes can be inspected without doing a full manual record/edit/export cycle.
|
||||
|
||||
## Native sampler diagnostic
|
||||
|
||||
```powershell
|
||||
npm run test:cursor-native:win
|
||||
```
|
||||
|
||||
This script does not launch OpenScreen. It:
|
||||
|
||||
- starts a Windows `GetCursorInfo` sampler
|
||||
- moves the real OS pointer with `SetCursorPos`
|
||||
- captures native cursor handles, hotspots, assets, and standard `IDC_*` cursor types
|
||||
- writes normalized `CursorRecordingData`
|
||||
- generates an abstract preview video
|
||||
- generates a real-screen preview video using screenshots of the current desktop
|
||||
|
||||
The output directory is printed in the command result, for example:
|
||||
|
||||
```text
|
||||
C:\Users\<user>\AppData\Local\Temp\openscreen-cursor-native-...
|
||||
```
|
||||
|
||||
Useful files:
|
||||
|
||||
- `report.json`: sample counts, asset counts, cursor handles, and generated artifact paths
|
||||
- `cursor-recording-data.json`: sidecar-compatible cursor data
|
||||
- `preview.webm`: abstract path/asset/hotspot preview
|
||||
- `real-capture-preview.webm`: real desktop screenshot background with reconstructed cursor overlay
|
||||
- `assets/*.png`: raw cursor bitmaps captured from Windows
|
||||
|
||||
Environment overrides:
|
||||
|
||||
```powershell
|
||||
$env:CURSOR_TEST_DURATION_MS = "3000"
|
||||
$env:CURSOR_TEST_SAMPLE_INTERVAL_MS = "16"
|
||||
$env:CURSOR_TEST_SCREEN_FRAME_INTERVAL_MS = "80"
|
||||
$env:CURSOR_TEST_OUTPUT_DIR = "C:\temp\openscreen-cursor-test"
|
||||
npm run test:cursor-native:win
|
||||
```
|
||||
|
||||
## OpenScreen preview capture
|
||||
|
||||
```powershell
|
||||
npm run capture:openscreen-preview
|
||||
```
|
||||
|
||||
This script launches the real Electron app, injects a fixture video plus cursor sidecar data, opens the editor, captures frames from the actual OpenScreen preview UI, and encodes them into a WebM.
|
||||
|
||||
By default it uses the latest `cursor-recording-data.json` generated by `npm run test:cursor-native:win`. To force a specific sidecar:
|
||||
|
||||
```powershell
|
||||
$env:CURSOR_RECORDING_DATA_PATH = "C:\path\to\cursor-recording-data.json"
|
||||
npm run capture:openscreen-preview
|
||||
```
|
||||
|
||||
Useful environment overrides:
|
||||
|
||||
```powershell
|
||||
$env:OPENSCREEN_PREVIEW_SKIP_BUILD = "true"
|
||||
$env:OPENSCREEN_PREVIEW_FRAME_COUNT = "120"
|
||||
$env:OPENSCREEN_PREVIEW_FPS = "30"
|
||||
$env:OPENSCREEN_PREVIEW_OUTPUT_DIR = "C:\temp\openscreen-preview"
|
||||
npm run capture:openscreen-preview
|
||||
```
|
||||
|
||||
Useful files:
|
||||
|
||||
- `openscreen-preview.webm`: video of the real OpenScreen editor preview
|
||||
- `frames/*.png`: captured preview frames
|
||||
- `report.json`: fixture paths, source sidecar, frame count, and output path
|
||||
|
||||
## What these tests validate
|
||||
|
||||
Together, the scripts make it quick to inspect:
|
||||
|
||||
- whether Windows cursor samples are visible and continuous
|
||||
- whether native hotspots stay anchored when scaling to `3x`
|
||||
- whether standard Windows cursors are recognized via `IDC_*`
|
||||
- whether high-quality SVG cursor replacements follow the native hotspot
|
||||
- whether the real OpenScreen preview renders the same cursor behavior as the diagnostic pipeline
|
||||
|
||||
They are not a full substitute for an end-to-end manual recording pass. Before shipping cursor changes, also test a real capture session and export from the packaged app.
|
||||
|
||||
## Known Gap
|
||||
|
||||
Windows native cursor `Click Bounce` is currently backlogged. `Size`, `Smoothing`, and `Motion Blur` can be validated through preview/export, but `Click Bounce` has not shown a visible effect in packaged-app manual testing. The current diagnostic can observe synthetic click metadata, but that is not enough to validate the real OpenScreen record -> preview -> export path.
|
||||
|
||||
Track the open item in `docs/engineering/windows-native-recorder-roadmap.md` under `Native Cursor Click Bounce Is Not Visibly Applied`.
|
||||
|
||||
## Native Windows capture backend
|
||||
|
||||
The app now routes Windows recordings through an external Windows Graphics Capture (WGC) helper instead of Electron `getDisplayMedia`. This is meant to remove the coordinate and clock split that made the reconstructed cursor drift in the preview/export path.
|
||||
|
||||
Current native availability rules:
|
||||
|
||||
- Windows 10 build 19041 or newer
|
||||
- a helper executable is available
|
||||
|
||||
The helper currently implements display/window video capture, system audio loopback, default microphone capture, Media Foundation webcam capture, and DirectShow fallback for selected virtual cameras such as NVIDIA Broadcast. Webcam frames are composed into the primary MP4 as a bottom-right picture-in-picture overlay, and black webcam warmup frames are ignored until the first visible frame is available.
|
||||
|
||||
Build OpenScreen's helper locally:
|
||||
|
||||
```powershell
|
||||
npm run build:native:win
|
||||
```
|
||||
|
||||
Smoke-test the helper directly:
|
||||
|
||||
```powershell
|
||||
npm run test:wgc-helper:win
|
||||
npm run test:wgc-helper:win -- --capture-cursor
|
||||
npm run test:wgc-window:win
|
||||
npm run test:wgc-audio:win
|
||||
npm run test:wgc-mic:win
|
||||
npm run test:wgc-mixed-audio:win
|
||||
npm run test:wgc-webcam:win
|
||||
```
|
||||
|
||||
For local diagnostics with another compatible helper, point OpenScreen at that executable:
|
||||
|
||||
```powershell
|
||||
$env:OPENSCREEN_WGC_CAPTURE_EXE = "C:\path\to\wgc-capture.exe"
|
||||
npm run build-vite
|
||||
npm run dev
|
||||
```
|
||||
|
||||
The helper receives one JSON config argument, emits JSON lifecycle events, prints the legacy `Recording started` marker, accepts `stop` on stdin, and prints `Recording stopped. Output path: <path>`. See `electron/native/README.md` for the exact contract and build output paths.
|
||||
@@ -0,0 +1,149 @@
|
||||
# Writing Tests
|
||||
|
||||
This project uses [Vitest](https://vitest.dev/) for both unit/integration tests and browser tests. There are two separate configs — each targets a different set of files.
|
||||
|
||||
## Unit tests
|
||||
|
||||
**Config:** `vitest.config.ts`
|
||||
**Runs in:** jsdom (simulated DOM, no real browser)
|
||||
**File pattern:** `src/**/*.test.ts` — anything that does **not** end in `.browser.test.ts`
|
||||
**CI command:** `npm run test`
|
||||
|
||||
Use unit tests for pure logic, utility functions, data transformations, and anything that doesn't need real browser APIs (Canvas, WebCodecs, MediaRecorder, etc.).
|
||||
|
||||
### File placement
|
||||
|
||||
Co-locate the test file next to the source file, or put it in a `__tests__/` folder in the same directory.
|
||||
|
||||
```
|
||||
src/lib/compositeLayout.ts
|
||||
src/lib/compositeLayout.test.ts # co-located
|
||||
|
||||
src/i18n/__tests__/tutorialHelpTranslations.test.ts # grouped
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
```ts
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { computeCompositeLayout } from "./compositeLayout";
|
||||
|
||||
describe("computeCompositeLayout", () => {
|
||||
it("anchors the overlay in the lower-right corner", () => {
|
||||
const layout = computeCompositeLayout({
|
||||
canvasSize: { width: 1920, height: 1080 },
|
||||
screenSize: { width: 1920, height: 1080 },
|
||||
webcamSize: { width: 1280, height: 720 },
|
||||
});
|
||||
|
||||
expect(layout).not.toBeNull();
|
||||
expect(layout!.webcamRect!.x).toBeGreaterThan(1920 / 2);
|
||||
expect(layout!.webcamRect!.y).toBeGreaterThan(1080 / 2);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### Path aliases
|
||||
|
||||
The `@/` alias resolves to `src/`. Use it for imports that would otherwise need long relative paths.
|
||||
|
||||
```ts
|
||||
import { SUPPORTED_LOCALES } from "@/i18n/config";
|
||||
```
|
||||
|
||||
### Running locally
|
||||
|
||||
```bash
|
||||
npm run test # run once
|
||||
npm run test:watch # watch mode
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Browser tests
|
||||
|
||||
**Config:** `vitest.browser.config.ts`
|
||||
**Runs in:** real Chromium via Playwright (headless)
|
||||
**File pattern:** `src/**/*.browser.test.ts`
|
||||
**CI commands:** `npm run test:browser:install` then `npm run test:browser`
|
||||
|
||||
Use browser tests when the code under test depends on real browser APIs that jsdom doesn't implement: `VideoDecoder`, `VideoEncoder`, `MediaRecorder`, `OffscreenCanvas`, `WebGL`, etc.
|
||||
|
||||
### File placement
|
||||
|
||||
Name the file `<subject>.browser.test.ts` and place it next to the source file.
|
||||
|
||||
```
|
||||
src/lib/exporter/videoExporter.ts
|
||||
src/lib/exporter/videoExporter.browser.test.ts
|
||||
```
|
||||
|
||||
### Loading fixture assets
|
||||
|
||||
Static assets (video files, images) live in `tests/fixtures/`. Import them with Vite's `?url` suffix so Vite serves them through the dev server.
|
||||
|
||||
```ts
|
||||
import sampleVideoUrl from "../../../tests/fixtures/sample.webm?url";
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
```ts
|
||||
import { describe, expect, it } from "vitest";
|
||||
import sampleVideoUrl from "../../../tests/fixtures/sample.webm?url";
|
||||
import { VideoExporter } from "./videoExporter";
|
||||
|
||||
describe("VideoExporter (real browser)", () => {
|
||||
it("exports a valid MP4 blob from a real video", async () => {
|
||||
const exporter = new VideoExporter({
|
||||
videoUrl: sampleVideoUrl,
|
||||
width: 320,
|
||||
height: 180,
|
||||
frameRate: 15,
|
||||
bitrate: 1_000_000,
|
||||
wallpaper: "#1a1a2e",
|
||||
zoomRegions: [],
|
||||
showShadow: false,
|
||||
shadowIntensity: 0,
|
||||
showBlur: false,
|
||||
cropRegion: { x: 0, y: 0, width: 1, height: 1 },
|
||||
});
|
||||
|
||||
const result = await exporter.export();
|
||||
|
||||
expect(result.success, result.error).toBe(true);
|
||||
expect(result.blob).toBeInstanceOf(Blob);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### Timeouts
|
||||
|
||||
Browser tests have a default timeout of 120 seconds per test and 30 seconds per hook (set in `vitest.browser.config.ts`). Export operations are slow — prefer small fixture dimensions (320×180) and low bitrates to keep tests fast.
|
||||
|
||||
### Running locally
|
||||
|
||||
First install the browser (one-time):
|
||||
|
||||
```bash
|
||||
npm run test:browser:install
|
||||
```
|
||||
|
||||
Then run the tests:
|
||||
|
||||
```bash
|
||||
npm run test:browser
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Choosing the right type
|
||||
|
||||
| Situation | Use |
|
||||
|---|---|
|
||||
| Pure function / data transformation | Unit test |
|
||||
| i18n key coverage | Unit test |
|
||||
| React hook logic (no real browser APIs) | Unit test |
|
||||
| `VideoDecoder` / `VideoEncoder` / `MediaRecorder` | Browser test |
|
||||
| `OffscreenCanvas` / WebGL / Pixi.js rendering | Browser test |
|
||||
| File export producing a real `Blob` | Browser test |
|
||||
@@ -0,0 +1,109 @@
|
||||
// @see - https://www.electron.build/configuration/configuration
|
||||
{
|
||||
"$schema": "https://raw.githubusercontent.com/electron-userland/electron-builder/master/packages/app-builder-lib/scheme.json",
|
||||
"appId": "com.siddharthvaddem.openscreen",
|
||||
"asar": true,
|
||||
// .node binaries cannot be loaded from inside an asar; keep them unpacked.
|
||||
"asarUnpack": [
|
||||
"**/*.node"
|
||||
],
|
||||
"productName": "Openscreen",
|
||||
"toolsets": {
|
||||
"winCodeSign": "1.1.0"
|
||||
},
|
||||
"npmRebuild": true,
|
||||
"buildDependenciesFromSource": true,
|
||||
"compression": "normal",
|
||||
"directories": {
|
||||
"output": "release/${version}"
|
||||
},
|
||||
"files": [
|
||||
"dist",
|
||||
"dist-electron",
|
||||
"!*.png",
|
||||
"!preview*.png",
|
||||
"!*.md",
|
||||
"!README.md",
|
||||
"!CONTRIBUTING.md",
|
||||
"!LICENSE"
|
||||
],
|
||||
// Asset layout contract: "wallpapers/" under resourcesPath must align with
|
||||
// assetBaseDir in electron/preload.ts (packaged branch).
|
||||
"extraResources": [
|
||||
{
|
||||
"from": "public/wallpapers",
|
||||
"to": "wallpapers"
|
||||
}
|
||||
],
|
||||
|
||||
"mac": {
|
||||
"notarize": false,
|
||||
"hardenedRuntime": true,
|
||||
"entitlements": "macos.entitlements",
|
||||
"entitlementsInherit": "macos.entitlements",
|
||||
"target": [
|
||||
{
|
||||
"target": "dmg",
|
||||
"arch": ["x64", "arm64"]
|
||||
}
|
||||
],
|
||||
"icon": "icons/icons/mac/icon.icns",
|
||||
"artifactName": "${productName}-Mac-${arch}-${version}-Installer.${ext}",
|
||||
"extraResources": [
|
||||
{
|
||||
"from": "electron/native/bin",
|
||||
"to": "electron/native/bin",
|
||||
"filter": ["darwin-*/*"]
|
||||
}
|
||||
],
|
||||
"extendInfo": {
|
||||
"NSAudioCaptureUsageDescription": "OpenScreen needs audio capture permission to record system audio.",
|
||||
"NSMicrophoneUsageDescription": "OpenScreen needs microphone access to record voice audio.",
|
||||
"NSCameraUsageDescription": "OpenScreen needs camera access to record webcam video.",
|
||||
"NSScreenCaptureUsageDescription": "OpenScreen needs screen recording permission to detect and capture windows.",
|
||||
"NSCameraUseContinuityCameraDeviceType": true
|
||||
}
|
||||
},
|
||||
"linux": {
|
||||
"target": [
|
||||
"AppImage",
|
||||
"deb",
|
||||
"pacman"
|
||||
],
|
||||
"icon": "icons/icons/png",
|
||||
"artifactName": "${productName}-Linux-${version}.${ext}",
|
||||
"category": "AudioVideo"
|
||||
},
|
||||
"win": {
|
||||
"target": [
|
||||
"nsis"
|
||||
],
|
||||
"icon": "icons/icons/win/icon.ico",
|
||||
"requestedExecutionLevel": "requireAdministrator",
|
||||
"signAndEditExecutable": false,
|
||||
"signExts": ["!.exe"],
|
||||
"extraResources": [
|
||||
{
|
||||
"from": "electron/native/bin",
|
||||
"to": "electron/native/bin",
|
||||
"filter": ["win32-*/*"]
|
||||
},
|
||||
{
|
||||
"from": "tools/ocr/dist/openscreen-ocr-service",
|
||||
"to": "ocr-service",
|
||||
"filter": ["**/*"]
|
||||
},
|
||||
{
|
||||
"from": "tools/ocr/models/paddlex",
|
||||
"to": "ocr-models/paddlex",
|
||||
"filter": ["**/*"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"nsis": {
|
||||
"oneClick": false,
|
||||
"allowToChangeInstallationDirectory": true,
|
||||
"perMachine": true,
|
||||
"include": "build/installer.nsh"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,73 @@
|
||||
const fs = require("node:fs");
|
||||
const path = require("node:path");
|
||||
const JSON5 = require("json5");
|
||||
|
||||
function loadLocalSigningEnv() {
|
||||
const envPath = path.join(__dirname, ".env.signing.local");
|
||||
if (!fs.existsSync(envPath)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const lines = fs.readFileSync(envPath, "utf8").split(/\r?\n/);
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed || trimmed.startsWith("#")) {
|
||||
continue;
|
||||
}
|
||||
const match = trimmed.match(/^([A-Za-z_][A-Za-z0-9_]*)=(.*)$/);
|
||||
if (!match || process.env[match[1]]) {
|
||||
continue;
|
||||
}
|
||||
process.env[match[1]] = match[2].replace(/^['"]|['"]$/g, "");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Reads `electron-builder.json5` from this file's directory and returns the
 * parsed configuration object.
 *
 * @returns {object} The parsed JSON5 document.
 */
function readBaseConfig() {
	const source = fs.readFileSync(path.join(__dirname, "electron-builder.json5"), "utf8");
	return JSON5.parse(source);
}
|
||||
|
||||
/**
 * Returns the trimmed value of a mandatory environment variable.
 *
 * @param {string} name Variable to read from `process.env`.
 * @returns {string} The value with surrounding whitespace removed.
 * @throws {Error} When the variable is unset, empty, or whitespace-only.
 */
function requireEnv(name) {
	const trimmed = (process.env[name] ?? "").trim();
	if (trimmed) {
		return trimmed;
	}
	throw new Error(`Missing required environment variable: ${name}`);
}
|
||||
|
||||
/**
 * Returns the trimmed value of the first variable in `names` that is set to a
 * non-blank value.
 *
 * @param {string[]} names Candidate variable names, in priority order.
 * @returns {string} The first non-blank value, trimmed.
 * @throws {Error} When none of the candidates has a non-blank value; the
 *   message lists every candidate joined by " or ".
 */
function requireAnyEnv(names) {
	const found = Array.from(names, (candidate) => process.env[candidate]?.trim()).find(Boolean);
	if (!found) {
		throw new Error(`Missing required environment variable: ${names.join(" or ")}`);
	}
	return found;
}
|
||||
|
||||
// Load `.env.signing.local` first so every lookup below can see its values.
loadLocalSigningEnv();

const config = readBaseConfig();

/** Reads an optional variable, using `fallback` when it is unset or blank. */
const envOr = (name, fallback) => process.env[name]?.trim() || fallback;

// The account, profile, endpoint, and publisher lookups throw when their
// variable is missing; the digest and timestamp settings fall back to defaults.
const azureSignOptions = {
	publisherName: requireAnyEnv([
		"AZURE_TRUSTED_SIGNING_PUBLISHER_NAME",
		"OPENSCREEN_SIGNING_PUBLISHER_NAME",
	]),
	endpoint: requireEnv("AZURE_TRUSTED_SIGNING_ENDPOINT"),
	certificateProfileName: requireEnv("AZURE_TRUSTED_SIGNING_CERTIFICATE_PROFILE_NAME"),
	codeSigningAccountName: requireEnv("AZURE_TRUSTED_SIGNING_ACCOUNT_NAME"),
	fileDigest: envOr("AZURE_TRUSTED_SIGNING_FILE_DIGEST", "SHA256"),
	timestampRfc3161: envOr(
		"AZURE_TRUSTED_SIGNING_TIMESTAMP_RFC3161",
		"http://timestamp.acs.microsoft.com",
	),
	timestampDigest: envOr("AZURE_TRUSTED_SIGNING_TIMESTAMP_DIGEST", "SHA256"),
};

// Layer the signing settings over the base Windows section.
config.win = Object.assign({}, config.win, {
	signAndEditExecutable: true,
	azureSignOptions,
});

// Drop the `signExts` entry inherited from electron-builder.json5 for this signed variant.
delete config.win.signExts;

module.exports = config;
|
||||
@@ -0,0 +1,398 @@
|
||||
/// <reference types="vite-plugin-electron/electron-env" />
|
||||
|
||||
// Extends the global `process.env` typings with the two app-specific
// variables declared below (`APP_ROOT` and `VITE_PUBLIC`).
declare namespace NodeJS {
|
||||
interface ProcessEnv {
|
||||
/**
|
||||
* The built directory structure
|
||||
*
|
||||
* ```tree
|
||||
* ├─┬─┬ dist
|
||||
* │ │ └── index.html
|
||||
* │ │
|
||||
* │ ├─┬ dist-electron
|
||||
* │ │ ├── main.js
|
||||
* │ │ └── preload.js
|
||||
* │
|
||||
* ```
|
||||
*/
|
||||
APP_ROOT: string;
|
||||
/** /dist/ or /public/ */
|
||||
VITE_PUBLIC: string;
|
||||
}
|
||||
}
|
||||
|
||||
// Used in Renderer process, expose in `preload.ts`
|
||||
interface Window {
|
||||
electronAPI: {
|
||||
invokeNativeBridge: <TData = unknown>(
|
||||
request: import("../src/native/contracts").NativeBridgeRequest,
|
||||
) => Promise<import("../src/native/contracts").NativeBridgeResponse<TData>>;
|
||||
guide: {
|
||||
startSession: (
|
||||
recordingId: import("../src/guide/contracts").GuideRecordingIdInput,
|
||||
) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").GuideSession
|
||||
>
|
||||
>;
|
||||
readSession: (
|
||||
recordingId: import("../src/guide/contracts").GuideRecordingIdInput,
|
||||
) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").GuideSession
|
||||
>
|
||||
>;
|
||||
addMarker: (input: import("../src/guide/contracts").AddGuideMarkerInput) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<{
|
||||
session: import("../src/guide/contracts").GuideSession;
|
||||
event: import("../src/guide/contracts").GuideEvent;
|
||||
}>
|
||||
>;
|
||||
capturePointerMarker: () => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").CaptureGuidePointerMarkerResult
|
||||
>
|
||||
>;
|
||||
onMarkerCaptured: (
|
||||
callback: (payload: import("../src/guide/contracts").GuideMarkerCapturedPayload) => void,
|
||||
) => () => void;
|
||||
finalizeEvents: (
|
||||
input: import("../src/guide/contracts").FinalizeGuideEventsInput,
|
||||
) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").GuideSession
|
||||
>
|
||||
>;
|
||||
writeSnapshot: (
|
||||
input: import("../src/guide/contracts").WriteGuideSnapshotInput,
|
||||
) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").GuideSession
|
||||
>
|
||||
>;
|
||||
runOcr: (
|
||||
input: import("../src/guide/contracts").RunGuideOcrInput,
|
||||
) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").GuideSession
|
||||
>
|
||||
>;
|
||||
generateDraft: (
|
||||
input: import("../src/guide/contracts").GenerateGuideDraftInput,
|
||||
) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").GuideSession
|
||||
>
|
||||
>;
|
||||
getAiSettings: () => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").GuideAiSettings
|
||||
>
|
||||
>;
|
||||
saveAiSettings: (
|
||||
input: import("../src/guide/contracts").SaveGuideAiSettingsInput,
|
||||
) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").GuideAiSettings
|
||||
>
|
||||
>;
|
||||
saveGuide: (
|
||||
input: import("../src/guide/contracts").SaveGuideInput,
|
||||
) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").GuideSession
|
||||
>
|
||||
>;
|
||||
exportMarkdown: (
|
||||
input: import("../src/guide/contracts").ExportGuideInput,
|
||||
) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").ExportGuideResult
|
||||
>
|
||||
>;
|
||||
exportHtml: (
|
||||
input: import("../src/guide/contracts").ExportGuideInput,
|
||||
) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<
|
||||
import("../src/guide/contracts").ExportGuideResult
|
||||
>
|
||||
>;
|
||||
discardSession: (input: import("../src/guide/contracts").DiscardGuideSessionInput) => Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<{
|
||||
discarded: true;
|
||||
}>
|
||||
>;
|
||||
};
|
||||
getSources: (opts: Electron.SourcesOptions) => Promise<ProcessedDesktopSource[]>;
|
||||
switchToEditor: () => Promise<void>;
|
||||
switchToHud: () => Promise<void>;
|
||||
startNewRecording: () => Promise<{ success: boolean; error?: string }>;
|
||||
openSourceSelector: () => Promise<{
|
||||
opened: boolean;
|
||||
reason?: string;
|
||||
access?: {
|
||||
success: boolean;
|
||||
granted: boolean;
|
||||
status: string;
|
||||
error?: string;
|
||||
};
|
||||
}>;
|
||||
selectSource: (source: ProcessedDesktopSource) => Promise<ProcessedDesktopSource | null>;
|
||||
getSelectedSource: () => Promise<ProcessedDesktopSource | null>;
|
||||
requestCameraAccess: () => Promise<{
|
||||
success: boolean;
|
||||
granted: boolean;
|
||||
status: string;
|
||||
error?: string;
|
||||
}>;
|
||||
requestScreenAccess: () => Promise<{
|
||||
success: boolean;
|
||||
granted: boolean;
|
||||
status: string;
|
||||
error?: string;
|
||||
}>;
|
||||
requestNativeMacCursorAccess: () => Promise<{
|
||||
success: boolean;
|
||||
granted: boolean;
|
||||
status: string;
|
||||
error?: string;
|
||||
}>;
|
||||
assetBaseUrl: string;
|
||||
storeRecordedVideo: (
|
||||
videoData: ArrayBuffer,
|
||||
fileName: string,
|
||||
) => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
session?: import("../src/lib/recordingSession").RecordingSession;
|
||||
message?: string;
|
||||
error?: string;
|
||||
}>;
|
||||
storeRecordedSession: (
|
||||
payload: import("../src/lib/recordingSession").StoreRecordedSessionInput,
|
||||
) => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
session?: import("../src/lib/recordingSession").RecordingSession;
|
||||
message?: string;
|
||||
error?: string;
|
||||
}>;
|
||||
openRecordingStream: (fileName: string) => Promise<{ success: boolean; error?: string }>;
|
||||
appendRecordingChunk: (
|
||||
fileName: string,
|
||||
chunk: ArrayBuffer,
|
||||
) => Promise<{ success: boolean; error?: string }>;
|
||||
closeRecordingStream: (fileName: string) => Promise<{ success: boolean; error?: string }>;
|
||||
getRecordedVideoPath: () => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
message?: string;
|
||||
error?: string;
|
||||
}>;
|
||||
setRecordingState: (
|
||||
recording: boolean,
|
||||
recordingId?: number,
|
||||
cursorCaptureMode?: import("../src/lib/recordingSession").CursorCaptureMode,
|
||||
) => Promise<void>;
|
||||
isNativeWindowsCaptureAvailable: () => Promise<{
|
||||
success: boolean;
|
||||
available: boolean;
|
||||
helperPath?: string;
|
||||
reason?: string;
|
||||
error?: string;
|
||||
}>;
|
||||
// Reports whether the native macOS capture helper can be used.
// The known `reason` codes are listed for editor completion; `(string & {})`
// keeps the field open to any other string without collapsing the whole
// union to plain `string` (which `| string` would do).
isNativeMacCaptureAvailable: () => Promise<{
	success: boolean;
	available: boolean;
	helperPath?: string;
	reason?: "unsupported-platform" | "missing-helper" | (string & {});
	error?: string;
}>;
|
||||
startNativeWindowsRecording: (
|
||||
request: import("../src/lib/nativeWindowsRecording").NativeWindowsRecordingRequest,
|
||||
) => Promise<import("../src/lib/nativeWindowsRecording").NativeWindowsRecordingStartResult>;
|
||||
stopNativeWindowsRecording: (discard?: boolean) => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
session?: import("../src/lib/recordingSession").RecordingSession;
|
||||
message?: string;
|
||||
discarded?: boolean;
|
||||
error?: string;
|
||||
}>;
|
||||
pauseNativeWindowsRecording: () => Promise<{
|
||||
success: boolean;
|
||||
error?: string;
|
||||
}>;
|
||||
resumeNativeWindowsRecording: () => Promise<{
|
||||
success: boolean;
|
||||
error?: string;
|
||||
}>;
|
||||
startNativeMacRecording: (
|
||||
request: import("../src/lib/nativeMacRecording").NativeMacRecordingRequest,
|
||||
) => Promise<import("../src/lib/nativeMacRecording").NativeMacRecordingStartResult>;
|
||||
pauseNativeMacRecording: () => Promise<{
|
||||
success: boolean;
|
||||
error?: string;
|
||||
}>;
|
||||
resumeNativeMacRecording: () => Promise<{
|
||||
success: boolean;
|
||||
error?: string;
|
||||
}>;
|
||||
stopNativeMacRecording: (discard?: boolean) => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
session?: import("../src/lib/recordingSession").RecordingSession;
|
||||
message?: string;
|
||||
discarded?: boolean;
|
||||
error?: string;
|
||||
}>;
|
||||
attachNativeMacWebcamRecording: (payload: {
|
||||
screenVideoPath: string;
|
||||
recordingId: number;
|
||||
webcam: import("../src/lib/recordingSession").RecordedVideoAssetInput;
|
||||
cursorCaptureMode?: import("../src/lib/recordingSession").CursorCaptureMode;
|
||||
}) => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
session?: import("../src/lib/recordingSession").RecordingSession;
|
||||
message?: string;
|
||||
error?: string;
|
||||
}>;
|
||||
discardCursorTelemetry: (recordingId: number) => Promise<void>;
|
||||
getCursorTelemetry: (videoPath?: string) => Promise<{
|
||||
success: boolean;
|
||||
samples: CursorTelemetryPoint[];
|
||||
clicks: number[];
|
||||
message?: string;
|
||||
error?: string;
|
||||
}>;
|
||||
onStopRecordingFromTray: (callback: () => void) => () => void;
|
||||
openExternalUrl: (url: string) => Promise<{ success: boolean; error?: string }>;
|
||||
pickExportSavePath: (
|
||||
fileName: string,
|
||||
exportFolder?: string,
|
||||
) => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
message?: string;
|
||||
canceled?: boolean;
|
||||
error?: string;
|
||||
}>;
|
||||
writeExportToPath: (
|
||||
videoData: ArrayBuffer,
|
||||
filePath: string,
|
||||
) => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
message?: string;
|
||||
error?: string;
|
||||
}>;
|
||||
openVideoFilePicker: () => Promise<{ success: boolean; path?: string; canceled?: boolean }>;
|
||||
setCurrentVideoPath: (path: string) => Promise<{ success: boolean }>;
|
||||
setCurrentRecordingSession: (
|
||||
session: import("../src/lib/recordingSession").RecordingSession | null,
|
||||
) => Promise<{
|
||||
success: boolean;
|
||||
session?: import("../src/lib/recordingSession").RecordingSession;
|
||||
}>;
|
||||
getCurrentVideoPath: () => Promise<{ success: boolean; path?: string }>;
|
||||
getCurrentRecordingSession: () => Promise<{
|
||||
success: boolean;
|
||||
session?: import("../src/lib/recordingSession").RecordingSession;
|
||||
}>;
|
||||
readBinaryFile: (filePath: string) => Promise<{
|
||||
success: boolean;
|
||||
data?: ArrayBuffer;
|
||||
path?: string;
|
||||
message?: string;
|
||||
error?: string;
|
||||
}>;
|
||||
preparePreviewAudioTrack: (filePath: string) => Promise<{
|
||||
success: boolean;
|
||||
path?: string | null;
|
||||
message?: string;
|
||||
error?: string;
|
||||
}>;
|
||||
clearCurrentVideoPath: () => Promise<{ success: boolean }>;
|
||||
saveProjectFile: (
|
||||
projectData: unknown,
|
||||
suggestedName?: string,
|
||||
existingProjectPath?: string,
|
||||
) => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
message?: string;
|
||||
canceled?: boolean;
|
||||
error?: string;
|
||||
}>;
|
||||
loadProjectFile: () => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
project?: unknown;
|
||||
message?: string;
|
||||
canceled?: boolean;
|
||||
error?: string;
|
||||
}>;
|
||||
loadCurrentProjectFile: () => Promise<{
|
||||
success: boolean;
|
||||
path?: string;
|
||||
project?: unknown;
|
||||
message?: string;
|
||||
canceled?: boolean;
|
||||
error?: string;
|
||||
}>;
|
||||
onMenuLoadProject: (callback: () => void) => () => void;
|
||||
onMenuSaveProject: (callback: () => void) => () => void;
|
||||
onMenuSaveProjectAs: (callback: () => void) => () => void;
|
||||
getPlatform: () => Promise<string>;
|
||||
revealInFolder: (
|
||||
filePath: string,
|
||||
) => Promise<{ success: boolean; error?: string; message?: string }>;
|
||||
getShortcuts: () => Promise<Record<string, unknown> | null>;
|
||||
saveShortcuts: (shortcuts: unknown) => Promise<{ success: boolean; error?: string }>;
|
||||
hudOverlayHide: () => void;
|
||||
hudOverlayClose: () => void;
|
||||
setHudOverlayIgnoreMouseEvents: (ignore: boolean) => void;
|
||||
moveHudOverlayBy: (deltaX: number, deltaY: number) => void;
|
||||
showCountdownOverlay: (value: number, runId: number) => Promise<void>;
|
||||
setCountdownOverlayValue: (value: number, runId: number) => Promise<void>;
|
||||
hideCountdownOverlay: (runId: number) => Promise<void>;
|
||||
onCountdownOverlayValue: (callback: (value: number | null) => void) => () => void;
|
||||
setMicrophoneExpanded: (expanded: boolean) => void;
|
||||
setHasUnsavedChanges: (hasChanges: boolean) => void;
|
||||
onRequestSaveBeforeClose: (callback: () => Promise<boolean> | boolean) => () => void;
|
||||
onRequestCloseConfirm: (callback: () => void) => () => void;
|
||||
sendCloseConfirmResponse: (choice: "save" | "discard" | "cancel") => void;
|
||||
setLocale: (locale: string) => Promise<void>;
|
||||
saveDiagnostic: (payload: {
|
||||
error: string;
|
||||
stack?: string;
|
||||
projectState: unknown;
|
||||
logs: string[];
|
||||
}) => Promise<{ success: boolean; path?: string; canceled?: boolean; error?: string }>;
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Description of one capturable source as exposed to the renderer.
 *
 * NOTE(review): field meanings are not established by this declaration alone;
 * the notes below are inferred from the names — confirm against the producer.
 */
interface ProcessedDesktopSource {
	id: string;
	name: string;
	// String-typed display identifier; a separate numeric `displayId` is declared below.
	display_id: string;
	// Nullable: a source may come without a thumbnail or an application icon.
	thumbnail: string | null;
	appIcon: string | null;
	displayId?: number;
	displayIndex?: number;
	screenIndex?: number;
	displayLabel?: string;
	// Optional rectangle; presumably the display bounds — TODO confirm units/origin.
	bounds?: { x: number; y: number; width: number; height: number };
}
|
||||
|
||||
/**
 * A single cursor sample.
 *
 * NOTE(review): `cx`/`cy` look like normalized (0..1) coordinates judging by
 * the fixtures used elsewhere in the project — confirm before relying on it.
 */
interface CursorTelemetryPoint {
	// Timestamp of the sample in milliseconds.
	timeMs: number;
	cx: number;
	cy: number;
}
|
||||
@@ -0,0 +1,181 @@
|
||||
import type {
|
||||
GeneratedGuide,
|
||||
GuideLanguage,
|
||||
GuideSession,
|
||||
GuideStepCandidate,
|
||||
} from "../../../src/guide/contracts";
|
||||
import { buildGuideDraftPrompt } from "../../../src/guide/promptBuilder";
|
||||
import type { DeepSeekGuideConfigProvider } from "./deepseekSettingsStore";
|
||||
|
||||
/**
 * Contract for a component that produces a guide draft from a recorded
 * session, its step candidates and the requested output language.
 */
export interface GuideDraftClient {
	/** Resolves with the generated guide; implementations decide how failures are reported. */
	generate(input: {
		session: GuideSession;
		candidates: GuideStepCandidate[];
		language: GuideLanguage;
	}): Promise<GeneratedGuide>;
}
|
||||
|
||||
/**
 * Error raised by the DeepSeek guide client.
 *
 * Carries a machine-readable `code` and a `retryable` flag (false unless the
 * thrower says otherwise) next to the usual message.
 */
export class DeepSeekGuideClientError extends Error {
	readonly code: "guide-ai-key-missing" | "guide-ai-request-failed" | "guide-ai-invalid-output";
	readonly retryable: boolean;

	constructor(
		code: "guide-ai-key-missing" | "guide-ai-request-failed" | "guide-ai-invalid-output",
		message: string,
		retryable = false,
	) {
		super(message);
		this.code = code;
		this.retryable = retryable;
		// Override the default "Error" so logs identify the failure source.
		this.name = "DeepSeekGuideClientError";
	}
}
|
||||
|
||||
/**
 * The subset of the chat-completions response body this module reads.
 * Every level is optional because the payload is untrusted JSON.
 */
interface DeepSeekChatResponse {
	choices?: { message?: { content?: string } }[];
}
|
||||
|
||||
/**
 * Guide draft client backed by a DeepSeek chat-completions endpoint.
 *
 * Configuration comes from the optional `configProvider`; without one, the
 * constructor fallbacks (environment variables by default) are used.
 */
export class DeepSeekGuideClient implements GuideDraftClient {
	constructor(
		private readonly configProvider?: DeepSeekGuideConfigProvider,
		private readonly fallbackApiKey = process.env.DEEPSEEK_API_KEY,
		private readonly fallbackBaseUrl = process.env.DEEPSEEK_BASE_URL ?? "https://api.deepseek.com",
		private readonly fallbackModel = process.env.DEEPSEEK_MODEL ?? "deepseek-chat",
	) {}

	/**
	 * Requests a guide draft for the given session and candidates.
	 *
	 * @throws DeepSeekGuideClientError with code
	 *   - "guide-ai-key-missing" when no API key is configured,
	 *   - "guide-ai-request-failed" (retryable) on network errors or non-2xx status,
	 *   - "guide-ai-invalid-output" when the body is not JSON, carries no text
	 *     content, or the content is not a valid guide.
	 */
	async generate(input: {
		session: GuideSession;
		candidates: GuideStepCandidate[];
		language: GuideLanguage;
	}): Promise<GeneratedGuide> {
		const config = await this.resolveConfig();
		if (!config.apiKey) {
			throw new DeepSeekGuideClientError(
				"guide-ai-key-missing",
				"DeepSeek API key is not configured.",
			);
		}

		let response: Response;
		try {
			// Drop one trailing slash so the joined URL never contains "//chat".
			response = await fetch(`${config.baseUrl.replace(/\/$/, "")}/chat/completions`, {
				method: "POST",
				headers: {
					"content-type": "application/json",
					authorization: `Bearer ${config.apiKey}`,
				},
				body: JSON.stringify({
					model: config.model,
					temperature: 0.2,
					response_format: { type: "json_object" },
					messages: [
						{
							role: "system",
							content:
								"You convert UI interaction telemetry into concise software user-guide steps.",
						},
						{
							role: "user",
							content: buildGuideDraftPrompt(input),
						},
					],
				}),
			});
		} catch (error) {
			throw new DeepSeekGuideClientError(
				"guide-ai-request-failed",
				`DeepSeek request failed: ${error instanceof Error ? error.message : String(error)}`,
				true,
			);
		}

		if (!response.ok) {
			throw new DeepSeekGuideClientError(
				"guide-ai-request-failed",
				`DeepSeek returned HTTP ${response.status}.`,
				true,
			);
		}

		// A 2xx response can still carry a non-JSON body (proxy pages, truncated
		// streams). Report that with the client's own error type instead of
		// letting the parser's error escape.
		let payload: DeepSeekChatResponse | null;
		try {
			payload = (await response.json()) as DeepSeekChatResponse | null;
		} catch (error) {
			throw new DeepSeekGuideClientError(
				"guide-ai-invalid-output",
				`DeepSeek returned a response body that is not JSON: ${error instanceof Error ? error.message : String(error)}`,
			);
		}

		// `payload` is untrusted: it may be `null` or hold a non-string content.
		const content = payload?.choices?.[0]?.message?.content;
		if (typeof content !== "string" || !content) {
			throw new DeepSeekGuideClientError(
				"guide-ai-invalid-output",
				"DeepSeek returned an empty response.",
			);
		}
		return parseGeneratedGuide(content);
	}

	/** Returns the provider's config when a provider was supplied, else the constructor fallbacks. */
	private async resolveConfig(): Promise<{ apiKey?: string; baseUrl: string; model: string }> {
		if (this.configProvider) {
			return await this.configProvider.getDeepSeekConfig();
		}
		return {
			apiKey: this.fallbackApiKey,
			baseUrl: this.fallbackBaseUrl,
			model: this.fallbackModel,
		};
	}
}
|
||||
|
||||
/**
 * Turns the model's text reply into a `GeneratedGuide`.
 *
 * Any failure — malformed JSON or JSON of the wrong shape — is reported as a
 * `DeepSeekGuideClientError` with code "guide-ai-invalid-output".
 */
function parseGeneratedGuide(content: string): GeneratedGuide {
	const invalidOutput = (cause: unknown): DeepSeekGuideClientError =>
		new DeepSeekGuideClientError(
			"guide-ai-invalid-output",
			`DeepSeek response is not valid guide JSON: ${cause instanceof Error ? cause.message : String(cause)}`,
		);

	let guide: GeneratedGuide | null;
	try {
		const decoded = JSON.parse(stripCodeFence(content)) as unknown;
		guide = normalizeGeneratedGuide(decoded);
	} catch (cause) {
		throw invalidOutput(cause);
	}

	if (guide === null) {
		throw invalidOutput(new Error("Unexpected guide JSON shape."));
	}
	return guide;
}
|
||||
|
||||
/**
 * Removes a surrounding Markdown code fence (``` or ```json) from `content`.
 *
 * The input is trimmed first: both patterns are anchored to the very start
 * and end of the string, so a reply wrapped in blank lines would otherwise
 * keep its fence and later fail JSON parsing.
 *
 * @returns the unfenced, trimmed text; unfenced input is returned trimmed.
 */
function stripCodeFence(content: string): string {
	return content
		.trim()
		.replace(/^```(?:json)?\s*/i, "")
		.replace(/\s*```$/i, "")
		.trim();
}
|
||||
|
||||
/**
 * Validates untrusted JSON and reshapes it into a `GeneratedGuide`.
 *
 * Returns `null` when the top level is not an object with a string `title`
 * and an array `steps`. Individual steps lacking a string `title` or
 * `instruction` are dropped. A missing or non-finite `order` falls back to
 * the step's 1-based position in the incoming array, and a blank or missing
 * `id` becomes `guide-step-<order>`.
 */
function normalizeGeneratedGuide(value: unknown): GeneratedGuide | null {
	if (typeof value !== "object" || value === null) {
		return null;
	}

	const { title: guideTitle, summary, steps: incomingSteps } = value as Partial<GeneratedGuide>;
	if (typeof guideTitle !== "string" || !Array.isArray(incomingSteps)) {
		return null;
	}

	type GuideStep = GeneratedGuide["steps"][number];
	const steps: GuideStep[] = [];

	incomingSteps.forEach((entry: unknown, position: number) => {
		if (typeof entry !== "object" || entry === null) {
			return;
		}
		const candidate = entry as Partial<GuideStep>;
		if (typeof candidate.title !== "string" || typeof candidate.instruction !== "string") {
			return;
		}

		// Position counts rejected entries too, so fallback orders can have gaps.
		const order =
			typeof candidate.order === "number" && Number.isFinite(candidate.order)
				? candidate.order
				: position + 1;
		const id =
			typeof candidate.id === "string" && candidate.id.trim() !== ""
				? candidate.id
				: `guide-step-${order}`;

		const step: GuideStep = {
			id,
			order,
			title: candidate.title,
			instruction: candidate.instruction,
		};
		// Optional references are copied only when they are strings.
		if (typeof candidate.screenshotPath === "string") {
			step.screenshotPath = candidate.screenshotPath;
		}
		if (typeof candidate.sourceCandidateId === "string") {
			step.sourceCandidateId = candidate.sourceCandidateId;
		}
		steps.push(step);
	});

	return {
		title: guideTitle,
		summary: typeof summary === "string" ? summary : undefined,
		steps,
	};
}
|
||||
@@ -0,0 +1,66 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { DeepSeekSettingsStore } from "./deepseekSettingsStore";
|
||||
|
||||
// Scratch directories created by the tests; drained and removed after each test.
const tempDirs: string[] = [];
// Snapshot of the OCR environment overrides so each test can restore them.
const originalOcrProfile = process.env.OPENSCREEN_GUIDE_OCR_PROFILE;
const originalOcrLanguage = process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE;

beforeEach(() => {
	// Start every test without OCR overrides so store defaults are observable.
	delete process.env.OPENSCREEN_GUIDE_OCR_PROFILE;
	delete process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE;
});

afterEach(async () => {
	restoreEnv("OPENSCREEN_GUIDE_OCR_PROFILE", originalOcrProfile);
	restoreEnv("OPENSCREEN_GUIDE_OCR_LANGUAGE", originalOcrLanguage);

	// splice(0) empties the shared list while handing back what was in it.
	const stale = tempDirs.splice(0);
	const removals = stale.map((dir) => fs.rm(dir, { recursive: true, force: true }));
	await Promise.all(removals);
});
|
||||
|
||||
/**
 * Puts an environment variable back to a previously captured value.
 * `undefined` means the variable did not exist, so it is deleted rather than
 * assigned (assigning would store the string "undefined").
 */
function restoreEnv(name: string, value: string | undefined): void {
	if (value !== undefined) {
		process.env[name] = value;
	} else {
		delete process.env[name];
	}
}
|
||||
|
||||
/**
 * Builds a store backed by a settings file inside a fresh temporary
 * directory. The directory is recorded in `tempDirs` for later cleanup; the
 * settings file itself is not created here.
 */
async function createStore(): Promise<DeepSeekSettingsStore> {
	const prefix = path.join(os.tmpdir(), "openscreen-guide-settings-");
	const scratchDir = await fs.mkdtemp(prefix);
	tempDirs.push(scratchDir);

	const settingsFile = path.join(scratchDir, "guide-ai-settings.json");
	return new DeepSeekSettingsStore(settingsFile);
}
|
||||
|
||||
describe("DeepSeekSettingsStore OCR settings", () => {
	it("defaults to the Vietnamese enhanced OCR profile", async () => {
		// No settings file and no env overrides: the built-in defaults apply.
		const store = await createStore();

		const ocrConfig = await store.getOcrConfig();

		expect(ocrConfig).toEqual({ profile: "vietnamese", language: "vi,en" });
	});

	it("persists OCR profile changes alongside DeepSeek settings", async () => {
		const store = await createStore();
		const expectedOcr = { profile: "hybrid", language: "vi,en" };

		const status = await store.save({
			deepseekApiKeyEnvName: "DEEPSEEK_API_KEY",
			baseUrl: "https://api.deepseek.com",
			model: "deepseek-chat",
			ocrProfile: "hybrid",
			ocrLanguage: "vi,en",
		});

		// The returned status also carries `updatedAt`, hence the partial match.
		expect(status.ocr).toMatchObject(expectedOcr);
		// A later read must see the same values that save() reported.
		expect(await store.getOcrConfig()).toEqual(expectedOcr);
	});
});
|
||||
@@ -0,0 +1,223 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type {
|
||||
GuideAiSettings,
|
||||
GuideOcrProfile,
|
||||
SaveGuideAiSettingsInput,
|
||||
} from "../../../src/guide/contracts";
|
||||
|
||||
/** Connection settings needed to call the DeepSeek API. */
export interface DeepSeekGuideConfig {
	// Absent when no key could be resolved.
	apiKey?: string;
	baseUrl: string;
	model: string;
}
|
||||
|
||||
/** Source of DeepSeek connection settings, resolved asynchronously on demand. */
export interface DeepSeekGuideConfigProvider {
	getDeepSeekConfig(): Promise<DeepSeekGuideConfig>;
}
|
||||
|
||||
/** Effective OCR settings for guide generation. */
export interface GuideOcrConfig {
	profile: GuideOcrProfile;
	// Comma-separated language codes, e.g. "vi,en".
	language: string;
}
|
||||
|
||||
/** Source of the effective OCR settings, resolved asynchronously on demand. */
export interface GuideOcrConfigProvider {
	getOcrConfig(): Promise<GuideOcrConfig>;
}
|
||||
|
||||
/**
 * Layout of the settings JSON file on disk (schema version 1).
 *
 * Only the *name* of the environment variable holding the API key is stored,
 * never the key itself.
 */
interface PersistedGuideAiSettings {
	schemaVersion: 1;
	ocr?: {
		profile?: GuideOcrProfile;
		language?: string;
		// ISO-8601 timestamp of the last save.
		updatedAt?: string;
	};
	deepseek?: {
		apiKeyEnvName?: string;
		baseUrl?: string;
		model?: string;
		// ISO-8601 timestamp of the last save.
		updatedAt?: string;
	};
}
|
||||
|
||||
// Fallbacks used when neither the settings file nor the environment supplies a value.

// Environment variable consulted for the API key unless another name is configured.
const DEFAULT_DEEPSEEK_API_KEY_ENV_NAME = "DEEPSEEK_API_KEY";
const DEFAULT_DEEPSEEK_BASE_URL = "https://api.deepseek.com";
const DEFAULT_DEEPSEEK_MODEL = "deepseek-chat";

const DEFAULT_OCR_PROFILE: GuideOcrProfile = "vietnamese";
// Comma-separated language codes.
const DEFAULT_OCR_LANGUAGE = "vi,en";
|
||||
|
||||
/**
 * File-backed store for guide AI settings (DeepSeek endpoint and OCR options).
 *
 * The file holds only non-secret values. The API key is read from the
 * environment variable whose name is stored in the file. Values missing from
 * the file fall back to environment variables and then to built-in defaults.
 */
export class DeepSeekSettingsStore implements DeepSeekGuideConfigProvider, GuideOcrConfigProvider {
	/** @param filePath location of the settings JSON file; it need not exist yet. */
	constructor(private readonly filePath: string) {}

	/** Returns the effective settings plus whether an API key is currently available. */
	async getStatus(): Promise<GuideAiSettings> {
		const raw = await this.readSettings();
		const apiKeyEnvName = normalizeEnvName(raw?.deepseek?.apiKeyEnvName);
		const activeApiKey = process.env[apiKeyEnvName];

		return {
			ocr: {
				...this.resolveOcr(raw),
				updatedAt: raw?.ocr?.updatedAt,
			},
			deepseek: {
				hasApiKey: Boolean(activeApiKey),
				apiKeyEnvName,
				...this.resolveEndpoint(raw),
				storage: activeApiKey ? "environment" : "none",
				encryptionAvailable: false,
				updatedAt: raw?.deepseek?.updatedAt,
			},
		};
	}

	/**
	 * Merges `input` over the stored settings, writes the result and returns
	 * the refreshed status. Fields omitted from `input` keep their stored value.
	 */
	async save(input: SaveGuideAiSettingsInput): Promise<GuideAiSettings> {
		const current = (await this.readSettings()) ?? { schemaVersion: 1 };
		const currentOcr = current.ocr ?? {};
		const currentDeepSeek = current.deepseek ?? {};
		const nextOcr = {
			...currentOcr,
			profile: normalizeOcrProfile(input.ocrProfile ?? currentOcr.profile),
			language: normalizeOcrLanguage(input.ocrLanguage ?? currentOcr.language),
			updatedAt: new Date().toISOString(),
		};
		const nextDeepSeek = {
			...currentDeepSeek,
			baseUrl: normalizeBaseUrl(input.baseUrl ?? currentDeepSeek.baseUrl),
			model: normalizeModel(input.model ?? currentDeepSeek.model),
			updatedAt: new Date().toISOString(),
		};

		// Clearing takes precedence over setting a new name in the same request.
		if (input.clearDeepseekApiKeyEnvName) {
			delete nextDeepSeek.apiKeyEnvName;
		} else if (input.deepseekApiKeyEnvName !== undefined) {
			nextDeepSeek.apiKeyEnvName = normalizeEnvName(input.deepseekApiKeyEnvName);
		}

		await this.writeSettings({
			schemaVersion: 1,
			ocr: nextOcr,
			deepseek: nextDeepSeek,
		});
		return await this.getStatus();
	}

	/** Resolves the API key (from the environment), base URL and model to use. */
	async getDeepSeekConfig(): Promise<DeepSeekGuideConfig> {
		const raw = await this.readSettings();
		const apiKeyEnvName = normalizeEnvName(raw?.deepseek?.apiKeyEnvName);
		return {
			apiKey: process.env[apiKeyEnvName],
			...this.resolveEndpoint(raw),
		};
	}

	/** Resolves the OCR profile and language to use. */
	async getOcrConfig(): Promise<GuideOcrConfig> {
		return this.resolveOcr(await this.readSettings());
	}

	/** Stored OCR values win over the OPENSCREEN_GUIDE_OCR_* variables; both are normalized. */
	private resolveOcr(raw: PersistedGuideAiSettings | null): GuideOcrConfig {
		return {
			profile: normalizeOcrProfile(raw?.ocr?.profile ?? process.env.OPENSCREEN_GUIDE_OCR_PROFILE),
			language: normalizeOcrLanguage(
				raw?.ocr?.language ?? process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE,
			),
		};
	}

	/** Stored endpoint values win over DEEPSEEK_BASE_URL / DEEPSEEK_MODEL; both are normalized. */
	private resolveEndpoint(raw: PersistedGuideAiSettings | null): { baseUrl: string; model: string } {
		return {
			baseUrl: normalizeBaseUrl(raw?.deepseek?.baseUrl ?? process.env.DEEPSEEK_BASE_URL),
			model: normalizeModel(raw?.deepseek?.model ?? process.env.DEEPSEEK_MODEL),
		};
	}

	/**
	 * Reads and normalizes the settings file.
	 *
	 * @returns `null` when the file is missing, unreadable, not JSON, or not
	 *   schema version 1 — callers then fall back to defaults.
	 */
	private async readSettings(): Promise<PersistedGuideAiSettings | null> {
		let parsed: unknown;
		let normalized: PersistedGuideAiSettings | null;
		try {
			const content = await fs.readFile(this.filePath, "utf-8");
			parsed = JSON.parse(content) as unknown;
			normalized = normalizePersistedSettings(parsed);
		} catch {
			return null;
		}

		if (normalized && hasLegacyStoredSecret(parsed)) {
			// Rewrite the file without the legacy key material. This is best
			// effort: a failed rewrite must not discard settings that were read
			// successfully, and it is attempted again on the next read.
			try {
				await this.writeSettings(normalized);
			} catch (error) {
				console.warn("Failed to remove legacy secret from guide AI settings:", error);
			}
		}
		return normalized;
	}

	/**
	 * Writes the settings through a temp file followed by a rename, so readers
	 * never observe a partially written file.
	 */
	private async writeSettings(settings: PersistedGuideAiSettings): Promise<void> {
		await fs.mkdir(path.dirname(this.filePath), { recursive: true });
		const tempPath = `${this.filePath}.${process.pid}.${Date.now()}.tmp`;
		try {
			await fs.writeFile(tempPath, JSON.stringify(settings, null, 2), "utf-8");
			await fs.rename(tempPath, this.filePath);
		} catch (error) {
			// Do not leave the temp file behind; the original failure is what matters.
			await fs.rm(tempPath, { force: true }).catch(() => undefined);
			throw error;
		}
	}
}
|
||||
|
||||
/**
 * Tells whether the raw settings JSON still has an object-valued
 * `deepseek.apiKey`, the marker this module treats as a legacy stored secret.
 *
 * Note that `typeof null === "object"`, so an explicit `null` also counts.
 */
function hasLegacyStoredSecret(input: unknown): boolean {
	if (typeof input !== "object" || input === null) {
		return false;
	}
	const deepseekSection = (input as { deepseek?: { apiKey?: unknown } }).deepseek;
	return typeof deepseekSection?.apiKey === "object";
}
|
||||
|
||||
/**
 * Validates parsed settings JSON and copies the known fields into a fresh
 * `PersistedGuideAiSettings`. Unknown fields are dropped.
 *
 * - Returns `null` unless `input` is an object with `schemaVersion === 1`.
 * - Only string-typed values are accepted; anything else counts as absent, so
 *   a hand-edited file cannot push a non-string into the string normalizers.
 * - Absent OCR fields stay `undefined` (as `baseUrl` and `model` do), which
 *   lets callers apply their environment-variable fallbacks. Present values
 *   are normalized.
 * - `apiKeyEnvName` is always filled, using the default name when absent or invalid.
 */
function normalizePersistedSettings(input: unknown): PersistedGuideAiSettings | null {
	if (!input || typeof input !== "object") {
		return null;
	}
	const raw = input as { schemaVersion?: unknown; ocr?: unknown; deepseek?: unknown };
	if (raw.schemaVersion !== 1) {
		return null;
	}

	const asSection = (section: unknown): Record<string, unknown> =>
		section && typeof section === "object" ? (section as Record<string, unknown>) : {};
	const asString = (field: unknown): string | undefined =>
		typeof field === "string" ? field : undefined;

	const ocr = asSection(raw.ocr);
	const deepseek = asSection(raw.deepseek);
	const storedProfile = asString(ocr.profile);
	const storedLanguage = asString(ocr.language);

	return {
		schemaVersion: 1,
		ocr: {
			profile: storedProfile === undefined ? undefined : normalizeOcrProfile(storedProfile),
			language: storedLanguage === undefined ? undefined : normalizeOcrLanguage(storedLanguage),
			updatedAt: asString(ocr.updatedAt),
		},
		deepseek: {
			apiKeyEnvName: normalizeEnvName(asString(deepseek.apiKeyEnvName)),
			baseUrl: asString(deepseek.baseUrl),
			model: asString(deepseek.model),
			updatedAt: asString(deepseek.updatedAt),
		},
	};
}
|
||||
|
||||
/**
 * Returns a usable environment-variable name.
 * The trimmed input is kept only if it starts with a letter or underscore and
 * continues with letters, digits or underscores; otherwise the default is used.
 */
function normalizeEnvName(value: string | undefined): string {
	const candidate = value?.trim() ?? "";
	// The empty string fails the pattern too, so it also yields the default.
	const isValidName = /^[A-Za-z_][A-Za-z0-9_]*$/.test(candidate);
	return isValidName ? candidate : DEFAULT_DEEPSEEK_API_KEY_ENV_NAME;
}
|
||||
|
||||
/**
 * Returns a canonical http(s) base URL without a trailing slash.
 * Blank, unparsable or non-http(s) input yields the default base URL.
 */
function normalizeBaseUrl(value: string | undefined): string {
	const trimmed = value?.trim();
	const candidate = trimmed ? trimmed : DEFAULT_DEEPSEEK_BASE_URL;

	let parsed: URL;
	try {
		parsed = new URL(candidate);
	} catch {
		return DEFAULT_DEEPSEEK_BASE_URL;
	}

	const isHttp = parsed.protocol === "https:" || parsed.protocol === "http:";
	if (!isHttp) {
		return DEFAULT_DEEPSEEK_BASE_URL;
	}
	// URL serialization adds "/" to a bare origin; strip a single trailing slash.
	return parsed.toString().replace(/\/$/, "");
}
|
||||
|
||||
/** Returns the trimmed model name, or the default model when it is blank or missing. */
function normalizeModel(value: string | undefined): string {
	const trimmed = value?.trim();
	return trimmed ? trimmed : DEFAULT_DEEPSEEK_MODEL;
}
|
||||
|
||||
/** Accepts exactly "fast", "vietnamese" or "hybrid"; anything else yields the default profile. */
function normalizeOcrProfile(value: string | undefined): GuideOcrProfile {
	switch (value) {
		case "fast":
		case "vietnamese":
		case "hybrid":
			return value;
		default:
			return DEFAULT_OCR_PROFILE;
	}
}
|
||||
|
||||
/**
 * Canonicalizes a comma-separated language list: each entry is trimmed and
 * lower-cased, empty entries are dropped, and the rest are re-joined with ",".
 * A missing value or a list with no usable entries yields the default.
 */
function normalizeOcrLanguage(value: string | undefined): string {
	if (value === undefined || value === null) {
		return DEFAULT_OCR_LANGUAGE;
	}

	const codes: string[] = [];
	for (const part of value.split(",")) {
		const code = part.trim().toLowerCase();
		if (code) {
			codes.push(code);
		}
	}
	return codes.length > 0 ? codes.join(",") : DEFAULT_OCR_LANGUAGE;
}
|
||||
@@ -0,0 +1,170 @@
|
||||
import type { IpcMain } from "electron";
|
||||
import type {
|
||||
AddGuideMarkerInput,
|
||||
DiscardGuideSessionInput,
|
||||
ExportGuideInput,
|
||||
ExportGuideResult,
|
||||
FinalizeGuideEventsInput,
|
||||
GenerateGuideDraftInput,
|
||||
GuideAiSettings,
|
||||
GuideEvent,
|
||||
GuideIpcResult,
|
||||
GuideSession,
|
||||
RunGuideOcrInput,
|
||||
SaveGuideAiSettingsInput,
|
||||
SaveGuideInput,
|
||||
WriteGuideSnapshotInput,
|
||||
} from "../../src/guide/contracts";
|
||||
import type { DeepSeekSettingsStore } from "./ai/deepseekSettingsStore";
|
||||
import { GuideStore, GuideStoreError } from "./guideStore";
|
||||
|
||||
/**
 * Optional hooks invoked by the guide IPC handlers after a store call succeeds.
 */
export interface GuideIpcLifecycle {
	/** Called after "guide:start-session" succeeds, with the created session. */
	onSessionStarted?: (session: GuideSession) => void;
	/**
	 * Called after "guide:finalize-events" or "guide:discard-session" succeeds.
	 * The id is passed through exactly as received over IPC, hence `unknown`.
	 */
	onSessionEnded?: (recordingId: unknown) => void;
}
|
||||
|
||||
/**
 * Registers every "guide:*" IPC channel on `ipcMain`.
 *
 * Each handler delegates to `store` (or `aiSettingsStore`) through
 * `toGuideResult`, so the renderer always receives a `GuideIpcResult`
 * envelope. Lifecycle hooks fire only after the corresponding store call
 * succeeds.
 *
 * @param ipcMain         Electron main-process IPC registry.
 * @param store           Guide session store backing most channels.
 * @param aiSettingsStore Optional; when absent, the AI-settings channels
 *                        answer with a "guide-internal-error" result.
 * @param lifecycle       Optional session start/end hooks.
 */
export function registerGuideIpcHandlers(
	ipcMain: IpcMain,
	store: GuideStore,
	aiSettingsStore?: DeepSeekSettingsStore,
	lifecycle: GuideIpcLifecycle = {},
): void {
	ipcMain.handle(
		"guide:start-session",
		async (_event, recordingId): Promise<GuideIpcResult<GuideSession>> => {
			const outcome = await toGuideResult(() => store.startSession(recordingId));
			if (outcome.success) {
				lifecycle.onSessionStarted?.(outcome.data);
			}
			return outcome;
		},
	);

	ipcMain.handle(
		"guide:read-session",
		(_event, recordingId): Promise<GuideIpcResult<GuideSession>> =>
			toGuideResult(() => store.readSession(recordingId)),
	);

	ipcMain.handle(
		"guide:add-marker",
		(
			_event,
			input: AddGuideMarkerInput,
		): Promise<GuideIpcResult<{ session: GuideSession; event: GuideEvent }>> =>
			toGuideResult(() => store.addMarker(input)),
	);

	ipcMain.handle(
		"guide:finalize-events",
		async (_event, input: FinalizeGuideEventsInput): Promise<GuideIpcResult<GuideSession>> => {
			const outcome = await toGuideResult(() => store.finalizeEvents(input));
			if (outcome.success) {
				lifecycle.onSessionEnded?.(input.recordingId);
			}
			return outcome;
		},
	);

	ipcMain.handle(
		"guide:write-snapshot",
		(_event, input: WriteGuideSnapshotInput): Promise<GuideIpcResult<GuideSession>> =>
			toGuideResult(() => store.writeSnapshot(input)),
	);

	ipcMain.handle(
		"guide:run-ocr",
		(_event, input: RunGuideOcrInput): Promise<GuideIpcResult<GuideSession>> =>
			toGuideResult(() => store.runOcr(input)),
	);

	ipcMain.handle(
		"guide:generate-draft",
		(_event, input: GenerateGuideDraftInput): Promise<GuideIpcResult<GuideSession>> =>
			toGuideResult(() => store.generateDraft(input)),
	);

	// The settings-store lookup runs inside the wrapped action, so a missing
	// store becomes a failure result instead of a rejected IPC call.
	ipcMain.handle(
		"guide:get-ai-settings",
		(): Promise<GuideIpcResult<GuideAiSettings>> =>
			toGuideResult(() => requireAiSettingsStore(aiSettingsStore).getStatus()),
	);

	ipcMain.handle(
		"guide:save-ai-settings",
		(_event, input: SaveGuideAiSettingsInput): Promise<GuideIpcResult<GuideAiSettings>> =>
			toGuideResult(() => requireAiSettingsStore(aiSettingsStore).save(input)),
	);

	ipcMain.handle(
		"guide:save-guide",
		(_event, input: SaveGuideInput): Promise<GuideIpcResult<GuideSession>> =>
			toGuideResult(() => store.saveGuide(input)),
	);

	ipcMain.handle(
		"guide:export-markdown",
		(_event, input: ExportGuideInput): Promise<GuideIpcResult<ExportGuideResult>> =>
			toGuideResult(() => store.exportMarkdown(input)),
	);

	ipcMain.handle(
		"guide:export-html",
		(_event, input: ExportGuideInput): Promise<GuideIpcResult<ExportGuideResult>> =>
			toGuideResult(() => store.exportHtml(input)),
	);

	ipcMain.handle(
		"guide:discard-session",
		async (
			_event,
			input: DiscardGuideSessionInput,
		): Promise<GuideIpcResult<{ discarded: true }>> => {
			const outcome = await toGuideResult(async () => {
				await store.discardSession(input);
				return { discarded: true as const };
			});
			if (outcome.success) {
				lifecycle.onSessionEnded?.(input.recordingId);
			}
			return outcome;
		},
	);
}
|
||||
|
||||
/**
 * Returns the settings store, or throws when none was supplied.
 *
 * @throws GuideStoreError with code "guide-internal-error".
 */
function requireAiSettingsStore(store: DeepSeekSettingsStore | undefined): DeepSeekSettingsStore {
	if (store) {
		return store;
	}
	throw new GuideStoreError("guide-internal-error", "Guide AI settings store is unavailable.");
}
|
||||
|
||||
/**
 * Runs `action` and converts its outcome into a `GuideIpcResult`.
 *
 * - Success: `{ success: true, data }`.
 * - `GuideStoreError`: its `code`, `message` and `retryable` are forwarded.
 * - Anything else: logged with `console.error` and reported as a
 *   non-retryable "guide-internal-error".
 *
 * The returned promise never rejects. `action` is called inside the `try`, so
 * a synchronous throw from it is handled like a rejection.
 */
async function toGuideResult<TData>(action: () => Promise<TData>): Promise<GuideIpcResult<TData>> {
	let data: TData;
	try {
		data = await action();
	} catch (error) {
		if (!(error instanceof GuideStoreError)) {
			console.error("Guide IPC failed:", error);
			return {
				success: false,
				code: "guide-internal-error",
				error: error instanceof Error ? error.message : String(error),
				retryable: false,
			};
		}
		return {
			success: false,
			code: error.code,
			error: error.message,
			retryable: error.retryable,
		};
	}
	return { success: true, data };
}
|
||||
@@ -0,0 +1,57 @@
|
||||
import path from "node:path";
|
||||
import type { GuideRecordingIdInput } from "../../src/guide/contracts";
|
||||
|
||||
// File-name suffix of the guide session document stored next to a recording.
export const GUIDE_SESSION_SUFFIX = ".guide.json";
// Directory-name suffix of the guide output folder stored next to a recording.
export const GUIDE_OUTPUT_DIR_SUFFIX = "-guide";
|
||||
|
||||
/** File-system locations derived for one recording's guide artifacts. */
export interface GuidePaths {
	// Normalized (string) recording id.
	recordingId: string;
	// File name stem shared by the session file and the output directory.
	baseName: string;
	// Directory containing both artifacts.
	baseDir: string;
	// `<baseDir>/<baseName>.guide.json`
	guidePath: string;
	// `<baseDir>/<baseName>-guide`
	outputDir: string;
}
|
||||
|
||||
/**
 * Converts a recording id received from outside into its canonical string form.
 *
 * - Finite numbers are truncated toward zero and stringified.
 * - Strings are trimmed; an empty result is rejected.
 * - Everything else (including NaN and ±Infinity) yields `null`.
 */
export function normalizeGuideRecordingId(recordingId: GuideRecordingIdInput): string | null {
	switch (typeof recordingId) {
		case "number":
			return Number.isFinite(recordingId) ? String(Math.trunc(recordingId)) : null;
		case "string": {
			const id = recordingId.trim();
			return id.length > 0 ? id : null;
		}
		default:
			return null;
	}
}
|
||||
|
||||
/**
 * Derives where a recording's guide session file and output directory live.
 *
 * With a non-blank `videoPath`, both artifacts sit next to the video and
 * share its file name stem. Otherwise they sit in `recordingsDir` and are
 * named after the recording id ("recording-<id>").
 *
 * @returns `null` when the recording id is invalid, or when the resulting
 *   base name is not a single path component. An id such as "../x" or "a/b"
 *   would otherwise be joined into `guidePath` / `outputDir` and could move
 *   them out of `baseDir`.
 */
export function resolveGuidePaths(input: {
	recordingsDir: string;
	recordingId: GuideRecordingIdInput;
	videoPath?: string | null;
}): GuidePaths | null {
	const recordingId = normalizeGuideRecordingId(input.recordingId);
	if (!recordingId) {
		return null;
	}

	const normalizedVideoPath =
		typeof input.videoPath === "string" && input.videoPath.trim()
			? path.resolve(input.videoPath.trim())
			: null;
	const parsedVideoPath = normalizedVideoPath ? path.parse(normalizedVideoPath) : null;
	const baseName = parsedVideoPath?.name ?? defaultGuideBaseName(recordingId);
	const baseDir = parsedVideoPath?.dir ?? path.resolve(input.recordingsDir);

	// The base name is spliced into file names below, so it must not contain
	// separators or be a dot segment. A stem taken from `path.parse` never
	// does; this guards the id-derived name.
	if (baseName === "." || baseName === ".." || path.basename(baseName) !== baseName) {
		return null;
	}

	return {
		recordingId,
		baseName,
		baseDir,
		guidePath: path.join(baseDir, `${baseName}${GUIDE_SESSION_SUFFIX}`),
		outputDir: path.join(baseDir, `${baseName}${GUIDE_OUTPUT_DIR_SUFFIX}`),
	};
}
|
||||
|
||||
/** Prefixes the id with "recording-" unless it already starts with that prefix. */
function defaultGuideBaseName(recordingId: string): string {
	const prefix = "recording-";
	if (recordingId.startsWith(prefix)) {
		return recordingId;
	}
	return `${prefix}${recordingId}`;
}
|
||||
@@ -0,0 +1,381 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { GuideStore, GuideStoreError } from "./guideStore";
|
||||
|
||||
// Per-test scratch directory acting as the recordings folder.
let recordingsDir = "";

beforeEach(async () => {
	const prefix = path.join(os.tmpdir(), "openscreen-guide-");
	recordingsDir = await fs.mkdtemp(prefix);
});

afterEach(async () => {
	// Nothing to remove if directory creation never succeeded.
	if (!recordingsDir) {
		return;
	}
	await fs.rm(recordingsDir, { recursive: true, force: true });
});
|
||||
|
||||
describe("GuideStore", () => {
|
||||
it("creates and reads an empty guide session", async () => {
|
||||
const store = new GuideStore(recordingsDir);
|
||||
|
||||
const session = await store.startSession(123);
|
||||
const readSession = await store.readSession(123);
|
||||
|
||||
expect(session.recordingId).toBe("123");
|
||||
expect(session.status).toBe("recording");
|
||||
expect(session.guidePath).toBe(path.join(recordingsDir, "recording-123.guide.json"));
|
||||
expect(readSession).toEqual(session);
|
||||
await expect(fs.stat(session.outputDir)).resolves.toMatchObject({
|
||||
isDirectory: expect.any(Function),
|
||||
});
|
||||
});
|
||||
|
||||
it("adds marker events in timeline order", async () => {
|
||||
const store = new GuideStore(recordingsDir);
|
||||
await store.startSession(456);
|
||||
|
||||
await store.addMarker({ recordingId: 456, kind: "manual", timeMs: 2000, label: "Later" });
|
||||
const result = await store.addMarker({
|
||||
recordingId: 456,
|
||||
kind: "hotkey",
|
||||
timeMs: 500,
|
||||
label: "First",
|
||||
normalizedX: 0.25,
|
||||
normalizedY: 0.75,
|
||||
});
|
||||
|
||||
expect(result.event.kind).toBe("hotkey");
|
||||
expect(result.event).toMatchObject({
|
||||
x: 0.25,
|
||||
y: 0.75,
|
||||
normalizedX: 0.25,
|
||||
normalizedY: 0.75,
|
||||
});
|
||||
expect(result.session.events.map((event) => event.timeMs)).toEqual([500, 2000]);
|
||||
expect(result.session.events[0]?.source).toBe("guide-hotkey");
|
||||
expect(result.session.events[1]?.source).toBe("review-ui");
|
||||
});
|
||||
|
||||
it("finalizes a session against the saved video path", async () => {
|
||||
const store = new GuideStore(recordingsDir);
|
||||
await store.startSession(789);
|
||||
const videoPath = path.join(recordingsDir, "recording-789.mp4");
|
||||
await fs.writeFile(videoPath, "");
|
||||
|
||||
const session = await store.finalizeEvents({ recordingId: 789, videoPath });
|
||||
|
||||
expect(session.status).toBe("events-ready");
|
||||
expect(session.videoPath).toBe(videoPath);
|
||||
expect(session.guidePath).toBe(path.join(recordingsDir, "recording-789.guide.json"));
|
||||
});
|
||||
|
||||
it("adds cursor click events when finalizing a session", async () => {
|
||||
const store = new GuideStore(recordingsDir);
|
||||
await store.startSession(790);
|
||||
await store.addMarker({ recordingId: 790, kind: "manual", timeMs: 250, label: "Manual" });
|
||||
const videoPath = path.join(recordingsDir, "recording-790.mp4");
|
||||
await fs.writeFile(videoPath, "");
|
||||
await fs.writeFile(
|
||||
`${videoPath}.cursor.json`,
|
||||
JSON.stringify({
|
||||
version: 2,
|
||||
provider: "native",
|
||||
assets: [],
|
||||
samples: [
|
||||
{ timeMs: 100, cx: 0.2, cy: 0.3, interactionType: "move" },
|
||||
{ timeMs: 200, cx: 0.4, cy: 0.5, interactionType: "click" },
|
||||
{ timeMs: 225, cx: 0.401, cy: 0.501, interactionType: "click" },
|
||||
],
|
||||
}),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
const session = await store.finalizeEvents({ recordingId: 790, videoPath });
|
||||
|
||||
expect(session.cursorPath).toBe(`${videoPath}.cursor.json`);
|
||||
expect(session.events.map((event) => event.kind)).toEqual(["click", "manual"]);
|
||||
expect(session.events[0]).toMatchObject({
|
||||
timeMs: 200,
|
||||
normalizedX: 0.4,
|
||||
normalizedY: 0.5,
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects guide artifacts outside the recordings directory", async () => {
|
||||
const store = new GuideStore(recordingsDir);
|
||||
await store.startSession(321);
|
||||
const outsideVideoPath = path.join(path.dirname(recordingsDir), "outside.mp4");
|
||||
|
||||
await expect(
|
||||
store.finalizeEvents({ recordingId: 321, videoPath: outsideVideoPath }),
|
||||
).rejects.toMatchObject({
|
||||
code: "guide-invalid-input",
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects invalid guide session schema", async () => {
|
||||
const store = new GuideStore(recordingsDir);
|
||||
await fs.writeFile(
|
||||
path.join(recordingsDir, "recording-bad.guide.json"),
|
||||
JSON.stringify({ schemaVersion: 999 }),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
await expect(store.readSession("bad")).rejects.toBeInstanceOf(GuideStoreError);
|
||||
await expect(store.readSession("bad")).rejects.toMatchObject({
|
||||
code: "guide-invalid-schema",
|
||||
});
|
||||
});
|
||||
|
||||
it("saves a reviewed generated guide", async () => {
|
||||
const store = new GuideStore(recordingsDir);
|
||||
await store.startSession(654);
|
||||
|
||||
const session = await store.saveGuide({
|
||||
recordingId: 654,
|
||||
generatedGuide: {
|
||||
title: "Huong dan thao tac",
|
||||
steps: [
|
||||
{
|
||||
id: "step-1",
|
||||
order: 1,
|
||||
title: "Mo cai dat",
|
||||
instruction: "Nhan nut Settings.",
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
expect(session.status).toBe("reviewed");
|
||||
expect(session.generatedGuide?.steps).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("writes snapshots and builds candidates without OCR", async () => {
|
||||
const store = new GuideStore(recordingsDir);
|
||||
await store.startSession(112);
|
||||
await store.addMarker({ recordingId: 112, kind: "manual", timeMs: 500, label: "Save" });
|
||||
const videoPath = path.join(recordingsDir, "recording-112.mp4");
|
||||
await fs.writeFile(videoPath, "");
|
||||
const eventsSession = await store.finalizeEvents({ recordingId: 112, videoPath });
|
||||
|
||||
const session = await store.writeSnapshot({
|
||||
recordingId: 112,
|
||||
eventId: eventsSession.events[0]?.id ?? "",
|
||||
timeMs: 1000,
|
||||
offsetMs: 500,
|
||||
width: 800,
|
||||
height: 600,
|
||||
pngBytes: new Uint8Array([137, 80, 78, 71]).buffer,
|
||||
markedPngBytes: new Uint8Array([137, 80, 78, 71, 1]).buffer,
|
||||
});
|
||||
|
||||
expect(session.status).toBe("snapshots-ready");
|
||||
expect(session.snapshots).toHaveLength(1);
|
||||
expect(session.candidates[0]).toMatchObject({ targetText: "Save" });
|
||||
await expect(fs.readFile(session.snapshots[0]?.path ?? "")).resolves.toEqual(
|
||||
Buffer.from([137, 80, 78, 71]),
|
||||
);
|
||||
await expect(fs.readFile(session.snapshots[0]?.markedPath ?? "")).resolves.toEqual(
|
||||
Buffer.from([137, 80, 78, 71, 1]),
|
||||
);
|
||||
});
|
||||
|
||||
it("runs OCR, generates a local draft, and exports files", async () => {
	const recordingId = 113;
	const store = new GuideStore(recordingsDir, {
		ocrClient: {
			// Stub OCR client: reports a single "Save" block for whatever snapshot it is given.
			recognize: async (snapshot) => [
				{
					id: `ocr-${snapshot.id}-1`,
					snapshotId: snapshot.id,
					text: "Save",
					confidence: 0.95,
					box: { x: 0.45, y: 0.45, width: 0.15, height: 0.08 },
				},
			],
		},
	});
	await store.startSession(recordingId);

	// Seed an empty video file plus a cursor sidecar that contains one click sample.
	const videoPath = path.join(recordingsDir, "recording-113.mp4");
	await fs.writeFile(videoPath, "");
	const cursorSidecar = JSON.stringify({
		samples: [{ timeMs: 200, cx: 0.5, cy: 0.5, interactionType: "click" }],
	});
	await fs.writeFile(`${videoPath}.cursor.json`, cursorSidecar, "utf-8");

	const { events } = await store.finalizeEvents({ recordingId, videoPath });
	await store.writeSnapshot({
		recordingId,
		eventId: events[0]?.id ?? "",
		timeMs: 700,
		offsetMs: 500,
		width: 800,
		height: 600,
		pngBytes: new Uint8Array([1, 2, 3]).buffer,
	});

	// Run the pipeline end to end: OCR -> local draft -> Markdown and HTML exports.
	const afterOcr = await store.runOcr({ recordingId });
	const afterDraft = await store.generateDraft({
		recordingId,
		language: "en",
		provider: "local",
	});
	const markdownExport = await store.exportMarkdown({ recordingId });
	const htmlExport = await store.exportHtml({ recordingId });

	expect(afterOcr.candidates[0]).toMatchObject({ targetText: "Save" });
	expect(afterDraft.generatedGuide?.steps[0]?.instruction).toBe('Click "Save".');
	await expect(fs.readFile(markdownExport.path, "utf-8")).resolves.toContain("# User guide");
	await expect(fs.readFile(htmlExport.path, "utf-8")).resolves.toContain("<!doctype html>");
});
|
||||
|
||||
it("resumes OCR without reprocessing completed snapshots", async () => {
	const recordingId = 115;
	// Every snapshot id handed to the stub OCR client, in call order.
	const recognizedSnapshotIds: string[] = [];
	const store = new GuideStore(recordingsDir, {
		ocrClient: {
			recognize: async (snapshot) => {
				recognizedSnapshotIds.push(snapshot.id);
				return [];
			},
		},
	});
	await store.startSession(recordingId);

	const { event: firstEvent } = await store.addMarker({
		recordingId,
		kind: "hotkey",
		timeMs: 100,
		label: "Ctrl+F12 marker",
		normalizedX: 0.25,
		normalizedY: 0.35,
	});
	const { event: secondEvent } = await store.addMarker({
		recordingId,
		kind: "hotkey",
		timeMs: 300,
		label: "Ctrl+F12 marker",
		normalizedX: 0.6,
		normalizedY: 0.7,
	});

	await store.writeSnapshot({
		recordingId,
		eventId: firstEvent?.id ?? "",
		timeMs: 100,
		offsetMs: 0,
		width: 800,
		height: 600,
		pngBytes: new Uint8Array([1, 2, 3]).buffer,
	});
	await store.writeSnapshot({
		recordingId,
		eventId: secondEvent?.id ?? "",
		timeMs: 300,
		offsetMs: 0,
		width: 800,
		height: 600,
		pngBytes: new Uint8Array([4, 5, 6]).buffer,
	});

	const firstSnapshotId = `snapshot-${firstEvent?.id}`;
	const secondSnapshotId = `snapshot-${secondEvent?.id}`;

	// Pass 1: restricted to the first snapshot only.
	await store.runOcr({ recordingId, snapshotIds: [firstSnapshotId] });
	expect(recognizedSnapshotIds).toEqual([firstSnapshotId]);

	// Pass 2: unrestricted; only the not-yet-processed snapshot should reach the client.
	const resumedSession = await store.runOcr({ recordingId });
	expect(recognizedSnapshotIds).toEqual([firstSnapshotId, secondSnapshotId]);
	expect(resumedSession.snapshots.every((snapshot) => snapshot.ocrCompletedAt)).toBe(true);

	// Pass 3: everything is complete, so the client must not be called again.
	await store.runOcr({ recordingId });
	expect(recognizedSnapshotIds).toEqual([firstSnapshotId, secondSnapshotId]);
});
|
||||
|
||||
it("repairs generic hotkey marker text and attaches AI draft artifacts", async () => {
	const recordingId = 114;
	const store = new GuideStore(recordingsDir, {
		ocrClient: {
			// Stub OCR client: one "Save" block whose box contains the marker position (0.5, 0.5).
			recognize: async (snapshot) => [
				{
					id: `ocr-${snapshot.id}-1`,
					snapshotId: snapshot.id,
					text: "Save",
					confidence: 0.95,
					box: { x: 0.45, y: 0.45, width: 0.15, height: 0.08 },
				},
			],
		},
		draftClient: {
			// Stub draft client: echoes the generic hotkey label that the store is expected to repair.
			generate: async () => ({
				title: "Guide",
				steps: [
					{
						id: "guide-step-1",
						order: 1,
						title: "Step 1: Click Ctrl+F12 marker",
						instruction: "Click Ctrl+F12 marker.",
					},
				],
			}),
		},
	});
	await store.startSession(recordingId);
	await store.addMarker({
		recordingId,
		kind: "hotkey",
		timeMs: 200,
		label: "Ctrl+F12 marker",
		normalizedX: 0.5,
		normalizedY: 0.5,
	});

	const videoPath = path.join(recordingsDir, "recording-114.mp4");
	await fs.writeFile(videoPath, "");
	const { events } = await store.finalizeEvents({ recordingId, videoPath });
	await store.writeSnapshot({
		recordingId,
		eventId: events[0]?.id ?? "",
		timeMs: 700,
		offsetMs: 500,
		width: 800,
		height: 600,
		pngBytes: new Uint8Array([1, 2, 3]).buffer,
	});
	await store.runOcr({ recordingId });

	const drafted = await store.generateDraft({
		recordingId,
		language: "en",
		provider: "deepseek",
	});

	const [firstCandidate] = drafted.candidates;
	expect(firstCandidate).toMatchObject({
		targetText: "Save",
		position: { xPercent: 50, yPercent: 50 },
	});
	expect(drafted.generatedGuide?.steps[0]).toMatchObject({
		title: "Step 1: Save",
		instruction: 'Click "Save".',
		sourceCandidateId: firstCandidate?.id,
		screenshotPath: drafted.snapshots[0]?.path,
	});
});
|
||||
|
||||
it("discards a guide session and output directory", async () => {
	const store = new GuideStore(recordingsDir);
	const { guidePath, outputDir } = await store.startSession(111);
	// Put a file in the output directory so the discard has content to remove.
	await fs.writeFile(path.join(outputDir, "step-001.png"), "");

	await store.discardSession({ recordingId: 111 });

	const notFound = { code: "ENOENT" };
	await expect(fs.stat(guidePath)).rejects.toMatchObject(notFound);
	await expect(fs.stat(outputDir)).rejects.toMatchObject(notFound);
});
|
||||
});
|
||||
@@ -0,0 +1,276 @@
|
||||
import { type ChildProcessWithoutNullStreams, execFile, spawn } from "node:child_process";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { promisify } from "node:util";
|
||||
import { app } from "electron";
|
||||
|
||||
/** Base URL used when the caller of `ensureBundledOcrServiceRunning` passes none. */
const DEFAULT_OCR_BASE_URL = "http://127.0.0.1:8866";
/** Port handed to the spawned service via `OPENSCREEN_OCR_PORT`; also the only explicit port treated as "ours". */
const DEFAULT_OCR_PORT = "8866";
/** Name queried and started through `sc.exe` on Windows. */
const WINDOWS_SERVICE_NAME = "OpenScreenOCR";
/** File name of the service executable searched for in the candidate directories. */
const SERVICE_EXE_NAME = "openscreen-ocr-service.exe";
/** Abort timeout for a single `/health` probe. */
const HEALTH_TIMEOUT_MS = 1000;
/** Overall budget for the service to become healthy after it was started. */
const STARTUP_TIMEOUT_MS = 90000;
/** Model directories copied from the bundled PaddleX cache into the user cache. */
const PADDLEX_MODEL_NAMES = ["PP-OCRv5_mobile_det", "latin_PP-OCRv5_mobile_rec"];
const execFileAsync = promisify(execFile);

/** Child process spawned by this module, or null when none is tracked. */
let ocrProcess: ChildProcessWithoutNullStreams | null = null;
/** In-flight startup shared by concurrent callers; reset to null once it settles. */
let startupPromise: Promise<void> | null = null;
/** Guards against registering the `before-quit` cleanup more than once. */
let quitHookRegistered = false;
|
||||
|
||||
/**
 * Makes a best effort to have the local OCR service answering at `baseUrl`.
 *
 * Does nothing when the URL is not one this module manages, when the service
 * is already healthy, or when no bundled executable can be found. On Windows
 * the installed service is tried first; otherwise the bundled executable is
 * spawned. Concurrent callers share a single in-flight startup.
 *
 * @param baseUrl Base URL of the OCR service (defaults to the local bundled one).
 * @throws When a started service does not become healthy in time.
 */
export async function ensureBundledOcrServiceRunning(
	baseUrl = DEFAULT_OCR_BASE_URL,
): Promise<void> {
	const nothingToDo =
		!shouldManageOcrService(baseUrl) || (await isOcrServiceHealthy(baseUrl, HEALTH_TIMEOUT_MS));
	if (nothingToDo) {
		return;
	}

	if (process.platform === "win32") {
		const serviceStarted = await startInstalledWindowsOcrService();
		if (serviceStarted) {
			await waitForOcrServiceHealth(baseUrl, STARTUP_TIMEOUT_MS);
			return;
		}
	}

	const executablePath = await findBundledOcrServiceExecutable();
	if (!executablePath) {
		return;
	}

	if (startupPromise === null) {
		// Clear the shared promise once it settles so a later call can retry.
		const clearPending = () => {
			startupPromise = null;
		};
		startupPromise = startAndWaitForOcrService(executablePath, baseUrl).finally(clearPending);
	}
	await startupPromise;
}
|
||||
|
||||
/**
 * Decides whether `baseUrl` points at the OCR service this module is allowed
 * to start on the user's behalf.
 *
 * The URL must be http(s), target the loopback host, and name the managed
 * port explicitly. A URL without a port resolves to the scheme default
 * (80/443), which is never where the managed service listens, so starting the
 * service for it would only end in a startup timeout.
 *
 * @param baseUrl Candidate OCR service base URL.
 * @returns `true` when the service behind the URL should be started/awaited here;
 *          `false` for foreign hosts, other ports, other schemes, or unparsable URLs.
 */
function shouldManageOcrService(baseUrl: string): boolean {
	let url: URL;
	try {
		url = new URL(baseUrl);
	} catch {
		// Not a parsable absolute URL: leave it to the caller's own request to fail.
		return false;
	}

	const hostname = url.hostname.toLowerCase();
	const isHttp = url.protocol === "http:" || url.protocol === "https:";
	const isLoopback = hostname === "127.0.0.1" || hostname === "localhost";
	// `url.port` is "" when the URL uses the scheme's default port, so this only
	// matches an explicit ":8866".
	const isManagedPort = url.port === DEFAULT_OCR_PORT;
	return isHttp && isLoopback && isManagedPort;
}
|
||||
|
||||
/**
 * Tries to bring up the installed Windows service through `sc.exe`.
 *
 * @returns `false` when the service cannot be queried; `true` when it already
 *          reports RUNNING, when `sc start` succeeds, or when `sc start`
 *          output indicates the service is already running (text match or
 *          the number 1056).
 */
async function startInstalledWindowsOcrService(): Promise<boolean> {
	const queryResult = await runSc(["query", WINDOWS_SERVICE_NAME]);
	if (!queryResult.success) {
		return false;
	}
	if (/\bRUNNING\b/i.test(queryResult.output)) {
		return true;
	}

	const { success, output } = await runSc(["start", WINDOWS_SERVICE_NAME]);
	if (success) {
		return true;
	}
	return /\b1056\b/.test(output) || /already running/i.test(output);
}
|
||||
|
||||
/**
 * Runs `sc.exe` with the given arguments and never rejects.
 *
 * @param args Arguments passed to `sc.exe`.
 * @returns `success` tells whether the command resolved; `output` is stdout
 *          and stderr joined by a newline (missing streams become "").
 */
async function runSc(args: string[]): Promise<{ success: boolean; output: string }> {
	const joinStreams = (stdout?: string, stderr?: string): string =>
		`${stdout ?? ""}\n${stderr ?? ""}`;

	try {
		const { stdout, stderr } = await execFileAsync("sc.exe", args, {
			windowsHide: true,
			timeout: 10000,
			maxBuffer: 512 * 1024,
		});
		return { success: true, output: joinStreams(stdout, stderr) };
	} catch (error) {
		// A rejected execFile carries whatever output was captured before the failure.
		const { stdout, stderr } = error as { stdout?: string; stderr?: string };
		return { success: false, output: joinStreams(stdout, stderr) };
	}
}
|
||||
|
||||
/**
 * Locates the OCR service executable.
 *
 * Candidates are checked in order: the `OPENSCREEN_GUIDE_OCR_EXE` override,
 * two locations under `process.resourcesPath`, then the build output under
 * the current working directory.
 *
 * @returns The first candidate that is an existing regular file, or `null`.
 */
async function findBundledOcrServiceExecutable(): Promise<string | null> {
	const resourcesOcrDir = path.join(process.resourcesPath, "ocr-service");
	const searchPaths = [
		process.env.OPENSCREEN_GUIDE_OCR_EXE,
		path.join(resourcesOcrDir, SERVICE_EXE_NAME),
		path.join(resourcesOcrDir, "openscreen-ocr-service", SERVICE_EXE_NAME),
		path.resolve(process.cwd(), "tools", "ocr", "dist", "openscreen-ocr-service", SERVICE_EXE_NAME),
	].filter(
		(entry): entry is string => typeof entry === "string" && entry.length > 0,
	);

	for (const searchPath of searchPaths) {
		const stats = await fs.stat(searchPath).catch(() => null);
		// A failed stat (missing/unreadable path) just moves on to the next candidate.
		if (stats?.isFile()) {
			return searchPath;
		}
	}
	return null;
}
|
||||
|
||||
/**
 * Prepares the runtime directories, spawns the service unless a live child is
 * already tracked, and waits until the service reports healthy.
 *
 * @param executablePath Path of the service executable to spawn.
 * @param baseUrl Base URL polled for health.
 */
async function startAndWaitForOcrService(executablePath: string, baseUrl: string): Promise<void> {
	const runtimePaths = await prepareOcrRuntimePaths();

	const hasLiveChild = ocrProcess !== null && ocrProcess.exitCode === null && !ocrProcess.killed;
	if (!hasLiveChild) {
		startOcrServiceProcess(executablePath, runtimePaths);
	}

	await waitForOcrServiceHealth(baseUrl, STARTUP_TIMEOUT_MS);
}
|
||||
|
||||
/**
 * Resolves the per-user model cache directories under Electron's `userData`
 * path and seeds the PaddleX cache from the bundled models.
 *
 * @returns The model cache root and its `paddlex` subdirectory.
 */
async function prepareOcrRuntimePaths(): Promise<{
	modelCachePath: string;
	paddlexCachePath: string;
}> {
	const userDataDir = app.getPath("userData");
	const modelCachePath = path.join(userDataDir, "ocr-models");
	const paddlexCachePath = path.join(modelCachePath, "paddlex");

	await seedBundledPaddlexModels(paddlexCachePath);

	return { modelCachePath, paddlexCachePath };
}
|
||||
|
||||
/**
 * Copies each bundled PaddleX model directory into the destination cache,
 * skipping models that are not bundled or that already exist at the
 * destination. Does nothing when no bundled cache is found.
 *
 * @param destinationCachePath PaddleX cache directory to populate.
 */
async function seedBundledPaddlexModels(destinationCachePath: string): Promise<void> {
	const bundledCachePath = await findBundledPaddlexModelCache();
	if (!bundledCachePath) {
		return;
	}

	const bundledModelsDir = path.join(bundledCachePath, "official_models");
	const targetModelsDir = path.join(destinationCachePath, "official_models");
	await fs.mkdir(targetModelsDir, { recursive: true });

	for (const modelName of PADDLEX_MODEL_NAMES) {
		const from = path.join(bundledModelsDir, modelName);
		const to = path.join(targetModelsDir, modelName);

		// Copy only when the model is bundled and not yet present in the user cache.
		const needsCopy = (await pathExists(from)) && !(await pathExists(to));
		if (!needsCopy) {
			continue;
		}

		await fs.cp(from, to, {
			recursive: true,
			errorOnExist: false,
			force: false,
		});
	}
}
|
||||
|
||||
/**
 * Locates the bundled PaddleX model cache, checking the packaged resources
 * directory first and the repository `tools/ocr/models` directory second.
 *
 * @returns The first candidate that is an existing directory, or `null`.
 */
async function findBundledPaddlexModelCache(): Promise<string | null> {
	const searchPaths = [
		path.join(process.resourcesPath, "ocr-models", "paddlex"),
		path.resolve(process.cwd(), "tools", "ocr", "models", "paddlex"),
	];

	for (const searchPath of searchPaths) {
		const stats = await fs.stat(searchPath).catch(() => null);
		// A failed stat just moves on to the next candidate.
		if (stats?.isDirectory()) {
			return searchPath;
		}
	}
	return null;
}
|
||||
|
||||
/**
 * Reports whether `value` can be accessed on disk.
 *
 * @param value File system path to probe.
 * @returns `true` when `fs.access` succeeds, `false` on any failure.
 */
async function pathExists(value: string): Promise<boolean> {
	return fs.access(value).then(
		() => true,
		() => false,
	);
}
|
||||
|
||||
/**
 * Spawns the bundled OCR service and tracks it in the module-level
 * `ocrProcess` handle.
 *
 * The child inherits the current environment, with host/port pinned to the
 * local defaults and the Paddle cache locations pointed at `runtimePaths`
 * unless the corresponding variables are already set. Its stdout/stderr are
 * forwarded to the console.
 *
 * @param executablePath Path of the service executable; also determines the child's cwd.
 * @param runtimePaths Cache directories produced by `prepareOcrRuntimePaths`.
 */
function startOcrServiceProcess(
	executablePath: string,
	runtimePaths: { modelCachePath: string; paddlexCachePath: string },
): void {
	registerQuitHook();
	const child = spawn(executablePath, [], {
		cwd: path.dirname(executablePath),
		env: {
			...process.env,
			OPENSCREEN_OCR_HOST: "127.0.0.1",
			OPENSCREEN_OCR_PORT: DEFAULT_OCR_PORT,
			PADDLEOCR_DEVICE: process.env.PADDLEOCR_DEVICE ?? "cpu",
			PADDLEOCR_ENABLE_MKLDNN: process.env.PADDLEOCR_ENABLE_MKLDNN ?? "0",
			PADDLEOCR_LANG: process.env.PADDLEOCR_LANG ?? "",
			PADDLEOCR_USE_MOBILE: process.env.PADDLEOCR_USE_MOBILE ?? "1",
			OPENSCREEN_OCR_PROFILE:
				process.env.OPENSCREEN_OCR_PROFILE ?? process.env.OPENSCREEN_GUIDE_OCR_PROFILE ?? "",
			OPENSCREEN_OCR_WARMUP: process.env.OPENSCREEN_OCR_WARMUP ?? "1",
			PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT: process.env.PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT ?? "False",
			PADDLE_PDX_CACHE_HOME: process.env.PADDLE_PDX_CACHE_HOME ?? runtimePaths.paddlexCachePath,
			PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK:
				process.env.PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK ?? "True",
			PADDLE_HOME: process.env.PADDLE_HOME ?? path.join(runtimePaths.modelCachePath, "paddle"),
			PADDLEOCR_HOME:
				process.env.PADDLEOCR_HOME ?? path.join(runtimePaths.modelCachePath, "paddleocr"),
			PYTHONUTF8: "1",
		},
		windowsHide: true,
	});
	ocrProcess = child;

	// Only drop the shared handle if it still refers to this child; a late
	// event from an old child must not discard a newer process.
	const releaseHandle = (): void => {
		if (ocrProcess === child) {
			ocrProcess = null;
		}
	};

	child.stdout.on("data", (chunk) => {
		console.info(`[guide-ocr-service] ${chunk.toString().trim()}`);
	});
	child.stderr.on("data", (chunk) => {
		console.warn(`[guide-ocr-service] ${chunk.toString().trim()}`);
	});
	// An "error" event with no listener is thrown as an uncaught exception, so a
	// missing or non-executable binary would otherwise take down the main process.
	child.on("error", (error) => {
		console.warn("[guide-ocr-service] process error", error);
		releaseHandle();
	});
	child.on("exit", (code, signal) => {
		console.info("[guide-ocr-service] exited", { code, signal });
		releaseHandle();
	});
}
|
||||
|
||||
/**
 * Registers, at most once, a `before-quit` listener that kills the tracked
 * OCR child process and clears the module-level handle.
 */
function registerQuitHook(): void {
	if (!quitHookRegistered) {
		quitHookRegistered = true;
		app.once("before-quit", () => {
			// Detach the handle first, then stop whatever it pointed at.
			const child = ocrProcess;
			ocrProcess = null;
			child?.kill();
		});
	}
}
|
||||
|
||||
/**
 * Polls the OCR service health endpoint until it answers OK.
 *
 * Fails fast when the child process that was tracked at call time terminates,
 * instead of polling a dead service until the timeout expires.
 *
 * @param baseUrl Base URL of the OCR service.
 * @param timeoutMs Maximum time to keep polling, in milliseconds.
 * @throws When the tracked child exits (by code or signal) before becoming
 *         healthy, or when `timeoutMs` elapses.
 */
async function waitForOcrServiceHealth(baseUrl: string, timeoutMs: number): Promise<void> {
	// Keep a local reference: the child's "exit" listener resets the module-level
	// `ocrProcess` to null, which would hide the exit from this loop.
	const trackedProcess = ocrProcess;
	const startedAt = Date.now();

	while (Date.now() - startedAt < timeoutMs) {
		if (await isOcrServiceHealthy(baseUrl, HEALTH_TIMEOUT_MS)) {
			return;
		}
		if (trackedProcess) {
			if (trackedProcess.exitCode !== null) {
				throw new Error(`Bundled OCR service exited with code ${trackedProcess.exitCode}.`);
			}
			// A child terminated by a signal has a null exit code but a signal name.
			if (trackedProcess.signalCode !== null) {
				throw new Error(`Bundled OCR service exited with signal ${trackedProcess.signalCode}.`);
			}
		}
		await sleep(750);
	}

	throw new Error("Timed out waiting for bundled OCR service to start.");
}
|
||||
|
||||
/**
 * Probes `<baseUrl>/health` once.
 *
 * @param baseUrl Base URL of the OCR service; one trailing slash is ignored.
 * @param timeoutMs Time after which the request is aborted.
 * @returns `true` only for an OK HTTP response; any failure (network error,
 *          abort, invalid URL) yields `false`.
 */
async function isOcrServiceHealthy(baseUrl: string, timeoutMs: number): Promise<boolean> {
	const abortController = new AbortController();
	const abortTimer = setTimeout(() => abortController.abort(), timeoutMs);
	const healthUrl = `${baseUrl.replace(/\/$/, "")}/health`;

	try {
		const { ok } = await fetch(healthUrl, { signal: abortController.signal });
		return ok;
	} catch {
		return false;
	} finally {
		clearTimeout(abortTimer);
	}
}
|
||||
|
||||
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param ms Delay passed to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
	return new Promise<void>((done) => {
		setTimeout(done, ms);
	});
}
|
||||
@@ -0,0 +1,33 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { OcrBlock } from "../../../src/guide/contracts";
|
||||
import { remapFocusedOcrBlocks } from "./focusedOcrSnapshot";
|
||||
|
||||
describe("remapFocusedOcrBlocks", () => {
	it("maps boxes from a focused crop back to the original snapshot coordinates", () => {
		// Box coordinates are fractions of the 640x360 crop.
		const cropRelativeBlock: OcrBlock = {
			id: "ocr-1",
			snapshotId: "snapshot-1",
			text: "Settings",
			confidence: 0.9,
			box: { x: 0.25, y: 0.5, width: 0.2, height: 0.1 },
		};
		const blocks: OcrBlock[] = [cropRelativeBlock];

		// The crop is the centered quarter-area window of a 1280x720 snapshot.
		const [first] = remapFocusedOcrBlocks(blocks, {
			cropX: 320,
			cropY: 180,
			cropWidth: 640,
			cropHeight: 360,
			originalWidth: 1280,
			originalHeight: 720,
		});

		// x: (320 + 0.25 * 640) / 1280, y: (180 + 0.5 * 360) / 720; sizes scale by crop / original.
		expect(first?.box).toEqual({
			x: 0.375,
			y: 0.5,
			width: 0.1,
			height: 0.05,
		});
	});
});
|
||||
@@ -0,0 +1,225 @@
|
||||
import { execFile } from "node:child_process";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { promisify } from "node:util";
|
||||
import type { GuideEvent, GuideSnapshot, OcrBlock } from "../../../src/guide/contracts";
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
/**
 * Describes where a focused crop sits inside the snapshot it was cut from.
 * All values are in pixels of the original snapshot.
 */
interface FocusTransform {
	/** Left edge of the crop. */
	cropX: number;
	/** Top edge of the crop. */
	cropY: number;
	/** Width of the crop before zooming. */
	cropWidth: number;
	/** Height of the crop before zooming. */
	cropHeight: number;
	/** Width of the snapshot the crop was taken from. */
	originalWidth: number;
	/** Height of the snapshot the crop was taken from. */
	originalHeight: number;
}

/** Result of `createFocusedOcrSnapshot`. */
export interface FocusedOcrSnapshot {
	/** Snapshot to run OCR on: the focused crop, or the unchanged input snapshot. */
	snapshot: GuideSnapshot;
	/** Present only when `snapshot` is a crop; needed to map OCR boxes back. */
	transform?: FocusTransform;
}
|
||||
|
||||
/**
 * Produces a zoomed crop of the snapshot around the event position, for OCR.
 *
 * Falls back to the unchanged snapshot (no `transform`) when not running on
 * Windows, when the event has no usable position, when no crop can be
 * computed or it would cover the whole snapshot, or when writing the cropped
 * image fails.
 *
 * @param input.snapshot Snapshot to focus.
 * @param input.event Event whose position is the center of the crop.
 * @param input.outputDir Directory under which the `ocr-focus` folder is created.
 * @returns The snapshot to OCR, plus the crop transform when a crop was written.
 */
export async function createFocusedOcrSnapshot(input: {
	snapshot: GuideSnapshot;
	event?: GuideEvent;
	outputDir: string;
}): Promise<FocusedOcrSnapshot> {
	const { snapshot, event, outputDir } = input;
	const unfocused: FocusedOcrSnapshot = { snapshot };

	if (process.platform !== "win32") {
		return unfocused;
	}

	const click = getEventPoint(event, snapshot);
	if (!click) {
		return unfocused;
	}

	const crop = calculateFocusCrop(snapshot, click);
	if (!crop) {
		return unfocused;
	}
	if (crop.cropWidth === snapshot.width && crop.cropHeight === snapshot.height) {
		return unfocused;
	}

	const focusDir = path.join(outputDir, "ocr-focus");
	await fs.mkdir(focusDir, { recursive: true });
	const focusPath = path.join(focusDir, `${path.parse(snapshot.path).name}-focus.png`);

	// The crop is rendered at twice its size.
	const zoom = 2;
	const focusedSnapshot: GuideSnapshot = {
		...snapshot,
		path: focusPath,
		width: crop.cropWidth * zoom,
		height: crop.cropHeight * zoom,
	};

	try {
		await writeFocusedPng({
			sourcePath: snapshot.path,
			outputPath: focusPath,
			cropX: crop.cropX,
			cropY: crop.cropY,
			cropWidth: crop.cropWidth,
			cropHeight: crop.cropHeight,
			outputWidth: focusedSnapshot.width,
			outputHeight: focusedSnapshot.height,
		});
	} catch {
		// Best effort: OCR simply runs on the full snapshot when cropping fails.
		return unfocused;
	}
	return { snapshot: focusedSnapshot, transform: crop };
}
|
||||
|
||||
/**
 * Converts OCR boxes expressed as fractions of a focused crop into fractions
 * of the original snapshot.
 *
 * @param blocks OCR blocks recognized on the focused crop.
 * @param transform Crop placement; when absent the blocks are returned as-is.
 * @returns New blocks with remapped boxes, each component clamped to [0, 1].
 */
export function remapFocusedOcrBlocks(
	blocks: OcrBlock[],
	transform: FocusedOcrSnapshot["transform"],
): OcrBlock[] {
	if (!transform) {
		return blocks;
	}

	const { cropX, cropY, cropWidth, cropHeight, originalWidth, originalHeight } = transform;
	return blocks.map((block) => {
		const { box } = block;
		return {
			...block,
			box: {
				x: clamp01((cropX + box.x * cropWidth) / originalWidth),
				y: clamp01((cropY + box.y * cropHeight) / originalHeight),
				width: clamp01((box.width * cropWidth) / originalWidth),
				height: clamp01((box.height * cropHeight) / originalHeight),
			},
		};
	});
}
|
||||
|
||||
/**
 * Extracts the event position as fractions of the snapshot size.
 *
 * Sources are tried in order: `normalizedX`/`normalizedY` when both lie in
 * [0, 1]; `x`/`y` taken as already normalized when both lie in [0, 1];
 * `x`/`y` taken as pixels when both lie inside the snapshot bounds.
 *
 * @returns The normalized point, or `null` when there is no event or no usable position.
 */
function getEventPoint(
	event: GuideEvent | undefined,
	snapshot: GuideSnapshot,
): { x: number; y: number } | null {
	if (!event) {
		return null;
	}

	const { normalizedX, normalizedY, x, y } = event;
	if (isNormalizedNumber(normalizedX) && isNormalizedNumber(normalizedY)) {
		return { x: normalizedX, y: normalizedY };
	}
	if (isNormalizedNumber(x) && isNormalizedNumber(y)) {
		return { x, y };
	}
	if (
		typeof x === "number" &&
		typeof y === "number" &&
		x >= 0 &&
		y >= 0 &&
		x <= snapshot.width &&
		y <= snapshot.height
	) {
		// Pixel coordinates inside the snapshot: scale down to fractions.
		return { x: clamp01(x / snapshot.width), y: clamp01(y / snapshot.height) };
	}
	return null;
}
|
||||
|
||||
/**
 * Computes the crop window centered on the click.
 *
 * The window is 42% of each snapshot dimension, bounded to 360-720 px wide
 * and 240-520 px high (never larger than the snapshot), then shifted as
 * needed to stay inside the snapshot.
 *
 * @param snapshot Snapshot providing the pixel dimensions.
 * @param click Click position as fractions of the snapshot size.
 * @returns The crop placement, or `null` for a snapshot with a non-positive dimension.
 */
function calculateFocusCrop(
	snapshot: GuideSnapshot,
	click: { x: number; y: number },
): FocusTransform | null {
	const { width, height } = snapshot;
	if (width <= 0 || height <= 0) {
		return null;
	}

	const cropWidth = clampInteger(
		Math.round(width * 0.42),
		Math.min(360, width),
		Math.min(720, width),
	);
	const cropHeight = clampInteger(
		Math.round(height * 0.42),
		Math.min(240, height),
		Math.min(520, height),
	);

	// Click position in snapshot pixels.
	const centerX = Math.round(clamp01(click.x) * width);
	const centerY = Math.round(clamp01(click.y) * height);

	// Center the window on the click, then keep it within the snapshot.
	const cropX = clampInteger(Math.round(centerX - cropWidth / 2), 0, width - cropWidth);
	const cropY = clampInteger(Math.round(centerY - cropHeight / 2), 0, height - cropHeight);

	return {
		cropX,
		cropY,
		cropWidth,
		cropHeight,
		originalWidth: width,
		originalHeight: height,
	};
}
|
||||
|
||||
/**
 * Crops and scales the source image into `outputPath` by running a generated
 * PowerShell script, passed base64-encoded via `-EncodedCommand`.
 *
 * @param input Source/output paths, crop rectangle and output size in pixels.
 * @throws When PowerShell fails, exceeds 30 s, or overflows the output buffer.
 */
async function writeFocusedPng(input: {
	sourcePath: string;
	outputPath: string;
	cropX: number;
	cropY: number;
	cropWidth: number;
	cropHeight: number;
	outputWidth: number;
	outputHeight: number;
}): Promise<void> {
	// -EncodedCommand takes the script as base64 of its UTF-16LE bytes.
	const encodedCommand = Buffer.from(buildCropScript(input), "utf16le").toString("base64");
	const powershellArgs = [
		"-NoProfile",
		"-ExecutionPolicy",
		"Bypass",
		"-EncodedCommand",
		encodedCommand,
	];
	const execOptions = {
		timeout: 30000,
		maxBuffer: 1024 * 1024,
		windowsHide: true,
	};
	await execFileAsync("powershell.exe", powershellArgs, execOptions);
}
|
||||
|
||||
/**
 * Builds the PowerShell script that crops `sourcePath` to the given rectangle
 * and saves it, scaled to the output size, as a PNG at `outputPath`.
 *
 * Paths are embedded as base64 and decoded inside the script, so their
 * characters never appear in the script text; the numeric fields are
 * interpolated directly.
 *
 * @param input Source/output paths, crop rectangle and output size in pixels.
 * @returns The script source.
 */
function buildCropScript(input: {
	sourcePath: string;
	outputPath: string;
	cropX: number;
	cropY: number;
	cropWidth: number;
	cropHeight: number;
	outputWidth: number;
	outputHeight: number;
}): string {
	const { cropX, cropY, cropWidth, cropHeight, outputWidth, outputHeight } = input;
	const toBase64 = (text: string): string => Buffer.from(text, "utf8").toString("base64");
	const sourcePathBase64 = toBase64(input.sourcePath);
	const outputPathBase64 = toBase64(input.outputPath);
	return `
$ErrorActionPreference = "Stop"
$sourcePath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${sourcePathBase64}"))
$outputPath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${outputPathBase64}"))
Add-Type -AssemblyName System.Drawing

$source = [System.Drawing.Image]::FromFile($sourcePath)
$target = [System.Drawing.Bitmap]::new(${outputWidth}, ${outputHeight})
$graphics = [System.Drawing.Graphics]::FromImage($target)
try {
$graphics.Clear([System.Drawing.Color]::White)
$graphics.InterpolationMode = [System.Drawing.Drawing2D.InterpolationMode]::HighQualityBicubic
$graphics.SmoothingMode = [System.Drawing.Drawing2D.SmoothingMode]::HighQuality
$graphics.PixelOffsetMode = [System.Drawing.Drawing2D.PixelOffsetMode]::HighQuality
$sourceRect = [System.Drawing.Rectangle]::new(${cropX}, ${cropY}, ${cropWidth}, ${cropHeight})
$targetRect = [System.Drawing.Rectangle]::new(0, 0, ${outputWidth}, ${outputHeight})
$graphics.DrawImage($source, $targetRect, $sourceRect, [System.Drawing.GraphicsUnit]::Pixel)
$target.Save($outputPath, [System.Drawing.Imaging.ImageFormat]::Png)
} finally {
$graphics.Dispose()
$target.Dispose()
$source.Dispose()
}
`;
}
|
||||
|
||||
/**
 * Type guard for a finite number within the closed interval [0, 1].
 *
 * @param value Any value.
 */
function isNormalizedNumber(value: unknown): value is number {
	if (typeof value !== "number" || !Number.isFinite(value)) {
		return false;
	}
	return value >= 0 && value <= 1;
}
|
||||
|
||||
/**
 * Limits `value` to the range [`min`, `max`] and rounds the result.
 *
 * @param value Number to limit.
 * @param min Lower bound.
 * @param max Upper bound.
 * @returns The rounded, limited value; for an inverted range (`max < min`)
 *          `min` is returned unchanged (not rounded).
 */
function clampInteger(value: number, min: number, max: number): number {
	if (max < min) {
		return min;
	}
	const atLeastMin = Math.max(min, value);
	const withinRange = Math.min(max, atLeastMin);
	return Math.round(withinRange);
}
|
||||
|
||||
/**
 * Limits `value` to the closed interval [0, 1].
 *
 * @param value Number to limit.
 * @returns The limited value; non-finite input (NaN, ±Infinity) yields 0.
 */
function clamp01(value: number): number {
	return Number.isFinite(value) ? Math.min(1, Math.max(0, value)) : 0;
}
|
||||
@@ -0,0 +1,147 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import type { GuideSnapshot, OcrBlock } from "../../../src/guide/contracts";
|
||||
import {
|
||||
DefaultGuideOcrClient,
|
||||
normalizeOcrResponse,
|
||||
PaddleOcrHttpClient,
|
||||
parseWindowsOcrPayload,
|
||||
} from "./paddleOcrClient";
|
||||
|
||||
// Shared 1000x800 snapshot fixture; the pixel boxes in the specs below are normalized against it.
const snapshot: GuideSnapshot = {
	id: "snapshot-1",
	eventId: "event-1",
	timeMs: 1000,
	offsetMs: 500,
	path: "/tmp/step-001.png",
	width: 1000,
	height: 800,
};

// Undo any `vi.stubGlobal` (e.g. the `fetch` stub) so specs do not leak into each other.
afterEach(() => {
	vi.unstubAllGlobals();
});
|
||||
|
||||
describe("normalizeOcrResponse", () => {
	it("normalizes pixel boxes into guide OCR blocks", () => {
		// Object payload: `blocks` array, confidence as a percentage, box in pixels.
		const payload = {
			blocks: [
				{
					text: "Save",
					confidence: 92,
					box: { x: 400, y: 320, width: 120, height: 40 },
				},
			],
		};

		const blocks = normalizeOcrResponse(payload, snapshot);

		expect(blocks).toEqual([
			{
				id: "ocr-snapshot-1-1",
				snapshotId: "snapshot-1",
				text: "Save",
				confidence: 0.92,
				box: { x: 0.4, y: 0.4, width: 0.12, height: 0.05 },
			},
		]);
	});

	it("normalizes polygon responses", () => {
		// Array payload: `score` instead of `confidence`, `bbox` as four corner points.
		const payload = [
			{
				text: "Next",
				score: 0.8,
				bbox: [
					[100, 200],
					[300, 200],
					[300, 260],
					[100, 260],
				],
			},
		];

		const [first] = normalizeOcrResponse(payload, snapshot);

		expect(first).toMatchObject({
			text: "Next",
			confidence: 0.8,
			box: { x: 0.1, y: 0.25, width: 0.2, height: 0.075 },
		});
	});
});
|
||||
|
||||
describe("PaddleOcrHttpClient", () => {
	it("sends the selected OCR profile to the local service", async () => {
		const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openscreen-ocr-client-"));
		const imagePath = path.join(tempDir, "step.png");
		await fs.writeFile(imagePath, Buffer.from([137, 80, 78, 71]));

		// Capture every JSON request body instead of contacting a real service.
		const requests: unknown[] = [];
		const fetchStub = vi.fn(async (_url: string, init?: RequestInit) => {
			requests.push(JSON.parse(String(init?.body ?? "{}")));
			return new Response(JSON.stringify({ blocks: [] }), {
				status: 200,
				headers: { "content-type": "application/json" },
			});
		});
		vi.stubGlobal("fetch", fetchStub);

		const client = new PaddleOcrHttpClient("https://ocr.example.test", "vi,en", "hybrid");
		await client.recognize({ ...snapshot, path: imagePath });

		const [firstRequest] = requests;
		expect(firstRequest).toMatchObject({
			language: "vi,en",
			profile: "hybrid",
			path: imagePath,
		});
		await fs.rm(tempDir, { recursive: true, force: true });
	});
});
|
||||
|
||||
describe("DefaultGuideOcrClient", () => {
	it("falls back when the HTTP OCR service is unavailable", async () => {
		const fallbackBlock: OcrBlock = {
			id: "ocr-snapshot-1-1",
			snapshotId: "snapshot-1",
			text: "Save",
			confidence: 0.75,
			box: { x: 0.1, y: 0.2, width: 0.3, height: 0.4 },
		};
		// Primary client always fails; the secondary client supplies the result.
		const failingHttpClient = {
			recognize: async () => {
				throw new Error("HTTP down");
			},
		};
		const workingFallbackClient = {
			recognize: async () => [fallbackBlock],
		};

		const client = new DefaultGuideOcrClient(failingHttpClient, workingFallbackClient);

		await expect(client.recognize(snapshot)).resolves.toEqual([fallbackBlock]);
	});
});
|
||||
|
||||
describe("parseWindowsOcrPayload", () => {
	it("recovers from raw control characters in OCR text", () => {
		// The \u0001 escape yields a raw control character inside the JSON text.
		const rawOutput =
			'{"blocks":[{"text":"Save\u0001now","confidence":0.75,"box":{"x":1,"y":2,"width":3,"height":4}}]}';

		const payload = parseWindowsOcrPayload(rawOutput);

		// The control character is expected to come back as a space.
		expect(payload).toEqual({
			blocks: [
				{
					text: "Save now",
					confidence: 0.75,
					box: { x: 1, y: 2, width: 3, height: 4 },
				},
			],
		});
	});
});
|
||||
@@ -0,0 +1,414 @@
|
||||
import { execFile } from "node:child_process";
|
||||
import fs from "node:fs/promises";
|
||||
import { promisify } from "node:util";
|
||||
import type { GuideOcrProfile, GuideSnapshot, OcrBlock } from "../../../src/guide/contracts";
|
||||
import { ensureBundledOcrServiceRunning } from "./bundledOcrService";
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
/** Anything that can turn a snapshot image into OCR blocks. */
export interface GuideOcrClient {
	/** Recognizes the text of `snapshot` and resolves with the resulting blocks. */
	recognize(snapshot: GuideSnapshot): Promise<OcrBlock[]>;
}

/** Settings accepted by `DefaultGuideOcrClient.fromConfig`. */
export interface GuideOcrClientConfig {
	/** OCR profile forwarded to the HTTP service. */
	profile: GuideOcrProfile;
	/** OCR language setting, passed to both the HTTP and the Windows client. */
	language: string;
}

/**
 * One block of a raw OCR response before normalization. Every field is
 * untrusted input, hence `unknown`.
 */
interface PaddleOcrResponseBlock {
	text?: unknown;
	confidence?: unknown;
	score?: unknown;
	box?: unknown;
	bbox?: unknown;
}
|
||||
|
||||
/**
 * OCR client that posts snapshots to the HTTP OCR service.
 *
 * Defaults come from `OPENSCREEN_GUIDE_OCR_URL`,
 * `OPENSCREEN_GUIDE_OCR_LANGUAGE` and `OPENSCREEN_GUIDE_OCR_PROFILE`.
 */
export class PaddleOcrHttpClient implements GuideOcrClient {
	constructor(
		private readonly baseUrl = process.env.OPENSCREEN_GUIDE_OCR_URL ?? "http://127.0.0.1:8866",
		private readonly language = normalizeOcrLanguage(process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE),
		private readonly profile = normalizeOcrProfile(process.env.OPENSCREEN_GUIDE_OCR_PROFILE),
	) {}

	/**
	 * Sends the snapshot image (base64 plus its path) to `<baseUrl>/ocr` and
	 * normalizes the JSON response.
	 *
	 * @throws When the request cannot be sent or the service answers with a non-OK status.
	 */
	async recognize(snapshot: GuideSnapshot): Promise<OcrBlock[]> {
		// Give the bundled service a chance to start before the request is made.
		await ensureBundledOcrServiceRunning(this.baseUrl);
		const imageBase64 = await fs.readFile(snapshot.path, "base64");
		const endpoint = `${this.baseUrl.replace(/\/$/, "")}/ocr`;

		let response: Response;
		try {
			const body = JSON.stringify({
				imageBase64,
				path: snapshot.path,
				language: this.language,
				profile: this.profile,
			});
			response = await fetch(endpoint, {
				method: "POST",
				headers: { "content-type": "application/json" },
				body,
			});
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			throw new Error(`OCR service is unavailable: ${reason}`);
		}

		if (!response.ok) {
			throw new Error(`OCR service returned HTTP ${response.status}.`);
		}

		const payload = (await response.json()) as unknown;
		return normalizeOcrResponse(payload, snapshot);
	}
}
|
||||
|
||||
/**
 * OCR client that runs a generated PowerShell script on Windows and parses
 * its stdout as the OCR payload.
 */
export class WindowsOcrClient implements GuideOcrClient {
	constructor(
		private readonly language = normalizeOcrLanguage(process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE),
	) {}

	/**
	 * Runs the OCR script for the snapshot image and normalizes the result.
	 *
	 * @throws On non-Windows platforms, when PowerShell fails, or when its
	 *         output cannot be parsed.
	 */
	async recognize(snapshot: GuideSnapshot): Promise<OcrBlock[]> {
		if (process.platform !== "win32") {
			throw new Error("Windows OCR fallback is only available on Windows.");
		}

		// -EncodedCommand takes the script as base64 of its UTF-16LE bytes.
		const script = buildWindowsOcrScript(snapshot.path, this.language);
		const encodedCommand = Buffer.from(script, "utf16le").toString("base64");
		const powershellArgs = [
			"-NoProfile",
			"-ExecutionPolicy",
			"Bypass",
			"-EncodedCommand",
			encodedCommand,
		];

		let stdout: string;
		try {
			({ stdout } = await execFileAsync("powershell.exe", powershellArgs, {
				maxBuffer: 8 * 1024 * 1024,
				timeout: 30000,
				windowsHide: true,
			}));
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			throw new Error(`Windows OCR failed: ${reason}`);
		}

		let payload: unknown;
		try {
			payload = parseWindowsOcrPayload(stdout);
		} catch (error) {
			const reason = error instanceof Error ? error.message : String(error);
			throw new Error(`Windows OCR returned invalid JSON: ${reason}`);
		}
		return normalizeOcrResponse(payload, snapshot);
	}
}
|
||||
|
||||
/**
 * Composite OCR client: tries the HTTP client first and falls back to the Windows client.
 */
export class DefaultGuideOcrClient implements GuideOcrClient {
	/** Builds a client whose two backends share the normalized language/profile settings. */
	static fromConfig(config?: Partial<GuideOcrClientConfig>): DefaultGuideOcrClient {
		const { language, profile } = normalizeOcrClientConfig(config);
		const http = new PaddleOcrHttpClient(undefined, language, profile);
		const windows = new WindowsOcrClient(language);
		return new DefaultGuideOcrClient(http, windows);
	}

	constructor(
		private readonly httpClient = new PaddleOcrHttpClient(),
		private readonly windowsClient = new WindowsOcrClient(),
	) {}

	/**
	 * Returns the HTTP client's result, or the Windows client's result if the HTTP client throws.
	 *
	 * @throws Error carrying both failure messages (space-separated) when both backends fail.
	 */
	async recognize(snapshot: GuideSnapshot): Promise<OcrBlock[]> {
		let httpFailure: unknown;
		try {
			return await this.httpClient.recognize(snapshot);
		} catch (error) {
			httpFailure = error;
		}

		try {
			return await this.windowsClient.recognize(snapshot);
		} catch (fallbackFailure) {
			const reasons = [httpFailure, fallbackFailure].map((failure) =>
				failure instanceof Error ? failure.message : String(failure),
			);
			throw new Error(reasons.join(" "));
		}
	}
}
|
||||
|
||||
/**
 * Fills in a complete OCR client config, preferring explicit values over the
 * OPENSCREEN_GUIDE_OCR_PROFILE / OPENSCREEN_GUIDE_OCR_LANGUAGE environment variables.
 */
function normalizeOcrClientConfig(
	config: Partial<GuideOcrClientConfig> | undefined,
): GuideOcrClientConfig {
	const requestedProfile = config?.profile ?? process.env.OPENSCREEN_GUIDE_OCR_PROFILE;
	const requestedLanguage = config?.language ?? process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE;
	return {
		profile: normalizeOcrProfile(requestedProfile),
		language: normalizeOcrLanguage(requestedLanguage),
	};
}
|
||||
|
||||
/** Returns `value` when it is a known OCR profile name, otherwise the default "vietnamese". */
function normalizeOcrProfile(value: string | undefined): GuideOcrProfile {
	switch (value) {
		case "fast":
		case "vietnamese":
		case "hybrid":
			return value;
		default:
			return "vietnamese";
	}
}
|
||||
|
||||
/**
 * Canonicalizes a comma-separated language list: each entry is trimmed and lower-cased,
 * empty entries are dropped. Falls back to "vi,en" when nothing usable remains.
 */
function normalizeOcrLanguage(value: string | undefined): string {
	const fallback = "vi,en";
	if (value == null) {
		return fallback;
	}

	const tags: string[] = [];
	for (const part of value.split(",")) {
		const tag = part.trim().toLowerCase();
		if (tag) {
			tags.push(tag);
		}
	}
	return tags.length > 0 ? tags.join(",") : fallback;
}
|
||||
|
||||
/**
 * Parses the JSON printed by the Windows OCR script.
 *
 * A leading BOM and surrounding whitespace are removed first. If strict parsing fails,
 * parsing is retried once with raw control characters replaced by spaces; a failure of
 * that second attempt propagates to the caller.
 */
export function parseWindowsOcrPayload(stdout: string): unknown {
	const jsonText = stdout.replace(/^\uFEFF/, "").trim();
	let parsed: unknown;
	try {
		parsed = JSON.parse(jsonText);
	} catch {
		parsed = JSON.parse(replaceRawJsonControlCharacters(jsonText));
	}
	return parsed;
}
|
||||
|
||||
/** Replaces every character whose code is below 32, or equal to 127, with a single space. */
function replaceRawJsonControlCharacters(value: string): string {
	return Array.from(value, (character) => {
		const code = character.charCodeAt(0);
		const isControl = code < 32 || code === 127;
		return isControl ? " " : character;
	}).join("");
}
|
||||
|
||||
/**
 * Converts a raw OCR payload into `OcrBlock`s for the given snapshot.
 * Entries that cannot be normalized are skipped; the index passed to `normalizeBlock`
 * is the entry's position in the raw list, so skipped entries leave gaps in block ids.
 */
export function normalizeOcrResponse(payload: unknown, snapshot: GuideSnapshot): OcrBlock[] {
	const blocks: OcrBlock[] = [];
	extractRawBlocks(payload).forEach((raw, index) => {
		const block = normalizeBlock(raw, snapshot, index);
		if (block !== null) {
			blocks.push(block);
		}
	});
	return blocks;
}
|
||||
|
||||
/**
 * Finds the list of raw blocks in a payload: either the payload itself when it is an
 * array, or the first array found under `blocks`, `results`, then `data`.
 * Returns an empty list when none of those shapes match.
 */
function extractRawBlocks(payload: unknown): PaddleOcrResponseBlock[] {
	if (Array.isArray(payload)) {
		return payload as PaddleOcrResponseBlock[];
	}
	if (!isRecord(payload)) {
		return [];
	}

	for (const key of ["blocks", "results", "data"]) {
		const candidate = payload[key];
		if (Array.isArray(candidate)) {
			return candidate as PaddleOcrResponseBlock[];
		}
	}
	return [];
}
|
||||
|
||||
/**
 * Normalizes one raw OCR entry.
 *
 * Returns null when the entry is not an object, has no non-blank `text`, or has no usable
 * box. Confidence is read from `confidence` (or `score`), the box from `box` (or `bbox`).
 * The block id is `ocr-<snapshot id>-<1-based index>`.
 */
function normalizeBlock(
	raw: PaddleOcrResponseBlock,
	snapshot: GuideSnapshot,
	index: number,
): OcrBlock | null {
	if (!isRecord(raw)) {
		return null;
	}

	const text = typeof raw.text === "string" ? raw.text.trim() : "";
	if (text === "") {
		return null;
	}

	const confidence = normalizeConfidence(raw.confidence ?? raw.score);
	const box = normalizeBox(raw.box ?? raw.bbox, snapshot);
	return box
		? {
				id: `ocr-${snapshot.id}-${index + 1}`,
				snapshotId: snapshot.id,
				text,
				confidence,
				box,
			}
		: null;
}
|
||||
|
||||
/**
 * Maps a raw confidence to the range [0, 1].
 * Non-numeric or non-finite input yields 0.5; values above 1 are divided by 100 before clamping.
 */
function normalizeConfidence(value: unknown): number {
	if (typeof value === "number" && Number.isFinite(value)) {
		const fraction = value > 1 ? value / 100 : value;
		return clamp01(fraction);
	}
	return 0.5;
}
|
||||
|
||||
/**
 * Normalizes a raw box given either as an array (delegated to `normalizeArrayBox`) or as an
 * object with `x`, `y`, `width`/`w` and `height`/`h`. Returns null when the value has
 * neither shape or any of the four fields is not a finite number.
 */
function normalizeBox(
	value: unknown,
	snapshot: GuideSnapshot,
): { x: number; y: number; width: number; height: number } | null {
	if (Array.isArray(value)) {
		return normalizeArrayBox(value, snapshot);
	}
	if (!isRecord(value)) {
		return null;
	}

	const x = normalizeNumber(value.x);
	const y = normalizeNumber(value.y);
	const width = normalizeNumber(value.width ?? value.w);
	const height = normalizeNumber(value.height ?? value.h);
	if (x !== null && y !== null && width !== null && height !== null) {
		return normalizeBoxDimensions({ x, y, width, height }, snapshot);
	}
	return null;
}
|
||||
|
||||
/**
 * Normalizes an array-shaped box.
 *
 * The array is flattened (up to two levels) and reduced to its numeric items. With eight or
 * more numbers the first eight are read as four (x, y) corner pairs and their bounding
 * rectangle is used; with four to seven numbers the first four are read as
 * [x, y, width, height]. Fewer than four numbers yields null.
 */
function normalizeArrayBox(
	value: unknown[],
	snapshot: GuideSnapshot,
): { x: number; y: number; width: number; height: number } | null {
	const numbers = value.flat(2).filter((item): item is number => typeof item === "number");
	if (numbers.length < 4) {
		return null;
	}

	if (numbers.length < 8) {
		const [x, y, width, height] = numbers;
		return normalizeBoxDimensions(
			{ x: x ?? 0, y: y ?? 0, width: width ?? 0, height: height ?? 0 },
			snapshot,
		);
	}

	// Even positions are x coordinates, odd positions are y coordinates.
	const corners = numbers.slice(0, 8);
	const xs = corners.filter((_, position) => position % 2 === 0);
	const ys = corners.filter((_, position) => position % 2 === 1);
	const left = Math.min(...xs);
	const right = Math.max(...xs);
	const top = Math.min(...ys);
	const bottom = Math.max(...ys);
	return normalizeBoxDimensions(
		{ x: left, y: top, width: right - left, height: bottom - top },
		snapshot,
	);
}
|
||||
|
||||
/**
 * Scales a box into the unit square.
 *
 * The box is treated as pixel-based when any coordinate, size, or far edge exceeds 1; in
 * that case it is divided by the snapshot's width/height. Each resulting field is clamped
 * to [0, 1] independently.
 */
function normalizeBoxDimensions(
	box: { x: number; y: number; width: number; height: number },
	snapshot: GuideSnapshot,
): { x: number; y: number; width: number; height: number } {
	const extents = [
		box.x,
		box.y,
		box.width,
		box.height,
		box.x + box.width,
		box.y + box.height,
	];
	const usesPixels = extents.some((extent) => extent > 1);

	let scaleX = 1;
	let scaleY = 1;
	if (usesPixels) {
		scaleX = snapshot.width;
		scaleY = snapshot.height;
	}

	return {
		x: clamp01(box.x / scaleX),
		y: clamp01(box.y / scaleY),
		width: clamp01(box.width / scaleX),
		height: clamp01(box.height / scaleY),
	};
}
|
||||
|
||||
/** Returns `value` when it is a finite number, otherwise null. */
function normalizeNumber(value: unknown): number | null {
	if (typeof value !== "number") {
		return null;
	}
	return Number.isFinite(value) ? value : null;
}
|
||||
|
||||
/** Clamps a finite number to [0, 1]; NaN and infinities map to 0. */
function clamp01(value: number): number {
	if (Number.isFinite(value)) {
		const atLeastZero = Math.max(0, value);
		return Math.min(1, atLeastZero);
	}
	return 0;
}
|
||||
|
||||
/** Type guard: true for any non-null value whose `typeof` is "object" (arrays included). */
function isRecord(value: unknown): value is Record<string, unknown> {
	return value !== null && typeof value === "object";
}
|
||||
|
||||
/**
 * Builds the PowerShell script that runs Windows OCR on one image and prints
 * `{ "blocks": [...] }` as compressed JSON, one block per recognized word.
 *
 * The image path and language list are embedded as base64 and decoded inside the script,
 * so neither value is ever interpreted as PowerShell source.
 *
 * @param imagePath Path of the image file to recognize.
 * @param language Comma-separated language tags; "vi" and "en" are expanded to
 *   "vi-VN" and "en-US" by the script.
 * @returns The script text (not yet encoded for `-EncodedCommand`).
 */
function buildWindowsOcrScript(imagePath: string, language: string): string {
	const imagePathBase64 = Buffer.from(imagePath, "utf8").toString("base64");
	const languageBase64 = Buffer.from(language, "utf8").toString("base64");
	// The AsTask lookup below also checks the parameter type: several generic one-parameter
	// AsTask overloads exist and reflection order is not guaranteed, so taking the first
	// match by name and arity alone could select an overload that does not accept
	// IAsyncOperation<T>.
	return `
$ErrorActionPreference = "Stop"
[Console]::OutputEncoding = [System.Text.UTF8Encoding]::new($false)
$OutputEncoding = [System.Text.UTF8Encoding]::new($false)
$imagePath = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${imagePathBase64}"))
$languageSetting = [System.Text.Encoding]::UTF8.GetString([Convert]::FromBase64String("${languageBase64}"))

Add-Type -AssemblyName System.Runtime.WindowsRuntime
[void][Windows.Storage.StorageFile, Windows.Storage, ContentType=WindowsRuntime]
[void][Windows.Storage.FileAccessMode, Windows.Storage, ContentType=WindowsRuntime]
[void][Windows.Graphics.Imaging.BitmapDecoder, Windows.Graphics.Imaging, ContentType=WindowsRuntime]
[void][Windows.Graphics.Imaging.SoftwareBitmap, Windows.Graphics.Imaging, ContentType=WindowsRuntime]
[void][Windows.Media.Ocr.OcrEngine, Windows.Foundation, ContentType=WindowsRuntime]
[void][Windows.Globalization.Language, Windows.Globalization, ContentType=WindowsRuntime]

$asTaskGeneric = ([System.WindowsRuntimeSystemExtensions].GetMethods() | Where-Object {
	$_.Name -eq "AsTask" -and $_.IsGenericMethodDefinition -and $_.GetParameters().Count -eq 1 -and $_.GetParameters()[0].ParameterType.Name -eq 'IAsyncOperation\`1'
})[0]

function Await-WinRt($operation, [Type]$resultType) {
	$asTask = $asTaskGeneric.MakeGenericMethod($resultType)
	$task = $asTask.Invoke($null, @($operation))
	$task.Wait()
	return $task.Result
}

function New-OcrEngine($languageSetting) {
	$languageTags = @()
	foreach ($item in $languageSetting.Split(",")) {
		$tag = $item.Trim()
		if ($tag -eq "vi") { $tag = "vi-VN" }
		if ($tag -eq "en") { $tag = "en-US" }
		if ($tag.Length -gt 0) { $languageTags += $tag }
	}

	foreach ($tag in $languageTags) {
		try {
			$language = [Windows.Globalization.Language]::new($tag)
			$engine = [Windows.Media.Ocr.OcrEngine]::TryCreateFromLanguage($language)
			if ($null -ne $engine) { return $engine }
		} catch {}
	}

	$profileEngine = [Windows.Media.Ocr.OcrEngine]::TryCreateFromUserProfileLanguages()
	if ($null -ne $profileEngine) { return $profileEngine }
	return [Windows.Media.Ocr.OcrEngine]::TryCreateFromLanguage([Windows.Globalization.Language]::new("en-US"))
}

function Normalize-OcrText($value) {
	if ($null -eq $value) { return "" }
	$text = [string]$value
	$text = [System.Text.RegularExpressions.Regex]::Replace($text, "[\\x00-\\x1F\\x7F]", " ")
	return $text.Trim()
}

$file = Await-WinRt ([Windows.Storage.StorageFile]::GetFileFromPathAsync($imagePath)) ([Windows.Storage.StorageFile])
$stream = Await-WinRt ($file.OpenAsync([Windows.Storage.FileAccessMode]::Read)) ([Windows.Storage.Streams.IRandomAccessStream])
$decoder = Await-WinRt ([Windows.Graphics.Imaging.BitmapDecoder]::CreateAsync($stream)) ([Windows.Graphics.Imaging.BitmapDecoder])
$bitmap = Await-WinRt ($decoder.GetSoftwareBitmapAsync()) ([Windows.Graphics.Imaging.SoftwareBitmap])
$engine = New-OcrEngine $languageSetting
if ($null -eq $engine) { throw "No Windows OCR engine is available." }
$result = Await-WinRt ($engine.RecognizeAsync($bitmap)) ([Windows.Media.Ocr.OcrResult])

$blocks = @()
$index = 0
foreach ($line in $result.Lines) {
	foreach ($word in $line.Words) {
		$rect = $word.BoundingRect
		$text = Normalize-OcrText $word.Text
		if ($text.Length -gt 0) {
			$index += 1
			$blocks += [PSCustomObject]@{
				text = $text
				confidence = 0.75
				box = [PSCustomObject]@{
					x = [double]$rect.X
					y = [double]$rect.Y
					width = [double]$rect.Width
					height = [double]$rect.Height
				}
			}
		}
	}
}

[PSCustomObject]@{ blocks = $blocks } | ConvertTo-Json -Depth 6 -Compress
`;
}
|
||||
@@ -0,0 +1,111 @@
|
||||
// Lightweight i18n for the Electron main process.
|
||||
// Imports the same JSON translation files used by the renderer.
|
||||
|
||||
import commonAr from "../src/i18n/locales/ar/common.json";
|
||||
import dialogsAr from "../src/i18n/locales/ar/dialogs.json";
|
||||
import commonEn from "../src/i18n/locales/en/common.json";
|
||||
import dialogsEn from "../src/i18n/locales/en/dialogs.json";
|
||||
import commonEs from "../src/i18n/locales/es/common.json";
|
||||
import dialogsEs from "../src/i18n/locales/es/dialogs.json";
|
||||
import commonFr from "../src/i18n/locales/fr/common.json";
|
||||
import dialogsFr from "../src/i18n/locales/fr/dialogs.json";
|
||||
import commonIt from "../src/i18n/locales/it/common.json";
|
||||
import dialogsIt from "../src/i18n/locales/it/dialogs.json";
|
||||
import commonJa from "../src/i18n/locales/ja-JP/common.json";
|
||||
import dialogsJa from "../src/i18n/locales/ja-JP/dialogs.json";
|
||||
import commonKo from "../src/i18n/locales/ko-KR/common.json";
|
||||
import dialogsKo from "../src/i18n/locales/ko-KR/dialogs.json";
|
||||
import commonRu from "../src/i18n/locales/ru/common.json";
|
||||
import dialogsRu from "../src/i18n/locales/ru/dialogs.json";
|
||||
import commonTr from "../src/i18n/locales/tr/common.json";
|
||||
import dialogsTr from "../src/i18n/locales/tr/dialogs.json";
|
||||
import commonVi from "../src/i18n/locales/vi/common.json";
|
||||
import dialogsVi from "../src/i18n/locales/vi/dialogs.json";
|
||||
import commonZh from "../src/i18n/locales/zh-CN/common.json";
|
||||
import dialogsZh from "../src/i18n/locales/zh-CN/dialogs.json";
|
||||
import commonZhTw from "../src/i18n/locales/zh-TW/common.json";
|
||||
import dialogsZhTw from "../src/i18n/locales/zh-TW/dialogs.json";
|
||||
|
||||
/** Locale identifiers that have a translation bundle registered in `messages`. */
type Locale =
	| "en"
	| "ar"
	| "es"
	| "fr"
	| "it"
	| "ja-JP"
	| "ko-KR"
	| "ru"
	| "tr"
	| "vi"
	| "zh-CN"
	| "zh-TW";

/** Translation namespaces loaded into the main process. */
type Namespace = "common" | "dialogs";

/** Parsed contents of one translation JSON file. */
type MessageMap = Record<string, unknown>;

/** Translation bundles indexed by locale, then by namespace. */
const messages: Record<Locale, Record<Namespace, MessageMap>> = {
	en: { common: commonEn, dialogs: dialogsEn },
	ar: { common: commonAr, dialogs: dialogsAr },
	es: { common: commonEs, dialogs: dialogsEs },
	fr: { common: commonFr, dialogs: dialogsFr },
	it: { common: commonIt, dialogs: dialogsIt },
	"ja-JP": { common: commonJa, dialogs: dialogsJa },
	"ko-KR": { common: commonKo, dialogs: dialogsKo },
	ru: { common: commonRu, dialogs: dialogsRu },
	tr: { common: commonTr, dialogs: dialogsTr },
	vi: { common: commonVi, dialogs: dialogsVi },
	"zh-CN": { common: commonZh, dialogs: dialogsZh },
	"zh-TW": { common: commonZhTw, dialogs: dialogsZhTw },
};

/** Locale currently used by `mainT`; starts as English. */
let currentLocale: Locale = "en";
|
||||
|
||||
/**
 * Switches the main-process locale.
 *
 * Unsupported locale strings are ignored and the current locale is kept. Support is decided
 * by the keys of `messages`, so a newly registered bundle is accepted without a second list
 * having to be updated here.
 *
 * @param locale Locale identifier, e.g. "en" or "zh-CN"; must match a bundle key exactly.
 */
export function setMainLocale(locale: string) {
	// Own-property check: `in` would also accept inherited names such as "constructor".
	if (Object.prototype.hasOwnProperty.call(messages, locale)) {
		currentLocale = locale as Locale;
	}
}
|
||||
|
||||
/** Returns the locale currently used for main-process translations. */
export function getMainLocale(): Locale {
	const activeLocale = currentLocale;
	return activeLocale;
}
|
||||
|
||||
/**
 * Resolves a dot-separated path inside a nested message object.
 *
 * @returns The value at the path when it is a string; undefined when any segment is missing,
 *   an intermediate value is not an object, or the final value is not a string.
 */
function getMessageValue(obj: unknown, dotPath: string): string | undefined {
	const resolved = dotPath.split(".").reduce<unknown>((node, segment) => {
		if (typeof node !== "object" || node === null) {
			return undefined;
		}
		return (node as Record<string, unknown>)[segment];
	}, obj);
	return typeof resolved === "string" ? resolved : undefined;
}
|
||||
|
||||
/**
 * Replaces `{{name}}` placeholders in `str` with values from `vars`.
 *
 * Placeholders with no matching own property in `vars` (or whose value is null/undefined)
 * are left in the output unchanged. Only own properties are consulted, so a placeholder such
 * as `{{toString}}` is never filled from `Object.prototype`.
 *
 * @param str Template text.
 * @param vars Placeholder values; when omitted, `str` is returned as is.
 */
function interpolate(str: string, vars?: Record<string, string | number>): string {
	if (!vars) return str;
	return str.replace(/\{\{(\w+)\}\}/g, (placeholder: string, key: string) => {
		const value = Object.prototype.hasOwnProperty.call(vars, key) ? vars[key] : undefined;
		return String(value ?? placeholder);
	});
}
|
||||
|
||||
/**
 * Translates `key` from `namespace` for the current main-process locale.
 *
 * Falls back to the English bundle when the current locale has no string at that key, and
 * to the literal `<namespace>.<key>` when English has none either. `vars` fill `{{name}}`
 * placeholders in the resolved string.
 */
export function mainT(
	namespace: Namespace,
	key: string,
	vars?: Record<string, string | number>,
): string {
	const localized = getMessageValue(messages[currentLocale]?.[namespace], key);
	const template = localized ?? getMessageValue(messages.en?.[namespace], key);
	if (template == null) {
		return `${namespace}.${key}`;
	}
	return interpolate(template, vars);
}
|
||||
@@ -0,0 +1,229 @@
|
||||
import { ipcMain } from "electron";
|
||||
import {
|
||||
NATIVE_BRIDGE_CHANNEL,
|
||||
NATIVE_BRIDGE_VERSION,
|
||||
type NativeBridgeErrorCode,
|
||||
type NativeBridgeRequest,
|
||||
type NativeBridgeResponse,
|
||||
type NativePlatform,
|
||||
type ProjectFileResult,
|
||||
type ProjectPathResult,
|
||||
} from "../../src/native/contracts";
|
||||
import type { CursorTelemetryLoadResult } from "../native-bridge/cursor/adapter";
|
||||
import { TelemetryCursorAdapter } from "../native-bridge/cursor/telemetryCursorAdapter";
|
||||
import { CursorService } from "../native-bridge/services/cursorService";
|
||||
import { ProjectService } from "../native-bridge/services/projectService";
|
||||
import { SystemService } from "../native-bridge/services/systemService";
|
||||
import { NativeBridgeStateStore } from "../native-bridge/store";
|
||||
|
||||
/**
 * Host callbacks consumed by `registerNativeBridgeHandlers`, which forwards them to the
 * system, project and cursor services behind the native bridge.
 */
export interface NativeBridgeContext {
	// --- System ---
	getPlatform: () => NodeJS.Platform;
	// --- Project state and project files ---
	getCurrentProjectPath: () => string | null;
	getCurrentVideoPath: () => string | null;
	saveProjectFile: (
		projectData: unknown,
		suggestedName?: string,
		existingProjectPath?: string,
	) => Promise<ProjectFileResult>;
	loadProjectFile: () => Promise<ProjectFileResult>;
	loadCurrentProjectFile: () => Promise<ProjectFileResult>;
	setCurrentVideoPath: (path: string) => ProjectPathResult | Promise<ProjectPathResult>;
	getCurrentVideoPathResult: () => ProjectPathResult;
	clearCurrentVideoPath: () => ProjectPathResult;
	// --- Path resolution ---
	resolveAssetBasePath: () => string | null;
	resolveVideoPath: (videoPath?: string | null) => string | null;
	// --- Cursor data loaders ---
	loadCursorRecordingData: (
		videoPath: string,
	) => Promise<import("../../src/native/contracts").CursorRecordingData>;
	loadCursorTelemetry: (videoPath: string) => Promise<CursorTelemetryLoadResult>;
}
|
||||
|
||||
/** Maps a Node platform to a bridge platform: "darwin" and "win32" pass through, anything else is "linux". */
function normalizePlatform(platform: NodeJS.Platform): NativePlatform {
	return platform === "darwin" || platform === "win32" ? platform : "linux";
}
|
||||
|
||||
/**
 * Builds the `meta` section of a bridge response.
 * When no (non-empty) request id is supplied, one is generated as `native-<epoch ms>`.
 */
function createMeta(requestId?: string) {
	const effectiveRequestId = requestId ? requestId : `native-${Date.now()}`;
	return {
		version: NATIVE_BRIDGE_VERSION,
		requestId: effectiveRequestId,
		timestampMs: Date.now(),
	} as const;
}
|
||||
|
||||
/** Wraps `data` in a successful bridge response carrying fresh metadata. */
function createSuccessResponse<TData>(requestId: string | undefined, data: TData) {
	const meta = createMeta(requestId);
	return { ok: true, data, meta } satisfies NativeBridgeResponse<TData>;
}
|
||||
|
||||
/**
 * Builds a failed bridge response.
 *
 * @param requestId Id echoed back in the metadata; one is generated when undefined.
 * @param code Bridge error code.
 * @param message Human-readable description.
 * @param retryable Value of the response's `retryable` flag; defaults to false.
 */
function createErrorResponse(
	requestId: string | undefined,
	code: NativeBridgeErrorCode,
	message: string,
	retryable = false,
) {
	const error = { code, message, retryable };
	return { ok: false, error, meta: createMeta(requestId) } satisfies NativeBridgeResponse;
}
|
||||
|
||||
/**
 * Minimal shape check for an incoming bridge request: a non-null object whose `domain` and
 * `action` are both strings. Payload contents are not validated here.
 */
function isBridgeRequest(value: unknown): value is NativeBridgeRequest {
	if (typeof value !== "object" || value === null) {
		return false;
	}

	const candidate = value as Partial<NativeBridgeRequest>;
	if (typeof candidate.domain !== "string") {
		return false;
	}
	return typeof candidate.action === "string";
}
|
||||
|
||||
/**
 * Registers the single IPC handler behind the native bridge channel.
 *
 * Any handler previously registered on the channel is removed first. Each request is
 * dispatched by `domain` ("system", "project", "cursor") and then by `action`; the handler
 * always resolves with a bridge response object and never rejects:
 * - `INVALID_REQUEST` for a request without string `domain`/`action`, or for a project
 *   action whose required payload is missing or malformed;
 * - `UNSUPPORTED_ACTION` for an unknown domain or action;
 * - `INTERNAL_ERROR` (marked retryable) when a service call throws.
 *
 * @param context Host callbacks that the services are wired to.
 */
export function registerNativeBridgeHandlers(context: NativeBridgeContext) {
	ipcMain.removeHandler(NATIVE_BRIDGE_CHANNEL);

	const platform = normalizePlatform(context.getPlatform());
	const store = new NativeBridgeStateStore(platform);
	const projectService = new ProjectService({
		store,
		getCurrentProjectPath: context.getCurrentProjectPath,
		getCurrentVideoPath: context.getCurrentVideoPath,
		saveProjectFile: context.saveProjectFile,
		loadProjectFile: context.loadProjectFile,
		loadCurrentProjectFile: context.loadCurrentProjectFile,
		setCurrentVideoPath: context.setCurrentVideoPath,
		getCurrentVideoPathResult: context.getCurrentVideoPathResult,
		clearCurrentVideoPath: context.clearCurrentVideoPath,
	});
	const cursorService = new CursorService({
		store,
		adapter: new TelemetryCursorAdapter({
			loadRecordingData: context.loadCursorRecordingData,
			resolveVideoPath: context.resolveVideoPath,
			loadTelemetry: context.loadCursorTelemetry,
		}),
	});
	const systemService = new SystemService({
		store,
		getPlatform: () => platform,
		getAssetBasePath: context.resolveAssetBasePath,
		getCursorCapabilities: () => cursorService.getCapabilities(),
	});

	ipcMain.handle(NATIVE_BRIDGE_CHANNEL, async (_, request: unknown) => {
		if (!isBridgeRequest(request)) {
			return createErrorResponse(undefined, "INVALID_REQUEST", "Invalid native bridge request.");
		}

		const requestId = request.requestId;
		const domain = request.domain as string;

		// isBridgeRequest only checks `domain` and `action`, so payloads are still untrusted
		// here. Reject malformed ones explicitly instead of letting a TypeError surface as a
		// retryable INTERNAL_ERROR.
		const invalidPayload = (action: string) =>
			createErrorResponse(
				requestId,
				"INVALID_REQUEST",
				`Invalid payload for project action: ${action}`,
			);

		try {
			switch (request.domain) {
				case "system": {
					const action = request.action as string;
					switch (request.action) {
						case "getPlatform":
							return createSuccessResponse(requestId, systemService.getPlatform());
						case "getAssetBasePath":
							return createSuccessResponse(requestId, systemService.getAssetBasePath());
						case "getCapabilities":
							return createSuccessResponse(requestId, await systemService.getCapabilities());
						default:
							return createErrorResponse(
								requestId,
								"UNSUPPORTED_ACTION",
								`Unsupported system action: ${action}`,
							);
					}
				}

				case "project": {
					const action = request.action as string;
					switch (request.action) {
						case "getCurrentContext":
							return createSuccessResponse(requestId, projectService.getCurrentContext());
						case "saveProjectFile": {
							const payload: unknown = request.payload;
							if (typeof payload !== "object" || payload === null) {
								return invalidPayload(action);
							}
							return createSuccessResponse(
								requestId,
								await projectService.saveProjectFile(
									request.payload.projectData,
									request.payload.suggestedName,
									request.payload.existingProjectPath,
								),
							);
						}
						case "loadProjectFile":
							return createSuccessResponse(requestId, await projectService.loadProjectFile());
						case "loadCurrentProjectFile":
							return createSuccessResponse(
								requestId,
								await projectService.loadCurrentProjectFile(),
							);
						case "setCurrentVideoPath": {
							const payload = request.payload as { path?: unknown } | null | undefined;
							if (typeof payload?.path !== "string") {
								return invalidPayload(action);
							}
							return createSuccessResponse(
								requestId,
								await projectService.setCurrentVideoPath(request.payload.path),
							);
						}
						case "getCurrentVideoPath":
							return createSuccessResponse(requestId, projectService.getCurrentVideoPath());
						case "clearCurrentVideoPath":
							return createSuccessResponse(requestId, projectService.clearCurrentVideoPath());
						default:
							return createErrorResponse(
								requestId,
								"UNSUPPORTED_ACTION",
								`Unsupported project action: ${action}`,
							);
					}
				}

				case "cursor": {
					const action = request.action as string;
					switch (request.action) {
						case "getCapabilities":
							return createSuccessResponse(requestId, await cursorService.getCapabilities());
						case "getTelemetry":
							return createSuccessResponse(
								requestId,
								await cursorService.getTelemetry(request.payload?.videoPath),
							);
						case "getRecordingData":
							return createSuccessResponse(
								requestId,
								await cursorService.getRecordingData(request.payload?.videoPath),
							);
						default:
							return createErrorResponse(
								requestId,
								"UNSUPPORTED_ACTION",
								`Unsupported cursor action: ${action}`,
							);
					}
				}

				default:
					return createErrorResponse(
						requestId,
						"UNSUPPORTED_ACTION",
						`Unsupported bridge domain: ${domain}`,
					);
			}
		} catch (error) {
			return createErrorResponse(
				requestId,
				"INTERNAL_ERROR",
				error instanceof Error ? error.message : "Unknown native bridge error.",
				true,
			);
		}
	});
}
|
||||
@@ -0,0 +1,84 @@
|
||||
import { mkdtemp, readFile, rm, stat } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { RecordingStreamRegistry } from "./recordingStream";
|
||||
|
||||
describe("RecordingStreamRegistry", () => {
	const FILE_NAME = "rec.webm";
	let workDir: string;
	const pathFor = (name: string) => path.join(workDir, name);

	// Every test gets its own scratch directory, removed again afterwards.
	beforeEach(async () => {
		workDir = await mkdtemp(path.join(tmpdir(), "openscreen-stream-"));
	});

	afterEach(async () => {
		await rm(workDir, { recursive: true, force: true });
	});

	it("streams chunks to disk in order and reports streamed on finalize", async () => {
		const registry = new RecordingStreamRegistry();
		const target = pathFor(FILE_NAME);
		await registry.open(FILE_NAME, target);
		for (const piece of ["hello ", "world"]) {
			await registry.append(FILE_NAME, Buffer.from(piece));
		}

		const streamed = await registry.finalize(FILE_NAME);

		expect(streamed).toBe(true);
		expect(await readFile(target, "utf8")).toBe("hello world");
		// Finalizing again finds no open stream.
		expect(await registry.finalize(FILE_NAME)).toBe(false);
	});

	it("reports not-streamed when no stream was opened", async () => {
		const registry = new RecordingStreamRegistry();
		expect(await registry.finalize("missing.webm")).toBe(false);
		expect(registry.has("missing.webm")).toBe(false);
	});

	it("rejects open when the target path is not writable (open is awaited, not assumed)", async () => {
		const registry = new RecordingStreamRegistry();
		// The parent directory is never created, so opening the file must fail.
		const unreachable = path.join(workDir, "does-not-exist", FILE_NAME);
		await expect(registry.open(FILE_NAME, unreachable)).rejects.toThrow();
		// A failed open must not leave a registered stream behind.
		expect(registry.has(FILE_NAME)).toBe(false);
	});

	it("rejects append when no stream is open", async () => {
		const registry = new RecordingStreamRegistry();
		const pendingAppend = registry.append(FILE_NAME, Buffer.from("x"));
		await expect(pendingAppend).rejects.toThrow(/No active recording stream/);
	});

	it("discard closes the stream and removes the partial file", async () => {
		const registry = new RecordingStreamRegistry();
		const target = pathFor(FILE_NAME);
		await registry.open(FILE_NAME, target);
		await registry.append(FILE_NAME, Buffer.from("partial"));

		await registry.discard(FILE_NAME, target);

		expect(registry.has(FILE_NAME)).toBe(false);
		await expect(stat(target)).rejects.toThrow();
		// After a discard there is no stream left to finalize.
		expect(await registry.finalize(FILE_NAME)).toBe(false);
	});

	it("discard tolerates a missing file", async () => {
		const registry = new RecordingStreamRegistry();
		await expect(registry.discard("never.webm", pathFor("never.webm"))).resolves.toBeUndefined();
	});

	it("opening the same file twice replaces the prior stream", async () => {
		const registry = new RecordingStreamRegistry();
		const target = pathFor(FILE_NAME);
		await registry.open(FILE_NAME, target);
		await registry.append(FILE_NAME, Buffer.from("first"));
		await registry.open(FILE_NAME, target);
		await registry.append(FILE_NAME, Buffer.from("second"));
		await registry.finalize(FILE_NAME);

		expect(await readFile(target, "utf8")).toBe("second");
	});
});
|
||||
@@ -0,0 +1,147 @@
|
||||
import { createWriteStream, type WriteStream } from "node:fs";
|
||||
import { unlink } from "node:fs/promises";
|
||||
import type { IpcMain } from "electron";
|
||||
|
||||
/**
|
||||
* Owns the lifecycle of on-disk write streams for in-progress recordings, keyed
|
||||
* by the recording's output file name. Browser MediaRecorder chunks are appended
|
||||
* here as they arrive so a long recording never buffers the whole video in the
|
||||
* renderer (the #616 fix).
|
||||
*
|
||||
* The file name is the key because it is the one value the renderer and main
|
||||
* process already exchange and it is globally unique per recording, so there is
|
||||
* no derived/offset key to keep in sync across the IPC boundary.
|
||||
*/
|
||||
export class RecordingStreamRegistry {
|
||||
private readonly streams = new Map<string, WriteStream>();
|
||||
|
||||
/**
|
||||
* Open a write stream and resolve only once the OS confirms it is writable.
|
||||
* Resolving on the `open` event (rather than on `createWriteStream` returning)
|
||||
* means a bad path or permission error rejects here instead of surfacing as a
|
||||
* silent chunk drop later, so the renderer's fallback can take over.
|
||||
*/
|
||||
async open(fileName: string, filePath: string): Promise<void> {
|
||||
await this.endStream(fileName);
|
||||
|
||||
const ws = createWriteStream(filePath, { flags: "w" });
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
const onError = (error: Error) => reject(error);
|
||||
ws.once("error", onError);
|
||||
ws.once("open", () => {
|
||||
ws.removeListener("error", onError);
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
// Keep a listener for the stream's lifetime so a late error logs rather
|
||||
// than crashing the main process with an unhandled 'error' event. Per-write
|
||||
// failures still surface through the `append` callback below.
|
||||
ws.on("error", (error) => {
|
||||
console.error(`[recording-stream] ${fileName}:`, error);
|
||||
});
|
||||
|
||||
this.streams.set(fileName, ws);
|
||||
}
|
||||
|
||||
has(fileName: string): boolean {
|
||||
return this.streams.has(fileName);
|
||||
}
|
||||
|
||||
/** Append a chunk; rejects if no stream is open or the write fails. */
|
||||
async append(fileName: string, chunk: Buffer): Promise<void> {
|
||||
const ws = this.streams.get(fileName);
|
||||
if (!ws) {
|
||||
throw new Error(`No active recording stream for ${fileName}`);
|
||||
}
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
ws.write(chunk, (error) => (error ? reject(error) : resolve()));
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * End the stream for `fileName` and leave the file on disk.
 *
 * @returns `true` when a stream was registered (the recording was streamed to
 * disk), `false` when none was — in which case the caller still has to write
 * its in-memory buffer itself.
 */
async finalize(fileName: string): Promise<boolean> {
	const stream = this.streams.get(fileName);
	if (!stream) {
		return false;
	}

	// Unregister first so no further appends target a stream that is ending.
	this.streams.delete(fileName);

	await new Promise<void>((resolve, reject) => {
		stream.end((error?: Error | null) => {
			if (error) {
				reject(error);
			} else {
				resolve();
			}
		});
	});

	return true;
}
|
||||
|
||||
/**
 * End the stream for `fileName` (when one is registered) and remove the
 * partial file at `filePath`. Used for recordings that are cancelled or fail
 * before a successful save, so they neither hold a file descriptor open nor
 * leave a partial recording behind.
 */
async discard(fileName: string, filePath: string): Promise<void> {
	await this.endStream(fileName);

	try {
		await unlink(filePath);
	} catch {
		// Best effort: a failed delete (e.g. the file never existed) is ignored.
	}
}
|
||||
|
||||
/**
 * Unregister and end the stream for `fileName`, if any. Resolves once the
 * `end` callback runs, whether or not it reported an error.
 */
private async endStream(fileName: string): Promise<void> {
	const stream = this.streams.get(fileName);
	if (stream) {
		this.streams.delete(fileName);
		await new Promise<void>((resolve) => {
			stream.end(() => resolve());
		});
	}
}
|
||||
}
|
||||
|
||||
/**
 * Register the recording-stream IPC handlers.
 *
 * Each handler is a thin adapter: it runs one registry operation and converts
 * the registry's throw-on-failure contract into the `{ success, error }`
 * object the renderer expects.
 */
export function registerRecordingStreamHandlers(
	ipcMain: IpcMain,
	registry: RecordingStreamRegistry,
	resolveRecordingOutputPath: (fileName: string) => string,
): void {
	// Run `operation` and report its outcome instead of letting it throw.
	const settle = async (
		operation: () => Promise<unknown>,
	): Promise<{ success: boolean; error?: string }> => {
		try {
			await operation();
			return { success: true };
		} catch (error) {
			return { success: false, error: String(error) };
		}
	};

	ipcMain.handle("open-recording-stream", (_, fileName: string) =>
		settle(() => registry.open(fileName, resolveRecordingOutputPath(fileName))),
	);

	ipcMain.handle("append-recording-chunk", (_, fileName: string, chunk: ArrayBuffer) =>
		settle(() => registry.append(fileName, Buffer.from(chunk))),
	);

	// Closing from the renderer discards the stream and deletes its file.
	ipcMain.handle("close-recording-stream", (_, fileName: string) =>
		settle(() => registry.discard(fileName, resolveRecordingOutputPath(fileName))),
	);
}
|
||||
@@ -0,0 +1,549 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import {
|
||||
app,
|
||||
BrowserWindow,
|
||||
ipcMain,
|
||||
Menu,
|
||||
nativeImage,
|
||||
session,
|
||||
systemPreferences,
|
||||
Tray,
|
||||
} from "electron";
|
||||
import { mainT, setMainLocale } from "./i18n";
|
||||
import { getSelectedDesktopSource, registerIpcHandlers } from "./ipc/handlers";
|
||||
import {
|
||||
createCountdownOverlayWindow,
|
||||
createEditorWindow,
|
||||
createHudOverlayWindow,
|
||||
createSourceSelectorWindow,
|
||||
} from "./windows";
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
// Use Screen & System Audio Recording permissions instead of the CoreAudio Tap API on macOS.
// CoreAudio Tap requires NSAudioCaptureUsageDescription in the parent app's Info.plist,
// which doesn't work when running from a terminal/IDE during development.
|
||||
if (process.platform === "darwin") {
|
||||
app.commandLine.appendSwitch("disable-features", "MacCatapLoopbackAudioForScreenShare");
|
||||
}
|
||||
|
||||
// On Linux, switch Chromium to its Wayland backend when the session looks like
// Wayland (Hyprland, GNOME, KDE, etc.) so screen capture and window management
// behave properly.
if (process.platform === "linux") {
	const { XDG_SESSION_TYPE, WAYLAND_DISPLAY } = process.env;
	const onWayland = XDG_SESSION_TYPE === "wayland" || WAYLAND_DISPLAY !== undefined;

	if (onWayland) {
		app.commandLine.appendSwitch("ozone-platform", "wayland");
		// WebRTCPipeWireCapturer enables screen capture on Wayland.
		app.commandLine.appendSwitch("enable-features", "WaylandWindowDrag,WebRTCPipeWireCapturer");
	}
}
|
||||
|
||||
export const RECORDINGS_DIR = path.join(app.getPath("userData"), "recordings");
|
||||
|
||||
/**
 * Create the recordings directory (and any missing parents) and log where it
 * lives. Failures are logged rather than thrown.
 */
async function ensureRecordingsDir() {
	await fs
		.mkdir(RECORDINGS_DIR, { recursive: true })
		.then(() => {
			console.log("RECORDINGS_DIR:", RECORDINGS_DIR);
			console.log("User Data Path:", app.getPath("userData"));
		})
		.catch((error) => {
			console.error("Failed to create recordings directory:", error);
		});
}
|
||||
|
||||
// The built directory structure
|
||||
//
|
||||
// ├─┬─┬ dist
|
||||
// │ │ └── index.html
|
||||
// │ │
|
||||
// │ ├─┬ dist-electron
|
||||
// │ │ ├── main.js
|
||||
// │ │ └── preload.mjs
|
||||
// │
|
||||
process.env.APP_ROOT = path.join(__dirname, "..");
|
||||
|
||||
// Use ['ENV_NAME'] to avoid the vite:define plugin (Vite@2.x)
|
||||
export const VITE_DEV_SERVER_URL = process.env["VITE_DEV_SERVER_URL"];
|
||||
export const MAIN_DIST = path.join(process.env.APP_ROOT, "dist-electron");
|
||||
export const RENDERER_DIST = path.join(process.env.APP_ROOT, "dist");
|
||||
|
||||
process.env.VITE_PUBLIC = VITE_DEV_SERVER_URL
|
||||
? path.join(process.env.APP_ROOT, "public")
|
||||
: RENDERER_DIST;
|
||||
|
||||
// Window references
|
||||
let mainWindow: BrowserWindow | null = null;
|
||||
let sourceSelectorWindow: BrowserWindow | null = null;
|
||||
let countdownOverlayWindow: BrowserWindow | null = null;
|
||||
let tray: Tray | null = null;
|
||||
let selectedSourceName = "";
|
||||
const isMac = process.platform === "darwin";
|
||||
const trayIconSize = isMac ? 16 : 24;
|
||||
|
||||
// Tray Icons
|
||||
const defaultTrayIcon = getTrayIcon("openscreen.png", trayIconSize);
|
||||
const recordingTrayIcon = getTrayIcon("rec-button.png", trayIconSize);
|
||||
|
||||
/** Create the HUD overlay window and make it the current main window. */
function createWindow() {
	mainWindow = createHudOverlayWindow();
}
|
||||
|
||||
/**
 * Bring the main window to the front, restoring it first when minimized.
 * When there is no live main window, a new one is created instead.
 */
function showMainWindow() {
	const current = mainWindow;

	if (!current || current.isDestroyed()) {
		createWindow();
		return;
	}

	if (current.isMinimized()) {
		current.restore();
	}
	current.show();
	current.focus();
}
|
||||
|
||||
/** Whether the window's current URL marks it as an editor window. */
function isEditorWindow(window: BrowserWindow) {
	const currentUrl = window.webContents.getURL();
	return currentUrl.includes("windowType=editor");
}
|
||||
|
||||
/**
 * Deliver a File-menu action to the editor window.
 *
 * The focused window (or, failing that, the main window) receives the message
 * directly when it is a live editor. Otherwise an editor window is created and
 * the message is sent once that window finishes loading.
 */
function sendEditorMenuAction(
	channel: "menu-load-project" | "menu-save-project" | "menu-save-project-as",
) {
	const candidate = BrowserWindow.getFocusedWindow() ?? mainWindow;

	if (candidate && !candidate.isDestroyed() && isEditorWindow(candidate)) {
		candidate.webContents.send(channel);
		return;
	}

	// No usable editor: open one and defer the action until it has loaded.
	createEditorWindowWrapper();
	const editor = mainWindow;
	if (!editor || editor.isDestroyed()) {
		return;
	}

	editor.webContents.once("did-finish-load", () => {
		if (editor.isDestroyed()) {
			return;
		}
		editor.webContents.send(channel);
	});
}
|
||||
|
||||
/**
 * Build the application menu and install it.
 *
 * Every label comes from `mainT`, with an English fallback when the lookup
 * yields a falsy value. macOS gets a leading app menu and a different Window
 * menu; other platforms get Quit under File instead.
 */
function setupApplicationMenu() {
	const onMac = process.platform === "darwin";
	const sections: Electron.MenuItemConstructorOptions[] = [];

	// macOS-only application menu (About / Services / Hide / Quit).
	if (onMac) {
		sections.push({
			label: app.name,
			submenu: [
				{ role: "about", label: mainT("common", "actions.about") || "About OpenScreen" },
				{ type: "separator" },
				{ role: "services", label: mainT("common", "actions.services") || "Services" },
				{ type: "separator" },
				{ role: "hide", label: mainT("common", "actions.hide") || "Hide OpenScreen" },
				{ role: "hideOthers", label: mainT("common", "actions.hideOthers") || "Hide Others" },
				{ role: "unhide", label: mainT("common", "actions.unhide") || "Show All" },
				{ type: "separator" },
				{ role: "quit", label: mainT("common", "actions.quit") || "Quit" },
			],
		});
	}

	// File: project actions routed to the editor window.
	const fileItems: Electron.MenuItemConstructorOptions[] = [
		{
			label: mainT("dialogs", "unsavedChanges.loadProject") || "Load Project…",
			accelerator: "CmdOrCtrl+O",
			click: () => sendEditorMenuAction("menu-load-project"),
		},
		{
			label: mainT("dialogs", "unsavedChanges.saveProject") || "Save Project…",
			accelerator: "CmdOrCtrl+S",
			click: () => sendEditorMenuAction("menu-save-project"),
		},
		{
			label: mainT("dialogs", "unsavedChanges.saveProjectAs") || "Save Project As…",
			accelerator: "CmdOrCtrl+Shift+S",
			click: () => sendEditorMenuAction("menu-save-project-as"),
		},
	];
	if (!onMac) {
		// Without an app menu, Quit lives at the bottom of File.
		fileItems.push(
			{ type: "separator" },
			{ role: "quit", label: mainT("common", "actions.quit") || "Quit" },
		);
	}
	sections.push({
		label: mainT("common", "actions.file") || "File",
		submenu: fileItems,
	});

	// Edit: standard clipboard and history roles.
	sections.push({
		label: mainT("common", "actions.edit") || "Edit",
		submenu: [
			{ role: "undo", label: mainT("common", "actions.undo") || "Undo" },
			{ role: "redo", label: mainT("common", "actions.redo") || "Redo" },
			{ type: "separator" },
			{ role: "cut", label: mainT("common", "actions.cut") || "Cut" },
			{ role: "copy", label: mainT("common", "actions.copy") || "Copy" },
			{ role: "paste", label: mainT("common", "actions.paste") || "Paste" },
			{ role: "selectAll", label: mainT("common", "actions.selectAll") || "Select All" },
		],
	});

	// View: reload, dev tools, zoom and full screen.
	sections.push({
		label: mainT("common", "actions.view") || "View",
		submenu: [
			{ role: "reload", label: mainT("common", "actions.reload") || "Reload" },
			{ role: "forceReload", label: mainT("common", "actions.forceReload") || "Force Reload" },
			{
				role: "toggleDevTools",
				label: mainT("common", "actions.toggleDevTools") || "Toggle Developer Tools",
			},
			{ type: "separator" },
			{ role: "resetZoom", label: mainT("common", "actions.actualSize") || "Actual Size" },
			{ role: "zoomIn", label: mainT("common", "actions.zoomIn") || "Zoom In" },
			{ role: "zoomOut", label: mainT("common", "actions.zoomOut") || "Zoom Out" },
			{ type: "separator" },
			{
				role: "togglefullscreen",
				label: mainT("common", "actions.toggleFullScreen") || "Toggle Full Screen",
			},
		],
	});

	// Window: platform-specific set of window roles.
	const windowItems: Electron.MenuItemConstructorOptions[] = [
		{ role: "minimize", label: mainT("common", "actions.minimize") || "Minimize" },
	];
	if (onMac) {
		windowItems.push({ role: "zoom" }, { type: "separator" }, { role: "front" });
	} else {
		windowItems.push({ role: "close", label: mainT("common", "actions.close") || "Close" });
	}
	sections.push({
		label: mainT("common", "actions.window") || "Window",
		submenu: windowItems,
	});

	Menu.setApplicationMenu(Menu.buildFromTemplate(sections));
}
|
||||
|
||||
/** Create the tray icon; a single or double click on it shows the main window. */
function createTray() {
	const revealMainWindow = () => {
		showMainWindow();
	};

	tray = new Tray(defaultTrayIcon);
	tray.on("click", revealMainWindow);
	tray.on("double-click", revealMainWindow);
}
|
||||
|
||||
/**
 * Load `filename` from the public asset directory (`VITE_PUBLIC`, falling back
 * to the renderer dist directory) and resize it to a `size` × `size` image.
 */
function getTrayIcon(filename: string, size: number) {
	const assetDir = process.env.VITE_PUBLIC || RENDERER_DIST;
	const image = nativeImage.createFromPath(path.join(assetDir, filename));
	return image.resize({ width: size, height: size, quality: "best" });
}
|
||||
|
||||
/**
 * Refresh the tray icon, tooltip and context menu for the given state.
 *
 * While recording, the tray shows the recording icon, names the selected
 * source in its tooltip and offers only "Stop Recording". Otherwise it shows
 * the default icon with "Open" and "Quit". Does nothing when no tray exists.
 */
function updateTrayMenu(recording: boolean = false) {
	if (!tray) return;

	let icon: Electron.NativeImage;
	let toolTip: string;
	let items: Electron.MenuItemConstructorOptions[];

	if (recording) {
		icon = recordingTrayIcon;
		toolTip =
			mainT("common", "actions.recordingStatus", {
				source: selectedSourceName,
			}) || `Recording: ${selectedSourceName}`;
		items = [
			{
				label: mainT("common", "actions.stopRecording") || "Stop Recording",
				click: () => {
					// The renderer owns the recording; ask it to stop.
					if (mainWindow && !mainWindow.isDestroyed()) {
						mainWindow.webContents.send("stop-recording-from-tray");
					}
				},
			},
		];
	} else {
		icon = defaultTrayIcon;
		toolTip = "OpenScreen";
		items = [
			{
				label: mainT("common", "actions.open") || "Open",
				click: () => {
					showMainWindow();
				},
			},
			{
				label: mainT("common", "actions.quit") || "Quit",
				click: () => {
					app.quit();
				},
			},
		];
	}

	tray.setImage(icon);
	tray.setToolTip(toolTip);
	tray.setContextMenu(Menu.buildFromTemplate(items));
}
|
||||
|
||||
let editorHasUnsavedChanges = false;
|
||||
let isForceClosing = false;
|
||||
let isCloseConfirmInFlight = false;
|
||||
|
||||
ipcMain.on("set-has-unsaved-changes", (_, hasChanges: boolean) => {
|
||||
editorHasUnsavedChanges = hasChanges;
|
||||
});
|
||||
|
||||
/**
 * Close `windowToClose` without triggering the unsaved-changes prompt.
 *
 * `isForceClosing` is raised immediately and the actual `close()` is deferred
 * to the next `setImmediate` tick; the flag is lowered again afterwards even
 * if closing throws.
 */
function forceCloseEditorWindow(windowToClose: BrowserWindow | null) {
	if (!windowToClose || windowToClose.isDestroyed()) {
		return;
	}

	isForceClosing = true;

	const closeNow = () => {
		try {
			if (windowToClose.isDestroyed()) {
				return;
			}
			windowToClose.close();
		} finally {
			isForceClosing = false;
		}
	};
	setImmediate(closeNow);
}
|
||||
|
||||
/**
 * Replace the current main window with a new editor window and install its
 * unsaved-changes close flow.
 *
 * Close flow: when the editor has unsaved changes, the close is cancelled and
 * the renderer is asked (`request-close-confirm`) to show its in-app dialog.
 * The renderer answers on `close-confirm-response` with "save", "discard" or
 * "cancel". "save" asks the renderer to save (`request-save-before-close`) and
 * closes once it reports `save-before-close-done` with `true`; "discard"
 * closes immediately; "cancel" leaves the window open.
 *
 * Hardening relative to a plain `ipcMain.once`:
 * - replies are matched against this window's webContents id, captured up
 *   front, so a reply from another sender neither consumes the listener nor
 *   touches a destroyed window;
 * - listeners still pending when the window closes are removed and the
 *   in-flight flag is released;
 * - the previous window is only closed when it is still alive, and
 *   `isForceClosing` is always reset even if `close()` throws.
 */
function createEditorWindowWrapper() {
	type CloseChoice = "save" | "discard" | "cancel";

	const previousWindow = mainWindow;
	if (previousWindow) {
		// Calling close() on a destroyed window throws, which would otherwise
		// leave isForceClosing stuck at true and silence every later prompt.
		if (!previousWindow.isDestroyed()) {
			isForceClosing = true;
			try {
				previousWindow.close();
			} finally {
				isForceClosing = false;
			}
		}
		mainWindow = null;
	}

	// Keep a local reference: the close flow must act on THIS window even if
	// the global `mainWindow` is later pointed at a different one.
	const editorWindow = createEditorWindow();
	mainWindow = editorWindow;
	editorHasUnsavedChanges = false;

	// Captured now so listeners never read `webContents` off a destroyed window.
	const editorContentsId = editorWindow.webContents.id;

	let pendingConfirm: ((event: Electron.IpcMainEvent, choice: CloseChoice) => void) | null =
		null;
	let pendingSave: ((event: Electron.IpcMainEvent, shouldClose: boolean) => void) | null = null;

	editorWindow.once("closed", () => {
		if (pendingConfirm) {
			// The window went away while its dialog was still open.
			ipcMain.removeListener("close-confirm-response", pendingConfirm);
			pendingConfirm = null;
			isCloseConfirmInFlight = false;
		}
		if (pendingSave) {
			ipcMain.removeListener("save-before-close-done", pendingSave);
			pendingSave = null;
		}
	});

	editorWindow.on("close", (event) => {
		if (isForceClosing || !editorHasUnsavedChanges || isCloseConfirmInFlight) return;

		event.preventDefault();
		isCloseConfirmInFlight = true;

		const onConfirm = (ipcEvent: Electron.IpcMainEvent, choice: CloseChoice) => {
			// Ignore replies from other senders but keep waiting for ours.
			if (ipcEvent.sender.id !== editorContentsId) return;

			ipcMain.removeListener("close-confirm-response", onConfirm);
			pendingConfirm = null;
			isCloseConfirmInFlight = false;
			if (editorWindow.isDestroyed()) return;

			if (choice === "save") {
				const onSaved = (saveEvent: Electron.IpcMainEvent, shouldClose: boolean) => {
					if (saveEvent.sender.id !== editorContentsId) return;

					ipcMain.removeListener("save-before-close-done", onSaved);
					pendingSave = null;
					if (!shouldClose) return;
					forceCloseEditorWindow(editorWindow);
				};

				// A save request from an earlier close attempt may still be
				// outstanding; keep exactly one listener for it.
				if (pendingSave) {
					ipcMain.removeListener("save-before-close-done", pendingSave);
				}
				pendingSave = onSaved;
				ipcMain.on("save-before-close-done", onSaved);

				// Tell the renderer to save the project, then close when done.
				editorWindow.webContents.send("request-save-before-close");
			} else if (choice === "discard") {
				forceCloseEditorWindow(editorWindow);
			}
			// "cancel": flag already reset, window stays open.
		};

		pendingConfirm = onConfirm;
		ipcMain.on("close-confirm-response", onConfirm);

		// Ask the renderer to show the custom in-app dialog.
		editorWindow.webContents.send("request-close-confirm");
	});
}
|
||||
|
||||
/**
 * Create the source selector window, track it in `sourceSelectorWindow`, and
 * clear that reference when the window is closed.
 */
function createSourceSelectorWindowWrapper() {
	const selector = createSourceSelectorWindow();
	sourceSelectorWindow = selector;

	selector.on("closed", () => {
		sourceSelectorWindow = null;
	});

	return selector;
}
|
||||
|
||||
/**
 * Return the countdown overlay window, reusing the existing one while it is
 * alive and creating (and tracking) a new one otherwise.
 */
function createCountdownOverlayWindowWrapper() {
	const existing = countdownOverlayWindow;
	if (existing && !existing.isDestroyed()) {
		return existing;
	}

	const overlay = createCountdownOverlayWindow();
	countdownOverlayWindow = overlay;

	overlay.on("closed", () => {
		countdownOverlayWindow = null;
	});

	return overlay;
}
|
||||
|
||||
// Closing every window quits the app entirely (tray icon goes too).
|
||||
// The in-app "Return to Recorder" button covers the editor → HUD round-trip,
|
||||
// so closing the last window is an explicit "I'm done" signal.
|
||||
app.on("window-all-closed", () => {
|
||||
app.quit();
|
||||
});
|
||||
|
||||
app.on("activate", () => {
	// On macOS it is common to bring a window back when the dock icon is
	// clicked and nothing else is showing. The countdown overlay does not
	// count as a visible window for this purpose.
	const countsAsVisible = (window: BrowserWindow) => {
		if (window.isDestroyed() || !window.isVisible()) {
			return false;
		}
		const windowUrl = window.webContents.getURL();
		return !windowUrl.includes("windowType=countdown-overlay");
	};

	if (BrowserWindow.getAllWindows().some(countsAsVisible)) {
		return;
	}
	showMainWindow();
});
|
||||
|
||||
// Once the app is ready: configure permissions and capture handling, build the
// tray and menu, register all IPC handlers, then open the first (HUD) window.
app.whenReady().then(async () => {
	// Force the app into "regular" activation policy so the Dock icon appears.
	// The HUD overlay (transparent + frameless + skipTaskbar) is the first
	// window we open, and AppKit otherwise classifies us as an accessory app.
	if (process.platform === "darwin") {
		app.dock?.show();
	}

	// Single allow-list shared by the permission check and request handlers so
	// the two can never drift apart.
	const allowedPermissions: readonly string[] = [
		"media",
		"audioCapture",
		"microphone",
		"videoCapture",
		"camera",
		"screen",
		"display-capture",
	];

	// Allow microphone/media/screen permission checks
	session.defaultSession.setPermissionCheckHandler((_webContents, permission) => {
		return allowedPermissions.includes(permission);
	});

	session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback) => {
		callback(allowedPermissions.includes(permission));
	});

	// Answer getDisplayMedia with the source chosen in the app's own picker.
	session.defaultSession.setDisplayMediaRequestHandler(
		(request, callback) => {
			const source = getSelectedDesktopSource();
			if (!request.videoRequested || !source) {
				callback({});
				return;
			}

			callback({
				video: source,
				// Loopback system audio is only attached on Windows.
				...(request.audioRequested && process.platform === "win32" ? { audio: "loopback" } : {}),
			});
		},
		{ useSystemPicker: false },
	);

	// Request microphone permission from macOS. Screen Recording is requested
	// lazily from the source-picker action so the system prompt is not hidden
	// behind OpenScreen's source selector window.
	if (process.platform === "darwin") {
		const micStatus = systemPreferences.getMediaAccessStatus("microphone");
		if (micStatus !== "granted") {
			await systemPreferences.askForMediaAccess("microphone");
		}
	}

	// Quit when the HUD overlay asks to close.
	ipcMain.on("hud-overlay-close", () => {
		app.quit();
	});

	// Changing locale rebuilds everything that carries translated labels.
	ipcMain.handle("set-locale", (_, locale: string) => {
		setMainLocale(locale);
		setupApplicationMenu();
		updateTrayMenu();
	});

	createTray();
	updateTrayMenu();
	setupApplicationMenu();

	// Ensure recordings directory exists
	await ensureRecordingsDir();

	// Close the current main window without the unsaved-changes prompt and
	// return to the HUD.
	function switchToHudWrapper() {
		const current = mainWindow;
		if (current) {
			// close() throws on a destroyed window; skip it, and always lower
			// isForceClosing so later close prompts are not silently bypassed.
			if (!current.isDestroyed()) {
				isForceClosing = true;
				try {
					current.close();
				} finally {
					isForceClosing = false;
				}
			}
			mainWindow = null;
		}
		showMainWindow();
	}

	registerIpcHandlers(
		createEditorWindowWrapper,
		createSourceSelectorWindowWrapper,
		createCountdownOverlayWindowWrapper,
		() => mainWindow,
		() => sourceSelectorWindow,
		() => countdownOverlayWindow,
		(recording: boolean, sourceName: string) => {
			selectedSourceName = sourceName;
			if (!tray) createTray();
			updateTrayMenu(recording);
			// When recording stops, bring the main window back.
			if (!recording) {
				showMainWindow();
			}
		},
		switchToHudWrapper,
	);
	createWindow();
});
|
||||
@@ -0,0 +1,20 @@
|
||||
import type {
|
||||
CursorCapabilities,
|
||||
CursorProviderKind,
|
||||
CursorRecordingData,
|
||||
CursorTelemetryPoint,
|
||||
} from "../../../src/native/contracts";
|
||||
|
||||
/** Outcome of loading cursor telemetry. */
export interface CursorTelemetryLoadResult {
	/** Whether the load succeeded. */
	success: boolean;
	/** The loaded telemetry points. */
	samples: CursorTelemetryPoint[];
	/** Optional informational text accompanying the result. */
	message?: string;
	/** Optional error text — presumably set when `success` is false; confirm against implementations. */
	error?: string;
}
|
||||
|
||||
/** Contract implemented by a cursor data provider. */
export interface CursorNativeAdapter {
	/** Which provider this adapter represents. */
	readonly kind: CursorProviderKind;
	/** Resolve the provider's capabilities. */
	getCapabilities(): Promise<CursorCapabilities>;
	/**
	 * Resolve cursor recording data.
	 * NOTE(review): `videoPath` presumably selects the recording to read — confirm against implementations.
	 */
	getRecordingData(videoPath?: string | null): Promise<CursorRecordingData>;
	/** Resolve cursor telemetry as a `CursorTelemetryLoadResult`. */
	getTelemetry(videoPath?: string | null): Promise<CursorTelemetryLoadResult>;
}
|
||||
@@ -0,0 +1,46 @@
|
||||
import type { Rectangle } from "electron";
|
||||
import { MacNativeCursorRecordingSession } from "./macNativeCursorRecordingSession";
|
||||
import type { CursorRecordingSession } from "./session";
|
||||
import { TelemetryRecordingSession } from "./telemetryRecordingSession";
|
||||
import { WindowsNativeRecordingSession } from "./windowsNativeRecordingSession";
|
||||
|
||||
/** Inputs for `createCursorRecordingSession`. */
interface CreateCursorRecordingSessionOptions {
	/** Supplies the bounds of the display being recorded, or null when unknown. */
	getDisplayBounds: () => Rectangle | null;
	/** Forwarded to the session; presumably caps the number of stored samples — confirm. */
	maxSamples: number;
	/** Selects which session implementation is created. */
	platform: NodeJS.Platform;
	/** Sampling interval, in milliseconds, forwarded to the session. */
	sampleIntervalMs: number;
	/** Forwarded only to the Windows session. */
	sourceId?: string | null;
	/** Optional start timestamp in milliseconds, forwarded to the session. */
	startTimeMs?: number;
}
|
||||
|
||||
/**
 * Create the cursor recording session for `options.platform`.
 *
 * - `win32`: native Windows session (the only one that receives `sourceId`).
 * - `darwin`: native macOS session.
 * - anything else (Linux): telemetry session that samples cursor positions
 *   through Electron's `screen` API on an interval — no cursor sprites/assets
 *   and no clicks, just position telemetry.
 */
export function createCursorRecordingSession(
	options: CreateCursorRecordingSessionOptions,
): CursorRecordingSession {
	const { getDisplayBounds, maxSamples, sampleIntervalMs, startTimeMs } = options;

	switch (options.platform) {
		case "win32":
			return new WindowsNativeRecordingSession({
				getDisplayBounds,
				maxSamples,
				sampleIntervalMs,
				sourceId: options.sourceId,
				startTimeMs,
			});
		case "darwin":
			return new MacNativeCursorRecordingSession({
				getDisplayBounds,
				maxSamples,
				sampleIntervalMs,
				startTimeMs,
			});
		default:
			return new TelemetryRecordingSession({
				getDisplayBounds,
				maxSamples,
				sampleIntervalMs,
				startTimeMs,
			});
	}
}
|
||||
@@ -0,0 +1,411 @@
|
||||
import { type ChildProcessByStdio, spawn } from "node:child_process";
|
||||
import { accessSync, constants as fsConstants } from "node:fs";
|
||||
import path from "node:path";
|
||||
import type { Readable } from "node:stream";
|
||||
import { type Rectangle, screen, systemPreferences } from "electron";
|
||||
import type {
|
||||
CursorRecordingData,
|
||||
CursorRecordingSample,
|
||||
NativeCursorType,
|
||||
} from "../../../../src/native/contracts";
|
||||
import type { CursorRecordingSession } from "./session";
|
||||
|
||||
/** Construction options for `MacNativeCursorRecordingSession`. */
interface MacNativeCursorRecordingSessionOptions {
	/** Supplies the bounds used to normalize cursor positions; null falls back to the display nearest the cursor. */
	getDisplayBounds: () => Rectangle | null;
	/** NOTE(review): presumably caps the number of stored samples — its use is outside this view. */
	maxSamples: number;
	/** Sampling interval in milliseconds, passed to the helper and used by the fallback timer. */
	sampleIntervalMs: number;
	/** Optional session start timestamp in milliseconds; defaults to `Date.now()` at start. */
	startTimeMs?: number;
}
|
||||
|
||||
/** One JSON message parsed from a line of the macOS cursor helper's stdout. */
type MacCursorEvent =
	| {
			// Sent by the helper once it is running.
			type: "ready";
			timestampMs: number;
			// Whether the helper reports Accessibility as trusted.
			accessibilityTrusted?: boolean;
			mouseTapReady?: boolean;
	  }
	| {
			// One cursor sample: cursor shape plus left-button state.
			type: "sample";
			timestampMs: number;
			cursorType?: NativeCursorType | null;
			leftButtonDown?: boolean;
			leftButtonPressed?: boolean;
			leftButtonReleased?: boolean;
	  };
|
||||
|
||||
const HELPER_NAME = "openscreen-macos-cursor-helper";
|
||||
const READY_TIMEOUT_MS = 5_000;
|
||||
|
||||
/**
 * List the paths where the macOS cursor helper may live, in priority order:
 * the `OPENSCREEN_MAC_CURSOR_HELPER_EXE` override (when set and non-empty),
 * the local build output, the per-architecture binary under the app root, and
 * the per-architecture binary under the resources directory.
 */
function helperCandidates() {
	const appRoot = process.env.APP_ROOT ? path.resolve(process.env.APP_ROOT) : process.cwd();
	const archTag = process.arch === "arm64" ? "darwin-arm64" : "darwin-x64";

	let resourceRoot = path.join(appRoot, "resources");
	if (typeof process.resourcesPath === "string") {
		resourceRoot = process.resourcesPath;
	}

	const candidates: string[] = [];

	const override = process.env.OPENSCREEN_MAC_CURSOR_HELPER_EXE?.trim();
	if (override) {
		candidates.push(override);
	}

	candidates.push(
		path.join(appRoot, "electron", "native", "screencapturekit", "build", HELPER_NAME),
		path.join(appRoot, "electron", "native", "bin", archTag, HELPER_NAME),
		path.join(resourceRoot, "electron", "native", "bin", archTag, HELPER_NAME),
	);

	return candidates;
}
|
||||
|
||||
/**
 * Return the first helper candidate path that is executable, or `null` when
 * none of them is.
 */
export function findMacCursorHelperPath() {
	const isExecutable = (candidate: string) => {
		try {
			accessSync(candidate, fsConstants.X_OK);
			return true;
		} catch {
			// Not usable; the next helper location is tried.
			return false;
		}
	};

	return helperCandidates().find(isExecutable) ?? null;
}
|
||||
|
||||
/**
 * Prompt for macOS Accessibility access and report whether it is granted.
 *
 * Off macOS this reports "granted" immediately. On macOS it first asks
 * Electron to prompt, then launches the cursor helper briefly and reads its
 * `ready` event to learn whether Accessibility is trusted. The probe settles
 * exactly once — on `ready`, on a spawn error, on helper exit, or after
 * `READY_TIMEOUT_MS` — and the helper is sent SIGTERM at that point.
 *
 * Possible `status` values: "granted", "not-determined", "missing-helper",
 * "timeout", "error", "exited".
 */
export async function requestMacCursorAccessibilityAccess() {
	if (process.platform !== "darwin") {
		return { success: true, granted: true, status: "granted" };
	}

	try {
		systemPreferences.isTrustedAccessibilityClient(true);
	} catch {
		// Continue with helper probing; it can trigger the same macOS prompt.
	}

	const helperPath = findMacCursorHelperPath();
	if (!helperPath) {
		return { success: true, granted: false, status: "missing-helper" };
	}

	return new Promise<{ success: boolean; granted: boolean; status: string; error?: string }>(
		(resolve) => {
			const probe = spawn(helperPath, [JSON.stringify({ sampleIntervalMs: 250 })], {
				stdio: ["ignore", "pipe", "pipe"],
			});
			let done = false;
			let pending = "";

			// Resolve once; later outcomes are ignored.
			const settle = (result: {
				success: boolean;
				granted: boolean;
				status: string;
				error?: string;
			}) => {
				if (done) {
					return;
				}
				done = true;
				clearTimeout(deadline);
				if (!probe.killed) {
					probe.kill("SIGTERM");
				}
				resolve(result);
			};

			const fail = (status: string, error: string) => {
				settle({ success: false, granted: false, status, error });
			};

			const deadline = setTimeout(() => {
				fail("timeout", "Timed out waiting for macOS cursor helper");
			}, READY_TIMEOUT_MS);

			probe.stdout.setEncoding("utf8");
			probe.stdout.on("data", (chunk: string) => {
				// Buffer partial lines; only complete lines are parsed.
				pending += chunk;
				const completeLines = pending.split(/\r?\n/);
				pending = completeLines.pop() ?? "";

				for (const rawLine of completeLines) {
					const text = rawLine.trim();
					if (!text) {
						continue;
					}
					try {
						const event = JSON.parse(text) as MacCursorEvent;
						if (event.type === "ready") {
							const trusted = event.accessibilityTrusted === true;
							settle({
								success: true,
								granted: trusted,
								status: trusted ? "granted" : "not-determined",
							});
							return;
						}
					} catch {
						// Ignore non-JSON helper output.
					}
				}
			});

			probe.once("error", (error) => {
				fail("error", error.message);
			});
			probe.once("exit", (code, signal) => {
				fail(
					"exited",
					`macOS cursor helper exited before ready (code=${code}, signal=${signal})`,
				);
			});
		},
	);
}
|
||||
|
||||
/** Limit `value` to the range [`min`, `max`]: raise it to `min` first, then cap it at `max`. */
function clamp(value: number, min: number, max: number) {
	const raised = Math.max(min, value);
	return Math.min(max, raised);
}
|
||||
|
||||
/** Pass through the recognized cursor types ("arrow", "pointer", "text"); map anything else to `null`. */
function normalizeCursorType(value: unknown): NativeCursorType | null {
	const recognized: readonly unknown[] = ["arrow", "pointer", "text"];
	if (recognized.includes(value)) {
		return value as NativeCursorType;
	}
	return null;
}
|
||||
|
||||
export class MacNativeCursorRecordingSession implements CursorRecordingSession {
|
||||
private samples: CursorRecordingSample[] = [];
|
||||
private process: ChildProcessByStdio<null, Readable, Readable> | null = null;
|
||||
private lineBuffer = "";
|
||||
private startTimeMs = 0;
|
||||
private fallbackInterval: NodeJS.Timeout | null = null;
|
||||
private readyResolve: (() => void) | null = null;
|
||||
private readyReject: ((error: Error) => void) | null = null;
|
||||
private readyTimer: NodeJS.Timeout | null = null;
|
||||
private previousLeftButtonDown = false;
|
||||
private consecutiveOutsideSamples = 0;
|
||||
// Only hide after this many consecutive out-of-bounds samples (≈100ms at 33ms interval).
|
||||
// Fast swipes that briefly exit the display are clipped by clip-path instead of disappearing.
|
||||
private static readonly OUTSIDE_HIDE_THRESHOLD = 3;
|
||||
|
||||
constructor(private readonly options: MacNativeCursorRecordingSessionOptions) {}
|
||||
|
||||
async start(): Promise<void> {
|
||||
this.samples = [];
|
||||
this.lineBuffer = "";
|
||||
this.startTimeMs = this.options.startTimeMs ?? Date.now();
|
||||
this.previousLeftButtonDown = false;
|
||||
this.consecutiveOutsideSamples = 0;
|
||||
|
||||
try {
|
||||
systemPreferences.isTrustedAccessibilityClient(true);
|
||||
} catch {
|
||||
// Link cursor detection degrades to arrow when Accessibility is unavailable.
|
||||
}
|
||||
|
||||
const helperPath = findMacCursorHelperPath();
|
||||
if (!helperPath) {
|
||||
this.startPositionOnlyFallback();
|
||||
return;
|
||||
}
|
||||
|
||||
const child = spawn(
|
||||
helperPath,
|
||||
[
|
||||
JSON.stringify({
|
||||
sampleIntervalMs: this.options.sampleIntervalMs,
|
||||
}),
|
||||
],
|
||||
{
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
},
|
||||
);
|
||||
this.process = child;
|
||||
|
||||
child.stdout.setEncoding("utf8");
|
||||
child.stdout.on("data", (chunk: string) => this.handleStdoutChunk(chunk));
|
||||
child.stderr.setEncoding("utf8");
|
||||
child.stderr.on("data", (chunk: string) => {
|
||||
const message = chunk.trim();
|
||||
if (message) {
|
||||
console.error("[cursor-macos]", message);
|
||||
}
|
||||
});
|
||||
child.once("exit", (code, signal) => {
|
||||
this.rejectReady(
|
||||
new Error(`macOS cursor helper exited before ready (code=${code}, signal=${signal})`),
|
||||
);
|
||||
this.process = null;
|
||||
});
|
||||
child.once("error", (error) => {
|
||||
this.rejectReady(error);
|
||||
this.process = null;
|
||||
});
|
||||
|
||||
try {
|
||||
await this.waitUntilReady();
|
||||
} catch (error) {
|
||||
this.killHelperProcess(child);
|
||||
this.process = null;
|
||||
console.warn("[cursor-macos] falling back to position-only cursor telemetry:", error);
|
||||
this.startPositionOnlyFallback();
|
||||
}
|
||||
}
|
||||
|
||||
async stop(): Promise<CursorRecordingData> {
|
||||
const child = this.process;
|
||||
this.process = null;
|
||||
this.clearReadyState();
|
||||
|
||||
if (this.fallbackInterval) {
|
||||
clearInterval(this.fallbackInterval);
|
||||
this.fallbackInterval = null;
|
||||
}
|
||||
|
||||
if (child) {
|
||||
this.killHelperProcess(child);
|
||||
}
|
||||
|
||||
return {
|
||||
version: 2,
|
||||
provider: "none",
|
||||
samples: this.samples,
|
||||
assets: [],
|
||||
};
|
||||
}
|
||||
|
||||
private startPositionOnlyFallback() {
|
||||
this.captureSample(Date.now(), null, false, false, false);
|
||||
this.fallbackInterval = setInterval(() => {
|
||||
this.captureSample(Date.now(), null, false, false, false);
|
||||
}, this.options.sampleIntervalMs);
|
||||
}
|
||||
|
||||
/**
 * Consumes a chunk of helper stdout and dispatches every complete line.
 *
 * Output is treated as newline-delimited JSON. The trailing segment after the
 * last newline is kept in `lineBuffer` until a later chunk completes it.
 * Lines that fail to parse (or whose handling throws) are logged and skipped.
 *
 * @param chunk Text received from the helper's stdout.
 */
private handleStdoutChunk(chunk: string) {
	this.lineBuffer += chunk;
	const segments = this.lineBuffer.split(/\r?\n/);
	// The final segment has no terminating newline yet; hold it back.
	this.lineBuffer = segments.pop() ?? "";

	segments
		.map((segment) => segment.trim())
		.filter((text) => text.length > 0)
		.forEach((text) => {
			try {
				this.handleEvent(JSON.parse(text) as MacCursorEvent);
			} catch (error) {
				console.error("Failed to parse macOS cursor helper output:", error, text);
			}
		});
}
|
||||
|
||||
/**
 * Routes one decoded helper event.
 *
 * - "ready": warns when the helper reports Accessibility as untrusted, then
 *   resolves the pending readiness wait.
 * - "sample": records a cursor sample from the event's timestamp, cursor type
 *   and left-button flags (any flag that is not exactly `true` counts as false).
 *
 * Any other event type is ignored.
 *
 * @param payload Event parsed from a helper stdout line.
 */
private handleEvent(payload: MacCursorEvent) {
	switch (payload.type) {
		case "ready": {
			if (payload.accessibilityTrusted === false) {
				console.warn(
					"[cursor-macos] Accessibility is not trusted; cursor shape detection will be arrow-only.",
				);
			}
			this.resolveReady();
			return;
		}
		case "sample": {
			const cursorType = normalizeCursorType(payload.cursorType);
			this.captureSample(
				payload.timestampMs,
				cursorType,
				payload.leftButtonDown === true,
				payload.leftButtonPressed === true,
				payload.leftButtonReleased === true,
			);
			return;
		}
		default:
			return;
	}
}
|
||||
|
||||
/**
 * Appends one cursor sample using the current on-screen cursor position.
 *
 * The position is read from Electron's `screen` module and normalised against
 * the bounds from `options.getDisplayBounds()`, falling back to the bounds of
 * the display nearest the cursor. Stored coordinates are clamped to [0, 1].
 *
 * @param timestampMs Sample timestamp; the session start time is subtracted
 * and the result floored at 0.
 * @param cursorType Cursor type to attach, or `null` to omit the field.
 * @param leftButtonDown Whether the left button is currently held.
 * @param leftButtonPressed Whether a press edge was reported for this sample.
 * @param leftButtonReleased Whether a release edge was reported for this sample.
 */
private captureSample(
	timestampMs: number,
	cursorType: NativeCursorType | null,
	leftButtonDown: boolean,
	leftButtonPressed: boolean,
	leftButtonReleased: boolean,
) {
	const point = screen.getCursorScreenPoint();
	const area = this.options.getDisplayBounds() ?? screen.getDisplayNearestPoint(point).bounds;
	const relativeX = (point.x - area.x) / Math.max(1, area.width);
	const relativeY = (point.y - area.y) / Math.max(1, area.height);
	const outside = relativeX < 0 || relativeX > 1 || relativeY < 0 || relativeY > 1;

	// Brief excursions outside the display (fewer than OUTSIDE_HIDE_THRESHOLD
	// consecutive samples) stay visible: the clamped position keeps the cursor
	// at the canvas edge, where clip-path clips it. Sustained excursions
	// (>= threshold, roughly 100ms) flip `visible` to false so that moving to
	// another display does not leave ghost cursors or motion trails.
	this.consecutiveOutsideSamples = outside ? this.consecutiveOutsideSamples + 1 : 0;
	const visible =
		this.consecutiveOutsideSamples < MacNativeCursorRecordingSession.OUTSIDE_HIDE_THRESHOLD;

	// Button edges come either from the explicit flags or from comparing the
	// held state against the previous sample; a press wins over a release.
	const pressedNow = leftButtonPressed || (leftButtonDown && !this.previousLeftButtonDown);
	const releasedNow = leftButtonReleased || (!leftButtonDown && this.previousLeftButtonDown);
	const interactionType = pressedNow ? "click" : releasedNow ? "mouseup" : "move";
	this.previousLeftButtonDown = leftButtonDown;

	this.samples.push({
		timeMs: Math.max(0, timestampMs - this.startTimeMs),
		cx: clamp(relativeX, 0, 1),
		cy: clamp(relativeY, 0, 1),
		visible,
		interactionType,
		...(cursorType ? { cursorType } : {}),
	});

	// Keep at most `maxSamples` entries by discarding the oldest one.
	if (this.samples.length > this.options.maxSamples) {
		this.samples.shift();
	}
}
|
||||
|
||||
/**
 * Returns a promise that settles when the helper becomes ready.
 *
 * The promise's callbacks are stored on the instance so `resolveReady()` /
 * `rejectReady()` can settle it; a timer rejects it after READY_TIMEOUT_MS.
 */
private waitUntilReady() {
	return new Promise<void>((resolve, reject) => {
		const onTimeout = () => {
			this.rejectReady(new Error("Timed out waiting for macOS cursor helper"));
		};

		this.readyResolve = resolve;
		this.readyReject = reject;
		this.readyTimer = setTimeout(onTimeout, READY_TIMEOUT_MS);
	});
}
|
||||
|
||||
/**
 * Fulfils the pending readiness promise, if any.
 *
 * The callback is captured before the ready state is cleared so that the
 * timer is cancelled and later calls become no-ops.
 */
private resolveReady() {
	const { readyResolve } = this;
	this.clearReadyState();
	if (readyResolve) {
		readyResolve();
	}
}
|
||||
|
||||
/**
 * Rejects the pending readiness promise, if any.
 *
 * The callback is captured before the ready state is cleared so that the
 * timer is cancelled and later calls become no-ops.
 *
 * @param error Reason passed to the promise's reject callback.
 */
private rejectReady(error: Error) {
	const { readyReject } = this;
	this.clearReadyState();
	if (readyReject) {
		readyReject(error);
	}
}
|
||||
|
||||
/**
 * Cancels the readiness timeout and forgets both readiness callbacks.
 * Neither callback is invoked.
 */
private clearReadyState() {
	const timer = this.readyTimer;
	if (timer) {
		clearTimeout(timer);
		this.readyTimer = null;
	}

	this.readyReject = null;
	this.readyResolve = null;
}
|
||||
|
||||
/**
 * Terminates the helper: SIGTERM first, SIGKILL if it is still alive 500ms later.
 *
 * `child.killed` only records that a signal was successfully *sent*; it turns
 * true right after `kill("SIGTERM")` even when the process ignores the signal.
 * Liveness is therefore judged by `exitCode` / `signalCode`, which stay `null`
 * until the process has actually exited. Checking `killed` in the timer would
 * make the SIGKILL escalation unreachable.
 *
 * @param child Helper process to terminate.
 */
private killHelperProcess(child: ChildProcessByStdio<null, Readable, Readable>) {
	const hasExited = () => child.exitCode !== null || child.signalCode !== null;

	// Nothing to do if the process is gone, or if a signal was already sent.
	if (child.killed || hasExited()) {
		return;
	}

	child.kill("SIGTERM");
	// unref() keeps this timer from holding the event loop open.
	setTimeout(() => {
		if (!hasExited()) {
			child.kill("SIGKILL");
		}
	}, 500).unref();
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
import type { CursorRecordingData } from "../../../../src/native/contracts";
|
||||
|
||||
/** Lifecycle contract shared by the cursor recording session implementations. */
export interface CursorRecordingSession {
	/** Begins capturing cursor samples. */
	start(): Promise<void>;

	/** Ends capturing and resolves with the recorded cursor data. */
	stop(): Promise<CursorRecordingData>;
}
|
||||
@@ -0,0 +1,63 @@
|
||||
import { type Rectangle, screen } from "electron";
|
||||
import type { CursorRecordingData, CursorRecordingSample } from "../../../../src/native/contracts";
|
||||
import type { CursorRecordingSession } from "./session";
|
||||
|
||||
/** Configuration for {@link TelemetryRecordingSession}. */
interface TelemetryRecordingSessionOptions {
	/** Bounds used to normalise cursor positions; `null` falls back to the display nearest the cursor. */
	getDisplayBounds: () => Rectangle | null;

	/** Upper limit on retained samples; the oldest sample is dropped once exceeded. */
	maxSamples: number;

	/** Delay between samples, passed to `setInterval`. */
	sampleIntervalMs: number;

	/** Time origin for sample timestamps; defaults to `Date.now()` at start. */
	startTimeMs?: number;
}
|
||||
|
||||
/**
 * Restricts `value` to the closed range [`min`, `max`].
 *
 * @param value Number to restrict.
 * @param min Lower bound.
 * @param max Upper bound.
 * @returns `value` raised to at least `min`, then capped at `max`.
 */
function clamp(value: number, min: number, max: number) {
	const raised = Math.max(min, value);
	return Math.min(max, raised);
}
|
||||
|
||||
/**
 * Position-only cursor recording session.
 *
 * Polls Electron's `screen.getCursorScreenPoint()` on a fixed interval and
 * stores positions normalised to the recorded display. No cursor images or
 * click information are captured.
 */
export class TelemetryRecordingSession implements CursorRecordingSession {
	private samples: CursorRecordingSample[] = [];
	private interval: NodeJS.Timeout | null = null;
	private startTimeMs = 0;

	constructor(private readonly options: TelemetryRecordingSessionOptions) {}

	/**
	 * Resets the sample buffer, records one sample immediately and then one
	 * every `sampleIntervalMs`.
	 *
	 * Calling `start()` again without `stop()` first cancels the previous timer;
	 * otherwise the old interval would keep running with no handle left to clear it.
	 */
	async start(): Promise<void> {
		this.clearSamplingInterval();
		this.samples = [];
		this.startTimeMs = this.options.startTimeMs ?? Date.now();
		this.captureSample();
		this.interval = setInterval(() => {
			this.captureSample();
		}, this.options.sampleIntervalMs);
	}

	/**
	 * Stops polling and returns the recorded samples.
	 *
	 * @returns Version-2 recording data with provider "none" and no assets.
	 */
	async stop(): Promise<CursorRecordingData> {
		this.clearSamplingInterval();

		return {
			version: 2,
			provider: "none",
			samples: this.samples,
			assets: [],
		};
	}

	/** Cancels the polling timer if one is active. */
	private clearSamplingInterval() {
		if (this.interval) {
			clearInterval(this.interval);
			this.interval = null;
		}
	}

	/**
	 * Records the current cursor position, normalised and clamped to [0, 1]
	 * against the configured display bounds (or the display nearest the cursor).
	 */
	private captureSample() {
		const cursor = screen.getCursorScreenPoint();
		const display = this.options.getDisplayBounds() ?? screen.getDisplayNearestPoint(cursor).bounds;
		// Guard against zero-sized bounds so the division below stays finite.
		const width = Math.max(1, display.width);
		const height = Math.max(1, display.height);

		this.samples.push({
			timeMs: Math.max(0, Date.now() - this.startTimeMs),
			cx: clamp((cursor.x - display.x) / width, 0, 1),
			cy: clamp((cursor.y - display.y) / height, 0, 1),
			visible: true,
		});

		// Keep at most `maxSamples` entries by discarding the oldest one.
		if (this.samples.length > this.options.maxSamples) {
			this.samples.shift();
		}
	}
}
|
||||
@@ -0,0 +1,326 @@
|
||||
import { type ChildProcessByStdio, spawn } from "node:child_process";
|
||||
import { existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import type { Readable } from "node:stream";
|
||||
import { app, screen } from "electron";
|
||||
import { parseWindowHandleFromSourceId } from "../../../../src/lib/nativeWindowsRecording";
|
||||
import type {
|
||||
CursorRecordingData,
|
||||
CursorRecordingSample,
|
||||
NativeCursorAsset,
|
||||
} from "../../../../src/native/contracts";
|
||||
import type { CursorRecordingSession } from "./session";
|
||||
import type {
|
||||
WindowsCursorEvent,
|
||||
WindowsNativeRecordingSessionOptions,
|
||||
} from "./windowsNativeRecordingSession.types";
|
||||
|
||||
/**
 * Lists the locations where `cursor-sampler.exe` may live, in priority order:
 *
 * 1. the `OPENSCREEN_CURSOR_SAMPLER_EXE` environment variable (if non-empty
 *    after trimming),
 * 2. the local build output under `electron/native/wgc-capture/build`,
 * 3. the prebuilt binary under `electron/native/bin/win32-{arm64|x64}`.
 *
 * In packaged apps the first `.asar/` path segment is rewritten to
 * `.asar.unpacked/`.
 */
function getCursorSamplerCandidates(): string[] {
	const override = process.env.OPENSCREEN_CURSOR_SAMPLER_EXE?.trim();
	const archTag = process.arch === "arm64" ? "win32-arm64" : "win32-x64";

	const fromAppPath = (...segs: string[]) => {
		const joined = join(app.getAppPath(), ...segs);
		if (!app.isPackaged) {
			return joined;
		}
		return joined.replace(/\.asar([/\\])/, ".asar.unpacked$1");
	};

	const candidates: string[] = [];
	if (override) {
		candidates.push(override);
	}
	candidates.push(
		fromAppPath("electron", "native", "wgc-capture", "build", "cursor-sampler.exe"),
		fromAppPath("electron", "native", "bin", archTag, "cursor-sampler.exe"),
	);
	return candidates;
}
|
||||
|
||||
/**
 * Picks the first cursor sampler candidate that exists on disk.
 *
 * @returns The path of the first existing candidate, or `null` when none exists.
 */
function findCursorSamplerPath(): string | null {
	const match = getCursorSamplerCandidates().find((candidate) => existsSync(candidate));
	return match ?? null;
}
|
||||
|
||||
/** How long to wait for the helper's "ready" event before giving up, in milliseconds. */
const READY_TIMEOUT_MS = 5 * 1_000;
|
||||
|
||||
/** Result of converting a helper sample event into a recording sample. */
interface NormalizedSample {
	/** Sample ready to be appended to the recording. */
	sample: CursorRecordingSample;

	/** Whether both normalised coordinates fell inside [0, 1]. */
	withinBounds: boolean;
}
|
||||
|
||||
/**
 * Cursor recording session backed by the native Windows cursor sampler.
 *
 * The sampler is spawned as a child process and reports newline-delimited JSON
 * events on stdout: one "ready" event, then "sample" events (optionally
 * carrying a cursor image asset), or an "error" event. Samples are normalised
 * against the capture bounds and buffered until `stop()`.
 */
export class WindowsNativeRecordingSession implements CursorRecordingSession {
	private assets = new Map<string, NativeCursorAsset>();
	private samples: CursorRecordingSample[] = [];
	private process: ChildProcessByStdio<null, Readable, Readable> | null = null;
	private lineBuffer = "";
	private startTimeMs = 0;
	private readyResolve: (() => void) | null = null;
	private readyReject: ((error: Error) => void) | null = null;
	private readyTimer: NodeJS.Timeout | null = null;
	private sampleCount = 0;
	private outOfBoundsSampleCount = 0;
	private previousLeftButtonDown = false;

	constructor(private readonly options: WindowsNativeRecordingSessionOptions) {}

	/**
	 * Resets all state, spawns the sampler and waits for its "ready" event.
	 *
	 * @throws Error when no sampler binary is found, when the helper exits,
	 * errors or reports an error before becoming ready, or when readiness times
	 * out. The helper is terminated before the error is rethrown.
	 */
	async start(): Promise<void> {
		this.assets.clear();
		this.samples = [];
		this.lineBuffer = "";
		this.startTimeMs = this.options.startTimeMs ?? Date.now();
		this.sampleCount = 0;
		this.outOfBoundsSampleCount = 0;
		this.previousLeftButtonDown = false;

		const helperPath = findCursorSamplerPath();
		if (!helperPath) {
			throw new Error("Windows cursor sampler helper is not available.");
		}

		// Helper arguments: sample interval, then (optionally) the window handle.
		const windowHandle = parseWindowHandleFromSourceId(this.options.sourceId);
		const args = [String(this.options.sampleIntervalMs)];
		if (windowHandle) args.push(windowHandle);

		const child = spawn(helperPath, args, {
			stdio: ["ignore", "pipe", "pipe"],
			windowsHide: true,
		});

		this.process = child;
		this.logDiagnostic("spawn", {
			pid: child.pid ?? null,
			sampleIntervalMs: this.options.sampleIntervalMs,
			sourceId: this.options.sourceId ?? null,
			windowHandle,
		});

		child.stdout.setEncoding("utf8");
		child.stdout.on("data", (chunk: string) => {
			this.handleStdoutChunk(chunk);
		});
		child.stderr.setEncoding("utf8");
		child.stderr.on("data", (chunk: string) => {
			const message = chunk.trim();
			// Whitespace-only chunks carry no information; skip both log sinks.
			if (message) {
				this.logDiagnostic("stderr", { message });
				console.error("[cursor-native]", message);
			}
		});
		child.once("exit", (code, signal) => {
			this.logDiagnostic("exit", {
				code,
				signal,
				sampleCount: this.sampleCount,
				assetCount: this.assets.size,
				outOfBoundsSampleCount: this.outOfBoundsSampleCount,
			});
			// No-op once readiness has already been settled.
			this.rejectReady(
				new Error(`Windows cursor helper exited before ready (code=${code}, signal=${signal})`),
			);
		});
		child.once("error", (error) => {
			this.logDiagnostic("process-error", { message: error.message });
			this.rejectReady(error);
		});

		try {
			await this.waitUntilReady();
		} catch (error) {
			this.terminateHelperProcess();
			throw error;
		}
	}

	/**
	 * Kills the helper and returns the recorded data.
	 *
	 * @returns Version-2 recording data; the provider is "native" when at least
	 * one cursor asset was received and "none" otherwise.
	 */
	async stop(): Promise<CursorRecordingData> {
		const child = this.process;
		this.process = null;
		this.clearReadyState();

		this.killHelperProcess(child);

		this.logDiagnostic("stop", {
			sampleCount: this.sampleCount,
			assetCount: this.assets.size,
			outOfBoundsSampleCount: this.outOfBoundsSampleCount,
		});

		return {
			version: 2,
			provider: this.assets.size > 0 ? "native" : "none",
			samples: this.samples,
			assets: [...this.assets.values()],
		};
	}

	/**
	 * Buffers helper stdout and dispatches every complete line as a JSON event.
	 * Lines that fail to parse (or whose handling throws) are logged and skipped.
	 */
	private handleStdoutChunk(chunk: string) {
		this.lineBuffer += chunk;
		const lines = this.lineBuffer.split(/\r?\n/);
		// The last element is an unterminated partial line; keep it for later.
		this.lineBuffer = lines.pop() ?? "";

		for (const line of lines) {
			const trimmedLine = line.trim();
			if (!trimmedLine) {
				continue;
			}

			try {
				const payload = JSON.parse(trimmedLine) as WindowsCursorEvent;
				this.handleEvent(payload);
			} catch (error) {
				console.error("Failed to parse Windows cursor helper output:", error, trimmedLine);
			}
		}
	}

	/**
	 * Handles one decoded helper event.
	 *
	 * "error" fails the session, "ready" resolves the readiness wait, and
	 * "sample" registers any not-yet-seen cursor asset and appends a normalised
	 * sample. Anything else is ignored.
	 */
	private handleEvent(payload: WindowsCursorEvent) {
		if (payload.type === "error") {
			this.logDiagnostic("helper-error", { message: payload.message });
			console.error("Windows cursor helper error:", payload.message);
			this.failHelper(new Error(payload.message));
			return;
		}

		if (payload.type === "ready") {
			this.logDiagnostic("ready", { timestampMs: payload.timestampMs });
			this.resolveReady();
			return;
		}

		// The payload comes from an unchecked JSON cast, so verify the tag at
		// runtime instead of treating every remaining object as a sample.
		if (payload.type !== "sample") {
			return;
		}

		if (payload.asset?.id && !this.assets.has(payload.asset.id)) {
			// The asset's scale factor is taken from the display nearest the sample point.
			const assetDisplay = screen.getDisplayNearestPoint({ x: payload.x, y: payload.y });
			this.assets.set(payload.asset.id, {
				id: payload.asset.id,
				platform: "win32",
				imageDataUrl: payload.asset.imageDataUrl,
				width: payload.asset.width,
				height: payload.asset.height,
				hotspotX: payload.asset.hotspotX,
				hotspotY: payload.asset.hotspotY,
				scaleFactor: assetDisplay.scaleFactor,
				cursorType: payload.asset.cursorType ?? payload.cursorType ?? null,
			});
			this.logDiagnostic("asset", {
				id: payload.asset.id,
				width: payload.asset.width,
				height: payload.asset.height,
				hotspotX: payload.asset.hotspotX,
				hotspotY: payload.asset.hotspotY,
				scaleFactor: assetDisplay.scaleFactor,
			});
		}

		// normalizeSample() reads the counters before they are incremented here
		// to decide whether this is the first (or first out-of-bounds) sample.
		const normalized = this.normalizeSample(payload);
		this.sampleCount += 1;
		if (!normalized.withinBounds) {
			this.outOfBoundsSampleCount += 1;
		}

		this.samples.push(normalized.sample);

		// Keep at most `maxSamples` entries by discarding the oldest one.
		if (this.samples.length > this.options.maxSamples) {
			this.samples.shift();
		}
	}

	/**
	 * Converts a helper sample into a recording sample.
	 *
	 * Coordinates are normalised against the event's own bounds, then the
	 * configured display bounds, then the primary display. They are not clamped;
	 * out-of-range samples are marked invisible instead. The first sample and
	 * the first out-of-bounds sample are logged for diagnostics.
	 */
	private normalizeSample(
		payload: Extract<WindowsCursorEvent, { type: "sample" }>,
	): NormalizedSample {
		const bounds =
			payload.bounds ?? this.options.getDisplayBounds() ?? screen.getPrimaryDisplay().bounds;
		const width = Math.max(1, bounds.width);
		const height = Math.max(1, bounds.height);
		const normalizedX = (payload.x - bounds.x) / width;
		const normalizedY = (payload.y - bounds.y) / height;
		const withinBounds =
			normalizedX >= 0 && normalizedX <= 1 && normalizedY >= 0 && normalizedY <= 1;
		const leftButtonDown = payload.leftButtonDown === true;
		const leftButtonPressed = payload.leftButtonPressed === true;
		const leftButtonReleased = payload.leftButtonReleased === true;
		// Button edges come either from the explicit flags or from comparing the
		// held state against the previous sample; a press wins over a release.
		const interactionType =
			leftButtonPressed || (leftButtonDown && !this.previousLeftButtonDown)
				? "click"
				: leftButtonReleased || (!leftButtonDown && this.previousLeftButtonDown)
					? "mouseup"
					: "move";
		this.previousLeftButtonDown = leftButtonDown;

		if (this.sampleCount === 0 || (!withinBounds && this.outOfBoundsSampleCount === 0)) {
			this.logDiagnostic("sample", {
				rawX: payload.x,
				rawY: payload.y,
				normalizedX,
				normalizedY,
				visible: payload.visible,
				withinBounds,
				bounds,
				handle: payload.handle,
			});
		}

		return {
			withinBounds,
			sample: {
				timeMs: Math.max(0, payload.timestampMs - this.startTimeMs),
				cx: normalizedX,
				cy: normalizedY,
				assetId: payload.handle,
				visible: payload.visible && withinBounds,
				cursorType: payload.cursorType ?? payload.asset?.cursorType ?? null,
				interactionType,
			},
		};
	}

	/** Returns a promise settled by `resolveReady()` / `rejectReady()` or the readiness timeout. */
	private waitUntilReady() {
		return new Promise<void>((resolve, reject) => {
			this.readyResolve = resolve;
			this.readyReject = reject;
			this.readyTimer = setTimeout(() => {
				this.rejectReady(new Error("Timed out waiting for Windows cursor helper readiness"));
			}, READY_TIMEOUT_MS);
		});
	}

	/** Fulfils the pending readiness promise, if any, and clears the ready state. */
	private resolveReady() {
		const resolve = this.readyResolve;
		this.clearReadyState();
		resolve?.();
	}

	/** Rejects the pending readiness promise, if any, and clears the ready state. */
	private rejectReady(error: Error) {
		const reject = this.readyReject;
		this.clearReadyState();
		reject?.(error);
	}

	/** Rejects any pending readiness wait and terminates the helper. */
	private failHelper(error: Error) {
		this.rejectReady(error);
		this.terminateHelperProcess();
	}

	/** Detaches the current helper process from the session and kills it. */
	private terminateHelperProcess() {
		const child = this.process;
		this.process = null;
		this.killHelperProcess(child);
	}

	/** Sends the default kill signal unless `child` is null or was already signalled. */
	private killHelperProcess(child: ChildProcessByStdio<null, Readable, Readable> | null) {
		if (child && !child.killed) {
			child.kill();
		}
	}

	/** Cancels the readiness timeout and forgets both readiness callbacks without calling them. */
	private clearReadyState() {
		if (this.readyTimer) {
			clearTimeout(this.readyTimer);
			this.readyTimer = null;
		}
		this.readyResolve = null;
		this.readyReject = null;
	}

	/** Writes one structured diagnostic line (`event` plus `data`, JSON-encoded) via `console.info`. */
	private logDiagnostic(event: string, data: Record<string, unknown>) {
		console.info(
			"[cursor-native][win32]",
			JSON.stringify({
				event,
				...data,
			}),
		);
	}
}
|
||||
@@ -0,0 +1,56 @@
|
||||
import type { Rectangle } from "electron";
|
||||
import type { NativeCursorType } from "../../../../src/native/contracts";
|
||||
|
||||
/** "sample" event emitted by the Windows cursor sampler helper. */
export interface WindowsCursorSampleEvent {
	type: "sample";
	/** Sample time; the session start time is subtracted from it when recording. */
	timestampMs: number;
	/** Cursor X position, in the same coordinate space as `bounds`. */
	x: number;
	/** Cursor Y position, in the same coordinate space as `bounds`. */
	y: number;
	/** Visibility flag as reported by the helper. */
	visible: boolean;
	/** Stored as the sample's `assetId`; `null` when absent. */
	handle: string | null;
	cursorType?: NativeCursorType | null;
	leftButtonDown?: boolean;
	leftButtonPressed?: boolean;
	leftButtonReleased?: boolean;
	/** Bounds to normalise against; takes precedence over the session's display bounds. */
	bounds?: {
		x: number;
		y: number;
		width: number;
		height: number;
	} | null;
	/** Cursor image for this sample, or `null`. */
	asset: WindowsCursorAssetPayload | null;
}
|
||||
|
||||
/** "ready" event: the helper signals that it has started. */
export interface WindowsCursorReadyEvent {
	type: "ready";
	/** Timestamp reported by the helper with the event. */
	timestampMs: number;
}
|
||||
|
||||
/** "error" event: the helper reports a failure. */
export interface WindowsCursorErrorEvent {
	type: "error";
	/** Timestamp reported by the helper with the event. */
	timestampMs: number;
	/** Error description supplied by the helper. */
	message: string;
}
|
||||
|
||||
/** Cursor image description attached to a sample event. */
export interface WindowsCursorAssetPayload {
	/** Key used to register each asset only once per session. */
	id: string;
	/** Cursor image encoded as a data URL. */
	imageDataUrl: string;
	width: number;
	height: number;
	/** Hotspot X coordinate as reported by the helper. */
	hotspotX: number;
	/** Hotspot Y coordinate as reported by the helper. */
	hotspotY: number;
	cursorType?: NativeCursorType | null;
}
|
||||
|
||||
/** Any event the Windows cursor sampler helper can emit, discriminated by `type`. */
export type WindowsCursorEvent =
	| WindowsCursorSampleEvent
	| WindowsCursorReadyEvent
	| WindowsCursorErrorEvent;
|
||||
|
||||
/** Configuration for the Windows native cursor recording session. */
export interface WindowsNativeRecordingSessionOptions {
	/** Fallback bounds used when a sample event carries none; may return `null`. */
	getDisplayBounds: () => Rectangle | null;
	/** Upper limit on retained samples. */
	maxSamples: number;
	/** Sampling interval passed to the helper. */
	sampleIntervalMs: number;
	/** Capture source id from which a window handle may be parsed. */
	sourceId?: string | null;
	/** Time origin for sample timestamps; defaults to `Date.now()` at start. */
	startTimeMs?: number;
}
|
||||
@@ -0,0 +1,49 @@
|
||||
import type { CursorCapabilities, CursorRecordingData } from "../../../src/native/contracts";
|
||||
import type { CursorNativeAdapter, CursorTelemetryLoadResult } from "./adapter";
|
||||
|
||||
/** Callbacks the telemetry adapter delegates to. */
interface TelemetryCursorAdapterOptions {
	/** Loads full cursor recording data for a resolved video path. */
	loadRecordingData: (videoPath: string) => Promise<CursorRecordingData>;
	/** Maps an optional requested path to the path to load, or `null` when none is available. */
	resolveVideoPath: (videoPath?: string | null) => string | null;
	/** Loads cursor telemetry for a resolved video path. */
	loadTelemetry: (videoPath: string) => Promise<CursorTelemetryLoadResult>;
}
|
||||
|
||||
/**
 * Cursor adapter of kind "none": it reports telemetry support without system
 * cursor assets and delegates all loading to the injected callbacks.
 */
export class TelemetryCursorAdapter implements CursorNativeAdapter {
	readonly kind = "none" as const;

	constructor(private readonly options: TelemetryCursorAdapterOptions) {}

	/** Reports telemetry support, no system assets, and this adapter's kind as provider. */
	async getCapabilities(): Promise<CursorCapabilities> {
		return { telemetry: true, systemAssets: false, provider: this.kind };
	}

	/**
	 * Loads recording data for the resolved video path.
	 *
	 * @returns The loaded data, or an empty version-2 recording when no path resolves.
	 */
	async getRecordingData(videoPath?: string | null): Promise<CursorRecordingData> {
		const target = this.options.resolveVideoPath(videoPath);
		if (target) {
			return this.options.loadRecordingData(target);
		}

		return { version: 2, provider: this.kind, samples: [], assets: [] };
	}

	/**
	 * Loads telemetry for the resolved video path.
	 *
	 * @returns The load result, or an unsuccessful result with no samples when
	 * no path resolves.
	 */
	async getTelemetry(videoPath?: string | null) {
		const target = this.options.resolveVideoPath(videoPath);
		if (target) {
			return this.options.loadTelemetry(target);
		}

		return {
			success: false,
			message: "No video path is available for cursor telemetry",
			samples: [],
		} satisfies CursorTelemetryLoadResult;
	}
}
|
||||
@@ -0,0 +1,46 @@
|
||||
import type {
|
||||
CursorCapabilities,
|
||||
CursorRecordingData,
|
||||
CursorTelemetryPoint,
|
||||
} from "../../../src/native/contracts";
|
||||
import type { CursorNativeAdapter } from "../cursor/adapter";
|
||||
import type { NativeBridgeStateStore } from "../store";
|
||||
|
||||
/** Dependencies of {@link CursorService}. */
interface CursorServiceOptions {
	/** Bridge state store updated after each successful call. */
	store: NativeBridgeStateStore;
	/** Adapter that performs the actual cursor queries. */
	adapter: CursorNativeAdapter;
}
|
||||
|
||||
/**
 * Wraps a cursor adapter and mirrors the results of its calls into the
 * native bridge state store.
 */
export class CursorService {
	constructor(private readonly options: CursorServiceOptions) {}

	/** Queries the adapter's capabilities and stores them before returning them. */
	async getCapabilities(): Promise<CursorCapabilities> {
		const capabilities = await this.options.adapter.getCapabilities();
		this.options.store.setCursorCapabilities(capabilities);
		return capabilities;
	}

	/**
	 * Loads telemetry samples through the adapter.
	 *
	 * @throws Error carrying the adapter's `message`, then `error`, then a
	 * generic text, when the adapter reports failure.
	 */
	async getTelemetry(videoPath?: string | null): Promise<CursorTelemetryPoint[]> {
		const outcome = await this.options.adapter.getTelemetry(videoPath);
		if (!outcome.success) {
			throw new Error(outcome.message || outcome.error || "Failed to load cursor telemetry");
		}

		this.noteTelemetryLoaded(videoPath, outcome.samples);
		return outcome.samples;
	}

	/** Loads full recording data through the adapter and notes the load in the store. */
	async getRecordingData(videoPath?: string | null): Promise<CursorRecordingData> {
		const recording = await this.options.adapter.getRecordingData(videoPath);
		this.noteTelemetryLoaded(videoPath, recording.samples);
		return recording;
	}

	/**
	 * Records a telemetry load in the store.
	 *
	 * The path is the requested one or, when that is null/undefined, the
	 * store's current video path. Nothing is recorded when the path is falsy.
	 */
	private noteTelemetryLoaded(videoPath: string | null | undefined, samples: { length: number }) {
		const effectivePath = videoPath ?? this.options.store.getState().project.currentVideoPath;
		if (effectivePath) {
			this.options.store.markCursorTelemetryLoaded(effectivePath, samples.length);
		}
	}
}
|
||||
@@ -0,0 +1,80 @@
|
||||
import type {
|
||||
ProjectContext,
|
||||
ProjectFileResult,
|
||||
ProjectPathResult,
|
||||
} from "../../../src/native/contracts";
|
||||
import type { NativeBridgeStateStore } from "../store";
|
||||
|
||||
/** Dependencies of {@link ProjectService}: the state store plus the project callbacks it wraps. */
interface ProjectServiceOptions {
	store: NativeBridgeStateStore;
	/** Current project file path, or `null`. */
	getCurrentProjectPath: () => string | null;
	/** Current video path, or `null`. */
	getCurrentVideoPath: () => string | null;
	saveProjectFile: (
		projectData: unknown,
		suggestedName?: string,
		existingProjectPath?: string,
	) => Promise<ProjectFileResult>;
	loadProjectFile: () => Promise<ProjectFileResult>;
	loadCurrentProjectFile: () => Promise<ProjectFileResult>;
	/** May answer synchronously or asynchronously. */
	setCurrentVideoPath: (path: string) => ProjectPathResult | Promise<ProjectPathResult>;
	getCurrentVideoPathResult: () => ProjectPathResult;
	clearCurrentVideoPath: () => ProjectPathResult;
}
|
||||
|
||||
/**
 * Thin layer over the project callbacks. After each wrapped operation
 * completes, the current project/video paths are re-read and pushed into the
 * bridge state store.
 */
export class ProjectService {
	constructor(private readonly options: ProjectServiceOptions) {}

	/** Reads the current project and video paths, stores them, and returns them. */
	getCurrentContext(): ProjectContext {
		const context = {
			currentProjectPath: this.options.getCurrentProjectPath(),
			currentVideoPath: this.options.getCurrentVideoPath(),
		};

		this.options.store.setProjectContext(context);
		return context;
	}

	/** Saves the project via the callback, then refreshes the stored context. */
	async saveProjectFile(
		projectData: unknown,
		suggestedName?: string,
		existingProjectPath?: string,
	) {
		return this.thenRefresh(
			await this.options.saveProjectFile(projectData, suggestedName, existingProjectPath),
		);
	}

	/** Loads a project via the callback, then refreshes the stored context. */
	async loadProjectFile() {
		return this.thenRefresh(await this.options.loadProjectFile());
	}

	/** Loads the current project via the callback, then refreshes the stored context. */
	async loadCurrentProjectFile() {
		return this.thenRefresh(await this.options.loadCurrentProjectFile());
	}

	/** Sets the current video path via the callback, then refreshes the stored context. */
	async setCurrentVideoPath(path: string) {
		return this.thenRefresh(await this.options.setCurrentVideoPath(path));
	}

	/** Returns the callback's current-video-path result after refreshing the stored context. */
	getCurrentVideoPath() {
		return this.thenRefresh(this.options.getCurrentVideoPathResult());
	}

	/** Clears the current video path via the callback, then refreshes the stored context. */
	clearCurrentVideoPath() {
		return this.thenRefresh(this.options.clearCurrentVideoPath());
	}

	/** Refreshes the stored context and passes `result` through unchanged. */
	private thenRefresh<T>(result: T): T {
		this.getCurrentContext();
		return result;
	}
}
|
||||
@@ -0,0 +1,43 @@
|
||||
import type {
|
||||
CursorCapabilities,
|
||||
NativePlatform,
|
||||
SystemCapabilities,
|
||||
} from "../../../src/native/contracts";
|
||||
import { NATIVE_BRIDGE_VERSION } from "../../../src/native/contracts";
|
||||
import type { NativeBridgeStateStore } from "../store";
|
||||
|
||||
/** Dependencies of {@link SystemService}. */
interface SystemServiceOptions {
	store: NativeBridgeStateStore;
	/** Platform the bridge is running on. */
	getPlatform: () => NativePlatform;
	/** Base path for assets, or `null`. */
	getAssetBasePath: () => string | null;
	/** Resolves the cursor capabilities to embed in the system capabilities. */
	getCursorCapabilities: () => Promise<CursorCapabilities>;
}
|
||||
|
||||
/** Exposes platform information and assembles the bridge's capability report. */
export class SystemService {
	constructor(private readonly options: SystemServiceOptions) {}

	/** Returns the platform reported by the injected callback. */
	getPlatform() {
		const { getPlatform } = this.options;
		return getPlatform();
	}

	/** Returns the asset base path reported by the injected callback. */
	getAssetBasePath() {
		const { getAssetBasePath } = this.options;
		return getAssetBasePath();
	}

	/**
	 * Builds the system capabilities (bridge version, platform, cursor
	 * capabilities, project flags), stores them, and returns them.
	 */
	async getCapabilities(): Promise<SystemCapabilities> {
		const platform = this.getPlatform();
		const cursor = await this.options.getCursorCapabilities();

		const capabilities: SystemCapabilities = {
			bridgeVersion: NATIVE_BRIDGE_VERSION,
			platform,
			cursor,
			project: { currentContext: true },
		};

		this.options.store.setSystemCapabilities(capabilities);
		return capabilities;
	}
}
|
||||
@@ -0,0 +1,88 @@
|
||||
import type {
|
||||
CursorCapabilities,
|
||||
NativePlatform,
|
||||
ProjectContext,
|
||||
SystemCapabilities,
|
||||
} from "../../src/native/contracts";
|
||||
|
||||
/** Snapshot held by {@link NativeBridgeStateStore}. */
export interface NativeBridgeState {
	system: {
		platform: NativePlatform;
		/** `null` until system capabilities have been stored. */
		capabilities: SystemCapabilities | null;
	};
	project: ProjectContext;
	cursor: {
		/** `null` until cursor capabilities have been stored. */
		capabilities: CursorCapabilities | null;
		/** Most recent telemetry load, or `null` if none has been recorded. */
		lastTelemetryLoad: {
			videoPath: string;
			sampleCount: number;
			/** `Date.now()` at the moment the load was recorded. */
			loadedAt: number;
		} | null;
	};
}
|
||||
|
||||
/**
 * In-memory holder for the native bridge state.
 *
 * Every setter swaps in a new top-level state object rather than mutating the
 * existing one, so a snapshot obtained from `getState()` is not changed by
 * later updates.
 */
export class NativeBridgeStateStore {
	private state: NativeBridgeState;

	/** Starts with the given platform and every other field empty (`null`). */
	constructor(platform: NativePlatform) {
		this.state = {
			system: { platform, capabilities: null },
			project: { currentProjectPath: null, currentVideoPath: null },
			cursor: { capabilities: null, lastTelemetryLoad: null },
		};
	}

	/** Returns the current state snapshot. */
	getState() {
		return this.state;
	}

	/** Replaces the project context. */
	setProjectContext(project: ProjectContext) {
		this.replace({ project });
	}

	/** Stores the system capabilities, keeping the other `system` fields. */
	setSystemCapabilities(capabilities: SystemCapabilities) {
		this.replace({ system: { ...this.state.system, capabilities } });
	}

	/** Stores the cursor capabilities, keeping the other `cursor` fields. */
	setCursorCapabilities(capabilities: CursorCapabilities) {
		this.replace({ cursor: { ...this.state.cursor, capabilities } });
	}

	/** Records a telemetry load for `videoPath`, stamped with the current time. */
	markCursorTelemetryLoaded(videoPath: string, sampleCount: number) {
		const lastTelemetryLoad = { videoPath, sampleCount, loadedAt: Date.now() };
		this.replace({ cursor: { ...this.state.cursor, lastTelemetryLoad } });
	}

	/** Swaps in a new state object with the given top-level sections overridden. */
	private replace(sections: Partial<NativeBridgeState>) {
		this.state = { ...this.state, ...sections };
	}
}
|
||||
@@ -0,0 +1,111 @@
|
||||
# Native capture helpers
|
||||
|
||||
## macOS
|
||||
|
||||
macOS native recording uses a ScreenCaptureKit helper with the same process boundary as the Windows WGC helper:
|
||||
|
||||
1. Electron resolves the selected source, output paths, and user-selected devices.
|
||||
2. The helper receives one structured JSON request.
|
||||
3. The helper owns ScreenCaptureKit/AVFoundation capture, timing, encoding, and muxing.
|
||||
4. Electron persists the resulting media/session manifest and reports helper errors explicitly.
|
||||
|
||||
Helper locations:
|
||||
|
||||
1. `OPENSCREEN_SCK_CAPTURE_EXE`, for local development and diagnostics.
|
||||
2. `electron/native/screencapturekit/build/openscreen-screencapturekit-helper`, for locally built Swift output.
|
||||
3. `electron/native/bin/darwin-arm64/openscreen-screencapturekit-helper` or `electron/native/bin/darwin-x64/openscreen-screencapturekit-helper`, for packaged prebuilt helpers.
|
||||
|
||||
The macOS cursor-shape helper is resolved from `OPENSCREEN_MAC_CURSOR_HELPER_EXE` first, then the matching `openscreen-macos-cursor-helper` binary in the same local build and packaged `electron/native/bin/darwin-${arch}` directories.
|
||||
|
||||
Build the macOS helpers with:
|
||||
|
||||
```bash
|
||||
npm run build:native:mac
|
||||
```
|
||||
|
||||
On non-macOS hosts this command exits successfully and does not affect Windows/Linux development. On macOS it builds the Swift package at `electron/native/screencapturekit`, writes the development binaries to `electron/native/screencapturekit/build`, and copies redistributable binaries to `electron/native/bin/darwin-${arch}`.
|
||||
|
||||
The current helper implementation supports display/window ScreenCaptureKit video capture, cursor exclusion through `SCStreamConfiguration.showsCursor`, H.264 encoding, MP4 muxing, and ScreenCaptureKit system audio. It also attempts native ScreenCaptureKit microphone capture when the running macOS version exposes that capability. Webcam recording currently stays as an Electron sidecar and is attached to the same recording session after the native screen capture stops.
|
||||
|
||||
Electron exposes `is-native-mac-capture-available` for capability probing. It resolves the same helper locations listed above and reports `missing-helper` until a Swift helper binary is present. When available, macOS recording routes screen/window capture through the native helper so editable cursor recordings do not bake the system cursor into the video. Cursor positions are sampled in Electron; when the cursor helper is available and Accessibility is granted, samples are also tagged with link/text cursor hints such as `pointer`.
|
||||
|
||||
See `docs/engineering/macos-native-recorder-roadmap.md` for the contract, rollout phases, and SSOT rules.
|
||||
|
||||
## Windows
|
||||
|
||||
Windows native recording is resolved from one of these locations:
|
||||
|
||||
1. `OPENSCREEN_WGC_CAPTURE_EXE`, for local development and diagnostics.
|
||||
2. `electron/native/wgc-capture/build/wgc-capture.exe`, for a locally built Ninja helper.
|
||||
3. `electron/native/wgc-capture/build/Release/wgc-capture.exe`, for a locally built multi-config helper.
|
||||
4. `electron/native/bin/win32-x64/wgc-capture.exe` or `electron/native/bin/win32-arm64/wgc-capture.exe`, for packaged prebuilt helpers.
|
||||
|
||||
Build the Windows helper with:
|
||||
|
||||
```powershell
|
||||
npm run build:native:win
|
||||
```
|
||||
|
||||
The build writes the CMake output to `electron/native/wgc-capture/build/wgc-capture.exe` and copies the redistributable binary to `electron/native/bin/win32-x64/wgc-capture.exe`. It also builds `cursor-sampler.exe` for editable cursor telemetry and `guide-hotkey-listener.exe` for the Guide Mode global Ctrl capture hook.
|
||||
|
||||
The helper contract is process-based: the app starts the process with one JSON argument and sends commands on stdin. `stop\n` finalizes the recording. During migration the helper prints both newline-delimited JSON events and the legacy text messages `Recording started` / `Recording stopped. Output path: <path>`.
|
||||
|
||||
Current V2 JSON shape:
|
||||
|
||||
```json
|
||||
{
|
||||
"schemaVersion": 2,
|
||||
"recordingId": 123,
|
||||
"sourceType": "display",
|
||||
"sourceId": "screen:0:0",
|
||||
"displayId": 1,
|
||||
"windowHandle": null,
|
||||
"outputPath": "C:\\path\\recording-123.mp4",
|
||||
"videoWidth": 1920,
|
||||
"videoHeight": 1080,
|
||||
"fps": 60,
|
||||
"captureSystemAudio": false,
|
||||
"captureMic": false,
|
||||
"microphoneDeviceId": "default",
|
||||
"microphoneDeviceName": "Microphone (NVIDIA Broadcast)",
|
||||
"microphoneGain": 1.4,
|
||||
"webcamEnabled": true,
|
||||
"webcamDeviceId": "default",
|
||||
"webcamDeviceName": "Camera (NVIDIA Broadcast)",
|
||||
"webcamWidth": 1280,
|
||||
"webcamHeight": 720,
|
||||
"webcamFps": 30,
|
||||
"outputs": {
|
||||
"screenPath": "C:\\path\\recording-123.mp4"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The current helper implementation supports display/window video capture, system audio loopback, selected-microphone capture, Media Foundation webcam capture, and a DirectShow webcam fallback for virtual cameras that are not exposed through Media Foundation. Webcam frames are currently composed into the primary MP4 as a bottom-right picture-in-picture overlay. Browser `deviceId` values do not always map to Media Foundation symbolic links or WASAPI endpoint IDs, so the renderer passes both browser IDs and user-visible device names. For microphones, the helper tries the requested WASAPI endpoint ID first, then resolves an active capture endpoint by `microphoneDeviceName`, then falls back to the default endpoint. For webcams, Electron resolves a matching DirectShow filter CLSID for the selected label; the helper uses Media Foundation first, then that exact DirectShow filter when the requested camera is absent from Media Foundation.
|
||||
|
||||
Smoke-test the helper with:
|
||||
|
||||
```powershell
|
||||
npm run test:wgc-helper:win
|
||||
npm run test:wgc-window:win
|
||||
npm run test:wgc-audio:win
|
||||
npm run test:wgc-mic:win
|
||||
npm run test:wgc-mixed-audio:win
|
||||
npm run test:wgc-webcam:win
|
||||
```
|
||||
|
||||
To validate a specific native webcam manually:
|
||||
|
||||
```powershell
|
||||
$env:OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME = "NVIDIA Broadcast"
|
||||
npm run test:wgc-webcam:win
|
||||
Remove-Item Env:OPENSCREEN_WGC_TEST_WEBCAM_DEVICE_NAME
|
||||
```
|
||||
|
||||
To validate a specific native microphone manually:
|
||||
|
||||
```powershell
|
||||
$env:OPENSCREEN_WGC_TEST_MICROPHONE_DEVICE_NAME = "Microphone (NVIDIA Broadcast)"
|
||||
npm run test:wgc-mic:win
|
||||
Remove-Item Env:OPENSCREEN_WGC_TEST_MICROPHONE_DEVICE_NAME
|
||||
```
|
||||
@@ -0,0 +1,30 @@
|
||||
// swift-tools-version: 5.9
|
||||
|
||||
import PackageDescription
|
||||
|
||||
// Target names are declared once so each product and its target cannot drift apart.
let captureHelperTarget = "OpenScreenScreenCaptureKitHelper"
let cursorHelperTarget = "OpenScreenMacOSCursorHelper"

// Two executables are produced: the ScreenCaptureKit recorder and the cursor helper.
let package = Package(
	name: "OpenScreenScreenCaptureKitHelper",
	platforms: [.macOS(.v13)],
	products: [
		.executable(name: "openscreen-screencapturekit-helper", targets: [captureHelperTarget]),
		.executable(name: "openscreen-macos-cursor-helper", targets: [cursorHelperTarget])
	],
	targets: [
		.executableTarget(name: captureHelperTarget, path: "Sources/OpenScreenScreenCaptureKitHelper"),
		.executableTarget(name: cursorHelperTarget, path: "Sources/OpenScreenMacOSCursorHelper")
	]
)
|
||||
@@ -0,0 +1,268 @@
|
||||
import AppKit
|
||||
import ApplicationServices
|
||||
import Foundation
|
||||
|
||||
/// Optional launch request for the cursor helper, decoded from JSON.
struct CursorHelperRequest: Decodable {
	/// Requested sampling interval in milliseconds; `nil` when the key is absent.
	let sampleIntervalMs: Int?
}
|
||||
|
||||
/// Counts left-mouse down/up events seen through a listen-only session event tap.
///
/// The tap's run-loop source is attached to the run loop of the thread that calls
/// `start()`, so events are only delivered while that run loop runs (see `pump()`).
/// The counters are guarded by `lock`.
final class MouseButtonTracker {
	private let lock = NSLock()
	private var leftDownCount = 0
	private var leftUpCount = 0
	private var eventTap: CFMachPort?
	private var runLoopSource: CFRunLoopSource?

	/// Events counted since the previous `consume()` call.
	struct Events {
		let leftDownCount: Int
		let leftUpCount: Int
	}

	/// Installs the event tap on the current run loop.
	///
	/// - Returns: `true` when the tap and its run-loop source were created and enabled;
	///   `false` when either could not be created.
	func start() -> Bool {
		let mask =
			(1 << CGEventType.leftMouseDown.rawValue) |
			(1 << CGEventType.leftMouseUp.rawValue)
		guard let tap = CGEvent.tapCreate(
			tap: .cgSessionEventTap,
			place: .headInsertEventTap,
			options: .listenOnly,
			eventsOfInterest: CGEventMask(mask),
			callback: { _, type, event, userInfo in
				if let userInfo {
					let tracker = Unmanaged<MouseButtonTracker>.fromOpaque(userInfo).takeUnretainedValue()
					tracker.record(type)
				}
				return Unmanaged.passUnretained(event)
			},
			userInfo: UnsafeMutableRawPointer(Unmanaged.passUnretained(self).toOpaque())
		) else {
			return false
		}

		guard let source = CFMachPortCreateRunLoopSource(kCFAllocatorDefault, tap, 0) else {
			// The tap can never be serviced without a run-loop source, so tear it
			// down instead of leaving an orphaned tap registered with the system.
			CFMachPortInvalidate(tap)
			return false
		}

		eventTap = tap
		runLoopSource = source
		CFRunLoopAddSource(CFRunLoopGetCurrent(), source, .commonModes)
		CGEvent.tapEnable(tap: tap, enable: true)
		return true
	}

	/// Runs the current run loop briefly (1 ms) so pending tap callbacks are delivered.
	func pump() {
		CFRunLoopRunInMode(.defaultMode, 0.001, false)
	}

	/// Returns the counts accumulated so far and resets both counters to zero.
	func consume() -> Events {
		lock.lock()
		defer { lock.unlock() }
		let events = Events(leftDownCount: leftDownCount, leftUpCount: leftUpCount)
		leftDownCount = 0
		leftUpCount = 0
		return events
	}

	/// Tap callback body: re-enables a disabled tap, otherwise bumps the matching counter.
	private func record(_ type: CGEventType) {
		lock.lock()
		defer { lock.unlock() }
		if type == .tapDisabledByTimeout || type == .tapDisabledByUserInput {
			reenableTap()
			return
		}
		if type == .leftMouseDown {
			leftDownCount += 1
		} else if type == .leftMouseUp {
			leftUpCount += 1
		}
	}

	private func reenableTap() {
		if let eventTap {
			CGEvent.tapEnable(tap: eventTap, enable: true)
		}
	}
}
|
||||
|
||||
/// Writes `fields` to stdout as one line of JSON and flushes immediately.
///
/// Entries whose value is `nil` are omitted. Nothing is printed when the remaining
/// payload cannot be represented as JSON.
func emit(_ fields: [String: Any?]) {
	let compacted = fields.compactMapValues { $0 }
	// JSONSerialization.data(withJSONObject:) raises an exception (not a Swift error)
	// for unsupported values, which `try?` cannot intercept; validate first.
	guard JSONSerialization.isValidJSONObject(compacted) else {
		return
	}
	if let data = try? JSONSerialization.data(withJSONObject: compacted, options: []),
		let line = String(data: data, encoding: .utf8)
	{
		print(line)
		fflush(stdout)
	}
}
|
||||
|
||||
/// Reads accessibility attribute `attribute` from `element` as a string.
/// Returns `nil` when the lookup fails or the value is not a string.
func stringAttribute(_ element: AXUIElement, _ attribute: String) -> String? {
	var raw: CFTypeRef?
	if AXUIElementCopyAttributeValue(element, attribute as CFString, &raw) == .success {
		return raw as? String
	}
	return nil
}
|
||||
|
||||
/// Returns the accessibility parent of `element`.
///
/// Returns `nil` when the lookup fails, yields no value, or yields something
/// that is not an `AXUIElement`.
func parentElement(_ element: AXUIElement) -> AXUIElement? {
	var value: CFTypeRef?
	let result = AXUIElementCopyAttributeValue(element, kAXParentAttribute as CFString, &value)
	// Unwrap before CFGetTypeID: that call must not receive nil.
	guard result == .success, let value, CFGetTypeID(value) == AXUIElementGetTypeID() else {
		return nil
	}

	// The type-ID check above makes this cast safe.
	return (value as! AXUIElement)
}
|
||||
|
||||
/// Returns the role description of `element`, or `nil` when it is unavailable
/// or not a string.
func roleDescription(_ element: AXUIElement) -> String? {
	stringAttribute(element, kAXRoleDescriptionAttribute)
}
|
||||
|
||||
/// Lists the action names `element` reports; empty when the query fails.
/// Entries that are not strings are dropped.
func actionNames(_ element: AXUIElement) -> [String] {
	var rawNames: CFArray?
	guard AXUIElementCopyActionNames(element, &rawNames) == .success, let rawNames else {
		return []
	}

	var names: [String] = []
	for case let name as String in (rawNames as NSArray) {
		names.append(name)
	}
	return names
}
|
||||
/// Whether `role` is one of the text-entry roles (field, area, view, combo box).
func isTextInputRole(_ role: String?) -> Bool {
	guard let role else {
		return false
	}
	let textRoles = ["AXTextField", "AXTextArea", "AXTextView", "AXComboBox"]
	return textRoles.contains(role)
}
|
||||
|
||||
/// Whether an element with this role, subrole, and role description should be
/// treated as clickable.
///
/// Links are recognised by role `AXLink`, or by "link" appearing in the subrole or
/// the description. Both substring checks ignore case, so callers need not
/// lowercase the description first. A fixed list of control roles also qualifies.
func isPointerRole(_ role: String?, _ subrole: String?, _ description: String?) -> Bool {
	if role == "AXLink" ||
		subrole?.localizedCaseInsensitiveContains("link") == true ||
		description?.localizedCaseInsensitiveContains("link") == true
	{
		return true
	}

	guard let role else {
		return false
	}
	let clickableRoles: Set<String> = [
		"AXButton",
		"AXMenuButton",
		"AXPopUpButton",
		"AXCheckBox",
		"AXRadioButton",
		"AXSwitch",
		"AXDisclosureTriangle",
		"AXTab",
		"AXMenuItem",
	]
	return clickableRoles.contains(role)
}
|
||||
|
||||
/// Classifies `element` by walking up its ancestry for at most five levels,
/// starting with the element itself.
///
/// Returns `"text"` for the first text-input element found, `"pointer"` for the
/// first clickable one, and `nil` when neither turns up.
func cursorTypeForElement(_ element: AXUIElement) -> String? {
	var candidate: AXUIElement? = element
	var levelsLeft = 5

	while levelsLeft > 0, let node = candidate {
		levelsLeft -= 1

		let role = stringAttribute(node, kAXRoleAttribute)
		let subrole = stringAttribute(node, kAXSubroleAttribute)
		let loweredDescription = roleDescription(node)?.lowercased()

		// Text input is checked before the pointer roles.
		if isTextInputRole(role) {
			return "text"
		}
		if isPointerRole(role, subrole, loweredDescription) {
			return "pointer"
		}

		candidate = parentElement(node)
	}

	return nil
}
|
||||
|
||||
/// Returns the mouse location with its y coordinate flipped against the height of
/// the first screen (falling back to the main screen, then to 0).
func accessibilityPointForMouse() -> CGPoint {
	let location = NSEvent.mouseLocation
	let referenceScreen = NSScreen.screens.first ?? NSScreen.main
	let referenceHeight = referenceScreen?.frame.height ?? 0
	return CGPoint(x: location.x, y: referenceHeight - location.y)
}
|
||||
|
||||
/// Classifies what is under the mouse as `"text"`, `"pointer"`, or `"arrow"`.
///
/// Returns `nil` when the process is not trusted for accessibility. Returns
/// `"arrow"` when no element is found at the mouse position or the element
/// cannot be classified.
func currentCursorType() -> String? {
	if !AXIsProcessTrusted() {
		return nil
	}

	let fallback = "arrow"
	let location = accessibilityPointForMouse()
	var hit: AXUIElement?
	let status = AXUIElementCopyElementAtPosition(
		AXUIElementCreateSystemWide(),
		Float(location.x),
		Float(location.y),
		&hit
	)

	guard status == .success, let hit else {
		return fallback
	}
	return cursorTypeForElement(hit) ?? fallback
}
|
||||
|
||||
/// Current wall-clock time as whole milliseconds since the Unix epoch.
func timestampMs() -> Int {
	let secondsSinceEpoch = Date().timeIntervalSince1970
	return Int(secondsSinceEpoch * 1000)
}
|
||||
|
||||
/// Whether the left mouse button is currently down according to the HID system state.
func leftButtonDown() -> Bool {
	let isDown = CGEventSource.buttonState(.hidSystemState, button: .left)
	return isDown
}
|
||||
|
||||
/// Checks accessibility trust for this process with the prompt option enabled,
/// and returns whether the process is trusted.
func requestAccessibilityTrust() -> Bool {
	let promptKey = kAXTrustedCheckOptionPrompt.takeUnretainedValue() as String
	let options = [promptKey: true] as CFDictionary
	return AXIsProcessTrustedWithOptions(options)
}
|
||||
|
||||
// Decode the optional JSON request from the first argument; a missing or
// malformed argument falls back to a request with no interval.
let request: CursorHelperRequest = {
	guard CommandLine.arguments.count >= 2,
		let data = CommandLine.arguments[1].data(using: .utf8),
		let decoded = try? JSONDecoder().decode(CursorHelperRequest.self, from: data)
	else {
		return CursorHelperRequest(sampleIntervalMs: nil)
	}
	return decoded
}()

// Default to 33 ms between samples and never go below 8 ms.
let intervalMs = max(8, request.sampleIntervalMs ?? 33)
let accessibilityTrusted = requestAccessibilityTrust()
let mouseTracker = MouseButtonTracker()
let mouseTapReady = mouseTracker.start()

emit([
	"type": "ready",
	"timestampMs": timestampMs(),
	"accessibilityTrusted": accessibilityTrusted,
	"mouseTapReady": mouseTapReady,
])

// Sampling loop: drain pending tap events, emit one sample, then sleep.
while true {
	mouseTracker.pump()
	let clicks = mouseTracker.consume()
	let sample: [String: Any?] = [
		"type": "sample",
		"timestampMs": timestampMs(),
		"cursorType": currentCursorType(),
		"leftButtonDown": leftButtonDown(),
		"leftButtonPressed": clicks.leftDownCount > 0,
		"leftButtonReleased": clicks.leftUpCount > 0,
	]
	emit(sample)
	Thread.sleep(forTimeInterval: Double(intervalMs) / 1000.0)
}
|
||||
@@ -0,0 +1,673 @@
|
||||
import AVFoundation
|
||||
import CoreGraphics
|
||||
import CoreMedia
|
||||
import Foundation
|
||||
import ScreenCaptureKit
|
||||
|
||||
/// Rectangle decoded from the request JSON: an origin (`x`, `y`) plus a size.
struct Rectangle: Decodable {
	let x, y: Double
	let width, height: Double
}
|
||||
|
||||
/// The JSON recording request handed to the helper.
///
/// Optional properties may be absent from the JSON; all others are required.
struct RecordingRequest: Decodable {
	/// What to capture: a source type string plus display/window identifiers.
	struct Source: Decodable {
		let type, sourceId: String
		let displayId, windowId: UInt32?
		let bounds: Rectangle?
	}

	/// Video parameters for the recording.
	struct Video: Decodable {
		let fps, width, height: Int
		let bitrate: Int?
		let hideSystemCursor: Bool
	}

	/// Audio capture switches for system audio and the microphone.
	struct Audio: Decodable {
		struct SystemAudio: Decodable {
			let enabled: Bool
		}

		struct Microphone: Decodable {
			let enabled: Bool
			let deviceId, deviceName: String?
			let gain: Double
		}

		let system: SystemAudio
		let microphone: Microphone
	}

	/// Webcam selection and format.
	struct Webcam: Decodable {
		let enabled: Bool
		let deviceId, deviceName: String?
		let width, height, fps: Int
	}

	struct Cursor: Decodable {
		let mode: String
	}

	/// Output file locations.
	struct Outputs: Decodable {
		let screenPath: String
		let manifestPath: String?
	}

	let schemaVersion, recordingId: Int?
	let source: Source
	let video: Video
	let audio: Audio
	let webcam: Webcam
	let cursor: Cursor
	let outputs: Outputs
}
|
||||
|
||||
/// Errors the helper can report. `description` is the human-readable message.
enum HelperError: Error, CustomStringConvertible {
	case invalidArguments
	case unsupportedMacOS
	case unsupportedFeature(String)
	case sourceNotFound(String)
	case invalidSourceType(String)
	case permissionDenied(String)
	case writerSetupFailed(String)

	var description: String {
		switch self {
		case .invalidArguments:
			return "Expected one JSON recording request argument."
		case .unsupportedMacOS:
			return "ScreenCaptureKit recording requires macOS 13 or newer."
		case .invalidSourceType(let sourceType):
			return "Unsupported source type: \(sourceType)."
		// These cases carry their full message as the associated value.
		case .unsupportedFeature(let message),
			.sourceNotFound(let message),
			.permissionDenied(let message),
			.writerSetupFailed(let message):
			return message
		}
	}
}
|
||||
|
||||
/// Writes `fields` to stdout as one line of JSON and flushes immediately.
///
/// Nothing is printed when `fields` cannot be represented as JSON.
func emit(_ fields: [String: Any]) {
	// JSONSerialization.data(withJSONObject:) raises an exception (not a Swift error)
	// for unsupported values, which `try?` cannot intercept; validate first.
	guard JSONSerialization.isValidJSONObject(fields) else {
		return
	}
	if let data = try? JSONSerialization.data(withJSONObject: fields, options: []),
		let line = String(data: data, encoding: .utf8)
	{
		print(line)
		fflush(stdout)
	}
}
|
||||
|
||||
/// Emits an `error` event carrying a code and a message.
func emitError(code: String, message: String) {
	let payload: [String: Any] = [
		"event": "error",
		"code": code,
		"message": message,
	]
	emit(payload)
}
|
||||
|
||||
@available(macOS 13.0, *)
|
||||
final class ScreenCaptureRecorder: NSObject, SCStreamOutput, SCStreamDelegate {
|
||||
private struct CaptureTarget {
|
||||
let filter: SCContentFilter
|
||||
let width: Int
|
||||
let height: Int
|
||||
}
|
||||
|
||||
private let request: RecordingRequest
|
||||
private let sampleQueue = DispatchQueue(label: "app.openscreen.sck-helper.samples")
|
||||
private let stateQueue = DispatchQueue(label: "app.openscreen.sck-helper.state")
|
||||
private var stream: SCStream?
|
||||
private var writer: AVAssetWriter?
|
||||
private var videoInput: AVAssetWriterInput?
|
||||
private var systemAudioInput: AVAssetWriterInput?
|
||||
private var microphoneAudioInput: AVAssetWriterInput?
|
||||
private var didStartWriting = false
|
||||
private var didEmitRecordingStarted = false
|
||||
private var isStopping = false
|
||||
private var isPaused = false
|
||||
private var pauseStartedAt: CMTime?
|
||||
private var totalPausedDuration = CMTime.zero
|
||||
private var nativeMicrophoneEnabled = false
|
||||
private var outputWidth = 1920
|
||||
private var outputHeight = 1080
|
||||
private let microphoneOutputTypeRawValue = 2
|
||||
private let hostClock = CMClockGetHostTimeClock()
|
||||
|
||||
/// Creates a recorder for `request`; nothing is captured until `start()` is called.
init(request: RecordingRequest) {
	self.request = request
	super.init()
}
|
||||
|
||||
/// Checks permissions, resolves the capture target, wires the stream outputs and
/// the asset writer, emits `ready`, then starts capture.
///
/// Throws when a permission is missing, the source cannot be resolved, an output
/// cannot be added, the writer cannot be set up, or capture fails to start.
func start() async throws {
	try ensureRequestedPermissions()

	let shareable = try await SCShareableContent.excludingDesktopWindows(
		false,
		onScreenWindowsOnly: true
	)
	let captureTarget = try makeCaptureTarget(from: shareable)
	// The stream configuration and the writer both read these dimensions.
	outputWidth = captureTarget.width
	outputHeight = captureTarget.height

	// Building the configuration also sets `nativeMicrophoneEnabled`.
	let streamConfiguration = makeStreamConfiguration()
	let captureStream = SCStream(
		filter: captureTarget.filter,
		configuration: streamConfiguration,
		delegate: self
	)

	try captureStream.addStreamOutput(self, type: .screen, sampleHandlerQueue: sampleQueue)
	if request.audio.system.enabled {
		try captureStream.addStreamOutput(self, type: .audio, sampleHandlerQueue: sampleQueue)
	}
	if nativeMicrophoneEnabled {
		guard let microphoneType = SCStreamOutputType(rawValue: microphoneOutputTypeRawValue) else {
			throw HelperError.unsupportedFeature(
				"Native microphone capture requires a macOS version with ScreenCaptureKit microphone output."
			)
		}
		try captureStream.addStreamOutput(self, type: microphoneType, sampleHandlerQueue: sampleQueue)
	}
	try setupWriter()

	stream = captureStream
	emit(["event": "ready", "schemaVersion": 1])
	try await captureStream.startCapture()
}
|
||||
|
||||
/// Stops capture and finalizes the output file. Only the first call does any work.
func stop() async {
	// Claim the stop on the state queue so later or concurrent calls return early.
	let isFirstStop: Bool = stateQueue.sync {
		guard !isStopping else {
			return false
		}
		isStopping = true
		return true
	}
	guard isFirstStop else {
		return
	}

	// A failure to stop capture is reported as a warning; the writer is still finished.
	do {
		try await stream?.stopCapture()
	} catch {
		let warning: [String: Any] = [
			"event": "warning",
			"code": "stop-capture-failed",
			"message": "\(error)",
		]
		emit(warning)
	}

	await finishWriter()
}
|
||||
|
||||
/// Pauses the recording and emits `recording-paused`.
/// Does nothing when already paused or stopping.
func pause() {
	let didTransition: Bool = stateQueue.sync {
		guard !isStopping, !isPaused else {
			return false
		}
		isPaused = true
		// Remember when the pause began so `resume()` can measure its length.
		pauseStartedAt = CMClockGetTime(hostClock)
		return true
	}
	guard didTransition else {
		return
	}

	emit([
		"event": "recording-paused",
		"timestampMs": Int(Date().timeIntervalSince1970 * 1000),
	])
}
|
||||
|
||||
/// Resumes a paused recording, adds the pause length to `totalPausedDuration`,
/// and emits `recording-resumed`. Does nothing when not paused or when stopping.
func resume() {
	let didTransition: Bool = stateQueue.sync {
		guard !isStopping, isPaused else {
			return false
		}

		if let startedAt = pauseStartedAt {
			let pauseLength = CMTimeSubtract(CMClockGetTime(hostClock), startedAt)
			totalPausedDuration = CMTimeAdd(totalPausedDuration, pauseLength)
		}
		isPaused = false
		pauseStartedAt = nil
		return true
	}
	guard didTransition else {
		return
	}

	emit([
		"event": "recording-resumed",
		"timestampMs": Int(Date().timeIntervalSince1970 * 1000),
	])
}
|
||||
|
||||
/// Stream delegate callback: reports the error, then runs the normal stop path.
func stream(_ stream: SCStream, didStopWithError error: Error) {
	let details = "\(error)"
	emitError(code: "capture-stopped-with-error", message: details)
	Task {
		await self.stop()
	}
}
|
||||
|
||||
/// Stream output callback: routes each sample buffer to the matching writer input.
///
/// Buffers are dropped while paused; otherwise their timestamps are shifted back by
/// the accumulated pause duration. The first complete video frame starts the writer
/// session, and the first appended frame triggers `recording-started`.
func stream(_ stream: SCStream, didOutputSampleBuffer sampleBuffer: CMSampleBuffer, of type: SCStreamOutputType) {
	guard CMSampleBufferDataIsReady(sampleBuffer) else {
		return
	}
	let pauseState = currentPauseState()
	if pauseState.paused {
		return
	}
	guard let sampleBuffer = retimedSampleBuffer(sampleBuffer, subtracting: pauseState.offset) else {
		return
	}

	if type == .audio {
		appendAudioSampleBuffer(sampleBuffer, to: systemAudioInput)
		return
	}

	// The microphone output type is matched by raw value.
	if type.rawValue == microphoneOutputTypeRawValue {
		appendAudioSampleBuffer(sampleBuffer, to: microphoneAudioInput)
		return
	}

	guard type == .screen else {
		return
	}
	guard isCompleteFrame(sampleBuffer) else {
		return
	}
	guard let videoInput, let writer else {
		return
	}
	let presentationTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
	if !didStartWriting {
		// A writer that already left `.unknown` without starting has failed on an
		// earlier frame; do not try to start it again.
		guard writer.status == .unknown else {
			return
		}
		// startSession(atSourceTime:) must only follow a successful startWriting().
		guard writer.startWriting() else {
			emitError(
				code: "writer-failed",
				message: writer.error.map { "\($0)" } ?? "AVAssetWriter failed to start writing."
			)
			return
		}
		writer.startSession(atSourceTime: presentationTime)
		didStartWriting = true
	}

	if videoInput.isReadyForMoreMediaData {
		if videoInput.append(sampleBuffer), !didEmitRecordingStarted {
			didEmitRecordingStarted = true
			emit([
				"event": "recording-started",
				"timestampMs": Int(Date().timeIntervalSince1970 * 1000),
				"width": outputWidth,
				"height": outputHeight,
			])
		}
	}
}
|
||||
|
||||
/// Verifies screen-recording permission and, when the microphone is requested,
/// microphone permission. Either is requested if not yet decided.
///
/// Throws `HelperError.permissionDenied` when a required permission is missing.
private func ensureRequestedPermissions() throws {
	// The request is only made when the preflight check fails.
	if !CGPreflightScreenCaptureAccess() && !CGRequestScreenCaptureAccess() {
		throw HelperError.permissionDenied("Screen recording permission is required for ScreenCaptureKit capture.")
	}

	guard request.audio.microphone.enabled else {
		return
	}

	let microphoneDenied = HelperError.permissionDenied(
		"Microphone permission is required for native microphone capture."
	)
	switch AVCaptureDevice.authorizationStatus(for: .audio) {
	case .authorized:
		return
	case .notDetermined:
		// Block for up to 30 seconds until the access request calls back.
		let answered = DispatchSemaphore(value: 0)
		AVCaptureDevice.requestAccess(for: .audio) { _ in
			answered.signal()
		}
		let timedOut = answered.wait(timeout: .now() + 30) == .timedOut
		if timedOut || AVCaptureDevice.authorizationStatus(for: .audio) != .authorized {
			throw microphoneDenied
		}
	default:
		throw microphoneDenied
	}
}
|
||||
|
||||
/// Resolves the requested source into a content filter plus output dimensions.
///
/// - `"display"`: matches `source.displayId` and uses the display's pixel size.
/// - `"window"`: matches `source.windowId` and uses the window frame size multiplied
///   by the scale factor of the display hosting the window.
///
/// Throws `HelperError.sourceNotFound` when the id is missing or has no match, and
/// `HelperError.invalidSourceType` for any other source type.
private func makeCaptureTarget(from content: SCShareableContent) throws -> CaptureTarget {
	switch request.source.type {
	case "display":
		guard let displayId = request.source.displayId else {
			throw HelperError.sourceNotFound("Display capture requires source.displayId.")
		}
		guard let display = content.displays.first(where: { $0.displayID == displayId }) else {
			throw HelperError.sourceNotFound("No ScreenCaptureKit display found for id \(displayId).")
		}
		let width = Int(CGDisplayPixelsWide(display.displayID))
		let height = Int(CGDisplayPixelsHigh(display.displayID))
		return CaptureTarget(
			filter: SCContentFilter(display: display, excludingWindows: []),
			width: clampCaptureDimension(width, fallback: request.video.width),
			height: clampCaptureDimension(height, fallback: request.video.height)
		)
	case "window":
		guard let windowId = request.source.windowId else {
			throw HelperError.sourceNotFound("Window capture requires source.windowId.")
		}
		guard let window = content.windows.first(where: { $0.windowID == windowId }) else {
			throw HelperError.sourceNotFound("No ScreenCaptureKit window found for id \(windowId).")
		}
		// Prefer the display containing the window's centre. A window straddling two
		// displays intersects both, so taking the first intersecting display could
		// apply the wrong scale factor. Fall back to any intersecting display.
		let windowCenter = CGPoint(x: window.frame.midX, y: window.frame.midY)
		let candidateDisplay =
			content.displays.first(where: { $0.frame.contains(windowCenter) })
			?? content.displays.first(where: { $0.frame.intersects(window.frame) })
		let scaleFactor = Self.scaleFactor(for: candidateDisplay?.displayID ?? CGMainDisplayID())
		let width = Int(window.frame.width) * scaleFactor
		let height = Int(window.frame.height) * scaleFactor
		return CaptureTarget(
			filter: SCContentFilter(desktopIndependentWindow: window),
			width: clampCaptureDimension(width, fallback: request.video.width),
			height: clampCaptureDimension(height, fallback: request.video.height)
		)
	default:
		throw HelperError.invalidSourceType(request.source.type)
	}
}
|
||||
|
||||
/// Builds the stream configuration from the request and the resolved output size.
///
/// Also sets `nativeMicrophoneEnabled`: true only when the microphone is requested
/// and native microphone capture is supported. A warning is emitted when it is
/// requested but unsupported.
private func makeStreamConfiguration() -> SCStreamConfiguration {
	let config = SCStreamConfiguration()
	config.width = outputWidth
	config.height = outputHeight
	// The fps is clamped to at least 1 to keep the frame interval valid.
	config.minimumFrameInterval = CMTime(value: 1, timescale: CMTimeScale(max(1, request.video.fps)))
	config.queueDepth = 6
	config.showsCursor = !request.video.hideSystemCursor
	config.pixelFormat = kCVPixelFormatType_32BGRA
	config.sampleRate = 48_000
	config.channelCount = 2
	config.excludesCurrentProcessAudio = true
	config.capturesAudio = request.audio.system.enabled

	if !request.audio.microphone.enabled {
		nativeMicrophoneEnabled = false
		return config
	}

	if !supportsNativeMicrophoneCapture(streamConfig: config) {
		nativeMicrophoneEnabled = false
		emit([
			"event": "warning",
			"code": "microphone-unavailable",
			"message": "Native microphone capture requires ScreenCaptureKit microphone support on this macOS version.",
		])
		return config
	}

	nativeMicrophoneEnabled = true
	config.capturesAudio = true
	// The microphone options are set through key-value coding.
	config.setValue(true, forKey: "captureMicrophone")
	if let deviceId = resolveMicrophoneCaptureDeviceID() {
		config.setValue(deviceId, forKey: "microphoneCaptureDeviceID")
	}
	return config
}
|
||||
|
||||
/// Creates the MP4 asset writer with an H.264 video input, plus AAC audio inputs
/// for system audio and the microphone when those are enabled.
///
/// Any existing file at the output path is removed first, and the parent directory
/// is created if needed. Throws when the directory, writer, or an input cannot be set up.
private func setupWriter() throws {
	let destination = URL(fileURLWithPath: request.outputs.screenPath)
	let fileManager = FileManager.default
	// Removal is best-effort; the file may not exist.
	try? fileManager.removeItem(at: destination)
	try fileManager.createDirectory(
		at: destination.deletingLastPathComponent(),
		withIntermediateDirectories: true
	)

	let assetWriter = try AVAssetWriter(outputURL: destination, fileType: .mp4)
	let compression = [
		AVVideoAverageBitRateKey: request.video.bitrate ?? 18_000_000,
		AVVideoExpectedSourceFrameRateKey: request.video.fps,
	]
	let videoSettings: [String: Any] = [
		AVVideoCodecKey: AVVideoCodecType.h264,
		AVVideoWidthKey: outputWidth,
		AVVideoHeightKey: outputHeight,
		AVVideoCompressionPropertiesKey: compression,
	]
	let videoWriterInput = AVAssetWriterInput(mediaType: .video, outputSettings: videoSettings)
	videoWriterInput.expectsMediaDataInRealTime = true

	guard assetWriter.canAdd(videoWriterInput) else {
		throw HelperError.writerSetupFailed("Unable to add H.264 video input to AVAssetWriter.")
	}
	assetWriter.add(videoWriterInput)

	writer = assetWriter
	videoInput = videoWriterInput

	if request.audio.system.enabled {
		systemAudioInput = try addAudioInput(to: assetWriter, bitRate: 192_000)
	}
	if nativeMicrophoneEnabled {
		microphoneAudioInput = try addAudioInput(to: assetWriter, bitRate: 128_000)
	}
}
|
||||
|
||||
/// Finalizes the output file and reports the outcome.
///
/// Emits `recording-stopped` with the screen path on success. Emits a
/// `writer-failed` error when the writer never started, had already failed,
/// or fails while finishing.
private func finishWriter() async {
	guard let writer else {
		return
	}

	// markAsFinished() and finishWriting are only valid on a writer that is
	// writing. A stop before the first frame leaves the status at `.unknown`,
	// and a failed start leaves it at `.failed`; report those instead of calling in.
	guard writer.status == .writing else {
		emitError(
			code: "writer-failed",
			message: writer.error.map { "\($0)" }
				?? "AVAssetWriter was not writing when the recording stopped (status \(writer.status.rawValue))."
		)
		return
	}

	videoInput?.markAsFinished()
	systemAudioInput?.markAsFinished()
	microphoneAudioInput?.markAsFinished()

	await withCheckedContinuation { continuation in
		writer.finishWriting {
			continuation.resume()
		}
	}

	if writer.status == .completed {
		emit([
			"event": "recording-stopped",
			"screenPath": request.outputs.screenPath,
		])
	} else {
		emitError(
			code: "writer-failed",
			message: writer.error.map { "\($0)" } ?? "AVAssetWriter failed with status \(writer.status.rawValue)."
		)
	}
}
|
||||
|
||||
/// Adds a real-time AAC input (48 kHz, 2 channels) at `bitRate` to `writer` and
/// returns it. Throws `HelperError.writerSetupFailed` when the writer rejects it.
private func addAudioInput(to writer: AVAssetWriter, bitRate: Int) throws -> AVAssetWriterInput {
	let aacSettings: [String: Any] = [
		AVFormatIDKey: kAudioFormatMPEG4AAC,
		AVSampleRateKey: 48_000,
		AVNumberOfChannelsKey: 2,
		AVEncoderBitRateKey: bitRate,
	]
	let audioWriterInput = AVAssetWriterInput(mediaType: .audio, outputSettings: aacSettings)
	audioWriterInput.expectsMediaDataInRealTime = true

	if !writer.canAdd(audioWriterInput) {
		throw HelperError.writerSetupFailed("Unable to add AAC audio input to AVAssetWriter.")
	}
	writer.add(audioWriterInput)
	return audioWriterInput
}
|
||||
|
||||
/// Appends `sampleBuffer` to `input`, silently dropping it when writing has not
/// started yet, when there is no input, or when the input cannot accept more data.
private func appendAudioSampleBuffer(_ sampleBuffer: CMSampleBuffer, to input: AVAssetWriterInput?) {
    if !didStartWriting {
        return
    }

    if let target = input, target.isReadyForMoreMediaData {
        target.append(sampleBuffer)
    }
}
|
||||
|
||||
/// Reads the pause flag and the accumulated paused duration together, inside a
/// single synchronous hop onto `stateQueue`.
private func currentPauseState() -> (paused: Bool, offset: CMTime) {
    return stateQueue.sync { () -> (paused: Bool, offset: CMTime) in
        return (paused: isPaused, offset: totalPausedDuration)
    }
}
|
||||
|
||||
/// Returns a copy of `sampleBuffer` whose presentation/decode timestamps are
/// shifted earlier by `offset`.
///
/// The original buffer is returned unchanged when `offset` is invalid or zero,
/// when the buffer has no samples, or when the timing info cannot be read or
/// copied (a `sample-retime-failed` warning is emitted in the failure cases).
private func retimedSampleBuffer(_ sampleBuffer: CMSampleBuffer, subtracting offset: CMTime) -> CMSampleBuffer? {
    if !offset.isValid || offset == .zero {
        return sampleBuffer
    }

    if CMSampleBufferGetNumSamples(sampleBuffer) <= 0 {
        return sampleBuffer
    }

    // A buffer may carry fewer timing entries than samples (Core Media allows a
    // single entry to describe every sample). Size the array from the number of
    // timing entries actually present instead of the sample count; otherwise the
    // unfilled tail of the array (default, invalid timing) would be handed to
    // CMSampleBufferCreateCopyWithNewTiming as if it were real per-sample timing.
    var timingEntryCount: CMItemCount = 0
    let countStatus = CMSampleBufferGetSampleTimingInfoArray(
        sampleBuffer,
        entryCount: 0,
        arrayToFill: nil,
        entriesNeededOut: &timingEntryCount
    )
    if countStatus != noErr {
        emit([
            "event": "warning",
            "code": "sample-retime-failed",
            "message": "Unable to read sample timing info: \(countStatus).",
        ])
        return sampleBuffer
    }
    if timingEntryCount <= 0 {
        // Nothing to shift.
        return sampleBuffer
    }

    var timing = Array(repeating: CMSampleTimingInfo(), count: timingEntryCount)
    let timingStatus = CMSampleBufferGetSampleTimingInfoArray(
        sampleBuffer,
        entryCount: timingEntryCount,
        arrayToFill: &timing,
        entriesNeededOut: nil
    )
    if timingStatus != noErr {
        emit([
            "event": "warning",
            "code": "sample-retime-failed",
            "message": "Unable to read sample timing info: \(timingStatus).",
        ])
        return sampleBuffer
    }

    // Shift only timestamps that are valid; invalid ones are left untouched.
    for index in timing.indices {
        if timing[index].presentationTimeStamp.isValid {
            timing[index].presentationTimeStamp = CMTimeSubtract(
                timing[index].presentationTimeStamp,
                offset
            )
        }
        if timing[index].decodeTimeStamp.isValid {
            timing[index].decodeTimeStamp = CMTimeSubtract(timing[index].decodeTimeStamp, offset)
        }
    }

    var retimedBuffer: CMSampleBuffer?
    let copyStatus = CMSampleBufferCreateCopyWithNewTiming(
        allocator: kCFAllocatorDefault,
        sampleBuffer: sampleBuffer,
        sampleTimingEntryCount: timingEntryCount,
        sampleTimingArray: &timing,
        sampleBufferOut: &retimedBuffer
    )
    if copyStatus != noErr {
        emit([
            "event": "warning",
            "code": "sample-retime-failed",
            "message": "Unable to copy sample timing info: \(copyStatus).",
        ])
        return sampleBuffer
    }

    return retimedBuffer
}
|
||||
|
||||
/// Reports whether the frame carried by `sampleBuffer` has status `.complete`.
///
/// When the frame status cannot be determined (no attachments, no status entry,
/// or an unknown raw value) the frame is treated as complete.
private func isCompleteFrame(_ sampleBuffer: CMSampleBuffer) -> Bool {
    let rawAttachments = CMSampleBufferGetSampleAttachmentsArray(
        sampleBuffer,
        createIfNecessary: false
    )
    guard let frameInfos = rawAttachments as? [[SCStreamFrameInfo: Any]] else {
        return true
    }
    guard let firstInfo = frameInfos.first else {
        return true
    }
    guard let rawStatus = firstInfo[SCStreamFrameInfo.status] as? Int else {
        return true
    }
    guard let frameStatus = SCFrameStatus(rawValue: rawStatus) else {
        return true
    }

    return frameStatus == .complete
}
|
||||
|
||||
/// Picks an even capture dimension of at least 2 that does not exceed `fallback`.
///
/// A non-positive `value` selects the fallback itself (raised to a minimum of 2);
/// odd results are rounded down to the nearest even number.
private func clampCaptureDimension(_ value: Int, fallback: Int) -> Int {
    let ceiling = fallback < 2 ? 2 : fallback

    var dimension = ceiling
    if value > 0 && value < ceiling {
        dimension = value
    }

    // `dimension` is always positive here, so the remainder is 0 or 1.
    if dimension % 2 != 0 {
        dimension -= 1
    }

    return dimension < 2 ? 2 : dimension
}
|
||||
|
||||
/// Integer ratio between the pixel width and the reported width of the display's
/// current mode, never less than 1. Returns 1 when the mode cannot be read.
private static func scaleFactor(for displayId: CGDirectDisplayID) -> Int {
    if let displayMode = CGDisplayCopyDisplayMode(displayId) {
        // Guard the divisor so a zero-width mode cannot trap.
        let reportedWidth = max(1, displayMode.width)
        let ratio = displayMode.pixelWidth / reportedWidth
        return max(1, ratio)
    }

    return 1
}
|
||||
|
||||
/// Checks at runtime that `streamConfig` responds to both microphone setters and
/// that `microphoneOutputTypeRawValue` maps to a valid `SCStreamOutputType`.
private func supportsNativeMicrophoneCapture(streamConfig: SCStreamConfiguration) -> Bool {
    guard streamConfig.responds(to: Selector(("setCaptureMicrophone:"))) else {
        return false
    }
    guard streamConfig.responds(to: Selector(("setMicrophoneCaptureDeviceID:"))) else {
        return false
    }

    return SCStreamOutputType(rawValue: microphoneOutputTypeRawValue) != nil
}
|
||||
|
||||
/// Resolves the requested microphone to the unique ID of an available audio
/// capture device.
///
/// The requested device name (matched against `localizedName`) takes precedence;
/// the requested device ID is used only if it matches an available device.
/// Returns `nil` when neither yields a match.
private func resolveMicrophoneCaptureDeviceID() -> String? {
    let audioDevices = AVCaptureDevice.devices(for: .audio)

    let requestedName = request.audio.microphone.deviceName?
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    if !requestedName.isEmpty {
        for candidate in audioDevices where candidate.localizedName == requestedName {
            return candidate.uniqueID
        }
    }

    let requestedId = request.audio.microphone.deviceId?
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    if !requestedId.isEmpty {
        let isAvailable = audioDevices.contains { $0.uniqueID == requestedId }
        if isAvailable {
            return requestedId
        }
    }

    return nil
}
|
||||
}
|
||||
|
||||
/// Command-line entry point of the ScreenCaptureKit helper.
///
/// Expects exactly one argument: a JSON-encoded `RecordingRequest`. While the
/// recording runs, newline-delimited commands are read from standard input:
/// `pause`, `resume` and `stop` (unknown commands are ignored). Failures are
/// reported through `emitError` with code `helper-error` and exit status 1.
@main
struct OpenScreenScreenCaptureKitHelper {
    static func main() async {
        do {
            guard CommandLine.arguments.count == 2 else {
                throw HelperError.invalidArguments
            }

            guard #available(macOS 13.0, *) else {
                throw HelperError.unsupportedMacOS
            }

            let requestData = Data(CommandLine.arguments[1].utf8)
            let decoder = JSONDecoder()
            let request = try decoder.decode(RecordingRequest.self, from: requestData)
            let recorder = ScreenCaptureRecorder(request: request)

            // `readLine()` blocks, so the command loop runs on its own detached task.
            let stopTask = Task.detached {
                while let line = readLine() {
                    let command = line.trimmingCharacters(in: .whitespacesAndNewlines)
                    switch command {
                    case "pause":
                        recorder.pause()
                    case "resume":
                        recorder.resume()
                    case "stop":
                        await recorder.stop()
                        exit(0)
                    default:
                        break
                    }
                }

                // Standard input reached end-of-file without a "stop" command
                // (for example the controlling process closed the pipe or went
                // away). Stop the recorder before the process exits instead of
                // abandoning the recording mid-write.
                await recorder.stop()
            }

            try await recorder.start()
            await stopTask.value
        } catch let error as HelperError {
            emitError(code: "helper-error", message: error.description)
            exit(1)
        } catch {
            emitError(code: "helper-error", message: "\(error)")
            exit(1)
        }
    }
}
|
||||
@@ -0,0 +1,101 @@
|
||||
cmake_minimum_required(VERSION 3.20)

# CMake's default MSVC console template links gdi32.lib unconditionally, but the
# local Windows SDK image used by some contributors can be missing that library.
# The capture helper does not use GDI, so the default library set is overridden
# with a minimal, explicit list. This must be set before project().
set(CMAKE_CXX_STANDARD_LIBRARIES
    "kernel32.lib user32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib"
    CACHE STRING "" FORCE)

project(openscreen-wgc-capture LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Compiler flags shared by every helper executable below.
set(OPENSCREEN_HELPER_COMPILE_OPTIONS /EHsc /W4 /utf-8)

# ---- wgc-capture -------------------------------------------------------------
set(WGC_CAPTURE_SOURCES
    src/audio_sample_utils.cpp
    src/audio_sample_utils.h
    src/dshow_webcam_capture.cpp
    src/dshow_webcam_capture.h
    src/main.cpp
    src/mf_encoder.cpp
    src/mf_encoder.h
    src/monitor_utils.cpp
    src/monitor_utils.h
    src/wasapi_loopback_capture.cpp
    src/wasapi_loopback_capture.h
    src/webcam_capture.cpp
    src/webcam_capture.h
    src/wgc_session.cpp
    src/wgc_session.h
)
add_executable(wgc-capture ${WGC_CAPTURE_SOURCES})
target_compile_definitions(wgc-capture PRIVATE
    NOMINMAX
    WIN32_LEAN_AND_MEAN
    _WIN32_WINNT=0x0A00
)
target_compile_options(wgc-capture PRIVATE ${OPENSCREEN_HELPER_COMPILE_OPTIONS})
target_link_libraries(wgc-capture PRIVATE
    d3d11
    dxgi
    mf
    mfplat
    mfreadwrite
    mfuuid
    runtimeobject
    windowsapp
)

# ---- cursor-sampler ----------------------------------------------------------
# Note: unlike the other targets, WIN32_LEAN_AND_MEAN is not defined here.
add_executable(cursor-sampler src/cursor-sampler.cpp)
target_compile_definitions(cursor-sampler PRIVATE
    NOMINMAX
    _WIN32_WINNT=0x0A00
)
target_compile_options(cursor-sampler PRIVATE ${OPENSCREEN_HELPER_COMPILE_OPTIONS})
target_link_libraries(cursor-sampler PRIVATE
    gdi32
    gdiplus
)

# ---- guide-hotkey-listener ---------------------------------------------------
add_executable(guide-hotkey-listener src/guide-hotkey-listener.cpp)
target_compile_definitions(guide-hotkey-listener PRIVATE
    NOMINMAX
    WIN32_LEAN_AND_MEAN
    _WIN32_WINNT=0x0A00
)
target_compile_options(guide-hotkey-listener PRIVATE ${OPENSCREEN_HELPER_COMPILE_OPTIONS})
target_link_libraries(guide-hotkey-listener PRIVATE
    user32
)

# ---- openscreen-ocr-service-wrapper ------------------------------------------
add_executable(openscreen-ocr-service-wrapper src/ocr-service-wrapper.cpp)
target_compile_definitions(openscreen-ocr-service-wrapper PRIVATE
    NOMINMAX
    WIN32_LEAN_AND_MEAN
    UNICODE
    _UNICODE
    _WIN32_WINNT=0x0A00
)
target_compile_options(openscreen-ocr-service-wrapper PRIVATE ${OPENSCREEN_HELPER_COMPILE_OPTIONS})
target_link_libraries(openscreen-ocr-service-wrapper PRIVATE
    advapi32
)
|
||||
@@ -0,0 +1,439 @@
|
||||
#include "audio_sample_utils.h"
|
||||
|
||||
#include <mfapi.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
|
||||
namespace {
|
||||
|
||||
bool isFloatFormat(const AudioInputFormat& format) {
|
||||
return format.subtype == MFAudioFormat_Float && format.bitsPerSample == 32;
|
||||
}
|
||||
|
||||
bool isPcmFormat(const AudioInputFormat& format, UINT32 bitsPerSample) {
|
||||
return format.subtype == MFAudioFormat_PCM && format.bitsPerSample == bitsPerSample;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T clampTo(double value) {
|
||||
const double minValue = static_cast<double>(std::numeric_limits<T>::min());
|
||||
const double maxValue = static_cast<double>(std::numeric_limits<T>::max());
|
||||
return static_cast<T>(std::clamp(std::round(value), minValue, maxValue));
|
||||
}
|
||||
|
||||
size_t bytesPerSample(const AudioInputFormat& format) {
|
||||
return format.bitsPerSample / 8;
|
||||
}
|
||||
|
||||
double readSampleAsDouble(const BYTE* source, const AudioInputFormat& format, size_t frameIndex, UINT32 channelIndex) {
|
||||
if (!source || format.blockAlign == 0 || channelIndex >= format.channels) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
const size_t offset = frameIndex * format.blockAlign + channelIndex * bytesPerSample(format);
|
||||
if (isFloatFormat(format)) {
|
||||
return static_cast<double>(*reinterpret_cast<const float*>(source + offset));
|
||||
}
|
||||
if (isPcmFormat(format, 16)) {
|
||||
return static_cast<double>(*reinterpret_cast<const int16_t*>(source + offset)) / 32768.0;
|
||||
}
|
||||
if (isPcmFormat(format, 32)) {
|
||||
return static_cast<double>(*reinterpret_cast<const int32_t*>(source + offset)) / 2147483648.0;
|
||||
}
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
void writeSampleFromDouble(BYTE* destination, const AudioInputFormat& format, size_t frameIndex, UINT32 channelIndex, double value) {
|
||||
if (!destination || format.blockAlign == 0 || channelIndex >= format.channels) {
|
||||
return;
|
||||
}
|
||||
|
||||
const double clamped = std::clamp(value, -1.0, 1.0);
|
||||
const size_t offset = frameIndex * format.blockAlign + channelIndex * bytesPerSample(format);
|
||||
if (isFloatFormat(format)) {
|
||||
*reinterpret_cast<float*>(destination + offset) = static_cast<float>(clamped);
|
||||
return;
|
||||
}
|
||||
if (isPcmFormat(format, 16)) {
|
||||
*reinterpret_cast<int16_t*>(destination + offset) = clampTo<int16_t>(clamped * 32767.0);
|
||||
return;
|
||||
}
|
||||
if (isPcmFormat(format, 32)) {
|
||||
*reinterpret_cast<int32_t*>(destination + offset) = clampTo<int32_t>(clamped * 2147483647.0);
|
||||
}
|
||||
}
|
||||
|
||||
double readMappedChannel(const BYTE* source, const AudioInputFormat& format, size_t frameIndex, UINT32 targetChannel, UINT32 targetChannels) {
|
||||
if (format.channels == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
if (format.channels == targetChannels && targetChannel < format.channels) {
|
||||
return readSampleAsDouble(source, format, frameIndex, targetChannel);
|
||||
}
|
||||
if (format.channels == 1) {
|
||||
return readSampleAsDouble(source, format, frameIndex, 0);
|
||||
}
|
||||
if (targetChannels == 1) {
|
||||
double sum = 0.0;
|
||||
for (UINT32 channel = 0; channel < format.channels; ++channel) {
|
||||
sum += readSampleAsDouble(source, format, frameIndex, channel);
|
||||
}
|
||||
return sum / static_cast<double>(format.channels);
|
||||
}
|
||||
return readSampleAsDouble(source, format, frameIndex, std::min(targetChannel, format.channels - 1));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
constexpr int64_t HnsPerSecond = 10'000'000;
|
||||
|
||||
bool sameAudioFormatForMixing(const AudioInputFormat& left, const AudioInputFormat& right) {
|
||||
return left.subtype == right.subtype &&
|
||||
left.sampleRate == right.sampleRate &&
|
||||
left.channels == right.channels &&
|
||||
left.bitsPerSample == right.bitsPerSample &&
|
||||
left.blockAlign == right.blockAlign &&
|
||||
left.avgBytesPerSec == right.avgBytesPerSec;
|
||||
}
|
||||
|
||||
// Builds a 16-bit stereo PCM format that keeps the sample rate of `source`
// (falling back to 48000 when the source rate is 0), with blockAlign and
// avgBytesPerSec derived from those values.
AudioInputFormat makeAacCompatibleAudioFormat(const AudioInputFormat& source) {
    AudioInputFormat aac{};
    aac.subtype = MFAudioFormat_PCM;
    aac.channels = 2;
    aac.bitsPerSample = 16;
    aac.blockAlign = aac.channels * (aac.bitsPerSample / 8);

    if (source.sampleRate > 0) {
        aac.sampleRate = source.sampleRate;
    } else {
        aac.sampleRate = 48000;
    }
    aac.avgBytesPerSec = aac.sampleRate * aac.blockAlign;
    return aac;
}
|
||||
|
||||
void copyAudioWithGain(
|
||||
const BYTE* source,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& format,
|
||||
double gain,
|
||||
std::vector<BYTE>& destination) {
|
||||
destination.resize(byteCount);
|
||||
if (!source || byteCount == 0) {
|
||||
std::fill(destination.begin(), destination.end(), static_cast<BYTE>(0));
|
||||
return;
|
||||
}
|
||||
|
||||
if (std::abs(gain - 1.0) < 0.0001) {
|
||||
std::memcpy(destination.data(), source, byteCount);
|
||||
return;
|
||||
}
|
||||
|
||||
if (isFloatFormat(format)) {
|
||||
const auto* input = reinterpret_cast<const float*>(source);
|
||||
auto* output = reinterpret_cast<float*>(destination.data());
|
||||
const size_t sampleCount = byteCount / sizeof(float);
|
||||
for (size_t index = 0; index < sampleCount; index += 1) {
|
||||
output[index] = static_cast<float>(std::clamp(input[index] * gain, -1.0, 1.0));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (isPcmFormat(format, 16)) {
|
||||
const auto* input = reinterpret_cast<const int16_t*>(source);
|
||||
auto* output = reinterpret_cast<int16_t*>(destination.data());
|
||||
const size_t sampleCount = byteCount / sizeof(int16_t);
|
||||
for (size_t index = 0; index < sampleCount; index += 1) {
|
||||
output[index] = clampTo<int16_t>(static_cast<double>(input[index]) * gain);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (isPcmFormat(format, 32)) {
|
||||
const auto* input = reinterpret_cast<const int32_t*>(source);
|
||||
auto* output = reinterpret_cast<int32_t*>(destination.data());
|
||||
const size_t sampleCount = byteCount / sizeof(int32_t);
|
||||
for (size_t index = 0; index < sampleCount; index += 1) {
|
||||
output[index] = clampTo<int32_t>(static_cast<double>(input[index]) * gain);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
std::memcpy(destination.data(), source, byteCount);
|
||||
}
|
||||
|
||||
void convertAudioWithGain(
|
||||
const BYTE* source,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& sourceFormat,
|
||||
const AudioInputFormat& targetFormat,
|
||||
double gain,
|
||||
std::vector<BYTE>& destination) {
|
||||
if (!source || byteCount == 0 || sourceFormat.blockAlign == 0 || targetFormat.blockAlign == 0 ||
|
||||
sourceFormat.sampleRate == 0 || targetFormat.sampleRate == 0 || sourceFormat.channels == 0 ||
|
||||
targetFormat.channels == 0) {
|
||||
destination.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if (sameAudioFormatForMixing(sourceFormat, targetFormat)) {
|
||||
copyAudioWithGain(source, byteCount, targetFormat, gain, destination);
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t sourceFrames = byteCount / sourceFormat.blockAlign;
|
||||
if (sourceFrames == 0) {
|
||||
destination.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
const double rateRatio = static_cast<double>(targetFormat.sampleRate) /
|
||||
static_cast<double>(sourceFormat.sampleRate);
|
||||
const size_t targetFrames = std::max<size_t>(1, static_cast<size_t>(std::llround(sourceFrames * rateRatio)));
|
||||
destination.assign(targetFrames * targetFormat.blockAlign, 0);
|
||||
|
||||
for (size_t targetFrame = 0; targetFrame < targetFrames; ++targetFrame) {
|
||||
const double sourcePosition = static_cast<double>(targetFrame) / rateRatio;
|
||||
const size_t sourceFrame = std::min(
|
||||
sourceFrames - 1,
|
||||
static_cast<size_t>(std::llround(sourcePosition)));
|
||||
for (UINT32 channel = 0; channel < targetFormat.channels; ++channel) {
|
||||
const double sample = readMappedChannel(
|
||||
source,
|
||||
sourceFormat,
|
||||
sourceFrame,
|
||||
channel,
|
||||
targetFormat.channels);
|
||||
writeSampleFromDouble(destination.data(), targetFormat, targetFrame, channel, sample * gain);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mixAudioInPlace(
|
||||
std::vector<BYTE>& destination,
|
||||
const BYTE* source,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& format) {
|
||||
if (!source || byteCount == 0 || destination.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t mixByteCount = std::min(destination.size(), static_cast<size_t>(byteCount));
|
||||
|
||||
if (isFloatFormat(format)) {
|
||||
auto* output = reinterpret_cast<float*>(destination.data());
|
||||
const auto* input = reinterpret_cast<const float*>(source);
|
||||
const size_t sampleCount = mixByteCount / sizeof(float);
|
||||
for (size_t index = 0; index < sampleCount; index += 1) {
|
||||
output[index] = static_cast<float>(std::clamp(output[index] + input[index], -1.0f, 1.0f));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (isPcmFormat(format, 16)) {
|
||||
auto* output = reinterpret_cast<int16_t*>(destination.data());
|
||||
const auto* input = reinterpret_cast<const int16_t*>(source);
|
||||
const size_t sampleCount = mixByteCount / sizeof(int16_t);
|
||||
for (size_t index = 0; index < sampleCount; index += 1) {
|
||||
output[index] = clampTo<int16_t>(
|
||||
static_cast<double>(output[index]) + static_cast<double>(input[index]));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (isPcmFormat(format, 32)) {
|
||||
auto* output = reinterpret_cast<int32_t*>(destination.data());
|
||||
const auto* input = reinterpret_cast<const int32_t*>(source);
|
||||
const size_t sampleCount = mixByteCount / sizeof(int32_t);
|
||||
for (size_t index = 0; index < sampleCount; index += 1) {
|
||||
output[index] = clampTo<int32_t>(
|
||||
static_cast<double>(output[index]) + static_cast<double>(input[index]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
AudioMixer::AudioMixer(
|
||||
const AudioInputFormat& format,
|
||||
const AudioInputFormat& systemFormat,
|
||||
const AudioInputFormat& microphoneFormat,
|
||||
bool includeSystem,
|
||||
bool includeMicrophone,
|
||||
double microphoneGain,
|
||||
OutputCallback output)
|
||||
: format_(format),
|
||||
systemFormat_(systemFormat),
|
||||
microphoneFormat_(microphoneFormat),
|
||||
includeSystem_(includeSystem),
|
||||
includeMicrophone_(includeMicrophone),
|
||||
microphoneGain_(microphoneGain),
|
||||
output_(std::move(output)) {}
|
||||
|
||||
AudioMixer::~AudioMixer() {
|
||||
stop();
|
||||
}
|
||||
|
||||
bool AudioMixer::start() {
|
||||
if (!output_ || format_.sampleRate == 0 || format_.blockAlign == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
stopRequested_ = false;
|
||||
emittedFrames_ = 0;
|
||||
timelineStarted_ = false;
|
||||
paused_ = false;
|
||||
thread_ = std::thread([this] {
|
||||
mixLoop();
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
void AudioMixer::beginTimeline() {
|
||||
{
|
||||
std::scoped_lock lock(mutex_);
|
||||
systemQueue_.clear();
|
||||
microphoneQueue_.clear();
|
||||
emittedFrames_ = 0;
|
||||
timelineStarted_ = true;
|
||||
}
|
||||
cv_.notify_all();
|
||||
}
|
||||
|
||||
void AudioMixer::setPaused(bool paused) {
|
||||
{
|
||||
std::scoped_lock lock(mutex_);
|
||||
paused_ = paused;
|
||||
if (paused_) {
|
||||
systemQueue_.clear();
|
||||
microphoneQueue_.clear();
|
||||
}
|
||||
}
|
||||
cv_.notify_all();
|
||||
}
|
||||
|
||||
void AudioMixer::stop() {
|
||||
stopRequested_ = true;
|
||||
cv_.notify_all();
|
||||
if (thread_.joinable()) {
|
||||
thread_.join();
|
||||
}
|
||||
}
|
||||
|
||||
void AudioMixer::pushSystem(const BYTE* data, DWORD byteCount) {
|
||||
if (!includeSystem_ || stopRequested_) {
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
std::scoped_lock lock(mutex_);
|
||||
if (paused_) {
|
||||
return;
|
||||
}
|
||||
append(systemQueue_, data, byteCount, systemFormat_, 1.0);
|
||||
}
|
||||
cv_.notify_all();
|
||||
}
|
||||
|
||||
void AudioMixer::pushMicrophone(const BYTE* data, DWORD byteCount) {
|
||||
if (!includeMicrophone_ || stopRequested_) {
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
std::scoped_lock lock(mutex_);
|
||||
if (paused_) {
|
||||
return;
|
||||
}
|
||||
append(microphoneQueue_, data, byteCount, microphoneFormat_, microphoneGain_);
|
||||
}
|
||||
cv_.notify_all();
|
||||
}
|
||||
|
||||
void AudioMixer::append(
|
||||
std::vector<BYTE>& queue,
|
||||
const BYTE* data,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& sourceFormat,
|
||||
double gain) {
|
||||
if (!data || byteCount == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
convertAudioWithGain(data, byteCount, sourceFormat, format_, gain, gainBuffer_);
|
||||
queue.insert(queue.end(), gainBuffer_.begin(), gainBuffer_.end());
|
||||
}
|
||||
|
||||
bool AudioMixer::pop(std::vector<BYTE>& queue, std::vector<BYTE>& chunk, size_t byteCount) {
|
||||
if (queue.empty()) {
|
||||
chunk.assign(byteCount, 0);
|
||||
return false;
|
||||
}
|
||||
|
||||
chunk.assign(byteCount, 0);
|
||||
const size_t copiedBytes = std::min(byteCount, queue.size());
|
||||
std::memcpy(chunk.data(), queue.data(), copiedBytes);
|
||||
queue.erase(queue.begin(), queue.begin() + static_cast<std::ptrdiff_t>(copiedBytes));
|
||||
return copiedBytes > 0;
|
||||
}
|
||||
|
||||
void AudioMixer::mixLoop() {
|
||||
const uint32_t chunkFrames = std::max<uint32_t>(1, format_.sampleRate / 100);
|
||||
const size_t chunkBytes = static_cast<size_t>(chunkFrames) * format_.blockAlign;
|
||||
std::vector<BYTE> mixedChunk;
|
||||
std::vector<BYTE> sourceChunk;
|
||||
std::chrono::steady_clock::time_point audioClockStart;
|
||||
bool audioClockStarted = false;
|
||||
|
||||
while (true) {
|
||||
{
|
||||
std::unique_lock lock(mutex_);
|
||||
cv_.wait_for(lock, std::chrono::milliseconds(20), [&] {
|
||||
const bool hasSystem = !includeSystem_ || systemQueue_.size() >= chunkBytes;
|
||||
const bool hasMicrophone = !includeMicrophone_ || microphoneQueue_.size() >= chunkBytes;
|
||||
const bool hasAnySource = !systemQueue_.empty() || !microphoneQueue_.empty();
|
||||
return stopRequested_.load() ||
|
||||
(timelineStarted_ && !paused_ && (hasSystem || hasMicrophone) && hasAnySource);
|
||||
});
|
||||
|
||||
if (stopRequested_) {
|
||||
break;
|
||||
}
|
||||
if (!timelineStarted_ || paused_) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const bool hasAnyQueuedAudio = !systemQueue_.empty() || !microphoneQueue_.empty();
|
||||
if (!hasAnyQueuedAudio) {
|
||||
continue;
|
||||
}
|
||||
|
||||
mixedChunk.assign(chunkBytes, 0);
|
||||
if (includeSystem_) {
|
||||
pop(systemQueue_, sourceChunk, chunkBytes);
|
||||
mixAudioInPlace(mixedChunk, sourceChunk.data(), static_cast<DWORD>(sourceChunk.size()), format_);
|
||||
}
|
||||
if (includeMicrophone_) {
|
||||
pop(microphoneQueue_, sourceChunk, chunkBytes);
|
||||
mixAudioInPlace(mixedChunk, sourceChunk.data(), static_cast<DWORD>(sourceChunk.size()), format_);
|
||||
}
|
||||
}
|
||||
|
||||
if (!audioClockStarted) {
|
||||
audioClockStart = std::chrono::steady_clock::now();
|
||||
audioClockStarted = true;
|
||||
}
|
||||
|
||||
const int64_t timestampHns =
|
||||
static_cast<int64_t>((emittedFrames_ * HnsPerSecond) / format_.sampleRate);
|
||||
const int64_t durationHns =
|
||||
static_cast<int64_t>((static_cast<uint64_t>(chunkFrames) * HnsPerSecond) / format_.sampleRate);
|
||||
if (!output_(mixedChunk.data(), static_cast<DWORD>(mixedChunk.size()), timestampHns, durationHns)) {
|
||||
stopRequested_ = true;
|
||||
break;
|
||||
}
|
||||
emittedFrames_ += chunkFrames;
|
||||
|
||||
const auto nextDeadline = audioClockStart +
|
||||
std::chrono::duration_cast<std::chrono::steady_clock::duration>(
|
||||
std::chrono::duration<double>(static_cast<double>(emittedFrames_) / format_.sampleRate));
|
||||
std::this_thread::sleep_until(nextDeadline);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,87 @@
|
||||
#pragma once
|
||||
|
||||
#include "mf_encoder.h"
|
||||
|
||||
#include <Windows.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
bool sameAudioFormatForMixing(const AudioInputFormat& left, const AudioInputFormat& right);
|
||||
AudioInputFormat makeAacCompatibleAudioFormat(const AudioInputFormat& source);
|
||||
void copyAudioWithGain(
|
||||
const BYTE* source,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& format,
|
||||
double gain,
|
||||
std::vector<BYTE>& destination);
|
||||
void convertAudioWithGain(
|
||||
const BYTE* source,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& sourceFormat,
|
||||
const AudioInputFormat& targetFormat,
|
||||
double gain,
|
||||
std::vector<BYTE>& destination);
|
||||
void mixAudioInPlace(
|
||||
std::vector<BYTE>& destination,
|
||||
const BYTE* source,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& format);
|
||||
|
||||
class AudioMixer {
|
||||
public:
|
||||
using OutputCallback = std::function<bool(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns)>;
|
||||
|
||||
AudioMixer(
|
||||
const AudioInputFormat& format,
|
||||
const AudioInputFormat& systemFormat,
|
||||
const AudioInputFormat& microphoneFormat,
|
||||
bool includeSystem,
|
||||
bool includeMicrophone,
|
||||
double microphoneGain,
|
||||
OutputCallback output);
|
||||
~AudioMixer();
|
||||
|
||||
AudioMixer(const AudioMixer&) = delete;
|
||||
AudioMixer& operator=(const AudioMixer&) = delete;
|
||||
|
||||
bool start();
|
||||
void beginTimeline();
|
||||
void setPaused(bool paused);
|
||||
void stop();
|
||||
void pushSystem(const BYTE* data, DWORD byteCount);
|
||||
void pushMicrophone(const BYTE* data, DWORD byteCount);
|
||||
|
||||
private:
|
||||
void append(
|
||||
std::vector<BYTE>& queue,
|
||||
const BYTE* data,
|
||||
DWORD byteCount,
|
||||
const AudioInputFormat& sourceFormat,
|
||||
double gain);
|
||||
bool pop(std::vector<BYTE>& queue, std::vector<BYTE>& chunk, size_t byteCount);
|
||||
void mixLoop();
|
||||
|
||||
AudioInputFormat format_{};
|
||||
AudioInputFormat systemFormat_{};
|
||||
AudioInputFormat microphoneFormat_{};
|
||||
bool includeSystem_ = false;
|
||||
bool includeMicrophone_ = false;
|
||||
double microphoneGain_ = 1.0;
|
||||
OutputCallback output_;
|
||||
std::mutex mutex_;
|
||||
std::condition_variable cv_;
|
||||
std::vector<BYTE> systemQueue_;
|
||||
std::vector<BYTE> microphoneQueue_;
|
||||
std::vector<BYTE> gainBuffer_;
|
||||
std::thread thread_;
|
||||
std::atomic<bool> stopRequested_ = false;
|
||||
bool timelineStarted_ = false;
|
||||
bool paused_ = false;
|
||||
uint64_t emittedFrames_ = 0;
|
||||
};
|
||||
@@ -0,0 +1,482 @@
|
||||
#include <windows.h>
|
||||
#include <gdiplus.h>
|
||||
#include <objbase.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cinttypes>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Global mouse-hook state
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
static HHOOK g_mouseHook = nullptr;
|
||||
static DWORD g_mainThreadId = 0;
|
||||
static std::atomic<int> g_leftDownCount{0};
|
||||
static std::atomic<int> g_leftUpCount{0};
|
||||
static std::atomic<bool> g_stop{false};
|
||||
static std::mutex g_stdoutMtx;
|
||||
|
||||
// Mouse hook procedure: counts left-button press and release events in the
// global atomic counters, then always forwards the event to the next hook.
static LRESULT CALLBACK LowLevelMouseProc(int nCode, WPARAM wParam, LPARAM lParam) {
    if (nCode >= 0) {
        switch (wParam) {
            case WM_LBUTTONDOWN:
                g_leftDownCount.fetch_add(1, std::memory_order_relaxed);
                break;
            case WM_LBUTTONUP:
                g_leftUpCount.fetch_add(1, std::memory_order_relaxed);
                break;
            default:
                break;
        }
    }
    return CallNextHookEx(g_mouseHook, nCode, wParam, lParam);
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Utilities
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Current system_clock time as whole milliseconds since the clock's epoch.
static int64_t nowMs() {
    using std::chrono::duration_cast;
    using std::chrono::milliseconds;
    using std::chrono::system_clock;

    const auto sinceEpoch = system_clock::now().time_since_epoch();
    return static_cast<int64_t>(duration_cast<milliseconds>(sinceEpoch).count());
}
|
||||
|
||||
static void writeJsonLine(const std::string& json) {
|
||||
std::lock_guard<std::mutex> lock(g_stdoutMtx);
|
||||
std::cout << json << '\n';
|
||||
std::cout.flush();
|
||||
}
|
||||
|
||||
// Escapes `s` for use inside a JSON string literal (without the surrounding
// quotes).
//
// Quotes and backslashes are backslash-escaped; \n, \r, \t, \b and \f use their
// short escapes; every other control character below 0x20 is written as
// \u00XX, because JSON does not allow raw control characters inside strings.
// All other bytes (including multi-byte UTF-8 sequences) pass through unchanged.
static std::string jsonEscape(const std::string& s) {
    std::string r;
    r.reserve(s.size());
    for (unsigned char c : s) {
        switch (c) {
            case '"':  r += "\\\""; break;
            case '\\': r += "\\\\"; break;
            case '\n': r += "\\n";  break;
            case '\r': r += "\\r";  break;
            case '\t': r += "\\t";  break;
            case '\b': r += "\\b";  break;
            case '\f': r += "\\f";  break;
            default:
                if (c < 0x20) {
                    // "\u" + 4 hex digits + terminating NUL.
                    char escaped[7];
                    std::snprintf(escaped, sizeof(escaped), "\\u%04x", static_cast<unsigned>(c));
                    r += escaped;
                } else {
                    r.push_back(static_cast<char>(c));
                }
                break;
        }
    }
    return r;
}
|
||||
|
||||
// Standard base64 alphabet, indexed by 6-bit value.
static const char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Encodes `len` bytes starting at `data` as base64 text, padding the final
// group with '=' so the output length is always a multiple of four.
static std::string base64Encode(const uint8_t* data, size_t len) {
    std::string encoded;
    encoded.reserve(((len + 2) / 3) * 4);

    size_t pos = 0;
    while (pos < len) {
        const size_t remaining = len - pos;

        // Pack up to three input bytes into the top 24 bits of one word.
        uint32_t group = static_cast<uint32_t>(data[pos]) << 16;
        if (remaining > 1) {
            group |= static_cast<uint32_t>(data[pos + 1]) << 8;
        }
        if (remaining > 2) {
            group |= static_cast<uint32_t>(data[pos + 2]);
        }

        encoded += kBase64Chars[(group >> 18) & 0x3F];
        encoded += kBase64Chars[(group >> 12) & 0x3F];
        encoded += remaining > 1 ? kBase64Chars[(group >> 6) & 0x3F] : '=';
        encoded += remaining > 2 ? kBase64Chars[group & 0x3F] : '=';

        pos += 3;
    }
    return encoded;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// GDI+ PNG encoder CLSID
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Looks up the GDI+ image encoder whose MIME type is "image/png" and stores its
// CLSID in `out`. Returns false when the encoder list cannot be queried or
// contains no PNG encoder; `out` is left untouched in that case.
static bool getPngClsid(CLSID& out) {
    UINT encoderCount = 0;
    UINT bytesNeeded = 0;
    if (Gdiplus::GetImageEncodersSize(&encoderCount, &bytesNeeded) != Gdiplus::Ok) {
        return false;
    }
    if (bytesNeeded == 0) {
        return false;
    }

    // GDI+ fills a caller-provided byte buffer that begins with the array of
    // ImageCodecInfo records.
    std::vector<uint8_t> storage(bytesNeeded);
    auto* encoders = reinterpret_cast<Gdiplus::ImageCodecInfo*>(storage.data());
    if (Gdiplus::GetImageEncoders(encoderCount, bytesNeeded, encoders) != Gdiplus::Ok) {
        return false;
    }

    const Gdiplus::ImageCodecInfo* const end = encoders + encoderCount;
    const Gdiplus::ImageCodecInfo* const png = std::find_if(
        static_cast<const Gdiplus::ImageCodecInfo*>(encoders), end,
        [](const Gdiplus::ImageCodecInfo& info) {
            return std::wstring(info.MimeType) == L"image/png";
        });
    if (png == end) {
        return false;
    }

    out = png->Clsid;
    return true;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Standard cursor-type lookup
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Maps a cursor handle to the name of the matching predefined system cursor, or
// nullptr when `hc` is null or is not one of the cursors listed below.
//
// The handles of the listed cursors are loaded once, on the first call with a
// non-null handle, and cached for later comparisons.
static const char* standardCursorType(HCURSOR hc) {
    struct KnownCursor {
        WORD id;           // resource identifier passed to LoadCursor
        const char* name;  // name reported for that cursor
    };
    static const KnownCursor kKnown[] = {
        {32512, "arrow"},
        {32513, "text"},
        {32514, "wait"},
        {32515, "crosshair"},
        {32516, "up-arrow"},
        {32642, "resize-nwse"},
        {32643, "resize-nesw"},
        {32644, "resize-ew"},
        {32645, "resize-ns"},
        {32646, "move"},
        {32648, "not-allowed"},
        {32649, "pointer"},
        {32650, "app-starting"},
        {32651, "help"},
    };
    static constexpr int kCount = static_cast<int>(sizeof(kKnown) / sizeof(kKnown[0]));
    static HCURSOR loadedHandles[kCount] = {};
    static bool handlesLoaded = false;

    if (hc == nullptr) {
        return nullptr;
    }

    if (!handlesLoaded) {
        for (int slot = 0; slot < kCount; ++slot) {
            loadedHandles[slot] = LoadCursor(nullptr, MAKEINTRESOURCE(kKnown[slot].id));
        }
        handlesLoaded = true;
    }

    for (int slot = 0; slot < kCount; ++slot) {
        // Skip cursors that failed to load.
        if (loadedHandles[slot] != nullptr && loadedHandles[slot] == hc) {
            return kKnown[slot].name;
        }
    }
    return nullptr;
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Custom cursor-type detection (replicates the PowerShell heuristic)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Heuristic classifier for a rendered cursor image.
//
// `pixels` holds w*h 32-bit values in row-major order with alpha in the top
// byte; (hotX, hotY) is the cursor hotspot. Returns "closed-hand" or
// "open-hand" when the size, hotspot position and the bounding box of the
// visible pixels (alpha > 32) pass every check below, otherwise nullptr.
static const char* detectCustomCursorType(
    const uint32_t* pixels, int w, int h, int hotX, int hotY)
{
    // Only images between 24 and 64 pixels per side are considered.
    const bool sizeOk = w >= 24 && h >= 24 && w <= 64 && h <= 64;
    if (!sizeOk) return nullptr;

    // The hotspot must lie in a central band of the image.
    if (hotX < w * 0.25 || hotX > w * 0.75) return nullptr;
    if (hotY < h * 0.15 || hotY > h * 0.55) return nullptr;

    int visibleCount = 0;  // pixels with alpha above the threshold
    int upperCount = 0;    // visible pixels in rows above h / 2
    int minX = w, minY = h, maxX = -1, maxY = -1;
    const int midRow = h / 2;

    for (int y = 0; y < h; ++y) {
        const uint32_t* row = pixels + y * w;
        for (int x = 0; x < w; ++x) {
            const uint8_t alpha = static_cast<uint8_t>(row[x] >> 24);
            if (alpha <= 32) continue;

            ++visibleCount;
            if (y < midRow) ++upperCount;
            if (x < minX) minX = x;
            if (x > maxX) maxX = x;
            if (y < minY) minY = y;
            if (y > maxY) maxY = y;
        }
    }

    if (visibleCount < 90 || maxX < minX || maxY < minY) return nullptr;

    // Bounding box of the visible pixels, relative to the image size.
    const int boxW = maxX - minX + 1;
    const int boxH = maxY - minY + 1;
    if (boxW < w * 0.35 || boxW > w * 0.9) return nullptr;
    if (boxH < h * 0.45 || boxH > static_cast<double>(h)) return nullptr;
    if (minY > h * 0.45 || maxY < h * 0.65) return nullptr;

    // More than 55% of the visible pixels in the upper half => closed hand.
    if (upperCount > visibleCount * 0.55) {
        return "closed-hand";
    }
    return "open-hand";
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Build asset JSON for the given cursor (returns empty string on failure)
|
||||
//
|
||||
// Renders the cursor via GDI DrawIconEx onto a 32-bpp transparent DIB section
|
||||
// and then encodes to PNG — matching the PowerShell approach of
|
||||
// Graphics.Clear(Transparent) + Graphics.DrawIcon(). This correctly preserves
|
||||
// per-pixel alpha for 32-bit cursors, unlike Gdiplus::Bitmap::FromHICON which
|
||||
// can produce incorrect alpha for cursor handles.
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
static std::string buildAssetJson(
|
||||
HCURSOR hCursor,
|
||||
const std::string& handleStr,
|
||||
const CLSID& pngClsid,
|
||||
const char** outCustomType)
|
||||
{
|
||||
*outCustomType = nullptr;
|
||||
|
||||
// Get hotspot and cursor dimensions from the icon info.
|
||||
// For color cursors hbmColor gives the size; for monochrome cursors the
|
||||
// mask bitmap is twice the cursor height (AND mask stacked on XOR mask).
|
||||
ICONINFO ii{};
|
||||
if (!GetIconInfo(hCursor, &ii)) return {};
|
||||
const int hotX = static_cast<int>(ii.xHotspot);
|
||||
const int hotY = static_cast<int>(ii.yHotspot);
|
||||
|
||||
int w = 0, h = 0;
|
||||
if (ii.hbmColor) {
|
||||
BITMAP bm{};
|
||||
if (GetObject(ii.hbmColor, sizeof(bm), &bm)) { w = bm.bmWidth; h = bm.bmHeight; }
|
||||
}
|
||||
if (ii.hbmMask && (w == 0 || h == 0)) {
|
||||
BITMAP bm{};
|
||||
if (GetObject(ii.hbmMask, sizeof(bm), &bm)) {
|
||||
w = bm.bmWidth;
|
||||
h = ii.hbmColor ? bm.bmHeight : bm.bmHeight / 2;
|
||||
}
|
||||
}
|
||||
if (ii.hbmMask) DeleteObject(ii.hbmMask);
|
||||
if (ii.hbmColor) DeleteObject(ii.hbmColor);
|
||||
if (w <= 0 || h <= 0) return {};
|
||||
|
||||
// Copy the cursor handle so DrawIconEx cannot affect the live system cursor.
|
||||
const HICON hCopy = CopyIcon(hCursor);
|
||||
if (!hCopy) return {};
|
||||
|
||||
// Allocate a 32-bpp top-down DIB section and clear it to transparent black,
|
||||
// then draw the cursor with DI_NORMAL. For 32-bit alpha cursors Windows
|
||||
// writes correct per-pixel alpha into the high byte of each BGRA pixel.
|
||||
const int stride = w * 4;
|
||||
BITMAPINFOHEADER bih{};
|
||||
bih.biSize = sizeof(bih);
|
||||
bih.biWidth = w;
|
||||
bih.biHeight = -h; // negative = top-down scanline order
|
||||
bih.biPlanes = 1;
|
||||
bih.biBitCount = 32;
|
||||
bih.biCompression = BI_RGB;
|
||||
|
||||
void* pBits = nullptr;
|
||||
HDC hDC = CreateCompatibleDC(nullptr);
|
||||
HBITMAP hBmp = hDC ? CreateDIBSection(hDC,
|
||||
reinterpret_cast<const BITMAPINFO*>(&bih),
|
||||
DIB_RGB_COLORS, &pBits, nullptr, 0)
|
||||
: nullptr;
|
||||
|
||||
if (!hBmp || !pBits) {
|
||||
if (hBmp) DeleteObject(hBmp);
|
||||
if (hDC) DeleteDC(hDC);
|
||||
DestroyIcon(hCopy);
|
||||
return {};
|
||||
}
|
||||
|
||||
HGDIOBJ hOld = SelectObject(hDC, hBmp);
|
||||
std::memset(pBits, 0, static_cast<size_t>(stride * h)); // transparent black
|
||||
DrawIconEx(hDC, 0, 0, hCopy, w, h, 0, nullptr, DI_NORMAL);
|
||||
GdiFlush();
|
||||
SelectObject(hDC, hOld);
|
||||
DeleteDC(hDC);
|
||||
DestroyIcon(hCopy);
|
||||
|
||||
// GDI's 32-bit DIB stores pixels as BGRA in memory. GDI+'s
|
||||
// PixelFormat32bppARGB interprets each 32-bit word as 0xAARRGGBB which is
|
||||
// identical to BGRA on little-endian, so the alpha byte is always >> 24.
|
||||
{
|
||||
const auto* px = static_cast<const uint32_t*>(pBits);
|
||||
*outCustomType = detectCustomCursorType(px, w, h, hotX, hotY);
|
||||
}
|
||||
|
||||
// Wrap the DIB pixels in a GDI+ Bitmap (zero-copy) and save to PNG.
|
||||
// Keep hBmp alive until after gBmp is destroyed so pBits remains valid.
|
||||
std::vector<uint8_t> pngData;
|
||||
{
|
||||
Gdiplus::Bitmap gBmp(w, h, stride, PixelFormat32bppARGB,
|
||||
static_cast<BYTE*>(pBits));
|
||||
if (gBmp.GetLastStatus() == Gdiplus::Ok) {
|
||||
IStream* pStream = nullptr;
|
||||
if (SUCCEEDED(CreateStreamOnHGlobal(nullptr, TRUE, &pStream))) {
|
||||
if (gBmp.Save(pStream, &pngClsid) == Gdiplus::Ok) {
|
||||
ULARGE_INTEGER sz{};
|
||||
LARGE_INTEGER zero{};
|
||||
pStream->Seek(zero, STREAM_SEEK_END, &sz);
|
||||
pStream->Seek(zero, STREAM_SEEK_SET, nullptr);
|
||||
pngData.resize(static_cast<size_t>(sz.QuadPart));
|
||||
ULONG n = 0;
|
||||
pStream->Read(pngData.data(), static_cast<ULONG>(pngData.size()), &n);
|
||||
pngData.resize(n);
|
||||
}
|
||||
pStream->Release();
|
||||
}
|
||||
}
|
||||
} // gBmp destroyed here; pBits (owned by hBmp) still valid
|
||||
DeleteObject(hBmp);
|
||||
|
||||
if (pngData.empty()) return {};
|
||||
|
||||
const std::string dataUrl =
|
||||
"data:image/png;base64," + base64Encode(pngData.data(), pngData.size());
|
||||
|
||||
std::string json;
|
||||
json.reserve(dataUrl.size() + 128);
|
||||
json = "{\"id\":\"" + handleStr + "\"";
|
||||
json += ",\"imageDataUrl\":\"" + jsonEscape(dataUrl) + "\"";
|
||||
json += ",\"width\":" + std::to_string(w);
|
||||
json += ",\"height\":" + std::to_string(h);
|
||||
json += ",\"hotspotX\":" + std::to_string(hotX);
|
||||
json += ",\"hotspotY\":" + std::to_string(hotY);
|
||||
if (*outCustomType) {
|
||||
json += ",\"cursorType\":\"";
|
||||
json += *outCustomType;
|
||||
json += "\"";
|
||||
} else {
|
||||
json += ",\"cursorType\":null";
|
||||
}
|
||||
json += "}";
|
||||
return json;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Sampling loop (background thread)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
static void runSamplingLoop(int intervalMs, HWND targetWindow, const CLSID& pngClsid) {
|
||||
HCURSOR lastCursor = nullptr;
|
||||
|
||||
while (!g_stop.load(std::memory_order_relaxed)) {
|
||||
const int downCount = g_leftDownCount.exchange(0, std::memory_order_relaxed);
|
||||
const int upCount = g_leftUpCount.exchange(0, std::memory_order_relaxed);
|
||||
|
||||
CURSORINFO ci{};
|
||||
ci.cbSize = sizeof(ci);
|
||||
if (!GetCursorInfo(&ci)) {
|
||||
char buf[160];
|
||||
std::snprintf(buf, sizeof(buf),
|
||||
"{\"type\":\"error\",\"timestampMs\":%" PRId64 ",\"message\":\"GetCursorInfo failed\"}",
|
||||
nowMs());
|
||||
writeJsonLine(buf);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(intervalMs));
|
||||
continue;
|
||||
}
|
||||
|
||||
const bool visible = (ci.flags & CURSOR_SHOWING) != 0;
|
||||
const HCURSOR hc = ci.hCursor;
|
||||
|
||||
// Handle string ("0xHEX" or empty for null cursor)
|
||||
char handleBuf[32] = {};
|
||||
if (hc)
|
||||
std::snprintf(handleBuf, sizeof(handleBuf),
|
||||
"0x%" PRIX64, static_cast<uint64_t>(reinterpret_cast<uintptr_t>(hc)));
|
||||
const std::string handleStr = hc ? handleBuf : "";
|
||||
|
||||
// Standard cursor type
|
||||
const char* cursorType = standardCursorType(hc);
|
||||
|
||||
// Mouse button state
|
||||
const SHORT ks = GetAsyncKeyState(VK_LBUTTON);
|
||||
const bool leftDown = (ks & 0x8000) != 0;
|
||||
const bool leftPressed = downCount > 0 || (ks & 0x0001) != 0;
|
||||
const bool leftReleased = upCount > 0;
|
||||
|
||||
// Asset — only when the cursor handle changes
|
||||
std::string assetJson;
|
||||
if (visible && hc && hc != lastCursor) {
|
||||
const char* customType = nullptr;
|
||||
assetJson = buildAssetJson(hc, handleStr, pngClsid, &customType);
|
||||
if (!assetJson.empty() && !cursorType && customType)
|
||||
cursorType = customType;
|
||||
lastCursor = hc;
|
||||
}
|
||||
|
||||
// Window bounds
|
||||
std::string boundsJson = "null";
|
||||
if (targetWindow && IsWindow(targetWindow)) {
|
||||
RECT r{};
|
||||
if (GetWindowRect(targetWindow, &r)) {
|
||||
const int bw = r.right - r.left;
|
||||
const int bh = r.bottom - r.top;
|
||||
if (bw > 0 && bh > 0) {
|
||||
char buf[128];
|
||||
std::snprintf(buf, sizeof(buf),
|
||||
"{\"x\":%ld,\"y\":%ld,\"width\":%d,\"height\":%d}",
|
||||
r.left, r.top, bw, bh);
|
||||
boundsJson = buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emit sample JSON
|
||||
std::string out;
|
||||
out.reserve(256);
|
||||
out += "{\"type\":\"sample\"";
|
||||
out += ",\"timestampMs\":"; out += std::to_string(nowMs());
|
||||
out += ",\"x\":"; out += std::to_string(ci.ptScreenPos.x);
|
||||
out += ",\"y\":"; out += std::to_string(ci.ptScreenPos.y);
|
||||
out += ",\"visible\":"; out += visible ? "true" : "false";
|
||||
out += ",\"handle\":"; out += hc ? ("\"" + handleStr + "\"") : "null";
|
||||
out += ",\"cursorType\":"; out += cursorType ? ("\"" + std::string(cursorType) + "\"") : "null";
|
||||
out += ",\"leftButtonDown\":"; out += leftDown ? "true" : "false";
|
||||
out += ",\"leftButtonPressed\":"; out += leftPressed ? "true" : "false";
|
||||
out += ",\"leftButtonReleased\":"; out += leftReleased ? "true" : "false";
|
||||
out += ",\"bounds\":"; out += boundsJson;
|
||||
out += ",\"asset\":"; out += assetJson.empty() ? "null" : assetJson;
|
||||
out += "}";
|
||||
|
||||
writeJsonLine(out);
|
||||
|
||||
// Exit if stdout pipe is broken (parent process died)
|
||||
if (std::cout.fail()) {
|
||||
PostThreadMessage(g_mainThreadId, WM_QUIT, 0, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(intervalMs));
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// main
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc < 2) {
|
||||
std::cerr << "Usage: cursor-sampler <intervalMs> [windowHandle]" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
const int intervalMs = std::max(1, std::atoi(argv[1]));
|
||||
|
||||
HWND targetWindow = nullptr;
|
||||
if (argc >= 3) {
|
||||
const std::string arg = argv[2];
|
||||
if (!arg.empty() && arg != "null") {
|
||||
try {
|
||||
const int base = (arg.rfind("0x", 0) == 0 || arg.rfind("0X", 0) == 0) ? 16 : 10;
|
||||
const uint64_t v = std::stoull(arg, nullptr, base);
|
||||
if (v) targetWindow = reinterpret_cast<HWND>(static_cast<uintptr_t>(v));
|
||||
} catch (...) {}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize GDI+
|
||||
Gdiplus::GdiplusStartupInput gdipInput{};
|
||||
ULONG_PTR gdipToken = 0;
|
||||
if (Gdiplus::GdiplusStartup(&gdipToken, &gdipInput, nullptr) != Gdiplus::Ok) {
|
||||
std::cerr << "GDI+ init failed" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
CLSID pngClsid{};
|
||||
if (!getPngClsid(pngClsid)) {
|
||||
std::cerr << "PNG encoder not found" << std::endl;
|
||||
Gdiplus::GdiplusShutdown(gdipToken);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Install global low-level mouse hook on this thread
|
||||
g_mouseHook = SetWindowsHookEx(WH_MOUSE_LL, LowLevelMouseProc, GetModuleHandle(nullptr), 0);
|
||||
if (!g_mouseHook) {
|
||||
std::cerr << "SetWindowsHookEx failed" << std::endl;
|
||||
Gdiplus::GdiplusShutdown(gdipToken);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Prime GetAsyncKeyState so the first poll doesn't return stale "since-last-call" bits
|
||||
GetAsyncKeyState(VK_LBUTTON);
|
||||
|
||||
// Signal readiness
|
||||
g_mainThreadId = GetCurrentThreadId();
|
||||
{
|
||||
char buf[80];
|
||||
std::snprintf(buf, sizeof(buf),
|
||||
"{\"type\":\"ready\",\"timestampMs\":%" PRId64 "}", nowMs());
|
||||
writeJsonLine(buf);
|
||||
}
|
||||
|
||||
// Start sampling on a background thread
|
||||
std::thread sampler(runSamplingLoop, intervalMs, targetWindow, std::cref(pngClsid));
|
||||
|
||||
// Run the message pump on the main thread — required for WH_MOUSE_LL callbacks
|
||||
MSG msg;
|
||||
while (GetMessage(&msg, nullptr, 0, 0) > 0) {
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessage(&msg);
|
||||
}
|
||||
|
||||
g_stop.store(true, std::memory_order_relaxed);
|
||||
if (sampler.joinable()) sampler.join();
|
||||
UnhookWindowsHookEx(g_mouseHook);
|
||||
Gdiplus::GdiplusShutdown(gdipToken);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,427 @@
|
||||
#include "dshow_webcam_capture.h"
|
||||
|
||||
#include <initguid.h>
|
||||
#include <dshow.h>
|
||||
#include <wrl/client.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <chrono>
|
||||
#include <exception>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
namespace {
|
||||
|
||||
const CLSID CLSID_SampleGrabberLocal = {0xC1F400A0, 0x3F08, 0x11D3, {0x9F, 0x0B, 0x00, 0x60, 0x08, 0x03, 0x9E, 0x37}};
|
||||
const CLSID CLSID_NullRendererLocal = {0xC1F400A4, 0x3F08, 0x11D3, {0x9F, 0x0B, 0x00, 0x60, 0x08, 0x03, 0x9E, 0x37}};
|
||||
|
||||
MIDL_INTERFACE("6B652FFF-11FE-4FCE-92AD-0266B5D7C78F")
|
||||
ISampleGrabber : public IUnknown {
|
||||
public:
|
||||
virtual HRESULT STDMETHODCALLTYPE SetOneShot(BOOL oneShot) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE SetMediaType(const AM_MEDIA_TYPE* type) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetConnectedMediaType(AM_MEDIA_TYPE* type) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE SetBufferSamples(BOOL bufferThem) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetCurrentBuffer(long* bufferSize, long* buffer) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE GetCurrentSample(IMediaSample** sample) = 0;
|
||||
virtual HRESULT STDMETHODCALLTYPE SetCallback(IUnknown* callback, long whichMethodToCallback) = 0;
|
||||
};
|
||||
|
||||
// Returns whether `hr` is a success code. On failure an "ERROR: <label>
// failed (hr=0x...)" line is written to std::cerr first.
bool succeeded(HRESULT hr, const char* label) {
    const bool ok = SUCCEEDED(hr);
    if (!ok) {
        // The stream is switched back to decimal after printing the code.
        std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")"
                  << std::endl;
    }
    return ok;
}
|
||||
|
||||
std::string guidToString(const GUID& guid) {
|
||||
if (guid == MEDIASUBTYPE_RGB32) {
|
||||
return "RGB32";
|
||||
}
|
||||
if (guid == MEDIASUBTYPE_YUY2) {
|
||||
return "YUY2";
|
||||
}
|
||||
if (guid == MEDIASUBTYPE_NV12) {
|
||||
return "NV12";
|
||||
}
|
||||
|
||||
std::ostringstream stream;
|
||||
stream << std::hex << std::setfill('0')
|
||||
<< '{' << std::setw(8) << guid.Data1
|
||||
<< '-' << std::setw(4) << guid.Data2
|
||||
<< '-' << std::setw(4) << guid.Data3
|
||||
<< '-';
|
||||
for (int index = 0; index < 2; index += 1) {
|
||||
stream << std::setw(2) << static_cast<int>(guid.Data4[index]);
|
||||
}
|
||||
stream << '-';
|
||||
for (int index = 2; index < 8; index += 1) {
|
||||
stream << std::setw(2) << static_cast<int>(guid.Data4[index]);
|
||||
}
|
||||
stream << '}';
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
// Releases what an AM_MEDIA_TYPE owns: the format block (freed with
// CoTaskMemFree when cbFormat is non-zero) and the pUnk reference.
// Both fields are cleared afterwards, so calling this twice is harmless.
void freeMediaType(AM_MEDIA_TYPE& type) {
    if (type.cbFormat != 0) {
        CoTaskMemFree(type.pbFormat);
        type.pbFormat = nullptr;
        type.cbFormat = 0;
    }
    if (IUnknown* owner = type.pUnk) {
        owner->Release();
        type.pUnk = nullptr;
    }
}
|
||||
|
||||
// Saturates `value` to the range 0..255 and returns it as a byte.
BYTE clampToByte(int value) {
    if (value < 0) {
        return 0;
    }
    if (value > 255) {
        return 255;
    }
    return static_cast<BYTE>(value);
}
|
||||
|
||||
std::array<BYTE, 3> yuvToBgr(int y, int u, int v) {
|
||||
const int c = y - 16;
|
||||
const int d = u - 128;
|
||||
const int e = v - 128;
|
||||
const int blue = (298 * c + 516 * d + 128) >> 8;
|
||||
const int green = (298 * c - 100 * d - 208 * e + 128) >> 8;
|
||||
const int red = (298 * c + 409 * e + 128) >> 8;
|
||||
return {clampToByte(blue), clampToByte(green), clampToByte(red)};
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// COM state behind DirectShowWebcamCapture: the filter graph, its filters
// and two status flags. Member order is kept as is because it determines
// the order in which the COM pointers are released on destruction.
struct DirectShowWebcamCapture::Impl {
    template <typename T>
    using ComPtr = Microsoft::WRL::ComPtr<T>;

    ComPtr<IGraphBuilder> graph;
    ComPtr<ICaptureGraphBuilder2> captureGraph;
    ComPtr<IBaseFilter> captureFilter;        // the webcam source filter
    ComPtr<IBaseFilter> sampleGrabberFilter;
    ComPtr<ISampleGrabber> sampleGrabber;     // same object as sampleGrabberFilter
    ComPtr<IBaseFilter> nullRenderer;
    ComPtr<IMediaControl> mediaControl;
    bool comInitialized = false;  // true when initialize() owes a CoUninitialize()
    bool running = false;         // true between a successful start() and stop()
};
|
||||
|
||||
// Stops capture (joining the capture thread and releasing the graph via
// stop()) before the implementation object is freed.
DirectShowWebcamCapture::~DirectShowWebcamCapture() {
    this->stop();
    delete this->impl_;
}
|
||||
|
||||
bool DirectShowWebcamCapture::initialize(
|
||||
const std::wstring& deviceId,
|
||||
const std::wstring& deviceName,
|
||||
const std::wstring& directShowClsid,
|
||||
int requestedWidth,
|
||||
int requestedHeight,
|
||||
int requestedFps) {
|
||||
(void)deviceId;
|
||||
stop();
|
||||
delete impl_;
|
||||
impl_ = nullptr;
|
||||
impl_ = new Impl();
|
||||
fps_ = std::clamp(requestedFps > 0 ? requestedFps : 30, 1, 60);
|
||||
|
||||
HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
|
||||
if (SUCCEEDED(hr)) {
|
||||
impl_->comInitialized = true;
|
||||
} else if (hr != RPC_E_CHANGED_MODE) {
|
||||
return succeeded(hr, "CoInitializeEx(DirectShow webcam)");
|
||||
}
|
||||
|
||||
if (directShowClsid.empty()) {
|
||||
std::cerr << "ERROR: DirectShow webcam fallback requires a resolved filter CLSID" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
CLSID selectedClsid{};
|
||||
if (FAILED(CLSIDFromString(directShowClsid.c_str(), &selectedClsid))) {
|
||||
std::cerr << "ERROR: DirectShow webcam fallback received an invalid filter CLSID" << std::endl;
|
||||
return false;
|
||||
}
|
||||
selectedDeviceName_ = deviceName.empty() ? directShowClsid : deviceName;
|
||||
|
||||
if (!succeeded(CoCreateInstance(selectedClsid, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->captureFilter)),
|
||||
"CoCreateInstance(DirectShow webcam filter)")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(CoCreateInstance(CLSID_FilterGraph, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->graph)),
|
||||
"CoCreateInstance(FilterGraph)")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(CoCreateInstance(CLSID_CaptureGraphBuilder2, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->captureGraph)),
|
||||
"CoCreateInstance(CaptureGraphBuilder2)")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(impl_->captureGraph->SetFiltergraph(impl_->graph.Get()), "SetFiltergraph(DirectShow webcam)")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(impl_->graph->AddFilter(impl_->captureFilter.Get(), L"OpenScreen Webcam Source"),
|
||||
"AddFilter(DirectShow webcam source)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!succeeded(CoCreateInstance(CLSID_SampleGrabberLocal, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->sampleGrabberFilter)),
|
||||
"CoCreateInstance(SampleGrabber)")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(impl_->sampleGrabberFilter.As(&impl_->sampleGrabber), "QueryInterface(ISampleGrabber)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AM_MEDIA_TYPE requestedType{};
|
||||
requestedType.majortype = MEDIATYPE_Video;
|
||||
requestedType.formattype = FORMAT_VideoInfo;
|
||||
if (!succeeded(impl_->sampleGrabber->SetMediaType(&requestedType), "SetMediaType(DirectShow video)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!succeeded(impl_->graph->AddFilter(impl_->sampleGrabberFilter.Get(), L"OpenScreen Webcam Sample Grabber"),
|
||||
"AddFilter(SampleGrabber)")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(CoCreateInstance(CLSID_NullRendererLocal, nullptr, CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&impl_->nullRenderer)),
|
||||
"CoCreateInstance(NullRenderer)")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(impl_->graph->AddFilter(impl_->nullRenderer.Get(), L"OpenScreen Webcam Null Renderer"),
|
||||
"AddFilter(NullRenderer)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!succeeded(impl_->captureGraph->RenderStream(
|
||||
&PIN_CATEGORY_CAPTURE,
|
||||
&MEDIATYPE_Video,
|
||||
impl_->captureFilter.Get(),
|
||||
impl_->sampleGrabberFilter.Get(),
|
||||
impl_->nullRenderer.Get()),
|
||||
"RenderStream(DirectShow webcam)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AM_MEDIA_TYPE connectedType{};
|
||||
if (!succeeded(impl_->sampleGrabber->GetConnectedMediaType(&connectedType), "GetConnectedMediaType(DirectShow webcam)")) {
|
||||
return false;
|
||||
}
|
||||
if (connectedType.subtype == MEDIASUBTYPE_YUY2) {
|
||||
pixelFormat_ = PixelFormat::Yuy2;
|
||||
} else if (connectedType.subtype == MEDIASUBTYPE_NV12) {
|
||||
pixelFormat_ = PixelFormat::Nv12;
|
||||
} else if (connectedType.subtype == MEDIASUBTYPE_RGB32) {
|
||||
pixelFormat_ = PixelFormat::Bgra;
|
||||
} else {
|
||||
std::cerr << "ERROR: Unsupported DirectShow webcam media subtype "
|
||||
<< guidToString(connectedType.subtype) << std::endl;
|
||||
freeMediaType(connectedType);
|
||||
return false;
|
||||
}
|
||||
if (connectedType.formattype == FORMAT_VideoInfo && connectedType.pbFormat) {
|
||||
const auto* videoInfo = reinterpret_cast<VIDEOINFOHEADER*>(connectedType.pbFormat);
|
||||
width_ = std::abs(videoInfo->bmiHeader.biWidth);
|
||||
height_ = std::abs(videoInfo->bmiHeader.biHeight);
|
||||
const int bitsPerPixel = videoInfo->bmiHeader.biBitCount > 0 ? videoInfo->bmiHeader.biBitCount : 16;
|
||||
if (pixelFormat_ == PixelFormat::Nv12) {
|
||||
sourceStride_ = ((width_ + 3) / 4) * 4;
|
||||
} else {
|
||||
sourceStride_ = ((width_ * bitsPerPixel + 31) / 32) * 4;
|
||||
}
|
||||
sourceTopDown_ = pixelFormat_ != PixelFormat::Bgra || videoInfo->bmiHeader.biHeight < 0;
|
||||
}
|
||||
std::cerr << "INFO: DirectShow webcam connected subtype " << guidToString(connectedType.subtype)
|
||||
<< " " << width_ << "x" << height_ << " stride=" << sourceStride_ << std::endl;
|
||||
freeMediaType(connectedType);
|
||||
if (width_ <= 0 || height_ <= 0) {
|
||||
width_ = requestedWidth > 0 ? requestedWidth : 1280;
|
||||
height_ = requestedHeight > 0 ? requestedHeight : 720;
|
||||
}
|
||||
if (sourceStride_ <= 0) {
|
||||
sourceStride_ = pixelFormat_ == PixelFormat::Bgra ? width_ * 4 : ((width_ + 3) / 4) * 4;
|
||||
}
|
||||
|
||||
impl_->sampleGrabber->SetBufferSamples(TRUE);
|
||||
impl_->sampleGrabber->SetOneShot(FALSE);
|
||||
if (!succeeded(impl_->graph.As(&impl_->mediaControl), "QueryInterface(IMediaControl)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DirectShowWebcamCapture::start() {
|
||||
if (!impl_ || !impl_->mediaControl || impl_->running) {
|
||||
return false;
|
||||
}
|
||||
HRESULT hr = impl_->mediaControl->Run();
|
||||
if (!succeeded(hr, "Run(DirectShow webcam)")) {
|
||||
return false;
|
||||
}
|
||||
stopRequested_ = false;
|
||||
try {
|
||||
thread_ = std::thread(&DirectShowWebcamCapture::captureLoop, this);
|
||||
} catch (const std::exception& error) {
|
||||
stopRequested_ = true;
|
||||
impl_->mediaControl->Stop();
|
||||
std::cerr << "ERROR: Failed to start DirectShow webcam capture thread: " << error.what() << std::endl;
|
||||
return false;
|
||||
} catch (...) {
|
||||
stopRequested_ = true;
|
||||
impl_->mediaControl->Stop();
|
||||
std::cerr << "ERROR: Failed to start DirectShow webcam capture thread" << std::endl;
|
||||
return false;
|
||||
}
|
||||
impl_->running = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void DirectShowWebcamCapture::stop() {
|
||||
stopRequested_ = true;
|
||||
if (thread_.joinable()) {
|
||||
thread_.join();
|
||||
}
|
||||
if (!impl_) {
|
||||
return;
|
||||
}
|
||||
if (impl_->mediaControl && impl_->running) {
|
||||
impl_->mediaControl->Stop();
|
||||
}
|
||||
impl_->running = false;
|
||||
impl_->mediaControl.Reset();
|
||||
impl_->nullRenderer.Reset();
|
||||
impl_->sampleGrabber.Reset();
|
||||
impl_->sampleGrabberFilter.Reset();
|
||||
impl_->captureFilter.Reset();
|
||||
impl_->captureGraph.Reset();
|
||||
impl_->graph.Reset();
|
||||
if (impl_->comInitialized) {
|
||||
CoUninitialize();
|
||||
impl_->comInitialized = false;
|
||||
}
|
||||
}
|
||||
|
||||
void DirectShowWebcamCapture::captureLoop() {
|
||||
const HRESULT coinitHr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
|
||||
while (!stopRequested_ && impl_ && impl_->sampleGrabber) {
|
||||
long bufferSize = 0;
|
||||
HRESULT hr = impl_->sampleGrabber->GetCurrentBuffer(&bufferSize, nullptr);
|
||||
if (SUCCEEDED(hr) && bufferSize > 0) {
|
||||
std::vector<BYTE> buffer(static_cast<size_t>(bufferSize));
|
||||
hr = impl_->sampleGrabber->GetCurrentBuffer(&bufferSize, reinterpret_cast<long*>(buffer.data()));
|
||||
if (SUCCEEDED(hr)) {
|
||||
storeFrame(buffer.data(), bufferSize);
|
||||
}
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1000 / std::max(1, fps_)));
|
||||
}
|
||||
if (SUCCEEDED(coinitHr)) {
|
||||
CoUninitialize();
|
||||
}
|
||||
}
|
||||
|
||||
void DirectShowWebcamCapture::storeFrame(const BYTE* buffer, long length) {
|
||||
const int destinationStride = width_ * 4;
|
||||
const int sourceStride = sourceStride_ > 0 ? sourceStride_ : destinationStride;
|
||||
const int expectedLength = pixelFormat_ == PixelFormat::Nv12
|
||||
? sourceStride * height_ + sourceStride * ((height_ + 1) / 2)
|
||||
: sourceStride * height_;
|
||||
if (!buffer || length < expectedLength || width_ <= 0 || height_ <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<BYTE> frame(static_cast<size_t>(destinationStride * height_));
|
||||
for (int y = 0; y < height_; y += 1) {
|
||||
const int sourceY = sourceTopDown_ ? y : height_ - 1 - y;
|
||||
const BYTE* source = buffer + sourceY * sourceStride;
|
||||
BYTE* destination = frame.data() + y * destinationStride;
|
||||
if (pixelFormat_ == PixelFormat::Bgra) {
|
||||
std::copy(source, source + destinationStride, destination);
|
||||
for (int x = 0; x < width_; x += 1) {
|
||||
destination[x * 4 + 3] = 255;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pixelFormat_ == PixelFormat::Nv12) {
|
||||
const BYTE* yPlane = buffer + sourceY * sourceStride;
|
||||
const BYTE* uvPlane = buffer + sourceStride * height_ + (sourceY / 2) * sourceStride;
|
||||
for (int x = 0; x < width_; x += 1) {
|
||||
const int uvX = (x / 2) * 2;
|
||||
const auto color = yuvToBgr(yPlane[x], uvPlane[uvX], uvPlane[uvX + 1]);
|
||||
BYTE* pixel = destination + x * 4;
|
||||
pixel[0] = color[0];
|
||||
pixel[1] = color[1];
|
||||
pixel[2] = color[2];
|
||||
pixel[3] = 255;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int x = 0; x + 1 < width_; x += 2) {
|
||||
const BYTE y0 = source[x * 2];
|
||||
const BYTE u = source[x * 2 + 1];
|
||||
const BYTE y1 = source[x * 2 + 2];
|
||||
const BYTE v = source[x * 2 + 3];
|
||||
const auto first = yuvToBgr(y0, u, v);
|
||||
const auto second = yuvToBgr(y1, u, v);
|
||||
BYTE* firstPixel = destination + x * 4;
|
||||
BYTE* secondPixel = firstPixel + 4;
|
||||
firstPixel[0] = first[0];
|
||||
firstPixel[1] = first[1];
|
||||
firstPixel[2] = first[2];
|
||||
firstPixel[3] = 255;
|
||||
secondPixel[0] = second[0];
|
||||
secondPixel[1] = second[1];
|
||||
secondPixel[2] = second[2];
|
||||
secondPixel[3] = 255;
|
||||
}
|
||||
if (width_ % 2 == 1) {
|
||||
const int x = width_ - 1;
|
||||
const int previousPairStart = ((x - 1) / 2) * 4;
|
||||
const BYTE y = source[x * 2];
|
||||
const BYTE u = source[previousPairStart + 1];
|
||||
const BYTE v = source[previousPairStart + 3];
|
||||
const auto color = yuvToBgr(y, u, v);
|
||||
BYTE* pixel = destination + x * 4;
|
||||
pixel[0] = color[0];
|
||||
pixel[1] = color[1];
|
||||
pixel[2] = color[2];
|
||||
pixel[3] = 255;
|
||||
}
|
||||
}
|
||||
|
||||
std::scoped_lock lock(frameMutex_);
|
||||
latestFrame_ = std::move(frame);
|
||||
latestFrameSequence_ += 1;
|
||||
}
|
||||
|
||||
// Copies the most recent converted frame, its dimensions and its sequence
// number into `destination` while holding the frame mutex.
// Returns false, leaving `destination` untouched, when no frame is
// available or the dimensions are not positive.
bool DirectShowWebcamCapture::copyLatestFrame(WebcamFrameSnapshot& destination) {
    std::scoped_lock guard(frameMutex_);

    const bool haveFrame = !latestFrame_.empty() && width_ > 0 && height_ > 0;
    if (haveFrame) {
        destination.data = latestFrame_;
        destination.width = width_;
        destination.height = height_;
        destination.sequence = latestFrameSequence_;
    }
    return haveFrame;
}
|
||||
|
||||
int DirectShowWebcamCapture::width() const {
|
||||
return width_;
|
||||
}
|
||||
|
||||
int DirectShowWebcamCapture::height() const {
|
||||
return height_;
|
||||
}
|
||||
|
||||
int DirectShowWebcamCapture::fps() const {
|
||||
return fps_;
|
||||
}
|
||||
|
||||
const std::wstring& DirectShowWebcamCapture::selectedDeviceName() const {
|
||||
return selectedDeviceName_;
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
#pragma once
|
||||
|
||||
#include <Windows.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
struct WebcamFrameSnapshot {
|
||||
std::vector<BYTE> data;
|
||||
int width = 0;
|
||||
int height = 0;
|
||||
uint64_t sequence = 0;
|
||||
};
|
||||
|
||||
class DirectShowWebcamCapture {
|
||||
public:
|
||||
DirectShowWebcamCapture() = default;
|
||||
~DirectShowWebcamCapture();
|
||||
|
||||
DirectShowWebcamCapture(const DirectShowWebcamCapture&) = delete;
|
||||
DirectShowWebcamCapture& operator=(const DirectShowWebcamCapture&) = delete;
|
||||
|
||||
bool initialize(
|
||||
const std::wstring& deviceId,
|
||||
const std::wstring& deviceName,
|
||||
const std::wstring& directShowClsid,
|
||||
int requestedWidth,
|
||||
int requestedHeight,
|
||||
int requestedFps);
|
||||
bool start();
|
||||
void stop();
|
||||
bool copyLatestFrame(WebcamFrameSnapshot& destination);
|
||||
|
||||
int width() const;
|
||||
int height() const;
|
||||
int fps() const;
|
||||
const std::wstring& selectedDeviceName() const;
|
||||
void storeFrame(const BYTE* buffer, long length);
|
||||
|
||||
private:
|
||||
enum class PixelFormat {
|
||||
Bgra,
|
||||
Nv12,
|
||||
Yuy2,
|
||||
};
|
||||
|
||||
struct Impl;
|
||||
void captureLoop();
|
||||
|
||||
Impl* impl_ = nullptr;
|
||||
std::thread thread_;
|
||||
std::atomic<bool> stopRequested_ = false;
|
||||
std::mutex frameMutex_;
|
||||
std::vector<BYTE> latestFrame_;
|
||||
uint64_t latestFrameSequence_ = 0;
|
||||
int width_ = 0;
|
||||
int height_ = 0;
|
||||
int fps_ = 30;
|
||||
int sourceStride_ = 0;
|
||||
bool sourceTopDown_ = false;
|
||||
PixelFormat pixelFormat_ = PixelFormat::Bgra;
|
||||
std::wstring selectedDeviceName_;
|
||||
};
|
||||
@@ -0,0 +1,91 @@
|
||||
#include <windows.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
static HHOOK g_keyboardHook = nullptr;
|
||||
static DWORD g_mainThreadId = 0;
|
||||
static std::atomic<bool> g_ctrlDown{false};
|
||||
static std::mutex g_stdoutMutex;
|
||||
|
||||
// Current std::chrono::system_clock time, expressed as whole milliseconds
// since that clock's epoch.
static int64_t nowMs() {
	using namespace std::chrono;
	const auto sinceEpoch = system_clock::now().time_since_epoch();
	const milliseconds elapsed = duration_cast<milliseconds>(sinceEpoch);
	return static_cast<int64_t>(elapsed.count());
}
|
||||
|
||||
static void writeJsonLine(const std::string& json) {
|
||||
std::lock_guard<std::mutex> lock(g_stdoutMutex);
|
||||
std::cout << json << '\n';
|
||||
std::cout.flush();
|
||||
}
|
||||
|
||||
// True when `vkCode` is the generic, left or right Control virtual-key code.
static bool isCtrlKey(DWORD vkCode) {
	switch (vkCode) {
		case VK_CONTROL:
		case VK_LCONTROL:
		case VK_RCONTROL:
			return true;
		default:
			return false;
	}
}
|
||||
|
||||
// WH_KEYBOARD_LL hook procedure.
// Emits one "guide-hotkey" JSON line when a Control key goes from up to down
// (repeats while held are suppressed through g_ctrlDown) and clears the flag
// on key-up. Every event is forwarded with CallNextHookEx regardless.
static LRESULT CALLBACK LowLevelKeyboardProc(int nCode, WPARAM wParam, LPARAM lParam) {
	// Only inspect the payload when nCode is non-negative.
	const auto* keyInfo = nCode >= 0 ? reinterpret_cast<KBDLLHOOKSTRUCT*>(lParam) : nullptr;

	if (keyInfo != nullptr && isCtrlKey(keyInfo->vkCode)) {
		const bool pressed = wParam == WM_KEYDOWN || wParam == WM_SYSKEYDOWN;
		const bool released = wParam == WM_KEYUP || wParam == WM_SYSKEYUP;

		if (pressed) {
			// exchange() returns the previous state; only the first down is reported.
			const bool alreadyHeld = g_ctrlDown.exchange(true, std::memory_order_acq_rel);
			if (!alreadyHeld) {
				std::string line = "{\"event\":\"guide-hotkey\",\"key\":\"control\",\"state\":\"down\",\"timeMs\":";
				line += std::to_string(nowMs());
				line += "}";
				writeJsonLine(line);
			}
		} else if (released) {
			g_ctrlDown.store(false, std::memory_order_release);
		}
	}

	return CallNextHookEx(g_keyboardHook, nCode, wParam, lParam);
}
|
||||
|
||||
// Console control handler: for Ctrl+C, Ctrl+Break, close, logoff and shutdown
// it posts WM_QUIT to the main thread (ending its message loop) and reports the
// signal as handled. Any other signal is left unhandled.
static BOOL WINAPI consoleCtrlHandler(DWORD signal) {
	switch (signal) {
		case CTRL_C_EVENT:
		case CTRL_BREAK_EVENT:
		case CTRL_CLOSE_EVENT:
		case CTRL_LOGOFF_EVENT:
		case CTRL_SHUTDOWN_EVENT:
			PostThreadMessage(g_mainThreadId, WM_QUIT, 0, 0);
			return TRUE;
		default:
			return FALSE;
	}
}
|
||||
|
||||
int main() {
|
||||
g_mainThreadId = GetCurrentThreadId();
|
||||
SetConsoleCtrlHandler(consoleCtrlHandler, TRUE);
|
||||
|
||||
g_keyboardHook = SetWindowsHookExW(WH_KEYBOARD_LL, LowLevelKeyboardProc, GetModuleHandleW(nullptr), 0);
|
||||
if (!g_keyboardHook) {
|
||||
std::cerr << "Failed to install guide hotkey keyboard hook. error=" << GetLastError() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
writeJsonLine("{\"event\":\"ready\"}");
|
||||
|
||||
MSG msg{};
|
||||
while (GetMessageW(&msg, nullptr, 0, 0) > 0) {
|
||||
TranslateMessage(&msg);
|
||||
DispatchMessageW(&msg);
|
||||
}
|
||||
|
||||
if (g_keyboardHook) {
|
||||
UnhookWindowsHookEx(g_keyboardHook);
|
||||
g_keyboardHook = nullptr;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,860 @@
|
||||
#include "audio_sample_utils.h"
|
||||
#include "mf_encoder.h"
|
||||
#include "monitor_utils.h"
|
||||
#include "wasapi_loopback_capture.h"
|
||||
#include "webcam_capture.h"
|
||||
#include "wgc_session.h"
|
||||
|
||||
#include <winrt/Windows.Foundation.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <cctype>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
namespace {
|
||||
|
||||
struct CaptureConfig {
|
||||
int schemaVersion = 1;
|
||||
int64_t displayId = 0;
|
||||
int64_t recordingId = 0;
|
||||
std::string sourceType = "display";
|
||||
std::string sourceId;
|
||||
std::string windowHandle;
|
||||
std::string outputPath;
|
||||
std::string webcamOutputPath;
|
||||
int fps = 60;
|
||||
int width = 0;
|
||||
int height = 0;
|
||||
MonitorBounds bounds{};
|
||||
bool hasDisplayBounds = false;
|
||||
bool captureSystemAudio = false;
|
||||
bool captureMic = false;
|
||||
bool captureCursor = false;
|
||||
bool webcamEnabled = false;
|
||||
std::string microphoneDeviceId;
|
||||
std::string microphoneDeviceName;
|
||||
double microphoneGain = 1.0;
|
||||
std::string webcamDeviceId;
|
||||
std::string webcamDeviceName;
|
||||
std::string webcamDirectShowClsid;
|
||||
int webcamWidth = 0;
|
||||
int webcamHeight = 0;
|
||||
int webcamFps = 0;
|
||||
};
|
||||
|
||||
// Shared stop/pause state for a capture run, plus bookkeeping of how long the
// recording has spent paused.
struct CaptureControl {
	std::atomic<bool> stopRequested{false};
	std::atomic<bool> paused{false};
	// Guards pauseStartedAt / totalPausedDuration.
	std::mutex mutex;
	std::condition_variable cv;
	std::chrono::steady_clock::time_point pauseStartedAt;
	std::chrono::steady_clock::duration totalPausedDuration{};

	// Total paused time so far in 100-nanosecond units, including the pause
	// that is currently in progress (if any).
	int64_t pausedDurationHns() {
		using namespace std::chrono;
		const std::lock_guard<std::mutex> guard(mutex);
		steady_clock::duration accumulated = totalPausedDuration;
		if (paused.load()) {
			accumulated += steady_clock::now() - pauseStartedAt;
		}
		return duration_cast<nanoseconds>(accumulated).count() / 100;
	}

	// Switches the paused flag. Entering pause records the start time; leaving
	// pause adds the elapsed interval to the running total. A call that does
	// not change the state is a no-op.
	void setPaused(bool nextPaused) {
		const std::lock_guard<std::mutex> guard(mutex);
		const bool currentlyPaused = paused.load();
		if (currentlyPaused != nextPaused) {
			if (nextPaused) {
				pauseStartedAt = std::chrono::steady_clock::now();
			} else {
				totalPausedDuration += std::chrono::steady_clock::now() - pauseStartedAt;
			}
			paused = nextPaused;
		}
	}
};
|
||||
|
||||
// Converts a UTF-8 string to UTF-16 with MultiByteToWideChar(CP_UTF8).
// An empty input yields an empty result without calling the API.
std::wstring utf8ToWide(const std::string& value) {
	std::wstring wide;
	if (!value.empty()) {
		const int sourceLength = static_cast<int>(value.size());
		// First call measures, second call converts into the sized buffer.
		const int wideLength = MultiByteToWideChar(CP_UTF8, 0, value.data(), sourceLength, nullptr, 0);
		wide.assign(static_cast<size_t>(wideLength), L'\0');
		MultiByteToWideChar(CP_UTF8, 0, value.data(), sourceLength, wide.data(), wideLength);
	}
	return wide;
}
|
||||
|
||||
// Converts a UTF-16 string to UTF-8 with WideCharToMultiByte(CP_UTF8).
// An empty input yields an empty result without calling the API.
std::string wideToUtf8(const std::wstring& value) {
	std::string narrow;
	if (!value.empty()) {
		const int sourceLength = static_cast<int>(value.size());
		// First call measures, second call converts into the sized buffer.
		const int narrowLength =
			WideCharToMultiByte(CP_UTF8, 0, value.data(), sourceLength, nullptr, 0, nullptr, nullptr);
		narrow.assign(static_cast<size_t>(narrowLength), '\0');
		WideCharToMultiByte(CP_UTF8, 0, value.data(), sourceLength, narrow.data(), narrowLength, nullptr, nullptr);
	}
	return narrow;
}
|
||||
|
||||
// Escapes `value` so it can be placed between double quotes in a JSON document.
//
// Backslash, double quote, \n, \r, \t, \b and \f use their short escapes.
// Every other byte below 0x20 is written as \u00XX, because JSON forbids raw
// control characters inside strings (previously they were copied through and
// produced invalid JSON). Bytes >= 0x20, including UTF-8 multi-byte sequences,
// are copied unchanged.
std::string jsonEscape(const std::string& value) {
	static constexpr char kHexDigits[] = "0123456789abcdef";

	std::string result;
	result.reserve(value.size());
	for (const char c : value) {
		switch (c) {
			case '\\':
				result += "\\\\";
				break;
			case '"':
				result += "\\\"";
				break;
			case '\n':
				result += "\\n";
				break;
			case '\r':
				result += "\\r";
				break;
			case '\t':
				result += "\\t";
				break;
			case '\b':
				result += "\\b";
				break;
			case '\f':
				result += "\\f";
				break;
			default: {
				// Compare as unsigned so bytes >= 0x80 are never mistaken for controls.
				const unsigned char byte = static_cast<unsigned char>(c);
				if (byte < 0x20) {
					result += "\\u00";
					result.push_back(kHexDigits[byte >> 4]);
					result.push_back(kHexDigits[byte & 0x0F]);
				} else {
					result.push_back(c);
				}
				break;
			}
		}
	}
	return result;
}
|
||||
|
||||
bool hasVisibleBgraContent(const std::vector<BYTE>& frame) {
|
||||
if (frame.size() < 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t lumaTotal = 0;
|
||||
BYTE maxLuma = 0;
|
||||
const size_t pixelCount = frame.size() / 4;
|
||||
const size_t step = std::max<size_t>(1, pixelCount / 4096);
|
||||
size_t sampledPixels = 0;
|
||||
for (size_t pixel = 0; pixel < pixelCount; pixel += step) {
|
||||
const size_t offset = pixel * 4;
|
||||
const BYTE b = frame[offset + 0];
|
||||
const BYTE g = frame[offset + 1];
|
||||
const BYTE r = frame[offset + 2];
|
||||
const BYTE luma = static_cast<BYTE>((static_cast<uint16_t>(r) * 54 + static_cast<uint16_t>(g) * 183 + static_cast<uint16_t>(b) * 19) >> 8);
|
||||
lumaTotal += luma;
|
||||
maxLuma = std::max(maxLuma, luma);
|
||||
sampledPixels += 1;
|
||||
}
|
||||
|
||||
const uint64_t averageLuma = sampledPixels > 0 ? lumaTotal / sampledPixels : 0;
|
||||
return maxLuma > 24 || averageLuma > 4;
|
||||
}
|
||||
|
||||
// Minimal lookup of a boolean value in a JSON text.
//
// Finds the first occurrence of "key" (with quotes), then the next ':', skips
// whitespace and checks for the literals true / false. Returns `fallback` when
// the key or colon is missing or the value is neither literal. This is a plain
// text scan, not a JSON parser: nesting and string contents are not considered.
bool findBool(const std::string& json, const std::string& key, bool fallback) {
	const std::string quotedKey = '"' + key + '"';

	const size_t keyAt = json.find(quotedKey);
	if (keyAt == std::string::npos) {
		return fallback;
	}
	const size_t colonAt = json.find(':', keyAt);
	if (colonAt == std::string::npos) {
		return fallback;
	}

	size_t cursor = colonAt + 1;
	const size_t length = json.size();
	for (; cursor < length && std::isspace(static_cast<unsigned char>(json[cursor])); ++cursor) {
	}

	if (json.compare(cursor, 4, "true") == 0) {
		return true;
	}
	if (json.compare(cursor, 5, "false") == 0) {
		return false;
	}
	return fallback;
}
|
||||
|
||||
// Minimal lookup of an integer value in a JSON text.
//
// Finds the first occurrence of "key" (with quotes), then the next ':', skips
// whitespace and lets std::stoll parse what follows. Returns `fallback` when
// the key or colon is missing or std::stoll throws (no digits / out of range).
// This is a plain text scan, not a JSON parser.
int64_t findInt64(const std::string& json, const std::string& key, int64_t fallback) {
	const std::string quotedKey = '"' + key + '"';

	const size_t keyAt = json.find(quotedKey);
	const size_t colonAt =
		keyAt == std::string::npos ? std::string::npos : json.find(':', keyAt);
	if (colonAt == std::string::npos) {
		return fallback;
	}

	size_t valueAt = colonAt + 1;
	while (valueAt < json.size() && std::isspace(static_cast<unsigned char>(json[valueAt]))) {
		++valueAt;
	}

	int64_t parsed = fallback;
	try {
		parsed = std::stoll(json.substr(valueAt));
	} catch (...) {
		parsed = fallback;
	}
	return parsed;
}
|
||||
|
||||
int findInt(const std::string& json, const std::string& key, int fallback) {
|
||||
return static_cast<int>(findInt64(json, key, fallback));
|
||||
}
|
||||
|
||||
// Minimal lookup of a floating-point value in a JSON text.
//
// Finds the first occurrence of "key" (with quotes), then the next ':', skips
// whitespace and lets std::stod parse what follows. Returns `fallback` when
// the key or colon is missing or std::stod throws. This is a plain text scan,
// not a JSON parser.
double findDouble(const std::string& json, const std::string& key, double fallback) {
	const std::string quotedKey = '"' + key + '"';

	const size_t keyAt = json.find(quotedKey);
	const size_t colonAt =
		keyAt == std::string::npos ? std::string::npos : json.find(':', keyAt);
	if (colonAt == std::string::npos) {
		return fallback;
	}

	size_t valueAt = colonAt + 1;
	while (valueAt < json.size() && std::isspace(static_cast<unsigned char>(json[valueAt]))) {
		++valueAt;
	}

	double parsed = fallback;
	try {
		parsed = std::stod(json.substr(valueAt));
	} catch (...) {
		parsed = fallback;
	}
	return parsed;
}
|
||||
|
||||
// Minimal lookup of a string value in a JSON text.
//
// Finds the first occurrence of "key" (with quotes), then the next ':', skips
// whitespace and, if a double quote follows, decodes the string up to the
// closing quote (or end of input when unterminated). Returns an empty string
// when the key or colon is missing or the value is not a string. This is a
// plain text scan, not a JSON parser.
//
// Escape handling:
//   \\ \" \/            -> the character itself
//   \n \r \t \b \f      -> the corresponding control character
//   \uXXXX              -> the code point encoded as UTF-8; a valid surrogate
//                          pair (\uD8xx\uDCxx) becomes one code point, a lone
//                          surrogate becomes U+FFFD
//   anything else, or a \u not followed by four hex digits
//                       -> the character after the backslash, unchanged
// Previously \b, \f and \uXXXX fell into the last rule, so e.g. "\u00e9" was
// returned as the six characters u00e9 instead of the encoded character.
std::string findString(const std::string& json, const std::string& key) {
	auto pos = json.find("\"" + key + "\"");
	if (pos == std::string::npos) {
		return {};
	}
	pos = json.find(':', pos);
	if (pos == std::string::npos) {
		return {};
	}
	pos += 1;
	while (pos < json.size() && std::isspace(static_cast<unsigned char>(json[pos]))) {
		pos += 1;
	}
	if (pos >= json.size() || json[pos] != '"') {
		return {};
	}
	pos += 1;

	std::string result;

	// Reads four hex digits starting at `at`; leaves `out` untouched on failure.
	const auto readHex4 = [&json](size_t at, uint32_t& out) -> bool {
		if (at > json.size() || json.size() - at < 4) {
			return false;
		}
		uint32_t value = 0;
		for (size_t i = 0; i < 4; ++i) {
			const char digit = json[at + i];
			uint32_t nibble = 0;
			if (digit >= '0' && digit <= '9') {
				nibble = static_cast<uint32_t>(digit - '0');
			} else if (digit >= 'a' && digit <= 'f') {
				nibble = static_cast<uint32_t>(digit - 'a') + 10;
			} else if (digit >= 'A' && digit <= 'F') {
				nibble = static_cast<uint32_t>(digit - 'A') + 10;
			} else {
				return false;
			}
			value = (value << 4) | nibble;
		}
		out = value;
		return true;
	};

	// Appends `codePoint` (<= U+10FFFF, not a surrogate) to result as UTF-8.
	const auto appendUtf8 = [&result](uint32_t codePoint) {
		if (codePoint < 0x80) {
			result.push_back(static_cast<char>(codePoint));
		} else if (codePoint < 0x800) {
			result.push_back(static_cast<char>(0xC0 | (codePoint >> 6)));
			result.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
		} else if (codePoint < 0x10000) {
			result.push_back(static_cast<char>(0xE0 | (codePoint >> 12)));
			result.push_back(static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
			result.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
		} else {
			result.push_back(static_cast<char>(0xF0 | (codePoint >> 18)));
			result.push_back(static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F)));
			result.push_back(static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F)));
			result.push_back(static_cast<char>(0x80 | (codePoint & 0x3F)));
		}
	};

	while (pos < json.size()) {
		const char c = json[pos++];
		if (c == '"') {
			break;
		}
		// A trailing backslash with nothing after it is kept literally.
		if (c != '\\' || pos >= json.size()) {
			result.push_back(c);
			continue;
		}

		const char escaped = json[pos++];
		switch (escaped) {
			case 'n':
				result.push_back('\n');
				break;
			case 'r':
				result.push_back('\r');
				break;
			case 't':
				result.push_back('\t');
				break;
			case 'b':
				result.push_back('\b');
				break;
			case 'f':
				result.push_back('\f');
				break;
			case 'u': {
				uint32_t codePoint = 0;
				if (!readHex4(pos, codePoint)) {
					// Malformed escape: keep the historical pass-through behaviour.
					result.push_back(escaped);
					break;
				}
				pos += 4;
				if (codePoint >= 0xD800 && codePoint <= 0xDBFF) {
					// High surrogate: needs an immediately following \uDC00..\uDFFF.
					uint32_t low = 0;
					const bool hasLow = pos + 1 < json.size() && json[pos] == '\\' &&
						json[pos + 1] == 'u' && readHex4(pos + 2, low) &&
						low >= 0xDC00 && low <= 0xDFFF;
					if (hasLow) {
						codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
						pos += 6;
					} else {
						codePoint = 0xFFFD;
					}
				} else if (codePoint >= 0xDC00 && codePoint <= 0xDFFF) {
					// Low surrogate without a preceding high surrogate.
					codePoint = 0xFFFD;
				}
				appendUtf8(codePoint);
				break;
			}
			default:
				// Covers \\ \" \/ and any unrecognised escape.
				result.push_back(escaped);
				break;
		}
	}
	return result;
}
|
||||
|
||||
// Extracts the handle field from a source id shaped like "window:<handle>:<rest>".
// Returns the text between the "window:" prefix and the next ':' (or the end of
// the string when there is no further ':'). Ids that do not start with
// "window:" yield an empty string.
std::string parseWindowHandleFromSourceId(const std::string& sourceId) {
	const std::string prefix{"window:"};
	if (sourceId.compare(0, prefix.size(), prefix) != 0) {
		return {};
	}

	const size_t begin = prefix.size();
	const size_t separator = sourceId.find(':', begin);
	if (separator == std::string::npos) {
		return sourceId.substr(begin);
	}
	return sourceId.substr(begin, separator - begin);
}
|
||||
|
||||
// Parses a textual window handle into an HWND.
// Text starting with "0x"/"0X" is read as hexadecimal, anything else as
// decimal. Returns nullptr when the text is empty, does not parse in full,
// parses to zero, or std::stoull throws.
HWND parseWindowHandle(const std::string& value) {
	if (value.empty()) {
		return nullptr;
	}

	const bool hexPrefixed = value.compare(0, 2, "0x") == 0 || value.compare(0, 2, "0X") == 0;

	try {
		size_t consumed = 0;
		const uint64_t raw = std::stoull(value, &consumed, hexPrefixed ? 16 : 10);
		// Trailing characters after the number make the whole value invalid.
		const bool wholeStringUsed = consumed == value.size();
		if (wholeStringUsed && raw != 0) {
			return reinterpret_cast<HWND>(static_cast<uintptr_t>(raw));
		}
	} catch (...) {
		// Not a number / out of range: reported as "no handle" below.
	}
	return nullptr;
}
|
||||
|
||||
bool parseConfig(const std::string& json, CaptureConfig& config) {
|
||||
config.schemaVersion = findInt(json, "schemaVersion", 1);
|
||||
config.outputPath = findString(json, "screenPath");
|
||||
if (config.outputPath.empty()) {
|
||||
config.outputPath = findString(json, "outputPath");
|
||||
}
|
||||
if (config.outputPath.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
config.recordingId = findInt64(json, "recordingId", 0);
|
||||
config.sourceType = findString(json, "sourceType");
|
||||
if (config.sourceType.empty()) {
|
||||
config.sourceType = "display";
|
||||
}
|
||||
config.sourceId = findString(json, "sourceId");
|
||||
config.windowHandle = findString(json, "windowHandle");
|
||||
if (config.windowHandle.empty()) {
|
||||
config.windowHandle = parseWindowHandleFromSourceId(config.sourceId);
|
||||
}
|
||||
config.displayId = findInt64(json, "displayId", 0);
|
||||
config.fps = std::clamp(findInt(json, "fps", 60), 1, 120);
|
||||
config.width = findInt(json, "videoWidth", findInt(json, "width", 0));
|
||||
config.height = findInt(json, "videoHeight", findInt(json, "height", 0));
|
||||
config.bounds.x = findInt(json, "displayX", 0);
|
||||
config.bounds.y = findInt(json, "displayY", 0);
|
||||
config.bounds.width = findInt(json, "displayW", 0);
|
||||
config.bounds.height = findInt(json, "displayH", 0);
|
||||
config.hasDisplayBounds = findBool(json, "hasDisplayBounds", false);
|
||||
config.captureSystemAudio = findBool(json, "captureSystemAudio", false);
|
||||
config.captureMic = findBool(json, "captureMic", false);
|
||||
config.captureCursor = findBool(json, "captureCursor", false);
|
||||
config.webcamEnabled = findBool(json, "webcamEnabled", false);
|
||||
config.microphoneDeviceId = findString(json, "microphoneDeviceId");
|
||||
config.microphoneDeviceName = findString(json, "microphoneDeviceName");
|
||||
config.microphoneGain = findDouble(json, "microphoneGain", 1.0);
|
||||
config.webcamDeviceId = findString(json, "webcamDeviceId");
|
||||
config.webcamDeviceName = findString(json, "webcamDeviceName");
|
||||
config.webcamDirectShowClsid = findString(json, "webcamDirectShowClsid");
|
||||
config.webcamOutputPath = findString(json, "webcamPath");
|
||||
config.webcamWidth = findInt(json, "webcamWidth", 0);
|
||||
config.webcamHeight = findInt(json, "webcamHeight", 0);
|
||||
config.webcamFps = findInt(json, "webcamFps", 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads line-based commands from stdin until a stop command or end of input.
//
//   "stop" / "q" / "quit" -> request stop and return
//   "pause" / "resume"    -> update `control`, call onPauseChanged, print the
//                            matching event line, wake waiters
//   anything else         -> ignored
//
// End of input is treated like a stop request.
void readCaptureCommands(CaptureControl& control, const std::function<void(bool)>& onPauseChanged) {
	// Shared handling for pause/resume; only the flag and the event text differ.
	const auto applyPause = [&](bool pausedNow, const char* eventJson) {
		control.setPaused(pausedNow);
		onPauseChanged(pausedNow);
		std::cout << eventJson << std::endl;
		control.cv.notify_all();
	};

	for (std::string command; std::getline(std::cin, command);) {
		if (command == "stop" || command == "q" || command == "quit") {
			break;
		}
		if (command == "pause") {
			applyPause(true, "{\"event\":\"recording-paused\",\"schemaVersion\":2}");
		} else if (command == "resume") {
			applyPause(false, "{\"event\":\"recording-resumed\",\"schemaVersion\":2}");
		}
	}

	// Reached on an explicit stop command and on EOF alike.
	control.stopRequested = true;
	control.cv.notify_all();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc < 2) {
|
||||
std::cerr << "ERROR: Missing JSON config argument" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
winrt::init_apartment(winrt::apartment_type::multi_threaded);
|
||||
|
||||
CaptureConfig config;
|
||||
if (!parseConfig(argv[1], config)) {
|
||||
std::cerr << "ERROR: Failed to parse config JSON" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::cout << "{\"event\":\"ready\",\"schemaVersion\":2}" << std::endl;
|
||||
|
||||
WgcSession session;
|
||||
if (config.sourceType == "display") {
|
||||
HMONITOR monitor = findMonitorForCapture(
|
||||
config.displayId,
|
||||
config.sourceId,
|
||||
config.hasDisplayBounds ? &config.bounds : nullptr);
|
||||
if (!monitor) {
|
||||
std::cerr << "ERROR: Could not resolve monitor" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
if (!session.initialize(monitor, config.fps, config.captureCursor)) {
|
||||
std::cerr << "ERROR: Failed to initialize WGC display session" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
} else if (config.sourceType == "window") {
|
||||
HWND window = parseWindowHandle(config.windowHandle);
|
||||
if (!window || !IsWindow(window)) {
|
||||
std::cerr << "ERROR: Native window capture requires a valid HWND" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
if (!session.initialize(window, config.fps, config.captureCursor)) {
|
||||
std::cerr << "ERROR: Failed to initialize WGC window session" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
std::cerr << "ERROR: Unsupported native capture source type: " << config.sourceType << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// WGC owns the captured texture size. Encoding must use that exact size
|
||||
// until a dedicated GPU scaling pass is introduced; CopyResource requires
|
||||
// matching resource dimensions.
|
||||
int width = session.captureWidth();
|
||||
int height = session.captureHeight();
|
||||
width = (std::max(2, width) / 2) * 2;
|
||||
height = (std::max(2, height) / 2) * 2;
|
||||
|
||||
const int pixels = width * height;
|
||||
const int bitrate = pixels >= 3840 * 2160 ? 45'000'000 : pixels >= 2560 * 1440 ? 28'000'000 : 18'000'000;
|
||||
|
||||
WebcamCapture webcamCapture;
|
||||
bool webcamActive = false;
|
||||
bool writeSeparateWebcam = false;
|
||||
if (config.webcamEnabled) {
|
||||
if (!webcamCapture.initialize(
|
||||
utf8ToWide(config.webcamDeviceId),
|
||||
utf8ToWide(config.webcamDeviceName),
|
||||
utf8ToWide(config.webcamDirectShowClsid),
|
||||
config.webcamWidth,
|
||||
config.webcamHeight,
|
||||
config.webcamFps > 0 ? config.webcamFps : config.fps)) {
|
||||
std::cerr << "ERROR: Failed to initialize native webcam capture" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::cout << "{\"event\":\"webcam-format\",\"schemaVersion\":2,\"width\":" << webcamCapture.width()
|
||||
<< ",\"height\":" << webcamCapture.height()
|
||||
<< ",\"fps\":" << webcamCapture.fps()
|
||||
<< ",\"deviceName\":\"" << jsonEscape(wideToUtf8(webcamCapture.selectedDeviceName()))
|
||||
<< "\"}" << std::endl;
|
||||
writeSeparateWebcam = !config.webcamOutputPath.empty();
|
||||
}
|
||||
|
||||
WasapiLoopbackCapture loopbackCapture;
|
||||
WasapiLoopbackCapture microphoneCapture;
|
||||
const AudioInputFormat* audioFormat = nullptr;
|
||||
AudioInputFormat encoderAudioFormat{};
|
||||
AudioInputFormat systemAudioFormat{};
|
||||
AudioInputFormat microphoneAudioFormat{};
|
||||
if (config.captureSystemAudio) {
|
||||
if (!loopbackCapture.initializeSystemLoopback()) {
|
||||
std::cerr << "ERROR: Failed to initialize WASAPI loopback capture" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
systemAudioFormat = loopbackCapture.inputFormat();
|
||||
audioFormat = &loopbackCapture.inputFormat();
|
||||
}
|
||||
if (config.captureMic) {
|
||||
if (!microphoneCapture.initializeMicrophone(
|
||||
utf8ToWide(config.microphoneDeviceId),
|
||||
utf8ToWide(config.microphoneDeviceName))) {
|
||||
std::cerr << "ERROR: Failed to initialize WASAPI microphone capture" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
microphoneAudioFormat = microphoneCapture.inputFormat();
|
||||
if (!audioFormat) {
|
||||
audioFormat = µphoneCapture.inputFormat();
|
||||
}
|
||||
}
|
||||
if (audioFormat) {
|
||||
std::cout << "{\"event\":\"audio-format\",\"schemaVersion\":2,\"sampleRate\":" << audioFormat->sampleRate
|
||||
<< ",\"channels\":" << audioFormat->channels
|
||||
<< ",\"bitsPerSample\":" << audioFormat->bitsPerSample
|
||||
<< ",\"system\":" << (config.captureSystemAudio ? "true" : "false")
|
||||
<< ",\"microphone\":" << (config.captureMic ? "true" : "false");
|
||||
if (config.captureMic) {
|
||||
std::cout << ",\"microphoneDeviceName\":\""
|
||||
<< jsonEscape(wideToUtf8(microphoneCapture.selectedDeviceName())) << "\"";
|
||||
}
|
||||
std::cout << "}" << std::endl;
|
||||
encoderAudioFormat = makeAacCompatibleAudioFormat(*audioFormat);
|
||||
std::cout << "{\"event\":\"encoder-audio-format\",\"schemaVersion\":2,\"sampleRate\":"
|
||||
<< encoderAudioFormat.sampleRate
|
||||
<< ",\"channels\":" << encoderAudioFormat.channels
|
||||
<< ",\"bitsPerSample\":" << encoderAudioFormat.bitsPerSample
|
||||
<< "}" << std::endl;
|
||||
}
|
||||
|
||||
MFEncoder encoder;
|
||||
if (!encoder.initialize(
|
||||
utf8ToWide(config.outputPath),
|
||||
width,
|
||||
height,
|
||||
config.fps,
|
||||
bitrate,
|
||||
session.device(),
|
||||
session.context(),
|
||||
audioFormat ? &encoderAudioFormat : nullptr)) {
|
||||
std::cerr << "ERROR: Failed to initialize Media Foundation encoder" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
MFEncoder webcamEncoder;
|
||||
if (writeSeparateWebcam) {
|
||||
const int webcamPixels = std::max(1, webcamCapture.width()) * std::max(1, webcamCapture.height());
|
||||
const int webcamBitrate = webcamPixels >= 1280 * 720 ? 8'000'000 : 4'000'000;
|
||||
if (!webcamEncoder.initialize(
|
||||
utf8ToWide(config.webcamOutputPath),
|
||||
webcamCapture.width(),
|
||||
webcamCapture.height(),
|
||||
webcamCapture.fps(),
|
||||
webcamBitrate,
|
||||
session.device(),
|
||||
session.context(),
|
||||
nullptr)) {
|
||||
std::cerr << "ERROR: Failed to initialize native webcam encoder" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::mutex mutex;
|
||||
CaptureControl control;
|
||||
std::atomic<bool> firstFrameWritten = false;
|
||||
std::atomic<bool> encodeFailed = false;
|
||||
Microsoft::WRL::ComPtr<ID3D11Texture2D> latestFrameTexture;
|
||||
int64_t latestFrameTimestampHns = 0;
|
||||
int64_t firstFrameTimestampHns = -1;
|
||||
std::vector<BYTE> latestWebcamFrame;
|
||||
int latestWebcamWidth = 0;
|
||||
int latestWebcamHeight = 0;
|
||||
uint64_t latestWebcamSequence = 0;
|
||||
bool hasVisibleWebcamFrame = false;
|
||||
|
||||
session.setFrameCallback([&](ID3D11Texture2D* texture, int64_t timestampHns) {
|
||||
if (control.stopRequested || control.paused) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::scoped_lock lock(mutex);
|
||||
if (!latestFrameTexture) {
|
||||
D3D11_TEXTURE2D_DESC desc{};
|
||||
texture->GetDesc(&desc);
|
||||
desc.BindFlags = 0;
|
||||
desc.CPUAccessFlags = 0;
|
||||
desc.MiscFlags = 0;
|
||||
if (FAILED(session.device()->CreateTexture2D(&desc, nullptr, &latestFrameTexture))) {
|
||||
encodeFailed = true;
|
||||
control.stopRequested = true;
|
||||
control.cv.notify_all();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
session.context()->CopyResource(latestFrameTexture.Get(), texture);
|
||||
latestFrameTimestampHns = timestampHns;
|
||||
if (!firstFrameWritten.exchange(true)) {
|
||||
control.cv.notify_all();
|
||||
}
|
||||
});
|
||||
|
||||
auto writeVideoFrames = [&]() {
|
||||
const auto frameDuration = std::chrono::duration_cast<std::chrono::steady_clock::duration>(
|
||||
std::chrono::duration<double>(1.0 / config.fps));
|
||||
uint64_t frameIndex = 0;
|
||||
uint64_t lastWrittenWebcamSequence = 0;
|
||||
uint64_t webcamOutputFrameIndex = 0;
|
||||
int64_t lastEncodedVideoTimestampHns = -1;
|
||||
|
||||
while (!control.stopRequested && !encodeFailed) {
|
||||
{
|
||||
std::unique_lock lock(mutex);
|
||||
control.cv.wait(lock, [&] {
|
||||
return control.stopRequested.load() ||
|
||||
encodeFailed.load() ||
|
||||
(!control.paused.load() && latestFrameTexture);
|
||||
});
|
||||
if (control.stopRequested || encodeFailed) {
|
||||
break;
|
||||
}
|
||||
if (webcamActive) {
|
||||
WebcamFrameSnapshot candidateWebcamFrame;
|
||||
if (webcamCapture.copyLatestFrame(candidateWebcamFrame) &&
|
||||
candidateWebcamFrame.sequence != latestWebcamSequence &&
|
||||
hasVisibleBgraContent(candidateWebcamFrame.data)) {
|
||||
latestWebcamFrame = std::move(candidateWebcamFrame.data);
|
||||
latestWebcamWidth = candidateWebcamFrame.width;
|
||||
latestWebcamHeight = candidateWebcamFrame.height;
|
||||
latestWebcamSequence = candidateWebcamFrame.sequence;
|
||||
hasVisibleWebcamFrame = true;
|
||||
}
|
||||
}
|
||||
const BgraFrameView webcamFrame{
|
||||
hasVisibleWebcamFrame && !latestWebcamFrame.empty() ? latestWebcamFrame.data() : nullptr,
|
||||
latestWebcamWidth,
|
||||
latestWebcamHeight,
|
||||
};
|
||||
const int64_t syntheticTimestampHns =
|
||||
static_cast<int64_t>((frameIndex * 10'000'000ULL) / config.fps);
|
||||
const int64_t sourceTimestampHns =
|
||||
latestFrameTimestampHns > 0 ? latestFrameTimestampHns : syntheticTimestampHns;
|
||||
if (firstFrameTimestampHns < 0) {
|
||||
firstFrameTimestampHns = sourceTimestampHns;
|
||||
}
|
||||
int64_t frameTimestampHns =
|
||||
std::max<int64_t>(
|
||||
0,
|
||||
sourceTimestampHns - firstFrameTimestampHns - control.pausedDurationHns());
|
||||
if (lastEncodedVideoTimestampHns >= 0 &&
|
||||
frameTimestampHns <= lastEncodedVideoTimestampHns) {
|
||||
frameTimestampHns =
|
||||
lastEncodedVideoTimestampHns + static_cast<int64_t>(10'000'000ULL / config.fps);
|
||||
}
|
||||
if (writeSeparateWebcam && webcamFrame.data &&
|
||||
latestWebcamSequence != lastWrittenWebcamSequence) {
|
||||
const int64_t webcamTimestampHns = static_cast<int64_t>(
|
||||
(webcamOutputFrameIndex * 10'000'000ULL) / std::max(1, webcamCapture.fps()));
|
||||
if (!webcamEncoder.writeBgraFrame(webcamFrame, webcamTimestampHns)) {
|
||||
encodeFailed = true;
|
||||
control.stopRequested = true;
|
||||
control.cv.notify_all();
|
||||
return;
|
||||
}
|
||||
lastWrittenWebcamSequence = latestWebcamSequence;
|
||||
webcamOutputFrameIndex += 1;
|
||||
}
|
||||
if (latestFrameTexture && !encoder.writeFrame(
|
||||
latestFrameTexture.Get(),
|
||||
frameTimestampHns,
|
||||
!writeSeparateWebcam && webcamFrame.data ? &webcamFrame : nullptr)) {
|
||||
encodeFailed = true;
|
||||
control.stopRequested = true;
|
||||
control.cv.notify_all();
|
||||
return;
|
||||
}
|
||||
if (latestFrameTexture) {
|
||||
lastEncodedVideoTimestampHns = frameTimestampHns;
|
||||
}
|
||||
}
|
||||
|
||||
frameIndex += 1;
|
||||
std::this_thread::sleep_for(frameDuration);
|
||||
}
|
||||
};
|
||||
|
||||
std::thread videoWriterThread;
|
||||
|
||||
auto stopVideoWriter = [&]() {
|
||||
if (videoWriterThread.joinable()) {
|
||||
videoWriterThread.join();
|
||||
}
|
||||
};
|
||||
|
||||
auto startVideoWriter = [&]() {
|
||||
videoWriterThread = std::thread(writeVideoFrames);
|
||||
};
|
||||
|
||||
std::unique_ptr<AudioMixer> audioMixer;
|
||||
auto startAudioCaptures = [&]() -> bool {
|
||||
if (!audioFormat) {
|
||||
return true;
|
||||
}
|
||||
|
||||
audioMixer = std::make_unique<AudioMixer>(
|
||||
encoderAudioFormat,
|
||||
config.captureSystemAudio ? systemAudioFormat : encoderAudioFormat,
|
||||
config.captureMic ? microphoneAudioFormat : encoderAudioFormat,
|
||||
config.captureSystemAudio,
|
||||
config.captureMic,
|
||||
config.microphoneGain,
|
||||
[&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) {
|
||||
if (!encoder.writeAudio(data, byteCount, timestampHns, durationHns)) {
|
||||
encodeFailed = true;
|
||||
control.stopRequested = true;
|
||||
control.cv.notify_all();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
if (!audioMixer->start()) {
|
||||
std::cerr << "ERROR: Failed to start native audio mixer" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (config.captureMic) {
|
||||
if (!microphoneCapture.start([&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) {
|
||||
(void)timestampHns;
|
||||
(void)durationHns;
|
||||
if (control.stopRequested || !audioMixer) {
|
||||
return;
|
||||
}
|
||||
|
||||
audioMixer->pushMicrophone(data, byteCount);
|
||||
})) {
|
||||
std::cerr << "ERROR: Failed to start WASAPI microphone capture" << std::endl;
|
||||
audioMixer->stop();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (config.captureSystemAudio) {
|
||||
if (!loopbackCapture.start([&](const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) {
|
||||
(void)timestampHns;
|
||||
(void)durationHns;
|
||||
if (control.stopRequested || !audioMixer) {
|
||||
return;
|
||||
}
|
||||
|
||||
audioMixer->pushSystem(data, byteCount);
|
||||
})) {
|
||||
std::cerr << "ERROR: Failed to start WASAPI loopback capture" << std::endl;
|
||||
microphoneCapture.stop();
|
||||
audioMixer->stop();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
if (!startAudioCaptures()) {
|
||||
return 1;
|
||||
}
|
||||
if (config.webcamEnabled) {
|
||||
if (!webcamCapture.start()) {
|
||||
microphoneCapture.stop();
|
||||
loopbackCapture.stop();
|
||||
if (audioMixer) {
|
||||
audioMixer->stop();
|
||||
}
|
||||
std::cerr << "ERROR: Failed to start native webcam capture" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
webcamActive = true;
|
||||
const auto webcamDeadline = std::chrono::steady_clock::now() + std::chrono::seconds(3);
|
||||
while (std::chrono::steady_clock::now() < webcamDeadline && !hasVisibleWebcamFrame) {
|
||||
WebcamFrameSnapshot candidateWebcamFrame;
|
||||
if (webcamCapture.copyLatestFrame(candidateWebcamFrame) &&
|
||||
hasVisibleBgraContent(candidateWebcamFrame.data)) {
|
||||
latestWebcamFrame = std::move(candidateWebcamFrame.data);
|
||||
latestWebcamWidth = candidateWebcamFrame.width;
|
||||
latestWebcamHeight = candidateWebcamFrame.height;
|
||||
latestWebcamSequence = candidateWebcamFrame.sequence;
|
||||
hasVisibleWebcamFrame = true;
|
||||
break;
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(20));
|
||||
}
|
||||
if (!hasVisibleWebcamFrame) {
|
||||
std::cerr << "WARNING: Native webcam started but no visible frame was available before screen capture"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (!session.start()) {
|
||||
webcamCapture.stop();
|
||||
microphoneCapture.stop();
|
||||
loopbackCapture.stop();
|
||||
if (audioMixer) {
|
||||
audioMixer->stop();
|
||||
}
|
||||
std::cerr << "ERROR: Failed to start WGC session" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::thread stdinThread(readCaptureCommands, std::ref(control), [&](bool isPaused) {
|
||||
if (audioMixer) {
|
||||
audioMixer->setPaused(isPaused);
|
||||
}
|
||||
});
|
||||
|
||||
{
|
||||
std::unique_lock lock(mutex);
|
||||
const bool started = control.cv.wait_for(lock, std::chrono::seconds(10), [&] {
|
||||
return firstFrameWritten.load() || control.stopRequested.load();
|
||||
});
|
||||
if (!started || !firstFrameWritten) {
|
||||
control.stopRequested = true;
|
||||
control.cv.notify_all();
|
||||
if (stdinThread.joinable()) {
|
||||
stdinThread.detach();
|
||||
}
|
||||
microphoneCapture.stop();
|
||||
loopbackCapture.stop();
|
||||
webcamCapture.stop();
|
||||
if (audioMixer) {
|
||||
audioMixer->stop();
|
||||
}
|
||||
session.stop();
|
||||
std::cerr << "ERROR: Timed out waiting for first WGC frame" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (audioMixer) {
|
||||
audioMixer->beginTimeline();
|
||||
}
|
||||
startVideoWriter();
|
||||
|
||||
std::cout << "{\"event\":\"recording-started\",\"schemaVersion\":2}" << std::endl;
|
||||
std::cout << "Recording started" << std::endl;
|
||||
|
||||
{
|
||||
std::unique_lock lock(mutex);
|
||||
control.cv.wait(lock, [&] {
|
||||
return control.stopRequested.load();
|
||||
});
|
||||
}
|
||||
|
||||
microphoneCapture.stop();
|
||||
loopbackCapture.stop();
|
||||
webcamCapture.stop();
|
||||
if (audioMixer) {
|
||||
audioMixer->stop();
|
||||
}
|
||||
stopVideoWriter();
|
||||
session.stop();
|
||||
{
|
||||
std::scoped_lock lock(mutex);
|
||||
encoder.finalize();
|
||||
if (writeSeparateWebcam) {
|
||||
webcamEncoder.finalize();
|
||||
}
|
||||
}
|
||||
|
||||
if (stdinThread.joinable()) {
|
||||
stdinThread.detach();
|
||||
}
|
||||
|
||||
if (encodeFailed) {
|
||||
std::cerr << "ERROR: Failed to encode WGC frame" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::cout << "{\"event\":\"recording-stopped\",\"schemaVersion\":2,\"screenPath\":\""
|
||||
<< jsonEscape(config.outputPath) << "\"";
|
||||
if (writeSeparateWebcam) {
|
||||
std::cout << ",\"webcamPath\":\"" << jsonEscape(config.webcamOutputPath) << "\"";
|
||||
}
|
||||
std::cout << "}" << std::endl;
|
||||
std::cout << "Recording stopped. Output path: " << config.outputPath << std::endl;
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,450 @@
|
||||
#include "mf_encoder.h"
|
||||
|
||||
#include "audio_sample_utils.h"
|
||||
|
||||
#include <mfapi.h>
|
||||
#include <mferror.h>
|
||||
#include <propvarutil.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
|
||||
namespace {
|
||||
|
||||
// Turns an HRESULT into a bool, logging failures.
// Returns true when `hr` is a success code. Otherwise writes
// "ERROR: <label> failed (hr=0x<hex>)" to stderr and returns false.
bool succeeded(HRESULT hr, const char* label) {
    if (FAILED(hr)) {
        // Switch to hex only for the code itself, then restore decimal output.
        std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")"
                  << std::endl;
        return false;
    }
    return true;
}
|
||||
|
||||
// Writes the frame dimensions to the media type's MF_MT_FRAME_SIZE attribute.
// The HRESULT of the underlying call is not inspected.
void setFrameSize(IMFMediaType* type, UINT32 width, UINT32 height) {
    (void)MFSetAttributeSize(type, MF_MT_FRAME_SIZE, width, height);
}
|
||||
|
||||
// Writes an integer frame rate to MF_MT_FRAME_RATE as the ratio fps/1.
// The HRESULT of the underlying call is not inspected.
void setFrameRate(IMFMediaType* type, UINT32 fps) {
    constexpr UINT32 denominator = 1;
    (void)MFSetAttributeRatio(type, MF_MT_FRAME_RATE, fps, denominator);
}
|
||||
|
||||
// Marks the media type as having square pixels (MF_MT_PIXEL_ASPECT_RATIO = 1:1).
// The HRESULT of the underlying call is not inspected.
void setPixelAspectRatio(IMFMediaType* type) {
    constexpr UINT32 square = 1;
    (void)MFSetAttributeRatio(type, MF_MT_PIXEL_ASPECT_RATIO, square, square);
}
|
||||
|
||||
// Sets the channel count, sample rate and bit depth attributes on an audio media type.
// Attributes are written in the order: channels, samples per second, bits per sample.
// The HRESULTs of the SetUINT32 calls are not inspected.
void setAudioFormat(IMFMediaType* type, UINT32 channels, UINT32 sampleRate, UINT32 bitsPerSample) {
    struct Attribute {
        const GUID& key;
        UINT32 value;
    };
    const Attribute attributes[] = {
        {MF_MT_AUDIO_NUM_CHANNELS, channels},
        {MF_MT_AUDIO_SAMPLES_PER_SECOND, sampleRate},
        {MF_MT_AUDIO_BITS_PER_SAMPLE, bitsPerSample},
    };
    for (const Attribute& attribute : attributes) {
        type->SetUINT32(attribute.key, attribute.value);
    }
}
|
||||
|
||||
// Draws the webcam image as a picture-in-picture overlay in the bottom-right
// corner of a `width` x `height` destination image with 4 bytes per pixel.
//
// Sizing: the overlay starts at a quarter of the destination width, keeps the
// webcam aspect ratio, is capped at a third of the destination height, and is
// then limited to the area left after a margin on each side (never below 2x2).
// Scaling is nearest-neighbour. The fourth byte of every written pixel is set
// to 255. Does nothing when the webcam view or the destination size is empty.
void compositeWebcam(BYTE* destination, int width, int height, const BgraFrameView& webcamFrame) {
    const bool hasSource = webcamFrame.data && webcamFrame.width > 0 && webcamFrame.height > 0;
    const bool hasTarget = width > 0 && height > 0;
    if (!hasSource || !hasTarget) {
        return;
    }

    const int margin = std::max(16, std::min(width, height) / 60);
    const int widthCap = std::max(2, width / 4);
    const int heightCap = std::max(2, height / 3);

    // Start from the width cap and derive the height from the webcam aspect ratio.
    int pipWidth = widthCap;
    int pipHeight = static_cast<int>(
        (static_cast<int64_t>(pipWidth) * webcamFrame.height) / std::max(1, webcamFrame.width));
    if (pipHeight > heightCap) {
        // Too tall: pin the height instead and derive the width.
        pipHeight = heightCap;
        pipWidth = static_cast<int>(
            (static_cast<int64_t>(pipHeight) * webcamFrame.width) / std::max(1, webcamFrame.height));
    }

    pipWidth = std::max(2, std::min(pipWidth, width - margin * 2));
    pipHeight = std::max(2, std::min(pipHeight, height - margin * 2));

    const int left = std::max(0, width - pipWidth - margin);
    const int top = std::max(0, height - pipHeight - margin);

    for (int row = 0; row < pipHeight; ++row) {
        const int sourceRow =
            static_cast<int>((static_cast<int64_t>(row) * webcamFrame.height) / pipHeight);
        const BYTE* sourceLine = webcamFrame.data + (sourceRow * webcamFrame.width) * 4;
        BYTE* out = destination + ((top + row) * width + left) * 4;
        for (int col = 0; col < pipWidth; ++col, out += 4) {
            const int sourceCol =
                static_cast<int>((static_cast<int64_t>(col) * webcamFrame.width) / pipWidth);
            const BYTE* in = sourceLine + sourceCol * 4;
            out[0] = in[0];
            out[1] = in[1];
            out[2] = in[2];
            out[3] = 255;
        }
    }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Finalizes the encoder on destruction; finalize() is a no-op when it has
// already run, so an explicit earlier finalize() call is safe.
MFEncoder::~MFEncoder() {
    (void)finalize();
}
|
||||
|
||||
bool MFEncoder::initialize(
|
||||
const std::wstring& outputPath,
|
||||
int width,
|
||||
int height,
|
||||
int fps,
|
||||
int bitrate,
|
||||
ID3D11Device* device,
|
||||
ID3D11DeviceContext* context,
|
||||
const AudioInputFormat* audioFormat) {
|
||||
width_ = (std::max(2, width) / 2) * 2;
|
||||
height_ = (std::max(2, height) / 2) * 2;
|
||||
fps_ = std::max(1, fps);
|
||||
device_ = device;
|
||||
context_ = context;
|
||||
|
||||
if (!succeeded(MFStartup(MF_VERSION), "MFStartup")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaType> outputType;
|
||||
if (!succeeded(MFCreateMediaType(&outputType), "MFCreateMediaType(output)")) {
|
||||
return false;
|
||||
}
|
||||
outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
|
||||
outputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
|
||||
outputType->SetUINT32(MF_MT_AVG_BITRATE, static_cast<UINT32>(std::max(1, bitrate)));
|
||||
outputType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
|
||||
setFrameSize(outputType.Get(), static_cast<UINT32>(width_), static_cast<UINT32>(height_));
|
||||
setFrameRate(outputType.Get(), static_cast<UINT32>(fps_));
|
||||
setPixelAspectRatio(outputType.Get());
|
||||
|
||||
if (!succeeded(MFCreateSinkWriterFromURL(outputPath.c_str(), nullptr, nullptr, &sinkWriter_),
|
||||
"MFCreateSinkWriterFromURL")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(sinkWriter_->AddStream(outputType.Get(), &videoStreamIndex_), "AddStream")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (audioFormat && !configureAudioStream(*audioFormat)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaType> inputType;
|
||||
if (!succeeded(MFCreateMediaType(&inputType), "MFCreateMediaType(input)")) {
|
||||
return false;
|
||||
}
|
||||
inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
|
||||
inputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32);
|
||||
inputType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
|
||||
inputType->SetUINT32(MF_MT_DEFAULT_STRIDE, static_cast<UINT32>(width_ * 4));
|
||||
setFrameSize(inputType.Get(), static_cast<UINT32>(width_), static_cast<UINT32>(height_));
|
||||
setFrameRate(inputType.Get(), static_cast<UINT32>(fps_));
|
||||
setPixelAspectRatio(inputType.Get());
|
||||
|
||||
if (!succeeded(sinkWriter_->SetInputMediaType(videoStreamIndex_, inputType.Get(), nullptr),
|
||||
"SetInputMediaType")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(sinkWriter_->BeginWriting(), "BeginWriting")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MFEncoder::configureAudioStream(const AudioInputFormat& audioFormat) {
|
||||
if (!sinkWriter_) {
|
||||
return false;
|
||||
}
|
||||
if (audioFormat.sampleRate == 0 || audioFormat.channels == 0 || audioFormat.blockAlign == 0) {
|
||||
std::cerr << "ERROR: Invalid audio input format" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
const AudioInputFormat encoderFormat = makeAacCompatibleAudioFormat(audioFormat);
|
||||
const UINT32 aacBytesPerSecond = 24'000;
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaType> outputType;
|
||||
if (!succeeded(MFCreateMediaType(&outputType), "MFCreateMediaType(audio output)")) {
|
||||
return false;
|
||||
}
|
||||
outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
|
||||
outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_AAC);
|
||||
setAudioFormat(outputType.Get(), encoderFormat.channels, encoderFormat.sampleRate, 16);
|
||||
outputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, aacBytesPerSecond);
|
||||
outputType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, 0);
|
||||
|
||||
if (!succeeded(sinkWriter_->AddStream(outputType.Get(), &audioStreamIndex_), "AddStream(audio)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaType> inputType;
|
||||
if (!succeeded(MFCreateMediaType(&inputType), "MFCreateMediaType(audio input)")) {
|
||||
return false;
|
||||
}
|
||||
inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
|
||||
inputType->SetGUID(MF_MT_SUBTYPE, encoderFormat.subtype);
|
||||
setAudioFormat(inputType.Get(), encoderFormat.channels, encoderFormat.sampleRate, encoderFormat.bitsPerSample);
|
||||
inputType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, encoderFormat.blockAlign);
|
||||
inputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, encoderFormat.avgBytesPerSec);
|
||||
inputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);
|
||||
|
||||
if (!succeeded(sinkWriter_->SetInputMediaType(audioStreamIndex_, inputType.Get(), nullptr),
|
||||
"SetInputMediaType(audio)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
hasAudioStream_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Lazily creates the CPU-readable staging texture used to read frames back.
// The texture is created once and reused; it is always width_ x height_,
// B8G8R8A8_UNORM, single mip, single sample, with CPU read access.
// Returns true when the texture already exists or was created successfully.
bool MFEncoder::ensureStagingTexture(ID3D11Texture2D* texture) {
    if (stagingTexture_) {
        return true;
    }

    // Seed from the source description, then override every field below.
    D3D11_TEXTURE2D_DESC stagingDesc{};
    texture->GetDesc(&stagingDesc);

    stagingDesc.Width = static_cast<UINT>(width_);
    stagingDesc.Height = static_cast<UINT>(height_);
    stagingDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
    stagingDesc.MipLevels = 1;
    stagingDesc.ArraySize = 1;
    stagingDesc.SampleDesc.Count = 1;
    stagingDesc.SampleDesc.Quality = 0;
    stagingDesc.Usage = D3D11_USAGE_STAGING;
    stagingDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    stagingDesc.BindFlags = 0;
    stagingDesc.MiscFlags = 0;

    const HRESULT hr = device_->CreateTexture2D(&stagingDesc, nullptr, &stagingTexture_);
    return succeeded(hr, "CreateTexture2D(staging)");
}
|
||||
|
||||
// Reads a GPU frame back into `destination` as tightly packed rows of
// width_ * 4 bytes, then optionally composites the webcam overlay on top.
//
// texture          Source frame on the GPU; must not be null.
// destination      CPU buffer receiving width_ * height_ * 4 bytes.
// destinationSize  Capacity of `destination` in bytes.
// webcamFrame      Optional overlay; ignored when null.
//
// Returns false (after logging) when an argument is missing, the buffer is
// too small, the staging texture cannot be created, or mapping fails.
bool MFEncoder::copyFrameToBuffer(
    ID3D11Texture2D* texture,
    BYTE* destination,
    DWORD destinationSize,
    const BgraFrameView* webcamFrame) {
    if (!texture || !destination || !context_) {
        std::cerr << "ERROR: Missing texture, buffer, or D3D context for frame copy" << std::endl;
        return false;
    }

    // Validate the CPU buffer before doing any GPU work.
    const DWORD rowBytes = static_cast<DWORD>(width_ * 4);
    const DWORD requiredBytes = rowBytes * static_cast<DWORD>(height_);
    if (destinationSize < requiredBytes) {
        std::cerr << "ERROR: Media Foundation buffer is too small" << std::endl;
        return false;
    }

    if (!ensureStagingTexture(texture)) {
        return false;
    }

    // CopyResource requires source and destination to have identical
    // dimensions. The staging texture is width_ x height_ (rounded to even),
    // so a source of a different size is copied as a clamped region instead.
    D3D11_TEXTURE2D_DESC sourceDesc{};
    texture->GetDesc(&sourceDesc);
    const UINT stagingWidth = static_cast<UINT>(width_);
    const UINT stagingHeight = static_cast<UINT>(height_);
    if (sourceDesc.Width == stagingWidth && sourceDesc.Height == stagingHeight) {
        context_->CopyResource(stagingTexture_.Get(), texture);
    } else {
        D3D11_BOX region{};
        region.left = 0;
        region.top = 0;
        region.front = 0;
        region.right = std::min(sourceDesc.Width, stagingWidth);
        region.bottom = std::min(sourceDesc.Height, stagingHeight);
        region.back = 1;
        context_->CopySubresourceRegion(stagingTexture_.Get(), 0, 0, 0, 0, texture, 0, &region);
    }

    D3D11_MAPPED_SUBRESOURCE mapped{};
    if (!succeeded(context_->Map(stagingTexture_.Get(), 0, D3D11_MAP_READ, 0, &mapped), "Map")) {
        return false;
    }

    // The mapped row pitch may exceed rowBytes, so copy row by row.
    auto* source = static_cast<const BYTE*>(mapped.pData);
    for (int y = 0; y < height_; y += 1) {
        std::memcpy(destination + rowBytes * y, source + mapped.RowPitch * y, rowBytes);
    }
    if (webcamFrame) {
        compositeWebcam(destination, width_, height_, *webcamFrame);
    }

    context_->Unmap(stagingTexture_.Get(), 0);
    return true;
}
|
||||
|
||||
bool MFEncoder::copyBgraFrameToBuffer(const BgraFrameView& frame, BYTE* destination, DWORD destinationSize) {
|
||||
if (!frame.data || frame.width <= 0 || frame.height <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const DWORD rowBytes = static_cast<DWORD>(width_ * 4);
|
||||
const DWORD requiredBytes = rowBytes * static_cast<DWORD>(height_);
|
||||
if (destinationSize < requiredBytes) {
|
||||
std::cerr << "ERROR: Media Foundation webcam buffer is too small" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (frame.width == width_ && frame.height == height_) {
|
||||
for (DWORD i = 0; i < requiredBytes; i += 4) {
|
||||
destination[i] = frame.data[i];
|
||||
destination[i + 1] = frame.data[i + 1];
|
||||
destination[i + 2] = frame.data[i + 2];
|
||||
destination[i + 3] = 255;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
for (int y = 0; y < height_; y += 1) {
|
||||
const int sourceY = static_cast<int>((static_cast<int64_t>(y) * frame.height) / height_);
|
||||
BYTE* destinationRow = destination + rowBytes * y;
|
||||
for (int x = 0; x < width_; x += 1) {
|
||||
const int sourceX = static_cast<int>((static_cast<int64_t>(x) * frame.width) / width_);
|
||||
const BYTE* source = frame.data + (sourceY * frame.width + sourceX) * 4;
|
||||
BYTE* target = destinationRow + x * 4;
|
||||
target[0] = source[0];
|
||||
target[1] = source[1];
|
||||
target[2] = source[2];
|
||||
target[3] = 255;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Encodes one screen frame read back from a GPU texture, optionally with the
// webcam overlay composited in.
//
// texture       Source frame; forwarded to copyFrameToBuffer().
// timestampHns  Capture timestamp in 100 ns units. The first timestamp seen
//               becomes time zero; a timestamp that does not advance past the
//               previous sample is moved one frame interval after it.
// webcamFrame   Optional overlay; ignored when null.
//
// Returns false when the writer is missing or finalized, or when any Media
// Foundation call or the frame copy fails. Serialized by writerMutex_.
bool MFEncoder::writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const BgraFrameView* webcamFrame) {
    std::scoped_lock writerLock(writerMutex_);
    if (!sinkWriter_ || finalized_) {
        return false;
    }

    if (firstTimestampHns_ < 0) {
        firstTimestampHns_ = timestampHns;
    }

    const int64_t sampleDuration = 10'000'000LL / fps_;
    int64_t sampleTime = timestampHns - firstTimestampHns_;
    if (sampleTime <= lastTimestampHns_) {
        // Keep sample times strictly increasing.
        sampleTime = lastTimestampHns_ + sampleDuration;
    }
    lastTimestampHns_ = sampleTime;

    Microsoft::WRL::ComPtr<IMFMediaBuffer> buffer;
    const DWORD frameBytes = static_cast<DWORD>(width_ * height_ * 4);
    if (!succeeded(MFCreateMemoryBuffer(frameBytes, &buffer), "MFCreateMemoryBuffer")) {
        return false;
    }

    BYTE* data = nullptr;
    DWORD maxLength = 0;
    DWORD currentLength = 0;
    if (!succeeded(buffer->Lock(&data, &maxLength, &currentLength), "IMFMediaBuffer::Lock")) {
        return false;
    }

    const bool copied = copyFrameToBuffer(texture, data, maxLength, webcamFrame);
    buffer->Unlock();
    if (!copied) {
        return false;
    }
    if (!succeeded(buffer->SetCurrentLength(frameBytes), "IMFMediaBuffer::SetCurrentLength")) {
        return false;
    }

    Microsoft::WRL::ComPtr<IMFSample> sample;
    if (!succeeded(MFCreateSample(&sample), "MFCreateSample")) {
        return false;
    }
    // A sample without its buffer or timing must not reach the sink writer.
    if (!succeeded(sample->AddBuffer(buffer.Get()), "IMFSample::AddBuffer") ||
        !succeeded(sample->SetSampleTime(sampleTime), "IMFSample::SetSampleTime") ||
        !succeeded(sample->SetSampleDuration(sampleDuration), "IMFSample::SetSampleDuration")) {
        return false;
    }

    return succeeded(sinkWriter_->WriteSample(videoStreamIndex_, sample.Get()), "WriteSample");
}
|
||||
|
||||
bool MFEncoder::writeBgraFrame(const BgraFrameView& frame, int64_t timestampHns) {
|
||||
std::scoped_lock writerLock(writerMutex_);
|
||||
if (!sinkWriter_ || finalized_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (firstTimestampHns_ < 0) {
|
||||
firstTimestampHns_ = timestampHns;
|
||||
}
|
||||
|
||||
int64_t sampleTime = timestampHns - firstTimestampHns_;
|
||||
if (sampleTime <= lastTimestampHns_) {
|
||||
sampleTime = lastTimestampHns_ + (10'000'000LL / fps_);
|
||||
}
|
||||
const int64_t sampleDuration = 10'000'000LL / fps_;
|
||||
lastTimestampHns_ = sampleTime;
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaBuffer> buffer;
|
||||
const DWORD frameBytes = static_cast<DWORD>(width_ * height_ * 4);
|
||||
if (!succeeded(MFCreateMemoryBuffer(frameBytes, &buffer), "MFCreateMemoryBuffer(webcam)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
BYTE* data = nullptr;
|
||||
DWORD maxLength = 0;
|
||||
DWORD currentLength = 0;
|
||||
if (!succeeded(buffer->Lock(&data, &maxLength, ¤tLength), "IMFMediaBuffer::Lock(webcam)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool copied = copyBgraFrameToBuffer(frame, data, maxLength);
|
||||
buffer->Unlock();
|
||||
if (!copied) {
|
||||
return false;
|
||||
}
|
||||
buffer->SetCurrentLength(frameBytes);
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFSample> sample;
|
||||
if (!succeeded(MFCreateSample(&sample), "MFCreateSample(webcam)")) {
|
||||
return false;
|
||||
}
|
||||
sample->AddBuffer(buffer.Get());
|
||||
sample->SetSampleTime(sampleTime);
|
||||
sample->SetSampleDuration(sampleDuration);
|
||||
|
||||
return succeeded(sinkWriter_->WriteSample(videoStreamIndex_, sample.Get()), "WriteSample(webcam)");
|
||||
}
|
||||
|
||||
bool MFEncoder::writeAudio(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns) {
|
||||
std::scoped_lock writerLock(writerMutex_);
|
||||
if (!sinkWriter_ || finalized_ || !hasAudioStream_) {
|
||||
return false;
|
||||
}
|
||||
if (!data || byteCount == 0 || durationHns <= 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaBuffer> buffer;
|
||||
if (!succeeded(MFCreateMemoryBuffer(byteCount, &buffer), "MFCreateMemoryBuffer(audio)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
BYTE* destination = nullptr;
|
||||
DWORD maxLength = 0;
|
||||
DWORD currentLength = 0;
|
||||
if (!succeeded(buffer->Lock(&destination, &maxLength, ¤tLength),
|
||||
"IMFMediaBuffer::Lock(audio)")) {
|
||||
return false;
|
||||
}
|
||||
if (maxLength < byteCount) {
|
||||
buffer->Unlock();
|
||||
std::cerr << "ERROR: Media Foundation audio buffer is too small" << std::endl;
|
||||
return false;
|
||||
}
|
||||
std::memcpy(destination, data, byteCount);
|
||||
buffer->Unlock();
|
||||
buffer->SetCurrentLength(byteCount);
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFSample> sample;
|
||||
if (!succeeded(MFCreateSample(&sample), "MFCreateSample(audio)")) {
|
||||
return false;
|
||||
}
|
||||
sample->AddBuffer(buffer.Get());
|
||||
sample->SetSampleTime(std::max<int64_t>(0, timestampHns));
|
||||
sample->SetSampleDuration(durationHns);
|
||||
|
||||
return succeeded(sinkWriter_->WriteSample(audioStreamIndex_, sample.Get()), "WriteSample(audio)");
|
||||
}
|
||||
|
||||
bool MFEncoder::finalize() {
|
||||
std::scoped_lock writerLock(writerMutex_);
|
||||
if (finalized_) {
|
||||
return true;
|
||||
}
|
||||
|
||||
finalized_ = true;
|
||||
bool ok = true;
|
||||
if (sinkWriter_) {
|
||||
ok = succeeded(sinkWriter_->Finalize(), "SinkWriter::Finalize");
|
||||
sinkWriter_.Reset();
|
||||
}
|
||||
stagingTexture_.Reset();
|
||||
context_.Reset();
|
||||
device_.Reset();
|
||||
MFShutdown();
|
||||
return ok;
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
#pragma once
|
||||
|
||||
#include <Windows.h>
|
||||
#include <d3d11.h>
|
||||
#include <mfapi.h>
|
||||
#include <mfidl.h>
|
||||
#include <mfreadwrite.h>
|
||||
#include <wrl/client.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
struct BgraFrameView {
|
||||
const BYTE* data = nullptr;
|
||||
int width = 0;
|
||||
int height = 0;
|
||||
};
|
||||
|
||||
struct AudioInputFormat {
|
||||
GUID subtype = MFAudioFormat_PCM;
|
||||
UINT32 sampleRate = 0;
|
||||
UINT32 channels = 0;
|
||||
UINT32 bitsPerSample = 0;
|
||||
UINT32 blockAlign = 0;
|
||||
UINT32 avgBytesPerSec = 0;
|
||||
};
|
||||
|
||||
class MFEncoder {
|
||||
public:
|
||||
MFEncoder() = default;
|
||||
~MFEncoder();
|
||||
|
||||
MFEncoder(const MFEncoder&) = delete;
|
||||
MFEncoder& operator=(const MFEncoder&) = delete;
|
||||
|
||||
bool initialize(
|
||||
const std::wstring& outputPath,
|
||||
int width,
|
||||
int height,
|
||||
int fps,
|
||||
int bitrate,
|
||||
ID3D11Device* device,
|
||||
ID3D11DeviceContext* context,
|
||||
const AudioInputFormat* audioFormat = nullptr);
|
||||
bool writeFrame(ID3D11Texture2D* texture, int64_t timestampHns, const BgraFrameView* webcamFrame = nullptr);
|
||||
bool writeBgraFrame(const BgraFrameView& frame, int64_t timestampHns);
|
||||
bool writeAudio(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns);
|
||||
bool finalize();
|
||||
|
||||
private:
|
||||
bool ensureStagingTexture(ID3D11Texture2D* texture);
|
||||
bool copyFrameToBuffer(
|
||||
ID3D11Texture2D* texture,
|
||||
BYTE* destination,
|
||||
DWORD destinationSize,
|
||||
const BgraFrameView* webcamFrame);
|
||||
bool copyBgraFrameToBuffer(const BgraFrameView& frame, BYTE* destination, DWORD destinationSize);
|
||||
bool configureAudioStream(const AudioInputFormat& audioFormat);
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFSinkWriter> sinkWriter_;
|
||||
Microsoft::WRL::ComPtr<ID3D11Device> device_;
|
||||
Microsoft::WRL::ComPtr<ID3D11DeviceContext> context_;
|
||||
Microsoft::WRL::ComPtr<ID3D11Texture2D> stagingTexture_;
|
||||
std::mutex writerMutex_;
|
||||
DWORD videoStreamIndex_ = 0;
|
||||
DWORD audioStreamIndex_ = 0;
|
||||
bool hasAudioStream_ = false;
|
||||
int width_ = 0;
|
||||
int height_ = 0;
|
||||
int fps_ = 60;
|
||||
int64_t firstTimestampHns_ = -1;
|
||||
int64_t lastTimestampHns_ = -1;
|
||||
bool finalized_ = false;
|
||||
};
|
||||
@@ -0,0 +1,121 @@
|
||||
#include "monitor_utils.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
struct MonitorCandidate {
|
||||
HMONITOR monitor = nullptr;
|
||||
RECT rect{};
|
||||
};
|
||||
|
||||
std::vector<MonitorCandidate> enumerateMonitors() {
|
||||
std::vector<MonitorCandidate> monitors;
|
||||
EnumDisplayMonitors(
|
||||
nullptr,
|
||||
nullptr,
|
||||
[](HMONITOR monitor, HDC, LPRECT rect, LPARAM userData) -> BOOL {
|
||||
auto* result = reinterpret_cast<std::vector<MonitorCandidate>*>(userData);
|
||||
result->push_back({monitor, *rect});
|
||||
return TRUE;
|
||||
},
|
||||
reinterpret_cast<LPARAM>(&monitors));
|
||||
return monitors;
|
||||
}
|
||||
|
||||
bool rectMatchesBounds(const RECT& rect, const MonitorBounds& bounds) {
|
||||
return rect.left == bounds.x &&
|
||||
rect.top == bounds.y &&
|
||||
(rect.right - rect.left) == bounds.width &&
|
||||
(rect.bottom - rect.top) == bounds.height;
|
||||
}
|
||||
|
||||
int64_t overlapArea(const RECT& rect, const MonitorBounds& bounds) {
|
||||
const LONG left = std::max<LONG>(rect.left, bounds.x);
|
||||
const LONG top = std::max<LONG>(rect.top, bounds.y);
|
||||
const LONG right = std::min<LONG>(rect.right, bounds.x + bounds.width);
|
||||
const LONG bottom = std::min<LONG>(rect.bottom, bounds.y + bounds.height);
|
||||
if (right <= left || bottom <= top) {
|
||||
return 0;
|
||||
}
|
||||
return static_cast<int64_t>(right - left) * static_cast<int64_t>(bottom - top);
|
||||
}
|
||||
|
||||
// Extracts the numeric index from a source id of the form "screen:<index>" or
// "screen:<index>:<rest>".
// Returns the index, or -1 when the prefix is missing, the index field is
// empty, is not fully numeric, is negative, or does not fit in an int.
int parseScreenSourceIndex(const std::string& sourceId) {
    const std::string prefix("screen:");
    if (sourceId.compare(0, prefix.size(), prefix) != 0) {
        return -1;
    }

    // The index field runs from the end of the prefix to the next ':' (or the end).
    const std::string::size_type begin = prefix.size();
    const std::string::size_type colon = sourceId.find(':', begin);
    const std::string field = (colon == std::string::npos)
        ? sourceId.substr(begin)
        : sourceId.substr(begin, colon - begin);
    if (field.empty()) {
        return -1;
    }

    try {
        std::size_t consumed = 0;
        const int value = std::stoi(field, &consumed, 10);
        // Reject trailing garbage ("2x") and negative numbers.
        if (consumed != field.size() || value < 0) {
            return -1;
        }
        return value;
    } catch (...) {
        // std::stoi throws on non-numeric or out-of-range input.
        return -1;
    }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Chooses the monitor to capture. Strategies are tried in this order:
//   1. a monitor whose rectangle equals `bounds` exactly;
//   2. the monitor with the largest positive overlap with `bounds`;
//   3. a monitor whose HMONITOR value equals `displayId`;
//   4. the monitor at the index parsed from `sourceId` ("screen:<index>...");
//   5. the primary monitor.
// Steps 1-2 run only when `bounds` is non-null with positive width and height.
// When no monitor can be enumerated the primary monitor is returned directly.
HMONITOR findMonitorForCapture(
    int64_t displayId,
    const std::string& sourceId,
    const MonitorBounds* bounds) {
    const auto primaryMonitor = [] {
        return MonitorFromPoint(POINT{0, 0}, MONITOR_DEFAULTTOPRIMARY);
    };

    const auto monitors = enumerateMonitors();
    if (monitors.empty()) {
        return primaryMonitor();
    }

    // Electron's display_id is not stable across all Windows capture backends.
    // Bounds are the most reliable contract because they come from Electron's
    // selected display and match the WGC monitor coordinate space.
    const bool haveBounds = bounds != nullptr && bounds->width > 0 && bounds->height > 0;
    if (haveBounds) {
        const auto exact = std::find_if(
            monitors.begin(), monitors.end(), [&](const MonitorCandidate& candidate) {
                return rectMatchesBounds(candidate.rect, *bounds);
            });
        if (exact != monitors.end()) {
            return exact->monitor;
        }

        // No exact match: take the first monitor with the largest overlap.
        HMONITOR closest = nullptr;
        int64_t closestArea = 0;
        for (const auto& candidate : monitors) {
            const int64_t area = overlapArea(candidate.rect, *bounds);
            if (area > closestArea) {
                closestArea = area;
                closest = candidate.monitor;
            }
        }
        if (closest) {
            return closest;
        }
    }

    // Best-effort fallback for helpers invoked without bounds. Some callers pass
    // zero-based ids while Win32 monitor handles are pointer values, so only use
    // this when it exactly matches the HMONITOR value.
    const auto byHandle = std::find_if(
        monitors.begin(), monitors.end(), [&](const MonitorCandidate& candidate) {
            return reinterpret_cast<int64_t>(candidate.monitor) == displayId;
        });
    if (byHandle != monitors.end()) {
        return byHandle->monitor;
    }

    // Fall back to the enumeration index encoded in the source id.
    const int sourceIndex = parseScreenSourceIndex(sourceId);
    if (sourceIndex >= 0) {
        const size_t slot = static_cast<size_t>(sourceIndex);
        if (slot < monitors.size()) {
            return monitors[slot].monitor;
        }
    }

    return primaryMonitor();
}
|
||||
@@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <Windows.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
// Rectangle describing a display: top-left corner plus size.
// findMonitorForCapture() ignores bounds whose width or height is not positive.
struct MonitorBounds {
    int x{0};       // Left edge.
    int y{0};       // Top edge.
    int width{0};   // Width.
    int height{0};  // Height.
};
|
||||
|
||||
// Resolves the monitor to capture from the identifiers supplied by the caller.
//   displayId  Compared against the raw HMONITOR value of each monitor.
//   sourceId   Source id of the form "screen:<index>[:...]".
//   bounds     Optional display rectangle; may be null.
HMONITOR findMonitorForCapture(int64_t displayId, const std::string& sourceId, const MonitorBounds* bounds);
|
||||
@@ -0,0 +1,263 @@
|
||||
#include <Windows.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr const wchar_t* SERVICE_NAME = L"OpenScreenOCR";
|
||||
|
||||
struct ServiceConfig {
|
||||
std::wstring exePath;
|
||||
std::wstring resourcesPath;
|
||||
std::wstring dataPath;
|
||||
};
|
||||
|
||||
SERVICE_STATUS_HANDLE g_statusHandle = nullptr;
|
||||
SERVICE_STATUS g_status{};
|
||||
HANDLE g_stopEvent = nullptr;
|
||||
PROCESS_INFORMATION g_childProcess{};
|
||||
ServiceConfig g_config;
|
||||
|
||||
// Wraps `value` in double quotes so that the Windows command-line parser
// (CommandLineToArgvW / the MSVC runtime) reads it back as one argument with
// the same contents.
//
// The parser treats backslashes specially only when they precede a double
// quote, so:
//   - backslashes followed by an embedded quote are doubled and the quote is
//     escaped with one more backslash;
//   - backslashes at the very end are doubled, otherwise they would escape the
//     closing quote (e.g. a path ending in '\');
//   - all other backslashes are copied unchanged.
std::wstring quoteArg(const std::wstring& value) {
    std::wstring result = L"\"";
    std::size_t pendingBackslashes = 0;

    for (wchar_t ch : value) {
        if (ch == L'\\') {
            // Defer: how to emit these depends on what follows them.
            ++pendingBackslashes;
            continue;
        }
        if (ch == L'"') {
            // 2n backslashes yield n literal ones; the extra one escapes the quote.
            result.append(pendingBackslashes * 2 + 1, L'\\');
        } else {
            result.append(pendingBackslashes, L'\\');
        }
        result.push_back(ch);
        pendingBackslashes = 0;
    }

    // Trailing backslashes sit right before the closing quote: double them.
    result.append(pendingBackslashes * 2, L'\\');
    result += L"\"";
    return result;
}
|
||||
|
||||
// Returns everything before the last '\' or '/' in `path`, or "." when the
// path contains no separator. The separator itself is not included.
std::wstring directoryName(const std::wstring& path) {
    const std::wstring::size_type separator = path.find_last_of(L"\\/");
    if (separator == std::wstring::npos) {
        return L".";
    }
    return std::wstring(path, 0, separator);
}
|
||||
|
||||
// Best-effort equivalent of "mkdir -p": calls CreateDirectoryW for every
// prefix of `path` that ends in a separator, then for `path` itself.
// Prefixes of three characters or fewer (such as "C:\") are skipped.
// Results of CreateDirectoryW are ignored. Does nothing for an empty path.
void createDirectoryRecursive(const std::wstring& path) {
    if (path.empty()) {
        return;
    }

    const wchar_t* const separators = L"\\/";
    for (std::wstring::size_type at = path.find_first_of(separators);
         at != std::wstring::npos;
         at = path.find_first_of(separators, at + 1)) {
        // Prefix up to and including this separator.
        const std::wstring prefix = path.substr(0, at + 1);
        if (prefix.size() > 3) {
            CreateDirectoryW(prefix.c_str(), nullptr);
        }
    }

    CreateDirectoryW(path.c_str(), nullptr);
}
|
||||
|
||||
// Sets an environment variable in the current process. An empty `value` is
// passed to SetEnvironmentVariableW as a null pointer rather than as "".
void setEnv(const wchar_t* name, const std::wstring& value) {
    const wchar_t* raw = nullptr;
    if (!value.empty()) {
        raw = value.c_str();
    }
    SetEnvironmentVariableW(name, raw);
}
|
||||
|
||||
// Reports a new service state to the SCM through g_status / g_statusHandle.
//
// state          New value for dwCurrentState.
// win32ExitCode  Value for dwWin32ExitCode (NO_ERROR by default).
// waitHint       Value for dwWaitHint in milliseconds (0 by default).
//
// Stop and shutdown controls are accepted only while running. The checkpoint
// increases on every pending-state report and is 0 for all other states.
// Does nothing until the status handle has been registered.
void setServiceStatus(DWORD state, DWORD win32ExitCode = NO_ERROR, DWORD waitHint = 0) {
    if (g_statusHandle == nullptr) {
        return;
    }

    // Persists across calls so successive pending reports show progress.
    static DWORD checkpoint = 1;

    const bool running = state == SERVICE_RUNNING;
    const bool pending = state == SERVICE_START_PENDING || state == SERVICE_STOP_PENDING;

    g_status.dwServiceType = SERVICE_WIN32_OWN_PROCESS;
    g_status.dwCurrentState = state;
    g_status.dwWin32ExitCode = win32ExitCode;
    g_status.dwWaitHint = waitHint;
    g_status.dwControlsAccepted = running ? (SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN) : 0;
    g_status.dwCheckPoint = pending ? checkpoint++ : 0;

    SetServiceStatus(g_statusHandle, &g_status);
}
|
||||
|
||||
// Opens (creating if needed) "<dataPath>\logs\ocr-service.log" for appending
// and returns an inheritable handle to it, so a child process can use it as
// stdout/stderr. Returns INVALID_HANDLE_VALUE when the file cannot be opened.
HANDLE openServiceLog(const std::wstring& dataPath) {
    const std::wstring logDirectory = dataPath + L"\\logs";
    createDirectoryRecursive(logDirectory);
    const std::wstring logFilePath = logDirectory + L"\\ocr-service.log";

    // bInheritHandle lets the handle be passed on to the child process.
    SECURITY_ATTRIBUTES inheritable{};
    inheritable.nLength = sizeof(inheritable);
    inheritable.bInheritHandle = TRUE;

    const HANDLE log = CreateFileW(
        logFilePath.c_str(),
        FILE_APPEND_DATA,
        FILE_SHARE_READ | FILE_SHARE_WRITE,
        &inheritable,
        OPEN_ALWAYS,
        FILE_ATTRIBUTE_NORMAL,
        nullptr);
    if (log == INVALID_HANDLE_VALUE) {
        return log;
    }

    SetFilePointer(log, 0, nullptr, FILE_END);
    return log;
}
|
||||
|
||||
bool startOcrProcess(const ServiceConfig& config) {
|
||||
if (config.exePath.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::wstring dataPath = config.dataPath.empty()
|
||||
? directoryName(config.exePath) + L"\\ocr-runtime"
|
||||
: config.dataPath;
|
||||
const std::wstring resourcesPath = config.resourcesPath.empty()
|
||||
? directoryName(directoryName(config.exePath))
|
||||
: config.resourcesPath;
|
||||
const std::wstring modelCachePath = dataPath + L"\\ocr-models";
|
||||
const std::wstring paddlexCachePath = resourcesPath + L"\\ocr-models\\paddlex";
|
||||
|
||||
createDirectoryRecursive(dataPath);
|
||||
createDirectoryRecursive(modelCachePath);
|
||||
|
||||
setEnv(L"OPENSCREEN_OCR_HOST", L"127.0.0.1");
|
||||
setEnv(L"OPENSCREEN_OCR_PORT", L"8866");
|
||||
setEnv(L"PADDLEOCR_DEVICE", L"cpu");
|
||||
setEnv(L"PADDLEOCR_ENABLE_MKLDNN", L"0");
|
||||
setEnv(L"PADDLEOCR_LANG", L"");
|
||||
setEnv(L"PADDLEOCR_USE_MOBILE", L"1");
|
||||
setEnv(L"OPENSCREEN_OCR_PROFILE", L"vietnamese");
|
||||
setEnv(L"OPENSCREEN_OCR_WARMUP", L"1");
|
||||
setEnv(L"PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT", L"False");
|
||||
setEnv(L"PADDLE_PDX_CACHE_HOME", paddlexCachePath);
|
||||
setEnv(L"PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", L"True");
|
||||
setEnv(L"PADDLE_HOME", modelCachePath + L"\\paddle");
|
||||
setEnv(L"PADDLEOCR_HOME", modelCachePath + L"\\paddleocr");
|
||||
setEnv(L"PYTHONUTF8", L"1");
|
||||
|
||||
STARTUPINFOW startupInfo{};
|
||||
startupInfo.cb = sizeof(startupInfo);
|
||||
HANDLE logFile = openServiceLog(dataPath);
|
||||
if (logFile != INVALID_HANDLE_VALUE) {
|
||||
startupInfo.dwFlags |= STARTF_USESTDHANDLES;
|
||||
startupInfo.hStdOutput = logFile;
|
||||
startupInfo.hStdError = logFile;
|
||||
startupInfo.hStdInput = GetStdHandle(STD_INPUT_HANDLE);
|
||||
}
|
||||
|
||||
std::wstring commandLine = quoteArg(config.exePath);
|
||||
const std::wstring cwd = directoryName(config.exePath);
|
||||
ZeroMemory(&g_childProcess, sizeof(g_childProcess));
|
||||
const BOOL created = CreateProcessW(
|
||||
config.exePath.c_str(),
|
||||
commandLine.data(),
|
||||
nullptr,
|
||||
nullptr,
|
||||
TRUE,
|
||||
CREATE_NO_WINDOW,
|
||||
nullptr,
|
||||
cwd.c_str(),
|
||||
&startupInfo,
|
||||
&g_childProcess);
|
||||
|
||||
if (logFile != INVALID_HANDLE_VALUE) {
|
||||
CloseHandle(logFile);
|
||||
}
|
||||
return created == TRUE;
|
||||
}
|
||||
|
||||
void stopOcrProcess() {
|
||||
if (g_childProcess.hProcess) {
|
||||
TerminateProcess(g_childProcess.hProcess, 0);
|
||||
WaitForSingleObject(g_childProcess.hProcess, 10000);
|
||||
CloseHandle(g_childProcess.hProcess);
|
||||
g_childProcess.hProcess = nullptr;
|
||||
}
|
||||
if (g_childProcess.hThread) {
|
||||
CloseHandle(g_childProcess.hThread);
|
||||
g_childProcess.hThread = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Service control callback registered through RegisterServiceCtrlHandlerExW.
//
// STOP / SHUTDOWN: reports SERVICE_STOP_PENDING and signals g_stopEvent.
// serviceMain waits on that event and performs the actual child shutdown, so
// the child is NOT stopped here as well: doing it on both threads could
// terminate and close the same process handles twice. Only when no stop event
// exists (nobody is waiting) is the child stopped directly.
//
// INTERROGATE: acknowledged with NO_ERROR.
// Anything else: ERROR_CALL_NOT_IMPLEMENTED, as the HandlerEx contract asks
// for controls a service does not handle.
DWORD WINAPI serviceControlHandler(DWORD control, DWORD, LPVOID, LPVOID) {
    switch (control) {
    case SERVICE_CONTROL_STOP:
    case SERVICE_CONTROL_SHUTDOWN:
        setServiceStatus(SERVICE_STOP_PENDING, NO_ERROR, 10000);
        if (g_stopEvent) {
            SetEvent(g_stopEvent);
        } else {
            stopOcrProcess();
        }
        return NO_ERROR;

    case SERVICE_CONTROL_INTERROGATE:
        return NO_ERROR;

    default:
        return ERROR_CALL_NOT_IMPLEMENTED;
    }
}
|
||||
|
||||
// Service entry point handed to the service control dispatcher.
//
// Registers the control handler, launches the OCR child process using
// g_config, then blocks until either the stop event is signaled or the child
// process handle becomes signaled. Afterwards the child is stopped, the event
// is released and SERVICE_STOPPED is reported.
void WINAPI serviceMain(DWORD, LPWSTR*) {
    g_statusHandle = RegisterServiceCtrlHandlerExW(SERVICE_NAME, serviceControlHandler, nullptr);
    if (!g_statusHandle) {
        // Without a status handle nothing can be reported to the SCM.
        return;
    }

    setServiceStatus(SERVICE_START_PENDING, NO_ERROR, 30000);

    // Manual-reset event, initially non-signaled, unnamed.
    g_stopEvent = CreateEventW(nullptr, TRUE, FALSE, nullptr);

    bool started = false;
    if (g_stopEvent) {
        started = startOcrProcess(g_config);
    }
    if (!started) {
        setServiceStatus(SERVICE_STOPPED, ERROR_SERVICE_SPECIFIC_ERROR);
        return;
    }

    setServiceStatus(SERVICE_RUNNING);

    HANDLE waitHandles[2];
    waitHandles[0] = g_stopEvent;
    waitHandles[1] = g_childProcess.hProcess;
    // Wake on whichever handle is signaled first.
    WaitForMultipleObjects(2, waitHandles, FALSE, INFINITE);

    stopOcrProcess();

    if (g_stopEvent) {
        CloseHandle(g_stopEvent);
        g_stopEvent = nullptr;
    }

    setServiceStatus(SERVICE_STOPPED);
}
|
||||
|
||||
// Builds a ServiceConfig from the command line.
//
// Recognized options, each followed by its value: --exe, --resources, --data.
// An option that appears last (with no value after it) is ignored, and
// unknown arguments are skipped. argv[0] is never inspected.
ServiceConfig parseConfig(int argc, wchar_t* argv[]) {
    ServiceConfig config;

    int index = 1;
    while (index < argc) {
        const std::wstring option = argv[index];

        // Pick the field this option writes to, if it is one we know.
        std::wstring* destination = nullptr;
        if (option == L"--exe") {
            destination = &config.exePath;
        } else if (option == L"--resources") {
            destination = &config.resourcesPath;
        } else if (option == L"--data") {
            destination = &config.dataPath;
        }

        // Consume the following argument as the option's value when present.
        if (destination != nullptr && index + 1 < argc) {
            ++index;
            *destination = argv[index];
        }

        ++index;
    }

    return config;
}
|
||||
|
||||
// Returns true when any argument after argv[0] is exactly "--service".
bool hasServiceFlag(int argc, wchar_t* argv[]) {
    const std::wstring serviceFlag = L"--service";

    int index = 1;
    while (index < argc) {
        if (serviceFlag == argv[index]) {
            return true;
        }
        ++index;
    }
    return false;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int wmain(int argc, wchar_t* argv[]) {
|
||||
g_config = parseConfig(argc, argv);
|
||||
|
||||
if (hasServiceFlag(argc, argv)) {
|
||||
SERVICE_TABLE_ENTRYW serviceTable[] = {
|
||||
{const_cast<LPWSTR>(SERVICE_NAME), serviceMain},
|
||||
{nullptr, nullptr},
|
||||
};
|
||||
return StartServiceCtrlDispatcherW(serviceTable) ? 0 : 1;
|
||||
}
|
||||
|
||||
if (!startOcrProcess(g_config)) {
|
||||
std::wcerr << L"Failed to start OCR service process." << std::endl;
|
||||
return 1;
|
||||
}
|
||||
WaitForSingleObject(g_childProcess.hProcess, INFINITE);
|
||||
stopOcrProcess();
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,411 @@
|
||||
#include "wasapi_loopback_capture.h"
|
||||
|
||||
#include <Functiondiscoverykeys_devpkey.h>
|
||||
#include <ksmedia.h>
|
||||
#include <propvarutil.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cwctype>
|
||||
#include <iostream>
|
||||
|
||||
namespace {
|
||||
|
||||
// Buffer duration passed to IAudioClient::Initialize, in 100-nanosecond units
// (10'000'000 hns == one second).
constexpr REFERENCE_TIME BufferDurationHns = 10'000'000;

// Number of 100-nanosecond units in one second; used to convert frame counts
// into timestamps and durations.
constexpr int64_t HnsPerSecond = 10'000'000;
|
||||
|
||||
// Returns whether `hr` is a success code. On failure, writes one line to
// stderr naming the failed operation (`label`) and the HRESULT in hex.
bool succeeded(HRESULT hr, const char* label) {
    const bool ok = SUCCEEDED(hr);
    if (!ok) {
        std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")"
                  << std::endl;
    }
    return ok;
}
|
||||
|
||||
// Maps a WAVEFORMATEX description onto the Media Foundation audio subtype.
//
// Returns MFAudioFormat_Float or MFAudioFormat_PCM for plain float/PCM tags
// and for WAVE_FORMAT_EXTENSIBLE formats whose SubFormat is float/PCM.
// Returns GUID_NULL for anything else, including an extensible header whose
// cbSize is too small to hold the WAVEFORMATEXTENSIBLE fields.
GUID audioSubtypeFromFormat(WAVEFORMATEX* format) {
    switch (format->wFormatTag) {
    case WAVE_FORMAT_IEEE_FLOAT:
        return MFAudioFormat_Float;

    case WAVE_FORMAT_PCM:
        return MFAudioFormat_PCM;

    case WAVE_FORMAT_EXTENSIBLE: {
        // Only read the extended fields when the header says they are there.
        if (format->cbSize < sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX)) {
            break;
        }
        auto* extended = reinterpret_cast<WAVEFORMATEXTENSIBLE*>(format);
        if (extended->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) {
            return MFAudioFormat_Float;
        }
        if (extended->SubFormat == KSDATAFORMAT_SUBTYPE_PCM) {
            return MFAudioFormat_PCM;
        }
        break;
    }

    default:
        break;
    }
    return GUID_NULL;
}
|
||||
|
||||
// Produces a comparison key for a device name: alphanumeric characters are
// lowercased, every run of other characters becomes a single space, and
// leading/trailing separators are dropped.
std::wstring normalizeDeviceName(const std::wstring& value) {
    std::wstring normalized;
    normalized.reserve(value.size());

    // True when a separator has been seen since the last emitted character;
    // the space is only written once another alphanumeric character follows,
    // so no trailing space is ever produced.
    bool pendingSeparator = false;

    for (size_t i = 0; i < value.size(); ++i) {
        const wchar_t ch = value[i];
        if (!std::iswalnum(ch)) {
            pendingSeparator = !normalized.empty();
            continue;
        }
        if (pendingSeparator) {
            normalized.push_back(L' ');
            pendingSeparator = false;
        }
        normalized.push_back(static_cast<wchar_t>(std::towlower(ch)));
    }

    return normalized;
}
|
||||
|
||||
int scoreDeviceName(const std::wstring& candidateName, const std::wstring& candidateId, const std::wstring& requestedName) {
|
||||
const std::wstring candidate = normalizeDeviceName(candidateName);
|
||||
const std::wstring id = normalizeDeviceName(candidateId);
|
||||
const std::wstring requested = normalizeDeviceName(requestedName);
|
||||
if (requested.empty()) {
|
||||
return 0;
|
||||
}
|
||||
if (candidate == requested) {
|
||||
return 1000;
|
||||
}
|
||||
if (!candidate.empty() && (candidate.find(requested) != std::wstring::npos || requested.find(candidate) != std::wstring::npos)) {
|
||||
return 900;
|
||||
}
|
||||
if (!id.empty() && (id.find(requested) != std::wstring::npos || requested.find(id) != std::wstring::npos)) {
|
||||
return 800;
|
||||
}
|
||||
|
||||
int score = 0;
|
||||
size_t pos = 0;
|
||||
while (pos < requested.size()) {
|
||||
const size_t end = requested.find(L' ', pos);
|
||||
const std::wstring word = requested.substr(pos, end == std::wstring::npos ? std::wstring::npos : end - pos);
|
||||
if (word.size() > 1 && word != L"microphone" && word != L"mic" && word != L"audio" && word != L"input") {
|
||||
if (candidate.find(word) != std::wstring::npos) {
|
||||
score += 100;
|
||||
} else if (id.find(word) != std::wstring::npos) {
|
||||
score += 50;
|
||||
}
|
||||
}
|
||||
if (end == std::wstring::npos) {
|
||||
break;
|
||||
}
|
||||
pos = end + 1;
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
// Reads PKEY_Device_FriendlyName from the device's property store.
// Returns an empty string when `device` is null, the store cannot be opened,
// or the property is missing / not a wide string.
std::wstring getDeviceFriendlyName(IMMDevice* device) {
    std::wstring friendlyName;
    if (device == nullptr) {
        return friendlyName;
    }

    Microsoft::WRL::ComPtr<IPropertyStore> store;
    const HRESULT openResult = device->OpenPropertyStore(STGM_READ, &store);
    if (FAILED(openResult) || !store) {
        return friendlyName;
    }

    PROPVARIANT property;
    PropVariantInit(&property);
    const HRESULT readResult = store->GetValue(PKEY_Device_FriendlyName, &property);
    const bool hasString =
        SUCCEEDED(readResult) && property.vt == VT_LPWSTR && property.pwszVal;
    if (hasString) {
        friendlyName = property.pwszVal;
    }
    // Always release whatever GetValue may have stored in the variant.
    PropVariantClear(&property);
    return friendlyName;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stops capturing (joining the capture thread) and frees the mix format
// obtained from IAudioClient::GetMixFormat.
WasapiLoopbackCapture::~WasapiLoopbackCapture() {
    stop();

    WAVEFORMATEX* const format = mixFormat_;
    mixFormat_ = nullptr;
    if (format) {
        CoTaskMemFree(format);
    }
}
|
||||
|
||||
bool WasapiLoopbackCapture::initializeSystemLoopback() {
|
||||
return initialize(WasapiCaptureEndpoint::SystemLoopback, {}, {});
|
||||
}
|
||||
|
||||
bool WasapiLoopbackCapture::initializeMicrophone(const std::wstring& deviceId, const std::wstring& deviceName) {
|
||||
return initialize(WasapiCaptureEndpoint::Microphone, deviceId, deviceName);
|
||||
}
|
||||
|
||||
// Selects an audio endpoint and prepares a shared-mode WASAPI capture stream.
//
// endpoint   SystemLoopback captures the default render endpoint in loopback
//            mode; Microphone captures from a capture endpoint.
// deviceId   Microphone only: endpoint id to open directly. Empty or
//            "default" skips this step.
// deviceName Microphone only: used for a fuzzy name lookup when the id did
//            not resolve.
//
// If neither lookup yields a device, the default endpoint for the data flow
// is used. Returns true once the audio client, mix format, input format and
// capture client are all ready; false (after logging) otherwise.
//
// State left over from an earlier call is discarded first, so that calling
// this again neither leaks the previous mix format nor silently reuses a
// previously selected device.
bool WasapiLoopbackCapture::initialize(WasapiCaptureEndpoint endpoint, const std::wstring& deviceId, const std::wstring& deviceName) {
    const bool wantsMicrophone = endpoint == WasapiCaptureEndpoint::Microphone;

    captureClient_.Reset();
    audioClient_.Reset();
    device_.Reset();
    if (mixFormat_) {
        CoTaskMemFree(mixFormat_);
        mixFormat_ = nullptr;
    }

    HRESULT hr = CoCreateInstance(
        __uuidof(MMDeviceEnumerator),
        nullptr,
        CLSCTX_ALL,
        IID_PPV_ARGS(&deviceEnumerator_));
    if (!succeeded(hr, "CoCreateInstance(MMDeviceEnumerator)")) {
        return false;
    }

    // 1) Exact endpoint id, when one was supplied.
    if (wantsMicrophone && !deviceId.empty() && deviceId != L"default") {
        hr = deviceEnumerator_->GetDevice(deviceId.c_str(), &device_);
        if (FAILED(hr)) {
            std::wcerr << L"WARNING: Could not resolve microphone device id directly"
                       << std::endl;
            device_.Reset();
        }
    }

    // 2) Fuzzy match on the friendly name.
    if (wantsMicrophone && !device_ && !deviceName.empty()) {
        if (!resolveMicrophoneByName(deviceName)) {
            std::wcerr << L"WARNING: Could not resolve microphone by name; using default capture endpoint"
                       << std::endl;
        }
    }

    // 3) Default endpoint: render side for loopback, capture side otherwise.
    if (!device_) {
        const EDataFlow flow =
            endpoint == WasapiCaptureEndpoint::SystemLoopback ? eRender : eCapture;
        hr = deviceEnumerator_->GetDefaultAudioEndpoint(flow, eConsole, &device_);
        if (!succeeded(hr, "GetDefaultAudioEndpoint")) {
            return false;
        }
    }

    selectedDeviceName_ = getDeviceFriendlyName(device_.Get());

    hr = device_->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, &audioClient_);
    if (!succeeded(hr, "IMMDevice::Activate(IAudioClient)")) {
        return false;
    }

    hr = audioClient_->GetMixFormat(&mixFormat_);
    if (!succeeded(hr, "IAudioClient::GetMixFormat") || !mixFormat_) {
        return false;
    }

    if (!resolveInputFormat(mixFormat_)) {
        std::cerr << "ERROR: Unsupported WASAPI loopback mix format" << std::endl;
        return false;
    }

    const DWORD streamFlags =
        endpoint == WasapiCaptureEndpoint::SystemLoopback ? AUDCLNT_STREAMFLAGS_LOOPBACK : 0;
    hr = audioClient_->Initialize(
        AUDCLNT_SHAREMODE_SHARED,
        streamFlags,
        BufferDurationHns,
        0,
        mixFormat_,
        nullptr);
    if (!succeeded(hr, "IAudioClient::Initialize(loopback)")) {
        return false;
    }

    hr = audioClient_->GetService(IID_PPV_ARGS(&captureClient_));
    if (!succeeded(hr, "IAudioClient::GetService(IAudioCaptureClient)")) {
        return false;
    }

    return true;
}
|
||||
|
||||
bool WasapiLoopbackCapture::resolveMicrophoneByName(const std::wstring& deviceName) {
|
||||
if (!deviceEnumerator_ || deviceName.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMMDeviceCollection> devices;
|
||||
HRESULT hr = deviceEnumerator_->EnumAudioEndpoints(eCapture, DEVICE_STATE_ACTIVE, &devices);
|
||||
if (!succeeded(hr, "IMMDeviceEnumerator::EnumAudioEndpoints(eCapture)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT count = 0;
|
||||
hr = devices->GetCount(&count);
|
||||
if (!succeeded(hr, "IMMDeviceCollection::GetCount")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMMDevice> bestDevice;
|
||||
std::wstring bestId;
|
||||
std::wstring bestName;
|
||||
int bestScore = 0;
|
||||
for (UINT i = 0; i < count; ++i) {
|
||||
Microsoft::WRL::ComPtr<IMMDevice> candidate;
|
||||
hr = devices->Item(i, &candidate);
|
||||
if (FAILED(hr) || !candidate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
LPWSTR rawId = nullptr;
|
||||
std::wstring candidateId;
|
||||
if (SUCCEEDED(candidate->GetId(&rawId)) && rawId) {
|
||||
candidateId = rawId;
|
||||
CoTaskMemFree(rawId);
|
||||
}
|
||||
|
||||
const std::wstring candidateName = getDeviceFriendlyName(candidate.Get());
|
||||
const int score = scoreDeviceName(candidateName, candidateId, deviceName);
|
||||
std::wcerr << L"Native microphone candidate: " << candidateName << L" score=" << score << std::endl;
|
||||
if (score > bestScore) {
|
||||
bestScore = score;
|
||||
bestDevice = candidate;
|
||||
bestId = candidateId;
|
||||
bestName = candidateName;
|
||||
}
|
||||
}
|
||||
|
||||
if (!bestDevice || bestScore <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
device_ = bestDevice;
|
||||
std::wcerr << L"Selected native microphone endpoint: " << bestName << L" id=" << bestId << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Copies the relevant fields of the device mix format into inputFormat_.
// Returns false (leaving inputFormat_ untouched) when the format is neither
// float nor PCM, and false after filling it in when the sample rate, channel
// count or block alignment is zero.
bool WasapiLoopbackCapture::resolveInputFormat(WAVEFORMATEX* mixFormat) {
    const GUID detectedSubtype = audioSubtypeFromFormat(mixFormat);
    if (detectedSubtype == GUID_NULL) {
        return false;
    }

    AudioInputFormat& target = inputFormat_;
    target.subtype = detectedSubtype;
    target.sampleRate = mixFormat->nSamplesPerSec;
    target.channels = mixFormat->nChannels;
    target.bitsPerSample = mixFormat->wBitsPerSample;
    target.blockAlign = mixFormat->nBlockAlign;
    target.avgBytesPerSec = mixFormat->nAvgBytesPerSec;

    const bool usable = target.sampleRate > 0 && target.channels > 0 && target.blockAlign > 0;
    return usable;
}
|
||||
|
||||
// Starts the audio client and the background capture thread.
//
// callback  Receives every captured (or synthesized silent) buffer; must be
//           non-empty.
//
// Returns false when the object has not been initialized, the callback is
// empty, a capture thread from a previous start() has not been joined yet
// (call stop() first), or IAudioClient::Start fails.
bool WasapiLoopbackCapture::start(AudioCallback callback) {
    if (!audioClient_ || !captureClient_ || !callback) {
        return false;
    }
    // Assigning a new std::thread over a joinable one terminates the process,
    // so refuse to start twice without an intervening stop().
    if (thread_.joinable()) {
        return false;
    }

    callback_ = std::move(callback);
    stopRequested_ = false;
    writtenFrames_ = 0;
    lastDevicePositionEnd_ = 0;
    hasLastDevicePosition_ = false;

    HRESULT hr = audioClient_->Start();
    if (!succeeded(hr, "IAudioClient::Start")) {
        return false;
    }

    thread_ = std::thread([this] {
        captureLoop();
    });
    return true;
}
|
||||
|
||||
void WasapiLoopbackCapture::stop() {
|
||||
stopRequested_ = true;
|
||||
if (thread_.joinable()) {
|
||||
thread_.join();
|
||||
}
|
||||
if (audioClient_) {
|
||||
audioClient_->Stop();
|
||||
}
|
||||
}
|
||||
|
||||
// Format of the buffers delivered to the callback, as filled in by
// resolveInputFormat() during initialization.
const AudioInputFormat& WasapiLoopbackCapture::inputFormat() const {
    const AudioInputFormat& format = inputFormat_;
    return format;
}
|
||||
|
||||
const std::wstring& WasapiLoopbackCapture::selectedDeviceName() const {
|
||||
return selectedDeviceName_;
|
||||
}
|
||||
|
||||
void WasapiLoopbackCapture::captureLoop() {
|
||||
auto emitSilenceFrames = [&](uint64_t frames, int64_t timestampHns) {
|
||||
constexpr uint64_t MaxSilenceChunkFrames = 4800;
|
||||
uint64_t remainingFrames = frames;
|
||||
int64_t currentTimestampHns = timestampHns;
|
||||
while (remainingFrames > 0 && !stopRequested_) {
|
||||
const uint64_t chunkFrames = std::min<uint64_t>(remainingFrames, MaxSilenceChunkFrames);
|
||||
const DWORD chunkBytes = static_cast<DWORD>(chunkFrames * inputFormat_.blockAlign);
|
||||
const int64_t chunkDurationHns =
|
||||
static_cast<int64_t>((chunkFrames * HnsPerSecond) / inputFormat_.sampleRate);
|
||||
silenceBuffer_.assign(chunkBytes, 0);
|
||||
callback_(silenceBuffer_.data(), chunkBytes, currentTimestampHns, chunkDurationHns);
|
||||
remainingFrames -= chunkFrames;
|
||||
currentTimestampHns += chunkDurationHns;
|
||||
}
|
||||
};
|
||||
|
||||
while (!stopRequested_) {
|
||||
UINT32 packetFrames = 0;
|
||||
HRESULT hr = captureClient_->GetNextPacketSize(&packetFrames);
|
||||
if (FAILED(hr)) {
|
||||
std::cerr << "ERROR: IAudioCaptureClient::GetNextPacketSize failed (hr=0x" << std::hex
|
||||
<< hr << std::dec << ")" << std::endl;
|
||||
break;
|
||||
}
|
||||
|
||||
while (packetFrames > 0 && !stopRequested_) {
|
||||
BYTE* data = nullptr;
|
||||
UINT32 framesAvailable = 0;
|
||||
DWORD flags = 0;
|
||||
UINT64 devicePosition = 0;
|
||||
UINT64 qpcPosition = 0;
|
||||
|
||||
hr = captureClient_->GetBuffer(&data, &framesAvailable, &flags, &devicePosition, &qpcPosition);
|
||||
if (FAILED(hr)) {
|
||||
std::cerr << "ERROR: IAudioCaptureClient::GetBuffer failed (hr=0x" << std::hex
|
||||
<< hr << std::dec << ")" << std::endl;
|
||||
break;
|
||||
}
|
||||
|
||||
(void)qpcPosition;
|
||||
if (hasLastDevicePosition_ && devicePosition > lastDevicePositionEnd_) {
|
||||
const uint64_t gapFrames = devicePosition - lastDevicePositionEnd_;
|
||||
if ((flags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY) != 0 || gapFrames > framesAvailable) {
|
||||
const int64_t gapTimestampHns =
|
||||
static_cast<int64_t>((lastDevicePositionEnd_ * HnsPerSecond) / inputFormat_.sampleRate);
|
||||
emitSilenceFrames(gapFrames, gapTimestampHns);
|
||||
}
|
||||
}
|
||||
|
||||
const DWORD byteCount = framesAvailable * inputFormat_.blockAlign;
|
||||
const int64_t timestampHns =
|
||||
static_cast<int64_t>((devicePosition * HnsPerSecond) / inputFormat_.sampleRate);
|
||||
const int64_t durationHns =
|
||||
static_cast<int64_t>((static_cast<uint64_t>(framesAvailable) * HnsPerSecond) /
|
||||
inputFormat_.sampleRate);
|
||||
|
||||
if (byteCount > 0) {
|
||||
if ((flags & AUDCLNT_BUFFERFLAGS_SILENT) != 0 || !data) {
|
||||
silenceBuffer_.assign(byteCount, 0);
|
||||
callback_(silenceBuffer_.data(), byteCount, timestampHns, durationHns);
|
||||
} else {
|
||||
callback_(data, byteCount, timestampHns, durationHns);
|
||||
}
|
||||
}
|
||||
|
||||
writtenFrames_ += framesAvailable;
|
||||
lastDevicePositionEnd_ = devicePosition + framesAvailable;
|
||||
hasLastDevicePosition_ = true;
|
||||
captureClient_->ReleaseBuffer(framesAvailable);
|
||||
|
||||
hr = captureClient_->GetNextPacketSize(&packetFrames);
|
||||
if (FAILED(hr)) {
|
||||
std::cerr << "ERROR: IAudioCaptureClient::GetNextPacketSize failed (hr=0x"
|
||||
<< std::hex << hr << std::dec << ")" << std::endl;
|
||||
packetFrames = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(5));
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
#pragma once
|
||||
|
||||
#include "mf_encoder.h"
|
||||
|
||||
#include <Windows.h>
|
||||
#include <audioclient.h>
|
||||
#include <mmdeviceapi.h>
|
||||
#include <wrl/client.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
// Kind of audio endpoint a WasapiLoopbackCapture reads from.
enum class WasapiCaptureEndpoint {
    // Audio being played by the system, captured in loopback mode.
    SystemLoopback,
    // Audio recorded from a capture (input) endpoint.
    Microphone,
};
|
||||
|
||||
class WasapiLoopbackCapture {
|
||||
public:
|
||||
using AudioCallback = std::function<void(const BYTE* data, DWORD byteCount, int64_t timestampHns, int64_t durationHns)>;
|
||||
|
||||
WasapiLoopbackCapture() = default;
|
||||
~WasapiLoopbackCapture();
|
||||
|
||||
WasapiLoopbackCapture(const WasapiLoopbackCapture&) = delete;
|
||||
WasapiLoopbackCapture& operator=(const WasapiLoopbackCapture&) = delete;
|
||||
|
||||
bool initializeSystemLoopback();
|
||||
bool initializeMicrophone(const std::wstring& deviceId, const std::wstring& deviceName);
|
||||
bool start(AudioCallback callback);
|
||||
void stop();
|
||||
|
||||
const AudioInputFormat& inputFormat() const;
|
||||
const std::wstring& selectedDeviceName() const;
|
||||
|
||||
private:
|
||||
bool initialize(WasapiCaptureEndpoint endpoint, const std::wstring& deviceId, const std::wstring& deviceName);
|
||||
bool resolveMicrophoneByName(const std::wstring& deviceName);
|
||||
void captureLoop();
|
||||
bool resolveInputFormat(WAVEFORMATEX* mixFormat);
|
||||
|
||||
Microsoft::WRL::ComPtr<IMMDeviceEnumerator> deviceEnumerator_;
|
||||
Microsoft::WRL::ComPtr<IMMDevice> device_;
|
||||
Microsoft::WRL::ComPtr<IAudioClient> audioClient_;
|
||||
Microsoft::WRL::ComPtr<IAudioCaptureClient> captureClient_;
|
||||
WAVEFORMATEX* mixFormat_ = nullptr;
|
||||
AudioInputFormat inputFormat_{};
|
||||
std::wstring selectedDeviceName_;
|
||||
AudioCallback callback_;
|
||||
std::thread thread_;
|
||||
std::atomic<bool> stopRequested_ = false;
|
||||
std::vector<BYTE> silenceBuffer_;
|
||||
uint64_t writtenFrames_ = 0;
|
||||
uint64_t lastDevicePositionEnd_ = 0;
|
||||
bool hasLastDevicePosition_ = false;
|
||||
};
|
||||
@@ -0,0 +1,419 @@
|
||||
#include "webcam_capture.h"
|
||||
|
||||
#include <mfapi.h>
|
||||
#include <mferror.h>
|
||||
#include <propvarutil.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cwctype>
|
||||
#include <iostream>
|
||||
|
||||
namespace {
|
||||
|
||||
// Success check with logging: returns true for a success HRESULT; otherwise
// prints the failed operation (`label`) and the code in hex to stderr and
// returns false.
bool succeeded(HRESULT hr, const char* label) {
    if (FAILED(hr)) {
        std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")"
                  << std::endl;
        return false;
    }
    return true;
}
|
||||
|
||||
// Reads a string attribute from an activation object and returns a copy of
// it, releasing the buffer the API allocated. Returns an empty string when
// the attribute cannot be read.
std::wstring readAllocatedString(IMFActivate* activate, REFGUID key) {
    WCHAR* buffer = nullptr;
    UINT32 charCount = 0;
    const HRESULT hr = activate->GetAllocatedString(key, &buffer, &charCount);
    if (FAILED(hr) || !buffer) {
        return std::wstring();
    }

    // Copy exactly `charCount` characters before the buffer is freed.
    std::wstring text(buffer, charCount);
    CoTaskMemFree(buffer);
    return text;
}
|
||||
|
||||
// Case-insensitive, symmetric containment test: true when either string
// contains the other after lowercasing. Returns false if either is empty.
bool containsInsensitive(const std::wstring& haystack, const std::wstring& needle) {
    if (haystack.empty() || needle.empty()) {
        return false;
    }

    const auto lowered = [](const std::wstring& text) {
        std::wstring copy;
        copy.reserve(text.size());
        for (const wchar_t ch : text) {
            copy.push_back(static_cast<wchar_t>(::towlower(ch)));
        }
        return copy;
    };

    const std::wstring first = lowered(haystack);
    const std::wstring second = lowered(needle);

    if (first.find(second) != std::wstring::npos) {
        return true;
    }
    return second.find(first) != std::wstring::npos;
}
|
||||
|
||||
// Reduces a device name to lowercase alphanumeric words separated by single
// spaces; punctuation and whitespace act as word separators and never appear
// at the start or end of the result.
std::wstring normalizeDeviceName(const std::wstring& value) {
    std::wstring normalized;
    normalized.reserve(value.size());

    const size_t length = value.size();
    size_t pos = 0;
    while (pos < length) {
        // Skip the separator run in front of the next word.
        while (pos < length && !std::iswalnum(value[pos])) {
            ++pos;
        }
        if (pos == length) {
            break;
        }

        if (!normalized.empty()) {
            normalized.push_back(L' ');
        }

        // Copy the word, lowercased.
        while (pos < length && std::iswalnum(value[pos])) {
            normalized.push_back(static_cast<wchar_t>(std::towlower(value[pos])));
            ++pos;
        }
    }
    return normalized;
}
|
||||
|
||||
// Splits a space-separated string into its significant words. Words of one
// character or less and the generic words "camera", "webcam", "video" and
// "input" are left out.
std::vector<std::wstring> splitWords(const std::wstring& value) {
    static const wchar_t* const genericWords[] = {L"camera", L"webcam", L"video", L"input"};

    std::vector<std::wstring> words;
    size_t cursor = 0;
    while (cursor < value.size()) {
        size_t stop = value.find(L' ', cursor);
        if (stop == std::wstring::npos) {
            stop = value.size();
        }
        const std::wstring token = value.substr(cursor, stop - cursor);
        cursor = stop + 1;

        if (token.size() <= 1) {
            continue;
        }

        bool generic = false;
        for (const wchar_t* candidate : genericWords) {
            if (token == candidate) {
                generic = true;
                break;
            }
        }
        if (!generic) {
            words.push_back(token);
        }
    }
    return words;
}
|
||||
|
||||
int deviceMatchScore(
|
||||
const std::wstring& candidateName,
|
||||
const std::wstring& candidateLink,
|
||||
const std::wstring& requestedName,
|
||||
const std::wstring& requestedId) {
|
||||
int score = 0;
|
||||
const auto normalizedName = normalizeDeviceName(candidateName);
|
||||
const auto normalizedLink = normalizeDeviceName(candidateLink);
|
||||
const auto normalizedRequestedName = normalizeDeviceName(requestedName);
|
||||
const auto normalizedRequestedId = normalizeDeviceName(requestedId);
|
||||
|
||||
if (!normalizedRequestedName.empty()) {
|
||||
if (normalizedName == normalizedRequestedName) {
|
||||
score = std::max(score, 1000);
|
||||
}
|
||||
if (containsInsensitive(normalizedName, normalizedRequestedName)) {
|
||||
score = std::max(score, 900);
|
||||
}
|
||||
if (containsInsensitive(normalizedLink, normalizedRequestedName)) {
|
||||
score = std::max(score, 800);
|
||||
}
|
||||
|
||||
int wordScore = 0;
|
||||
for (const auto& word : splitWords(normalizedRequestedName)) {
|
||||
if (normalizedName.find(word) != std::wstring::npos) {
|
||||
wordScore += 100;
|
||||
} else if (normalizedLink.find(word) != std::wstring::npos) {
|
||||
wordScore += 50;
|
||||
}
|
||||
}
|
||||
score = std::max(score, wordScore);
|
||||
}
|
||||
|
||||
if (!normalizedRequestedId.empty()) {
|
||||
if (containsInsensitive(normalizedLink, normalizedRequestedId)) {
|
||||
score = std::max(score, 700);
|
||||
}
|
||||
if (containsInsensitive(normalizedName, normalizedRequestedId)) {
|
||||
score = std::max(score, 600);
|
||||
}
|
||||
}
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Ends capture and releases everything stop() releases.
WebcamCapture::~WebcamCapture() {
    this->stop();
}
|
||||
|
||||
bool WebcamCapture::initialize(
|
||||
const std::wstring& deviceId,
|
||||
const std::wstring& deviceName,
|
||||
const std::wstring& directShowClsid,
|
||||
int requestedWidth,
|
||||
int requestedHeight,
|
||||
int requestedFps) {
|
||||
fps_ = std::clamp(requestedFps > 0 ? requestedFps : 30, 1, 60);
|
||||
usingDirectShow_ = false;
|
||||
selectedMatchScore_ = 0;
|
||||
if (!succeeded(MFStartup(MF_VERSION), "MFStartup(webcam)")) {
|
||||
if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) {
|
||||
usingDirectShow_ = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
mfStarted_ = true;
|
||||
if (!selectDevice(deviceId, deviceName)) {
|
||||
if (mfStarted_) {
|
||||
MFShutdown();
|
||||
mfStarted_ = false;
|
||||
}
|
||||
if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) {
|
||||
usingDirectShow_ = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((!deviceId.empty() || !deviceName.empty()) && selectedMatchScore_ <= 0) {
|
||||
if (mediaSource_) {
|
||||
mediaSource_->Shutdown();
|
||||
}
|
||||
sourceReader_.Reset();
|
||||
mediaSource_.Reset();
|
||||
if (mfStarted_) {
|
||||
MFShutdown();
|
||||
mfStarted_ = false;
|
||||
}
|
||||
if (directShowCapture_.initialize(deviceId, deviceName, directShowClsid, requestedWidth, requestedHeight, fps_)) {
|
||||
usingDirectShow_ = true;
|
||||
return true;
|
||||
}
|
||||
std::cerr << "ERROR: Requested webcam device was not found by native Windows webcam providers"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return configureReader(requestedWidth, requestedHeight, fps_);
|
||||
}
|
||||
|
||||
bool WebcamCapture::selectDevice(const std::wstring& deviceId, const std::wstring& deviceName) {
|
||||
Microsoft::WRL::ComPtr<IMFAttributes> attributes;
|
||||
if (!succeeded(MFCreateAttributes(&attributes, 1), "MFCreateAttributes(webcam enumeration)")) {
|
||||
return false;
|
||||
}
|
||||
if (!succeeded(attributes->SetGUID(
|
||||
MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE,
|
||||
MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID),
|
||||
"SetGUID(webcam source type)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
IMFActivate** devices = nullptr;
|
||||
UINT32 deviceCount = 0;
|
||||
HRESULT hr = MFEnumDeviceSources(attributes.Get(), &devices, &deviceCount);
|
||||
if (!succeeded(hr, "MFEnumDeviceSources") || deviceCount == 0) {
|
||||
if (devices) {
|
||||
CoTaskMemFree(devices);
|
||||
}
|
||||
std::cerr << "ERROR: No native Windows webcam devices were found" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT32 selectedIndex = 0;
|
||||
int bestScore = 0;
|
||||
for (UINT32 index = 0; index < deviceCount; index += 1) {
|
||||
const std::wstring name = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
|
||||
const std::wstring symbolicLink = readAllocatedString(devices[index], MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_SYMBOLIC_LINK);
|
||||
const int score = deviceMatchScore(name, symbolicLink, deviceName, deviceId);
|
||||
std::wcerr << L"INFO: Native webcam candidate [" << index << L"] name=\"" << name << L"\" score=" << score << std::endl;
|
||||
if (score > bestScore) {
|
||||
selectedIndex = index;
|
||||
bestScore = score;
|
||||
}
|
||||
}
|
||||
|
||||
if ((!deviceId.empty() || !deviceName.empty()) && bestScore <= 0) {
|
||||
std::cerr << "WARNING: Requested webcam device was not found by Media Foundation; trying DirectShow"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
selectedMatchScore_ = bestScore;
|
||||
selectedDeviceName_ = readAllocatedString(devices[selectedIndex], MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME);
|
||||
hr = devices[selectedIndex]->ActivateObject(IID_PPV_ARGS(&mediaSource_));
|
||||
|
||||
for (UINT32 index = 0; index < deviceCount; index += 1) {
|
||||
devices[index]->Release();
|
||||
}
|
||||
CoTaskMemFree(devices);
|
||||
|
||||
return succeeded(hr, "ActivateObject(webcam)");
|
||||
}
|
||||
|
||||
bool WebcamCapture::configureReader(int requestedWidth, int requestedHeight, int requestedFps) {
|
||||
Microsoft::WRL::ComPtr<IMFAttributes> attributes;
|
||||
if (!succeeded(MFCreateAttributes(&attributes, 2), "MFCreateAttributes(webcam reader)")) {
|
||||
return false;
|
||||
}
|
||||
attributes->SetUINT32(MF_SOURCE_READER_ENABLE_VIDEO_PROCESSING, TRUE);
|
||||
attributes->SetUINT32(MF_READWRITE_DISABLE_CONVERTERS, FALSE);
|
||||
|
||||
if (!succeeded(MFCreateSourceReaderFromMediaSource(mediaSource_.Get(), attributes.Get(), &sourceReader_),
|
||||
"MFCreateSourceReaderFromMediaSource(webcam)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaType> mediaType;
|
||||
if (!succeeded(MFCreateMediaType(&mediaType), "MFCreateMediaType(webcam output)")) {
|
||||
return false;
|
||||
}
|
||||
mediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
|
||||
mediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_RGB32);
|
||||
if (requestedWidth > 0 && requestedHeight > 0) {
|
||||
MFSetAttributeSize(mediaType.Get(), MF_MT_FRAME_SIZE, static_cast<UINT32>(requestedWidth), static_cast<UINT32>(requestedHeight));
|
||||
}
|
||||
MFSetAttributeRatio(mediaType.Get(), MF_MT_FRAME_RATE, static_cast<UINT32>(std::max(1, requestedFps)), 1);
|
||||
|
||||
if (!succeeded(sourceReader_->SetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, nullptr, mediaType.Get()),
|
||||
"SetCurrentMediaType(webcam RGB32)")) {
|
||||
return false;
|
||||
}
|
||||
sourceReader_->SetStreamSelection(MF_SOURCE_READER_ALL_STREAMS, FALSE);
|
||||
sourceReader_->SetStreamSelection(MF_SOURCE_READER_FIRST_VIDEO_STREAM, TRUE);
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaType> currentType;
|
||||
if (!succeeded(sourceReader_->GetCurrentMediaType(MF_SOURCE_READER_FIRST_VIDEO_STREAM, ¤tType),
|
||||
"GetCurrentMediaType(webcam)")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
UINT32 width = 0;
|
||||
UINT32 height = 0;
|
||||
if (FAILED(MFGetAttributeSize(currentType.Get(), MF_MT_FRAME_SIZE, &width, &height)) || width == 0 || height == 0) {
|
||||
width = static_cast<UINT32>(requestedWidth > 0 ? requestedWidth : 1280);
|
||||
height = static_cast<UINT32>(requestedHeight > 0 ? requestedHeight : 720);
|
||||
}
|
||||
width_ = static_cast<int>(width);
|
||||
height_ = static_cast<int>(height);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool WebcamCapture::start() {
|
||||
if (usingDirectShow_) {
|
||||
return directShowCapture_.start();
|
||||
}
|
||||
if (!sourceReader_ || thread_.joinable()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
stopRequested_ = false;
|
||||
thread_ = std::thread(&WebcamCapture::captureLoop, this);
|
||||
return true;
|
||||
}
|
||||
|
||||
void WebcamCapture::stop() {
|
||||
directShowCapture_.stop();
|
||||
stopRequested_ = true;
|
||||
if (thread_.joinable()) {
|
||||
thread_.join();
|
||||
}
|
||||
if (mediaSource_) {
|
||||
mediaSource_->Shutdown();
|
||||
}
|
||||
sourceReader_.Reset();
|
||||
mediaSource_.Reset();
|
||||
if (mfStarted_) {
|
||||
MFShutdown();
|
||||
mfStarted_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void WebcamCapture::captureLoop() {
|
||||
CoInitializeEx(nullptr, COINIT_MULTITHREADED);
|
||||
|
||||
while (!stopRequested_) {
|
||||
DWORD streamIndex = 0;
|
||||
DWORD flags = 0;
|
||||
LONGLONG timestamp = 0;
|
||||
Microsoft::WRL::ComPtr<IMFSample> sample;
|
||||
HRESULT hr = sourceReader_->ReadSample(
|
||||
MF_SOURCE_READER_FIRST_VIDEO_STREAM,
|
||||
0,
|
||||
&streamIndex,
|
||||
&flags,
|
||||
×tamp,
|
||||
&sample);
|
||||
(void)streamIndex;
|
||||
(void)timestamp;
|
||||
|
||||
if (FAILED(hr)) {
|
||||
std::cerr << "WARNING: Failed to read webcam sample (hr=0x" << std::hex << hr << std::dec << ")"
|
||||
<< std::endl;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(20));
|
||||
continue;
|
||||
}
|
||||
if ((flags & MF_SOURCE_READERF_ENDOFSTREAM) != 0) {
|
||||
break;
|
||||
}
|
||||
if (!sample) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaBuffer> buffer;
|
||||
if (FAILED(sample->ConvertToContiguousBuffer(&buffer)) || !buffer) {
|
||||
continue;
|
||||
}
|
||||
|
||||
BYTE* data = nullptr;
|
||||
DWORD maxLength = 0;
|
||||
DWORD currentLength = 0;
|
||||
if (FAILED(buffer->Lock(&data, &maxLength, ¤tLength)) || !data) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const DWORD expectedLength = static_cast<DWORD>(std::max(0, width_) * std::max(0, height_) * 4);
|
||||
if (currentLength >= expectedLength && expectedLength > 0) {
|
||||
std::scoped_lock lock(frameMutex_);
|
||||
latestFrame_.assign(data, data + expectedLength);
|
||||
latestFrameSequence_ += 1;
|
||||
}
|
||||
|
||||
buffer->Unlock();
|
||||
}
|
||||
|
||||
CoUninitialize();
|
||||
}
|
||||
|
||||
// Copies the most recently captured frame into `destination`.
// Forwards to the DirectShow backend when that path is active. Otherwise,
// under frameMutex_, fills data/width/height/sequence and returns true;
// returns false (leaving `destination` untouched) when no frame has been
// stored yet or the recorded dimensions are not positive.
bool WebcamCapture::copyLatestFrame(WebcamFrameSnapshot& destination) {
    if (usingDirectShow_) {
        return directShowCapture_.copyLatestFrame(destination);
    }

    std::scoped_lock guard(frameMutex_);
    const bool frameAvailable = !latestFrame_.empty() && width_ > 0 && height_ > 0;
    if (frameAvailable) {
        destination.data = latestFrame_;
        destination.width = width_;
        destination.height = height_;
        destination.sequence = latestFrameSequence_;
    }
    return frameAvailable;
}
|
||||
|
||||
int WebcamCapture::width() const {
|
||||
if (usingDirectShow_) {
|
||||
return directShowCapture_.width();
|
||||
}
|
||||
return width_;
|
||||
}
|
||||
|
||||
int WebcamCapture::height() const {
|
||||
if (usingDirectShow_) {
|
||||
return directShowCapture_.height();
|
||||
}
|
||||
return height_;
|
||||
}
|
||||
|
||||
int WebcamCapture::fps() const {
|
||||
if (usingDirectShow_) {
|
||||
return directShowCapture_.fps();
|
||||
}
|
||||
return fps_;
|
||||
}
|
||||
|
||||
const std::wstring& WebcamCapture::selectedDeviceName() const {
|
||||
if (usingDirectShow_) {
|
||||
return directShowCapture_.selectedDeviceName();
|
||||
}
|
||||
return selectedDeviceName_;
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
#pragma once
|
||||
|
||||
#include "dshow_webcam_capture.h"
|
||||
|
||||
#include <Windows.h>
|
||||
#include <mfidl.h>
|
||||
#include <mfreadwrite.h>
|
||||
#include <wrl/client.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
class WebcamCapture {
|
||||
public:
|
||||
WebcamCapture() = default;
|
||||
~WebcamCapture();
|
||||
|
||||
WebcamCapture(const WebcamCapture&) = delete;
|
||||
WebcamCapture& operator=(const WebcamCapture&) = delete;
|
||||
|
||||
bool initialize(
|
||||
const std::wstring& deviceId,
|
||||
const std::wstring& deviceName,
|
||||
const std::wstring& directShowClsid,
|
||||
int requestedWidth,
|
||||
int requestedHeight,
|
||||
int requestedFps);
|
||||
bool start();
|
||||
void stop();
|
||||
bool copyLatestFrame(WebcamFrameSnapshot& destination);
|
||||
|
||||
int width() const;
|
||||
int height() const;
|
||||
int fps() const;
|
||||
const std::wstring& selectedDeviceName() const;
|
||||
|
||||
private:
|
||||
bool selectDevice(const std::wstring& deviceId, const std::wstring& deviceName);
|
||||
bool configureReader(int requestedWidth, int requestedHeight, int requestedFps);
|
||||
void captureLoop();
|
||||
|
||||
Microsoft::WRL::ComPtr<IMFMediaSource> mediaSource_;
|
||||
Microsoft::WRL::ComPtr<IMFSourceReader> sourceReader_;
|
||||
DirectShowWebcamCapture directShowCapture_;
|
||||
std::thread thread_;
|
||||
std::atomic<bool> stopRequested_ = false;
|
||||
std::mutex frameMutex_;
|
||||
std::vector<BYTE> latestFrame_;
|
||||
uint64_t latestFrameSequence_ = 0;
|
||||
int width_ = 0;
|
||||
int height_ = 0;
|
||||
int fps_ = 30;
|
||||
bool mfStarted_ = false;
|
||||
bool usingDirectShow_ = false;
|
||||
int selectedMatchScore_ = 0;
|
||||
std::wstring selectedDeviceName_;
|
||||
};
|
||||
@@ -0,0 +1,390 @@
|
||||
#include "wgc_session.h"
|
||||
|
||||
#include <Windows.Graphics.Capture.Interop.h>
|
||||
#include <dxgi1_2.h>
|
||||
#include <inspectable.h>
|
||||
#include <winrt/base.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace wf = winrt::Windows::Foundation;
|
||||
namespace wgcap = winrt::Windows::Graphics::Capture;
|
||||
namespace wgdx = winrt::Windows::Graphics::DirectX;
|
||||
namespace wgd3d = winrt::Windows::Graphics::DirectX::Direct3D11;
|
||||
|
||||
extern "C" HRESULT __stdcall CreateDirect3D11DeviceFromDXGIDevice(
|
||||
::IDXGIDevice* dxgiDevice,
|
||||
::IInspectable** graphicsDevice);
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns true when `hr` is a success code. On failure, writes
// "ERROR: <label> failed (hr=0x...)" to stderr and returns false.
bool succeeded(HRESULT hr, const char* label) {
    const bool ok = SUCCEEDED(hr);
    if (!ok) {
        std::cerr << "ERROR: " << label << " failed (hr=0x" << std::hex << hr << std::dec << ")"
                  << std::endl;
    }
    return ok;
}
|
||||
|
||||
// Finds the hardware DXGI adapter whose output is attached to `monitor`.
//
// Returns the matching adapter and emits a "display-adapter-resolved" JSON
// event on stdout. Returns nullptr when `monitor` is null, the DXGI factory
// cannot be created, or no adapter output matches (a warning is written to
// stderr in the latter two cases). Software adapters are skipped.
//
// Enumeration stops at the first error of any kind, not only
// DXGI_ERROR_NOT_FOUND: an error that repeats for every index (for example
// DXGI_ERROR_NOT_CURRENTLY_AVAILABLE from EnumOutputs in session 0) would
// otherwise keep these unbounded loops running forever.
Microsoft::WRL::ComPtr<IDXGIAdapter1> findAdapterForMonitor(HMONITOR monitor) {
    if (!monitor) {
        return nullptr;
    }

    Microsoft::WRL::ComPtr<IDXGIFactory1> factory;
    HRESULT hr = CreateDXGIFactory1(IID_PPV_ARGS(&factory));
    if (FAILED(hr) || !factory) {
        std::cerr << "WARNING: CreateDXGIFactory1 failed while resolving monitor adapter (hr=0x"
                  << std::hex << hr << std::dec << ")" << std::endl;
        return nullptr;
    }

    for (UINT adapterIndex = 0;; ++adapterIndex) {
        Microsoft::WRL::ComPtr<IDXGIAdapter1> adapter;
        hr = factory->EnumAdapters1(adapterIndex, adapter.GetAddressOf());
        if (hr == DXGI_ERROR_NOT_FOUND) {
            break;
        }
        if (FAILED(hr) || !adapter) {
            // Unexpected failure: later indices are not expected to succeed, so stop.
            std::cerr << "WARNING: EnumAdapters1 failed while resolving monitor adapter (hr=0x"
                      << std::hex << hr << std::dec << ")" << std::endl;
            break;
        }

        DXGI_ADAPTER_DESC1 adapterDesc{};
        if (SUCCEEDED(adapter->GetDesc1(&adapterDesc)) &&
            (adapterDesc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0) {
            continue;
        }

        for (UINT outputIndex = 0;; ++outputIndex) {
            Microsoft::WRL::ComPtr<IDXGIOutput> output;
            hr = adapter->EnumOutputs(outputIndex, output.GetAddressOf());
            if (hr == DXGI_ERROR_NOT_FOUND) {
                break;
            }
            if (FAILED(hr) || !output) {
                // Give up on this adapter's outputs and move on to the next adapter.
                std::cerr << "WARNING: EnumOutputs failed while resolving monitor adapter (hr=0x"
                          << std::hex << hr << std::dec << ")" << std::endl;
                break;
            }

            DXGI_OUTPUT_DESC outputDesc{};
            if (SUCCEEDED(output->GetDesc(&outputDesc)) && outputDesc.Monitor == monitor) {
                std::cout << "{\"event\":\"display-adapter-resolved\",\"schemaVersion\":2,"
                          << "\"adapterIndex\":" << adapterIndex
                          << ",\"outputIndex\":" << outputIndex << "}" << std::endl;
                return adapter;
            }
        }
    }

    std::cerr << "WARNING: Could not resolve DXGI adapter for selected monitor; using default adapter"
              << std::endl;
    return nullptr;
}
|
||||
|
||||
// Returns the raw tick count stored in a WinRT TimeSpan as a 64-bit integer.
int64_t timeSpanToHns(wf::TimeSpan const& value) {
    const int64_t ticks = value.count();
    return ticks;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Tears the session down through stop() so the capture objects and the D3D
// device are released when the owner goes away.
WgcSession::~WgcSession() {
    this->stop();
}
|
||||
|
||||
// Creates the D3D11 device/context (BGRA support enabled) and wraps the device
// as a WinRT IDirect3DDevice in winrtDevice_.
//
// `adapter` may be null, in which case the default hardware adapter is used.
// Fallbacks, in order: in _DEBUG builds a failed attempt is retried without
// the debug layer; if a specific adapter was requested and creation still
// fails, a warning is logged and the default adapter is tried.
// Returns false (after logging) if any step fails.
bool WgcSession::createD3DDevice(IDXGIAdapter* adapter) {
    UINT flags = D3D11_CREATE_DEVICE_BGRA_SUPPORT;
#if defined(_DEBUG)
    flags |= D3D11_CREATE_DEVICE_DEBUG;
#endif

    D3D_FEATURE_LEVEL featureLevels[] = {
        D3D_FEATURE_LEVEL_11_1,
        D3D_FEATURE_LEVEL_11_0,
        D3D_FEATURE_LEVEL_10_1,
        D3D_FEATURE_LEVEL_10_0,
    };
    const UINT featureLevelCount = ARRAYSIZE(featureLevels);
    D3D_FEATURE_LEVEL featureLevel{};

    // One creation attempt against `target`; reads the current value of `flags`.
    // A null target selects the default hardware adapter.
    auto attemptCreate = [&](IDXGIAdapter* target) -> HRESULT {
        return D3D11CreateDevice(
            target,
            target ? D3D_DRIVER_TYPE_UNKNOWN : D3D_DRIVER_TYPE_HARDWARE,
            nullptr,
            flags,
            featureLevels,
            featureLevelCount,
            D3D11_SDK_VERSION,
            &d3dDevice_,
            &featureLevel,
            &d3dContext_);
    };

    HRESULT hr = attemptCreate(adapter);

#if defined(_DEBUG)
    if (FAILED(hr)) {
        // Retry without the debug layer flag.
        flags &= ~D3D11_CREATE_DEVICE_DEBUG;
        hr = attemptCreate(adapter);
    }
#endif

    if (adapter && FAILED(hr)) {
        std::cerr << "WARNING: D3D11CreateDevice failed for selected monitor adapter (hr=0x"
                  << std::hex << hr << std::dec << "); retrying default adapter" << std::endl;
        hr = attemptCreate(nullptr);
    }

    if (!succeeded(hr, "D3D11CreateDevice")) {
        return false;
    }

    Microsoft::WRL::ComPtr<IDXGIDevice> dxgiDevice;
    if (!succeeded(d3dDevice_.As(&dxgiDevice), "Query IDXGIDevice")) {
        return false;
    }

    winrt::com_ptr<::IInspectable> inspectableDevice;
    const HRESULT wrapResult =
        CreateDirect3D11DeviceFromDXGIDevice(dxgiDevice.Get(), inspectableDevice.put());
    if (!succeeded(wrapResult, "CreateDirect3D11DeviceFromDXGIDevice")) {
        return false;
    }

    winrtDevice_ = inspectableDevice.as<wgd3d::IDirect3DDevice>();
    return true;
}
|
||||
|
||||
// Creates the D3D device on the adapter that drives `monitor`. When no adapter
// can be resolved the lookup yields null and createD3DDevice() falls back to
// the default hardware adapter.
bool WgcSession::createD3DDeviceForMonitor(HMONITOR monitor) {
    const Microsoft::WRL::ComPtr<IDXGIAdapter1> monitorAdapter = findAdapterForMonitor(monitor);
    IDXGIAdapter* rawAdapter = monitorAdapter.Get();
    return createD3DDevice(rawAdapter);
}
|
||||
|
||||
// Builds a GraphicsCaptureItem for `monitor` through the capture-item interop
// factory, stores it in item_ and records its size in width_/height_.
// Returns false when CreateForMonitor fails (logged) or the reported size is
// not positive in both dimensions.
bool WgcSession::createCaptureItem(HMONITOR monitor) {
    auto activationFactory = winrt::get_activation_factory<wgcap::GraphicsCaptureItem>();
    auto interopFactory = activationFactory.as<IGraphicsCaptureItemInterop>();

    wgcap::GraphicsCaptureItem created{nullptr};
    const HRESULT createResult = interopFactory->CreateForMonitor(
        monitor,
        winrt::guid_of<wgcap::GraphicsCaptureItem>(),
        reinterpret_cast<void**>(winrt::put_abi(created)));
    if (!succeeded(createResult, "CreateForMonitor")) {
        return false;
    }

    item_ = created;
    const auto itemSize = item_.Size();
    width_ = static_cast<int>(itemSize.Width);
    height_ = static_cast<int>(itemSize.Height);

    const bool hasArea = width_ > 0 && height_ > 0;
    return hasArea;
}
|
||||
|
||||
// Builds a GraphicsCaptureItem for `window` through the capture-item interop
// factory, stores it in item_ and records its size in width_/height_.
// Returns false when CreateForWindow fails (logged) or the reported size is
// not positive in both dimensions.
bool WgcSession::createCaptureItem(HWND window) {
    auto activationFactory = winrt::get_activation_factory<wgcap::GraphicsCaptureItem>();
    auto interopFactory = activationFactory.as<IGraphicsCaptureItemInterop>();

    wgcap::GraphicsCaptureItem created{nullptr};
    const HRESULT createResult = interopFactory->CreateForWindow(
        window,
        winrt::guid_of<wgcap::GraphicsCaptureItem>(),
        reinterpret_cast<void**>(winrt::put_abi(created)));
    if (!succeeded(createResult, "CreateForWindow")) {
        return false;
    }

    item_ = created;
    const auto itemSize = item_.Size();
    width_ = static_cast<int>(itemSize.Width);
    height_ = static_cast<int>(itemSize.Height);

    const bool hasArea = width_ > 0 && height_ > 0;
    return hasArea;
}
|
||||
|
||||
bool WgcSession::applySessionOptions(bool captureCursor) {
|
||||
captureCursor_ = captureCursor;
|
||||
|
||||
try {
|
||||
auto session2 = session_.try_as<wgcap::IGraphicsCaptureSession2>();
|
||||
if (!session2) {
|
||||
if (!captureCursor) {
|
||||
std::cerr << "ERROR: WGC cursor suppression is not supported by this Windows runtime"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
session2.IsCursorCaptureEnabled(captureCursor);
|
||||
const bool appliedCursorCapture = session2.IsCursorCaptureEnabled();
|
||||
std::cout << "{\"event\":\"cursor-capture\",\"schemaVersion\":2,\"requested\":"
|
||||
<< (captureCursor ? "true" : "false")
|
||||
<< ",\"applied\":" << (appliedCursorCapture ? "true" : "false") << "}"
|
||||
<< std::endl;
|
||||
|
||||
if (appliedCursorCapture != captureCursor) {
|
||||
std::cerr << "ERROR: WGC cursor capture setting did not apply" << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} catch (winrt::hresult_error const& error) {
|
||||
std::cerr << "ERROR: Failed to configure WGC cursor capture (hr=0x" << std::hex
|
||||
<< static_cast<uint32_t>(error.code()) << std::dec << ")" << std::endl;
|
||||
if (!captureCursor) {
|
||||
return false;
|
||||
}
|
||||
} catch (...) {
|
||||
std::cerr << "ERROR: Failed to configure WGC cursor capture" << std::endl;
|
||||
if (!captureCursor) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
session_.IsBorderRequired(false);
|
||||
} catch (...) {
|
||||
// IsBorderRequired is Windows 11-only. Ignore it on older builds.
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Prepares a capture session for a whole monitor.
// Steps: record the frame rate (non-positive values become 60), create the D3D
// device on the monitor's adapter, create the capture item, build a
// free-threaded two-buffer BGRA frame pool sized to the item, create the
// session, apply the cursor/border options and subscribe onFrameArrived.
// Returns false as soon as any of the bool-returning steps fails.
bool WgcSession::initialize(HMONITOR monitor, int fps, bool captureCursor) {
    fps_ = (fps <= 0) ? 60 : fps;

    // Short-circuit keeps the original order: device first, then capture item.
    if (!createD3DDeviceForMonitor(monitor) || !createCaptureItem(monitor)) {
        return false;
    }

    const auto poolSize = item_.Size();
    framePool_ = wgcap::Direct3D11CaptureFramePool::CreateFreeThreaded(
        winrtDevice_,
        wgdx::DirectXPixelFormat::B8G8R8A8UIntNormalized,
        2,
        poolSize);
    session_ = framePool_.CreateCaptureSession(item_);

    const bool optionsApplied = applySessionOptions(captureCursor);
    if (optionsApplied) {
        frameArrivedToken_ = framePool_.FrameArrived({this, &WgcSession::onFrameArrived});
    }
    return optionsApplied;
}
|
||||
|
||||
// Prepares a capture session for a single window.
// Steps: record the frame rate (non-positive values become 60), create the D3D
// device on the default adapter, create the capture item, build a
// free-threaded two-buffer BGRA frame pool sized to the item, create the
// session, apply the cursor/border options and subscribe onFrameArrived.
// Returns false as soon as any of the bool-returning steps fails.
bool WgcSession::initialize(HWND window, int fps, bool captureCursor) {
    fps_ = (fps <= 0) ? 60 : fps;

    // Short-circuit keeps the original order: device first, then capture item.
    if (!createD3DDevice() || !createCaptureItem(window)) {
        return false;
    }

    const auto poolSize = item_.Size();
    framePool_ = wgcap::Direct3D11CaptureFramePool::CreateFreeThreaded(
        winrtDevice_,
        wgdx::DirectXPixelFormat::B8G8R8A8UIntNormalized,
        2,
        poolSize);
    session_ = framePool_.CreateCaptureSession(item_);

    const bool optionsApplied = applySessionOptions(captureCursor);
    if (optionsApplied) {
        frameArrivedToken_ = framePool_.FrameArrived({this, &WgcSession::onFrameArrived});
    }
    return optionsApplied;
}
|
||||
|
||||
// Installs (or clears, when given an empty function) the callback invoked for
// each arriving frame. The swap happens under callbackMutex_, the same mutex
// onFrameArrived() takes while copying the callback.
void WgcSession::setFrameCallback(FrameCallback callback) {
    std::lock_guard<std::mutex> guard(callbackMutex_);
    frameCallback_ = std::move(callback);
}
|
||||
|
||||
bool WgcSession::start() {
|
||||
if (!session_) {
|
||||
return false;
|
||||
}
|
||||
if (!applySessionOptions(captureCursor_)) {
|
||||
return false;
|
||||
}
|
||||
session_.StartCapture();
|
||||
started_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void WgcSession::stop() {
|
||||
if (framePool_) {
|
||||
framePool_.FrameArrived(frameArrivedToken_);
|
||||
}
|
||||
if (session_) {
|
||||
session_.Close();
|
||||
session_ = nullptr;
|
||||
}
|
||||
if (framePool_) {
|
||||
framePool_.Close();
|
||||
framePool_ = nullptr;
|
||||
}
|
||||
item_ = nullptr;
|
||||
winrtDevice_ = nullptr;
|
||||
d3dContext_.Reset();
|
||||
d3dDevice_.Reset();
|
||||
started_ = false;
|
||||
}
|
||||
|
||||
// FrameArrived handler: pulls the next frame from the pool, unwraps its surface
// to an ID3D11Texture2D and hands the texture plus the frame's
// SystemRelativeTime (as a raw tick count) to the registered callback.
// The callback is copied under callbackMutex_ and invoked outside the lock.
// Does nothing if no frame is available or the texture cannot be obtained.
void WgcSession::onFrameArrived(
    wgcap::Direct3D11CaptureFramePool const& sender,
    wf::IInspectable const&) {
    auto frame = sender.TryGetNextFrame();
    if (!frame) {
        return;
    }

    const auto surface = frame.Surface();
    const auto dxgiAccess =
        surface.as<::Windows::Graphics::DirectX::Direct3D11::IDirect3DDxgiInterfaceAccess>();

    Microsoft::WRL::ComPtr<ID3D11Texture2D> texture;
    const HRESULT queryResult = dxgiAccess->GetInterface(
        __uuidof(ID3D11Texture2D),
        reinterpret_cast<void**>(texture.GetAddressOf()));
    if (FAILED(queryResult) || !texture) {
        return;
    }

    // Take a private copy so the mutex is not held while user code runs.
    FrameCallback handler;
    {
        std::scoped_lock guard(callbackMutex_);
        handler = frameCallback_;
    }

    if (handler) {
        handler(texture.Get(), timeSpanToHns(frame.SystemRelativeTime()));
    }
    frame.Close();
}
|
||||
|
||||
int WgcSession::captureWidth() const {
|
||||
return width_;
|
||||
}
|
||||
|
||||
int WgcSession::captureHeight() const {
|
||||
return height_;
|
||||
}
|
||||
|
||||
// Non-owning pointer to the D3D11 device; null before createD3DDevice()
// succeeds and after stop().
ID3D11Device* WgcSession::device() const {
    ID3D11Device* const raw = d3dDevice_.Get();
    return raw;
}
|
||||
|
||||
// Non-owning pointer to the immediate device context; null before
// createD3DDevice() succeeds and after stop().
ID3D11DeviceContext* WgcSession::context() const {
    ID3D11DeviceContext* const raw = d3dContext_.Get();
    return raw;
}
|
||||
@@ -0,0 +1,61 @@
|
||||
#pragma once
|
||||
|
||||
#include <Windows.h>
|
||||
#include <d3d11.h>
|
||||
#include <dxgi.h>
|
||||
#include <windows.graphics.capture.h>
|
||||
#include <windows.graphics.directx.direct3d11.interop.h>
|
||||
#include <winrt/Windows.Foundation.h>
|
||||
#include <winrt/Windows.Graphics.Capture.h>
|
||||
#include <winrt/Windows.Graphics.DirectX.Direct3D11.h>
|
||||
#include <wrl/client.h>
|
||||
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
|
||||
class WgcSession {
|
||||
public:
|
||||
using FrameCallback = std::function<void(ID3D11Texture2D*, int64_t)>;
|
||||
|
||||
WgcSession() = default;
|
||||
~WgcSession();
|
||||
|
||||
WgcSession(const WgcSession&) = delete;
|
||||
WgcSession& operator=(const WgcSession&) = delete;
|
||||
|
||||
bool initialize(HMONITOR monitor, int fps, bool captureCursor);
|
||||
bool initialize(HWND window, int fps, bool captureCursor);
|
||||
void setFrameCallback(FrameCallback callback);
|
||||
bool start();
|
||||
void stop();
|
||||
|
||||
int captureWidth() const;
|
||||
int captureHeight() const;
|
||||
ID3D11Device* device() const;
|
||||
ID3D11DeviceContext* context() const;
|
||||
|
||||
private:
|
||||
bool createD3DDevice(IDXGIAdapter* adapter = nullptr);
|
||||
bool createD3DDeviceForMonitor(HMONITOR monitor);
|
||||
bool createCaptureItem(HMONITOR monitor);
|
||||
bool createCaptureItem(HWND window);
|
||||
bool applySessionOptions(bool captureCursor);
|
||||
void onFrameArrived(
|
||||
winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool const& sender,
|
||||
winrt::Windows::Foundation::IInspectable const&);
|
||||
|
||||
Microsoft::WRL::ComPtr<ID3D11Device> d3dDevice_;
|
||||
Microsoft::WRL::ComPtr<ID3D11DeviceContext> d3dContext_;
|
||||
winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice winrtDevice_{nullptr};
|
||||
winrt::Windows::Graphics::Capture::GraphicsCaptureItem item_{nullptr};
|
||||
winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool framePool_{nullptr};
|
||||
winrt::Windows::Graphics::Capture::GraphicsCaptureSession session_{nullptr};
|
||||
winrt::event_token frameArrivedToken_{};
|
||||
FrameCallback frameCallback_;
|
||||
std::mutex callbackMutex_;
|
||||
int width_ = 0;
|
||||
int height_ = 0;
|
||||
int fps_ = 60;
|
||||
bool captureCursor_ = false;
|
||||
bool started_ = false;
|
||||
};
|
||||
@@ -0,0 +1,322 @@
|
||||
import { contextBridge, ipcRenderer } from "electron";
|
||||
import type {
|
||||
AddGuideMarkerInput,
|
||||
CaptureGuidePointerMarkerResult,
|
||||
DiscardGuideSessionInput,
|
||||
ExportGuideInput,
|
||||
FinalizeGuideEventsInput,
|
||||
GenerateGuideDraftInput,
|
||||
GuideMarkerCapturedPayload,
|
||||
RunGuideOcrInput,
|
||||
SaveGuideAiSettingsInput,
|
||||
SaveGuideInput,
|
||||
WriteGuideSnapshotInput,
|
||||
} from "../src/guide/contracts";
|
||||
import type { NativeMacRecordingRequest } from "../src/lib/nativeMacRecording";
|
||||
import type { NativeWindowsRecordingRequest } from "../src/lib/nativeWindowsRecording";
|
||||
import type { RecordingSession, StoreRecordedSessionInput } from "../src/lib/recordingSession";
|
||||
import { NATIVE_BRIDGE_CHANNEL, type NativeBridgeRequest } from "../src/native/contracts";
|
||||
|
||||
// Asset base URL is passed from the main process via webPreferences.additionalArguments
|
||||
// (see windows.ts). Sandboxed preloads cannot import node:path / node:url, so we
|
||||
// can't compute it here.
|
||||
const ASSET_BASE_URL_ARG_PREFIX = "--asset-base-url=";
|
||||
const assetBaseUrlArg = process.argv.find((arg) => arg.startsWith(ASSET_BASE_URL_ARG_PREFIX));
|
||||
const assetBaseUrl = assetBaseUrlArg ? assetBaseUrlArg.slice(ASSET_BASE_URL_ARG_PREFIX.length) : "";
|
||||
|
||||
contextBridge.exposeInMainWorld("electronAPI", {
|
||||
assetBaseUrl,
|
||||
invokeNativeBridge: <TData>(request: NativeBridgeRequest) => {
|
||||
return ipcRenderer.invoke(NATIVE_BRIDGE_CHANNEL, request) as Promise<TData>;
|
||||
},
|
||||
guide: {
|
||||
startSession: (recordingId: string | number) => {
|
||||
return ipcRenderer.invoke("guide:start-session", recordingId);
|
||||
},
|
||||
readSession: (recordingId: string | number) => {
|
||||
return ipcRenderer.invoke("guide:read-session", recordingId);
|
||||
},
|
||||
addMarker: (input: AddGuideMarkerInput) => {
|
||||
return ipcRenderer.invoke("guide:add-marker", input);
|
||||
},
|
||||
capturePointerMarker: () => {
|
||||
return ipcRenderer.invoke("guide:capture-pointer-marker") as Promise<
|
||||
import("../src/guide/contracts").GuideIpcResult<CaptureGuidePointerMarkerResult>
|
||||
>;
|
||||
},
|
||||
onMarkerCaptured: (callback: (payload: GuideMarkerCapturedPayload) => void) => {
|
||||
const listener = (_event: Electron.IpcRendererEvent, payload: GuideMarkerCapturedPayload) => {
|
||||
callback(payload);
|
||||
};
|
||||
ipcRenderer.on("guide:marker-captured", listener);
|
||||
return () => ipcRenderer.removeListener("guide:marker-captured", listener);
|
||||
},
|
||||
finalizeEvents: (input: FinalizeGuideEventsInput) => {
|
||||
return ipcRenderer.invoke("guide:finalize-events", input);
|
||||
},
|
||||
writeSnapshot: (input: WriteGuideSnapshotInput) => {
|
||||
return ipcRenderer.invoke("guide:write-snapshot", input);
|
||||
},
|
||||
runOcr: (input: RunGuideOcrInput) => {
|
||||
return ipcRenderer.invoke("guide:run-ocr", input);
|
||||
},
|
||||
generateDraft: (input: GenerateGuideDraftInput) => {
|
||||
return ipcRenderer.invoke("guide:generate-draft", input);
|
||||
},
|
||||
getAiSettings: () => {
|
||||
return ipcRenderer.invoke("guide:get-ai-settings");
|
||||
},
|
||||
saveAiSettings: (input: SaveGuideAiSettingsInput) => {
|
||||
return ipcRenderer.invoke("guide:save-ai-settings", input);
|
||||
},
|
||||
saveGuide: (input: SaveGuideInput) => {
|
||||
return ipcRenderer.invoke("guide:save-guide", input);
|
||||
},
|
||||
exportMarkdown: (input: ExportGuideInput) => {
|
||||
return ipcRenderer.invoke("guide:export-markdown", input);
|
||||
},
|
||||
exportHtml: (input: ExportGuideInput) => {
|
||||
return ipcRenderer.invoke("guide:export-html", input);
|
||||
},
|
||||
discardSession: (input: DiscardGuideSessionInput) => {
|
||||
return ipcRenderer.invoke("guide:discard-session", input);
|
||||
},
|
||||
},
|
||||
hudOverlayHide: () => {
|
||||
ipcRenderer.send("hud-overlay-hide");
|
||||
},
|
||||
hudOverlayClose: () => {
|
||||
ipcRenderer.send("hud-overlay-close");
|
||||
},
|
||||
setHudOverlayIgnoreMouseEvents: (ignore: boolean) => {
|
||||
ipcRenderer.send("hud-overlay-ignore-mouse-events", ignore);
|
||||
},
|
||||
moveHudOverlayBy: (deltaX: number, deltaY: number) => {
|
||||
ipcRenderer.send("hud-overlay-move-by", deltaX, deltaY);
|
||||
},
|
||||
getSources: async (opts: Electron.SourcesOptions) => {
|
||||
return await ipcRenderer.invoke("get-sources", opts);
|
||||
},
|
||||
switchToEditor: () => {
|
||||
return ipcRenderer.invoke("switch-to-editor");
|
||||
},
|
||||
switchToHud: () => {
|
||||
return ipcRenderer.invoke("switch-to-hud");
|
||||
},
|
||||
startNewRecording: () => {
|
||||
return ipcRenderer.invoke("start-new-recording");
|
||||
},
|
||||
openSourceSelector: () => {
|
||||
return ipcRenderer.invoke("open-source-selector");
|
||||
},
|
||||
selectSource: (source: ProcessedDesktopSource) => {
|
||||
return ipcRenderer.invoke("select-source", source);
|
||||
},
|
||||
getSelectedSource: () => {
|
||||
return ipcRenderer.invoke("get-selected-source");
|
||||
},
|
||||
requestCameraAccess: () => {
|
||||
return ipcRenderer.invoke("request-camera-access");
|
||||
},
|
||||
requestScreenAccess: () => {
|
||||
return ipcRenderer.invoke("request-screen-access");
|
||||
},
|
||||
requestNativeMacCursorAccess: () => {
|
||||
return ipcRenderer.invoke("request-native-mac-cursor-access");
|
||||
},
|
||||
storeRecordedVideo: (videoData: ArrayBuffer, fileName: string) => {
|
||||
return ipcRenderer.invoke("store-recorded-video", videoData, fileName);
|
||||
},
|
||||
storeRecordedSession: (payload: StoreRecordedSessionInput) => {
|
||||
return ipcRenderer.invoke("store-recorded-session", payload);
|
||||
},
|
||||
openRecordingStream: (fileName: string) => {
|
||||
return ipcRenderer.invoke("open-recording-stream", fileName);
|
||||
},
|
||||
appendRecordingChunk: (fileName: string, chunk: ArrayBuffer) => {
|
||||
return ipcRenderer.invoke("append-recording-chunk", fileName, chunk);
|
||||
},
|
||||
closeRecordingStream: (fileName: string) => {
|
||||
return ipcRenderer.invoke("close-recording-stream", fileName);
|
||||
},
|
||||
|
||||
getRecordedVideoPath: () => {
|
||||
return ipcRenderer.invoke("get-recorded-video-path");
|
||||
},
|
||||
setRecordingState: (
|
||||
recording: boolean,
|
||||
recordingId?: number,
|
||||
cursorCaptureMode?: import("../src/lib/recordingSession").CursorCaptureMode,
|
||||
) => {
|
||||
return ipcRenderer.invoke("set-recording-state", recording, recordingId, cursorCaptureMode);
|
||||
},
|
||||
isNativeWindowsCaptureAvailable: () => {
|
||||
return ipcRenderer.invoke("is-native-windows-capture-available");
|
||||
},
|
||||
isNativeMacCaptureAvailable: () => {
|
||||
return ipcRenderer.invoke("is-native-mac-capture-available");
|
||||
},
|
||||
startNativeWindowsRecording: (request: NativeWindowsRecordingRequest) => {
|
||||
return ipcRenderer.invoke("start-native-windows-recording", request);
|
||||
},
|
||||
stopNativeWindowsRecording: (discard?: boolean) => {
|
||||
return ipcRenderer.invoke("stop-native-windows-recording", discard);
|
||||
},
|
||||
pauseNativeWindowsRecording: () => {
|
||||
return ipcRenderer.invoke("pause-native-windows-recording");
|
||||
},
|
||||
resumeNativeWindowsRecording: () => {
|
||||
return ipcRenderer.invoke("resume-native-windows-recording");
|
||||
},
|
||||
startNativeMacRecording: (request: NativeMacRecordingRequest) => {
|
||||
return ipcRenderer.invoke("start-native-mac-recording", request);
|
||||
},
|
||||
pauseNativeMacRecording: () => {
|
||||
return ipcRenderer.invoke("pause-native-mac-recording");
|
||||
},
|
||||
resumeNativeMacRecording: () => {
|
||||
return ipcRenderer.invoke("resume-native-mac-recording");
|
||||
},
|
||||
stopNativeMacRecording: (discard?: boolean) => {
|
||||
return ipcRenderer.invoke("stop-native-mac-recording", discard);
|
||||
},
|
||||
attachNativeMacWebcamRecording: (payload: {
|
||||
screenVideoPath: string;
|
||||
recordingId: number;
|
||||
webcam: { fileName: string; videoData: ArrayBuffer };
|
||||
cursorCaptureMode?: import("../src/lib/recordingSession").CursorCaptureMode;
|
||||
}) => {
|
||||
return ipcRenderer.invoke("attach-native-mac-webcam-recording", payload);
|
||||
},
|
||||
getCursorTelemetry: (videoPath?: string) => {
|
||||
return ipcRenderer.invoke("get-cursor-telemetry", videoPath);
|
||||
},
|
||||
discardCursorTelemetry: (recordingId: number) => {
|
||||
return ipcRenderer.invoke("discard-cursor-telemetry", recordingId);
|
||||
},
|
||||
onStopRecordingFromTray: (callback: () => void) => {
|
||||
const listener = () => callback();
|
||||
ipcRenderer.on("stop-recording-from-tray", listener);
|
||||
return () => ipcRenderer.removeListener("stop-recording-from-tray", listener);
|
||||
},
|
||||
openExternalUrl: (url: string) => {
|
||||
return ipcRenderer.invoke("open-external-url", url);
|
||||
},
|
||||
pickExportSavePath: (fileName: string, exportFolder?: string) => {
|
||||
return ipcRenderer.invoke("pick-export-save-path", fileName, exportFolder);
|
||||
},
|
||||
writeExportToPath: (videoData: ArrayBuffer, filePath: string) => {
|
||||
return ipcRenderer.invoke("write-export-to-path", videoData, filePath);
|
||||
},
|
||||
openVideoFilePicker: () => {
|
||||
return ipcRenderer.invoke("open-video-file-picker");
|
||||
},
|
||||
setCurrentVideoPath: (path: string) => {
|
||||
return ipcRenderer.invoke("set-current-video-path", path);
|
||||
},
|
||||
setCurrentRecordingSession: (session: RecordingSession | null) => {
|
||||
return ipcRenderer.invoke("set-current-recording-session", session);
|
||||
},
|
||||
getCurrentVideoPath: () => {
|
||||
return ipcRenderer.invoke("get-current-video-path");
|
||||
},
|
||||
getCurrentRecordingSession: () => {
|
||||
return ipcRenderer.invoke("get-current-recording-session");
|
||||
},
|
||||
readBinaryFile: (filePath: string) => {
|
||||
return ipcRenderer.invoke("read-binary-file", filePath);
|
||||
},
|
||||
preparePreviewAudioTrack: (filePath: string) => {
|
||||
return ipcRenderer.invoke("prepare-preview-audio-track", filePath);
|
||||
},
|
||||
clearCurrentVideoPath: () => {
|
||||
return ipcRenderer.invoke("clear-current-video-path");
|
||||
},
|
||||
saveProjectFile: (projectData: unknown, suggestedName?: string, existingProjectPath?: string) => {
|
||||
return ipcRenderer.invoke("save-project-file", projectData, suggestedName, existingProjectPath);
|
||||
},
|
||||
loadProjectFile: () => {
|
||||
return ipcRenderer.invoke("load-project-file");
|
||||
},
|
||||
loadCurrentProjectFile: () => {
|
||||
return ipcRenderer.invoke("load-current-project-file");
|
||||
},
|
||||
onMenuLoadProject: (callback: () => void) => {
|
||||
const listener = () => callback();
|
||||
ipcRenderer.on("menu-load-project", listener);
|
||||
return () => ipcRenderer.removeListener("menu-load-project", listener);
|
||||
},
|
||||
onMenuSaveProject: (callback: () => void) => {
|
||||
const listener = () => callback();
|
||||
ipcRenderer.on("menu-save-project", listener);
|
||||
return () => ipcRenderer.removeListener("menu-save-project", listener);
|
||||
},
|
||||
onMenuSaveProjectAs: (callback: () => void) => {
|
||||
const listener = () => callback();
|
||||
ipcRenderer.on("menu-save-project-as", listener);
|
||||
return () => ipcRenderer.removeListener("menu-save-project-as", listener);
|
||||
},
|
||||
getPlatform: () => {
|
||||
return ipcRenderer.invoke("get-platform");
|
||||
},
|
||||
revealInFolder: (filePath: string) => {
|
||||
return ipcRenderer.invoke("reveal-in-folder", filePath);
|
||||
},
|
||||
getShortcuts: () => {
|
||||
return ipcRenderer.invoke("get-shortcuts");
|
||||
},
|
||||
saveShortcuts: (shortcuts: unknown) => {
|
||||
return ipcRenderer.invoke("save-shortcuts", shortcuts);
|
||||
},
|
||||
setLocale: (locale: string) => {
|
||||
return ipcRenderer.invoke("set-locale", locale);
|
||||
},
|
||||
saveDiagnostic: (payload: {
|
||||
error: string;
|
||||
stack?: string;
|
||||
projectState: unknown;
|
||||
logs: string[];
|
||||
}) => {
|
||||
return ipcRenderer.invoke("save-diagnostic", payload);
|
||||
},
|
||||
setMicrophoneExpanded: (expanded: boolean) => {
|
||||
ipcRenderer.send("hud:setMicrophoneExpanded", expanded);
|
||||
},
|
||||
setHasUnsavedChanges: (hasChanges: boolean) => {
|
||||
ipcRenderer.send("set-has-unsaved-changes", hasChanges);
|
||||
},
|
||||
showCountdownOverlay: (value: number, runId: number) => {
|
||||
return ipcRenderer.invoke("countdown-overlay-show", value, runId);
|
||||
},
|
||||
setCountdownOverlayValue: (value: number, runId: number) => {
|
||||
return ipcRenderer.invoke("countdown-overlay-set-value", value, runId);
|
||||
},
|
||||
hideCountdownOverlay: (runId: number) => {
|
||||
return ipcRenderer.invoke("countdown-overlay-hide", runId);
|
||||
},
|
||||
// Subscribes `callback` to the "countdown-overlay-value" IPC channel. The
// callback receives only the value (a number or null); the IPC event object
// is dropped. Returns a function that removes the subscription.
onCountdownOverlayValue: (callback: (value: number | null) => void) => {
	const channel = "countdown-overlay-value";
	const handler = (_event: unknown, value: number | null) => callback(value);
	ipcRenderer.on(channel, handler);
	return () => ipcRenderer.removeListener(channel, handler);
},
|
||||
// Subscribes to "request-save-before-close". Each request awaits `callback`
// and replies on "save-before-close-done" with its boolean result. If the
// callback (or the reply itself) throws, `false` is sent instead.
// Returns a function that removes the subscription.
onRequestSaveBeforeClose: (callback: () => Promise<boolean> | boolean) => {
	const requestChannel = "request-save-before-close";
	const handler = async () => {
		try {
			const shouldClose = await callback();
			ipcRenderer.send("save-before-close-done", shouldClose);
		} catch {
			// Any failure is reported as `false` rather than left unanswered.
			ipcRenderer.send("save-before-close-done", false);
		}
	};
	ipcRenderer.on(requestChannel, handler);
	return () => ipcRenderer.removeListener(requestChannel, handler);
},
|
||||
// Subscribes `callback` to the "request-close-confirm" IPC channel and returns
// a function that removes the subscription again.
onRequestCloseConfirm: (callback: () => void) => {
	const channel = "request-close-confirm";
	// Wrap the callback so the IPC event arguments are never forwarded to it.
	const handler = () => callback();
	ipcRenderer.on(channel, handler);
	return () => ipcRenderer.removeListener(channel, handler);
},
|
||||
// Fire-and-forget: sends the user's choice ("save", "discard" or "cancel") on
// the "close-confirm-response" channel.
sendCloseConfirmResponse: (choice: "save" | "discard" | "cancel"): void => {
	ipcRenderer.send("close-confirm-response", choice);
},
|
||||
});
|
||||
@@ -0,0 +1,97 @@
|
||||
import fs from "node:fs/promises";
|
||||
import { fixParsedWebmDuration } from "@fix-webm-duration/fix";
|
||||
import { WebmFile } from "@fix-webm-duration/parser";
|
||||
|
||||
/**
 * Outcome of `patchWebmDurationOnDisk`.
 *
 * Either the file was rewritten (`patched: true`), or it was left alone and
 * `reason` says why.
 */
export type DurationPatchResult =
	| { patched: true }
	| {
			patched: false;
			reason: "no-section" | "already-valid" | "io-error" | "internal";
	  };
|
||||
|
||||
/**
 * Patch the WebM Duration header on a finalized recording file.
 *
 * Browser MediaRecorder writes WebM with no Duration EBML element. With the
 * streaming-to-disk path the renderer never holds the blob, so the historical
 * `fixWebmDuration(blob, durationMs)` call can't run. Patching on disk after
 * `WriteStream.end()` produces an equivalent result: the editor's seek bar and
 * timeline read a real duration instead of `N/A`.
 *
 * Atomic by design: writes the patched bytes to `<filePath>.duration-patch.tmp`
 * and renames in place. If the process crashes mid-rewrite, the original file
 * survives intact, so the user never loses their recording to a partial write.
 *
 * Best-effort by intent: any failure (read, parse, write) logs and returns a
 * non-`patched` result rather than throwing. The file is still playable without
 * the patch (decoders walk frames sequentially); the only cost is that the
 * editor's seek bar and timeline break until it is patched.
 *
 * Failure reasons:
 * - `"io-error"`: reading the file, or writing/renaming the temp file, failed.
 * - `"internal"`: `durationMs` is not a positive finite number, the parser or
 *   patcher threw, or the patched file has no `source` bytes.
 * - `"no-section"` / `"already-valid"`: the patcher declined; see
 *   `inferUnpatchedReason`.
 *
 * Memory: reads the whole file into a main-process Buffer, the same footprint
 * as the pre-streaming renderer path, just on the side without V8's heap cap.
 *
 * @param filePath   Path of the finalized WebM file to patch in place.
 * @param durationMs Recording duration in milliseconds to write into the header.
 * @returns A `DurationPatchResult`; this function never rejects.
 */
export async function patchWebmDurationOnDisk(
	filePath: string,
	durationMs: number,
): Promise<DurationPatchResult> {
	// Refuse to write a nonsensical duration. A NaN/negative value in the
	// header would be worse than no Duration element at all.
	if (!Number.isFinite(durationMs) || durationMs <= 0) {
		console.error(
			`[webm-duration] refusing to patch ${filePath}: invalid duration ${durationMs}`,
		);
		return { patched: false, reason: "internal" };
	}

	// Step 1: read. A failure here is a genuine I/O error.
	let fileBytes: Buffer;
	try {
		fileBytes = await fs.readFile(filePath);
	} catch (readError) {
		console.error(`[webm-duration] failed to patch ${filePath}:`, readError);
		return { patched: false, reason: "io-error" };
	}

	// Step 2: parse and patch in memory. Exceptions here come from the parser
	// or patcher, not from the filesystem, so they are reported as "internal".
	let patchedBytes: Buffer;
	try {
		const webm = new WebmFile(new Uint8Array(fileBytes));

		const patched = fixParsedWebmDuration(webm, durationMs, { logger: false });
		if (!patched) {
			// fixParsedWebmDuration returns false for: missing Segment, missing
			// Info, or a Duration that is already valid. The first two mean a
			// malformed (most likely truncated) file; the third is a no-op.
			const reason = inferUnpatchedReason(webm);
			if (reason === "no-section") {
				console.warn(
					`[webm-duration] no Segment/Info section in ${filePath}; file may be truncated`,
				);
			}
			return { patched: false, reason };
		}

		if (!webm.source) {
			console.error(`[webm-duration] patched but source missing for ${filePath}`);
			return { patched: false, reason: "internal" };
		}

		// View over the patched bytes (no copy); honours the view's offset/length.
		patchedBytes = Buffer.from(
			webm.source.buffer,
			webm.source.byteOffset,
			webm.source.byteLength,
		);
	} catch (patchError) {
		console.error(`[webm-duration] failed to patch ${filePath}:`, patchError);
		return { patched: false, reason: "internal" };
	}

	// Step 3: write to a sibling temp file, then rename over the original.
	const tmpPath = `${filePath}.duration-patch.tmp`;
	try {
		await fs.writeFile(tmpPath, patchedBytes);
		await fs.rename(tmpPath, filePath);
		return { patched: true };
	} catch (writeError) {
		console.error(`[webm-duration] failed to write patched ${filePath}:`, writeError);
		// Best-effort cleanup of the temp file; if unlink also fails, leave it.
		// The original recording is untouched because the rename did not complete.
		await fs.unlink(tmpPath).catch(() => undefined);
		return { patched: false, reason: "io-error" };
	}
}
|
||||
|
||||
/**
 * Distinguish "required section missing" (malformed/truncated file) from "Info
 * present but Duration already valid" (patch unnecessary).
 *
 * The IDs are the length-descriptor-stripped form that @fix-webm-duration/parser
 * uses as its lookup keys (Segment `0x8538067`, Info `0x549a966`), verified
 * against the parser's `src/lib/sections.js` — not the canonical 4-byte EBML
 * IDs (`0x18538067` / `0x1549A966`), which this parser's `getSectionById` would
 * never match.
 *
 * NOTE(review): the upstream fix-webm-duration implementation also returns
 * false when the TimecodeScale section (`0xad7b1`, same stripped-ID form) is
 * missing from Info. That case is reported here as "no-section" rather than
 * "already-valid". Confirm that @fix-webm-duration/fix keeps this early return.
 *
 * @param webm Parsed file that `fixParsedWebmDuration` declined to patch.
 * @returns "no-section" when Segment, Info or TimecodeScale cannot be found,
 *   otherwise "already-valid".
 */
function inferUnpatchedReason(webm: WebmFile): "no-section" | "already-valid" {
	type SectionLookup = { getSectionById?: (id: number) => unknown };

	const segment = webm.getSectionById?.(0x8538067);
	if (!segment) return "no-section";

	const info = (segment as unknown as SectionLookup).getSectionById?.(0x549a966);
	if (!info) return "no-section";

	// Only consult TimecodeScale when Info actually exposes a lookup. If it
	// does not, fall back to the previous behaviour instead of guessing.
	const lookupInInfo = (info as SectionLookup).getSectionById;
	if (typeof lookupInInfo === "function" && !lookupInInfo.call(info, 0xad7b1)) {
		return "no-section";
	}

	return "already-valid";
}
|
||||
@@ -0,0 +1,263 @@
|
||||
import path from "node:path";
|
||||
import { fileURLToPath, pathToFileURL } from "node:url";
|
||||
import { BrowserWindow, ipcMain, screen } from "electron";
|
||||
|
||||
// Directory of this module, derived from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Parent of this module's directory; the renderer bundle lives in <APP_ROOT>/dist.
const APP_ROOT = path.join(__dirname, "..");
const RENDERER_DIST = path.join(APP_ROOT, "dist");

// Environment switches: dev-server URL (when set, windows load from it) and
// headless mode (windows that honour it are created hidden).
const VITE_DEV_SERVER_URL = process.env["VITE_DEV_SERVER_URL"];
const HEADLESS = process.env["HEADLESS"] === "true";

// Asset base URL for renderer (wallpapers, etc.). Packaged: extraResources copies
// public/wallpapers -> resources/wallpapers. Unpackaged: <appRoot>/public/.
const ASSET_BASE_DIR = process.defaultApp ? path.join(APP_ROOT, "public") : process.resourcesPath;
// The trailing separator makes the file URL end in "/" so it reads as a directory.
const ASSET_BASE_URL = pathToFileURL(`${ASSET_BASE_DIR}${path.sep}`).toString();
const ASSET_BASE_URL_ARG = `--asset-base-url=${ASSET_BASE_URL}`;
|
||||
|
||||
// The HUD overlay window currently tracked by this module, or null when there
// is none. The IPC handlers below are no-ops while it is null or destroyed.
let hudOverlayWindow: BrowserWindow | null = null;

// "hud-overlay-hide": minimize the HUD overlay.
ipcMain.on("hud-overlay-hide", () => {
	if (!hudOverlayWindow || hudOverlayWindow.isDestroyed()) return;
	hudOverlayWindow.minimize();
});

// "hud-overlay-ignore-mouse-events": toggle click-through on the HUD overlay.
// `forward: true` is always passed alongside the requested flag.
ipcMain.on("hud-overlay-ignore-mouse-events", (_event, ignore: boolean) => {
	if (!hudOverlayWindow || hudOverlayWindow.isDestroyed()) return;
	hudOverlayWindow.setIgnoreMouseEvents(ignore, { forward: true });
});

// "hud-overlay-move-by": shift the HUD overlay by a pixel delta. Non-finite
// deltas (NaN, ±Infinity, non-numbers) are ignored.
ipcMain.on("hud-overlay-move-by", (_event, deltaX: number, deltaY: number) => {
	const hud = hudOverlayWindow;
	if (!hud || hud.isDestroyed()) return;
	if (!Number.isFinite(deltaX) || !Number.isFinite(deltaY)) return;

	const [left, top] = hud.getPosition();
	// Window coordinates must be integers, hence the rounding.
	hud.setPosition(Math.round(left + deltaX), Math.round(top + deltaY), false);
});
|
||||
|
||||
/**
 * Builds the HUD overlay: a fixed-size (600x160), frameless, transparent,
 * always-on-top window centred horizontally and placed 5px above the bottom
 * edge of the primary display's work area. It starts click-through, and on
 * macOS it is made visible on all workspaces, including over full-screen apps.
 *
 * The new window is stored in the module-level `hudOverlayWindow` and that
 * reference is cleared again when this same window is closed.
 *
 * @returns The newly created HUD overlay window.
 */
export function createHudOverlayWindow(): BrowserWindow {
	const hudWidth = 600;
	const hudHeight = 160;
	const bottomGap = 5;

	const { workArea } = screen.getPrimaryDisplay();
	const x = Math.floor(workArea.x + (workArea.width - hudWidth) / 2);
	const y = Math.floor(workArea.y + workArea.height - hudHeight - bottomGap);

	const win = new BrowserWindow({
		width: hudWidth,
		height: hudHeight,
		// Min and max equal the initial size, pinning the window dimensions.
		minWidth: hudWidth,
		maxWidth: hudWidth,
		minHeight: hudHeight,
		maxHeight: hudHeight,
		x,
		y,
		frame: false,
		transparent: true,
		resizable: false,
		alwaysOnTop: true,
		skipTaskbar: true,
		hasShadow: false,
		show: !HEADLESS,
		webPreferences: {
			preload: path.join(__dirname, "preload.mjs"),
			additionalArguments: [ASSET_BASE_URL_ARG],
			nodeIntegration: false,
			contextIsolation: true,
			backgroundThrottling: false,
		},
	});

	// Start click-through; mouse-move events are still forwarded to the page.
	win.setIgnoreMouseEvents(true, { forward: true });

	// Follow the user across macOS Spaces (virtual desktops).
	// Without this the HUD stays pinned to the Space it was first opened on.
	if (process.platform === "darwin") {
		win.setVisibleOnAllWorkspaces(true, { visibleOnFullScreen: true });
	}

	win.webContents.on("did-finish-load", () => {
		win.webContents.send("main-process-message", new Date().toLocaleString());
	});

	hudOverlayWindow = win;
	win.on("closed", () => {
		// Only clear the reference if it still points at this window.
		if (hudOverlayWindow === win) {
			hudOverlayWindow = null;
		}
	});

	if (VITE_DEV_SERVER_URL) {
		win.loadURL(VITE_DEV_SERVER_URL + "?windowType=hud-overlay");
	} else {
		const indexHtml = path.join(RENDERER_DIST, "index.html");
		win.loadFile(indexHtml, { query: { windowType: "hud-overlay" } });
	}

	return win;
}
|
||||
|
||||
/**
 * Creates the main editor window. Starts maximised (except in headless mode)
 * with a hidden title bar on macOS. This window is not always-on-top and
 * appears in the taskbar/dock.
 *
 * In headless mode (`HEADLESS`), the window is created hidden and is NOT
 * maximised, because `BrowserWindow.maximize()` shows a hidden window and
 * would therefore defeat `show: false`.
 *
 * @returns The newly created editor window.
 */
export function createEditorWindow(): BrowserWindow {
	const isMac = process.platform === "darwin";

	const win = new BrowserWindow({
		width: 1200,
		height: 800,
		minWidth: 800,
		minHeight: 600,
		// macOS only: inset traffic lights over a hidden title bar.
		...(isMac && {
			titleBarStyle: "hiddenInset",
			trafficLightPosition: { x: 12, y: 12 },
		}),
		transparent: false,
		resizable: true,
		alwaysOnTop: false,
		skipTaskbar: false,
		title: "OpenScreen",
		backgroundColor: "#000000",
		show: !HEADLESS,
		webPreferences: {
			preload: path.join(__dirname, "preload.mjs"),
			additionalArguments: [ASSET_BASE_URL_ARG],
			nodeIntegration: false,
			contextIsolation: true,
			// NOTE(review): web security is disabled for this window; presumably
			// so the editor can load local media. Confirm it is still required.
			webSecurity: false,
			backgroundThrottling: false,
		},
	});

	// Maximize the window by default. Skipped when headless: maximize() also
	// shows the window if it is not being displayed already.
	if (!HEADLESS) {
		win.maximize();
	}

	win.webContents.on("did-finish-load", () => {
		win.webContents.send("main-process-message", new Date().toLocaleString());
	});

	if (VITE_DEV_SERVER_URL) {
		win.loadURL(VITE_DEV_SERVER_URL + "?windowType=editor");
	} else {
		win.loadFile(path.join(RENDERER_DIST, "index.html"), {
			query: { windowType: "editor" },
		});
	}

	return win;
}
|
||||
|
||||
/**
 * Creates the floating source-selector window used to pick a screen or window
 * to record. Frameless, transparent, always-on-top, and on macOS visible on
 * all workspaces, including over full-screen apps.
 *
 * The window is centred within the primary display's work area. The work
 * area's origin is included in the calculation so the window is still centred
 * when the work area does not start at (0, 0), for example below a top menu
 * bar or beside a left-docked taskbar.
 *
 * NOTE(review): unlike the HUD and editor windows, this one does not honour
 * `HEADLESS` (no `show` option is set). Confirm whether that is intentional.
 *
 * @returns The newly created source-selector window.
 */
export function createSourceSelectorWindow(): BrowserWindow {
	const selectorWidth = 620;
	const selectorHeight = 420;

	// `workArea` carries x/y as well as width/height; `workAreaSize` has only
	// the size, which mis-centres the window when the origin is not (0, 0).
	const { workArea } = screen.getPrimaryDisplay();

	const win = new BrowserWindow({
		width: selectorWidth,
		height: selectorHeight,
		minHeight: 350,
		maxHeight: 500,
		x: Math.round(workArea.x + (workArea.width - selectorWidth) / 2),
		y: Math.round(workArea.y + (workArea.height - selectorHeight) / 2),
		frame: false,
		resizable: false,
		alwaysOnTop: true,
		transparent: true,
		backgroundColor: "#00000000",
		webPreferences: {
			preload: path.join(__dirname, "preload.mjs"),
			additionalArguments: [ASSET_BASE_URL_ARG],
			nodeIntegration: false,
			contextIsolation: true,
		},
	});

	// Follow the user across macOS Spaces so the selector appears on the
	// active desktop regardless of where the HUD was originally opened.
	if (process.platform === "darwin") {
		win.setVisibleOnAllWorkspaces(true, { visibleOnFullScreen: true });
	}

	if (VITE_DEV_SERVER_URL) {
		win.loadURL(VITE_DEV_SERVER_URL + "?windowType=source-selector");
	} else {
		win.loadFile(path.join(RENDERER_DIST, "index.html"), {
			query: { windowType: "source-selector" },
		});
	}

	return win;
}
|
||||
|
||||
/**
 * Builds the countdown overlay: a fixed-size (420x260), frameless, transparent,
 * always-on-top, non-focusable window centred in the primary display's work
 * area. It is created hidden (`show: false`), ignores all mouse input, and on
 * macOS is visible on all workspaces, including over full-screen apps.
 *
 * @returns The newly created (still hidden) countdown overlay window.
 */
export function createCountdownOverlayWindow(): BrowserWindow {
	const overlaySize = { width: 420, height: 260 };
	const { workArea } = screen.getPrimaryDisplay();

	// Centre of the work area, including the work area's own origin offset.
	const left = Math.round(workArea.x + (workArea.width - overlaySize.width) / 2);
	const top = Math.round(workArea.y + (workArea.height - overlaySize.height) / 2);

	const win = new BrowserWindow({
		width: overlaySize.width,
		height: overlaySize.height,
		// Min and max equal the initial size, pinning the window dimensions.
		minWidth: overlaySize.width,
		maxWidth: overlaySize.width,
		minHeight: overlaySize.height,
		maxHeight: overlaySize.height,
		x: left,
		y: top,
		frame: false,
		resizable: false,
		alwaysOnTop: true,
		skipTaskbar: true,
		focusable: false,
		transparent: true,
		backgroundColor: "#00000000",
		hasShadow: false,
		show: false,
		webPreferences: {
			preload: path.join(__dirname, "preload.mjs"),
			additionalArguments: [ASSET_BASE_URL_ARG],
			nodeIntegration: false,
			contextIsolation: true,
			backgroundThrottling: false,
		},
	});

	// Fully click-through; unlike the HUD, no mouse-move forwarding is requested.
	win.setIgnoreMouseEvents(true);

	if (process.platform === "darwin") {
		win.setVisibleOnAllWorkspaces(true, { visibleOnFullScreen: true });
	}

	if (VITE_DEV_SERVER_URL) {
		win.loadURL(VITE_DEV_SERVER_URL + "?windowType=countdown-overlay");
	} else {
		const indexHtml = path.join(RENDERER_DIST, "index.html");
		win.loadFile(indexHtml, { query: { windowType: "countdown-overlay" } });
	}

	return win;
}
|
||||
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"nodes": {
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1775710090,
|
||||
"narHash": "sha256-ar3rofg+awPB8QXDaFJhJ2jJhu+KqN/PRCXeyuXR76E=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "4c1018dae018162ec878d42fec712642d214fdfa",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
||||
@@ -0,0 +1,122 @@
|
||||
# Nix flake for OpenScreen: exposes the package, a development shell, an
# overlay, and NixOS / Home Manager modules. Only Linux systems are listed.
{
  description = "OpenScreen — desktop screen recorder with built-in editor";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
  };

  outputs =
    { self, nixpkgs }:
    let
      # Systems for which per-system outputs are generated.
      systems = [
        "x86_64-linux"
        "aarch64-linux"
      ];
      # Builds an attrset keyed by each entry of `systems`, calling `f` with
      # that system's nixpkgs package set.
      forAllSystems = f: nixpkgs.lib.genAttrs systems (system: f nixpkgs.legacyPackages.${system});
    in
    {
      # -- Per-system outputs (packages, dev shells) --

      packages = forAllSystems (pkgs: {
        openscreen = pkgs.callPackage ./nix/package.nix { };
        # `default` is an alias for the openscreen package of the same system.
        default = self.packages.${pkgs.stdenv.hostPlatform.system}.openscreen;
      });

      devShells = forAllSystems (
        pkgs:
        let
          electron = pkgs.electron;

          # Libraries Electron needs at runtime on Linux
          runtimeLibs = with pkgs; [
            # X11
            libx11
            libxcomposite
            libxdamage
            libxext
            libxfixes
            libxrandr
            libxtst
            libxcb
            libxshmfence

            # Wayland
            wayland

            # GTK / UI toolkit
            gtk3
            glib
            pango
            cairo
            gdk-pixbuf
            atk
            at-spi2-atk
            at-spi2-core

            # Graphics
            mesa
            libGL
            libdrm
            vulkan-loader

            # Networking / crypto (NSS for Chromium)
            nss
            nspr

            # Audio
            alsa-lib
            pipewire
            pulseaudio

            # System
            dbus
            cups
            expat
            libnotify
            libsecret
            util-linux # libuuid
          ];
        in
        {
          default = pkgs.mkShell {
            packages = with pkgs; [
              nodejs_22
              electron

              # Native module compilation
              python3
              pkg-config
              gcc

              # Playwright browser tests
              playwright-driver.browsers
            ];

            # Electron's prebuilt binary needs these at runtime
            LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath runtimeLibs;

            # Tell the npm `electron` package to use the Nix-provided binary
            # instead of downloading its own. vite-plugin-electron respects this.
            ELECTRON_OVERRIDE_DIST_PATH = "${electron}/libexec/electron";

            # Playwright browser path for test:browser / test:e2e
            PLAYWRIGHT_BROWSERS_PATH = "${pkgs.playwright-driver.browsers}";
            PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD = "1";

            # Prints a one-line banner with the node and electron versions on
            # shell entry; errors from `electron --version` are suppressed.
            shellHook = ''
              echo "OpenScreen dev shell — node $(node --version), electron v$(electron --version 2>/dev/null | tr -d 'v')"
            '';
          };
        }
      );

      # -- System-wide outputs (modules, overlay) --

      # Adds `openscreen` to a package set, taken from this flake's packages
      # for the final package set's host system.
      overlays.default = final: _prev: {
        openscreen = self.packages.${final.stdenv.hostPlatform.system}.openscreen;
      };

      # Both module files are imported as functions and applied to this flake (`self`).
      nixosModules.default = import ./nix/module.nix self;
      homeManagerModules.default = import ./nix/hm-module.nix self;
    };
}
|
||||
|
After Width: | Height: | Size: 813 KiB |
|
After Width: | Height: | Size: 14 KiB |
|
After Width: | Height: | Size: 630 B |
|
After Width: | Height: | Size: 1.1 KiB |
|
After Width: | Height: | Size: 53 KiB |
|
After Width: | Height: | Size: 1.6 KiB |
|
After Width: | Height: | Size: 2.9 KiB |