From 0b78ff6f7d6a9bb3ee0526d384c21e05742f2407 Mon Sep 17 00:00:00 2001 From: huanld Date: Thu, 28 May 2026 12:25:23 +0700 Subject: [PATCH] Release OpenScreen 1.4.4 --- docs/engineering/paddleocr-local-service.md | 12 +- .../guide/ai/deepseekSettingsStore.test.ts | 66 ++++ electron/guide/ai/deepseekSettingsStore.ts | 70 +++- electron/guide/guideStore.ts | 10 +- electron/guide/ocr/bundledOcrService.ts | 4 +- electron/guide/ocr/paddleOcrClient.test.ts | 39 ++- electron/guide/ocr/paddleOcrClient.ts | 48 ++- electron/ipc/handlers.ts | 40 ++- package-lock.json | 4 +- package.json | 2 +- src/components/launch/SourceSelector.tsx | 24 +- .../video-editor/guide/GuidePanel.tsx | 73 +++- src/guide/contracts.ts | 8 + tools/ocr/paddle_ocr_service.py | 319 +++++++++++++++++- 14 files changed, 671 insertions(+), 48 deletions(-) create mode 100644 electron/guide/ai/deepseekSettingsStore.test.ts diff --git a/docs/engineering/paddleocr-local-service.md b/docs/engineering/paddleocr-local-service.md index a15063b..cb9fd65 100644 --- a/docs/engineering/paddleocr-local-service.md +++ b/docs/engineering/paddleocr-local-service.md @@ -6,7 +6,7 @@ OpenScreen calls OCR through a local HTTP service. 
The default endpoint is: http://127.0.0.1:8866/ocr ``` -The app sends either `imageBase64` or `path` and expects OCR blocks: +The app sends either `imageBase64` or `path`, plus optional `language` and `profile`, and expects OCR blocks: ```json { @@ -38,7 +38,7 @@ If `paddle` is still missing after installing `paddleocr`, install the CPU Paddl ```powershell .\.venv-ocr\Scripts\Activate.ps1 $env:PADDLEOCR_DEVICE="cpu" -$env:PADDLEOCR_LANG="latin" +$env:OPENSCREEN_OCR_PROFILE="vietnamese" npm run ocr:paddle ``` @@ -58,7 +58,8 @@ Expected healthy environment: "paddleocrInstalled": true, "paddleInstalled": true, "engineReady": false, - "defaultLanguage": "latin" + "defaultLanguage": "vi,en", + "defaultProfile": "vietnamese" } ``` @@ -67,7 +68,10 @@ Expected healthy environment: ## Configuration - `PADDLEOCR_DEVICE`: `cpu`, `gpu:0`, or another PaddleOCR device string. -- `PADDLEOCR_LANG`: defaults to `latin`; this is preferred for Vietnamese UI text because it uses a Latin-script recognition model. +- `OPENSCREEN_OCR_PROFILE`: `fast`, `vietnamese`, or `hybrid`. The default `vietnamese` profile upscales and sharpens focused UI screenshots before OCR. +- `OPENSCREEN_GUIDE_OCR_LANGUAGE`: defaults to `vi,en`. +- `PADDLEOCR_LANG`: optional hard override. Leave unset for the app profile/language settings to work. - `PADDLEOCR_VERSION`: defaults to `PP-OCRv5`. - `PADDLEOCR_USE_MOBILE`: defaults to `1`; set to `0` to use the default/server models. +- `PADDLEOCR_REC_MODEL`: optional recognizer model override. The bundled profile uses `latin_PP-OCRv5_mobile_rec`, which supports Vietnamese Latin-script text. - `OPENSCREEN_GUIDE_OCR_URL`: OpenScreen OCR endpoint override; defaults to `http://127.0.0.1:8866`. 
diff --git a/electron/guide/ai/deepseekSettingsStore.test.ts b/electron/guide/ai/deepseekSettingsStore.test.ts new file mode 100644 index 0000000..37bbedd --- /dev/null +++ b/electron/guide/ai/deepseekSettingsStore.test.ts @@ -0,0 +1,66 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { DeepSeekSettingsStore } from "./deepseekSettingsStore"; + +const tempDirs: string[] = []; +const originalOcrProfile = process.env.OPENSCREEN_GUIDE_OCR_PROFILE; +const originalOcrLanguage = process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE; + +beforeEach(() => { + delete process.env.OPENSCREEN_GUIDE_OCR_PROFILE; + delete process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE; +}); + +afterEach(async () => { + restoreEnv("OPENSCREEN_GUIDE_OCR_PROFILE", originalOcrProfile); + restoreEnv("OPENSCREEN_GUIDE_OCR_LANGUAGE", originalOcrLanguage); + await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true }))); +}); + +function restoreEnv(name: string, value: string | undefined): void { + if (value === undefined) { + delete process.env[name]; + return; + } + process.env[name] = value; +} + +async function createStore(): Promise { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openscreen-guide-settings-")); + tempDirs.push(dir); + return new DeepSeekSettingsStore(path.join(dir, "guide-ai-settings.json")); +} + +describe("DeepSeekSettingsStore OCR settings", () => { + it("defaults to the Vietnamese enhanced OCR profile", async () => { + const store = await createStore(); + + await expect(store.getOcrConfig()).resolves.toEqual({ + profile: "vietnamese", + language: "vi,en", + }); + }); + + it("persists OCR profile changes alongside DeepSeek settings", async () => { + const store = await createStore(); + + const status = await store.save({ + deepseekApiKeyEnvName: "DEEPSEEK_API_KEY", + baseUrl: "https://api.deepseek.com", + model: "deepseek-chat", + 
ocrProfile: "hybrid", + ocrLanguage: "vi,en", + }); + + expect(status.ocr).toMatchObject({ + profile: "hybrid", + language: "vi,en", + }); + await expect(store.getOcrConfig()).resolves.toEqual({ + profile: "hybrid", + language: "vi,en", + }); + }); +}); diff --git a/electron/guide/ai/deepseekSettingsStore.ts b/electron/guide/ai/deepseekSettingsStore.ts index 16673fe..0ea5753 100644 --- a/electron/guide/ai/deepseekSettingsStore.ts +++ b/electron/guide/ai/deepseekSettingsStore.ts @@ -1,6 +1,10 @@ import fs from "node:fs/promises"; import path from "node:path"; -import type { GuideAiSettings, SaveGuideAiSettingsInput } from "../../../src/guide/contracts"; +import type { + GuideAiSettings, + GuideOcrProfile, + SaveGuideAiSettingsInput, +} from "../../../src/guide/contracts"; export interface DeepSeekGuideConfig { apiKey?: string; @@ -12,8 +16,22 @@ export interface DeepSeekGuideConfigProvider { getDeepSeekConfig(): Promise; } +export interface GuideOcrConfig { + profile: GuideOcrProfile; + language: string; +} + +export interface GuideOcrConfigProvider { + getOcrConfig(): Promise; +} + interface PersistedGuideAiSettings { schemaVersion: 1; + ocr?: { + profile?: GuideOcrProfile; + language?: string; + updatedAt?: string; + }; deepseek?: { apiKeyEnvName?: string; baseUrl?: string; @@ -25,8 +43,10 @@ interface PersistedGuideAiSettings { const DEFAULT_DEEPSEEK_API_KEY_ENV_NAME = "DEEPSEEK_API_KEY"; const DEFAULT_DEEPSEEK_BASE_URL = "https://api.deepseek.com"; const DEFAULT_DEEPSEEK_MODEL = "deepseek-chat"; +const DEFAULT_OCR_PROFILE: GuideOcrProfile = "vietnamese"; +const DEFAULT_OCR_LANGUAGE = "vi,en"; -export class DeepSeekSettingsStore implements DeepSeekGuideConfigProvider { +export class DeepSeekSettingsStore implements DeepSeekGuideConfigProvider, GuideOcrConfigProvider { constructor(private readonly filePath: string) {} async getStatus(): Promise { @@ -35,6 +55,13 @@ export class DeepSeekSettingsStore implements DeepSeekGuideConfigProvider { const activeApiKey = 
process.env[apiKeyEnvName]; return { + ocr: { + profile: normalizeOcrProfile(raw?.ocr?.profile ?? process.env.OPENSCREEN_GUIDE_OCR_PROFILE), + language: normalizeOcrLanguage( + raw?.ocr?.language ?? process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE, + ), + updatedAt: raw?.ocr?.updatedAt, + }, deepseek: { hasApiKey: Boolean(activeApiKey), apiKeyEnvName, @@ -49,7 +76,14 @@ export class DeepSeekSettingsStore implements DeepSeekGuideConfigProvider { async save(input: SaveGuideAiSettingsInput): Promise { const current = (await this.readSettings()) ?? { schemaVersion: 1 }; + const currentOcr = current.ocr ?? {}; const currentDeepSeek = current.deepseek ?? {}; + const nextOcr = { + ...currentOcr, + profile: normalizeOcrProfile(input.ocrProfile ?? currentOcr.profile), + language: normalizeOcrLanguage(input.ocrLanguage ?? currentOcr.language), + updatedAt: new Date().toISOString(), + }; const nextDeepSeek = { ...currentDeepSeek, baseUrl: normalizeBaseUrl(input.baseUrl ?? currentDeepSeek.baseUrl), @@ -65,6 +99,7 @@ export class DeepSeekSettingsStore implements DeepSeekGuideConfigProvider { await this.writeSettings({ schemaVersion: 1, + ocr: nextOcr, deepseek: nextDeepSeek, }); return await this.getStatus(); @@ -80,6 +115,16 @@ export class DeepSeekSettingsStore implements DeepSeekGuideConfigProvider { }; } + async getOcrConfig(): Promise { + const raw = await this.readSettings(); + return { + profile: normalizeOcrProfile(raw?.ocr?.profile ?? process.env.OPENSCREEN_GUIDE_OCR_PROFILE), + language: normalizeOcrLanguage( + raw?.ocr?.language ?? 
process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE, + ), + }; + } + private async readSettings(): Promise { try { const content = await fs.readFile(this.filePath, "utf-8"); @@ -120,6 +165,11 @@ function normalizePersistedSettings(input: unknown): PersistedGuideAiSettings | } return { schemaVersion: 1, + ocr: { + profile: normalizeOcrProfile(raw.ocr?.profile), + language: normalizeOcrLanguage(raw.ocr?.language), + updatedAt: raw.ocr?.updatedAt, + }, deepseek: { apiKeyEnvName: normalizeEnvName(raw.deepseek?.apiKeyEnvName), baseUrl: raw.deepseek?.baseUrl, @@ -155,3 +205,19 @@ function normalizeBaseUrl(value: string | undefined): string { function normalizeModel(value: string | undefined): string { return value?.trim() || DEFAULT_DEEPSEEK_MODEL; } + +function normalizeOcrProfile(value: string | undefined): GuideOcrProfile { + if (value === "fast" || value === "vietnamese" || value === "hybrid") { + return value; + } + return DEFAULT_OCR_PROFILE; +} + +function normalizeOcrLanguage(value: string | undefined): string { + const normalized = value + ?.split(",") + .map((part) => part.trim().toLowerCase()) + .filter(Boolean) + .join(","); + return normalized || DEFAULT_OCR_LANGUAGE; +} diff --git a/electron/guide/guideStore.ts b/electron/guide/guideStore.ts index b2d8725..57f9709 100644 --- a/electron/guide/guideStore.ts +++ b/electron/guide/guideStore.ts @@ -34,7 +34,10 @@ import { DeepSeekGuideClientError, type GuideDraftClient, } from "./ai/deepseekGuideClient"; -import type { DeepSeekGuideConfigProvider } from "./ai/deepseekSettingsStore"; +import type { + DeepSeekGuideConfigProvider, + GuideOcrConfigProvider, +} from "./ai/deepseekSettingsStore"; import { type GuidePaths, normalizeGuideRecordingId, resolveGuidePaths } from "./guidePaths"; import { createFocusedOcrSnapshot, remapFocusedOcrBlocks } from "./ocr/focusedOcrSnapshot"; import { DefaultGuideOcrClient, type GuideOcrClient } from "./ocr/paddleOcrClient"; @@ -70,6 +73,7 @@ export interface GuideStoreDependencies { 
ocrClient?: GuideOcrClient; draftClient?: GuideDraftClient; deepSeekConfigProvider?: DeepSeekGuideConfigProvider; + ocrConfigProvider?: GuideOcrConfigProvider; focusOcrSnapshots?: boolean; } @@ -255,7 +259,9 @@ export class GuideStore { throw new GuideStoreError("guide-invalid-input", "No guide snapshots are available for OCR."); } - const ocrClient = this.dependencies.ocrClient ?? new DefaultGuideOcrClient(); + const ocrClient = + this.dependencies.ocrClient ?? + DefaultGuideOcrClient.fromConfig(await this.dependencies.ocrConfigProvider?.getOcrConfig()); const shouldFocusOcrSnapshots = this.dependencies.focusOcrSnapshots ?? this.dependencies.ocrClient === undefined; const eventsById = new Map(session.events.map((event) => [event.id, event])); diff --git a/electron/guide/ocr/bundledOcrService.ts b/electron/guide/ocr/bundledOcrService.ts index ccb936f..d278b81 100644 --- a/electron/guide/ocr/bundledOcrService.ts +++ b/electron/guide/ocr/bundledOcrService.ts @@ -156,8 +156,10 @@ function startOcrServiceProcess( OPENSCREEN_OCR_PORT: DEFAULT_OCR_PORT, PADDLEOCR_DEVICE: process.env.PADDLEOCR_DEVICE ?? "cpu", PADDLEOCR_ENABLE_MKLDNN: process.env.PADDLEOCR_ENABLE_MKLDNN ?? "0", - PADDLEOCR_LANG: process.env.PADDLEOCR_LANG ?? "latin", + PADDLEOCR_LANG: process.env.PADDLEOCR_LANG ?? "", PADDLEOCR_USE_MOBILE: process.env.PADDLEOCR_USE_MOBILE ?? "1", + OPENSCREEN_OCR_PROFILE: + process.env.OPENSCREEN_OCR_PROFILE ?? process.env.OPENSCREEN_GUIDE_OCR_PROFILE ?? "", PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT: process.env.PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT ?? "False", PADDLE_PDX_CACHE_HOME: process.env.PADDLE_PDX_CACHE_HOME ?? 
runtimePaths.paddlexCachePath, PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK: diff --git a/electron/guide/ocr/paddleOcrClient.test.ts b/electron/guide/ocr/paddleOcrClient.test.ts index 38c0f79..f7f8b51 100644 --- a/electron/guide/ocr/paddleOcrClient.test.ts +++ b/electron/guide/ocr/paddleOcrClient.test.ts @@ -1,8 +1,12 @@ -import { describe, expect, it } from "vitest"; +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it, vi } from "vitest"; import type { GuideSnapshot, OcrBlock } from "../../../src/guide/contracts"; import { DefaultGuideOcrClient, normalizeOcrResponse, + PaddleOcrHttpClient, parseWindowsOcrPayload, } from "./paddleOcrClient"; @@ -16,6 +20,10 @@ const snapshot: GuideSnapshot = { height: 800, }; +afterEach(() => { + vi.unstubAllGlobals(); +}); + describe("normalizeOcrResponse", () => { it("normalizes pixel boxes into guide OCR blocks", () => { const blocks = normalizeOcrResponse( @@ -67,6 +75,35 @@ describe("normalizeOcrResponse", () => { }); }); +describe("PaddleOcrHttpClient", () => { + it("sends the selected OCR profile to the local service", async () => { + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "openscreen-ocr-client-")); + const imagePath = path.join(tempDir, "step.png"); + await fs.writeFile(imagePath, Buffer.from([137, 80, 78, 71])); + const requests: unknown[] = []; + vi.stubGlobal( + "fetch", + vi.fn(async (_url: string, init?: RequestInit) => { + requests.push(JSON.parse(String(init?.body ?? 
"{}"))); + return new Response(JSON.stringify({ blocks: [] }), { + status: 200, + headers: { "content-type": "application/json" }, + }); + }), + ); + + const client = new PaddleOcrHttpClient("https://ocr.example.test", "vi,en", "hybrid"); + await client.recognize({ ...snapshot, path: imagePath }); + + expect(requests[0]).toMatchObject({ + language: "vi,en", + profile: "hybrid", + path: imagePath, + }); + await fs.rm(tempDir, { recursive: true, force: true }); + }); +}); + describe("DefaultGuideOcrClient", () => { it("falls back when the HTTP OCR service is unavailable", async () => { const fallbackBlock: OcrBlock = { diff --git a/electron/guide/ocr/paddleOcrClient.ts b/electron/guide/ocr/paddleOcrClient.ts index 2c63b3d..0def50d 100644 --- a/electron/guide/ocr/paddleOcrClient.ts +++ b/electron/guide/ocr/paddleOcrClient.ts @@ -1,7 +1,7 @@ import { execFile } from "node:child_process"; import fs from "node:fs/promises"; import { promisify } from "node:util"; -import type { GuideSnapshot, OcrBlock } from "../../../src/guide/contracts"; +import type { GuideOcrProfile, GuideSnapshot, OcrBlock } from "../../../src/guide/contracts"; import { ensureBundledOcrServiceRunning } from "./bundledOcrService"; const execFileAsync = promisify(execFile); @@ -10,6 +10,11 @@ export interface GuideOcrClient { recognize(snapshot: GuideSnapshot): Promise; } +export interface GuideOcrClientConfig { + profile: GuideOcrProfile; + language: string; +} + interface PaddleOcrResponseBlock { text?: unknown; confidence?: unknown; @@ -21,7 +26,8 @@ interface PaddleOcrResponseBlock { export class PaddleOcrHttpClient implements GuideOcrClient { constructor( private readonly baseUrl = process.env.OPENSCREEN_GUIDE_OCR_URL ?? "http://127.0.0.1:8866", - private readonly language = process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE ?? 
"vi,en", + private readonly language = normalizeOcrLanguage(process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE), + private readonly profile = normalizeOcrProfile(process.env.OPENSCREEN_GUIDE_OCR_PROFILE), ) {} async recognize(snapshot: GuideSnapshot): Promise { @@ -36,6 +42,7 @@ export class PaddleOcrHttpClient implements GuideOcrClient { imageBase64, path: snapshot.path, language: this.language, + profile: this.profile, }), }); } catch (error) { @@ -54,7 +61,9 @@ export class PaddleOcrHttpClient implements GuideOcrClient { } export class WindowsOcrClient implements GuideOcrClient { - constructor(private readonly language = process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE ?? "vi,en") {} + constructor( + private readonly language = normalizeOcrLanguage(process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE), + ) {} async recognize(snapshot: GuideSnapshot): Promise { if (process.platform !== "win32") { @@ -96,6 +105,14 @@ export class WindowsOcrClient implements GuideOcrClient { } export class DefaultGuideOcrClient implements GuideOcrClient { + static fromConfig(config?: Partial): DefaultGuideOcrClient { + const normalizedConfig = normalizeOcrClientConfig(config); + return new DefaultGuideOcrClient( + new PaddleOcrHttpClient(undefined, normalizedConfig.language, normalizedConfig.profile), + new WindowsOcrClient(normalizedConfig.language), + ); + } + constructor( private readonly httpClient = new PaddleOcrHttpClient(), private readonly windowsClient = new WindowsOcrClient(), @@ -119,6 +136,31 @@ export class DefaultGuideOcrClient implements GuideOcrClient { } } +function normalizeOcrClientConfig( + config: Partial | undefined, +): GuideOcrClientConfig { + return { + profile: normalizeOcrProfile(config?.profile ?? process.env.OPENSCREEN_GUIDE_OCR_PROFILE), + language: normalizeOcrLanguage(config?.language ?? 
process.env.OPENSCREEN_GUIDE_OCR_LANGUAGE), + }; +} + +function normalizeOcrProfile(value: string | undefined): GuideOcrProfile { + if (value === "fast" || value === "vietnamese" || value === "hybrid") { + return value; + } + return "vietnamese"; +} + +function normalizeOcrLanguage(value: string | undefined): string { + const normalized = value + ?.split(",") + .map((part) => part.trim().toLowerCase()) + .filter(Boolean) + .join(","); + return normalized || "vi,en"; +} + export function parseWindowsOcrPayload(stdout: string): unknown { const normalized = stdout.replace(/^\uFEFF/, "").trim(); try { diff --git a/electron/ipc/handlers.ts b/electron/ipc/handlers.ts index 551109c..4fad32a 100644 --- a/electron/ipc/handlers.ts +++ b/electron/ipc/handlers.ts @@ -1732,7 +1732,7 @@ export function registerIpcHandlers( const sources = await desktopCapturer.getSources(opts); lastEnumeratedSources = new Map(sources.map((source) => [source.id, source])); let screenSourceIndex = 0; - return sources.map((source) => { + const processedSources = sources.map((source) => { const isScreenSource = source.id.startsWith("screen:"); const sourceIndex = isScreenSource ? (parseDesktopCapturerScreenIndex(source.id) ?? 
screenSourceIndex) @@ -1760,6 +1760,43 @@ export function registerIpcHandlers( bounds, }; }); + const screenDisplays = screen.getAllDisplays(); + const mappedDisplayIds = new Set( + processedSources + .filter((source) => source.id.startsWith("screen:") && typeof source.displayId === "number") + .map((source) => source.displayId), + ); + const fallbackScreenSources = screenDisplays + .map((display, displayIndex) => ({ display, displayIndex })) + .filter(({ display }) => !mappedDisplayIds.has(display.id)) + .map(({ display, displayIndex }) => { + const bounds = toSourceBounds(display.bounds); + return { + id: `screen:${displayIndex}:fallback:${display.id}`, + name: `Screen ${displayIndex + 1}`, + display_id: String(display.id), + thumbnail: null, + appIcon: null, + displayId: display.id, + displayIndex, + screenIndex: displayIndex, + displayLabel: `Display ${displayIndex + 1} - ${bounds.width}x${bounds.height} @ ${bounds.x},${bounds.y}`, + bounds, + }; + }); + if (fallbackScreenSources.length > 0) { + console.warn("[desktop-capturer] added fallback display sources", { + capturerScreens: processedSources.filter((source) => source.id.startsWith("screen:")) + .length, + electronDisplays: screenDisplays.length, + fallbackScreens: fallbackScreenSources.map((source) => ({ + id: source.id, + displayId: source.displayId, + bounds: source.bounds, + })), + }); + } + return [...processedSources, ...fallbackScreenSources]; }); ipcMain.handle("select-source", async (_, source: SelectedSource) => { @@ -2637,6 +2674,7 @@ export function registerIpcHandlers( ); const guideStore = new GuideStore(RECORDINGS_DIR, { deepSeekConfigProvider: guideAiSettingsStore, + ocrConfigProvider: guideAiSettingsStore, }); registerGuideMarkerHotkey(guideStore); registerGuideIpcHandlers(ipcMain, guideStore, guideAiSettingsStore, { diff --git a/package-lock.json b/package-lock.json index 749fe7b..6879eee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "openscreen", - 
"version": "1.4.2", + "version": "1.4.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "openscreen", - "version": "1.4.2", + "version": "1.4.4", "dependencies": { "@fix-webm-duration/fix": "^1.0.1", "@pixi/filter-drop-shadow": "^5.2.0", diff --git a/package.json b/package.json index 2c15f2d..c22bd2c 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "openscreen", "private": true, - "version": "1.4.2", + "version": "1.4.4", "type": "module", "packageManager": "npm@10.9.4", "engines": { diff --git a/src/components/launch/SourceSelector.tsx b/src/components/launch/SourceSelector.tsx index 495577c..86ee8e8 100644 --- a/src/components/launch/SourceSelector.tsx +++ b/src/components/launch/SourceSelector.tsx @@ -65,7 +65,13 @@ export function SourceSelector() { fetchSources(); }, []); - const screenSources = sources.filter((s) => s.id.startsWith("screen:")); + const screenSources = sources + .filter((s) => s.id.startsWith("screen:")) + .sort( + (left, right) => + (left.displayIndex ?? left.screenIndex ?? Number.MAX_SAFE_INTEGER) - + (right.displayIndex ?? right.screenIndex ?? Number.MAX_SAFE_INTEGER), + ); const windowSources = sources.filter((s) => s.id.startsWith("window:")); const handleSourceSelect = (source: DesktopSource) => setSelectedSource(source); @@ -96,11 +102,17 @@ export function SourceSelector() { onClick={() => handleSourceSelect(source)} >
- {source.name} + {source.thumbnail ? ( + {source.name} + ) : ( +
+ {source.displayLabel ?? source.name} +
+ )} {isSelected && (
diff --git a/src/components/video-editor/guide/GuidePanel.tsx b/src/components/video-editor/guide/GuidePanel.tsx index 7ef6bb7..ee9507f 100644 --- a/src/components/video-editor/guide/GuidePanel.tsx +++ b/src/components/video-editor/guide/GuidePanel.tsx @@ -7,6 +7,7 @@ import type { GuideAiProvider, GuideAiSettings, GuideLanguage, + GuideOcrProfile, GuideSession, } from "@/guide/contracts"; import { captureGuideSnapshots } from "@/guide/snapshot/extractGuideSnapshots"; @@ -42,13 +43,19 @@ const COPY = { captureStep: "Capture step", captureLabel: "Manual capture", settings: "Settings", + guideSettings: "Guide settings", apiKey: "API key env", apiKeyPlaceholder: "DEEPSEEK_API_KEY", baseUrl: "Base URL", model: "Model", + ocrProfile: "OCR profile", + ocrLanguage: "OCR languages", + ocrFast: "Fast Latin", + ocrVietnamese: "Vietnamese Enhanced", + ocrHybrid: "Hybrid Vi + Latin", saveSettings: "Save", clearKey: "Reset env", - keySaved: "DeepSeek settings saved.", + settingsSaved: "Guide settings saved.", keyMissing: "Set a DeepSeek API key environment variable before generating with DeepSeek.", keyConfigured: "Env ready", keyNotConfigured: "Env value missing", @@ -78,13 +85,19 @@ const COPY = { captureStep: "Chụp bước", captureLabel: "Chụp thủ công", settings: "Cài đặt", + guideSettings: "Guide settings", apiKey: "API key env", apiKeyPlaceholder: "DEEPSEEK_API_KEY", baseUrl: "Base URL", model: "Model", + ocrProfile: "OCR profile", + ocrLanguage: "OCR languages", + ocrFast: "Fast Latin", + ocrVietnamese: "Vietnamese Enhanced", + ocrHybrid: "Hybrid Vi + Latin", saveSettings: "Lưu", clearKey: "Reset env", - keySaved: "Đã lưu cài đặt DeepSeek.", + settingsSaved: "Đã lưu cài đặt guide.", keyMissing: "Hãy set biến môi trường DeepSeek API key trước khi tạo draft bằng DeepSeek.", keyConfigured: "Env ready", keyNotConfigured: "Chưa thấy giá trị env", @@ -108,6 +121,8 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan const [deepSeekApiKeyEnvName, 
setDeepSeekApiKeyEnvName] = useState("DEEPSEEK_API_KEY"); const [deepSeekBaseUrl, setDeepSeekBaseUrl] = useState("https://api.deepseek.com"); const [deepSeekModel, setDeepSeekModel] = useState("deepseek-chat"); + const [ocrProfile, setOcrProfile] = useState("vietnamese"); + const [ocrLanguage, setOcrLanguage] = useState("vi,en"); const [message, setMessage] = useState(null); const isBusy = busyAction !== null; @@ -138,6 +153,8 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan setDeepSeekBaseUrl(result.data.deepseek.baseUrl); setDeepSeekModel(result.data.deepseek.model); setDeepSeekApiKeyEnvName(result.data.deepseek.apiKeyEnvName); + setOcrProfile(result.data.ocr.profile); + setOcrLanguage(result.data.ocr.language); }, []); useEffect(() => { @@ -269,6 +286,8 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan deepseekApiKeyEnvName: deepSeekApiKeyEnvName, baseUrl: deepSeekBaseUrl, model: deepSeekModel, + ocrProfile, + ocrLanguage, }); if (!result.success) { throw new Error(result.error); @@ -277,7 +296,9 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan setDeepSeekApiKeyEnvName(result.data.deepseek.apiKeyEnvName); setDeepSeekBaseUrl(result.data.deepseek.baseUrl); setDeepSeekModel(result.data.deepseek.model); - toast.success(copy.keySaved); + setOcrProfile(result.data.ocr.profile); + setOcrLanguage(result.data.ocr.language); + toast.success(copy.settingsSaved); } catch (error) { const text = error instanceof Error ? 
error.message : String(error); setMessage(text); @@ -285,7 +306,14 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan } finally { setSettingsBusy(false); } - }, [copy.keySaved, deepSeekApiKeyEnvName, deepSeekBaseUrl, deepSeekModel]); + }, [ + copy.settingsSaved, + deepSeekApiKeyEnvName, + deepSeekBaseUrl, + deepSeekModel, + ocrLanguage, + ocrProfile, + ]); const handleClearDeepSeekKey = useCallback(async () => { if (!window.electronAPI?.guide?.saveAiSettings) { @@ -298,13 +326,17 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan clearDeepseekApiKeyEnvName: true, baseUrl: deepSeekBaseUrl, model: deepSeekModel, + ocrProfile, + ocrLanguage, }); if (!result.success) { throw new Error(result.error); } setAiSettings(result.data); setDeepSeekApiKeyEnvName(result.data.deepseek.apiKeyEnvName); - toast.success(copy.keySaved); + setOcrProfile(result.data.ocr.profile); + setOcrLanguage(result.data.ocr.language); + toast.success(copy.settingsSaved); } catch (error) { const text = error instanceof Error ? error.message : String(error); setMessage(text); @@ -312,7 +344,7 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan } finally { setSettingsBusy(false); } - }, [copy.keySaved, deepSeekBaseUrl, deepSeekModel]); + }, [copy.settingsSaved, deepSeekBaseUrl, deepSeekModel, ocrLanguage, ocrProfile]); const handleGenerateGuide = useCallback(() => { void runAction("generate", async () => { @@ -455,7 +487,7 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
- {copy.deepseek} {copy.settings} + {copy.guideSettings}
{aiSettings?.deepseek.hasApiKey @@ -470,6 +502,33 @@ export function GuidePanel({ recordingId, videoPath, videoSourcePath }: GuidePan
+
+ + +
+