Files
openscreen/scripts/build-windows-ocr-service.mjs
2026-05-28 07:07:30 +07:00

164 lines
4.0 KiB
JavaScript

import { execFileSync } from "node:child_process";
import fs from "node:fs";
import path from "node:path";
import process from "node:process";
import { fileURLToPath } from "node:url";
// --- Build layout -----------------------------------------------------------
// Every path below is derived from the repository root, which is the parent
// of the directory that contains this script.
const SCRIPT_DIR = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(SCRIPT_DIR, "..");

// Python sources of the OCR service, including its requirements.txt.
const OCR_DIR = path.join(ROOT, "tools", "ocr");

// Dedicated virtual environment used only for this build, and the interpreter
// inside it (Windows venv layout: Scripts\python.exe).
const VENV_DIR = path.join(ROOT, ".venv-ocr-build");
const VENV_PYTHON = path.join(VENV_DIR, "Scripts", "python.exe");

// PyInstaller output (dist) and scratch (build) directories.
const DIST_DIR = path.join(OCR_DIR, "dist");
const WORK_DIR = path.join(OCR_DIR, "build");

// Model cache directory; passed to the cache-priming step as
// PADDLE_PDX_CACHE_HOME.
const MODEL_CACHE_DIR = path.join(OCR_DIR, "models", "paddlex");

// Python entry point handed to PyInstaller.
const ENTRYPOINT = path.join(OCR_DIR, "openscreen_ocr_service_entry.py");

// Expected location of the bundled service; the executable's presence is
// checked after the PyInstaller run.
const SERVICE_NAME = "openscreen-ocr-service";
const OUTPUT_DIR = path.join(DIST_DIR, SERVICE_NAME);
const OUTPUT_EXE = path.join(OUTPUT_DIR, `${SERVICE_NAME}.exe`);

// Model directories that must exist under "<cache>/official_models" for the
// cache to be considered ready.
const REQUIRED_MODEL_NAMES = ["PP-OCRv5_mobile_det", "latin_PP-OCRv5_mobile_rec"];
// The rest of this script targets Windows only (the venv interpreter is looked
// up at Scripts\python.exe and the expected artifact is an .exe), so on any
// other host it reports that the build is skipped and exits with status 0.
const isWindowsHost = process.platform === "win32";
if (!isWindowsHost) {
  console.log("Skipping Windows OCR service build on non-Windows host.");
  process.exit(0);
}
/**
 * Echo a command line to stdout, then execute it synchronously.
 *
 * By default the child runs with the repository root as its working directory
 * and shares this process's stdio; any key in `options` overrides those
 * defaults.
 *
 * @param {string} command - Executable to launch.
 * @param {string[]} args - Arguments for the executable (echoed space-joined).
 * @param {object} [options] - Extra `execFileSync` options, e.g. `env`.
 * @returns {void}
 * @throws Whatever `execFileSync` throws, e.g. when the child exits non-zero.
 */
function run(command, args, options = {}) {
  const renderedArgs = args.join(" ");
  console.log(`> ${command} ${renderedArgs}`);

  const defaults = { cwd: ROOT, stdio: "inherit" };
  execFileSync(command, args, { ...defaults, ...options });
}
/**
 * Create the build virtual environment unless its interpreter already exists.
 *
 * The venv is created with the interpreter named by the PYTHON environment
 * variable, falling back to `python` when that variable is unset.
 *
 * @returns {void}
 */
function ensureVenv() {
  const venvIsReady = fs.existsSync(VENV_PYTHON);
  if (!venvIsReady) {
    const bootstrapPython = process.env.PYTHON ?? "python";
    run(bootstrapPython, ["-m", "venv", VENV_DIR]);
  }
}
/**
 * Install the build dependencies into the virtual environment with pip.
 *
 * Runs three `pip install` invocations in order: upgrade pip itself, install
 * the OCR service's requirements.txt, then install PyInstaller (>= 6.0).
 *
 * @returns {void}
 */
function installDependencies() {
  const requirementsFile = path.join(OCR_DIR, "requirements.txt");
  const pipInstalls = [
    ["--upgrade", "pip"],
    ["-r", requirementsFile],
    ["pyinstaller>=6.0"],
  ];

  for (const installArgs of pipInstalls) {
    run(VENV_PYTHON, ["-m", "pip", "install", ...installArgs]);
  }
}
/**
 * Make sure the OCR model cache is populated before the service is built.
 *
 * The cache counts as ready when every name in REQUIRED_MODEL_NAMES exists
 * under "<MODEL_CACHE_DIR>/official_models". Otherwise that directory is
 * created and the venv interpreter runs a short inline script that imports
 * `_create_engine` from `paddle_ocr_service` and calls it for the "latin"
 * language, with the cache location and CPU/mobile settings supplied through
 * environment variables.
 * NOTE(review): presumably creating the engine is what downloads the missing
 * models into the cache; that helper is not visible here, so confirm.
 *
 * @returns {void}
 */
function prepareModelCache() {
  const officialModelsDir = path.join(MODEL_CACHE_DIR, "official_models");
  const isAnyModelMissing = REQUIRED_MODEL_NAMES.some(
    (modelName) => !fs.existsSync(path.join(officialModelsDir, modelName)),
  );
  if (!isAnyModelMissing) {
    return;
  }

  fs.mkdirSync(officialModelsDir, { recursive: true });

  // Single-line Python program for `python -c`; OCR_DIR is embedded as a
  // JSON-quoted string literal so the service module can be imported.
  const primeCacheScript = [
    "import sys",
    `sys.path.insert(0, ${JSON.stringify(OCR_DIR)})`,
    "from paddle_ocr_service import _create_engine",
    "_create_engine('latin')",
  ].join("; ");

  // Inherit the current environment and layer the OCR settings on top.
  const primeCacheEnv = {
    ...process.env,
    PADDLE_PDX_CACHE_HOME: MODEL_CACHE_DIR,
    PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK: "True",
    PADDLE_PDX_ENABLE_MKLDNN_BYDEFAULT: "False",
    PADDLEOCR_DEVICE: "cpu",
    PADDLEOCR_ENABLE_MKLDNN: "0",
    PADDLEOCR_LANG: "latin",
    PADDLEOCR_USE_MOBILE: "1",
    PYTHONUTF8: "1",
  };

  run(VENV_PYTHON, ["-c", primeCacheScript], { env: primeCacheEnv });
}
/**
 * Bundle the OCR service into a one-directory PyInstaller distribution.
 *
 * Removes any previous output directory, makes sure the dist and work
 * directories exist, runs PyInstaller from the build venv, and finally
 * verifies that the expected executable was produced.
 *
 * @returns {void}
 * @throws {Error} If OUTPUT_EXE does not exist after the PyInstaller run
 *   (errors raised by `run` propagate unchanged).
 */
function buildService() {
  // Start from a clean output directory so stale files never survive a build.
  fs.rmSync(OUTPUT_DIR, { recursive: true, force: true });
  fs.mkdirSync(DIST_DIR, { recursive: true });
  fs.mkdirSync(WORK_DIR, { recursive: true });

  // Packages passed to --collect-all.
  const collectAllPackages = [
    "paddleocr",
    "paddle",
    "paddlex",
    "cv2",
    "shapely",
    "pyclipper",
    "pypdfium2",
    "bidi",
  ];
  // Distributions passed to --copy-metadata.
  const metadataDistributions = [
    "paddleocr",
    "paddlex",
    "paddlepaddle",
    "opencv-contrib-python",
    "shapely",
    "pyclipper",
    "pypdfium2",
    "python-bidi",
  ];
  // Modules passed to --hidden-import.
  const hiddenImports = [
    "uvicorn.logging",
    "uvicorn.loops",
    "uvicorn.loops.auto",
    "uvicorn.protocols",
    "uvicorn.protocols.http",
    "uvicorn.protocols.http.auto",
    "uvicorn.lifespan",
    "uvicorn.lifespan.on",
  ];

  // Expand ["a", "b"] into [flag, "a", flag, "b"].
  const withFlag = (flag, values) => values.flatMap((value) => [flag, value]);

  const pyinstallerArgs = [
    "-m",
    "PyInstaller",
    "--noconfirm",
    "--clean",
    "--onedir",
    "--name",
    "openscreen-ocr-service",
    "--distpath",
    DIST_DIR,
    "--workpath",
    WORK_DIR,
    "--specpath",
    WORK_DIR,
    "--paths",
    OCR_DIR,
    ...withFlag("--collect-all", collectAllPackages),
    ...withFlag("--copy-metadata", metadataDistributions),
    ...withFlag("--hidden-import", hiddenImports),
    ENTRYPOINT,
  ];
  run(VENV_PYTHON, pyinstallerArgs);

  const wasExecutableBuilt = fs.existsSync(OUTPUT_EXE);
  if (!wasExecutableBuilt) {
    throw new Error(`OCR service build did not produce ${OUTPUT_EXE}`);
  }
  console.log(`Built OCR service: ${OUTPUT_EXE}`);
}
// Build pipeline, executed in order: create the venv, install the Python
// dependencies, populate the model cache, then bundle the service. Each step
// throws on failure, which stops the remaining steps.
const buildSteps = [ensureVenv, installDependencies, prepareModelCache, buildService];
for (const step of buildSteps) {
  step();
}