from __future__ import annotations from dataclasses import dataclass, field from typing import Protocol import cv2 import numpy as np @dataclass class OcrLine: text: str confidence: float | None = None bbox: list[list[float]] | None = None @dataclass class OcrResult: text: str = "" confidence: float | None = None lines: list[OcrLine] = field(default_factory=list) error: str | None = None elapsed_ms: float = 0.0 engine: str = "none" class OcrEngine(Protocol): name: str def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> OcrResult: ... def crop_bbox(frame_bgr: np.ndarray, bbox: tuple[int, int, int, int], margin: int = 0) -> np.ndarray | None: x1, y1, x2, y2 = bbox h, w = frame_bgr.shape[:2] x1, y1 = max(0, x1 - margin), max(0, y1 - margin) x2, y2 = min(w, x2 + margin), min(h, y2 + margin) if x2 <= x1 or y2 <= y1: return None return frame_bgr[y1:y2, x1:x2] def prepare_ocr_image(image_bgr: np.ndarray, config: dict) -> np.ndarray: scale = float(config.get("scale", 1.0)) if scale != 1.0: image_bgr = cv2.resize(image_bgr, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) if not config.get("threshold", False): return image_bgr gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY) gray = cv2.GaussianBlur(gray, (3, 3), 0) return cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]