55 lines
1.4 KiB
Python
55 lines
1.4 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Protocol
|
|
|
|
import cv2
|
|
import numpy as np
|
|
|
|
|
|
@dataclass
class OcrLine:
    """One recognised line of text, with optional score and geometry."""

    # The recognised text for this line (required, no default).
    text: str
    # Score attached to this line; None when not provided.
    confidence: float | None = None
    # Nested list of floats describing the line's location; None when not
    # provided. NOTE(review): presumably a list of [x, y] points - confirm
    # against the engines that populate it.
    bbox: list[list[float]] | None = None
|
|
|
|
|
|
@dataclass
class OcrResult:
    """Aggregate outcome of one OCR call; every field has a default."""

    # Recognised text; defaults to the empty string.
    text: str = ""
    # Score for the result as a whole; None when not provided.
    confidence: float | None = None
    # Per-line details; each instance gets its own fresh list.
    lines: list[OcrLine] = field(default_factory=list)
    # Error description; None when no error was recorded.
    error: str | None = None
    # Elapsed time. NOTE(review): milliseconds, judging by the name - confirm.
    elapsed_ms: float = 0.0
    # Name of the engine that produced the result; "none" by default.
    engine: str = "none"
|
|
|
|
|
|
class OcrEngine(Protocol):
    """Structural interface that OCR backends are expected to satisfy.

    A conforming object exposes a ``name`` attribute and a ``read_label``
    method with the signature declared below.
    """

    # Identifier of the backend.
    name: str

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> OcrResult:
        """Produce an ``OcrResult`` for ``bbox`` within ``frame_bgr``.

        NOTE(review): presumably ``bbox`` is ``(x1, y1, x2, y2)`` in pixel
        coordinates of ``frame_bgr`` - confirm with the implementations.
        """
        ...
|
|
|
|
|
|
def crop_bbox(frame_bgr: np.ndarray, bbox: tuple[int, int, int, int], margin: int = 0) -> np.ndarray | None:
    """Cut the ``bbox`` region, expanded by ``margin``, out of ``frame_bgr``.

    Args:
        frame_bgr: Array whose first two dimensions are (height, width).
        bbox: ``(x1, y1, x2, y2)`` corner coordinates; x indexes the second
            axis of the frame and y the first.
        margin: Amount added on every side of the box before clamping.

    Returns:
        The slice ``frame_bgr[y1:y2, x1:x2]`` after clamping to the frame
        bounds (a plain slice; no explicit copy is made), or ``None`` when
        the clamped box has no width or no height.
    """
    left, top, right, bottom = bbox
    frame_h, frame_w = frame_bgr.shape[:2]

    # Expand by the margin, then clamp each edge to the frame bounds.
    left = max(0, left - margin)
    top = max(0, top - margin)
    right = min(frame_w, right + margin)
    bottom = min(frame_h, bottom + margin)

    is_empty = right <= left or bottom <= top
    return None if is_empty else frame_bgr[top:bottom, left:right]
|
|
|
|
|
|
def prepare_ocr_image(image_bgr: np.ndarray, config: dict) -> np.ndarray:
    """Optionally rescale and binarise an image according to ``config``.

    Args:
        image_bgr: Input image; it is converted with ``COLOR_BGR2GRAY`` when
            thresholding is enabled.
        config: Mapping read for two keys:
            ``"scale"`` (default ``1.0``) - resize factor applied to both
            axes with cubic interpolation when it differs from 1.0;
            ``"threshold"`` (default ``False``) - when truthy, the image is
            converted to grayscale, blurred with a 3x3 Gaussian kernel and
            binarised with Otsu's threshold.

    Returns:
        The (possibly resized) input when thresholding is off, otherwise the
        thresholded single-channel image.
    """
    factor = float(config.get("scale", 1.0))
    prepared = image_bgr
    if factor != 1.0:
        prepared = cv2.resize(prepared, None, fx=factor, fy=factor, interpolation=cv2.INTER_CUBIC)

    if config.get("threshold", False):
        grayscale = cv2.cvtColor(prepared, cv2.COLOR_BGR2GRAY)
        smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)
        # cv2.threshold returns (threshold_used, image); only the image is kept.
        _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return binary

    return prepared
|