Files
duck-stain-yolo/app/ocr/base.py

55 lines
1.4 KiB
Python

from __future__ import annotations
from dataclasses import dataclass, field
from typing import Protocol
import cv2
import numpy as np
@dataclass
class OcrLine:
    """One recognised line of text within an OCR result.

    Only ``text`` is required; the remaining fields default to ``None``
    when the engine does not supply them.
    """

    # The recognised string for this line.
    text: str
    # Score attached to this line; None when not provided.
    # NOTE(review): value range (e.g. 0..1 vs 0..100) is not fixed here - confirm per engine.
    confidence: float | None = None
    # Nested list of floats locating the line; None when not provided.
    # NOTE(review): presumably a list of [x, y] corner points - confirm against the engines.
    bbox: list[list[float]] | None = None
@dataclass
class OcrResult:
    """Outcome of a single OCR call.

    Every field has a default, so ``OcrResult()`` is a valid "nothing
    recognised" value: empty text, no lines, no error, engine ``"none"``.
    """

    # Recognised text for the whole result (empty string by default).
    text: str = ""
    # Score for the whole result; None when not provided.
    confidence: float | None = None
    # Per-line details; each instance gets its own fresh list.
    lines: list[OcrLine] = field(default_factory=list)
    # Error description; None by default.
    # NOTE(review): presumably set only when the OCR call failed - confirm with the engines.
    error: str | None = None
    # Elapsed time, in milliseconds going by the field name.
    elapsed_ms: float = 0.0
    # Name of the engine that produced this result; "none" by default.
    engine: str = "none"
class OcrEngine(Protocol):
    """Structural interface for OCR backends.

    Any object exposing a ``name`` attribute and a compatible
    ``read_label`` method satisfies this protocol; no inheritance needed.
    """

    # Identifier of the engine implementation.
    name: str

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> OcrResult:
        """Run OCR on the ``bbox`` region of ``frame_bgr`` and return the result.

        ``bbox`` is a 4-tuple of ints.
        NOTE(review): presumably (x1, y1, x2, y2) pixel corners, matching
        ``crop_bbox`` - confirm with the implementations.
        """
def crop_bbox(frame_bgr: np.ndarray, bbox: tuple[int, int, int, int], margin: int = 0) -> np.ndarray | None:
    """Cut the ``bbox`` region, grown by ``margin`` pixels, out of ``frame_bgr``.

    Args:
        frame_bgr: Image array whose first two axes are (height, width).
        bbox: ``(x1, y1, x2, y2)`` corners; ``x2``/``y2`` are exclusive.
            Non-integer coordinates are snapped outward (floor for the
            top-left corner, ceil for the bottom-right) so the crop never
            loses pixels the box touched.
        margin: Extra pixels added on every side before clamping to the frame.

    Returns:
        A slice of ``frame_bgr`` (a NumPy view, not a copy), or ``None`` when
        the frame is missing/empty or the clamped box has no area.
    """
    import math  # local import: keeps the module's top-level import block untouched

    # A missing or empty frame simply yields "no crop", like a degenerate box.
    if frame_bgr is None or frame_bgr.size == 0:
        return None

    x1, y1, x2, y2 = bbox
    pad = int(margin)
    frame_h, frame_w = frame_bgr.shape[:2]

    # Detectors often emit float coordinates; slicing needs ints, so snap
    # outward. For integer inputs floor/ceil are no-ops.
    left = max(0, math.floor(x1) - pad)
    top = max(0, math.floor(y1) - pad)
    right = min(frame_w, math.ceil(x2) + pad)
    bottom = min(frame_h, math.ceil(y2) + pad)

    # Box lies entirely outside the frame, or is inverted/zero-sized.
    if right <= left or bottom <= top:
        return None
    return frame_bgr[top:bottom, left:right]
def prepare_ocr_image(image_bgr: np.ndarray, config: dict) -> np.ndarray:
    """Optionally rescale and binarise an image before it is handed to OCR.

    Config keys read:
        ``"scale"``: factor applied to both axes with cubic interpolation;
            defaults to 1.0, in which case no resize happens.
        ``"threshold"``: when truthy, the image is converted to grayscale,
            blurred with a 3x3 Gaussian kernel and binarised with Otsu's
            method; defaults to False.

    Returns:
        The input array itself when neither step applies, the resized image
        when only scaling applies, or the single-channel thresholded image
        when ``"threshold"`` is truthy.
    """
    factor = float(config.get("scale", 1.0))
    prepared = image_bgr
    if factor != 1.0:
        prepared = cv2.resize(
            image_bgr,
            None,
            fx=factor,
            fy=factor,
            interpolation=cv2.INTER_CUBIC,
        )

    if config.get("threshold", False):
        grayscale = cv2.cvtColor(prepared, cv2.COLOR_BGR2GRAY)
        # Light blur before Otsu picks the threshold.
        smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)
        _, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return binary

    return prepared