Implement OCR engine architecture with base, factory, and specific engines
This commit is contained in:
54
app/ocr/base.py
Normal file
54
app/ocr/base.py
Normal file
@@ -0,0 +1,54 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Protocol
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
class OcrLine:
    """A single line of text produced by an OCR engine.

    Only ``text`` is required; the remaining fields default to ``None``
    so that engines which do not report them can omit them.
    """

    # The recognised text of this line.
    text: str
    # Score attached to this line; None when the engine reports none.
    # NOTE(review): the value range (0-1 vs 0-100) is not fixed here -- confirm per engine.
    confidence: float | None = None
    # Nested list of floats locating the line; None when unavailable.
    # NOTE(review): presumably a list of [x, y] points -- confirm against the engines.
    bbox: list[list[float]] | None = None
|
||||
|
||||
|
||||
@dataclass
class OcrResult:
    """Outcome of one OCR call.

    Every field has a default, so ``OcrResult()`` is a valid empty
    result with no text, no lines, no error, and engine ``"none"``.
    """

    # Recognised text; empty string by default.
    # NOTE(review): presumably the per-line texts combined -- confirm against the engines.
    text: str = ""
    # Score for the result as a whole; None when not reported.
    confidence: float | None = None
    # Per-line results; a fresh list is created for each instance.
    lines: list[OcrLine] = field(default_factory=list)
    # Error description; None by default.
    # NOTE(review): presumably set only when recognition failed -- confirm against the engines.
    error: str | None = None
    # NOTE(review): presumably the duration of the OCR call in milliseconds -- confirm.
    elapsed_ms: float = 0.0
    # Engine identifier; defaults to "none".
    # NOTE(review): presumably the producing engine's ``name`` -- confirm against the engines.
    engine: str = "none"
|
||||
|
||||
|
||||
class OcrEngine(Protocol):
    """Structural interface for OCR engines.

    Because this is a ``typing.Protocol``, any object exposing a ``name``
    attribute and a compatible ``read_label`` method satisfies it; no
    inheritance is required.
    """

    # Engine identifier.
    name: str

    def read_label(
        self,
        frame_bgr: np.ndarray,
        bbox: tuple[int, int, int, int],
    ) -> OcrResult:
        """Run OCR on the region of ``frame_bgr`` described by ``bbox``.

        Args:
            frame_bgr: Image array to read from.
                NOTE(review): presumably BGR channel order, per the name -- confirm.
            bbox: Four integers describing the region of interest.
                NOTE(review): presumably ``(x1, y1, x2, y2)``, matching
                ``crop_bbox`` in this module -- confirm.

        Returns:
            An ``OcrResult`` describing what was read.
        """
        ...
|
||||
|
||||
|
||||
def crop_bbox(
    frame_bgr: np.ndarray,
    bbox: tuple[int, int, int, int],
    margin: int = 0,
) -> np.ndarray | None:
    """Crop ``bbox`` (optionally padded by ``margin``) out of ``frame_bgr``.

    The box is expanded by ``margin`` pixels on every side and then clamped
    to the frame bounds, so a box partly outside the frame yields the
    overlapping part only.

    Args:
        frame_bgr: Image array whose first two axes are (height, width).
            ``None`` is tolerated and yields ``None``.
        bbox: ``(x1, y1, x2, y2)`` with ``x2``/``y2`` exclusive. Non-integer
            values (e.g. float detector output) are rounded to the nearest
            integer before use.
        margin: Padding in pixels added around the box before clamping.

    Returns:
        A slice of ``frame_bgr`` (a NumPy view, not a copy), or ``None`` when
        the frame is missing or the clamped box has no area.
    """
    # A missing frame means "nothing to crop", the same signal as an empty
    # box, rather than an AttributeError.
    if frame_bgr is None:
        return None

    # Slice indices must be real integers; float coordinates would otherwise
    # raise TypeError when used to index the array.
    x1, y1, x2, y2 = (int(round(value)) for value in bbox)
    pad = int(round(margin))

    height, width = frame_bgr.shape[:2]
    left = max(0, x1 - pad)
    top = max(0, y1 - pad)
    right = min(width, x2 + pad)
    bottom = min(height, y2 + pad)

    if right <= left or bottom <= top:
        return None
    return frame_bgr[top:bottom, left:right]
|
||||
|
||||
|
||||
def prepare_ocr_image(image_bgr: np.ndarray, config: dict) -> np.ndarray:
    """Apply the optional rescale and binarisation steps described by ``config``.

    Args:
        image_bgr: Input image; converted with ``COLOR_BGR2GRAY`` when
            thresholding is enabled, so it is expected to be a BGR image.
        config: Options read with ``dict.get``:
            ``"scale"`` (default ``1.0``) -- uniform resize factor, applied
            with cubic interpolation when it differs from ``1.0``;
            ``"threshold"`` (default ``False``) -- when truthy, return an
            Otsu-binarised grayscale image instead of the colour image.

    Returns:
        The (possibly resized) input image when thresholding is off,
        otherwise the binary image produced by Otsu thresholding after a
        3x3 Gaussian blur.
    """
    factor = float(config.get("scale", 1.0))
    needs_resize = factor != 1.0
    if needs_resize:
        image_bgr = cv2.resize(
            image_bgr,
            None,
            fx=factor,
            fy=factor,
            interpolation=cv2.INTER_CUBIC,
        )

    if config.get("threshold", False):
        grayscale = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
        # Blur before thresholding, as the original pipeline does.
        smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)
        # With THRESH_OTSU the threshold argument (0) is ignored and the
        # level is computed from the image; element [1] is the output image.
        _, binary = cv2.threshold(
            smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        return binary

    return image_bgr
|
||||
Reference in New Issue
Block a user