Enhance OCR configuration and integrate fuzzy matching for label parsing
This commit is contained in:
@@ -4,10 +4,10 @@ from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from app.label_parser import ParsedLabel, parse_label_text
|
||||
from app.ocr import create_ocr_engine
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -18,6 +18,9 @@ class DetectionResult:
|
||||
raw_text: str = ""
|
||||
parsed: ParsedLabel | None = None
|
||||
error: str | None = None
|
||||
ocr_engine: str | None = None
|
||||
ocr_confidence: float | None = None
|
||||
ocr_elapsed_ms: float | None = None
|
||||
all_boxes: list[dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
def to_metadata(self) -> dict[str, Any]:
|
||||
@@ -28,6 +31,9 @@ class DetectionResult:
|
||||
"raw_text": self.raw_text,
|
||||
"parsed": self.parsed.to_dict() if self.parsed else None,
|
||||
"error": self.error,
|
||||
"ocr_engine": self.ocr_engine,
|
||||
"ocr_confidence": self.ocr_confidence,
|
||||
"ocr_elapsed_ms": self.ocr_elapsed_ms,
|
||||
"all_boxes": self.all_boxes,
|
||||
}
|
||||
|
||||
@@ -72,6 +78,9 @@ class YoloLabelDetector:
|
||||
boxes = []
|
||||
names = getattr(self.model, "names", {})
|
||||
for result in results:
|
||||
if result.boxes is None:
|
||||
continue
|
||||
|
||||
for box in result.boxes:
|
||||
x1, y1, x2, y2 = [int(v) for v in box.xyxy[0].tolist()]
|
||||
confidence = float(box.conf[0])
|
||||
@@ -106,78 +115,30 @@ class YoloLabelDetector:
|
||||
return result
|
||||
|
||||
|
||||
class TesseractOcr:
    """Read label text from a region of a frame using pytesseract.

    Behaviour is driven by the ``config["ocr"]`` mapping. The keys read by
    this class are ``enabled``, ``tesseract_cmd``, ``scale``, ``threshold``
    and ``language``. Problems are reported through ``load_error`` or the
    error element of the ``read_label`` result instead of being raised.
    """

    def __init__(self, config: dict[str, Any]) -> None:
        """Store *config* and attempt to load pytesseract right away."""
        self.config = config
        self.load_error: str | None = None
        self.pytesseract = None
        self._load()

    def _load(self) -> None:
        """Import pytesseract lazily and apply the optional binary path.

        Does nothing when OCR is disabled. On failure the message is kept in
        ``self.load_error`` and ``self.pytesseract`` stays ``None``.
        """
        if not self.config["ocr"].get("enabled", True):
            return
        try:
            import pytesseract

            command = self.config["ocr"].get("tesseract_cmd")
            if command:
                pytesseract.pytesseract.tesseract_cmd = command
            self.pytesseract = pytesseract
        except Exception as exc:
            # Deliberate best-effort: a missing or broken OCR dependency must
            # not prevent the rest of the pipeline from being constructed.
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> tuple[str, str | None]:
        """Run OCR on the part of *frame_bgr* delimited by *bbox*.

        Args:
            frame_bgr: Image array; its first two dimensions are used as
                height and width, and it is converted with ``COLOR_BGR2GRAY``.
            bbox: ``(x1, y1, x2, y2)`` corner coordinates; they are clamped
                to the frame bounds before cropping.

        Returns:
            ``(text, error)``. ``error`` is ``None`` on success. When OCR is
            disabled the result is ``("", None)``. Every failure yields an
            empty text together with a message.
        """
        ocr_cfg = self.config["ocr"]
        if not ocr_cfg.get("enabled", True):
            return "", None
        if self.pytesseract is None:
            return "", self.load_error or "OCR nie jest zaladowany"

        x1, y1, x2, y2 = bbox
        h, w = frame_bgr.shape[:2]
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w, x2), min(h, y2)
        if x2 <= x1 or y2 <= y1:
            return "", "Nieprawidlowy bbox OCR"

        # Preprocessing lives inside the guarded section as well: an invalid
        # ``scale`` or an unexpected frame layout makes OpenCV raise, and the
        # caller expects such failures as an error string, not an exception.
        try:
            roi = frame_bgr[y1:y2, x1:x2]
            scale = float(ocr_cfg.get("scale", 1.0))
            if scale != 1.0:
                roi = cv2.resize(roi, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            if ocr_cfg.get("threshold", True):
                # Light blur, then Otsu binarisation (the 0 threshold value is
                # a placeholder; Otsu selects the actual level).
                gray = cv2.GaussianBlur(gray, (3, 3), 0)
                gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

            text = self.pytesseract.image_to_string(
                gray,
                lang=ocr_cfg.get("language", "eng"),
                config="--psm 6",  # page segmentation mode 6
            )
        except Exception as exc:
            return "", f"Blad OCR: {exc}"
        return text, None
|
||||
|
||||
|
||||
class DetectionPipeline:
    """Chain label detection, OCR and label-text parsing for a single frame."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Build the detector and the OCR engine from *config*.

        The OCR backend comes from ``create_ocr_engine``; the pipeline only
        relies on the ``read_label`` method of whatever that returns.
        """
        self.config = config
        self.detector = YoloLabelDetector(config, app_config)
        self.ocr = create_ocr_engine(config)

    def process(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Detect a label in *frame_bgr*, OCR it and parse the recognised text.

        Returns the detector's result object. When the detector reports no
        bounding box (``xyxy is None``) it is returned unchanged. Otherwise
        the OCR text, engine name, confidence, elapsed time, parsed label and
        any OCR error are filled in on it.
        """
        result = self.detector.detect(frame_bgr)
        if result.xyxy is None:
            return result

        # A single OCR pass; the returned object is read for its text,
        # engine, confidence, elapsed_ms and error attributes.
        ocr_result = self.ocr.read_label(frame_bgr, result.xyxy)
        result.raw_text = ocr_result.text
        result.ocr_engine = ocr_result.engine
        result.ocr_confidence = ocr_result.confidence
        result.ocr_elapsed_ms = ocr_result.elapsed_ms

        label_cfg = self.config["label_data"]
        result.parsed = parse_label_text(
            ocr_result.text,
            label_cfg.get("colors", []),
            label_cfg.get("models", []),
            # Minimum match scores for the parser; 0.72 is used when the
            # configuration does not set them.
            model_min_score=float(label_cfg.get("model_min_score", 0.72)),
            color_min_score=float(label_cfg.get("color_min_score", 0.72)),
        )
        if ocr_result.error:
            result.error = ocr_result.error
        return result
|
||||
|
||||
Reference in New Issue
Block a user