from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import cv2
import numpy as np

from app.label_parser import ParsedLabel, parse_label_text


@dataclass
class DetectionResult:
    """Outcome of one detection (and optional OCR) pass over a frame.

    All fields default to "nothing found"; ``error`` carries a
    human-readable message when a stage failed or found nothing.
    """

    xyxy: tuple[int, int, int, int] | None = None  # selected box (x1, y1, x2, y2)
    confidence: float | None = None  # confidence of the selected box
    class_name: str | None = None  # class label of the selected box
    raw_text: str = ""  # text returned by OCR for the selected box
    parsed: ParsedLabel | None = None  # result of parse_label_text on raw_text
    error: str | None = None  # message of the stage that failed, if any
    all_boxes: list[dict[str, Any]] = field(default_factory=list)

    def to_metadata(self) -> dict[str, Any]:
        """Return the result as a plain dict.

        The box tuple is converted to a list and ``parsed`` is expanded
        through its ``to_dict()`` method; both become ``None`` when unset.
        """
        return {
            "bbox_xyxy": list(self.xyxy) if self.xyxy else None,
            "confidence": self.confidence,
            "class_name": self.class_name,
            "raw_text": self.raw_text,
            "parsed": self.parsed.to_dict() if self.parsed else None,
            "error": self.error,
            "all_boxes": self.all_boxes,
        }


class YoloLabelDetector:
    """Wrapper around an Ultralytics YOLO model used to locate a label.

    Loading never raises: on failure ``model`` stays ``None`` and
    ``load_error`` holds the reason, which ``detect`` then reports.
    """

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the configuration and try to load the model immediately.

        Args:
            config: Mapping with a ``"detection"`` section.
            app_config: Object providing ``resolve_path(...)``; the value it
                returns must offer ``exists()``.
        """
        self.config = config
        self.app_config = app_config
        self.model: Any = None
        self.load_error: str | None = None
        self._load_model()

    def _load_model(self) -> None:
        """Load the YOLO weights, recording any failure in ``load_error``."""
        model_path = self.app_config.resolve_path(
            self.config["detection"]["model_path"]
        )
        if not model_path.exists():
            self.load_error = f"Brak modelu: {model_path}"
            return
        try:
            # Imported lazily so the module can be imported without ultralytics.
            from ultralytics import YOLO

            self.model = YOLO(str(model_path))
        except Exception as exc:  # pragma: no cover - depends on optional runtime deps
            self.load_error = f"Nie mozna zaladowac YOLO: {exc}"

    def detect(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Run the model on ``frame_bgr`` and pick the most confident box.

        Args:
            frame_bgr: Image passed to the model as ``source``.

        Returns:
            A ``DetectionResult``. ``error`` is set (and ``xyxy`` left
            ``None``) when the model is not loaded, prediction raises, or no
            box is found. Otherwise the highest-confidence box is selected
            and ``all_boxes`` lists every box in descending confidence order.
        """
        if self.model is None:
            return DetectionResult(
                error=self.load_error or "Model YOLO nie jest zaladowany"
            )

        detection_cfg = self.config["detection"]
        try:
            results = self.model.predict(
                source=frame_bgr,
                conf=float(detection_cfg["confidence_threshold"]),
                imgsz=int(detection_cfg["image_size"]),
                device=detection_cfg.get("device", "cpu"),
                verbose=False,
            )
        except Exception as exc:  # pragma: no cover - depends on model runtime
            return DetectionResult(error=f"Blad YOLO: {exc}")

        boxes: list[dict[str, Any]] = []
        names = getattr(self.model, "names", {})
        for prediction in results:
            # A result without detection output exposes boxes as None;
            # treat it as "no boxes" instead of failing on iteration.
            if prediction.boxes is None:
                continue
            for box in prediction.boxes:
                # int() truncates the float coordinates toward zero.
                x1, y1, x2, y2 = [int(v) for v in box.xyxy[0].tolist()]
                confidence = float(box.conf[0])
                class_id = int(box.cls[0]) if box.cls is not None else -1
                if isinstance(names, dict):
                    class_name = names.get(class_id, str(class_id))
                else:
                    class_name = str(class_id)
                boxes.append(
                    {
                        "xyxy": (x1, y1, x2, y2),
                        "confidence": confidence,
                        "class_name": class_name,
                    }
                )

        if not boxes:
            return DetectionResult(error="Nie wykryto etykiety")

        # Stable sort: boxes with equal confidence keep their model order.
        boxes.sort(key=lambda item: item["confidence"], reverse=True)
        best = boxes[0]
        return DetectionResult(
            xyxy=best["xyxy"],
            confidence=best["confidence"],
            class_name=best["class_name"],
            all_boxes=[
                {
                    "xyxy": list(item["xyxy"]),
                    "confidence": item["confidence"],
                    "class_name": item["class_name"],
                }
                for item in boxes
            ],
        )


class TesseractOcr:
    """Optional pytesseract-based reader for a cropped label region.

    When OCR is disabled in the config nothing is imported. When the import
    fails, ``load_error`` keeps the reason and ``read_label`` reports it.
    """

    def __init__(self, config: dict[str, Any]) -> None:
        """Store the configuration and try to import pytesseract.

        Args:
            config: Mapping with an ``"ocr"`` section.
        """
        self.config = config
        self.load_error: str | None = None
        self.pytesseract: Any = None
        self._load()

    def _load(self) -> None:
        """Import pytesseract and apply the configured binary path, if any."""
        if not self.config["ocr"].get("enabled", True):
            return
        try:
            import pytesseract

            command = self.config["ocr"].get("tesseract_cmd")
            if command:
                pytesseract.pytesseract.tesseract_cmd = command
            self.pytesseract = pytesseract
        except Exception as exc:
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"

    def _preprocess_roi(self, roi: np.ndarray) -> np.ndarray:
        """Scale, grayscale and optionally binarise the cropped region.

        Reads ``scale`` (default 1.0) and ``threshold`` (default True) from
        the ``"ocr"`` config section. May raise on invalid config values or
        when OpenCV rejects the input; the caller handles that.
        """
        ocr_cfg = self.config["ocr"]
        scale = float(ocr_cfg.get("scale", 1.0))
        if scale != 1.0:
            roi = cv2.resize(
                roi, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC
            )
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        if ocr_cfg.get("threshold", True):
            gray = cv2.GaussianBlur(gray, (3, 3), 0)
            # With THRESH_OTSU the threshold is chosen automatically, so the
            # 0 passed here is ignored; [1] is the thresholded image.
            gray = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )[1]
        return gray

    def read_label(
        self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]
    ) -> tuple[str, str | None]:
        """Run OCR on the part of ``frame_bgr`` enclosed by ``bbox``.

        Args:
            frame_bgr: Source image; its first two dimensions are read as
                height and width.
            bbox: ``(x1, y1, x2, y2)`` box, clamped to the frame before
                cropping.

        Returns:
            ``(text, error)``. ``error`` is ``None`` on success and when OCR
            is disabled (text is then empty). Otherwise ``text`` is empty and
            ``error`` describes the failure. Preprocessing and Tesseract
            failures are both reported this way rather than raised.
        """
        ocr_cfg = self.config["ocr"]
        if not ocr_cfg.get("enabled", True):
            return "", None
        if self.pytesseract is None:
            return "", self.load_error or "OCR nie jest zaladowany"

        x1, y1, x2, y2 = bbox
        height, width = frame_bgr.shape[:2]
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(width, x2), min(height, y2)
        if x2 <= x1 or y2 <= y1:
            return "", "Nieprawidlowy bbox OCR"

        roi = frame_bgr[y1:y2, x1:x2]
        try:
            # Preprocessing is inside the try so an OpenCV failure (e.g. a
            # non-positive scale or an unexpected channel layout) is returned
            # as an error instead of escaping this method.
            gray = self._preprocess_roi(roi)
            text = self.pytesseract.image_to_string(
                gray,
                lang=ocr_cfg.get("language", "eng"),
                config="--psm 6",
            )
        except Exception as exc:
            return "", f"Blad OCR: {exc}"
        return text, None


class DetectionPipeline:
    """Chain label detection, OCR and label-text parsing for one frame."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Build the detector and the OCR reader from the same config.

        Args:
            config: Mapping with ``"detection"``, ``"ocr"`` and
                ``"label_data"`` sections.
            app_config: Passed through to ``YoloLabelDetector``.
        """
        self.config = config
        self.detector = YoloLabelDetector(config, app_config)
        self.ocr = TesseractOcr(config)

    def process(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Detect the label in ``frame_bgr``, read it and parse the text.

        Returns:
            The detector's result unchanged when no box was selected.
            Otherwise the same object with ``raw_text`` and ``parsed``
            filled in, and ``error`` set to the OCR error if there was one.
        """
        result = self.detector.detect(frame_bgr)
        if result.xyxy is None:
            return result

        text, ocr_error = self.ocr.read_label(frame_bgr, result.xyxy)
        result.raw_text = text
        label_data = self.config["label_data"]
        # Parsing still runs when OCR failed; text is then the empty string.
        result.parsed = parse_label_text(
            text,
            label_data.get("colors", []),
            label_data.get("models", []),
        )
        if ocr_error:
            result.error = ocr_error
        return result