# duck-stain-yolo/app/detection.py

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import cv2
import numpy as np

from app.label_parser import ParsedLabel, parse_label_text


@dataclass
class DetectionResult:
    """Outcome of processing a single frame.

    Every field has a default, so a failed pass can be expressed by
    setting only ``error``.
    """

    # Corner coordinates (x1, y1, x2, y2) of the selected box, if any.
    xyxy: tuple[int, int, int, int] | None = None
    # Confidence score of the selected box.
    confidence: float | None = None
    # Class name reported for the selected box.
    class_name: str | None = None
    # Text read from the selected region; empty until OCR has run.
    raw_text: str = ""
    # Structured interpretation of ``raw_text``, when available.
    parsed: ParsedLabel | None = None
    # Human-readable failure description, or None.
    error: str | None = None
    # One dictionary per detected box.
    all_boxes: list[dict[str, Any]] = field(default_factory=list)

    def to_metadata(self) -> dict[str, Any]:
        """Return the result as a dictionary of plain values.

        ``xyxy`` is converted to a list and ``parsed`` is replaced by the
        output of its ``to_dict()``; both become ``None`` when falsy.
        ``all_boxes`` is passed through as the same list object.
        """
        bbox: list[int] | None = None
        if self.xyxy:
            bbox = list(self.xyxy)

        parsed_payload = None
        if self.parsed:
            parsed_payload = self.parsed.to_dict()

        return dict(
            bbox_xyxy=bbox,
            confidence=self.confidence,
            class_name=self.class_name,
            raw_text=self.raw_text,
            parsed=parsed_payload,
            error=self.error,
            all_boxes=self.all_boxes,
        )


class YoloLabelDetector:
    """Find label bounding boxes in a frame with an Ultralytics YOLO model.

    The model is loaded once, during construction.  A load failure never
    raises; the reason is kept in ``load_error`` and returned as the
    ``error`` of every subsequent :meth:`detect` call.
    """

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the settings and attempt to load the model.

        Args:
            config: Application settings; only the ``"detection"`` section
                is read by this class.
            app_config: Object providing ``resolve_path``, used to turn the
                configured ``model_path`` into a path that supports
                ``exists()``.
        """
        self.config = config
        self.app_config = app_config
        self.model = None
        self.load_error: str | None = None
        self._load_model()

    def _load_model(self) -> None:
        """Load the YOLO weights, recording any failure in ``load_error``.

        Raises:
            KeyError: If ``config`` has no ``detection.model_path`` entry
                (the lookup happens before any error handling).
        """
        model_path = self.app_config.resolve_path(self.config["detection"]["model_path"])
        if not model_path.exists():
            self.load_error = f"Brak modelu: {model_path}"
            return

        try:
            # Imported lazily so the module stays importable without ultralytics.
            from ultralytics import YOLO

            self.model = YOLO(str(model_path))
        except Exception as exc:  # pragma: no cover - depends on optional runtime deps
            self.load_error = f"Nie mozna zaladowac YOLO: {exc}"

    def detect(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Run the model on one frame and select the most confident box.

        Args:
            frame_bgr: Image handed unchanged to the model as ``source``.

        Returns:
            A ``DetectionResult`` whose ``xyxy``, ``confidence`` and
            ``class_name`` describe the highest-confidence box and whose
            ``all_boxes`` lists every box, best first.  When the model is
            not loaded, prediction raises, or no box is found, only
            ``error`` is set.
        """
        if self.model is None:
            return DetectionResult(error=self.load_error or "Model YOLO nie jest zaladowany")

        detection_cfg = self.config["detection"]
        try:
            results = self.model.predict(
                source=frame_bgr,
                conf=float(detection_cfg["confidence_threshold"]),
                imgsz=int(detection_cfg["image_size"]),
                device=detection_cfg.get("device", "cpu"),
                verbose=False,
            )
        except Exception as exc:  # pragma: no cover - depends on model runtime
            return DetectionResult(error=f"Blad YOLO: {exc}")

        boxes = self._collect_boxes(results)
        if not boxes:
            return DetectionResult(error="Nie wykryto etykiety")

        # list.sort is stable, so equally confident boxes keep the model's order.
        boxes.sort(key=lambda item: item["confidence"], reverse=True)
        best = boxes[0]
        return DetectionResult(
            xyxy=best["xyxy"],
            confidence=best["confidence"],
            class_name=best["class_name"],
            # Same entries as ``boxes`` but with list coordinates instead of tuples.
            all_boxes=[{**item, "xyxy": list(item["xyxy"])} for item in boxes],
        )

    def _collect_boxes(self, results: Any) -> list[dict[str, Any]]:
        """Flatten prediction results into one dictionary per box."""
        names = getattr(self.model, "names", {})
        collected: list[dict[str, Any]] = []
        for prediction in results:
            # ``Results.boxes`` is documented as optional; a result without
            # box output would otherwise raise TypeError when iterated.
            if prediction.boxes is None:
                continue
            for box in prediction.boxes:
                # int() truncates the floating-point coordinates toward zero.
                x1, y1, x2, y2 = [int(v) for v in box.xyxy[0].tolist()]
                class_id = int(box.cls[0]) if box.cls is not None else -1
                if isinstance(names, dict):
                    class_name = names.get(class_id, str(class_id))
                else:
                    class_name = str(class_id)
                collected.append(
                    {
                        "xyxy": (x1, y1, x2, y2),
                        "confidence": float(box.conf[0]),
                        "class_name": class_name,
                    }
                )
        return collected


class TesseractOcr:
    """Extract text from a region of a frame using pytesseract.

    pytesseract is imported lazily, and only when the ``"ocr"`` section of
    the configuration leaves OCR enabled (the default).  An import or
    setup failure is remembered in ``load_error`` instead of being raised.
    """

    def __init__(self, config: dict[str, Any]) -> None:
        """Keep the configuration and try to prepare pytesseract."""
        self.config = config
        self.pytesseract = None
        self.load_error: str | None = None
        self._load()

    def _load(self) -> None:
        """Import pytesseract and apply the optional ``tesseract_cmd`` override."""
        settings = self.config["ocr"]
        if not settings.get("enabled", True):
            return

        try:
            import pytesseract as engine

            binary = settings.get("tesseract_cmd")
            if binary:
                engine.pytesseract.tesseract_cmd = binary
        except Exception as exc:
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"
        else:
            self.pytesseract = engine

    def _prepare(self, roi: np.ndarray) -> np.ndarray:
        """Rescale, grey-scale and optionally binarise a crop for OCR."""
        settings = self.config["ocr"]
        factor = float(settings.get("scale", 1.0))
        if factor != 1.0:
            roi = cv2.resize(roi, None, fx=factor, fy=factor, interpolation=cv2.INTER_CUBIC)

        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        if not settings.get("threshold", True):
            return gray

        # Light blur before Otsu thresholding; the threshold value 0 is a
        # placeholder because THRESH_OTSU selects the level itself.
        blurred = cv2.GaussianBlur(gray, (3, 3), 0)
        _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return binary

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> tuple[str, str | None]:
        """Read the text inside ``bbox``.

        Args:
            frame_bgr: Full frame; the crop is converted with
                ``COLOR_BGR2GRAY``.
            bbox: ``(x1, y1, x2, y2)`` corners, clamped to the frame.

        Returns:
            ``(text, error)``.  ``error`` is ``None`` on success and when
            OCR is disabled (in which case ``text`` is empty); otherwise
            ``text`` is empty and ``error`` describes the problem.
        """
        if not self.config["ocr"].get("enabled", True):
            return "", None

        engine = self.pytesseract
        if engine is None:
            return "", self.load_error or "OCR nie jest zaladowany"

        left, top, right, bottom = bbox
        frame_h, frame_w = frame_bgr.shape[:2]
        left = max(0, left)
        top = max(0, top)
        right = min(frame_w, right)
        bottom = min(frame_h, bottom)
        if right <= left or bottom <= top:
            return "", "Nieprawidlowy bbox OCR"

        prepared = self._prepare(frame_bgr[top:bottom, left:right])
        try:
            text = engine.image_to_string(
                prepared,
                lang=self.config["ocr"].get("language", "eng"),
                config="--psm 6",
            )
        except Exception as exc:
            return "", f"Blad OCR: {exc}"
        return text, None


class DetectionPipeline:
    """Chain label detection, OCR and label-text parsing for one frame."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Build the detector and OCR stages from the shared configuration."""
        self.config = config
        self.detector = YoloLabelDetector(config, app_config)
        self.ocr = TesseractOcr(config)

    def process(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Detect a label in ``frame_bgr``, read its text and parse it.

        If the detector yields no box, its result is returned untouched.
        Otherwise the same result object is filled in with ``raw_text``
        and ``parsed``; an OCR error, when present, replaces ``error``.
        """
        detection = self.detector.detect(frame_bgr)
        bbox = detection.xyxy
        if bbox is None:
            return detection

        raw_text, ocr_error = self.ocr.read_label(frame_bgr, bbox)
        detection.raw_text = raw_text

        label_data = self.config["label_data"]
        known_colors = label_data.get("colors", [])
        known_models = label_data.get("models", [])
        # Parsing runs even after an OCR failure, on whatever text came back.
        detection.parsed = parse_label_text(raw_text, known_colors, known_models)

        if ocr_error:
            detection.error = ocr_error
        return detection