Initial MVP application skeleton

Add PySide6 camera UI, YOLO/Tesseract detection pipeline, capture metadata, configuration, and project gitignore.
2026-05-07 00:18:38 +02:00
commit 090865af76
18 changed files with 1140 additions and 0 deletions
--- a/app/detection.py
+++ b/app/detection.py
@@ -0,0 +1,183 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+import cv2
+import numpy as np
+
+from app.label_parser import ParsedLabel, parse_label_text
+
+
+@dataclass
+class DetectionResult:  # outcome of one detection (+ OCR) pass; every field defaults to "not set"
+    xyxy: tuple[int, int, int, int] | None = None  # chosen box as (x1, y1, x2, y2); None if no box
+    confidence: float | None = None  # confidence score of the chosen box
+    class_name: str | None = None  # class label of the chosen box
+    raw_text: str = ""  # OCR text of the chosen box, unprocessed
+    parsed: ParsedLabel | None = None  # structured form of raw_text
+    error: str | None = None  # failure message; None when no error was recorded
+    all_boxes: list[dict[str, Any]] = field(default_factory=list)  # every detected box as a plain dict
+
+    def to_metadata(self) -> dict[str, Any]:  # plain-dict view: bbox as list, parsed via to_dict()
+        return {
+            "bbox_xyxy": list(self.xyxy) if self.xyxy is not None else None,  # None test, not truthiness
+            "confidence": self.confidence,
+            "class_name": self.class_name,
+            "raw_text": self.raw_text,
+            "parsed": self.parsed.to_dict() if self.parsed is not None else None,
+            "error": self.error,
+            "all_boxes": self.all_boxes,
+        }
+
+
+class YoloLabelDetector:
+    """Wraps an Ultralytics YOLO model and reports the most confident box per frame."""
+
+    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
+        """Keep both configs and attempt the model load; a failure is kept in ``load_error``."""
+        self.config = config
+        self.app_config = app_config
+        self.model = None
+        self.load_error: str | None = None
+        self._load_model()
+
+    def _load_model(self) -> None:
+        """Load ``detection.model_path``; a missing file or failed load only sets ``load_error``."""
+        model_path = self.app_config.resolve_path(self.config["detection"]["model_path"])
+        if not model_path.exists():
+            self.load_error = f"Brak modelu: {model_path}"
+            return
+
+        try:
+            from ultralytics import YOLO
+
+            self.model = YOLO(str(model_path))
+        except Exception as exc:  # pragma: no cover - depends on optional runtime deps
+            self.load_error = f"Nie mozna zaladowac YOLO: {exc}"
+
+    def detect(self, frame_bgr: np.ndarray) -> DetectionResult:
+        """Run the model on one frame and return its highest-confidence box.
+
+        A missing model, a failing ``predict`` call, or an empty detection set is
+        reported through ``DetectionResult.error`` (``xyxy`` stays ``None``).
+        On success ``all_boxes`` holds every box, sorted best first.
+        """
+        if self.model is None:
+            return DetectionResult(error=self.load_error or "Model YOLO nie jest zaladowany")
+
+        detection_cfg = self.config["detection"]
+        try:
+            results = self.model.predict(
+                source=frame_bgr,
+                conf=float(detection_cfg["confidence_threshold"]),
+                imgsz=int(detection_cfg["image_size"]),
+                device=detection_cfg.get("device", "cpu"),
+                verbose=False,
+            )
+        except Exception as exc:  # pragma: no cover - depends on model runtime
+            return DetectionResult(error=f"Blad YOLO: {exc}")
+
+        names = getattr(self.model, "names", {})
+        boxes: list[dict[str, Any]] = []
+        for result in results:
+            # Ultralytics leaves ``boxes`` as None for models without box output: skip, don't crash.
+            for box in result.boxes if result.boxes is not None else ():
+                class_id = int(box.cls[0]) if box.cls is not None else -1
+                class_name = names.get(class_id, str(class_id)) if isinstance(names, dict) else str(class_id)
+                boxes.append(
+                    {
+                        "xyxy": [int(v) for v in box.xyxy[0].tolist()],
+                        "confidence": float(box.conf[0]),
+                        "class_name": class_name,
+                    }
+                )
+        if not boxes:
+            return DetectionResult(error="Nie wykryto etykiety")
+
+        boxes.sort(key=lambda item: item["confidence"], reverse=True)
+        best = boxes[0]
+        return DetectionResult(
+            xyxy=tuple(best["xyxy"]),
+            confidence=best["confidence"],
+            class_name=best["class_name"],
+            all_boxes=boxes,
+        )
+
+
+class TesseractOcr:
+    """Reads text from a frame region through pytesseract, an optional dependency."""
+
+    def __init__(self, config: dict[str, Any]) -> None:
+        self.config = config
+        self.load_error: str | None = None
+        self.pytesseract = None
+        self._load()
+
+    def _load(self) -> None:
+        """Import pytesseract unless OCR is disabled; a failure only sets ``load_error``."""
+        if not self.config["ocr"].get("enabled", True):
+            return
+        try:
+            import pytesseract
+
+            command = self.config["ocr"].get("tesseract_cmd")
+            if command:
+                pytesseract.pytesseract.tesseract_cmd = command
+            self.pytesseract = pytesseract
+        except Exception as exc:
+            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"
+
+    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> tuple[str, str | None]:
+        """OCR the ``bbox`` region of the frame; return ``(text, error)``, ``error`` None if OK."""
+        ocr_cfg = self.config["ocr"]
+        if not ocr_cfg.get("enabled", True):
+            return "", None
+        if self.pytesseract is None:
+            return "", self.load_error or "OCR nie jest zaladowany"
+
+        x1, y1, x2, y2 = bbox
+        h, w = frame_bgr.shape[:2]
+        x1, y1 = max(0, x1), max(0, y1)  # clamp the box to the frame
+        x2, y2 = min(w, x2), min(h, y2)
+        if x2 <= x1 or y2 <= y1:
+            return "", "Nieprawidlowy bbox OCR"
+
+        try:
+            # Preprocessing sits inside the try: a bad ``scale`` or frame layout must not escape.
+            roi = frame_bgr[y1:y2, x1:x2]
+            scale = float(ocr_cfg.get("scale", 1.0))
+            if scale != 1.0:
+                roi = cv2.resize(roi, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+            gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+            if ocr_cfg.get("threshold", True):
+                gray = cv2.GaussianBlur(gray, (3, 3), 0)  # smooth before Otsu binarization
+                gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
+            text = self.pytesseract.image_to_string(gray, lang=ocr_cfg.get("language", "eng"), config="--psm 6")
+        except Exception as exc:
+            return "", f"Blad OCR: {exc}"
+        return text, None
+
+
+class DetectionPipeline:
+    """Runs detection, then OCR on the detected box, then label-text parsing."""
+
+    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
+        self.config = config
+        self.detector = YoloLabelDetector(config, app_config)
+        self.ocr = TesseractOcr(config)
+
+    def process(self, frame_bgr: np.ndarray) -> DetectionResult:
+        """Detect a box in the frame, OCR it, and attach the raw and parsed text to the result."""
+        outcome = self.detector.detect(frame_bgr)
+        if outcome.xyxy is None:
+            return outcome  # no box to read; returned exactly as the detector produced it
+
+        outcome.raw_text, ocr_failure = self.ocr.read_label(frame_bgr, outcome.xyxy)
+        label_cfg = self.config["label_data"]
+        known_colors, known_models = label_cfg.get("colors", []), label_cfg.get("models", [])
+        outcome.parsed = parse_label_text(outcome.raw_text, known_colors, known_models)
+        if ocr_failure:
+            outcome.error = ocr_failure
+        return outcome