Files
duck-stain-yolo/app/detection.py
bartool 090865af76 Initial MVP application skeleton
Add PySide6 camera UI, YOLO/Tesseract detection pipeline, capture metadata, configuration, and project gitignore.
2026-05-07 00:18:38 +02:00

184 lines
6.3 KiB
Python

from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import cv2
import numpy as np
from app.label_parser import ParsedLabel, parse_label_text
@dataclass
class DetectionResult:
    """Outcome of one detection pass over a single frame.

    Every field has a default, so an instance may describe a detection,
    carry only an ``error`` message, or both.
    """

    # Bounding box of the selected detection as (x1, y1, x2, y2).
    xyxy: tuple[int, int, int, int] | None = None
    # Confidence reported for the selected detection.
    confidence: float | None = None
    # Class name reported for the selected detection.
    class_name: str | None = None
    # Text associated with the detection; empty string by default.
    raw_text: str = ""
    # Structured form of the label text, when one has been attached.
    parsed: ParsedLabel | None = None
    # Human-readable failure description; None when nothing was reported.
    error: str | None = None
    # One dict per detected box; a fresh list is created per instance.
    all_boxes: list[dict[str, Any]] = field(default_factory=list)

    def to_metadata(self) -> dict[str, Any]:
        """Return the result as a plain dictionary.

        The bounding box tuple is converted to a list and ``parsed`` is
        converted through its ``to_dict()`` method; a falsy value for either
        is emitted as ``None``. ``all_boxes`` is passed through as the same
        list object, not a copy.
        """
        bbox = None
        if self.xyxy:
            bbox = list(self.xyxy)

        parsed_dict = None
        if self.parsed:
            parsed_dict = self.parsed.to_dict()

        metadata: dict[str, Any] = {}
        metadata["bbox_xyxy"] = bbox
        metadata["confidence"] = self.confidence
        metadata["class_name"] = self.class_name
        metadata["raw_text"] = self.raw_text
        metadata["parsed"] = parsed_dict
        metadata["error"] = self.error
        metadata["all_boxes"] = self.all_boxes
        return metadata
class YoloLabelDetector:
    """Runs a YOLO model on frames and selects the most confident box.

    Model loading is attempted once, at construction. A loading failure is
    not raised: it is stored in ``load_error`` and reported by ``detect``.
    """

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the configuration and try to load the model.

        Args:
            config: Settings mapping; the ``"detection"`` section is read.
            app_config: Object whose ``resolve_path`` method turns the
                configured model path into a path object.
        """
        self.config = config
        self.app_config = app_config
        self.model = None
        self.load_error: str | None = None
        self._load_model()

    def _load_model(self) -> None:
        """Load the model file, recording any failure in ``load_error``."""
        model_path = self.app_config.resolve_path(self.config["detection"]["model_path"])
        if not model_path.exists():
            self.load_error = f"Brak modelu: {model_path}"
            return
        try:
            # Imported here so a missing ultralytics install is reported
            # through ``load_error`` instead of failing at module import.
            from ultralytics import YOLO

            self.model = YOLO(str(model_path))
        except Exception as exc:  # pragma: no cover - depends on optional runtime deps
            self.load_error = f"Nie mozna zaladowac YOLO: {exc}"

    def detect(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Run the model on one frame and return the most confident box.

        Args:
            frame_bgr: Image handed to the model as ``source``.

        Returns:
            A ``DetectionResult``. On success it describes the box with the
            highest confidence and lists every box in ``all_boxes`` in
            descending confidence order. When the model is not loaded, the
            prediction call raises, or no box is found, only ``error`` is
            set.
        """
        if self.model is None:
            return DetectionResult(error=self.load_error or "Model YOLO nie jest zaladowany")

        detection_cfg = self.config["detection"]
        try:
            results = self.model.predict(
                source=frame_bgr,
                conf=float(detection_cfg["confidence_threshold"]),
                imgsz=int(detection_cfg["image_size"]),
                device=detection_cfg.get("device", "cpu"),
                verbose=False,
            )
        except Exception as exc:  # pragma: no cover - depends on model runtime
            return DetectionResult(error=f"Blad YOLO: {exc}")

        boxes = []
        names = getattr(self.model, "names", {})
        for prediction in results:
            # A result may carry no box container at all (``boxes`` is None,
            # e.g. when the loaded weights are not a detection model).
            # Treat that as "no boxes" rather than failing on iteration.
            if prediction.boxes is None:
                continue
            for box in prediction.boxes:
                x1, y1, x2, y2 = [int(v) for v in box.xyxy[0].tolist()]
                confidence = float(box.conf[0])
                class_id = int(box.cls[0]) if box.cls is not None else -1
                # Fall back to the numeric id when no name is available.
                if isinstance(names, dict):
                    class_name = names.get(class_id, str(class_id))
                else:
                    class_name = str(class_id)
                boxes.append(
                    {
                        "xyxy": (x1, y1, x2, y2),
                        "confidence": confidence,
                        "class_name": class_name,
                    }
                )

        if not boxes:
            return DetectionResult(error="Nie wykryto etykiety")

        # Most confident first; the head of the list is the selected box.
        boxes.sort(key=lambda item: item["confidence"], reverse=True)
        selected = boxes[0]
        return DetectionResult(
            xyxy=selected["xyxy"],
            confidence=selected["confidence"],
            class_name=selected["class_name"],
            all_boxes=[
                {
                    "xyxy": list(item["xyxy"]),
                    "confidence": item["confidence"],
                    "class_name": item["class_name"],
                }
                for item in boxes
            ],
        )
class TesseractOcr:
    """Reads text from a region of a frame through pytesseract.

    The ``pytesseract`` import is attempted once, at construction, and only
    when OCR is enabled. An import failure is not raised: it is stored in
    ``load_error`` and reported by ``read_label``.
    """

    def __init__(self, config: dict[str, Any]) -> None:
        """Store the configuration and load pytesseract if OCR is enabled.

        Args:
            config: Settings mapping; the ``"ocr"`` section is read.
        """
        self.config = config
        self.load_error: str | None = None
        self.pytesseract = None
        self._load()

    def _load(self) -> None:
        """Import pytesseract and apply the configured ``tesseract_cmd``."""
        if not self.config["ocr"].get("enabled", True):
            return
        try:
            import pytesseract

            command = self.config["ocr"].get("tesseract_cmd")
            if command:
                pytesseract.pytesseract.tesseract_cmd = command
            self.pytesseract = pytesseract
        except Exception as exc:
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> tuple[str, str | None]:
        """Run OCR on the ``bbox`` region of ``frame_bgr``.

        Args:
            frame_bgr: Source image; the region is converted with
                ``cv2.COLOR_BGR2GRAY`` before recognition.
            bbox: Region as (x1, y1, x2, y2); it is clipped to the frame.

        Returns:
            ``(text, error)``. ``error`` is ``None`` on success and when OCR
            is disabled (in which case ``text`` is empty). Otherwise
            ``text`` is empty and ``error`` describes the problem: OCR not
            loaded, empty region after clipping, invalid ``scale`` setting,
            or a failure raised by the pytesseract call.
        """
        ocr_cfg = self.config["ocr"]
        if not ocr_cfg.get("enabled", True):
            return "", None
        if self.pytesseract is None:
            return "", self.load_error or "OCR nie jest zaladowany"

        x1, y1, x2, y2 = bbox
        h, w = frame_bgr.shape[:2]
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w, x2), min(h, y2)
        if x2 <= x1 or y2 <= y1:
            return "", "Nieprawidlowy bbox OCR"
        roi = frame_bgr[y1:y2, x1:x2]

        # A zero, negative, non-finite or non-numeric scale cannot be used
        # as a resize factor; report it through the error slot like every
        # other failure instead of letting the conversion or resize raise.
        raw_scale = ocr_cfg.get("scale", 1.0)
        try:
            scale = float(raw_scale)
        except (TypeError, ValueError):
            scale = float("nan")
        if not 0.0 < scale < float("inf"):
            return "", f"Nieprawidlowa skala OCR: {raw_scale}"

        if scale != 1.0:
            roi = cv2.resize(roi, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        if ocr_cfg.get("threshold", True):
            # Light blur followed by Otsu binarisation.
            gray = cv2.GaussianBlur(gray, (3, 3), 0)
            gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

        try:
            text = self.pytesseract.image_to_string(
                gray,
                lang=ocr_cfg.get("language", "eng"),
                config="--psm 6",
            )
        except Exception as exc:
            return "", f"Blad OCR: {exc}"
        return text, None
class DetectionPipeline:
    """Chains label detection, OCR and label-text parsing for one frame."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Build the detector and OCR stages from the shared configuration.

        Args:
            config: Settings mapping handed to both stages; its
                ``"label_data"`` section is read during ``process``.
            app_config: Passed through to the detector.
        """
        self.config = config
        self.detector = YoloLabelDetector(config, app_config)
        self.ocr = TesseractOcr(config)

    def process(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Detect a label in the frame, read its text and parse it.

        The detector's result is returned untouched when it has no bounding
        box. Otherwise the OCR text is stored in ``raw_text``, the parsed
        form in ``parsed``, and an OCR error message (if any) replaces
        ``error`` on that same result object.
        """
        outcome = self.detector.detect(frame_bgr)
        if outcome.xyxy is not None:
            recognised, failure = self.ocr.read_label(frame_bgr, outcome.xyxy)
            outcome.raw_text = recognised

            label_cfg = self.config["label_data"]
            known_colors = label_cfg.get("colors", [])
            known_models = label_cfg.get("models", [])
            outcome.parsed = parse_label_text(recognised, known_colors, known_models)

            if failure:
                outcome.error = failure
        return outcome