Initial MVP application skeleton

Add PySide6 camera UI, YOLO/Tesseract detection pipeline, capture metadata, configuration, and project gitignore.
2026-05-07 00:18:38 +02:00
commit 090865af76
18 changed files with 1140 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,31 @@
 # Python
 __pycache__/
 *.py[cod]
 *$py.class
 .pytest_cache/
 .ruff_cache/
 .mypy_cache/
 # Virtual environments
 .venv/
 .venv-*/
 venv/
 env/
 # Local/runtime data
 captures/photos/*
 captures/videos/*
 !captures/photos/.gitkeep
 !captures/videos/.gitkeep
 models/*
 !models/.gitkeep
 # OS/editor
 .DS_Store
 .idea/
 .vscode/
 # Ultralytics/runtime caches
 runs/
 *.onnx
 *.engine
--- a/README.md
+++ b/README.md
@@ -0,0 +1,42 @@
 # Duck Stain YOLO
 MVP aplikacji okienkowej do podgladu kamery USB, wykrywania etykiety modelem YOLOv8 i zapisu zdjec/filmow z metadanymi JSON.
 ## Uruchomienie
 ```bash
 python3 -m venv .venv
 source .venv/bin/activate
 pip install -r requirements.txt
 python main.py
 ```
 Na macOS z Tesseractem:
 ```bash
 brew install tesseract
 ```
 Na Ubuntu/WSL:
 ```bash
 sudo apt install tesseract-ocr
 ```
 Na Linuksie `requirements.txt` wymusza CPU build PyTorch, zeby nie pobierac wariantu CUDA. Na macOS pip zainstaluje standardowy wariant CPU dla procesora Intel.
 Umiesc wytrenowany model jako `models/best.pt` albo zmien `detection.model_path` w `app_config.json`.
 ## Konfiguracja
 Glowny plik konfiguracji: `app_config.json`.
 Istotne ustawienia:
 - `camera.width`, `camera.height`, `camera.fps` - rozdzielczosc i FPS kamery.
 - `camera.properties` - parametry OpenCV ustawiane na kamerze, np. jasnosc, kontrast, ekspozycja. `null` oznacza brak wymuszania wartosci.
 - `detection.mode` - `best` rysuje najlepsza etykiete, `all` rysuje wszystkie wykrycia.
 - `detection.frame_stride` - YOLO uruchamiany co N klatek podczas aktywnego wykrywania.
 - `label_data.models`, `label_data.colors` - slowniki do walidacji tekstu z etykiety.
 Zdjecia trafiaja do `captures/photos`, filmy do `captures/videos`. Obok kazdego pliku multimedialnego zapisywany jest plik JSON o tej samej nazwie bazowej z aktualnym wynikiem detekcji/OCR.
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -0,0 +1 @@
 __all__ = []
--- a/app/camera.py
+++ b/app/camera.py
@@ -0,0 +1,129 @@
 from __future__ import annotations
 import threading
 import time
 from typing import Any
 import cv2
 import numpy as np
 from PySide6.QtCore import QThread, Signal, Slot
 from app.detection import DetectionPipeline, DetectionResult
 # Maps the property names used under `camera.properties` in the configuration
 # to the OpenCV capture property ids passed to `VideoCapture.set`.
 CV_CAP_PROPS = {
    "brightness": cv2.CAP_PROP_BRIGHTNESS,
    "contrast": cv2.CAP_PROP_CONTRAST,
    "saturation": cv2.CAP_PROP_SATURATION,
    "hue": cv2.CAP_PROP_HUE,
    "gain": cv2.CAP_PROP_GAIN,
    "exposure": cv2.CAP_PROP_EXPOSURE,
    "sharpness": cv2.CAP_PROP_SHARPNESS,
    "auto_exposure": cv2.CAP_PROP_AUTO_EXPOSURE,
    "focus": cv2.CAP_PROP_FOCUS,
    "auto_focus": cv2.CAP_PROP_AUTOFOCUS,
 }
 def backend_for_name(name: str) -> int:
    """Translate a configured backend name into an OpenCV capture API id.

    Recognised names are ``"avfoundation"``, ``"v4l2"`` and ``"dshow"``;
    any other value (including ``"auto"``) selects ``cv2.CAP_ANY``.
    """
    attribute_by_name = {
        "avfoundation": "CAP_AVFOUNDATION",
        "v4l2": "CAP_V4L2",
        "dshow": "CAP_DSHOW",
    }
    # Resolved lazily so only the constant that is actually requested is read.
    return getattr(cv2, attribute_by_name.get(name, "CAP_ANY"))
 class CameraWorker(QThread):
    """Background thread that reads camera frames and runs detection on demand.

    Signals:
        frame_ready: emitted with every frame read from the camera.
        detection_ready: emitted with a ``DetectionResult`` for processed frames.
        camera_error: emitted with a user-facing message when the camera
            cannot be opened or a frame cannot be read.
    """

    frame_ready = Signal(object)
    detection_ready = Signal(object)
    camera_error = Signal(str)

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the configuration and build the detection pipeline.

        Args:
            config: Application configuration dict (``camera`` and
                ``detection`` sections are read here).
            app_config: Configuration object forwarded to the pipeline.
        """
        super().__init__()
        self.config = config
        self.app_config = app_config
        self.pipeline = DetectionPipeline(config, app_config)
        self._running = threading.Event()
        self._running.set()
        self._detecting = False
        self._accepted = False
        self._frame_count = 0
        self._capture: cv2.VideoCapture | None = None
        # Guards the detection flags, the frame counter and `_capture`.
        self._lock = threading.Lock()

    def stop(self) -> None:
        """Ask the capture loop in `run` to finish after the current frame."""
        self._running.clear()

    @Slot()
    def start_detection(self) -> None:
        """Enable detection and restart the frame-stride counter."""
        with self._lock:
            self._detecting = True
            self._accepted = False
            self._frame_count = 0

    @Slot()
    def accept_detection(self) -> None:
        """Stop detecting; results still being computed are discarded."""
        with self._lock:
            self._detecting = False
            self._accepted = True

    @Slot(dict)
    def update_camera_config(self, camera_config: dict[str, Any]) -> None:
        """Replace the camera section and re-apply it to the open capture.

        NOTE(review): MainWindow calls this method directly, so the settings
        are applied on the caller's thread while `run` may be reading a
        frame - confirm the capture backend tolerates that.
        """
        with self._lock:
            self.config["camera"] = camera_config
            capture = self._capture
        if capture is not None:
            self._apply_camera_settings(capture)

    def run(self) -> None:
        """Open the camera and emit frames until `stop` is called."""
        camera_cfg = self.config["camera"]
        capture = cv2.VideoCapture(
            int(camera_cfg.get("index", 0)),
            backend_for_name(str(camera_cfg.get("backend", "auto"))),
        )
        try:
            if not capture.isOpened():
                self.camera_error.emit("Nie mozna otworzyc kamery USB")
                return
            # Publish the capture only once it is usable, so that
            # `update_camera_config` never configures a dead handle.
            with self._lock:
                self._capture = capture
            self._apply_camera_settings(capture)
            while self._running.is_set():
                ok, frame = capture.read()
                if not ok or frame is None:
                    self.camera_error.emit("Nie mozna odczytac klatki z kamery")
                    time.sleep(0.2)
                    continue
                self.frame_ready.emit(frame)
                self._maybe_detect(frame)
        finally:
            # Runs for the failed-open path too, so the handle is always released.
            with self._lock:
                self._capture = None
            capture.release()

    def _apply_camera_settings(self, capture: cv2.VideoCapture) -> None:
        """Push resolution, FPS and the optional properties to ``capture``."""
        camera_cfg = self.config["camera"]
        capture.set(cv2.CAP_PROP_FRAME_WIDTH, int(camera_cfg.get("width", 1920)))
        capture.set(cv2.CAP_PROP_FRAME_HEIGHT, int(camera_cfg.get("height", 1080)))
        capture.set(cv2.CAP_PROP_FPS, int(camera_cfg.get("fps", 30)))
        # `properties` may be an explicit null in the JSON file.
        properties = camera_cfg.get("properties") or {}
        for name, value in properties.items():
            # None means "do not force this property"; unknown names are ignored.
            if value is None or name not in CV_CAP_PROPS:
                continue
            capture.set(CV_CAP_PROPS[name], float(value))

    def _maybe_detect(self, frame: np.ndarray) -> None:
        """Run the pipeline on every ``frame_stride``-th frame while detecting."""
        with self._lock:
            detecting = self._detecting and not self._accepted
            frame_stride = max(1, int(self.config["detection"].get("frame_stride", 5)))
            self._frame_count += 1
            should_detect = detecting and self._frame_count % frame_stride == 0
        if not should_detect:
            return
        result: DetectionResult = self.pipeline.process(frame)
        # The user may have accepted a result while this frame was being
        # processed; emitting now would overwrite what they accepted.
        with self._lock:
            still_wanted = self._detecting and not self._accepted
        if still_wanted:
            self.detection_ready.emit(result)
--- a/app/config.py
+++ b/app/config.py
@@ -0,0 +1,96 @@
 from __future__ import annotations
 import json
 from copy import deepcopy
 from pathlib import Path
 from typing import Any
 # Application root: the directory two levels above this file.
 APP_ROOT = Path(__file__).resolve().parent.parent
 # Location of the JSON configuration file inside the application root.
 CONFIG_PATH = APP_ROOT / "app_config.json"
 # Built-in defaults; values loaded from the JSON file are merged on top of
 # these by `AppConfig.load`.
 DEFAULT_CONFIG: dict[str, Any] = {
    "camera": {
        "index": 0,
        "width": 1920,
        "height": 1080,
        "fps": 30,
        "backend": "auto",
        # None means the value is not forced on the camera (README: `null`).
        "properties": {
            "brightness": None,
            "contrast": None,
            "saturation": None,
            "hue": None,
            "gain": None,
            "exposure": None,
            "sharpness": None,
            "auto_exposure": None,
            "focus": None,
            "auto_focus": None,
        },
    },
    "detection": {
        "model_path": "models/best.pt",
        "confidence_threshold": 0.25,
        "mode": "best",
        "frame_stride": 5,
        "image_size": 640,
        "device": "cpu",
    },
    "ocr": {
        "enabled": True,
        "language": "eng",
        "tesseract_cmd": None,
        "threshold": True,
        "scale": 2.0,
    },
    "capture": {
        "photos_dir": "captures/photos",
        "videos_dir": "captures/videos",
        "image_extension": "jpg",
        "video_extension": "mp4",
        "video_codec": "mp4v",
    },
    # Dictionaries used to validate the text read from a label.
    "label_data": {"models": ["Regius", "Duvell"], "colors": ["T-NF-BLK-OUT-BST-G", "T-BLK-G"]},
 }
 def deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``base`` with ``override`` merged in recursively.

    Nested dicts present on both sides are merged key by key; any other
    value from ``override`` replaces the one in ``base``. ``base`` is not
    modified; replaced values are taken from ``override`` by reference.
    """
    merged = deepcopy(base)
    for key in override:
        incoming = override[key]
        current = merged.get(key)
        both_dicts = isinstance(incoming, dict) and isinstance(current, dict)
        merged[key] = deep_merge(current, incoming) if both_dicts else incoming
    return merged
 class AppConfig:
    """Load, hold and persist the application configuration JSON file."""

    def __init__(self, path: Path = CONFIG_PATH) -> None:
        """Remember ``path`` and load the configuration from it."""
        self.path = path
        self.data = self.load()

    def load(self) -> dict[str, Any]:
        """Read the configuration file, merged over the built-in defaults.

        A missing file is created from the defaults.

        Raises:
            ValueError: if the file is not valid JSON (``json.JSONDecodeError``)
                or its top-level value is not a JSON object.
        """
        if not self.path.exists():
            # Work on a private copy so the module-level defaults are never
            # stored in (and later mutated through) `self.data`.
            defaults = deepcopy(DEFAULT_CONFIG)
            self.save(defaults)
            return defaults
        with self.path.open("r", encoding="utf-8") as config_file:
            loaded = json.load(config_file)
        if not isinstance(loaded, dict):
            raise ValueError(f"Plik konfiguracji musi zawierac obiekt JSON: {self.path}")
        return deep_merge(DEFAULT_CONFIG, loaded)

    def save(self, data: dict[str, Any] | None = None) -> None:
        """Write the configuration to disk.

        Args:
            data: When given, replaces ``self.data`` before writing.
        """
        if data is not None:
            self.data = data
        self.path.parent.mkdir(parents=True, exist_ok=True)
        with self.path.open("w", encoding="utf-8") as config_file:
            json.dump(self.data, config_file, indent=2, ensure_ascii=False)
            config_file.write("\n")

    def resolve_path(self, configured_path: str) -> Path:
        """Return ``configured_path`` as is when absolute, else under ``APP_ROOT``."""
        path = Path(configured_path)
        if path.is_absolute():
            return path
        return APP_ROOT / path
--- a/app/detection.py
+++ b/app/detection.py
@@ -0,0 +1,183 @@
 from __future__ import annotations
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
 import cv2
 import numpy as np
 from app.label_parser import ParsedLabel, parse_label_text
@dataclass
class DetectionResult:
    """Outcome of one detection pass: selected box, OCR text and any error."""

    xyxy: tuple[int, int, int, int] | None = None
    confidence: float | None = None
    class_name: str | None = None
    raw_text: str = ""
    parsed: ParsedLabel | None = None
    error: str | None = None
    all_boxes: list[dict[str, Any]] = field(default_factory=list)

    def to_metadata(self) -> dict[str, Any]:
        """Return the result as a plain dict for the JSON sidecar files."""
        bbox = list(self.xyxy) if self.xyxy else None
        parsed = self.parsed.to_dict() if self.parsed else None
        return dict(
            bbox_xyxy=bbox,
            confidence=self.confidence,
            class_name=self.class_name,
            raw_text=self.raw_text,
            parsed=parsed,
            error=self.error,
            all_boxes=self.all_boxes,
        )
 class YoloLabelDetector:
    """Wraps an Ultralytics YOLO model that locates the label in a frame."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the configuration and try to load the model.

        A load failure is recorded in ``load_error`` instead of being raised.
        """
        self.config = config
        self.app_config = app_config
        self.model = None
        self.load_error: str | None = None
        self._load_model()

    def _load_model(self) -> None:
        """Load the model from ``detection.model_path``, recording any failure."""
        model_path = self.app_config.resolve_path(self.config["detection"]["model_path"])
        if not model_path.exists():
            self.load_error = f"Brak modelu: {model_path}"
            return
        try:
            # Imported lazily so the application starts without ultralytics.
            from ultralytics import YOLO
            self.model = YOLO(str(model_path))
        except Exception as exc:  # pragma: no cover - depends on optional runtime deps
            self.load_error = f"Nie mozna zaladowac YOLO: {exc}"

    def detect(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Run the model on ``frame_bgr`` and return the most confident box.

        Returns:
            A ``DetectionResult`` whose ``xyxy`` is the highest-confidence box
            and whose ``all_boxes`` lists every box in descending confidence.
            When the model is missing, prediction fails or nothing is found,
            only ``error`` is set.
        """
        if self.model is None:
            return DetectionResult(error=self.load_error or "Model YOLO nie jest zaladowany")
        detection_cfg = self.config["detection"]
        try:
            results = self.model.predict(
                source=frame_bgr,
                conf=float(detection_cfg["confidence_threshold"]),
                imgsz=int(detection_cfg["image_size"]),
                device=detection_cfg.get("device", "cpu"),
                verbose=False,
            )
        except Exception as exc:  # pragma: no cover - depends on model runtime
            return DetectionResult(error=f"Blad YOLO: {exc}")
        boxes: list[dict[str, Any]] = []
        names = getattr(self.model, "names", {})
        for prediction in results:
            # A result without detection output carries no boxes to iterate.
            if prediction.boxes is None:
                continue
            for box in prediction.boxes:
                x1, y1, x2, y2 = [int(v) for v in box.xyxy[0].tolist()]
                confidence = float(box.conf[0])
                class_id = int(box.cls[0]) if box.cls is not None else -1
                class_name = names.get(class_id, str(class_id)) if isinstance(names, dict) else str(class_id)
                boxes.append(
                    {
                        "xyxy": (x1, y1, x2, y2),
                        "confidence": confidence,
                        "class_name": class_name,
                    }
                )
        if not boxes:
            return DetectionResult(error="Nie wykryto etykiety")
        boxes.sort(key=lambda item: item["confidence"], reverse=True)
        best = boxes[0]
        return DetectionResult(
            xyxy=best["xyxy"],
            confidence=best["confidence"],
            class_name=best["class_name"],
            # Box tuples become lists so the metadata is directly JSON-friendly.
            all_boxes=[
                {
                    "xyxy": list(item["xyxy"]),
                    "confidence": item["confidence"],
                    "class_name": item["class_name"],
                }
                for item in boxes
            ],
        )
 class TesseractOcr:
    """Reads the text inside a detected bounding box with pytesseract."""

    def __init__(self, config: dict[str, Any]) -> None:
        """Store the configuration and import pytesseract when OCR is enabled."""
        self.config = config
        self.load_error: str | None = None
        self.pytesseract = None
        self._load()

    def _load(self) -> None:
        """Import pytesseract and apply ``ocr.tesseract_cmd``; record failures."""
        if not self.config["ocr"].get("enabled", True):
            return
        try:
            import pytesseract
            command = self.config["ocr"].get("tesseract_cmd")
            if command:
                pytesseract.pytesseract.tesseract_cmd = command
            self.pytesseract = pytesseract
        except Exception as exc:
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"

    def _prepare_image(self, roi: np.ndarray) -> np.ndarray:
        """Scale, grayscale and optionally binarise the cropped label."""
        scale = float(self.config["ocr"].get("scale", 1.0))
        if scale != 1.0:
            roi = cv2.resize(roi, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        if self.config["ocr"].get("threshold", True):
            gray = cv2.GaussianBlur(gray, (3, 3), 0)
            gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        return gray

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> tuple[str, str | None]:
        """Run OCR on the ``bbox`` region of ``frame_bgr``.

        Args:
            frame_bgr: Full camera frame.
            bbox: ``(x1, y1, x2, y2)`` box; it is clipped to the frame.

        Returns:
            ``(text, error)``. ``error`` is ``None`` on success; when OCR is
            disabled both are empty (``""`` and ``None``).
        """
        if not self.config["ocr"].get("enabled", True):
            return "", None
        if self.pytesseract is None:
            return "", self.load_error or "OCR nie jest zaladowany"
        x1, y1, x2, y2 = bbox
        h, w = frame_bgr.shape[:2]
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w, x2), min(h, y2)
        if x2 <= x1 or y2 <= y1:
            return "", "Nieprawidlowy bbox OCR"
        roi = frame_bgr[y1:y2, x1:x2]
        try:
            # Preprocessing is inside the try as well: a bad `scale` or an
            # unexpected frame layout must be reported as an OCR error, not
            # raised into the camera thread that called us.
            gray = self._prepare_image(roi)
            text = self.pytesseract.image_to_string(
                gray,
                lang=self.config["ocr"].get("language", "eng"),
                config="--psm 6",
            )
        except Exception as exc:
            return "", f"Blad OCR: {exc}"
        return text, None
 class DetectionPipeline:
    """Chains YOLO label detection, OCR and label-text parsing."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Create the detector and the OCR reader from the shared config."""
        self.config = config
        self.detector = YoloLabelDetector(config, app_config)
        self.ocr = TesseractOcr(config)

    def process(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Detect the label in ``frame_bgr`` and, when found, read and parse it.

        The detector's result is returned untouched when it has no box.
        Otherwise the OCR text and its parsed form are attached, and an OCR
        error (if any) replaces the result's ``error``.
        """
        detection = self.detector.detect(frame_bgr)
        bbox = detection.xyxy
        if bbox is None:
            return detection
        raw_text, ocr_error = self.ocr.read_label(frame_bgr, bbox)
        detection.raw_text = raw_text
        label_cfg = self.config["label_data"]
        known_colors = label_cfg.get("colors", [])
        known_models = self.config["label_data"].get("models", [])
        detection.parsed = parse_label_text(raw_text, known_colors, known_models)
        if ocr_error:
            detection.error = ocr_error
        return detection
--- a/app/label_parser.py
+++ b/app/label_parser.py
@@ -0,0 +1,44 @@
 from __future__ import annotations
 import re
 from dataclasses import dataclass, asdict
 # Order number: two four-digit groups and a 1-99 suffix, e.g. 1234/5678/12.
 ORDER_RE = re.compile(r"\b(?P<order>\d{4}/\d{4}/(?:[1-9]|[1-9]\d))\b")


 @dataclass
 class ParsedLabel:
    """Fields recognised in the OCR text of a label."""

    order_number: str | None
    color_code: str | None
    product_model: str | None
    raw_text: str

    def to_dict(self) -> dict[str, str | None]:
        """Return the fields as a plain dict."""
        return asdict(self)


 def normalize_ocr_text(text: str) -> str:
    """Collapse every run of whitespace (including newlines) to one space."""
    return " ".join(text.replace("\n", " ").replace("\r", " ").split())


 def parse_label_text(text: str, known_colors: list[str], known_models: list[str]) -> ParsedLabel:
    """Extract the order number, colour code and product model from OCR text.

    Args:
        text: Raw OCR output.
        known_colors: Colour codes to look for (case-insensitive substring).
        known_models: Model names to look for (case-insensitive whole token).

    Returns:
        A ``ParsedLabel``; fields that were not found are ``None`` and
        ``raw_text`` holds the whitespace-normalised input.
    """
    normalized = normalize_ocr_text(text)
    order_match = ORDER_RE.search(normalized)
    normalized_upper = normalized.upper()
    # Blank dictionary entries are skipped: an empty string is a substring of
    # every text and would otherwise always "match".
    matching_colors = [
        color for color in known_colors if color.strip() and color.upper() in normalized_upper
    ]
    # Prefer the longest code so that a code contained in a longer one
    # (e.g. "T-BLK" inside "T-BLK-G") cannot shadow it; ties keep list order.
    color_code = max(matching_colors, key=len, default=None)
    # Lookarounds instead of \b so names that start or end with a non-word
    # character still match as whole tokens.
    product_model = next(
        (
            model
            for model in known_models
            if model.strip() and re.search(rf"(?<!\w){re.escape(model)}(?!\w)", normalized, re.I)
        ),
        None,
    )
    return ParsedLabel(
        order_number=order_match.group("order") if order_match else None,
        color_code=color_code,
        product_model=product_model,
        raw_text=normalized,
    )
--- a/app/main.py
+++ b/app/main.py
@@ -0,0 +1,11 @@
 from __future__ import annotations
 import sys
 from app.config import AppConfig
 from app.main_window import run_app
 def main() -> None:
    """Load the configuration, run the GUI and exit with its status code."""
    exit_code = run_app(AppConfig())
    sys.exit(exit_code)
--- a/app/main_window.py
+++ b/app/main_window.py
@@ -0,0 +1,292 @@
 from __future__ import annotations
 from datetime import datetime
 from typing import Any
 import cv2
 import numpy as np
 from PySide6.QtCore import Qt, Slot
 from PySide6.QtGui import QAction, QImage, QPixmap
 from PySide6.QtWidgets import (
    QApplication,
    QHBoxLayout,
    QLabel,
    QMainWindow,
    QMessageBox,
    QPushButton,
    QTextEdit,
    QToolButton,
    QVBoxLayout,
    QWidget,
    QStyle,
 )
 from app.camera import CameraWorker
 from app.config import AppConfig
 from app.detection import DetectionResult
 from app.media import MediaStore, VideoRecorder
 from app.settings_dialog import SettingsDialog
 class MainWindow(QMainWindow):
    """Main window: live camera preview, detection panel and capture toolbar."""

    def __init__(self, app_config: AppConfig) -> None:
        """Build the UI and start the camera worker thread.

        Args:
            app_config: Loaded configuration; its ``data`` dict is shared with
                the media helpers and the camera worker.
        """
        super().__init__()
        self.app_config = app_config
        self.config = app_config.data
        self.last_frame: np.ndarray | None = None
        # Result currently drawn over the preview (None hides the overlay).
        self.overlay_result: DetectionResult | None = None
        # Most recent result; written into photo/video metadata.
        self.last_detection: DetectionResult | None = None
        self.media_store = MediaStore(self.config, self.app_config)
        self.video_recorder = VideoRecorder(self.config, self.app_config)
        self.setWindowTitle("Duck Stain YOLO")
        self.resize(1280, 720)
        self._build_ui()
        self.worker = CameraWorker(self.config, self.app_config)
        self.worker.frame_ready.connect(self.on_frame_ready)
        self.worker.detection_ready.connect(self.on_detection_ready)
        self.worker.camera_error.connect(self.on_camera_error)
        self.worker.start()

    def _build_ui(self) -> None:
        """Create the preview label, the result panel and the bottom toolbar.

        The widgets are children of ``self.stage`` without a layout; their
        geometry is set in ``resizeEvent``.
        """
        self.stage = QWidget()
        self.setCentralWidget(self.stage)
        self.video_label = QLabel(self.stage)
        self.video_label.setAlignment(Qt.AlignCenter)
        self.video_label.setStyleSheet("background: #111; color: #ddd;")
        self.video_label.setText("Kamera")
        self.result_panel = QWidget(self.stage)
        self.result_panel.setObjectName("resultPanel")
        self.result_panel.setStyleSheet(
            """
            QWidget#resultPanel {
                background: rgba(20, 20, 20, 170);
                border-radius: 8px;
            }
            QTextEdit {
                background: transparent;
                color: white;
                border: 0;
                font-size: 13px;
            }
            QPushButton {
                min-height: 28px;
                padding: 4px 12px;
            }
            """
        )
        panel_layout = QVBoxLayout(self.result_panel)
        self.result_text = QTextEdit()
        self.result_text.setReadOnly(True)
        self.result_text.setFixedHeight(118)
        panel_layout.addWidget(self.result_text)
        panel_buttons = QHBoxLayout()
        self.detect_button = QPushButton("wykryj")
        self.ok_button = QPushButton("ok")
        panel_buttons.addStretch(1)
        panel_buttons.addWidget(self.detect_button)
        panel_buttons.addWidget(self.ok_button)
        panel_layout.addLayout(panel_buttons)
        self.detect_button.clicked.connect(self.start_detection)
        self.ok_button.clicked.connect(self.accept_detection)
        self.toolbar = QWidget(self.stage)
        self.toolbar.setObjectName("bottomToolbar")
        self.toolbar.setStyleSheet(
            """
            QWidget#bottomToolbar {
                background: rgba(20, 20, 20, 175);
                border-radius: 8px;
            }
            QToolButton {
                min-width: 44px;
                min-height: 38px;
                padding: 4px;
            }
            """
        )
        toolbar_layout = QHBoxLayout(self.toolbar)
        toolbar_layout.setContentsMargins(8, 6, 8, 6)
        self.photo_button = self._tool_button(QStyle.SP_DialogSaveButton, "Zrob zdjecie")
        self.record_button = self._tool_button(QStyle.SP_MediaPlay, "Start/stop nagrywania")
        self.settings_button = self._tool_button(QStyle.SP_FileDialogDetailedView, "Ustawienia obrazu")
        toolbar_layout.addWidget(self.photo_button)
        toolbar_layout.addWidget(self.record_button)
        toolbar_layout.addWidget(self.settings_button)
        self.photo_button.clicked.connect(self.take_photo)
        self.record_button.clicked.connect(self.toggle_recording)
        self.settings_button.clicked.connect(self.open_settings)
        quit_action = QAction("Zamknij", self)
        quit_action.triggered.connect(self.close)
        self.addAction(quit_action)

    def _tool_button(self, icon_id: QStyle.StandardPixmap, tooltip: str) -> QToolButton:
        """Return a tool button showing a standard style icon and a tooltip."""
        button = QToolButton()
        button.setIcon(self.style().standardIcon(icon_id))
        button.setToolTip(tooltip)
        return button

    def resizeEvent(self, event: Any) -> None:
        """Lay out the preview (full stage), panel (top right) and toolbar (bottom centre)."""
        super().resizeEvent(event)
        self.video_label.setGeometry(self.stage.rect())
        # Panel takes a third of the width, clamped to 280-420 px.
        panel_width = min(420, max(280, self.stage.width() // 3))
        self.result_panel.setGeometry(self.stage.width() - panel_width - 18, 18, panel_width, 190)
        self.toolbar.adjustSize()
        toolbar_size = self.toolbar.sizeHint()
        self.toolbar.setGeometry(
            (self.stage.width() - toolbar_size.width()) // 2,
            self.stage.height() - toolbar_size.height() - 18,
            toolbar_size.width(),
            toolbar_size.height(),
        )

    def closeEvent(self, event: Any) -> None:
        """Finish an active recording and stop the camera worker before closing."""
        if self.video_recorder.is_recording:
            self.video_recorder.stop(self.current_metadata("video"))
        self.worker.stop()
        # Give the worker up to two seconds to leave its capture loop.
        self.worker.wait(2000)
        super().closeEvent(event)

    @Slot(object)
    def on_frame_ready(self, frame: np.ndarray) -> None:
        """Remember the frame, append it to an active recording and display it."""
        self.last_frame = frame.copy()
        if self.video_recorder.is_recording:
            self.video_recorder.write(frame)
        self._show_frame(frame)

    @Slot(object)
    def on_detection_ready(self, result: DetectionResult) -> None:
        """Store a new detection result and refresh the overlay and text panel."""
        self.last_detection = result
        self.overlay_result = result if result.xyxy else None
        self._update_result_text(result)

    @Slot(str)
    def on_camera_error(self, message: str) -> None:
        """Show a camera error message in the result panel."""
        self.result_text.setPlainText(message)

    def start_detection(self) -> None:
        """Clear the overlay and ask the worker to start detecting."""
        self.overlay_result = None
        self.result_text.setPlainText("Wykrywanie...")
        self.worker.start_detection()

    def accept_detection(self) -> None:
        """Stop detection and mark the last result as accepted in the panel."""
        self.worker.accept_detection()
        self.overlay_result = None
        if self.last_detection:
            self._update_result_text(self.last_detection, accepted=True)

    def take_photo(self) -> None:
        """Save the latest frame with its metadata and report the outcome."""
        if self.last_frame is None:
            QMessageBox.warning(self, "Zdjecie", "Brak klatki z kamery")
            return
        try:
            path = self.media_store.save_photo(self.last_frame, self.current_metadata("photo"))
        except (RuntimeError, OSError, cv2.error) as exc:
            # Without this the error would escape the slot and the user would
            # get no feedback that nothing was saved.
            QMessageBox.warning(self, "Zdjecie", str(exc))
            return
        self.statusBar().showMessage(f"Zapisano zdjecie: {path}", 5000)

    def toggle_recording(self) -> None:
        """Start recording, or stop the active recording and write its metadata."""
        if self.last_frame is None:
            QMessageBox.warning(self, "Wideo", "Brak klatki z kamery")
            return
        if self.video_recorder.is_recording:
            path = self.video_recorder.stop(self.current_metadata("video"))
            self.record_button.setIcon(self.style().standardIcon(QStyle.SP_MediaPlay))
            self.statusBar().showMessage(f"Zapisano film: {path}", 5000)
            return
        try:
            path = self.video_recorder.start(self.last_frame)
        except (RuntimeError, OSError) as exc:
            # OSError covers failures while creating the target directory.
            QMessageBox.warning(self, "Wideo", str(exc))
            return
        self.record_button.setIcon(self.style().standardIcon(QStyle.SP_MediaStop))
        self.statusBar().showMessage(f"Nagrywanie: {path}", 5000)

    def open_settings(self) -> None:
        """Open the modal settings dialog and apply what it saves."""
        dialog = SettingsDialog(self.config, self)
        dialog.settings_saved.connect(self.save_camera_settings)
        dialog.exec()

    @Slot(dict)
    def save_camera_settings(self, camera_config: dict[str, Any]) -> None:
        """Persist the new camera section and push it to the camera worker."""
        self.config["camera"] = camera_config
        self.app_config.save(self.config)
        self.worker.update_camera_config(camera_config)

    def current_metadata(self, media_type: str) -> dict[str, Any]:
        """Build the metadata written next to a photo or video.

        Args:
            media_type: Stored under ``"media_type"`` (callers pass
                ``"photo"`` or ``"video"``).
        """
        return {
            "media_type": media_type,
            "created_at": datetime.now().isoformat(timespec="seconds"),
            "detection": self.last_detection.to_metadata() if self.last_detection else None,
            "camera": {
                "width": self.config["camera"].get("width"),
                "height": self.config["camera"].get("height"),
                "fps": self.config["camera"].get("fps"),
                "properties": self.config["camera"].get("properties", {}),
            },
            "detection_config": self.config.get("detection", {}),
        }

    def _update_result_text(self, result: DetectionResult, accepted: bool = False) -> None:
        """Render ``result`` as plain text in the result panel."""
        status = "Zatwierdzono" if accepted else "Wynik"
        lines = [status]
        if result.error:
            lines.append(f"Komunikat: {result.error}")
        if result.confidence is not None:
            lines.append(f"YOLO confidence: {result.confidence:.3f}")
        if result.parsed:
            lines.append(f"Zamowienie: {result.parsed.order_number or '-'}")
            lines.append(f"Kolor: {result.parsed.color_code or '-'}")
            lines.append(f"Model: {result.parsed.product_model or '-'}")
        if result.raw_text:
            lines.append("")
            lines.append(result.raw_text)
        self.result_text.setPlainText("\n".join(lines))

    def _show_frame(self, frame_bgr: np.ndarray) -> None:
        """Draw the overlay on a copy of the frame and show it scaled to the label."""
        display_frame = frame_bgr.copy()
        if self.overlay_result is not None:
            self._draw_detection(display_frame, self.overlay_result)
        frame_rgb = cv2.cvtColor(display_frame, cv2.COLOR_BGR2RGB)
        h, w, channels = frame_rgb.shape
        # .copy() detaches the QImage from the numpy buffer before it goes away.
        image = QImage(frame_rgb.data, w, h, channels * w, QImage.Format_RGB888).copy()
        pixmap = QPixmap.fromImage(image)
        self.video_label.setPixmap(
            pixmap.scaled(self.video_label.size(), Qt.KeepAspectRatio, Qt.SmoothTransformation)
        )

    def _draw_detection(self, frame_bgr: np.ndarray, result: DetectionResult) -> None:
        """Draw boxes and captions on ``frame_bgr`` in place.

        In ``"all"`` mode every box from ``result.all_boxes`` is drawn;
        otherwise only the selected box.
        """
        mode = self.config["detection"].get("mode", "best")
        boxes = result.all_boxes if mode == "all" else [result.to_metadata()]
        for item in boxes:
            # `all_boxes` entries use "xyxy", `to_metadata()` uses "bbox_xyxy".
            xyxy = item.get("xyxy") or item.get("bbox_xyxy")
            if not xyxy:
                continue
            x1, y1, x2, y2 = [int(value) for value in xyxy]
            confidence = item.get("confidence")
            class_name = item.get("class_name") or "label"
            cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (0, 220, 0), 3)
            caption = f"{class_name} {confidence:.2f}" if confidence is not None else class_name
            cv2.putText(
                frame_bgr,
                caption,
                # Keep the caption inside the frame for boxes touching the top edge.
                (x1, max(24, y1 - 8)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                (0, 220, 0),
                2,
                cv2.LINE_AA,
            )
 def run_app(app_config: AppConfig) -> int:
    """Create the Qt application, show the main window and run the event loop.

    Returns:
        The exit code of the Qt event loop.
    """
    application = QApplication([])
    main_window = MainWindow(app_config)
    main_window.show()
    exit_code = application.exec()
    return exit_code
--- a/app/media.py
+++ b/app/media.py
@@ -0,0 +1,101 @@
 from __future__ import annotations
 import json
 from datetime import datetime
 from pathlib import Path
 from typing import Any
 import cv2
 import numpy as np
 def timestamp_name() -> str:
    """Return the current local time as ``YYYYMMDD_HHMMSS`` for file names."""
    now = datetime.now()
    return f"{now:%Y%m%d_%H%M%S}"
 def write_metadata(media_path: Path, metadata: dict[str, Any]) -> Path:
    """Write ``metadata`` as JSON next to ``media_path`` and return that path.

    The sidecar has the media file's name with the suffix replaced by
    ``.json``; it is UTF-8, indented by two spaces and ends with a newline.
    """
    sidecar = media_path.with_suffix(".json")
    with sidecar.open(mode="w", encoding="utf-8") as handle:
        json.dump(metadata, handle, ensure_ascii=False, indent=2)
        handle.write("\n")
    return sidecar
 class MediaStore:
    """Chooses target paths for captures and saves photos with metadata."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the configuration dict and the path-resolving config object."""
        self.config = config
        self.app_config = app_config

    def _unique_path(self, directory: Path, extension: str) -> Path:
        """Return a timestamped path in ``directory`` that is not yet taken.

        Names have one-second resolution, so a second capture within the same
        second gets a ``_1``, ``_2``, ... suffix instead of overwriting the
        first one or its JSON sidecar.
        """
        stem = timestamp_name()
        candidate = directory / f"{stem}.{extension}"
        counter = 1
        while candidate.exists() or candidate.with_suffix(".json").exists():
            candidate = directory / f"{stem}_{counter}.{extension}"
            counter += 1
        return candidate

    def photo_path(self) -> Path:
        """Return a fresh photo path, creating ``capture.photos_dir`` if needed."""
        capture_cfg = self.config["capture"]
        directory = self.app_config.resolve_path(capture_cfg["photos_dir"])
        directory.mkdir(parents=True, exist_ok=True)
        extension = capture_cfg.get("image_extension", "jpg").lstrip(".")
        return self._unique_path(directory, extension)

    def video_path(self) -> Path:
        """Return a fresh video path, creating ``capture.videos_dir`` if needed."""
        capture_cfg = self.config["capture"]
        directory = self.app_config.resolve_path(capture_cfg["videos_dir"])
        directory.mkdir(parents=True, exist_ok=True)
        extension = capture_cfg.get("video_extension", "mp4").lstrip(".")
        return self._unique_path(directory, extension)

    def save_photo(self, frame_bgr: np.ndarray, metadata: dict[str, Any]) -> Path:
        """Write ``frame_bgr`` as an image plus its JSON sidecar.

        Returns:
            The path of the saved image.

        Raises:
            RuntimeError: if OpenCV reports that the image was not written;
                no sidecar is written in that case.
        """
        path = self.photo_path()
        # imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(str(path), frame_bgr):
            raise RuntimeError(f"Nie mozna zapisac zdjecia: {path}")
        write_metadata(path, metadata)
        return path
 class VideoRecorder:
    """Records frames to one video file at a time and writes its metadata."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the configuration; no file is opened until `start`."""
        self.config = config
        self.app_config = app_config
        self.path: Path | None = None
        self.writer: cv2.VideoWriter | None = None
        self.started_at: str | None = None

    @property
    def is_recording(self) -> bool:
        """True while a video writer is open."""
        return self.writer is not None

    def start(self, frame_bgr: np.ndarray) -> Path:
        """Open a new video file sized like ``frame_bgr`` and write that frame.

        Returns:
            The path of the video being recorded.

        Raises:
            RuntimeError: if a recording is already active, the configured
                codec has fewer than four characters, or the writer cannot
                be opened.
        """
        if self.writer is not None:
            raise RuntimeError("Nagrywanie juz trwa")
        capture_cfg = self.config["capture"]
        codec = str(capture_cfg.get("video_codec", "mp4v"))
        # A FOURCC needs exactly four characters; longer values are truncated
        # below, shorter ones would otherwise fail with a TypeError that
        # callers do not catch.
        if len(codec) < 4:
            raise RuntimeError(f"Nieprawidlowy kodek wideo: {codec!r}")
        path = MediaStore(self.config, self.app_config).video_path()
        h, w = frame_bgr.shape[:2]
        fps = float(self.config["camera"].get("fps", 30))
        fourcc = cv2.VideoWriter_fourcc(*codec[:4])
        writer = cv2.VideoWriter(str(path), fourcc, fps, (w, h))
        if not writer.isOpened():
            # Release the handle and keep the recorder state untouched.
            writer.release()
            raise RuntimeError("Nie mozna uruchomic zapisu wideo")
        self.writer = writer
        self.path = path
        self.started_at = datetime.now().isoformat(timespec="seconds")
        self.write(frame_bgr)
        return path

    def write(self, frame_bgr: np.ndarray) -> None:
        """Append a frame to the active recording; no-op when not recording."""
        if self.writer is not None:
            self.writer.write(frame_bgr)

    def stop(self, metadata: dict[str, Any]) -> Path | None:
        """Close the recording and write its JSON sidecar.

        Args:
            metadata: Base metadata; a ``"recording"`` entry with the start
                and stop timestamps is added to a copy of it.

        Returns:
            The path of the finished video, or ``None`` when not recording.
        """
        if self.writer is None:
            return None
        self.writer.release()
        self.writer = None
        path = self.path
        if path is not None:
            metadata = {
                **metadata,
                "recording": {
                    "started_at": self.started_at,
                    "stopped_at": datetime.now().isoformat(timespec="seconds"),
                },
            }
            write_metadata(path, metadata)
        self.path = None
        self.started_at = None
        return path
--- a/app/settings_dialog.py
+++ b/app/settings_dialog.py
@@ -0,0 +1,125 @@
 from __future__ import annotations
 from copy import deepcopy
 from typing import Any
 from PySide6.QtCore import Qt, Signal
 from PySide6.QtWidgets import (
    QCheckBox,
    QDialog,
    QFormLayout,
    QHBoxLayout,
    QLabel,
    QPushButton,
    QSlider,
    QSpinBox,
    QVBoxLayout,
    QWidget,
 )
 # Camera property keys (looked up under the camera "properties" config entry)
 # mapped to the labels shown next to each slider in the settings form.
 # Insertion order is the order of the rows in the dialog.
 CAMERA_PROPERTY_LABELS = dict(
    brightness="Jasnosc",
    contrast="Kontrast",
    saturation="Nasycenie",
    hue="Barwa",
    gain="Gain",
    exposure="Ekspozycja",
    sharpness="Ostrosc",
    auto_exposure="Auto ekspozycja",
    focus="Focus",
    auto_focus="Auto focus",
 )
 class PropertySlider(QWidget):
    """One settings row: enable checkbox, slider and spin box for a property.

    The slider and spin box mirror each other over the range -100..100.
    ``value_changed`` is emitted with the property name and the current value
    whenever the slider moves or the checkbox is toggled.
    """

    value_changed = Signal(str, object)

    def __init__(self, name: str, value: float | None) -> None:
        """Build the row for property ``name``.

        Args:
            name: Property key reported through ``value_changed``.
            value: Initial value; None leaves the checkbox unchecked and the
                slider at 0.
        """
        super().__init__()
        self.name = name

        has_value = value is not None

        checkbox = QCheckBox()
        checkbox.setChecked(has_value)

        slider = QSlider(Qt.Horizontal)
        slider.setRange(-100, 100)
        slider.setValue(int(value) if has_value else 0)

        spin_box = QSpinBox()
        spin_box.setRange(-100, 100)
        spin_box.setValue(slider.value())

        self.enabled_box = checkbox
        self.slider = slider
        self.value_box = spin_box

        row = QHBoxLayout(self)
        row.setContentsMargins(0, 0, 0, 0)
        # Only the slider stretches; the checkbox and spin box keep their size.
        for child, stretch in ((checkbox, 0), (slider, 1), (spin_box, 0)):
            row.addWidget(child, stretch)

        # Signals are connected after the initial values are set, so building
        # the row does not emit value_changed.
        slider.valueChanged.connect(spin_box.setValue)
        spin_box.valueChanged.connect(slider.setValue)
        slider.valueChanged.connect(self._emit_value)
        checkbox.toggled.connect(self._emit_value)

    def _emit_value(self) -> None:
        """Emit ``value_changed`` with the property name and current value."""
        current = self.value()
        self.value_changed.emit(self.name, current)

    def value(self) -> float | None:
        """Return the slider position as a float, or None when unchecked."""
        return float(self.slider.value()) if self.enabled_box.isChecked() else None
 class SettingsDialog(QDialog):
    """Dialog for editing camera resolution, FPS and image properties.

    The dialog works on a deep copy of the configuration it is given. On save
    it emits ``settings_saved`` with the edited ``camera`` section and accepts
    the dialog; cancelling rejects it without emitting anything.
    """

    settings_saved = Signal(dict)

    def __init__(self, config: dict[str, Any], parent: QWidget | None = None) -> None:
        """Build the form from the ``camera`` section of ``config``.

        Args:
            config: Application configuration; must contain a ``camera``
                mapping. It is deep-copied, so the caller's dict is not
                modified.
            parent: Optional parent widget.
        """
        super().__init__(parent)
        self.setWindowTitle("Ustawienia obrazu")
        self.setMinimumWidth(520)
        self.config = deepcopy(config)
        self.property_widgets: dict[str, PropertySlider] = {}
        camera_cfg = self.config["camera"]
        # "properties" may be absent or explicitly null in the config file.
        properties = camera_cfg.get("properties") or {}
        main_layout = QVBoxLayout(self)
        form = QFormLayout()
        self.width_box = self._make_spin_box(160, 7680, camera_cfg.get("width"), 1920)
        self.height_box = self._make_spin_box(120, 4320, camera_cfg.get("height"), 1080)
        self.fps_box = self._make_spin_box(1, 240, camera_cfg.get("fps"), 30)
        form.addRow("Szerokosc", self.width_box)
        form.addRow("Wysokosc", self.height_box)
        form.addRow("FPS", self.fps_box)
        for name, label in CAMERA_PROPERTY_LABELS.items():
            widget = PropertySlider(name, properties.get(name))
            self.property_widgets[name] = widget
            form.addRow(QLabel(label), widget)
        main_layout.addLayout(form)
        buttons = QHBoxLayout()
        buttons.addStretch(1)
        cancel_button = QPushButton("Anuluj")
        save_button = QPushButton("Zapisz")
        save_button.setDefault(True)
        buttons.addWidget(cancel_button)
        buttons.addWidget(save_button)
        main_layout.addLayout(buttons)
        cancel_button.clicked.connect(self.reject)
        save_button.clicked.connect(self._save)

    @staticmethod
    def _make_spin_box(minimum: int, maximum: int, value: Any, default: int) -> QSpinBox:
        """Create a spin box limited to [minimum, maximum].

        ``default`` is used when ``value`` is None (key missing or null).
        """
        box = QSpinBox()
        box.setRange(minimum, maximum)
        box.setValue(int(default if value is None else value))
        return box

    def _save(self) -> None:
        """Copy the form values into the camera config, emit it and accept."""
        camera_cfg = self.config["camera"]
        camera_cfg["width"] = int(self.width_box.value())
        camera_cfg["height"] = int(self.height_box.value())
        camera_cfg["fps"] = int(self.fps_box.value())
        # Merge over the existing entries so that properties without a slider
        # in this dialog are kept instead of being dropped on save.
        existing = camera_cfg.get("properties") or {}
        camera_cfg["properties"] = {
            **existing,
            **{name: widget.value() for name, widget in self.property_widgets.items()},
        }
        self.settings_saved.emit(camera_cfg)
        self.accept()
--- a/app_config.json
+++ b/app_config.json
@@ -0,0 +1,53 @@
 {
  "camera": {
    "index": 0,
    "width": 1920,
    "height": 1080,
    "fps": 30,
    "backend": "auto",
    "properties": {
      "brightness": null,
      "contrast": null,
      "saturation": null,
      "hue": null,
      "gain": null,
      "exposure": null,
      "sharpness": null,
      "auto_exposure": null,
      "focus": null,
      "auto_focus": null
    }
  },
  "detection": {
    "model_path": "models/best.pt",
    "confidence_threshold": 0.25,
    "mode": "best",
    "frame_stride": 5,
    "image_size": 640,
    "device": "cpu"
  },
  "ocr": {
    "enabled": true,
    "language": "eng",
    "tesseract_cmd": null,
    "threshold": true,
    "scale": 2.0
  },
  "capture": {
    "photos_dir": "captures/photos",
    "videos_dir": "captures/videos",
    "image_extension": "jpg",
    "video_extension": "mp4",
    "video_codec": "mp4v"
  },
  "label_data": {
    "models": [
      "Regius",
      "Duvell"
    ],
    "colors": [
      "T-NF-BLK-OUT-BST-G",
      "T-BLK-G"
    ]
  }
 }
--- a/captures/photos/.gitkeep
+++ b/captures/photos/.gitkeep
@@ -0,0 +1 @@
--- a/captures/videos/.gitkeep
+++ b/captures/videos/.gitkeep
@@ -0,0 +1 @@
--- a/main.py
+++ b/main.py
@@ -0,0 +1,5 @@
 # Launcher script: delegates to the application entry point in app.main.
 from app.main import main
 # Run the application only when this file is executed directly, not on import.
 if __name__ == "__main__":
    main()
--- a/models/.gitkeep
+++ b/models/.gitkeep
@@ -0,0 +1 @@
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,15 @@
 [project]
 name = "duck-stain-yolo"
 version = "0.1.0"
 description = "PySide6 camera app for YOLO label detection and OCR metadata capture."
 requires-python = ">=3.10"
 dependencies = [
  "PySide6>=6.6",
  "opencv-python>=4.8",
  "ultralytics>=8.0",
  "pytesseract>=0.3",
  "numpy>=1.26",
 ]
 [tool.ruff]
 line-length = 100
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,9 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 PySide6>=6.6
 opencv-python>=4.8
 torch==2.5.1+cpu; platform_system == "Linux"
 torchvision==0.20.1+cpu; platform_system == "Linux"
 ultralytics>=8.0
 pytesseract>=0.3
 numpy>=1.26