Initial MVP application skeleton

Add PySide6 camera UI, YOLO/Tesseract detection pipeline, capture metadata, configuration, and project gitignore.
2026-05-07 00:18:38 +02:00
commit 090865af76
18 changed files with 1140 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,31 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+.pytest_cache/
+.ruff_cache/
+.mypy_cache/
+
+# Virtual environments
+.venv/
+.venv-*/
+venv/
+env/
+
+# Local/runtime data
+captures/photos/*
+captures/videos/*
+!captures/photos/.gitkeep
+!captures/videos/.gitkeep
+models/*
+!models/.gitkeep
+
+# OS/editor
+.DS_Store
+.idea/
+.vscode/
+
+# Ultralytics/runtime caches
+runs/
+*.onnx
+*.engine
--- a/README.md
+++ b/README.md
@@ -0,0 +1,42 @@
+# Duck Stain YOLO
+
+MVP aplikacji okienkowej do podgladu kamery USB, wykrywania etykiety modelem YOLOv8 i zapisu zdjec/filmow z metadanymi JSON.
+
+## Uruchomienie
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+python main.py
+```
+
+Na macOS z Tesseractem:
+
+```bash
+brew install tesseract
+```
+
+Na Ubuntu/WSL:
+
+```bash
+sudo apt install tesseract-ocr
+```
+
+Na Linuksie `requirements.txt` wymusza CPU build PyTorch, zeby nie pobierac wariantu CUDA. Na macOS pip zainstaluje standardowy wariant CPU dla procesora Intel.
+
+Umiesc wytrenowany model jako `models/best.pt` albo zmien `detection.model_path` w `app_config.json`.
+
+## Konfiguracja
+
+Glowny plik konfiguracji: `app_config.json`.
+
+Istotne ustawienia:
+
+- `camera.width`, `camera.height`, `camera.fps` - rozdzielczosc i FPS kamery.
+- `camera.properties` - parametry OpenCV ustawiane na kamerze, np. jasnosc, kontrast, ekspozycja. `null` oznacza brak wymuszania wartosci.
+- `detection.mode` - `best` rysuje najlepsza etykiete, `all` rysuje wszystkie wykrycia.
+- `detection.frame_stride` - YOLO uruchamiany co N klatek podczas aktywnego wykrywania.
+- `label_data.models`, `label_data.colors` - slowniki do walidacji tekstu z etykiety.
+
+Zdjecia trafiaja do `captures/photos`, filmy do `captures/videos`. Obok kazdego pliku multimedialnego zapisywany jest plik JSON o tej samej nazwie (z rozszerzeniem `.json`) z aktualnym wynikiem detekcji/OCR.
--- a/app/init.py
+++ b/app/init.py
@@ -0,0 +1 @@
+__all__ = []
--- a/app/camera.py
+++ b/app/camera.py
@@ -0,0 +1,129 @@
+from __future__ import annotations
+
+import threading
+import time
+from typing import Any
+
+import cv2
+import numpy as np
+from PySide6.QtCore import QThread, Signal, Slot
+
+from app.detection import DetectionPipeline, DetectionResult
+
+
+# Maps the property names accepted under `camera.properties` in the config
+# to the OpenCV capture property ids passed to `VideoCapture.set`.
+# Names missing from this table are ignored by `_apply_camera_settings`.
+CV_CAP_PROPS = {
+    "brightness": cv2.CAP_PROP_BRIGHTNESS,
+    "contrast": cv2.CAP_PROP_CONTRAST,
+    "saturation": cv2.CAP_PROP_SATURATION,
+    "hue": cv2.CAP_PROP_HUE,
+    "gain": cv2.CAP_PROP_GAIN,
+    "exposure": cv2.CAP_PROP_EXPOSURE,
+    "sharpness": cv2.CAP_PROP_SHARPNESS,
+    "auto_exposure": cv2.CAP_PROP_AUTO_EXPOSURE,
+    "focus": cv2.CAP_PROP_FOCUS,
+    "auto_focus": cv2.CAP_PROP_AUTOFOCUS,
+}
+
+
+def backend_for_name(name: str) -> int:
+    """Translate a configured backend name into an OpenCV capture API id.
+
+    Any name that is not listed below (including the default ``"auto"``)
+    maps to ``cv2.CAP_ANY``.
+    """
+    known_backends = {
+        "avfoundation": cv2.CAP_AVFOUNDATION,
+        "v4l2": cv2.CAP_V4L2,
+        "dshow": cv2.CAP_DSHOW,
+    }
+    return known_backends.get(name, cv2.CAP_ANY)
+
+
+class CameraWorker(QThread):
+    """Background thread that reads camera frames and runs label detection.
+
+    Signals:
+        frame_ready: emitted with every frame read from the camera.
+        detection_ready: emitted with a ``DetectionResult`` for frames that
+            went through the detection pipeline.
+        camera_error: emitted with a user-facing message when the camera
+            cannot be opened or a frame cannot be read.
+    """
+
+    frame_ready = Signal(object)
+    detection_ready = Signal(object)
+    camera_error = Signal(str)
+
+    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
+        """Build the detection pipeline and initialise the worker state.
+
+        Args:
+            config: The merged application configuration dictionary.
+            app_config: Object used by the pipeline to resolve configured paths.
+        """
+        super().__init__()
+        self.config = config
+        self.app_config = app_config
+        self.pipeline = DetectionPipeline(config, app_config)
+        self._running = threading.Event()
+        self._running.set()
+        self._detecting = False
+        self._accepted = False
+        self._frame_count = 0
+        self._capture: cv2.VideoCapture | None = None
+        # Guards the detection flags, the frame counter and `_capture`, which
+        # are touched both by `run()` and by the slots below.
+        self._lock = threading.Lock()
+
+    def stop(self) -> None:
+        """Ask the capture loop in `run()` to finish after the current frame."""
+        self._running.clear()
+
+    @Slot()
+    def start_detection(self) -> None:
+        """Enable detection and restart the frame-stride counter."""
+        with self._lock:
+            self._detecting = True
+            self._accepted = False
+            self._frame_count = 0
+
+    @Slot()
+    def accept_detection(self) -> None:
+        """Mark the current result as accepted and stop further detection."""
+        with self._lock:
+            self._detecting = False
+            self._accepted = True
+
+    @Slot(dict)
+    def update_camera_config(self, camera_config: dict[str, Any]) -> None:
+        """Store a new camera section and apply it to the open capture, if any."""
+        with self._lock:
+            self.config["camera"] = camera_config
+            capture = self._capture
+        if capture is not None:
+            self._apply_camera_settings(capture)
+
+    def run(self) -> None:
+        """Open the camera and emit frames until `stop()` is called."""
+        camera_cfg = self.config["camera"]
+        capture = cv2.VideoCapture(
+            int(camera_cfg.get("index", 0)),
+            backend_for_name(str(camera_cfg.get("backend", "auto"))),
+        )
+        with self._lock:
+            self._capture = capture
+
+        # The open check lives inside the try block so that a camera that
+        # failed to open is still released and `_capture` is cleared.
+        try:
+            if not capture.isOpened():
+                self.camera_error.emit("Nie mozna otworzyc kamery USB")
+                return
+
+            self._apply_camera_settings(capture)
+
+            while self._running.is_set():
+                ok, frame = capture.read()
+                if not ok or frame is None:
+                    self.camera_error.emit("Nie mozna odczytac klatki z kamery")
+                    # Back off briefly instead of spinning on a failing device.
+                    time.sleep(0.2)
+                    continue
+
+                self.frame_ready.emit(frame)
+                self._maybe_detect(frame)
+        finally:
+            # Drop the shared reference first so `update_camera_config` no
+            # longer picks up a handle that is about to be released.
+            with self._lock:
+                self._capture = None
+            capture.release()
+
+    def _apply_camera_settings(self, capture: cv2.VideoCapture) -> None:
+        """Push resolution, FPS and the configured properties to the capture."""
+        camera_cfg = self.config["camera"]
+        capture.set(cv2.CAP_PROP_FRAME_WIDTH, int(camera_cfg.get("width", 1920)))
+        capture.set(cv2.CAP_PROP_FRAME_HEIGHT, int(camera_cfg.get("height", 1080)))
+        capture.set(cv2.CAP_PROP_FPS, int(camera_cfg.get("fps", 30)))
+
+        # `properties` may be explicitly null in the JSON file; treat that
+        # the same as an empty mapping.
+        properties = camera_cfg.get("properties") or {}
+        for name, value in properties.items():
+            # A None value means "do not force this property".
+            if value is None or name not in CV_CAP_PROPS:
+                continue
+            capture.set(CV_CAP_PROPS[name], float(value))
+
+    def _maybe_detect(self, frame: np.ndarray) -> None:
+        """Run the pipeline on every N-th frame while detection is active."""
+        with self._lock:
+            detecting = self._detecting and not self._accepted
+            frame_stride = max(1, int(self.config["detection"].get("frame_stride", 5)))
+            self._frame_count += 1
+            should_detect = detecting and self._frame_count % frame_stride == 0
+
+        if not should_detect:
+            return
+
+        result: DetectionResult = self.pipeline.process(frame)
+
+        # The pipeline runs without the lock held, so the user may have
+        # accepted a result in the meantime. Emitting now would replace the
+        # accepted result with one the user never confirmed.
+        with self._lock:
+            still_wanted = self._detecting and not self._accepted
+        if still_wanted:
+            self.detection_ready.emit(result)
--- a/app/config.py
+++ b/app/config.py
@@ -0,0 +1,96 @@
+from __future__ import annotations
+
+import json
+from copy import deepcopy
+from pathlib import Path
+from typing import Any
+
+
+# Project root: the parent of the directory that contains this module.
+APP_ROOT = Path(__file__).resolve().parent.parent
+# Location of the user-editable configuration file.
+CONFIG_PATH = APP_ROOT / "app_config.json"
+
+
+# Built-in defaults. Values loaded from the JSON file are merged on top of
+# this structure, so any key missing from the file falls back to these.
+DEFAULT_CONFIG: dict[str, Any] = {
+    "camera": {
+        "index": 0,
+        "width": 1920,
+        "height": 1080,
+        "fps": 30,
+        "backend": "auto",
+        # None means "do not force this property on the camera".
+        "properties": {
+            "brightness": None,
+            "contrast": None,
+            "saturation": None,
+            "hue": None,
+            "gain": None,
+            "exposure": None,
+            "sharpness": None,
+            "auto_exposure": None,
+            "focus": None,
+            "auto_focus": None,
+        },
+    },
+    "detection": {
+        "model_path": "models/best.pt",
+        "confidence_threshold": 0.25,
+        "mode": "best",
+        "frame_stride": 5,
+        "image_size": 640,
+        "device": "cpu",
+    },
+    "ocr": {
+        "enabled": True,
+        "language": "eng",
+        "tesseract_cmd": None,
+        "threshold": True,
+        "scale": 2.0,
+    },
+    "capture": {
+        "photos_dir": "captures/photos",
+        "videos_dir": "captures/videos",
+        "image_extension": "jpg",
+        "video_extension": "mp4",
+        "video_codec": "mp4v",
+    },
+    # Dictionaries used to validate the text read from the label.
+    "label_data": {"models": ["Regius", "Duvell"], "colors": ["T-NF-BLK-OUT-BST-G", "T-BLK-G"]},
+}
+
+
+def deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
+    """Return a new dict with ``override`` recursively merged on top of ``base``.
+
+    Nested dicts present on both sides are merged key by key; any other
+    value from ``override`` replaces the one from ``base``. Neither input is
+    modified, and the result shares no mutable objects with either of them.
+
+    Args:
+        base: The mapping that provides the default values.
+        override: The mapping whose values take precedence.
+
+    Returns:
+        The merged mapping as an independent deep copy.
+    """
+    result = deepcopy(base)
+    for key, value in override.items():
+        if isinstance(value, dict) and isinstance(result.get(key), dict):
+            result[key] = deep_merge(result[key], value)
+        else:
+            # Copy so that later edits to the merged config cannot leak back
+            # into the caller's override structure.
+            result[key] = deepcopy(value)
+    return result
+
+
+class AppConfig:
+    """Loads, holds and persists the application configuration JSON file."""
+
+    def __init__(self, path: Path = CONFIG_PATH) -> None:
+        """Read the configuration from ``path``, creating the file if missing."""
+        self.path = path
+        self.data = self.load()
+
+    def load(self) -> dict[str, Any]:
+        """Return the configuration merged on top of the built-in defaults.
+
+        When the file does not exist it is created with the defaults.
+
+        Raises:
+            ValueError: If the file is not valid JSON (``json.JSONDecodeError``
+                is a ``ValueError``) or its root is not a JSON object.
+        """
+        if not self.path.exists():
+            # Work on a private copy: `save` stores the object it is given in
+            # `self.data`, which must never alias the shared module default.
+            defaults = deepcopy(DEFAULT_CONFIG)
+            self.save(defaults)
+            return defaults
+
+        with self.path.open("r", encoding="utf-8") as config_file:
+            loaded = json.load(config_file)
+        if not isinstance(loaded, dict):
+            raise ValueError(
+                f"Nieprawidlowy plik konfiguracji (oczekiwano obiektu JSON): {self.path}"
+            )
+        return deep_merge(DEFAULT_CONFIG, loaded)
+
+    def save(self, data: dict[str, Any] | None = None) -> None:
+        """Write the configuration to disk.
+
+        Args:
+            data: When given, replaces ``self.data`` before writing.
+        """
+        if data is not None:
+            self.data = data
+        self.path.parent.mkdir(parents=True, exist_ok=True)
+        with self.path.open("w", encoding="utf-8") as config_file:
+            json.dump(self.data, config_file, indent=2, ensure_ascii=False)
+            config_file.write("\n")
+
+    def resolve_path(self, configured_path: str) -> Path:
+        """Return ``configured_path`` as is if absolute, else relative to APP_ROOT."""
+        path = Path(configured_path)
+        if path.is_absolute():
+            return path
+        return APP_ROOT / path
--- a/app/detection.py
+++ b/app/detection.py
@@ -0,0 +1,183 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+import cv2
+import numpy as np
+
+from app.label_parser import ParsedLabel, parse_label_text
+
+
+@dataclass
+class DetectionResult:
+    """Outcome of one detection pass: best box, OCR text and any error."""
+
+    xyxy: tuple[int, int, int, int] | None = None
+    confidence: float | None = None
+    class_name: str | None = None
+    raw_text: str = ""
+    parsed: ParsedLabel | None = None
+    error: str | None = None
+    all_boxes: list[dict[str, Any]] = field(default_factory=list)
+
+    def to_metadata(self) -> dict[str, Any]:
+        """Return a dictionary form of the result for the JSON sidecar file."""
+        bbox = list(self.xyxy) if self.xyxy else None
+        parsed = self.parsed.to_dict() if self.parsed else None
+        return dict(
+            bbox_xyxy=bbox,
+            confidence=self.confidence,
+            class_name=self.class_name,
+            raw_text=self.raw_text,
+            parsed=parsed,
+            error=self.error,
+            all_boxes=self.all_boxes,
+        )
+
+
+class YoloLabelDetector:
+    """Locates labels in a frame with an Ultralytics YOLO model."""
+
+    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
+        """Remember the configuration and try to load the model right away."""
+        self.config = config
+        self.app_config = app_config
+        self.model = None
+        self.load_error: str | None = None
+        self._load_model()
+
+    def _load_model(self) -> None:
+        """Load the configured weights; on failure record `load_error` instead."""
+        weights = self.app_config.resolve_path(self.config["detection"]["model_path"])
+        if not weights.exists():
+            self.load_error = f"Brak modelu: {weights}"
+            return
+
+        try:
+            # Imported lazily so the module can be used without ultralytics.
+            from ultralytics import YOLO
+
+            self.model = YOLO(str(weights))
+        except Exception as exc:  # pragma: no cover - depends on optional runtime deps
+            self.load_error = f"Nie mozna zaladowac YOLO: {exc}"
+
+    def detect(self, frame_bgr: np.ndarray) -> DetectionResult:
+        """Run the model on one frame.
+
+        Returns:
+            A result holding the highest-confidence box plus all boxes sorted
+            by descending confidence, or a result with only ``error`` set
+            when the model is unavailable, prediction fails or nothing is
+            found.
+        """
+        if self.model is None:
+            return DetectionResult(error=self.load_error or "Model YOLO nie jest zaladowany")
+
+        settings = self.config["detection"]
+        try:
+            predictions = self.model.predict(
+                source=frame_bgr,
+                conf=float(settings["confidence_threshold"]),
+                imgsz=int(settings["image_size"]),
+                device=settings.get("device", "cpu"),
+                verbose=False,
+            )
+        except Exception as exc:  # pragma: no cover - depends on model runtime
+            return DetectionResult(error=f"Blad YOLO: {exc}")
+
+        names = getattr(self.model, "names", {})
+        names_is_mapping = isinstance(names, dict)
+        candidates = []
+        for prediction in predictions:
+            for box in prediction.boxes:
+                left, top, right, bottom = (int(v) for v in box.xyxy[0].tolist())
+                score = float(box.conf[0])
+                class_id = -1 if box.cls is None else int(box.cls[0])
+                # Fall back to the numeric id when no readable name exists.
+                if names_is_mapping:
+                    label = names.get(class_id, str(class_id))
+                else:
+                    label = str(class_id)
+                candidates.append(
+                    {
+                        "xyxy": (left, top, right, bottom),
+                        "confidence": score,
+                        "class_name": label,
+                    }
+                )
+
+        if not candidates:
+            return DetectionResult(error="Nie wykryto etykiety")
+
+        ranked = sorted(candidates, key=lambda entry: entry["confidence"], reverse=True)
+        best = ranked[0]
+        return DetectionResult(
+            xyxy=best["xyxy"],
+            confidence=best["confidence"],
+            class_name=best["class_name"],
+            # Same entries, with the box as a list instead of a tuple.
+            all_boxes=[{**entry, "xyxy": list(entry["xyxy"])} for entry in ranked],
+        )
+
+
+class TesseractOcr:
+    """Reads text from a region of a frame using pytesseract."""
+
+    def __init__(self, config: dict[str, Any]) -> None:
+        """Remember the configuration and import pytesseract if OCR is enabled."""
+        self.config = config
+        self.load_error: str | None = None
+        self.pytesseract = None
+        self._load()
+
+    def _load(self) -> None:
+        """Import pytesseract lazily; on failure record `load_error` instead."""
+        if not self.config["ocr"].get("enabled", True):
+            return
+        try:
+            import pytesseract
+
+            command = self.config["ocr"].get("tesseract_cmd")
+            if command:
+                pytesseract.pytesseract.tesseract_cmd = command
+            self.pytesseract = pytesseract
+        except Exception as exc:
+            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"
+
+    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> tuple[str, str | None]:
+        """Run OCR on the part of ``frame_bgr`` inside ``bbox``.
+
+        Args:
+            frame_bgr: The full camera frame.
+            bbox: ``(x1, y1, x2, y2)`` pixel coordinates; clipped to the frame.
+
+        Returns:
+            ``(text, error)``. ``error`` is ``None`` on success and when OCR
+            is disabled (in which case ``text`` is empty); otherwise it is a
+            user-facing message and ``text`` is empty.
+        """
+        if not self.config["ocr"].get("enabled", True):
+            return "", None
+        if self.pytesseract is None:
+            return "", self.load_error or "OCR nie jest zaladowany"
+
+        x1, y1, x2, y2 = bbox
+        h, w = frame_bgr.shape[:2]
+        x1, y1 = max(0, x1), max(0, y1)
+        x2, y2 = min(w, x2), min(h, y2)
+        if x2 <= x1 or y2 <= y1:
+            return "", "Nieprawidlowy bbox OCR"
+
+        scale = float(self.config["ocr"].get("scale", 1.0))
+        # `not scale > 0` also rejects NaN, which `scale <= 0` would let through.
+        if not scale > 0:
+            return "", "Nieprawidlowa skala OCR"
+
+        # Preprocessing is inside the try block as well: an OpenCV failure
+        # must come back as an OCR error instead of raising into the caller.
+        try:
+            image = self._preprocess(frame_bgr[y1:y2, x1:x2], scale)
+            text = self.pytesseract.image_to_string(
+                image,
+                lang=self.config["ocr"].get("language", "eng"),
+                config="--psm 6",
+            )
+        except Exception as exc:
+            return "", f"Blad OCR: {exc}"
+        return text, None
+
+    def _preprocess(self, roi: np.ndarray, scale: float) -> np.ndarray:
+        """Scale the region, convert it to grayscale and optionally binarize it."""
+        if scale != 1.0:
+            roi = cv2.resize(roi, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+
+        # A two-dimensional array is already single-channel.
+        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) if roi.ndim == 3 else roi
+        if self.config["ocr"].get("threshold", True):
+            gray = cv2.GaussianBlur(gray, (3, 3), 0)
+            gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
+        return gray
+
+
+class DetectionPipeline:
+    """Chains label detection, OCR and text parsing for a single frame."""
+
+    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
+        """Create the detector and the OCR reader from the shared config."""
+        self.config = config
+        self.detector = YoloLabelDetector(config, app_config)
+        self.ocr = TesseractOcr(config)
+
+    def process(self, frame_bgr: np.ndarray) -> DetectionResult:
+        """Detect the label and, when a box was found, read and parse its text.
+
+        The detector's result is returned untouched when it has no box.
+        Otherwise it is enriched with the raw and parsed text, and its
+        ``error`` is replaced by the OCR error if there is one.
+        """
+        detection = self.detector.detect(frame_bgr)
+        bbox = detection.xyxy
+        if bbox is None:
+            return detection
+
+        raw_text, failure = self.ocr.read_label(frame_bgr, bbox)
+        detection.raw_text = raw_text
+
+        label_cfg = self.config["label_data"]
+        known_colors = label_cfg.get("colors", [])
+        known_models = label_cfg.get("models", [])
+        detection.parsed = parse_label_text(raw_text, known_colors, known_models)
+
+        if failure:
+            detection.error = failure
+        return detection
--- a/app/label_parser.py
+++ b/app/label_parser.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass, asdict
+
+
+# Order number: four digits, "/", four digits, "/", then a number from 1 to 99
+# without a leading zero (e.g. "1234/5678/12"), delimited by word boundaries.
+ORDER_RE = re.compile(r"\b(?P<order>\d{4}/\d{4}/(?:[1-9]|[1-9]\d))\b")
+
+
+@dataclass
+class ParsedLabel:
+    """Fields extracted from the OCR text of a label."""
+
+    order_number: str | None
+    color_code: str | None
+    product_model: str | None
+    raw_text: str
+
+    def to_dict(self) -> dict[str, str | None]:
+        """Return the fields as a plain dictionary keyed by field name."""
+        return {
+            "order_number": self.order_number,
+            "color_code": self.color_code,
+            "product_model": self.product_model,
+            "raw_text": self.raw_text,
+        }
+
+
+def normalize_ocr_text(text: str) -> str:
+    """Collapse every run of whitespace (including newlines) to one space."""
+    # `str.split()` without arguments already treats "\n" and "\r" as
+    # separators and drops leading/trailing whitespace.
+    words = text.split()
+    return " ".join(words)
+
+
+def parse_label_text(text: str, known_colors: list[str], known_models: list[str]) -> ParsedLabel:
+    """Extract the order number, color code and product model from OCR text.
+
+    Args:
+        text: Raw OCR output; whitespace is normalized before matching.
+        known_colors: Color codes to look for (case-insensitive substring).
+        known_models: Model names to look for (case-insensitive whole word).
+
+    Returns:
+        A ``ParsedLabel`` whose fields are ``None`` where nothing matched.
+        When several dictionary entries match, the longest one wins, so a
+        short code cannot shadow a longer code that contains it; ties keep
+        the dictionary order. Blank dictionary entries are ignored.
+    """
+    normalized = normalize_ocr_text(text)
+    order_match = ORDER_RE.search(normalized)
+
+    normalized_upper = normalized.upper()
+    matching_colors = [
+        color
+        for color in known_colors
+        # An empty string is a substring of everything, so skip blanks.
+        if color.strip() and color.upper() in normalized_upper
+    ]
+    matching_models = [
+        model
+        for model in known_models
+        if model.strip() and re.search(rf"\b{re.escape(model)}\b", normalized, re.I)
+    ]
+
+    return ParsedLabel(
+        order_number=order_match.group("order") if order_match else None,
+        # `max` returns the first of equally long entries.
+        color_code=max(matching_colors, key=len, default=None),
+        product_model=max(matching_models, key=len, default=None),
+        raw_text=normalized,
+    )
--- a/app/main.py
+++ b/app/main.py
@@ -0,0 +1,11 @@
+from __future__ import annotations
+
+import sys
+
+from app.config import AppConfig
+from app.main_window import run_app
+
+
+def main() -> None:
+    """Load the configuration, run the GUI and exit with its return code."""
+    exit_code = run_app(AppConfig())
+    sys.exit(exit_code)
--- a/app/main_window.py
+++ b/app/main_window.py
@@ -0,0 +1,292 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any
+
+import cv2
+import numpy as np
+from PySide6.QtCore import Qt, Slot
+from PySide6.QtGui import QAction, QImage, QPixmap
+from PySide6.QtWidgets import (
+    QApplication,
+    QHBoxLayout,
+    QLabel,
+    QMainWindow,
+    QMessageBox,
+    QPushButton,
+    QTextEdit,
+    QToolButton,
+    QVBoxLayout,
+    QWidget,
+    QStyle,
+)
+
+from app.camera import CameraWorker
+from app.config import AppConfig
+from app.detection import DetectionResult
+from app.media import MediaStore, VideoRecorder
+from app.settings_dialog import SettingsDialog
+
+
+class MainWindow(QMainWindow):
+    """Main window: live camera preview, detection panel and capture toolbar."""
+
+    def __init__(self, app_config: AppConfig) -> None:
+        """Build the UI and start the camera worker thread."""
+        super().__init__()
+        self.app_config = app_config
+        self.config = app_config.data
+        self.last_frame: np.ndarray | None = None
+        # Result drawn on top of the preview; None hides the overlay.
+        self.overlay_result: DetectionResult | None = None
+        # Most recent result, stored in the metadata of saved media.
+        self.last_detection: DetectionResult | None = None
+        self.media_store = MediaStore(self.config, self.app_config)
+        self.video_recorder = VideoRecorder(self.config, self.app_config)
+
+        self.setWindowTitle("Duck Stain YOLO")
+        self.resize(1280, 720)
+        self._build_ui()
+
+        self.worker = CameraWorker(self.config, self.app_config)
+        self.worker.frame_ready.connect(self.on_frame_ready)
+        self.worker.detection_ready.connect(self.on_detection_ready)
+        self.worker.camera_error.connect(self.on_camera_error)
+        self.worker.start()
+
+    def _build_ui(self) -> None:
+        """Create the preview label, the result panel and the bottom toolbar.
+
+        The widgets are children of ``self.stage`` without a layout; their
+        geometry is set in ``resizeEvent``.
+        """
+        self.stage = QWidget()
+        self.setCentralWidget(self.stage)
+
+        self.video_label = QLabel(self.stage)
+        self.video_label.setAlignment(Qt.AlignCenter)
+        self.video_label.setStyleSheet("background: #111; color: #ddd;")
+        self.video_label.setText("Kamera")
+
+        self.result_panel = QWidget(self.stage)
+        self.result_panel.setObjectName("resultPanel")
+        self.result_panel.setStyleSheet(
+            """
+            QWidget#resultPanel {
+                background: rgba(20, 20, 20, 170);
+                border-radius: 8px;
+            }
+            QTextEdit {
+                background: transparent;
+                color: white;
+                border: 0;
+                font-size: 13px;
+            }
+            QPushButton {
+                min-height: 28px;
+                padding: 4px 12px;
+            }
+            """
+        )
+        panel_layout = QVBoxLayout(self.result_panel)
+        self.result_text = QTextEdit()
+        self.result_text.setReadOnly(True)
+        self.result_text.setFixedHeight(118)
+        panel_layout.addWidget(self.result_text)
+        panel_buttons = QHBoxLayout()
+        self.detect_button = QPushButton("wykryj")
+        self.ok_button = QPushButton("ok")
+        panel_buttons.addStretch(1)
+        panel_buttons.addWidget(self.detect_button)
+        panel_buttons.addWidget(self.ok_button)
+        panel_layout.addLayout(panel_buttons)
+        self.detect_button.clicked.connect(self.start_detection)
+        self.ok_button.clicked.connect(self.accept_detection)
+
+        self.toolbar = QWidget(self.stage)
+        self.toolbar.setObjectName("bottomToolbar")
+        self.toolbar.setStyleSheet(
+            """
+            QWidget#bottomToolbar {
+                background: rgba(20, 20, 20, 175);
+                border-radius: 8px;
+            }
+            QToolButton {
+                min-width: 44px;
+                min-height: 38px;
+                padding: 4px;
+            }
+            """
+        )
+        toolbar_layout = QHBoxLayout(self.toolbar)
+        toolbar_layout.setContentsMargins(8, 6, 8, 6)
+        self.photo_button = self._tool_button(QStyle.SP_DialogSaveButton, "Zrob zdjecie")
+        self.record_button = self._tool_button(QStyle.SP_MediaPlay, "Start/stop nagrywania")
+        self.settings_button = self._tool_button(QStyle.SP_FileDialogDetailedView, "Ustawienia obrazu")
+        toolbar_layout.addWidget(self.photo_button)
+        toolbar_layout.addWidget(self.record_button)
+        toolbar_layout.addWidget(self.settings_button)
+        self.photo_button.clicked.connect(self.take_photo)
+        self.record_button.clicked.connect(self.toggle_recording)
+        self.settings_button.clicked.connect(self.open_settings)
+
+        quit_action = QAction("Zamknij", self)
+        quit_action.triggered.connect(self.close)
+        self.addAction(quit_action)
+
+    def _tool_button(self, icon_id: QStyle.StandardPixmap, tooltip: str) -> QToolButton:
+        """Return a tool button showing a standard style icon and a tooltip."""
+        button = QToolButton()
+        button.setIcon(self.style().standardIcon(icon_id))
+        button.setToolTip(tooltip)
+        return button
+
+    def resizeEvent(self, event: Any) -> None:
+        """Re-position the preview, the result panel and the toolbar."""
+        super().resizeEvent(event)
+        self.video_label.setGeometry(self.stage.rect())
+
+        # Result panel: top-right corner, a third of the width within 280-420 px.
+        panel_width = min(420, max(280, self.stage.width() // 3))
+        self.result_panel.setGeometry(self.stage.width() - panel_width - 18, 18, panel_width, 190)
+
+        # Toolbar: horizontally centred along the bottom edge.
+        self.toolbar.adjustSize()
+        toolbar_size = self.toolbar.sizeHint()
+        self.toolbar.setGeometry(
+            (self.stage.width() - toolbar_size.width()) // 2,
+            self.stage.height() - toolbar_size.height() - 18,
+            toolbar_size.width(),
+            toolbar_size.height(),
+        )
+
+    def closeEvent(self, event: Any) -> None:
+        """Finish any running recording and stop the camera worker."""
+        if self.video_recorder.is_recording:
+            # A failure to write the recording metadata must not prevent the
+            # worker thread from being stopped below.
+            try:
+                self.video_recorder.stop(self.current_metadata("video"))
+            except OSError as exc:
+                QMessageBox.warning(self, "Wideo", str(exc))
+        self.worker.stop()
+        self.worker.wait(2000)
+        super().closeEvent(event)
+
+    @Slot(object)
+    def on_frame_ready(self, frame: np.ndarray) -> None:
+        """Keep a copy of the frame, feed the recorder and refresh the preview."""
+        self.last_frame = frame.copy()
+        if self.video_recorder.is_recording:
+            self.video_recorder.write(frame)
+        self._show_frame(frame)
+
+    @Slot(object)
+    def on_detection_ready(self, result: DetectionResult) -> None:
+        """Store the new result and show it in the panel and the overlay."""
+        self.last_detection = result
+        self.overlay_result = result if result.xyxy else None
+        self._update_result_text(result)
+
+    @Slot(str)
+    def on_camera_error(self, message: str) -> None:
+        """Show a camera error message in the result panel."""
+        self.result_text.setPlainText(message)
+
+    def start_detection(self) -> None:
+        """Clear the overlay and ask the worker to start detecting."""
+        self.overlay_result = None
+        self.result_text.setPlainText("Wykrywanie...")
+        self.worker.start_detection()
+
+    def accept_detection(self) -> None:
+        """Stop detecting and mark the last result as accepted."""
+        self.worker.accept_detection()
+        self.overlay_result = None
+        if self.last_detection:
+            self._update_result_text(self.last_detection, accepted=True)
+
+    def take_photo(self) -> None:
+        """Save the last frame with its metadata and report the outcome."""
+        if self.last_frame is None:
+            QMessageBox.warning(self, "Zdjecie", "Brak klatki z kamery")
+            return
+        try:
+            path = self.media_store.save_photo(self.last_frame, self.current_metadata("photo"))
+        except (OSError, cv2.error) as exc:
+            # Report the failure instead of letting it escape the Qt slot.
+            QMessageBox.warning(self, "Zdjecie", str(exc))
+            return
+        self.statusBar().showMessage(f"Zapisano zdjecie: {path}", 5000)
+
+    def toggle_recording(self) -> None:
+        """Start recording, or stop it and save the recording metadata."""
+        if self.last_frame is None:
+            QMessageBox.warning(self, "Wideo", "Brak klatki z kamery")
+            return
+
+        if self.video_recorder.is_recording:
+            try:
+                path = self.video_recorder.stop(self.current_metadata("video"))
+            except OSError as exc:
+                QMessageBox.warning(self, "Wideo", str(exc))
+            else:
+                self.statusBar().showMessage(f"Zapisano film: {path}", 5000)
+            # Keep the button in sync with the recorder even after an error.
+            if not self.video_recorder.is_recording:
+                self.record_button.setIcon(self.style().standardIcon(QStyle.SP_MediaPlay))
+            return
+
+        try:
+            path = self.video_recorder.start(self.last_frame)
+        except (RuntimeError, OSError, cv2.error) as exc:
+            QMessageBox.warning(self, "Wideo", str(exc))
+            return
+        self.record_button.setIcon(self.style().standardIcon(QStyle.SP_MediaStop))
+        self.statusBar().showMessage(f"Nagrywanie: {path}", 5000)
+
+    def open_settings(self) -> None:
+        """Open the modal camera settings dialog."""
+        dialog = SettingsDialog(self.config, self)
+        dialog.settings_saved.connect(self.save_camera_settings)
+        dialog.exec()
+
+    @Slot(dict)
+    def save_camera_settings(self, camera_config: dict[str, Any]) -> None:
+        """Persist the new camera section and forward it to the worker."""
+        self.config["camera"] = camera_config
+        self.app_config.save(self.config)
+        self.worker.update_camera_config(camera_config)
+
+    def current_metadata(self, media_type: str) -> dict[str, Any]:
+        """Return the metadata stored next to a saved photo or video."""
+        return {
+            "media_type": media_type,
+            "created_at": datetime.now().isoformat(timespec="seconds"),
+            "detection": self.last_detection.to_metadata() if self.last_detection else None,
+            "camera": {
+                "width": self.config["camera"].get("width"),
+                "height": self.config["camera"].get("height"),
+                "fps": self.config["camera"].get("fps"),
+                "properties": self.config["camera"].get("properties", {}),
+            },
+            "detection_config": self.config.get("detection", {}),
+        }
+
+    def _update_result_text(self, result: DetectionResult, accepted: bool = False) -> None:
+        """Render ``result`` as plain text in the result panel."""
+        status = "Zatwierdzono" if accepted else "Wynik"
+        lines = [status]
+        if result.error:
+            lines.append(f"Komunikat: {result.error}")
+        if result.confidence is not None:
+            lines.append(f"YOLO confidence: {result.confidence:.3f}")
+        if result.parsed:
+            lines.append(f"Zamowienie: {result.parsed.order_number or '-'}")
+            lines.append(f"Kolor: {result.parsed.color_code or '-'}")
+            lines.append(f"Model: {result.parsed.product_model or '-'}")
+        if result.raw_text:
+            lines.append("")
+            lines.append(result.raw_text)
+        self.result_text.setPlainText("\n".join(lines))
+
+    def _show_frame(self, frame_bgr: np.ndarray) -> None:
+        """Draw the overlay on a copy of the frame and show it in the preview."""
+        display_frame = frame_bgr.copy()
+        if self.overlay_result is not None:
+            self._draw_detection(display_frame, self.overlay_result)
+
+        frame_rgb = cv2.cvtColor(display_frame, cv2.COLOR_BGR2RGB)
+        h, w, channels = frame_rgb.shape
+        # `.copy()` detaches the QImage from the numpy buffer it was built on.
+        image = QImage(frame_rgb.data, w, h, channels * w, QImage.Format_RGB888).copy()
+        pixmap = QPixmap.fromImage(image)
+        self.video_label.setPixmap(
+            pixmap.scaled(self.video_label.size(), Qt.KeepAspectRatio, Qt.SmoothTransformation)
+        )
+
+    def _draw_detection(self, frame_bgr: np.ndarray, result: DetectionResult) -> None:
+        """Draw the detected box(es) and their captions onto ``frame_bgr``.
+
+        In ``all`` mode every box from ``result.all_boxes`` is drawn;
+        otherwise only the best box is.
+        """
+        mode = self.config["detection"].get("mode", "best")
+        if mode == "all":
+            boxes = result.all_boxes
+        else:
+            # This runs for every displayed frame, so pick just the three
+            # fields needed here instead of building the full metadata dict.
+            boxes = [
+                {
+                    "xyxy": result.xyxy,
+                    "confidence": result.confidence,
+                    "class_name": result.class_name,
+                }
+            ]
+        for item in boxes:
+            xyxy = item.get("xyxy") or item.get("bbox_xyxy")
+            if not xyxy:
+                continue
+            x1, y1, x2, y2 = [int(value) for value in xyxy]
+            confidence = item.get("confidence")
+            class_name = item.get("class_name") or "label"
+            cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (0, 220, 0), 3)
+            caption = f"{class_name} {confidence:.2f}" if confidence is not None else class_name
+            cv2.putText(
+                frame_bgr,
+                caption,
+                # Keep the caption inside the frame for boxes near the top.
+                (x1, max(24, y1 - 8)),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.8,
+                (0, 220, 0),
+                2,
+                cv2.LINE_AA,
+            )
+
+
+def run_app(app_config: AppConfig) -> int:
+    """Create the Qt application, show the main window and run the event loop.
+
+    Returns:
+        The value returned by ``QApplication.exec()``.
+    """
+    qt_app = QApplication([])
+    # The local reference keeps the window alive while the event loop runs.
+    main_window = MainWindow(app_config)
+    main_window.show()
+    return qt_app.exec()
--- a/app/media.py
+++ b/app/media.py
@@ -0,0 +1,101 @@
+from __future__ import annotations
+
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+import cv2
+import numpy as np
+
+
+def timestamp_name() -> str:
+    """Return a file-name stem built from the current local time.
+
+    The stem has the form ``YYYYMMDD_HHMMSS_ffffff``. The microsecond part
+    keeps two captures taken within the same second from getting the same
+    name and overwriting each other.
+    """
+    return datetime.now().strftime("%Y%m%d_%H%M%S_%f")
+
+
+def write_metadata(media_path: Path, metadata: dict[str, Any]) -> Path:
+    """Write ``metadata`` as JSON next to ``media_path``.
+
+    The sidecar file has the same name as the media file with the suffix
+    replaced by ``.json``.
+
+    Returns:
+        The path of the written JSON file.
+    """
+    sidecar = media_path.with_suffix(".json")
+    with open(sidecar, mode="w", encoding="utf-8") as stream:
+        json.dump(metadata, stream, ensure_ascii=False, indent=2)
+        stream.write("\n")
+    return sidecar
+
+
+class MediaStore:
+    """Builds output paths for captures and saves photos with metadata."""
+
+    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
+        """Remember the configuration and the path resolver."""
+        self.config = config
+        self.app_config = app_config
+
+    def _timestamped_path(self, dir_key: str, extension_key: str, default_extension: str) -> Path:
+        """Create the configured directory and return a new timestamped path in it."""
+        capture_cfg = self.config["capture"]
+        directory = self.app_config.resolve_path(capture_cfg[dir_key])
+        directory.mkdir(parents=True, exist_ok=True)
+        # Accept the extension both with and without a leading dot.
+        extension = capture_cfg.get(extension_key, default_extension).lstrip(".")
+        return directory / f"{timestamp_name()}.{extension}"
+
+    def photo_path(self) -> Path:
+        """Return a fresh path for a photo, creating the directory if needed."""
+        return self._timestamped_path("photos_dir", "image_extension", "jpg")
+
+    def video_path(self) -> Path:
+        """Return a fresh path for a video, creating the directory if needed."""
+        return self._timestamped_path("videos_dir", "video_extension", "mp4")
+
+    def save_photo(self, frame_bgr: np.ndarray, metadata: dict[str, Any]) -> Path:
+        """Write the frame as an image plus its JSON metadata.
+
+        Returns:
+            The path of the saved image.
+
+        Raises:
+            OSError: If the image or the metadata file could not be written.
+        """
+        path = self.photo_path()
+        # `cv2.imwrite` reports failure through its return value; without
+        # this check a photo that was never written would look saved.
+        if not cv2.imwrite(str(path), frame_bgr):
+            raise OSError(f"Nie mozna zapisac zdjecia: {path}")
+        write_metadata(path, metadata)
+        return path
+
+
+class VideoRecorder:
+    """Records frames to a video file and writes metadata when stopped."""
+
+    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
+        """Remember the configuration; no recording is active initially."""
+        self.config = config
+        self.app_config = app_config
+        self.path: Path | None = None
+        self.writer: cv2.VideoWriter | None = None
+        self.started_at: str | None = None
+
+    @property
+    def is_recording(self) -> bool:
+        """Whether a video writer is currently open."""
+        return self.writer is not None
+
+    def start(self, frame_bgr: np.ndarray) -> Path:
+        """Open a new video file sized like ``frame_bgr`` and write that frame.
+
+        Returns:
+            The path of the video being recorded.
+
+        Raises:
+            RuntimeError: If a recording is already running, the configured
+                codec is not a four-character code, or the writer cannot be
+                opened.
+        """
+        if self.writer is not None:
+            raise RuntimeError("Nagrywanie juz trwa")
+
+        capture_cfg = self.config["capture"]
+        codec = str(capture_cfg.get("video_codec", "mp4v"))
+        # `VideoWriter_fourcc` needs exactly four characters. Reject shorter
+        # codes here with the exception type that callers already handle.
+        if len(codec) < 4:
+            raise RuntimeError(f"Nieprawidlowy kodek wideo: {codec!r}")
+
+        path = MediaStore(self.config, self.app_config).video_path()
+        h, w = frame_bgr.shape[:2]
+        fps = float(self.config["camera"].get("fps", 30))
+        fourcc = cv2.VideoWriter_fourcc(*codec[:4])
+        writer = cv2.VideoWriter(str(path), fourcc, fps, (w, h))
+        if not writer.isOpened():
+            writer.release()
+            raise RuntimeError("Nie mozna uruchomic zapisu wideo")
+
+        # Publish the state only once the writer is known to work, so a
+        # failed start leaves the recorder exactly as it was.
+        self.path = path
+        self.writer = writer
+        self.started_at = datetime.now().isoformat(timespec="seconds")
+        self.write(frame_bgr)
+        return path
+
+    def write(self, frame_bgr: np.ndarray) -> None:
+        """Append a frame to the recording; does nothing when not recording."""
+        if self.writer is not None:
+            self.writer.write(frame_bgr)
+
+    def stop(self, metadata: dict[str, Any]) -> Path | None:
+        """Close the recording and write its metadata file.
+
+        Args:
+            metadata: Base metadata; a ``recording`` entry with the start and
+                stop times is added to a copy of it.
+
+        Returns:
+            The path of the finished video, or ``None`` if nothing was being
+            recorded.
+        """
+        if self.writer is None:
+            return None
+        self.writer.release()
+        self.writer = None
+
+        # Reset the state before writing the metadata so that a failed
+        # write cannot leave stale values behind for the next recording.
+        path = self.path
+        started_at = self.started_at
+        self.path = None
+        self.started_at = None
+
+        if path is not None:
+            metadata = {
+                **metadata,
+                "recording": {
+                    "started_at": started_at,
+                    "stopped_at": datetime.now().isoformat(timespec="seconds"),
+                },
+            }
+            write_metadata(path, metadata)
+        return path
--- a/app/settings_dialog.py
+++ b/app/settings_dialog.py
@@ -0,0 +1,125 @@
+from __future__ import annotations
+
+from copy import deepcopy
+from typing import Any
+
+from PySide6.QtCore import Qt, Signal
+from PySide6.QtWidgets import (
+    QCheckBox,
+    QDialog,
+    QFormLayout,
+    QHBoxLayout,
+    QLabel,
+    QPushButton,
+    QSlider,
+    QSpinBox,
+    QVBoxLayout,
+    QWidget,
+)
+
+
+# Display labels for the camera properties that get a slider row in the
+# settings dialog. Keys are the property names used under camera.properties
+# in the configuration; iteration order is the order of the form rows.
+CAMERA_PROPERTY_LABELS = {
+    "brightness": "Jasnosc",
+    "contrast": "Kontrast",
+    "saturation": "Nasycenie",
+    "hue": "Barwa",
+    "gain": "Gain",
+    "exposure": "Ekspozycja",
+    "sharpness": "Ostrosc",
+    "auto_exposure": "Auto ekspozycja",
+    "focus": "Focus",
+    "auto_focus": "Auto focus",
+}
+
+
+class PropertySlider(QWidget):
+    value_changed = Signal(str, object)
+
+    def __init__(self, name: str, value: float | None) -> None:
+        super().__init__()
+        self.name = name
+        self.enabled_box = QCheckBox()
+        self.enabled_box.setChecked(value is not None)
+        self.slider = QSlider(Qt.Horizontal)
+        self.slider.setRange(-100, 100)
+        self.slider.setValue(int(value) if value is not None else 0)
+        self.value_box = QSpinBox()
+        self.value_box.setRange(-100, 100)
+        self.value_box.setValue(self.slider.value())
+
+        layout = QHBoxLayout(self)
+        layout.setContentsMargins(0, 0, 0, 0)
+        layout.addWidget(self.enabled_box)
+        layout.addWidget(self.slider, 1)
+        layout.addWidget(self.value_box)
+
+        self.slider.valueChanged.connect(self.value_box.setValue)
+        self.value_box.valueChanged.connect(self.slider.setValue)
+        self.slider.valueChanged.connect(self._emit_value)
+        self.enabled_box.toggled.connect(self._emit_value)
+
+    def _emit_value(self) -> None:
+        self.value_changed.emit(self.name, self.value())
+
+    def value(self) -> float | None:
+        if not self.enabled_box.isChecked():
+            return None
+        return float(self.slider.value())
+
+
+class SettingsDialog(QDialog):
+    settings_saved = Signal(dict)
+
+    def __init__(self, config: dict[str, Any], parent: QWidget | None = None) -> None:
+        super().__init__(parent)
+        self.setWindowTitle("Ustawienia obrazu")
+        self.setMinimumWidth(520)
+        self.config = deepcopy(config)
+        self.property_widgets: dict[str, PropertySlider] = {}
+
+        camera_cfg = self.config["camera"]
+        main_layout = QVBoxLayout(self)
+        form = QFormLayout()
+
+        self.width_box = QSpinBox()
+        self.width_box.setRange(160, 7680)
+        self.width_box.setValue(int(camera_cfg.get("width", 1920)))
+        self.height_box = QSpinBox()
+        self.height_box.setRange(120, 4320)
+        self.height_box.setValue(int(camera_cfg.get("height", 1080)))
+        self.fps_box = QSpinBox()
+        self.fps_box.setRange(1, 240)
+        self.fps_box.setValue(int(camera_cfg.get("fps", 30)))
+
+        form.addRow("Szerokosc", self.width_box)
+        form.addRow("Wysokosc", self.height_box)
+        form.addRow("FPS", self.fps_box)
+
+        for name, label in CAMERA_PROPERTY_LABELS.items():
+            widget = PropertySlider(name, camera_cfg.get("properties", {}).get(name))
+            self.property_widgets[name] = widget
+            form.addRow(QLabel(label), widget)
+
+        main_layout.addLayout(form)
+
+        buttons = QHBoxLayout()
+        buttons.addStretch(1)
+        cancel_button = QPushButton("Anuluj")
+        save_button = QPushButton("Zapisz")
+        save_button.setDefault(True)
+        buttons.addWidget(cancel_button)
+        buttons.addWidget(save_button)
+        main_layout.addLayout(buttons)
+
+        cancel_button.clicked.connect(self.reject)
+        save_button.clicked.connect(self._save)
+
+    def _save(self) -> None:
+        self.config["camera"]["width"] = int(self.width_box.value())
+        self.config["camera"]["height"] = int(self.height_box.value())
+        self.config["camera"]["fps"] = int(self.fps_box.value())
+        self.config["camera"]["properties"] = {
+            name: widget.value() for name, widget in self.property_widgets.items()
+        }
+        self.settings_saved.emit(self.config["camera"])
+        self.accept()
--- a/app_config.json
+++ b/app_config.json
@@ -0,0 +1,53 @@
+{
+  "camera": {
+    "index": 0,
+    "width": 1920,
+    "height": 1080,
+    "fps": 30,
+    "backend": "auto",
+    "properties": {
+      "brightness": null,
+      "contrast": null,
+      "saturation": null,
+      "hue": null,
+      "gain": null,
+      "exposure": null,
+      "sharpness": null,
+      "auto_exposure": null,
+      "focus": null,
+      "auto_focus": null
+    }
+  },
+  "detection": {
+    "model_path": "models/best.pt",
+    "confidence_threshold": 0.25,
+    "mode": "best",
+    "frame_stride": 5,
+    "image_size": 640,
+    "device": "cpu"
+  },
+  "ocr": {
+    "enabled": true,
+    "language": "eng",
+    "tesseract_cmd": null,
+    "threshold": true,
+    "scale": 2.0
+  },
+  "capture": {
+    "photos_dir": "captures/photos",
+    "videos_dir": "captures/videos",
+    "image_extension": "jpg",
+    "video_extension": "mp4",
+    "video_codec": "mp4v"
+  },
+  "label_data": {
+    "models": [
+      "Regius",
+      "Duvell"
+    ],
+    "colors": [
+      "T-NF-BLK-OUT-BST-G",
+      "T-BLK-G"
+    ]
+  }
+}
--- a/captures/photos/.gitkeep
+++ b/captures/photos/.gitkeep
@@ -0,0 +1 @@
+
--- a/captures/videos/.gitkeep
+++ b/captures/videos/.gitkeep
@@ -0,0 +1 @@
+
--- a/main.py
+++ b/main.py
@@ -0,0 +1,5 @@
+from app.main import main
+
+
+# Start the application only when this file is run as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()
--- a/models/.gitkeep
+++ b/models/.gitkeep
@@ -0,0 +1 @@
+
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,15 @@
+# Package metadata and runtime dependencies of the application.
+[project]
+name = "duck-stain-yolo"
+version = "0.1.0"
+description = "PySide6 camera app for YOLO label detection and OCR metadata capture."
+requires-python = ">=3.10"
+# torch/torchvision are not listed here; requirements.txt pins their CPU
+# builds for Linux.
+dependencies = [
+  "PySide6>=6.6",
+  "opencv-python>=4.8",
+  "ultralytics>=8.0",
+  "pytesseract>=0.3",
+  "numpy>=1.26",
+]
+
+# Ruff settings: maximum line length.
+[tool.ruff]
+line-length = 100
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+# Additional package index providing the CPU-only PyTorch wheels pinned below.
+--extra-index-url https://download.pytorch.org/whl/cpu
+
+PySide6>=6.6
+opencv-python>=4.8
+# On Linux the CPU builds are pinned explicitly so pip does not download the
+# CUDA variant; other platforms get torch through the ultralytics dependency.
+torch==2.5.1+cpu; platform_system == "Linux"
+torchvision==0.20.1+cpu; platform_system == "Linux"
+ultralytics>=8.0
+pytesseract>=0.3
+numpy>=1.26