From 090865af76f26473ac9c565cc5ab471900c18e44 Mon Sep 17 00:00:00 2001 From: bartool Date: Thu, 7 May 2026 00:18:38 +0200 Subject: [PATCH] Initial MVP application skeleton Add PySide6 camera UI, YOLO/Tesseract detection pipeline, capture metadata, configuration, and project gitignore. --- .gitignore | 31 +++++ README.md | 42 ++++++ app/__init__.py | 1 + app/camera.py | 129 +++++++++++++++++ app/config.py | 96 +++++++++++++ app/detection.py | 183 ++++++++++++++++++++++++ app/label_parser.py | 44 ++++++ app/main.py | 11 ++ app/main_window.py | 292 +++++++++++++++++++++++++++++++++++++++ app/media.py | 101 ++++++++++++++ app/settings_dialog.py | 125 +++++++++++++++++ app_config.json | 53 +++++++ captures/photos/.gitkeep | 1 + captures/videos/.gitkeep | 1 + main.py | 5 + models/.gitkeep | 1 + pyproject.toml | 15 ++ requirements.txt | 9 ++ 18 files changed, 1140 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 app/__init__.py create mode 100644 app/camera.py create mode 100644 app/config.py create mode 100644 app/detection.py create mode 100644 app/label_parser.py create mode 100644 app/main.py create mode 100644 app/main_window.py create mode 100644 app/media.py create mode 100644 app/settings_dialog.py create mode 100644 app_config.json create mode 100644 captures/photos/.gitkeep create mode 100644 captures/videos/.gitkeep create mode 100644 main.py create mode 100644 models/.gitkeep create mode 100644 pyproject.toml create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5e9f059 --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +.pytest_cache/ +.ruff_cache/ +.mypy_cache/ + +# Virtual environments +.venv/ +.venv-*/ +venv/ +env/ + +# Local/runtime data +captures/photos/* +captures/videos/* +!captures/photos/.gitkeep +!captures/videos/.gitkeep +models/* +!models/.gitkeep + +# OS/editor +.DS_Store +.idea/ +.vscode/ + +# 
Ultralytics/runtime caches +runs/ +*.onnx +*.engine diff --git a/README.md b/README.md new file mode 100644 index 0000000..e70e9d8 --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +# Duck Stain YOLO + +MVP aplikacji okienkowej do podgladu kamery USB, wykrywania etykiety modelem YOLOv8 i zapisu zdjec/filmow z metadanymi JSON. + +## Uruchomienie + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +python main.py +``` + +Na macOS z Tesseractem: + +```bash +brew install tesseract +``` + +Na Ubuntu/WSL: + +```bash +sudo apt install tesseract-ocr +``` + +Na Linuksie `requirements.txt` wymusza CPU build PyTorch, zeby nie pobierac wariantu CUDA. Na macOS pip zainstaluje standardowy wariant CPU dla procesora Intel. + +Umiesc wytrenowany model jako `models/best.pt` albo zmien `detection.model_path` w `app_config.json`. + +## Konfiguracja + +Glowny plik konfiguracji: `app_config.json`. + +Istotne ustawienia: + +- `camera.width`, `camera.height`, `camera.fps` - rozdzielczosc i FPS kamery. +- `camera.properties` - parametry OpenCV ustawiane na kamerze, np. jasnosc, kontrast, ekspozycja. `null` oznacza brak wymuszania wartosci. +- `detection.mode` - `best` rysuje najlepsza etykiete, `all` rysuje wszystkie wykrycia. +- `detection.frame_stride` - YOLO uruchamiany co N klatek podczas aktywnego wykrywania. +- `label_data.models`, `label_data.colors` - slowniki do walidacji tekstu z etykiety. + +Zdjecia trafiaja do `captures/photos`, filmy do `captures/videos`. Obok kazdego pliku media zapisywany jest JSON z aktualnym wynikiem detekcji/OCR. 
class CameraWorker(QThread):
    """Background thread that reads camera frames and runs label detection.

    Signals:
        frame_ready: emitted with every frame successfully read from the camera.
        detection_ready: emitted with a ``DetectionResult`` each time the
            pipeline processed a frame.
        camera_error: emitted with a user-facing message when the camera cannot
            be opened or a frame cannot be read.

    The capture device is only ever touched from ``run()``.  The public methods
    (``start_detection``, ``accept_detection``, ``update_camera_config``) are
    invoked from other threads and therefore only flip flags under ``_lock``;
    the capture loop picks the changes up on its next iteration.
    """

    frame_ready = Signal(object)
    detection_ready = Signal(object)
    camera_error = Signal(str)

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the shared configuration and build the detection pipeline.

        Args:
            config: Application configuration dictionary (the ``camera`` and
                ``detection`` sections are read by this class).
            app_config: Object forwarded to ``DetectionPipeline``.
        """
        super().__init__()
        self.config = config
        self.app_config = app_config
        self.pipeline = DetectionPipeline(config, app_config)
        self._running = threading.Event()
        self._running.set()
        self._detecting = False
        self._accepted = False
        self._frame_count = 0
        self._capture: cv2.VideoCapture | None = None
        # Raised by update_camera_config(), consumed by the capture loop.
        self._settings_dirty = False
        self._lock = threading.Lock()

    def stop(self) -> None:
        """Ask the capture loop to finish after the current iteration."""
        self._running.clear()

    @Slot()
    def start_detection(self) -> None:
        """Enable detection and restart the frame-stride counter."""
        with self._lock:
            self._detecting = True
            self._accepted = False
            self._frame_count = 0

    @Slot()
    def accept_detection(self) -> None:
        """Stop running detection because the current result was accepted."""
        with self._lock:
            self._detecting = False
            self._accepted = True

    @Slot(dict)
    def update_camera_config(self, camera_config: dict[str, Any]) -> None:
        """Replace the camera configuration and schedule it to be applied.

        This method is called directly by the GUI code, i.e. it executes on the
        caller's thread while ``run()`` may be inside ``capture.read()``.
        OpenCV does not promise that a ``VideoCapture`` can be used from two
        threads at once, so the new settings are only recorded here and are
        pushed to the device by the capture loop itself.

        Args:
            camera_config: New value for ``config["camera"]``.
        """
        with self._lock:
            self.config["camera"] = camera_config
            self._settings_dirty = True

    def run(self) -> None:
        """Open the camera and emit frames until ``stop()`` is called.

        The capture is always released and ``_capture`` reset, including when
        the device could not be opened.
        """
        with self._lock:
            camera_cfg = self.config["camera"]
        capture = cv2.VideoCapture(
            int(camera_cfg.get("index", 0)),
            backend_for_name(str(camera_cfg.get("backend", "auto"))),
        )
        self._capture = capture
        try:
            if not capture.isOpened():
                self.camera_error.emit("Nie mozna otworzyc kamery USB")
                return

            self._apply_camera_settings(capture)

            while self._running.is_set():
                if self._take_pending_settings():
                    self._apply_camera_settings(capture)

                ok, frame = capture.read()
                if not ok or frame is None:
                    self.camera_error.emit("Nie mozna odczytac klatki z kamery")
                    # Back off briefly so a missing camera does not spin the CPU.
                    time.sleep(0.2)
                    continue

                self.frame_ready.emit(frame)
                self._maybe_detect(frame)
        finally:
            capture.release()
            self._capture = None

    def _take_pending_settings(self) -> bool:
        """Return True once after ``update_camera_config`` was called."""
        with self._lock:
            pending = self._settings_dirty
            self._settings_dirty = False
        return pending

    def _apply_camera_settings(self, capture: cv2.VideoCapture) -> None:
        """Push resolution, FPS and the optional properties to ``capture``.

        Properties whose value is ``None`` or whose name is not listed in
        ``CV_CAP_PROPS`` are skipped.  A missing or ``null`` ``properties``
        section is treated as empty.
        """
        with self._lock:
            camera_cfg = self.config["camera"]

        capture.set(cv2.CAP_PROP_FRAME_WIDTH, int(camera_cfg.get("width", 1920)))
        capture.set(cv2.CAP_PROP_FRAME_HEIGHT, int(camera_cfg.get("height", 1080)))
        capture.set(cv2.CAP_PROP_FPS, int(camera_cfg.get("fps", 30)))

        properties = camera_cfg.get("properties") or {}
        for name, value in properties.items():
            if value is None or name not in CV_CAP_PROPS:
                continue
            capture.set(CV_CAP_PROPS[name], float(value))

    def _maybe_detect(self, frame: np.ndarray) -> None:
        """Run the pipeline on every ``frame_stride``-th frame while detecting.

        The frame counter advances for every frame; detection only happens when
        it is enabled, not yet accepted, and the counter hits the stride.
        """
        with self._lock:
            detecting = self._detecting and not self._accepted
            frame_stride = max(1, int(self.config["detection"].get("frame_stride", 5)))
            self._frame_count += 1
            should_detect = detecting and self._frame_count % frame_stride == 0

        if not should_detect:
            return

        # The (slow) pipeline runs outside the lock so the GUI never blocks on it.
        result: DetectionResult = self.pipeline.process(frame)
        self.detection_ready.emit(result)
def deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Return a new dictionary with ``override`` recursively merged onto ``base``.

    Keys keep the order of ``base``; keys that only exist in ``override`` are
    appended in ``override`` order.  When both sides hold a dictionary under
    the same key the two are merged recursively, otherwise the override value
    wins.

    The result shares no mutable objects with either argument: every value is
    copied exactly once, so later edits of the merged configuration cannot
    leak back into ``base`` (for example module-level defaults) or into
    ``override``.

    Args:
        base: Dictionary providing the default values.
        override: Dictionary whose values take precedence.

    Returns:
        A freshly built merged dictionary.
    """
    merged: dict[str, Any] = {}

    for key, base_value in base.items():
        if key not in override:
            merged[key] = deepcopy(base_value)
            continue
        value = override[key]
        if isinstance(value, dict) and isinstance(base_value, dict):
            # The recursive call already returns independent copies.
            merged[key] = deep_merge(base_value, value)
        else:
            merged[key] = deepcopy(value)

    for key, value in override.items():
        if key not in base:
            merged[key] = deepcopy(value)

    return merged
@dataclass
class DetectionResult:
    """Outcome of one detection pass over a single frame.

    Every field has a default, so an "empty" result (for example one that only
    carries an ``error`` message) can be built with keyword arguments only.
    """

    # Bounding box of the selected detection as (x1, y1, x2, y2), if any.
    xyxy: tuple[int, int, int, int] | None = None
    # Confidence reported for the selected detection.
    confidence: float | None = None
    # Class name reported for the selected detection.
    class_name: str | None = None
    # Text read from the label; empty when nothing was read.
    raw_text: str = ""
    # Structured fields extracted from ``raw_text``.
    parsed: ParsedLabel | None = None
    # User-facing message describing why the result is incomplete.
    error: str | None = None
    # One dictionary per detected box.
    all_boxes: list[dict[str, Any]] = field(default_factory=list)

    def to_metadata(self) -> dict[str, Any]:
        """Return the result as a plain dictionary for the JSON sidecar files.

        The bounding box is exported as a list under ``bbox_xyxy`` and the
        parsed label through its own ``to_dict()``; both become ``None`` when
        absent.
        """
        metadata: dict[str, Any] = {"bbox_xyxy": None}
        if self.xyxy:
            metadata["bbox_xyxy"] = list(self.xyxy)

        metadata["confidence"] = self.confidence
        metadata["class_name"] = self.class_name
        metadata["raw_text"] = self.raw_text

        metadata["parsed"] = None
        if self.parsed:
            metadata["parsed"] = self.parsed.to_dict()

        metadata["error"] = self.error
        metadata["all_boxes"] = self.all_boxes
        return metadata
class TesseractOcr:
    """Optional OCR stage that reads the text inside a detected label box.

    ``pytesseract`` is imported lazily in ``_load`` so the application still
    starts when it is missing; in that case ``load_error`` keeps the reason and
    ``read_label`` reports it instead of raising.
    """

    def __init__(self, config: dict[str, Any]) -> None:
        """Remember ``config`` (its ``ocr`` section is used) and load the backend."""
        self.config = config
        self.load_error: str | None = None
        self.pytesseract = None
        self._load()

    def _load(self) -> None:
        """Import ``pytesseract`` unless OCR is disabled in the configuration.

        When ``ocr.tesseract_cmd`` is set it is installed as the path of the
        Tesseract executable.  Any failure is stored in ``load_error``.
        """
        if not self.config["ocr"].get("enabled", True):
            return
        try:
            import pytesseract

            command = self.config["ocr"].get("tesseract_cmd")
            if command:
                pytesseract.pytesseract.tesseract_cmd = command
            self.pytesseract = pytesseract
        except Exception as exc:
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> tuple[str, str | None]:
        """Run OCR on the part of ``frame_bgr`` covered by ``bbox``.

        Args:
            frame_bgr: Full camera frame.
            bbox: Box as ``(x1, y1, x2, y2)``; it is clamped to the frame.

        Returns:
            ``(text, error)``.  ``error`` is ``None`` on success (and when OCR
            is disabled, in which case ``text`` is empty); otherwise ``text``
            is empty and ``error`` is a user-facing message.  Failures are
            always reported through the return value, never raised, so a bad
            ``ocr.scale`` or an unexpected frame cannot take down the caller.
        """
        ocr_cfg = self.config["ocr"]
        if not ocr_cfg.get("enabled", True):
            return "", None
        if self.pytesseract is None:
            return "", self.load_error or "OCR nie jest zaladowany"

        x1, y1, x2, y2 = bbox
        h, w = frame_bgr.shape[:2]
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(w, x2), min(h, y2)
        if x2 <= x1 or y2 <= y1:
            return "", "Nieprawidlowy bbox OCR"

        try:
            # Preprocessing lives inside the try block on purpose: OpenCV
            # raises for e.g. a zero/negative scale factor.
            image = self._preprocess(frame_bgr[y1:y2, x1:x2])
            text = self.pytesseract.image_to_string(
                image,
                lang=ocr_cfg.get("language", "eng"),
                config="--psm 6",
            )
        except Exception as exc:
            return "", f"Blad OCR: {exc}"
        return text, None

    def _preprocess(self, roi: np.ndarray) -> np.ndarray:
        """Return the grayscale (optionally upscaled and binarized) OCR input."""
        ocr_cfg = self.config["ocr"]
        scale = float(ocr_cfg.get("scale", 1.0))
        if scale != 1.0:
            roi = cv2.resize(roi, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        if ocr_cfg.get("threshold", True):
            gray = cv2.GaussianBlur(gray, (3, 3), 0)
            gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        return gray
# Order number printed on the label: two four-digit groups and a position from
# 1 to 99, e.g. "1234/5678/12".  The group must be named "order" because
# parse_label_text() reads it with ``match.group("order")``.
ORDER_RE = re.compile(r"\b(?P<order>\d{4}/\d{4}/(?:[1-9]|[1-9]\d))\b")


@dataclass
class ParsedLabel:
    """Structured fields extracted from the OCR text of a label."""

    # Order number matched by ORDER_RE, or None when not found.
    order_number: str | None
    # First known color code found in the text, or None.
    color_code: str | None
    # First known product model found in the text, or None.
    product_model: str | None
    # Whitespace-normalized OCR text the fields were extracted from.
    raw_text: str

    def to_dict(self) -> dict[str, str | None]:
        """Return the fields as a plain dictionary."""
        return asdict(self)


def normalize_ocr_text(text: str) -> str:
    """Collapse line breaks and runs of whitespace in ``text`` to single spaces."""
    return " ".join(text.replace("\n", " ").replace("\r", " ").split())


def parse_label_text(text: str, known_colors: list[str], known_models: list[str]) -> ParsedLabel:
    """Extract order number, color code and product model from OCR text.

    Args:
        text: Raw OCR output; it is whitespace-normalized before matching.
        known_colors: Color codes to look for.  Matching is a case-insensitive
            substring test and the first code in list order wins.
        known_models: Model names to look for.  Matching is case-insensitive on
            word boundaries and the first name in list order wins.

    Returns:
        A ``ParsedLabel`` whose fields are ``None`` where nothing was found.
        Color and model are returned as spelled in the given lists.
    """
    normalized = normalize_ocr_text(text)
    order_match = ORDER_RE.search(normalized)

    normalized_upper = normalized.upper()
    color_code = next(
        (color for color in known_colors if color.upper() in normalized_upper),
        None,
    )
    product_model = next(
        (model for model in known_models if re.search(rf"\b{re.escape(model)}\b", normalized, re.I)),
        None,
    )

    return ParsedLabel(
        order_number=order_match.group("order") if order_match else None,
        color_code=color_code,
        product_model=product_model,
        raw_text=normalized,
    )
import AppConfig +from app.detection import DetectionResult +from app.media import MediaStore, VideoRecorder +from app.settings_dialog import SettingsDialog + + +class MainWindow(QMainWindow): + def __init__(self, app_config: AppConfig) -> None: + super().__init__() + self.app_config = app_config + self.config = app_config.data + self.last_frame: np.ndarray | None = None + self.overlay_result: DetectionResult | None = None + self.last_detection: DetectionResult | None = None + self.media_store = MediaStore(self.config, self.app_config) + self.video_recorder = VideoRecorder(self.config, self.app_config) + + self.setWindowTitle("Duck Stain YOLO") + self.resize(1280, 720) + self._build_ui() + + self.worker = CameraWorker(self.config, self.app_config) + self.worker.frame_ready.connect(self.on_frame_ready) + self.worker.detection_ready.connect(self.on_detection_ready) + self.worker.camera_error.connect(self.on_camera_error) + self.worker.start() + + def _build_ui(self) -> None: + self.stage = QWidget() + self.setCentralWidget(self.stage) + + self.video_label = QLabel(self.stage) + self.video_label.setAlignment(Qt.AlignCenter) + self.video_label.setStyleSheet("background: #111; color: #ddd;") + self.video_label.setText("Kamera") + + self.result_panel = QWidget(self.stage) + self.result_panel.setObjectName("resultPanel") + self.result_panel.setStyleSheet( + """ + QWidget#resultPanel { + background: rgba(20, 20, 20, 170); + border-radius: 8px; + } + QTextEdit { + background: transparent; + color: white; + border: 0; + font-size: 13px; + } + QPushButton { + min-height: 28px; + padding: 4px 12px; + } + """ + ) + panel_layout = QVBoxLayout(self.result_panel) + self.result_text = QTextEdit() + self.result_text.setReadOnly(True) + self.result_text.setFixedHeight(118) + panel_layout.addWidget(self.result_text) + panel_buttons = QHBoxLayout() + self.detect_button = QPushButton("wykryj") + self.ok_button = QPushButton("ok") + panel_buttons.addStretch(1) + 
panel_buttons.addWidget(self.detect_button) + panel_buttons.addWidget(self.ok_button) + panel_layout.addLayout(panel_buttons) + self.detect_button.clicked.connect(self.start_detection) + self.ok_button.clicked.connect(self.accept_detection) + + self.toolbar = QWidget(self.stage) + self.toolbar.setObjectName("bottomToolbar") + self.toolbar.setStyleSheet( + """ + QWidget#bottomToolbar { + background: rgba(20, 20, 20, 175); + border-radius: 8px; + } + QToolButton { + min-width: 44px; + min-height: 38px; + padding: 4px; + } + """ + ) + toolbar_layout = QHBoxLayout(self.toolbar) + toolbar_layout.setContentsMargins(8, 6, 8, 6) + self.photo_button = self._tool_button(QStyle.SP_DialogSaveButton, "Zrob zdjecie") + self.record_button = self._tool_button(QStyle.SP_MediaPlay, "Start/stop nagrywania") + self.settings_button = self._tool_button(QStyle.SP_FileDialogDetailedView, "Ustawienia obrazu") + toolbar_layout.addWidget(self.photo_button) + toolbar_layout.addWidget(self.record_button) + toolbar_layout.addWidget(self.settings_button) + self.photo_button.clicked.connect(self.take_photo) + self.record_button.clicked.connect(self.toggle_recording) + self.settings_button.clicked.connect(self.open_settings) + + quit_action = QAction("Zamknij", self) + quit_action.triggered.connect(self.close) + self.addAction(quit_action) + + def _tool_button(self, icon_id: QStyle.StandardPixmap, tooltip: str) -> QToolButton: + button = QToolButton() + button.setIcon(self.style().standardIcon(icon_id)) + button.setToolTip(tooltip) + return button + + def resizeEvent(self, event: Any) -> None: + super().resizeEvent(event) + self.video_label.setGeometry(self.stage.rect()) + + panel_width = min(420, max(280, self.stage.width() // 3)) + self.result_panel.setGeometry(self.stage.width() - panel_width - 18, 18, panel_width, 190) + + self.toolbar.adjustSize() + toolbar_size = self.toolbar.sizeHint() + self.toolbar.setGeometry( + (self.stage.width() - toolbar_size.width()) // 2, + self.stage.height() - 
toolbar_size.height() - 18, + toolbar_size.width(), + toolbar_size.height(), + ) + + def closeEvent(self, event: Any) -> None: + if self.video_recorder.is_recording: + self.video_recorder.stop(self.current_metadata("video")) + self.worker.stop() + self.worker.wait(2000) + super().closeEvent(event) + + @Slot(object) + def on_frame_ready(self, frame: np.ndarray) -> None: + self.last_frame = frame.copy() + if self.video_recorder.is_recording: + self.video_recorder.write(frame) + self._show_frame(frame) + + @Slot(object) + def on_detection_ready(self, result: DetectionResult) -> None: + self.last_detection = result + self.overlay_result = result if result.xyxy else None + self._update_result_text(result) + + @Slot(str) + def on_camera_error(self, message: str) -> None: + self.result_text.setPlainText(message) + + def start_detection(self) -> None: + self.overlay_result = None + self.result_text.setPlainText("Wykrywanie...") + self.worker.start_detection() + + def accept_detection(self) -> None: + self.worker.accept_detection() + self.overlay_result = None + if self.last_detection: + self._update_result_text(self.last_detection, accepted=True) + + def take_photo(self) -> None: + if self.last_frame is None: + QMessageBox.warning(self, "Zdjecie", "Brak klatki z kamery") + return + path = self.media_store.save_photo(self.last_frame, self.current_metadata("photo")) + self.statusBar().showMessage(f"Zapisano zdjecie: {path}", 5000) + + def toggle_recording(self) -> None: + if self.last_frame is None: + QMessageBox.warning(self, "Wideo", "Brak klatki z kamery") + return + + if self.video_recorder.is_recording: + path = self.video_recorder.stop(self.current_metadata("video")) + self.record_button.setIcon(self.style().standardIcon(QStyle.SP_MediaPlay)) + self.statusBar().showMessage(f"Zapisano film: {path}", 5000) + return + + try: + path = self.video_recorder.start(self.last_frame) + except RuntimeError as exc: + QMessageBox.warning(self, "Wideo", str(exc)) + return + 
def _update_result_text(self, result: DetectionResult, accepted: bool = False) -> None:
    """Show ``result`` in the result panel as a short multi-line report.

    The first line says whether the result was accepted.  After it come, each
    only when present: the error message, the detection confidence, the three
    parsed label fields ('-' for a missing one) and, separated by a blank
    line, the raw OCR text.
    """
    report = ["Zatwierdzono"] if accepted else ["Wynik"]

    if result.error:
        report.append(f"Komunikat: {result.error}")

    if result.confidence is not None:
        report.append(f"YOLO confidence: {result.confidence:.3f}")

    label = result.parsed
    if label:
        report.extend(
            [
                f"Zamowienie: {label.order_number or '-'}",
                f"Kolor: {label.color_code or '-'}",
                f"Model: {label.product_model or '-'}",
            ]
        )

    if result.raw_text:
        report.extend(["", result.raw_text])

    self.result_text.setPlainText("\n".join(report))
def write_metadata(media_path: Path, metadata: dict[str, Any]) -> Path:
    """Write ``metadata`` as a JSON sidecar file next to ``media_path``.

    The sidecar has the same name as the media file with the suffix replaced
    by ``.json``.  It is UTF-8 encoded, indented by two spaces, keeps non-ASCII
    characters unescaped and ends with a newline.

    Args:
        media_path: Path of the photo or video the metadata belongs to.
        metadata: JSON-serializable dictionary.

    Returns:
        Path of the written JSON file.

    Raises:
        TypeError: If ``metadata`` contains a value ``json`` cannot serialize.
            Nothing is written in that case.
    """
    json_path = media_path.with_suffix(".json")
    # Serialize first: if this fails, no truncated sidecar is left on disk.
    payload = json.dumps(metadata, indent=2, ensure_ascii=False) + "\n"
    json_path.write_text(payload, encoding="utf-8")
    return json_path
class VideoRecorder:
    """Records frames into a video file and writes a JSON sidecar on stop.

    All start-up failures are reported as ``RuntimeError`` so that callers
    need to handle a single exception type.
    """

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the configuration; no file is opened until ``start``."""
        self.config = config
        self.app_config = app_config
        self.path: Path | None = None
        self.writer: cv2.VideoWriter | None = None
        self.started_at: str | None = None

    @property
    def is_recording(self) -> bool:
        """True while a video writer is open."""
        return self.writer is not None

    def start(self, frame_bgr: np.ndarray) -> Path:
        """Open a new video file sized like ``frame_bgr`` and write that frame.

        The frame rate comes from ``camera.fps`` and the codec from
        ``capture.video_codec`` (only its first four characters are used).

        Args:
            frame_bgr: First frame of the recording; its height and width
                define the video size.

        Returns:
            Path of the video file being written.

        Raises:
            RuntimeError: If a recording is already running, the configured
                codec has fewer than four characters, or the writer cannot be
                opened.  The recorder stays idle in all these cases.
        """
        if self.writer is not None:
            raise RuntimeError("Nagrywanie juz trwa")

        capture_cfg = self.config["capture"]
        codec = str(capture_cfg.get("video_codec", "mp4v"))[:4]
        # VideoWriter_fourcc needs exactly four characters; checking here turns
        # the TypeError it would raise into the error type callers handle, and
        # it happens before any directory or file is created.
        if len(codec) != 4:
            raise RuntimeError(f"Nieprawidlowy kodek wideo: {codec!r}")

        path = MediaStore(self.config, self.app_config).video_path()
        h, w = frame_bgr.shape[:2]
        fps = float(self.config["camera"].get("fps", 30))
        fourcc = cv2.VideoWriter_fourcc(*codec)
        writer = cv2.VideoWriter(str(path), fourcc, fps, (w, h))
        if not writer.isOpened():
            # Nothing is stored on self, so a failed start leaves no stale state.
            writer.release()
            raise RuntimeError("Nie mozna uruchomic zapisu wideo")

        self.path = path
        self.writer = writer
        self.started_at = datetime.now().isoformat(timespec="seconds")
        self.write(frame_bgr)
        return path

    def write(self, frame_bgr: np.ndarray) -> None:
        """Append ``frame_bgr`` to the recording; ignored when not recording."""
        if self.writer is not None:
            self.writer.write(frame_bgr)

    def stop(self, metadata: dict[str, Any]) -> Path | None:
        """Close the recording and write its metadata sidecar.

        Args:
            metadata: Metadata to store; a ``recording`` entry with the start
                and stop timestamps is added to a copy of it.

        Returns:
            Path of the finished video, or ``None`` when nothing was being
            recorded.
        """
        if self.writer is None:
            return None
        self.writer.release()
        self.writer = None
        path = self.path
        if path is not None:
            metadata = {
                **metadata,
                "recording": {
                    "started_at": self.started_at,
                    "stopped_at": datetime.now().isoformat(timespec="seconds"),
                },
            }
            write_metadata(path, metadata)
        self.path = None
        self.started_at = None
        return path
class SettingsDialog(QDialog):
    """Modal dialog for editing the camera resolution, FPS and properties.

    The dialog edits a deep copy of the configuration it was given.  Pressing
    "Zapisz" emits ``settings_saved`` with the edited ``camera`` section and
    accepts the dialog; "Anuluj" rejects it without emitting anything.
    """

    settings_saved = Signal(dict)

    def __init__(self, config: dict[str, Any], parent: QWidget | None = None) -> None:
        """Build the form from the ``camera`` section of ``config``."""
        super().__init__(parent)
        self.setWindowTitle("Ustawienia obrazu")
        self.setMinimumWidth(520)
        self.config = deepcopy(config)
        self.property_widgets: dict[str, PropertySlider] = {}

        camera = self.config["camera"]
        root = QVBoxLayout(self)
        form = QFormLayout()

        self.width_box = self._make_spin_box(160, 7680, camera.get("width", 1920))
        self.height_box = self._make_spin_box(120, 4320, camera.get("height", 1080))
        self.fps_box = self._make_spin_box(1, 240, camera.get("fps", 30))

        for caption, box in (
            ("Szerokosc", self.width_box),
            ("Wysokosc", self.height_box),
            ("FPS", self.fps_box),
        ):
            form.addRow(caption, box)

        # One slider row per known camera property, in declaration order.
        for name, caption in CAMERA_PROPERTY_LABELS.items():
            row = PropertySlider(name, camera.get("properties", {}).get(name))
            self.property_widgets[name] = row
            form.addRow(QLabel(caption), row)

        root.addLayout(form)

        cancel_button = QPushButton("Anuluj")
        save_button = QPushButton("Zapisz")
        save_button.setDefault(True)

        button_row = QHBoxLayout()
        button_row.addStretch(1)
        button_row.addWidget(cancel_button)
        button_row.addWidget(save_button)
        root.addLayout(button_row)

        cancel_button.clicked.connect(self.reject)
        save_button.clicked.connect(self._save)

    @staticmethod
    def _make_spin_box(minimum: int, maximum: int, value: Any) -> QSpinBox:
        """Return a spin box limited to ``minimum..maximum`` showing ``int(value)``."""
        box = QSpinBox()
        box.setRange(minimum, maximum)
        box.setValue(int(value))
        return box

    def _save(self) -> None:
        """Copy the widget values into the camera section, emit it and close."""
        camera = self.config["camera"]
        camera["width"] = int(self.width_box.value())
        camera["height"] = int(self.height_box.value())
        camera["fps"] = int(self.fps_box.value())

        properties = {}
        for name, widget in self.property_widgets.items():
            properties[name] = widget.value()
        camera["properties"] = properties

        self.settings_saved.emit(camera)
        self.accept()
0000000..d120bc3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,15 @@ +[project] +name = "duck-stain-yolo" +version = "0.1.0" +description = "PySide6 camera app for YOLO label detection and OCR metadata capture." +requires-python = ">=3.10" +dependencies = [ + "PySide6>=6.6", + "opencv-python>=4.8", + "ultralytics>=8.0", + "pytesseract>=0.3", + "numpy>=1.26", +] + +[tool.ruff] +line-length = 100 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1a7f997 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +--extra-index-url https://download.pytorch.org/whl/cpu + +PySide6>=6.6 +opencv-python>=4.8 +torch==2.5.1+cpu; platform_system == "Linux" +torchvision==0.20.1+cpu; platform_system == "Linux" +ultralytics>=8.0 +pytesseract>=0.3 +numpy>=1.26