Initial MVP application skeleton

Add PySide6 camera UI, YOLO/Tesseract detection pipeline, capture metadata, configuration, and project gitignore.
This commit is contained in:
2026-05-07 00:18:38 +02:00
commit 090865af76
18 changed files with 1140 additions and 0 deletions

31
.gitignore vendored Normal file
View File

@@ -0,0 +1,31 @@
# Python
__pycache__/
*.py[cod]
*$py.class
.pytest_cache/
.ruff_cache/
.mypy_cache/
# Virtual environments
.venv/
.venv-*/
venv/
env/
# Local/runtime data
captures/photos/*
captures/videos/*
!captures/photos/.gitkeep
!captures/videos/.gitkeep
models/*
!models/.gitkeep
# OS/editor
.DS_Store
.idea/
.vscode/
# Ultralytics/runtime caches
runs/
*.onnx
*.engine

42
README.md Normal file
View File

@@ -0,0 +1,42 @@
# Duck Stain YOLO
MVP aplikacji okienkowej do podgladu kamery USB, wykrywania etykiety modelem YOLOv8 i zapisu zdjec/filmow z metadanymi JSON.
## Uruchomienie
```bash
python3 -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
python main.py
```
Na macOS z Tesseractem:
```bash
brew install tesseract
```
Na Ubuntu/WSL:
```bash
sudo apt install tesseract-ocr
```
Na Linuksie `requirements.txt` wymusza CPU build PyTorch, zeby nie pobierac wariantu CUDA. Na macOS pip zainstaluje standardowy wariant CPU dla procesora Intel.
Umiesc wytrenowany model jako `models/best.pt` albo zmien `detection.model_path` w `app_config.json`.
## Konfiguracja
Glowny plik konfiguracji: `app_config.json`.
Istotne ustawienia:
- `camera.width`, `camera.height`, `camera.fps` - rozdzielczosc i FPS kamery.
- `camera.properties` - parametry OpenCV ustawiane na kamerze, np. jasnosc, kontrast, ekspozycja. `null` oznacza brak wymuszania wartosci.
- `detection.mode` - `best` rysuje najlepsza etykiete, `all` rysuje wszystkie wykrycia.
- `detection.frame_stride` - YOLO uruchamiany co N klatek podczas aktywnego wykrywania.
- `label_data.models`, `label_data.colors` - slowniki do walidacji tekstu z etykiety.
Zdjecia trafiaja do `captures/photos`, filmy do `captures/videos`. Obok kazdego zapisanego zdjecia i filmu tworzony jest plik JSON o tej samej nazwie z aktualnym wynikiem detekcji/OCR.

1
app/__init__.py Normal file
View File

@@ -0,0 +1 @@
# The package exports no names for ``from app import *``; import the
# submodules (``app.config``, ``app.camera``, ...) explicitly instead.
__all__ = []

129
app/camera.py Normal file
View File

@@ -0,0 +1,129 @@
from __future__ import annotations
import threading
import time
from typing import Any
import cv2
import numpy as np
from PySide6.QtCore import QThread, Signal, Slot
from app.detection import DetectionPipeline, DetectionResult
# Maps the property names used under ``camera.properties`` in the app config
# to the OpenCV capture property ids that are passed to ``VideoCapture.set``.
# Names missing from this table are skipped when camera settings are applied.
CV_CAP_PROPS = {
    "brightness": cv2.CAP_PROP_BRIGHTNESS,
    "contrast": cv2.CAP_PROP_CONTRAST,
    "saturation": cv2.CAP_PROP_SATURATION,
    "hue": cv2.CAP_PROP_HUE,
    "gain": cv2.CAP_PROP_GAIN,
    "exposure": cv2.CAP_PROP_EXPOSURE,
    "sharpness": cv2.CAP_PROP_SHARPNESS,
    "auto_exposure": cv2.CAP_PROP_AUTO_EXPOSURE,
    "focus": cv2.CAP_PROP_FOCUS,
    "auto_focus": cv2.CAP_PROP_AUTOFOCUS,
}
def backend_for_name(name: str) -> int:
    """Translate a configured backend name into an OpenCV capture API id.

    Recognised names are ``"avfoundation"``, ``"v4l2"`` and ``"dshow"``
    (exact, case-sensitive match). Any other value, including ``"auto"``,
    selects ``cv2.CAP_ANY``.
    """
    known_backends = {
        "avfoundation": cv2.CAP_AVFOUNDATION,
        "v4l2": cv2.CAP_V4L2,
        "dshow": cv2.CAP_DSHOW,
    }
    return known_backends.get(name, cv2.CAP_ANY)
class CameraWorker(QThread):
    """Background thread that reads camera frames and runs label detection.

    Signals:
        frame_ready: emitted with every successfully read BGR frame.
        detection_ready: emitted with the ``DetectionResult`` of every frame
            that was passed through the detection pipeline.
        camera_error: emitted with a user-facing message when the camera
            cannot be opened or a frame cannot be read.

    The public methods (``stop``, ``start_detection``, ``accept_detection``,
    ``update_camera_config``) are meant to be called from another thread; they
    only flip flags that the capture loop picks up.
    """

    frame_ready = Signal(object)
    detection_ready = Signal(object)
    camera_error = Signal(str)

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the shared config and build the detection pipeline.

        Args:
            config: Application config dict; the ``camera`` and ``detection``
                sections are read by this worker.
            app_config: Object forwarded to ``DetectionPipeline``.
        """
        super().__init__()
        self.config = config
        self.app_config = app_config
        self.pipeline = DetectionPipeline(config, app_config)
        self._running = threading.Event()
        self._running.set()
        self._detecting = False
        self._accepted = False
        self._frame_count = 0
        self._capture: cv2.VideoCapture | None = None
        # Set by update_camera_config(); consumed by the capture loop.
        self._settings_dirty = False
        self._lock = threading.Lock()

    def stop(self) -> None:
        """Ask the capture loop to finish after the current iteration."""
        self._running.clear()

    @Slot()
    def start_detection(self) -> None:
        """Enable detection and restart the frame-stride counter."""
        with self._lock:
            self._detecting = True
            self._accepted = False
            self._frame_count = 0

    @Slot()
    def accept_detection(self) -> None:
        """Stop detecting; the last result is considered accepted."""
        with self._lock:
            self._detecting = False
            self._accepted = True

    @Slot(dict)
    def update_camera_config(self, camera_config: dict[str, Any]) -> None:
        """Replace the camera config and schedule it to be applied.

        The new settings are applied by the capture loop itself rather than
        here: this method runs in the caller's thread, and touching the
        ``VideoCapture`` while ``run()`` is inside ``capture.read()`` would
        use the same capture object from two threads at once.
        """
        with self._lock:
            self.config["camera"] = camera_config
            self._settings_dirty = True

    def run(self) -> None:
        """Open the camera and emit frames until ``stop()`` is called."""
        with self._lock:
            camera_cfg = self.config["camera"]
        capture = cv2.VideoCapture(
            int(camera_cfg.get("index", 0)),
            backend_for_name(str(camera_cfg.get("backend", "auto"))),
        )
        self._capture = capture
        try:
            if not capture.isOpened():
                self.camera_error.emit("Nie mozna otworzyc kamery USB")
                return
            self._apply_camera_settings(capture)
            while self._running.is_set():
                if self._consume_settings_change():
                    self._apply_camera_settings(capture)
                ok, frame = capture.read()
                if not ok or frame is None:
                    self.camera_error.emit("Nie mozna odczytac klatki z kamery")
                    # Back off briefly so a missing camera does not busy-loop.
                    time.sleep(0.2)
                    continue
                self.frame_ready.emit(frame)
                self._maybe_detect(frame)
        finally:
            # Also reached when the camera failed to open, so the handle is
            # always released and never left dangling on self._capture.
            capture.release()
            self._capture = None

    def _consume_settings_change(self) -> bool:
        """Return True once after each ``update_camera_config`` call."""
        with self._lock:
            dirty = self._settings_dirty
            self._settings_dirty = False
        return dirty

    def _apply_camera_settings(self, capture: cv2.VideoCapture) -> None:
        """Push resolution, FPS and the optional properties to the camera."""
        with self._lock:
            camera_cfg = self.config["camera"]
        capture.set(cv2.CAP_PROP_FRAME_WIDTH, int(camera_cfg.get("width", 1920)))
        capture.set(cv2.CAP_PROP_FRAME_HEIGHT, int(camera_cfg.get("height", 1080)))
        capture.set(cv2.CAP_PROP_FPS, int(camera_cfg.get("fps", 30)))
        # ``"properties": null`` in the JSON config is treated like an empty map.
        properties = camera_cfg.get("properties") or {}
        for name, value in properties.items():
            # None means "do not force this property".
            if value is None or name not in CV_CAP_PROPS:
                continue
            capture.set(CV_CAP_PROPS[name], float(value))

    def _maybe_detect(self, frame: np.ndarray) -> None:
        """Run the pipeline on every ``frame_stride``-th frame while detecting."""
        with self._lock:
            detecting = self._detecting and not self._accepted
            frame_stride = max(1, int(self.config["detection"].get("frame_stride", 5)))
            self._frame_count += 1
            should_detect = detecting and self._frame_count % frame_stride == 0
        if not should_detect:
            return
        # The pipeline runs outside the lock so the flag setters never block on it.
        result: DetectionResult = self.pipeline.process(frame)
        self.detection_ready.emit(result)

96
app/config.py Normal file
View File

@@ -0,0 +1,96 @@
from __future__ import annotations
import json
from copy import deepcopy
from pathlib import Path
from typing import Any
# Project root: the directory two levels above this file.
APP_ROOT = Path(__file__).resolve().parent.parent
# Default location of the user-editable configuration file.
CONFIG_PATH = APP_ROOT / "app_config.json"
# Built-in defaults. A loaded config file is merged on top of this dict, so
# any key the file omits keeps the value listed here.
DEFAULT_CONFIG: dict[str, Any] = {
    "camera": {
        "index": 0,
        "width": 1920,
        "height": 1080,
        "fps": 30,
        "backend": "auto",
        # None means "leave the camera's current value untouched".
        "properties": {
            "brightness": None,
            "contrast": None,
            "saturation": None,
            "hue": None,
            "gain": None,
            "exposure": None,
            "sharpness": None,
            "auto_exposure": None,
            "focus": None,
            "auto_focus": None,
        },
    },
    "detection": {
        "model_path": "models/best.pt",
        "confidence_threshold": 0.25,
        "mode": "best",
        "frame_stride": 5,
        "image_size": 640,
        "device": "cpu",
    },
    "ocr": {
        "enabled": True,
        "language": "eng",
        "tesseract_cmd": None,
        "threshold": True,
        "scale": 2.0,
    },
    "capture": {
        "photos_dir": "captures/photos",
        "videos_dir": "captures/videos",
        "image_extension": "jpg",
        "video_extension": "mp4",
        "video_codec": "mp4v",
    },
    "label_data": {"models": ["Regius", "Duvell"], "colors": ["T-NF-BLK-OUT-BST-G", "T-BLK-G"]},
}
def deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Return a new dict with ``override`` recursively merged on top of ``base``.

    Nested dicts present on both sides are merged key by key; any other value
    from ``override`` replaces the one from ``base``. Neither input is
    modified, and the result shares no mutable objects with either of them.

    Args:
        base: Default values.
        override: Values that take precedence over ``base``.

    Returns:
        The merged dictionary.
    """
    result = deepcopy(base)
    for key, value in override.items():
        current = result.get(key)
        if isinstance(value, dict) and isinstance(current, dict):
            result[key] = deep_merge(current, value)
        else:
            # Copy so later edits to the merged config cannot leak back into
            # the caller's override structure (lists, nested dicts, ...).
            result[key] = deepcopy(value)
    return result
class AppConfig:
    """Loads, holds and persists the application configuration.

    Attributes:
        path: Location of the JSON configuration file.
        data: The effective configuration (defaults merged with the file).
    """

    def __init__(self, path: Path = CONFIG_PATH) -> None:
        """Load the configuration from ``path``, creating the file if needed."""
        self.path = path
        self.data = self.load()

    def load(self) -> dict[str, Any]:
        """Read the config file and merge it over the built-in defaults.

        When the file does not exist it is created with the default values.

        Returns:
            The effective configuration dictionary.

        Raises:
            ValueError: If the file is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError``) or its top-level value is not an object.
        """
        if not self.path.exists():
            # Hand save() a private copy: it binds its argument to self.data,
            # and the module-level DEFAULT_CONFIG must never become the live,
            # mutable configuration.
            defaults = deepcopy(DEFAULT_CONFIG)
            self.save(defaults)
            return defaults
        with self.path.open("r", encoding="utf-8") as config_file:
            loaded = json.load(config_file)
        if not isinstance(loaded, dict):
            raise ValueError(f"Plik konfiguracji {self.path} musi zawierac obiekt JSON")
        return deep_merge(DEFAULT_CONFIG, loaded)

    def save(self, data: dict[str, Any] | None = None) -> None:
        """Write the configuration to ``self.path`` as indented UTF-8 JSON.

        Args:
            data: When given, replaces ``self.data`` before writing.
        """
        if data is not None:
            self.data = data
        self.path.parent.mkdir(parents=True, exist_ok=True)
        with self.path.open("w", encoding="utf-8") as config_file:
            json.dump(self.data, config_file, indent=2, ensure_ascii=False)
            config_file.write("\n")

    def resolve_path(self, configured_path: str) -> Path:
        """Return ``configured_path`` as is if absolute, else relative to APP_ROOT."""
        path = Path(configured_path)
        if path.is_absolute():
            return path
        return APP_ROOT / path

183
app/detection.py Normal file
View File

@@ -0,0 +1,183 @@
from __future__ import annotations
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
import cv2
import numpy as np
from app.label_parser import ParsedLabel, parse_label_text
@dataclass
class DetectionResult:
    """Outcome of one detection pass (YOLO box plus optional OCR data)."""

    # Bounding box of the selected detection as (x1, y1, x2, y2), if any.
    xyxy: tuple[int, int, int, int] | None = None
    # Confidence and class name of the selected detection.
    confidence: float | None = None
    class_name: str | None = None
    # Text read from the label and its parsed form.
    raw_text: str = ""
    parsed: ParsedLabel | None = None
    # User-facing message when something went wrong or nothing was found.
    error: str | None = None
    # Every detected box as a plain dict.
    all_boxes: list[dict[str, Any]] = field(default_factory=list)

    def to_metadata(self) -> dict[str, Any]:
        """Return the result as a dict of plain values for the JSON sidecar."""
        bbox = list(self.xyxy) if self.xyxy else None
        parsed = self.parsed.to_dict() if self.parsed else None
        return {
            "bbox_xyxy": bbox,
            "confidence": self.confidence,
            "class_name": self.class_name,
            "raw_text": self.raw_text,
            "parsed": parsed,
            "error": self.error,
            "all_boxes": self.all_boxes,
        }
class YoloLabelDetector:
    """Wraps an Ultralytics YOLO model used to find the label in a frame."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Remember the configuration and try to load the model right away."""
        self.config = config
        self.app_config = app_config
        self.model = None
        self.load_error: str | None = None
        self._load_model()

    def _load_model(self) -> None:
        """Load the weights from ``detection.model_path``.

        Failures are not raised; they are stored in ``load_error`` and
        reported by ``detect``.
        """
        weights = self.app_config.resolve_path(self.config["detection"]["model_path"])
        if not weights.exists():
            self.load_error = f"Brak modelu: {weights}"
            return
        try:
            # Imported lazily so a missing ultralytics install becomes a
            # reported load error instead of an import failure.
            from ultralytics import YOLO

            self.model = YOLO(str(weights))
        except Exception as exc:  # pragma: no cover - depends on optional runtime deps
            self.load_error = f"Nie mozna zaladowac YOLO: {exc}"

    @staticmethod
    def _describe_box(box: Any, names: Any) -> dict[str, Any]:
        """Convert one predicted box into a plain dict."""
        x1, y1, x2, y2 = [int(v) for v in box.xyxy[0].tolist()]
        confidence = float(box.conf[0])
        class_id = int(box.cls[0]) if box.cls is not None else -1
        if isinstance(names, dict):
            class_name = names.get(class_id, str(class_id))
        else:
            class_name = str(class_id)
        return {
            "xyxy": (x1, y1, x2, y2),
            "confidence": confidence,
            "class_name": class_name,
        }

    def detect(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Run the model on ``frame_bgr`` and pick the most confident box.

        Returns:
            A result whose ``xyxy``/``confidence``/``class_name`` describe the
            most confident box and whose ``all_boxes`` lists every box sorted
            by descending confidence. When the model is unavailable,
            prediction fails or nothing is detected, only ``error`` is set.
        """
        if self.model is None:
            return DetectionResult(error=self.load_error or "Model YOLO nie jest zaladowany")
        settings = self.config["detection"]
        try:
            predictions = self.model.predict(
                source=frame_bgr,
                conf=float(settings["confidence_threshold"]),
                imgsz=int(settings["image_size"]),
                device=settings.get("device", "cpu"),
                verbose=False,
            )
        except Exception as exc:  # pragma: no cover - depends on model runtime
            return DetectionResult(error=f"Blad YOLO: {exc}")
        names = getattr(self.model, "names", {})
        candidates = [
            self._describe_box(box, names)
            for prediction in predictions
            for box in prediction.boxes
        ]
        if not candidates:
            return DetectionResult(error="Nie wykryto etykiety")
        ranked = sorted(candidates, key=lambda item: item["confidence"], reverse=True)
        best = ranked[0]
        return DetectionResult(
            xyxy=best["xyxy"],
            confidence=best["confidence"],
            class_name=best["class_name"],
            # Box tuples become lists so the entries are plain JSON values.
            all_boxes=[{**item, "xyxy": list(item["xyxy"])} for item in ranked],
        )
class TesseractOcr:
def __init__(self, config: dict[str, Any]) -> None:
self.config = config
self.load_error: str | None = None
self.pytesseract = None
self._load()
def _load(self) -> None:
if not self.config["ocr"].get("enabled", True):
return
try:
import pytesseract
command = self.config["ocr"].get("tesseract_cmd")
if command:
pytesseract.pytesseract.tesseract_cmd = command
self.pytesseract = pytesseract
except Exception as exc:
self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"
def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> tuple[str, str | None]:
if not self.config["ocr"].get("enabled", True):
return "", None
if self.pytesseract is None:
return "", self.load_error or "OCR nie jest zaladowany"
x1, y1, x2, y2 = bbox
h, w = frame_bgr.shape[:2]
x1, y1 = max(0, x1), max(0, y1)
x2, y2 = min(w, x2), min(h, y2)
if x2 <= x1 or y2 <= y1:
return "", "Nieprawidlowy bbox OCR"
roi = frame_bgr[y1:y2, x1:x2]
scale = float(self.config["ocr"].get("scale", 1.0))
if scale != 1.0:
roi = cv2.resize(roi, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
if self.config["ocr"].get("threshold", True):
gray = cv2.GaussianBlur(gray, (3, 3), 0)
gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
try:
text = self.pytesseract.image_to_string(
gray,
lang=self.config["ocr"].get("language", "eng"),
config="--psm 6",
)
except Exception as exc:
return "", f"Blad OCR: {exc}"
return text, None
class DetectionPipeline:
    """Chains YOLO detection, OCR of the detected box and label parsing."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Create the detector and the OCR reader from the shared config."""
        self.config = config
        self.detector = YoloLabelDetector(config, app_config)
        self.ocr = TesseractOcr(config)

    def process(self, frame_bgr: np.ndarray) -> DetectionResult:
        """Detect the label in ``frame_bgr`` and, if found, read and parse it.

        Returns:
            The detector's result. When a box was found it is enriched with
            ``raw_text`` and ``parsed``; an OCR error, if any, replaces
            ``error``.
        """
        detection = self.detector.detect(frame_bgr)
        if detection.xyxy is not None:
            raw_text, ocr_error = self.ocr.read_label(frame_bgr, detection.xyxy)
            label_data = self.config["label_data"]
            detection.raw_text = raw_text
            detection.parsed = parse_label_text(
                raw_text,
                label_data.get("colors", []),
                label_data.get("models", []),
            )
            if ocr_error:
                detection.error = ocr_error
        return detection

44
app/label_parser.py Normal file
View File

@@ -0,0 +1,44 @@
from __future__ import annotations
import re
from dataclasses import dataclass, asdict
# Order number of the form ``dddd/dddd/N``: two four-digit groups followed by
# a one- or two-digit number without a leading zero (1-99), delimited by word
# boundaries. The match is available as the named group ``order``.
ORDER_RE = re.compile(r"\b(?P<order>\d{4}/\d{4}/(?:[1-9]|[1-9]\d))\b")
@dataclass
class ParsedLabel:
order_number: str | None
color_code: str | None
product_model: str | None
raw_text: str
def to_dict(self) -> dict[str, str | None]:
return asdict(self)
def normalize_ocr_text(text: str) -> str:
    """Collapse every run of whitespace, line breaks included, into one space.

    Leading and trailing whitespace is removed.
    """
    # str.split() without arguments already treats "\n" and "\r" as separators.
    tokens = text.split()
    return " ".join(tokens)
def parse_label_text(text: str, known_colors: list[str], known_models: list[str]) -> ParsedLabel:
    """Extract the order number, colour code and product model from OCR text.

    Args:
        text: Raw OCR output; whitespace is normalised before matching.
        known_colors: Colour codes to look for (case-insensitive substring
            match).
        known_models: Model names to look for (case-insensitive whole-word
            match).

    Returns:
        A ``ParsedLabel``; every field that could not be found is None.
        When several configured entries match, the longest one wins (ties go
        to the entry listed first), so a short code that is merely a fragment
        of a longer configured code cannot shadow it. Empty entries are
        ignored because they would match any text.
    """
    normalized = normalize_ocr_text(text)
    normalized_upper = normalized.upper()
    order_match = ORDER_RE.search(normalized)

    matching_colors = [
        color for color in known_colors if color and color.upper() in normalized_upper
    ]
    matching_models = [
        model
        for model in known_models
        if model and re.search(rf"\b{re.escape(model)}\b", normalized, re.I)
    ]
    return ParsedLabel(
        order_number=order_match.group("order") if order_match else None,
        # max() returns the first of equally long candidates, i.e. list order.
        color_code=max(matching_colors, key=len, default=None),
        product_model=max(matching_models, key=len, default=None),
        raw_text=normalized,
    )

11
app/main.py Normal file
View File

@@ -0,0 +1,11 @@
from __future__ import annotations
import sys
from app.config import AppConfig
from app.main_window import run_app
def main() -> None:
    """Load the configuration, run the GUI and exit with its return code."""
    exit_code = run_app(AppConfig())
    sys.exit(exit_code)

292
app/main_window.py Normal file
View File

@@ -0,0 +1,292 @@
from __future__ import annotations
from datetime import datetime
from typing import Any
import cv2
import numpy as np
from PySide6.QtCore import Qt, Slot
from PySide6.QtGui import QAction, QImage, QPixmap
from PySide6.QtWidgets import (
QApplication,
QHBoxLayout,
QLabel,
QMainWindow,
QMessageBox,
QPushButton,
QTextEdit,
QToolButton,
QVBoxLayout,
QWidget,
QStyle,
)
from app.camera import CameraWorker
from app.config import AppConfig
from app.detection import DetectionResult
from app.media import MediaStore, VideoRecorder
from app.settings_dialog import SettingsDialog
class MainWindow(QMainWindow):
    """Main window: live camera preview with an overlaid result panel and toolbar.

    The window owns the ``CameraWorker`` thread and reacts to its signals,
    keeps the most recent frame and detection, and uses ``MediaStore`` /
    ``VideoRecorder`` to save photos and videos together with metadata.
    """

    def __init__(self, app_config: AppConfig) -> None:
        """Build the UI, create the media helpers and start the camera worker."""
        super().__init__()
        self.app_config = app_config
        self.config = app_config.data
        # Most recent camera frame (copy) used for photos and for starting a recording.
        self.last_frame: np.ndarray | None = None
        # Detection currently drawn on the preview; None hides the overlay.
        self.overlay_result: DetectionResult | None = None
        # Last detection received; written into the capture metadata.
        self.last_detection: DetectionResult | None = None
        self.media_store = MediaStore(self.config, self.app_config)
        self.video_recorder = VideoRecorder(self.config, self.app_config)
        self.setWindowTitle("Duck Stain YOLO")
        self.resize(1280, 720)
        self._build_ui()
        self.worker = CameraWorker(self.config, self.app_config)
        self.worker.frame_ready.connect(self.on_frame_ready)
        self.worker.detection_ready.connect(self.on_detection_ready)
        self.worker.camera_error.connect(self.on_camera_error)
        self.worker.start()

    def _build_ui(self) -> None:
        """Create the preview label, the result panel and the bottom toolbar.

        All three are direct children of ``self.stage``, which has no layout
        of its own; their geometry is set manually in ``resizeEvent``.
        """
        self.stage = QWidget()
        self.setCentralWidget(self.stage)
        self.video_label = QLabel(self.stage)
        self.video_label.setAlignment(Qt.AlignCenter)
        self.video_label.setStyleSheet("background: #111; color: #ddd;")
        self.video_label.setText("Kamera")
        # Result panel: read-only text plus the detect/accept buttons.
        self.result_panel = QWidget(self.stage)
        self.result_panel.setObjectName("resultPanel")
        self.result_panel.setStyleSheet(
            """
QWidget#resultPanel {
background: rgba(20, 20, 20, 170);
border-radius: 8px;
}
QTextEdit {
background: transparent;
color: white;
border: 0;
font-size: 13px;
}
QPushButton {
min-height: 28px;
padding: 4px 12px;
}
"""
        )
        panel_layout = QVBoxLayout(self.result_panel)
        self.result_text = QTextEdit()
        self.result_text.setReadOnly(True)
        self.result_text.setFixedHeight(118)
        panel_layout.addWidget(self.result_text)
        panel_buttons = QHBoxLayout()
        self.detect_button = QPushButton("wykryj")
        self.ok_button = QPushButton("ok")
        panel_buttons.addStretch(1)
        panel_buttons.addWidget(self.detect_button)
        panel_buttons.addWidget(self.ok_button)
        panel_layout.addLayout(panel_buttons)
        self.detect_button.clicked.connect(self.start_detection)
        self.ok_button.clicked.connect(self.accept_detection)
        # Bottom toolbar: photo, record and settings buttons.
        self.toolbar = QWidget(self.stage)
        self.toolbar.setObjectName("bottomToolbar")
        self.toolbar.setStyleSheet(
            """
QWidget#bottomToolbar {
background: rgba(20, 20, 20, 175);
border-radius: 8px;
}
QToolButton {
min-width: 44px;
min-height: 38px;
padding: 4px;
}
"""
        )
        toolbar_layout = QHBoxLayout(self.toolbar)
        toolbar_layout.setContentsMargins(8, 6, 8, 6)
        self.photo_button = self._tool_button(QStyle.SP_DialogSaveButton, "Zrob zdjecie")
        self.record_button = self._tool_button(QStyle.SP_MediaPlay, "Start/stop nagrywania")
        self.settings_button = self._tool_button(QStyle.SP_FileDialogDetailedView, "Ustawienia obrazu")
        toolbar_layout.addWidget(self.photo_button)
        toolbar_layout.addWidget(self.record_button)
        toolbar_layout.addWidget(self.settings_button)
        self.photo_button.clicked.connect(self.take_photo)
        self.record_button.clicked.connect(self.toggle_recording)
        self.settings_button.clicked.connect(self.open_settings)
        # Window-level action that closes the window when triggered.
        quit_action = QAction("Zamknij", self)
        quit_action.triggered.connect(self.close)
        self.addAction(quit_action)

    def _tool_button(self, icon_id: QStyle.StandardPixmap, tooltip: str) -> QToolButton:
        """Return a tool button showing a standard style icon and a tooltip."""
        button = QToolButton()
        button.setIcon(self.style().standardIcon(icon_id))
        button.setToolTip(tooltip)
        return button

    def resizeEvent(self, event: Any) -> None:
        """Re-position the preview, result panel and toolbar for the new size."""
        super().resizeEvent(event)
        # The preview fills the whole stage.
        self.video_label.setGeometry(self.stage.rect())
        # Result panel: top-right corner, a third of the width clamped to 280..420 px.
        panel_width = min(420, max(280, self.stage.width() // 3))
        self.result_panel.setGeometry(self.stage.width() - panel_width - 18, 18, panel_width, 190)
        # Toolbar: horizontally centred, 18 px above the bottom edge.
        self.toolbar.adjustSize()
        toolbar_size = self.toolbar.sizeHint()
        self.toolbar.setGeometry(
            (self.stage.width() - toolbar_size.width()) // 2,
            self.stage.height() - toolbar_size.height() - 18,
            toolbar_size.width(),
            toolbar_size.height(),
        )

    def closeEvent(self, event: Any) -> None:
        """Finish an active recording and stop the camera worker before closing."""
        if self.video_recorder.is_recording:
            self.video_recorder.stop(self.current_metadata("video"))
        self.worker.stop()
        # Wait at most 2000 ms for the worker thread to finish.
        self.worker.wait(2000)
        super().closeEvent(event)

    @Slot(object)
    def on_frame_ready(self, frame: np.ndarray) -> None:
        """Remember the frame, feed an active recording and refresh the preview."""
        self.last_frame = frame.copy()
        if self.video_recorder.is_recording:
            self.video_recorder.write(frame)
        self._show_frame(frame)

    @Slot(object)
    def on_detection_ready(self, result: DetectionResult) -> None:
        """Store the detection and show it; the overlay is drawn only if a box exists."""
        self.last_detection = result
        self.overlay_result = result if result.xyxy else None
        self._update_result_text(result)

    @Slot(str)
    def on_camera_error(self, message: str) -> None:
        """Show a camera error message in the result panel."""
        self.result_text.setPlainText(message)

    def start_detection(self) -> None:
        """Clear the overlay and ask the worker to start detecting."""
        self.overlay_result = None
        self.result_text.setPlainText("Wykrywanie...")
        self.worker.start_detection()

    def accept_detection(self) -> None:
        """Stop detection, hide the overlay and mark the last result as accepted."""
        self.worker.accept_detection()
        self.overlay_result = None
        if self.last_detection:
            self._update_result_text(self.last_detection, accepted=True)

    def take_photo(self) -> None:
        """Save the latest frame as a photo with metadata, or warn if there is none."""
        if self.last_frame is None:
            QMessageBox.warning(self, "Zdjecie", "Brak klatki z kamery")
            return
        path = self.media_store.save_photo(self.last_frame, self.current_metadata("photo"))
        self.statusBar().showMessage(f"Zapisano zdjecie: {path}", 5000)

    def toggle_recording(self) -> None:
        """Stop the running recording, or start a new one from the latest frame."""
        if self.last_frame is None:
            QMessageBox.warning(self, "Wideo", "Brak klatki z kamery")
            return
        if self.video_recorder.is_recording:
            path = self.video_recorder.stop(self.current_metadata("video"))
            self.record_button.setIcon(self.style().standardIcon(QStyle.SP_MediaPlay))
            self.statusBar().showMessage(f"Zapisano film: {path}", 5000)
            return
        try:
            path = self.video_recorder.start(self.last_frame)
        except RuntimeError as exc:
            QMessageBox.warning(self, "Wideo", str(exc))
            return
        self.record_button.setIcon(self.style().standardIcon(QStyle.SP_MediaStop))
        self.statusBar().showMessage(f"Nagrywanie: {path}", 5000)

    def open_settings(self) -> None:
        """Open the modal settings dialog; saved settings go to save_camera_settings."""
        dialog = SettingsDialog(self.config, self)
        dialog.settings_saved.connect(self.save_camera_settings)
        dialog.exec()

    @Slot(dict)
    def save_camera_settings(self, camera_config: dict[str, Any]) -> None:
        """Store the new camera section, persist the config and notify the worker."""
        self.config["camera"] = camera_config
        self.app_config.save(self.config)
        self.worker.update_camera_config(camera_config)

    def current_metadata(self, media_type: str) -> dict[str, Any]:
        """Build the metadata dict saved next to a photo or video.

        Args:
            media_type: Value stored under ``media_type`` (the callers in this
                class pass ``"photo"`` or ``"video"``).

        Returns:
            A dict with the creation time (local time, second precision), the
            last detection (or None), the camera settings and the detection
            config.
        """
        return {
            "media_type": media_type,
            "created_at": datetime.now().isoformat(timespec="seconds"),
            "detection": self.last_detection.to_metadata() if self.last_detection else None,
            "camera": {
                "width": self.config["camera"].get("width"),
                "height": self.config["camera"].get("height"),
                "fps": self.config["camera"].get("fps"),
                "properties": self.config["camera"].get("properties", {}),
            },
            "detection_config": self.config.get("detection", {}),
        }

    def _update_result_text(self, result: DetectionResult, accepted: bool = False) -> None:
        """Render ``result`` as text in the result panel.

        Args:
            result: Detection to describe.
            accepted: Selects the header line ("Zatwierdzono" instead of "Wynik").
        """
        status = "Zatwierdzono" if accepted else "Wynik"
        lines = [status]
        if result.error:
            lines.append(f"Komunikat: {result.error}")
        if result.confidence is not None:
            lines.append(f"YOLO confidence: {result.confidence:.3f}")
        if result.parsed:
            lines.append(f"Zamowienie: {result.parsed.order_number or '-'}")
            lines.append(f"Kolor: {result.parsed.color_code or '-'}")
            lines.append(f"Model: {result.parsed.product_model or '-'}")
        if result.raw_text:
            # Blank line separates the parsed fields from the raw OCR text.
            lines.append("")
            lines.append(result.raw_text)
        self.result_text.setPlainText("\n".join(lines))

    def _show_frame(self, frame_bgr: np.ndarray) -> None:
        """Draw the overlay (if any) on a copy of the frame and show it in the preview."""
        # Work on a copy so the overlay is never drawn into the caller's frame.
        display_frame = frame_bgr.copy()
        if self.overlay_result is not None:
            self._draw_detection(display_frame, self.overlay_result)
        frame_rgb = cv2.cvtColor(display_frame, cv2.COLOR_BGR2RGB)
        h, w, channels = frame_rgb.shape
        # copy() gives the QImage its own pixel storage instead of a view on frame_rgb.
        image = QImage(frame_rgb.data, w, h, channels * w, QImage.Format_RGB888).copy()
        pixmap = QPixmap.fromImage(image)
        self.video_label.setPixmap(
            pixmap.scaled(self.video_label.size(), Qt.KeepAspectRatio, Qt.SmoothTransformation)
        )

    def _draw_detection(self, frame_bgr: np.ndarray, result: DetectionResult) -> None:
        """Draw the detected box(es) and their captions into ``frame_bgr`` in place.

        With ``detection.mode == "all"`` every entry of ``result.all_boxes`` is
        drawn; otherwise only the selected detection is drawn.
        """
        mode = self.config["detection"].get("mode", "best")
        boxes = result.all_boxes if mode == "all" else [result.to_metadata()]
        for item in boxes:
            # all_boxes entries use "xyxy"; to_metadata() uses "bbox_xyxy".
            xyxy = item.get("xyxy") or item.get("bbox_xyxy")
            if not xyxy:
                continue
            x1, y1, x2, y2 = [int(value) for value in xyxy]
            confidence = item.get("confidence")
            class_name = item.get("class_name") or "label"
            cv2.rectangle(frame_bgr, (x1, y1), (x2, y2), (0, 220, 0), 3)
            caption = f"{class_name} {confidence:.2f}" if confidence is not None else class_name
            cv2.putText(
                frame_bgr,
                caption,
                # Keep the caption's y coordinate at 24 px or more so it stays on screen.
                (x1, max(24, y1 - 8)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                (0, 220, 0),
                2,
                cv2.LINE_AA,
            )
def run_app(app_config: AppConfig) -> int:
    """Create the Qt application, show the main window and run the event loop.

    Returns:
        The value returned by the Qt event loop.
    """
    qt_app = QApplication([])
    main_window = MainWindow(app_config)
    main_window.show()
    exit_code = qt_app.exec()
    return exit_code

101
app/media.py Normal file
View File

@@ -0,0 +1,101 @@
from __future__ import annotations
import json
from datetime import datetime
from pathlib import Path
from typing import Any
import cv2
import numpy as np
def timestamp_name() -> str:
    """Return the current local time as ``YYYYMMDD_HHMMSS`` for use in file names."""
    return f"{datetime.now():%Y%m%d_%H%M%S}"
def write_metadata(media_path: Path, metadata: dict[str, Any]) -> Path:
    """Write ``metadata`` as JSON next to ``media_path``.

    The sidecar file has the same name as the media file with its suffix
    replaced by ``.json``. It is written as UTF-8 with two-space indentation
    and a trailing newline.

    Returns:
        Path of the JSON file that was written.
    """
    sidecar = media_path.with_suffix(".json")
    with sidecar.open("w", encoding="utf-8") as handle:
        json.dump(metadata, fp=handle, ensure_ascii=False, indent=2)
        handle.write("\n")
    return sidecar
class MediaStore:
    """Chooses file locations for captured media and saves photos."""

    def __init__(self, config: dict[str, Any], app_config: Any) -> None:
        """Store the config (``capture`` section is used) and the path resolver."""
        self.config = config
        self.app_config = app_config

    def photo_path(self) -> Path:
        """Return a fresh, unused path for a photo in ``capture.photos_dir``."""
        capture_cfg = self.config["capture"]
        return self._next_path(
            capture_cfg["photos_dir"],
            capture_cfg.get("image_extension", "jpg"),
        )

    def video_path(self) -> Path:
        """Return a fresh, unused path for a video in ``capture.videos_dir``."""
        capture_cfg = self.config["capture"]
        return self._next_path(
            capture_cfg["videos_dir"],
            capture_cfg.get("video_extension", "mp4"),
        )

    def save_photo(self, frame_bgr: np.ndarray, metadata: dict[str, Any]) -> Path:
        """Write ``frame_bgr`` as an image plus its JSON metadata sidecar.

        Returns:
            Path of the image file.
        """
        path = self.photo_path()
        # NOTE(review): cv2.imwrite reports failure through its return value,
        # which is not checked here - confirm whether the caller should be told.
        cv2.imwrite(str(path), frame_bgr)
        write_metadata(path, metadata)
        return path

    def _next_path(self, configured_dir: str, extension: str) -> Path:
        """Create the target directory and return a non-colliding file path.

        File names are timestamps with one-second resolution, so two captures
        within the same second would otherwise overwrite each other (media
        file and JSON sidecar alike). On a collision ``_1``, ``_2``, ... is
        appended to the timestamp.
        """
        directory = self.app_config.resolve_path(configured_dir)
        directory.mkdir(parents=True, exist_ok=True)
        suffix = extension.lstrip(".")
        stem = timestamp_name()
        candidate = directory / f"{stem}.{suffix}"
        counter = 1
        while candidate.exists() or candidate.with_suffix(".json").exists():
            candidate = directory / f"{stem}_{counter}.{suffix}"
            counter += 1
        return candidate
class VideoRecorder:
def __init__(self, config: dict[str, Any], app_config: Any) -> None:
self.config = config
self.app_config = app_config
self.path: Path | None = None
self.writer: cv2.VideoWriter | None = None
self.started_at: str | None = None
@property
def is_recording(self) -> bool:
return self.writer is not None
def start(self, frame_bgr: np.ndarray) -> Path:
if self.writer is not None:
raise RuntimeError("Nagrywanie juz trwa")
capture_cfg = self.config["capture"]
self.path = MediaStore(self.config, self.app_config).video_path()
h, w = frame_bgr.shape[:2]
fps = float(self.config["camera"].get("fps", 30))
codec = str(capture_cfg.get("video_codec", "mp4v"))
fourcc = cv2.VideoWriter_fourcc(*codec[:4])
self.writer = cv2.VideoWriter(str(self.path), fourcc, fps, (w, h))
if not self.writer.isOpened():
self.writer = None
raise RuntimeError("Nie mozna uruchomic zapisu wideo")
self.started_at = datetime.now().isoformat(timespec="seconds")
self.write(frame_bgr)
return self.path
def write(self, frame_bgr: np.ndarray) -> None:
if self.writer is not None:
self.writer.write(frame_bgr)
def stop(self, metadata: dict[str, Any]) -> Path | None:
if self.writer is None:
return None
self.writer.release()
self.writer = None
path = self.path
if path is not None:
metadata = {
**metadata,
"recording": {
"started_at": self.started_at,
"stopped_at": datetime.now().isoformat(timespec="seconds"),
},
}
write_metadata(path, metadata)
self.path = None
self.started_at = None
return path

125
app/settings_dialog.py Normal file
View File

@@ -0,0 +1,125 @@
from __future__ import annotations
from copy import deepcopy
from typing import Any
from PySide6.QtCore import Qt, Signal
from PySide6.QtWidgets import (
QCheckBox,
QDialog,
QFormLayout,
QHBoxLayout,
QLabel,
QPushButton,
QSlider,
QSpinBox,
QVBoxLayout,
QWidget,
)
# Display labels for the camera properties offered in the settings dialog,
# keyed by the property name stored under ``camera.properties``. One slider
# row is created per entry, in this order.
CAMERA_PROPERTY_LABELS = {
    "brightness": "Jasnosc",
    "contrast": "Kontrast",
    "saturation": "Nasycenie",
    "hue": "Barwa",
    "gain": "Gain",
    "exposure": "Ekspozycja",
    "sharpness": "Ostrosc",
    "auto_exposure": "Auto ekspozycja",
    "focus": "Focus",
    "auto_focus": "Auto focus",
}
class PropertySlider(QWidget):
    """One settings row: enable checkbox, slider and spin box kept in sync.

    Signals:
        value_changed: emitted with ``(name, value)`` when the slider moves or
            the checkbox is toggled; ``value`` is None while unchecked.
    """

    value_changed = Signal(str, object)

    def __init__(self, name: str, value: float | None) -> None:
        """Create the row for property ``name`` with the initial ``value``.

        A ``value`` of None starts the row unchecked with the slider at 0.
        """
        super().__init__()
        self.name = name
        initial = 0 if value is None else int(value)

        self.enabled_box = QCheckBox()
        self.enabled_box.setChecked(value is not None)

        self.slider = QSlider(Qt.Horizontal)
        self.slider.setRange(-100, 100)
        self.slider.setValue(initial)

        self.value_box = QSpinBox()
        self.value_box.setRange(-100, 100)
        # Read back from the slider so both widgets start with the same value.
        self.value_box.setValue(self.slider.value())

        row = QHBoxLayout(self)
        row.setContentsMargins(0, 0, 0, 0)
        row.addWidget(self.enabled_box)
        row.addWidget(self.slider, 1)
        row.addWidget(self.value_box)

        # Slider and spin box mirror each other.
        self.slider.valueChanged.connect(self.value_box.setValue)
        self.value_box.valueChanged.connect(self.slider.setValue)
        self.slider.valueChanged.connect(self._emit_value)
        self.enabled_box.toggled.connect(self._emit_value)

    def _emit_value(self) -> None:
        """Emit ``value_changed`` with the current effective value."""
        self.value_changed.emit(self.name, self.value())

    def value(self) -> float | None:
        """Return the slider value as float, or None when the row is unchecked."""
        return float(self.slider.value()) if self.enabled_box.isChecked() else None
class SettingsDialog(QDialog):
    """Modal dialog for editing the camera resolution, FPS and properties.

    Signals:
        settings_saved: emitted with the edited ``camera`` config dict when
            the user presses "Zapisz".
    """

    settings_saved = Signal(dict)

    def __init__(self, config: dict[str, Any], parent: QWidget | None = None) -> None:
        """Build the form from a private deep copy of ``config``."""
        super().__init__(parent)
        self.setWindowTitle("Ustawienia obrazu")
        self.setMinimumWidth(520)
        # Edits are made on a copy; the caller's config is never touched here.
        self.config = deepcopy(config)
        self.property_widgets: dict[str, PropertySlider] = {}
        camera_cfg = self.config["camera"]

        main_layout = QVBoxLayout(self)
        form = QFormLayout()

        self.width_box = self._spin_box(160, 7680, int(camera_cfg.get("width", 1920)))
        self.height_box = self._spin_box(120, 4320, int(camera_cfg.get("height", 1080)))
        self.fps_box = self._spin_box(1, 240, int(camera_cfg.get("fps", 30)))
        for caption, box in (
            ("Szerokosc", self.width_box),
            ("Wysokosc", self.height_box),
            ("FPS", self.fps_box),
        ):
            form.addRow(caption, box)

        # One slider row per known camera property.
        for name, label in CAMERA_PROPERTY_LABELS.items():
            row_widget = PropertySlider(name, camera_cfg.get("properties", {}).get(name))
            self.property_widgets[name] = row_widget
            form.addRow(QLabel(label), row_widget)
        main_layout.addLayout(form)

        button_row = QHBoxLayout()
        button_row.addStretch(1)
        cancel_button = QPushButton("Anuluj")
        save_button = QPushButton("Zapisz")
        save_button.setDefault(True)
        button_row.addWidget(cancel_button)
        button_row.addWidget(save_button)
        main_layout.addLayout(button_row)

        cancel_button.clicked.connect(self.reject)
        save_button.clicked.connect(self._save)

    @staticmethod
    def _spin_box(minimum: int, maximum: int, value: int) -> QSpinBox:
        """Return a spin box limited to ``minimum..maximum`` showing ``value``."""
        box = QSpinBox()
        box.setRange(minimum, maximum)
        box.setValue(value)
        return box

    def _save(self) -> None:
        """Copy the widget values into the camera config, emit it and accept."""
        camera_cfg = self.config["camera"]
        camera_cfg["width"] = int(self.width_box.value())
        camera_cfg["height"] = int(self.height_box.value())
        camera_cfg["fps"] = int(self.fps_box.value())
        camera_cfg["properties"] = {
            name: row_widget.value() for name, row_widget in self.property_widgets.items()
        }
        self.settings_saved.emit(camera_cfg)
        self.accept()

53
app_config.json Normal file
View File

@@ -0,0 +1,53 @@
{
"camera": {
"index": 0,
"width": 1920,
"height": 1080,
"fps": 30,
"backend": "auto",
"properties": {
"brightness": null,
"contrast": null,
"saturation": null,
"hue": null,
"gain": null,
"exposure": null,
"sharpness": null,
"auto_exposure": null,
"focus": null,
"auto_focus": null
}
},
"detection": {
"model_path": "models/best.pt",
"confidence_threshold": 0.25,
"mode": "best",
"frame_stride": 5,
"image_size": 640,
"device": "cpu"
},
"ocr": {
"enabled": true,
"language": "eng",
"tesseract_cmd": null,
"threshold": true,
"scale": 2.0
},
"capture": {
"photos_dir": "captures/photos",
"videos_dir": "captures/videos",
"image_extension": "jpg",
"video_extension": "mp4",
"video_codec": "mp4v"
},
"label_data": {
"models": [
"Regius",
"Duvell"
],
"colors": [
"T-NF-BLK-OUT-BST-G",
"T-BLK-G"
]
}
}

1
captures/photos/.gitkeep Normal file
View File

@@ -0,0 +1 @@

1
captures/videos/.gitkeep Normal file
View File

@@ -0,0 +1 @@

5
main.py Normal file
View File

@@ -0,0 +1,5 @@
from app.main import main
# Start the application only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()

1
models/.gitkeep Normal file
View File

@@ -0,0 +1 @@

15
pyproject.toml Normal file
View File

@@ -0,0 +1,15 @@
[project]
name = "duck-stain-yolo"
version = "0.1.0"
description = "PySide6 camera app for YOLO label detection and OCR metadata capture."
requires-python = ">=3.10"
dependencies = [
"PySide6>=6.6",
"opencv-python>=4.8",
"ultralytics>=8.0",
"pytesseract>=0.3",
"numpy>=1.26",
]
[tool.ruff]
line-length = 100

9
requirements.txt Normal file
View File

@@ -0,0 +1,9 @@
--extra-index-url https://download.pytorch.org/whl/cpu
PySide6>=6.6
opencv-python>=4.8
torch==2.5.1+cpu; platform_system == "Linux"
torchvision==0.20.1+cpu; platform_system == "Linux"
ultralytics>=8.0
pytesseract>=0.3
numpy>=1.26