feat: Add video playback functionality and inference support

- Introduced VideoPlayer class to handle local video playback, emitting frames via the frame_ready signal.
- Updated MainWindow to switch between camera and video sources, integrating video playback controls.
- Enhanced AppMenuBar with options to open video files and manage inference models.
- Implemented BboxOverlay for displaying detection results on video frames.
- Added InferenceManager to manage YOLO inference in a separate process, with error handling and restart logic.
- Created tests for BboxOverlay and InferenceManager to ensure functionality and robustness.
- Updated pyproject.toml to include optional dependencies for inference support.
2026-05-13 21:30:13 +02:00
parent ac51498b7a
commit e9b474b1ed
14 changed files with 1524 additions and 49 deletions
--- a/app/inference/bbox_overlay.py
+++ b/app/inference/bbox_overlay.py
@@ -0,0 +1,154 @@
+"""BboxOverlay — draws YOLO detection bounding boxes on the camera view."""
+
+from __future__ import annotations
+
+import logging
+from typing import NamedTuple
+
+from PySide6.QtCore import QRect, QSize, Qt, Slot
+from PySide6.QtGui import QColor, QFont, QPainter, QPen
+
+from app.config import (
+    BBOX_COLOR,
+    BBOX_FONT_SIZE,
+    BBOX_LABEL_BG_COLOR,
+    BBOX_LABEL_TEXT_COLOR,
+    BBOX_LINE_WIDTH,
+)
+from app.overlay.overlay_layer import IOverlayLayer
+
+logger = logging.getLogger(__name__)
+
+
+class Detection(NamedTuple):
+    """
+    A single object detection result.
+
+    Coordinates (x1, y1, x2, y2) are in pixels of the *source frame*
+    (i.e. the frame that was submitted to inference).  BboxOverlay maps
+    them to the letterboxed video_rect before drawing.
+    """
+
+    x1: float  # left edge of the box (BboxOverlay uses it as the rect origin x)
+    y1: float  # top edge of the box (the label tag is placed relative to it)
+    x2: float  # right edge of the box; drawn width is x2 - x1 after scaling
+    y2: float  # bottom edge of the box; drawn height is y2 - y1 after scaling
+    conf: float  # detection confidence; rendered with two decimals in the tag
+    label: str  # text shown in the tag in front of the confidence value
+
+
+class BboxOverlay(IOverlayLayer):
+    """
+    Overlay layer that renders detection bounding boxes.
+
+    Usage:
+        overlay = BboxOverlay()
+        camera_view.add_overlay_layer(overlay)
+        inference_manager.detections_ready.connect(overlay.on_detections)
+
+    Thread safety:
+        on_detections() is called from the GUI thread (via Qt signal).
+        paint() is also called from the GUI thread (paintEvent).
+        No locks required.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._detections: list[Detection] = []
+        self._source_size: QSize = QSize(0, 0)
+
+        self._pen = QPen(QColor(*BBOX_COLOR))
+        self._pen.setWidth(BBOX_LINE_WIDTH)
+        self._pen.setJoinStyle(Qt.PenJoinStyle.MiterJoin)
+
+        self._font = QFont("Monospace")
+        self._font.setStyleHint(QFont.StyleHint.TypeWriter)
+        self._font.setPointSize(BBOX_FONT_SIZE)
+        self._font.setBold(True)
+
+        self._box_color = QColor(*BBOX_COLOR)
+        self._bg_color = QColor(*BBOX_LABEL_BG_COLOR)
+        self._text_color = QColor(*BBOX_LABEL_TEXT_COLOR)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    @Slot(object, object)
+    def on_detections(
+        self,
+        detections: list[Detection],
+        source_size: tuple[int, int],
+    ) -> None:
+        """
+        Receive detection results from InferenceManager.
+
+        Args:
+            detections:  List of Detection namedtuples (pixel coords).
+            source_size: (width, height) of the frame that was inferred.
+        """
+        self._detections = detections
+        self._source_size = QSize(*source_size)
+
+    def clear(self) -> None:
+        """Remove all currently displayed detections."""
+        self._detections = []
+
+    # ------------------------------------------------------------------
+    # IOverlayLayer implementation
+    # ------------------------------------------------------------------
+
+    def paint(self, painter: QPainter, video_rect: QRect) -> None:
+        if not self._detections:
+            return
+        if self._source_size.isEmpty():
+            return
+
+        src_w = self._source_size.width()
+        src_h = self._source_size.height()
+        vr = video_rect
+
+        # Scale factors: source-pixel → video_rect-pixel
+        scale_x = vr.width() / src_w
+        scale_y = vr.height() / src_h
+
+        painter.setFont(self._font)
+        fm = painter.fontMetrics()
+
+        for det in self._detections:
+            # Map to widget coordinates
+            wx1 = vr.x() + int(det.x1 * scale_x)
+            wy1 = vr.y() + int(det.y1 * scale_y)
+            wx2 = vr.x() + int(det.x2 * scale_x)
+            wy2 = vr.y() + int(det.y2 * scale_y)
+
+            box_rect = QRect(wx1, wy1, wx2 - wx1, wy2 - wy1)
+
+            # Draw bounding box
+            painter.setPen(self._pen)
+            painter.setBrush(Qt.BrushStyle.NoBrush)
+            painter.drawRect(box_rect)
+
+            # Label text: "label 0.87"
+            label_text = f"{det.label} {det.conf:.2f}"
+            text_w = fm.horizontalAdvance(label_text) + 6
+            text_h = fm.height() + 2
+
+            # Position label above box, clamped to video_rect
+            lx = wx1
+            ly = wy1 - text_h
+            if ly < vr.top():
+                ly = wy1  # draw inside box if no room above
+
+            label_bg = QRect(lx, ly, text_w, text_h)
+
+            painter.setPen(Qt.PenStyle.NoPen)
+            painter.setBrush(self._bg_color)
+            painter.drawRect(label_bg)
+
+            painter.setPen(QPen(self._text_color))
+            painter.drawText(
+                lx + 3,
+                ly + fm.ascent() + 1,
+                label_text,
+            )