feat: Add video playback functionality and inference support

- Introduced VideoPlayer class to handle local video playback, emitting frames via frame_ready signal.
- Updated MainWindow to switch between camera and video sources, integrating video playback controls.
- Enhanced AppMenuBar with options to open video files and manage inference models.
- Implemented BboxOverlay for displaying detection results on video frames.
- Added InferenceManager to manage YOLO inference in a separate process, with error handling and restart logic.
- Created tests for BboxOverlay and InferenceManager to ensure functionality and robustness.
- Updated pyproject.toml to include optional dependencies for inference support.
This commit is contained in:
2026-05-13 21:30:13 +02:00
parent ac51498b7a
commit e9b474b1ed
14 changed files with 1524 additions and 49 deletions

View File

@@ -0,0 +1,154 @@
"""BboxOverlay — draws YOLO detection bounding boxes on the camera view."""
from __future__ import annotations
import logging
from typing import NamedTuple
from PySide6.QtCore import QRect, QSize, Qt, Slot
from PySide6.QtGui import QColor, QFont, QPainter, QPen
from app.config import (
BBOX_COLOR,
BBOX_FONT_SIZE,
BBOX_LABEL_BG_COLOR,
BBOX_LABEL_TEXT_COLOR,
BBOX_LINE_WIDTH,
)
from app.overlay.overlay_layer import IOverlayLayer
# Module-level logger named after this module's import path.
# NOTE(review): not referenced in the visible part of this module.
logger = logging.getLogger(__name__)
class Detection(NamedTuple):
    """
    One detected object: a bounding box plus its confidence and class label.

    The box corners (x1, y1) and (x2, y2) are expressed in pixels of the
    *source frame*, i.e. the frame that was handed to inference. They are
    not widget coordinates; BboxOverlay scales them into the video
    rectangle it is asked to paint into.
    """

    # Box corner coordinates, in source-frame pixels.
    x1: float
    y1: float
    x2: float
    y2: float
    # Detection confidence; rendered with two decimals next to the label.
    conf: float
    # Class name shown in the label above the box.
    label: str
class BboxOverlay(IOverlayLayer):
    """
    Overlay layer that renders detection bounding boxes.

    Usage:
        overlay = BboxOverlay()
        camera_view.add_overlay_layer(overlay)
        inference_manager.detections_ready.connect(overlay.on_detections)

    Thread safety:
        on_detections() is called from the GUI thread (via Qt signal).
        paint() is also called from the GUI thread (paintEvent).
        No locks required.
    """

    def __init__(self) -> None:
        """Create an overlay with no detections and pre-built drawing tools."""
        super().__init__()
        self._detections: list[Detection] = []
        self._source_size: QSize = QSize(0, 0)

        self._box_color = QColor(*BBOX_COLOR)
        self._bg_color = QColor(*BBOX_LABEL_BG_COLOR)
        self._text_color = QColor(*BBOX_LABEL_TEXT_COLOR)

        # Pen used for the box outline.
        self._pen = QPen(QColor(*BBOX_COLOR))
        self._pen.setWidth(BBOX_LINE_WIDTH)
        self._pen.setJoinStyle(Qt.PenJoinStyle.MiterJoin)

        # Pen used for the label text; built once instead of once per
        # detection on every repaint.
        self._text_pen = QPen(self._text_color)

        self._font = QFont("Monospace")
        self._font.setStyleHint(QFont.StyleHint.TypeWriter)
        self._font.setPointSize(BBOX_FONT_SIZE)
        self._font.setBold(True)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    @Slot(object, object)
    def on_detections(
        self,
        detections: list[Detection],
        source_size: tuple[int, int],
    ) -> None:
        """
        Receive detection results from InferenceManager.

        Args:
            detections: List of Detection namedtuples (pixel coords).
                ``None`` is treated as "no detections".
            source_size: (width, height) of the frame that was inferred.
        """
        # Keep a private copy so later mutation of the sender's list cannot
        # change what gets painted.
        self._detections = list(detections) if detections is not None else []
        self._source_size = QSize(*source_size)

    def clear(self) -> None:
        """Remove all currently displayed detections."""
        self._detections = []

    # ------------------------------------------------------------------
    # IOverlayLayer implementation
    # ------------------------------------------------------------------
    def paint(self, painter: QPainter, video_rect: QRect) -> None:
        """
        Draw every stored detection as an outlined box with a text label.

        Source-frame pixel coordinates are scaled into ``video_rect``.
        Nothing is drawn when there are no detections, when the source
        size is unknown/empty, or when ``video_rect`` itself is empty.
        The painter's font, pen and brush are restored before returning.

        Args:
            painter: Active painter to draw with.
            video_rect: Rectangle (in painter coordinates) occupied by the
                video image.
        """
        if not self._detections or self._source_size.isEmpty():
            return
        if video_rect.isEmpty():
            # Zero scale would collapse every box onto the rect origin.
            return

        # Scale factors: source-pixel -> video_rect-pixel. The source size
        # is known to be non-empty here, so the divisions are safe.
        scale_x = video_rect.width() / self._source_size.width()
        scale_y = video_rect.height() / self._source_size.height()
        origin_x = video_rect.x()
        origin_y = video_rect.y()
        top_limit = video_rect.top()

        # Font/pen/brush are changed below; save/restore keeps those changes
        # from leaking into whatever draws next with the same painter.
        painter.save()
        try:
            painter.setFont(self._font)
            fm = painter.fontMetrics()
            # Loop-invariant label metrics.
            text_h = fm.height() + 2
            baseline_offset = fm.ascent() + 1

            for det in self._detections:
                # Map to widget coordinates.
                wx1 = origin_x + int(det.x1 * scale_x)
                wy1 = origin_y + int(det.y1 * scale_y)
                wx2 = origin_x + int(det.x2 * scale_x)
                wy2 = origin_y + int(det.y2 * scale_y)

                # Bounding box outline.
                painter.setPen(self._pen)
                painter.setBrush(Qt.BrushStyle.NoBrush)
                painter.drawRect(QRect(wx1, wy1, wx2 - wx1, wy2 - wy1))

                # Label text: "label 0.87"
                label_text = f"{det.label} {det.conf:.2f}"
                text_w = fm.horizontalAdvance(label_text) + 6

                # Label sits above the box; if that would leave the top of
                # the video rect, draw it inside the box instead.
                lx = wx1
                ly = wy1 - text_h
                if ly < top_limit:
                    ly = wy1

                # Filled label background.
                painter.setPen(Qt.PenStyle.NoPen)
                painter.setBrush(self._bg_color)
                painter.drawRect(QRect(lx, ly, text_w, text_h))

                # Label text, positioned by its baseline.
                painter.setPen(self._text_pen)
                painter.drawText(lx + 3, ly + baseline_offset, label_text)
        finally:
            painter.restore()