feat: Add video playback functionality and inference support
- Introduced VideoPlayer class to handle local video playback, emitting frames via the frame_ready signal.
- Updated MainWindow to switch between camera and video sources, integrating video playback controls.
- Enhanced AppMenuBar with options to open video files and manage inference models.
- Implemented BboxOverlay for displaying detection results on video frames.
- Added InferenceManager to manage YOLO inference in a separate process, with error handling and restart logic.
- Created tests for BboxOverlay and InferenceManager to ensure functionality and robustness.
- Updated pyproject.toml to include optional dependencies for inference support.
This commit is contained in:
154
app/inference/bbox_overlay.py
Normal file
154
app/inference/bbox_overlay.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""BboxOverlay — draws YOLO detection bounding boxes on the camera view."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import NamedTuple
|
||||
|
||||
from PySide6.QtCore import QRect, QSize, Qt, Slot
|
||||
from PySide6.QtGui import QColor, QFont, QPainter, QPen
|
||||
|
||||
from app.config import (
|
||||
BBOX_COLOR,
|
||||
BBOX_FONT_SIZE,
|
||||
BBOX_LABEL_BG_COLOR,
|
||||
BBOX_LABEL_TEXT_COLOR,
|
||||
BBOX_LINE_WIDTH,
|
||||
)
|
||||
from app.overlay.overlay_layer import IOverlayLayer
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Detection(NamedTuple):
    """
    One detected object: a bounding box, a score and a class label.

    The box corners (x1, y1) and (x2, y2) are given in pixels of the
    *source frame* -- the frame that was handed to inference -- and not in
    widget coordinates. BboxOverlay converts them into the letterboxed
    video_rect when it draws.
    """

    # Box corners, in source-frame pixels.
    x1: float
    y1: float
    x2: float
    y2: float

    # Detection score; BboxOverlay prints it with two decimals in the label.
    conf: float

    # Text shown in the label next to the score.
    label: str
|
||||
|
||||
|
||||
class BboxOverlay(IOverlayLayer):
    """
    Overlay layer that renders detection bounding boxes.

    Usage:
        overlay = BboxOverlay()
        camera_view.add_overlay_layer(overlay)
        inference_manager.detections_ready.connect(overlay.on_detections)

    Thread safety:
        on_detections() is called from the GUI thread (via Qt signal).
        paint() is also called from the GUI thread (paintEvent).
        No locks required.
    """

    def __init__(self) -> None:
        """Create an overlay with no detections and pre-built drawing tools."""
        super().__init__()
        self._detections: list[Detection] = []
        # Size of the frame the current detections refer to; empty until the
        # first on_detections() call, which makes paint() a no-op.
        self._source_size: QSize = QSize(0, 0)

        # Pen used for the box outline.
        self._pen = QPen(QColor(*BBOX_COLOR))
        self._pen.setWidth(BBOX_LINE_WIDTH)
        self._pen.setJoinStyle(Qt.PenJoinStyle.MiterJoin)

        # Bold fixed-width font for the label text.
        self._font = QFont("Monospace")
        self._font.setStyleHint(QFont.StyleHint.TypeWriter)
        self._font.setPointSize(BBOX_FONT_SIZE)
        self._font.setBold(True)

        self._box_color = QColor(*BBOX_COLOR)
        self._bg_color = QColor(*BBOX_LABEL_BG_COLOR)
        self._text_color = QColor(*BBOX_LABEL_TEXT_COLOR)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @Slot(object, object)
    def on_detections(
        self,
        detections: list[Detection],
        source_size: tuple[int, int],
    ) -> None:
        """
        Receive detection results from InferenceManager.

        The results are only stored here; this method does not itself
        request a repaint.

        Args:
            detections: List of Detection namedtuples (pixel coords).
            source_size: (width, height) of the frame that was inferred.
        """
        self._detections = detections
        self._source_size = QSize(*source_size)

    def clear(self) -> None:
        """Remove all currently displayed detections."""
        self._detections = []

    # ------------------------------------------------------------------
    # IOverlayLayer implementation
    # ------------------------------------------------------------------

    def paint(self, painter: QPainter, video_rect: QRect) -> None:
        """
        Draw every stored detection as a box with a "label conf" caption.

        Detection coordinates are scaled from source-frame pixels into
        ``video_rect``. Nothing is drawn when there are no detections, when
        no source size has been received yet, or when ``video_rect`` is
        empty. The painter's state (font, pen, brush) is saved on entry and
        restored on exit, so this layer does not affect anything painted
        after it.

        Args:
            painter: Active painter to draw with.
            video_rect: Rectangle, in the painter's coordinates, that the
                video frame occupies.
        """
        if not self._detections:
            return
        # An empty source size would divide by zero; an empty target rect
        # would collapse every box to a point while still drawing labels.
        if self._source_size.isEmpty() or video_rect.isEmpty():
            return

        vr = video_rect

        # Scale factors: source-pixel -> video_rect-pixel
        scale_x = vr.width() / self._source_size.width()
        scale_y = vr.height() / self._source_size.height()

        painter.save()
        try:
            painter.setFont(self._font)
            fm = painter.fontMetrics()

            # Loop invariants: label height, baseline offset and text pen.
            text_h = fm.height() + 2
            ascent = fm.ascent()
            text_pen = QPen(self._text_color)

            for det in self._detections:
                # Map to widget coordinates
                wx1 = vr.x() + int(det.x1 * scale_x)
                wy1 = vr.y() + int(det.y1 * scale_y)
                wx2 = vr.x() + int(det.x2 * scale_x)
                wy2 = vr.y() + int(det.y2 * scale_y)

                box_rect = QRect(wx1, wy1, wx2 - wx1, wy2 - wy1)

                # Draw bounding box (outline only)
                painter.setPen(self._pen)
                painter.setBrush(Qt.BrushStyle.NoBrush)
                painter.drawRect(box_rect)

                # Label text: "label 0.87"
                label_text = f"{det.label} {det.conf:.2f}"
                text_w = fm.horizontalAdvance(label_text) + 6

                # Position the label just above the box; if that would start
                # above the top of video_rect, draw it inside the box instead.
                lx = wx1
                ly = wy1 - text_h
                if ly < vr.top():
                    ly = wy1

                label_bg = QRect(lx, ly, text_w, text_h)

                # Filled background without an outline
                painter.setPen(Qt.PenStyle.NoPen)
                painter.setBrush(self._bg_color)
                painter.drawRect(label_bg)

                # drawText takes the baseline position, hence the ascent.
                painter.setPen(text_pen)
                painter.drawText(
                    lx + 3,
                    ly + ascent + 1,
                    label_text,
                )
        finally:
            # Always undo the font/pen/brush changes, even if drawing raised.
            painter.restore()
|
||||
Reference in New Issue
Block a user