duck-preview/app/inference/bbox_overlay.py

"""BboxOverlay — draws YOLO detection bounding boxes on the camera view."""

from __future__ import annotations

import logging
from typing import NamedTuple

from PySide6.QtCore import QRect, QSize, Qt, Slot
from PySide6.QtGui import QColor, QFont, QPainter, QPen

from app.config import (
    BBOX_COLOR,
    BBOX_FONT_SIZE,
    BBOX_LABEL_BG_COLOR,
    BBOX_LABEL_TEXT_COLOR,
    BBOX_LINE_WIDTH,
)
from app.overlay.overlay_layer import IOverlayLayer

logger = logging.getLogger(__name__)


class Detection(NamedTuple):
    """
    One detected object: a corner-format box, a confidence, and a class label.

    The box corners are expressed in pixels of the *source frame* — the
    frame that was handed to inference — not in widget coordinates.
    BboxOverlay is responsible for mapping them onto the letterboxed
    video_rect before anything is drawn.
    """

    # Box corners (x1, y1) and (x2, y2), in source-frame pixels.
    x1: float
    y1: float
    x2: float
    y2: float
    # Confidence value; BboxOverlay renders it with two decimals.
    conf: float
    # Class name shown next to the confidence in the box label.
    label: str


class BboxOverlay(IOverlayLayer):
    """
    Overlay layer that renders detection bounding boxes.

    Usage:
        overlay = BboxOverlay()
        camera_view.add_overlay_layer(overlay)
        inference_manager.detections_ready.connect(overlay.on_detections)

    Thread safety:
        on_detections() is called from the GUI thread (via Qt signal).
        paint() is also called from the GUI thread (paintEvent).
        No locks required.
    """

    # Padding (in pixels) added around the label text inside its background.
    _LABEL_PAD_X = 3
    _LABEL_PAD_Y = 1

    def __init__(self) -> None:
        super().__init__()
        self._detections: list[Detection] = []
        self._source_size: QSize = QSize(0, 0)

        self._box_color = QColor(*BBOX_COLOR)
        self._bg_color = QColor(*BBOX_LABEL_BG_COLOR)
        self._text_color = QColor(*BBOX_LABEL_TEXT_COLOR)

        # Pen used for the box outline.
        self._pen = QPen(self._box_color)
        self._pen.setWidth(BBOX_LINE_WIDTH)
        self._pen.setJoinStyle(Qt.PenJoinStyle.MiterJoin)

        # Pen used for label text; built once instead of once per detection
        # on every repaint.
        self._text_pen = QPen(self._text_color)

        self._font = QFont("Monospace")
        self._font.setStyleHint(QFont.StyleHint.TypeWriter)
        self._font.setPointSize(BBOX_FONT_SIZE)
        self._font.setBold(True)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @Slot(object, object)
    def on_detections(
        self,
        detections: list[Detection],
        source_size: tuple[int, int],
    ) -> None:
        """
        Receive detection results from InferenceManager.

        Args:
            detections:  List of Detection namedtuples (pixel coords).
                         An empty or None value clears the overlay.
            source_size: (width, height) of the frame that was inferred.
        """
        # Copy so that later mutation of the sender's list cannot change
        # what paint() draws.
        self._detections = list(detections) if detections else []
        width, height = source_size
        # Coerce to plain ints before handing the values to QSize.
        self._source_size = QSize(int(width), int(height))

    def clear(self) -> None:
        """Remove all currently displayed detections."""
        self._detections = []

    # ------------------------------------------------------------------
    # IOverlayLayer implementation
    # ------------------------------------------------------------------

    def paint(self, painter: QPainter, video_rect: QRect) -> None:
        """
        Draw every stored detection as an outlined box with a text label.

        Args:
            painter:    Painter to draw with. Its state is saved on entry
                        and restored on exit, so the font, pen and brush
                        set here do not leak to later drawing.
            video_rect: Rectangle the source frame is displayed in;
                        detections are scaled from source pixels into it.
        """
        if not self._detections:
            return
        # An empty source size would divide by zero; an empty target rect
        # would collapse every box to a point.
        if self._source_size.isEmpty() or video_rect.isEmpty():
            return

        # Scale factors: source-pixel → video_rect-pixel
        scale_x = video_rect.width() / self._source_size.width()
        scale_y = video_rect.height() / self._source_size.height()

        painter.save()
        try:
            painter.setFont(self._font)
            fm = painter.fontMetrics()
            # Loop-invariant label metrics.
            text_h = fm.height() + 2 * self._LABEL_PAD_Y
            baseline = fm.ascent() + self._LABEL_PAD_Y

            for det in self._detections:
                box_rect = self._to_widget_rect(
                    det, video_rect, scale_x, scale_y
                )

                # Draw bounding box
                painter.setPen(self._pen)
                painter.setBrush(Qt.BrushStyle.NoBrush)
                painter.drawRect(box_rect)

                # Label text: "label 0.87"
                label_text = f"{det.label} {det.conf:.2f}"
                text_w = (
                    fm.horizontalAdvance(label_text) + 2 * self._LABEL_PAD_X
                )
                self._draw_label(
                    painter,
                    video_rect,
                    box_rect,
                    label_text,
                    text_w,
                    text_h,
                    baseline,
                )
        finally:
            painter.restore()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_widget_rect(
        det: Detection,
        video_rect: QRect,
        scale_x: float,
        scale_y: float,
    ) -> QRect:
        """Map a detection's source-frame corners into widget coordinates."""
        wx1 = video_rect.x() + int(det.x1 * scale_x)
        wy1 = video_rect.y() + int(det.y1 * scale_y)
        wx2 = video_rect.x() + int(det.x2 * scale_x)
        wy2 = video_rect.y() + int(det.y2 * scale_y)
        return QRect(wx1, wy1, wx2 - wx1, wy2 - wy1)

    def _draw_label(
        self,
        painter: QPainter,
        video_rect: QRect,
        box_rect: QRect,
        label_text: str,
        text_w: int,
        text_h: int,
        baseline: int,
    ) -> None:
        """Draw the label background and text, kept inside video_rect."""
        # Horizontal: start at the box's left edge, but pull the label back
        # if it would run past the right edge; never start left of the rect
        # (a label wider than the rect is pinned to the left edge).
        max_lx = video_rect.x() + video_rect.width() - text_w
        lx = max(video_rect.left(), min(box_rect.left(), max_lx))

        # Vertical: prefer the strip just above the box; if there is no
        # room above, draw inside the box, still not above the rect's top.
        ly = box_rect.top() - text_h
        if ly < video_rect.top():
            ly = max(box_rect.top(), video_rect.top())

        painter.setPen(Qt.PenStyle.NoPen)
        painter.setBrush(self._bg_color)
        painter.drawRect(QRect(lx, ly, text_w, text_h))

        painter.setPen(self._text_pen)
        painter.drawText(lx + self._LABEL_PAD_X, ly + baseline, label_text)