feat: Improve frame conversion strategy and logging in InferenceManager

2026-05-13 21:47:43 +02:00
parent e9b474b1ed
commit 3b8f7eb3d4
2 changed files with 31 additions and 34 deletions
--- a/app/inference/worker_manager.py
+++ b/app/inference/worker_manager.py
@@ -144,6 +144,12 @@ class InferenceManager(QObject):
          - manager is not running
          - manager is paused
          - worker is still busy with previous frame (drop_if_busy)
        Frame conversion strategy:
          Use QVideoFrame.toImage() → QImage.Format_RGB32 → bits().
          This handles all pixel formats (NV12, YUV420P, BGRA, MJPG, etc.)
          because Qt decodes them internally.  The cost is a CPU colour-space
          conversion, but it only happens when the worker is idle (drop_if_busy).
        """
        if not self.is_running or self._paused or self._busy:
            return
@@ -151,44 +157,35 @@ class InferenceManager(QObject):
        if not frame.isValid():
            return
-        # Map frame to read-only memory, copy raw bytes, unmap
+        # Convert frame to RGB via Qt's built-in decoder.
-        if not frame.map(QVideoFrame.MapMode.ReadOnly):
+        # toImage() handles NV12, YUV420P, MJPG, BGRA — any pixel format.
-            logger.warning("InferenceManager: failed to map QVideoFrame")
+        image = frame.toImage()
        if image.isNull():
            logger.warning("InferenceManager: toImage() returned null")
            return
        width = image.width()
        height = image.height()
        # Ensure we have packed RGB32 (BGRX on little-endian, 4 bytes/pixel)
        from PySide6.QtGui import QImage  # noqa: PLC0415
        if image.format() != QImage.Format.Format_RGB32:
            image = image.convertToFormat(QImage.Format.Format_RGB32)
        # Extract RGB bytes (drop alpha/padding channel)
        try:
-            width = frame.width()
+            import numpy as np  # noqa: PLC0415
-            height = frame.height()
+            # bits() returns BGRX (B G R 0xFF) for Format_RGB32
-            raw = bytes(frame.bits(0))   # plane 0 — copies data
+            ptr = image.bits()
-        finally:
+            arr = np.frombuffer(ptr, dtype=np.uint8).reshape((height, width, 4))
-            frame.unmap()
+            # Swap B↔R and drop X → RGB
-
+            rgb = arr[:, :, [2, 1, 0]].copy()
-        if not raw:
+            raw = rgb.tobytes()
        except Exception as exc:
            logger.warning("InferenceManager: frame conversion failed: %s", exc)
            return
-        # Detect number of channels from byte count
+        channels = 3
        expected_rgb = width * height * 3
        expected_rgba = width * height * 4
        if len(raw) >= expected_rgba:
            # BGRA / RGBA — convert to RGB by stripping alpha and swapping B/R
            try:
                import numpy as np  # noqa: PLC0415
                arr = np.frombuffer(raw, dtype=np.uint8).reshape((height, width, 4))
                # Qt delivers BGRA → swap to RGB
                rgb = arr[:, :, [2, 1, 0]].copy()
                raw = rgb.tobytes()
                channels = 3
            except Exception as exc:
                logger.warning("Frame colour conversion failed: %s", exc)
                return
        elif len(raw) >= expected_rgb:
            channels = 3
        else:
            logger.warning(
                "Unexpected frame size: %d bytes for %dx%d",
                len(raw), width, height,
            )
            return
        self._frame_id += 1
        packet = FramePacket(
--- a/app/pipeline/frame_dispatcher.py
+++ b/app/pipeline/frame_dispatcher.py
@@ -69,7 +69,7 @@ class FrameDispatcher(QObject):
        if len(self._subscribers) < before:
            logger.debug("Subscriber removed: %r", callback)
        else:
-            logger.warning("Subscriber not found for removal: %r", callback)
+            logger.debug("Subscriber not found for removal: %r", callback)
    def subscriber_count(self) -> int:
        """Return the number of entries currently held in ``self._subscribers``."""
        return len(self._subscribers)