diff --git a/app/inference/worker_manager.py b/app/inference/worker_manager.py index 7598cb7..24f71db 100644 --- a/app/inference/worker_manager.py +++ b/app/inference/worker_manager.py @@ -144,6 +144,12 @@ class InferenceManager(QObject): - manager is not running - manager is paused - worker is still busy with previous frame (drop_if_busy) + + Frame conversion strategy: + Use QVideoFrame.toImage() → QImage.Format_RGB32 → bits(). + This handles all pixel formats (NV12, YUV420P, BGRA, MJPG, etc.) + because Qt decodes them internally. The cost is a CPU colour-space + conversion, but it only happens when the worker is idle (drop_if_busy). """ if not self.is_running or self._paused or self._busy: return @@ -151,44 +157,35 @@ class InferenceManager(QObject): if not frame.isValid(): return - # Map frame to read-only memory, copy raw bytes, unmap - if not frame.map(QVideoFrame.MapMode.ReadOnly): - logger.warning("InferenceManager: failed to map QVideoFrame") + # Convert frame to RGB via Qt's built-in decoder. + # toImage() handles NV12, YUV420P, MJPG, BGRA — any pixel format. + image = frame.toImage() + if image.isNull(): + logger.warning("InferenceManager: toImage() returned null") return + width = image.width() + height = image.height() + + # Ensure we have packed RGB32 (BGRX on little-endian, 4 bytes/pixel) + from PySide6.QtGui import QImage # noqa: PLC0415 + if image.format() != QImage.Format.Format_RGB32: + image = image.convertToFormat(QImage.Format.Format_RGB32) + + # Extract RGB bytes (drop alpha/padding channel) try: - width = frame.width() - height = frame.height() - raw = bytes(frame.bits(0)) # plane 0 — copies data - finally: - frame.unmap() - - if not raw: + import numpy as np # noqa: PLC0415 + # bits() returns BGRX (B G R 0xFF) for Format_RGB32 + ptr = image.bits() + arr = np.frombuffer(ptr, dtype=np.uint8).reshape((height, width, 4)) + # Swap B↔R and drop X → RGB + rgb = arr[:, :, [2, 1, 0]].copy() + raw = rgb.tobytes() + except Exception as exc: + logger.warning("InferenceManager: frame conversion failed: %s", exc) return - # Detect number of channels from byte count - expected_rgb = width * height * 3 - expected_rgba = width * height * 4 - if len(raw) >= expected_rgba: - # BGRA / RGBA — convert to RGB by stripping alpha and swapping B/R - try: - import numpy as np # noqa: PLC0415 - arr = np.frombuffer(raw, dtype=np.uint8).reshape((height, width, 4)) - # Qt delivers BGRA → swap to RGB - rgb = arr[:, :, [2, 1, 0]].copy() - raw = rgb.tobytes() - channels = 3 - except Exception as exc: - logger.warning("Frame colour conversion failed: %s", exc) - return - elif len(raw) >= expected_rgb: - channels = 3 - else: - logger.warning( - "Unexpected frame size: %d bytes for %dx%d", - len(raw), width, height, - ) - return + channels = 3 self._frame_id += 1 packet = FramePacket( diff --git a/app/pipeline/frame_dispatcher.py b/app/pipeline/frame_dispatcher.py index 4fbbdc0..c507c40 100644 --- a/app/pipeline/frame_dispatcher.py +++ b/app/pipeline/frame_dispatcher.py @@ -69,7 +69,7 @@ class FrameDispatcher(QObject): if len(self._subscribers) < before: logger.debug("Subscriber removed: %r", callback) else: - logger.warning("Subscriber not found for removal: %r", callback) + logger.debug("Subscriber not found for removal: %r", callback) def subscriber_count(self) -> int: return len(self._subscribers)