feat: Improve frame conversion strategy and logging in InferenceManager

This commit is contained in:
2026-05-13 21:47:43 +02:00
parent e9b474b1ed
commit 3b8f7eb3d4
2 changed files with 31 additions and 34 deletions

View File

@@ -144,6 +144,12 @@ class InferenceManager(QObject):
- manager is not running - manager is not running
- manager is paused - manager is paused
- worker is still busy with previous frame (drop_if_busy) - worker is still busy with previous frame (drop_if_busy)
Frame conversion strategy:
Use QVideoFrame.toImage() → QImage.Format_RGB32 → bits().
This handles all pixel formats (NV12, YUV420P, BGRA, MJPG, etc.)
because Qt decodes them internally. The cost is a CPU colour-space
conversion, but it only happens when the worker is idle (drop_if_busy).
""" """
if not self.is_running or self._paused or self._busy: if not self.is_running or self._paused or self._busy:
return return
@@ -151,44 +157,35 @@ class InferenceManager(QObject):
if not frame.isValid(): if not frame.isValid():
return return
# Map frame to read-only memory, copy raw bytes, unmap # Convert frame to RGB via Qt's built-in decoder.
if not frame.map(QVideoFrame.MapMode.ReadOnly): # toImage() handles NV12, YUV420P, MJPG, BGRA — any pixel format.
logger.warning("InferenceManager: failed to map QVideoFrame") image = frame.toImage()
if image.isNull():
logger.warning("InferenceManager: toImage() returned null")
return return
width = image.width()
height = image.height()
# Ensure we have packed RGB32 (BGRX on little-endian, 4 bytes/pixel)
from PySide6.QtGui import QImage # noqa: PLC0415
if image.format() != QImage.Format.Format_RGB32:
image = image.convertToFormat(QImage.Format.Format_RGB32)
# Extract RGB bytes (drop alpha/padding channel)
try: try:
width = frame.width() import numpy as np # noqa: PLC0415
height = frame.height() # bits() returns BGRX (B G R 0xFF) for Format_RGB32
raw = bytes(frame.bits(0)) # plane 0 — copies data ptr = image.bits()
finally: arr = np.frombuffer(ptr, dtype=np.uint8).reshape((height, width, 4))
frame.unmap() # Swap B↔R and drop X → RGB
rgb = arr[:, :, [2, 1, 0]].copy()
if not raw: raw = rgb.tobytes()
except Exception as exc:
logger.warning("InferenceManager: frame conversion failed: %s", exc)
return return
# Detect number of channels from byte count channels = 3
expected_rgb = width * height * 3
expected_rgba = width * height * 4
if len(raw) >= expected_rgba:
# BGRA / RGBA — convert to RGB by stripping alpha and swapping B/R
try:
import numpy as np # noqa: PLC0415
arr = np.frombuffer(raw, dtype=np.uint8).reshape((height, width, 4))
# Qt delivers BGRA → swap to RGB
rgb = arr[:, :, [2, 1, 0]].copy()
raw = rgb.tobytes()
channels = 3
except Exception as exc:
logger.warning("Frame colour conversion failed: %s", exc)
return
elif len(raw) >= expected_rgb:
channels = 3
else:
logger.warning(
"Unexpected frame size: %d bytes for %dx%d",
len(raw), width, height,
)
return
self._frame_id += 1 self._frame_id += 1
packet = FramePacket( packet = FramePacket(

View File

@@ -69,7 +69,7 @@ class FrameDispatcher(QObject):
if len(self._subscribers) < before: if len(self._subscribers) < before:
logger.debug("Subscriber removed: %r", callback) logger.debug("Subscriber removed: %r", callback)
else: else:
logger.warning("Subscriber not found for removal: %r", callback) logger.debug("Subscriber not found for removal: %r", callback)
def subscriber_count(self) -> int: def subscriber_count(self) -> int:
return len(self._subscribers) return len(self._subscribers)