# duck-stain-yolo/app/ocr/tesseract.py

from __future__ import annotations

import time

import numpy as np

from app.ocr.base import OcrLine, OcrResult, crop_bbox, prepare_ocr_image


class TesseractOcrEngine:
    name = "tesseract"

    def __init__(self, config: dict) -> None:
        self.config = config
        self.load_error: str | None = None
        self.pytesseract = None
        self._load()

    def _load(self) -> None:
        try:
            import pytesseract

            command = self.config.get("tesseract_cmd")
            if command:
                pytesseract.pytesseract.tesseract_cmd = command
            self.pytesseract = pytesseract
        except Exception as exc:
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> OcrResult:
        started = time.perf_counter()
        if self.pytesseract is None:
            return OcrResult(
                error=self.load_error or "OCR Tesseract nie jest zaladowany",
                elapsed_ms=self._elapsed_ms(started),
                engine=self.name,
            )

        margin = int(self.config.get("margin", 0))
        roi = crop_bbox(frame_bgr, bbox, margin=margin)
        if roi is None:
            return OcrResult(
                error="Nieprawidlowy bbox OCR",
                elapsed_ms=self._elapsed_ms(started),
                engine=self.name,
            )

        image = prepare_ocr_image(roi, self.config)
        psm = int(self.config.get("psm", 6))
        language = self.config.get("language", "eng")
        extra_config = str(self.config.get("config", "")).strip()
        tesseract_config = f"--psm {psm}"
        if extra_config:
            tesseract_config = f"{tesseract_config} {extra_config}"

        try:
            text = self.pytesseract.image_to_string(
                image,
                lang=language,
                config=tesseract_config,
            )
        except Exception as exc:
            return OcrResult(
                error=f"Blad OCR Tesseract: {exc}",
                elapsed_ms=self._elapsed_ms(started),
                engine=self.name,
            )

        confidence = self._mean_confidence(image, language, tesseract_config)
        return OcrResult(
            text=text,
            confidence=confidence,
            lines=[OcrLine(text=line) for line in text.splitlines() if line.strip()],
            elapsed_ms=self._elapsed_ms(started),
            engine=self.name,
        )

    def _mean_confidence(self, image: np.ndarray, language: str, tesseract_config: str) -> float | None:
        if self.pytesseract is None:
            return None
        try:
            data = self.pytesseract.image_to_data(
                image,
                lang=language,
                config=tesseract_config,
                output_type=self.pytesseract.Output.DICT,
            )
        except Exception:
            return None

        values = []
        for raw_conf in data.get("conf", []):
            try:
                confidence = float(raw_conf)
            except (TypeError, ValueError):
                continue
            if confidence >= 0:
                values.append(confidence / 100.0)
        if not values:
            return None
        return sum(values) / len(values)

    def _elapsed_ms(self, started: float) -> float:
        return (time.perf_counter() - started) * 1000.0