from __future__ import annotations

import time

import numpy as np

from app.ocr.base import OcrLine, OcrResult, crop_bbox, prepare_ocr_image


class TesseractOcrEngine:
    """OCR engine that reads label text from an image region via pytesseract.

    Configuration keys read directly by this class:

    * ``tesseract_cmd`` -- optional path assigned to
      ``pytesseract.pytesseract.tesseract_cmd``.
    * ``margin`` -- integer margin forwarded to ``crop_bbox`` (default 0).
    * ``psm`` -- value for Tesseract's ``--psm`` option (default 6).
    * ``language`` -- value passed as ``lang`` (default ``"eng"``).
    * ``config`` -- extra text appended after ``--psm N``.

    The whole config dict is also handed to ``prepare_ocr_image``, which may
    read further keys (not visible from this file).
    """

    name = "tesseract"

    def __init__(self, config: dict) -> None:
        """Store *config* and try to load pytesseract immediately.

        A failed import does not raise; the reason is kept in ``load_error``
        and ``pytesseract`` stays ``None``.
        """
        self.config = config
        self.load_error: str | None = None
        self.pytesseract = None
        self._load()

    def _load(self) -> None:
        """Import pytesseract lazily and apply the optional binary path.

        Any exception is recorded in ``self.load_error`` instead of being
        raised, so constructing the engine never fails because of a missing
        dependency.
        """
        try:
            import pytesseract

            command = self.config.get("tesseract_cmd")
            if command:
                # Module-level attribute: this affects every user of
                # pytesseract in the process, not only this instance.
                pytesseract.pytesseract.tesseract_cmd = command
            self.pytesseract = pytesseract
        except Exception as exc:
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"

    def read_label(
        self,
        frame_bgr: np.ndarray,
        bbox: tuple[int, int, int, int],
    ) -> OcrResult:
        """Run OCR on the *bbox* region of *frame_bgr*.

        Returns an ``OcrResult`` carrying ``text``, ``confidence`` and
        ``lines`` on success, or one carrying ``error`` when pytesseract is
        not loaded, the crop is rejected by ``crop_bbox``, or the
        ``image_to_string`` call raises. ``elapsed_ms`` and ``engine`` are
        always filled in.
        """
        started = time.perf_counter()
        if self.pytesseract is None:
            return self._error_result(
                self.load_error or "OCR Tesseract nie jest zaladowany",
                started,
            )

        # ``or 0`` lets an explicit ``None`` fall back to "no margin" instead
        # of raising TypeError out of int().
        margin = int(self.config.get("margin") or 0)
        roi = crop_bbox(frame_bgr, bbox, margin=margin)
        if roi is None:
            return self._error_result("Nieprawidlowy bbox OCR", started)

        image = prepare_ocr_image(roi, self.config)
        language = self.config.get("language") or "eng"
        tesseract_config = self._build_tesseract_config()

        try:
            text = self.pytesseract.image_to_string(
                image,
                lang=language,
                config=tesseract_config,
            )
        except Exception as exc:
            return self._error_result(f"Blad OCR Tesseract: {exc}", started)

        # NOTE: this is a second pytesseract call on the same image, made
        # only to obtain per-entry confidences.
        confidence = self._mean_confidence(image, language, tesseract_config)
        return OcrResult(
            text=text,
            confidence=confidence,
            lines=[OcrLine(text=line) for line in text.splitlines() if line.strip()],
            elapsed_ms=self._elapsed_ms(started),
            engine=self.name,
        )

    def _build_tesseract_config(self) -> str:
        """Return the option string: ``--psm N`` plus optional extra options."""
        raw_psm = self.config.get("psm")
        # Explicit None check (not ``or``) so that a configured psm of 0 is kept.
        psm = 6 if raw_psm is None else int(raw_psm)

        raw_extra = self.config.get("config")
        # str(None) would leak the literal text "None" into the option string.
        extra_config = "" if raw_extra is None else str(raw_extra).strip()

        tesseract_config = f"--psm {psm}"
        if extra_config:
            tesseract_config = f"{tesseract_config} {extra_config}"
        return tesseract_config

    def _error_result(self, message: str, started: float) -> OcrResult:
        """Build a failed ``OcrResult`` stamped with elapsed time and engine name."""
        return OcrResult(
            error=message,
            elapsed_ms=self._elapsed_ms(started),
            engine=self.name,
        )

    def _mean_confidence(
        self,
        image: np.ndarray,
        language: str,
        tesseract_config: str,
    ) -> float | None:
        """Return the mean of the non-negative ``conf`` values scaled to 0..1.

        Returns ``None`` when pytesseract is not loaded, when
        ``image_to_data`` raises, or when no usable confidence value is
        present. Confidence is treated as optional: failures here never turn
        a successful read into an error.
        """
        if self.pytesseract is None:
            return None
        try:
            data = self.pytesseract.image_to_data(
                image,
                lang=language,
                config=tesseract_config,
                output_type=self.pytesseract.Output.DICT,
            )
        except Exception:
            return None

        values = []
        for raw_conf in data.get("conf", []):
            try:
                confidence = float(raw_conf)
            except (TypeError, ValueError):
                continue
            # Negative values are skipped; presumably they mark rows without
            # a recognised word -- confirm against the pytesseract docs.
            if confidence >= 0:
                values.append(confidence / 100.0)

        if not values:
            return None
        return sum(values) / len(values)

    def _elapsed_ms(self, started: float) -> float:
        """Return milliseconds elapsed since the ``perf_counter`` value *started*."""
        return (time.perf_counter() - started) * 1000.0