Files
duck-stain-yolo/app/ocr/tesseract.py

105 lines
3.4 KiB
Python

from __future__ import annotations
import time
import numpy as np
from app.ocr.base import OcrLine, OcrResult, crop_bbox, prepare_ocr_image
class TesseractOcrEngine:
    """OCR engine that reads label text through the ``pytesseract`` wrapper.

    ``pytesseract`` is imported when the engine is constructed. If that import
    fails the engine stays usable: ``load_error`` keeps the reason and every
    ``read_label`` call returns an ``OcrResult`` carrying the error instead of
    raising.

    Config keys read directly by this class (all optional; a key that is
    present but set to ``None`` is treated the same as a missing key):

    * ``tesseract_cmd`` - override for the Tesseract executable path.
    * ``margin`` - forwarded to ``crop_bbox`` as ``margin`` (default ``0``).
    * ``psm`` - value for Tesseract's ``--psm`` flag (default ``6``).
    * ``language`` - value passed as ``lang`` (default ``"eng"``).
    * ``config`` - extra flags appended after ``--psm``.

    The same dict is also handed to ``prepare_ocr_image``, which may read
    further keys that are not visible from this class.
    """

    name = "tesseract"

    def __init__(self, config: dict) -> None:
        """Store *config* and try to load ``pytesseract`` immediately."""
        self.config = config
        self.load_error: str | None = None
        self.pytesseract = None
        self._load()

    def _load(self) -> None:
        """Import ``pytesseract`` and apply the optional executable override.

        Any failure is stored in ``load_error`` rather than raised, so a
        missing dependency surfaces later as an error result from
        ``read_label`` instead of breaking construction.
        """
        try:
            import pytesseract

            command = self.config.get("tesseract_cmd")
            if command:
                pytesseract.pytesseract.tesseract_cmd = command
            self.pytesseract = pytesseract
        except Exception as exc:
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> OcrResult:
        """Run OCR on the region of *frame_bgr* described by *bbox*.

        Args:
            frame_bgr: Source frame, passed unchanged to ``crop_bbox``.
            bbox: Region of interest, passed unchanged to ``crop_bbox``.

        Returns:
            An ``OcrResult``. On success it carries the raw text, the mean
            confidence (or ``None``), one ``OcrLine`` per non-blank text line,
            the elapsed time in milliseconds and the engine name. When the
            engine is not loaded, the bbox is rejected by ``crop_bbox`` or
            Tesseract raises, it carries ``error`` instead of text.
        """
        started = time.perf_counter()
        if self.pytesseract is None:
            return self._error_result(
                self.load_error or "OCR Tesseract nie jest zaladowany",
                started,
            )

        # `or 0` makes an explicit null margin behave like a missing key
        # instead of crashing in int(None).
        margin = int(self.config.get("margin") or 0)
        roi = crop_bbox(frame_bgr, bbox, margin=margin)
        if roi is None:
            return self._error_result("Nieprawidlowy bbox OCR", started)

        image = prepare_ocr_image(roi, self.config)
        # A null or empty language falls back to the default as well.
        language = self.config.get("language") or "eng"
        tesseract_config = self._build_tesseract_config()

        try:
            text = self.pytesseract.image_to_string(
                image,
                lang=language,
                config=tesseract_config,
            )
        except Exception as exc:
            return self._error_result(f"Blad OCR Tesseract: {exc}", started)

        # NOTE: this issues a second pytesseract call (image_to_data) on the
        # same image; the text above is kept from image_to_string.
        confidence = self._mean_confidence(image, language, tesseract_config)
        return OcrResult(
            text=text,
            confidence=confidence,
            lines=[OcrLine(text=line) for line in text.splitlines() if line.strip()],
            elapsed_ms=self._elapsed_ms(started),
            engine=self.name,
        )

    def _build_tesseract_config(self) -> str:
        """Return the flag string for pytesseract: ``--psm N`` plus extras.

        A ``psm`` or ``config`` entry that is present but ``None`` is treated
        as absent. Without that guard ``str(None)`` would append the literal
        token ``None`` to the Tesseract command line and ``int(None)`` would
        raise ``TypeError``.
        """
        raw_psm = self.config.get("psm")
        # Explicit None check: 0 is a legitimate psm value, so `or 6` is wrong.
        psm = 6 if raw_psm is None else int(raw_psm)
        flags = f"--psm {psm}"

        raw_extra = self.config.get("config")
        extra = "" if raw_extra is None else str(raw_extra).strip()
        return f"{flags} {extra}" if extra else flags

    def _error_result(self, message: str, started: float) -> OcrResult:
        """Build a failed ``OcrResult`` stamped with elapsed time and engine."""
        return OcrResult(
            error=message,
            elapsed_ms=self._elapsed_ms(started),
            engine=self.name,
        )

    def _mean_confidence(self, image: np.ndarray, language: str, tesseract_config: str) -> float | None:
        """Return the mean confidence reported by ``image_to_data``, scaled by 1/100.

        Entries of the ``conf`` list that cannot be parsed as a float or that
        are negative are ignored. Returns ``None`` when the engine is not
        loaded, when ``image_to_data`` raises, or when no usable entry remains.
        """
        if self.pytesseract is None:
            return None
        try:
            data = self.pytesseract.image_to_data(
                image,
                lang=language,
                config=tesseract_config,
                output_type=self.pytesseract.Output.DICT,
            )
        except Exception:
            # Confidence is best-effort: the recognized text is still returned.
            return None

        values = []
        for raw_conf in data.get("conf", []):
            try:
                confidence = float(raw_conf)
            except (TypeError, ValueError):
                continue
            if confidence >= 0:
                values.append(confidence / 100.0)
        if not values:
            return None
        return sum(values) / len(values)

    def _elapsed_ms(self, started: float) -> float:
        """Return milliseconds elapsed since the ``perf_counter`` value *started*."""
        return (time.perf_counter() - started) * 1000.0