105 lines
3.4 KiB
Python
105 lines
3.4 KiB
Python
from __future__ import annotations
|
|
|
|
import time
|
|
|
|
import numpy as np
|
|
|
|
from app.ocr.base import OcrLine, OcrResult, crop_bbox, prepare_ocr_image
|
|
|
|
|
|
class TesseractOcrEngine:
    """OCR engine that reads label text through ``pytesseract``.

    ``pytesseract`` is imported once, when the engine is constructed. An
    import/setup failure is stored in ``load_error`` instead of being raised;
    ``read_label`` then returns an ``OcrResult`` that carries this error.

    Config keys read by this class (all optional; an explicit ``None`` is
    handled like a missing key):
        tesseract_cmd: value assigned to ``pytesseract.pytesseract.tesseract_cmd``.
        margin: integer forwarded to ``crop_bbox`` (default 0).
        psm: integer used for the ``--psm`` option (default 6).
        language: value passed to pytesseract as ``lang`` (default "eng").
        config: extra text appended after the ``--psm`` option.

    The same config dict is also handed to ``prepare_ocr_image``.
    """

    name = "tesseract"

    def __init__(self, config: dict) -> None:
        """Store ``config`` and try to load ``pytesseract`` right away."""
        self.config = config
        self.load_error: str | None = None
        self.pytesseract = None
        self._load()

    def _load(self) -> None:
        """Import ``pytesseract`` and apply the optional ``tesseract_cmd`` override.

        Never raises: any failure is recorded as a message in ``load_error``
        and ``self.pytesseract`` stays ``None``.
        """
        try:
            import pytesseract

            command = self.config.get("tesseract_cmd")
            if command:
                pytesseract.pytesseract.tesseract_cmd = command
            self.pytesseract = pytesseract
        except Exception as exc:
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> OcrResult:
        """Run OCR on the ``bbox`` region of ``frame_bgr``.

        Args:
            frame_bgr: source image; forwarded unchanged to ``crop_bbox``.
            bbox: region to read; forwarded unchanged to ``crop_bbox``.

        Returns:
            An ``OcrResult`` with ``engine`` and ``elapsed_ms`` always set.
            On success it carries ``text``, ``confidence`` and one ``OcrLine``
            per non-blank line of text. If the engine is not loaded,
            ``crop_bbox`` returns ``None``, or the OCR call raises, it carries
            ``error`` instead.
        """
        started = time.perf_counter()
        if self.pytesseract is None:
            return self._error_result(
                self.load_error or "OCR Tesseract nie jest zaladowany",
                started,
            )

        roi = crop_bbox(frame_bgr, bbox, margin=self._config_int("margin", 0))
        if roi is None:
            return self._error_result("Nieprawidlowy bbox OCR", started)

        image = prepare_ocr_image(roi, self.config)
        # ``or`` also covers an explicit None / empty string in the config.
        language = self.config.get("language") or "eng"
        tesseract_config = self._build_tesseract_config()

        try:
            text = self.pytesseract.image_to_string(
                image,
                lang=language,
                config=tesseract_config,
            )
        except Exception as exc:
            return self._error_result(f"Blad OCR Tesseract: {exc}", started)

        # Second pytesseract call on the same image, used only for confidence.
        confidence = self._mean_confidence(image, language, tesseract_config)
        return OcrResult(
            text=text,
            confidence=confidence,
            lines=[OcrLine(text=line) for line in text.splitlines() if line.strip()],
            elapsed_ms=self._elapsed_ms(started),
            engine=self.name,
        )

    def _config_int(self, key: str, default: int) -> int:
        """Return ``config[key]`` as an int, or ``default`` if missing or ``None``.

        The check is ``is None`` rather than truthiness so that a configured
        ``0`` is kept. Values that ``int()`` cannot convert still raise.
        """
        value = self.config.get(key)
        return default if value is None else int(value)

    def _build_tesseract_config(self) -> str:
        """Build the ``config`` string handed to pytesseract.

        Returns ``"--psm <psm>"``, followed by the stripped ``config`` entry
        when that entry is non-empty. A ``None`` entry contributes nothing
        (it must not be stringified into the literal ``"None"``).
        """
        psm = self._config_int("psm", 6)
        extra = self.config.get("config")
        extra_config = "" if extra is None else str(extra).strip()
        if extra_config:
            return f"--psm {psm} {extra_config}"
        return f"--psm {psm}"

    def _error_result(self, message: str, started: float) -> OcrResult:
        """Return an ``OcrResult`` describing a failure, stamped with timing and engine name."""
        return OcrResult(
            error=message,
            elapsed_ms=self._elapsed_ms(started),
            engine=self.name,
        )

    def _mean_confidence(self, image: np.ndarray, language: str, tesseract_config: str) -> float | None:
        """Return the mean of the confidences reported by ``image_to_data``.

        Each usable ``conf`` entry is divided by 100 before averaging.

        Returns:
            The mean, or ``None`` when the engine is not loaded, the
            ``image_to_data`` call raises, or no usable entry exists.
        """
        if self.pytesseract is None:
            return None
        try:
            data = self.pytesseract.image_to_data(
                image,
                lang=language,
                config=tesseract_config,
                output_type=self.pytesseract.Output.DICT,
            )
        except Exception:
            # Confidence is best-effort; the recognized text is still returned.
            return None

        values = []
        for raw_conf in data.get("conf", []):
            try:
                confidence = float(raw_conf)
            except (TypeError, ValueError):
                continue
            # Negative entries are skipped - presumably Tesseract's -1 marker
            # for rows without a word-level confidence (confirm in its docs).
            if confidence >= 0:
                values.append(confidence / 100.0)
        if not values:
            return None
        return sum(values) / len(values)

    def _elapsed_ms(self, started: float) -> float:
        """Return milliseconds elapsed since the ``time.perf_counter()`` value ``started``."""
        return (time.perf_counter() - started) * 1000.0
|