Implement OCR engine architecture with base, factory, and specific engines
This commit is contained in:
104
app/ocr/tesseract.py
Normal file
104
app/ocr/tesseract.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
from app.ocr.base import OcrLine, OcrResult, crop_bbox, prepare_ocr_image
|
||||
|
||||
|
||||
class TesseractOcrEngine:
    """OCR engine that reads text through the ``pytesseract`` package.

    ``pytesseract`` is imported when the engine is constructed. If that
    fails, the engine is still created: ``load_error`` holds the reason and
    ``read_label`` returns an ``OcrResult`` carrying that error instead of
    raising.

    Configuration keys read by this class (all optional):

    * ``tesseract_cmd`` -- value assigned to
      ``pytesseract.pytesseract.tesseract_cmd``.
    * ``margin`` -- integer forwarded to ``crop_bbox`` (default ``0``).
    * ``psm`` -- integer placed after ``--psm`` (default ``6``).
    * ``language`` -- passed as ``lang`` to pytesseract (default ``"eng"``).
    * ``config`` -- extra arguments appended after ``--psm <n>``.

    The whole mapping is also forwarded to ``prepare_ocr_image``. A key that
    is present but set to ``None`` is treated the same as a missing key.
    """

    name = "tesseract"

    def __init__(self, config: dict) -> None:
        """Store *config* and try to load ``pytesseract`` immediately."""
        self.config = config
        # Human-readable reason why pytesseract could not be loaded, if any.
        self.load_error: str | None = None
        # The imported pytesseract module, or None when loading failed.
        self.pytesseract = None
        self._load()

    def _load(self) -> None:
        """Import ``pytesseract`` and apply the optional ``tesseract_cmd``.

        Failures are recorded in ``load_error`` rather than raised so that a
        missing OCR backend does not prevent the engine from being created.
        """
        try:
            import pytesseract

            command = self.config.get("tesseract_cmd")
            if command:
                # This sets an attribute on the pytesseract module itself, so
                # it applies to every user of pytesseract in this process.
                pytesseract.pytesseract.tesseract_cmd = command
            self.pytesseract = pytesseract
        except Exception as exc:  # deliberate best-effort: keep the reason
            self.load_error = f"Nie mozna zaladowac pytesseract: {exc}"

    def read_label(self, frame_bgr: np.ndarray, bbox: tuple[int, int, int, int]) -> OcrResult:
        """Run OCR on the region of *frame_bgr* described by *bbox*.

        Args:
            frame_bgr: Source image, forwarded unchanged to ``crop_bbox``.
            bbox: Region to read, forwarded unchanged to ``crop_bbox``.

        Returns:
            An ``OcrResult``. On success it carries the recognised text, the
            mean confidence (or ``None``), one ``OcrLine`` per non-blank line
            of text, the elapsed time in milliseconds and the engine name.
            When pytesseract is not loaded, the crop yields ``None`` or the
            OCR call raises, it carries ``error`` instead.

        Raises:
            ValueError, TypeError: If ``margin`` or ``psm`` in the
                configuration is set to a value ``int()`` cannot convert.
        """
        started = time.perf_counter()
        if self.pytesseract is None:
            return self._error_result(
                self.load_error or "OCR Tesseract nie jest zaladowany",
                started,
            )

        roi = crop_bbox(frame_bgr, bbox, margin=self._int_option("margin", 0))
        if roi is None:
            return self._error_result("Nieprawidlowy bbox OCR", started)

        image = prepare_ocr_image(roi, self.config)
        # An empty or None language falls back to the documented default.
        language = self.config.get("language") or "eng"
        tesseract_config = self._tesseract_config()

        try:
            text = self.pytesseract.image_to_string(
                image,
                lang=language,
                config=tesseract_config,
            )
        except Exception as exc:
            return self._error_result(f"Blad OCR Tesseract: {exc}", started)

        # NOTE(review): this is a second pytesseract call on the same image;
        # the reported elapsed time includes it.
        confidence = self._mean_confidence(image, language, tesseract_config)
        return OcrResult(
            text=text,
            confidence=confidence,
            lines=[OcrLine(text=line) for line in text.splitlines() if line.strip()],
            elapsed_ms=self._elapsed_ms(started),
            engine=self.name,
        )

    def _mean_confidence(self, image: np.ndarray, language: str, tesseract_config: str) -> float | None:
        """Return the mean of the non-negative confidences, divided by 100.

        The values come from the ``conf`` entry of
        ``pytesseract.image_to_data``. Entries that cannot be converted to
        ``float`` and negative entries are skipped.

        Returns:
            The mean of the accepted values, or ``None`` when pytesseract is
            not loaded, the call raises, or no value is accepted.
        """
        if self.pytesseract is None:
            return None
        try:
            data = self.pytesseract.image_to_data(
                image,
                lang=language,
                config=tesseract_config,
                output_type=self.pytesseract.Output.DICT,
            )
        except Exception:
            # Confidence is optional; the caller still has the text.
            return None

        values = []
        for raw_conf in data.get("conf", []):
            try:
                confidence = float(raw_conf)
            except (TypeError, ValueError):
                continue
            if confidence >= 0:
                # presumably Tesseract reports 0-100; scale to 0-1.
                values.append(confidence / 100.0)
        if not values:
            return None
        return sum(values) / len(values)

    def _elapsed_ms(self, started: float) -> float:
        """Return milliseconds elapsed since the ``perf_counter`` value *started*."""
        return (time.perf_counter() - started) * 1000.0

    def _error_result(self, message: str, started: float) -> OcrResult:
        """Build a failed ``OcrResult`` with elapsed time and engine name."""
        return OcrResult(
            error=message,
            elapsed_ms=self._elapsed_ms(started),
            engine=self.name,
        )

    def _int_option(self, key: str, default: int) -> int:
        """Return ``config[key]`` as ``int``; a missing or ``None`` value gives *default*."""
        value = self.config.get(key)
        # Explicit None check (not ``or``) so a legitimate 0 is preserved.
        return default if value is None else int(value)

    def _tesseract_config(self) -> str:
        """Build the ``config`` string passed to pytesseract from the options."""
        parts = [f"--psm {self._int_option('psm', 6)}"]
        extra_config = self.config.get("config")
        # Guard against None: str(None) would append the literal "None".
        extra_config = "" if extra_config is None else str(extra_config).strip()
        if extra_config:
            parts.append(extra_config)
        return " ".join(parts)
|
||||
Reference in New Issue
Block a user