from __future__ import annotations

import re
from dataclasses import dataclass, asdict

# Order number: four digits, "/", four digits, "/", then a suffix from 1 to 99
# with no leading zero.  The group must be named "order" because
# parse_label_text() reads it back with ``match.group("order")``.
ORDER_RE = re.compile(r"\b(?P<order>\d{4}/\d{4}/(?:[1-9]|[1-9]\d))\b")


@dataclass
class ParsedLabel:
    """Fields extracted from the text of one label.

    Attributes:
        order_number: Substring matched by ``ORDER_RE``, or ``None`` when the
            text contains no match.
        color_code: The first entry of the caller's known colors found in the
            text, or ``None``.
        product_model: The first entry of the caller's known models found in
            the text, or ``None``.
        raw_text: The whitespace-normalized text the fields were parsed from.
    """

    order_number: str | None
    color_code: str | None
    product_model: str | None
    raw_text: str

    def to_dict(self) -> dict[str, str | None]:
        """Return the fields as a plain dictionary keyed by attribute name."""
        return asdict(self)


def normalize_ocr_text(text: str) -> str:
    """Collapse every run of whitespace in *text* into a single space.

    Leading and trailing whitespace is removed.  Line breaks (``\\n`` and
    ``\\r``) count as whitespace, so multi-line input becomes one line.
    """
    # str.split() with no arguments already splits on newlines and carriage
    # returns, so no explicit replacement of those characters is required.
    return " ".join(text.split())


def parse_label_text(
    text: str,
    known_colors: list[str],
    known_models: list[str],
) -> ParsedLabel:
    """Extract order number, color code and product model from label text.

    Args:
        text: Raw label text; it is whitespace-normalized before matching.
        known_colors: Candidate color codes, checked in order.  A candidate
            matches when it occurs anywhere in the text as a case-insensitive
            substring.
        known_models: Candidate model names, checked in order.  A candidate
            matches case-insensitively and only between regex word
            boundaries (``\\b``).

    Returns:
        A ``ParsedLabel`` whose fields are ``None`` where nothing matched and
        whose ``raw_text`` is the normalized text.  The candidates are
        returned as spelled in the input lists, not as found in the text.
    """
    normalized = normalize_ocr_text(text)
    order_match = ORDER_RE.search(normalized)

    # Upper-case the text once so each color candidate needs only one
    # conversion of its own.
    normalized_upper = normalized.upper()
    color_code = next(
        (color for color in known_colors if color.upper() in normalized_upper),
        None,
    )

    # re.escape() keeps punctuation inside a model name from being read as
    # regex syntax.
    product_model = next(
        (
            model
            for model in known_models
            if re.search(rf"\b{re.escape(model)}\b", normalized, re.I)
        ),
        None,
    )

    return ParsedLabel(
        order_number=order_match.group("order") if order_match else None,
        color_code=color_code,
        product_model=product_model,
        raw_text=normalized,
    )