diff --git a/html_sample/Aquila.html b/html_sample/Aquila.html
new file mode 100644
index 0000000..1ec61b6
--- /dev/null
+++ b/html_sample/Aquila.html
@@ -0,0 +1,613 @@
+
+
+
+
+
+
+
+
+Mayones Guitars & Basses
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Dot. zam.: 0093/2025/6 z datą realizacji: 2025-02-28 -
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/html_sample/Ori.html b/html_sample/Ori.html
new file mode 100644
index 0000000..d5da186
--- /dev/null
+++ b/html_sample/Ori.html
@@ -0,0 +1,612 @@
+
+
+
+
+
+
+
+
+Mayones Guitars & Basses
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Dot. zam.: 0376/2024/11 z datą realizacji: 2024-09-01 STRINGS Co., Ltd | Thailand -
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/html_sample/regius.html b/html_sample/regius.html
new file mode 100644
index 0000000..d039bb3
--- /dev/null
+++ b/html_sample/regius.html
@@ -0,0 +1,613 @@
+
+
+
+
+
+
+
+
+Mayones Guitars & Basses
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Dot. zam.: 0462/2024/4 z datą realizacji: 2024-11-30 -
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/main.py b/main.py
index 5da8401..c9f2810 100644
--- a/main.py
+++ b/main.py
@@ -26,17 +26,23 @@ def get_finish_type(row_data):
if not color_top or not color_body:
return None
- top_last_char = color_top[-1]
- body_last_char = color_body[-1]
+ # pobierz część po ostatnim myślniku
+ top_suffix = color_top.split('-')[-1].upper()
+ body_suffix = color_body.split('-')[-1].upper()
- if top_last_char == 'G' and body_last_char == 'G':
+ if (top_suffix == 'G' or top_suffix == None) and body_suffix == 'G':
return "GLOSS"
- elif top_last_char == 'S' and body_last_char == 'S':
+ elif (top_suffix == 'S' or top_suffix == None) and body_suffix == 'S':
return "SATIN"
- elif top_last_char in ('G', 'S') and body_last_char == 'M':
+ elif top_suffix in ('G', 'S') and body_suffix == 'M':
return "MIX"
- elif top_last_char in ('M', 'R') and body_last_char in ('M', 'R'):
+ elif top_suffix in ('M', 'R', 'MAT', 'RAW') and body_suffix in ('M', 'R', 'MAT'):
return "MAT"
+ elif top_suffix in ('M', 'R', 'MAT', 'RAW') and body_suffix in ('G', 'S'):
+ return "MIX"
+ elif top_suffix is None and body_suffix in ('M', 'R', 'MAT'):
+ return "MAT"
+
except (KeyError, AttributeError):
return None
return None
@@ -104,9 +110,10 @@ def main():
info.get("color_body"),
info.get("color_neck"),
info.get("color_head"),
- info.get("finish"),
+ info.get("finish_kc"),
+ info.get("finish_s"),
]
-
+ print(f"raw_data: {row_data}")
rows_to_process.append(row_data)
counter += 1
diff --git a/mayo.py b/mayo.py
index a4d689d..1cc8559 100644
--- a/mayo.py
+++ b/mayo.py
@@ -54,7 +54,7 @@ class MayoSession:
- wykończenie
"""
r = self.session.get(url)
- r.encoding = "utf-8"
+ r.encoding = 'ISO-8859-2' # Poprawione kodowanie na podstawie tagu meta w HTML
soup = BeautifulSoup(r.text, "html.parser")
# --- nr zamówienia ---
@@ -67,45 +67,33 @@ class MayoSession:
# --- kolory i wykończenie ---
color_sections = {}
- # Znajdź wszystkie <p> zawierające <a> z tekstem "KOLOR"
- for p in soup.find_all("p"):
- a_tag = p.find("a")
- if not a_tag:
- continue
+
+ # Szukamy linków (a) wewnątrz pogrubienia (b), które zawierają "KOLOR -"
+ for a_tag in soup.select('b > a'):
title = a_tag.get_text(strip=True)
- if title.startswith("KOLOR"):
- # np. "KOLOR - Top"
+ if title.startswith("KOLOR -"):
label = title.replace("KOLOR - ", "").strip()
-
- # znajdź <td> z wartością koloru (niedaleko tego <p>)
- td = p.find_parent("td")
- if td:
- # przejdź do następnego <td>, tam jest <span> z kolorem
- next_td = td.find_next_sibling("td")
- if next_td:
- span = next_td.find("span")
- if span:
- text = span.get_text(" ", strip=True)
- # usuń ewentualne znaki nadmiarowe
- text = re.sub(r"\s+", " ", text)
- color_match = re.search(r"^\s*-\s*([A-Z0-9-]+)", text)
- if color_match:
- text = color_match.group(1)
- color_sections[label] = text
-
- # --- normalizacja nazewnictwa ---
- color_top = color_sections.get("Top")
- color_body = color_sections.get("Korpus")
- color_neck = color_sections.get("Szyjka")
- color_head = color_sections.get("GĹĂłwka")
- finish = color_sections.get("WykoĹczenie [K/C]")
+
+ parent_td = a_tag.find_parent('td')
+ if parent_td:
+ value_td = parent_td.find_next_sibling('td')
+ if value_td:
+ text = value_td.get_text(" ", strip=True)
+ value = None
+ # Wartość jest zazwyczaj pomiędzy myślnikiem a ukośnikiem
+ match = re.search(r'-\s*([^/]+)', text)
+ # print(f"label: {label}, match: {match}, text: {text}")
+ if match:
+ value = match.group(1).strip()
+ color_sections[label] = value
return {
"order_number": order_number,
"model": model,
- "color_top": color_top,
- "color_body": color_body,
- "color_neck": color_neck,
- "color_head": color_head,
- "finish": finish,
+ "color_top": color_sections.get("Top"),
+ "color_body": color_sections.get("Korpus"),
+ "color_neck": color_sections.get("Szyjka"),
+ "color_head": color_sections.get("GĹĂłwka"),
+ "finish_kc": color_sections.get("WykoĹczenie [K/C]"),
+ "finish_s": color_sections.get("WykoĹczenie [S]"),
}