# Two modules follow, reconstructed from a patch that added them as new files.
# Each section is self-contained (own imports, own ``__main__`` guard) so it
# can be saved verbatim to the path named in its banner.

# =============================================================================
# File: backend/mayo/mayo1.py   (new file, mode 100644)
#
# Parses a saved Mayo order page (HTML) into a dict and, when run as a script,
# dumps that dict to JSON.
# =============================================================================
import json
import logging
import time

from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)


def clean(text: str) -> str:
    """Collapse every run of whitespace in *text* into a single space.

    Leading and trailing whitespace is removed. ``str.split()`` without
    arguments also splits on non-breaking spaces (``\\xa0``), so those are
    normalised as well.
    """
    return " ".join(text.split())


def _parse_order_header(first_table, meta: dict) -> None:
    """Fill *meta* with the order number and realisation read from the first table.

    The values are taken positionally from the table's ``<b>`` tags: index 1
    is stored as ``"nr_zamownia"`` and index 3 as ``"realizacja"``.
    """
    b_tags = first_table.find_all("b")

    # Diagnostic dump of the header table; only built when DEBUG is enabled.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Header table text: %s", first_table.get_text())

    # Each index is guarded on its own: a header with only two or three <b>
    # tags must not raise IndexError when the fourth one is read.
    # NOTE(review): the key "nr_zamownia" looks like a typo, but it is part of
    # the emitted JSON, so it is kept unchanged for existing consumers.
    if len(b_tags) > 1:
        meta["nr_zamownia"] = clean(b_tags[1].get_text())
    if len(b_tags) > 3:
        meta["realizacja"] = clean(b_tags[3].get_text())

    client = first_table.find("span", attrs={"style": "font-weight:bold;"})
    if client is not None:
        # NOTE(review): the client name is located but never stored in the
        # result - confirm whether it should be added to ``meta``.
        logger.debug("Client: %s", client.get_text())


def _parse_form_meta(tresc, meta: dict) -> None:
    """Fill *meta* with Model / Odbiorca / Grupa read from the filter form."""
    form = tresc.find("form")
    if form is None:
        return

    table = form.find_parent("table")
    if table is None:
        return

    # The fields are looked up in the WHOLE parent table (all rows), not only
    # inside the <form> element itself.
    for meta_key, field_name in (("Model", "s_nr_kat"), ("Odbiorca", "s_odbiorca")):
        field = table.find("input", {"name": field_name})
        if field is not None:
            meta[meta_key] = clean(field.get("value", ""))

    group_select = table.find("select", {"name": "s_grupa"})
    if group_select is not None:
        selected = group_select.find("option", selected=True)
        if selected is not None:
            meta["Grupa"] = clean(selected.get_text())


def _parse_sections(tresc) -> dict:
    """Return ``{section name: {label: value}}`` built from the rows in *tresc*.

    A row with exactly one cell whose text is upper-case and shorter than 40
    characters starts a section (e.g. ``SZYJKA``). A row with two or more cells
    and a ``<b>`` label in its first cell becomes an entry of the current
    section; rows seen before the first section header are ignored.
    """
    sections = {}
    current = None  # dict of the section being filled, None before the first header

    for tr in tresc.find_all("tr"):
        tds = tr.find_all("td")
        if not tds:
            continue

        if len(tds) == 1:
            text = clean(tds[0].get_text())
            if text.isupper() and len(text) < 40:
                # setdefault: a header that appears twice keeps the entries
                # collected so far instead of resetting them.
                current = sections.setdefault(text, {})
            continue

        # ``is None`` rather than truthiness: an empty section dict is valid.
        if current is None:
            continue

        key_tag = tds[0].find("b")
        if key_tag is None:
            continue

        # clean() already removes non-breaking spaces and outer whitespace.
        key = clean(key_tag.get_text())
        if not key:
            continue

        # Collect every text fragment of the value cell, skipping any fragment
        # that contains "Notatka".
        fragments = (s for s in tds[1].stripped_strings if "Notatka" not in s)
        current[key] = clean(" ".join(fragments))

    return sections


def parse_html(path) -> dict:
    """Parse a saved Mayo order page into a nested dict.

    The file is read as ISO-8859-2 and all data is taken from the
    ``<div id="tresc">`` element.

    Args:
        path: Path of the HTML file to read.

    Returns:
        ``{"meta": {...}, "sections": {...}}``. ``meta`` may contain
        ``"nr_zamownia"``, ``"realizacja"``, ``"Model"``, ``"Odbiorca"`` and
        ``"Grupa"``; each key is present only when the corresponding element
        was found. ``sections`` maps a section name to its ``{label: value}``
        entries. When the page has no ``<div id="tresc">`` both are empty.

    Raises:
        OSError: If *path* cannot be opened.
    """
    with open(path, encoding="ISO-8859-2") as f:
        soup = BeautifulSoup(f, "html.parser")

    result = {"meta": {}, "sections": {}}

    tresc = soup.find("div", id="tresc")
    if tresc is None:
        # Without the content container there is nothing to parse; return the
        # empty skeleton instead of failing with AttributeError.
        logger.warning("No <div id='tresc'> found in %s", path)
        return result

    first_table = tresc.find("table")
    if first_table is not None:
        _parse_order_header(first_table, result["meta"])

    _parse_form_meta(tresc, result["meta"])
    result["sections"] = _parse_sections(tresc)
    return result


def main(input_path="g.htm", output_path="output.json") -> None:
    """Parse *input_path*, print the elapsed time and write the result as JSON."""
    start = time.perf_counter()
    data = parse_html(input_path)
    end = time.perf_counter()

    print(f"Czas wykonania: {end - start:.6f} sekund")

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    main()


# =============================================================================
# File: backend/mayo/mayo_session.py   (new file, mode 100644)
#
# Authenticated HTTP client for the Mayo web application plus parsers for its
# order-search result tables.
# =============================================================================
import logging
import os
import re
from pprint import pprint
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

# Seconds to wait for the server before a request is aborted.
DEFAULT_TIMEOUT = 10


class MayoLoginError(Exception):
    """Raised when the Mayo system does not accept the login attempt."""


class MayoSession:
    """Session-based client for the local Mayo system.

    Wraps a ``requests.Session``: logs in through ``login.php``, searches
    orders through ``index.php`` and parses the ``table.std2`` result tables.
    """

    def __init__(self, base_url, login, password, db="1", timeout=DEFAULT_TIMEOUT):
        """Store connection settings; no request is made here.

        Args:
            base_url: Root URL of the application,
                e.g. ``'http://192.168.0.152/mayo2'``.
            login: Login name sent in the ``login`` form field.
            password: Password sent in the ``pass`` form field.
            db: Database number sent in the ``baza`` form field
                (e.g. ``"1"`` = Mayones 2).
            timeout: Timeout in seconds applied to every HTTP request.
        """
        self.session = requests.Session()
        # Normalised without a trailing slash so the URLs built below never
        # contain a double slash.
        self.base_url = base_url.rstrip("/")
        self.login_url = f"{self.base_url}/login.php"
        self.credentials = {
            "login": login,
            "pass": password,
            "baza": db,
        }
        self.timeout = timeout

    @staticmethod
    def _is_login_page(response) -> bool:
        """Return True when *response* looks like the login page.

        The ASCII-only marker "Zaloguj" is used so the check does not depend
        on how the response body was decoded.
        """
        return "login" in response.url or "Zaloguj" in response.text

    def _absolute_url(self, href: str) -> str:
        """Resolve *href* from a result table against the application root.

        The base gets a trailing slash so ``urljoin`` keeps its last path
        segment; absolute and root-relative hrefs are resolved correctly too.
        """
        return urljoin(self.base_url.rstrip("/") + "/", href)

    def _fetch(self, method: str, url: str, **kwargs):
        """Send a request; if the login page comes back, log in and retry once."""
        kwargs.setdefault("timeout", self.timeout)
        r = self.session.request(method, url, **kwargs)

        # Fallback for a session that expired between the check and the request.
        if self._is_login_page(r):
            logger.warning("⚠️ Sesja padła — ponawiam logowanie...")
            self.login()
            r = self.session.request(method, url, **kwargs)

        return r

    @staticmethod
    def _result_rows(html: str) -> list:
        """Return the ``<tr>`` rows of the first ``table.std2`` in *html*.

        Rows are taken from the table's ``<tbody>`` when it has one, otherwise
        from the table itself. Returns an empty list when there is no table.
        """
        soup = BeautifulSoup(html, "html.parser")
        table = soup.find("table", class_="std2")
        if table is None:
            return []

        tbody = table.find("tbody")
        container = tbody if tbody is not None else table
        return container.find_all("tr")

    def login(self) -> None:
        """Log in to the local system.

        Raises:
            MayoLoginError: If the response still looks like the login page.
        """
        r = self.session.post(
            self.login_url, data=self.credentials, timeout=self.timeout
        )
        if self._is_login_page(r):
            raise MayoLoginError("Nie udało się zalogować do Mayo.")
        logger.info("✅ Zalogowano poprawnie do systemu Mayo.")

    def ensure_logged_in(self) -> None:
        """Request ``index.php`` and log in again if no "Wyloguj" text is present."""
        r = self.session.get(f"{self.base_url}/index.php", timeout=self.timeout)

        if "Wyloguj" not in r.text:
            logger.info("🔐 Sesja wygasła — loguję ponownie...")
            self.login()

    def get_order_page(self, url: str) -> str:
        """Return the HTML of *url*, logging in first when necessary."""
        self.ensure_logged_in()
        return self._fetch("GET", url).text

    def search_order(self, order_number) -> str:
        """Search for *order_number* and return the HTML of the result page.

        Args:
            order_number: Order number; converted to ``str`` and zero-padded
                to four digits before it is sent.
        """
        self.ensure_logged_in()

        url = f"{self.base_url}/index.php?filtr=1&strona=0&sort_order=1"
        payload = {
            "zaw": "",
            "r_od": "",
            # Zero-padding is required (flagged as important by the original author).
            "nr_zam": str(order_number).zfill(4),
            "typ_kl": "",
            "klient": "",
            "r_do": "",
            "row_count": "25",
        }

        return self._fetch("POST", url, data=payload).text

    def parse_search_results(self, html: str) -> list:
        """Extract the orders listed in a search-result page.

        Returns:
            One dict per usable row with the keys ``order_id``, ``client``,
            ``prod_list``, ``url`` (absolute) and ``guitars_url`` (empty list,
            to be filled by the caller). Rows with fewer than three cells, or
            without a link carrying an ``href`` in the first cell, are skipped.
        """
        results = []

        for row in self._result_rows(html):
            tds = row.find_all("td")

            # Skip the header and other irregular rows.
            if len(tds) < 3:
                continue

            link_tag = tds[0].find("a")
            href = link_tag.get("href") if link_tag is not None else None
            if not href:
                continue

            results.append({
                "order_id": link_tag.get_text(strip=True),
                "client": tds[2].get_text(strip=True),
                "prod_list": tds[1].get_text(strip=True).replace("\xa0", ""),
                "url": self._absolute_url(href),
                "guitars_url": [],
            })

        return results

    def parse_order_list(self, html: str) -> list:
        """Return absolute URLs of the ``id_zestawu=`` links in an order page.

        At most one URL is taken per table row: the first link whose ``href``
        contains ``"id_zestawu="``.
        """
        results = []

        for row in self._result_rows(html):
            for link in row.find_all("a", href=True):
                href = link["href"]
                if "id_zestawu=" in href:
                    results.append(self._absolute_url(href))
                    break  # one link per row

        return results


if __name__ == "__main__":
    # Credentials come from the environment so they are never committed to
    # the repository.
    demo_login = os.environ.get("MAYO_LOGIN")
    demo_password = os.environ.get("MAYO_PASSWORD")
    if not demo_login or not demo_password:
        raise SystemExit("Set MAYO_LOGIN and MAYO_PASSWORD before running this demo.")

    mayo = MayoSession(
        os.environ.get("MAYO_BASE_URL", "http://10.8.0.6/mayo2"),
        demo_login,
        demo_password,
    )
    search_html = mayo.search_order("0027")

    orders = mayo.parse_search_results(search_html)
    pprint(orders)

    for order in orders:
        order_html = mayo.get_order_page(order["url"])
        order["guitars_url"] = mayo.parse_order_list(order_html)

    print("---------------")
    pprint(orders)