skrypt do scrapowania tresci z lokalnej strony mayo
This commit is contained in:
176
backend/mayo/mayo_session.py
Normal file
176
backend/mayo/mayo_session.py
Normal file
@@ -0,0 +1,176 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin
|
||||
import re
|
||||
import logging
|
||||
from pprint import pprint
|
||||
|
||||
class MayoSession:
    """Authenticated HTTP client for scraping the local Mayo order system.

    Wraps a ``requests.Session`` that keeps the login cookie, re-logs in
    when the server answers with the login page, and parses the HTML
    result tables (``<table class="std2">``) into plain Python data.
    """

    # CSS class of the HTML table that holds the result rows.
    _RESULTS_TABLE_CLASS = "std2"

    def __init__(self, base_url, login, password, db="1", timeout=30):
        """
        base_url: e.g. 'http://192.168.0.152/mayo2'
        login, password: login credentials
        db: database number (e.g. "1" = Mayones 2)
        timeout: seconds to wait for each HTTP request before giving up;
            ``None`` waits indefinitely (the behaviour before this
            parameter existed)
        """
        self.session = requests.Session()
        self.base_url = base_url
        self.timeout = timeout
        self.login_url = self._absolute_url("login.php")
        # Form field names expected by login.php.
        self.credentials = {
            "login": login,
            "pass": password,
            "baza": db,
        }

    # ------------------------------------------------------------------
    # internal helpers
    # ------------------------------------------------------------------

    def _absolute_url(self, href):
        """Resolve *href* (as found in a page) against ``self.base_url``.

        The base is forced to end with exactly one slash before joining;
        otherwise ``urljoin`` would replace the last path segment of the
        base (e.g. ``mayo2``) instead of appending to it. Absolute and
        root-relative hrefs are resolved the way a browser would.
        """
        return urljoin(self.base_url.rstrip("/") + "/", href)

    @staticmethod
    def _is_login_page(response):
        """Return True when *response* looks like the login page."""
        return "login" in response.url or "Zaloguj" in response.text

    def _request_with_relogin(self, method, url, **kwargs):
        """Send a request; if the login page comes back, log in and retry once."""
        kwargs.setdefault("timeout", self.timeout)
        response = self.session.request(method, url, **kwargs)

        # Fallback in case the session died between the check and the request.
        if self._is_login_page(response):
            logging.warning("⚠️ Sesja padła — ponawiam logowanie...")
            self.login()
            response = self.session.request(method, url, **kwargs)

        return response

    def _result_rows(self, html):
        """Return the ``<tr>`` tags of the results table in *html*.

        Rows are taken from ``<tbody>`` when present, otherwise from the
        table itself. Returns an empty list when the table is missing.
        """
        soup = BeautifulSoup(html, "html.parser")
        table = soup.find("table", class_=self._RESULTS_TABLE_CLASS)
        if not table:
            return []

        tbody = table.find("tbody")
        return (tbody or table).find_all("tr")

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------

    def login(self):
        """Log in to the local system.

        Raises:
            RuntimeError: if the response still shows the login prompt or
                its final URL contains "login".
        """
        r = self.session.post(
            self.login_url, data=self.credentials, timeout=self.timeout
        )
        # NOTE(review): this assumes a successful login redirects away from
        # login.php; if the server answers in place, the URL check below
        # would always fail. Confirm against the real server.
        if "Zaloguj się" in r.text or "login" in r.url:
            raise RuntimeError("Nie udało się zalogować do Mayo.")
        logging.info("✅ Zalogowano poprawnie do systemu Mayo.")

    def ensure_logged_in(self):
        """Fetch index.php and log in again if no logout link is shown."""
        test_url = self._absolute_url("index.php")
        r = self.session.get(test_url, timeout=self.timeout)

        if "Wyloguj" not in r.text:
            logging.info("🔐 Sesja wygasła — loguję ponownie...")
            self.login()

    def get_order_page(self, url):
        """Return the HTML of the page at *url*, logging in first if needed."""
        self.ensure_logged_in()
        return self._request_with_relogin("GET", url).text

    def search_order(self, order_number):
        """Submit the order filter form and return the result page HTML.

        order_number: order number; converted to ``str`` and left-padded
            with zeros to at least four characters before being sent.
        """
        self.ensure_logged_in()

        url = self._absolute_url("index.php?filtr=1&strona=0&sort_order=1")
        payload = {
            "zaw": "",
            "r_od": "",
            # Important: the number is sent zero-padded to four characters.
            "nr_zam": str(order_number).zfill(4),
            "typ_kl": "",
            "klient": "",
            "r_do": "",
            "row_count": "25",
        }

        return self._request_with_relogin("POST", url, data=payload).text

    def parse_search_results(self, html):
        """Parse a search result page into a list of order dicts.

        Each dict has the keys ``order_id``, ``client``, ``prod_list``,
        ``url`` (absolute) and ``guitars_url`` (an empty list for the
        caller to fill in). Rows with fewer than three cells, or whose
        first cell holds no link with an ``href``, are skipped.
        """
        results = []

        for row in self._result_rows(html):
            tds = row.find_all("td")

            # Skip the header and other odd rows.
            if len(tds) < 3:
                continue

            # Require an href so the URL is never built from ``None``.
            link_tag = tds[0].find("a", href=True)
            if not link_tag:
                continue

            results.append({
                "order_id": link_tag.get_text(strip=True),
                "client": tds[2].get_text(strip=True),
                # Non-breaking spaces are removed from the product list.
                "prod_list": tds[1].get_text(strip=True).replace("\xa0", ""),
                "url": self._absolute_url(link_tag["href"]),
                "guitars_url": [],
            })

        return results

    def parse_order_list(self, html):
        """Return absolute URLs of the ``id_zestawu=`` links on an order page.

        At most one URL is taken per table row: the first link in the row
        whose ``href`` contains ``id_zestawu=``.
        """
        results = []

        for row in self._result_rows(html):
            for link in row.find_all("a", href=True):
                href = link["href"]
                if "id_zestawu=" in href:
                    results.append(self._absolute_url(href))
                    break  # one link per row

        return results
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke run: search for one order, then collect the guitar
    # links from every order page that the search returned.
    # NOTE(review): host and credentials are hard-coded here — consider
    # moving them out of the source file.
    client = MayoSession("http://10.8.0.6/mayo2", "nowakb", "def")

    found = client.parse_search_results(client.search_order("0027"))
    pprint(found)

    # Fill in the guitar URLs for each order in place.
    for entry in found:
        order_html = client.get_order_page(entry["url"])
        entry["guitars_url"] = client.parse_order_list(order_html)

    print("---------------")
    pprint(found)
|
||||
Reference in New Issue
Block a user