# Scrape rows out of a saved HTML table with BeautifulSoup.
#
# NOTE(review): this script was recovered from a paste in which every newline
# had been collapsed into a single line (which made it a SyntaxError). The line
# structure below was rebuilt from the statement syntax. The original text is
# also truncated inside `extract_value`; see the note at the end.
import csv

from bs4 import BeautifulSoup

# Load your HTML (replace this with reading from a file or requests.get().text)
with open("packages_table.html", "r", encoding="utf-8") as f:
    html = f.read()

soup = BeautifulSoup(html, "html.parser")

# Accumulator for the extracted rows. The annotation suggests one tuple of
# three strings per table row -- presumably (name, 2nd column, 3rd column).
rows: list[tuple[str, str, str]] = []

for tr in soup.select("tbody > tr"):
    # First column: repo name, read from the link inside the row's <th>.
    # Rows without such a link get an empty name.
    name_tag = tr.select_one("th a")
    name = name_tag.get_text(strip=True) if name_tag else ""

    # Second and third columns: prefer span[title], fall back to span text
    td_tags = tr.select("td")
    if len(td_tags) >= 2:

        def extract_value(td):
            """Return the value carried by the first <span> inside *td*.

            A non-empty ``title`` attribute wins; otherwise the span's
            stripped text content is used.
            """
            span = td.select_one("span")
            if span:
                # Prefer title attribute, else text content
                return span.get("title") or span.get_text(strip=True)
            # Sometimes there's no <span>, just text inside the cell.
            # NOTE(review): the source is cut off at this point. The fallback
            # for cells without a <span>, the calls to extract_value, the
            # append to `rows`, and any use of `csv` are not visible here and
            # still need to be restored from the original script.