Files
Nix-Presentation/public/stats.py

44 lines
1.4 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import csv
from bs4 import BeautifulSoup
# Read the saved table markup from disk and parse it with the built-in
# "html.parser" backend. To work on a live page instead, replace the file
# read with the response text of an HTTP request (e.g. requests.get().text).
with open("packages_table.html", mode="r", encoding="utf-8") as html_file:
    html = html_file.read()
soup = BeautifulSoup(html, features="html.parser")
def extract_value(td):
    """Return the display value of one table cell.

    The ``title`` attribute of the first ``<span>`` in the cell is preferred;
    if that attribute is missing or empty, the span's stripped text is used.
    A cell without any ``<span>`` yields the stripped text of the whole cell
    (for example text inside an ``<a>`` or directly inside the ``<td>``).
    """
    span = td.select_one("span")
    if span is not None:
        # Prefer the title attribute, else the span's text content.
        return span.get("title") or span.get_text(strip=True)
    # Sometimes there's no <span>, just text inside <a> or <td>.
    return td.get_text(strip=True)


# One (name, packages, fresh_packages) tuple per <tr> of the table body.
rows: list[tuple[str, str, str]] = []
for tr in soup.select("tbody > tr"):
    # 1. First column: repo name, read from the <a> inside the row's <th>.
    #    A row without such a link gets an empty name.
    name_tag = tr.select_one("th a")
    name = name_tag.get_text(strip=True) if name_tag else ""

    # 2. Second and third columns: the first two <td> cells of the row.
    td_tags = tr.select("td")
    if len(td_tags) >= 2:
        packages = extract_value(td_tags[0])
        fresh_packages = extract_value(td_tags[1])
    else:
        # Rows with fewer than two data cells are kept, with blank values.
        packages = fresh_packages = ""

    rows.append((name, packages, fresh_packages))
# Dump the header line followed by every extracted row into packages.csv.
# newline="" leaves line-ending handling to the csv module.
with open("packages.csv", "w", newline="", encoding="utf-8") as csv_file:
    csv.writer(csv_file).writerows(
        [["Name", "Packages", "Fresh Packages"], *rows]
    )
print("✅ Extracted", len(rows), "rows into packages.csv")