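"""Extract Steam Workshop 'Required items' IDs from a Workshop item page.

Example usage (the file name below is illustrative; use this script's actual name):

    python required_items.py "https://steamcommunity.com/sharedfiles/filedetails/?id=XXXXXXXX"
    python required_items.py "https://steamcommunity.com/sharedfiles/filedetails/?id=XXXXXXXX" --json

Prints one required item ID per line, or a JSON array with --json.
resolve_workshop_names() can additionally map those IDs to item titles.
"""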
import argparse
import json
import re
import sys
from typing import Iterable, List, Set, Dict
from urllib.parse import urlparse, parse_qs

import requests
from bs4 import BeautifulSoup


WORKSHOP_ITEM_ID_REGEX = re.compile(r"id=(\d+)")


def extract_id_from_href(href: str) -> str | None:
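    """Return the numeric Workshop ID from a filedetails href, or None.

    Accepts absolute or relative links; falls back to a regex search if URL
    parsing fails.
    """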
    if not href:
        return None

    # Accept absolute or relative Steam workshop/sharedfiles links
    if "filedetails" not in href or "id=" not in href:
        return None

    try:
        parsed = urlparse(href)
        # Handle relative URLs like "/sharedfiles/filedetails/?id=123"
        query = parsed.query or (href.split("?", 1)[1] if "?" in href else "")
        qs = parse_qs(query)
        if "id" in qs and qs["id"]:
            candidate = qs["id"][0]
            return candidate if candidate.isdigit() else None
    except Exception:
        # Last resort: pull the ID straight out of the raw string
        match = WORKSHOP_ITEM_ID_REGEX.search(href)
        if match:
            return match.group(1)
    return None


def parse_main_item_id(url: str) -> str | None:
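    """Return the numeric `id` query parameter of the given Workshop URL, or None."""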
    try:
        parsed = urlparse(url)
        qs = parse_qs(parsed.query)
        if "id" in qs and qs["id"]:
            candidate = qs["id"][0]
            return candidate if candidate.isdigit() else None
    except Exception:
        pass
    return None


def collect_ids_from_elements(elements: Iterable) -> Set[str]:
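    """Collect Workshop IDs from an iterable of anchor-like elements (e.g. bs4 Tags)."""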
    ids: Set[str] = set()
    for el in elements:
        # BeautifulSoup tags expose .get(); anything else yields an empty link
        get_attr = getattr(el, "get", None)
        if callable(get_attr):
            link = el.get("href", "")
        else:
            link = ""
        item_id = extract_id_from_href(link)
        if item_id:
            ids.add(item_id)
    return ids


def extract_required_item_ids_from_html(html: str) -> Set[str]:
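    """Extract the set of 'Required items' Workshop IDs from a Workshop page's HTML.

    Tries three strategies in order: the 'Required items' heading, Steam's
    'requires all of the following other items' hint text, and finally every
    anchor on the page.
    """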
    soup = BeautifulSoup(html, "html.parser")

    # Strategy 1: Look for a section headed "Required items" and parse links within
    section_ids: Set[str] = set()
    heading_candidates = soup.find_all(string=re.compile(r"^\s*Required\s+items\s*$", re.IGNORECASE))
    for heading in heading_candidates:
        parent = heading.parent
        if parent is None:
            continue

        # Search within nearby container siblings/descendants for links
        container = parent
        for _ in range(3):  # climb up a few levels to catch the full block
            if container is None:
                break
            links = container.find_all("a", href=True)
            section_ids |= collect_ids_from_elements(links)
            container = container.parent

    if section_ids:
        return section_ids

    # Strategy 2: Look for any block that contains the sentence used by Steam
    hint_blocks = soup.find_all(string=re.compile(r"requires\s+all\s+of\s+the\s+following\s+other\s+items", re.IGNORECASE))
    for hint in hint_blocks:
        container = hint.parent
        for _ in range(3):
            if container is None:
                break
            links = container.find_all("a", href=True)
            section_ids |= collect_ids_from_elements(links)
            container = container.parent

    if section_ids:
        return section_ids

    # Strategy 3 (fallback): scan all anchors on the page
    all_links = soup.find_all("a", href=True)
    return collect_ids_from_elements(all_links)


def fetch_page(url: str, timeout: int = 20) -> str:
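    """Fetch a page with a browser-like User-Agent and age-gate cookies; raise on HTTP errors."""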
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/126.0.0.0 Safari/537.36"
        ),
        "Accept-Language": "en-US,en;q=0.9",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    }
    # Steam can occasionally require a cookie for age gates. Provide innocuous defaults.
    cookies = {
        "birthtime": "568022401",  # 1988-01-01
        "lastagecheckage": "1-January-1990",
        "mature_content": "1",
    }
    resp = requests.get(url, headers=headers, cookies=cookies, timeout=timeout)
    resp.raise_for_status()
    return resp.text


def extract_required_item_ids(url: str) -> List[str]:
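    """Return the required item IDs for the Workshop item at `url`, sorted numerically.

    The item's own ID is excluded from the result.
    """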
    html = fetch_page(url)
    found_ids = extract_required_item_ids_from_html(html)

    # Remove the current page's ID if present
    current_id = parse_main_item_id(url)
    if current_id and current_id in found_ids:
        found_ids.remove(current_id)

    return sorted(found_ids, key=int)


def resolve_workshop_names(ids: List[str], timeout: int = 20) -> Dict[str, str]:
    """Resolve Workshop IDs to human-readable titles using the Steam API, with HTML fallback.

    Uses ISteamRemoteStorage.GetPublishedFileDetails, batching up to 100 IDs per call.
    Falls back to scraping each item's page if the API fails.
    """
    # De-duplicate while preserving order, keeping only numeric IDs
    id_list = list(dict.fromkeys(i for i in ids if i and i.isdigit()))
    if not id_list:
        return {}

    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/126.0.0.0 Safari/537.36"
        ),
    }

    api_url = "https://api.steampowered.com/ISteamRemoteStorage/GetPublishedFileDetails/v1/"
    resolved: Dict[str, str] = {}

    try:
        session = requests.Session()
        session.headers.update(headers)
        batch_size = 100
        for start in range(0, len(id_list), batch_size):
            batch = id_list[start:start + batch_size]
            data = {"itemcount": len(batch)}
            for idx, pub_id in enumerate(batch):
                data[f"publishedfileids[{idx}]"] = pub_id
            resp = session.post(api_url, data=data, timeout=timeout)
            resp.raise_for_status()
            payload = resp.json()
            details = payload.get("response", {}).get("publishedfiledetails", [])
            for entry in details:
                if entry.get("result") == 1:
                    title = entry.get("title")
                    pub_id = str(entry.get("publishedfileid"))
                    if pub_id and title:
                        resolved[pub_id] = title
    except Exception:
        # API failure; fall back to HTML scraping below
        pass

    # Fallback for unresolved IDs: scrape the item page
    unresolved = [i for i in id_list if i not in resolved]
    for pub_id in unresolved:
        try:
            page_url = f"https://steamcommunity.com/sharedfiles/filedetails/?id={pub_id}"
            html = fetch_page(page_url, timeout=timeout)
            soup = BeautifulSoup(html, "html.parser")
            name = None
            og = soup.find("meta", attrs={"property": "og:title"})
            if og and og.get("content"):
                name = og.get("content").strip()
            if not name:
                title_div = soup.find("div", class_="workshopItemTitle")
                if title_div and title_div.text:
                    name = title_div.text.strip()
            if name:
                resolved[pub_id] = name
        except Exception:
            # Leave unresolved if both methods fail
            pass

    return resolved


def main() -> None:
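    """CLI entry point: parse arguments, extract IDs, and print them (plain text or JSON)."""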
    parser = argparse.ArgumentParser(description="Extract Steam Workshop 'Required items' IDs from a Workshop item page")
    parser.add_argument("url", help="Steam Workshop item URL (e.g., https://steamcommunity.com/sharedfiles/filedetails/?id=XXXXXXXX)")
    parser.add_argument("--json", action="store_true", help="Print JSON array instead of plain text")
    args = parser.parse_args()

    try:
        ids = extract_required_item_ids(args.url)
    except requests.HTTPError as http_err:
        print(f"HTTP error: {http_err}", file=sys.stderr)
        sys.exit(2)
    except Exception as exc:
        print(f"Failed to extract IDs: {exc}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        print(json.dumps(ids))
    else:
        if not ids:
            print("No required item IDs found.")
        else:
            print("\n".join(ids))


if __name__ == "__main__":
    main()