Initial Release
Some checks failed
Build and Upload Release (Windows EXE) / Build Windows EXE (release) has been cancelled
steam_required_ids.py (new file, +234 lines)
@@ -0,0 +1,234 @@
import argparse
import json
import re
import sys
from typing import Iterable, List, Set, Dict
from urllib.parse import urlparse, parse_qs

import requests
from bs4 import BeautifulSoup


WORKSHOP_ITEM_ID_REGEX = re.compile(r"id=(\d+)")


def extract_id_from_href(href: str) -> str | None:
    if not href:
        return None

    # Accept absolute or relative Steam workshop/sharedfiles links
    if "filedetails" not in href or "id=" not in href:
        return None

    try:
        parsed = urlparse(href)
        # Handle relative URLs like "/sharedfiles/filedetails/?id=123".
        # Parenthesized so the manual split is only the fallback for when
        # urlparse found no query string.
        query = parsed.query or (href.split("?", 1)[1] if "?" in href else "")
        qs = parse_qs(query)
        if "id" in qs and qs["id"]:
            candidate = qs["id"][0]
            return candidate if candidate.isdigit() else None
    except Exception:
        match = WORKSHOP_ITEM_ID_REGEX.search(href)
        if match:
            return match.group(1)
    return None

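# Illustrative behavior of extract_id_from_href (made-up IDs, derived from the
# checks above): both absolute and relative filedetails links resolve, anything
# else is rejected.
#   extract_id_from_href("https://steamcommunity.com/sharedfiles/filedetails/?id=123456")  -> "123456"
#   extract_id_from_href("/sharedfiles/filedetails/?id=123456&searchtext=")                -> "123456"
#   extract_id_from_href("/workshop/browse/?appid=294100")                                 -> None
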
def parse_main_item_id(url: str) -> str | None:
    try:
        parsed = urlparse(url)
        qs = parse_qs(parsed.query)
        if "id" in qs and qs["id"]:
            candidate = qs["id"][0]
            return candidate if candidate.isdigit() else None
    except Exception:
        pass
    return None


def collect_ids_from_elements(elements: Iterable) -> Set[str]:
    ids: Set[str] = set()
    for el in elements:
        # Only elements exposing a .get() accessor (i.e., bs4 Tags) carry attributes;
        # strings and other nodes yield an empty link.
        getter = getattr(el, "get", None)
        link = el.get("href", "") if callable(getter) else ""
        item_id = extract_id_from_href(link)
        if item_id:
            ids.add(item_id)
    return ids


def extract_required_item_ids_from_html(html: str) -> Set[str]:
    soup = BeautifulSoup(html, "html.parser")

    # Strategy 1: Look for a section headed "Required items" and parse links within
    section_ids: Set[str] = set()
    heading_candidates = soup.find_all(string=re.compile(r"^\s*Required\s+items\s*$", re.IGNORECASE))
    for heading in heading_candidates:
        parent = heading.parent
        if parent is None:
            continue

        # Search within nearby container siblings/descendants for links
        container = parent
        for _ in range(3):  # climb up a few levels to catch the full block
            if container is None:
                break
            links = container.find_all("a", href=True)
            section_ids |= collect_ids_from_elements(links)
            container = container.parent

    if section_ids:
        return section_ids

    # Strategy 2: Look for any block that contains the sentence used by Steam
    hint_blocks = soup.find_all(string=re.compile(r"requires\s+all\s+of\s+the\s+following\s+other\s+items", re.IGNORECASE))
    for hint in hint_blocks:
        container = hint.parent
        for _ in range(3):
            if container is None:
                break
            links = container.find_all("a", href=True)
            section_ids |= collect_ids_from_elements(links)
            container = container.parent

    if section_ids:
        return section_ids

    # Strategy 3 (fallback): scan all anchors on the page
    all_links = soup.find_all("a", href=True)
    return collect_ids_from_elements(all_links)

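# Illustrative sketch of the markup Strategy 1 targets. This is an assumed
# structure for illustration only; Steam's real class names and nesting may
# differ, which is why the code climbs parents and falls back to Strategies 2-3.
#   <div class="requiredItemsContainer">
#     <div>Required items</div>
#     <a href="https://steamcommunity.com/workshop/filedetails/?id=111">Mod A</a>
#   </div>
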
def fetch_page(url: str, timeout: int = 20) -> str:
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/126.0.0.0 Safari/537.36"
        ),
        "Accept-Language": "en-US,en;q=0.9",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    }
    # Steam can occasionally require a cookie for age gates. Provide innocuous defaults.
    cookies = {
        "birthtime": "568022401",  # Unix epoch, ~1988-01-01 UTC
        "lastagecheckage": "1-January-1990",
        "mature_content": "1",
    }
    resp = requests.get(url, headers=headers, cookies=cookies, timeout=timeout)
    resp.raise_for_status()
    return resp.text


def extract_required_item_ids(url: str) -> List[str]:
    html = fetch_page(url)
    found_ids = extract_required_item_ids_from_html(html)

    # Remove the current page's ID if present
    current_id = parse_main_item_id(url)
    if current_id and current_id in found_ids:
        found_ids.remove(current_id)

    return sorted(found_ids, key=int)

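# Note the numeric sort: sorted(..., key=int) yields e.g. ["9", "123", "4567"]
# rather than the lexicographic ["123", "4567", "9"].
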
def resolve_workshop_names(ids: List[str], timeout: int = 20) -> Dict[str, str]:
    """Resolve Workshop IDs to human-readable titles via the Steam API, with an HTML fallback.

    Uses ISteamRemoteStorage.GetPublishedFileDetails, batching up to 100 IDs per call.
    Falls back to scraping each item's page if the API fails.
    """
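    # Illustrative request shape, as built below: each batch is POSTed as a
    # form-encoded body like
    #   itemcount=2&publishedfileids[0]=111&publishedfileids[1]=222
    # (bracket characters are URL-encoded on the wire).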
    # Deduplicate while preserving order, keeping only numeric IDs.
    id_list = list(dict.fromkeys(i for i in ids if i and i.isdigit()))
    if not id_list:
        return {}

    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/126.0.0.0 Safari/537.36"
        ),
    }

    api_url = "https://api.steampowered.com/ISteamRemoteStorage/GetPublishedFileDetails/v1/"
    resolved: Dict[str, str] = {}

    try:
        session = requests.Session()
        session.headers.update(headers)
        batch_size = 100
        for start in range(0, len(id_list), batch_size):
            batch = id_list[start:start + batch_size]
            data = {"itemcount": len(batch)}
            for idx, pub_id in enumerate(batch):
                data[f"publishedfileids[{idx}]"] = pub_id
            resp = session.post(api_url, data=data, timeout=timeout)
            resp.raise_for_status()
            payload = resp.json()
            details = payload.get("response", {}).get("publishedfiledetails", [])
            for entry in details:
                if entry.get("result") == 1:
                    title = entry.get("title")
                    pub_id = str(entry.get("publishedfileid"))
                    if pub_id and title:
                        resolved[pub_id] = title
    except Exception:
        # API failure; fall back to HTML scraping below
        pass

    # Fallback for unresolved IDs: scrape the item page
    unresolved = [i for i in id_list if i not in resolved]
    for pub_id in unresolved:
        try:
            page_url = f"https://steamcommunity.com/sharedfiles/filedetails/?id={pub_id}"
            html = fetch_page(page_url, timeout=timeout)
            soup = BeautifulSoup(html, "html.parser")
            name = None
            og = soup.find("meta", attrs={"property": "og:title"})
            if og and og.get("content"):
                name = og.get("content").strip()
            if not name:
                title_div = soup.find("div", class_="workshopItemTitle")
                if title_div and title_div.text:
                    name = title_div.text.strip()
            if name:
                resolved[pub_id] = name
        except Exception:
            # Leave unresolved if both methods fail
            pass

    return resolved


def main() -> None:
    parser = argparse.ArgumentParser(description="Extract Steam Workshop 'Required items' IDs from a Workshop item page")
    parser.add_argument("url", help="Steam Workshop item URL (e.g., https://steamcommunity.com/sharedfiles/filedetails/?id=XXXXXXXX)")
    parser.add_argument("--json", action="store_true", help="Print JSON array instead of plain text")
    args = parser.parse_args()

    try:
        ids = extract_required_item_ids(args.url)
    except requests.HTTPError as http_err:
        print(f"HTTP error: {http_err}", file=sys.stderr)
        sys.exit(2)
    except Exception as exc:
        print(f"Failed to extract IDs: {exc}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        print(json.dumps(ids))
    else:
        if not ids:
            print("No required item IDs found.")
        else:
            print("\n".join(ids))


if __name__ == "__main__":
    main()

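# Example invocations (shell), reusing the placeholder URL from the --help text:
#   python steam_required_ids.py "https://steamcommunity.com/sharedfiles/filedetails/?id=XXXXXXXX"
#   python steam_required_ids.py "https://steamcommunity.com/sharedfiles/filedetails/?id=XXXXXXXX" --json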