#!/usr/bin/env python3
# Provenance: scripts/find_workshop_ids.py as added by commit
# dda1618fa4aa32b520d8d2f4a671210fbfde8b19 ("find workshop ids script").

r"""
Find corresponding Steam Workshop IDs for Project Zomboid mods listed in a mods list file.

This script reads a semicolon-separated list of mod entries (like the contents of mods.txt),
indexes the local Workshop directory for Project Zomboid (app id 108600), and for each mod
attempts to find the Workshop item id.

Matching strategy (in order):
1) If an entry starts with six or more digits followed by a forward or back slash
   (e.g. "<digits>/<anything>"), take the digits as the workshop id directly.
2) Match on mod IDs parsed from mod.info files.
3) Match on mod names (from mod.info or workshop.txt when present).
4) Repeat the two lookups above after removing bracketed tags such as "[B42]".

All lookups in steps 2-4 compare normalized text (lowercased, everything except a-z and 0-9
removed). Text that normalizes to nothing is never indexed or matched.

Output is a semicolon-separated file with a header row and one row per input entry:
    input;workshop_id;match_type;matched_value;source_path
where workshop_id is "NOT_FOUND" for unresolved entries. When at least one
(workshop id, mod id) pair can be derived, a blank line and a "Mods=...;" line are appended.

Defaults:
- Mods file: mods.txt in current directory
- Workshop directory: G:\SteamLibrary\steamapps\workshop\content\108600
- Output file: workshop_ids_out.txt in current directory

Usage examples:
    python scripts/find_workshop_ids.py
    python scripts/find_workshop_ids.py --mods-file "d:\7. Git\tools\mods.txt" \
        --workshop-dir G:\SteamLibrary\steamapps\workshop\content\108600 \
        --output workshop_ids_out.txt
"""

from __future__ import annotations

import argparse
import csv
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, Optional, Tuple


DEFAULT_WORKSHOP_DIR = Path(r"G:\SteamLibrary\steamapps\workshop\content\108600")

# Compiled once; these run for every token / indexed entry.
_NON_ALNUM_RE = re.compile(r"[^a-z0-9]+")
_LIST_SPLIT_RE = re.compile(r"[;\n\r]+")
_ID_SPLIT_RE = re.compile(r"[,;]")
_BRACKET_TAG_RE = re.compile(r"\[[^\]]*\]")
_NUMERIC_WITH_SEPARATOR_RE = re.compile(r"^(\d{6,})(?:/|\\).*$")
_NUMERIC_AND_SUFFIX_RE = re.compile(r"^(\d{6,})(?:[\\/](.*))?$")

_BOM = "\ufeff"

ModIndex = Dict[str, List["ModRecord"]]
MatchResult = Tuple[str, str, str, str]
OutputRow = Tuple[str, str, str, str, str]


@dataclass(frozen=True)
class ModRecord:
    """One mod discovered inside a workshop item directory."""

    workshop_id: str  # name of the numeric workshop item directory
    mod_id: Optional[str]  # an `id=` value from mod.info, if any
    mod_name: Optional[str]  # `name=` from mod.info, else from workshop.txt
    source_path: Path  # file the record was read from


def normalize(text: str) -> str:
    """Lowercase and strip every character except a-z and 0-9 for fuzzy comparisons."""
    return _NON_ALNUM_RE.sub("", text.lower())


def parse_mods_list(mods_text: str) -> List[str]:
    """Split by semicolons and newlines, strip whitespace, and drop empty entries."""
    stripped = (piece.strip() for piece in _LIST_SPLIT_RE.split(mods_text))
    return [piece for piece in stripped if piece]


def read_text_file(path: Path) -> str:
    """
    Return the text of `path`, or "" when it cannot be read.

    The file is decoded as UTF-8 with undecodable bytes dropped. `utf-8-sig` is used so a
    leading byte-order mark does not end up glued to the first key of the file.
    """
    try:
        return path.read_text(encoding="utf-8-sig", errors="ignore")
    except (OSError, ValueError):
        # Best effort by design: callers treat "" as "missing or empty".
        return ""


def _iter_key_values(text: str) -> Iterator[Tuple[str, str]]:
    """Yield (lowercased key, stripped value) for each `key=value` line, skipping `#` comments."""
    for raw_line in text.lstrip(_BOM).splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        key, separator, value = line.partition("=")
        if separator:
            yield key.strip().lower(), value.strip()


def parse_mod_info(mod_info_text: str) -> Tuple[List[str], Optional[str]]:
    """
    Extract mod IDs and mod name from the text of a mod.info file.

    - IDs come from `id=SomeId` lines; one line may hold several IDs separated by
      commas or semicolons. IDs from all `id` lines are collected in file order.
    - The name comes from `name=Some Name`; if several non-empty names appear, the last wins.
    - Keys are case-insensitive and whitespace around `=` is tolerated.

    Returns:
        (ids, name) where `ids` may be empty and `name` may be None.
    """
    ids: List[str] = []
    name: Optional[str] = None
    for key, value in _iter_key_values(mod_info_text):
        if key == "id":
            ids.extend(part.strip() for part in _ID_SPLIT_RE.split(value) if part.strip())
        elif key == "name" and value:
            name = value
    return ids, name


def parse_workshop_txt(workshop_txt: str) -> Optional[str]:
    """Return the first non-empty `name=` value found in workshop.txt text, or None."""
    for key, value in _iter_key_values(workshop_txt):
        if key == "name" and value:
            return value
    return None


def _find_mod_info_files(item_dir: Path) -> List[Path]:
    """List mod.info files under `item_dir/mods`, or anywhere in `item_dir` if none are there."""
    found: List[Path] = []
    mods_root = item_dir / "mods"
    if mods_root.is_dir():
        found = sorted(p for p in mods_root.rglob("mod.info") if p.is_file())
    if not found:
        # Less common layout: mod.info somewhere else inside the workshop item.
        found = sorted(p for p in item_dir.rglob("mod.info") if p.is_file())
    return found


def _add_to_index(index: ModIndex, key: str, record: ModRecord) -> None:
    """Append `record` under `key`, ignoring empty keys so unrelated entries cannot collide."""
    if key:
        index.setdefault(key, []).append(record)


def index_workshop(workshop_dir: Path) -> Tuple[ModIndex, ModIndex, ModIndex]:
    """
    Walk the workshop directory and build three lookup indices:
    - by_id: normalized mod id -> ModRecord list
    - by_name: normalized mod name -> ModRecord list
    - by_workshop: workshop id string -> ModRecord list

    Only sub-directories whose name is all digits are treated as workshop items.
    Directories and files are visited in sorted order so that the "first" record for a
    key is the same on every run. Returns three empty dicts when `workshop_dir` is not
    an existing directory.
    """
    by_id: ModIndex = {}
    by_name: ModIndex = {}
    by_workshop: ModIndex = {}

    if not workshop_dir.is_dir():
        return by_id, by_name, by_workshop

    for item_dir in sorted(workshop_dir.iterdir()):
        if not (item_dir.is_dir() and item_dir.name.isdigit()):
            continue
        workshop_id = item_dir.name
        mod_info_paths = _find_mod_info_files(item_dir)

        # workshop.txt is optional; it only contributes a fallback name.
        workshop_name: Optional[str] = None
        workshop_txt_path = item_dir / "workshop.txt"
        if workshop_txt_path.exists():
            workshop_name = parse_workshop_txt(read_text_file(workshop_txt_path))

        # No mod.info at all: still index by the workshop name to help matching.
        if not mod_info_paths and workshop_name:
            record = ModRecord(
                workshop_id=workshop_id,
                mod_id=None,
                mod_name=workshop_name,
                source_path=workshop_txt_path,
            )
            _add_to_index(by_name, normalize(workshop_name), record)
            by_workshop.setdefault(workshop_id, []).append(record)
            continue

        for mod_info_path in mod_info_paths:
            text = read_text_file(mod_info_path)
            if not text:
                continue
            mod_ids, mod_name = parse_mod_info(text)

            # Prefer the mod.info name; fall back to the workshop.txt name if absent.
            effective_name = mod_name or workshop_name

            for mod_id in mod_ids:
                record = ModRecord(
                    workshop_id=workshop_id,
                    mod_id=mod_id,
                    mod_name=effective_name,
                    source_path=mod_info_path,
                )
                _add_to_index(by_id, normalize(mod_id), record)
                by_workshop.setdefault(workshop_id, []).append(record)

            if effective_name:
                record_for_name = ModRecord(
                    workshop_id=workshop_id,
                    mod_id=(mod_ids[0] if mod_ids else None),
                    mod_name=effective_name,
                    source_path=mod_info_path,
                )
                _add_to_index(by_name, normalize(effective_name), record_for_name)

    return by_id, by_name, by_workshop


def try_extract_numeric_workshop_id(token: str) -> Optional[str]:
    """Return the leading run of 6+ digits if the token is '<digits>' + slash + anything."""
    match = _NUMERIC_WITH_SEPARATOR_RE.match(token)
    return match.group(1) if match else None


def _split_numeric_and_suffix(token_text: str) -> Tuple[Optional[str], Optional[str]]:
    """Split '<digits>[/suffix]' into (digits, suffix or None); (None, None) if it does not fit."""
    match = _NUMERIC_AND_SUFFIX_RE.match(token_text)
    if not match:
        return None, None
    return match.group(1), (match.group(2) or None)


def _lookup(key: str, by_id: ModIndex, by_name: ModIndex, type_suffix: str = "") -> Optional[MatchResult]:
    """Look `key` up in the id index, then the name index; empty keys never match."""
    if not key:
        return None
    id_hits = by_id.get(key)
    if id_hits:
        record = id_hits[0]
        return record.workshop_id, "mod_id" + type_suffix, (record.mod_id or ""), str(record.source_path)
    name_hits = by_name.get(key)
    if name_hits:
        record = name_hits[0]
        return record.workshop_id, "mod_name" + type_suffix, (record.mod_name or ""), str(record.source_path)
    return None


def match_token(
    token: str,
    by_id: Dict[str, List[ModRecord]],
    by_name: Dict[str, List[ModRecord]],
) -> Tuple[str, str, str, str]:
    """
    Attempt to find a workshop id for the given token.

    Returns tuple: (workshop_id_or_NOT_FOUND, match_type, matched_value, source_path)

    match_type is one of "provided_numeric", "mod_id", "mod_name",
    "mod_id_normalized", "mod_name_normalized" or "no_match". When several records
    share a key, the first one in the index list is used.
    """
    # 1) Direct numeric extraction
    numeric = try_extract_numeric_workshop_id(token)
    if numeric:
        return numeric, "provided_numeric", numeric, ""

    # 2) + 3) id match, then name match, on the normalized token
    hit = _lookup(normalize(token), by_id, by_name)
    if hit:
        return hit

    # 4) Heuristic: strip bracketed tags like [B42] and try both lookups again
    token_wo_brackets = _BRACKET_TAG_RE.sub("", token).strip()
    if token_wo_brackets and token_wo_brackets != token:
        hit = _lookup(normalize(token_wo_brackets), by_id, by_name, "_normalized")
        if hit:
            return hit

    return "NOT_FOUND", "no_match", "", ""


def write_output(
    output_path: Path,
    rows: Iterable[Tuple[str, str, str, str, str]],
    mods_line: Optional[str] = None,
) -> None:
    """
    Write semicolon-separated output and optionally append a Mods= line at the end.

    A header row is always written first. When `mods_line` is non-empty it is written
    after a blank line, outside the CSV writer (so it is never quoted).
    """
    with output_path.open("w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f, delimiter=";", lineterminator="\n", quoting=csv.QUOTE_MINIMAL)
        writer.writerow(["input", "workshop_id", "match_type", "matched_value", "source_path"])
        for row in rows:
            writer.writerow(list(row))
        if mods_line:
            f.write("\n")
            f.write(mods_line)
            f.write("\n")


def _candidate_mod_ids(
    token: str,
    workshop_id: str,
    match_type: str,
    matched_value: str,
    by_workshop: ModIndex,
) -> List[str]:
    """
    Derive the mod ids to pair with `workshop_id` for one resolved row.

    NOTE(review): the branch nesting below was reconstructed from a whitespace-collapsed
    source; confirm against the original file if one is available.
    """
    # An id match already names the mod id.
    if match_type in ("mod_id", "mod_id_normalized") and matched_value:
        return [matched_value]

    records = by_workshop.get(workshop_id, [])
    candidates: List[str] = []

    # Name match: every indexed mod of this item carrying that same name.
    if match_type in ("mod_name", "mod_name_normalized") and matched_value:
        wanted = normalize(matched_value)
        candidates = [
            rec.mod_id
            for rec in records
            if rec.mod_name and normalize(rec.mod_name) == wanted and rec.mod_id
        ]

    if not candidates:
        num, suffix = _split_numeric_and_suffix(token)
        if num == workshop_id and records:
            if suffix:
                # The part after the slash may be a mod id or the mod's folder name.
                suffix_norm = normalize(suffix)
                for rec in records:
                    folder_name = rec.source_path.parent.name if rec.source_path else ""
                    if rec.mod_id and suffix_norm in (normalize(rec.mod_id), normalize(folder_name)):
                        candidates.append(rec.mod_id)
                        break
            # If still none, include all available mod ids for this workshop item.
            if not candidates:
                candidates = [rec.mod_id for rec in records if rec.mod_id]

    # Fallback to the first non-empty mod id.
    if not candidates:
        first = next((rec.mod_id for rec in records if rec.mod_id), None)
        if first:
            candidates.append(first)

    return candidates


def _build_mods_line(rows: Iterable[OutputRow], by_workshop: ModIndex) -> Optional[str]:
    """Build the de-duplicated 'Mods=<workshop_id>\\<mod_id>;...;' line, or None if no pairs."""
    mods_pairs: List[Tuple[str, str]] = []  # (workshop_id, mod_id), in first-seen order
    seen: set = set()
    for token, workshop_id, match_type, matched_value, _source_path in rows:
        if workshop_id == "NOT_FOUND":
            continue
        for mod_id in _candidate_mod_ids(token, workshop_id, match_type, matched_value, by_workshop):
            key = (workshop_id, mod_id)
            if key not in seen:
                seen.add(key)
                mods_pairs.append(key)

    if not mods_pairs:
        return None
    return "Mods=" + ";".join(f"{wid}\\{mid}" for wid, mid in mods_pairs) + ";"


def main() -> int:
    """
    Command-line entry point.

    Returns 0 after writing the output file, or 2 when the mods file is missing, empty,
    or contains no entries.
    """
    parser = argparse.ArgumentParser(description="Resolve Project Zomboid mod entries to Workshop IDs")
    parser.add_argument(
        "--mods-file",
        type=Path,
        default=Path("mods.txt"),
        help="Path to the mods list file (semicolon-separated)",
    )
    parser.add_argument(
        "--workshop-dir",
        type=Path,
        default=DEFAULT_WORKSHOP_DIR,
        help="Path to the 108600 workshop content directory",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("workshop_ids_out.txt"),
        help="Path to write the output mapping (CSV with semicolons)",
    )
    args = parser.parse_args()

    mods_file: Path = args.mods_file
    workshop_dir: Path = args.workshop_dir
    output_path: Path = args.output

    mods_text = read_text_file(mods_file)
    if not mods_text:
        print(f"Mods file not found or empty: {mods_file}")
        return 2

    tokens = parse_mods_list(mods_text)
    if not tokens:
        print(f"No entries found in mods file: {mods_file}")
        return 2

    by_id, by_name, by_workshop = index_workshop(workshop_dir)
    if not by_id and not by_name:
        print(f"No workshop items indexed under: {workshop_dir}")
        # Continue anyway so provided numeric ids can still pass through

    rows: List[OutputRow] = []
    found = 0
    for token in tokens:
        workshop_id, match_type, matched_value, source_path = match_token(token, by_id, by_name)
        if workshop_id != "NOT_FOUND":
            found += 1
        rows.append((token, workshop_id, match_type, matched_value, source_path))

    mods_line = _build_mods_line(rows, by_workshop)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    write_output(output_path, rows, mods_line)

    print(f"Resolved {found}/{len(tokens)} entries. Wrote: {output_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())