#!/usr/bin/env python3
r"""
Find corresponding Steam Workshop IDs for Project Zomboid mods listed in a mods list file.

This script reads a semicolon-separated list of mod entries (like the contents
of mods.txt), indexes the local Workshop directory for Project Zomboid
(app id 108600), and for each mod attempts to find the Workshop item id.

Matching strategy (in order):
  1) If an entry looks like "<digits>/<anything>" (six or more leading digits
     followed by "/" or "\"), the digits are taken as the workshop id directly.
  2) Normalized match (case-insensitive, non-alphanumeric characters removed)
     on mod IDs parsed from mod.info files.
  3) Normalized match on mod names (from mod.info, or workshop.txt when present).
  4) Steps 2 and 3 again after removing bracketed tags such as "[B42]".

Outputs a CSV-like file with a header row and semicolon-separated fields per
input entry:
  original_entry;workshop_id|NOT_FOUND;match_type;matched_value;source_path
followed, when at least one mod id could be resolved, by a blank line and a
"Mods=" line listing "<workshop_id>\<mod_id>;" pairs.

Defaults:
  - Mods file: mods.txt in current directory
  - Workshop directory: G:\SteamLibrary\steamapps\workshop\content\108600
  - Output file: workshop_ids_out.txt in current directory

Usage examples:
  python scripts/find_workshop_ids.py
  python scripts/find_workshop_ids.py --mods-file "d:\7. Git\tools\mods.txt" \
      --workshop-dir G:\SteamLibrary\steamapps\workshop\content\108600 \
      --output workshop_ids_out.txt
"""

from __future__ import annotations

import argparse
import csv
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Sequence, Tuple

DEFAULT_WORKSHOP_DIR = Path(r"G:\SteamLibrary\steamapps\workshop\content\108600")


@dataclass(frozen=True)
class ModRecord:
    """One indexed mod entry found inside a workshop item directory."""

    # Name of the numeric workshop item directory the record came from.
    workshop_id: str
    # Mod id parsed from mod.info; None when only a workshop-level name is known.
    mod_id: Optional[str]
    # Name from mod.info, falling back to the workshop.txt name; may be None.
    mod_name: Optional[str]
    # File the record was parsed from (mod.info or workshop.txt).
    source_path: Path


# Normalized key -> records sharing that key.
_RecordIndex = Dict[str, List[ModRecord]]


def normalize(text: str) -> str:
    """Case-fold *text* and drop every non-alphanumeric character.

    The check is Unicode-aware so that ids and names written in non-Latin
    scripts keep their letters instead of collapsing to an empty (and
    therefore colliding) key. For pure-ASCII input the result is the
    lowercase string with everything outside ``a-z0-9`` removed.
    """
    return "".join(ch for ch in text.casefold() if ch.isalnum())


def parse_mods_list(mods_text: str) -> List[str]:
    """Split on semicolons and newlines, strip whitespace, drop empty entries."""
    stripped = (piece.strip() for piece in re.split(r"[;\n\r]+", mods_text))
    return [piece for piece in stripped if piece]


def read_text_file(path: Path) -> str:
    """Return the text of *path*, or ``""`` when it cannot be read.

    The file is decoded as UTF-8 with undecodable bytes ignored. A leading
    byte-order mark is removed (``utf-8-sig``) so it cannot hide the first
    ``key=value`` line or the leading digits of the first mods entry.
    """
    try:
        return path.read_text(encoding="utf-8-sig", errors="ignore")
    except (OSError, ValueError):
        # Best effort: callers treat an unreadable file like an empty one.
        return ""


def parse_mod_info(mod_info_text: str) -> Tuple[List[str], Optional[str]]:
    """Extract mod IDs and the mod name from the text of a mod.info file.

    - IDs come from ``id=SomeId`` lines; one line may hold several IDs
      separated by commas or semicolons.
    - The name comes from ``name=Some Name``; when several non-empty name
      lines exist, the last one wins.

    Blank lines and lines starting with ``#`` are skipped. Keys are matched
    case-insensitively.

    Returns:
        ``(ids, name)`` where *ids* may be empty and *name* may be ``None``.
    """
    ids: List[str] = []
    name: Optional[str] = None
    for raw_line in mod_info_text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        lowered = line.lower()
        if lowered.startswith("id="):
            pieces = (piece.strip() for piece in re.split(r"[,;]", line[3:]))
            ids.extend(piece for piece in pieces if piece)
        elif lowered.startswith("name="):
            value = line[5:].strip()
            if value:
                name = value
    return ids, name


def parse_workshop_txt(workshop_txt: str) -> Optional[str]:
    """Extract a human-readable name from the text of workshop.txt, if any.

    The first non-empty ``name=`` value is returned. When no such line
    exists, the first non-empty ``title=`` value is used instead.
    NOTE(review): Project Zomboid's workshop.txt is understood to store the
    item title under ``title=`` - confirm against a real file.
    """
    first_title: Optional[str] = None
    for raw_line in workshop_txt.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        lowered = line.lower()
        if lowered.startswith("name="):
            value = line[5:].strip()
            if value:
                return value
        elif first_title is None and lowered.startswith("title="):
            first_title = line[6:].strip() or None
    return first_title


def _find_mod_info_paths(item_dir: Path) -> List[Path]:
    """Return the mod.info files of one workshop item, in sorted order."""
    found: List[Path] = []
    mods_root = item_dir / "mods"
    if mods_root.is_dir():
        # Typical structure: <item>/mods/<ModFolder>/mod.info (possibly deeper).
        found = sorted(p for p in mods_root.rglob("mod.info") if p.is_file())
    if not found:
        # Less common: mod.info located elsewhere inside the item directory.
        found = sorted(p for p in item_dir.rglob("mod.info") if p.is_file())
    return found


def index_workshop(
    workshop_dir: Path,
) -> Tuple[Dict[str, List[ModRecord]], Dict[str, List[ModRecord]], Dict[str, List[ModRecord]]]:
    """Walk the workshop directory and build three lookup indices.

    Returns:
        ``(by_id, by_name, by_workshop)`` where
        - by_id: normalized mod id -> ModRecord list
        - by_name: normalized mod name -> ModRecord list
        - by_workshop: workshop id string -> ModRecord list

    Only sub-directories whose name consists of digits are treated as
    workshop items. All three dicts are empty when *workshop_dir* is not an
    existing directory. Items and files are visited in sorted order so that
    "first record wins" lookups give the same answer on every run.
    """
    by_id: _RecordIndex = {}
    by_name: _RecordIndex = {}
    by_workshop: _RecordIndex = {}

    if not workshop_dir.is_dir():
        return by_id, by_name, by_workshop

    for item_dir in sorted(workshop_dir.iterdir()):
        if not (item_dir.is_dir() and item_dir.name.isdigit()):
            continue
        workshop_id = item_dir.name
        mod_info_paths = _find_mod_info_paths(item_dir)

        # workshop.txt is optional; its name is a fallback for mod.info names.
        workshop_name: Optional[str] = None
        workshop_txt_path = item_dir / "workshop.txt"
        if workshop_txt_path.exists():
            workshop_name = parse_workshop_txt(read_text_file(workshop_txt_path))

        if not mod_info_paths:
            # No mod.info at all: still index the item by its workshop name.
            if workshop_name:
                record = ModRecord(
                    workshop_id=workshop_id,
                    mod_id=None,
                    mod_name=workshop_name,
                    source_path=workshop_txt_path,
                )
                name_key = normalize(workshop_name)
                if name_key:
                    by_name.setdefault(name_key, []).append(record)
                by_workshop.setdefault(workshop_id, []).append(record)
            continue

        for mod_info_path in mod_info_paths:
            text = read_text_file(mod_info_path)
            if not text:
                continue
            mod_ids, mod_name = parse_mod_info(text)
            # Prefer the mod.info name; fall back to the workshop.txt name.
            effective_name = mod_name or workshop_name

            for mod_id in mod_ids:
                record = ModRecord(
                    workshop_id=workshop_id,
                    mod_id=mod_id,
                    mod_name=effective_name,
                    source_path=mod_info_path,
                )
                id_key = normalize(mod_id)
                # An empty key would make unrelated entries collide.
                if id_key:
                    by_id.setdefault(id_key, []).append(record)
                by_workshop.setdefault(workshop_id, []).append(record)

            if effective_name:
                name_key = normalize(effective_name)
                if name_key:
                    by_name.setdefault(name_key, []).append(
                        ModRecord(
                            workshop_id=workshop_id,
                            mod_id=(mod_ids[0] if mod_ids else None),
                            mod_name=effective_name,
                            source_path=mod_info_path,
                        )
                    )

    return by_id, by_name, by_workshop


def try_extract_numeric_workshop_id(token: str) -> Optional[str]:
    r"""Return the leading numeric id if *token* looks like ``<digits>/<rest>``.

    The token must start with at least six digits immediately followed by
    ``/`` or ``\``; otherwise ``None`` is returned.
    """
    match = re.match(r"^(\d{6,})(?:/|\\).*$", token)
    return match.group(1) if match else None


def _first_record(index: Dict[str, List[ModRecord]], key: str) -> Optional[ModRecord]:
    """Return the first record stored under *key*; None for empty or unknown keys."""
    if not key:
        return None
    records = index.get(key)
    return records[0] if records else None


def match_token(
    token: str,
    by_id: Dict[str, List[ModRecord]],
    by_name: Dict[str, List[ModRecord]],
) -> Tuple[str, str, str, str]:
    """Attempt to find a workshop id for the given token.

    Returns:
        ``(workshop_id_or_NOT_FOUND, match_type, matched_value, source_path)``.
        *match_type* is one of ``provided_numeric``, ``mod_id``, ``mod_name``,
        ``mod_id_normalized``, ``mod_name_normalized`` or ``no_match``.
    """
    # 1) Direct numeric extraction.
    numeric = try_extract_numeric_workshop_id(token)
    if numeric:
        return numeric, "provided_numeric", numeric, ""

    # 2) + 3) Normalized token against ids, then names.
    attempts: List[Tuple[str, str]] = [(normalize(token), "")]

    # 4) Heuristic: retry with bracketed tags like [B42] removed.
    without_tags = re.sub(r"\[[^\]]*\]", "", token).strip()
    if without_tags and without_tags != token:
        attempts.append((normalize(without_tags), "_normalized"))

    for key, type_suffix in attempts:
        record = _first_record(by_id, key)
        if record is not None:
            return (
                record.workshop_id,
                "mod_id" + type_suffix,
                record.mod_id or "",
                str(record.source_path),
            )
        record = _first_record(by_name, key)
        if record is not None:
            return (
                record.workshop_id,
                "mod_name" + type_suffix,
                record.mod_name or "",
                str(record.source_path),
            )

    return "NOT_FOUND", "no_match", "", ""


def write_output(
    output_path: Path,
    rows: Iterable[Tuple[str, str, str, str, str]],
    mods_line: Optional[str] = None,
) -> None:
    """Write semicolon-separated output and optionally append a Mods= line.

    A header row is written first. When *mods_line* is non-empty it is
    written verbatim (not CSV-quoted) after one blank line.
    """
    with output_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.writer(
            handle, delimiter=";", lineterminator="\n", quoting=csv.QUOTE_MINIMAL
        )
        writer.writerow(["input", "workshop_id", "match_type", "matched_value", "source_path"])
        writer.writerows(rows)
        if mods_line:
            handle.write("\n" + mods_line + "\n")


def _split_numeric_token(token_text: str) -> Tuple[Optional[str], Optional[str]]:
    """Split ``<digits>[/<suffix>]`` into (digits, suffix); (None, None) if no match."""
    match = re.match(r"^(\d{6,})(?:[\\/](.*))?$", token_text)
    if not match:
        return None, None
    return match.group(1), (match.group(2) or None)


def _candidate_mod_ids(
    token: str,
    workshop_id: str,
    match_type: str,
    matched_value: str,
    by_workshop: Dict[str, List[ModRecord]],
) -> List[str]:
    """Return the mod ids that one resolved row contributes to the Mods= line."""
    if match_type in ("mod_id", "mod_id_normalized") and matched_value:
        return [matched_value]

    records = by_workshop.get(workshop_id, [])
    candidates: List[str] = []

    if match_type in ("mod_name", "mod_name_normalized") and matched_value:
        wanted_name = normalize(matched_value)
        candidates = [
            rec.mod_id
            for rec in records
            if rec.mod_name and rec.mod_id and normalize(rec.mod_name) == wanted_name
        ]

    if not candidates:
        numeric, suffix = _split_numeric_token(token)
        if numeric == workshop_id and records:
            if suffix:
                wanted = normalize(suffix)
                # The suffix may name either the mod id or its folder.
                for rec in records:
                    folder_name = rec.source_path.parent.name if rec.source_path else ""
                    if rec.mod_id and wanted in (normalize(rec.mod_id), normalize(folder_name)):
                        candidates.append(rec.mod_id)
                        break
            if not candidates:
                # Nothing specific matched: take every mod id of this item.
                candidates = [rec.mod_id for rec in records if rec.mod_id]

    if not candidates:
        # Last resort: the first record of the item that carries a mod id.
        first = next((rec.mod_id for rec in records if rec.mod_id), None)
        if first:
            candidates.append(first)
    return candidates


def _build_mods_line(
    rows: Iterable[Tuple[str, str, str, str, str]],
    by_workshop: Dict[str, List[ModRecord]],
) -> Optional[str]:
    """Build the ``Mods=`` line from resolved rows; None when nothing resolved."""
    pairs: List[Tuple[str, str]] = []  # (workshop_id, mod_id), first-seen order
    seen: set[Tuple[str, str]] = set()
    for token, workshop_id, match_type, matched_value, _source_path in rows:
        if workshop_id == "NOT_FOUND":
            continue
        for mod_id in _candidate_mod_ids(
            token, workshop_id, match_type, matched_value, by_workshop
        ):
            pair = (workshop_id, mod_id)
            if pair not in seen:
                seen.add(pair)
                pairs.append(pair)
    if not pairs:
        return None
    return "Mods=" + ";".join(f"{wid}\\{mid}" for wid, mid in pairs) + ";"


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        0 on success, 2 when the mods file is missing, empty or has no entries.
    """
    parser = argparse.ArgumentParser(description="Resolve Project Zomboid mod entries to Workshop IDs")
    parser.add_argument(
        "--mods-file",
        type=Path,
        default=Path("mods.txt"),
        help="Path to the mods list file (semicolon-separated)",
    )
    parser.add_argument(
        "--workshop-dir",
        type=Path,
        default=DEFAULT_WORKSHOP_DIR,
        help="Path to the 108600 workshop content directory",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("workshop_ids_out.txt"),
        help="Path to write the output mapping (CSV with semicolons)",
    )
    args = parser.parse_args(argv)

    mods_file: Path = args.mods_file
    workshop_dir: Path = args.workshop_dir
    output_path: Path = args.output

    mods_text = read_text_file(mods_file)
    if not mods_text:
        print(f"Mods file not found or empty: {mods_file}")
        return 2

    tokens = parse_mods_list(mods_text)
    if not tokens:
        print(f"No entries found in mods file: {mods_file}")
        return 2

    by_id, by_name, by_workshop = index_workshop(workshop_dir)
    if not by_id and not by_name:
        print(f"No workshop items indexed under: {workshop_dir}")
        # Continue anyway so provided numeric ids can still pass through.

    rows: List[Tuple[str, str, str, str, str]] = [
        (token, *match_token(token, by_id, by_name)) for token in tokens
    ]
    found = sum(1 for row in rows if row[1] != "NOT_FOUND")

    mods_line = _build_mods_line(rows, by_workshop)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    write_output(output_path, rows, mods_line)

    print(f"Resolved {found}/{len(tokens)} entries. Wrote: {output_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())