find workshop ids script

2025-10-05 15:16:14 -04:00
parent 9ffa45698e
commit dda1618fa4
1 changed files with 378 additions and 0 deletions
--- a/scripts/find_workshop_ids.py
+++ b/scripts/find_workshop_ids.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+
+r"""
+Find corresponding Steam Workshop IDs for Project Zomboid mods listed in a mods list file.
+
+This script reads a semicolon-separated list of mod entries (like the contents of mods.txt),
+indexes the local Workshop directory for Project Zomboid (app id 108600), and for each mod
+attempts to find the Workshop item id.
+
+Matching strategy (in order):
+1) If an entry looks like "<digits>/<anything>" (six or more digits), extract the digits as
+   the workshop id directly.
+2) Normalized match (case-insensitive, non-alphanumeric removed) on mod IDs parsed from
+   mod.info files.
+3) Normalized match on mod names (from mod.info or workshop.txt when present).
+4) The same ID and name lookups again after stripping bracketed tags such as "[B42]" from
+   the entry.
+
+Outputs a CSV-like file with a header row and semicolon-separated fields per input entry:
+  original_entry;workshop_id|NOT_FOUND;match_type;matched_value;source_path
+When at least one mod id could be resolved, a blank line and a line of the form
+  Mods=<workshop_id>\<mod_id>;<workshop_id>\<mod_id>;
+are appended at the end of the file.
+
+Defaults:
+- Mods file: mods.txt in current directory
+- Workshop directory: G:\SteamLibrary\steamapps\workshop\content\108600
+- Output file: workshop_ids_out.txt in current directory
+
+Usage examples (Windows cmd; "^" continues the command on the next line):
+  python scripts/find_workshop_ids.py
+  python scripts/find_workshop_ids.py --mods-file "d:\7. Git\tools\mods.txt" ^
+      --workshop-dir G:\SteamLibrary\steamapps\workshop\content\108600 ^
+      --output workshop_ids_out.txt
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Tuple
+
+
+# Workshop content directory used when --workshop-dir is not given on the
+# command line; 108600 is the Project Zomboid app id named in the module docstring.
+DEFAULT_WORKSHOP_DIR = Path(r"G:\SteamLibrary\steamapps\workshop\content\108600")
+
+
+@dataclass(frozen=True)
+class ModRecord:
+    """One indexed mod entry found inside a Workshop item directory."""
+
+    # Name of the numeric Workshop item directory the record was found in.
+    workshop_id: str
+    # Mod id parsed from mod.info; None when only a name was available.
+    mod_id: Optional[str]
+    # Name from mod.info, or from workshop.txt as a fallback; may be None.
+    mod_name: Optional[str]
+    # File the data was read from (a mod.info or workshop.txt path).
+    source_path: Path
+
+
+def normalize(text: str) -> str:
+    """Lowercase and strip all non-alphanumeric characters for fuzzy comparisons."""
+    return re.sub(r"[^a-z0-9]+", "", text.lower())
+
+
+def parse_mods_list(mods_text: str) -> List[str]:
+    """Split by semicolons and newlines, strip whitespace, and drop empty entries."""
+    raw_tokens = re.split(r"[;\n\r]+", mods_text)
+    tokens: List[str] = []
+    for token in raw_tokens:
+        trimmed = token.strip()
+        if trimmed:
+            tokens.append(trimmed)
+    return tokens
+
+
+def read_text_file(path: Path) -> str:
+    try:
+        return path.read_text(encoding="utf-8", errors="ignore")
+    except Exception:
+        return ""
+
+
+def parse_mod_info(mod_info_text: str) -> Tuple[List[str], Optional[str]]:
+    """
+    Extract mod IDs and mod name from a mod.info file.
+    - IDs may appear as `id=SomeId` and may contain multiple separated by commas/semicolons.
+    - Name appears as `name=Some Name`.
+    """
+    ids: List[str] = []
+    name: Optional[str] = None
+    for line in mod_info_text.splitlines():
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue
+        if line.lower().startswith("id="):
+            value = line[3:].strip()
+            # Split on common separators for multiple IDs in one line
+            for part in re.split(r"[,;]", value):
+                part_trimmed = part.strip()
+                if part_trimmed:
+                    ids.append(part_trimmed)
+        elif line.lower().startswith("name="):
+            value = line[5:].strip()
+            if value:
+                name = value
+    return ids, name
+
+
+def parse_workshop_txt(workshop_txt: str) -> Optional[str]:
+    """Extract a human-readable name from workshop.txt if present."""
+    for line in workshop_txt.splitlines():
+        line = line.strip()
+        if not line or line.startswith("#"):
+            continue
+        if line.lower().startswith("name="):
+            value = line[5:].strip()
+            if value:
+                return value
+    return None
+
+
+def index_workshop(workshop_dir: Path) -> Tuple[Dict[str, List[ModRecord]], Dict[str, List[ModRecord]], Dict[str, List[ModRecord]]]:
+    """
+    Walk the workshop directory and build two lookup indices:
+    - by_id: normalized mod id -> ModRecord list
+    - by_name: normalized mod name -> ModRecord list
+    - by_workshop: workshop id string -> ModRecord list
+    """
+    by_id: Dict[str, List[ModRecord]] = {}
+    by_name: Dict[str, List[ModRecord]] = {}
+    by_workshop: Dict[str, List[ModRecord]] = {}
+
+    if not workshop_dir.exists() or not workshop_dir.is_dir():
+        return by_id, by_name, by_workshop
+
+    for child in workshop_dir.iterdir():
+        if not child.is_dir():
+            continue
+        if not child.name.isdigit():
+            continue
+        workshop_id = child.name
+
+        # Typical structure: <workshop_id>/mods/*/mod.info
+        mods_root = child / "mods"
+        mod_info_paths: List[Path] = []
+        if mods_root.exists() and mods_root.is_dir():
+            # mod.info may exist directly inside mods_root or nested one level down
+            for sub in mods_root.rglob("mod.info"):
+                if sub.is_file():
+                    mod_info_paths.append(sub)
+
+        # Fall back to any mod.info anywhere inside the workshop item (less common)
+        if not mod_info_paths:
+            for sub in child.rglob("mod.info"):
+                if sub.is_file():
+                    mod_info_paths.append(sub)
+
+        # Try to get workshop name from workshop.txt (optional)
+        workshop_name = None
+        workshop_txt_path = child / "workshop.txt"
+        if workshop_txt_path.exists():
+            workshop_name = parse_workshop_txt(read_text_file(workshop_txt_path))
+
+        # If no mod.info was found, still index by the workshop name to help matching
+        if not mod_info_paths and workshop_name:
+            record = ModRecord(workshop_id=workshop_id, mod_id=None, mod_name=workshop_name, source_path=workshop_txt_path)
+            key = normalize(workshop_name)
+            by_name.setdefault(key, []).append(record)
+            by_workshop.setdefault(workshop_id, []).append(record)
+            continue
+
+        for mod_info_path in mod_info_paths:
+            text = read_text_file(mod_info_path)
+            if not text:
+                continue
+            mod_ids, mod_name = parse_mod_info(text)
+
+            # Prefer mod.info name; fall back to workshop.txt name if absent
+            effective_name = mod_name or workshop_name
+
+            if mod_ids:
+                for mod_id in mod_ids:
+                    record = ModRecord(
+                        workshop_id=workshop_id,
+                        mod_id=mod_id,
+                        mod_name=effective_name,
+                        source_path=mod_info_path,
+                    )
+                    by_id.setdefault(normalize(mod_id), []).append(record)
+                    by_workshop.setdefault(workshop_id, []).append(record)
+
+            if effective_name:
+                record_for_name = ModRecord(
+                    workshop_id=workshop_id,
+                    mod_id=(mod_ids[0] if mod_ids else None),
+                    mod_name=effective_name,
+                    source_path=mod_info_path,
+                )
+                by_name.setdefault(normalize(effective_name), []).append(record_for_name)
+
+    return by_id, by_name, by_workshop
+
+
+def try_extract_numeric_workshop_id(token: str) -> Optional[str]:
+    """Return leading numeric id if the token looks like '<digits>/<anything>'."""
+    match = re.match(r"^(\d{6,})(?:/|\\).*$", token)
+    if match:
+        return match.group(1)
+    return None
+
+
+def match_token(
+    token: str,
+    by_id: Dict[str, List[ModRecord]],
+    by_name: Dict[str, List[ModRecord]],
+) -> Tuple[str, str, str, str]:
+    """
+    Attempt to find a workshop id for the given token.
+    Returns tuple: (workshop_id_or_NOT_FOUND, match_type, matched_value, source_path)
+    """
+    # 1) Direct numeric extraction
+    numeric = try_extract_numeric_workshop_id(token)
+    if numeric:
+        return numeric, "provided_numeric", numeric, ""
+
+    norm = normalize(token)
+
+    # 2) Exact id match
+    if norm in by_id and by_id[norm]:
+        record = by_id[norm][0]
+        return record.workshop_id, "mod_id", (record.mod_id or ""), str(record.source_path)
+
+    # 3) Exact name match
+    if norm in by_name and by_name[norm]:
+        record = by_name[norm][0]
+        return record.workshop_id, "mod_name", (record.mod_name or ""), str(record.source_path)
+
+    # 4) Heuristic: strip bracketed tags like [B42]
+    token_wo_brackets = re.sub(r"\[[^\]]*\]", "", token).strip()
+    if token_wo_brackets and token_wo_brackets != token:
+        norm2 = normalize(token_wo_brackets)
+        if norm2 in by_id and by_id[norm2]:
+            record = by_id[norm2][0]
+            return record.workshop_id, "mod_id_normalized", (record.mod_id or ""), str(record.source_path)
+        if norm2 in by_name and by_name[norm2]:
+            record = by_name[norm2][0]
+            return record.workshop_id, "mod_name_normalized", (record.mod_name or ""), str(record.source_path)
+
+    # Not found
+    return "NOT_FOUND", "no_match", "", ""
+
+
+def write_output(
+    output_path: Path,
+    rows: Iterable[Tuple[str, str, str, str, str]],
+    mods_line: Optional[str] = None,
+) -> None:
+    """Write semicolon-separated output and optionally append a Mods= line at the end."""
+    with output_path.open("w", encoding="utf-8", newline="") as f:
+        writer = csv.writer(f, delimiter=";", lineterminator="\n", quoting=csv.QUOTE_MINIMAL)
+        writer.writerow(["input", "workshop_id", "match_type", "matched_value", "source_path"])
+        for row in rows:
+            writer.writerow(list(row))
+        if mods_line:
+            f.write("\n")
+            f.write(mods_line)
+            f.write("\n")
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Resolve Project Zomboid mod entries to Workshop IDs")
+    parser.add_argument(
+        "--mods-file",
+        type=Path,
+        default=Path("mods.txt"),
+        help="Path to the mods list file (semicolon-separated)",
+    )
+    parser.add_argument(
+        "--workshop-dir",
+        type=Path,
+        default=DEFAULT_WORKSHOP_DIR,
+        help="Path to the 108600 workshop content directory",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=Path("workshop_ids_out.txt"),
+        help="Path to write the output mapping (CSV with semicolons)",
+    )
+    args = parser.parse_args()
+
+    mods_file: Path = args.mods_file
+    workshop_dir: Path = args.workshop_dir
+    output_path: Path = args.output
+
+    mods_text = read_text_file(mods_file)
+    if not mods_text:
+        print(f"Mods file not found or empty: {mods_file}")
+        return 2
+
+    tokens = parse_mods_list(mods_text)
+    if not tokens:
+        print(f"No entries found in mods file: {mods_file}")
+        return 2
+
+    by_id, by_name, by_workshop = index_workshop(workshop_dir)
+    if not by_id and not by_name:
+        print(f"No workshop items indexed under: {workshop_dir}")
+        # Continue anyway so provided numeric ids can still pass through
+
+    rows: List[Tuple[str, str, str, str, str]] = []
+    found = 0
+    for token in tokens:
+        workshop_id, match_type, matched_value, source_path = match_token(token, by_id, by_name)
+        if workshop_id != "NOT_FOUND":
+            found += 1
+        rows.append((token, workshop_id, match_type, matched_value, source_path))
+
+    # Build Mods= line
+    def extract_numeric_and_suffix(token_text: str) -> Tuple[Optional[str], Optional[str]]:
+        m = re.match(r"^(\d{6,})(?:[\\/](.*))?$", token_text)
+        if not m:
+            return None, None
+        return m.group(1), (m.group(2) or None)
+
+    mods_pairs: List[Tuple[str, str]] = []  # (workshop_id, mod_id)
+    seen: set[Tuple[str, str]] = set()
+    for token, workshop_id, match_type, matched_value, _source_path in rows:
+        if workshop_id == "NOT_FOUND":
+            continue
+        candidate_mod_ids: List[str] = []
+
+        if match_type in ("mod_id", "mod_id_normalized") and matched_value:
+            candidate_mod_ids = [matched_value]
+        else:
+            # Try to derive from the indexed records
+            records = by_workshop.get(workshop_id, [])
+            if match_type in ("mod_name", "mod_name_normalized") and matched_value:
+                nm = normalize(matched_value)
+                for rec in records:
+                    if rec.mod_name and normalize(rec.mod_name) == nm and rec.mod_id:
+                        candidate_mod_ids.append(rec.mod_id)
+            if not candidate_mod_ids:
+                num, suffix = extract_numeric_and_suffix(token)
+                if num == workshop_id and records:
+                    if suffix:
+                        suffix_norm = normalize(suffix)
+                        # Try folder name or mod_id match
+                        for rec in records:
+                            folder_name = rec.source_path.parent.name if rec.source_path else ""
+                            if rec.mod_id and (normalize(rec.mod_id) == suffix_norm or normalize(folder_name) == suffix_norm):
+                                candidate_mod_ids.append(rec.mod_id)
+                                break
+                    # If still none, include all available mod_ids for this workshop
+                    if not candidate_mod_ids:
+                        for rec in records:
+                            if rec.mod_id:
+                                candidate_mod_ids.append(rec.mod_id)
+            # Fallback to first non-empty mod_id
+            if not candidate_mod_ids and by_workshop.get(workshop_id):
+                for rec in by_workshop[workshop_id]:
+                    if rec.mod_id:
+                        candidate_mod_ids.append(rec.mod_id)
+                        break
+
+        for mod_id in candidate_mod_ids:
+            key = (workshop_id, mod_id)
+            if key not in seen:
+                seen.add(key)
+                mods_pairs.append(key)
+
+    mods_line = None
+    if mods_pairs:
+        mods_line = "Mods=" + ";".join([f"{wid}\\{mid}" for wid, mid in mods_pairs]) + ";"
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    write_output(output_path, rows, mods_line)
+
+    print(f"Resolved {found}/{len(tokens)} entries. Wrote: {output_path}")
+    return 0
+
+
+# Run main() only when executed as a script and use its return value as the
+# process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
+
+