Files
Tools/scripts/find_workshop_ids.py
2025-10-05 15:16:14 -04:00

379 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Find corresponding Steam Workshop IDs for Project Zomboid mods listed in a mods list file.
This script reads a semicolon-separated list of mod entries (like the contents of mods.txt),
indexes the local Workshop directory for Project Zomboid (app id 108600), and for each mod
attempts to find the Workshop item id.
Matching strategy (in order):
1) If an entry looks like "<digits>/<anything>", extract the digits as the workshop id directly.
2) Match on mod IDs parsed from mod.info files (case-insensitive, non-alphanumeric removed).
3) Match on mod names from mod.info or workshop.txt when present (same normalization).
4) Retry steps 2 and 3 after stripping bracketed tags such as "[B42]" from the entry.
Outputs a CSV-like file with semicolon-separated fields per input entry:
original_entry;workshop_id|NOT_FOUND;match_type;matched_value;source_path
Defaults:
- Mods file: mods.txt in current directory
- Workshop directory: G:\SteamLibrary\steamapps\workshop\content\108600
- Output file: workshop_ids_out.txt in current directory
Usage examples:
python scripts/find_workshop_ids.py
python scripts/find_workshop_ids.py --mods-file d:\\7. Git\\tools\\mods.txt \
--workshop-dir G:\\SteamLibrary\\steamapps\\workshop\\content\\108600 \
--output workshop_ids_out.txt
"""
from __future__ import annotations
import argparse
import csv
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple
# Default location of the Project Zomboid (app id 108600) Workshop content
# folder; used when --workshop-dir is not given on the command line.
DEFAULT_WORKSHOP_DIR = Path(r"G:\SteamLibrary\steamapps\workshop\content\108600")
@dataclass(frozen=True)
class ModRecord:
    """One indexed mod entry found inside a Workshop item directory."""

    # Name of the numeric Workshop item directory the entry was found under.
    workshop_id: str
    # Mod id taken from an `id=` line of mod.info; None when the source
    # declared no id (e.g. entries indexed from workshop.txt only).
    mod_id: Optional[str]
    # Name from mod.info, falling back to the workshop.txt name; None if
    # neither provided one.
    mod_name: Optional[str]
    # File the entry was read from (mod.info, or workshop.txt for name-only entries).
    source_path: Path
def normalize(text: str) -> str:
    """Return a comparison key for *text*: case-folded, non-alphanumerics removed.

    Letters and digits from any script are kept, so non-English mod names
    (Cyrillic, CJK, ...) produce distinct keys instead of collapsing to an
    empty or ASCII-only string.  For pure-ASCII input the result is the
    lowercased text with everything outside ``a-z0-9`` stripped.
    """
    # ``\W`` is Unicode-aware for str patterns; ``_`` is listed explicitly
    # because it counts as a word character but is not alphanumeric.
    return re.sub(r"[\W_]+", "", text.casefold())
def parse_mods_list(mods_text: str) -> List[str]:
    """Return the non-blank entries of *mods_text*, split on ';' and line breaks."""
    pieces = (piece.strip() for piece in re.split(r"[;\n\r]+", mods_text))
    return [piece for piece in pieces if piece]
def read_text_file(path: Path) -> str:
    """Read *path* as UTF-8 text on a best-effort basis.

    Undecodable bytes are dropped and a leading UTF-8 byte-order mark is
    removed, so the first line of a file saved with a BOM parses like any
    other line.

    Returns:
        The file contents, or "" when the file is missing or cannot be read.
    """
    try:
        # "utf-8-sig" behaves like "utf-8" but strips a leading BOM if present.
        return path.read_text(encoding="utf-8-sig", errors="ignore")
    except (OSError, ValueError):
        # OSError: missing file, directory, permission problem, ...
        # ValueError: malformed path (e.g. embedded NUL byte).
        return ""
def parse_mod_info(mod_info_text: str) -> Tuple[List[str], Optional[str]]:
    """
    Extract mod IDs and mod name from the text of a mod.info file.

    - IDs come from `id=` lines; one line may hold several IDs separated by
      commas/semicolons, and every `id=` line contributes.
    - The name comes from `name=Some Name`; the last non-empty one wins.
    - Keys are case-insensitive and whitespace around `=` is tolerated
      (`id = Foo`). Blank lines and lines starting with `#` are skipped.

    Returns:
        (ids, name) where ids may be empty and name may be None.
    """
    ids: List[str] = []
    name: Optional[str] = None
    # A UTF-8 BOM left at the start of the text would otherwise hide the first key.
    for raw_line in mod_info_text.lstrip("\ufeff").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        key, separator, value = line.partition("=")
        if not separator:
            continue
        key = key.strip().lower()
        value = value.strip()
        if key == "id":
            # Split on common separators for multiple IDs in one line.
            ids.extend(part.strip() for part in re.split(r"[,;]", value) if part.strip())
        elif key == "name" and value:
            name = value
    return ids, name
def parse_workshop_txt(workshop_txt: str) -> Optional[str]:
    """
    Extract a human-readable name from the text of a workshop.txt file.

    The first non-empty `name=` value is returned, as before. When no such
    line exists, the first non-empty `title=` value is used instead, since
    that is the key Project Zomboid's workshop.txt format uses for the item
    title. Keys are case-insensitive, whitespace around `=` is tolerated,
    and blank lines or lines starting with `#` are skipped.

    Returns:
        The name/title, or None when neither key carries a value.
    """
    fallback_title: Optional[str] = None
    # A UTF-8 BOM left at the start of the text would otherwise hide the first key.
    for raw_line in workshop_txt.lstrip("\ufeff").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        key, separator, value = line.partition("=")
        if not separator:
            continue
        key = key.strip().lower()
        value = value.strip()
        if not value:
            continue
        if key == "name":
            return value
        if key == "title" and fallback_title is None:
            fallback_title = value
    return fallback_title
def index_workshop(workshop_dir: Path) -> Tuple[Dict[str, List[ModRecord]], Dict[str, List[ModRecord]], Dict[str, List[ModRecord]]]:
    """
    Walk the workshop directory and build three lookup indices:
    - by_id: normalized mod id -> ModRecord list
    - by_name: normalized mod name -> ModRecord list
    - by_workshop: workshop id string -> ModRecord list

    Only sub-directories whose name is all digits are treated as workshop
    items. Items and mod.info files are visited in sorted order so the
    record lists (and therefore "first match wins" lookups) are the same on
    every run and platform, independent of directory enumeration order.

    Returns three empty dicts when *workshop_dir* is not an existing directory.
    """
    by_id: Dict[str, List[ModRecord]] = {}
    by_name: Dict[str, List[ModRecord]] = {}
    by_workshop: Dict[str, List[ModRecord]] = {}
    if not workshop_dir.is_dir():
        return by_id, by_name, by_workshop

    item_dirs = sorted(
        (child for child in workshop_dir.iterdir() if child.is_dir() and child.name.isdigit()),
        key=lambda item: item.name,
    )
    for child in item_dirs:
        workshop_id = child.name

        # Typical structure: <workshop_id>/mods/*/mod.info, but mod.info may
        # sit at any depth below mods/, so search recursively.
        mods_root = child / "mods"
        mod_info_paths: List[Path] = []
        if mods_root.is_dir():
            mod_info_paths = sorted(p for p in mods_root.rglob("mod.info") if p.is_file())
        # Fall back to any mod.info anywhere inside the workshop item (less common).
        if not mod_info_paths:
            mod_info_paths = sorted(p for p in child.rglob("mod.info") if p.is_file())

        # Try to get workshop name from workshop.txt (optional).
        workshop_name = None
        workshop_txt_path = child / "workshop.txt"
        if workshop_txt_path.exists():
            workshop_name = parse_workshop_txt(read_text_file(workshop_txt_path))

        # If no mod.info was found, still index by the workshop name to help matching.
        if not mod_info_paths and workshop_name:
            record = ModRecord(
                workshop_id=workshop_id,
                mod_id=None,
                mod_name=workshop_name,
                source_path=workshop_txt_path,
            )
            by_name.setdefault(normalize(workshop_name), []).append(record)
            by_workshop.setdefault(workshop_id, []).append(record)
            continue

        for mod_info_path in mod_info_paths:
            text = read_text_file(mod_info_path)
            if not text:
                continue
            mod_ids, mod_name = parse_mod_info(text)
            # Prefer mod.info name; fall back to workshop.txt name if absent.
            effective_name = mod_name or workshop_name
            # One record per declared id, reachable by id and by workshop item.
            for mod_id in mod_ids:
                record = ModRecord(
                    workshop_id=workshop_id,
                    mod_id=mod_id,
                    mod_name=effective_name,
                    source_path=mod_info_path,
                )
                by_id.setdefault(normalize(mod_id), []).append(record)
                by_workshop.setdefault(workshop_id, []).append(record)
            # One name record per mod.info, carrying its first id (if any).
            if effective_name:
                record_for_name = ModRecord(
                    workshop_id=workshop_id,
                    mod_id=(mod_ids[0] if mod_ids else None),
                    mod_name=effective_name,
                    source_path=mod_info_path,
                )
                by_name.setdefault(normalize(effective_name), []).append(record_for_name)
    return by_id, by_name, by_workshop
def try_extract_numeric_workshop_id(token: str) -> Optional[str]:
    """Return the leading run of 6+ digits when it is followed by '/' or '\\', else None."""
    found = re.match(r"^(\d{6,})(?:/|\\).*$", token)
    return found.group(1) if found else None
def match_token(
    token: str,
    by_id: Dict[str, List[ModRecord]],
    by_name: Dict[str, List[ModRecord]],
) -> Tuple[str, str, str, str]:
    """
    Attempt to find a workshop id for the given token.

    Strategies, in order:
    1) '<digits>/<anything>' tokens yield the digits directly ("provided_numeric").
    2) Normalized token against mod ids ("mod_id"), then mod names ("mod_name").
    3) Same lookups after removing bracketed tags like "[B42]"
       ("mod_id_normalized" / "mod_name_normalized").
    When several records share a key, the first one in the index is used.

    Returns tuple: (workshop_id_or_NOT_FOUND, match_type, matched_value, source_path)
    """
    # 1) Direct numeric extraction
    numeric = try_extract_numeric_workshop_id(token)
    if numeric:
        return numeric, "provided_numeric", numeric, ""

    def lookup(key, id_label, name_label):
        # An empty key means the text had no alphanumeric content; looking it
        # up could hit index entries whose id/name also normalized to "".
        if not key:
            return None
        id_hits = by_id.get(key)
        if id_hits:
            record = id_hits[0]
            return record.workshop_id, id_label, (record.mod_id or ""), str(record.source_path)
        name_hits = by_name.get(key)
        if name_hits:
            record = name_hits[0]
            return record.workshop_id, name_label, (record.mod_name or ""), str(record.source_path)
        return None

    # 2) Id match, then name match, on the normalized token
    result = lookup(normalize(token), "mod_id", "mod_name")

    # 3) Heuristic: strip bracketed tags like [B42] and retry
    if result is None:
        token_wo_brackets = re.sub(r"\[[^\]]*\]", "", token).strip()
        if token_wo_brackets and token_wo_brackets != token:
            result = lookup(
                normalize(token_wo_brackets),
                "mod_id_normalized",
                "mod_name_normalized",
            )

    if result is None:
        return "NOT_FOUND", "no_match", "", ""
    return result
def write_output(
    output_path: Path,
    rows: Iterable[Tuple[str, str, str, str, str]],
    mods_line: Optional[str] = None,
) -> None:
    """Write a header plus *rows* as ';'-separated lines to *output_path*.

    When *mods_line* is non-empty it is appended after a blank line, followed
    by a trailing newline. The file is overwritten and encoded as UTF-8.
    """
    header = ["input", "workshop_id", "match_type", "matched_value", "source_path"]
    with output_path.open("w", encoding="utf-8", newline="") as handle:
        out = csv.writer(handle, delimiter=";", lineterminator="\n", quoting=csv.QUOTE_MINIMAL)
        out.writerow(header)
        out.writerows(list(entry) for entry in rows)
        if mods_line:
            handle.write("\n" + mods_line + "\n")
def _split_numeric_token(token_text: str) -> Tuple[Optional[str], Optional[str]]:
    """Split '<digits>[/<suffix>]' into (digits, suffix); (None, None) if it does not fit."""
    m = re.match(r"^(\d{6,})(?:[\\/](.*))?$", token_text)
    if not m:
        return None, None
    return m.group(1), (m.group(2) or None)


def _candidate_mod_ids(
    token: str,
    workshop_id: str,
    match_type: str,
    matched_value: str,
    by_workshop: Dict[str, List["ModRecord"]],
) -> List[str]:
    """Pick the mod id(s) to pair with *workshop_id* for one resolved entry."""
    # An id match already names the mod id.
    if match_type in ("mod_id", "mod_id_normalized") and matched_value:
        return [matched_value]

    # Otherwise derive it from the records indexed for this workshop item.
    records = by_workshop.get(workshop_id, [])
    candidates: List[str] = []

    if match_type in ("mod_name", "mod_name_normalized") and matched_value:
        wanted = normalize(matched_value)
        candidates = [
            rec.mod_id
            for rec in records
            if rec.mod_name and normalize(rec.mod_name) == wanted and rec.mod_id
        ]

    if not candidates:
        num, suffix = _split_numeric_token(token)
        if num == workshop_id and records:
            if suffix:
                suffix_norm = normalize(suffix)
                # Try folder name or mod_id match; the first hit is enough.
                for rec in records:
                    folder_name = rec.source_path.parent.name if rec.source_path else ""
                    if rec.mod_id and suffix_norm in (normalize(rec.mod_id), normalize(folder_name)):
                        candidates.append(rec.mod_id)
                        break
            # If still none, include all available mod_ids for this workshop.
            if not candidates:
                candidates = [rec.mod_id for rec in records if rec.mod_id]

    # Fallback to first non-empty mod_id.
    if not candidates:
        first = next((rec.mod_id for rec in records if rec.mod_id), None)
        if first:
            candidates.append(first)
    return candidates


def _build_mods_line(
    rows: Iterable[Tuple[str, str, str, str, str]],
    by_workshop: Dict[str, List["ModRecord"]],
) -> Optional[str]:
    """Build the 'Mods=<wid>\\<mid>;...;' line, or None when no pair was resolved."""
    mods_pairs: List[Tuple[str, str]] = []  # (workshop_id, mod_id), first-seen order
    seen = set()
    for token, workshop_id, match_type, matched_value, _source_path in rows:
        if workshop_id == "NOT_FOUND":
            continue
        for mod_id in _candidate_mod_ids(token, workshop_id, match_type, matched_value, by_workshop):
            key = (workshop_id, mod_id)
            if key not in seen:
                seen.add(key)
                mods_pairs.append(key)
    if not mods_pairs:
        return None
    return "Mods=" + ";".join([f"{wid}\\{mid}" for wid, mid in mods_pairs]) + ";"


def main(argv: Optional[List[str]] = None) -> int:
    """Resolve every entry of the mods file and write the mapping file.

    Args:
        argv: Command-line arguments without the program name; None (the
            default) makes argparse read them from ``sys.argv``.

    Returns:
        0 on success, 2 when the mods file is missing, empty, or has no entries.
    """
    parser = argparse.ArgumentParser(description="Resolve Project Zomboid mod entries to Workshop IDs")
    parser.add_argument(
        "--mods-file",
        type=Path,
        default=Path("mods.txt"),
        help="Path to the mods list file (semicolon-separated)",
    )
    parser.add_argument(
        "--workshop-dir",
        type=Path,
        default=DEFAULT_WORKSHOP_DIR,
        help="Path to the 108600 workshop content directory",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("workshop_ids_out.txt"),
        help="Path to write the output mapping (CSV with semicolons)",
    )
    args = parser.parse_args(argv)
    mods_file: Path = args.mods_file
    workshop_dir: Path = args.workshop_dir
    output_path: Path = args.output

    mods_text = read_text_file(mods_file)
    if not mods_text:
        print(f"Mods file not found or empty: {mods_file}")
        return 2
    tokens = parse_mods_list(mods_text)
    if not tokens:
        print(f"No entries found in mods file: {mods_file}")
        return 2

    by_id, by_name, by_workshop = index_workshop(workshop_dir)
    if not by_id and not by_name:
        print(f"No workshop items indexed under: {workshop_dir}")
        # Continue anyway so provided numeric ids can still pass through

    rows: List[Tuple[str, str, str, str, str]] = []
    found = 0
    for token in tokens:
        workshop_id, match_type, matched_value, source_path = match_token(token, by_id, by_name)
        if workshop_id != "NOT_FOUND":
            found += 1
        rows.append((token, workshop_id, match_type, matched_value, source_path))

    mods_line = _build_mods_line(rows, by_workshop)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    write_output(output_path, rows, mods_line)
    print(f"Resolved {found}/{len(tokens)} entries. Wrote: {output_path}")
    return 0
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())