feature: webui, kobolcpp intergration memory, game module

2026-04-30 02:43:23 -04:00
parent 9da4259df1
commit 00f1ac4bef
40 changed files with 5651 additions and 1 deletions
@@ -0,0 +1,373 @@
+from __future__ import annotations
+
+import argparse
+import json
+import subprocess
+import sys
+import time
+import urllib.error
+import urllib.request
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+
+@dataclass
+class TestResult:
+    name: str
+    passed: bool
+    details: str
+
+
+def main() -> int:
+    args = parse_args()
+    repo_root = Path.cwd()
+    model_path = Path(args.model_path).expanduser().resolve()
+    koboldcpp_path = Path(args.koboldcpp_path).expanduser().resolve()
+
+    if not model_path.exists():
+        print(f"FAIL: model does not exist: {model_path}")
+        return 2
+    if not koboldcpp_path.exists():
+        print(f"FAIL: koboldcpp executable does not exist: {koboldcpp_path}")
+        return 2
+
+    logs_dir = repo_root / "data" / "logs"
+    logs_dir.mkdir(parents=True, exist_ok=True)
+    safe_name = model_path.stem.replace(" ", "_")
+    stdout_path = logs_dir / f"model-smoke-{safe_name}-ctx{args.context_size}.out.log"
+    stderr_path = logs_dir / f"model-smoke-{safe_name}-ctx{args.context_size}.err.log"
+
+    process: subprocess.Popen[Any] | None = None
+    try:
+        if args.stop_existing:
+            stop_existing_koboldcpp()
+
+        process = start_koboldcpp(
+            koboldcpp_path=koboldcpp_path,
+            model_path=model_path,
+            port=args.port,
+            context_size=args.context_size,
+            extra_args=args.kobold_arg,
+            stdout_path=stdout_path,
+            stderr_path=stderr_path,
+        )
+        base_url = f"http://127.0.0.1:{args.port}"
+        model_id = wait_for_model(base_url, process, args.startup_timeout_seconds)
+        print(f"Model ready: {model_id}")
+
+        results = [
+            run_dialogue_test(base_url, model_id),
+            run_auto_tool_test(base_url, model_id),
+            run_forced_tool_test(base_url, model_id),
+            run_agent_action_prompt_test(base_url, model_id),
+        ]
+
+        passed = all(result.passed for result in results)
+        print()
+        print("KoboldCpp model smoke test summary")
+        print(f"Model: {model_path}")
+        print(f"Context size: {args.context_size}")
+        print(f"KoboldCpp logs: {stdout_path} | {stderr_path}")
+        for result in results:
+            status = "PASS" if result.passed else "FAIL"
+            print(f"{status}: {result.name} - {result.details}")
+        print()
+        print("PASS" if passed else "FAIL")
+        return 0 if passed else 1
+    finally:
+        if process is not None and process.poll() is None and not args.keep_running:
+            process.terminate()
+            try:
+                process.wait(timeout=20)
+            except subprocess.TimeoutExpired:
+                process.kill()
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Start KoboldCpp with a GGUF and run LocalDiplomacy tool-calling smoke tests."
+    )
+    parser.add_argument("model_path", help="Path to the GGUF model to test.")
+    parser.add_argument("--context-size", type=int, default=40960, help="KoboldCpp context size.")
+    parser.add_argument("--koboldcpp-path", default="./koboldcpp.exe", help="Path to koboldcpp.exe.")
+    parser.add_argument("--port", type=int, default=5001, help="KoboldCpp HTTP port.")
+    parser.add_argument("--startup-timeout-seconds", type=float, default=240.0)
+    parser.add_argument("--request-timeout-seconds", type=float, default=180.0)
+    parser.add_argument("--keep-running", action="store_true", help="Leave the started KoboldCpp process running.")
+    parser.add_argument("--stop-existing", action="store_true", help="Stop existing koboldcpp.exe processes first.")
+    parser.add_argument(
+        "--kobold-arg",
+        action="append",
+        default=[],
+        help="Extra argument passed to KoboldCpp. Repeat for multiple args.",
+    )
+    return parser.parse_args()
+
+
+def start_koboldcpp(
+    *,
+    koboldcpp_path: Path,
+    model_path: Path,
+    port: int,
+    context_size: int,
+    extra_args: list[str],
+    stdout_path: Path,
+    stderr_path: Path,
+) -> subprocess.Popen[Any]:
+    command = [
+        str(koboldcpp_path),
+        "--model",
+        str(model_path),
+        "--port",
+        str(port),
+        "--contextsize",
+        str(context_size),
+        "--jinja",
+        "--jinjatools",
+        *extra_args,
+    ]
+    print("Starting KoboldCpp:")
+    print(" ".join(quote_arg(part) for part in command))
+    return subprocess.Popen(
+        command,
+        stdout=stdout_path.open("w", encoding="utf-8", errors="replace"),
+        stderr=stderr_path.open("w", encoding="utf-8", errors="replace"),
+        creationflags=subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0,
+    )
+
+
+def wait_for_model(base_url: str, process: subprocess.Popen[Any], timeout_seconds: float) -> str:
+    deadline = time.monotonic() + timeout_seconds
+    last_error = ""
+    while time.monotonic() < deadline:
+        if process.poll() is not None:
+            raise RuntimeError(f"KoboldCpp exited early with code {process.returncode}.")
+        try:
+            data = get_json(f"{base_url}/v1/models", timeout=10)
+            models = data.get("data") or []
+            if models:
+                return str(models[0].get("id") or "local-model")
+        except Exception as exc:  # noqa: BLE001 - report the last startup error.
+            last_error = str(exc)
+        time.sleep(2)
+    raise TimeoutError(f"KoboldCpp did not become ready within {timeout_seconds}s. Last error: {last_error}")
+
+
+def run_dialogue_test(base_url: str, model_id: str) -> TestResult:
+    data = chat(
+        base_url,
+        {
+            "model": model_id,
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "/no_think You are Derthert, a Bannerlord lord. Answer in one sentence. Never expose reasoning.",
+                },
+                {"role": "user", "content": "/no_think Greetings. What news from the border?"},
+            ],
+            "temperature": 0.3,
+            "max_tokens": 120,
+        },
+    )
+    content = first_message(data).get("content") or ""
+    bad_markers = ["<think>", "</think>", '"arguments"', "<tool_call"]
+    passed = bool(content.strip()) and not any(marker in content for marker in bad_markers)
+    return TestResult("dialogue_no_visible_reasoning", passed, truncate(content.strip()))
+
+
+def run_auto_tool_test(base_url: str, model_id: str) -> TestResult:
+    data = chat(
+        base_url,
+        {
+            "model": model_id,
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "/no_think Use tools through the API when a game action is needed. Never expose reasoning.",
+                },
+                {"role": "user", "content": "/no_think Derthert should propose peace with Battania."},
+            ],
+            "tools": [action_tool_schema(["propose_peace", "declare_war"])],
+            "tool_choice": "auto",
+            "temperature": 0.0,
+            "max_tokens": 300,
+        },
+    )
+    return validate_tool_call("auto_tool_call", data)
+
+
+def run_forced_tool_test(base_url: str, model_id: str) -> TestResult:
+    data = chat(
+        base_url,
+        {
+            "model": model_id,
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "/no_think Use tools through the API when a game action is needed. Never expose reasoning.",
+                },
+                {
+                    "role": "user",
+                    "content": (
+                        "/no_think Use propose_game_action now: action_type propose_peace, "
+                        "actor_id lord_derthert, target_id kingdom_battania, reason end the border raids."
+                    ),
+                },
+            ],
+            "tools": [action_tool_schema(["accept_peace", "propose_peace", "reject_peace"])],
+            "tool_choice": {"type": "function", "function": {"name": "propose_game_action"}},
+            "temperature": 0.0,
+            "max_tokens": 300,
+        },
+    )
+    return validate_tool_call("forced_tool_call", data)
+
+
+def run_agent_action_prompt_test(base_url: str, model_id: str) -> TestResult:
+    context = {
+        "campaign_id": "mock-campaign",
+        "player_id": "player",
+        "npc_id": "lord_derthert",
+        "npc_name": "Derthert",
+        "npc_kingdom_id": "kingdom_vlandia",
+        "player_kingdom_id": "kingdom_vlandia",
+        "kingdom_state": {"wars": [{"enemy_kingdom_id": "kingdom_battania", "days": 42}], "war_fatigue": 0.67},
+        "nearby_parties": [{"id": "party_battania_raiders", "faction_id": "kingdom_battania"}],
+        "nearby_settlements": [{"id": "town_sargot", "owner_kingdom_id": "kingdom_vlandia"}],
+        "recent_events": [{"id": "event_border_raids", "summary": "Border raids have strained Vlandia and Battania."}],
+    }
+    data = chat(
+        base_url,
+        {
+            "model": model_id,
+            "messages": [
+                {
+                    "role": "system",
+                    "content": (
+                        "/no_think You are LocalDiplomacy's Bannerlord action planner. "
+                        "Call the provided tool through the API. Do not answer in prose. Use only IDs present in the request."
+                    ),
+                },
+                {
+                    "role": "user",
+                    "content": (
+                        "/no_think "
+                        f"Context: {json.dumps(context, ensure_ascii=False)}\n"
+                        "Command: Use propose_game_action now: action_type propose_peace, actor_id lord_derthert, "
+                        "target_id kingdom_battania, reason end the border raids."
+                    ),
+                },
+            ],
+            "tools": [action_tool_schema(["accept_peace", "propose_peace", "reject_peace"])],
+            "tool_choice": {"type": "function", "function": {"name": "propose_game_action"}},
+            "temperature": 0.0,
+            "max_tokens": 400,
+        },
+    )
+    return validate_tool_call("agent_action_prompt_tool_call", data)
+
+
+def action_tool_schema(action_types: list[str]) -> dict[str, Any]:
+    return {
+        "type": "function",
+        "function": {
+            "name": "propose_game_action",
+            "description": "Queue one Bannerlord game action proposal for mod-side validation.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "action_type": {"type": "string", "enum": action_types},
+                    "actor_id": {"type": "string"},
+                    "target_id": {"type": "string"},
+                    "args": {"type": "object"},
+                    "confidence": {"type": "number", "minimum": 0, "maximum": 1},
+                    "reason": {"type": "string"},
+                    "requires_player_confirmation": {"type": "boolean"},
+                },
+                "required": ["action_type", "actor_id", "target_id", "reason"],
+            },
+        },
+    }
+
+
+def validate_tool_call(name: str, data: dict[str, Any]) -> TestResult:
+    message = first_message(data)
+    tool_calls = message.get("tool_calls") or []
+    if not tool_calls:
+        return TestResult(name, False, f"no tool_calls; content={truncate(str(message.get('content') or ''))}")
+
+    function = (tool_calls[0] or {}).get("function") or {}
+    arguments_raw = function.get("arguments") or "{}"
+    try:
+        arguments = json.loads(arguments_raw)
+    except json.JSONDecodeError:
+        return TestResult(name, False, f"invalid arguments JSON: {truncate(arguments_raw)}")
+
+    passed = (
+        function.get("name") == "propose_game_action"
+        and arguments.get("action_type") == "propose_peace"
+        and bool(arguments.get("actor_id"))
+        and bool(arguments.get("target_id"))
+    )
+    return TestResult(name, passed, json.dumps(arguments, ensure_ascii=False))
+
+
+def chat(base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
+    return post_json(f"{base_url}/v1/chat/completions", payload, timeout=180)
+
+
+def first_message(data: dict[str, Any]) -> dict[str, Any]:
+    choices = data.get("choices") or [{}]
+    choice = choices[0] or {}
+    return choice.get("message") or {}
+
+
+def get_json(url: str, timeout: float) -> dict[str, Any]:
+    with urllib.request.urlopen(url, timeout=timeout) as response:
+        return json.loads(response.read().decode("utf-8"))
+
+
+def post_json(url: str, payload: dict[str, Any], timeout: float) -> dict[str, Any]:
+    body = json.dumps(payload).encode("utf-8")
+    request = urllib.request.Request(
+        url,
+        data=body,
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(request, timeout=timeout) as response:
+            return json.loads(response.read().decode("utf-8"))
+    except urllib.error.HTTPError as exc:
+        error_body = exc.read().decode("utf-8", errors="replace")
+        raise RuntimeError(f"HTTP {exc.code}: {error_body}") from exc
+
+
+def stop_existing_koboldcpp() -> None:
+    if sys.platform != "win32":
+        return
+    subprocess.run(
+        [
+            "powershell",
+            "-NoProfile",
+            "-Command",
+            "Get-Process koboldcpp -ErrorAction SilentlyContinue | Stop-Process -Force",
+        ],
+        check=False,
+    )
+
+
+def quote_arg(value: str) -> str:
+    if " " in value or "\t" in value:
+        return f'"{value}"'
+    return value
+
+
+def truncate(value: str, limit: int = 220) -> str:
+    value = " ".join(value.split())
+    return value if len(value) <= limit else f"{value[: limit - 3]}..."
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())