feature: webui, kobolcpp intergration memory, game module
This commit is contained in:
@@ -0,0 +1,373 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
|
||||
class TestResult:
|
||||
name: str
|
||||
passed: bool
|
||||
details: str
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
repo_root = Path.cwd()
|
||||
model_path = Path(args.model_path).expanduser().resolve()
|
||||
koboldcpp_path = Path(args.koboldcpp_path).expanduser().resolve()
|
||||
|
||||
if not model_path.exists():
|
||||
print(f"FAIL: model does not exist: {model_path}")
|
||||
return 2
|
||||
if not koboldcpp_path.exists():
|
||||
print(f"FAIL: koboldcpp executable does not exist: {koboldcpp_path}")
|
||||
return 2
|
||||
|
||||
logs_dir = repo_root / "data" / "logs"
|
||||
logs_dir.mkdir(parents=True, exist_ok=True)
|
||||
safe_name = model_path.stem.replace(" ", "_")
|
||||
stdout_path = logs_dir / f"model-smoke-{safe_name}-ctx{args.context_size}.out.log"
|
||||
stderr_path = logs_dir / f"model-smoke-{safe_name}-ctx{args.context_size}.err.log"
|
||||
|
||||
process: subprocess.Popen[Any] | None = None
|
||||
try:
|
||||
if args.stop_existing:
|
||||
stop_existing_koboldcpp()
|
||||
|
||||
process = start_koboldcpp(
|
||||
koboldcpp_path=koboldcpp_path,
|
||||
model_path=model_path,
|
||||
port=args.port,
|
||||
context_size=args.context_size,
|
||||
extra_args=args.kobold_arg,
|
||||
stdout_path=stdout_path,
|
||||
stderr_path=stderr_path,
|
||||
)
|
||||
base_url = f"http://127.0.0.1:{args.port}"
|
||||
model_id = wait_for_model(base_url, process, args.startup_timeout_seconds)
|
||||
print(f"Model ready: {model_id}")
|
||||
|
||||
results = [
|
||||
run_dialogue_test(base_url, model_id),
|
||||
run_auto_tool_test(base_url, model_id),
|
||||
run_forced_tool_test(base_url, model_id),
|
||||
run_agent_action_prompt_test(base_url, model_id),
|
||||
]
|
||||
|
||||
passed = all(result.passed for result in results)
|
||||
print()
|
||||
print("KoboldCpp model smoke test summary")
|
||||
print(f"Model: {model_path}")
|
||||
print(f"Context size: {args.context_size}")
|
||||
print(f"KoboldCpp logs: {stdout_path} | {stderr_path}")
|
||||
for result in results:
|
||||
status = "PASS" if result.passed else "FAIL"
|
||||
print(f"{status}: {result.name} - {result.details}")
|
||||
print()
|
||||
print("PASS" if passed else "FAIL")
|
||||
return 0 if passed else 1
|
||||
finally:
|
||||
if process is not None and process.poll() is None and not args.keep_running:
|
||||
process.terminate()
|
||||
try:
|
||||
process.wait(timeout=20)
|
||||
except subprocess.TimeoutExpired:
|
||||
process.kill()
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Start KoboldCpp with a GGUF and run LocalDiplomacy tool-calling smoke tests."
|
||||
)
|
||||
parser.add_argument("model_path", help="Path to the GGUF model to test.")
|
||||
parser.add_argument("--context-size", type=int, default=40960, help="KoboldCpp context size.")
|
||||
parser.add_argument("--koboldcpp-path", default="./koboldcpp.exe", help="Path to koboldcpp.exe.")
|
||||
parser.add_argument("--port", type=int, default=5001, help="KoboldCpp HTTP port.")
|
||||
parser.add_argument("--startup-timeout-seconds", type=float, default=240.0)
|
||||
parser.add_argument("--request-timeout-seconds", type=float, default=180.0)
|
||||
parser.add_argument("--keep-running", action="store_true", help="Leave the started KoboldCpp process running.")
|
||||
parser.add_argument("--stop-existing", action="store_true", help="Stop existing koboldcpp.exe processes first.")
|
||||
parser.add_argument(
|
||||
"--kobold-arg",
|
||||
action="append",
|
||||
default=[],
|
||||
help="Extra argument passed to KoboldCpp. Repeat for multiple args.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def start_koboldcpp(
|
||||
*,
|
||||
koboldcpp_path: Path,
|
||||
model_path: Path,
|
||||
port: int,
|
||||
context_size: int,
|
||||
extra_args: list[str],
|
||||
stdout_path: Path,
|
||||
stderr_path: Path,
|
||||
) -> subprocess.Popen[Any]:
|
||||
command = [
|
||||
str(koboldcpp_path),
|
||||
"--model",
|
||||
str(model_path),
|
||||
"--port",
|
||||
str(port),
|
||||
"--contextsize",
|
||||
str(context_size),
|
||||
"--jinja",
|
||||
"--jinjatools",
|
||||
*extra_args,
|
||||
]
|
||||
print("Starting KoboldCpp:")
|
||||
print(" ".join(quote_arg(part) for part in command))
|
||||
return subprocess.Popen(
|
||||
command,
|
||||
stdout=stdout_path.open("w", encoding="utf-8", errors="replace"),
|
||||
stderr=stderr_path.open("w", encoding="utf-8", errors="replace"),
|
||||
creationflags=subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0,
|
||||
)
|
||||
|
||||
|
||||
def wait_for_model(base_url: str, process: subprocess.Popen[Any], timeout_seconds: float) -> str:
|
||||
deadline = time.monotonic() + timeout_seconds
|
||||
last_error = ""
|
||||
while time.monotonic() < deadline:
|
||||
if process.poll() is not None:
|
||||
raise RuntimeError(f"KoboldCpp exited early with code {process.returncode}.")
|
||||
try:
|
||||
data = get_json(f"{base_url}/v1/models", timeout=10)
|
||||
models = data.get("data") or []
|
||||
if models:
|
||||
return str(models[0].get("id") or "local-model")
|
||||
except Exception as exc: # noqa: BLE001 - report the last startup error.
|
||||
last_error = str(exc)
|
||||
time.sleep(2)
|
||||
raise TimeoutError(f"KoboldCpp did not become ready within {timeout_seconds}s. Last error: {last_error}")
|
||||
|
||||
|
||||
def run_dialogue_test(base_url: str, model_id: str) -> TestResult:
|
||||
data = chat(
|
||||
base_url,
|
||||
{
|
||||
"model": model_id,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "/no_think You are Derthert, a Bannerlord lord. Answer in one sentence. Never expose reasoning.",
|
||||
},
|
||||
{"role": "user", "content": "/no_think Greetings. What news from the border?"},
|
||||
],
|
||||
"temperature": 0.3,
|
||||
"max_tokens": 120,
|
||||
},
|
||||
)
|
||||
content = first_message(data).get("content") or ""
|
||||
bad_markers = ["<think>", "</think>", '"arguments"', "<tool_call"]
|
||||
passed = bool(content.strip()) and not any(marker in content for marker in bad_markers)
|
||||
return TestResult("dialogue_no_visible_reasoning", passed, truncate(content.strip()))
|
||||
|
||||
|
||||
def run_auto_tool_test(base_url: str, model_id: str) -> TestResult:
|
||||
data = chat(
|
||||
base_url,
|
||||
{
|
||||
"model": model_id,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "/no_think Use tools through the API when a game action is needed. Never expose reasoning.",
|
||||
},
|
||||
{"role": "user", "content": "/no_think Derthert should propose peace with Battania."},
|
||||
],
|
||||
"tools": [action_tool_schema(["propose_peace", "declare_war"])],
|
||||
"tool_choice": "auto",
|
||||
"temperature": 0.0,
|
||||
"max_tokens": 300,
|
||||
},
|
||||
)
|
||||
return validate_tool_call("auto_tool_call", data)
|
||||
|
||||
|
||||
def run_forced_tool_test(base_url: str, model_id: str) -> TestResult:
|
||||
data = chat(
|
||||
base_url,
|
||||
{
|
||||
"model": model_id,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "/no_think Use tools through the API when a game action is needed. Never expose reasoning.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
"/no_think Use propose_game_action now: action_type propose_peace, "
|
||||
"actor_id lord_derthert, target_id kingdom_battania, reason end the border raids."
|
||||
),
|
||||
},
|
||||
],
|
||||
"tools": [action_tool_schema(["accept_peace", "propose_peace", "reject_peace"])],
|
||||
"tool_choice": {"type": "function", "function": {"name": "propose_game_action"}},
|
||||
"temperature": 0.0,
|
||||
"max_tokens": 300,
|
||||
},
|
||||
)
|
||||
return validate_tool_call("forced_tool_call", data)
|
||||
|
||||
|
||||
def run_agent_action_prompt_test(base_url: str, model_id: str) -> TestResult:
|
||||
context = {
|
||||
"campaign_id": "mock-campaign",
|
||||
"player_id": "player",
|
||||
"npc_id": "lord_derthert",
|
||||
"npc_name": "Derthert",
|
||||
"npc_kingdom_id": "kingdom_vlandia",
|
||||
"player_kingdom_id": "kingdom_vlandia",
|
||||
"kingdom_state": {"wars": [{"enemy_kingdom_id": "kingdom_battania", "days": 42}], "war_fatigue": 0.67},
|
||||
"nearby_parties": [{"id": "party_battania_raiders", "faction_id": "kingdom_battania"}],
|
||||
"nearby_settlements": [{"id": "town_sargot", "owner_kingdom_id": "kingdom_vlandia"}],
|
||||
"recent_events": [{"id": "event_border_raids", "summary": "Border raids have strained Vlandia and Battania."}],
|
||||
}
|
||||
data = chat(
|
||||
base_url,
|
||||
{
|
||||
"model": model_id,
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"/no_think You are LocalDiplomacy's Bannerlord action planner. "
|
||||
"Call the provided tool through the API. Do not answer in prose. Use only IDs present in the request."
|
||||
),
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": (
|
||||
"/no_think "
|
||||
f"Context: {json.dumps(context, ensure_ascii=False)}\n"
|
||||
"Command: Use propose_game_action now: action_type propose_peace, actor_id lord_derthert, "
|
||||
"target_id kingdom_battania, reason end the border raids."
|
||||
),
|
||||
},
|
||||
],
|
||||
"tools": [action_tool_schema(["accept_peace", "propose_peace", "reject_peace"])],
|
||||
"tool_choice": {"type": "function", "function": {"name": "propose_game_action"}},
|
||||
"temperature": 0.0,
|
||||
"max_tokens": 400,
|
||||
},
|
||||
)
|
||||
return validate_tool_call("agent_action_prompt_tool_call", data)
|
||||
|
||||
|
||||
def action_tool_schema(action_types: list[str]) -> dict[str, Any]:
|
||||
return {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "propose_game_action",
|
||||
"description": "Queue one Bannerlord game action proposal for mod-side validation.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"action_type": {"type": "string", "enum": action_types},
|
||||
"actor_id": {"type": "string"},
|
||||
"target_id": {"type": "string"},
|
||||
"args": {"type": "object"},
|
||||
"confidence": {"type": "number", "minimum": 0, "maximum": 1},
|
||||
"reason": {"type": "string"},
|
||||
"requires_player_confirmation": {"type": "boolean"},
|
||||
},
|
||||
"required": ["action_type", "actor_id", "target_id", "reason"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def validate_tool_call(name: str, data: dict[str, Any]) -> TestResult:
|
||||
message = first_message(data)
|
||||
tool_calls = message.get("tool_calls") or []
|
||||
if not tool_calls:
|
||||
return TestResult(name, False, f"no tool_calls; content={truncate(str(message.get('content') or ''))}")
|
||||
|
||||
function = (tool_calls[0] or {}).get("function") or {}
|
||||
arguments_raw = function.get("arguments") or "{}"
|
||||
try:
|
||||
arguments = json.loads(arguments_raw)
|
||||
except json.JSONDecodeError:
|
||||
return TestResult(name, False, f"invalid arguments JSON: {truncate(arguments_raw)}")
|
||||
|
||||
passed = (
|
||||
function.get("name") == "propose_game_action"
|
||||
and arguments.get("action_type") == "propose_peace"
|
||||
and bool(arguments.get("actor_id"))
|
||||
and bool(arguments.get("target_id"))
|
||||
)
|
||||
return TestResult(name, passed, json.dumps(arguments, ensure_ascii=False))
|
||||
|
||||
|
||||
def chat(base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
|
||||
return post_json(f"{base_url}/v1/chat/completions", payload, timeout=180)
|
||||
|
||||
|
||||
def first_message(data: dict[str, Any]) -> dict[str, Any]:
|
||||
choices = data.get("choices") or [{}]
|
||||
choice = choices[0] or {}
|
||||
return choice.get("message") or {}
|
||||
|
||||
|
||||
def get_json(url: str, timeout: float) -> dict[str, Any]:
|
||||
with urllib.request.urlopen(url, timeout=timeout) as response:
|
||||
return json.loads(response.read().decode("utf-8"))
|
||||
|
||||
|
||||
def post_json(url: str, payload: dict[str, Any], timeout: float) -> dict[str, Any]:
|
||||
body = json.dumps(payload).encode("utf-8")
|
||||
request = urllib.request.Request(
|
||||
url,
|
||||
data=body,
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="POST",
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=timeout) as response:
|
||||
return json.loads(response.read().decode("utf-8"))
|
||||
except urllib.error.HTTPError as exc:
|
||||
error_body = exc.read().decode("utf-8", errors="replace")
|
||||
raise RuntimeError(f"HTTP {exc.code}: {error_body}") from exc
|
||||
|
||||
|
||||
def stop_existing_koboldcpp() -> None:
|
||||
if sys.platform != "win32":
|
||||
return
|
||||
subprocess.run(
|
||||
[
|
||||
"powershell",
|
||||
"-NoProfile",
|
||||
"-Command",
|
||||
"Get-Process koboldcpp -ErrorAction SilentlyContinue | Stop-Process -Force",
|
||||
],
|
||||
check=False,
|
||||
)
|
||||
|
||||
|
||||
def quote_arg(value: str) -> str:
|
||||
if " " in value or "\t" in value:
|
||||
return f'"{value}"'
|
||||
return value
|
||||
|
||||
|
||||
def truncate(value: str, limit: int = 220) -> str:
|
||||
value = " ".join(value.split())
|
||||
return value if len(value) <= limit else f"{value[: limit - 3]}..."
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
Reference in New Issue
Block a user