feat: add cornerstone
This commit is contained in:
@@ -0,0 +1,159 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from html.parser import HTMLParser
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
class CornerstoneError(RuntimeError):
    """Raised when a Cornerstone request fails or returns an unusable payload."""
|
||||
|
||||
|
||||
class CornerstoneClient:
    """Small async client for the Cornerstone item finder.

    Every request opens its own short-lived ``httpx.AsyncClient``. The only
    state kept on the instance is the item list cached by :meth:`list_items`.
    """

    def __init__(self, base_url: str = "https://finder.cstone.space") -> None:
        """Remember *base_url* (trailing slashes removed) and start uncached."""
        self.base_url = base_url.rstrip("/")
        # None means "not fetched yet"; set by the first successful list_items().
        self._items: list[dict[str, Any]] | None = None

    async def list_items(self) -> list[dict[str, Any]]:
        """Return the search index as ``{"id", "name", "sold"}`` dictionaries.

        The result of the first successful call is cached and the same list
        object is returned by later calls. Entries that are not dicts, or
        that lack a truthy ``id`` or ``name``, are skipped.

        Raises:
            CornerstoneError: if the response is an HTTP error, is not valid
                JSON, or does not decode to a list.
        """
        if self._items is not None:
            return self._items

        body = await self._get_json("GetSearch")
        if isinstance(body, str):
            # A string body is treated as JSON encoded a second time. Decode
            # failures are reported the same way _get_json reports them
            # instead of leaking a bare json.JSONDecodeError.
            try:
                body = json.loads(body)
            except ValueError as exc:
                raise CornerstoneError("Cornerstone search response was not valid JSON.") from exc
        if not isinstance(body, list):
            raise CornerstoneError("Cornerstone search response was not a list.")

        self._items = [
            {"id": item.get("id"), "name": item.get("name"), "sold": bool(item.get("Sold"))}
            for item in body
            if isinstance(item, dict) and item.get("id") and item.get("name")
        ]
        return self._items

    async def get_item_page(self, item_id: str) -> dict[str, Any]:
        """Fetch the HTML detail page for *item_id*.

        Returns:
            ``{"url": <final URL after redirects>, "html": <response text>}``.

        Raises:
            CornerstoneError: if the server answers with status 400 or above.
        """
        # Imported locally so this method does not depend on a new file-level import.
        from urllib.parse import quote

        # Percent-encode the id so characters such as "/", "?" or "#" cannot
        # alter the path, query or fragment of the request URL.
        item_path = quote(item_id.strip(), safe="")
        async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
            response = await client.get(
                f"{self.base_url}/Search/{item_path}",
                headers={"Accept": "text/html,application/xhtml+xml"},
            )
        if response.status_code >= 400:
            # Only the first 240 characters of the body go into the message.
            raise CornerstoneError(f"Cornerstone HTTP {response.status_code}: {response.text[:240]}")
        return {"url": str(response.url), "html": response.text}

    async def _get_json(self, path: str) -> Any:
        """GET ``base_url/path`` and return the decoded JSON body.

        Raises:
            CornerstoneError: if the body is not JSON, or the status is 400
                or above (the decoded body is included in the message).
        """
        async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client:
            response = await client.get(
                f"{self.base_url}/{path.lstrip('/')}",
                headers={"Accept": "application/json"},
            )
        # Decode before checking the status so an error reply's JSON payload
        # can be quoted in the exception message.
        try:
            body = response.json()
        except ValueError as exc:
            raise CornerstoneError(f"Cornerstone returned non-JSON response: HTTP {response.status_code}") from exc
        if response.status_code >= 400:
            raise CornerstoneError(f"Cornerstone HTTP {response.status_code}: {body}")
        return body
|
||||
|
||||
|
||||
class CornerstonePageParser(HTMLParser):
    """Collect the page title and the text of every table in an HTML page.

    After ``feed()``:

    * ``title`` holds the raw text found inside ``<title>``.
    * ``tables`` holds one entry per non-empty table; each table is a list of
      rows and each row a list of whitespace-normalised cell strings. Rows
      whose cells are all empty are dropped.

    Content inside ``<script>`` and ``<style>`` is ignored. Cells and rows
    whose closing tag is omitted (which HTML permits for ``td``, ``th`` and
    ``tr``) are closed implicitly when the next cell, row or table boundary
    is reached. Nested tables are not supported: opening a table discards
    whatever table was still open.
    """

    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self.title = ""
        self.tables: list[list[list[str]]] = []
        self._skip_depth = 0  # > 0 while inside <script>/<style>
        self._in_title = False
        self._current_table: list[list[str]] | None = None
        self._current_row: list[str] | None = None
        self._current_cell: list[str] | None = None  # text fragments of the open cell

    def _close_cell(self) -> None:
        """Store the open cell's normalised text in the open row, if any."""
        if self._current_cell is None:
            return
        if self._current_row is not None:
            self._current_row.append(" ".join("".join(self._current_cell).split()))
        self._current_cell = None

    def _close_row(self) -> None:
        """Finish the open row, keeping it only if some cell has text."""
        self._close_cell()
        if self._current_row is None:
            return
        if self._current_table is not None and any(self._current_row):
            self._current_table.append(self._current_row)
        self._current_row = None

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        tag = tag.casefold()
        if tag in {"script", "style"}:
            self._skip_depth += 1
            return
        if self._skip_depth:
            return
        if tag == "title":
            self._in_title = True
        elif tag == "table":
            # Start from a clean slate; stale row/cell state must not leak
            # into the new table.
            self._current_table = []
            self._current_row = None
            self._current_cell = None
        elif tag == "tr" and self._current_table is not None:
            # A new row implicitly ends a row left open by a missing </tr>.
            self._close_row()
            self._current_row = []
        elif tag in {"td", "th"} and self._current_row is not None:
            # A new cell implicitly ends a cell left open by a missing </td>.
            self._close_cell()
            self._current_cell = []

    def handle_endtag(self, tag: str) -> None:
        tag = tag.casefold()
        if tag in {"script", "style"} and self._skip_depth:
            self._skip_depth -= 1
            return
        if self._skip_depth:
            return
        if tag == "title":
            self._in_title = False
        elif tag in {"td", "th"}:
            self._close_cell()
        elif tag == "tr":
            self._close_row()
        elif tag == "table" and self._current_table is not None:
            # Flush a final row that was never explicitly closed.
            self._close_row()
            if self._current_table:
                self.tables.append(self._current_table)
            self._current_table = None

    def handle_data(self, data: str) -> None:
        if self._skip_depth:
            return
        if self._in_title:
            self.title += data
        if self._current_cell is not None:
            self._current_cell.append(data)
|
||||
|
||||
|
||||
def parse_cornerstone_item_page(html: str) -> dict[str, Any]:
    """Extract title, name, general attributes and sale locations from item HTML.

    Tables are classified by their first row:

    * If the first three header cells contain "location", "price" and
      "verified" (case-insensitively, in that order), every later row with at
      least three cells becomes an entry in ``locations``.
    * Otherwise, if every row has at least two cells, the table is read as
      key/value pairs into ``general``. Keys are lower-cased with spaces
      replaced by underscores, and the first value seen for a key wins.

    Returns:
        A dict with ``page_title``, ``name`` (the ``name`` general attribute,
        else derived from the title), ``general`` (only when non-empty) and
        ``locations``.
    """
    page = CornerstonePageParser()
    page.feed(html)
    page_title = " ".join(page.title.split())

    details: dict[str, str] = {}
    sale_points: list[dict[str, Any]] = []

    for rows in page.tables:
        if not rows:
            continue

        first_row, *remaining = rows
        columns = [text.casefold() for text in first_row]
        is_location_table = (
            len(columns) >= 3
            and "location" in columns[0]
            and "price" in columns[1]
            and "verified" in columns[2]
        )

        if is_location_table:
            sale_points.extend(
                {
                    "location": cells[0],
                    "base_price": _parse_cornerstone_price(cells[1]),
                    "base_price_display": cells[1],
                    "verified": cells[2],
                }
                for cells in remaining
                if len(cells) >= 3
            )
            continue

        # A key/value table needs two cells in every row, header included.
        if any(len(cells) < 2 for cells in rows):
            continue

        for cells in rows:
            label = cells[0].strip().lower().replace(" ", "_")
            text = cells[1].strip()
            if label and text:
                # Keep the first occurrence of each key.
                details.setdefault(label, text)

    result: dict[str, Any] = {"page_title": page_title}
    result["name"] = details.get("name") or _name_from_title(page_title)
    if details:
        result["general"] = details
    result["locations"] = sale_points
    return result
|
||||
|
||||
|
||||
def _parse_cornerstone_price(value: str) -> int | None:
|
||||
digits = "".join(char for char in value if char.isdigit())
|
||||
return int(digits) if digits else None
|
||||
|
||||
|
||||
def _name_from_title(title: str) -> str | None:
|
||||
if " - " not in title:
|
||||
return title or None
|
||||
return title.rsplit(" - ", 1)[-1].strip() or None
|
||||
Reference in New Issue
Block a user