"""Direct Wikidata lookup helpers.""" from __future__ import annotations import os from datetime import datetime, timezone from typing import Any, Optional import httpx WIKIDATA_TIMEOUT = float(os.getenv("ATLAS_WIKIDATA_TIMEOUT", "10")) WIKIDATA_USER_AGENT = os.getenv( "ATLAS_WIKIDATA_USER_AGENT", "Atlas/1.0 (contact: lukas.goldschmidt+atlas@googlemail.com)", ) async def lookup_wikidata(subject: str) -> Optional[dict[str, Any]]: term = (subject or "").strip() if not term: return None async with httpx.AsyncClient(timeout=WIKIDATA_TIMEOUT, follow_redirects=True) as client: search = await client.get( "https://www.wikidata.org/w/api.php", params={ "action": "wbsearchentities", "search": term, "language": "en", "format": "json", "limit": 1, }, headers={"Accept": "application/json", "User-Agent": WIKIDATA_USER_AGENT}, ) if search.status_code >= 400: return None payload = search.json() results = payload.get("search") or [] if not results: return None top = results[0] qid = top.get("id") if not qid: return None entity = await client.get( f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json", params={"flavor": "dump"}, headers={"Accept": "application/json", "User-Agent": WIKIDATA_USER_AGENT}, ) if entity.status_code >= 400: return None entity_payload = entity.json() return { "qid": qid, "label": top.get("label") or term, "description": top.get("description"), "entity": entity_payload.get("entities", {}).get(qid, {}), "source": "wikidata", "wikidata_status": "hit", "retrieved_at": datetime.now(timezone.utc).isoformat(), } async def fetch_wikidata_entity(qid: str) -> Optional[dict[str, Any]]: qid = (qid or "").strip() if not qid: return None async with httpx.AsyncClient(timeout=WIKIDATA_TIMEOUT, follow_redirects=True) as client: entity = await client.get( f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json", params={"flavor": "dump"}, headers={"Accept": "application/json", "User-Agent": WIKIDATA_USER_AGENT}, ) if entity.status_code >= 400: return None entity_payload = entity.json() entity_block = entity_payload.get("entities", {}).get(qid, {}) label = None description = None if isinstance(entity_block, dict): labels = entity_block.get("labels", {}) descriptions = entity_block.get("descriptions", {}) label = (labels.get("en") or {}).get("value") if isinstance(labels, dict) else None description = (descriptions.get("en") or {}).get("value") if isinstance(descriptions, dict) else None return { "qid": qid, "entity": entity_block, "label": label, "description": description, "source": "wikidata", "wikidata_status": "enriched", "retrieved_at": datetime.now(timezone.utc).isoformat(), }