| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- """Direct Wikidata lookup helpers."""
from __future__ import annotations

import os
import warnings
from datetime import datetime, timezone
from typing import Any, Optional

import httpx
# Built-in defaults, used whenever the matching environment variable is unset,
# blank, or (for the timeout) not parseable as a number.
_DEFAULT_WIKIDATA_TIMEOUT = 10.0
_DEFAULT_WIKIDATA_USER_AGENT = "Atlas/1.0 (contact: lukas.goldschmidt+atlas@googlemail.com)"


def _wikidata_timeout_from_env() -> float:
    """Return the request timeout in seconds from ``ATLAS_WIKIDATA_TIMEOUT``.

    A missing or blank variable yields the default. A value that ``float()``
    cannot parse also yields the default, with a ``RuntimeWarning``, so that a
    typo in the environment does not make the whole module fail to import.
    """
    raw = os.getenv("ATLAS_WIKIDATA_TIMEOUT", "").strip()
    if not raw:
        return _DEFAULT_WIKIDATA_TIMEOUT
    try:
        return float(raw)
    except ValueError:
        warnings.warn(
            f"Ignoring invalid ATLAS_WIKIDATA_TIMEOUT={raw!r}; "
            f"using {_DEFAULT_WIKIDATA_TIMEOUT} seconds",
            RuntimeWarning,
            stacklevel=2,
        )
        return _DEFAULT_WIKIDATA_TIMEOUT


# Timeout (seconds) handed to the httpx client for every Wikidata request.
WIKIDATA_TIMEOUT = _wikidata_timeout_from_env()

# User-Agent sent with every Wikidata request. A blank override falls back to
# the default so that requests never go out with an empty User-Agent header.
WIKIDATA_USER_AGENT = (
    os.getenv("ATLAS_WIKIDATA_USER_AGENT", "").strip() or _DEFAULT_WIKIDATA_USER_AGENT
)
def _json_object(response: Any) -> Optional[dict[str, Any]]:
    """Return the response body decoded as a JSON object, or ``None`` if it is not one."""
    try:
        body = response.json()
    except ValueError:
        # Covers json.JSONDecodeError and UnicodeDecodeError: the body is not JSON.
        return None
    return body if isinstance(body, dict) else None


async def lookup_wikidata(subject: str) -> Optional[dict[str, Any]]:
    """Look up the best-matching Wikidata entity for *subject*.

    Runs an English ``wbsearchentities`` search limited to one hit, then
    fetches that entity's JSON from ``Special:EntityData``.

    Args:
        subject: Free-text search term. Surrounding whitespace is ignored.

    Returns:
        A dict with the keys ``qid``, ``label``, ``description``, ``entity``,
        ``source`` (always ``"wikidata"``) and ``retrieved_at`` (UTC ISO-8601
        timestamp). Returns ``None`` when the term is blank, either request
        answers with an HTTP status >= 400, a response body is not a JSON
        object, or the search yields no hit with an ``id``.

    Raises:
        httpx.HTTPError: Transport-level failures (timeouts, connection
            errors) are not caught here and propagate to the caller.
    """
    term = (subject or "").strip()
    if not term:
        return None

    headers = {"Accept": "application/json", "User-Agent": WIKIDATA_USER_AGENT}
    async with httpx.AsyncClient(timeout=WIKIDATA_TIMEOUT, follow_redirects=True) as client:
        search = await client.get(
            "https://www.wikidata.org/w/api.php",
            params={
                "action": "wbsearchentities",
                "search": term,
                "language": "en",
                "format": "json",
                "limit": 1,
            },
            headers=headers,
        )
        if search.status_code >= 400:
            return None
        payload = _json_object(search)
        if payload is None:
            return None

        results = payload.get("search") or []
        if not isinstance(results, list) or not results:
            return None
        top = results[0]
        if not isinstance(top, dict):
            return None
        qid = top.get("id")
        if not qid:
            return None

        entity = await client.get(
            f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json",
            params={"flavor": "dump"},
            headers=headers,
        )
        if entity.status_code >= 400:
            return None
        entity_payload = _json_object(entity)
        if entity_payload is None:
            return None

    # "entities" may be absent or null; either way fall back to an empty mapping.
    entities = entity_payload.get("entities") or {}
    return {
        "qid": qid,
        "label": top.get("label") or term,
        "description": top.get("description"),
        "entity": entities.get(qid, {}),
        "source": "wikidata",
        "retrieved_at": datetime.now(timezone.utc).isoformat(),
    }
|