| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- """Direct Wikidata lookup helpers."""
- from __future__ import annotations
- import os
- from datetime import datetime, timezone
- from typing import Any, Optional
- import httpx
# Timeout handed to every httpx client this module creates.
# Overridable through the ATLAS_WIKIDATA_TIMEOUT environment variable; a value
# that float() cannot parse raises ValueError at import time.
WIKIDATA_TIMEOUT = float(os.environ.get("ATLAS_WIKIDATA_TIMEOUT", "10"))

# User-Agent header sent with every Wikidata request.
# Overridable through the ATLAS_WIKIDATA_USER_AGENT environment variable.
WIKIDATA_USER_AGENT = os.environ.get(
    "ATLAS_WIKIDATA_USER_AGENT",
    "Atlas/1.0 (contact: lukas.goldschmidt+atlas@googlemail.com)",
)
async def lookup_wikidata(subject: str) -> Optional[dict[str, Any]]:
    """Search Wikidata for *subject* and return the top hit with its entity data.

    Two requests are made over one client: a ``wbsearchentities`` search
    (English, limit 1) and a ``Special:EntityData`` fetch for the ID of the
    first search result.

    Args:
        subject: Free-text search term. ``None``, empty and whitespace-only
            values short-circuit to ``None`` without any network traffic.

    Returns:
        A dict with the keys ``qid``, ``label``, ``description``, ``entity``,
        ``source``, ``wikidata_status`` and ``retrieved_at`` (UTC ISO-8601
        timestamp), or ``None`` when the search has no usable hit, either
        request answers with an HTTP status >= 400, or a response body is not
        the expected JSON.

    Raises:
        httpx transport errors (timeouts, connection failures) are not caught
        here and propagate to the caller.
    """
    term = (subject or "").strip()
    if not term:
        return None

    headers = {"Accept": "application/json", "User-Agent": WIKIDATA_USER_AGENT}
    async with httpx.AsyncClient(timeout=WIKIDATA_TIMEOUT, follow_redirects=True) as client:
        search = await client.get(
            "https://www.wikidata.org/w/api.php",
            params={
                "action": "wbsearchentities",
                "search": term,
                "language": "en",
                "format": "json",
                "limit": 1,
            },
            headers=headers,
        )
        if search.status_code >= 400:
            return None
        try:
            payload = search.json()
        except ValueError:
            # A non-JSON body is treated like any other upstream failure.
            return None

        results = payload.get("search") if isinstance(payload, dict) else None
        if not isinstance(results, list) or not results:
            return None
        top = results[0]
        if not isinstance(top, dict):
            return None
        qid = top.get("id")
        if not qid:
            return None

        entity = await client.get(
            f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json",
            params={"flavor": "dump"},
            headers=headers,
        )
        if entity.status_code >= 400:
            return None
        try:
            entity_payload = entity.json()
        except ValueError:
            return None

    entities = entity_payload.get("entities") if isinstance(entity_payload, dict) else None
    if not isinstance(entities, dict):
        entities = {}
    if qid in entities:
        entity_block = entities[qid]
    elif len(entities) == 1:
        # NOTE(review): the payload may be keyed by a different ID than the one
        # requested (e.g. a redirect target); use the sole entity in that case.
        entity_block = next(iter(entities.values()))
    else:
        entity_block = {}

    return {
        "qid": qid,
        "label": top.get("label") or term,
        "description": top.get("description"),
        "entity": entity_block,
        "source": "wikidata",
        "wikidata_status": "hit",
        "retrieved_at": datetime.now(timezone.utc).isoformat(),
    }
def _is_plausible_entity_id(value: str) -> bool:
    """Return True when *value* holds only ASCII letters, digits and hyphens."""
    return value.isascii() and all(ch.isalnum() or ch == "-" for ch in value)


def _english_value(terms: Any) -> Optional[str]:
    """Return the ``value`` of the ``en`` entry of a labels/descriptions map."""
    if not isinstance(terms, dict):
        return None
    english = terms.get("en")
    return english.get("value") if isinstance(english, dict) else None


async def fetch_wikidata_entity(qid: str) -> Optional[dict[str, Any]]:
    """Fetch one Wikidata entity by ID and extract its English label/description.

    Args:
        qid: Entity identifier such as ``"Q42"``. Surrounding whitespace is
            stripped. ``None``, empty values and values containing anything
            other than ASCII letters, digits or ``-`` return ``None`` without
            a network request, so the value cannot alter the request URL.

    Returns:
        A dict with the keys ``qid`` (as passed in, stripped), ``entity``,
        ``label``, ``description``, ``source``, ``wikidata_status`` and
        ``retrieved_at`` (UTC ISO-8601 timestamp), or ``None`` when the ID is
        rejected, the request answers with an HTTP status >= 400, or the body
        is not JSON.

    Raises:
        httpx transport errors (timeouts, connection failures) are not caught
        here and propagate to the caller.
    """
    qid = (qid or "").strip()
    if not qid or not _is_plausible_entity_id(qid):
        return None

    async with httpx.AsyncClient(timeout=WIKIDATA_TIMEOUT, follow_redirects=True) as client:
        entity = await client.get(
            f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json",
            params={"flavor": "dump"},
            headers={"Accept": "application/json", "User-Agent": WIKIDATA_USER_AGENT},
        )
        if entity.status_code >= 400:
            return None
        try:
            entity_payload = entity.json()
        except ValueError:
            # A non-JSON body is treated like any other upstream failure.
            return None

    entities = entity_payload.get("entities") if isinstance(entity_payload, dict) else None
    if not isinstance(entities, dict):
        entities = {}
    if qid in entities:
        entity_block = entities[qid]
    elif len(entities) == 1:
        # NOTE(review): the payload may be keyed by a different ID than the one
        # requested (e.g. a redirect target or a case-normalised ID); use the
        # sole entity instead of returning an empty block.
        entity_block = next(iter(entities.values()))
    else:
        entity_block = {}

    label = None
    description = None
    if isinstance(entity_block, dict):
        label = _english_value(entity_block.get("labels", {}))
        description = _english_value(entity_block.get("descriptions", {}))

    return {
        "qid": qid,
        "entity": entity_block,
        "label": label,
        "description": description,
        "source": "wikidata",
        "wikidata_status": "enriched",
        "retrieved_at": datetime.now(timezone.utc).isoformat(),
    }
|