"""Infer Atlas canonical types from Wikidata classes via ontology reasoning.""" from __future__ import annotations from functools import lru_cache from pathlib import Path from typing import Iterable, Optional from rdflib import Graph, URIRef from rdflib.namespace import RDFS ONTOLOGY_PATH = Path(__file__).resolve().parents[1] / "ontology" / "wikidata_subclassof.ttl" ROOT_CLASS_MAP: dict[str, str] = { "http://dbpedia.org/ontology/Person": "Person", "http://dbpedia.org/ontology/Artist": "Person", "http://dbpedia.org/ontology/Politician": "Person", "http://dbpedia.org/ontology/Organisation": "Organization", "http://dbpedia.org/ontology/Company": "Organization", "http://dbpedia.org/ontology/University": "Organization", "http://dbpedia.org/ontology/Place": "Location", "http://dbpedia.org/ontology/Location": "Location", "http://dbpedia.org/ontology/PopulatedPlace": "Location", "http://dbpedia.org/ontology/Settlement": "Location", "http://dbpedia.org/ontology/CreativeWork": "CreativeWork", "http://dbpedia.org/ontology/Film": "CreativeWork", "http://dbpedia.org/ontology/MusicalWork": "CreativeWork", "http://dbpedia.org/ontology/Album": "CreativeWork", "http://dbpedia.org/ontology/Event": "Event", "http://dbpedia.org/ontology/Product": "Product", "http://dbpedia.org/ontology/Species": "Taxon", "http://dbpedia.org/ontology/Taxon": "Taxon", } @lru_cache(maxsize=1) def _load_graph() -> Graph: graph = Graph() if ONTOLOGY_PATH.exists(): graph.parse(ONTOLOGY_PATH, format="turtle") return graph def _qid_to_uri(qid: str) -> URIRef: return URIRef(f"http://wikidata.dbpedia.org/resource/{qid}") def infer_atlas_type_from_p31(qids: Iterable[str]) -> Optional[str]: """Infer the Atlas type from Wikidata P31 classes using the ontology graph.""" graph = _load_graph() if len(graph) == 0: return None root_nodes = {URIRef(uri): atlas_type for uri, atlas_type in ROOT_CLASS_MAP.items()} for qid in qids: if not qid: continue start = _qid_to_uri(qid) inferred = _walk_to_root(graph, start, root_nodes) if inferred: return inferred return None def _walk_to_root(graph: Graph, start: URIRef, roots: dict[URIRef, str]) -> Optional[str]: visited: set[URIRef] = set() queue: list[URIRef] = [start] while queue: node = queue.pop(0) if node in visited: continue visited.add(node) if node in roots: return roots[node] for parent in graph.objects(node, RDFS.subClassOf): if isinstance(parent, URIRef): queue.append(parent) return None