| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778 |
- """Infer Atlas canonical types from Wikidata classes via ontology reasoning."""
- from __future__ import annotations
- from functools import lru_cache
- from pathlib import Path
- from typing import Iterable, Optional
- from rdflib import Graph, URIRef
- from rdflib.namespace import RDFS
# Turtle file read by the graph loader. It sits in an "ontology" directory
# that is a sibling of the directory containing this (resolved) module file.
ONTOLOGY_PATH = Path(__file__).resolve().parents[1].joinpath(
    "ontology", "wikidata_subclassof.ttl"
)
# Ontology class URI -> Atlas canonical type.
# Declared grouped by Atlas type for readability, then flattened into a plain
# URI-keyed dict; group and member order fix the resulting insertion order.
ROOT_CLASS_MAP: dict[str, str] = {
    class_uri: atlas_type
    for atlas_type, class_uris in (
        (
            "Person",
            (
                "http://dbpedia.org/ontology/Person",
                "http://dbpedia.org/ontology/Artist",
                "http://dbpedia.org/ontology/Politician",
            ),
        ),
        (
            "Organization",
            (
                "http://dbpedia.org/ontology/Organisation",
                "http://dbpedia.org/ontology/Company",
                "http://dbpedia.org/ontology/University",
            ),
        ),
        (
            "Location",
            (
                "http://dbpedia.org/ontology/Place",
                "http://dbpedia.org/ontology/Location",
                "http://dbpedia.org/ontology/PopulatedPlace",
                "http://dbpedia.org/ontology/Settlement",
            ),
        ),
        (
            "CreativeWork",
            (
                "http://dbpedia.org/ontology/CreativeWork",
                "http://dbpedia.org/ontology/Film",
                "http://dbpedia.org/ontology/MusicalWork",
                "http://dbpedia.org/ontology/Album",
            ),
        ),
        ("Event", ("http://dbpedia.org/ontology/Event",)),
        ("Product", ("http://dbpedia.org/ontology/Product",)),
        (
            "Taxon",
            (
                "http://dbpedia.org/ontology/Species",
                "http://dbpedia.org/ontology/Taxon",
            ),
        ),
    )
    for class_uri in class_uris
}
@lru_cache(maxsize=1)
def _load_graph() -> Graph:
    """Return the ontology graph, parsing the Turtle file on first use.

    The result is memoized, so the file is read at most once per process.
    If ``ONTOLOGY_PATH`` does not exist, an empty graph is returned (and
    cached like any other result).
    """
    ontology = Graph()
    if not ONTOLOGY_PATH.exists():
        return ontology
    ontology.parse(ONTOLOGY_PATH, format="turtle")
    return ontology
def _qid_to_uri(qid: str) -> URIRef:
    """Build the resource URI used to look up *qid* in the ontology graph."""
    namespace = "http://wikidata.dbpedia.org/resource/"
    return URIRef(f"{namespace}{qid}")
def infer_atlas_type_from_p31(qids: Iterable[str]) -> Optional[str]:
    """Resolve Wikidata P31 class ids to an Atlas type via the ontology graph.

    Each non-empty id in *qids* is tried in order: its resource URI is walked
    up the ``rdfs:subClassOf`` hierarchy until a class listed in
    ``ROOT_CLASS_MAP`` is reached.

    Returns:
        The Atlas type of the first id that resolves, or ``None`` when the
        ontology graph is empty or no id reaches a mapped root class.
    """
    ontology = _load_graph()
    if len(ontology) == 0:
        # Nothing to reason over (e.g. the ontology file was not found).
        return None

    roots_by_uri = {}
    for class_uri, atlas_type in ROOT_CLASS_MAP.items():
        roots_by_uri[URIRef(class_uri)] = atlas_type

    # Lazy generator: later ids are only walked if earlier ones fail.
    resolved = (
        _walk_to_root(ontology, _qid_to_uri(qid), roots_by_uri)
        for qid in qids
        if qid
    )
    return next((atlas_type for atlas_type in resolved if atlas_type), None)
def _walk_to_root(graph: Graph, start: URIRef, roots: dict[URIRef, str]) -> Optional[str]:
    """Breadth-first search up ``rdfs:subClassOf`` from *start* to a root class.

    Args:
        graph: Ontology graph providing the ``rdfs:subClassOf`` triples.
        start: Class URI at which the search begins.
        roots: Mapping of root class URI to the Atlas type it stands for.

    Returns:
        The Atlas type of the first node (in breadth-first order, *start*
        included) that is a key of *roots*, or ``None`` if no such node is
        reachable.
    """
    # Local import keeps this block self-contained; deque gives O(1) pops
    # from the left, where list.pop(0) shifts the whole list each time.
    from collections import deque

    # Nodes are marked when enqueued, so a class reachable along several
    # paths is queued once; this also guards against subclass cycles.
    seen: set[URIRef] = {start}
    frontier: deque[URIRef] = deque([start])
    while frontier:
        node = frontier.popleft()
        if node in roots:
            return roots[node]
        for parent in graph.objects(node, RDFS.subClassOf):
            # Only URI parents are followed; other node kinds are ignored.
            if isinstance(parent, URIRef) and parent not in seen:
                seen.add(parent)
                frontier.append(parent)
    return None
|