wikidata_type_reasoner.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. """Infer Atlas canonical types from Wikidata classes via ontology reasoning."""
  2. from __future__ import annotations
  3. from functools import lru_cache
  4. from pathlib import Path
  5. from typing import Iterable, Optional
  6. from rdflib import Graph, URIRef
  7. from rdflib.namespace import RDFS
  8. ONTOLOGY_PATH = Path(__file__).resolve().parents[1] / "ontology" / "wikidata_subclassof.ttl"
  9. ROOT_CLASS_MAP: dict[str, str] = {
  10. "http://dbpedia.org/ontology/Person": "Person",
  11. "http://dbpedia.org/ontology/Artist": "Person",
  12. "http://dbpedia.org/ontology/Politician": "Person",
  13. "http://dbpedia.org/ontology/Organisation": "Organization",
  14. "http://dbpedia.org/ontology/Company": "Organization",
  15. "http://dbpedia.org/ontology/University": "Organization",
  16. "http://dbpedia.org/ontology/Place": "Location",
  17. "http://dbpedia.org/ontology/Location": "Location",
  18. "http://dbpedia.org/ontology/PopulatedPlace": "Location",
  19. "http://dbpedia.org/ontology/Settlement": "Location",
  20. "http://dbpedia.org/ontology/CreativeWork": "CreativeWork",
  21. "http://dbpedia.org/ontology/Film": "CreativeWork",
  22. "http://dbpedia.org/ontology/MusicalWork": "CreativeWork",
  23. "http://dbpedia.org/ontology/Album": "CreativeWork",
  24. "http://dbpedia.org/ontology/Event": "Event",
  25. "http://dbpedia.org/ontology/Product": "Product",
  26. "http://dbpedia.org/ontology/Species": "Taxon",
  27. "http://dbpedia.org/ontology/Taxon": "Taxon",
  28. }
  29. @lru_cache(maxsize=1)
  30. def _load_graph() -> Graph:
  31. graph = Graph()
  32. if ONTOLOGY_PATH.exists():
  33. graph.parse(ONTOLOGY_PATH, format="turtle")
  34. return graph
  35. def _qid_to_uri(qid: str) -> URIRef:
  36. return URIRef(f"http://wikidata.dbpedia.org/resource/{qid}")
  37. def infer_atlas_type_from_p31(qids: Iterable[str]) -> Optional[str]:
  38. """Infer the Atlas type from Wikidata P31 classes using the ontology graph."""
  39. graph = _load_graph()
  40. if len(graph) == 0:
  41. return None
  42. root_nodes = {URIRef(uri): atlas_type for uri, atlas_type in ROOT_CLASS_MAP.items()}
  43. for qid in qids:
  44. if not qid:
  45. continue
  46. start = _qid_to_uri(qid)
  47. inferred = _walk_to_root(graph, start, root_nodes)
  48. if inferred:
  49. return inferred
  50. return None
  51. def _walk_to_root(graph: Graph, start: URIRef, roots: dict[URIRef, str]) -> Optional[str]:
  52. visited: set[URIRef] = set()
  53. queue: list[URIRef] = [start]
  54. while queue:
  55. node = queue.pop(0)
  56. if node in visited:
  57. continue
  58. visited.add(node)
  59. if node in roots:
  60. return roots[node]
  61. for parent in graph.objects(node, RDFS.subClassOf):
  62. if isinstance(parent, URIRef):
  63. queue.append(parent)
  64. return None