# triple_export.py
  1. """Serialize resolved Atlas entities to Turtle for inspection or write-path preparation."""
  2. from __future__ import annotations
  3. import json
  4. from app.models import AtlasEntity, AtlasProvenance
  5. PREFIXES = """@prefix atlas: <http://world.eu.org/atlas_ontology#> .
  6. @prefix atlas_data: <http://world.eu.org/atlas_data#> .
  7. @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
  8. @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
  9. """
  10. def _safe_fragment(value: str) -> str:
  11. value = (value or "").strip().lower()
  12. out = []
  13. for ch in value:
  14. if ch.isalnum() or ch in ["_", "-"]:
  15. out.append(ch)
  16. else:
  17. out.append("_")
  18. frag = "".join(out).strip("_")
  19. return frag or "entity"
  20. def _entity_node(entity: AtlasEntity) -> str:
  21. return f"atlas_data:entity_{_safe_fragment(entity.atlas_id)}"
  22. def _alias_node(alias_label: str) -> str:
  23. return f"atlas_data:alias_{_safe_fragment(alias_label)}"
  24. def _identifier_node(identifier_value: str) -> str:
  25. return f"atlas_data:ident_{_safe_fragment(identifier_value)}"
  26. def _provenance_node(source: str, retrieved_at: str | None, retrieval_method: str) -> str:
  27. parts = [source, retrieval_method, retrieved_at or ""]
  28. return f"atlas_data:prov_{_safe_fragment('_'.join(parts))}"
  29. def _type_assertion_node(entity: AtlasEntity, source: str) -> str:
  30. return f"atlas_data:typeassert_{_safe_fragment(entity.atlas_id)}_{_safe_fragment(source)}"
  31. def _literal(text: str) -> str:
  32. return text.replace("\\", "\\\\").replace('"', '\\"')
  33. def _identifier_type_resource(identifier_type: str) -> str:
  34. kind = _safe_fragment(identifier_type)
  35. if kind == "mid":
  36. return "atlas:Mid"
  37. if kind in {"qid", "wikidata_qid", "wikidataqid"}:
  38. return "atlas:WikidataQID"
  39. return f"atlas:{kind.capitalize()}"
  40. def _pick_provenance(entity: AtlasEntity, source_hint: str | None = None, method_hint: str | None = None) -> AtlasProvenance | None:
  41. if not entity.provenance:
  42. return None
  43. if method_hint:
  44. for p in entity.provenance:
  45. if p.retrieval_method == method_hint:
  46. return p
  47. if source_hint:
  48. for p in entity.provenance:
  49. if p.source == source_hint:
  50. return p
  51. return entity.provenance[0]
  52. def entity_to_turtle(entity: AtlasEntity) -> str:
  53. lines: list[str] = [PREFIXES]
  54. subject = _entity_node(entity)
  55. claim_nodes = [f"atlas_data:claim_ident_{_safe_fragment(i.value)}" for i in entity.identifiers]
  56. if entity.entity_type and entity.entity_type != "unknown":
  57. claim_nodes.append(f"atlas_data:claim_type_{_safe_fragment(entity.atlas_id)}")
  58. lines.append(f"{subject} a atlas:Entity ;")
  59. lines.append(f' atlas:canonicalLabel "{_literal(entity.canonical_label)}" ;')
  60. if entity.canonical_description:
  61. lines.append(f' atlas:canonicalDescription "{_literal(entity.canonical_description)}" ;')
  62. # Lean raw payload persistence (as JSON strings)
  63. wd = entity.raw_payload.get("wikidata") if isinstance(entity.raw_payload, dict) else None
  64. if isinstance(wd, dict) and wd.get("status") == "ok":
  65. lines.append(f' atlas:rawWikidataJson "{_literal(json.dumps(wd, ensure_ascii=False))}"^^xsd:string ;')
  66. trends_payload = entity.raw_payload.get("g_trends_payload") or {}
  67. # In our current model, trends live under raw_payload keys directly (non-wikidata)
  68. if isinstance(entity.raw_payload, dict):
  69. trends_payload = {k: v for k, v in entity.raw_payload.items() if k != "wikidata"}
  70. if isinstance(trends_payload, dict) and trends_payload:
  71. lines.append(f' atlas:rawTrendsJson "{_literal(json.dumps(trends_payload, ensure_ascii=False))}"^^xsd:string ;')
  72. if entity.entity_type and entity.entity_type != "unknown":
  73. lines.append(f" atlas:hasCanonicalType atlas:{_safe_fragment(entity.entity_type).capitalize()} ;")
  74. for alias in entity.aliases:
  75. lines.append(f" atlas:hasAlias {_alias_node(alias.label)} ;")
  76. for ident in entity.identifiers:
  77. lines.append(f" atlas:hasIdentifier {_identifier_node(ident.value)} ;")
  78. for claim_node in claim_nodes:
  79. lines.append(f" atlas:hasClaim {claim_node} ;")
  80. lines.append(f" atlas:needsCuration {'true' if entity.needs_curation else 'false'} .")
  81. lines.append("")
  82. for alias in entity.aliases:
  83. alias_node = _alias_node(alias.label)
  84. lines.append(f"{alias_node} a atlas:Alias ;")
  85. lines.append(f' atlas:aliasLabel "{_literal(alias.label)}" ;')
  86. lines.append(f" atlas:resolvedTo {subject} .")
  87. lines.append("")
  88. for ident in entity.identifiers:
  89. ident_node = _identifier_node(ident.value)
  90. lines.append(f"{ident_node} a atlas:Identifier ;")
  91. lines.append(f' atlas:identifierValue "{_literal(ident.value)}" ;')
  92. lines.append(f' atlas:identifierSource "{_literal(ident.source)}" ;')
  93. lines.append(f" atlas:identifierType {_identifier_type_resource(ident.identifier_type)} ;")
  94. prov = _pick_provenance(entity, source_hint=ident.source)
  95. if prov:
  96. lines.append(f" atlas:hasIdentifierProvenance {_provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)} .")
  97. else:
  98. lines[-1] = lines[-1].rstrip(" ;") + " ."
  99. lines.append("")
  100. for prov in entity.provenance:
  101. prov_node = _provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)
  102. lines.append(f"{prov_node} a atlas:Provenance ;")
  103. lines.append(f' atlas:provenanceSource "{_literal(prov.source)}" ;')
  104. lines.append(f' atlas:retrievalMethod "{_literal(prov.retrieval_method)}" ;')
  105. lines.append(f' atlas:confidence "{prov.confidence}"^^xsd:decimal ;')
  106. if prov.retrieved_at:
  107. lines.append(f' atlas:retrievedAt "{_literal(prov.retrieved_at)}"^^xsd:dateTime .')
  108. else:
  109. lines[-1] = lines[-1].rstrip(" ;") + " ."
  110. lines.append("")
  111. wd = entity.raw_payload.get("wikidata") or {}
  112. if wd.get("status") == "ok":
  113. typeassert_node = _type_assertion_node(entity, "wikidata")
  114. lines.append(f"{typeassert_node} a atlas:TypeAssertion ;")
  115. lines.append(" atlas:assertedType atlas:WikidataType_Q5 ;")
  116. prov = _pick_provenance(entity, source_hint="wikidata")
  117. if prov:
  118. lines.append(f" atlas:hasAssertionProvenance {_provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)} ;")
  119. lines.append(' atlas:assertionReason "wikidata instance-of" .')
  120. lines.append("")
  121. if entity.entity_type and entity.entity_type != "unknown":
  122. typeassert_node = _type_assertion_node(entity, "canonical")
  123. lines.append(f"{typeassert_node} a atlas:TypeAssertion ;")
  124. lines.append(f" atlas:assertedType atlas:{_safe_fragment(entity.entity_type).capitalize()} ;")
  125. prov = _pick_provenance(entity, method_hint="type-classification")
  126. if prov:
  127. lines.append(f" atlas:hasAssertionProvenance {_provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)} ;")
  128. lines.append(' atlas:assertionReason "canonical type adjudication" .')
  129. lines.append("")
  130. # Claim nodes with explicit claim-object semantics
  131. for ident in entity.identifiers:
  132. claim_node = f"atlas_data:claim_ident_{_safe_fragment(ident.value)}"
  133. ident_node = _identifier_node(ident.value)
  134. prov = _pick_provenance(entity, source_hint=ident.source)
  135. lines.append(f"{claim_node} a atlas:Claim ;")
  136. lines.append(f" atlas:claimSubjectIri {subject} ;")
  137. lines.append(' atlas:claimPredicate "atlas:hasIdentifier" ;')
  138. lines.append(f" atlas:claimObjectIri {ident_node} ;")
  139. lines.append(' atlas:claimLayer "raw" ;')
  140. lines.append(' atlas:claimStatus "active" ;')
  141. if prov:
  142. lines.append(f" atlas:hasProvenance {_provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)} .")
  143. else:
  144. lines[-1] = lines[-1].rstrip(" ;") + " ."
  145. lines.append("")
  146. if entity.entity_type and entity.entity_type != "unknown":
  147. claim_node = f"atlas_data:claim_type_{_safe_fragment(entity.atlas_id)}"
  148. prov = _pick_provenance(entity, method_hint="type-classification")
  149. lines.append(f"{claim_node} a atlas:Claim ;")
  150. lines.append(f" atlas:claimSubjectIri {subject} ;")
  151. lines.append(' atlas:claimPredicate "atlas:hasCanonicalType" ;')
  152. lines.append(f" atlas:claimObjectIri atlas:{_safe_fragment(entity.entity_type).capitalize()} ;")
  153. lines.append(' atlas:claimLayer "derived" ;')
  154. lines.append(' atlas:claimStatus "active" ;')
  155. if prov:
  156. lines.append(f" atlas:hasProvenance {_provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)} .")
  157. else:
  158. lines[-1] = lines[-1].rstrip(" ;") + " ."
  159. lines.append("")
  160. return "\n".join(lines).strip() + "\n"