triple_export.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. """Serialize resolved Atlas entities to Turtle for inspection or write-path preparation."""
  2. from __future__ import annotations
  3. import json
  4. from app.models import AtlasClaim, AtlasEntity
  5. PREFIXES = """@prefix atlas: <http://world.eu.org/atlas_ontology#> .
  6. @prefix atlas_data: <http://world.eu.org/atlas_data#> .
  7. @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
  8. @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
  9. """
  10. def _safe_fragment(value: str) -> str:
  11. value = (value or "").strip().lower()
  12. out = []
  13. for ch in value:
  14. if ch.isalnum() or ch in ["_", "-"]:
  15. out.append(ch)
  16. else:
  17. out.append("_")
  18. frag = "".join(out).strip("_")
  19. return frag or "entity"
  20. def _entity_node(entity: AtlasEntity) -> str:
  21. return f"atlas_data:entity_{_safe_fragment(entity.atlas_id)}"
  22. def _alias_node(alias_label: str) -> str:
  23. return f"atlas_data:alias_{_safe_fragment(alias_label)}"
  24. def _claim_node(claim: AtlasClaim) -> str:
  25. hash_part = claim.claim_id.split("_", maxsplit=2)[-1]
  26. return f"atlas_data:Claim_{_safe_fragment(hash_part)}"
  27. def _provenance_node(claim: AtlasClaim) -> str:
  28. prov = claim.provenance
  29. if prov is None:
  30. return ""
  31. parts = [claim.claim_id, prov.source, prov.retrieval_method, prov.retrieved_at or ""]
  32. return f"atlas_data:prov_{_safe_fragment('_'.join(parts))}"
  33. def _literal(text: str) -> str:
  34. return text.replace("\\", "\\\\").replace('"', '\\"')
  35. def _claim_object_iri(claim: AtlasClaim) -> str | None:
  36. if claim.object.kind == "type":
  37. return claim.object.value
  38. if claim.object.kind == "identifier" and claim.object.id_type:
  39. return f"atlas_data:ident_{_safe_fragment(claim.object.id_type + '_' + claim.object.value)}"
  40. return None
  41. def entity_to_turtle(entity: AtlasEntity) -> str:
  42. lines: list[str] = [PREFIXES]
  43. subject = _entity_node(entity)
  44. lines.append(f"{subject} a atlas:Entity ;")
  45. lines.append(f' atlas:canonicalLabel "{_literal(entity.canonical_label)}" ;')
  46. if entity.canonical_description:
  47. lines.append(f' atlas:canonicalDescription "{_literal(entity.canonical_description)}" ;')
  48. if entity.entity_type and entity.entity_type != "unknown":
  49. lines.append(f" atlas:hasCanonicalType atlas:{_safe_fragment(entity.entity_type).capitalize()} ;")
  50. wd = entity.raw_payload.get("wikidata") if isinstance(entity.raw_payload, dict) else None
  51. if isinstance(wd, dict) and wd.get("status") == "ok":
  52. lines.append(f' atlas:rawWikidataJson "{_literal(json.dumps(wd, ensure_ascii=False))}"^^xsd:string ;')
  53. if isinstance(entity.raw_payload, dict):
  54. trends_payload = {k: v for k, v in entity.raw_payload.items() if k != "wikidata"}
  55. if trends_payload:
  56. lines.append(f' atlas:rawTrendsJson "{_literal(json.dumps(trends_payload, ensure_ascii=False))}"^^xsd:string ;')
  57. for alias in entity.aliases:
  58. lines.append(f" atlas:hasAlias {_alias_node(alias.label)} ;")
  59. for claim in entity.claims:
  60. lines.append(f" atlas:hasClaim {_claim_node(claim)} ;")
  61. lines.append(f" atlas:needsCuration {'true' if entity.needs_curation else 'false'} .")
  62. lines.append("")
  63. for alias in entity.aliases:
  64. alias_node = _alias_node(alias.label)
  65. lines.append(f"{alias_node} a atlas:Alias ;")
  66. lines.append(f' atlas:aliasLabel "{_literal(alias.label)}" ;')
  67. lines.append(f" atlas:resolvedTo {subject} .")
  68. lines.append("")
  69. # Materialize identifier resources from identifier claims.
  70. for claim in entity.claims:
  71. if claim.predicate != "atlas:hasIdentifier" or claim.object.kind != "identifier":
  72. continue
  73. ident_node = _claim_object_iri(claim)
  74. if not ident_node:
  75. continue
  76. id_type = claim.object.id_type or "unknown"
  77. id_type_iri = "atlas:Mid" if id_type == "mid" else ("atlas:WikidataQID" if id_type == "qid" else f"atlas:{_safe_fragment(id_type).capitalize()}")
  78. lines.append(f"{ident_node} a atlas:Identifier ;")
  79. lines.append(f' atlas:identifierValue "{_literal(claim.object.value)}" ;')
  80. lines.append(f' atlas:identifierType {id_type_iri} .')
  81. lines.append("")
  82. for claim in entity.claims:
  83. claim_node = _claim_node(claim)
  84. lines.append(f"{claim_node} a atlas:Claim ;")
  85. lines.append(f" atlas:claimSubjectIri {subject} ;")
  86. lines.append(f' atlas:claimPredicate "{_literal(claim.predicate)}" ;')
  87. obj_iri = _claim_object_iri(claim)
  88. if obj_iri:
  89. lines.append(f" atlas:claimObjectIri {obj_iri} ;")
  90. else:
  91. lines.append(f' atlas:claimObjectLiteral "{_literal(claim.object.value)}" ;')
  92. lines.append(f' atlas:claimLayer "{_literal(claim.layer)}" ;')
  93. lines.append(f' atlas:claimStatus "{_literal(claim.status)}" ;')
  94. prov_node = _provenance_node(claim)
  95. if prov_node:
  96. lines.append(f" atlas:hasProvenance {prov_node} .")
  97. else:
  98. lines[-1] = lines[-1].rstrip(" ;") + " ."
  99. lines.append("")
  100. if claim.provenance:
  101. prov = claim.provenance
  102. lines.append(f"{prov_node} a atlas:Provenance ;")
  103. lines.append(f' atlas:provenanceSource "{_literal(prov.source)}" ;')
  104. lines.append(f' atlas:retrievalMethod "{_literal(prov.retrieval_method)}" ;')
  105. lines.append(f' atlas:confidence "{prov.confidence}"^^xsd:decimal ;')
  106. if prov.retrieved_at:
  107. lines.append(f' atlas:retrievedAt "{_literal(prov.retrieved_at)}"^^xsd:dateTime .')
  108. else:
  109. lines[-1] = lines[-1].rstrip(" ;") + " ."
  110. lines.append("")
  111. return "\n".join(lines).strip() + "\n"