resolve.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. from __future__ import annotations
  2. import hashlib
  3. import os
  4. from dataclasses import dataclass
  5. from typing import Any
  6. from .atlas_model import CurateFlag, Entity, Identifier
  7. from .atlas_store import _sparql_select, _sparql_update
  8. from .wikidata import WikidataSearch
  9. ATLAS = "http://world.eu.org/atlas_ontology#"
  10. ATLAS_D = "http://world.eu.org/atlas_data#"
  11. DEFAULT_ENDPOINT = os.getenv("ATLAS_VIRTUOSO_MCP_SSE_URL", "http://192.168.0.249:8501/mcp/sse")
  12. DEFAULT_UPDATE_ENDPOINT = os.getenv("ATLAS_VIRTUOSO_MCP_SSE_URL", DEFAULT_ENDPOINT)
  13. def _hash_id(subject: str) -> str:
  14. return hashlib.sha1(subject.strip().lower().encode("utf-8")).hexdigest()[:16]
  15. def _entity_iri(atlas_id: str) -> str:
  16. return f"<{ATLAS_D}entity_{atlas_id}>"
  17. def _escape_literal(value: str) -> str:
  18. return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
  19. def _label_lookup_query(subject: str) -> str:
  20. safe = _escape_literal(subject)
  21. return f"""
  22. PREFIX atlas: <{ATLAS}>
  23. SELECT ?atlasId ?label ?type ?qid ?alias WHERE {{
  24. VALUES ?needle {{ \"{safe}\" }}
  25. {{
  26. ?entity a atlas:Entity ;
  27. atlas:atlasId ?atlasId ;
  28. atlas:canonicalLabel ?label .
  29. OPTIONAL {{ ?entity atlas:hasCanonicalType ?type . }}
  30. OPTIONAL {{ ?entity atlas:hasIdentifier ?ident . ?ident atlas:scheme \"wikidata-qid\" ; atlas:value ?qid . }}
  31. FILTER(LCASE(STR(?label)) = LCASE(?needle))
  32. }}
  33. UNION
  34. {{
  35. ?entity a atlas:Entity ;
  36. atlas:atlasId ?atlasId ;
  37. atlas:aliasLabel ?alias .
  38. OPTIONAL {{ ?entity atlas:canonicalLabel ?label . }}
  39. OPTIONAL {{ ?entity atlas:hasCanonicalType ?type . }}
  40. OPTIONAL {{ ?entity atlas:hasIdentifier ?ident . ?ident atlas:scheme \"wikidata-qid\" ; atlas:value ?qid . }}
  41. FILTER(LCASE(STR(?alias)) = LCASE(?needle))
  42. }}
  43. }}
  44. LIMIT 1
  45. """.strip()
  46. async def _wikidata_lookup(subject: str) -> dict[str, Any] | None:
  47. search = WikidataSearch({"search": subject, "limit": 1})
  48. result = await search.search()
  49. items = result.get("results", [])
  50. return items[0] if items else None
  51. def _entity_from_wikidata(subject: str, wd: dict[str, Any]) -> Entity:
  52. atlas_id = _hash_id(subject)
  53. label = wd.get("label") or subject
  54. description = wd.get("description")
  55. qid = wd.get("id")
  56. entity_type = wd.get("type") or "Thing"
  57. ent = Entity(
  58. id=atlas_id,
  59. label=label,
  60. description=description,
  61. type=f"atlas:{entity_type}" if not entity_type.startswith("atlas:") else entity_type,
  62. aliases=[subject] if subject.lower() != label.lower() else [],
  63. identifiers=[Identifier(scheme="wikidata-qid", value=qid)] if qid else [],
  64. needs_curation=False,
  65. )
  66. return ent
  67. def _entity_to_turtle(entity: Entity) -> str:
  68. lines = []
  69. e = _entity_iri(entity.id)
  70. lines.append(f"{e}")
  71. lines.append(" a atlas:Entity ;")
  72. lines.append(f' atlas:atlasId "{_escape_literal(entity.id)}" ;')
  73. lines.append(f' atlas:canonicalLabel "{_escape_literal(entity.label)}"@en ;')
  74. if entity.description:
  75. lines.append(f' atlas:canonicalDescription "{_escape_literal(entity.description)}"@en ;')
  76. if entity.type:
  77. lines.append(f" atlas:hasCanonicalType {entity.type} ;")
  78. for alias in entity.aliases:
  79. lines.append(f' atlas:aliasLabel "{_escape_literal(alias)}"@en ;')
  80. for ident in entity.identifiers:
  81. ident_iri = f"<{ATLAS_D}ident_{ident.scheme}_{_hash_id(ident.value)}>.".rstrip(".")
  82. lines.append(f" atlas:hasIdentifier {ident_iri} ;")
  83. lines.append(f' atlas:needsCuration "{str(entity.needs_curation).lower()}"^^xsd:boolean .')
  84. lines.append("")
  85. for ident in entity.identifiers:
  86. ident_iri = f"<{ATLAS_D}ident_{ident.scheme}_{_hash_id(ident.value)}>"
  87. lines.append(f"{ident_iri}")
  88. lines.append(" a atlas:Identifier ;")
  89. lines.append(f' atlas:scheme "{_escape_literal(ident.scheme)}" ;')
  90. lines.append(f' atlas:value "{_escape_literal(ident.value)}" .')
  91. lines.append("")
  92. return "\n".join(lines)
  93. def _flatten_exception_details(exc: BaseException) -> list[str]:
  94. parts = [f"{type(exc).__name__}: {exc}"]
  95. nested = getattr(exc, "exceptions", None)
  96. if nested:
  97. for sub in nested:
  98. parts.extend(_flatten_exception_details(sub))
  99. return parts
  100. async def _persist_entity(entity: Entity) -> None:
  101. ttl = _entity_to_turtle(entity)
  102. query = f"""
  103. PREFIX atlas: <{ATLAS}>
  104. PREFIX atlas_data: <{ATLAS_D}>
  105. PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
  106. INSERT DATA {{
  107. GRAPH <{ATLAS_D}> {{
  108. {ttl}
  109. }}
  110. }}
  111. """.strip()
  112. await _sparql_update(DEFAULT_UPDATE_ENDPOINT, query)
  113. async def _load_entity(subject: str) -> dict[str, Any] | None:
  114. rows = await _sparql_select(DEFAULT_ENDPOINT, _label_lookup_query(subject))
  115. if not rows:
  116. return None
  117. row = rows[0]
  118. return {
  119. "atlas_id": row.get("atlasId", {}).get("value"),
  120. "label": row.get("label", {}).get("value"),
  121. "type": row.get("type", {}).get("value"),
  122. "wikidata_id": row.get("qid", {}).get("value"),
  123. "alias": row.get("alias", {}).get("value"),
  124. }
  125. @dataclass
  126. class ResolveService:
  127. load_entity_fn: Any = _load_entity
  128. wikidata_lookup_fn: Any = _wikidata_lookup
  129. persist_entity_fn: Any = _persist_entity
  130. async def resolve(self, *, subject: str, context: dict[str, Any] | None = None,
  131. constraints: dict[str, Any] | None = None,
  132. hints: dict[str, Any] | None = None,
  133. debug: dict[str, Any] | None = None) -> dict[str, Any]:
  134. try:
  135. subject = (subject or "").strip()
  136. if not subject:
  137. return {"status": "not_found"}
  138. stored = await self.load_entity_fn(subject)
  139. if stored:
  140. return {
  141. "status": "resolved",
  142. "atlas_id": stored.get("atlas_id"),
  143. "label": stored.get("label"),
  144. "type": stored.get("type"),
  145. "wikidata_id": stored.get("wikidata_id"),
  146. "alias": stored.get("alias") or subject,
  147. }
  148. wd = await self.wikidata_lookup_fn(subject)
  149. if not wd:
  150. return {"status": "not_found"}
  151. entity = _entity_from_wikidata(subject, wd)
  152. await self.persist_entity_fn(entity)
  153. return {
  154. "status": "resolved",
  155. "atlas_id": entity.id,
  156. "label": entity.label,
  157. "type": entity.type,
  158. "wikidata_id": wd.get("id"),
  159. "alias": subject,
  160. }
  161. except Exception as exc:
  162. detail = " | ".join(_flatten_exception_details(exc))
  163. return {
  164. "status": "error",
  165. "error": {"code": "RESOLVE_FAILED", "message": detail},
  166. }