resolve.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. from __future__ import annotations
  2. import hashlib
  3. import os
  4. import logging
  5. from dataclasses import dataclass
  6. from typing import Any
  7. from .atlas_model import Entity, Identifier
  8. from .atlas_store import load_entity_by_subject, save_entity_minimal
  9. from .wikidata import WikidataSearch
  10. ATLAS = "http://world.eu.org/atlas_ontology#"
  11. DEFAULT_ENDPOINT = os.getenv("ATLAS_VIRTUOSO_MCP_SSE_URL", "http://192.168.0.249:8501/mcp/sse")
  12. DEFAULT_UPDATE_ENDPOINT = os.getenv("ATLAS_VIRTUOSO_MCP_SSE_URL", DEFAULT_ENDPOINT)
  13. DEBUG_LOGS = os.getenv("ATLAS_DEBUG_LOGS", "false").lower() in {"1", "true", "yes", "on"}
  14. logger = logging.getLogger(__name__)
  15. def _hash_id(subject: str) -> str:
  16. return hashlib.sha1(subject.strip().lower().encode("utf-8")).hexdigest()[:16]
  17. def _entity_iri(atlas_id: str) -> str:
  18. return f"atlas_data:entity_{atlas_id}"
  19. async def _wikidata_lookup(subject: str) -> dict[str, Any] | None:
  20. search = WikidataSearch({"search": subject, "limit": 1})
  21. result = await search.search()
  22. items = result.get("results", [])
  23. return items[0] if items else None
  24. def _infer_atlas_type(label: str | None, description: str | None) -> str:
  25. text = f"{label or ''} {description or ''}".lower()
  26. if any(k in text for k in ["president", "person", "singer", "composer", "human", "actor", "writer"]):
  27. return "atlas:Person"
  28. if any(k in text for k in ["city", "town", "village", "country", "state", "location", "place"]):
  29. return "atlas:Location"
  30. if any(k in text for k in ["company", "organization", "organisation", "institution", "foundation", "band"]):
  31. return "atlas:Organization"
  32. return "atlas:Other"
  33. def _entity_from_wikidata(subject: str, wd: dict[str, Any]) -> Entity:
  34. atlas_id = _hash_id(subject)
  35. label = wd.get("label") or subject
  36. description = wd.get("description")
  37. qid = wd.get("id")
  38. entity_type = _infer_atlas_type(label, description)
  39. ent = Entity(
  40. id=atlas_id,
  41. label=label,
  42. description=description,
  43. type=entity_type,
  44. aliases=[subject] if subject.lower() != label.lower() else [],
  45. identifiers=[Identifier(scheme="wikidata-qid", value=qid)] if qid else [],
  46. needs_curation=True,
  47. )
  48. return ent
  49. def _flatten_exception_details(exc: BaseException) -> list[str]:
  50. parts = [f"{type(exc).__name__}: {exc}"]
  51. nested = getattr(exc, "exceptions", None)
  52. if nested:
  53. for sub in nested:
  54. parts.extend(_flatten_exception_details(sub))
  55. return parts
  56. async def _persist_entity(entity: Entity) -> None:
  57. await save_entity_minimal(entity, DEFAULT_UPDATE_ENDPOINT)
  58. async def _load_entity(subject: str) -> dict[str, Any] | None:
  59. return await load_entity_by_subject(subject, DEFAULT_ENDPOINT)
  60. @dataclass
  61. class ResolveService:
  62. load_entity_fn: Any = _load_entity
  63. wikidata_lookup_fn: Any = _wikidata_lookup
  64. persist_entity_fn: Any = _persist_entity
  65. async def resolve(self, *, subject: str, context: dict[str, Any] | None = None,
  66. constraints: dict[str, Any] | None = None,
  67. hints: dict[str, Any] | None = None,
  68. debug: dict[str, Any] | None = None) -> dict[str, Any]:
  69. try:
  70. subject = (subject or "").strip()
  71. if not subject:
  72. return {"status": "not_found"}
  73. if DEBUG_LOGS:
  74. logger.info("resolve start subject=%s", subject)
  75. stored = await self.load_entity_fn(subject)
  76. if stored:
  77. if DEBUG_LOGS:
  78. logger.info("store hit subject=%s atlas_id=%s", subject, stored.get("atlas_id"))
  79. return {
  80. "status": "resolved",
  81. "atlas_id": stored.get("atlas_id"),
  82. "label": stored.get("label"),
  83. "type": stored.get("type"),
  84. "wikidata_id": stored.get("wikidata_id"),
  85. "alias": stored.get("alias") or subject,
  86. }
  87. wd = await self.wikidata_lookup_fn(subject)
  88. if not wd:
  89. if DEBUG_LOGS:
  90. logger.info("wikidata miss subject=%s", subject)
  91. return {"status": "not_found"}
  92. entity = _entity_from_wikidata(subject, wd)
  93. if DEBUG_LOGS:
  94. logger.info(
  95. "wikidata hit subject=%s qid=%s atlas_id=%s type=%s",
  96. subject,
  97. wd.get("id"),
  98. entity.id,
  99. entity.type,
  100. )
  101. await self.persist_entity_fn(entity)
  102. return {
  103. "status": "resolved",
  104. "atlas_id": entity.id,
  105. "label": entity.label,
  106. "type": entity.type,
  107. "wikidata_id": wd.get("id"),
  108. "alias": subject,
  109. }
  110. except Exception as exc:
  111. detail = " | ".join(_flatten_exception_details(exc))
  112. return {
  113. "status": "error",
  114. "error": {"code": "RESOLVE_FAILED", "message": detail},
  115. }