Lukas Goldschmidt 1 maand geleden
bovenliggende
commit
d6b9da445a
3 gewijzigde bestanden met toevoegingen van 16 en 2 verwijderingen
  1. .gitignore — 1 addition, 0 deletions
  2. app/atlas.py — 8 additions, 2 deletions
  3. app/storage_service.py — 7 additions, 0 deletions

+ 1 - 0
.gitignore

@@ -25,6 +25,7 @@ scripts/logs/
 *.log
 *.pid
 *.sqlite3
+.atlas-maintenance.checkpoint
 
 # OS / editor
 .DS_Store

+ 8 - 2
app/atlas.py

@@ -31,8 +31,11 @@ def _now_date() -> str:
 
 
 async def resolve_entity(subject: str, context: str | None = None) -> AtlasEntity:
+    # Normalize once so cache lookups and downstream resolvers speak the same name.
     normalized = normalize_entity(subject)
     token = normalized.strip().lower()
+
+    # Fast path: reuse the last resolved entity if we already have it.
     cached = _entity_cache.get(token)
     if cached is not None:
         try:
@@ -41,10 +44,10 @@ async def resolve_entity(subject: str, context: str | None = None) -> AtlasEntit
             pass
         return cached
 
+    # Prefer the local graph when it already knows this entity.
     virt_hit = await _virtuoso_store.lookup(token)
     if virt_hit is not None:
-        # Make the returned raw payload reflect the original caller input
-        # (so tests and UI/debug output stay stable).
+        # Keep debug output anchored to the caller's wording.
         if isinstance(virt_hit.raw_payload, dict):
             virt_hit.raw_payload.setdefault("source", "virtuoso")
             virt_hit.raw_payload["raw"] = subject
@@ -56,6 +59,7 @@ async def resolve_entity(subject: str, context: str | None = None) -> AtlasEntit
             pass
         return virt_hit
 
+    # Fall back to live resolution, then shape the result into Atlas form.
     resolution = resolve_entity_via_trends(subject)
     classification = await classify_entity_type(subject, resolution, context)
     wikidata = await lookup_wikidata(subject)
@@ -69,6 +73,7 @@ async def resolve_entity(subject: str, context: str | None = None) -> AtlasEntit
 
 
 def _entity_from_resolution(subject: str, resolution: dict, classification: TypeClassification, wikidata: dict | None = None) -> AtlasEntity:
+    # Pick the cleanest label we have; fall back to the caller's wording.
     canonical_label = (
         resolution.get("canonical_label")
         or resolution.get("normalized")
@@ -128,6 +133,7 @@ def _entity_from_resolution(subject: str, resolution: dict, classification: Type
             )
         )
 
+    # The derived type is the one we expect other parts of Atlas to trust.
     claims.append(
             AtlasClaim(
             claim_id=f"clm_drv_canonical_type_{claim_hash(atlas_id, 'atlas:hasCanonicalType', canonical_type, 'derived')}",

+ 7 - 0
app/storage_service.py

@@ -46,11 +46,13 @@ def entity_iri(entity_id: str) -> str:
 
 class AtlasStorageService:
     def __init__(self, call_tool: CallToolFn | None = None):
+        # Tests can inject a fake transport; production uses the MCP session client.
         self._call_tool_override = call_tool
         self._tool_cache: dict[str, tuple[float, dict[str, Any]]] = {}
         self._tool_cache_ttl_seconds = float(os.getenv("ATLAS_VIRTUOSO_CALL_CACHE_TTL", "30"))
 
     def _cache_key(self, tool_name: str, payload: dict[str, Any]) -> str:
+        # Stable keying keeps equivalent tool calls from duplicating work.
         return f"{tool_name}:{json.dumps(payload, sort_keys=True, separators=(',', ':'))}"
 
     def _cache_get(self, key: str) -> dict[str, Any] | None:
@@ -67,6 +69,7 @@ class AtlasStorageService:
         self._tool_cache[key] = (time.time() + self._tool_cache_ttl_seconds, value)
 
     async def _call_tool(self, tool_name: str, payload: dict[str, Any], *, cache_result: bool = True) -> dict[str, Any]:
+        # Cache read-heavy calls, but let write paths pass through untouched.
         cache_key = self._cache_key(tool_name, payload)
         if cache_result:
             cached = self._cache_get(cache_key)
@@ -98,6 +101,7 @@ class AtlasStorageService:
             raise RuntimeError(f"Virtuoso MCP call failed for {tool_name}: {exc}")
 
     async def write_entity(self, entity: AtlasEntity) -> dict[str, Any]:
+        # Turn an Atlas entity into Turtle, then hand it to Virtuoso in one insert.
         ttl = entity_to_turtle(entity)
         try:
             result = await self._call_tool(
@@ -129,6 +133,7 @@ class AtlasStorageService:
             }
 
     async def read_entity_claims(self, entity_id: str, include_superseded: bool = False) -> dict[str, Any]:
+        # Pull the entity's claim graph, with active claims by default.
         iri = entity_iri(entity_id)
         status_filter = "" if include_superseded else 'FILTER(?status = "active")'
         query = f"""
@@ -182,6 +187,7 @@ ORDER BY ?claim
             }
 
     async def sparql_update(self, query: str) -> dict[str, Any]:
+        # Write raw SPARQL when a higher-level helper would just get in the way.
         return await self._call_tool("sparql_update", {"query": query}, cache_result=False)
 
     async def supersede_claims(self, claim_iris: list[str]) -> None:
@@ -201,6 +207,7 @@ WHERE {{
         await self.sparql_update(query)
 
     async def replace_entity_core(self, entity_id: str, *, canonical_label: str, canonical_description: str | None, canonical_type: str | None) -> None:
+        # Replace the entity's canonical fields without disturbing its claims.
         iri = entity_iri(entity_id)
         desc_insert = f'  <{iri}> atlas:canonicalDescription "{canonical_description.replace("\\", "\\\\").replace("\"", "\\\"")}" .\n' if canonical_description else ""
         type_insert = f"  <{iri}> atlas:hasCanonicalType atlas:{canonical_type} .\n" if canonical_type else ""