Lukas Goldschmidt 1 місяць тому
батько
коміт
6d58e75824
4 змінені файли з 156 додано та 93 видалено
  1. 110 1
      app/atlas_store.py
  2. 7 1
      app/main.py
  3. 38 91
      app/resolve.py
  4. 1 0
      run.sh

+ 110 - 1
app/atlas_store.py

@@ -17,8 +17,11 @@ from __future__ import annotations
 import json
 import re
 import asyncio
+import os
 from typing import Any, Dict, List, Optional
 
+import logging
+
 from mcp import ClientSession
 from mcp.client.sse import sse_client
 
@@ -40,6 +43,10 @@ PREFIX xsd:        <{XSD}>
 PREFIX rdf:        <{RDF}>
 """
 
+DEFAULT_GRAPH_IRI = os.getenv("ATLAS_GRAPH_IRI", ATLAS_D)
+DEBUG_LOGS = os.getenv("ATLAS_DEBUG_LOGS", "false").lower() in {"1", "true", "yes", "on"}
+logger = logging.getLogger(__name__)
+
 
 # ---------------------------------------------------------------------------
 # Internal helpers
@@ -107,12 +114,114 @@ async def _sparql_select(endpoint: str, query: str) -> List[Dict[str, Any]]:
                         raise RuntimeError(f"sparql_query failed: {result}")
 
                 data = result.structuredContent if result.structuredContent is not None else result.content
+                if DEBUG_LOGS:
+                    if isinstance(data, dict):
+                        logger.info("sparql_select raw keys=%s", list(data.keys()))
+                    else:
+                        logger.info("sparql_select raw type=%s", type(data).__name__)
+
+                # Some MCP servers return content as a list of TextContent items.
+                if isinstance(data, list) and data:
+                    first = data[0]
+                    text = getattr(first, "text", None)
+                    if text:
+                        try:
+                            data = json.loads(text)
+                            if DEBUG_LOGS and isinstance(data, dict):
+                                logger.info("sparql_select decoded list->dict keys=%s", list(data.keys()))
+                        except Exception:
+                            if DEBUG_LOGS:
+                                logger.info("sparql_select could not decode list text as JSON")
+
                 if isinstance(data, dict):
-                    return data.get("results", {}).get("bindings", []) or []
+                    bindings = data.get("results", {}).get("bindings", []) or []
+                    if DEBUG_LOGS:
+                        logger.info("sparql_select extracted bindings=%s", len(bindings) if bindings is not None else 0)
+                    return bindings
                 return []
 
     return await _run()
 
+
+async def load_entity_by_subject(subject: str, endpoint: str, graph_iri: str = DEFAULT_GRAPH_IRI) -> Dict[str, Any] | None:
+    needle = _escape((subject or "").strip())
+    if not needle:
+        return None
+
+    label_query = f"""
+{PREFIXES}
+SELECT ?atlasId ?label ?type ?qid WHERE {{
+  VALUES ?needle {{ "{needle}" }}
+  GRAPH <{graph_iri}> {{
+    ?entity a atlas:Entity ;
+            atlas:atlasId ?atlasId ;
+            atlas:canonicalLabel ?label .
+    OPTIONAL {{ ?entity atlas:hasCanonicalType ?type . }}
+    OPTIONAL {{ ?entity atlas:hasIdentifier ?ident . ?ident atlas:scheme "wikidata-qid" ; atlas:value ?qid . }}
+    FILTER(LCASE(STR(?label)) = LCASE(STR(?needle)))
+  }}
+}}
+LIMIT 1
+""".strip()
+
+    alias_query = f"""
+{PREFIXES}
+SELECT ?atlasId ?label ?type ?qid ?alias WHERE {{
+  VALUES ?needle {{ "{needle}" }}
+  GRAPH <{graph_iri}> {{
+    ?entity a atlas:Entity ;
+            atlas:atlasId ?atlasId ;
+            atlas:aliasLabel ?alias .
+    OPTIONAL {{ ?entity atlas:canonicalLabel ?label . }}
+    OPTIONAL {{ ?entity atlas:hasCanonicalType ?type . }}
+    OPTIONAL {{ ?entity atlas:hasIdentifier ?ident . ?ident atlas:scheme "wikidata-qid" ; atlas:value ?qid . }}
+    FILTER(LCASE(STR(?alias)) = LCASE(STR(?needle)))
+  }}
+}}
+LIMIT 1
+""".strip()
+
+    if DEBUG_LOGS:
+        logger.info("store lookup by subject: needle=%s graph=%s", (subject or "").strip(), graph_iri)
+        logger.info("store label query=%s", label_query)
+
+    rows = await _sparql_select(endpoint, label_query)
+    if not rows:
+        if DEBUG_LOGS:
+            logger.info("store alias query=%s", alias_query)
+        rows = await _sparql_select(endpoint, alias_query)
+
+    if DEBUG_LOGS:
+        logger.info("store lookup rows=%s", len(rows) if rows else 0)
+    if not rows:
+        return None
+    row = rows[0]
+    type_value = row.get("type", {}).get("value")
+    if type_value and type_value.startswith(ATLAS):
+        type_value = f"atlas:{type_value.split('#', 1)[-1]}"
+    return {
+        "atlas_id": row.get("atlasId", {}).get("value"),
+        "label": row.get("label", {}).get("value"),
+        "type": type_value,
+        "wikidata_id": row.get("qid", {}).get("value"),
+        "alias": row.get("alias", {}).get("value"),
+    }
+
+
+
+
+async def save_entity_minimal(entity: Entity, endpoint: str, graph_iri: str = DEFAULT_GRAPH_IRI) -> None:
+    body = _build_insert_body(entity)
+    query = f"""
+{PREFIXES}
+INSERT DATA {{
+  GRAPH <{graph_iri}> {{
+{body}
+  }}
+}}
+""".strip()
+    await _sparql_update(endpoint, query)
+
 # ---------------------------------------------------------------------------
 # Save
 # ---------------------------------------------------------------------------

+ 7 - 1
app/main.py

@@ -4,13 +4,20 @@ from __future__ import annotations
 
 from datetime import datetime, timezone
 from typing import Dict
+import logging
+import os
 
 from fastapi import FastAPI
+from dotenv import load_dotenv
+
+load_dotenv()
 
 from .mcp_server import mcp
 
 START_TIME = datetime.now(timezone.utc)
 
+logging.basicConfig(level=logging.INFO, force=True)
+
 app = FastAPI(
     title="Atlas2-MCP",
     description="Atlas2 semantic resolution scaffold (single resolve tool).",
@@ -29,4 +36,3 @@ async def health() -> Dict[str, object]:
         "uptime_seconds": round(uptime_seconds, 2),
         "tools": ["resolve"],
     }
-

+ 38 - 91
app/resolve.py

@@ -2,18 +2,21 @@ from __future__ import annotations
 
 import hashlib
 import os
+import logging
 from dataclasses import dataclass
 from typing import Any
 
-from .atlas_model import CurateFlag, Entity, Identifier
-from .atlas_store import _sparql_select, _sparql_update
+from .atlas_model import Entity, Identifier
+from .atlas_store import load_entity_by_subject, save_entity_minimal
 from .wikidata import WikidataSearch
 
 
 ATLAS = "http://world.eu.org/atlas_ontology#"
-ATLAS_D = "http://world.eu.org/atlas_data#"
 DEFAULT_ENDPOINT = os.getenv("ATLAS_VIRTUOSO_MCP_SSE_URL", "http://192.168.0.249:8501/mcp/sse")
 DEFAULT_UPDATE_ENDPOINT = os.getenv("ATLAS_VIRTUOSO_MCP_SSE_URL", DEFAULT_ENDPOINT)
+DEBUG_LOGS = os.getenv("ATLAS_DEBUG_LOGS", "false").lower() in {"1", "true", "yes", "on"}
+
+logger = logging.getLogger(__name__)
 
 
 def _hash_id(subject: str) -> str:
@@ -21,40 +24,7 @@ def _hash_id(subject: str) -> str:
 
 
 def _entity_iri(atlas_id: str) -> str:
-    return f"<{ATLAS_D}entity_{atlas_id}>"
-
-
-def _escape_literal(value: str) -> str:
-    return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
-
-
-def _label_lookup_query(subject: str) -> str:
-    safe = _escape_literal(subject)
-    return f"""
-PREFIX atlas: <{ATLAS}>
-SELECT ?atlasId ?label ?type ?qid ?alias WHERE {{
-  VALUES ?needle {{ \"{safe}\" }}
-  {{
-    ?entity a atlas:Entity ;
-            atlas:atlasId ?atlasId ;
-            atlas:canonicalLabel ?label .
-    OPTIONAL {{ ?entity atlas:hasCanonicalType ?type . }}
-    OPTIONAL {{ ?entity atlas:hasIdentifier ?ident . ?ident atlas:scheme \"wikidata-qid\" ; atlas:value ?qid . }}
-    FILTER(LCASE(STR(?label)) = LCASE(?needle))
-  }}
-  UNION
-  {{
-    ?entity a atlas:Entity ;
-            atlas:atlasId ?atlasId ;
-            atlas:aliasLabel ?alias .
-    OPTIONAL {{ ?entity atlas:canonicalLabel ?label . }}
-    OPTIONAL {{ ?entity atlas:hasCanonicalType ?type . }}
-    OPTIONAL {{ ?entity atlas:hasIdentifier ?ident . ?ident atlas:scheme \"wikidata-qid\" ; atlas:value ?qid . }}
-    FILTER(LCASE(STR(?alias)) = LCASE(?needle))
-  }}
-}}
-LIMIT 1
-""".strip()
+    return f"atlas_data:entity_{atlas_id}"
 
 
 async def _wikidata_lookup(subject: str) -> dict[str, Any] | None:
@@ -64,53 +34,36 @@ async def _wikidata_lookup(subject: str) -> dict[str, Any] | None:
     return items[0] if items else None
 
 
+def _infer_atlas_type(label: str | None, description: str | None) -> str:
+    text = f"{label or ''} {description or ''}".lower()
+    if any(k in text for k in ["president", "person", "singer", "composer", "human", "actor", "writer"]):
+        return "atlas:Person"
+    if any(k in text for k in ["city", "town", "village", "country", "state", "location", "place"]):
+        return "atlas:Location"
+    if any(k in text for k in ["company", "organization", "organisation", "institution", "foundation", "band"]):
+        return "atlas:Organization"
+    return "atlas:Other"
+
+
 def _entity_from_wikidata(subject: str, wd: dict[str, Any]) -> Entity:
     atlas_id = _hash_id(subject)
     label = wd.get("label") or subject
     description = wd.get("description")
     qid = wd.get("id")
-    entity_type = wd.get("type") or "Thing"
+    entity_type = _infer_atlas_type(label, description)
 
     ent = Entity(
         id=atlas_id,
         label=label,
         description=description,
-        type=f"atlas:{entity_type}" if not entity_type.startswith("atlas:") else entity_type,
+        type=entity_type,
         aliases=[subject] if subject.lower() != label.lower() else [],
         identifiers=[Identifier(scheme="wikidata-qid", value=qid)] if qid else [],
-        needs_curation=False,
+        needs_curation=True,
     )
     return ent
 
 
-def _entity_to_turtle(entity: Entity) -> str:
-    lines = []
-    e = _entity_iri(entity.id)
-    lines.append(f"{e}")
-    lines.append("  a atlas:Entity ;")
-    lines.append(f'  atlas:atlasId "{_escape_literal(entity.id)}" ;')
-    lines.append(f'  atlas:canonicalLabel "{_escape_literal(entity.label)}"@en ;')
-    if entity.description:
-        lines.append(f'  atlas:canonicalDescription "{_escape_literal(entity.description)}"@en ;')
-    if entity.type:
-        lines.append(f"  atlas:hasCanonicalType {entity.type} ;")
-    for alias in entity.aliases:
-        lines.append(f'  atlas:aliasLabel "{_escape_literal(alias)}"@en ;')
-    for ident in entity.identifiers:
-        ident_iri = f"<{ATLAS_D}ident_{ident.scheme}_{_hash_id(ident.value)}>.".rstrip(".")
-        lines.append(f"  atlas:hasIdentifier {ident_iri} ;")
-    lines.append(f'  atlas:needsCuration "{str(entity.needs_curation).lower()}"^^xsd:boolean .')
-    lines.append("")
-    for ident in entity.identifiers:
-        ident_iri = f"<{ATLAS_D}ident_{ident.scheme}_{_hash_id(ident.value)}>"
-        lines.append(f"{ident_iri}")
-        lines.append("  a atlas:Identifier ;")
-        lines.append(f'  atlas:scheme "{_escape_literal(ident.scheme)}" ;')
-        lines.append(f'  atlas:value "{_escape_literal(ident.value)}" .')
-        lines.append("")
-    return "\n".join(lines)
-
-
 def _flatten_exception_details(exc: BaseException) -> list[str]:
     parts = [f"{type(exc).__name__}: {exc}"]
     nested = getattr(exc, "exceptions", None)
@@ -121,32 +74,11 @@ def _flatten_exception_details(exc: BaseException) -> list[str]:
 
 
 async def _persist_entity(entity: Entity) -> None:
-    ttl = _entity_to_turtle(entity)
-    query = f"""
-PREFIX atlas: <{ATLAS}>
-PREFIX atlas_data: <{ATLAS_D}>
-PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
-INSERT DATA {{
-  GRAPH <{ATLAS_D}> {{
-{ttl}
-  }}
-}}
-""".strip()
-    await _sparql_update(DEFAULT_UPDATE_ENDPOINT, query)
+    await save_entity_minimal(entity, DEFAULT_UPDATE_ENDPOINT)
 
 
 async def _load_entity(subject: str) -> dict[str, Any] | None:
-    rows = await _sparql_select(DEFAULT_ENDPOINT, _label_lookup_query(subject))
-    if not rows:
-        return None
-    row = rows[0]
-    return {
-        "atlas_id": row.get("atlasId", {}).get("value"),
-        "label": row.get("label", {}).get("value"),
-        "type": row.get("type", {}).get("value"),
-        "wikidata_id": row.get("qid", {}).get("value"),
-        "alias": row.get("alias", {}).get("value"),
-    }
+    return await load_entity_by_subject(subject, DEFAULT_ENDPOINT)
 
 
 @dataclass
@@ -164,8 +96,13 @@ class ResolveService:
             if not subject:
                 return {"status": "not_found"}
 
+            if DEBUG_LOGS:
+                logger.info("resolve start subject=%s", subject)
+
             stored = await self.load_entity_fn(subject)
             if stored:
+                if DEBUG_LOGS:
+                    logger.info("store hit subject=%s atlas_id=%s", subject, stored.get("atlas_id"))
                 return {
                     "status": "resolved",
                     "atlas_id": stored.get("atlas_id"),
@@ -177,9 +114,19 @@ class ResolveService:
 
             wd = await self.wikidata_lookup_fn(subject)
             if not wd:
+                if DEBUG_LOGS:
+                    logger.info("wikidata miss subject=%s", subject)
                 return {"status": "not_found"}
 
             entity = _entity_from_wikidata(subject, wd)
+            if DEBUG_LOGS:
+                logger.info(
+                    "wikidata hit subject=%s qid=%s atlas_id=%s type=%s",
+                    subject,
+                    wd.get("id"),
+                    entity.id,
+                    entity.type,
+                )
             await self.persist_entity_fn(entity)
 
             return {

+ 1 - 0
run.sh

@@ -18,6 +18,7 @@ PID_FILE="logs/server.pid"
 nohup uvicorn app.main:app \
   --host 0.0.0.0 \
   --port "${PORT}" \
+  --env-file .env \
   >"${LOG_FILE}" 2>&1 &
 
 echo $! >"${PID_FILE}"