Просмотр исходного кода

release: v0.0.2 harmonize payload timestamps and entity/claim model

Lukas Goldschmidt 1 месяц назад
Родитель
Коммит
36532ac5a5

+ 32 - 0
.gitignore

@@ -0,0 +1,32 @@
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+*.so
+
+# Virtual envs
+.venv/
+venv/
+
+# Environment / secrets
+.env
+.env.*
+!.env.example
+
+# Tooling caches
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+
+# Runtime outputs
+logs/
+scripts/logs/
+*.log
+*.pid
+*.sqlite3
+
+# OS / editor
+.DS_Store
+.idea/
+.vscode/

+ 10 - 10
README.md

@@ -1,4 +1,4 @@
-# Atlas McP
+# Atlas MCP
 
 Atlas-MCP implements the semantic intelligence tier for the existing MCP stack. It follows the manifest’s mandate: Atlas is the only layer that resolves and enriches entities. For now, Atlas has exactly two public responsibilities: entity resolution and enrichment. The facts-mcp docs reinforce the same design pressure: keep the authoritative truth layer small, canonical, and explicit; Atlas should not blur into that role, but instead cooperate with it through clean graph contracts.
 
@@ -21,7 +21,7 @@ atlas-mcp/
 ├── README.md
 ├── PROJECT.md
 ├── requirements.txt
-├── gitignore
+├── .gitignore
 ├── app/
 │   ├── __init__.py
 │   └── main.py
@@ -33,15 +33,15 @@ atlas-mcp/
 └── MANIFEST.md ← existing architecture manifesto (reference)
 ```
 
-## v0.0.1 status
-Atlas v0.0.1 is the first stable slice of the entity-resolution pipeline:
-- MCP-first `resolve_entity` / `enrich_entity`
-- automatic persistence hook inside resolution
-- active-claims-only normal responses
-- debug responses with raw/derived claims and Turtle dump
-- claim-level provenance and Wikidata lookup
+## v0.0.2 status
+Atlas v0.0.2 establishes the new entity-centered canonical model:
+- entity core fields are stable (`atlas_id`, `canonical_label`, `entity_type`)
+- claims are attached to the entity and carry per-claim provenance/timestamps
+- `atlas_id` is now opaque/hash-based (no semantic parsing)
+- MID/QID are represented as identifier claims (no longer encoded in `atlas_id`)
+- trends/wikidata payload timestamp semantics are harmonized around `retrieved_at`
 
-See `RELEASE_NOTES_v0.0.1.md` for the full summary.
+See `RELEASE_NOTES_v0.0.2.md` for the full summary.
 
 ## Next steps
 * Keep the `/health` endpoint as a minimal service check on port `8550`.

+ 23 - 0
RELEASE_NOTES_v0.0.2.md

@@ -0,0 +1,23 @@
+# Atlas v0.0.2 Release Notes
+
+## Summary
+Atlas v0.0.2 introduces the first coherent entity-centered data contract for Atlas responses and triples.
+
+## Highlights
+- Canonical entity core stabilized around:
+  - `atlas_id`
+  - `canonical_label`
+  - `entity_type`
+- Claims are now attached to the entity as first-class statement objects.
+- Claim-level provenance is explicit and retained per claim.
+- `atlas_id` is now opaque/hash-based (semantic parsing removed).
+- MID/QID are represented as identifier claims.
+
+## Payload harmonization
+To align semantics across similar payload objects:
+- Google Trends payload now uses `retrieved_at` (previously `resolved_at`).
+- Wikidata payload keeps `retrieved_at`.
+- Both payloads expose a `source` field and a `retrieved_at` timestamp, each with the same meaning in both.
+
+## Notes
+This version intentionally prioritizes model coherence and contract clarity while Atlas is still establishing its stable baseline.

+ 68 - 38
app/atlas.py

@@ -6,9 +6,10 @@ from app.cache import EntityCache
 from app.entity_normalize import normalize_entity
 from app.models import (
     AtlasAlias,
+    AtlasClaim,
+    AtlasClaimObject,
     AtlasEntity,
     AtlasEnrichmentDataset,
-    AtlasIdentifier,
     AtlasProvenance,
 )
 from app.trends_resolution import resolve_entity_via_trends
@@ -61,66 +62,96 @@ async def resolve_entity(subject: str, context: str | None = None) -> AtlasEntit
 
 
 def _entity_from_resolution(subject: str, resolution: dict, classification: TypeClassification, wikidata: dict | None = None) -> AtlasEntity:
+    import hashlib
+
     canonical_label = (
         resolution.get("canonical_label")
         or resolution.get("normalized")
         or subject.strip()
     )
-    atlas_id = resolution.get("mid")
-    if atlas_id:
-        atlas_id = f"atlas:mid:{atlas_id.strip()}"
-    else:
-        slug = canonical_label.strip().lower().replace(" ", "-") or "entity"
-        atlas_id = f"atlas:{slug}"
 
-    identifiers = []
-    mid = resolution.get("mid")
-    if mid:
-        identifiers.append(
-            AtlasIdentifier(value=mid, source="google", identifier_type="mid")
-        )
-    if wikidata and wikidata.get("qid"):
-        identifiers.append(
-            AtlasIdentifier(value=wikidata["qid"], source="wikidata", identifier_type="qid")
-        )
+    canonical_type = (
+        classification.canonical_type
+        or resolution.get("type")
+        or "unknown"
+    )
 
-    provenance = [
+    # atlas_id is opaque identity: hash-part only, never semantic content.
+    stable_key = "|".join(
+        [
+            (resolution.get("mid") or "").strip(),
+            (wikidata or {}).get("qid") or "",
+            canonical_label.strip().lower(),
+        ]
+    )
+    digest = hashlib.sha1(stable_key.encode("utf-8")).hexdigest()[:16]
+    atlas_id = f"atlas:{digest}"
+
+    trends_prov = AtlasProvenance(
+        source=resolution.get("source") or "resolver",
+        retrieval_method="trends-resolution",
+        confidence=0.9 if resolution.get("mid") else 0.3,
+        retrieved_at=resolution.get("retrieved_at"),
+    )
+    wikidata_prov = (
         AtlasProvenance(
-            source=resolution.get("source") or "resolver",
-            retrieval_method="trends-resolution",
-            confidence=0.9 if resolution.get("mid") else 0.3,
-            retrieved_at=resolution.get("resolved_at"),
+            source="wikidata",
+            retrieval_method="wbsearchentities + entitydata",
+            confidence=0.99,
+            retrieved_at=wikidata.get("retrieved_at"),
+        )
+        if wikidata and wikidata.get("qid")
+        else None
+    )
+
+    claims: list[AtlasClaim] = []
+    mid = resolution.get("mid")
+    if mid:
+        claims.append(
+            AtlasClaim(
+                claim_id=f"clm_raw_ident_mid_{mid}",
+                subject=atlas_id,
+                predicate="atlas:hasIdentifier",
+                object=AtlasClaimObject(kind="identifier", id_type="mid", value=mid),
+                layer="raw",
+                provenance=trends_prov,
+            )
         )
-    ]
-    if classification.provenance:
-        provenance.append(classification.provenance)
     if wikidata and wikidata.get("qid"):
-        provenance.append(
-            AtlasProvenance(
-                source="wikidata",
-                retrieval_method="wbsearchentities + entitydata",
-                confidence=0.99,
-                retrieved_at=wikidata.get("retrieved_at"),
+        claims.append(
+            AtlasClaim(
+                claim_id=f"clm_raw_ident_qid_{wikidata['qid']}",
+                subject=atlas_id,
+                predicate="atlas:hasIdentifier",
+                object=AtlasClaimObject(kind="identifier", id_type="qid", value=wikidata["qid"]),
+                layer="raw",
+                provenance=wikidata_prov,
             )
         )
 
-    canonical_type = (
-        classification.canonical_type
-        or resolution.get("type")
-        or "unknown"
+    claims.append(
+        AtlasClaim(
+            claim_id="clm_drv_canonical_type",
+            subject=atlas_id,
+            predicate="atlas:hasCanonicalType",
+            object=AtlasClaimObject(kind="type", value=f"atlas:{canonical_type}"),
+            layer="derived",
+            provenance=classification.provenance,
+        )
     )
 
     payload = dict(resolution)
     if wikidata:
         payload["wikidata"] = {
             "status": "ok",
+            "source": "wikidata",
             "qid": wikidata.get("qid"),
             "label": wikidata.get("label"),
             "description": wikidata.get("description"),
             "retrieved_at": wikidata.get("retrieved_at"),
         }
     else:
-        payload["wikidata"] = {"status": "missing"}
+        payload["wikidata"] = {"status": "missing", "source": "wikidata", "retrieved_at": None}
 
     return AtlasEntity(
         atlas_id=atlas_id,
@@ -128,8 +159,7 @@ def _entity_from_resolution(subject: str, resolution: dict, classification: Type
         canonical_description=(wikidata or {}).get("description"),
         entity_type=canonical_type,
         aliases=[AtlasAlias(label=subject.strip() or canonical_label)],
-        identifiers=identifiers,
-        provenance=provenance,
+        claims=claims,
         raw_payload=payload,
         needs_curation=classification.needs_curation,
     )

+ 1 - 1
app/cache.py

@@ -31,7 +31,7 @@ class EntityCache:
         tokens.add(entity.atlas_id)
         for alias in entity.aliases:
             tokens.add(alias.label)
-        mid = entity.raw_payload.get("mid") if isinstance(entity.raw_payload, dict) else None
+        mid = entity.active_identifier("mid")
         if mid:
             tokens.add(mid)
         for token in tokens:

+ 20 - 87
app/claims.py

@@ -4,7 +4,7 @@ from __future__ import annotations
 
 from typing import Any
 
-from app.models import AtlasEntity, AtlasProvenance
+from app.models import AtlasClaim, AtlasEntity, AtlasProvenance
 
 
 def _prov_to_dict(p: AtlasProvenance | None) -> dict[str, Any] | None:
@@ -18,20 +18,6 @@ def _prov_to_dict(p: AtlasProvenance | None) -> dict[str, Any] | None:
     }
 
 
-def _pick_provenance(entity: AtlasEntity, *, source_hint: str | None = None, method_hint: str | None = None) -> AtlasProvenance | None:
-    if not entity.provenance:
-        return None
-    if method_hint:
-        for p in entity.provenance:
-            if p.retrieval_method == method_hint:
-                return p
-    if source_hint:
-        for p in entity.provenance:
-            if p.source == source_hint:
-                return p
-    return entity.provenance[0]
-
-
 def _id_type_resource(identifier_type: str) -> str:
     if identifier_type == "mid":
         return "atlas:Mid"
@@ -40,78 +26,25 @@ def _id_type_resource(identifier_type: str) -> str:
     return f"atlas:{identifier_type}"
 
 
-def build_claim_sets(entity: AtlasEntity) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
-    raw_claims: list[dict[str, Any]] = []
-    derived_claims: list[dict[str, Any]] = []
-
-    for ident in entity.identifiers:
-        prov = _pick_provenance(entity, source_hint=ident.source)
-        raw_claims.append(
-            {
-                "claim_id": f"clm_raw_ident_{ident.identifier_type}_{ident.value}",
-                "layer": "raw",
-                "subject": entity.atlas_id,
-                "predicate": "atlas:hasIdentifier",
-                "object": {
-                    "kind": "identifier",
-                    "id_type": _id_type_resource(ident.identifier_type),
-                    "value": ident.value,
-                },
-                "provenance": _prov_to_dict(prov),
-            }
-        )
-
-    for alias in entity.aliases:
-        raw_claims.append(
-            {
-                "claim_id": f"clm_raw_alias_{alias.label}",
-                "layer": "raw",
-                "subject": entity.atlas_id,
-                "predicate": "atlas:hasAlias",
-                "object": {"kind": "alias", "value": alias.label},
-                "provenance": _prov_to_dict(_pick_provenance(entity, method_hint="trends-resolution")),
-            }
-        )
-
-    wd = entity.raw_payload.get("wikidata") or {}
-    if wd.get("status") == "ok":
-        derived_claims.append(
-            {
-                "claim_id": "clm_drv_wikidata_type",
-                "layer": "derived",
-                "subject": entity.atlas_id,
-                "predicate": "atlas:hasExternalType",
-                "object": {"kind": "external_type", "value": "atlas:WikidataType_Q5", "qid": wd.get("qid")},
-                "provenance": {
-                    "source": "wikidata",
-                    "method": "wbsearchentities + entitydata",
-                    "confidence": 0.99,
-                    "retrieved_at": wd.get("retrieved_at"),
-                },
-            }
-        )
-    else:
-        raw_claims.append(
-            {
-                "claim_id": "clm_raw_wikidata_missing",
-                "layer": "raw",
-                "subject": entity.atlas_id,
-                "predicate": "atlas:wikidataLookupStatus",
-                "object": {"kind": "literal", "value": wd.get("status", "missing")},
-                "provenance": _prov_to_dict(_pick_provenance(entity, method_hint="trends-resolution")),
-            }
-        )
+def _claim_to_dict(claim: AtlasClaim) -> dict[str, Any]:
+    obj: dict[str, Any] = {
+        "kind": claim.object.kind,
+        "value": claim.object.value,
+    }
+    if claim.object.id_type:
+        obj["id_type"] = _id_type_resource(claim.object.id_type)
+    return {
+        "claim_id": claim.claim_id,
+        "layer": claim.layer,
+        "status": claim.status,
+        "subject": claim.subject,
+        "predicate": claim.predicate,
+        "object": obj,
+        "provenance": _prov_to_dict(claim.provenance),
+    }
 
-    type_prov = _pick_provenance(entity, method_hint="type-classification")
-    derived_claims.append(
-        {
-            "claim_id": "clm_drv_canonical_type",
-            "layer": "derived",
-            "subject": entity.atlas_id,
-            "predicate": "atlas:hasCanonicalType",
-            "object": {"kind": "type", "value": f"atlas:{entity.entity_type}"},
-            "provenance": _prov_to_dict(type_prov),
-        }
-    )
 
+def build_claim_sets(entity: AtlasEntity) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    raw_claims = [_claim_to_dict(c) for c in entity.claims if c.layer == "raw"]
+    derived_claims = [_claim_to_dict(c) for c in entity.claims if c.layer == "derived"]
     return raw_claims, derived_claims

+ 1 - 1
app/main.py

@@ -14,7 +14,7 @@ START_TIME = datetime.now(timezone.utc)
 app = FastAPI(
     title="Atlas-MCP",
     description="Semantic intelligence layer for entity resolution and enrichment.",
-    version="0.1.0",
+    version="0.0.2",
 )
 app.mount("/mcp", mcp.sse_app())
 

+ 0 - 17
app/mcp_server.py

@@ -29,23 +29,6 @@ async def resolve_entity_tool(subject: str, context: str | None = None, debug: b
         "entity_type": entity.entity_type,
         "needs_curation": entity.needs_curation,
         "aliases": [alias.label for alias in entity.aliases],
-        "identifiers": [
-            {
-                "value": identifier.value,
-                "source": identifier.source,
-                "identifier_type": identifier.identifier_type,
-            }
-            for identifier in entity.identifiers
-        ],
-        "provenance": [
-            {
-                "source": provenance.source,
-                "retrieval_method": provenance.retrieval_method,
-                "confidence": provenance.confidence,
-                "retrieved_at": provenance.retrieved_at,
-            }
-            for provenance in entity.provenance
-        ],
         "g_trends_payload": {k: v for k, v in entity.raw_payload.items() if k != "wikidata"},
         "wikidata_payload": (
             entity.raw_payload.get("wikidata")

+ 30 - 2
app/models.py

@@ -26,6 +26,24 @@ class AtlasProvenance:
     retrieved_at: Optional[str] = None
 
 
+@dataclass
+class AtlasClaimObject:
+    kind: str
+    value: str
+    id_type: str | None = None
+
+
+@dataclass
+class AtlasClaim:
+    claim_id: str
+    subject: str
+    predicate: str
+    object: AtlasClaimObject
+    layer: str
+    status: str = "active"
+    provenance: AtlasProvenance | None = None
+
+
 @dataclass
 class AtlasEntity:
     atlas_id: str
@@ -33,11 +51,21 @@ class AtlasEntity:
     canonical_description: str | None = None
     entity_type: str = "unknown"
     aliases: List[AtlasAlias] = field(default_factory=list)
-    identifiers: List[AtlasIdentifier] = field(default_factory=list)
-    provenance: List[AtlasProvenance] = field(default_factory=list)
+    claims: List[AtlasClaim] = field(default_factory=list)
     raw_payload: Dict[str, Any] = field(default_factory=dict)
     needs_curation: bool = False
 
+    def active_identifier(self, identifier_type: str) -> str | None:
+        for claim in self.claims:
+            if (
+                claim.status == "active"
+                and claim.predicate == "atlas:hasIdentifier"
+                and claim.object.kind == "identifier"
+                and claim.object.id_type == identifier_type
+            ):
+                return claim.object.value
+        return None
+
 
 @dataclass
 class AtlasEnrichmentDataset:

+ 4 - 4
app/trends_resolution.py

@@ -61,7 +61,7 @@ def _provider() -> GoogleTrendsProvider | None:
         return None
 
 
-def _resolved_at() -> str:
+def _retrieved_at() -> str:
     return datetime.now(timezone.utc).isoformat()
 
 
@@ -77,7 +77,7 @@ def resolve_entity_via_trends(subject: str) -> dict[str, Any]:
             "type": None,
             "candidates": [],
             "source": "empty",
-            "resolved_at": _resolved_at(),
+            "retrieved_at": _retrieved_at(),
         }
 
     provider = _provider()
@@ -93,7 +93,7 @@ def resolve_entity_via_trends(subject: str) -> dict[str, Any]:
                 "type": best.get("type") if best else None,
                 "candidates": suggestions,
                 "source": "google-trends",
-                "resolved_at": _resolved_at(),
+                "retrieved_at": _retrieved_at(),
             }
         except Exception:
             pass
@@ -106,5 +106,5 @@ def resolve_entity_via_trends(subject: str) -> dict[str, Any]:
         "type": None,
         "candidates": [],
         "source": "fallback",
-        "resolved_at": _resolved_at(),
+        "retrieved_at": _retrieved_at(),
     }

+ 58 - 113
app/triple_export.py

@@ -4,7 +4,7 @@ from __future__ import annotations
 
 import json
 
-from app.models import AtlasEntity, AtlasProvenance
+from app.models import AtlasClaim, AtlasEntity
 
 PREFIXES = """@prefix atlas: <http://world.eu.org/atlas_ontology#> .
 @prefix atlas_data: <http://world.eu.org/atlas_data#> .
@@ -34,78 +34,56 @@ def _alias_node(alias_label: str) -> str:
     return f"atlas_data:alias_{_safe_fragment(alias_label)}"
 
 
-def _identifier_node(identifier_value: str) -> str:
-    return f"atlas_data:ident_{_safe_fragment(identifier_value)}"
+def _claim_node(claim: AtlasClaim) -> str:
+    return f"atlas_data:claim_{_safe_fragment(claim.claim_id)}"
 
 
-def _provenance_node(source: str, retrieved_at: str | None, retrieval_method: str) -> str:
-    parts = [source, retrieval_method, retrieved_at or ""]
+def _provenance_node(claim: AtlasClaim) -> str:
+    prov = claim.provenance
+    if prov is None:
+        return ""
+    parts = [claim.claim_id, prov.source, prov.retrieval_method, prov.retrieved_at or ""]
     return f"atlas_data:prov_{_safe_fragment('_'.join(parts))}"
 
 
-def _type_assertion_node(entity: AtlasEntity, source: str) -> str:
-    return f"atlas_data:typeassert_{_safe_fragment(entity.atlas_id)}_{_safe_fragment(source)}"
-
-
 def _literal(text: str) -> str:
     return text.replace("\\", "\\\\").replace('"', '\\"')
 
 
-def _identifier_type_resource(identifier_type: str) -> str:
-    kind = _safe_fragment(identifier_type)
-    if kind == "mid":
-        return "atlas:Mid"
-    if kind in {"qid", "wikidata_qid", "wikidataqid"}:
-        return "atlas:WikidataQID"
-    return f"atlas:{kind.capitalize()}"
-
-
-def _pick_provenance(entity: AtlasEntity, source_hint: str | None = None, method_hint: str | None = None) -> AtlasProvenance | None:
-    if not entity.provenance:
-        return None
-    if method_hint:
-        for p in entity.provenance:
-            if p.retrieval_method == method_hint:
-                return p
-    if source_hint:
-        for p in entity.provenance:
-            if p.source == source_hint:
-                return p
-    return entity.provenance[0]
+def _claim_object_iri(claim: AtlasClaim) -> str | None:
+    if claim.object.kind == "type":
+        return claim.object.value
+    if claim.object.kind == "identifier" and claim.object.id_type:
+        return f"atlas_data:ident_{_safe_fragment(claim.object.id_type + '_' + claim.object.value)}"
+    return None
 
 
 def entity_to_turtle(entity: AtlasEntity) -> str:
     lines: list[str] = [PREFIXES]
     subject = _entity_node(entity)
 
-    claim_nodes = [f"atlas_data:claim_ident_{_safe_fragment(i.value)}" for i in entity.identifiers]
-    if entity.entity_type and entity.entity_type != "unknown":
-        claim_nodes.append(f"atlas_data:claim_type_{_safe_fragment(entity.atlas_id)}")
-
     lines.append(f"{subject} a atlas:Entity ;")
     lines.append(f'  atlas:canonicalLabel "{_literal(entity.canonical_label)}" ;')
     if entity.canonical_description:
         lines.append(f'  atlas:canonicalDescription "{_literal(entity.canonical_description)}" ;')
+    if entity.entity_type and entity.entity_type != "unknown":
+        lines.append(f"  atlas:hasCanonicalType atlas:{_safe_fragment(entity.entity_type).capitalize()} ;")
 
-    # Lean raw payload persistence (as JSON strings)
     wd = entity.raw_payload.get("wikidata") if isinstance(entity.raw_payload, dict) else None
     if isinstance(wd, dict) and wd.get("status") == "ok":
         lines.append(f'  atlas:rawWikidataJson "{_literal(json.dumps(wd, ensure_ascii=False))}"^^xsd:string ;')
 
-    trends_payload = entity.raw_payload.get("g_trends_payload") or {}
-    # In our current model, trends live under raw_payload keys directly (non-wikidata)
     if isinstance(entity.raw_payload, dict):
         trends_payload = {k: v for k, v in entity.raw_payload.items() if k != "wikidata"}
-    if isinstance(trends_payload, dict) and trends_payload:
-        lines.append(f'  atlas:rawTrendsJson "{_literal(json.dumps(trends_payload, ensure_ascii=False))}"^^xsd:string ;')
-    if entity.entity_type and entity.entity_type != "unknown":
-        lines.append(f"  atlas:hasCanonicalType atlas:{_safe_fragment(entity.entity_type).capitalize()} ;")
+        if trends_payload:
+            lines.append(f'  atlas:rawTrendsJson "{_literal(json.dumps(trends_payload, ensure_ascii=False))}"^^xsd:string ;')
+
     for alias in entity.aliases:
         lines.append(f"  atlas:hasAlias {_alias_node(alias.label)} ;")
-    for ident in entity.identifiers:
-        lines.append(f"  atlas:hasIdentifier {_identifier_node(ident.value)} ;")
-    for claim_node in claim_nodes:
-        lines.append(f"  atlas:hasClaim {claim_node} ;")
+
+    for claim in entity.claims:
+        lines.append(f"  atlas:hasClaim {_claim_node(claim)} ;")
+
     lines.append(f"  atlas:needsCuration {'true' if entity.needs_curation else 'false'} .")
     lines.append("")
 
@@ -116,82 +94,49 @@ def entity_to_turtle(entity: AtlasEntity) -> str:
         lines.append(f"  atlas:resolvedTo {subject} .")
         lines.append("")
 
-    for ident in entity.identifiers:
-        ident_node = _identifier_node(ident.value)
+    # Materialize identifier resources from identifier claims.
+    for claim in entity.claims:
+        if claim.predicate != "atlas:hasIdentifier" or claim.object.kind != "identifier":
+            continue
+        ident_node = _claim_object_iri(claim)
+        if not ident_node:
+            continue
+        id_type = claim.object.id_type or "unknown"
+        id_type_iri = "atlas:Mid" if id_type == "mid" else ("atlas:WikidataQID" if id_type == "qid" else f"atlas:{_safe_fragment(id_type).capitalize()}")
         lines.append(f"{ident_node} a atlas:Identifier ;")
-        lines.append(f'  atlas:identifierValue "{_literal(ident.value)}" ;')
-        lines.append(f'  atlas:identifierSource "{_literal(ident.source)}" ;')
-        lines.append(f"  atlas:identifierType {_identifier_type_resource(ident.identifier_type)} ;")
-        prov = _pick_provenance(entity, source_hint=ident.source)
-        if prov:
-            lines.append(f"  atlas:hasIdentifierProvenance {_provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)} .")
-        else:
-            lines[-1] = lines[-1].rstrip(" ;") + " ."
+        lines.append(f'  atlas:identifierValue "{_literal(claim.object.value)}" ;')
+        lines.append(f'  atlas:identifierType {id_type_iri} .')
         lines.append("")
 
-    for prov in entity.provenance:
-        prov_node = _provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)
-        lines.append(f"{prov_node} a atlas:Provenance ;")
-        lines.append(f'  atlas:provenanceSource "{_literal(prov.source)}" ;')
-        lines.append(f'  atlas:retrievalMethod "{_literal(prov.retrieval_method)}" ;')
-        lines.append(f'  atlas:confidence "{prov.confidence}"^^xsd:decimal ;')
-        if prov.retrieved_at:
-            lines.append(f'  atlas:retrievedAt "{_literal(prov.retrieved_at)}"^^xsd:dateTime .')
-        else:
-            lines[-1] = lines[-1].rstrip(" ;") + " ."
-        lines.append("")
-
-    wd = entity.raw_payload.get("wikidata") or {}
-    if wd.get("status") == "ok":
-        typeassert_node = _type_assertion_node(entity, "wikidata")
-        lines.append(f"{typeassert_node} a atlas:TypeAssertion ;")
-        lines.append("  atlas:assertedType atlas:WikidataType_Q5 ;")
-        prov = _pick_provenance(entity, source_hint="wikidata")
-        if prov:
-            lines.append(f"  atlas:hasAssertionProvenance {_provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)} ;")
-        lines.append('  atlas:assertionReason "wikidata instance-of" .')
-        lines.append("")
-
-    if entity.entity_type and entity.entity_type != "unknown":
-        typeassert_node = _type_assertion_node(entity, "canonical")
-        lines.append(f"{typeassert_node} a atlas:TypeAssertion ;")
-        lines.append(f"  atlas:assertedType atlas:{_safe_fragment(entity.entity_type).capitalize()} ;")
-        prov = _pick_provenance(entity, method_hint="type-classification")
-        if prov:
-            lines.append(f"  atlas:hasAssertionProvenance {_provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)} ;")
-        lines.append('  atlas:assertionReason "canonical type adjudication" .')
-        lines.append("")
-
-    # Claim nodes with explicit claim-object semantics
-    for ident in entity.identifiers:
-        claim_node = f"atlas_data:claim_ident_{_safe_fragment(ident.value)}"
-        ident_node = _identifier_node(ident.value)
-        prov = _pick_provenance(entity, source_hint=ident.source)
+    for claim in entity.claims:
+        claim_node = _claim_node(claim)
         lines.append(f"{claim_node} a atlas:Claim ;")
         lines.append(f"  atlas:claimSubjectIri {subject} ;")
-        lines.append('  atlas:claimPredicate "atlas:hasIdentifier" ;')
-        lines.append(f"  atlas:claimObjectIri {ident_node} ;")
-        lines.append('  atlas:claimLayer "raw" ;')
-        lines.append('  atlas:claimStatus "active" ;')
-        if prov:
-            lines.append(f"  atlas:hasProvenance {_provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)} .")
+        lines.append(f'  atlas:claimPredicate "{_literal(claim.predicate)}" ;')
+        obj_iri = _claim_object_iri(claim)
+        if obj_iri:
+            lines.append(f"  atlas:claimObjectIri {obj_iri} ;")
         else:
-            lines[-1] = lines[-1].rstrip(" ;") + " ."
-        lines.append("")
-
-    if entity.entity_type and entity.entity_type != "unknown":
-        claim_node = f"atlas_data:claim_type_{_safe_fragment(entity.atlas_id)}"
-        prov = _pick_provenance(entity, method_hint="type-classification")
-        lines.append(f"{claim_node} a atlas:Claim ;")
-        lines.append(f"  atlas:claimSubjectIri {subject} ;")
-        lines.append('  atlas:claimPredicate "atlas:hasCanonicalType" ;')
-        lines.append(f"  atlas:claimObjectIri atlas:{_safe_fragment(entity.entity_type).capitalize()} ;")
-        lines.append('  atlas:claimLayer "derived" ;')
-        lines.append('  atlas:claimStatus "active" ;')
-        if prov:
-            lines.append(f"  atlas:hasProvenance {_provenance_node(prov.source, prov.retrieved_at, prov.retrieval_method)} .")
+            lines.append(f'  atlas:claimObjectLiteral "{_literal(claim.object.value)}" ;')
+        lines.append(f'  atlas:claimLayer "{_literal(claim.layer)}" ;')
+        lines.append(f'  atlas:claimStatus "{_literal(claim.status)}" ;')
+        prov_node = _provenance_node(claim)
+        if prov_node:
+            lines.append(f"  atlas:hasProvenance {prov_node} .")
         else:
             lines[-1] = lines[-1].rstrip(" ;") + " ."
         lines.append("")
 
+        if claim.provenance:
+            prov = claim.provenance
+            lines.append(f"{prov_node} a atlas:Provenance ;")
+            lines.append(f'  atlas:provenanceSource "{_literal(prov.source)}" ;')
+            lines.append(f'  atlas:retrievalMethod "{_literal(prov.retrieval_method)}" ;')
+            lines.append(f'  atlas:confidence "{prov.confidence}"^^xsd:decimal ;')
+            if prov.retrieved_at:
+                lines.append(f'  atlas:retrievedAt "{_literal(prov.retrieved_at)}"^^xsd:dateTime .')
+            else:
+                lines[-1] = lines[-1].rstrip(" ;") + " ."
+            lines.append("")
+
     return "\n".join(lines).strip() + "\n"

+ 31 - 14
app/virtuoso_store.py

@@ -10,7 +10,7 @@ from typing import Optional
 from mcp import ClientSession
 from mcp.client.sse import sse_client
 
-from app.models import AtlasAlias, AtlasEntity, AtlasIdentifier, AtlasProvenance
+from app.models import AtlasAlias, AtlasClaim, AtlasClaimObject, AtlasEntity, AtlasProvenance
 
 VIRTUOSO_MCP_SSE_URL = os.getenv("ATLAS_VIRTUOSO_MCP_SSE_URL", "http://192.168.0.249:8501/mcp/sse")
 VIRTUOSO_MCP_TIMEOUT = float(os.getenv("ATLAS_VIRTUOSO_MCP_TIMEOUT", "10"))
@@ -135,35 +135,52 @@ def _entity_from_binding(binding: dict) -> AtlasEntity:
     if entity_type.startswith("http://world.eu.org/atlas_ontology#"):
         entity_type = entity_type.split("#", 1)[-1]
     mid = binding.get("mid", {}).get("value")
-    identifiers = []
-    if mid:
-        identifiers.append(AtlasIdentifier(value=mid, source="virtuoso", identifier_type="mid"))
 
     desc = binding.get("desc", {}).get("value")
     raw_wd = binding.get("rawWd", {}).get("value")
     raw_trends = binding.get("rawTrends", {}).get("value")
 
-    atlas_id = f"atlas:mid:{mid}" if mid else f"atlas:{label.strip().lower().replace(' ', '-') }"
-    provenance = [
-        AtlasProvenance(
-            source="virtuoso-cache",
-            retrieval_method="sparql",
-            confidence=0.95,
+    atlas_id = entity_uri.split("#", 1)[-1].replace("entity_", "atlas:") if "#" in entity_uri else f"atlas:{label.strip().lower().replace(' ', '-') }"
+    base_prov = AtlasProvenance(
+        source="virtuoso-cache",
+        retrieval_method="sparql",
+        confidence=0.95,
+    )
+    claims: list[AtlasClaim] = []
+    if mid:
+        claims.append(
+            AtlasClaim(
+                claim_id=f"clm_raw_ident_mid_{mid}",
+                subject=atlas_id,
+                predicate="atlas:hasIdentifier",
+                object=AtlasClaimObject(kind="identifier", id_type="mid", value=mid),
+                layer="raw",
+                provenance=base_prov,
+            )
         )
-    ]
+    if entity_type and entity_type != "unknown":
+        claims.append(
+            AtlasClaim(
+                claim_id="clm_drv_canonical_type",
+                subject=atlas_id,
+                predicate="atlas:hasCanonicalType",
+                object=AtlasClaimObject(kind="type", value=f"atlas:{entity_type}"),
+                layer="derived",
+                provenance=base_prov,
+            )
+        )
+
     return AtlasEntity(
         atlas_id=atlas_id,
         canonical_label=label or entity_uri,
         canonical_description=desc,
         entity_type=entity_type or "unknown",
         aliases=[AtlasAlias(label=label or entity_uri)],
-        identifiers=identifiers,
-        provenance=provenance,
+        claims=claims,
         raw_payload={
             "source": "virtuoso",
             "raw": label or entity_uri,
             "normalized": (label or entity_uri),
-            "mid": mid,
             "wikidata": (json.loads(raw_wd) if raw_wd else {"status": "missing"}),
             **(json.loads(raw_trends) if raw_trends else {}),
         },

+ 0 - 12
gitignore

@@ -1,12 +0,0 @@
-.venv/
-__pycache__/
-*.pyc
-*.pyo
-*.pyd
-*.log
-*.sqlite3
-.env
-.env.*
-.DS_Store
-.idea/
-.vscode/

+ 19 - 10
tests/test_atlas_contracts.py

@@ -2,7 +2,7 @@ import pytest
 
 import app.atlas as atlas_module
 from app.atlas import enrich_entity, resolve_entity
-from app.models import AtlasAlias, AtlasEntity, AtlasIdentifier, AtlasProvenance
+from app.models import AtlasAlias, AtlasClaim, AtlasClaimObject, AtlasEntity, AtlasProvenance
 from app.type_classifier import TypeClassification
 
 
@@ -11,9 +11,10 @@ async def test_resolve_entity_returns_canonical_structure():
     entity = await resolve_entity("Trump")
 
     assert entity.atlas_id.startswith("atlas:")
+    assert len(entity.atlas_id) > 10
     assert entity.canonical_label
     assert entity.aliases[0].label.lower() == "trump" or entity.aliases[0].label.lower() == "donald trump"
-    assert entity.provenance
+    assert entity.claims
     assert entity.raw_payload["raw"] == "Trump"
 
 
@@ -30,18 +31,26 @@ async def test_enrich_entity_returns_dataset_shape():
 
 def test_internal_models_support_identity_and_provenance():
     entity = AtlasEntity(
-        atlas_id="atlas:donald-trump",
+        atlas_id="atlas:abcd1234abcd1234",
         canonical_label="Donald Trump",
         entity_type="person",
         aliases=[AtlasAlias(label="Trump")],
-        identifiers=[AtlasIdentifier(value="Q22686", source="wikidata", identifier_type="wikidata-qid")],
-        provenance=[AtlasProvenance(source="google-trends", retrieval_method="entity-resolution", confidence=0.93)],
+        claims=[
+            AtlasClaim(
+                claim_id="clm_raw_ident_qid_Q22686",
+                subject="atlas:abcd1234abcd1234",
+                predicate="atlas:hasIdentifier",
+                object=AtlasClaimObject(kind="identifier", id_type="qid", value="Q22686"),
+                layer="raw",
+                provenance=AtlasProvenance(source="google-trends", retrieval_method="entity-resolution", confidence=0.93),
+            )
+        ],
     )
 
-    assert entity.atlas_id == "atlas:donald-trump"
+    assert entity.atlas_id == "atlas:abcd1234abcd1234"
     assert entity.aliases[0].label == "Trump"
-    assert entity.identifiers[0].value == "Q22686"
-    assert entity.provenance[0].source == "google-trends"
+    assert entity.claims[0].object.value == "Q22686"
+    assert entity.claims[0].provenance.source == "google-trends"
 
 
 @pytest.mark.anyio
@@ -59,7 +68,7 @@ async def test_resolve_entity_passes_context_to_classifier(monkeypatch):
             "mid": None,
             "type": "Person",
             "source": "resolver",
-            "resolved_at": "2026-04-03T00:00:00Z",
+            "retrieved_at": "2026-04-03T00:00:00Z",
             "candidates": [],
             "raw": subject,
         }
@@ -112,7 +121,7 @@ async def test_resolve_entity_marks_needs_curation(monkeypatch):
             "mid": None,
             "type": "Unknown",
             "source": "resolver",
-            "resolved_at": "2026-04-03T00:00:00Z",
+            "retrieved_at": "2026-04-03T00:00:00Z",
             "candidates": [],
             "raw": subject,
         }

+ 27 - 10
tests/test_claims.py

@@ -1,22 +1,39 @@
 from app.claims import build_claim_sets
-from app.models import AtlasAlias, AtlasEntity, AtlasIdentifier, AtlasProvenance
+from app.models import AtlasAlias, AtlasClaim, AtlasClaimObject, AtlasEntity, AtlasProvenance
 
 
 def test_build_claim_sets_attaches_provenance_per_claim():
     entity = AtlasEntity(
-        atlas_id="atlas:mid:/m/0cqt90",
+        atlas_id="atlas:0cqt90abcd123456",
         canonical_label="Donald Trump",
         canonical_description="45th and 47th U.S. President",
         entity_type="Person",
         aliases=[AtlasAlias(label="Donald Trump")],
-        identifiers=[
-            AtlasIdentifier(value="/m/0cqt90", source="google", identifier_type="mid"),
-            AtlasIdentifier(value="Q22686", source="wikidata", identifier_type="qid"),
-        ],
-        provenance=[
-            AtlasProvenance(source="google", retrieval_method="trends-resolution", confidence=0.9, retrieved_at="2026-04-03T00:00:00Z"),
-            AtlasProvenance(source="wikidata", retrieval_method="wbsearchentities + entitydata", confidence=0.99, retrieved_at="2026-04-03T00:00:01Z"),
-            AtlasProvenance(source="openai-llm", retrieval_method="type-classification", confidence=1.0, retrieved_at="2026-04-03T00:00:02Z"),
+        claims=[
+            AtlasClaim(
+                claim_id="clm_raw_ident_mid_/m/0cqt90",
+                subject="atlas:0cqt90abcd123456",
+                predicate="atlas:hasIdentifier",
+                object=AtlasClaimObject(kind="identifier", id_type="mid", value="/m/0cqt90"),
+                layer="raw",
+                provenance=AtlasProvenance(source="google", retrieval_method="trends-resolution", confidence=0.9, retrieved_at="2026-04-03T00:00:00Z"),
+            ),
+            AtlasClaim(
+                claim_id="clm_raw_ident_qid_Q22686",
+                subject="atlas:0cqt90abcd123456",
+                predicate="atlas:hasIdentifier",
+                object=AtlasClaimObject(kind="identifier", id_type="qid", value="Q22686"),
+                layer="raw",
+                provenance=AtlasProvenance(source="wikidata", retrieval_method="wbsearchentities + entitydata", confidence=0.99, retrieved_at="2026-04-03T00:00:01Z"),
+            ),
+            AtlasClaim(
+                claim_id="clm_drv_canonical_type",
+                subject="atlas:0cqt90abcd123456",
+                predicate="atlas:hasCanonicalType",
+                object=AtlasClaimObject(kind="type", value="atlas:Person"),
+                layer="derived",
+                provenance=AtlasProvenance(source="openai-llm", retrieval_method="type-classification", confidence=1.0, retrieved_at="2026-04-03T00:00:02Z"),
+            ),
         ],
         raw_payload={"wikidata": {"status": "ok", "qid": "Q22686", "retrieved_at": "2026-04-03T00:00:01Z"}},
         needs_curation=False,

+ 12 - 4
tests/test_debug_export_file.py

@@ -1,17 +1,25 @@
 from pathlib import Path
 
-from app.models import AtlasAlias, AtlasEntity, AtlasIdentifier, AtlasProvenance
+from app.models import AtlasAlias, AtlasClaim, AtlasClaimObject, AtlasEntity, AtlasProvenance
 from app.triple_export import entity_to_turtle
 
 
 def test_debug_turtle_can_be_written_to_file(tmp_path: Path):
     entity = AtlasEntity(
-        atlas_id="atlas:mid:/m/012gx2",
+        atlas_id="atlas:012gx2abcd123456",
         canonical_label="Joe Biden",
         entity_type="Person",
         aliases=[AtlasAlias(label="Joe Biden")],
-        identifiers=[AtlasIdentifier(value="/m/012gx2", source="google", identifier_type="mid")],
-        provenance=[AtlasProvenance(source="google-trends", retrieval_method="trends-resolution", confidence=0.9, retrieved_at="2026-04-03T17:33:21.651528+00:00")],
+        claims=[
+            AtlasClaim(
+                claim_id="clm_raw_ident_mid_/m/012gx2",
+                subject="atlas:012gx2abcd123456",
+                predicate="atlas:hasIdentifier",
+                object=AtlasClaimObject(kind="identifier", id_type="mid", value="/m/012gx2"),
+                layer="raw",
+                provenance=AtlasProvenance(source="google-trends", retrieval_method="trends-resolution", confidence=0.9, retrieved_at="2026-04-03T17:33:21.651528+00:00"),
+            )
+        ],
         needs_curation=False,
     )
     turtle = entity_to_turtle(entity)

+ 15 - 7
tests/test_storage_service.py

@@ -1,6 +1,6 @@
 import pytest
 
-from app.models import AtlasAlias, AtlasEntity, AtlasIdentifier, AtlasProvenance
+from app.models import AtlasAlias, AtlasClaim, AtlasClaimObject, AtlasEntity, AtlasProvenance
 from app.storage_service import AtlasStorageService, entity_iri
 
 
@@ -14,13 +14,21 @@ async def test_write_entity_uses_batch_insert():
 
     svc = AtlasStorageService(call_tool=fake_call)
     entity = AtlasEntity(
-        atlas_id="atlas:mid:/m/0cqt90",
+        atlas_id="atlas:0cqt90abcd123456",
         canonical_label="Donald Trump",
         canonical_description="45th and 47th U.S. President",
         entity_type="Person",
         aliases=[AtlasAlias(label="Donald Trump")],
-        identifiers=[AtlasIdentifier(value="/m/0cqt90", source="google", identifier_type="mid")],
-        provenance=[AtlasProvenance(source="google", retrieval_method="trends-resolution", confidence=0.9)],
+        claims=[
+            AtlasClaim(
+                claim_id="clm_raw_ident_mid_/m/0cqt90",
+                subject="atlas:0cqt90abcd123456",
+                predicate="atlas:hasIdentifier",
+                object=AtlasClaimObject(kind="identifier", id_type="mid", value="/m/0cqt90"),
+                layer="raw",
+                provenance=AtlasProvenance(source="google", retrieval_method="trends-resolution", confidence=0.9),
+            )
+        ],
     )
 
     result = await svc.write_entity(entity)
@@ -39,11 +47,11 @@ async def test_read_entity_claims_uses_sparql_query():
         return {"results": {"bindings": []}}
 
     svc = AtlasStorageService(call_tool=fake_call)
-    result = await svc.read_entity_claims("atlas:mid:/m/0cqt90")
+    result = await svc.read_entity_claims("atlas:0cqt90abcd123456")
 
     assert result["status"] == "ok"
     assert calls[0][0] == "sparql_query"
-    assert entity_iri("atlas:mid:/m/0cqt90") in calls[0][1]["query"]
+    assert entity_iri("atlas:0cqt90abcd123456") in calls[0][1]["query"]
     assert 'FILTER(?status = "active")' in calls[0][1]["query"]
 
 
@@ -56,7 +64,7 @@ async def test_read_entity_claims_include_superseded_removes_filter():
         return {"results": {"bindings": []}}
 
     svc = AtlasStorageService(call_tool=fake_call)
-    result = await svc.read_entity_claims("atlas:mid:/m/0cqt90", include_superseded=True)
+    result = await svc.read_entity_claims("atlas:0cqt90abcd123456", include_superseded=True)
 
     assert result["status"] == "ok"
     assert calls[0][0] == "sparql_query"

+ 19 - 4
tests/test_triple_export.py

@@ -1,15 +1,30 @@
-from app.models import AtlasAlias, AtlasEntity, AtlasIdentifier, AtlasProvenance
+from app.models import AtlasAlias, AtlasClaim, AtlasClaimObject, AtlasEntity, AtlasProvenance
 from app.triple_export import entity_to_turtle
 
 
 def test_entity_to_turtle_contains_expected_triples():
     entity = AtlasEntity(
-        atlas_id="atlas:mid:/m/012gx2",
+        atlas_id="atlas:012gx2abcd123456",
         canonical_label="Joe Biden",
         entity_type="Person",
         aliases=[AtlasAlias(label="Joe Biden")],
-        identifiers=[AtlasIdentifier(value="/m/012gx2", source="google", identifier_type="mid")],
-        provenance=[AtlasProvenance(source="google-trends", retrieval_method="trends-resolution", confidence=0.9, retrieved_at="2026-04-03T17:33:21.651528+00:00")],
+        claims=[
+            AtlasClaim(
+                claim_id="clm_raw_ident_mid_/m/012gx2",
+                subject="atlas:012gx2abcd123456",
+                predicate="atlas:hasIdentifier",
+                object=AtlasClaimObject(kind="identifier", id_type="mid", value="/m/012gx2"),
+                layer="raw",
+                provenance=AtlasProvenance(source="google-trends", retrieval_method="trends-resolution", confidence=0.9, retrieved_at="2026-04-03T17:33:21.651528+00:00"),
+            ),
+            AtlasClaim(
+                claim_id="clm_drv_canonical_type",
+                subject="atlas:012gx2abcd123456",
+                predicate="atlas:hasCanonicalType",
+                object=AtlasClaimObject(kind="type", value="atlas:Person"),
+                layer="derived",
+            ),
+        ],
         needs_curation=False,
     )
     ttl = entity_to_turtle(entity)