"""Claim extraction helpers for Atlas layered outputs."""

from __future__ import annotations

from typing import Any

from app.models import AtlasEntity, AtlasProvenance


def _prov_to_dict(p: AtlasProvenance | None) -> dict[str, Any] | None:
    """Convert a provenance record into the plain dict embedded in a claim.

    Returns ``None`` when *p* is ``None``. Note that the record's
    ``retrieval_method`` attribute is exposed under the shorter ``"method"``
    key in the resulting dict.
    """
    if p is None:
        return None
    return {
        "source": p.source,
        "method": p.retrieval_method,
        "confidence": p.confidence,
        "retrieved_at": p.retrieved_at,
    }


def _pick_provenance(
    entity: AtlasEntity,
    *,
    source_hint: str | None = None,
    method_hint: str | None = None,
) -> AtlasProvenance | None:
    """Select the provenance record of *entity* that best matches the hints.

    Selection order:

    1. the first record whose ``retrieval_method`` equals *method_hint*;
    2. otherwise the first record whose ``source`` equals *source_hint*;
    3. otherwise the first record of ``entity.provenance``.

    A falsy hint (``None`` or an empty string) is skipped. Returns ``None``
    when the entity carries no provenance at all.
    """
    records = entity.provenance
    if not records:
        return None

    # A method match takes priority over a source match.
    if method_hint:
        for record in records:
            if record.retrieval_method == method_hint:
                return record

    if source_hint:
        for record in records:
            if record.source == source_hint:
                return record

    # Nothing matched the hints: fall back to the first available record.
    return records[0]


def _id_type_resource(identifier_type: str) -> str:
    """Map an identifier type onto its ``atlas:`` resource name.

    ``"mid"`` and ``"qid"`` have dedicated resource names; every other type
    is passed through as ``atlas:<identifier_type>``.
    """
    if identifier_type == "mid":
        return "atlas:Mid"
    if identifier_type == "qid":
        return "atlas:WikidataQID"
    return f"atlas:{identifier_type}"


def build_claim_sets(
    entity: AtlasEntity,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Build the raw and derived claim lists for *entity*.

    Raw claims, in order:

    * one ``atlas:hasIdentifier`` claim per entry of ``entity.identifiers``;
    * one ``atlas:hasAlias`` claim per entry of ``entity.aliases``;
    * one ``atlas:wikidataLookupStatus`` claim when the ``"wikidata"`` entry
      of ``entity.raw_payload`` does not report ``status == "ok"``.

    Derived claims, in order:

    * one ``atlas:hasExternalType`` claim when the Wikidata status is
      ``"ok"``;
    * one ``atlas:hasCanonicalType`` claim, always.

    Returns:
        A ``(raw_claims, derived_claims)`` tuple of claim dicts.
    """
    raw_claims: list[dict[str, Any]] = []
    derived_claims: list[dict[str, Any]] = []
    subject = entity.atlas_id

    for ident in entity.identifiers:
        # Identifier claims are attributed to the record from the same source.
        ident_prov = _pick_provenance(entity, source_hint=ident.source)
        raw_claims.append(
            {
                "claim_id": f"clm_raw_ident_{ident.identifier_type}_{ident.value}",
                "layer": "raw",
                "subject": subject,
                "predicate": "atlas:hasIdentifier",
                "object": {
                    "kind": "identifier",
                    "id_type": _id_type_resource(ident.identifier_type),
                    "value": ident.value,
                },
                "provenance": _prov_to_dict(ident_prov),
            }
        )

    # This lookup does not depend on the alias, so resolve it once instead of
    # rescanning the provenance list for every alias. The dict itself is still
    # built per claim so that claims never share a mutable provenance object.
    trends_prov = _pick_provenance(entity, method_hint="trends-resolution")

    for alias in entity.aliases:
        raw_claims.append(
            {
                "claim_id": f"clm_raw_alias_{alias.label}",
                "layer": "raw",
                "subject": subject,
                "predicate": "atlas:hasAlias",
                "object": {"kind": "alias", "value": alias.label},
                "provenance": _prov_to_dict(trends_prov),
            }
        )

    # A missing or falsy "wikidata" payload is treated like a failed lookup.
    wd = entity.raw_payload.get("wikidata") or {}
    if wd.get("status") == "ok":
        derived_claims.append(
            {
                "claim_id": "clm_drv_wikidata_type",
                "layer": "derived",
                "subject": subject,
                "predicate": "atlas:hasExternalType",
                # NOTE(review): the type value is hard-coded to Q5 regardless
                # of the qid that was looked up - confirm this is intended.
                "object": {
                    "kind": "external_type",
                    "value": "atlas:WikidataType_Q5",
                    "qid": wd.get("qid"),
                },
                "provenance": {
                    "source": "wikidata",
                    "method": "wbsearchentities + entitydata",
                    "confidence": 0.99,
                    "retrieved_at": wd.get("retrieved_at"),
                },
            }
        )
    else:
        # Record the lookup outcome; "missing" is used when no status exists.
        raw_claims.append(
            {
                "claim_id": "clm_raw_wikidata_missing",
                "layer": "raw",
                "subject": subject,
                "predicate": "atlas:wikidataLookupStatus",
                "object": {"kind": "literal", "value": wd.get("status", "missing")},
                "provenance": _prov_to_dict(trends_prov),
            }
        )

    type_prov = _pick_provenance(entity, method_hint="type-classification")
    derived_claims.append(
        {
            "claim_id": "clm_drv_canonical_type",
            "layer": "derived",
            "subject": subject,
            "predicate": "atlas:hasCanonicalType",
            "object": {"kind": "type", "value": f"atlas:{entity.entity_type}"},
            "provenance": _prov_to_dict(type_prov),
        }
    )

    return raw_claims, derived_claims