| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117 |
- """Claim extraction helpers for Atlas layered outputs."""
- from __future__ import annotations
- from typing import Any
- from app.models import AtlasEntity, AtlasProvenance
def _prov_to_dict(p: AtlasProvenance | None) -> dict[str, Any] | None:
    """Flatten a provenance record into a plain dict for claim output.

    Args:
        p: The provenance record to convert, or ``None``.

    Returns:
        ``None`` when ``p`` is ``None``; otherwise a dict with the keys
        ``source``, ``method``, ``confidence`` and ``retrieved_at``. The
        record's ``retrieval_method`` attribute is exposed under the
        shorter ``method`` key.
    """
    if p is not None:
        flattened: dict[str, Any] = {}
        flattened["source"] = p.source
        flattened["method"] = p.retrieval_method
        flattened["confidence"] = p.confidence
        flattened["retrieved_at"] = p.retrieved_at
        return flattened
    return None
def _pick_provenance(
    entity: AtlasEntity,
    *,
    source_hint: str | None = None,
    method_hint: str | None = None,
) -> AtlasProvenance | None:
    """Choose the provenance record of ``entity`` that best matches the hints.

    Precedence:
      1. the first record whose ``retrieval_method`` equals ``method_hint``;
      2. the first record whose ``source`` equals ``source_hint``;
      3. the first record in ``entity.provenance``.

    A hint that is ``None`` or empty is skipped.

    Returns:
        The selected record, or ``None`` when the entity has no provenance.
    """
    records = entity.provenance
    if not records:
        return None

    if method_hint:
        by_method = next(
            (rec for rec in records if rec.retrieval_method == method_hint),
            None,
        )
        if by_method is not None:
            return by_method

    if source_hint:
        by_source = next(
            (rec for rec in records if rec.source == source_hint),
            None,
        )
        if by_source is not None:
            return by_source

    # Nothing matched a hint: fall back to the first record.
    return records[0]
def _id_type_resource(identifier_type: str) -> str:
    """Map an identifier type name to its ``atlas:`` resource name.

    ``"mid"`` and ``"qid"`` have dedicated resource names; any other value
    is prefixed with ``atlas:`` and returned otherwise unchanged.
    """
    special_cases = (
        ("mid", "atlas:Mid"),
        ("qid", "atlas:WikidataQID"),
    )
    for known_type, resource in special_cases:
        if identifier_type == known_type:
            return resource
    return f"atlas:{identifier_type}"
def build_claim_sets(entity: AtlasEntity) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Build the raw and derived claim lists for ``entity``.

    Raw claims, in order:
      * one ``atlas:hasIdentifier`` claim per entry in ``entity.identifiers``;
      * one ``atlas:hasAlias`` claim per entry in ``entity.aliases``;
      * one ``atlas:wikidataLookupStatus`` claim, only when the Wikidata
        payload's status is not ``"ok"``.

    Derived claims, in order:
      * one ``atlas:hasExternalType`` claim, only when the Wikidata
        payload's status is ``"ok"``;
      * one ``atlas:hasCanonicalType`` claim (always).

    Args:
        entity: The entity to describe. Reads ``atlas_id``, ``identifiers``,
            ``aliases``, ``raw_payload``, ``entity_type`` and (through
            ``_pick_provenance``) ``provenance``.

    Returns:
        A ``(raw_claims, derived_claims)`` tuple of claim dicts.
    """
    subject = entity.atlas_id

    # The "trends-resolution" provenance does not depend on the alias being
    # processed, so look it up once instead of once per alias. Each claim
    # still gets its own dict from _prov_to_dict so that claims never share
    # a mutable provenance object.
    trends_prov = _pick_provenance(entity, method_hint="trends-resolution")

    raw_claims: list[dict[str, Any]] = [
        {
            "claim_id": f"clm_raw_ident_{ident.identifier_type}_{ident.value}",
            "layer": "raw",
            "subject": subject,
            "predicate": "atlas:hasIdentifier",
            "object": {
                "kind": "identifier",
                "id_type": _id_type_resource(ident.identifier_type),
                "value": ident.value,
            },
            # Prefer the provenance record from the identifier's own source.
            "provenance": _prov_to_dict(_pick_provenance(entity, source_hint=ident.source)),
        }
        for ident in entity.identifiers
    ]
    raw_claims += [
        {
            "claim_id": f"clm_raw_alias_{alias.label}",
            "layer": "raw",
            "subject": subject,
            "predicate": "atlas:hasAlias",
            "object": {"kind": "alias", "value": alias.label},
            "provenance": _prov_to_dict(trends_prov),
        }
        for alias in entity.aliases
    ]

    derived_claims: list[dict[str, Any]] = []

    # Tolerate a missing payload as well as a missing "wikidata" entry.
    wd = (entity.raw_payload or {}).get("wikidata") or {}
    if wd.get("status") == "ok":
        # NOTE(review): the external type is fixed to Q5 regardless of what
        # the lookup returned; confirm this is intended.
        derived_claims.append(
            {
                "claim_id": "clm_drv_wikidata_type",
                "layer": "derived",
                "subject": subject,
                "predicate": "atlas:hasExternalType",
                "object": {
                    "kind": "external_type",
                    "value": "atlas:WikidataType_Q5",
                    "qid": wd.get("qid"),
                },
                "provenance": {
                    "source": "wikidata",
                    "method": "wbsearchentities + entitydata",
                    "confidence": 0.99,
                    "retrieved_at": wd.get("retrieved_at"),
                },
            }
        )
    else:
        # Record the failed or absent lookup as a raw claim; "missing" is
        # used when the payload carries no status at all.
        raw_claims.append(
            {
                "claim_id": "clm_raw_wikidata_missing",
                "layer": "raw",
                "subject": subject,
                "predicate": "atlas:wikidataLookupStatus",
                "object": {"kind": "literal", "value": wd.get("status", "missing")},
                "provenance": _prov_to_dict(trends_prov),
            }
        )

    derived_claims.append(
        {
            "claim_id": "clm_drv_canonical_type",
            "layer": "derived",
            "subject": subject,
            "predicate": "atlas:hasCanonicalType",
            "object": {"kind": "type", "value": f"atlas:{entity.entity_type}"},
            "provenance": _prov_to_dict(
                _pick_provenance(entity, method_hint="type-classification")
            ),
        }
    )
    return raw_claims, derived_claims
|