test_atlas_contracts.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. import pytest
  2. import app.atlas as atlas_module
  3. from app.atlas import enrich_entity, resolve_entity
  4. from app.models import AtlasAlias, AtlasEntity, AtlasIdentifier, AtlasProvenance
  5. from app.type_classifier import TypeClassification
  6. @pytest.mark.anyio
  7. async def test_resolve_entity_returns_canonical_structure():
  8. entity = await resolve_entity("Trump")
  9. assert entity.atlas_id.startswith("atlas:")
  10. assert entity.canonical_label
  11. assert entity.aliases[0].label.lower() == "trump" or entity.aliases[0].label.lower() == "donald trump"
  12. assert entity.provenance
  13. assert entity.raw_payload["raw"] == "Trump"
  14. @pytest.mark.anyio
  15. async def test_enrich_entity_returns_dataset_shape():
  16. entity = await resolve_entity("Trump")
  17. result = enrich_entity(entity, constraints={"type": "person"}, depth=2)
  18. assert result.seed_entity.atlas_id == entity.atlas_id
  19. assert result.query_context == {"type": "person"}
  20. assert result.depth == 2
  21. assert result.related_entities == []
  22. def test_internal_models_support_identity_and_provenance():
  23. entity = AtlasEntity(
  24. atlas_id="atlas:donald-trump",
  25. canonical_label="Donald Trump",
  26. entity_type="person",
  27. aliases=[AtlasAlias(label="Trump")],
  28. identifiers=[AtlasIdentifier(value="Q22686", source="wikidata", identifier_type="wikidata-qid")],
  29. provenance=[AtlasProvenance(source="google-trends", retrieval_method="entity-resolution", confidence=0.93)],
  30. )
  31. assert entity.atlas_id == "atlas:donald-trump"
  32. assert entity.aliases[0].label == "Trump"
  33. assert entity.identifiers[0].value == "Q22686"
  34. assert entity.provenance[0].source == "google-trends"
  35. @pytest.mark.anyio
  36. async def test_resolve_entity_passes_context_to_classifier(monkeypatch):
  37. captured = {}
  38. async def fake_classifier(subject, resolution, context):
  39. captured["context"] = context
  40. return TypeClassification(canonical_type="Person", provenance=None, needs_curation=False)
  41. def fake_trends(subject):
  42. return {
  43. "canonical_label": subject,
  44. "normalized": subject,
  45. "mid": None,
  46. "type": "Person",
  47. "source": "resolver",
  48. "resolved_at": "2026-04-03T00:00:00Z",
  49. "candidates": [],
  50. "raw": subject,
  51. }
  52. writes = []
  53. async def fake_write(entity):
  54. writes.append(entity)
  55. return {"status": "ok"}
  56. monkeypatch.setattr("app.atlas.classify_entity_type", fake_classifier)
  57. monkeypatch.setattr("app.atlas.resolve_entity_via_trends", fake_trends)
  58. monkeypatch.setattr(atlas_module._storage, "write_entity", fake_write)
  59. entity = await resolve_entity("Sample", context="news paragraph")
  60. assert captured["context"] == "news paragraph"
  61. assert entity.entity_type == "Person"
  62. assert writes and writes[0].canonical_label == "Sample"
  63. @pytest.mark.anyio
  64. async def test_resolve_entity_persists_cached_hits(monkeypatch):
  65. cached_entity = AtlasEntity(atlas_id="atlas:x", canonical_label="Cached Entity")
  66. monkeypatch.setattr("app.atlas._entity_cache.get", lambda token: cached_entity)
  67. writes = []
  68. async def fake_write(entity):
  69. writes.append(entity)
  70. return {"status": "ok"}
  71. monkeypatch.setattr(atlas_module._storage, "write_entity", fake_write)
  72. entity = await resolve_entity("Cached Entity")
  73. assert entity is cached_entity
  74. assert writes and writes[0] is cached_entity
  75. @pytest.mark.anyio
  76. async def test_resolve_entity_marks_needs_curation(monkeypatch):
  77. async def fake_classifier(subject, resolution, context):
  78. return TypeClassification(canonical_type=None, provenance=None, needs_curation=True)
  79. def fake_trends(subject):
  80. return {
  81. "canonical_label": subject,
  82. "normalized": subject,
  83. "mid": None,
  84. "type": "Unknown",
  85. "source": "resolver",
  86. "resolved_at": "2026-04-03T00:00:00Z",
  87. "candidates": [],
  88. "raw": subject,
  89. }
  90. monkeypatch.setattr("app.atlas.classify_entity_type", fake_classifier)
  91. monkeypatch.setattr("app.atlas.resolve_entity_via_trends", fake_trends)
  92. entity = await resolve_entity("Mysterious")
  93. assert entity.needs_curation is True