models.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. """Atlas internal data models."""
  2. from dataclasses import dataclass, field
  3. from typing import Any, Dict, List, Optional
  4. @dataclass
  5. class AtlasIdentifier:
  6. value: str
  7. source: str
  8. identifier_type: str
  9. @dataclass
  10. class AtlasAlias:
  11. label: str
  12. language: str = "und"
  13. source: str = "query"
  14. @dataclass
  15. class AtlasProvenance:
  16. source: str
  17. retrieval_method: str
  18. confidence: float = 0.0
  19. retrieved_at: Optional[str] = None
  20. @dataclass
  21. class AtlasClaimObject:
  22. kind: str
  23. value: str
  24. id_type: str | None = None
  25. @dataclass
  26. class AtlasClaim:
  27. claim_id: str
  28. subject: str
  29. predicate: str
  30. object: AtlasClaimObject
  31. layer: str
  32. status: str = "active"
  33. provenance: AtlasProvenance | None = None
  34. @dataclass
  35. class AtlasEntity:
  36. atlas_id: str
  37. canonical_label: str
  38. canonical_description: str | None = None
  39. entity_type: str = "unknown"
  40. aliases: List[AtlasAlias] = field(default_factory=list)
  41. claims: List[AtlasClaim] = field(default_factory=list)
  42. raw_payload: Dict[str, Any] = field(default_factory=dict)
  43. needs_curation: bool = False
  44. def active_identifier(self, identifier_type: str) -> str | None:
  45. for claim in self.claims:
  46. if (
  47. claim.status == "active"
  48. and claim.predicate == "atlas:hasIdentifier"
  49. and claim.object.kind == "identifier"
  50. and claim.object.id_type == identifier_type
  51. ):
  52. return claim.object.value
  53. return None
  54. @dataclass
  55. class AtlasEnrichmentDataset:
  56. seed_entity: AtlasEntity
  57. related_entities: List[AtlasEntity] = field(default_factory=list)
  58. query_context: Dict[str, Any] = field(default_factory=dict)
  59. depth: int = 1