entity_normalize.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. """Entity normalization helpers reused from news-mcp."""
  2. from __future__ import annotations
  3. import json
  4. from functools import lru_cache
  5. from pathlib import Path
  6. from typing import Iterable
  7. from .config import ENTITY_ALIASES_FILE
  8. def _alias_map() -> dict[str, str]:
  9. path = Path(ENTITY_ALIASES_FILE)
  10. if not path.exists():
  11. return {}
  12. try:
  13. raw = json.loads(path.read_text(encoding="utf-8"))
  14. except Exception:
  15. return {}
  16. out: dict[str, str] = {}
  17. if isinstance(raw, dict):
  18. for k, v in raw.items():
  19. if k and v:
  20. out[str(k).strip().lower()] = str(v).strip()
  21. return out
  22. def _lookup_alias(key: str) -> str | None:
  23. return _alias_map().get(key)
  24. def normalize_entity(value: str) -> str:
  25. key = str(value).strip().lower()
  26. if not key:
  27. return ""
  28. return _lookup_alias(key) or str(value).strip()
  29. def normalize_entities(values: Iterable[str]) -> list[str]:
  30. out: list[str] = []
  31. seen: set[str] = set()
  32. for value in values or []:
  33. norm = normalize_entity(value)
  34. key = norm.lower()
  35. if not norm or key in seen:
  36. continue
  37. seen.add(key)
  38. out.append(norm)
  39. return out