| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- from __future__ import annotations
- import json
- from functools import lru_cache
- from pathlib import Path
- from typing import Iterable
- from news_mcp.config import ENTITY_ALIASES_FILE
- # Small, explicit canonical alias map.
- # Keep this conservative and grow it only when a shorthand is clearly useful.
@lru_cache(maxsize=1)
def _alias_map() -> dict[str, str]:
    """Load the alias -> canonical-name table from ``ENTITY_ALIASES_FILE``.

    The result is memoized by ``lru_cache``, so the file is read on the first
    call only; later calls reuse the same dict (including an empty one
    produced by a failed load) until the cache is cleared.

    Returns:
        A mapping whose keys are stripped, lower-cased aliases and whose
        values are stripped canonical names. An empty dict is returned when
        the file is missing, cannot be read or parsed, or does not contain a
        JSON object at the top level.
    """
    alias_file = Path(ENTITY_ALIASES_FILE)
    if not alias_file.exists():
        return {}

    try:
        payload = json.loads(alias_file.read_text(encoding="utf-8"))
    except Exception:
        # Best effort: an unreadable or malformed file means "no aliases".
        return {}

    if not isinstance(payload, dict):
        return {}

    # Skip entries where either side is falsy (empty string, null, 0, ...).
    return {
        str(alias).strip().lower(): str(canonical).strip()
        for alias, canonical in payload.items()
        if alias and canonical
    }
def _lookup_alias(key: str) -> str | None:
    """Return the canonical name mapped to *key*, or ``None`` if absent.

    *key* is used as-is for the lookup; the alias map stores stripped,
    lower-cased keys, so callers should pass a key in that form.
    """
    aliases = _alias_map()
    return aliases.get(key)
def normalize_entity(value: str) -> str:
    """Return the canonical form of an entity name.

    The input is coerced to ``str`` and stripped of surrounding whitespace.
    Its lower-cased form is looked up in the alias map; a non-empty alias
    target wins, otherwise the stripped input is returned with its original
    casing.

    Args:
        value: Raw entity text. ``None`` is treated as empty input.

    Returns:
        The canonical name, the stripped input when no alias applies, or
        ``""`` when the input is ``None``, empty, or whitespace only.
    """
    # str(None) would otherwise leak the literal entity "None" to callers.
    if value is None:
        return ""

    cleaned = str(value).strip()
    if not cleaned:
        return ""

    # `or` also falls back to the input when the alias target is empty.
    return _lookup_alias(cleaned.lower()) or cleaned
def normalize_query(value: str) -> str:
    """Normalize a query string using the same rules as ``normalize_entity``."""
    normalized = normalize_entity(value)
    return normalized
def normalize_entities(values: Iterable[str]) -> list[str]:
    """Normalize each entity and drop empties and case-insensitive duplicates.

    Args:
        values: Raw entity strings; a falsy argument (e.g. ``None`` or an
            empty list) is treated as no input.

    Returns:
        Normalized names in first-seen order. When two inputs normalize to
        names that differ only by case, the first one encountered is kept.
    """
    unique: list[str] = []
    seen_keys: set[str] = set()

    for raw in values or []:
        canonical = normalize_entity(raw)
        if not canonical:
            continue

        dedupe_key = canonical.lower()
        if dedupe_key in seen_keys:
            continue

        seen_keys.add(dedupe_key)
        unique.append(canonical)

    return unique
|