"""Normalize entity names and queries through a small alias map loaded from JSON."""

from __future__ import annotations

import json
import logging
from functools import lru_cache
from pathlib import Path
from typing import Iterable

from news_mcp.config import ENTITY_ALIASES_FILE

_logger = logging.getLogger(__name__)


# Small, explicit canonical alias map.
# Keep this conservative and grow it only when a shorthand is clearly useful.
@lru_cache(maxsize=1)
def _alias_map() -> dict[str, str]:
    """Load the alias -> canonical-name mapping from ``ENTITY_ALIASES_FILE``.

    The file is expected to hold a JSON object. Keys are stripped and
    lower-cased so lookups are case-insensitive; values are stripped and
    otherwise kept as written. Entries whose key or value is empty are
    skipped.

    Loading is best-effort: a missing file, an unreadable file, invalid
    JSON, or a top-level value that is not an object all yield an empty
    mapping. The result is cached, so the file is read at most once until
    ``_alias_map.cache_clear()`` is called.

    Returns:
        Mapping from lower-cased alias to canonical name.
    """
    path = Path(ENTITY_ALIASES_FILE)
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # The alias file is optional; its absence is not worth a warning.
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        _logger.warning("Ignoring unreadable entity alias file %s: %s", path, exc)
        return {}

    if not isinstance(raw, dict):
        return {}

    aliases: dict[str, str] = {}
    for alias, canonical in raw.items():
        if not alias or not canonical:
            continue
        key = str(alias).strip().lower()
        target = str(canonical).strip()
        # Whitespace-only entries would produce an unusable empty key/value.
        if key and target:
            aliases[key] = target
    return aliases


def _lookup_alias(key: str) -> str | None:
    """Return the canonical name for an already-normalized *key*, or ``None``."""
    return _alias_map().get(key)


def normalize_entity(value: str) -> str:
    """Return the canonical form of a single entity name.

    The value is stripped and looked up case-insensitively in the alias
    map. If an alias matches, its canonical name is returned; otherwise the
    stripped input is returned with its original casing.

    Args:
        value: Entity name to normalize. ``None`` is treated as empty so it
            is never turned into the literal string ``"None"``.

    Returns:
        The canonical name, the stripped input, or ``""`` for empty input.
    """
    if value is None:
        return ""
    text = str(value).strip()
    if not text:
        return ""
    return _lookup_alias(text.lower()) or text


def normalize_query(value: str) -> str:
    """Normalize a query string using the same rules as ``normalize_entity``."""
    return normalize_entity(value)


def normalize_entities(values: Iterable[str]) -> list[str]:
    """Normalize each entity and drop empties and case-insensitive duplicates.

    Args:
        values: Entity names to normalize; a falsy value (e.g. ``None`` or
            an empty list) yields an empty result.

    Returns:
        Normalized names in first-seen order, keeping the first spelling of
        each case-insensitive duplicate.
    """
    out: list[str] = []
    seen: set[str] = set()
    for value in values or []:
        norm = normalize_entity(value)
        key = norm.lower()
        if not norm or key in seen:
            continue
        seen.add(key)
        out.append(norm)
    return out