"""Entity normalization helpers reused from news-mcp.""" from __future__ import annotations import json from functools import lru_cache from pathlib import Path from typing import Iterable from .config import ENTITY_ALIASES_FILE def _alias_map() -> dict[str, str]: path = Path(ENTITY_ALIASES_FILE) if not path.exists(): return {} try: raw = json.loads(path.read_text(encoding="utf-8")) except Exception: return {} out: dict[str, str] = {} if isinstance(raw, dict): for k, v in raw.items(): if k and v: out[str(k).strip().lower()] = str(v).strip() return out def _lookup_alias(key: str) -> str | None: return _alias_map().get(key) def normalize_entity(value: str) -> str: key = str(value).strip().lower() if not key: return "" return _lookup_alias(key) or str(value).strip() def normalize_entities(values: Iterable[str]) -> list[str]: out: list[str] = [] seen: set[str] = set() for value in values or []: norm = normalize_entity(value) key = norm.lower() if not norm or key in seen: continue seen.add(key) out.append(norm) return out