| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- """Entity normalization helpers reused from news-mcp."""
- from __future__ import annotations
- import json
- from functools import lru_cache
- from pathlib import Path
- from typing import Iterable
- from .config import ENTITY_ALIASES_FILE
def _alias_map() -> dict[str, str]:
    """Return the alias table as ``{lowercased alias: canonical name}``.

    The table is read from the JSON file named by ``ENTITY_ALIASES_FILE``.
    Parsing is memoized on the file's path, modification time and size, so
    repeated lookups do not re-read and re-parse the file, while edits to
    the file are still picked up on the next call.

    Returns:
        A fresh dict on every call. It is empty when the file is missing,
        cannot be read, is not valid JSON, or does not hold a JSON object
        at the top level.
    """
    path = Path(ENTITY_ALIASES_FILE)
    try:
        info = path.stat()
    except (OSError, ValueError):
        # Missing or inaccessible file: behave as if no aliases exist.
        return {}
    # Copy so that callers can never mutate the memoized table.
    return dict(_load_alias_file(str(path), info.st_mtime_ns, info.st_size))


@lru_cache(maxsize=1)
def _load_alias_file(path_str: str, mtime_ns: int, size: int) -> dict[str, str]:
    """Parse the alias file at *path_str* into a normalized lookup table.

    *mtime_ns* and *size* are not read by the body; they only take part in
    the cache key so that a changed file invalidates the memoized result.
    Entries whose key or value is falsy are skipped; keys are stripped and
    lowercased, values are stripped.
    """
    try:
        raw = json.loads(Path(path_str).read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # Best-effort load: unreadable, undecodable or malformed content
        # (JSONDecodeError and UnicodeDecodeError are ValueErrors) means
        # "no aliases" rather than a crash.
        return {}
    if not isinstance(raw, dict):
        return {}
    return {
        str(alias).strip().lower(): str(canonical).strip()
        for alias, canonical in raw.items()
        if alias and canonical
    }
def _lookup_alias(key: str) -> str | None:
    """Return the canonical name registered for *key*, or ``None``.

    *key* is looked up verbatim in the table returned by ``_alias_map``,
    whose keys are stored stripped and lowercased, so callers should pass
    a key normalized the same way.
    """
    aliases = _alias_map()
    return aliases.get(key)
def normalize_entity(value: str) -> str:
    """Map *value* to its canonical entity name.

    The value is converted to text and stripped of surrounding whitespace;
    its lowercased form is then looked up in the alias table.

    Args:
        value: Raw entity text. Non-string values are converted with
            ``str()``; ``None`` is treated as empty input.

    Returns:
        The alias target when one is registered, otherwise the stripped
        input with its original casing. Returns ``""`` for ``None`` and
        for empty or whitespace-only input.
    """
    if value is None:
        # str(None) would yield the literal "None" and invent an entity.
        return ""
    text = str(value).strip()
    if not text:
        return ""
    return _lookup_alias(text.lower()) or text
def normalize_entities(values: Iterable[str]) -> list[str]:
    """Normalize each entity and drop empties and case-insensitive repeats.

    Every item is passed through ``normalize_entity``. Results that are
    empty are discarded, and when two results differ only by case the
    first one encountered is kept. Output order follows first appearance.
    A falsy *values* (such as ``None`` or an empty list) yields ``[]``.
    """
    # Dicts preserve insertion order, and setdefault never overwrites, so
    # the first spelling seen for each lowercased key is the one retained.
    first_seen: dict[str, str] = {}
    for raw in values or []:
        canonical = normalize_entity(raw)
        if canonical:
            first_seen.setdefault(canonical.lower(), canonical)
    return list(first_seen.values())
|