"""Google Trends-backed entity resolution borrowed from news-mcp.""" from __future__ import annotations import json from datetime import datetime, timezone from functools import lru_cache from typing import Any from urllib.parse import quote import httpx from .entity_normalize import normalize_entity class GoogleTrendsError(RuntimeError): pass class GoogleTrendsProvider: _SUGGESTIONS_URL = "https://trends.google.com/trends/api/autocomplete/" def __init__(self, *, hl: str = "en-US", tz: int = 120, timeout: float = 10.0): self.hl = hl self.tz = tz self.timeout = timeout self._headers = { "User-Agent": ( "Mozilla/5.0 (X11; Linux x86_64) " "AppleWebKit/537.36 (KHTML, like Gecko) " "Chrome/135.0.0.0 Safari/537.36" ), "Accept": "application/json,text/javascript,*/*;q=0.1", } def suggestions(self, keyword: str) -> list[dict[str, Any]]: url = self._SUGGESTIONS_URL + quote(keyword) params = {"hl": self.hl, "tz": str(self.tz)} response = httpx.get( url, params=params, headers=self._headers, timeout=self.timeout, follow_redirects=True, ) response.raise_for_status() text = response.text.strip() if text.startswith(")]}',"): text = text[5:] payload = json.loads(text) default = payload.get("default") if isinstance(payload, dict) else None topics = default.get("topics") if isinstance(default, dict) else None return topics if isinstance(topics, list) else [] @lru_cache(maxsize=1) def _provider() -> GoogleTrendsProvider | None: try: return GoogleTrendsProvider() except Exception: return None def _retrieved_at() -> str: return datetime.now(timezone.utc).isoformat() @lru_cache(maxsize=1024) def resolve_entity_via_trends(subject: str) -> dict[str, Any]: normalized = normalize_entity(subject) if not normalized: return { "raw": subject, "normalized": "", "canonical_label": "", "mid": None, "type": None, "candidates": [], "source": "empty", "retrieved_at": _retrieved_at(), } provider = _provider() if provider is not None: try: suggestions = provider.suggestions(normalized) best = suggestions[0] if suggestions else None return { "raw": subject, "normalized": normalized, "canonical_label": best.get("title") if best else normalized, "mid": best.get("mid") if best else None, "type": best.get("type") if best else None, "candidates": suggestions, "source": "google-trends", "retrieved_at": _retrieved_at(), } except Exception: pass return { "raw": subject, "normalized": normalized, "canonical_label": normalized, "mid": None, "type": None, "candidates": [], "source": "fallback", "retrieved_at": _retrieved_at(), }