| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110 |
- """Google Trends-backed entity resolution borrowed from news-mcp."""
- from __future__ import annotations
- import json
- from datetime import datetime, timezone
- from functools import lru_cache
- from typing import Any
- from urllib.parse import quote
- import httpx
- from .entity_normalize import normalize_entity
class GoogleTrendsError(RuntimeError):
    """Module-specific ``RuntimeError`` subclass for Google Trends failures."""
class GoogleTrendsProvider:
    """Small client for the Google Trends autocomplete endpoint.

    Each call to :meth:`suggestions` performs one blocking ``httpx.get``
    request using the headers and settings stored on the instance.
    """

    _SUGGESTIONS_URL = "https://trends.google.com/trends/api/autocomplete/"
    # Guard prefix the response body may start with; removed before parsing.
    _JSON_GUARD = ")]}',"

    def __init__(self, *, hl: str = "en-US", tz: int = 120, timeout: float = 10.0):
        """Store the request settings.

        Args:
            hl: Value sent as the ``hl`` query parameter.
            tz: Value sent (as a string) as the ``tz`` query parameter.
            timeout: Timeout handed to ``httpx.get``.
        """
        self.hl = hl
        self.tz = tz
        self.timeout = timeout
        self._headers = {
            "User-Agent": (
                "Mozilla/5.0 (X11; Linux x86_64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/135.0.0.0 Safari/537.36"
            ),
            "Accept": "application/json,text/javascript,*/*;q=0.1",
        }

    def suggestions(self, keyword: str) -> list[dict[str, Any]]:
        """Fetch the topic suggestions for *keyword*.

        Returns:
            The ``default.topics`` list from the response payload, or ``[]``
            when the payload does not have that shape.

        Raises:
            httpx.HTTPError: If the request fails or the status is an error
                (via ``raise_for_status``).
            json.JSONDecodeError: If the body is not valid JSON.
        """
        response = httpx.get(
            self._build_url(keyword),
            params={"hl": self.hl, "tz": str(self.tz)},
            headers=self._headers,
            timeout=self.timeout,
            follow_redirects=True,
        )
        response.raise_for_status()
        return self._parse_topics(response.text)

    @classmethod
    def _build_url(cls, keyword: str) -> str:
        """Return the autocomplete URL with *keyword* as one path segment."""
        # safe="" also escapes "/", which quote() leaves alone by default;
        # otherwise a keyword like "AC/DC" would add an extra path segment.
        return cls._SUGGESTIONS_URL + quote(keyword, safe="")

    @classmethod
    def _parse_topics(cls, text: str) -> list[dict[str, Any]]:
        """Extract ``default.topics`` from a raw response body."""
        body = text.strip()
        if body.startswith(cls._JSON_GUARD):
            body = body[len(cls._JSON_GUARD):]
        payload = json.loads(body)
        default = payload.get("default") if isinstance(payload, dict) else None
        topics = default.get("topics") if isinstance(default, dict) else None
        return topics if isinstance(topics, list) else []
@lru_cache(maxsize=1)
def _provider() -> GoogleTrendsProvider | None:
    """Build the provider once and reuse it; ``None`` if construction raises.

    The result is memoized by ``lru_cache``, so a ``None`` outcome is reused
    on later calls as well.
    """
    instance = None
    try:
        instance = GoogleTrendsProvider()
    except Exception:
        instance = None
    return instance
def _retrieved_at() -> str:
    """Return the current UTC time as an ISO 8601 string."""
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.isoformat()
@lru_cache(maxsize=1024)
def resolve_entity_via_trends(subject: str) -> dict[str, Any]:
    """Resolve *subject* to a canonical entity using Google Trends suggestions.

    The first suggestion returned by the provider is treated as the best
    match. The ``source`` key tells which path produced the result:

    * ``"empty"`` - ``normalize_entity(subject)`` was falsy.
    * ``"google-trends"`` - the provider answered (possibly with no
      candidates, in which case the normalized string is the label).
    * ``"fallback"`` - no provider was available or the lookup raised.

    Results are memoized per *subject* by ``lru_cache``. Every caller gets the
    same dict object for the same subject, so it must not be mutated. The
    ``retrieved_at`` value is the time the entry was first computed, and
    ``"fallback"`` results are cached like any other.

    Returns:
        A dict with the keys ``raw``, ``normalized``, ``canonical_label``,
        ``mid``, ``type``, ``candidates``, ``source`` and ``retrieved_at``.
    """
    import logging  # function-scope import: the module header does not import it

    def _result(source, *, normalized="", label="", mid=None, kind=None, candidates=None):
        # Single place that fixes the key set and order of every return value.
        return {
            "raw": subject,
            "normalized": normalized,
            "canonical_label": label,
            "mid": mid,
            "type": kind,
            "candidates": candidates if candidates is not None else [],
            "source": source,
            "retrieved_at": _retrieved_at(),
        }

    normalized = normalize_entity(subject)
    if not normalized:
        return _result("empty")

    provider = _provider()
    if provider is not None:
        try:
            suggestions = provider.suggestions(normalized)
            best = suggestions[0] if suggestions else {}
            return _result(
                "google-trends",
                normalized=normalized,
                # A suggestion without a title must not yield a None label;
                # the other return paths always give a string here.
                label=best.get("title") or normalized,
                mid=best.get("mid"),
                kind=best.get("type"),
                candidates=suggestions,
            )
        except Exception:
            # Best-effort lookup: keep the fallback below, but leave a trace
            # instead of discarding the failure silently.
            logging.getLogger(__name__).warning(
                "Google Trends lookup failed for %r; using fallback",
                normalized,
                exc_info=True,
            )

    return _result("fallback", normalized=normalized, label=normalized)
|