# trends_resolution.py (3.2 KB)
"""Google Trends-backed entity resolution borrowed from news-mcp."""
from __future__ import annotations

import copy
import json
import logging
from datetime import datetime, timezone
from functools import lru_cache
from typing import Any
from urllib.parse import quote

import httpx

from .entity_normalize import normalize_entity
  10. class GoogleTrendsError(RuntimeError):
  11. pass
  12. class GoogleTrendsProvider:
  13. _SUGGESTIONS_URL = "https://trends.google.com/trends/api/autocomplete/"
  14. def __init__(self, *, hl: str = "en-US", tz: int = 120, timeout: float = 10.0):
  15. self.hl = hl
  16. self.tz = tz
  17. self.timeout = timeout
  18. self._headers = {
  19. "User-Agent": (
  20. "Mozilla/5.0 (X11; Linux x86_64) "
  21. "AppleWebKit/537.36 (KHTML, like Gecko) "
  22. "Chrome/135.0.0.0 Safari/537.36"
  23. ),
  24. "Accept": "application/json,text/javascript,*/*;q=0.1",
  25. }
  26. def suggestions(self, keyword: str) -> list[dict[str, Any]]:
  27. url = self._SUGGESTIONS_URL + quote(keyword)
  28. params = {"hl": self.hl, "tz": str(self.tz)}
  29. response = httpx.get(
  30. url,
  31. params=params,
  32. headers=self._headers,
  33. timeout=self.timeout,
  34. follow_redirects=True,
  35. )
  36. response.raise_for_status()
  37. text = response.text.strip()
  38. if text.startswith(")]}',"):
  39. text = text[5:]
  40. payload = json.loads(text)
  41. default = payload.get("default") if isinstance(payload, dict) else None
  42. topics = default.get("topics") if isinstance(default, dict) else None
  43. return topics if isinstance(topics, list) else []
  44. @lru_cache(maxsize=1)
  45. def _provider() -> GoogleTrendsProvider | None:
  46. try:
  47. return GoogleTrendsProvider()
  48. except Exception:
  49. return None
  50. def _resolved_at() -> str:
  51. return datetime.now(timezone.utc).isoformat()
  52. @lru_cache(maxsize=1024)
  53. def resolve_entity_via_trends(subject: str) -> dict[str, Any]:
  54. normalized = normalize_entity(subject)
  55. if not normalized:
  56. return {
  57. "raw": subject,
  58. "normalized": "",
  59. "canonical_label": "",
  60. "mid": None,
  61. "type": None,
  62. "candidates": [],
  63. "source": "empty",
  64. "resolved_at": _resolved_at(),
  65. }
  66. provider = _provider()
  67. if provider is not None:
  68. try:
  69. suggestions = provider.suggestions(normalized)
  70. best = suggestions[0] if suggestions else None
  71. return {
  72. "raw": subject,
  73. "normalized": normalized,
  74. "canonical_label": best.get("title") if best else normalized,
  75. "mid": best.get("mid") if best else None,
  76. "type": best.get("type") if best else None,
  77. "candidates": suggestions,
  78. "source": "google-trends",
  79. "resolved_at": _resolved_at(),
  80. }
  81. except Exception:
  82. pass
  83. return {
  84. "raw": subject,
  85. "normalized": normalized,
  86. "canonical_label": normalized,
  87. "mid": None,
  88. "type": None,
  89. "candidates": [],
  90. "source": "fallback",
  91. "resolved_at": _resolved_at(),
  92. }