trends_resolution.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
from __future__ import annotations

import json
import logging
from datetime import datetime, timezone
from functools import lru_cache
from typing import Any
from urllib.parse import quote

import httpx

from news_mcp.entity_normalize import normalize_entity
  9. class GoogleTrendsError(RuntimeError):
  10. pass
  11. class GoogleTrendsProvider:
  12. """Minimal in-process Google Trends adapter used by news-mcp.
  13. We only need entity suggestions for the resolver path, so keep this module
  14. intentionally narrow rather than importing the full trends-mcp server.
  15. """
  16. _SUGGESTIONS_URL = "https://trends.google.com/trends/api/autocomplete/"
  17. def __init__(self, *, hl: str = "en-US", tz: int = 120, timeout: float = 10.0):
  18. self.hl = hl
  19. self.tz = tz
  20. self.timeout = timeout
  21. self._headers = {
  22. "User-Agent": (
  23. "Mozilla/5.0 (X11; Linux x86_64) "
  24. "AppleWebKit/537.36 (KHTML, like Gecko) "
  25. "Chrome/135.0.0.0 Safari/537.36"
  26. ),
  27. "Accept": "application/json,text/javascript,*/*;q=0.1",
  28. }
  29. def suggestions(self, keyword: str) -> list[dict[str, Any]]:
  30. url = self._SUGGESTIONS_URL + quote(keyword)
  31. params = {"hl": self.hl, "tz": str(self.tz)}
  32. try:
  33. response = httpx.get(url, params=params, headers=self._headers, timeout=self.timeout, follow_redirects=True)
  34. response.raise_for_status()
  35. text = response.text.strip()
  36. if text.startswith(")]}',"):
  37. text = text[5:]
  38. payload = json.loads(text)
  39. default = payload.get("default") if isinstance(payload, dict) else None
  40. topics = default.get("topics") if isinstance(default, dict) else None
  41. return topics if isinstance(topics, list) else []
  42. except Exception as exc: # pragma: no cover - network/provider dependent
  43. raise GoogleTrendsError(f"suggestions failed for {keyword!r}: {exc}") from exc
  44. @lru_cache(maxsize=1)
  45. def _provider() -> GoogleTrendsProvider | None:
  46. try:
  47. return GoogleTrendsProvider()
  48. except Exception:
  49. return None
  50. def _resolved_at() -> str:
  51. return datetime.now(timezone.utc).isoformat()
  52. @lru_cache(maxsize=1024)
  53. def resolve_entity_via_trends(entity: str) -> dict[str, Any]:
  54. """Resolve an entity locally via Google Trends suggestions.
  55. The returned shape intentionally mirrors the former trends-mcp bridge so the
  56. rest of news-mcp can stay unchanged during the migration.
  57. """
  58. normalized = normalize_entity(entity)
  59. if not normalized:
  60. return {
  61. "raw": entity,
  62. "normalized": "",
  63. "canonical_label": "",
  64. "mid": None,
  65. "type": None,
  66. "candidates": [],
  67. "source": "empty",
  68. "resolved_at": _resolved_at(),
  69. }
  70. provider = _provider()
  71. if provider is not None:
  72. try:
  73. suggestions = provider.suggestions(normalized)
  74. best = suggestions[0] if suggestions else None
  75. return {
  76. "raw": entity,
  77. "normalized": normalized,
  78. "canonical_label": best.get("title") if best else normalized,
  79. "mid": best.get("mid") if best else None,
  80. "type": best.get("type") if best else None,
  81. "candidates": suggestions,
  82. "source": "google-trends",
  83. "resolved_at": _resolved_at(),
  84. }
  85. except Exception:
  86. pass
  87. # Conservative fallback: keep the local normalized form and leave MID unset.
  88. return {
  89. "raw": entity,
  90. "normalized": normalized,
  91. "canonical_label": normalized,
  92. "mid": None,
  93. "type": None,
  94. "candidates": [],
  95. "source": "fallback",
  96. "resolved_at": _resolved_at(),
  97. }