rss_breakingthenews.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. from __future__ import annotations
  2. import hashlib
  3. from typing import Any, Dict, List
  4. import feedparser
  5. from news_mcp.config import RSS_FEED_URL, RSS_FEED_URLS
  6. def _canonical_url(url: str) -> str:
  7. # Minimal canonicalization for v1.
  8. return url.strip()
  9. def fetch_breakingthenews_articles(limit: int = 50) -> List[Dict[str, Any]]:
  10. rss_urls = [u.strip() for u in RSS_FEED_URLS.split(",") if u.strip()]
  11. if not rss_urls:
  12. rss_urls = [RSS_FEED_URL]
  13. articles: List[Dict[str, Any]] = []
  14. # Evenly pull from feeds; keep total below `limit`.
  15. per_feed_limit = max(1, int(limit / max(1, len(rss_urls))))
  16. for feed_url in rss_urls:
  17. feed = feedparser.parse(feed_url)
  18. for entry in feed.entries[:per_feed_limit]:
  19. title = str(getattr(entry, "title", "")).strip()
  20. url = _canonical_url(str(getattr(entry, "link", "")).strip())
  21. source = "RSS"
  22. timestamp = str(getattr(entry, "published", "")) or str(getattr(entry, "updated", ""))
  23. summary = str(getattr(entry, "summary", "")) or str(getattr(entry, "description", ""))
  24. if not title or not url:
  25. continue
  26. articles.append(
  27. {
  28. "title": title,
  29. "url": url,
  30. "source": source,
  31. "timestamp": timestamp,
  32. "summary": summary,
  33. }
  34. )
  35. if len(articles) >= limit:
  36. return articles
  37. return articles
  38. def normalize_topic_from_title(title: str) -> str:
  39. t = title.lower()
  40. if any(k in t for k in ["btc", "bitcoin", "eth", "ethereum", "crypto"]):
  41. return "crypto"
  42. if any(k in t for k in ["rate", "rates", "inflation", "fed", "treasury", "euro"]):
  43. return "macro"
  44. if any(k in t for k in ["regulation", "sec", "ban", "law"]):
  45. return "regulation"
  46. if any(k in t for k in ["ai", "llm", "model", "openai", "anthropic"]):
  47. return "ai"
  48. return "other"
  49. def cluster_id_for_title(topic: str, title: str) -> str:
  50. key = f"{topic}|{title.strip().lower()}"
  51. return hashlib.sha1(key.encode("utf-8")).hexdigest()