rss_breakingthenews.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. from __future__ import annotations
  2. import hashlib
  3. from typing import Any, Dict, List
  4. import feedparser
  5. from news_mcp.config import RSS_FEED_URL
  6. def _canonical_url(url: str) -> str:
  7. # Minimal canonicalization for v1.
  8. return url.strip()
  9. def fetch_breakingthenews_articles(limit: int = 50) -> List[Dict[str, Any]]:
  10. feed = feedparser.parse(RSS_FEED_URL)
  11. articles: List[Dict[str, Any]] = []
  12. for entry in feed.entries[:limit]:
  13. title = str(getattr(entry, "title", "")).strip()
  14. url = _canonical_url(str(getattr(entry, "link", "")).strip())
  15. source = "BreakingTheNews"
  16. timestamp = str(getattr(entry, "published", "")) or str(getattr(entry, "updated", ""))
  17. summary = str(getattr(entry, "summary", "")) or str(getattr(entry, "description", ""))
  18. if not title or not url:
  19. continue
  20. articles.append(
  21. {
  22. "title": title,
  23. "url": url,
  24. "source": source,
  25. "timestamp": timestamp,
  26. "summary": summary,
  27. }
  28. )
  29. return articles
  30. def normalize_topic_from_title(title: str) -> str:
  31. t = title.lower()
  32. if any(k in t for k in ["btc", "bitcoin", "eth", "ethereum", "crypto"]):
  33. return "crypto"
  34. if any(k in t for k in ["rate", "rates", "inflation", "fed", "treasury", "euro"]):
  35. return "macro"
  36. if any(k in t for k in ["regulation", "sec", "ban", "law"]):
  37. return "regulation"
  38. if any(k in t for k in ["ai", "llm", "model", "openai", "anthropic"]):
  39. return "ai"
  40. return "other"
  41. def cluster_id_for_title(topic: str, title: str) -> str:
  42. key = f"{topic}|{title.strip().lower()}"
  43. return hashlib.sha1(key.encode("utf-8")).hexdigest()