from __future__ import annotations

import hashlib
import re
from typing import Any, Dict, List

import feedparser

from news_mcp.config import RSS_FEED_URL, RSS_FEED_URLS

# Ordered (topic, keywords) table: the first topic with a matching keyword wins,
# so the order of the rows is part of the classification behavior.
_TOPIC_KEYWORDS = (
    ("crypto", ("btc", "bitcoin", "eth", "ethereum", "crypto")),
    ("macro", ("rate", "rates", "inflation", "fed", "treasury", "euro")),
    ("regulation", ("regulation", "sec", "ban", "law")),
    ("ai", ("ai", "llm", "model", "openai", "anthropic")),
)

# One compiled pattern per topic. Keywords must match as whole words (an
# optional plural "s" is tolerated) so that short keywords such as "ai", "sec"
# or "ban" do not fire inside unrelated words like "said", "security" or "bank".
_TOPIC_PATTERNS = tuple(
    (
        topic,
        re.compile(r"\b(?:" + "|".join(re.escape(k) for k in keywords) + r")s?\b"),
    )
    for topic, keywords in _TOPIC_KEYWORDS
)


def _canonical_url(url: str) -> str:
    """Return *url* with surrounding whitespace removed.

    Minimal canonicalization for v1: no scheme, host or query normalization
    is performed.
    """
    return url.strip()


def _entry_text(entry: Any, *names: str) -> str:
    """Return the first non-empty attribute among *names* on *entry* as a string.

    Missing attributes and ``None`` values are skipped, so a ``None`` field is
    never rendered as the literal string ``"None"``. Returns ``""`` when no
    attribute yields a non-empty value.
    """
    for name in names:
        value = getattr(entry, name, None)
        if value is None:
            continue
        text = str(value)
        if text:
            return text
    return ""


def fetch_breakingthenews_articles(limit: int = 50) -> List[Dict[str, Any]]:
    """Fetch up to *limit* articles from the configured RSS feeds.

    Feed URLs come from the comma-separated ``RSS_FEED_URLS`` setting; when it
    yields no URLs, the single ``RSS_FEED_URL`` is used instead. The budget is
    split evenly across feeds (at least one entry per feed), so the total can
    be lower than *limit* when it does not divide evenly or when entries are
    skipped.

    Args:
        limit: Maximum number of articles to return. Values <= 0 return an
            empty list without fetching any feed.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``source``,
        ``timestamp`` and ``summary``. Entries lacking a title or a link are
        skipped.
    """
    # Without this guard the per-feed minimum of 1 would still fetch and
    # return one article for a zero or negative limit.
    if limit <= 0:
        return []

    rss_urls = [u.strip() for u in RSS_FEED_URLS.split(",") if u.strip()]
    if not rss_urls:
        rss_urls = [RSS_FEED_URL]

    # Evenly pull from feeds; keep the total at or below `limit`.
    per_feed_limit = max(1, limit // len(rss_urls))

    articles: List[Dict[str, Any]] = []
    for feed_url in rss_urls:
        feed = feedparser.parse(feed_url)
        for entry in feed.entries[:per_feed_limit]:
            title = _entry_text(entry, "title").strip()
            url = _canonical_url(_entry_text(entry, "link"))
            if not title or not url:
                continue

            articles.append(
                {
                    "title": title,
                    "url": url,
                    "source": "RSS",
                    # Prefer the publication date; fall back to the update date.
                    "timestamp": _entry_text(entry, "published", "updated"),
                    "summary": _entry_text(entry, "summary", "description"),
                }
            )
            # With more feeds than `limit`, the one-entry-per-feed minimum
            # could otherwise overshoot the requested total.
            if len(articles) >= limit:
                return articles

    return articles


def normalize_topic_from_title(title: str) -> str:
    """Classify *title* into a coarse topic using keyword matching.

    The title is lower-cased and tested against each topic's keywords in the
    order ``crypto``, ``macro``, ``regulation``, ``ai``; the first topic with
    a hit is returned, otherwise ``"other"``.

    Keywords match whole words only, with an optional trailing "s". This stops
    short keywords from matching inside unrelated words (for example "ai" in
    "said" or "ban" in "bank"). As a consequence, compounds such as
    "cryptocurrency" or "federal" do not match unless they are added to the
    keyword table.
    """
    lowered = title.lower()
    for topic, pattern in _TOPIC_PATTERNS:
        if pattern.search(lowered):
            return topic
    return "other"


def cluster_id_for_title(topic: str, title: str) -> str:
    """Return a deterministic cluster id for a (topic, title) pair.

    The id is the hexadecimal SHA-1 digest of ``"<topic>|<title>"`` where the
    title is stripped and lower-cased, so titles differing only in case or in
    surrounding whitespace map to the same id. SHA-1 serves here as a stable
    fingerprint, not as a security primitive.
    """
    key = f"{topic}|{title.strip().lower()}"
    return hashlib.sha1(key.encode("utf-8")).hexdigest()