from __future__ import annotations

import hashlib
import re
from typing import Any, Dict, List

import feedparser

from news_mcp.config import RSS_FEED_URL

# Ordered (topic, keywords) table. Order is significant: the first topic with
# a matching keyword wins, so a title mentioning both "bitcoin" and "sec" is
# classified as "crypto".
_TOPIC_KEYWORDS = (
    ("crypto", ("btc", "bitcoin", "eth", "ethereum", "crypto")),
    ("macro", ("rate", "rates", "inflation", "fed", "treasury", "euro")),
    ("regulation", ("regulation", "sec", "ban", "law")),
    ("ai", ("ai", "llm", "model", "openai", "anthropic")),
)

# One precompiled whole-word pattern per topic. The \b anchors prevent short
# keywords from firing inside unrelated words ("ai" in "said", "ban" in
# "bank", "sec" in "second"); the optional trailing "s" keeps simple plurals
# ("bans", "laws", "models") matching.
_TOPIC_PATTERNS = tuple(
    (
        topic,
        re.compile(
            r"\b(?:" + "|".join(re.escape(k) for k in keywords) + r")s?\b"
        ),
    )
    for topic, keywords in _TOPIC_KEYWORDS
)


def _canonical_url(url: str) -> str:
    """Return *url* with surrounding whitespace removed.

    Minimal canonicalization for v1: no scheme, host, query-string or
    fragment normalization is performed.
    """
    return url.strip()


def _first_text(entry: Any, *names: str) -> str:
    """Return the first non-empty attribute of *entry* among *names* as text.

    Missing attributes and falsy values (``None``, ``""``) are skipped, so a
    field that is present but ``None`` never becomes the literal text
    ``"None"``. Returns ``""`` when no listed attribute has a value.
    """
    for name in names:
        value = getattr(entry, name, None)
        if value:
            return str(value)
    return ""


def fetch_breakingthenews_articles(limit: int = 50) -> List[Dict[str, Any]]:
    """Parse the feed at ``RSS_FEED_URL`` and return normalized articles.

    Args:
        limit: Maximum number of feed entries to inspect. Zero or negative
            values yield an empty list. Entries lacking a title or link are
            skipped *after* the limit is applied, so fewer than ``limit``
            articles may be returned.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``source``
        (always ``"BreakingTheNews"``), ``timestamp`` (the entry's
        ``published`` value, else ``updated``, else ``""``) and ``summary``
        (the entry's ``summary`` value, else ``description``, else ``""``).
    """
    feed = feedparser.parse(RSS_FEED_URL)

    articles: List[Dict[str, Any]] = []
    # Clamp the limit: a negative slice bound would silently drop entries
    # from the END of the feed instead of returning nothing.
    for entry in feed.entries[: max(limit, 0)]:
        title = _first_text(entry, "title").strip()
        url = _canonical_url(_first_text(entry, "link"))
        if not title or not url:
            # An article without a title or link cannot be displayed or
            # clustered, so it is dropped.
            continue

        articles.append(
            {
                "title": title,
                "url": url,
                "source": "BreakingTheNews",
                "timestamp": _first_text(entry, "published", "updated"),
                "summary": _first_text(entry, "summary", "description"),
            }
        )

    return articles


def normalize_topic_from_title(title: str) -> str:
    """Classify *title* into a coarse topic using keyword matching.

    Keywords are matched case-insensitively as whole words (an optional
    plural "s" is tolerated), never as substrings of longer words. Topics
    are tried in priority order: ``crypto``, ``macro``, ``regulation``,
    ``ai``. Compounds and other inflections (e.g. "cryptocurrency",
    "banned") only match if they are added to ``_TOPIC_KEYWORDS``.

    Returns:
        The first topic with a matching keyword, or ``"other"`` when no
        keyword matches.
    """
    lowered = title.lower()
    for topic, pattern in _TOPIC_PATTERNS:
        if pattern.search(lowered):
            return topic
    return "other"


def cluster_id_for_title(topic: str, title: str) -> str:
    """Return a deterministic cluster identifier for a topic/title pair.

    The identifier is the hexadecimal SHA-1 digest of
    ``"<topic>|<title stripped and lower-cased>"`` encoded as UTF-8, so
    titles differing only in case or surrounding whitespace share an ID
    within the same topic.
    """
    # The key format must stay byte-identical: changing it would change
    # every identifier this function has ever produced.
    key = f"{topic}|{title.strip().lower()}"
    return hashlib.sha1(key.encode("utf-8")).hexdigest()