lucky
/
news-mcp


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
							from __future__ import annotations

import hashlib
from typing import Any, Dict, List

import feedparser

from news_mcp.config import RSS_FEED_URL


def _canonical_url(url: str) -> str:
    # Minimal canonicalization for v1.
    return url.strip()


def _entry_text(entry: Any, *names: str) -> str:
    """Return the first non-empty attribute of *entry* among *names* as a string.

    Missing attributes and falsy values (``None``, ``""``) are skipped, so a
    ``None`` value never turns into the literal string ``"None"``. Returns
    ``""`` when none of the attributes holds a usable value.
    """
    for name in names:
        value = getattr(entry, name, None)
        if value:
            return str(value)
    return ""


def fetch_breakingthenews_articles(limit: int = 50) -> List[Dict[str, Any]]:
    """Parse the feed at ``RSS_FEED_URL`` and return up to *limit* articles.

    Entries are visited in feed order. An entry is skipped when it has no
    title or no link (after whitespace stripping); skipped entries do not
    count toward *limit*, so the result holds *limit* articles whenever the
    feed contains that many usable entries.

    Args:
        limit: Maximum number of articles to return. Zero or a negative value
            yields an empty list without parsing the feed.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``source``,
        ``timestamp`` and ``summary``. ``timestamp`` is the entry's
        ``published`` value, falling back to ``updated``; ``summary`` is the
        entry's ``summary`` value, falling back to ``description``. Both are
        ``""`` when the entry provides neither.
    """
    articles: List[Dict[str, Any]] = []
    if limit <= 0:
        # A negative slice bound would silently mean "all but the last N".
        return articles

    feed = feedparser.parse(RSS_FEED_URL)

    for entry in feed.entries:
        title = _entry_text(entry, "title").strip()
        url = _canonical_url(_entry_text(entry, "link"))

        if not title or not url:
            continue

        articles.append(
            {
                "title": title,
                "url": url,
                "source": "BreakingTheNews",
                "timestamp": _entry_text(entry, "published", "updated"),
                "summary": _entry_text(entry, "summary", "description"),
            }
        )

        # Stop once enough *valid* articles were collected, rather than
        # truncating the raw entry list before filtering.
        if len(articles) >= limit:
            break

    return articles


# Topic -> keywords, in priority order: the first topic with a matching
# keyword wins.
_TOPIC_KEYWORDS = (
    ("crypto", ("btc", "bitcoin", "eth", "ethereum", "crypto")),
    ("macro", ("rate", "rates", "inflation", "fed", "treasury", "euro")),
    ("regulation", ("regulation", "sec", "ban", "law")),
    ("ai", ("ai", "llm", "model", "openai", "anthropic")),
)

# Keywords this short are too ambiguous to match as word fragments
# ("ai" in "said", "ban" in "bank", "sec" in "second").
_MAX_EXACT_KEYWORD_LEN = 3


def _keyword_matches(word: str, keyword: str) -> bool:
    """Return whether the lowercase *word* counts as a hit for *keyword*.

    Short keywords must equal the word (an optional plural ``s`` is allowed);
    longer keywords match any word that starts with them, e.g. ``crypto``
    matches ``cryptocurrency``.
    """
    if len(keyword) <= _MAX_EXACT_KEYWORD_LEN:
        return word == keyword or word == keyword + "s"
    return word.startswith(keyword)


def normalize_topic_from_title(title: str) -> str:
    """Classify *title* into a coarse topic bucket by keyword.

    The title is lowercased and split into alphanumeric words; keywords are
    matched against whole words rather than raw substrings, so "bank" is not
    read as "ban" and "said" is not read as "ai".

    Args:
        title: Article headline; may be empty.

    Returns:
        One of ``"crypto"``, ``"macro"``, ``"regulation"``, ``"ai"`` (checked
        in that order) or ``"other"`` when no keyword matches.
    """
    lowered = title.lower()
    # Treat every non-alphanumeric character as a separator ("AI-powered",
    # "Fed's", "rate/inflation" all split into plain words).
    words = "".join(ch if ch.isalnum() else " " for ch in lowered).split()

    for topic, keywords in _TOPIC_KEYWORDS:
        if any(_keyword_matches(w, k) for w in words for k in keywords):
            return topic
    return "other"


def cluster_id_for_title(topic: str, title: str) -> str:
    """Return a stable cluster identifier for a (topic, title) pair.

    The title is stripped of surrounding whitespace and lowercased, joined to
    the topic with ``|``, and the UTF-8 encoding of that key is hashed with
    SHA-1. The topic is used exactly as given.

    Returns:
        The 40-character hexadecimal SHA-1 digest of the key.
    """
    normalized_title = title.strip().lower()
    hasher = hashlib.sha1()
    hasher.update("|".join((topic, normalized_title)).encode("utf-8"))
    return hasher.hexdigest()