소스 검색

news-mcp: support multiple RSS feeds via NEWS_RSS_FEED_URLS

Lukas Goldschmidt 1 개월 전
부모
커밋
ad50051d62
3개의 변경된 파일, 40개의 추가 그리고 23개의 삭제
  1. 3 0
      news_mcp/config.py
  2. 5 2
      news_mcp/jobs/poller.py
  3. 32 21
      news_mcp/sources/rss_breakingthenews.py

+ 3 - 0
news_mcp/config.py

@@ -13,6 +13,9 @@ DATA_DIR.mkdir(parents=True, exist_ok=True)
 DB_PATH = Path(os.getenv("NEWS_MCP_DB_PATH", str(DATA_DIR / "news.sqlite")))
 
 RSS_FEED_URL = os.getenv("NEWS_RSS_FEED_URL", "https://breakingthenews.net/news-feed.xml")
+# Optional multi-feed mode: comma-separated RSS URLs.
+# If set (non-empty), this overrides RSS_FEED_URL.
+RSS_FEED_URLS = os.getenv("NEWS_RSS_FEED_URLS", "").strip()
 
 # Clusters TTL (hours)
 CLUSTERS_TTL_HOURS = float(os.getenv("NEWS_CLUSTERS_TTL_HOURS", "24"))

+ 5 - 2
news_mcp/jobs/poller.py

@@ -2,7 +2,7 @@ from __future__ import annotations
 
 from typing import Any, Dict
 
-from news_mcp.config import CLUSTERS_TTL_HOURS, DB_PATH
+from news_mcp.config import CLUSTERS_TTL_HOURS, DB_PATH, RSS_FEED_URL, RSS_FEED_URLS
 from news_mcp.dedup.cluster import dedup_and_cluster_articles
 from news_mcp.enrichment.enrich import enrich_cluster
 from news_mcp.enrichment.groq_enrich import classify_cluster_groq
@@ -19,7 +19,10 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
 
     # Skip expensive work if the feed content (titles/urls/timestamps) didn't change.
     import hashlib
-    feed_key = "breakingthenews"  # v1: single feed
+    rss_urls = [u.strip() for u in RSS_FEED_URLS.split(",") if u.strip()]
+    if not rss_urls:
+        rss_urls = [RSS_FEED_URL]
+    feed_key = "breakingthenews:" + hashlib.sha1(",".join(rss_urls).encode("utf-8")).hexdigest()
     material = "\n".join(
         f"{a.get('title','')}|{a.get('url','')}|{a.get('timestamp','')}"
         for a in articles

+ 32 - 21
news_mcp/sources/rss_breakingthenews.py

@@ -5,7 +5,7 @@ from typing import Any, Dict, List
 
 import feedparser
 
-from news_mcp.config import RSS_FEED_URL
+from news_mcp.config import RSS_FEED_URL, RSS_FEED_URLS
 
 
 def _canonical_url(url: str) -> str:
@@ -14,28 +14,39 @@ def _canonical_url(url: str) -> str:
 
 
 def fetch_breakingthenews_articles(limit: int = 50) -> List[Dict[str, Any]]:
-    feed = feedparser.parse(RSS_FEED_URL)
+    rss_urls = [u.strip() for u in RSS_FEED_URLS.split(",") if u.strip()]
+    if not rss_urls:
+        rss_urls = [RSS_FEED_URL]
+
     articles: List[Dict[str, Any]] = []
 
-    for entry in feed.entries[:limit]:
-        title = str(getattr(entry, "title", "")).strip()
-        url = _canonical_url(str(getattr(entry, "link", "")).strip())
-        source = "BreakingTheNews"
-        timestamp = str(getattr(entry, "published", "")) or str(getattr(entry, "updated", ""))
-        summary = str(getattr(entry, "summary", "")) or str(getattr(entry, "description", ""))
-
-        if not title or not url:
-            continue
-
-        articles.append(
-            {
-                "title": title,
-                "url": url,
-                "source": source,
-                "timestamp": timestamp,
-                "summary": summary,
-            }
-        )
+    # Evenly pull from feeds; keep total below `limit`.
+    per_feed_limit = max(1, int(limit / max(1, len(rss_urls))))
+
+    for feed_url in rss_urls:
+        feed = feedparser.parse(feed_url)
+        for entry in feed.entries[:per_feed_limit]:
+            title = str(getattr(entry, "title", "")).strip()
+            url = _canonical_url(str(getattr(entry, "link", "")).strip())
+            source = "RSS"
+            timestamp = str(getattr(entry, "published", "")) or str(getattr(entry, "updated", ""))
+            summary = str(getattr(entry, "summary", "")) or str(getattr(entry, "description", ""))
+
+            if not title or not url:
+                continue
+
+            articles.append(
+                {
+                    "title": title,
+                    "url": url,
+                    "source": source,
+                    "timestamp": timestamp,
+                    "summary": summary,
+                }
+            )
+
+            if len(articles) >= limit:
+                return articles
 
     return articles