hace 1 semana · f4dc0998eb
--- a/news_mcp/config.py
+++ b/news_mcp/config.py
@@ -54,3 +54,36 @@ NEWS_BACKGROUND_REFRESH_ON_START = os.getenv("NEWS_BACKGROUND_REFRESH_ON_START",
 
				 # Pruning settings. Each one is read from the environment variable of the
				 # same name, falling back to the string default given here.
				 # Enabled only when the value, lower-cased, is exactly "true".
				 NEWS_PRUNING_ENABLED = os.getenv("NEWS_PRUNING_ENABLED", "true").lower() == "true"
				 # Parsed with float(); a non-numeric value raises ValueError at import time.
				 # NOTE(review): unit is presumably days, going by the name — confirm at the call site.
				 NEWS_RETENTION_DAYS = float(os.getenv("NEWS_RETENTION_DAYS", "180"))
				 # Parsed with float(); presumably the number of hours between prune runs — confirm.
				 NEWS_PRUNE_INTERVAL_HOURS = float(os.getenv("NEWS_PRUNE_INTERVAL_HOURS", "24"))
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Concurrency controls
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Maximum concurrent outbound LLM API calls per provider.
			
 
				+# Defaults are conservative for free tiers; override via env if you have
			
 
				+# higher rate limits or are on a paid plan.
			
 
				_NEEDLE_DEFAULT_CONCURRENCY = {
				    "openrouter": 2,
				    "openai": 5,
				    "groq": 8,
				}

				# Limit applied to providers that are not listed in the map above.
				_NEEDLE_FALLBACK_LLM_CONCURRENCY = 3


				def _positive_int_env(name: str, default: int) -> int:
				    """Return environment variable *name* as an int clamped to at least 1.

				    Args:
				        name: Environment variable to read.
				        default: Value returned when the variable is unset, blank, or not
				            a valid integer.

				    Returns:
				        ``max(1, int(value))`` for a parseable value, otherwise *default*.

				    The clamp matters because these numbers are used to size
				    ``asyncio.Semaphore`` objects: a limit of 0 would make every acquire
				    wait forever and a negative limit raises ``ValueError``.
				    """
				    # Local import so the helper does not rely on the module's import block.
				    import logging

				    raw = os.getenv(name)
				    if raw is None or not raw.strip():
				        return default
				    try:
				        return max(1, int(raw))
				    except ValueError:
				        # Report the bad override instead of ignoring it silently.
				        logging.getLogger(__name__).warning(
				            "ignoring non-integer %s=%r; using default %s", name, raw, default
				        )
				        return default


				_NEEDLE_RSS_MAX_CONCURRENCY = _positive_int_env("NEWS_RSS_MAX_CONCURRENCY", 10)
				_NEEDLE_OLLAMA_MAX_CONCURRENCY = _positive_int_env("NEWS_OLLAMA_MAX_CONCURRENCY", 4)


				def llm_concurrency(provider: str) -> int:
				    """Return the max concurrent LLM calls for *provider*.

				    Reads from ``NEWS_LLM_CONCURRENCY_<PROVIDER>`` env var first (e.g.
				    ``NEWS_LLM_CONCURRENCY_OPENROUTER``), then falls back to the built-in
				    default map, and finally to 3 for providers the map does not list.

				    Args:
				        provider: Provider name; surrounding whitespace and letter case
				            are ignored.

				    Returns:
				        The env override clamped to at least 1, or the default when the
				        override is unset, blank, or not an integer (a warning is logged
				        for the non-integer case).
				    """
				    provider = provider.strip().lower()
				    default = _NEEDLE_DEFAULT_CONCURRENCY.get(
				        provider, _NEEDLE_FALLBACK_LLM_CONCURRENCY
				    )
				    return _positive_int_env(f"NEWS_LLM_CONCURRENCY_{provider.upper()}", default)
			
--- a/news_mcp/dedup/cluster.py
+++ b/news_mcp/dedup/cluster.py
@@ -1,5 +1,6 @@
 
				 from __future__ import annotations
			
 
				 
			
 
				+import asyncio
			
 
				 import hashlib
			
 
				 import re
			
 
				 from difflib import SequenceMatcher
			
@@ -18,7 +19,6 @@ from news_mcp.sources.news_feeds import normalize_topic_from_title
 
				 
			
 
				 def _normalize_title(title: str) -> str:
				     """Return *title* lower-cased with only ``a-z``, ``0-9`` and single spaces.

				     Every other character (punctuation, and also any non-ASCII letter) is
				     turned into a space, runs of whitespace are collapsed to one space, and
				     the result is trimmed.
				     """
				     lowered = title.lower().strip()
				     alnum_and_space = re.sub(r"[^a-z0-9\s]", " ", lowered)
				     collapsed = re.sub(r"\s+", " ", alnum_and_space)
				     return collapsed.strip()
			
@@ -48,12 +48,9 @@ def _cluster_text(a: Dict[str, Any]) -> str:
 
				 
			
 
				 
			
 
				 # ---------------------------------------------------------------------------
			
 
				-# Token / Jaccard signal (used as a fallback alongside title similarity when
			
 
				-# embeddings are unavailable, and as a soft signal even when they are).
			
 
				+# Token / Jaccard signal
			
 
				 # ---------------------------------------------------------------------------
			
 
				 
			
 
				-# Tiny stop-word set — we keep it small on purpose because the corpus is news
			
 
				-# headlines, where every additional removal risks losing genuine signal.
			
 
				 _STOPWORDS = frozenset(
			
 
				     {
			
 
				         "a", "an", "the", "of", "to", "in", "on", "at", "for", "by", "with",
			
@@ -68,7 +65,6 @@ _STOPWORDS = frozenset(
 
				 
			
 
				 
			
 
				 def _tokens(text: str) -> set[str]:
				     """Return the set of content tokens found in *text*.

				     Tokens are taken from the lower-cased text: a letter or digit followed
				     by one or more letters, digits or hyphens. Tokens shorter than three
				     characters and tokens present in ``_STOPWORDS`` are dropped.
				     """
				     kept: set[str] = set()
				     for candidate in re.findall(r"[a-z0-9][a-z0-9\-]+", text.lower()):
				         if len(candidate) < 3 or candidate in _STOPWORDS:
				             continue
				         kept.add(candidate)
				     return kept
			
 
				 
			
@@ -86,22 +82,12 @@ def _jaccard(a: set, b: set) -> float:
 
				 # Composite similarity
			
 
				 # ---------------------------------------------------------------------------
			
 
				 
			
 
				-
			
 
				-# Each signal has its own threshold. We accept a merge if ANY signal clears its
			
 
				-# threshold, which makes clustering robust when one signal happens to be weak
			
 
				-# (short headlines kill SequenceMatcher; single-word stories kill Jaccard;
			
 
				-# Ollama outages kill cosine similarity).
			
 
				 # Default cut-off for the title similarity signal; dedup_and_cluster_articles
				 # uses it unless the caller passes an explicit similarity_threshold.
				 DEFAULT_TITLE_THRESHOLD = 0.87
				 # Cut-off the "jaccard" signal must reach for a merge on token overlap.
				 DEFAULT_JACCARD_THRESHOLD = 0.55
			
 
				 
			
 
				 
			
 
				 def _signals(article: Dict[str, Any], cluster: Dict[str, Any]) -> dict:
			
 
				-    """Per-pair similarity signals (title, jaccard, embedding cosine).
			
 
				-
			
 
				-    Embedding cosine is only computed when both sides have a vector; we never
			
 
				-    block on a fresh Ollama request here — that's the caller's job, so this
			
 
				-    function stays pure and easy to test.
			
 
				-    """
			
 
				+    """Per-pair similarity signals (title, jaccard, embedding cosine)."""
			
 
				     a_title = str(article.get("title") or "")
			
 
				     c_title = str(cluster.get("headline") or "")
			
 
				 
			
@@ -120,11 +106,7 @@ def _signals(article: Dict[str, Any], cluster: Dict[str, Any]) -> dict:
 
				 
			
 
				 
			
 
				 def _is_match(signals: dict, *, embeddings_enabled: bool) -> tuple[bool, str, float]:
			
 
				-    """Decide whether two items should merge based on the strongest signal.
			
 
				-
			
 
				-    Returns (matched, signal_name, signal_value). The signal_name lets callers
			
 
				-    log *why* something merged, which is huge for debugging clustering quality.
			
 
				-    """
			
 
				+    """Decide whether two items should merge based on the strongest signal."""
			
 
				     cosine_threshold = NEWS_EMBEDDING_SIMILARITY_THRESHOLD
			
 
				     if embeddings_enabled and signals["cosine"] >= cosine_threshold:
			
 
				         return True, "cosine", signals["cosine"]
			
@@ -136,7 +118,65 @@ def _is_match(signals: dict, *, embeddings_enabled: bool) -> tuple[bool, str, fl
 
				 
			
 
				 
			
 
				 # ---------------------------------------------------------------------------
			
 
				-# Public API
			
 
				+# Embedding pre-computation (async internally)
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
				+
			
 
				async def _compute_embeddings_concurrently(
				    articles: List[Dict[str, Any]],
				) -> Dict[str, list[float] | None]:
				    """Embed every distinct article text in one concurrent batch.

				    Args:
				        articles: Article dicts; the text to embed comes from
				            ``_cluster_text`` and empty texts are skipped.

				    Returns:
				        Mapping of text -> embedding vector. A text maps to ``None`` when
				        its ``ollama_embed`` call returned anything other than a list,
				        which includes an exception captured by ``gather``.
				    """
				    # dict.fromkeys drops duplicates while keeping first-seen order.
				    texts = list(
				        dict.fromkeys(t for t in map(_cluster_text, articles) if t)
				    )

				    outcomes = await asyncio.gather(
				        *(ollama_embed(t) for t in texts), return_exceptions=True
				    )

				    return {
				        t: (outcome if isinstance(outcome, list) else None)
				        for t, outcome in zip(texts, outcomes)
				    }
			
 
				+
			
 
				+
			
 
				def _compute_embeddings_sync(
				    articles: List[Dict[str, Any]],
				) -> Dict[str, list[float] | None]:
				    """Synchronous wrapper that runs the async embedding computation.

				    Handles two cases:
				    1. No event loop is running in the calling thread (plain sync caller
				       or a worker thread) -> run the coroutine with ``asyncio.run()``.
				    2. The calling thread already has a running loop -> ``asyncio.run()``
				       is not allowed there, so the coroutine runs on a fresh loop inside
				       a one-off worker thread.

				    In both cases this call blocks the calling thread until every
				    embedding request has finished. In case 2 that means the caller's own
				    event loop is stalled for the duration.

				    Args:
				        articles: Article dicts forwarded to
				            ``_compute_embeddings_concurrently``.

				    Returns:
				        The text -> embedding (or ``None``) cache built by
				        ``_compute_embeddings_concurrently``.
				    """
				    try:
				        asyncio.get_running_loop()
				    except RuntimeError:
				        # No running loop in this thread — safe to use asyncio.run().
				        return asyncio.run(_compute_embeddings_concurrently(articles))

				    import concurrent.futures

				    def _run_on_fresh_loop() -> Dict[str, list[float] | None]:
				        # Build the coroutine inside the worker so it is created and
				        # awaited in the same thread, and never left un-awaited if the
				        # submission itself fails.
				        return asyncio.run(_compute_embeddings_concurrently(articles))

				    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
				        return pool.submit(_run_on_fresh_loop).result()
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Public API (sync — backward compatible with tests)
			
 
				 # ---------------------------------------------------------------------------
			
 
				 
			
 
				 
			
@@ -146,36 +186,23 @@ def dedup_and_cluster_articles(
 
				 ) -> Dict[str, List[Dict[str, Any]]]:
			
 
				     """Deduplicate raw articles into clusters keyed by topic.
			
 
				 
			
 
				-    v1.1 strategy: composite similarity.
			
 
				+    v1.2: embedding pre-computation is async/concurrent under the hood, but
			
 
				+    this public function remains synchronous for backward compatibility.
			
 
				+
			
 
				+    A pair merges if ANY signal clears its threshold:
			
 
				       * title fuzzy ratio
			
 
				-      * token Jaccard over headline+summary (cheap, surprisingly resilient
			
 
				-        when titles are reworded heavily across outlets)
			
 
				+      * token Jaccard over headline+summary
			
 
				       * Ollama embedding cosine when available
			
 
				-
			
 
				-    A pair merges if ANY signal clears its threshold. Falling back through
			
 
				-    multiple signals means a transient Ollama outage doesn't collapse the
			
 
				-    server back into title-only clustering, and a heavily-reworded headline
			
 
				-    can still merge via Jaccard or embeddings.
			
 
				-
			
 
				-    The ``similarity_threshold`` argument is kept for backward compatibility
			
 
				-    with the test suite. When provided, it overrides the title threshold.
			
 
				     """
			
 
				 
			
 
				     title_threshold = similarity_threshold if similarity_threshold is not None else DEFAULT_TITLE_THRESHOLD
			
 
				 
			
 
				-    by_topic: Dict[str, List[Dict[str, Any]]] = {}
			
 
				+    # Pre-compute embeddings concurrently (sync boundary handles async internally)
			
 
				     embedding_cache: Dict[str, list[float] | None] = {}
			
 
				+    if NEWS_EMBEDDINGS_ENABLED:
			
 
				+        embedding_cache = _compute_embeddings_sync(articles)
			
 
				 
			
 
				-    def _embedding_for_text(text: str) -> list[float] | None:
			
 
				-        if not NEWS_EMBEDDINGS_ENABLED or not text:
			
 
				-            return None
			
 
				-        if text in embedding_cache:
			
 
				-            return embedding_cache[text]
			
 
				-        emb = ollama_embed(text)
			
 
				-        # Cache None too so a single failure doesn't trigger repeated retries
			
 
				-        # within one ingestion cycle. The next refresh call clears this map.
			
 
				-        embedding_cache[text] = emb
			
 
				-        return emb
			
 
				+    by_topic: Dict[str, List[Dict[str, Any]]] = {}
			
 
				 
			
 
				     for a in articles:
			
 
				         title = a.get("title") or ""
			
@@ -183,10 +210,8 @@ def dedup_and_cluster_articles(
 
				             continue
			
 
				         topic = normalize_topic_from_title(title)
			
 
				         article_text = _cluster_text(a)
			
 
				-        article_embedding = _embedding_for_text(article_text)
			
 
				 
			
 
				-        # Attach embedding on the article dict so _signals() can read it
			
 
				-        # without re-computing.
			
 
				+        article_embedding = embedding_cache.get(article_text) if NEWS_EMBEDDINGS_ENABLED else None
			
 
				         a_with_emb = dict(a)
			
 
				         if article_embedding is not None:
			
 
				             a_with_emb["_embedding"] = article_embedding
			
@@ -199,8 +224,6 @@ def dedup_and_cluster_articles(
 
				         best_signal_value = 0.0
			
 
				         for idx, c in enumerate(clusters):
			
 
				             sigs = _signals(a_with_emb, c)
			
 
				-            # Use the title threshold the caller explicitly passed (test override)
			
 
				-            # but otherwise rely on the module defaults.
			
 
				             local_match = False
			
 
				             if NEWS_EMBEDDINGS_ENABLED and sigs["cosine"] >= NEWS_EMBEDDING_SIMILARITY_THRESHOLD:
			
 
				                 local_match = True
			
@@ -211,11 +234,6 @@ def dedup_and_cluster_articles(
 
				             elif sigs["jaccard"] >= DEFAULT_JACCARD_THRESHOLD:
			
 
				                 local_match = True
			
 
				                 signal_name, signal_value = "jaccard", sigs["jaccard"]
			
 
				-            # Consensus rule: when no single signal clears its strict threshold
			
 
				-            # but two of them are simultaneously "strong-ish", treat that as a
			
 
				-            # match. This catches reworded headlines whose embedding is just
			
 
				-            # below the strict cosine cutoff. Numbers are intentionally
			
 
				-            # conservative — both signals must be clearly above noise.
			
 
				             elif (
			
 
				                 NEWS_EMBEDDINGS_ENABLED
			
 
				                 and sigs["cosine"] >= 0.80
			
@@ -240,13 +258,10 @@ def dedup_and_cluster_articles(
 
				             if a.get("source") and a["source"] not in c["sources"]:
			
 
				                 c["sources"].append(a["source"])
			
 
				             c["last_updated"] = max(str(c.get("last_updated", "")), str(a.get("timestamp", "")))
			
 
				-            # Keep a tiny audit trail per cluster on which signal grew it last.
			
 
				-            # Not surfaced through tools — lives in the payload only for debug.
			
 
				             c.setdefault("_merge_signals", []).append(
			
 
				                 {"signal": best_signal_name, "value": round(best_signal_value, 3)}
			
 
				             )
			
 
				         else:
			
 
				-            # Stable cluster id: based on topic + normalized canonical title.
			
 
				             key = f"{topic}|{_normalize_title(title)}"
			
 
				             cid = hashlib.sha1(key.encode("utf-8")).hexdigest()
			
 
				             cluster_embedding = article_embedding if NEWS_EMBEDDINGS_ENABLED else None
			
@@ -269,8 +284,7 @@ def dedup_and_cluster_articles(
 
				                 }
			
 
				             )
			
 
				 
			
 
				-    # Strip the internal merge audit trail before returning so it does not
			
 
				-    # accidentally bloat the SQLite payload. Storage layer doesn't filter it.
			
 
				+    # Strip the internal merge audit trail before returning
			
 
				     for clusters in by_topic.values():
			
 
				         for c in clusters:
			
 
				             c.pop("_merge_signals", None)
			
--- a/news_mcp/dedup/embedding_support.py
+++ b/news_mcp/dedup/embedding_support.py
@@ -1,13 +1,23 @@
 
				 from __future__ import annotations
			
 
				 
			
 
				+import asyncio
			
 
				+import json
			
 
				 from dataclasses import dataclass
			
 
				 from datetime import datetime, timezone, timedelta
			
 
				-import json
			
 
				-import urllib.request
			
 
				 from math import sqrt
			
 
				 from typing import Any
			
 
				 
			
 
				-from news_mcp.config import NEWS_EMBEDDINGS_ENABLED, OLLAMA_BASE_URL, OLLAMA_EMBEDDING_MODEL
			
 
				+import httpx
			
 
				+
			
 
				+from news_mcp.config import (
			
 
				+    NEWS_EMBEDDINGS_ENABLED,
			
 
				+    OLLAMA_BASE_URL,
			
 
				+    OLLAMA_EMBEDDING_MODEL,
			
 
				+    _NEEDLE_OLLAMA_MAX_CONCURRENCY,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+_ollama_semaphore = asyncio.Semaphore(_NEEDLE_OLLAMA_MAX_CONCURRENCY)
			
 
				 
			
 
				 
			
 
				 @dataclass(frozen=True)
			
@@ -85,28 +95,30 @@ def cluster_is_candidate(
 
				     return True
			
 
				 
			
 
				 
			
 
				# Per-event-loop semaphores, created lazily. An asyncio.Semaphore attaches
				# itself to one event loop and raises RuntimeError when a contended acquire
				# happens on a different loop, so a single shared instance cannot serve
				# callers that start a fresh loop per batch via asyncio.run().
				_OLLAMA_LOOP_SEMAPHORES = None


				def _ollama_semaphore_for_running_loop() -> asyncio.Semaphore:
				    """Return the Ollama concurrency semaphore owned by the running loop.

				    Must be called from inside a running event loop. Entries are held in a
				    ``WeakKeyDictionary`` so a finished loop does not keep its semaphore
				    alive.
				    """
				    # Local import keeps this block independent of the file's import list.
				    import weakref

				    global _OLLAMA_LOOP_SEMAPHORES
				    if _OLLAMA_LOOP_SEMAPHORES is None:
				        _OLLAMA_LOOP_SEMAPHORES = weakref.WeakKeyDictionary()

				    loop = asyncio.get_running_loop()
				    semaphore = _OLLAMA_LOOP_SEMAPHORES.get(loop)
				    if semaphore is None:
				        # max(1, ...) guards against a zero limit, which would make every
				        # acquire wait forever.
				        semaphore = _OLLAMA_LOOP_SEMAPHORES.setdefault(
				            loop, asyncio.Semaphore(max(1, _NEEDLE_OLLAMA_MAX_CONCURRENCY))
				        )
				    return semaphore


				async def ollama_embed(text: str, timeout: float = 20.0) -> list[float] | None:
				    """Async Ollama embedding call with concurrency limiting.

				    Args:
				        text: Text sent as the ``prompt`` of the embeddings request.
				        timeout: Timeout in seconds handed to the HTTP client.

				    Returns:
				        The embedding as a list of floats, or ``None`` when embeddings are
				        disabled, the request fails, or the response has no non-empty
				        ``embedding`` list. Returning ``None`` lets the caller fall back to
				        heuristic clustering.
				    """
				    if not NEWS_EMBEDDINGS_ENABLED:
				        return None

				    payload = json.dumps({"model": OLLAMA_EMBEDDING_MODEL, "prompt": text}).encode("utf-8")
				    url = f"{OLLAMA_BASE_URL.rstrip('/')}/api/embeddings"

				    async with _ollama_semaphore_for_running_loop():
				        try:
				            async with httpx.AsyncClient(timeout=timeout) as client:
				                resp = await client.post(
				                    url,
				                    content=payload,
				                    headers={"Content-Type": "application/json"},
				                )
				                resp.raise_for_status()
				                data = resp.json()
				                emb = data.get("embedding")
				                if isinstance(emb, list) and emb:
				                    return [float(x) for x in emb]
				        except Exception:
				            # Deliberate best-effort: any failure means "no embedding".
				            return None
				    return None
			
--- a/news_mcp/jobs/poller.py
+++ b/news_mcp/jobs/poller.py
@@ -1,26 +1,114 @@
 
				 from __future__ import annotations
			
 
				 
			
 
				 import asyncio
			
 
				+import hashlib
			
 
				 import logging
			
 
				 from collections import defaultdict
			
 
				 from datetime import datetime, timezone
			
 
				 from typing import Any, Dict
			
 
				 
			
 
				-from news_mcp.config import DEFAULT_LOOKBACK_HOURS, DEFAULT_TOPICS, DB_PATH, NEWS_FEED_URL, NEWS_FEED_URLS
			
 
				-from news_mcp.dedup.cluster import dedup_and_cluster_articles
			
 
				-from news_mcp.enrichment.enrich import enrich_cluster
			
 
				-from news_mcp.enrichment.llm_enrich import classify_cluster_llm
			
 
				-from news_mcp.trends_resolution import resolve_entity_via_trends
			
 
				-from news_mcp.sources.news_feeds import fetch_news_articles
			
 
				-from news_mcp.storage.sqlite_store import SQLiteClusterStore
			
 
				-
			
 
				 from news_mcp.config import (
			
 
				+    DEFAULT_LOOKBACK_HOURS,
			
 
				+    DEFAULT_TOPICS,
			
 
				+    DB_PATH,
			
 
				     ENRICH_OTHER_TOPICS_ONLY,
			
 
				     ENRICHMENT_MAX_PER_REFRESH,
			
 
				+    NEWS_EXTRACT_PROVIDER,
			
 
				+    NEWS_FEED_URL,
			
 
				+    NEWS_FEED_URLS,
			
 
				     NEWS_PRUNE_INTERVAL_HOURS,
			
 
				     NEWS_PRUNING_ENABLED,
			
 
				     NEWS_RETENTION_DAYS,
			
 
				+    llm_concurrency,
			
 
				 )
			
 
				+from news_mcp.dedup.cluster import dedup_and_cluster_articles
			
 
				+from news_mcp.enrichment.enrich import enrich_cluster
			
 
				+from news_mcp.enrichment.llm_enrich import classify_cluster_llm
			
 
				+from news_mcp.sources.news_feeds import fetch_news_articles
			
 
				+from news_mcp.storage.sqlite_store import SQLiteClusterStore
			
 
				+from news_mcp.trends_resolution import resolve_entity_via_trends
			
 
				+
			
 
				+
			
 
				async def _enrich_single_cluster(
				    c: dict,
				    topic: str,
				    llm_enabled: bool,
				    semaphore: asyncio.Semaphore,
				    store: SQLiteClusterStore,
				    logger: logging.Logger,
				) -> dict:
				    """Enrich one cluster: heuristic pass, then cached or fresh LLM fields.

				    Args:
				        c: Raw cluster dict.
				        topic: Topic used as the default when the enriched cluster has none.
				        llm_enabled: When false, only the heuristic ``enrich_cluster`` pass runs.
				        semaphore: Held while ``classify_cluster_llm`` is awaited.
				        store: Looked up by cluster id to reuse previously stored fields.
				        logger: Receives the traceback when the LLM call fails.

				    Returns:
				        The enriched cluster dict. When the LLM call raises, the dict is
				        returned with an ``enrichment_failed_at`` UTC timestamp instead.
				    """
				    enriched = enrich_cluster(c)
				    enriched.setdefault("topic", topic)

				    cluster_id = enriched.get("cluster_id")
				    if not (llm_enabled and cluster_id):
				        return enriched

				    cached = store.get_cluster_by_id(cluster_id)
				    if not (cached and cached.get("entities")):
				        # Nothing reusable stored: make the outbound LLM call, bounded by
				        # the shared semaphore.
				        async with semaphore:
				            try:
				                enriched = await classify_cluster_llm(enriched)
				            except Exception:
				                logger.exception(
				                    "LLM enrichment failed for cluster %s (topic %s)",
				                    enriched.get("cluster_id"), topic,
				                )
				                enriched["enrichment_failed_at"] = datetime.now(timezone.utc).isoformat()
				        return enriched

				    # Cache hit: copy the stored enrichment fields and skip the LLM call.
				    enriched = dict(enriched)
				    enriched["entities"] = cached.get("entities", [])

				    stored_resolutions = cached.get("entityResolutions", None)
				    if isinstance(stored_resolutions, list) and stored_resolutions:
				        enriched["entityResolutions"] = stored_resolutions
				    else:
				        # Missing or malformed resolutions are rebuilt from the entities.
				        enriched["entityResolutions"] = [
				            resolve_entity_via_trends(entity) for entity in enriched["entities"]
				        ]

				    # These fields are copied only when the stored value is truthy.
				    for field in ("sentiment", "keywords", "topic"):
				        stored_value = cached.get(field)
				        if stored_value:
				            enriched[field] = stored_value

				    # A score of 0 is a valid value, so only None counts as "absent".
				    stored_score = cached.get("sentimentScore")
				    if stored_score is not None:
				        enriched["sentimentScore"] = stored_score

				    return enriched
			
 
				+
			
 
				+
			
 
				async def _enrich_topic_clusters(
				    clusters: list[dict],
				    topic: str,
				    semaphore: asyncio.Semaphore,
				    store: SQLiteClusterStore,
				    logger: logging.Logger,
				    enrich_limit: int,
				) -> list[dict]:
				    """Store one topic's raw clusters, then enrich them concurrently.

				    Args:
				        clusters: Raw clusters of this topic.
				        topic: Topic the clusters were bucketed under.
				        semaphore: Passed to each per-cluster enrichment task.
				        store: Receives the raw upsert and is passed on to each task.
				        logger: Progress logger.
				        enrich_limit: Number of leading clusters that go through
				            ``_enrich_single_cluster``; the remainder only get
				            ``enrich_cluster``.

				    Returns:
				        Enriched clusters in input order.
				    """
				    llm_enabled = topic == "other" or not ENRICH_OTHER_TOPICS_ONLY

				    # Raw clusters are written before enrichment so that a slow
				    # enrichment pass still leaves something stored.
				    store.upsert_clusters(clusters, topic=topic)
				    logger.info("refresh stored raw topic=%s clusters=%s", topic, len(clusters))

				    pending = [
				        _enrich_single_cluster(cluster, topic, llm_enabled, semaphore, store, logger)
				        for cluster in clusters[:enrich_limit]
				    ]
				    enriched = await asyncio.gather(*pending, return_exceptions=False)

				    # Clusters past the limit get the heuristic enrichment only.
				    for cluster in clusters[enrich_limit:]:
				        heuristic_only = enrich_cluster(cluster)
				        heuristic_only.setdefault("topic", topic)
				        enriched.append(heuristic_only)

				    logger.info("refresh enriched topic=%s clusters=%s", topic, len(enriched))
				    return enriched
			
 
				 
			
 
				 
			
 
				 async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
			
@@ -28,7 +116,9 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
 
				     store = SQLiteClusterStore(DB_PATH)
			
 
				 
			
 
				     logger.info("refresh start topic=%s limit=%s", topic, limit)
			
 
				-    articles = await asyncio.to_thread(fetch_news_articles, limit)
			
 
				+
			
 
				+    # fetch_news_articles is now fully async (concurrent RSS fetching)
			
 
				+    articles = await fetch_news_articles(limit)
			
 
				     logger.info("refresh fetched articles=%s", len(articles))
			
 
				 
			
 
				     # Drop legacy aggregate feed-state rows so the dashboard only reflects
			
@@ -37,7 +127,6 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
 
				         conn.execute("DELETE FROM feed_state WHERE feed_key LIKE 'newsfeeds:%'")
			
 
				 
			
 
				     # Track feed freshness per RSS URL so unchanged feeds can be skipped.
			
 
				-    import hashlib
			
 
				     per_feed: dict[str, list[dict[str, Any]]] = defaultdict(list)
			
 
				     for article in articles:
			
 
				         feed_url = str(article.get("feed_url") or NEWS_FEED_URL).strip() or NEWS_FEED_URL
			
@@ -75,87 +164,51 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
 
				 
			
 
				     articles = changed_articles
			
 
				     logger.info("refresh clustering start articles=%s topic=%s", len(articles), topic)
			
 
				-    clustered_by_topic = dedup_and_cluster_articles(articles)
			
 
				+    # Clustering is sync but may do concurrent embedding fetches internally.
			
 
				+    # Run off-thread so the event loop stays responsive for MCP tool calls.
			
 
				+    clustered_by_topic = await asyncio.to_thread(dedup_and_cluster_articles, articles)
			
 
				     logger.info("refresh clustered topics=%s", list(clustered_by_topic.keys()))
			
 
				 
			
 
				+    # Build LLM concurrency semaphore from the extract provider's config.
			
 
				+    max_llm_concurrent = llm_concurrency(NEWS_EXTRACT_PROVIDER)
			
 
				+    llm_semaphore = asyncio.Semaphore(max_llm_concurrent)
			
 
				+    logger.info("refresh llm semaphore limit=%s provider=%s", max_llm_concurrent, NEWS_EXTRACT_PROVIDER)
			
 
				+
			
 
				+    # Enrich each topic's clusters concurrently.
			
 
				+    topic_tasks = []
			
 
				     for t, clusters in clustered_by_topic.items():
			
 
				         if topic and t != topic:
			
 
				             continue
			
 
				-        logger.info("refresh topic phase start topic=%s clusters=%s", t, len(clusters))
			
 
				-        enriched = []
			
 
				 
			
 
				         # Determine how many clusters to LLM-enrich.
			
 
				         # ENRICHMENT_MAX_PER_REFRESH=0 means enrich every cluster (no cap).
			
 
				         enrich_limit = ENRICHMENT_MAX_PER_REFRESH or len(clusters)
			
 
				 
			
 
				-        # Track whether the LLM pipeline is available for this topic.
			
 
				-        _llm_enabled_for_topic = (
			
 
				-            (not ENRICH_OTHER_TOPICS_ONLY) or (t == "other")
			
 
				+        topic_tasks.append(
			
 
				+            _enrich_topic_clusters(
			
 
				+                clusters=clusters,
			
 
				+                topic=t,
			
 
				+                semaphore=llm_semaphore,
			
 
				+                store=store,
			
 
				+                logger=logger,
			
 
				+                enrich_limit=enrich_limit,
			
 
				+            )
			
 
				         )
			
 
				 
			
 
				-        # Persist the raw clusters first so a slow enrichment pass does not
			
 
				-        # leave the first bootstrap run with nothing stored.
			
 
				-        store.upsert_clusters(clusters, topic=t)
			
 
				-        logger.info("refresh stored raw topic=%s clusters=%s", t, len(clusters))
			
 
				-
			
 
				-        for idx, c in enumerate(clusters[:enrich_limit]):
			
 
				-            c2 = enrich_cluster(c)
			
 
				-            # Seed the heuristic topic on the payload so classify_cluster_llm
			
 
				-            # has a sane fallback if the LLM omits or hallucinates one.
			
 
				-            c2.setdefault("topic", t)
			
 
				-            logger.info("refresh enrich cluster=%s topic=%s idx=%s/%s", c2.get("cluster_id"), t, idx + 1, enrich_limit)
			
 
				-
			
 
				-            if _llm_enabled_for_topic:
			
 
				-                # Cache: if we already have entities/sentiment for this cluster, skip LLM call.
			
 
				-                existing = store.get_cluster_by_id(c2.get("cluster_id"))
			
 
				-                if existing and existing.get("entities"):
			
 
				-                    c2 = dict(c2)
			
 
				-                    # Keep existing enriched fields.
			
 
				-                    c2["entities"] = existing.get("entities", [])
			
 
				-
			
 
				-                    # IMPORTANT: entityResolutions must stay consistent with entities.
			
 
				-                    # Older rows may have entities but missing/malformed resolutions.
			
 
				-                    existing_resolutions = existing.get("entityResolutions", None)
			
 
				-                    if isinstance(existing_resolutions, list) and existing_resolutions:
			
 
				-                        c2["entityResolutions"] = existing_resolutions
			
 
				-                    else:
			
 
				-                        # Recompute resolutions deterministically from the stored entities.
			
 
				-                        c2["entityResolutions"] = [resolve_entity_via_trends(e) for e in c2["entities"]]
			
 
				-
			
 
				-                    if existing.get("sentiment"):
			
 
				-                        c2["sentiment"] = existing.get("sentiment")
			
 
				-                    if existing.get("sentimentScore") is not None:
			
 
				-                        c2["sentimentScore"] = existing.get("sentimentScore")
			
 
				-                    if existing.get("keywords"):
			
 
				-                        c2["keywords"] = existing.get("keywords")
			
 
				-                    # Preserve a previously-classified topic so we don't drift back
			
 
				-                    # to the heuristic on cache hits.
			
 
				-                    if existing.get("topic"):
			
 
				-                        c2["topic"] = existing.get("topic")
			
 
				-                else:
			
 
				-                    try:
			
 
				-                        c2 = await classify_cluster_llm(c2)
			
 
				-                    except Exception:
			
 
				-                        logger.exception("LLM enrichment failed for cluster %s (topic %s)", c2.get("cluster_id"), t)
			
 
				-                        # Mark so we can retry on next refresh.
			
 
				-                        c2["enrichment_failed_at"] = datetime.now(timezone.utc).isoformat()
			
 
				-
			
 
				-            enriched.append(c2)
			
 
				-
			
 
				-        # Persist clusters under their *post-enrichment* topic so the SQL row
			
 
				-        # column matches what the LLM (or the validated heuristic fallback)
			
 
				-        # actually decided. Previously, every cluster from this bucket was
			
 
				-        # forced into the heuristic topic `t`, which caused a ~97% mismatch
			
 
				-        # between row-column topic and payload topic.
			
 
				+    # Run all topic enrichment phases concurrently
			
 
				+    topic_results = await asyncio.gather(*topic_tasks, return_exceptions=False)
			
 
				+
			
 
				+    # Persist enriched clusters grouped by their final topic
			
 
				+    for enriched in topic_results:
			
 
				         by_final_topic: Dict[str, list] = {}
			
 
				         for c2 in enriched:
			
 
				-            final_topic = str(c2.get("topic") or t or "other").strip().lower()
			
 
				+            final_topic = str(c2.get("topic") or "other").strip().lower()
			
 
				             if final_topic not in {x.lower() for x in DEFAULT_TOPICS}:
			
 
				                 final_topic = "other"
			
 
				             by_final_topic.setdefault(final_topic, []).append(c2)
			
 
				         for final_topic, group in by_final_topic.items():
			
 
				             store.upsert_clusters(group, topic=final_topic)
			
 
				-            logger.info("refresh stored topic=%s clusters=%s (heuristic_topic=%s)", final_topic, len(group), t)
			
 
				+            logger.info("refresh stored topic=%s clusters=%s", final_topic, len(group))
			
 
				 
			
 
				     prune_result = store.prune_if_due(
			
 
				         pruning_enabled=NEWS_PRUNING_ENABLED,
			
--- a/news_mcp/sources/news_feeds.py
+++ b/news_mcp/sources/news_feeds.py
@@ -1,5 +1,6 @@
 
				 from __future__ import annotations
			
 
				 
			
 
				+import asyncio
			
 
				 import hashlib
			
 
				 import logging
			
 
				 import re
			
@@ -8,14 +9,20 @@ from urllib.error import URLError, HTTPError
 
				 from urllib.request import Request, urlopen
			
 
				 
			
 
				 import feedparser
			
 
				+import httpx
			
 
				 
			
 
				-from news_mcp.config import NEWS_FEED_ITEMS_PER_POLL, NEWS_FEED_URL, NEWS_FEED_URLS
			
 
				+from news_mcp.config import (
			
 
				+    NEWS_FEED_ITEMS_PER_POLL,
			
 
				+    NEWS_FEED_URL,
			
 
				+    NEWS_FEED_URLS,
			
 
				+    _NEEDLE_RSS_MAX_CONCURRENCY,
			
 
				+)
			
 
				 
			
 
				 
			
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 
			
 
				-FEED_FETCH_TIMEOUT_SECONDS = 15
			
 
				+FEED_FETCH_TIMEOUT_SECONDS = 20
			
 
				 
			
 
				 
			
 
				 def _canonical_url(url: str) -> str:
			
@@ -39,64 +46,97 @@ def _feed_urls() -> List[str]:
 
				     return urls
			
 
				 
			
 
				 
			
 
				-def _fetch_feed(feed_url: str):
			
 
				-    req = Request(feed_url, headers={"User-Agent": "news-mcp/1.0"})
			
 
				-    with urlopen(req, timeout=FEED_FETCH_TIMEOUT_SECONDS) as resp:
			
 
				-        return feedparser.parse(resp.read())
			
 
				+def _parse_feed_from_bytes(data: bytes, feed_url: str):
			
 
				+    """Parse a feed from raw bytes (sync — just XML parsing); *feed_url* is currently unused."""
			
 
				+    return feedparser.parse(data)
			
 
				 
			
 
				 
			
 
				-def fetch_news_articles(limit: int = NEWS_FEED_ITEMS_PER_POLL) -> List[Dict[str, Any]]:
			
 
				-    feed_urls = _feed_urls()
			
 
				+async def _fetch_feed_async(
			
 
				+    client: httpx.AsyncClient,
			
 
				+    semaphore: asyncio.Semaphore,
			
 
				+    feed_url: str,
			
 
				+) -> tuple[str, bytes | None]:
			
 
				+    """Fetch one RSS feed while holding *semaphore*. Returns (feed_url, raw_bytes), or (feed_url, None) on failure."""
			
 
				+    async with semaphore:
			
 
				+        try:
			
 
				+            resp = await client.get(feed_url, follow_redirects=True)
			
 
				+            resp.raise_for_status()
			
 
				+            return (feed_url, resp.content)
			
 
				+        except (httpx.HTTPStatusError, httpx.TimeoutException, httpx.ConnectError, OSError) as exc:
			
 
				+            logger.exception("news feed fetch failed feed_url=%s error=%s", feed_url, exc)
			
 
				+            return (feed_url, None)
			
 
				+        except Exception as exc:
			
 
				+            logger.exception("news feed fetch unexpected error feed_url=%s error=%s", feed_url, exc)
			
 
				+            return (feed_url, None)
			
 
				+
			
 
				+
			
 
				+def _extract_articles_from_feed(
			
 
				+    feed_url: str,
			
 
				+    parsed,
			
 
				+    per_feed_limit: int,
			
 
				+) -> List[Dict[str, Any]]:
			
 
				+    """Extract article dicts from the first *per_feed_limit* entries of a parsed feedparser object, skipping entries without a title or link (sync)."""
			
 
				     articles: List[Dict[str, Any]] = []
			
 
				+    feed_name = getattr(parsed.feed, "title", None) or feed_url
			
 
				+    parsed_entries = len(getattr(parsed, "entries", []) or [])
			
 
				+    logger.info(
			
 
				+        "news feed parsed feed_url=%s feed_name=%s entries=%s",
			
 
				+        feed_url, feed_name, parsed_entries,
			
 
				+    )
			
 
				+
			
 
				+    kept = 0
			
 
				+    for entry in parsed.entries[:per_feed_limit]:
			
 
				+        title = str(getattr(entry, "title", "")).strip()
			
 
				+        url = _canonical_url(str(getattr(entry, "link", "")).strip())
			
 
				+        timestamp = str(getattr(entry, "published", "")) or str(getattr(entry, "updated", ""))
			
 
				+        summary = _strip_html(
			
 
				+            str(getattr(entry, "summary", "")) or str(getattr(entry, "description", ""))
			
 
				+        )
			
 
				+        if not title or not url:
			
 
				+            continue
			
 
				+        articles.append({
			
 
				+            "title": title,
			
 
				+            "url": url,
			
 
				+            "source": str(feed_name),
			
 
				+            "feed_url": feed_url,
			
 
				+            "timestamp": timestamp,
			
 
				+            "summary": summary,
			
 
				+        })
			
 
				+        kept += 1
			
 
				+
			
 
				+    logger.info("news feed completed feed_url=%s kept=%s", feed_url, kept)
			
 
				+    return articles
			
 
				 
			
 
				-    logger.info("news ingestion start feeds=%s limit=%s timeout_s=%s", len(feed_urls), limit, FEED_FETCH_TIMEOUT_SECONDS)
			
 
				 
			
 
				-    # Apply the configured cap per feed.
			
 
				+async def fetch_news_articles(limit: int = NEWS_FEED_ITEMS_PER_POLL) -> List[Dict[str, Any]]:
			
 
				+    """Fetch all RSS feeds concurrently, parse them, and return articles (*limit* applies per feed; feeds that fail to download are skipped)."""
			
 
				+    feed_urls = _feed_urls()
			
 
				     per_feed_limit = max(1, int(limit))
			
 
				 
			
 
				-    for feed_url in feed_urls:
			
 
				-        try:
			
 
				-            feed = _fetch_feed(feed_url)
			
 
				-            feed_name = getattr(feed.feed, "title", None) or feed_url
			
 
				-            parsed_entries = len(getattr(feed, "entries", []) or [])
			
 
				-            logger.info("news feed parsed feed_url=%s feed_name=%s entries=%s", feed_url, feed_name, parsed_entries)
			
 
				-        except (HTTPError, URLError, TimeoutError, OSError) as exc:
			
 
				-            logger.exception("news feed fetch failed feed_url=%s error=%s", feed_url, exc)
			
 
				-            continue
			
 
				-        except Exception as exc:
			
 
				-            logger.exception("news feed parse failed feed_url=%s error=%s", feed_url, exc)
			
 
				-            continue
			
 
				+    logger.info(
			
 
				+        "news ingestion start feeds=%s limit=%s timeout_s=%s",
			
 
				+        len(feed_urls), per_feed_limit, FEED_FETCH_TIMEOUT_SECONDS,
			
 
				+    )
			
 
				 
			
 
				-        kept_before = len(articles)
			
 
				-        for entry in feed.entries[:per_feed_limit]:
			
 
				-            title = str(getattr(entry, "title", "")).strip()
			
 
				-            url = _canonical_url(str(getattr(entry, "link", "")).strip())
			
 
				-            timestamp = str(getattr(entry, "published", "")) or str(getattr(entry, "updated", ""))
			
 
				-            summary = _strip_html(str(getattr(entry, "summary", "")) or str(getattr(entry, "description", "")))
			
 
				-
			
 
				-            if not title or not url:
			
 
				-                continue
			
 
				-
			
 
				-            articles.append(
			
 
				-                {
			
 
				-                    "title": title,
			
 
				-                    "url": url,
			
 
				-                    "source": str(feed_name),
			
 
				-                    "feed_url": feed_url,
			
 
				-                    "timestamp": timestamp,
			
 
				-                    "summary": summary,
			
 
				-                }
			
 
				-            )
			
 
				-
			
 
				-            if len(articles) - kept_before >= per_feed_limit:
			
 
				-                logger.info("news ingestion per-feed limit reached feed_url=%s kept=%s", feed_url, len(articles) - kept_before)
			
 
				-                break
			
 
				-
			
 
				-        logger.info(
			
 
				-            "news feed completed feed_url=%s kept=%s",
			
 
				-            feed_url,
			
 
				-            len(articles) - kept_before,
			
 
				-        )
			
 
				+    semaphore = asyncio.Semaphore(_NEEDLE_RSS_MAX_CONCURRENCY)
			
 
				+
			
 
				+    async with httpx.AsyncClient(
			
 
				+        timeout=httpx.Timeout(FEED_FETCH_TIMEOUT_SECONDS),
			
 
				+        headers={"User-Agent": "news-mcp/1.0"},
			
 
				+    ) as client:
			
 
				+        tasks = [
			
 
				+            _fetch_feed_async(client, semaphore, url)
			
 
				+            for url in feed_urls
			
 
				+        ]
			
 
				+        results = await asyncio.gather(*tasks, return_exceptions=False)
			
 
				+
			
 
				+    articles: List[Dict[str, Any]] = []
			
 
				+    for feed_url, raw in results:
			
 
				+        if raw is None:
			
 
				+            continue
			
 
				+        # feedparser.parse is CPU-light but sync — parse inline (fast enough)
			
 
				+        parsed = feedparser.parse(raw)
			
 
				+        articles.extend(_extract_articles_from_feed(feed_url, parsed, per_feed_limit))
			
 
				 
			
 
				     logger.info("news ingestion complete total_kept=%s", len(articles))
			
 
				     return articles
			
@@ -116,5 +156,6 @@ def normalize_topic_from_title(title: str) -> str:
 
				 
			
 
				 
			
 
				 def cluster_id_for_title(topic: str, title: str) -> str:
			
 
				+    import hashlib
			
 
				     key = f"{topic}|{title.strip().lower()}"
			
 
				     return hashlib.sha1(key.encode("utf-8")).hexdigest()
			
--- a/test_news_mcp.py
+++ b/test_news_mcp.py
@@ -386,7 +386,11 @@ def test_refresh_skips_reprocessing_when_feed_hash_is_unchanged(monkeypatch):
 
				             self.meta[key] = value
			
 
				 
			
 
				     monkeypatch.setattr(poller, "SQLiteClusterStore", DummyStore)
			
 
				-    monkeypatch.setattr(poller, "fetch_news_articles", lambda limit: [{"title": "Bitcoin rallies", "url": "https://example.com/a", "timestamp": "Wed, 01 Apr 2026 12:00:00 GMT"}])
			
 
				+
			
 
				+    async def _mock_fetch(limit):
			
 
				+        calls["fetch"] += 1
			
 
				+        return [{"title": "Bitcoin rallies", "url": "https://example.com/a", "timestamp": "Wed, 01 Apr 2026 12:00:00 GMT"}]
			
 
				+    monkeypatch.setattr(poller, "fetch_news_articles", _mock_fetch)
			
 
				     monkeypatch.setattr(poller.asyncio, "to_thread", fake_to_thread)
			
 
				     monkeypatch.setattr(poller, "dedup_and_cluster_articles", fake_cluster)
			
 
				     monkeypatch.setattr(poller, "enrich_cluster", fake_enrich)
			
@@ -627,10 +631,8 @@ def test_poller_persists_clusters_under_post_enrichment_topic(monkeypatch):
 
				         def set_meta(self, key, value):
			
 
				             pass
			
 
				 
			
 
				-    async def fake_to_thread(fn, limit):
			
 
				-        return [
			
 
				-            {"title": "SEC fines firm", "url": "https://example.com/a", "source": "S", "timestamp": "Wed, 01 Apr 2026 12:00:00 GMT", "summary": "..."},
			
 
				-        ]
			
 
				+        def set_feed_state(self, feed_key, last_hash, item_count):
			
 
				+            pass
			
 
				 
			
 
				     def fake_cluster(articles):
			
 
				         # Heuristic put it in "other" (no crypto/macro/regulation/ai keywords
			
@@ -668,8 +670,13 @@ def test_poller_persists_clusters_under_post_enrichment_topic(monkeypatch):
 
				         return out
			
 
				 
			
 
				     monkeypatch.setattr(poller, "SQLiteClusterStore", DummyStore)
			
 
				-    monkeypatch.setattr(poller, "fetch_news_articles", lambda limit: [])
			
 
				-    monkeypatch.setattr(poller.asyncio, "to_thread", fake_to_thread)
			
 
				+
			
 
				+    async def _mock_fetch2(limit):
			
 
				+        return [
			
 
				+            {"title": "SEC fines firm", "url": "https://example.com/a", "source": "S",
			
 
				+             "timestamp": "Wed, 01 Apr 2026 12:00:00 GMT", "summary": "..."},
			
 
				+        ]
			
 
				+    monkeypatch.setattr(poller, "fetch_news_articles", _mock_fetch2)
			
 
				     monkeypatch.setattr(poller, "dedup_and_cluster_articles", fake_cluster)
			
 
				     monkeypatch.setattr(poller, "enrich_cluster", fake_enrich)
			
 
				     monkeypatch.setattr(poller, "classify_cluster_llm", fake_classify)
			
@@ -677,7 +684,15 @@ def test_poller_persists_clusters_under_post_enrichment_topic(monkeypatch):
 
				     asyncio.run(poller.refresh_clusters(topic=None, limit=10))
			
 
				 
			
 
				     assert captured["upserts"], "Expected at least one upsert call"
			
 
				-    upsert = captured["upserts"][0]
			
 
				+    # The poller first stores raw clusters (topic=heuristic), then enriched
			
 
				+    # clusters (topic=post-LLM).  The enriched upsert is the one whose row_topic
			
 
				+    # reflects the LLM classification.
			
 
				+    enriched_upserts = [u for u in captured["upserts"] if u["row_topic"] == "regulation"]
			
 
				+    assert enriched_upserts, (
			
 
				+        f"Expected at least one upsert with row_topic='regulation', "
			
 
				+        f"got topics: {[u['row_topic'] for u in captured['upserts']]}"
			
 
				+    )
			
 
				+    upsert = enriched_upserts[0]
			
 
				     assert upsert["row_topic"] == "regulation", (
			
 
				         f"Expected SQL row topic to follow the LLM's classification 'regulation', got {upsert['row_topic']!r}"
			
 
				     )