2 weeks ago · 8a2a0c6279
--- a/news_mcp/jobs/poller.py
+++ b/news_mcp/jobs/poller.py
@@ -46,6 +46,7 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
 
															     changed_articles: list[dict[str, Any]] = []
														
 
															     changed_feed_urls: list[str] = []
														
 
															     for feed_url, feed_articles in per_feed.items():
														
 
															+        logger.info("refresh feed batch start feed_url=%s count=%s", feed_url, len(feed_articles))
														
 
															         material = "\n".join(
														
 
															             f"{a.get('title','')}|{a.get('url','')}|{a.get('timestamp','')}"
														
 
															             for a in feed_articles
														
@@ -59,6 +60,7 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
 
															             logger.info("refresh changed feed_url=%s count=%s topic=%s", feed_url, len(feed_articles), topic)
														
 
															             changed_feed_urls.append(feed_url)
														
 
															             changed_articles.extend(feed_articles)
														
 
															+        logger.info("refresh feed batch complete feed_url=%s changed_total=%s", feed_url, len(changed_articles))
														
 
															     if not changed_articles:
														
 
															         logger.info("refresh unchanged all feeds topic=%s", topic)
														
@@ -72,12 +74,14 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
 
															         return
														
 
															     articles = changed_articles
														
 
															+    logger.info("refresh clustering start articles=%s topic=%s", len(articles), topic)
														
 
															     clustered_by_topic = dedup_and_cluster_articles(articles)
														
 
															     logger.info("refresh clustered topics=%s", list(clustered_by_topic.keys()))
														
 
															     for t, clusters in clustered_by_topic.items():
														
 
															         if topic and t != topic:
														
 
															             continue
														
 
															+        logger.info("refresh topic phase start topic=%s clusters=%s", t, len(clusters))
														
 
															         enriched = []
														
 
															         # Determine how many clusters to LLM-enrich.
														
@@ -89,11 +93,17 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
 
															             (not ENRICH_OTHER_TOPICS_ONLY) or (t == "other")
														
 
															         )
														
 
															+        # Persist the raw clusters first so a slow enrichment pass does not
														
 
															+        # leave the first bootstrap run with nothing stored.
														
 
															+        store.upsert_clusters(clusters, topic=t)
														
 
															+        logger.info("refresh stored raw topic=%s clusters=%s", t, len(clusters))
														
 
															+
														
 
															         for idx, c in enumerate(clusters[:enrich_limit]):
														
 
															             c2 = enrich_cluster(c)
														
 
															             # Seed the heuristic topic on the payload so classify_cluster_llm
														
 
															             # has a sane fallback if the LLM omits or hallucinates one.
														
 
															             c2.setdefault("topic", t)
														
 
															+            logger.info("refresh enrich cluster=%s topic=%s idx=%s/%s", c2.get("cluster_id"), t, idx + 1, enrich_limit)
														
 
															             if _llm_enabled_for_topic:
														
 
															                 # Cache: if we already have entities/sentiment for this cluster, skip LLM call.