Browse Source

poller fix

Lukas Goldschmidt 2 tuần trước
mục cha
commit
ce9d05d1a6
1 tập tin đã thay đổi với 10 bổ sung và 1 xóa
  1. 10 1
      news_mcp/jobs/poller.py

+ 10 - 1
news_mcp/jobs/poller.py

@@ -44,6 +44,7 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
         per_feed[feed_url].append(article)
 
     changed_articles: list[dict[str, Any]] = []
+    changed_feed_urls: list[str] = []
     for feed_url, feed_articles in per_feed.items():
         material = "\n".join(
             f"{a.get('title','')}|{a.get('url','')}|{a.get('timestamp','')}"
@@ -56,7 +57,7 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
             logger.info("refresh unchanged feed_url=%s count=%s topic=%s", feed_url, len(feed_articles), topic)
         else:
             logger.info("refresh changed feed_url=%s count=%s topic=%s", feed_url, len(feed_articles), topic)
-            store.set_feed_state(feed_key, last_hash, len(feed_articles))
+            changed_feed_urls.append(feed_url)
             changed_articles.extend(feed_articles)
 
     if not changed_articles:
@@ -151,5 +152,13 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
         retention_days=NEWS_RETENTION_DAYS,
         interval_hours=NEWS_PRUNE_INTERVAL_HOURS,
     )
+    for feed_url in changed_feed_urls:
+        feed_articles = per_feed[feed_url]
+        material = "\n".join(
+            f"{a.get('title','')}|{a.get('url','')}|{a.get('timestamp','')}"
+            for a in feed_articles
+        )
+        last_hash = hashlib.sha1(material.encode("utf-8")).hexdigest()
+        store.set_feed_state(feed_url, last_hash, len(feed_articles))
     store.set_meta("last_refresh_at", datetime.now(timezone.utc).isoformat())
     logger.info("refresh prune_result=%s", prune_result)