Pārlūkot izejas kodu

fix: track disabled feeds, reset their item count to 0

Lukas Goldschmidt 1 nedēļu atpakaļ
vecāks
revīzija
7f5176281a
1 mainīts fails ar 19 papildinājumiem un 7 dzēšanām
  1. 19 7
      news_mcp/jobs/poller.py

+ 19 - 7
news_mcp/jobs/poller.py

@@ -103,14 +103,20 @@ class ClusterPoller:
         """Run one full polling cycle. Returns statistics."""
         self.stats = PollStats(started_at=datetime.now(timezone.utc).isoformat())
 
-        # 1. Load enabled feed URLs
+        # 1. Load configured + enabled feed URLs
         configured_urls = self._load_feed_urls()
         enabled_urls = self.store.get_enabled_feed_urls(configured_urls)
-        self.logger.info("poll start: enabled_feeds=%d configured=%d", len(enabled_urls), len(configured_urls))
+        disabled_urls = [u for u in configured_urls if u not in enabled_urls]
+        self.logger.info("poll start: enabled_feeds=%d disabled=%d configured=%d",
+                         len(enabled_urls), len(disabled_urls), len(configured_urls))
 
-        # 2. Fetch articles from all enabled feeds, per-feed dedup
+        # 2. Fetch articles from enabled feeds only, per-feed dedup
         feed_map, feed_stats = await self._fetch_feeds(enabled_urls)
 
+        # Add disabled feeds to stats with zero counts
+        for du in disabled_urls:
+            feed_stats.append(FeedStats(feed_url=du, fetched=0, duplicate=0, stale=0, ingested=0))
+
         # Flatten all fresh articles (stats already tracked per-feed in feed_stats)
         all_fresh = [a for articles in feed_map.values() for a in articles]
 
@@ -118,7 +124,7 @@ class ClusterPoller:
             self.logger.info("poll: no fresh articles from any feed")
             self.stats.feeds = feed_stats
             self._save_feed_stats(feed_stats)
-            self._prune_and_finalize(enabled_urls, feed_map)
+            self._prune_and_finalize(enabled_urls, disabled_urls, feed_map)
             return self.stats
 
         # 3. Retention filter
@@ -128,7 +134,7 @@ class ClusterPoller:
             self.logger.info("poll: all %d fresh articles dropped by retention", len(all_fresh))
             self.stats.feeds = feed_stats
             self._save_feed_stats(feed_stats)
-            self._prune_and_finalize(enabled_urls, feed_map)
+            self._prune_and_finalize(enabled_urls, disabled_urls, feed_map)
             return self.stats
 
         # 4. Pre-seed existing clusters for cross-cycle merging
@@ -146,7 +152,7 @@ class ClusterPoller:
         # 8. Persist feed stats + prune
         self.stats.feeds = feed_stats
         self._save_feed_stats(feed_stats)
-        self._prune_and_finalize(enabled_urls, feed_map)
+        self._prune_and_finalize(enabled_urls, disabled_urls, feed_map)
 
         self.logger.info(
             "poll complete: clusters=%d newly_enriched=%d already_enriched=%d failed=%d",
@@ -461,9 +467,11 @@ class ClusterPoller:
     def _prune_and_finalize(
         self,
         enabled_urls: list[str],
+        disabled_urls: list[str],
         feed_map: dict[str, list[dict]],
     ) -> None:
-        """Run pruning and update feed_state hashes + timestamps."""
+        """Run pruning and update feed_state hashes + timestamps.
+        Disabled feeds get their last_item_count reset to 0."""
         prune_result = self.store.prune_if_due(
             pruning_enabled=NEWS_PRUNING_ENABLED,
             retention_days=NEWS_RETENTION_DAYS,
@@ -479,6 +487,10 @@ class ClusterPoller:
             content_hash = hashlib.sha1(material.encode("utf-8")).hexdigest()
             self.store.set_feed_state(feed_url, content_hash, len(feed_articles))
 
+        # Reset disabled feeds to 0 items so dashboard reflects reality
+        for du in disabled_urls:
+            self.store.set_feed_state(du, "", 0)
+
         # Drop legacy aggregate feed-state rows
         with self.store._conn() as conn:
             conn.execute("DELETE FROM feed_state WHERE feed_key LIKE 'newsfeeds:%'")