|
|
@@ -103,14 +103,20 @@ class ClusterPoller:
|
|
|
"""Run one full polling cycle. Returns statistics."""
|
|
|
self.stats = PollStats(started_at=datetime.now(timezone.utc).isoformat())
|
|
|
|
|
|
- # 1. Load enabled feed URLs
|
|
|
+ # 1. Load configured + enabled feed URLs
|
|
|
configured_urls = self._load_feed_urls()
|
|
|
enabled_urls = self.store.get_enabled_feed_urls(configured_urls)
|
|
|
- self.logger.info("poll start: enabled_feeds=%d configured=%d", len(enabled_urls), len(configured_urls))
|
|
|
+ disabled_urls = [u for u in configured_urls if u not in enabled_urls]
|
|
|
+ self.logger.info("poll start: enabled_feeds=%d disabled=%d configured=%d",
|
|
|
+ len(enabled_urls), len(disabled_urls), len(configured_urls))
|
|
|
|
|
|
- # 2. Fetch articles from all enabled feeds, per-feed dedup
|
|
|
+ # 2. Fetch articles from enabled feeds only, per-feed dedup
|
|
|
feed_map, feed_stats = await self._fetch_feeds(enabled_urls)
|
|
|
|
|
|
+ # Add disabled feeds to stats with zero counts
|
|
|
+ for du in disabled_urls:
|
|
|
+ feed_stats.append(FeedStats(feed_url=du, fetched=0, duplicate=0, stale=0, ingested=0))
|
|
|
+
|
|
|
# Flatten all fresh articles (stats already tracked per-feed in feed_stats)
|
|
|
all_fresh = [a for articles in feed_map.values() for a in articles]
|
|
|
|
|
|
@@ -118,7 +124,7 @@ class ClusterPoller:
|
|
|
self.logger.info("poll: no fresh articles from any feed")
|
|
|
self.stats.feeds = feed_stats
|
|
|
self._save_feed_stats(feed_stats)
|
|
|
- self._prune_and_finalize(enabled_urls, feed_map)
|
|
|
+ self._prune_and_finalize(enabled_urls, disabled_urls, feed_map)
|
|
|
return self.stats
|
|
|
|
|
|
# 3. Retention filter
|
|
|
@@ -128,7 +134,7 @@ class ClusterPoller:
|
|
|
self.logger.info("poll: all %d fresh articles dropped by retention", len(all_fresh))
|
|
|
self.stats.feeds = feed_stats
|
|
|
self._save_feed_stats(feed_stats)
|
|
|
- self._prune_and_finalize(enabled_urls, feed_map)
|
|
|
+ self._prune_and_finalize(enabled_urls, disabled_urls, feed_map)
|
|
|
return self.stats
|
|
|
|
|
|
# 4. Pre-seed existing clusters for cross-cycle merging
|
|
|
@@ -146,7 +152,7 @@ class ClusterPoller:
|
|
|
# 8. Persist feed stats + prune
|
|
|
self.stats.feeds = feed_stats
|
|
|
self._save_feed_stats(feed_stats)
|
|
|
- self._prune_and_finalize(enabled_urls, feed_map)
|
|
|
+ self._prune_and_finalize(enabled_urls, disabled_urls, feed_map)
|
|
|
|
|
|
self.logger.info(
|
|
|
"poll complete: clusters=%d newly_enriched=%d already_enriched=%d failed=%d",
|
|
|
@@ -461,9 +467,11 @@ class ClusterPoller:
|
|
|
def _prune_and_finalize(
|
|
|
self,
|
|
|
enabled_urls: list[str],
|
|
|
+ disabled_urls: list[str],
|
|
|
feed_map: dict[str, list[dict]],
|
|
|
) -> None:
|
|
|
- """Run pruning and update feed_state hashes + timestamps."""
|
|
|
+ """Run pruning and update feed_state hashes + timestamps.
|
|
|
+ Disabled feeds get their last_item_count reset to 0."""
|
|
|
prune_result = self.store.prune_if_due(
|
|
|
pruning_enabled=NEWS_PRUNING_ENABLED,
|
|
|
retention_days=NEWS_RETENTION_DAYS,
|
|
|
@@ -479,6 +487,10 @@ class ClusterPoller:
|
|
|
content_hash = hashlib.sha1(material.encode("utf-8")).hexdigest()
|
|
|
self.store.set_feed_state(feed_url, content_hash, len(feed_articles))
|
|
|
|
|
|
+ # Reset disabled feeds to 0 items so dashboard reflects reality
|
|
|
+ for du in disabled_urls:
|
|
|
+ self.store.set_feed_state(du, "", 0)
|
|
|
+
|
|
|
# Drop legacy aggregate feed-state rows
|
|
|
with self.store._conn() as conn:
|
|
|
conn.execute("DELETE FROM feed_state WHERE feed_key LIKE 'newsfeeds:%'")
|