|
|
@@ -106,17 +106,11 @@ class ClusterPoller:
|
|
|
# 1. Load configured + enabled feed URLs
|
|
|
configured_urls = self._load_feed_urls()
|
|
|
enabled_urls = self.store.get_enabled_feed_urls(configured_urls)
|
|
|
- disabled_urls = [u for u in configured_urls if u not in enabled_urls]
|
|
|
- self.logger.info("poll start: enabled_feeds=%d disabled=%d configured=%d",
|
|
|
- len(enabled_urls), len(disabled_urls), len(configured_urls))
|
|
|
+ self.logger.info("poll start: enabled_feeds=%d configured=%d", len(enabled_urls), len(configured_urls))
|
|
|
|
|
|
# 2. Fetch articles from enabled feeds only, per-feed dedup
|
|
|
feed_map, feed_stats = await self._fetch_feeds(enabled_urls)
|
|
|
|
|
|
- # Add disabled feeds to stats with zero counts
|
|
|
- for du in disabled_urls:
|
|
|
- feed_stats.append(FeedStats(feed_url=du, fetched=0, duplicate=0, stale=0, ingested=0))
|
|
|
-
|
|
|
# Flatten all fresh articles (stats already tracked per-feed in feed_stats)
|
|
|
all_fresh = [a for articles in feed_map.values() for a in articles]
|
|
|
|
|
|
@@ -124,7 +118,7 @@ class ClusterPoller:
|
|
|
self.logger.info("poll: no fresh articles from any feed")
|
|
|
self.stats.feeds = feed_stats
|
|
|
self._save_feed_stats(feed_stats)
|
|
|
- self._prune_and_finalize(enabled_urls, disabled_urls, feed_map)
|
|
|
+ self._prune_and_finalize(enabled_urls, feed_map)
|
|
|
return self.stats
|
|
|
|
|
|
# 3. Retention filter
|
|
|
@@ -134,7 +128,7 @@ class ClusterPoller:
|
|
|
self.logger.info("poll: all %d fresh articles dropped by retention", len(all_fresh))
|
|
|
self.stats.feeds = feed_stats
|
|
|
self._save_feed_stats(feed_stats)
|
|
|
- self._prune_and_finalize(enabled_urls, disabled_urls, feed_map)
|
|
|
+ self._prune_and_finalize(enabled_urls, feed_map)
|
|
|
return self.stats
|
|
|
|
|
|
# 4. Pre-seed existing clusters for cross-cycle merging
|
|
|
@@ -152,7 +146,7 @@ class ClusterPoller:
|
|
|
# 8. Persist feed stats + prune
|
|
|
self.stats.feeds = feed_stats
|
|
|
self._save_feed_stats(feed_stats)
|
|
|
- self._prune_and_finalize(enabled_urls, disabled_urls, feed_map)
|
|
|
+ self._prune_and_finalize(enabled_urls, feed_map)
|
|
|
|
|
|
self.logger.info(
|
|
|
"poll complete: clusters=%d newly_enriched=%d already_enriched=%d failed=%d",
|
|
|
@@ -467,11 +461,9 @@ class ClusterPoller:
|
|
|
def _prune_and_finalize(
|
|
|
self,
|
|
|
enabled_urls: list[str],
|
|
|
- disabled_urls: list[str],
|
|
|
feed_map: dict[str, list[dict]],
|
|
|
) -> None:
|
|
|
- """Run pruning and update feed_state hashes + timestamps.
|
|
|
- Disabled feeds get their last_item_count reset to 0."""
|
|
|
+ """Run pruning and update feed_state hashes + timestamps."""
|
|
|
prune_result = self.store.prune_if_due(
|
|
|
pruning_enabled=NEWS_PRUNING_ENABLED,
|
|
|
retention_days=NEWS_RETENTION_DAYS,
|
|
|
@@ -487,10 +479,6 @@ class ClusterPoller:
|
|
|
content_hash = hashlib.sha1(material.encode("utf-8")).hexdigest()
|
|
|
self.store.set_feed_state(feed_url, content_hash, len(feed_articles))
|
|
|
|
|
|
- # Reset disabled feeds to 0 items so dashboard reflects reality
|
|
|
- for du in disabled_urls:
|
|
|
- self.store.set_feed_state(du, "", 0)
|
|
|
-
|
|
|
# Drop legacy aggregate feed-state rows
|
|
|
with self.store._conn() as conn:
|
|
|
conn.execute("DELETE FROM feed_state WHERE feed_key LIKE 'newsfeeds:%'")
|