|
@@ -168,22 +168,29 @@ class ClusterPoller:
|
|
|
self._prune_and_finalize(enabled_urls, feed_map)
|
|
self._prune_and_finalize(enabled_urls, feed_map)
|
|
|
return self.stats
|
|
return self.stats
|
|
|
|
|
|
|
|
- # 3c. For changed-content articles, clear enriched_at on their existing
|
|
|
|
|
- # clusters so the next enrichment cycle re-processes them with the
|
|
|
|
|
- # updated article data.
|
|
|
|
|
|
|
+ # 3c. For changed-content articles, clear enriched_at in the cluster
|
|
|
|
|
+ # payload JSON so the next enrichment cycle re-processes them with
|
|
|
|
|
+ # the updated article data. (enriched_at is stored inside the JSON
|
|
|
|
|
+ # payload, not as a separate SQL column.)
|
|
|
if seen_changed:
|
|
if seen_changed:
|
|
|
from news_mcp.article_identity import article_key as _ak
|
|
from news_mcp.article_identity import article_key as _ak
|
|
|
|
|
+ import json as _json
|
|
|
changed_keys = {_ak(a) for a in seen_changed}
|
|
changed_keys = {_ak(a) for a in seen_changed}
|
|
|
with self.store._conn() as conn:
|
|
with self.store._conn() as conn:
|
|
|
for ak in changed_keys:
|
|
for ak in changed_keys:
|
|
|
row = conn.execute(
|
|
row = conn.execute(
|
|
|
- "SELECT cluster_id FROM seen_articles WHERE article_key=?", (ak,)
|
|
|
|
|
|
|
+ "SELECT sa.cluster_id, c.payload FROM seen_articles sa "
|
|
|
|
|
+ "JOIN clusters c ON c.cluster_id = sa.cluster_id "
|
|
|
|
|
+ "WHERE sa.article_key=?",
|
|
|
|
|
+ (ak,),
|
|
|
).fetchone()
|
|
).fetchone()
|
|
|
if row:
|
|
if row:
|
|
|
changed_cluster_ids.add(row[0])
|
|
changed_cluster_ids.add(row[0])
|
|
|
|
|
+ payload = _json.loads(row[1])
|
|
|
|
|
+ payload.pop("enriched_at", None)
|
|
|
conn.execute(
|
|
conn.execute(
|
|
|
- "UPDATE clusters SET enriched_at=NULL WHERE cluster_id=?",
|
|
|
|
|
- (row[0],),
|
|
|
|
|
|
|
+ "UPDATE clusters SET payload=? WHERE cluster_id=?",
|
|
|
|
|
+ (_json.dumps(payload, ensure_ascii=False), row[0]),
|
|
|
)
|
|
)
|
|
|
if changed_cluster_ids:
|
|
if changed_cluster_ids:
|
|
|
self.logger.info("content_changed: clusters=%d will re-enrich", len(changed_cluster_ids))
|
|
self.logger.info("content_changed: clusters=%d will re-enrich", len(changed_cluster_ids))
|