1 nedēļu atpakaļ · fab4b5ec31
--- a/news_mcp/dedup/cluster.py
+++ b/news_mcp/dedup/cluster.py
@@ -362,7 +362,9 @@ def _merge_orphan_clusters(
 
				         base["sources"] = all_sources
			
 
				         base["first_seen"] = first_seen
			
 
				         base["last_updated"] = last_updated
			
 
				-        base["cluster_id"] = _stable_cluster_id(base.get("topic", "other"), all_articles)
			
 
				+        # Keep the base cluster's original ID so the enrichment cache
			
 
				+        # (keyed by cluster_id) survives the merge; setdefault only fills in a missing key.
			
 
				+        base.setdefault("cluster_id", _stable_cluster_id(base.get("topic", "other"), all_articles))
			
 
				         merged.append(base)
			
 
				 
			
 
				     return merged
			
@@ -488,9 +490,13 @@ def dedup_and_cluster_articles(
 
				     for topic, clusters in by_topic.items():
			
 
				         # Merge orphans (clusters that share articles)
			
 
				         clusters = _merge_orphan_clusters(clusters)
			
 
				-        # Recompute stable IDs from the final article sets
			
 
				+        # Assign stable IDs only to clusters that don't already have one.
			
 
				+        # Pre-seeded clusters from the DB carry their original cluster_id —
			
 
				+        # keeping it stable across cycles so the enrichment cache (keyed by
			
 
				+        # cluster_id) continues to work even after new articles are merged in.
			
 
				         for c in clusters:
			
 
				-            c["cluster_id"] = _stable_cluster_id(topic, c.get("articles", []) or [])
			
 
				+            if not c.get("cluster_id"):
			
 
				+                c["cluster_id"] = _stable_cluster_id(topic, c.get("articles", []) or [])
			
 
				         by_topic[topic] = clusters
			
 
				 
			
 
				     # Cross-topic dedup: merge clusters with overlapping headlines and entities
			
@@ -630,7 +636,9 @@ def _merge_duplicate_clusters(
 
				         base["keywords"] = all_keywords
			
 
				         base["first_seen"] = first_seen
			
 
				         base["last_updated"] = last_updated
			
 
				-        base["cluster_id"] = _stable_cluster_id(base.get("topic", "other"), all_articles)
			
 
				+        # Keep the base cluster's original ID so the enrichment cache
			
 
				+        # (keyed by cluster_id) survives the merge; setdefault only fills in a missing key.
			
 
				+        base.setdefault("cluster_id", _stable_cluster_id(base.get("topic", "other"), all_articles))
			
 
				         merged_by_topic.setdefault(base_topic, []).append(base)
			
 
				 
			
 
				     return merged_by_topic
			
--- a/test_news_mcp.py
+++ b/test_news_mcp.py
@@ -964,14 +964,11 @@ def test_cross_cycle_merge_topic_mismatch():
 
				         f"{[c['headline'] for c in all_clusters]}"
			
 
				     )
			
 
				 
			
 
				-    # The surviving cluster must carry the *same* cluster_id regardless of
			
 
				-    # which topic wins, i.e. cluster_id is now purely article-key based.
			
 
				-    from news_mcp.dedup.cluster import _stable_cluster_id
			
 
				-    expected_cid = _stable_cluster_id(
			
 
				-        "other",
			
 
				-        [{"title": "Hegseth says US will keep pressure on Iran", "url": url}],
			
 
				-    )
			
 
				-    assert all_clusters[0]["cluster_id"] == expected_cid
			
 
				+    # The surviving cluster must carry the *same* cluster_id from the
			
 
				+    # pre-seeded DB cluster, even after absorbing new articles.
			
 
				+    # cluster_id is assigned only when absent and never recomputed, so the
			
 
				+    # enrichment cache (keyed by cluster_id) survives across cycles.
			
 
				+    assert all_clusters[0]["cluster_id"] == "old-id"
			
 
				 
			
 
				     # The existing article must still be in the merged cluster.
			
 
				     article_urls = [a["url"] for a in all_clusters[0]["articles"]]