|
|
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Tuple
|
|
|
|
|
|
from news_mcp.sources.news_feeds import normalize_topic_from_title
|
|
|
from news_mcp.dedup.embedding_support import CandidateRules, cluster_is_candidate, cosine_similarity, ollama_embed
|
|
|
-from news_mcp.config import NEWS_EMBEDDINGS_ENABLED
|
|
|
+from news_mcp.config import NEWS_EMBEDDINGS_ENABLED, NEWS_EMBEDDING_SIMILARITY_THRESHOLD
|
|
|
|
|
|
import re
|
|
|
from difflib import SequenceMatcher
|
|
|
@@ -80,7 +80,7 @@ def dedup_and_cluster_articles(
|
|
|
|
|
|
threshold = similarity_threshold
|
|
|
if NEWS_EMBEDDINGS_ENABLED:
|
|
|
- threshold = max(similarity_threshold, 0.82)
|
|
|
+ threshold = max(similarity_threshold, NEWS_EMBEDDING_SIMILARITY_THRESHOLD)
|
|
|
|
|
|
if best_idx is not None and best_sim >= threshold:
|
|
|
c = clusters[best_idx]
|