hai 1 semana · e77a2e6e3e
--- a/news_mcp/enrichment/llm_enrich.py
+++ b/news_mcp/enrichment/llm_enrich.py
@@ -56,6 +56,16 @@ async def classify_cluster_llm(cluster: Dict[str, Any]) -> Dict[str, Any]:
 
				     _topic_labels = {t.lower() for t in DEFAULT_TOPICS}
			
 
				     keywords = [k for k in keywords if k.lower() not in _topic_labels]
			
 
				 
			
 
				+    # Enforce per-keyword length cap (max 2 words) as a hard guard.
			
 
				+    # The prompt requests this but the LLM occasionally ignores it.
			
 
				+    keywords = [k for k in keywords if len(k.split()) <= 2]
			
 
				+
			
 
				+    # De-duplicate entities vs keywords — entities list is the
			
 
				+    # authoritative source for proper nouns; keywords should be the
			
 
				+    # thematic complement, not a repeat.
			
 
				+    _entity_keys = {e.strip().lower() for e in entities}
			
 
				+    keywords = [k for k in keywords if k.strip().lower() not in _entity_keys]
			
 
				+
			
 
				     out.update({
			
 
				         "topic": topic,
			
 
				         "entities": entities,
			
--- a/news_mcp/mcp_server_fastmcp.py
+++ b/news_mcp/mcp_server_fastmcp.py
@@ -866,7 +866,7 @@ async def detect_emerging_topics(limit: int = 10, timeframe: str = "24h", topic:
 
				 
			
 
				         related = []
			
 
				         if ent in entity_cooccur:
			
 
				-            for other, _cnt in entity_cooccur[ent].most_common(5):
			
 
				+            for other, _cnt in entity_cooccur[ent].most_common(10):
			
 
				                 if other != ent:
			
 
				                     related.append(other)
			
 
				 
			
@@ -890,7 +890,7 @@ async def detect_emerging_topics(limit: int = 10, timeframe: str = "24h", topic:
 
				         scored.append({
			
 
				             "topic": ent,
			
 
				             "trend_score": min(0.99, round(composed_score, 3)),
			
 
				-            "related_entities": related[:3] if related else [ent],
			
 
				+            "related_entities": related[:5] if related else [ent],
			
 
				             "related_keywords": related_kws[:5],
			
 
				             "velocity": round(velocity, 2),
			
 
				             "recent_count": recent_n,
			
@@ -942,7 +942,7 @@ async def detect_emerging_topics(limit: int = 10, timeframe: str = "24h", topic:
 
				                     kw_related_ents.append(other)
			
 
				                 else:
			
 
				                     kw_related_kws.append(other)
			
 
				-                if len(kw_related_kws) >= 5 and len(kw_related_ents) >= 3:
			
 
				+                if len(kw_related_kws) >= 5 and len(kw_related_ents) >= 5:
			
 
				                     break
			
 
				 
			
 
				         kw_scored.append({
			
--- a/prompts/extract_entities.prompt
+++ b/prompts/extract_entities.prompt
@@ -15,6 +15,14 @@ Entity rules (strict):
 
				 - Prefer canonical entity forms over aliases when obvious (for example, use full organization or place names where helpful).
			
 
				 - Do NOT return empty entities if any such names/places appear.
			
 
				 
			
 
				+Keyword rules (strict):
			
 
				+- Each keyword MUST be 1-2 words. Never 3+.
			
 
				+- Keywords are thematic search tags, NOT headline restatements or verb phrases.
			
 
				+- Good keywords: noun phrases or named concepts (e.g. "drone strikes", "energy infrastructure", "nuclear plant", "oil refinery").
			
 
				+- Bad keywords: full headline fragments, verb-heavy phrases, or anything over 2 words.
			
 
				+- Keywords should capture the *themes* of the story, not repeat entity names already in the entities list.
			
 
				+- Return 2-4 keywords. Returning fewer keywords is better than including bad ones.
			
 
				+
			
 
				 Sentiment rules:
			
 
				 - positive: clearly encouraging, improving, or supportive tone
			
 
				 - negative: clearly alarming, worsening, severe, conflict, loss, risk, warning tone