Sfoglia il codice sorgente

news-mcp: add get_news_sentiment + LLM-derived importance

Lukas Goldschmidt 1 mese fa
parent
commit
9c337fea91
3 file modificati con 109 aggiunte e 2 eliminazioni
  1. 19 2
      news_mcp/enrichment/importance.py
  2. 70 0
      news_mcp/mcp_server_fastmcp.py
  3. 20 0
      test_news_mcp.py

+ 19 - 2
news_mcp/enrichment/importance.py

@@ -4,8 +4,25 @@ from typing import Any, Dict
 
 
 def compute_importance(cluster: Dict[str, Any]) -> float:
-    # v1 heuristic: more sources/number of articles => higher importance; capped.
+    """Compute an importance score for an already-enriched cluster.
+
+    Preference: use LLM-derived signals when available.
+
+    Heuristic blend:
+    - consensus/coverage: sources + number of articles
+    - signal strength: |sentimentScore| (LLM-derived)
+    """
+
     sources = len(set(cluster.get("sources", [])))
     article_count = len(cluster.get("articles", []))
-    score = 0.15 * sources + 0.02 * article_count
+    sentiment_score = cluster.get("sentimentScore")
+    if sentiment_score is None:
+        sentiment_score = 0.0
+
+    # Coverage term (kept conservative)
+    coverage = 0.10 * sources + 0.01 * article_count
+    # LLM signal term: higher magnitude sentiment => higher importance.
+    signal = 0.60 * min(1.0, abs(float(sentiment_score)))
+
+    score = coverage + signal
     return min(0.99, round(score, 2))

+ 70 - 0
news_mcp/mcp_server_fastmcp.py

@@ -9,6 +9,7 @@ from news_mcp.config import NEWS_REFRESH_INTERVAL_SECONDS, NEWS_BACKGROUND_REFRE
 from news_mcp.jobs.poller import refresh_clusters
 from news_mcp.storage.sqlite_store import SQLiteClusterStore
 from news_mcp.enrichment.groq_enrich import summarize_cluster_groq
+from collections import Counter
 
 
 mcp = FastMCP(
@@ -182,6 +183,75 @@ async def detect_emerging_topics(limit: int = 10):
     return emerging[:limit]
 
 
+@mcp.tool(description="What's the overall sentiment around an entity within a timeframe?")
+async def get_news_sentiment(entity: str, timeframe: str = "24h"):
+    store = SQLiteClusterStore(DB_PATH)
+
+    ent = str(entity).strip().lower()
+    if not ent:
+        return {
+            "entity": entity,
+            "sentiment": "neutral",
+            "score": 0.0,
+            "cluster_count": 0,
+        }
+
+    # timeframe: accept '24h' or '24'
+    tf = str(timeframe).strip().lower()
+    try:
+        hours = int(tf[:-1]) if tf.endswith("h") else int(tf)
+    except Exception:
+        hours = 24
+    hours = max(1, min(int(hours), 168))
+
+    clusters = store.get_latest_clusters_all_topics(ttl_hours=hours, limit=500)
+    matched = []
+    for c in clusters:
+        ents = c.get("entities") or []
+        if any(ent in str(e).lower() for e in ents):
+            matched.append(c)
+
+    if not matched:
+        return {
+            "entity": entity,
+            "sentiment": "neutral",
+            "score": 0.0,
+            "cluster_count": 0,
+        }
+
+    scores = []
+    labels = []
+    for c in matched:
+        s = c.get("sentimentScore")
+        if s is not None:
+            try:
+                scores.append(float(s))
+            except Exception:
+                pass
+        lbl = c.get("sentiment")
+        if lbl:
+            labels.append(str(lbl).lower())
+
+    avg_score = sum(scores) / len(scores) if scores else 0.0
+
+    # Majority vote on sentiment label, fall back to sign of avg score.
+    if labels:
+        majority = Counter(labels).most_common(1)[0][0]
+        if majority in {"positive", "negative", "neutral"}:
+            sentiment = majority
+        else:
+            sentiment = "positive" if avg_score > 0 else "negative" if avg_score < 0 else "neutral"
+    else:
+        sentiment = "positive" if avg_score > 0 else "negative" if avg_score < 0 else "neutral"
+
+    return {
+        "entity": entity,
+        "sentiment": sentiment,
+        "score": round(avg_score, 3),
+        "cluster_count": len(matched),
+    }
+
+
 app = FastAPI(title="News MCP Server")
 
 app.mount("/mcp", mcp.sse_app())

+ 20 - 0
test_news_mcp.py

@@ -5,6 +5,7 @@ from pathlib import Path
 
 from news_mcp.dedup.cluster import dedup_and_cluster_articles
 from news_mcp.storage.sqlite_store import SQLiteClusterStore
+from news_mcp.enrichment.importance import compute_importance
 
 
 def _article(title: str, url: str = "https://example.com/x", source: str = "Src", ts: str = "Mon, 30 Mar 2026 12:00:00 GMT"):
@@ -71,3 +72,22 @@ def test_sqlite_summary_cache_roundtrip():
         assert cached is not None
         assert cached["mergedSummary"] == "Merged summary"
         assert cached["keyFacts"] == ["Fact 1"]
+
+
+def test_importance_prefers_llm_signal():
+    # Two clusters with same coverage but different sentiment magnitude.
+    base = {
+        "sources": ["A", "B"],
+        "articles": [{}, {}],
+        "sentiment": "neutral",
+        "sentimentScore": 0.0,
+    }
+    pos = dict(base, sentimentScore=0.9)
+    neg = dict(base, sentimentScore=-0.8)
+
+    imp_base = compute_importance(base)
+    imp_pos = compute_importance(pos)
+    imp_neg = compute_importance(neg)
+
+    assert imp_pos >= imp_base
+    assert imp_neg >= imp_base