Bladeren bron

news-mcp: add get_related_entities tool

Lukas Goldschmidt 1 maand geleden
bovenliggende
commit
e62eb24610
1 gewijzigde bestanden met toevoegingen van 95 en 0 verwijderingen
  1. 95 0
      news_mcp/mcp_server_fastmcp.py

+ 95 - 0
news_mcp/mcp_server_fastmcp.py

@@ -416,6 +416,101 @@ async def get_news_sentiment(entity: str, timeframe: str = "24h"):
     }
 
 
+def _parse_timeframe_to_hours(timeframe: str) -> int:
+    tf = str(timeframe).strip().lower()
+    try:
+        if tf.endswith("d"):
+            days = int(tf[:-1])
+            return max(1, days * 24)
+        if tf.endswith("h"):
+            return max(1, int(tf[:-1]))
+        return max(1, int(tf))
+    except Exception:
+        return 24
+
+
@mcp.tool(
    description="Given a subject entity, find related entities via co-occurrence inside recent clusters (entity-only, no topic fallback)."
)
async def get_related_entities(subject: str, timeframe: str = "24h", limit: int = 10):
    """Find entities that co-occur with *subject* in recent news clusters.

    Scans the latest clusters within the given timeframe, keeps those
    whose entity haystack matches the subject (or one of its trend-resolved
    aliases), and aggregates co-occurrence count, average cluster importance,
    and average sentiment for every other entity in those clusters.

    Args:
        subject: Entity to find neighbours for; normalized and lowercased.
        timeframe: Lookback window, e.g. "24h" or "7d" (see
            ``_parse_timeframe_to_hours``). Defaults to "24h".
        limit: Maximum number of related entities to return; clamped to 1..30.

    Returns:
        A list of dicts with keys ``entity``, ``count``, ``avg_importance``,
        ``sentiment`` ("positive" / "negative" / "neutral"), and ``score``,
        ranked by co-occurrence count, then average importance. Empty list
        when the subject normalizes to an empty string or nothing matches.
    """
    store = SQLiteClusterStore(DB_PATH)
    limit = max(1, min(int(limit), 30))

    subj = normalize_query(subject).strip().lower()
    if not subj:
        return []

    # Expand the query with trend-resolved aliases (normalized form,
    # canonical label, machine id) so matching is not tied to one spelling.
    resolved = resolve_entity_via_trends(subj)
    query_terms = {
        subj,
        str(resolved.get("normalized") or "").strip().lower(),
        str(resolved.get("canonical_label") or "").strip().lower(),
        str(resolved.get("mid") or "").strip().lower(),
    }
    query_terms.discard("")

    hours = _parse_timeframe_to_hours(timeframe)
    clusters = store.get_latest_clusters_all_topics(ttl_hours=hours, limit=500)

    # Aggregate per-entity metrics across all subject-matching clusters.
    rel_count = Counter()      # co-occurrence count
    rel_imp_sum = Counter()    # sum of cluster importance (float-valued)
    rel_sent_sum = Counter()   # sum of sentiment scores (float-valued)
    rel_sent_n = Counter()     # number of clusters contributing a sentiment

    for c in clusters:
        haystack = _cluster_entity_haystack(c)
        # Skip clusters that never mention the subject or any alias.
        if not any(term in item for item in haystack for term in query_terms):
            continue

        ents = [str(e).strip().lower() for e in (c.get("entities", []) or []) if str(e).strip()]
        # Conservatively drop generic/meta-ish short tokens.
        ents = [e for e in ents if len(e) >= 4]
        for e in ents:
            if e in query_terms:
                continue  # the subject itself is not a "related" entity
            rel_count[e] += 1
            try:
                rel_imp_sum[e] += float(c.get("importance", 0.0) or 0.0)
            except Exception:
                pass  # best-effort: a malformed importance shouldn't drop the entity

            # Sentiment aggregation based on sentimentScore if available.
            s = c.get("sentimentScore")
            if s is not None:
                try:
                    rel_sent_sum[e] += float(s)
                    rel_sent_n[e] += 1
                except Exception:
                    pass  # best-effort: ignore unparseable scores

    # Rank by co-occurrence count, then by average importance.
    # Counter.most_common() only orders by count (ties keep insertion
    # order), so importance is applied here explicitly; the entity name
    # is a final tie-break to keep the ordering deterministic.
    ranked = sorted(
        rel_count.items(),
        key=lambda kv: (-kv[1], -(rel_imp_sum[kv[0]] / kv[1]), kv[0]),
    )

    items = []
    for ent, cnt in ranked[:limit]:
        avg_imp = rel_imp_sum[ent] / cnt  # cnt >= 1 by construction
        n_sent = rel_sent_n[ent]
        avg_score = rel_sent_sum[ent] / n_sent if n_sent else 0.0
        # Bucket the average score with a +/-0.15 neutral dead zone.
        if avg_score >= 0.15:
            sentiment = "positive"
        elif avg_score <= -0.15:
            sentiment = "negative"
        else:
            sentiment = "neutral"

        items.append(
            {
                "entity": ent,
                "count": cnt,
                "avg_importance": round(avg_imp, 3),
                "sentiment": sentiment,
                "score": round(avg_score, 3),
            }
        )

    return items
+
+
 app = FastAPI(title="News MCP Server")
 
 logger = logging.getLogger("news_mcp.startup")