|
@@ -416,6 +416,101 @@ async def get_news_sentiment(entity: str, timeframe: str = "24h"):
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+def _parse_timeframe_to_hours(timeframe: str) -> int:
|
|
|
|
|
+ tf = str(timeframe).strip().lower()
|
|
|
|
|
+ try:
|
|
|
|
|
+ if tf.endswith("d"):
|
|
|
|
|
+ days = int(tf[:-1])
|
|
|
|
|
+ return max(1, days * 24)
|
|
|
|
|
+ if tf.endswith("h"):
|
|
|
|
|
+ return max(1, int(tf[:-1]))
|
|
|
|
|
+ return max(1, int(tf))
|
|
|
|
|
+ except Exception:
|
|
|
|
|
+ return 24
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
@mcp.tool(
    description="Given a subject entity, find related entities via co-occurrence inside recent clusters (entity-only, no topic fallback)."
)
async def get_related_entities(subject: str, timeframe: str = "24h", limit: int = 10):
    """Find entities that co-occur with *subject* in recent news clusters.

    Args:
        subject: Entity to look up; normalized and expanded via trends
            resolution into a set of alias query terms.
        timeframe: Recency window (e.g. "24h", "7d"); parsed by
            ``_parse_timeframe_to_hours``.
        limit: Maximum number of related entities returned (clamped to 1..30).

    Returns:
        A list of dicts with keys ``entity``, ``count``, ``avg_importance``,
        ``sentiment`` ("positive"/"negative"/"neutral"), and ``score``,
        ranked by co-occurrence count then average importance. Empty list
        when the subject normalizes to nothing.
    """
    store = SQLiteClusterStore(DB_PATH)
    limit = max(1, min(int(limit), 30))

    subj = normalize_query(subject).strip().lower()
    if not subj:
        return []

    # Expand the subject into every known alias so cluster matching is robust.
    resolved = resolve_entity_via_trends(subj)
    query_terms = {
        subj,
        str(resolved.get("normalized") or "").strip().lower(),
        str(resolved.get("canonical_label") or "").strip().lower(),
        str(resolved.get("mid") or "").strip().lower(),
    }
    query_terms = {q for q in query_terms if q}

    hours = _parse_timeframe_to_hours(timeframe)
    clusters = store.get_latest_clusters_all_topics(ttl_hours=hours, limit=500)

    # Aggregate related metrics per entity.
    rel_count = Counter()
    rel_imp_sum = Counter()
    rel_sent_sum = Counter()
    rel_sent_n = Counter()

    for c in clusters:
        haystack = _cluster_entity_haystack(c)
        if not any(term in item for item in haystack for term in query_terms):
            continue

        # Importance and sentiment are cluster-level values: convert them once
        # per cluster instead of re-parsing (with try/except) for every entity.
        try:
            importance = float(c.get("importance", 0.0) or 0.0)
        except (TypeError, ValueError):
            importance = 0.0
        sentiment_score = None
        raw_score = c.get("sentimentScore")
        if raw_score is not None:
            try:
                sentiment_score = float(raw_score)
            except (TypeError, ValueError):
                sentiment_score = None

        ents = [str(e).strip().lower() for e in (c.get("entities", []) or []) if str(e).strip()]
        # Remove generic/meta-ish short tokens conservatively.
        ents = [e for e in ents if len(e) >= 4]
        # NOTE(review): a duplicated entity within one cluster is counted once
        # per occurrence, matching the original behavior — confirm intended.
        for e in ents:
            if e in query_terms:
                continue
            rel_count[e] += 1
            rel_imp_sum[e] += importance
            if sentiment_score is not None:
                rel_sent_sum[e] += sentiment_score
                rel_sent_n[e] += 1

    # Rank by count descending, then average importance descending. The
    # original used Counter.most_common(), which ignores the importance
    # tie-break its comment promised.
    def _rank_key(item):
        ent, cnt = item
        return (-cnt, -(rel_imp_sum[ent] / cnt))

    items = []
    for ent, cnt in sorted(rel_count.items(), key=_rank_key)[:limit]:
        avg_imp = rel_imp_sum[ent] / max(1, cnt)
        avg_score = rel_sent_sum[ent] / max(1, rel_sent_n[ent]) if rel_sent_n[ent] else 0.0
        if avg_score >= 0.15:
            sentiment = "positive"
        elif avg_score <= -0.15:
            sentiment = "negative"
        else:
            sentiment = "neutral"

        items.append(
            {
                "entity": ent,
                "count": cnt,
                "avg_importance": round(avg_imp, 3),
                "sentiment": sentiment,
                "score": round(avg_score, 3),
            }
        )

    return items
|
|
|
|
|
+
|
|
|
# Single application instance and startup logger. The previous revision
# carried a duplicated `app = FastAPI(...)` and a duplicated
# `logger = logging.getLogger(...)` (merge/paste artifact); one of each
# suffices — the second assignments merely rebound the same names.
app = FastAPI(title="News MCP Server")

logger = logging.getLogger("news_mcp.startup")
|