Procházet zdrojové kódy

news-mcp: add capabilities tool and recipes

Lukas Goldschmidt před 1 měsícem
rodič
revize
5686c134e4
2 změněné soubory, 149 přidání a 0 odebrání
  1. 11 0
      README.md
  2. 138 0
      news_mcp/mcp_server_fastmcp.py

+ 11 - 0
README.md

@@ -56,6 +56,9 @@ Health:
 - merges recent co-occurrence data from cached clusters with Google Trends suggestions and returns
   related entities (with `mid` when available) plus source/score metadata
 
+7) `get_capabilities()`
+- describes the server’s tool surface, composition recipes, and output conventions for agents
+
 ### Entity aliasing
 
 The server keeps a conservative alias map in `config/entity_aliases.json` for obvious shorthands
@@ -203,6 +206,14 @@ mcporter --config "$CONFIG" call news.get_related_recent_entities subject=Iran t
 mcporter --config "$CONFIG" call news.get_related_recent_entities subject="iran war" timeframe=72h limit=12 include_trends=true
 ```
 
+### 7) Capabilities / composition guidance
+
+```bash
+mcporter --config "$CONFIG" call news.get_capabilities
+```
+
+Use this when you want the server to explain how to chain the tools together, which fields to keep hidden (e.g. `cluster_id`), and how to present sources/timestamps consistently.
+
 ## Blacklist enforcement (optional back-clean)
 
 If you change `ENTITY_BLACKLIST`, existing clusters in `news.sqlite` may still

+ 138 - 0
news_mcp/mcp_server_fastmcp.py

@@ -82,6 +82,121 @@ def _sort_clusters_by_recency(clusters: list[dict]) -> list[dict]:
     )
 
 
+def _tool_card(name: str, description: str, inputs: list[dict], outputs: list[str], notes: list[str] | None = None) -> dict:
+    return {
+        "name": name,
+        "description": description,
+        "inputs": inputs,
+        "outputs": outputs,
+        "notes": notes or [],
+    }
+
+
+NEWS_TOOL_CARDS = [
+    _tool_card(
+        "get_latest_events",
+        "Get the newest deduplicated clusters for a topic or resolved entity-like query.",
+        [
+            {"name": "topic", "type": "string", "default": "crypto", "meaning": "coarse category or entity-like topic"},
+            {"name": "limit", "type": "integer", "default": 5, "range": "1-20"},
+            {"name": "include_articles", "type": "boolean", "default": False},
+        ],
+        ["headline", "summary", "entities", "sentiment", "importance", "sources", "timestamp", "articles?"],
+        ["Use when you want the freshest clusters and are willing to let the server decide topic vs entity mode."],
+    ),
+    _tool_card(
+        "get_events_for_entity",
+        "Search recent clusters for a person, place, company, or theme by entity matching.",
+        [
+            {"name": "entity", "type": "string", "meaning": "entity label or phrase"},
+            {"name": "timeframe", "type": "string", "default": "24h", "examples": ["24h", "72h", "3d"]},
+            {"name": "limit", "type": "integer", "default": 10, "range": "1-30"},
+            {"name": "include_articles", "type": "boolean", "default": False},
+        ],
+        ["headline", "summary", "entities", "sentiment", "importance", "sources", "timestamp", "articles?"],
+        ["Normalization is automatic; use this for an entity-centered deep dive."],
+    ),
+    _tool_card(
+        "get_event_summary",
+        "Produce a concise LLM-written explanation for one cluster and key facts.",
+        [
+            {"name": "event_id", "type": "string", "meaning": "cluster_id; do not surface in user-facing prose"},
+            {"name": "include_articles", "type": "boolean", "default": False},
+        ],
+        ["headline", "mergedSummary", "keyFacts", "sources", "articles?"],
+        ["Prefer this after you have already chosen a specific cluster to explain."],
+    ),
+    _tool_card(
+        "detect_emerging_topics",
+        "Surface entities and phrases starting to matter in the recent window.",
+        [{"name": "limit", "type": "integer", "default": 10, "range": "1-20"}],
+        ["topic", "trend_score", "related_entities", "signal_type", "count", "avg_importance"],
+        ["Good for 'what is heating up?' style questions."],
+    ),
+    _tool_card(
+        "get_news_sentiment",
+        "Estimate sentiment around an entity over a lookback window.",
+        [
+            {"name": "entity", "type": "string"},
+            {"name": "timeframe", "type": "string", "default": "24h"},
+        ],
+        ["entity", "sentiment", "score", "cluster_count"],
+        ["Use after locating a cluster set or entity neighborhood."],
+    ),
+    _tool_card(
+        "get_related_recent_entities",
+        "Blend local co-occurrence with Google Trends related topics, while preserving mids where available.",
+        [
+            {"name": "subject", "type": "string", "meaning": "canonical entity or subject phrase"},
+            {"name": "timeframe", "type": "string", "default": "72h"},
+            {"name": "limit", "type": "integer", "default": 10, "range": "1-25"},
+            {"name": "include_trends", "type": "boolean", "default": True},
+        ],
+        ["subject", "related[].normalized", "related[].canonical_label", "related[].mid", "related[].sources", "related[].scores"],
+        ["Use this to drill from a subject into related entities, then feed those into get_events_for_entity."],
+    ),
+]
+
+
+NEWS_COMPOSITION_RECIPES = [
+    {
+        "name": "fresh-news-tail",
+        "steps": [
+            "get_latest_events(topic=...)",
+            "optionally get_event_summary(event_id=...) for the strongest cluster",
+        ],
+        "notes": ["Best for a quick tail of what is happening now."]
+    },
+    {
+        "name": "entity-deep-dive",
+        "steps": [
+            "get_events_for_entity(entity=...)",
+            "get_event_summary(event_id=...)",
+            "get_news_sentiment(entity=..., timeframe=...)",
+        ],
+        "notes": ["Prefer canonical entity labels when you have them; the server normalizes for you."],
+    },
+    {
+        "name": "subject-neighborhood",
+        "steps": [
+            "get_related_recent_entities(subject=...)",
+            "for each strong related entity, call get_events_for_entity(entity=...)",
+        ],
+        "notes": ["Use this when you want a graph-like expansion around a subject."]
+    },
+    {
+        "name": "emerging-signal",
+        "steps": [
+            "detect_emerging_topics(limit=...)",
+            "choose a topic/entity",
+            "get_events_for_entity(entity=...)",
+            "get_news_sentiment(entity=...)",
+        ],
+        "notes": ["Good for trend scouting and risk mapping."],
+    },
+]
+
+
 @mcp.tool(description="Investigate a topic and return the newest deduplicated news clusters, sorted by recency.")
 async def get_latest_events(topic: str = "crypto", limit: int = 5, include_articles: bool = False):
     limit = max(1, min(int(limit), 20))
@@ -467,6 +582,28 @@ async def get_news_sentiment(entity: str, timeframe: str = "24h"):
     }
 
 
+@mcp.tool(description="Describe the server tool surface, how tools fit together, and output conventions for downstream agents.")
+async def get_capabilities():
+    return {
+        "server": {
+            "name": "news-mcp",
+            "purpose": "Recent news clusters, entity drill-down, sentiment, emerging topics, and related-entity expansion.",
+            "output_conventions": {
+                "cluster_ids": "Do not surface cluster_id in user-facing prose unless explicitly requested; treat it as internal navigation metadata.",
+                "sources": "Always preserve and display sources when summarizing a cluster or entity result.",
+                "timestamps": "Mention timestamps consistently when comparing multiple clusters or when recency matters.",
+            },
+        },
+        "tools": NEWS_TOOL_CARDS,
+        "recipes": NEWS_COMPOSITION_RECIPES,
+        "guidance": [
+            "Use get_latest_events for a tail, get_events_for_entity for entity deep dives, and get_related_recent_entities for neighborhood expansion.",
+            "Prefer normalized/canonical entities when possible, but the server will resolve common aliases and MIDs for you.",
+            "When presenting results to users, summarize the cluster; avoid exposing internal IDs unless they are needed for follow-up tool calls.",
+        ],
+    }
+
+
 def _parse_timeframe_to_hours(timeframe: str) -> int:
     tf = str(timeframe).strip().lower()
     try:
@@ -536,6 +673,7 @@ def root():
             "detect_emerging_topics",
             "get_news_sentiment",
             "get_related_recent_entities",
+            "get_capabilities",
         ],
         "refresh": {
             "enabled": NEWS_BACKGROUND_REFRESH_ENABLED,