Bladeren bron

docs: document /mcp JSON-RPC minimal compatibility

Lukas Goldschmidt 1 maand geleden
bovenliggende
commit
a8a671f6c4
3 gewijzigde bestanden met toevoegingen van 297 en 26 verwijderingen
  1. 1 0
      .gitignore
  2. 12 0
      README.md
  3. 284 26
      virtuoso_mcp.py

+ 1 - 0
.gitignore

@@ -11,6 +11,7 @@ venv/
 
 # Logs
 *.log
+logs
 
 # VSCode
 .vscode/

+ 12 - 0
README.md

@@ -85,6 +85,18 @@ Set `MCP_ALLOW_EXAMPLE_LOAD=true` to enable the `load_examples` tool. It loads T
 - `insert_triple` (single-triple update helper)
 - `load_examples` (optional; requires `MCP_ALLOW_EXAMPLE_LOAD=true`; loads fixtures such as `examples/catalog_fixture.ttl`)
 
+## MCP JSON-RPC compatibility (v0.1, minimal)
+
+The `/mcp` endpoint supports:
+
+1) **Legacy tool router** (unchanged): `{ "tool": "...", "input": {...} }`
+2) **Minimal JSON-RPC 2.0** messages:
+   - `initialize`
+   - `tools/list`
+   - `tools/call`
+
+This is intended to work with OpenClaw’s MCP bridge and allow tool discovery + calling with request ids.
+
 ## Layering recommendation
 
 Keep ontology discovery in `virtuoso_mcp` so any specialized layer (garden, inventory, analytics, etc.) can reuse it. Domain modules should call these generic tools instead of re-implementing ontology probing logic.

+ 284 - 26
virtuoso_mcp.py

@@ -62,6 +62,180 @@ class ToolRequest(BaseModel):
     input: Dict[str, Any] = {}
 
 
+# --- MCP (minimal JSON-RPC 2.0) models ---
+
+class JsonRpcRequest(BaseModel):
+    jsonrpc: str = "2.0"
+    id: Optional[Any] = None
+    method: str
+    params: Dict[str, Any] = {}
+
+
+def mcp_error(id_value: Any, message: str, code: int = -32000) -> Dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": id_value,
+        "error": {
+            "code": code,
+            "message": message,
+        },
+    }
+
+
+def mcp_result(id_value: Any, result: Dict[str, Any]) -> Dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": id_value,
+        "result": result,
+    }
+
+
+def _mcp_tool_definition(name: str) -> Dict[str, Any]:
+    # Incremental compliance step: add explicit input schemas for the most-used tools.
+    # We still keep `additionalProperties: True` so we don't break existing clients.
+    base: Dict[str, Any] = {
+        "name": name,
+        "description": TOOL_DOCS.get(name, ""),
+        "inputSchema": {
+            "type": "object",
+            "additionalProperties": True,
+            "properties": {},
+            "required": [],
+        },
+    }
+
+    if name == "sparql_query":
+        base["inputSchema"]["properties"] = {
+            "query": {"type": "string", "description": "SPARQL SELECT query (bounded + guardrailed)"}
+        }
+        base["inputSchema"]["required"] = ["query"]
+        return base
+
+    if name == "search_label":
+        base["inputSchema"]["properties"] = {
+            "term": {"type": "string", "description": "Substring to search in rdfs:label"},
+            "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
+        }
+        base["inputSchema"]["required"] = ["term"]
+        return base
+
+    if name == "get_entities_by_type":
+        base["inputSchema"]["properties"] = {
+            "type_uri": {"type": "string", "description": "RDF type URI"},
+            "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max subjects"},
+        }
+        base["inputSchema"]["required"] = ["type_uri"]
+        return base
+
+    if name == "list_graphs":
+        # No inputs.
+        return base
+
+    if name == "list_classes":
+        base["inputSchema"]["properties"] = {
+            "term": {"type": "string", "description": "Optional substring to match labels/comments"},
+            "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
+        }
+        base["inputSchema"]["required"] = []
+        return base
+
+    if name == "list_properties":
+        base["inputSchema"]["properties"] = {
+            "term": {"type": "string", "description": "Optional substring to match labels/comments"},
+            "domain_uri": {"type": "string", "description": "Optional rdfs:domain class URI"},
+            "range_uri": {"type": "string", "description": "Optional rdfs:range class URI"},
+            "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
+        }
+        base["inputSchema"]["required"] = []
+        return base
+
+    if name == "describe_class":
+        base["inputSchema"]["properties"] = {
+            "class_uri": {"type": "string", "description": "Class URI to describe"},
+        }
+        base["inputSchema"]["required"] = ["class_uri"]
+        return base
+
+    if name == "describe_property":
+        base["inputSchema"]["properties"] = {
+            "property_uri": {"type": "string", "description": "Property URI to describe"},
+            "usage_limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "How many usage examples to include"},
+        }
+        base["inputSchema"]["required"] = ["property_uri"]
+        return base
+
+    # ---- Entity navigation batch (B) ----
+    if name == "get_predicates_for_subject":
+        base["inputSchema"]["properties"] = {
+            "subject_uri": {"type": "string", "description": "Subject URI"},
+            "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max predicates"},
+        }
+        base["inputSchema"]["required"] = ["subject_uri"]
+        return base
+
+    if name == "get_labels_for_subject":
+        base["inputSchema"]["properties"] = {
+            "subject_uri": {"type": "string", "description": "Subject URI"},
+        }
+        base["inputSchema"]["required"] = ["subject_uri"]
+        return base
+
+    if name == "traverse_property":
+        base["inputSchema"]["properties"] = {
+            "subject_uri": {"type": "string", "description": "Starting subject URI"},
+            "property_uri": {"type": "string", "description": "Predicate URI to traverse"},
+            "direction": {
+                "type": "string",
+                "enum": ["outgoing", "incoming"],
+                "description": "Traversal direction",
+            },
+            "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max neighbors"},
+        }
+        base["inputSchema"]["required"] = ["subject_uri", "property_uri"]
+        return base
+
+    if name == "describe_subject":
+        base["inputSchema"]["properties"] = {
+            "subject_uri": {"type": "string", "description": "Subject URI"},
+            "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max outgoing predicate/object pairs"},
+        }
+        base["inputSchema"]["required"] = ["subject_uri"]
+        return base
+
+    if name == "path_traverse":
+        base["inputSchema"]["properties"] = {
+            "subject_uri": {"type": "string", "description": "Starting subject URI"},
+            "property_path": {
+                "type": "string",
+                "description": "Comma-separated list of predicate URIs (alternative to 'properties')",
+            },
+            "properties": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "List of predicate URIs",
+            },
+            "direction": {
+                "type": "string",
+                "enum": ["outgoing", "incoming"],
+                "description": "Traversal direction",
+            },
+            "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
+        }
+        base["inputSchema"]["required"] = ["subject_uri"]
+        return base
+
+    # ---- Relationship analytics batch (C) ----
+    if name == "property_usage_statistics":
+        base["inputSchema"]["properties"] = {
+            "property_uri": {"type": "string", "description": "Predicate URI"},
+            "examples_limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "How many usage examples to include"},
+        }
+        base["inputSchema"]["required"] = ["property_uri"]
+        return base
+
+    return base
+
+
 # --- CORE SPARQL FUNCTION ---
 
 def _build_auth() -> Optional[HTTPDigestAuth]:
@@ -728,36 +902,120 @@ TOOL_DOCS = {
 # --- MCP ENDPOINT ---
 
 @app.post("/mcp")
-def handle_mcp(tool_request: ToolRequest, http_request: Request):
-    tool_name = tool_request.tool
-    input_data = tool_request.input or {}
-    client_host = http_request.client.host if http_request.client else "unknown"
-    trimmed_input = json.dumps(input_data, ensure_ascii=False, default=str)
-    if len(trimmed_input) > 1024:
-        trimmed_input = f"{trimmed_input[:1024]}…"
-    timestamp = datetime.now(timezone.utc).isoformat()
-    tool_logger.info(
-        "tool=%s client=%s time=%s input=%s",
-        tool_name,
-        client_host,
-        timestamp,
-        trimmed_input,
-    )
+async def handle_mcp(http_request: Request):
+    """Minimal MCP-ish JSON-RPC 2.0 endpoint on POST /mcp.
+
+    Backward compatible legacy mode:
+      {"tool": "search_label", "input": {...}}
 
-    if tool_name not in TOOLS:
-        raise HTTPException(status_code=400, detail=f"Unknown tool: {tool_name}")
+    Minimal JSON-RPC mode (first step towards MCP compliance):
+      {"jsonrpc":"2.0","id":1,"method":"initialize","params":{...}}
+      {"jsonrpc":"2.0","id":2,"method":"tools/list","params":{...}}
+      {"jsonrpc":"2.0","id":3,"method":"tools/call","params":{ "tool": "...", "params": {...} }}
+    """
 
+    body = None
     try:
-        result = TOOLS[tool_name](input_data)
-        return {
-            "status": "ok",
-            "tool": tool_name,
-            "description": TOOL_DOCS.get(tool_name, ""),
-            "result": result,
-        }
+        body = await http_request.json()
+    except Exception:
+        body = None
+
+    # ---- Legacy mode ----
+    if isinstance(body, dict) and "tool" in body:
+        legacy = ToolRequest(**body)
+        tool_name = legacy.tool
+        input_data = legacy.input or {}
+
+        client_host = http_request.client.host if http_request.client else "unknown"
+        trimmed_input = json.dumps(input_data, ensure_ascii=False, default=str)
+        if len(trimmed_input) > 1024:
+            trimmed_input = f"{trimmed_input[:1024]}…"
+        timestamp = datetime.now(timezone.utc).isoformat()
+        tool_logger.info(
+            "tool=%s client=%s time=%s input=%s",
+            tool_name,
+            client_host,
+            timestamp,
+            trimmed_input,
+        )
+
+        if tool_name not in TOOLS:
+            raise HTTPException(status_code=400, detail=f"Unknown tool: {tool_name}")
+
+        try:
+            result = TOOLS[tool_name](input_data)
+            return {
+                "status": "ok",
+                "tool": tool_name,
+                "description": TOOL_DOCS.get(tool_name, ""),
+                "result": result,
+            }
+        except Exception as exc:
+            logger.error("Tool %s failed: %s", tool_name, exc)
+            raise HTTPException(status_code=500, detail=str(exc))
+
+    # ---- JSON-RPC 2.0 mode ----
+    if not isinstance(body, dict):
+        return mcp_error(None, "Invalid JSON-RPC request", code=-32600)
+
+    try:
+        rpc_req = JsonRpcRequest(**body)
     except Exception as exc:
-        logger.error("Tool %s failed: %s", tool_name, exc)
-        raise HTTPException(status_code=500, detail=str(exc))
+        # If body is malformed, still surface the id if present.
+        rpc_id = body.get("id")
+        logger.warning("Invalid JSON-RPC request: %s", exc)
+        return mcp_error(rpc_id, "Invalid JSON-RPC request", code=-32600)
+
+    method = rpc_req.method
+    rpc_id = rpc_req.id
+    params = rpc_req.params or {}
+
+    if method == "initialize":
+        tools = [_mcp_tool_definition(name) for name in sorted(TOOLS.keys())]
+        return mcp_result(
+            rpc_id,
+            {
+                "protocolVersion": "0.1",
+                "capabilities": {
+                    "tools": True,
+                    "list": True,
+                    "call": True,
+                },
+                "tools": tools,
+            },
+        )
+
+    if method in {"tools/list", "tools/listTools"}:
+        tools = [_mcp_tool_definition(name) for name in sorted(TOOLS.keys())]
+        return mcp_result(rpc_id, {"tools": tools})
+
+    if method in {"tools/call", "tools/callTool"}:
+        # Different clients sometimes wrap the call slightly differently.
+        tool_name = (
+            params.get("tool")
+            or params.get("name")
+            or params.get("toolName")
+        )
+        input_data = (
+            params.get("params")
+            or params.get("input")
+            or params.get("arguments")
+            or {}
+        )
+        if not tool_name:
+            return mcp_error(rpc_id, "Missing tool name", code=-32602)
+        if tool_name not in TOOLS:
+            return mcp_error(rpc_id, f"Unknown tool: {tool_name}", code=-32601)
+        try:
+            result = TOOLS[tool_name](input_data)
+            return mcp_result(rpc_id, {"result": result})
+        except HTTPException as exc:
+            return mcp_error(rpc_id, str(exc.detail), code=exc.status_code)
+        except Exception as exc:
+            logger.error("Tool %s failed: %s", tool_name, exc)
+            return mcp_error(rpc_id, str(exc), code=-32000)
+
+    return mcp_error(rpc_id, f"Method not found: {method}", code=-32601)
 
 
 # --- HEALTH CHECK ---