1 kuukausi sitten · f97d81f361
--- a/PROJECT.md
+++ b/PROJECT.md
@@ -1,78 +1,27 @@
 
				-# Project: MCP Bridge for Virtuoso (Community Edition)
			
 
				+# Project: Virtuoso MCP Server
			
 
				 
			
 
				-## Overview
			
 
				+## Current state (2026-03)
			
 
				 
			
 
				-Build a minimal MCP server that proxies Virtuoso Community Edition SPARQL endpoint for LLM agents, then expand to additional data sources (PostgreSQL, CouchDB, Qdrant) while keeping tooling tightly structured.
			
 
				+The server now uses FastMCP as its standard transport for MCP clients, while keeping a legacy compatibility router (`POST /rpc`) for existing scripts.
			
 
				 
			
 
				-## Stage 1 — Minimal MCP Server (Virtuoso only)
			
 
				+- FastMCP SSE mount: `/mcp`
			
 
				+- SSE stream endpoint: `/mcp/sse`
			
 
				+- Legacy tool router: `POST /rpc`
			
 
				+- Health/discovery: `/`, `/health`
			
 
				 
			
 
				-- Implement `sparql_query` tool that POSTs to `http://localhost:8891/sparql` with Accept header `application/sparql-results+json`.
			
 
				-- Return parsed JSON straight to the caller; consider timeouts and result limits.
			
 
				-- Provide sanitization / guardrails to prevent runaway queries (SELECT-only + LIMIT enforcement).
			
 
				-- Validate the server works from a simple CLI script before wiring to OpenClaw.
			
 
				+## Main goals
			
 
				 
			
 
				-## Stage 2 — Helper Tools
			
 
				+1. Keep core Virtuoso tooling generic and reusable.
			
 
				+2. Keep guardrails strict (SELECT-only raw query, bounded limits).
			
 
				+3. Keep domain logic in plugins (garden layer), not in transport glue.
			
 
				+4. Maintain compatibility during migration (`/rpc`) while new clients use FastMCP transport.
			
 
				 
			
 
				-- `get_entities_by_type`: fetches all subjects of `rdf:type <TYPE>`.
			
 
				-- `search_label`: filters `rdfs:label` via case-insensitive substring matching.
			
 
				-- `list_graphs`: enumerates distinct graphs that currently contain triples.
			
 
				-- `get_predicates_for_subject`: lists distinct predicates for a subject URI.
			
 
				-- `get_labels_for_subject`: returns labels for a subject URI.
			
 
				-- `insert_triple`: insert a single triple (debugging updates).
			
 
				-- `load_examples`: optionally load Turtle example files from `examples/` into a graph (guarded by `MCP_ALLOW_EXAMPLE_LOAD=true`).
			
 
				-- Later add more semantic tools (predicate discovery, ontology hints) rather than letting the agent write arbitrary SPARQL.
			
 
				+## Plugin policy
			
 
				 
			
 
				-## Testing data policy
			
 
				+Domain plugins may extend `TOOLS`, but they should not add redundant aliases for core capabilities. The garden plugin's tool surface has been trimmed to domain-unique helpers.
			
 
				 
			
 
				-- Fixtures loaded through `load_examples` must describe harmless domains (roses, fish, wine bottles, etc.) so the MCP server tests remain generic and easy to share.
			
 
				-- Actual garden/ganja breeding data stays in the `garden_layer` domain plugin; we do not reuse those URIs in the core MCP toolset or pretend the generic fixtures are the same dataset.
			
 
				+## Next recommended steps
			
 
				 
			
 
				-## Stage 3 — Schema Awareness & Introspection
			
 
				-
			
 
				-- Tools for predicate discovery and class hierarchy.
			
 
				-- Graph-level tooling (e.g., `graph_stats`, `graph_prefixes`).
			
 
				-- Cache basic ontology info to reduce repeated introspection.
			
 
				-
			
 
				-## Stage 4 — Multi-Database Expansion
			
 
				-
			
 
				-- PostgreSQL connector (`sql_query`) via `psycopg` or SQLAlchemy; wrap results in MCP tool schema.
			
 
				-- CouchDB connector (`document_lookup`) via its REST API.
			
 
				-- Qdrant/Chroma connector (`vector_search`) for embedding similarity.
			
 
				-- Each connector implements sanitization, pagination, and ability to annotate results with metadata.
			
 
				-
			
 
				-## Stage 5 — Cross-Source Reasoning
			
 
				-
			
 
				-- MCP server composes SPARQL + SQL + vector results into coherent tool responses.
			
 
				-- Example workflow:
			
 
				-  1. `sparql_query` → IDs + labels.
			
 
				-  2. `sql_query` → metadata for those IDs.
			
 
				-  3. `vector_search` → semantically related docs.
			
 
				-- Provide helper endpoints for the LLM to request multi-source aggregations (e.g., `entity_context`).
			
 
				-
			
 
				-## Tech Stack
			
 
				-
			
 
				-- Python + FastAPI (or lightweight async server).
			
 
				-- `requests` for SPARQL HTTP calls; optional `rdflib` for validation/parsing.
			
 
				-- DB drivers for PostgreSQL/CouchDB; `qdrant-client` or similar for vector search.
			
 
				-- JSON-based MCP schema compatible with OpenClaw tool expectations.
			
 
				-
			
 
				-## Constraints & Safeguards
			
 
				-
			
 
				-- Virtuoso Community Edition cannot load OPAL/VAL (`val_dav.vad` is unsupported).
			
 
				-- Guard against complex SPARQL by providing helper tools and imposing query limits/timeouts.
			
 
				-- Log queries and enforce sanitization to avoid exposing unfiltered input.
			
 
				-- Evaluate performance (SPARQL can be slow); consider caching frequent patterns.
			
 
				-
			
 
				-## Future Extensions
			
 
				-
			
 
				-- Ontology-aware prompting and reasoning layer.
			
 
				-- Caching of frequent query results.
			
 
				-- Hybrid symbolic + vector search mix.
			
 
				-- Expose MCP server as a possible `tools.json` descriptor for OpenClaw.
			
 
				-
			
 
				-## Domain plugin layers
			
 
				-
			
 
				-- Introduce a `DOMAIN_LAYERS` environment variable that lists plugin modules (default `garden_layer.plugin`).
			
 
				-- Each plugin module exposes a `register_layer(tools)` hook that registers domain-prefixed tools (e.g., `garden_add_seedling`).
			
 
				-- On startup, the MCP server imports those modules, calls their hooks, and the new endpoints appear in the `/mcp` tool list without modifying the single FastAPI route.
			
 
				-- This keeps the core server generic while letting any specialized layer (garden, almanac, inventory) add helpers via a simple plugin contract.
			
 
				+- Move remaining internal callers from `/rpc` to native MCP client flows where practical.
			
 
				+- Add protocol-level integration tests for FastMCP transport in addition to `/rpc` functional tests.
			
 
				+- Keep plugin schemas/documentation in sync with exposed tool names.
			
--- a/README.md
+++ b/README.md
@@ -1,122 +1,67 @@
 
				-# MCP Bridge for Virtuoso (Community Edition)
			
 
				+# Virtuoso MCP Server
			
 
				 
			
 
				-A custom MCP server that lets OpenClaw (or any LLM agent) access Virtuoso Community Edition as a semantic backend without running raw SPARQL from the agent. The MCP layer exposes structured tools that orchestrate queries and later aggregate data across additional stores (PostgreSQL, CouchDB, Qdrant).
			
 
				+MCP server for Virtuoso Community Edition with guardrailed SPARQL tools and optional domain plugins.
			
 
				 
			
 
				-## Vision
			
 
				+## Current transport
			
 
				 
			
 
				-- LLMs never issue SQL/SPARQL directly—they call MCP tools.
			
 
				-- The MCP server handles orchestration, sanitization, rate limiting, and multi-source composition.
			
 
				-- Start with Virtuoso (SPARQL) and progressively add new connectors.
			
 
				+- **FastMCP (SSE)** mounted at `/mcp`; the SSE stream is served at `GET /mcp/sse`
			
 
				+- message endpoint provided by FastMCP under `/mcp/messages/`
			
 
				+- **legacy compatibility router** at `POST /rpc` (`{"tool":"...","input":{...}}`)
			
 
				+- health/discovery at `GET /` and `GET /health`
			
 
				 
			
 
				-## Architecture
			
 
				+## Why this shape
			
 
				 
			
 
				-```
			
 
				-LLM Agent (OpenClaw)
			
 
				-↓
			
 
				-MCP Server
			
 
				-├── Virtuoso (SPARQL)
			
 
				-├── PostgreSQL
			
 
				-└── Vector DBs (e.g., Qdrant)
			
 
				-```
			
 
				-
			
 
				-## Guardrails (current)
			
 
				-
			
 
				-- `sparql_query` is **SELECT-only** and always uses a LIMIT (default `SPARQL_DEFAULT_LIMIT`).
			
 
				-- Any LIMIT above `SPARQL_MAX_LIMIT` is clamped.
			
 
				-- Example data loads are disabled unless `MCP_ALLOW_EXAMPLE_LOAD=true` is set.
			
 
				-
			
 
				-## Configuration (env)
			
 
				-
			
 
				-`run.sh` and `test.sh` will source a local `.env` file if present. Use `.env.example` as a template.
			
 
				-
			
 
				-- `VIRTUOSO_ENDPOINT` (default `http://localhost:8891/sparql`; can be `.../sparql-auth` for digest auth)
			
 
				-- `VIRTUOSO_USER` / `VIRTUOSO_PASS` (optional; enables HTTP Digest auth)
			
 
				-- `GRAPH_URI` (used for prefix `:`)
			
 
				-- `SPARQL_TIMEOUT` (seconds)
			
 
				-- `SPARQL_UPDATE_TIMEOUT` (seconds)
			
 
				-- `SPARQL_DEFAULT_LIMIT`
			
 
				-- `SPARQL_MAX_LIMIT`
			
 
				-- `MCP_ALLOW_EXAMPLE_LOAD` (`true`/`false`)
			
 
				-- `EXAMPLE_GRAPH` (graph URI for `load_examples`, default `http://example.org/catalog#test`)
			
 
				-
			
 
				-## Design Principles
			
 
				-
			
 
				-1. Tool-based abstraction: Provide helpers such as `sparql_query`, `get_entities_by_type`, `list_graphs` instead of exposing raw SPARQL.
			
 
				-2. Gradual complexity: Ship a minimal working setup, then layer on helper tooling, schema introspection, and connectors.
			
 
				-3. Separation of concerns: Virtuoso stores RDF, MCP runs tool interfaces, and LLMs focus on reasoning/tool selection.
			
 
				-4. Guardrails: Raw queries are SELECT-only, bounded by a default LIMIT, and clamped to a maximum size.
			
 
				+- mcporter compatibility via FastMCP transport
			
 
				+- stable migration path for existing internal scripts via `/rpc`
			
 
				 
			
 
				-## Success Criteria
			
 
				+## Run
			
 
				 
			
 
				-- Phase 1: MCP tool (`sparql_query`) returns valid SPARQL JSON results.
			
 
				-- Phase 2: LLM relies on helper tools instead of free-form queries (Stage 2 helpers are now present).
			
 
				-- Phase 3: Multiple data sources accessible through a unified MCP interface.
			
 
				-
			
 
				-## Example loading (test instances)
			
 
				-
			
 
				-Set `MCP_ALLOW_EXAMPLE_LOAD=true` to enable the `load_examples` tool. It loads Turtle fixtures (e.g., `examples/catalog_fixture.ttl`) into the `EXAMPLE_GRAPH` (default `http://example.org/catalog#test`). This is meant for test instances only and uses harmless sample data.
			
 
				-
			
 
				-**Note:** the example files are Turtle (`.ttl`) and the loader sends them as SPARQL Update with Turtle prefixes preserved.
			
 
				-
			
 
				-## Current helper tools
			
 
				-
			
 
				-### Core query/navigation
			
 
				-- `sparql_query` (SELECT-only, LIMIT enforced)
			
 
				-- `list_graphs`
			
 
				-- `search_label`
			
 
				-- `get_entities_by_type`
			
 
				-- `get_predicates_for_subject`
			
 
				-- `get_labels_for_subject`
			
 
				-- `traverse_property` (follow any property link, incoming or outgoing, and get labels/descriptions)
			
 
				-
			
 
				-### Ontology discovery (generic, reusable across domain layers)
			
 
				-- `list_classes` (list ontology classes, optional term filter)
			
 
				-- `list_properties` (list ontology properties, optional term/domain/range filters)
			
 
				-- `describe_class` (class label/comment + properties declaring it as domain)
			
 
				-- `describe_property` (property label/comment/domain/range/type + usage samples)
			
 
				-
			
 
				-### Relationship helpers
			
 
				-- `describe_subject` (see all predicates/objects for a subject with optional labels)
			
 
				-- `path_traverse` (walk a configured property path from a subject and return each step)
			
 
				-- `property_usage_statistics` (count property usage and sample subjects/objects)
			
 
				-- `batch_insert` (send TTL or multiple triples in a single guarded update; useful for staging domain changes)
			
 
				+```bash
			
 
				+pip install -r requirements.txt
			
 
				+./run.sh
			
 
				+```
			
 
				 
			
 
				-### Update/test helpers
			
 
				-- `insert_triple` (single-triple update helper)
			
 
				-- `load_examples` (optional; requires `MCP_ALLOW_EXAMPLE_LOAD=true`; loads fixtures such as `examples/catalog_fixture.ttl`)
			
 
				+Default host/port: `0.0.0.0:8501`
			
 
				 
			
 
				-## MCP JSON-RPC compatibility (v0.1, minimal)
			
 
				+## Core tools
			
 
				 
			
 
				-The `/mcp` endpoint supports:
			
 
				+- Query/navigation: `sparql_query`, `list_graphs`, `search_label`, `get_entities_by_type`, `get_predicates_for_subject`, `get_labels_for_subject`, `traverse_property`
			
 
				+- Ontology discovery: `list_classes`, `list_properties`, `describe_class`, `describe_property`
			
 
				+- Relationship helpers: `describe_subject`, `path_traverse`, `property_usage_statistics`
			
 
				+- Update helpers: `insert_triple`, `batch_insert`, `load_examples`
			
 
				 
			
 
				-1) **Legacy tool router** (unchanged): `{ "tool": "...", "input": {...} }`
			
 
				-2) **Minimal JSON-RPC 2.0** messages:
			
 
				-   - `initialize`
			
 
				-   - `tools/list`
			
 
				-   - `tools/call`
			
 
				+## Domain plugin tools (garden layer)
			
 
				 
			
 
				-This is intended to work with OpenClaw’s MCP bridge and allow tool discovery + calling with request ids.
			
 
				+The current set of garden-prefixed tools is intentionally trimmed to non-redundant domain helpers:
			
 
				 
			
 
				-## Roadmap note: what “more MCP compliant” could add
			
 
				+- `garden_add_seedling`
			
 
				+- `garden_cycle_plants`
			
 
				+- `garden_latest_cycle_by_dates`
			
 
				+- `garden_clone_to`
			
 
				+- `garden_cycle_list_detailed`
			
 
				+- `garden_reassign_cycle`
			
 
				 
			
 
				-In practice, your current server already covers the MCP tool surface used by
			
 
				-OpenClaw (`initialize`, `tools/list`, `tools/call`).
			
 
				+## Guardrails
			
 
				 
			
 
				-If you later want broader MCP-client compatibility, the next typical additions
			
 
				-are:
			
 
				+- `sparql_query` is SELECT-only
			
 
				+- LIMIT is enforced (`SPARQL_DEFAULT_LIMIT`, clamped by `SPARQL_MAX_LIMIT`)
			
 
				+- fixture loading requires `MCP_ALLOW_EXAMPLE_LOAD=true`
			
 
				 
			
 
				-* **Resources** (`resources/list`, `resources/read`) for stable domain records
			
 
				-  (e.g., plant/cycle summaries)
			
 
				-* **Prompts** (`prompts/list`, `prompts/get`) for reusable instructions
			
 
				-* **Optional eventing** (often via SSE depending on the client/transport), if
			
 
				-  you later need push notifications instead of polling
			
 
				+## Environment
			
 
				 
			
 
				-You do **not** need SSE to make a tool-based MCP server work for OpenClaw.
			
 
				+Use `.env` (optional). Key variables:
			
 
				 
			
 
				-## Layering recommendation
			
 
				+- `VIRTUOSO_ENDPOINT`, `VIRTUOSO_USER`, `VIRTUOSO_PASS`
			
 
				+- `SPARQL_TIMEOUT`, `SPARQL_UPDATE_TIMEOUT`
			
 
				+- `SPARQL_DEFAULT_LIMIT`, `SPARQL_MAX_LIMIT`
			
 
				+- `GRAPH_URI`, `EXAMPLE_GRAPH`
			
 
				+- `MCP_ALLOW_EXAMPLE_LOAD`
			
 
				+- `DOMAIN_LAYERS` (default: `garden_layer.plugin`)
			
 
				 
			
 
				-Keep ontology discovery in `virtuoso_mcp` so any specialized layer (garden, inventory, analytics, etc.) can reuse it. Domain modules should call these generic tools instead of re-implementing ontology probing logic.
			
 
				+## Tests
			
 
				 
			
 
				-## Domain plugin layers
			
 
				+```bash
			
 
				+./test.sh
			
 
				+```
			
 
				 
			
 
				-To expose domain-specific helpers automatically, set the `DOMAIN_LAYERS` environment variable to a comma-separated list of Python modules (the default is `garden_layer.plugin`). Each module must expose a `register_layer(tools)` function that receives the MCP `TOOLS` dictionary and adds prefixed entries (e.g., `garden_add_seedling`). `virtuoso_mcp` calls those hooks at startup, so simply `pip install --upgrade git+https://repo.home.world.eu.org/lucky/garden_layer.git` and update `DOMAIN_LAYERS` to include `garden_layer.plugin`. The workspace already contains the `garden_layer` source tree (`workspace/garden_layer`), so during local iteration you can also install it in editable form: `pip install -e /home/lucky/.openclaw/workspace/garden_layer`. The new tools appear in the `/mcp` tool list (`curl -sS http://127.0.0.1:8501/ | jq .tools`) without changing the single `/mcp` endpoint surface.
			
 
				+(Exercises the `/rpc` compatibility flow and validates the guardrail behavior.)
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ uvicorn[standard]>=0.23
 
				 pydantic>=2.6
			
 
				 requests>=2.31
			
 
				 pytest>=8.4
			
 
				+mcp>=1.0.0
			
--- a/run.sh
+++ b/run.sh
@@ -36,7 +36,7 @@ if [[ -f "$PID_FILE" ]]; then
 
				   fi
			
 
				 fi
			
 
				 
			
 
				-nohup python3 -m uvicorn virtuoso_mcp:app --host 0.0.0.0 --port 8501 >"$LOG_FILE" 2>&1 &
			
 
				+nohup python3 -m uvicorn server_fastmcp:app --host 0.0.0.0 --port 8501 >"$LOG_FILE" 2>&1 &
			
 
				 PID=$!
			
 
				 echo "$PID" >"$PID_FILE"
			
 
				 echo "Server started (PID $PID). Logs: $LOG_FILE"
			
--- a/server_fastmcp.py
+++ b/server_fastmcp.py
@@ -0,0 +1,94 @@
 
				+"""FastMCP transport wrapper for virtuoso tool logic.
			
 
				+
			
 
				+This module keeps the existing Virtuoso tool implementations untouched
			
 
				+and exposes them through the MCP SDK transport.
			
 
				+"""
			
 
				+
			
 
				+from fastapi import FastAPI
			
 
				+from fastapi import HTTPException
			
 
				+from pydantic import BaseModel
			
 
				+from mcp.server.fastmcp import FastMCP
			
 
				+from mcp.server.transport_security import TransportSecuritySettings
			
 
				+
			
 
				+from virtuoso_mcp import (
			
 
				+    TOOLS,
			
 
				+    TOOL_DOCS,
			
 
				+    VIRTUOSO_ENDPOINT,
			
 
				+    SPARQL_DEFAULT_LIMIT,
			
 
				+    SPARQL_MAX_LIMIT,
			
 
				+    ALLOW_EXAMPLE_LOAD,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+mcp = FastMCP(
			
 
				+    "virtuoso",
			
 
				+    transport_security=TransportSecuritySettings(
			
 
				+        enable_dns_rebinding_protection=False
			
 
				+    ),
			
 
				+)
			
 
				+
			
 
				+
			
 
				+def _wrap_tool(tool_name: str):
			
 
				+    def _tool(input: dict | None = None):
			
 
				+        payload = input or {}
			
 
				+        return TOOLS[tool_name](payload)
			
 
				+
			
 
				+    _tool.__name__ = f"tool_{tool_name}"
			
 
				+    _tool.__doc__ = TOOL_DOCS.get(tool_name, "")
			
 
				+    return _tool
			
 
				+
			
 
				+
			
 
				+for _name in sorted(TOOLS.keys()):
			
 
				+    mcp.add_tool(
			
 
				+        _wrap_tool(_name),
			
 
				+        name=_name,
			
 
				+        description=TOOL_DOCS.get(_name, ""),
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+app = FastAPI(title="Virtuoso MCP Server")
			
 
				+app.mount("/mcp", mcp.sse_app())
			
 
				+
			
 
				+
			
 
				+class LegacyToolRequest(BaseModel):
			
 
				+    tool: str
			
 
				+    input: dict = {}
			
 
				+
			
 
				+
			
 
				+@app.post("/rpc")
			
 
				+def legacy_rpc(req: LegacyToolRequest):
			
 
				+    if req.tool not in TOOLS:
			
 
				+        raise HTTPException(status_code=400, detail=f"Unknown tool: {req.tool}")
			
 
				+    try:
			
 
				+        result = TOOLS[req.tool](req.input or {})
			
 
				+        return {
			
 
				+            "status": "ok",
			
 
				+            "tool": req.tool,
			
 
				+            "description": TOOL_DOCS.get(req.tool, ""),
			
 
				+            "result": result,
			
 
				+        }
			
 
				+    except HTTPException:
			
 
				+        raise
			
 
				+    except Exception as exc:
			
 
				+        raise HTTPException(status_code=500, detail=str(exc))
			
 
				+
			
 
				+
			
 
				+@app.get("/")
			
 
				+def root():
			
 
				+    return {
			
 
				+        "status": "MCP server running",
			
 
				+        "transport": "fastmcp+sse",
			
 
				+        "mount": "/mcp",
			
 
				+        "tools": sorted(TOOLS.keys()),
			
 
				+        "virtuoso": VIRTUOSO_ENDPOINT,
			
 
				+        "guardrails": {
			
 
				+            "default_limit": SPARQL_DEFAULT_LIMIT,
			
 
				+            "max_limit": SPARQL_MAX_LIMIT,
			
 
				+            "allow_example_load": ALLOW_EXAMPLE_LOAD,
			
 
				+        },
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+@app.get("/health")
			
 
				+def health():
			
 
				+    return root()
			
--- a/test.sh
+++ b/test.sh
@@ -51,7 +51,7 @@ fail() {
 
				 
			
 
				 call_mcp() {
			
 
				   local payload="$1"
			
 
				-  curl -sS -X POST "$BASE_URL/mcp" \
			
 
				+  curl -sS -X POST "$BASE_URL/rpc" \
			
 
				     -H "Content-Type: application/json" \
			
 
				     -d "$payload"
			
 
				 }
			
--- a/virtuoso_mcp.py
+++ b/virtuoso_mcp.py
@@ -24,8 +24,8 @@ VIRTUOSO_ENDPOINT = os.getenv("VIRTUOSO_ENDPOINT") or os.getenv(
 
				 )
			
 
				 VIRTUOSO_USER = os.getenv("VIRTUOSO_USER")
			
 
				 VIRTUOSO_PASS = os.getenv("VIRTUOSO_PASS")
			
 
				-SPARQL_TIMEOUT = float(os.getenv("SPARQL_TIMEOUT", 10.0))
			
 
				-SPARQL_UPDATE_TIMEOUT = float(os.getenv("SPARQL_UPDATE_TIMEOUT", 15.0))
			
 
				+SPARQL_TIMEOUT = float(os.getenv("SPARQL_TIMEOUT", 30.0))
			
 
				+SPARQL_UPDATE_TIMEOUT = float(os.getenv("SPARQL_UPDATE_TIMEOUT", 30.0))
			
 
				 SPARQL_DEFAULT_LIMIT = int(os.getenv("SPARQL_DEFAULT_LIMIT", 100))
			
 
				 SPARQL_MAX_LIMIT = int(os.getenv("SPARQL_MAX_LIMIT", 500))
			
 
				 GRAPH_URI = os.getenv("GRAPH_URI", "http://example.org/catalog#")