Explorar el Código

added sparql_update tool

Lukas Goldschmidt hace 1 mes
padre
commit
a1e34bcd4b
Se han modificado 3 ficheros con 58 adiciones y 1 borrados
  1. 2 1
      README.md
  2. 23 0
      test.sh
  3. 33 0
      virtuoso_mcp.py

+ 2 - 1
README.md

@@ -25,7 +25,7 @@ Default host/port: `0.0.0.0:8501`
 
 ## Core tools
 
-- Query/navigation: `sparql_query`, `list_graphs`, `search_label`, `get_entities_by_type`, `get_predicates_for_subject`, `get_labels_for_subject`, `traverse_property`
+- Query/navigation: `sparql_query`, `sparql_update`, `list_graphs`, `search_label`, `get_entities_by_type`, `get_predicates_for_subject`, `get_labels_for_subject`, `traverse_property`
 - Ontology discovery: `list_classes`, `list_properties`, `describe_class`, `describe_property`
 - Relationship helpers: `describe_subject`, `path_traverse`, `property_usage_statistics`
 - Update helpers: `insert_triple`, `batch_insert`, `load_examples`
@@ -44,6 +44,7 @@ Current garden-prefixed set is intentionally trimmed to non-redundant domain hel
 ## Guardrails
 
 - `sparql_query` is SELECT-only
+- `sparql_update` accepts INSERT/DELETE updates only; queries containing SELECT/ASK/CONSTRUCT/DESCRIBE or LOAD/CLEAR/DROP/CREATE/MOVE/COPY/ADD are rejected
 - LIMIT is enforced (`SPARQL_DEFAULT_LIMIT`, clamped by `SPARQL_MAX_LIMIT`)
 - fixture loading requires `MCP_ALLOW_EXAMPLE_LOAD=true`
 

+ 23 - 0
test.sh

@@ -120,6 +120,29 @@ if assert_tool_ok "insert_triple" "$payload"; then
   echo "$TOOL_LAST_RESPONSE" | jq -r '.query' | sed 's/^/  /'
 fi
 
+# --- sparql_update: positive case (INSERT DATA) ---
+# Clear the previous tool response before the next call.
+# NOTE(review): presumably assert_tool_ok repopulates TOOL_LAST_RESPONSE; confirm against its definition.
+TOOL_LAST_RESPONSE=""
+# Quoted heredoc ('EOF') keeps the SPARQL text literal: no shell expansion inside.
+UPDATE_QUERY=$(cat <<'EOF'
+INSERT DATA {
+  GRAPH <http://example.org/catalog#test> {
+    <http://example.org/plain#UpdateToolItem> <http://www.w3.org/2000/01/rdf-schema#label> "update tool item" .
+  }
+}
+EOF
+)
+# Build the JSON payload with jq so the multi-line query is escaped correctly.
+payload=$(jq -n --arg query "$UPDATE_QUERY" '{"tool":"sparql_update","input":{"query":$query}}')
+if assert_tool_ok "sparql_update (INSERT DATA)" "$payload"; then
+  echo "Update query preview:"
+  # Print the `.query` field of the response, indented by two spaces.
+  echo "$TOOL_LAST_RESPONSE" | jq -r '.query' | sed 's/^/  /'
+fi
+
+# --- sparql_update: negative case (a read-only SELECT must not succeed) ---
+BAD_UPDATE=$(jq -n --arg query "SELECT * WHERE { ?s ?p ?o }" '{"tool":"sparql_update","input":{"query":$query}}')
+BAD_RESPONSE="$(call_mcp "$BAD_UPDATE")"
+# `jq -e` exits 0 only when the expression evaluates to true, i.e. status == "ok".
+# NOTE(review): any other response, including an empty or non-JSON one, falls into the
+# else branch and is reported as a confirmed rejection.
+if echo "$BAD_RESPONSE" | jq -e '.status == "ok"' >/dev/null 2>&1; then
+  fail "sparql_update rejects SELECT" "unexpected success"
+else
+  echo "Confirmed sparql_update rejects read-only queries."
+fi
+
 EXAMPLE_TTL=$(cat <<'EOF'
 @prefix ex: <http://example.org/plain#> .
 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

+ 33 - 0
virtuoso_mcp.py

@@ -119,6 +119,14 @@ def _mcp_tool_definition(name: str) -> Dict[str, Any]:
         base["inputSchema"]["required"] = ["query"]
         return base
 
+    if name == "sparql_update":
+        base["inputSchema"]["properties"] = {
+            "query": {"type": "string", "description": "SPARQL UPDATE query (INSERT/DELETE only, guardrailed)"},
+            "require_update_keyword": {"type": "boolean", "description": "Reject queries that do not contain INSERT or DELETE", "default": True},
+        }
+        base["inputSchema"]["required"] = ["query"]
+        return base
+
     if name == "search_label":
         base["inputSchema"]["properties"] = {
             "term": {"type": "string", "description": "Substring to search in rdfs:label"},
@@ -361,6 +369,19 @@ def guard_select_query(query: str) -> str:
     return _apply_limit(query, SPARQL_DEFAULT_LIMIT, SPARQL_MAX_LIMIT)
 
 
+# Tokens whose text can never act as a SPARQL keyword. They are blanked out
+# before keyword scanning so that data such as the literal "select me", the IRI
+# <http://example.org/add>, the variable ?copy or the name ex:create does not
+# trip the guard. Alternatives are tried left to right at each position, so a
+# string or IRI is consumed whole before a '#' or ':' inside it is considered.
+_SPARQL_INERT_TOKENS = re.compile(
+    r'"""(?:[^"\\]|\\.|"(?!""))*"""'  # long string literal, double quotes
+    r"|'''(?:[^'\\]|\\.|'(?!''))*'''"  # long string literal, single quotes
+    r'|"(?:[^"\\\n\r]|\\.)*"'  # short string literal, double quotes
+    r"|'(?:[^'\\\n\r]|\\.)*'"  # short string literal, single quotes
+    r'|<[^<>"{}|^`\\\s]*>'  # IRI reference
+    r"|#[^\n\r]*"  # comment running to end of line
+    r"|[?$]\w+"  # variable name
+    r"|:[\w.%-]+"  # local part of a prefixed name
+)
+
+
+def guard_update_query(query: str, require_update_keyword: bool = True) -> str:
+    """Allow only SPARQL UPDATE statements that actually mutate data.
+
+    The keyword checks run on a copy of the query in which string literals,
+    IRIs, comments, variable names and prefixed-name local parts have been
+    blanked out, so only the query's own syntax is inspected.
+
+    Args:
+        query: The SPARQL UPDATE text to validate.
+        require_update_keyword: When true, reject queries that contain neither
+            INSERT nor DELETE.
+
+    Returns:
+        The original query, unchanged, when every check passes.
+
+    Raises:
+        HTTPException: Status 400 when the query is not a string, contains a
+            read-only form (SELECT/ASK/CONSTRUCT/DESCRIBE), lacks INSERT/DELETE
+            while ``require_update_keyword`` is true, or contains a
+            graph-management operation (LOAD/CLEAR/DROP/CREATE/MOVE/COPY/ADD).
+    """
+    if not isinstance(query, str):
+        raise HTTPException(status_code=400, detail="SPARQL UPDATE query must be a string")
+    # Replace inert tokens with a space so neighbouring words stay separated.
+    scannable = _SPARQL_INERT_TOKENS.sub(" ", query).lower()
+    if re.search(r"\b(select|ask|construct|describe)\b", scannable):
+        raise HTTPException(status_code=400, detail="Only SPARQL UPDATE statements are allowed")
+    if require_update_keyword and not re.search(r"\b(insert|delete)\b", scannable):
+        raise HTTPException(status_code=400, detail="SPARQL UPDATE must contain INSERT or DELETE")
+    if re.search(r"\b(load|clear|drop|create|move|copy|add)\b", scannable):
+        raise HTTPException(status_code=400, detail="This update tool only allows INSERT or DELETE operations")
+    return query
+
+
 def ttl_to_sparql_insert(ttl_text: str, graph: Optional[str]) -> str:
     prefix_lines: List[str] = []
     body_lines: List[str] = []
@@ -711,6 +732,16 @@ def tool_property_usage_statistics(input_data: Dict[str, Any]) -> Dict[str, Any]
     }
 
 
+def tool_sparql_update(input_data: Dict[str, Any]) -> Dict[str, Any]:
+    """Validate a SPARQL UPDATE request with the update guard and execute it.
+
+    Reads ``query`` (required) and ``require_update_keyword`` (defaults to
+    True) from ``input_data``, passes the query through ``guard_update_query``
+    and hands the guarded text to ``run_sparql_update``.
+
+    Returns:
+        The mapping returned by ``run_sparql_update`` with the guarded query
+        stored under the ``"query"`` key.
+
+    Raises:
+        ValueError: If ``query`` is missing or empty.
+    """
+    raw_query = input_data.get("query")
+    if not raw_query:
+        raise ValueError("Missing 'query' field")
+    checked_query = guard_update_query(
+        raw_query,
+        require_update_keyword=input_data.get("require_update_keyword", True),
+    )
+    response = dict(run_sparql_update(checked_query))
+    # Echo the executed query back to the caller.
+    response["query"] = checked_query
+    return response
+
+
 def tool_batch_insert(input_data: Dict[str, Any]) -> Dict[str, Any]:
     ttl_text = input_data.get("ttl")
     triples = input_data.get("triples")
@@ -829,6 +860,7 @@ def tool_insert_triple(input_data: Dict[str, Any]) -> Dict[str, Any]:
 # --- TOOL REGISTRY ---
 TOOLS = {
     "sparql_query": tool_sparql_query,
+    "sparql_update": tool_sparql_update,
     "list_graphs": tool_list_graphs,
     "search_label": tool_search_label,
     "get_entities_by_type": tool_get_entities_by_type,
@@ -901,6 +933,7 @@ load_domain_layers(TOOLS, TOOL_SCHEMAS)
 
 TOOL_DOCS = {
     "sparql_query": "Execute a bounded SELECT query and return the JSON result.",
+    "sparql_update": "Execute a guarded SPARQL UPDATE query limited to INSERT/DELETE operations.",
     "list_graphs": "List up to 50 active graph URIs.",
     "search_label": "Search rdfs:label values that contain a term (case-insensitive).",
     "get_entities_by_type": "List subjects of a given rdf:type.",