# virtuoso_mcp.py
  1. import json
  2. import logging
  3. import os
  4. import re
  5. from datetime import datetime, timezone
  6. from importlib import import_module
  7. from pathlib import Path
  8. from typing import Any, Callable, Dict, List, Optional
  9. import requests
  10. from requests.auth import HTTPDigestAuth
  11. from fastapi import FastAPI, HTTPException, Request
  12. from pydantic import BaseModel
  13. LOG_LEVEL = os.getenv("MCP_LOG_LEVEL", "INFO").upper()
  14. logging.basicConfig(level=getattr(logging, LOG_LEVEL, logging.INFO))
  15. logger = logging.getLogger("virtuoso_mcp")
  16. app = FastAPI(title="MCP Server")
  17. # --- CONFIG ---
  18. VIRTUOSO_ENDPOINT = os.getenv("VIRTUOSO_ENDPOINT") or os.getenv(
  19. "VIRTUOSO_SPARQL", "http://localhost:8891/sparql"
  20. )
  21. VIRTUOSO_USER = os.getenv("VIRTUOSO_USER")
  22. VIRTUOSO_PASS = os.getenv("VIRTUOSO_PASS")
  23. SPARQL_TIMEOUT = float(os.getenv("SPARQL_TIMEOUT", 30.0))
  24. SPARQL_UPDATE_TIMEOUT = float(os.getenv("SPARQL_UPDATE_TIMEOUT", 30.0))
  25. SPARQL_DEFAULT_LIMIT = int(os.getenv("SPARQL_DEFAULT_LIMIT", 100))
  26. SPARQL_MAX_LIMIT = int(os.getenv("SPARQL_MAX_LIMIT", 500))
  27. GRAPH_URI = os.getenv("GRAPH_URI", "http://example.org/catalog#")
  28. EXAMPLES_DIR = Path(__file__).resolve().parent / "examples"
  29. EXAMPLE_GRAPH = os.getenv(
  30. "EXAMPLE_GRAPH", "http://example.org/catalog#test"
  31. )
  32. ALLOW_EXAMPLE_LOAD = os.getenv("MCP_ALLOW_EXAMPLE_LOAD", "false").lower() == "true"
  33. SESSION = requests.Session()
  34. LOGS_DIR = Path(__file__).resolve().parent / "logs"
  35. LOGS_DIR.mkdir(parents=True, exist_ok=True)
  36. tool_logger = logging.getLogger("virtuoso_mcp.tools")
  37. tool_handler = logging.FileHandler(LOGS_DIR / "tool_usage.log")
  38. tool_handler.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
  39. tool_logger.addHandler(tool_handler)
  40. tool_logger.setLevel(logging.INFO)
  41. tool_logger.propagate = False
  42. PREFIXES = f"""
  43. PREFIX : <{GRAPH_URI}>
  44. PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
  45. PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
  46. PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
  47. PREFIX dc: <http://purl.org/dc/elements/1.1/>
  48. """.strip()
  49. # --- MODELS ---
  50. class SparqlQueryRequest(BaseModel):
  51. query: str
  52. class ToolRequest(BaseModel):
  53. tool: str
  54. input: Dict[str, Any] = {}
  55. # --- MCP (minimal JSON-RPC 2.0) models ---
  56. class JsonRpcRequest(BaseModel):
  57. jsonrpc: str = "2.0"
  58. id: Optional[Any] = None
  59. method: str
  60. params: Dict[str, Any] = {}
  61. def mcp_error(id_value: Any, message: str, code: int = -32000) -> Dict[str, Any]:
  62. return {
  63. "jsonrpc": "2.0",
  64. "id": id_value,
  65. "error": {
  66. "code": code,
  67. "message": message,
  68. },
  69. }
  70. def mcp_result(id_value: Any, result: Dict[str, Any]) -> Dict[str, Any]:
  71. return {
  72. "jsonrpc": "2.0",
  73. "id": id_value,
  74. "result": result,
  75. }
  76. def _mcp_tool_definition(name: str) -> Dict[str, Any]:
  77. if name in TOOL_SCHEMAS:
  78. schema = TOOL_SCHEMAS[name]
  79. # schema is expected to be an MCP-compatible inputSchema object.
  80. return {
  81. "name": name,
  82. "description": TOOL_DOCS.get(name, ""),
  83. "inputSchema": schema,
  84. }
  85. # Incremental compliance step: add explicit input schemas for the most-used tools.
  86. # We still keep `additionalProperties: True` so we don't break existing clients.
  87. base: Dict[str, Any] = {
  88. "name": name,
  89. "description": TOOL_DOCS.get(name, ""),
  90. "inputSchema": {
  91. "type": "object",
  92. "additionalProperties": True,
  93. "properties": {},
  94. "required": [],
  95. },
  96. }
  97. if name == "sparql_query":
  98. base["inputSchema"]["properties"] = {
  99. "query": {"type": "string", "description": "SPARQL SELECT query (bounded + guardrailed)"}
  100. }
  101. base["inputSchema"]["required"] = ["query"]
  102. return base
  103. if name == "sparql_update":
  104. base["inputSchema"]["properties"] = {
  105. "query": {"type": "string", "description": "SPARQL UPDATE query (INSERT/DELETE only, guardrailed)"},
  106. "require_update_keyword": {"type": "boolean", "description": "Reject queries that do not contain INSERT or DELETE", "default": True},
  107. }
  108. base["inputSchema"]["required"] = ["query"]
  109. return base
  110. if name == "search_label":
  111. base["inputSchema"]["properties"] = {
  112. "term": {"type": "string", "description": "Substring to search in rdfs:label"},
  113. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
  114. }
  115. base["inputSchema"]["required"] = ["term"]
  116. return base
  117. if name == "get_entities_by_type":
  118. base["inputSchema"]["properties"] = {
  119. "type_uri": {"type": "string", "description": "RDF type URI"},
  120. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max subjects"},
  121. }
  122. base["inputSchema"]["required"] = ["type_uri"]
  123. return base
  124. if name == "list_graphs":
  125. # No inputs.
  126. return base
  127. if name == "list_classes":
  128. base["inputSchema"]["properties"] = {
  129. "term": {"type": "string", "description": "Optional substring to match labels/comments"},
  130. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
  131. }
  132. base["inputSchema"]["required"] = []
  133. return base
  134. if name == "list_properties":
  135. base["inputSchema"]["properties"] = {
  136. "term": {"type": "string", "description": "Optional substring to match labels/comments"},
  137. "domain_uri": {"type": "string", "description": "Optional rdfs:domain class URI"},
  138. "range_uri": {"type": "string", "description": "Optional rdfs:range class URI"},
  139. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
  140. }
  141. base["inputSchema"]["required"] = []
  142. return base
  143. if name == "describe_class":
  144. base["inputSchema"]["properties"] = {
  145. "class_uri": {"type": "string", "description": "Class URI to describe"},
  146. }
  147. base["inputSchema"]["required"] = ["class_uri"]
  148. return base
  149. if name == "describe_property":
  150. base["inputSchema"]["properties"] = {
  151. "property_uri": {"type": "string", "description": "Property URI to describe"},
  152. "usage_limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "How many usage examples to include"},
  153. }
  154. base["inputSchema"]["required"] = ["property_uri"]
  155. return base
  156. # ---- Entity navigation batch (B) ----
  157. if name == "get_predicates_for_subject":
  158. base["inputSchema"]["properties"] = {
  159. "subject_uri": {"type": "string", "description": "Subject URI"},
  160. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max predicates"},
  161. }
  162. base["inputSchema"]["required"] = ["subject_uri"]
  163. return base
  164. if name == "get_labels_for_subject":
  165. base["inputSchema"]["properties"] = {
  166. "subject_uri": {"type": "string", "description": "Subject URI"},
  167. }
  168. base["inputSchema"]["required"] = ["subject_uri"]
  169. return base
  170. if name == "traverse_property":
  171. base["inputSchema"]["properties"] = {
  172. "subject_uri": {"type": "string", "description": "Starting subject URI"},
  173. "property_uri": {"type": "string", "description": "Predicate URI to traverse"},
  174. "direction": {
  175. "type": "string",
  176. "enum": ["outgoing", "incoming"],
  177. "description": "Traversal direction",
  178. },
  179. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max neighbors"},
  180. }
  181. base["inputSchema"]["required"] = ["subject_uri", "property_uri"]
  182. return base
  183. if name == "describe_subject":
  184. base["inputSchema"]["properties"] = {
  185. "subject_uri": {"type": "string", "description": "Subject URI"},
  186. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max outgoing predicate/object pairs"},
  187. }
  188. base["inputSchema"]["required"] = ["subject_uri"]
  189. return base
  190. if name == "path_traverse":
  191. base["inputSchema"]["properties"] = {
  192. "subject_uri": {"type": "string", "description": "Starting subject URI"},
  193. "property_path": {
  194. "type": "string",
  195. "description": "Comma-separated list of predicate URIs (alternative to 'properties')",
  196. },
  197. "properties": {
  198. "type": "array",
  199. "items": {"type": "string"},
  200. "description": "List of predicate URIs",
  201. },
  202. "direction": {
  203. "type": "string",
  204. "enum": ["outgoing", "incoming"],
  205. "description": "Traversal direction",
  206. },
  207. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
  208. }
  209. base["inputSchema"]["required"] = ["subject_uri"]
  210. return base
  211. # ---- Relationship analytics batch (C) ----
  212. if name == "property_usage_statistics":
  213. base["inputSchema"]["properties"] = {
  214. "property_uri": {"type": "string", "description": "Predicate URI"},
  215. "examples_limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "How many usage examples to include"},
  216. }
  217. base["inputSchema"]["required"] = ["property_uri"]
  218. return base
  219. return base
  220. # --- CORE SPARQL FUNCTION ---
  221. def _build_auth() -> Optional[HTTPDigestAuth]:
  222. if VIRTUOSO_USER and VIRTUOSO_PASS:
  223. return HTTPDigestAuth(VIRTUOSO_USER, VIRTUOSO_PASS)
  224. return None
  225. def _with_prefixes(query: str) -> str:
  226. if re.search(r"^\s*prefix\b", query, re.IGNORECASE):
  227. return query
  228. return f"{PREFIXES}\n{query}"
  229. def run_sparql(query: str) -> Dict[str, Any]:
  230. """Execute a SPARQL query against Virtuoso and return the JSON payload."""
  231. logger.debug("Sending SPARQL query: %s", query)
  232. try:
  233. response = SESSION.post(
  234. VIRTUOSO_ENDPOINT,
  235. data=_with_prefixes(query).encode("utf-8"),
  236. headers={
  237. "Accept": "application/sparql-results+json",
  238. "Content-Type": "application/sparql-query",
  239. },
  240. timeout=SPARQL_TIMEOUT,
  241. auth=_build_auth(),
  242. )
  243. if not response.ok:
  244. logger.warning("SPARQL request failed: %s", response.status_code)
  245. response.raise_for_status()
  246. return response.json()
  247. except Exception as exc: # pragma: no cover - propagate for FastAPI
  248. logger.warning("SPARQL request failed: %s", exc)
  249. raise HTTPException(status_code=500, detail=str(exc))
  250. def run_sparql_update(query: str) -> Dict[str, Any]:
  251. """Execute a SPARQL UPDATE (INSERT/DELETE) against Virtuoso."""
  252. logger.debug("Sending SPARQL update: %s", query)
  253. try:
  254. response = SESSION.post(
  255. VIRTUOSO_ENDPOINT,
  256. data=_with_prefixes(query).encode("utf-8"),
  257. headers={"Content-Type": "application/sparql-update"},
  258. timeout=SPARQL_UPDATE_TIMEOUT,
  259. auth=_build_auth(),
  260. )
  261. if not response.ok:
  262. detail = (response.text or "").strip()
  263. logger.warning("SPARQL update failed: %s", response.status_code)
  264. raise HTTPException(
  265. status_code=500,
  266. detail=detail or f"SPARQL update failed with {response.status_code}",
  267. )
  268. return {"status": "ok"}
  269. except HTTPException:
  270. raise
  271. except Exception as exc: # pragma: no cover - propagate for FastAPI
  272. logger.warning("SPARQL update failed: %s", exc)
  273. raise HTTPException(status_code=500, detail=str(exc))
  274. # --- TOOL HELPERS ---
  275. def escape_sparql_string(value: str) -> str:
  276. """Escape a string for SPARQL literal usage."""
  277. if value is None:
  278. return ""
  279. return (
  280. str(value)
  281. .replace("\\", "\\\\")
  282. .replace('"', "\\\"")
  283. .replace("\n", "\\n")
  284. .replace("\r", "")
  285. )
  286. def sanitize_term(term: str) -> str:
  287. """Escape quotes inside label searches so we can safely interpolate strings."""
  288. return escape_sparql_string(term)
  289. def _extract_limit(query: str) -> Optional[int]:
  290. match = re.search(r"\blimit\s+(\d+)\b", query, re.IGNORECASE)
  291. if not match:
  292. return None
  293. try:
  294. return int(match.group(1))
  295. except ValueError:
  296. return None
  297. def _apply_limit(query: str, default_limit: int, max_limit: int) -> str:
  298. limit = _extract_limit(query)
  299. if limit is None:
  300. return f"{query.strip()}\nLIMIT {default_limit}"
  301. if limit > max_limit:
  302. return re.sub(
  303. r"\blimit\s+\d+\b",
  304. f"LIMIT {max_limit}",
  305. query,
  306. flags=re.IGNORECASE,
  307. )
  308. return query
  309. def guard_select_query(query: str) -> str:
  310. """Enforce that raw queries are read-only and bounded by LIMIT."""
  311. lowered = query.lower()
  312. if re.search(r"\b(insert|delete|load|clear|drop|create|move|copy|add)\b", lowered):
  313. raise HTTPException(status_code=400, detail="SPARQL update operations are not allowed")
  314. if "select" not in lowered:
  315. raise HTTPException(status_code=400, detail="Only SELECT queries are allowed")
  316. return _apply_limit(query, SPARQL_DEFAULT_LIMIT, SPARQL_MAX_LIMIT)
  317. def guard_update_query(query: str, require_update_keyword: bool = True) -> str:
  318. """Allow only SPARQL UPDATE statements that actually mutate data."""
  319. lowered = query.lower()
  320. if re.search(r"\b(select|ask|construct|describe)\b", lowered):
  321. raise HTTPException(status_code=400, detail="Only SPARQL UPDATE statements are allowed")
  322. if not re.search(r"\b(insert|delete)\b", lowered):
  323. if require_update_keyword:
  324. raise HTTPException(status_code=400, detail="SPARQL UPDATE must contain INSERT or DELETE")
  325. if re.search(r"\b(load|clear|drop|create|move|copy|add)\b", lowered):
  326. raise HTTPException(status_code=400, detail="This update tool only allows INSERT or DELETE operations")
  327. return query
  328. def ttl_to_sparql_insert(ttl_text: str, graph: Optional[str]) -> str:
  329. prefix_lines: List[str] = []
  330. body_lines: List[str] = []
  331. for raw_line in ttl_text.splitlines():
  332. line = raw_line.strip()
  333. if not line:
  334. continue
  335. prefix_match = re.match(r"@prefix\s+([\w-]+):\s*<([^>]+)>\s*\.", line)
  336. if prefix_match:
  337. prefix_lines.append(
  338. f"PREFIX {prefix_match.group(1)}: <{prefix_match.group(2)}>"
  339. )
  340. continue
  341. if line.startswith("@base"):
  342. # Skip @base entries for now; they are rare in our exports.
  343. continue
  344. body_lines.append(raw_line)
  345. if not body_lines:
  346. raise HTTPException(status_code=400, detail="No RDF triples found in input")
  347. prefixes = "\n".join(prefix_lines)
  348. body = "\n".join(body_lines)
  349. if graph:
  350. insert_body = f"GRAPH <{graph}> {{\n{body}\n}}"
  351. else:
  352. insert_body = body
  353. return f"{prefixes}\nINSERT DATA {{\n{insert_body}\n}}"
  354. # --- MCP TOOL IMPLEMENTATIONS ---
  355. def tool_sparql_query(input_data: Dict[str, Any]) -> Dict[str, Any]:
  356. query = input_data.get("query")
  357. if not query:
  358. raise ValueError("Missing 'query' field")
  359. guarded = guard_select_query(query)
  360. return run_sparql(guarded)
  361. def tool_list_graphs(_input: Dict[str, Any]) -> Dict[str, Any]:
  362. query = """
  363. SELECT DISTINCT ?g WHERE {
  364. GRAPH ?g { ?s ?p ?o }
  365. }
  366. LIMIT 50
  367. """
  368. return run_sparql(query)
  369. def tool_search_label(input_data: Dict[str, Any]) -> Dict[str, Any]:
  370. term = input_data.get("term", "")
  371. sanitized = sanitize_term(term)
  372. limit = int(input_data.get("limit", 20))
  373. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  374. query = f"""
  375. SELECT ?s ?label WHERE {{
  376. ?s rdfs:label ?label .
  377. FILTER(CONTAINS(LCASE(?label), LCASE(\"{sanitized}\")))
  378. }}
  379. LIMIT {limit}
  380. """
  381. return run_sparql(query)
  382. def tool_get_entities_by_type(input_data: Dict[str, Any]) -> Dict[str, Any]:
  383. type_uri = input_data.get("type_uri")
  384. if not type_uri:
  385. raise ValueError("Missing 'type_uri' field")
  386. limit = int(input_data.get("limit", 50))
  387. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  388. query = f"""
  389. SELECT ?s WHERE {{
  390. ?s rdf:type <{type_uri}> .
  391. }}
  392. LIMIT {limit}
  393. """
  394. return run_sparql(query)
  395. def tool_get_predicates_for_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  396. subject_uri = input_data.get("subject_uri")
  397. if not subject_uri:
  398. raise ValueError("Missing 'subject_uri' field")
  399. limit = int(input_data.get("limit", 50))
  400. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  401. query = f"""
  402. SELECT DISTINCT ?p WHERE {{
  403. <{subject_uri}> ?p ?o .
  404. }}
  405. LIMIT {limit}
  406. """
  407. return run_sparql(query)
  408. def tool_get_labels_for_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  409. subject_uri = input_data.get("subject_uri")
  410. if not subject_uri:
  411. raise ValueError("Missing 'subject_uri' field")
  412. query = f"""
  413. SELECT ?label WHERE {{
  414. <{subject_uri}> rdfs:label ?label .
  415. }}
  416. LIMIT 20
  417. """
  418. return run_sparql(query)
  419. def tool_traverse_property(input_data: Dict[str, Any]) -> Dict[str, Any]:
  420. subject_uri = input_data.get("subject_uri")
  421. property_uri = input_data.get("property_uri")
  422. if not subject_uri or not property_uri:
  423. raise ValueError("Missing 'subject_uri' or 'property_uri'")
  424. direction = input_data.get("direction", "outgoing")
  425. limit = int(input_data.get("limit", 50))
  426. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  427. if direction not in {"outgoing", "incoming"}:
  428. raise ValueError("direction must be 'outgoing' or 'incoming'")
  429. if direction == "outgoing":
  430. triple = f"<{subject_uri}> <{property_uri}> ?neighbor ."
  431. else:
  432. triple = f"?neighbor <{property_uri}> <{subject_uri}> ."
  433. query = f"""
  434. SELECT ?neighbor ?label ?description WHERE {{
  435. {triple}
  436. OPTIONAL {{ ?neighbor rdfs:label ?label }}
  437. OPTIONAL {{ ?neighbor dc:description ?description }}
  438. }}
  439. LIMIT {limit}
  440. """
  441. return run_sparql(query)
  442. def tool_list_classes(input_data: Dict[str, Any]) -> Dict[str, Any]:
  443. """List ontology classes (rdfs:Class and owl:Class) with optional term filtering."""
  444. term = sanitize_term(input_data.get("term", ""))
  445. limit = int(input_data.get("limit", 50))
  446. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  447. term_filter = ""
  448. if term:
  449. term_filter = f"""
  450. FILTER(
  451. CONTAINS(LCASE(COALESCE(STR(?label), STR(?class))), LCASE(\"{term}\")) ||
  452. CONTAINS(LCASE(COALESCE(STR(?comment), \"\")), LCASE(\"{term}\"))
  453. )
  454. """
  455. query = f"""
  456. SELECT DISTINCT ?class ?label ?comment WHERE {{
  457. {{ ?class rdf:type rdfs:Class . }}
  458. UNION
  459. {{ ?class rdf:type <http://www.w3.org/2002/07/owl#Class> . }}
  460. OPTIONAL {{ ?class rdfs:label ?label }}
  461. OPTIONAL {{ ?class rdfs:comment ?comment }}
  462. {term_filter}
  463. }}
  464. LIMIT {limit}
  465. """
  466. return run_sparql(query)
  467. def tool_list_properties(input_data: Dict[str, Any]) -> Dict[str, Any]:
  468. """List ontology properties with optional term/domain/range filtering."""
  469. term = sanitize_term(input_data.get("term", ""))
  470. domain_uri = input_data.get("domain_uri")
  471. range_uri = input_data.get("range_uri")
  472. limit = int(input_data.get("limit", 100))
  473. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  474. filters = []
  475. if term:
  476. filters.append(
  477. f"""
  478. FILTER(
  479. CONTAINS(LCASE(COALESCE(STR(?label), STR(?property))), LCASE(\"{term}\")) ||
  480. CONTAINS(LCASE(COALESCE(STR(?comment), \"\")), LCASE(\"{term}\"))
  481. )
  482. """
  483. )
  484. if domain_uri:
  485. filters.append(f"FILTER(?domain = <{domain_uri}>)")
  486. if range_uri:
  487. filters.append(f"FILTER(?range = <{range_uri}>)")
  488. query = f"""
  489. SELECT DISTINCT ?property ?label ?comment ?domain ?range WHERE {{
  490. {{ ?property rdf:type rdf:Property . }}
  491. UNION
  492. {{ ?property rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> . }}
  493. UNION
  494. {{ ?property rdf:type <http://www.w3.org/2002/07/owl#DatatypeProperty> . }}
  495. OPTIONAL {{ ?property rdfs:label ?label }}
  496. OPTIONAL {{ ?property rdfs:comment ?comment }}
  497. OPTIONAL {{ ?property rdfs:domain ?domain }}
  498. OPTIONAL {{ ?property rdfs:range ?range }}
  499. {' '.join(filters)}
  500. }}
  501. LIMIT {limit}
  502. """
  503. return run_sparql(query)
  504. def tool_describe_class(input_data: Dict[str, Any]) -> Dict[str, Any]:
  505. """Describe a class and include properties that declare it as rdfs:domain."""
  506. class_uri = input_data.get("class_uri")
  507. if not class_uri:
  508. raise ValueError("Missing 'class_uri' field")
  509. query = f"""
  510. SELECT ?label ?comment ?property ?propertyLabel ?propertyComment ?range WHERE {{
  511. OPTIONAL {{ <{class_uri}> rdfs:label ?label }}
  512. OPTIONAL {{ <{class_uri}> rdfs:comment ?comment }}
  513. OPTIONAL {{
  514. ?property rdfs:domain <{class_uri}> .
  515. OPTIONAL {{ ?property rdfs:label ?propertyLabel }}
  516. OPTIONAL {{ ?property rdfs:comment ?propertyComment }}
  517. OPTIONAL {{ ?property rdfs:range ?range }}
  518. }}
  519. }}
  520. LIMIT {SPARQL_MAX_LIMIT}
  521. """
  522. return run_sparql(query)
  523. def tool_describe_property(input_data: Dict[str, Any]) -> Dict[str, Any]:
  524. """Describe a property and include usage examples from the graph."""
  525. property_uri = input_data.get("property_uri")
  526. if not property_uri:
  527. raise ValueError("Missing 'property_uri' field")
  528. usage_limit = int(input_data.get("usage_limit", 10))
  529. usage_limit = min(max(usage_limit, 1), SPARQL_MAX_LIMIT)
  530. metadata_query = f"""
  531. SELECT ?label ?comment ?domain ?range ?type WHERE {{
  532. OPTIONAL {{ <{property_uri}> rdfs:label ?label }}
  533. OPTIONAL {{ <{property_uri}> rdfs:comment ?comment }}
  534. OPTIONAL {{ <{property_uri}> rdfs:domain ?domain }}
  535. OPTIONAL {{ <{property_uri}> rdfs:range ?range }}
  536. OPTIONAL {{ <{property_uri}> rdf:type ?type }}
  537. }}
  538. LIMIT {SPARQL_MAX_LIMIT}
  539. """
  540. usage_query = f"""
  541. SELECT ?subject ?subjectLabel ?object ?objectLabel WHERE {{
  542. ?subject <{property_uri}> ?object .
  543. OPTIONAL {{ ?subject rdfs:label ?subjectLabel }}
  544. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  545. }}
  546. LIMIT {usage_limit}
  547. """
  548. return {
  549. "metadata": run_sparql(metadata_query),
  550. "usage": run_sparql(usage_query),
  551. }
  552. def tool_describe_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  553. subject_uri = input_data.get("subject_uri")
  554. if not subject_uri:
  555. raise ValueError("Missing 'subject_uri' field")
  556. limit = int(input_data.get("limit", 50))
  557. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  558. query = f"""
  559. SELECT ?predicate ?object ?objectLabel WHERE {{
  560. <{subject_uri}> ?predicate ?object .
  561. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  562. }}
  563. LIMIT {limit}
  564. """
  565. return run_sparql(query)
  566. def tool_path_traverse(input_data: Dict[str, Any]) -> Dict[str, Any]:
  567. subject_uri = input_data.get("subject_uri")
  568. property_path = input_data.get("property_path") or input_data.get("properties")
  569. if not subject_uri or not property_path:
  570. raise ValueError("Missing 'subject_uri' or 'property_path'")
  571. if isinstance(property_path, str):
  572. property_path = [p.strip() for p in property_path.split(",") if p.strip()]
  573. if not isinstance(property_path, list) or not property_path:
  574. raise ValueError("'property_path' must be a non-empty list of property URIs")
  575. direction = input_data.get("direction", "outgoing")
  576. limit = int(input_data.get("limit", 50))
  577. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  578. statements = []
  579. optional_lines = []
  580. select_terms = []
  581. prev_subject = f"<{subject_uri}>"
  582. for idx, prop_uri in enumerate(property_path, start=1):
  583. step_var = f"?n{idx}"
  584. if direction == "outgoing":
  585. statements.append(f"{prev_subject} <{prop_uri}> {step_var} .")
  586. else:
  587. statements.append(f"{step_var} <{prop_uri}> {prev_subject} .")
  588. select_terms.append(step_var)
  589. optional_lines.append(f"OPTIONAL {{ {step_var} rdfs:label {step_var}Label }}")
  590. optional_lines.append(f"OPTIONAL {{ {step_var} dc:description {step_var}Description }}")
  591. prev_subject = step_var
  592. select_clause = " ".join(select_terms)
  593. query = f"""
  594. SELECT {select_clause} WHERE {{
  595. {'\n '.join(statements)}
  596. {'\n '.join(optional_lines)}
  597. }}
  598. LIMIT {limit}
  599. """
  600. return {
  601. "property_path": property_path,
  602. "direction": direction,
  603. "result": run_sparql(query),
  604. }
  605. def tool_property_usage_statistics(input_data: Dict[str, Any]) -> Dict[str, Any]:
  606. property_uri = input_data.get("property_uri")
  607. if not property_uri:
  608. raise ValueError("Missing 'property_uri' field")
  609. examples_limit = int(input_data.get("examples_limit", 5))
  610. examples_limit = min(max(examples_limit, 1), SPARQL_MAX_LIMIT)
  611. count_query = f"""
  612. SELECT (COUNT(DISTINCT ?subject) AS ?usageCount) WHERE {{
  613. ?subject <{property_uri}> ?object .
  614. }}
  615. LIMIT {SPARQL_MAX_LIMIT}
  616. """
  617. usage_query = f"""
  618. SELECT ?subject ?subjectLabel ?object ?objectLabel WHERE {{
  619. ?subject <{property_uri}> ?object .
  620. OPTIONAL {{ ?subject rdfs:label ?subjectLabel }}
  621. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  622. }}
  623. LIMIT {examples_limit}
  624. """
  625. return {
  626. "count": run_sparql(count_query),
  627. "examples": run_sparql(usage_query),
  628. }
  629. def tool_sparql_update(input_data: Dict[str, Any]) -> Dict[str, Any]:
  630. query = input_data.get("query")
  631. if not query:
  632. raise ValueError("Missing 'query' field")
  633. require_update_keyword = input_data.get("require_update_keyword", True)
  634. guarded = guard_update_query(query, require_update_keyword=require_update_keyword)
  635. result = run_sparql_update(guarded)
  636. return {**result, "query": guarded}
  637. def tool_batch_insert(input_data: Dict[str, Any]) -> Dict[str, Any]:
  638. ttl_text = input_data.get("ttl")
  639. triples = input_data.get("triples")
  640. graph = input_data.get("graph") or GRAPH_URI
  641. if not ttl_text and not triples:
  642. raise ValueError("Provide either 'ttl' text or 'triples' list")
  643. def _format_object(obj_value: Any, obj_type: str, datatype: Optional[str], lang: Optional[str]) -> str:
  644. if obj_type == "uri":
  645. return f"<{obj_value}>"
  646. if obj_type == "literal":
  647. return f'"{escape_sparql_string(obj_value)}"'
  648. if obj_type == "typed_literal":
  649. if not datatype:
  650. raise ValueError("Missing datatype for typed_literal")
  651. return f'"{escape_sparql_string(obj_value)}"^^<{datatype}>'
  652. if obj_type == "lang_literal":
  653. if not lang:
  654. raise ValueError("Missing lang for lang_literal")
  655. return f'"{escape_sparql_string(obj_value)}"@{lang}'
  656. raise ValueError(f"Unknown object_type: {obj_type}")
  657. if ttl_text:
  658. query = ttl_to_sparql_insert(ttl_text, graph)
  659. else:
  660. lines = []
  661. for triple in triples:
  662. subj = triple.get("subject")
  663. pred = triple.get("predicate")
  664. obj_value = triple.get("object")
  665. if not subj or not pred or obj_value is None:
  666. raise ValueError("Each triple must provide subject, predicate, and object")
  667. obj_type = triple.get("object_type", "uri")
  668. datatype = triple.get("datatype")
  669. lang = triple.get("lang")
  670. obj_text = _format_object(obj_value, obj_type, datatype, lang)
  671. lines.append(f"<{subj}> <{pred}> {obj_text} .")
  672. ttl_bulk = "\n".join(lines)
  673. query = ttl_to_sparql_insert(ttl_bulk, graph)
  674. result = run_sparql_update(query)
  675. return {**result, "query": query}
  676. def tool_load_examples(input_data: Dict[str, Any]) -> Dict[str, Any]:
  677. if not ALLOW_EXAMPLE_LOAD:
  678. raise HTTPException(status_code=403, detail="Example loading is disabled")
  679. requested = input_data.get("files") or []
  680. if isinstance(requested, str):
  681. requested = [requested]
  682. graph = input_data.get("graph") or EXAMPLE_GRAPH
  683. files_to_load = []
  684. if requested:
  685. files_to_load = requested
  686. else:
  687. files_to_load = sorted(p.name for p in EXAMPLES_DIR.glob("*.ttl"))
  688. if not files_to_load:
  689. raise HTTPException(status_code=404, detail="No example files available")
  690. results = []
  691. for filename in files_to_load:
  692. file_path = (EXAMPLES_DIR / filename).resolve()
  693. if not file_path.exists():
  694. raise HTTPException(status_code=400, detail=f"Missing example file: {filename}")
  695. if EXAMPLES_DIR not in file_path.parents and file_path != EXAMPLES_DIR:
  696. raise HTTPException(status_code=400, detail="Invalid example file path")
  697. ttl_text = file_path.read_text(encoding="utf-8")
  698. update_query = ttl_to_sparql_insert(ttl_text, graph)
  699. run_sparql_update(update_query)
  700. results.append({"file": filename, "graph": graph})
  701. return {"loaded": results}
  702. def tool_insert_triple(input_data: Dict[str, Any]) -> Dict[str, Any]:
  703. subject = input_data.get("subject")
  704. predicate = input_data.get("predicate")
  705. obj = input_data.get("object")
  706. obj_type = input_data.get("object_type", "uri")
  707. graph = input_data.get("graph")
  708. if not subject or not predicate or obj is None:
  709. raise ValueError("Missing 'subject', 'predicate', or 'object' field")
  710. if obj_type == "uri":
  711. obj_value = f"<{obj}>"
  712. elif obj_type == "literal":
  713. obj_value = f"\"{escape_sparql_string(obj)}\""
  714. elif obj_type == "typed_literal":
  715. datatype = input_data.get("datatype")
  716. if not datatype:
  717. raise ValueError("Missing 'datatype' for typed_literal")
  718. obj_value = f"\"{escape_sparql_string(obj)}\"^^<{datatype}>"
  719. elif obj_type == "lang_literal":
  720. lang = input_data.get("lang")
  721. if not lang:
  722. raise ValueError("Missing 'lang' for lang_literal")
  723. obj_value = f"\"{escape_sparql_string(obj)}\"@{lang}"
  724. else:
  725. raise ValueError("Unknown object_type")
  726. ttl = f"<{subject}> <{predicate}> {obj_value} ."
  727. update_query = ttl_to_sparql_insert(ttl, graph)
  728. try:
  729. result = run_sparql_update(update_query)
  730. except HTTPException as exc:
  731. detail = f"{exc.detail}\n\nSPARQL:\n{update_query}"
  732. raise HTTPException(status_code=exc.status_code, detail=detail)
  733. return {**result, "query": update_query}
  734. # --- TOOL REGISTRY ---
  735. TOOLS = {
  736. "sparql_query": tool_sparql_query,
  737. "sparql_update": tool_sparql_update,
  738. "list_graphs": tool_list_graphs,
  739. "search_label": tool_search_label,
  740. "get_entities_by_type": tool_get_entities_by_type,
  741. "get_predicates_for_subject": tool_get_predicates_for_subject,
  742. "get_labels_for_subject": tool_get_labels_for_subject,
  743. "traverse_property": tool_traverse_property,
  744. "list_classes": tool_list_classes,
  745. "list_properties": tool_list_properties,
  746. "describe_class": tool_describe_class,
  747. "describe_property": tool_describe_property,
  748. "describe_subject": tool_describe_subject,
  749. "path_traverse": tool_path_traverse,
  750. "property_usage_statistics": tool_property_usage_statistics,
  751. "batch_insert": tool_batch_insert,
  752. "insert_triple": tool_insert_triple,
  753. "load_examples": tool_load_examples,
  754. }
  755. # Tool input schemas registered by domain layers (e.g., garden_layer).
  756. TOOL_SCHEMAS: Dict[str, Any] = {}
  757. def load_domain_layers(
  758. tools: Dict[str, Callable[[Dict[str, Any]], Any]],
  759. tool_schemas: Dict[str, Any],
  760. ) -> None:
  761. raw = os.getenv("DOMAIN_LAYERS", "garden_layer.plugin")
  762. modules = [item.strip() for item in raw.split(",") if item.strip()]
  763. if not modules:
  764. return
  765. for module_name in modules:
  766. module = None
  767. try:
  768. module = import_module(module_name)
  769. except ImportError as exc:
  770. base = module_name.split(".", 1)[0]
  771. if base != module_name:
  772. try:
  773. module = import_module(base)
  774. logger.info("Falling back to base module '%s' for domain layer '%s'", base, module_name)
  775. except ImportError:
  776. logger.warning(
  777. "Domain layer '%s' could not be imported and base module '%s' is missing: %s",
  778. module_name,
  779. base,
  780. exc,
  781. )
  782. else:
  783. logger.warning("Domain layer '%s' could not be imported: %s", module_name, exc)
  784. if module is None:
  785. continue
  786. register = getattr(module, "register_layer", None)
  787. if not callable(register):
  788. logger.warning("Domain layer '%s' does not expose register_layer", module_name)
  789. continue
  790. try:
  791. # Domain layer may optionally register input schemas.
  792. try:
  793. if register.__code__.co_argcount >= 2:
  794. register(tools, tool_schemas)
  795. else:
  796. register(tools)
  797. except Exception:
  798. register(tools)
  799. logger.info("Loaded domain layer '%s'", module_name)
  800. except Exception as exc:
  801. logger.exception("Domain layer '%s' failed to register: %s", module_name, exc)
  802. load_domain_layers(TOOLS, TOOL_SCHEMAS)
  803. TOOL_DOCS = {
  804. "sparql_query": "Execute a bounded SELECT query and return the JSON result.",
  805. "sparql_update": "Execute a guarded SPARQL UPDATE query limited to INSERT/DELETE operations.",
  806. "list_graphs": "List up to 50 active graph URIs.",
  807. "search_label": "Search rdfs:label values that contain a term (case-insensitive).",
  808. "get_entities_by_type": "List subjects of a given rdf:type.",
  809. "get_predicates_for_subject": "List distinct predicates used by a subject.",
  810. "get_labels_for_subject": "Fetch rdfs:label values for a subject.",
  811. "traverse_property": "Traverse a property (incoming or outgoing) for a subject and return labels/descriptions.",
  812. "list_classes": "List ontology classes with optional label/comment term filtering.",
  813. "list_properties": "List ontology properties with optional term/domain/range filters.",
  814. "describe_class": "Describe a class and list properties that use it as rdfs:domain.",
  815. "describe_property": "Describe a property (label/comment/domain/range/type) and sample usage.",
  816. "describe_subject": "Return subject predicates/objects (with labels) to inspect an individual node.",
  817. "path_traverse": "Follow a property path (list of predicates) from a subject, returning each step's nodes.",
  818. "property_usage_statistics": "Count how often a property is used and sample subjects/objects.",
  819. "batch_insert": "Insert multiple triples or TTL at once with a single guarded update.",
  820. "insert_triple": "Insert a single triple (useful for debugging updates).",
  821. "load_examples": "Load Turtle fixtures from the `examples/` directory when MCP_ALLOW_EXAMPLE_LOAD=true.",
  822. }
  823. # --- MCP ENDPOINT ---
  824. @app.post("/mcp")
  825. async def handle_mcp(http_request: Request):
  826. """Minimal MCP-ish JSON-RPC 2.0 endpoint on POST /mcp.
  827. Backward compatible legacy mode:
  828. {"tool": "search_label", "input": {...}}
  829. Minimal JSON-RPC mode (first step towards MCP compliance):
  830. {"jsonrpc":"2.0","id":1,"method":"initialize","params":{...}}
  831. {"jsonrpc":"2.0","id":2,"method":"tools/list","params":{...}}
  832. {"jsonrpc":"2.0","id":3,"method":"tools/call","params":{ "tool": "...", "params": {...} }}
  833. """
  834. body = None
  835. try:
  836. body = await http_request.json()
  837. except Exception:
  838. body = None
  839. # ---- Legacy mode ----
  840. if isinstance(body, dict) and "tool" in body:
  841. legacy = ToolRequest(**body)
  842. tool_name = legacy.tool
  843. input_data = legacy.input or {}
  844. client_host = http_request.client.host if http_request.client else "unknown"
  845. trimmed_input = json.dumps(input_data, ensure_ascii=False, default=str)
  846. if len(trimmed_input) > 1024:
  847. trimmed_input = f"{trimmed_input[:1024]}…"
  848. timestamp = datetime.now(timezone.utc).isoformat()
  849. tool_logger.info(
  850. "tool=%s client=%s time=%s input=%s",
  851. tool_name,
  852. client_host,
  853. timestamp,
  854. trimmed_input,
  855. )
  856. if tool_name not in TOOLS:
  857. raise HTTPException(status_code=400, detail=f"Unknown tool: {tool_name}")
  858. try:
  859. result = TOOLS[tool_name](input_data)
  860. return {
  861. "status": "ok",
  862. "tool": tool_name,
  863. "description": TOOL_DOCS.get(tool_name, ""),
  864. "result": result,
  865. }
  866. except Exception as exc:
  867. logger.error("Tool %s failed: %s", tool_name, exc)
  868. raise HTTPException(status_code=500, detail=str(exc))
  869. # ---- JSON-RPC 2.0 mode ----
  870. if not isinstance(body, dict):
  871. return mcp_error(None, "Invalid JSON-RPC request", code=-32600)
  872. try:
  873. rpc_req = JsonRpcRequest(**body)
  874. except Exception as exc:
  875. # If body is malformed, still surface the id if present.
  876. rpc_id = body.get("id")
  877. logger.warning("Invalid JSON-RPC request: %s", exc)
  878. return mcp_error(rpc_id, "Invalid JSON-RPC request", code=-32600)
  879. method = rpc_req.method
  880. rpc_id = rpc_req.id
  881. params = rpc_req.params or {}
  882. if method == "initialize":
  883. tools = [_mcp_tool_definition(name) for name in sorted(TOOLS.keys())]
  884. return mcp_result(
  885. rpc_id,
  886. {
  887. "protocolVersion": "0.1",
  888. "capabilities": {
  889. "tools": True,
  890. "list": True,
  891. "call": True,
  892. },
  893. "tools": tools,
  894. },
  895. )
  896. if method in {"tools/list", "tools/listTools"}:
  897. tools = [_mcp_tool_definition(name) for name in sorted(TOOLS.keys())]
  898. return mcp_result(rpc_id, {"tools": tools})
  899. if method in {"tools/call", "tools/callTool"}:
  900. # Different clients sometimes wrap the call slightly differently.
  901. tool_name = (
  902. params.get("tool")
  903. or params.get("name")
  904. or params.get("toolName")
  905. )
  906. input_data = (
  907. params.get("params")
  908. or params.get("input")
  909. or params.get("arguments")
  910. or {}
  911. )
  912. if not tool_name:
  913. return mcp_error(rpc_id, "Missing tool name", code=-32602)
  914. if tool_name not in TOOLS:
  915. return mcp_error(rpc_id, f"Unknown tool: {tool_name}", code=-32601)
  916. try:
  917. result = TOOLS[tool_name](input_data)
  918. return mcp_result(rpc_id, {"result": result})
  919. except HTTPException as exc:
  920. return mcp_error(rpc_id, str(exc.detail), code=exc.status_code)
  921. except Exception as exc:
  922. logger.error("Tool %s failed: %s", tool_name, exc)
  923. return mcp_error(rpc_id, str(exc), code=-32000)
  924. return mcp_error(rpc_id, f"Method not found: {method}", code=-32601)
  925. # --- HEALTH CHECK ---
  926. @app.get("/")
  927. def root():
  928. return {
  929. "status": "MCP server running",
  930. "tools": list(TOOLS.keys()),
  931. "virtuoso": VIRTUOSO_ENDPOINT,
  932. "guardrails": {
  933. "default_limit": SPARQL_DEFAULT_LIMIT,
  934. "max_limit": SPARQL_MAX_LIMIT,
  935. "allow_example_load": ALLOW_EXAMPLE_LOAD,
  936. "turtle_examples": True,
  937. },
  938. }
  939. @app.get("/health")
  940. def health():
  941. return root()