virtuoso_mcp.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040
  1. import json
  2. import logging
  3. import os
  4. import re
  5. from datetime import datetime, timezone
  6. from importlib import import_module
  7. from pathlib import Path
  8. from typing import Any, Callable, Dict, List, Optional
  9. import requests
  10. from requests.auth import HTTPDigestAuth
  11. from fastapi import FastAPI, HTTPException, Request
  12. from pydantic import BaseModel
# Log verbosity comes from MCP_LOG_LEVEL; a name that is not an attribute of
# the logging module falls back to INFO.
LOG_LEVEL = os.getenv("MCP_LOG_LEVEL", "INFO").upper()
logging.basicConfig(level=getattr(logging, LOG_LEVEL, logging.INFO))
logger = logging.getLogger("virtuoso_mcp")
app = FastAPI(title="MCP Server")
# --- CONFIG ---
# Endpoint lookup order: VIRTUOSO_ENDPOINT, then VIRTUOSO_SPARQL, then the
# local default URL.
VIRTUOSO_ENDPOINT = os.getenv("VIRTUOSO_ENDPOINT") or os.getenv(
    "VIRTUOSO_SPARQL", "http://localhost:8891/sparql"
)
# Credentials are optional; they are read once at import time.
VIRTUOSO_USER = os.getenv("VIRTUOSO_USER")
VIRTUOSO_PASS = os.getenv("VIRTUOSO_PASS")
# Timeouts are passed as `timeout=` to the HTTP session (queries vs. updates).
SPARQL_TIMEOUT = float(os.getenv("SPARQL_TIMEOUT", 10.0))
SPARQL_UPDATE_TIMEOUT = float(os.getenv("SPARQL_UPDATE_TIMEOUT", 15.0))
# Default and ceiling values used when bounding query LIMIT clauses.
SPARQL_DEFAULT_LIMIT = int(os.getenv("SPARQL_DEFAULT_LIMIT", 100))
SPARQL_MAX_LIMIT = int(os.getenv("SPARQL_MAX_LIMIT", 500))
# Namespace bound to the empty prefix in PREFIXES below.
GRAPH_URI = os.getenv("GRAPH_URI", "http://example.org/catalog#")
# Example Turtle files are looked up in an "examples" directory next to this file.
EXAMPLES_DIR = Path(__file__).resolve().parent / "examples"
EXAMPLE_GRAPH = os.getenv(
    "EXAMPLE_GRAPH", "http://example.org/catalog#test"
)
# Enabled only when the variable equals "true" (compared case-insensitively).
ALLOW_EXAMPLE_LOAD = os.getenv("MCP_ALLOW_EXAMPLE_LOAD", "false").lower() == "true"
# One shared requests session for all SPARQL traffic.
SESSION = requests.Session()
# Import-time side effect: the "logs" directory next to this file is created
# if it does not exist.
LOGS_DIR = Path(__file__).resolve().parent / "logs"
LOGS_DIR.mkdir(parents=True, exist_ok=True)
# Dedicated logger that writes to logs/tool_usage.log; propagate=False keeps
# its records out of the handlers of ancestor loggers.
tool_logger = logging.getLogger("virtuoso_mcp.tools")
tool_handler = logging.FileHandler(LOGS_DIR / "tool_usage.log")
tool_handler.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
tool_logger.addHandler(tool_handler)
tool_logger.setLevel(logging.INFO)
tool_logger.propagate = False
# Default SPARQL prologue; the surrounding blank lines of the literal are
# removed by .strip().
PREFIXES = f"""
PREFIX : <{GRAPH_URI}>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
""".strip()
  49. # --- MODELS ---
# Request model holding a single SPARQL query string.
class SparqlQueryRequest(BaseModel):
    # The SPARQL text supplied by the caller.
    query: str
# Request model naming a tool and carrying its input mapping.
class ToolRequest(BaseModel):
    # Tool identifier; presumably a key of the TOOLS registry - confirm against the caller.
    tool: str
    # Free-form tool arguments; defaults to an empty mapping.
    input: Dict[str, Any] = {}
  55. # --- MCP (minimal JSON-RPC 2.0) models ---
# Minimal JSON-RPC 2.0 request envelope.
class JsonRpcRequest(BaseModel):
    # Protocol version marker; defaults to "2.0".
    jsonrpc: str = "2.0"
    # Request id of any JSON type; None when the caller supplies none.
    id: Optional[Any] = None
    # Name of the method being invoked.
    method: str
    # Method parameters; defaults to an empty mapping.
    params: Dict[str, Any] = {}
  61. def mcp_error(id_value: Any, message: str, code: int = -32000) -> Dict[str, Any]:
  62. return {
  63. "jsonrpc": "2.0",
  64. "id": id_value,
  65. "error": {
  66. "code": code,
  67. "message": message,
  68. },
  69. }
  70. def mcp_result(id_value: Any, result: Dict[str, Any]) -> Dict[str, Any]:
  71. return {
  72. "jsonrpc": "2.0",
  73. "id": id_value,
  74. "result": result,
  75. }
  76. def _mcp_tool_definition(name: str) -> Dict[str, Any]:
  77. # Incremental compliance step: add explicit input schemas for the most-used tools.
  78. # We still keep `additionalProperties: True` so we don't break existing clients.
  79. base: Dict[str, Any] = {
  80. "name": name,
  81. "description": TOOL_DOCS.get(name, ""),
  82. "inputSchema": {
  83. "type": "object",
  84. "additionalProperties": True,
  85. "properties": {},
  86. "required": [],
  87. },
  88. }
  89. if name == "sparql_query":
  90. base["inputSchema"]["properties"] = {
  91. "query": {"type": "string", "description": "SPARQL SELECT query (bounded + guardrailed)"}
  92. }
  93. base["inputSchema"]["required"] = ["query"]
  94. return base
  95. if name == "search_label":
  96. base["inputSchema"]["properties"] = {
  97. "term": {"type": "string", "description": "Substring to search in rdfs:label"},
  98. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
  99. }
  100. base["inputSchema"]["required"] = ["term"]
  101. return base
  102. if name == "get_entities_by_type":
  103. base["inputSchema"]["properties"] = {
  104. "type_uri": {"type": "string", "description": "RDF type URI"},
  105. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max subjects"},
  106. }
  107. base["inputSchema"]["required"] = ["type_uri"]
  108. return base
  109. if name == "list_graphs":
  110. # No inputs.
  111. return base
  112. if name == "list_classes":
  113. base["inputSchema"]["properties"] = {
  114. "term": {"type": "string", "description": "Optional substring to match labels/comments"},
  115. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
  116. }
  117. base["inputSchema"]["required"] = []
  118. return base
  119. if name == "list_properties":
  120. base["inputSchema"]["properties"] = {
  121. "term": {"type": "string", "description": "Optional substring to match labels/comments"},
  122. "domain_uri": {"type": "string", "description": "Optional rdfs:domain class URI"},
  123. "range_uri": {"type": "string", "description": "Optional rdfs:range class URI"},
  124. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
  125. }
  126. base["inputSchema"]["required"] = []
  127. return base
  128. if name == "describe_class":
  129. base["inputSchema"]["properties"] = {
  130. "class_uri": {"type": "string", "description": "Class URI to describe"},
  131. }
  132. base["inputSchema"]["required"] = ["class_uri"]
  133. return base
  134. if name == "describe_property":
  135. base["inputSchema"]["properties"] = {
  136. "property_uri": {"type": "string", "description": "Property URI to describe"},
  137. "usage_limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "How many usage examples to include"},
  138. }
  139. base["inputSchema"]["required"] = ["property_uri"]
  140. return base
  141. # ---- Entity navigation batch (B) ----
  142. if name == "get_predicates_for_subject":
  143. base["inputSchema"]["properties"] = {
  144. "subject_uri": {"type": "string", "description": "Subject URI"},
  145. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max predicates"},
  146. }
  147. base["inputSchema"]["required"] = ["subject_uri"]
  148. return base
  149. if name == "get_labels_for_subject":
  150. base["inputSchema"]["properties"] = {
  151. "subject_uri": {"type": "string", "description": "Subject URI"},
  152. }
  153. base["inputSchema"]["required"] = ["subject_uri"]
  154. return base
  155. if name == "traverse_property":
  156. base["inputSchema"]["properties"] = {
  157. "subject_uri": {"type": "string", "description": "Starting subject URI"},
  158. "property_uri": {"type": "string", "description": "Predicate URI to traverse"},
  159. "direction": {
  160. "type": "string",
  161. "enum": ["outgoing", "incoming"],
  162. "description": "Traversal direction",
  163. },
  164. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max neighbors"},
  165. }
  166. base["inputSchema"]["required"] = ["subject_uri", "property_uri"]
  167. return base
  168. if name == "describe_subject":
  169. base["inputSchema"]["properties"] = {
  170. "subject_uri": {"type": "string", "description": "Subject URI"},
  171. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max outgoing predicate/object pairs"},
  172. }
  173. base["inputSchema"]["required"] = ["subject_uri"]
  174. return base
  175. if name == "path_traverse":
  176. base["inputSchema"]["properties"] = {
  177. "subject_uri": {"type": "string", "description": "Starting subject URI"},
  178. "property_path": {
  179. "type": "string",
  180. "description": "Comma-separated list of predicate URIs (alternative to 'properties')",
  181. },
  182. "properties": {
  183. "type": "array",
  184. "items": {"type": "string"},
  185. "description": "List of predicate URIs",
  186. },
  187. "direction": {
  188. "type": "string",
  189. "enum": ["outgoing", "incoming"],
  190. "description": "Traversal direction",
  191. },
  192. "limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "Max results"},
  193. }
  194. base["inputSchema"]["required"] = ["subject_uri"]
  195. return base
  196. # ---- Relationship analytics batch (C) ----
  197. if name == "property_usage_statistics":
  198. base["inputSchema"]["properties"] = {
  199. "property_uri": {"type": "string", "description": "Predicate URI"},
  200. "examples_limit": {"type": "integer", "minimum": 1, "maximum": SPARQL_MAX_LIMIT, "description": "How many usage examples to include"},
  201. }
  202. base["inputSchema"]["required"] = ["property_uri"]
  203. return base
  204. return base
  205. # --- CORE SPARQL FUNCTION ---
  206. def _build_auth() -> Optional[HTTPDigestAuth]:
  207. if VIRTUOSO_USER and VIRTUOSO_PASS:
  208. return HTTPDigestAuth(VIRTUOSO_USER, VIRTUOSO_PASS)
  209. return None
  210. def _with_prefixes(query: str) -> str:
  211. if re.search(r"^\s*prefix\b", query, re.IGNORECASE):
  212. return query
  213. return f"{PREFIXES}\n{query}"
  214. def run_sparql(query: str) -> Dict[str, Any]:
  215. """Execute a SPARQL query against Virtuoso and return the JSON payload."""
  216. logger.debug("Sending SPARQL query: %s", query)
  217. try:
  218. response = SESSION.post(
  219. VIRTUOSO_ENDPOINT,
  220. data=_with_prefixes(query).encode("utf-8"),
  221. headers={
  222. "Accept": "application/sparql-results+json",
  223. "Content-Type": "application/sparql-query",
  224. },
  225. timeout=SPARQL_TIMEOUT,
  226. auth=_build_auth(),
  227. )
  228. if not response.ok:
  229. logger.warning("SPARQL request failed: %s", response.status_code)
  230. response.raise_for_status()
  231. return response.json()
  232. except Exception as exc: # pragma: no cover - propagate for FastAPI
  233. logger.warning("SPARQL request failed: %s", exc)
  234. raise HTTPException(status_code=500, detail=str(exc))
  235. def run_sparql_update(query: str) -> Dict[str, Any]:
  236. """Execute a SPARQL UPDATE (INSERT/DELETE) against Virtuoso."""
  237. logger.debug("Sending SPARQL update: %s", query)
  238. try:
  239. response = SESSION.post(
  240. VIRTUOSO_ENDPOINT,
  241. data=_with_prefixes(query).encode("utf-8"),
  242. headers={"Content-Type": "application/sparql-update"},
  243. timeout=SPARQL_UPDATE_TIMEOUT,
  244. auth=_build_auth(),
  245. )
  246. if not response.ok:
  247. detail = (response.text or "").strip()
  248. logger.warning("SPARQL update failed: %s", response.status_code)
  249. raise HTTPException(
  250. status_code=500,
  251. detail=detail or f"SPARQL update failed with {response.status_code}",
  252. )
  253. return {"status": "ok"}
  254. except HTTPException:
  255. raise
  256. except Exception as exc: # pragma: no cover - propagate for FastAPI
  257. logger.warning("SPARQL update failed: %s", exc)
  258. raise HTTPException(status_code=500, detail=str(exc))
  259. # --- TOOL HELPERS ---
  260. def escape_sparql_string(value: str) -> str:
  261. """Escape a string for SPARQL literal usage."""
  262. if value is None:
  263. return ""
  264. return (
  265. str(value)
  266. .replace("\\", "\\\\")
  267. .replace('"', "\\\"")
  268. .replace("\n", "\\n")
  269. .replace("\r", "")
  270. )
  271. def sanitize_term(term: str) -> str:
  272. """Escape quotes inside label searches so we can safely interpolate strings."""
  273. return escape_sparql_string(term)
  274. def _extract_limit(query: str) -> Optional[int]:
  275. match = re.search(r"\blimit\s+(\d+)\b", query, re.IGNORECASE)
  276. if not match:
  277. return None
  278. try:
  279. return int(match.group(1))
  280. except ValueError:
  281. return None
  282. def _apply_limit(query: str, default_limit: int, max_limit: int) -> str:
  283. limit = _extract_limit(query)
  284. if limit is None:
  285. return f"{query.strip()}\nLIMIT {default_limit}"
  286. if limit > max_limit:
  287. return re.sub(
  288. r"\blimit\s+\d+\b",
  289. f"LIMIT {max_limit}",
  290. query,
  291. flags=re.IGNORECASE,
  292. )
  293. return query
  294. def guard_select_query(query: str) -> str:
  295. """Enforce that raw queries are read-only and bounded by LIMIT."""
  296. lowered = query.lower()
  297. if re.search(r"\b(insert|delete|load|clear|drop|create|move|copy|add)\b", lowered):
  298. raise HTTPException(status_code=400, detail="SPARQL update operations are not allowed")
  299. if "select" not in lowered:
  300. raise HTTPException(status_code=400, detail="Only SELECT queries are allowed")
  301. return _apply_limit(query, SPARQL_DEFAULT_LIMIT, SPARQL_MAX_LIMIT)
  302. def ttl_to_sparql_insert(ttl_text: str, graph: Optional[str]) -> str:
  303. prefix_lines: List[str] = []
  304. body_lines: List[str] = []
  305. for raw_line in ttl_text.splitlines():
  306. line = raw_line.strip()
  307. if not line:
  308. continue
  309. prefix_match = re.match(r"@prefix\s+([\w-]+):\s*<([^>]+)>\s*\.", line)
  310. if prefix_match:
  311. prefix_lines.append(
  312. f"PREFIX {prefix_match.group(1)}: <{prefix_match.group(2)}>"
  313. )
  314. continue
  315. if line.startswith("@base"):
  316. # Skip @base entries for now; they are rare in our exports.
  317. continue
  318. body_lines.append(raw_line)
  319. if not body_lines:
  320. raise HTTPException(status_code=400, detail="No RDF triples found in input")
  321. prefixes = "\n".join(prefix_lines)
  322. body = "\n".join(body_lines)
  323. if graph:
  324. insert_body = f"GRAPH <{graph}> {{\n{body}\n}}"
  325. else:
  326. insert_body = body
  327. return f"{prefixes}\nINSERT DATA {{\n{insert_body}\n}}"
  328. # --- MCP TOOL IMPLEMENTATIONS ---
  329. def tool_sparql_query(input_data: Dict[str, Any]) -> Dict[str, Any]:
  330. query = input_data.get("query")
  331. if not query:
  332. raise ValueError("Missing 'query' field")
  333. guarded = guard_select_query(query)
  334. return run_sparql(guarded)
  335. def tool_list_graphs(_input: Dict[str, Any]) -> Dict[str, Any]:
  336. query = """
  337. SELECT DISTINCT ?g WHERE {
  338. GRAPH ?g { ?s ?p ?o }
  339. }
  340. LIMIT 50
  341. """
  342. return run_sparql(query)
  343. def tool_search_label(input_data: Dict[str, Any]) -> Dict[str, Any]:
  344. term = input_data.get("term", "")
  345. sanitized = sanitize_term(term)
  346. limit = int(input_data.get("limit", 20))
  347. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  348. query = f"""
  349. SELECT ?s ?label WHERE {{
  350. ?s rdfs:label ?label .
  351. FILTER(CONTAINS(LCASE(?label), LCASE(\"{sanitized}\")))
  352. }}
  353. LIMIT {limit}
  354. """
  355. return run_sparql(query)
  356. def tool_get_entities_by_type(input_data: Dict[str, Any]) -> Dict[str, Any]:
  357. type_uri = input_data.get("type_uri")
  358. if not type_uri:
  359. raise ValueError("Missing 'type_uri' field")
  360. limit = int(input_data.get("limit", 50))
  361. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  362. query = f"""
  363. SELECT ?s WHERE {{
  364. ?s rdf:type <{type_uri}> .
  365. }}
  366. LIMIT {limit}
  367. """
  368. return run_sparql(query)
  369. def tool_get_predicates_for_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  370. subject_uri = input_data.get("subject_uri")
  371. if not subject_uri:
  372. raise ValueError("Missing 'subject_uri' field")
  373. limit = int(input_data.get("limit", 50))
  374. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  375. query = f"""
  376. SELECT DISTINCT ?p WHERE {{
  377. <{subject_uri}> ?p ?o .
  378. }}
  379. LIMIT {limit}
  380. """
  381. return run_sparql(query)
  382. def tool_get_labels_for_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  383. subject_uri = input_data.get("subject_uri")
  384. if not subject_uri:
  385. raise ValueError("Missing 'subject_uri' field")
  386. query = f"""
  387. SELECT ?label WHERE {{
  388. <{subject_uri}> rdfs:label ?label .
  389. }}
  390. LIMIT 20
  391. """
  392. return run_sparql(query)
  393. def tool_traverse_property(input_data: Dict[str, Any]) -> Dict[str, Any]:
  394. subject_uri = input_data.get("subject_uri")
  395. property_uri = input_data.get("property_uri")
  396. if not subject_uri or not property_uri:
  397. raise ValueError("Missing 'subject_uri' or 'property_uri'")
  398. direction = input_data.get("direction", "outgoing")
  399. limit = int(input_data.get("limit", 50))
  400. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  401. if direction not in {"outgoing", "incoming"}:
  402. raise ValueError("direction must be 'outgoing' or 'incoming'")
  403. if direction == "outgoing":
  404. triple = f"<{subject_uri}> <{property_uri}> ?neighbor ."
  405. else:
  406. triple = f"?neighbor <{property_uri}> <{subject_uri}> ."
  407. query = f"""
  408. SELECT ?neighbor ?label ?description WHERE {{
  409. {triple}
  410. OPTIONAL {{ ?neighbor rdfs:label ?label }}
  411. OPTIONAL {{ ?neighbor dc:description ?description }}
  412. }}
  413. LIMIT {limit}
  414. """
  415. return run_sparql(query)
  416. def tool_list_classes(input_data: Dict[str, Any]) -> Dict[str, Any]:
  417. """List ontology classes (rdfs:Class and owl:Class) with optional term filtering."""
  418. term = sanitize_term(input_data.get("term", ""))
  419. limit = int(input_data.get("limit", 50))
  420. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  421. term_filter = ""
  422. if term:
  423. term_filter = f"""
  424. FILTER(
  425. CONTAINS(LCASE(COALESCE(STR(?label), STR(?class))), LCASE(\"{term}\")) ||
  426. CONTAINS(LCASE(COALESCE(STR(?comment), \"\")), LCASE(\"{term}\"))
  427. )
  428. """
  429. query = f"""
  430. SELECT DISTINCT ?class ?label ?comment WHERE {{
  431. {{ ?class rdf:type rdfs:Class . }}
  432. UNION
  433. {{ ?class rdf:type <http://www.w3.org/2002/07/owl#Class> . }}
  434. OPTIONAL {{ ?class rdfs:label ?label }}
  435. OPTIONAL {{ ?class rdfs:comment ?comment }}
  436. {term_filter}
  437. }}
  438. LIMIT {limit}
  439. """
  440. return run_sparql(query)
  441. def tool_list_properties(input_data: Dict[str, Any]) -> Dict[str, Any]:
  442. """List ontology properties with optional term/domain/range filtering."""
  443. term = sanitize_term(input_data.get("term", ""))
  444. domain_uri = input_data.get("domain_uri")
  445. range_uri = input_data.get("range_uri")
  446. limit = int(input_data.get("limit", 100))
  447. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  448. filters = []
  449. if term:
  450. filters.append(
  451. f"""
  452. FILTER(
  453. CONTAINS(LCASE(COALESCE(STR(?label), STR(?property))), LCASE(\"{term}\")) ||
  454. CONTAINS(LCASE(COALESCE(STR(?comment), \"\")), LCASE(\"{term}\"))
  455. )
  456. """
  457. )
  458. if domain_uri:
  459. filters.append(f"FILTER(?domain = <{domain_uri}>)")
  460. if range_uri:
  461. filters.append(f"FILTER(?range = <{range_uri}>)")
  462. query = f"""
  463. SELECT DISTINCT ?property ?label ?comment ?domain ?range WHERE {{
  464. {{ ?property rdf:type rdf:Property . }}
  465. UNION
  466. {{ ?property rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> . }}
  467. UNION
  468. {{ ?property rdf:type <http://www.w3.org/2002/07/owl#DatatypeProperty> . }}
  469. OPTIONAL {{ ?property rdfs:label ?label }}
  470. OPTIONAL {{ ?property rdfs:comment ?comment }}
  471. OPTIONAL {{ ?property rdfs:domain ?domain }}
  472. OPTIONAL {{ ?property rdfs:range ?range }}
  473. {' '.join(filters)}
  474. }}
  475. LIMIT {limit}
  476. """
  477. return run_sparql(query)
  478. def tool_describe_class(input_data: Dict[str, Any]) -> Dict[str, Any]:
  479. """Describe a class and include properties that declare it as rdfs:domain."""
  480. class_uri = input_data.get("class_uri")
  481. if not class_uri:
  482. raise ValueError("Missing 'class_uri' field")
  483. query = f"""
  484. SELECT ?label ?comment ?property ?propertyLabel ?propertyComment ?range WHERE {{
  485. OPTIONAL {{ <{class_uri}> rdfs:label ?label }}
  486. OPTIONAL {{ <{class_uri}> rdfs:comment ?comment }}
  487. OPTIONAL {{
  488. ?property rdfs:domain <{class_uri}> .
  489. OPTIONAL {{ ?property rdfs:label ?propertyLabel }}
  490. OPTIONAL {{ ?property rdfs:comment ?propertyComment }}
  491. OPTIONAL {{ ?property rdfs:range ?range }}
  492. }}
  493. }}
  494. LIMIT {SPARQL_MAX_LIMIT}
  495. """
  496. return run_sparql(query)
  497. def tool_describe_property(input_data: Dict[str, Any]) -> Dict[str, Any]:
  498. """Describe a property and include usage examples from the graph."""
  499. property_uri = input_data.get("property_uri")
  500. if not property_uri:
  501. raise ValueError("Missing 'property_uri' field")
  502. usage_limit = int(input_data.get("usage_limit", 10))
  503. usage_limit = min(max(usage_limit, 1), SPARQL_MAX_LIMIT)
  504. metadata_query = f"""
  505. SELECT ?label ?comment ?domain ?range ?type WHERE {{
  506. OPTIONAL {{ <{property_uri}> rdfs:label ?label }}
  507. OPTIONAL {{ <{property_uri}> rdfs:comment ?comment }}
  508. OPTIONAL {{ <{property_uri}> rdfs:domain ?domain }}
  509. OPTIONAL {{ <{property_uri}> rdfs:range ?range }}
  510. OPTIONAL {{ <{property_uri}> rdf:type ?type }}
  511. }}
  512. LIMIT {SPARQL_MAX_LIMIT}
  513. """
  514. usage_query = f"""
  515. SELECT ?subject ?subjectLabel ?object ?objectLabel WHERE {{
  516. ?subject <{property_uri}> ?object .
  517. OPTIONAL {{ ?subject rdfs:label ?subjectLabel }}
  518. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  519. }}
  520. LIMIT {usage_limit}
  521. """
  522. return {
  523. "metadata": run_sparql(metadata_query),
  524. "usage": run_sparql(usage_query),
  525. }
  526. def tool_describe_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  527. subject_uri = input_data.get("subject_uri")
  528. if not subject_uri:
  529. raise ValueError("Missing 'subject_uri' field")
  530. limit = int(input_data.get("limit", 50))
  531. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  532. query = f"""
  533. SELECT ?predicate ?object ?objectLabel WHERE {{
  534. <{subject_uri}> ?predicate ?object .
  535. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  536. }}
  537. LIMIT {limit}
  538. """
  539. return run_sparql(query)
  540. def tool_path_traverse(input_data: Dict[str, Any]) -> Dict[str, Any]:
  541. subject_uri = input_data.get("subject_uri")
  542. property_path = input_data.get("property_path") or input_data.get("properties")
  543. if not subject_uri or not property_path:
  544. raise ValueError("Missing 'subject_uri' or 'property_path'")
  545. if isinstance(property_path, str):
  546. property_path = [p.strip() for p in property_path.split(",") if p.strip()]
  547. if not isinstance(property_path, list) or not property_path:
  548. raise ValueError("'property_path' must be a non-empty list of property URIs")
  549. direction = input_data.get("direction", "outgoing")
  550. limit = int(input_data.get("limit", 50))
  551. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  552. statements = []
  553. optional_lines = []
  554. select_terms = []
  555. prev_subject = f"<{subject_uri}>"
  556. for idx, prop_uri in enumerate(property_path, start=1):
  557. step_var = f"?n{idx}"
  558. if direction == "outgoing":
  559. statements.append(f"{prev_subject} <{prop_uri}> {step_var} .")
  560. else:
  561. statements.append(f"{step_var} <{prop_uri}> {prev_subject} .")
  562. select_terms.append(step_var)
  563. optional_lines.append(f"OPTIONAL {{ {step_var} rdfs:label {step_var}Label }}")
  564. optional_lines.append(f"OPTIONAL {{ {step_var} dc:description {step_var}Description }}")
  565. prev_subject = step_var
  566. select_clause = " ".join(select_terms)
  567. query = f"""
  568. SELECT {select_clause} WHERE {{
  569. {'\n '.join(statements)}
  570. {'\n '.join(optional_lines)}
  571. }}
  572. LIMIT {limit}
  573. """
  574. return {
  575. "property_path": property_path,
  576. "direction": direction,
  577. "result": run_sparql(query),
  578. }
  579. def tool_property_usage_statistics(input_data: Dict[str, Any]) -> Dict[str, Any]:
  580. property_uri = input_data.get("property_uri")
  581. if not property_uri:
  582. raise ValueError("Missing 'property_uri' field")
  583. examples_limit = int(input_data.get("examples_limit", 5))
  584. examples_limit = min(max(examples_limit, 1), SPARQL_MAX_LIMIT)
  585. count_query = f"""
  586. SELECT (COUNT(DISTINCT ?subject) AS ?usageCount) WHERE {{
  587. ?subject <{property_uri}> ?object .
  588. }}
  589. LIMIT {SPARQL_MAX_LIMIT}
  590. """
  591. usage_query = f"""
  592. SELECT ?subject ?subjectLabel ?object ?objectLabel WHERE {{
  593. ?subject <{property_uri}> ?object .
  594. OPTIONAL {{ ?subject rdfs:label ?subjectLabel }}
  595. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  596. }}
  597. LIMIT {examples_limit}
  598. """
  599. return {
  600. "count": run_sparql(count_query),
  601. "examples": run_sparql(usage_query),
  602. }
  603. def tool_batch_insert(input_data: Dict[str, Any]) -> Dict[str, Any]:
  604. ttl_text = input_data.get("ttl")
  605. triples = input_data.get("triples")
  606. graph = input_data.get("graph") or GRAPH_URI
  607. if not ttl_text and not triples:
  608. raise ValueError("Provide either 'ttl' text or 'triples' list")
  609. def _format_object(obj_value: Any, obj_type: str, datatype: Optional[str], lang: Optional[str]) -> str:
  610. if obj_type == "uri":
  611. return f"<{obj_value}>"
  612. if obj_type == "literal":
  613. return f'"{escape_sparql_string(obj_value)}"'
  614. if obj_type == "typed_literal":
  615. if not datatype:
  616. raise ValueError("Missing datatype for typed_literal")
  617. return f'"{escape_sparql_string(obj_value)}"^^<{datatype}>'
  618. if obj_type == "lang_literal":
  619. if not lang:
  620. raise ValueError("Missing lang for lang_literal")
  621. return f'"{escape_sparql_string(obj_value)}"@{lang}'
  622. raise ValueError(f"Unknown object_type: {obj_type}")
  623. if ttl_text:
  624. query = ttl_to_sparql_insert(ttl_text, graph)
  625. else:
  626. lines = []
  627. for triple in triples:
  628. subj = triple.get("subject")
  629. pred = triple.get("predicate")
  630. obj_value = triple.get("object")
  631. if not subj or not pred or obj_value is None:
  632. raise ValueError("Each triple must provide subject, predicate, and object")
  633. obj_type = triple.get("object_type", "uri")
  634. datatype = triple.get("datatype")
  635. lang = triple.get("lang")
  636. obj_text = _format_object(obj_value, obj_type, datatype, lang)
  637. lines.append(f"<{subj}> <{pred}> {obj_text} .")
  638. ttl_bulk = "\n".join(lines)
  639. query = ttl_to_sparql_insert(ttl_bulk, graph)
  640. result = run_sparql_update(query)
  641. return {**result, "query": query}
  642. def tool_load_examples(input_data: Dict[str, Any]) -> Dict[str, Any]:
  643. if not ALLOW_EXAMPLE_LOAD:
  644. raise HTTPException(status_code=403, detail="Example loading is disabled")
  645. requested = input_data.get("files") or []
  646. if isinstance(requested, str):
  647. requested = [requested]
  648. graph = input_data.get("graph") or EXAMPLE_GRAPH
  649. files_to_load = []
  650. if requested:
  651. files_to_load = requested
  652. else:
  653. files_to_load = sorted(p.name for p in EXAMPLES_DIR.glob("*.ttl"))
  654. if not files_to_load:
  655. raise HTTPException(status_code=404, detail="No example files available")
  656. results = []
  657. for filename in files_to_load:
  658. file_path = (EXAMPLES_DIR / filename).resolve()
  659. if not file_path.exists():
  660. raise HTTPException(status_code=400, detail=f"Missing example file: {filename}")
  661. if EXAMPLES_DIR not in file_path.parents and file_path != EXAMPLES_DIR:
  662. raise HTTPException(status_code=400, detail="Invalid example file path")
  663. ttl_text = file_path.read_text(encoding="utf-8")
  664. update_query = ttl_to_sparql_insert(ttl_text, graph)
  665. run_sparql_update(update_query)
  666. results.append({"file": filename, "graph": graph})
  667. return {"loaded": results}
  668. def tool_insert_triple(input_data: Dict[str, Any]) -> Dict[str, Any]:
  669. subject = input_data.get("subject")
  670. predicate = input_data.get("predicate")
  671. obj = input_data.get("object")
  672. obj_type = input_data.get("object_type", "uri")
  673. graph = input_data.get("graph")
  674. if not subject or not predicate or obj is None:
  675. raise ValueError("Missing 'subject', 'predicate', or 'object' field")
  676. if obj_type == "uri":
  677. obj_value = f"<{obj}>"
  678. elif obj_type == "literal":
  679. obj_value = f"\"{escape_sparql_string(obj)}\""
  680. elif obj_type == "typed_literal":
  681. datatype = input_data.get("datatype")
  682. if not datatype:
  683. raise ValueError("Missing 'datatype' for typed_literal")
  684. obj_value = f"\"{escape_sparql_string(obj)}\"^^<{datatype}>"
  685. elif obj_type == "lang_literal":
  686. lang = input_data.get("lang")
  687. if not lang:
  688. raise ValueError("Missing 'lang' for lang_literal")
  689. obj_value = f"\"{escape_sparql_string(obj)}\"@{lang}"
  690. else:
  691. raise ValueError("Unknown object_type")
  692. ttl = f"<{subject}> <{predicate}> {obj_value} ."
  693. update_query = ttl_to_sparql_insert(ttl, graph)
  694. try:
  695. result = run_sparql_update(update_query)
  696. except HTTPException as exc:
  697. detail = f"{exc.detail}\n\nSPARQL:\n{update_query}"
  698. raise HTTPException(status_code=exc.status_code, detail=detail)
  699. return {**result, "query": update_query}
  700. # --- TOOL REGISTRY ---
# Maps the public tool name (as sent by clients) to the callable that
# implements it. Each callable is invoked with the request's input dict.
# The dict is handed to load_domain_layers() below, which passes it to each
# domain layer's register_layer hook.
TOOLS: Dict[str, Callable[[Dict[str, Any]], Any]] = {
    "sparql_query": tool_sparql_query,
    "list_graphs": tool_list_graphs,
    "search_label": tool_search_label,
    "get_entities_by_type": tool_get_entities_by_type,
    "get_predicates_for_subject": tool_get_predicates_for_subject,
    "get_labels_for_subject": tool_get_labels_for_subject,
    "traverse_property": tool_traverse_property,
    "list_classes": tool_list_classes,
    "list_properties": tool_list_properties,
    "describe_class": tool_describe_class,
    "describe_property": tool_describe_property,
    "describe_subject": tool_describe_subject,
    "path_traverse": tool_path_traverse,
    "property_usage_statistics": tool_property_usage_statistics,
    "batch_insert": tool_batch_insert,
    "insert_triple": tool_insert_triple,
    "load_examples": tool_load_examples,
}
  720. def load_domain_layers(tools: Dict[str, Callable[[Dict[str, Any]], Any]]) -> None:
  721. raw = os.getenv("DOMAIN_LAYERS", "garden_layer.plugin")
  722. modules = [item.strip() for item in raw.split(",") if item.strip()]
  723. if not modules:
  724. return
  725. for module_name in modules:
  726. module = None
  727. try:
  728. module = import_module(module_name)
  729. except ImportError as exc:
  730. base = module_name.split(".", 1)[0]
  731. if base != module_name:
  732. try:
  733. module = import_module(base)
  734. logger.info("Falling back to base module '%s' for domain layer '%s'", base, module_name)
  735. except ImportError:
  736. logger.warning(
  737. "Domain layer '%s' could not be imported and base module '%s' is missing: %s",
  738. module_name,
  739. base,
  740. exc,
  741. )
  742. else:
  743. logger.warning("Domain layer '%s' could not be imported: %s", module_name, exc)
  744. if module is None:
  745. continue
  746. register = getattr(module, "register_layer", None)
  747. if not callable(register):
  748. logger.warning("Domain layer '%s' does not expose register_layer", module_name)
  749. continue
  750. try:
  751. register(tools)
  752. logger.info("Loaded domain layer '%s'", module_name)
  753. except Exception as exc:
  754. logger.exception("Domain layer '%s' failed to register: %s", module_name, exc)
# Give the configured domain layers a chance to register against TOOLS at
# import time, before the server starts handling requests.
load_domain_layers(TOOLS)

# One-line human-readable description per built-in tool name. Looked up with
# TOOL_DOCS.get(name, "") by the legacy /mcp response, so tools without an
# entry here simply get an empty description.
TOOL_DOCS: Dict[str, str] = {
    "sparql_query": "Execute a bounded SELECT query and return the JSON result.",
    "list_graphs": "List up to 50 active graph URIs.",
    "search_label": "Search rdfs:label values that contain a term (case-insensitive).",
    "get_entities_by_type": "List subjects of a given rdf:type.",
    "get_predicates_for_subject": "List distinct predicates used by a subject.",
    "get_labels_for_subject": "Fetch rdfs:label values for a subject.",
    "traverse_property": "Traverse a property (incoming or outgoing) for a subject and return labels/descriptions.",
    "list_classes": "List ontology classes with optional label/comment term filtering.",
    "list_properties": "List ontology properties with optional term/domain/range filters.",
    "describe_class": "Describe a class and list properties that use it as rdfs:domain.",
    "describe_property": "Describe a property (label/comment/domain/range/type) and sample usage.",
    "describe_subject": "Return subject predicates/objects (with labels) to inspect an individual node.",
    "path_traverse": "Follow a property path (list of predicates) from a subject, returning each step's nodes.",
    "property_usage_statistics": "Count how often a property is used and sample subjects/objects.",
    "batch_insert": "Insert multiple triples or TTL at once with a single guarded update.",
    "insert_triple": "Insert a single triple (useful for debugging updates).",
    "load_examples": "Load Turtle fixtures from the `examples/` directory when MCP_ALLOW_EXAMPLE_LOAD=true.",
}
  775. # --- MCP ENDPOINT ---
  776. @app.post("/mcp")
  777. async def handle_mcp(http_request: Request):
  778. """Minimal MCP-ish JSON-RPC 2.0 endpoint on POST /mcp.
  779. Backward compatible legacy mode:
  780. {"tool": "search_label", "input": {...}}
  781. Minimal JSON-RPC mode (first step towards MCP compliance):
  782. {"jsonrpc":"2.0","id":1,"method":"initialize","params":{...}}
  783. {"jsonrpc":"2.0","id":2,"method":"tools/list","params":{...}}
  784. {"jsonrpc":"2.0","id":3,"method":"tools/call","params":{ "tool": "...", "params": {...} }}
  785. """
  786. body = None
  787. try:
  788. body = await http_request.json()
  789. except Exception:
  790. body = None
  791. # ---- Legacy mode ----
  792. if isinstance(body, dict) and "tool" in body:
  793. legacy = ToolRequest(**body)
  794. tool_name = legacy.tool
  795. input_data = legacy.input or {}
  796. client_host = http_request.client.host if http_request.client else "unknown"
  797. trimmed_input = json.dumps(input_data, ensure_ascii=False, default=str)
  798. if len(trimmed_input) > 1024:
  799. trimmed_input = f"{trimmed_input[:1024]}…"
  800. timestamp = datetime.now(timezone.utc).isoformat()
  801. tool_logger.info(
  802. "tool=%s client=%s time=%s input=%s",
  803. tool_name,
  804. client_host,
  805. timestamp,
  806. trimmed_input,
  807. )
  808. if tool_name not in TOOLS:
  809. raise HTTPException(status_code=400, detail=f"Unknown tool: {tool_name}")
  810. try:
  811. result = TOOLS[tool_name](input_data)
  812. return {
  813. "status": "ok",
  814. "tool": tool_name,
  815. "description": TOOL_DOCS.get(tool_name, ""),
  816. "result": result,
  817. }
  818. except Exception as exc:
  819. logger.error("Tool %s failed: %s", tool_name, exc)
  820. raise HTTPException(status_code=500, detail=str(exc))
  821. # ---- JSON-RPC 2.0 mode ----
  822. if not isinstance(body, dict):
  823. return mcp_error(None, "Invalid JSON-RPC request", code=-32600)
  824. try:
  825. rpc_req = JsonRpcRequest(**body)
  826. except Exception as exc:
  827. # If body is malformed, still surface the id if present.
  828. rpc_id = body.get("id")
  829. logger.warning("Invalid JSON-RPC request: %s", exc)
  830. return mcp_error(rpc_id, "Invalid JSON-RPC request", code=-32600)
  831. method = rpc_req.method
  832. rpc_id = rpc_req.id
  833. params = rpc_req.params or {}
  834. if method == "initialize":
  835. tools = [_mcp_tool_definition(name) for name in sorted(TOOLS.keys())]
  836. return mcp_result(
  837. rpc_id,
  838. {
  839. "protocolVersion": "0.1",
  840. "capabilities": {
  841. "tools": True,
  842. "list": True,
  843. "call": True,
  844. },
  845. "tools": tools,
  846. },
  847. )
  848. if method in {"tools/list", "tools/listTools"}:
  849. tools = [_mcp_tool_definition(name) for name in sorted(TOOLS.keys())]
  850. return mcp_result(rpc_id, {"tools": tools})
  851. if method in {"tools/call", "tools/callTool"}:
  852. # Different clients sometimes wrap the call slightly differently.
  853. tool_name = (
  854. params.get("tool")
  855. or params.get("name")
  856. or params.get("toolName")
  857. )
  858. input_data = (
  859. params.get("params")
  860. or params.get("input")
  861. or params.get("arguments")
  862. or {}
  863. )
  864. if not tool_name:
  865. return mcp_error(rpc_id, "Missing tool name", code=-32602)
  866. if tool_name not in TOOLS:
  867. return mcp_error(rpc_id, f"Unknown tool: {tool_name}", code=-32601)
  868. try:
  869. result = TOOLS[tool_name](input_data)
  870. return mcp_result(rpc_id, {"result": result})
  871. except HTTPException as exc:
  872. return mcp_error(rpc_id, str(exc.detail), code=exc.status_code)
  873. except Exception as exc:
  874. logger.error("Tool %s failed: %s", tool_name, exc)
  875. return mcp_error(rpc_id, str(exc), code=-32000)
  876. return mcp_error(rpc_id, f"Method not found: {method}", code=-32601)
  877. # --- HEALTH CHECK ---
  878. @app.get("/")
  879. def root():
  880. return {
  881. "status": "MCP server running",
  882. "tools": list(TOOLS.keys()),
  883. "virtuoso": VIRTUOSO_ENDPOINT,
  884. "guardrails": {
  885. "default_limit": SPARQL_DEFAULT_LIMIT,
  886. "max_limit": SPARQL_MAX_LIMIT,
  887. "allow_example_load": ALLOW_EXAMPLE_LOAD,
  888. "turtle_examples": True,
  889. },
  890. }
  891. @app.get("/health")
  892. def health():
  893. return root()