# virtuoso_mcp.py
  1. import logging
  2. import os
  3. import re
  4. from importlib import import_module
  5. from pathlib import Path
  6. from typing import Any, Callable, Dict, List, Optional
  7. import requests
  8. from requests.auth import HTTPDigestAuth
  9. from fastapi import FastAPI, HTTPException
  10. from pydantic import BaseModel
  11. LOG_LEVEL = os.getenv("MCP_LOG_LEVEL", "INFO").upper()
  12. logging.basicConfig(level=getattr(logging, LOG_LEVEL, logging.INFO))
  13. logger = logging.getLogger("virtuoso_mcp")
  14. app = FastAPI(title="MCP Server")
  15. # --- CONFIG ---
  16. VIRTUOSO_ENDPOINT = os.getenv("VIRTUOSO_ENDPOINT") or os.getenv(
  17. "VIRTUOSO_SPARQL", "http://localhost:8891/sparql"
  18. )
  19. VIRTUOSO_USER = os.getenv("VIRTUOSO_USER")
  20. VIRTUOSO_PASS = os.getenv("VIRTUOSO_PASS")
  21. SPARQL_TIMEOUT = float(os.getenv("SPARQL_TIMEOUT", 10.0))
  22. SPARQL_UPDATE_TIMEOUT = float(os.getenv("SPARQL_UPDATE_TIMEOUT", 15.0))
  23. SPARQL_DEFAULT_LIMIT = int(os.getenv("SPARQL_DEFAULT_LIMIT", 100))
  24. SPARQL_MAX_LIMIT = int(os.getenv("SPARQL_MAX_LIMIT", 500))
  25. GRAPH_URI = os.getenv("GRAPH_URI", "http://world.eu.org/example1")
  26. EXAMPLES_DIR = Path(__file__).resolve().parent / "examples"
  27. EXAMPLE_GRAPH = os.getenv(
  28. "EXAMPLE_GRAPH", "http://world.eu.org/cannabis-breeding#test"
  29. )
  30. ALLOW_EXAMPLE_LOAD = os.getenv("MCP_ALLOW_EXAMPLE_LOAD", "false").lower() == "true"
  31. SESSION = requests.Session()
  32. PREFIXES = f"""
  33. PREFIX : <{GRAPH_URI}>
  34. PREFIX cb: <http://world.eu.org/cannabis-breeding#>
  35. PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
  36. PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
  37. PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
  38. PREFIX dc: <http://purl.org/dc/elements/1.1/>
  39. """.strip()
# --- MODELS ---
  41. class SparqlQueryRequest(BaseModel):
  42. query: str
  43. class ToolRequest(BaseModel):
  44. tool: str
  45. input: Dict[str, Any] = {}
# --- CORE SPARQL FUNCTION ---
  47. def _build_auth() -> Optional[HTTPDigestAuth]:
  48. if VIRTUOSO_USER and VIRTUOSO_PASS:
  49. return HTTPDigestAuth(VIRTUOSO_USER, VIRTUOSO_PASS)
  50. return None
  51. def _with_prefixes(query: str) -> str:
  52. if re.search(r"^\s*prefix\b", query, re.IGNORECASE):
  53. return query
  54. return f"{PREFIXES}\n{query}"
  55. def run_sparql(query: str) -> Dict[str, Any]:
  56. """Execute a SPARQL query against Virtuoso and return the JSON payload."""
  57. logger.debug("Sending SPARQL query: %s", query)
  58. try:
  59. response = SESSION.post(
  60. VIRTUOSO_ENDPOINT,
  61. data=_with_prefixes(query).encode("utf-8"),
  62. headers={
  63. "Accept": "application/sparql-results+json",
  64. "Content-Type": "application/sparql-query",
  65. },
  66. timeout=SPARQL_TIMEOUT,
  67. auth=_build_auth(),
  68. )
  69. if not response.ok:
  70. logger.warning("SPARQL request failed: %s", response.status_code)
  71. response.raise_for_status()
  72. return response.json()
  73. except Exception as exc: # pragma: no cover - propagate for FastAPI
  74. logger.warning("SPARQL request failed: %s", exc)
  75. raise HTTPException(status_code=500, detail=str(exc))
  76. def run_sparql_update(query: str) -> Dict[str, Any]:
  77. """Execute a SPARQL UPDATE (INSERT/DELETE) against Virtuoso."""
  78. logger.debug("Sending SPARQL update: %s", query)
  79. try:
  80. response = SESSION.post(
  81. VIRTUOSO_ENDPOINT,
  82. data=_with_prefixes(query).encode("utf-8"),
  83. headers={"Content-Type": "application/sparql-update"},
  84. timeout=SPARQL_UPDATE_TIMEOUT,
  85. auth=_build_auth(),
  86. )
  87. if not response.ok:
  88. detail = (response.text or "").strip()
  89. logger.warning("SPARQL update failed: %s", response.status_code)
  90. raise HTTPException(
  91. status_code=500,
  92. detail=detail or f"SPARQL update failed with {response.status_code}",
  93. )
  94. return {"status": "ok"}
  95. except HTTPException:
  96. raise
  97. except Exception as exc: # pragma: no cover - propagate for FastAPI
  98. logger.warning("SPARQL update failed: %s", exc)
  99. raise HTTPException(status_code=500, detail=str(exc))
# --- TOOL HELPERS ---
  101. def escape_sparql_string(value: str) -> str:
  102. """Escape a string for SPARQL literal usage."""
  103. if value is None:
  104. return ""
  105. return (
  106. str(value)
  107. .replace("\\", "\\\\")
  108. .replace('"', "\\\"")
  109. .replace("\n", "\\n")
  110. .replace("\r", "")
  111. )
  112. def sanitize_term(term: str) -> str:
  113. """Escape quotes inside label searches so we can safely interpolate strings."""
  114. return escape_sparql_string(term)
  115. def _extract_limit(query: str) -> Optional[int]:
  116. match = re.search(r"\blimit\s+(\d+)\b", query, re.IGNORECASE)
  117. if not match:
  118. return None
  119. try:
  120. return int(match.group(1))
  121. except ValueError:
  122. return None
  123. def _apply_limit(query: str, default_limit: int, max_limit: int) -> str:
  124. limit = _extract_limit(query)
  125. if limit is None:
  126. return f"{query.strip()}\nLIMIT {default_limit}"
  127. if limit > max_limit:
  128. return re.sub(
  129. r"\blimit\s+\d+\b",
  130. f"LIMIT {max_limit}",
  131. query,
  132. flags=re.IGNORECASE,
  133. )
  134. return query
  135. def guard_select_query(query: str) -> str:
  136. """Enforce that raw queries are read-only and bounded by LIMIT."""
  137. lowered = query.lower()
  138. if re.search(r"\b(insert|delete|load|clear|drop|create|move|copy|add)\b", lowered):
  139. raise HTTPException(status_code=400, detail="SPARQL update operations are not allowed")
  140. if "select" not in lowered:
  141. raise HTTPException(status_code=400, detail="Only SELECT queries are allowed")
  142. return _apply_limit(query, SPARQL_DEFAULT_LIMIT, SPARQL_MAX_LIMIT)
  143. def ttl_to_sparql_insert(ttl_text: str, graph: Optional[str]) -> str:
  144. prefix_lines: List[str] = []
  145. body_lines: List[str] = []
  146. for raw_line in ttl_text.splitlines():
  147. line = raw_line.strip()
  148. if not line:
  149. continue
  150. prefix_match = re.match(r"@prefix\s+([\w-]+):\s*<([^>]+)>\s*\.", line)
  151. if prefix_match:
  152. prefix_lines.append(
  153. f"PREFIX {prefix_match.group(1)}: <{prefix_match.group(2)}>"
  154. )
  155. continue
  156. if line.startswith("@base"):
  157. # Skip @base entries for now; they are rare in our exports.
  158. continue
  159. body_lines.append(raw_line)
  160. if not body_lines:
  161. raise HTTPException(status_code=400, detail="No RDF triples found in input")
  162. prefixes = "\n".join(prefix_lines)
  163. body = "\n".join(body_lines)
  164. if graph:
  165. insert_body = f"GRAPH <{graph}> {{\n{body}\n}}"
  166. else:
  167. insert_body = body
  168. return f"{prefixes}\nINSERT DATA {{\n{insert_body}\n}}"
# --- MCP TOOL IMPLEMENTATIONS ---
  170. def tool_sparql_query(input_data: Dict[str, Any]) -> Dict[str, Any]:
  171. query = input_data.get("query")
  172. if not query:
  173. raise ValueError("Missing 'query' field")
  174. guarded = guard_select_query(query)
  175. return run_sparql(guarded)
  176. def tool_list_graphs(_input: Dict[str, Any]) -> Dict[str, Any]:
  177. query = """
  178. SELECT DISTINCT ?g WHERE {
  179. GRAPH ?g { ?s ?p ?o }
  180. }
  181. LIMIT 50
  182. """
  183. return run_sparql(query)
  184. def tool_search_label(input_data: Dict[str, Any]) -> Dict[str, Any]:
  185. term = input_data.get("term", "")
  186. sanitized = sanitize_term(term)
  187. limit = int(input_data.get("limit", 20))
  188. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  189. query = f"""
  190. SELECT ?s ?label WHERE {{
  191. ?s rdfs:label ?label .
  192. FILTER(CONTAINS(LCASE(?label), LCASE(\"{sanitized}\")))
  193. }}
  194. LIMIT {limit}
  195. """
  196. return run_sparql(query)
  197. def tool_get_entities_by_type(input_data: Dict[str, Any]) -> Dict[str, Any]:
  198. type_uri = input_data.get("type_uri")
  199. if not type_uri:
  200. raise ValueError("Missing 'type_uri' field")
  201. limit = int(input_data.get("limit", 50))
  202. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  203. query = f"""
  204. SELECT ?s WHERE {{
  205. ?s rdf:type <{type_uri}> .
  206. }}
  207. LIMIT {limit}
  208. """
  209. return run_sparql(query)
  210. def tool_get_predicates_for_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  211. subject_uri = input_data.get("subject_uri")
  212. if not subject_uri:
  213. raise ValueError("Missing 'subject_uri' field")
  214. limit = int(input_data.get("limit", 50))
  215. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  216. query = f"""
  217. SELECT DISTINCT ?p WHERE {{
  218. <{subject_uri}> ?p ?o .
  219. }}
  220. LIMIT {limit}
  221. """
  222. return run_sparql(query)
  223. def tool_get_labels_for_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  224. subject_uri = input_data.get("subject_uri")
  225. if not subject_uri:
  226. raise ValueError("Missing 'subject_uri' field")
  227. query = f"""
  228. SELECT ?label WHERE {{
  229. <{subject_uri}> rdfs:label ?label .
  230. }}
  231. LIMIT 20
  232. """
  233. return run_sparql(query)
  234. def tool_traverse_property(input_data: Dict[str, Any]) -> Dict[str, Any]:
  235. subject_uri = input_data.get("subject_uri")
  236. property_uri = input_data.get("property_uri")
  237. if not subject_uri or not property_uri:
  238. raise ValueError("Missing 'subject_uri' or 'property_uri'")
  239. direction = input_data.get("direction", "outgoing")
  240. limit = int(input_data.get("limit", 50))
  241. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  242. if direction not in {"outgoing", "incoming"}:
  243. raise ValueError("direction must be 'outgoing' or 'incoming'")
  244. if direction == "outgoing":
  245. triple = f"<{subject_uri}> <{property_uri}> ?neighbor ."
  246. else:
  247. triple = f"?neighbor <{property_uri}> <{subject_uri}> ."
  248. query = f"""
  249. SELECT ?neighbor ?label ?description WHERE {{
  250. {triple}
  251. OPTIONAL {{ ?neighbor rdfs:label ?label }}
  252. OPTIONAL {{ ?neighbor dc:description ?description }}
  253. }}
  254. LIMIT {limit}
  255. """
  256. return run_sparql(query)
  257. def tool_list_classes(input_data: Dict[str, Any]) -> Dict[str, Any]:
  258. """List ontology classes (rdfs:Class and owl:Class) with optional term filtering."""
  259. term = sanitize_term(input_data.get("term", ""))
  260. limit = int(input_data.get("limit", 50))
  261. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  262. term_filter = ""
  263. if term:
  264. term_filter = f"""
  265. FILTER(
  266. CONTAINS(LCASE(COALESCE(STR(?label), STR(?class))), LCASE(\"{term}\")) ||
  267. CONTAINS(LCASE(COALESCE(STR(?comment), \"\")), LCASE(\"{term}\"))
  268. )
  269. """
  270. query = f"""
  271. SELECT DISTINCT ?class ?label ?comment WHERE {{
  272. {{ ?class rdf:type rdfs:Class . }}
  273. UNION
  274. {{ ?class rdf:type <http://www.w3.org/2002/07/owl#Class> . }}
  275. OPTIONAL {{ ?class rdfs:label ?label }}
  276. OPTIONAL {{ ?class rdfs:comment ?comment }}
  277. {term_filter}
  278. }}
  279. LIMIT {limit}
  280. """
  281. return run_sparql(query)
  282. def tool_list_properties(input_data: Dict[str, Any]) -> Dict[str, Any]:
  283. """List ontology properties with optional term/domain/range filtering."""
  284. term = sanitize_term(input_data.get("term", ""))
  285. domain_uri = input_data.get("domain_uri")
  286. range_uri = input_data.get("range_uri")
  287. limit = int(input_data.get("limit", 100))
  288. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  289. filters = []
  290. if term:
  291. filters.append(
  292. f"""
  293. FILTER(
  294. CONTAINS(LCASE(COALESCE(STR(?label), STR(?property))), LCASE(\"{term}\")) ||
  295. CONTAINS(LCASE(COALESCE(STR(?comment), \"\")), LCASE(\"{term}\"))
  296. )
  297. """
  298. )
  299. if domain_uri:
  300. filters.append(f"FILTER(?domain = <{domain_uri}>)")
  301. if range_uri:
  302. filters.append(f"FILTER(?range = <{range_uri}>)")
  303. query = f"""
  304. SELECT DISTINCT ?property ?label ?comment ?domain ?range WHERE {{
  305. {{ ?property rdf:type rdf:Property . }}
  306. UNION
  307. {{ ?property rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> . }}
  308. UNION
  309. {{ ?property rdf:type <http://www.w3.org/2002/07/owl#DatatypeProperty> . }}
  310. OPTIONAL {{ ?property rdfs:label ?label }}
  311. OPTIONAL {{ ?property rdfs:comment ?comment }}
  312. OPTIONAL {{ ?property rdfs:domain ?domain }}
  313. OPTIONAL {{ ?property rdfs:range ?range }}
  314. {' '.join(filters)}
  315. }}
  316. LIMIT {limit}
  317. """
  318. return run_sparql(query)
  319. def tool_describe_class(input_data: Dict[str, Any]) -> Dict[str, Any]:
  320. """Describe a class and include properties that declare it as rdfs:domain."""
  321. class_uri = input_data.get("class_uri")
  322. if not class_uri:
  323. raise ValueError("Missing 'class_uri' field")
  324. query = f"""
  325. SELECT ?label ?comment ?property ?propertyLabel ?propertyComment ?range WHERE {{
  326. OPTIONAL {{ <{class_uri}> rdfs:label ?label }}
  327. OPTIONAL {{ <{class_uri}> rdfs:comment ?comment }}
  328. OPTIONAL {{
  329. ?property rdfs:domain <{class_uri}> .
  330. OPTIONAL {{ ?property rdfs:label ?propertyLabel }}
  331. OPTIONAL {{ ?property rdfs:comment ?propertyComment }}
  332. OPTIONAL {{ ?property rdfs:range ?range }}
  333. }}
  334. }}
  335. LIMIT {SPARQL_MAX_LIMIT}
  336. """
  337. return run_sparql(query)
  338. def tool_describe_property(input_data: Dict[str, Any]) -> Dict[str, Any]:
  339. """Describe a property and include usage examples from the graph."""
  340. property_uri = input_data.get("property_uri")
  341. if not property_uri:
  342. raise ValueError("Missing 'property_uri' field")
  343. usage_limit = int(input_data.get("usage_limit", 10))
  344. usage_limit = min(max(usage_limit, 1), SPARQL_MAX_LIMIT)
  345. metadata_query = f"""
  346. SELECT ?label ?comment ?domain ?range ?type WHERE {{
  347. OPTIONAL {{ <{property_uri}> rdfs:label ?label }}
  348. OPTIONAL {{ <{property_uri}> rdfs:comment ?comment }}
  349. OPTIONAL {{ <{property_uri}> rdfs:domain ?domain }}
  350. OPTIONAL {{ <{property_uri}> rdfs:range ?range }}
  351. OPTIONAL {{ <{property_uri}> rdf:type ?type }}
  352. }}
  353. LIMIT {SPARQL_MAX_LIMIT}
  354. """
  355. usage_query = f"""
  356. SELECT ?subject ?subjectLabel ?object ?objectLabel WHERE {{
  357. ?subject <{property_uri}> ?object .
  358. OPTIONAL {{ ?subject rdfs:label ?subjectLabel }}
  359. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  360. }}
  361. LIMIT {usage_limit}
  362. """
  363. return {
  364. "metadata": run_sparql(metadata_query),
  365. "usage": run_sparql(usage_query),
  366. }
  367. def tool_describe_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  368. subject_uri = input_data.get("subject_uri")
  369. if not subject_uri:
  370. raise ValueError("Missing 'subject_uri' field")
  371. limit = int(input_data.get("limit", 50))
  372. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  373. query = f"""
  374. SELECT ?predicate ?object ?objectLabel WHERE {{
  375. <{subject_uri}> ?predicate ?object .
  376. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  377. }}
  378. LIMIT {limit}
  379. """
  380. return run_sparql(query)
  381. def tool_path_traverse(input_data: Dict[str, Any]) -> Dict[str, Any]:
  382. subject_uri = input_data.get("subject_uri")
  383. property_path = input_data.get("property_path") or input_data.get("properties")
  384. if not subject_uri or not property_path:
  385. raise ValueError("Missing 'subject_uri' or 'property_path'")
  386. if isinstance(property_path, str):
  387. property_path = [p.strip() for p in property_path.split(",") if p.strip()]
  388. if not isinstance(property_path, list) or not property_path:
  389. raise ValueError("'property_path' must be a non-empty list of property URIs")
  390. direction = input_data.get("direction", "outgoing")
  391. limit = int(input_data.get("limit", 50))
  392. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  393. statements = []
  394. optional_lines = []
  395. select_terms = []
  396. prev_subject = f"<{subject_uri}>"
  397. for idx, prop_uri in enumerate(property_path, start=1):
  398. step_var = f"?n{idx}"
  399. if direction == "outgoing":
  400. statements.append(f"{prev_subject} <{prop_uri}> {step_var} .")
  401. else:
  402. statements.append(f"{step_var} <{prop_uri}> {prev_subject} .")
  403. select_terms.append(step_var)
  404. optional_lines.append(f"OPTIONAL {{ {step_var} rdfs:label {step_var}Label }}")
  405. optional_lines.append(f"OPTIONAL {{ {step_var} dc:description {step_var}Description }}")
  406. prev_subject = step_var
  407. select_clause = " ".join(select_terms)
  408. query = f"""
  409. SELECT {select_clause} WHERE {{
  410. {'\n '.join(statements)}
  411. {'\n '.join(optional_lines)}
  412. }}
  413. LIMIT {limit}
  414. """
  415. return {
  416. "property_path": property_path,
  417. "direction": direction,
  418. "result": run_sparql(query),
  419. }
  420. def tool_property_usage_statistics(input_data: Dict[str, Any]) -> Dict[str, Any]:
  421. property_uri = input_data.get("property_uri")
  422. if not property_uri:
  423. raise ValueError("Missing 'property_uri' field")
  424. examples_limit = int(input_data.get("examples_limit", 5))
  425. examples_limit = min(max(examples_limit, 1), SPARQL_MAX_LIMIT)
  426. count_query = f"""
  427. SELECT (COUNT(DISTINCT ?subject) AS ?usageCount) WHERE {{
  428. ?subject <{property_uri}> ?object .
  429. }}
  430. LIMIT {SPARQL_MAX_LIMIT}
  431. """
  432. usage_query = f"""
  433. SELECT ?subject ?subjectLabel ?object ?objectLabel WHERE {{
  434. ?subject <{property_uri}> ?object .
  435. OPTIONAL {{ ?subject rdfs:label ?subjectLabel }}
  436. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  437. }}
  438. LIMIT {examples_limit}
  439. """
  440. return {
  441. "count": run_sparql(count_query),
  442. "examples": run_sparql(usage_query),
  443. }
  444. def tool_batch_insert(input_data: Dict[str, Any]) -> Dict[str, Any]:
  445. ttl_text = input_data.get("ttl")
  446. triples = input_data.get("triples")
  447. graph = input_data.get("graph") or GRAPH_URI
  448. if not ttl_text and not triples:
  449. raise ValueError("Provide either 'ttl' text or 'triples' list")
  450. def _format_object(obj_value: Any, obj_type: str, datatype: Optional[str], lang: Optional[str]) -> str:
  451. if obj_type == "uri":
  452. return f"<{obj_value}>"
  453. if obj_type == "literal":
  454. return f'"{escape_sparql_string(obj_value)}"'
  455. if obj_type == "typed_literal":
  456. if not datatype:
  457. raise ValueError("Missing datatype for typed_literal")
  458. return f'"{escape_sparql_string(obj_value)}"^^<{datatype}>'
  459. if obj_type == "lang_literal":
  460. if not lang:
  461. raise ValueError("Missing lang for lang_literal")
  462. return f'"{escape_sparql_string(obj_value)}"@{lang}'
  463. raise ValueError(f"Unknown object_type: {obj_type}")
  464. if ttl_text:
  465. query = ttl_to_sparql_insert(ttl_text, graph)
  466. else:
  467. lines = []
  468. for triple in triples:
  469. subj = triple.get("subject")
  470. pred = triple.get("predicate")
  471. obj_value = triple.get("object")
  472. if not subj or not pred or obj_value is None:
  473. raise ValueError("Each triple must provide subject, predicate, and object")
  474. obj_type = triple.get("object_type", "uri")
  475. datatype = triple.get("datatype")
  476. lang = triple.get("lang")
  477. obj_text = _format_object(obj_value, obj_type, datatype, lang)
  478. lines.append(f"<{subj}> <{pred}> {obj_text} .")
  479. ttl_bulk = "\n".join(lines)
  480. query = ttl_to_sparql_insert(ttl_bulk, graph)
  481. result = run_sparql_update(query)
  482. return {**result, "query": query}
  483. def tool_insert_triple(input_data: Dict[str, Any]) -> Dict[str, Any]:
  484. subject = input_data.get("subject")
  485. predicate = input_data.get("predicate")
  486. obj = input_data.get("object")
  487. obj_type = input_data.get("object_type", "uri")
  488. graph = input_data.get("graph")
  489. if not subject or not predicate or obj is None:
  490. raise ValueError("Missing 'subject', 'predicate', or 'object' field")
  491. if obj_type == "uri":
  492. obj_value = f"<{obj}>"
  493. elif obj_type == "literal":
  494. obj_value = f"\"{escape_sparql_string(obj)}\""
  495. elif obj_type == "typed_literal":
  496. datatype = input_data.get("datatype")
  497. if not datatype:
  498. raise ValueError("Missing 'datatype' for typed_literal")
  499. obj_value = f"\"{escape_sparql_string(obj)}\"^^<{datatype}>"
  500. elif obj_type == "lang_literal":
  501. lang = input_data.get("lang")
  502. if not lang:
  503. raise ValueError("Missing 'lang' for lang_literal")
  504. obj_value = f"\"{escape_sparql_string(obj)}\"@{lang}"
  505. else:
  506. raise ValueError("Unknown object_type")
  507. ttl = f"<{subject}> <{predicate}> {obj_value} ."
  508. update_query = ttl_to_sparql_insert(ttl, graph)
  509. try:
  510. result = run_sparql_update(update_query)
  511. except HTTPException as exc:
  512. detail = f"{exc.detail}\n\nSPARQL:\n{update_query}"
  513. raise HTTPException(status_code=exc.status_code, detail=detail)
  514. return {**result, "query": update_query}
  515. def tool_load_examples(input_data: Dict[str, Any]) -> Dict[str, Any]:
  516. if not ALLOW_EXAMPLE_LOAD:
  517. raise HTTPException(status_code=403, detail="Example loading is disabled")
  518. files = input_data.get("files") or []
  519. if isinstance(files, str):
  520. files = [files]
  521. if not files:
  522. files = [p.name for p in EXAMPLES_DIR.glob("*.ttl")]
  523. graph = input_data.get("graph") or EXAMPLE_GRAPH
  524. results = []
  525. for filename in files:
  526. file_path = (EXAMPLES_DIR / filename).resolve()
  527. if not file_path.exists():
  528. raise HTTPException(status_code=400, detail=f"Missing example file: {filename}")
  529. if EXAMPLES_DIR not in file_path.parents:
  530. raise HTTPException(status_code=400, detail="Invalid example file path")
  531. ttl_text = file_path.read_text(encoding="utf-8")
  532. update_query = ttl_to_sparql_insert(ttl_text, graph)
  533. run_sparql_update(update_query)
  534. results.append({"file": filename, "graph": graph})
  535. return {"loaded": results}
# --- TOOL REGISTRY ---
  537. TOOLS = {
  538. "sparql_query": tool_sparql_query,
  539. "list_graphs": tool_list_graphs,
  540. "search_label": tool_search_label,
  541. "get_entities_by_type": tool_get_entities_by_type,
  542. "get_predicates_for_subject": tool_get_predicates_for_subject,
  543. "get_labels_for_subject": tool_get_labels_for_subject,
  544. "traverse_property": tool_traverse_property,
  545. "list_classes": tool_list_classes,
  546. "list_properties": tool_list_properties,
  547. "describe_class": tool_describe_class,
  548. "describe_property": tool_describe_property,
  549. "describe_subject": tool_describe_subject,
  550. "path_traverse": tool_path_traverse,
  551. "property_usage_statistics": tool_property_usage_statistics,
  552. "batch_insert": tool_batch_insert,
  553. "insert_triple": tool_insert_triple,
  554. "load_examples": tool_load_examples,
  555. }
  556. def load_domain_layers(tools: Dict[str, Callable[[Dict[str, Any]], Any]]) -> None:
  557. raw = os.getenv("DOMAIN_LAYERS", "garden_layer.plugin")
  558. modules = [item.strip() for item in raw.split(",") if item.strip()]
  559. if not modules:
  560. return
  561. for module_name in modules:
  562. try:
  563. module = import_module(module_name)
  564. except ImportError as exc:
  565. logger.warning("Domain layer '%s' could not be imported: %s", module_name, exc)
  566. continue
  567. register = getattr(module, "register_layer", None)
  568. if not callable(register):
  569. logger.warning("Domain layer '%s' does not expose register_layer", module_name)
  570. continue
  571. try:
  572. register(tools)
  573. logger.info("Loaded domain layer '%s'", module_name)
  574. except Exception as exc:
  575. logger.exception("Domain layer '%s' failed to register: %s", module_name, exc)
  576. load_domain_layers(TOOLS)
  577. TOOL_DOCS = {
  578. "sparql_query": "Execute a bounded SELECT query and return the JSON result.",
  579. "list_graphs": "List up to 50 active graph URIs.",
  580. "search_label": "Search rdfs:label values that contain a term (case-insensitive).",
  581. "get_entities_by_type": "List subjects of a given rdf:type.",
  582. "get_predicates_for_subject": "List distinct predicates used by a subject.",
  583. "get_labels_for_subject": "Fetch rdfs:label values for a subject.",
  584. "traverse_property": "Traverse a property (incoming or outgoing) for a subject and return labels/descriptions.",
  585. "list_classes": "List ontology classes with optional label/comment term filtering.",
  586. "list_properties": "List ontology properties with optional term/domain/range filters.",
  587. "describe_class": "Describe a class and list properties that use it as rdfs:domain.",
  588. "describe_property": "Describe a property (label/comment/domain/range/type) and sample usage.",
  589. "describe_subject": "Return subject predicates/objects (with labels) to inspect an individual node.",
  590. "path_traverse": "Follow a property path (list of predicates) from a subject, returning each step's nodes.",
  591. "property_usage_statistics": "Count how often a property is used and sample subjects/objects.",
  592. "batch_insert": "Insert multiple triples or TTL at once with a single guarded update.",
  593. "insert_triple": "Insert a single triple (useful for debugging updates).",
  594. "load_examples": "Load Turtle examples from the local examples/ directory into a graph.",
  595. }
# --- MCP ENDPOINT ---
  597. @app.post("/mcp")
  598. def handle_mcp(request: ToolRequest):
  599. tool_name = request.tool
  600. input_data = request.input or {}
  601. if tool_name not in TOOLS:
  602. raise HTTPException(status_code=400, detail=f"Unknown tool: {tool_name}")
  603. try:
  604. result = TOOLS[tool_name](input_data)
  605. return {
  606. "status": "ok",
  607. "tool": tool_name,
  608. "description": TOOL_DOCS.get(tool_name, ""),
  609. "result": result,
  610. }
  611. except Exception as exc:
  612. logger.error("Tool %s failed: %s", tool_name, exc)
  613. raise HTTPException(status_code=500, detail=str(exc))
# --- HEALTH CHECK ---
  615. @app.get("/")
  616. def root():
  617. return {
  618. "status": "MCP server running",
  619. "tools": list(TOOLS.keys()),
  620. "virtuoso": VIRTUOSO_ENDPOINT,
  621. "guardrails": {
  622. "default_limit": SPARQL_DEFAULT_LIMIT,
  623. "max_limit": SPARQL_MAX_LIMIT,
  624. "allow_example_load": ALLOW_EXAMPLE_LOAD,
  625. "turtle_examples": True,
  626. },
  627. }
  628. @app.get("/health")
  629. def health():
  630. return root()