# virtuoso_mcp.py
  1. import json
  2. import logging
  3. import os
  4. import re
  5. from datetime import datetime, timezone
  6. from importlib import import_module
  7. from pathlib import Path
  8. from typing import Any, Callable, Dict, List, Optional
  9. import requests
  10. from requests.auth import HTTPDigestAuth
  11. from fastapi import FastAPI, HTTPException, Request
  12. from pydantic import BaseModel
# Root log level comes from MCP_LOG_LEVEL; a value that is not an attribute of
# the logging module falls back to INFO.
LOG_LEVEL = os.getenv("MCP_LOG_LEVEL", "INFO").upper()
logging.basicConfig(level=getattr(logging, LOG_LEVEL, logging.INFO))
logger = logging.getLogger("virtuoso_mcp")
app = FastAPI(title="MCP Server")
# --- CONFIG ---
# Endpoint resolution order: VIRTUOSO_ENDPOINT, then VIRTUOSO_SPARQL, then the
# localhost default. An empty VIRTUOSO_ENDPOINT counts as unset because of `or`.
VIRTUOSO_ENDPOINT = os.getenv("VIRTUOSO_ENDPOINT") or os.getenv(
    "VIRTUOSO_SPARQL", "http://localhost:8891/sparql"
)
# Digest-auth credentials; _build_auth() only uses them when both are non-empty.
VIRTUOSO_USER = os.getenv("VIRTUOSO_USER")
VIRTUOSO_PASS = os.getenv("VIRTUOSO_PASS")
# Values handed to requests as `timeout=` for queries and updates respectively.
SPARQL_TIMEOUT = float(os.getenv("SPARQL_TIMEOUT", 10.0))
SPARQL_UPDATE_TIMEOUT = float(os.getenv("SPARQL_UPDATE_TIMEOUT", 15.0))
# LIMIT appended to raw queries that have none, and the ceiling applied to
# user-supplied limits.
SPARQL_DEFAULT_LIMIT = int(os.getenv("SPARQL_DEFAULT_LIMIT", 100))
SPARQL_MAX_LIMIT = int(os.getenv("SPARQL_MAX_LIMIT", 500))
# Namespace bound to the empty prefix in PREFIXES and default target graph for
# batch inserts.
GRAPH_URI = os.getenv("GRAPH_URI", "http://example.org/catalog#")
# Turtle fixtures live next to this file; tool_load_examples reads from here.
EXAMPLES_DIR = Path(__file__).resolve().parent / "examples"
EXAMPLE_GRAPH = os.getenv(
    "EXAMPLE_GRAPH", "http://example.org/catalog#test"
)
# Fixture loading is opt-in: only the exact (case-insensitive) value "true" enables it.
ALLOW_EXAMPLE_LOAD = os.getenv("MCP_ALLOW_EXAMPLE_LOAD", "false").lower() == "true"
# One HTTP session shared by every SPARQL call.
# NOTE(review): the session is module-global; confirm it is safe under the
# server's concurrency model.
SESSION = requests.Session()
# Import-time side effect: the logs directory is created if it does not exist.
LOGS_DIR = Path(__file__).resolve().parent / "logs"
LOGS_DIR.mkdir(parents=True, exist_ok=True)
# Dedicated audit logger for tool invocations; it writes to its own file and
# does not propagate to the root logger.
tool_logger = logging.getLogger("virtuoso_mcp.tools")
tool_handler = logging.FileHandler(LOGS_DIR / "tool_usage.log")
tool_handler.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
tool_logger.addHandler(tool_handler)
tool_logger.setLevel(logging.INFO)
tool_logger.propagate = False
# Prologue that _with_prefixes() prepends to queries that do not start with
# their own PREFIX declaration.
PREFIXES = f"""
PREFIX : <{GRAPH_URI}>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
""".strip()
# --- MODELS ---
class SparqlQueryRequest(BaseModel):
    """Request body holding a single raw SPARQL query string."""

    # The SPARQL text supplied by the client.
    query: str
class ToolRequest(BaseModel):
    """Request body for POST /mcp: a tool name plus that tool's input mapping."""

    # Key looked up in the TOOLS registry.
    tool: str
    # Tool-specific arguments; defaults to an empty mapping.
    # NOTE(review): mutable default - presumably pydantic copies field defaults
    # per instance; confirm for the pinned pydantic version.
    input: Dict[str, Any] = {}
# --- CORE SPARQL FUNCTION ---
  56. def _build_auth() -> Optional[HTTPDigestAuth]:
  57. if VIRTUOSO_USER and VIRTUOSO_PASS:
  58. return HTTPDigestAuth(VIRTUOSO_USER, VIRTUOSO_PASS)
  59. return None
  60. def _with_prefixes(query: str) -> str:
  61. if re.search(r"^\s*prefix\b", query, re.IGNORECASE):
  62. return query
  63. return f"{PREFIXES}\n{query}"
  64. def run_sparql(query: str) -> Dict[str, Any]:
  65. """Execute a SPARQL query against Virtuoso and return the JSON payload."""
  66. logger.debug("Sending SPARQL query: %s", query)
  67. try:
  68. response = SESSION.post(
  69. VIRTUOSO_ENDPOINT,
  70. data=_with_prefixes(query).encode("utf-8"),
  71. headers={
  72. "Accept": "application/sparql-results+json",
  73. "Content-Type": "application/sparql-query",
  74. },
  75. timeout=SPARQL_TIMEOUT,
  76. auth=_build_auth(),
  77. )
  78. if not response.ok:
  79. logger.warning("SPARQL request failed: %s", response.status_code)
  80. response.raise_for_status()
  81. return response.json()
  82. except Exception as exc: # pragma: no cover - propagate for FastAPI
  83. logger.warning("SPARQL request failed: %s", exc)
  84. raise HTTPException(status_code=500, detail=str(exc))
  85. def run_sparql_update(query: str) -> Dict[str, Any]:
  86. """Execute a SPARQL UPDATE (INSERT/DELETE) against Virtuoso."""
  87. logger.debug("Sending SPARQL update: %s", query)
  88. try:
  89. response = SESSION.post(
  90. VIRTUOSO_ENDPOINT,
  91. data=_with_prefixes(query).encode("utf-8"),
  92. headers={"Content-Type": "application/sparql-update"},
  93. timeout=SPARQL_UPDATE_TIMEOUT,
  94. auth=_build_auth(),
  95. )
  96. if not response.ok:
  97. detail = (response.text or "").strip()
  98. logger.warning("SPARQL update failed: %s", response.status_code)
  99. raise HTTPException(
  100. status_code=500,
  101. detail=detail or f"SPARQL update failed with {response.status_code}",
  102. )
  103. return {"status": "ok"}
  104. except HTTPException:
  105. raise
  106. except Exception as exc: # pragma: no cover - propagate for FastAPI
  107. logger.warning("SPARQL update failed: %s", exc)
  108. raise HTTPException(status_code=500, detail=str(exc))
# --- TOOL HELPERS ---
  110. def escape_sparql_string(value: str) -> str:
  111. """Escape a string for SPARQL literal usage."""
  112. if value is None:
  113. return ""
  114. return (
  115. str(value)
  116. .replace("\\", "\\\\")
  117. .replace('"', "\\\"")
  118. .replace("\n", "\\n")
  119. .replace("\r", "")
  120. )
  121. def sanitize_term(term: str) -> str:
  122. """Escape quotes inside label searches so we can safely interpolate strings."""
  123. return escape_sparql_string(term)
  124. def _extract_limit(query: str) -> Optional[int]:
  125. match = re.search(r"\blimit\s+(\d+)\b", query, re.IGNORECASE)
  126. if not match:
  127. return None
  128. try:
  129. return int(match.group(1))
  130. except ValueError:
  131. return None
  132. def _apply_limit(query: str, default_limit: int, max_limit: int) -> str:
  133. limit = _extract_limit(query)
  134. if limit is None:
  135. return f"{query.strip()}\nLIMIT {default_limit}"
  136. if limit > max_limit:
  137. return re.sub(
  138. r"\blimit\s+\d+\b",
  139. f"LIMIT {max_limit}",
  140. query,
  141. flags=re.IGNORECASE,
  142. )
  143. return query
  144. def guard_select_query(query: str) -> str:
  145. """Enforce that raw queries are read-only and bounded by LIMIT."""
  146. lowered = query.lower()
  147. if re.search(r"\b(insert|delete|load|clear|drop|create|move|copy|add)\b", lowered):
  148. raise HTTPException(status_code=400, detail="SPARQL update operations are not allowed")
  149. if "select" not in lowered:
  150. raise HTTPException(status_code=400, detail="Only SELECT queries are allowed")
  151. return _apply_limit(query, SPARQL_DEFAULT_LIMIT, SPARQL_MAX_LIMIT)
  152. def ttl_to_sparql_insert(ttl_text: str, graph: Optional[str]) -> str:
  153. prefix_lines: List[str] = []
  154. body_lines: List[str] = []
  155. for raw_line in ttl_text.splitlines():
  156. line = raw_line.strip()
  157. if not line:
  158. continue
  159. prefix_match = re.match(r"@prefix\s+([\w-]+):\s*<([^>]+)>\s*\.", line)
  160. if prefix_match:
  161. prefix_lines.append(
  162. f"PREFIX {prefix_match.group(1)}: <{prefix_match.group(2)}>"
  163. )
  164. continue
  165. if line.startswith("@base"):
  166. # Skip @base entries for now; they are rare in our exports.
  167. continue
  168. body_lines.append(raw_line)
  169. if not body_lines:
  170. raise HTTPException(status_code=400, detail="No RDF triples found in input")
  171. prefixes = "\n".join(prefix_lines)
  172. body = "\n".join(body_lines)
  173. if graph:
  174. insert_body = f"GRAPH <{graph}> {{\n{body}\n}}"
  175. else:
  176. insert_body = body
  177. return f"{prefixes}\nINSERT DATA {{\n{insert_body}\n}}"
# --- MCP TOOL IMPLEMENTATIONS ---
  179. def tool_sparql_query(input_data: Dict[str, Any]) -> Dict[str, Any]:
  180. query = input_data.get("query")
  181. if not query:
  182. raise ValueError("Missing 'query' field")
  183. guarded = guard_select_query(query)
  184. return run_sparql(guarded)
  185. def tool_list_graphs(_input: Dict[str, Any]) -> Dict[str, Any]:
  186. query = """
  187. SELECT DISTINCT ?g WHERE {
  188. GRAPH ?g { ?s ?p ?o }
  189. }
  190. LIMIT 50
  191. """
  192. return run_sparql(query)
  193. def tool_search_label(input_data: Dict[str, Any]) -> Dict[str, Any]:
  194. term = input_data.get("term", "")
  195. sanitized = sanitize_term(term)
  196. limit = int(input_data.get("limit", 20))
  197. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  198. query = f"""
  199. SELECT ?s ?label WHERE {{
  200. ?s rdfs:label ?label .
  201. FILTER(CONTAINS(LCASE(?label), LCASE(\"{sanitized}\")))
  202. }}
  203. LIMIT {limit}
  204. """
  205. return run_sparql(query)
  206. def tool_get_entities_by_type(input_data: Dict[str, Any]) -> Dict[str, Any]:
  207. type_uri = input_data.get("type_uri")
  208. if not type_uri:
  209. raise ValueError("Missing 'type_uri' field")
  210. limit = int(input_data.get("limit", 50))
  211. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  212. query = f"""
  213. SELECT ?s WHERE {{
  214. ?s rdf:type <{type_uri}> .
  215. }}
  216. LIMIT {limit}
  217. """
  218. return run_sparql(query)
  219. def tool_get_predicates_for_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  220. subject_uri = input_data.get("subject_uri")
  221. if not subject_uri:
  222. raise ValueError("Missing 'subject_uri' field")
  223. limit = int(input_data.get("limit", 50))
  224. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  225. query = f"""
  226. SELECT DISTINCT ?p WHERE {{
  227. <{subject_uri}> ?p ?o .
  228. }}
  229. LIMIT {limit}
  230. """
  231. return run_sparql(query)
  232. def tool_get_labels_for_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  233. subject_uri = input_data.get("subject_uri")
  234. if not subject_uri:
  235. raise ValueError("Missing 'subject_uri' field")
  236. query = f"""
  237. SELECT ?label WHERE {{
  238. <{subject_uri}> rdfs:label ?label .
  239. }}
  240. LIMIT 20
  241. """
  242. return run_sparql(query)
  243. def tool_traverse_property(input_data: Dict[str, Any]) -> Dict[str, Any]:
  244. subject_uri = input_data.get("subject_uri")
  245. property_uri = input_data.get("property_uri")
  246. if not subject_uri or not property_uri:
  247. raise ValueError("Missing 'subject_uri' or 'property_uri'")
  248. direction = input_data.get("direction", "outgoing")
  249. limit = int(input_data.get("limit", 50))
  250. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  251. if direction not in {"outgoing", "incoming"}:
  252. raise ValueError("direction must be 'outgoing' or 'incoming'")
  253. if direction == "outgoing":
  254. triple = f"<{subject_uri}> <{property_uri}> ?neighbor ."
  255. else:
  256. triple = f"?neighbor <{property_uri}> <{subject_uri}> ."
  257. query = f"""
  258. SELECT ?neighbor ?label ?description WHERE {{
  259. {triple}
  260. OPTIONAL {{ ?neighbor rdfs:label ?label }}
  261. OPTIONAL {{ ?neighbor dc:description ?description }}
  262. }}
  263. LIMIT {limit}
  264. """
  265. return run_sparql(query)
  266. def tool_list_classes(input_data: Dict[str, Any]) -> Dict[str, Any]:
  267. """List ontology classes (rdfs:Class and owl:Class) with optional term filtering."""
  268. term = sanitize_term(input_data.get("term", ""))
  269. limit = int(input_data.get("limit", 50))
  270. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  271. term_filter = ""
  272. if term:
  273. term_filter = f"""
  274. FILTER(
  275. CONTAINS(LCASE(COALESCE(STR(?label), STR(?class))), LCASE(\"{term}\")) ||
  276. CONTAINS(LCASE(COALESCE(STR(?comment), \"\")), LCASE(\"{term}\"))
  277. )
  278. """
  279. query = f"""
  280. SELECT DISTINCT ?class ?label ?comment WHERE {{
  281. {{ ?class rdf:type rdfs:Class . }}
  282. UNION
  283. {{ ?class rdf:type <http://www.w3.org/2002/07/owl#Class> . }}
  284. OPTIONAL {{ ?class rdfs:label ?label }}
  285. OPTIONAL {{ ?class rdfs:comment ?comment }}
  286. {term_filter}
  287. }}
  288. LIMIT {limit}
  289. """
  290. return run_sparql(query)
  291. def tool_list_properties(input_data: Dict[str, Any]) -> Dict[str, Any]:
  292. """List ontology properties with optional term/domain/range filtering."""
  293. term = sanitize_term(input_data.get("term", ""))
  294. domain_uri = input_data.get("domain_uri")
  295. range_uri = input_data.get("range_uri")
  296. limit = int(input_data.get("limit", 100))
  297. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  298. filters = []
  299. if term:
  300. filters.append(
  301. f"""
  302. FILTER(
  303. CONTAINS(LCASE(COALESCE(STR(?label), STR(?property))), LCASE(\"{term}\")) ||
  304. CONTAINS(LCASE(COALESCE(STR(?comment), \"\")), LCASE(\"{term}\"))
  305. )
  306. """
  307. )
  308. if domain_uri:
  309. filters.append(f"FILTER(?domain = <{domain_uri}>)")
  310. if range_uri:
  311. filters.append(f"FILTER(?range = <{range_uri}>)")
  312. query = f"""
  313. SELECT DISTINCT ?property ?label ?comment ?domain ?range WHERE {{
  314. {{ ?property rdf:type rdf:Property . }}
  315. UNION
  316. {{ ?property rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> . }}
  317. UNION
  318. {{ ?property rdf:type <http://www.w3.org/2002/07/owl#DatatypeProperty> . }}
  319. OPTIONAL {{ ?property rdfs:label ?label }}
  320. OPTIONAL {{ ?property rdfs:comment ?comment }}
  321. OPTIONAL {{ ?property rdfs:domain ?domain }}
  322. OPTIONAL {{ ?property rdfs:range ?range }}
  323. {' '.join(filters)}
  324. }}
  325. LIMIT {limit}
  326. """
  327. return run_sparql(query)
  328. def tool_describe_class(input_data: Dict[str, Any]) -> Dict[str, Any]:
  329. """Describe a class and include properties that declare it as rdfs:domain."""
  330. class_uri = input_data.get("class_uri")
  331. if not class_uri:
  332. raise ValueError("Missing 'class_uri' field")
  333. query = f"""
  334. SELECT ?label ?comment ?property ?propertyLabel ?propertyComment ?range WHERE {{
  335. OPTIONAL {{ <{class_uri}> rdfs:label ?label }}
  336. OPTIONAL {{ <{class_uri}> rdfs:comment ?comment }}
  337. OPTIONAL {{
  338. ?property rdfs:domain <{class_uri}> .
  339. OPTIONAL {{ ?property rdfs:label ?propertyLabel }}
  340. OPTIONAL {{ ?property rdfs:comment ?propertyComment }}
  341. OPTIONAL {{ ?property rdfs:range ?range }}
  342. }}
  343. }}
  344. LIMIT {SPARQL_MAX_LIMIT}
  345. """
  346. return run_sparql(query)
  347. def tool_describe_property(input_data: Dict[str, Any]) -> Dict[str, Any]:
  348. """Describe a property and include usage examples from the graph."""
  349. property_uri = input_data.get("property_uri")
  350. if not property_uri:
  351. raise ValueError("Missing 'property_uri' field")
  352. usage_limit = int(input_data.get("usage_limit", 10))
  353. usage_limit = min(max(usage_limit, 1), SPARQL_MAX_LIMIT)
  354. metadata_query = f"""
  355. SELECT ?label ?comment ?domain ?range ?type WHERE {{
  356. OPTIONAL {{ <{property_uri}> rdfs:label ?label }}
  357. OPTIONAL {{ <{property_uri}> rdfs:comment ?comment }}
  358. OPTIONAL {{ <{property_uri}> rdfs:domain ?domain }}
  359. OPTIONAL {{ <{property_uri}> rdfs:range ?range }}
  360. OPTIONAL {{ <{property_uri}> rdf:type ?type }}
  361. }}
  362. LIMIT {SPARQL_MAX_LIMIT}
  363. """
  364. usage_query = f"""
  365. SELECT ?subject ?subjectLabel ?object ?objectLabel WHERE {{
  366. ?subject <{property_uri}> ?object .
  367. OPTIONAL {{ ?subject rdfs:label ?subjectLabel }}
  368. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  369. }}
  370. LIMIT {usage_limit}
  371. """
  372. return {
  373. "metadata": run_sparql(metadata_query),
  374. "usage": run_sparql(usage_query),
  375. }
  376. def tool_describe_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  377. subject_uri = input_data.get("subject_uri")
  378. if not subject_uri:
  379. raise ValueError("Missing 'subject_uri' field")
  380. limit = int(input_data.get("limit", 50))
  381. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  382. query = f"""
  383. SELECT ?predicate ?object ?objectLabel WHERE {{
  384. <{subject_uri}> ?predicate ?object .
  385. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  386. }}
  387. LIMIT {limit}
  388. """
  389. return run_sparql(query)
  390. def tool_path_traverse(input_data: Dict[str, Any]) -> Dict[str, Any]:
  391. subject_uri = input_data.get("subject_uri")
  392. property_path = input_data.get("property_path") or input_data.get("properties")
  393. if not subject_uri or not property_path:
  394. raise ValueError("Missing 'subject_uri' or 'property_path'")
  395. if isinstance(property_path, str):
  396. property_path = [p.strip() for p in property_path.split(",") if p.strip()]
  397. if not isinstance(property_path, list) or not property_path:
  398. raise ValueError("'property_path' must be a non-empty list of property URIs")
  399. direction = input_data.get("direction", "outgoing")
  400. limit = int(input_data.get("limit", 50))
  401. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  402. statements = []
  403. optional_lines = []
  404. select_terms = []
  405. prev_subject = f"<{subject_uri}>"
  406. for idx, prop_uri in enumerate(property_path, start=1):
  407. step_var = f"?n{idx}"
  408. if direction == "outgoing":
  409. statements.append(f"{prev_subject} <{prop_uri}> {step_var} .")
  410. else:
  411. statements.append(f"{step_var} <{prop_uri}> {prev_subject} .")
  412. select_terms.append(step_var)
  413. optional_lines.append(f"OPTIONAL {{ {step_var} rdfs:label {step_var}Label }}")
  414. optional_lines.append(f"OPTIONAL {{ {step_var} dc:description {step_var}Description }}")
  415. prev_subject = step_var
  416. select_clause = " ".join(select_terms)
  417. query = f"""
  418. SELECT {select_clause} WHERE {{
  419. {'\n '.join(statements)}
  420. {'\n '.join(optional_lines)}
  421. }}
  422. LIMIT {limit}
  423. """
  424. return {
  425. "property_path": property_path,
  426. "direction": direction,
  427. "result": run_sparql(query),
  428. }
  429. def tool_property_usage_statistics(input_data: Dict[str, Any]) -> Dict[str, Any]:
  430. property_uri = input_data.get("property_uri")
  431. if not property_uri:
  432. raise ValueError("Missing 'property_uri' field")
  433. examples_limit = int(input_data.get("examples_limit", 5))
  434. examples_limit = min(max(examples_limit, 1), SPARQL_MAX_LIMIT)
  435. count_query = f"""
  436. SELECT (COUNT(DISTINCT ?subject) AS ?usageCount) WHERE {{
  437. ?subject <{property_uri}> ?object .
  438. }}
  439. LIMIT {SPARQL_MAX_LIMIT}
  440. """
  441. usage_query = f"""
  442. SELECT ?subject ?subjectLabel ?object ?objectLabel WHERE {{
  443. ?subject <{property_uri}> ?object .
  444. OPTIONAL {{ ?subject rdfs:label ?subjectLabel }}
  445. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  446. }}
  447. LIMIT {examples_limit}
  448. """
  449. return {
  450. "count": run_sparql(count_query),
  451. "examples": run_sparql(usage_query),
  452. }
  453. def tool_batch_insert(input_data: Dict[str, Any]) -> Dict[str, Any]:
  454. ttl_text = input_data.get("ttl")
  455. triples = input_data.get("triples")
  456. graph = input_data.get("graph") or GRAPH_URI
  457. if not ttl_text and not triples:
  458. raise ValueError("Provide either 'ttl' text or 'triples' list")
  459. def _format_object(obj_value: Any, obj_type: str, datatype: Optional[str], lang: Optional[str]) -> str:
  460. if obj_type == "uri":
  461. return f"<{obj_value}>"
  462. if obj_type == "literal":
  463. return f'"{escape_sparql_string(obj_value)}"'
  464. if obj_type == "typed_literal":
  465. if not datatype:
  466. raise ValueError("Missing datatype for typed_literal")
  467. return f'"{escape_sparql_string(obj_value)}"^^<{datatype}>'
  468. if obj_type == "lang_literal":
  469. if not lang:
  470. raise ValueError("Missing lang for lang_literal")
  471. return f'"{escape_sparql_string(obj_value)}"@{lang}'
  472. raise ValueError(f"Unknown object_type: {obj_type}")
  473. if ttl_text:
  474. query = ttl_to_sparql_insert(ttl_text, graph)
  475. else:
  476. lines = []
  477. for triple in triples:
  478. subj = triple.get("subject")
  479. pred = triple.get("predicate")
  480. obj_value = triple.get("object")
  481. if not subj or not pred or obj_value is None:
  482. raise ValueError("Each triple must provide subject, predicate, and object")
  483. obj_type = triple.get("object_type", "uri")
  484. datatype = triple.get("datatype")
  485. lang = triple.get("lang")
  486. obj_text = _format_object(obj_value, obj_type, datatype, lang)
  487. lines.append(f"<{subj}> <{pred}> {obj_text} .")
  488. ttl_bulk = "\n".join(lines)
  489. query = ttl_to_sparql_insert(ttl_bulk, graph)
  490. result = run_sparql_update(query)
  491. return {**result, "query": query}
  492. def tool_load_examples(input_data: Dict[str, Any]) -> Dict[str, Any]:
  493. if not ALLOW_EXAMPLE_LOAD:
  494. raise HTTPException(status_code=403, detail="Example loading is disabled")
  495. requested = input_data.get("files") or []
  496. if isinstance(requested, str):
  497. requested = [requested]
  498. graph = input_data.get("graph") or EXAMPLE_GRAPH
  499. files_to_load = []
  500. if requested:
  501. files_to_load = requested
  502. else:
  503. files_to_load = sorted(p.name for p in EXAMPLES_DIR.glob("*.ttl"))
  504. if not files_to_load:
  505. raise HTTPException(status_code=404, detail="No example files available")
  506. results = []
  507. for filename in files_to_load:
  508. file_path = (EXAMPLES_DIR / filename).resolve()
  509. if not file_path.exists():
  510. raise HTTPException(status_code=400, detail=f"Missing example file: {filename}")
  511. if EXAMPLES_DIR not in file_path.parents and file_path != EXAMPLES_DIR:
  512. raise HTTPException(status_code=400, detail="Invalid example file path")
  513. ttl_text = file_path.read_text(encoding="utf-8")
  514. update_query = ttl_to_sparql_insert(ttl_text, graph)
  515. run_sparql_update(update_query)
  516. results.append({"file": filename, "graph": graph})
  517. return {"loaded": results}
  518. def tool_insert_triple(input_data: Dict[str, Any]) -> Dict[str, Any]:
  519. subject = input_data.get("subject")
  520. predicate = input_data.get("predicate")
  521. obj = input_data.get("object")
  522. obj_type = input_data.get("object_type", "uri")
  523. graph = input_data.get("graph")
  524. if not subject or not predicate or obj is None:
  525. raise ValueError("Missing 'subject', 'predicate', or 'object' field")
  526. if obj_type == "uri":
  527. obj_value = f"<{obj}>"
  528. elif obj_type == "literal":
  529. obj_value = f"\"{escape_sparql_string(obj)}\""
  530. elif obj_type == "typed_literal":
  531. datatype = input_data.get("datatype")
  532. if not datatype:
  533. raise ValueError("Missing 'datatype' for typed_literal")
  534. obj_value = f"\"{escape_sparql_string(obj)}\"^^<{datatype}>"
  535. elif obj_type == "lang_literal":
  536. lang = input_data.get("lang")
  537. if not lang:
  538. raise ValueError("Missing 'lang' for lang_literal")
  539. obj_value = f"\"{escape_sparql_string(obj)}\"@{lang}"
  540. else:
  541. raise ValueError("Unknown object_type")
  542. ttl = f"<{subject}> <{predicate}> {obj_value} ."
  543. update_query = ttl_to_sparql_insert(ttl, graph)
  544. try:
  545. result = run_sparql_update(update_query)
  546. except HTTPException as exc:
  547. detail = f"{exc.detail}\n\nSPARQL:\n{update_query}"
  548. raise HTTPException(status_code=exc.status_code, detail=detail)
  549. return {**result, "query": update_query}
# --- TOOL REGISTRY ---
  551. TOOLS = {
  552. "sparql_query": tool_sparql_query,
  553. "list_graphs": tool_list_graphs,
  554. "search_label": tool_search_label,
  555. "get_entities_by_type": tool_get_entities_by_type,
  556. "get_predicates_for_subject": tool_get_predicates_for_subject,
  557. "get_labels_for_subject": tool_get_labels_for_subject,
  558. "traverse_property": tool_traverse_property,
  559. "list_classes": tool_list_classes,
  560. "list_properties": tool_list_properties,
  561. "describe_class": tool_describe_class,
  562. "describe_property": tool_describe_property,
  563. "describe_subject": tool_describe_subject,
  564. "path_traverse": tool_path_traverse,
  565. "property_usage_statistics": tool_property_usage_statistics,
  566. "batch_insert": tool_batch_insert,
  567. "insert_triple": tool_insert_triple,
  568. "load_examples": tool_load_examples,
  569. }
  570. def load_domain_layers(tools: Dict[str, Callable[[Dict[str, Any]], Any]]) -> None:
  571. raw = os.getenv("DOMAIN_LAYERS", "garden_layer.plugin")
  572. modules = [item.strip() for item in raw.split(",") if item.strip()]
  573. if not modules:
  574. return
  575. for module_name in modules:
  576. module = None
  577. try:
  578. module = import_module(module_name)
  579. except ImportError as exc:
  580. base = module_name.split(".", 1)[0]
  581. if base != module_name:
  582. try:
  583. module = import_module(base)
  584. logger.info("Falling back to base module '%s' for domain layer '%s'", base, module_name)
  585. except ImportError:
  586. logger.warning(
  587. "Domain layer '%s' could not be imported and base module '%s' is missing: %s",
  588. module_name,
  589. base,
  590. exc,
  591. )
  592. else:
  593. logger.warning("Domain layer '%s' could not be imported: %s", module_name, exc)
  594. if module is None:
  595. continue
  596. register = getattr(module, "register_layer", None)
  597. if not callable(register):
  598. logger.warning("Domain layer '%s' does not expose register_layer", module_name)
  599. continue
  600. try:
  601. register(tools)
  602. logger.info("Loaded domain layer '%s'", module_name)
  603. except Exception as exc:
  604. logger.exception("Domain layer '%s' failed to register: %s", module_name, exc)
  605. load_domain_layers(TOOLS)
  606. TOOL_DOCS = {
  607. "sparql_query": "Execute a bounded SELECT query and return the JSON result.",
  608. "list_graphs": "List up to 50 active graph URIs.",
  609. "search_label": "Search rdfs:label values that contain a term (case-insensitive).",
  610. "get_entities_by_type": "List subjects of a given rdf:type.",
  611. "get_predicates_for_subject": "List distinct predicates used by a subject.",
  612. "get_labels_for_subject": "Fetch rdfs:label values for a subject.",
  613. "traverse_property": "Traverse a property (incoming or outgoing) for a subject and return labels/descriptions.",
  614. "list_classes": "List ontology classes with optional label/comment term filtering.",
  615. "list_properties": "List ontology properties with optional term/domain/range filters.",
  616. "describe_class": "Describe a class and list properties that use it as rdfs:domain.",
  617. "describe_property": "Describe a property (label/comment/domain/range/type) and sample usage.",
  618. "describe_subject": "Return subject predicates/objects (with labels) to inspect an individual node.",
  619. "path_traverse": "Follow a property path (list of predicates) from a subject, returning each step's nodes.",
  620. "property_usage_statistics": "Count how often a property is used and sample subjects/objects.",
  621. "batch_insert": "Insert multiple triples or TTL at once with a single guarded update.",
  622. "insert_triple": "Insert a single triple (useful for debugging updates).",
  623. "load_examples": "Load Turtle fixtures from the `examples/` directory when MCP_ALLOW_EXAMPLE_LOAD=true.",
  624. }
# --- MCP ENDPOINT ---
  626. @app.post("/mcp")
  627. def handle_mcp(tool_request: ToolRequest, http_request: Request):
  628. tool_name = tool_request.tool
  629. input_data = tool_request.input or {}
  630. client_host = http_request.client.host if http_request.client else "unknown"
  631. trimmed_input = json.dumps(input_data, ensure_ascii=False, default=str)
  632. if len(trimmed_input) > 1024:
  633. trimmed_input = f"{trimmed_input[:1024]}…"
  634. timestamp = datetime.now(timezone.utc).isoformat()
  635. tool_logger.info(
  636. "tool=%s client=%s time=%s input=%s",
  637. tool_name,
  638. client_host,
  639. timestamp,
  640. trimmed_input,
  641. )
  642. if tool_name not in TOOLS:
  643. raise HTTPException(status_code=400, detail=f"Unknown tool: {tool_name}")
  644. try:
  645. result = TOOLS[tool_name](input_data)
  646. return {
  647. "status": "ok",
  648. "tool": tool_name,
  649. "description": TOOL_DOCS.get(tool_name, ""),
  650. "result": result,
  651. }
  652. except Exception as exc:
  653. logger.error("Tool %s failed: %s", tool_name, exc)
  654. raise HTTPException(status_code=500, detail=str(exc))
# --- HEALTH CHECK ---
  656. @app.get("/")
  657. def root():
  658. return {
  659. "status": "MCP server running",
  660. "tools": list(TOOLS.keys()),
  661. "virtuoso": VIRTUOSO_ENDPOINT,
  662. "guardrails": {
  663. "default_limit": SPARQL_DEFAULT_LIMIT,
  664. "max_limit": SPARQL_MAX_LIMIT,
  665. "allow_example_load": ALLOW_EXAMPLE_LOAD,
  666. "turtle_examples": True,
  667. },
  668. }
  669. @app.get("/health")
  670. def health():
  671. return root()