# virtuoso_mcp.py — MCP server exposing guarded SPARQL query/update tools over a Virtuoso endpoint.
  1. import json
  2. import logging
  3. import os
  4. import re
  5. from datetime import datetime, timezone
  6. from importlib import import_module
  7. from pathlib import Path
  8. from typing import Any, Callable, Dict, List, Optional
  9. import requests
  10. from requests.auth import HTTPDigestAuth
  11. from fastapi import FastAPI, HTTPException, Request
  12. from pydantic import BaseModel
# Logging level for the whole server; override with MCP_LOG_LEVEL (e.g. DEBUG).
LOG_LEVEL = os.getenv("MCP_LOG_LEVEL", "INFO").upper()
logging.basicConfig(level=getattr(logging, LOG_LEVEL, logging.INFO))
logger = logging.getLogger("virtuoso_mcp")
app = FastAPI(title="MCP Server")
# --- CONFIG ---
# SPARQL endpoint URL; VIRTUOSO_ENDPOINT takes precedence over the legacy
# VIRTUOSO_SPARQL variable.
VIRTUOSO_ENDPOINT = os.getenv("VIRTUOSO_ENDPOINT") or os.getenv(
    "VIRTUOSO_SPARQL", "http://localhost:8891/sparql"
)
# Optional digest-auth credentials; auth is enabled only when BOTH are set.
VIRTUOSO_USER = os.getenv("VIRTUOSO_USER")
VIRTUOSO_PASS = os.getenv("VIRTUOSO_PASS")
# Request timeouts (seconds) and result-size guardrails for raw queries.
SPARQL_TIMEOUT = float(os.getenv("SPARQL_TIMEOUT", 10.0))
SPARQL_UPDATE_TIMEOUT = float(os.getenv("SPARQL_UPDATE_TIMEOUT", 15.0))
SPARQL_DEFAULT_LIMIT = int(os.getenv("SPARQL_DEFAULT_LIMIT", 100))
SPARQL_MAX_LIMIT = int(os.getenv("SPARQL_MAX_LIMIT", 500))
# Default graph used for inserts and as the ':' prefix in PREFIXES below.
GRAPH_URI = os.getenv("GRAPH_URI", "http://world.eu.org/example1")
# Well-known predicate URIs; not referenced elsewhere in this module --
# presumably consumed by domain-layer plugins (TODO confirm).
IN_CYCLE = "http://world.eu.org/cannabis-breeding#inCycle"
CLONE_OF = "http://world.eu.org/cannabis-breeding#cloneOf"
# Example-data settings; no loader is visible in this module, so these are
# likely read by a plugin. ALLOW_EXAMPLE_LOAD gates that feature.
EXAMPLES_DIR = Path(__file__).resolve().parent / "examples"
EXAMPLE_GRAPH = os.getenv(
    "EXAMPLE_GRAPH", "http://world.eu.org/cannabis-breeding#test"
)
ALLOW_EXAMPLE_LOAD = os.getenv("MCP_ALLOW_EXAMPLE_LOAD", "false").lower() == "true"
# Shared HTTP session so connections to Virtuoso are reused across requests.
SESSION = requests.Session()
# Dedicated audit logger writing tool invocations to logs/tool_usage.log.
LOGS_DIR = Path(__file__).resolve().parent / "logs"
LOGS_DIR.mkdir(parents=True, exist_ok=True)
tool_logger = logging.getLogger("virtuoso_mcp.tools")
tool_handler = logging.FileHandler(LOGS_DIR / "tool_usage.log")
tool_handler.setFormatter(logging.Formatter("%(asctime)s %(message)s"))
tool_logger.addHandler(tool_handler)
tool_logger.setLevel(logging.INFO)
tool_logger.propagate = False  # keep audit entries out of the root logger
# Prefix header prepended to any query that does not declare its own prefixes.
PREFIXES = f"""
PREFIX : <{GRAPH_URI}>
PREFIX cb: <http://world.eu.org/cannabis-breeding#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
""".strip()
# --- MODELS ---
class SparqlQueryRequest(BaseModel):
    """Payload for a raw SPARQL SELECT request."""
    # Raw SPARQL text supplied by the caller; guarded before execution.
    query: str
class ToolRequest(BaseModel):
    """Payload for an MCP tool invocation."""
    # Name of a tool registered in TOOLS.
    tool: str
    # Tool-specific arguments; pydantic copies this default per instance.
    input: Dict[str, Any] = {}
  58. # --- CORE SPARQL FUNCTION ---
  59. def _build_auth() -> Optional[HTTPDigestAuth]:
  60. if VIRTUOSO_USER and VIRTUOSO_PASS:
  61. return HTTPDigestAuth(VIRTUOSO_USER, VIRTUOSO_PASS)
  62. return None
  63. def _with_prefixes(query: str) -> str:
  64. if re.search(r"^\s*prefix\b", query, re.IGNORECASE):
  65. return query
  66. return f"{PREFIXES}\n{query}"
  67. def run_sparql(query: str) -> Dict[str, Any]:
  68. """Execute a SPARQL query against Virtuoso and return the JSON payload."""
  69. logger.debug("Sending SPARQL query: %s", query)
  70. try:
  71. response = SESSION.post(
  72. VIRTUOSO_ENDPOINT,
  73. data=_with_prefixes(query).encode("utf-8"),
  74. headers={
  75. "Accept": "application/sparql-results+json",
  76. "Content-Type": "application/sparql-query",
  77. },
  78. timeout=SPARQL_TIMEOUT,
  79. auth=_build_auth(),
  80. )
  81. if not response.ok:
  82. logger.warning("SPARQL request failed: %s", response.status_code)
  83. response.raise_for_status()
  84. return response.json()
  85. except Exception as exc: # pragma: no cover - propagate for FastAPI
  86. logger.warning("SPARQL request failed: %s", exc)
  87. raise HTTPException(status_code=500, detail=str(exc))
  88. def run_sparql_update(query: str) -> Dict[str, Any]:
  89. """Execute a SPARQL UPDATE (INSERT/DELETE) against Virtuoso."""
  90. logger.debug("Sending SPARQL update: %s", query)
  91. try:
  92. response = SESSION.post(
  93. VIRTUOSO_ENDPOINT,
  94. data=_with_prefixes(query).encode("utf-8"),
  95. headers={"Content-Type": "application/sparql-update"},
  96. timeout=SPARQL_UPDATE_TIMEOUT,
  97. auth=_build_auth(),
  98. )
  99. if not response.ok:
  100. detail = (response.text or "").strip()
  101. logger.warning("SPARQL update failed: %s", response.status_code)
  102. raise HTTPException(
  103. status_code=500,
  104. detail=detail or f"SPARQL update failed with {response.status_code}",
  105. )
  106. return {"status": "ok"}
  107. except HTTPException:
  108. raise
  109. except Exception as exc: # pragma: no cover - propagate for FastAPI
  110. logger.warning("SPARQL update failed: %s", exc)
  111. raise HTTPException(status_code=500, detail=str(exc))
  112. # --- TOOL HELPERS ---
  113. def escape_sparql_string(value: str) -> str:
  114. """Escape a string for SPARQL literal usage."""
  115. if value is None:
  116. return ""
  117. return (
  118. str(value)
  119. .replace("\\", "\\\\")
  120. .replace('"', "\\\"")
  121. .replace("\n", "\\n")
  122. .replace("\r", "")
  123. )
def sanitize_term(term: str) -> str:
    """Escape quotes inside label searches so we can safely interpolate strings.

    Thin alias for escape_sparql_string, kept for readability at call sites.
    """
    return escape_sparql_string(term)
  127. def _extract_limit(query: str) -> Optional[int]:
  128. match = re.search(r"\blimit\s+(\d+)\b", query, re.IGNORECASE)
  129. if not match:
  130. return None
  131. try:
  132. return int(match.group(1))
  133. except ValueError:
  134. return None
  135. def _apply_limit(query: str, default_limit: int, max_limit: int) -> str:
  136. limit = _extract_limit(query)
  137. if limit is None:
  138. return f"{query.strip()}\nLIMIT {default_limit}"
  139. if limit > max_limit:
  140. return re.sub(
  141. r"\blimit\s+\d+\b",
  142. f"LIMIT {max_limit}",
  143. query,
  144. flags=re.IGNORECASE,
  145. )
  146. return query
  147. def guard_select_query(query: str) -> str:
  148. """Enforce that raw queries are read-only and bounded by LIMIT."""
  149. lowered = query.lower()
  150. if re.search(r"\b(insert|delete|load|clear|drop|create|move|copy|add)\b", lowered):
  151. raise HTTPException(status_code=400, detail="SPARQL update operations are not allowed")
  152. if "select" not in lowered:
  153. raise HTTPException(status_code=400, detail="Only SELECT queries are allowed")
  154. return _apply_limit(query, SPARQL_DEFAULT_LIMIT, SPARQL_MAX_LIMIT)
  155. def ttl_to_sparql_insert(ttl_text: str, graph: Optional[str]) -> str:
  156. prefix_lines: List[str] = []
  157. body_lines: List[str] = []
  158. for raw_line in ttl_text.splitlines():
  159. line = raw_line.strip()
  160. if not line:
  161. continue
  162. prefix_match = re.match(r"@prefix\s+([\w-]+):\s*<([^>]+)>\s*\.", line)
  163. if prefix_match:
  164. prefix_lines.append(
  165. f"PREFIX {prefix_match.group(1)}: <{prefix_match.group(2)}>"
  166. )
  167. continue
  168. if line.startswith("@base"):
  169. # Skip @base entries for now; they are rare in our exports.
  170. continue
  171. body_lines.append(raw_line)
  172. if not body_lines:
  173. raise HTTPException(status_code=400, detail="No RDF triples found in input")
  174. prefixes = "\n".join(prefix_lines)
  175. body = "\n".join(body_lines)
  176. if graph:
  177. insert_body = f"GRAPH <{graph}> {{\n{body}\n}}"
  178. else:
  179. insert_body = body
  180. return f"{prefixes}\nINSERT DATA {{\n{insert_body}\n}}"
  181. # --- MCP TOOL IMPLEMENTATIONS ---
  182. def tool_sparql_query(input_data: Dict[str, Any]) -> Dict[str, Any]:
  183. query = input_data.get("query")
  184. if not query:
  185. raise ValueError("Missing 'query' field")
  186. guarded = guard_select_query(query)
  187. return run_sparql(guarded)
  188. def tool_list_graphs(_input: Dict[str, Any]) -> Dict[str, Any]:
  189. query = """
  190. SELECT DISTINCT ?g WHERE {
  191. GRAPH ?g { ?s ?p ?o }
  192. }
  193. LIMIT 50
  194. """
  195. return run_sparql(query)
  196. def tool_search_label(input_data: Dict[str, Any]) -> Dict[str, Any]:
  197. term = input_data.get("term", "")
  198. sanitized = sanitize_term(term)
  199. limit = int(input_data.get("limit", 20))
  200. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  201. query = f"""
  202. SELECT ?s ?label WHERE {{
  203. ?s rdfs:label ?label .
  204. FILTER(CONTAINS(LCASE(?label), LCASE(\"{sanitized}\")))
  205. }}
  206. LIMIT {limit}
  207. """
  208. return run_sparql(query)
  209. def tool_get_entities_by_type(input_data: Dict[str, Any]) -> Dict[str, Any]:
  210. type_uri = input_data.get("type_uri")
  211. if not type_uri:
  212. raise ValueError("Missing 'type_uri' field")
  213. limit = int(input_data.get("limit", 50))
  214. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  215. query = f"""
  216. SELECT ?s WHERE {{
  217. ?s rdf:type <{type_uri}> .
  218. }}
  219. LIMIT {limit}
  220. """
  221. return run_sparql(query)
  222. def tool_get_predicates_for_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  223. subject_uri = input_data.get("subject_uri")
  224. if not subject_uri:
  225. raise ValueError("Missing 'subject_uri' field")
  226. limit = int(input_data.get("limit", 50))
  227. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  228. query = f"""
  229. SELECT DISTINCT ?p WHERE {{
  230. <{subject_uri}> ?p ?o .
  231. }}
  232. LIMIT {limit}
  233. """
  234. return run_sparql(query)
  235. def tool_get_labels_for_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  236. subject_uri = input_data.get("subject_uri")
  237. if not subject_uri:
  238. raise ValueError("Missing 'subject_uri' field")
  239. query = f"""
  240. SELECT ?label WHERE {{
  241. <{subject_uri}> rdfs:label ?label .
  242. }}
  243. LIMIT 20
  244. """
  245. return run_sparql(query)
  246. def tool_traverse_property(input_data: Dict[str, Any]) -> Dict[str, Any]:
  247. subject_uri = input_data.get("subject_uri")
  248. property_uri = input_data.get("property_uri")
  249. if not subject_uri or not property_uri:
  250. raise ValueError("Missing 'subject_uri' or 'property_uri'")
  251. direction = input_data.get("direction", "outgoing")
  252. limit = int(input_data.get("limit", 50))
  253. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  254. if direction not in {"outgoing", "incoming"}:
  255. raise ValueError("direction must be 'outgoing' or 'incoming'")
  256. if direction == "outgoing":
  257. triple = f"<{subject_uri}> <{property_uri}> ?neighbor ."
  258. else:
  259. triple = f"?neighbor <{property_uri}> <{subject_uri}> ."
  260. query = f"""
  261. SELECT ?neighbor ?label ?description WHERE {{
  262. {triple}
  263. OPTIONAL {{ ?neighbor rdfs:label ?label }}
  264. OPTIONAL {{ ?neighbor dc:description ?description }}
  265. }}
  266. LIMIT {limit}
  267. """
  268. return run_sparql(query)
  269. def tool_list_classes(input_data: Dict[str, Any]) -> Dict[str, Any]:
  270. """List ontology classes (rdfs:Class and owl:Class) with optional term filtering."""
  271. term = sanitize_term(input_data.get("term", ""))
  272. limit = int(input_data.get("limit", 50))
  273. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  274. term_filter = ""
  275. if term:
  276. term_filter = f"""
  277. FILTER(
  278. CONTAINS(LCASE(COALESCE(STR(?label), STR(?class))), LCASE(\"{term}\")) ||
  279. CONTAINS(LCASE(COALESCE(STR(?comment), \"\")), LCASE(\"{term}\"))
  280. )
  281. """
  282. query = f"""
  283. SELECT DISTINCT ?class ?label ?comment WHERE {{
  284. {{ ?class rdf:type rdfs:Class . }}
  285. UNION
  286. {{ ?class rdf:type <http://www.w3.org/2002/07/owl#Class> . }}
  287. OPTIONAL {{ ?class rdfs:label ?label }}
  288. OPTIONAL {{ ?class rdfs:comment ?comment }}
  289. {term_filter}
  290. }}
  291. LIMIT {limit}
  292. """
  293. return run_sparql(query)
  294. def tool_list_properties(input_data: Dict[str, Any]) -> Dict[str, Any]:
  295. """List ontology properties with optional term/domain/range filtering."""
  296. term = sanitize_term(input_data.get("term", ""))
  297. domain_uri = input_data.get("domain_uri")
  298. range_uri = input_data.get("range_uri")
  299. limit = int(input_data.get("limit", 100))
  300. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  301. filters = []
  302. if term:
  303. filters.append(
  304. f"""
  305. FILTER(
  306. CONTAINS(LCASE(COALESCE(STR(?label), STR(?property))), LCASE(\"{term}\")) ||
  307. CONTAINS(LCASE(COALESCE(STR(?comment), \"\")), LCASE(\"{term}\"))
  308. )
  309. """
  310. )
  311. if domain_uri:
  312. filters.append(f"FILTER(?domain = <{domain_uri}>)")
  313. if range_uri:
  314. filters.append(f"FILTER(?range = <{range_uri}>)")
  315. query = f"""
  316. SELECT DISTINCT ?property ?label ?comment ?domain ?range WHERE {{
  317. {{ ?property rdf:type rdf:Property . }}
  318. UNION
  319. {{ ?property rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> . }}
  320. UNION
  321. {{ ?property rdf:type <http://www.w3.org/2002/07/owl#DatatypeProperty> . }}
  322. OPTIONAL {{ ?property rdfs:label ?label }}
  323. OPTIONAL {{ ?property rdfs:comment ?comment }}
  324. OPTIONAL {{ ?property rdfs:domain ?domain }}
  325. OPTIONAL {{ ?property rdfs:range ?range }}
  326. {' '.join(filters)}
  327. }}
  328. LIMIT {limit}
  329. """
  330. return run_sparql(query)
  331. def tool_describe_class(input_data: Dict[str, Any]) -> Dict[str, Any]:
  332. """Describe a class and include properties that declare it as rdfs:domain."""
  333. class_uri = input_data.get("class_uri")
  334. if not class_uri:
  335. raise ValueError("Missing 'class_uri' field")
  336. query = f"""
  337. SELECT ?label ?comment ?property ?propertyLabel ?propertyComment ?range WHERE {{
  338. OPTIONAL {{ <{class_uri}> rdfs:label ?label }}
  339. OPTIONAL {{ <{class_uri}> rdfs:comment ?comment }}
  340. OPTIONAL {{
  341. ?property rdfs:domain <{class_uri}> .
  342. OPTIONAL {{ ?property rdfs:label ?propertyLabel }}
  343. OPTIONAL {{ ?property rdfs:comment ?propertyComment }}
  344. OPTIONAL {{ ?property rdfs:range ?range }}
  345. }}
  346. }}
  347. LIMIT {SPARQL_MAX_LIMIT}
  348. """
  349. return run_sparql(query)
  350. def tool_describe_property(input_data: Dict[str, Any]) -> Dict[str, Any]:
  351. """Describe a property and include usage examples from the graph."""
  352. property_uri = input_data.get("property_uri")
  353. if not property_uri:
  354. raise ValueError("Missing 'property_uri' field")
  355. usage_limit = int(input_data.get("usage_limit", 10))
  356. usage_limit = min(max(usage_limit, 1), SPARQL_MAX_LIMIT)
  357. metadata_query = f"""
  358. SELECT ?label ?comment ?domain ?range ?type WHERE {{
  359. OPTIONAL {{ <{property_uri}> rdfs:label ?label }}
  360. OPTIONAL {{ <{property_uri}> rdfs:comment ?comment }}
  361. OPTIONAL {{ <{property_uri}> rdfs:domain ?domain }}
  362. OPTIONAL {{ <{property_uri}> rdfs:range ?range }}
  363. OPTIONAL {{ <{property_uri}> rdf:type ?type }}
  364. }}
  365. LIMIT {SPARQL_MAX_LIMIT}
  366. """
  367. usage_query = f"""
  368. SELECT ?subject ?subjectLabel ?object ?objectLabel WHERE {{
  369. ?subject <{property_uri}> ?object .
  370. OPTIONAL {{ ?subject rdfs:label ?subjectLabel }}
  371. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  372. }}
  373. LIMIT {usage_limit}
  374. """
  375. return {
  376. "metadata": run_sparql(metadata_query),
  377. "usage": run_sparql(usage_query),
  378. }
  379. def tool_describe_subject(input_data: Dict[str, Any]) -> Dict[str, Any]:
  380. subject_uri = input_data.get("subject_uri")
  381. if not subject_uri:
  382. raise ValueError("Missing 'subject_uri' field")
  383. limit = int(input_data.get("limit", 50))
  384. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  385. query = f"""
  386. SELECT ?predicate ?object ?objectLabel WHERE {{
  387. <{subject_uri}> ?predicate ?object .
  388. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  389. }}
  390. LIMIT {limit}
  391. """
  392. return run_sparql(query)
  393. def tool_path_traverse(input_data: Dict[str, Any]) -> Dict[str, Any]:
  394. subject_uri = input_data.get("subject_uri")
  395. property_path = input_data.get("property_path") or input_data.get("properties")
  396. if not subject_uri or not property_path:
  397. raise ValueError("Missing 'subject_uri' or 'property_path'")
  398. if isinstance(property_path, str):
  399. property_path = [p.strip() for p in property_path.split(",") if p.strip()]
  400. if not isinstance(property_path, list) or not property_path:
  401. raise ValueError("'property_path' must be a non-empty list of property URIs")
  402. direction = input_data.get("direction", "outgoing")
  403. limit = int(input_data.get("limit", 50))
  404. limit = min(max(limit, 1), SPARQL_MAX_LIMIT)
  405. statements = []
  406. optional_lines = []
  407. select_terms = []
  408. prev_subject = f"<{subject_uri}>"
  409. for idx, prop_uri in enumerate(property_path, start=1):
  410. step_var = f"?n{idx}"
  411. if direction == "outgoing":
  412. statements.append(f"{prev_subject} <{prop_uri}> {step_var} .")
  413. else:
  414. statements.append(f"{step_var} <{prop_uri}> {prev_subject} .")
  415. select_terms.append(step_var)
  416. optional_lines.append(f"OPTIONAL {{ {step_var} rdfs:label {step_var}Label }}")
  417. optional_lines.append(f"OPTIONAL {{ {step_var} dc:description {step_var}Description }}")
  418. prev_subject = step_var
  419. select_clause = " ".join(select_terms)
  420. query = f"""
  421. SELECT {select_clause} WHERE {{
  422. {'\n '.join(statements)}
  423. {'\n '.join(optional_lines)}
  424. }}
  425. LIMIT {limit}
  426. """
  427. return {
  428. "property_path": property_path,
  429. "direction": direction,
  430. "result": run_sparql(query),
  431. }
  432. def tool_property_usage_statistics(input_data: Dict[str, Any]) -> Dict[str, Any]:
  433. property_uri = input_data.get("property_uri")
  434. if not property_uri:
  435. raise ValueError("Missing 'property_uri' field")
  436. examples_limit = int(input_data.get("examples_limit", 5))
  437. examples_limit = min(max(examples_limit, 1), SPARQL_MAX_LIMIT)
  438. count_query = f"""
  439. SELECT (COUNT(DISTINCT ?subject) AS ?usageCount) WHERE {{
  440. ?subject <{property_uri}> ?object .
  441. }}
  442. LIMIT {SPARQL_MAX_LIMIT}
  443. """
  444. usage_query = f"""
  445. SELECT ?subject ?subjectLabel ?object ?objectLabel WHERE {{
  446. ?subject <{property_uri}> ?object .
  447. OPTIONAL {{ ?subject rdfs:label ?subjectLabel }}
  448. OPTIONAL {{ ?object rdfs:label ?objectLabel }}
  449. }}
  450. LIMIT {examples_limit}
  451. """
  452. return {
  453. "count": run_sparql(count_query),
  454. "examples": run_sparql(usage_query),
  455. }
  456. def tool_batch_insert(input_data: Dict[str, Any]) -> Dict[str, Any]:
  457. ttl_text = input_data.get("ttl")
  458. triples = input_data.get("triples")
  459. graph = input_data.get("graph") or GRAPH_URI
  460. if not ttl_text and not triples:
  461. raise ValueError("Provide either 'ttl' text or 'triples' list")
  462. def _format_object(obj_value: Any, obj_type: str, datatype: Optional[str], lang: Optional[str]) -> str:
  463. if obj_type == "uri":
  464. return f"<{obj_value}>"
  465. if obj_type == "literal":
  466. return f'"{escape_sparql_string(obj_value)}"'
  467. if obj_type == "typed_literal":
  468. if not datatype:
  469. raise ValueError("Missing datatype for typed_literal")
  470. return f'"{escape_sparql_string(obj_value)}"^^<{datatype}>'
  471. if obj_type == "lang_literal":
  472. if not lang:
  473. raise ValueError("Missing lang for lang_literal")
  474. return f'"{escape_sparql_string(obj_value)}"@{lang}'
  475. raise ValueError(f"Unknown object_type: {obj_type}")
  476. if ttl_text:
  477. query = ttl_to_sparql_insert(ttl_text, graph)
  478. else:
  479. lines = []
  480. for triple in triples:
  481. subj = triple.get("subject")
  482. pred = triple.get("predicate")
  483. obj_value = triple.get("object")
  484. if not subj or not pred or obj_value is None:
  485. raise ValueError("Each triple must provide subject, predicate, and object")
  486. obj_type = triple.get("object_type", "uri")
  487. datatype = triple.get("datatype")
  488. lang = triple.get("lang")
  489. obj_text = _format_object(obj_value, obj_type, datatype, lang)
  490. lines.append(f"<{subj}> <{pred}> {obj_text} .")
  491. ttl_bulk = "\n".join(lines)
  492. query = ttl_to_sparql_insert(ttl_bulk, graph)
  493. result = run_sparql_update(query)
  494. return {**result, "query": query}
  495. def tool_insert_triple(input_data: Dict[str, Any]) -> Dict[str, Any]:
  496. subject = input_data.get("subject")
  497. predicate = input_data.get("predicate")
  498. obj = input_data.get("object")
  499. obj_type = input_data.get("object_type", "uri")
  500. graph = input_data.get("graph")
  501. if not subject or not predicate or obj is None:
  502. raise ValueError("Missing 'subject', 'predicate', or 'object' field")
  503. if obj_type == "uri":
  504. obj_value = f"<{obj}>"
  505. elif obj_type == "literal":
  506. obj_value = f"\"{escape_sparql_string(obj)}\""
  507. elif obj_type == "typed_literal":
  508. datatype = input_data.get("datatype")
  509. if not datatype:
  510. raise ValueError("Missing 'datatype' for typed_literal")
  511. obj_value = f"\"{escape_sparql_string(obj)}\"^^<{datatype}>"
  512. elif obj_type == "lang_literal":
  513. lang = input_data.get("lang")
  514. if not lang:
  515. raise ValueError("Missing 'lang' for lang_literal")
  516. obj_value = f"\"{escape_sparql_string(obj)}\"@{lang}"
  517. else:
  518. raise ValueError("Unknown object_type")
  519. ttl = f"<{subject}> <{predicate}> {obj_value} ."
  520. update_query = ttl_to_sparql_insert(ttl, graph)
  521. try:
  522. result = run_sparql_update(update_query)
  523. except HTTPException as exc:
  524. detail = f"{exc.detail}\n\nSPARQL:\n{update_query}"
  525. raise HTTPException(status_code=exc.status_code, detail=detail)
  526. return {**result, "query": update_query}
# --- TOOL REGISTRY ---
# Maps tool names (as sent in ToolRequest.tool) to their implementations.
# Domain-layer plugins may add further entries via load_domain_layers() below.
TOOLS = {
    "sparql_query": tool_sparql_query,
    "list_graphs": tool_list_graphs,
    "search_label": tool_search_label,
    "get_entities_by_type": tool_get_entities_by_type,
    "get_predicates_for_subject": tool_get_predicates_for_subject,
    "get_labels_for_subject": tool_get_labels_for_subject,
    "traverse_property": tool_traverse_property,
    "list_classes": tool_list_classes,
    "list_properties": tool_list_properties,
    "describe_class": tool_describe_class,
    "describe_property": tool_describe_property,
    "describe_subject": tool_describe_subject,
    "path_traverse": tool_path_traverse,
    "property_usage_statistics": tool_property_usage_statistics,
    "batch_insert": tool_batch_insert,
    "insert_triple": tool_insert_triple,
}
def load_domain_layers(tools: Dict[str, Callable[[Dict[str, Any]], Any]]) -> None:
    """Import optional plugin modules and let them register extra tools.

    DOMAIN_LAYERS is a comma-separated list of module paths (default
    "garden_layer.plugin").  Each module is expected to expose
    ``register_layer(tools)``, which may add entries to the shared registry.
    Every failure mode is logged and skipped so a broken or missing plugin
    never prevents server startup.
    """
    raw = os.getenv("DOMAIN_LAYERS", "garden_layer.plugin")
    modules = [item.strip() for item in raw.split(",") if item.strip()]
    if not modules:
        return
    for module_name in modules:
        module = None
        try:
            module = import_module(module_name)
        except ImportError as exc:
            # Fall back to the top-level package (e.g. "garden_layer" for
            # "garden_layer.plugin") before giving up on this layer.
            base = module_name.split(".", 1)[0]
            if base != module_name:
                try:
                    module = import_module(base)
                    logger.info("Falling back to base module '%s' for domain layer '%s'", base, module_name)
                except ImportError:
                    logger.warning(
                        "Domain layer '%s' could not be imported and base module '%s' is missing: %s",
                        module_name,
                        base,
                        exc,
                    )
            else:
                logger.warning("Domain layer '%s' could not be imported: %s", module_name, exc)
        if module is None:
            continue
        register = getattr(module, "register_layer", None)
        if not callable(register):
            logger.warning("Domain layer '%s' does not expose register_layer", module_name)
            continue
        try:
            register(tools)
            logger.info("Loaded domain layer '%s'", module_name)
        except Exception as exc:
            # Registration bugs in a plugin must not crash the server.
            logger.exception("Domain layer '%s' failed to register: %s", module_name, exc)
# Register plugin-provided tools at import time.
load_domain_layers(TOOLS)
# Human-readable summaries returned alongside tool results.  Entries such as
# "cycle_plants" and "reassign_cycle" document tools that domain layers are
# expected to register -- they are not defined in this module (TODO confirm
# the plugin actually provides them).
TOOL_DOCS = {
    "sparql_query": "Execute a bounded SELECT query and return the JSON result.",
    "list_graphs": "List up to 50 active graph URIs.",
    "search_label": "Search rdfs:label values that contain a term (case-insensitive).",
    "get_entities_by_type": "List subjects of a given rdf:type.",
    "cycle_plants": "List plants (with labels/clone parents) that belong to a specific cycle.",
    "get_predicates_for_subject": "List distinct predicates used by a subject.",
    "get_labels_for_subject": "Fetch rdfs:label values for a subject.",
    "traverse_property": "Traverse a property (incoming or outgoing) for a subject and return labels/descriptions.",
    "list_classes": "List ontology classes with optional label/comment term filtering.",
    "list_properties": "List ontology properties with optional term/domain/range filters.",
    "describe_class": "Describe a class and list properties that use it as rdfs:domain.",
    "describe_property": "Describe a property (label/comment/domain/range/type) and sample usage.",
    "describe_subject": "Return subject predicates/objects (with labels) to inspect an individual node.",
    "path_traverse": "Follow a property path (list of predicates) from a subject, returning each step's nodes.",
    "property_usage_statistics": "Count how often a property is used and sample subjects/objects.",
    "batch_insert": "Insert multiple triples or TTL at once with a single guarded update.",
    "reassign_cycle": "Move a subject to another production cycle by updating its inCycle link.",
    "insert_triple": "Insert a single triple (useful for debugging updates).",
}
  602. # --- MCP ENDPOINT ---
  603. @app.post("/mcp")
  604. def handle_mcp(tool_request: ToolRequest, http_request: Request):
  605. tool_name = tool_request.tool
  606. input_data = tool_request.input or {}
  607. client_host = http_request.client.host if http_request.client else "unknown"
  608. trimmed_input = json.dumps(input_data, ensure_ascii=False, default=str)
  609. if len(trimmed_input) > 1024:
  610. trimmed_input = f"{trimmed_input[:1024]}…"
  611. timestamp = datetime.now(timezone.utc).isoformat()
  612. tool_logger.info(
  613. "tool=%s client=%s time=%s input=%s",
  614. tool_name,
  615. client_host,
  616. timestamp,
  617. trimmed_input,
  618. )
  619. if tool_name not in TOOLS:
  620. raise HTTPException(status_code=400, detail=f"Unknown tool: {tool_name}")
  621. try:
  622. result = TOOLS[tool_name](input_data)
  623. return {
  624. "status": "ok",
  625. "tool": tool_name,
  626. "description": TOOL_DOCS.get(tool_name, ""),
  627. "result": result,
  628. }
  629. except Exception as exc:
  630. logger.error("Tool %s failed: %s", tool_name, exc)
  631. raise HTTPException(status_code=500, detail=str(exc))
  632. # --- HEALTH CHECK ---
  633. @app.get("/")
  634. def root():
  635. return {
  636. "status": "MCP server running",
  637. "tools": list(TOOLS.keys()),
  638. "virtuoso": VIRTUOSO_ENDPOINT,
  639. "guardrails": {
  640. "default_limit": SPARQL_DEFAULT_LIMIT,
  641. "max_limit": SPARQL_MAX_LIMIT,
  642. "allow_example_load": ALLOW_EXAMPLE_LOAD,
  643. "turtle_examples": True,
  644. },
  645. }
@app.get("/health")
def health():
    """Alias for the root endpoint so orchestrators can probe /health."""
    return root()