diff --git a/databricks-skills/README.md b/databricks-skills/README.md index 47ac07db..18e59c89 100644 --- a/databricks-skills/README.md +++ b/databricks-skills/README.md @@ -102,7 +102,8 @@ cp -r ai-dev-kit/databricks-skills/databricks-agent-bricks .claude/skills/ ### πŸš€ Development & Deployment - **databricks-bundles** - DABs for multi-environment deployments - **databricks-app-apx** - Full-stack apps (FastAPI + React) -- **databricks-apps-python** - Python web apps (Dash, Streamlit, Flask) with foundation model integration +- **databricks-app-python** - Python web apps (Dash, Streamlit, Flask) with foundation model integration +- **databricks-custom-mcp-server** - Build, deploy, and govern custom MCP servers on Databricks Apps (FastMCP + UC Connection + Supervisor Agent attachment) - **databricks-python-sdk** - Python SDK, Connect, CLI, REST API - **databricks-config** - Profile authentication setup - **databricks-lakebase-provisioned** - Managed PostgreSQL for OLTP workloads diff --git a/databricks-skills/databricks-custom-mcp-server/1-build-fastmcp-server.md b/databricks-skills/databricks-custom-mcp-server/1-build-fastmcp-server.md new file mode 100644 index 00000000..1be61c98 --- /dev/null +++ b/databricks-skills/databricks-custom-mcp-server/1-build-fastmcp-server.md @@ -0,0 +1,227 @@ +# 1. Build the FastMCP Server + +The server itself is FastAPI + FastMCP + your tools. Same shape as the official `mcp-server-hello-world` template; the only thing you change are the tools and the data-access helpers. + +## Server skeleton + +`server/app.py`: + +```python +"""Custom MCP server β€” FastMCP + FastAPI, deployed as a Databricks App. + +Exposes domain tools over the MCP streamable-HTTP transport at /mcp. +Authenticates downstream Databricks resources via WorkspaceClient (the +app's SP credentials at runtime, dev profile locally). 
+"""
+from __future__ import annotations
+
+import os
+import re
+import time
+import uuid
+from contextlib import contextmanager
+from pathlib import Path
+
+import psycopg
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service.sql import StatementState
+from mcp.server.fastmcp import FastMCP
+
+# ─── workspace + lakebase helpers (lazy init, refresh on TTL) ──────────────
+
+_w: WorkspaceClient | None = None
+
+
+def get_w() -> WorkspaceClient:
+    """Return the process-wide WorkspaceClient, creating it on first use.
+
+    Uses the profile named by DATABRICKS_CONFIG_PROFILE when that variable is
+    set; otherwise constructs a default WorkspaceClient().
+    """
+    global _w
+    if _w is None:
+        profile = os.environ.get("DATABRICKS_CONFIG_PROFILE")
+        _w = WorkspaceClient(profile=profile) if profile else WorkspaceClient()
+    return _w
+
+
+# Lakebase OAuth tokens last ~1h — treat them as valid for 50 min (a 10-min
+# safety buffer) and refresh once less than 60 s of that window remains.
+_LAKEBASE_TOKEN_TTL_S = 50 * 60
+_LAKEBASE_REFRESH_MARGIN_S = 60
+# Keyed by instance name so a second instance never reuses the first
+# instance's host/token.
+_lakebase_cred: dict[str, dict] = {}
+
+
+def get_lakebase_cred(instance_name: str) -> tuple[str, str, str]:
+    """Return ``(host, user, token)`` for a Lakebase instance.
+
+    Credentials are cached per ``instance_name`` and regenerated when the
+    cached entry is missing or about to expire.
+    """
+    now = time.time()
+    cached = _lakebase_cred.get(instance_name)
+    if cached and cached["token"] and cached["exp"] > now + _LAKEBASE_REFRESH_MARGIN_S:
+        return cached["host"], cached["user"], cached["token"]
+    w = get_w()
+    inst = w.database.get_database_instance(name=instance_name)
+    cred = w.database.generate_database_credential(
+        request_id=str(uuid.uuid4()), instance_names=[instance_name],
+    )
+    cached = {
+        "host": inst.read_write_dns,
+        "user": w.current_user.me().user_name,
+        "token": cred.token,
+        "exp": now + _LAKEBASE_TOKEN_TTL_S,
+    }
+    _lakebase_cred[instance_name] = cached
+    return cached["host"], cached["user"], cached["token"]
+
+
+@contextmanager
+def pg_conn(instance_name: str, db_name: str):
+    """Yield an autocommit psycopg connection to ``db_name``; closed on exit."""
+    host, user, token = get_lakebase_cred(instance_name)
+    conn = psycopg.connect(
+        host=host, port=5432, dbname=db_name, user=user, password=token,
+        sslmode="require", autocommit=True,
+    )
+    try:
+        yield conn
+    finally:
+        conn.close()
+
+
+def wh_query(warehouse_id: str, sql: str) -> list[tuple]:
+    """Run ``sql`` on a SQL warehouse and return the result rows.
+
+    Raises:
+        ValueError: if ``warehouse_id`` is empty.
+        RuntimeError: if the statement ends in any state other than SUCCEEDED.
+    """
+    if not warehouse_id:
+        raise ValueError("warehouse_id is required (set DATABRICKS_WAREHOUSE_ID)")
+    w = get_w()
+    resp = w.statement_execution.execute_statement(
+        warehouse_id=warehouse_id, statement=sql, wait_timeout="50s",
+    )
+    # Keep polling while the statement is still in flight after the initial wait.
+    while resp.status.state in (StatementState.PENDING, StatementState.RUNNING):
+        time.sleep(0.15)
+        resp = w.statement_execution.get_statement(resp.statement_id)
+    if resp.status.state != StatementState.SUCCEEDED:
+        err = resp.status.error.message if resp.status.error else str(resp.status.state)
+        raise RuntimeError(f"warehouse query failed: {err}")
+    # A statement can succeed without a result payload; treat that as zero rows.
+    if resp.result is None:
+        return []
+    return resp.result.data_array or []
+
+
+# ─── FastMCP server with tool definitions ──────────────────────────────────
+
+mcp = FastMCP("my-custom-mcp")
+
+# Warehouse used by SQL-backed tools; supplied through the app environment.
+WAREHOUSE_ID = os.environ.get("DATABRICKS_WAREHOUSE_ID", "")
+
+# Plain SQL identifier: the only shape accepted for LLM-supplied names that
+# get interpolated into SQL text (identifiers cannot be bound as parameters).
+_IDENT_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
+
+
+def _require_ident(value: str, what: str) -> str:
+    """Return ``value`` unchanged if it is a plain SQL identifier, else raise ValueError."""
+    if not isinstance(value, str) or not _IDENT_RE.fullmatch(value):
+        raise ValueError(f"invalid {what}: {value!r}")
+    return value
+
+
+@mcp.tool()
+def query_lakebase_orders(customer_id: str, limit: int = 10) -> dict:
+    """Read recent orders for a customer from Lakebase.
+
+    Args:
+        customer_id: Customer ID to look up
+        limit: Max rows (default 10, max 100)
+    """
+    if not customer_id or not customer_id.strip():
+        raise ValueError("customer_id must be a non-empty string")
+    row_cap = max(1, min(int(limit), 100))
+    with pg_conn("my-lakebase-instance", "my_db") as c, c.cursor() as cur:
+        cur.execute(
+            "SELECT order_id, status, created_at FROM orders "
+            "WHERE customer_id = %s ORDER BY created_at DESC LIMIT %s",
+            (customer_id, row_cap),
+        )
+        rows = cur.fetchall()
+    return {
+        "binding": "my_catalog.my_db.orders",
+        "object_type": "LAKEBASE_TABLE",
+        "rows": [
+            {"order_id": r[0], "status": r[1], "created_at": str(r[2])}
+            for r in rows
+        ],
+    }
+
+
+@mcp.tool()
+def query_metric_view(view: str, dimension: str | None = None) -> dict:
+    """Query a Unity Catalog Metric View using MEASURE() semantics.
+
+    Args:
+        view: Short view name (e.g. 'mv_sales_by_region')
+        dimension: Optional GROUP BY dimension
+    """
+    # Both names end up inside the SQL text, so validate before interpolating.
+    view = _require_ident(view, "view")
+    measure = "MEASURE(net_revenue) AS net_revenue"
+    if dimension:
+        dimension = _require_ident(dimension, "dimension")
+        sql = (
+            f"SELECT {dimension}, {measure} "
+            f"FROM my_catalog.gold.{view} GROUP BY {dimension}"
+        )
+    else:
+        sql = f"SELECT {measure} FROM my_catalog.gold.{view}"
+    rows = wh_query(WAREHOUSE_ID, sql)
+    return {"view": view, "rows": [tuple(r) for r in rows]}
+
+
+# ─── ASGI app for Databricks Apps to serve ─────────────────────────────────
+
+http_app = mcp.streamable_http_app()
+
+if __name__ == "__main__":
+    # Local dev only
+    import uvicorn
+    port = int(os.environ.get("PORT", 8000))
+    uvicorn.run(http_app, host="0.0.0.0", port=port, log_level="info")
+```
+
+## Tool design rules
+
+| Rule | Why |
+|---|---|
+| **Type-hint every parameter** | The MCP protocol's tool-discovery embeds these in the schema sent to the LLM. No types = no schema = LLM picks wrong tool. |
+| **Write a clear one-line docstring summary** | The first line is what the LLM sees when choosing which tool to call. "Read orders for a customer" vs "Database accessor". |
+| **Return a dict with stable keys**, not a free-form string | Lets downstream agents pattern-match. Include `binding` / `object_type` keys so the agent can cite the source. |
+| **Validate inputs at the tool boundary** | LLMs hallucinate. Cap `limit`, reject empty strings, sanity-check ID formats. The tool is your SQL injection boundary. |
+| **Never do destructive ops by default** | Read tools are fine. Write tools should require explicit caller flags or sit behind a separate auth check. |
+| **Avoid one giant `query_anything` tool** | LLMs route better between 10 specific tools than they construct SQL for one generic tool. |
+
+## User-token passthrough (when the tool needs the *end user's* identity)
+
+By default, your tools authenticate as the **app's service principal** (`get_w()` returns the app SP's `WorkspaceClient`).
If a tool needs to act as the *calling user* (e.g., respect that user's row-level UC grants), use the `x-forwarded-access-token` header that Databricks Apps SSO injects:
+
+```python
+import contextvars
+
+# Copy of the current request's HTTP headers, readable from inside tools.
+header_store: contextvars.ContextVar[dict] = contextvars.ContextVar("headers")
+
+# in your FastAPI middleware (combined_app):
+# NOTE(review): `combined_app` is not defined in the skeleton above, which
+# serves `http_app` directly — create the wrapping FastAPI app before
+# registering this middleware, and serve that app instead.
+@combined_app.middleware("http")
+async def capture_headers(request, call_next):
+    """Stash this request's headers so tools can read them later."""
+    header_store.set(dict(request.headers))
+    return await call_next(request)
+
+def get_user_w() -> WorkspaceClient:
+    """WorkspaceClient acting as the calling user, not the app SP.
+
+    Raises:
+        PermissionError: when DATABRICKS_APP_NAME is set and the captured
+            headers contain no ``x-forwarded-access-token``.
+    """
+    if "DATABRICKS_APP_NAME" not in os.environ:
+        return WorkspaceClient()  # local dev
+    headers = header_store.get({})
+    token = headers.get("x-forwarded-access-token")
+    if not token:
+        raise PermissionError("no x-forwarded-access-token in request")
+    return WorkspaceClient(token=token, auth_type="pat")
+```
+
+This is the same pattern the stock `mcp-server-hello-world` template uses. Use `get_user_w()` for tools that should respect UC row-filters / per-user grants; use `get_w()` for tools that read shared reference data.
+
+## `pyproject.toml` + `requirements.txt`
+
+```toml
+# pyproject.toml
+[project]
+name = "my-custom-mcp"
+version = "0.1.0"
+requires-python = ">=3.11"
+dependencies = [
+    "fastapi>=0.115",
+    "uvicorn[standard]>=0.27",
+    "mcp[cli]>=1.14.0",
+    "fastmcp>=2.12",
+    "databricks-sdk>=0.40",
+    "psycopg[binary]>=3.2",
+    "pyyaml>=6.0",
+]
+
+[project.scripts]
+# NOTE(review): this entry point needs a `server/main.py` exposing `main()`;
+# this guide only shows `server/app.py` — add that module or drop the entry.
+custom-mcp-server = "server.main:main"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["server"]
+```
+
+Require `mcp >= 1.14.0` — older versions don't implement the streamable-HTTP transport that Supervisor Agents expect.
+ +## Local testing + +```bash +# Run the server locally on :8000 +uv run uvicorn server.app:http_app --host 0.0.0.0 --port 8000 + +# In another terminal, hit it with the SDK +uv run python -c " +from databricks_mcp import DatabricksMCPClient +from databricks.sdk import WorkspaceClient +c = DatabricksMCPClient(server_url='http://localhost:8000/mcp', + workspace_client=WorkspaceClient(profile='dev')) +print(c.list_tools()) +print(c.call_tool('query_lakebase_orders', {'customer_id': 'CUST_001'})) +" +``` + +If `list_tools()` returns your tool names with full schemas β€” the protocol layer is right. Move on to [2-deploy-as-databricks-app.md](2-deploy-as-databricks-app.md). diff --git a/databricks-skills/databricks-custom-mcp-server/2-deploy-as-databricks-app.md b/databricks-skills/databricks-custom-mcp-server/2-deploy-as-databricks-app.md new file mode 100644 index 00000000..61884730 --- /dev/null +++ b/databricks-skills/databricks-custom-mcp-server/2-deploy-as-databricks-app.md @@ -0,0 +1,178 @@ +# 2. Deploy as a Databricks App + +The MCP server is a regular Databricks App with a specific `app.yaml` command. After deploy, grant the app's SP read access to whatever data its tools touch, then verify the server is reachable. + +## `app.yaml` (3 lines) + +```yaml +command: ["uvicorn", "server.app:http_app", "--host", "0.0.0.0", "--port", "8000"] + +env: + - name: "PORT" + value: "8000" + # Optional β€” if your tools talk to another deployed app: + # - name: "ONTOS_APP_URL" + # value: "https://my-other-app.cloud.databricksapps.com" +``` + +The literal `server.app:http_app` matches the `http_app = mcp.streamable_http_app()` line at the bottom of `server/app.py`. + +## `manifest.yaml` + +```yaml +version: 1 +name: "My Custom MCP Server" +description: "MCP server exposing tools to AI agents." +``` + +That's the entire manifest the Apps platform reads. Don't expect MCP-specific schema fields β€” there aren't any today. 
+ +## Deploy via the SDK (programmatic, idempotent) + +```python +import datetime +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.apps import App, AppDeployment + +w = WorkspaceClient(profile="") +APP_NAME = "my-custom-mcp" + +# 1. Upload source to a workspace path +src_remote = f"/Workspace/Users/{w.current_user.me().user_name}/{APP_NAME}" +# (Use w.workspace.import_ or w.workspace.upload to push your local dir.) + +# 2. Ensure the app exists +try: + w.apps.get(name=APP_NAME) + print(f" βœ“ app {APP_NAME} already exists; redeploying") +except Exception: + w.apps.create(app=App(name=APP_NAME)).result(timeout=datetime.timedelta(minutes=10)) + print(f" βœ“ created app {APP_NAME}") + +# 3. Trigger deployment +deployment = w.apps.deploy_and_wait( + app_name=APP_NAME, + app_deployment=AppDeployment(source_code_path=src_remote), + timeout=datetime.timedelta(minutes=10), +) +print(f" βœ“ deployment {deployment.deployment_id} succeeded") +print(f" url: {w.apps.get(name=APP_NAME).url}") +``` + +## Deploy via the CLI (fastest for iteration) + +```bash +databricks apps deploy my-custom-mcp \ + --source-code-path /Workspace/Users/$USER/my-custom-mcp \ + -p +``` + +Watch logs while it boots: + +```bash +databricks apps logs my-custom-mcp -p +``` + +Look for: +``` +INFO: Started server process [69] +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) +``` + +If you see Python tracebacks instead, the most common culprits are missing `requirements.txt` entries or your imports breaking before lifespan startup. + +## Grant the app SP access to its data + +The Apps platform mints a fresh service principal per app at create-time. 
Find its client_id and grant whatever your tools read: + +```python +app = w.apps.get(name=APP_NAME) +sp_client_id = app.service_principal_client_id +print(f"SP to grant: {sp_client_id}") + +# UC reads (warehouses, catalogs, Metric Views, federated catalogs) +for sql in [ + f"GRANT USE CATALOG ON CATALOG my_catalog TO `{sp_client_id}`", + f"GRANT USE SCHEMA, SELECT ON SCHEMA my_catalog.gold TO `{sp_client_id}`", + f"GRANT SELECT ON VIEW my_catalog.gold.mv_sales TO `{sp_client_id}`", +]: + w.statement_execution.execute_statement( + warehouse_id="", statement=sql, wait_timeout="30s", + ) + +# Warehouse CAN_USE +from databricks.sdk.service.iam import AccessControlRequest, PermissionLevel +w.warehouses.update_permissions( + warehouse_id="", + access_control_list=[ + AccessControlRequest(service_principal_name=sp_client_id, + permission_level=PermissionLevel.CAN_USE) + ], +) + +# Lakebase Postgres role + grants (if your tools read Lakebase) +from databricks.sdk.service.database import ( + DatabaseInstanceRole, DatabaseInstanceRoleIdentityType, +) +try: + w.database.create_database_instance_role( + instance_name="", + database_instance_role=DatabaseInstanceRole( + name=sp_client_id, + identity_type=DatabaseInstanceRoleIdentityType.SERVICE_PRINCIPAL, + ), + ) +except Exception as e: + if "already" not in str(e).lower(): raise +# Then run GRANT USAGE/SELECT inside Postgres for whichever schemas the tools read. +``` + +## Smoke-test the deployed server + +```bash +TOKEN=$(databricks auth token --profile

| jq -r .access_token) +URL="https://my-custom-mcp-.cloud.databricksapps.com" + +# Health (FastAPI's auto-generated root) +curl -fsS -H "Authorization: Bearer $TOKEN" $URL/healthz +# If you don't have /healthz, hit / instead β€” should return a JSON or HTML status page. + +# MCP discovery +curl -fsS -H "Authorization: Bearer $TOKEN" -H "Accept: application/json" $URL/mcp +# Should return 401 + WWW-Authenticate if no token (the Apps-SSO 401-on-JSON case), +# or 200 + a streaming session response if you've authenticated correctly. +``` + +Then exercise the protocol with the SDK: + +```python +from databricks_mcp import DatabricksMCPClient +from databricks.sdk import WorkspaceClient +c = DatabricksMCPClient(server_url=f"{URL}/mcp", + workspace_client=WorkspaceClient(profile="

")) +tools = c.list_tools() +print([t.name for t in tools]) +result = c.call_tool("query_lakebase_orders", {"customer_id": "CUST_001"}) +print(result) +``` + +## Restart-as-recovery + +A running MCP-server app can drift into a state where `/healthz` returns 200 but tool calls hang or fail silently β€” particularly after long idles, mid-deploy aborts, or downstream dependency reboots. **Stop + start cycles cure it**: + +```python +import datetime +w.apps.stop_and_wait(name=APP_NAME, timeout=datetime.timedelta(minutes=3)) +a = w.apps.start_and_wait(name=APP_NAME, timeout=datetime.timedelta(minutes=5)) +print(f"state: {a.app_status.state.value}") # expect RUNNING +``` + +Bake a `make restart-mcp` (or equivalent) into your project Makefile β€” you'll use it more often than you think. + +## After deploy + +Two next steps depending on how the server is going to be consumed: + +- **Direct programmatic clients** (your own agent code, scripts): you're done. `DatabricksMCPClient(server_url=..., workspace_client=...)` is the contract; ship. +- **Agent Bricks / Supervisor Agents / Databricks Assistant**: go to [3-register-in-unity-catalog.md](3-register-in-unity-catalog.md) to register as a UC Connection, then [4-attach-to-supervisor-agent.md](4-attach-to-supervisor-agent.md). diff --git a/databricks-skills/databricks-custom-mcp-server/3-register-in-unity-catalog.md b/databricks-skills/databricks-custom-mcp-server/3-register-in-unity-catalog.md new file mode 100644 index 00000000..caaedf7a --- /dev/null +++ b/databricks-skills/databricks-custom-mcp-server/3-register-in-unity-catalog.md @@ -0,0 +1,205 @@ +# 3. Register the MCP Server in Unity Catalog + +There are two paths: + +1. **`register_mcp_server_via_dcr()`** β€” the supported, one-call path. **Try this first.** Works only on workspaces where the OAuth Authorization Server has Dynamic Client Registration enabled. + +2. **Manual four-layer recipe** β€” when DCR isn't enabled (more common than docs suggest as of 2026-05). 
Requires account-admin perms; ~5 minutes of scripted work + 1 user click.
+
+Both paths end with a UC Connection visible in Catalog Explorer → Connections, with `is_mcp_connection: "true"` in its options — that's the flag UC and Supervisor Agents use to recognise an MCP server.
+
+---
+
+## Path 1 — DCR (try first)
+
+```python
+import requests
+from databricks.sdk import WorkspaceClient
+from databricks_mcp import register_mcp_server_via_dcr
+
+# CRITICAL pre-step: patch requests.get so the discovery handshake works
+# behind the Databricks Apps SSO proxy. Without this, an unauthenticated
+# GET to /mcp returns 302→200 HTML (workspace login page) instead of the
+# 401+JSON the discovery code expects.
+_orig_get = requests.get
+
+def _patched_get(url, params=None, **kw):
+    """requests.get variant that asks for JSON and does not follow redirects."""
+    # Copy the headers so the caller's dict is never mutated.
+    headers = dict(kw.pop("headers", None) or {})
+    headers.setdefault("Accept", "application/json")
+    kw.setdefault("allow_redirects", False)
+    return _orig_get(url, params=params, headers=headers, **kw)
+
+requests.get = _patched_get
+try:
+    url = register_mcp_server_via_dcr(
+        connection_name="my_mcp_conn",
+        mcp_url="https://my-mcp-server-.cloud.databricksapps.com/mcp",
+        workspace_client=WorkspaceClient(profile="YOUR_PROFILE"),
+    )
+finally:
+    # Always undo the monkeypatch so later HTTP calls in this process
+    # follow redirects and send default headers again.
+    requests.get = _orig_get
+print(url)
+```
+
+If this returns a URL pointing at `/explore/connections/my_mcp_conn`, DCR worked. Skip to [4-attach-to-supervisor-agent.md](4-attach-to-supervisor-agent.md).
+
+### Likely failure modes
+
+| Error | Meaning | Path forward |
+|---|---|---|
+| `RuntimeError: Expected HTTP 401 from MCP URL, got 200` | You skipped the `requests.get` patch above | Apply the patch, retry |
+| `Authorization Server does NOT support Dynamic Client Registration (missing 'registration_endpoint')` | Workspace OAuth doesn't expose DCR | **Switch to Path 2** below |
+| `HTTPStatusError: '401 Unauthorized' on /mcp` | The caller's SP doesn't have `CAN_USE` on the MCP app | Run `w.apps.update_permissions(app_name=…, access_control_list=[…CAN_USE])` |
+
+---
+
+## Path 2 — Manual four-layer recipe (no-DCR workaround)
+
+Use this when path 1 fails with the DCR error. The recipe is:
+
+```
+Layer 1: Account admin creates a custom OAuth integration with the right redirect URIs
+Layer 2: Workspace user creates a UC HTTP connection embedding that integration's credentials
+Layer 3: Workspace user detaches any duplicate tool_type=app on the supervisor (if applicable)
+Layer 4: End user clicks the consent link once per identity
+```
+
+### Layer 1 — Custom OAuth integration (account admin)
+
+Why the auto-provisioned client doesn't work: every Databricks App ships with an `oauth2_app_client_id` (`w.apps.get(name).oauth2_app_client_id`). That client's `redirect_urls` allowlist contains the **app's own** callback only — *not* UC's consent landing page (`/login/oauth/http.html`). And: that client is **not patchable** by callers (`AccountClient.custom_app_integration.get(integration_id=)` returns `Not Found` — it's platform-owned).
+
+So mint a fresh integration:
+
+```python
+from databricks.sdk import AccountClient
+
+# IMPORTANT: this must be an account-scoped profile, not workspace.
+# Account profiles have `account_id` + `host = https://accounts..databricks.net`.
+ac = AccountClient(profile="") + +new = ac.custom_app_integration.create( + name="my-mcp-uc-client", + confidential=True, + redirect_urls=[ + # The UC connection consent landing page β€” note `/login/oauth/http.html` + "https:///login/oauth/http.html", + ], + scopes=["all-apis", "offline_access"], +) +print(f"client_id : {new.client_id}") +print(f"client_secret: {new.client_secret} ← only shown once!") +``` + +**Required perms**: account admin. A workspace admin or metastore admin will hit `Not Found` on `custom_app_integration.list/get/create`. + +**Save the `client_secret` immediately** β€” Databricks doesn't show it again. For a non-demo deployment, store it in a Databricks Secret scope and reference it via `secret_ref` in the connection options. + +### Layer 2 β€” UC HTTP Connection + +There is **no `MCP` connection type** in UC (as of 2026-05). `CREATE CONNECTION TYPE MCP` returns `CONNECTION_TYPE_NOT_SUPPORTED`. Instead, use type `HTTP` with `is_mcp_connection: "true"`: + +```python +from databricks.sdk.service import catalog +w = WorkspaceClient(profile="") + +# Drop any prior connection with the same name (idempotency) +try: w.connections.delete("my_mcp_conn") +except Exception: pass + +w.connections.create( + name="my_mcp_conn", + connection_type=catalog.ConnectionType.HTTP, + options={ + "host": "https://my-mcp-server-.cloud.databricksapps.com", + "port": "443", + "base_path": "/mcp", + "oauth_credential_exchange_method": "header_and_body", + "client_id": new.client_id, + "client_secret": new.client_secret, + "authorization_endpoint": f"/oidc/v1/authorize", + "token_endpoint": f"/oidc/v1/token", + "oauth_scope": "all-apis offline_access", + "is_mcp_connection": "true", + }, + comment="My custom MCP server, UC-registered.", +) +``` + +UC will accept the credentials and create the connection β€” but it will **not validate the redirect URI allowlist at create time**. Validation happens at consent (layer 4). 
+ +### Layer 3 β€” Detach duplicate supervisor registration (if applicable) + +If you previously attached this MCP server as `tool_type=app` on a supervisor (e.g., during prototyping), **detach it now**: + +```python +SUPERVISOR_ID = "" +try: + w.supervisor_agents.delete_tool( + name=f"supervisor-agents/{SUPERVISOR_ID}/tools/", + ) + print("detached old tool_type=app registration") +except Exception: + pass # not attached, nothing to do +``` + +Why: both routes will enumerate the same MCP tool names. The supervisor's main agent rejects with: + +> `Error: Duplicate tool name 'resolve_concept' detected for agent 'main'.` + +This catches you ~5 minutes after first wiring up the UC route. Pre-empt it. + +### Layer 4 β€” Per-user OAuth consent (one human click) + +After layers 1-3, the first time a user calls through the supervisor, they'll see: + +``` +error: { + code: "oauth", + message: "Credential for user identity('') is not found for the + connection 'my_mcp_conn'. Please login first to the connection by + visiting https:///explore/connections/my_mcp_conn" +} +``` + +**This is OAuth working as designed** β€” UC stores credentials per `(user_id, connection_name)` for audit + governance. The user opens that URL, clicks **Sign in**, completes the consent screen, and lands back on the connection page. UC caches a refresh token; every subsequent supervisor call routes silently until the refresh token rotates out (typically months). + +If the consent fails with `invalid_request: redirect_uri ... not registered for OAuth application`, the layer-1 integration's `redirect_urls` doesn't include the right URI. Re-mint with `--rotate-secret` and make sure `redirect_urls` contains exactly `https:///login/oauth/http.html`. 
+ +--- + +## Verification β€” the one character that proves it works + +After the consent click, call the supervisor's serving endpoint: + +```bash +curl -sS \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"input":[{"role":"user","content":""}]}' \ + https:///serving-endpoints//invocations +``` + +Find the `mcp_approval_request` event in the response: + +```json +{ + "type": "mcp_approval_request", + "server_label": "my_mcp_conn", ← UC Connection name (underscore-style) + "name": "query_lakebase_orders", + "arguments": "{\"customer_id\": \"CUST_001\"}" +} +``` + +If `server_label` is the **UC Connection name** (`my_mcp_conn`, underscores), you're routing through UC. If it's the **app name** (`my-mcp-server`, dashes), you're still on the `tool_type=app` route β€” go back to layer 3 and detach it. + +That one-character difference (`_` vs `-`) is the verification that matters. + +--- + +## Production hardening + +- **Secrets**: move `client_secret` from a literal option to a Databricks Secret reference. UC encrypts options at rest, but secrets are still better practice. +- **Scopes**: prune `oauth_scope` to the minimum your MCP server's tools need. `all-apis` works but is overscoped. +- **Per-user grants**: confirm every user who'll call the supervisor has been granted `USE CONNECTION` on the connection itself. +- **Rotation**: the custom OAuth integration's secret rotates manually via `--rotate-secret`. Set a calendar reminder. +- **Audit**: UC logs every connection invocation under `system.access.audit`. Wire that into your monitoring. + +See [scripts/register_mcp_in_uc.py](scripts/register_mcp_in_uc.py) for an idempotent reference implementation of the full four-layer recipe. + +When this all works β†’ [4-attach-to-supervisor-agent.md](4-attach-to-supervisor-agent.md). 
diff --git a/databricks-skills/databricks-custom-mcp-server/4-attach-to-supervisor-agent.md b/databricks-skills/databricks-custom-mcp-server/4-attach-to-supervisor-agent.md new file mode 100644 index 00000000..26266a22 --- /dev/null +++ b/databricks-skills/databricks-custom-mcp-server/4-attach-to-supervisor-agent.md @@ -0,0 +1,196 @@ +# 4. Attach to a Supervisor Agent + +Once the MCP server is deployed (and optionally registered as a UC Connection β€” see [3-register-in-unity-catalog.md](3-register-in-unity-catalog.md)), attach it as a Tool on a Supervisor Agent. The supervisor then exposes a single serving endpoint that downstream consumers call. + +## Two registration variants + +| `tool_type` | Auth model | When to use | +|---|---|---| +| `app` | Supervisor's own SP credentials β†’ `CAN_USE` on the MCP app | Quick demo, no UC wiring needed | +| `uc_connection` | UC's per-user OAuth credential cache, governed | **Production** β€” real audit trail, per-user consent gating | + +Pick **one**. Attaching both with the same MCP server enumerates duplicate tool names and the supervisor rejects everything with `Error: Duplicate tool name 'X' detected for agent 'main'`. + +## Create the Supervisor Agent + +```python +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.supervisoragents import SupervisorAgent, Tool, App, UcConnection, GenieSpace + +w = WorkspaceClient(profile="

") + +# Idempotent: look up by display_name, else create +sa = None +for existing in w.supervisor_agents.list_supervisor_agents(page_size=100): + if existing.display_name == "My Orchestrator": + sa = existing + break +if sa is None: + sa = w.supervisor_agents.create_supervisor_agent( + supervisor_agent=SupervisorAgent( + display_name="My Orchestrator", + description="Routes between domain MCP server and analytics surfaces.", + instructions=( + "For inventory / order questions, call the `my_mcp_conn` tool. " + "For analytic aggregates (totals by category / region), route to " + "the `my_genie_space` tool. Refuse ungrounded questions rather " + "than substitute concepts." + ), + ) + ) +print(f"supervisor_agent_id: {sa.supervisor_agent_id}") +print(f"endpoint_name : {sa.endpoint_name}") +``` + +The `endpoint_name` is the serving endpoint to call later β€” typically `mas--endpoint`. + +## Attach the MCP tool + +### Variant A β€” `tool_type=app` (no UC wiring) + +```python +w.supervisor_agents.create_tool( + parent=f"supervisor-agents/{sa.supervisor_agent_id}", + tool_id="my_mcp_app", + tool=Tool( + tool_type="app", + app=App(name="my-custom-mcp"), # exact Databricks App name (with dashes) + description=( + "MCP server exposing my-domain tools: query_lakebase_orders, " + "query_metric_view, etc." + ), + ), +) +``` + +Auth model: every supervisor invocation runs as the supervisor's own SP, which calls the MCP app's `/mcp` endpoint with bearer auth. 
The supervisor SP needs **CAN_USE** on the MCP app: + +```python +from databricks.sdk.service.apps import AppAccessControlRequest, AppPermissionLevel +mcp = w.apps.get(name="my-custom-mcp") +sup_endpoint_sp = w.serving_endpoints.get(name=sa.endpoint_name).creator # check actual SP +# (or however your supervisor endpoint's SP is identified in your workspace) +w.apps.update_permissions( + app_name="my-custom-mcp", + access_control_list=[ + AppAccessControlRequest(service_principal_name=sup_endpoint_sp, + permission_level=AppPermissionLevel.CAN_USE), + ], +) +``` + +### Variant B β€” `tool_type=uc_connection` (UC-governed) + +Pre-req: UC Connection exists ([3-register-in-unity-catalog.md](3-register-in-unity-catalog.md)). + +```python +w.supervisor_agents.create_tool( + parent=f"supervisor-agents/{sa.supervisor_agent_id}", + tool_id="my_mcp_uc", + tool=Tool( + tool_type="uc_connection", + uc_connection=UcConnection(name="my_mcp_conn"), # UC connection name (underscores) + description=( + "UC-governed MCP connection β€” per-user OAuth credentials, " + "audit trail in system.access.audit, human-in-the-loop " + "mcp_approval_request gate on every tool call." + ), + ), +) +``` + +Auth model: each calling user has a refresh token cached by UC. The supervisor mints a fresh access token via UC's token endpoint and uses it to call the MCP server. The MCP server's SSO accepts the token because it's a valid workspace user token with `CAN_USE` on the underlying app. + +Required: the user has completed the one-time consent click (layer 4 of the UC registration recipe). 
+ +## Optional β€” attach other tools on the same supervisor + +```python +# Genie Space (analytics surface) +w.supervisor_agents.create_tool( + parent=f"supervisor-agents/{sa.supervisor_agent_id}", + tool_id="my_genie", + tool=Tool( + tool_type="genie_space", + genie_space=GenieSpace(id=""), + description="Genie space over the canonical Metric View.", + ), +) +``` + +The supervisor's instructions (set at `create_supervisor_agent` time) tell the LLM when to route to each tool. Keep instructions ≀ 5 sentences β€” the LLM gets confused by long routing rules. + +## Call the supervisor endpoint + +The serving endpoint is OpenAI-compatible with one quirk β€” the field is `input`, not `messages`: + +```python +import httpx, time +host = w.config.host.rstrip("/") +TOKEN = w.config.authenticate()["Authorization"].removeprefix("Bearer ").strip() + +t0 = time.perf_counter() +r = httpx.post( + f"{host}/serving-endpoints/{sa.endpoint_name}/invocations", + headers={"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}, + json={"input": [{"role": "user", "content": "What's in order O123's status?"}]}, + timeout=120.0, +) +print(f"HTTP {r.status_code} Β· {(time.perf_counter()-t0)*1000:.0f} ms") +body = r.json() +``` + +## Response shape + +```json +{ + "id": "resp_", + "status": "completed", + "output": [ + { + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", + "text": "I'll look that up via the MCP server."}] + }, + { + "type": "mcp_approval_request", + "server_label": "my_mcp_conn", ← matches the tool you attached + "name": "query_lakebase_orders", + "arguments": "{\"customer_id\": \"CUST_001\", \"limit\": 10}" + } + ] +} +``` + +The `mcp_approval_request` is **deliberate** β€” supervisor agents emit an approval request before *actually executing* the tool, so callers can run human-in-the-loop gating. To execute, your caller code needs to send the approval back as a follow-up turn. 
This is a feature, not a bug β€” the demo story for governance teams is exactly this. + +## Errors you'll hit + +| Error | Cause | Fix | +|---|---|---| +| `Duplicate tool name 'X' detected for agent 'main'` | Both `tool_type=app` and `tool_type=uc_connection` attached for the same MCP server | `delete_tool` on one of them | +| `oauth: Credential for user identity('') is not found for the connection` | UC's per-user credential cache is empty for the caller | User opens the connection URL and clicks **Sign in** once | +| `serving endpoint not found` | `sa.endpoint_name` is still provisioning | Wait ~30-60s after `create_supervisor_agent`; re-fetch with `w.supervisor_agents.get_supervisor_agent` | +| Supervisor calls succeed but `mcp_approval_request` never appears | The LLM didn't think a tool call was warranted | Sharpen the tool description; the LLM routes on description quality | + +## Detach / cleanup + +```python +# Drop a single tool +w.supervisor_agents.delete_tool( + name=f"supervisor-agents/{sa.supervisor_agent_id}/tools/my_mcp_app", +) + +# Drop the supervisor itself (also drops its endpoint) +w.supervisor_agents.delete_supervisor_agent(supervisor_agent_id=sa.supervisor_agent_id) +``` + +`delete_tool` is the SDK method β€” `w.supervisor_agents.delete_tool(name="supervisor-agents//tools/")`. Several REST-API path guesses (e.g. `/api/2.0/agents/supervisor-agents/...`) return 404; the SDK method is the only one that works reliably. 
+ +--- + +When this all works: +- Catalog Explorer → Connections → your connection exists with `is_mcp_connection: true` +- The Supervisor's serving endpoint returns 200 with `mcp_approval_request` referencing your MCP server +- `server_label` matches the registration tool_type you intended (`uc_connection` name for UC-governed, app name for `tool_type=app`) diff --git a/databricks-skills/databricks-custom-mcp-server/SKILL.md b/databricks-skills/databricks-custom-mcp-server/SKILL.md new file mode 100644 index 00000000..e382033c --- /dev/null +++ b/databricks-skills/databricks-custom-mcp-server/SKILL.md @@ -0,0 +1,169 @@ +--- +name: databricks-custom-mcp-server +description: "Build, deploy, and govern a custom Model Context Protocol (MCP) server on Databricks Apps end-to-end — FastMCP server skeleton, tool definitions over Lakebase / UC Metric Views / SQL warehouses, deployment via Databricks Apps, registration as a Unity Catalog Connection (with the no-DCR workaround when the workspace OAuth server lacks `registration_endpoint`), and attachment as a tool on a Supervisor Agent. Use when the user wants to build a custom MCP server, expose internal tools to agents, register an MCP server in Unity Catalog, wire MCP into Supervisor Agents / Agent Bricks, or hits any of the non-obvious gotchas around OAuth Dynamic Client Registration, account-level OAuth integrations, redirect URI allowlists, or duplicate-tool-name conflicts when both `tool_type=app` and `tool_type=uc_connection` route to the same MCP server." +--- + +# Custom MCP Server on Databricks + +Build a custom MCP server that exposes Databricks-native data (Lakebase, Unity Catalog Metric Views, SQL warehouses, federated catalogs) as tools to AI agents — and govern it through Unity Catalog + Supervisor Agents.
+ +This skill captures the **non-obvious cross-product wiring** that doesn't appear in any single doc page: how to make the OAuth handshake survive the Databricks Apps SSO proxy, how to register the server in UC when the workspace doesn't have DCR enabled yet, and how to swap supervisor tool registrations without hitting duplicate-tool-name conflicts. + +--- + +## Critical Rules (always follow) + +- **MUST** deploy the MCP server as a Databricks App (the only supported MCP hosting model today). Not a Job, not a model-serving endpoint. +- **MUST** use `FastMCP` from the `mcp` package + FastAPI — same as the official `mcp-server-hello-world` template. Don't roll your own MCP transport. +- **MUST** expose `/mcp` as the streamable-HTTP endpoint. Clients (including Supervisor Agents) hit `/mcp`. +- **MUST** authenticate to downstream Databricks resources (Lakebase, warehouses) via `WorkspaceClient()` — *not* hardcoded tokens. +- **MUST** test the deployed server with the SDK's `DatabricksMCPClient` (`pip install databricks-mcp`) before claiming it works. +- **WHEN** registering in UC: if `register_mcp_server_via_dcr` fails with `Authorization Server does NOT support Dynamic Client Registration`, use the **manual four-layer recipe** in [3-register-in-unity-catalog.md](3-register-in-unity-catalog.md). Don't ship a half-registered connection. +- **WHEN** attaching to a Supervisor Agent: pick **one** of `tool_type=app` or `tool_type=uc_connection`, not both. Both routes enumerate the same MCP tool names; the supervisor rejects the duplicates.
+ +--- + +## When to Use Each Surface + +| Surface | When | Tradeoff | +|---|---|---| +| Direct MCP HTTP client (`DatabricksMCPClient`) | Programmatic testing, your own agent code | No governance layer; great for dev | +| Supervisor Agent with `tool_type=app` | Quick demo wiring, no UC registration needed | Uses app's SP credentials; bypasses UC OAuth | +| Supervisor Agent with `tool_type=uc_connection` | **Production** β€” every call goes through UC's per-user OAuth credential cache | Real audit trail, per-user consent gating, but needs the OAuth wiring in [3-register-in-unity-catalog.md](3-register-in-unity-catalog.md) | + +--- + +## Lifecycle (the four steps that matter) + +``` +β”Œβ”€ 1. Build ──────────────────────────────────────────┐ +β”‚ FastMCP server + @mcp.tool() decorated functions β”‚ +β”‚ tools wrap Lakebase / Metric View / warehouse SQL β”‚ +β”‚ β†’ see [1-build-fastmcp-server.md] β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β–Ό +β”Œβ”€ 2. Deploy ─────────────────────────────────────────┐ +β”‚ Upload to /Workspace/Users// β”‚ +β”‚ databricks apps create / deploy β”‚ +β”‚ app.yaml: command=["uvicorn", "app:http_app", …] β”‚ +β”‚ β†’ see [2-deploy-as-databricks-app.md] β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β–Ό +β”Œβ”€ 3. 
Register in UC ─────────────────────────────────┐ +β”‚ IDEAL : register_mcp_server_via_dcr(name, url) β”‚ +β”‚ COMMON : Manual four-layer recipe when DCR is off β”‚ +β”‚ (Account-admin custom OAuth integration β”‚ +β”‚ β†’ UC HTTP connection + is_mcp_connectionβ”‚ +β”‚ β†’ user consent click) β”‚ +β”‚ β†’ see [3-register-in-unity-catalog.md] β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β–Ό +β”Œβ”€ 4. Attach to Supervisor Agent ─────────────────────┐ +β”‚ Tool(tool_type="uc_connection", β”‚ +β”‚ uc_connection=UcConnection(name=)) β”‚ +β”‚ Detach any duplicate tool_type="app" registration β”‚ +β”‚ β†’ see [4-attach-to-supervisor-agent.md] β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +--- + +## Quick Start + +```bash +# 1. Scaffold + deploy a hello-world MCP server (~5 min) +databricks bundle init mlops-stacks --template-dir mcp-server-hello-world +cd my-mcp-server +databricks apps create my-mcp-server +databricks apps deploy my-mcp-server --source-code-path /Workspace/Users//my-mcp-server + +# 2. Smoke-test via SDK (verifies streamable HTTP transport + auth) +python -c " +from databricks_mcp import DatabricksMCPClient +from databricks.sdk import WorkspaceClient +c = DatabricksMCPClient( + server_url='https://my-mcp-server-.cloud.databricksapps.com/mcp', + workspace_client=WorkspaceClient(), +) +print(c.list_tools()) +" + +# 3. 
Try DCR registration first (works if workspace has it enabled) +python -c " +from databricks_mcp import register_mcp_server_via_dcr +from databricks.sdk import WorkspaceClient +url = register_mcp_server_via_dcr( + connection_name='my_mcp_conn', + mcp_url='https://my-mcp-server-.cloud.databricksapps.com/mcp', + workspace_client=WorkspaceClient(), +) +print(url) +" +# If DCR works: skip to step 4. +# If it fails with 'Authorization Server does NOT support Dynamic Client Registration': +# follow the manual four-layer recipe in 3-register-in-unity-catalog.md + +# 4. Attach to a Supervisor Agent (uc_connection variant) +# See 4-attach-to-supervisor-agent.md +``` + +--- + +## Reference Files + +| File | When to read | +|---|---| +| [1-build-fastmcp-server.md](1-build-fastmcp-server.md) | Writing the MCP server itself — tool decorators, OAuth user-token passthrough, Lakebase/MetricView/warehouse helpers | +| [2-deploy-as-databricks-app.md](2-deploy-as-databricks-app.md) | `app.yaml`, `manifest.yaml`, deployment + grants flow, restart-as-recovery | +| [3-register-in-unity-catalog.md](3-register-in-unity-catalog.md) | **THE BIG ONE** — DCR-or-manual-recipe, Apps-SSO 401 quirk, per-user OAuth consent, four-layer permission stack | +| [4-attach-to-supervisor-agent.md](4-attach-to-supervisor-agent.md) | `Tool(tool_type="uc_connection")` shape, duplicate-tool-name conflict, `mcp_approval_request` human-in-the-loop | +| [scripts/register_mcp_in_uc.py](scripts/register_mcp_in_uc.py) | Production-ready idempotent four-layer registration script | + +--- + +## Common Issues + +| Issue | Cause | Fix | +|---|---|---| +| `register_mcp_server_via_dcr` fails: *"Expected HTTP 401 from MCP URL, got 200"* | Apps-SSO returns 302→200 HTML for plain GETs; the discovery code follows the redirect | Monkey-patch `requests.get` to send `Accept: application/json` and `allow_redirects=False` before importing `databricks_mcp.connector` | +| DCR fails: *"Authorization Server does NOT support
Dynamic Client Registration (missing 'registration_endpoint')"* | Workspace OAuth server hasn't enabled DCR yet | Use the manual four-layer recipe — mint a custom OAuth integration via `AccountClient.custom_app_integration.create`, build a synthetic `dcr_result`, pass it to `databricks_mcp.connector.create_uc_connection` | +| `CREATE CONNECTION TYPE MCP` SQL fails: *"CONNECTION_TYPE_NOT_SUPPORTED"* | There's no first-class `MCP` connection type in UC yet | Use `TYPE HTTP` + `is_mcp_connection: "true"` in options — UC and Supervisor Agents recognise MCP via that flag | +| User-consent click fails: *"redirect_uri not registered for OAuth application"* | The app's auto-provisioned OAuth client doesn't allowlist UC's consent landing page (`/login/oauth/http.html`) | Account admin must create a **separate** custom OAuth integration with that redirect URI in `redirect_urls`. The Apps-platform-managed OAuth client is not patchable. | +| Supervisor: *"Duplicate tool name 'resolve_concept' detected for agent 'main'"* | Both `tool_type=app` and `tool_type=uc_connection` registrations enumerate the same MCP tool names | Detach one. Use `w.supervisor_agents.delete_tool(name=…)` before attaching the other | +| Supervisor: *"Credential for user identity('') is not found for the connection"* | UC's per-user OAuth credential cache is empty for that user | User opens the connection URL once in a browser and clicks Sign in.
Credential cached; subsequent calls succeed silently until refresh-token rotation | + +--- + +## Validation Checklist + +Before claiming "the MCP server is registered in Unity Catalog and the Supervisor calls through it": + +``` +- [ ] Deployed app's /mcp endpoint returns 401+JSON for unauth GET with Accept: application/json +- [ ] DatabricksMCPClient.list_tools() returns expected tool names with workspace OAuth bearer auth +- [ ] UC Connection visible in Catalog Explorer → Connections → <connection_name> +- [ ] Connection options include is_mcp_connection: "true" +- [ ] At least one user has completed the per-user OAuth consent click +- [ ] Supervisor's list_tools shows tool_type=uc_connection registration +- [ ] No tool_type=app registration on the same supervisor (would duplicate tool names) +- [ ] Calling the supervisor endpoint returns mcp_approval_request with server=<connection_name>, not <app-name> +``` + +That last line is the verification that matters most — `server_label` in the supervisor response is `<connection_name>` (underscore-style) when the call routes through UC, vs `<app-name>` (dash-style) when it uses `tool_type=app`. One character of difference, and the whole governance story rides on it. + +--- + +## Related Skills + +- **[databricks-app-python](../databricks-app-python/SKILL.md)** — General Databricks Apps deployment (auth, app.yaml, frameworks). The MCP server is a specific kind of Databricks App; that skill covers the platform basics. +- **[databricks-python-sdk](../databricks-python-sdk/SKILL.md)** — WorkspaceClient + AccountClient setup, profiles. +- **[databricks-unity-catalog](../databricks-unity-catalog/SKILL.md)** — UC Connections, permissions, catalog discovery. +- **[databricks-lakebase-provisioned](../databricks-lakebase-provisioned/SKILL.md)** — Lakebase OAuth credential rotation; relevant for MCP tools that read Lakebase. +- **[databricks-metric-views](../databricks-metric-views/SKILL.md)** — `MEASURE()` semantics; relevant for MCP tools that wrap Metric Views.
+ +## Upstream References + +- **Stock template**: Databricks publishes `mcp-server-hello-world` as a Databricks Apps starter. Use as a skeleton; replace tools. +- **`databricks-mcp` package** (`pip install databricks-mcp`): contains `DatabricksMCPClient`, `register_mcp_server_via_dcr`, and the internal helpers (`discover_protected_resource_metadata`, `create_uc_connection`) we hijack in the manual recipe. +- **`mcp` package** (`pip install mcp`): the official MCP protocol library; `FastMCP` is your entry point. +- **MCP spec**: https://modelcontextprotocol.io/specification diff --git a/databricks-skills/databricks-custom-mcp-server/scripts/register_mcp_in_uc.py b/databricks-skills/databricks-custom-mcp-server/scripts/register_mcp_in_uc.py new file mode 100644 index 00000000..128e96c0 --- /dev/null +++ b/databricks-skills/databricks-custom-mcp-server/scripts/register_mcp_in_uc.py @@ -0,0 +1,213 @@ +"""Register a custom-MCP-server-as-Databricks-App in Unity Catalog end-to-end. + +Implements the four-layer no-DCR recipe documented in +3-register-in-unity-catalog.md: + 1. Account admin creates a custom OAuth integration with the UC consent + redirect URI in its allowlist. + 2. Workspace user creates a UC HTTP connection embedding those credentials, + with is_mcp_connection: "true" so UC and Supervisor Agents treat it as + MCP. + 3. Workspace user detaches any duplicate tool_type=app registration on a + target Supervisor Agent (avoids duplicate-tool-name conflict). + 4. (Manual, downstream) The end user clicks the consent link to cache a + per-user refresh token in UC. + +Idempotent. Supports --rotate-secret to mint a new client_secret. 
+ +Requires: + β€’ account-admin perms on the Databricks account + β€’ metastore admin on the target workspace + β€’ The MCP server already deployed as a Databricks App + β€’ Two CLI profiles: one account-scoped, one workspace-scoped + +Usage: + python register_mcp_in_uc.py \\ + --account-profile ACCOUNT \\ + --workspace-profile DEFAULT \\ + --connection-name my_mcp_conn \\ + --integration-name my-mcp-uc-client \\ + --mcp-host https://my-mcp-server-.cloud.databricksapps.com \\ + --workspace-host https:// \\ + --supervisor-id # optional; only if you want auto-detach + auto-attach +""" +from __future__ import annotations + +import argparse +import json +import pathlib +import sys + +from databricks.sdk import AccountClient, WorkspaceClient +from databricks.sdk.service import catalog +from databricks.sdk.service.supervisoragents import Tool, UcConnection + + +def step1_oauth_integration(ac: AccountClient, name: str, redirect_uri: str, + rotate: bool, creds_file: pathlib.Path) -> dict: + existing = None + for it in ac.custom_app_integration.list(): + if it.name == name: + existing = it + break + + if existing and not rotate: + if creds_file.exists(): + print(f" βœ“ integration exists ({existing.integration_id}) β€” using cached secret") + return json.loads(creds_file.read_text()) + print(f" ⚠ integration exists but secret cache missing β€” forcing rotate") + rotate = True + + if existing and rotate: + print(f" β€’ deleting {existing.integration_id} (rotate)") + ac.custom_app_integration.delete(integration_id=existing.integration_id) + + new = ac.custom_app_integration.create( + name=name, confidential=True, + redirect_urls=[redirect_uri], + scopes=["all-apis", "offline_access"], + ) + creds = { + "integration_id": new.integration_id, + "client_id": new.client_id, + "client_secret": new.client_secret or "", + } + creds_file.write_text(json.dumps(creds, indent=2)) + print(f" βœ“ created {new.integration_id}; secret cached β†’ {creds_file}") + return creds + + +def 
step2_uc_connection(w: WorkspaceClient, name: str, creds: dict, + mcp_host: str, workspace_host: str) -> None: + try: + w.connections.delete(name) + print(f" β€’ dropped existing {name!r}") + except Exception: + pass + w.connections.create( + name=name, + connection_type=catalog.ConnectionType.HTTP, + options={ + "host": mcp_host, + "port": "443", + "base_path": "/mcp", + "oauth_credential_exchange_method": "header_and_body", + "client_id": creds["client_id"], + "client_secret": creds["client_secret"], + "authorization_endpoint": f"{workspace_host}/oidc/v1/authorize", + "token_endpoint": f"{workspace_host}/oidc/v1/token", + "oauth_scope": "all-apis offline_access", + "is_mcp_connection": "true", + }, + comment="UC-native registration of an MCP server hosted as a Databricks App.", + ) + print(f" βœ“ created UC Connection {name!r}") + + +def step3_detach_app_tools(w: WorkspaceClient, supervisor_id: str) -> None: + """Detach any tool_type=app on this supervisor whose underlying app name + matches the connection β€” avoids duplicate-tool-name conflicts.""" + tools = list(w.supervisor_agents.list_tools( + parent=f"supervisor-agents/{supervisor_id}", + )) + for t in tools: + if t.tool_type == "app": + print(f" β€’ detaching tool_type=app tool {t.tool_id!r}") + w.supervisor_agents.delete_tool( + name=f"supervisor-agents/{supervisor_id}/tools/{t.tool_id}", + ) + if not any(t.tool_type == "app" for t in tools): + print(f" βœ“ no tool_type=app to detach") + + +def step4_attach_uc_tool(w: WorkspaceClient, supervisor_id: str, + tool_id: str, connection_name: str) -> None: + try: + w.supervisor_agents.create_tool( + parent=f"supervisor-agents/{supervisor_id}", + tool_id=tool_id, + tool=Tool( + tool_type="uc_connection", + uc_connection=UcConnection(name=connection_name), + description=( + f"UC-governed MCP connection to {connection_name}. " + "Per-user OAuth credentials, audit trail in " + "system.access.audit, mcp_approval_request gate " + "on every tool call." 
+ ), + ), + ) + print(f" βœ“ attached tool_type=uc_connection ({tool_id})") + except Exception as e: + if "already exists" in str(e).lower(): + print(f" βœ“ uc_connection tool already attached") + else: + raise + + +def main() -> int: + p = argparse.ArgumentParser() + p.add_argument("--account-profile", required=True) + p.add_argument("--workspace-profile", required=True) + p.add_argument("--connection-name", required=True) + p.add_argument("--integration-name", required=True) + p.add_argument("--mcp-host", required=True, + help="https://.cloud.databricksapps.com (no /mcp suffix)") + p.add_argument("--workspace-host", required=True, + help="https://.cloud.databricks.com") + p.add_argument("--supervisor-id", default=None, + help="If set, auto-detach any duplicate app tools and attach " + "the uc_connection tool to this supervisor.") + p.add_argument("--supervisor-tool-id", default=None, + help="tool_id for the new uc_connection registration " + "(default: _uc)") + p.add_argument("--rotate-secret", action="store_true") + p.add_argument("--creds-file", default="/tmp/mcp-uc-client.json", + help="Where to cache the OAuth client_secret (gitignored)") + args = p.parse_args() + + redirect_uri = f"{args.workspace_host.rstrip('/')}/login/oauth/http.html" + creds_file = pathlib.Path(args.creds_file) + tool_id = args.supervisor_tool_id or f"{args.connection_name}_uc" + + print(f"workspace : {args.workspace_host}") + print(f"connection: {args.connection_name}") + print(f"redirect : {redirect_uri}\n") + + print("[1/4] Account-level custom OAuth integration") + ac = AccountClient(profile=args.account_profile) + creds = step1_oauth_integration(ac, args.integration_name, redirect_uri, + args.rotate_secret, creds_file) + print(f" client_id: {creds['client_id']}\n") + + print("[2/4] UC HTTP Connection with is_mcp_connection: true") + w = WorkspaceClient(profile=args.workspace_profile) + step2_uc_connection(w, args.connection_name, creds, + args.mcp_host, args.workspace_host) + 
print() + + if args.supervisor_id: + print("[3/4] Detach duplicate tool_type=app on supervisor") + step3_detach_app_tools(w, args.supervisor_id) + print() + + print("[4/4] Attach UC connection as supervisor tool") + step4_attach_uc_tool(w, args.supervisor_id, tool_id, args.connection_name) + print() + else: + print("[3-4/4] Skipped supervisor wiring (no --supervisor-id given)\n") + + print("─" * 70) + print("βœ“ MCP server registered in Unity Catalog") + print(f" catalog: {args.workspace_host.rstrip('/')}/explore/connections/{args.connection_name}") + if args.supervisor_id: + print(f" super.: {args.workspace_host.rstrip('/')}/agents/supervisor/{args.supervisor_id}") + print() + print("β†’ ONE HUMAN STEP REMAINS:") + print(f" Open the connection URL above. Click 'Sign in' / 'Authorize'.") + print(f" After consent, every supervisor call for your user_id routes") + print(f" silently through the UC Connection.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/databricks-skills/install_skills.sh b/databricks-skills/install_skills.sh index c3d963c6..225bde2b 100755 --- a/databricks-skills/install_skills.sh +++ b/databricks-skills/install_skills.sh @@ -47,7 +47,7 @@ MLFLOW_REPO_RAW_URL="https://raw.githubusercontent.com/mlflow/skills" MLFLOW_REPO_REF="main" # Databricks skills (hosted in this repo) -DATABRICKS_SKILLS="databricks-agent-bricks databricks-ai-functions databricks-aibi-dashboards databricks-bundles databricks-apps-python databricks-config databricks-dbsql databricks-docs databricks-genie databricks-iceberg databricks-jobs databricks-lakebase-autoscale databricks-lakebase-provisioned databricks-metric-views databricks-mlflow-evaluation databricks-model-serving databricks-python-sdk databricks-execution-compute databricks-spark-declarative-pipelines databricks-spark-structured-streaming databricks-synthetic-data-gen databricks-unity-catalog databricks-unstructured-pdf-generation databricks-vector-search databricks-zerobus-ingest 
spark-python-data-source" +DATABRICKS_SKILLS="databricks-agent-bricks databricks-ai-functions databricks-aibi-dashboards databricks-bundles databricks-app-python databricks-config databricks-custom-mcp-server databricks-dbsql databricks-docs databricks-genie databricks-iceberg databricks-jobs databricks-lakebase-autoscale databricks-lakebase-provisioned databricks-metric-views databricks-mlflow-evaluation databricks-model-serving databricks-python-sdk databricks-execution-compute databricks-spark-declarative-pipelines databricks-spark-structured-streaming databricks-synthetic-data-gen databricks-unity-catalog databricks-unstructured-pdf-generation databricks-vector-search databricks-zerobus-ingest spark-python-data-source" # MLflow skills (fetched from mlflow/skills repo) MLFLOW_SKILLS="agent-evaluation analyze-mlflow-chat-session analyze-mlflow-trace instrumenting-with-mlflow-tracing mlflow-onboarding querying-mlflow-metrics retrieving-mlflow-traces searching-mlflow-docs" @@ -72,8 +72,9 @@ get_skill_description() { "databricks-aibi-dashboards") echo "Databricks AI/BI Dashboards - create and manage dashboards" ;; "databricks-bundles") echo "Databricks Asset Bundles - deployment and configuration" ;; "databricks-app-apx") echo "Databricks Apps with React/Next.js (APX framework)" ;; - "databricks-apps-python") echo "Databricks Apps with Python (Dash, Streamlit) and foundation model integration" ;; + "databricks-app-python") echo "Databricks Apps with Python (Dash, Streamlit) and foundation model integration" ;; "databricks-config") echo "Profile authentication setup for Databricks" ;; + "databricks-custom-mcp-server") echo "Build, deploy, and govern custom MCP servers on Databricks Apps (FastMCP + UC Connection registration + Supervisor Agent attachment)" ;; "databricks-dbsql") echo "Databricks SQL - SQL scripting, MVs, geospatial, AI functions, federation" ;; "databricks-docs") echo "Documentation reference via llms.txt" ;; "databricks-genie") echo "Genie Spaces - 
create, curate, and query via Conversation API" ;; @@ -119,7 +120,8 @@ get_skill_extra_files() { "databricks-bundles") echo "alerts_guidance.md SDP_guidance.md" ;; "databricks-iceberg") echo "1-managed-iceberg-tables.md 2-uniform-and-compatibility.md 3-iceberg-rest-catalog.md 4-snowflake-interop.md 5-external-engine-interop.md" ;; "databricks-app-apx") echo "backend-patterns.md best-practices.md frontend-patterns.md" ;; - "databricks-apps-python") echo "1-authorization.md 2-app-resources.md 3-frameworks.md 4-deployment.md 5-lakebase.md 6-mcp-approach.md examples/llm_config.py examples/fm-minimal-chat.py examples/fm-parallel-calls.py examples/fm-structured-outputs.py" ;; + "databricks-app-python") echo "1-authorization.md 2-app-resources.md 3-frameworks.md 4-deployment.md 5-lakebase.md 6-mcp-approach.md examples/llm_config.py examples/fm-minimal-chat.py examples/fm-parallel-calls.py examples/fm-structured-outputs.py" ;; + "databricks-custom-mcp-server") echo "1-build-fastmcp-server.md 2-deploy-as-databricks-app.md 3-register-in-unity-catalog.md 4-attach-to-supervisor-agent.md scripts/register_mcp_in_uc.py" ;; "databricks-jobs") echo "task-types.md triggers-schedules.md notifications-monitoring.md examples.md" ;; "databricks-python-sdk") echo "doc-index.md examples/1-authentication.py examples/2-clusters-and-jobs.py examples/3-sql-and-warehouses.py examples/4-unity-catalog.py examples/5-serving-and-vector-search.py" ;; "databricks-unity-catalog") echo "5-system-tables.md" ;;