From 37e2a5ffb904f8cf9ad3d5c85ccb81a58d83cac6 Mon Sep 17 00:00:00 2001
From: Chris Burns <29541485+ChrisJBurns@users.noreply.github.com>
Date: Wed, 4 Mar 2026 16:25:30 +0000
Subject: [PATCH 1/2] Update OTel docs to match current implementation

Update observability documentation to reflect the current ToolHive
OpenTelemetry implementation, including new semantic convention
attribute names, vMCP metrics, trace context propagation, and the
legacy attributes compatibility flag.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 docs/toolhive/concepts/observability.mdx      | 185 ++++++++++++------
 .../guides-cli/telemetry-and-metrics.mdx      |  32 +--
 .../guides-vmcp/telemetry-and-metrics.mdx     |  55 +++++-
 3 files changed, 195 insertions(+), 77 deletions(-)

diff --git a/docs/toolhive/concepts/observability.mdx b/docs/toolhive/concepts/observability.mdx
index e51a6ab1..a7ccc9dc 100644
--- a/docs/toolhive/concepts/observability.mdx
+++ b/docs/toolhive/concepts/observability.mdx
@@ -31,56 +31,75 @@ Here's what a trace looks like when a client calls a tool in the GitHub MCP
 server (some fields omitted for brevity):
 
 ```text
-Span: mcp.tools/call (150ms)
-├── service.name: toolhive-mcp-proxy
+Span: tools/call create_issue (150ms)
+├── service.name: thv-github
 ├── service.version: v0.1.9
-├── http.duration_ms: 150.3
-├── http.host: localhost:14972
-├── http.method: POST
-├── http.request_content_length: 256
-├── http.response_content_length: 1024
-├── http.status_code: 202
-├── http.url: /messages?session_id=b1d22d07-b35f-4260-9c0c-b872f92f64b1
-├── http.user_agent: claude-code/1.0.53
-├── mcp.method: tools/call
-├── mcp.request.id: 5
+├── http.request.method: POST
+├── http.request.body.size: 256
+├── http.response.status_code: 202
+├── http.response.body.size: 1024
+├── url.full: /messages?session_id=b1d22d07-b35f-4260-9c0c-b872f92f64b1
+├── url.path: /messages
+├── url.scheme: https
+├── server.address: localhost:14972
+├── user_agent.original: claude-code/1.0.53
+├── mcp.method.name: tools/call
 ├── mcp.server.name: github
-├── mcp.tool.arguments: owner=stacklok, repo=toolhive, pullNumber=1131
-├── mcp.tool.name: create_issue
-└── mcp.transport: stdio
+├── mcp.session.id: abc123
+├── rpc.system.name: jsonrpc
+├── jsonrpc.protocol.version: 2.0
+├── jsonrpc.request.id: 5
+├── gen_ai.tool.name: create_issue
+├── gen_ai.operation.name: execute_tool
+├── gen_ai.tool.call.arguments: owner=stacklok, repo=toolhive, pullNumber=1131
+├── network.transport: tcp
+└── network.protocol.name: http
 ```
 
 ### MCP-specific traces
 
 ToolHive automatically captures traces for all MCP operations, including:
 
-- **Tool calls** (`mcp.tools/call`) - When AI assistants use tools
-- **Resource access** (`mcp.resources/read`) - When servers read files or data
-- **Prompt operations** (`mcp.prompts/get`) - When servers retrieve prompts
-- **Connection events** (`mcp.initialize`) - When clients connect to servers
+- **Tool calls** (`tools/call`) - When AI assistants use tools
+- **Resource access** (`resources/read`) - When servers read files or data
+- **Prompt operations** (`prompts/get`) - When servers retrieve prompts
+- **Connection events** (`initialize`) - When clients connect to servers
 
 ### Trace attributes
 
-Each trace includes detailed context across three layers:
+Each trace includes detailed context across several layers:
 
 #### Service information
 
 ```text
-service.name: toolhive-mcp-proxy
+service.name: thv-github
 service.version: v0.1.9
+host.name: my-machine
 ```
 
-#### HTTP layer information
+#### HTTP layer
 
 ```text
-http.duration_ms: 150.3
-http.host: localhost:14972
-http.method: POST
-http.request_content_length: 256
-http.response_content_length: 1024
-http.status_code: 202
-http.url: /messages?session_id=b1d22d07-b35f-4260-9c0c-b872f92f64b1
-http.user_agent: claude-code/1.0.53
+http.request.method: POST
+http.request.body.size: 256
+http.response.status_code: 202
+http.response.body.size: 1024
+url.full: /messages?session_id=b1d22d07-b35f-4260-9c0c-b872f92f64b1
+url.path: /messages
+url.scheme: https
+url.query: session_id=b1d22d07-b35f-4260-9c0c-b872f92f64b1
+server.address: localhost:14972
+user_agent.original: claude-code/1.0.53
+```
+
+#### Network layer
+
+```text
+network.transport: tcp
+network.protocol.name: http
+network.protocol.version: 1.1
+client.address: 127.0.0.1
+client.port: 52431
 ```
 
 #### MCP protocol details
@@ -89,31 +108,41 @@ Details about the MCP operation being performed (some fields are specific to
 each operation):
 
 ```text
-mcp.client.name: claude-code
-mcp.method: tools/call
-mcp.request.id: 123
+mcp.method.name: tools/call
 mcp.server.name: github
-mcp.tool.arguments: owner=stacklok, repo=toolhive, path=pkg/telemetry/middleware.go, start_index=130, max_length=1000
-mcp.tool.name: get_file_contents
-mcp.transport: stdio
-rpc.service: mcp
-rpc.system: jsonrpc
+mcp.session.id: abc123
+mcp.protocol.version: 2025-03-26
+mcp.is_batch: false
+rpc.system.name: jsonrpc
+jsonrpc.protocol.version: 2.0
+jsonrpc.request.id: 123
 ```
 
 #### Method-specific attributes
 
-- **`mcp.tools/call`** traces include:
-  - `mcp.tool.name` - The name of the tool being called
-  - `mcp.tool.arguments` - Sanitized tool arguments (sensitive values redacted)
+- **`tools/call`** traces include:
+  - `gen_ai.tool.name` - The name of the tool being called
+  - `gen_ai.operation.name` - Set to `execute_tool`
+  - `gen_ai.tool.call.arguments` - Sanitized tool arguments (sensitive values
+    redacted)
 
-- **`mcp.resources/read`** traces include:
+- **`resources/read`** traces include:
   - `mcp.resource.uri` - The URI of the resource being accessed
 
-- **`mcp.prompts/get`** traces include:
-  - `mcp.prompt.name` - The name of the prompt being retrieved
+- **`prompts/get`** traces include:
+  - `gen_ai.prompt.name` - The name of the prompt being retrieved
+
+- **`initialize`** traces include:
+  - `mcp.protocol.version` - The MCP protocol version negotiated
 
-- **`mcp.initialize`** traces include:
-  - `mcp.client.name` - The name of the connecting client
+:::note[Legacy attribute names]
+
+By default, ToolHive emits both the new OpenTelemetry semantic convention
+attribute names shown above and legacy attribute names (e.g., `http.method`,
+`mcp.method`, `mcp.tool.name`) for backward compatibility with existing
+dashboards. Set `--otel-use-legacy-attributes=false` to emit only new names.
+
+:::
 
 ## Metrics collection
 
@@ -171,6 +200,40 @@ toolhive_mcp_tool_calls_total{server="github",status="success",tool="list_pull_r
 toolhive_mcp_tool_calls_total{server="github",status="success",tool="search_issues"} 2
 ```
 
+### MCP semantic convention metrics
+
+In addition to the ToolHive-prefixed metrics above, ToolHive emits metrics that
+follow the
+[OpenTelemetry MCP semantic conventions](https://github.com/open-telemetry/semantic-conventions):
+
+| Metric                          | Type      | Description                              |
+| ------------------------------- | --------- | ---------------------------------------- |
+| `mcp.server.operation.duration` | Histogram | Duration of MCP server operations        |
+| `mcp.client.operation.duration` | Histogram | Duration of MCP client operations (vMCP) |
+
+These metrics use the same labels as the ToolHive-prefixed metrics and are
+compatible with dashboards built for the OpenTelemetry MCP semantic conventions.
+
+### vMCP metrics
+
+When using Virtual MCP Server (vMCP), additional metrics are available for
+monitoring backend operations, workflow executions, and optimizer performance.
+For details, see the
+[vMCP telemetry guide](../guides-vmcp/telemetry-and-metrics.mdx).
+
+## Trace context propagation
+
+ToolHive supports two methods of trace context propagation:
+
+- **HTTP headers**: Standard W3C Trace Context (`traceparent` and `tracestate`
+  headers) and W3C Baggage propagation
+- **MCP `_meta` field**: Trace context embedded in MCP request parameters via
+  the `params._meta` field, following the MCP specification
+
+When both are present, the MCP `_meta` trace context takes priority. This
+enables proper trace correlation across MCP server boundaries, even when MCP
+clients inject trace context into the request payload rather than HTTP headers.
+
 ## Export options
 
 ToolHive supports multiple export formats to integrate with your existing
@@ -217,13 +280,13 @@ ToolHive automatically protects sensitive information in traces:
 For example, a tool call with sensitive arguments:
 
 ```text
-mcp.tool.arguments: password=secret123, api_key=abc456, title=Bug report
+gen_ai.tool.call.arguments: password=secret123, api_key=abc456, title=Bug report
 ```
 
-Is sanitized in the trace as:
+ToolHive sanitizes this in the trace as:
 
 ```text
-mcp.tool.arguments: password=[REDACTED], api_key=[REDACTED], title=Bug report
+gen_ai.tool.call.arguments: password=[REDACTED], api_key=[REDACTED], title=Bug report
 ```
 
 ## Monitoring examples
@@ -254,15 +317,15 @@ When a client calls the `create_issue` tool:
 **Generated trace**:
 
 ```text
-Span: mcp.tools/call
-├── mcp.method: tools/call
-├── mcp.request.id: req_456
-├── mcp.tool.name: create_issue
-├── mcp.tool.arguments: title=Bug report, body=Found an issue with the API
+Span: tools/call create_issue
+├── mcp.method.name: tools/call
+├── jsonrpc.request.id: req_456
+├── gen_ai.tool.name: create_issue
+├── gen_ai.tool.call.arguments: title=Bug report, body=Found an issue with...
 ├── mcp.server.name: github
-├── mcp.transport: sse
-├── http.method: POST
-├── http.status_code: 200
+├── network.transport: tcp
+├── http.request.method: POST
+├── http.response.status_code: 200
 └── duration: 850ms
 ```
 
@@ -281,10 +344,10 @@ Failed requests generate error traces and metrics:
 **Error trace**:
 
 ```text
-Span: mcp.tools/call
-├── mcp.method: tools/call
-├── mcp.tool.name: invalid_tool
-├── http.status_code: 400
+Span: tools/call invalid_tool
+├── mcp.method.name: tools/call
+├── gen_ai.tool.name: invalid_tool
+├── http.response.status_code: 400
 ├── span.status: ERROR
 ├── span.status_message: Tool not found
 └── duration: 12ms
diff --git a/docs/toolhive/guides-cli/telemetry-and-metrics.mdx b/docs/toolhive/guides-cli/telemetry-and-metrics.mdx
index c985b57e..d4bdf78c 100644
--- a/docs/toolhive/guides-cli/telemetry-and-metrics.mdx
+++ b/docs/toolhive/guides-cli/telemetry-and-metrics.mdx
@@ -50,9 +50,9 @@ scheme or path (e.g., use `api.honeycomb.io` or `api.honeycomb.io:443`, not
 
 :::
 
-By default, the service name is set to `toolhive-mcp-proxy`, and the sampling
-rate is `0.1` (10%). You can customize these settings with additional
-[configuration options](#configuration-options).
+By default, the service name is set to `thv-<SERVER>` (e.g., `thv-fetch`), and
+the sampling rate is `0.1` (10%). You can customize these settings with
+additional [configuration options](#configuration-options).
 
 :::tip[Recommendation]
 
@@ -218,21 +218,23 @@ thv run [--otel-endpoint <URL>] [--otel-service-name <NAME>] \
   [--otel-sampling-rate <RATE>] [--otel-headers <KEY=VALUE>] \
   [--otel-custom-attributes <KEY=VALUE>] [--otel-env-vars <VAR1,VAR2>] \
   [--otel-insecure] [--otel-enable-prometheus-metrics-path] \
+  [--otel-use-legacy-attributes=<true|false>] \
   <SERVER>
 ```
 
-| Flag                                    | Description                                                   | Default              |
-| --------------------------------------- | ------------------------------------------------------------- | -------------------- |
-| `--otel-endpoint`                       | OTLP endpoint (e.g., `api.honeycomb.io`)                      | None                 |
-| `--otel-metrics-enabled`                | Enable OTLP metrics export (when OTLP endpoint is configured) | `true`               |
-| `--otel-tracing-enabled`                | Enable distributed tracing (when OTLP endpoint is configured) | `true`               |
-| `--otel-service-name`                   | Service name for telemetry                                    | `toolhive-mcp-proxy` |
-| `--otel-sampling-rate`                  | Trace sampling rate (0.0-1.0)                                 | `0.1` (10%)          |
-| `--otel-headers`                        | Authentication headers in `key=value` format                  | None                 |
-| `--otel-custom-attributes`              | Custom resource attributes in `key=value` format              | None                 |
-| `--otel-env-vars`                       | List of environment variables to include in telemetry spans   | None                 |
-| `--otel-insecure`                       | Connect using HTTP instead of HTTPS                           | `false`              |
-| `--otel-enable-prometheus-metrics-path` | Enable `/metrics` endpoint                                    | `false`              |
+| Flag                                    | Description                                                         | Default        |
+| --------------------------------------- | ------------------------------------------------------------------- | -------------- |
+| `--otel-endpoint`                       | OTLP endpoint (e.g., `api.honeycomb.io`)                            | None           |
+| `--otel-metrics-enabled`                | Enable OTLP metrics export (when OTLP endpoint is configured)       | `true`         |
+| `--otel-tracing-enabled`                | Enable distributed tracing (when OTLP endpoint is configured)       | `true`         |
+| `--otel-service-name`                   | Service name for telemetry                                          | `thv-<SERVER>` |
+| `--otel-sampling-rate`                  | Trace sampling rate (0.0-1.0)                                       | `0.1` (10%)    |
+| `--otel-headers`                        | Authentication headers in `key=value` format                        | None           |
+| `--otel-custom-attributes`              | Custom resource attributes in `key=value` format                    | None           |
+| `--otel-env-vars`                       | List of environment variables to include in telemetry spans         | None           |
+| `--otel-insecure`                       | Connect using HTTP instead of HTTPS                                 | `false`        |
+| `--otel-enable-prometheus-metrics-path` | Enable `/metrics` endpoint                                          | `false`        |
+| `--otel-use-legacy-attributes`          | Emit legacy attribute names alongside new OTel semantic conventions | `true`         |
 
 ### Global configuration
 
diff --git a/docs/toolhive/guides-vmcp/telemetry-and-metrics.mdx b/docs/toolhive/guides-vmcp/telemetry-and-metrics.mdx
index 345a0256..bcae2d3a 100644
--- a/docs/toolhive/guides-vmcp/telemetry-and-metrics.mdx
+++ b/docs/toolhive/guides-vmcp/telemetry-and-metrics.mdx
@@ -40,7 +40,7 @@ spec:
       serviceName: 'my-vmcp'
       insecure: true
       tracingEnabled: true
-      samplingRate: '0.1'
+      samplingRate: '0.05'
       metricsEnabled: true
       enablePrometheusMetricsPath: true
     # highlight-end
@@ -114,6 +114,59 @@ vMCP supports two methods for collecting metrics:
 - **Pull via Prometheus**: Set `enablePrometheusMetricsPath: true` to expose a
   `/metrics` endpoint on the vMCP service port (4483) for Prometheus to scrape
 
+### Backend metrics
+
+These metrics track requests to individual MCP server backends:
+
+| Metric                                    | Type      | Description                                              |
+| ----------------------------------------- | --------- | -------------------------------------------------------- |
+| `toolhive_vmcp_backends_discovered`       | Gauge     | Number of backends discovered                            |
+| `toolhive_vmcp_backend_requests`          | Counter   | Total requests per backend                               |
+| `toolhive_vmcp_backend_errors`            | Counter   | Total errors per backend                                 |
+| `toolhive_vmcp_backend_requests_duration` | Histogram | Duration of backend requests                             |
+| `mcp.client.operation.duration`           | Histogram | MCP client operation duration (OTel semantic convention) |
+
+### Workflow metrics
+
+These metrics track workflow execution across backends:
+
+| Metric                              | Type      | Description                     |
+| ----------------------------------- | --------- | ------------------------------- |
+| `toolhive_vmcp_workflow_executions` | Counter   | Total workflow executions       |
+| `toolhive_vmcp_workflow_errors`     | Counter   | Total workflow execution errors |
+| `toolhive_vmcp_workflow_duration`   | Histogram | Duration of workflow executions |
+
+### Optimizer metrics
+
+When the vMCP optimizer is enabled, these metrics track tool-finding and
+tool-calling performance:
+
+| Metric                                          | Type      | Description                             |
+| ----------------------------------------------- | --------- | --------------------------------------- |
+| `toolhive_vmcp_optimizer_find_tool_requests`    | Counter   | Total FindTool calls                    |
+| `toolhive_vmcp_optimizer_find_tool_errors`      | Counter   | Total FindTool errors                   |
+| `toolhive_vmcp_optimizer_find_tool_duration`    | Histogram | Duration of FindTool calls              |
+| `toolhive_vmcp_optimizer_find_tool_results`     | Histogram | Number of tools returned per call       |
+| `toolhive_vmcp_optimizer_token_savings_percent` | Histogram | Token savings percentage per call       |
+| `toolhive_vmcp_optimizer_call_tool_requests`    | Counter   | Total CallTool calls                    |
+| `toolhive_vmcp_optimizer_call_tool_errors`      | Counter   | Total CallTool errors                   |
+| `toolhive_vmcp_optimizer_call_tool_not_found`   | Counter   | CallTool calls where tool was not found |
+| `toolhive_vmcp_optimizer_call_tool_duration`    | Histogram | Duration of CallTool calls              |
+
+## Distributed tracing
+
+vMCP creates client-side spans for backend operations with the following span
+names:
+
+- `tools/call <tool_name>` - Tool calls to backends
+- `resources/read` - Resource reads from backends
+- `prompts/get <prompt_name>` - Prompt retrieval from backends
+- `list_capabilities` - Backend capability discovery
+
+Each span includes attributes for the target backend (`target.workload_id`,
+`target.workload_name`, `target.base_url`) and the relevant MCP attributes
+(`mcp.method.name`, `gen_ai.tool.name`, `mcp.resource.uri`).
+
 ## Related information
 
 - [Observability concepts](../concepts/observability.mdx) - Overview of

From 274446a6f104582e72afcfa0064233b5424ccf68 Mon Sep 17 00:00:00 2001
From: Chris Burns <29541485+ChrisJBurns@users.noreply.github.com>
Date: Tue, 17 Mar 2026 19:17:49 +0000
Subject: [PATCH 2/2] Address PR review feedback on OTel docs

- Remove mcp.is_batch from trace example (only emitted when true)
- Add mcp.client.name to initialize span attributes (always emitted)
- Clarify OTLP vs Prometheus metric naming for MCP semantic conventions
- Document Prometheus-safe name for mcp.client.operation.duration

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 docs/toolhive/concepts/observability.mdx           | 11 ++++++++---
 .../toolhive/guides-vmcp/telemetry-and-metrics.mdx | 14 +++++++-------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/docs/toolhive/concepts/observability.mdx b/docs/toolhive/concepts/observability.mdx
index 6fb551c5..d76a251d 100644
--- a/docs/toolhive/concepts/observability.mdx
+++ b/docs/toolhive/concepts/observability.mdx
@@ -112,7 +112,6 @@ mcp.method.name: tools/call
 mcp.server.name: github
 mcp.session.id: abc123
 mcp.protocol.version: 2025-03-26
-mcp.is_batch: false
 rpc.system.name: jsonrpc
 jsonrpc.protocol.version: 2.0
 jsonrpc.request.id: 123
@@ -133,6 +132,7 @@ jsonrpc.request.id: 123
   - `gen_ai.prompt.name` - The name of the prompt being retrieved
 
 - **`initialize`** traces include:
+  - `mcp.client.name` - The name of the connecting client (always emitted)
   - `mcp.protocol.version` - The MCP protocol version negotiated
 
 :::note[Legacy attribute names]
@@ -211,8 +211,13 @@ follow the
 | `mcp.server.operation.duration` | Histogram | Duration of MCP server operations        |
 | `mcp.client.operation.duration` | Histogram | Duration of MCP client operations (vMCP) |
 
-These metrics use the same labels as the ToolHive-prefixed metrics and are
-compatible with dashboards built for the OpenTelemetry MCP semantic conventions.
+Over OTLP, these metrics are exported under the semantic convention names shown
+above, with the same labels as the ToolHive-prefixed metrics. On the Prometheus
+`/metrics` endpoint, the names are converted to a Prometheus-safe form by
+replacing dots (`.`) with underscores (`_`):
+
+- `mcp.server.operation.duration` → `mcp_server_operation_duration`
+- `mcp.client.operation.duration` → `mcp_client_operation_duration`
 
 ### vMCP metrics
 
diff --git a/docs/toolhive/guides-vmcp/telemetry-and-metrics.mdx b/docs/toolhive/guides-vmcp/telemetry-and-metrics.mdx
index 3919fa02..729f8e12 100644
--- a/docs/toolhive/guides-vmcp/telemetry-and-metrics.mdx
+++ b/docs/toolhive/guides-vmcp/telemetry-and-metrics.mdx
@@ -118,13 +118,13 @@ vMCP supports two methods for collecting metrics:
 
 These metrics track requests to individual MCP server backends:
 
-| Metric                                    | Type      | Description                                              |
-| ----------------------------------------- | --------- | -------------------------------------------------------- |
-| `toolhive_vmcp_backends_discovered`       | Gauge     | Number of backends discovered                            |
-| `toolhive_vmcp_backend_requests`          | Counter   | Total requests per backend                               |
-| `toolhive_vmcp_backend_errors`            | Counter   | Total errors per backend                                 |
-| `toolhive_vmcp_backend_requests_duration` | Histogram | Duration of backend requests                             |
-| `mcp.client.operation.duration`           | Histogram | MCP client operation duration (OTel semantic convention) |
+| Metric                                    | Type      | Description                                                                   |
+| ----------------------------------------- | --------- | ----------------------------------------------------------------------------- |
+| `toolhive_vmcp_backends_discovered`       | Gauge     | Number of backends discovered                                                 |
+| `toolhive_vmcp_backend_requests`          | Counter   | Total requests per backend                                                    |
+| `toolhive_vmcp_backend_errors`            | Counter   | Total errors per backend                                                      |
+| `toolhive_vmcp_backend_requests_duration` | Histogram | Duration of backend requests                                                  |
+| `mcp.client.operation.duration`           | Histogram | MCP client operation duration (`mcp_client_operation_duration` on `/metrics`) |
 
 ### Workflow metrics