EntityProcess · christso · Jun 3, 2026 · Jun 3, 2026
diff --git a/apps/cli/src/commands/eval/commands/run.ts b/apps/cli/src/commands/eval/commands/run.ts
@@ -143,7 +143,7 @@ export const evalRunCommand = command({
     otelBackend: option({
       type: optional(string),
       long: 'otel-backend',
-      description: 'Use a backend preset (langfuse, braintrust, confident)',
+      description: 'Use a backend preset (langfuse, braintrust, confident, phoenix)',
     }),
     otelCaptureContent: flag({
       long: 'otel-capture-content',

diff --git a/apps/cli/src/commands/eval/run-eval.ts b/apps/cli/src/commands/eval/run-eval.ts
@@ -1172,7 +1172,8 @@ export async function runEvalCommand(
       if (options.otelBackend) {
         const preset = OTEL_BACKEND_PRESETS[options.otelBackend];
         if (preset) {
-          endpoint = preset.endpoint;
+          endpoint =
+            typeof preset.endpoint === 'function' ? preset.endpoint(process.env) : preset.endpoint;
           headers = preset.headers(process.env);
         } else {
           console.warn(`Unknown OTel backend preset: ${options.otelBackend}`);

diff --git a/apps/web/src/content/docs/docs/evaluation/running-evals.mdx b/apps/web/src/content/docs/docs/evaluation/running-evals.mdx
@@ -125,7 +125,7 @@ OpenTelemetry-compatible backend.
 Stream traces directly to an observability backend during evaluation using `--export-otel`:
 
 ```bash
-# Use a backend preset (braintrust, langfuse, confident)
+# Use a backend preset (braintrust, langfuse, confident, phoenix)
 agentv eval evals/my-eval.yaml --export-otel --otel-backend braintrust
 
 # Include message content and tool I/O in spans (disabled by default for privacy)
@@ -179,6 +179,22 @@ export LANGFUSE_SECRET_KEY=sk-...
 agentv eval evals/my-eval.yaml --export-otel --otel-backend langfuse --otel-capture-content
 ```
 
+#### Phoenix
+
+```bash
+# Local Phoenix defaults to http://localhost:6006/v1/traces
+agentv eval evals/my-eval.yaml --export-otel --otel-backend phoenix
+
+# Hosted or remote Phoenix
+export PHOENIX_COLLECTOR_ENDPOINT=https://app.phoenix.arize.com/s/my-space
+export PHOENIX_API_KEY=px-...
+export PHOENIX_PROJECT_NAME=agentv-evals
+
+agentv eval evals/my-eval.yaml --export-otel --otel-backend phoenix --otel-capture-content
+```
+
+See [Phoenix](/docs/integrations/phoenix/) for project routing, privacy notes, and the separate repo-local dataset/experiment adapter.
+
 #### Custom OTLP Endpoint
 
 For backends not covered by presets, configure via environment variables:
@@ -400,14 +416,20 @@ Project-local YAML config takes precedence over home/global YAML config. AgentV
 execution:
   verbose: true
   keep_workspaces: false
+  export_otel: true
+  otel_backend: phoenix
   otel_file: .agentv/results/otel-{timestamp}.json
 ```
 
 | Field | CLI equivalent | Type | Default | Description |
 |-------|---------------|------|---------|-------------|
 | `verbose` | `--verbose` | boolean | `false` | Enable verbose logging |
 | `keep_workspaces` | `--keep-workspaces` | boolean | `false` | Always keep temp workspaces after eval |
+| `export_otel` | `--export-otel` | boolean | `false` | Stream traces via OTLP/HTTP |
+| `otel_backend` | `--otel-backend` | string | none | Backend preset: `braintrust`, `langfuse`, `confident`, or `phoenix` |
 | `otel_file` | `--otel-file` | string | none | Write OTLP JSON trace to file |
+| `otel_capture_content` | `--otel-capture-content` | boolean | `false` | Include message and tool content in exported spans |
+| `otel_group_turns` | `--otel-group-turns` | boolean | `false` | Group multi-turn messages under `agentv.turn.N` spans |
 
 ### TypeScript config (`agentv.config.ts`)
 

diff --git a/apps/web/src/content/docs/docs/integrations/phoenix.mdx b/apps/web/src/content/docs/docs/integrations/phoenix.mdx
@@ -0,0 +1,127 @@
+---
+title: Phoenix
+description: Export AgentV traces to Phoenix and understand the repo-local Phoenix adapter
+sidebar:
+  order: 2
+---
+
+AgentV integrates with [Arize Phoenix](https://arize.com/docs/phoenix/) through two separate surfaces:
+
+- **OTLP trace export** from normal `agentv eval` runs. This is the primary supported path for observing AgentV executions in Phoenix.
+- **Repo-local dataset/experiment adapter** in `packages/phoenix-adapter`. This keeps AgentV eval YAML as the source of truth while converting suites into Phoenix dataset and experiment payloads. The adapter is private and intentionally limited while parity work continues.
+
+AgentV scoring remains authoritative for AgentV-authored evals. Phoenix receives traces, run metadata, and adapter experiment artifacts; it does not replace AgentV's YAML loader, target runner, workspace lifecycle, or grader semantics.
+
+## Quick Start: Trace Export
+
+Start Phoenix locally or point AgentV at a hosted Phoenix collector endpoint:
+
+```bash
+# Local Phoenix default: http://localhost:6006
+agentv eval evals/my-eval.yaml --export-otel --otel-backend phoenix
+
+# Hosted or remote Phoenix
+export PHOENIX_COLLECTOR_ENDPOINT=https://app.phoenix.arize.com/s/my-space
+export PHOENIX_API_KEY=px-...
+export PHOENIX_PROJECT_NAME=agentv-evals
+
+agentv eval evals/my-eval.yaml --export-otel --otel-backend phoenix
+```
+
+The `phoenix` preset sends standard OTLP/HTTP traces to `{PHOENIX_COLLECTOR_ENDPOINT}/v1/traces`. If `PHOENIX_COLLECTOR_ENDPOINT` already ends in `/v1/traces`, AgentV uses it as-is. When unset, AgentV defaults to `http://localhost:6006/v1/traces`.
+
+## Environment Variables
+
+| Variable | Required | Description |
+| --- | --- | --- |
+| `PHOENIX_COLLECTOR_ENDPOINT` | no | Phoenix collector base URL or full OTLP traces URL. Defaults to `http://localhost:6006`. |
+| `PHOENIX_API_KEY` | hosted Phoenix | Adds `Authorization: Bearer ...` to OTLP exports. |
+| `PHOENIX_PROJECT_NAME` | no | Adds `x-project-name` for Phoenix project routing. |
+| `PHOENIX_PROJECT` | no | Fallback project name if `PHOENIX_PROJECT_NAME` is unset. |
+| `OTEL_EXPORTER_OTLP_HEADERS` | no | Extra OTLP headers, merged after preset headers. |
+
+Project routing via the `x-project-name` header only works when the OTLP/HTTP endpoint of your Phoenix deployment honors that header. See Phoenix's [project setup docs](https://arize.com/docs/phoenix/tracing/how-to-tracing/setup-tracing/setup-projects) for the current behavior.
+
+## Config.yaml Alternative
+
+Set default Phoenix export in `.agentv/config.yaml`:
+
+```yaml
+execution:
+  export_otel: true
+  otel_backend: phoenix
+```
+
+Add content capture only when your Phoenix instance is approved to store prompts, outputs, and tool I/O:
+
+```yaml
+execution:
+  export_otel: true
+  otel_backend: phoenix
+  otel_capture_content: true
+```
+
+:::caution[Privacy]
+`otel_capture_content: true` (or the equivalent `--otel-capture-content` CLI flag) sends full message and tool content to Phoenix. Leave it disabled unless both the data and the Phoenix deployment meet your privacy requirements.
+:::
+
+## What Appears in Phoenix
+
+Each eval test case produces an `agentv.eval` trace with AgentV attributes such as test ID, suite, target, score, duration, token usage, and tool summary. With streaming providers, AgentV also emits model and tool spans. With `--otel-group-turns`, multi-turn eval messages are grouped under `agentv.turn.N` spans.
+
+```bash
+agentv eval evals/my-eval.yaml \
+  --export-otel \
+  --otel-backend phoenix \
+  --otel-group-turns
+```
+
+## Dataset/Experiment Adapter
+
+The repo-local `@agentv/phoenix-adapter` package converts AgentV eval YAML suites into Phoenix dataset payloads and can run Phoenix experiments for adapter verification:
+
+```bash
+bun --filter @agentv/phoenix-adapter phoenix:assert-smoke
+bun --filter @agentv/phoenix-adapter phoenix:dry-run
+```
+
+Use the adapter when you are developing or verifying Phoenix dataset/experiment parity. Use normal `agentv eval --export-otel --otel-backend phoenix` when you want to observe real AgentV eval runs.
+
+Current adapter support is intentionally small:
+
+| Family | Status |
+| --- | --- |
+| `contains`, `regex`, `equals`, `is-json` | Supported by the deterministic adapter |
+| Other deterministic string variants | Planned parity work |
+| `llm-grader`, `rubrics`, `code-grader`, trace and metric graders | Reported as unsupported |
+| Custom/plugin graders | Reported as unsupported by family name |
+
+Unsupported adapter entries stay visible in reports and do not block conversion unless `--fail-on-unsupported` is set. They should not be interpreted as passing scores.
+
+## Integration Contract
+
+- AgentV eval YAML remains the source of truth for test discovery, interpolation, assertion parsing, and metadata.
+- AgentV scoring remains authoritative unless a Phoenix-native evaluator is explicitly proven equivalent and documented.
+- Phoenix is optional observability and experiment infrastructure; it is not required for normal AgentV eval execution.
+- The adapter remains private until real AgentV target execution, deterministic parity, and release expectations are complete.
+
+## Troubleshooting
+
+### Traces do not appear
+
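+Check which collector endpoint AgentV will use (an empty value means the `http://localhost:6006` default applies), confirm that Phoenix is reachable at that address, and then re-run the export: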
+
+```bash
+echo "$PHOENIX_COLLECTOR_ENDPOINT"
+agentv eval evals/my-eval.yaml --export-otel --otel-backend phoenix
+```
+
+For local Phoenix, the preset expects Phoenix at `http://localhost:6006`.
+
+### Hosted Phoenix returns 401 or 403
+
+Check that `PHOENIX_API_KEY` is set and valid for the target Phoenix space.
+
+### Traces appear in the wrong project
+
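+Set `PHOENIX_PROJECT_NAME` (or the `PHOENIX_PROJECT` fallback) to the project that should receive the spans. Extra headers in `OTEL_EXPORTER_OTLP_HEADERS` are merged after preset headers, so an `x-project-name` entry there overrides the preset's value; check that variable if spans still land in an unexpected project.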