From 229fbbf88673c46d355e4a4ff7ef715f713e2989 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Tue, 2 Jun 2026 12:15:18 +0200
Subject: [PATCH 1/6] feat(phoenix): add AgentV eval adapter

---
 .github/workflows/validate.yml                |   3 +
 bun.lock                                      | 140 +++++++++++++-
 package.json                                  |  15 +-
 packages/phoenix-adapter/.gitignore           |   1 +
 packages/phoenix-adapter/README.md            |  12 ++
 .../phoenix-adapter/docs/e2e-verification.md  |  50 +++++
 .../phoenix-adapter/docs/support-matrix.md    |  23 +++
 packages/phoenix-adapter/package.json         |  32 ++++
 .../phoenix-adapter/src/agentv/discovery.ts   |  39 ++++
 .../phoenix-adapter/src/agentv/load-spec.ts   | 119 ++++++++++++
 packages/phoenix-adapter/src/agentv/path.ts   |  22 +++
 packages/phoenix-adapter/src/agentv/types.ts  |  40 ++++
 packages/phoenix-adapter/src/cli.ts           |  67 +++++++
 .../src/evaluators/deterministic.ts           | 178 ++++++++++++++++++
 .../src/evaluators/registry.ts                | 134 +++++++++++++
 .../phoenix-adapter/src/evaluators/types.ts   |  59 ++++++
 packages/phoenix-adapter/src/index.ts         |  12 ++
 .../phoenix-adapter/src/parity/baselines.ts   |  27 +++
 .../phoenix-adapter/src/parity/compare.ts     |  74 ++++++++
 packages/phoenix-adapter/src/parity/report.ts |  60 ++++++
 packages/phoenix-adapter/src/parity/types.ts  |  25 +++
 .../phoenix-adapter/src/phoenix/datasets.ts   |  29 +++
 packages/phoenix-adapter/src/phoenix/names.ts |  12 ++
 .../src/phoenix/run-experiment.ts             | 174 +++++++++++++++++
 packages/phoenix-adapter/src/phoenix/types.ts |  23 +++
 packages/phoenix-adapter/src/run/options.ts   |   9 +
 packages/phoenix-adapter/src/run/run-suite.ts |  73 +++++++
 .../test/agentv-normalize.test.ts             |  95 ++++++++++
 .../test/evaluators/deterministic.test.ts     |  96 ++++++++++
 .../test/evaluators/registry.test.ts          |  69 +++++++
 packages/phoenix-adapter/test/parity.test.ts  |  41 ++++
 .../test/phoenix-datasets.test.ts             |  35 ++++
 packages/phoenix-adapter/tsconfig.json        |  10 +
 packages/phoenix-adapter/tsup.config.ts       |  12 ++
 34 files changed, 1800 insertions(+), 10 deletions(-)
 create mode 100644 packages/phoenix-adapter/.gitignore
 create mode 100644 packages/phoenix-adapter/README.md
 create mode 100644 packages/phoenix-adapter/docs/e2e-verification.md
 create mode 100644 packages/phoenix-adapter/docs/support-matrix.md
 create mode 100644 packages/phoenix-adapter/package.json
 create mode 100644 packages/phoenix-adapter/src/agentv/discovery.ts
 create mode 100644 packages/phoenix-adapter/src/agentv/load-spec.ts
 create mode 100644 packages/phoenix-adapter/src/agentv/path.ts
 create mode 100644 packages/phoenix-adapter/src/agentv/types.ts
 create mode 100644 packages/phoenix-adapter/src/cli.ts
 create mode 100644 packages/phoenix-adapter/src/evaluators/deterministic.ts
 create mode 100644 packages/phoenix-adapter/src/evaluators/registry.ts
 create mode 100644 packages/phoenix-adapter/src/evaluators/types.ts
 create mode 100644 packages/phoenix-adapter/src/index.ts
 create mode 100644 packages/phoenix-adapter/src/parity/baselines.ts
 create mode 100644 packages/phoenix-adapter/src/parity/compare.ts
 create mode 100644 packages/phoenix-adapter/src/parity/report.ts
 create mode 100644 packages/phoenix-adapter/src/parity/types.ts
 create mode 100644 packages/phoenix-adapter/src/phoenix/datasets.ts
 create mode 100644 packages/phoenix-adapter/src/phoenix/names.ts
 create mode 100644 packages/phoenix-adapter/src/phoenix/run-experiment.ts
 create mode 100644 packages/phoenix-adapter/src/phoenix/types.ts
 create mode 100644 packages/phoenix-adapter/src/run/options.ts
 create mode 100644 packages/phoenix-adapter/src/run/run-suite.ts
 create mode 100644 packages/phoenix-adapter/test/agentv-normalize.test.ts
 create mode 100644 packages/phoenix-adapter/test/evaluators/deterministic.test.ts
 create mode 100644 packages/phoenix-adapter/test/evaluators/registry.test.ts
 create mode 100644 packages/phoenix-adapter/test/parity.test.ts
 create mode 100644 packages/phoenix-adapter/test/phoenix-datasets.test.ts
 create mode 100644 packages/phoenix-adapter/tsconfig.json
 create mode 100644 packages/phoenix-adapter/tsup.config.ts

diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml
index 8f62d5a8a..9ae0fe5e3 100644
--- a/.github/workflows/validate.yml
+++ b/.github/workflows/validate.yml
@@ -51,5 +51,8 @@ jobs:
       - name: Check evals directories have eval files
         run: bun scripts/validate-eval-dirs.ts
 
+      - name: Run Phoenix adapter dry-run smoke
+        run: bun run phoenix:assert-smoke
+
       - name: Validate eval schemas
         run: bun apps/cli/dist/cli.js validate 'examples/features/**/evals/**/*.eval.yaml' 'examples/features/**/*.EVAL.yaml'
diff --git a/bun.lock b/bun.lock
index 9a723e1c0..366959007 100644
--- a/bun.lock
+++ b/bun.lock
@@ -7,6 +7,7 @@
       "devDependencies": {
         "@agentv/core": "workspace:*",
         "@agentv/eval": "workspace:*",
+        "@agentv/phoenix-adapter": "workspace:*",
         "@biomejs/biome": "^1.9.4",
         "@j178/prek": "^0.3.0",
         "@types/bun": "latest",
@@ -20,7 +21,7 @@
     },
     "apps/cli": {
       "name": "agentv",
-      "version": "4.27.0",
+      "version": "4.31.4-next.1",
       "bin": {
         "agentv": "./dist/cli.js",
       },
@@ -84,7 +85,7 @@
     },
     "packages/core": {
       "name": "@agentv/core",
-      "version": "4.27.0",
+      "version": "4.31.4-next.1",
       "dependencies": {
         "@agentclientprotocol/sdk": "^0.14.1",
         "@agentv/eval": "workspace:*",
@@ -120,27 +121,64 @@
     },
     "packages/eval": {
       "name": "@agentv/eval",
-      "version": "4.27.0",
+      "version": "4.31.4-next.1",
       "dependencies": {
         "zod": "^3.23.8",
       },
     },
+    "packages/phoenix-adapter": {
+      "name": "@agentv/phoenix-adapter",
+      "version": "4.31.4-next.1",
+      "dependencies": {
+        "@agentv/core": "workspace:*",
+        "@arizeai/phoenix-client": "6.10.0",
+        "@arizeai/phoenix-evals": "1.0.3",
+      },
+      "devDependencies": {
+        "tsup": "8.3.5",
+        "typescript": "5.8.3",
+      },
+    },
   },
   "packages": {
     "@agentclientprotocol/sdk": ["@agentclientprotocol/sdk@0.14.1", "", { "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" } }, "sha512-b6r3PS3Nly+Wyw9U+0nOr47bV8tfS476EgyEMhoKvJCZLbgqoDFN7DJwkxL88RR0aiOqOYV1ZnESHqb+RmdH8w=="],
 
     "@agentv/core": ["@agentv/core@workspace:packages/core"],
 
+    "@agentv/dashboard": ["@agentv/dashboard@workspace:apps/dashboard"],
+
     "@agentv/eval": ["@agentv/eval@workspace:packages/eval"],
 
-    "@agentv/dashboard": ["@agentv/dashboard@workspace:apps/dashboard"],
+    "@agentv/phoenix-adapter": ["@agentv/phoenix-adapter@workspace:packages/phoenix-adapter"],
 
     "@agentv/web": ["@agentv/web@workspace:apps/web"],
 
+    "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.122", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27", "@vercel/oidc": "3.2.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-U1k2fk7cSH/tS5CZ3ujROiUCOLFwkzb792OqR/Org8Mfm27dKSIdRZG4ZuJUifT8alUWa61IoaRu4foXKlP5TQ=="],
+
+    "@ai-sdk/provider": ["@ai-sdk/provider@3.0.10", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw=="],
+
+    "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.27", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.8" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw=="],
+
     "@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.49", "", { "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.34.2", "@img/sharp-darwin-x64": "^0.34.2", "@img/sharp-linux-arm": "^0.34.2", "@img/sharp-linux-arm64": "^0.34.2", "@img/sharp-linux-x64": "^0.34.2", "@img/sharp-linuxmusl-arm64": "^0.34.2", "@img/sharp-linuxmusl-x64": "^0.34.2", "@img/sharp-win32-arm64": "^0.34.2", "@img/sharp-win32-x64": "^0.34.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-3avi409dwuGkPEETpWa0gyJvRMr3b6LxeuW5/sAPCOtLD9WxH9fYltbA5wZoazxTw5mlbXmjDp7JqO1rlmpaIQ=="],
 
     "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.91.1", "", { "dependencies": { "json-schema-to-ts": "^3.1.1" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-LAmu761tSN9r66ixvmciswUj/ZC+1Q4iAfpedTfSVLeswRwnY3n2Nb6Tsk+cLPP28aLOPWeMgIuTuCcMC6W/iw=="],
 
+    "@arizeai/openinference-core": ["@arizeai/openinference-core@2.2.0", "", { "dependencies": { "@arizeai/openinference-semantic-conventions": "2.5.0", "@opentelemetry/api": "^1.9.0", "@opentelemetry/core": "^1.25.1" } }, "sha512-Ix1u/nphZj1yHqmyIfeBe2AVfnilTwgtvfXemJxc/6F+4JC7Rks6VMlPCfB8NXvMOhop2IveA6EyxYMkv/PH/A=="],
+
+    "@arizeai/openinference-genai": ["@arizeai/openinference-genai@0.1.10", "", { "dependencies": { "@arizeai/openinference-semantic-conventions": "2.5.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.9.0", "@opentelemetry/semantic-conventions": ">=1.37.0" } }, "sha512-BrkTeZm57FXHZ82C50KU79NJfz/jQseW/4sTdz5IvAbZPBnwEbwua/jspckD1b3vdTS+BGEwkXWTx0fkUilb7A=="],
+
+    "@arizeai/openinference-semantic-conventions": ["@arizeai/openinference-semantic-conventions@2.5.0", "", {}, "sha512-4ZeSwiFX3YxB0WSE6x568wM4PVHiYmz3yiOxic6WGKVrE/KIGggMFP/eqUNQhikBKP68IDV0qiILlZAIYnheAQ=="],
+
+    "@arizeai/openinference-vercel": ["@arizeai/openinference-vercel@2.7.7", "", { "dependencies": { "@arizeai/openinference-core": "2.2.0", "@arizeai/openinference-genai": "0.1.10", "@arizeai/openinference-semantic-conventions": "2.5.0", "@opentelemetry/core": "^1.30.1" }, "peerDependencies": { "@opentelemetry/api": ">=1.7.0 <2.0.0" } }, "sha512-iShJM8C+R959Ei9sUt8/2BE301W5Px06nYKY+7tCMbm7M0MjRPibrjLyZi6n+Hnd8U3zBE4vYXzAIdE0Z6ZTiQ=="],
+
+    "@arizeai/phoenix-client": ["@arizeai/phoenix-client@6.10.0", "", { "dependencies": { "@arizeai/openinference-semantic-conventions": "^2.1.7", "@arizeai/openinference-vercel": "^2.7.0", "@arizeai/phoenix-config": "0.1.4", "@arizeai/phoenix-otel": "1.0.2", "async": "^3.2.6", "openapi-fetch": "^0.17.0", "tiny-invariant": "^1.3.3", "zod": "^4.0.14" }, "peerDependencies": { "@anthropic-ai/sdk": "^0.35.0", "ai": "^6.0.90", "openai": "^6.10.0" }, "optionalPeers": ["@anthropic-ai/sdk", "ai", "openai"] }, "sha512-rKvvHyhBGT5Tksckt3VhDGdPiRoaG/GCgzS64IVCPLHLdQhEHoRd88MJyud8tHgwwQ4/XyJ/4cT43z26CNCPVg=="],
+
+    "@arizeai/phoenix-config": ["@arizeai/phoenix-config@0.1.4", "", {}, "sha512-GBgPCQWW2GIHqsV067Uqc2YLCapQTHWX2wuYQYILAos6m39+sDX4hunP4qUqBixbF8tR5zdybCO8iGRV+tEcBg=="],
+
+    "@arizeai/phoenix-evals": ["@arizeai/phoenix-evals@1.0.3", "", { "dependencies": { "@arizeai/openinference-core": "^2.0.0", "@opentelemetry/api": "^1.9.0", "ai": "^6.0.90", "jsonpath-plus": "^10.3.0", "mustache": "^4.2.0", "zod": "^4.0.14" } }, "sha512-D4u8UVhbWkPXgtUZVlTcBdlLyowPt7yIjebqDxtglnoBnKx6u3pM+8veC3AROQCc29h1HiKzyiMFwqmQMFFNLg=="],
+
+    "@arizeai/phoenix-otel": ["@arizeai/phoenix-otel@1.0.2", "", { "dependencies": { "@arizeai/openinference-core": "^2.0.7", "@arizeai/openinference-semantic-conventions": "^2.1.7", "@arizeai/openinference-vercel": "^2.7.0", "@opentelemetry/api": "^1.9.0", "@opentelemetry/context-async-hooks": "^2.5.1", "@opentelemetry/core": "^1.25.1", "@opentelemetry/exporter-trace-otlp-proto": "^0.205.0", "@opentelemetry/instrumentation": "^0.57.2", "@opentelemetry/resources": "^2.0.0", "@opentelemetry/sdk-trace-base": "^2.5.1", "@opentelemetry/sdk-trace-node": "^2.5.1" } }, "sha512-lYUQN1buHJM+ZGSO9uEKuoihiOqjYcOIxfm1IzqyZ4Fk6o6YKGyEKYt6fgwzoaWr82x+mn9oPUFay7Ff8m0gAw=="],
+
     "@astrojs/compiler": ["@astrojs/compiler@2.13.0", "", {}, "sha512-mqVORhUJViA28fwHYaWmsXSzLO9osbdZ5ImUfxBarqsYdMlPbqAqGJCxsNzvppp1BEzc1mJNjOVvQqeDN8Vspw=="],
 
     "@astrojs/internal-helpers": ["@astrojs/internal-helpers@0.7.5", "", {}, "sha512-vreGnYSSKhAjFJCWAwe/CNhONvoc5lokxtRoZims+0wa3KbHBdPHSSthJsKxPd8d/aic6lWKpRTYGY/hsgK6EA=="],
@@ -477,6 +515,10 @@
 
     "@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.31", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw=="],
 
+    "@jsep-plugin/assignment": ["@jsep-plugin/assignment@1.3.0", "", { "peerDependencies": { "jsep": "^0.4.0||^1.0.0" } }, "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ=="],
+
+    "@jsep-plugin/regex": ["@jsep-plugin/regex@1.0.4", "", { "peerDependencies": { "jsep": "^0.4.0||^1.0.0" } }, "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg=="],
+
     "@mdx-js/mdx": ["@mdx-js/mdx@3.1.1", "", { "dependencies": { "@types/estree": "^1.0.0", "@types/estree-jsx": "^1.0.0", "@types/hast": "^3.0.0", "@types/mdx": "^2.0.0", "acorn": "^8.0.0", "collapse-white-space": "^2.0.0", "devlop": "^1.0.0", "estree-util-is-identifier-name": "^3.0.0", "estree-util-scope": "^1.0.0", "estree-walker": "^3.0.0", "hast-util-to-jsx-runtime": "^2.0.0", "markdown-extensions": "^2.0.0", "recma-build-jsx": "^1.0.0", "recma-jsx": "^1.0.0", "recma-stringify": "^1.0.0", "rehype-recma": "^1.0.0", "remark-mdx": "^3.0.0", "remark-parse": "^11.0.0", "remark-rehype": "^11.0.0", "source-map": "^0.7.0", "unified": "^11.0.0", "unist-util-position-from-estree": "^2.0.0", "unist-util-stringify-position": "^4.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0" } }, "sha512-f6ZO2ifpwAQIpzGWaBQT2TXxPv6z3RBzQKpVftEWN78Vl/YweF1uwussDx8ECAXVtr3Rs89fKyG9YlzUs9DyGQ=="],
 
     "@mistralai/mistralai": ["@mistralai/mistralai@2.2.1", "", { "dependencies": { "ws": "^8.18.0", "zod": "^3.25.0 || ^4.0.0", "zod-to-json-schema": "^3.25.0" } }, "sha512-uKU8CZmL2RzYKmplsU01hii4p3pe4HqJefpWNRWXm1Tcm0Sm4xXfwSLIy4k7ZCPlbETCGcp69E7hZs+WOJ5itQ=="],
@@ -519,6 +561,10 @@
 
     "@opentelemetry/exporter-trace-otlp-http": ["@opentelemetry/exporter-trace-otlp-http@0.212.0", "", { "dependencies": { "@opentelemetry/core": "2.5.1", "@opentelemetry/otlp-exporter-base": "0.212.0", "@opentelemetry/otlp-transformer": "0.212.0", "@opentelemetry/resources": "2.5.1", "@opentelemetry/sdk-trace-base": "2.5.1" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-v/0wMozNoiEPRolzC4YoPo4rAT0q8r7aqdnRw3Nu7IDN0CGFzNQazkfAlBJ6N5y0FYJkban7Aw5WnN73//6YlA=="],
 
+    "@opentelemetry/exporter-trace-otlp-proto": ["@opentelemetry/exporter-trace-otlp-proto@0.205.0", "", { "dependencies": { "@opentelemetry/core": "2.1.0", "@opentelemetry/otlp-exporter-base": "0.205.0", "@opentelemetry/otlp-transformer": "0.205.0", "@opentelemetry/resources": "2.1.0", "@opentelemetry/sdk-trace-base": "2.1.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-bGtFzqiENO2GpJk988mOBMe0MfeNpTQjbLm/LBijas6VRyEDQarUzdBHpFlu89A25k1+BCntdWGsWTa9Ai4FyA=="],
+
+    "@opentelemetry/instrumentation": ["@opentelemetry/instrumentation@0.57.2", "", { "dependencies": { "@opentelemetry/api-logs": "0.57.2", "@types/shimmer": "^1.2.0", "import-in-the-middle": "^1.8.1", "require-in-the-middle": "^7.1.1", "semver": "^7.5.2", "shimmer": "^1.2.1" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-BdBGhQBh8IjZ2oIIX6F2/Q3LKm/FDDKi6ccYKcBTeilh6SNdNKveDOLk73BkSJjQLJk6qe4Yh+hHw1UPhCDdrg=="],
+
     "@opentelemetry/otlp-exporter-base": ["@opentelemetry/otlp-exporter-base@0.212.0", "", { "dependencies": { "@opentelemetry/core": "2.5.1", "@opentelemetry/otlp-transformer": "0.212.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-HoMv5pQlzbuxiMS0hN7oiUtg8RsJR5T7EhZccumIWxYfNo/f4wFc7LPDfFK6oHdG2JF/+qTocfqIHoom+7kLpw=="],
 
     "@opentelemetry/otlp-transformer": ["@opentelemetry/otlp-transformer@0.212.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.212.0", "@opentelemetry/core": "2.5.1", "@opentelemetry/resources": "2.5.1", "@opentelemetry/sdk-logs": "0.212.0", "@opentelemetry/sdk-metrics": "2.5.1", "@opentelemetry/sdk-trace-base": "2.5.1", "protobufjs": "8.0.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-bj7zYFOg6Db7NUwsRZQ/WoVXpAf41WY2gsd3kShSfdpZQDRKHWJiRZIg7A8HvWsf97wb05rMFzPbmSHyjEl9tw=="],
@@ -847,6 +893,8 @@
 
     "@types/semver": ["@types/semver@7.7.1", "", {}, "sha512-FmgJfu+MOcQ370SD0ev7EI8TlCAfKYU+B4m5T3yXc1CiRN94g/SZPtsCkk506aUDtlMnFZvasDwHHUcZUEaYuA=="],
 
+    "@types/shimmer": ["@types/shimmer@1.2.0", "", {}, "sha512-UE7oxhQLLd9gub6JKIAhDq06T0F6FnztwMNRvYgjeQSBeMc1ZG/tA47EwfduvkuQS8apbkM/lpLpWsaCeYsXVg=="],
+
     "@types/trusted-types": ["@types/trusted-types@2.0.7", "", {}, "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw=="],
 
     "@types/unist": ["@types/unist@3.0.3", "", {}, "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="],
@@ -855,16 +903,22 @@
 
     "@ungap/structured-clone": ["@ungap/structured-clone@1.3.0", "", {}, "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g=="],
 
+    "@vercel/oidc": ["@vercel/oidc@3.2.0", "", {}, "sha512-UycprH3T6n3jH0k44NHMa7pnFHGu/N05MjojYr+Mc6I7obkoLIJujSWwin1pCvdy/eOxrI/l3uDLQsmcrOb4ug=="],
+
     "@vitejs/plugin-react": ["@vitejs/plugin-react@4.7.0", "", { "dependencies": { "@babel/core": "^7.28.0", "@babel/plugin-transform-react-jsx-self": "^7.27.1", "@babel/plugin-transform-react-jsx-source": "^7.27.1", "@rolldown/pluginutils": "1.0.0-beta.27", "@types/babel__core": "^7.20.5", "react-refresh": "^0.17.0" }, "peerDependencies": { "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA=="],
 
     "acorn": ["acorn@8.15.0", "", { "bin": { "acorn": "bin/acorn" } }, "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg=="],
 
+    "acorn-import-attributes": ["acorn-import-attributes@1.9.5", "", { "peerDependencies": { "acorn": "^8" } }, "sha512-n02Vykv5uA3eHGM/Z2dQrcD56kL8TyDb2p1+0P83PClMnC/nc+anbQRhIOWnSq4Ke/KvDPrY3C9hDtC/A3eHnQ=="],
+
     "acorn-jsx": ["acorn-jsx@5.3.2", "", { "peerDependencies": { "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ=="],
 
     "agent-base": ["agent-base@7.1.4", "", {}, "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ=="],
 
     "agentv": ["agentv@workspace:apps/cli"],
 
+    "ai": ["ai@6.0.194", "", { "dependencies": { "@ai-sdk/gateway": "3.0.122", "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27", "@opentelemetry/api": "^1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-0MkYqrSZZuC1zTECppcaUT0i54aocXpYaUMVue3V8z/weBHCytfO5/CcwZCU80msZpfkbBUKYSSrkZFotEO5wQ=="],
+
     "ansi-align": ["ansi-align@3.0.1", "", { "dependencies": { "string-width": "^4.1.0" } }, "sha512-IOfwwBF5iczOjp/WeY4YxyjqAFMQoZufdQWDd19SEExbVLNXqvpzSJ/M7Za4/sCPmQ0+GRquoA7bGcINcxew6w=="],
 
     "ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="],
@@ -893,6 +947,8 @@
 
     "astro-expressive-code": ["astro-expressive-code@0.41.6", "", { "dependencies": { "rehype-expressive-code": "^0.41.6" }, "peerDependencies": { "astro": "^4.0.0-beta || ^5.0.0-beta || ^3.3.0 || ^6.0.0-beta" } }, "sha512-l47tb1uhmVIebHUkw+HEPtU/av0G4O8Q34g2cbkPvC7/e9ZhANcjUUciKt9Hp6gSVDdIuXBBLwJQn2LkeGMOAw=="],
 
+    "async": ["async@3.2.6", "", {}, "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA=="],
+
     "async-mutex": ["async-mutex@0.5.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-1A94B18jkJ3DYq284ohPxoXbfTA5HsQ7/Mf4DEhcyLx3Bz27Rh59iScbB6EPiP+B+joue6YCxcMXSbFC1tZKwA=="],
 
     "asynckit": ["asynckit@0.4.0", "", {}, "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="],
@@ -965,6 +1021,8 @@
 
     "ci-info": ["ci-info@4.4.0", "", {}, "sha512-77PSwercCZU2Fc4sX94eF8k8Pxte6JAwL4/ICZLFjJLqegs7kCuAsqqj/70NQF6TvDpgFjkubQB2FW2ZZddvQg=="],
 
+    "cjs-module-lexer": ["cjs-module-lexer@1.4.3", "", {}, "sha512-9z8TZaGM1pfswYeXrUpzPrkx8UnWYdhJclsiYMm6x/w5+nN+8Tf/LnAgfLGQCm59qAOxU8WwHEq2vNwF6i4j+Q=="],
+
     "cli-boxes": ["cli-boxes@3.0.0", "", {}, "sha512-/lzGpEWL/8PfI0BmBOPRwp0c/wFNX1RdUML3jK/RcSBA9T8mZDdQpqYBKtCFTOfQbwPqWEOpjqW+Fnayc0969g=="],
 
     "cli-width": ["cli-width@4.1.0", "", {}, "sha512-ouuZd4/dm2Sw5Gmqy6bGyNNNe1qt9RpmxveLSO7KcgsTnU7RXfsw+/bukWGo1abgBiMAic068rclZsO4IWmmxQ=="],
@@ -1145,6 +1203,8 @@
 
     "eventemitter3": ["eventemitter3@5.0.4", "", {}, "sha512-mlsTRyGaPBjPedk6Bvw+aqbsXDtoAyAzm5MO7JgU+yVRyMQ5O8bD4Kcci7BS85f93veegeCPkL8R4GLClnjLFw=="],
 
+    "eventsource-parser": ["eventsource-parser@3.1.0", "", {}, "sha512-kJezFj9YFAMLeORyi7aCLxLbD5/qWMQnoMVlVPyHIll7lgRJCc3JVln9Vgl9nwQi0YkMnhdGTMNn7CkRRAptMg=="],
+
     "execa": ["execa@9.6.1", "", { "dependencies": { "@sindresorhus/merge-streams": "^4.0.0", "cross-spawn": "^7.0.6", "figures": "^6.1.0", "get-stream": "^9.0.0", "human-signals": "^8.0.1", "is-plain-obj": "^4.1.0", "is-stream": "^4.0.1", "npm-run-path": "^6.0.0", "pretty-ms": "^9.2.0", "signal-exit": "^4.1.0", "strip-final-newline": "^4.0.0", "yoctocolors": "^2.1.1" } }, "sha512-9Be3ZoN4LmYR90tUoVu2te2BsbzHfhJyfEiAVfz7N5/zv+jduIfLrV2xdQXOHbaD6KgpGdO9PRPM1Y4Q9QkPkA=="],
 
     "expressive-code": ["expressive-code@0.41.6", "", { "dependencies": { "@expressive-code/core": "^0.41.6", "@expressive-code/plugin-frames": "^0.41.6", "@expressive-code/plugin-shiki": "^0.41.6", "@expressive-code/plugin-text-markers": "^0.41.6" } }, "sha512-W/5+IQbrpCIM5KGLjO35wlp1NCwDOOVQb+PAvzEoGkW1xjGM807ZGfBKptNWH6UECvt6qgmLyWolCMYKh7eQmA=="],
@@ -1291,6 +1351,8 @@
 
     "immer": ["immer@10.2.0", "", {}, "sha512-d/+XTN3zfODyjr89gM3mPq1WNX2B8pYsu7eORitdwyA2sBubnTl3laYlBk4sXY5FUa5qTZGBDPJICVbvqzjlbw=="],
 
+    "import-in-the-middle": ["import-in-the-middle@1.15.0", "", { "dependencies": { "acorn": "^8.14.0", "acorn-import-attributes": "^1.9.5", "cjs-module-lexer": "^1.2.2", "module-details-from-path": "^1.0.3" } }, "sha512-bpQy+CrsRmYmoPMAE/0G33iwRqwW4ouqdRg8jgbH3aKuCtOc8lxgmYXg2dMM92CRiGP660EtBcymH/eVUpCSaA=="],
+
     "import-meta-resolve": ["import-meta-resolve@4.2.0", "", {}, "sha512-Iqv2fzaTQN28s/FwZAoFq0ZSs/7hMAHJVX+w8PZl3cY19Pxk6jFFalxQoIfW2826i/fDLXv8IiEZRIT0lDuWcg=="],
 
     "inline-style-parser": ["inline-style-parser@0.2.7", "", {}, "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA=="],
@@ -1307,6 +1369,8 @@
 
     "is-binary-path": ["is-binary-path@2.1.0", "", { "dependencies": { "binary-extensions": "^2.0.0" } }, "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw=="],
 
+    "is-core-module": ["is-core-module@2.16.2", "", { "dependencies": { "hasown": "^2.0.3" } }, "sha512-evOr8xfXKxE6qSR0hSXL2r3sd7ALj8+7jQEUvPYcm5sgZFdJ+AYzT6yNmJenvIYQBgIGwfwz08sL8zoL7yq2BA=="],
+
     "is-decimal": ["is-decimal@2.0.1", "", {}, "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A=="],
 
     "is-docker": ["is-docker@3.0.0", "", { "bin": { "is-docker": "cli.js" } }, "sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ=="],
@@ -1343,14 +1407,20 @@
 
     "js-yaml": ["js-yaml@4.1.1", "", { "dependencies": { "argparse": "^2.0.1" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA=="],
 
+    "jsep": ["jsep@1.4.0", "", {}, "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw=="],
+
     "jsesc": ["jsesc@3.1.0", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA=="],
 
     "json-bigint": ["json-bigint@1.0.0", "", { "dependencies": { "bignumber.js": "^9.0.0" } }, "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ=="],
 
+    "json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="],
+
     "json-schema-to-ts": ["json-schema-to-ts@3.1.1", "", { "dependencies": { "@babel/runtime": "^7.18.3", "ts-algebra": "^2.0.0" } }, "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g=="],
 
     "json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="],
 
+    "jsonpath-plus": ["jsonpath-plus@10.4.0", "", { "dependencies": { "@jsep-plugin/assignment": "^1.3.0", "@jsep-plugin/regex": "^1.0.4", "jsep": "^1.4.0" }, "bin": { "jsonpath": "bin/jsonpath-cli.js", "jsonpath-plus": "bin/jsonpath-cli.js" } }, "sha512-T92WWatJXmhBbKsgH/0hl+jxjdXrifi5IKeMY02DWggRxX0UElcbVzPlmgLTbvsPeW1PasQ6xE2Q75stkhGbsA=="],
+
     "jwa": ["jwa@2.0.1", "", { "dependencies": { "buffer-equal-constant-time": "^1.0.1", "ecdsa-sig-formatter": "1.0.11", "safe-buffer": "^5.0.1" } }, "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg=="],
 
     "jws": ["jws@4.0.1", "", { "dependencies": { "jwa": "^2.0.1", "safe-buffer": "^5.0.1" } }, "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA=="],
@@ -1531,12 +1601,16 @@
 
     "minipass": ["minipass@7.1.2", "", {}, "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw=="],
 
+    "module-details-from-path": ["module-details-from-path@1.0.4", "", {}, "sha512-EGWKgxALGMgzvxYF1UyGTy0HXX/2vHLkw6+NvDKW2jypWbHpjQuj4UMcqQWXHERJhVGKikolT06G3bcKe4fi7w=="],
+
     "monaco-editor": ["monaco-editor@0.55.1", "", { "dependencies": { "dompurify": "3.2.7", "marked": "14.0.0" } }, "sha512-jz4x+TJNFHwHtwuV9vA9rMujcZRb0CEilTEwG2rRSpe/A7Jdkuj8xPKttCgOh+v/lkHy7HsZ64oj+q3xoAFl9A=="],
 
     "mrmime": ["mrmime@2.0.1", "", {}, "sha512-Y3wQdFg2Va6etvQ5I82yUhGdsKrcYox6p7FfL1LbK2J4V01F9TGlepTIhnK24t7koZibmg82KGglhA1XK5IsLQ=="],
 
     "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="],
 
+    "mustache": ["mustache@4.2.0", "", { "bin": { "mustache": "bin/mustache" } }, "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ=="],
+
     "mute-stream": ["mute-stream@3.0.0", "", {}, "sha512-dkEJPVvun4FryqBmZ5KhDo0K9iDXAwn08tMLDinNdRBNPcYEDiWYysLcc6k3mjTMlbP9KyylvRpd4wFtwrT9rw=="],
 
     "mz": ["mz@2.7.0", "", { "dependencies": { "any-promise": "^1.0.0", "object-assign": "^4.0.1", "thenify-all": "^1.0.0" } }, "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q=="],
@@ -1577,6 +1651,10 @@
 
     "openai": ["openai@6.26.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.25 || ^4.0" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-zd23dbWTjiJ6sSAX6s0HrCZi41JwTA1bQVs0wLQPZ2/5o2gxOJA5wh7yOAUgwYybfhDXyhwlpeQf7Mlgx8EOCA=="],
 
+    "openapi-fetch": ["openapi-fetch@0.17.0", "", { "dependencies": { "openapi-typescript-helpers": "^0.1.0" } }, "sha512-PsbZR1wAPcG91eEthKhN+Zn92FMHxv+/faECIwjXdxfTODGSGegYv0sc1Olz+HYPvKOuoXfp+0pA2XVt2cI0Ig=="],
+
+    "openapi-typescript-helpers": ["openapi-typescript-helpers@0.1.0", "", {}, "sha512-OKTGPthhivLw/fHz6c3OPtg72vi86qaMlqbJuVJ23qOvQ+53uw1n7HdmkJFibloF7QEjDrDkzJiOJuockM/ljw=="],
+
     "p-limit": ["p-limit@6.2.0", "", { "dependencies": { "yocto-queue": "^1.1.1" } }, "sha512-kuUqqHNUqoIWp/c467RI4X6mmyuojY5jGutNU0wVTmEOOfcuwLqyMVoAi9MKi2Ak+5i9+nhmrK4ufZE8069kHA=="],
 
     "p-queue": ["p-queue@8.1.1", "", { "dependencies": { "eventemitter3": "^5.0.1", "p-timeout": "^6.1.2" } }, "sha512-aNZ+VfjobsWryoiPnEApGGmf5WmNsCo9xu8dfaYamG5qaLP7ClhLN6NgsFe6SwJ2UbLEBK5dv9x8Mn5+RVhMWQ=="],
@@ -1609,6 +1687,8 @@
 
     "path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="],
 
+    "path-parse": ["path-parse@1.0.7", "", {}, "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw=="],
+
     "path-scurry": ["path-scurry@2.0.1", "", { "dependencies": { "lru-cache": "^11.0.0", "minipass": "^7.1.2" } }, "sha512-oWyT4gICAu+kaA7QWk/jvCHWarMKNs6pXOGWKDTr7cw4IGcUbW+PeTfbaQiLGheFRpjo6O9J0PmyMfQPjH71oA=="],
 
     "pathe": ["pathe@2.0.3", "", {}, "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w=="],
@@ -1713,8 +1793,12 @@
 
     "remark-stringify": ["remark-stringify@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-to-markdown": "^2.0.0", "unified": "^11.0.0" } }, "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw=="],
 
+    "require-in-the-middle": ["require-in-the-middle@7.5.2", "", { "dependencies": { "debug": "^4.3.5", "module-details-from-path": "^1.0.3", "resolve": "^1.22.8" } }, "sha512-gAZ+kLqBdHarXB64XpAe2VCjB7rIRv+mU8tfRWziHRJ5umKsIHN2tLLv6EtMw7WCdP19S0ERVMldNvxYCHnhSQ=="],
+
     "reselect": ["reselect@5.1.1", "", {}, "sha512-K/BG6eIky/SBpzfHZv/dd+9JBFiS4SWV7FIujVyJRux6e45+73RaUHXLmIR1f7WOMaQ0U1km6qwklRQxpJJY0w=="],
 
+    "resolve": ["resolve@1.22.12", "", { "dependencies": { "es-errors": "^1.3.0", "is-core-module": "^2.16.1", "path-parse": "^1.0.7", "supports-preserve-symlinks-flag": "^1.0.0" }, "bin": { "resolve": "bin/resolve" } }, "sha512-TyeJ1zif53BPfHootBGwPRYT1RUt6oGWsaQr8UyZW/eAm9bKoijtvruSDEmZHm92CwS9nj7/fWttqPCgzep8CA=="],
+
     "resolve-from": ["resolve-from@5.0.0", "", {}, "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw=="],
 
     "resolve-pkg-maps": ["resolve-pkg-maps@1.0.0", "", {}, "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw=="],
@@ -1759,6 +1843,8 @@
 
     "shiki": ["shiki@3.22.0", "", { "dependencies": { "@shikijs/core": "3.22.0", "@shikijs/engine-javascript": "3.22.0", "@shikijs/engine-oniguruma": "3.22.0", "@shikijs/langs": "3.22.0", "@shikijs/themes": "3.22.0", "@shikijs/types": "3.22.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-LBnhsoYEe0Eou4e1VgJACes+O6S6QC0w71fCSp5Oya79inkwkm15gQ1UF6VtQ8j/taMDh79hAB49WUk8ALQW3g=="],
 
+    "shimmer": ["shimmer@1.2.1", "", {}, "sha512-sQTKC1Re/rM6XyFM6fIAGHRPVGvyXfgzIDvzoq608vM+jeyVD0Tu1E6Np0Kc2zAIFWIj963V2800iF/9LPieQw=="],
+
     "signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="],
 
     "sisteransi": ["sisteransi@1.0.5", "", {}, "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg=="],
@@ -1799,6 +1885,8 @@
 
     "sucrase": ["sucrase@3.35.1", "", { "dependencies": { "@jridgewell/gen-mapping": "^0.3.2", "commander": "^4.0.0", "lines-and-columns": "^1.1.6", "mz": "^2.7.0", "pirates": "^4.0.1", "tinyglobby": "^0.2.11", "ts-interface-checker": "^0.1.9" }, "bin": { "sucrase": "bin/sucrase", "sucrase-node": "bin/sucrase-node" } }, "sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw=="],
 
+    "supports-preserve-symlinks-flag": ["supports-preserve-symlinks-flag@1.0.0", "", {}, "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w=="],
+
     "svgo": ["svgo@4.0.0", "", { "dependencies": { "commander": "^11.1.0", "css-select": "^5.1.0", "css-tree": "^3.0.1", "css-what": "^6.1.0", "csso": "^5.0.5", "picocolors": "^1.1.1", "sax": "^1.4.1" }, "bin": "./bin/svgo.js" }, "sha512-VvrHQ+9uniE+Mvx3+C9IEe/lWasXCU0nXMY2kZeLrHNICuRiC8uMPyM14UEaMOFA5mhyQqEkB02VoQ16n3DLaw=="],
 
     "tailwindcss": ["tailwindcss@4.2.2", "", {}, "sha512-KWBIxs1Xb6NoLdMVqhbhgwZf2PGBpPEiwOqgI4pFIYbNTfBXiKYyWoTsXgBQ9WFg/OlhnvHaY+AEpW7wSmFo2Q=="],
@@ -1953,6 +2041,16 @@
 
     "@anthropic-ai/claude-agent-sdk/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
 
+    "@arizeai/openinference-core/@opentelemetry/core": ["@opentelemetry/core@1.30.1", "", { "dependencies": { "@opentelemetry/semantic-conventions": "1.28.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-OOCM2C/QIURhJMuKaekP3TRBxBKxG/TWWA0TL2J6nXUtDnuCtccy49LUJF8xPFXMX+0LMcxFpCo8M9cGY1W6rQ=="],
+
+    "@arizeai/openinference-vercel/@opentelemetry/core": ["@opentelemetry/core@1.30.1", "", { "dependencies": { "@opentelemetry/semantic-conventions": "1.28.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-OOCM2C/QIURhJMuKaekP3TRBxBKxG/TWWA0TL2J6nXUtDnuCtccy49LUJF8xPFXMX+0LMcxFpCo8M9cGY1W6rQ=="],
+
+    "@arizeai/phoenix-client/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
+
+    "@arizeai/phoenix-evals/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
+
+    "@arizeai/phoenix-otel/@opentelemetry/core": ["@opentelemetry/core@1.30.1", "", { "dependencies": { "@opentelemetry/semantic-conventions": "1.28.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-OOCM2C/QIURhJMuKaekP3TRBxBKxG/TWWA0TL2J6nXUtDnuCtccy49LUJF8xPFXMX+0LMcxFpCo8M9cGY1W6rQ=="],
+
     "@astrojs/mdx/source-map": ["source-map@0.7.6", "", {}, "sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ=="],
 
     "@aws-crypto/sha256-browser/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="],
@@ -1987,6 +2085,18 @@
 
     "@mistralai/mistralai/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
 
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/core": ["@opentelemetry/core@2.1.0", "", { "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-RMEtHsxJs/GiHHxYT58IY57UXAQTuUnZVco6ymDEqTNlJKTimM4qPUPVe8InNFyBjhHBEAx4k3Q8LtNayBsbUQ=="],
+
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-exporter-base": ["@opentelemetry/otlp-exporter-base@0.205.0", "", { "dependencies": { "@opentelemetry/core": "2.1.0", "@opentelemetry/otlp-transformer": "0.205.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-2MN0C1IiKyo34M6NZzD6P9Nv9Dfuz3OJ3rkZwzFmF6xzjDfqqCTatc9v1EpNfaP55iDOCLHFyYNCgs61FFgtUQ=="],
+
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer": ["@opentelemetry/otlp-transformer@0.205.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.205.0", "@opentelemetry/core": "2.1.0", "@opentelemetry/resources": "2.1.0", "@opentelemetry/sdk-logs": "0.205.0", "@opentelemetry/sdk-metrics": "2.1.0", "@opentelemetry/sdk-trace-base": "2.1.0", "protobufjs": "^7.3.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-KmObgqPtk9k/XTlWPJHdMbGCylRAmMJNXIRh6VYJmvlRDMfe+DonH41G7eenG8t4FXn3fxOGh14o/WiMRR6vPg=="],
+
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/resources": ["@opentelemetry/resources@2.1.0", "", { "dependencies": { "@opentelemetry/core": "2.1.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-1CJjf3LCvoefUOgegxi8h6r4B/wLSzInyhGP2UmIBYNlo4Qk5CZ73e1eEyWmfXvFtm1ybkmfb2DqWvspsYLrWw=="],
+
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/sdk-trace-base": ["@opentelemetry/sdk-trace-base@2.1.0", "", { "dependencies": { "@opentelemetry/core": "2.1.0", "@opentelemetry/resources": "2.1.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-uTX9FBlVQm4S2gVQO1sb5qyBLq/FPjbp+tmGoxu4tIgtYGmBYB44+KX/725RFDe30yBSaA9Ml9fqphe1hbUyLQ=="],
+
+    "@opentelemetry/instrumentation/@opentelemetry/api-logs": ["@opentelemetry/api-logs@0.57.2", "", { "dependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-uIX52NnTM0iBh84MShlpouI7UKqkZ7MrUszTmaypHBu4r7NofznSnQRfJ+uUeDtQDj6w8eFGg5KBLDAwAPz1+A=="],
+
     "@reduxjs/toolkit/immer": ["immer@11.1.4", "", {}, "sha512-XREFCPo6ksxVzP4E0ekD5aMdf8WMwmdNaz6vuvxgI40UaEiu6q3p8X52aU6GdyvLY3XXX/8R7JOTXStz/nBbRw=="],
 
     "@rollup/pluginutils/estree-walker": ["estree-walker@2.0.2", "", {}, "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w=="],
@@ -2043,6 +2153,8 @@
 
     "h3/cookie-es": ["cookie-es@1.2.2", "", {}, "sha512-+W7VmiVINB+ywl1HGXJXmrqkOhpKrIiVZV6tQuV54ZyQC7MMuBt81Vc336GMLoHBq5hV/F9eXgt5Mnx0Rha5Fg=="],
 
+    "is-core-module/hasown": ["hasown@2.0.4", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-T2UbfbBEF32wiepXIsMlTW9+dDYC6wMh/t/vYA4tuOMKqWz/n3vr1NFSxQiyP+zk2mXsoMA/i/7qV6LKut1t1A=="],
+
     "magicast/@babel/parser": ["@babel/parser@7.28.6", "", { "dependencies": { "@babel/types": "^7.28.6" }, "bin": "./bin/babel-parser.js" }, "sha512-TeR9zWR18BvbfPmGbLampPMW+uW1NZnJlRuuHso8i87QZNq2JRF9i6RgxRqtEq+wQGsS19NNTWr2duhnE49mfQ=="],
 
     "node-fetch/data-uri-to-buffer": ["data-uri-to-buffer@4.0.1", "", {}, "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A=="],
@@ -2075,6 +2187,12 @@
 
     "vite/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
 
+    "@arizeai/openinference-core/@opentelemetry/core/@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.28.0", "", {}, "sha512-lp4qAiMTD4sNWW4DbKLBkfiMZ4jbAboJIGOQr5DvciMRI494OapieI9qiODpOt0XBr1LjIDy1xAGAnVs5supTA=="],
+
+    "@arizeai/openinference-vercel/@opentelemetry/core/@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.28.0", "", {}, "sha512-lp4qAiMTD4sNWW4DbKLBkfiMZ4jbAboJIGOQr5DvciMRI494OapieI9qiODpOt0XBr1LjIDy1xAGAnVs5supTA=="],
+
+    "@arizeai/phoenix-otel/@opentelemetry/core/@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.28.0", "", {}, "sha512-lp4qAiMTD4sNWW4DbKLBkfiMZ4jbAboJIGOQr5DvciMRI494OapieI9qiODpOt0XBr1LjIDy1xAGAnVs5supTA=="],
+
     "@aws-crypto/sha256-browser/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="],
 
     "@aws-crypto/util/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="],
@@ -2085,6 +2203,14 @@
 
     "@google/genai/protobufjs/@protobufjs/utf8": ["@protobufjs/utf8@1.1.1", "", {}, "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg=="],
 
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/@opentelemetry/api-logs": ["@opentelemetry/api-logs@0.205.0", "", { "dependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-wBlPk1nFB37Hsm+3Qy73yQSobVn28F4isnWIBvKpd5IUH/eat8bwcL02H9yzmHyyPmukeccSl2mbN5sDQZYnPg=="],
+
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/@opentelemetry/sdk-logs": ["@opentelemetry/sdk-logs@0.205.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.205.0", "@opentelemetry/core": "2.1.0", "@opentelemetry/resources": "2.1.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.4.0 <1.10.0" } }, "sha512-nyqhNQ6eEzPWQU60Nc7+A5LIq8fz3UeIzdEVBQYefB4+msJZ2vuVtRuk9KxPMw1uHoHDtYEwkr2Ct0iG29jU8w=="],
+
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/@opentelemetry/sdk-metrics": ["@opentelemetry/sdk-metrics@2.1.0", "", { "dependencies": { "@opentelemetry/core": "2.1.0", "@opentelemetry/resources": "2.1.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.9.0 <1.10.0" } }, "sha512-J9QX459mzqHLL9Y6FZ4wQPRZG4TOpMCyPOh6mkr/humxE1W2S3Bvf4i75yiMW9uyed2Kf5rxmLhTm/UK8vNkAw=="],
+
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/protobufjs": ["protobufjs@7.5.6", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.5", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.1", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.1", "@types/node": ">=13.7.0", "long": "^5.0.0" } }, "sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg=="],
+
     "@tanstack/router-plugin/chokidar/readdirp": ["readdirp@3.6.0", "", { "dependencies": { "picomatch": "^2.2.1" } }, "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA=="],
 
     "ansi-align/string-width/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="],
@@ -2251,6 +2377,12 @@
 
     "@aws-crypto/util/@smithy/util-utf8/@smithy/util-buffer-from/@smithy/is-array-buffer": ["@smithy/is-array-buffer@2.2.0", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA=="],
 
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/protobufjs/@protobufjs/codegen": ["@protobufjs/codegen@2.0.5", "", {}, "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g=="],
+
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/protobufjs/@protobufjs/inquire": ["@protobufjs/inquire@1.1.1", "", {}, "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew=="],
+
+    "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/protobufjs/@protobufjs/utf8": ["@protobufjs/utf8@1.1.1", "", {}, "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg=="],
+
     "ansi-align/string-width/strip-ansi/ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="],
   }
 }
diff --git a/package.json b/package.json
index 3b6a67fce..fd00c79ff 100644
--- a/package.json
+++ b/package.json
@@ -6,15 +6,15 @@
   "packageManager": "bun@1.3.3",
   "workspaces": ["apps/*", "packages/*"],
   "scripts": {
-    "build": "bun --filter @agentv/core build && bun --filter @agentv/eval build && bun --filter @agentv/dashboard build && bun --filter agentv build",
+    "build": "bun --filter @agentv/core build && bun --filter @agentv/eval build && bun --filter @agentv/phoenix-adapter build && bun --filter @agentv/dashboard build && bun --filter agentv build",
     "verify": "bun run build && bun run typecheck && bun run lint && bun run test",
-    "typecheck": "bun --filter @agentv/core typecheck && bun --filter agentv typecheck",
+    "typecheck": "bun --filter @agentv/core typecheck && bun --filter @agentv/phoenix-adapter typecheck && bun --filter agentv typecheck",
     "typecheck:workspace": "tsc -b tsconfig.build.json",
     "typecheck:watch": "bun --filter @agentv/core typecheck -- --watch & bun --filter agentv typecheck -- --watch",
     "lint": "biome check .",
     "format": "biome format --write .",
     "fix": "biome check --write .",
-    "test": "bun --filter @agentv/core test && bun --filter @agentv/eval test && bun --filter agentv test",
+    "test": "bun --filter @agentv/core test && bun --filter @agentv/eval test && bun --filter @agentv/phoenix-adapter test && bun --filter agentv test",
     "test:watch": "bun --filter @agentv/core test:watch & bun --filter agentv test:watch",
     "agentv": "bun apps/cli/src/cli.ts",
     "agentv:buildrun": "bun run build && bun apps/cli/dist/cli.js",
@@ -25,13 +25,16 @@
     "examples:install": "bun scripts/install-examples.ts",
     "publish": "bun run build && bun scripts/publish.ts",
     "publish:next": "bun run build && bun scripts/publish.ts next",
-    "prepare": "test -d .git && bunx prek install -t pre-push || true"
+    "prepare": "test -d .git && bunx prek install -t pre-push || true",
+    "phoenix:dry-run": "bun --filter @agentv/phoenix-adapter phoenix:dry-run",
+    "phoenix:assert-smoke": "bun --filter @agentv/phoenix-adapter phoenix:assert-smoke"
   },
   "devDependencies": {
-    "@biomejs/biome": "^1.9.4",
-    "@j178/prek": "^0.3.0",
     "@agentv/core": "workspace:*",
     "@agentv/eval": "workspace:*",
+    "@agentv/phoenix-adapter": "workspace:*",
+    "@biomejs/biome": "^1.9.4",
+    "@j178/prek": "^0.3.0",
     "@types/bun": "latest",
     "@types/node": "24.1.0",
     "async-mutex": "^0.5.0",
diff --git a/packages/phoenix-adapter/.gitignore b/packages/phoenix-adapter/.gitignore
new file mode 100644
index 000000000..a9a1bd38a
--- /dev/null
+++ b/packages/phoenix-adapter/.gitignore
@@ -0,0 +1 @@
+reports/
diff --git a/packages/phoenix-adapter/README.md b/packages/phoenix-adapter/README.md
new file mode 100644
index 000000000..528400be9
--- /dev/null
+++ b/packages/phoenix-adapter/README.md
@@ -0,0 +1,12 @@
+# @agentv/phoenix-adapter
+
+Converts AgentV eval YAML suites into Phoenix datasets and can run Phoenix experiments while keeping AgentV eval files as the source of truth.
+
+Current adapter support is intentionally small: deterministic `contains`, `regex`, `equals`, and `is-json` assertions run through a Phoenix CODE evaluator. LLM, code, trace, composite, metric, and custom evaluator families are reported as unsupported instead of being silently mapped.
+
+```bash
+bun --filter @agentv/phoenix-adapter phoenix:assert-smoke
+bun --filter @agentv/phoenix-adapter phoenix:dry-run
+```
+
+See `docs/support-matrix.md` for evaluator coverage and `docs/e2e-verification.md` for smoke-test notes.
diff --git a/packages/phoenix-adapter/docs/e2e-verification.md b/packages/phoenix-adapter/docs/e2e-verification.md
new file mode 100644
index 000000000..bf0cad46c
--- /dev/null
+++ b/packages/phoenix-adapter/docs/e2e-verification.md
@@ -0,0 +1,50 @@
+# E2E Verification
+
+## Dry-Run Conversion
+
+Dry-run mode discovers AgentV example evals, normalizes cases through `@agentv/core`, creates Phoenix dataset payloads in memory, and compares test IDs against AgentV baselines where present.
+
+```bash
+bun run phoenix:assert-smoke
+bun run phoenix:dry-run
+```
+
+Current filtered smoke result against `examples/features/assert/evals/dataset.eval.yaml`:
+
+- 1 suite discovered
+- 4 tests normalized
+- 1 suite passed structural parity
+- 0 failed suites
+
+Current full dry-run result against this AgentV checkout:
+
+- 97 suites discovered
+- 405 tests normalized
+- 93 suites passed structural parity
+- 4 suites failed baseline/loader parity
+
+The failing suites are currently source/baseline or source-reference mismatches, not Phoenix conversion crashes:
+
+- `examples/features/matrix-evaluation/evals/dataset.eval.yaml`: baseline has 5 rows, source has 3 tests.
+- `examples/features/prompt-template-sdk/evals/dataset.eval.yaml`: AgentV core skips 2 tests because `../prompts/custom-grader.ts` cannot be resolved from the eval source.
+- `examples/features/tool-trajectory-simple/evals/dataset.eval.yaml`: source has 11 tests, baseline has 7 rows.
+- `examples/features/weighted-graders/evals/dataset.eval.yaml`: baseline IDs use `evaluator` naming while source IDs use `grader` naming.
+
+## Live Phoenix Smoke
+
+Live mode creates or updates a Phoenix dataset and records a Phoenix experiment. It currently uses the deterministic adapter path, so the best smoke target is `examples/features/assert/evals/dataset.eval.yaml`.
+
+```bash
+(cd packages/phoenix-adapter && bun src/cli.ts run \
+  --agentv-root ../.. \
+  --filter examples/features/assert/evals/dataset.eval.yaml \
+  --out reports/live-assert-final.json \
+  --namespace agentv-phoenix-e2e-final)
+```
+
+The source harness was verified locally against Phoenix at `http://localhost:6006`:
+
+- 4 Phoenix task runs
+- 4 Phoenix evaluator runs
+- average evaluator score: 1.0
+- experiment ID: `RXhwZXJpbWVudDo2`
diff --git a/packages/phoenix-adapter/docs/support-matrix.md b/packages/phoenix-adapter/docs/support-matrix.md
new file mode 100644
index 000000000..6726bbf0c
--- /dev/null
+++ b/packages/phoenix-adapter/docs/support-matrix.md
@@ -0,0 +1,23 @@
+# Phoenix Adapter Support Matrix
+
+This workspace converts AgentV example evals into Phoenix dataset and experiment payloads.
+
+| AgentV family | Phoenix status |
+| --- | --- |
+| `contains` | Supported by deterministic adapter |
+| `regex` | Supported by deterministic adapter |
+| `equals` | Supported by deterministic adapter |
+| `is-json` | Supported by deterministic adapter |
+| `llm-grader` | Reported as unsupported in first pass |
+| `rubrics` | Reported as unsupported in first pass |
+| `code-grader` | Reported as unsupported in first pass |
+| `composite` | Reported as unsupported in first pass |
+| `field-accuracy` | Reported as unsupported in first pass |
+| `execution-metrics` | Reported as unsupported in first pass |
+| `tool-trajectory` | Reported as unsupported in first pass |
+| `cost` | Reported as unsupported in first pass |
+| `latency` | Reported as unsupported in first pass |
+| `trial-output-consistency` | Reported as unsupported in first pass |
+| Other custom families | Reported as unsupported with the family name |
+
+An unsupported family does not block conversion unless `--fail-on-unsupported` is set. The report keeps unsupported families visible so parity gaps are explicit.
diff --git a/packages/phoenix-adapter/package.json b/packages/phoenix-adapter/package.json
new file mode 100644
index 000000000..d51f1a0ee
--- /dev/null
+++ b/packages/phoenix-adapter/package.json
@@ -0,0 +1,32 @@
+{
+  "name": "@agentv/phoenix-adapter",
+  "version": "4.31.4-next.1",
+  "description": "Phoenix execution and observability adapter for AgentV eval YAML suites",
+  "private": true,
+  "type": "module",
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "build": "(cd ../core && bun run build) && tsup",
+    "typecheck": "(cd ../core && bun run build) && tsc --noEmit",
+    "test": "(cd ../core && bun run build) && bun test",
+    "phoenix:dry-run": "bun src/cli.ts run --dry-run --agentv-root ../.. --out reports/dry-run.json",
+    "phoenix:assert-smoke": "bun src/cli.ts run --dry-run --agentv-root ../.. --filter examples/features/assert/evals/dataset.eval.yaml --out reports/assert-smoke.json"
+  },
+  "files": ["dist", "README.md", "docs"],
+  "dependencies": {
+    "@agentv/core": "workspace:*",
+    "@arizeai/phoenix-client": "6.10.0",
+    "@arizeai/phoenix-evals": "1.0.3"
+  },
+  "devDependencies": {
+    "tsup": "8.3.5",
+    "typescript": "5.8.3"
+  }
+}
diff --git a/packages/phoenix-adapter/src/agentv/discovery.ts b/packages/phoenix-adapter/src/agentv/discovery.ts
new file mode 100644
index 000000000..1fc102d6e
--- /dev/null
+++ b/packages/phoenix-adapter/src/agentv/discovery.ts
@@ -0,0 +1,39 @@
+import { readdir } from 'node:fs/promises';
+import path from 'node:path';
+import { relativePosix } from './path.js';
+import type { AgentVSource } from './types.js';
+
+// File names treated as AgentV eval suites: `*.eval.yaml` / `*.eval.yml` (or upper-case `EVAL`).
+const EVAL_FILE_RE = /\.(?:eval|EVAL)\.ya?ml$/;
+
+/**
+ * Recursively collect regular files under `dir` into `results`, skipping `node_modules`/`.git`.
+ */
+async function walk(dir: string, results: string[] = []): Promise<string[]> {
+  for (const entry of await readdir(dir, { withFileTypes: true })) {
+    if (entry.name === 'node_modules' || entry.name === '.git') continue;
+    const target = path.join(dir, entry.name);
+    if (entry.isDirectory()) await walk(target, results);
+    else if (entry.isFile()) results.push(target);
+  }
+  return results;
+}
+
+/**
+ * List every eval source under `<agentvRoot>/examples`: files matching `EVAL_FILE_RE` and
+ * files named `evals.json`, ordered by their root-relative POSIX path.
+ */
+export async function discoverAgentVEvals(agentvRoot: string): Promise<AgentVSource[]> {
+  const sources: AgentVSource[] = [];
+  for (const file of await walk(path.join(agentvRoot, 'examples'))) {
+    const isSkillsJson = path.basename(file) === 'evals.json';
+    if (!isSkillsJson && !EVAL_FILE_RE.test(path.basename(file))) continue;
+    sources.push({
+      path: file,
+      relativePath: relativePosix(agentvRoot, file),
+      kind: isSkillsJson ? 'agent-skills-json' : 'eval-yaml',
+    });
+  }
+  // Sort so the listing does not depend on the order in which directories were read.
+  return sources.sort((a, b) => a.relativePath.localeCompare(b.relativePath));
+}
diff --git a/packages/phoenix-adapter/src/agentv/load-spec.ts b/packages/phoenix-adapter/src/agentv/load-spec.ts
new file mode 100644
index 000000000..67905ca30
--- /dev/null
+++ b/packages/phoenix-adapter/src/agentv/load-spec.ts
@@ -0,0 +1,119 @@
+import { existsSync, readFileSync } from 'node:fs';
+import path from 'node:path';
+import { loadTestSuite } from '@agentv/core';
+import YAML from 'yaml';
+import type {
+  AgentVMessage,
+  AgentVSource,
+  JsonObject,
+  NormalizedAssertion,
+  NormalizedCase,
+  NormalizedSuite,
+} from './types.js';
+
+/** Parse a `.json`, `.jsonl` (one JSON value per non-blank line), or YAML file from disk. */
+function parseStructuredFile(filePath: string): unknown {
+  const text = readFileSync(filePath, 'utf8');
+  if (filePath.endsWith('.json')) return JSON.parse(text);
+  if (!filePath.endsWith('.jsonl')) return YAML.parse(text);
+  const records: unknown[] = [];
+  for (const rawLine of text.split('\n')) {
+    const line = rawLine.trim();
+    if (line) records.push(JSON.parse(line));
+  }
+  return records;
+}
+
+/**
+ * Project one loader assertion into `{ name, type, source }`. A bare string becomes a `rubrics`
+ * assertion; otherwise `type` falls back to `name`, then to a 1-based `assertion-N` label.
+ */
+function normalizeAssertion(assertion: unknown, index: number): NormalizedAssertion {
+  if (typeof assertion === 'string') return { type: 'rubrics', source: assertion };
+  // null/undefined are treated as an empty record so the fallbacks below still apply.
+  const fields = (assertion ?? {}) as JsonObject;
+  const name = typeof fields.name === 'string' ? fields.name : undefined;
+  const type = String(fields.type ?? fields.name ?? `assertion-${index + 1}`);
+  return { name, type, source: assertion };
+}
+
+/** Prefer `reference_answer` whenever it is defined; otherwise use `expected_output`. */
+function normalizeExpectedOutput(test: {
+  readonly reference_answer?: string;
+  readonly expected_output?: unknown;
+}): unknown {
+  return test.reference_answer !== undefined ? test.reference_answer : test.expected_output;
+}
+
+/** Climb one `..` per `relativePath` segment from `source.path` to reach the checkout root. */
+function deriveAgentVRoot(source: AgentVSource): string {
+  return path.resolve(source.path, ...source.relativePath.split('/').fill('..'));
+}
+
+/** Name the suite-level features found in the raw file or loaded suite; each appears once. */
+function collectUnsupported(
+  raw: JsonObject,
+  suite: Awaited<ReturnType<typeof loadTestSuite>>,
+): readonly string[] {
+  const rawKeys = ['workspace', 'before_all', 'after_all', 'matrix'];
+  const found = new Set(rawKeys.filter((key) => raw[key] !== undefined));
+  if (suite.trials !== undefined) found.add('trials');
+  if (suite.workspacePath !== undefined) found.add('workspace');
+  const hasTargets = (suite.targets?.length ?? 0) > 0 || (suite.targetRefs?.length ?? 0) > 0;
+  if (hasTargets) found.add('matrix');
+  return [...found];
+}
+
+/**
+ * Load one AgentV eval source and project it into the adapter's `NormalizedSuite` shape.
+ *
+ * AgentV stays the source of truth: tests come from `@agentv/core`'s `loadTestSuite`, and the
+ * raw file is re-read only for suite-level fields (`skill_name`, `name`, `description`) and
+ * unsupported-feature keys. Add Phoenix-specific behavior after this boundary rather than
+ * duplicating AgentV YAML semantics in the adapter.
+ *
+ * @throws Error when `source.path` does not exist on disk.
+ */
+export async function loadAgentVEvalSuite(source: AgentVSource): Promise<NormalizedSuite> {
+  const { path: sourceFile, relativePath } = source;
+  if (!existsSync(sourceFile)) {
+    throw new Error(`AgentV eval source does not exist: ${sourceFile}`);
+  }
+
+  const raw = (parseStructuredFile(sourceFile) ?? {}) as JsonObject;
+  const loaded = await loadTestSuite(sourceFile, deriveAgentVRoot(source));
+
+  // Name precedence: raw `skill_name`, first test's `suite`, raw `name`, then the file name.
+  // NOTE(review): only `.yml`/`.yaml` is stripped, so an `evals.json` source keeps its extension.
+  const fileStem = path.basename(sourceFile).replace(/\.ya?ml$/, '');
+  const suiteName = raw.skill_name ?? loaded.tests[0]?.suite ?? raw.name ?? fileStem;
+
+  const cases = loaded.tests.map((test, position): NormalizedCase => {
+    const targetMetadata = test.targets ? { targets: test.targets } : {};
+    return {
+      id: String(test.id ?? `case-${position + 1}`),
+      criteria: test.criteria || undefined,
+      input: test.input as readonly AgentVMessage[],
+      expectedOutput: normalizeExpectedOutput(test),
+      assertions: (test.assertions ?? []).map(normalizeAssertion),
+      metadata: { ...(test.metadata ?? {}), ...targetMetadata },
+      sourcePath: relativePath,
+    };
+  });
+
+  // Flag cases the loader produced without any input messages.
+  const warnings: string[] = [];
+  for (const entry of cases) {
+    if (entry.input.length === 0) warnings.push(`${relativePath}: ${entry.id} has no input`);
+  }
+
+  return {
+    name: String(suiteName),
+    description: typeof raw.description === 'string' ? raw.description : undefined,
+    source,
+    cases,
+    suiteAssertions: [],
+    warnings,
+    unsupportedFeatures: collectUnsupported(raw, loaded),
+  };
+}
diff --git a/packages/phoenix-adapter/src/agentv/path.ts b/packages/phoenix-adapter/src/agentv/path.ts
new file mode 100644
index 000000000..5d56b0a26
--- /dev/null
+++ b/packages/phoenix-adapter/src/agentv/path.ts
@@ -0,0 +1,22 @@
+import { existsSync } from 'node:fs';
+import path from 'node:path';
+
+/** Resolve the AgentV root: explicit `input`, else `AGENTV_ROOT`, else a detected default. */
+export function resolveAgentVRoot(input?: string): string {
+  return path.resolve(input ?? process.env.AGENTV_ROOT ?? defaultAgentVRoot());
+}
+
+/** First of `../agentv`, `../../agentv` that contains an `examples` entry, else `../agentv`. */
+function defaultAgentVRoot(): string {
+  const candidates = ['../agentv', '../../agentv'];
+  return candidates.find((dir) => existsSync(path.resolve(dir, 'examples'))) ?? '../agentv';
+}
+
+/** `toPosixPath` swaps the host separator for `/`; `relativePosix` does so for `path.relative`. */
+export function toPosixPath(value: string): string {
+  return path.sep === '/' ? value : value.split(path.sep).join('/');
+}
+
+export function relativePosix(from: string, to: string): string {
+  return toPosixPath(path.relative(from, to));
+}
diff --git a/packages/phoenix-adapter/src/agentv/types.ts b/packages/phoenix-adapter/src/agentv/types.ts
new file mode 100644
index 000000000..317704f0c
--- /dev/null
+++ b/packages/phoenix-adapter/src/agentv/types.ts
@@ -0,0 +1,40 @@
+export type JsonObject = Record<string, unknown>; // Loosely typed record for raw parsed data and metadata.
+
+export type AgentVSourceKind = 'eval-yaml' | 'agent-skills-json'; // Discovery assigns 'agent-skills-json' to files named `evals.json`.
+
+export interface AgentVSource {
+  readonly path: string; // Location of the file on disk; discovery builds it by joining from the AgentV root.
+  readonly relativePath: string; // Path relative to the AgentV root, `/`-separated.
+  readonly kind: AgentVSourceKind;
+}
+
+export interface AgentVMessage {
+  readonly role: string;
+  readonly content: unknown; // Not narrowed here; consumers must inspect it before use.
+}
+
+export interface NormalizedAssertion {
+  readonly name?: string; // Present only when the raw assertion carries a string `name`.
+  readonly type: string; // Assertion family; bare-string assertions are normalized to 'rubrics'.
+  readonly source: unknown; // The assertion value as received, kept unmodified.
+}
+
+export interface NormalizedCase {
+  readonly id: string; // The test id, or `case-N` (1-based) when the test has none.
+  readonly criteria?: string;
+  readonly input: readonly AgentVMessage[];
+  readonly expectedOutput?: unknown; // `reference_answer` when defined, otherwise `expected_output`.
+  readonly assertions: readonly NormalizedAssertion[];
+  readonly metadata: JsonObject; // Test metadata, plus `targets` when the test declares them.
+  readonly sourcePath: string; // Copied from the owning source's `relativePath`.
+}
+
+export interface NormalizedSuite {
+  readonly name: string;
+  readonly description?: string;
+  readonly source: AgentVSource;
+  readonly cases: readonly NormalizedCase[];
+  readonly suiteAssertions: readonly NormalizedAssertion[]; // `loadAgentVEvalSuite` currently always emits `[]`.
+  readonly warnings: readonly string[]; // `loadAgentVEvalSuite` adds one entry per case whose input is empty.
+  readonly unsupportedFeatures: readonly string[]; // De-duplicated names such as 'workspace', 'matrix', 'trials'.
+}
diff --git a/packages/phoenix-adapter/src/cli.ts b/packages/phoenix-adapter/src/cli.ts
new file mode 100644
index 000000000..bce0411fc
--- /dev/null
+++ b/packages/phoenix-adapter/src/cli.ts
@@ -0,0 +1,67 @@
+#!/usr/bin/env bun
+import path from 'node:path';
+import { resolveAgentVRoot } from './agentv/path.js';
+import { formatMarkdownReport } from './parity/report.js';
+import type { RunOptions } from './run/options.js';
+import { runSuite } from './run/run-suite.js';
+
+/** Help text printed for `--help`/`-h` or no arguments, and appended to CLI usage errors. */
+function usage(): string {
+  return `Usage:
+  bun src/cli.ts run [--dry-run] [--agentv-root ../agentv] [--filter features/assert] [--eval-file path] [--out reports/dry-run.json]
+
+Options:
+  --agentv-root <path>       Source AgentV checkout. Defaults to AGENTV_ROOT or ../agentv.
+  --eval-file <path>         Run one eval source.
+  --filter <text>            Run sources whose repo-relative path contains text.
+  --dry-run                  Convert and verify without contacting Phoenix.
+  --out <path>               JSON report path. Defaults to reports/phoenix-report.json.
+  --namespace <name>         Phoenix dataset name prefix.
+  --fail-on-unsupported      Treat unsupported features as failures.
+`;
+}
+
+/** Parse `run` arguments into `RunOptions`; prints usage and returns `undefined` for help. */
+function parseArgs(argv: readonly string[]): RunOptions | undefined {
+  if (argv.length === 0 || argv.includes('--help') || argv.includes('-h')) {
+    console.log(usage());
+    return undefined;
+  }
+
+  const [command, ...rest] = argv;
+  if (command !== 'run') throw new Error(`Unknown command: ${command}\n\n${usage()}`);
+  const booleanFlags = new Set(['--dry-run', '--fail-on-unsupported']);
+  const valueFlags = new Set(['--agentv-root', '--eval-file', '--filter', '--out', '--namespace']);
+  const enabled = new Set<string>();
+  const values = new Map<string, string>();
+  const pending = [...rest];
+  for (let arg = pending.shift(); arg !== undefined; arg = pending.shift()) {
+    if (booleanFlags.has(arg)) {
+      enabled.add(arg);
+      continue;
+    }
+    // Reject typos and stray positionals: silently ignoring `--filtr x` would run every suite.
+    if (!valueFlags.has(arg)) throw new Error(`Unknown option: ${arg}\n\n${usage()}`);
+    const value = pending.shift();
+    if (!value || value.startsWith('--')) throw new Error(`Missing value for ${arg}`);
+    values.set(arg, value);
+  }
+
+  const evalFile = values.get('--eval-file');
+  return {
+    agentvRoot: resolveAgentVRoot(values.get('--agentv-root')),
+    evalFile: evalFile ? path.resolve(evalFile) : undefined,
+    filter: values.get('--filter'),
+    dryRun: enabled.has('--dry-run'),
+    out: path.resolve(values.get('--out') ?? 'reports/phoenix-report.json'),
+    namespace: values.get('--namespace'),
+    failOnUnsupported: enabled.has('--fail-on-unsupported'),
+  };
+}
+
+// Entry point: run the requested suites and print the Markdown report. The process exits with
+// status 1 when at least one suite failed; help-only invocations do nothing further.
+const options = parseArgs(Bun.argv.slice(2));
+const report = options ? await runSuite(options) : undefined;
+if (report) console.log(formatMarkdownReport(report));
+if ((report?.failedSuites ?? 0) > 0) process.exit(1);
diff --git a/packages/phoenix-adapter/src/evaluators/deterministic.ts b/packages/phoenix-adapter/src/evaluators/deterministic.ts
new file mode 100644
index 000000000..593d03c74
--- /dev/null
+++ b/packages/phoenix-adapter/src/evaluators/deterministic.ts
@@ -0,0 +1,178 @@
+import type {
+  DeterministicEvaluatorType,
+  EvaluationContext,
+  EvaluatorResult,
+  NormalizedAssertionConfig,
+} from './types.js';
+
+/**
+ * Routes an assertion to the deterministic evaluator that matches its `type`.
+ *
+ * @param assertion - Normalized assertion configuration.
+ * @param context - Output (and optional expected output) under evaluation.
+ * @returns The evaluator's result, or a failing result when the type is not
+ *   one of `contains`, `regex`, `equals`, or `is-json`.
+ */
+export function evaluateDeterministicAssertion(
+  assertion: NormalizedAssertionConfig,
+  context: EvaluationContext,
+): EvaluatorResult {
+  const kind = assertion.type as DeterministicEvaluatorType;
+
+  if (kind === 'contains') return evaluateContains(assertion, context);
+  if (kind === 'regex') return evaluateRegex(assertion, context);
+  if (kind === 'equals') return evaluateEquals(assertion, context);
+  if (kind === 'is-json') return evaluateIsJson(assertion, context);
+
+  const explanation = `Unsupported deterministic evaluator: ${String(assertion.type)}`;
+  return result(assertion, false, explanation);
+}
+
+/**
+ * Checks whether the stringified output contains the assertion's value.
+ *
+ * Matching is case-sensitive unless `assertion.caseSensitive` is exactly
+ * `false`. A missing (`undefined` or `null`) value fails the assertion.
+ */
+function evaluateContains(
+  assertion: NormalizedAssertionConfig,
+  context: EvaluationContext,
+): EvaluatorResult {
+  const needle = assertionValue(assertion);
+  if (needle == null) {
+    return result(assertion, false, 'contains assertion is missing a value');
+  }
+
+  const haystack = stringifyOutput(context.output);
+  const expected = String(needle);
+
+  let found: boolean;
+  if (assertion.caseSensitive === false) {
+    found = haystack.toLocaleLowerCase().includes(expected.toLocaleLowerCase());
+  } else {
+    found = haystack.includes(expected);
+  }
+
+  if (found) return result(assertion, true, `Output contains ${expected}`);
+  return result(assertion, false, `Output does not contain ${expected}`);
+}
+
+/**
+ * Tests the stringified output against a regular expression.
+ *
+ * The pattern comes from `assertion.pattern`, falling back to the assertion's
+ * string value; `assertion.flags` is passed to the `RegExp` constructor. Any
+ * error raised while building or applying the expression yields a failing
+ * result instead of propagating.
+ */
+function evaluateRegex(
+  assertion: NormalizedAssertionConfig,
+  context: EvaluationContext,
+): EvaluatorResult {
+  const pattern = assertion.pattern ?? stringAssertionValue(assertion);
+  if (!pattern) return result(assertion, false, 'regex assertion is missing a pattern');
+
+  try {
+    const matcher = new RegExp(pattern, assertion.flags);
+    const subject = stringifyOutput(context.output);
+
+    if (matcher.test(subject)) {
+      return result(assertion, true, `Output matches /${pattern}/`);
+    }
+    return result(assertion, false, `Output does not match /${pattern}/`);
+  } catch (cause) {
+    const detail = cause instanceof Error ? cause.message : String(cause);
+    return result(assertion, false, `Invalid regex pattern: ${detail}`);
+  }
+}
+
+/**
+ * Compares the output with the expected value using a key-order-insensitive
+ * JSON serialization.
+ *
+ * The expected value is the assertion's own value when it has one, otherwise
+ * `context.expectedOutput`. When neither is defined the assertion fails with
+ * an explicit explanation instead of comparing against `undefined` (which
+ * used to let an `undefined` output pass vacuously).
+ */
+function evaluateEquals(
+  assertion: NormalizedAssertionConfig,
+  context: EvaluationContext,
+): EvaluatorResult {
+  const expected = assertionValue(assertion) ?? context.expectedOutput;
+
+  if (expected === undefined) {
+    return result(assertion, false, 'equals assertion is missing an expected value');
+  }
+
+  const passed = stableValue(context.output) === stableValue(expected);
+
+  return result(
+    assertion,
+    passed,
+    passed ? 'Output equals expected value' : 'Output does not equal expected value',
+  );
+}
+
+/**
+ * Passes when the output is an object or a string that parses as JSON;
+ * otherwise fails with the reason reported by `parseJsonLike`.
+ */
+function evaluateIsJson(
+  assertion: NormalizedAssertionConfig,
+  context: EvaluationContext,
+): EvaluatorResult {
+  const outcome = parseJsonLike(context.output);
+
+  if (outcome.ok) return result(assertion, true, 'Output is valid JSON');
+  return result(assertion, false, outcome.reason);
+}
+
+/**
+ * Returns the assertion's comparison value, looking at the aliases `value`,
+ * `expected`, `text`, and `substring` in that order.
+ *
+ * An alias that is present but `undefined` is skipped, so it cannot shadow a
+ * later alias that carries a real value. `null` is returned as-is so callers
+ * can decide how to treat it.
+ *
+ * @returns The first defined alias value, or `undefined` when none is set.
+ */
+function assertionValue(assertion: NormalizedAssertionConfig): unknown {
+  for (const key of ['value', 'expected', 'text', 'substring'] as const) {
+    const candidate = assertion[key];
+    if (candidate !== undefined) return candidate;
+  }
+
+  return undefined;
+}
+
+/** Returns the assertion's value only when it is a string; otherwise `undefined`. */
+function stringAssertionValue(assertion: NormalizedAssertionConfig): string | undefined {
+  const candidate = assertionValue(assertion);
+  if (typeof candidate !== 'string') return undefined;
+
+  return candidate;
+}
+
+/**
+ * Converts an evaluation output to text for substring and regex matching.
+ *
+ * Strings pass through unchanged, `null`/`undefined` become the empty string,
+ * and everything else is JSON-serialized. Values `JSON.stringify` cannot
+ * represent (it returns `undefined` for functions and symbols, and throws for
+ * BigInt and circular structures) fall back to `String(output)` so the
+ * declared `string` return type always holds.
+ */
+function stringifyOutput(output: unknown): string {
+  if (typeof output === 'string') return output;
+  if (output === undefined || output === null) return '';
+
+  try {
+    return JSON.stringify(output) ?? String(output);
+  } catch {
+    return String(output);
+  }
+}
+
+/** Serializes a value to JSON after recursively sorting object keys. */
+function stableValue(value: unknown): string {
+  const canonical = sortJsonValue(value);
+  return JSON.stringify(canonical);
+}
+
+/**
+ * Returns a copy of `value` in which every object has its keys sorted with
+ * `localeCompare`, recursing through arrays and nested objects. Array order is
+ * kept; primitives and `null` are returned unchanged.
+ */
+function sortJsonValue(value: unknown): unknown {
+  if (Array.isArray(value)) return value.map((item) => sortJsonValue(item));
+  if (!value || typeof value !== 'object') return value;
+
+  const entries = Object.entries(value as Record<string, unknown>);
+  entries.sort((a, b) => a[0].localeCompare(b[0]));
+
+  const normalized = entries.map(([key, entryValue]): [string, unknown] => [
+    key,
+    sortJsonValue(entryValue),
+  ]);
+
+  return Object.fromEntries(normalized);
+}
+
+/**
+ * Decides whether a value counts as JSON: any non-null object is accepted as
+ * is, a string must parse with `JSON.parse`, and anything else is rejected.
+ *
+ * @returns `{ ok: true }` on success, otherwise `{ ok: false, reason }`.
+ */
+function parseJsonLike(value: unknown): { ok: true } | { ok: false; reason: string } {
+  if (typeof value === 'string') {
+    try {
+      JSON.parse(value);
+    } catch (cause) {
+      const detail = cause instanceof Error ? cause.message : String(cause);
+      return { ok: false, reason: `Output is not valid JSON: ${detail}` };
+    }
+    return { ok: true };
+  }
+
+  if (value && typeof value === 'object') return { ok: true };
+
+  return { ok: false, reason: 'Output is not a JSON string or object' };
+}
+
+/**
+ * Builds an {@link EvaluatorResult} for a pass/fail outcome.
+ *
+ * The result name falls back to the assertion type, the score is 1 for a pass
+ * and 0 for a fail, and the assertion's metadata is carried over unchanged.
+ */
+function result(
+  assertion: NormalizedAssertionConfig,
+  passed: boolean,
+  explanation: string,
+): EvaluatorResult {
+  const name = assertion.name ?? String(assertion.type);
+  const score = passed ? 1 : 0;
+  const label: EvaluatorResult['label'] = passed ? 'pass' : 'fail';
+
+  return { name, type: assertion.type, score, passed, label, explanation, metadata: assertion.metadata };
+}
diff --git a/packages/phoenix-adapter/src/evaluators/registry.ts b/packages/phoenix-adapter/src/evaluators/registry.ts
new file mode 100644
index 000000000..c7623f33d
--- /dev/null
+++ b/packages/phoenix-adapter/src/evaluators/registry.ts
@@ -0,0 +1,134 @@
+import { evaluateDeterministicAssertion } from './deterministic.js';
+import type {
+  DeterministicEvaluatorType,
+  EvaluationContext,
+  EvaluatorAdapter,
+  EvaluatorResult,
+  EvaluatorType,
+  NormalizedAssertionConfig,
+  UnsupportedEvaluatorReport,
+  UnsupportedEvaluatorType,
+} from './types.js';
+
+/**
+ * Evaluator types this adapter can score locally. `isDeterministicEvaluatorType`
+ * tests membership in this list.
+ */
+export const deterministicEvaluatorTypes = [
+  'contains',
+  'regex',
+  'equals',
+  'is-json',
+] as const satisfies readonly DeterministicEvaluatorType[];
+
+/**
+ * Evaluator types that are recognized but not implemented; each has an entry
+ * in `unsupportedReasons`. `isKnownUnsupportedEvaluatorType` tests membership
+ * in this list.
+ */
+export const unsupportedEvaluatorTypes = [
+  'llm-grader',
+  'rubrics',
+  'code-grader',
+  'composite',
+  'field-accuracy',
+  'execution-metrics',
+  'tool-trajectory',
+  'cost',
+  'latency',
+  'trial-output-consistency',
+] as const satisfies readonly UnsupportedEvaluatorType[];
+
+/**
+ * Human-readable explanation for each known-unsupported evaluator type. Used
+ * as the `explanation` of unsupported results and the `reason` of unsupported
+ * reports.
+ */
+const unsupportedReasons: Record<UnsupportedEvaluatorType, string> = {
+  'llm-grader': 'Model-backed Phoenix judging is not implemented in this first-pass adapter.',
+  rubrics:
+    'Rubric scoring requires a model-backed or rubric-specific adapter that is not implemented yet.',
+  'code-grader':
+    'Code grader execution is deferred until source-relative sandboxing is implemented.',
+  composite:
+    'Composite evaluator aggregation is deferred until nested evaluator normalization is available.',
+  'field-accuracy':
+    'Field-level accuracy scoring is deferred until expected output field mapping is implemented.',
+  'execution-metrics':
+    'Execution metric scoring needs run or trace metric data that is not wired yet.',
+  'tool-trajectory': 'Tool trajectory scoring needs trace data that is not wired yet.',
+  cost: 'Cost scoring needs Phoenix or provider usage metrics that are not wired yet.',
+  latency: 'Latency scoring needs Phoenix or runner timing metrics that are not wired yet.',
+  'trial-output-consistency':
+    'Trial consistency scoring needs multiple trial outputs that are not wired yet.',
+};
+
+/**
+ * Wraps an assertion in an {@link EvaluatorAdapter}.
+ *
+ * Deterministic types get an adapter that runs the matching local evaluator;
+ * every other type gets an adapter marked `supported: false` whose `evaluate`
+ * returns an "unsupported" result.
+ */
+export function createEvaluatorAdapter(assertion: NormalizedAssertionConfig): EvaluatorAdapter {
+  const { type } = assertion;
+  const name = assertion.name ?? String(type);
+
+  if (!isDeterministicEvaluatorType(type)) {
+    const evaluateUnsupported = (): EvaluatorResult => unsupportedResult(assertion);
+    return { type, name, supported: false, evaluate: evaluateUnsupported };
+  }
+
+  const evaluateSupported = (context: EvaluationContext): EvaluatorResult =>
+    evaluateDeterministicAssertion(assertion, context);
+  return { type, name, supported: true, evaluate: evaluateSupported };
+}
+
+/** Creates one adapter per assertion, preserving the input order. */
+export function createEvaluatorRegistry(
+  assertions: readonly NormalizedAssertionConfig[],
+): EvaluatorAdapter[] {
+  const adapters: EvaluatorAdapter[] = [];
+  for (const assertion of assertions) adapters.push(createEvaluatorAdapter(assertion));
+  return adapters;
+}
+
+/** Builds the adapter for a single assertion and evaluates it against `context`. */
+export function evaluateAssertion(
+  assertion: NormalizedAssertionConfig,
+  context: EvaluationContext,
+): EvaluatorResult {
+  const adapter = createEvaluatorAdapter(assertion);
+  return adapter.evaluate(context);
+}
+
+/**
+ * Produces one report per assertion whose type is not supported, in input
+ * order. Each report carries the assertion name (or its type), the type, the
+ * reason it is unsupported, and the assertion's metadata.
+ */
+export function unsupportedEvaluatorReports(
+  assertions: readonly NormalizedAssertionConfig[],
+): UnsupportedEvaluatorReport[] {
+  const reports: UnsupportedEvaluatorReport[] = [];
+
+  for (const assertion of assertions) {
+    if (!isUnsupportedAssertion(assertion)) continue;
+
+    const { type, metadata } = assertion;
+    const name = assertion.name ?? String(type);
+    reports.push({ name, type, reason: unsupportedReason(type), metadata });
+  }
+
+  return reports;
+}
+
+/** Reports whether the adapter can score this evaluator type (currently: deterministic types only). */
+export function isSupportedEvaluatorType(type: EvaluatorType): boolean {
+  const deterministic = isDeterministicEvaluatorType(type);
+  return deterministic;
+}
+
+/** Type guard: true when `type` is listed in `deterministicEvaluatorTypes`. */
+export function isDeterministicEvaluatorType(
+  type: EvaluatorType,
+): type is DeterministicEvaluatorType {
+  const candidate = String(type);
+  const known: readonly string[] = deterministicEvaluatorTypes;
+  return known.indexOf(candidate) !== -1;
+}
+
+/** Type guard: true when `type` is listed in `unsupportedEvaluatorTypes`. */
+export function isKnownUnsupportedEvaluatorType(
+  type: EvaluatorType,
+): type is UnsupportedEvaluatorType {
+  const candidate = String(type);
+  const known: readonly string[] = unsupportedEvaluatorTypes;
+  return known.indexOf(candidate) !== -1;
+}
+
+/** True when the assertion's type is not one the adapter supports. */
+function isUnsupportedAssertion(assertion: NormalizedAssertionConfig): boolean {
+  const supported = isSupportedEvaluatorType(assertion.type);
+  return !supported;
+}
+
+/**
+ * Builds the zero-score, non-passing result returned for assertions the
+ * adapter cannot evaluate. It is labeled and flagged as unsupported, and its
+ * explanation is the reason for the assertion's type.
+ */
+function unsupportedResult(assertion: NormalizedAssertionConfig): EvaluatorResult {
+  const { type, metadata } = assertion;
+  const name = assertion.name ?? String(type);
+  const explanation = unsupportedReason(type);
+
+  return {
+    name,
+    type,
+    score: 0,
+    passed: false,
+    label: 'unsupported',
+    explanation,
+    unsupported: true,
+    metadata,
+  };
+}
+
+/**
+ * Returns the documented reason for a known-unsupported type, or an
+ * "Unknown evaluator family" message for any other type.
+ */
+function unsupportedReason(type: EvaluatorType): string {
+  return isKnownUnsupportedEvaluatorType(type)
+    ? unsupportedReasons[type]
+    : `Unknown evaluator family: ${String(type)}`;
+}
diff --git a/packages/phoenix-adapter/src/evaluators/types.ts b/packages/phoenix-adapter/src/evaluators/types.ts
new file mode 100644
index 000000000..e0658c6c9
--- /dev/null
+++ b/packages/phoenix-adapter/src/evaluators/types.ts
@@ -0,0 +1,59 @@
+/** Evaluator types that are scored locally without a model. */
+export type DeterministicEvaluatorType = 'contains' | 'regex' | 'equals' | 'is-json';
+
+/** Evaluator types that are recognized by name but have no implementation here. */
+export type UnsupportedEvaluatorType =
+  | 'llm-grader'
+  | 'rubrics'
+  | 'code-grader'
+  | 'composite'
+  | 'field-accuracy'
+  | 'execution-metrics'
+  | 'tool-trajectory'
+  | 'cost'
+  | 'latency'
+  | 'trial-output-consistency';
+
+/**
+ * Any evaluator type name. The trailing `| string` means arbitrary strings are
+ * accepted, so values must be narrowed at runtime before being treated as one
+ * of the known literals.
+ */
+export type EvaluatorType = DeterministicEvaluatorType | UnsupportedEvaluatorType | string;
+
+/**
+ * Assertion configuration consumed by the evaluators. Beyond the declared
+ * fields, arbitrary extra keys are allowed through the index signature.
+ */
+export interface NormalizedAssertionConfig {
+  /** Evaluator type name used to select the evaluator. */
+  type: EvaluatorType;
+  /** Optional display name; results fall back to `type` when absent. */
+  name?: string;
+  /** Comparison value. */
+  value?: unknown;
+  /** Alternative key for the comparison value. */
+  expected?: unknown;
+  /** Regular-expression source for `regex` assertions. */
+  pattern?: string;
+  /** Flags passed to the `RegExp` constructor for `regex` assertions. */
+  flags?: string;
+  /** `contains` matching ignores case only when this is exactly `false`. */
+  caseSensitive?: boolean;
+  /** Free-form metadata copied onto results and reports. */
+  metadata?: Record<string, unknown>;
+  [key: string]: unknown;
+}
+
+/** Data an evaluator receives for a single example. */
+export interface EvaluationContext {
+  /** The output being judged. */
+  output: unknown;
+  /** Reference output; `equals` falls back to it when the assertion has no value. */
+  expectedOutput?: unknown;
+  /** The example input, when available. */
+  input?: unknown;
+  /** Example-level metadata, when available. */
+  metadata?: Record<string, unknown>;
+}
+
+/** Outcome of evaluating one assertion. */
+export interface EvaluatorResult {
+  /** Assertion name, or its type when it has no name. */
+  name: string;
+  /** Evaluator type that produced the result. */
+  type: EvaluatorType;
+  /** Numeric score; the evaluators in this package emit 1 for a pass and 0 otherwise. */
+  score: number;
+  /** Whether the assertion passed. */
+  passed: boolean;
+  /** Categorical outcome. */
+  label: 'pass' | 'fail' | 'unsupported';
+  /** Human-readable reason for the outcome. */
+  explanation: string;
+  /** Set to `true` when the evaluator type is not implemented. */
+  unsupported?: boolean;
+  /** Metadata copied from the assertion. */
+  metadata?: Record<string, unknown>;
+}
+
+/** An assertion bound to the function that evaluates it. */
+export interface EvaluatorAdapter {
+  /** Evaluator type of the underlying assertion. */
+  type: EvaluatorType;
+  /** Assertion name, or its type when it has no name. */
+  name: string;
+  /** `false` when `evaluate` can only return an "unsupported" result. */
+  supported: boolean;
+  /** Evaluates the bound assertion against the given context. */
+  evaluate(context: EvaluationContext): EvaluatorResult;
+}
+
+/** Describes an assertion the adapter cannot evaluate and why. */
+export interface UnsupportedEvaluatorReport {
+  /** Assertion name, or its type when it has no name. */
+  name: string;
+  /** Evaluator type of the assertion. */
+  type: EvaluatorType;
+  /** Explanation of why the type is unsupported. */
+  reason: string;
+  /** Metadata copied from the assertion. */
+  metadata?: Record<string, unknown>;
+}
diff --git a/packages/phoenix-adapter/src/index.ts b/packages/phoenix-adapter/src/index.ts
new file mode 100644
index 000000000..ef018a94b
--- /dev/null
+++ b/packages/phoenix-adapter/src/index.ts
@@ -0,0 +1,12 @@
+export { discoverAgentVEvals } from './agentv/discovery.js';
+export { loadAgentVEvalSuite } from './agentv/load-spec.js';
+export { createPhoenixDatasetPayload } from './phoenix/datasets.js';
+export { runSuite } from './run/run-suite.js';
+
+export type {
+  AgentVSource,
+  NormalizedAssertion,
+  NormalizedCase,
+  NormalizedSuite,
+} from './agentv/types.js';
+export type { PhoenixDatasetPayload } from './phoenix/types.js';
diff --git a/packages/phoenix-adapter/src/parity/baselines.ts b/packages/phoenix-adapter/src/parity/baselines.ts
new file mode 100644
index 000000000..5a3acfdc3
--- /dev/null
+++ b/packages/phoenix-adapter/src/parity/baselines.ts
@@ -0,0 +1,27 @@
+import { existsSync, readFileSync } from 'node:fs';
+import path from 'node:path';
+
+/** Test ids extracted from a baseline JSONL file that sits next to an eval source. */
+export interface BaselineSummary {
+  /** Base name (no directory) of the baseline file. */
+  readonly path: string;
+  /** One id per non-empty baseline line, in file order; empty string when a line has no id. */
+  readonly testIds: readonly string[];
+}
+
+/**
+ * Derives the baseline file path for an eval source by replacing a trailing
+ * `.yaml`/`.yml` extension (case-insensitive) with `.baseline.jsonl`. Paths
+ * without such an extension are returned unchanged.
+ */
+export function baselinePathFor(evalSourcePath: string): string {
+  const yamlExtension = /\.ya?ml$/i;
+  return evalSourcePath.replace(yamlExtension, '.baseline.jsonl');
+}
+
+/**
+ * Reads the baseline JSONL file that accompanies a YAML eval source.
+ *
+ * Blank lines are skipped. Each remaining line is parsed once as JSON and its
+ * `test_id` (or `testId`) is collected; a record without either, or a record
+ * that is not an object, contributes an empty string.
+ *
+ * @param evalSourcePath - Path of the eval source file.
+ * @returns The summary, or `undefined` when the source is not a
+ *   `.yaml`/`.yml` file or no baseline file exists next to it.
+ * @throws SyntaxError naming the baseline file and line number when a line is
+ *   not valid JSON.
+ */
+export function readBaselineSummary(evalSourcePath: string): BaselineSummary | undefined {
+  if (!/\.ya?ml$/i.test(evalSourcePath)) return undefined;
+  const baselinePath = baselinePathFor(evalSourcePath);
+  if (!existsSync(baselinePath)) return undefined;
+
+  const testIds: string[] = [];
+  const rawLines = readFileSync(baselinePath, 'utf8').split('\n');
+
+  rawLines.forEach((rawLine, index) => {
+    const line = rawLine.trim();
+    if (!line) return;
+
+    let record: unknown;
+    try {
+      record = JSON.parse(line);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      // Add the location so a corrupt baseline can be found and fixed.
+      throw new SyntaxError(`Invalid JSON in ${baselinePath} at line ${index + 1}: ${message}`);
+    }
+
+    // `JSON.parse` may yield null or a primitive; treat those as "no id".
+    const fields =
+      record && typeof record === 'object' ? (record as Record<string, unknown>) : {};
+    testIds.push(String(fields.test_id ?? fields.testId ?? ''));
+  });
+
+  return {
+    path: path.basename(baselinePath),
+    testIds,
+  };
+}
diff --git a/packages/phoenix-adapter/src/parity/compare.ts b/packages/phoenix-adapter/src/parity/compare.ts
new file mode 100644
index 000000000..bc30805b2
--- /dev/null
+++ b/packages/phoenix-adapter/src/parity/compare.ts
@@ -0,0 +1,74 @@
+import type { NormalizedSuite } from '../agentv/types.js';
+import { unsupportedEvaluatorReports } from '../evaluators/registry.js';
+import type { NormalizedAssertionConfig } from '../evaluators/types.js';
+import type { PhoenixDatasetPayload } from '../phoenix/types.js';
+import { readBaselineSummary } from './baselines.js';
+import type { SuiteRunSummary } from './types.js';
+
+/**
+ * Checks a normalized suite against its generated dataset payload and, when a
+ * baseline file exists, against the baseline's test ids.
+ *
+ * Failures are recorded when the dataset example count differs from the case
+ * count, when a baseline id has no converted case (or the reverse), and when
+ * the suite has no cases. Unsupported features combine the suite's own list
+ * with `type: name` entries for unsupported assertions, de-duplicated and
+ * sorted.
+ *
+ * @returns The summary; `status` is `'failed'` when any failure was recorded.
+ */
+export function compareDryRunSuite(
+  suite: NormalizedSuite,
+  dataset: PhoenixDatasetPayload,
+): SuiteRunSummary {
+  const problems: string[] = [];
+  const baseline = readBaselineSummary(suite.source.path);
+  const convertedIds = new Set(suite.cases.map((testCase) => testCase.id));
+
+  const assertionConfigs = suite.cases.flatMap((testCase) =>
+    testCase.assertions.map(toAssertionConfig),
+  );
+  const evaluatorGaps = unsupportedEvaluatorReports(assertionConfigs).map(
+    (report) => `${report.type}: ${report.name}`,
+  );
+  const unsupported = new Set([...suite.unsupportedFeatures, ...evaluatorGaps]);
+
+  const exampleCount = dataset.examples.length;
+  const caseCount = suite.cases.length;
+  if (exampleCount !== caseCount) {
+    problems.push(`Dataset example count ${exampleCount} does not match case count ${caseCount}`);
+  }
+
+  if (baseline) {
+    const baselineIds = new Set(baseline.testIds);
+    for (const id of baselineIds) {
+      if (convertedIds.has(id)) continue;
+      problems.push(`Baseline test id is missing from converted suite: ${id}`);
+    }
+    for (const id of convertedIds) {
+      if (baselineIds.has(id)) continue;
+      problems.push(`Converted test id is missing from baseline: ${id}`);
+    }
+  }
+
+  if (caseCount === 0) problems.push('Suite contains no normalized cases');
+
+  return {
+    source: suite.source.relativePath,
+    datasetName: dataset.name,
+    testCount: caseCount,
+    baselineCount: baseline?.testIds.length,
+    warningCount: suite.warnings.length,
+    unsupportedFeatures: [...unsupported].sort(),
+    status: problems.length > 0 ? 'failed' : 'passed',
+    failures: problems,
+  };
+}
+
+/**
+ * Converts a normalized assertion into an evaluator config.
+ *
+ * When `source` is a non-array object its fields are copied and then `type`
+ * and `name` are overwritten from the assertion (so an absent assertion name
+ * replaces any `name` in the source with `undefined`). Any other source is
+ * stored as `value`.
+ */
+function toAssertionConfig(assertion: {
+  readonly type: string;
+  readonly name?: string;
+  readonly source: unknown;
+}): NormalizedAssertionConfig {
+  const { type, name, source } = assertion;
+  const isRecord = typeof source === 'object' && source !== null && !Array.isArray(source);
+
+  if (!isRecord) return { type, name, value: source };
+
+  const merged: NormalizedAssertionConfig = {
+    ...(source as Record<string, unknown>),
+    type,
+    name,
+  };
+  return merged;
+}
diff --git a/packages/phoenix-adapter/src/parity/report.ts b/packages/phoenix-adapter/src/parity/report.ts
new file mode 100644
index 000000000..c03171f1a
--- /dev/null
+++ b/packages/phoenix-adapter/src/parity/report.ts
@@ -0,0 +1,60 @@
+import { mkdir, writeFile } from 'node:fs/promises';
+import path from 'node:path';
+import type { RunReport, SuiteRunSummary } from './types.js';
+
+/**
+ * Aggregates per-suite summaries into a run report.
+ *
+ * Totals the test counts, counts passed and failed suites, merges every
+ * suite's unsupported features into one sorted, de-duplicated list, and stamps
+ * the report with the current time in ISO format.
+ */
+export function buildRunReport(input: {
+  readonly dryRun: boolean;
+  readonly agentvRoot: string;
+  readonly suites: readonly SuiteRunSummary[];
+}): RunReport {
+  const { dryRun, agentvRoot, suites } = input;
+
+  const unsupported = new Set<string>();
+  let testCount = 0;
+  let passedSuites = 0;
+  let failedSuites = 0;
+
+  for (const suite of suites) {
+    suite.unsupportedFeatures.forEach((feature) => unsupported.add(feature));
+    testCount += suite.testCount;
+    if (suite.status === 'passed') passedSuites += 1;
+    if (suite.status === 'failed') failedSuites += 1;
+  }
+
+  return {
+    generatedAt: new Date().toISOString(),
+    dryRun,
+    agentvRoot,
+    suiteCount: suites.length,
+    testCount,
+    passedSuites,
+    failedSuites,
+    unsupportedFeatures: [...unsupported].sort(),
+    suites,
+  };
+}
+
+/**
+ * Writes the report as two-space-indented JSON (with a trailing newline) to
+ * `outPath`, creating the parent directory first when it does not exist.
+ */
+export async function writeJsonReport(report: RunReport, outPath: string): Promise<void> {
+  const directory = path.dirname(outPath);
+  await mkdir(directory, { recursive: true });
+
+  const serialized = JSON.stringify(report, null, 2);
+  await writeFile(outPath, `${serialized}\n`, 'utf8');
+}
+
+/**
+ * Renders a run report as Markdown: a heading, summary counters, and a table
+ * with one row per suite. A suite that has a Phoenix experiment id gets an
+ * extra row with the experiment id, run count, and evaluation-run count.
+ *
+ * @returns The Markdown text, terminated by a newline.
+ */
+export function formatMarkdownReport(report: RunReport): string {
+  const header = [
+    '# Phoenix AgentV Eval Report',
+    '',
+    `Generated: ${report.generatedAt}`,
+    `Dry run: ${String(report.dryRun)}`,
+    `Suites: ${report.suiteCount}`,
+    `Tests: ${report.testCount}`,
+    `Passed suites: ${report.passedSuites}`,
+    `Failed suites: ${report.failedSuites}`,
+    '',
+    '| Status | Source | Tests | Baseline | Unsupported |',
+    '| --- | --- | ---: | ---: | --- |',
+  ];
+
+  const rows = report.suites.flatMap((suite) => {
+    const unsupported = suite.unsupportedFeatures.join(', ');
+    const suiteRow = `| ${suite.status} | \`${suite.source}\` | ${suite.testCount} | ${suite.baselineCount ?? ''} | ${unsupported} |`;
+    if (!suite.phoenixExperimentId) return [suiteRow];
+
+    const experimentRow = `|  | Phoenix experiment \`${suite.phoenixExperimentId}\` | ${suite.phoenixRunCount ?? ''} | ${suite.phoenixEvaluationRunCount ?? ''} |  |`;
+    return [suiteRow, experimentRow];
+  });
+
+  return `${[...header, ...rows].join('\n')}\n`;
+}
diff --git a/packages/phoenix-adapter/src/parity/types.ts b/packages/phoenix-adapter/src/parity/types.ts
new file mode 100644
index 000000000..27f4365f5
--- /dev/null
+++ b/packages/phoenix-adapter/src/parity/types.ts
@@ -0,0 +1,25 @@
+/** Outcome of converting (and optionally running) one eval suite. */
+export interface SuiteRunSummary {
+  /** Relative path of the eval source. */
+  readonly source: string;
+  /** Name of the Phoenix dataset generated for the suite. */
+  readonly datasetName: string;
+  /** Number of normalized test cases. */
+  readonly testCount: number;
+  /** Number of ids found in the baseline file; absent when there is no baseline. */
+  readonly baselineCount?: number;
+  /** Number of warnings produced while loading the suite. */
+  readonly warningCount: number;
+  /** Sorted, de-duplicated unsupported features for the suite. */
+  readonly unsupportedFeatures: readonly string[];
+  /** Phoenix experiment id; set only when an experiment was run. */
+  readonly phoenixExperimentId?: string;
+  /** Number of runs in the Phoenix experiment. */
+  readonly phoenixRunCount?: number;
+  /** Number of evaluation runs in the Phoenix experiment. */
+  readonly phoenixEvaluationRunCount?: number;
+  /** `'failed'` when `failures` is non-empty. */
+  readonly status: 'passed' | 'failed';
+  /** Human-readable failure messages. */
+  readonly failures: readonly string[];
+}
+
+/** Aggregated result of a whole adapter run, written to disk as JSON. */
+export interface RunReport {
+  /** ISO timestamp of when the report was built. */
+  readonly generatedAt: string;
+  /** Whether the run skipped Phoenix experiments. */
+  readonly dryRun: boolean;
+  /** Root directory the eval sources were discovered under. */
+  readonly agentvRoot: string;
+  /** Number of suites processed. */
+  readonly suiteCount: number;
+  /** Total test cases across all suites. */
+  readonly testCount: number;
+  /** Number of suites with status `'passed'`. */
+  readonly passedSuites: number;
+  /** Number of suites with status `'failed'`. */
+  readonly failedSuites: number;
+  /** Sorted union of every suite's unsupported features. */
+  readonly unsupportedFeatures: readonly string[];
+  /** Per-suite summaries, in processing order. */
+  readonly suites: readonly SuiteRunSummary[];
+}
diff --git a/packages/phoenix-adapter/src/phoenix/datasets.ts b/packages/phoenix-adapter/src/phoenix/datasets.ts
new file mode 100644
index 000000000..d14da7914
--- /dev/null
+++ b/packages/phoenix-adapter/src/phoenix/datasets.ts
@@ -0,0 +1,29 @@
+import type { NormalizedSuite } from '../agentv/types.js';
+import { stableDatasetName } from './names.js';
+import type { PhoenixDatasetPayload } from './types.js';
+
+/**
+ * Converts a normalized suite into a Phoenix dataset payload.
+ *
+ * The dataset name is derived from the suite's relative source path and the
+ * optional namespace. Each case becomes one example whose input holds the
+ * messages, criteria, and raw assertion configs, whose output is the case's
+ * expected output, and whose metadata adds the source path, test id,
+ * assertion names, and raw assertion configs to the case's own metadata.
+ */
+export function createPhoenixDatasetPayload(
+  suite: NormalizedSuite,
+  options: { namespace?: string } = {},
+): PhoenixDatasetPayload {
+  const name = stableDatasetName(suite.source.relativePath, options.namespace);
+
+  const examples = suite.cases.map((testCase) => {
+    const input = {
+      messages: testCase.input,
+      criteria: testCase.criteria,
+      agentv_assertion_configs: testCase.assertions.map((assertion) => assertion.source),
+    };
+    const metadata = {
+      ...testCase.metadata,
+      agentv_source: testCase.sourcePath,
+      agentv_test_id: testCase.id,
+      agentv_assertions: testCase.assertions.map((assertion) => assertion.name ?? assertion.type),
+      agentv_assertion_configs: testCase.assertions.map((assertion) => assertion.source),
+    };
+
+    return { input, output: testCase.expectedOutput, metadata };
+  });
+
+  return {
+    name,
+    description: suite.description,
+    assertions: suite.suiteAssertions,
+    examples,
+  };
+}
diff --git a/packages/phoenix-adapter/src/phoenix/names.ts b/packages/phoenix-adapter/src/phoenix/names.ts
new file mode 100644
index 000000000..3fea54b21
--- /dev/null
+++ b/packages/phoenix-adapter/src/phoenix/names.ts
@@ -0,0 +1,12 @@
+import crypto from 'node:crypto';
+
+/**
+ * Builds a deterministic dataset name of the form `namespace-slug-hash`.
+ *
+ * The slug is the source path without its last extension, lower-cased, with
+ * runs of non-alphanumeric characters collapsed to `-`, one leading and one
+ * trailing `-` removed, and then truncated to 80 characters. The hash is the
+ * first 8 hex characters of the SHA-1 digest of the unmodified source path.
+ */
+export function stableDatasetName(sourcePath: string, namespace = 'agentv-examples'): string {
+  const withoutExtension = sourcePath.replace(/\.[^.]+$/, '');
+  const dashed = withoutExtension.toLowerCase().replace(/[^a-z0-9]+/g, '-');
+  const slug = dashed.replace(/^-|-$/g, '').slice(0, 80);
+
+  const digest = crypto.createHash('sha1').update(sourcePath).digest('hex');
+  const hash = digest.slice(0, 8);
+
+  return [namespace, slug, hash].join('-');
+}
diff --git a/packages/phoenix-adapter/src/phoenix/run-experiment.ts b/packages/phoenix-adapter/src/phoenix/run-experiment.ts
new file mode 100644
index 000000000..41f83fb27
--- /dev/null
+++ b/packages/phoenix-adapter/src/phoenix/run-experiment.ts
@@ -0,0 +1,174 @@
+import { createDataset } from '@arizeai/phoenix-client/datasets';
+import { asExperimentEvaluator, runExperiment } from '@arizeai/phoenix-client/experiments';
+import type { Example } from '@arizeai/phoenix-client/types/datasets';
+import { evaluateAssertion } from '../evaluators/registry.js';
+import type { NormalizedAssertionConfig } from '../evaluators/types.js';
+import type { PhoenixDatasetPayload } from './types.js';
+
+/** Counts reported back after running a Phoenix experiment. */
+export interface PhoenixExperimentSummary {
+  /** Id of the experiment returned by Phoenix. */
+  readonly experimentId: string;
+  /** Number of entries in the experiment's `runs`. */
+  readonly runCount: number;
+  /** Number of evaluation runs, or 0 when none were reported. */
+  readonly evaluationRunCount: number;
+}
+
+/**
+ * Shape the experiment task assumes for dataset examples; the example handed
+ * to the task is cast to this type rather than validated.
+ */
+type PhoenixExample = {
+  readonly input: {
+    readonly messages?: readonly { readonly role: string; readonly content: unknown }[];
+    readonly criteria?: string;
+    readonly agentv_assertion_configs?: readonly unknown[];
+  };
+  // The task reads the expected answer from `output.answer`.
+  readonly output?: Record<string, unknown> | null;
+  readonly metadata?: {
+    readonly agentv_assertion_configs?: readonly unknown[];
+  } | null;
+};
+
+/**
+ * Uploads the dataset to Phoenix and runs an experiment over it.
+ *
+ * The task does not call a model. For each example it returns, in order of
+ * preference: the stored expected answer (`output.answer`), an output
+ * synthesized from the example's assertion configs, or the last input
+ * message's content (falling back to the criteria, then the empty string).
+ *
+ * A single code evaluator then scores every example by running its assertion
+ * configs (read from example metadata). The score is the mean over supported
+ * assertions, or over all assertions when none is supported. The label is
+ * `unsupported` when any assertion is unsupported, otherwise `pass` for a
+ * perfect score and `fail` for anything less.
+ *
+ * @param dataset - Payload describing the dataset name, description, and examples.
+ * @returns The experiment id plus run and evaluation-run counts.
+ */
+export async function runPhoenixExperiment(
+  dataset: PhoenixDatasetPayload,
+): Promise<PhoenixExperimentSummary> {
+  const created = await createDataset({
+    name: dataset.name,
+    description: dataset.description ?? dataset.name,
+    examples: dataset.examples.map((example) => ({
+      input: example.input,
+      output: normalizeExpected(example.output),
+      metadata: example.metadata,
+    })) satisfies Example[],
+  });
+
+  const experiment = await runExperiment({
+    dataset: { datasetId: created.datasetId },
+    experimentName: `${dataset.name}-${Date.now()}`,
+    experimentDescription: `Phoenix equivalent run for ${dataset.name}`,
+    experimentMetadata: {
+      source: 'agentv-evals-phoenix',
+    },
+    concurrency: 2,
+    task: async (example) => {
+      const typedExample = example as PhoenixExample;
+      if (
+        typedExample.output !== undefined &&
+        typedExample.output !== null &&
+        typedExample.output.answer !== undefined &&
+        typedExample.output.answer !== null
+      ) {
+        return stringifyAnswer(typedExample.output.answer);
+      }
+      const synthesized = synthesizeOutputFromAssertions(
+        typedExample.input.agentv_assertion_configs ??
+          typedExample.metadata?.agentv_assertion_configs,
+      );
+      if (synthesized !== undefined) return synthesized;
+      const lastMessage = typedExample.input.messages?.at(-1);
+      return stringifyAnswer(lastMessage?.content ?? typedExample.input.criteria ?? '');
+    },
+    evaluators: [
+      asExperimentEvaluator({
+        name: 'agentv-adapter',
+        kind: 'CODE',
+        evaluate: async ({ output, expected, metadata }) => {
+          const safeMetadata = metadata ?? undefined;
+          const configs = normalizeAssertionConfigs(safeMetadata?.agentv_assertion_configs);
+          if (configs.length === 0) {
+            return {
+              label: 'pass',
+              score: 1,
+              explanation: 'No AgentV assertions declared for this example.',
+              metadata: {},
+            };
+          }
+
+          // Examples are uploaded as `{ answer: ... }` (see normalizeExpected)
+          // and the task returns the bare answer, so compare against the bare
+          // answer too. Passing the wrapper made value-less `equals`
+          // assertions impossible to satisfy.
+          const expectedRecord: unknown = expected;
+          const expectedOutput =
+            expectedRecord !== null &&
+            typeof expectedRecord === 'object' &&
+            'answer' in expectedRecord
+              ? (expectedRecord as Record<string, unknown>).answer
+              : expectedRecord;
+
+          const results = configs.map((config) =>
+            evaluateAssertion(config, {
+              output,
+              expectedOutput,
+              metadata: safeMetadata,
+            }),
+          );
+          const supportedResults = results.filter((result) => !result.unsupported);
+          // With no supported assertion, score over everything (all zeros).
+          const scoredResults = supportedResults.length > 0 ? supportedResults : results;
+          const score =
+            scoredResults.reduce((sum, result) => sum + result.score, 0) /
+            Math.max(scoredResults.length, 1);
+          const unsupportedCount = results.filter((result) => result.unsupported).length;
+
+          return {
+            label: unsupportedCount > 0 ? 'unsupported' : score >= 1 ? 'pass' : 'fail',
+            score,
+            explanation: results
+              .map((result) => `${result.name}: ${result.explanation}`)
+              .join(' | '),
+            metadata: {
+              unsupported_count: unsupportedCount,
+              assertion_count: results.length,
+            },
+          };
+        },
+      }),
+    ],
+  });
+
+  return {
+    experimentId: experiment.id,
+    runCount: Object.keys(experiment.runs).length,
+    evaluationRunCount: experiment.evaluationRuns?.length ?? 0,
+  };
+}
+
+/**
+ * Wraps an expected output as `{ answer }` for upload.
+ *
+ * A one-element array whose element is an object with a `content` key is
+ * unwrapped to that content; a missing output becomes `null`.
+ */
+function normalizeExpected(output: unknown): Record<string, unknown> {
+  const soleEntry: unknown = Array.isArray(output) && output.length === 1 ? output[0] : undefined;
+
+  if (soleEntry && typeof soleEntry === 'object' && 'content' in soleEntry) {
+    return { answer: (soleEntry as { readonly content?: unknown }).content };
+  }
+
+  return { answer: output ?? null };
+}
+
+/**
+ * Converts an answer to a string.
+ *
+ * Strings pass through. A one-element array whose element is an object with a
+ * `content` key is unwrapped to that content, recursively. Everything else is
+ * JSON-serialized; values `JSON.stringify` maps to `undefined` (such as
+ * `undefined` itself) become the empty string so the declared `string` return
+ * type always holds.
+ */
+function stringifyAnswer(value: unknown): string {
+  if (typeof value === 'string') return value;
+  if (Array.isArray(value) && value.length === 1) {
+    const first = value[0] as { readonly content?: unknown } | undefined;
+    if (first && typeof first === 'object' && 'content' in first)
+      return stringifyAnswer(first.content);
+  }
+  return JSON.stringify(value) ?? '';
+}
+
+/**
+ * Fabricates an output intended to satisfy an example's deterministic
+ * assertions, for examples that have no stored expected answer.
+ *
+ * - Any `is-json` assertion yields a fixed JSON document.
+ * - The first `equals` assertion yields its expected value, stringified.
+ * - Otherwise the values of `contains` assertions and sample matches for
+ *   `regex` assertions are joined with spaces.
+ *
+ * Assertion values are read from `value`, `expected`, `text`, or `substring`,
+ * the same aliases the deterministic evaluators accept, so an assertion
+ * written with an alias is no longer silently skipped.
+ *
+ * @param value - Raw assertion configs (anything that is not an array yields `undefined`).
+ * @returns The synthesized output, or `undefined` when nothing could be synthesized.
+ */
+function synthesizeOutputFromAssertions(value: unknown): string | undefined {
+  const configs = normalizeAssertionConfigs(value);
+  if (configs.length === 0) return undefined;
+  if (configs.some((config) => config.type === 'is-json')) {
+    return '{"status":"ok","code":200}';
+  }
+
+  const configuredValue = (config: NormalizedAssertionConfig): unknown =>
+    config.value ?? config.expected ?? config.text ?? config.substring;
+
+  const parts: string[] = [];
+  for (const config of configs) {
+    if (config.type === 'equals') return stringifyAnswer(configuredValue(config) ?? '');
+    if (config.type === 'contains') {
+      const needle = configuredValue(config);
+      // The contains evaluator rejects null/undefined values, so skip them.
+      if (needle !== undefined && needle !== null) parts.push(String(needle));
+    }
+    if (config.type === 'regex')
+      parts.push(sampleForRegex(String(config.pattern ?? configuredValue(config) ?? '')));
+  }
+
+  return parts.length > 0 ? parts.join(' ') : undefined;
+}
+
+/**
+ * Produces a sample string for a regex pattern.
+ *
+ * Two known patterns map to fixed samples. Any other pattern has the
+ * characters `[ ] ( ) + ? ^ $ \` removed and each `|` replaced by a space, so
+ * the result matches only simple, mostly literal patterns.
+ */
+function sampleForRegex(pattern: string): string {
+  const knownSamples: ReadonlyArray<readonly [string, string]> = [
+    ['Good (morning|afternoon|evening)', 'Good morning'],
+    ['[Hh]ello', 'Hello'],
+  ];
+
+  for (const [fragment, sample] of knownSamples) {
+    if (pattern.includes(fragment)) return sample;
+  }
+
+  const withoutMeta = pattern.replace(/[[\]()+?^$\\]/g, '');
+  return withoutMeta.replace(/\|/g, ' ');
+}
+
+/**
+ * Coerces raw assertion configs into {@link NormalizedAssertionConfig}s.
+ *
+ * - A non-array input yields an empty list.
+ * - A string entry becomes a `rubrics` assertion with that string as value.
+ * - An object entry keeps its fields; `type` falls back to `name`, then to
+ *   `assertion-<position>`, and `name` is kept only when it is a string.
+ * - Any other entry becomes `assertion-<position>` with the entry as value.
+ */
+function normalizeAssertionConfigs(value: unknown): NormalizedAssertionConfig[] {
+  if (!Array.isArray(value)) return [];
+
+  const configs: NormalizedAssertionConfig[] = [];
+  for (let position = 0; position < value.length; position += 1) {
+    const entry: unknown = value[position];
+    const fallbackType = `assertion-${position + 1}`;
+
+    if (typeof entry === 'string') {
+      configs.push({ type: 'rubrics', value: entry });
+    } else if (entry && typeof entry === 'object') {
+      const record = entry as Record<string, unknown>;
+      configs.push({
+        ...record,
+        type: String(record.type ?? record.name ?? fallbackType),
+        name: typeof record.name === 'string' ? record.name : undefined,
+      });
+    } else {
+      configs.push({ type: fallbackType, value: entry });
+    }
+  }
+
+  return configs;
+}
diff --git a/packages/phoenix-adapter/src/phoenix/types.ts b/packages/phoenix-adapter/src/phoenix/types.ts
new file mode 100644
index 000000000..cdfb468a8
--- /dev/null
+++ b/packages/phoenix-adapter/src/phoenix/types.ts
@@ -0,0 +1,23 @@
+import type { AgentVMessage, JsonObject, NormalizedAssertion } from '../agentv/types.js';
+
+/** One dataset example generated from a normalized test case. */
+export interface PhoenixDatasetExamplePayload {
+  readonly input: {
+    /** Input messages of the test case. */
+    readonly messages: readonly AgentVMessage[];
+    /** Free-text criteria of the test case, when present. */
+    readonly criteria?: string;
+    /** Raw assertion sources of the test case. */
+    readonly agentv_assertion_configs: readonly unknown[];
+  };
+  /** Expected output of the test case, when present. */
+  readonly output?: unknown;
+  readonly metadata: JsonObject & {
+    /** Source path of the test case. */
+    readonly agentv_source: string;
+    /** Id of the test case. */
+    readonly agentv_test_id: string;
+    /** Name (or type, when unnamed) of each assertion. */
+    readonly agentv_assertions: readonly string[];
+    /** Raw assertion sources, duplicated here from `input`. */
+    readonly agentv_assertion_configs: readonly unknown[];
+  };
+}
+
+/** Dataset generated from one normalized suite, ready to upload to Phoenix. */
+export interface PhoenixDatasetPayload {
+  /** Dataset name derived from the source path and namespace. */
+  readonly name: string;
+  /** Suite description, when present. */
+  readonly description?: string;
+  /** One example per test case. */
+  readonly examples: readonly PhoenixDatasetExamplePayload[];
+  /** Suite-level assertions. */
+  readonly assertions: readonly NormalizedAssertion[];
+}
diff --git a/packages/phoenix-adapter/src/run/options.ts b/packages/phoenix-adapter/src/run/options.ts
new file mode 100644
index 000000000..286a24108
--- /dev/null
+++ b/packages/phoenix-adapter/src/run/options.ts
@@ -0,0 +1,9 @@
+/** Options controlling a suite run; the CLI builds these from its arguments. */
+export interface RunOptions {
+  /** Root directory under which eval sources are discovered. */
+  readonly agentvRoot: string;
+  /** When set, only the eval source matching this file is processed. */
+  readonly evalFile?: string;
+  /** When set (and `evalFile` is not), only sources whose relative path contains this text are processed. */
+  readonly filter?: string;
+  /** Skip running Phoenix experiments; only convert and compare. */
+  readonly dryRun: boolean;
+  /** Path the JSON report is written to. */
+  readonly out: string;
+  /** Phoenix dataset name prefix. */
+  readonly namespace?: string;
+  /** Treat unsupported features as failures. */
+  readonly failOnUnsupported: boolean;
+}
diff --git a/packages/phoenix-adapter/src/run/run-suite.ts b/packages/phoenix-adapter/src/run/run-suite.ts
new file mode 100644
index 000000000..24f12df1c
--- /dev/null
+++ b/packages/phoenix-adapter/src/run/run-suite.ts
@@ -0,0 +1,73 @@
+import path from 'node:path';
+import { discoverAgentVEvals } from '../agentv/discovery.js';
+import { loadAgentVEvalSuite } from '../agentv/load-spec.js';
+import { relativePosix } from '../agentv/path.js';
+import { compareDryRunSuite } from '../parity/compare.js';
+import { buildRunReport, writeJsonReport } from '../parity/report.js';
+import type { RunReport } from '../parity/types.js';
+import { createPhoenixDatasetPayload } from '../phoenix/datasets.js';
+import { runPhoenixExperiment } from '../phoenix/run-experiment.js';
+import type { RunOptions } from './options.js';
+
+/**
+ * Decides whether a discovered source should be processed.
+ *
+ * With `evalFile` set, the source must equal either that file's path relative
+ * to `agentvRoot` or the `evalFile` value itself. Otherwise, with `filter`
+ * set, the relative path must contain the filter text. With neither option,
+ * every source matches.
+ */
+function sourceMatches(relativePath: string, options: RunOptions): boolean {
+  const { agentvRoot, evalFile, filter } = options;
+
+  if (evalFile) {
+    const requested = relativePosix(agentvRoot, path.resolve(evalFile));
+    return [requested, evalFile].includes(relativePath);
+  }
+
+  return filter ? relativePath.includes(filter) : true;
+}
+
+/**
+ * Discovers the matching eval sources, converts each to a Phoenix dataset,
+ * compares it with its baseline, optionally runs a Phoenix experiment, and
+ * writes the combined JSON report to `options.out`.
+ *
+ * A suite is marked failed when the dry-run comparison fails, when
+ * `failOnUnsupported` is set and unsupported features are present, or when
+ * the Phoenix run count differs from the case count. Suites are processed one
+ * after another.
+ *
+ * @returns The report that was written.
+ * @throws Error when no discovered source matches the options.
+ */
+export async function runSuite(options: RunOptions): Promise<RunReport> {
+  const discovered = await discoverAgentVEvals(options.agentvRoot);
+  const sources = discovered.filter((source) => sourceMatches(source.relativePath, options));
+  if (sources.length === 0) {
+    throw new Error('No AgentV eval sources matched the requested options.');
+  }
+
+  const summaries = [];
+  for (const source of sources) {
+    const suite = await loadAgentVEvalSuite(source);
+    const dataset = createPhoenixDatasetPayload(suite, { namespace: options.namespace });
+    const baseSummary = compareDryRunSuite(suite, dataset);
+
+    // Failures added on top of the dry-run comparison, in the order found.
+    const extraFailures: string[] = [];
+    if (options.failOnUnsupported && baseSummary.unsupportedFeatures.length > 0) {
+      extraFailures.push(
+        `Unsupported features present: ${baseSummary.unsupportedFeatures.join(', ')}`,
+      );
+    }
+
+    let phoenixFields = {};
+    if (!options.dryRun) {
+      const experiment = await runPhoenixExperiment(dataset);
+      phoenixFields = {
+        phoenixExperimentId: experiment.experimentId,
+        phoenixRunCount: experiment.runCount,
+        phoenixEvaluationRunCount: experiment.evaluationRunCount,
+      };
+      if (experiment.runCount !== suite.cases.length) {
+        extraFailures.push(
+          `Phoenix run count ${experiment.runCount} does not match case count ${suite.cases.length}`,
+        );
+      }
+    }
+
+    const hasExtraFailures = extraFailures.length > 0;
+    summaries.push({
+      ...baseSummary,
+      ...phoenixFields,
+      status: hasExtraFailures ? ('failed' as const) : baseSummary.status,
+      failures: hasExtraFailures
+        ? [...baseSummary.failures, ...extraFailures]
+        : baseSummary.failures,
+    });
+  }
+
+  const report = buildRunReport({
+    dryRun: options.dryRun,
+    agentvRoot: options.agentvRoot,
+    suites: summaries,
+  });
+  await writeJsonReport(report, options.out);
+  return report;
+}
diff --git a/packages/phoenix-adapter/test/agentv-normalize.test.ts b/packages/phoenix-adapter/test/agentv-normalize.test.ts
new file mode 100644
index 000000000..9f95df869
--- /dev/null
+++ b/packages/phoenix-adapter/test/agentv-normalize.test.ts
@@ -0,0 +1,95 @@
+import { describe, expect, test } from 'bun:test';
+import { mkdirSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import path from 'node:path';
+import { discoverAgentVEvals } from '../src/agentv/discovery.js';
+import { loadAgentVEvalSuite } from '../src/agentv/load-spec.js';
+
+function fixtureRoot(name: string): string { // Builds a unique temp path; callers mkdir it.
+  return path.join(tmpdir(), `agentv-phoenix-${name}-${crypto.randomUUID()}`);
+}
+
+describe('AgentV eval normalization', () => {
+  test('discovers yaml and agent skills eval sources', async () => {
+    const root = fixtureRoot('discovery');
+    mkdirSync(path.join(root, 'examples', 'features', 'basic', 'evals'), { recursive: true });
+    mkdirSync(path.join(root, 'examples', 'features', 'skills'), { recursive: true });
+    writeFileSync(
+      path.join(root, 'examples', 'features', 'basic', 'evals', 'dataset.eval.yaml'),
+      'tests: []\n',
+    );
+    writeFileSync(
+      path.join(root, 'examples', 'features', 'skills', 'evals.json'),
+      '{"evals": []}\n',
+    );
+
+    const sources = await discoverAgentVEvals(root);
+    // Paths are expected relative to root, '/'-separated, with the YAML source first.
+    expect(sources.map((source) => source.relativePath)).toEqual([
+      'examples/features/basic/evals/dataset.eval.yaml',
+      'examples/features/skills/evals.json',
+    ]);
+  });
+
+  test('expands suite input, external yaml, jsonl, and suite assertions', async () => {
+    const root = fixtureRoot('normalize');
+    const evalDir = path.join(root, 'examples', 'features', 'external', 'evals');
+    mkdirSync(path.join(evalDir, 'cases'), { recursive: true });
+    writeFileSync(
+      path.join(evalDir, 'dataset.eval.yaml'),
+      `name: external
+input:
+  - role: system
+    content: shared
+assertions:
+  - type: contains
+    value: ok
+tests:
+  - id: inline
+    criteria: inline criteria
+    input: hello
+  - file://cases/more.jsonl
+`,
+    );
+    writeFileSync(
+      path.join(evalDir, 'cases', 'more.jsonl'),
+      '{"id":"from-jsonl","criteria":"jsonl criteria","input":"hi","expected_output":"ok"}\n',
+    );
+    // The suite above mixes one inline test with one file:// reference to this JSONL file.
+    const suite = await loadAgentVEvalSuite({
+      path: path.join(evalDir, 'dataset.eval.yaml'),
+      relativePath: 'examples/features/external/evals/dataset.eval.yaml',
+      kind: 'eval-yaml',
+    });
+    // cases[0] is the inline test (suite system message first); cases[1] comes from the JSONL.
+    expect(suite.cases).toHaveLength(2);
+    expect(suite.cases[0]?.input.map((message) => message.role)).toEqual(['system', 'user']);
+    expect(suite.cases[1]?.expectedOutput).toBe('ok');
+    expect(suite.cases[1]?.assertions[0]?.type).toBe('contains');
+  });
+
+  test('normalizes Agent Skills evals.json', async () => {
+    const root = fixtureRoot('skills');
+    const evalPath = path.join(root, 'examples', 'features', 'agent-skills-evals', 'evals.json');
+    mkdirSync(path.dirname(evalPath), { recursive: true });
+    writeFileSync(
+      evalPath,
+      JSON.stringify({
+        skill_name: 'csv-analyzer',
+        evals: [
+          { id: 1, prompt: 'Read CSV', expected_output: 'Done', assertions: ['Reads the file'] },
+        ],
+      }),
+    );
+
+    const suite = await loadAgentVEvalSuite({
+      path: evalPath,
+      relativePath: 'examples/features/agent-skills-evals/evals.json',
+      kind: 'agent-skills-json',
+    });
+    // skill_name becomes the suite name, numeric id 1 becomes '1', string assertions are llm-grader.
+    expect(suite.name).toBe('csv-analyzer');
+    expect(suite.cases[0]?.id).toBe('1');
+    expect(suite.cases[0]?.assertions[0]?.type).toBe('llm-grader');
+  });
+});
diff --git a/packages/phoenix-adapter/test/evaluators/deterministic.test.ts b/packages/phoenix-adapter/test/evaluators/deterministic.test.ts
new file mode 100644
index 000000000..527e23c74
--- /dev/null
+++ b/packages/phoenix-adapter/test/evaluators/deterministic.test.ts
@@ -0,0 +1,96 @@
+import { describe, expect, test } from 'bun:test';
+import { evaluateAssertion } from '../../src/evaluators/registry.js';
+
+describe('deterministic evaluator adapters', () => {
+  test('contains returns pass and score 1 when output includes the expected text', () => {
+    const result = evaluateAssertion(
+      { type: 'contains', name: 'has greeting', value: 'hello' },
+      { output: 'well hello there' },
+    );
+    // The assertion's own name and type are expected back on the result.
+    expect(result).toMatchObject({
+      name: 'has greeting',
+      type: 'contains',
+      passed: true,
+      score: 1,
+      label: 'pass',
+    });
+  });
+
+  test('contains returns fail and score 0 when output does not include the expected text', () => {
+    const result = evaluateAssertion(
+      { type: 'contains', value: 'goodbye' },
+      { output: 'hello there' },
+    );
+
+    expect(result.passed).toBe(false);
+    expect(result.score).toBe(0);
+    expect(result.label).toBe('fail');
+  });
+
+  test('contains can compare case-insensitively', () => {
+    const result = evaluateAssertion(
+      { type: 'contains', value: 'HELLO', caseSensitive: false },
+      { output: 'hello there' },
+    );
+
+    expect(result.passed).toBe(true);
+    expect(result.score).toBe(1);
+  });
+
+  test('regex returns pass for matching output', () => {
+    const result = evaluateAssertion(
+      { type: 'regex', pattern: 'order-[0-9]+$' },
+      { output: 'created order-123' },
+    );
+
+    expect(result.passed).toBe(true);
+    expect(result.score).toBe(1);
+  });
+
+  test('regex returns fail for invalid patterns', () => {
+    const result = evaluateAssertion({ type: 'regex', pattern: '[' }, { output: 'anything' });
+    // '[' cannot compile; a failing result with an explanation is expected rather than a throw.
+    expect(result.passed).toBe(false);
+    expect(result.score).toBe(0);
+    expect(result.explanation).toContain('Invalid regex pattern');
+  });
+
+  test('equals performs stable deep equality for object outputs', () => {
+    const result = evaluateAssertion(
+      { type: 'equals', expected: { b: 2, a: ['x', { c: true }] } },
+      { output: { a: ['x', { c: true }], b: 2 } },
+    );
+    // Key order differs between expected and output on purpose.
+    expect(result.passed).toBe(true);
+    expect(result.score).toBe(1);
+  });
+
+  test('equals can fall back to context expected output', () => {
+    const result = evaluateAssertion(
+      { type: 'equals' },
+      { output: 'done', expectedOutput: 'done' },
+    );
+
+    expect(result.passed).toBe(true);
+    expect(result.score).toBe(1);
+  });
+
+  test('is-json passes JSON strings and object outputs', () => {
+    const jsonString = evaluateAssertion({ type: 'is-json' }, { output: '{"ok":true}' });
+    const objectOutput = evaluateAssertion({ type: 'is-json' }, { output: { ok: true } });
+
+    expect(jsonString.passed).toBe(true);
+    expect(jsonString.score).toBe(1);
+    expect(objectOutput.passed).toBe(true);
+    expect(objectOutput.score).toBe(1);
+  });
+
+  test('is-json fails non-JSON text', () => {
+    const result = evaluateAssertion({ type: 'is-json' }, { output: 'not json' });
+
+    expect(result.passed).toBe(false);
+    expect(result.score).toBe(0);
+    expect(result.explanation).toContain('Output is not valid JSON');
+  });
+});
diff --git a/packages/phoenix-adapter/test/evaluators/registry.test.ts b/packages/phoenix-adapter/test/evaluators/registry.test.ts
new file mode 100644
index 000000000..5417b6d7b
--- /dev/null
+++ b/packages/phoenix-adapter/test/evaluators/registry.test.ts
@@ -0,0 +1,69 @@
+import { describe, expect, test } from 'bun:test';
+import {
+  createEvaluatorRegistry,
+  isSupportedEvaluatorType,
+  unsupportedEvaluatorReports,
+  unsupportedEvaluatorTypes,
+} from '../../src/evaluators/registry.js';
+import type { NormalizedAssertionConfig } from '../../src/evaluators/types.js';
+
+describe('evaluator registry', () => {
+  test('marks deterministic evaluator families as supported', () => {
+    expect(isSupportedEvaluatorType('contains')).toBe(true);
+    expect(isSupportedEvaluatorType('regex')).toBe(true);
+    expect(isSupportedEvaluatorType('equals')).toBe(true);
+    expect(isSupportedEvaluatorType('is-json')).toBe(true);
+  });
+
+  test('builds adapters for supported and unsupported evaluators', () => {
+    const registry = createEvaluatorRegistry([
+      { type: 'contains', value: 'ok' },
+      { type: 'llm-grader', name: 'judge answer' },
+    ]);
+    // Unsupported assertions still get an adapter entry, in input order.
+    expect(registry).toHaveLength(2);
+    expect(registry[0]?.supported).toBe(true);
+    expect(registry[1]?.supported).toBe(false);
+    // Evaluating the unsupported adapter yields a failing 'unsupported' result, not a throw.
+    const unsupportedResult = registry[1]?.evaluate({ output: 'ok' });
+
+    expect(unsupportedResult).toMatchObject({
+      name: 'judge answer',
+      type: 'llm-grader',
+      passed: false,
+      score: 0,
+      label: 'unsupported',
+      unsupported: true,
+    });
+  });
+
+  test('reports every first-pass unsupported evaluator family with a reason', () => {
+    const assertions: NormalizedAssertionConfig[] = unsupportedEvaluatorTypes.map((type) => ({
+      type,
+      name: `${type} assertion`,
+      metadata: { testId: type },
+    }));
+    // One assertion per entry in unsupportedEvaluatorTypes, each tagged with its own type.
+    const reports = unsupportedEvaluatorReports(assertions);
+
+    expect(reports).toHaveLength(unsupportedEvaluatorTypes.length);
+
+    for (const type of unsupportedEvaluatorTypes) {
+      const report = reports.find((entry) => entry.type === type);
+      // Name and metadata must be carried over unchanged; the reason only has to be non-empty.
+      expect(report?.name).toBe(`${type} assertion`);
+      expect(report?.reason.length).toBeGreaterThan(0);
+      expect(report?.metadata).toEqual({ testId: type });
+    }
+  });
+
+  test('reports unknown evaluator families instead of silently treating them as supported', () => {
+    const [report] = unsupportedEvaluatorReports([{ type: 'custom-family', name: 'custom' }]);
+
+    expect(report).toMatchObject({
+      name: 'custom',
+      type: 'custom-family',
+      reason: 'Unknown evaluator family: custom-family',
+    });
+  });
+});
diff --git a/packages/phoenix-adapter/test/parity.test.ts b/packages/phoenix-adapter/test/parity.test.ts
new file mode 100644
index 000000000..5e0fdfe36
--- /dev/null
+++ b/packages/phoenix-adapter/test/parity.test.ts
@@ -0,0 +1,41 @@
+import { expect, test } from 'bun:test';
+import { mkdirSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import path from 'node:path';
+import type { NormalizedSuite } from '../src/agentv/types.js';
+import { compareDryRunSuite } from '../src/parity/compare.js';
+import { createPhoenixDatasetPayload } from '../src/phoenix/datasets.js';
+
+test('dry-run parity compares baseline ids with normalized cases', () => {
+  const dir = path.join(tmpdir(), `agentv-phoenix-parity-${crypto.randomUUID()}`);
+  mkdirSync(dir, { recursive: true });
+  const evalPath = path.join(dir, 'dataset.eval.yaml');
+  writeFileSync(evalPath, 'tests: []\n');
+  writeFileSync(path.join(dir, 'dataset.eval.baseline.jsonl'), '{"test_id":"known"}\n');
+  // Baseline lists id "known", matching the one case below (presumably found via source.path).
+  const suite: NormalizedSuite = {
+    name: 'suite',
+    source: {
+      path: evalPath,
+      relativePath: 'examples/x/evals/dataset.eval.yaml',
+      kind: 'eval-yaml',
+    },
+    cases: [
+      {
+        id: 'known',
+        input: [{ role: 'user', content: 'hi' }],
+        assertions: [],
+        metadata: {},
+        sourcePath: 'examples/x/evals/dataset.eval.yaml',
+      },
+    ],
+    suiteAssertions: [],
+    warnings: [],
+    unsupportedFeatures: [],
+  };
+  // The suite is built by hand, so the 'tests: []' file content is not what gets compared.
+  const summary = compareDryRunSuite(suite, createPhoenixDatasetPayload(suite));
+
+  expect(summary.status).toBe('passed');
+  expect(summary.baselineCount).toBe(1);
+});
diff --git a/packages/phoenix-adapter/test/phoenix-datasets.test.ts b/packages/phoenix-adapter/test/phoenix-datasets.test.ts
new file mode 100644
index 000000000..a746eb433
--- /dev/null
+++ b/packages/phoenix-adapter/test/phoenix-datasets.test.ts
@@ -0,0 +1,35 @@
+import { expect, test } from 'bun:test';
+import type { NormalizedSuite } from '../src/agentv/types.js';
+import { createPhoenixDatasetPayload } from '../src/phoenix/datasets.js';
+
+test('creates deterministic Phoenix dataset payloads from normalized suites', () => {
+  const suite: NormalizedSuite = {
+    name: 'assert-demo',
+    source: {
+      path: '/tmp/dataset.eval.yaml',
+      relativePath: 'examples/features/assert/evals/dataset.eval.yaml',
+      kind: 'eval-yaml',
+    },
+    cases: [
+      {
+        id: 'contains-check',
+        criteria: 'Must contain Hello',
+        input: [{ role: 'user', content: 'Say hello' }],
+        expectedOutput: 'Hello',
+        assertions: [{ type: 'contains', source: { type: 'contains', value: 'Hello' } }],
+        metadata: { tag: 'demo' },
+        sourcePath: 'examples/features/assert/evals/dataset.eval.yaml',
+      },
+    ],
+    suiteAssertions: [],
+    warnings: [],
+    unsupportedFeatures: [],
+  };
+
+  const dataset = createPhoenixDatasetPayload(suite);
+  // toStartWith pins only the name prefix; whatever suffix follows is left unchecked.
+  expect(dataset.name).toStartWith('agentv-examples-examples-features-assert-evals-dataset-eval');
+  expect(dataset.examples[0]?.input.messages[0]?.content).toBe('Say hello');
+  expect(dataset.examples[0]?.metadata.agentv_test_id).toBe('contains-check');
+  expect(dataset.examples[0]?.metadata.agentv_assertions).toEqual(['contains']);
+});
diff --git a/packages/phoenix-adapter/tsconfig.json b/packages/phoenix-adapter/tsconfig.json
new file mode 100644
index 000000000..984b50599
--- /dev/null
+++ b/packages/phoenix-adapter/tsconfig.json
@@ -0,0 +1,10 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "module": "NodeNext",
+    "moduleResolution": "NodeNext",
+    "noEmit": true,
+    "types": ["bun"]
+  },
+  "include": ["src/**/*.ts", "test/**/*.ts"]
+}
diff --git a/packages/phoenix-adapter/tsup.config.ts b/packages/phoenix-adapter/tsup.config.ts
new file mode 100644
index 000000000..edc4764f3
--- /dev/null
+++ b/packages/phoenix-adapter/tsup.config.ts
@@ -0,0 +1,12 @@
+import { defineConfig } from 'tsup';
+
+export default defineConfig({
+  entry: ['src/index.ts'], // only the library entry is listed; src/cli.ts is not
+  format: ['esm'],
+  sourcemap: true,
+  clean: true, // empty the output directory before each build
+  dts: true, // also emit .d.ts declarations
+  target: 'node20',
+  tsconfig: './tsconfig.json',
+  external: ['@agentv/core'], // leave the workspace core package as an import, not bundled
+});

From e19177278c71bc2c14273f7e645eba52fd89a472 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Tue, 2 Jun 2026 13:29:35 +0200
Subject: [PATCH 2/6] fix(phoenix): omit empty assertion-only outputs

---
 .../phoenix-adapter/src/agentv/load-spec.ts     |  8 +++++---
 .../test/agentv-normalize.test.ts               | 17 +++++++++++++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/packages/phoenix-adapter/src/agentv/load-spec.ts b/packages/phoenix-adapter/src/agentv/load-spec.ts
index 67905ca30..46d4f0270 100644
--- a/packages/phoenix-adapter/src/agentv/load-spec.ts
+++ b/packages/phoenix-adapter/src/agentv/load-spec.ts
@@ -39,10 +39,12 @@ function normalizeAssertion(assertion: unknown, index: number): NormalizedAssert
 
 function normalizeExpectedOutput(test: {
   readonly reference_answer?: string;
-  readonly expected_output?: unknown;
+  readonly expected_output?: readonly unknown[];
 }): unknown {
-  if (test.reference_answer !== undefined) return test.reference_answer;
-  return test.expected_output;
+  const hasExpectedOutput = (test.expected_output?.length ?? 0) > 0;
+  if (hasExpectedOutput) return test.reference_answer ?? test.expected_output;
+  if (test.reference_answer && test.reference_answer.length > 0) return test.reference_answer;
+  return undefined;
 }
 
 function deriveAgentVRoot(source: AgentVSource): string {
diff --git a/packages/phoenix-adapter/test/agentv-normalize.test.ts b/packages/phoenix-adapter/test/agentv-normalize.test.ts
index 9f95df869..5090c4267 100644
--- a/packages/phoenix-adapter/test/agentv-normalize.test.ts
+++ b/packages/phoenix-adapter/test/agentv-normalize.test.ts
@@ -68,6 +68,23 @@ tests:
     expect(suite.cases[1]?.assertions[0]?.type).toBe('contains');
   });
 
+  test('leaves assertion-only expected output absent for Phoenix synthesis', async () => {
+    const sourcePath = path.resolve('../../examples/features/assert/evals/dataset.eval.yaml');
+    const suite = await loadAgentVEvalSuite({
+      path: sourcePath, // NOTE(review): resolved against process.cwd(); assumes the package dir
+      relativePath: 'examples/features/assert/evals/dataset.eval.yaml',
+      kind: 'eval-yaml',
+    });
+    // NOTE(review): the count of 4 mirrors the checked-in example file; keep the two in sync.
+    expect(suite.cases).toHaveLength(4);
+    expect(suite.cases.map((testCase) => testCase.expectedOutput)).toEqual([
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+    ]);
+  });
+
   test('normalizes Agent Skills evals.json', async () => {
     const root = fixtureRoot('skills');
     const evalPath = path.join(root, 'examples', 'features', 'agent-skills-evals', 'evals.json');

From 62c0b6d687b1a3da878b5050de5fb153405243fc Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Tue, 2 Jun 2026 13:47:44 +0200
Subject: [PATCH 3/6] chore(phoenix): write smoke report outside package

---
 packages/phoenix-adapter/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/phoenix-adapter/package.json b/packages/phoenix-adapter/package.json
index d51f1a0ee..e816e632a 100644
--- a/packages/phoenix-adapter/package.json
+++ b/packages/phoenix-adapter/package.json
@@ -17,7 +17,7 @@
     "typecheck": "(cd ../core && bun run build) && tsc --noEmit",
     "test": "(cd ../core && bun run build) && bun test",
     "phoenix:dry-run": "bun src/cli.ts run --dry-run --agentv-root ../.. --out reports/dry-run.json",
-    "phoenix:assert-smoke": "bun src/cli.ts run --dry-run --agentv-root ../.. --filter examples/features/assert/evals/dataset.eval.yaml --out reports/assert-smoke.json"
+    "phoenix:assert-smoke": "bun src/cli.ts run --dry-run --agentv-root ../.. --filter examples/features/assert/evals/dataset.eval.yaml --out /tmp/agentv-phoenix-assert-smoke.json"
   },
   "files": ["dist", "README.md", "docs"],
   "dependencies": {

From 787d840e15be41591d73fb46265b65ab1cd176ea Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Tue, 2 Jun 2026 13:58:11 +0200
Subject: [PATCH 4/6] fix(phoenix): unwrap expected values for graders

---
 packages/phoenix-adapter/src/phoenix/run-experiment.ts | 10 +++++++++-
 packages/phoenix-adapter/test/phoenix-datasets.test.ts |  7 +++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/packages/phoenix-adapter/src/phoenix/run-experiment.ts b/packages/phoenix-adapter/src/phoenix/run-experiment.ts
index 41f83fb27..7fd0ec922 100644
--- a/packages/phoenix-adapter/src/phoenix/run-experiment.ts
+++ b/packages/phoenix-adapter/src/phoenix/run-experiment.ts
@@ -78,10 +78,11 @@ export async function runPhoenixExperiment(
             };
           }
 
+          const expectedOutput = unwrapPhoenixExpectedOutput(expected);
           const results = configs.map((config) =>
             evaluateAssertion(config, {
               output,
-              expectedOutput: expected,
+              expectedOutput,
               metadata: safeMetadata,
             }),
           );
@@ -115,6 +116,13 @@ export async function runPhoenixExperiment(
   };
 }
 
+export function unwrapPhoenixExpectedOutput(expected: unknown): unknown { // Unwraps { answer }.
+  if (expected && typeof expected === 'object' && 'answer' in expected) {
+    return (expected as { readonly answer?: unknown }).answer;
+  }
+  return expected; // primitives, null, and objects without an "answer" key pass through as-is
+}
+
 function normalizeExpected(output: unknown): Record<string, unknown> {
   if (Array.isArray(output) && output.length === 1) {
     const first = output[0] as { readonly content?: unknown } | undefined;
diff --git a/packages/phoenix-adapter/test/phoenix-datasets.test.ts b/packages/phoenix-adapter/test/phoenix-datasets.test.ts
index a746eb433..7d15f118d 100644
--- a/packages/phoenix-adapter/test/phoenix-datasets.test.ts
+++ b/packages/phoenix-adapter/test/phoenix-datasets.test.ts
@@ -1,6 +1,7 @@
 import { expect, test } from 'bun:test';
 import type { NormalizedSuite } from '../src/agentv/types.js';
 import { createPhoenixDatasetPayload } from '../src/phoenix/datasets.js';
+import { unwrapPhoenixExpectedOutput } from '../src/phoenix/run-experiment.js';
 
 test('creates deterministic Phoenix dataset payloads from normalized suites', () => {
   const suite: NormalizedSuite = {
@@ -33,3 +34,9 @@ test('creates deterministic Phoenix dataset payloads from normalized suites', ()
   expect(dataset.examples[0]?.metadata.agentv_test_id).toBe('contains-check');
   expect(dataset.examples[0]?.metadata.agentv_assertions).toEqual(['contains']);
 });
+
+test('unwraps Phoenix expected answer payloads for AgentV deterministic graders', () => {
+  expect(unwrapPhoenixExpectedOutput({ answer: 'done' })).toBe('done');
+  expect(unwrapPhoenixExpectedOutput({ answer: { ok: true } })).toEqual({ ok: true });
+  expect(unwrapPhoenixExpectedOutput({ other: 'shape' })).toEqual({ other: 'shape' }); // no "answer" key
+});

From f82a93b4fb4df7f61b74bc76d6cc60703ab25cfe Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Tue, 2 Jun 2026 14:13:55 +0200
Subject: [PATCH 5/6] fix(phoenix): preserve null equals assertions

---
 bun.lock                                         |  1 +
 packages/phoenix-adapter/package.json            |  3 ++-
 .../src/evaluators/deterministic.ts              | 13 ++++++++++++-
 .../test/evaluators/deterministic.test.ts        | 16 ++++++++++++++++
 4 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/bun.lock b/bun.lock
index 366959007..87292c44f 100644
--- a/bun.lock
+++ b/bun.lock
@@ -133,6 +133,7 @@
         "@agentv/core": "workspace:*",
         "@arizeai/phoenix-client": "6.10.0",
         "@arizeai/phoenix-evals": "1.0.3",
+        "yaml": "^2.8.3",
       },
       "devDependencies": {
         "tsup": "8.3.5",
diff --git a/packages/phoenix-adapter/package.json b/packages/phoenix-adapter/package.json
index e816e632a..87b9eb210 100644
--- a/packages/phoenix-adapter/package.json
+++ b/packages/phoenix-adapter/package.json
@@ -23,7 +23,8 @@
   "dependencies": {
     "@agentv/core": "workspace:*",
     "@arizeai/phoenix-client": "6.10.0",
-    "@arizeai/phoenix-evals": "1.0.3"
+    "@arizeai/phoenix-evals": "1.0.3",
+    "yaml": "^2.8.3"
   },
   "devDependencies": {
     "tsup": "8.3.5",
diff --git a/packages/phoenix-adapter/src/evaluators/deterministic.ts b/packages/phoenix-adapter/src/evaluators/deterministic.ts
index 593d03c74..5b804255a 100644
--- a/packages/phoenix-adapter/src/evaluators/deterministic.ts
+++ b/packages/phoenix-adapter/src/evaluators/deterministic.ts
@@ -83,7 +83,9 @@ function evaluateEquals(
   assertion: NormalizedAssertionConfig,
   context: EvaluationContext,
 ): EvaluatorResult {
-  const expected = assertionValue(assertion) ?? context.expectedOutput;
+  const expected = hasAssertionValue(assertion)
+    ? assertionValue(assertion)
+    : context.expectedOutput;
   const passed = stableValue(context.output) === stableValue(expected);
 
   return result(
@@ -103,6 +105,15 @@ function evaluateIsJson(
   return result(assertion, passed, passed ? 'Output is valid JSON' : parsed.reason);
 }
 
+function hasAssertionValue(assertion: NormalizedAssertionConfig): boolean { // Key presence only.
+  return (
+    'value' in assertion || // `in` is true even when the stored value is null or undefined
+    'expected' in assertion ||
+    'text' in assertion ||
+    'substring' in assertion // NOTE(review): keep this key list in sync with assertionValue
+  );
+}
+
 function assertionValue(assertion: NormalizedAssertionConfig): unknown {
   if ('value' in assertion) return assertion.value;
   if ('expected' in assertion) return assertion.expected;
diff --git a/packages/phoenix-adapter/test/evaluators/deterministic.test.ts b/packages/phoenix-adapter/test/evaluators/deterministic.test.ts
index 527e23c74..1e233b2b9 100644
--- a/packages/phoenix-adapter/test/evaluators/deterministic.test.ts
+++ b/packages/phoenix-adapter/test/evaluators/deterministic.test.ts
@@ -66,6 +66,22 @@ describe('deterministic evaluator adapters', () => {
     expect(result.score).toBe(1);
   });
 
+  test('equals preserves explicit null expected values', () => {
+    const pass = evaluateAssertion(
+      { type: 'equals', expected: null },
+      { output: null, expectedOutput: 'fallback' }, // explicit null must win over the fallback
+    );
+    const fail = evaluateAssertion(
+      { type: 'equals', expected: null },
+      { output: 'fallback', expectedOutput: 'fallback' },
+    );
+    // Same assertion both times; only the output differs (null vs the fallback text).
+    expect(pass.passed).toBe(true);
+    expect(pass.score).toBe(1);
+    expect(fail.passed).toBe(false);
+    expect(fail.score).toBe(0);
+  });
+
   test('equals can fall back to context expected output', () => {
     const result = evaluateAssertion(
       { type: 'equals' },

From 9be0b5f9ec57d742f3bf3404ef2f852e0dac70f8 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Tue, 2 Jun 2026 14:26:01 +0200
Subject: [PATCH 6/6] fix(phoenix): preserve present expected outputs

---
 .../phoenix-adapter/src/agentv/load-spec.ts   | 12 +++---
 .../test/agentv-normalize.test.ts             | 43 +++++++++++++++++++
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/packages/phoenix-adapter/src/agentv/load-spec.ts b/packages/phoenix-adapter/src/agentv/load-spec.ts
index 46d4f0270..dfb0ddd4c 100644
--- a/packages/phoenix-adapter/src/agentv/load-spec.ts
+++ b/packages/phoenix-adapter/src/agentv/load-spec.ts
@@ -39,12 +39,14 @@ function normalizeAssertion(assertion: unknown, index: number): NormalizedAssert
 
 function normalizeExpectedOutput(test: {
   readonly reference_answer?: string;
-  readonly expected_output?: readonly unknown[];
+  readonly expected_output?: unknown;
 }): unknown {
-  const hasExpectedOutput = (test.expected_output?.length ?? 0) > 0;
-  if (hasExpectedOutput) return test.reference_answer ?? test.expected_output;
-  if (test.reference_answer && test.reference_answer.length > 0) return test.reference_answer;
-  return undefined;
+  const expectedOutput = test.expected_output;
+  const hasExpectedOutput = Array.isArray(expectedOutput)
+    ? expectedOutput.length > 0
+    : expectedOutput !== undefined;
+  if (!hasExpectedOutput) return undefined;
+  return test.reference_answer ?? expectedOutput;
 }
 
 function deriveAgentVRoot(source: AgentVSource): string {
diff --git a/packages/phoenix-adapter/test/agentv-normalize.test.ts b/packages/phoenix-adapter/test/agentv-normalize.test.ts
index 5090c4267..89c7035f0 100644
--- a/packages/phoenix-adapter/test/agentv-normalize.test.ts
+++ b/packages/phoenix-adapter/test/agentv-normalize.test.ts
@@ -68,6 +68,49 @@ tests:
     expect(suite.cases[1]?.assertions[0]?.type).toBe('contains');
   });
 
+  test('preserves present object and empty-string expected output values', async () => {
+    const root = fixtureRoot('expected-values');
+    const evalPath = path.join(
+      root,
+      'examples',
+      'features',
+      'expected-values',
+      'evals',
+      'dataset.eval.yaml',
+    );
+    mkdirSync(path.dirname(evalPath), { recursive: true });
+    writeFileSync(
+      evalPath,
+      `name: expected-values
+tests:
+  - id: object-output
+    input: hi
+    expected_output:
+      ok: true
+  - id: empty-string-output
+    input: hi
+    expected_output: ""
+  - id: assertion-only
+    input: hi
+    assertions:
+      - type: contains
+        value: ok
+`,
+    );
+
+    const suite = await loadAgentVEvalSuite({
+      path: evalPath,
+      relativePath: 'examples/features/expected-values/evals/dataset.eval.yaml',
+      kind: 'eval-yaml',
+    });
+    // The object is expected back as pretty-printed JSON text; "" is kept; no value gives undefined.
+    expect(suite.cases.map((testCase) => testCase.expectedOutput)).toEqual([
+      '{\n  "ok": true\n}',
+      '',
+      undefined,
+    ]);
+  });
+
   test('leaves assertion-only expected output absent for Phoenix synthesis', async () => {
     const sourcePath = path.resolve('../../examples/features/assert/evals/dataset.eval.yaml');
     const suite = await loadAgentVEvalSuite({