From fc195b0fd2f0cfa68cd883727bcd9482f9227cd2 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Wed, 3 Jun 2026 08:03:27 +0200
Subject: [PATCH] test(cli): simplify churny local coverage

---
 .../test/commands/eval/pipeline/bench.test.ts | 29 +------
 .../test/commands/eval/pipeline/grade.test.ts | 47 +++--------
 .../test/commands/eval/pipeline/input.test.ts | 48 ++---------
 apps/cli/test/commands/results/serve.test.ts  | 31 +------
 .../commands/results/studio-config.test.ts    | 80 +++++--------------
 apps/cli/test/unit/studio-navigation.test.ts  | 62 --------------
 apps/dashboard/src/lib/navigation.test.ts     | 37 +++++++++
 .../src/content/docs/docs/tools/dashboard.mdx | 20 +++--
 8 files changed, 92 insertions(+), 262 deletions(-)
 delete mode 100644 apps/cli/test/unit/studio-navigation.test.ts
diff --git a/apps/cli/test/commands/eval/pipeline/bench.test.ts b/apps/cli/test/commands/eval/pipeline/bench.test.ts
index 1fab8843..8514f5a9 100644
--- a/apps/cli/test/commands/eval/pipeline/bench.test.ts
+++ b/apps/cli/test/commands/eval/pipeline/bench.test.ts
@@ -59,8 +59,7 @@ describe('pipeline bench', () => {
     await rm(OUT_DIR, { recursive: true, force: true });
   });
 
-  it('writes grading.json with merged scores and pass_rate', async () => {
-    // Write LLM grader result to disk (the default flow)
+  it('writes grading, index, and benchmark artifacts', async () => {
     await writeFile(
       join(OUT_DIR, 'test-01', 'llm_grader_results', 'relevance.json'),
       JSON.stringify({
@@ -76,19 +75,6 @@ describe('pipeline bench', () => {
     expect(grading.summary.pass_rate).toBeGreaterThan(0);
     expect(grading.assertions.length).toBeGreaterThan(0);
     expect(grading.graders).toHaveLength(2);
-  }, 30_000);
-
-  it('writes index.jsonl with one entry per test', async () => {
-    await writeFile(
-      join(OUT_DIR, 'test-01', 'llm_grader_results', 'relevance.json'),
-      JSON.stringify({
-        score: 0.8,
-        assertions: [{ text: 'Relevant', passed: true }],
-      }),
-    );
-
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'bench', OUT_DIR]);
 
     const indexContent = await readFile(join(OUT_DIR, 'index.jsonl'), 'utf8');
     const lines = indexContent
@@ -98,19 +84,6 @@ describe('pipeline bench', () => {
     expect(lines).toHaveLength(1);
     expect(lines[0].test_id).toBe('test-01');
     expect(lines[0].score).toBeGreaterThan(0);
-  }, 30_000);
-
-  it('writes benchmark.json with run_summary', async () => {
-    await writeFile(
-      join(OUT_DIR, 'test-01', 'llm_grader_results', 'relevance.json'),
-      JSON.stringify({
-        score: 0.8,
-        assertions: [{ text: 'ok', passed: true }],
-      }),
-    );
-
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'bench', OUT_DIR]);
 
     const benchmark = JSON.parse(await readFile(join(OUT_DIR, 'benchmark.json'), 'utf8'));
     expect(benchmark.metadata.targets).toContain('test-target');
diff --git a/apps/cli/test/commands/eval/pipeline/grade.test.ts b/apps/cli/test/commands/eval/pipeline/grade.test.ts
index d2cdf802..cf9abac3 100644
--- a/apps/cli/test/commands/eval/pipeline/grade.test.ts
+++ b/apps/cli/test/commands/eval/pipeline/grade.test.ts
@@ -46,7 +46,7 @@ describe('pipeline grade', () => {
     await rm(OUT_DIR, { recursive: true, force: true });
   });
 
-  it('writes code_grader_results/<name>.json with score', async () => {
+  it('writes code_grader_results/<name>.json with score and assertions', async () => {
     const { execa } = await import('execa');
     await execa('bun', [CLI_ENTRY, 'pipeline', 'grade', OUT_DIR]);
 
@@ -55,15 +55,6 @@ describe('pipeline grade', () => {
     );
     expect(result.score).toBe(1);
     expect(result.name).toBe('always_pass');
-  }, 30_000);
-
-  it('includes assertions from code grader output', async () => {
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'grade', OUT_DIR]);
-
-    const result = JSON.parse(
-      await readFile(join(OUT_DIR, 'test-01', 'code_grader_results', 'always_pass.json'), 'utf8'),
-    );
     expect(result.assertions).toHaveLength(1);
     expect(result.assertions[0].passed).toBe(true);
   }, 30_000);
@@ -83,7 +74,6 @@ describe('pipeline grade — builtin assertions', () => {
       JSON.stringify({ input: [{ role: 'user', content: 'say hello' }] }),
     );
 
-    // contains assertion — should pass
     await writeFile(
       join(builtinGradersDir, 'has_hello.json'),
       JSON.stringify({
@@ -95,7 +85,6 @@ describe('pipeline grade — builtin assertions', () => {
       }),
     );
 
-    // regex assertion — should pass
     await writeFile(
       join(builtinGradersDir, 'matches_pattern.json'),
       JSON.stringify({
@@ -107,7 +96,6 @@ describe('pipeline grade — builtin assertions', () => {
       }),
     );
 
-    // contains assertion — should fail
     await writeFile(
       join(builtinGradersDir, 'has_goodbye.json'),
       JSON.stringify({
@@ -134,48 +122,37 @@ describe('pipeline grade — builtin assertions', () => {
     await rm(BUILTIN_OUT, { recursive: true, force: true });
   });
 
-  it('evaluates contains assertion and writes result', async () => {
+  it('evaluates builtin assertions and writes results', async () => {
     const { execa } = await import('execa');
     await execa('bun', [CLI_ENTRY, 'pipeline', 'grade', BUILTIN_OUT]);
 
-    const result = JSON.parse(
+    const containsResult = JSON.parse(
       await readFile(join(BUILTIN_OUT, 'test-01', 'code_grader_results', 'has_hello.json'), 'utf8'),
     );
-    expect(result.score).toBe(1);
-    expect(result.type).toBe('contains');
-    expect(result.assertions[0].passed).toBe(true);
-  }, 30_000);
-
-  it('evaluates regex assertion and writes result', async () => {
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'grade', BUILTIN_OUT]);
+    expect(containsResult.score).toBe(1);
+    expect(containsResult.type).toBe('contains');
+    expect(containsResult.assertions[0].passed).toBe(true);
 
-    const result = JSON.parse(
+    const regexResult = JSON.parse(
       await readFile(
         join(BUILTIN_OUT, 'test-01', 'code_grader_results', 'matches_pattern.json'),
         'utf8',
       ),
     );
-    expect(result.score).toBe(1);
-    expect(result.type).toBe('regex');
-  }, 30_000);
+    expect(regexResult.score).toBe(1);
+    expect(regexResult.type).toBe('regex');
 
-  it('scores 0 when contains assertion does not match', async () => {
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'grade', BUILTIN_OUT]);
-
-    const result = JSON.parse(
+    const failingContainsResult = JSON.parse(
       await readFile(
         join(BUILTIN_OUT, 'test-01', 'code_grader_results', 'has_goodbye.json'),
         'utf8',
       ),
     );
-    expect(result.score).toBe(0);
-    expect(result.assertions[0].passed).toBe(false);
+    expect(failingContainsResult.score).toBe(0);
+    expect(failingContainsResult.assertions[0].passed).toBe(false);
   }, 30_000);
 
   it('applies negate to invert score', async () => {
-    // Overwrite has_goodbye with negate: true — "not contains goodbye" should pass
     await writeFile(
       join(BUILTIN_OUT, 'test-01', 'code_graders', 'has_goodbye.json'),
       JSON.stringify({
diff --git a/apps/cli/test/commands/eval/pipeline/input.test.ts b/apps/cli/test/commands/eval/pipeline/input.test.ts
index d814675e..f994207c 100644
--- a/apps/cli/test/commands/eval/pipeline/input.test.ts
+++ b/apps/cli/test/commands/eval/pipeline/input.test.ts
@@ -12,65 +12,41 @@ describe('pipeline input', () => {
     await rm(OUT_DIR, { recursive: true, force: true });
   });
 
-  it('writes manifest.json with test_ids and eval_file', async () => {
+  it('materializes the default input workspace', async () => {
     const { execa } = await import('execa');
     await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', OUT_DIR]);
 
     const manifest = JSON.parse(await readFile(join(OUT_DIR, 'manifest.json'), 'utf8'));
     expect(manifest.test_ids).toEqual(['test-01']);
     expect(manifest.eval_file).toContain('input-test.eval.yaml');
-  }, 30_000);
-
-  it('writes per-test input.json with input and input_files', async () => {
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', OUT_DIR]);
+    expect(manifest.experiment).toBeUndefined();
 
     const input = JSON.parse(
       await readFile(join(OUT_DIR, 'input-test', 'test-01', 'input.json'), 'utf8'),
     );
     expect(input.input).toHaveLength(1);
     expect(input.input[0].content).toBe('hello world');
-  }, 30_000);
-
-  it('writes code_graders/<name>.json with resolved command', async () => {
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', OUT_DIR]);
 
-    const grader = JSON.parse(
+    const codeGrader = JSON.parse(
       await readFile(
         join(OUT_DIR, 'input-test', 'test-01', 'code_graders', 'contains_hello.json'),
         'utf8',
       ),
     );
-    expect(grader.command).toBeDefined();
-    expect(grader.name).toBe('contains_hello');
-  }, 30_000);
+    expect(codeGrader.command).toBeDefined();
+    expect(codeGrader.name).toBe('contains_hello');
 
-  it('writes llm_graders/<name>.json with prompt content', async () => {
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', OUT_DIR]);
-
-    const grader = JSON.parse(
+    const llmGrader = JSON.parse(
       await readFile(
         join(OUT_DIR, 'input-test', 'test-01', 'llm_graders', 'relevance.json'),
         'utf8',
       ),
     );
-    expect(grader.prompt_content).toBeDefined();
-    expect(grader.name).toBe('relevance');
-  }, 30_000);
-
-  it('writes criteria.md', async () => {
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', OUT_DIR]);
+    expect(llmGrader.prompt_content).toBeDefined();
+    expect(llmGrader.name).toBe('relevance');
 
     const criteria = await readFile(join(OUT_DIR, 'input-test', 'test-01', 'criteria.md'), 'utf8');
     expect(criteria).toContain('Response echoes the input');
-  }, 30_000);
-
-  it('writes invoke.json', async () => {
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', OUT_DIR]);
 
     const invoke = JSON.parse(
       await readFile(join(OUT_DIR, 'input-test', 'test-01', 'invoke.json'), 'utf8'),
@@ -95,14 +71,6 @@ describe('pipeline input', () => {
     expect(manifest.experiment).toBe('without_skills');
   }, 30_000);
 
-  it('omits experiment from manifest when --experiment is not provided', async () => {
-    const { execa } = await import('execa');
-    await execa('bun', [CLI_ENTRY, 'pipeline', 'input', EVAL_PATH, '--out', OUT_DIR]);
-
-    const manifest = JSON.parse(await readFile(join(OUT_DIR, 'manifest.json'), 'utf8'));
-    expect(manifest.experiment).toBeUndefined();
-  }, 30_000);
-
   it('writes code_graders/<name>.json for deterministic assertions', async () => {
     const { execa } = await import('execa');
     const builtinEvalPath = join(FIXTURE_DIR, 'builtin-test.eval.yaml');
diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts
index 519382f8..8d2e9f3e 100644
--- a/apps/cli/test/commands/results/serve.test.ts
+++ b/apps/cli/test/commands/results/serve.test.ts
@@ -563,15 +563,12 @@ describe('serve app', () => {
       });
     });
 
-    it('computes pass_rate using the configured dashboard threshold (strict threshold yields lower rate)', async () => {
+    it('computes pass_rate using the configured dashboard threshold', async () => {
       const runsDir = path.join(tempDir, '.agentv', 'results', 'runs');
       mkdirSync(runsDir, { recursive: true });
       const filename = '2026-03-25T10-00-00-000Z';
       const runDir = path.join(runsDir, filename);
       mkdirSync(runDir, { recursive: true });
-      // Two results: score=0.8 and score=0.6
-      // With DEFAULT_THRESHOLD=0.8: score=0.8 passes → 1/2 = 50%
-      // With threshold=0.9: neither passes → 0%
       const resultHigh = { ...RESULT_A, test_id: 'high', score: 0.8 };
       const resultLow = { ...RESULT_B, test_id: 'low', score: 0.6 };
       writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(resultHigh, resultLow));
@@ -584,35 +581,9 @@ describe('serve app', () => {
       expect(res.status).toBe(200);
       const data = (await res.json()) as { runs: Array<{ pass_rate: number }> };
       expect(data.runs).toHaveLength(1);
-      // With threshold=0.9: neither 0.8 nor 0.6 passes → 0%
       expect(data.runs[0].pass_rate).toBe(0);
     });
 
-    it('computes pass_rate using the configured dashboard threshold (lenient threshold yields higher rate)', async () => {
-      const runsDir = path.join(tempDir, '.agentv', 'results', 'runs');
-      mkdirSync(runsDir, { recursive: true });
-      const filename = '2026-03-25T12-00-00-000Z';
-      const runDir = path.join(runsDir, filename);
-      mkdirSync(runDir, { recursive: true });
-      // Two results: score=0.8 and score=0.6
-      // With DEFAULT_THRESHOLD=0.8: score=0.8 passes → 1/2 = 50%
-      // With threshold=0.5: both pass → 2/2 = 100%
-      const resultHigh = { ...RESULT_A, test_id: 'high', score: 0.8 };
-      const resultLow = { ...RESULT_B, test_id: 'low', score: 0.6 };
-      writeFileSync(path.join(runDir, 'index.jsonl'), toJsonl(resultHigh, resultLow));
-
-      mkdirSync(path.join(tempDir, '.agentv'), { recursive: true });
-      writeFileSync(path.join(tempDir, '.agentv', 'config.yaml'), 'dashboard:\n  threshold: 0.5\n');
-
-      const app = createApp([], tempDir, tempDir, undefined, { studioDir });
-      const res = await app.request('/api/runs');
-      expect(res.status).toBe(200);
-      const data = (await res.json()) as { runs: Array<{ pass_rate: number }> };
-      expect(data.runs).toHaveLength(1);
-      // With threshold=0.5: both 0.8 and 0.6 pass → 100%
-      expect(data.runs[0].pass_rate).toBe(1);
-    });
-
     it('infers the experiment name from the run id when live results have not written it yet', async () => {
       const runsDir = path.join(tempDir, '.agentv', 'results', 'runs', 'issue-1198-live-name');
       mkdirSync(runsDir, { recursive: true });
diff --git a/apps/cli/test/commands/results/studio-config.test.ts b/apps/cli/test/commands/results/studio-config.test.ts
index 5dd80b72..cb9ab2e8 100644
--- a/apps/cli/test/commands/results/studio-config.test.ts
+++ b/apps/cli/test/commands/results/studio-config.test.ts
@@ -24,28 +24,15 @@ describe('loadStudioConfig', () => {
     expect(config.threshold).toBe(DEFAULT_THRESHOLD);
   });
 
-  it('reads threshold from dashboard section', () => {
-    writeFileSync(path.join(tempDir, 'config.yaml'), 'dashboard:\n  threshold: 0.6\n');
-    const config = loadStudioConfig(tempDir);
-    expect(config.threshold).toBe(0.6);
-  });
-
-  it('reads pass_threshold from dashboard section as fallback', () => {
-    writeFileSync(path.join(tempDir, 'config.yaml'), 'dashboard:\n  pass_threshold: 0.6\n');
-    const config = loadStudioConfig(tempDir);
-    expect(config.threshold).toBe(0.6);
-  });
-
-  it('reads threshold from studio section as fallback (legacy)', () => {
-    writeFileSync(path.join(tempDir, 'config.yaml'), 'studio:\n  threshold: 0.6\n');
-    const config = loadStudioConfig(tempDir);
-    expect(config.threshold).toBe(0.6);
-  });
-
-  it('reads pass_threshold from studio section as fallback (legacy)', () => {
-    writeFileSync(path.join(tempDir, 'config.yaml'), 'studio:\n  pass_threshold: 0.6\n');
-    const config = loadStudioConfig(tempDir);
-    expect(config.threshold).toBe(0.6);
+  it.each([
+    ['dashboard.threshold', 'dashboard:\n  threshold: 0.6\n'],
+    ['dashboard.pass_threshold fallback', 'dashboard:\n  pass_threshold: 0.6\n'],
+    ['legacy studio.threshold fallback', 'studio:\n  threshold: 0.6\n'],
+    ['legacy studio.pass_threshold fallback', 'studio:\n  pass_threshold: 0.6\n'],
+    ['legacy root pass_threshold fallback', 'pass_threshold: 0.6\n'],
+  ])('reads %s', (_name, yaml) => {
+    writeFileSync(path.join(tempDir, 'config.yaml'), yaml);
+    expect(loadStudioConfig(tempDir).threshold).toBe(0.6);
   });
 
   it('prefers dashboard.threshold over dashboard.pass_threshold', () => {
@@ -75,12 +62,6 @@ describe('loadStudioConfig', () => {
     expect(config.threshold).toBe(0.5);
   });
 
-  it('falls back to root-level pass_threshold (legacy)', () => {
-    writeFileSync(path.join(tempDir, 'config.yaml'), 'pass_threshold: 0.7\n');
-    const config = loadStudioConfig(tempDir);
-    expect(config.threshold).toBe(0.7);
-  });
-
   it('prefers dashboard section over root-level pass_threshold', () => {
     writeFileSync(
       path.join(tempDir, 'config.yaml'),
@@ -90,16 +71,12 @@ describe('loadStudioConfig', () => {
     expect(config.threshold).toBe(0.9);
   });
 
-  it('clamps threshold to 0 when negative', () => {
-    writeFileSync(path.join(tempDir, 'config.yaml'), 'dashboard:\n  threshold: -0.5\n');
-    const config = loadStudioConfig(tempDir);
-    expect(config.threshold).toBe(0);
-  });
-
-  it('clamps threshold to 1 when above 1', () => {
-    writeFileSync(path.join(tempDir, 'config.yaml'), 'dashboard:\n  threshold: 1.5\n');
-    const config = loadStudioConfig(tempDir);
-    expect(config.threshold).toBe(1);
+  it.each([
+    ['negative', -0.5, 0],
+    ['above 1', 1.5, 1],
+  ])('clamps %s threshold', (_name, value, expected) => {
+    writeFileSync(path.join(tempDir, 'config.yaml'), `dashboard:\n  threshold: ${value}\n`);
+    expect(loadStudioConfig(tempDir).threshold).toBe(expected);
   });
 
   it('returns defaults for empty config.yaml', () => {
@@ -140,10 +117,10 @@ describe('saveStudioConfig', () => {
     expect((parsed.dashboard as Record<string, unknown>).threshold).toBe(0.9);
   });
 
-  it('removes legacy root-level pass_threshold on save', () => {
+  it('writes canonical dashboard.threshold and removes legacy threshold fields on save', () => {
     writeFileSync(
       path.join(tempDir, 'config.yaml'),
-      'required_version: ">=4.2.0"\npass_threshold: 0.8\n',
+      'required_version: ">=4.2.0"\npass_threshold: 0.8\ndashboard:\n  pass_threshold: 0.6\nstudio:\n  theme: dark\n  pass_threshold: 0.5\n',
     );
     saveStudioConfig(tempDir, { threshold: 0.7 });
 
@@ -151,28 +128,9 @@ describe('saveStudioConfig', () => {
     const parsed = parseYaml(raw) as Record<string, unknown>;
     expect(parsed.required_version).toBe('>=4.2.0');
     expect(parsed.pass_threshold).toBeUndefined();
-    expect((parsed.dashboard as Record<string, unknown>).threshold).toBe(0.7);
-  });
-
-  it('removes legacy pass_threshold from dashboard section on save', () => {
-    writeFileSync(path.join(tempDir, 'config.yaml'), 'dashboard:\n  pass_threshold: 0.8\n');
-    saveStudioConfig(tempDir, { threshold: 0.7 });
-
-    const raw = readFileSync(path.join(tempDir, 'config.yaml'), 'utf-8');
-    const parsed = parseYaml(raw) as Record<string, unknown>;
-    const dashboard = parsed.dashboard as Record<string, unknown>;
-    expect(dashboard.pass_threshold).toBeUndefined();
-    expect(dashboard.threshold).toBe(0.7);
-  });
-
-  it('migrates legacy studio section to dashboard on save', () => {
-    writeFileSync(path.join(tempDir, 'config.yaml'), 'studio:\n  pass_threshold: 0.8\n');
-    saveStudioConfig(tempDir, { threshold: 0.7 });
-
-    const raw = readFileSync(path.join(tempDir, 'config.yaml'), 'utf-8');
-    const parsed = parseYaml(raw) as Record<string, unknown>;
-    const dashboard = parsed.dashboard as Record<string, unknown>;
     expect(parsed.studio).toBeUndefined();
+    const dashboard = parsed.dashboard as Record<string, unknown>;
+    expect(dashboard.theme).toBe('dark');
     expect(dashboard.pass_threshold).toBeUndefined();
     expect(dashboard.threshold).toBe(0.7);
   });
diff --git a/apps/cli/test/unit/studio-navigation.test.ts b/apps/cli/test/unit/studio-navigation.test.ts
deleted file mode 100644
index d75c729a..00000000
--- a/apps/cli/test/unit/studio-navigation.test.ts
+++ /dev/null
@@ -1,62 +0,0 @@
-import { describe, expect, it } from 'bun:test';
-
-import {
-  categoryPath,
-  evalPath,
-  experimentPath,
-  jobPath,
-  projectHomePath,
-  resolveIndexRoute,
-  runPath,
-  runsHomePath,
-  suitePath,
-} from '../../../dashboard/src/lib/navigation.ts';
-
-describe('studio navigation helpers', () => {
-  it('redirects when the preferred project id matches a registered project', () => {
-    expect(resolveIndexRoute(['demo-project'], undefined, 'demo-project', 'analytics')).toEqual({
-      kind: 'redirect',
-      redirectPath: '/projects/demo-project?tab=analytics',
-    });
-  });
-
-  it('keeps explicit single-project mode on the legacy root home', () => {
-    expect(resolveIndexRoute(['demo-project'], false, 'runs')).toEqual({
-      kind: 'single-project-home',
-    });
-  });
-
-  it('keeps the dashboard for zero or many projects', () => {
-    expect(resolveIndexRoute([], true)).toEqual({ kind: 'dashboard' });
-    expect(resolveIndexRoute(['one', 'two'], true)).toEqual({ kind: 'dashboard' });
-  });
-
-  it('builds project-scoped drill-down paths', () => {
-    expect(projectHomePath('demo project', 'runs')).toBe('/projects/demo%20project?tab=runs');
-    expect(runPath('run::1', 'demo project')).toBe('/projects/demo%20project/runs/run%3A%3A1');
-    expect(evalPath('run::1', 'case/a', 'demo project')).toBe(
-      '/projects/demo%20project/evals/run%3A%3A1/case%2Fa',
-    );
-    expect(jobPath('job/1', 'demo project')).toBe('/projects/demo%20project/jobs/job%2F1');
-    expect(categoryPath('run::1', 'Safety > PII', 'demo project')).toBe(
-      '/projects/demo%20project/runs/run%3A%3A1/category/Safety%20%3E%20PII',
-    );
-    expect(suitePath('run::1', 'evals/smoke.eval.yaml', 'demo project')).toBe(
-      '/projects/demo%20project/runs/run%3A%3A1/suite/evals%2Fsmoke.eval.yaml',
-    );
-    expect(experimentPath('prod-baseline', 'demo project')).toBe(
-      '/projects/demo%20project/experiments/prod-baseline',
-    );
-  });
-
-  it('keeps unscoped paths for legacy single-project routes', () => {
-    expect(runPath('run::1')).toBe('/runs/run%3A%3A1');
-    expect(evalPath('run::1', 'case/a')).toBe('/evals/run%3A%3A1/case%2Fa');
-    expect(jobPath('job/1')).toBe('/jobs/job%2F1');
-    expect(categoryPath('run::1', 'Safety')).toBe('/runs/run%3A%3A1/category/Safety');
-    expect(suitePath('run::1', 'evals/smoke.eval.yaml')).toBe(
-      '/runs/run%3A%3A1/suite/evals%2Fsmoke.eval.yaml',
-    );
-    expect(runsHomePath()).toBe('/?tab=runs');
-  });
-});
diff --git a/apps/dashboard/src/lib/navigation.test.ts b/apps/dashboard/src/lib/navigation.test.ts
index 1c246750..b735b239 100644
--- a/apps/dashboard/src/lib/navigation.test.ts
+++ b/apps/dashboard/src/lib/navigation.test.ts
@@ -1,9 +1,16 @@
 import { describe, expect, it } from 'bun:test';
 
 import {
+  categoryPath,
+  evalPath,
+  experimentPath,
   initialProjectRedirectStorageKey,
+  jobPath,
   resolveIndexRoute,
   resolveInitialProjectRedirect,
+  runPath,
+  runsHomePath,
+  suitePath,
 } from './navigation';
 
 describe('resolveInitialProjectRedirect', () => {
@@ -44,3 +51,33 @@ describe('resolveIndexRoute', () => {
     expect(resolveIndexRoute(['alpha'], true)).toEqual({ kind: 'dashboard' });
   });
 });
+
+describe('route path helpers', () => {
+  it('builds project-scoped drill-down paths', () => {
+    expect(runPath('run::1', 'demo project')).toBe('/projects/demo%20project/runs/run%3A%3A1');
+    expect(evalPath('run::1', 'case/a', 'demo project')).toBe(
+      '/projects/demo%20project/evals/run%3A%3A1/case%2Fa',
+    );
+    expect(jobPath('job/1', 'demo project')).toBe('/projects/demo%20project/jobs/job%2F1');
+    expect(categoryPath('run::1', 'Safety > PII', 'demo project')).toBe(
+      '/projects/demo%20project/runs/run%3A%3A1/category/Safety%20%3E%20PII',
+    );
+    expect(suitePath('run::1', 'evals/smoke.eval.yaml', 'demo project')).toBe(
+      '/projects/demo%20project/runs/run%3A%3A1/suite/evals%2Fsmoke.eval.yaml',
+    );
+    expect(experimentPath('prod-baseline', 'demo project')).toBe(
+      '/projects/demo%20project/experiments/prod-baseline',
+    );
+  });
+
+  it('keeps unscoped paths for legacy single-project routes', () => {
+    expect(runPath('run::1')).toBe('/runs/run%3A%3A1');
+    expect(evalPath('run::1', 'case/a')).toBe('/evals/run%3A%3A1/case%2Fa');
+    expect(jobPath('job/1')).toBe('/jobs/job%2F1');
+    expect(categoryPath('run::1', 'Safety')).toBe('/runs/run%3A%3A1/category/Safety');
+    expect(suitePath('run::1', 'evals/smoke.eval.yaml')).toBe(
+      '/runs/run%3A%3A1/suite/evals%2Fsmoke.eval.yaml',
+    );
+    expect(runsHomePath()).toBe('/?tab=runs');
+  });
+});
diff --git a/apps/web/src/content/docs/docs/tools/dashboard.mdx b/apps/web/src/content/docs/docs/tools/dashboard.mdx
index a28084db..c0b458a8 100644
--- a/apps/web/src/content/docs/docs/tools/dashboard.mdx
+++ b/apps/web/src/content/docs/docs/tools/dashboard.mdx
@@ -62,6 +62,17 @@ agentv dashboard .agentv/results/runs/2026-03-30T11-45-56-989Z
 - **Analytics** — two modes: an aggregated experiment × target matrix, and a per-run view for selecting individual runs to compare side-by-side with optional retroactive tags. Includes a collapsible charts section with baseline comparison analytics
 - **Remote Results** — sync and browse runs pushed from other machines or CI (see [Remote Results](#remote-results))
 
+## Pass threshold
+
+Dashboard treats scores greater than or equal to the configured threshold as passing when it calculates pass rates. Configure this in `.agentv/config.yaml`:
+
+```yaml
+dashboard:
+  threshold: 0.8
+```
+
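+Legacy `dashboard.pass_threshold`, `studio.threshold`, `studio.pass_threshold`, and root-level `pass_threshold` values are still read for existing projects. A `dashboard.threshold` below 0 or above 1 is clamped to that range. When Dashboard saves settings, it writes the canonical `dashboard.threshold` field, removes the legacy threshold fields, moves any remaining `studio` settings under `dashboard`, and preserves unrelated config.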
+
 ## Run Detail
 
 Click any run to see a breakdown by suite, per-test scores, target, duration, and cost. The source label (`local` or `remote`) tells you where the run came from.
@@ -182,14 +193,11 @@ This satisfies the 24/7-Dashboard use case: the server stays up; projects come a
 
 ### Launching the Dashboard
 
-Dashboard auto-detects the mode based on how many projects are registered:
-
-- `0` or `1` registered: single-project view
-- `2+` registered: Projects dashboard
+Dashboard opens the Projects dashboard by default, even when zero or one project is registered. When launched from a registered project, the UI redirects to that project's runs tab on first load. Use `--single` only when you need the legacy single-project route layout.
 
 ```bash
-agentv dashboard          # auto-detects
-agentv dashboard --single # force single-project view
+agentv dashboard          # Projects dashboard
+agentv dashboard --single # legacy single-project route layout
 ```
 
 The landing page shows a card for each project with run count, pass rate, and last run time.