From 8907c8d85bb9fcc2c5669c319a7a775709ae333c Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Mon, 18 May 2026 15:52:09 -0700 Subject: [PATCH 1/8] feat(evidence): repo-aware evidence strategy with required expectations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Evidence quality is the trust bottleneck — the orchestrator was writing vague "verify it works" expectations (or none at all) regardless of whether the target repo has a UI to screenshot or is a pure library. Add `evidenceStrategy` field to projects.json (ui-screenshot, scenario-script, test-output) so each repo declares what kind of proof is meaningful. The orchestrator prompt now includes a lookup table mapping strategies to concrete, falsifiable evidence expectations. `evidenceExpectations` is required on all tasks — the verifier checks its work against these specific expectations, not just the generic rubric. --- agents/verifier.md | 9 ++++++--- projects.json | 4 ++++ projects.schema.json | 5 +++++ src/__tests__/task-factory.spec.ts | 5 +++++ src/agent/orchestrator-session.ts | 15 +++++++++++++++ src/agent/tools/task-tool.ts | 7 ++++--- src/commands/create.ts | 7 +++++-- src/context/assembler.ts | 5 ++++- src/entry/cli-orchestrator.ts | 29 ++++++++++++++++++++++++++++- src/types.ts | 13 +++++++++++-- 10 files changed, 87 insertions(+), 12 deletions(-) diff --git a/agents/verifier.md b/agents/verifier.md index 1d9daa7..76213f3 100644 --- a/agents/verifier.md +++ b/agents/verifier.md @@ -38,13 +38,14 @@ Read the output to understand: current branch, last commits, task status, which ca status agent verifier started now ``` 2. Read the task file — understand the issue, objective, and acceptance criteria -3. Read the git diff to understand what the implementer changed: +3. **Read the `## Evidence Expectations` section.** This is the contract from the orchestrator — it specifies exactly what evidence you must produce. 
Your verification plan must satisfy every expectation listed. If the section is missing or vague, treat it as a defect and report it rather than guessing. +4. Read the git diff to understand what the implementer changed: ```bash git log --oneline -5 git diff HEAD~1 --stat git diff HEAD~1 ``` -4. Read the issue reference from the task file to understand what to test specifically +5. Read the issue reference from the task file to understand what to test specifically ### 2. Determine Scope @@ -332,7 +333,9 @@ Most AuthKit example apps redirect to the WorkOS hosted login page. Follow this ### 5b. Score Rubric -After testing, score each category honestly. `fail` means the evidence doesn't support this claim. `na` means the category genuinely doesn't apply (justify why in detail). +After testing, re-read the `## Evidence Expectations` section from the task file. For each expectation listed, confirm your evidence satisfies it. If any expectation is unmet, your rubric verdict for `evidence-proves-change` must be `fail` — even if the generic rubric questions would pass. + +Score each category honestly. `fail` means the evidence doesn't support this claim. `na` means the category genuinely doesn't apply (justify why in detail). 
| Category | Question | When to mark NA | | ------------------------ | --------------------------------------------------------------- | -------------------------------------------------------- | diff --git a/projects.json b/projects.json index 4a9e8bc..1a6a17f 100644 --- a/projects.json +++ b/projects.json @@ -4,6 +4,7 @@ { "name": "cli", "type": "library", + "evidenceStrategy": "scenario-script", "path": "../cli/main", "remote": "git@github.com:workos/workos-cli.git", "description": "WorkOS CLI for installing AuthKit integrations and managing WorkOS resources", @@ -20,6 +21,7 @@ { "name": "skills", "type": "library", + "evidenceStrategy": "test-output", "path": "../skills", "remote": "git@github.com:workos/skills.git", "description": "WorkOS integration skills", @@ -35,6 +37,7 @@ { "name": "authkit-session", "type": "library", + "evidenceStrategy": "scenario-script", "path": "../authkit-session", "remote": "git@github.com:workos/authkit-ssr.git", "description": "Framework-agnostic TypeScript authentication library for WorkOS with pluggable storage adapters", @@ -82,6 +85,7 @@ { "name": "workos-node", "type": "library", + "evidenceStrategy": "scenario-script", "path": "../workos-node/main", "remote": "git@github.com:workos/workos-node.git", "description": "WorkOS SDK for Node/JavaScript/TypeScript projects", diff --git a/projects.schema.json b/projects.schema.json index 7ad9cf4..6c7e630 100644 --- a/projects.schema.json +++ b/projects.schema.json @@ -24,6 +24,11 @@ "description": "Whether this repo has a web UI (app) or is a pure library. Library repos skip the Playwright manual test requirement.", "default": "app" }, + "evidenceStrategy": { + "type": "string", + "enum": ["ui-screenshot", "scenario-script", "test-output"], + "description": "What kind of evidence proves a change works. ui-screenshot: Playwright before/after for apps with UI. scenario-script: consumer script that exercises the changed API for libraries. 
test-output: test suite + typecheck + build for pure-logic/config changes. Defaults based on type: app→ui-screenshot, library→scenario-script." + }, "path": { "type": "string", "description": "Relative path from case/ to the repo root" diff --git a/src/__tests__/task-factory.spec.ts b/src/__tests__/task-factory.spec.ts index ae08f79..cf09ef3 100644 --- a/src/__tests__/task-factory.spec.ts +++ b/src/__tests__/task-factory.spec.ts @@ -24,6 +24,7 @@ describe('createTask', () => { title: 'Fix broken test', description: 'The login test is failing intermittently.', trigger: { type: 'manual', description: 'Created manually' }, + evidenceExpectations: 'Full test suite passes. The flaky login test passes 10 consecutive runs.', }; const result = await createTask(tempDir, request, { repoPath: tempDir }); @@ -43,6 +44,8 @@ describe('createTask', () => { expect(taskMd).toContain('Fix broken test'); expect(taskMd).toContain('The login test'); expect(taskMd).toContain('Repo:** cli'); + expect(taskMd).toContain('## Evidence Expectations'); + expect(taskMd).toContain('flaky login test passes 10 consecutive runs'); expect((await Bun.file(join(tempDir, '.case', 'active')).text()).trim()).toBe(result.taskId); }); @@ -55,6 +58,7 @@ describe('createTask', () => { issue: 'https://github.com/workos/authkit-ssr/issues/42', mode: 'unattended', trigger: { type: 'webhook', event: 'workflow_run', deliveryId: 'abc-123' }, + evidenceExpectations: 'Lint passes cleanly. 
No regressions in existing tests.', }; const result = await createTask(tempDir, request, { repoPath: tempDir }); @@ -77,6 +81,7 @@ describe('createTask', () => { checkCommand: 'vitest run --reporter=json', checkBaseline: 10, checkTarget: 12, + evidenceExpectations: 'Test count increases from 10 to 12.', }; const result = await createTask(tempDir, request, { repoPath: tempDir }); diff --git a/src/agent/orchestrator-session.ts b/src/agent/orchestrator-session.ts index 7cb41bd..2df9711 100644 --- a/src/agent/orchestrator-session.ts +++ b/src/agent/orchestrator-session.ts @@ -315,6 +315,21 @@ Explore the codebase to understand the current state. Read relevant files, check ### 2. Plan Translate the request into a task: title, description, target repo, acceptance criteria, verification scenarios, non-goals, edge cases, and evidence expectations. The task should be small enough for one PR. +**Evidence expectations are required.** Every task must specify what proof the verifier should produce. Use the repo's \`evidenceStrategy\` from projects.json to guide what kind of evidence to expect: + +| Strategy | When | Evidence expectations should specify | +|---|---|---| +| \`ui-screenshot\` | App with a web UI | Before/after screenshots showing the behavior change. What page to visit, what to click, what should look different. | +| \`scenario-script\` | Library or CLI | A consumer-perspective script that imports the changed API, exercises the specific code path, and asserts expected behavior. Describe what the script should test and what PASS looks like. | +| \`test-output\` | Pure logic, config, or docs | Full test suite passes, typecheck passes, build succeeds. Name specific new or modified tests that cover the change. | + +Write evidence expectations as concrete, falsifiable statements — not vague "verify it works" descriptions. The verifier uses these to decide what to test and the closer uses them to decide what to include in the PR. 
+ +**Bad:** "Verify the fix works" +**Good (ui-screenshot):** "Before: /settings page shows 'undefined' for org name. After: /settings page shows the actual org name. Requires AuthKit login with test credentials." +**Good (scenario-script):** "Script imports \`listOrganizations\` from the SDK, calls it with \`limit: 1\`, asserts the response has a \`data\` array with at least one entry." +**Good (test-output):** "The new \`serializeSession()\` unit tests pass. Typecheck passes. No regressions in existing session tests." + ### 3. Confirm Present a brief summary of what will be built and ask the user to confirm before executing. Keep it to 3-5 bullet points. diff --git a/src/agent/tools/task-tool.ts b/src/agent/tools/task-tool.ts index 00d3f94..0cb8266 100644 --- a/src/agent/tools/task-tool.ts +++ b/src/agent/tools/task-tool.ts @@ -19,9 +19,10 @@ const taskParams = Type.Object({ ), nonGoals: Type.Optional(Type.String({ description: 'What is explicitly NOT in scope for this task' })), edgeCases: Type.Optional(Type.String({ description: 'Edge cases the implementer should consider' })), - evidenceExpectations: Type.Optional( - Type.String({ description: 'What evidence proves the fix works (screenshots, test output, etc.)' }), - ), + evidenceExpectations: Type.String({ + description: + 'Required. Concrete, falsifiable description of what evidence the verifier must produce. 
Adapt to the repo evidenceStrategy: ui-screenshot → before/after screenshots with specific pages and interactions; scenario-script → consumer script exercising the changed API; test-output → specific tests that must pass.', + }), }); export function createTaskTool(caseRoot: string) { diff --git a/src/commands/create.ts b/src/commands/create.ts index 3759522..9be8df5 100644 --- a/src/commands/create.ts +++ b/src/commands/create.ts @@ -15,6 +15,7 @@ export async function handler(argv: string[]): Promise { issue: { type: 'string' }, 'issue-type': { type: 'string' }, mode: { type: 'string', short: 'm' }, + evidence: { type: 'string' }, }, allowPositionals: true, strict: false, @@ -23,9 +24,10 @@ export async function handler(argv: string[]): Promise { const repo = values.repo as string | undefined; const title = values.title as string | undefined; const description = values.description as string | undefined; + const evidence = values.evidence as string | undefined; - if (!repo || !title || !description) { - process.stderr.write('Error: --repo, --title, and --description are required\n'); + if (!repo || !title || !description || !evidence) { + process.stderr.write('Error: --repo, --title, --description, and --evidence are required\n'); return 1; } @@ -41,6 +43,7 @@ export async function handler(argv: string[]): Promise { issueType: issueType ?? (values.issue ? 
'github' : 'freeform'), mode, trigger: { type: 'cli', user: 'local' }, + evidenceExpectations: evidence, }; try { diff --git a/src/context/assembler.ts b/src/context/assembler.ts index 3116c54..49492be 100644 --- a/src/context/assembler.ts +++ b/src/context/assembler.ts @@ -1,4 +1,5 @@ import type { AgentName, AgentResult, PipelineConfig, RevisionRequest, TaskJson } from '../types.js'; +import { resolveEvidenceStrategy } from '../types.js'; import type { RepoContext } from './prefetch.js'; import { readPackageAssetSync } from '../package-assets.js'; @@ -127,8 +128,10 @@ function buildContextBlock( break; case 'verifier': - // Deliberately minimal — fresh-context testing + // Deliberately minimal — fresh-context testing, plus evidence strategy appendProjectCommands(lines, config); + lines.push(`- **Evidence strategy**: \`${resolveEvidenceStrategy(config.project)}\``); + lines.push(''); break; case 'reviewer': diff --git a/src/entry/cli-orchestrator.ts b/src/entry/cli-orchestrator.ts index 4c342ec..9b8e174 100644 --- a/src/entry/cli-orchestrator.ts +++ b/src/entry/cli-orchestrator.ts @@ -9,7 +9,8 @@ import { runCommand } from '../util/run-command.js'; import { createStructuredLogRenderer } from '../render/structured-log.js'; import { formatSetupStep } from '../render/format.js'; import type { Notifier } from '../notify.js'; -import type { IssueContext, PipelineMode, PipelinePhase, TaskCreateRequest } from '../types.js'; +import type { IssueContext, PipelineMode, PipelinePhase, TaskCreateRequest, EvidenceStrategy } from '../types.js'; +import { resolveEvidenceStrategy } from '../types.js'; import type { TaskMatch } from './task-scanner.js'; export interface CliOrchestratorOptions { @@ -95,6 +96,7 @@ export async function runCliOrchestrator(options: CliOrchestratorOptions): Promi await ensureBranch(branchName, detected.path); // Create task files + const strategy = resolveEvidenceStrategy(detected.project); const request: TaskCreateRequest = { repo: detected.name, 
title: issueContext.title,
@@ -103,6 +105,7 @@ export async function runCliOrchestrator(options: CliOrchestratorOptions): Promi
     issueType: issueContext.issueType,
     mode,
     trigger: { type: 'cli', user: 'local' },
+    evidenceExpectations: defaultEvidenceExpectations(strategy, issueContext),
   };
 
   const taskResult = await createTask(caseRoot, request, { issueContext, branch: branchName, repoPath: detected.path });
@@ -204,6 +207,30 @@ function deriveBranchPrefix(labels: string[]): string {
   return 'fix';
 }
 
+const EVIDENCE_TEMPLATES: Record<EvidenceStrategy, (issue: IssueContext) => string> = { // one template builder per strategy; Record makes a missing strategy a compile error
+  'ui-screenshot': (issue) =>
+    [
+      `Before/after screenshots demonstrating the behavior change described in: ${issue.title}`,
+      'Navigate to the affected page, reproduce the scenario from the issue, and capture the state before and after the fix.',
+      'If auth is required, complete the AuthKit login flow with test credentials.',
+    ].join('\n'),
+  'scenario-script': (issue) =>
+    [
+      `Consumer script that imports the changed API and exercises the code path described in: ${issue.title}`,
+      'Script should assert expected behavior and print PASS/FAIL.',
+      'Full test suite and typecheck must also pass.',
+    ].join('\n'),
+  'test-output': (issue) =>
+    [
+      `Full test suite passes with no regressions. Typecheck and build succeed.`,
+      `Specific tests covering the change described in: ${issue.title}`,
+    ].join('\n'),
+};
+
+function defaultEvidenceExpectations(strategy: EvidenceStrategy, issue: IssueContext): string { // newline-joined default expectations built from the issue title
+  return EVIDENCE_TEMPLATES[strategy](issue);
+}
+
 /**
  * Create or checkout a git branch.
  * If branch exists, checkout. Otherwise, create from HEAD.
diff --git a/src/types.ts b/src/types.ts index 965ea92..7340fa2 100644 --- a/src/types.ts +++ b/src/types.ts @@ -168,9 +168,12 @@ export interface PipelineConfig { renderer?: 'structured' | 'tui'; } +export type EvidenceStrategy = 'ui-screenshot' | 'scenario-script' | 'test-output'; + export interface ProjectEntry { name: string; type?: 'app' | 'library' | string; + evidenceStrategy?: EvidenceStrategy; path: string; remote: string; description?: string; @@ -179,6 +182,12 @@ export interface ProjectEntry { commands: Record; } +export function resolveEvidenceStrategy(project?: ProjectEntry): EvidenceStrategy { + if (project?.evidenceStrategy) return project.evidenceStrategy; + if (project?.type === 'library') return 'scenario-script'; + return 'ui-screenshot'; +} + export interface FailureAnalysis { failureClass: string; failedAgent: string; @@ -330,8 +339,8 @@ export interface TaskCreateRequest { nonGoals?: string; /** Edge cases to consider (done contract) */ edgeCases?: string; - /** What evidence proves the fix works (done contract) */ - evidenceExpectations?: string; + /** What evidence proves the fix works — required for all tasks (done contract) */ + evidenceExpectations: string; } // Event system re-exports From 831e3fe7319bd61cc5291a1bc1d8bc3c89bd6ce6 Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Mon, 18 May 2026 15:58:33 -0700 Subject: [PATCH 2/8] feat(cli): add ca onboard command, remove type field from projects.json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The binary type field (app|library) was broken — most repos didn't set it, and the inference was wrong. Replace it entirely with the explicit evidenceStrategy field (now required). All 6 repos get explicit strategies; the two app repos that were missing it get scenario-script. 
Add `ca onboard ` — probes a repo for package manager, language, git remote, and scripts, infers the evidence strategy, writes the entry to projects.json, then runs bootstrap to validate. This prevents future repos from being added without an evidence contract. Also updates the verifier to branch on evidenceStrategy instead of Repo type, and adds a test-output fast path for repos that only need automated evidence. --- agents/verifier.md | 8 +- projects.json | 6 +- projects.schema.json | 10 +- src/__tests__/assembler.spec.ts | 6 +- src/__tests__/commands.spec.ts | 1 + src/__tests__/config.spec.ts | 2 +- src/__tests__/onboard.spec.ts | 109 ++++++++++++++++++ src/commands/index.ts | 2 + src/commands/onboard.ts | 197 ++++++++++++++++++++++++++++++++ src/context/assembler.ts | 8 +- src/types.ts | 7 +- 11 files changed, 327 insertions(+), 29 deletions(-) create mode 100644 src/__tests__/onboard.spec.ts create mode 100644 src/commands/onboard.ts diff --git a/agents/verifier.md b/agents/verifier.md index 76213f3..4d7fa68 100644 --- a/agents/verifier.md +++ b/agents/verifier.md @@ -49,9 +49,11 @@ Read the output to understand: current branch, last commits, task status, which ### 2. Determine Scope -First, check the `Repo type` field in the Task Context. +Check the `Evidence strategy` field in the Task Context. -- **If `library`**: This is a pure library with no web UI. Skip Playwright (step 3) and go to **step 2b (Library Verification)** instead. +- **If `scenario-script`**: This is a library or CLI with no web UI. Skip Playwright (step 3) and go to **step 2b (Library Verification)** instead. +- **If `test-output`**: Only automated evidence is needed. Skip to step 5 (Record) — the implementer's test output is the primary evidence. +- **If `ui-screenshot`**: Continue below. 
Then check if `src/` files changed (use both HEAD~1 and main for broad coverage): @@ -59,7 +61,7 @@ Then check if `src/` files changed (use both HEAD~1 and main for broad coverage) git diff --name-only HEAD~1 | grep "^src/" || git diff --name-only main | grep "^src/" ``` -- **If `src/` files changed AND repo type is `app`**: Manual testing is required. Continue to step 3. +- **If `src/` files changed AND strategy is `ui-screenshot`**: Manual testing is required. Continue to step 3. - **If NO `src/` files changed**: Manual testing is optional. Skip to step 5 (Record), marking verification as complete without Playwright evidence. ### 2b. Library Verification diff --git a/projects.json b/projects.json index 1a6a17f..5df34da 100644 --- a/projects.json +++ b/projects.json @@ -3,7 +3,6 @@ "repos": [ { "name": "cli", - "type": "library", "evidenceStrategy": "scenario-script", "path": "../cli/main", "remote": "git@github.com:workos/workos-cli.git", @@ -20,7 +19,6 @@ }, { "name": "skills", - "type": "library", "evidenceStrategy": "test-output", "path": "../skills", "remote": "git@github.com:workos/skills.git", @@ -36,7 +34,6 @@ }, { "name": "authkit-session", - "type": "library", "evidenceStrategy": "scenario-script", "path": "../authkit-session", "remote": "git@github.com:workos/authkit-ssr.git", @@ -53,6 +50,7 @@ }, { "name": "authkit-tanstack-start", + "evidenceStrategy": "scenario-script", "path": "../authkit-tanstack-start", "remote": "git@github.com:workos/authkit-tanstack-start.git", "description": "WorkOS library for TanStack Start providing authentication and session management helpers", @@ -68,6 +66,7 @@ }, { "name": "authkit-nextjs", + "evidenceStrategy": "scenario-script", "path": "../authkit-nextjs", "remote": "git@github.com:workos/authkit-nextjs.git", "description": "Authentication and session helpers for using WorkOS & AuthKit with Next.js", @@ -84,7 +83,6 @@ }, { "name": "workos-node", - "type": "library", "evidenceStrategy": "scenario-script", "path": 
"../workos-node/main", "remote": "git@github.com:workos/workos-node.git", diff --git a/projects.schema.json b/projects.schema.json index 6c7e630..9839328 100644 --- a/projects.schema.json +++ b/projects.schema.json @@ -12,22 +12,16 @@ "type": "array", "items": { "type": "object", - "required": ["name", "path", "remote", "description", "language", "packageManager", "commands"], + "required": ["name", "evidenceStrategy", "path", "remote", "description", "language", "packageManager", "commands"], "properties": { "name": { "type": "string", "description": "Short identifier for the repo (used in task file prefixes)" }, - "type": { - "type": "string", - "enum": ["app", "library"], - "description": "Whether this repo has a web UI (app) or is a pure library. Library repos skip the Playwright manual test requirement.", - "default": "app" - }, "evidenceStrategy": { "type": "string", "enum": ["ui-screenshot", "scenario-script", "test-output"], - "description": "What kind of evidence proves a change works. ui-screenshot: Playwright before/after for apps with UI. scenario-script: consumer script that exercises the changed API for libraries. test-output: test suite + typecheck + build for pure-logic/config changes. Defaults based on type: app→ui-screenshot, library→scenario-script." + "description": "What kind of evidence proves a change works. ui-screenshot: Playwright before/after for apps with UI. scenario-script: consumer script that exercises the changed API for libraries. test-output: test suite + typecheck + build for pure-logic/config changes." 
}, "path": { "type": "string", diff --git a/src/__tests__/assembler.spec.ts b/src/__tests__/assembler.spec.ts index 0fb18e5..6427995 100644 --- a/src/__tests__/assembler.spec.ts +++ b/src/__tests__/assembler.spec.ts @@ -107,7 +107,7 @@ describe('assemblePrompt', () => { makeConfig({ project: { name: 'cli', - type: 'library', + evidenceStrategy: 'scenario-script', path: '/repos/cli', remote: 'git@github.com:workos/workos-cli.git', language: 'typescript', @@ -120,8 +120,8 @@ describe('assemblePrompt', () => { new Map(), ); - expect(prompt).toContain('Repo type'); - expect(prompt).toContain('library'); + expect(prompt).toContain('Evidence strategy'); + expect(prompt).toContain('scenario-script'); expect(prompt).toContain('Project Commands'); expect(prompt).toContain('pnpm typecheck'); }); diff --git a/src/__tests__/commands.spec.ts b/src/__tests__/commands.spec.ts index b2178a2..3149488 100644 --- a/src/__tests__/commands.spec.ts +++ b/src/__tests__/commands.spec.ts @@ -32,6 +32,7 @@ describe('commandMap registration', () => { 'init', 'check', 'bootstrap', + 'onboard', 'session', 'status', 'mark-tested', diff --git a/src/__tests__/config.spec.ts b/src/__tests__/config.spec.ts index 07bb8ed..095a137 100644 --- a/src/__tests__/config.spec.ts +++ b/src/__tests__/config.spec.ts @@ -28,7 +28,7 @@ describe('projects config', () => { repos: [ { name: 'cli', - type: 'library', + evidenceStrategy: 'scenario-script', path: 'repos/cli', remote: 'git@github.com:workos/workos-cli.git', language: 'typescript', diff --git a/src/__tests__/onboard.spec.ts b/src/__tests__/onboard.spec.ts new file mode 100644 index 0000000..10841ab --- /dev/null +++ b/src/__tests__/onboard.spec.ts @@ -0,0 +1,109 @@ +import { describe, it, expect, beforeEach, afterEach } from 'bun:test'; +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import { join, resolve } from 'node:path'; + +function captureStream(stream: NodeJS.WriteStream): { lines: string[]; restore: () => void } { + const lines: 
string[] = []; + const original = stream.write.bind(stream); + (stream as any).write = (chunk: string | Uint8Array): boolean => { + lines.push(typeof chunk === 'string' ? chunk : Buffer.from(chunk).toString('utf-8')); + return true; + }; + return { lines, restore: () => { (stream as any).write = original; } }; +} + +describe('onboard — evidence strategy inference', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = join(process.env.TMPDIR ?? '/tmp', `case-onboard-${Date.now()}`); + await mkdir(tempDir, { recursive: true }); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('infers scenario-script for a library with build', async () => { + const { inferEvidenceStrategy } = await importOnboardInternals(); + const repoDir = join(tempDir, 'my-lib'); + await mkdir(repoDir, { recursive: true }); + await writeFile(join(repoDir, 'package.json'), JSON.stringify({ scripts: { test: 'vitest', build: 'tsc' } })); + + const result = inferEvidenceStrategy(repoDir, { setup: 'pnpm install', test: 'pnpm test', build: 'pnpm build' }); + expect(result).toBe('scenario-script'); + }); + + it('infers test-output for a repo with only test command', async () => { + const { inferEvidenceStrategy } = await importOnboardInternals(); + const repoDir = join(tempDir, 'simple'); + await mkdir(repoDir, { recursive: true }); + await writeFile(join(repoDir, 'package.json'), JSON.stringify({ scripts: { test: 'vitest' } })); + + const result = inferEvidenceStrategy(repoDir, { setup: 'pnpm install', test: 'pnpm test' }); + expect(result).toBe('test-output'); + }); + + it('infers ui-screenshot for a Next.js app', async () => { + const { inferEvidenceStrategy } = await importOnboardInternals(); + const repoDir = join(tempDir, 'my-app'); + await mkdir(repoDir, { recursive: true }); + await writeFile( + join(repoDir, 'package.json'), + JSON.stringify({ scripts: { dev: 'next dev', test: 'vitest' }, dependencies: { next: '^14' } }), + 
); + + const result = inferEvidenceStrategy(repoDir, { setup: 'pnpm install', test: 'pnpm test' }); + expect(result).toBe('ui-screenshot'); + }); + + it('infers ui-screenshot when examples/ directory exists', async () => { + const { inferEvidenceStrategy } = await importOnboardInternals(); + const repoDir = join(tempDir, 'with-examples'); + await mkdir(join(repoDir, 'examples'), { recursive: true }); + await writeFile(join(repoDir, 'package.json'), JSON.stringify({ scripts: { test: 'vitest' } })); + + const result = inferEvidenceStrategy(repoDir, { setup: 'pnpm install', test: 'pnpm test' }); + expect(result).toBe('ui-screenshot'); + }); +}); + +describe('onboard handler', () => { + let errCapture: ReturnType; + + beforeEach(() => { + errCapture = captureStream(process.stderr); + }); + + afterEach(() => { + errCapture.restore(); + }); + + it('exits 1 with usage when no path given', async () => { + const { handler } = await import('../commands/onboard.js'); + const code = await handler([]); + expect(code).toBe(1); + expect(errCapture.lines.join('')).toContain('Usage'); + }); + + it('exits 0 with --help', async () => { + const { handler } = await import('../commands/onboard.js'); + const code = await handler(['--help']); + expect(code).toBe(0); + }); + + it('exits 1 for non-existent path', async () => { + const { handler } = await import('../commands/onboard.js'); + const code = await handler(['/nonexistent/repo/path']); + expect(code).toBe(1); + expect(errCapture.lines.join('')).toContain('path not found'); + }); +}); + +async function importOnboardInternals() { + // The inference function is not exported — test via a re-export or inline. + // For now, import the module and test the handler behavior. + // To unit-test inferEvidenceStrategy, we export it. 
+ const mod = await import('../commands/onboard.js'); + return mod as any; +} diff --git a/src/commands/index.ts b/src/commands/index.ts index 60c6aed..40d8762 100644 --- a/src/commands/index.ts +++ b/src/commands/index.ts @@ -24,6 +24,7 @@ import * as init from './init.js'; import * as analyzeFailure from './analyze-failure.js'; import * as bootstrap from './bootstrap.js'; import * as check from './check.js'; +import * as onboard from './onboard.js'; export type CommandGroup = 'human' | 'agent' | 'internal'; @@ -39,6 +40,7 @@ export const commandMap: Record = { init: { handler: init.handler, description: init.description, group: 'human' }, check: { handler: check.handler, description: check.description, group: 'human' }, bootstrap: { handler: bootstrap.handler, description: bootstrap.description, group: 'human' }, + onboard: { handler: onboard.handler, description: onboard.description, group: 'human' }, session: { handler: session.handler, description: session.description, group: 'agent' }, status: { handler: status.handler, description: status.description, group: 'agent' }, 'mark-tested': { handler: markTested.handler, description: markTested.description, group: 'agent' }, diff --git a/src/commands/onboard.ts b/src/commands/onboard.ts new file mode 100644 index 0000000..c65f649 --- /dev/null +++ b/src/commands/onboard.ts @@ -0,0 +1,197 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { resolve, relative, basename } from 'node:path'; +import { loadProjectsManifest } from '../config.js'; +import { resolvePackageRoot } from '../paths.js'; +import { runCommandLine } from '../util/run-command.js'; +import type { EvidenceStrategy, ProjectEntry } from '../types.js'; + +export const description = 'Add a new repo to projects.json with auto-detected settings'; + +interface DetectedRepo { + name: string; + path: string; + remote: string; + language: string; + packageManager: string; + description: string; + commands: Record; + evidenceStrategy: 
EvidenceStrategy;
+}
+
+export async function handler(argv: string[]): Promise<number> { // resolves to the process exit code: 0 on success or explicit help, 1 on any failure
+  const repoPath = argv[0];
+
+  if (!repoPath || repoPath === '--help' || repoPath === '-h') {
+    process.stderr.write('Usage: ca onboard <path>\n');
+    process.stderr.write('\nProbes the repo for package manager, language, scripts, and git remote.\n');
+    process.stderr.write('Adds an entry to projects.json with the detected settings.\n');
+    return repoPath ? 0 : 1; // a help flag is truthy -> 0; a missing argument -> 1
+  }
+
+  const absPath = resolve(repoPath);
+  if (!existsSync(absPath)) {
+    process.stderr.write(`Error: path not found: ${absPath}\n`);
+    return 1;
+  }
+
+  const caseRoot = resolvePackageRoot();
+  const manifest = await loadProjectsManifest(caseRoot);
+
+  const existing = manifest.repos.find( // duplicate = same resolved path OR same directory basename
+    (r) => resolve(manifest.repoBasePath, r.path) === absPath || r.name === basename(absPath),
+  );
+  if (existing) {
+    process.stderr.write(`Error: repo "${existing.name}" already in projects.json\n`);
+    return 1;
+  }
+
+  process.stdout.write(`Probing ${absPath}...\n`);
+
+  const detected = await probeRepo(absPath, manifest.repoBasePath);
+
+  process.stdout.write(`\n Name: ${detected.name}\n`);
+  process.stdout.write(` Path: ${detected.path}\n`);
+  process.stdout.write(` Remote: ${detected.remote}\n`);
+  process.stdout.write(` Language: ${detected.language}\n`);
+  process.stdout.write(` Package manager: ${detected.packageManager}\n`);
+  process.stdout.write(` Evidence: ${detected.evidenceStrategy}\n`);
+  process.stdout.write(` Description: ${detected.description}\n`);
+  process.stdout.write(` Commands:\n`);
+  for (const [key, cmd] of Object.entries(detected.commands)) {
+    process.stdout.write(` ${key}: ${cmd}\n`);
+  }
+
+  const entry: ProjectEntry = {
+    name: detected.name,
+    evidenceStrategy: detected.evidenceStrategy,
+    path: detected.path,
+    remote: detected.remote,
+    description: detected.description,
+    language: detected.language,
+    packageManager: detected.packageManager,
+    commands: detected.commands,
+  };
+
+  const raw = readFileSync(manifest.path, 'utf-8'); // re-read the raw file so the entry is appended to the JSON as stored on disk
+  const json = JSON.parse(raw) as { $schema?: string; repos: ProjectEntry[] };
+  json.repos.push(entry);
+  await Bun.write(manifest.path, JSON.stringify(json, null, 2) + '\n');
+
+  process.stdout.write(`\nAdded "${detected.name}" to ${manifest.path}\n`);
+
+  // Run bootstrap to validate
+  process.stdout.write(`\nRunning bootstrap...\n`);
+  const { runBootstrap } = await import('./bootstrap.js');
+  try {
+    const result = await runBootstrap(detected.name, caseRoot);
+    for (const step of result.steps) {
+      const seconds = (step.durationMs / 1000).toFixed(1);
+      const tag = step.exitCode === 0 ? 'OK' : 'FAIL';
+      process.stdout.write(` [${tag}] ${step.label} (${seconds}s)\n`);
+    }
+    if (!result.ok) {
+      process.stderr.write('Bootstrap failed. Entry was added but repo is not ready.\n');
+      return 1;
+    }
+    process.stdout.write('Ready.\n');
+  } catch (err) {
+    process.stderr.write(`Bootstrap error: ${err instanceof Error ? err.message : String(err)}\n`); // non-Error throws would otherwise print "undefined"
+    return 1;
+  }
+
+  return 0;
+}
+
+async function probeRepo(absPath: string, basePath: string): Promise<DetectedRepo> {
+  const name = basename(absPath);
+  const relPath = relative(basePath, absPath);
+  const path = relPath.startsWith('.') ?
relPath : `./${relPath}`; + + const remote = await detectRemote(absPath); + const { language, packageManager, commands, description } = await detectFromPackageFile(absPath); + const evidenceStrategy = inferEvidenceStrategy(absPath, commands); + + return { name, path, remote, language, packageManager, commands, description, evidenceStrategy }; +} + +async function detectRemote(repoPath: string): Promise { + const result = await runCommandLine('git remote get-url origin', { cwd: repoPath, timeout: 5_000 }); + return result.stdout.trim() || 'unknown'; +} + +interface PackageDetection { + language: string; + packageManager: string; + commands: Record; + description: string; +} + +async function detectFromPackageFile(repoPath: string): Promise { + const pkgPath = resolve(repoPath, 'package.json'); + if (existsSync(pkgPath)) { + return detectFromNodePackage(repoPath, pkgPath); + } + + // Fallback: check for other ecosystems + if (existsSync(resolve(repoPath, 'go.mod'))) { + return { language: 'go', packageManager: 'go', commands: { setup: 'go mod download', test: 'go test ./...' }, description: '' }; + } + if (existsSync(resolve(repoPath, 'pyproject.toml')) || existsSync(resolve(repoPath, 'setup.py'))) { + return { language: 'python', packageManager: 'pip', commands: { setup: 'pip install -e .', test: 'pytest' }, description: '' }; + } + if (existsSync(resolve(repoPath, 'Gemfile'))) { + return { language: 'ruby', packageManager: 'bundler', commands: { setup: 'bundle install', test: 'bundle exec rspec' }, description: '' }; + } + + return { language: 'typescript', packageManager: 'npm', commands: { setup: 'npm install', test: 'npm test' }, description: '' }; +} + +function detectFromNodePackage(repoPath: string, pkgPath: string): PackageDetection { + const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')); + const scripts: Record = pkg.scripts ?? {}; + const description: string = pkg.description ?? 
''; + + // Detect package manager + let packageManager = 'npm'; + if (existsSync(resolve(repoPath, 'pnpm-lock.yaml'))) packageManager = 'pnpm'; + else if (existsSync(resolve(repoPath, 'yarn.lock'))) packageManager = 'yarn'; + else if (existsSync(resolve(repoPath, 'bun.lockb')) || existsSync(resolve(repoPath, 'bun.lock'))) packageManager = 'pnpm'; + + const run = packageManager === 'npm' ? 'npm run' : packageManager; + const commands: Record = {}; + + commands.setup = `${packageManager} install`; + if (scripts.test) commands.test = `${packageManager} test`; + if (scripts.build) commands.build = `${run} build`; + if (scripts.lint) commands.lint = `${run} lint`; + if (scripts.typecheck) commands.typecheck = `${run} typecheck`; + if (scripts.format) commands.format = `${run} format`; + + const language = existsSync(resolve(repoPath, 'tsconfig.json')) ? 'typescript' : 'typescript'; + + return { language, packageManager, commands, description }; +} + +export function inferEvidenceStrategy(repoPath: string, commands: Record): EvidenceStrategy { + // If there's a dev server script and an example app, likely a UI + const pkgPath = resolve(repoPath, 'package.json'); + if (existsSync(pkgPath)) { + const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')); + const scripts = pkg.scripts ?? {}; + + // Has a dev server and is an app framework (Next.js, etc.) 
+ const deps = { ...pkg.dependencies, ...pkg.devDependencies }; + if (scripts.dev && (deps.next || deps.vite || deps['@remix-run/dev'] || deps['@tanstack/start'])) { + return 'ui-screenshot'; + } + } + + // If there are example app directories, likely supports UI testing + if (existsSync(resolve(repoPath, 'examples')) || existsSync(resolve(repoPath, 'example'))) { + return 'ui-screenshot'; + } + + // Has test command → at minimum supports test-output; if it has a build, scenario-script is viable + if (commands.build) return 'scenario-script'; + return 'test-output'; +} diff --git a/src/context/assembler.ts b/src/context/assembler.ts index 49492be..b1fc55b 100644 --- a/src/context/assembler.ts +++ b/src/context/assembler.ts @@ -46,7 +46,7 @@ function substitutePathVars(content: string, config: PipelineConfig): string { return content .replace(/\{\{packageRoot\}\}/g, config.packageRoot) .replace(/\{\{dataDir\}\}/g, config.dataDir) - .replace(/\{\{repoType\}\}/g, config.project?.type ?? 'app'); + .replace(/\{\{evidenceStrategy\}\}/g, resolveEvidenceStrategy(config.project)); } const INJECT_MARKER = //g; @@ -117,7 +117,7 @@ function buildContextBlock( lines.push(`- **Target repo**: \`${config.repoPath}\``); lines.push(`- **Repo name**: ${config.repoName}`); if (config.project) { - lines.push(`- **Repo type**: ${config.project.type ?? 
'app'}`); + lines.push(`- **Evidence strategy**: \`${resolveEvidenceStrategy(config.project)}\``); lines.push(`- **Package manager**: ${config.project.packageManager}`); } lines.push(''); @@ -128,10 +128,8 @@ function buildContextBlock( break; case 'verifier': - // Deliberately minimal — fresh-context testing, plus evidence strategy + // Deliberately minimal — fresh-context testing appendProjectCommands(lines, config); - lines.push(`- **Evidence strategy**: \`${resolveEvidenceStrategy(config.project)}\``); - lines.push(''); break; case 'reviewer': diff --git a/src/types.ts b/src/types.ts index 7340fa2..c003d00 100644 --- a/src/types.ts +++ b/src/types.ts @@ -172,8 +172,7 @@ export type EvidenceStrategy = 'ui-screenshot' | 'scenario-script' | 'test-outpu export interface ProjectEntry { name: string; - type?: 'app' | 'library' | string; - evidenceStrategy?: EvidenceStrategy; + evidenceStrategy: EvidenceStrategy; path: string; remote: string; description?: string; @@ -183,9 +182,7 @@ export interface ProjectEntry { } export function resolveEvidenceStrategy(project?: ProjectEntry): EvidenceStrategy { - if (project?.evidenceStrategy) return project.evidenceStrategy; - if (project?.type === 'library') return 'scenario-script'; - return 'ui-screenshot'; + return project?.evidenceStrategy ?? 'test-output'; } export interface FailureAnalysis { From 45dfba622ae32d36d1f12887d3d499ad89338e9a Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Mon, 18 May 2026 16:06:28 -0700 Subject: [PATCH 3/8] refactor(agents): extract WorkOS-specific config into per-project fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The verifier had hardcoded ~/.config/case/credentials and WorkOS SDK examples baked into the prompt. This couples the pipeline to WorkOS repos — any non-WorkOS project would hit wrong credential paths and irrelevant code examples. Add credentials and verificationNotes fields to ProjectEntry. 
The assembler passes both to the verifier context. The verifier prompt now reads the credentials path and verification hints from context instead of hardcoding them. WorkOS-specific SDK patterns move to verificationNotes on the workos-node entry in projects.json. This makes the pipeline usable for non-WorkOS repos without agent prompt changes. --- agents/verifier.md | 58 ++++++++-------------------------------- projects.json | 3 ++- projects.schema.json | 9 +++++++ src/context/assembler.ts | 18 +++++++++++-- src/types.ts | 4 +++ 5 files changed, 42 insertions(+), 50 deletions(-) diff --git a/agents/verifier.md b/agents/verifier.md index 4d7fa68..e453a0f 100644 --- a/agents/verifier.md +++ b/agents/verifier.md @@ -103,67 +103,31 @@ This is the critical step. Write a short script (10-30 lines) that exercises the 5. **Read the issue** from the task file to understand the exact scenario. -6. **Read credentials** if the scenario needs real API calls: +6. **Read credentials** if the scenario needs real API calls. The credentials file path is in the Task Context under **Credentials**: ```bash - cat ~/.config/case/credentials + cat <credentials-path> ``` - Credentials available: `WORKOS_API_KEY`, `WORKOS_CLIENT_ID`, and others. Use them in the script via environment variables — never hardcode them. + Use the env vars from the credentials file in the script — never hardcode them. 7. **Write the scenario script** to `/tmp/verify-.ts` (or `.js`). 
The script should: - - Import from the local package (e.g., `import { WorkOS } from './src/index.ts'` or from the build output) + - Import from the local package (from `./src/index.ts` or the build output) - Exercise the exact code path that was changed or added - Assert the expected behavior (throw on failure, print PASS on success) - Be self-contained and disposable (not committed) - - **Examples by change type:** - - _Bug fix — a method was returning wrong results:_ - - ```ts - import { WorkOS } from './src/index.ts'; - const workos = new WorkOS({ apiKey: process.env.WORKOS_API_KEY }); - // Reproduce the exact scenario from the issue - const result = workos.sso.getAuthorizationUrl({ - redirectUri: 'http://localhost:3000/callback', - clientId: process.env.WORKOS_CLIENT_ID!, - }); - // Verify the fix: URL should contain the expected parameter - if (!result.includes('client_id=')) throw new Error('FAIL: missing client_id in URL'); - console.log('PASS: authorization URL contains client_id'); - ``` - - _New feature — a new method or option was added:_ - - ```ts - import { WorkOS } from './src/index.ts'; - const workos = new WorkOS({ apiKey: process.env.WORKOS_API_KEY }); - // Verify the new API exists and returns expected shape - const result = await workos.organizations.list({ limit: 1 }); - if (!Array.isArray(result.data)) throw new Error('FAIL: expected array'); - console.log('PASS: new list method returns expected shape'); - ``` - - _Export change — a new type or function was exported:_ - - ```ts - // Verify the export is accessible from the package entry point - import { NewType, newFunction } from './src/index.ts'; - if (typeof newFunction !== 'function') throw new Error('FAIL: newFunction not exported'); - console.log('PASS: new exports are accessible'); - ``` + - If the Task Context includes **Verification Notes**, follow them for repo-specific import patterns and API usage **Guidelines:** - If the change is purely structural (types, exports, refactoring), 
the script can be synchronous and skip API calls - If the change affects runtime behavior (bug fix, new API method), make real API calls using credentials - If real API calls would be destructive or require specific server state, test what you can (URL generation, serialization, type checks) and note the limitation - - Keep it focused — test the specific change, not the entire SDK + - Keep it focused — test the specific change, not the entire package 8. **Run the scenario script:** ```bash - # Load credentials as env vars - set -a; source ~/.config/case/credentials; set +a + # Load credentials as env vars (path from Task Context → Credentials) + set -a; source <credentials-path>; set +a bun /tmp/verify-.ts 2>&1 | tee -a /tmp/verifier-test-output.txt ``` If the script fails, report exactly what failed and why. @@ -215,7 +179,7 @@ If the implementer added a new export, alias, or API: - If the example app doesn't use the new export yet, **temporarily modify it** to import/use the new export, then verify it works. Document what you changed. - After verification, revert any temporary changes (the implementer or closer can decide if the example update should be permanent). -6. Read test credentials from `~/.config/case/credentials` (use for .env files only — **never log credentials**) +6. Read test credentials from the path in Task Context → **Credentials** (use for .env files only — **never log credentials**) 7. Load the `playwright-cli` skill for browser testing 8. 
Open browser and navigate: ```bash @@ -360,7 +324,7 @@ If verification failed (the fix doesn't work), set `"status":"failed"` and descr ## Credential Safety -- Read credentials from `~/.config/case/credentials` only +- Read credentials from the path in Task Context → **Credentials** only - Use credentials only in `.env` files for example apps - **NEVER** log credential values to stdout, the progress log, or AGENT_RESULT - **NEVER** use credentials in raw curl/API calls @@ -373,7 +337,7 @@ If verification failed (the fix doesn't work), set `"status":"failed"` and descr - **Never create PRs.** That's the closer's job. - **Never set `tested` or `manualTested` directly in task JSON.** Marker commands handle this. - **Always test the specific fix scenario.** "It loads" is not verification. "The org switch works with a custom cookie name" is verification. Your before/after screenshots must show a visible difference. -- **Always complete the login flow when testing authenticated features.** Use the credentials from `~/.config/case/credentials` and follow the AuthKit login procedure in step 3c. Never screenshot an unauthenticated landing page as "evidence" for an auth feature. +- **Always complete the login flow when testing authenticated features.** Use the credentials from Task Context and follow the login procedure in the Verification Notes (if provided) or step 3c. Never screenshot an unauthenticated landing page as "evidence" for an auth feature. - **Never record video of a page doing nothing.** If you use video, the recording must capture real interactions. If you're only loading a page and taking a screenshot, skip video entirely. - **Always create evidence markers via marker commands** — never `touch` marker files directly. - **Always end with `<<>>`.** The orchestrator depends on this. 
diff --git a/projects.json b/projects.json index 5df34da..887262c 100644 --- a/projects.json +++ b/projects.json @@ -96,7 +96,8 @@ "lint": "npm run lint", "typecheck": "npm run typecheck", "format": "npm run format" - } + }, + "verificationNotes": "Import pattern: `import { WorkOS } from './src/index.ts'`. Constructor: `new WorkOS({ apiKey: process.env.WORKOS_API_KEY })`. Credentials env vars: WORKOS_API_KEY, WORKOS_CLIENT_ID. For SSO tests, use `workos.sso.getAuthorizationUrl()` with `redirectUri` and `clientId`. For org tests, use `workos.organizations.list()`. For export-only changes, verify the export is accessible from `./src/index.ts`." } ] } diff --git a/projects.schema.json b/projects.schema.json index 9839328..d89be2a 100644 --- a/projects.schema.json +++ b/projects.schema.json @@ -55,6 +55,15 @@ "format": { "type": "string", "description": "Run formatter" } }, "additionalProperties": { "type": "string" } + }, + "credentials": { + "type": "string", + "description": "Path to a sourceable env file with credentials for integration testing. Defaults to ~/.config/case/credentials.", + "default": "~/.config/case/credentials" + }, + "verificationNotes": { + "type": "string", + "description": "Free-text hints for the verifier agent — auth flows, env var names, gotchas. Injected into the verifier context." 
} }, "additionalProperties": false diff --git a/src/context/assembler.ts b/src/context/assembler.ts index b1fc55b..8648aaa 100644 --- a/src/context/assembler.ts +++ b/src/context/assembler.ts @@ -1,5 +1,5 @@ import type { AgentName, AgentResult, PipelineConfig, RevisionRequest, TaskJson } from '../types.js'; -import { resolveEvidenceStrategy } from '../types.js'; +import { resolveEvidenceStrategy, DEFAULT_CREDENTIALS_PATH } from '../types.js'; import type { RepoContext } from './prefetch.js'; import { readPackageAssetSync } from '../package-assets.js'; @@ -128,8 +128,9 @@ function buildContextBlock( break; case 'verifier': - // Deliberately minimal — fresh-context testing + // Deliberately minimal — fresh-context testing, plus project-specific hints appendProjectCommands(lines, config); + appendVerifierContext(lines, config); break; case 'reviewer': @@ -203,6 +204,19 @@ function appendImplementerContext( } } +function appendVerifierContext(lines: string[], config: PipelineConfig): void { + const creds = config.project?.credentials ?? 
DEFAULT_CREDENTIALS_PATH; + lines.push(`- **Credentials**: \`${creds}\``); + + if (config.project?.verificationNotes) { + lines.push(''); + lines.push('### Verification Notes'); + lines.push(''); + lines.push(config.project.verificationNotes); + lines.push(''); + } +} + function appendReviewerContext(lines: string[], repoContext: RepoContext): void { if (!repoContext.goldenPrinciples) return; lines.push('### Golden Principles'); diff --git a/src/types.ts b/src/types.ts index c003d00..e498310 100644 --- a/src/types.ts +++ b/src/types.ts @@ -170,6 +170,8 @@ export interface PipelineConfig { export type EvidenceStrategy = 'ui-screenshot' | 'scenario-script' | 'test-output'; +export const DEFAULT_CREDENTIALS_PATH = '~/.config/case/credentials'; + export interface ProjectEntry { name: string; evidenceStrategy: EvidenceStrategy; @@ -179,6 +181,8 @@ export interface ProjectEntry { language: string; packageManager: string; commands: Record; + credentials?: string; + verificationNotes?: string; } export function resolveEvidenceStrategy(project?: ProjectEntry): EvidenceStrategy { From c51bed7d625b72216a0176148f1a55311ddbb6db Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Mon, 18 May 2026 16:14:40 -0700 Subject: [PATCH 4/8] docs: update all docs for evidence strategy, remove projects.json from repo - README: remove WorkOS-specific language, add evidence strategy docs, update CLI reference with ca onboard, fix repository map to reference ~/.config/case/projects.json - AGENTS.md: fix workos-node stack (npm not pnpm), reference evidence strategy, remove WorkOS OSS from title - CLAUDE.md: remove stale tasks/done/ and node one-liner, add ca onboard, remove projects.json from structure (now user-local) - CONTEXT.md: fix phase list (remove non-existent approve), add evidence strategy term - tasks/README.md: evidence expectations now required - .gitignore: remove stale root-level markers, gitignore projects.json (user-local, lives in ~/.config/case/) - projects.json removed from git 
tracking (stays on disk) - Add proposed-amendments/README.md explaining historical status --- .gitignore | 10 ++- AGENTS.md | 6 +- CLAUDE.md | 15 ++--- CONTEXT.md | 3 +- README.md | 40 ++++++----- docs/proposed-amendments/README.md | 5 ++ projects.json | 103 ----------------------------- tasks/README.md | 2 +- 8 files changed, 46 insertions(+), 138 deletions(-) create mode 100644 docs/proposed-amendments/README.md delete mode 100644 projects.json diff --git a/.gitignore b/.gitignore index 1d70c67..cb76a42 100644 --- a/.gitignore +++ b/.gitignore @@ -1,19 +1,17 @@ -# Case harness marker files (created during pipeline runs) -.case-active -.case-tested -.case-manual-tested -.case-reviewed - # Repo-local Case runtime state. Target repos should ignore this directory too; # `ca bootstrap` adds the rule automatically. .case/ +# User-local project manifest (lives in ~/.config/case/projects.json) +projects.json + # Legacy in-repo runtime state retained only as a read fallback during migration. tasks/active/ tasks/done/ docs/learnings/ docs/proposed-amendments/*.md !docs/proposed-amendments/.gitkeep +!docs/proposed-amendments/README.md docs/run-log.jsonl docs/agent-versions/ diff --git a/AGENTS.md b/AGENTS.md index cb3eed6..4098737 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,4 +1,4 @@ -# Case — WorkOS OSS Harness +# Case — Agent Harness Spine repo for orchestrating agent work across WorkOS open source projects. Humans steer. Agents execute. When agents struggle, fix the harness. 
@@ -21,9 +21,9 @@ echo "$SESSION" | authkit-session | `../authkit-session` | Framework-agnostic session management | TS/pnpm | | authkit-tanstack-start | `../authkit-tanstack-start` | AuthKit TanStack Start SDK | TS/pnpm | | authkit-nextjs | `../authkit-nextjs` | AuthKit Next.js SDK | TS/pnpm | -| workos-node | `../workos-node/main` | WorkOS Node.js SDK | TS/pnpm | +| workos-node | `../workos-node/main` | WorkOS Node.js SDK | TS/npm | -Full metadata (commands, remotes, language): `projects.json` +Full metadata (commands, remotes, evidence strategy): `~/.config/case/projects.json` ## Navigation diff --git a/CLAUDE.md b/CLAUDE.md index 0385a0f..629e940 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -13,7 +13,7 @@ It provides the cross-cutting knowledge, conventions, and task dispatch that no ## Philosophy -- **Case exists to make agent-authored WorkOS OSS PRs reliable, reviewable, and self-improving.** Keep the core loop small unless reliability requires more. +- **Case exists to make agent-authored PRs reliable, reviewable, and self-improving.** Keep the core loop small unless reliability requires more. - **Humans steer, agents execute.** Engineers define goals and acceptance criteria. Agents implement. - **Never write code directly.** All code changes in target repos flow through agents. Engineers only improve this harness. - **When agents struggle, fix the harness.** The fix is never "try harder" — it's a missing doc, playbook, convention, or enforcement rule. @@ -48,8 +48,7 @@ Case depends on the skills plugin for product knowledge. 
They are complementary, ``` AGENTS.md # Entry point for agents (routing map) CLAUDE.md # This file (meta-instructions for case itself) -projects.json # Manifest of target repos -projects.schema.json # JSON Schema for the manifest +projects.schema.json # JSON Schema for the project manifest docs/ architecture/ # Canonical patterns per repo type conventions/ # Shared rules (commits, testing, PRs) @@ -57,20 +56,17 @@ docs/ playbooks/ # Step-by-step guides for recurring operations tasks/ active/ # Current task files for agent execution - done/ # Completed tasks (moved after PR merge) templates/ # Reusable task templates src/commands/ check.ts # Cross-repo convention enforcement bootstrap.ts # Per-repo readiness verification + onboard.ts # Human-facing onboarding for a new repo ``` ## Commands ```bash -# Validate manifest -node -e "JSON.parse(require('fs').readFileSync('projects.json','utf8'))" - -# Check conventions across repos +# Check conventions across repos (also validates the manifest) ca check # Check a single repo @@ -78,4 +74,7 @@ ca check --repo cli # Bootstrap a repo for agent work ca bootstrap cli + +# Onboard a new repo +ca onboard ``` diff --git a/CONTEXT.md b/CONTEXT.md index 3f62211..0039c65 100644 --- a/CONTEXT.md +++ b/CONTEXT.md @@ -7,7 +7,7 @@ Canonical vocabulary for the case pipeline. Every term used in code, specs, and | Term | Definition | Rejected Alternatives | | ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------- | | **task** | A unit of agent work dispatched by the pipeline. Has a `taskId`, status, and associated event log. | `job`, `run` (too generic) | -| **phase** | A named pipeline stage that produces one `AgentResult`. One of: implement, verify, review, approve, close, retrospective. 
| `step` (too generic), `stage` (ambiguous with CI) | +| **phase** | A named pipeline stage that produces one `AgentResult`. One of: implement, verify, review, close, retrospective. | `step` (too generic), `stage` (ambiguous with CI) | | **node** | A DAG vertex representing one phase execution at a specific revision cycle. E.g., `implement_0`, `verify_1`. Introduced in Phase 3. | `vertex` (too academic) | | **status** | The lifecycle position of a task, derived from pipeline state. One of: active, implementing, verifying, reviewing, evaluating, closing, pr-opened, merged. | `state` (reserved for `PipelineState`, the full reconstructible object) | | **state** | The full reconstructible pipeline state object (`PipelineState`), produced by `reduceEvents()`. | `snapshot` (used in mill for a different concept) | @@ -18,6 +18,7 @@ Canonical vocabulary for the case pipeline. Every term used in code, specs, and | **evaluator** | Collective term for verifier and reviewer — the two phases that assess implementation quality. | `assessor`, `checker` | | **marker** | A file written to `.case//` as evidence of a completed phase. E.g., `tested`, `reviewed`. | `flag`, `sentinel` | | **evidence** | Proof that a phase completed successfully. Includes marker files, SHA-256 hashed test output, screenshots. | `artifact` (too broad) | +| **evidence strategy** | One of: ui-screenshot, scenario-script, test-output. Declared per project in projects.json. Drives what kind of verification evidence the pipeline requires. | | | **ast-grep rule** | A YAML file defining a structural code pattern to match or ban. Processed by ast-grep against TypeScript ASTs. Lives in `ast-rules/`. | `lint rule` (too generic — we also have oxlint) | | **target rule** | An ast-grep rule enforcing golden principles in target repos. Run by the implementer before committing. Lives in `ast-rules/target/`. 
| `repo rule`, `external rule` | | **self-enforcement rule** | An ast-grep rule enforcing case's own codebase invariants. Run in CI and pre-commit. Lives in `ast-rules/self/`. | `internal rule`, `meta rule` | diff --git a/README.md b/README.md index a27a9c5..9d40952 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ Case -Case is the reliability layer for agent-authored WorkOS OSS pull requests. +Case is the reliability layer for agent-authored pull requests. -Its job is narrow: turn a clearly scoped WorkOS OSS task into a reviewed PR with evidence, and make the next run better when this one fails. Case is not a generic agent platform, a dashboard product, or a place to accumulate every possible workflow idea. Humans steer. Agents execute. The harness keeps the work reviewable. +Its job is narrow: turn a clearly scoped task into a reviewed PR with evidence, and make the next run better when this one fails. Case is not a generic agent platform, a dashboard product, or a place to accumulate every possible workflow idea. Humans steer. Agents execute. The harness keeps the work reviewable. ## Why It Exists -Agents are useful when the surrounding system makes good work easier than bad work. Case provides that surrounding system for the WorkOS open source repos: +Agents are useful when the surrounding system makes good work easier than bad work. Case provides that surrounding system: - A shared map of target repos, commands, architecture notes, and conventions. - A task format that separates human intent from machine-updated state. @@ -18,7 +18,7 @@ Agents are useful when the surrounding system makes good work easier than bad wo The north star: -> Case exists to make agent-authored WorkOS OSS PRs reliable, reviewable, and self-improving. +> Case exists to make agent-authored PRs reliable, reviewable, and self-improving. 
## Core Loop @@ -106,6 +106,7 @@ ca 1234 # create or resume a GitHub issue run ca DX-1234 # create or resume a Linear issue run ca --agent # interactive steering session ca --agent 1234 # steering session with issue context +ca onboard <path> # add a repo to projects.json ca run --task # run an existing task JSON ca watch # live-tail the event log ``` @@ -120,7 +121,7 @@ ca mark-manual-tested ca mark-reviewed --critical 0 ca upload ca snapshot -ca create --repo <name> --title <title> --description <text> +ca create --repo <name> --title <title> --description <text> --evidence <expectations> ca analyze-failure <task.json> <agent> <error> ca bootstrap <repo> ca check [--repo <repo>] @@ -167,6 +168,8 @@ CASE_DATA_DIR=/tmp/case-test ca init Static package assets are versioned with Case and embedded into the standalone binary: `agents/`, markdown under `docs/`, and text rules under `ast-rules/`. When running from a checkout, disk files win so local prompt/doc edits are picked up immediately; set `CASE_PACKAGE_ROOT=/path/to/case` to force a specific checkout as the disk override. +Each entry in `projects.json` may optionally include `credentials` (path to a sourceable env file holding the secrets needed for verification; defaults to `~/.config/case/credentials`) and `verificationNotes` (free-form context the verifier should know about the repo). + For portable binary installs, keep `projects.json` in `~/.config/case/` via `ca init --projects <path>` or `ca init --migrate-from <case-checkout>`. Repo paths in a portable `projects.json` should be absolute or relative to that `projects.json` file. ## Pipeline @@ -182,6 +185,8 @@ Revision loops are evaluator-driven. A verifier or reviewer rubric failure can s Every run writes an append-only event log under `<target-repo>/.case/<task-slug>/events/`. `ca watch <task-slug>` renders those events while a run is active. +Every task carries `evidenceExpectations` — the concrete artifacts the verifier must produce. 
The orchestrator writes these based on the target repo's `evidenceStrategy` so the verifier knows what counts as proof up front. + ## Agent Roles | Agent | Responsibility | Does Not Do | @@ -193,7 +198,7 @@ Every run writes an append-only event log under `<target-repo>/.case/<task-slug> | Closer | Creates the PR after evidence gates pass | Implement or test | | Retrospective | Records learnings and proposes harness improvements | Edit target repo code | -¹ The orchestrator is TypeScript runtime code (`src/agent/orchestrator-session.ts`), not an LLM agent prompt like the others. +¹ The orchestrator runs as an LLM agent session via `ca --agent`, or as TypeScript runtime code for direct `ca <issue>` dispatch. The key boundary is context isolation. Implementer context includes task details, playbooks, repo learnings, and revision feedback. Verifier context is intentionally fresher. Reviewer context is focused on the diff and principles. @@ -207,6 +212,12 @@ Evidence markers live under the target repo's `.case/<task-slug>/` directory: The closer checks these markers before opening a PR. The point is not ceremony; it is making the PR auditable without trusting a chat transcript. +Each repo declares an `evidenceStrategy` in `projects.json` that drives what the verifier produces: + +- `ui-screenshot`: Playwright before/after screenshots for user-facing UI changes. +- `scenario-script`: a consumer script that exercises the specific user-facing scenario. +- `test-output`: automated test output only (for libraries and non-UI code). + ## Self-Improvement After a run, the retrospective agent should leave the harness smarter: @@ -240,18 +251,15 @@ Priority: ## Repository Map -Target repos are listed in `projects.json`. +Target repos are listed in `~/.config/case/projects.json` (created by `ca init` + `ca onboard`). The schema is `projects.schema.json` in this repo. 
-| Repo | Path | Purpose | -| ---------------------- | --------------------------- | ------------------------------------- | -| cli | `../cli/main` | WorkOS CLI | -| skills | `../skills` | WorkOS integration skills | -| authkit-session | `../authkit-session` | Framework-agnostic session management | -| authkit-tanstack-start | `../authkit-tanstack-start` | AuthKit TanStack Start SDK | -| authkit-nextjs | `../authkit-nextjs` | AuthKit Next.js SDK | -| workos-node | `../workos-node/main` | WorkOS Node.js SDK | +Add a repo with: + +```bash +ca onboard <path> +``` -Add a repo by updating `projects.json`, adding any needed architecture notes under `docs/architecture/`, and verifying with: +Then add any needed architecture notes under `docs/architecture/` and verify with: ```bash ca check --repo <name> diff --git a/docs/proposed-amendments/README.md b/docs/proposed-amendments/README.md new file mode 100644 index 0000000..ec84e8f --- /dev/null +++ b/docs/proposed-amendments/README.md @@ -0,0 +1,5 @@ +# Proposed Amendments + +Historical amendment proposals from early development. These were all accepted and implemented — the infrastructure they reference has since been replaced by the TypeScript CLI and evidence strategy system. + +These files are preserved for historical context only. 
diff --git a/projects.json b/projects.json deleted file mode 100644 index 887262c..0000000 --- a/projects.json +++ /dev/null @@ -1,103 +0,0 @@ -{ - "$schema": "./projects.schema.json", - "repos": [ - { - "name": "cli", - "evidenceStrategy": "scenario-script", - "path": "../cli/main", - "remote": "git@github.com:workos/workos-cli.git", - "description": "WorkOS CLI for installing AuthKit integrations and managing WorkOS resources", - "language": "typescript", - "packageManager": "pnpm", - "commands": { - "setup": "pnpm install", - "build": "pnpm build", - "test": "pnpm test", - "lint": "pnpm lint", - "typecheck": "pnpm typecheck" - } - }, - { - "name": "skills", - "evidenceStrategy": "test-output", - "path": "../skills", - "remote": "git@github.com:workos/skills.git", - "description": "WorkOS integration skills", - "language": "typescript", - "packageManager": "pnpm", - "commands": { - "setup": "pnpm install", - "test": "pnpm test", - "lint": "pnpm lint", - "format": "pnpm format" - } - }, - { - "name": "authkit-session", - "evidenceStrategy": "scenario-script", - "path": "../authkit-session", - "remote": "git@github.com:workos/authkit-ssr.git", - "description": "Framework-agnostic TypeScript authentication library for WorkOS with pluggable storage adapters", - "language": "typescript", - "packageManager": "pnpm", - "commands": { - "setup": "pnpm install", - "build": "pnpm run build", - "test": "pnpm test", - "typecheck": "pnpm run typecheck", - "format": "pnpm run format" - } - }, - { - "name": "authkit-tanstack-start", - "evidenceStrategy": "scenario-script", - "path": "../authkit-tanstack-start", - "remote": "git@github.com:workos/authkit-tanstack-start.git", - "description": "WorkOS library for TanStack Start providing authentication and session management helpers", - "language": "typescript", - "packageManager": "pnpm", - "commands": { - "setup": "pnpm install", - "build": "pnpm build", - "test": "pnpm test", - "typecheck": "pnpm run typecheck", - "format": 
"pnpm run format" - } - }, - { - "name": "authkit-nextjs", - "evidenceStrategy": "scenario-script", - "path": "../authkit-nextjs", - "remote": "git@github.com:workos/authkit-nextjs.git", - "description": "Authentication and session helpers for using WorkOS & AuthKit with Next.js", - "language": "typescript", - "packageManager": "pnpm", - "commands": { - "setup": "pnpm install", - "build": "pnpm run build", - "test": "pnpm test", - "lint": "pnpm run lint", - "typecheck": "pnpm run typecheck", - "format": "pnpm run format" - } - }, - { - "name": "workos-node", - "evidenceStrategy": "scenario-script", - "path": "../workos-node/main", - "remote": "git@github.com:workos/workos-node.git", - "description": "WorkOS SDK for Node/JavaScript/TypeScript projects", - "language": "typescript", - "packageManager": "npm", - "commands": { - "setup": "npm install", - "build": "npm run build", - "test": "npm test", - "lint": "npm run lint", - "typecheck": "npm run typecheck", - "format": "npm run format" - }, - "verificationNotes": "Import pattern: `import { WorkOS } from './src/index.ts'`. Constructor: `new WorkOS({ apiKey: process.env.WORKOS_API_KEY })`. Credentials env vars: WORKOS_API_KEY, WORKOS_CLIENT_ID. For SSO tests, use `workos.sso.getAuthorizationUrl()` with `redirectUri` and `clientId`. For org tests, use `workos.organizations.list()`. For export-only changes, verify the export is accessible from `./src/index.ts`." 
- } - ] -} diff --git a/tasks/README.md b/tasks/README.md index 167681e..419ff34 100644 --- a/tasks/README.md +++ b/tasks/README.md @@ -27,7 +27,7 @@ Numbers are sequential per prefix: `cli-1`, `cli-2`, `authkit-nextjs-1`, `x-1`, | `## Verification Scenarios` | (Optional) Concrete scenarios the verifier will test — generated by orchestrator during task creation | | `## Non-Goals` | (Optional) What is explicitly NOT in scope — prevents implementer scope creep | | `## Edge Cases` | (Optional) Edge cases the implementer should consider | -| `## Evidence Expectations` | (Optional) What proof of completion looks like (screenshots, test output, etc.) | +| `## Evidence Expectations` | Required. What proof of completion looks like (screenshots, test output, etc.) — orchestrator generates from the repo's `evidenceStrategy` | Optional: `## Context` for background info, issue links, API specs, etc. From 005fd13e83651f141b98c4f564c1fea169850d14 Mon Sep 17 00:00:00 2001 From: Nick Nisi <nick.nisi@workos.com> Date: Mon, 18 May 2026 16:16:14 -0700 Subject: [PATCH 5/8] fix(onboard): bun lockfile detection and dead ternary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bun lockfile detection was setting packageManager to 'pnpm' instead of 'bun'. Also added 'bun' to the schema's packageManager enum. Removed dead ternary that returned 'typescript' in both branches — replaced with a direct assignment since all onboarded repos are currently treated as TypeScript. 
--- projects.schema.json | 2 +- src/commands/onboard.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/projects.schema.json b/projects.schema.json index d89be2a..9eaf105 100644 --- a/projects.schema.json +++ b/projects.schema.json @@ -41,7 +41,7 @@ }, "packageManager": { "type": "string", - "enum": ["pnpm", "npm", "yarn", "uv", "pip", "go", "bundler", "composer", "dotnet", "mix", "gradle"] + "enum": ["pnpm", "npm", "yarn", "bun", "uv", "pip", "go", "bundler", "composer", "dotnet", "mix", "gradle"] }, "commands": { "type": "object", diff --git a/src/commands/onboard.ts b/src/commands/onboard.ts index c65f649..70cf397 100644 --- a/src/commands/onboard.ts +++ b/src/commands/onboard.ts @@ -155,7 +155,7 @@ function detectFromNodePackage(repoPath: string, pkgPath: string): PackageDetect let packageManager = 'npm'; if (existsSync(resolve(repoPath, 'pnpm-lock.yaml'))) packageManager = 'pnpm'; else if (existsSync(resolve(repoPath, 'yarn.lock'))) packageManager = 'yarn'; - else if (existsSync(resolve(repoPath, 'bun.lockb')) || existsSync(resolve(repoPath, 'bun.lock'))) packageManager = 'pnpm'; + else if (existsSync(resolve(repoPath, 'bun.lockb')) || existsSync(resolve(repoPath, 'bun.lock'))) packageManager = 'bun'; const run = packageManager === 'npm' ? 'npm run' : packageManager; const commands: Record<string, string> = {}; @@ -167,7 +167,7 @@ function detectFromNodePackage(repoPath: string, pkgPath: string): PackageDetect if (scripts.typecheck) commands.typecheck = `${run} typecheck`; if (scripts.format) commands.format = `${run} format`; - const language = existsSync(resolve(repoPath, 'tsconfig.json')) ? 
'typescript' : 'typescript'; + const language = 'typescript'; return { language, packageManager, commands, description }; } From 352bcda1f4641aafb6cb4e13ee826de58a8fa29d Mon Sep 17 00:00:00 2001 From: Nick Nisi <nick.nisi@workos.com> Date: Mon, 18 May 2026 16:39:25 -0700 Subject: [PATCH 6/8] fix(onboard): bun run prefix, required test command, stale manifest path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - bun needs `bun run <script>` like npm — `bun build` invokes Bun's built-in bundler, not the package.json script - test command is always set (required by schema) even when the repo has no test script in package.json - orchestrator prompt referenced ${caseRoot}/projects.json which no longer exists — updated to ~/.config/case/projects.json - removed "WorkOS OSS" from orchestrator prompt --- src/agent/orchestrator-session.ts | 4 ++-- src/commands/onboard.ts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/agent/orchestrator-session.ts b/src/agent/orchestrator-session.ts index 2df9711..da4bd6c 100644 --- a/src/agent/orchestrator-session.ts +++ b/src/agent/orchestrator-session.ts @@ -268,12 +268,12 @@ function buildOrchestratorSystemPrompt(caseRoot: string): string { ] : [ `- Case root: ${caseRoot}`, - `- Projects manifest: ${caseRoot}/projects.json`, + `- Projects manifest: ~/.config/case/projects.json (or ca onboard to add repos)`, `- Golden principles: ${caseRoot}/docs/golden-principles.md`, `- Agent prompts: ${caseRoot}/agents/`, ]; - return `You are the Case orchestrator — an interactive agent for managing WorkOS OSS repos. + return `You are the Case orchestrator — an interactive agent for managing target repos. **Always wait for the user's first message before calling any tools.** The initial context below is background information, not a request to act. Greet the user briefly and wait. 
diff --git a/src/commands/onboard.ts b/src/commands/onboard.ts index 70cf397..7605ed3 100644 --- a/src/commands/onboard.ts +++ b/src/commands/onboard.ts @@ -157,11 +157,11 @@ function detectFromNodePackage(repoPath: string, pkgPath: string): PackageDetect else if (existsSync(resolve(repoPath, 'yarn.lock'))) packageManager = 'yarn'; else if (existsSync(resolve(repoPath, 'bun.lockb')) || existsSync(resolve(repoPath, 'bun.lock'))) packageManager = 'bun'; - const run = packageManager === 'npm' ? 'npm run' : packageManager; + const run = (packageManager === 'npm' || packageManager === 'bun') ? `${packageManager} run` : packageManager; const commands: Record<string, string> = {}; commands.setup = `${packageManager} install`; - if (scripts.test) commands.test = `${packageManager} test`; + commands.test = scripts.test ? `${run} test` : `${packageManager} test`; if (scripts.build) commands.build = `${run} build`; if (scripts.lint) commands.lint = `${run} lint`; if (scripts.typecheck) commands.typecheck = `${run} typecheck`; From 9b94065764f15e170d4bf36e130470a963813399 Mon Sep 17 00:00:00 2001 From: Nick Nisi <nick.nisi@workos.com> Date: Mon, 18 May 2026 16:49:55 -0700 Subject: [PATCH 7/8] fix(onboard): create empty projects.json when manifest does not exist First-time users who run ca init then ca onboard would hit a throw from loadProjectsManifest since no projects.json exists yet. Now the onboard handler catches the error and creates an empty manifest at ~/.config/case/projects.json before proceeding. 
--- src/commands/onboard.ts | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/commands/onboard.ts b/src/commands/onboard.ts index 7605ed3..190f9f9 100644 --- a/src/commands/onboard.ts +++ b/src/commands/onboard.ts @@ -1,7 +1,8 @@ import { existsSync, readFileSync } from 'node:fs'; -import { resolve, relative, basename } from 'node:path'; -import { loadProjectsManifest } from '../config.js'; -import { resolvePackageRoot } from '../paths.js'; +import { resolve, relative, basename, dirname } from 'node:path'; +import { mkdir } from 'node:fs/promises'; +import { loadProjectsManifest, type LoadedProjectsManifest } from '../config.js'; +import { resolveDataDir, resolvePackageRoot } from '../paths.js'; import { runCommandLine } from '../util/run-command.js'; import type { EvidenceStrategy, ProjectEntry } from '../types.js'; @@ -35,7 +36,7 @@ export async function handler(argv: string[]): Promise<number> { } const caseRoot = resolvePackageRoot(); - const manifest = await loadProjectsManifest(caseRoot); + const manifest = await loadOrCreateManifest(caseRoot); const existing = manifest.repos.find( (r) => resolve(manifest.repoBasePath, r.path) === absPath || r.name === basename(absPath), @@ -102,6 +103,19 @@ export async function handler(argv: string[]): Promise<number> { return 0; } +async function loadOrCreateManifest(caseRoot: string): Promise<LoadedProjectsManifest> { + try { + return await loadProjectsManifest(caseRoot); + } catch { + const dataDir = resolveDataDir(); + const path = resolve(dataDir, 'projects.json'); + await mkdir(dirname(path), { recursive: true }); + await Bun.write(path, JSON.stringify({ $schema: './projects.schema.json', repos: [] }, null, 2) + '\n'); + process.stdout.write(`Created ${path}\n`); + return { repos: [], path, repoBasePath: dataDir }; + } +} + async function probeRepo(absPath: string, basePath: string): Promise<DetectedRepo> { const name = basename(absPath); const relPath = relative(basePath, 
absPath); From 3f8bd51b1994ab6224fe405d9bb92d802fdf6e44 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 19 May 2026 00:10:27 +0000 Subject: [PATCH 8/8] fix(onboard): align repoBasePath logic and evidence expectations guard - loadOrCreateManifest now uses isEmbeddedPackageRoot to match loadProjectsManifest's repoBasePath logic, preventing path mismatches between onboard and subsequent commands - buildTaskMarkdown uses !== undefined instead of truthiness check for required evidenceExpectations field, preventing silent omission on empty string Co-Authored-By: nick.nisi@workos.com <nick.nisi@workos.com> --- src/commands/onboard.ts | 4 ++-- src/entry/task-factory.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/commands/onboard.ts b/src/commands/onboard.ts index 190f9f9..d4ab13b 100644 --- a/src/commands/onboard.ts +++ b/src/commands/onboard.ts @@ -2,7 +2,7 @@ import { existsSync, readFileSync } from 'node:fs'; import { resolve, relative, basename, dirname } from 'node:path'; import { mkdir } from 'node:fs/promises'; import { loadProjectsManifest, type LoadedProjectsManifest } from '../config.js'; -import { resolveDataDir, resolvePackageRoot } from '../paths.js'; +import { isEmbeddedPackageRoot, resolveDataDir, resolvePackageRoot } from '../paths.js'; import { runCommandLine } from '../util/run-command.js'; import type { EvidenceStrategy, ProjectEntry } from '../types.js'; @@ -112,7 +112,7 @@ async function loadOrCreateManifest(caseRoot: string): Promise<LoadedProjectsMan await mkdir(dirname(path), { recursive: true }); await Bun.write(path, JSON.stringify({ $schema: './projects.schema.json', repos: [] }, null, 2) + '\n'); process.stdout.write(`Created ${path}\n`); - return { repos: [], path, repoBasePath: dataDir }; + return { repos: [], path, repoBasePath: isEmbeddedPackageRoot(caseRoot) ? 
dataDir : caseRoot }; } } diff --git a/src/entry/task-factory.ts b/src/entry/task-factory.ts index 40be4d5..dfdd38c 100644 --- a/src/entry/task-factory.ts +++ b/src/entry/task-factory.ts @@ -139,7 +139,7 @@ function buildTaskMarkdown(request: TaskCreateRequest, taskJson: TaskJson, issue if (request.edgeCases) { lines.push('## Edge Cases', '', request.edgeCases, ''); } - if (request.evidenceExpectations) { + if (request.evidenceExpectations !== undefined) { lines.push('## Evidence Expectations', '', request.evidenceExpectations, ''); }