diff --git a/CLAUDE.md b/CLAUDE.md index e903fac5..f448a52f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -40,6 +40,8 @@ Commands with Teams Variant ship as `{name}.md` (parallel subagents) and `{name} **Working Memory**: Three shell-script hooks (`scripts/hooks/`) provide automatic session continuity. Toggleable via `devflow memory --enable/--disable/--status` or `devflow init --memory/--no-memory`. Stop hook → reads last turn from session transcript (`~/.claude/projects/{encoded-cwd}/{session_id}.jsonl`), spawns background `claude -p --model haiku` to update `.memory/WORKING-MEMORY.md` with structured sections (`## Now`, `## Progress`, `## Decisions`, `## Modified Files`, `## Context`, `## Session Log`; throttled: skips if triggered <2min ago; concurrent sessions serialize via mkdir-based lock). SessionStart hook → injects previous memory + git state as `additionalContext` on `/clear`, startup, or compact (warns if >1h stale; injects pre-compact memory snapshot when compaction happened mid-session). PreCompact hook → saves git state + WORKING-MEMORY.md snapshot + bootstraps minimal WORKING-MEMORY.md if none exists. Zero-ceremony context preservation. +**Ambient Mode**: Three-layer architecture for always-on intent classification. SessionStart hook (`session-start-classification`) reads lean classification rules (`~/.claude/skills/devflow:router/references/classification-rules.md`, ~30 lines) and injects them as `additionalContext` — once per session, deterministic, zero model overhead. UserPromptSubmit hook (`preamble`) injects a one-sentence prompt per message that triggers classification + router loading via the Skill tool. Router SKILL.md is a pure skill lookup table (~50 lines) loaded on-demand only for GUIDED/ORCHESTRATED depth — maps intent×depth to domain and orchestration skills. Toggleable via `devflow ambient --enable/--disable/--status` or `devflow init`.
+ **Self-Learning**: A SessionEnd hook (`session-end-learning`) accumulates session IDs and triggers a background `claude -p --model sonnet` every 3 sessions (5 at 15+ observations) to detect repeated workflows and procedural knowledge from batch transcripts. Observations accumulate in `.memory/learning-log.jsonl` with confidence scores, temporal decay, and daily run caps. When confidence thresholds are met (5 observations with 7-day temporal spread for both workflow and procedural types), artifacts are auto-created as slash commands (`.claude/commands/self-learning/`) or skills (`.claude/skills/{slug}/`). Loaded artifacts are reinforced locally (no LLM) on each session end. Single toggle mechanism: hook presence in `settings.json` IS the enabled state — no `enabled` field in `learning.json`. Toggleable via `devflow learn --enable/--disable/--status` or `devflow init --learn/--no-learn`. Configurable model/throttle/caps/debug via `devflow learn --configure`. Use `devflow learn --reset` to remove all artifacts + log + transient state. Use `devflow learn --purge` to remove invalid observations. Debug logs stored at `~/.devflow/logs/{project-slug}/`. **Claude Code Flags**: Typed registry (`src/cli/utils/flags.ts`) for managing Claude Code feature flags (env vars and top-level settings). Pure functions `applyFlags`/`stripFlags`/`getDefaultFlags` follow the `applyTeamsConfig`/`stripTeamsConfig` pattern. Initial flags: `tool-search`, `lsp`, `clear-context-on-plan` (default ON), `brief`, `disable-1m-context` (default OFF). Manageable via `devflow flags --enable/--disable/--status/--list`. Stored in manifest `features.flags: string[]`. 
@@ -55,7 +57,7 @@ devflow/ ├── plugins/devflow-*/ # 17 plugins (8 core + 9 optional language/ecosystem) ├── docs/reference/ # Detailed reference documentation ├── scripts/ # Helper scripts (statusline, docs-helpers) -│ └── hooks/ # Working Memory + ambient + learning hooks (stop, session-start, pre-compact, preamble, session-end-learning, stop-update-learning [deprecated], background-learning) +│ └── hooks/ # Working Memory + ambient + learning hooks (stop, session-start-memory, session-start-classification, pre-compact, preamble, session-end-learning, stop-update-learning [deprecated], background-learning) ├── src/cli/ # TypeScript CLI (init, list, uninstall, ambient, learn, flags) ├── .claude-plugin/ # Marketplace registry ├── .docs/ # Project docs (reviews, design) — per-project diff --git a/package-lock.json b/package-lock.json index 5fbf032d..c8bb7511 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "devflow-kit", - "version": "1.8.3", + "version": "2.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "devflow-kit", - "version": "1.8.3", + "version": "2.0.0", "license": "MIT", "dependencies": { "@clack/prompts": "^0.9.1", diff --git a/plugins/devflow-ambient/README.md b/plugins/devflow-ambient/README.md index 54acb282..5b3f3629 100644 --- a/plugins/devflow-ambient/README.md +++ b/plugins/devflow-ambient/README.md @@ -1,11 +1,11 @@ # devflow-ambient -Ambient mode — classifies intent and applies proportional effort via a `UserPromptSubmit` hook. No slash command — ambient mode activates automatically on every prompt when enabled. +Ambient mode — classifies intent and applies proportional effort via a `SessionStart` hook and a `UserPromptSubmit` hook. No slash command — ambient mode activates automatically on every prompt when enabled. 
## Activation ```bash -devflow ambient --enable # Register UserPromptSubmit hook +devflow ambient --enable # Register ambient mode hooks devflow ambient --disable # Remove hook devflow ambient --status # Check if enabled ``` @@ -44,9 +44,9 @@ Skills are loaded via the Skill tool and work happens in the main session: | Intent | Skills | Main Session Work | Post-Work | |--------|--------|-------------------|-----------| -| IMPLEMENT | test-driven-development, patterns, research | Implement with TDD | `Task(subagent_type="Simplifier")` | -| DEBUG | software-design, testing | Investigate, diagnose, fix | `Task(subagent_type="Simplifier")` | -| PLAN | patterns, software-design | Explore and design | — | +| IMPLEMENT | test-driven-development, patterns, research | Implement with TDD | `Agent(subagent_type="Simplifier")` | +| DEBUG | test-driven-development, software-design, testing | Investigate, diagnose, fix | `Agent(subagent_type="Simplifier")` | +| PLAN | test-driven-development, patterns, software-design, security | Explore and design | — | | REVIEW | quality-gates, software-design | Review directly | — | ## ORCHESTRATED Pipelines diff --git a/plugins/devflow-code-review/commands/code-review-teams.md b/plugins/devflow-code-review/commands/code-review-teams.md index 44d78045..ecffc5fc 100644 --- a/plugins/devflow-code-review/commands/code-review-teams.md +++ b/plugins/devflow-code-review/commands/code-review-teams.md @@ -34,7 +34,7 @@ Run a comprehensive code review of the current branch by spawning a review team For each reviewable worktree, spawn Git agent: ``` -Task(subagent_type="Git", run_in_background=false): +Agent(subagent_type="Git", run_in_background=false): "OPERATION: ensure-pr-ready WORKTREE_PATH: {worktree_path} (omit if cwd) Validate branch, commit if needed, push, create PR if needed. 
@@ -218,7 +218,7 @@ Spawn 2 agents **in a single message**: **Git Agent (PR Comments)**: ``` -Task(subagent_type="Git", run_in_background=false): +Agent(subagent_type="Git", run_in_background=false): "OPERATION: comment-pr WORKTREE_PATH: {worktree_path} (omit if cwd) Read reviews from {worktree_path}/.docs/reviews/{branch_slug}/{timestamp}/ diff --git a/plugins/devflow-code-review/commands/code-review.md b/plugins/devflow-code-review/commands/code-review.md index 990c8986..96a14167 100644 --- a/plugins/devflow-code-review/commands/code-review.md +++ b/plugins/devflow-code-review/commands/code-review.md @@ -34,7 +34,7 @@ Run a comprehensive code review of the current branch by spawning parallel revie For each reviewable worktree, spawn Git agent: ``` -Task(subagent_type="Git", run_in_background=false): +Agent(subagent_type="Git", run_in_background=false): "OPERATION: ensure-pr-ready WORKTREE_PATH: {worktree_path} (omit if cwd) Validate branch, commit if needed, push, create PR if needed. @@ -109,7 +109,7 @@ Spawn Reviewer agents **in a single message**. Always run 7 core reviews; condit Each Reviewer invocation (all in one message, **NOT background**): ``` -Task(subagent_type="Reviewer", run_in_background=false): +Agent(subagent_type="Reviewer", run_in_background=false): "Review focusing on {focus}. Load the pattern skill for your focus from the Focus Areas table. Follow 6-step process from devflow:review-methodology. 
PR: #{pr_number}, Base: {base_branch} @@ -126,7 +126,7 @@ In multi-worktree mode, process worktrees **sequentially** (one worktree at a ti **Git Agent (PR Comments)** per worktree: ``` -Task(subagent_type="Git", run_in_background=false): +Agent(subagent_type="Git", run_in_background=false): "OPERATION: comment-pr WORKTREE_PATH: {worktree_path} (omit if cwd) Read reviews from {worktree_path}/.docs/reviews/{branch-slug}/{timestamp}/ @@ -139,7 +139,7 @@ Check for existing inline comments at same file:line before creating new ones to **Synthesizer Agent** per worktree: ``` -Task(subagent_type="Synthesizer", run_in_background=false): +Agent(subagent_type="Synthesizer", run_in_background=false): "Mode: review WORKTREE_PATH: {worktree_path} (omit if cwd) REVIEW_BASE_DIR: {worktree_path}/.docs/reviews/{branch-slug}/{timestamp} diff --git a/plugins/devflow-debug/commands/debug-teams.md b/plugins/devflow-debug/commands/debug-teams.md index efd67411..fe1ee166 100644 --- a/plugins/devflow-debug/commands/debug-teams.md +++ b/plugins/devflow-debug/commands/debug-teams.md @@ -32,7 +32,7 @@ Read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md`. Known If `$ARGUMENTS` starts with `#`, fetch the GitHub issue: ``` -Task(subagent_type="Git"): +Agent(subagent_type="Git"): "OPERATION: fetch-issue ISSUE: {issue number} Return issue title, body, labels, and any linked error logs." diff --git a/plugins/devflow-debug/commands/debug.md b/plugins/devflow-debug/commands/debug.md index ac3b65c5..f8890d3d 100644 --- a/plugins/devflow-debug/commands/debug.md +++ b/plugins/devflow-debug/commands/debug.md @@ -32,7 +32,7 @@ Read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md`. Known If `$ARGUMENTS` starts with `#`, fetch the GitHub issue: ``` -Task(subagent_type="Git"): +Agent(subagent_type="Git"): "OPERATION: fetch-issue ISSUE: {issue number} Return issue title, body, labels, and any linked error logs." 
@@ -48,7 +48,7 @@ Analyze the bug description (from arguments or issue) and identify 3-5 plausible Spawn one Explore agent per hypothesis in a **single message** (parallel execution): ``` -Task(subagent_type="Explore"): +Agent(subagent_type="Explore"): "Investigate this bug: {bug_description} Hypothesis: {hypothesis A description} @@ -67,7 +67,7 @@ Return a structured report: - Evidence AGAINST: [list with file:line refs] - Key finding: {one-sentence summary}" -Task(subagent_type="Explore"): +Agent(subagent_type="Explore"): "Investigate this bug: {bug_description} Hypothesis: {hypothesis B description} @@ -75,7 +75,7 @@ Focus area: {specific code area, mechanism, or condition} [same steps and return format]" -Task(subagent_type="Explore"): +Agent(subagent_type="Explore"): "Investigate this bug: {bug_description} Hypothesis: {hypothesis C description} @@ -91,7 +91,7 @@ Focus area: {specific code area, mechanism, or condition} Once all investigators return, spawn a Synthesizer agent to aggregate findings: ``` -Task(subagent_type="Synthesizer"): +Agent(subagent_type="Synthesizer"): "You are a root cause analyst. Synthesize these investigation reports: {paste all investigator reports} diff --git a/plugins/devflow-implement/commands/implement-teams.md b/plugins/devflow-implement/commands/implement-teams.md index 1a394a07..cc675fbb 100644 --- a/plugins/devflow-implement/commands/implement-teams.md +++ b/plugins/devflow-implement/commands/implement-teams.md @@ -30,7 +30,7 @@ Record the current branch name as `BASE_BRANCH` - this will be the PR target. Spawn Git agent to set up task environment. The Git agent derives the branch name automatically from the issue or task description: ``` -Task(subagent_type="Git"): +Agent(subagent_type="Git"): "OPERATION: setup-task BASE_BRANCH: {current branch name} ISSUE_INPUT: {issue number if $ARGUMENTS starts with #, otherwise omit} @@ -51,7 +51,7 @@ Return the branch setup summary." 
Spawn Skimmer agent for codebase overview: ``` -Task(subagent_type="Skimmer"): +Agent(subagent_type="Skimmer"): "Orient in codebase for: {task description} Run rskim on source directories (NOT repo root) to identify relevant files, functions, integration points" ``` @@ -156,7 +156,7 @@ Step 3: GATE — Verify TeamDelete succeeded You MUST spawn the Synthesizer agent. ``` -Task(subagent_type="Synthesizer"): +Agent(subagent_type="Synthesizer"): "Synthesize EXPLORATION outputs for: {task} Mode: exploration Explorer consensus: {team exploration consensus output} @@ -252,7 +252,7 @@ Step 3: GATE — Verify TeamDelete succeeded You MUST spawn the Synthesizer agent. ``` -Task(subagent_type="Synthesizer"): +Agent(subagent_type="Synthesizer"): "Synthesize PLANNING outputs for: {task} Mode: planning Planner consensus: {team planning consensus output} @@ -282,7 +282,7 @@ Based on Phase 6 synthesis, use the three-strategy framework: **SINGLE_CODER** (default): ``` -Task(subagent_type="Coder"): +Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: {description} BASE_BRANCH: {base branch} @@ -300,7 +300,7 @@ Spawn Coders one at a time, passing handoff summaries between phases: **Phase 1 Coder:** ``` -Task(subagent_type="Coder"): +Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: {phase 1 description} BASE_BRANCH: {base branch} @@ -313,7 +313,7 @@ HANDOFF_REQUIRED: true" **Phase 2+ Coders** (after prior phase completes): ``` -Task(subagent_type="Coder"): +Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: {phase N description} BASE_BRANCH: {base branch} @@ -335,7 +335,7 @@ HANDOFF_REQUIRED: {true if not last phase}" Spawn multiple Coders **in a single message**, each with independent subtask: ``` -Task(subagent_type="Coder"): # Coder 1 +Agent(subagent_type="Coder"): # Coder 1 "TASK_ID: {task-id}-part1 TASK_DESCRIPTION: {independent subtask 1} BASE_BRANCH: {base branch} @@ -344,7 +344,7 @@ PATTERNS: {patterns} CREATE_PR: false DOMAIN: 
{subtask 1 domain}" -Task(subagent_type="Coder"): # Coder 2 (same message) +Agent(subagent_type="Coder"): # Coder 2 (same message) "TASK_ID: {task-id}-part2 TASK_DESCRIPTION: {independent subtask 2} BASE_BRANCH: {base branch} @@ -365,7 +365,7 @@ DOMAIN: {subtask 2 domain}" After Coder completes, spawn Validator to verify correctness: ``` -Task(subagent_type="Validator", model="haiku"): +Agent(subagent_type="Validator", model="haiku"): "FILES_CHANGED: {list of files from Coder output} VALIDATION_SCOPE: full Run build, typecheck, lint, test. Report pass/fail with failure details." @@ -377,7 +377,7 @@ Run build, typecheck, lint, test. Report pass/fail with failure details." 3. If `validation_retry_count <= 2`: - Spawn Coder with fix context: ``` - Task(subagent_type="Coder"): + Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: Fix validation failures OPERATION: validation-fix @@ -395,7 +395,7 @@ Run build, typecheck, lint, test. Report pass/fail with failure details." After validation passes, spawn Simplifier to polish the code: ``` -Task(subagent_type="Simplifier"): +Agent(subagent_type="Simplifier"): "Simplify recently implemented code Task: {task description} FILES_CHANGED: {list of files from Coder output} @@ -407,7 +407,7 @@ Focus on code modified by Coder, apply project standards, enhance clarity" After Simplifier completes, spawn Scrutinizer as final quality gate: ``` -Task(subagent_type="Scrutinizer"): +Agent(subagent_type="Scrutinizer"): "TASK_DESCRIPTION: {task description} FILES_CHANGED: {list of files from Coder output} Evaluate 9 pillars, fix P0/P1 issues, report status" @@ -420,7 +420,7 @@ If Scrutinizer returns BLOCKED, report to user and halt. 
If Scrutinizer made code changes (status: FIXED), spawn Validator to verify: ``` -Task(subagent_type="Validator", model="haiku"): +Agent(subagent_type="Validator", model="haiku"): "FILES_CHANGED: {files modified by Scrutinizer} VALIDATION_SCOPE: changed-only Verify Scrutinizer's fixes didn't break anything." @@ -503,7 +503,7 @@ Step 3: GATE — Verify TeamDelete succeeded 3. If `alignment_fix_count <= 2`: - Spawn Coder to fix misalignments: ``` - Task(subagent_type="Coder"): + Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: Fix alignment issues OPERATION: alignment-fix @@ -513,7 +513,7 @@ Step 3: GATE — Verify TeamDelete succeeded ``` - Spawn Validator to verify fix didn't break tests: ``` - Task(subagent_type="Validator", model="haiku"): + Agent(subagent_type="Validator", model="haiku"): "FILES_CHANGED: {files modified by fix Coder} VALIDATION_SCOPE: changed-only" ``` @@ -526,7 +526,7 @@ Step 3: GATE — Verify TeamDelete succeeded After Evaluator passes, spawn Tester for scenario-based acceptance testing (standalone agent, not a teammate — testing is sequential, not debate): ``` -Task(subagent_type="Tester"): +Agent(subagent_type="Tester"): "ORIGINAL_REQUEST: {task description or issue content} EXECUTION_PLAN: {synthesized plan from Phase 6} FILES_CHANGED: {list of files from Coder output} @@ -542,7 +542,7 @@ Design and execute scenario-based acceptance tests. Report PASS or FAIL with evi 3. If `qa_retry_count <= 2`: - Spawn Coder to fix QA failures: ``` - Task(subagent_type="Coder"): + Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: Fix QA test failures OPERATION: qa-fix @@ -552,7 +552,7 @@ Design and execute scenario-based acceptance tests. 
Report PASS or FAIL with evi ``` - Spawn Validator to verify fix didn't break tests: ``` - Task(subagent_type="Validator", model="haiku"): + Agent(subagent_type="Validator", model="haiku"): "FILES_CHANGED: {files modified by fix Coder} VALIDATION_SCOPE: changed-only" ``` @@ -652,7 +652,7 @@ If the Coder's report includes Key Decisions with architectural significance: 6. **Clean handoffs** - Each phase passes structured data to next; sequential Coders pass implementation summaries 7. **Honest reporting** - Display agent outputs directly 8. **Simplification pass** - Code refined for clarity before PR -9. **Strict delegation** - Never perform agent work in main session. "Spawn X" means call Task tool with X, not do X's work yourself +9. **Strict delegation** - Never perform agent work in main session. "Spawn X" means call Agent tool with X, not do X's work yourself 10. **Validator owns validation** - Never run `npm test`, `npm run build`, or similar in main session; always delegate to Validator agent 11. **Coder owns fixes** - Never implement fixes in main session; spawn Coder for validation failures and alignment fixes 12. **Loop limits** - Max 2 validation retries, max 2 alignment fix iterations before escalating to user diff --git a/plugins/devflow-implement/commands/implement.md b/plugins/devflow-implement/commands/implement.md index 79a1634f..22b64d01 100644 --- a/plugins/devflow-implement/commands/implement.md +++ b/plugins/devflow-implement/commands/implement.md @@ -30,7 +30,7 @@ Record the current branch name as `BASE_BRANCH` - this will be the PR target. Spawn Git agent to set up task environment. The Git agent derives the branch name automatically from the issue or task description: ``` -Task(subagent_type="Git"): +Agent(subagent_type="Git"): "OPERATION: setup-task BASE_BRANCH: {current branch name} ISSUE_INPUT: {issue number if $ARGUMENTS starts with #, otherwise omit} @@ -51,7 +51,7 @@ Return the branch setup summary." 
Spawn Skimmer agent for codebase overview: ``` -Task(subagent_type="Skimmer"): +Agent(subagent_type="Skimmer"): "Orient in codebase for: {task description} Run rskim on source directories (NOT repo root) to identify relevant files, functions, integration points" ``` @@ -77,7 +77,7 @@ Track success/failure of each explorer for synthesis context. You MUST spawn the Synthesizer agent - "spawn Synthesizer" means delegate to the agent, not do the work yourself. ``` -Task(subagent_type="Synthesizer"): +Agent(subagent_type="Synthesizer"): "Synthesize EXPLORATION outputs for: {task} Mode: exploration Explorer outputs: {all 4 outputs} @@ -117,7 +117,7 @@ Spawn 3 Plan agents **in a single message**, each with exploration synthesis: You MUST spawn the Synthesizer agent - "spawn Synthesizer" means delegate to the agent, not do the work yourself. ``` -Task(subagent_type="Synthesizer"): +Agent(subagent_type="Synthesizer"): "Synthesize PLANNING outputs for: {task} Mode: planning Planner outputs: {all 3 outputs} @@ -147,7 +147,7 @@ Based on Phase 6 synthesis, use the three-strategy framework: **SINGLE_CODER** (default): ``` -Task(subagent_type="Coder"): +Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: {description} BASE_BRANCH: {base branch} @@ -165,7 +165,7 @@ Spawn Coders one at a time, passing handoff summaries between phases: **Phase 1 Coder:** ``` -Task(subagent_type="Coder"): +Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: {phase 1 description} BASE_BRANCH: {base branch} @@ -178,7 +178,7 @@ HANDOFF_REQUIRED: true" **Phase 2+ Coders** (after prior phase completes): ``` -Task(subagent_type="Coder"): +Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: {phase N description} BASE_BRANCH: {base branch} @@ -200,7 +200,7 @@ HANDOFF_REQUIRED: {true if not last phase}" Spawn multiple Coders **in a single message**, each with independent subtask: ``` -Task(subagent_type="Coder"): # Coder 1 +Agent(subagent_type="Coder"): # Coder 
1 "TASK_ID: {task-id}-part1 TASK_DESCRIPTION: {independent subtask 1} BASE_BRANCH: {base branch} @@ -209,7 +209,7 @@ PATTERNS: {patterns} CREATE_PR: false DOMAIN: {subtask 1 domain}" -Task(subagent_type="Coder"): # Coder 2 (same message) +Agent(subagent_type="Coder"): # Coder 2 (same message) "TASK_ID: {task-id}-part2 TASK_DESCRIPTION: {independent subtask 2} BASE_BRANCH: {base branch} @@ -230,7 +230,7 @@ DOMAIN: {subtask 2 domain}" After Coder completes, spawn Validator to verify correctness: ``` -Task(subagent_type="Validator", model="haiku"): +Agent(subagent_type="Validator", model="haiku"): "FILES_CHANGED: {list of files from Coder output} VALIDATION_SCOPE: full Run build, typecheck, lint, test. Report pass/fail with failure details." @@ -242,7 +242,7 @@ Run build, typecheck, lint, test. Report pass/fail with failure details." 3. If `validation_retry_count <= 2`: - Spawn Coder with fix context: ``` - Task(subagent_type="Coder"): + Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: Fix validation failures OPERATION: validation-fix @@ -260,7 +260,7 @@ Run build, typecheck, lint, test. Report pass/fail with failure details." After validation passes, spawn Simplifier to polish the code: ``` -Task(subagent_type="Simplifier"): +Agent(subagent_type="Simplifier"): "Simplify recently implemented code Task: {task description} FILES_CHANGED: {list of files from Coder output} @@ -272,7 +272,7 @@ Focus on code modified by Coder, apply project standards, enhance clarity" After Simplifier completes, spawn Scrutinizer as final quality gate: ``` -Task(subagent_type="Scrutinizer"): +Agent(subagent_type="Scrutinizer"): "TASK_DESCRIPTION: {task description} FILES_CHANGED: {list of files from Coder output} Evaluate 9 pillars, fix P0/P1 issues, report status" @@ -285,7 +285,7 @@ If Scrutinizer returns BLOCKED, report to user and halt. 
If Scrutinizer made code changes (status: FIXED), spawn Validator to verify: ``` -Task(subagent_type="Validator", model="haiku"): +Agent(subagent_type="Validator", model="haiku"): "FILES_CHANGED: {files modified by Scrutinizer} VALIDATION_SCOPE: changed-only Verify Scrutinizer's fixes didn't break anything." @@ -300,7 +300,7 @@ Verify Scrutinizer's fixes didn't break anything." After Scrutinizer passes (and re-validation if needed), spawn Evaluator to validate alignment: ``` -Task(subagent_type="Evaluator"): +Agent(subagent_type="Evaluator"): "ORIGINAL_REQUEST: {task description or issue content} EXECUTION_PLAN: {synthesized plan from Phase 6} FILES_CHANGED: {list of files from Coder output} @@ -316,7 +316,7 @@ Validate alignment with request and plan. Report ALIGNED or MISALIGNED with deta 3. If `alignment_fix_count <= 2`: - Spawn Coder to fix misalignments: ``` - Task(subagent_type="Coder"): + Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: Fix alignment issues OPERATION: alignment-fix @@ -326,7 +326,7 @@ Validate alignment with request and plan. Report ALIGNED or MISALIGNED with deta ``` - Spawn Validator to verify fix didn't break tests: ``` - Task(subagent_type="Validator", model="haiku"): + Agent(subagent_type="Validator", model="haiku"): "FILES_CHANGED: {files modified by fix Coder} VALIDATION_SCOPE: changed-only" ``` @@ -339,7 +339,7 @@ Validate alignment with request and plan. Report ALIGNED or MISALIGNED with deta After Evaluator passes, spawn Tester for scenario-based acceptance testing: ``` -Task(subagent_type="Tester"): +Agent(subagent_type="Tester"): "ORIGINAL_REQUEST: {task description or issue content} EXECUTION_PLAN: {synthesized plan from Phase 6} FILES_CHANGED: {list of files from Coder output} @@ -355,7 +355,7 @@ Design and execute scenario-based acceptance tests. Report PASS or FAIL with evi 3. 
If `qa_retry_count <= 2`: - Spawn Coder to fix QA failures: ``` - Task(subagent_type="Coder"): + Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: Fix QA test failures OPERATION: qa-fix @@ -365,7 +365,7 @@ Design and execute scenario-based acceptance tests. Report PASS or FAIL with evi ``` - Spawn Validator to verify fix didn't break tests: ``` - Task(subagent_type="Validator", model="haiku"): + Agent(subagent_type="Validator", model="haiku"): "FILES_CHANGED: {files modified by fix Coder} VALIDATION_SCOPE: changed-only" ``` @@ -463,7 +463,7 @@ If the Coder's report includes Key Decisions with architectural significance: 5. **Clean handoffs** - Each phase passes structured data to next; sequential Coders pass implementation summaries 6. **Honest reporting** - Display agent outputs directly 7. **Simplification pass** - Code refined for clarity before PR -8. **Strict delegation** - Never perform agent work in main session. "Spawn X" means call Task tool with X, not do X's work yourself +8. **Strict delegation** - Never perform agent work in main session. "Spawn X" means call Agent tool with X, not do X's work yourself 9. **Validator owns validation** - Never run `npm test`, `npm run build`, or similar in main session; always delegate to Validator agent 10. **Coder owns fixes** - Never implement fixes in main session; spawn Coder for validation failures and alignment fixes 11. 
**Loop limits** - Max 2 validation retries, max 2 alignment fix iterations before escalating to user diff --git a/plugins/devflow-resolve/commands/resolve-teams.md b/plugins/devflow-resolve/commands/resolve-teams.md index c077c735..ba1ddb8d 100644 --- a/plugins/devflow-resolve/commands/resolve-teams.md +++ b/plugins/devflow-resolve/commands/resolve-teams.md @@ -35,7 +35,7 @@ Process issues from code review reports: validate them (false positive check), a For each resolvable worktree, spawn Git agent: ``` -Task(subagent_type="Git", run_in_background=false): +Agent(subagent_type="Git", run_in_background=false): "OPERATION: validate-branch WORKTREE_PATH: {worktree_path} (omit if cwd) Check feature branch, clean working directory, reviews exist. @@ -195,7 +195,7 @@ For each issue deferred as TECH_DEBT: If any fixes were made, spawn Simplifier agent to refine the changed code: ``` -Task(subagent_type="Simplifier", run_in_background=false): +Agent(subagent_type="Simplifier", run_in_background=false): "TASK_DESCRIPTION: Issue resolution fixes WORKTREE_PATH: {worktree_path} (omit if cwd) FILES_CHANGED: {list of files modified by Resolvers} @@ -209,7 +209,7 @@ Simplify and refine the fixes for clarity and consistency" If any issues were deferred, spawn Git agent: ``` -Task(subagent_type="Git"): +Agent(subagent_type="Git"): "OPERATION: manage-debt WORKTREE_PATH: {worktree_path} (omit if cwd) REVIEW_DIR: {TARGET_DIR} diff --git a/plugins/devflow-resolve/commands/resolve.md b/plugins/devflow-resolve/commands/resolve.md index dde82592..666e5606 100644 --- a/plugins/devflow-resolve/commands/resolve.md +++ b/plugins/devflow-resolve/commands/resolve.md @@ -35,7 +35,7 @@ Process issues from code review reports: validate them (false positive check), a For each resolvable worktree, spawn Git agent: ``` -Task(subagent_type="Git", run_in_background=false): +Agent(subagent_type="Git", run_in_background=false): "OPERATION: validate-branch WORKTREE_PATH: {worktree_path} (omit if cwd) 
Check feature branch, clean working directory, reviews exist. @@ -104,7 +104,7 @@ Create execution plan: Spawn Resolver agents based on dependency analysis. For independent batches, spawn **in a single message**: ``` -Task(subagent_type="Resolver"): +Agent(subagent_type="Resolver"): "ISSUES: [{issue1}, {issue2}, ...] BRANCH: {branch-slug} BATCH_ID: batch-{n} @@ -141,7 +141,7 @@ For each issue deferred as TECH_DEBT: If any fixes were made, spawn Simplifier agent to refine the changed code: ``` -Task(subagent_type="Simplifier", run_in_background=false): +Agent(subagent_type="Simplifier", run_in_background=false): "TASK_DESCRIPTION: Issue resolution fixes WORKTREE_PATH: {worktree_path} (omit if cwd) FILES_CHANGED: {list of files modified by Resolvers} @@ -155,7 +155,7 @@ Simplify and refine the fixes for clarity and consistency" If any issues were deferred, spawn Git agent: ``` -Task(subagent_type="Git"): +Agent(subagent_type="Git"): "OPERATION: manage-debt WORKTREE_PATH: {worktree_path} (omit if cwd) REVIEW_DIR: {TARGET_DIR} diff --git a/plugins/devflow-self-review/commands/self-review.md b/plugins/devflow-self-review/commands/self-review.md index de4ba583..29a306c7 100644 --- a/plugins/devflow-self-review/commands/self-review.md +++ b/plugins/devflow-self-review/commands/self-review.md @@ -29,7 +29,7 @@ Detect changed files and build context: Spawn Simplifier agent to refine code for clarity and consistency: -Task(subagent_type="Simplifier", run_in_background=false): +Agent(subagent_type="Simplifier", run_in_background=false): "TASK_DESCRIPTION: {task_description} FILES_CHANGED: {files_changed} KNOWLEDGE_CONTEXT: {knowledge_context or 'None'} @@ -42,7 +42,7 @@ If knowledge context is provided, verify no known pitfall patterns are being rei Spawn Scrutinizer agent for quality evaluation and fixing: -Task(subagent_type="Scrutinizer", run_in_background=false): +Agent(subagent_type="Scrutinizer", run_in_background=false): "TASK_DESCRIPTION: {task_description} 
FILES_CHANGED: {files_changed} KNOWLEDGE_CONTEXT: {knowledge_context or 'None'} @@ -55,7 +55,7 @@ If knowledge context is provided, check whether any known pitfall patterns are b If Scrutinizer made changes (STATUS == FIXED): -Task(subagent_type="Validator", run_in_background=false): +Agent(subagent_type="Validator", run_in_background=false): "FILES_CHANGED: {scrutinizer_modified_files} VALIDATION_SCOPE: changed-only Run build, typecheck, lint, test on modified files" diff --git a/plugins/devflow-specify/commands/specify-teams.md b/plugins/devflow-specify/commands/specify-teams.md index 0b03b8ed..7638999f 100644 --- a/plugins/devflow-specify/commands/specify-teams.md +++ b/plugins/devflow-specify/commands/specify-teams.md @@ -51,7 +51,7 @@ Use AskUserQuestion to confirm understanding before spawning any agents. Spawn Skimmer agent for codebase context: ``` -Task(subagent_type="Skimmer"): +Agent(subagent_type="Skimmer"): "Orient in codebase for requirements exploration: {feature} Run rskim on source directories (NOT repo root) to find: project structure, similar features, patterns, integration points Return: codebase context for requirements (not implementation details)" @@ -148,7 +148,7 @@ Step 3: GATE — Verify TeamDelete succeeded **WAIT** for Phase 4, then spawn Synthesizer: ``` -Task(subagent_type="Synthesizer"): +Agent(subagent_type="Synthesizer"): "Synthesize EXPLORATION outputs for: {feature} Mode: exploration Explorer consensus: {team exploration consensus output} @@ -230,7 +230,7 @@ Step 3: GATE — Verify TeamDelete succeeded **WAIT** for Phase 6, then spawn Synthesizer: ``` -Task(subagent_type="Synthesizer"): +Agent(subagent_type="Synthesizer"): "Synthesize PLANNING outputs for: {feature} Mode: planning Planner consensus: {team planning consensus output} diff --git a/plugins/devflow-specify/commands/specify.md b/plugins/devflow-specify/commands/specify.md index 5154e4cf..83568845 100644 --- a/plugins/devflow-specify/commands/specify.md +++ 
b/plugins/devflow-specify/commands/specify.md @@ -51,7 +51,7 @@ Use AskUserQuestion to confirm understanding before spawning any agents. Spawn Skimmer agent for codebase context: ``` -Task(subagent_type="Skimmer"): +Agent(subagent_type="Skimmer"): "Orient in codebase for requirements exploration: {feature} Run rskim on source directories (NOT repo root) to find: project structure, similar features, patterns, integration points Return: codebase context for requirements (not implementation details)" @@ -77,7 +77,7 @@ Spawn 4 Explore agents **in a single message**, each with Skimmer context and pr **WAIT** for Phase 4, then spawn Synthesizer: ``` -Task(subagent_type="Synthesizer"): +Agent(subagent_type="Synthesizer"): "Synthesize EXPLORATION outputs for: {feature} Mode: exploration Combine into: user needs, similar features, constraints, failure modes" @@ -98,7 +98,7 @@ Spawn 3 Plan agents **in a single message**, each with exploration synthesis: **WAIT** for Phase 6, then spawn Synthesizer: ``` -Task(subagent_type="Synthesizer"): +Agent(subagent_type="Synthesizer"): "Synthesize PLANNING outputs for: {feature} Mode: planning Combine into: user stories, scope breakdown, acceptance criteria, open questions" diff --git a/scripts/hooks/preamble b/scripts/hooks/preamble index a5a9ae07..1b58f7c1 100755 --- a/scripts/hooks/preamble +++ b/scripts/hooks/preamble @@ -31,13 +31,8 @@ if [ "$WORD_COUNT" -lt 2 ]; then exit 0 fi -# Detection-only preamble — classification rules and router skill reference. -# Skill mappings live in devflow:router SKILL.md, not here. +# Minimal preamble — classification rules injected at SessionStart, not here. # SYNC: must match tests/ambient.test.ts preamble drift detection -PREAMBLE="AMBIENT MODE ENABLED: Classify user intent and depth. 
-Intents: CHAT (greetings/confirmations), EXPLORE (find/explain/analyze/trace/map), PLAN (plan/design/architecture), IMPLEMENT (add/create/build/implement), REVIEW (check/review), RESOLVE (resolve review issues), DEBUG (fix/bug/error), PIPELINE (end-to-end). -Depth: QUICK (chat, simple lookups, git ops, config, rename/comment tweaks, 1-2 line edits) | GUIDED (code changes ≤2 files, clear bugs, focused reviews, focused exploration, focused design/plan) | ORCHESTRATED (>2 files, multi-module, vague bugs, full/branch/PR reviews, deep exploration, system-level design, RESOLVE and PIPELINE always). -QUICK: respond normally. No classification, no skills. -GUIDED/ORCHESTRATED: Load devflow:router skill FIRST via Skill tool for skill mappings. Then load all skills it specifies. State: Devflow: INTENT/DEPTH. Loading: [skills]." +PREAMBLE="Classify this request's intent and depth, then load devflow:router via Skill tool." json_prompt_output "$PREAMBLE" diff --git a/scripts/hooks/session-start-classification b/scripts/hooks/session-start-classification new file mode 100755 index 00000000..f498f606 --- /dev/null +++ b/scripts/hooks/session-start-classification @@ -0,0 +1,30 @@ +#!/bin/bash + +# SessionStart Classification Hook +# Reads classification-rules.md and injects as additionalContext. +# Single source of truth for ambient classification rules. 
+ +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +source "$SCRIPT_DIR/json-parse" +if [ "$_JSON_AVAILABLE" = "false" ]; then exit 0; fi + +INPUT=$(cat) + +CWD=$(echo "$INPUT" | json_field "cwd" "") +if [ -z "$CWD" ]; then exit 0; fi + +CLASSIFICATION_RULES="$HOME/.claude/skills/devflow:router/references/classification-rules.md" +if [ -f "$CLASSIFICATION_RULES" ]; then + CONTEXT=$(cat "$CLASSIFICATION_RULES") +elif [ -f "$HOME/.claude/skills/devflow:router/SKILL.md" ]; then + # Fallback for upgrade window: old install without classification-rules.md + CONTEXT=$(awk '/^---$/{n++; next} n>=2' "$HOME/.claude/skills/devflow:router/SKILL.md") +else + exit 0 +fi + +if [ "${#CONTEXT}" -gt 4096 ]; then exit 0; fi + +json_session_output "$CONTEXT" diff --git a/shared/skills/debug:orch/SKILL.md b/shared/skills/debug:orch/SKILL.md index 21e57372..e0b8055f 100644 --- a/shared/skills/debug:orch/SKILL.md +++ b/shared/skills/debug:orch/SKILL.md @@ -2,7 +2,6 @@ name: debug:orch description: Agent orchestration for DEBUG intent — hypothesis investigation, root cause analysis, optional fix user-invocable: false -allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion --- # Debug Orchestration @@ -37,7 +36,7 @@ If fewer than 3 hypotheses are possible, proceed with 2. 
## Phase 2: Investigate (Parallel) -Spawn one `Task(subagent_type="Explore")` per hypothesis **in a single message** (parallel execution): +Spawn one `Agent(subagent_type="Explore")` per hypothesis **in a single message** (parallel execution): - Each investigator searches for evidence FOR and AGAINST its hypothesis - Must provide file:line references for all evidence @@ -47,7 +46,7 @@ Spawn one `Task(subagent_type="Explore")` per hypothesis **in a single message** Evaluate investigation results: -- **One CONFIRMED**: Spawn 1-2 additional `Task(subagent_type="Explore")` agents to validate from different angles (prevent confirmation bias) +- **One CONFIRMED**: Spawn 1-2 additional `Agent(subagent_type="Explore")` agents to validate from different angles (prevent confirmation bias) - **Multiple PARTIAL**: Look for a unifying root cause that explains all partial evidence - **All DISPROVED**: Report honestly — "No root cause identified from initial hypotheses." Generate 2-3 second-round hypotheses if conversation context suggests avenues not yet explored. @@ -64,7 +63,7 @@ Present root cause analysis: Ask user via AskUserQuestion: "Want me to implement this fix?" -- **YES** → Implement the fix directly in main session using GUIDED approach: load devflow:patterns, devflow:research, and devflow:test-driven-development skills, then code the fix. Spawn `Task(subagent_type="Simplifier")` on changed files after. +- **YES** → Implement the fix directly in main session using GUIDED approach: load devflow:patterns, devflow:research, and devflow:test-driven-development skills, then code the fix. Spawn `Agent(subagent_type="Simplifier")` on changed files after. - **NO** → Done. Report stands as documentation. 
## Error Handling diff --git a/shared/skills/explore:orch/SKILL.md b/shared/skills/explore:orch/SKILL.md index d00fd3f9..cf4003fe 100644 --- a/shared/skills/explore:orch/SKILL.md +++ b/shared/skills/explore:orch/SKILL.md @@ -2,7 +2,6 @@ name: explore:orch description: Agent orchestration for EXPLORE intent — codebase analysis, flow tracing, architecture mapping user-invocable: false -allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion --- # Explore Orchestration @@ -23,7 +22,7 @@ Agent pipeline for EXPLORE intent in ambient GUIDED and ORCHESTRATED modes. Code For GUIDED depth, the main session performs exploration directly: -1. **Spawn Skimmer** — `Task(subagent_type="Skimmer")` targeting the area of interest. Use orientation output to ground exploration in real file structures and patterns. +1. **Spawn Skimmer** — `Agent(subagent_type="Skimmer")` targeting the area of interest. Use orientation output to ground exploration in real file structures and patterns. 2. **Trace** — Using Skimmer findings, trace the flow or analyze the subsystem directly in main session. Follow call chains, read key files, map integration points. 3. **Present** — Deliver structured findings using the Output format below. Use AskUserQuestion to offer drill-down into specific areas. 
@@ -31,7 +30,7 @@ For GUIDED depth, the main session performs exploration directly: ### Phase 1: Orient -Spawn `Task(subagent_type="Skimmer")` to get codebase overview relevant to the exploration question: +Spawn `Agent(subagent_type="Skimmer")` to get codebase overview relevant to the exploration question: - File structure and module boundaries in the target area - Entry points and key abstractions @@ -39,7 +38,7 @@ Spawn `Task(subagent_type="Skimmer")` to get codebase overview relevant to the e ### Phase 2: Explore -Based on Skimmer findings, spawn 2-3 `Task(subagent_type="Explore")` agents **in a single message** (parallel execution): +Based on Skimmer findings, spawn 2-3 `Agent(subagent_type="Explore")` agents **in a single message** (parallel execution): - **Flow explorer**: Trace the primary call chain end-to-end — entry point through to side effects - **Dependency explorer**: Map imports, shared types, module boundaries, and integration points @@ -49,7 +48,7 @@ Adjust explorer focus based on the specific exploration question. 
### Phase 3: Synthesize -Spawn `Task(subagent_type="Synthesizer")` in `exploration` mode with combined findings: +Spawn `Agent(subagent_type="Synthesizer")` in `exploration` mode with combined findings: - Merge overlapping discoveries from parallel explorers - Resolve any contradictions between explorer findings diff --git a/shared/skills/implement:orch/SKILL.md b/shared/skills/implement:orch/SKILL.md index 394ab020..22e9ef39 100644 --- a/shared/skills/implement:orch/SKILL.md +++ b/shared/skills/implement:orch/SKILL.md @@ -2,7 +2,6 @@ name: implement:orch description: Agent orchestration for IMPLEMENT intent — pre-flight, Coder, quality gates user-invocable: false -allowed-tools: Read, Grep, Glob, Bash, Task --- # Implementation Orchestration @@ -29,7 +28,7 @@ Detect branch type before spawning Coder: - **Protected branches** (`main`, `master`, `develop`, `integration`, `trunk`, `release/*`, `staging`, `production`): record current branch as `BASE_BRANCH`, then spawn Git agent to auto-create a feature branch: ``` -Task(subagent_type="Git"): +Agent(subagent_type="Git"): "OPERATION: setup-task BASE_BRANCH: {current branch name} ISSUE_INPUT: {issue number if ticket mentioned in conversation, otherwise omit} @@ -57,7 +56,7 @@ If the orchestrator receives a `WORKTREE_PATH` context (e.g., from multi-worktre Record git SHA before first Coder: `git rev-parse HEAD` -Spawn `Task(subagent_type="Coder")` with input variables: +Spawn `Agent(subagent_type="Coder")` with input variables: - **TASK_ID**: Generated from timestamp (e.g., `task-2026-03-19_1430`) - **TASK_DESCRIPTION**: From conversation context - **BASE_BRANCH**: Current branch (or newly created branch from Phase 1) @@ -88,12 +87,12 @@ Pass FILES_CHANGED to all quality gate agents. Run sequentially — each gate must pass before the next: -1. `Task(subagent_type="Validator")` (build + typecheck + lint + tests) — retry up to 2× on failure (Coder fixes between retries) -2. 
`Task(subagent_type="Simplifier")` — code clarity and maintainability pass on FILES_CHANGED -3. `Task(subagent_type="Scrutinizer")` — 9-pillar quality evaluation on FILES_CHANGED -4. `Task(subagent_type="Validator")` (re-validate after Simplifier/Scrutinizer changes) -5. `Task(subagent_type="Evaluator")` — verify implementation matches original request — retry up to 2× if misalignment found -6. `Task(subagent_type="Tester")` — scenario-based acceptance testing from user's perspective — retry up to 2× if QA fails +1. `Agent(subagent_type="Validator")` (build + typecheck + lint + tests) — retry up to 2× on failure (Coder fixes between retries) +2. `Agent(subagent_type="Simplifier")` — code clarity and maintainability pass on FILES_CHANGED +3. `Agent(subagent_type="Scrutinizer")` — 9-pillar quality evaluation on FILES_CHANGED +4. `Agent(subagent_type="Validator")` (re-validate after Simplifier/Scrutinizer changes) +5. `Agent(subagent_type="Evaluator")` — verify implementation matches original request — retry up to 2× if misalignment found +6. `Agent(subagent_type="Tester")` — scenario-based acceptance testing from user's perspective — retry up to 2× if QA fails If any gate exhausts retries, halt pipeline and report what passed and what failed. diff --git a/shared/skills/pipeline:orch/SKILL.md b/shared/skills/pipeline:orch/SKILL.md index c54c54db..5fdac854 100644 --- a/shared/skills/pipeline:orch/SKILL.md +++ b/shared/skills/pipeline:orch/SKILL.md @@ -1,20 +1,19 @@ --- name: pipeline:orch -description: End-to-end meta-orchestrator chaining implement → review → resolve with user gates between stages +description: End-to-end meta-orchestrator chaining implement → review → resolve with status reporting between stages user-invocable: false -allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion --- # Pipeline Orchestration -Meta-orchestrator chaining implement → review → resolve with user gates between stages. 
For ambient PIPELINE intent ("implement this end to end", "build and review"). +Meta-orchestrator chaining implement → review → resolve with status reporting between stages. For ambient PIPELINE intent ("implement this end to end", "build and review"). ## Iron Law -> **USER GATES BETWEEN STAGES** +> **FULL PIPELINE, NO INTERRUPTIONS** > -> Never auto-chain from review to resolve without user confirmation. -> Critical findings require human judgment. Each gate is mandatory. +> Pipeline runs end-to-end without pausing. Report status between stages +> but never stop to ask. Each stage auto-proceeds to the next. --- @@ -31,13 +30,12 @@ If implementation returns **BLOCKED**: halt entire pipeline, report blocker. Cleanup: delete `.docs/handoff.md` if it exists (no longer needed before review). -## Phase 2: Gate — Review Decision +## Phase 2: Status — Review Decision -Use AskUserQuestion: -> "Implementation complete ({n} files changed, all quality gates passed). Proceed with multi-agent review? (This spawns 7+ reviewer agents)" +Log implementation results: +> "Implementation complete ({n} files changed, all quality gates passed). Proceeding to multi-agent review." -- **User says NO** → stop pipeline, report implementation results only -- **User says YES** → continue to Phase 3 +Auto-proceed to Phase 3. ## Phase 3: Review @@ -45,16 +43,15 @@ Load `devflow:review:orch` via the Skill tool, then execute its full pipeline (P Report review results (merge recommendation, issue counts). -## Phase 4: Gate — Resolve Decision +## Phase 4: Status — Resolve Decision If **blocking issues found**: -> Use AskUserQuestion: "Found {n} blocking issues. Auto-resolve? (Spawns resolver agents per batch)" +> Log: "Found {n} blocking issues. Auto-resolving." -If **no blocking issues**: -> "Review clean — no resolution needed." → stop pipeline with success summary +Then auto-proceed to Phase 5. 
-- **User says NO** → stop pipeline, report implementation + review results -- **User says YES** → continue to Phase 5 +If **no blocking issues**: +> "Review clean — no resolution needed." → skip to Phase 6 with success summary. ## Phase 5: Resolve @@ -71,7 +68,6 @@ End-to-end report: ## Error Handling - **Implementation BLOCKED**: Halt at Phase 1, report blocker -- **User declines gate**: Stop cleanly, report completed stages - **Review finds no changes**: Skip review, report implementation only - **All issues resolved**: Report full success - **Partial resolution**: Report what was fixed and what remains diff --git a/shared/skills/plan:orch/SKILL.md b/shared/skills/plan:orch/SKILL.md index 1ae8ae49..0741667c 100644 --- a/shared/skills/plan:orch/SKILL.md +++ b/shared/skills/plan:orch/SKILL.md @@ -2,7 +2,6 @@ name: plan:orch description: Agent orchestration for PLAN intent — codebase orientation, design exploration, gap validation user-invocable: false -allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion --- # Plan Orchestration @@ -25,7 +24,7 @@ This is a lightweight variant of the Plan phase in `/implement` for ambient ORCH For GUIDED depth, the main session performs planning directly: -1. **Spawn Skimmer** — `Task(subagent_type="Skimmer")` targeting the area of interest. Use orientation output to ground design decisions in real file structures and patterns. +1. **Spawn Skimmer** — `Agent(subagent_type="Skimmer")` targeting the area of interest. Use orientation output to ground design decisions in real file structures and patterns. 2. **Design** — Using Skimmer findings + loaded pattern/design skills, design the approach directly in main session. 3. **Present** — Deliver structured plan using the Output format below. Use AskUserQuestion for ambiguous design choices. 
@@ -35,7 +34,7 @@ If the orchestrator receives a `WORKTREE_PATH` context (e.g., from multi-worktre ## Phase 1: Orient -Spawn `Task(subagent_type="Skimmer")` to get codebase overview relevant to the planning question: +Spawn `Agent(subagent_type="Skimmer")` to get codebase overview relevant to the planning question: - Existing patterns and conventions in the affected area - File structure and module boundaries @@ -44,7 +43,7 @@ Spawn `Task(subagent_type="Skimmer")` to get codebase overview relevant to the p ## Phase 2: Explore -Based on Skimmer findings, spawn 2-3 `Task(subagent_type="Explore")` agents **in a single message** (parallel execution): +Based on Skimmer findings, spawn 2-3 `Agent(subagent_type="Explore")` agents **in a single message** (parallel execution): - **Integration explorer**: Examine integration points — APIs, shared types, module boundaries the plan must respect - **Pattern explorer**: Find existing implementations of similar features to follow as templates @@ -54,7 +53,7 @@ Adjust explorer focus based on the specific planning question. ## Phase 3: Design -Spawn `Task(subagent_type="Plan")` with combined Skimmer + Explore findings: +Spawn `Agent(subagent_type="Plan")` with combined Skimmer + Explore findings: - Design implementation approach with file-level specificity - Reference existing patterns discovered in Phase 1-2 diff --git a/shared/skills/research/SKILL.md b/shared/skills/research/SKILL.md index de87a17e..47e4db36 100644 --- a/shared/skills/research/SKILL.md +++ b/shared/skills/research/SKILL.md @@ -59,7 +59,7 @@ Delegate research to an Explore subagent to keep main session context clean. 
**Spawn an Explore agent** with this prompt template: ``` -Task(subagent_type="Explore"): +Agent(subagent_type="Explore"): "Research existing solutions for: {need description} Search for: diff --git a/shared/skills/resolve:orch/SKILL.md b/shared/skills/resolve:orch/SKILL.md index cd963a58..4c42a5ef 100644 --- a/shared/skills/resolve:orch/SKILL.md +++ b/shared/skills/resolve:orch/SKILL.md @@ -2,7 +2,6 @@ name: resolve:orch description: Agent orchestration for RESOLVE intent in ambient mode — issue resolution from review reports user-invocable: false -allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion --- # Resolve Orchestration @@ -52,7 +51,7 @@ Determine execution: batches with no shared files can run in parallel. ## Phase 4: Resolve (Parallel) -Spawn `Task(subagent_type="Resolver")` agents — one per batch, parallel where possible. +Spawn `Agent(subagent_type="Resolver")` agents — one per batch, parallel where possible. Each receives: - **ISSUES**: Array of issues in the batch @@ -69,7 +68,7 @@ Resolvers follow a 3-tier risk approach: Aggregate results from all Resolver agents: - Count: fixed, false positives, deferred -Spawn `Task(subagent_type="Simplifier")` on all files modified by Resolvers. +Spawn `Agent(subagent_type="Simplifier")` on all files modified by Resolvers. ## Phase 6: Report diff --git a/shared/skills/review:orch/SKILL.md b/shared/skills/review:orch/SKILL.md index 1068944c..8292175b 100644 --- a/shared/skills/review:orch/SKILL.md +++ b/shared/skills/review:orch/SKILL.md @@ -2,7 +2,6 @@ name: review:orch description: Agent orchestration for REVIEW intent in ambient ORCHESTRATED mode — multi-agent code review with parallel reviewers user-invocable: false -allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion --- # Review Orchestration @@ -22,7 +21,7 @@ This is a lightweight variant of `/code-review` for ambient ORCHESTRATED mode. 
E ## Phase 1: Pre-flight -Spawn `Task(subagent_type="Git")` with action `ensure-pr-ready`: +Spawn `Agent(subagent_type="Git")` with action `ensure-pr-ready`: - Extract: branch, base_branch, branch_slug, pr_number - If BLOCKED (detached HEAD, no commits ahead of base): halt with message @@ -78,8 +77,8 @@ Each reviewer receives: After all reviewers complete, spawn in parallel: -1. `Task(subagent_type="Git")` with action `comment-pr` — post review summary as PR comment (deduplicate: check existing comments first) -2. `Task(subagent_type="Synthesizer")` in review mode — reads all `{focus}.md` files from disk, writes `review-summary.md` +1. `Agent(subagent_type="Git")` with action `comment-pr` — post review summary as PR comment (deduplicate: check existing comments first) +2. `Agent(subagent_type="Synthesizer")` in review mode — reads all `{focus}.md` files from disk, writes `review-summary.md` ## Phase 6: Finalize diff --git a/shared/skills/router/SKILL.md b/shared/skills/router/SKILL.md index 8bb04d54..86300862 100644 --- a/shared/skills/router/SKILL.md +++ b/shared/skills/router/SKILL.md @@ -1,146 +1,51 @@ --- name: router -description: This skill should be used when classifying user intent for Devflow mode, auto-loading relevant skills without explicit command invocation. Used by the always-on UserPromptSubmit hook. +description: This skill should be used after ambient classification to load the relevant skills for the classified intent and depth. Maps GUIDED and ORCHESTRATED classifications to domain and orchestration skills. user-invocable: false -# No allowed-tools: orchestrator requires unrestricted access (Skill, Agent, Edit, Write, Bash) --- # Router -Classify user intent and auto-load relevant skills. Zero overhead for simple requests, skill loading + optional agent orchestration for substantive work. - -**Note:** The UserPromptSubmit hook injects a detection-only preamble (classification rules only). 
This SKILL.md contains the full skill mappings — load it via Skill tool for complete routing logic. - -## Iron Law - -> **PROPORTIONAL RESPONSE MATCHED TO SCOPE** -> -> QUICK gets zero overhead. GUIDED gets skill loading + main session implementation -> with Simplifier cleanup. ORCHESTRATED gets full skill loading via the Skill tool plus -> agent pipeline execution. Misclassification in either direction is a failure — -> false-positive ORCHESTRATED is expensive (5-6 agent spawns), false-negative -> GUIDED leaves quality on the table. - ---- - -## Step 1: Classify Intent - -Determine what the user is trying to do from their prompt. - -| Intent | Signal Words / Patterns | -|--------|------------------------| -| **CHAT** | greetings, meta-questions, confirmations, short responses | -| **EXPLORE** | "what is", "where is", "find", "show me", "explain", "how does" | -| **PLAN** | "how should", "design", "architecture", "approach", "strategy" | -| **IMPLEMENT** | "add", "create", "implement", "build", "write", "make" | -| **REVIEW** | "check", "look at", "review", "is this ok", "any issues" | -| **RESOLVE** | "resolve", "fix review issues", "address feedback", "fix findings" | -| **DEBUG** | "fix", "bug", "broken", "failing", "error", "why does" | -| **PIPELINE** | "end to end", "implement and review", "build and review", "full pipeline" | - -**Ambiguous prompts:** "Update the README" → QUICK. Git operations like "commit this" → QUICK. Code-change prompts without clear scope → GUIDED (not QUICK). - -## Step 2: Classify Depth - -Determine how much enforcement the prompt warrants. - -| Depth | Criteria | Action | -|-------|----------|--------| -| **QUICK** | CHAT intent. EXPLORE simple lookups ("where is X?"). Git/devops operations (commit, push, merge, branch, pr, deploy, reinstall). Single-word continuations. Rename/comment tweaks, config changes. 1-2 line edits. | Respond normally. Zero overhead. Do not state classification. 
| -| **GUIDED** | IMPLEMENT with small scope (≤2 files, single module). DEBUG with clear error location (stack trace, specific file, known function). PLAN for focused design questions (specific area/pattern). REVIEW (small scope — see below). | Load skills via Skill tool. Main session implements directly. Spawn Simplifier after code changes. State classification. | -| **ORCHESTRATED** | IMPLEMENT with larger scope (>2 files, multi-module, complex). DEBUG with vague/cross-cutting bug (no clear location, multiple possible causes). PLAN for system-level architecture (caching layer, auth system, multi-module design). REVIEW (large scope — see below). RESOLVE (always). PIPELINE (always). | Load skills via Skill tool, then orchestrate agents. State classification. | - -**Scope-based decision criteria:** - -| Intent | GUIDED (small scope) | ORCHESTRATED (large scope) | -|--------|---------------------|---------------------------| -| **IMPLEMENT** | ≤2 files, single module, clear task | >2 files, multi-module, complex | -| **DEBUG** | Clear error with known location (stack trace, specific file) | Vague/cross-cutting bug, multiple possible causes | -| **PLAN** | Focused question about specific area/pattern | System-level architecture, multi-module design | -| **EXPLORE** | Focused flow/module analysis, single subsystem | Multi-system architecture mapping, cross-cutting analysis | -| **REVIEW** | Continuation: match prior IMPLEMENT depth. Standalone: "check this"/"review this file" → GUIDED | Continuation: match prior IMPLEMENT depth. Standalone: "full review"/"branch review"/"PR review" → ORCHESTRATED | -| **RESOLVE** | — | Always ORCHESTRATED | -| **PIPELINE** | — | Always ORCHESTRATED | - -**Classification conservatism:** When choosing between GUIDED and ORCHESTRATED, prefer GUIDED — escalate only when scope clearly exceeds main-session capacity. 
When choosing between QUICK and GUIDED, prefer GUIDED if the prompt involves code changes (implement, debug, fix, add, create code) or asks for analysis/explanation of a subsystem. Reserve QUICK for truly zero-overhead prompts: chat, simple lookups, git ops, config changes, trivial edits. - -## Step 3: Select Skills - -Based on classified intent and depth, invoke each selected skill using the Skill tool. - -### GUIDED-depth skills - -| Intent | Primary Skills | Secondary (if file type matches) | -|--------|---------------|----------------------------------| -| **IMPLEMENT** | devflow:test-driven-development, devflow:patterns, devflow:research | devflow:typescript (.ts), devflow:react (.tsx/.jsx), devflow:go (.go), devflow:java (.java), devflow:python (.py), devflow:rust (.rs), devflow:ui-design (CSS/UI), devflow:boundary-validation (forms/API), devflow:security (auth/crypto) | -| **EXPLORE** | devflow:explore:orch | — | -| **DEBUG** | devflow:test-driven-development, devflow:software-design, devflow:testing | devflow:git (if git operations involved) | -| **PLAN** | devflow:test-driven-development, devflow:plan:orch, devflow:patterns, devflow:software-design | — | -| **REVIEW** | devflow:quality-gates, devflow:software-design | devflow:testing | - -### ORCHESTRATED-depth skills - -| Intent | Primary Skills | Secondary (if file type matches) | -|--------|---------------|----------------------------------| -| **IMPLEMENT** | devflow:implement:orch, devflow:patterns | devflow:typescript (.ts), devflow:react (.tsx/.jsx), devflow:go (.go), devflow:java (.java), devflow:python (.py), devflow:rust (.rs), devflow:ui-design (CSS/UI), devflow:boundary-validation (forms/API), devflow:security (auth/crypto) | -| **EXPLORE** | devflow:explore:orch | — | -| **DEBUG** | devflow:debug:orch, devflow:test-driven-development, devflow:software-design | devflow:git (if git operations involved) | -| **PLAN** | devflow:plan:orch, devflow:test-driven-development, devflow:patterns, 
devflow:software-design | — | -| **REVIEW** | devflow:review:orch | — (reviewers load their own pattern skills) | -| **RESOLVE** | devflow:resolve:orch, devflow:test-driven-development, devflow:software-design | — | -| **PIPELINE** | devflow:pipeline:orch, devflow:patterns | — | - -**Excluded from ambient loading** (loaded by agents internally): devflow:review-methodology, devflow:complexity, devflow:consistency, devflow:database, devflow:dependencies, devflow:documentation, devflow:regression, devflow:architecture, devflow:accessibility, devflow:performance, devflow:qa. These skills are always installed (universal skill installation) but loaded by Reviewer/Tester agents at runtime, not by the router. - -See `references/skill-catalog.md` for the full skill-to-intent mapping with file pattern triggers. - -## Step 4: Apply - - -When classification is GUIDED or ORCHESTRATED, skill loading is NON-NEGOTIABLE. -Do not rationalize skipping skills. Do not respond without loading them first. -BLOCKING REQUIREMENT: Your FIRST tool calls MUST be Skill tool invocations — before -writing ANY text about the task. Invoke all selected skills, THEN state classification, -THEN proceed with work. Do NOT write implementation text before all Skill tools return. -For IMPLEMENT intent, enforce TDD: write the failing test before ANY production code. -NOTE: Skills loaded in the main session via Devflow mode are reference patterns only — -their allowed-tools metadata does NOT restrict your tool access. You retain full access -to all tools (Edit, Write, Bash, Agent, etc.) for implementation work. - - -- **QUICK:** Respond directly. No preamble, no classification statement. -- **GUIDED:** First, invoke each selected skill using the Skill tool. After all Skill tools return, state classification briefly: `Devflow: IMPLEMENT/GUIDED. Loading: devflow:patterns, devflow:research.` Then work directly in main session. After code changes, spawn Simplifier on changed files. 
-- **ORCHESTRATED:** First, invoke each selected skill using the Skill tool. After all Skill tools return, state classification briefly: `Devflow: IMPLEMENT/ORCHESTRATED. Loading: devflow:implement:orch, devflow:patterns.` Then orchestrate agents per the loaded orchestration skill's pipeline. - -### GUIDED Behavior by Intent - -| Intent | Main Session Work | Post-Work | -|--------|------------------|-----------| -| **IMPLEMENT** | Implement directly with loaded skills. Follow TDD cycle. | Spawn Simplifier on changed files. | -| **EXPLORE** | Spawn Skimmer for orientation, then trace flow/analyze directly in main session. | No Simplifier (no code changes). | -| **DEBUG** | Investigate directly — reproduce bug, diagnose from stack trace/error, fix. | Spawn Simplifier on changed files. | -| **PLAN** | Spawn Skimmer for orientation, then design directly with loaded pattern/design skills. | No Simplifier (no code changes). | -| **REVIEW** | Review directly with loaded skills (self-review in main session). | No Simplifier. | - -State classification as: `Devflow: INTENT/DEPTH. Loading: [skills].` QUICK is silent. 
- -## Edge Cases - -| Case | Handling | -|------|----------| -| Mixed intent ("fix this bug and add a test") | Use the higher-overhead intent (IMPLEMENT > DEBUG) | -| Continuation of previous conversation | Inherit previous classification unless prompt clearly shifts | -| User explicitly requests no enforcement | Respect immediately — classify as QUICK | -| Prompt references specific Devflow command | Skip ambient — the command has its own orchestration | -| Scope ambiguous between GUIDED and ORCHESTRATED | Default to GUIDED; escalate if complexity emerges during work | -| REVIEW after IMPLEMENT/GUIDED | GUIDED (continuation — match prior depth) | -| REVIEW after IMPLEMENT/ORCHESTRATED | ORCHESTRATED (continuation — match prior depth) | -| REVIEW standalone, large scope ("full review", "branch", "PR") | ORCHESTRATED | -| REVIEW standalone, small scope ("check this", specific file) | GUIDED | -| REVIEW standalone, ambiguous | GUIDED (conservative) | -| RESOLVE intent | Always ORCHESTRATED | -| PIPELINE intent | Always ORCHESTRATED | -| EXPLORE simple lookup ("where is X?") | QUICK — no skills needed | -| EXPLORE focused subsystem ("explain the auth flow") | GUIDED — Skimmer + main session trace | -| EXPLORE multi-system ("map the full architecture") | ORCHESTRATED — Skimmer + parallel Explore agents + Synthesizer | -| Multiple triggers per session | Each runs independently; context compaction handles accumulation | +State classification: `Devflow: INTENT/DEPTH. Loading: [skills].` +Load all listed skills via Skill tool before writing any text about the task. +GUIDED: work directly in main session. Spawn Simplifier after code changes. +- GUIDED EXPLORE: spawn Skimmer + Explore agents, then analyze directly. +- GUIDED PLAN: spawn Skimmer for orientation, then plan directly. +ORCHESTRATED: follow the loaded orchestration skill's pipeline. 
+ +## GUIDED + +| Intent | Skills | +|--------|--------| +| IMPLEMENT | devflow:test-driven-development, devflow:patterns, devflow:research | +| EXPLORE | — | +| DEBUG | devflow:test-driven-development, devflow:software-design, devflow:testing | +| PLAN | devflow:test-driven-development, devflow:patterns, devflow:software-design, devflow:security | +| REVIEW | devflow:quality-gates, devflow:software-design | + +## ORCHESTRATED + +| Intent | Skills | +|--------|--------| +| IMPLEMENT | devflow:implement:orch, devflow:patterns | +| EXPLORE | devflow:explore:orch | +| DEBUG | devflow:debug:orch | +| PLAN | devflow:plan:orch, devflow:patterns, devflow:software-design, devflow:security | +| REVIEW | devflow:review:orch | +| RESOLVE | devflow:resolve:orch | +| PIPELINE | devflow:pipeline:orch, devflow:patterns | + +## Secondary Skills (GUIDED IMPLEMENT + DEBUG only, load all that match) + +| Pattern | Skill | +|---------|-------| +| .ts, .tsx | devflow:typescript | +| .tsx, .jsx | devflow:react | +| .go | devflow:go | +| .java | devflow:java | +| .py | devflow:python | +| .rs | devflow:rust | +| CSS/UI/styling | devflow:ui-design | +| Forms/API/input | devflow:boundary-validation | +| Auth/crypto/secrets | devflow:security | +| Git operations | devflow:git | diff --git a/shared/skills/router/references/classification-rules.md b/shared/skills/router/references/classification-rules.md new file mode 100644 index 00000000..4e56e57e --- /dev/null +++ b/shared/skills/router/references/classification-rules.md @@ -0,0 +1,31 @@ +# Ambient Classification + +Classify each prompt by **intent** and **depth** before responding. 
+ +## Intent Signals + +- **CHAT**: greetings, confirmations, meta-questions, short responses +- **EXPLORE**: "what is", "where is", "find", "explain", "how does", "analyze", "analysis", "trace", "map" +- **PLAN**: "how should", "plan", "design", "architecture", "approach", "strategy" +- **IMPLEMENT**: "add", "create", "implement", "build", "write", "make" +- **REVIEW**: "check", "look at", "review", "is this ok", "any issues" +- **RESOLVE**: "resolve", "fix review issues", "address feedback", "fix findings" +- **DEBUG**: "fix", "bug", "broken", "failing", "error", "why does" +- **PIPELINE**: "end to end", "implement and review", "build and review", "full pipeline" + +## Depth Criteria + +- **QUICK**: CHAT intent. Simple lookups ("where is X?"). Git/devops ops (commit, push, branch, deploy). Config changes. Rename/comment tweaks. 1-2 line edits. +- **GUIDED**: Quick focused changes without a plan — ≤2 files, clear bugs with known fix, focused exploration, quick review. Orchestration would add no value. +- **ORCHESTRATED**: Substantive code work — multi-file, multi-module, complex or vague bugs, full reviews, system-level design. A detailed plan or specification in the prompt is a strong ORCHESTRATED signal. RESOLVE and PIPELINE always. + +Default to ORCHESTRATED for substantive work — it produces better results. +Reserve GUIDED for small focused changes where orchestration adds no value. +Prefer GUIDED over QUICK for any prompt involving code changes. + +## Action + +Classify every message — including the first message of a session — then: + +- **QUICK**: Respond directly. Do not display classification or load the router. +- **GUIDED/ORCHESTRATED**: Load `devflow:router` via Skill tool. 
diff --git a/shared/skills/router/references/skill-catalog.md b/shared/skills/router/references/skill-catalog.md index ec3d872d..97274a4c 100644 --- a/shared/skills/router/references/skill-catalog.md +++ b/shared/skills/router/references/skill-catalog.md @@ -29,8 +29,8 @@ These skills may be loaded during GUIDED and ORCHESTRATED-depth ambient routing. | Skill | When to Load | Depth | File Patterns | |-------|-------------|-------|---------------| | devflow:debug:orch | ORCHESTRATED only | ORCHESTRATED | Any — orchestrates investigation pipeline | -| devflow:test-driven-development | Always for DEBUG | GUIDED + ORCHESTRATED | Any code file — bug fix needs regression test first | -| devflow:software-design | Always for DEBUG | GUIDED + ORCHESTRATED | Any code file | +| devflow:test-driven-development | Always for DEBUG | GUIDED | Any code file — bug fix needs regression test first | +| devflow:software-design | Always for DEBUG | GUIDED | Any code file | | devflow:testing | Always for DEBUG (GUIDED) | GUIDED | Any code file | | devflow:git | Git operations involved | GUIDED + ORCHESTRATED | User mentions git, rebase, merge, etc. | @@ -50,8 +50,6 @@ These skills may be loaded during GUIDED and ORCHESTRATED-depth ambient routing. | Skill | When to Load | Depth | File Patterns | |-------|-------------|-------|---------------| | devflow:resolve:orch | Always for RESOLVE | ORCHESTRATED | Any — orchestrates issue resolution pipeline | -| devflow:test-driven-development | Always for RESOLVE | ORCHESTRATED | Any code file — fixes need regression tests | -| devflow:software-design | Always for RESOLVE | ORCHESTRATED | Any code file | RESOLVE is always ORCHESTRATED — it requires multi-agent resolution with Resolver agents and Simplifier. 
@@ -62,7 +60,7 @@ RESOLVE is always ORCHESTRATED — it requires multi-agent resolution with Resol | devflow:pipeline:orch | Always for PIPELINE | ORCHESTRATED | Any — meta-orchestrator for implement → review → resolve | | devflow:patterns | Always for PIPELINE | ORCHESTRATED | Any code file | -PIPELINE is always ORCHESTRATED — it chains multiple orchestration stages with user gates. +PIPELINE is always ORCHESTRATED — it chains multiple orchestration stages with status reporting between phases. ### EXPLORE Intent @@ -77,7 +75,7 @@ EXPLORE depth: simple lookups ("where is X?") → QUICK. Focused subsystem/flow | Skill | When to Load | Depth | File Patterns | |-------|-------------|-------|---------------| | devflow:plan:orch | ORCHESTRATED only | ORCHESTRATED | Any — orchestrates design pipeline | -| devflow:test-driven-development | Always for PLAN | GUIDED + ORCHESTRATED | Any planning context — plans must account for test-first workflow | +| devflow:test-driven-development | Always for PLAN | GUIDED | Any planning context — plans must account for test-first workflow | | devflow:patterns | Always for PLAN | GUIDED + ORCHESTRATED | Any planning context | | devflow:software-design | Always for PLAN | GUIDED + ORCHESTRATED | System design discussions | diff --git a/src/cli/commands/ambient.ts b/src/cli/commands/ambient.ts index d671fdc0..8550f9b0 100644 --- a/src/cli/commands/ambient.ts +++ b/src/cli/commands/ambient.ts @@ -8,23 +8,25 @@ import type { HookMatcher, Settings } from '../utils/hooks.js'; const PREAMBLE_HOOK_MARKER = 'preamble'; const LEGACY_HOOK_MARKER = 'ambient-prompt'; +const CLASSIFICATION_HOOK_MARKER = 'session-start-classification'; -/** Filter hook entries from a parsed Settings object. Returns true if any were removed. */ +/** Filter hook entries from a parsed Settings object for a given event. Returns true if any were removed. 
*/ function filterHookEntries( settings: Settings, + eventName: string, shouldRemove: (matcher: HookMatcher) => boolean, ): boolean { - if (!settings.hooks?.UserPromptSubmit) return false; + if (!settings.hooks?.[eventName]) return false; - const before = settings.hooks.UserPromptSubmit.length; - settings.hooks.UserPromptSubmit = settings.hooks.UserPromptSubmit.filter( + const before = settings.hooks[eventName].length; + settings.hooks[eventName] = settings.hooks[eventName].filter( (matcher) => !shouldRemove(matcher), ); - if (settings.hooks.UserPromptSubmit.length === before) return false; + if (settings.hooks[eventName].length === before) return false; - if (settings.hooks.UserPromptSubmit.length === 0) { - delete settings.hooks.UserPromptSubmit; + if (settings.hooks[eventName].length === 0) { + delete settings.hooks[eventName]; } if (settings.hooks && Object.keys(settings.hooks).length === 0) { delete settings.hooks; @@ -40,66 +42,93 @@ const isAmbient = (matcher: HookMatcher) => h.command.includes(PREAMBLE_HOOK_MARKER) || h.command.includes(LEGACY_HOOK_MARKER), ); +const isClassification = (matcher: HookMatcher) => + matcher.hooks.some((h) => h.command.includes(CLASSIFICATION_HOOK_MARKER)); + /** * Remove only the legacy `ambient-prompt` hook entries. * Used by `addAmbientHook` to clean before adding the new preamble hook. */ export function removeLegacyAmbientHook(settingsJson: string): string { const settings: Settings = JSON.parse(settingsJson); - if (!filterHookEntries(settings, isLegacy)) return settingsJson; + if (!filterHookEntries(settings, 'UserPromptSubmit', isLegacy)) return settingsJson; return JSON.stringify(settings, null, 2) + '\n'; } /** - * Add the ambient UserPromptSubmit hook to settings JSON. + * Add the ambient UserPromptSubmit hook and SessionStart classification hook to settings JSON. * Removes any legacy `ambient-prompt` hook first, then adds the new `preamble` hook. - * Idempotent — returns unchanged JSON if the new hook already exists. 
+ * Also adds the SessionStart classification hook (reads the router's references/classification-rules.md). + * Idempotent — each hook checked independently. */ export function addAmbientHook(settingsJson: string, devflowDir: string): string { const settings: Settings = JSON.parse(settingsJson); - const legacyRemoved = filterHookEntries(settings, isLegacy); - - // Check if the NEW preamble hook already exists - if (settings.hooks?.UserPromptSubmit?.some((m) => - m.hooks.some((h) => h.command.includes(PREAMBLE_HOOK_MARKER)), - )) { - return legacyRemoved ? JSON.stringify(settings, null, 2) + '\n' : settingsJson; - } + let changed = filterHookEntries(settings, 'UserPromptSubmit', isLegacy); if (!settings.hooks) { settings.hooks = {}; } - const hookCommand = path.join(devflowDir, 'scripts', 'hooks', 'run-hook') + ' preamble'; + // --- UserPromptSubmit: preamble hook --- + const hasPreamble = settings.hooks.UserPromptSubmit?.some((m) => + m.hooks.some((h) => h.command.includes(PREAMBLE_HOOK_MARKER)), + ); - const newEntry: HookMatcher = { - hooks: [ - { - type: 'command', - command: hookCommand, - timeout: 5, - }, - ], - }; + if (!hasPreamble) { + if (!settings.hooks.UserPromptSubmit) { + settings.hooks.UserPromptSubmit = []; + } - if (!settings.hooks.UserPromptSubmit) { - settings.hooks.UserPromptSubmit = []; + settings.hooks.UserPromptSubmit.push({ + hooks: [ + { + type: 'command', + command: path.join(devflowDir, 'scripts', 'hooks', 'run-hook') + ' preamble', + timeout: 5, + }, + ], + }); + changed = true; } - settings.hooks.UserPromptSubmit.push(newEntry); + // --- SessionStart: classification hook --- + const hasClassificationHook = settings.hooks.SessionStart?.some((m) => + m.hooks.some((h) => h.command.includes(CLASSIFICATION_HOOK_MARKER)), + ); + + if (!hasClassificationHook) { + if (!settings.hooks.SessionStart) { + settings.hooks.SessionStart = []; + } + + settings.hooks.SessionStart.push({ + hooks: [ + { + type: 'command', + command: path.join(devflowDir, 'scripts', 'hooks', 'run-hook') + 
' session-start-classification', + timeout: 5, + }, + ], + }); + changed = true; + } + if (!changed) return settingsJson; return JSON.stringify(settings, null, 2) + '\n'; } /** - * Remove the ambient UserPromptSubmit hook from settings JSON. - * Removes BOTH legacy `ambient-prompt` and current `preamble` hooks. - * Idempotent — returns unchanged JSON if hook not present. - * Preserves other UserPromptSubmit hooks. Cleans empty arrays/objects. + * Remove the ambient hooks from settings JSON. + * Removes preamble + legacy from UserPromptSubmit, and classification from SessionStart. + * Idempotent — returns unchanged JSON if hooks not present. + * Preserves other hooks. Cleans empty arrays/objects. */ export function removeAmbientHook(settingsJson: string): string { const settings: Settings = JSON.parse(settingsJson); - if (!filterHookEntries(settings, isAmbient)) return settingsJson; + const removedPrompt = filterHookEntries(settings, 'UserPromptSubmit', isAmbient); + const removedClassification = filterHookEntries(settings, 'SessionStart', isClassification); + + if (!removedPrompt && !removedClassification) return settingsJson; return JSON.stringify(settings, null, 2) + '\n'; } @@ -109,15 +138,17 @@ export function removeAmbientHook(settingsJson: string): string { export function hasAmbientHook(settingsJson: string): boolean { const settings: Settings = JSON.parse(settingsJson); - if (!settings.hooks?.UserPromptSubmit) { - return false; - } - - return settings.hooks.UserPromptSubmit.some((matcher) => + const hasPreamble = settings.hooks?.UserPromptSubmit?.some((matcher) => matcher.hooks.some((h) => h.command.includes(PREAMBLE_HOOK_MARKER) || h.command.includes(LEGACY_HOOK_MARKER), ), - ); + ) ?? false; + + const hasClassificationHook = settings.hooks?.SessionStart?.some((matcher) => + isClassification(matcher), + ) ?? 
false; + + return hasPreamble || hasClassificationHook; } interface AmbientOptions { @@ -128,8 +159,8 @@ interface AmbientOptions { export const ambientCommand = new Command('ambient') .description('Enable or disable ambient mode (always-on quality enforcement)') - .option('--enable', 'Register UserPromptSubmit hook for ambient mode') - .option('--disable', 'Remove ambient mode hook') + .option('--enable', 'Register ambient mode hooks') + .option('--disable', 'Remove ambient mode hooks') .option('--status', 'Check if ambient mode is enabled') .action(async (options: AmbientOptions) => { const hasFlag = options.enable || options.disable || options.status; @@ -189,7 +220,7 @@ export const ambientCommand = new Command('ambient') return; } await fs.writeFile(settingsPath, updated, 'utf-8'); - p.log.success('Ambient mode enabled — UserPromptSubmit hook registered'); + p.log.success('Ambient mode enabled — hooks registered'); p.log.info(color.dim('Skills auto-load and agents orchestrate based on each prompt')); } diff --git a/tests/ambient.test.ts b/tests/ambient.test.ts index 81bb0eb9..35ecd758 100644 --- a/tests/ambient.test.ts +++ b/tests/ambient.test.ts @@ -26,6 +26,15 @@ describe('addAmbientHook', () => { expect(settings.hooks.UserPromptSubmit[0].hooks[0].timeout).toBe(5); }); + it('adds SessionStart classification hook to empty settings', () => { + const result = addAmbientHook('{}', '/home/user/.devflow'); + const settings = JSON.parse(result); + + expect(settings.hooks.SessionStart).toHaveLength(1); + expect(settings.hooks.SessionStart[0].hooks[0].command).toContain('session-start-classification'); + expect(settings.hooks.SessionStart[0].hooks[0].timeout).toBe(5); + }); + it('adds alongside existing hooks', () => { const input = JSON.stringify({ hooks: { @@ -37,6 +46,7 @@ describe('addAmbientHook', () => { expect(settings.hooks.Stop).toHaveLength(1); expect(settings.hooks.UserPromptSubmit).toHaveLength(1); + expect(settings.hooks.SessionStart).toHaveLength(1); 
}); it('adds alongside existing UserPromptSubmit hooks', () => { @@ -53,6 +63,20 @@ describe('addAmbientHook', () => { expect(settings.hooks.UserPromptSubmit[1].hooks[0].command).toContain('preamble'); }); + it('preserves existing SessionStart hooks (session-start-memory)', () => { + const input = JSON.stringify({ + hooks: { + SessionStart: [{ hooks: [{ type: 'command', command: '/path/to/run-hook session-start-memory' }] }], + }, + }); + const result = addAmbientHook(input, '/home/user/.devflow'); + const settings = JSON.parse(result); + + expect(settings.hooks.SessionStart).toHaveLength(2); + expect(settings.hooks.SessionStart[0].hooks[0].command).toContain('session-start-memory'); + expect(settings.hooks.SessionStart[1].hooks[0].command).toContain('session-start-classification'); + }); + it('is idempotent — does not add duplicate hooks', () => { const first = addAmbientHook('{}', '/home/user/.devflow'); const second = addAmbientHook(first, '/home/user/.devflow'); @@ -60,6 +84,14 @@ describe('addAmbientHook', () => { expect(second).toBe(first); }); + it('idempotent for SessionStart classification hook', () => { + const first = addAmbientHook('{}', '/home/user/.devflow'); + const second = addAmbientHook(first, '/home/user/.devflow'); + const settings = JSON.parse(second); + + expect(settings.hooks.SessionStart).toHaveLength(1); + }); + it('preserves other settings', () => { const input = JSON.stringify({ statusLine: { type: 'command', command: 'statusline.sh' }, @@ -71,15 +103,19 @@ describe('addAmbientHook', () => { expect(settings.statusLine.command).toBe('statusline.sh'); expect(settings.env.SOME_VAR).toBe('1'); expect(settings.hooks.UserPromptSubmit).toHaveLength(1); + expect(settings.hooks.SessionStart).toHaveLength(1); }); it('uses correct devflowDir path in command via run-hook wrapper', () => { const result = addAmbientHook('{}', '/custom/path/.devflow'); const settings = JSON.parse(result); - const command = 
settings.hooks.UserPromptSubmit[0].hooks[0].command; + const preambleCmd = settings.hooks.UserPromptSubmit[0].hooks[0].command; + const classificationCmd = settings.hooks.SessionStart[0].hooks[0].command; - expect(command).toContain('/custom/path/.devflow/scripts/hooks/run-hook'); - expect(command).toContain('preamble'); + expect(preambleCmd).toContain('/custom/path/.devflow/scripts/hooks/run-hook'); + expect(preambleCmd).toContain('preamble'); + expect(classificationCmd).toContain('/custom/path/.devflow/scripts/hooks/run-hook'); + expect(classificationCmd).toContain('session-start-classification'); }); it('replaces legacy ambient-prompt hook with new preamble hook', () => { @@ -115,10 +151,29 @@ describe('addAmbientHook', () => { expect(settings.hooks.UserPromptSubmit[0].hooks[0].command).toBe('other-hook.sh'); expect(settings.hooks.UserPromptSubmit[1].hooks[0].command).toContain('preamble'); }); + + it('adds SessionStart hook even when preamble already exists (upgrade path)', () => { + // Simulates existing user who has preamble but not classification hook + const input = JSON.stringify({ + hooks: { + UserPromptSubmit: [ + { hooks: [{ type: 'command', command: '/home/user/.devflow/scripts/hooks/run-hook preamble', timeout: 5 }] }, + ], + }, + }); + const result = addAmbientHook(input, '/home/user/.devflow'); + const settings = JSON.parse(result); + + // Preamble preserved (not duplicated) + expect(settings.hooks.UserPromptSubmit).toHaveLength(1); + // SessionStart classification hook added + expect(settings.hooks.SessionStart).toHaveLength(1); + expect(settings.hooks.SessionStart[0].hooks[0].command).toContain('session-start-classification'); + }); }); describe('removeAmbientHook', () => { - it('removes ambient hook', () => { + it('removes ambient hook — clears both UserPromptSubmit and SessionStart', () => { const withHook = addAmbientHook('{}', '/home/user/.devflow'); const result = removeAmbientHook(withHook); const settings = JSON.parse(result); @@ -142,6 
+197,26 @@ describe('removeAmbientHook', () => { expect(settings.hooks.UserPromptSubmit[0].hooks[0].command).toBe('other-hook.sh'); }); + it('preserves other SessionStart hooks when removing classification', () => { + const input = JSON.stringify({ + hooks: { + SessionStart: [ + { hooks: [{ type: 'command', command: '/path/to/run-hook session-start-memory' }] }, + { hooks: [{ type: 'command', command: '/path/to/run-hook session-start-classification' }] }, + ], + UserPromptSubmit: [ + { hooks: [{ type: 'command', command: '/path/to/preamble' }] }, + ], + }, + }); + const result = removeAmbientHook(input); + const settings = JSON.parse(result); + + expect(settings.hooks.SessionStart).toHaveLength(1); + expect(settings.hooks.SessionStart[0].hooks[0].command).toContain('session-start-memory'); + expect(settings.hooks.UserPromptSubmit).toBeUndefined(); + }); + it('cleans empty hooks object when last hook removed', () => { const input = JSON.stringify({ hooks: { @@ -365,43 +440,209 @@ describe('skill invocation helpers', () => { }); }); +/** Parse router SKILL.md markdown tables into intent→skills maps */ +function parseRouterTables(content: string): { guided: Map; orchestrated: Map } { + const guided = new Map(); + const orchestrated = new Map(); + + let currentSection: 'guided' | 'orchestrated' | null = null; + + for (const line of content.split('\n')) { + if (line.startsWith('## GUIDED')) { currentSection = 'guided'; continue; } + if (line.startsWith('## ORCHESTRATED')) { currentSection = 'orchestrated'; continue; } + if (line.startsWith('## ') && currentSection) { currentSection = null; continue; } + + if (!currentSection) continue; + + const match = line.match(/^\|\s*(\w+)\s*\|\s*(.+?)\s*\|$/); + if (!match || match[1] === 'Intent') continue; + + const intent = match[1]; + const skillsStr = match[2].trim(); + const skills = skillsStr === '—' || skillsStr === '-' + ? [] + : skillsStr.split(',').map(s => s.trim()); + + const table = currentSection === 'guided' ? 
guided : orchestrated; + table.set(intent, skills); + } + + return { guided, orchestrated }; +} + +/** Extract intent names from classification-rules.md Intent Signals section only */ +function parseClassificationIntents(content: string): string[] { + const intents: string[] = []; + let inIntentSection = false; + + for (const line of content.split('\n')) { + if (line.includes('Intent Signals')) { inIntentSection = true; continue; } + if (line.startsWith('## ') && inIntentSection) break; // left the section + + if (!inIntentSection) continue; + + const match = line.match(/^\-\s*\*\*(\w+)\*\*/); + if (match) intents.push(match[1]); + } + return intents; +} + +describe('router structural validation', () => { + const routerPath = path.resolve(__dirname, '../shared/skills/router/SKILL.md'); + const rulesPath = path.resolve(__dirname, '../shared/skills/router/references/classification-rules.md'); + const sharedSkillsDir = path.resolve(__dirname, '../shared/skills'); + + it('router covers all ORCHESTRATED intents (every non-CHAT intent has a row)', async () => { + const rulesContent = await fs.readFile(rulesPath, 'utf-8'); + const routerContent = await fs.readFile(routerPath, 'utf-8'); + + const nonChatIntents = parseClassificationIntents(rulesContent).filter(i => i !== 'CHAT'); + const { orchestrated } = parseRouterTables(routerContent); + + for (const intent of nonChatIntents) { + expect(orchestrated.has(intent), `ORCHESTRATED table missing intent: ${intent}`).toBe(true); + } + }); + + it('RESOLVE and PIPELINE have no GUIDED rows (always ORCHESTRATED)', async () => { + const routerContent = await fs.readFile(routerPath, 'utf-8'); + const { guided } = parseRouterTables(routerContent); + + expect(guided.has('RESOLVE'), 'RESOLVE must not have a GUIDED row — classification says always ORCHESTRATED').toBe(false); + expect(guided.has('PIPELINE'), 'PIPELINE must not have a GUIDED row — classification says always ORCHESTRATED').toBe(false); + }); + + it('router table skills are 
canonical — every prefixed ref exists in shared/skills/', async () => { + const routerContent = await fs.readFile(routerPath, 'utf-8'); + const { guided, orchestrated } = parseRouterTables(routerContent); + + const allSkills = new Set(); + for (const skills of [...guided.values(), ...orchestrated.values()]) { + for (const skill of skills) { + if (skill.startsWith('devflow:')) { + allSkills.add(skill.replace('devflow:', '')); + } + } + } + + const entries = await fs.readdir(sharedSkillsDir); + + for (const skill of allSkills) { + expect(entries, `shared/skills/${skill}/ not found — router references nonexistent skill`).toContain(skill); + } + }); + + it('integration test expectations align with router skill tables', async () => { + const integrationPath = path.resolve(__dirname, './integration/ambient-activation.test.ts'); + const routerContent = await fs.readFile(routerPath, 'utf-8'); + const testContent = await fs.readFile(integrationPath, 'utf-8'); + const { guided, orchestrated } = parseRouterTables(routerContent); + + // Split integration tests into blocks and extract intent/depth + expected/required arrays + const blocks = testContent.split(/\bit\(/); + + for (const block of blocks) { + const nameMatch = block.match(/^'([^']+)'/); + if (!nameMatch) continue; + const name = nameMatch[1]; + + const classMatch = name.match(/(IMPLEMENT|EXPLORE|DEBUG|PLAN|REVIEW|RESOLVE|PIPELINE)\/(GUIDED|ORCHESTRATED)/); + if (!classMatch) continue; + + const [, intent, depth] = classMatch; + const table = depth === 'GUIDED' ? 
guided : orchestrated; + const routerSkills = table.get(intent); + + // Extract expected or required array from block + const arrayMatch = block.match(/const (?:expected|required) = \[([^\]]*)\]/); + if (!arrayMatch) continue; // Some tests (like EXPLORE/GUIDED) have no expected array — skip + + const testSkills = arrayMatch[1] + .split(',') + .map(s => s.trim().replace(/['"]/g, '')) + .filter(Boolean) + .map(s => `devflow:${s}`); + + expect(routerSkills, `${name}: router has no ${depth} row for ${intent}`).toBeDefined(); + if (!routerSkills) return; + + // Every skill the test asserts must appear in the router table row + for (const skill of testSkills) { + expect( + routerSkills.includes(skill), + `${name}: test asserts '${skill}' but router ${depth} ${intent} row is [${routerSkills.join(', ')}]`, + ).toBe(true); + } + } + }); +}); + describe('preamble drift detection', () => { - it('preamble PREAMBLE contains required classification elements', async () => { + it('preamble contains classify and devflow:router instructions', async () => { const hookPath = path.resolve(__dirname, '../scripts/hooks/preamble'); const hookContent = await fs.readFile(hookPath, 'utf-8'); - // Extract the PREAMBLE string from the shell script (may be multiline) + // Extract the PREAMBLE string from the shell script const match = hookContent.match(/PREAMBLE="([^"]+)"/); expect(match).not.toBeNull(); - const shellPreamble = match![1]; - - // The preamble is detection-only: classification rules + router skill reference. - // Verify structural elements rather than exact string match to allow wording refinement. 
- expect(shellPreamble).toContain('AMBIENT MODE'); - - // Must contain depth definitions - expect(shellPreamble).toContain('QUICK'); - expect(shellPreamble).toContain('GUIDED'); - expect(shellPreamble).toContain('ORCHESTRATED'); - - // Must contain intent names for each category - expect(shellPreamble).toContain('CHAT'); - expect(shellPreamble).toContain('EXPLORE'); - expect(shellPreamble).toContain('PLAN'); - expect(shellPreamble).toContain('IMPLEMENT'); - expect(shellPreamble).toContain('REVIEW'); - expect(shellPreamble).toContain('RESOLVE'); - expect(shellPreamble).toContain('DEBUG'); - expect(shellPreamble).toContain('PIPELINE'); - - // Must reference the router skill (detection-only: no direct skill mappings) + if (!match) return; + const shellPreamble = match[1]; + + // SYNC: preamble must instruct classification + router loading + expect(shellPreamble.toLowerCase()).toContain('classify'); expect(shellPreamble).toContain('devflow:router'); + }); - // Must instruct Skill tool invocation - expect(shellPreamble).toContain('Skill tool'); + it('classification-rules.md contains required classification elements', async () => { + const rulesPath = path.resolve(__dirname, '../shared/skills/router/references/classification-rules.md'); + const rulesContent = await fs.readFile(rulesPath, 'utf-8'); + + // Must contain Intent Signals heading + expect(rulesContent).toContain('Intent Signals'); + + // Must contain all 8 intents + expect(rulesContent).toContain('CHAT'); + expect(rulesContent).toContain('EXPLORE'); + expect(rulesContent).toContain('PLAN'); + expect(rulesContent).toContain('IMPLEMENT'); + expect(rulesContent).toContain('REVIEW'); + expect(rulesContent).toContain('RESOLVE'); + expect(rulesContent).toContain('DEBUG'); + expect(rulesContent).toContain('PIPELINE'); + + // Must contain all 3 depths + expect(rulesContent).toContain('QUICK'); + expect(rulesContent).toContain('GUIDED'); + expect(rulesContent).toContain('ORCHESTRATED'); + + // Must reference 
devflow:router for GUIDED/ORCHESTRATED + expect(rulesContent).toContain('devflow:router'); + }); + + it('router SKILL.md contains skill lookup tables', async () => { + const routerPath = path.resolve(__dirname, '../shared/skills/router/SKILL.md'); + const routerContent = await fs.readFile(routerPath, 'utf-8'); + + // Must contain GUIDED/ORCHESTRATED headings + expect(routerContent).toContain('## GUIDED'); + expect(routerContent).toContain('## ORCHESTRATED'); + + // Must contain classification output format + expect(routerContent).toContain('Devflow:'); + expect(routerContent).toContain('Loading:'); + + // Must contain intent names in tables + expect(routerContent).toContain('IMPLEMENT'); + expect(routerContent).toContain('EXPLORE'); + expect(routerContent).toContain('DEBUG'); + expect(routerContent).toContain('PLAN'); + expect(routerContent).toContain('REVIEW'); + }); + + it('session-start-classification hook reads classification-rules.md', async () => { + const hookPath = path.resolve(__dirname, '../scripts/hooks/session-start-classification'); + const hookContent = await fs.readFile(hookPath, 'utf-8'); - // Must include classification output format - expect(shellPreamble).toContain('Devflow:'); - expect(shellPreamble).toContain('Loading:'); + expect(hookContent).toContain('classification-rules.md'); }); }); diff --git a/tests/integration/ambient-activation.test.ts b/tests/integration/ambient-activation.test.ts index 6b6c06dd..a14e6d98 100644 --- a/tests/integration/ambient-activation.test.ts +++ b/tests/integration/ambient-activation.test.ts @@ -12,7 +12,7 @@ import { * Integration tests for Devflow ambient mode classification and skill loading. 
* * GUIDED tests use two-tier assertions: - * Hard: router skill loaded (proves non-QUICK classification — system works) + * Hard: router skill loaded via Skill tool (proves non-QUICK classification — system works) * Soft: specific skills match expectations (quality metric, logged but not gating) * * ORCHESTRATED tests use strict assertions (deterministic at that scope). @@ -30,10 +30,11 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => { // --- QUICK tier: no skills loaded --- - it('QUICK — chat: "thanks" loads no skills', async () => { + it('preamble filter — single-word prompt skipped before classification', async () => { + // "thanks" is ≤2 words — preamble's word-count filter skips it before classification runs const result = await runClaudeStreaming('thanks', { timeout: 20000 }); expect(hasSkillInvocations(result)).toBe(false); - console.log(`QUICK chat: no skills (${result.durationMs}ms)`); + console.log(`preamble filter (single-word): no skills (${result.durationMs}ms)`); }); it('QUICK — explore: "where is the config?" 
loads no skills', async () => { @@ -42,18 +43,32 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => { console.log(`QUICK explore: no skills (${result.durationMs}ms)`); }); + it('CHAT/QUICK — multi-word chat passes preamble but classified QUICK', async () => { + // Passes preamble's word-count filter (>2 words) but classified CHAT/QUICK — no skills loaded + const result = await runClaudeStreaming('sounds good, thanks for explaining that', { timeout: 20000 }); + expect(hasSkillInvocations(result)).toBe(false); + console.log(`CHAT/QUICK (multi-word): no skills (${result.durationMs}ms)`); + }); + + it('preamble filter — slash command prefix skipped before classification', async () => { + // Preamble filters prompts starting with "/" — no classification or skill loading + const result = await runClaudeStreaming('/help with something', { timeout: 20000 }); + expect(hasSkillInvocations(result)).toBe(false); + console.log(`preamble filter (slash command): no skills (${result.durationMs}ms)`); + }); + // --- GUIDED tier: router must load (hard), specific skills logged (soft) --- - it('EXPLORE/GUIDED — loads router and explore skills', async () => { - const expected = ['explore:orch']; + it('EXPLORE/GUIDED — loads router only (no additional skills)', async () => { + // GUIDED EXPLORE dispatches no additional skills — router instructs to spawn Skimmer + Explore agents directly const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( 'explain how the plugin loading system works from registration through initialization', - (r) => hasSkillInvocations(r) && hasRequiredSkills(r, ['router']), + (r) => hasRequiredSkills(r, ['router']), ); const skills = getSkillInvocations(result); - const hasExpected = hasRequiredSkills(result, expected); - console.log(`EXPLORE/GUIDED: ${passed ? 'PASS' : 'FAIL'} (${model}, ${attempts} attempts, ${result.durationMs}ms). Skills: [${skills.join(', ')}]${passed && !hasExpected ? 
` ⚠ expected: ${expected.join(', ')}` : ''}`); + const nonRouter = skills.filter((s) => s !== 'router' && s !== 'devflow:router'); + console.log(`EXPLORE/GUIDED: ${passed ? 'PASS' : 'FAIL'} (${model}, ${attempts} attempts, ${result.durationMs}ms). Skills: [${skills.join(', ')}]${nonRouter.length > 0 ? ` ⚠ unexpected non-router: ${nonRouter.join(', ')}` : ''}`); expect(passed).toBe(true); }); @@ -61,7 +76,7 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => { const expected = ['patterns', 'test-driven-development', 'research']; const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( 'add a retry mechanism with exponential backoff to the HTTP client module', - (r) => hasSkillInvocations(r) && hasRequiredSkills(r, ['router']), + (r) => hasRequiredSkills(r, ['router']), ); const skills = getSkillInvocations(result); @@ -71,10 +86,10 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => { }); it('DEBUG/GUIDED — loads router and debug skills', async () => { - const expected = ['software-design', 'testing']; + const expected = ['test-driven-development', 'software-design', 'testing']; const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( 'fix the bug where the date formatter returns wrong timezone offset for DST transitions', - (r) => hasSkillInvocations(r) && hasRequiredSkills(r, ['router']), + (r) => hasRequiredSkills(r, ['router']), ); const skills = getSkillInvocations(result); @@ -84,10 +99,10 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => { }); it('PLAN/GUIDED — loads router and planning skills', async () => { - const expected = ['patterns', 'software-design']; + const expected = ['test-driven-development', 'patterns', 'software-design', 'security']; const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( 'how should we design a caching layer for API responses?', - (r) => hasSkillInvocations(r) && hasRequiredSkills(r, ['router']), 
+ (r) => hasRequiredSkills(r, ['router']), ); const skills = getSkillInvocations(result); @@ -100,7 +115,7 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => { const expected = ['quality-gates', 'software-design']; const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( 'check this error handling in the authentication module', - (r) => hasSkillInvocations(r) && hasRequiredSkills(r, ['router']), + (r) => hasRequiredSkills(r, ['router']), ); const skills = getSkillInvocations(result); @@ -137,8 +152,8 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => { expect(passed).toBe(true); }); - it('RESOLVE/ORCHESTRATED — loads resolve, software-design', async () => { - const required = ['resolve:orch', 'software-design']; + it('RESOLVE/ORCHESTRATED — loads resolve:orch', async () => { + const required = ['resolve:orch']; const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( 'resolve the review findings from the last code review', (r) => hasSkillInvocations(r) && hasRequiredSkills(r, required), @@ -163,6 +178,32 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => { expect(passed).toBe(true); }); + it('DEBUG/ORCHESTRATED — loads debug:orch', async () => { + const required = ['debug:orch']; + const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( + 'the webhook processor silently drops events across three modules when the payload exceeds 1MB — debug why the size check, queue handler, and retry logic all fail to surface the error', + (r) => hasSkillInvocations(r) && hasRequiredSkills(r, required), + ); + + const skills = getSkillInvocations(result); + console.log(`DEBUG/ORCHESTRATED: ${passed ? 'PASS' : 'FAIL'} (${model}, ${attempts} attempts, ${result.durationMs}ms). Skills: [${skills.join(', ')}]`); + if (!passed) console.warn(`Expected: ${required.join(', ')}. 
Got: [${skills.join(', ')}]`); + expect(passed).toBe(true); + }); + + it('PLAN/ORCHESTRATED — loads plan:orch, patterns', async () => { + const required = ['plan:orch', 'patterns']; + const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( + 'design the architecture for a multi-service notification system with email, SMS, and push channels that supports user preferences and delivery guarantees', + (r) => hasSkillInvocations(r) && hasRequiredSkills(r, required), + ); + + const skills = getSkillInvocations(result); + console.log(`PLAN/ORCHESTRATED: ${passed ? 'PASS' : 'FAIL'} (${model}, ${attempts} attempts, ${result.durationMs}ms). Skills: [${skills.join(', ')}]`); + if (!passed) console.warn(`Expected: ${required.join(', ')}. Got: [${skills.join(', ')}]`); + expect(passed).toBe(true); + }); + it('PIPELINE/ORCHESTRATED — loads pipeline, patterns', async () => { const required = ['pipeline:orch', 'patterns']; const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( diff --git a/tests/integration/helpers.ts b/tests/integration/helpers.ts index 84eaafe1..87b844ff 100644 --- a/tests/integration/helpers.ts +++ b/tests/integration/helpers.ts @@ -1,4 +1,6 @@ import { execSync, spawn, ChildProcess } from 'child_process'; +import { readFileSync } from 'fs'; +import { resolve } from 'path'; const CLASSIFICATION_PATTERN = /devflow:\s*(CHAT|EXPLORE|PLAN|IMPLEMENT|DEBUG|REVIEW|RESOLVE|PIPELINE)\s*\/\s*(QUICK|GUIDED|ORCHESTRATED)/i; @@ -14,13 +16,18 @@ export function isClaudeAvailable(): boolean { } } -// SYNC: must match scripts/hooks/preamble PREAMBLE structure -const DEVFLOW_PREAMBLE = - `AMBIENT MODE ENABLED: Classify user intent and depth. -Intents: CHAT (greetings/confirmations), EXPLORE (find/explain/analyze/trace/map), PLAN (plan/design/architecture), IMPLEMENT (add/create/build/implement), REVIEW (check/review), RESOLVE (resolve review issues), DEBUG (fix/bug/error), PIPELINE (end-to-end). 
-Depth: QUICK (chat, simple lookups, git ops, config, rename/comment tweaks, 1-2 line edits) | GUIDED (code changes ≤2 files, clear bugs, focused reviews, focused exploration, focused design/plan) | ORCHESTRATED (>2 files, multi-module, vague bugs, full/branch/PR reviews, deep exploration, system-level design, RESOLVE and PIPELINE always). -QUICK: respond normally. No classification, no skills. -GUIDED/ORCHESTRATED: Load devflow:router skill FIRST via Skill tool for skill mappings. Then load all skills it specifies. State: Devflow: INTENT/DEPTH. Loading: [skills].`; +/** + * Read classification-rules.md from disk. + * Simulates SessionStart injection for integration tests. + */ +function loadRouterContext(): string { + const rulesPath = resolve(__dirname, '../../shared/skills/router/references/classification-rules.md'); + return readFileSync(rulesPath, 'utf-8').trim(); +} + +// Simulates SessionStart injection (classification rules) + per-message preamble +const DEVFLOW_PREAMBLE = loadRouterContext() + + '\nClassify this request\'s intent and depth, then load devflow:router via Skill tool.'; /** Result from a streaming claude invocation */ export interface StreamResult { diff --git a/tests/skill-references.test.ts b/tests/skill-references.test.ts index 952ec3b8..47537ec1 100644 --- a/tests/skill-references.test.ts +++ b/tests/skill-references.test.ts @@ -687,25 +687,38 @@ describe('Test infrastructure skill references', () => { } }); - it('DEVFLOW_PREAMBLE skill refs in tests/integration/helpers.ts exist in actual hook preamble', () => { - const helpersPath = path.join(ROOT, 'tests', 'integration', 'helpers.ts'); - const helpersContent = readFileSync(helpersPath, 'utf-8'); - const hookPath = path.join(ROOT, 'scripts', 'hooks', 'preamble'); - const hookContent = readFileSync(hookPath, 'utf-8'); + it('DEVFLOW_PREAMBLE reads classification-rules.md which has valid refs', () => { + // helpers.ts loads DEVFLOW_PREAMBLE from classification-rules.md at runtime. 
+ // Verify the classification rules reference devflow:router (loaded via Skill tool). + const rulesPath = path.join(ROOT, 'shared', 'skills', 'router', 'references', 'classification-rules.md'); + const rulesContent = readFileSync(rulesPath, 'utf-8'); + + const rulesRefs = extractPrefixedRefs(rulesContent); + const skillRefs = filterNonSkillRefs(rulesRefs); + const canonicalSkills = new Set(getAllSkillNames()); + + for (const ref of skillRefs) { + expect( + canonicalSkills.has(ref), + `classification-rules.md has 'devflow:${ref}' but it is not in canonical skill set`, + ).toBe(true); + } + }); - const helpersRefs = extractPrefixedRefs(helpersContent); - const hookRefs = extractPrefixedRefs(hookContent); - const hookSkillSet = new Set(hookRefs); + it('router SKILL.md skill refs match canonical set', () => { + // The lean router SKILL.md contains skill lookup tables. + const canonicalSkills = new Set(getAllSkillNames()); + const routerPath = path.join(ROOT, 'shared', 'skills', 'router', 'SKILL.md'); + const routerContent = readFileSync(routerPath, 'utf-8'); - // The new preamble is detection-only — helpers.ts DEVFLOW_PREAMBLE also has only router ref. - // Just verify helpers.ts has at least one skill ref (devflow:router). - expect(helpersRefs.length, 'helpers.ts DEVFLOW_PREAMBLE should have skill refs').toBeGreaterThan(0); + const routerRefs = extractPrefixedRefs(routerContent); + expect(routerRefs.length, 'router SKILL.md should have devflow: skill refs').toBeGreaterThan(0); - const skillRefs = filterNonSkillRefs(helpersRefs); + const skillRefs = filterNonSkillRefs(routerRefs); for (const ref of skillRefs) { expect( - hookSkillSet.has(ref), - `tests/integration/helpers.ts DEVFLOW_PREAMBLE has 'devflow:${ref}' but scripts/hooks/preamble does not — preamble drift`, + canonicalSkills.has(ref), + `router SKILL.md has 'devflow:${ref}' but it is not in canonical skill set`, ).toBe(true); } });