diff --git a/CLAUDE.md b/CLAUDE.md
index e903fac5..f448a52f 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -40,6 +40,8 @@ Commands with Teams Variant ship as `{name}.md` (parallel subagents) and `{name}
**Working Memory**: Three shell-script hooks (`scripts/hooks/`) provide automatic session continuity. Toggleable via `devflow memory --enable/--disable/--status` or `devflow init --memory/--no-memory`. Stop hook → reads last turn from session transcript (`~/.claude/projects/{encoded-cwd}/{session_id}.jsonl`), spawns background `claude -p --model haiku` to update `.memory/WORKING-MEMORY.md` with structured sections (`## Now`, `## Progress`, `## Decisions`, `## Modified Files`, `## Context`, `## Session Log`; throttled: skips if triggered <2min ago; concurrent sessions serialize via mkdir-based lock). SessionStart hook → injects previous memory + git state as `additionalContext` on `/clear`, startup, or compact (warns if >1h stale; injects pre-compact memory snapshot when compaction happened mid-session). PreCompact hook → saves git state + WORKING-MEMORY.md snapshot + bootstraps minimal WORKING-MEMORY.md if none exists. Zero-ceremony context preservation.
+**Ambient Mode**: Three-layer architecture for always-on intent classification. SessionStart hook (`session-start-classification`) reads lean classification rules (`~/.claude/skills/devflow:router/references/classification-rules.md`, ~30 lines) and injects them as `additionalContext` — once per session, deterministic, zero model overhead. UserPromptSubmit hook (`preamble`) injects a one-sentence prompt per message that triggers classification + router loading via the Skill tool. Router SKILL.md is a pure skill lookup table (~50 lines) loaded on demand only for GUIDED/ORCHESTRATED depth — it maps intent×depth to domain and orchestration skills. Toggleable via `devflow ambient --enable/--disable/--status` or `devflow init`.
+
**Self-Learning**: A SessionEnd hook (`session-end-learning`) accumulates session IDs and triggers a background `claude -p --model sonnet` every 3 sessions (5 at 15+ observations) to detect repeated workflows and procedural knowledge from batch transcripts. Observations accumulate in `.memory/learning-log.jsonl` with confidence scores, temporal decay, and daily run caps. When confidence thresholds are met (5 observations with 7-day temporal spread for both workflow and procedural types), artifacts are auto-created as slash commands (`.claude/commands/self-learning/`) or skills (`.claude/skills/{slug}/`). Loaded artifacts are reinforced locally (no LLM) on each session end. Single toggle mechanism: hook presence in `settings.json` IS the enabled state — no `enabled` field in `learning.json`. Toggleable via `devflow learn --enable/--disable/--status` or `devflow init --learn/--no-learn`. Configurable model/throttle/caps/debug via `devflow learn --configure`. Use `devflow learn --reset` to remove all artifacts + log + transient state. Use `devflow learn --purge` to remove invalid observations. Debug logs stored at `~/.devflow/logs/{project-slug}/`.
**Claude Code Flags**: Typed registry (`src/cli/utils/flags.ts`) for managing Claude Code feature flags (env vars and top-level settings). Pure functions `applyFlags`/`stripFlags`/`getDefaultFlags` follow the `applyTeamsConfig`/`stripTeamsConfig` pattern. Initial flags: `tool-search`, `lsp`, `clear-context-on-plan` (default ON), `brief`, `disable-1m-context` (default OFF). Manageable via `devflow flags --enable/--disable/--status/--list`. Stored in manifest `features.flags: string[]`.
@@ -55,7 +57,7 @@ devflow/
├── plugins/devflow-*/ # 17 plugins (8 core + 9 optional language/ecosystem)
├── docs/reference/ # Detailed reference documentation
├── scripts/ # Helper scripts (statusline, docs-helpers)
-│ └── hooks/ # Working Memory + ambient + learning hooks (stop, session-start, pre-compact, preamble, session-end-learning, stop-update-learning [deprecated], background-learning)
+│ └── hooks/ # Working Memory + ambient + learning hooks (stop, session-start-memory, session-start-classification, pre-compact, preamble, session-end-learning, stop-update-learning [deprecated], background-learning)
├── src/cli/ # TypeScript CLI (init, list, uninstall, ambient, learn, flags)
├── .claude-plugin/ # Marketplace registry
├── .docs/ # Project docs (reviews, design) — per-project
diff --git a/package-lock.json b/package-lock.json
index 5fbf032d..c8bb7511 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "devflow-kit",
- "version": "1.8.3",
+ "version": "2.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "devflow-kit",
- "version": "1.8.3",
+ "version": "2.0.0",
"license": "MIT",
"dependencies": {
"@clack/prompts": "^0.9.1",
diff --git a/plugins/devflow-ambient/README.md b/plugins/devflow-ambient/README.md
index 54acb282..5b3f3629 100644
--- a/plugins/devflow-ambient/README.md
+++ b/plugins/devflow-ambient/README.md
@@ -1,11 +1,11 @@
# devflow-ambient
-Ambient mode — classifies intent and applies proportional effort via a `UserPromptSubmit` hook. No slash command — ambient mode activates automatically on every prompt when enabled.
+Ambient mode — classifies intent and applies proportional effort via a `SessionStart` hook and a `UserPromptSubmit` hook. No slash command — ambient mode activates automatically on every prompt when enabled.
## Activation
```bash
-devflow ambient --enable # Register UserPromptSubmit hook
+devflow ambient --enable # Register ambient mode hooks
-devflow ambient --disable # Remove hook
+devflow ambient --disable # Remove ambient mode hooks
devflow ambient --status # Check if enabled
```
@@ -44,9 +44,9 @@ Skills are loaded via the Skill tool and work happens in the main session:
| Intent | Skills | Main Session Work | Post-Work |
|--------|--------|-------------------|-----------|
-| IMPLEMENT | test-driven-development, patterns, research | Implement with TDD | `Task(subagent_type="Simplifier")` |
-| DEBUG | software-design, testing | Investigate, diagnose, fix | `Task(subagent_type="Simplifier")` |
-| PLAN | patterns, software-design | Explore and design | — |
+| IMPLEMENT | test-driven-development, patterns, research | Implement with TDD | `Agent(subagent_type="Simplifier")` |
+| DEBUG | test-driven-development, software-design, testing | Investigate, diagnose, fix | `Agent(subagent_type="Simplifier")` |
+| PLAN | test-driven-development, patterns, software-design, security | Explore and design | — |
| REVIEW | quality-gates, software-design | Review directly | — |
## ORCHESTRATED Pipelines
diff --git a/plugins/devflow-code-review/commands/code-review-teams.md b/plugins/devflow-code-review/commands/code-review-teams.md
index 44d78045..ecffc5fc 100644
--- a/plugins/devflow-code-review/commands/code-review-teams.md
+++ b/plugins/devflow-code-review/commands/code-review-teams.md
@@ -34,7 +34,7 @@ Run a comprehensive code review of the current branch by spawning a review team
For each reviewable worktree, spawn Git agent:
```
-Task(subagent_type="Git", run_in_background=false):
+Agent(subagent_type="Git", run_in_background=false):
"OPERATION: ensure-pr-ready
WORKTREE_PATH: {worktree_path} (omit if cwd)
Validate branch, commit if needed, push, create PR if needed.
@@ -218,7 +218,7 @@ Spawn 2 agents **in a single message**:
**Git Agent (PR Comments)**:
```
-Task(subagent_type="Git", run_in_background=false):
+Agent(subagent_type="Git", run_in_background=false):
"OPERATION: comment-pr
WORKTREE_PATH: {worktree_path} (omit if cwd)
Read reviews from {worktree_path}/.docs/reviews/{branch_slug}/{timestamp}/
diff --git a/plugins/devflow-code-review/commands/code-review.md b/plugins/devflow-code-review/commands/code-review.md
index 990c8986..96a14167 100644
--- a/plugins/devflow-code-review/commands/code-review.md
+++ b/plugins/devflow-code-review/commands/code-review.md
@@ -34,7 +34,7 @@ Run a comprehensive code review of the current branch by spawning parallel revie
For each reviewable worktree, spawn Git agent:
```
-Task(subagent_type="Git", run_in_background=false):
+Agent(subagent_type="Git", run_in_background=false):
"OPERATION: ensure-pr-ready
WORKTREE_PATH: {worktree_path} (omit if cwd)
Validate branch, commit if needed, push, create PR if needed.
@@ -109,7 +109,7 @@ Spawn Reviewer agents **in a single message**. Always run 7 core reviews; condit
Each Reviewer invocation (all in one message, **NOT background**):
```
-Task(subagent_type="Reviewer", run_in_background=false):
+Agent(subagent_type="Reviewer", run_in_background=false):
"Review focusing on {focus}. Load the pattern skill for your focus from the Focus Areas table.
Follow 6-step process from devflow:review-methodology.
PR: #{pr_number}, Base: {base_branch}
@@ -126,7 +126,7 @@ In multi-worktree mode, process worktrees **sequentially** (one worktree at a ti
**Git Agent (PR Comments)** per worktree:
```
-Task(subagent_type="Git", run_in_background=false):
+Agent(subagent_type="Git", run_in_background=false):
"OPERATION: comment-pr
WORKTREE_PATH: {worktree_path} (omit if cwd)
Read reviews from {worktree_path}/.docs/reviews/{branch-slug}/{timestamp}/
@@ -139,7 +139,7 @@ Check for existing inline comments at same file:line before creating new ones to
**Synthesizer Agent** per worktree:
```
-Task(subagent_type="Synthesizer", run_in_background=false):
+Agent(subagent_type="Synthesizer", run_in_background=false):
"Mode: review
WORKTREE_PATH: {worktree_path} (omit if cwd)
REVIEW_BASE_DIR: {worktree_path}/.docs/reviews/{branch-slug}/{timestamp}
diff --git a/plugins/devflow-debug/commands/debug-teams.md b/plugins/devflow-debug/commands/debug-teams.md
index efd67411..fe1ee166 100644
--- a/plugins/devflow-debug/commands/debug-teams.md
+++ b/plugins/devflow-debug/commands/debug-teams.md
@@ -32,7 +32,7 @@ Read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md`. Known
If `$ARGUMENTS` starts with `#`, fetch the GitHub issue:
```
-Task(subagent_type="Git"):
+Agent(subagent_type="Git"):
"OPERATION: fetch-issue
ISSUE: {issue number}
Return issue title, body, labels, and any linked error logs."
diff --git a/plugins/devflow-debug/commands/debug.md b/plugins/devflow-debug/commands/debug.md
index ac3b65c5..f8890d3d 100644
--- a/plugins/devflow-debug/commands/debug.md
+++ b/plugins/devflow-debug/commands/debug.md
@@ -32,7 +32,7 @@ Read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md`. Known
If `$ARGUMENTS` starts with `#`, fetch the GitHub issue:
```
-Task(subagent_type="Git"):
+Agent(subagent_type="Git"):
"OPERATION: fetch-issue
ISSUE: {issue number}
Return issue title, body, labels, and any linked error logs."
@@ -48,7 +48,7 @@ Analyze the bug description (from arguments or issue) and identify 3-5 plausible
Spawn one Explore agent per hypothesis in a **single message** (parallel execution):
```
-Task(subagent_type="Explore"):
+Agent(subagent_type="Explore"):
"Investigate this bug: {bug_description}
Hypothesis: {hypothesis A description}
@@ -67,7 +67,7 @@ Return a structured report:
- Evidence AGAINST: [list with file:line refs]
- Key finding: {one-sentence summary}"
-Task(subagent_type="Explore"):
+Agent(subagent_type="Explore"):
"Investigate this bug: {bug_description}
Hypothesis: {hypothesis B description}
@@ -75,7 +75,7 @@ Focus area: {specific code area, mechanism, or condition}
[same steps and return format]"
-Task(subagent_type="Explore"):
+Agent(subagent_type="Explore"):
"Investigate this bug: {bug_description}
Hypothesis: {hypothesis C description}
@@ -91,7 +91,7 @@ Focus area: {specific code area, mechanism, or condition}
Once all investigators return, spawn a Synthesizer agent to aggregate findings:
```
-Task(subagent_type="Synthesizer"):
+Agent(subagent_type="Synthesizer"):
"You are a root cause analyst. Synthesize these investigation reports:
{paste all investigator reports}
diff --git a/plugins/devflow-implement/commands/implement-teams.md b/plugins/devflow-implement/commands/implement-teams.md
index 1a394a07..cc675fbb 100644
--- a/plugins/devflow-implement/commands/implement-teams.md
+++ b/plugins/devflow-implement/commands/implement-teams.md
@@ -30,7 +30,7 @@ Record the current branch name as `BASE_BRANCH` - this will be the PR target.
Spawn Git agent to set up task environment. The Git agent derives the branch name automatically from the issue or task description:
```
-Task(subagent_type="Git"):
+Agent(subagent_type="Git"):
"OPERATION: setup-task
BASE_BRANCH: {current branch name}
ISSUE_INPUT: {issue number if $ARGUMENTS starts with #, otherwise omit}
@@ -51,7 +51,7 @@ Return the branch setup summary."
Spawn Skimmer agent for codebase overview:
```
-Task(subagent_type="Skimmer"):
+Agent(subagent_type="Skimmer"):
"Orient in codebase for: {task description}
Run rskim on source directories (NOT repo root) to identify relevant files, functions, integration points"
```
@@ -156,7 +156,7 @@ Step 3: GATE — Verify TeamDelete succeeded
You MUST spawn the Synthesizer agent.
```
-Task(subagent_type="Synthesizer"):
+Agent(subagent_type="Synthesizer"):
"Synthesize EXPLORATION outputs for: {task}
Mode: exploration
Explorer consensus: {team exploration consensus output}
@@ -252,7 +252,7 @@ Step 3: GATE — Verify TeamDelete succeeded
You MUST spawn the Synthesizer agent.
```
-Task(subagent_type="Synthesizer"):
+Agent(subagent_type="Synthesizer"):
"Synthesize PLANNING outputs for: {task}
Mode: planning
Planner consensus: {team planning consensus output}
@@ -282,7 +282,7 @@ Based on Phase 6 synthesis, use the three-strategy framework:
**SINGLE_CODER** (default):
```
-Task(subagent_type="Coder"):
+Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: {description}
BASE_BRANCH: {base branch}
@@ -300,7 +300,7 @@ Spawn Coders one at a time, passing handoff summaries between phases:
**Phase 1 Coder:**
```
-Task(subagent_type="Coder"):
+Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: {phase 1 description}
BASE_BRANCH: {base branch}
@@ -313,7 +313,7 @@ HANDOFF_REQUIRED: true"
**Phase 2+ Coders** (after prior phase completes):
```
-Task(subagent_type="Coder"):
+Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: {phase N description}
BASE_BRANCH: {base branch}
@@ -335,7 +335,7 @@ HANDOFF_REQUIRED: {true if not last phase}"
Spawn multiple Coders **in a single message**, each with independent subtask:
```
-Task(subagent_type="Coder"): # Coder 1
+Agent(subagent_type="Coder"): # Coder 1
"TASK_ID: {task-id}-part1
TASK_DESCRIPTION: {independent subtask 1}
BASE_BRANCH: {base branch}
@@ -344,7 +344,7 @@ PATTERNS: {patterns}
CREATE_PR: false
DOMAIN: {subtask 1 domain}"
-Task(subagent_type="Coder"): # Coder 2 (same message)
+Agent(subagent_type="Coder"): # Coder 2 (same message)
"TASK_ID: {task-id}-part2
TASK_DESCRIPTION: {independent subtask 2}
BASE_BRANCH: {base branch}
@@ -365,7 +365,7 @@ DOMAIN: {subtask 2 domain}"
After Coder completes, spawn Validator to verify correctness:
```
-Task(subagent_type="Validator", model="haiku"):
+Agent(subagent_type="Validator", model="haiku"):
"FILES_CHANGED: {list of files from Coder output}
VALIDATION_SCOPE: full
Run build, typecheck, lint, test. Report pass/fail with failure details."
@@ -377,7 +377,7 @@ Run build, typecheck, lint, test. Report pass/fail with failure details."
3. If `validation_retry_count <= 2`:
- Spawn Coder with fix context:
```
- Task(subagent_type="Coder"):
+ Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: Fix validation failures
OPERATION: validation-fix
@@ -395,7 +395,7 @@ Run build, typecheck, lint, test. Report pass/fail with failure details."
After validation passes, spawn Simplifier to polish the code:
```
-Task(subagent_type="Simplifier"):
+Agent(subagent_type="Simplifier"):
"Simplify recently implemented code
Task: {task description}
FILES_CHANGED: {list of files from Coder output}
@@ -407,7 +407,7 @@ Focus on code modified by Coder, apply project standards, enhance clarity"
After Simplifier completes, spawn Scrutinizer as final quality gate:
```
-Task(subagent_type="Scrutinizer"):
+Agent(subagent_type="Scrutinizer"):
"TASK_DESCRIPTION: {task description}
FILES_CHANGED: {list of files from Coder output}
Evaluate 9 pillars, fix P0/P1 issues, report status"
@@ -420,7 +420,7 @@ If Scrutinizer returns BLOCKED, report to user and halt.
If Scrutinizer made code changes (status: FIXED), spawn Validator to verify:
```
-Task(subagent_type="Validator", model="haiku"):
+Agent(subagent_type="Validator", model="haiku"):
"FILES_CHANGED: {files modified by Scrutinizer}
VALIDATION_SCOPE: changed-only
Verify Scrutinizer's fixes didn't break anything."
@@ -503,7 +503,7 @@ Step 3: GATE — Verify TeamDelete succeeded
3. If `alignment_fix_count <= 2`:
- Spawn Coder to fix misalignments:
```
- Task(subagent_type="Coder"):
+ Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: Fix alignment issues
OPERATION: alignment-fix
@@ -513,7 +513,7 @@ Step 3: GATE — Verify TeamDelete succeeded
```
- Spawn Validator to verify fix didn't break tests:
```
- Task(subagent_type="Validator", model="haiku"):
+ Agent(subagent_type="Validator", model="haiku"):
"FILES_CHANGED: {files modified by fix Coder}
VALIDATION_SCOPE: changed-only"
```
@@ -526,7 +526,7 @@ Step 3: GATE — Verify TeamDelete succeeded
After Evaluator passes, spawn Tester for scenario-based acceptance testing (standalone agent, not a teammate — testing is sequential, not debate):
```
-Task(subagent_type="Tester"):
+Agent(subagent_type="Tester"):
"ORIGINAL_REQUEST: {task description or issue content}
EXECUTION_PLAN: {synthesized plan from Phase 6}
FILES_CHANGED: {list of files from Coder output}
@@ -542,7 +542,7 @@ Design and execute scenario-based acceptance tests. Report PASS or FAIL with evi
3. If `qa_retry_count <= 2`:
- Spawn Coder to fix QA failures:
```
- Task(subagent_type="Coder"):
+ Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: Fix QA test failures
OPERATION: qa-fix
@@ -552,7 +552,7 @@ Design and execute scenario-based acceptance tests. Report PASS or FAIL with evi
```
- Spawn Validator to verify fix didn't break tests:
```
- Task(subagent_type="Validator", model="haiku"):
+ Agent(subagent_type="Validator", model="haiku"):
"FILES_CHANGED: {files modified by fix Coder}
VALIDATION_SCOPE: changed-only"
```
@@ -652,7 +652,7 @@ If the Coder's report includes Key Decisions with architectural significance:
6. **Clean handoffs** - Each phase passes structured data to next; sequential Coders pass implementation summaries
7. **Honest reporting** - Display agent outputs directly
8. **Simplification pass** - Code refined for clarity before PR
-9. **Strict delegation** - Never perform agent work in main session. "Spawn X" means call Task tool with X, not do X's work yourself
+9. **Strict delegation** - Never perform agent work in main session. "Spawn X" means call the Agent tool with X; it does not mean doing X's work yourself
10. **Validator owns validation** - Never run `npm test`, `npm run build`, or similar in main session; always delegate to Validator agent
11. **Coder owns fixes** - Never implement fixes in main session; spawn Coder for validation failures and alignment fixes
12. **Loop limits** - Max 2 validation retries, max 2 alignment fix iterations before escalating to user
diff --git a/plugins/devflow-implement/commands/implement.md b/plugins/devflow-implement/commands/implement.md
index 79a1634f..22b64d01 100644
--- a/plugins/devflow-implement/commands/implement.md
+++ b/plugins/devflow-implement/commands/implement.md
@@ -30,7 +30,7 @@ Record the current branch name as `BASE_BRANCH` - this will be the PR target.
Spawn Git agent to set up task environment. The Git agent derives the branch name automatically from the issue or task description:
```
-Task(subagent_type="Git"):
+Agent(subagent_type="Git"):
"OPERATION: setup-task
BASE_BRANCH: {current branch name}
ISSUE_INPUT: {issue number if $ARGUMENTS starts with #, otherwise omit}
@@ -51,7 +51,7 @@ Return the branch setup summary."
Spawn Skimmer agent for codebase overview:
```
-Task(subagent_type="Skimmer"):
+Agent(subagent_type="Skimmer"):
"Orient in codebase for: {task description}
Run rskim on source directories (NOT repo root) to identify relevant files, functions, integration points"
```
@@ -77,7 +77,7 @@ Track success/failure of each explorer for synthesis context.
You MUST spawn the Synthesizer agent - "spawn Synthesizer" means delegate to the agent, not do the work yourself.
```
-Task(subagent_type="Synthesizer"):
+Agent(subagent_type="Synthesizer"):
"Synthesize EXPLORATION outputs for: {task}
Mode: exploration
Explorer outputs: {all 4 outputs}
@@ -117,7 +117,7 @@ Spawn 3 Plan agents **in a single message**, each with exploration synthesis:
You MUST spawn the Synthesizer agent - "spawn Synthesizer" means delegate to the agent, not do the work yourself.
```
-Task(subagent_type="Synthesizer"):
+Agent(subagent_type="Synthesizer"):
"Synthesize PLANNING outputs for: {task}
Mode: planning
Planner outputs: {all 3 outputs}
@@ -147,7 +147,7 @@ Based on Phase 6 synthesis, use the three-strategy framework:
**SINGLE_CODER** (default):
```
-Task(subagent_type="Coder"):
+Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: {description}
BASE_BRANCH: {base branch}
@@ -165,7 +165,7 @@ Spawn Coders one at a time, passing handoff summaries between phases:
**Phase 1 Coder:**
```
-Task(subagent_type="Coder"):
+Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: {phase 1 description}
BASE_BRANCH: {base branch}
@@ -178,7 +178,7 @@ HANDOFF_REQUIRED: true"
**Phase 2+ Coders** (after prior phase completes):
```
-Task(subagent_type="Coder"):
+Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: {phase N description}
BASE_BRANCH: {base branch}
@@ -200,7 +200,7 @@ HANDOFF_REQUIRED: {true if not last phase}"
Spawn multiple Coders **in a single message**, each with independent subtask:
```
-Task(subagent_type="Coder"): # Coder 1
+Agent(subagent_type="Coder"): # Coder 1
"TASK_ID: {task-id}-part1
TASK_DESCRIPTION: {independent subtask 1}
BASE_BRANCH: {base branch}
@@ -209,7 +209,7 @@ PATTERNS: {patterns}
CREATE_PR: false
DOMAIN: {subtask 1 domain}"
-Task(subagent_type="Coder"): # Coder 2 (same message)
+Agent(subagent_type="Coder"): # Coder 2 (same message)
"TASK_ID: {task-id}-part2
TASK_DESCRIPTION: {independent subtask 2}
BASE_BRANCH: {base branch}
@@ -230,7 +230,7 @@ DOMAIN: {subtask 2 domain}"
After Coder completes, spawn Validator to verify correctness:
```
-Task(subagent_type="Validator", model="haiku"):
+Agent(subagent_type="Validator", model="haiku"):
"FILES_CHANGED: {list of files from Coder output}
VALIDATION_SCOPE: full
Run build, typecheck, lint, test. Report pass/fail with failure details."
@@ -242,7 +242,7 @@ Run build, typecheck, lint, test. Report pass/fail with failure details."
3. If `validation_retry_count <= 2`:
- Spawn Coder with fix context:
```
- Task(subagent_type="Coder"):
+ Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: Fix validation failures
OPERATION: validation-fix
@@ -260,7 +260,7 @@ Run build, typecheck, lint, test. Report pass/fail with failure details."
After validation passes, spawn Simplifier to polish the code:
```
-Task(subagent_type="Simplifier"):
+Agent(subagent_type="Simplifier"):
"Simplify recently implemented code
Task: {task description}
FILES_CHANGED: {list of files from Coder output}
@@ -272,7 +272,7 @@ Focus on code modified by Coder, apply project standards, enhance clarity"
After Simplifier completes, spawn Scrutinizer as final quality gate:
```
-Task(subagent_type="Scrutinizer"):
+Agent(subagent_type="Scrutinizer"):
"TASK_DESCRIPTION: {task description}
FILES_CHANGED: {list of files from Coder output}
Evaluate 9 pillars, fix P0/P1 issues, report status"
@@ -285,7 +285,7 @@ If Scrutinizer returns BLOCKED, report to user and halt.
If Scrutinizer made code changes (status: FIXED), spawn Validator to verify:
```
-Task(subagent_type="Validator", model="haiku"):
+Agent(subagent_type="Validator", model="haiku"):
"FILES_CHANGED: {files modified by Scrutinizer}
VALIDATION_SCOPE: changed-only
Verify Scrutinizer's fixes didn't break anything."
@@ -300,7 +300,7 @@ Verify Scrutinizer's fixes didn't break anything."
After Scrutinizer passes (and re-validation if needed), spawn Evaluator to validate alignment:
```
-Task(subagent_type="Evaluator"):
+Agent(subagent_type="Evaluator"):
"ORIGINAL_REQUEST: {task description or issue content}
EXECUTION_PLAN: {synthesized plan from Phase 6}
FILES_CHANGED: {list of files from Coder output}
@@ -316,7 +316,7 @@ Validate alignment with request and plan. Report ALIGNED or MISALIGNED with deta
3. If `alignment_fix_count <= 2`:
- Spawn Coder to fix misalignments:
```
- Task(subagent_type="Coder"):
+ Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: Fix alignment issues
OPERATION: alignment-fix
@@ -326,7 +326,7 @@ Validate alignment with request and plan. Report ALIGNED or MISALIGNED with deta
```
- Spawn Validator to verify fix didn't break tests:
```
- Task(subagent_type="Validator", model="haiku"):
+ Agent(subagent_type="Validator", model="haiku"):
"FILES_CHANGED: {files modified by fix Coder}
VALIDATION_SCOPE: changed-only"
```
@@ -339,7 +339,7 @@ Validate alignment with request and plan. Report ALIGNED or MISALIGNED with deta
After Evaluator passes, spawn Tester for scenario-based acceptance testing:
```
-Task(subagent_type="Tester"):
+Agent(subagent_type="Tester"):
"ORIGINAL_REQUEST: {task description or issue content}
EXECUTION_PLAN: {synthesized plan from Phase 6}
FILES_CHANGED: {list of files from Coder output}
@@ -355,7 +355,7 @@ Design and execute scenario-based acceptance tests. Report PASS or FAIL with evi
3. If `qa_retry_count <= 2`:
- Spawn Coder to fix QA failures:
```
- Task(subagent_type="Coder"):
+ Agent(subagent_type="Coder"):
"TASK_ID: {task-id}
TASK_DESCRIPTION: Fix QA test failures
OPERATION: qa-fix
@@ -365,7 +365,7 @@ Design and execute scenario-based acceptance tests. Report PASS or FAIL with evi
```
- Spawn Validator to verify fix didn't break tests:
```
- Task(subagent_type="Validator", model="haiku"):
+ Agent(subagent_type="Validator", model="haiku"):
"FILES_CHANGED: {files modified by fix Coder}
VALIDATION_SCOPE: changed-only"
```
@@ -463,7 +463,7 @@ If the Coder's report includes Key Decisions with architectural significance:
5. **Clean handoffs** - Each phase passes structured data to next; sequential Coders pass implementation summaries
6. **Honest reporting** - Display agent outputs directly
7. **Simplification pass** - Code refined for clarity before PR
-8. **Strict delegation** - Never perform agent work in main session. "Spawn X" means call Task tool with X, not do X's work yourself
+8. **Strict delegation** - Never perform agent work in main session. "Spawn X" means call the Agent tool with X; it does not mean doing X's work yourself
9. **Validator owns validation** - Never run `npm test`, `npm run build`, or similar in main session; always delegate to Validator agent
10. **Coder owns fixes** - Never implement fixes in main session; spawn Coder for validation failures and alignment fixes
11. **Loop limits** - Max 2 validation retries, max 2 alignment fix iterations before escalating to user
diff --git a/plugins/devflow-resolve/commands/resolve-teams.md b/plugins/devflow-resolve/commands/resolve-teams.md
index c077c735..ba1ddb8d 100644
--- a/plugins/devflow-resolve/commands/resolve-teams.md
+++ b/plugins/devflow-resolve/commands/resolve-teams.md
@@ -35,7 +35,7 @@ Process issues from code review reports: validate them (false positive check), a
For each resolvable worktree, spawn Git agent:
```
-Task(subagent_type="Git", run_in_background=false):
+Agent(subagent_type="Git", run_in_background=false):
"OPERATION: validate-branch
WORKTREE_PATH: {worktree_path} (omit if cwd)
Check feature branch, clean working directory, reviews exist.
@@ -195,7 +195,7 @@ For each issue deferred as TECH_DEBT:
If any fixes were made, spawn Simplifier agent to refine the changed code:
```
-Task(subagent_type="Simplifier", run_in_background=false):
+Agent(subagent_type="Simplifier", run_in_background=false):
"TASK_DESCRIPTION: Issue resolution fixes
WORKTREE_PATH: {worktree_path} (omit if cwd)
FILES_CHANGED: {list of files modified by Resolvers}
@@ -209,7 +209,7 @@ Simplify and refine the fixes for clarity and consistency"
If any issues were deferred, spawn Git agent:
```
-Task(subagent_type="Git"):
+Agent(subagent_type="Git"):
"OPERATION: manage-debt
WORKTREE_PATH: {worktree_path} (omit if cwd)
REVIEW_DIR: {TARGET_DIR}
diff --git a/plugins/devflow-resolve/commands/resolve.md b/plugins/devflow-resolve/commands/resolve.md
index dde82592..666e5606 100644
--- a/plugins/devflow-resolve/commands/resolve.md
+++ b/plugins/devflow-resolve/commands/resolve.md
@@ -35,7 +35,7 @@ Process issues from code review reports: validate them (false positive check), a
For each resolvable worktree, spawn Git agent:
```
-Task(subagent_type="Git", run_in_background=false):
+Agent(subagent_type="Git", run_in_background=false):
"OPERATION: validate-branch
WORKTREE_PATH: {worktree_path} (omit if cwd)
Check feature branch, clean working directory, reviews exist.
@@ -104,7 +104,7 @@ Create execution plan:
Spawn Resolver agents based on dependency analysis. For independent batches, spawn **in a single message**:
```
-Task(subagent_type="Resolver"):
+Agent(subagent_type="Resolver"):
"ISSUES: [{issue1}, {issue2}, ...]
BRANCH: {branch-slug}
BATCH_ID: batch-{n}
@@ -141,7 +141,7 @@ For each issue deferred as TECH_DEBT:
If any fixes were made, spawn Simplifier agent to refine the changed code:
```
-Task(subagent_type="Simplifier", run_in_background=false):
+Agent(subagent_type="Simplifier", run_in_background=false):
"TASK_DESCRIPTION: Issue resolution fixes
WORKTREE_PATH: {worktree_path} (omit if cwd)
FILES_CHANGED: {list of files modified by Resolvers}
@@ -155,7 +155,7 @@ Simplify and refine the fixes for clarity and consistency"
If any issues were deferred, spawn Git agent:
```
-Task(subagent_type="Git"):
+Agent(subagent_type="Git"):
"OPERATION: manage-debt
WORKTREE_PATH: {worktree_path} (omit if cwd)
REVIEW_DIR: {TARGET_DIR}
diff --git a/plugins/devflow-self-review/commands/self-review.md b/plugins/devflow-self-review/commands/self-review.md
index de4ba583..29a306c7 100644
--- a/plugins/devflow-self-review/commands/self-review.md
+++ b/plugins/devflow-self-review/commands/self-review.md
@@ -29,7 +29,7 @@ Detect changed files and build context:
Spawn Simplifier agent to refine code for clarity and consistency:
-Task(subagent_type="Simplifier", run_in_background=false):
+Agent(subagent_type="Simplifier", run_in_background=false):
"TASK_DESCRIPTION: {task_description}
FILES_CHANGED: {files_changed}
KNOWLEDGE_CONTEXT: {knowledge_context or 'None'}
@@ -42,7 +42,7 @@ If knowledge context is provided, verify no known pitfall patterns are being rei
Spawn Scrutinizer agent for quality evaluation and fixing:
-Task(subagent_type="Scrutinizer", run_in_background=false):
+Agent(subagent_type="Scrutinizer", run_in_background=false):
"TASK_DESCRIPTION: {task_description}
FILES_CHANGED: {files_changed}
KNOWLEDGE_CONTEXT: {knowledge_context or 'None'}
@@ -55,7 +55,7 @@ If knowledge context is provided, check whether any known pitfall patterns are b
If Scrutinizer made changes (STATUS == FIXED):
-Task(subagent_type="Validator", run_in_background=false):
+Agent(subagent_type="Validator", run_in_background=false):
"FILES_CHANGED: {scrutinizer_modified_files}
VALIDATION_SCOPE: changed-only
Run build, typecheck, lint, test on modified files"
diff --git a/plugins/devflow-specify/commands/specify-teams.md b/plugins/devflow-specify/commands/specify-teams.md
index 0b03b8ed..7638999f 100644
--- a/plugins/devflow-specify/commands/specify-teams.md
+++ b/plugins/devflow-specify/commands/specify-teams.md
@@ -51,7 +51,7 @@ Use AskUserQuestion to confirm understanding before spawning any agents.
Spawn Skimmer agent for codebase context:
```
-Task(subagent_type="Skimmer"):
+Agent(subagent_type="Skimmer"):
"Orient in codebase for requirements exploration: {feature}
Run rskim on source directories (NOT repo root) to find: project structure, similar features, patterns, integration points
Return: codebase context for requirements (not implementation details)"
@@ -148,7 +148,7 @@ Step 3: GATE — Verify TeamDelete succeeded
**WAIT** for Phase 4, then spawn Synthesizer:
```
-Task(subagent_type="Synthesizer"):
+Agent(subagent_type="Synthesizer"):
"Synthesize EXPLORATION outputs for: {feature}
Mode: exploration
Explorer consensus: {team exploration consensus output}
@@ -230,7 +230,7 @@ Step 3: GATE — Verify TeamDelete succeeded
**WAIT** for Phase 6, then spawn Synthesizer:
```
-Task(subagent_type="Synthesizer"):
+Agent(subagent_type="Synthesizer"):
"Synthesize PLANNING outputs for: {feature}
Mode: planning
Planner consensus: {team planning consensus output}
diff --git a/plugins/devflow-specify/commands/specify.md b/plugins/devflow-specify/commands/specify.md
index 5154e4cf..83568845 100644
--- a/plugins/devflow-specify/commands/specify.md
+++ b/plugins/devflow-specify/commands/specify.md
@@ -51,7 +51,7 @@ Use AskUserQuestion to confirm understanding before spawning any agents.
Spawn Skimmer agent for codebase context:
```
-Task(subagent_type="Skimmer"):
+Agent(subagent_type="Skimmer"):
"Orient in codebase for requirements exploration: {feature}
Run rskim on source directories (NOT repo root) to find: project structure, similar features, patterns, integration points
Return: codebase context for requirements (not implementation details)"
@@ -77,7 +77,7 @@ Spawn 4 Explore agents **in a single message**, each with Skimmer context and pr
**WAIT** for Phase 4, then spawn Synthesizer:
```
-Task(subagent_type="Synthesizer"):
+Agent(subagent_type="Synthesizer"):
"Synthesize EXPLORATION outputs for: {feature}
Mode: exploration
Combine into: user needs, similar features, constraints, failure modes"
@@ -98,7 +98,7 @@ Spawn 3 Plan agents **in a single message**, each with exploration synthesis:
**WAIT** for Phase 6, then spawn Synthesizer:
```
-Task(subagent_type="Synthesizer"):
+Agent(subagent_type="Synthesizer"):
"Synthesize PLANNING outputs for: {feature}
Mode: planning
Combine into: user stories, scope breakdown, acceptance criteria, open questions"
diff --git a/scripts/hooks/preamble b/scripts/hooks/preamble
index a5a9ae07..1b58f7c1 100755
--- a/scripts/hooks/preamble
+++ b/scripts/hooks/preamble
@@ -31,13 +31,8 @@ if [ "$WORD_COUNT" -lt 2 ]; then
exit 0
fi
-# Detection-only preamble — classification rules and router skill reference.
-# Skill mappings live in devflow:router SKILL.md, not here.
+# Minimal preamble — classification rules are injected once at SessionStart (session-start-classification hook), not here.
# SYNC: must match tests/ambient.test.ts preamble drift detection
-PREAMBLE="AMBIENT MODE ENABLED: Classify user intent and depth.
-Intents: CHAT (greetings/confirmations), EXPLORE (find/explain/analyze/trace/map), PLAN (plan/design/architecture), IMPLEMENT (add/create/build/implement), REVIEW (check/review), RESOLVE (resolve review issues), DEBUG (fix/bug/error), PIPELINE (end-to-end).
-Depth: QUICK (chat, simple lookups, git ops, config, rename/comment tweaks, 1-2 line edits) | GUIDED (code changes ≤2 files, clear bugs, focused reviews, focused exploration, focused design/plan) | ORCHESTRATED (>2 files, multi-module, vague bugs, full/branch/PR reviews, deep exploration, system-level design, RESOLVE and PIPELINE always).
-QUICK: respond normally. No classification, no skills.
-GUIDED/ORCHESTRATED: Load devflow:router skill FIRST via Skill tool for skill mappings. Then load all skills it specifies. State: Devflow: INTENT/DEPTH. Loading: [skills]."
+PREAMBLE="Classify this request's intent and depth, then load devflow:router via Skill tool."
json_prompt_output "$PREAMBLE"
diff --git a/scripts/hooks/session-start-classification b/scripts/hooks/session-start-classification
new file mode 100755
index 00000000..f498f606
--- /dev/null
+++ b/scripts/hooks/session-start-classification
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# SessionStart Classification Hook
+# Reads classification-rules.md and injects its contents as additionalContext.
+# That file is the single source of truth for ambient classification rules.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+source "$SCRIPT_DIR/json-parse"
+if [ "$_JSON_AVAILABLE" = "false" ]; then exit 0; fi  # NOTE(review): presumably set by json-parse when no JSON tool is available — confirm
+
+INPUT=$(cat)
+
+CWD=$(echo "$INPUT" | json_field "cwd" "")
+if [ -z "$CWD" ]; then exit 0; fi  # CWD is only a presence check; it is not used below
+
+CLASSIFICATION_RULES="$HOME/.claude/skills/devflow:router/references/classification-rules.md"
+if [ -f "$CLASSIFICATION_RULES" ]; then
+ CONTEXT=$(cat "$CLASSIFICATION_RULES")
+elif [ -f "$HOME/.claude/skills/devflow:router/SKILL.md" ]; then
+ # Fallback for the upgrade window (old install without classification-rules.md): use the SKILL.md text after its second `---` line
+ CONTEXT=$(awk '/^---$/{n++; next} n>=2' "$HOME/.claude/skills/devflow:router/SKILL.md")
+else
+ exit 0
+fi
+
+if [ "${#CONTEXT}" -gt 4096 ]; then exit 0; fi  # emit nothing (silently) when the text exceeds 4096 characters
+
+json_session_output "$CONTEXT"
diff --git a/shared/skills/debug:orch/SKILL.md b/shared/skills/debug:orch/SKILL.md
index 21e57372..e0b8055f 100644
--- a/shared/skills/debug:orch/SKILL.md
+++ b/shared/skills/debug:orch/SKILL.md
@@ -2,7 +2,6 @@
name: debug:orch
description: Agent orchestration for DEBUG intent — hypothesis investigation, root cause analysis, optional fix
user-invocable: false
-allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion
---
# Debug Orchestration
@@ -37,7 +36,7 @@ If fewer than 3 hypotheses are possible, proceed with 2.
## Phase 2: Investigate (Parallel)
-Spawn one `Task(subagent_type="Explore")` per hypothesis **in a single message** (parallel execution):
+Spawn one `Agent(subagent_type="Explore")` per hypothesis **in a single message** (parallel execution):
- Each investigator searches for evidence FOR and AGAINST its hypothesis
- Must provide file:line references for all evidence
@@ -47,7 +46,7 @@ Spawn one `Task(subagent_type="Explore")` per hypothesis **in a single message**
Evaluate investigation results:
-- **One CONFIRMED**: Spawn 1-2 additional `Task(subagent_type="Explore")` agents to validate from different angles (prevent confirmation bias)
+- **One CONFIRMED**: Spawn 1-2 additional `Agent(subagent_type="Explore")` agents to validate from different angles (prevent confirmation bias)
- **Multiple PARTIAL**: Look for a unifying root cause that explains all partial evidence
- **All DISPROVED**: Report honestly — "No root cause identified from initial hypotheses." Generate 2-3 second-round hypotheses if conversation context suggests avenues not yet explored.
@@ -64,7 +63,7 @@ Present root cause analysis:
Ask user via AskUserQuestion: "Want me to implement this fix?"
-- **YES** → Implement the fix directly in main session using GUIDED approach: load devflow:patterns, devflow:research, and devflow:test-driven-development skills, then code the fix. Spawn `Task(subagent_type="Simplifier")` on changed files after.
+- **YES** → Implement the fix directly in main session using GUIDED approach: load devflow:patterns, devflow:research, and devflow:test-driven-development skills, then code the fix. Spawn `Agent(subagent_type="Simplifier")` on changed files after.
- **NO** → Done. Report stands as documentation.
## Error Handling
diff --git a/shared/skills/explore:orch/SKILL.md b/shared/skills/explore:orch/SKILL.md
index d00fd3f9..cf4003fe 100644
--- a/shared/skills/explore:orch/SKILL.md
+++ b/shared/skills/explore:orch/SKILL.md
@@ -2,7 +2,6 @@
name: explore:orch
description: Agent orchestration for EXPLORE intent — codebase analysis, flow tracing, architecture mapping
user-invocable: false
-allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion
---
# Explore Orchestration
@@ -23,7 +22,7 @@ Agent pipeline for EXPLORE intent in ambient GUIDED and ORCHESTRATED modes. Code
For GUIDED depth, the main session performs exploration directly:
-1. **Spawn Skimmer** — `Task(subagent_type="Skimmer")` targeting the area of interest. Use orientation output to ground exploration in real file structures and patterns.
+1. **Spawn Skimmer** — `Agent(subagent_type="Skimmer")` targeting the area of interest. Use orientation output to ground exploration in real file structures and patterns.
2. **Trace** — Using Skimmer findings, trace the flow or analyze the subsystem directly in main session. Follow call chains, read key files, map integration points.
3. **Present** — Deliver structured findings using the Output format below. Use AskUserQuestion to offer drill-down into specific areas.
@@ -31,7 +30,7 @@ For GUIDED depth, the main session performs exploration directly:
### Phase 1: Orient
-Spawn `Task(subagent_type="Skimmer")` to get codebase overview relevant to the exploration question:
+Spawn `Agent(subagent_type="Skimmer")` to get codebase overview relevant to the exploration question:
- File structure and module boundaries in the target area
- Entry points and key abstractions
@@ -39,7 +38,7 @@ Spawn `Task(subagent_type="Skimmer")` to get codebase overview relevant to the e
### Phase 2: Explore
-Based on Skimmer findings, spawn 2-3 `Task(subagent_type="Explore")` agents **in a single message** (parallel execution):
+Based on Skimmer findings, spawn 2-3 `Agent(subagent_type="Explore")` agents **in a single message** (parallel execution):
- **Flow explorer**: Trace the primary call chain end-to-end — entry point through to side effects
- **Dependency explorer**: Map imports, shared types, module boundaries, and integration points
@@ -49,7 +48,7 @@ Adjust explorer focus based on the specific exploration question.
### Phase 3: Synthesize
-Spawn `Task(subagent_type="Synthesizer")` in `exploration` mode with combined findings:
+Spawn `Agent(subagent_type="Synthesizer")` in `exploration` mode with combined findings:
- Merge overlapping discoveries from parallel explorers
- Resolve any contradictions between explorer findings
diff --git a/shared/skills/implement:orch/SKILL.md b/shared/skills/implement:orch/SKILL.md
index 394ab020..22e9ef39 100644
--- a/shared/skills/implement:orch/SKILL.md
+++ b/shared/skills/implement:orch/SKILL.md
@@ -2,7 +2,6 @@
name: implement:orch
description: Agent orchestration for IMPLEMENT intent — pre-flight, Coder, quality gates
user-invocable: false
-allowed-tools: Read, Grep, Glob, Bash, Task
---
# Implementation Orchestration
@@ -29,7 +28,7 @@ Detect branch type before spawning Coder:
- **Protected branches** (`main`, `master`, `develop`, `integration`, `trunk`, `release/*`, `staging`, `production`): record current branch as `BASE_BRANCH`, then spawn Git agent to auto-create a feature branch:
```
-Task(subagent_type="Git"):
+Agent(subagent_type="Git"):
"OPERATION: setup-task
BASE_BRANCH: {current branch name}
ISSUE_INPUT: {issue number if ticket mentioned in conversation, otherwise omit}
@@ -57,7 +56,7 @@ If the orchestrator receives a `WORKTREE_PATH` context (e.g., from multi-worktre
Record git SHA before first Coder: `git rev-parse HEAD`
-Spawn `Task(subagent_type="Coder")` with input variables:
+Spawn `Agent(subagent_type="Coder")` with input variables:
- **TASK_ID**: Generated from timestamp (e.g., `task-2026-03-19_1430`)
- **TASK_DESCRIPTION**: From conversation context
- **BASE_BRANCH**: Current branch (or newly created branch from Phase 1)
@@ -88,12 +87,12 @@ Pass FILES_CHANGED to all quality gate agents.
Run sequentially — each gate must pass before the next:
-1. `Task(subagent_type="Validator")` (build + typecheck + lint + tests) — retry up to 2× on failure (Coder fixes between retries)
-2. `Task(subagent_type="Simplifier")` — code clarity and maintainability pass on FILES_CHANGED
-3. `Task(subagent_type="Scrutinizer")` — 9-pillar quality evaluation on FILES_CHANGED
-4. `Task(subagent_type="Validator")` (re-validate after Simplifier/Scrutinizer changes)
-5. `Task(subagent_type="Evaluator")` — verify implementation matches original request — retry up to 2× if misalignment found
-6. `Task(subagent_type="Tester")` — scenario-based acceptance testing from user's perspective — retry up to 2× if QA fails
+1. `Agent(subagent_type="Validator")` (build + typecheck + lint + tests) — retry up to 2× on failure (Coder fixes between retries)
+2. `Agent(subagent_type="Simplifier")` — code clarity and maintainability pass on FILES_CHANGED
+3. `Agent(subagent_type="Scrutinizer")` — 9-pillar quality evaluation on FILES_CHANGED
+4. `Agent(subagent_type="Validator")` (re-validate after Simplifier/Scrutinizer changes)
+5. `Agent(subagent_type="Evaluator")` — verify implementation matches original request — retry up to 2× if misalignment found
+6. `Agent(subagent_type="Tester")` — scenario-based acceptance testing from user's perspective — retry up to 2× if QA fails
If any gate exhausts retries, halt pipeline and report what passed and what failed.
diff --git a/shared/skills/pipeline:orch/SKILL.md b/shared/skills/pipeline:orch/SKILL.md
index c54c54db..5fdac854 100644
--- a/shared/skills/pipeline:orch/SKILL.md
+++ b/shared/skills/pipeline:orch/SKILL.md
@@ -1,20 +1,19 @@
---
name: pipeline:orch
-description: End-to-end meta-orchestrator chaining implement → review → resolve with user gates between stages
+description: End-to-end meta-orchestrator chaining implement → review → resolve with status reporting between stages
user-invocable: false
-allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion
---
# Pipeline Orchestration
-Meta-orchestrator chaining implement → review → resolve with user gates between stages. For ambient PIPELINE intent ("implement this end to end", "build and review").
+Meta-orchestrator chaining implement → review → resolve with status reporting between stages. For ambient PIPELINE intent ("implement this end to end", "build and review").
## Iron Law
-> **USER GATES BETWEEN STAGES**
+> **FULL PIPELINE, NO INTERRUPTIONS**
>
-> Never auto-chain from review to resolve without user confirmation.
-> Critical findings require human judgment. Each gate is mandatory.
+> The pipeline runs end-to-end without pausing for user confirmation. Report status
+> between stages but never stop to ask; each stage auto-proceeds to the next unless it is BLOCKED.
---
@@ -31,13 +30,12 @@ If implementation returns **BLOCKED**: halt entire pipeline, report blocker.
Cleanup: delete `.docs/handoff.md` if it exists (no longer needed before review).
-## Phase 2: Gate — Review Decision
+## Phase 2: Status — Review Decision
-Use AskUserQuestion:
-> "Implementation complete ({n} files changed, all quality gates passed). Proceed with multi-agent review? (This spawns 7+ reviewer agents)"
+Log implementation results:
+> "Implementation complete ({n} files changed, all quality gates passed). Proceeding to multi-agent review."
-- **User says NO** → stop pipeline, report implementation results only
-- **User says YES** → continue to Phase 3
+Auto-proceed to Phase 3.
## Phase 3: Review
@@ -45,16 +43,15 @@ Load `devflow:review:orch` via the Skill tool, then execute its full pipeline (P
Report review results (merge recommendation, issue counts).
-## Phase 4: Gate — Resolve Decision
+## Phase 4: Status — Resolve Decision
If **blocking issues found**:
-> Use AskUserQuestion: "Found {n} blocking issues. Auto-resolve? (Spawns resolver agents per batch)"
+> Log: "Found {n} blocking issues. Auto-resolving."
-If **no blocking issues**:
-> "Review clean — no resolution needed." → stop pipeline with success summary
+Auto-proceed to Phase 5.
-- **User says NO** → stop pipeline, report implementation + review results
-- **User says YES** → continue to Phase 5
+If **no blocking issues**:
+> "Review clean — no resolution needed." → skip to Phase 6 with success summary.
## Phase 5: Resolve
@@ -71,7 +68,6 @@ End-to-end report:
## Error Handling
- **Implementation BLOCKED**: Halt at Phase 1, report blocker
-- **User declines gate**: Stop cleanly, report completed stages
- **Review finds no changes**: Skip review, report implementation only
- **All issues resolved**: Report full success
- **Partial resolution**: Report what was fixed and what remains
diff --git a/shared/skills/plan:orch/SKILL.md b/shared/skills/plan:orch/SKILL.md
index 1ae8ae49..0741667c 100644
--- a/shared/skills/plan:orch/SKILL.md
+++ b/shared/skills/plan:orch/SKILL.md
@@ -2,7 +2,6 @@
name: plan:orch
description: Agent orchestration for PLAN intent — codebase orientation, design exploration, gap validation
user-invocable: false
-allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion
---
# Plan Orchestration
@@ -25,7 +24,7 @@ This is a lightweight variant of the Plan phase in `/implement` for ambient ORCH
For GUIDED depth, the main session performs planning directly:
-1. **Spawn Skimmer** — `Task(subagent_type="Skimmer")` targeting the area of interest. Use orientation output to ground design decisions in real file structures and patterns.
+1. **Spawn Skimmer** — `Agent(subagent_type="Skimmer")` targeting the area of interest. Use orientation output to ground design decisions in real file structures and patterns.
2. **Design** — Using Skimmer findings + loaded pattern/design skills, design the approach directly in main session.
3. **Present** — Deliver structured plan using the Output format below. Use AskUserQuestion for ambiguous design choices.
@@ -35,7 +34,7 @@ If the orchestrator receives a `WORKTREE_PATH` context (e.g., from multi-worktre
## Phase 1: Orient
-Spawn `Task(subagent_type="Skimmer")` to get codebase overview relevant to the planning question:
+Spawn `Agent(subagent_type="Skimmer")` to get codebase overview relevant to the planning question:
- Existing patterns and conventions in the affected area
- File structure and module boundaries
@@ -44,7 +43,7 @@ Spawn `Task(subagent_type="Skimmer")` to get codebase overview relevant to the p
## Phase 2: Explore
-Based on Skimmer findings, spawn 2-3 `Task(subagent_type="Explore")` agents **in a single message** (parallel execution):
+Based on Skimmer findings, spawn 2-3 `Agent(subagent_type="Explore")` agents **in a single message** (parallel execution):
- **Integration explorer**: Examine integration points — APIs, shared types, module boundaries the plan must respect
- **Pattern explorer**: Find existing implementations of similar features to follow as templates
@@ -54,7 +53,7 @@ Adjust explorer focus based on the specific planning question.
## Phase 3: Design
-Spawn `Task(subagent_type="Plan")` with combined Skimmer + Explore findings:
+Spawn `Agent(subagent_type="Plan")` with combined Skimmer + Explore findings:
- Design implementation approach with file-level specificity
- Reference existing patterns discovered in Phase 1-2
diff --git a/shared/skills/research/SKILL.md b/shared/skills/research/SKILL.md
index de87a17e..47e4db36 100644
--- a/shared/skills/research/SKILL.md
+++ b/shared/skills/research/SKILL.md
@@ -59,7 +59,7 @@ Delegate research to an Explore subagent to keep main session context clean.
**Spawn an Explore agent** with this prompt template:
```
-Task(subagent_type="Explore"):
+Agent(subagent_type="Explore"):
"Research existing solutions for: {need description}
Search for:
diff --git a/shared/skills/resolve:orch/SKILL.md b/shared/skills/resolve:orch/SKILL.md
index cd963a58..4c42a5ef 100644
--- a/shared/skills/resolve:orch/SKILL.md
+++ b/shared/skills/resolve:orch/SKILL.md
@@ -2,7 +2,6 @@
name: resolve:orch
description: Agent orchestration for RESOLVE intent in ambient mode — issue resolution from review reports
user-invocable: false
-allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion
---
# Resolve Orchestration
@@ -52,7 +51,7 @@ Determine execution: batches with no shared files can run in parallel.
## Phase 4: Resolve (Parallel)
-Spawn `Task(subagent_type="Resolver")` agents — one per batch, parallel where possible.
+Spawn `Agent(subagent_type="Resolver")` agents — one per batch, parallel where possible.
Each receives:
- **ISSUES**: Array of issues in the batch
@@ -69,7 +68,7 @@ Resolvers follow a 3-tier risk approach:
Aggregate results from all Resolver agents:
- Count: fixed, false positives, deferred
-Spawn `Task(subagent_type="Simplifier")` on all files modified by Resolvers.
+Spawn `Agent(subagent_type="Simplifier")` on all files modified by Resolvers.
## Phase 6: Report
diff --git a/shared/skills/review:orch/SKILL.md b/shared/skills/review:orch/SKILL.md
index 1068944c..8292175b 100644
--- a/shared/skills/review:orch/SKILL.md
+++ b/shared/skills/review:orch/SKILL.md
@@ -2,7 +2,6 @@
name: review:orch
description: Agent orchestration for REVIEW intent in ambient ORCHESTRATED mode — multi-agent code review with parallel reviewers
user-invocable: false
-allowed-tools: Read, Grep, Glob, Bash, Task, AskUserQuestion
---
# Review Orchestration
@@ -22,7 +21,7 @@ This is a lightweight variant of `/code-review` for ambient ORCHESTRATED mode. E
## Phase 1: Pre-flight
-Spawn `Task(subagent_type="Git")` with action `ensure-pr-ready`:
+Spawn `Agent(subagent_type="Git")` with action `ensure-pr-ready`:
- Extract: branch, base_branch, branch_slug, pr_number
- If BLOCKED (detached HEAD, no commits ahead of base): halt with message
@@ -78,8 +77,8 @@ Each reviewer receives:
After all reviewers complete, spawn in parallel:
-1. `Task(subagent_type="Git")` with action `comment-pr` — post review summary as PR comment (deduplicate: check existing comments first)
-2. `Task(subagent_type="Synthesizer")` in review mode — reads all `{focus}.md` files from disk, writes `review-summary.md`
+1. `Agent(subagent_type="Git")` with action `comment-pr` — post review summary as PR comment (deduplicate: check existing comments first)
+2. `Agent(subagent_type="Synthesizer")` in review mode — reads all `{focus}.md` files from disk, writes `review-summary.md`
## Phase 6: Finalize
diff --git a/shared/skills/router/SKILL.md b/shared/skills/router/SKILL.md
index 8bb04d54..86300862 100644
--- a/shared/skills/router/SKILL.md
+++ b/shared/skills/router/SKILL.md
@@ -1,146 +1,51 @@
---
name: router
-description: This skill should be used when classifying user intent for Devflow mode, auto-loading relevant skills without explicit command invocation. Used by the always-on UserPromptSubmit hook.
+description: This skill should be used after ambient classification to load the relevant skills for the classified intent and depth. Maps GUIDED and ORCHESTRATED classifications to domain and orchestration skills.
user-invocable: false
-# No allowed-tools: orchestrator requires unrestricted access (Skill, Agent, Edit, Write, Bash)
---
# Router
-Classify user intent and auto-load relevant skills. Zero overhead for simple requests, skill loading + optional agent orchestration for substantive work.
-
-**Note:** The UserPromptSubmit hook injects a detection-only preamble (classification rules only). This SKILL.md contains the full skill mappings — load it via Skill tool for complete routing logic.
-
-## Iron Law
-
-> **PROPORTIONAL RESPONSE MATCHED TO SCOPE**
->
-> QUICK gets zero overhead. GUIDED gets skill loading + main session implementation
-> with Simplifier cleanup. ORCHESTRATED gets full skill loading via the Skill tool plus
-> agent pipeline execution. Misclassification in either direction is a failure —
-> false-positive ORCHESTRATED is expensive (5-6 agent spawns), false-negative
-> GUIDED leaves quality on the table.
-
----
-
-## Step 1: Classify Intent
-
-Determine what the user is trying to do from their prompt.
-
-| Intent | Signal Words / Patterns |
-|--------|------------------------|
-| **CHAT** | greetings, meta-questions, confirmations, short responses |
-| **EXPLORE** | "what is", "where is", "find", "show me", "explain", "how does" |
-| **PLAN** | "how should", "design", "architecture", "approach", "strategy" |
-| **IMPLEMENT** | "add", "create", "implement", "build", "write", "make" |
-| **REVIEW** | "check", "look at", "review", "is this ok", "any issues" |
-| **RESOLVE** | "resolve", "fix review issues", "address feedback", "fix findings" |
-| **DEBUG** | "fix", "bug", "broken", "failing", "error", "why does" |
-| **PIPELINE** | "end to end", "implement and review", "build and review", "full pipeline" |
-
-**Ambiguous prompts:** "Update the README" → QUICK. Git operations like "commit this" → QUICK. Code-change prompts without clear scope → GUIDED (not QUICK).
-
-## Step 2: Classify Depth
-
-Determine how much enforcement the prompt warrants.
-
-| Depth | Criteria | Action |
-|-------|----------|--------|
-| **QUICK** | CHAT intent. EXPLORE simple lookups ("where is X?"). Git/devops operations (commit, push, merge, branch, pr, deploy, reinstall). Single-word continuations. Rename/comment tweaks, config changes. 1-2 line edits. | Respond normally. Zero overhead. Do not state classification. |
-| **GUIDED** | IMPLEMENT with small scope (≤2 files, single module). DEBUG with clear error location (stack trace, specific file, known function). PLAN for focused design questions (specific area/pattern). REVIEW (small scope — see below). | Load skills via Skill tool. Main session implements directly. Spawn Simplifier after code changes. State classification. |
-| **ORCHESTRATED** | IMPLEMENT with larger scope (>2 files, multi-module, complex). DEBUG with vague/cross-cutting bug (no clear location, multiple possible causes). PLAN for system-level architecture (caching layer, auth system, multi-module design). REVIEW (large scope — see below). RESOLVE (always). PIPELINE (always). | Load skills via Skill tool, then orchestrate agents. State classification. |
-
-**Scope-based decision criteria:**
-
-| Intent | GUIDED (small scope) | ORCHESTRATED (large scope) |
-|--------|---------------------|---------------------------|
-| **IMPLEMENT** | ≤2 files, single module, clear task | >2 files, multi-module, complex |
-| **DEBUG** | Clear error with known location (stack trace, specific file) | Vague/cross-cutting bug, multiple possible causes |
-| **PLAN** | Focused question about specific area/pattern | System-level architecture, multi-module design |
-| **EXPLORE** | Focused flow/module analysis, single subsystem | Multi-system architecture mapping, cross-cutting analysis |
-| **REVIEW** | Continuation: match prior IMPLEMENT depth. Standalone: "check this"/"review this file" → GUIDED | Continuation: match prior IMPLEMENT depth. Standalone: "full review"/"branch review"/"PR review" → ORCHESTRATED |
-| **RESOLVE** | — | Always ORCHESTRATED |
-| **PIPELINE** | — | Always ORCHESTRATED |
-
-**Classification conservatism:** When choosing between GUIDED and ORCHESTRATED, prefer GUIDED — escalate only when scope clearly exceeds main-session capacity. When choosing between QUICK and GUIDED, prefer GUIDED if the prompt involves code changes (implement, debug, fix, add, create code) or asks for analysis/explanation of a subsystem. Reserve QUICK for truly zero-overhead prompts: chat, simple lookups, git ops, config changes, trivial edits.
-
-## Step 3: Select Skills
-
-Based on classified intent and depth, invoke each selected skill using the Skill tool.
-
-### GUIDED-depth skills
-
-| Intent | Primary Skills | Secondary (if file type matches) |
-|--------|---------------|----------------------------------|
-| **IMPLEMENT** | devflow:test-driven-development, devflow:patterns, devflow:research | devflow:typescript (.ts), devflow:react (.tsx/.jsx), devflow:go (.go), devflow:java (.java), devflow:python (.py), devflow:rust (.rs), devflow:ui-design (CSS/UI), devflow:boundary-validation (forms/API), devflow:security (auth/crypto) |
-| **EXPLORE** | devflow:explore:orch | — |
-| **DEBUG** | devflow:test-driven-development, devflow:software-design, devflow:testing | devflow:git (if git operations involved) |
-| **PLAN** | devflow:test-driven-development, devflow:plan:orch, devflow:patterns, devflow:software-design | — |
-| **REVIEW** | devflow:quality-gates, devflow:software-design | devflow:testing |
-
-### ORCHESTRATED-depth skills
-
-| Intent | Primary Skills | Secondary (if file type matches) |
-|--------|---------------|----------------------------------|
-| **IMPLEMENT** | devflow:implement:orch, devflow:patterns | devflow:typescript (.ts), devflow:react (.tsx/.jsx), devflow:go (.go), devflow:java (.java), devflow:python (.py), devflow:rust (.rs), devflow:ui-design (CSS/UI), devflow:boundary-validation (forms/API), devflow:security (auth/crypto) |
-| **EXPLORE** | devflow:explore:orch | — |
-| **DEBUG** | devflow:debug:orch, devflow:test-driven-development, devflow:software-design | devflow:git (if git operations involved) |
-| **PLAN** | devflow:plan:orch, devflow:test-driven-development, devflow:patterns, devflow:software-design | — |
-| **REVIEW** | devflow:review:orch | — (reviewers load their own pattern skills) |
-| **RESOLVE** | devflow:resolve:orch, devflow:test-driven-development, devflow:software-design | — |
-| **PIPELINE** | devflow:pipeline:orch, devflow:patterns | — |
-
-**Excluded from ambient loading** (loaded by agents internally): devflow:review-methodology, devflow:complexity, devflow:consistency, devflow:database, devflow:dependencies, devflow:documentation, devflow:regression, devflow:architecture, devflow:accessibility, devflow:performance, devflow:qa. These skills are always installed (universal skill installation) but loaded by Reviewer/Tester agents at runtime, not by the router.
-
-See `references/skill-catalog.md` for the full skill-to-intent mapping with file pattern triggers.
-
-## Step 4: Apply
-
-
-When classification is GUIDED or ORCHESTRATED, skill loading is NON-NEGOTIABLE.
-Do not rationalize skipping skills. Do not respond without loading them first.
-BLOCKING REQUIREMENT: Your FIRST tool calls MUST be Skill tool invocations — before
-writing ANY text about the task. Invoke all selected skills, THEN state classification,
-THEN proceed with work. Do NOT write implementation text before all Skill tools return.
-For IMPLEMENT intent, enforce TDD: write the failing test before ANY production code.
-NOTE: Skills loaded in the main session via Devflow mode are reference patterns only —
-their allowed-tools metadata does NOT restrict your tool access. You retain full access
-to all tools (Edit, Write, Bash, Agent, etc.) for implementation work.
-
-
-- **QUICK:** Respond directly. No preamble, no classification statement.
-- **GUIDED:** First, invoke each selected skill using the Skill tool. After all Skill tools return, state classification briefly: `Devflow: IMPLEMENT/GUIDED. Loading: devflow:patterns, devflow:research.` Then work directly in main session. After code changes, spawn Simplifier on changed files.
-- **ORCHESTRATED:** First, invoke each selected skill using the Skill tool. After all Skill tools return, state classification briefly: `Devflow: IMPLEMENT/ORCHESTRATED. Loading: devflow:implement:orch, devflow:patterns.` Then orchestrate agents per the loaded orchestration skill's pipeline.
-
-### GUIDED Behavior by Intent
-
-| Intent | Main Session Work | Post-Work |
-|--------|------------------|-----------|
-| **IMPLEMENT** | Implement directly with loaded skills. Follow TDD cycle. | Spawn Simplifier on changed files. |
-| **EXPLORE** | Spawn Skimmer for orientation, then trace flow/analyze directly in main session. | No Simplifier (no code changes). |
-| **DEBUG** | Investigate directly — reproduce bug, diagnose from stack trace/error, fix. | Spawn Simplifier on changed files. |
-| **PLAN** | Spawn Skimmer for orientation, then design directly with loaded pattern/design skills. | No Simplifier (no code changes). |
-| **REVIEW** | Review directly with loaded skills (self-review in main session). | No Simplifier. |
-
-State classification as: `Devflow: INTENT/DEPTH. Loading: [skills].` QUICK is silent.
-
-## Edge Cases
-
-| Case | Handling |
-|------|----------|
-| Mixed intent ("fix this bug and add a test") | Use the higher-overhead intent (IMPLEMENT > DEBUG) |
-| Continuation of previous conversation | Inherit previous classification unless prompt clearly shifts |
-| User explicitly requests no enforcement | Respect immediately — classify as QUICK |
-| Prompt references specific Devflow command | Skip ambient — the command has its own orchestration |
-| Scope ambiguous between GUIDED and ORCHESTRATED | Default to GUIDED; escalate if complexity emerges during work |
-| REVIEW after IMPLEMENT/GUIDED | GUIDED (continuation — match prior depth) |
-| REVIEW after IMPLEMENT/ORCHESTRATED | ORCHESTRATED (continuation — match prior depth) |
-| REVIEW standalone, large scope ("full review", "branch", "PR") | ORCHESTRATED |
-| REVIEW standalone, small scope ("check this", specific file) | GUIDED |
-| REVIEW standalone, ambiguous | GUIDED (conservative) |
-| RESOLVE intent | Always ORCHESTRATED |
-| PIPELINE intent | Always ORCHESTRATED |
-| EXPLORE simple lookup ("where is X?") | QUICK — no skills needed |
-| EXPLORE focused subsystem ("explain the auth flow") | GUIDED — Skimmer + main session trace |
-| EXPLORE multi-system ("map the full architecture") | ORCHESTRATED — Skimmer + parallel Explore agents + Synthesizer |
-| Multiple triggers per session | Each runs independently; context compaction handles accumulation |
+State classification: `Devflow: INTENT/DEPTH. Loading: [skills].`
+Load all listed skills via Skill tool before writing any text about the task.
+GUIDED: work directly in main session. Spawn Simplifier after code changes.
+- GUIDED EXPLORE: spawn Skimmer + Explore agents, then analyze directly.
+- GUIDED PLAN: spawn Skimmer for orientation, then plan directly.
+ORCHESTRATED: follow the loaded orchestration skill's pipeline.
+
+## GUIDED
+
+| Intent | Skills |
+|--------|--------|
+| IMPLEMENT | devflow:test-driven-development, devflow:patterns, devflow:research |
+| EXPLORE | — |
+| DEBUG | devflow:test-driven-development, devflow:software-design, devflow:testing |
+| PLAN | devflow:test-driven-development, devflow:patterns, devflow:software-design, devflow:security |
+| REVIEW | devflow:quality-gates, devflow:software-design |
+
+## ORCHESTRATED
+
+| Intent | Skills |
+|--------|--------|
+| IMPLEMENT | devflow:implement:orch, devflow:patterns |
+| EXPLORE | devflow:explore:orch |
+| DEBUG | devflow:debug:orch |
+| PLAN | devflow:plan:orch, devflow:patterns, devflow:software-design, devflow:security |
+| REVIEW | devflow:review:orch |
+| RESOLVE | devflow:resolve:orch |
+| PIPELINE | devflow:pipeline:orch, devflow:patterns |
+
+## Secondary Skills (GUIDED IMPLEMENT + DEBUG only, load all that match)
+
+| Pattern | Skill |
+|---------|-------|
+| .ts, .tsx | devflow:typescript |
+| .tsx, .jsx | devflow:react |
+| .go | devflow:go |
+| .java | devflow:java |
+| .py | devflow:python |
+| .rs | devflow:rust |
+| CSS/UI/styling | devflow:ui-design |
+| Forms/API/input | devflow:boundary-validation |
+| Auth/crypto/secrets | devflow:security |
+| Git operations | devflow:git |
diff --git a/shared/skills/router/references/classification-rules.md b/shared/skills/router/references/classification-rules.md
new file mode 100644
index 00000000..4e56e57e
--- /dev/null
+++ b/shared/skills/router/references/classification-rules.md
@@ -0,0 +1,31 @@
+# Ambient Classification
+
+Classify each prompt by **intent** and **depth** before responding.
+
+## Intent Signals
+
+- **CHAT**: greetings, confirmations, meta-questions, short responses
+- **EXPLORE**: "what is", "where is", "find", "explain", "how does", "analyze", "analysis", "trace", "map"
+- **PLAN**: "how should", "plan", "design", "architecture", "approach", "strategy"
+- **IMPLEMENT**: "add", "create", "implement", "build", "write", "make"
+- **REVIEW**: "check", "look at", "review", "is this ok", "any issues"
+- **RESOLVE**: "resolve", "fix review issues", "address feedback", "fix findings"
+- **DEBUG**: "fix", "bug", "broken", "failing", "error", "why does"
+- **PIPELINE**: "end to end", "implement and review", "build and review", "full pipeline"
+
+## Depth Criteria
+
+- **QUICK**: CHAT intent. Simple lookups ("where is X?"). Git/devops ops (commit, push, branch, deploy). Config changes. Rename/comment tweaks. 1-2 line edits.
+- **GUIDED**: Small, focused changes without a plan — ≤2 files, clear bugs with a known fix, focused exploration, quick review. Orchestration would add no value.
+- **ORCHESTRATED**: Substantive code work — multi-file, multi-module, complex or vague bugs, full reviews, system-level design. A detailed plan or specification in the prompt is a strong ORCHESTRATED signal. RESOLVE and PIPELINE always.
+
+Default to ORCHESTRATED for substantive work — it produces better results.
+Reserve GUIDED for small focused changes where orchestration adds no value.
+Prefer GUIDED over QUICK for any prompt involving code changes.
+
+## Action
+
+Classify every message — including the first message of a session — then:
+
+- **QUICK**: Respond directly. Do not display classification or load the router.
+- **GUIDED/ORCHESTRATED**: Load `devflow:router` via Skill tool.
diff --git a/shared/skills/router/references/skill-catalog.md b/shared/skills/router/references/skill-catalog.md
index ec3d872d..97274a4c 100644
--- a/shared/skills/router/references/skill-catalog.md
+++ b/shared/skills/router/references/skill-catalog.md
@@ -29,8 +29,8 @@ These skills may be loaded during GUIDED and ORCHESTRATED-depth ambient routing.
| Skill | When to Load | Depth | File Patterns |
|-------|-------------|-------|---------------|
| devflow:debug:orch | ORCHESTRATED only | ORCHESTRATED | Any — orchestrates investigation pipeline |
-| devflow:test-driven-development | Always for DEBUG | GUIDED + ORCHESTRATED | Any code file — bug fix needs regression test first |
-| devflow:software-design | Always for DEBUG | GUIDED + ORCHESTRATED | Any code file |
+| devflow:test-driven-development | Always for DEBUG (GUIDED) | GUIDED | Any code file — bug fix needs regression test first |
+| devflow:software-design | Always for DEBUG (GUIDED) | GUIDED | Any code file |
| devflow:testing | Always for DEBUG (GUIDED) | GUIDED | Any code file |
| devflow:git | Git operations involved | GUIDED + ORCHESTRATED | User mentions git, rebase, merge, etc. |
@@ -50,8 +50,6 @@ These skills may be loaded during GUIDED and ORCHESTRATED-depth ambient routing.
| Skill | When to Load | Depth | File Patterns |
|-------|-------------|-------|---------------|
| devflow:resolve:orch | Always for RESOLVE | ORCHESTRATED | Any — orchestrates issue resolution pipeline |
-| devflow:test-driven-development | Always for RESOLVE | ORCHESTRATED | Any code file — fixes need regression tests |
-| devflow:software-design | Always for RESOLVE | ORCHESTRATED | Any code file |
RESOLVE is always ORCHESTRATED — it requires multi-agent resolution with Resolver agents and Simplifier.
@@ -62,7 +60,7 @@ RESOLVE is always ORCHESTRATED — it requires multi-agent resolution with Resol
| devflow:pipeline:orch | Always for PIPELINE | ORCHESTRATED | Any — meta-orchestrator for implement → review → resolve |
| devflow:patterns | Always for PIPELINE | ORCHESTRATED | Any code file |
-PIPELINE is always ORCHESTRATED — it chains multiple orchestration stages with user gates.
+PIPELINE is always ORCHESTRATED — it chains multiple orchestration stages with status reporting between phases.
### EXPLORE Intent
@@ -77,7 +75,7 @@ EXPLORE depth: simple lookups ("where is X?") → QUICK. Focused subsystem/flow
| Skill | When to Load | Depth | File Patterns |
|-------|-------------|-------|---------------|
| devflow:plan:orch | ORCHESTRATED only | ORCHESTRATED | Any — orchestrates design pipeline |
-| devflow:test-driven-development | Always for PLAN | GUIDED + ORCHESTRATED | Any planning context — plans must account for test-first workflow |
+| devflow:test-driven-development | Always for PLAN (GUIDED) | GUIDED | Any planning context — plans must account for test-first workflow |
| devflow:patterns | Always for PLAN | GUIDED + ORCHESTRATED | Any planning context |
| devflow:software-design | Always for PLAN | GUIDED + ORCHESTRATED | System design discussions |
diff --git a/src/cli/commands/ambient.ts b/src/cli/commands/ambient.ts
index d671fdc0..8550f9b0 100644
--- a/src/cli/commands/ambient.ts
+++ b/src/cli/commands/ambient.ts
@@ -8,23 +8,25 @@ import type { HookMatcher, Settings } from '../utils/hooks.js';
const PREAMBLE_HOOK_MARKER = 'preamble';
const LEGACY_HOOK_MARKER = 'ambient-prompt';
+const CLASSIFICATION_HOOK_MARKER = 'session-start-classification';
-/** Filter hook entries from a parsed Settings object. Returns true if any were removed. */
+/** Filter hook entries from a parsed Settings object for a given event. Returns true if any were removed. */
function filterHookEntries(
settings: Settings,
+ eventName: string,
shouldRemove: (matcher: HookMatcher) => boolean,
): boolean {
- if (!settings.hooks?.UserPromptSubmit) return false;
+ if (!settings.hooks?.[eventName]) return false;
- const before = settings.hooks.UserPromptSubmit.length;
- settings.hooks.UserPromptSubmit = settings.hooks.UserPromptSubmit.filter(
+ const before = settings.hooks[eventName].length;
+ settings.hooks[eventName] = settings.hooks[eventName].filter(
(matcher) => !shouldRemove(matcher),
);
- if (settings.hooks.UserPromptSubmit.length === before) return false;
+ if (settings.hooks[eventName].length === before) return false;
- if (settings.hooks.UserPromptSubmit.length === 0) {
- delete settings.hooks.UserPromptSubmit;
+ if (settings.hooks[eventName].length === 0) {
+ delete settings.hooks[eventName];
}
if (settings.hooks && Object.keys(settings.hooks).length === 0) {
delete settings.hooks;
@@ -40,66 +42,93 @@ const isAmbient = (matcher: HookMatcher) =>
h.command.includes(PREAMBLE_HOOK_MARKER) || h.command.includes(LEGACY_HOOK_MARKER),
);
+const isClassification = (matcher: HookMatcher) =>
+ matcher.hooks.some((h) => h.command.includes(CLASSIFICATION_HOOK_MARKER));
+
/**
* Remove only the legacy `ambient-prompt` hook entries.
* Used by `addAmbientHook` to clean before adding the new preamble hook.
*/
export function removeLegacyAmbientHook(settingsJson: string): string {
const settings: Settings = JSON.parse(settingsJson);
- if (!filterHookEntries(settings, isLegacy)) return settingsJson;
+ if (!filterHookEntries(settings, 'UserPromptSubmit', isLegacy)) return settingsJson;
return JSON.stringify(settings, null, 2) + '\n';
}
/**
- * Add the ambient UserPromptSubmit hook to settings JSON.
+ * Add the ambient UserPromptSubmit hook and SessionStart classification hook to settings JSON.
* Removes any legacy `ambient-prompt` hook first, then adds the new `preamble` hook.
- * Idempotent — returns unchanged JSON if the new hook already exists.
+ * Also adds the SessionStart classification hook (reads classification-rules.md).
+ * Idempotent — each hook checked independently.
*/
export function addAmbientHook(settingsJson: string, devflowDir: string): string {
const settings: Settings = JSON.parse(settingsJson);
- const legacyRemoved = filterHookEntries(settings, isLegacy);
-
- // Check if the NEW preamble hook already exists
- if (settings.hooks?.UserPromptSubmit?.some((m) =>
- m.hooks.some((h) => h.command.includes(PREAMBLE_HOOK_MARKER)),
- )) {
- return legacyRemoved ? JSON.stringify(settings, null, 2) + '\n' : settingsJson;
- }
+ let changed = filterHookEntries(settings, 'UserPromptSubmit', isLegacy);
if (!settings.hooks) {
settings.hooks = {};
}
- const hookCommand = path.join(devflowDir, 'scripts', 'hooks', 'run-hook') + ' preamble';
+ // --- UserPromptSubmit: preamble hook ---
+ const hasPreamble = settings.hooks.UserPromptSubmit?.some((m) =>
+ m.hooks.some((h) => h.command.includes(PREAMBLE_HOOK_MARKER)),
+ );
- const newEntry: HookMatcher = {
- hooks: [
- {
- type: 'command',
- command: hookCommand,
- timeout: 5,
- },
- ],
- };
+ if (!hasPreamble) {
+ if (!settings.hooks.UserPromptSubmit) {
+ settings.hooks.UserPromptSubmit = [];
+ }
- if (!settings.hooks.UserPromptSubmit) {
- settings.hooks.UserPromptSubmit = [];
+ settings.hooks.UserPromptSubmit.push({
+ hooks: [
+ {
+ type: 'command',
+ command: path.join(devflowDir, 'scripts', 'hooks', 'run-hook') + ' preamble',
+ timeout: 5,
+ },
+ ],
+ });
+ changed = true;
}
- settings.hooks.UserPromptSubmit.push(newEntry);
+ // --- SessionStart: classification hook ---
+ const hasClassificationHook = settings.hooks.SessionStart?.some((m) =>
+ m.hooks.some((h) => h.command.includes(CLASSIFICATION_HOOK_MARKER)),
+ );
+
+ if (!hasClassificationHook) {
+ if (!settings.hooks.SessionStart) {
+ settings.hooks.SessionStart = [];
+ }
+
+ settings.hooks.SessionStart.push({
+ hooks: [
+ {
+ type: 'command',
+ command: path.join(devflowDir, 'scripts', 'hooks', 'run-hook') + ' session-start-classification',
+ timeout: 5,
+ },
+ ],
+ });
+ changed = true;
+ }
+ if (!changed) return settingsJson;
return JSON.stringify(settings, null, 2) + '\n';
}
/**
- * Remove the ambient UserPromptSubmit hook from settings JSON.
- * Removes BOTH legacy `ambient-prompt` and current `preamble` hooks.
- * Idempotent — returns unchanged JSON if hook not present.
- * Preserves other UserPromptSubmit hooks. Cleans empty arrays/objects.
+ * Remove the ambient hooks from settings JSON.
+ * Removes preamble + legacy from UserPromptSubmit, and classification from SessionStart.
+ * Idempotent — returns unchanged JSON if hooks not present.
+ * Preserves other hooks. Cleans empty arrays/objects.
*/
export function removeAmbientHook(settingsJson: string): string {
const settings: Settings = JSON.parse(settingsJson);
- if (!filterHookEntries(settings, isAmbient)) return settingsJson;
+ const removedPrompt = filterHookEntries(settings, 'UserPromptSubmit', isAmbient);
+ const removedClassification = filterHookEntries(settings, 'SessionStart', isClassification);
+
+ if (!removedPrompt && !removedClassification) return settingsJson;
return JSON.stringify(settings, null, 2) + '\n';
}
@@ -109,15 +138,17 @@ export function removeAmbientHook(settingsJson: string): string {
export function hasAmbientHook(settingsJson: string): boolean {
const settings: Settings = JSON.parse(settingsJson);
- if (!settings.hooks?.UserPromptSubmit) {
- return false;
- }
-
- return settings.hooks.UserPromptSubmit.some((matcher) =>
+ const hasPreamble = settings.hooks?.UserPromptSubmit?.some((matcher) =>
matcher.hooks.some((h) =>
h.command.includes(PREAMBLE_HOOK_MARKER) || h.command.includes(LEGACY_HOOK_MARKER),
),
- );
+ ) ?? false;
+
+ const hasClassificationHook = settings.hooks?.SessionStart?.some((matcher) =>
+ isClassification(matcher),
+ ) ?? false;
+
+ return hasPreamble || hasClassificationHook;
}
interface AmbientOptions {
@@ -128,8 +159,8 @@ interface AmbientOptions {
export const ambientCommand = new Command('ambient')
.description('Enable or disable ambient mode (always-on quality enforcement)')
- .option('--enable', 'Register UserPromptSubmit hook for ambient mode')
- .option('--disable', 'Remove ambient mode hook')
+ .option('--enable', 'Register ambient mode hooks')
+ .option('--disable', 'Remove ambient mode hooks')
.option('--status', 'Check if ambient mode is enabled')
.action(async (options: AmbientOptions) => {
const hasFlag = options.enable || options.disable || options.status;
@@ -189,7 +220,7 @@ export const ambientCommand = new Command('ambient')
return;
}
await fs.writeFile(settingsPath, updated, 'utf-8');
- p.log.success('Ambient mode enabled — UserPromptSubmit hook registered');
+ p.log.success('Ambient mode enabled — hooks registered');
p.log.info(color.dim('Skills auto-load and agents orchestrate based on each prompt'));
}
diff --git a/tests/ambient.test.ts b/tests/ambient.test.ts
index 81bb0eb9..35ecd758 100644
--- a/tests/ambient.test.ts
+++ b/tests/ambient.test.ts
@@ -26,6 +26,15 @@ describe('addAmbientHook', () => {
expect(settings.hooks.UserPromptSubmit[0].hooks[0].timeout).toBe(5);
});
+ it('adds SessionStart classification hook to empty settings', () => {
+ const result = addAmbientHook('{}', '/home/user/.devflow');
+ const settings = JSON.parse(result);
+
+ expect(settings.hooks.SessionStart).toHaveLength(1);
+ expect(settings.hooks.SessionStart[0].hooks[0].command).toContain('session-start-classification');
+ expect(settings.hooks.SessionStart[0].hooks[0].timeout).toBe(5);
+ });
+
it('adds alongside existing hooks', () => {
const input = JSON.stringify({
hooks: {
@@ -37,6 +46,7 @@ describe('addAmbientHook', () => {
expect(settings.hooks.Stop).toHaveLength(1);
expect(settings.hooks.UserPromptSubmit).toHaveLength(1);
+ expect(settings.hooks.SessionStart).toHaveLength(1);
});
it('adds alongside existing UserPromptSubmit hooks', () => {
@@ -53,6 +63,20 @@ describe('addAmbientHook', () => {
expect(settings.hooks.UserPromptSubmit[1].hooks[0].command).toContain('preamble');
});
+ it('preserves existing SessionStart hooks (session-start-memory)', () => {
+ const input = JSON.stringify({
+ hooks: {
+ SessionStart: [{ hooks: [{ type: 'command', command: '/path/to/run-hook session-start-memory' }] }],
+ },
+ });
+ const result = addAmbientHook(input, '/home/user/.devflow');
+ const settings = JSON.parse(result);
+
+ expect(settings.hooks.SessionStart).toHaveLength(2);
+ expect(settings.hooks.SessionStart[0].hooks[0].command).toContain('session-start-memory');
+ expect(settings.hooks.SessionStart[1].hooks[0].command).toContain('session-start-classification');
+ });
+
it('is idempotent — does not add duplicate hooks', () => {
const first = addAmbientHook('{}', '/home/user/.devflow');
const second = addAmbientHook(first, '/home/user/.devflow');
@@ -60,6 +84,14 @@ describe('addAmbientHook', () => {
expect(second).toBe(first);
});
+ it('idempotent for SessionStart classification hook', () => {
+ const first = addAmbientHook('{}', '/home/user/.devflow');
+ const second = addAmbientHook(first, '/home/user/.devflow');
+ const settings = JSON.parse(second);
+
+ expect(settings.hooks.SessionStart).toHaveLength(1);
+ });
+
it('preserves other settings', () => {
const input = JSON.stringify({
statusLine: { type: 'command', command: 'statusline.sh' },
@@ -71,15 +103,19 @@ describe('addAmbientHook', () => {
expect(settings.statusLine.command).toBe('statusline.sh');
expect(settings.env.SOME_VAR).toBe('1');
expect(settings.hooks.UserPromptSubmit).toHaveLength(1);
+ expect(settings.hooks.SessionStart).toHaveLength(1);
});
it('uses correct devflowDir path in command via run-hook wrapper', () => {
const result = addAmbientHook('{}', '/custom/path/.devflow');
const settings = JSON.parse(result);
- const command = settings.hooks.UserPromptSubmit[0].hooks[0].command;
+ const preambleCmd = settings.hooks.UserPromptSubmit[0].hooks[0].command;
+ const classificationCmd = settings.hooks.SessionStart[0].hooks[0].command;
- expect(command).toContain('/custom/path/.devflow/scripts/hooks/run-hook');
- expect(command).toContain('preamble');
+ expect(preambleCmd).toContain('/custom/path/.devflow/scripts/hooks/run-hook');
+ expect(preambleCmd).toContain('preamble');
+ expect(classificationCmd).toContain('/custom/path/.devflow/scripts/hooks/run-hook');
+ expect(classificationCmd).toContain('session-start-classification');
});
it('replaces legacy ambient-prompt hook with new preamble hook', () => {
@@ -115,10 +151,29 @@ describe('addAmbientHook', () => {
expect(settings.hooks.UserPromptSubmit[0].hooks[0].command).toBe('other-hook.sh');
expect(settings.hooks.UserPromptSubmit[1].hooks[0].command).toContain('preamble');
});
+
+ it('adds SessionStart hook even when preamble already exists (upgrade path)', () => {
+ // Simulates existing user who has preamble but not classification hook
+ const input = JSON.stringify({
+ hooks: {
+ UserPromptSubmit: [
+ { hooks: [{ type: 'command', command: '/home/user/.devflow/scripts/hooks/run-hook preamble', timeout: 5 }] },
+ ],
+ },
+ });
+ const result = addAmbientHook(input, '/home/user/.devflow');
+ const settings = JSON.parse(result);
+
+ // Preamble preserved (not duplicated)
+ expect(settings.hooks.UserPromptSubmit).toHaveLength(1);
+ // SessionStart classification hook added
+ expect(settings.hooks.SessionStart).toHaveLength(1);
+ expect(settings.hooks.SessionStart[0].hooks[0].command).toContain('session-start-classification');
+ });
});
describe('removeAmbientHook', () => {
- it('removes ambient hook', () => {
+ it('removes ambient hook — clears both UserPromptSubmit and SessionStart', () => {
const withHook = addAmbientHook('{}', '/home/user/.devflow');
const result = removeAmbientHook(withHook);
const settings = JSON.parse(result);
@@ -142,6 +197,26 @@ describe('removeAmbientHook', () => {
expect(settings.hooks.UserPromptSubmit[0].hooks[0].command).toBe('other-hook.sh');
});
+ it('preserves other SessionStart hooks when removing classification', () => {
+ const input = JSON.stringify({
+ hooks: {
+ SessionStart: [
+ { hooks: [{ type: 'command', command: '/path/to/run-hook session-start-memory' }] },
+ { hooks: [{ type: 'command', command: '/path/to/run-hook session-start-classification' }] },
+ ],
+ UserPromptSubmit: [
+ { hooks: [{ type: 'command', command: '/path/to/preamble' }] },
+ ],
+ },
+ });
+ const result = removeAmbientHook(input);
+ const settings = JSON.parse(result);
+
+ expect(settings.hooks.SessionStart).toHaveLength(1);
+ expect(settings.hooks.SessionStart[0].hooks[0].command).toContain('session-start-memory');
+ expect(settings.hooks.UserPromptSubmit).toBeUndefined();
+ });
+
it('cleans empty hooks object when last hook removed', () => {
const input = JSON.stringify({
hooks: {
@@ -365,43 +440,209 @@ describe('skill invocation helpers', () => {
});
});
+/** Parse router SKILL.md markdown tables into intent→skills maps */
+function parseRouterTables(content: string): { guided: Map; orchestrated: Map } {
+ const guided = new Map();
+ const orchestrated = new Map();
+
+ let currentSection: 'guided' | 'orchestrated' | null = null;
+
+ for (const line of content.split('\n')) {
+ if (line.startsWith('## GUIDED')) { currentSection = 'guided'; continue; }
+ if (line.startsWith('## ORCHESTRATED')) { currentSection = 'orchestrated'; continue; }
+ if (line.startsWith('## ') && currentSection) { currentSection = null; continue; }
+
+ if (!currentSection) continue;
+
+ const match = line.match(/^\|\s*(\w+)\s*\|\s*(.+?)\s*\|$/);
+ if (!match || match[1] === 'Intent') continue;
+
+ const intent = match[1];
+ const skillsStr = match[2].trim();
+ const skills = skillsStr === '—' || skillsStr === '-'
+ ? []
+ : skillsStr.split(',').map(s => s.trim());
+
+ const table = currentSection === 'guided' ? guided : orchestrated;
+ table.set(intent, skills);
+ }
+
+ return { guided, orchestrated };
+}
+
+/** Extract intent names from classification-rules.md Intent Signals section only */
+function parseClassificationIntents(content: string): string[] {
+ const intents: string[] = [];
+ let inIntentSection = false;
+
+ for (const line of content.split('\n')) {
+ if (line.includes('Intent Signals')) { inIntentSection = true; continue; }
+ if (line.startsWith('## ') && inIntentSection) break; // left the section
+
+ if (!inIntentSection) continue;
+
+ const match = line.match(/^\-\s*\*\*(\w+)\*\*/);
+ if (match) intents.push(match[1]);
+ }
+ return intents;
+}
+
+describe('router structural validation', () => {
+ const routerPath = path.resolve(__dirname, '../shared/skills/router/SKILL.md');
+ const rulesPath = path.resolve(__dirname, '../shared/skills/router/references/classification-rules.md');
+ const sharedSkillsDir = path.resolve(__dirname, '../shared/skills');
+
+ it('router covers all ORCHESTRATED intents (every non-CHAT intent has a row)', async () => {
+ const rulesContent = await fs.readFile(rulesPath, 'utf-8');
+ const routerContent = await fs.readFile(routerPath, 'utf-8');
+
+ const nonChatIntents = parseClassificationIntents(rulesContent).filter(i => i !== 'CHAT');
+ const { orchestrated } = parseRouterTables(routerContent);
+
+ for (const intent of nonChatIntents) {
+ expect(orchestrated.has(intent), `ORCHESTRATED table missing intent: ${intent}`).toBe(true);
+ }
+ });
+
+ it('RESOLVE and PIPELINE have no GUIDED rows (always ORCHESTRATED)', async () => {
+ const routerContent = await fs.readFile(routerPath, 'utf-8');
+ const { guided } = parseRouterTables(routerContent);
+
+ expect(guided.has('RESOLVE'), 'RESOLVE must not have a GUIDED row — classification says always ORCHESTRATED').toBe(false);
+ expect(guided.has('PIPELINE'), 'PIPELINE must not have a GUIDED row — classification says always ORCHESTRATED').toBe(false);
+ });
+
+ it('router table skills are canonical — every prefixed ref exists in shared/skills/', async () => {
+ const routerContent = await fs.readFile(routerPath, 'utf-8');
+ const { guided, orchestrated } = parseRouterTables(routerContent);
+
+ const allSkills = new Set();
+ for (const skills of [...guided.values(), ...orchestrated.values()]) {
+ for (const skill of skills) {
+ if (skill.startsWith('devflow:')) {
+ allSkills.add(skill.replace('devflow:', ''));
+ }
+ }
+ }
+
+ const entries = await fs.readdir(sharedSkillsDir);
+
+ for (const skill of allSkills) {
+ expect(entries, `shared/skills/${skill}/ not found — router references nonexistent skill`).toContain(skill);
+ }
+ });
+
+ it('integration test expectations align with router skill tables', async () => {
+ const integrationPath = path.resolve(__dirname, './integration/ambient-activation.test.ts');
+ const routerContent = await fs.readFile(routerPath, 'utf-8');
+ const testContent = await fs.readFile(integrationPath, 'utf-8');
+ const { guided, orchestrated } = parseRouterTables(routerContent);
+
+ // Split integration tests into blocks and extract intent/depth + expected/required arrays
+ const blocks = testContent.split(/\bit\(/);
+
+ for (const block of blocks) {
+ const nameMatch = block.match(/^'([^']+)'/);
+ if (!nameMatch) continue;
+ const name = nameMatch[1];
+
+ const classMatch = name.match(/(IMPLEMENT|EXPLORE|DEBUG|PLAN|REVIEW|RESOLVE|PIPELINE)\/(GUIDED|ORCHESTRATED)/);
+ if (!classMatch) continue;
+
+ const [, intent, depth] = classMatch;
+ const table = depth === 'GUIDED' ? guided : orchestrated;
+ const routerSkills = table.get(intent);
+
+ // Extract expected or required array from block
+ const arrayMatch = block.match(/const (?:expected|required) = \[([^\]]*)\]/);
+ if (!arrayMatch) continue; // Some tests (like EXPLORE/GUIDED) have no expected array — skip
+
+ const testSkills = arrayMatch[1]
+ .split(',')
+ .map(s => s.trim().replace(/['"]/g, ''))
+ .filter(Boolean)
+ .map(s => `devflow:${s}`);
+
+ expect(routerSkills, `${name}: router has no ${depth} row for ${intent}`).toBeDefined();
+ if (!routerSkills) return;
+
+ // Every skill the test asserts must appear in the router table row
+ for (const skill of testSkills) {
+ expect(
+ routerSkills.includes(skill),
+ `${name}: test asserts '${skill}' but router ${depth} ${intent} row is [${routerSkills.join(', ')}]`,
+ ).toBe(true);
+ }
+ }
+ });
+});
+
describe('preamble drift detection', () => {
- it('preamble PREAMBLE contains required classification elements', async () => {
+ it('preamble contains classify and devflow:router instructions', async () => {
const hookPath = path.resolve(__dirname, '../scripts/hooks/preamble');
const hookContent = await fs.readFile(hookPath, 'utf-8');
- // Extract the PREAMBLE string from the shell script (may be multiline)
+ // Extract the PREAMBLE string from the shell script
const match = hookContent.match(/PREAMBLE="([^"]+)"/);
expect(match).not.toBeNull();
- const shellPreamble = match![1];
-
- // The preamble is detection-only: classification rules + router skill reference.
- // Verify structural elements rather than exact string match to allow wording refinement.
- expect(shellPreamble).toContain('AMBIENT MODE');
-
- // Must contain depth definitions
- expect(shellPreamble).toContain('QUICK');
- expect(shellPreamble).toContain('GUIDED');
- expect(shellPreamble).toContain('ORCHESTRATED');
-
- // Must contain intent names for each category
- expect(shellPreamble).toContain('CHAT');
- expect(shellPreamble).toContain('EXPLORE');
- expect(shellPreamble).toContain('PLAN');
- expect(shellPreamble).toContain('IMPLEMENT');
- expect(shellPreamble).toContain('REVIEW');
- expect(shellPreamble).toContain('RESOLVE');
- expect(shellPreamble).toContain('DEBUG');
- expect(shellPreamble).toContain('PIPELINE');
-
- // Must reference the router skill (detection-only: no direct skill mappings)
+ if (!match) return;
+ const shellPreamble = match[1];
+
+ // SYNC: preamble must instruct classification + router loading
+ expect(shellPreamble.toLowerCase()).toContain('classify');
expect(shellPreamble).toContain('devflow:router');
+ });
- // Must instruct Skill tool invocation
- expect(shellPreamble).toContain('Skill tool');
+ it('classification-rules.md contains required classification elements', async () => {
+ const rulesPath = path.resolve(__dirname, '../shared/skills/router/references/classification-rules.md');
+ const rulesContent = await fs.readFile(rulesPath, 'utf-8');
+
+ // Must contain Intent Signals heading
+ expect(rulesContent).toContain('Intent Signals');
+
+ // Must contain all 8 intents
+ expect(rulesContent).toContain('CHAT');
+ expect(rulesContent).toContain('EXPLORE');
+ expect(rulesContent).toContain('PLAN');
+ expect(rulesContent).toContain('IMPLEMENT');
+ expect(rulesContent).toContain('REVIEW');
+ expect(rulesContent).toContain('RESOLVE');
+ expect(rulesContent).toContain('DEBUG');
+ expect(rulesContent).toContain('PIPELINE');
+
+ // Must contain all 3 depths
+ expect(rulesContent).toContain('QUICK');
+ expect(rulesContent).toContain('GUIDED');
+ expect(rulesContent).toContain('ORCHESTRATED');
+
+ // Must reference devflow:router for GUIDED/ORCHESTRATED
+ expect(rulesContent).toContain('devflow:router');
+ });
+
+ it('router SKILL.md contains skill lookup tables', async () => {
+ const routerPath = path.resolve(__dirname, '../shared/skills/router/SKILL.md');
+ const routerContent = await fs.readFile(routerPath, 'utf-8');
+
+ // Must contain GUIDED/ORCHESTRATED headings
+ expect(routerContent).toContain('## GUIDED');
+ expect(routerContent).toContain('## ORCHESTRATED');
+
+ // Must contain classification output format
+ expect(routerContent).toContain('Devflow:');
+ expect(routerContent).toContain('Loading:');
+
+ // Must contain intent names in tables
+ expect(routerContent).toContain('IMPLEMENT');
+ expect(routerContent).toContain('EXPLORE');
+ expect(routerContent).toContain('DEBUG');
+ expect(routerContent).toContain('PLAN');
+ expect(routerContent).toContain('REVIEW');
+ });
+
+ it('session-start-classification hook reads classification-rules.md', async () => {
+ const hookPath = path.resolve(__dirname, '../scripts/hooks/session-start-classification');
+ const hookContent = await fs.readFile(hookPath, 'utf-8');
- // Must include classification output format
- expect(shellPreamble).toContain('Devflow:');
- expect(shellPreamble).toContain('Loading:');
+ expect(hookContent).toContain('classification-rules.md');
});
});
diff --git a/tests/integration/ambient-activation.test.ts b/tests/integration/ambient-activation.test.ts
index 6b6c06dd..a14e6d98 100644
--- a/tests/integration/ambient-activation.test.ts
+++ b/tests/integration/ambient-activation.test.ts
@@ -12,7 +12,7 @@ import {
* Integration tests for Devflow ambient mode classification and skill loading.
*
* GUIDED tests use two-tier assertions:
- * Hard: router skill loaded (proves non-QUICK classification — system works)
+ * Hard: router skill loaded via Skill tool (proves non-QUICK classification — system works)
* Soft: specific skills match expectations (quality metric, logged but not gating)
*
* ORCHESTRATED tests use strict assertions (deterministic at that scope).
@@ -30,10 +30,11 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => {
// --- QUICK tier: no skills loaded ---
- it('QUICK — chat: "thanks" loads no skills', async () => {
+ it('preamble filter — single-word prompt skipped before classification', async () => {
+ // "thanks" is ≤2 words — preamble's word-count filter skips it before classification runs
const result = await runClaudeStreaming('thanks', { timeout: 20000 });
expect(hasSkillInvocations(result)).toBe(false);
- console.log(`QUICK chat: no skills (${result.durationMs}ms)`);
+ console.log(`preamble filter (single-word): no skills (${result.durationMs}ms)`);
});
it('QUICK — explore: "where is the config?" loads no skills', async () => {
@@ -42,18 +43,32 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => {
console.log(`QUICK explore: no skills (${result.durationMs}ms)`);
});
+ it('CHAT/QUICK — multi-word chat passes preamble but classified QUICK', async () => {
+ // Passes preamble's word-count filter (>2 words) but classified CHAT/QUICK — no skills loaded
+ const result = await runClaudeStreaming('sounds good, thanks for explaining that', { timeout: 20000 });
+ expect(hasSkillInvocations(result)).toBe(false);
+ console.log(`CHAT/QUICK (multi-word): no skills (${result.durationMs}ms)`);
+ });
+
+ it('preamble filter — slash command prefix skipped before classification', async () => {
+ // Preamble filters prompts starting with "/" — no classification or skill loading
+ const result = await runClaudeStreaming('/help with something', { timeout: 20000 });
+ expect(hasSkillInvocations(result)).toBe(false);
+ console.log(`preamble filter (slash command): no skills (${result.durationMs}ms)`);
+ });
+
// --- GUIDED tier: router must load (hard), specific skills logged (soft) ---
- it('EXPLORE/GUIDED — loads router and explore skills', async () => {
- const expected = ['explore:orch'];
+ it('EXPLORE/GUIDED — loads router only (no additional skills)', async () => {
+ // GUIDED EXPLORE dispatches no additional skills — router instructs to spawn Skimmer + Explore agents directly
const { result, passed, attempts, model } = await runClaudeStreamingWithRetry(
'explain how the plugin loading system works from registration through initialization',
- (r) => hasSkillInvocations(r) && hasRequiredSkills(r, ['router']),
+ (r) => hasRequiredSkills(r, ['router']),
);
const skills = getSkillInvocations(result);
- const hasExpected = hasRequiredSkills(result, expected);
- console.log(`EXPLORE/GUIDED: ${passed ? 'PASS' : 'FAIL'} (${model}, ${attempts} attempts, ${result.durationMs}ms). Skills: [${skills.join(', ')}]${passed && !hasExpected ? ` ⚠ expected: ${expected.join(', ')}` : ''}`);
+ const nonRouter = skills.filter((s) => s !== 'router' && s !== 'devflow:router');
+ console.log(`EXPLORE/GUIDED: ${passed ? 'PASS' : 'FAIL'} (${model}, ${attempts} attempts, ${result.durationMs}ms). Skills: [${skills.join(', ')}]${nonRouter.length > 0 ? ` ⚠ unexpected non-router: ${nonRouter.join(', ')}` : ''}`);
expect(passed).toBe(true);
});
@@ -61,7 +76,7 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => {
const expected = ['patterns', 'test-driven-development', 'research'];
const { result, passed, attempts, model } = await runClaudeStreamingWithRetry(
'add a retry mechanism with exponential backoff to the HTTP client module',
- (r) => hasSkillInvocations(r) && hasRequiredSkills(r, ['router']),
+ (r) => hasRequiredSkills(r, ['router']),
);
const skills = getSkillInvocations(result);
@@ -71,10 +86,10 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => {
});
it('DEBUG/GUIDED — loads router and debug skills', async () => {
- const expected = ['software-design', 'testing'];
+ const expected = ['test-driven-development', 'software-design', 'testing'];
const { result, passed, attempts, model } = await runClaudeStreamingWithRetry(
'fix the bug where the date formatter returns wrong timezone offset for DST transitions',
- (r) => hasSkillInvocations(r) && hasRequiredSkills(r, ['router']),
+ (r) => hasRequiredSkills(r, ['router']),
);
const skills = getSkillInvocations(result);
@@ -84,10 +99,10 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => {
});
it('PLAN/GUIDED — loads router and planning skills', async () => {
- const expected = ['patterns', 'software-design'];
+ const expected = ['test-driven-development', 'patterns', 'software-design', 'security'];
const { result, passed, attempts, model } = await runClaudeStreamingWithRetry(
'how should we design a caching layer for API responses?',
- (r) => hasSkillInvocations(r) && hasRequiredSkills(r, ['router']),
+ (r) => hasRequiredSkills(r, ['router']),
);
const skills = getSkillInvocations(result);
@@ -100,7 +115,7 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => {
const expected = ['quality-gates', 'software-design'];
const { result, passed, attempts, model } = await runClaudeStreamingWithRetry(
'check this error handling in the authentication module',
- (r) => hasSkillInvocations(r) && hasRequiredSkills(r, ['router']),
+ (r) => hasRequiredSkills(r, ['router']),
);
const skills = getSkillInvocations(result);
@@ -137,8 +152,8 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => {
expect(passed).toBe(true);
});
- it('RESOLVE/ORCHESTRATED — loads resolve, software-design', async () => {
- const required = ['resolve:orch', 'software-design'];
+ it('RESOLVE/ORCHESTRATED — loads resolve:orch', async () => {
+ const required = ['resolve:orch'];
const { result, passed, attempts, model } = await runClaudeStreamingWithRetry(
'resolve the review findings from the last code review',
(r) => hasSkillInvocations(r) && hasRequiredSkills(r, required),
@@ -163,6 +178,32 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => {
expect(passed).toBe(true);
});
+ it('DEBUG/ORCHESTRATED — loads debug:orch', async () => {
+ const required = ['debug:orch'];
+ const { result, passed, attempts, model } = await runClaudeStreamingWithRetry(
+ 'the webhook processor silently drops events across three modules when the payload exceeds 1MB — debug why the size check, queue handler, and retry logic all fail to surface the error',
+ (r) => hasSkillInvocations(r) && hasRequiredSkills(r, required),
+ );
+
+ const skills = getSkillInvocations(result);
+ console.log(`DEBUG/ORCHESTRATED: ${passed ? 'PASS' : 'FAIL'} (${model}, ${attempts} attempts, ${result.durationMs}ms). Skills: [${skills.join(', ')}]`);
+ if (!passed) console.warn(`Expected: ${required.join(', ')}. Got: [${skills.join(', ')}]`);
+ expect(passed).toBe(true);
+ });
+
+ it('PLAN/ORCHESTRATED — loads plan:orch, patterns', async () => {
+ const required = ['plan:orch', 'patterns'];
+ const { result, passed, attempts, model } = await runClaudeStreamingWithRetry(
+ 'design the architecture for a multi-service notification system with email, SMS, and push channels that supports user preferences and delivery guarantees',
+ (r) => hasSkillInvocations(r) && hasRequiredSkills(r, required),
+ );
+
+ const skills = getSkillInvocations(result);
+ console.log(`PLAN/ORCHESTRATED: ${passed ? 'PASS' : 'FAIL'} (${model}, ${attempts} attempts, ${result.durationMs}ms). Skills: [${skills.join(', ')}]`);
+ if (!passed) console.warn(`Expected: ${required.join(', ')}. Got: [${skills.join(', ')}]`);
+ expect(passed).toBe(true);
+ });
+
it('PIPELINE/ORCHESTRATED — loads pipeline, patterns', async () => {
const required = ['pipeline:orch', 'patterns'];
const { result, passed, attempts, model } = await runClaudeStreamingWithRetry(
diff --git a/tests/integration/helpers.ts b/tests/integration/helpers.ts
index 84eaafe1..87b844ff 100644
--- a/tests/integration/helpers.ts
+++ b/tests/integration/helpers.ts
@@ -1,4 +1,6 @@
import { execSync, spawn, ChildProcess } from 'child_process';
+import { readFileSync } from 'fs';
+import { resolve } from 'path';
const CLASSIFICATION_PATTERN = /devflow:\s*(CHAT|EXPLORE|PLAN|IMPLEMENT|DEBUG|REVIEW|RESOLVE|PIPELINE)\s*\/\s*(QUICK|GUIDED|ORCHESTRATED)/i;
@@ -14,13 +16,18 @@ export function isClaudeAvailable(): boolean {
}
}
-// SYNC: must match scripts/hooks/preamble PREAMBLE structure
-const DEVFLOW_PREAMBLE =
- `AMBIENT MODE ENABLED: Classify user intent and depth.
-Intents: CHAT (greetings/confirmations), EXPLORE (find/explain/analyze/trace/map), PLAN (plan/design/architecture), IMPLEMENT (add/create/build/implement), REVIEW (check/review), RESOLVE (resolve review issues), DEBUG (fix/bug/error), PIPELINE (end-to-end).
-Depth: QUICK (chat, simple lookups, git ops, config, rename/comment tweaks, 1-2 line edits) | GUIDED (code changes ≤2 files, clear bugs, focused reviews, focused exploration, focused design/plan) | ORCHESTRATED (>2 files, multi-module, vague bugs, full/branch/PR reviews, deep exploration, system-level design, RESOLVE and PIPELINE always).
-QUICK: respond normally. No classification, no skills.
-GUIDED/ORCHESTRATED: Load devflow:router skill FIRST via Skill tool for skill mappings. Then load all skills it specifies. State: Devflow: INTENT/DEPTH. Loading: [skills].`;
+/**
+ * Read classification-rules.md from disk.
+ * Simulates SessionStart injection for integration tests.
+ */
+function loadRouterContext(): string {
+ const rulesPath = resolve(__dirname, '../../shared/skills/router/references/classification-rules.md');
+ return readFileSync(rulesPath, 'utf-8').trim();
+}
+
+// Simulates SessionStart injection (classification rules) + per-message preamble
+const DEVFLOW_PREAMBLE = loadRouterContext() +
+ '\nClassify this request\'s intent and depth, then load devflow:router via Skill tool.';
/** Result from a streaming claude invocation */
export interface StreamResult {
diff --git a/tests/skill-references.test.ts b/tests/skill-references.test.ts
index 952ec3b8..47537ec1 100644
--- a/tests/skill-references.test.ts
+++ b/tests/skill-references.test.ts
@@ -687,25 +687,38 @@ describe('Test infrastructure skill references', () => {
}
});
- it('DEVFLOW_PREAMBLE skill refs in tests/integration/helpers.ts exist in actual hook preamble', () => {
- const helpersPath = path.join(ROOT, 'tests', 'integration', 'helpers.ts');
- const helpersContent = readFileSync(helpersPath, 'utf-8');
- const hookPath = path.join(ROOT, 'scripts', 'hooks', 'preamble');
- const hookContent = readFileSync(hookPath, 'utf-8');
+ it('DEVFLOW_PREAMBLE reads classification-rules.md which has valid refs', () => {
+ // helpers.ts loads DEVFLOW_PREAMBLE from classification-rules.md at runtime.
+ // Verify that every devflow: skill ref found in the classification rules exists in the canonical skill set.
+ const rulesPath = path.join(ROOT, 'shared', 'skills', 'router', 'references', 'classification-rules.md');
+ const rulesContent = readFileSync(rulesPath, 'utf-8');
+
+ const rulesRefs = extractPrefixedRefs(rulesContent);
+ const skillRefs = filterNonSkillRefs(rulesRefs);
+ const canonicalSkills = new Set(getAllSkillNames());
+
+ for (const ref of skillRefs) {
+ expect(
+ canonicalSkills.has(ref),
+ `classification-rules.md has 'devflow:${ref}' but it is not in canonical skill set`,
+ ).toBe(true);
+ }
+ });
- const helpersRefs = extractPrefixedRefs(helpersContent);
- const hookRefs = extractPrefixedRefs(hookContent);
- const hookSkillSet = new Set(hookRefs);
+ it('router SKILL.md skill refs match canonical set', () => {
+ // The lean router SKILL.md contains skill lookup tables.
+ const canonicalSkills = new Set(getAllSkillNames());
+ const routerPath = path.join(ROOT, 'shared', 'skills', 'router', 'SKILL.md');
+ const routerContent = readFileSync(routerPath, 'utf-8');
- // The new preamble is detection-only — helpers.ts DEVFLOW_PREAMBLE also has only router ref.
- // Just verify helpers.ts has at least one skill ref (devflow:router).
- expect(helpersRefs.length, 'helpers.ts DEVFLOW_PREAMBLE should have skill refs').toBeGreaterThan(0);
+ const routerRefs = extractPrefixedRefs(routerContent);
+ expect(routerRefs.length, 'router SKILL.md should have devflow: skill refs').toBeGreaterThan(0);
- const skillRefs = filterNonSkillRefs(helpersRefs);
+ const skillRefs = filterNonSkillRefs(routerRefs);
for (const ref of skillRefs) {
expect(
- hookSkillSet.has(ref),
- `tests/integration/helpers.ts DEVFLOW_PREAMBLE has 'devflow:${ref}' but scripts/hooks/preamble does not — preamble drift`,
+ canonicalSkills.has(ref),
+ `router SKILL.md has 'devflow:${ref}' but it is not in canonical skill set`,
).toBe(true);
}
});