diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 17141d47..fe38af55 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -7,21 +7,21 @@ }, "plugins": [ { - "name": "devflow-specify", - "source": "./plugins/devflow-specify", - "description": "Interactive feature specification - creates well-defined GitHub issues", + "name": "devflow-plan", + "source": "./plugins/devflow-plan", + "description": "Unified design planning with gap analysis and design review", "version": "1.8.3", "keywords": [ - "specification", - "requirements", "planning", - "issues" + "design", + "gap-analysis", + "architecture" ] }, { "name": "devflow-implement", "source": "./plugins/devflow-implement", - "description": "Complete task implementation workflow with exploration, planning, and coding", + "description": "Complete task implementation workflow - accepts plan documents, issues, or task descriptions", "version": "1.8.3", "keywords": [ "implementation", diff --git a/.gitignore b/.gitignore index 53b8a9f1..fcd670ba 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ plugins/*/agents/evaluator.md plugins/*/agents/tester.md plugins/*/agents/scrutinizer.md plugins/*/agents/validator.md +plugins/*/agents/designer.md npm-debug.log* yarn-debug.log* yarn-error.log* diff --git a/CLAUDE.md b/CLAUDE.md index f448a52f..44f2376e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,8 +16,8 @@ Plugin marketplace with 17 plugins (8 core + 9 optional language/ecosystem), eac | Plugin | Purpose | Teams Variant | |--------|---------|---------------| -| `devflow-specify` | Feature specification workflow | Optional | | `devflow-implement` | Complete task implementation lifecycle | Optional | +| `devflow-plan` | Unified design planning with gap analysis | Optional | | `devflow-code-review` | Comprehensive code review | Optional | | `devflow-resolve` | Review issue resolution | Optional | | `devflow-debug` | Competing hypothesis debugging | 
Optional | @@ -52,8 +52,8 @@ Commands with Teams Variant ship as `{name}.md` (parallel subagents) and `{name} ``` devflow/ -├── shared/skills/ # 39 skills (single source of truth) -├── shared/agents/ # 11 shared agents (single source of truth) +├── shared/skills/ # 41 skills (single source of truth) +├── shared/agents/ # 12 shared agents (single source of truth) ├── plugins/devflow-*/ # 17 plugins (8 core + 9 optional language/ecosystem) ├── docs/reference/ # Detailed reference documentation ├── scripts/ # Helper scripts (statusline, docs-helpers) @@ -98,7 +98,7 @@ All generated docs live under `.docs/` in the project root: │ ├── {focus}.md # Reviewer reports (security.md, etc.) │ ├── review-summary.md # Synthesizer output │ └── resolution-summary.md # Written by /resolve -└── design/ # Implementation plans +└── design/ # Design artifacts from /plan ``` Working memory files live in a dedicated `.memory/` directory: @@ -132,26 +132,26 @@ Working memory files live in a dedicated `.memory/` directory: **Universal Skill Installation**: All skills from all plugins are always installed, regardless of plugin selection. Skills are tiny markdown files installed as `~/.claude/skills/devflow:{name}/` (namespaced to avoid collisions with other plugin ecosystems). Source directories in `shared/skills/` stay unprefixed — the `devflow:` prefix is applied at install-time only. Shadow overrides live at `~/.devflow/skills/{name}/` (unprefixed); when shadowed, the installer copies the user's version to the prefixed install target. Only commands and agents remain plugin-specific. -**Model Strategy**: Explicit model assignments in agent frontmatter override the user's session model. Opus for analysis agents (reviewer, scrutinizer, evaluator), Sonnet for execution agents (coder, simplifier, resolver, skimmer, tester), Haiku for I/O agents (git, synthesizer, validator). +**Model Strategy**: Explicit model assignments in agent frontmatter override the user's session model. 
Opus for analysis agents (reviewer, scrutinizer, evaluator, designer), Sonnet for execution agents (coder, simplifier, resolver, skimmer, tester), Haiku for I/O agents (git, synthesizer, validator). ## Agent & Command Roster **Orchestration commands** (spawn agents, never do agent work in main session): -- `/specify` — Skimmer + Explore + Synthesizer + Plan + Synthesizer → GitHub issue -- `/implement` — Git + Skimmer + Explore + Synthesizer + Plan + Synthesizer + Coder + Simplifier + Scrutinizer + Evaluator + Tester → PR +- `/plan` — Skimmer + Explore + Designer + Synthesizer + Plan + Designer → design artifact +- `/implement` — Git + Coder + Validator + Simplifier + Scrutinizer + Evaluator + Tester → PR (accepts plan documents, issues, or task descriptions) - `/code-review` — 7-11 Reviewer agents + Git + Synthesizer - `/resolve` — N Resolver agents + Git - `/debug` — Agent Teams competing hypotheses - `/self-review` — Simplifier then Scrutinizer (sequential) - `/audit-claude` — CLAUDE.md audit (optional plugin) -**Shared agents** (11): git, synthesizer, skimmer, simplifier, coder, reviewer, resolver, evaluator, tester, scrutinizer, validator +**Shared agents** (12): git, synthesizer, skimmer, simplifier, coder, reviewer, resolver, evaluator, tester, scrutinizer, validator, designer **Plugin-specific agents** (1): claude-md-auditor **Orchestration skills** (7): implement:orch, explore:orch, debug:orch, plan:orch, review:orch, resolve:orch, pipeline:orch. These enable the same agent pipelines as slash commands but triggered via ambient intent classification. -**Agent Teams**: 5 commands use Agent Teams (`/code-review`, `/implement`, `/debug`, `/specify`, `/resolve`). One-team-per-session constraint — must TeamDelete before creating next team. +**Agent Teams**: 5 commands use Agent Teams (`/code-review`, `/implement`, `/plan`, `/debug`, `/resolve`). One-team-per-session constraint — must TeamDelete before creating next team. 
## Key Conventions diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index faa8b8a0..2ff72557 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,8 +24,8 @@ After setup, Devflow commands (`/code-review`, `/implement`, etc.) are available ``` devflow/ -├── shared/skills/ # 39 skills (single source of truth) -├── shared/agents/ # 11 shared agents (single source of truth) +├── shared/skills/ # 41 skills (single source of truth) +├── shared/agents/ # 12 shared agents (single source of truth) ├── plugins/devflow-*/ # 17 plugins (8 core + 9 optional) ├── scripts/hooks/ # Working Memory hooks ├── src/cli/ # TypeScript CLI (init, list, uninstall) diff --git a/README.md b/README.md index 126f1255..b42a7acb 100644 --- a/README.md +++ b/README.md @@ -48,11 +48,11 @@ Devflow: IMPLEMENT/ORCHESTRATED **18 parallel code reviewers.** Security, architecture, performance, complexity, consistency, regression, testing, and more. Each produces findings with severity, confidence scoring, and concrete fixes. Conditional reviewers activate when relevant (TypeScript for `.ts` files, database for schema changes). Every finding gets validated and resolved automatically. -**39 skills grounded in expert material.** Every skill is backed by peer-reviewed papers, canonical books, and industry standards — security (OWASP, Shostack), architecture (Parnas, Evans, Fowler), performance (Brendan Gregg), testing (Beck, Meszaros), design (Wlaschin, Hickey). 200+ sources total. +**41 skills grounded in expert material.** Every skill is backed by peer-reviewed papers, canonical books, and industry standards — security (OWASP, Shostack), architecture (Parnas, Evans, Fowler), performance (Brendan Gregg), testing (Beck, Meszaros), design (Wlaschin, Hickey). 200+ sources total. **Skill shadowing.** Override any built-in skill with your own version. Drop a file into `~/.devflow/skills/{name}/` and the installer uses yours instead of the default — same activation, your rules. 
-**Full lifecycle.** `/implement` takes a task from exploration through planning, coding, validation, and refinement. `/specify` defines features with clarification gates. `/debug` investigates bugs with competing hypotheses in parallel. `/self-review` runs Simplifier + Scrutinizer quality passes. +**Full lifecycle.** `/plan` takes a feature idea through codebase exploration, gap analysis, and design review, then outputs a plan document ready for `/implement`. `/implement` accepts that plan document (or an issue or task description directly) and drives it through coding, validation, and refinement to a PR. `/debug` investigates bugs with competing hypotheses in parallel. `/self-review` runs Simplifier + Scrutinizer quality passes. **Everything is composable.** 17 plugins (8 core + 9 language/ecosystem). Install only what you need. Six commands cover the entire development lifecycle. @@ -61,7 +61,7 @@ Devflow: IMPLEMENT/ORCHESTRATED ``` devflow · feat/auth-middleware* · 3↑ · v1.8.3 +5 · 12 files · +234 -56 Current Session ████░░░░ 42% · Session 5h ██░░░░░░ 18% · 7d █░░░░░░░ 8% -Opus 4.6 [1m] · 23m · $1.24 · 2 CLAUDE.md · 4 MCPs · 8 hooks · 39 skills +Opus 4.6 [1m] · 23m · $1.24 · 2 CLAUDE.md · 4 MCPs · 8 hooks · 41 skills ``` **Security.** Deny lists block dangerous tool patterns out of the box — configurable during init. @@ -78,8 +78,8 @@ That's it. 
The interactive wizard handles plugin selection, feature configuratio | Command | What it does | |---------|-------------| -| `/specify` | Define a feature with clarification gates → GitHub issue | -| `/implement` | Full lifecycle: explore → plan → code → validate → refine → PR | +| `/plan` | Full design pipeline: explore → gap analysis → design → implementation-ready plan document | +| `/implement` | Execute plan: accepts plan documents from `/plan`, issues, or task descriptions → PR | | `/code-review` | Multi-perspective parallel code review | | `/resolve` | Validate and fix all review issues | | `/debug` | Competing hypothesis investigation | diff --git a/docs/cli-reference.md b/docs/cli-reference.md index be2545c9..cf0cde7a 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -43,7 +43,7 @@ npx devflow-kit init --plugin=implement,code-review # Install multiple | Plugin | Type | Description | |--------|------|-------------| -| `devflow-specify` | Core | Feature specification workflow | +| `devflow-plan` | Core | Unified design planning with gap analysis | | `devflow-implement` | Core | Complete task implementation lifecycle | | `devflow-code-review` | Core | Comprehensive code review | | `devflow-resolve` | Core | Review issue resolution | diff --git a/docs/commands.md b/docs/commands.md index 536607bd..14dd67c1 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -2,38 +2,43 @@ Devflow provides six commands that orchestrate specialized agents. Commands spawn agents — they never do the work themselves. -## /specify +## /plan -Interactive feature specification with three mandatory gates: +Unified design planning from requirements discovery through implementation design: -1. **Understanding Gate** — Confirm the feature idea is understood -2. **Scope Gate** — Validate priorities and boundaries -3. **Acceptance Gate** — Confirm success criteria +1. **Gate 0** — Confirm understanding of the requirement +2. 
**Requirements Discovery** — Parallel exploration agents analyze codebase +3. **Gap Analysis** — Identify missing pieces, risks, and dependencies +4. **Gate 1** — Validate scope and gaps with user +5. **Implementation Design** — Parallel planning agents design the approach +6. **Design Review + Gate 2** — Review the design artifact, user approves the final plan -Creates a well-defined GitHub issue ready for `/implement`. +Produces a machine-readable design artifact in `.docs/design/` consumed by `/implement`. ``` -/specify # Start interactive specification +/plan add JWT auth # From description +/plan #42 # From GitHub issue +/plan #12 #15 #18 # Multi-issue +/plan # From conversation context ``` ## /implement -Executes a single task through the complete development lifecycle: +Executes a single task through the complete development lifecycle. Accepts plan documents, GitHub issues, or task descriptions. -1. **Setup** — Auto-create feature branch (detects repo naming conventions) -2. **Exploration** — Analyze codebase for relevant patterns and dependencies -3. **Planning** — Design the implementation approach -4. **Implementation** — Write code on the feature branch -5. **Validation** — Build, typecheck, lint, and test -6. **Refinement** — Simplifier (code clarity) + Scrutinizer (9-pillar quality) -7. **Alignment** — Evaluator verifies implementation matches the original request -8. **QA Testing** — Tester executes scenario-based acceptance tests +1. **Setup** — Auto-create feature branch, parse plan document or fetch issue +2. **Implementation** — Write code on the feature branch +3. **Validation** — Build, typecheck, lint, and test +4. **Refinement** — Simplifier (code clarity) + Scrutinizer (9-pillar quality) +5. **Alignment** — Evaluator verifies implementation matches the original request +6. **QA Testing** — Tester executes scenario-based acceptance tests Creates a PR when complete. 
``` -/implement add JWT auth # From description +/implement .docs/design/42-jwt-auth.2026-04-07_1430.md # From plan document /implement #42 # From GitHub issue +/implement add JWT auth # From description /implement # From conversation context ``` diff --git a/docs/reference/agent-design.md b/docs/reference/agent-design.md index e4e04d8b..f5c9aa26 100644 --- a/docs/reference/agent-design.md +++ b/docs/reference/agent-design.md @@ -49,7 +49,7 @@ When an agent only needs a subset of tools, prefer platform-enforced restriction | Agent Type | Target Lines | Examples | |------------|-------------|----------| | Utility | 50-80 | Skimmer, Simplifier, Validator | -| Worker | 80-120 | Coder, Reviewer, Git | +| Worker | 80-120 | Coder, Reviewer, Git, Designer | | Orchestration | 100-150 | (Commands handle orchestration, not agents) | ## What Belongs Where @@ -106,4 +106,4 @@ Before committing a new or modified agent: 3. Test with explicit invocation 4. Document in plugin README.md -**Note:** Shared agents live in `shared/agents/` and are distributed at build time. Only create plugin-specific agents when tightly coupled to a single workflow (e.g., `claude-md-auditor.md`). +**Note:** Shared agents live in `shared/agents/` and are distributed at build time (e.g., `git.md`, `coder.md`, `designer.md`). Only create plugin-specific agents when tightly coupled to a single workflow (e.g., `claude-md-auditor.md`). diff --git a/docs/reference/file-organization.md b/docs/reference/file-organization.md index bb26b61a..fd5a437b 100644 --- a/docs/reference/file-organization.md +++ b/docs/reference/file-organization.md @@ -9,19 +9,19 @@ devflow/ ├── .claude-plugin/ # Marketplace registry (repo root) │ └── marketplace.json ├── shared/ -│ ├── skills/ # SINGLE SOURCE OF TRUTH (39 skills) +│ ├── skills/ # SINGLE SOURCE OF TRUTH (41 skills) │ │ ├── git/ │ │ │ ├── SKILL.md │ │ │ └── references/ │ │ ├── software-design/ │ │ └── ... 
-│ └── agents/ # SINGLE SOURCE OF TRUTH (11 shared agents) +│ └── agents/ # SINGLE SOURCE OF TRUTH (12 shared agents) │ ├── git.md │ ├── synthesizer.md │ ├── coder.md │ └── ... ├── plugins/ # Plugin collection (17 plugins) -│ ├── devflow-specify/ +│ ├── devflow-plan/ │ │ ├── .claude-plugin/ │ │ │ └── plugin.json │ │ ├── commands/ @@ -135,7 +135,7 @@ Skills and agents are **not duplicated** in git. Instead: ### Shared vs Plugin-Specific Agents -- **Shared** (11): `git`, `synthesizer`, `skimmer`, `simplifier`, `coder`, `reviewer`, `resolver`, `evaluator`, `tester`, `scrutinizer`, `validator` +- **Shared** (12): `git`, `synthesizer`, `skimmer`, `simplifier`, `coder`, `reviewer`, `resolver`, `evaluator`, `tester`, `scrutinizer`, `validator`, `designer` - **Plugin-specific** (1): `claude-md-auditor` — committed directly in its plugin ## Settings Override diff --git a/docs/reference/skills-architecture.md b/docs/reference/skills-architecture.md index 65c645f0..0cc89c0e 100644 --- a/docs/reference/skills-architecture.md +++ b/docs/reference/skills-architecture.md @@ -18,9 +18,9 @@ Shared patterns used by multiple agents. 
| `docs-framework` | Documentation conventions (.docs/ structure, naming, templates) | Synthesizer | | `git` | Git safety, atomic commits, PR descriptions, GitHub API patterns | Coder, Git, Resolver | | `patterns` | CRUD, API endpoints, events, config, logging | Coder, Resolver | -| `agent-teams` | Agent Teams patterns for peer-to-peer collaboration, debate, consensus | /code-review, /implement, /debug | +| `agent-teams` | Agent Teams patterns for peer-to-peer collaboration, debate, consensus | /code-review, /implement, /debug, /plan | | `router` | Intent classification and proportional skill loading for Devflow mode (unrestricted tools — orchestrator) | Ambient UserPromptSubmit hook | -| `knowledge-persistence` | Record/load architectural decisions and pitfalls to `.memory/knowledge/` | /implement, /code-review, /resolve, /debug, /specify, /self-review | +| `knowledge-persistence` | Record/load architectural decisions and pitfalls to `.memory/knowledge/` | /implement, /code-review, /resolve, /debug, /plan, /self-review | | `qa` | Scenario-based acceptance testing methodology, evidence collection | Tester | ### Tier 1b: Pattern Skills @@ -47,6 +47,8 @@ Listed in Claude Code's skill catalog. May auto-invoke based on description matc | Skill | Purpose | Agent Refs | |-------|---------|------------| | `boundary-validation` | Boundary validation enforcement | Coder | +| `gap-analysis` | Gap analysis for design plans — missing flows, edge cases, failure modes | Designer | +| `design-review` | Design review patterns — architectural feasibility, tradeoffs, alternatives | Designer | | `research` | Research-before-building enforcement for utility code | Coder | | `test-driven-development` | RED-GREEN-REFACTOR cycle enforcement | Coder | @@ -263,12 +265,3 @@ For language/framework patterns: 4. Add to relevant plugin manifests 5. Run `npm run build` to distribute -## Clarification Gates - -The `/specify` command uses **mandatory clarification gates**: - -1. 
**Gate 0 (Before Exploration)**: Confirm understanding of feature idea -2. **Gate 1 (After Exploration)**: Validate scope and priorities -3. **Gate 2 (Before Issue Creation)**: Confirm acceptance criteria - -No gate may be skipped. If user says "whatever you think", state recommendation and get explicit approval. diff --git a/plugins/devflow-ambient/.claude-plugin/plugin.json b/plugins/devflow-ambient/.claude-plugin/plugin.json index 0813519c..5c2165d3 100644 --- a/plugins/devflow-ambient/.claude-plugin/plugin.json +++ b/plugins/devflow-ambient/.claude-plugin/plugin.json @@ -26,7 +26,8 @@ "reviewer", "git", "synthesizer", - "resolver" + "resolver", + "designer" ], "skills": [ "router", @@ -51,6 +52,8 @@ "patterns", "knowledge-persistence", "qa", - "worktree-support" + "worktree-support", + "gap-analysis", + "design-review" ] } diff --git a/plugins/devflow-implement/.claude-plugin/plugin.json b/plugins/devflow-implement/.claude-plugin/plugin.json index c8ef0de7..0af7e8d4 100644 --- a/plugins/devflow-implement/.claude-plugin/plugin.json +++ b/plugins/devflow-implement/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "devflow-implement", - "description": "Complete task implementation workflow - orchestrates exploration, planning, coding, validation, and PR creation", + "description": "Complete task implementation workflow - accepts plan documents, issues, or task descriptions", "author": { "name": "Dean0x" }, @@ -18,8 +18,6 @@ ], "agents": [ "git", - "skimmer", - "synthesizer", "coder", "simplifier", "scrutinizer", diff --git a/plugins/devflow-implement/README.md b/plugins/devflow-implement/README.md index d6991aaf..3a6674bc 100644 --- a/plugins/devflow-implement/README.md +++ b/plugins/devflow-implement/README.md @@ -1,6 +1,6 @@ # devflow-implement -Complete task implementation workflow for Claude Code. Orchestrates exploration, planning, coding, validation, and PR creation through specialized agents. +Complete task implementation workflow for Claude Code. 
Accepts plan documents, issues, or task descriptions and drives them through coding, validation, and PR creation. ## Installation @@ -22,15 +22,14 @@ npx devflow-kit init --plugin=implement ## Workflow -1. **Exploration** - Skimmer + Explore agents understand the codebase -2. **Planning** - Plan agents design implementation approach -3. **Implementation** - Coder agent implements on feature branch -4. **Validation** - Validator runs build/test/lint checks -5. **Simplification** - Simplifier refines code clarity -6. **Self-Review** - Scrutinizer evaluates against 9-pillar framework -7. **Alignment Check** - Evaluator validates against original request -8. **QA Testing** - Tester executes scenario-based acceptance tests -9. **PR Creation** - Git agent creates pull request +1. **Setup** - Git agent creates feature branch, parses plan document or fetches issue +2. **Implementation** - Coder agent implements on feature branch +3. **Validation** - Validator runs build/test/lint checks +4. **Simplification** - Simplifier refines code clarity +5. **Self-Review** - Scrutinizer evaluates against 9-pillar framework +6. **Alignment Check** - Evaluator validates against original request +7. **QA Testing** - Tester executes scenario-based acceptance tests +8. 
**PR Creation** - Git agent creates pull request ## Components @@ -39,8 +38,6 @@ npx devflow-kit init --plugin=implement ### Agents - `git` - GitHub operations (setup, PR creation) -- `skimmer` - Codebase orientation -- `synthesizer` - Output synthesis - `coder` - Autonomous implementation - `simplifier` - Code refinement - `scrutinizer` - Self-review (9-pillar framework) @@ -64,6 +61,6 @@ npx devflow-kit init --plugin=implement ## Related Plugins -- [devflow-specify](../devflow-specify) - Specify the task first +- [devflow-plan](../devflow-plan) - Plan the task first - [devflow-code-review](../devflow-code-review) - Review the implementation - [devflow-resolve](../devflow-resolve) - Fix review issues diff --git a/plugins/devflow-implement/commands/implement-teams.md b/plugins/devflow-implement/commands/implement-teams.md index cc675fbb..33dad8a7 100644 --- a/plugins/devflow-implement/commands/implement-teams.md +++ b/plugins/devflow-implement/commands/implement-teams.md @@ -1,26 +1,30 @@ --- -description: Execute a single task through the complete lifecycle - orchestrates team-based exploration, planning, implementation, and quality gates +description: Execute a single task through team-based implementation, quality gates, and PR creation - accepts plan documents, issues, or task descriptions --- # Implement Command -Orchestrate a single task from exploration through implementation by spawning specialized agent teams for collaborative exploration and planning, then implementation agents for coding and quality gates. +Orchestrate a single task through implementation by spawning specialized agent teams for collaborative alignment checking, then implementation agents for coding and quality gates. 
## Usage ``` /implement -/implement #42 (GitHub issue number) -/implement (use conversation context) +/implement #42 (GitHub issue number) +/implement .docs/design/42-jwt-auth.2026-04-07_1430.md (plan document from /plan) +/implement (use conversation context) ``` ## Input `$ARGUMENTS` contains whatever follows `/implement`: +- Plan document path: `.docs/design/42-jwt-auth.2026-04-07_1430.md` (path to an existing `.md` file) +- GitHub issue: `#42` - Task description: "implement JWT auth" -- GitHub issue: "#42" - Empty: use conversation context +> **Tip**: For best results, run `/plan` first to produce a design artifact, then pass it to `/implement`. + ## Phases ### Phase 1: Setup @@ -34,7 +38,7 @@ Agent(subagent_type="Git"): "OPERATION: setup-task BASE_BRANCH: {current branch name} ISSUE_INPUT: {issue number if $ARGUMENTS starts with #, otherwise omit} -TASK_DESCRIPTION: {task description from $ARGUMENTS if not an issue number, otherwise omit} +TASK_DESCRIPTION: {task description from $ARGUMENTS if not an issue number or .md path, otherwise omit} Derive branch name from issue or description, create feature branch, and fetch issue if specified. Return the branch setup summary." ``` @@ -46,230 +50,21 @@ Return the branch setup summary." 
- `ISSUE_CONTENT`: Full issue body including description (if provided) - `ACCEPTANCE_CRITERIA`: Extracted acceptance criteria from issue (if provided) -### Phase 2: Orient - -Spawn Skimmer agent for codebase overview: - -``` -Agent(subagent_type="Skimmer"): -"Orient in codebase for: {task description} -Run rskim on source directories (NOT repo root) to identify relevant files, functions, integration points" -``` - -### Phase 3: Exploration Team - -Create an agent team for collaborative codebase exploration: - -``` -Create a team named "explore-{task-id}" to explore the codebase for: {task description} - -Spawn exploration teammates with self-contained prompts: - -- Name: "architecture-explorer" - Prompt: | - You are exploring a codebase for task: {task description} - 1. Read your skill: `Read ~/.claude/skills/devflow:patterns/SKILL.md` - 2. Read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md` if they exist. - Consider prior decisions and known pitfalls relevant to this task. - 3. Skimmer context (files/patterns already identified): - {skimmer output} - 4. Your deliverable: Find similar implementations, established patterns, - module structure, and architectural conventions relevant to this task. - 5. Document findings with file:path references. - 6. Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Architecture exploration done") - -- Name: "integration-explorer" - Prompt: | - You are exploring a codebase for task: {task description} - 1. Read your skill: `Read ~/.claude/skills/devflow:patterns/SKILL.md` - 2. Read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md` if they exist. - Consider prior decisions and known pitfalls relevant to this task. - 3. Skimmer context (files/patterns already identified): - {skimmer output} - 4. Your deliverable: Find entry points, services, database models, - configuration, and integration points relevant to this task. - 5. 
Document findings with file:path references. - 6. Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Integration exploration done") - -- Name: "reusable-code-explorer" - Prompt: | - You are exploring a codebase for task: {task description} - 1. Read your skill: `Read ~/.claude/skills/devflow:patterns/SKILL.md` - 2. Read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md` if they exist. - Consider prior decisions and known pitfalls relevant to this task. - 3. Skimmer context (files/patterns already identified): - {skimmer output} - 4. Your deliverable: Find utilities, helpers, validation patterns, - and error handling that can be reused for this task. - 5. Document findings with file:path references. - 6. Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Reusable code exploration done") - -- Name: "edge-case-explorer" - Prompt: | - You are exploring a codebase for task: {task description} - 1. Read your skill: `Read ~/.claude/skills/devflow:patterns/SKILL.md` - 2. Read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md` if they exist. - Consider prior decisions and known pitfalls relevant to this task. - 3. Skimmer context (files/patterns already identified): - {skimmer output} - 4. Your deliverable: Find error scenarios, race conditions, permission - failures, and boundary cases relevant to this task. - 5. Document findings with file:path references. - 6. 
Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Edge case exploration done") - -After initial exploration, lead initiates debate: -SendMessage(type: "broadcast", summary: "Debate: challenge exploration findings"): -- Architecture challenges edge cases: "This boundary isn't handled by existing patterns" -- Integration challenges reusable code: "That helper doesn't cover our integration point" -- Edge cases challenges architecture: "This pattern fails under concurrent access" -Teammates use SendMessage(type: "message", recipient: "{name}") for direct challenges. - -Max 2 debate rounds, then submit consensus exploration findings. -``` - -**Exploration team output**: Consensus findings on patterns, integration points, reusable code, edge cases. - -**Team Shutdown Protocol** (must complete before Phase 5): - -``` -Step 1: Shutdown each teammate - SendMessage(type: "shutdown_request", recipient: "architecture-explorer", content: "Exploration complete") - SendMessage(type: "shutdown_request", recipient: "integration-explorer", content: "Exploration complete") - SendMessage(type: "shutdown_request", recipient: "reusable-code-explorer", content: "Exploration complete") - SendMessage(type: "shutdown_request", recipient: "edge-case-explorer", content: "Exploration complete") - Wait for each shutdown_response (approve: true) - -Step 2: TeamDelete - -Step 3: GATE — Verify TeamDelete succeeded - If failed → retry once after 5s - If retry failed → HALT and report: "Exploration team cleanup failed. Cannot create planning team." -``` - -### Phase 4: Synthesize Exploration - -**CRITICAL**: Do NOT synthesize outputs yourself in the main session. -You MUST spawn the Synthesizer agent. 
- -``` -Agent(subagent_type="Synthesizer"): -"Synthesize EXPLORATION outputs for: {task} -Mode: exploration -Explorer consensus: {team exploration consensus output} -Combine into: patterns, integration points, reusable code, edge cases" -``` - -### Phase 5: Planning Team - -Create an agent team for collaborative implementation planning: - -``` -Create a team named "plan-{task-id}" to plan implementation of: {task description} - -Spawn planning teammates with self-contained prompts: - -- Name: "implementation-planner" - Prompt: | - You are planning implementation for task: {task description} - 1. Read your skill: `Read ~/.claude/skills/devflow:patterns/SKILL.md` - 2. Exploration synthesis (what we know about the codebase): - {synthesis output from Phase 4} - 3. Your deliverable: Step-by-step coding approach with specific files - to create/modify, dependencies between steps, and execution order. - 4. Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Implementation plan ready") - -- Name: "testing-planner" - Prompt: | - You are planning the test strategy for task: {task description} - 1. Read your skill: `Read ~/.claude/skills/devflow:testing/SKILL.md` - 2. Exploration synthesis (what we know about the codebase): - {synthesis output from Phase 4} - 3. Your deliverable: Test strategy — unit tests, integration tests, - edge case coverage, testing patterns to follow from the codebase. - 4. Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Test plan ready") - -- Name: "risk-planner" - Prompt: | - You are assessing risk and execution strategy for task: {task description} - 1. Read your skill: `Read ~/.claude/skills/devflow:patterns/SKILL.md` - 2. Exploration synthesis (what we know about the codebase): - {synthesis output from Phase 4} - 3. 
Your deliverable: Risk assessment, rollback strategy, and execution - strategy decision (SINGLE_CODER vs SEQUENTIAL_CODERS vs PARALLEL_CODERS) - based on artifact independence, context capacity, and domain specialization. - 4. Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Risk assessment ready") - -After initial planning, lead initiates debate: -SendMessage(type: "broadcast", summary: "Debate: challenge implementation plans"): -- Testing challenges implementation: "This approach is untestable without major refactoring" -- Risk challenges both: "Rollback is impossible with this migration strategy" -- Implementation challenges testing: "Full coverage here adds 3x complexity for minimal value" -Teammates use SendMessage(type: "message", recipient: "{name}") for direct challenges. - -Max 2 debate rounds, then submit consensus plan. -``` - -**Execution Strategy** (from Risk & Execution planner, validated by team): - -| Axis | Signals | Decision Impact | -|------|---------|-----------------| -| **Artifact Independence** | Shared contracts? Integration points? 
| If coupled → SINGLE_CODER | -| **Context Capacity** | File count, module breadth, pattern complexity | HIGH/CRITICAL → SEQUENTIAL_CODERS | -| **Domain Specialization** | Tech stack detected (backend, frontend, tests) | Determines DOMAIN hints for Coders | - -**Context Risk Levels:** -- **LOW**: <10 files, single module → SINGLE_CODER -- **MEDIUM**: 10-20 files, 2-3 modules → Consider SEQUENTIAL_CODERS -- **HIGH**: 20-30 files, multiple modules → SEQUENTIAL_CODERS (2-3 phases) -- **CRITICAL**: >30 files, cross-cutting concerns → SEQUENTIAL_CODERS (more phases) - -**Team Shutdown Protocol** (must complete before Phase 7): - -``` -Step 1: Shutdown each teammate - SendMessage(type: "shutdown_request", recipient: "implementation-planner", content: "Planning complete") - SendMessage(type: "shutdown_request", recipient: "testing-planner", content: "Planning complete") - SendMessage(type: "shutdown_request", recipient: "risk-planner", content: "Planning complete") - Wait for each shutdown_response (approve: true) - -Step 2: TeamDelete - -Step 3: GATE — Verify TeamDelete succeeded - If failed → retry once after 5s - If retry failed → HALT and report: "Planning team cleanup failed. Cannot proceed to implementation." -``` - -### Phase 6: Synthesize Planning - -**CRITICAL**: Do NOT synthesize outputs yourself in the main session. -You MUST spawn the Synthesizer agent. - -``` -Agent(subagent_type="Synthesizer"): -"Synthesize PLANNING outputs for: {task} -Mode: planning -Planner consensus: {team planning consensus output} -Combine into: execution plan with strategy decision (SINGLE_CODER | SEQUENTIAL_CODERS | PARALLEL_CODERS)" -``` - -**Synthesizer returns:** -- Execution strategy type and reasoning -- Context risk level -- Subtask breakdown with DOMAIN hints (if not SINGLE_CODER) -- Implementation plan with dependencies +**Plan Document Handling** (when $ARGUMENTS is a path ending in `.md`): +1. Read the plan document from the path provided +2. 
Extract from YAML frontmatter: `execution-strategy`, `context-risk`, `issue` number +3. Extract from body: Subtask Breakdown, Implementation Plan, Patterns to Follow, Acceptance Criteria +4. If `issue` field present in frontmatter: pass to Git agent as ISSUE_INPUT +5. Use extracted content as EXECUTION_PLAN for the Coder phase (replaces exploration/planning output) +6. Captured values override defaults from Git agent where present -### Phase 7: Implement +### Phase 2: Implement -Based on Phase 6 synthesis, use the three-strategy framework: +Based on Setup context (plan document, issue body, or conversation context), use the three-strategy framework: -**Strategy Selection** (from planning team consensus): +**Strategy Selection**: +- If plan document provided: use `execution-strategy` from frontmatter (default: SINGLE_CODER if absent) +- Otherwise: default to SINGLE_CODER unless task description signals high complexity | Strategy | When | Frequency | |----------|------|-----------| @@ -286,8 +81,8 @@ Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: {description} BASE_BRANCH: {base branch} -EXECUTION_PLAN: {full plan from synthesis} -PATTERNS: {patterns from exploration} +EXECUTION_PLAN: {full plan from setup context} +PATTERNS: {patterns from plan document or empty} CREATE_PR: true DOMAIN: {detected domain or 'fullstack'}" ``` @@ -305,7 +100,7 @@ Agent(subagent_type="Coder"): TASK_DESCRIPTION: {phase 1 description} BASE_BRANCH: {base branch} EXECUTION_PLAN: {phase 1 steps} -PATTERNS: {patterns from exploration} +PATTERNS: {patterns from plan document or empty} CREATE_PR: false DOMAIN: {phase 1 domain, e.g., 'backend'} HANDOFF_REQUIRED: true" @@ -318,7 +113,7 @@ Agent(subagent_type="Coder"): TASK_DESCRIPTION: {phase N description} BASE_BRANCH: {base branch} EXECUTION_PLAN: {phase N steps} -PATTERNS: {patterns from exploration} +PATTERNS: {patterns from plan document or empty} CREATE_PR: {true if last phase, false otherwise} DOMAIN: {phase N domain, 
e.g., 'frontend'} PRIOR_PHASE_SUMMARY: {summary from previous Coder} @@ -360,7 +155,7 @@ DOMAIN: {subtask 2 domain}" - Different files/modules with no imports between them - Each subtask is self-contained -### Phase 8: Validate +### Phase 3: Validate After Coder completes, spawn Validator to verify correctness: @@ -385,12 +180,12 @@ Run build, typecheck, lint, test. Report pass/fail with failure details." SCOPE: Fix only the listed failures, no other changes CREATE_PR: false" ``` - - Loop back to Phase 8 (re-validate) + - Loop back to Phase 3 (re-validate) 4. If `validation_retry_count > 2`: Report failures to user and halt -**If PASS:** Continue to Phase 9 +**If PASS:** Continue to Phase 4 -### Phase 9: Simplify +### Phase 4: Simplify After validation passes, spawn Simplifier to polish the code: @@ -402,7 +197,7 @@ FILES_CHANGED: {list of files from Coder output} Focus on code modified by Coder, apply project standards, enhance clarity" ``` -### Phase 10: Self-Review +### Phase 5: Self-Review After Simplifier completes, spawn Scrutinizer as final quality gate: @@ -415,7 +210,7 @@ Evaluate 9 pillars, fix P0/P1 issues, report status" If Scrutinizer returns BLOCKED, report to user and halt. -### Phase 11: Re-Validate (if Scrutinizer made changes) +### Phase 6: Re-Validate (if Scrutinizer made changes) If Scrutinizer made code changes (status: FIXED), spawn Validator to verify: @@ -428,9 +223,9 @@ Verify Scrutinizer's fixes didn't break anything." **If FAIL:** Report to user - Scrutinizer broke tests, needs manual intervention. -**If PASS:** Continue to Phase 12 +**If PASS:** Continue to Phase 7 -### Phase 12: Evaluator↔Coder Dialogue +### Phase 7: Evaluator↔Coder Dialogue After Scrutinizer passes (and re-validation if needed), check alignment using direct dialogue: @@ -445,7 +240,7 @@ Spawn teammates with self-contained prompts: Prompt: | You are validating that the implementation aligns with the original request. 
ORIGINAL_REQUEST: {task description or issue content} - EXECUTION_PLAN: {synthesized plan from Phase 6} + EXECUTION_PLAN: {execution plan from Phase 1} FILES_CHANGED: {list of files from Coder output} ACCEPTANCE_CRITERIA: {extracted criteria if available} @@ -480,7 +275,7 @@ Spawn teammates with self-contained prompts: summary: "Alignment fixes complete") ``` -**Team Shutdown Protocol** (must complete before Phase 13): +**Team Shutdown Protocol** (must complete before Phase 8): ``` Step 1: Shutdown each teammate @@ -495,7 +290,7 @@ Step 3: GATE — Verify TeamDelete succeeded If retry failed → HALT and report: "Alignment team cleanup failed." ``` -**If ALIGNED:** Continue to Phase 13 +**If ALIGNED:** Continue to Phase 8 **If MISALIGNED:** 1. Extract misalignment details from Evaluator output @@ -518,23 +313,23 @@ Step 3: GATE — Verify TeamDelete succeeded VALIDATION_SCOPE: changed-only" ``` - If Validator FAIL: Report to user - - If Validator PASS: Loop back to Phase 12 (re-check alignment) + - If Validator PASS: Loop back to Phase 7 (re-check alignment) 4. If `alignment_fix_count > 2`: Report misalignments to user for decision -### Phase 13: QA Testing +### Phase 8: QA Testing After Evaluator passes, spawn Tester for scenario-based acceptance testing (standalone agent, not a teammate — testing is sequential, not debate): ``` Agent(subagent_type="Tester"): "ORIGINAL_REQUEST: {task description or issue content} -EXECUTION_PLAN: {synthesized plan from Phase 6} +EXECUTION_PLAN: {execution plan from Phase 1} FILES_CHANGED: {list of files from Coder output} ACCEPTANCE_CRITERIA: {extracted criteria if available} Design and execute scenario-based acceptance tests. Report PASS or FAIL with evidence." ``` -**If PASS:** Continue to Phase 14 +**If PASS:** Continue to Phase 9 **If FAIL:** 1. Extract failure details from Tester output @@ -557,21 +352,19 @@ Design and execute scenario-based acceptance tests. 
Report PASS or FAIL with evi VALIDATION_SCOPE: changed-only" ``` - If Validator FAIL: Report to user - - If Validator PASS: Loop back to Phase 13 (re-run Tester) + - If Validator PASS: Loop back to Phase 8 (re-run Tester) 4. If `qa_retry_count > 2`: Report QA failures to user for decision -### Phase 14: Create PR +### Phase 9: Create PR **For SEQUENTIAL_CODERS or PARALLEL_CODERS**: The last sequential Coder (with CREATE_PR: true) handles PR creation. For parallel coders, create unified PR using `devflow:git` skill patterns. Push branch and run `gh pr create` with comprehensive description, targeting `BASE_BRANCH`. **For SINGLE_CODER**: PR is created by the Coder agent (CREATE_PR: true). -### Phase 15: Report +### Phase 10: Report + Record Decisions Display completion summary with phase status, PR info, and next steps. -### Phase 16: Record Decisions (if any) - If the Coder's report includes Key Decisions with architectural significance: 1. Read `~/.claude/skills/devflow:knowledge-persistence/SKILL.md` and follow its extraction procedure to record decisions to `.memory/knowledge/decisions.md` 2. 
Source field: `/implement {TASK_ID}` @@ -584,79 +377,57 @@ If the Coder's report includes Key Decisions with architectural significance: │ ├─ Phase 1: Setup │ └─ Git agent (operation: setup-task) - creates feature branch, fetches issue +│ └─ Plan document parsing (if .md path provided) - extracts execution plan, strategy │ -├─ Phase 2: Orient -│ └─ Skimmer agent (codebase overview via skim) -│ -├─ Phase 3: Exploration Team (Agent Teams) -│ ├─ Architecture Explorer (teammate) -│ ├─ Integration Explorer (teammate) -│ ├─ Reusable Code Explorer (teammate) -│ ├─ Edge Case Explorer (teammate) -│ └─ Debate → consensus exploration findings -│ -├─ Phase 4: Synthesize Exploration -│ └─ Synthesizer agent (mode: exploration) -│ -├─ Phase 5: Planning Team (Agent Teams) -│ ├─ Implementation Planner (teammate) -│ ├─ Testing Planner (teammate) -│ ├─ Risk & Execution Planner (teammate) -│ └─ Debate → consensus plan with strategy decision -│ -├─ Phase 6: Synthesize Planning -│ └─ Synthesizer agent (mode: planning) → returns strategy + DOMAIN hints -│ -├─ Phase 7: Implement (3-strategy framework) +├─ Phase 2: Implement (3-strategy framework) │ ├─ SINGLE_CODER (80%): One Coder, full plan, CREATE_PR: true │ ├─ SEQUENTIAL_CODERS (15%): N Coders with handoff summaries │ └─ PARALLEL_CODERS (5%): N Coders in single message (rare) │ -├─ Phase 8: Validate +├─ Phase 3: Validate │ └─ Validator agent (build, typecheck, lint, test) │ └─ If FAIL: Coder fix loop (max 2 retries) → re-validate │ -├─ Phase 9: Simplify +├─ Phase 4: Simplify │ └─ Simplifier agent (refines code clarity and consistency) │ -├─ Phase 10: Self-Review +├─ Phase 5: Self-Review │ └─ Scrutinizer agent (final quality gate, fixes P0/P1) │ -├─ Phase 11: Re-Validate (if Scrutinizer made changes) +├─ Phase 6: Re-Validate (if Scrutinizer made changes) │ └─ Validator agent (verify Scrutinizer fixes) │ -├─ Phase 12: Evaluator↔Coder Dialogue (Agent Teams) +├─ Phase 7: Evaluator↔Coder Dialogue (Agent Teams) │ └─ Direct Evaluator↔Coder 
messaging (max 2 exchanges) │ -├─ Phase 13: QA Testing +├─ Phase 8: QA Testing │ └─ Tester agent (scenario-based acceptance tests) │ └─ If FAIL: Coder fix loop (max 2 retries) → Validator → re-test │ -├─ Phase 14: Create PR (if needed) +├─ Phase 9: Create PR (if needed) │ └─ SINGLE_CODER: handled by Coder │ └─ SEQUENTIAL: handled by last Coder │ └─ PARALLEL: orchestrator creates unified PR │ -├─ Phase 15: Display agent outputs -│ -└─ Phase 16: Record Decisions (inline, if any) +└─ Phase 10: Report + Record Decisions (inline, if any) ``` ## Principles 1. **Orchestration only** - Command spawns teams/agents, never does work itself -2. **Team-based exploration** - Exploration and planning use Agent Teams for debate -3. **Coherence-first** - Single Coder produces more consistent code (default ~80% of tasks) -4. **Bounded debate** - Max 2 exchange rounds in any team, then converge -5. **Agent ownership** - Each agent owns its output completely -6. **Clean handoffs** - Each phase passes structured data to next; sequential Coders pass implementation summaries -7. **Honest reporting** - Display agent outputs directly -8. **Simplification pass** - Code refined for clarity before PR -9. **Strict delegation** - Never perform agent work in main session. "Spawn X" means call Agent tool with X, not do X's work yourself -10. **Validator owns validation** - Never run `npm test`, `npm run build`, or similar in main session; always delegate to Validator agent -11. **Coder owns fixes** - Never implement fixes in main session; spawn Coder for validation failures and alignment fixes -12. **Loop limits** - Max 2 validation retries, max 2 alignment fix iterations before escalating to user -13. **Cleanup always** - Team resources released after exploration and planning phases +2. **Plan-first** - Plan documents from `/plan` skip exploration/planning overhead entirely +3. **Team-based alignment** - Alignment check uses Agent Teams for Evaluator↔Coder dialogue +4. 
**Coherence-first** - Single Coder produces more consistent code (default ~80% of tasks) +5. **Bounded debate** - Max 2 exchange rounds in any team, then converge +6. **Agent ownership** - Each agent owns its output completely +7. **Clean handoffs** - Each phase passes structured data to next; sequential Coders pass implementation summaries +8. **Honest reporting** - Display agent outputs directly +9. **Simplification pass** - Code refined for clarity before PR +10. **Strict delegation** - Never perform agent work in main session. "Spawn X" means call Agent tool with X, not do X's work yourself +11. **Validator owns validation** - Never run `npm test`, `npm run build`, or similar in main session; always delegate to Validator agent +12. **Coder owns fixes** - Never implement fixes in main session; spawn Coder for validation failures and alignment fixes +13. **Loop limits** - Max 2 validation retries, max 2 alignment fix iterations before escalating to user +14. **Cleanup always** - Team resources released after alignment phase ## Error Handling diff --git a/plugins/devflow-implement/commands/implement.md b/plugins/devflow-implement/commands/implement.md index 22b64d01..7cbf4b13 100644 --- a/plugins/devflow-implement/commands/implement.md +++ b/plugins/devflow-implement/commands/implement.md @@ -1,26 +1,30 @@ --- -description: Execute a single task through the complete lifecycle - orchestrates exploration, planning, implementation, and simplification with parallel agents +description: Execute a single task through implementation, quality gates, and PR creation - accepts plan documents, issues, or task descriptions --- # Implement Command -Orchestrate a single task from exploration through implementation by spawning specialized agents. The orchestrator only spawns agents and passes context - all work is done by agents. +Orchestrate a single task through implementation by spawning specialized agents. 
The orchestrator only spawns agents and passes context - all work is done by agents. ## Usage ``` /implement -/implement #42 (GitHub issue number) -/implement (use conversation context) +/implement #42 (GitHub issue number) +/implement .docs/design/42-jwt-auth.2026-04-07_1430.md (plan document from /plan) +/implement (use conversation context) ``` ## Input `$ARGUMENTS` contains whatever follows `/implement`: +- Plan document path: `.docs/design/42-jwt-auth.2026-04-07_1430.md` (path to an existing `.md` file) +- GitHub issue: `#42` - Task description: "implement JWT auth" -- GitHub issue: "#42" - Empty: use conversation context +> **Tip**: For best results, run `/plan` first to produce a design artifact, then pass it to `/implement`. + ## Phases ### Phase 1: Setup @@ -34,7 +38,7 @@ Agent(subagent_type="Git"): "OPERATION: setup-task BASE_BRANCH: {current branch name} ISSUE_INPUT: {issue number if $ARGUMENTS starts with #, otherwise omit} -TASK_DESCRIPTION: {task description from $ARGUMENTS if not an issue number, otherwise omit} +TASK_DESCRIPTION: {task description from $ARGUMENTS if not an issue number or .md path, otherwise omit} Derive branch name from issue or description, create feature branch, and fetch issue if specified. Return the branch setup summary." ``` @@ -46,95 +50,21 @@ Return the branch setup summary." 
- `ISSUE_CONTENT`: Full issue body including description (if provided) - `ACCEPTANCE_CRITERIA`: Extracted acceptance criteria from issue (if provided) -### Phase 2: Orient - -Spawn Skimmer agent for codebase overview: - -``` -Agent(subagent_type="Skimmer"): -"Orient in codebase for: {task description} -Run rskim on source directories (NOT repo root) to identify relevant files, functions, integration points" -``` - -### Phase 3: Explore (Parallel) - -Spawn 4 Explore agents **in a single message**, each with Skimmer context: - -| Focus | Thoroughness | Find | -|-------|-------------|------| -| Architecture | medium | Similar implementations, patterns, module structure | -| Integration | medium | Entry points, services, database models, configuration | -| Reusable code | medium | Utilities, helpers, validation patterns, error handling | -| Edge cases | quick | Error scenarios, race conditions, permission failures | - -Track success/failure of each explorer for synthesis context. - -### Phase 4: Synthesize Exploration - -**WAIT** for Phase 3 to complete. - -**CRITICAL**: Do NOT synthesize outputs yourself in the main session. -You MUST spawn the Synthesizer agent - "spawn Synthesizer" means delegate to the agent, not do the work yourself. 
- -``` -Agent(subagent_type="Synthesizer"): -"Synthesize EXPLORATION outputs for: {task} -Mode: exploration -Explorer outputs: {all 4 outputs} -Failed explorations: {any failures} -Combine into: patterns, integration points, reusable code, edge cases" -``` - -### Phase 5: Plan (Parallel) - -Spawn 3 Plan agents **in a single message**, each with exploration synthesis: - -| Focus | Output | -|-------|--------| -| Implementation steps | Ordered steps with files and dependencies | -| Testing strategy | Unit tests, integration tests, edge case tests | -| Execution strategy | SINGLE_CODER vs SEQUENTIAL_CODERS vs PARALLEL_CODERS decision | - -**Execution Strategy planner analyzes 3 axes:** +**Plan Document Handling** (when $ARGUMENTS is a path ending in `.md`): +1. Read the plan document from the path provided +2. Extract from YAML frontmatter: `execution-strategy`, `context-risk`, `issue` number +3. Extract from body: Subtask Breakdown, Implementation Plan, Patterns to Follow, Acceptance Criteria +4. If `issue` field present in frontmatter: pass to Git agent as ISSUE_INPUT +5. Use extracted content as EXECUTION_PLAN for the Coder phase (replaces exploration/planning output) +6. Captured values override defaults from Git agent where present -| Axis | Signals | Decision Impact | -|------|---------|-----------------| -| **Artifact Independence** | Shared contracts? Integration points? 
| If coupled → SINGLE_CODER | -| **Context Capacity** | File count, module breadth, pattern complexity | HIGH/CRITICAL → SEQUENTIAL_CODERS | -| **Domain Specialization** | Tech stack detected (backend, frontend, tests) | Determines DOMAIN hints for Coders | +### Phase 2: Implement -**Context Risk Levels:** -- **LOW**: <10 files, single module → SINGLE_CODER -- **MEDIUM**: 10-20 files, 2-3 modules → Consider SEQUENTIAL_CODERS -- **HIGH**: 20-30 files, multiple modules → SEQUENTIAL_CODERS (2-3 phases) -- **CRITICAL**: >30 files, cross-cutting concerns → SEQUENTIAL_CODERS (more phases) +Based on Setup context (plan document, issue body, or conversation context), use the three-strategy framework: -### Phase 6: Synthesize Planning - -**WAIT** for Phase 5 to complete. - -**CRITICAL**: Do NOT synthesize outputs yourself in the main session. -You MUST spawn the Synthesizer agent - "spawn Synthesizer" means delegate to the agent, not do the work yourself. - -``` -Agent(subagent_type="Synthesizer"): -"Synthesize PLANNING outputs for: {task} -Mode: planning -Planner outputs: {all 3 outputs} -Combine into: execution plan with strategy decision (SINGLE_CODER | SEQUENTIAL_CODERS | PARALLEL_CODERS)" -``` - -**Synthesizer returns:** -- Execution strategy type and reasoning -- Context risk level -- Subtask breakdown with DOMAIN hints (if not SINGLE_CODER) -- Implementation plan with dependencies - -### Phase 7: Implement - -Based on Phase 6 synthesis, use the three-strategy framework: - -**Strategy Selection** (from Execution Strategy planner): +**Strategy Selection**: +- If plan document provided: use `execution-strategy` from frontmatter (default: SINGLE_CODER if absent) +- Otherwise: default to SINGLE_CODER unless task description signals high complexity | Strategy | When | Frequency | |----------|------|-----------| @@ -151,8 +81,8 @@ Agent(subagent_type="Coder"): "TASK_ID: {task-id} TASK_DESCRIPTION: {description} BASE_BRANCH: {base branch} -EXECUTION_PLAN: {full plan from 
synthesis} -PATTERNS: {patterns from exploration} +EXECUTION_PLAN: {full plan from setup context} +PATTERNS: {patterns from plan document or empty} CREATE_PR: true DOMAIN: {detected domain or 'fullstack'}" ``` @@ -170,7 +100,7 @@ Agent(subagent_type="Coder"): TASK_DESCRIPTION: {phase 1 description} BASE_BRANCH: {base branch} EXECUTION_PLAN: {phase 1 steps} -PATTERNS: {patterns from exploration} +PATTERNS: {patterns from plan document or empty} CREATE_PR: false DOMAIN: {phase 1 domain, e.g., 'backend'} HANDOFF_REQUIRED: true" @@ -183,7 +113,7 @@ Agent(subagent_type="Coder"): TASK_DESCRIPTION: {phase N description} BASE_BRANCH: {base branch} EXECUTION_PLAN: {phase N steps} -PATTERNS: {patterns from exploration} +PATTERNS: {patterns from plan document or empty} CREATE_PR: {true if last phase, false otherwise} DOMAIN: {phase N domain, e.g., 'frontend'} PRIOR_PHASE_SUMMARY: {summary from previous Coder} @@ -225,7 +155,7 @@ DOMAIN: {subtask 2 domain}" - Different files/modules with no imports between them - Each subtask is self-contained -### Phase 8: Validate +### Phase 3: Validate After Coder completes, spawn Validator to verify correctness: @@ -250,12 +180,12 @@ Run build, typecheck, lint, test. Report pass/fail with failure details." SCOPE: Fix only the listed failures, no other changes CREATE_PR: false" ``` - - Loop back to Phase 8 (re-validate) + - Loop back to Phase 3 (re-validate) 4. 
If `validation_retry_count > 2`: Report failures to user and halt -**If PASS:** Continue to Phase 9 +**If PASS:** Continue to Phase 4 -### Phase 9: Simplify +### Phase 4: Simplify After validation passes, spawn Simplifier to polish the code: @@ -267,7 +197,7 @@ FILES_CHANGED: {list of files from Coder output} Focus on code modified by Coder, apply project standards, enhance clarity" ``` -### Phase 10: Self-Review +### Phase 5: Self-Review After Simplifier completes, spawn Scrutinizer as final quality gate: @@ -280,7 +210,7 @@ Evaluate 9 pillars, fix P0/P1 issues, report status" If Scrutinizer returns BLOCKED, report to user and halt. -### Phase 11: Re-Validate (if Scrutinizer made changes) +### Phase 6: Re-Validate (if Scrutinizer made changes) If Scrutinizer made code changes (status: FIXED), spawn Validator to verify: @@ -293,22 +223,22 @@ Verify Scrutinizer's fixes didn't break anything." **If FAIL:** Report to user - Scrutinizer broke tests, needs manual intervention. -**If PASS:** Continue to Phase 12 +**If PASS:** Continue to Phase 7 -### Phase 12: Alignment Check +### Phase 7: Alignment Check After Scrutinizer passes (and re-validation if needed), spawn Evaluator to validate alignment: ``` Agent(subagent_type="Evaluator"): "ORIGINAL_REQUEST: {task description or issue content} -EXECUTION_PLAN: {synthesized plan from Phase 6} +EXECUTION_PLAN: {execution plan from Phase 1} FILES_CHANGED: {list of files from Coder output} ACCEPTANCE_CRITERIA: {extracted criteria if available} Validate alignment with request and plan. Report ALIGNED or MISALIGNED with details." ``` -**If ALIGNED:** Continue to Phase 13 +**If ALIGNED:** Continue to Phase 8 **If MISALIGNED:** 1. Extract misalignment details from Evaluator output @@ -331,23 +261,23 @@ Validate alignment with request and plan. 
Report ALIGNED or MISALIGNED with deta VALIDATION_SCOPE: changed-only" ``` - If Validator FAIL: Report to user - - If Validator PASS: Loop back to Phase 12 (re-check alignment) + - If Validator PASS: Loop back to Phase 7 (re-check alignment) 4. If `alignment_fix_count > 2`: Report misalignments to user for decision -### Phase 13: QA Testing +### Phase 8: QA Testing After Evaluator passes, spawn Tester for scenario-based acceptance testing: ``` Agent(subagent_type="Tester"): "ORIGINAL_REQUEST: {task description or issue content} -EXECUTION_PLAN: {synthesized plan from Phase 6} +EXECUTION_PLAN: {execution plan from Phase 1} FILES_CHANGED: {list of files from Coder output} ACCEPTANCE_CRITERIA: {extracted criteria if available} Design and execute scenario-based acceptance tests. Report PASS or FAIL with evidence." ``` -**If PASS:** Continue to Phase 14 +**If PASS:** Continue to Phase 9 **If FAIL:** 1. Extract failure details from Tester output @@ -370,21 +300,19 @@ Design and execute scenario-based acceptance tests. Report PASS or FAIL with evi VALIDATION_SCOPE: changed-only" ``` - If Validator FAIL: Report to user - - If Validator PASS: Loop back to Phase 13 (re-run Tester) + - If Validator PASS: Loop back to Phase 8 (re-run Tester) 4. If `qa_retry_count > 2`: Report QA failures to user for decision -### Phase 14: Create PR +### Phase 9: Create PR **For SEQUENTIAL_CODERS or PARALLEL_CODERS**: The last sequential Coder (with CREATE_PR: true) handles PR creation. For parallel coders, create unified PR using `devflow:git` skill patterns. Push branch and run `gh pr create` with comprehensive description, targeting `BASE_BRANCH`. **For SINGLE_CODER**: PR is created by the Coder agent (CREATE_PR: true). -### Phase 15: Report +### Phase 10: Report + Record Decisions Display completion summary with phase status, PR info, and next steps. -### Phase 16: Record Decisions (if any) - If the Coder's report includes Key Decisions with architectural significance: 1. 
Read `~/.claude/skills/devflow:knowledge-persistence/SKILL.md` and follow its extraction procedure to record decisions to `.memory/knowledge/decisions.md` 2. Source field: `/implement {TASK_ID}` @@ -397,68 +325,47 @@ If the Coder's report includes Key Decisions with architectural significance: │ ├─ Phase 1: Setup │ └─ Git agent (operation: setup-task) - creates feature branch, fetches issue +│ └─ Plan document parsing (if .md path provided) - extracts execution plan, strategy │ -├─ Phase 2: Orient -│ └─ Skimmer agent (codebase overview via skim) -│ -├─ Phase 3: Explore (PARALLEL, with Skimmer context) -│ ├─ Explore: Architecture -│ ├─ Explore: Integration -│ ├─ Explore: Reusable code -│ └─ Explore: Edge cases -│ -├─ Phase 4: Synthesize Exploration -│ └─ Synthesizer agent (mode: exploration) -│ -├─ Phase 5: Plan (PARALLEL) -│ ├─ Plan: Implementation steps -│ ├─ Plan: Testing strategy -│ └─ Plan: Execution strategy (3-strategy decision) -│ -├─ Phase 6: Synthesize Planning -│ └─ Synthesizer agent (mode: planning) → returns strategy + DOMAIN hints -│ -├─ Phase 7: Implement (3-strategy framework) +├─ Phase 2: Implement (3-strategy framework) │ ├─ SINGLE_CODER (80%): One Coder, full plan, CREATE_PR: true │ ├─ SEQUENTIAL_CODERS (15%): N Coders with handoff summaries │ └─ PARALLEL_CODERS (5%): N Coders in single message (rare) │ -├─ Phase 8: Validate +├─ Phase 3: Validate │ └─ Validator agent (build, typecheck, lint, test) │ └─ If FAIL: Coder fix loop (max 2 retries) → re-validate │ -├─ Phase 9: Simplify +├─ Phase 4: Simplify │ └─ Simplifier agent (refines code clarity and consistency) │ -├─ Phase 10: Self-Review +├─ Phase 5: Self-Review │ └─ Scrutinizer agent (final quality gate, fixes P0/P1) │ -├─ Phase 11: Re-Validate (if Scrutinizer made changes) +├─ Phase 6: Re-Validate (if Scrutinizer made changes) │ └─ Validator agent (verify Scrutinizer fixes) │ -├─ Phase 12: Alignment Check +├─ Phase 7: Alignment Check │ └─ Evaluator agent (validates alignment - reports only, no 
fixes) │ └─ If MISALIGNED: Coder fix loop (max 2 iterations) → Validator → re-check │ -├─ Phase 13: QA Testing +├─ Phase 8: QA Testing │ └─ Tester agent (scenario-based acceptance tests) │ └─ If FAIL: Coder fix loop (max 2 retries) → Validator → re-test │ -├─ Phase 14: Create PR (if needed) +├─ Phase 9: Create PR (if needed) │ └─ SINGLE_CODER: handled by Coder │ └─ SEQUENTIAL: handled by last Coder │ └─ PARALLEL: orchestrator creates unified PR │ -├─ Phase 15: Display agent outputs -│ -└─ Phase 16: Record Decisions (inline, if any) +└─ Phase 10: Report + Record Decisions (inline, if any) ``` ## Principles 1. **Orchestration only** - Command spawns agents, never does work itself -2. **Coherence-first** - Single Coder produces more consistent code (default ~80% of tasks) -3. **Parallel exploration** - Explore and plan phases run in parallel; sequential phases wait +2. **Plan-first** - Plan documents from `/plan` skip exploration/planning overhead entirely +3. **Coherence-first** - Single Coder produces more consistent code (default ~80% of tasks) 4. **Agent ownership** - Each agent owns its output completely 5. **Clean handoffs** - Each phase passes structured data to next; sequential Coders pass implementation summaries 6. 
**Honest reporting** - Display agent outputs directly diff --git a/plugins/devflow-plan/.claude-plugin/plugin.json b/plugins/devflow-plan/.claude-plugin/plugin.json new file mode 100644 index 00000000..6384247a --- /dev/null +++ b/plugins/devflow-plan/.claude-plugin/plugin.json @@ -0,0 +1,32 @@ +{ + "name": "devflow-plan", + "description": "Unified design planning - combines requirements discovery, gap analysis, implementation planning, and design review into a single workflow", + "author": { + "name": "Dean0x" + }, + "version": "1.8.3", + "homepage": "https://github.com/dean0x/devflow", + "repository": "https://github.com/dean0x/devflow", + "license": "MIT", + "keywords": [ + "planning", + "design", + "gap-analysis", + "requirements", + "architecture" + ], + "agents": [ + "git", + "skimmer", + "synthesizer", + "designer" + ], + "skills": [ + "agent-teams", + "gap-analysis", + "design-review", + "patterns", + "knowledge-persistence", + "worktree-support" + ] +} diff --git a/plugins/devflow-plan/README.md b/plugins/devflow-plan/README.md new file mode 100644 index 00000000..3f25f352 --- /dev/null +++ b/plugins/devflow-plan/README.md @@ -0,0 +1,43 @@ +# devflow-plan + +Unified design planning plugin. Combines requirements discovery, gap analysis, implementation planning, and design review into a single workflow that produces a machine-readable design artifact. + +## Commands + +| Command | Description | +|---------|-------------| +| `/plan <description>` | Plan a feature from description | +| `/plan #42` | Plan from a GitHub issue | +| `/plan #12 #15 #18` | Plan across multiple issues | +| `/plan` | Plan from conversation context | + +## Agents + +| Agent | Purpose | +|-------|---------| +| `git` | Fetch GitHub issues (single and batch) | +| `skimmer` | Codebase orientation | +| `synthesizer` | Combines exploration, gap analysis, and planning outputs | +| `designer` | Gap analysis and design review (mode-driven) | + +## Workflow + +14-phase pipeline organized in 6 blocks: + +1. 
**Requirements Discovery** — Orient, explore requirements, synthesize +2. **Gap Analysis** — Parallel analysis across completeness, architecture, security, performance +3. **Scope Approval** — Mandatory gate to validate scope and gap resolutions +4. **Implementation Design** — Explore implementation, plan steps and tests +5. **Design Review + Approval** — Anti-pattern detection + mandatory gate to confirm plan +6. **Output** — Write design artifact, optionally create GitHub issue + +## Output + +Design artifacts written to `.docs/design/` with YAML frontmatter consumable by `/implement`. + +## Next Step + +``` +/implement {artifact-path} +/implement #{issue-number} +``` diff --git a/plugins/devflow-plan/agents/designer.md b/plugins/devflow-plan/agents/designer.md new file mode 100644 index 00000000..0e56cb94 --- /dev/null +++ b/plugins/devflow-plan/agents/designer.md @@ -0,0 +1,100 @@ +--- +name: Designer +description: Design analysis agent with mode-driven skill loading. Supports two modes — gap-analysis (completeness, architecture, security, performance, consistency, dependencies) and design-review (anti-pattern detection). +model: opus +skills: devflow:worktree-support +--- + +# Designer Agent + +You are a design analysis specialist. You detect gaps and anti-patterns in design documents, specifications, and implementation plans before implementation begins. Your mode and focus determine which skill you load and which analysis you perform. + +## Input + +The orchestrator provides: +- **Mode**: Which analysis type to perform (`gap-analysis` or `design-review`) +- **Focus**: Which aspect to analyze (gap-analysis only — see Modes table) +- **Artifacts**: Design documents, specifications, issue bodies, or implementation plans to analyze + +**Worktree Support**: If `WORKTREE_PATH` is provided, follow the `devflow:worktree-support` skill for path resolution. If omitted, use cwd. 
+ +## Modes + +| Mode | Focus (optional) | Skill File (Read this first) | +|------|-------------------|------------------------------| +| `gap-analysis` | completeness, architecture, security, performance, consistency, dependencies | `~/.claude/skills/devflow:gap-analysis/SKILL.md` | +| `design-review` | (all anti-patterns in one pass) | `~/.claude/skills/devflow:design-review/SKILL.md` | + +## Responsibilities + +1. **Load mode skill** — Read the skill file from the table above for your assigned mode. This gives you detection patterns and checklists specific to your analysis type. +2. **Apply focus-specific analysis** — Use detection patterns from the loaded skill to scan the provided artifacts. For `gap-analysis`, apply only the patterns for your assigned focus. For `design-review`, apply all 6 anti-pattern rules. +3. **Assess confidence (0-100%)** — For each finding, assess certainty. Report at 80%+, suggest at 60-79%, drop below 60%. +4. **Cite evidence** — Every finding must reference specific text from the provided artifacts using direct quotes or line references. +5. **Write findings to output** — Format findings clearly with severity, confidence, evidence, and resolution. 
+ +## Output + +```markdown +# Design Analysis: {Mode} — {Focus (if applicable)} + +## Findings + +### CRITICAL +**[{FOCUS}] Gap/Anti-Pattern: {title}** — Confidence: {n}% +- Evidence: "{quoted text from artifact}" +- Issue: {what is missing or wrong} +- Resolution: {concrete action to address} + +### HIGH +{findings...} + +### MEDIUM +{findings...} + +### LOW +{findings...} + +## Suggestions (60-79% confidence) +- **{title}** (Confidence: {n}%) — {brief description, no fix required} + +## Summary +| Severity | Count | +|----------|-------| +| CRITICAL | {n} | +| HIGH | {n} | +| MEDIUM | {n} | +| LOW | {n} | + +**Overall Assessment**: {BLOCKING | SHOULD-ADDRESS | INFORMATIONAL} +``` + +## Confidence Scale + +| Range | Label | Meaning | +|-------|-------|---------| +| 90-100% | Certain | Clearly a gap or anti-pattern — unambiguous evidence in artifact | +| 80-89% | High | Very likely an issue, minor chance of false positive | +| 60-79% | Medium | Plausible issue, depends on context not visible in artifact | +| < 60% | Low | Possible concern — drop, don't report | + +## Principles + +1. **Evidence-based** — Never flag a gap without citing specific text from the artifact +2. **Confidence-calibrated** — Report findings only at ≥80% confidence; list 60-79% items under Suggestions; drop anything below 60% +3. **Actionable** — Every finding includes a concrete resolution, not just a problem statement +4. **No speculation** — If you cannot find evidence in the provided artifacts, do not invent it +5.
**Single focus** — In gap-analysis mode, analyze only your assigned focus area; ignore others + +## Boundaries + +**Handle autonomously:** +- Loading assigned skill file +- Scanning artifacts for focus-specific patterns +- Assessing confidence and categorizing findings +- Writing structured findings report + +**Escalate to orchestrator:** +- Context documents are missing or unreadable +- Fundamental ambiguity that cannot be resolved without user input +- Artifacts reference external systems not present in the provided context diff --git a/plugins/devflow-plan/commands/plan-teams.md b/plugins/devflow-plan/commands/plan-teams.md new file mode 100644 index 00000000..f7e2eb8d --- /dev/null +++ b/plugins/devflow-plan/commands/plan-teams.md @@ -0,0 +1,465 @@ +--- +description: Unified design planning with agent teams - collaborative exploration, gap analysis, and planning with team debate for higher-confidence outputs +--- + +# Plan Command (Teams Variant) + +Same as `/plan` but uses Agent Teams for exploration and planning phases to enable team debate and consensus. Gap analysis and design review remain as parallel subagents (independent checkers — debate adds no value there). + +The orchestrator only spawns agents, teams, and gates — all work is done by agents and teammates. + +## Usage + +``` +/plan <feature description> +/plan #42 (GitHub issue) +/plan #12 #15 #18 (multi-issue) +/plan (use conversation context) +``` + +## Input + +`$ARGUMENTS` contains whatever follows `/plan`: +- Starts with `#` followed by numbers → issue mode (parse all `#N` tokens, space-separated) +- Path to existing `.md` file → **error**: "Use /implement with plan documents" +- Other text → feature description +- Empty → use conversation context + +For **multi-issue** mode: collect all `#N` tokens from `$ARGUMENTS` as `ISSUE_NUMBERS`. + +## Clarification Gates + +**MANDATORY**: Three gates that must complete before proceeding.
+ +| Gate | Phase | Purpose | +|------|-------|---------| +| Gate 0 | Phase 1 | Confirm understanding before exploration | +| Gate 1 | Phase 7 | Validate scope + gap analysis results | +| Gate 2 | Phase 13 | Confirm final plan + design review | + +No gate may be skipped. If user says "proceed" or "whatever you think", state recommendation and get explicit confirmation. + +## Phases + +--- + +### Block 1: Requirements Discovery + +#### Phase 1: Gate 0 — Confirm Understanding + +Present interpretation using AskUserQuestion: +- Core problem this solves +- Target users +- Expected outcome +- Key assumptions + +For multi-issue: present unified scope across all issues. + +**MANDATORY**: Do not spawn any agents or teams until Gate 0 is confirmed. + +#### Phase 2: Orient + Load Knowledge + +Spawn Skimmer agent for codebase context: + +``` +Agent(subagent_type="Skimmer"): +"Orient in codebase for design planning: {feature/issues} +Run rskim on source directories (NOT repo root) to identify: +- Existing patterns and conventions in the affected area +- File structure and module boundaries +- Similar prior implementations +- Test patterns and coverage approach +Return codebase context for requirements analysis." +``` + +While Skimmer runs, read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md`. Pass Skimmer context and project knowledge to all subsequent agents and teammates. + +#### Phase 3: Exploration Team + +Create an agent team for collaborative requirements exploration: + +``` +Create a team named "explore-reqs-{feature-slug}" for requirements exploration of: {feature/issues} + +Spawn exploration teammates with self-contained prompts: + +- Name: "user-perspective-explorer" + Prompt: | + You are exploring requirements for: {feature/issues} + 1. Skimmer context: {Phase 2 output} + 2. Project knowledge: {Phase 2 decisions + pitfalls} + 3. Your deliverable: Target users, their goals, pain points, user journeys, + and success scenarios. 
What does the user need this to do? + 4. Report completion: SendMessage(type: "message", recipient: "team-lead", + summary: "User perspective exploration done") + +- Name: "similar-features-explorer" + Prompt: | + You are exploring requirements for: {feature/issues} + 1. Skimmer context: {Phase 2 output} + 2. Project knowledge: {Phase 2 decisions + pitfalls} + 3. Your deliverable: Comparable features in the codebase or domain, scope + patterns, edge cases discovered from similar implementations. + 4. Report completion: SendMessage(type: "message", recipient: "team-lead", + summary: "Similar features exploration done") + +- Name: "constraints-explorer" + Prompt: | + You are exploring requirements for: {feature/issues} + 1. Skimmer context: {Phase 2 output} + 2. Project knowledge: {Phase 2 decisions + pitfalls} + 3. Your deliverable: Dependencies, business rules, security constraints, + performance constraints, and prior architectural decisions that constrain scope. + 4. Report completion: SendMessage(type: "message", recipient: "team-lead", + summary: "Constraints exploration done") + +- Name: "failure-modes-explorer" + Prompt: | + You are exploring requirements for: {feature/issues} + 1. Skimmer context: {Phase 2 output} + 2. Project knowledge: {Phase 2 decisions + pitfalls} + 3. Your deliverable: Error states, edge cases, validation needs, known pitfalls, + and failure scenarios that must be handled. + 4. 
Report completion: SendMessage(type: "message", recipient: "team-lead", + summary: "Failure modes exploration done") + +After initial exploration, lead initiates debate: +SendMessage(type: "broadcast", summary: "Debate: challenge requirements findings"): +- User perspective challenges constraints: "This constraint blocks the core user need" +- Similar features challenges failure modes: "These edge cases are handled differently elsewhere" +- Failure modes challenges user perspective: "This user journey has an unhandled failure" +- Constraints challenges similar features: "That pattern violated an architectural decision" +Teammates use SendMessage(type: "message", recipient: "{name}") for direct challenges. + +Max 2 debate rounds, then submit consensus requirements findings. +``` + +**Exploration team output**: Consensus findings on user needs, similar features, constraints, failure modes. + +**Team Shutdown Protocol** (must complete before Phase 4): + +``` +Step 1: Shutdown each teammate + SendMessage(type: "shutdown_request", recipient: "user-perspective-explorer", content: "Exploration complete") + SendMessage(type: "shutdown_request", recipient: "similar-features-explorer", content: "Exploration complete") + SendMessage(type: "shutdown_request", recipient: "constraints-explorer", content: "Exploration complete") + SendMessage(type: "shutdown_request", recipient: "failure-modes-explorer", content: "Exploration complete") + Wait for each shutdown_response (approve: true) + +Step 2: TeamDelete + +Step 3: GATE — Verify TeamDelete succeeded + If failed → retry once after 5s + If retry failed → HALT and report: "Exploration team cleanup failed. Cannot create gap analysis agents." 
+``` + +#### Phase 4: Synthesize Exploration + +``` +Agent(subagent_type="Synthesizer"): +"Synthesize EXPLORATION outputs for: {feature/issues} +Mode: exploration +Team consensus output: {Phase 3 team output} +Combine into: user needs, similar features, constraints, failure modes" +``` + +--- + +### Block 2: Gap Analysis + +#### Phase 5: Gap Analysis (Parallel Subagents) + +Gap analysis uses parallel subagents, not a team — designers work independently on different focus areas; debate between them has no value. + +**Single-issue**: Spawn 4 Designer agents **in a single message**: + +| Focus | What it checks | +|-------|----------------| +| completeness | Missing AC, undefined error states, vague requirements | +| architecture | Pattern violations, missing integration points, layering issues | +| security | Auth gaps, input validation, secret handling, OWASP | +| performance | N+1 patterns, missing caching, concurrency, query patterns | + +**Multi-issue**: Spawn 6 Designer agents **in a single message** (same 4 plus): + +| Focus | What it checks | +|-------|----------------| +| consistency | Cross-issue contradictions, duplicate requirements, conflicting scope | +| dependencies | Inter-issue ordering, shared resources, breaking change propagation | + +Each designer receives: +- Mode: `gap-analysis` +- Focus: (their assigned focus) +- Exploration synthesis from Phase 4 +- Skimmer context from Phase 2 +- Project knowledge from Phase 2 +- Multi-issue: all issue bodies + +#### Phase 6: Synthesize Gap Analysis + +``` +Agent(subagent_type="Synthesizer"): +"Synthesize GAP ANALYSIS outputs for: {feature/issues} +Mode: design +Designer outputs: {all designer outputs} +Deduplicate, boost confidence for multi-agent flags, categorize by severity." +``` + +--- + +### Block 3: Scope Approval + +#### Phase 7: Gate 1 — Validate Scope + Gaps + +Use AskUserQuestion to present: + +1. **Scope Summary** — core problem, priority, v1 included, exclusions +2. 
**Gap Analysis Results** — blocking gaps with resolutions, should-address items, informational + +User can: accept, modify scope, or override specific gaps. + +**MANDATORY**: Do not proceed to implementation design until Gate 1 is confirmed. + +--- + +### Block 4: Implementation Design + +#### Phase 8: Explore Implementation (Parallel Subagents) + +Spawn 4 Explore agents **in a single message**, each with Skimmer context + accepted scope: + +| Focus | Thoroughness | Find | +|-------|-------------|------| +| Architecture | medium | Similar implementations, patterns, module structure | +| Integration | medium | Entry points, services, database models, configuration | +| Reusable code | medium | Utilities, helpers, validation patterns, error handling | +| Edge cases | quick | Error scenarios, race conditions, permission failures | + +#### Phase 9: Synthesize Implementation Exploration + +``` +Agent(subagent_type="Synthesizer"): +"Synthesize IMPLEMENTATION EXPLORATION outputs for: {feature/issues} +Mode: exploration +Explorer outputs: {all 4 outputs} +Combine into: patterns to follow, integration points, reusable code, edge cases" +``` + +#### Phase 10: Planning Team + +Create an agent team for collaborative implementation planning: + +``` +Create a team named "plan-design-{feature-slug}" to plan implementation of: {feature/issues} + +Spawn planning teammates with self-contained prompts: + +- Name: "implementation-planner" + Prompt: | + You are planning implementation for: {feature/issues} + 1. Read your skill: `Read ~/.claude/skills/devflow:patterns/SKILL.md` + 2. Exploration synthesis: {Phase 9 output} + 3. Gap analysis (accepted): {Phase 6 synthesis — blocking gaps need mitigations} + 4. Your deliverable: Step-by-step implementation approach with specific files + to create/modify, dependencies between steps, and explicit gap mitigations. + 5. 
Report completion: SendMessage(type: "message", recipient: "team-lead", + summary: "Implementation plan ready") + +- Name: "testing-planner" + Prompt: | + You are planning the test strategy for: {feature/issues} + 1. Read your skill: `Read ~/.claude/skills/devflow:testing/SKILL.md` + 2. Exploration synthesis: {Phase 9 output} + 3. Gap analysis (accepted): {Phase 6 synthesis — gaps to verify coverage for} + 4. Your deliverable: Test strategy — unit tests, integration tests, + edge case coverage, testing patterns from codebase, gap verification tests. + 5. Report completion: SendMessage(type: "message", recipient: "team-lead", + summary: "Test plan ready") + +- Name: "risk-planner" + Prompt: | + You are assessing risk and execution strategy for: {feature/issues} + 1. Read your skill: `Read ~/.claude/skills/devflow:patterns/SKILL.md` + 2. Exploration synthesis: {Phase 9 output} + 3. Gap analysis (accepted): {Phase 6 synthesis — unresolved risks} + 4. Your deliverable: Risk assessment, rollback strategy, and execution + strategy decision (SINGLE_CODER vs SEQUENTIAL_CODERS vs PARALLEL_CODERS) + based on artifact independence, context capacity, and domain specialization. + 5. Report completion: SendMessage(type: "message", recipient: "team-lead", + summary: "Risk assessment ready") + +After initial planning, lead initiates debate: +SendMessage(type: "broadcast", summary: "Debate: challenge implementation plans"): +- Testing challenges implementation: "This approach is untestable without major refactoring" +- Risk challenges both: "Rollback is impossible with this migration strategy" +- Implementation challenges testing: "Full coverage here adds 3x complexity for minimal value" +Teammates use SendMessage(type: "message", recipient: "{name}") for direct challenges. + +Max 2 debate rounds, then submit consensus plan. 
+``` + +**Team Shutdown Protocol** (must complete before Phase 11): + +``` +Step 1: Shutdown each teammate + SendMessage(type: "shutdown_request", recipient: "implementation-planner", content: "Planning complete") + SendMessage(type: "shutdown_request", recipient: "testing-planner", content: "Planning complete") + SendMessage(type: "shutdown_request", recipient: "risk-planner", content: "Planning complete") + Wait for each shutdown_response (approve: true) + +Step 2: TeamDelete + +Step 3: GATE — Verify TeamDelete succeeded + If failed → retry once after 5s + If retry failed → HALT and report: "Planning team cleanup failed. Cannot proceed to design review." +``` + +#### Phase 11: Synthesize Planning + +``` +Agent(subagent_type="Synthesizer"): +"Synthesize PLANNING outputs for: {feature/issues} +Mode: planning +Team consensus output: {Phase 10 team output} +Combine into: execution plan with strategy decision, gap mitigations integrated" +``` + +--- + +### Block 5: Design Review + Approval + +#### Phase 12: Design Review (Single Subagent) + +Design review uses a single independent agent — not a team. + +``` +Agent(subagent_type="Designer"): +"Mode: design-review +Artifacts: + Implementation plan: {Phase 11 planning synthesis} + Implementation exploration: {Phase 9 exploration synthesis} + Codebase context: {Phase 2 output} +Review the full plan for all 6 anti-patterns. Report all findings with evidence." +``` + +#### Phase 13: Gate 2 — Confirm Plan + Design Review + +Use AskUserQuestion to present: +1. Implementation plan summary (execution strategy, key steps, test strategy) +2. Design review findings with proposed mitigations +3. Acceptance criteria +4. Risk assessment + +User can: accept, revise (re-run phases 10-12), or cancel. + +**MANDATORY**: Do not write design artifact until Gate 2 is confirmed. 
+ +--- + +### Block 6: Output + +#### Phase 14: Output + +**Store design artifact:** + +Write design artifact to disk: +- If issue number: `.docs/design/{issue-number}-{topic-slug}.{YYYY-MM-DD_HHMM}.md` +- If multi-issue: `.docs/design/{first-issue-number}-multi.{YYYY-MM-DD_HHMM}.md` +- If no issue: `.docs/design/{topic-slug}.{YYYY-MM-DD_HHMM}.md` + +Create parent directory if needed. + +**Artifact YAML frontmatter:** +```yaml +--- +type: design-artifact +version: 1 +status: APPROVED +issue: 42 +title: "Feature Title" +slug: feature-slug +created: {ISO timestamp} +execution-strategy: SINGLE_CODER +context-risk: LOW +--- +``` + +Required sections: Problem Statement, Acceptance Criteria, Scope, Gap Analysis Results, Execution Strategy, Subtask Breakdown, Implementation Plan, Patterns to Follow, Integration Points, Design Review Results, Risk Assessment. + +**Create GitHub issue (optional):** + +If the feature does not already have a GitHub issue, create via `gh issue create` with problem statement, user stories, scope, acceptance criteria, and link to design artifact. + +**Report:** + +Display: artifact path, issue URL, gap analysis summary, design review summary, suggested next step (`/implement`). 
+ +--- + +## Architecture + +``` +/plan-teams (orchestrator - spawns agents and teams only) +│ +├─ Block 1: Requirements Discovery +│ ├─ Phase 1: GATE 0 - Confirm Understanding ⛔ MANDATORY +│ ├─ Phase 2: Orient + Load Knowledge +│ │ ├─ Skimmer agent (codebase context) +│ │ └─ Read decisions.md + pitfalls.md +│ ├─ Phase 3: Exploration Team (4 teammates + debate) +│ │ ├─ user-perspective-explorer +│ │ ├─ similar-features-explorer +│ │ ├─ constraints-explorer +│ │ └─ failure-modes-explorer +│ │ [Team Shutdown Protocol] +│ └─ Phase 4: Synthesize Exploration (Synthesizer agent) +│ +├─ Block 2: Gap Analysis (PARALLEL SUBAGENTS — no team) +│ ├─ Phase 5: Designer: completeness +│ │ Designer: architecture +│ │ Designer: security +│ │ Designer: performance +│ │ Designer: consistency (multi-issue only) +│ │ Designer: dependencies (multi-issue only) +│ └─ Phase 6: Synthesize Gap Analysis (Synthesizer: design) +│ +├─ Block 3: Scope Approval +│ └─ Phase 7: GATE 1 - Validate Scope + Gaps ⛔ MANDATORY +│ +├─ Block 4: Implementation Design +│ ├─ Phase 8: Explore Implementation (PARALLEL SUBAGENTS) +│ ├─ Phase 9: Synthesize Implementation Exploration +│ ├─ Phase 10: Planning Team (3 teammates + debate) +│ │ ├─ implementation-planner +│ │ ├─ testing-planner +│ │ └─ risk-planner +│ │ [Team Shutdown Protocol] +│ └─ Phase 11: Synthesize Planning (Synthesizer: planning) +│ +├─ Block 5: Design Review + Approval +│ ├─ Phase 12: Designer agent (mode: design-review) +│ └─ Phase 13: GATE 2 - Confirm Plan + Design Review ⛔ MANDATORY +│ +└─ Block 6: Output + └─ Phase 14: Output + ├─ Store design artifact (.docs/design/) + ├─ Create GitHub issue (optional) + └─ Report summary + next step +``` + +## Principles + +1. **Orchestration only** — Command spawns agents and teams, never does work itself +2. **Three mandatory gates** — None may be skipped +3. **Teams for exploration and planning** — Debate increases confidence in subjective analysis +4. 
**Subagents for gap analysis and design review** — Independent checks; debate adds no value +5. **One team at a time** — Always complete Team Shutdown Protocol before creating next team +6. **Evidence-based gaps** — Every gap cites specific text; no speculation +7. **Design artifacts are machine-readable** — `/implement` can consume the YAML frontmatter directly + +## Error Handling + +- Team cleanup failures halt execution — one team at a time is a hard constraint +- If user selects "Revise" at Gate 2, loop back to Phase 10 (spawn new planning team) +- If user selects "Cancel" at any gate, stop gracefully without writing artifact +- If any subagent fails outside a team, report phase, agent, error, and offer retry diff --git a/plugins/devflow-plan/commands/plan.md b/plugins/devflow-plan/commands/plan.md new file mode 100644 index 00000000..59892b71 --- /dev/null +++ b/plugins/devflow-plan/commands/plan.md @@ -0,0 +1,417 @@ +--- +description: Unified design planning - combines requirements discovery, gap analysis, implementation planning, and design review into a single workflow +--- + +# Plan Command + +Orchestrate design planning from requirements discovery through gap analysis to implementation design. Produces a machine-readable design artifact consumed by `/implement`. + +The orchestrator only spawns agents and gates — all analytical work is done by agents. + +## Usage + +``` +/plan <feature description> +/plan #42 (GitHub issue) +/plan #12 #15 #18 (multi-issue) +/plan (use conversation context) +``` + +## Input + +`$ARGUMENTS` contains whatever follows `/plan`: +- Starts with `#` followed by numbers → issue mode (parse all `#N` tokens, space-separated) +- Path to existing `.md` file → **error**: "Use /implement with plan documents" +- Other text → feature description +- Empty → use conversation context + +For **multi-issue** mode: collect all `#N` tokens from `$ARGUMENTS` as `ISSUE_NUMBERS`.
+ +## Clarification Gates + +**MANDATORY**: Three gates that must complete before proceeding. + +| Gate | Phase | Purpose | +|------|-------|---------| +| Gate 0 | Phase 1 | Confirm understanding before exploration | +| Gate 1 | Phase 7 | Validate scope + gap analysis results | +| Gate 2 | Phase 13 | Confirm final plan + design review | + +No gate may be skipped. If user says "proceed" or "whatever you think", state recommendation and get explicit confirmation. + +## Phases + +--- + +### Block 1: Requirements Discovery + +#### Phase 1: Gate 0 — Confirm Understanding + +Present interpretation using AskUserQuestion: +- Core problem this solves +- Target users +- Expected outcome +- Key assumptions + +For multi-issue: present unified scope across all issues. + +**MANDATORY**: Do not spawn any agents until Gate 0 is confirmed. + +#### Phase 2: Orient + Load Knowledge + +Spawn Skimmer agent for codebase context: + +``` +Agent(subagent_type="Skimmer"): +"Orient in codebase for design planning: {feature/issues} +Run rskim on source directories (NOT repo root) to identify: +- Existing patterns and conventions in the affected area +- File structure and module boundaries +- Similar prior implementations +- Test patterns and coverage approach +Return codebase context for requirements analysis." +``` + +While Skimmer runs, read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md`. Pass Skimmer context and project knowledge to all subsequent agents — prior decisions constrain design, known pitfalls inform gap analysis. 
+ +#### Phase 3: Explore Requirements (Parallel) + +Spawn 4 Explore agents **in a single message**, each with Skimmer context and project knowledge: + +| Focus | Thoroughness | Find | +|-------|-------------|------| +| User perspective | medium | Target users, goals, pain points, user journeys | +| Similar features | medium | Comparable features, scope patterns, edge cases | +| Constraints | quick | Dependencies, business rules, prior architectural decisions | +| Failure modes | quick | Error states, edge cases, known pitfalls | + +#### Phase 4: Synthesize Exploration + +**WAIT** for Phase 3 to complete. + +``` +Agent(subagent_type="Synthesizer"): +"Synthesize EXPLORATION outputs for: {feature/issues} +Mode: exploration +Explorer outputs: {all 4 outputs} +Combine into: user needs, similar features, constraints, failure modes" +``` + +--- + +### Block 2: Gap Analysis + +#### Phase 5: Gap Analysis (Parallel) + +**Single-issue**: Spawn 4 Designer agents **in a single message**: + +| Focus | What it checks | +|-------|----------------| +| completeness | Missing AC, undefined error states, vague requirements | +| architecture | Pattern violations, missing integration points, layering issues | +| security | Auth gaps, input validation, secret handling, OWASP | +| performance | N+1 patterns, missing caching, concurrency, query patterns | + +**Multi-issue**: Spawn 6 Designer agents **in a single message** (same 4 plus): + +| Focus | What it checks | +|-------|----------------| +| consistency | Cross-issue contradictions, duplicate requirements, conflicting scope | +| dependencies | Inter-issue ordering, shared resources, breaking change propagation | + +Each designer receives: +- Mode: `gap-analysis` +- Focus: (their assigned focus from table) +- Exploration synthesis from Phase 4 +- Skimmer context from Phase 2 +- Project knowledge from Phase 2 +- Multi-issue: all issue bodies + +``` +Agent(subagent_type="Designer"): +"Mode: gap-analysis +Focus: 
{completeness|architecture|security|performance|consistency|dependencies} +Artifacts: + Feature/Issues: {feature description or issue bodies} + Exploration synthesis: {Phase 4 output} + Codebase context: {Phase 2 output} + Project knowledge: {decisions + pitfalls} +Analyze only your assigned focus area. Cite evidence from provided artifacts." +``` + +#### Phase 6: Synthesize Gap Analysis + +**WAIT** for Phase 5 to complete. + +``` +Agent(subagent_type="Synthesizer"): +"Synthesize GAP ANALYSIS outputs for: {feature/issues} +Mode: design +Designer outputs: {all designer outputs} +Deduplicate, boost confidence for multi-agent flags, categorize by severity." +``` + +--- + +### Block 3: Scope Approval + +#### Phase 7: Gate 1 — Validate Scope + Gaps + +Use AskUserQuestion to present and validate: + +1. **Scope Summary** + - Core problem + - Priority level (Critical/High/Medium/Low) + - v1 scope (what's included) + - Explicit exclusions + +2. **Gap Analysis Results** (from Phase 6) + - Blocking gaps (CRITICAL/HIGH) with proposed resolutions + - Should-address recommendations (MEDIUM) + - Informational items (LOW) + +User can: +- Accept scope and gaps as presented +- Modify scope (add/remove items) +- Override specific gaps (accept risk and proceed) + +**MANDATORY**: Do not proceed to implementation design until Gate 1 is confirmed. 
+ +--- + +### Block 4: Implementation Design + +#### Phase 8: Explore Implementation (Parallel) + +Spawn 4 Explore agents **in a single message**, each with Skimmer context + accepted scope: + +| Focus | Thoroughness | Find | +|-------|-------------|------| +| Architecture | medium | Similar implementations, patterns, module structure | +| Integration | medium | Entry points, services, database models, configuration | +| Reusable code | medium | Utilities, helpers, validation patterns, error handling | +| Edge cases | quick | Error scenarios, race conditions, permission failures | + +#### Phase 9: Synthesize Implementation Exploration + +**WAIT** for Phase 8 to complete. + +``` +Agent(subagent_type="Synthesizer"): +"Synthesize IMPLEMENTATION EXPLORATION outputs for: {feature/issues} +Mode: exploration +Explorer outputs: {all 4 outputs} +Combine into: patterns to follow, integration points, reusable code, edge cases" +``` + +#### Phase 10: Plan Implementation (Parallel) + +Spawn 3 Plan agents **in a single message**, each with implementation exploration synthesis: + +| Focus | Output | +|-------|--------| +| Implementation steps | Ordered steps with files, dependencies, gap mitigations | +| Testing strategy | Unit tests, integration tests, edge case tests | +| Execution strategy | SINGLE_CODER vs SEQUENTIAL_CODERS vs PARALLEL_CODERS | + +Implementation steps planner: include explicit gap mitigations (from Phase 6) in the relevant steps. + +#### Phase 11: Synthesize Planning + +**WAIT** for Phase 10 to complete. 
+ +``` +Agent(subagent_type="Synthesizer"): +"Synthesize PLANNING outputs for: {feature/issues} +Mode: planning +Planner outputs: {all 3 outputs} +Combine into: execution plan with strategy decision, gap mitigations integrated" +``` + +--- + +### Block 5: Design Review + Approval + +#### Phase 12: Design Review + +Spawn 1 Designer agent with mode `design-review`: + +``` +Agent(subagent_type="Designer"): +"Mode: design-review +Artifacts: + Implementation plan: {Phase 11 planning synthesis} + Implementation exploration: {Phase 9 exploration synthesis} + Codebase context: {Phase 2 output} +Review the full plan for all 6 anti-patterns. Report all findings with evidence." +``` + +#### Phase 13: Gate 2 — Confirm Plan + Design Review + +Use AskUserQuestion to present: + +1. **Implementation Plan Summary** + - Execution strategy (SINGLE_CODER / SEQUENTIAL_CODERS / PARALLEL_CODERS) + - Key implementation steps with files + - Test strategy + +2. **Design Review Findings** (from Phase 12) + - Each anti-pattern finding with severity and proposed mitigation + - Which findings are already addressed in the plan + +3. **Acceptance Criteria** (from gap analysis + exploration) + +4. **Risk Assessment** + - Context risk level (LOW/MEDIUM/HIGH/CRITICAL) + - Unresolved gaps carried forward + +User can: +- **Accept** — proceed to output phases +- **Revise** — re-run phases 10-12 with new constraints (loop back, no limit on revisions) +- **Cancel** — stop gracefully, no artifact written + +**MANDATORY**: Do not write design artifact until Gate 2 is confirmed. + +--- + +### Block 6: Output + +#### Phase 14: Output + +**Store design artifact:** + +Write design artifact to disk: +- If issue number: `.docs/design/{issue-number}-{topic-slug}.{YYYY-MM-DD_HHMM}.md` +- If multi-issue: `.docs/design/{first-issue-number}-multi.{YYYY-MM-DD_HHMM}.md` +- If no issue: `.docs/design/{topic-slug}.{YYYY-MM-DD_HHMM}.md` + +Create parent directory if needed. 
+ +**Artifact format:** + +```yaml +--- +type: design-artifact +version: 1 +status: APPROVED +issue: 42 +title: "Feature Title" +slug: feature-slug +created: 2026-04-07T14:30:00Z +execution-strategy: SINGLE_CODER +context-risk: LOW +--- +``` + +Required sections: +1. **Problem Statement** — core problem and target users +2. **Acceptance Criteria** — testable success conditions (from exploration + gap analysis) +3. **Scope** — v1 included, deferred, excluded +4. **Gap Analysis Results** — blocking gaps with resolutions, should-address items +5. **Execution Strategy** — SINGLE_CODER/SEQUENTIAL_CODERS/PARALLEL_CODERS with rationale +6. **Subtask Breakdown** — phases with domains and dependencies (if not SINGLE_CODER) +7. **Implementation Plan** — ordered steps with files and gap mitigations +8. **Patterns to Follow** — from exploration synthesis (file:line references) +9. **Integration Points** — entry points, services, models to connect +10. **Design Review Results** — anti-pattern findings with mitigations +11. **Risk Assessment** — context risk level, unresolved risks + +**Create GitHub issue (optional):** + +If the feature does not already have a GitHub issue: +- Create via `gh issue create` +- Title: feature name +- Body: problem statement, user stories, v1 scope, acceptance criteria, link to design artifact path +- Labels: feature, priority level + +Skip if issue number was provided as input.
+ +**Report:** + +Display completion summary: +- Design artifact path +- Issue URL (if created or if pre-existing) +- Gap analysis summary (N blocking, M should-address) +- Design review summary (N anti-patterns found, M mitigated in plan) +- Suggested next step: `/implement {artifact-path}` or `/implement #{issue-number}` + +--- + +## Architecture + +``` +/plan (orchestrator - spawns agents only) +│ +├─ Block 1: Requirements Discovery +│ ├─ Phase 1: GATE 0 - Confirm Understanding ⛔ MANDATORY +│ │ └─ AskUserQuestion: Validate interpretation +│ ├─ Phase 2: Orient + Load Knowledge +│ │ ├─ Skimmer agent (codebase context) +│ │ └─ Read decisions.md + pitfalls.md +│ ├─ Phase 3: Explore Requirements (PARALLEL) +│ │ ├─ Explore: User perspective +│ │ ├─ Explore: Similar features +│ │ ├─ Explore: Constraints +│ │ └─ Explore: Failure modes +│ └─ Phase 4: Synthesize Exploration +│ └─ Synthesizer agent (mode: exploration) +│ +├─ Block 2: Gap Analysis +│ ├─ Phase 5: Gap Analysis (PARALLEL) +│ │ ├─ Designer: completeness +│ │ ├─ Designer: architecture +│ │ ├─ Designer: security +│ │ ├─ Designer: performance +│ │ ├─ Designer: consistency (multi-issue only) +│ │ └─ Designer: dependencies (multi-issue only) +│ └─ Phase 6: Synthesize Gap Analysis +│ └─ Synthesizer agent (mode: design) +│ +├─ Block 3: Scope Approval +│ └─ Phase 7: GATE 1 - Validate Scope + Gaps ⛔ MANDATORY +│ └─ AskUserQuestion: Confirm scope and gap resolutions +│ +├─ Block 4: Implementation Design +│ ├─ Phase 8: Explore Implementation (PARALLEL) +│ │ ├─ Explore: Architecture +│ │ ├─ Explore: Integration +│ │ ├─ Explore: Reusable code +│ │ └─ Explore: Edge cases +│ ├─ Phase 9: Synthesize Implementation Exploration +│ │ └─ Synthesizer agent (mode: exploration) +│ ├─ Phase 10: Plan Implementation (PARALLEL) +│ │ ├─ Plan: Implementation steps +│ │ ├─ Plan: Testing strategy +│ │ └─ Plan: Execution strategy +│ └─ Phase 11: Synthesize Planning +│ └─ Synthesizer agent (mode: planning) +│ +├─ Block 5: Design Review + 
Approval +│ ├─ Phase 12: Design Review +│ │ └─ Designer agent (mode: design-review) +│ └─ Phase 13: GATE 2 - Confirm Plan + Design Review ⛔ MANDATORY +│ └─ AskUserQuestion: Final plan approval +│ +└─ Block 6: Output + └─ Phase 14: Output + ├─ Store design artifact (.docs/design/) + ├─ Create GitHub issue (optional) + └─ Report summary + next step +``` + +## Principles + +1. **Orchestration only** — Command spawns agents, never does agent work itself +2. **Three mandatory gates** — Gate 0 (understand), Gate 1 (scope+gaps), Gate 2 (plan+review); none may be skipped +3. **Parallel execution** — Explore phases and gap analysis run in parallel; synthesis phases wait +4. **Evidence-based gaps** — Every gap cites specific text; no speculation +5. **Scope ruthlessly** — Small, focused plans ship faster; Gate 1 enforces scope discipline +6. **Strict delegation** — Never synthesize, analyze, or plan in main session; always spawn agents +7. **Design artifacts are machine-readable** — `/implement` can consume the YAML frontmatter directly + +## Error Handling + +- If any agent fails, report the phase, agent type, and error +- If user selects "Revise" at Gate 2, loop back to Phase 10 with user's constraints +- If user selects "Cancel" at any gate, stop gracefully without writing artifact +- If `.docs/design/` does not exist, create it in Phase 14 diff --git a/plugins/devflow-specify/.claude-plugin/plugin.json b/plugins/devflow-specify/.claude-plugin/plugin.json deleted file mode 100644 index 640a533a..00000000 --- a/plugins/devflow-specify/.claude-plugin/plugin.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "name": "devflow-specify", - "description": "Interactive feature specification - creates well-defined GitHub issues through requirements exploration and clarification", - "author": { - "name": "Dean0x" - }, - "version": "1.8.3", - "homepage": "https://github.com/dean0x/devflow", - "repository": "https://github.com/dean0x/devflow", - "license": "MIT", - "keywords": [ -
"specification", - "requirements", - "planning", - "issues", - "github" - ], - "agents": [ - "skimmer", - "synthesizer" - ], - "skills": [ - "agent-teams" - ] -} diff --git a/plugins/devflow-specify/README.md b/plugins/devflow-specify/README.md deleted file mode 100644 index 5f384111..00000000 --- a/plugins/devflow-specify/README.md +++ /dev/null @@ -1,46 +0,0 @@ -# devflow-specify - -Interactive feature specification plugin for Claude Code. Creates well-defined GitHub issues through requirements exploration and clarification. - -## Installation - -```bash -# Via Devflow CLI -npx devflow-kit init --plugin=specify - -# Via Claude Code (when available) -/plugin install dean0x/devflow-specify -``` - -## Usage - -``` -/specify [feature idea] -``` - -The command guides you through mandatory clarification gates: -1. **Gate 0**: Confirm understanding of feature idea -2. **Gate 1**: Validate scope and priorities after exploration -3. **Gate 2**: Confirm acceptance criteria before issue creation - -## Components - -### Command -- `/specify` - Interactive feature specification workflow - -### Agents -- `skimmer` - Codebase orientation using skim for file/function discovery -- `synthesizer` - Combines exploration outputs into coherent summary - -## Output - -Creates a well-defined GitHub issue with: -- Clear title and description -- Acceptance criteria -- Technical context from codebase exploration -- Priority and scope boundaries - -## Related Plugins - -- [devflow-implement](../devflow-implement) - Implement the specified feature -- [devflow-code-review](../devflow-code-review) - Review the implementation diff --git a/plugins/devflow-specify/commands/specify-teams.md b/plugins/devflow-specify/commands/specify-teams.md deleted file mode 100644 index 7638999f..00000000 --- a/plugins/devflow-specify/commands/specify-teams.md +++ /dev/null @@ -1,321 +0,0 @@ ---- -description: Specify a single feature through agent-team exploration with debate and interactive clarification ---- 
- -# Specify Command - -Transform a rough feature idea into a well-defined, implementation-ready GitHub issue through multi-perspective requirements exploration and user clarification. - -**Focus**: Requirements only (what/why/for whom), not implementation (that's `/implement`'s job). - -## Usage - -``` -/specify User authentication with social login -/specify Rate limiting for API endpoints -/specify (use conversation context) -``` - -## Input - -`$ARGUMENTS` contains whatever follows `/specify`: -- Feature description: "User authentication with social login" -- Empty: infer from conversation context - -## Clarification Gates - -**MANDATORY**: Three gates that must complete before proceeding: - -| Gate | When | Purpose | -|------|------|---------| -| Gate 0 | Before exploration | Confirm understanding of feature idea | -| Gate 1 | After exploration | Validate scope and priorities | -| Gate 2 | Before issue creation | Confirm acceptance criteria | - -No gate may be skipped. If user says "whatever you think", state recommendation and get explicit approval. - -## Phases - -### Phase 1: Gate 0 - Confirm Understanding - -Present interpretation to user: -- Core problem this solves -- Target users -- Expected outcome -- Key assumptions - -Use AskUserQuestion to confirm understanding before spawning any agents. - -### Phase 2: Orient - -Spawn Skimmer agent for codebase context: - -``` -Agent(subagent_type="Skimmer"): -"Orient in codebase for requirements exploration: {feature} -Run rskim on source directories (NOT repo root) to find: project structure, similar features, patterns, integration points -Return: codebase context for requirements (not implementation details)" -``` - -### Phase 3: Load Project Knowledge - -Read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md`. Pass their content as context to the exploration team below — prior decisions constrain requirements, known pitfalls inform failure modes. 
- -### Phase 4: Explore Requirements (Agent Teams) - -Create an agent team for collaborative requirements exploration: - -``` -Create a team named "spec-explore-{feature-slug}" to explore requirements for: {feature} - -Spawn exploration teammates with self-contained prompts: - -- Name: "user-perspective-explorer" - Prompt: | - You are exploring requirements for feature: {feature} - 1. Codebase context from Skimmer: - {skimmer output} - 2. Your deliverable: Target users, their goals, pain points, and user - journeys for this feature. How will users interact with it? - 3. Document findings with references to existing UX patterns in the codebase. - 4. Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "User perspective exploration done") - -- Name: "similar-features-explorer" - Prompt: | - You are exploring requirements for feature: {feature} - 1. Codebase context from Skimmer: - {skimmer output} - 2. Your deliverable: Find comparable features in the codebase, scope - patterns, and precedents. What exists that this feature should follow? - 3. Document findings with file:path references. - 4. Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Similar features exploration done") - -- Name: "constraints-explorer" - Prompt: | - You are exploring requirements for feature: {feature} - 1. Codebase context from Skimmer: - {skimmer output} - 2. Your deliverable: Dependencies, business rules, security requirements, - performance limits, and technical constraints for this feature. - 3. Document findings with file:path references. - 4. Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Constraints exploration done") - -- Name: "failure-mode-explorer" - Prompt: | - You are exploring requirements for feature: {feature} - 1. Codebase context from Skimmer: - {skimmer output} - 2. Your deliverable: Error states, edge cases, validation needs, - and what could go wrong with this feature. 
- 3. Document findings with references to existing error handling patterns. - 4. Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Failure mode exploration done") - -After initial exploration, lead initiates debate: -SendMessage(type: "broadcast", summary: "Debate: challenge requirements findings"): -- Constraints challenges user perspective: "This requirement conflicts with X constraint" -- Failure modes challenges similar features: "That pattern failed in Y scenario" -- Similar features validates user perspective: "This UX pattern works well in Z" -Teammates use SendMessage(type: "message", recipient: "{name}") for direct challenges. - -Max 2 debate rounds, then submit consensus requirements findings. -``` - -**Exploration team output**: Consensus findings on user needs, similar features, constraints, failure modes. - -**Team Shutdown Protocol** (must complete before Phase 6): - -``` -Step 1: Shutdown each teammate - SendMessage(type: "shutdown_request", recipient: "user-perspective-explorer", content: "Exploration complete") - SendMessage(type: "shutdown_request", recipient: "similar-features-explorer", content: "Exploration complete") - SendMessage(type: "shutdown_request", recipient: "constraints-explorer", content: "Exploration complete") - SendMessage(type: "shutdown_request", recipient: "failure-mode-explorer", content: "Exploration complete") - Wait for each shutdown_response (approve: true) - -Step 2: TeamDelete - -Step 3: GATE — Verify TeamDelete succeeded - If failed → retry once after 5s - If retry failed → HALT and report: "Exploration team cleanup failed. Cannot create planning team." 
-``` - -### Phase 5: Synthesize Exploration - -**WAIT** for Phase 4, then spawn Synthesizer: - -``` -Agent(subagent_type="Synthesizer"): -"Synthesize EXPLORATION outputs for: {feature} -Mode: exploration -Explorer consensus: {team exploration consensus output} -Combine into: user needs, similar features, constraints, failure modes" -``` - -### Phase 6: Plan Scope (Agent Teams) - -Create an agent team for collaborative scope planning: - -``` -Create a team named "spec-plan-{feature-slug}" to plan scope for: {feature} - -Spawn planning teammates with self-contained prompts: - -- Name: "user-stories-planner" - Prompt: | - You are planning scope for feature: {feature} - Exploration synthesis (what we know): - {synthesis output from Phase 5} - - Your deliverable: User stories in "As X, I want Y, so that Z" format. - Cover all actors, actions, and outcomes identified in exploration. - Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "User stories ready") - -- Name: "scope-boundaries-planner" - Prompt: | - You are planning scope for feature: {feature} - Exploration synthesis (what we know): - {synthesis output from Phase 5} - - Your deliverable: v1 MVP scope, v2 deferred items, explicitly out of scope, - and dependencies on other features or systems. - Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Scope boundaries ready") - -- Name: "acceptance-criteria-planner" - Prompt: | - You are planning scope for feature: {feature} - Exploration synthesis (what we know): - {synthesis output from Phase 5} - - Your deliverable: Testable acceptance criteria for success cases, - failure cases, and edge cases. Every criterion must be verifiable. 
- Report completion: SendMessage(type: "message", recipient: "team-lead", - summary: "Acceptance criteria ready") - -After initial planning, lead initiates debate: -SendMessage(type: "broadcast", summary: "Debate: challenge scope plans"): -- Scope challenges user stories: "This story is too broad for v1" -- Acceptance challenges scope: "These boundaries leave this edge case uncovered" -- User stories challenges acceptance: "This criterion is untestable" -Teammates use SendMessage(type: "message", recipient: "{name}") for direct challenges. - -Max 2 debate rounds, then submit consensus scope plan. -``` - -**Planning team output**: Consensus on user stories, scope boundaries, acceptance criteria. - -**Team Shutdown Protocol** (must complete before Gate 1): - -``` -Step 1: Shutdown each teammate - SendMessage(type: "shutdown_request", recipient: "user-stories-planner", content: "Planning complete") - SendMessage(type: "shutdown_request", recipient: "scope-boundaries-planner", content: "Planning complete") - SendMessage(type: "shutdown_request", recipient: "acceptance-criteria-planner", content: "Planning complete") - Wait for each shutdown_response (approve: true) - -Step 2: TeamDelete - -Step 3: GATE — Verify TeamDelete succeeded - If failed → retry once after 5s - If retry failed → HALT and report: "Planning team cleanup failed. Cannot proceed to Gate 1." 
-``` - -### Phase 7: Synthesize Planning - -**WAIT** for Phase 6, then spawn Synthesizer: - -``` -Agent(subagent_type="Synthesizer"): -"Synthesize PLANNING outputs for: {feature} -Mode: planning -Planner consensus: {team planning consensus output} -Combine into: user stories, scope breakdown, acceptance criteria, open questions" -``` - -### Phase 8: Gate 1 - Validate Scope - -Use AskUserQuestion to validate: -- Primary problem being solved -- Priority level (Critical/High/Medium/Low) -- v1 scope selection (minimal/medium/full) -- Explicit exclusions - -### Phase 9: Gate 2 - Confirm Criteria - -Present specification summary, then use AskUserQuestion for final confirmation: -- Success UX pattern -- Error handling approach -- Ready to create issue / Needs changes / Cancel - -### Phase 10: Create Issue - -Create GitHub issue with `gh issue create`: -- Title: feature name -- Body: problem statement, user stories, scope (v1/deferred/excluded), acceptance criteria, constraints, priority -- Labels: feature, priority level - -Report issue number and URL. 
- -## Architecture - -``` -/specify (orchestrator - spawns teams and agents) -│ -├─ Phase 1: GATE 0 - Confirm Understanding ⛔ MANDATORY -│ └─ AskUserQuestion: Validate interpretation -│ -├─ Phase 2: Orient -│ └─ Skimmer agent (codebase context via skim) -│ -├─ Phase 3: Load Project Knowledge -│ └─ Read decisions.md + pitfalls.md -│ -├─ Phase 4: Explore Requirements (Agent Teams) -│ ├─ User Perspective Explorer (teammate) -│ ├─ Similar Features Explorer (teammate) -│ ├─ Constraints Explorer (teammate) -│ ├─ Failure Mode Explorer (teammate) -│ └─ Debate → consensus requirements findings -│ -├─ Phase 5: Synthesize Exploration -│ └─ Synthesizer agent (mode: exploration) -│ -├─ Phase 6: Plan Scope (Agent Teams) -│ ├─ User Stories Planner (teammate) -│ ├─ Scope Boundaries Planner (teammate) -│ ├─ Acceptance Criteria Planner (teammate) -│ └─ Debate → consensus scope plan -│ -├─ Phase 7: Synthesize Planning -│ └─ Synthesizer agent (mode: planning) -│ -├─ Phase 8: GATE 1 - Validate Scope ⛔ MANDATORY -│ └─ AskUserQuestion: Confirm scope and priorities -│ -├─ Phase 9: GATE 2 - Confirm Criteria ⛔ MANDATORY -│ └─ AskUserQuestion: Final spec approval -│ -├─ Phase 10: Create Issue -│ └─ gh issue create -│ -└─ Report: Issue number, URL, /implement command -``` - -## Principles - -1. **Confirm before exploring** - Validate understanding with user before spawning agents -2. **Requirements, not implementation** - Focus on what and why, not how -3. **Multiple perspectives** - Explore from user, scope, constraint, and failure angles -4. **User drives decisions** - Three mandatory gates ensure user approval -5. **Scope ruthlessly** - Small, focused issues ship faster -6. **Testable criteria** - Every requirement must be verifiable -7. **Enable /implement** - Output must be actionable for implementation - -## Error Handling - -If user selects "Needs changes" at any gate, iterate until confirmed. If "Cancel", stop gracefully without creating issue. 
diff --git a/plugins/devflow-specify/commands/specify.md b/plugins/devflow-specify/commands/specify.md deleted file mode 100644 index 83568845..00000000 --- a/plugins/devflow-specify/commands/specify.md +++ /dev/null @@ -1,186 +0,0 @@ ---- -description: Specify a single feature through requirements exploration and interactive clarification - creates a well-defined GitHub issue ready for /implement ---- - -# Specify Command - -Transform a rough feature idea into a well-defined, implementation-ready GitHub issue through multi-perspective requirements exploration and user clarification. - -**Focus**: Requirements only (what/why/for whom), not implementation (that's `/implement`'s job). - -## Usage - -``` -/specify User authentication with social login -/specify Rate limiting for API endpoints -/specify (use conversation context) -``` - -## Input - -`$ARGUMENTS` contains whatever follows `/specify`: -- Feature description: "User authentication with social login" -- Empty: infer from conversation context - -## Clarification Gates - -**MANDATORY**: Three gates that must complete before proceeding: - -| Gate | When | Purpose | -|------|------|---------| -| Gate 0 | Before exploration | Confirm understanding of feature idea | -| Gate 1 | After exploration | Validate scope and priorities | -| Gate 2 | Before issue creation | Confirm acceptance criteria | - -No gate may be skipped. If user says "whatever you think", state recommendation and get explicit approval. - -## Phases - -### Phase 1: Gate 0 - Confirm Understanding - -Present interpretation to user: -- Core problem this solves -- Target users -- Expected outcome -- Key assumptions - -Use AskUserQuestion to confirm understanding before spawning any agents. 
- -### Phase 2: Orient - -Spawn Skimmer agent for codebase context: - -``` -Agent(subagent_type="Skimmer"): -"Orient in codebase for requirements exploration: {feature} -Run rskim on source directories (NOT repo root) to find: project structure, similar features, patterns, integration points -Return: codebase context for requirements (not implementation details)" -``` - -### Phase 3: Load Project Knowledge - -Read `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md`. Pass their content as context to the Explore agents below — prior decisions constrain requirements, known pitfalls inform failure modes. - -### Phase 4: Explore Requirements (Parallel) - -Spawn 4 Explore agents **in a single message**, each with Skimmer context and project knowledge (if loaded): - -| Focus | Thoroughness | Find | -|-------|-------------|------| -| User perspective | medium | Target users, goals, pain points, user journeys | -| Similar features | medium | Comparable features, scope patterns, edge cases | -| Constraints | quick | Dependencies, business rules, security, performance, **prior architectural decisions** | -| Failure modes | quick | Error states, edge cases, validation needs, **known pitfalls** | - -### Phase 5: Synthesize Exploration - -**WAIT** for Phase 4, then spawn Synthesizer: - -``` -Agent(subagent_type="Synthesizer"): -"Synthesize EXPLORATION outputs for: {feature} -Mode: exploration -Combine into: user needs, similar features, constraints, failure modes" -``` - -### Phase 6: Plan Scope (Parallel) - -Spawn 3 Plan agents **in a single message**, each with exploration synthesis: - -| Focus | Output | -|-------|--------| -| User stories | Actors, actions, outcomes in "As X, I want Y, so that Z" format | -| Scope boundaries | v1 MVP, v2 deferred, out of scope, dependencies | -| Acceptance criteria | Success/failure/edge case criteria (testable) | - -### Phase 7: Synthesize Planning - -**WAIT** for Phase 6, then spawn Synthesizer: - -``` 
-Agent(subagent_type="Synthesizer"): -"Synthesize PLANNING outputs for: {feature} -Mode: planning -Combine into: user stories, scope breakdown, acceptance criteria, open questions" -``` - -### Phase 8: Gate 1 - Validate Scope - -Use AskUserQuestion to validate: -- Primary problem being solved -- Priority level (Critical/High/Medium/Low) -- v1 scope selection (minimal/medium/full) -- Explicit exclusions - -### Phase 9: Gate 2 - Confirm Criteria - -Present specification summary, then use AskUserQuestion for final confirmation: -- Success UX pattern -- Error handling approach -- Ready to create issue / Needs changes / Cancel - -### Phase 10: Create Issue - -Create GitHub issue with `gh issue create`: -- Title: feature name -- Body: problem statement, user stories, scope (v1/deferred/excluded), acceptance criteria, constraints, priority -- Labels: feature, priority level - -Report issue number and URL. - -## Architecture - -``` -/specify (orchestrator - spawns agents only) -│ -├─ Phase 1: GATE 0 - Confirm Understanding ⛔ MANDATORY -│ └─ AskUserQuestion: Validate interpretation -│ -├─ Phase 2: Orient -│ └─ Skimmer agent (codebase context via skim) -│ -├─ Phase 3: Load Project Knowledge -│ └─ Read decisions.md + pitfalls.md -│ -├─ Phase 4: Explore Requirements (PARALLEL) -│ ├─ Explore: User perspective -│ ├─ Explore: Similar features -│ ├─ Explore: Constraints -│ └─ Explore: Failure modes -│ -├─ Phase 5: Synthesize Exploration -│ └─ Synthesizer agent (mode: exploration) -│ -├─ Phase 6: Plan Scope (PARALLEL) -│ ├─ Plan: User stories -│ ├─ Plan: Scope boundaries -│ └─ Plan: Acceptance criteria -│ -├─ Phase 7: Synthesize Planning -│ └─ Synthesizer agent (mode: planning) -│ -├─ Phase 8: GATE 1 - Validate Scope ⛔ MANDATORY -│ └─ AskUserQuestion: Confirm scope and priorities -│ -├─ Phase 9: GATE 2 - Confirm Criteria ⛔ MANDATORY -│ └─ AskUserQuestion: Final spec approval -│ -├─ Phase 10: Create Issue -│ └─ gh issue create -│ -└─ Report: Issue number, URL, /implement command 
-``` - -## Principles - -1. **Confirm before exploring** - Validate understanding with user before spawning agents -2. **Requirements, not implementation** - Focus on what and why, not how -3. **Multiple perspectives** - Explore from user, scope, constraint, and failure angles -4. **User drives decisions** - Three mandatory gates ensure user approval -5. **Scope ruthlessly** - Small, focused issues ship faster -6. **Testable criteria** - Every requirement must be verifiable -7. **Enable /implement** - Output must be actionable for implementation - -## Error Handling - -If user selects "Needs changes" at any gate, iterate until confirmed. If "Cancel", stop gracefully without creating issue. diff --git a/shared/agents/designer.md b/shared/agents/designer.md new file mode 100644 index 00000000..0e56cb94 --- /dev/null +++ b/shared/agents/designer.md @@ -0,0 +1,100 @@ +--- +name: Designer +description: Design analysis agent with mode-driven skill loading. Modes: gap-analysis (completeness, architecture, security, performance, consistency, dependencies), design-review (anti-pattern detection). +model: opus +skills: devflow:worktree-support +--- + +# Designer Agent + +You are a design analysis specialist. You detect gaps and anti-patterns in design documents, specifications, and implementation plans before implementation begins. Your mode and focus determine which skill you load and which analysis you perform. + +## Input + +The orchestrator provides: +- **Mode**: Which analysis type to perform (`gap-analysis` or `design-review`) +- **Focus**: Which aspect to analyze (gap-analysis only — see Modes table) +- **Artifacts**: Design documents, specifications, issue bodies, or implementation plans to analyze + +**Worktree Support**: If `WORKTREE_PATH` is provided, follow the `devflow:worktree-support` skill for path resolution. If omitted, use cwd. 
+ +## Modes + +| Mode | Focus (optional) | Skill File (Read this first) | +|------|-------------------|------------------------------| +| `gap-analysis` | completeness, architecture, security, performance, consistency, dependencies | `~/.claude/skills/devflow:gap-analysis/SKILL.md` | +| `design-review` | (all anti-patterns in one pass) | `~/.claude/skills/devflow:design-review/SKILL.md` | + +## Responsibilities + +1. **Load mode skill** — Read the skill file from the table above for your assigned mode. This gives you detection patterns and checklists specific to your analysis type. +2. **Apply focus-specific analysis** — Use detection patterns from the loaded skill to scan the provided artifacts. For `gap-analysis`, apply only the patterns for your assigned focus. For `design-review`, apply all 6 anti-pattern rules. +3. **Assess confidence (0-100%)** — For each finding, assess certainty. Report at 80%+, suggest at 60-79%, drop below 60%. +4. **Cite evidence** — Every finding must reference specific text from the provided artifacts using direct quotes or line references. +5. **Write findings to output** — Format findings clearly with severity, confidence, evidence, and resolution. 
+ +## Output + +```markdown +# Design Analysis: {Mode} — {Focus (if applicable)} + +## Findings + +### CRITICAL +**[{FOCUS}] Gap/Anti-Pattern: {title}** — Confidence: {n}% +- Evidence: "{quoted text from artifact}" +- Issue: {what is missing or wrong} +- Resolution: {concrete action to address} + +### HIGH +{findings...} + +### MEDIUM +{findings...} + +### LOW +{findings...} + +## Suggestions (60-79% confidence) +- **{title}** (Confidence: {n}%) — {brief description, no fix required} + +## Summary +| Severity | Count | +|----------|-------| +| CRITICAL | {n} | +| HIGH | {n} | +| MEDIUM | {n} | +| LOW | {n} | + +**Overall Assessment**: {BLOCKING | SHOULD-ADDRESS | INFORMATIONAL} +``` + +## Confidence Scale + +| Range | Label | Meaning | +|-------|-------|---------| +| 90-100% | Certain | Clearly a gap or anti-pattern — unambiguous evidence in artifact | +| 80-89% | High | Very likely an issue, minor chance of false positive | +| 60-79% | Medium | Plausible issue, depends on context not visible in artifact | +| < 60% | Low | Possible concern — drop, don't report | + +## Principles + +1. **Evidence-based** — Never flag a gap without citing specific text from the artifact +2. **Confidence-calibrated** — Report findings only at ≥80% confidence; list 60-79% items under Suggestions; drop anything below 60% +3. **Actionable** — Every finding includes a concrete resolution, not just a problem statement +4. **No speculation** — If you cannot find evidence in the provided artifacts, do not invent it +5.
**Single focus** — In gap-analysis mode, analyze only your assigned focus area; ignore others + +## Boundaries + +**Handle autonomously:** +- Loading assigned skill file +- Scanning artifacts for focus-specific patterns +- Assessing confidence and categorizing findings +- Writing structured findings report + +**Escalate to orchestrator:** +- Context documents are missing or unreadable +- Fundamental ambiguity that cannot be resolved without user input +- Artifacts reference external systems not present in the provided context diff --git a/shared/agents/git.md b/shared/agents/git.md index f1cb287a..ed89d85d 100644 --- a/shared/agents/git.md +++ b/shared/agents/git.md @@ -25,6 +25,7 @@ The orchestrator provides: | `validate-branch` | Pre-flight for /resolve: check branch state | `WORKTREE_PATH` (optional) | | `setup-task` | Create feature branch and fetch issue | `BASE_BRANCH`, `ISSUE_INPUT` (optional), `TASK_DESCRIPTION` (optional) | | `fetch-issue` | Fetch GitHub issue for implementation | `ISSUE_INPUT` (number or search term) | +| `fetch-issues-batch` | Fetch multiple GitHub issues for multi-issue planning | `ISSUE_NUMBERS` | | `comment-pr` | Create PR inline comments for review findings | `PR_NUMBER`, `REVIEW_BASE_DIR`, `TIMESTAMP`, `WORKTREE_PATH` (optional) | | `manage-debt` | Update tech debt backlog with pre-existing issues | `REVIEW_DIR`, `TIMESTAMP`, `WORKTREE_PATH` (optional) | | `create-release` | Create GitHub release with version tag | `VERSION`, `CHANGELOG_CONTENT` | @@ -177,6 +178,39 @@ Fetch comprehensive issue details for implementation planning. --- +## Operation: fetch-issues-batch + +Fetch multiple GitHub issues for multi-issue planning flows. + +**Input:** `ISSUE_NUMBERS` - Space-separated list of issue numbers (e.g., "12 15 18") + +**Process:** +1. Parse space-separated issue numbers +2. Fetch each issue via `gh issue view {number} --json number,title,body,labels,assignees,milestone,comments` +3. Extract acceptance criteria and dependencies from each +4.
Identify cross-issue relationships (shared labels, mutual references, dependency chains) + +**Output:** +```markdown +## Issues Batch ({n} issues) + +### Issue #{number1}: {title} +**Labels**: {labels} | **Priority**: {priority} +{body summary} +**Acceptance Criteria**: {extracted} +**Dependencies**: {extracted} + +### Issue #{number2}: {title} +... + +### Cross-Issue Analysis +- **Shared labels**: {common labels} +- **Dependencies**: {dependency chain if any} +- **Conflicts**: {conflicting requirements if any} +``` + +--- + ## Operation: comment-pr Create inline PR comments for blocking and should-fix issues from code review. diff --git a/shared/agents/synthesizer.md b/shared/agents/synthesizer.md index d2f94105..63336a5e 100644 --- a/shared/agents/synthesizer.md +++ b/shared/agents/synthesizer.md @@ -1,18 +1,18 @@ --- name: Synthesizer -description: Combines outputs from multiple agents into actionable summaries (modes: exploration, planning, review) +description: Combines outputs from multiple agents into actionable summaries (modes: exploration, planning, review, design) model: haiku skills: devflow:review-methodology, devflow:docs-framework, devflow:worktree-support --- # Synthesizer Agent -You are a synthesis specialist. You combine outputs from multiple parallel agents into clear, actionable summaries. You operate in three modes: exploration, planning, and review. +You are a synthesis specialist. You combine outputs from multiple parallel agents into clear, actionable summaries. You operate in four modes: exploration, planning, review, and design. 
## Input The orchestrator provides: -- **Mode**: `exploration` | `planning` | `review` +- **Mode**: `exploration` | `planning` | `review` | `design` - **Agent outputs**: Results from parallel agents to synthesize - **Output path**: Where to save synthesis (if applicable) @@ -125,6 +125,45 @@ Analyze 3 axes to determine strategy: --- +## Mode: Design + +Synthesize outputs from multiple Designer agents (gap analysis across different focus areas). + +**Process:** +1. Extract findings from each designer agent +2. Deduplicate: If multiple designers flag the same issue, merge them into one entry and boost its confidence by 10 percentage points per additional agent (cap at 100%) +3. Categorize by actionability: + - **Blocking** (CRITICAL/HIGH): Must be resolved before implementation + - **Should-Address** (MEDIUM): Recommended improvements + - **Informational** (LOW): Noted but not actionable +4. Sort by severity within each category + +**Output:** +```markdown +## Design Synthesis + +### Blocking Gaps +| Gap | Focus | Severity | Confidence | Resolution | +|-----|-------|----------|------------|------------| +| {gap} | {focus area} | {CRITICAL/HIGH} | {n}% | {proposed resolution} | + +### Should-Address +| Gap | Focus | Severity | Confidence | Recommendation | +|-----|-------|----------|------------|----------------| +| {gap} | {focus area} | MEDIUM | {n}% | {recommendation} | + +### Informational +| Gap | Focus | Confidence | Note | +|-----|-------|------------|------| +| {gap} | {focus area} | {n}% | {note} | + +### Key Insights +1. {cross-cutting insight} +2. {insight} +``` + +--- + ## Mode: Review Synthesize outputs from multiple Reviewer agents. Apply strict merge rules. diff --git a/shared/skills/agent-teams/SKILL.md b/shared/skills/agent-teams/SKILL.md index 82475e1b..71ccd821 100644 --- a/shared/skills/agent-teams/SKILL.md +++ b/shared/skills/agent-teams/SKILL.md @@ -37,7 +37,6 @@ Size team to task complexity. Assign distinct, non-overlapping perspectives.
| Full review | 4-5 | Core perspectives covered | | Debug investigation | 3-5 | One per hypothesis | | Implementation | 2-4 | Domain-separated work units | -| Specification | 3-4 | Debate requirements before user gates | | Resolution | 2-4 | Cross-validate fixes across batches | **Model guidance**: Explorers/Reviewers inherit parent model. Validators use `model: haiku`. @@ -119,6 +118,6 @@ Lead MUST always handle cleanup: ## Extended References -- `references/team-patterns.md` - Team structures for review, implement, debug, specify, resolve workflows +- `references/team-patterns.md` - Team structures for review, implement, plan, debug, resolve workflows - `references/communication.md` - Message protocols, broadcast patterns, debate formats - `references/cleanup.md` - Session management, orphan detection, resource cleanup diff --git a/shared/skills/agent-teams/references/cleanup.md b/shared/skills/agent-teams/references/cleanup.md index 5d211273..b409f7f9 100644 --- a/shared/skills/agent-teams/references/cleanup.md +++ b/shared/skills/agent-teams/references/cleanup.md @@ -74,7 +74,7 @@ After cleanup, verify: ## Sequential Team Transition Protocol -Commands that create multiple teams (e.g., `/implement`, `/specify`) MUST follow this 4-step protocol between teams. Skipping steps causes silent failures due to the one-team-per-session constraint. +Commands that create multiple teams (e.g., `/implement`, `/plan`) MUST follow this 4-step protocol between teams. Skipping steps causes silent failures due to the one-team-per-session constraint. 
``` Step 1: SHUTDOWN — Send shutdown_request to each teammate by name diff --git a/shared/skills/agent-teams/references/team-patterns.md b/shared/skills/agent-teams/references/team-patterns.md index a6fd7231..3e2cfd12 100644 --- a/shared/skills/agent-teams/references/team-patterns.md +++ b/shared/skills/agent-teams/references/team-patterns.md @@ -95,9 +95,9 @@ Lead spawns: --- -## Specification Team +## Planning Team -### Requirements Exploration Team (4 perspectives) +### Requirements Discovery Team (4 perspectives) ``` Lead spawns: @@ -117,7 +117,7 @@ Lead spawns: 5. Lead collects consensus after max 2 exchange rounds ``` -### Scope Planning Team (3 perspectives) +### Design Planning Team (3 perspectives) ``` Lead spawns: @@ -136,7 +136,7 @@ Lead spawns: 5. Lead collects consensus after max 2 exchange rounds ``` -**Note**: Specification teams complement (not replace) the 3 mandatory clarification gates. User still drives all decisions via Gate 0, Gate 1, and Gate 2. +**Note**: Planning teams support gap analysis and design review. User approves the final design artifact before implementation. 
--- @@ -210,8 +210,8 @@ Lead spawns (one per hypothesis): | Full review | 4 | 5 | Core perspectives | | Exploration | 3 | 4 | Diminishing returns beyond 4 | | Planning | 2 | 3 | Too many cooks | -| Specification (explore) | 3 | 4 | Requirements need diverse perspectives | -| Specification (scope) | 2 | 3 | Scope planning benefits from focus | +| Planning (discover) | 3 | 4 | Requirements need diverse perspectives | +| Planning (design) | 2 | 3 | Design planning benefits from focus | | Resolution | 2 | 4 | One per independent batch | | Debugging | 3 | 5 | One per viable hypothesis | | Parallel coding | 2 | 3 | Merge complexity grows fast | diff --git a/shared/skills/design-review/SKILL.md b/shared/skills/design-review/SKILL.md new file mode 100644 index 00000000..d79d427f --- /dev/null +++ b/shared/skills/design-review/SKILL.md @@ -0,0 +1,155 @@ +--- +name: design-review +description: This skill should be used when reviewing implementation plans for anti-patterns, checking design quality before implementation, or performing plan-level quality assessment. Provides 6 anti-pattern detection rules for the designer agent and inline planning. +user-invocable: false +allowed-tools: Read, Grep, Glob +--- + +# Design Review + +Anti-pattern detection for implementation plans. Catches structural problems before they become code. + +## Iron Law + +> **CATCH IT IN THE PLAN, NOT IN THE PR** +> +> A plan that implies N+1 queries will produce N+1 queries. A plan that omits error +> handling will produce code with no error handling. Review the plan as rigorously as +> you review the code — the plan is the blueprint that every implementation decision +> follows. + +--- + +## Anti-Pattern Detection Rules + +### 1. 
N+1 Queries + +**What to look for in a plan:** +- Steps that fetch a list, then loop to fetch related data per item +- "For each X, get Y" language without mentioning batch operations +- JOIN or batch-fetch never mentioned for related data +- "Display X with Y" without specifying how Y is fetched + +**How to flag:** +``` +[N+1 RISK] Step {N}: "{quoted plan text}" — fetches {entity} per iteration. +Resolution: Replace loop with batch query: {proposed batch operation}. +``` + +**What to suggest instead:** +- Identify all IDs in the list first, then batch-fetch related records in one query +- Use ORM eager loading (`.include`, `.with`, JOIN) at the list query level +- Cache frequently accessed related data at the service layer + +### 2. God Functions + +**What to look for in a plan:** +- Single step handling more than 3 distinct responsibilities +- "Validate AND process AND notify AND log AND update" in one step +- Steps described as "the main handler" that touch multiple subsystems +- No clear single-sentence description of what the function does + +**How to flag:** +``` +[GOD FUNCTION RISK] Step {N}: "{quoted plan text}" — handles {n} responsibilities. +Resolution: Split into: {responsibility 1} → {responsibility 2} → {responsibility 3}. +``` + +**What to suggest instead:** +- Each function should have a single, clear sentence description of its purpose +- Extract notification, logging, and side effects to separate functions +- Orchestrate composition at a higher level (service layer calls multiple focused functions) + +### 3. Missing Parallelism + +**What to look for in a plan:** +- Sequential steps with no data dependency between them +- "First do A, then do B, then do C" where B doesn't need A's output +- Multiple external API calls or DB queries listed sequentially +- Fan-out operations (notify all users, update all records) without parallel mention + +**How to flag:** +``` +[SEQUENTIAL BOTTLENECK] Steps {N}-{M}: "{quoted steps}" — no dependency between them. 
+Resolution: Execute {step A} and {step B} in parallel: {proposed parallel pattern}. +``` + +**What to suggest instead:** +- Use `Promise.all` / goroutines / `asyncio.gather` for independent operations +- Identify the critical path and move independent work off it +- Explicitly note which steps have ordering requirements vs. which are independent + +### 4. Error Handling Gaps + +**What to look for in a plan:** +- Happy path fully described, failure paths absent or vague +- "If error, return error" without specifying what the caller does with it +- Partial operations (multi-step) without rollback plan +- External calls (API, DB, queue) without failure mode specification +- Retry logic absent for transient failures (network timeouts, rate limits) + +**How to flag:** +``` +[ERROR HANDLING GAP] Step {N}: "{quoted plan text}" — failure path unspecified. +Resolution: Add: {on failure scenario} → {specific recovery action}. +``` + +**What to suggest instead:** +- For each external call, specify: what to do on timeout, on 4xx, on 5xx +- For multi-step operations, specify compensation (rollback/undo) for each step +- Use Result types or explicit error returns — no silent failures + +### 5. Missing Caching + +**What to look for in a plan:** +- Expensive operations called on every request without cache mention +- "Fetch configuration on each request", "call third-party API per user action" +- Reference data (categories, settings, permissions) loaded repeatedly +- Computed aggregates recalculated on every read + +**How to flag:** +``` +[CACHING GAP] Step {N}: "{quoted plan text}" — {expensive operation} on every request. +Resolution: Cache {data} for {TTL}: {proposed cache strategy and invalidation}. +``` + +**What to suggest instead:** +- Application-level cache (Redis/Memcached) for cross-request data +- In-memory cache for process-local reference data with short TTL +- Specify cache invalidation: event-driven (on update), time-based (TTL), or both + +### 6.
Poor Decomposition + +**What to look for in a plan:** +- Steps combining unrelated concerns (auth + business logic + presentation) +- Unclear ownership — which module/layer owns each step? +- Cross-cutting steps that appear multiple times without abstraction +- "Also update X" or "also send to Y" appended to otherwise focused steps + +**How to flag:** +``` +[DECOMPOSITION ISSUE] Step {N}: "{quoted plan text}" — combines {concern A} and {concern B}. +Resolution: {concern A} belongs in {layer/module}; {concern B} belongs in {layer/module}. +``` + +**What to suggest instead:** +- Assign each step to exactly one layer: presentation, business logic, data access +- Extract cross-cutting concerns (logging, auth, validation) to middleware or decorators +- Each module should have a clear bounded context — changes to one shouldn't require changes to another + +--- + +## Extended References + +| Reference | Content | +|-----------|---------| +| `references/anti-patterns.md` | Before/after plan examples for each anti-pattern | + +## Severity Guidelines + +| Level | Criteria | +|-------|----------| +| **CRITICAL** | Anti-pattern that will cause correctness failures (data loss, security breach) | +| **HIGH** | Anti-pattern that will cause significant performance or maintainability problems | +| **MEDIUM** | Anti-pattern that will require architectural rework post-implementation | +| **LOW** | Anti-pattern that reduces code quality but is easily refactored | diff --git a/shared/skills/design-review/references/anti-patterns.md b/shared/skills/design-review/references/anti-patterns.md new file mode 100644 index 00000000..38d7ca8c --- /dev/null +++ b/shared/skills/design-review/references/anti-patterns.md @@ -0,0 +1,207 @@ +# Design Anti-Patterns: Before/After Plan Examples + +Concrete before/after plan snippets for each of the 6 anti-patterns. Use these to recognize and correct problems in implementation plans. + +--- + +## 1. 
N+1 Queries + +### Before (Anti-Pattern) +``` +Step 4: Display order history page +- Fetch list of orders for user (query: SELECT * FROM orders WHERE user_id = ?) +- For each order, fetch the product name (query: SELECT name FROM products WHERE id = ?) +- For each order, fetch the customer's address (query: SELECT * FROM addresses WHERE id = ?) +- Render order list with product names and addresses +``` +Problems: 1 query for orders + N queries for products + N queries for addresses = 1 + 2N total queries. + +### After (Corrected) +``` +Step 4: Display order history page +- Fetch orders for user with JOIN to products and addresses: + SELECT o.*, p.name as product_name, a.* + FROM orders o + JOIN products p ON p.id = o.product_id + JOIN addresses a ON a.id = o.address_id + WHERE o.user_id = ? + ORDER BY o.created_at DESC + LIMIT 50 +- Render order list from joined result (1 query total) +``` + +--- + +## 2. God Functions + +### Before (Anti-Pattern) +``` +Step 3: Implement processCheckout() function +- Validate cart items (stock available, prices match) +- Apply discount codes +- Calculate tax based on shipping address +- Charge payment method via Stripe API +- Create order record in database +- Update inventory counts for each item +- Send confirmation email to customer +- Send fulfillment webhook to warehouse +- Log analytics event +- Return order confirmation +``` +Problems: 9 responsibilities in 1 function. Untestable in isolation. Any change breaks everything. 
+ +### After (Corrected) +``` +Step 3: Implement checkout pipeline +- validateCart(cart): verify stock and prices → Result +- applyDiscounts(cart, codes): → Result +- calculateTax(cart, address): → Result +- chargePayment(cart, paymentMethod): Stripe API call → Result +- createOrder(chargedCart): DB write → Result +- orchestrateCheckout(): calls above in sequence, rolls back on failure + +Post-order async (background jobs): +- updateInventory(order): decrement stock counts +- sendConfirmationEmail(order): customer notification +- notifyWarehouse(order): fulfillment webhook +- trackAnalytics(order): analytics event +``` + +--- + +## 3. Missing Parallelism + +### Before (Anti-Pattern) +``` +Step 5: Build user dashboard +- Fetch user profile from DB +- Then fetch recent orders from DB +- Then fetch unread notification count from DB +- Then fetch account balance from billing service +- Then render dashboard with all data +``` +Problems: 4 independent operations run sequentially. Total time = T1 + T2 + T3 + T4 instead of max(T1..T4). + +### After (Corrected) +``` +Step 5: Build user dashboard +- Fetch all data in parallel (no ordering dependency): + Promise.all([ + fetchUserProfile(userId), // ~10ms + fetchRecentOrders(userId, 10), // ~20ms + fetchNotificationCount(userId), // ~5ms + fetchAccountBalance(userId), // ~30ms (billing service) + ]) +- Wait for all 4 to complete (total ~30ms, not ~65ms) +- Render dashboard with combined result +- Error handling: if any fetch fails, show partial data with error indicator for failed section +``` + +--- + +## 4. Error Handling Gaps + +### Before (Anti-Pattern) +``` +Step 7: Implement file upload +- Receive multipart file upload from user +- Validate file type and size +- Upload to S3 bucket +- Save file metadata to database +- Return file URL to client +``` +Problems: No failure paths. What if S3 is down? What if DB write fails after S3 upload? Orphaned files, data inconsistency. 
+ +### After (Corrected) +``` +Step 7: Implement file upload +- Receive multipart file upload from user +- Validate file type (JPEG/PNG/PDF only) and size (max 10MB) + - On validation failure: return 422 with specific error message, no upload attempted +- Upload to S3 bucket + - On S3 timeout (>30s): return 503, log error, do not write to DB + - On S3 error: return 502 with "Storage unavailable", log error with request ID +- Save file metadata to database + - On DB failure after successful S3 upload: + - Queue cleanup job to delete orphaned S3 object + - Return 500, log error with S3 key for manual cleanup if queue fails +- Return 201 with file URL and metadata +- Idempotency: if file with same hash already exists for user, return existing URL (no duplicate upload) +``` + +--- + +## 5. Missing Caching + +### Before (Anti-Pattern) +``` +Step 2: Load application configuration +- On each API request, fetch feature flags from feature-flag service +- On each request, fetch user's role permissions from database +- On each request, fetch supported currencies list from config service +- Process request with loaded configuration +``` +Problems: 3 external calls per request. At 1000 req/s = 3000 calls/s to config services. Latency added to every request. + +### After (Corrected) +``` +Step 2: Load application configuration (cached) +- Feature flags: cached in Redis, TTL 60s, refreshed on change event + - On cache miss: fetch from feature-flag service, write to Redis + - Stale-while-revalidate: serve stale if refresh fails (max 300s stale) +- User permissions: cached per user in Redis, TTL 300s, invalidated on role change + - Cache key: `permissions:{userId}` + - On role change event: `DEL permissions:{userId}` +- Supported currencies: in-process cache (singleton), TTL 1h, loaded at startup + - Currencies rarely change — process-level cache is sufficient + - Refresh on TTL expiry; force an early reload via explicit admin action +- Total added latency: ~0ms for cache hits (>99% of requests) +``` + +--- + +## 6.
Poor Decomposition + +### Before (Anti-Pattern) +``` +Step 4: Handle user registration endpoint +- Parse request body +- Check if user is authenticated (if so, return 400) +- Validate email format and uniqueness +- Hash password with bcrypt +- Create user record +- Also send welcome email +- Also create default workspace for user +- Also log registration event for analytics +- Also update user count metric +- Return 201 with user object +``` +Problems: HTTP handler contains business logic, side effects, and infrastructure concerns. Untestable without full stack. + +### After (Corrected) +``` +Step 4: Handle user registration + +HTTP Layer (controller): +- Parse and validate request body (email, password format, required fields) +- Return 422 on validation failure with field-level errors +- Call UserService.register(email, password) +- Return 201 with user DTO on success; map domain errors to HTTP codes + +Domain Layer (UserService.register): +- Check uniqueness (throws DuplicateEmailError if taken) +- Hash password (bcrypt, cost 12) +- Create User entity +- Persist via UserRepository +- Emit UserRegistered domain event +- Return created User + +Event Handlers (async, triggered by UserRegistered event): +- WelcomeEmailHandler: send welcome email +- WorkspaceSetupHandler: create default workspace +- AnalyticsHandler: log registration event +- MetricsHandler: increment user count + +Separation: Controller handles HTTP. Service handles business logic. Events handle side effects. +Each layer independently testable. 
+``` diff --git a/shared/skills/docs-framework/SKILL.md b/shared/skills/docs-framework/SKILL.md index 6597d8c3..4a843709 100644 --- a/shared/skills/docs-framework/SKILL.md +++ b/shared/skills/docs-framework/SKILL.md @@ -34,6 +34,8 @@ All generated documentation lives under `.docs/` in the project root: │ └── {timestamp}/ # Second review (incremental) │ ├── {focus}.md │ └── review-summary.md +├── design/ # Design artifacts from /plan +│ └── {issue}-{topic-slug}.{timestamp}.md # Design document ├── status/ # Development logs │ ├── {timestamp}.md │ ├── compact/{timestamp}.md @@ -82,6 +84,7 @@ TOPIC_SLUG=$(echo "$TOPIC" | tr '[:upper:]' '[:lower:]' | tr ' ' '-' | sed 's/[^ | Resolution summary | `resolution-summary.md` in timestamped dir | `2025-12-26_1430/resolution-summary.md` | | Review head marker | `.last-review-head` | Plain text file with SHA | | Status logs | `{timestamp}.md` | `2025-12-26_1430.md` | +| Design documents | `{issue}-{topic-slug}.{timestamp}.md` | `42-jwt-auth.2026-04-07_1430.md` | --- @@ -113,6 +116,7 @@ source .devflow/scripts/docs-helpers.sh 2>/dev/null || { | Working Memory | `.memory/WORKING-MEMORY.md` | Overwrites (auto-maintained by Stop hook) | | Knowledge (decisions) | `.memory/knowledge/decisions.md` | Append-only (ADR-NNN sequential IDs) | | Knowledge (pitfalls) | `.memory/knowledge/pitfalls.md` | Append-only (PF-NNN sequential IDs) | +| Designer (via /plan) | `.docs/design/{issue}-{topic-slug}.{timestamp}.md` | Creates new design artifact | ### Agents That Don't Persist diff --git a/shared/skills/gap-analysis/SKILL.md b/shared/skills/gap-analysis/SKILL.md new file mode 100644 index 00000000..482a73b0 --- /dev/null +++ b/shared/skills/gap-analysis/SKILL.md @@ -0,0 +1,135 @@ +--- +name: gap-analysis +description: This skill should be used when analyzing design documents, specifications, or plans for completeness gaps, architectural issues, security concerns, and performance implications. 
Provides focus-specific detection patterns for the designer agent. +user-invocable: false +allowed-tools: Read, Grep, Glob +--- + +# Gap Analysis + +Domain expertise for detecting gaps in design documents, specifications, and plans before implementation begins. + +## Iron Law + +> **GAPS IN DESIGN BECOME BUGS IN PRODUCTION** +> +> Every undefined error state, vague requirement, and missing integration point is a bug +> waiting to be written. Catch gaps in the plan so the implementation reflects reality, +> not assumption. Evidence-required: every gap must cite specific text from the artifact. + +--- + +## Focus Areas + +### 1. Completeness + +Detect missing pieces that will block or break implementation. + +**Detection patterns:** +- Missing acceptance criteria — requirements without testable success conditions +- Undefined error states — happy path described, failure paths absent +- Vague requirements — "fast", "secure", "user-friendly" without measurable thresholds +- Ambiguous user journeys — "user submits form" without specifying what happens next +- Missing rollback specification — no recovery path if operation partially fails +- Unspecified data constraints — fields mentioned without type, size, or validation rules + +**Evidence trigger:** Any requirement phrase that cannot be directly translated into a test case. + +### 2. Architecture + +Detect design decisions that conflict with existing patterns or create structural problems. 
+ +**Detection patterns:** +- Implied patterns violating existing conventions — new module structure that contradicts codebase layout +- Missing integration points — feature touches shared state but integration method unspecified +- Layering issues — business logic pushed to wrong layer (e.g., validation in presentation layer) +- Undeclared dependencies — feature needs data/service not mentioned in scope +- Missing schema changes — new data structure implied but migration not mentioned +- Shared resource contention — multiple features/requests accessing same resource without concurrency plan + +**Evidence trigger:** Any design element that references an existing system component without specifying the integration contract. + +### 3. Security + +Detect authentication, authorization, and data protection gaps. + +**Detection patterns:** +- Auth gaps — feature creates new endpoints or data access without specifying auth requirements +- Input validation missing — user-provided data consumed without specifying sanitization +- Secret handling — credentials, tokens, or keys mentioned without specifying storage/transmission +- OWASP implications — mass assignment, IDOR, SSRF risks implied by the design +- Privilege escalation paths — role or permission changes without specifying boundary checks +- Audit trail missing — sensitive operations without logging or change tracking + +**Evidence trigger:** Any data flow that crosses a trust boundary without specifying the security control. + +### 4. Performance + +Detect design decisions that will cause performance problems at scale. 
+ +**Detection patterns:** +- N+1 patterns implied — loop-based data fetching without batch operation specified +- Missing caching strategy — repeated expensive operations (DB queries, API calls) without cache layer +- Concurrency concerns — shared mutable state accessed without locking or isolation strategy +- Query patterns — filtering/sorting on unindexed fields implied by the feature +- Unbounded result sets — list operations without pagination or size limits +- Synchronous blocking — long-running operations blocking request threads without async design + +**Evidence trigger:** Any data access pattern that scales linearly with user count or data volume without a mitigation strategy. + +### 5. Consistency (multi-issue only) + +Detect contradictions and duplications across multiple issue specifications. + +**Detection patterns:** +- Cross-issue contradictions — Issue A says feature X works one way, Issue B implies another +- Duplicate requirements — same functionality specified in multiple issues without coordination +- Conflicting scope — one issue's in-scope is another issue's out-of-scope +- Naming conflicts — same entity called different names across issues +- Interface mismatches — Issue A produces output that Issue B consumes, but shapes differ + +**Evidence trigger:** Any requirement in one issue that cannot coexist with a requirement in another issue. + +### 6. Dependencies (multi-issue only) + +Detect ordering constraints and shared resource conflicts across issues. 
+ +**Detection patterns:** +- Inter-issue ordering — Issue B depends on data/schema/interface created by Issue A +- Shared resources — multiple issues modify the same file, table, or API without coordination +- Breaking change propagation — one issue changes a contract that other issues assume stable +- Circular dependencies — two issues each require the other to be implemented first +- Implicit sequencing — implementation order not specified but technically required + +**Evidence trigger:** Any issue that references state, contracts, or resources that may be in flux due to another issue in the batch. + +--- + +## Extended References + +| Reference | Content | +|-----------|---------| +| `references/check-layers.md` | Detailed detection checklists per focus area | + +## Severity Guidelines + +| Level | Criteria | Examples | +|-------|----------|----------| +| **CRITICAL** | Gap that will cause data loss, security breach, or system failure | Missing auth on sensitive endpoint, undefined error recovery for financial transaction | +| **HIGH** | Gap that will cause incorrect behavior or significant technical debt | Missing pagination on unbounded list, undefined race condition in concurrent update | +| **MEDIUM** | Gap that will require rework if unaddressed | Vague acceptance criteria, missing caching strategy for frequently-read data | +| **LOW** | Gap that is an improvement opportunity | Naming inconsistency, minor UX ambiguity with obvious resolution | + +## Confidence Calibration + +- **Report at 80%+**: Gap is clearly present, evidence directly cited +- **Suggest at 60-79%**: Plausible gap, depends on context not visible in artifact +- **Drop below 60%**: Speculation without textual evidence + +Output format per gap: +``` +**[FOCUS] Gap: {title}** — Severity: {CRITICAL/HIGH/MEDIUM/LOW} | Confidence: {n}% +Evidence: "{quoted text from artifact}" +Issue: {what is missing or contradictory} +Resolution: {concrete action to address the gap} +``` diff --git 
a/shared/skills/gap-analysis/references/check-layers.md b/shared/skills/gap-analysis/references/check-layers.md new file mode 100644 index 00000000..0bf4186f --- /dev/null +++ b/shared/skills/gap-analysis/references/check-layers.md @@ -0,0 +1,125 @@ +# Gap Analysis Check Layers + +Detailed detection checklists and examples for each focus area. Use these patterns to systematically scan design artifacts. + +--- + +## Completeness Checklist + +For each requirement or user story, verify: + +- [ ] **Testable success condition** — Can you write a test that definitively passes or fails? + - BAD: "Login should be fast" + - GOOD: "Login completes in < 2s at p99 under 100 concurrent users" + +- [ ] **Failure path specified** — What happens when the operation fails? + - BAD: "User submits order" + - GOOD: "User submits order → if payment fails, show error X and preserve cart; if DB fails, show error Y and queue retry" + +- [ ] **All actors identified** — Who initiates, who receives, who observes? + - BAD: "Email sent on registration" + - GOOD: "System sends welcome email to registered user; admin receives copy if domain is corporate" + +- [ ] **Data constraints defined** — Type, size, format, required/optional? + - BAD: "User provides profile photo" + - GOOD: "Profile photo: JPEG/PNG, max 5MB, min 100×100px, max 4096×4096px" + +- [ ] **Boundary conditions covered** — Empty state, single item, max capacity? + - BAD: "Show list of items" + - GOOD: "Show list (empty state: 'No items yet' message; max 1000 shown; pagination at 25)" + +--- + +## Architecture Checklist + +For each new component or modification, verify: + +- [ ] **Integration contract defined** — How does this connect to existing systems? + - BAD: "Feature uses the existing auth service" + - GOOD: "Feature calls `/api/auth/verify` with Bearer token, expects `{userId, roles}` response" + +- [ ] **Layer ownership clear** — Which layer owns each responsibility? 
+ - BAD: "Validate email format in the UI and save to DB" + - GOOD: "UI: format validation only; API: business rule validation; DB: constraint enforcement" + +- [ ] **Schema changes documented** — New tables, columns, indexes, migrations? + - BAD: "Store user preferences" + - GOOD: "Add `user_preferences` JSONB column to `users` table; migration #042; index on `user_id`" + +- [ ] **State management specified** — Where does state live, how does it change? + - BAD: "Track order status" + - GOOD: "Order status: `pending → confirmed → shipped → delivered | cancelled`; state machine in `OrderService`; event sourced" + +--- + +## Security Checklist + +For each data flow across a trust boundary, verify: + +- [ ] **Authentication specified** — Who must be authenticated? + - BAD: "API endpoint to get user data" + - GOOD: "GET /api/users/:id — requires valid JWT; 401 if missing; 403 if requesting other user's data (RBAC: admin can access all)" + +- [ ] **Authorization specified** — Who can perform this action? + - BAD: "Admin can delete users" + - GOOD: "DELETE /api/users/:id — requires role=admin; cannot delete self; audit log entry required; soft delete only" + +- [ ] **Input validation specified** — What inputs are sanitized and how? + - BAD: "Accept search query from user" + - GOOD: "Search query: max 256 chars; strip HTML; parameterized query (no interpolation); rate limit 10 req/min" + +- [ ] **Secret handling specified** — How are credentials stored and transmitted? + - BAD: "Store API key for third-party service" + - GOOD: "API key stored in environment variable `THIRD_PARTY_KEY`; never logged; never returned in API responses; rotated quarterly" + +--- + +## Performance Checklist + +For each data access or computation, verify: + +- [ ] **Query patterns identified** — Are queries bounded and indexed? 
+ - BAD: "Get all orders for dashboard" + - GOOD: "GET /api/dashboard: fetch last 30 days orders for current user; index on `(user_id, created_at)`; max 500 rows; cached 5 min" + +- [ ] **Batch vs. loop specified** — Are N items fetched in 1 query or N queries? + - BAD: "Display user name next to each comment" + - GOOD: "Fetch all comments first, collect unique user IDs, batch-fetch users in single query: `SELECT * FROM users WHERE id = ANY($1)`" + +- [ ] **Async vs. sync specified** — Are slow operations handled asynchronously? + - BAD: "Send confirmation email after order" + - GOOD: "Email queued to background job (max 30s delay acceptable); order creation returns immediately; email failure does not fail order" + +- [ ] **Cache strategy specified** — What is cached, for how long, invalidated how? + - BAD: "Product catalog loaded on each request" + - GOOD: "Product catalog: Redis cache, 10-min TTL, invalidated on admin product update event; stale-while-revalidate pattern" + +--- + +## Consistency Checklist (multi-issue) + +Across all issues in the batch, verify: + +- [ ] **Same entity, same name** — Is the same concept called the same thing? + - CONFLICT: Issue A calls it "customer", Issue B calls it "user", Issue C calls it "account" + +- [ ] **Compatible interfaces** — Does Issue A's output match Issue B's expected input? + - CONFLICT: Issue A returns `{ userId: string }`, Issue B expects `{ user_id: number }` + +- [ ] **Non-conflicting scope** — Does one issue's "in scope" conflict with another's "out of scope"? + - CONFLICT: Issue A scope includes "email notifications"; Issue B explicitly excludes "email" + +--- + +## Dependencies Checklist (multi-issue) + +Across all issues in the batch, verify: + +- [ ] **Implementation order feasible** — Can issues be implemented independently? 
+ - BLOCKED: Issue B creates a feature that stores data in a table Issue A creates — Issue A must ship first + +- [ ] **Shared files identified** — Do multiple issues modify the same files? + - RISK: Issues A, B, and C all modify `src/models/user.ts` — coordinate to avoid merge conflicts + +- [ ] **API contracts stable** — Do any issues change APIs that other issues consume? + - RISK: Issue A changes the auth token format; Issues B and C assume the old format — need coordinated rollout diff --git a/shared/skills/plan:orch/SKILL.md b/shared/skills/plan:orch/SKILL.md index 0741667c..27f2082b 100644 --- a/shared/skills/plan:orch/SKILL.md +++ b/shared/skills/plan:orch/SKILL.md @@ -1,14 +1,14 @@ --- name: plan:orch -description: Agent orchestration for PLAN intent — codebase orientation, design exploration, gap validation +description: Agent orchestration for PLAN intent — codebase orientation, design exploration, gap analysis, implementation planning, design review user-invocable: false --- # Plan Orchestration -Agent pipeline for PLAN intent in ambient ORCHESTRATED mode. Codebase orientation, targeted exploration, architecture design, and gap validation. +Agent pipeline for PLAN intent in ambient ORCHESTRATED mode. Codebase orientation, targeted exploration, gap analysis, implementation planning, and design review. -This is a lightweight variant of the Plan phase in `/implement` for ambient ORCHESTRATED mode. +This is a focused variant of the `/plan` command pipeline for ambient ORCHESTRATED mode — no user gates, lighter weight, stays in conversation context. ## Iron Law @@ -25,13 +25,15 @@ This is a lightweight variant of the Plan phase in `/implement` for ambient ORCH For GUIDED depth, the main session performs planning directly: 1. **Spawn Skimmer** — `Agent(subagent_type="Skimmer")` targeting the area of interest. Use orientation output to ground design decisions in real file structures and patterns. -2.
**Design** — Using Skimmer findings + loaded pattern/design skills, design the approach directly in main session. +2. **Design** — Using Skimmer findings + loaded pattern/design skills, design the approach directly in main session. Apply `devflow:design-review` skill inline to check the plan for anti-patterns before presenting. 3. **Present** — Deliver structured plan using the Output format below. Use AskUserQuestion for ambiguous design choices. ## Worktree Support If the orchestrator receives a `WORKTREE_PATH` context (e.g., from multi-worktree workflows), pass it through to all spawned agents. Each agent's "Worktree Support" section handles path resolution. +--- + ## Phase 1: Orient Spawn `Agent(subagent_type="Skimmer")` to get codebase overview relevant to the planning question: @@ -51,32 +53,94 @@ Based on Skimmer findings, spawn 2-3 `Agent(subagent_type="Explore")` agents **i Adjust explorer focus based on the specific planning question. -## Phase 3: Design +## Phase 3: Gap Analysis Lite + +Spawn 2 `Agent(subagent_type="Designer")` agents **in a single message** (parallel execution): + +``` +Agent(subagent_type="Designer"): +"Mode: gap-analysis +Focus: completeness +Artifacts: + Planning question: {user's intent} + Exploration findings: {Phase 2 outputs} + Codebase context: {Phase 1 output} +Identify missing requirements, undefined error states, vague acceptance criteria." + +Agent(subagent_type="Designer"): +"Mode: gap-analysis +Focus: architecture +Artifacts: + Planning question: {user's intent} + Exploration findings: {Phase 2 outputs} + Codebase context: {Phase 1 output} +Identify pattern violations, missing integration points, layering issues." +``` + +## Phase 4: Synthesize + +Spawn `Agent(subagent_type="Synthesizer")` combining gap analysis and explore outputs: + +``` +Agent(subagent_type="Synthesizer"): +"Mode: design +Designer outputs: {Phase 3 designer outputs} +Combine gap findings with exploration context into blocking vs. 
should-address categorization." +``` + +## Phase 5: Plan -Spawn `Agent(subagent_type="Plan")` with combined Skimmer + Explore findings: +Spawn `Agent(subagent_type="Plan")` with all findings: - Design implementation approach with file-level specificity -- Reference existing patterns discovered in Phase 1-2 +- Reference existing patterns discovered in Phases 1-2 - Include: architecture decisions, file changes, new files needed, test strategy -- Flag any areas where existing patterns conflict with the proposed approach +- Integrate gap mitigations from Phase 4 into the relevant steps +- Flag areas where existing patterns conflict with the proposed approach -## Phase 4: Validate +## Phase 6: Design Review Lite -Main session reviews the plan for: +Main session reviews the plan inline using the loaded `devflow:design-review` skill: -- **Gaps**: Missing files, unhandled edge cases, integration points not addressed -- **Risks**: Areas where the plan deviates from existing patterns, potential regressions -- **Ambiguities**: Design choices that need user input +- Check for N+1 query implications +- Check for god functions +- Check for missing parallelism +- Check for error handling gaps +- Check for missing caching +- Check for poor decomposition -Present plan to user with identified risks. Use AskUserQuestion for any ambiguous design choices. +Note findings directly in the plan presentation. This is inline review — no agent spawn needed. + +## Phase 7: Present + +Present plan to user with: +- Implementation approach (file-level) +- Gap analysis findings (from Phase 4 synthesis) +- Design review notes (from Phase 6 inline check) +- Risk areas + +Use AskUserQuestion for any ambiguous design choices that need user input before proceeding to IMPLEMENT. 
+ +## Phase 8: Persist + +If the plan is substantial (>10 implementation steps or HIGH/CRITICAL context risk): +- Write to `.docs/design/{topic-slug}.{YYYY-MM-DD_HHMM}.md` with YAML frontmatter +- Note the artifact path in the output + +Otherwise: plan stays in conversation context, ready for IMPLEMENT to consume directly. + +--- ## Output Structured plan ready to feed into IMPLEMENT/ORCHESTRATED if user proceeds: - Goal and scope +- Gap analysis findings (blocking vs. should-address) - Architecture decisions with rationale - File-level change list (create/modify/delete) - Test strategy +- Design review notes (anti-patterns checked, any concerns) - Risks and mitigations - Open questions (if any) +- Design artifact path (if written to disk) diff --git a/shared/skills/router/SKILL.md b/shared/skills/router/SKILL.md index 86300862..b8a1011d 100644 --- a/shared/skills/router/SKILL.md +++ b/shared/skills/router/SKILL.md @@ -20,7 +20,7 @@ ORCHESTRATED: follow the loaded orchestration skill's pipeline. | IMPLEMENT | devflow:test-driven-development, devflow:patterns, devflow:research | | EXPLORE | — | | DEBUG | devflow:test-driven-development, devflow:software-design, devflow:testing | -| PLAN | devflow:test-driven-development, devflow:patterns, devflow:software-design, devflow:security | +| PLAN | devflow:test-driven-development, devflow:patterns, devflow:software-design, devflow:security, devflow:design-review | | REVIEW | devflow:quality-gates, devflow:software-design | ## ORCHESTRATED @@ -30,7 +30,7 @@ ORCHESTRATED: follow the loaded orchestration skill's pipeline. 
| IMPLEMENT | devflow:implement:orch, devflow:patterns | | EXPLORE | devflow:explore:orch | | DEBUG | devflow:debug:orch | -| PLAN | devflow:plan:orch, devflow:patterns, devflow:software-design, devflow:security | +| PLAN | devflow:plan:orch, devflow:patterns, devflow:software-design, devflow:security, devflow:design-review | | REVIEW | devflow:review:orch | | RESOLVE | devflow:resolve:orch | | PIPELINE | devflow:pipeline:orch, devflow:patterns | diff --git a/src/cli/commands/init.ts b/src/cli/commands/init.ts index 5bf56379..9713cd8c 100644 --- a/src/cli/commands/init.ts +++ b/src/cli/commands/init.ts @@ -301,8 +301,8 @@ export const initCommand = new Command('init') } else if (process.stdin.isTTY) { // Short hints to prevent overflow in multiselect — full descriptions live in plugins.ts const pluginHints: Record<string, string> = { - 'devflow-specify': 'feature specs → GitHub issues', - 'devflow-implement': 'explore, plan, code, review', + 'devflow-plan': 'gap analysis, design review', + 'devflow-implement': 'code, validate, self-review, PR', 'devflow-code-review': 'parallel specialized reviewers', 'devflow-resolve': 'fix review issues by risk', 'devflow-debug': 'competing hypotheses', diff --git a/src/cli/plugins.ts b/src/cli/plugins.ts index e4318729..bf56826f 100644 --- a/src/cli/plugins.ts +++ b/src/cli/plugins.ts @@ -50,17 +50,17 @@ export const DEVFLOW_PLUGINS: PluginDefinition[] = [ skills: ['software-design', 'docs-framework', 'git', 'boundary-validation', 'research', 'test-driven-development', 'testing'], }, { - name: 'devflow-specify', - description: 'Interactive feature specification - creates well-defined GitHub issues', - commands: ['/specify'], - agents: ['skimmer', 'synthesizer'], - skills: ['agent-teams'], + name: 'devflow-plan', + description: 'Unified design planning with gap analysis and design review', + commands: ['/plan'], + agents: ['git', 'skimmer', 'synthesizer', 'designer'], + skills: ['agent-teams', 'gap-analysis', 'design-review', 'patterns',
'knowledge-persistence', 'worktree-support'], }, { name: 'devflow-implement', - description: 'Complete task implementation workflow with exploration, planning, and coding', + description: 'Complete task implementation workflow - accepts plan documents, issues, or task descriptions', commands: ['/implement'], - agents: ['git', 'skimmer', 'synthesizer', 'coder', 'simplifier', 'scrutinizer', 'evaluator', 'tester', 'validator'], + agents: ['git', 'coder', 'simplifier', 'scrutinizer', 'evaluator', 'tester', 'validator'], skills: ['agent-teams', 'patterns', 'knowledge-persistence', 'qa', 'quality-gates', 'worktree-support'], }, { @@ -95,7 +95,7 @@ export const DEVFLOW_PLUGINS: PluginDefinition[] = [ name: 'devflow-ambient', description: 'Ambient mode — intent classification with proportional agent orchestration', commands: ['/ambient'], - agents: ['coder', 'validator', 'simplifier', 'scrutinizer', 'evaluator', 'tester', 'skimmer', 'reviewer', 'git', 'synthesizer', 'resolver'], + agents: ['coder', 'validator', 'simplifier', 'scrutinizer', 'evaluator', 'tester', 'skimmer', 'reviewer', 'git', 'synthesizer', 'resolver', 'designer'], skills: [ 'router', 'implement:orch', @@ -120,6 +120,8 @@ export const DEVFLOW_PLUGINS: PluginDefinition[] = [ 'knowledge-persistence', 'qa', 'worktree-support', + 'gap-analysis', + 'design-review', ], }, { @@ -202,6 +204,7 @@ export const DEVFLOW_PLUGINS: PluginDefinition[] = [ */ export const LEGACY_PLUGIN_NAMES: Record<string, string> = { 'devflow-frontend-design': 'devflow-ui-design', + 'devflow-specify': 'devflow-plan', }; /** @@ -210,6 +213,8 @@ export const LEGACY_PLUGIN_NAMES: Record<string, string> = { */ export const LEGACY_COMMAND_NAMES: string[] = [ 'review', + 'specify', + 'specify-teams', ]; /** @@ -223,6 +228,9 @@ export const LEGACY_AGENT_NAMES: string[] = [ /** * Deprecated skill names from old installations (prefixed with devflow-). * Used during uninstall to clean up legacy installs. + * + * Pruning: entries can be removed after 2 major versions.
+ * Users who skip major versions should run uninstall + reinstall. */ export const LEGACY_SKILL_NAMES: string[] = [ 'devflow-core-patterns', @@ -381,6 +389,9 @@ export const LEGACY_SKILL_NAMES: string[] = [ 'pipeline:orch', // v2.0.0 quality-gates: bare name for pre-namespace installs 'quality-gates', + // v2.x plan plugin: new skills bare names for pre-namespace installs + 'gap-analysis', + 'design-review', ]; /** diff --git a/tests/integration/ambient-activation.test.ts b/tests/integration/ambient-activation.test.ts index a14e6d98..ccd56a12 100644 --- a/tests/integration/ambient-activation.test.ts +++ b/tests/integration/ambient-activation.test.ts @@ -99,7 +99,7 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => { }); it('PLAN/GUIDED — loads router and planning skills', async () => { - const expected = ['test-driven-development', 'patterns', 'software-design', 'security']; + const expected = ['test-driven-development', 'patterns', 'software-design', 'security', 'design-review']; const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( 'how should we design a caching layer for API responses?', (r) => hasRequiredSkills(r, ['router']), @@ -192,7 +192,7 @@ describe.skipIf(!isClaudeAvailable())('devflow classification', () => { }); it('PLAN/ORCHESTRATED — loads plan:orch, patterns', async () => { - const required = ['plan:orch', 'patterns']; + const required = ['plan:orch', 'patterns', 'design-review']; const { result, passed, attempts, model } = await runClaudeStreamingWithRetry( 'design the architecture for a multi-service notification system with email, SMS, and push channels that supports user preferences and delivery guarantees', (r) => hasSkillInvocations(r) && hasRequiredSkills(r, required), diff --git a/tests/plugins.test.ts b/tests/plugins.test.ts index f83a6dcb..bdfb33dc 100644 --- a/tests/plugins.test.ts +++ b/tests/plugins.test.ts @@ -49,11 +49,11 @@ describe('buildAssetMaps', () => { // 'accessibility' first 
appears in devflow-accessibility (optional plugin) expect(skillsMap.get('accessibility')).toBe('devflow-accessibility'); - // 'git' first appears in devflow-implement - expect(agentsMap.get('git')).toBe('devflow-implement'); + // 'git' first appears in devflow-plan (inserted before devflow-implement) + expect(agentsMap.get('git')).toBe('devflow-plan'); - // 'synthesizer' first appears in devflow-specify - expect(agentsMap.get('synthesizer')).toBe('devflow-specify'); + // 'synthesizer' first appears in devflow-plan + expect(agentsMap.get('synthesizer')).toBe('devflow-plan'); }); it('returns empty maps for empty input', () => { diff --git a/tests/skill-references.test.ts b/tests/skill-references.test.ts index 47537ec1..c53956b5 100644 --- a/tests/skill-references.test.ts +++ b/tests/skill-references.test.ts @@ -131,7 +131,6 @@ const COMMAND_REFS = new Set([ 'resolve', 'debug', 'implement', - 'specify', 'self-review', 'audit-claude', 'plan',