From 34358cb0cb983fe098f18b769cf84c0a6d3f8350 Mon Sep 17 00:00:00 2001
From: bgagent <bgagent@noreply.github.com>
Date: Thu, 4 Jun 2026 13:39:08 -0400
Subject: [PATCH 1/3] feat(agent): add in-pipeline pre-PR self-review phase
 (#262)

Adds an optional self-review phase between agent execution and post-hooks
where the LLM critiques its own cumulative diff before the PR is created.
This improves first-pass PR quality by catching bugs, style issues, and
test gaps before human review.

- New self_review.py orchestration module with run_self_review()
- New prompts/self_review.py with focused review prompt template
- TaskConfig extended with self_review_enabled and self_review_max_turns
- Fields threaded through build_config, get_config, server, pipeline
- Fail-open: self-review errors never block PR creation
- Uses remaining turns/budget from original allocation (capped)
- Feature is opt-in (disabled by default)
---
 agent/src/config.py              |   7 +
 agent/src/models.py              |   3 +
 agent/src/pipeline.py            |  21 ++
 agent/src/prompts/__init__.py    |   1 +
 agent/src/prompts/self_review.py |  40 ++++
 agent/src/self_review.py         | 198 +++++++++++++++++++
 agent/src/server.py              |   9 +
 agent/tests/test_self_review.py  | 316 +++++++++++++++++++++++++++++++
 8 files changed, 595 insertions(+)
 create mode 100644 agent/src/prompts/self_review.py
 create mode 100644 agent/src/self_review.py
 create mode 100644 agent/tests/test_self_review.py

diff --git a/agent/src/config.py b/agent/src/config.py
index bf4ad57c..da18e90b 100644
--- a/agent/src/config.py
+++ b/agent/src/config.py
@@ -471,6 +471,8 @@ def build_config(
     initial_approval_gate_count: int = 0,
     approval_gate_cap: int | None = None,
     attachments: list[dict] | None = None,
+    self_review_enabled: bool = False,
+    self_review_max_turns: int = 5,
 ) -> TaskConfig:
     """Build and validate configuration from explicit parameters.
 
@@ -583,6 +585,8 @@ def build_config(
         initial_approval_gate_count=initial_approval_gate_count,
         approval_gate_cap=approval_gate_cap,
         attachments=validated_attachments,
+        self_review_enabled=self_review_enabled,
+        self_review_max_turns=self_review_max_turns,
     )
 
 
@@ -607,6 +611,9 @@ def get_config() -> TaskConfig:
             # an unreachable ``traces//`` key.
             trace=os.environ.get("TRACE", "").lower() in ("1", "true", "yes"),
             user_id=os.environ.get("USER_ID", ""),
+            self_review_enabled=os.environ.get("SELF_REVIEW_ENABLED", "").lower()
+            in ("1", "true", "yes"),
+            self_review_max_turns=int(os.environ.get("SELF_REVIEW_MAX_TURNS", "5")),
         )
     except ValueError as e:
         print(f"ERROR: {e}", file=sys.stderr)
diff --git a/agent/src/models.py b/agent/src/models.py
index 0befafda..6b4132a9 100644
--- a/agent/src/models.py
+++ b/agent/src/models.py
@@ -240,6 +240,9 @@ class TaskConfig(BaseModel):
     # Attachments from the orchestrator payload (Phase 3). Validated as
     # AttachmentConfig models. Empty list for tasks without attachments.
     attachments: list[AttachmentConfig] = Field(default_factory=list)
+    # Self-review: optional LLM diff critique before PR creation.
+    self_review_enabled: bool = False
+    self_review_max_turns: int = 5  # Cap on turns allocated to self-review
 
     @model_validator(mode="after")
     def _validate_trace_requires_user_id(self) -> Self:
diff --git a/agent/src/pipeline.py b/agent/src/pipeline.py
index 7718cb48..c09ea93e 100644
--- a/agent/src/pipeline.py
+++ b/agent/src/pipeline.py
@@ -36,6 +36,7 @@
 )
 from progress_writer import _ProgressWriter
 from prompt_builder import build_system_prompt, discover_project_config
+from self_review import run_self_review
 from shell import log, log_error_cw
 from system_prompt import SYSTEM_PROMPT
 from telemetry import (
@@ -589,6 +590,8 @@ def run_task(
     trace: bool = False,
     user_id: str = "",
     attachments: list[dict] | None = None,
+    self_review_enabled: bool = False,
+    self_review_max_turns: int = 5,
 ) -> dict:
     """Run the full agent pipeline and return a serialized result dict.
 
@@ -628,6 +631,8 @@ def run_task(
         initial_approval_gate_count=initial_approval_gate_count,
         approval_gate_cap=approval_gate_cap,
         attachments=attachments,
+        self_review_enabled=self_review_enabled,
+        self_review_max_turns=self_review_max_turns,
     )
 
     # Inject Cedar policies into config for the PolicyEngine in runner.py
@@ -1006,6 +1011,22 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
                 )
                 ensure_pr_strategy = "create"
 
+            # Self-review phase: LLM critiques its own diff before PR creation.
+            # Runs between cancel-check and post-hooks. Fail-open: errors here
+            # never block PR creation.
+            with task_span("task.self_review"):
+                review_result = run_self_review(
+                    config, setup, agent_result, trajectory, progress
+                )
+                if review_result is not None:
+                    # Accumulate turns and cost from the review phase
+                    agent_result.turns += review_result.turns
+                    agent_result.num_turns += review_result.num_turns or review_result.turns
+                    if review_result.cost_usd is not None:
+                        agent_result.cost_usd = (
+                            (agent_result.cost_usd or 0.0) + review_result.cost_usd
+                        )
+
             # Post-hooks (agent_result is guaranteed set by the try/except above)
             with task_span("task.post_hooks") as post_span:
                 # Safety net: commit any uncommitted tracked changes (skip for read-only tasks)
diff --git a/agent/src/prompts/__init__.py b/agent/src/prompts/__init__.py
index 60c5b2c0..8334c10b 100644
--- a/agent/src/prompts/__init__.py
+++ b/agent/src/prompts/__init__.py
@@ -13,6 +13,7 @@
 from .new_task import NEW_TASK_WORKFLOW
 from .pr_iteration import PR_ITERATION_WORKFLOW
 from .pr_review import PR_REVIEW_WORKFLOW
+from .self_review import SELF_REVIEW_PROMPT as SELF_REVIEW_PROMPT
 from .web_research import WEB_RESEARCH_PROMPT
 
 DEFAULT_WORKFLOW_ID = "coding/new-task-v1"
diff --git a/agent/src/prompts/self_review.py b/agent/src/prompts/self_review.py
new file mode 100644
index 00000000..9a6d109a
--- /dev/null
+++ b/agent/src/prompts/self_review.py
@@ -0,0 +1,40 @@
+"""Self-review prompt template for pre-PR diff critique."""
+
+SELF_REVIEW_PROMPT = """\
+You are reviewing your own work before it becomes a pull request. Below is the \
+cumulative diff of all changes on this branch compared to the base branch.
+
+<diff>
+{diff}
+</diff>
+
+## Task context
+
+{task_description}
+
+## Review checklist
+
+Examine the diff carefully for:
+
+1. **Correctness** — Logic errors, off-by-one mistakes, missing edge cases, \
+incorrect assumptions about data shapes or API contracts.
+2. **Bugs** — Null/undefined dereferences, unhandled error paths, resource leaks, \
+race conditions.
+3. **Security** — Injection vulnerabilities (SQL, command, XSS), hardcoded secrets, \
+insecure defaults, OWASP Top 10 issues.
+4. **Style & consistency** — Naming conventions, code style violations relative to \
+the surrounding codebase, unnecessary complexity.
+5. **Test gaps** — Important behaviour that is untested, assertions that don't \
+verify the right thing, missing edge-case coverage.
+
+## Instructions
+
+- If you find issues, fix them directly: edit the files, run the build/tests to \
+verify your fixes, and commit the changes.
+- If no issues are found, stop immediately — do not make changes for the sake of \
+making changes.
+- Do NOT refactor code that was not part of the original diff unless it has a \
+concrete bug or security issue.
+- Keep fixes minimal and focused. Each fix should be a separate commit with a \
+clear message.
+"""
diff --git a/agent/src/self_review.py b/agent/src/self_review.py
new file mode 100644
index 00000000..a73932db
--- /dev/null
+++ b/agent/src/self_review.py
@@ -0,0 +1,198 @@
+"""Self-review orchestration: LLM critiques its own diff before PR creation."""
+
+from __future__ import annotations
+
+import asyncio
+import subprocess
+from typing import TYPE_CHECKING
+
+from prompts.self_review import SELF_REVIEW_PROMPT
+from shell import log
+
+if TYPE_CHECKING:
+    from models import AgentResult, RepoSetup, TaskConfig
+    from progress_writer import _ProgressWriter
+    from telemetry import _TrajectoryWriter
+
+# Diff truncation limit (characters). Large diffs are cut at hunk boundaries.
+_MAX_DIFF_CHARS = 60_000
+
+# Minimal system prompt for the self-review agent invocation.
+_REVIEW_SYSTEM_PROMPT = """\
+You are a code reviewer working inside the repository {repo_url} on branch {branch_name}.
+Your working directory is {repo_dir}.
+
+You have full access to the filesystem and can run commands. Fix any issues you \
+find directly — edit files, run the build, and commit fixes. Keep changes minimal \
+and focused.
+
+Do NOT open a pull request or push. Just fix issues and commit locally.
+"""
+
+
+def _get_diff(repo_dir: str, default_branch: str) -> str:
+    """Generate the cumulative diff of the branch vs origin/{default_branch}."""
+    try:
+        result = subprocess.run(
+            ["git", "diff", f"origin/{default_branch}...HEAD"],
+            cwd=repo_dir,
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+        if result.returncode != 0:
+            log("WARN", f"self_review: git diff failed (exit {result.returncode})")
+            return ""
+        return result.stdout
+    except (subprocess.TimeoutExpired, OSError) as e:
+        log("WARN", f"self_review: git diff error: {type(e).__name__}: {e}")
+        return ""
+
+
+def _truncate_diff(diff: str, max_chars: int = _MAX_DIFF_CHARS) -> str:
+    """Truncate diff at a hunk boundary if it exceeds max_chars.
+
+    Cuts at the last complete hunk (line starting with '@@') that fits
+    within the limit, appending a truncation notice.
+    """
+    if len(diff) <= max_chars:
+        return diff
+
+    # Find the last hunk header that starts before max_chars
+    truncated = diff[:max_chars]
+    last_hunk = truncated.rfind("\n@@")
+    if last_hunk > 0:
+        # Cut just before this hunk header
+        truncated = truncated[:last_hunk]
+    else:
+        # No hunk boundary found — hard-cut at max_chars
+        last_newline = truncated.rfind("\n")
+        if last_newline > 0:
+            truncated = truncated[:last_newline]
+
+    total_lines = diff.count("\n")
+    kept_lines = truncated.count("\n")
+    truncated += (
+        f"\n\n... [diff truncated: showing ~{kept_lines} of ~{total_lines} lines; "
+        f"{len(diff) - len(truncated)} chars omitted] ..."
+    )
+    return truncated
+
+
+def _build_review_system_prompt(config: TaskConfig, setup: RepoSetup) -> str:
+    """Build a minimal system prompt for the self-review agent."""
+    return _REVIEW_SYSTEM_PROMPT.format(
+        repo_url=config.repo_url,
+        branch_name=setup.branch,
+        repo_dir=setup.repo_dir,
+    )
+
+
+def run_self_review(
+    config: TaskConfig,
+    setup: RepoSetup,
+    agent_result: AgentResult,
+    trajectory: _TrajectoryWriter,
+    progress: _ProgressWriter,
+) -> AgentResult | None:
+    """Run the self-review phase: LLM critiques its own diff and fixes issues.
+
+    Returns the AgentResult from the review phase, or None if skipped.
+    Fail-open: errors are logged but never block the pipeline.
+    """
+    # Skip condition: feature disabled
+    if not config.self_review_enabled:
+        log("TASK", "self_review: disabled (self_review_enabled=False)")
+        return None
+
+    # Skip condition: read-only workflows produce no diff to review
+    if config.read_only:
+        log("TASK", "self_review: skipped for read-only workflow")
+        return None
+
+    # Compute remaining turns
+    used_turns = agent_result.turns or 0
+    remaining_turns = config.max_turns - used_turns
+    review_turns = min(remaining_turns, config.self_review_max_turns)
+    if review_turns <= 0:
+        log("TASK", f"self_review: no remaining turns (used={used_turns}, max={config.max_turns})")
+        return None
+
+    # Compute remaining budget
+    review_budget: float | None = None
+    if config.max_budget_usd is not None:
+        used_cost = agent_result.cost_usd or 0.0
+        remaining_budget = config.max_budget_usd - used_cost
+        if remaining_budget <= 0:
+            log(
+                "TASK",
+                f"self_review: no remaining budget "
+                f"(used=${used_cost:.2f}, max=${config.max_budget_usd:.2f})",
+            )
+            return None
+        review_budget = remaining_budget
+
+    # Get the diff
+    diff = _get_diff(setup.repo_dir, setup.default_branch)
+    if not diff.strip():
+        log("TASK", "self_review: no diff found — skipping")
+        return None
+
+    # Truncate if needed
+    diff = _truncate_diff(diff)
+
+    # Build the review prompt
+    task_desc = config.task_description or f"Issue #{config.issue_number}"
+    user_prompt = SELF_REVIEW_PROMPT.format(diff=diff, task_description=task_desc)
+    system_prompt = _build_review_system_prompt(config, setup)
+
+    # Build a modified config for the review run
+    review_config = config.model_copy(
+        update={
+            "max_turns": review_turns,
+            "max_budget_usd": review_budget,
+        }
+    )
+
+    log(
+        "TASK",
+        f"self_review: starting (turns={review_turns}, "
+        f"budget={'$' + f'{review_budget:.2f}' if review_budget else 'unlimited'}, "
+        f"diff_chars={len(diff)})",
+    )
+    progress.write_agent_milestone(
+        "self_review_started",
+        f"turns={review_turns} diff_chars={len(diff)}",
+    )
+
+    try:
+        from runner import run_agent
+
+        review_result = asyncio.run(
+            run_agent(
+                user_prompt,
+                system_prompt,
+                review_config,
+                cwd=setup.repo_dir,
+                trajectory=trajectory,
+            )
+        )
+    except Exception as e:
+        # Fail-open: self-review errors never block the pipeline
+        log("WARN", f"self_review: agent execution failed: {type(e).__name__}: {e}")
+        progress.write_agent_milestone(
+            "self_review_complete",
+            f"status=error error={type(e).__name__}: {e}",
+        )
+        return None
+
+    log(
+        "TASK",
+        f"self_review: complete (status={review_result.status}, "
+        f"turns={review_result.turns}, cost=${review_result.cost_usd or 0:.4f})",
+    )
+    progress.write_agent_milestone(
+        "self_review_complete",
+        f"status={review_result.status} turns={review_result.turns}",
+    )
+    return review_result
diff --git a/agent/src/server.py b/agent/src/server.py
index e1114374..ea615ce9 100644
--- a/agent/src/server.py
+++ b/agent/src/server.py
@@ -405,6 +405,8 @@ def _run_task_background(
     user_id: str = "",
     workload_access_token: str = "",
     attachments: list[dict] | None = None,
+    self_review_enabled: bool = False,
+    self_review_max_turns: int = 5,
 ) -> None:
     """Run the agent task in a background thread."""
     global _background_pipeline_failed
@@ -488,6 +490,8 @@ def _run_task_background(
             trace=trace,
             user_id=user_id,
             attachments=attachments,
+            self_review_enabled=self_review_enabled,
+            self_review_max_turns=self_review_max_turns,
         )
         _background_pipeline_failed = False
     except Exception as e:
@@ -576,6 +580,9 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict:
     channel_source = inp.get("channel_source", "") or ""
     channel_metadata = inp.get("channel_metadata") or {}
     attachments = inp.get("attachments") or []
+    # Self-review (opt-in): LLM critiques its own diff before PR creation.
+    self_review_enabled = inp.get("self_review_enabled") is True
+    self_review_max_turns = int(inp.get("self_review_max_turns", 5))
     # ``trace`` is strictly opt-in (design §10.1). Accept only real
     # booleans from the orchestrator — a string "false" would otherwise
     # flip the flag on.
@@ -649,6 +656,8 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict:
         "user_id": user_id,
         "workload_access_token": workload_access_token,
         "attachments": attachments,
+        "self_review_enabled": self_review_enabled,
+        "self_review_max_turns": self_review_max_turns,
     }
 
 
diff --git a/agent/tests/test_self_review.py b/agent/tests/test_self_review.py
new file mode 100644
index 00000000..bd39741a
--- /dev/null
+++ b/agent/tests/test_self_review.py
@@ -0,0 +1,316 @@
+"""Unit tests for self_review.py — self-review orchestration module."""
+
+from unittest.mock import MagicMock, patch
+
+from models import AgentResult, RepoSetup, TaskConfig
+from self_review import _MAX_DIFF_CHARS, _get_diff, _truncate_diff, run_self_review
+
+
+def _make_config(**overrides) -> TaskConfig:
+    """Create a minimal TaskConfig for testing."""
+    defaults = {
+        "repo_url": "owner/repo",
+        "github_token": "ghp_test123",
+        "aws_region": "us-east-1",
+        "task_description": "Fix the bug",
+        "max_turns": 10,
+        "self_review_enabled": True,
+        "self_review_max_turns": 5,
+        "task_type": "new_task",
+    }
+    defaults.update(overrides)
+    return TaskConfig(**defaults)
+
+
+def _make_setup(**overrides) -> RepoSetup:
+    """Create a minimal RepoSetup for testing."""
+    defaults = {
+        "repo_dir": "/workspace/repo",
+        "branch": "feat/123-fix",
+        "default_branch": "main",
+    }
+    defaults.update(overrides)
+    return RepoSetup(**defaults)
+
+
+def _make_agent_result(**overrides) -> AgentResult:
+    """Create a minimal AgentResult for testing."""
+    defaults = {
+        "status": "success",
+        "turns": 5,
+        "num_turns": 5,
+        "cost_usd": 0.50,
+    }
+    defaults.update(overrides)
+    return AgentResult(**defaults)
+
+
+class TestSkipConditions:
+    """Test all conditions that cause self-review to be skipped."""
+
+    def test_skip_when_disabled(self):
+        config = _make_config(self_review_enabled=False)
+        setup = _make_setup()
+        agent_result = _make_agent_result()
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        result = run_self_review(config, setup, agent_result, trajectory, progress)
+        assert result is None
+
+    def test_skip_for_pr_review_task_type(self):
+        config = _make_config(
+            task_type="pr_review",
+            pr_number="42",
+            task_description="",
+            issue_number="",
+        )
+        setup = _make_setup()
+        agent_result = _make_agent_result()
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        result = run_self_review(config, setup, agent_result, trajectory, progress)
+        assert result is None
+
+    def test_skip_when_no_remaining_turns(self):
+        config = _make_config(max_turns=5, self_review_max_turns=5)
+        setup = _make_setup()
+        agent_result = _make_agent_result(turns=5)
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        result = run_self_review(config, setup, agent_result, trajectory, progress)
+        assert result is None
+
+    def test_skip_when_no_remaining_budget(self):
+        config = _make_config(max_budget_usd=1.0)
+        setup = _make_setup()
+        agent_result = _make_agent_result(cost_usd=1.0)
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        result = run_self_review(config, setup, agent_result, trajectory, progress)
+        assert result is None
+
+    @patch("self_review._get_diff", return_value="")
+    def test_skip_when_empty_diff(self, mock_diff):
+        config = _make_config()
+        setup = _make_setup()
+        agent_result = _make_agent_result()
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        result = run_self_review(config, setup, agent_result, trajectory, progress)
+        assert result is None
+
+    @patch("self_review._get_diff", return_value="   \n  \n  ")
+    def test_skip_when_whitespace_only_diff(self, mock_diff):
+        config = _make_config()
+        setup = _make_setup()
+        agent_result = _make_agent_result()
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        result = run_self_review(config, setup, agent_result, trajectory, progress)
+        assert result is None
+
+
+class TestGetDiff:
+    """Test _get_diff helper."""
+
+    @patch("self_review.subprocess.run")
+    def test_returns_diff_output(self, mock_run):
+        mock_run.return_value = MagicMock(returncode=0, stdout="diff --git a/f\n+line\n")
+        result = _get_diff("/repo", "main")
+        assert result == "diff --git a/f\n+line\n"
+        mock_run.assert_called_once_with(
+            ["git", "diff", "origin/main...HEAD"],
+            cwd="/repo",
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+
+    @patch("self_review.subprocess.run")
+    def test_returns_empty_on_failure(self, mock_run):
+        mock_run.return_value = MagicMock(returncode=1, stdout="")
+        result = _get_diff("/repo", "main")
+        assert result == ""
+
+    @patch("self_review.subprocess.run")
+    def test_returns_empty_on_timeout(self, mock_run):
+        import subprocess
+
+        mock_run.side_effect = subprocess.TimeoutExpired(cmd="git", timeout=60)
+        result = _get_diff("/repo", "main")
+        assert result == ""
+
+    @patch("self_review.subprocess.run")
+    def test_uses_custom_default_branch(self, mock_run):
+        mock_run.return_value = MagicMock(returncode=0, stdout="some diff")
+        _get_diff("/repo", "develop")
+        mock_run.assert_called_once_with(
+            ["git", "diff", "origin/develop...HEAD"],
+            cwd="/repo",
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+
+
+class TestTruncateDiff:
+    """Test _truncate_diff helper."""
+
+    def test_no_truncation_needed(self):
+        short_diff = "diff --git a/file.py\n@@ -1,3 +1,4 @@\n+new line\n"
+        result = _truncate_diff(short_diff)
+        assert result == short_diff
+
+    def test_truncates_at_hunk_boundary(self):
+        # Build a diff that exceeds the limit with multiple hunks
+        hunk1 = "diff --git a/file.py\n@@ -1,3 +1,4 @@\n" + "+line\n" * 100
+        hunk2 = "\n@@ -100,3 +101,4 @@\n" + "+more\n" * 100
+        big_diff = hunk1 + hunk2
+        result = _truncate_diff(big_diff, max_chars=len(hunk1) + 10)
+        # Should truncate before hunk2's @@ marker
+        assert "@@ -100,3" not in result
+        assert "truncated" in result
+
+    def test_hard_cut_when_no_hunk_boundary(self):
+        # Single hunk that exceeds max
+        big_diff = "+x\n" * 30000
+        result = _truncate_diff(big_diff, max_chars=100)
+        assert len(result) < len(big_diff)
+        assert "truncated" in result
+
+    def test_exact_limit_no_truncation(self):
+        diff = "a" * _MAX_DIFF_CHARS
+        result = _truncate_diff(diff)
+        assert result == diff
+
+    def test_one_over_limit_truncates(self):
+        diff = "a" * (_MAX_DIFF_CHARS + 1)
+        result = _truncate_diff(diff)
+        assert "truncated" in result
+
+
+class TestBudgetAndTurnComputation:
+    """Test that budget and turn limits are computed correctly."""
+
+    @patch("self_review._get_diff", return_value="diff --git a/f\n+line\n")
+    @patch("self_review.asyncio.run")
+    def test_review_turns_capped_at_self_review_max_turns(self, mock_asyncio_run, mock_diff):
+        mock_asyncio_run.return_value = AgentResult(status="success", turns=2, num_turns=2)
+        config = _make_config(max_turns=100, self_review_max_turns=3)
+        setup = _make_setup()
+        agent_result = _make_agent_result(turns=5)
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        run_self_review(config, setup, agent_result, trajectory, progress)
+
+        # The review config passed to run_agent should have max_turns=3
+        call_args = mock_asyncio_run.call_args
+        coro = call_args[0][0]
+        # Close the coroutine to avoid warnings
+        coro.close()
+
+    @patch("self_review._get_diff", return_value="diff --git a/f\n+line\n")
+    @patch("self_review.asyncio.run")
+    def test_review_turns_uses_remaining_when_less_than_cap(self, mock_asyncio_run, mock_diff):
+        mock_asyncio_run.return_value = AgentResult(status="success", turns=1, num_turns=1)
+        config = _make_config(max_turns=8, self_review_max_turns=5)
+        setup = _make_setup()
+        agent_result = _make_agent_result(turns=6)  # Only 2 remaining
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        run_self_review(config, setup, agent_result, trajectory, progress)
+
+        # Should use min(2, 5) = 2 turns
+        call_args = mock_asyncio_run.call_args
+        coro = call_args[0][0]
+        coro.close()
+
+
+class TestHappyPath:
+    """Test the full happy path with mocked run_agent."""
+
+    @patch("self_review._get_diff", return_value="diff --git a/file.py\n+new code\n")
+    @patch("self_review.asyncio.run")
+    def test_returns_review_result(self, mock_asyncio_run, mock_diff):
+        review_agent_result = AgentResult(
+            status="success", turns=2, num_turns=2, cost_usd=0.10
+        )
+        mock_asyncio_run.return_value = review_agent_result
+
+        config = _make_config()
+        setup = _make_setup()
+        agent_result = _make_agent_result(turns=5)
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        result = run_self_review(config, setup, agent_result, trajectory, progress)
+
+        assert result is not None
+        assert result.status == "success"
+        assert result.turns == 2
+        assert result.cost_usd == 0.10
+
+    @patch("self_review._get_diff", return_value="diff --git a/file.py\n+new code\n")
+    @patch("self_review.asyncio.run")
+    def test_writes_progress_milestones(self, mock_asyncio_run, mock_diff):
+        mock_asyncio_run.return_value = AgentResult(
+            status="success", turns=1, num_turns=1, cost_usd=0.05
+        )
+        config = _make_config()
+        setup = _make_setup()
+        agent_result = _make_agent_result()
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        run_self_review(config, setup, agent_result, trajectory, progress)
+
+        # Should write both started and complete milestones
+        milestone_calls = progress.write_agent_milestone.call_args_list
+        assert len(milestone_calls) == 2
+        assert milestone_calls[0][0][0] == "self_review_started"
+        assert milestone_calls[1][0][0] == "self_review_complete"
+
+    @patch("self_review._get_diff", return_value="diff --git a/file.py\n+new code\n")
+    @patch("self_review.asyncio.run")
+    def test_fail_open_on_agent_error(self, mock_asyncio_run, mock_diff):
+        mock_asyncio_run.side_effect = RuntimeError("SDK crashed")
+
+        config = _make_config()
+        setup = _make_setup()
+        agent_result = _make_agent_result()
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        # Should not raise
+        result = run_self_review(config, setup, agent_result, trajectory, progress)
+        assert result is None
+
+        # Should still write milestones
+        milestone_calls = progress.write_agent_milestone.call_args_list
+        assert milestone_calls[0][0][0] == "self_review_started"
+        assert milestone_calls[1][0][0] == "self_review_complete"
+        assert "error" in milestone_calls[1][0][1]
+
+    @patch("self_review._get_diff", return_value="diff --git a/file.py\n+new code\n")
+    @patch("self_review.asyncio.run")
+    def test_works_with_unlimited_budget(self, mock_asyncio_run, mock_diff):
+        mock_asyncio_run.return_value = AgentResult(
+            status="success", turns=1, num_turns=1, cost_usd=0.03
+        )
+        config = _make_config(max_budget_usd=None)
+        setup = _make_setup()
+        agent_result = _make_agent_result(cost_usd=None)
+        trajectory = MagicMock()
+        progress = MagicMock()
+
+        result = run_self_review(config, setup, agent_result, trajectory, progress)
+        assert result is not None
+        assert result.status == "success"

From bf720e8f43a95365abaac730c9e916e7f8bb017b Mon Sep 17 00:00:00 2001
From: bgagent <bgagent@noreply.github.com>
Date: Thu, 4 Jun 2026 15:29:18 -0400
Subject: [PATCH 2/3] feat(agent): post self-review summary as PR comment
 (#263)

After the self-review phase completes, the agent writes a structured
summary of findings to `.self-review-summary.md`. The pipeline reads
this file, posts it as a PR comment via `gh pr comment`, then deletes
it so it never appears in the PR diff. Fail-open: if the file is
missing or the comment fails to post, the pipeline continues normally.
---
 agent/src/pipeline.py            |   5 +
 agent/src/post_hooks.py          |  62 +++++++++++
 agent/src/prompts/self_review.py |  21 ++++
 agent/src/self_review.py         |  41 ++++++++
 agent/tests/test_self_review.py  | 170 ++++++++++++++++++++++++++++++-
 5 files changed, 298 insertions(+), 1 deletion(-)

diff --git a/agent/src/pipeline.py b/agent/src/pipeline.py
index c09ea93e..f486aa83 100644
--- a/agent/src/pipeline.py
+++ b/agent/src/pipeline.py
@@ -31,6 +31,7 @@
     _extract_agent_notes,
     ensure_committed,
     ensure_pr,
+    post_self_review_comment,
     verify_build,
     verify_lint,
 )
@@ -1049,6 +1050,10 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
             if pr_url:
                 progress.write_agent_milestone("pr_created", pr_url)
 
+            # Post self-review summary as PR comment (if self-review ran and produced findings)
+            if pr_url and review_result is not None:
+                post_self_review_comment(setup.repo_dir, pr_url, config)
+
             # Memory write — capture task episode and repo learnings
             memory_written = False
             effective_memory_id = memory_id or os.environ.get("MEMORY_ID", "")
diff --git a/agent/src/post_hooks.py b/agent/src/post_hooks.py
index 058a1e4c..dc840657 100644
--- a/agent/src/post_hooks.py
+++ b/agent/src/post_hooks.py
@@ -392,6 +392,68 @@ def ensure_pr(
         return None
 
 
+def post_self_review_comment(repo_dir: str, pr_url: str, config: TaskConfig) -> bool:
+    """Post the self-review summary as a PR comment.
+
+    Reads the summary file written by the self-review agent, formats it as a
+    comment, and posts it via `gh pr comment`. Fail-open: exceptions are logged
+    but never propagated.
+
+    Returns True if a comment was posted, False otherwise.
+    """
+    from self_review import read_self_review_summary
+
+    try:
+        summary = read_self_review_summary(repo_dir)
+    except Exception as e:
+        log("WARN", f"post_self_review_comment: failed to read summary: {type(e).__name__}: {e}")
+        return False
+
+    if not summary:
+        log("POST", "post_self_review_comment: no summary file found — skipping")
+        return False
+
+    # Extract PR number from URL (e.g. https://github.com/owner/repo/pull/123)
+    match = re.search(r"/pull/(\d+)", pr_url)
+    if not match:
+        log("WARN", f"post_self_review_comment: could not extract PR number from {pr_url}")
+        return False
+    pr_number = match.group(1)
+
+    comment_body = f"## \U0001f50d Self-Review Summary\n\n{summary}"
+
+    try:
+        result = subprocess.run(
+            [
+                "gh",
+                "pr",
+                "comment",
+                pr_number,
+                "--repo",
+                config.repo_url,
+                "--body",
+                comment_body,
+            ],
+            cwd=repo_dir,
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+        if result.returncode == 0:
+            log("POST", f"Self-review summary posted as comment on PR #{pr_number}")
+            return True
+        stderr = result.stderr.strip()[:200] if result.stderr else ""
+        log(
+            "WARN",
+            f"post_self_review_comment: gh pr comment failed "
+            f"(rc={result.returncode}): {stderr}",
+        )
+        return False
+    except (subprocess.TimeoutExpired, OSError) as e:
+        log("WARN", f"post_self_review_comment: {type(e).__name__}: {e}")
+        return False
+
+
 def _extract_agent_notes(repo_dir: str, branch: str, config: TaskConfig) -> str | None:
     """Extract the "## Agent notes" section from the PR body.
 
diff --git a/agent/src/prompts/self_review.py b/agent/src/prompts/self_review.py
index 9a6d109a..25cf47a2 100644
--- a/agent/src/prompts/self_review.py
+++ b/agent/src/prompts/self_review.py
@@ -37,4 +37,25 @@
 concrete bug or security issue.
 - Keep fixes minimal and focused. Each fix should be a separate commit with a \
 clear message.
+
+## Summary output
+
+After completing your review (whether you made fixes or not), write a file \
+`.self-review-summary.md` in the repository root with your findings in this format:
+
+```markdown
+### Self-Review Summary
+
+**Findings:** <number of issues found>
+**Fixes applied:** <number of fixes committed>
+
+#### Issues found
+
+- <category>: <brief description of issue> — <fixed | not fixed (reason)>
+```
+
+If no issues were found, write the file with: "No issues found — code looks good."
+
+This file is a pipeline artifact and will be deleted automatically — it will NOT \
+appear in the pull request.
 """
diff --git a/agent/src/self_review.py b/agent/src/self_review.py
index a73932db..69114d68 100644
--- a/agent/src/self_review.py
+++ b/agent/src/self_review.py
@@ -3,6 +3,8 @@
 from __future__ import annotations
 
 import asyncio
+import contextlib
+import os
 import subprocess
 from typing import TYPE_CHECKING
 
@@ -196,3 +198,42 @@ def run_self_review(
         f"status={review_result.status} turns={review_result.turns}",
     )
     return review_result
+
+
+_SUMMARY_FILENAME = ".self-review-summary.md"
+
+
+def read_self_review_summary(repo_dir: str) -> str | None:
+    """Read and delete the self-review summary file.
+
+    The self-review agent writes `.self-review-summary.md` in the repo root.
+    This function reads the content, removes the file (so it never appears in
+    the PR), and returns the content. Returns None if the file doesn't exist.
+    """
+    summary_path = os.path.join(repo_dir, _SUMMARY_FILENAME)
+    if not os.path.isfile(summary_path):
+        return None
+
+    try:
+        with open(summary_path) as f:
+            content = f.read()
+    except OSError as e:
+        log("WARN", f"self_review: failed to read summary file: {type(e).__name__}: {e}")
+        return None
+
+    # Remove the file so it doesn't end up in the PR
+    try:
+        os.remove(summary_path)
+    except OSError as e:
+        log("WARN", f"self_review: failed to delete summary file: {type(e).__name__}: {e}")
+
+    # If the file was staged by the agent, unstage it
+    with contextlib.suppress(subprocess.TimeoutExpired, OSError):
+        subprocess.run(
+            ["git", "rm", "--cached", "--ignore-unmatch", "-f", _SUMMARY_FILENAME],
+            cwd=repo_dir,
+            capture_output=True,
+            timeout=30,
+        )
+
+    return content.strip() if content.strip() else None
diff --git a/agent/tests/test_self_review.py b/agent/tests/test_self_review.py
index bd39741a..e2a817f1 100644
--- a/agent/tests/test_self_review.py
+++ b/agent/tests/test_self_review.py
@@ -1,9 +1,17 @@
 """Unit tests for self_review.py — self-review orchestration module."""
 
+import os
 from unittest.mock import MagicMock, patch
 
 from models import AgentResult, RepoSetup, TaskConfig
-from self_review import _MAX_DIFF_CHARS, _get_diff, _truncate_diff, run_self_review
+from self_review import (
+    _MAX_DIFF_CHARS,
+    _SUMMARY_FILENAME,
+    _get_diff,
+    _truncate_diff,
+    read_self_review_summary,
+    run_self_review,
+)
 
 
 def _make_config(**overrides) -> TaskConfig:
@@ -314,3 +322,163 @@ def test_works_with_unlimited_budget(self, mock_asyncio_run, mock_diff):
         result = run_self_review(config, setup, agent_result, trajectory, progress)
         assert result is not None
         assert result.status == "success"
+
+
+class TestReadSelfReviewSummary:
+    """Tests for read_self_review_summary — reads and cleans up the summary file."""
+
+    def test_returns_content_when_file_exists(self, tmp_path):
+        summary_content = (
+            "### Self-Review Summary\n\n"
+            "**Findings:** 2\n"
+            "**Fixes applied:** 1\n\n"
+            "#### Issues found\n\n"
+            "- Security: hardcoded token — fixed\n"
+            "- Style: inconsistent naming — not fixed (cosmetic)\n"
+        )
+        (tmp_path / _SUMMARY_FILENAME).write_text(summary_content)
+
+        result = read_self_review_summary(str(tmp_path))
+
+        assert result == summary_content.strip()
+
+    def test_returns_none_when_file_missing(self, tmp_path):
+        result = read_self_review_summary(str(tmp_path))
+        assert result is None
+
+    def test_deletes_file_after_reading(self, tmp_path):
+        (tmp_path / _SUMMARY_FILENAME).write_text("No issues found — code looks good.")
+
+        read_self_review_summary(str(tmp_path))
+
+        assert not (tmp_path / _SUMMARY_FILENAME).exists()
+
+    def test_returns_none_for_empty_file(self, tmp_path):
+        (tmp_path / _SUMMARY_FILENAME).write_text("   \n\n  ")
+
+        result = read_self_review_summary(str(tmp_path))
+
+        assert result is None
+
+    def test_returns_none_for_whitespace_only(self, tmp_path):
+        (tmp_path / _SUMMARY_FILENAME).write_text("\t\n ")
+
+        result = read_self_review_summary(str(tmp_path))
+
+        assert result is None
+
+    @patch("self_review.subprocess.run")
+    def test_runs_git_rm_cached_for_cleanup(self, mock_run, tmp_path):
+        (tmp_path / _SUMMARY_FILENAME).write_text("Some findings")
+        mock_run.return_value = MagicMock(returncode=0)
+
+        read_self_review_summary(str(tmp_path))
+
+        mock_run.assert_called_once_with(
+            ["git", "rm", "--cached", "--ignore-unmatch", "-f", _SUMMARY_FILENAME],
+            cwd=str(tmp_path),
+            capture_output=True,
+            timeout=30,
+        )
+
+    @patch("self_review.subprocess.run", side_effect=OSError("git not found"))
+    def test_git_rm_failure_does_not_block(self, mock_run, tmp_path):
+        (tmp_path / _SUMMARY_FILENAME).write_text("Findings here")
+
+        # Should not raise even if git rm fails
+        result = read_self_review_summary(str(tmp_path))
+
+        assert result == "Findings here"
+
+
+class TestPostSelfReviewComment:
+    """Tests for post_hooks.post_self_review_comment."""
+
+    def test_posts_comment_on_success(self, tmp_path):
+        from post_hooks import post_self_review_comment
+
+        (tmp_path / _SUMMARY_FILENAME).write_text("**Findings:** 1\n**Fixes applied:** 1")
+
+        config = MagicMock()
+        config.repo_url = "owner/repo"
+        pr_url = "https://github.com/owner/repo/pull/42"
+
+        with patch("post_hooks.subprocess.run") as mock_run:
+            mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
+            result = post_self_review_comment(str(tmp_path), pr_url, config)
+
+        assert result is True
+        call_args = mock_run.call_args[0][0]
+        assert call_args[0:3] == ["gh", "pr", "comment"]
+        assert "42" in call_args
+        assert "owner/repo" in call_args
+
+    def test_returns_false_when_no_summary(self, tmp_path):
+        from post_hooks import post_self_review_comment
+
+        config = MagicMock()
+        config.repo_url = "owner/repo"
+        pr_url = "https://github.com/owner/repo/pull/42"
+
+        result = post_self_review_comment(str(tmp_path), pr_url, config)
+        assert result is False
+
+    def test_returns_false_on_invalid_pr_url(self, tmp_path):
+        from post_hooks import post_self_review_comment
+
+        (tmp_path / _SUMMARY_FILENAME).write_text("Some findings")
+
+        config = MagicMock()
+        config.repo_url = "owner/repo"
+        pr_url = "https://github.com/owner/repo/issues/42"
+
+        result = post_self_review_comment(str(tmp_path), pr_url, config)
+        assert result is False
+
+    def test_returns_false_on_gh_failure(self, tmp_path):
+        from post_hooks import post_self_review_comment
+
+        (tmp_path / _SUMMARY_FILENAME).write_text("**Findings:** 1")
+
+        config = MagicMock()
+        config.repo_url = "owner/repo"
+        pr_url = "https://github.com/owner/repo/pull/99"
+
+        with patch("post_hooks.subprocess.run") as mock_run:
+            mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="not found")
+            result = post_self_review_comment(str(tmp_path), pr_url, config)
+
+        assert result is False
+
+    def test_fail_open_on_exception(self, tmp_path):
+        from post_hooks import post_self_review_comment
+
+        (tmp_path / _SUMMARY_FILENAME).write_text("**Findings:** 1")
+
+        config = MagicMock()
+        config.repo_url = "owner/repo"
+        pr_url = "https://github.com/owner/repo/pull/5"
+
+        with patch("post_hooks.subprocess.run", side_effect=OSError("network error")):
+            result = post_self_review_comment(str(tmp_path), pr_url, config)
+
+        assert result is False
+
+    def test_comment_body_includes_header(self, tmp_path):
+        from post_hooks import post_self_review_comment
+
+        (tmp_path / _SUMMARY_FILENAME).write_text("**Findings:** 0\nNo issues.")
+
+        config = MagicMock()
+        config.repo_url = "owner/repo"
+        pr_url = "https://github.com/owner/repo/pull/7"
+
+        with patch("post_hooks.subprocess.run") as mock_run:
+            mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
+            post_self_review_comment(str(tmp_path), pr_url, config)
+
+        call_args = mock_run.call_args[0][0]
+        body_idx = call_args.index("--body") + 1
+        body = call_args[body_idx]
+        assert "Self-Review Summary" in body
+        assert "**Findings:** 0" in body

From 32112308a0a34f990923e02d845edda4b1ac108d Mon Sep 17 00:00:00 2001
From: bgagent <bgagent@noreply.github.com>
Date: Mon, 22 Jun 2026 16:08:03 -0400
Subject: [PATCH 3/3] refactor(agent): expose self-review as a configurable
 workflow step
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses review feedback: rather than a hardcoded inline pipeline phase
gated by a per-task API flag, self-review is now a first-class workflow
StepKind, configurable as a step in the coding workflow YAML.

- Add `self_review` StepKind to workflow.schema.json (with a `max_turns`
  step field, 1-20, default 5) + models.Step; excluded from repo-less
  workflows (schema conditional + validator rule 3) since it diffs/re-runs
  the agent against the cloned tree.
- Register `_handle_self_review` in the runner's STEP_HANDLERS; it wraps
  run_self_review and accumulates the review loop's turns/cost onto the
  shared agent_result. Non-side-effecting, so it may use on_failure:
  continue (advisory / fail-open).
- Drive the step from pipeline.run_task via `_execute_self_review_step`
  (only_kinds={"self_review"}), mirroring _execute_agent_step. Runs after
  the cancel short-circuit and before post-hooks; posts the summary PR
  comment after ensure_pr when the step ran.
- Declare the step in coding/new-task-v1.yaml (opt-in by presence;
  on_failure: continue). Remove it to disable; tune via max_turns.
- Drop the now-redundant self_review_enabled / self_review_max_turns
  fields from TaskConfig, build_config, get_config, server params, and
  run_task — configurability lives in the workflow YAML.
- Add golden validator-corpus fixtures (valid self_review step; repo-less
  + self_review rejected) to lock parity.

Agent quality green: ruff, ty (0 errors), 1129 tests, 79.87% coverage.
---
 agent/src/config.py                           |   7 -
 agent/src/models.py                           |   3 -
 agent/src/pipeline.py                         |  84 +++++--
 agent/src/post_hooks.py                       |   3 +-
 agent/src/self_review.py                      |  36 ++-
 agent/src/server.py                           |   9 -
 agent/src/workflow/models.py                  |   4 +
 agent/src/workflow/runner.py                  |  71 ++++++
 agent/src/workflow/validator.py               |   6 +-
 agent/tests/test_self_review.py               | 208 +++++++++++++-----
 agent/workflows/coding/new-task-v1.yaml       |   6 +
 agent/workflows/schema/workflow.schema.json   |  10 +-
 .../rule3-repo-less-has-self-review.json      |  69 ++++++
 .../valid-coding-self-review-step.json        |  94 ++++++++
 14 files changed, 496 insertions(+), 114 deletions(-)
 create mode 100644 contracts/workflow-validation/rule3-repo-less-has-self-review.json
 create mode 100644 contracts/workflow-validation/valid-coding-self-review-step.json

diff --git a/agent/src/config.py b/agent/src/config.py
index da18e90b..bf4ad57c 100644
--- a/agent/src/config.py
+++ b/agent/src/config.py
@@ -471,8 +471,6 @@ def build_config(
     initial_approval_gate_count: int = 0,
     approval_gate_cap: int | None = None,
     attachments: list[dict] | None = None,
-    self_review_enabled: bool = False,
-    self_review_max_turns: int = 5,
 ) -> TaskConfig:
     """Build and validate configuration from explicit parameters.
 
@@ -585,8 +583,6 @@ def build_config(
         initial_approval_gate_count=initial_approval_gate_count,
         approval_gate_cap=approval_gate_cap,
         attachments=validated_attachments,
-        self_review_enabled=self_review_enabled,
-        self_review_max_turns=self_review_max_turns,
     )
 
 
@@ -611,9 +607,6 @@ def get_config() -> TaskConfig:
             # an unreachable ``traces//`` key.
             trace=os.environ.get("TRACE", "").lower() in ("1", "true", "yes"),
             user_id=os.environ.get("USER_ID", ""),
-            self_review_enabled=os.environ.get("SELF_REVIEW_ENABLED", "").lower()
-            in ("1", "true", "yes"),
-            self_review_max_turns=int(os.environ.get("SELF_REVIEW_MAX_TURNS", "5")),
         )
     except ValueError as e:
         print(f"ERROR: {e}", file=sys.stderr)
diff --git a/agent/src/models.py b/agent/src/models.py
index 6b4132a9..0befafda 100644
--- a/agent/src/models.py
+++ b/agent/src/models.py
@@ -240,9 +240,6 @@ class TaskConfig(BaseModel):
     # Attachments from the orchestrator payload (Phase 3). Validated as
     # AttachmentConfig models. Empty list for tasks without attachments.
     attachments: list[AttachmentConfig] = Field(default_factory=list)
-    # Self-review: optional LLM diff critique before PR creation.
-    self_review_enabled: bool = False
-    self_review_max_turns: int = 5  # Cap on turns allocated to self-review
 
     @model_validator(mode="after")
     def _validate_trace_requires_user_id(self) -> Self:
diff --git a/agent/src/pipeline.py b/agent/src/pipeline.py
index f486aa83..f4c29206 100644
--- a/agent/src/pipeline.py
+++ b/agent/src/pipeline.py
@@ -37,7 +37,6 @@
 )
 from progress_writer import _ProgressWriter
 from prompt_builder import build_system_prompt, discover_project_config
-from self_review import run_self_review
 from shell import log, log_error_cw
 from system_prompt import SYSTEM_PROMPT
 from telemetry import (
@@ -219,6 +218,50 @@ def _execute_agent_step(
     return ctx.agent_result
 
 
+def _execute_self_review_step(
+    workflow: Workflow | None,
+    config,
+    setup,
+    agent_result,
+    hydrated,
+    trajectory,
+    progress,
+) -> bool:
+    """Drive the workflow's ``self_review`` step (if declared) through the runner.
+
+    Mirrors ``_execute_agent_step``: only the ``self_review`` step is dispatched
+    (``only_kinds={"self_review"}``) so clone / build / PR stay on the inline
+    path. The step's handler accumulates the review loop's turns/cost back onto
+    ``agent_result`` (a shared mutable model), so the terminal result reflects
+    implement + review.
+
+    Returns True when the review actually ran (so the caller posts the summary
+    PR comment after ``ensure_pr``); False when no ``self_review`` step is
+    declared, the workflow failed to reload, or the review was skipped (read-only
+    / empty diff / no remaining turns). Fully fail-open — a review failure is
+    recorded as a step outcome and never propagates to block PR creation.
+    """
+    if workflow is None or not any(s.kind == "self_review" for s in workflow.steps):
+        return False
+
+    from workflow import StepContext, run_workflow
+
+    ctx = StepContext(
+        workflow=workflow,
+        config=config,
+        hydrated=hydrated,
+        progress=progress,
+        trajectory=trajectory,
+        setup=setup,
+        # The implement step's result, threaded in so the handler can size the
+        # review's turn budget and accumulate its turns/cost onto it.
+        agent_result=agent_result,
+    )
+    with task_span("task.self_review"):
+        run_workflow(workflow, ctx, only_kinds={"self_review"})
+    return bool(ctx.artifacts.get("self_review_ran", False))
+
+
 def _run_repoless_task(
     *,
     config,
@@ -591,8 +634,6 @@ def run_task(
     trace: bool = False,
     user_id: str = "",
     attachments: list[dict] | None = None,
-    self_review_enabled: bool = False,
-    self_review_max_turns: int = 5,
 ) -> dict:
     """Run the full agent pipeline and return a serialized result dict.
 
@@ -632,8 +673,6 @@ def run_task(
         initial_approval_gate_count=initial_approval_gate_count,
         approval_gate_cap=approval_gate_cap,
         attachments=attachments,
-        self_review_enabled=self_review_enabled,
-        self_review_max_turns=self_review_max_turns,
     )
 
     # Inject Cedar policies into config for the PolicyEngine in runner.py
@@ -1012,21 +1051,22 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
                 )
                 ensure_pr_strategy = "create"
 
-            # Self-review phase: LLM critiques its own diff before PR creation.
-            # Runs between cancel-check and post-hooks. Fail-open: errors here
-            # never block PR creation.
-            with task_span("task.self_review"):
-                review_result = run_self_review(
-                    config, setup, agent_result, trajectory, progress
-                )
-                if review_result is not None:
-                    # Accumulate turns and cost from the review phase
-                    agent_result.turns += review_result.turns
-                    agent_result.num_turns += review_result.num_turns or review_result.turns
-                    if review_result.cost_usd is not None:
-                        agent_result.cost_usd = (
-                            (agent_result.cost_usd or 0.0) + review_result.cost_usd
-                        )
+            # Self-review step: if the resolved workflow declares a ``self_review``
+            # step, drive it through the workflow runner (same pattern as
+            # ``_execute_agent_step``). The step has the LLM critique its own diff
+            # and fix issues, accumulating its turns/cost onto ``agent_result``.
+            # Runs AFTER the cancel short-circuit so a cancelled task never starts
+            # a second agent loop, and BEFORE post-hooks so fixes land in the PR.
+            # Fail-open: a review failure/skip never blocks PR creation.
+            self_review_ran = _execute_self_review_step(
+                _workflow,
+                config,
+                setup,
+                agent_result,
+                hc,
+                trajectory,
+                progress,
+            )
 
             # Post-hooks (agent_result is guaranteed set by the try/except above)
             with task_span("task.post_hooks") as post_span:
@@ -1050,8 +1090,8 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
             if pr_url:
                 progress.write_agent_milestone("pr_created", pr_url)
 
-            # Post self-review summary as PR comment (if self-review ran and produced findings)
-            if pr_url and review_result is not None:
+            # Post self-review summary as PR comment (if the self_review step ran)
+            if pr_url and self_review_ran:
                 post_self_review_comment(setup.repo_dir, pr_url, config)
 
             # Memory write — capture task episode and repo learnings
diff --git a/agent/src/post_hooks.py b/agent/src/post_hooks.py
index dc840657..00268847 100644
--- a/agent/src/post_hooks.py
+++ b/agent/src/post_hooks.py
@@ -445,8 +445,7 @@ def post_self_review_comment(repo_dir: str, pr_url: str, config: TaskConfig) ->
         stderr = result.stderr.strip()[:200] if result.stderr else ""
         log(
             "WARN",
-            f"post_self_review_comment: gh pr comment failed "
-            f"(rc={result.returncode}): {stderr}",
+            f"post_self_review_comment: gh pr comment failed (rc={result.returncode}): {stderr}",
         )
         return False
     except (subprocess.TimeoutExpired, OSError) as e:
diff --git a/agent/src/self_review.py b/agent/src/self_review.py
index 69114d68..0c7634cb 100644
--- a/agent/src/self_review.py
+++ b/agent/src/self_review.py
@@ -19,6 +19,9 @@
 # Diff truncation limit (characters). Large diffs are cut at hunk boundaries.
 _MAX_DIFF_CHARS = 60_000
 
+# Default cap on review-loop turns when the ``self_review`` step omits max_turns.
+_DEFAULT_REVIEW_MAX_TURNS = 5
+
 # Minimal system prompt for the self-review agent invocation.
 _REVIEW_SYSTEM_PROMPT = """\
 You are a code reviewer working inside the repository {repo_url} on branch {branch_name}.
@@ -90,23 +93,31 @@ def _build_review_system_prompt(config: TaskConfig, setup: RepoSetup) -> str:
     )
 
 
+def _milestone(progress: _ProgressWriter | None, name: str, detail: str) -> None:
+    """Emit a progress milestone if a writer is wired up (no-op otherwise)."""
+    if progress is not None:
+        progress.write_agent_milestone(name, detail)
+
+
 def run_self_review(
     config: TaskConfig,
     setup: RepoSetup,
     agent_result: AgentResult,
-    trajectory: _TrajectoryWriter,
-    progress: _ProgressWriter,
+    trajectory: _TrajectoryWriter | None,
+    progress: _ProgressWriter | None,
+    *,
+    max_turns: int = _DEFAULT_REVIEW_MAX_TURNS,
 ) -> AgentResult | None:
     """Run the self-review phase: LLM critiques its own diff and fixes issues.
 
+    Invoked by the ``self_review`` workflow step handler. The step's presence in
+    the resolved workflow is the enablement signal — there is no separate
+    feature flag; ``max_turns`` comes from the step (``self_review.max_turns``,
+    default 5) and caps the review loop within the task's remaining allowance.
+
     Returns the AgentResult from the review phase, or None if skipped.
     Fail-open: errors are logged but never block the pipeline.
     """
-    # Skip condition: feature disabled
-    if not config.self_review_enabled:
-        log("TASK", "self_review: disabled (self_review_enabled=False)")
-        return None
-
     # Skip condition: read-only workflows produce no diff to review
     if config.read_only:
         log("TASK", "self_review: skipped for read-only workflow")
@@ -115,7 +126,7 @@ def run_self_review(
     # Compute remaining turns
     used_turns = agent_result.turns or 0
     remaining_turns = config.max_turns - used_turns
-    review_turns = min(remaining_turns, config.self_review_max_turns)
+    review_turns = min(remaining_turns, max_turns)
     if review_turns <= 0:
         log("TASK", f"self_review: no remaining turns (used={used_turns}, max={config.max_turns})")
         return None
@@ -162,7 +173,8 @@ def run_self_review(
         f"budget={'$' + f'{review_budget:.2f}' if review_budget else 'unlimited'}, "
         f"diff_chars={len(diff)})",
     )
-    progress.write_agent_milestone(
+    _milestone(
+        progress,
         "self_review_started",
         f"turns={review_turns} diff_chars={len(diff)}",
     )
@@ -182,7 +194,8 @@ def run_self_review(
     except Exception as e:
         # Fail-open: self-review errors never block the pipeline
         log("WARN", f"self_review: agent execution failed: {type(e).__name__}: {e}")
-        progress.write_agent_milestone(
+        _milestone(
+            progress,
             "self_review_complete",
             f"status=error error={type(e).__name__}: {e}",
         )
@@ -193,7 +206,8 @@ def run_self_review(
         f"self_review: complete (status={review_result.status}, "
         f"turns={review_result.turns}, cost=${review_result.cost_usd or 0:.4f})",
     )
-    progress.write_agent_milestone(
+    _milestone(
+        progress,
         "self_review_complete",
         f"status={review_result.status} turns={review_result.turns}",
     )
diff --git a/agent/src/server.py b/agent/src/server.py
index ea615ce9..e1114374 100644
--- a/agent/src/server.py
+++ b/agent/src/server.py
@@ -405,8 +405,6 @@ def _run_task_background(
     user_id: str = "",
     workload_access_token: str = "",
     attachments: list[dict] | None = None,
-    self_review_enabled: bool = False,
-    self_review_max_turns: int = 5,
 ) -> None:
     """Run the agent task in a background thread."""
     global _background_pipeline_failed
@@ -490,8 +488,6 @@ def _run_task_background(
             trace=trace,
             user_id=user_id,
             attachments=attachments,
-            self_review_enabled=self_review_enabled,
-            self_review_max_turns=self_review_max_turns,
         )
         _background_pipeline_failed = False
     except Exception as e:
@@ -580,9 +576,6 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict:
     channel_source = inp.get("channel_source", "") or ""
     channel_metadata = inp.get("channel_metadata") or {}
     attachments = inp.get("attachments") or []
-    # Self-review (opt-in): LLM critiques its own diff before PR creation.
-    self_review_enabled = inp.get("self_review_enabled") is True
-    self_review_max_turns = int(inp.get("self_review_max_turns", 5))
     # ``trace`` is strictly opt-in (design §10.1). Accept only real
     # booleans from the orchestrator — a string "false" would otherwise
     # flip the flag on.
@@ -656,8 +649,6 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict:
         "user_id": user_id,
         "workload_access_token": workload_access_token,
         "attachments": attachments,
-        "self_review_enabled": self_review_enabled,
-        "self_review_max_turns": self_review_max_turns,
     }
 
 
diff --git a/agent/src/workflow/models.py b/agent/src/workflow/models.py
index 1411a16e..887d425a 100644
--- a/agent/src/workflow/models.py
+++ b/agent/src/workflow/models.py
@@ -31,6 +31,7 @@
     "run_agent",
     "verify_build",
     "verify_lint",
+    "self_review",
     "ensure_pr",
     "post_review",
     "deliver_artifact",
@@ -140,6 +141,9 @@ class Step(BaseModel):
     strategy: EnsurePrStrategy | None = None
     gate: VerifyGate | None = None
     target: DeliverTarget | None = None
+    # self_review: cap on turns the review loop may use (default applied by the
+    # handler when omitted).
+    max_turns: int | None = None
 
 
 class TerminalOutcomes(BaseModel):
diff --git a/agent/src/workflow/runner.py b/agent/src/workflow/runner.py
index 4f818689..b58008c1 100644
--- a/agent/src/workflow/runner.py
+++ b/agent/src/workflow/runner.py
@@ -375,6 +375,10 @@ def _milestone(ctx: StepContext, milestone: str) -> None:
 # pipeline.py) to avoid pulling the SDK / boto3 at module import and to keep the
 # orchestration core importable in isolation for tests.
 
+# Default turn cap for a self_review step that omits max_turns (kept in sync
+# with the schema's self_review max_turns default and self_review.py).
+_DEFAULT_SELF_REVIEW_MAX_TURNS = 5
+
 
 def _handle_clone_repo(step: Step, ctx: StepContext) -> StepOutcome:
     """Clone + prepare the repo (replaces the inline ``setup_repo`` call).
@@ -560,6 +564,72 @@ def _handle_verify_lint(step: Step, ctx: StepContext) -> StepOutcome:
     )
 
 
+def _handle_self_review(step: Step, ctx: StepContext) -> StepOutcome:
+    """Have the agent critique its own diff and fix issues before the PR opens.
+
+    Declaring a ``self_review`` step in the workflow *is* the enablement — there
+    is no separate feature flag. The step runs a second, review-focused
+    ``run_agent`` loop over the cumulative branch diff (``self_review.run_self_review``),
+    capped at ``step.max_turns`` turns (default 5) drawn from the task's
+    remaining turn/budget allowance, and accumulates the review's turns/cost back
+    onto ``ctx.agent_result`` so the terminal result reflects the full task.
+
+    Authored as an advisory step (``on_failure: continue``): a review that is
+    skipped (read-only, no diff, no remaining turns) or fails is recorded but
+    never blocks PR creation — matching the original fail-open design. The
+    summary the review agent writes is posted as a PR comment by the pipeline
+    after ``ensure_pr`` (``self_review_ran`` in this outcome's data signals it).
+    """
+    if ctx.setup is None:
+        return StepOutcome(
+            kind=step.kind,
+            name=_step_key(step),
+            status="failed",
+            error="self_review requires a cloned repo (no clone_repo step ran)",
+        )
+    if ctx.agent_result is None:
+        return StepOutcome(
+            kind=step.kind,
+            name=_step_key(step),
+            status="failed",
+            error="self_review requires a prior run_agent result to size its turn budget",
+        )
+
+    from self_review import run_self_review
+
+    review_result = run_self_review(
+        ctx.config,
+        ctx.setup,
+        ctx.agent_result,
+        ctx.trajectory,
+        ctx.progress,
+        max_turns=step.max_turns or _DEFAULT_SELF_REVIEW_MAX_TURNS,
+    )
+    if review_result is None:
+        # Skipped (read-only / empty diff / no remaining turns) — not a failure.
+        return StepOutcome(
+            kind=step.kind,
+            name=_step_key(step),
+            status="succeeded",
+            data={"self_review_ran": False},
+        )
+
+    # Accumulate the review phase's turns/cost onto the running agent result so
+    # the terminal TaskResult reflects the whole task (implement + review).
+    ar = ctx.agent_result
+    ar.turns += review_result.turns
+    ar.num_turns += review_result.num_turns or review_result.turns
+    if review_result.cost_usd is not None:
+        ar.cost_usd = (ar.cost_usd or 0.0) + review_result.cost_usd
+
+    return StepOutcome(
+        kind=step.kind,
+        name=_step_key(step),
+        status="succeeded",
+        data={"self_review_ran": True, "review_status": review_result.status},
+    )
+
+
 def _handle_ensure_pr(step: Step, ctx: StepContext) -> StepOutcome:
     """Create / push+resolve / resolve a PR per the step's ``strategy``.
 
@@ -645,6 +715,7 @@ def _handle_deliver_artifact(step: Step, ctx: StepContext) -> StepOutcome:
     "run_agent": _handle_run_agent,
     "verify_build": _handle_verify_build,
     "verify_lint": _handle_verify_lint,
+    "self_review": _handle_self_review,
     "ensure_pr": _handle_ensure_pr,
     "post_review": _handle_post_review,
     "deliver_artifact": _handle_deliver_artifact,
diff --git a/agent/src/workflow/validator.py b/agent/src/workflow/validator.py
index 25f2a6ee..21f27c45 100644
--- a/agent/src/workflow/validator.py
+++ b/agent/src/workflow/validator.py
@@ -40,8 +40,12 @@
 _SIDE_EFFECTING_KINDS = frozenset({"ensure_pr", "post_review", "deliver_artifact"})
 
 # Steps that only make sense when a repo is cloned (rule 3 / rule 7).
+# self_review diffs and re-runs the agent against the cloned working tree, so it
+# is repo-only too. It is deliberately NOT in _SIDE_EFFECTING_KINDS: it commits
+# locally but never pushes or opens a PR, so it may be marked on_failure:
+# continue (advisory / fail-open).
 _REPO_ONLY_KINDS = frozenset(
-    {"clone_repo", "ensure_pr", "post_review", "verify_build", "verify_lint"}
+    {"clone_repo", "ensure_pr", "post_review", "verify_build", "verify_lint", "self_review"}
 )
 
 # Built-in (Phase 1-3) policy modules / MCP servers. Registry refs (registry://)
diff --git a/agent/tests/test_self_review.py b/agent/tests/test_self_review.py
index e2a817f1..11f62deb 100644
--- a/agent/tests/test_self_review.py
+++ b/agent/tests/test_self_review.py
@@ -1,6 +1,5 @@
 """Unit tests for self_review.py — self-review orchestration module."""
 
-import os
 from unittest.mock import MagicMock, patch
 
 from models import AgentResult, RepoSetup, TaskConfig
@@ -16,63 +15,42 @@
 
 def _make_config(**overrides) -> TaskConfig:
     """Create a minimal TaskConfig for testing."""
-    defaults = {
-        "repo_url": "owner/repo",
-        "github_token": "ghp_test123",
-        "aws_region": "us-east-1",
-        "task_description": "Fix the bug",
-        "max_turns": 10,
-        "self_review_enabled": True,
-        "self_review_max_turns": 5,
-        "task_type": "new_task",
-    }
-    defaults.update(overrides)
-    return TaskConfig(**defaults)
+    base = TaskConfig(
+        repo_url="owner/repo",
+        github_token="ghp_test123",
+        aws_region="us-east-1",
+        task_description="Fix the bug",
+        max_turns=10,
+    )
+    return base.model_copy(update=overrides) if overrides else base
 
 
 def _make_setup(**overrides) -> RepoSetup:
     """Create a minimal RepoSetup for testing."""
-    defaults = {
-        "repo_dir": "/workspace/repo",
-        "branch": "feat/123-fix",
-        "default_branch": "main",
-    }
-    defaults.update(overrides)
-    return RepoSetup(**defaults)
+    base = RepoSetup(
+        repo_dir="/workspace/repo",
+        branch="feat/123-fix",
+        default_branch="main",
+    )
+    return base.model_copy(update=overrides) if overrides else base
 
 
 def _make_agent_result(**overrides) -> AgentResult:
     """Create a minimal AgentResult for testing."""
-    defaults = {
-        "status": "success",
-        "turns": 5,
-        "num_turns": 5,
-        "cost_usd": 0.50,
-    }
-    defaults.update(overrides)
-    return AgentResult(**defaults)
+    base = AgentResult(
+        status="success",
+        turns=5,
+        num_turns=5,
+        cost_usd=0.50,
+    )
+    return base.model_copy(update=overrides) if overrides else base
 
 
 class TestSkipConditions:
     """Test all conditions that cause self-review to be skipped."""
 
-    def test_skip_when_disabled(self):
-        config = _make_config(self_review_enabled=False)
-        setup = _make_setup()
-        agent_result = _make_agent_result()
-        trajectory = MagicMock()
-        progress = MagicMock()
-
-        result = run_self_review(config, setup, agent_result, trajectory, progress)
-        assert result is None
-
-    def test_skip_for_pr_review_task_type(self):
-        config = _make_config(
-            task_type="pr_review",
-            pr_number="42",
-            task_description="",
-            issue_number="",
-        )
+    def test_skip_for_read_only_workflow(self):
+        config = _make_config(read_only=True)
         setup = _make_setup()
         agent_result = _make_agent_result()
         trajectory = MagicMock()
@@ -82,13 +60,13 @@ def test_skip_for_pr_review_task_type(self):
         assert result is None
 
     def test_skip_when_no_remaining_turns(self):
-        config = _make_config(max_turns=5, self_review_max_turns=5)
+        config = _make_config(max_turns=5)
         setup = _make_setup()
         agent_result = _make_agent_result(turns=5)
         trajectory = MagicMock()
         progress = MagicMock()
 
-        result = run_self_review(config, setup, agent_result, trajectory, progress)
+        result = run_self_review(config, setup, agent_result, trajectory, progress, max_turns=5)
         assert result is None
 
     def test_skip_when_no_remaining_budget(self):
@@ -208,17 +186,17 @@ class TestBudgetAndTurnComputation:
 
     @patch("self_review._get_diff", return_value="diff --git a/f\n+line\n")
     @patch("self_review.asyncio.run")
-    def test_review_turns_capped_at_self_review_max_turns(self, mock_asyncio_run, mock_diff):
+    def test_review_turns_capped_at_step_max_turns(self, mock_asyncio_run, mock_diff):
         mock_asyncio_run.return_value = AgentResult(status="success", turns=2, num_turns=2)
-        config = _make_config(max_turns=100, self_review_max_turns=3)
+        config = _make_config(max_turns=100)
         setup = _make_setup()
         agent_result = _make_agent_result(turns=5)
         trajectory = MagicMock()
         progress = MagicMock()
 
-        run_self_review(config, setup, agent_result, trajectory, progress)
+        # The step's max_turns (3) caps the review even though 95 turns remain.
+        run_self_review(config, setup, agent_result, trajectory, progress, max_turns=3)
 
-        # The review config passed to run_agent should have max_turns=3
         call_args = mock_asyncio_run.call_args
         coro = call_args[0][0]
         # Close the coroutine to avoid warnings
@@ -228,15 +206,15 @@ def test_review_turns_capped_at_self_review_max_turns(self, mock_asyncio_run, mo
     @patch("self_review.asyncio.run")
     def test_review_turns_uses_remaining_when_less_than_cap(self, mock_asyncio_run, mock_diff):
         mock_asyncio_run.return_value = AgentResult(status="success", turns=1, num_turns=1)
-        config = _make_config(max_turns=8, self_review_max_turns=5)
+        config = _make_config(max_turns=8)
         setup = _make_setup()
         agent_result = _make_agent_result(turns=6)  # Only 2 remaining
         trajectory = MagicMock()
         progress = MagicMock()
 
-        run_self_review(config, setup, agent_result, trajectory, progress)
+        # Should use min(2 remaining, 5 cap) = 2 turns
+        run_self_review(config, setup, agent_result, trajectory, progress, max_turns=5)
 
-        # Should use min(2, 5) = 2 turns
         call_args = mock_asyncio_run.call_args
         coro = call_args[0][0]
         coro.close()
@@ -248,9 +226,7 @@ class TestHappyPath:
     @patch("self_review._get_diff", return_value="diff --git a/file.py\n+new code\n")
     @patch("self_review.asyncio.run")
     def test_returns_review_result(self, mock_asyncio_run, mock_diff):
-        review_agent_result = AgentResult(
-            status="success", turns=2, num_turns=2, cost_usd=0.10
-        )
+        review_agent_result = AgentResult(status="success", turns=2, num_turns=2, cost_usd=0.10)
         mock_asyncio_run.return_value = review_agent_result
 
         config = _make_config()
@@ -482,3 +458,121 @@ def test_comment_body_includes_header(self, tmp_path):
         body = call_args[body_idx]
         assert "Self-Review Summary" in body
         assert "**Findings:** 0" in body
+
+
+class TestSelfReviewStepHandler:
+    """Tests for the workflow ``self_review`` step handler (workflow/runner.py).
+
+    The step's presence in a workflow is the enablement signal; the handler
+    wraps ``run_self_review`` and accumulates the review's turns/cost onto the
+    shared ``ctx.agent_result``.
+    """
+
+    def _ctx(self, *, setup, agent_result):
+        from workflow import Step, StepContext, Workflow
+
+        wf = Workflow.model_validate(
+            {
+                "id": "coding/new-task-v1",
+                "version": "1.0.0",
+                "domain": "coding",
+                "prompt": {"template": "registry://prompt/x"},
+                "hydration": {"sources": ["task_description"]},
+                "agent_config": {"tier": "standard", "allowed_tools": ["Bash"]},
+                "steps": [
+                    {"kind": "run_agent"},
+                    {"kind": "self_review", "name": "review", "max_turns": 4},
+                ],
+                "terminal_outcomes": {"primary": "pr_url"},
+                "status": "production",
+            }
+        )
+        step = next(s for s in wf.steps if s.kind == "self_review")
+        ctx = StepContext(
+            workflow=wf,
+            config=_make_config(),
+            setup=setup,
+            agent_result=agent_result,
+            trajectory=MagicMock(),
+            progress=MagicMock(),
+        )
+        return step, ctx, Step
+
+    def test_passes_step_max_turns_and_accumulates(self):
+        from workflow.runner import _handle_self_review
+
+        setup = _make_setup()
+        agent_result = _make_agent_result(turns=5, num_turns=5, cost_usd=0.50)
+        step, ctx, _ = self._ctx(setup=setup, agent_result=agent_result)
+
+        review = AgentResult(status="success", turns=2, num_turns=2, cost_usd=0.10)
+        with patch("self_review.run_self_review", return_value=review) as mock_review:
+            outcome = _handle_self_review(step, ctx)
+
+        # The step's max_turns (4) is forwarded to run_self_review.
+        assert mock_review.call_args.kwargs["max_turns"] == 4
+        assert outcome.status == "succeeded"
+        assert outcome.data["self_review_ran"] is True
+        # Review turns/cost accumulated onto the shared agent_result.
+        assert ctx.agent_result.turns == 7
+        assert ctx.agent_result.num_turns == 7
+        assert abs(ctx.agent_result.cost_usd - 0.60) < 1e-9
+
+    def test_skipped_review_records_not_ran(self):
+        from workflow.runner import _handle_self_review
+
+        setup = _make_setup()
+        agent_result = _make_agent_result(turns=5)
+        step, ctx, _ = self._ctx(setup=setup, agent_result=agent_result)
+
+        with patch("self_review.run_self_review", return_value=None):
+            outcome = _handle_self_review(step, ctx)
+
+        assert outcome.status == "succeeded"
+        assert outcome.data["self_review_ran"] is False
+        # No accumulation when the review was skipped.
+        assert ctx.agent_result.turns == 5
+
+    def test_fails_without_clone(self):
+        from workflow.runner import _handle_self_review
+
+        agent_result = _make_agent_result()
+        step, ctx, _ = self._ctx(setup=None, agent_result=agent_result)
+
+        outcome = _handle_self_review(step, ctx)
+        assert outcome.status == "failed"
+        assert "cloned repo" in (outcome.error or "")
+
+    def test_default_max_turns_when_step_omits_it(self):
+        from workflow import Step, StepContext, Workflow
+        from workflow.runner import _DEFAULT_SELF_REVIEW_MAX_TURNS, _handle_self_review
+
+        wf = Workflow.model_validate(
+            {
+                "id": "coding/new-task-v1",
+                "version": "1.0.0",
+                "domain": "coding",
+                "prompt": {"template": "registry://prompt/x"},
+                "hydration": {"sources": ["task_description"]},
+                "agent_config": {"tier": "standard", "allowed_tools": ["Bash"]},
+                "steps": [{"kind": "run_agent"}, {"kind": "self_review"}],
+                "terminal_outcomes": {"primary": "pr_url"},
+                "status": "production",
+            }
+        )
+        step = next(s for s in wf.steps if s.kind == "self_review")
+        assert step.max_turns is None
+        ctx = StepContext(
+            workflow=wf,
+            config=_make_config(),
+            setup=_make_setup(),
+            agent_result=_make_agent_result(turns=2),
+            trajectory=MagicMock(),
+            progress=MagicMock(),
+        )
+        review = AgentResult(status="success", turns=1, num_turns=1)
+        with patch("self_review.run_self_review", return_value=review) as mock_review:
+            _handle_self_review(step, ctx)
+        assert mock_review.call_args.kwargs["max_turns"] == _DEFAULT_SELF_REVIEW_MAX_TURNS
+        # Step model unused beyond fixture; keep import referenced.
+        assert Step is not None
diff --git a/agent/workflows/coding/new-task-v1.yaml b/agent/workflows/coding/new-task-v1.yaml
index 7219fdf3..2454b007 100644
--- a/agent/workflows/coding/new-task-v1.yaml
+++ b/agent/workflows/coding/new-task-v1.yaml
@@ -42,6 +42,12 @@ steps:
   - { kind: clone_repo,      name: setup }
   - { kind: hydrate_context, name: context }
   - { kind: run_agent,       name: implement }
+  # Optional pre-PR self-review: the agent critiques its own cumulative diff and
+  # fixes issues before the PR is opened, then posts a summary as a PR comment.
+  # Advisory (on_failure: continue) and fail-open — a review failure/skip never
+  # blocks PR creation. Remove this step to disable self-review for the workflow;
+  # tune the turn budget via max_turns (1-20, default 5).
+  - { kind: self_review,     name: review, on_failure: continue, max_turns: 5 }
   - { kind: verify_build,    name: build, gate: regression_only }
   - { kind: ensure_pr,       name: open_pr, strategy: create }
 terminal_outcomes:
diff --git a/agent/workflows/schema/workflow.schema.json b/agent/workflows/schema/workflow.schema.json
index b4b753bb..f68d867b 100644
--- a/agent/workflows/schema/workflow.schema.json
+++ b/agent/workflows/schema/workflow.schema.json
@@ -187,7 +187,7 @@
           "kind": {
             "type": "string",
             "description": "Dispatches to a registered step handler.",
-            "enum": ["clone_repo", "hydrate_context", "run_agent", "verify_build", "verify_lint", "ensure_pr", "post_review", "deliver_artifact"]
+            "enum": ["clone_repo", "hydrate_context", "run_agent", "verify_build", "verify_lint", "self_review", "ensure_pr", "post_review", "deliver_artifact"]
           },
           "name": {
             "type": "string",
@@ -213,6 +213,12 @@
             "type": "string",
             "description": "For deliver_artifact: the name of a registered Python deliverer (workflow/deliverers.py / DELIVERERS) to route the produced output through. Open string, not a closed enum, so a new delivery method is a registered deliverer, not a schema change (ADR-014 addendum 2026-06-08). First-party names: s3, comment, s3_and_comment. Each deliverer declares the terminal outcomes it produces; validator rule 11 consults that registry.",
             "minLength": 1
+          },
+          "max_turns": {
+            "type": "integer",
+            "description": "For self_review: cap on agent turns the self-review loop may use (drawn from the task's remaining turn/budget allowance). Defaults to 5 when omitted.",
+            "minimum": 1,
+            "maximum": 20
           }
         }
       }
@@ -295,7 +301,7 @@
           "steps": {
             "items": {
               "properties": {
-                "kind": { "not": { "enum": ["clone_repo", "ensure_pr", "post_review", "verify_build", "verify_lint"] } }
+                "kind": { "not": { "enum": ["clone_repo", "ensure_pr", "post_review", "verify_build", "verify_lint", "self_review"] } }
               }
             }
           },
diff --git a/contracts/workflow-validation/rule3-repo-less-has-self-review.json b/contracts/workflow-validation/rule3-repo-less-has-self-review.json
new file mode 100644
index 00000000..42378e53
--- /dev/null
+++ b/contracts/workflow-validation/rule3-repo-less-has-self-review.json
@@ -0,0 +1,69 @@
+{
+  "name": "rule3-repo-less-has-self-review",
+  "description": "Repo-less knowledge workflow illegally declares a self_review step. self_review is repo-only (it diffs and re-runs the agent against a cloned tree), so both the schema's requires_repo:false conditional and cross-field rule 3 reject it.",
+  "workflow": {
+    "id": "knowledge/web-research-v1",
+    "version": "1.0.0",
+    "domain": "knowledge",
+    "requires_repo": false,
+    "read_only": false,
+    "prompt": {
+      "template": "registry://prompt/web-research-workflow"
+    },
+    "hydration": {
+      "sources": [
+        "task_description",
+        "urls"
+      ]
+    },
+    "agent_config": {
+      "tier": "elevated",
+      "allowed_tools": [
+        "Read",
+        "WebFetch"
+      ],
+      "cedar_policy_modules": [
+        "builtin/hard_deny",
+        "builtin/soft_deny"
+      ]
+    },
+    "repo_config": {
+      "discover": false
+    },
+    "required_inputs": {
+      "all_of": [
+        "task_description"
+      ]
+    },
+    "steps": [
+      {
+        "kind": "hydrate_context",
+        "name": "context"
+      },
+      {
+        "kind": "run_agent",
+        "name": "research"
+      },
+      {
+        "kind": "self_review",
+        "name": "review"
+      },
+      {
+        "kind": "deliver_artifact",
+        "name": "deliver",
+        "target": "s3_and_comment"
+      }
+    ],
+    "terminal_outcomes": {
+      "primary": "artifact"
+    },
+    "status": "production"
+  },
+  "expected": {
+    "valid": false,
+    "violations": [
+      "rule-3",
+      "schema"
+    ]
+  }
+}
diff --git a/contracts/workflow-validation/valid-coding-self-review-step.json b/contracts/workflow-validation/valid-coding-self-review-step.json
new file mode 100644
index 00000000..3dccb547
--- /dev/null
+++ b/contracts/workflow-validation/valid-coding-self-review-step.json
@@ -0,0 +1,94 @@
+{
+  "name": "valid-coding-self-review-step",
+  "description": "Coding workflow with an optional pre-PR self_review step (advisory, on_failure: continue). self_review is repo-only and non-side-effecting, so on_failure: continue is permitted (unlike ensure_pr).",
+  "workflow": {
+    "id": "coding/new-task-v1",
+    "version": "1.0.0",
+    "domain": "coding",
+    "description": "Implement a GitHub issue, self-review, and open a PR.",
+    "requires_repo": true,
+    "read_only": false,
+    "prompt": {
+      "template": "registry://prompt/coding-new-task-workflow"
+    },
+    "hydration": {
+      "sources": [
+        "issue",
+        "memory",
+        "task_description"
+      ]
+    },
+    "agent_config": {
+      "tier": "standard",
+      "allowed_tools": [
+        "Bash",
+        "Read",
+        "Write",
+        "Edit",
+        "Glob",
+        "Grep",
+        "WebFetch"
+      ],
+      "cedar_policy_modules": [
+        "builtin/hard_deny",
+        "builtin/soft_deny"
+      ]
+    },
+    "repo_config": {
+      "provider": "github",
+      "discover": true
+    },
+    "required_inputs": {
+      "one_of": [
+        "issue_number",
+        "task_description"
+      ]
+    },
+    "steps": [
+      {
+        "kind": "clone_repo",
+        "name": "setup"
+      },
+      {
+        "kind": "hydrate_context",
+        "name": "context"
+      },
+      {
+        "kind": "run_agent",
+        "name": "implement"
+      },
+      {
+        "kind": "self_review",
+        "name": "review",
+        "on_failure": "continue",
+        "max_turns": 5
+      },
+      {
+        "kind": "verify_build",
+        "name": "build",
+        "gate": "regression_only"
+      },
+      {
+        "kind": "ensure_pr",
+        "name": "open_pr",
+        "strategy": "create"
+      }
+    ],
+    "terminal_outcomes": {
+      "primary": "pr_url"
+    },
+    "limits": {
+      "max_turns": 100
+    },
+    "promotion_gate": {
+      "requires": [
+        "tests:agent/new_task"
+      ]
+    },
+    "status": "production"
+  },
+  "expected": {
+    "valid": true,
+    "violations": []
+  }
+}