aws-samples · nizar-lahlali · Jun 4, 2026 · Jun 4, 2026 · Jun 22, 2026 · Jun 23, 2026
@@ -31,6 +31,7 @@
     _extract_agent_notes,
     ensure_committed,
     ensure_pr,
+    post_self_review_comment,
     verify_build,
     verify_lint,
 )
@@ -217,6 +218,50 @@ def _execute_agent_step(
     return ctx.agent_result
 
 
+def _execute_self_review_step(
+    workflow: Workflow | None,
+    config,
+    setup,
+    agent_result,
+    hydrated,
+    trajectory,
+    progress,
+) -> bool:
+    """Drive the workflow's ``self_review`` step (if declared) through the runner.
+
+    Mirrors ``_execute_agent_step``: only the ``self_review`` step is dispatched
+    (``only_kinds={"self_review"}``) so clone / build / PR stay on the inline
+    path. The step's handler accumulates the review loop's turns/cost back onto
+    ``agent_result`` (a shared mutable model), so the terminal result reflects
+    implement + review.
+
+    Returns True when the review actually ran (so the caller posts the summary
+    PR comment after ``ensure_pr``); False when no ``self_review`` step is
+    declared, the workflow failed to reload, or the review was skipped (read-only
+    / empty diff / no remaining turns). Fully fail-open — a review failure is
+    recorded as a step outcome and never propagates to block PR creation.
+    """
+    if workflow is None or not any(s.kind == "self_review" for s in workflow.steps):
+        return False
+
+    from workflow import StepContext, run_workflow
+
+    ctx = StepContext(
+        workflow=workflow,
+        config=config,
+        hydrated=hydrated,
+        progress=progress,
+        trajectory=trajectory,
+        setup=setup,
+        # The implement step's result, threaded in so the handler can size the
+        # review's turn budget and accumulate its turns/cost onto it.
+        agent_result=agent_result,
+    )
+    with task_span("task.self_review"):
+        run_workflow(workflow, ctx, only_kinds={"self_review"})
+    return bool(ctx.artifacts.get("self_review_ran", False))
+
+
 def _run_repoless_task(
     *,
     config,
@@ -1006,6 +1051,23 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
                 )
                 ensure_pr_strategy = "create"
 
+            # Self-review step: if the resolved workflow declares a ``self_review``
+            # step, drive it through the workflow runner (same pattern as
+            # ``_execute_agent_step``). The step has the LLM critique its own diff
+            # and fix issues, accumulating its turns/cost onto ``agent_result``.
+            # Runs AFTER the cancel short-circuit so a cancelled task never starts
+            # a second agent loop, and BEFORE post-hooks so fixes land in the PR.
+            # Fail-open: a review failure/skip never blocks PR creation.
+            self_review_ran = _execute_self_review_step(
+                _workflow,
+                config,
+                setup,
+                agent_result,
+                hc,
+                trajectory,
+                progress,
+            )
+
             # Post-hooks (agent_result is guaranteed set by the try/except above)
             with task_span("task.post_hooks") as post_span:
                 # Safety net: commit any uncommitted tracked changes (skip for read-only tasks)
@@ -1028,6 +1090,10 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
             if pr_url:
                 progress.write_agent_milestone("pr_created", pr_url)
 
+            # Post self-review summary as PR comment (if the self_review step ran)
+            if pr_url and self_review_ran:
+                post_self_review_comment(setup.repo_dir, pr_url, config)
+
             # Memory write — capture task episode and repo learnings
             memory_written = False
             effective_memory_id = memory_id or os.environ.get("MEMORY_ID", "")

@@ -392,6 +392,67 @@ def ensure_pr(
         return None
 
 
+def post_self_review_comment(repo_dir: str, pr_url: str, config: TaskConfig) -> bool:
+    """Post the self-review summary as a PR comment.
+
+    Reads the summary file written by the self-review agent, formats it as a
+    comment, and posts it via `gh pr comment`. Fail-open: exceptions are logged
+    but never propagated.
+
+    Returns True if a comment was posted, False otherwise.
+    """
+    from self_review import read_self_review_summary
+
+    try:
+        summary = read_self_review_summary(repo_dir)
+    except Exception as e:
+        log("WARN", f"post_self_review_comment: failed to read summary: {type(e).__name__}: {e}")
+        return False
+
+    if not summary:
+        log("POST", "post_self_review_comment: no summary file found — skipping")
+        return False
+
+    # Extract PR number from URL (e.g. https://github.com/owner/repo/pull/123)
+    match = re.search(r"/pull/(\d+)", pr_url)
+    if not match:
+        log("WARN", f"post_self_review_comment: could not extract PR number from {pr_url}")
+        return False
+    pr_number = match.group(1)
+
+    comment_body = f"## \U0001f50d Self-Review Summary\n\n{summary}"
+
+    try:
+        result = subprocess.run(
+            [
+                "gh",
+                "pr",
+                "comment",
+                pr_number,
+                "--repo",
+                config.repo_url,
+                "--body",
+                comment_body,
+            ],
+            cwd=repo_dir,
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+        if result.returncode == 0:
+            log("POST", f"Self-review summary posted as comment on PR #{pr_number}")
+            return True
+        stderr = result.stderr.strip()[:200] if result.stderr else ""
+        log(
+            "WARN",
+            f"post_self_review_comment: gh pr comment failed (rc={result.returncode}): {stderr}",
+        )
+        return False
+    except (subprocess.TimeoutExpired, OSError) as e:
+        log("WARN", f"post_self_review_comment: {type(e).__name__}: {e}")
+        return False
+
+
 def _extract_agent_notes(repo_dir: str, branch: str, config: TaskConfig) -> str | None:
     """Extract the "## Agent notes" section from the PR body.
 

@@ -13,6 +13,7 @@
 from .new_task import NEW_TASK_WORKFLOW
 from .pr_iteration import PR_ITERATION_WORKFLOW
 from .pr_review import PR_REVIEW_WORKFLOW
+from .self_review import SELF_REVIEW_PROMPT as SELF_REVIEW_PROMPT
 from .web_research import WEB_RESEARCH_PROMPT
 
 DEFAULT_WORKFLOW_ID = "coding/new-task-v1"

@@ -0,0 +1,61 @@
+"""Self-review prompt template for pre-PR diff critique."""
+
+# Prompt template with two placeholders: ``{diff}`` and ``{task_description}``.
+# NOTE(review): presumably rendered with ``str.format`` by the self-review
+# step — confirm; if so, any literal brace added to this text must be doubled.
+# A trailing backslash inside the literal is a line continuation: it joins the
+# wrapped source lines into one paragraph line in the resulting string.
+SELF_REVIEW_PROMPT = """\
+You are reviewing your own work before it becomes a pull request. Below is the \
+cumulative diff of all changes on this branch compared to the base branch.
+
+<diff>
+{diff}
+</diff>
+
+## Task context
+
+{task_description}
+
+## Review checklist
+
+Examine the diff carefully for:
+
+1. **Correctness** — Logic errors, off-by-one mistakes, missing edge cases, \
+incorrect assumptions about data shapes or API contracts.
+2. **Bugs** — Null/undefined dereferences, unhandled error paths, resource leaks, \
+race conditions.
+3. **Security** — Injection vulnerabilities (SQL, command, XSS), hardcoded secrets, \
+insecure defaults, OWASP Top 10 issues.
+4. **Style & consistency** — Naming conventions, code style violations relative to \
+the surrounding codebase, unnecessary complexity.
+5. **Test gaps** — Important behaviour that is untested, assertions that don't \
+verify the right thing, missing edge-case coverage.
+
+## Instructions
+
+- If you find issues, fix them directly: edit the files, run the build/tests to \
+verify your fixes, and commit the changes.
+- If no issues are found, stop immediately — do not make changes for the sake of \
+making changes.
+- Do NOT refactor code that was not part of the original diff unless it has a \
+concrete bug or security issue.
+- Keep fixes minimal and focused. Each fix should be a separate commit with a \
+clear message.
+
+## Summary output
+
+After completing your review (whether you made fixes or not), write a file \
+`.self-review-summary.md` in the repository root with your findings in this format:
+
+```markdown
+### Self-Review Summary
+
+**Findings:** <number of issues found>
+**Fixes applied:** <number of fixes committed>
+
+#### Issues found
+
+- <category>: <brief description of issue> — <fixed | not fixed (reason)>
+```
+
+If no issues were found, write the file with: "No issues found — code looks good."
+
+This file is a pipeline artifact and will be deleted automatically — it will NOT \
+appear in the pull request.
+"""