diff --git a/.dockerignore b/.dockerignore
index 9402c7e7..8e9cc5ab 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -9,6 +9,14 @@ cdk/cdk.out/
 cdk/lib/
 cdk/node_modules/
 
+# integ-runner output dirs. The agent artifact's build context is the repo
+# root, and integ-runner writes its synth/snapshot output UNDER that root
+# (cdk/test/integ/cdk-integ.out.<test>.ts[.snapshot]/). Without these excludes,
+# staging the root copies its own output dir into itself recursively until the
+# path overflows (ENAMETOOLONG). Mirrors the matching entries in .gitignore.
+cdk/test/integ/cdk-integ.out.*/
+cdk/test/integ/*.snapshot/
+
 # CLI and docs build artifacts
 cli/lib/
 cli/node_modules/
diff --git a/.github/workflows/integ-sweeper.yml b/.github/workflows/integ-sweeper.yml
new file mode 100644
index 00000000..30f80d5f
--- /dev/null
+++ b/.github/workflows/integ-sweeper.yml
@@ -0,0 +1,204 @@
+name: integ-sweeper
+# Reclaims stranded ephemeral integ stacks (issue #317 / PR #348 follow-up).
+#
+# The Phase-1 lifecycle integ test (integ.yml + cdk/test/integ/integ.task-lifecycle.ts)
+# deploys a per-run `int-<commit-sha>` stack running the AgentCore Runtime in VPC
+# mode. That runtime injects AWS-service-managed `agentic_ai` ENIs into the private
+# subnets, which AWS releases only ASYNCHRONOUSLY (observed: 1+ hours after the
+# runtime is deleted). So the in-run `cdk destroy` reliably fails the subnet/SG/VPC
+# deletes (DependencyViolation) and the integ run tolerates that failure
+# (destroy.expectError) rather than blocking on a wait it can't win. The per-run
+# UNIQUE stack name means a stranded stack never blocks a later run — but nothing
+# in the run reclaims it either.
+#
+# THIS workflow is that reclaimer: on a schedule (after the ENIs have had time to
+# detach), it attempts to delete the per-run `int-<8-hex short SHA>` stacks, and
+# FAILS LOUDLY + opens (or updates) a tracking issue for any such stack older than
+# the alarm threshold that is still present — so a genuine leak (cost in the
+# shared account) surfaces instead of accumulating silently.
+on:
+  workflow_dispatch: {}
+  schedule:
+    # Every 2 hours. Frequent enough that a normal stranded stack (ENIs release in
+    # ~1-2h) is reclaimed within a cycle or two, well before the 6h alarm age.
+    - cron: "0 */2 * * *"
+
+concurrency:
+  group: integ-sweeper
+  cancel-in-progress: false
+
+permissions:
+  contents: none
+
+jobs:
+  sweep:
+    name: Reclaim stranded int-* stacks
+    runs-on: ubuntu-latest
+    # The integ deploy role (secrets.AWS_ROLE_TO_ASSUME) is scoped to the `integ`
+    # environment — same as integ.yml. The environment's protection rules must
+    # permit this scheduled run to assume the role (no manual approval is possible
+    # on a cron trigger).
+    environment: integ
+    timeout-minutes: 30
+    permissions:
+      id-token: write   # OIDC role assumption
+      contents: read
+      issues: write     # open a tracking issue on a genuine leak
+    env:
+      # Stacks older than this (hours) that STILL fail to delete are treated as a
+      # genuine leak → fail the job + file an issue. Comfortably past the observed
+      # ENI-release window so normal teardown lag never false-alarms.
+      ALARM_AGE_HOURS: "6"
+      AWS_REGION: ${{ vars.AWS_REGION || 'us-east-1' }}
+      AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'us-east-1' }}
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@e7f100cf4c008499ea8adda475de1042d6975c7b # v6.2.0
+        with:
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          aws-region: ${{ vars.AWS_REGION || 'us-east-1' }}
+
+      - name: Sweep int-* stacks
+        id: sweep
+        run: |
+          set -uo pipefail
+
+          # Only the integ test's own per-run stacks are eligible: names matching
+          # `int-<8 lowercase hex>` (the short head SHA) — NOT a bare `int-*` glob,
+          # which in a shared account could hit an unrelated stack. `int-local`
+          # (local dev fallback) is intentionally NOT swept: CI never produces it.
+          STACK_RE='^int-[0-9a-f]{8}$'
+          # Leave stacks younger than this (minutes) alone: an in-flight integ run
+          # (integ.yml, timeout-minutes: 90) may still be testing against them.
+          MIN_AGE_MINUTES="${MIN_AGE_MINUTES:-120}"
+
+          # All non-deleted int-* stacks (active, DELETE_FAILED, or rollback states)
+          # with creation time. The JMESPath prefilter only narrows the output; the
+          # regex is the authoritative guard. A failed listing must fail the job.
+          if ! listing="$(aws cloudformation list-stacks \
+              --stack-status-filter CREATE_COMPLETE CREATE_FAILED ROLLBACK_COMPLETE ROLLBACK_FAILED \
+                UPDATE_COMPLETE UPDATE_ROLLBACK_COMPLETE UPDATE_ROLLBACK_FAILED DELETE_FAILED \
+              --query 'StackSummaries[?starts_with(StackName, `int-`)].[StackName, CreationTime]' \
+              --output text)"; then
+            echo "::error::list-stacks failed — cannot tell which int-* stacks are stranded."
+            exit 1
+          fi
+
+          stacks=()
+          declare -A created_at=()
+          while IFS=$'\t' read -r name created_time; do
+            [ -n "$name" ] || continue
+            if [[ "$name" =~ $STACK_RE ]]; then
+              stacks+=("$name")
+              created_at["$name"]="$created_time"
+            else
+              echo "Skipping '$name' — does not match ${STACK_RE} (not a sweepable integ stack)."
+            fi
+          done < <(printf '%s\n' "$listing" | sort -u)
+
+          if [ "${#stacks[@]}" -eq 0 ]; then
+            echo "No int-* stacks present. Nothing to sweep."
+            exit 0
+          fi
+
+          echo "Found ${#stacks[@]} int-* stack(s): ${stacks[*]}"
+          now_epoch="$(date -u +%s)"
+          alarm_secs=$(( ALARM_AGE_HOURS * 3600 ))
+          min_age_secs=$(( MIN_AGE_MINUTES * 60 ))
+          leaked=""
+
+          for stack in "${stacks[@]}"; do
+            echo "::group::$stack"
+            # GNU `date -d ""` yields today 00:00, not an error: map empty to 0 (unknown).
+            created="${created_at[$stack]}"
+            created_epoch=0
+            [ -z "$created" ] || created_epoch="$(date -u -d "$created" +%s 2>/dev/null || echo 0)"
+            age_secs=$(( now_epoch - created_epoch ))
+            age_hours=$(( age_secs / 3600 ))
+            if [ "$created_epoch" -le 0 ] || [ "$age_secs" -lt "$min_age_secs" ]; then
+              echo "⏭ $stack is younger than ${MIN_AGE_MINUTES}m (or its age is unknown) — it may belong to a running integ job. Not touching it this cycle."
+              echo "::endgroup::"
+              continue
+            fi
+
+            # Best-effort delete (idempotent), then give CloudFormation a moment.
+            aws cloudformation delete-stack --stack-name "$stack" || true
+            sleep 15
+            status="$(aws cloudformation describe-stacks --stack-name "$stack" \
+              --query 'Stacks[0].StackStatus' --output text 2>&1 || true)"
+            if echo "$status" | grep -qiE 'does not exist|ValidationError'; then
+              echo "✅ $stack deleted (or gone)."
+            elif [ "$age_secs" -ge "$alarm_secs" ]; then
+              echo "❌ $stack still present (status: $status), age ${age_hours}h ≥ ${ALARM_AGE_HOURS}h — LEAK."
+              leaked="${leaked}\n- \`${stack}\` — status \`${status}\`, age ~${age_hours}h"
+            else
+              echo "⏳ $stack still present (status: $status), age ~${age_hours}h — within ${ALARM_AGE_HOURS}h window; ENIs likely not yet released. Will retry next cycle."
+            fi
+            echo "::endgroup::"
+          done
+
+          if [ -n "$leaked" ]; then
+            {
+              echo "leaked<<EOF"
+              echo -e "$leaked"
+              echo "EOF"
+            } >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Open issue on genuine leak
+        if: steps.sweep.outputs.leaked != ''
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # This job has no checkout step, so `gh` cannot infer the repository
+          # from a local git remote — name it explicitly.
+          GH_REPO: ${{ github.repository }}
+          # Pass via env (not inline ${{ }} interpolation) so the value never
+          # expands into the shell script body (zizmor template-injection).
+          LEAKED: ${{ steps.sweep.outputs.leaked }}
+          # Stable label used both to tag the tracking issue and to find an
+          # existing open one — this is the dedup key, so it must not change.
+          LEAK_LABEL: integ-leak
+        run: |
+          set -euo pipefail
+          body_file="$(mktemp)"
+          {
+            echo "The integ-sweeper found stranded \`int-*\` CloudFormation stacks older than ${ALARM_AGE_HOURS}h that still fail to delete — likely a real leak in the shared integ account (each carries a VPC + NAT gateway + interface endpoints + the AgentCore runtime, billing hourly)."
+            echo ""
+            echo "These are normally reclaimed automatically once the AgentCore \`agentic_ai\` ENIs detach (~1-2h). Past ${ALARM_AGE_HOURS}h, investigate: the ENIs may be genuinely stuck (needs manual ENI/VPC cleanup) or the deploy role lacks teardown permissions."
+            echo ""
+            echo "### Stranded stacks (as of this run)"
+            printf '%s\n' "${LEAKED}"
+            echo ""
+            echo "| Field | Value |"
+            echo "| --- | --- |"
+            echo "| Workflow run | [integ-sweeper #${GITHUB_RUN_NUMBER}](${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}) |"
+            echo "| Region | \`${AWS_REGION}\` |"
+            echo ""
+            echo "Close this issue once the stacks are deleted and the sweeper run is green."
+          } > "${body_file}"
+
+          # Dedup: a stuck stack re-alarms every 2h cycle. Without this guard each
+          # cycle files a fresh duplicate. Find an existing OPEN issue carrying the
+          # stable leak label (`--state open --label`) and comment on it instead;
+          # `--jq '.[0].number // empty'` yields the first match (or empty). Ensure
+          # the label exists first (idempotent; ignore "already exists").
+          gh label create "${LEAK_LABEL}" \
+            --description "Stranded integ stacks flagged by integ-sweeper" \
+            --color B60205 2>/dev/null || true
+
+          existing="$(gh issue list --state open --label "${LEAK_LABEL}" \
+            --json number --jq '.[0].number // empty' 2>/dev/null || true)"
+
+          if [ -n "${existing}" ]; then
+            echo "Existing open leak issue #${existing} — commenting instead of opening a duplicate."
+            gh issue comment "${existing}" --body-file "${body_file}"
+          else
+            gh issue create \
+              --title "Stranded integ stacks not reclaimed (>${ALARM_AGE_HOURS}h)" \
+              --label "${LEAK_LABEL}" \
+              --body-file "${body_file}"
+          fi
+
+      - name: Fail job on genuine leak
+        if: steps.sweep.outputs.leaked != ''
+        run: exit 1  # turn the run red whenever the sweep step reported leaked stacks
diff --git a/.github/workflows/integ.yml b/.github/workflows/integ.yml
index 4ca180b7..5819c239 100644
--- a/.github/workflows/integ.yml
+++ b/.github/workflows/integ.yml
@@ -7,10 +7,20 @@ name: integ
 #
 # Trigger model mirrors deploy.yml: build.yml completes -> workflow_run picks it
 # up in the trusted base-repo context (secrets/OIDC available even for fork PRs)
-# -> we resolve whether the PR touches cdk/** or agent/** -> an admin approves
-# the `integ` environment gate -> deploy/assert/destroy runs against the shared
-# account -> a commit status `integ-smoke` is posted back to the PR head so it
-# shows up as a (required) check that blocks merge.
+# -> we resolve whether the PR touches cdk/** or agent/** -> deploy/assert/destroy
+# runs against the shared account -> a commit status `integ-smoke` is posted back
+# to the PR head so it shows up as a (required) check that blocks merge.
+#
+# Fork-code gate: the ENFORCED gate on fork-authored test code is the
+# `safe-to-test` label check in the `resolve` job below — a maintainer must apply
+# it before this workflow will run a fork PR. The `integ` GitHub environment is a
+# SECOND, OPTIONAL layer: it only adds a manual approval if required reviewers are
+# configured on it, and at time of writing NONE are. Two consequences worth
+# knowing: (a) do not rely on the environment as the fork-code gate — that is the
+# label's job; (b) the scheduled integ-sweeper (.github/workflows/integ-sweeper.yml)
+# also runs under `environment: integ`, and a cron trigger CANNOT satisfy a manual
+# approval, so adding required reviewers here would silently break the sweeper.
+# If reviewers are ever wanted for PR runs, give the sweeper its own environment.
 #
 # Local dev path is unchanged: run `mise //cdk:integ` with your own AWS creds.
 #
@@ -19,9 +29,10 @@ name: integ
 on:
   # zizmor: ignore[dangerous-triggers] — intentional; workflow_run is required so
   # fork PRs can run against the shared account (a fork `pull_request` job gets no
-  # secrets/OIDC). Mitigations: build-success guard, path-filter, `integ`
-  # environment approval gate (admin reviews fork test code before it runs with
-  # the privileged role), least-privilege role, status-only tokens per job.
+  # secrets/OIDC). Mitigations: build-success guard, path-filter, the
+  # `safe-to-test` label gate (the enforced fork-code review gate), the `integ`
+  # environment (an optional second approval layer IF reviewers are configured —
+  # currently none), least-privilege role, status-only tokens per job.
   workflow_run:
     workflows: [build]
     types: [completed]
@@ -41,8 +52,9 @@ jobs:
   # docs/cli-only PRs get an immediate green (skipped) status and never deadlock
   # the required check.
   resolve:
-    # Manual dispatch is restricted to main (defence in depth — the `integ`
-    # environment approval is the primary gate). PR runs come via workflow_run.
+    # Manual dispatch is restricted to main (defence in depth). For fork PRs the
+    # primary gate is the `safe-to-test` label check below. PR runs come via
+    # workflow_run.
     if: >-
       (github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/main') ||
       (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
@@ -129,10 +141,12 @@ jobs:
             exit 0
           fi
 
-          # Fork-PR safety: only run fork-authored code after a maintainer has
-          # applied the `safe-to-test` label (defence in depth on top of the
-          # `integ` environment approval). If it's absent, leave the status
-          # pending and don't run — re-trigger once the label is added.
+          # Fork-PR safety: this is the ENFORCED gate on fork-authored code — only
+          # run it after a maintainer has applied the `safe-to-test` label. (The
+          # `integ` environment can add a second approval layer, but only if
+          # required reviewers are configured on it — currently none, so this label
+          # is the effective gate.) If absent, leave the status pending and don't
+          # run — re-trigger once the label is added.
           if [[ "$WF_HEAD_REPO" != "$REPO" ]]; then
             if ! LABELS=$(gh api "repos/$REPO/issues/$PR_NUMBER/labels" --jq '.[].name'); then
               echo "::error::Failed to read labels for PR #$PR_NUMBER."
@@ -157,7 +171,7 @@ jobs:
             exit 1
           fi
           if echo "$CHANGED" | grep -Eq '^(cdk|agent)/'; then
-            post_status pending "awaiting admin approval / running"
+            post_status pending "awaiting integ run"
             echo "applicable=true" >> "$GITHUB_OUTPUT"
             echo "PR #$PR_NUMBER touches cdk/** or agent/** — integ applies."
           else
@@ -166,16 +180,23 @@ jobs:
             echo "PR #$PR_NUMBER has no cdk/** or agent/** changes — integ skipped (green)."
           fi
 
-  # The admin-gated deploy -> assert -> destroy. The `integ` environment's
-  # required reviewer is the approval gate; while it waits, the integ-smoke
-  # status stays pending and merge stays blocked.
+  # The deploy -> assert -> destroy job. It runs in the `integ` environment; if
+  # required reviewers are ever configured there, a pending approval holds this
+  # job (and the integ-smoke status stays pending / merge stays blocked) until
+  # approved. With no reviewers configured (current state), it proceeds directly
+  # once `resolve` marks it applicable and the `safe-to-test` gate has passed.
   integ:
     needs: resolve
     if: needs.resolve.outputs.applicable == 'true'
     name: CDK integ smoke (Task API)
     runs-on: ubuntu-latest
     environment: integ
-    timeout-minutes: 45
+    # The lifecycle test (integ.task-lifecycle.ts) deploys the full AgentStack
+    # (orchestrator + AgentCore runtime + Docker image build) and drives real
+    # agent runs through their terminal states before destroying — far heavier
+    # than the Phase-0 trimmed smoke test. 90 min covers deploy + cold Docker
+    # build + agent runs + teardown with margin.
+    timeout-minutes: 90
     permissions:
       id-token: write
       contents: read
@@ -186,9 +207,11 @@ jobs:
       - name: Checkout PR head (incl. forks)
         uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
         with:
-          # Approving the `integ` environment authorizes this fork-authored test
-          # code to run with the privileged role — the approver MUST review
-          # cdk/test/integ/** changes before approving.
+          # This checks out fork-authored test code that then runs with the
+          # privileged role. The `safe-to-test` label (gated in `resolve`) is the
+          # enforced review point for cdk/test/integ/** changes; if required
+          # reviewers are configured on the `integ` environment they add a second
+          # manual review before this runs.
           repository: ${{ needs.resolve.outputs.head_repo }}
           ref: ${{ needs.resolve.outputs.head_sha }}
           persist-credentials: false
@@ -215,29 +238,98 @@ jobs:
         run: yarn install --immutable
 
       - name: Run integ tests (deploy → assert → destroy)
+        # COMMIT_HASH drives the per-run unique stack name `int-<hash>` (see
+        # cdk/test/integ/integ.task-lifecycle.ts + cdk/mise.toml). Using the
+        # resolved head SHA means a stranded stack from a failed teardown never
+        # collides with / blocks a later run on a different commit.
+        #
+        # INTEG_SANDBOX_REPO / INTEG_PAT_SECRET_ID bind the gate scenarios (3 & 4)
+        # to the account's provisioned sandbox repo + PAT secret instead of a
+        # hardcoded contributor repo. Same vars the sandbox-cleanup step reads.
+        # When unset, the test falls back to its literals and the gates degrade to
+        # clone-failures (still synthesizes).
+        env:
+          COMMIT_HASH: ${{ needs.resolve.outputs.head_sha }}
+          INTEG_SANDBOX_REPO: ${{ vars.INTEG_SANDBOX_REPO }}
+          INTEG_PAT_SECRET_ID: ${{ vars.INTEG_PAT_SECRET_ID }}
         run: mise //cdk:integ
 
       # Safety net: integ-runner forces teardown on success and failure, but if
-      # the run is cancelled or crashes mid-deploy the stack can be stranded in
-      # the shared account. Delete it directly via CloudFormation so we never
+      # the run is cancelled or crashes mid-deploy, stacks can be stranded in
+      # the shared account. Delete them directly via CloudFormation so we never
       # leak billable resources.
       #
-      # NOTE: `cdk destroy backgroundagent-integ` would NOT work here — it
-      # synthesizes the main app (src/main.ts), which does not contain the integ
-      # stack, so it exits 0 having deleted nothing. Target the stack by its
-      # literal CloudFormation name instead. delete-stack is idempotent (no-op if
+      # NOTE: `cdk destroy <stack>` would NOT work here — it synthesizes the
+      # main app (src/main.ts), which does not contain the integ stacks, so it
+      # exits 0 having deleted nothing. Target each stack by its literal
+      # CloudFormation name instead. delete-stack is idempotent (no-op if
       # already gone), so `|| true` only guards transient API errors.
-      - name: Ensure stack torn down
+      #
+      # Best-effort delete-stack safety net for crash/cancel cases. integ-runner
+      # already runs its own destroy (and tolerates the expected ENI DELETE_FAILED
+      # via expectError); this only catches a run that died BEFORE integ-runner's
+      # own teardown (e.g. the job was cancelled mid-deploy).
+      #
+      # Stacks swept: backgroundagent-integ (Phase-0 smoke, fixed name) and the
+      # Phase-1 per-run stack `int-<short-sha>` (matches the name computed in
+      # cdk/test/integ/integ.task-lifecycle.ts from the same head SHA).
+      #
+      # IMPORTANT — this step does NOT retry-until-deleted and does NOT fail the
+      # job on a stranded stack. The AgentCore Runtime's service-managed
+      # `agentic_ai` ENIs are released asynchronously by AWS (minutes to hours),
+      # so an immediate delete reliably hits DELETE_FAILED on the subnets/SG/VPC.
+      # Because the stack name is now per-commit-UNIQUE, a stranded `int-<sha>`
+      # stack never blocks a future run, so we leave it for the out-of-band
+      # ephemeral sweeper (.github/workflows/integ-sweeper.yml) to reclaim once the
+      # ENIs detach — that sweeper FAILS LOUDLY + files an issue for any int-*
+      # stack still stuck past its alarm age, so leaks surface rather than
+      # accumulate. Here we just fire one delete to start the teardown and move on.
+      - name: Ensure stacks torn down (best effort)
+        if: always()
+        env:
+          AWS_REGION: ${{ vars.AWS_REGION || 'us-east-1' }}
+          AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'us-east-1' }}
+          HEAD_SHA: ${{ needs.resolve.outputs.head_sha }}
+        run: |
+          set -uo pipefail
+          per_run_stack="int-${HEAD_SHA:0:8}"   # `int-` + first 8 chars of the head SHA
+          for target in backgroundagent-integ "$per_run_stack"; do
+            echo "Best-effort delete-stack: $target"
+            aws cloudformation delete-stack --stack-name "$target" || true
+          done
+          echo "Initiated teardown; stranded int-* stacks (if any) are reclaimed by the ephemeral sweeper once their ENIs detach."
+
+      # Sandbox cleanup for the gate scenarios (3 & 4): coding/new-task-v1 pushes
+      # a `bgagent/<task_id>/<slug>` branch and (on approve) opens a PR on the
+      # sandbox repo. The agent never closes these, so each run would accumulate
+      # stale branches/PRs. Reconstructing the exact branch name in the test is
+      # fragile (it depends on the agent-side slug), so we sweep by prefix here:
+      # delete every `bgagent/*` branch on the sandbox, which also closes the
+      # associated PRs. Reads the same PAT the agent used, from the pre-seeded
+      # secret. Gated on the repo vars being set so this is a no-op until the
+      # sandbox + secret are provisioned. Never fails the job — best-effort.
+      - name: Clean up sandbox PRs/branches
+        if: always() && vars.INTEG_SANDBOX_REPO != '' && vars.INTEG_PAT_SECRET_ID != ''
+        env:
+          AWS_REGION: ${{ vars.AWS_REGION || 'us-east-1' }}
+          AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'us-east-1' }}
+          SANDBOX_REPO: ${{ vars.INTEG_SANDBOX_REPO }}
+          PAT_SECRET_ID: ${{ vars.INTEG_PAT_SECRET_ID }}
+        run: |
+          set -euo pipefail
+          # Best-effort: an unreadable PAT must skip the cleanup, not fail the job.
+          if ! GH_TOKEN="$(aws secretsmanager get-secret-value \
+            --secret-id "$PAT_SECRET_ID" \
+            --query SecretString --output text)"; then
+            echo "::warning::Could not read the sandbox PAT; skipping sandbox cleanup."
+            exit 0
+          fi
+          echo "::add-mask::${GH_TOKEN}"   # keep the PAT out of the job log
+          export GH_TOKEN
+          # Delete every bgagent/* branch (closes its PR); --paginate covers all pages.
+          gh api --paginate "repos/${SANDBOX_REPO}/git/matching-refs/heads/bgagent/" \
+            --jq '.[].ref | sub("^refs/heads/"; "")' 2>/dev/null \
+          | while read -r branch; do
+              [ -n "$branch" ] || continue
+              echo "Deleting sandbox branch: $branch"
+              gh api -X DELETE "repos/${SANDBOX_REPO}/git/refs/heads/${branch}" || true
+            done || true
 
   # Post the final integ-smoke status back to the PR head so the check flips from
   # pending to success/failure. Skipped for workflow_dispatch (no PR to gate).
diff --git a/cdk/mise.toml b/cdk/mise.toml
index 60332012..990db7fb 100644
--- a/cdk/mise.toml
+++ b/cdk/mise.toml
@@ -63,10 +63,26 @@ description = "CDK deploy-then-verify integration tests (integ-runner). Needs AW
 depends = [":compile"]
 run = [
   "mkdir -p $TMPDIR",
+  # Per-run unique stack naming: the lifecycle test names its stack `int-<hash>`
+  # from the COMMIT_HASH env var (read directly via process.env in the test —
+  # integ-runner synths in a subprocess that inherits the env but not our shell's
+  # CDK context). A stranded stack (the AgentCore ENI teardown race) then never
+  # blocks the next run. Source: COMMIT_HASH (set by CI from the resolved head
+  # SHA), falling back to the local git SHA, then "local" outside a checkout.
+  #
   # No --update-on-failed: .snapshot/ is gitignored, so there is no committed
   # snapshot to diff against or update. --force re-runs the deploy-then-verify
   # unconditionally, which is what we want in CI.
-  "npx integ-runner --language typescript --directory test/integ --force",
+  #
+  # --verbose: integ-runner otherwise prints only a one-line pass/fail per test,
+  # which hides WHICH assertion failed and its actual-vs-expected payload. The
+  # lifecycle test polls DynamoDB for terminal task status; without --verbose a
+  # failure (e.g. task stuck at SUBMITTED instead of COMPLETED) is undiagnosable
+  # from the log alone. Verbose surfaces the assertion diffs we need.
+  '''
+  export COMMIT_HASH="${COMMIT_HASH:-$(git rev-parse HEAD 2>/dev/null || echo local)}"
+  npx integ-runner --language typescript --directory test/integ --force --verbose
+  ''',
 ]
 
 [tasks.bundle]
diff --git a/cdk/test/integ/integ.task-lifecycle.ts b/cdk/test/integ/integ.task-lifecycle.ts
new file mode 100644
index 00000000..bee378d3
--- /dev/null
+++ b/cdk/test/integ/integ.task-lifecycle.ts
@@ -0,0 +1,552 @@
+/**
+ *  MIT No Attribution
+ *
+ *  Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy of
+ *  the Software without restriction, including without limitation the rights to
+ *  use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ *  the Software, and to permit persons to whom the Software is furnished to do so.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ */
+
+/*
+ * Phase-1 deploy-then-verify lifecycle test for issue #317.
+ *
+ * Where Phase 0 (integ.task-api-smoke.ts) deployed a TRIMMED stack and asserted
+ * a task merely persists at SUBMITTED, Phase 1 deploys the REAL, full AgentStack
+ * (orchestrator + AgentCore runtime/memory + agent container) and drives a live
+ * agent through its lifecycle, asserting the four terminal paths from the Cedar
+ * HITL E2E matrix (docs/design/CEDAR_HITL_GATES.md §15.3):
+ *
+ *   1. submit -> run -> COMPLETED                       (repo-less default/agent-v1)
+ *   2. submit -> run -> FAILED                          (coding/new-task-v1, bad repo)
+ *   3. submit -> run -> AWAITING_APPROVAL -> approve    (write_env_files soft-deny gate)
+ *   4. submit -> run -> AWAITING_APPROVAL -> deny       (write_env_files soft-deny gate)
+ *
+ * This is environment-agnostic: it deploys to whatever account/region the
+ * caller's AWS credentials resolve to (CI assumes the integ role; local runs use
+ * your own creds). It should run in a DEDICATED integ account with no
+ * backgroundagent-dev/main stack, so the AgentCore account-unique runtime/memory
+ * names don't collide. We deploy the committed AgentStack unchanged: it leaves
+ * runtimeName/memoryName UNSET and CDK auto-generates names scoped to the
+ * per-run stack name (int-<commit-hash>, see below), guaranteeing uniqueness.
+ * (A local developer's uncommitted agent.ts name pin must be stashed before a
+ * local `mise //cdk:integ`, or it would collide.)
+ *
+ * Determinism: there is no mock/scripted agent mode — every scenario runs the
+ * real `claude` CLI against Bedrock. We bound cost and wall-clock with low
+ * max_turns and a max_budget_usd cap, and steer terminal states with simple,
+ * purpose-built task descriptions.
+ */
+
+import { randomBytes } from 'node:crypto';
+import { ExpectedResult, IntegTest } from '@aws-cdk/integ-tests-alpha';
+import { App, type CfnOutput, Duration } from 'aws-cdk-lib';
+import { TaskStatus } from '../../src/constructs/task-status';
+import { AgentStack } from '../../src/stacks/agent';
+
+// NOTE on assertion shape: every terminal/gate check below runs inside
+// `waitForAssertions` (a polling Step Functions waiter). Nested `Match.*`
+// matchers (objectLike / stringLikeRegexp) CANNOT be used there — the assertion
+// provider serializes the Match object's internals ({name, partial, pattern})
+// into the expected pattern, and the waiter then treats those as literal
+// required keys that never exist on the row, so the assertion fails forever even
+// when the data is correct (observed live: a COMPLETED task polled 25× and timed
+// out). Polled assertions therefore use ONLY flat, exact scalar values (the
+// `status`/decision string), which serialize cleanly. Asserting field PRESENCE
+// (task_id/user_id/timestamps/approval metadata, #317) needs a non-polled
+// getItem with assertAtPath — tracked as a follow-up on #317.
+
+const app = new App();
+
+// Per-run UNIQUE stack name: `int-<commit-hash>`. A fixed name is a trap for this
+// stack — the AgentCore Runtime injects service-managed `agentic_ai` ENIs that AWS
+// releases ASYNCHRONOUSLY, so `cdk destroy` reliably fails the subnet/SG/VPC
+// deletes (DependencyViolation) and strands the stack. With a fixed name that
+// stranded stack BLOCKS the next run (name conflict). A unique per-commit name
+// means a failed teardown never blocks a later run, and the out-of-band ephemeral
+// sweeper (.github/workflows/integ-sweeper.yml) reclaims `int-*` stacks once their
+// ENIs detach, alarming if any stays stuck past its age threshold.
+//
+// The hash comes from the COMMIT_HASH env var (set by CI from the resolved head
+// SHA; the mise //cdk:integ task falls back to the local git SHA). We read the
+// ENV directly rather than CDK context: integ-runner synthesizes the test app in
+// its own subprocess and does NOT forward CDK_CONTEXT_JSON / `-c` from our shell
+// to that synth, but the subprocess DOES inherit the environment — so the env var
+// reaches `process.env` here reliably where `tryGetContext` would not. Falls back
+// to 'local' outside CI/git. (Date.now()/random are avoided for the stack NAME —
+// they'd break integ snapshot determinism; CI always supplies a real sha.)
+const commitHash = (process.env.COMMIT_HASH ?? '').slice(0, 8) || 'local';
+const stackName = `int-${commitHash}`;
+
+// The real, full production stack. Environment-agnostic on purpose (same
+// rationale as Phase 0): an explicit env would force the IntegTest DeployAssert
+// stack — always environment-agnostic — into cross-region references it cannot
+// resolve when reading this stack's outputs in the assertions below.
+//
+// DO NOT set runtimeName/memoryName here or pin them in agent.ts for this
+// deploy: the committed defaults auto-generate stack-name-scoped unique names,
+// so each `int-<hash>` stack gets its own non-colliding AgentCore names.
+const stack = new AgentStack(app, stackName, {
+  description: 'ABCA Phase-1 integ lifecycle stack (full AgentStack: orchestrator + agent runtime)',
+});
+
+// AgentStack exposes its API URL, Cognito IDs, and table names only as
+// CfnOutputs (its constructs are private consts). Read the output tokens by
+// construct id rather than adding public accessors to the production stack.
+// CfnOutput exposes a `value` getter that returns the underlying token.
+const output = (id: string): string => (stack.node.findChild(id) as CfnOutput).value;
+
+const apiUrl = output('ApiUrl');
+const userPoolId = output('UserPoolId');
+const appClientId = output('AppClientId');
+const taskTableName = output('TaskTableName');
+const taskApprovalsTableName = output('TaskApprovalsTableName');
+// The submit path enforces an onboarding gate: a repo must have an active row in
+// RepoTable or POST /tasks returns 422 REPO_NOT_ONBOARDED before clone/preflight.
+// The gate scenarios onboard SANDBOX_REPO here (a putItem assertion) rather than
+// adding a Blueprint construct to the production stack — test-side only.
+const repoTableName = output('RepoTableName');
+// AgentStack creates its OWN empty GitHubTokenSecret (agent.ts:181,
+// RemovalPolicy.DESTROY) — it does not reference an external one. The gate
+// scenarios populate it post-deploy from the pre-seeded secret below, which is
+// exactly the documented operator flow (docs/guides/QUICK_START.md §4: read the
+// GitHubTokenSecretArn output, put-secret-value the PAT into it). Automating
+// that copy here keeps us aligned with the design (no agent.ts change) and the
+// throwaway secret tears down with the stack.
+const githubTokenSecretArn = output('GitHubTokenSecretArn');
+
+// --- Gate-scenario configuration (scenarios 3 & 4) ----------------------------
+// These two constants are the ONLY out-of-band wiring the gate scenarios need.
+// They point at resources an operator provisions once in the integ account
+// (whichever account the run deploys to); scenarios 1 & 2 do NOT depend on them
+// and run regardless.
+//
+//   SANDBOX_REPO  — a throwaway GitHub repo (owner/name) with a committed
+//                   baseline (README + default branch). coding/new-task-v1
+//                   clones it, the agent attempts a `config.env` write that
+//                   trips the write_env_files soft-deny gate, and (on approve)
+//                   pushes a `bgagent/<task_id>/<slug>` branch + opens a PR. The
+//                   CI `always()` cleanup step deletes those branches each run.
+//                   The PAT below must have Contents+PR WRITE on this repo (a
+//                   read-only token clones fine but the agent's `git push` 403s).
+//   PRESEEDED_PAT_SECRET — name of a STABLE Secrets Manager secret in the integ
+//                   account holding a fine-grained PAT scoped to SANDBOX_REPO.
+//                   Resolved by NAME (not ARN) so it is account-agnostic; copied
+//                   into the stack-created GitHubTokenSecret by the token-seeding
+//                   assertion below.
+//
+// Sourced from CI repo vars (INTEG_SANDBOX_REPO / INTEG_PAT_SECRET_ID — the same
+// vars the integ.yml sandbox-cleanup step reads), so the gate scenarios bind to
+// whatever sandbox+secret the running account provisioned. There is deliberately
+// NO fallback literal: an account that hasn't provisioned a sandbox (e.g. upstream
+// aws-samples, or any fork) leaves both unset, and scenarios 3 & 4 SKIP with a
+// clear message (see the chain-assembly block at the bottom) rather than silently
+// routing the gate runs — which clone and push with a write-PAT — into one
+// contributor's personal repo. Set both vars to exercise the Cedar gates;
+// scenarios 1 & 2 always run regardless.
+const SANDBOX_REPO = process.env.INTEG_SANDBOX_REPO;
+const PRESEEDED_PAT_SECRET = process.env.INTEG_PAT_SECRET_ID;
+
+// Gate scenarios (3 & 4) require BOTH a sandbox repo and its pre-seeded PAT. When
+// either is unset, skip them (scenarios 1 & 2 still run). This keeps the test
+// account-agnostic: it never falls back to a hardcoded personal repo.
+const gatesEnabled = Boolean(SANDBOX_REPO && PRESEEDED_PAT_SECRET);
+if (!gatesEnabled) {
+  // eslint-disable-next-line no-console
+  console.warn(
+    '[integ.task-lifecycle] INTEG_SANDBOX_REPO / INTEG_PAT_SECRET_ID not set — ' +
+      'skipping Cedar gate scenarios 3 & 4 (approve/deny). Set both to exercise the gates.',
+  );
+}
+
+const integ = new IntegTest(app, 'TaskLifecycle', {
+  testCases: [stack],
+  // Disable the two-phase update workflow. By default integ-runner deploys the
+  // committed snapshot first, then re-deploys the current version to verify
+  // in-place updates don't break. The AgentCore Runtime takes several minutes to
+  // go CREATING -> READY and is partly immutable; the second deploy phase races
+  // the first (Runtime still CREATING) -> 409 "agent is currently being modified"
+  // -> integ-runner aborts mid-deploy and teardown strands a CREATING Runtime.
+  // We validate runtime BEHAVIOR, not stack-update safety, so a single clean
+  // deploy is correct here.
+  stackUpdateWorkflow: false,
+  // Force teardown on success and failure so a failed assertion never strands
+  // the (expensive) full stack in the shared E2E account.
+  //
+  // expectError on destroy: `cdk destroy` RELIABLY fails this stack — the
+  // AgentCore Runtime's service-managed `agentic_ai` ENIs are released
+  // asynchronously by AWS, so the subnet/SG/VPC deletes hit DependencyViolation
+  // ("has dependencies and cannot be deleted" / "has a dependent object") while
+  // the ENIs linger. Without expectError, integ-runner would mark the whole run
+  // FAILED on teardown alone — masking whether the ASSERTIONS passed. We tolerate
+  // the teardown failure (matched on the dependency-violation text; NOTE that the
+  // `DELETE_FAILED` alternative also matches unrelated delete failures) and hand
+  // the stranded `int-<hash>` stack to the out-of-band ephemeral sweeper
+  // (.github/workflows/integ-sweeper.yml), which reclaims it once ENIs detach, alarming if stuck.
+  cdkCommandOptions: {
+    destroy: {
+      args: { force: true },
+      expectError: true,
+      expectedMessage: 'cannot be deleted|dependent object|DELETE_FAILED',
+    },
+  },
+});
+
+// --- Authentication (same pattern as Phase 0) ---------------------------------
+// A throwaway user the assertions authenticate as. The pool disables self-signup,
+// so create + confirm it administratively, then mint a token via USER_PASSWORD_AUTH.
+// The password is generated per-synth (no credential-shaped literal in source) and
+// satisfies the Cognito default policy by construction.
+const username = 'integ-lifecycle@example.com';
+const password = `Aa1!${randomBytes(18).toString('base64url')}`;
+
+// Service name MUST be the AWS SDK v2 form 'CognitoIdentityServiceProvider' — the
+// assertion provider maps only the v2 key to the real client package (see the
+// long note in integ.task-api-smoke.ts).
+const cognitoService = 'CognitoIdentityServiceProvider';
+
+const createUser = integ.assertions.awsApiCall(cognitoService, 'adminCreateUser', {
+  UserPoolId: userPoolId,
+  Username: username,
+  MessageAction: 'SUPPRESS',
+  TemporaryPassword: password,
+});
+
+const setPassword = integ.assertions.awsApiCall(cognitoService, 'adminSetUserPassword', {
+  UserPoolId: userPoolId,
+  Username: username,
+  Password: password,
+  Permanent: true,
+});
+
+const auth = integ.assertions.awsApiCall(cognitoService, 'initiateAuth', {
+  AuthFlow: 'USER_PASSWORD_AUTH',
+  ClientId: appClientId,
+  AuthParameters: { USERNAME: username, PASSWORD: password },
+});
+
+const idToken = auth.getAttString('AuthenticationResult.IdToken');
+
+// Conservative polling windows. Agent runs are real LLM sessions over a freshly
+// cold-started AgentCore runtime; the first invocation pays the cold-start tax.
+const TERMINAL_POLL = { totalTimeout: Duration.minutes(12), interval: Duration.seconds(30) };
+// The interim AWAITING_APPROVAL state appears mid-run, before terminal — poll it
+// on a shorter window so a stuck gate fails fast instead of burning the full
+// terminal budget waiting for a state that will never arrive.
+const GATE_POLL = { totalTimeout: Duration.minutes(8), interval: Duration.seconds(15) };
+
+// --- Scenario 1: COMPLETED (repo-less default/agent-v1) -----------------------
+// The default workflow is read-only (Read/Glob/Grep/WebFetch), requires no repo,
+// and delivers an artifact to S3. A trivial, self-contained instruction completes
+// in a single turn. No GitHub repo or token is involved.
+const submitComplete = integ.assertions.httpApiCall(`${apiUrl}tasks`, {
+  method: 'POST',
+  headers: {
+    'Content-Type': 'application/json',
+    'Authorization': idToken,
+  },
+  body: JSON.stringify({
+    workflow_ref: 'default/agent-v1',
+    // Keep this a plain, benign natural-language request. An earlier terse,
+    // imperative phrasing ("Reply with exactly the single word: done. Do not
+    // use any tools.") tripped the Bedrock content-policy guardrail at submit
+    // (400 VALIDATION_ERROR "Task description was blocked by content policy").
+    task_description: 'Please write a one-sentence summary explaining what a pull request is in software development.',
+    max_turns: 2,
+    max_budget_usd: 0.5,
+  }),
+});
+
+// Poll the task row until it reaches COMPLETED. No getAttString is read off this
+// call, so flattenResponse stays false and the nested objectLike expect works.
+const pollComplete = integ.assertions.awsApiCall('DynamoDB', 'getItem', {
+  TableName: taskTableName,
+  Key: { task_id: { S: submitComplete.getAttString('body.data.task_id') } },
+});
+pollComplete
+  .expect(ExpectedResult.objectLike({ Item: { status: { S: TaskStatus.COMPLETED } } }))
+  .waitForAssertions(TERMINAL_POLL);
+
+// --- Scenario 2: FAILED (coding/new-task-v1, onboarded repo, clone fails) ------
+// The submit path runs the onboarding gate (RepoTable) BEFORE clone/preflight,
+// so an un-onboarded repo is rejected at submit (422 REPO_NOT_ONBOARDED) and the
+// task never reaches a terminal FAILED. To exercise the terminal-error path we
+// must therefore ONBOARD the repo first, then make CLONE fail: the onboarding
+// gate only checks RepoTable, not GitHub, so we onboard a repo slug that does
+// not exist on GitHub. Submit then passes admission, preflight/clone 404s, and
+// the orchestrator writes terminal FAILED + error_message — no agent turn, no
+// runtime spin-up. (onboardFailRepo is sequenced before this submit.)
+const failRepo = `abca-integ-nonexistent/does-not-exist-${randomBytes(6).toString('hex')}`;
+const onboardFailRepo = integ.assertions.awsApiCall('DynamoDB', 'putItem', {
+  TableName: repoTableName,
+  Item: {
+    repo: { S: failRepo },
+    status: { S: 'active' },
+    onboarded_at: { S: '2026-01-01T00:00:00.000Z' },
+    updated_at: { S: '2026-01-01T00:00:00.000Z' },
+  },
+});
+
+const submitFail = integ.assertions.httpApiCall(`${apiUrl}tasks`, {
+  method: 'POST',
+  headers: {
+    'Content-Type': 'application/json',
+    'Authorization': idToken,
+  },
+  body: JSON.stringify({
+    workflow_ref: 'coding/new-task-v1',
+    repo: failRepo,
+    task_description: 'This task targets a nonexistent repo and must fail at clone/preflight.',
+    max_turns: 1,
+    max_budget_usd: 0.5,
+  }),
+});
+
+const pollFail = integ.assertions.awsApiCall('DynamoDB', 'getItem', {
+  TableName: taskTableName,
+  Key: { task_id: { S: submitFail.getAttString('body.data.task_id') } },
+});
+pollFail
+  .expect(ExpectedResult.objectLike({ Item: { status: { S: TaskStatus.FAILED } } }))
+  .waitForAssertions(TERMINAL_POLL);
+
+// --- Execution order (scenarios 1 & 2) ----------------------------------------
+// Auth first, then SEED THE GITHUB TOKEN BEFORE ANY SUBMIT. This ordering is
+// load-bearing: the orchestrator's resolveGitHubToken caches the secret value
+// for 5 min keyed by ARN (context-hydration.ts). Any coding-workflow task that
+// runs GitHub preflight reads + caches the token. Scenario 2 (coding/new-task-v1)
+// runs preflight too — so if it ran BEFORE the seed, it would cache the stack's
+// INITIAL EMPTY secret and every later gate task would reuse that empty token →
+// preflight 401 GITHUB_UNREACHABLE → FAILED before ever reaching the gate
+// (observed live). Seeding right after auth means the secret is populated before
+// the first token read, so no empty value is ever cached. This is exactly the
+// documented operator flow (QUICK_START §4: populate the secret before submitting
+// tasks) — no agent.ts change. The seed only happens when the gates are enabled
+// (it is sourced from the pre-seeded PAT secret); scenario 2 targets a
+// nonexistent repo and fails at clone regardless of token, so it is unaffected.
+//
+// Onboarding: scenario 2's repo and the sandbox both need a RepoTable row before
+// submit (else 422 REPO_NOT_ONBOARDED), so both onboard steps precede their
+// submits. Gate approve/deny run sequentially since each POST needs the
+// request_id read from the parked task's approval row.
+let chain = createUser
+  .next(setPassword)
+  .next(auth)
+  .next(onboardFailRepo)
+  .next(submitComplete)
+  .next(submitFail)
+  .next(pollComplete)
+  .next(pollFail);
+
+// --- Scenarios 3 & 4 (Cedar gates) — only when a sandbox is configured --------
+// Every assertion call below is CONSTRUCTED only inside this block, so when the
+// gates are disabled nothing is registered with the integ provider and the run
+// reduces cleanly to scenarios 1 & 2 (no skipped/failing gate steps, no PAT seed
+// into the stack secret, no clone of a personal repo).
+if (gatesEnabled) {
+  // Narrow the env-sourced config to non-null for this block.
+  const sandboxRepo = SANDBOX_REPO as string;
+  const patSecretId = PRESEEDED_PAT_SECRET as string;
+
+  // Re-mint a FRESH token right before each approve/deny POST. The Cognito app
+  // client uses the default 60-min ID-token validity (task-api.ts sets no
+  // idTokenValidity), but the strictly-serial .next() chain reaches the gate POSTs
+  // only after ~32 min (approve) / ~48 min (deny) of polling budget PLUS real agent
+  // cold-start + runtime — the live run took ~54 min. Reusing the original token
+  // would risk a 401 (expired) → the decision never records → false timeout keyed
+  // to agent latency. These re-auths run just before their POSTs in the chain, so
+  // each token is minted minutes (not ~50 min) before use. The user/password are
+  // permanent (adminSetUserPassword above), so re-auth needs no new setup.
+  const reAuthApprove = integ.assertions.awsApiCall(cognitoService, 'initiateAuth', {
+    AuthFlow: 'USER_PASSWORD_AUTH',
+    ClientId: appClientId,
+    AuthParameters: { USERNAME: username, PASSWORD: password },
+  });
+  const approveToken = reAuthApprove.getAttString('AuthenticationResult.IdToken');
+
+  const reAuthDeny = integ.assertions.awsApiCall(cognitoService, 'initiateAuth', {
+    AuthFlow: 'USER_PASSWORD_AUTH',
+    ClientId: appClientId,
+    AuthParameters: { USERNAME: username, PASSWORD: password },
+  });
+  const denyToken = reAuthDeny.getAttString('AuthenticationResult.IdToken');
+
+  // --- Token seeding (prerequisite for gate scenarios) ------------------------
+  // Copy the pre-seeded PAT into the stack-created GitHubTokenSecret so the agent
+  // runtime can clone the sandbox and push a branch. This automates the documented
+  // operator step (QUICK_START.md §4). No getAttString is read off seedPut, and the
+  // SecretString token is consumed inline by seedPut, never asserted on.
+  const seedGet = integ.assertions.awsApiCall('SecretsManager', 'getSecretValue', {
+    SecretId: patSecretId,
+  });
+
+  const seedPut = integ.assertions.awsApiCall('SecretsManager', 'putSecretValue', {
+    SecretId: githubTokenSecretArn,
+    SecretString: seedGet.getAttString('SecretString'),
+  });
+
+  // Onboard the sandbox so the gate submits pass the onboarding gate (otherwise
+  // 422 REPO_NOT_ONBOARDED at submit, before the agent ever runs). A minimal active
+  // row is enough — the agent reads the GitHub token from the platform-default
+  // GitHubTokenSecret we seeded above, so the blueprint needs no per-repo token.
+  const onboardSandbox = integ.assertions.awsApiCall('DynamoDB', 'putItem', {
+    TableName: repoTableName,
+    Item: {
+      repo: { S: sandboxRepo },
+      status: { S: 'active' },
+      onboarded_at: { S: '2026-01-01T00:00:00.000Z' },
+      updated_at: { S: '2026-01-01T00:00:00.000Z' },
+    },
+  });
+
+  // --- Scenario 3: AWAITING_APPROVAL -> approve -------------------------------
+  // coding/new-task-v1 against the sandbox. The task asks the agent to write a
+  // `config.env` file, which the Write tool routes through the write_env_files
+  // soft-deny rule (agent/policies/soft_deny.cedar) -> the task parks at
+  // AWAITING_APPROVAL with a PENDING approval row. We approve it, then assert the
+  // row flips to APPROVED. (Post-approval the agent may COMPLETE or FAIL — both
+  // terminal — so the deterministic assertion is the recorded decision, not a
+  // specific terminal status.)
+  const submitApprove = integ.assertions.httpApiCall(`${apiUrl}tasks`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': idToken,
+    },
+    body: JSON.stringify({
+      workflow_ref: 'coding/new-task-v1',
+      repo: sandboxRepo,
+      task_description: 'Create a file named config.env at the repo root with the single line FOO=bar, then commit it.',
+      max_turns: 6,
+      max_budget_usd: 0.5,
+    }),
+  });
+  const approveTaskId = submitApprove.getAttString('body.data.task_id');
+
+  // Wait for the gate to open (interim AWAITING_APPROVAL).
+  const pollGateApprove = integ.assertions.awsApiCall('DynamoDB', 'getItem', {
+    TableName: taskTableName,
+    Key: { task_id: { S: approveTaskId } },
+  });
+  pollGateApprove
+    .expect(ExpectedResult.objectLike({ Item: { status: { S: TaskStatus.AWAITING_APPROVAL } } }))
+    .waitForAssertions(GATE_POLL);
+
+  // Read the PENDING approval row's request_id (SK). Querying by task_id (PK) is
+  // required because we do not know the agent-minted request_id. The status=PENDING
+  // FilterExpression makes Items[0] deterministic: a task could trip the gate more
+  // than once (or carry already-decided rows), and an unfiltered query orders only
+  // by SK, so without the filter Items[0] could be the wrong/decided row and the
+  // POST would target the wrong request_id. getAttString here flips this call to a
+  // flattened response, so we do NOT .expect() on it.
+  const queryApprove = integ.assertions.awsApiCall('DynamoDB', 'query', {
+    TableName: taskApprovalsTableName,
+    KeyConditionExpression: 'task_id = :tid',
+    FilterExpression: '#st = :pending',
+    ExpressionAttributeNames: { '#st': 'status' },
+    ExpressionAttributeValues: { ':tid': { S: approveTaskId }, ':pending': { S: 'PENDING' } },
+  });
+  const approveRequestId = queryApprove.getAttString('Items.0.request_id.S');
+
+  const approve = integ.assertions.httpApiCall(`${apiUrl}tasks/${approveTaskId}/approve`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      // Fresh token (see reAuthApprove) — the original idToken may be expired by now.
+      'Authorization': approveToken,
+    },
+    body: JSON.stringify({ request_id: approveRequestId, decision: 'approve', scope: 'this_call' }),
+  });
+
+  // Assert the decision was recorded on the approval row. Now that request_id is
+  // known we read the exact row by its full key.
+  const pollApproveDecision = integ.assertions.awsApiCall('DynamoDB', 'getItem', {
+    TableName: taskApprovalsTableName,
+    Key: { task_id: { S: approveTaskId }, request_id: { S: approveRequestId } },
+  });
+  pollApproveDecision
+    .expect(ExpectedResult.objectLike({ Item: { status: { S: 'APPROVED' } } }))
+    .waitForAssertions(GATE_POLL);
+
+  // --- Scenario 4: AWAITING_APPROVAL -> deny ----------------------------------
+  // Identical trigger to scenario 3; we deny instead and assert the row flips to
+  // DENIED.
+  const submitDeny = integ.assertions.httpApiCall(`${apiUrl}tasks`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': idToken,
+    },
+    body: JSON.stringify({
+      workflow_ref: 'coding/new-task-v1',
+      repo: sandboxRepo,
+      task_description: 'Create a file named config.env at the repo root with the single line FOO=bar, then commit it.',
+      max_turns: 6,
+      max_budget_usd: 0.5,
+    }),
+  });
+  const denyTaskId = submitDeny.getAttString('body.data.task_id');
+
+  const pollGateDeny = integ.assertions.awsApiCall('DynamoDB', 'getItem', {
+    TableName: taskTableName,
+    Key: { task_id: { S: denyTaskId } },
+  });
+  pollGateDeny
+    .expect(ExpectedResult.objectLike({ Item: { status: { S: TaskStatus.AWAITING_APPROVAL } } }))
+    .waitForAssertions(GATE_POLL);
+
+  const queryDeny = integ.assertions.awsApiCall('DynamoDB', 'query', {
+    TableName: taskApprovalsTableName,
+    KeyConditionExpression: 'task_id = :tid',
+    FilterExpression: '#st = :pending',
+    ExpressionAttributeNames: { '#st': 'status' },
+    ExpressionAttributeValues: { ':tid': { S: denyTaskId }, ':pending': { S: 'PENDING' } },
+  });
+  const denyRequestId = queryDeny.getAttString('Items.0.request_id.S');
+
+  const deny = integ.assertions.httpApiCall(`${apiUrl}tasks/${denyTaskId}/deny`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      // Fresh token (see reAuthDeny) — the original idToken may be expired by now.
+      'Authorization': denyToken,
+    },
+    body: JSON.stringify({ request_id: denyRequestId, decision: 'deny', reason: 'integ: exercising the deny path' }),
+  });
+
+  const pollDenyDecision = integ.assertions.awsApiCall('DynamoDB', 'getItem', {
+    TableName: taskApprovalsTableName,
+    Key: { task_id: { S: denyTaskId }, request_id: { S: denyRequestId } },
+  });
+  pollDenyDecision
+    .expect(ExpectedResult.objectLike({ Item: { status: { S: 'DENIED' } } }))
+    .waitForAssertions(GATE_POLL);
+
+  // Seed the GitHub token right after auth and BEFORE scenario 2's onboarding and
+  // submit, so no preflight can read (and cache for 5 min) the stack's empty secret.
+  auth.next(seedGet).next(seedPut).next(onboardFailRepo);
+  // Then splice the remaining gate steps in; approve/deny run sequentially.
+  chain = chain
+    .next(onboardSandbox)
+    .next(submitApprove)
+    .next(submitDeny)
+    .next(pollGateApprove)
+    .next(queryApprove)
+    .next(reAuthApprove)
+    .next(approve)
+    .next(pollApproveDecision)
+    .next(pollGateDeny)
+    .next(queryDeny)
+    .next(reAuthDeny)
+    .next(deny)
+    .next(pollDenyDecision);
+}
diff --git a/docs/guides/ROADMAP.md b/docs/guides/ROADMAP.md
index 3e8aa438..1c9b392d 100644
--- a/docs/guides/ROADMAP.md
+++ b/docs/guides/ROADMAP.md
@@ -225,7 +225,7 @@ Planned capabilities, grouped by theme. Items are independent and may ship in an
 
 | Capability | Description |
 |------------|-------------|
-| **Deployed runtime E2E verification** | **Phase 0 landed:** `@aws-cdk/integ-tests-alpha` + `integ-runner` deploy a trimmed Task API stack to a real account, assert the create-and-persist happy path (task persists at `SUBMITTED`), then tear it down (`mise //cdk:integ`). In CI it runs per-PR via `workflow_run` when the diff touches `cdk/**` or `agent/**`, behind the `integ` environment's admin-approval gate, and posts a required `integ-smoke` status that blocks merge (`workflow_dispatch` retained for manual runs). Phase 1 (full lifecycle / real agent runs) and Phase 2 (channels) follow. See [ADR-013](../decisions/ADR-013-tiered-validation-pyramid.md). |
+| **Deployed runtime E2E verification** | **Phase 0 landed:** `@aws-cdk/integ-tests-alpha` + `integ-runner` deploy a trimmed Task API stack to a real account, assert the create-and-persist happy path (task persists at `SUBMITTED`), then tear it down (`mise //cdk:integ`). In CI it runs per-PR via `workflow_run` when the diff touches `cdk/**` or `agent/**`, behind the `integ` environment's admin-approval gate, and posts a required `integ-smoke` status that blocks merge (`workflow_dispatch` retained for manual runs). **Phase 1 landed ([#317](https://github.com/aws-samples/sample-autonomous-cloud-coding-agents/issues/317)):** a second test (`integ.task-lifecycle.ts`) deploys the *full* `AgentStack` (orchestrator + AgentCore runtime + agent container) to the dedicated E2E account and drives a real agent through the four terminal paths from the Cedar HITL matrix — `COMPLETED`, `FAILED`, and `AWAITING_APPROVAL` → approve/deny — capping cost with low `max_turns` + `max_budget_usd`. Phase 2 (channels) follows. See [ADR-013](../decisions/ADR-013-tiered-validation-pyramid.md). |
 | **Admission backlog observability** | Metric and alarm when `SUBMITTED` task depth exceeds an operator threshold (capacity and admission health). |
 | **Admission queue with deferred pickup** | When admission is at capacity, persist tasks in a durable queue instead of failing them. Automatically re-attempt admission and continue processing in FIFO order (with optional priority lanes) as concurrency becomes available. Preserve cancel/idempotency semantics and expose queue position/ETA in task status. |
 | **Safe orchestrator deploys** | Pre-deploy checks for active tasks (drain or warn); blue-green or canary Lambda deploy for the durable orchestrator with rollback on error regressions (`OBSERVABILITY.md`). |
diff --git a/docs/src/content/docs/roadmap/Roadmap.md b/docs/src/content/docs/roadmap/Roadmap.md
index 1d54f545..e43b43e8 100644
--- a/docs/src/content/docs/roadmap/Roadmap.md
+++ b/docs/src/content/docs/roadmap/Roadmap.md
@@ -229,7 +229,7 @@ Planned capabilities, grouped by theme. Items are independent and may ship in an
 
 | Capability | Description |
 |------------|-------------|
-| **Deployed runtime E2E verification** | **Phase 0 landed:** `@aws-cdk/integ-tests-alpha` + `integ-runner` deploy a trimmed Task API stack to a real account, assert the create-and-persist happy path (task persists at `SUBMITTED`), then tear it down (`mise //cdk:integ`). In CI it runs per-PR via `workflow_run` when the diff touches `cdk/**` or `agent/**`, behind the `integ` environment's admin-approval gate, and posts a required `integ-smoke` status that blocks merge (`workflow_dispatch` retained for manual runs). Phase 1 (full lifecycle / real agent runs) and Phase 2 (channels) follow. See [ADR-013](/architecture/adr-013-tiered-validation-pyramid). |
+| **Deployed runtime E2E verification** | **Phase 0 landed:** `@aws-cdk/integ-tests-alpha` + `integ-runner` deploy a trimmed Task API stack to a real account, assert the create-and-persist happy path (task persists at `SUBMITTED`), then tear it down (`mise //cdk:integ`). In CI it runs per-PR via `workflow_run` when the diff touches `cdk/**` or `agent/**`, behind the `integ` environment's admin-approval gate, and posts a required `integ-smoke` status that blocks merge (`workflow_dispatch` retained for manual runs). **Phase 1 landed ([#317](https://github.com/aws-samples/sample-autonomous-cloud-coding-agents/issues/317)):** a second test (`integ.task-lifecycle.ts`) deploys the *full* `AgentStack` (orchestrator + AgentCore runtime + agent container) to the dedicated E2E account and drives a real agent through the four terminal paths from the Cedar HITL matrix — `COMPLETED`, `FAILED`, `AWAITING_APPROVAL` → approve, and `AWAITING_APPROVAL` → deny — capping cost with low `max_turns` and `max_budget_usd` limits. Phase 2 (channels) follows. See [ADR-013](/architecture/adr-013-tiered-validation-pyramid). |
 | **Admission backlog observability** | Metric and alarm when `SUBMITTED` task depth exceeds an operator threshold (capacity and admission health). |
 | **Admission queue with deferred pickup** | When admission is at capacity, persist tasks in a durable queue instead of failing them. Automatically re-attempt admission and continue processing in FIFO order (with optional priority lanes) as concurrency becomes available. Preserve cancel/idempotency semantics and expose queue position/ETA in task status. |
 | **Safe orchestrator deploys** | Pre-deploy checks for active tasks (drain or warn); blue-green or canary Lambda deploy for the durable orchestrator with rollback on error regressions (`OBSERVABILITY.md`). |