From 139b47a0698ec486bcf8474c4fd02f52131140bc Mon Sep 17 00:00:00 2001
From: Manas Srivastava <mastermanas805@gmail.com>
Date: Sat, 6 Jun 2026 10:14:15 +0530
Subject: [PATCH] test(ci): deploy-failure auto-debug path + anon-stack gap +
 with_failed_deploy factory (#70)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Failure-diagnosis CI integration tests for the deploy-failure AUTO-DEBUG
PATH (docs/ci/02-FAILURE-DIAGNOSIS-AND-AUTODEBUG.md §5), api side.

1. Auto-debug PATH integration test (deploy_autodebug_path_test.go):
   seeds a status=failed deployment + an older lifecycle row + a
   failure_autopsy deployment_events row (reason/exit_code/last_lines/
   hint) against a real test DB, then asserts the full agent debug loop
   as ONE coherent contract:
     - GET /api/v1/deployments/:id  → status=failed + non-empty
       error_message (the one-line cause)
     - GET /api/v1/deployments/:id/events → autopsy with reason +
       non-empty last_lines + hint, newest-first, count correct
     - auth-negative: no/invalid bearer → 401
     - cross-team: another team's token → 404 (no existence leak)

2. Anonymous-stack failure-diagnosis contract (stack_anon_failure_diag_
   test.go): drives an anon stack (NULL team_id) to failed, asserts
   GET /stacks/:slug (slug-bearer, no auth) returns status=failed and
   the raw build error is persisted on the service row, then PINS the
   documented gap by enumerating the LIVE router (router.New +
   GetRoutes) and asserting NO /stacks/:slug/events route exists. Adding
   a stack-autopsy endpoint later deliberately turns this test red. Anon
   failure-diagnosis is status + logs only (no classified autopsy).

3. with_failed_deploy factory flag (internal_e2e_account.go): cohort-
   only, inert-by-default pre-seed of ONE failed deployment + ONE
   failure_autopsy event via the production deploy models
   (CreateDeployment → UpdateDeploymentStatus → UpsertDeploymentAutopsy),
   surfaced as failed_deploy_id, reaped with the team. Lets the web wave
   load /app/deployments/:id and render the FailureAutopsyPanel against a
   real backend. Tests: seeds exactly one failed deploy + one autopsy
   with the factory payload; omitting seeds none; seam-driven seed_failed
   503; whitebox sqlmock coverage of all three seed error branches.

The producer↔consumer schema parity (worker autopsy write ↔ api /events
read) is asserted in the worker PR's deploy_failure_autopsy_schema_
parity_test.go (cross-referenced).

make gate: green except pre-existing local-only flakes outside this diff
(internal/models/TestLinkGitHubID DB-pollution, handlers
TestQueue_CredIssueError NATS flake). CI (fresh DB, Go 1.25) is
authoritative. New tests + donebar/manner-matrix/error-envelope guards
all pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../handlers/deploy_autodebug_path_test.go    | 243 ++++++++++++++++++
 internal/handlers/internal_e2e_account.go     | 148 ++++++++++-
 .../internal_e2e_account_export_test.go       |  23 ++
 ...internal_e2e_account_failed_deploy_test.go | 155 +++++++++++
 ...internal_e2e_account_seed_whitebox_test.go |  93 +++++++
 .../handlers/internal_e2e_account_test.go     |  19 +-
 .../handlers/stack_anon_failure_diag_test.go  | 195 ++++++++++++++
 7 files changed, 859 insertions(+), 17 deletions(-)
 create mode 100644 internal/handlers/deploy_autodebug_path_test.go
 create mode 100644 internal/handlers/internal_e2e_account_failed_deploy_test.go
 create mode 100644 internal/handlers/stack_anon_failure_diag_test.go

diff --git a/internal/handlers/deploy_autodebug_path_test.go b/internal/handlers/deploy_autodebug_path_test.go
new file mode 100644
index 0000000..da7b3b3
--- /dev/null
+++ b/internal/handlers/deploy_autodebug_path_test.go
@@ -0,0 +1,243 @@
+package handlers_test
+
+// deploy_autodebug_path_test.go — the end-to-end AUTO-DEBUG PATH integration
+// test for a FAILED deployment (task #70, docs/ci/02-FAILURE-DIAGNOSIS-AND-
+// AUTODEBUG.md §5.1).
+//
+// The pieces of the failure-diagnosis surface are each unit/integration-tested
+// elsewhere (deploy_events_endpoint_test.go: ordering/empty/clamp/cross-team;
+// deploy_buildfailed_autopsy_test.go: the autopsy "failure" field on GET
+// /deploy/:id). This file asserts them as ONE coherent contract — the exact
+// loop an MCP agent or the dashboard FailureAutopsyPanel runs to diagnose a
+// failed deploy WITHOUT cluster access:
+//
+//   1. GET /api/v1/deployments/:id        → status="failed" + non-empty
+//                                            error_message (the one-line cause
+//                                            the worker autopsy stamped).
+//   2. GET /api/v1/deployments/:id/events → events[] carrying the
+//                                            failure_autopsy with reason +
+//                                            non-empty last_lines + hint, newest
+//                                            first, count correct.
+//   3. auth-negative: no / invalid bearer → 401 (the surface is gated).
+//   4. cross-team: another team's token   → 404 (you can NOT read another
+//                                            team's failure — never 403, no
+//                                            existence leak).
+//
+// This mirrors the seeding pattern in deploy_events_endpoint_test.go and
+// deploy_lifecycle_block_integration_test.go (real Postgres test DB via
+// testhelpers.SetupTestDB, the production RequireAuth chain via
+// NewTestAppWithServices), so the HTTP envelope, route resolution, JWT
+// middleware, and model SQL path are exercised end-to-end against the same SQL
+// the production handler issues. The producer side (worker autopsy) and this
+// consumer side (the /events + /:id read) are proven schema-compatible by the
+// worker's deploy_failure_autopsy_schema_parity_test.go.
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/google/uuid"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"instant.dev/internal/testhelpers"
+)
+
+// adbDeploymentEnvelope is the GET /api/v1/deployments/:id response shape (the
+// item.error one-liner is the agent's first debug read).
+type adbDeploymentEnvelope struct {
+	OK   bool `json:"ok"`
+	Item struct {
+		AppID  string `json:"app_id"`
+		Status string `json:"status"`
+		Error  string `json:"error"`
+	} `json:"item"`
+}
+
+// adbEventsEnvelope is the GET /api/v1/deployments/:id/events response shape.
+type adbEventsEnvelope struct {
+	OK           bool   `json:"ok"`
+	DeploymentID string `json:"deployment_id"`
+	Events       []struct {
+		Kind      string   `json:"kind"`
+		Reason    string   `json:"reason"`
+		ExitCode  *int     `json:"exit_code"`
+		Event     string   `json:"event"`
+		LastLines []string `json:"last_lines"`
+		Hint      string   `json:"hint"`
+		CreatedAt string   `json:"created_at"`
+	} `json:"events"`
+	Count int `json:"count"`
+}
+
+// TestDeployAutodebugPath_FailedDeploy_FullAgentLoop is the §5.1 contract:
+// status+error_message AND the events autopsy AND auth-negative AND cross-team,
+// asserted as one coherent debug-path test against a real test DB.
+func TestDeployAutodebugPath_FailedDeploy_FullAgentLoop(t *testing.T) {
+	db, cleanDB := testhelpers.SetupTestDB(t)
+	defer cleanDB()
+	rdb, cleanRedis := testhelpers.SetupTestRedis(t)
+	defer cleanRedis()
+
+	teamID := testhelpers.MustCreateTeamDB(t, db, "pro")
+	otherTeamID := testhelpers.MustCreateTeamDB(t, db, "pro")
+	ownerJWT := testhelpers.MustSignSessionJWT(t,
+		"aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", teamID, "adb-owner@example.com")
+	otherJWT := testhelpers.MustSignSessionJWT(t,
+		"bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", otherTeamID, "adb-other@example.com")
+
+	// Seed a FAILED deployment with the one-line error_message the worker
+	// autopsy stamps ("<reason>: <hint snippet>").
+	depID := uuid.New()
+	appID := "adb" + uuid.NewString()[:8]
+	const wantErrorMessage = "OOMKilled: Your app exceeded its memory limit and was killed by the kernel."
+	_, err := db.Exec(`
+		INSERT INTO deployments (id, team_id, app_id, port, tier, status, error_message)
+		VALUES ($1, $2, $3, 8080, 'pro', 'failed', $4)
+	`, depID, teamID, appID, wantErrorMessage)
+	require.NoError(t, err)
+
+	// Older lifecycle row + newer failure_autopsy row (the real autopsy shape).
+	_, err = db.Exec(`
+		INSERT INTO deployment_events
+			(deployment_id, kind, reason, exit_code, event, last_lines, hint, created_at)
+		VALUES ($1, 'lifecycle', 'image_pull_failed', NULL, 'ErrImagePull',
+			'["pulling image","ErrImagePull"]', 'check the image reference',
+			now() - interval '10 minutes')
+	`, depID)
+	require.NoError(t, err)
+
+	autopsyLastLines := []string{
+		"npm ERR! code ELIFECYCLE",
+		"FATAL: out of memory: Killed process 1 (node)",
+	}
+	_, err = db.Exec(`
+		INSERT INTO deployment_events
+			(deployment_id, kind, reason, exit_code, event, last_lines, hint, created_at)
+		VALUES ($1, 'failure_autopsy', 'OOMKilled', 137, 'OOMKilling: Memory cgroup out of memory',
+			'["npm ERR! code ELIFECYCLE","FATAL: out of memory: Killed process 1 (node)"]',
+			'Your app exceeded its memory limit and was killed by the kernel.',
+			now() - interval '1 minute')
+	`, depID)
+	require.NoError(t, err)
+
+	app, cleanApp := testhelpers.NewTestAppWithServices(t, db, rdb,
+		"postgres,redis,mongodb,queue,webhook,storage,deploy")
+	defer cleanApp()
+
+	// ── Step 1: GET /api/v1/deployments/:id → status=failed + error_message ──
+	t.Run("status_and_error_message", func(t *testing.T) {
+		req := httptest.NewRequest(http.MethodGet, "/api/v1/deployments/"+appID, nil)
+		req.Header.Set("Authorization", "Bearer "+ownerJWT)
+		req.Header.Set("X-Forwarded-For", "10.70.0.1")
+		resp, err := app.Test(req, 5000)
+		require.NoError(t, err)
+		defer resp.Body.Close()
+		require.Equal(t, http.StatusOK, resp.StatusCode)
+
+		var env adbDeploymentEnvelope
+		require.NoError(t, json.NewDecoder(resp.Body).Decode(&env))
+		assert.True(t, env.OK)
+		assert.Equal(t, appID, env.Item.AppID)
+		assert.Equal(t, "failed", env.Item.Status,
+			"the agent's first read must show the deploy is failed")
+		assert.NotEmpty(t, env.Item.Error,
+			"error_message must be non-empty — it is the one-line cause the agent acts on")
+		assert.Equal(t, wantErrorMessage, env.Item.Error)
+	})
+
+	// ── Step 2: GET /api/v1/deployments/:id/events → autopsy timeline ────────
+	t.Run("events_autopsy_timeline", func(t *testing.T) {
+		req := httptest.NewRequest(http.MethodGet, "/api/v1/deployments/"+appID+"/events", nil)
+		req.Header.Set("Authorization", "Bearer "+ownerJWT)
+		req.Header.Set("X-Forwarded-For", "10.70.0.2")
+		resp, err := app.Test(req, 5000)
+		require.NoError(t, err)
+		defer resp.Body.Close()
+		require.Equal(t, http.StatusOK, resp.StatusCode)
+
+		var env adbEventsEnvelope
+		require.NoError(t, json.NewDecoder(resp.Body).Decode(&env))
+		assert.True(t, env.OK)
+		assert.Equal(t, depID.String(), env.DeploymentID,
+			"deployment_id must echo the canonical UUID the agent can re-query")
+		assert.Equal(t, 2, env.Count)
+		require.Len(t, env.Events, 2)
+
+		// Newest first (DESC by created_at): the autopsy row leads.
+		autopsy := env.Events[0]
+		assert.Equal(t, "failure_autopsy", autopsy.Kind,
+			"the dedicated classified row is kind=failure_autopsy")
+		assert.Equal(t, "OOMKilled", autopsy.Reason,
+			"reason is the machine-readable classification the agent branches on")
+		require.NotNil(t, autopsy.ExitCode)
+		assert.Equal(t, 137, *autopsy.ExitCode)
+		assert.NotEmpty(t, autopsy.LastLines,
+			"last_lines (the real build/pod error tail) MUST be non-empty — "+
+				"it is the surface the agent reads to fix the Dockerfile/config")
+		assert.Equal(t, autopsyLastLines, autopsy.LastLines)
+		assert.NotEmpty(t, autopsy.Hint,
+			"hint is the plain-language remedy the agent acts on")
+		assert.Contains(t, autopsy.Hint, "memory")
+		assert.NotEmpty(t, autopsy.CreatedAt)
+
+		// Older row trails.
+		assert.Equal(t, "image_pull_failed", env.Events[1].Reason, "older row trails (DESC)")
+		assert.Equal(t, "lifecycle", env.Events[1].Kind)
+	})
+
+	// ── Step 3: auth-negative — the debug surface is gated ───────────────────
+	t.Run("auth_negative_401", func(t *testing.T) {
+		// No bearer.
+		reqNoAuth := httptest.NewRequest(http.MethodGet, "/api/v1/deployments/"+appID+"/events", nil)
+		reqNoAuth.Header.Set("X-Forwarded-For", "10.70.0.3")
+		respNoAuth, err := app.Test(reqNoAuth, 5000)
+		require.NoError(t, err)
+		defer respNoAuth.Body.Close()
+		assert.Equal(t, http.StatusUnauthorized, respNoAuth.StatusCode,
+			"no bearer → 401 (events surface is RequireAuth)")
+
+		// Garbage bearer.
+		reqBad := httptest.NewRequest(http.MethodGet, "/api/v1/deployments/"+appID+"/events", nil)
+		reqBad.Header.Set("Authorization", "Bearer not-a-valid-jwt")
+		reqBad.Header.Set("X-Forwarded-For", "10.70.0.4")
+		respBad, err := app.Test(reqBad, 5000)
+		require.NoError(t, err)
+		defer respBad.Body.Close()
+		assert.Equal(t, http.StatusUnauthorized, respBad.StatusCode,
+			"invalid bearer → 401")
+	})
+
+	// ── Step 4: cross-team — you can NOT read another team's failure ─────────
+	t.Run("cross_team_404", func(t *testing.T) {
+		// /:id (status) read.
+		reqGet := httptest.NewRequest(http.MethodGet, "/api/v1/deployments/"+appID, nil)
+		reqGet.Header.Set("Authorization", "Bearer "+otherJWT)
+		reqGet.Header.Set("X-Forwarded-For", "10.70.0.5")
+		respGet, err := app.Test(reqGet, 5000)
+		require.NoError(t, err)
+		defer respGet.Body.Close()
+		require.Equal(t, http.StatusNotFound, respGet.StatusCode,
+			"cross-team GET /:id must be 404, never 403 (no existence leak)")
+
+		// /events read.
+		reqEv := httptest.NewRequest(http.MethodGet, "/api/v1/deployments/"+appID+"/events", nil)
+		reqEv.Header.Set("Authorization", "Bearer "+otherJWT)
+		reqEv.Header.Set("X-Forwarded-For", "10.70.0.6")
+		respEv, err := app.Test(reqEv, 5000)
+		require.NoError(t, err)
+		defer respEv.Body.Close()
+		require.Equal(t, http.StatusNotFound, respEv.StatusCode,
+			"cross-team /events must be 404, never 403 (no existence leak)")
+
+		var envelope struct {
+			OK    bool   `json:"ok"`
+			Error string `json:"error"`
+		}
+		require.NoError(t, json.NewDecoder(respEv.Body).Decode(&envelope))
+		assert.False(t, envelope.OK)
+		assert.Equal(t, "not_found", envelope.Error)
+	})
+}
diff --git a/internal/handlers/internal_e2e_account.go b/internal/handlers/internal_e2e_account.go
index bb1e340..cd9afcc 100644
--- a/internal/handlers/internal_e2e_account.go
+++ b/internal/handlers/internal_e2e_account.go
@@ -153,6 +153,25 @@ type e2eCreateRequest struct {
 	// snapshot, exactly like a real provision under that tier, and are reaped
 	// with the team (team_id→NULL + marked-for-reaper) by ReapAccount.
 	WithResources bool `json:"with_resources"`
+
+	// WithFailedDeploy, when true, pre-seeds ONE deployment row in
+	// status='failed' (with a one-line error_message) plus ONE failure_autopsy
+	// deployment_events row (reason/exit_code/last_lines/hint) on the minted
+	// team — the EXACT shape the worker autopsy writes for a real build
+	// failure. This lets the web wave load /app/deployments/:id and render the
+	// FailureAutopsyPanel against a REAL backend (not a mock), and lets an
+	// agent journey exercise GET /api/v1/deployments/:id/events without first
+	// having to drive (and fail) a real Kaniko build.
+	//
+	// Cohort-only + inert by default (omitted → no deploy seeded). The seeded
+	// deployment is owned by the minted is_test_cohort team, so it is reaped
+	// with the team (DeleteTeamHard cascades deployments + deployment_events)
+	// by ReapAccount. No backend RPC, no k8s namespace — pure DB rows via the
+	// SAME models the production deploy path uses, so the seed is synchronous +
+	// sub-millisecond, safe inside the mint request. The seeded deployment's
+	// app_id is surfaced in the response as failed_deploy_id so the caller can
+	// navigate straight to /app/deployments/<failed_deploy_id>.
+	WithFailedDeploy bool `json:"with_failed_deploy"`
 }
 
 // e2eSeedResourceTypes is the closed set of FAST, row-only resource types the
@@ -162,6 +181,44 @@ type e2eCreateRequest struct {
 // automatically expands what the seed creates AND what the seed test asserts.
 var e2eSeedResourceTypes = []string{"webhook", "cache"}
 
+// e2eFailedDeploy* constants describe the single seeded failed deployment +
+// its failure_autopsy event. Named (not inline literals) so the seed payload
+// is a single source of truth the seed test asserts against — adding a field
+// here is a one-place change. The shape mirrors what the worker autopsy writes
+// for a real OOMKilled build failure so the web FailureAutopsyPanel renders the
+// same content it would for a genuine failure.
+const (
+	// e2eFailedDeployErrorMessage is the one-line cause stamped on
+	// deployments.error_message (the "<reason>: <hint snippet>" the worker
+	// autopsy writes; surfaced by GET /api/v1/deployments/:id as item.error).
+	e2eFailedDeployErrorMessage = "OOMKilled: Your app exceeded its memory limit and was killed by the kernel."
+
+	// e2eFailedDeployReason is the classified failure reason on the autopsy row
+	// (matches models.FailureReasonOOMKilled — a string literal here keeps this
+	// file free of an api-internal import the worker-mirror constants avoid).
+	e2eFailedDeployReason = "OOMKilled"
+
+	// e2eFailedDeployEvent is the k8s event text on the autopsy row.
+	e2eFailedDeployEvent = "OOMKilling: Memory cgroup out of memory: Killed process 1 (node)"
+
+	// e2eFailedDeployHint is the plain-language remedy on the autopsy row.
+	e2eFailedDeployHint = "Your app exceeded its memory limit and was killed by the kernel. " +
+		"Reduce memory usage or upgrade to a tier with a higher memory cap."
+
+	// e2eFailedDeployExitCode is the container exit code on the autopsy row.
+	e2eFailedDeployExitCode = 137
+)
+
+// e2eFailedDeployLastLines is the build-pod log tail on the autopsy row — the
+// real error output the FailureAutopsyPanel renders and an agent reads to fix
+// the failure. Non-empty so the panel's "diagnostics pending" empty-state is
+// NOT what the web test sees.
+var e2eFailedDeployLastLines = []string{
+	"npm ERR! code ELIFECYCLE",
+	"<--- Last few GCs --->",
+	"FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory",
+}
+
 // authorize runs the X-E2E-Token guard. It returns true iff the token is
 // configured AND the header matches in constant time. On any failure it has
 // ALREADY written the 404 response and bumped the unauthorized metric — the
@@ -301,6 +358,23 @@ func (h *E2EAccountHandler) CreateAccount(c *fiber.Ctx) error {
 		seededTokens = toks
 	}
 
+	// 3c. Optionally pre-seed ONE failed deployment + its failure_autopsy event
+	//     so the web wave can render the FailureAutopsyPanel against a real
+	//     backend and an agent journey can exercise GET /deployments/:id/events.
+	//     Synchronous (pure DB rows via the production deploy models, no backend
+	//     RPC / no k8s namespace). A seed failure is a hard error for the same
+	//     reason as with_resources: a partial account makes the journey flaky.
+	var failedDeployID string
+	if req.WithFailedDeploy {
+		appID, derr := e2eSeedFailedDeploy(h, ctx, team.ID, tier, env)
+		if derr != nil {
+			metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultError).Inc()
+			slog.Error("internal.e2e.create.failed_deploy_seed_failed", "error", derr, "team_id", team.ID.String())
+			return respondError(c, fiber.StatusServiceUnavailable, "seed_failed", "failed to seed failed deployment")
+		}
+		failedDeployID = appID
+	}
+
 	// 4. Mint the session JWT with the SAME signer + claim shape the customer
 	//    auth path uses, so it authenticates through ordinary RequireAuth.
 	expiresAt := time.Now().UTC().Add(e2eSessionTTL)
@@ -338,15 +412,20 @@ func (h *E2EAccountHandler) CreateAccount(c *fiber.Ctx) error {
 	if seededTokens == nil {
 		seededTokens = []string{}
 	}
+	// failed_deploy_id is the app_id of the seeded failed deployment when
+	// with_failed_deploy was set, "" otherwise. The caller navigates to
+	// /app/deployments/<failed_deploy_id> (or GETs /api/v1/deployments/<failed_deploy_id>)
+	// to drive the FailureAutopsyPanel / events surface against a real backend.
 	return c.JSON(fiber.Map{
-		"team_id":       team.ID.String(),
-		"user_id":       user.ID.String(),
-		"email":         email,
-		"tier":          tier,
-		"session_jwt":   sessionJWT,
-		"expires_at":    expiresAt.Format(time.RFC3339),
-		"seeded_tokens": seededTokens,
-		"seeded_count":  len(seededTokens),
+		"team_id":          team.ID.String(),
+		"user_id":          user.ID.String(),
+		"email":            email,
+		"tier":             tier,
+		"session_jwt":      sessionJWT,
+		"expires_at":       expiresAt.Format(time.RFC3339),
+		"seeded_tokens":    seededTokens,
+		"seeded_count":     len(seededTokens),
+		"failed_deploy_id": failedDeployID,
 	})
 }
 
@@ -388,6 +467,59 @@ func (h *E2EAccountHandler) seedFastResources(ctx context.Context, teamID uuid.U
 	return tokens, nil
 }
 
+// e2eSeedFailedDeploy pre-seeds ONE failed deployment + its failure_autopsy
+// event on teamID, returning the deployment's app_id. It uses the SAME
+// production models the real deploy path uses — CreateDeployment (status
+// 'building') → UpdateDeploymentStatus(...,"failed", error_message) →
+// UpsertDeploymentAutopsy — so the seeded rows are indistinguishable from a
+// genuine OOMKilled build failure for every read path (GET /deployments/:id
+// item.error, GET /deployments/:id/events autopsy row, the web
+// FailureAutopsyPanel). No backend RPC, no k8s namespace: pure DB rows, so the
+// seed is synchronous + sub-millisecond. Any error aborts (returns it) — the
+// caller returns 503 so CI never gets a half-seeded 200; written rows are not rolled back.
+//
+// The deployment is owned by the cohort team and carries the team's tier
+// snapshot (TTLPolicy=permanent so the deployment_expirer never sweeps it
+// mid-journey); DeleteTeamHard cascades both the deployment and its
+// deployment_events on reap.
+//
+// A package-var seam (not a direct method call) so a test can force the
+// caller's seed_failed (503) arm deterministically.
+var e2eSeedFailedDeploy = (*E2EAccountHandler).seedFailedDeploy
+
+func (h *E2EAccountHandler) seedFailedDeploy(ctx context.Context, teamID uuid.UUID, tier, env string) (string, error) {
+	appID := "e2e-fail-" + uuid.NewString()[:10]
+
+	d, err := models.CreateDeployment(ctx, h.db, models.CreateDeploymentParams{
+		TeamID:    teamID,
+		AppID:     appID,
+		Port:      8080,
+		Tier:      tier,
+		Env:       env,
+		TTLPolicy: models.DeployTTLPolicyPermanent,
+	})
+	if err != nil {
+		return "", fmt.Errorf("seed failed deploy: create: %w", err)
+	}
+
+	if err := models.UpdateDeploymentStatus(ctx, h.db, d.ID, "failed", e2eFailedDeployErrorMessage); err != nil {
+		return "", fmt.Errorf("seed failed deploy: set failed: %w", err)
+	}
+
+	if err := models.UpsertDeploymentAutopsy(ctx, h.db, models.UpsertAutopsyParams{
+		DeploymentID: d.ID,
+		Reason:       e2eFailedDeployReason,
+		ExitCode:     sql.NullInt32{Int32: e2eFailedDeployExitCode, Valid: true},
+		Event:        e2eFailedDeployEvent,
+		LastLines:    e2eFailedDeployLastLines,
+		Hint:         e2eFailedDeployHint,
+	}); err != nil {
+		return "", fmt.Errorf("seed failed deploy: autopsy: %w", err)
+	}
+
+	return appID, nil
+}
+
 // ReapAccount handles DELETE /internal/e2e/account/:team_id.
 func (h *E2EAccountHandler) ReapAccount(c *fiber.Ctx) error {
 	if !h.authorize(c, e2eMetricOpReap) {
diff --git a/internal/handlers/internal_e2e_account_export_test.go b/internal/handlers/internal_e2e_account_export_test.go
index 1869005..020a3e4 100644
--- a/internal/handlers/internal_e2e_account_export_test.go
+++ b/internal/handlers/internal_e2e_account_export_test.go
@@ -62,3 +62,26 @@ func SetE2ESignSessionJWTForTest(fn func(jwtSecret string, userID, teamID uuid.U
 	e2eSignSessionJWT = fn
 	return func() { e2eSignSessionJWT = prev }
 }
+
+// SetE2ESeedFailedDeployForTest overrides the e2eSeedFailedDeploy seam so a
+// test can force CreateAccount's with_failed_deploy seed_failed (503) arm
+// deterministically, without making the real deployments table reject an
+// insert mid-request. Returns a restore func.
+func SetE2ESeedFailedDeployForTest(err error) (restore func()) {
+	prev := e2eSeedFailedDeploy
+	e2eSeedFailedDeploy = func(_ *E2EAccountHandler, _ context.Context, _ uuid.UUID, _, _ string) (string, error) {
+		return "", err
+	}
+	return func() { e2eSeedFailedDeploy = prev }
+}
+
+// E2EFailedDeploySeedForTest exposes the seeded failed-deploy autopsy payload
+// so the seed test asserts the API serves exactly what the factory wrote
+// (reason/exit_code/last_lines/hint) — single source of truth, not a re-typed
+// expectation. lastLines is a fresh copy so a test cannot mutate the handler's slice.
+func E2EFailedDeploySeedForTest() (errorMessage, reason, event, hint string, exitCode int, lastLines []string) {
+	ll := make([]string, len(e2eFailedDeployLastLines))
+	copy(ll, e2eFailedDeployLastLines)
+	return e2eFailedDeployErrorMessage, e2eFailedDeployReason, e2eFailedDeployEvent,
+		e2eFailedDeployHint, e2eFailedDeployExitCode, ll
+}
diff --git a/internal/handlers/internal_e2e_account_failed_deploy_test.go b/internal/handlers/internal_e2e_account_failed_deploy_test.go
new file mode 100644
index 0000000..9b7083a
--- /dev/null
+++ b/internal/handlers/internal_e2e_account_failed_deploy_test.go
@@ -0,0 +1,155 @@
+package handlers_test
+
+// internal_e2e_account_failed_deploy_test.go — coverage for the
+// with_failed_deploy factory pre-seed (task #70,
+// docs/ci/02-FAILURE-DIAGNOSIS-AND-AUTODEBUG.md §5.4 enabler).
+//
+// The with_failed_deploy flag lets the web wave load /app/deployments/:id and
+// render the FailureAutopsyPanel against a REAL backend. Contract pinned here:
+//
+//   - with_failed_deploy=true  → exactly ONE failed deployment + ONE
+//     failure_autopsy deployment_events row, owned by the minted cohort team,
+//     carrying the factory's reason/exit_code/last_lines/hint payload; the
+//     response surfaces the deployment's app_id as failed_deploy_id.
+//   - with_failed_deploy omitted → ZERO deployments seeded (inert by default).
+//   - non-cohort / token-unset paths unaffected (the seed runs only on a
+//     successful authorized mint, which is already cohort-scoped).
+//   - a seed FAILURE surfaces as a 503 seed_failed (never a half-seeded 200).
+//
+// Seeds are asserted from the DB directly (mirrors the with_resources seed
+// test) AND the autopsy payload is compared against the handler's exported
+// single-source-of-truth constants (E2EFailedDeploySeedForTest) so a future
+// payload edit auto-updates the assertion rather than drifting.
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"instant.dev/internal/handlers"
+	"instant.dev/internal/testhelpers"
+)
+
+// TestE2EAccount_Create_WithFailedDeploy_SeedsOneFailedDeployAndAutopsy asserts
+// the seed writes exactly one failed deployment + one autopsy event with the
+// factory's payload, owned by the minted team, and surfaces failed_deploy_id.
+func TestE2EAccount_Create_WithFailedDeploy_SeedsOneFailedDeployAndAutopsy(t *testing.T) {
+	skipUnlessE2EDB(t)
+	db, cleanup := testhelpers.SetupTestDB(t)
+	defer cleanup()
+	app := newE2ETestApp(t, db, nil, testE2EToken)
+
+	resp := postE2ECreate(t, app, testE2EToken, `{"tier":"pro","with_failed_deploy":true}`)
+	require.Equal(t, http.StatusOK, resp.StatusCode)
+	out := decodeE2ECreate(t, resp)
+
+	require.NotEmpty(t, out.FailedDeployID,
+		"failed_deploy_id must be surfaced so the web wave can navigate to it")
+
+	ctx := context.Background()
+
+	// Exactly ONE deployment, in status=failed, owned by the minted team, with
+	// the factory's one-line error_message.
+	wantErrMsg, wantReason, wantEvent, wantHint, wantExit, wantLines :=
+		handlers.E2EFailedDeploySeedForTest()
+
+	var (
+		depCount     int
+		status       string
+		appID        string
+		teamID       string
+		errorMessage string
+	)
+	require.NoError(t, db.QueryRowContext(ctx,
+		`SELECT count(*) FROM deployments WHERE team_id = $1`, out.TeamID).Scan(&depCount))
+	require.Equal(t, 1, depCount, "exactly one deployment must be seeded")
+
+	require.NoError(t, db.QueryRowContext(ctx, `
+		SELECT app_id, status, team_id::text, error_message
+		FROM deployments WHERE team_id = $1
+	`, out.TeamID).Scan(&appID, &status, &teamID, &errorMessage))
+	require.Equal(t, out.FailedDeployID, appID, "failed_deploy_id must echo the seeded deployment's app_id")
+	require.Equal(t, "failed", status)
+	require.Equal(t, out.TeamID, teamID, "seeded deployment must be owned by the minted team")
+	require.Equal(t, wantErrMsg, errorMessage, "error_message must be the factory's one-liner")
+
+	// Exactly ONE failure_autopsy deployment_events row with the factory payload.
+	var (
+		depID        string
+		eventCount   int
+		autReason    string
+		autExitCode  int
+		autEvent     string
+		autHint      string
+		autLastLines []byte
+	)
+	require.NoError(t, db.QueryRowContext(ctx,
+		`SELECT id::text FROM deployments WHERE team_id = $1`, out.TeamID).Scan(&depID))
+
+	require.NoError(t, db.QueryRowContext(ctx, `
+		SELECT count(*) FROM deployment_events
+		WHERE deployment_id = $1 AND kind = 'failure_autopsy'
+	`, depID).Scan(&eventCount))
+	require.Equal(t, 1, eventCount, "exactly one failure_autopsy event must be seeded")
+
+	require.NoError(t, db.QueryRowContext(ctx, `
+		SELECT reason, exit_code, event, hint, last_lines
+		FROM deployment_events
+		WHERE deployment_id = $1 AND kind = 'failure_autopsy'
+	`, depID).Scan(&autReason, &autExitCode, &autEvent, &autHint, &autLastLines))
+
+	require.Equal(t, wantReason, autReason)
+	require.Equal(t, wantExit, autExitCode)
+	require.Equal(t, wantEvent, autEvent)
+	require.Equal(t, wantHint, autHint)
+	// last_lines is JSONB — assert it carries the factory's (non-empty) tail.
+	require.NotEmpty(t, wantLines, "factory last_lines must be non-empty by design")
+	for _, line := range wantLines {
+		require.Contains(t, string(autLastLines), line,
+			"seeded last_lines must carry the factory's build-error tail")
+	}
+}
+
+// TestE2EAccount_Create_WithoutFailedDeploy_SeedsNothing pins inert-by-default:
+// omitting with_failed_deploy seeds ZERO deployments and surfaces an empty
+// failed_deploy_id.
+func TestE2EAccount_Create_WithoutFailedDeploy_SeedsNothing(t *testing.T) {
+	skipUnlessE2EDB(t)
+	db, cleanup := testhelpers.SetupTestDB(t)
+	defer cleanup()
+	app := newE2ETestApp(t, db, nil, testE2EToken)
+
+	resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`)
+	require.Equal(t, http.StatusOK, resp.StatusCode)
+	out := decodeE2ECreate(t, resp)
+	require.Empty(t, out.FailedDeployID,
+		"failed_deploy_id must be empty when with_failed_deploy is omitted")
+
+	var n int
+	require.NoError(t, db.QueryRowContext(context.Background(),
+		`SELECT count(*) FROM deployments WHERE team_id = $1`, out.TeamID).Scan(&n))
+	require.Equal(t, 0, n, "no deployment must be seeded when with_failed_deploy is omitted")
+}
+
+// TestE2EAccount_Create_WithFailedDeploy_SeedFailure_Returns503 forces the
+// failed-deploy seed to fail (via the e2eSeedFailedDeploy seam) and asserts
+// CreateAccount surfaces a 503 seed_failed — CI must never receive a
+// half-seeded account.
+func TestE2EAccount_Create_WithFailedDeploy_SeedFailure_Returns503(t *testing.T) {
+	skipUnlessE2EDB(t)
+	db, cleanup := testhelpers.SetupTestDB(t)
+	defer cleanup()
+	app := newE2ETestApp(t, db, nil, testE2EToken)
+
+	restore := handlers.SetE2ESeedFailedDeployForTest(errors.New("deploy seed exploded"))
+	defer restore()
+
+	resp := postE2ECreate(t, app, testE2EToken, `{"tier":"pro","with_failed_deploy":true}`)
+	require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode,
+		"a failed-deploy seed failure must surface as 503, never a half-seeded 200")
+	out := decodeE2ECreate(t, resp)
+	require.Equal(t, "seed_failed", out.Error)
+}
diff --git a/internal/handlers/internal_e2e_account_seed_whitebox_test.go b/internal/handlers/internal_e2e_account_seed_whitebox_test.go
index 6223958..2b58f1f 100644
--- a/internal/handlers/internal_e2e_account_seed_whitebox_test.go
+++ b/internal/handlers/internal_e2e_account_seed_whitebox_test.go
@@ -11,6 +11,7 @@ package handlers
 
 import (
 	"context"
+	"database/sql"
 	"errors"
 	"testing"
 	"time"
@@ -75,3 +76,95 @@ func TestSeedFastResources_MarkResourceActiveError(t *testing.T) {
 	require.Nil(t, toks)
 	require.NoError(t, mock.ExpectationsWereMet())
 }
+
+// ── seedFailedDeploy error arms ──────────────────────────────────────────────
+//
+// The with_failed_deploy seed has three error branches; the happy path is
+// covered end-to-end by the external suite against a real test DB. These drive
+// each failure branch deterministically with sqlmock so the 100%-patch gate is
+// satisfied without a flaky "make the real DB fail" dance:
+//
+//   - CreateDeployment error      → "seed failed deploy: create: ..."
+//   - UpdateDeploymentStatus err  → "seed failed deploy: set failed: ..."
+//   - UpsertDeploymentAutopsy err → "seed failed deploy: autopsy: ..."
+
+// failedDeployReturningRow returns a one-row sqlmock result laid out in
+// deploymentColumnsList order (the order scanDeployment expects), so a mocked
+// CreateDeployment INSERT … RETURNING scans cleanly and the test can proceed to
+// the UPDATE / autopsy steps. Same AddRow shape as deploy_redeploy_inplace_mock_test.go.
+func failedDeployReturningRow() *sqlmock.Rows {
+	envVarsJSON := []byte("{}") // empty JSON object used for the env_vars column
+	return sqlmock.NewRows(deploymentColumnsList).AddRow(
+		uuid.New(),             // id
+		uuid.New(),             // team_id
+		uuid.NullUUID{},        // resource_id
+		"e2e-fail-x",           // app_id
+		"app-e2e-fail-x",       // provider_id
+		"building",             // status
+		"",                     // app_url
+		envVarsJSON,            // env_vars
+		8080,                   // port
+		"pro",                  // tier
+		"development",          // env
+		false,                  // private
+		"",                     // allowed_ips
+		sql.NullString{},       // error_message
+		time.Now(), time.Now(), // created_at, updated_at
+		sql.NullString{}, sql.NullString{}, "unset", 0, // notify_*
+		sql.NullTime{}, "permanent", 0, sql.NullTime{}, // ttl_*
+		"tarball", "", "", // source, image_ref, registry_creds_enc
+		"", "", "", // git_url, git_ref, git_token_enc
+		sql.NullTime{}, false, false, // last_activity_at, scaled_to_zero, always_on
+	)
+}
+
+func TestSeedFailedDeploy_CreateDeploymentError(t *testing.T) {
+	conn, expect, openErr := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp))
+	require.NoError(t, openErr)
+	defer conn.Close()
+	// First arm: the deployments INSERT itself errors, so nothing else is queued.
+	expect.ExpectQuery(`INSERT INTO deployments`).WillReturnError(errors.New("insert boom"))
+	handler := &E2EAccountHandler{db: conn, cfg: &config.Config{}}
+	gotID, seedErr := handler.seedFailedDeploy(context.Background(), uuid.New(), "pro", "")
+	require.Error(t, seedErr)
+	for _, want := range []string{"seed failed deploy: create", "insert boom"} {
+		require.Contains(t, seedErr.Error(), want)
+	}
+	require.Empty(t, gotID)
+	require.NoError(t, expect.ExpectationsWereMet())
+}
+
+func TestSeedFailedDeploy_UpdateStatusError(t *testing.T) {
+	conn, expect, openErr := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp))
+	require.NoError(t, openErr)
+	defer conn.Close()
+	// Second arm: the INSERT succeeds, then the status UPDATE errors.
+	expect.ExpectQuery(`INSERT INTO deployments`).WillReturnRows(failedDeployReturningRow())
+	expect.ExpectExec(`UPDATE deployments`).WillReturnError(errors.New("update boom"))
+	handler := &E2EAccountHandler{db: conn, cfg: &config.Config{}}
+	gotID, seedErr := handler.seedFailedDeploy(context.Background(), uuid.New(), "pro", "")
+	require.Error(t, seedErr)
+	for _, want := range []string{"seed failed deploy: set failed", "update boom"} {
+		require.Contains(t, seedErr.Error(), want)
+	}
+	require.Empty(t, gotID)
+	require.NoError(t, expect.ExpectationsWereMet())
+}
+
+func TestSeedFailedDeploy_AutopsyError(t *testing.T) {
+	conn, expect, openErr := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherRegexp))
+	require.NoError(t, openErr)
+	defer conn.Close()
+	// Third arm: INSERT and UPDATE succeed, then the deployment_events write errors.
+	expect.ExpectQuery(`INSERT INTO deployments`).WillReturnRows(failedDeployReturningRow())
+	expect.ExpectExec(`UPDATE deployments`).WillReturnResult(sqlmock.NewResult(0, 1))
+	expect.ExpectExec(`INSERT INTO deployment_events`).WillReturnError(errors.New("autopsy boom"))
+	handler := &E2EAccountHandler{db: conn, cfg: &config.Config{}}
+	gotID, seedErr := handler.seedFailedDeploy(context.Background(), uuid.New(), "pro", "")
+	require.Error(t, seedErr)
+	for _, want := range []string{"seed failed deploy: autopsy", "autopsy boom"} {
+		require.Contains(t, seedErr.Error(), want)
+	}
+	require.Empty(t, gotID)
+	require.NoError(t, expect.ExpectationsWereMet())
+}
diff --git a/internal/handlers/internal_e2e_account_test.go b/internal/handlers/internal_e2e_account_test.go
index ccae448..de3cfb7 100644
--- a/internal/handlers/internal_e2e_account_test.go
+++ b/internal/handlers/internal_e2e_account_test.go
@@ -92,15 +92,16 @@ func newE2ETestApp(t *testing.T, db *sql.DB, rdb *redis.Client, token string) *f
 
 // e2eCreateResp is the create-endpoint response shape we assert on.
 type e2eCreateResp struct {
-	TeamID       string   `json:"team_id"`
-	UserID       string   `json:"user_id"`
-	Email        string   `json:"email"`
-	Tier         string   `json:"tier"`
-	SessionJWT   string   `json:"session_jwt"`
-	ExpiresAt    string   `json:"expires_at"`
-	SeededTokens []string `json:"seeded_tokens"`
-	SeededCount  int      `json:"seeded_count"`
-	Error        string   `json:"error"`
+	TeamID         string   `json:"team_id"`
+	UserID         string   `json:"user_id"`
+	Email          string   `json:"email"`
+	Tier           string   `json:"tier"`
+	SessionJWT     string   `json:"session_jwt"`
+	ExpiresAt      string   `json:"expires_at"`
+	SeededTokens   []string `json:"seeded_tokens"`
+	SeededCount    int      `json:"seeded_count"`
+	FailedDeployID string   `json:"failed_deploy_id"`
+	Error          string   `json:"error"`
 }
 
 func postE2ECreate(t *testing.T, app *fiber.App, token, body string) *http.Response {
diff --git a/internal/handlers/stack_anon_failure_diag_test.go b/internal/handlers/stack_anon_failure_diag_test.go
new file mode 100644
index 0000000..c75c56e
--- /dev/null
+++ b/internal/handlers/stack_anon_failure_diag_test.go
@@ -0,0 +1,195 @@
+package handlers_test
+
+// stack_anon_failure_diag_test.go — the ANONYMOUS-stack failure-diagnosis
+// contract test (task #70, docs/ci/02-FAILURE-DIAGNOSIS-AND-AUTODEBUG.md §3 +
+// §5.2).
+//
+// Anonymous users cannot use /deploy/new (RequireAuth; deployments.team_id is
+// NOT NULL — memory project_anonymous_deploy_via_stacks_not_deploy_new). They
+// deploy via POST /stacks/new (OptionalAuth; anon stacks carry NULL team_id).
+//
+// ANON FAILURE-DIAGNOSIS IS STATUS + LOGS ONLY (the documented gap):
+//
+//   - GET /stacks/:slug (slug-bearer, NO auth) returns status="failed" — the
+//     stack-level failure is visible to the anonymous owner.
+//   - the raw err.Error() string the deploy goroutine hit is persisted at the
+//     SERVICE level (stack_services.error_msg via UpdateStackServiceStatus;
+//     UpdateStackStatus's errMsg arg is intentionally NOT persisted — the
+//     stacks table has no error column). So the failure string lives on the
+//     service row, and the per-service build logs are read via
+//     GET /stacks/:slug/logs/:svc.
+//   - there is NO classified autopsy: NO /stacks/:slug/events route, NO
+//     reason/last_lines/hint. That is the diagnosis-quality gap vs the
+//     authenticated /api/v1/deployments/:id/events surface.
+//
+// This test PINS that contract so that:
+//   (a) anon users provably get status=failed (regression guard on the thin
+//       surface they DO have), and
+//   (b) adding a stack-autopsy endpoint later is a DELIBERATE, test-updating
+//       change — the route-absence assertion below REDS the moment someone adds
+//       GET /stacks/:slug/events, forcing them to update this contract.
+//
+// In short: anon failure-diagnosis is status + logs only (gap: no classified
+// autopsy).
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/redis/go-redis/v9"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"instant.dev/internal/config"
+	"instant.dev/internal/email"
+	"instant.dev/internal/plans"
+	"instant.dev/internal/router"
+	"instant.dev/internal/testhelpers"
+)
+
+// anonStackEventsRoutePath is the route pattern that would expose a classified
+// autopsy to anonymous stack owners. No such route is mounted today; the router
+// enumeration test in this file pins its absence. Named so a PR adding it greps here.
+const anonStackEventsRoutePath = "/stacks/:slug/events"
+
+// TestStackAnonFailureDiag_StatusAndLogsOnly seeds an anonymous stack in
+// status=failed and asserts the thin diagnosis surface: GET /stacks/:slug
+// (slug-bearer, no auth) returns status=failed, and the failure string is
+// stored on the service row. Anon failure-diagnosis is status + logs only
+// (gap: no classified autopsy).
+func TestStackAnonFailureDiag_StatusAndLogsOnly(t *testing.T) {
+	requireCoverageDB(t)
+	db, cleanDB := testhelpers.SetupTestDB(t)
+	defer cleanDB()
+	ensureStackTables2(t, db)
+
+	// Anonymous stack: NULL team_id, seeded directly in 'failed'. seedStack with
+	// teamID=nil mirrors the mig-005 anon-stack shape (NULL team_id).
+	stackID, slug := seedStack(t, db, nil, "failed")
+
+	// The deploy goroutine's raw err.Error() lands on the SERVICE row via
+	// UpdateStackServiceStatus(...,"failed", errMsg). seedStack created a 'web'
+	// service in 'healthy' — the UPDATE below flips every service row of this
+	// stack to failed and stores the raw build error string alongside it.
+	const rawBuildErr = "kaniko build failed: COPY failed: no source files were specified"
+	_, err := db.Exec(`
+		UPDATE stack_services SET status = 'failed', error_msg = $2
+		WHERE stack_id = $1
+	`, stackID, rawBuildErr)
+	require.NoError(t, err)
+
+	app, _ := newCoverageStackApp(t, db) // second return value is not needed here
+
+	// GET /stacks/:slug with NO Authorization header — the anonymous owner
+	// reads their own stack by slug (slug IS the bearer for an anon stack).
+	req := httptest.NewRequest(http.MethodGet, "/stacks/"+slug, nil)
+	resp, err := app.Test(req, 5000) // 5000: presumably Fiber's app.Test timeout in ms — confirm
+	require.NoError(t, err)
+	defer resp.Body.Close()
+	require.Equal(t, http.StatusOK, resp.StatusCode,
+		"anon owner reads their own stack by slug with NO auth")
+
+	var body struct {
+		OK       bool   `json:"ok"`
+		StackID  string `json:"stack_id"`
+		Status   string `json:"status"`
+		Services []struct {
+			Name   string `json:"name"`
+			Status string `json:"status"`
+		} `json:"services"`
+	}
+	require.NoError(t, json.NewDecoder(resp.Body).Decode(&body))
+	assert.True(t, body.OK)
+	assert.Equal(t, slug, body.StackID)
+	assert.Equal(t, "failed", body.Status,
+		"anon stack failure IS visible at the stack level (status=failed)")
+
+	// At least one service must report failed. (serializeServices intentionally
+	// does NOT echo the raw error_msg string — the per-service failure detail is
+	// read via the logs surface, not the status JSON. The string is persisted
+	// on the row, asserted below from the DB.)
+	require.NotEmpty(t, body.Services)
+	var sawFailedSvc bool
+	for _, s := range body.Services {
+		if s.Status == "failed" {
+			sawFailedSvc = true
+		}
+	}
+	assert.True(t, sawFailedSvc, "the failed service is surfaced in the status JSON")
+
+	// Truth surface: the raw err.Error() string is persisted on the service
+	// row (stack_services.error_msg) — this is what the logs/diagnostics path
+	// reads. The stacks table itself has NO error column (UpdateStackStatus's
+	// errMsg arg is discarded by design), so the failure string lives here.
+	var storedErr string
+	require.NoError(t, db.QueryRow(
+		`SELECT error_msg FROM stack_services WHERE stack_id = $1 AND status = 'failed'`,
+		stackID).Scan(&storedErr))
+	assert.Equal(t, rawBuildErr, storedErr,
+		"the raw build error is persisted on the service row (the anon truth surface)")
+}
+
+// TestStackAnonFailureDiag_NoClassifiedAutopsyEndpoint guards the documented
+// gap: no /stacks/:slug/events route is mounted. An anonymous stack owner gets
+// status + logs but NO classified reason/last_lines/hint (unlike the
+// authenticated /api/v1/deployments/:id/events surface).
+//
+// The check enumerates the LIVE production router (router.New + GetRoutes), so
+// it cannot drift from what is actually mounted. Once someone adds a
+// stack-autopsy route this test REDS, forcing the §3 contract and the anon-gap
+// docs to be updated deliberately instead of the gap closing silently.
+func TestStackAnonFailureDiag_NoClassifiedAutopsyEndpoint(t *testing.T) {
+	redisClient := redis.NewClient(&redis.Options{Addr: "127.0.0.1:6379"})
+	defer func() { _ = redisClient.Close() }()
+
+	liveApp := router.New(anonStackRouterConfig(), nil, redisClient, nil, email.NewNoop(), plans.Default(), nil, nil)
+	mounted := liveApp.GetRoutes(true)
+
+	// No mounted route, under any method, may use the /stacks/:slug/events path.
+	for _, route := range mounted {
+		assert.NotEqual(t, anonStackEventsRoutePath, route.Path,
+			"a %s %s route now EXISTS — anonymous stacks gained a classified-autopsy "+
+				"endpoint. This closes the documented §3 gap; UPDATE this test + "+
+				"docs/ci/02-FAILURE-DIAGNOSIS-AND-AUTODEBUG.md §3 to assert the new "+
+				"reason/last_lines/hint contract instead of the absence.",
+			route.Method, route.Path)
+	}
+
+	// Guard against a vacuous pass: the status and per-service logs routes that
+	// anonymous owners do have must be mounted, proving stacks routes registered.
+	var statusMounted, logsMounted bool
+	for _, route := range mounted {
+		if route.Method != http.MethodGet {
+			continue
+		}
+		switch route.Path {
+		case "/stacks/:slug":
+			statusMounted = true
+		case "/stacks/:slug/logs/:svc":
+			logsMounted = true
+		}
+	}
+	assert.True(t, statusMounted, "GET /stacks/:slug (status surface) must be mounted")
+	assert.True(t, logsMounted, "GET /stacks/:slug/logs/:svc (logs surface) must be mounted")
+}
+
+// anonStackRouterConfig builds the minimal config router.New needs to mount the
+// full route table for the route enumeration test. That test only inspects
+// GetRoutes and never serves a request, so no DB call is made and passing a nil
+// db to router.New is safe.
+func anonStackRouterConfig() *config.Config {
+	cfg := new(config.Config)
+	cfg.Port = "8080"
+	cfg.JWTSecret = testhelpers.TestJWTSecret
+	cfg.AESKey = testhelpers.TestAESKeyHex
+	cfg.EnabledServices = "postgres,redis,mongodb,queue,webhook,storage,deploy"
+	cfg.Environment = "development"
+	cfg.PostgresProvisionBackend = "local"
+	cfg.ComputeProvider = "noop"
+	cfg.QueueBackend = "legacy_open"
+	cfg.ObjectStoreBucket = "instant-shared"
+	// AdminPathPrefix stays empty → admin subtree skipped; irrelevant to stacks.
+	return cfg
+}