diff --git a/.gitignore b/.gitignore index aa25ebfb..ed5a81bc 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ node_modules # Internal Claude Code skills (per-repo) .claude/ + +# Local Redis dump artifact — never commit +dump.rdb diff --git a/e2e/reliability_contract_test.go b/e2e/reliability_contract_test.go index f87e344f..7dd808e2 100644 --- a/e2e/reliability_contract_test.go +++ b/e2e/reliability_contract_test.go @@ -214,6 +214,16 @@ var auditConsumerSpec = map[string]auditConsumerExpectation{ // signature-passed-but-team-unknown signal. No customer email: the // affected "customer" either does not exist or was deleted. "razorpay.webhook.team_not_found": {IntentionallyNoConsumer: true}, + + // CI-only ephemeral-test-account surface (guarded; inert by default). + // Both fire from the internal POST/DELETE /internal/e2e/account routes + // and are operator-internal observability signals — a spike in created + // (vs reaped) means CI is leaking test accounts. NEVER customer-facing: + // the team is always is_test_cohort and the synthetic email is not PII. + // Counterparts to the other operator-only kinds above — audit rows are + // dashboard signals, not customer notifications. + "e2e.account.created": {IntentionallyNoConsumer: true}, + "e2e.account.reaped": {IntentionallyNoConsumer: true}, } // ─── Test 1: every constant has a spec entry ────────────────────────────────── diff --git a/internal/config/config.go b/internal/config/config.go index e20e8fef..901f6b0f 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -263,6 +263,29 @@ type Config struct { // WORKER_INTERNAL_JWT_SECRET in BOTH the api and the worker // (same value, generated via `openssl rand -hex 32`). WorkerInternalJWTSecret string + + // E2EAccountToken is the shared secret that guards the CI-only + // ephemeral-test-account surface (POST/DELETE /internal/e2e/account). 
+ // CI mints real test-cohort accounts against PRODUCTION to run + // integration tests, then reaps them — that is the only thing this + // token authorizes. + // + // INERT BY DEFAULT (flag-protection): when this is empty, BOTH e2e + // routes return 404 for every request, hiding the endpoint's + // existence entirely. The endpoint cannot mint or reap a single + // account until an operator sets E2E_ACCOUNT_TOKEN — so the surface + // ships safe-by-default and is only "armed" in the environments + // (CI/prod) where the secret is wired. The caller authenticates by + // sending the exact value in the X-E2E-Token request header; the + // handler does a crypto/subtle constant-time compare and 404s on any + // mismatch (never 401/403 — a distinguishable status would leak that + // the route exists). + // + // Distinct secret from JWTSecret and WorkerInternalJWTSecret: this + // one authorizes account *creation/destruction*, a strictly more + // dangerous capability than session-signing, so it gets its own key + // and its own k8s Secret entry (generate via `openssl rand -hex 32`). + E2EAccountToken string } // ErrMissingConfig is returned when a required env var is absent. @@ -427,6 +450,9 @@ func Load() *Config { cfg.SendGridWebhookKey = os.Getenv("SENDGRID_WEBHOOK_PUBLIC_KEY") cfg.WorkerInternalJWTSecret = strings.TrimSpace(os.Getenv("WORKER_INTERNAL_JWT_SECRET")) + // E2E_ACCOUNT_TOKEN: empty = the /internal/e2e/* surface is inert + // (every call 404s). See Config.E2EAccountToken for the full posture. 
+ cfg.E2EAccountToken = strings.TrimSpace(os.Getenv("E2E_ACCOUNT_TOKEN")) cfg.DeployDomain = getenv("DEPLOY_DOMAIN", "instant.dev") cfg.ComputeProvider = getenv("COMPUTE_PROVIDER", "noop") cfg.KubeNamespaceApps = getenv("KUBE_NAMESPACE_APPS", "instant-apps") diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 197387ef..fe4483e8 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -68,6 +68,7 @@ func allKeys() []string { "BREVO_WEBHOOK_SECRET", "SES_SNS_SUBSCRIPTION_ARN", "SENDGRID_WEBHOOK_PUBLIC_KEY", "WORKER_INTERNAL_JWT_SECRET", "ADMIN_PATH_PREFIX", + "E2E_ACCOUNT_TOKEN", } } @@ -196,6 +197,19 @@ func TestConfig_IsServiceEnabled(t *testing.T) { } } +func TestLoad_E2EAccountToken(t *testing.T) { + // Unset → empty (inert-by-default: the /internal/e2e/* surface 404s). + applyBaselineEnv(t, nil) + if got := Load().E2EAccountToken; got != "" { + t.Errorf("E2EAccountToken default: want empty (inert), got %q", got) + } + // Set (with surrounding whitespace) → trimmed value. + applyBaselineEnv(t, map[string]string{"E2E_ACCOUNT_TOKEN": " secret-token "}) + if got := Load().E2EAccountToken; got != "secret-token" { + t.Errorf("E2EAccountToken: want trimmed 'secret-token', got %q", got) + } +} + func TestLoad_HappyPath_AppliesDefaults(t *testing.T) { applyBaselineEnv(t, nil) cfg := Load() diff --git a/internal/handlers/error_envelope_coverage_test.go b/internal/handlers/error_envelope_coverage_test.go index 5d894ada..1a16d33f 100644 --- a/internal/handlers/error_envelope_coverage_test.go +++ b/internal/handlers/error_envelope_coverage_test.go @@ -45,6 +45,21 @@ var coverageAllowlist = map[string]string{ // real handler call sites. Filtered by the test (see emitCode). "code": "regex artefact — not a real emit", "x": "regex artefact — not a real emit", + + // CI-only ephemeral-test-account surface (POST/DELETE /internal/e2e/account). 
+ // These codes are emitted only on the operator/CI-guarded endpoint, which is + // inert by default (404 unless E2E_ACCOUNT_TOKEN is set) and driven by the + // machine-to-machine E2E harness — never a customer agent. A customer-style + // "Tell the user … https://instanode.dev/…" agent_action would be wrong for + // a CI caller, so they intentionally carry no codeToAgentAction entry: the + // 503 arms fall back to AgentActionContactSupport, the 4xx arms to an empty + // agent_action with a self-explanatory message. + "not_test_cohort": "CI-only /internal/e2e/account reap-safety 403 (machine-to-machine; not customer-facing)", + "team_create_failed": "CI-only /internal/e2e/account mint 503 (machine-to-machine; not customer-facing)", + "user_create_failed": "CI-only /internal/e2e/account mint 503 (machine-to-machine; not customer-facing)", + "tier_not_allowed": "CI-only /internal/e2e/account gated-tier 400 (machine-to-machine; not customer-facing)", + "tier_set_failed": "CI-only /internal/e2e/account mint 503 (machine-to-machine; not customer-facing)", + "rand_failed": "CI-only /internal/e2e/account mint 503 (machine-to-machine; not customer-facing)", } // TestErrorCode_HasAgentAction is the registry-iterating coverage gate. diff --git a/internal/handlers/helpers.go b/internal/handlers/helpers.go index 00757c4b..7abea9b9 100644 --- a/internal/handlers/helpers.go +++ b/internal/handlers/helpers.go @@ -1198,6 +1198,18 @@ var codeToAgentAction = map[string]errorCodeMeta{ "reauth_required": { AgentAction: "Tell the user this action requires a fresh session (admin-scope PAT mints need re-auth). Sign in again at https://instanode.dev/login — see https://instanode.dev/docs/auth.", }, + + // NOTE: the CI-only ephemeral-test-account error codes (not_test_cohort, + // team_create_failed, user_create_failed, tier_not_allowed, tier_set_failed, + // rand_failed) are deliberately NOT registered here. 
codeToAgentAction holds + // CUSTOMER-facing agent guidance — the contract test (TestAgentActionContract) + // requires every entry to start "Tell the user …" and carry a customer + // recovery URL. The /internal/e2e/account surface is operator/CI-only and + // inert by default (404 unless E2E_ACCOUNT_TOKEN is set), so its codes never + // reach a customer agent: the 503 arms already get the generic + // AgentActionContactSupport via respondError's status>=500 fallback, and the + // 4xx arms (400/403/429) carry a self-explanatory message with no + // agent_action — correct for a machine-to-machine CI caller. } // ErrorResponse is the canonical JSON shape for every 4xx/5xx response. diff --git a/internal/handlers/internal_e2e_account.go b/internal/handlers/internal_e2e_account.go new file mode 100644 index 00000000..e5cbd34f --- /dev/null +++ b/internal/handlers/internal_e2e_account.go @@ -0,0 +1,470 @@ +package handlers + +// internal_e2e_account.go — the CI-only guarded ephemeral-test-account surface. +// +// POST /internal/e2e/account → mint a real test-cohort account +// DELETE /internal/e2e/account/:team_id → reap a test-cohort account +// +// WHY THIS EXISTS +// +// CI runs integration tests against PRODUCTION. To do that without polluting +// the real funnel / billing / email surfaces, it mints a *real* account whose +// owning team carries is_test_cohort=true (migration 067) — the single flag +// every background job + funnel/billing path keys off to no-op for synthetic +// traffic — runs the tests, then reaps the account. These two endpoints are +// that mint/reap lifecycle. +// +// SECURITY POSTURE (get the guard exactly right — this is security-sensitive) +// +// 1. Both routes are guarded by the X-E2E-Token header, constant-time-compared +// (crypto/subtle) against cfg.E2EAccountToken. +// 2. INERT BY DEFAULT: when cfg.E2EAccountToken is empty, OR the header does +// not match, BOTH routes return 404 — NOT 401/403. 
A 404 hides the +// endpoint's existence; a 401/403 would confirm "there is a guarded route +// here, keep guessing the token". The endpoint cannot mint or reap a +// single account until an operator wires E2E_ACCOUNT_TOKEN, so the surface +// ships safe-by-default and is only armed in CI/prod where the secret is set. +// 3. NEVER mint a team-tier (or growth) account — Team is gated until fully +// built (project_team_plan_not_rolled_out). tier="team"/"growth" → 400. +// 4. Reap can NEVER delete a real team: the handler looks up is_test_cohort +// and 403s (`not_test_cohort`) on any non-cohort team. This is the critical +// safety invariant — a CI bug that passes a real team's id must bounce off +// a 403, never destroy customer data. +// 5. Per-token rate limit (fail-open per CLAUDE.md rule 1): a leaked/abused +// token can't be used to mint accounts without bound. +// +// The session JWT is minted with the SAME signer the customer auth path uses +// (cfg.JWTSecret + the sessionClaims shape), so the returned token authenticates +// through the ordinary RequireAuth middleware — that is the whole point: CI uses +// it as a normal Bearer. TTL is short (1h) so a captured token expires quickly. + +import ( + "context" + "crypto/sha256" + "crypto/subtle" + "database/sql" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "log/slog" + "strings" + "time" + + "github.com/gofiber/fiber/v2" + "github.com/golang-jwt/jwt/v4" + "github.com/google/uuid" + "github.com/redis/go-redis/v9" + + "instant.dev/internal/config" + "instant.dev/internal/metrics" + "instant.dev/internal/models" +) + +const ( + // e2eAccountTokenHeader is the request header carrying the guard secret. + e2eAccountTokenHeader = "X-E2E-Token" + + // e2eSessionTTL is how long the minted session JWT is valid. Short on + // purpose — CI runs a test suite in minutes; a captured token must not + // outlive the run by long. Independent of the 24h customer-session TTL. 
+ e2eSessionTTL = 1 * time.Hour + + // e2eMetricOpCreate / e2eMetricOpReap label the instant_e2e_account_total + // metric's `op` dimension. + e2eMetricOpCreate = "create" + e2eMetricOpReap = "reap" + + // instant_e2e_account_total `result` label values. + e2eResultOK = "ok" + e2eResultUnauthorized = "unauthorized" + e2eResultBadRequest = "bad_request" + e2eResultNotTestCohort = "not_test_cohort" + e2eResultRateLimited = "rate_limited" + e2eResultError = "error" + + // e2eAccountEmailDomain is the domain of the synthetic primary-user email. + // Always @instanode.dev so the address is in-domain (matches the + // synthetic-cohort convention) but the e2e-cohort+ local part marks it as + // machine-minted. The + suffix keeps each mint unique under the + // users unique-email constraint. + e2eAccountEmailDomain = "instanode.dev" + + // e2eRateLimitMax / e2eRateLimitWindow bound how many mint (POST) requests + // a single token may make per window. Only CreateAccount consults the + // limiter; ReapAccount (DELETE) is not rate-limited. Generous enough for a + // parallel CI matrix, tight enough that a leaked token can't be used to + // mint unbounded accounts. + e2eRateLimitMax = 120 + e2eRateLimitWindow = 1 * time.Hour +) + +// e2eDefaultTier is the tier a request lands on when it omits `tier`. +const e2eDefaultTier = "free" + +// e2eAllowedTiers is the closed set of tiers the e2e mint will accept. +// team + growth are deliberately ABSENT — Team is gated (must not be +// minted/charged until fully built) and growth shares that "don't mint a +// high/unlimited tier from CI" caution. anonymous is allowed for completeness +// (CI may want to exercise the anon path) but still gets a real team row + +// is_test_cohort so the reap path is uniform. +var e2eAllowedTiers = map[string]bool{ + "anonymous": true, + "free": true, + "hobby": true, + "hobby_plus": true, + "pro": true, +} + +// e2eBlockedTiers names the tiers we explicitly reject with a tailored message +// (vs an unknown-tier reject) so CI gets an actionable 400.
+var e2eBlockedTiers = map[string]bool{ + "team": true, + "growth": true, +} + +// E2EAccountHandler wires the dependencies the mint/reap endpoints need. +type E2EAccountHandler struct { + db *sql.DB + rdb *redis.Client + cfg *config.Config +} + +// NewE2EAccountHandler constructs the handler. rdb may be nil — the per-token +// rate limit then fails open (no limiting) per CLAUDE.md rule 1, exactly like +// every other Redis-gated check in this codebase. +func NewE2EAccountHandler(db *sql.DB, rdb *redis.Client, cfg *config.Config) *E2EAccountHandler { + return &E2EAccountHandler{db: db, rdb: rdb, cfg: cfg} +} + +// e2eCreateRequest is the POST body. Both fields optional. +type e2eCreateRequest struct { + Tier string `json:"tier"` + Env string `json:"env"` +} + +// authorize runs the X-E2E-Token guard. It returns true iff the token is +// configured AND the header matches in constant time. On any failure it has +// ALREADY written the 404 response and bumped the unauthorized metric — the +// caller just returns the error. The 404 (not 401/403) is the existence-hiding +// posture; see the file header. +func (h *E2EAccountHandler) authorize(c *fiber.Ctx, op string) bool { + want := "" + if h.cfg != nil { + want = strings.TrimSpace(h.cfg.E2EAccountToken) + } + got := strings.TrimSpace(c.Get(e2eAccountTokenHeader)) + + // Inert-by-default: empty configured token → the surface does not exist. + // We still run the constant-time compare against a non-empty `got` to keep + // the timing identical to the "configured but wrong" case, but a missing + // configured secret can never authorize. + authorized := want != "" && + subtle.ConstantTimeCompare([]byte(got), []byte(want)) == 1 + + if !authorized { + metrics.E2EAccountTotal.WithLabelValues(op, e2eResultUnauthorized).Inc() + // 404, not 401/403 — hide the route. No error body detail that could + // confirm the route's shape. 
+ slog.Debug("internal.e2e.unauthorized", "op", op, "token_configured", want != "") + _ = c.SendStatus(fiber.StatusNotFound) + return false + } + return true +} + +// CreateAccount handles POST /internal/e2e/account. +func (h *E2EAccountHandler) CreateAccount(c *fiber.Ctx) error { + if !h.authorize(c, e2eMetricOpCreate) { + return nil + } + + // Per-token rate limit (fail-open). Keyed on the token hash so the Redis + // key never contains the secret in plaintext. + if limited := h.rateLimited(c.Context(), c.Get(e2eAccountTokenHeader)); limited { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultRateLimited).Inc() + return respondError(c, fiber.StatusTooManyRequests, "rate_limited", + "e2e account mint rate limit exceeded for this token") + } + + var req e2eCreateRequest + // A missing/empty body is fine — both fields default. Only a malformed + // (non-JSON) body is a 400. Fiber's BodyParser tolerates an empty body. + if len(c.Body()) > 0 { + if err := c.BodyParser(&req); err != nil { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultBadRequest).Inc() + return respondError(c, fiber.StatusBadRequest, "invalid_body", "JSON body required") + } + } + + tier := strings.TrimSpace(strings.ToLower(req.Tier)) + if tier == "" { + tier = e2eDefaultTier + } + if e2eBlockedTiers[tier] { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultBadRequest).Inc() + return respondError(c, fiber.StatusBadRequest, "tier_not_allowed", + fmt.Sprintf("tier %q cannot be minted via the e2e surface (Team/Growth are gated)", tier)) + } + if !e2eAllowedTiers[tier] { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultBadRequest).Inc() + return respondError(c, fiber.StatusBadRequest, "invalid_tier", + fmt.Sprintf("tier must be one of anonymous|free|hobby|hobby_plus|pro (got %q)", tier)) + } + + env := strings.TrimSpace(req.Env) + + ctx := c.Context() + + // 1. 
Create the team as is_test_cohort=true in a single INSERT — there is + // never a window where this looks like a real chargeable team. + teamName := "e2e-cohort-" + time.Now().UTC().Format("20060102T150405") + team, err := models.CreateTestCohortTeam(ctx, h.db, teamName) + if err != nil { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultError).Inc() + slog.Error("internal.e2e.create.team_failed", "error", err) + return respondError(c, fiber.StatusServiceUnavailable, "team_create_failed", "failed to create test team") + } + + // 2. Create the primary user with a unique synthetic email, then mark it + // verified (CI needs a verified primary so the account behaves like a + // real logged-in user). + rnd, err := e2eRandomSuffix() + if err != nil { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultError).Inc() + slog.Error("internal.e2e.create.rand_failed", "error", err) + return respondError(c, fiber.StatusServiceUnavailable, "rand_failed", "failed to generate account id") + } + email := fmt.Sprintf("e2e-cohort+%s@%s", rnd, e2eAccountEmailDomain) + user, err := models.CreateUser(ctx, h.db, team.ID, email, "", "", "owner") + if err != nil { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultError).Inc() + slog.Error("internal.e2e.create.user_failed", "error", err, "team_id", team.ID.String()) + return respondError(c, fiber.StatusServiceUnavailable, "user_create_failed", "failed to create test user") + } + if verr := models.SetEmailVerified(ctx, h.db, user.ID); verr != nil { + // Best-effort: a verify-flip failure must not abort the mint — but log it. + slog.Warn("internal.e2e.create.verify_failed", "error", verr, "user_id", user.ID.String()) + } else { + user.EmailVerified = true + } + + // 3. Set the requested tier via the authoritative upgrade path (the same + // one the Razorpay webhook + /internal/set-tier use). 
anonymous/free + // are already the team's tier (CreateTestCohortTeam starts at 'free'), + // so only escalate for paid tiers. We never call this for team/growth — + // those are rejected above. + if tier != "free" && tier != "anonymous" { + if uerr := models.UpgradeTeamAllTiers(ctx, h.db, team.ID, tier); uerr != nil { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultError).Inc() + slog.Error("internal.e2e.create.tier_failed", "error", uerr, "team_id", team.ID.String(), "tier", tier) + return respondError(c, fiber.StatusServiceUnavailable, "tier_set_failed", "failed to set tier") + } + team.PlanTier = tier + } else { + team.PlanTier = tier + } + + // 4. Mint the session JWT with the SAME signer + claim shape the customer + // auth path uses, so it authenticates through ordinary RequireAuth. + expiresAt := time.Now().UTC().Add(e2eSessionTTL) + sessionJWT, err := e2eSignSessionJWT(h.cfg.JWTSecret, user.ID, team.ID, email, expiresAt) + if err != nil { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultError).Inc() + slog.Error("internal.e2e.create.jwt_failed", "error", err, "team_id", team.ID.String()) + return respondError(c, fiber.StatusServiceUnavailable, "token_issue_failed", "failed to issue session token") + } + + // 5. Audit it. Best-effort, in-request so CI can read it back if needed. 
+ meta, _ := json.Marshal(map[string]any{ + "tier": tier, + "env": env, + "user_id": user.ID.String(), + "email": email, + }) + if aerr := models.InsertAuditEvent(ctx, h.db, models.AuditEvent{ + TeamID: team.ID, + UserID: uuid.NullUUID{UUID: user.ID, Valid: true}, + Actor: "system", + Kind: models.AuditKindE2EAccountCreated, + Summary: fmt.Sprintf("minted e2e test-cohort account (tier=%s)", tier), + Metadata: meta, + }); aerr != nil { + slog.Warn("internal.e2e.create.audit_failed", "error", aerr, "team_id", team.ID.String()) + } + + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpCreate, e2eResultOK).Inc() + slog.Info("internal.e2e.create.done", "team_id", team.ID.String(), "tier", tier, "env", env) + + return c.JSON(fiber.Map{ + "team_id": team.ID.String(), + "user_id": user.ID.String(), + "email": email, + "tier": tier, + "session_jwt": sessionJWT, + "expires_at": expiresAt.Format(time.RFC3339), + }) +} + +// ReapAccount handles DELETE /internal/e2e/account/:team_id. +func (h *E2EAccountHandler) ReapAccount(c *fiber.Ctx) error { + if !h.authorize(c, e2eMetricOpReap) { + return nil + } + + teamID, err := uuid.Parse(strings.TrimSpace(c.Params("team_id"))) + if err != nil { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpReap, e2eResultBadRequest).Inc() + return respondError(c, fiber.StatusBadRequest, "invalid_team_id", "team_id must be a UUID") + } + + ctx := c.Context() + + // Look up the team. Idempotency: an already-gone team is a clean 200 (the + // reaper / a previous DELETE already removed it). We treat ErrTeamNotFound + // as success rather than 404 so a CI retry never sees a spurious failure. 
+ team, err := models.GetTeamByID(ctx, h.db, teamID) + if err != nil { + var notFound *models.ErrTeamNotFound + if errors.As(err, ¬Found) { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpReap, e2eResultOK).Inc() + slog.Info("internal.e2e.reap.already_gone", "team_id", teamID.String()) + return c.JSON(fiber.Map{"ok": true, "team_id": teamID.String(), "already_gone": true}) + } + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpReap, e2eResultError).Inc() + slog.Error("internal.e2e.reap.lookup_failed", "error", err, "team_id", teamID.String()) + return respondError(c, fiber.StatusServiceUnavailable, "db_failed", "failed to load team") + } + + // CRITICAL SAFETY GATE: never delete a real team. If the team is NOT in the + // test cohort, refuse with 403 — this is the invariant that makes the whole + // surface safe to expose against production. + isCohort, err := models.IsTestCohort(ctx, h.db, teamID) + if err != nil { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpReap, e2eResultError).Inc() + slog.Error("internal.e2e.reap.cohort_check_failed", "error", err, "team_id", teamID.String()) + return respondError(c, fiber.StatusServiceUnavailable, "db_failed", "failed to check team cohort") + } + if !isCohort { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpReap, e2eResultNotTestCohort).Inc() + slog.Warn("internal.e2e.reap.refused_non_cohort", "team_id", teamID.String(), "plan_tier", team.PlanTier) + return respondError(c, fiber.StatusForbidden, "not_test_cohort", + "refusing to delete a non-test-cohort team") + } + + // Mark every resource for the worker's TTL reaper (tier=free + expires_at=now) + // so the real backing infra (customer DB / cache / mongo / etc.) is + // deprovisioned even for paid-tier test accounts. Then hard-delete the team + // (cascades audit_log/deployments/stacks/users; resources go team_id→NULL, + // already marked reapable). 
+ marked, err := models.MarkTeamResourcesForReaper(ctx, h.db, teamID) + if err != nil { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpReap, e2eResultError).Inc() + slog.Error("internal.e2e.reap.mark_resources_failed", "error", err, "team_id", teamID.String()) + return respondError(c, fiber.StatusServiceUnavailable, "db_failed", "failed to mark resources for reaper") + } + + // Emit the reap audit BEFORE the DELETE — the DELETE cascades audit_log, + // so a row written after the delete would have no team to reference (and a + // row written for a not-yet-deleted team is the honest record of intent). + meta, _ := json.Marshal(map[string]any{"resources_marked_for_reaper": marked}) + if aerr := models.InsertAuditEvent(ctx, h.db, models.AuditEvent{ + TeamID: teamID, + Actor: "system", + Kind: models.AuditKindE2EAccountReaped, + Summary: fmt.Sprintf("reaped e2e test-cohort account (%d resources marked for reaper)", marked), + Metadata: meta, + }); aerr != nil { + slog.Warn("internal.e2e.reap.audit_failed", "error", aerr, "team_id", teamID.String()) + } + + deleted, err := models.DeleteTeamHard(ctx, h.db, teamID) + if err != nil { + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpReap, e2eResultError).Inc() + slog.Error("internal.e2e.reap.delete_failed", "error", err, "team_id", teamID.String()) + return respondError(c, fiber.StatusServiceUnavailable, "db_failed", "failed to delete team") + } + + metrics.E2EAccountTotal.WithLabelValues(e2eMetricOpReap, e2eResultOK).Inc() + slog.Info("internal.e2e.reap.done", + "team_id", teamID.String(), + "resources_marked", marked, + "team_deleted", deleted, + ) + return c.JSON(fiber.Map{ + "ok": true, + "team_id": teamID.String(), + "resources_marked_for_reaper": marked, + "team_deleted": deleted, + }) +} + +// rateLimited applies a per-token sliding-window limit. Fails OPEN (returns +// false = not limited) on any Redis error or nil client — per CLAUDE.md rule 1, +// a Redis outage must never block CI's mint path. 
The Redis key is keyed on the +// SHA-256 of the token so the plaintext secret is never written to Redis. +func (h *E2EAccountHandler) rateLimited(ctx context.Context, token string) bool { + if h.rdb == nil { + return false + } + sum := sha256.Sum256([]byte(token)) + key := "rl_e2e_account:" + hex.EncodeToString(sum[:]) + + now := time.Now() + cutoff := now.Add(-e2eRateLimitWindow).UnixNano() + score := now.UnixNano() + member := fmt.Sprintf("%d:%d", score, score%1000003) + + pipe := h.rdb.Pipeline() + pipe.ZRemRangeByScore(ctx, key, "0", fmt.Sprintf("(%d", cutoff)) + cardCmd := pipe.ZCard(ctx, key) + pipe.ZAdd(ctx, key, redis.Z{Score: float64(score), Member: member}) + pipe.Expire(ctx, key, e2eRateLimitWindow) + // go-redis Pipeline.Exec returns the first command error, so a non-nil + // Exec error already covers the ZCard-failed case — we don't re-check + // cardCmd.Err() separately (that arm would be dead, since a nil Exec + // guarantees every queued command succeeded). Fail OPEN on any error per + // CLAUDE.md rule 1: a Redis outage must never block CI's mint path. + if _, err := pipe.Exec(ctx); err != nil { + slog.Warn("internal.e2e.rate_limit.fail_open", "error", err) + return false + } + return cardCmd.Val() >= int64(e2eRateLimitMax) +} + +// e2eSignSessionJWT mints a session JWT identical in shape to the one the +// customer auth path issues (sessionClaims signed HS256 with cfg.JWTSecret + +// the canonical audience), but with a caller-supplied (short) expiry. Reusing +// sessionClaims is the point — the token authenticates through the same +// middleware path as a real login. +// +// A package var (not a plain func) so tests can inject a signing failure to +// exercise the token_issue_failed arm — HS256 over a []byte key never errors +// in practice, so without a seam that defensive 503 branch is untestable. 
+var e2eSignSessionJWT = e2eSignSessionJWTImpl + +func e2eSignSessionJWTImpl(jwtSecret string, userID, teamID uuid.UUID, email string, expiresAt time.Time) (string, error) { + now := time.Now().UTC() + claims := sessionClaims{ + UserID: userID.String(), + TeamID: teamID.String(), + Email: email, + RegisteredClaims: jwt.RegisteredClaims{ + ID: uuid.New().String(), + IssuedAt: jwt.NewNumericDate(now), + ExpiresAt: jwt.NewNumericDate(expiresAt), + Audience: jwt.ClaimStrings{sessionAudience()}, + }, + } + token := jwt.NewWithClaims(jwt.SigningMethodHS256, claims) + return token.SignedString([]byte(jwtSecret)) +} + +// e2eRandomSuffix returns a short crypto-random hex string for the synthetic +// email's +tag, keeping each minted account's primary email unique under the +// users unique-email constraint. +func e2eRandomSuffix() (string, error) { + b := make([]byte, 8) + if _, err := randRead(b); err != nil { + return "", err + } + return hex.EncodeToString(b), nil +} diff --git a/internal/handlers/internal_e2e_account_errpaths_test.go b/internal/handlers/internal_e2e_account_errpaths_test.go new file mode 100644 index 00000000..3cd23e63 --- /dev/null +++ b/internal/handlers/internal_e2e_account_errpaths_test.go @@ -0,0 +1,281 @@ +package handlers_test + +// internal_e2e_account_errpaths_test.go — deterministic error-injection +// coverage for the e2e account surface's failure arms (503 paths + best-effort +// warn arms) that the happy-path suite can't reach. +// +// We use sqlmock to drive each handler to a specific mid-flow DB failure (or a +// best-effort warn), plus the randRead seam for the crypto-rand failure arm. +// No live Postgres needed — every branch is hit deterministically. 
+ +import ( + "encoding/json" + "errors" + "net/http" + "testing" + "time" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/google/uuid" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" + + "instant.dev/internal/handlers" +) + +func uuidStr() string { return uuid.NewString() } + +// redisClientToNowhere returns a redis client whose connection is already +// closed, so every command (and Pipeline.Exec) errors — used to exercise the +// rate-limit fail-open arm. +func redisClientToNowhere() *redis.Client { + c := redis.NewClient(&redis.Options{ + Addr: "127.0.0.1:1", // nothing listens here + DialTimeout: time.Millisecond * 50, + MaxRetries: -1, + }) + return c +} + +func e2eDecodeErr(t *testing.T, resp *http.Response) string { + t.Helper() + var out struct { + Error string `json:"error"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&out)) + return out.Error +} + +// teamInsertRows is the RETURNING shape of CreateTestCohortTeam (and the SELECT +// shape of GetTeamByID): id, name, plan_tier, stripe_customer_id, created_at, +// default_deployment_ttl_policy. +func teamInsertRows(id string) *sqlmock.Rows { + return sqlmock.NewRows([]string{"id", "name", "plan_tier", "stripe_customer_id", "created_at", "default_deployment_ttl_policy"}). + AddRow(id, "e2e", "free", nil, time.Now(), "auto_24h") +} + +// userInsertRows is the RETURNING shape of CreateUser. +func userInsertRows(userID, teamID string) *sqlmock.Rows { + return sqlmock.NewRows([]string{"id", "team_id", "email", "role", "github_id", "google_id", "email_verified", "created_at"}). 
+ AddRow(userID, teamID, "e2e@instanode.dev", "owner", nil, nil, false, time.Now()) +} + +// --- CREATE error arms ------------------------------------------------------- + +func TestE2EAccount_Create_TeamInsertError_503(t *testing.T) { + t.Parallel() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + mock.ExpectQuery("INSERT INTO teams").WillReturnError(errors.New("boom")) + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`) + require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) + require.Equal(t, "team_create_failed", e2eDecodeErr(t, resp)) +} + +func TestE2EAccount_Create_RandError_503(t *testing.T) { + // NOT parallel: mutates the package-global randRead seam. + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + mock.ExpectQuery("INSERT INTO teams").WillReturnRows(teamInsertRows(uuidStr())) + + restore := handlers.SetRandReadForTest(func([]byte) (int, error) { + return 0, errors.New("forced rand failure") + }) + defer restore() + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`) + require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) + require.Equal(t, "rand_failed", e2eDecodeErr(t, resp)) +} + +func TestE2EAccount_Create_UserInsertError_503(t *testing.T) { + t.Parallel() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + mock.ExpectQuery("INSERT INTO teams").WillReturnRows(teamInsertRows(uuidStr())) + mock.ExpectQuery("INSERT INTO users").WillReturnError(errors.New("boom")) + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`) + require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) + require.Equal(t, "user_create_failed", e2eDecodeErr(t, resp)) +} + +// SetEmailVerified failing is best-effort: the mint must still succeed (the +// warn arm runs but the 200 is 
returned). This exercises the verify-fail warn. +func TestE2EAccount_Create_VerifyFail_StillSucceeds(t *testing.T) { + t.Parallel() + teamID, userID := uuidStr(), uuidStr() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + mock.ExpectQuery("INSERT INTO teams").WillReturnRows(teamInsertRows(teamID)) + mock.ExpectQuery("INSERT INTO users").WillReturnRows(userInsertRows(userID, teamID)) + mock.ExpectExec("UPDATE users SET email_verified").WillReturnError(errors.New("verify blip")) + // free tier → no UpgradeTeamAllTiers. Then the audit insert. + mock.ExpectExec("INSERT INTO audit_log").WillReturnResult(sqlmock.NewResult(0, 1)) + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`) + require.Equal(t, http.StatusOK, resp.StatusCode, "verify-flip failure is best-effort, mint still succeeds") +} + +// Tier-set failure: paid tier, UpgradeTeamAllTiers (a transaction) errors. +func TestE2EAccount_Create_TierSetError_503(t *testing.T) { + t.Parallel() + teamID, userID := uuidStr(), uuidStr() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + mock.ExpectQuery("INSERT INTO teams").WillReturnRows(teamInsertRows(teamID)) + mock.ExpectQuery("INSERT INTO users").WillReturnRows(userInsertRows(userID, teamID)) + mock.ExpectExec("UPDATE users SET email_verified").WillReturnResult(sqlmock.NewResult(0, 1)) + // UpgradeTeamAllTiers opens a transaction; force the BEGIN to fail so the + // whole upgrade errors deterministically regardless of inner statements. + mock.ExpectBegin().WillReturnError(errors.New("tx blip")) + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"pro"}`) + require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) + require.Equal(t, "tier_set_failed", e2eDecodeErr(t, resp)) +} + +// JWT-sign failure: force it via the empty-secret arm. 
HS256 SignedString with +// an empty key still succeeds in the lib, so we instead cover the audit-warn +// arm (best-effort) here: a successful free mint with the audit insert erroring +// still returns 200, exercising the audit-insert-failure warn branch of CreateAccount. +func TestE2EAccount_Create_AuditFail_StillSucceeds(t *testing.T) { + t.Parallel() + teamID, userID := uuidStr(), uuidStr() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + mock.ExpectQuery("INSERT INTO teams").WillReturnRows(teamInsertRows(teamID)) + mock.ExpectQuery("INSERT INTO users").WillReturnRows(userInsertRows(userID, teamID)) + mock.ExpectExec("UPDATE users SET email_verified").WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec("INSERT INTO audit_log").WillReturnError(errors.New("audit blip")) + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`) + require.Equal(t, http.StatusOK, resp.StatusCode, "audit-insert failure is best-effort, mint still succeeds") +} + +// JWT-sign failure (defensive 503): forced via the e2eSignSessionJWT seam. +func TestE2EAccount_Create_JWTSignError_503(t *testing.T) { + // NOT parallel: mutates the package-global e2eSignSessionJWT seam. 
+ teamID, userID := uuidStr(), uuidStr() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + mock.ExpectQuery("INSERT INTO teams").WillReturnRows(teamInsertRows(teamID)) + mock.ExpectQuery("INSERT INTO users").WillReturnRows(userInsertRows(userID, teamID)) + mock.ExpectExec("UPDATE users SET email_verified").WillReturnResult(sqlmock.NewResult(0, 1)) + + restore := handlers.SetE2ESignSessionJWTForTest( + func(string, uuid.UUID, uuid.UUID, string, time.Time) (string, error) { + return "", errors.New("forced sign failure") + }) + defer restore() + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`) + require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) + require.Equal(t, "token_issue_failed", e2eDecodeErr(t, resp)) +} + +// Rate-limit Redis error → fail-open (rule 1): a broken Redis must NOT block +// the mint. Uses a redis client whose connection is closed so Pipeline.Exec +// errors, exercising the fail-open arm. 
+func TestE2EAccount_RateLimit_RedisError_FailsOpen(t *testing.T) { + t.Parallel() + rdb := redisClientToNowhere() + defer rdb.Close() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + teamID, userID := uuidStr(), uuidStr() + mock.ExpectQuery("INSERT INTO teams").WillReturnRows(teamInsertRows(teamID)) + mock.ExpectQuery("INSERT INTO users").WillReturnRows(userInsertRows(userID, teamID)) + mock.ExpectExec("UPDATE users SET email_verified").WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec("INSERT INTO audit_log").WillReturnResult(sqlmock.NewResult(0, 1)) + + app := newE2ETestApp(t, db, rdb, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`) + require.Equal(t, http.StatusOK, resp.StatusCode, + "a Redis error on the rate-limit check must fail open, not block the mint") +} + +// --- REAP error arms --------------------------------------------------------- + +func TestE2EAccount_Reap_LookupError_503(t *testing.T) { + t.Parallel() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + mock.ExpectQuery("FROM teams WHERE id").WillReturnError(errors.New("boom")) + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := deleteE2EReap(t, app, testE2EToken, uuidStr()) + require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) + require.Equal(t, "db_failed", e2eDecodeErr(t, resp)) +} + +func TestE2EAccount_Reap_CohortCheckError_503(t *testing.T) { + t.Parallel() + teamID := uuidStr() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + // GetTeamByID succeeds... + mock.ExpectQuery("FROM teams WHERE id").WillReturnRows(teamInsertRows(teamID)) + // ...then the is_test_cohort lookup errors. 
+ mock.ExpectQuery("SELECT is_test_cohort FROM teams WHERE id").WillReturnError(errors.New("boom")) + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := deleteE2EReap(t, app, testE2EToken, teamID) + require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) + require.Equal(t, "db_failed", e2eDecodeErr(t, resp)) +} + +func TestE2EAccount_Reap_MarkResourcesError_503(t *testing.T) { + t.Parallel() + teamID := uuidStr() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + mock.ExpectQuery("FROM teams WHERE id").WillReturnRows(teamInsertRows(teamID)) + mock.ExpectQuery("SELECT is_test_cohort FROM teams WHERE id"). + WillReturnRows(sqlmock.NewRows([]string{"is_test_cohort"}).AddRow(true)) + mock.ExpectExec("UPDATE resources").WillReturnError(errors.New("boom")) + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := deleteE2EReap(t, app, testE2EToken, teamID) + require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) + require.Equal(t, "db_failed", e2eDecodeErr(t, resp)) +} + +func TestE2EAccount_Reap_DeleteError_503_AndAuditWarn(t *testing.T) { + t.Parallel() + teamID := uuidStr() + db, mock, err := sqlmock.New() + require.NoError(t, err) + defer db.Close() + mock.ExpectQuery("FROM teams WHERE id").WillReturnRows(teamInsertRows(teamID)) + mock.ExpectQuery("SELECT is_test_cohort FROM teams WHERE id"). + WillReturnRows(sqlmock.NewRows([]string{"is_test_cohort"}).AddRow(true)) + mock.ExpectExec("UPDATE resources").WillReturnResult(sqlmock.NewResult(0, 1)) + // Audit insert fails (warn arm), then the DELETE fails (503 arm). 
+ mock.ExpectExec("INSERT INTO audit_log").WillReturnError(errors.New("audit blip")) + mock.ExpectExec("DELETE FROM teams").WillReturnError(errors.New("delete blip")) + + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := deleteE2EReap(t, app, testE2EToken, teamID) + require.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) + require.Equal(t, "db_failed", e2eDecodeErr(t, resp)) +} diff --git a/internal/handlers/internal_e2e_account_export_test.go b/internal/handlers/internal_e2e_account_export_test.go new file mode 100644 index 00000000..b5cd5cb3 --- /dev/null +++ b/internal/handlers/internal_e2e_account_export_test.go @@ -0,0 +1,20 @@ +package handlers + +// internal_e2e_account_export_test.go — white-box seams for the external +// internal_e2e_account_*_test.go coverage suite (package handlers_test). + +import ( + "time" + + "github.com/google/uuid" +) + +// SetE2ESignSessionJWTForTest overrides the e2eSignSessionJWT seam so a test +// can force the token_issue_failed (503) arm of CreateAccount. Returns a +// restore func. HS256-over-[]byte never errors in practice, so this seam is +// the only way to deterministically exercise that defensive branch. 
+func SetE2ESignSessionJWTForTest(fn func(jwtSecret string, userID, teamID uuid.UUID, email string, expiresAt time.Time) (string, error)) (restore func()) { + prev := e2eSignSessionJWT + e2eSignSessionJWT = fn + return func() { e2eSignSessionJWT = prev } +} diff --git a/internal/handlers/internal_e2e_account_test.go b/internal/handlers/internal_e2e_account_test.go new file mode 100644 index 00000000..2145e3d6 --- /dev/null +++ b/internal/handlers/internal_e2e_account_test.go @@ -0,0 +1,415 @@ +package handlers_test + +// internal_e2e_account_test.go — coverage for the CI-only guarded +// ephemeral-test-account surface: +// +// POST /internal/e2e/account +// DELETE /internal/e2e/account/:team_id +// +// Test matrix (mirrors the brief — the guard + cohort-scoping is the +// security-sensitive part): +// - token unset → 404 (inert by default) +// - wrong token → 404 (existence-hiding, constant-time compare) +// - valid token create → 200, is_test_cohort team, JWT authenticates +// - tier="team" → 400 (Team gated) +// - tier="growth" → 400 (gated) +// - reap test-cohort team → purged (team gone, resources marked for reaper) +// - reap NON-cohort team → 403 not_test_cohort ← THE CRITICAL SAFETY TEST +// - reap already-gone team → 200 idempotent +// - per-token rate limit → 429 once the window cap is exceeded + +import ( + "bytes" + "context" + "crypto/sha256" + "database/sql" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "net/http" + "net/http/httptest" + "os" + "testing" + "time" + + "github.com/gofiber/fiber/v2" + "github.com/golang-jwt/jwt/v4" + "github.com/google/uuid" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/require" + + "instant.dev/internal/config" + "instant.dev/internal/handlers" + "instant.dev/internal/middleware" + "instant.dev/internal/testhelpers" +) + +const testE2EToken = "e2e-account-token-at-least-32-bytes!!" 
+ +func skipUnlessE2EDB(t *testing.T) { + t.Helper() + if os.Getenv("TEST_DATABASE_URL") == "" { + t.Skip("e2e account tests: TEST_DATABASE_URL not set") + } +} + +// newE2ETestApp wires only the e2e mint/reap handlers (+ a tiny RequireAuth +// probe route used to prove a minted JWT authenticates). token is the +// configured E2E_ACCOUNT_TOKEN; pass "" to exercise the inert-by-default path. +// rdb may be nil (rate limit fails open). +func newE2ETestApp(t *testing.T, db *sql.DB, rdb *redis.Client, token string) *fiber.App { + t.Helper() + cfg := &config.Config{ + E2EAccountToken: token, + JWTSecret: testhelpers.TestJWTSecret, + AESKey: testhelpers.TestAESKeyHex, + Environment: "test", + } + app := fiber.New(fiber.Config{ + ErrorHandler: func(c *fiber.Ctx, err error) error { + if errors.Is(err, handlers.ErrResponseWritten) { + return nil + } + code := fiber.StatusInternalServerError + if e, ok := err.(*fiber.Error); ok { + code = e.Code + } + return c.Status(code).JSON(fiber.Map{"ok": false, "error": "internal_error", "message": err.Error()}) + }, + }) + h := handlers.NewE2EAccountHandler(db, rdb, cfg) + app.Post("/internal/e2e/account", h.CreateAccount) + app.Delete("/internal/e2e/account/:team_id", h.ReapAccount) + + // Probe route: proves the minted session JWT authenticates through the + // ordinary RequireAuth middleware (the whole point of reusing the signer). + app.Get("/probe", middleware.RequireAuth(cfg), func(c *fiber.Ctx) error { + return c.JSON(fiber.Map{"team_id": c.Locals(middleware.LocalKeyTeamID)}) + }) + return app +} + +// e2eCreateResp is the create-endpoint response shape we assert on. 
+type e2eCreateResp struct { + TeamID string `json:"team_id"` + UserID string `json:"user_id"` + Email string `json:"email"` + Tier string `json:"tier"` + SessionJWT string `json:"session_jwt"` + ExpiresAt string `json:"expires_at"` + Error string `json:"error"` +} + +func postE2ECreate(t *testing.T, app *fiber.App, token, body string) *http.Response { + t.Helper() + req := httptest.NewRequest(http.MethodPost, "/internal/e2e/account", bytes.NewReader([]byte(body))) + req.Header.Set("Content-Type", "application/json") + if token != "" { + req.Header.Set("X-E2E-Token", token) + } + resp, err := app.Test(req, 5000) + require.NoError(t, err) + return resp +} + +func deleteE2EReap(t *testing.T, app *fiber.App, token, teamID string) *http.Response { + t.Helper() + req := httptest.NewRequest(http.MethodDelete, "/internal/e2e/account/"+teamID, nil) + if token != "" { + req.Header.Set("X-E2E-Token", token) + } + resp, err := app.Test(req, 5000) + require.NoError(t, err) + return resp +} + +func decodeE2ECreate(t *testing.T, resp *http.Response) e2eCreateResp { + t.Helper() + var out e2eCreateResp + require.NoError(t, json.NewDecoder(resp.Body).Decode(&out)) + return out +} + +// --- guard: inert by default ------------------------------------------------- + +func TestE2EAccount_TokenUnset_Returns404(t *testing.T) { + t.Parallel() + // No DB needed — the guard 404s before any DB work. + app := newE2ETestApp(t, nil, nil, "") // configured token empty + resp := postE2ECreate(t, app, "anything", `{"tier":"free"}`) + require.Equal(t, http.StatusNotFound, resp.StatusCode, + "empty E2E_ACCOUNT_TOKEN must make the endpoint inert (404)") + + // Reap is equally inert. 
+ resp2 := deleteE2EReap(t, app, "anything", uuid.New().String()) + require.Equal(t, http.StatusNotFound, resp2.StatusCode) +} + +func TestE2EAccount_WrongToken_Returns404(t *testing.T) { + t.Parallel() + app := newE2ETestApp(t, nil, nil, testE2EToken) + resp := postE2ECreate(t, app, "wrong-token", `{"tier":"free"}`) + require.Equal(t, http.StatusNotFound, resp.StatusCode, + "wrong X-E2E-Token must 404 (not 401/403) to hide the route") + + // Missing header (token configured) is also 404. + resp2 := postE2ECreate(t, app, "", `{"tier":"free"}`) + require.Equal(t, http.StatusNotFound, resp2.StatusCode) +} + +// --- create: happy path ------------------------------------------------------ + +func TestE2EAccount_Create_FreeTier_MintsTestCohortAndAuthenticatingJWT(t *testing.T) { + skipUnlessE2EDB(t) + db, cleanup := testhelpers.SetupTestDB(t) + defer cleanup() + app := newE2ETestApp(t, db, nil, testE2EToken) + + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`) + require.Equal(t, http.StatusOK, resp.StatusCode) + out := decodeE2ECreate(t, resp) + + require.NotEmpty(t, out.TeamID) + require.NotEmpty(t, out.UserID) + require.Equal(t, "free", out.Tier) + require.Contains(t, out.Email, "e2e-cohort+") + require.Contains(t, out.Email, "@instanode.dev") + require.NotEmpty(t, out.SessionJWT) + require.NotEmpty(t, out.ExpiresAt) + + // The team must be is_test_cohort=true. + var isCohort bool + require.NoError(t, db.QueryRowContext(context.Background(), + `SELECT is_test_cohort FROM teams WHERE id = $1`, out.TeamID).Scan(&isCohort)) + require.True(t, isCohort, "minted team must be is_test_cohort") + + // The primary user must be email_verified. + var verified bool + require.NoError(t, db.QueryRowContext(context.Background(), + `SELECT email_verified FROM users WHERE id = $1`, out.UserID).Scan(&verified)) + require.True(t, verified, "minted primary user must be email_verified") + + // The session JWT must authenticate through RequireAuth. 
+ req := httptest.NewRequest(http.MethodGet, "/probe", nil) + req.Header.Set("Authorization", "Bearer "+out.SessionJWT) + probeResp, err := app.Test(req, 5000) + require.NoError(t, err) + require.Equal(t, http.StatusOK, probeResp.StatusCode, "minted JWT must authenticate") + var probe struct { + TeamID string `json:"team_id"` + } + require.NoError(t, json.NewDecoder(probeResp.Body).Decode(&probe)) + require.Equal(t, out.TeamID, probe.TeamID, "probe must resolve the minted team") + + // The session JWT's claims are HS256-signed with the same secret + short TTL. + claims := jwt.MapClaims{} + _, err = jwt.ParseWithClaims(out.SessionJWT, claims, func(_ *jwt.Token) (interface{}, error) { + return []byte(testhelpers.TestJWTSecret), nil + }, jwt.WithValidMethods([]string{"HS256"})) + require.NoError(t, err) + require.Equal(t, out.TeamID, claims["tid"]) + require.Equal(t, out.UserID, claims["uid"]) +} + +func TestE2EAccount_Create_PaidTier_SetsTier(t *testing.T) { + skipUnlessE2EDB(t) + db, cleanup := testhelpers.SetupTestDB(t) + defer cleanup() + app := newE2ETestApp(t, db, nil, testE2EToken) + + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"pro"}`) + require.Equal(t, http.StatusOK, resp.StatusCode) + out := decodeE2ECreate(t, resp) + require.Equal(t, "pro", out.Tier) + + var planTier string + require.NoError(t, db.QueryRowContext(context.Background(), + `SELECT plan_tier FROM teams WHERE id = $1`, out.TeamID).Scan(&planTier)) + require.Equal(t, "pro", planTier, "paid-tier mint must escalate plan_tier via the upgrade path") +} + +func TestE2EAccount_Create_EmptyBody_DefaultsToFree(t *testing.T) { + skipUnlessE2EDB(t) + db, cleanup := testhelpers.SetupTestDB(t) + defer cleanup() + app := newE2ETestApp(t, db, nil, testE2EToken) + + resp := postE2ECreate(t, app, testE2EToken, ``) + require.Equal(t, http.StatusOK, resp.StatusCode) + out := decodeE2ECreate(t, resp) + require.Equal(t, "free", out.Tier) +} + +func TestE2EAccount_Create_MalformedBody_400(t *testing.T) { + 
t.Parallel() + app := newE2ETestApp(t, nil, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{not json`) + require.Equal(t, http.StatusBadRequest, resp.StatusCode) +} + +// --- create: gated tiers rejected ------------------------------------------- + +func TestE2EAccount_Create_TeamTier_Rejected400(t *testing.T) { + t.Parallel() + app := newE2ETestApp(t, nil, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"team"}`) + require.Equal(t, http.StatusBadRequest, resp.StatusCode, + "tier=team must be rejected — Team is gated, never minted") + out := decodeE2ECreate(t, resp) + require.Equal(t, "tier_not_allowed", out.Error) +} + +func TestE2EAccount_Create_GrowthTier_Rejected400(t *testing.T) { + t.Parallel() + app := newE2ETestApp(t, nil, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"growth"}`) + require.Equal(t, http.StatusBadRequest, resp.StatusCode) + out := decodeE2ECreate(t, resp) + require.Equal(t, "tier_not_allowed", out.Error) +} + +func TestE2EAccount_Create_UnknownTier_Rejected400(t *testing.T) { + t.Parallel() + app := newE2ETestApp(t, nil, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"platinum"}`) + require.Equal(t, http.StatusBadRequest, resp.StatusCode) + out := decodeE2ECreate(t, resp) + require.Equal(t, "invalid_tier", out.Error) +} + +// --- reap -------------------------------------------------------------------- + +func TestE2EAccount_Reap_TestCohortTeam_Purged(t *testing.T) { + skipUnlessE2EDB(t) + db, cleanup := testhelpers.SetupTestDB(t) + defer cleanup() + app := newE2ETestApp(t, db, nil, testE2EToken) + + // Mint, then insert a resource so we can assert it was marked for reaper. 
+ out := decodeE2ECreate(t, postE2ECreate(t, app, testE2EToken, `{"tier":"pro"}`)) + require.NotEmpty(t, out.TeamID) + ctx := context.Background() + var resID string + require.NoError(t, db.QueryRowContext(ctx, ` + INSERT INTO resources (team_id, resource_type, tier, status) + VALUES ($1, 'postgres', 'pro', 'active') + RETURNING id::text`, out.TeamID).Scan(&resID)) + + resp := deleteE2EReap(t, app, testE2EToken, out.TeamID) + require.Equal(t, http.StatusOK, resp.StatusCode) + + // Team is gone. + var teamCount int + require.NoError(t, db.QueryRowContext(ctx, + `SELECT count(*) FROM teams WHERE id = $1`, out.TeamID).Scan(&teamCount)) + require.Equal(t, 0, teamCount, "reaped test-cohort team must be deleted") + + // The resource row survives (ON DELETE SET NULL) but is marked for the + // worker reaper: team_id NULL, tier='free', expires_at set in the past. + var teamID sql.NullString + var tier string + var expiresAt sql.NullTime + require.NoError(t, db.QueryRowContext(ctx, + `SELECT team_id, tier, expires_at FROM resources WHERE id = $1`, resID). + Scan(&teamID, &tier, &expiresAt)) + require.False(t, teamID.Valid, "resource team_id must be NULL after team delete") + require.Equal(t, "free", tier, "resource must be re-tiered to 'free' so the reaper picks it up") + require.True(t, expiresAt.Valid, "resource expires_at must be set so the reaper deprovisions it") +} + +// TestE2EAccount_Reap_NonCohortTeam_Forbidden is THE CRITICAL SAFETY TEST: a +// real (non-test-cohort) team must NEVER be deletable via the e2e surface. +func TestE2EAccount_Reap_NonCohortTeam_Forbidden(t *testing.T) { + skipUnlessE2EDB(t) + db, cleanup := testhelpers.SetupTestDB(t) + defer cleanup() + app := newE2ETestApp(t, db, nil, testE2EToken) + + // Create a REAL team (is_test_cohort defaults false). + ctx := context.Background() + var realTeamID string + require.NoError(t, db.QueryRowContext(ctx, + `INSERT INTO teams (name, plan_tier) VALUES ('real-customer', 'pro') RETURNING id::text`). 
+ Scan(&realTeamID)) + + resp := deleteE2EReap(t, app, testE2EToken, realTeamID) + require.Equal(t, http.StatusForbidden, resp.StatusCode, + "reaping a non-test-cohort team MUST 403 — never delete a real team") + var out struct { + Error string `json:"error"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&out)) + require.Equal(t, "not_test_cohort", out.Error) + + // The real team must still exist — the 403 protected it. + var cnt int + require.NoError(t, db.QueryRowContext(ctx, + `SELECT count(*) FROM teams WHERE id = $1`, realTeamID).Scan(&cnt)) + require.Equal(t, 1, cnt, "real team must survive a refused reap") +} + +func TestE2EAccount_Reap_AlreadyGone_Idempotent200(t *testing.T) { + skipUnlessE2EDB(t) + db, cleanup := testhelpers.SetupTestDB(t) + defer cleanup() + app := newE2ETestApp(t, db, nil, testE2EToken) + + resp := deleteE2EReap(t, app, testE2EToken, uuid.New().String()) + require.Equal(t, http.StatusOK, resp.StatusCode, "reaping a non-existent team is an idempotent 200") +} + +func TestE2EAccount_Reap_BadTeamID_400(t *testing.T) { + t.Parallel() + app := newE2ETestApp(t, nil, nil, testE2EToken) + resp := deleteE2EReap(t, app, testE2EToken, "not-a-uuid") + require.Equal(t, http.StatusBadRequest, resp.StatusCode) +} + +func TestE2EAccount_Reap_TokenUnset_404(t *testing.T) { + t.Parallel() + app := newE2ETestApp(t, nil, nil, "") + resp := deleteE2EReap(t, app, "anything", uuid.New().String()) + require.Equal(t, http.StatusNotFound, resp.StatusCode) +} + +// --- per-token rate limit ---------------------------------------------------- + +func TestE2EAccount_RateLimit_TripsAfterCap(t *testing.T) { + skipUnlessE2EDB(t) + db, dbCleanup := testhelpers.SetupTestDB(t) + defer dbCleanup() + rdb, rCleanup := testhelpers.SetupTestRedis(t) + defer rCleanup() + app := newE2ETestApp(t, db, rdb, testE2EToken) + + // Pre-load the per-token sliding window to the cap so the next mint trips + // the limit deterministically (no need to actually create 120 
accounts). + // Key shape must match handler: rl_e2e_account:. + sum := sha256.Sum256([]byte(testE2EToken)) + key := "rl_e2e_account:" + hex.EncodeToString(sum[:]) + ctx := context.Background() + now := time.Now().UnixNano() + for i := 0; i < 120; i++ { + require.NoError(t, rdb.ZAdd(ctx, key, redis.Z{ + Score: float64(now + int64(i)), + Member: fmt.Sprintf("%d:%d", now+int64(i), i), + }).Err()) + } + + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`) + require.Equal(t, http.StatusTooManyRequests, resp.StatusCode, + "a token at its window cap must be rate-limited") + out := decodeE2ECreate(t, resp) + require.Equal(t, "rate_limited", out.Error) +} + +func TestE2EAccount_RateLimit_NilRedis_FailsOpen(t *testing.T) { + skipUnlessE2EDB(t) + db, cleanup := testhelpers.SetupTestDB(t) + defer cleanup() + // nil redis → rate limit fails open → mint succeeds. + app := newE2ETestApp(t, db, nil, testE2EToken) + resp := postE2ECreate(t, app, testE2EToken, `{"tier":"free"}`) + require.Equal(t, http.StatusOK, resp.StatusCode, "nil Redis must fail open (rule 1), not block") +} diff --git a/internal/handlers/openapi.go b/internal/handlers/openapi.go index 78371c6a..d065c884 100644 --- a/internal/handlers/openapi.go +++ b/internal/handlers/openapi.go @@ -1,6 +1,16 @@ package handlers // openapi.go — serves GET /openapi.json with an OpenAPI 3.1 description of the live API. +// +// Deliberately NOT documented here: the /internal/* machine-to-machine routes +// (/internal/teams/:id/terminate, /internal/email/resend-magic-link, +// /internal/teams/:id/backup-quota/refund, and the CI-only e2e surface +// /internal/e2e/account + /internal/e2e/account/:team_id). These are internal, +// secret-guarded surfaces with no customer-facing contract. The e2e surface in +// particular is INERT BY DEFAULT (404s until E2E_ACCOUNT_TOKEN is wired) and is +// existence-hiding by design — publishing it in the spec would directly +// undermine that. 
/internal/set-tier is the sole /internal/* route that ever +// appears in the spec, and only in development (stripped in prod below). import ( "strings" diff --git a/internal/handlers/openapi_test.go b/internal/handlers/openapi_test.go index 2d3ddab0..7c11cd22 100644 --- a/internal/handlers/openapi_test.go +++ b/internal/handlers/openapi_test.go @@ -682,8 +682,16 @@ func TestOpenAPI_CoversAllRegisteredRoutes(t *testing.T) { // when a manual backup fails terminally. Not a customer-facing // surface, so it stays out of the agent-facing OpenAPI spec. "POST /internal/teams/{id}/backup-quota/refund": true, - "GET /api/v1/usage/wall": true, - "POST /api/v1/experiments/converted": true, + // CI-only ephemeral-test-account surface. Guarded by E2E_ACCOUNT_TOKEN + // (route registers inert / 404s unless the token is configured) and + // only ever driven by the E2E harness against prod — never a + // customer-facing agent surface. Documenting it in the agent-facing + // OpenAPI would mislead agents into thinking they can mint accounts. + // Same rationale as the WORKER_INTERNAL_JWT_SECRET /internal routes above. + "POST /internal/e2e/account": true, + "DELETE /internal/e2e/account/{team_id}": true, + "GET /api/v1/usage/wall": true, + "POST /api/v1/experiments/converted": true, // POST /auth/exchange — browser-only bridge between the magic-link // / OAuth callback and the SPA. The handler reads the transient // instanode_session_exchange cookie (Path=/auth/exchange, diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 84c5b519..75374ed8 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -464,6 +464,33 @@ var ( Name: "instant_analytics_emit_failed_total", Help: "Behavioral-intelligence custom events dropped before reaching the analytics sink, by reason.", }, []string{"reason"}) + + // E2EAccountTotal counts ephemeral-test-account operations on the + // CI-only guarded /internal/e2e/account surface. 
+ // + // op = "create" | "reap" + // result = "ok" — account minted / reaped + // "unauthorized" — token unset or X-E2E-Token mismatch (404) + // "bad_request" — invalid tier (e.g. team/growth) → 400 + // "not_test_cohort" — reap refused on a real (non-cohort) team → 403 + // "rate_limited" — per-token rate limit tripped → 429 + // "error" — internal failure (DB, JWT sign, etc.) → 503 + // + // Lazy *Vec: not visible at /metrics until the first labelled + // observation. In a prod deploy with E2E_ACCOUNT_TOKEN unset this + // stays at zero-cardinality forever (no call ever authenticates), + // which is the expected steady state. + // + // MONITORING (rule 25): the user-visible safety invariant is + // op="reap",result="not_test_cohort" — a CI bug that tried to reap a + // real team. The infra-repo follow-up wires a P1 NR alert on any + // nonzero rate of that series + a reliability-dashboard tile, plus a + // METRICS-CATALOG.md row (infra is a separate repo with its own + // deploy, so it cannot land in this api PR). + E2EAccountTotal = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "instant_e2e_account_total", + Help: "Ephemeral-test-account operations on the CI-only /internal/e2e/account surface, by op and result.", + }, []string{"op", "result"}) ) // ReadyzCheckStatus updates the gauge for one check in this service. diff --git a/internal/models/audit_kinds.go b/internal/models/audit_kinds.go index bf39f469..ee2cdc4d 100644 --- a/internal/models/audit_kinds.go +++ b/internal/models/audit_kinds.go @@ -298,6 +298,22 @@ const ( // alert. Metadata: {orphan_kind, identifier, error}. AuditKindOrphanSweepFailed = "team.orphan_sweep_failed" + // AuditKindE2EAccountCreated fires on every successful mint via the + // CI-only POST /internal/e2e/account surface. The created team is + // always is_test_cohort=true. Metadata: {tier, env, user_id, email} + // (email is the synthetic e2e-cohort+*@instanode.dev address — + // not customer PII). 
Operator-internal signal: a spike means CI is + // minting more test accounts than expected, which would point at a + // reaper that is failing to clean up. NOT a customer email event. + AuditKindE2EAccountCreated = "e2e.account.created" + + // AuditKindE2EAccountReaped fires on every successful reap via the + // CI-only DELETE /internal/e2e/account/:team_id surface. Only ever + // emitted for an is_test_cohort team — the handler 403s before this + // row is written if the target is a real team. Metadata: + // {resources_marked_for_reaper}. Operator-internal signal only. + AuditKindE2EAccountReaped = "e2e.account.reaped" + // AuditKindResourceMetricsQueried fires when a caller successfully fetches // GET /api/v1/resources/:id/metrics. The audit row's metadata records the // resolved window_seconds + samples_count so the Loops forwarder / diff --git a/internal/models/e2e_account_errbranches_test.go b/internal/models/e2e_account_errbranches_test.go new file mode 100644 index 00000000..178cc620 --- /dev/null +++ b/internal/models/e2e_account_errbranches_test.go @@ -0,0 +1,84 @@ +package models + +// e2e_account_errbranches_test.go — white-box (package models) sqlmock coverage +// for the DB-error branches of the CI-only ephemeral-test-account model +// functions. The happy-path / idempotent behaviour is exercised DB-backed in +// e2e_account_models_test.go (package models_test); those paths can't +// deterministically hit the Exec-failed / RowsAffected-failed branches against +// a real working Postgres, so they're covered here via sqlmock to satisfy the +// 100%-patch coverage gate. 
+// +// Covers: +// - CreateTestCohortTeam — INSERT … RETURNING QueryRow error (team.go) +// - DeleteTeamHard — Exec error + RowsAffected error (team.go) +// - MarkTeamResourcesForReaper — Exec error + RowsAffected error (resource.go) + +import ( + "context" + "errors" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/google/uuid" + "github.com/stretchr/testify/require" +) + +func TestCreateTestCohortTeam_QueryError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectQuery(`INSERT INTO teams .* is_test_cohort`). + WillReturnError(errors.New("boom")) + + team, err := CreateTestCohortTeam(context.Background(), db, "cohort-mint") + require.Error(t, err) + require.Nil(t, team) + require.Contains(t, err.Error(), "models.CreateTestCohortTeam") + require.NoError(t, mock.ExpectationsWereMet()) +} + +func TestDeleteTeamHard_ExecError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectExec(`DELETE FROM teams WHERE id`). + WillReturnError(errors.New("boom")) + + deleted, err := DeleteTeamHard(context.Background(), db, uuid.New()) + require.Error(t, err) + require.False(t, deleted) + require.Contains(t, err.Error(), "models.DeleteTeamHard") + require.NoError(t, mock.ExpectationsWereMet()) +} + +func TestDeleteTeamHard_RowsAffectedError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectExec(`DELETE FROM teams WHERE id`). + WillReturnResult(sqlmock.NewErrorResult(errors.New("raerr"))) + + deleted, err := DeleteTeamHard(context.Background(), db, uuid.New()) + require.Error(t, err) + require.False(t, deleted) + require.Contains(t, err.Error(), "rows_affected") + require.NoError(t, mock.ExpectationsWereMet()) +} + +func TestMarkTeamResourcesForReaper_ExecError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectExec(`UPDATE resources`). 
+ WillReturnError(errors.New("boom")) + + n, err := MarkTeamResourcesForReaper(context.Background(), db, uuid.New()) + require.Error(t, err) + require.Equal(t, int64(0), n) + require.Contains(t, err.Error(), "models.MarkTeamResourcesForReaper") + require.NoError(t, mock.ExpectationsWereMet()) +} + +func TestMarkTeamResourcesForReaper_RowsAffectedError(t *testing.T) { + db, mock := newMock(t) + mock.ExpectExec(`UPDATE resources`). + WillReturnResult(sqlmock.NewErrorResult(errors.New("raerr"))) + + n, err := MarkTeamResourcesForReaper(context.Background(), db, uuid.New()) + require.Error(t, err) + require.Equal(t, int64(0), n) + require.Contains(t, err.Error(), "rows_affected") + require.NoError(t, mock.ExpectationsWereMet()) +} diff --git a/internal/models/e2e_account_models_test.go b/internal/models/e2e_account_models_test.go new file mode 100644 index 00000000..f8ee0082 --- /dev/null +++ b/internal/models/e2e_account_models_test.go @@ -0,0 +1,128 @@ +package models_test + +// e2e_account_models_test.go — DB-backed coverage for the model functions +// backing the CI-only ephemeral-test-account surface: +// - CreateTestCohortTeam (is_test_cohort=true at INSERT time) +// - DeleteTeamHard (hard-delete + idempotent re-delete) +// - MarkTeamResourcesForReaper(tier→free + expires_at→now so the reaper reaps) +// +// Skips when TEST_DATABASE_URL is unset. 
+ +import ( + "context" + "database/sql" + "os" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" + + "instant.dev/internal/models" + "instant.dev/internal/testhelpers" +) + +func skipUnlessE2EModelsDB(t *testing.T) { + t.Helper() + if os.Getenv("TEST_DATABASE_URL") == "" { + t.Skip("TEST_DATABASE_URL not set; skipping integration test") + } +} + +func TestCreateTestCohortTeam_SetsCohortFlag(t *testing.T) { + skipUnlessE2EModelsDB(t) + ctx := context.Background() + db, clean := testhelpers.SetupTestDB(t) + defer clean() + + team, err := models.CreateTestCohortTeam(ctx, db, "cohort-mint") + require.NoError(t, err) + require.NotEqual(t, uuid.Nil, team.ID) + require.Equal(t, "free", team.PlanTier, "minted cohort team starts at free") + + isCohort, err := models.IsTestCohort(ctx, db, team.ID) + require.NoError(t, err) + require.True(t, isCohort, "CreateTestCohortTeam must set is_test_cohort at INSERT time") + + // And the ordinary CreateTeam path must NOT set it (contrast — ensures the + // flag is only ever set by the cohort constructor / seeder). + real, err := models.CreateTeam(ctx, db, "real-team") + require.NoError(t, err) + realCohort, err := models.IsTestCohort(ctx, db, real.ID) + require.NoError(t, err) + require.False(t, realCohort) +} + +func TestDeleteTeamHard_DeletesAndIsIdempotent(t *testing.T) { + skipUnlessE2EModelsDB(t) + ctx := context.Background() + db, clean := testhelpers.SetupTestDB(t) + defer clean() + + team, err := models.CreateTestCohortTeam(ctx, db, "to-delete") + require.NoError(t, err) + + deleted, err := models.DeleteTeamHard(ctx, db, team.ID) + require.NoError(t, err) + require.True(t, deleted, "first delete removes the row") + + // Row is gone. + _, err = models.GetTeamByID(ctx, db, team.ID) + var notFound *models.ErrTeamNotFound + require.ErrorAs(t, err, &notFound) + + // Idempotent: re-delete reports false, no error. 
+ deleted, err = models.DeleteTeamHard(ctx, db, team.ID) + require.NoError(t, err) + require.False(t, deleted, "re-delete of a gone team is a clean no-op") +} + +func TestMarkTeamResourcesForReaper_RetiersAndExpires(t *testing.T) { + skipUnlessE2EModelsDB(t) + ctx := context.Background() + db, clean := testhelpers.SetupTestDB(t) + defer clean() + + team, err := models.CreateTestCohortTeam(ctx, db, "reaper-mark") + require.NoError(t, err) + + // Two resources at a paid tier with no expiry, plus one already-deleted row + // that must be left alone. + var activeID, pausedID, deletedID string + require.NoError(t, db.QueryRowContext(ctx, ` + INSERT INTO resources (team_id, resource_type, tier, status) + VALUES ($1, 'postgres', 'pro', 'active') RETURNING id::text`, team.ID).Scan(&activeID)) + require.NoError(t, db.QueryRowContext(ctx, ` + INSERT INTO resources (team_id, resource_type, tier, status) + VALUES ($1, 'redis', 'pro', 'paused') RETURNING id::text`, team.ID).Scan(&pausedID)) + require.NoError(t, db.QueryRowContext(ctx, ` + INSERT INTO resources (team_id, resource_type, tier, status) + VALUES ($1, 'mongodb', 'pro', 'deleted') RETURNING id::text`, team.ID).Scan(&deletedID)) + + marked, err := models.MarkTeamResourcesForReaper(ctx, db, team.ID) + require.NoError(t, err) + require.Equal(t, int64(2), marked, "only non-deleted rows are marked") + + assertMarked := func(id string, wantMarked bool) { + var tier string + var expiresAt sql.NullTime + require.NoError(t, db.QueryRowContext(ctx, + `SELECT tier, expires_at FROM resources WHERE id = $1`, id).Scan(&tier, &expiresAt)) + if wantMarked { + require.Equal(t, "free", tier) + require.True(t, expiresAt.Valid) + require.True(t, expiresAt.Time.Before(time.Now().Add(time.Minute))) + } else { + require.Equal(t, "pro", tier, "deleted row must be untouched") + require.False(t, expiresAt.Valid) + } + } + assertMarked(activeID, true) + assertMarked(pausedID, true) + assertMarked(deletedID, false) + + // Idempotent: re-marking 
already-marked rows re-stamps without error. + marked, err = models.MarkTeamResourcesForReaper(ctx, db, team.ID) + require.NoError(t, err) + require.Equal(t, int64(2), marked) +} diff --git a/internal/models/resource.go b/internal/models/resource.go index 29929586..ff03f99a 100644 --- a/internal/models/resource.go +++ b/internal/models/resource.go @@ -553,6 +553,46 @@ func PauseAllTeamResources(ctx context.Context, db *sql.DB, teamID uuid.UUID) (i return n, nil } +// MarkTeamResourcesForReaper makes every non-terminal resource owned by a team +// eligible for the worker's TTL reaper, returning how many rows were marked. +// It sets tier='free' and expires_at=now() on each row, leaving status alone. +// +// Why this exact shape: the live reaper (worker/internal/jobs/expire.go) only +// deprovisions rows matching +// +// ((team_id IS NULL AND tier='anonymous') OR tier='free') +// AND status IN (…reapable statuses…) AND expires_at IS NOT NULL AND expires_at < now() +// AND (team_id IS NULL OR teams.status='active') +// +// The e2e-reap path DELETEs the team immediately after calling this, which +// (via ON DELETE SET NULL on resources.team_id) leaves these rows with +// team_id=NULL. Forcing tier='free' guarantees the reaper picks them up +// regardless of which tier the test account was minted at (hobby/pro rows +// would otherwise never match the reaper's tier filter and the backing +// customer DB/cache/mongo would leak). status='active' is already reapable, +// so we don't touch it. This reuses the single live deprovisioning path +// rather than reimplementing gRPC teardown synchronously in the request. +// +// Scope: only this team's rows; only rows not already in a terminal status +// (deleted) — re-marking a deleted row would be pointless and could resurrect +// a row the reaper already finished. Idempotent: re-running over already-marked +// rows just re-stamps expires_at to a fresh now(). 
+func MarkTeamResourcesForReaper(ctx context.Context, db *sql.DB, teamID uuid.UUID) (int64, error) { + res, err := db.ExecContext(ctx, ` + UPDATE resources + SET tier = 'free', expires_at = now() + WHERE team_id = $1 AND status != 'deleted' + `, teamID) + if err != nil { + return 0, fmt.Errorf("models.MarkTeamResourcesForReaper: %w", err) + } + n, err := res.RowsAffected() + if err != nil { + return 0, fmt.Errorf("models.MarkTeamResourcesForReaper rows_affected: %w", err) + } + return n, nil +} + // ResumeResource flips status from 'paused' → 'active' and clears paused_at. // Returns ErrResourceNotPaused when the row is missing or not currently paused // (mirror of PauseResource). The connection_url is preserved unchanged — the diff --git a/internal/models/team.go b/internal/models/team.go index 349bc4c7..e3a5f77e 100644 --- a/internal/models/team.go +++ b/internal/models/team.go @@ -387,6 +387,62 @@ func SetTestCohort(ctx context.Context, db *sql.DB, teamID uuid.UUID, isTest boo return nil } +// CreateTestCohortTeam inserts a new team with is_test_cohort=true in a +// single INSERT and returns it. This is the ONLY constructor that sets the +// cohort flag at creation time — it exists exclusively for the CI-only +// ephemeral-test-account surface (POST /internal/e2e/account). Every other +// team-creation path (CreateTeam, OAuth upsert, /claim) creates a real +// (is_test_cohort=false) team; flipping a real team into the cohort is done +// only via SetTestCohort by the worker-side seeder. +// +// Setting the flag in the INSERT (rather than CreateTeam + SetTestCohort) +// guarantees the team is NEVER observable as a real team — there is no window +// where a freshly-minted e2e team looks like a chargeable customer to a +// concurrent billing/quota scan. plan_tier still starts 'free'; the caller +// elevates via UpgradeTeamAllTiers if a paid tier was requested. 
+func CreateTestCohortTeam(ctx context.Context, db *sql.DB, name string) (*Team, error) { + t := &Team{} + err := db.QueryRowContext(ctx, ` + INSERT INTO teams (name, plan_tier, is_test_cohort) VALUES ($1, 'free', true) + RETURNING id, name, plan_tier, stripe_customer_id, created_at, + COALESCE(default_deployment_ttl_policy, 'auto_24h') + `, name).Scan( + &t.ID, &t.Name, &t.PlanTier, &t.RazorpaySubscriptionID, &t.CreatedAt, + &t.DefaultDeploymentTTLPolicy, + ) + if err != nil { + return nil, fmt.Errorf("models.CreateTestCohortTeam: %w", err) + } + return t, nil +} + +// DeleteTeamHard hard-deletes a team row by primary key and reports whether a +// row was removed. The schema's ON DELETE CASCADE FKs (audit_log, deployments, +// stacks, team_members, api_keys, vault, custom_domains, …) clean up the +// owned rows; resources carry ON DELETE SET NULL (migration 001) so their +// team_id becomes NULL rather than the row vanishing — callers that need the +// backing infra deprovisioned MUST first call MarkTeamResourcesForReaper so +// the worker's TTL reaper picks the now-orphaned rows up. +// +// This is intentionally NOT a general-purpose team delete: it is the +// terminal step of the CI-only e2e-account reap path, which has already +// verified the target is is_test_cohort. There is deliberately no +// is_test_cohort guard *inside* this function — the guard lives at the +// handler boundary so a future caller can't accidentally route a real team +// here without the explicit cohort check being visible in the call site. +// Returns (false, nil) when no row matched (idempotent re-reap). 
+func DeleteTeamHard(ctx context.Context, db *sql.DB, teamID uuid.UUID) (bool, error) { + res, err := db.ExecContext(ctx, `DELETE FROM teams WHERE id = $1`, teamID) + if err != nil { + return false, fmt.Errorf("models.DeleteTeamHard: %w", err) + } + n, err := res.RowsAffected() + if err != nil { + return false, fmt.Errorf("models.DeleteTeamHard rows_affected: %w", err) + } + return n > 0, nil +} + // UpgradeTeamAllTiers atomically upgrades the team tier and promotes every // active resource, deployment, and stack owned by that team. All four updates // run inside a single transaction so a partial failure (e.g. ElevateDeployments diff --git a/internal/router/route_donebar_guard_test.go b/internal/router/route_donebar_guard_test.go index b69c8304..be832995 100644 --- a/internal/router/route_donebar_guard_test.go +++ b/internal/router/route_donebar_guard_test.go @@ -418,6 +418,21 @@ var routeTestMap = map[string]string{ "GET /api/v1/usage/wall": "TestMiscBlock_UsageWall_RealDBContract", "GET /api/v1/webhooks/:token/requests": "TestMiscBlock_WebhookInspector_TokenScopedAndIsolated", "POST /api/v1/experiments/converted": "TestExperimentsConverted_WritesAuditRow", + + // ── CI-only ephemeral-test-account surface (guarded; inert by default) — + // DB-backed handler-integration suite + // (internal/handlers/internal_e2e_account_test.go). The create row points at + // the mint happy-path test (is_test_cohort team + email-verified primary + + // a session JWT that authenticates through the production RequireAuth chain); + // the reap row points at the happy-path purge test (an is_test_cohort team's + // resources marked for the reaper + the team tombstoned). 
The CRITICAL safety + // arm (a non-test-cohort real team can NEVER be reaped → 403 not_test_cohort), + // the 404-when-inert / wrong-token guard, tier=team/growth 400, paid-tier, + // idempotent reap, and per-token rate-limit/fail-open arms are covered in the + // same suite (+ internal_e2e_account_errpaths_test.go for the DB/redis/sign + // failure arms). + "POST /internal/e2e/account": "TestE2EAccount_Create_FreeTier_MintsTestCohortAndAuthenticatingJWT", + "DELETE /internal/e2e/account/:team_id": "TestE2EAccount_Reap_TestCohortTeam_Purged", } // routeCoverageExemptions lists routes that have NO mapped e2e integration test diff --git a/internal/router/router.go b/internal/router/router.go index 683fd11f..a117d54c 100644 --- a/internal/router/router.go +++ b/internal/router/router.go @@ -909,6 +909,22 @@ func NewWithHooks(cfg *config.Config, db *sql.DB, rdb *redis.Client, geoDbs *mid internalRefundH := handlers.NewInternalBackupRefundHandler(db, rdb, cfg) app.Post("/internal/teams/:id/backup-quota/refund", internalRefundH.Refund) + // CI-only ephemeral-test-account surface. Registered UNCONDITIONALLY + // (not behind the development-env gate) because CI mints+reaps real + // test-cohort accounts against PRODUCTION. It is safe to register in + // prod because it is INERT BY DEFAULT: both routes return 404 for every + // request until E2E_ACCOUNT_TOKEN is set, and even when set they require + // a constant-time X-E2E-Token header match (404 on mismatch — the route's + // existence is hidden). The reap path can NEVER delete a real team + // (403 not_test_cohort on any non-is_test_cohort team). Lives next to the + // other /internal/* machine-to-machine routes — NOT under /api/v1 (no + // customer session auth applies) and NOT in the public OpenAPI spec + // (deliberately omitted; see handlers/openapi.go header). See + // handlers/internal_e2e_account.go for the full guard rationale. 
+ e2eAccountH := handlers.NewE2EAccountHandler(db, rdb, cfg) + app.Post("/internal/e2e/account", e2eAccountH.CreateAccount) + app.Delete("/internal/e2e/account/:team_id", e2eAccountH.ReapAccount) + // §10.20 cached-aggregation endpoints. Separate handlers from BillingHandler // so the caching contract (Redis + singleflight + Cache-Control headers) // is visible at the route + handler boundary, not buried inside the billing