From 82a5463b3e96f1bd2748f99dbe5175c262ae5b74 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 7 Jun 2026 19:16:13 +0530 Subject: [PATCH] =?UTF-8?q?fix(e2e):=20bound=20provision/mint/reap=20fetch?= =?UTF-8?q?es=20at=2045s=20(hang=20=E2=86=92=20fast=20retry)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit playwright.live.config sets a test timeout but NO per-request timeout, so a provision (CREATE) or reap (DROP) that intermittently HANGS on postgres-customers contention runs until the whole test dies (seen as 2-3min vector/cache timeouts). A hang isn't slowness — raising the test timeout just makes it last longer. Bound the provision POST, the mint, and both reap DELETEs at 45s so a hang fails fast and Playwright retries (passes when contention clears). Pairs with the orphan-customer-DB sweep that removes the contention source. Co-Authored-By: Claude Opus 4.8 (1M context) --- e2e/cleanup-ledger.ts | 4 ++++ e2e/factory.ts | 7 +++++++ e2e/live-anon-provision.spec.ts | 6 ++++++ 3 files changed, 17 insertions(+) diff --git a/e2e/cleanup-ledger.ts b/e2e/cleanup-ledger.ts index 845025a..f272d47 100644 --- a/e2e/cleanup-ledger.ts +++ b/e2e/cleanup-ledger.ts @@ -184,6 +184,10 @@ export async function reapEntities( method: 'DELETE', headers, failOnStatusCode: false, + // Bound the reap DELETE: a DROP that hangs on postgres-customers + // contention must fail fast (45s) rather than hang teardown to the test + // timeout. The on-disk ledger backstop re-reaps on the next run. + timeout: 45_000, }) const status = resp.status() const bodyText = status >= 200 && status < 300 ? '' : await resp.text().catch(() => '') diff --git a/e2e/factory.ts b/e2e/factory.ts index 58d9228..8c38910 100644 --- a/e2e/factory.ts +++ b/e2e/factory.ts @@ -135,6 +135,10 @@ export async function mintUser( with_failed_deploy: !!opts.withFailedDeploy, }), failOnStatusCode: false, + // Bound the mint: with_resources provisions real DBs server-side, which can + // hang on postgres-customers contention. 45s fails fast → the spec SKIPs + // (mint==null) rather than hanging the whole test on a stalled mint. + timeout: 45_000, }) // Inert-by-default 404: the token is wrong or the endpoint isn't armed on // this stack. Treat as "can't mint" → null (caller SKIPS), never a red. @@ -217,6 +221,9 @@ export async function reap(request: APIRequestContext, teamID: string): Promise< method: 'DELETE', headers: { [E2E_ACCOUNT_TOKEN_HEADER]: accountToken() }, failOnStatusCode: false, + // Bound the cohort reap: the team-cascade drops customer DBs; a hung DROP + // must fail fast (45s), not hang teardown. afterAll + ledger reaper backstop. + timeout: 45_000, }) const ok = [200, 202, 204, 404, 410].includes(resp.status()) if (!ok) { diff --git a/e2e/live-anon-provision.spec.ts b/e2e/live-anon-provision.spec.ts index 9423db2..4864b38 100644 --- a/e2e/live-anon-provision.spec.ts +++ b/e2e/live-anon-provision.spec.ts @@ -281,6 +281,12 @@ test.describe('LIVE — every anonymous provision flow → backend-assert → re headers: id.headers, data: JSON.stringify({ name }), failOnStatusCode: false, + // Bound the provision call: a CREATE that intermittently hangs on + // postgres-customers contention would otherwise eat the whole test + // timeout (a hang, not slowness — raising the test timeout just makes it + // last longer). 45s fails fast → Playwright retries → passes on the next + // attempt when the contention clears. Pairs with the orphan-DB sweep. + timeout: 45_000, }) test.skip(