From 7123e20595b96d14823dcd72704b6070e8c594b8 Mon Sep 17 00:00:00 2001 From: Jack Zhuang <277994282+os-zhuang@users.noreply.github.com> Date: Thu, 4 Jun 2026 15:08:21 +0800 Subject: [PATCH 1/2] fix(publish): per-request timeout + retry; one template's failure no longer aborts the run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The marketplace publish died mid-run against staging: the control plane is a cold singleton, and a heavy version POST exceeded undici's default 5-min headers timeout, which threw and killed the whole script — so only the first template published and the rest never ran. - postJson: explicit AbortController timeout (PUBLISH_TIMEOUT_MS, default 240s) + retry (PUBLISH_RETRIES = total attempts, default 4, i.e. up to 3 retries) with backoff on timeout / network error / 5xx. 4xx (incl. 409 "already published") returns immediately. On exhausted retries it returns a structured failure instead of throwing. - main loop: wrap publishOne in try/catch and continue — every template is attempted; failures are collected and reported, exit 1 only at the end. Net: a cold start or one stuck package no longer blocks the other templates from publishing, and transient timeouts self-heal on retry. 
Co-Authored-By: Claude Opus 4.8 --- scripts/publish-template.mjs | 90 +++++++++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 16 deletions(-) diff --git a/scripts/publish-template.mjs b/scripts/publish-template.mjs index 0aa06f0..3667999 100644 --- a/scripts/publish-template.mjs +++ b/scripts/publish-template.mjs @@ -123,22 +123,69 @@ async function postJson(path, body) { json: { success: true, data: { id: 'dry-run', created: true } }, }; } - const res = await fetch(url, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${OS_CLOUD_API_KEY}`, - }, - body: JSON.stringify(body), - }); - const text = await res.text(); - let json; - try { - json = JSON.parse(text); - } catch { - json = { raw: text }; + // Per-request timeout + retry. The control plane is a singleton that can be + // cold (it isn't kept warm), so the first heavy POST after idle can take far + // longer than undici's default 5-min headers timeout — which previously threw + // and killed the whole run. We use an explicit AbortController timeout and + // retry transient failures (timeout / network / 5xx). 4xx (incl. 409) returns + // immediately — those are deterministic, not worth retrying. + const TIMEOUT_MS = Number(process.env.PUBLISH_TIMEOUT_MS ?? 240_000); + const MAX_ATTEMPTS = Number(process.env.PUBLISH_RETRIES ?? 4); + let lastErr; + for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) { + const ac = new AbortController(); + const timer = setTimeout(() => ac.abort(), TIMEOUT_MS); + try { + const res = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${OS_CLOUD_API_KEY}`, + }, + body: JSON.stringify(body), + signal: ac.signal, + }); + clearTimeout(timer); + const text = await res.text(); + let json; + try { + json = JSON.parse(text); + } catch { + json = { raw: text }; + } + // Retry server errors (5xx) — a cold/overloaded singleton often 502/503s + // before it warms up. 
Client errors (4xx) are returned as-is. + if (res.status >= 500 && attempt < MAX_ATTEMPTS) { + log(` ↻ ${path} → ${res.status}, retry ${attempt}/${MAX_ATTEMPTS - 1}…`); + await sleep(backoffMs(attempt)); + continue; + } + return { ok: res.ok, status: res.status, json }; + } catch (err) { + clearTimeout(timer); + lastErr = err; + const reason = err?.name === 'AbortError' ? `timeout after ${TIMEOUT_MS}ms` : (err?.message ?? String(err)); + if (attempt < MAX_ATTEMPTS) { + log(` ↻ ${path} → ${reason}, retry ${attempt}/${MAX_ATTEMPTS - 1}…`); + await sleep(backoffMs(attempt)); + continue; + } + // Exhausted retries — surface a structured failure, don't throw (so one + // stuck template doesn't kill the whole run). + return { ok: false, status: 0, json: { error: `request failed: ${reason}` } }; + } } - return { ok: res.ok, status: res.status, json }; + return { ok: false, status: 0, json: { error: `request failed: ${lastErr?.message ?? 'unknown'}` } }; +} + +function sleep(ms) { + return new Promise((r) => setTimeout(r, ms)); +} + +// Backoff with a cap: 3s, 6s, 12s, 24s, 30s… — gives a cold singleton time to +// spin up between attempts. +function backoffMs(attempt) { + return Math.min(3000 * 2 ** (attempt - 1), 30_000); } /** @@ -235,12 +282,23 @@ async function main() { if (DRY_RUN) log('DRY_RUN=1 — no HTTP calls will be made.'); const results = { created: 0, updated: 0, skipped: 0, failed: 0 }; + const failures = []; for (const t of templates) { - const r = await publishOne(t); + // One template's failure must NOT abort the others — publish is per-package + // and idempotent, so we attempt every template and report at the end. + let r; + try { + r = await publishOne(t); + } catch (err) { + log(` ✗ ${t.pkg.name} threw: ${err?.message ?? err}`); + r = 'failed'; + } results[r] = (results[r] ?? 
0) + 1; + if (r === 'failed') failures.push(t.pkg.name); } log('\n── Summary ──'); log(JSON.stringify(results, null, 2)); + if (failures.length) log(`Failed: ${failures.join(', ')}`); process.exit(results.failed > 0 ? 1 : 0); } From 045cc2c2233792b22431d18eac27568229a8fd18 Mon Sep 17 00:00:00 2001 From: Jack Zhuang <277994282+os-zhuang@users.noreply.github.com> Date: Thu, 4 Jun 2026 15:10:03 +0800 Subject: [PATCH 2/2] style: prettier-format publish-template.mjs --- scripts/publish-template.mjs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scripts/publish-template.mjs b/scripts/publish-template.mjs index 3667999..02a14aa 100644 --- a/scripts/publish-template.mjs +++ b/scripts/publish-template.mjs @@ -164,7 +164,10 @@ async function postJson(path, body) { } catch (err) { clearTimeout(timer); lastErr = err; - const reason = err?.name === 'AbortError' ? `timeout after ${TIMEOUT_MS}ms` : (err?.message ?? String(err)); + const reason = + err?.name === 'AbortError' + ? `timeout after ${TIMEOUT_MS}ms` + : (err?.message ?? String(err)); if (attempt < MAX_ATTEMPTS) { log(` ↻ ${path} → ${reason}, retry ${attempt}/${MAX_ATTEMPTS - 1}…`); await sleep(backoffMs(attempt)); @@ -175,7 +178,11 @@ async function postJson(path, body) { return { ok: false, status: 0, json: { error: `request failed: ${reason}` } }; } } - return { ok: false, status: 0, json: { error: `request failed: ${lastErr?.message ?? 'unknown'}` } }; + return { + ok: false, + status: 0, + json: { error: `request failed: ${lastErr?.message ?? 'unknown'}` }, + }; } function sleep(ms) {