-
Notifications
You must be signed in to change notification settings - Fork 0
322 lines (300 loc) · 14.6 KB
/
ci.yml
File metadata and controls
322 lines (300 loc) · 14.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# instant.dev/api — CI
#
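# Multi-repo note: go.mod uses `replace instant.dev/proto => ../proto`. The
# "Checkout proto sibling" step clones the proto repo next to this checkout.
# If the proto repository is not named `proto`, set the `PROTO_REPO`
# repository variable (e.g. `myorg/instant-proto`) or fork/rename it to match
# `${{ github.repository_owner }}/proto`. The `common` sibling (go.mod replace
# `../common`) is handled the same way through the `COMMON_REPO` repository
# variable, defaulting to `${{ github.repository_owner }}/common`.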
name: CI

on:
  push:
    branches:
      - master
    # CI-minute savings (2026-05-21): commits that only touch docs or repo
    # housekeeping files do not trigger a run.
    paths-ignore:
      - "**.md"
      - "docs/**"
      - "CLAUDE.md"
      - ".gitignore"
      - "LICENSE"
      - "BUGBASH-*/**"
  pull_request:
    branches:
      - master
    # Same ignore list as the push trigger above.
    paths-ignore:
      - "**.md"
      - "docs/**"
      - "CLAUDE.md"
      - ".gitignore"
      - "LICENSE"
      - "BUGBASH-*/**"
  schedule:
    # Weekly trigger, kept for optional scheduled jobs (see the e2e job).
    - cron: "0 6 * * 1"
  workflow_dispatch:

concurrency:
  # CI-minute savings (2026-05-21): a new commit cancels the run that is still
  # in flight for the same branch/PR. The group key contains github.ref, so
  # runs for other branches/PRs are unaffected and still execute in parallel.
  group: "ci-${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: true
jobs:
# Stale-green guard. A PR can show a green CI run that was executed BEFORE a
# breaking commit landed on the base branch — merging it would ship a broken
# master. This job FAILS if the PR branch does not contain origin/<base> as
# an ancestor, forcing an "Update branch" before the PR can merge.
#
# The ancestry check is made against the PR branch tip
# (github.event.pull_request.head.sha), not HEAD: on pull_request events
# actions/checkout checks out the synthetic merge commit of the PR into its
# base, and that commit always has the base tip as a parent, so comparing
# against HEAD would pass even for a branch that is behind.
up-to-date-with-base:
  runs-on: ubuntu-latest
  if: github.event_name == 'pull_request'
  steps:
    - uses: actions/checkout@v6
      with:
        # Full history: the merge-base ancestry walk needs the commits
        # between the base tip and the PR head.
        fetch-depth: 0
    - name: Fail if PR branch is behind its base branch
      env:
        # Event fields are handed to the script through env vars rather than
        # interpolated into the shell text, so their content can never be
        # parsed as shell syntax.
        BASE: ${{ github.event.pull_request.base.ref }}
        PR_HEAD: ${{ github.event.pull_request.head.sha }}
      run: |
        # Refresh origin/<base> so the comparison uses the current base tip.
        # No --depth here: the clone is already complete, and a depth-limited
        # fetch would only mark it shallow.
        git fetch origin "${BASE}"
        if git merge-base --is-ancestor "origin/${BASE}" "${PR_HEAD}"; then
          echo "PR branch contains origin/${BASE} — up to date."
        else
          echo "::error::PR branch is behind origin/${BASE}. Update the branch (merge/rebase ${BASE}) and re-run CI so it validates against current base."
          exit 1
        fi
# Main gate: builds, vets and runs the hermetic Go test suite against
# Postgres, Redis and NATS, then reports the job result to New Relic.
build-and-test:
  runs-on: ubuntu-latest
  services:
    postgres:
      image: postgres:16-alpine
      env:
        POSTGRES_USER: postgres
        POSTGRES_PASSWORD: postgres
        POSTGRES_DB: instant_dev_test
      ports:
        - "5432:5432"
      options: >-
        --health-cmd pg_isready
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
    redis:
      image: redis:7-alpine
      ports:
        - "6379:6379"
      options: >-
        --health-cmd "redis-cli ping"
        --health-interval 10s
        --health-timeout 5s
        --health-retries 5
  env:
    TEST_DATABASE_URL: postgres://postgres:postgres@localhost:5432/instant_dev_test?sslmode=disable
    TEST_REDIS_URL: redis://localhost:6379/15
    # db-provider admin target. internal/providers/db/local.go CREATEs a
    # customer database per /db/new; in tests it connects to
    # TEST_POSTGRES_CUSTOMERS_URL. testhelpers defaults this to an
    # unreachable localhost:5434, so without this every postgres-
    # provisioning test (TestDBNew_*, TestBulkTwin_*) 503'd. Points at an
    # instant_customers DB created on the same service container below —
    # exactly as deploy.yml's proven-green gate does.
    TEST_POSTGRES_CUSTOMERS_URL: postgres://postgres:postgres@localhost:5432/instant_customers?sslmode=disable
  steps:
    - uses: actions/checkout@v6
    - name: Checkout proto sibling (for go.mod replace ../proto)
      uses: actions/checkout@v6
      with:
        repository: ${{ vars.PROTO_REPO || format('{0}/proto', github.repository_owner) }}
        token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
        path: _proto_ci
    - name: Place ../proto for Go replace directive
      run: mv _proto_ci ../proto
    - name: Checkout common sibling (for go.mod replace ../common)
      uses: actions/checkout@v6
      with:
        repository: ${{ vars.COMMON_REPO || format('{0}/common', github.repository_owner) }}
        token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
        path: _common_ci
    - name: Place ../common for Go replace directive
      run: mv _common_ci ../common
    - uses: actions/setup-go@v6
      with:
        go-version: '1.25'
    - name: Apply DB migrations to the test database
      # Mirrors deploy.yml's proven-green gate. Before this step CI ran
      # tests against a BARE Postgres whose schema came ONLY from
      # testhelpers.runMigrations — a hand-maintained mirror. This step
      # applies the REAL migration files (exactly like `make test-db-up`),
      # then creates instant_customers — the db provider's local backend
      # (internal/providers/db/local.go) CREATEs a customer database per
      # /db/new and connects to TEST_POSTGRES_CUSTOMERS_URL for it. Without
      # this DB every postgres provision (TestDBNew_*, TestBulkTwin_*) 503'd.
      env:
        PGPASSWORD: postgres
      run: |
        # Iterate the glob directly (pathname expansion is already sorted)
        # instead of parsing `ls` output, which word-splits file names and
        # silently applies nothing when the directory has no .sql files.
        for f in internal/db/migrations/*.sql; do
          # An unmatched glob stays literal; fail loudly rather than report
          # "all migrations applied" for an empty set.
          [ -e "$f" ] || { echo "::error::no migration files found in internal/db/migrations"; exit 1; }
          echo "→ applying $(basename "$f")"
          # ON_ERROR_STOP=1 makes psql exit non-zero on the first failing
          # statement; without it `psql -f` returns 0 even when the SQL
          # errors, so a broken migration would pass this step unnoticed.
          psql -v ON_ERROR_STOP=1 -h localhost -U postgres -d instant_dev_test -f "$f" >/dev/null
        done
        echo "all migrations applied to instant_dev_test"
        psql -h localhost -U postgres -d postgres -c "CREATE DATABASE instant_customers" >/dev/null
        echo "created instant_customers (db-provider admin target)"
    - run: go build ./...
    - run: go vet ./...
    - name: Start NATS with monitoring (queue provider health-checks :8222)
      # internal/providers/queue/local.go Provision() health-checks
      # http://<NATSHost>:8222/healthz then returns nats://<host>:4222.
      # TestQueue_* build a handler with an empty NATSHost, which defaults to
      # "localhost" (queueprovider.New("")), so they need a real NATS
      # reachable on localhost:8222. GitHub service containers can't pass the
      # `-m` monitoring flag, so we run nats-server here instead. NATS-DOWN
      # tests use the reserved non-resolvable host `nats.test`, so a live NATS
      # on localhost does not collide with their 503 expectations.
      run: |
        docker run -d --name nats -p 4222:4222 -p 8222:8222 nats:2.10-alpine -m 8222
        # Poll the monitoring endpoint for up to 15 seconds.
        for i in $(seq 1 15); do
          curl -sf http://localhost:8222/healthz >/dev/null && { echo "NATS healthy after ${i}s"; break; }
          echo "waiting for NATS monitoring endpoint (${i}/15)"; sleep 1
        done
        # Final probe decides the step result if the loop ran out.
        curl -sf http://localhost:8222/healthz >/dev/null || { echo "::error::NATS monitoring never came up"; exit 1; }
    # The gate. This MUST stay equal to deploy.yml's proven-green
    # invocation (`go test ./... -short -count=1 -p 1`) PLUS `-race`:
    #   - `-p 1` is load-bearing: every package shares the single
    #     instant_dev_test DB + redis/15. Default parallelism runs ~25
    #     package binaries at once and they corrupt each other's DB/redis
    #     state mid-test. `-p 1` serialises package execution.
    #   - `-short` matches deploy.yml so the two gates run the identical
    #     hermetic suite (tests that genuinely need a live k8s/provisioner
    #     stack are tagged `e2e` and excluded from `./...` anyway).
    #   - `-race` is the extra rigor CI adds over deploy.yml — it caught
    #     the BillingHandler.ensureRazorpayFns data race.
    - run: go test ./... -short -race -count=1 -p 1
    # Wave 5 — push the gated-test result to New Relic so a red run is
    # studyable from an NR dashboard, not just the GitHub Actions log.
    # if: always() so a FAILED `go test` step still records the failure
    # (InstantCITestRun result=fail + InstantCITestFailure). No-ops cleanly
    # when the NR secret/account is absent (fork PRs) — never reds the PR.
    - name: Emit CI result to New Relic
      if: always()
      uses: ./.github/actions/nr-ci-event
      with:
        license-key: ${{ secrets.NEW_RELIC_LICENSE_KEY }}
        account-id: ${{ secrets.NEW_RELIC_ACCOUNT_ID }}
        result: ${{ job.status == 'success' && 'pass' || 'fail' }}
        suite: build-and-test
        pr-number: ${{ github.event.pull_request.number }}
        failed-step: ${{ job.status != 'success' && 'go build / vet / test (-short -race -p 1)' || '' }}
        repo: ${{ github.repository }}
        workflow: ${{ github.workflow }}
        branch: ${{ github.ref_name }}
        commit-sha: ${{ github.sha }}
        log-url: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        event-name: ${{ github.event_name }}
        actor: ${{ github.actor }}
# The e2e suite needs a live Kubernetes stack (see repo CLAUDE.md), so this
# job is skipped on push/PR and only runs for the schedule trigger or a manual
# dispatch — default CI stays fast.
e2e:
  runs-on: ubuntu-latest
  if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }}
  steps:
    - uses: actions/checkout@v6
    # Sibling repos are cloned inside the workspace, then moved next to it so
    # the relative ../proto and ../common paths resolve.
    - name: Checkout proto sibling
      uses: actions/checkout@v6
      with:
        path: _proto_ci
        repository: ${{ vars.PROTO_REPO || format('{0}/proto', github.repository_owner) }}
        token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
    - run: mv _proto_ci ../proto
    - name: Checkout common sibling
      uses: actions/checkout@v6
      with:
        path: _common_ci
        repository: ${{ vars.COMMON_REPO || format('{0}/common', github.repository_owner) }}
        token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
    - run: mv _common_ci ../common
    - uses: actions/setup-go@v6
      with:
        go-version: "1.25"
    # Placeholder only: prints how to run the suite, executes no tests.
    - name: E2E placeholder (wire to k8s / secrets)
      run: |
        printf '%s\n' \
          "Configure services, secrets, and port-forwards, then run e.g.:" \
          "  go test ./e2e/... -tags e2e -count=1 -timeout 180s" \
          "See CLAUDE.md (Full-stack E2E) for required env vars."
# Cross-repo Layer-1 auth-contract gate. The api owns the CORS allowlist
# and the /auth/exchange + /auth/email/start endpoints — an api-side
# change that drops access-control-allow-credentials would not trigger
# the instanode-web CI on its own, so the browser-level regression
# (2026-05-29 → 2026-05-30) could ship despite green api unit tests.
#
# This job fires a repository_dispatch on instanode-web; instanode-web's
# .github/workflows/auth-contract-e2e.yml listens for the matching
# `auth-contract-e2e-from-api` type and runs the Chromium smoke against
# the same prod targets. The dispatch result will not gate this PR
# mechanically (cross-repo status checks aren't wired here yet — see
# follow-up issue), but it surfaces the failure in the instanode-web
# Actions tab so anyone reviewing the api PR can click through.
#
# Auth: REPO_ACCESS_TOKEN must have `repo` scope on instanode-web. If the
# secret is missing the step soft-skips (warn, don't fail) so the api CI
# stays green during initial rollout — flip the soft-skip to `exit 1`
# once the secret is provisioned on all relevant environments.
dispatch-auth-contract-e2e:
  name: Trigger instanode-web auth-contract smoke
  runs-on: ubuntu-latest
  needs: build-and-test
  if: github.event_name == 'pull_request' || (github.event_name == 'push' && github.ref == 'refs/heads/master')
  steps:
    - name: Fire repository_dispatch on instanode-web
      env:
        DISPATCH_TOKEN: ${{ secrets.REPO_ACCESS_TOKEN }}
        # SECURITY: avoid interpolating untrusted github.event.* fields
        # into the shell. Only stable repo-controlled identifiers are
        # exposed and the payload is constructed via printf with
        # parameter expansion (no string concatenation of attacker
        # input).
        SHA: ${{ github.sha }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        TRIGGER: ${{ github.event_name }}
      run: |
        set -euo pipefail
        if [ -z "${DISPATCH_TOKEN:-}" ]; then
          # The second argument is single-quoted on purpose: inside double
          # quotes the backticks around `repo` are a command substitution
          # and the shell would try to execute a `repo` command.
          echo "::warning::REPO_ACCESS_TOKEN not set; skipping cross-repo auth-contract dispatch. " \
            'Provision the secret on the api repo (with `repo` scope on instanode-web) to enable Layer-1 gate.'
          exit 0
        fi
        # PR_NUMBER may be empty on push events; default to "main".
        # Defense-in-depth: enforce numeric PR number even though
        # github.event.pull_request.number is an integer assigned by
        # GitHub, never user-controlled. The reject pattern matches any
        # value containing a non-digit; a prefix glob such as `[0-9]*`
        # would only test the first character.
        pr="${PR_NUMBER:-main}"
        case "$pr" in
          main) ;;
          *[!0-9]*) echo "::error::unexpected PR_NUMBER value: $pr"; exit 1 ;;
        esac
        # SHA is a 40-char hex from github.sha — repo-controlled. Validate
        # the full shape (hex characters only, exactly 40 of them) to keep
        # the JSON payload trivially-injection-proof.
        case "$SHA" in
          ''|*[!0-9a-f]*) echo "::error::unexpected SHA shape: $SHA"; exit 1 ;;
        esac
        if [ "${#SHA}" -ne 40 ]; then
          echo "::error::unexpected SHA shape: $SHA"
          exit 1
        fi
        # TRIGGER is github.event_name — a GitHub-controlled enum
        # (push|pull_request|schedule|workflow_dispatch|...). Allowlist
        # the values this job is reachable from.
        case "$TRIGGER" in
          push|pull_request) ;;
          *) echo "::error::unexpected TRIGGER: $TRIGGER"; exit 1 ;;
        esac
        payload=$(printf '{"event_type":"auth-contract-e2e-from-api","client_payload":{"api_sha":"%s","api_pr":"%s","trigger":"%s","api_url":"https://api.instanode.dev","web_origin":"https://instanode.dev"}}' \
          "$SHA" "$pr" "$TRIGGER")
        echo "Dispatching to InstaNode-dev/instanode-web: $payload"
        # `|| echo` keeps a curl transport failure (DNS, timeout, reset)
        # from aborting the step under `set -e`; the status check below
        # then reports it as the same soft failure as a non-204 response.
        http_code=$(curl -sS -o /tmp/dispatch.out -w '%{http_code}' \
          -X POST \
          -H "Accept: application/vnd.github+json" \
          -H "Authorization: Bearer ${DISPATCH_TOKEN}" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          https://api.github.com/repos/InstaNode-dev/instanode-web/dispatches \
          -d "$payload") || echo "::warning::curl exited non-zero while dispatching to instanode-web."
        echo "dispatch response: HTTP $http_code"
        cat /tmp/dispatch.out || true
        # GitHub returns 204 on success. Treat anything else as a soft
        # failure during the rollout window — log and pass so a transient
        # cross-repo hiccup doesn't red the api PR. Tighten to `exit 1`
        # once we have a week of clean runs.
        if [ "$http_code" != "204" ]; then
          echo "::warning::cross-repo dispatch returned $http_code (expected 204). Not failing the api PR yet."
        fi