diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0391c1dd..35ef9c06 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -288,6 +288,21 @@ jobs:
           GOOGLE_REFRESH_TOKEN: ${{ secrets.GOOGLE_REFRESH_TOKEN }}
           GOOGLE_SPREADSHEET_ID: ${{ vars.GOOGLE_SPREADSHEET_ID }}

+      - name: Smokescreen test
+        run: |
+          if [ -z "${STRIPE_API_KEY:-}" ]; then
+            echo "::warning::smokescreen.test.sh skipped — STRIPE_API_KEY not available"
+            exit 0
+          fi
+          bash e2e/smokescreen.test.sh
+        env:
+          STRIPE_API_KEY: ${{ secrets.STRIPE_API_KEY }}
+          ENGINE_IMAGE: 'ghcr.io/${{ github.repository }}:${{ github.sha }}'
+          GOOGLE_CLIENT_ID: ${{ vars.GOOGLE_CLIENT_ID }}
+          GOOGLE_CLIENT_SECRET: ${{ secrets.GOOGLE_CLIENT_SECRET }}
+          GOOGLE_REFRESH_TOKEN: ${{ secrets.GOOGLE_REFRESH_TOKEN }}
+          GOOGLE_SPREADSHEET_ID: ${{ vars.GOOGLE_SPREADSHEET_ID }}
+
       - name: Publish test
         run: |
           if [ -z "${STRIPE_NPM_REGISTRY:-}" ]; then
diff --git a/apps/engine/package.json b/apps/engine/package.json
index 2246c8be..80dcc649 100644
--- a/apps/engine/package.json
+++ b/apps/engine/package.json
@@ -43,6 +43,7 @@
     "@stripe/sync-state-postgres": "workspace:*",
     "@stripe/sync-ts-cli": "workspace:*",
     "citty": "^0.1.6",
+    "https-proxy-agent": "^7.0.6",
     "dotenv": "^16.4.7",
     "googleapis": "^148.0.0",
     "hono": "^4",
diff --git a/apps/engine/src/cli/index.ts b/apps/engine/src/cli/index.ts
index 8a8cb22b..b68a5eff 100755
--- a/apps/engine/src/cli/index.ts
+++ b/apps/engine/src/cli/index.ts
@@ -1,4 +1,17 @@
 #!/usr/bin/env node
+import https from 'node:https'
+import { HttpsProxyAgent } from 'https-proxy-agent'
+
+const proxyUrl = process.env.HTTPS_PROXY || process.env.HTTP_PROXY
+if (proxyUrl) {
+  // Patch the global HTTPS agent so gaxios/googleapis routes through the proxy.
+  // The engine's only outbound HTTPS targets are external APIs (Stripe, Google) so
+  // this does not incorrectly proxy internal traffic. The Stripe SDK is scoped
+  // separately via an explicit httpClient in makeClient(). If NO_PROXY support is
+  // needed in future, replace with a per-host agent or proxy-from-env.
+  https.globalAgent = new HttpsProxyAgent(proxyUrl)
+}
+
 import { runMain } from 'citty'
 import { createProgram } from './command.js'

diff --git a/compose.yml b/compose.yml
index baec1841..443448bd 100644
--- a/compose.yml
+++ b/compose.yml
@@ -78,6 +78,19 @@ services:
       retries: 20
       start_period: 30s

+  # --- Smokescreen (HTTP CONNECT proxy) ---
+
+  smokescreen:
+    build: docker/smokescreen
+    ports:
+      - '4750:4750'
+    healthcheck:
+      test: ['CMD-SHELL', 'nc -z localhost 4750']
+      interval: 5s
+      timeout: 3s
+      retries: 5
+    profiles: [smokescreen]
+
   # --- Local npm registry (for publish testing) ---

   npm-registry:
diff --git a/docker/smokescreen/Dockerfile b/docker/smokescreen/Dockerfile
new file mode 100644
index 00000000..8597c56e
--- /dev/null
+++ b/docker/smokescreen/Dockerfile
@@ -0,0 +1,11 @@
+FROM golang:1.23 AS builder
+WORKDIR /app
+# NOTE(review): this clones the upstream default branch unpinned, so image
+# builds are not reproducible — consider pinning to a tag or commit.
+RUN git clone --depth 1 https://github.com/stripe/smokescreen.git .
+RUN CGO_ENABLED=0 GOOS=linux go build -o smokescreen -ldflags="-s -w" .
+
+FROM alpine:3.19
+RUN apk add --no-cache netcat-openbsd
+COPY --from=builder /app/smokescreen /usr/local/bin/smokescreen
+ENTRYPOINT ["smokescreen"]
diff --git a/e2e/smokescreen.test.sh b/e2e/smokescreen.test.sh
new file mode 100755
index 00000000..b40fdc21
--- /dev/null
+++ b/e2e/smokescreen.test.sh
@@ -0,0 +1,179 @@
+#!/usr/bin/env bash
+# Test src-stripe, dest-pg, and dest-sheets through smokescreen HTTP CONNECT proxy.
+#
+# Uses Docker network isolation to ENFORCE that all outbound HTTPS goes through
+# smokescreen — the engine container has no direct internet access.
+# Without a working proxy, Stripe and Google API calls would fail outright.
+#
+# Required: STRIPE_API_KEY
+# Optional: ENGINE_IMAGE (skips local build — CI passes the pre-built image)
+# Optional: GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET, GOOGLE_REFRESH_TOKEN, GOOGLE_SPREADSHEET_ID
+#           (dest-sheets is skipped unless all four are set)
+set -euo pipefail
+
+# Fail fast with a clear message rather than an "unbound variable" error that
+# would otherwise only surface after the images have been built.
+: "${STRIPE_API_KEY:?STRIPE_API_KEY is required}"
+
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+
+# In CI the pre-built image is passed via ENGINE_IMAGE; locally we build from source.
+BUILD_ENGINE=false
+if [ -z "${ENGINE_IMAGE:-}" ]; then
+  ENGINE_IMAGE="sync-engine:smokescreen-test"
+  BUILD_ENGINE=true
+fi
+
+SMOKESCREEN_IMAGE="sync-engine-smokescreen:test"
+S="$$" # unique suffix for this run
+NET="smokescreen-isolated-${S}"
+SMOKESCREEN_CONTAINER="smokescreen-${S}"
+ENGINE_CONTAINER="engine-smokescreen-${S}"
+PG_CONTAINER="pg-smokescreen-${S}"
+ENGINE_URL="" # set after container starts
+
+cleanup() {
+  docker rm -f "$ENGINE_CONTAINER" "$SMOKESCREEN_CONTAINER" "$PG_CONTAINER" >/dev/null 2>&1 || true
+  docker network rm "$NET" >/dev/null 2>&1 || true
+}
+trap cleanup EXIT
+
+# ── Build images ────────────────────────────────────────────────────────────
+
+echo "==> Building smokescreen image"
+docker build -t "$SMOKESCREEN_IMAGE" "$REPO_ROOT/docker/smokescreen"
+
+if $BUILD_ENGINE; then
+  echo "==> Building engine image"
+  docker build -t "$ENGINE_IMAGE" "$REPO_ROOT"
+fi
+
+# ── Isolated network ─────────────────────────────────────────────────────────
+# --internal means no default gateway → containers cannot reach the internet directly.
+
+echo "==> Creating isolated Docker network: $NET"
+docker network create --internal "$NET"
+
+# ── Postgres (on isolated network — reachable by engine, not internet-exposed) ──
+
+echo "==> Starting Postgres"
+docker run -d --name "$PG_CONTAINER" \
+  --network "$NET" \
+  -e POSTGRES_USER=postgres \
+  -e POSTGRES_PASSWORD=postgres \
+  -e POSTGRES_DB=postgres \
+  postgres:18
+PG_URL="postgres://postgres:postgres@${PG_CONTAINER}:5432/postgres"
+
+# ── Smokescreen (isolated net + bridge → has internet, proxies for engine) ───
+
+echo "==> Starting smokescreen"
+docker run -d --name "$SMOKESCREEN_CONTAINER" \
+  --network "$NET" \
+  "$SMOKESCREEN_IMAGE"
+# Connect to default bridge so smokescreen itself can reach the internet
+docker network connect bridge "$SMOKESCREEN_CONTAINER"
+
+for i in $(seq 1 20); do
+  docker exec "$SMOKESCREEN_CONTAINER" nc -z localhost 4750 >/dev/null 2>&1 && break
+  [ "$i" -eq 20 ] && { echo "FAIL: smokescreen health check timed out"; exit 1; }
+  sleep 0.5
+done
+echo " Smokescreen ready"
+
+# ── Engine (isolated network ONLY — HTTPS must route through smokescreen) ────
+
+echo "==> Starting engine (HTTPS_PROXY=http://${SMOKESCREEN_CONTAINER}:4750)"
+# No -p port mapping: --internal networks block port publishing on Linux.
+# Instead, reach the engine by its container IP on the bridge (host has a
+# directly connected route to the bridge subnet even for --internal networks).
+docker run -d --name "$ENGINE_CONTAINER" \
+  --network "$NET" \
+  -e PORT=3000 \
+  -e HTTPS_PROXY="http://${SMOKESCREEN_CONTAINER}:4750" \
+  "$ENGINE_IMAGE"
+
+# Wait for the container to get an IP assignment. `|| true` keeps a transient
+# `docker inspect` failure from aborting the script under `set -e`, so the loop
+# can actually retry.
+ENGINE_IP=""
+for i in $(seq 1 10); do
+  ENGINE_IP=$(docker inspect "$ENGINE_CONTAINER" \
+    --format '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' 2>/dev/null || true)
+  [ -n "$ENGINE_IP" ] && break
+  sleep 0.5
+done
+[ -n "$ENGINE_IP" ] || { echo "FAIL: could not get engine container IP"; exit 1; }
+ENGINE_URL="http://${ENGINE_IP}:3000"
+
+for i in $(seq 1 20); do
+  # --max-time bounds each probe so an unroutable IP cannot stall the loop
+  curl -sf --max-time 2 "${ENGINE_URL}/health" >/dev/null && break
+  [ "$i" -eq 20 ] && {
+    echo "FAIL: engine health check timed out (IP: $ENGINE_IP)"
+    echo "==> Engine container logs:"
+    docker logs "$ENGINE_CONTAINER" 2>&1 || true
+    echo "==> Engine container inspect (State):"
+    docker inspect "$ENGINE_CONTAINER" --format '{{json .State}}' 2>&1 || true
+    exit 1
+  }
+  sleep 0.5
+done
+echo " Engine ready at $ENGINE_URL"
+
+for i in $(seq 1 20); do
+  docker exec "$PG_CONTAINER" pg_isready -U postgres >/dev/null 2>&1 && break
+  [ "$i" -eq 20 ] && { echo "FAIL: postgres health check timed out"; exit 1; }
+  sleep 0.5
+done
+echo " Postgres ready"
+
+# ── 1) Read from Stripe (HTTPS → smokescreen → api.stripe.com) ───────────────
+
+echo "==> src-stripe: read through smokescreen"
+READ_PARAMS=$(printf \
+  '{"source":{"name":"stripe","api_key":"%s","backfill_limit":5},"destination":{"name":"postgres","url":"postgres://unused:5432/db","schema":"stripe"},"streams":[{"name":"products"}]}' \
+  "$STRIPE_API_KEY")
+OUTPUT=$(curl -sf --max-time 30 -X POST "${ENGINE_URL}/read" \
+  -H "X-Pipeline: $READ_PARAMS")
+RECORD_COUNT=$(echo "$OUTPUT" | grep -c '"type":"record"' || true)
+echo " Got $RECORD_COUNT record(s)"
+[ "$RECORD_COUNT" -gt 0 ] || { echo "FAIL: no records from Stripe"; exit 1; }
+
+# ── 2) Write to Postgres (direct TCP on isolated network) ─────────────────────
+
+echo "==> dest-pg: setup + write"
+PG_PARAMS=$(printf \
+  '{"source":{"name":"stripe","api_key":"%s"},"destination":{"name":"postgres","url":"%s","schema":"stripe_smokescreen_test"}}' \
+  "$STRIPE_API_KEY" "$PG_URL")
+# Each request has an explicit failure branch: under `set -e`, a failing command
+# on the left of `&&` or ahead of `|| true` is ignored and the test would pass.
+curl -sf --max-time 30 -X POST "${ENGINE_URL}/setup" \
+  -H "X-Pipeline: $PG_PARAMS" || { echo "FAIL: dest-pg setup request failed"; exit 1; }
+echo " setup OK"
+WRITE_OUTPUT=$(echo "$OUTPUT" | curl -sf --max-time 60 -X POST "${ENGINE_URL}/write" \
+  -H "X-Pipeline: $PG_PARAMS" \
+  -H "Content-Type: application/x-ndjson" \
+  --data-binary @-) || { echo "FAIL: dest-pg write request failed"; exit 1; }
+head -n 3 <<<"$WRITE_OUTPUT"
+echo " dest-pg OK"
+
+# ── 3) Write to Google Sheets (HTTPS → smokescreen → googleapis.com) ─────────
+
+if [ -n "${GOOGLE_CLIENT_ID:-}" ] && [ -n "${GOOGLE_CLIENT_SECRET:-}" ] &&
+  [ -n "${GOOGLE_REFRESH_TOKEN:-}" ] && [ -n "${GOOGLE_SPREADSHEET_ID:-}" ]; then
+  echo "==> dest-sheets: write through smokescreen"
+  SHEETS_PARAMS=$(printf \
+    '{"source":{"name":"stripe","api_key":"%s"},"destination":{"name":"google-sheets","client_id":"%s","client_secret":"%s","access_token":"unused","refresh_token":"%s","spreadsheet_id":"%s"}}' \
+    "$STRIPE_API_KEY" "$GOOGLE_CLIENT_ID" "$GOOGLE_CLIENT_SECRET" "$GOOGLE_REFRESH_TOKEN" "$GOOGLE_SPREADSHEET_ID")
+  WRITE_OUTPUT=$(echo "$OUTPUT" | curl -sf --max-time 60 -X POST "${ENGINE_URL}/write" \
+    -H "X-Pipeline: $SHEETS_PARAMS" \
+    -H "Content-Type: application/x-ndjson" \
+    --data-binary @-) || { echo "FAIL: dest-sheets write request failed"; exit 1; }
+  head -n 3 <<<"$WRITE_OUTPUT"
+  echo " dest-sheets OK"
+else
+  echo "==> Skipping dest-sheets (Google credentials not fully set)"
+fi
+
+echo "==> All smokescreen tests passed"
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 98e38c48..425438d9 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -91,6 +91,9 @@ importers:
       hono:
         specifier: ^4
         version: 4.12.8
+      https-proxy-agent:
+        specifier: ^7.0.6
+        version: 7.0.6
       pg:
         specifier: ^8.16.3
         version: 8.16.3