From 732f8d4fb17bdf6f42a8d7d8e999b00115a0e350 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Wed, 20 May 2026 15:46:21 +0100 Subject: [PATCH 1/4] fix(webapp): sanitize Prisma P1001 leaks in /api/* responses Adds an Express middleware that intercepts /api/* responses and, when the status is 5xx and the body matches a known Prisma P1001 ("Can't reach database server") leak, rewrites the body to a generic {"error":"Internal Server Error"} before it reaches the client. The status code is preserved. Sits outside Remix so it catches leaks the per-route try/catch from #3664 didn't cover. Deliberately surgical: one rule (P1001 only), one namespace (/api/* only). /engine/, /otel/, /realtime/, and UI routes are intentionally excluded. Expandable later by appending to LEAK_RULES or MONITORED_PREFIXES. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../universal-api-error-boundary.md | 6 + apps/webapp/app/entry.server.tsx | 1 + .../app/services/apiErrorBoundary.server.ts | 142 ++++++ apps/webapp/server.ts | 6 + apps/webapp/test/apiErrorBoundary.test.ts | 482 ++++++++++++++++++ 5 files changed, 637 insertions(+) create mode 100644 .server-changes/universal-api-error-boundary.md create mode 100644 apps/webapp/app/services/apiErrorBoundary.server.ts create mode 100644 apps/webapp/test/apiErrorBoundary.test.ts diff --git a/.server-changes/universal-api-error-boundary.md b/.server-changes/universal-api-error-boundary.md new file mode 100644 index 00000000000..ee537c85815 --- /dev/null +++ b/.server-changes/universal-api-error-boundary.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: improvement +--- + +`/api/*` responses no longer leak Prisma "Can't reach database server" (P1001) errors when the database is unreachable — affected responses are rewritten to a generic Internal Server Error before reaching the client. diff --git a/apps/webapp/app/entry.server.tsx b/apps/webapp/app/entry.server.tsx index 11c3274e865..6f348c32889 100644 --- a/apps/webapp/app/entry.server.tsx +++ b/apps/webapp/app/entry.server.tsx @@ -289,6 +289,7 @@ process.on("uncaughtException", (error, origin) => { singleton("RunEngineEventBusHandlers", registerRunEngineEventBusHandlers); singleton("SetupBatchQueueCallbacks", setupBatchQueueCallbacks); +export { apiErrorBoundary } from "./services/apiErrorBoundary.server"; export { apiRateLimiter } from "./services/apiRateLimit.server"; export { engineRateLimiter } from "./services/engineRateLimit.server"; export { runWithHttpContext } from "./services/httpAsyncStorage.server"; diff --git a/apps/webapp/app/services/apiErrorBoundary.server.ts b/apps/webapp/app/services/apiErrorBoundary.server.ts new file mode 100644 index 00000000000..9777907c45c --- /dev/null +++ b/apps/webapp/app/services/apiErrorBoundary.server.ts @@ -0,0 +1,142 @@ +import type { RequestHandler } from "express"; +import { logger } from "./logger.server"; + +// Last line of defense for leaked error messages in `/api/*` responses. +// +// PR #3664 added a per-route try/catch to every `api.v1.*` loader/action so an +// unhandled throw can no longer reach Remix's default error path and surface +// `error.message` to the client. This middleware sits one layer further out: +// when an `/api/*` response ends with status 5xx and the assembled body matches +// a known leak rule, the body is rewritten to a generic +// `{"error":"Internal Server Error"}` before it reaches the client. The +// original status code is preserved — a leaky 503 stays 503 — so status-aware +// clients keep any nuance the route deliberately set. +// +// Remix's express adapter writes the response body via `res.write(chunk)` (not +// `res.end(chunk)`), so this middleware must buffer writes and evaluate the +// assembled body at `end` time. SSE responses bypass buffering by content-type +// inspection; oversized responses bypass via a hard cap so streaming code +// paths that exceed the cap are not held in memory. +// +// Conservative initial rule set: only the patterns we have observed leaking +// in production are filtered. Mirrors `FINGERPRINT_RULES` in +// `apps/webapp/sentry.server.ts`. Add a rule here when a new leak is spotted. +const LEAK_RULES: ReadonlyArray<{ name: string; pattern: RegExp }> = [ + { + // Prisma "Can't reach database server" — surfaces connection strings and + // host info when Postgres is unreachable mid-query (the failure mode that + // motivated #3664 and the Sentry P1001 fingerprint rule). + name: "prisma-p1001", + pattern: /\bP1001\b|Can't reach database server/i, + }, +]; + +const SANITIZED_BODY = JSON.stringify({ error: "Internal Server Error" }); + +// Path prefixes the middleware monitors. Scoped to `/api/*` only — the SDK +// surface where leaks have actually been reported. `/engine/*`, `/otel/*`, +// `/realtime/*`, and UI/dashboard routes are intentionally out of scope: +// they have different traffic profiles, leak shapes, or already self-handle +// errors. Expand this list reactively when a leak is observed on another +// namespace. +const MONITORED_PREFIXES = ["/api/"] as const; + +// Error responses are tiny. A buffered response that grows past this cap is +// not a 5xx error body — bail out of buffering so streaming responses do not +// pile up in memory. +const MAX_BUFFER_BYTES = 64 * 1024; + +export const apiErrorBoundary: RequestHandler = (req, res, next) => { + if (!MONITORED_PREFIXES.some((prefix) => req.path.startsWith(prefix))) { + return next(); + } + + const originalWrite = res.write.bind(res); + const originalEnd = res.end.bind(res); + + let chunks: Buffer[] = []; + let bufferedBytes = 0; + let bypass = false; + + const flushAndBypass = () => { + if (bypass) return; + bypass = true; + if (chunks.length === 0) return; + const buffered = Buffer.concat(chunks); + chunks = []; + bufferedBytes = 0; + (originalWrite as (c: Buffer) => boolean)(buffered); + }; + + const isStreamingContentType = (): boolean => { + const ct = String(res.getHeader("content-type") ?? ""); + if (!ct) return false; + return ct.includes("text/event-stream") || ct.includes("application/octet-stream"); + }; + + const toBuffer = (chunk: unknown): Buffer => { + if (Buffer.isBuffer(chunk)) return chunk; + // Covers Uint8Array (Remix's stream output), Uint16Array, Int32Array, + // DataView, etc. — anything backed by an ArrayBuffer. Without this, + // `String(chunk)` byte-mangles typed arrays into "85,110,..." strings. + if (ArrayBuffer.isView(chunk)) { + return Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength); + } + if (typeof chunk === "string") return Buffer.from(chunk); + return Buffer.from(String(chunk)); + }; + + const patchedWrite = (chunk: unknown, ...rest: unknown[]): boolean => { + // If headers have already been flushed, we cannot rewrite Content-Type + // or status later — sanitization is impossible. Flush + bypass and let + // bytes flow through unmodified. + if (!bypass && (res.headersSent || isStreamingContentType())) flushAndBypass(); + if (bypass) { + return (originalWrite as (c: unknown, ...r: unknown[]) => boolean)(chunk, ...rest); + } + if (chunk != null) { + const buf = toBuffer(chunk); + chunks.push(buf); + bufferedBytes += buf.length; + if (bufferedBytes > MAX_BUFFER_BYTES) flushAndBypass(); + } + return true; + }; + + const patchedEnd = (chunk?: unknown, ...rest: unknown[]) => { + // Same guard as patchedWrite: if headers were flushed (e.g. via an + // explicit `res.flushHeaders()` before end-with-body), we cannot rewrite + // the response — flush buffered chunks unchanged and skip sanitization. + if (!bypass && res.headersSent) flushAndBypass(); + if (bypass) { + return (originalEnd as (c?: unknown, ...r: unknown[]) => typeof res)(chunk, ...rest); + } + if (chunk != null) chunks.push(toBuffer(chunk)); + const body = Buffer.concat(chunks); + bypass = true; + + if (res.statusCode >= 500 && body.length > 0) { + const text = body.toString("utf8"); + const matched = LEAK_RULES.find((rule) => rule.pattern.test(text)); + if (matched) { + logger.error("apiErrorBoundary sanitized leaked error response", { + rule: matched.name, + path: req.path, + method: req.method, + status: res.statusCode, + }); + res.setHeader("Content-Type", "application/json"); + res.removeHeader("Content-Length"); + return (originalEnd as (c?: unknown) => typeof res)(SANITIZED_BODY); + } + } + + if (body.length > 0) (originalWrite as (c: Buffer) => boolean)(body); + return (originalEnd as () => typeof res)(); + }; + + res.write = patchedWrite as unknown as typeof res.write; + res.end = patchedEnd as unknown as typeof res.end; + + next(); +}; diff --git a/apps/webapp/server.ts b/apps/webapp/server.ts index e266c6985c8..5567c1f8c5a 100644 --- a/apps/webapp/server.ts +++ b/apps/webapp/server.ts @@ -119,6 +119,7 @@ if (ENABLE_CLUSTER && cluster.isPrimary) { if (process.env.HTTP_SERVER_DISABLED !== "true") { const socketIo: { io: IoServer } | undefined = build.entry.module.socketIo; const wss: WebSocketServer | undefined = build.entry.module.wss; + const apiErrorBoundary: express.RequestHandler = build.entry.module.apiErrorBoundary; const apiRateLimiter: RateLimitMiddleware = build.entry.module.apiRateLimiter; const engineRateLimiter: RateLimitMiddleware = build.entry.module.engineRateLimiter; const runWithHttpContext: RunWithHttpContextFunction = build.entry.module.runWithHttpContext; @@ -168,6 +169,11 @@ if (ENABLE_CLUSTER && cluster.isPrimary) { }); } + // Universal error boundary for /api/* responses. Sits outside Remix so + // it catches leaks regardless of whether the per-route try/catch from + // PR #3664 covered them. + app.use(apiErrorBoundary); + app.use(apiRateLimiter); app.use(engineRateLimiter); diff --git a/apps/webapp/test/apiErrorBoundary.test.ts b/apps/webapp/test/apiErrorBoundary.test.ts new file mode 100644 index 00000000000..36f3ad53dea --- /dev/null +++ b/apps/webapp/test/apiErrorBoundary.test.ts @@ -0,0 +1,482 @@ +import { beforeEach, describe, expect, test, vi } from "vitest"; + +// Intentional exception to the root CLAUDE.md "Never mock anything" rule. +// +// That rule targets unit-under-test and external-service mocking (Prisma, +// Redis, etc.) to avoid mock/prod drift — testcontainers is the prescribed +// alternative for those. Here we're mocking `logger.server` only as an +// *observability sink* so two tests can assert `logger.error` is/isn't +// called on the sanitize path. There is no production behavior the mock +// can disagree with; the alternatives (refactoring the middleware to a +// DI factory, or importing the real logger which pulls in Sentry + env +// validation) add indirection that neither this PR nor the codebase +// pattern wants. Mirrors the existing approach in 7+ other webapp test +// files that mock `logger.server` the same way. +vi.mock("../app/services/logger.server", () => ({ + logger: { + info: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + warn: vi.fn(), + }, +})); + +import compression from "compression"; +import express, { type Express } from "express"; +import request from "supertest"; +import { apiErrorBoundary } from "../app/services/apiErrorBoundary.server.js"; +import { logger } from "../app/services/logger.server.js"; + +function buildApp(): Express { + const app = express(); + app.use(apiErrorBoundary); + return app; +} + +describe("apiErrorBoundary", () => { + beforeEach(() => { + vi.mocked(logger.error).mockClear(); + }); + + test("sanitizes a 500 body that matches a leak rule", async () => { + const app = buildApp(); + app.get("/api/v1/leaky", (_req, res) => { + res.status(500).json({ + message: + "PrismaClientInitializationError: Can't reach database server at host:5432 (P1001)", + }); + }); + + const response = await request(app).get("/api/v1/leaky"); + + expect(response.status).toBe(500); + expect(response.body).toEqual({ error: "Internal Server Error" }); + }); + + // Remix's express adapter writes responses via + // `writeReadableStreamToWritable`, which calls `res.write(chunk)` for each + // body chunk and then `res.end()` with no arguments. Any sanitizer that + // only inspects `res.end`'s chunk is a no-op against Remix-emitted responses. + test("sanitizes a 500 body written via res.write then res.end() (Remix adapter pattern)", async () => { + const app = buildApp(); + app.get("/api/v1/leaky-remix-style", (_req, res) => { + res.statusCode = 500; + res.setHeader("Content-Type", "application/json"); + res.write( + Buffer.from( + JSON.stringify({ + message: + "PrismaClientInitializationError: Can't reach database server at host:5432 (P1001)", + }) + ) + ); + res.end(); + }); + + const response = await request(app).get("/api/v1/leaky-remix-style"); + + expect(response.status).toBe(500); + expect(response.body).toEqual({ error: "Internal Server Error" }); + }); + + // Remix's express adapter pipes a ReadableStream into the + // response, so chunks reach `res.write` as raw Uint8Array — not Node + // Buffers. Treating them with `String(chunk)` byte-mangles the body. + test("sanitizes a 500 body written as a Uint8Array (Remix stream pattern)", async () => { + const app = buildApp(); + app.get("/api/v1/leaky-uint8array", (_req, res) => { + res.statusCode = 500; + res.setHeader("Content-Type", "application/json"); + const payload = JSON.stringify({ + message: "PrismaClientInitializationError: Can't reach database server (P1001)", + }); + res.write(new Uint8Array(Buffer.from(payload, "utf8"))); + res.end(); + }); + + const response = await request(app).get("/api/v1/leaky-uint8array"); + + expect(response.status).toBe(500); + expect(response.body).toEqual({ error: "Internal Server Error" }); + }); + + test("passes through a clean 5xx body written as a Uint8Array without mangling bytes", async () => { + const app = buildApp(); + app.get("/api/v1/clean-uint8array", (_req, res) => { + res.statusCode = 500; + res.setHeader("Content-Type", "text/plain"); + res.write(new Uint8Array(Buffer.from("Unexpected Server Error", "utf8"))); + res.end(); + }); + + const response = await request(app).get("/api/v1/clean-uint8array"); + + expect(response.status).toBe(500); + expect(response.text).toBe("Unexpected Server Error"); + }); + + test("passes through a clean 500 body written via res.write then res.end()", async () => { + const app = buildApp(); + app.get("/api/v1/clean-500-remix-style", (_req, res) => { + res.statusCode = 500; + res.setHeader("Content-Type", "application/json"); + res.write(Buffer.from(JSON.stringify({ error: "Service unavailable for reasons" }))); + res.end(); + }); + + const response = await request(app).get("/api/v1/clean-500-remix-style"); + + expect(response.status).toBe(500); + expect(response.body).toEqual({ error: "Service unavailable for reasons" }); + }); + + test("passes through a 500 body that does not match any rule", async () => { + const app = buildApp(); + app.get("/api/v1/clean-500", (_req, res) => { + res.status(500).json({ error: "Service unavailable for reasons" }); + }); + + const response = await request(app).get("/api/v1/clean-500"); + + expect(response.status).toBe(500); + expect(response.body).toEqual({ error: "Service unavailable for reasons" }); + }); + + test("passes through 200 responses", async () => { + const app = buildApp(); + app.get("/api/v1/ok", (_req, res) => { + res.status(200).json({ hello: "world" }); + }); + + const response = await request(app).get("/api/v1/ok"); + + expect(response.status).toBe(200); + expect(response.body).toEqual({ hello: "world" }); + }); + + // Only /api/* is monitored. /engine/*, /otel/*, /realtime/*, and UI/dashboard + // routes all pass through unchanged regardless of their body content. + test.each([ + ["/engine/v1/leaky"], + ["/otel/v1/traces"], + ["/realtime/v1/runs/run_123"], + ["/dashboard/leaky"], + ])("does NOT sanitize non-/api/ path: %s", async (path) => { + const app = buildApp(); + app.get(path, (_req, res) => { + res.status(500).json({ message: "Boom: P1001 from a non-api route" }); + }); + + const response = await request(app).get(path); + + expect(response.status).toBe(500); + expect(response.body).toEqual({ message: "Boom: P1001 from a non-api route" }); + }); + + // Kept as a separate assertion so the dashboard exclusion has its own + // grep-able test name; the rationale is different (UI ErrorBoundary). + test("does not sanitize dashboard/UI paths", async () => { + const app = buildApp(); + app.get("/dashboard/leaky", (_req, res) => { + res.status(500).json({ message: "P1001 leak from a dashboard route" }); + }); + + const response = await request(app).get("/dashboard/leaky"); + + expect(response.status).toBe(500); + expect(response.body).toEqual({ message: "P1001 leak from a dashboard route" }); + }); + + test("passes through 4xx responses untouched even if body contains a leak pattern", async () => { + const app = buildApp(); + app.get("/api/v1/bad-request", (_req, res) => { + res.status(400).json({ error: "Invalid params: P1001 included to ensure 4xx is exempt" }); + }); + + const response = await request(app).get("/api/v1/bad-request"); + + expect(response.status).toBe(400); + expect(response.body).toEqual({ + error: "Invalid params: P1001 included to ensure 4xx is exempt", + }); + }); + + test("streams text/event-stream responses written as Uint8Array without buffering (Remix stream pattern)", async () => { + const app = buildApp(); + app.get("/api/v1/stream", (_req, res) => { + res.statusCode = 200; + res.setHeader("Content-Type", "text/event-stream"); + const enc = new TextEncoder(); + res.write(enc.encode("data: chunk-1\n\n")); + res.write(enc.encode("data: chunk-2\n\n")); + res.end(); + }); + + const response = await request(app).get("/api/v1/stream"); + + expect(response.status).toBe(200); + expect(response.text).toBe("data: chunk-1\n\ndata: chunk-2\n\n"); + }); + + // Real Remix responses arrive as multiple Uint8Array chunks via + // writeReadableStreamToWritable. The single-chunk Uint8Array test above + // exercises one slice; this exercises the chunk-pushing/concat loop. + test("assembles multi-chunk Uint8Array writes into a single body without mangling", async () => { + const app = buildApp(); + app.get("/api/v1/multichunk", (_req, res) => { + res.statusCode = 200; + res.setHeader("Content-Type", "application/json"); + const enc = new TextEncoder(); + res.write(enc.encode('{"part1":"hello"')); + res.write(enc.encode(',"part2":"world"}')); + res.end(); + }); + + const response = await request(app).get("/api/v1/multichunk"); + + expect(response.status).toBe(200); + expect(response.body).toEqual({ part1: "hello", part2: "world" }); + }); + + // The actual production leak path: Remix's returnLastResortErrorResponse + // returns Content-Type: text/plain in non-production mode with the raw + // error message appended. Our middleware must detect the leak in text/plain + // bodies and rewrite the response to JSON before flushing. + test("sanitizes a text/plain 500 body matching a leak rule (Remix non-prod error shape)", async () => { + const app = buildApp(); + app.get("/api/v1/text-plain-leak", (_req, res) => { + res.statusCode = 500; + res.setHeader("Content-Type", "text/plain"); + res.write( + new TextEncoder().encode( + "Unexpected Server Error\n\nError: PrismaClientInitializationError: Can't reach database server (P1001)" + ) + ); + res.end(); + }); + + const response = await request(app).get("/api/v1/text-plain-leak"); + + expect(response.status).toBe(500); + expect(response.headers["content-type"]).toMatch(/application\/json/); + expect(response.body).toEqual({ error: "Internal Server Error" }); + }); + + // Responses that exceed MAX_BUFFER_BYTES (64KB) bypass the buffer to avoid + // holding large streams in memory. Bytes must still reach the client intact. + test("passes through a 5xx response larger than the buffer cap without mangling bytes", async () => { + const app = buildApp(); + const filler = "x".repeat(80 * 1024); + app.get("/api/v1/large-5xx", (_req, res) => { + res.statusCode = 500; + res.setHeader("Content-Type", "application/json"); + res.write(new TextEncoder().encode(JSON.stringify({ filler, marker: "END" }))); + res.end(); + }); + + const response = await request(app).get("/api/v1/large-5xx"); + + expect(response.status).toBe(500); + expect(response.body.filler).toHaveLength(80 * 1024); + expect(response.body.marker).toBe("END"); + }); + + // If a route flushes headers before writing the body, the middleware + // cannot rewrite Content-Type / Content-Length later. It must skip + // sanitization gracefully — no crash, no byte-mangling — and let the + // original body through. (Per-route #3664 catches remain the primary + // defense; this is just about not making things worse.) + test("skips sanitization gracefully when response headers are already flushed", async () => { + const app = buildApp(); + app.get("/api/v1/headers-flushed", (_req, res) => { + res.statusCode = 500; + res.setHeader("Content-Type", "text/plain"); + res.flushHeaders(); + res.write(new TextEncoder().encode("leaky body with P1001 written after headers flushed")); + res.end(); + }); + + const response = await request(app).get("/api/v1/headers-flushed"); + + expect(response.status).toBe(500); + // Bytes must not be mangled into "85,110,..." style. + expect(response.text).not.toMatch(/^\d+(?:,\d+){5,}/); + // We cannot change Content-Type after flush, so the original text leaks + // through. That is an accepted limitation — per-route try/catch is the + // primary defense. + expect(response.text).toContain("leaky body with P1001"); + }); + + // Same shape of bug as the original Uint8Array miss — if a chunk arrives + // as a typed array that is not specifically Uint8Array (e.g. Uint16Array, + // DataView), the `instanceof Uint8Array` check misses it and toBuffer + // falls through to String(chunk), byte-mangling the body. Guarding via + // ArrayBuffer.isView() catches all typed-array variants. + test("preserves bytes when chunk is a non-Uint8Array typed array (Uint16Array)", async () => { + const app = buildApp(); + const text = "Hello, world!"; + const utf16 = new Uint16Array(text.length); + for (let i = 0; i < text.length; i++) utf16[i] = text.charCodeAt(i); + const expectedBytes = Buffer.from(utf16.buffer, utf16.byteOffset, utf16.byteLength); + + app.get("/api/v1/uint16", (_req, res) => { + res.statusCode = 200; + // Use application/json so the chunk hits the buffer path (not the + // streaming-content-type bypass). + res.setHeader("Content-Type", "application/json"); + res.write(utf16); + res.end(); + }); + + const response = await request(app) + .get("/api/v1/uint16") + .buffer(true) + .parse((res, cb) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => cb(null, Buffer.concat(chunks))); + }); + + expect(response.status).toBe(200); + expect(Buffer.compare(response.body as Buffer, expectedBytes)).toBe(0); + }); + + test("sanitizes a 503 body that matches a leak rule, preserving the original status", async () => { + const app = buildApp(); + app.get("/api/v1/leaky-503", (_req, res) => { + res.status(503).json({ + message: "PrismaClientInitializationError: Can't reach database server (P1001)", + }); + }); + + const response = await request(app).get("/api/v1/leaky-503"); + + // The 503 is preserved so status-aware clients still know "service + // unavailable, retry" vs a generic 500. The body is sanitized. + expect(response.status).toBe(503); + expect(response.body).toEqual({ error: "Internal Server Error" }); + }); + + test("sanitizes a leak on POST as well as GET (method-independent)", async () => { + const app = buildApp(); + app.post("/api/v1/leaky-post", (_req, res) => { + res.status(500).json({ message: "Boom: Can't reach database server (P1001)" }); + }); + + const response = await request(app).post("/api/v1/leaky-post").send({}); + + expect(response.status).toBe(500); + expect(response.body).toEqual({ error: "Internal Server Error" }); + }); + + test("passes through an empty-body 5xx response without crashing", async () => { + const app = buildApp(); + app.get("/api/v1/empty-500", (_req, res) => { + res.statusCode = 500; + res.end(); + }); + + const response = await request(app).get("/api/v1/empty-500"); + + expect(response.status).toBe(500); + expect(response.text).toBe(""); + }); + + test("matches leak rule case-insensitively (e.g. lowercase 'p1001' still sanitized)", async () => { + const app = buildApp(); + app.get("/api/v1/leaky-lowercase", (_req, res) => { + res.status(500).json({ message: "boom: can't reach database server, code p1001" }); + }); + + const response = await request(app).get("/api/v1/leaky-lowercase"); + + expect(response.status).toBe(500); + expect(response.body).toEqual({ error: "Internal Server Error" }); + }); + + // server.ts mounts compression() before apiErrorBoundary, so compression's + // res.write/end wrappers are what our middleware's originalWrite/End point + // to. Verifies that the sanitized body still reaches the client correctly + // when compression is in the chain (supertest auto-decompresses). + test("sanitizes leak correctly when compression middleware is mounted in front", async () => { + const app = express(); + app.use(compression({ threshold: 0 })); + app.use(apiErrorBoundary); + app.get("/api/v1/compressed-leak", (_req, res) => { + res.status(500).json({ + message: "Boom: Can't reach database server (P1001)", + // Padding to ensure compression has something meaningful to do. + padding: "x".repeat(2048), + }); + }); + + const response = await request(app) + .get("/api/v1/compressed-leak") + .set("Accept-Encoding", "gzip"); + + expect(response.status).toBe(500); + expect(response.body).toEqual({ error: "Internal Server Error" }); + }); + + test("passes a non-leak compressed response through without mangling bytes", async () => { + const app = express(); + app.use(compression({ threshold: 0 })); + app.use(apiErrorBoundary); + app.get("/api/v1/compressed-ok", (_req, res) => { + res.status(200).json({ hello: "world", padding: "y".repeat(2048) }); + }); + + const response = await request(app) + .get("/api/v1/compressed-ok") + .set("Accept-Encoding", "gzip"); + + expect(response.status).toBe(200); + expect(response.body.hello).toBe("world"); + expect(response.body.padding).toHaveLength(2048); + }); + + // HEAD requests run the loader but Node strips the body before sending. + // The middleware still buffers + matches against the (would-be) body, so + // its sanitize/log path still fires — we just want to confirm no crash + // and that the status is preserved. + test("does not crash on HEAD requests to leaky routes", async () => { + const app = buildApp(); + app.get("/api/v1/leaky-head", (_req, res) => { + res.status(500).json({ message: "Boom: P1001" }); + }); + + const response = await request(app).head("/api/v1/leaky-head"); + + expect(response.status).toBe(500); + expect(response.text ?? "").toBe(""); + }); + + test("logs at error level when sanitization occurs", async () => { + const app = buildApp(); + app.get("/api/v1/leaky-logged", (_req, res) => { + res.status(500).json({ message: "boom P1001 boom" }); + }); + + await request(app).get("/api/v1/leaky-logged"); + + expect(logger.error).toHaveBeenCalledTimes(1); + const [message, context] = vi.mocked(logger.error).mock.calls[0]; + expect(message).toMatch(/sanitized/i); + expect(context).toMatchObject({ + rule: "prisma-p1001", + path: "/api/v1/leaky-logged", + status: 500, + }); + }); + + test("does not log when no sanitization occurs", async () => { + const app = buildApp(); + app.get("/api/v1/quiet", (_req, res) => { + res.status(200).json({ ok: true }); + }); + + await request(app).get("/api/v1/quiet"); + + expect(logger.error).not.toHaveBeenCalled(); + }); +}); From 758e915bbbd78e9212d7be9291f15a3fe442cb1d Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Wed, 20 May 2026 16:41:03 +0100 Subject: [PATCH 2/4] test(webapp): lock in byte-integrity for single-call res.end paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit patchedWrite applies isStreamingContentType / MAX_BUFFER_BYTES bypass checks before buffering; patchedEnd does not, since a single-call res.end(chunk) is not a streaming pileup scenario. These tests pin that contract by verifying byte integrity for the two shapes a single-call end can take: - octet-stream body via res.end(binary) — all 512 bytes spanning the 0-255 range round-trip byte-perfect - 80KB JSON body via res.end(buffer) — payload intact, no truncation or mangling from being buffered past the cap Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/webapp/test/apiErrorBoundary.test.ts | 52 +++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/apps/webapp/test/apiErrorBoundary.test.ts b/apps/webapp/test/apiErrorBoundary.test.ts index 36f3ad53dea..c835735b669 100644 --- a/apps/webapp/test/apiErrorBoundary.test.ts +++ b/apps/webapp/test/apiErrorBoundary.test.ts @@ -238,6 +238,58 @@ describe("apiErrorBoundary", () => { expect(response.body).toEqual({ part1: "hello", part2: "world" }); }); + // CodeRabbit raised a concern that patchedEnd doesn't re-apply the + // isStreamingContentType / MAX_BUFFER_BYTES bypass checks for single-call + // `res.end(chunk)` paths. The bypasses exist for streaming pileup; a + // single-call end isn't streaming, so the body is fully assembled either + // way. These two tests lock in that no harm occurs: bytes flow through + // unchanged for the scenarios CodeRabbit named (octet-stream content-type + // and >64KB body via single-call res.end). + test("preserves bytes for octet-stream body delivered via single-call res.end(chunk)", async () => { + const app = buildApp(); + // Construct binary bytes that span the full 0-255 range to catch any + // accidental encoding interpretation. + const binary = Buffer.from(Array.from({ length: 512 }, (_, i) => i % 256)); + + app.get("/api/v1/octet-end", (_req, res) => { + res.statusCode = 200; + res.setHeader("Content-Type", "application/octet-stream"); + res.end(binary); + }); + + const response = await request(app) + .get("/api/v1/octet-end") + .buffer(true) + .parse((res, cb) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => cb(null, Buffer.concat(chunks))); + }); + + expect(response.status).toBe(200); + expect(Buffer.compare(response.body as Buffer, binary)).toBe(0); + }); + + test("preserves bytes for >64KB body delivered via single-call res.end(chunk)", async () => { + const app = buildApp(); + const payload = JSON.stringify({ + filler: "x".repeat(80 * 1024), + marker: "END_OF_LARGE_BODY", + }); + + app.get("/api/v1/large-end", (_req, res) => { + res.statusCode = 200; + res.setHeader("Content-Type", "application/json"); + res.end(Buffer.from(payload, "utf8")); + }); + + const response = await request(app).get("/api/v1/large-end"); + + expect(response.status).toBe(200); + expect(response.body.filler).toHaveLength(80 * 1024); + expect(response.body.marker).toBe("END_OF_LARGE_BODY"); + }); + // The actual production leak path: Remix's returnLastResortErrorResponse // returns Content-Type: text/plain in non-production mode with the raw // error message appended. Our middleware must detect the leak in text/plain From 05c5e18174e80a8bd90d6be95a2c2a23d51c1275 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Thu, 21 May 2026 10:37:59 +0100 Subject: [PATCH 3/4] test(webapp): neutralize comment on single-call res.end byte-integrity tests Rewords the test-block comment to describe the contract being pinned without referencing the external reviewer that prompted the tests. Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/webapp/test/apiErrorBoundary.test.ts | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/apps/webapp/test/apiErrorBoundary.test.ts b/apps/webapp/test/apiErrorBoundary.test.ts index c835735b669..da47a761231 100644 --- a/apps/webapp/test/apiErrorBoundary.test.ts +++ b/apps/webapp/test/apiErrorBoundary.test.ts @@ -238,13 +238,12 @@ describe("apiErrorBoundary", () => { expect(response.body).toEqual({ part1: "hello", part2: "world" }); }); - // CodeRabbit raised a concern that patchedEnd doesn't re-apply the - // isStreamingContentType / MAX_BUFFER_BYTES bypass checks for single-call - // `res.end(chunk)` paths. The bypasses exist for streaming pileup; a - // single-call end isn't streaming, so the body is fully assembled either - // way. These two tests lock in that no harm occurs: bytes flow through - // unchanged for the scenarios CodeRabbit named (octet-stream content-type - // and >64KB body via single-call res.end). + // patchedWrite applies the isStreamingContentType / MAX_BUFFER_BYTES + // bypass checks before buffering; patchedEnd does not, because a + // single-call `res.end(chunk)` is one-shot rather than streaming + // pileup — the body is fully assembled either way. These tests pin + // the byte-integrity contract for that single-call shape: octet-stream + // bodies and >64KB bodies both flow through unchanged. test("preserves bytes for octet-stream body delivered via single-call res.end(chunk)", async () => { const app = buildApp(); // Construct binary bytes that span the full 0-255 range to catch any From 343fde250c0a9eff0323cf5e444459388e1bc7b0 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Thu, 21 May 2026 11:11:55 +0100 Subject: [PATCH 4/4] fix(webapp): apply streaming content-type bypass on patchedEnd too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit patchedWrite checks isStreamingContentType() before buffering; patchedEnd only had the headersSent half. A 5xx response that sets a streaming content-type and delivers its body via single-call res.end(chunk) would get buffered, UTF-8 decoded, and run through the leak regex — wrong shape for binary/SSE payloads. Mirrors the patchedWrite guard so the same bypass applies regardless of write/end call shape. Test pins the contract: a 5xx octet-stream body whose bytes contain "P1001" passes through unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../app/services/apiErrorBoundary.server.ts | 11 +++-- apps/webapp/test/apiErrorBoundary.test.ts | 43 ++++++++++++++++--- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/apps/webapp/app/services/apiErrorBoundary.server.ts b/apps/webapp/app/services/apiErrorBoundary.server.ts index 9777907c45c..0dd0dbef4bf 100644 --- a/apps/webapp/app/services/apiErrorBoundary.server.ts +++ b/apps/webapp/app/services/apiErrorBoundary.server.ts @@ -104,10 +104,13 @@ export const apiErrorBoundary: RequestHandler = (req, res, next) => { }; const patchedEnd = (chunk?: unknown, ...rest: unknown[]) => { - // Same guard as patchedWrite: if headers were flushed (e.g. via an - // explicit `res.flushHeaders()` before end-with-body), we cannot rewrite - // the response — flush buffered chunks unchanged and skip sanitization. - if (!bypass && res.headersSent) flushAndBypass(); + // Mirror patchedWrite's bypass guards. headersSent: if headers were + // flushed (e.g. via an explicit `res.flushHeaders()` before end-with-body), + // we cannot rewrite the response. isStreamingContentType: if a single-call + // `res.end(chunk)` carries a streaming content-type (SSE / octet-stream), + // the leak rules are JSON-text heuristics and have no business interpreting + // that payload — flush unchanged and skip sanitization. + if (!bypass && (res.headersSent || isStreamingContentType())) flushAndBypass(); if (bypass) { return (originalEnd as (c?: unknown, ...r: unknown[]) => typeof res)(chunk, ...rest); } diff --git a/apps/webapp/test/apiErrorBoundary.test.ts b/apps/webapp/test/apiErrorBoundary.test.ts index da47a761231..83325432d1a 100644 --- a/apps/webapp/test/apiErrorBoundary.test.ts +++ b/apps/webapp/test/apiErrorBoundary.test.ts @@ -238,12 +238,45 @@ describe("apiErrorBoundary", () => { expect(response.body).toEqual({ part1: "hello", part2: "world" }); }); + // The streaming-content-type bypass must fire on single-call res.end + // too, not only on res.write. If a 5xx response sets a streaming + // content-type and delivers its body via res.end(chunk) directly, the + // body must pass through untouched — the leak rules are JSON-text + // heuristics and have no business interpreting binary or SSE payloads. + test("does NOT sanitize a 5xx octet-stream body delivered via single-call res.end(chunk), even if bytes contain leak pattern", async () => { + const app = buildApp(); + // Construct a binary payload whose bytes happen to spell "P1001" + // somewhere — proves the streaming bypass fires before the regex. + const binary = Buffer.concat([ + Buffer.from([0x00, 0xff, 0xab]), + Buffer.from("P1001", "ascii"), + Buffer.from([0xcd, 0xef, 0x00]), + ]); + + app.get("/api/v1/octet-5xx", (_req, res) => { + res.statusCode = 500; + res.setHeader("Content-Type", "application/octet-stream"); + res.end(binary); + }); + + const response = await request(app) + .get("/api/v1/octet-5xx") + .buffer(true) + .parse((res, cb) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => cb(null, Buffer.concat(chunks))); + }); + + expect(response.status).toBe(500); + expect(Buffer.compare(response.body as Buffer, binary)).toBe(0); + }); + // patchedWrite applies the isStreamingContentType / MAX_BUFFER_BYTES - // bypass checks before buffering; patchedEnd does not, because a - // single-call `res.end(chunk)` is one-shot rather than streaming - // pileup — the body is fully assembled either way. These tests pin - // the byte-integrity contract for that single-call shape: octet-stream - // bodies and >64KB bodies both flow through unchanged. + // bypass checks before buffering; patchedEnd applies the same + // streaming-content-type check. These tests pin the byte-integrity + // contract for the single-call res.end shape: octet-stream bodies + // and >64KB bodies both flow through unchanged. test("preserves bytes for octet-stream body delivered via single-call res.end(chunk)", async () => { const app = buildApp(); // Construct binary bytes that span the full 0-255 range to catch any