From a3950ada79c6109b9f419e7c35aa201a021bb659 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Wed, 10 Jun 2026 10:20:42 -0400 Subject: [PATCH 1/4] fix(chromedriverproxy): forward loopback Host and strip Origin so ChromeDriver accepts proxied requests The ChromeDriver proxy fronts the internal `chromedriver --port=9225` on :9224. ChromeDriver (Chrome 111+) rejects any request whose Host/Origin header is not localhost or an IP, returning HTTP 500 with body "Host header or origin header is specified and is not whitelisted or localhost" (DNS-rebinding protection). The reverse-proxy Rewrite did `r.Out.Host = r.In.Host`, which overrode the loopback host that SetURL had set, forwarding the inbound Host to ChromeDriver. Over a direct 127.0.0.1 connection (docker e2e) the inbound Host is already loopback so it works, but behind an ingress (e.g. {instance}.dev-yul-hypeman-1.kernel.sh:9224, the hypeman e2e path) the real hostname gets forwarded and ChromeDriver 500s every request. This looked like a slow/never-ready chromedriver because WaitChromeDriver polls /status for a 200 that never comes. handleCreateSession had the same class of bug: it copied all client headers (including Origin) to the upstream. Fix: drop the `r.Out.Host = r.In.Host` line so the upstream sees the loopback Host, strip the Origin header in the rewrite, and skip the Origin header in handleCreateSession's header-copy loop. Adds a regression test (TestHandler_RewritesHostAndStripsOrigin) exercising both the reverse-proxy path and POST /session with an ingress Host/Origin. Co-Authored-By: Claude Opus 4.8 (1M context) --- server/lib/chromedriverproxy/proxy.go | 16 ++++++- server/lib/chromedriverproxy/proxy_test.go | 52 ++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/server/lib/chromedriverproxy/proxy.go b/server/lib/chromedriverproxy/proxy.go index e9a46641..e481dbb3 100644 --- a/server/lib/chromedriverproxy/proxy.go +++ b/server/lib/chromedriverproxy/proxy.go @@ -58,7 +58,14 @@ func Handler(logger *slog.Logger, opts *Options) http.Handler { reverseProxy := &httputil.ReverseProxy{ Rewrite: func(r *httputil.ProxyRequest) { r.SetURL(upstream) - r.Out.Host = r.In.Host + // ChromeDriver (Chrome 111+) rejects any request whose Host/Origin + // header is not localhost or an IP, returning HTTP 500 ("Host header + // or origin header is specified and is not whitelisted or localhost") + // as DNS-rebinding protection. SetURL clears Out.Host so net/http + // uses the loopback upstream as the Host — do NOT restore the inbound + // Host (e.g. an ingress hostname like {instance}.) or + // ChromeDriver refuses every request. Strip Origin for the same reason. + r.Out.Header.Del("Origin") }, } @@ -135,6 +142,13 @@ func handleCreateSession(w http.ResponseWriter, r *http.Request, logger *slog.Lo return } for k, vv := range r.Header { + // Skip Origin: ChromeDriver rejects non-localhost Origin/Host headers + // (see the reverse-proxy rewrite above). proxyReq already targets the + // loopback upstream, so its Host is correct; don't forward the client's + // Origin (e.g. an ingress hostname) or ChromeDriver returns HTTP 500. + if strings.EqualFold(k, "Origin") { + continue + } for _, v := range vv { proxyReq.Header.Add(k, v) } diff --git a/server/lib/chromedriverproxy/proxy_test.go b/server/lib/chromedriverproxy/proxy_test.go index 25ee0a62..53528c9e 100644 --- a/server/lib/chromedriverproxy/proxy_test.go +++ b/server/lib/chromedriverproxy/proxy_test.go @@ -126,6 +126,58 @@ func TestHandler_PostSession_InjectsDebuggerAddress(t *testing.T) { "webSocketUrl in capabilities should be rewritten to proxy address") } +// TestHandler_RewritesHostAndStripsOrigin is a regression test for the +// ChromeDriver "Host header or origin header ... is not whitelisted or +// localhost" HTTP 500. ChromeDriver (Chrome 111+) rejects requests whose +// Host/Origin is not loopback. When the proxy is fronted by an ingress (e.g. +// {instance}.:9224), the inbound Host/Origin must NOT be forwarded to +// the upstream; the upstream must see the loopback host and no Origin. +func TestHandler_RewritesHostAndStripsOrigin(t *testing.T) { + type seen struct { + host string + origin string + } + var got seen + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + got.host = r.Host + got.origin = r.Header.Get("Origin") + w.WriteHeader(http.StatusOK) + })) + defer backend.Close() + + backendURL, _ := url.Parse(backend.URL) + handler := Handler(silentLogger(), testOptions(backendURL.Host, "127.0.0.1:9922")) + + const ingressHost = "inst.dev-yul-hypeman-1.kernel.sh" + + t.Run("reverse-proxy passthrough", func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/status", nil) + req.Host = ingressHost + req.Header.Set("Origin", "https://"+ingressHost) + rec := httptest.NewRecorder() + + handler.ServeHTTP(rec, req) + + require.Equal(t, http.StatusOK, rec.Code) + assert.Equal(t, backendURL.Host, got.host, "upstream must see the loopback Host, not the ingress host") + assert.Empty(t, got.origin, "Origin must be stripped before reaching ChromeDriver") + }) + + t.Run("POST /session", func(t *testing.T) { + req := httptest.NewRequest(http.MethodPost, "/session", strings.NewReader(`{"capabilities":{}}`)) + req.Host = ingressHost + req.Header.Set("Origin", "https://"+ingressHost) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + handler.ServeHTTP(rec, req) + + require.Equal(t, http.StatusOK, rec.Code) + assert.Equal(t, backendURL.Host, got.host, "upstream must see the loopback Host, not the ingress host") + assert.Empty(t, got.origin, "Origin must be stripped before reaching ChromeDriver") + }) +} + func TestHandler_HTTPPassthrough(t *testing.T) { backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") From 0c4b282d8810337aa2ab457282e60f2448589071 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Wed, 10 Jun 2026 11:18:54 -0400 Subject: [PATCH 2/4] e2e: strip Docker exec stream-multiplexing headers from Exec output dockerBackend.Exec returned the raw multiplexed Docker stream, so callers that parse Exec output saw Docker's 8-byte frame headers interleaved with the data. Use testcontainers exec.Multiplexed() to demultiplex into clean combined stdout+stderr, matching what tests expect. Co-Authored-By: Claude Opus 4.8 (1M context) --- server/e2e/backend_docker.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/e2e/backend_docker.go b/server/e2e/backend_docker.go index 7c583923..c5cdd11f 100644 --- a/server/e2e/backend_docker.go +++ b/server/e2e/backend_docker.go @@ -10,6 +10,7 @@ import ( "github.com/docker/go-connections/nat" instanceoapi "github.com/kernel/kernel-images/server/lib/oapi" "github.com/testcontainers/testcontainers-go" + tcexec "github.com/testcontainers/testcontainers-go/exec" "github.com/testcontainers/testcontainers-go/wait" ) @@ -209,7 +210,8 @@ func (c *dockerBackend) WaitChromeDriver(ctx context.Context) error { // Exec executes a command inside the container and returns the combined output. func (c *dockerBackend) Exec(ctx context.Context, cmd []string) (int, string, error) { - exitCode, reader, err := c.ctr.Exec(ctx, cmd) + // Use Multiplexed() to strip Docker's multiplexing headers from output. + exitCode, reader, err := c.ctr.Exec(ctx, cmd, tcexec.Multiplexed()) if err != nil { return exitCode, "", err } From 16595f2d70f8f117b1a18347b2c416e9908f7d7f Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Wed, 10 Jun 2026 11:31:45 -0400 Subject: [PATCH 3/4] e2e: add Backend.SupportsHostAccess; skip host-access tests on backends without it Mirrors the host-access handling from the private hypeman-e2e work so the two repos' e2e Backend surface stays identical (no sync drift). Add Backend.SupportsHostAccess() (docker=true, hypeman=false) and have TestContainer.Start t.Skip when a test requests ContainerConfig.HostAccess on a backend that can't bridge the instance to a service on the test host. Keyed on the request, so a test that doesn't ask for HostAccess is never skipped. Co-Authored-By: Claude Opus 4.8 (1M context) --- server/e2e/backend.go | 7 +++++++ server/e2e/backend_docker.go | 4 ++++ server/e2e/backend_hypeman.go | 12 +++++++++--- server/e2e/container.go | 11 +++++++++++ 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/server/e2e/backend.go b/server/e2e/backend.go index 2c554912..96ff7c50 100644 --- a/server/e2e/backend.go +++ b/server/e2e/backend.go @@ -42,6 +42,13 @@ type Backend interface { // Stop tears the instance down and releases its resources. Stop(ctx context.Context) error + // SupportsHostAccess reports whether the backend can bridge the instance to + // a service the test stands up on its own host (ContainerConfig.HostAccess). + // The Docker backend can (host.docker.internal); a remote VM backend cannot. + // TestContainer.Start uses this to skip host-fixture tests on backends that + // can't satisfy them, rather than failing. + SupportsHostAccess() bool + // APIBaseURL returns the base URL for the instance's control-plane API // server (container port 10001). APIBaseURL() string diff --git a/server/e2e/backend_docker.go b/server/e2e/backend_docker.go index c5cdd11f..fc944e04 100644 --- a/server/e2e/backend_docker.go +++ b/server/e2e/backend_docker.go @@ -33,6 +33,10 @@ func newDockerBackend(image string) Backend { return &dockerBackend{Image: image} } +// SupportsHostAccess reports that the Docker backend can bridge the container +// to services on the test host via host.docker.internal. +func (c *dockerBackend) SupportsHostAccess() bool { return true } + // Start starts the container with the given configuration using testcontainers-go. func (c *dockerBackend) Start(ctx context.Context, cfg ContainerConfig) error { // Build environment variables diff --git a/server/e2e/backend_hypeman.go b/server/e2e/backend_hypeman.go index 691d7518..1facf05d 100644 --- a/server/e2e/backend_hypeman.go +++ b/server/e2e/backend_hypeman.go @@ -209,13 +209,19 @@ func deriveIngressDomain(baseURL string) string { return strings.TrimPrefix(u.Hostname(), "hypeman.") } +// SupportsHostAccess reports that the hypeman backend cannot bridge a remote VM +// to a service on the test host: there is no equivalent of Docker's +// host.docker.internal. TestContainer.Start skips host-fixture tests on this +// backend rather than failing them. +func (c *hypemanBackend) SupportsHostAccess() bool { return false } + // Start creates and boots a hypeman instance for the image, waits for it to // reach the Running state, then prepares the chosen routing mode. func (c *hypemanBackend) Start(ctx context.Context, cfg ContainerConfig) error { if cfg.HostAccess { - // A remote VM has no equivalent of Docker's host.docker.internal; we - // reject rather than silently ignore so host-fixture tests (capmonster, - // persisted-login) fail loudly here and stay on the Docker backend. + // Defensive: TestContainer.Start skips host-access tests on backends + // that don't support them (SupportsHostAccess), so this should be + // unreachable from the suite. Kept as a guard for direct callers. return fmt.Errorf("hypeman backend does not support ContainerConfig.HostAccess (no host loopback bridge for remote instances); run host-access tests on the docker backend") } diff --git a/server/e2e/container.go b/server/e2e/container.go index 790248bd..6b643fdc 100644 --- a/server/e2e/container.go +++ b/server/e2e/container.go @@ -20,6 +20,7 @@ type TestContainer struct { // Image is the OCI image reference under test. Image string + tb testing.TB backend Backend } @@ -30,12 +31,22 @@ func NewTestContainer(tb testing.TB, image string) *TestContainer { tb.Helper() return &TestContainer{ Image: image, + tb: tb, backend: newBackend(tb, image), } } // Start starts the instance with the given configuration. +// +// If the test requests HostAccess but the selected backend can't bridge the +// instance to the test host (e.g. the hypeman backend, which runs a remote VM), +// the test is skipped rather than failed: such tests rely on a fixture served +// from the runner's loopback that a remote instance cannot reach. This keeps the +// hypeman CI job green while preserving coverage on the Docker backend. func (c *TestContainer) Start(ctx context.Context, cfg ContainerConfig) error { + if cfg.HostAccess && !c.backend.SupportsHostAccess() { + c.tb.Skipf("skipping host-access test: %s backend has no host-loopback bridge for the instance", backendKindFromEnv()) + } return c.backend.Start(ctx, cfg) } From 6a783ef1e482228c8ab58b5f8237f4dfd8344325 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Thu, 11 Jun 2026 08:27:12 -0400 Subject: [PATCH 4/4] test(devtoolsproxy): raise UpstreamManager detect timeout 20s -> 60s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TestUpstreamManagerDetectsChromiumAndRestart launches a real browser and waits for UpstreamManager to scrape the "DevTools listening on ws://..." line. On shared CI runners chromium cold-start has a long tail: recent runs printed the line in ~6s warm but 15-17s when contended, occasionally exceeding the 20s budget and failing at exactly ~20.15s — a timeout, not a missing line. Raise the wait to 60s (still fails fast if the browser truly never starts). Co-Authored-By: Claude Opus 4.8 (1M context) --- server/lib/devtoolsproxy/proxy_test.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/server/lib/devtoolsproxy/proxy_test.go b/server/lib/devtoolsproxy/proxy_test.go index 8d2d7130..c3de042e 100644 --- a/server/lib/devtoolsproxy/proxy_test.go +++ b/server/lib/devtoolsproxy/proxy_test.go @@ -234,6 +234,16 @@ func TestDialUpstreamWithRetry_RechecksCurrentAfterMissedUpdate(t *testing.T) { } } +// browserDetectTimeout bounds how long the test waits for the UpstreamManager +// to scrape the "DevTools listening on ws://..." line from a freshly launched +// browser. Chromium's cold-start time has a long tail on shared CI runners: +// across recent CI runs this same launch printed the line in ~6s on a warm +// runner but took 15-17s on a slow/contended one, and occasionally exceeded the +// previous 20s budget (failing at exactly ~20.15s — the timeout, not a missing +// line). 60s gives ample headroom for the slow tail while still failing fast if +// the browser truly never comes up. +const browserDetectTimeout = 60 * time.Second + func TestUpstreamManagerDetectsChromiumAndRestart(t *testing.T) { browser, err := findBrowserBinary() if err != nil { @@ -317,7 +327,7 @@ func TestUpstreamManagerDetectsChromiumAndRestart(t *testing.T) { }() // Wait for initial upstream containing port1 - ok := waitForCondition(20*time.Second, func() bool { + ok := waitForCondition(browserDetectTimeout, func() bool { u := mgr.Current() return strings.Contains(u, fmt.Sprintf(":%d/", port1)) }) @@ -351,7 +361,7 @@ func TestUpstreamManagerDetectsChromiumAndRestart(t *testing.T) { }() // Expect manager to update to new port - ok = waitForCondition(20*time.Second, func() bool { + ok = waitForCondition(browserDetectTimeout, func() bool { u := mgr.Current() return strings.Contains(u, fmt.Sprintf(":%d/", port2)) })