From 123301b310b7b35df1bee308059b0e0959777f5d Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 29 May 2026 16:32:53 +0200 Subject: [PATCH] testserver: fail on unexpanded shell/test variables in API requests The test server now inspects each request's URL and body and fails the test if it finds an unexpanded uppercase variable reference (${FOO} or $FOO). Acceptance scripts inject test/env variables by that convention (UNIQUE_NAME, JOB_ID, ...), while DABs interpolation is lowercase and namespaced (${resources.jobs.foo.id}), so it is ignored with no allowlist upkeep. Endpoints carrying variable-shaped content are exempt (file uploads and /telemetry-ext), and a global allowedDollarTokens escape hatch covers intentional cases. This catches the class of bug fixed in #5376, where a ${UNIQUE_NAME} left inside single quotes was sent to the API literally: locally the fake server accepted it, so it only failed intermittently on cloud and never showed up in output.txt. One fixture that used $VAR-shaped placeholder values (resources/jobs/instance_pool_and_node_type) now uses plain literals. Co-authored-by: Isaac --- .../databricks.yml | 8 +- .../instance_pool_and_node_type/output.txt | 12 +- libs/testserver/server.go | 11 ++ libs/testserver/varguard.go | 59 ++++++++++ libs/testserver/varguard_test.go | 106 ++++++++++++++++++ 5 files changed, 186 insertions(+), 10 deletions(-) create mode 100644 libs/testserver/varguard.go create mode 100644 libs/testserver/varguard_test.go diff --git a/acceptance/bundle/resources/jobs/instance_pool_and_node_type/databricks.yml b/acceptance/bundle/resources/jobs/instance_pool_and_node_type/databricks.yml index dfae4d46ba0..6534d4a2c38 100644 --- a/acceptance/bundle/resources/jobs/instance_pool_and_node_type/databricks.yml +++ b/acceptance/bundle/resources/jobs/instance_pool_and_node_type/databricks.yml @@ -4,15 +4,15 @@ bundle: resources: jobs: some_other_job: - name: "[${bundle.target}] Test Wheel Job $UNIQUE_NAME" + name: "[${bundle.target}] Test Wheel Job" tasks: - task_key: TestTask new_cluster: num_workers: 1 - spark_version: $DEFAULT_SPARK_VERSION - node_type_id: $NODE_TYPE_ID + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge data_security_mode: USER_ISOLATION - instance_pool_id: $TEST_INSTANCE_POOL_ID + instance_pool_id: my-instance-pool python_wheel_task: package_name: my_test_code entry_point: run diff --git a/acceptance/bundle/resources/jobs/instance_pool_and_node_type/output.txt b/acceptance/bundle/resources/jobs/instance_pool_and_node_type/output.txt index fb2387e9e72..8492a4dec6b 100644 --- a/acceptance/bundle/resources/jobs/instance_pool_and_node_type/output.txt +++ b/acceptance/bundle/resources/jobs/instance_pool_and_node_type/output.txt @@ -4,9 +4,9 @@ { "new_cluster": { "data_security_mode": "USER_ISOLATION", - "instance_pool_id": "$TEST_INSTANCE_POOL_ID", + "instance_pool_id": "my-instance-pool", "num_workers": 1, - "spark_version": "$DEFAULT_SPARK_VERSION" + "spark_version": "15.4.x-scala2.12" }, "python_wheel_task": { "entry_point": "run", @@ -25,9 +25,9 @@ { "new_cluster": { "data_security_mode": "USER_ISOLATION", - "instance_pool_id": "$TEST_INSTANCE_POOL_ID", + "instance_pool_id": "my-instance-pool", "num_workers": 1, - "spark_version": "$DEFAULT_SPARK_VERSION" + "spark_version": "15.4.x-scala2.12" }, "python_wheel_task": { "entry_point": "run", @@ -52,9 +52,9 @@ Deployment complete! { "new_cluster": { "data_security_mode": "USER_ISOLATION", - "instance_pool_id": "$TEST_INSTANCE_POOL_ID", + "instance_pool_id": "my-instance-pool", "num_workers": 1, - "spark_version": "$DEFAULT_SPARK_VERSION" + "spark_version": "15.4.x-scala2.12" }, "python_wheel_task": { "entry_point": "run", diff --git a/libs/testserver/server.go b/libs/testserver/server.go index eb0268d694f..b5730ca35d2 100644 --- a/libs/testserver/server.go +++ b/libs/testserver/server.go @@ -339,6 +339,17 @@ func (s *Server) serve(w http.ResponseWriter, r *http.Request, handler HandlerFu StatusCode: 500, Body: []byte("INJECTED"), } + } else if token, found := detectUnexpandedVar(request.URL.Path, request.URL.RawQuery, request.Body); found { + s.t.Errorf("Unexpanded variable %q reached the test server in %s %s. "+ + "A shell/test variable was sent to the API without being expanded "+ + "(likely left inside single quotes in the script). Expand it, or if "+ + "intentional add it to allowedDollarTokens in libs/testserver.", + token, request.Method, request.URL.Path) + resp = EncodedResponse{ + StatusCode: 400, + Body: fmt.Appendf(nil, "unexpanded variable %q in request", token), + Headers: getJsonHeaders(), + } } else { respAny := handler(request) if respAny == nil && request.Context.Err() != nil { diff --git a/libs/testserver/varguard.go b/libs/testserver/varguard.go new file mode 100644 index 00000000000..8b8ffb4c2ea --- /dev/null +++ b/libs/testserver/varguard.go @@ -0,0 +1,59 @@ +package testserver + +import ( + "regexp" + "strings" +) + +// unexpandedVarRegex matches shell-style variable references whose name is all +// uppercase: braced `${FOO}` or bare `$FOO`. Acceptance scripts inject env/test +// variables by this convention (UNIQUE_NAME, CURRENT_USER_NAME, JOB_ID, ...). +// Restricting to uppercase names deliberately ignores DABs interpolation, which +// is always lowercase and namespaced (${resources.jobs.foo.id}, ${var.catalog}, +// ${bundle.target}); those legitimately reach the API unresolved in some tests. +// The leading [A-Z_] also excludes non-variable dollar usage like $1, $5, $$. +var unexpandedVarRegex = regexp.MustCompile(`\$\{[A-Z_][A-Z0-9_]*\}|\$[A-Z_][A-Z0-9_]*`) + +// allowedDollarTokens are the exact `${FOO}`/`$FOO` tokens known to be +// legitimate in API request payloads. The guard rejects everything else, so +// this list is the deliberate, reviewed escape hatch for intentional cases. It +// is empty today; add entries here (with justification) only when a payload +// genuinely needs an uppercase variable-shaped string. +var allowedDollarTokens = map[string]struct{}{} + +// exemptPathPrefixes are request paths whose bodies must not be inspected: +// - file-content uploads carry opaque bytes (notebooks, source files, secret +// values) that may contain variable-shaped text; +// - telemetry serializes the bundle config verbatim, including unresolved +// interpolation and arbitrary user strings, so it is logging, not a payload. +var exemptPathPrefixes = []string{ + "/api/2.0/workspace/import", + "/api/2.0/workspace-files/import-file/", + "/api/2.0/fs/files/", + "/api/2.0/dbfs/", + "/api/2.0/secrets/put", + "/telemetry-ext", +} + +// detectUnexpandedVar reports the first uppercase shell/test variable reference +// that reached the test server unexpanded. Acceptance scripts interpolate +// variables like ${UNIQUE_NAME} into API payloads; if one is left inside single +// quotes it is sent literally, which only fails intermittently on cloud (see PR +// #5376). Catching it locally turns that class of bug into a deterministic +// failure. +func detectUnexpandedVar(path, rawQuery string, body []byte) (string, bool) { + for _, prefix := range exemptPathPrefixes { + if strings.HasPrefix(path, prefix) { + return "", false + } + } + + for _, s := range []string{path, rawQuery, string(body)} { + for _, match := range unexpandedVarRegex.FindAllString(s, -1) { + if _, ok := allowedDollarTokens[match]; !ok { + return match, true + } + } + } + return "", false +} diff --git a/libs/testserver/varguard_test.go b/libs/testserver/varguard_test.go new file mode 100644 index 00000000000..7a90f2dea4e --- /dev/null +++ b/libs/testserver/varguard_test.go @@ -0,0 +1,106 @@ +package testserver + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDetectUnexpandedVar(t *testing.T) { + tests := []struct { + name string + path string + rawQuery string + body string + want string + found bool + }{ + { + name: "braced uppercase var in body", + path: "/api/2.0/pipelines", + body: `{"name": "test-pipeline-${UNIQUE_NAME}"}`, + want: "${UNIQUE_NAME}", + found: true, + }, + { + name: "bare uppercase var in body", + path: "/api/2.0/pipelines", + body: `{"name": "test-pipeline-$UNIQUE_NAME"}`, + want: "$UNIQUE_NAME", + found: true, + }, + { + name: "uppercase var in path", + path: "/api/2.1/jobs/${JOB_ID}/get", + found: true, + want: "${JOB_ID}", + }, + { + name: "uppercase var in query", + path: "/api/2.1/jobs/get", + rawQuery: "job_id=$JOB_ID", + found: true, + want: "$JOB_ID", + }, + { + name: "DABs interpolation is ignored (lowercase, namespaced)", + path: "/api/2.1/unity-catalog/volumes", + body: `{"catalog_name": "${resources.volumes.bar.bad..syntax}"}`, + found: false, + }, + { + name: "lowercase bare var is ignored", + path: "/api/2.0/pipelines", + body: `{"x": "$job_id"}`, + found: false, + }, + { + name: "clean body", + path: "/api/2.0/pipelines", + body: `{"name": "test-pipeline-abc123"}`, + found: false, + }, + { + name: "dollar amounts are not variables", + path: "/api/2.0/pipelines", + body: `{"price": "$5", "positional": "$1", "literal": "$$"}`, + found: false, + }, + { + name: "file content endpoint is exempt", + path: "/api/2.0/workspace/import", + body: `{"content": "echo $PATH"}`, + found: false, + }, + { + name: "import-file endpoint is exempt", + path: "/api/2.0/workspace-files/import-file/Workspace/Users/x/notebook.py", + body: "print('${NOT_A_BUG}')", + found: false, + }, + { + name: "telemetry endpoint is exempt", + path: "/telemetry-ext", + body: `{"logs": ["name: test-${UNIQUE_NAME}"]}`, + found: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, found := detectUnexpandedVar(tt.path, tt.rawQuery, []byte(tt.body)) + assert.Equal(t, tt.found, found) + if tt.found { + assert.Equal(t, tt.want, got) + } + }) + } +} + +func TestDetectUnexpandedVarAllowlist(t *testing.T) { + allowedDollarTokens["${ALLOWED_EXAMPLE}"] = struct{}{} + t.Cleanup(func() { delete(allowedDollarTokens, "${ALLOWED_EXAMPLE}") }) + + _, found := detectUnexpandedVar("/api/2.0/pipelines", "", []byte(`{"x": "${ALLOWED_EXAMPLE}"}`)) + assert.False(t, found) +}