Skip to content

Commit b35a5bf

Browse files
committed
Validate SPOG org id extraction
Signed-off-by: Madhavendra Rathore <madhavendra.rathore@databricks.com>
1 parent 3a66597 commit b35a5bf

2 files changed

Lines changed: 45 additions & 12 deletions

File tree

src/databricks/sql/session.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ def _create_backend(
173173

174174
# All-purpose-compute Thrift http_path:
175175
# [/]sql/protocolv1/o/<workspace-id>/<cluster-id>[/...][?...]
176-
_CLUSTER_PATH_ORG_ID_RE = re.compile(r"(?:^|/)sql/protocolv1/o/(\d+)/[^/?]+")
176+
_ORG_ID_RE = re.compile(r"^[0-9]+$")
177+
_CLUSTER_PATH_ORG_ID_RE = re.compile(r"^/?sql/protocolv1/o/([0-9]+)/[^/?]+")
177178

178179
@staticmethod
179180
def _extract_spog_headers(http_path, existing_headers):
@@ -214,12 +215,12 @@ def _extract_spog_headers(http_path, existing_headers):
214215
query_string = http_path.split("?", 1)[1]
215216
params = parse_qs(query_string)
216217
value = params.get("o", [None])[0]
217-
if value:
218+
if value and Session._ORG_ID_RE.fullmatch(value):
218219
org_id = value
219220
source = "?o= in http_path"
220221

221222
if org_id is None:
222-
cluster_match = Session._CLUSTER_PATH_ORG_ID_RE.search(http_path)
223+
cluster_match = Session._CLUSTER_PATH_ORG_ID_RE.match(http_path)
223224
if cluster_match:
224225
org_id = cluster_match.group(1)
225226
source = "cluster path segment"

tests/unit/test_session.py

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,10 @@ def test_socket_timeout_passthrough(self, mock_client_class):
162162

163163
@patch("%s.session.ThriftDatabricksClient" % PACKAGE_NAME)
164164
def test_configuration_passthrough(self, mock_client_class):
165-
mock_session_config = {"ANSI_MODE": "FALSE", "QUERY_TAGS": "team:engineering,project:data-pipeline"}
165+
mock_session_config = {
166+
"ANSI_MODE": "FALSE",
167+
"QUERY_TAGS": "team:engineering,project:data-pipeline",
168+
}
166169
databricks.sql.connect(
167170
session_configuration=mock_session_config, **self.DUMMY_CONNECTION_ARGS
168171
)
@@ -218,10 +221,15 @@ def test_query_tags_dict_sets_session_config(self, mock_client_class):
218221
)
219222

220223
call_kwargs = mock_client_class.return_value.open_session.call_args[1]
221-
assert call_kwargs["session_configuration"]["QUERY_TAGS"] == "team:data-eng,project:etl"
224+
assert (
225+
call_kwargs["session_configuration"]["QUERY_TAGS"]
226+
== "team:data-eng,project:etl"
227+
)
222228

223229
@patch("%s.session.ThriftDatabricksClient" % PACKAGE_NAME)
224-
def test_query_tags_dict_takes_precedence_over_session_config(self, mock_client_class):
230+
def test_query_tags_dict_takes_precedence_over_session_config(
231+
self, mock_client_class
232+
):
225233
databricks.sql.connect(
226234
query_tags={"team": "new-team"},
227235
session_configuration={"QUERY_TAGS": "team:old-team,other:value"},
@@ -242,9 +250,7 @@ def test_extracts_org_id_from_query_param(self):
242250
assert result == {"x-databricks-org-id": "6051921418418893"}
243251

244252
def test_no_query_param_returns_empty(self):
245-
result = Session._extract_spog_headers(
246-
"/sql/1.0/warehouses/abc123", []
247-
)
253+
result = Session._extract_spog_headers("/sql/1.0/warehouses/abc123", [])
248254
assert result == {}
249255

250256
def test_no_o_param_returns_empty(self):
@@ -281,6 +287,24 @@ def test_multiple_query_params(self):
281287
)
282288
assert result == {"x-databricks-org-id": "12345"}
283289

290+
def test_non_numeric_query_param_returns_empty(self):
291+
result = Session._extract_spog_headers(
292+
"/sql/1.0/warehouses/abc123?o=abc123", []
293+
)
294+
assert result == {}
295+
296+
def test_control_char_query_param_returns_empty(self):
297+
result = Session._extract_spog_headers(
298+
"/sql/1.0/warehouses/abc123?o=123%0D%0AX-Injected:%20yes", []
299+
)
300+
assert result == {}
301+
302+
def test_empty_query_param_returns_empty(self):
303+
result = Session._extract_spog_headers(
304+
"/sql/1.0/warehouses/abc123?o=", []
305+
)
306+
assert result == {}
307+
284308
def test_extracts_org_id_from_cluster_path_segment(self):
285309
# All-purpose-compute path embeds workspace ID in /o/<wsid>/<cluster>.
286310
# Without ?o=, the driver must still set x-databricks-org-id so that
@@ -311,10 +335,18 @@ def test_explicit_header_wins_over_cluster_path_segment(self):
311335
)
312336
assert result == {}
313337

338+
def test_nested_cluster_path_prefix_returns_empty(self):
339+
result = Session._extract_spog_headers(
340+
"evil/sql/protocolv1/o/999/0528-220959-uzmcn1qt", []
341+
)
342+
assert result == {}
343+
344+
def test_incomplete_cluster_path_returns_empty(self):
345+
result = Session._extract_spog_headers("sql/protocolv1/o/999/", [])
346+
assert result == {}
347+
314348
def test_warehouse_path_without_query_param_returns_empty(self):
315349
# Regression guard: the new cluster-path regex must not accidentally
316350
# match warehouse paths (which never embed the workspace ID).
317-
result = Session._extract_spog_headers(
318-
"/sql/1.0/warehouses/abc123", []
319-
)
351+
result = Session._extract_spog_headers("/sql/1.0/warehouses/abc123", [])
320352
assert result == {}

0 commit comments

Comments
 (0)