From aa8bd2470dbb16f58628d03470ff49e96ad71e65 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Fri, 15 May 2026 09:21:01 +0000 Subject: [PATCH] feat(backend/kernel): wire TSparkParameter through to kernel bind_param MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lifts the NotSupportedError that execute_command currently raises for parametrized queries. The kernel-side PyO3 binding for Statement.bind_param landed in databricks-sql-kernel#18; this commit wires the connector's TSparkParameter shape through to it. Implementation: - New `bind_tspark_params(kernel_stmt, parameters)` in type_mapping.py forwards each TSparkParameter to the kernel as `(ordinal, value.stringValue, type)`. ordinal is the 1-based position in the parameters list; the connector's `ordinal: bool` flag is checked only to reject named bindings (kernel v0 doesn't accept them on the wire). - execute_command no longer raises on `parameters=[...]`. The query_tags branch stays — that's a separate gap. Tests: - 6 new unit tests in tests/unit/test_kernel_type_mapping.py for the mapper: - positional forwarding preserves ordering and (ordinal, value, type) - None value forwards as SQL NULL - VOID passes through verbatim (kernel parser ignores value for VOID) - named bindings raise NotSupportedError with a pointed message - missing TSparkParameter.type defaults to STRING (defensive) - empty parameters list is a no-op - 3 new e2e tests in tests/e2e/test_kernel_backend.py against dogfood: - mixed-type round-trip (INT, STRING, BOOLEAN) via the connector's native IntegerParameter/StringParameter/BooleanParameter - None parameter (VoidParameter → SQL NULL) - DECIMAL parameter with precision/scale carried in the SQL type string (auto-inferred — explicit-arg path has a pre-existing bug in native.py where format-args are swapped) Unit pass: 45/45 (was 39). Full unit suite: 625 pass. Live e2e: 14/14 (was 11). Co-authored-by: Isaac Signed-off-by: Vikrant Puppala --- src/databricks/sql/backend/kernel/client.py | 16 ++- .../sql/backend/kernel/type_mapping.py | 66 ++++++++++++- tests/e2e/test_kernel_backend.py | 66 +++++++++++++ tests/unit/test_kernel_type_mapping.py | 99 +++++++++++++++++++ 4 files changed, 233 insertions(+), 14 deletions(-) diff --git a/src/databricks/sql/backend/kernel/client.py b/src/databricks/sql/backend/kernel/client.py index 6d62e986a..a54501edf 100644 --- a/src/databricks/sql/backend/kernel/client.py +++ b/src/databricks/sql/backend/kernel/client.py @@ -14,10 +14,6 @@ Phase 1 gaps documented in the integration design: -- Parameter binding (``parameters=[TSparkParameter, ...]``) is not - yet supported — the PyO3 ``Statement`` doesn't expose - ``bind_param``. ``execute_command(parameters=[...])`` raises - ``NotSupportedError``. - ``query_tags`` on execute is not supported (kernel exposes ``statement_conf`` but PyO3 doesn't surface it). - ``get_tables`` with a non-empty ``table_types`` filter applies @@ -262,11 +258,6 @@ def execute_command( ) -> Union["ResultSet", None]: if self._kernel_session is None: raise InterfaceError("Cannot execute_command without an open session.") - if parameters: - raise NotSupportedError( - "Parameter binding is not yet supported on the kernel backend " - "(PyO3 Statement.bind_param lands in a follow-up PR)." - ) if query_tags: raise NotSupportedError( "Statement-level query_tags are not yet supported on the kernel backend." 
@@ -275,6 +266,13 @@ def execute_command(
         stmt = self._kernel_session.statement()
         try:
             stmt.set_sql(operation)
+            if parameters:
+                # Lazy import — type_mapping imports pyarrow at
+                # module load; keep `execute_command` callable from
+                # contexts that don't need pyarrow.
+                from databricks.sql.backend.kernel.type_mapping import bind_tspark_params
+
+                bind_tspark_params(stmt, parameters)
             if async_op:
                 async_exec = stmt.submit()
                 command_id = CommandId.from_sea_statement_id(async_exec.statement_id)
diff --git a/src/databricks/sql/backend/kernel/type_mapping.py b/src/databricks/sql/backend/kernel/type_mapping.py
index a91160d17..ce5d2939f 100644
--- a/src/databricks/sql/backend/kernel/type_mapping.py
+++ b/src/databricks/sql/backend/kernel/type_mapping.py
@@ -6,18 +6,20 @@
 flattens the conversion so ``KernelResultSet`` and any future
 kernel-result wrapper share the same mapping.
 
-Parameter binding (``TSparkParameter`` → kernel ``TypedValue``) is
-not yet implemented — the PyO3 ``Statement`` doesn't expose a
-``bind_param`` method on this branch. It'll land in a follow-up
-once that PyO3 surface ships.
+Parameter binding (``TSparkParameter`` → kernel
+``Statement.bind_param``) is handled by ``bind_tspark_params``,
+which forwards the connector's already-string-encoded form to the
+kernel binding without an intermediate Python-typed round-trip.
 """
 
 from __future__ import annotations
 
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
 import pyarrow
 
+from databricks.sql.thrift_api.TCLIService import ttypes
+
 
 def _arrow_type_to_dbapi_string(arrow_type: pyarrow.DataType) -> str:
     """Map a pyarrow type to the Databricks SQL type name used in
@@ -77,3 +79,57 @@ def description_from_arrow_schema(schema: pyarrow.Schema) -> List[Tuple]:
         )
         for field in schema
     ]
+
+
+def _tspark_param_value_str(param: ttypes.TSparkParameter) -> Optional[str]:
+    """Extract the string-encoded value from a ``TSparkParameter``,
+    or ``None`` for SQL NULL.
+
+    Native parameters (``IntegerParameter`` etc.) always wrap their
+    value in ``TSparkParameterValue(stringValue=str(self.value))``;
+    ``VoidParameter`` sets ``stringValue="None"`` but the type is
+    ``"VOID"`` — the kernel-side parser ignores the value when the
+    type is VOID, so we don't have to special-case here.
+    """
+    if param.value is None:
+        return None
+    return param.value.stringValue
+
+
+def bind_tspark_params(
+    kernel_stmt, parameters: List[ttypes.TSparkParameter]
+) -> None:
+    """Bind a list of ``TSparkParameter`` onto a kernel ``Statement``.
+
+    The kernel expects positional bindings only (SEA v0 doesn't
+    accept named bindings on the wire). The connector's
+    ``TSparkParameter`` has an ``ordinal: bool`` flag; ``True`` means
+    "treat as positional in source-list order". Native bindings
+    almost always arrive positionally today; named-binding
+    parameters surface as ``NotSupportedError`` so the user gets a
+    clear message instead of a server-side rejection.
+
+    Compound types (``ARRAY`` / ``MAP`` / ``STRUCT``) are routed
+    through the kernel parser, which currently rejects them — same
+    user-visible message ("compound parameter types … are not yet
+    supported"). Tracked as a follow-up.
+    """
+    for i, param in enumerate(parameters, start=1):
+        # `ordinal` is a bool on native params (positional vs named);
+        # treat a param as named when `name` is set and `ordinal`
+        # isn't truthy. Named params can't flow through the kernel
+        # today; raise early rather than letting the server reject.
+        if getattr(param, "name", None) and not getattr(param, "ordinal", False):
+            from databricks.sql.exc import NotSupportedError
+
+            raise NotSupportedError(
+                f"Named parameter binding (got name={param.name!r}) is not yet "
+                "supported on the kernel backend; pass parameters positionally."
+            )
+
+        sql_type = param.type or "STRING"
+        value_str = _tspark_param_value_str(param)
+        # The kernel takes 1-based ordinals; `i` is already that.
+        # Errors from the kernel side (bad literal, unsupported type,
+        # etc.) come up as KernelError and bubble through normally.
+        kernel_stmt.bind_param(i, value_str, sql_type)
diff --git a/tests/e2e/test_kernel_backend.py b/tests/e2e/test_kernel_backend.py
index 19fa5072f..c4c0425f5 100644
--- a/tests/e2e/test_kernel_backend.py
+++ b/tests/e2e/test_kernel_backend.py
@@ -184,3 +184,69 @@ def test_bad_sql_surfaces_as_databaseerror(conn):
     # Structured fields copied off the kernel exception:
     assert getattr(err, "code", None) == "SqlError"
     assert getattr(err, "sql_state", None) == "42P01"
+
+
+# ── Parameter binding ─────────────────────────────────────────────
+
+
+def test_parameterized_query_round_trips(conn):
+    """Positional parameter binding via the kernel backend. The
+    connector's native parameter classes (IntegerParameter etc.)
+    serialize to TSparkParameter under the hood; the kernel
+    backend's mapper forwards them positionally.
+    """
+    from databricks.sql.parameters.native import (
+        IntegerParameter,
+        StringParameter,
+        BooleanParameter,
+    )
+
+    with conn.cursor() as cur:
+        cur.execute(
+            "SELECT ? AS i, ? AS s, ? AS b",
+            [
+                IntegerParameter(42),
+                StringParameter("alice"),
+                BooleanParameter(True),
+            ],
+        )
+        rows = cur.fetchall()
+        assert len(rows) == 1
+        assert rows[0][0] == 42
+        assert rows[0][1] == "alice"
+        assert rows[0][2] is True
+
+
+def test_parameterized_query_with_null(conn):
+    """`None` in the parameter list flows through as VoidParameter
+    → kernel TypedValue::Null."""
+    with conn.cursor() as cur:
+        cur.execute("SELECT ? IS NULL AS is_null", [None])
+        rows = cur.fetchall()
+        assert rows[0][0] is True
+
+
+def test_parameterized_query_decimal(conn):
+    """DECIMAL parameters carry precision/scale in the SQL type
+    string ('DECIMAL(p,s)') — the kernel parser extracts them so
+    fractional digits survive the wire.
+
+    Uses the connector's auto-inference path
+    (`calculate_decimal_cast_string`) to derive precision/scale
+    from the value; the explicit-arg path
+    (`DecimalParameter(v, scale=, precision=)`) has a pre-existing
+    bug in this branch where the format-args are passed
+    `(scale, precision)` instead of `(precision, scale)` — out of
+    scope for this PR.
+    """
+    import decimal
+    from databricks.sql.parameters.native import DecimalParameter
+
+    with conn.cursor() as cur:
+        cur.execute(
+            "SELECT ? AS d",
+            [DecimalParameter(decimal.Decimal("-123.45"))],
+        )
+        rows = cur.fetchall()
+        # Server echoes back as decimal.Decimal.
+ assert str(rows[0][0]) == "-123.45" diff --git a/tests/unit/test_kernel_type_mapping.py b/tests/unit/test_kernel_type_mapping.py index 5ab5bde74..3ebf45001 100644 --- a/tests/unit/test_kernel_type_mapping.py +++ b/tests/unit/test_kernel_type_mapping.py @@ -71,3 +71,102 @@ def test_description_from_schema_preserves_field_names_and_order(): for d in desc: assert len(d) == 7 assert d[2:] == (None, None, None, None, None) + + +# ─── bind_tspark_params ────────────────────────────────────────────────── + + +def _mk_param(*, type, value, ordinal=True, name=None): + """Build a minimal TSparkParameter for tests.""" + from databricks.sql.thrift_api.TCLIService import ttypes + + p = ttypes.TSparkParameter(ordinal=ordinal, name=name, type=type) + p.value = ttypes.TSparkParameterValue(stringValue=value) if value is not None else None + return p + + +class _RecordingStmt: + """Stand-in for the kernel `Statement` pyclass — records every + `bind_param` call so tests can assert the (ordinal, value, type) + triples the mapper forwarded.""" + + def __init__(self): + self.calls = [] + + def bind_param(self, ordinal, value_str, sql_type): + self.calls.append((ordinal, value_str, sql_type)) + + +def test_bind_tspark_params_forwards_each_param_positionally(): + from databricks.sql.backend.kernel.type_mapping import bind_tspark_params + + params = [ + _mk_param(type="INT", value="42"), + _mk_param(type="STRING", value="alice"), + _mk_param(type="DATE", value="2026-05-15"), + ] + stmt = _RecordingStmt() + bind_tspark_params(stmt, params) + assert stmt.calls == [ + (1, "42", "INT"), + (2, "alice", "STRING"), + (3, "2026-05-15", "DATE"), + ] + + +def test_bind_tspark_params_null_value(): + """TSparkParameter with value=None → kernel sees value_str=None, + interpreted as SQL NULL regardless of the SQL type.""" + from databricks.sql.backend.kernel.type_mapping import bind_tspark_params + + p = _mk_param(type="STRING", value=None) + stmt = _RecordingStmt() + bind_tspark_params(stmt, [p]) + assert stmt.calls == [(1, None, "STRING")] + + +def test_bind_tspark_params_void_passes_through(): + """VoidParameter sets type='VOID' with stringValue='None'; the + kernel parser ignores the value when type=VOID.""" + from databricks.sql.backend.kernel.type_mapping import bind_tspark_params + + p = _mk_param(type="VOID", value="None") + stmt = _RecordingStmt() + bind_tspark_params(stmt, [p]) + assert stmt.calls == [(1, "None", "VOID")] + + +def test_bind_tspark_params_named_param_rejected(): + """The kernel doesn't accept named bindings on the SEA wire; + surface that at the connector layer so the user gets a pointed + error instead of a server-side rejection.""" + from databricks.sql.backend.kernel.type_mapping import bind_tspark_params + from databricks.sql.exc import NotSupportedError + + p = _mk_param(type="INT", value="42", ordinal=False, name="my_param") + stmt = _RecordingStmt() + with pytest.raises(NotSupportedError, match="(?i)named"): + bind_tspark_params(stmt, [p]) + # Nothing should have been forwarded before the rejection. 
+ assert stmt.calls == [] + + +def test_bind_tspark_params_missing_type_defaults_to_string(): + """Defensive: a TSparkParameter with no `type` shouldn't crash + the mapper — fall back to STRING and let the kernel parse.""" + from databricks.sql.backend.kernel.type_mapping import bind_tspark_params + from databricks.sql.thrift_api.TCLIService import ttypes + + p = ttypes.TSparkParameter(ordinal=True, name=None, type=None) + p.value = ttypes.TSparkParameterValue(stringValue="hello") + stmt = _RecordingStmt() + bind_tspark_params(stmt, [p]) + assert stmt.calls == [(1, "hello", "STRING")] + + +def test_bind_tspark_params_empty_list_is_noop(): + from databricks.sql.backend.kernel.type_mapping import bind_tspark_params + + stmt = _RecordingStmt() + bind_tspark_params(stmt, []) + assert stmt.calls == []
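-- 
Usage sketch (placed below the signature line, so it is commentary
rather than part of the applied patch; illustrative only). It assumes
a connection already configured to use the kernel backend (the
selection knob isn't shown here) and uses placeholder credentials;
the imports and `?` markers mirror the e2e tests above, and the
triple in the comment is the shape bind_tspark_params forwards:

    from databricks import sql
    from databricks.sql.parameters.native import (
        IntegerParameter,
        StringParameter,
    )

    # Placeholder connection arguments; substitute real workspace values.
    with sql.connect(
        server_hostname="<workspace-host>",
        http_path="<warehouse-http-path>",
        access_token="<personal-access-token>",
    ) as conn:
        with conn.cursor() as cur:
            # `?` markers bind in list order; the kernel backend forwards
            # each parameter as (1-based ordinal, stringValue, SQL type).
            cur.execute(
                "SELECT ? AS i, ? AS s",
                [IntegerParameter(7), StringParameter("bob")],
            )
            print(cur.fetchall())  # expect one row: (7, 'bob')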