Skip to content

Commit 5bbbc90

Browse files
committed
perf: reorder RESULT_KIND dispatch and replace getattr with direct access
Two micro-optimizations in the _set_result() hot path:

1. Reorder the RESULT_KIND if/elif chain to check ROWS first (it was third), since it is by far the most common result type. VOID is second. SET_KEYSPACE and SCHEMA_CHANGE (rare) are now last.
2. Add a continuous_paging_options = None class attribute to _QueryMessage, allowing direct attribute access instead of getattr(self.message, 'continuous_paging_options', None).

Benchmark (2M iters, Python 3.14):
- RESULT_KIND reorder: 35.5 -> 24.3 ns (1.46x, -11.2 ns/dispatch)
- getattr -> direct: 32.0 -> 18.3 ns (1.75x, -13.7 ns/access)
- Combined: ~25 ns saved per query
1 parent 580b455 commit 5bbbc90

3 files changed

Lines changed: 140 additions & 20 deletions

File tree

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Copyright ScyllaDB, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Micro-benchmark: RESULT_KIND dispatch ordering and getattr vs direct access.
17+
18+
Measures the cost difference between:
19+
1. Checking RESULT_KIND_ROWS first vs third in the if/elif chain
20+
2. getattr(msg, 'continuous_paging_options', None) vs msg.continuous_paging_options
21+
22+
Run:
23+
python benchmarks/bench_result_kind_dispatch.py
24+
"""
25+
26+
import sys
27+
import timeit
28+
29+
30+
def bench(n=2_000_000, repeat=5):
    """Time both micro-optimizations and print a before/after report.

    Two comparisons are run and printed to stdout:

    1. An if/elif dispatch on a result kind with the ROWS check in third
       position (old order) versus first position (new order), with the
       kind fixed to ROWS.
    2. ``getattr(obj, 'continuous_paging_options', None)`` on an object
       that lacks the attribute versus direct attribute access on an
       object whose class defines it as ``None``.

    Args:
        n: Number of calls per timing sample.
        repeat: Number of timing samples taken per candidate. The fastest
            sample is reported, because slower samples mostly reflect
            interference from other processes rather than the code itself.

    Raises:
        ValueError: If ``n`` or ``repeat`` is smaller than 1.
    """
    if n < 1 or repeat < 1:
        raise ValueError("n and repeat must both be >= 1")

    # Simulate the result kind values
    RESULT_KIND_SET_KEYSPACE = 0x0003
    RESULT_KIND_SCHEMA_CHANGE = 0x0005
    RESULT_KIND_ROWS = 0x0002
    RESULT_KIND_VOID = 0x0001

    kind = RESULT_KIND_ROWS  # the common case

    # Old order: SET_KEYSPACE, SCHEMA_CHANGE, ROWS, VOID
    def old_dispatch():
        if kind == RESULT_KIND_SET_KEYSPACE:
            return 'set_keyspace'
        elif kind == RESULT_KIND_SCHEMA_CHANGE:
            return 'schema_change'
        elif kind == RESULT_KIND_ROWS:
            return 'rows'
        elif kind == RESULT_KIND_VOID:
            return 'void'

    # New order: ROWS, VOID, SET_KEYSPACE, SCHEMA_CHANGE
    def new_dispatch():
        if kind == RESULT_KIND_ROWS:
            return 'rows'
        elif kind == RESULT_KIND_VOID:
            return 'void'
        elif kind == RESULT_KIND_SET_KEYSPACE:
            return 'set_keyspace'
        elif kind == RESULT_KIND_SCHEMA_CHANGE:
            return 'schema_change'

    print(f"=== RESULT_KIND dispatch order ({n:,} iters) ===\n")

    ns_old, ns_new = _time_pair_ns(old_dispatch, new_dispatch, n, repeat)
    print(f" Old (ROWS=3rd): {ns_old:.1f} ns")
    print(f" New (ROWS=1st): {ns_new:.1f} ns")
    _print_saving(ns_old, ns_new)

    # getattr vs direct attribute access
    print(f"\n=== getattr vs direct attribute access ({n:,} iters) ===\n")

    class OldMsg:
        pass

    class NewMsg:
        continuous_paging_options = None

    old_msg = OldMsg()
    new_msg = NewMsg()

    def old_getattr():
        return getattr(old_msg, 'continuous_paging_options', None)

    def new_direct():
        return new_msg.continuous_paging_options

    ns_old, ns_new = _time_pair_ns(old_getattr, new_direct, n, repeat)
    print(f" getattr(msg, 'continuous_paging_options', None): {ns_old:.1f} ns")
    print(f" msg.continuous_paging_options: {ns_new:.1f} ns")
    _print_saving(ns_old, ns_new)


def _time_pair_ns(old_func, new_func, n, repeat):
    """Return (old, new) best-of-``repeat`` times per call in nanoseconds."""
    # Warmup: exercise both callables before any sample is taken.
    for _ in range(10000):
        old_func()
        new_func()

    best_old = min(timeit.repeat(old_func, number=n, repeat=repeat))
    best_new = min(timeit.repeat(new_func, number=n, repeat=repeat))
    return best_old / n * 1e9, best_new / n * 1e9


def _print_saving(ns_old, ns_new):
    """Print the absolute saving and the speedup factor of new over old."""
    saving = ns_old - ns_new
    # A zero reading is possible on coarse timers; avoid dividing by it.
    speedup = ns_old / ns_new if ns_new > 0 else float('inf')
    print(f" Saving: {saving:.1f} ns ({speedup:.2f}x)")
111+
112+
113+
if __name__ == "__main__":
    # Print the interpreter version ahead of the benchmark report.
    banner = f"Python {sys.version}\n"
    print(banner)
    bench()

cassandra/cluster.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4751,7 +4751,26 @@ def _set_result(self, host, connection, pool, response):
47514751
if tablet is not None:
47524752
self.session.cluster.metadata._tablets.add_tablet(self.query.keyspace, self.query.table, tablet)
47534753

4754-
if response.kind == RESULT_KIND_SET_KEYSPACE:
4754+
if response.kind == RESULT_KIND_ROWS:
4755+
self._paging_state = response.paging_state
4756+
# Use pre-cached column names/types from PreparedStatement
4757+
# when available to avoid rebuilding lists from metadata.
4758+
ps = self.prepared_statement
4759+
if ps is not None and ps._result_col_names is not None:
4760+
col_names = ps._result_col_names
4761+
col_types = ps._result_col_types
4762+
else:
4763+
col_names = response.column_names
4764+
col_types = response.column_types
4765+
self._col_names = col_names
4766+
self._col_types = col_types
4767+
if self.message.continuous_paging_options:
4768+
self._handle_continuous_paging_first_response(connection, response)
4769+
else:
4770+
self._set_final_result(self.row_factory(col_names, response.parsed_rows))
4771+
elif response.kind == RESULT_KIND_VOID:
4772+
self._set_final_result(None)
4773+
elif response.kind == RESULT_KIND_SET_KEYSPACE:
47554774
session = getattr(self, 'session', None)
47564775
# since we're running on the event loop thread, we need to
47574776
# use a non-blocking method for setting the keyspace on
@@ -4771,25 +4790,6 @@ def _set_result(self, host, connection, pool, response):
47714790
refresh_schema_and_set_result,
47724791
self.session.cluster.control_connection,
47734792
self, connection, **response.schema_change_event)
4774-
elif response.kind == RESULT_KIND_ROWS:
4775-
self._paging_state = response.paging_state
4776-
# Use pre-cached column names/types from PreparedStatement
4777-
# when available to avoid rebuilding lists from metadata.
4778-
ps = self.prepared_statement
4779-
if ps is not None and ps._result_col_names is not None:
4780-
col_names = ps._result_col_names
4781-
col_types = ps._result_col_types
4782-
else:
4783-
col_names = response.column_names
4784-
col_types = response.column_types
4785-
self._col_names = col_names
4786-
self._col_types = col_types
4787-
if getattr(self.message, 'continuous_paging_options', None):
4788-
self._handle_continuous_paging_first_response(connection, response)
4789-
else:
4790-
self._set_final_result(self.row_factory(col_names, response.parsed_rows))
4791-
elif response.kind == RESULT_KIND_VOID:
4792-
self._set_final_result(None)
47934793
else:
47944794
self._set_final_result(response)
47954795
elif isinstance(response, ErrorMessage):

cassandra/protocol.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,11 @@ def recv_body(cls, f, *args):
544544

545545
class _QueryMessage(_MessageType):
546546

547+
# DSE continuous paging: stored when the feature is active, otherwise None.
548+
# Declared as a class attribute so that callers can use direct attribute
549+
# access instead of getattr(msg, 'continuous_paging_options', None).
550+
continuous_paging_options = None
551+
547552
def __init__(self, query_params, consistency_level,
548553
serial_consistency_level=None, fetch_size=None,
549554
paging_state=None, timestamp=None, skip_meta=False,

0 commit comments

Comments
 (0)