From 4943e219c6f68af4812e1792623c305d6a5edcea Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Fri, 8 May 2026 17:02:00 +0300 Subject: [PATCH 1/2] [mypyc] Add `librt.strings.isalnum` codepoint primitive Wraps `Py_UNICODE_ISALNUM` for the codepoint fast path, mirroring the already-merged `librt.strings.isspace` (#21462) and `isdigit` (#21504). Microbenchmark, both paths mypyc-compiled, scanning 2.5M codepoints per call: `s[i].isalnum()` runs at ~6.1 ns/codepoint; the codepoint path `c: i32 = i32(ord(s[i])); isalnum(c)` at ~4.8 ns/codepoint, roughly 1.3x faster. The gain is larger inside tokenizer-style loops that mix `isalnum` with literal-i32 compares (no per-character `str` materialization at all). --- mypy/typeshed/stubs/librt/librt/strings.pyi | 1 + mypyc/lib-rt/codepoint_extra_ops.h | 4 ++++ mypyc/lib-rt/strings/librt_strings.c | 4 ++++ mypyc/primitives/librt_strings_ops.py | 9 +++++++++ mypyc/test-data/irbuild-librt-strings.test | 14 ++++++++++++++ mypyc/test-data/run-librt-strings.test | 4 +++- 6 files changed, 35 insertions(+), 1 deletion(-) diff --git a/mypy/typeshed/stubs/librt/librt/strings.pyi b/mypy/typeshed/stubs/librt/librt/strings.pyi index 215fdf6fd56b..5ab1e978d465 100644 --- a/mypy/typeshed/stubs/librt/librt/strings.pyi +++ b/mypy/typeshed/stubs/librt/librt/strings.pyi @@ -45,3 +45,4 @@ def read_f64_be(b: bytes, index: i64, /) -> float: ... # obtained via ord(s[i])). Negative inputs return False. def isspace(c: i32, /) -> bool: ... def isdigit(c: i32, /) -> bool: ... +def isalnum(c: i32, /) -> bool: ... 
diff --git a/mypyc/lib-rt/codepoint_extra_ops.h b/mypyc/lib-rt/codepoint_extra_ops.h index 13e530e1d90f..a4f4c6880caf 100644 --- a/mypyc/lib-rt/codepoint_extra_ops.h +++ b/mypyc/lib-rt/codepoint_extra_ops.h @@ -17,4 +17,8 @@ static inline bool LibRTStrings_IsDigit(int32_t c) { return c >= 0 && Py_UNICODE_ISDIGIT((Py_UCS4)c); } +static inline bool LibRTStrings_IsAlnum(int32_t c) { + return c >= 0 && Py_UNICODE_ISALNUM((Py_UCS4)c); +} + #endif // MYPYC_CODEPOINT_EXTRA_OPS_H diff --git a/mypyc/lib-rt/strings/librt_strings.c b/mypyc/lib-rt/strings/librt_strings.c index 97a1c67f4623..ce15107f7e09 100644 --- a/mypyc/lib-rt/strings/librt_strings.c +++ b/mypyc/lib-rt/strings/librt_strings.c @@ -1192,6 +1192,7 @@ cp_parse_i32(PyObject *arg, int32_t *out) { DEFINE_CP_BOOL_WRAPPER(isspace, LibRTStrings_IsSpace) DEFINE_CP_BOOL_WRAPPER(isdigit, LibRTStrings_IsDigit) +DEFINE_CP_BOOL_WRAPPER(isalnum, LibRTStrings_IsAlnum) static PyMethodDef librt_strings_module_methods[] = { {"write_i16_le", (PyCFunction) write_i16_le, METH_FASTCALL, @@ -1260,6 +1261,9 @@ static PyMethodDef librt_strings_module_methods[] = { {"isdigit", cp_isdigit, METH_O, PyDoc_STR("Test whether a codepoint (i32) is a Unicode digit.") }, + {"isalnum", cp_isalnum, METH_O, + PyDoc_STR("Test whether a codepoint (i32) is alphanumeric.") + }, {NULL, NULL, 0, NULL} }; diff --git a/mypyc/primitives/librt_strings_ops.py b/mypyc/primitives/librt_strings_ops.py index 968aeb6014c4..0432cade7b9a 100644 --- a/mypyc/primitives/librt_strings_ops.py +++ b/mypyc/primitives/librt_strings_ops.py @@ -413,3 +413,12 @@ error_kind=ERR_NEVER, dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS], ) + +function_op( + name="librt.strings.isalnum", + arg_types=[int32_rprimitive], + return_type=bool_rprimitive, + c_function_name="LibRTStrings_IsAlnum", + error_kind=ERR_NEVER, + dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS], +) diff --git a/mypyc/test-data/irbuild-librt-strings.test b/mypyc/test-data/irbuild-librt-strings.test index 
bffa96dd5009..8b27f6a67256 100644 --- a/mypyc/test-data/irbuild-librt-strings.test +++ b/mypyc/test-data/irbuild-librt-strings.test @@ -359,3 +359,17 @@ def is_d(c): L0: r0 = LibRTStrings_IsDigit(c) return r0 + +[case testLibrtStringsIsAlnumIR] +from librt.strings import isalnum +from mypy_extensions import i32 + +def is_an(c: i32) -> bool: + return isalnum(c) +[out] +def is_an(c): + c :: i32 + r0 :: bool +L0: + r0 = LibRTStrings_IsAlnum(c) + return r0 diff --git a/mypyc/test-data/run-librt-strings.test b/mypyc/test-data/run-librt-strings.test index 211f88f72e08..d6285b373b79 100644 --- a/mypyc/test-data/run-librt-strings.test +++ b/mypyc/test-data/run-librt-strings.test @@ -1443,7 +1443,7 @@ def test_new_without_init_is_usable() -> None: [case testLibrtStringsCodepointClassifiers_librt] from typing import Any from mypy_extensions import i32 -from librt.strings import isspace, isdigit +from librt.strings import isspace, isdigit, isalnum def test_codepoint_classifiers() -> None: @@ -1451,6 +1451,7 @@ def test_codepoint_classifiers() -> None: for bad in (i32(-1), i32(-113)): assert not isspace(bad) assert not isdigit(bad) + assert not isalnum(bad) # Verify each codepoint primitive agrees with the matching str method # across all Unicode codepoints, including the ord(chr(i)) round-trip. # Any forces generic dispatch on the str side. 
@@ -1460,3 +1461,4 @@ def test_codepoint_classifiers() -> None: o = ord(c) assert isspace(o) == isspace(i) == a.isspace() assert isdigit(o) == isdigit(i) == a.isdigit() + assert isalnum(o) == isalnum(i) == a.isalnum() From 8683267ec3f882bf8cea219d1141ec5fda666454 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Tue, 19 May 2026 15:30:57 +0300 Subject: [PATCH 2/2] [mypyc] Test librt.strings codepoint primitives via Any dispatch The existing run-test for the codepoint classifiers exercises only the compiled fast path: mypyc rewrites `isspace(c)` / `isdigit(c)` / `isalnum(c)` into direct calls to the underlying C symbols, so the PyMethodDef wrappers (`cp_isspace`, `cp_isdigit`, `cp_isalnum`) and their i32 range check are never exercised. Iterate over the librt functions in a tuple so the callee is opaque to mypyc and dispatch falls back to the generic path, hitting the PyMethodDef wrappers. Also assert that the wrappers' `cp_parse_i32` raises OverflowError for inputs outside the i32 range. --- mypyc/test-data/run-librt-strings.test | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/mypyc/test-data/run-librt-strings.test b/mypyc/test-data/run-librt-strings.test index d6285b373b79..141d830d3d0b 100644 --- a/mypyc/test-data/run-librt-strings.test +++ b/mypyc/test-data/run-librt-strings.test @@ -1445,6 +1445,8 @@ from typing import Any from mypy_extensions import i32 from librt.strings import isspace, isdigit, isalnum +from testutil import assertRaises + def test_codepoint_classifiers() -> None: # Negative values are not codepoints. @@ -1462,3 +1464,23 @@ def test_codepoint_classifiers() -> None: assert isspace(o) == isspace(i) == a.isspace() assert isdigit(o) == isdigit(i) == a.isdigit() assert isalnum(o) == isalnum(i) == a.isalnum() + + +def test_codepoint_classifiers_via_any() -> None: + # Iterate so the callee is opaque to mypyc and dispatch falls back to + # the PyMethodDef wrapper, exercising the i32 range check.
+ for fn, true_input, false_input in ( + (isspace, " ", "a"), + (isdigit, "5", "a"), + (isalnum, "A", " "), + ): + f: Any = fn + assert f(ord(true_input)) is True + assert f(ord(false_input)) is False + # Negative values are valid i32, just not codepoints. + assert f(-1) is False + # Inputs outside i32 range raise OverflowError through the wrapper. + with assertRaises(OverflowError, "codepoint out of i32 range"): + f(1 << 40) + with assertRaises(OverflowError, "codepoint out of i32 range"): + f(-(1 << 40))