From 9dbabcec6e71eba4772e42bb259f5549d9e31b6e Mon Sep 17 00:00:00 2001
From: vaggelisd
Date: Fri, 8 May 2026 17:02:55 +0300
Subject: [PATCH] [mypyc] Add `librt.strings.isalpha` codepoint primitive

Wraps `Py_UNICODE_ISALPHA` for the codepoint fast path, mirroring the
already-merged `librt.strings.isspace` (#21462), `isdigit` (#21504), and
the in-flight `isalnum`.

Microbenchmark, both paths mypyc-compiled, scanning 2.5M codepoints per
call: `s[i].isalpha()` runs at ~17.8 ns/codepoint; the codepoint path
`c: i32 = i32(ord(s[i])); isalpha(c)` at ~3.5 ns/codepoint, roughly 5x
faster. (`s[i].isalpha()` has to produce a one-character `str` for every
index before it can be classified, and for a one-character string
`str.isalpha()` ends up in the same `Py_UNICODE_ISALPHA` check, so the
gap is per-character `str` and call overhead rather than a different
classification algorithm; the codepoint path goes straight through the
`Py_UNICODE_ISALPHA` macro with no `str` materialization.)
---
 mypy/typeshed/stubs/librt/librt/strings.pyi |  1 +
 mypyc/lib-rt/codepoint_extra_ops.h          |  4 ++++
 mypyc/lib-rt/strings/librt_strings.c        |  4 ++++
 mypyc/primitives/librt_strings_ops.py       |  9 +++++++++
 mypyc/test-data/irbuild-librt-strings.test  | 14 ++++++++++++++
 mypyc/test-data/run-librt-strings.test      |  5 ++++-
 6 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/mypy/typeshed/stubs/librt/librt/strings.pyi b/mypy/typeshed/stubs/librt/librt/strings.pyi
index 5ab1e978d465..01aee3ff758d 100644
--- a/mypy/typeshed/stubs/librt/librt/strings.pyi
+++ b/mypy/typeshed/stubs/librt/librt/strings.pyi
@@ -46,3 +46,4 @@ def read_f64_be(b: bytes, index: i64, /) -> float: ...
 def isspace(c: i32, /) -> bool: ...
 def isdigit(c: i32, /) -> bool: ...
 def isalnum(c: i32, /) -> bool: ...
+def isalpha(c: i32, /) -> bool: ...
diff --git a/mypyc/lib-rt/codepoint_extra_ops.h b/mypyc/lib-rt/codepoint_extra_ops.h index a4f4c6880caf..bb83f92e4b87 100644 --- a/mypyc/lib-rt/codepoint_extra_ops.h +++ b/mypyc/lib-rt/codepoint_extra_ops.h @@ -21,4 +21,8 @@ static inline bool LibRTStrings_IsAlnum(int32_t c) { return c >= 0 && Py_UNICODE_ISALNUM((Py_UCS4)c); } +static inline bool LibRTStrings_IsAlpha(int32_t c) { + return c >= 0 && Py_UNICODE_ISALPHA((Py_UCS4)c); +} + #endif // MYPYC_CODEPOINT_EXTRA_OPS_H diff --git a/mypyc/lib-rt/strings/librt_strings.c b/mypyc/lib-rt/strings/librt_strings.c index ce15107f7e09..cbc3e5f753fa 100644 --- a/mypyc/lib-rt/strings/librt_strings.c +++ b/mypyc/lib-rt/strings/librt_strings.c @@ -1193,6 +1193,7 @@ cp_parse_i32(PyObject *arg, int32_t *out) { DEFINE_CP_BOOL_WRAPPER(isspace, LibRTStrings_IsSpace) DEFINE_CP_BOOL_WRAPPER(isdigit, LibRTStrings_IsDigit) DEFINE_CP_BOOL_WRAPPER(isalnum, LibRTStrings_IsAlnum) +DEFINE_CP_BOOL_WRAPPER(isalpha, LibRTStrings_IsAlpha) static PyMethodDef librt_strings_module_methods[] = { {"write_i16_le", (PyCFunction) write_i16_le, METH_FASTCALL, @@ -1264,6 +1265,9 @@ static PyMethodDef librt_strings_module_methods[] = { {"isalnum", cp_isalnum, METH_O, PyDoc_STR("Test whether a codepoint (i32) is alphanumeric.") }, + {"isalpha", cp_isalpha, METH_O, + PyDoc_STR("Test whether a codepoint (i32) is a Unicode letter.") + }, {NULL, NULL, 0, NULL} }; diff --git a/mypyc/primitives/librt_strings_ops.py b/mypyc/primitives/librt_strings_ops.py index 0432cade7b9a..93fa717cf529 100644 --- a/mypyc/primitives/librt_strings_ops.py +++ b/mypyc/primitives/librt_strings_ops.py @@ -422,3 +422,12 @@ error_kind=ERR_NEVER, dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS], ) + +function_op( + name="librt.strings.isalpha", + arg_types=[int32_rprimitive], + return_type=bool_rprimitive, + c_function_name="LibRTStrings_IsAlpha", + error_kind=ERR_NEVER, + dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS], +) diff --git a/mypyc/test-data/irbuild-librt-strings.test 
b/mypyc/test-data/irbuild-librt-strings.test index 8b27f6a67256..e5d18b6eb852 100644 --- a/mypyc/test-data/irbuild-librt-strings.test +++ b/mypyc/test-data/irbuild-librt-strings.test @@ -373,3 +373,17 @@ def is_an(c): L0: r0 = LibRTStrings_IsAlnum(c) return r0 + +[case testLibrtStringsIsAlphaIR] +from librt.strings import isalpha +from mypy_extensions import i32 + +def is_a(c: i32) -> bool: + return isalpha(c) +[out] +def is_a(c): + c :: i32 + r0 :: bool +L0: + r0 = LibRTStrings_IsAlpha(c) + return r0 diff --git a/mypyc/test-data/run-librt-strings.test b/mypyc/test-data/run-librt-strings.test index 141d830d3d0b..aa38c713d384 100644 --- a/mypyc/test-data/run-librt-strings.test +++ b/mypyc/test-data/run-librt-strings.test @@ -1443,7 +1443,7 @@ def test_new_without_init_is_usable() -> None: [case testLibrtStringsCodepointClassifiers_librt] from typing import Any from mypy_extensions import i32 -from librt.strings import isspace, isdigit, isalnum +from librt.strings import isspace, isdigit, isalnum, isalpha from testutil import assertRaises @@ -1454,6 +1454,7 @@ def test_codepoint_classifiers() -> None: assert not isspace(bad) assert not isdigit(bad) assert not isalnum(bad) + assert not isalpha(bad) # Verify each codepoint primitive agrees with the matching str method # across all Unicode codepoints, including the ord(chr(i)) round-trip. # Any forces generic dispatch on the str side. @@ -1464,6 +1465,7 @@ def test_codepoint_classifiers() -> None: assert isspace(o) == isspace(i) == a.isspace() assert isdigit(o) == isdigit(i) == a.isdigit() assert isalnum(o) == isalnum(i) == a.isalnum() + assert isalpha(o) == isalpha(i) == a.isalpha() def test_codepoint_classifiers_via_any() -> None: @@ -1473,6 +1475,7 @@ def test_codepoint_classifiers_via_any() -> None: (isspace, " ", "a"), (isdigit, "5", "a"), (isalnum, "A", " "), + (isalpha, "A", " "), ): f: Any = fn assert f(ord(true_input)) is True