diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index cc8b8bafaf994..10f1448a2dde9 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -780,7 +780,7 @@ Py_ssize_t CPyStr_Count(PyObject *unicode, PyObject *substring, CPyTagged start) Py_ssize_t CPyStr_CountFull(PyObject *unicode, PyObject *substring, CPyTagged start, CPyTagged end); CPyTagged CPyStr_Ord(PyObject *obj); PyObject *CPyStr_Multiply(PyObject *str, CPyTagged count); - +bool CPyStr_IsSpace(PyObject *str); // Bytes operations diff --git a/mypyc/lib-rt/str_ops.c b/mypyc/lib-rt/str_ops.c index f91ace78a301d..a5a7ba4b980d7 100644 --- a/mypyc/lib-rt/str_ops.c +++ b/mypyc/lib-rt/str_ops.c @@ -630,3 +630,27 @@ PyObject *CPyStr_Multiply(PyObject *str, CPyTagged count) { } return PySequence_Repeat(str, temp_count); } + + +bool CPyStr_IsSpace(PyObject *str) { + Py_ssize_t len = PyUnicode_GET_LENGTH(str); + if (len == 0) return false; + + if (PyUnicode_IS_ASCII(str)) { + const Py_UCS1 *data = PyUnicode_1BYTE_DATA(str); + for (Py_ssize_t i = 0; i < len; i++) { + if (!_Py_ascii_whitespace[data[i]]) + return false; + } + return true; + } + + int kind = PyUnicode_KIND(str); + const void *data = PyUnicode_DATA(str); + for (Py_ssize_t i = 0; i < len; i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (!Py_UNICODE_ISSPACE(ch)) + return false; + } + return true; +} diff --git a/mypyc/primitives/str_ops.py b/mypyc/primitives/str_ops.py index f6d3f722dd7bc..ac32225d7ac9b 100644 --- a/mypyc/primitives/str_ops.py +++ b/mypyc/primitives/str_ops.py @@ -397,6 +397,14 @@ error_kind=ERR_NEG_INT, ) +method_op( + name="isspace", + arg_types=[str_rprimitive], + return_type=bool_rprimitive, + c_function_name="CPyStr_IsSpace", + error_kind=ERR_NEVER, +) + # obj.decode() method_op( name="decode", diff --git a/mypyc/test-data/fixtures/ir.py b/mypyc/test-data/fixtures/ir.py index 4f8a296729119..ee68f7b5a6110 100644 --- a/mypyc/test-data/fixtures/ir.py +++ b/mypyc/test-data/fixtures/ir.py @@ -131,6 +131,7 @@ def 
removeprefix(self, prefix: str, /) -> str: ... def removesuffix(self, suffix: str, /) -> str: ... def islower(self) -> bool: ... def count(self, substr: str, start: Optional[int] = None, end: Optional[int] = None) -> int: pass + def isspace(self) -> bool: ... class float: def __init__(self, x: object) -> None: pass diff --git a/mypyc/test-data/irbuild-str.test b/mypyc/test-data/irbuild-str.test index ee618bb34f65f..bb43aa7d51c45 100644 --- a/mypyc/test-data/irbuild-str.test +++ b/mypyc/test-data/irbuild-str.test @@ -972,3 +972,14 @@ def i_times_s(s, n): L0: r0 = CPyStr_Multiply(s, n) return r0 + +[case testStrIsSpace] +def is_space(x: str) -> bool: + return x.isspace() +[out] +def is_space(x): + x :: str + r0 :: bool +L0: + r0 = CPyStr_IsSpace(x) + return r0 diff --git a/mypyc/test-data/run-strings.test b/mypyc/test-data/run-strings.test index 49bf95d6be902..ca309ea3f136c 100644 --- a/mypyc/test-data/run-strings.test +++ b/mypyc/test-data/run-strings.test @@ -1257,3 +1257,15 @@ FMT: Final = "{} {}" def test_format() -> None: assert FMT.format(400 + 20, "roll" + "up") == "420 rollup" + +[case testIsSpace] +from typing import Any + +def test_isspace() -> None: + # Verify correctness across all Unicode codepoints. + # Exercises UCS-1 (0x00-0xFF), UCS-2 (0x100-0xFFFF), and UCS-4 (0x10000-0x10FFFF inclusive) string kinds. + # Any forces generic dispatch so we compare our primitive against stdlib's + for i in range(0x110000): + c = chr(i) + a: Any = c + assert c.isspace() == a.isspace()