From b861e672e1176f2229e1d46e2041e47bae8d5fd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 12 Apr 2026 17:49:47 +0200 Subject: [PATCH 1/3] gh-148323: release the GIL in `bytes.join` when operands are immutable --- Lib/test/test_bytes.py | 16 +++++++++++----- ...026-04-12-17-36-12.gh-issue-148323.HicoJQ.rst | 3 +++ Objects/stringlib/join.h | 6 ++++-- 3 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-12-17-36-12.gh-issue-148323.HicoJQ.rst diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index b1cdbe04765ed0..d07400b3d4f835 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -629,14 +629,20 @@ def test_join(self): self.assertEqual(dot_join([b"ab", memoryview(b"cd")]), b"ab.:cd") self.assertEqual(dot_join([bytearray(b"ab"), b"cd"]), b"ab.:cd") self.assertEqual(dot_join([b"ab", bytearray(b"cd")]), b"ab.:cd") - # Stress it with many items - seq = [b"abc"] * 100000 - expected = b"abc" + b".:abc" * 99999 + # Stress it with many items or many views on immutable bytes + N = 0x100000 # threshold for releasing the GIL in join() + seq = [b"abc"] * N + expected = b"abc" + b".:abc" * (N - 1) + self.assertGreater(len(expected), N) self.assertEqual(dot_join(seq), expected) + views = list(map(memoryview, seq)) + self.assertEqual(dot_join(views), expected) # Stress test with empty separator - seq = [b"abc"] * 100000 - expected = b"abc" * 100000 + expected = b"abc" * N + self.assertGreater(len(expected), N) self.assertEqual(self.type2test(b"").join(seq), expected) + views = list(map(memoryview, seq)) + self.assertEqual(self.type2test(b"").join(views), expected) self.assertRaises(TypeError, self.type2test(b" ").join, None) # Error handling and cleanup when some item in the middle of the # sequence has the wrong type. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-12-17-36-12.gh-issue-148323.HicoJQ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-12-17-36-12.gh-issue-148323.HicoJQ.rst new file mode 100644 index 00000000000000..b94d8e0a53c1a9 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-12-17-36-12.gh-issue-148323.HicoJQ.rst @@ -0,0 +1,3 @@ +Improve performance of :meth:`bytes.join` when the operands are known to be +buffer views on :class:`bytes` objects (but not subclasses thereof) by +releasing the GIL during the concatenation. Patch by Bénédikt Tran. diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h index de6bd83ffe4c8b..606df8ad85b86a 100644 --- a/Objects/stringlib/join.h +++ b/Objects/stringlib/join.h @@ -18,7 +18,7 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) Py_buffer *buffers = NULL; #define NB_STATIC_BUFFERS 10 Py_buffer static_buffers[NB_STATIC_BUFFERS]; -#define GIL_THRESHOLD 1048576 +#define GIL_THRESHOLD 1048576 // 0x100000 int drop_gil = 1; PyThreadState *save = NULL; @@ -81,7 +81,9 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) * races anyway, but this is a conservative approach that avoids * changing the behaviour of that data race. */ - drop_gil = 0; + if (!PyBytes_CheckExact(buffers[i].obj)) { + drop_gil = 0; + } } nbufs = i + 1; /* for error cleanup */ itemlen = buffers[i].len; From 9e43c1870517f8a873b7de7d08816427a3a9a460 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 12 Apr 2026 18:01:59 +0200 Subject: [PATCH 2/3] Update Objects/stringlib/join.h --- Objects/stringlib/join.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h index 606df8ad85b86a..53102d98883fb4 100644 --- a/Objects/stringlib/join.h +++ b/Objects/stringlib/join.h @@ -18,7 +18,7 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) Py_buffer *buffers = NULL; #define NB_STATIC_BUFFERS 10 Py_buffer static_buffers[NB_STATIC_BUFFERS]; -#define GIL_THRESHOLD 1048576 // 0x100000 +#define GIL_THRESHOLD 1048576 int drop_gil = 1; PyThreadState *save = NULL; From eb5cb1cdaf355e058fd508a1947c22e065c4b78e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 12 Apr 2026 20:16:53 +0200 Subject: [PATCH 3/3] Update Objects/stringlib/join.h --- Objects/stringlib/join.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h index 53102d98883fb4..470daf980f50b6 100644 --- a/Objects/stringlib/join.h +++ b/Objects/stringlib/join.h @@ -81,7 +81,8 @@ STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) * races anyway, but this is a conservative approach that avoids * changing the behaviour of that data race. */ - if (!PyBytes_CheckExact(buffers[i].obj)) { + PyObject *bufobj = buffers[i].obj; + if (!bufobj || !PyBytes_CheckExact(bufobj)) { drop_gil = 0; } }