From 0b18bc07ae849f12191a0d715479e0ed6209784a Mon Sep 17 00:00:00 2001 From: raminfp Date: Fri, 13 Feb 2026 19:35:46 +0330 Subject: [PATCH 1/3] gh-144759: Fix undefined behavior from NULL pointer arithmetic in lexer Guard against NULL pointer arithmetic in `_PyLexer_remember_fstring_buffers` and `_PyLexer_restore_fstring_buffers`. When `start` or `multi_line_start` are NULL (uninitialized in tok_mode_stack[0]), performing `NULL - tok->buf` is undefined behavior. Add explicit NULL checks to store -1 as sentinel and restore NULL accordingly. --- .../2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst | 4 ++++ Parser/lexer/buffer.c | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst new file mode 100644 index 00000000000000..fb550428d16c9e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst @@ -0,0 +1,4 @@ +Fix undefined behavior in the lexer when ``start`` and ``multi_line_start`` +pointers are ``NULL`` in :c:func:`_PyLexer_remember_fstring_buffers` and +:c:func:`_PyLexer_restore_fstring_buffers`. The ``NULL`` pointer arithmetic +(``NULL - valid_pointer``) is now guarded with explicit ``NULL`` checks. diff --git a/Parser/lexer/buffer.c b/Parser/lexer/buffer.c index 63aa1ea2ad4f60..e122fd0d9878ea 100644 --- a/Parser/lexer/buffer.c +++ b/Parser/lexer/buffer.c @@ -13,8 +13,8 @@ _PyLexer_remember_fstring_buffers(struct tok_state *tok) for (index = tok->tok_mode_stack_index; index >= 0; --index) { mode = &(tok->tok_mode_stack[index]); - mode->start_offset = mode->start - tok->buf; - mode->multi_line_start_offset = mode->multi_line_start - tok->buf; + mode->start_offset = mode->start == NULL ? -1 : mode->start - tok->buf; + mode->multi_line_start_offset = mode->multi_line_start == NULL ? -1 : mode->multi_line_start - tok->buf; } } @@ -27,8 +27,8 @@ _PyLexer_restore_fstring_buffers(struct tok_state *tok) for (index = tok->tok_mode_stack_index; index >= 0; --index) { mode = &(tok->tok_mode_stack[index]); - mode->start = tok->buf + mode->start_offset; - mode->multi_line_start = tok->buf + mode->multi_line_start_offset; + mode->start = mode->start_offset < 0 ? NULL : tok->buf + mode->start_offset; + mode->multi_line_start = mode->multi_line_start_offset < 0 ? NULL : tok->buf + mode->multi_line_start_offset; } } From f9500a448bb74cc7c2f47c740b02957b12d0fa18 Mon Sep 17 00:00:00 2001 From: raminfp Date: Fri, 13 Feb 2026 19:46:12 +0330 Subject: [PATCH 2/3] gh-144759: Fix NEWS entry to use plain markup for internal functions Replace :c:func: references with double-backtick markup since these are internal functions without documentation entries. --- .../2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst index fb550428d16c9e..46786d0672b0a8 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst @@ -1,4 +1,4 @@ Fix undefined behavior in the lexer when ``start`` and ``multi_line_start`` -pointers are ``NULL`` in :c:func:`_PyLexer_remember_fstring_buffers` and -:c:func:`_PyLexer_restore_fstring_buffers`. The ``NULL`` pointer arithmetic +pointers are ``NULL`` in ``_PyLexer_remember_fstring_buffers()`` and +``_PyLexer_restore_fstring_buffers()``. The ``NULL`` pointer arithmetic (``NULL - valid_pointer``) is now guarded with explicit ``NULL`` checks. From 323479d804dfe7bad1d793c0f9b3ec417a270e37 Mon Sep 17 00:00:00 2001 From: raminfp Date: Fri, 13 Feb 2026 23:34:27 +0330 Subject: [PATCH 3/3] gh-144759: Add regression test for NULL pointer arithmetic in lexer Add test_lexer_buffer_realloc_with_null_start to test_repl.py that exercises the code path where the lexer buffer is reallocated while tok_mode_stack[0] has NULL start/multi_line_start pointers. This triggers _PyLexer_remember_fstring_buffers and verifies the NULL checks prevent undefined behavior. --- Lib/test/test_repl.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Lib/test/test_repl.py b/Lib/test/test_repl.py index 6cdb1ca65c6aed..40965835bcec00 100644 --- a/Lib/test/test_repl.py +++ b/Lib/test/test_repl.py @@ -143,6 +143,22 @@ def test_multiline_string_parsing(self): output = kill_python(p) self.assertEqual(p.returncode, 0) + @cpython_only + def test_lexer_buffer_realloc_with_null_start(self): + # gh-144759: NULL pointer arithmetic in the lexer when start and + # multi_line_start are NULL (uninitialized in tok_mode_stack[0]) + # and the lexer buffer is reallocated while parsing long input. + long_value = "a" * 2000 + user_input = dedent(f"""\ + x = f'{{{long_value!r}}}' + print(x) + """) + p = spawn_repl() + p.stdin.write(user_input) + output = kill_python(p) + self.assertEqual(p.returncode, 0) + self.assertIn(long_value, output) + def test_close_stdin(self): user_input = dedent(''' import os