From 1d1ac33e24a2aa2b5a8eb8e045660cb1191aea4b Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Fri, 29 May 2026 14:14:55 +0100 Subject: [PATCH 1/3] Store the libbz2 decompression error on the `BZ2Decompressor` and re-raise it on retries --- Lib/test/test_bz2.py | 12 ++++++++++++ Modules/_bz2module.c | 15 ++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index d8e3b671ec229f9..8246c7692b050a3 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1032,6 +1032,18 @@ def test_failure(self): # Previously, a second call could crash due to internal inconsistency self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30) + def test_decompress_after_data_error(self): + data = bytes.fromhex( + "425a6839314159265359000000000000007fffff000000000000000000000000" + "00000000000000000000000000000000000000e0370000000000000000000000" + "000000000000000000000000000000000000000000000000000083f3" + ) + bzd = BZ2Decompressor() + with self.assertRaisesRegex(OSError, "Invalid data stream"): + bzd.decompress(data) + with self.assertRaisesRegex(OSError, "Invalid data stream"): + bzd.decompress(b'\x00' * 18) + @support.refcount_test def test_refleaks_in___init__(self): gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount') diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 4cf8beed9ee3eba..e9a48698a6f682e 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -108,6 +108,7 @@ typedef struct { typedef struct { PyObject_HEAD bz_stream bzs; + int bzerror; char eof; /* Py_T_BOOL expects a char */ PyObject *unused_data; char needs_input; @@ -435,8 +436,11 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length) d->bzs_avail_in_real += bzs->avail_in; - if (catch_bz2_error(bzret)) + if (catch_bz2_error(bzret)) { + d->bzerror = bzret; + FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0); goto error; + } if (bzret == BZ_STREAM_END) { FT_ATOMIC_STORE_CHAR_RELAXED(d->eof, 1); 
break; @@ -607,10 +611,15 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data, PyObject *result = NULL; PyMutex_Lock(&self->mutex); - if (self->eof) + if (self->eof) { PyErr_SetString(PyExc_EOFError, "End of stream already reached"); - else + } + else if (self->bzerror) { + catch_bz2_error(self->bzerror); + } + else { result = decompress(self, data->buf, data->len, max_length); + } PyMutex_Unlock(&self->mutex); return result; } From fb5ccfca7e1e93aca9949290001b8fbcd063005e Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sat, 30 May 2026 09:36:48 +0100 Subject: [PATCH 2/3] Greg's review notes and news entry --- Lib/test/test_bz2.py | 3 +++ .../Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst | 3 +++ Modules/_bz2module.c | 1 + 3 files changed, 7 insertions(+) create mode 100644 Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index 8246c7692b050a3..3ecd8754fa8ccd4 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1041,6 +1041,9 @@ def test_decompress_after_data_error(self): bzd = BZ2Decompressor() with self.assertRaisesRegex(OSError, "Invalid data stream"): bzd.decompress(data) + # Previously, a second call could crash due to internal inconsistency + self.assertFalse(bzd.needs_input) + self.assertFalse(bzd.eof) with self.assertRaisesRegex(OSError, "Invalid data stream"): bzd.decompress(b'\x00' * 18) diff --git a/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst new file mode 100644 index 000000000000000..a37d86cf423f820 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst @@ -0,0 +1,3 @@ +Fix a possible stack buffer overflow in :mod:`bz2` when a +:class:`bz2.BZ2Decompressor` is reused after a decompression error. +The decompressor now becomes unusable after libbz2 reports an error. 
diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index e9a48698a6f682e..77d7ee5264eac36 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -615,6 +615,7 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data, PyErr_SetString(PyExc_EOFError, "End of stream already reached"); } else if (self->bzerror) { + // Re-entering BZ2_bzDecompress() after an error can write out of bounds. catch_bz2_error(self->bzerror); } else { From 79393ad8d7fea2bdd696dffe28fc843171ef204b Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Wed, 3 Jun 2026 10:04:39 +0100 Subject: [PATCH 3/3] Raise a `ValueError` --- Lib/test/test_bz2.py | 2 +- Modules/_bz2module.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index 3ecd8754fa8ccd4..64293d757331d75 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1044,7 +1044,7 @@ def test_decompress_after_data_error(self): # Previously, a second call could crash due to internal inconsistency self.assertFalse(bzd.needs_input) self.assertFalse(bzd.eof) - with self.assertRaisesRegex(OSError, "Invalid data stream"): + with self.assertRaisesRegex(ValueError, "previous error"): bzd.decompress(b'\x00' * 18) @support.refcount_test diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 77d7ee5264eac36..d2ac35813161734 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -616,7 +616,8 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data, } else if (self->bzerror) { // Re-entering BZ2_bzDecompress() after an error can write out of bounds. - catch_bz2_error(self->bzerror); + PyErr_SetString(PyExc_ValueError, + "Decompressor is unusable after a previous error"); } else { result = decompress(self, data->buf, data->len, max_length);