From b7196ba02d7c725beca4fe5c8123bc8087bf2bd3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 26 Dec 2025 21:14:15 +0200 Subject: [PATCH 1/6] gh-143214: Add the wrapcol parameter in binascii.b2a_base64() and base64.b64encode() --- Doc/library/base64.rst | 17 ++++- Doc/library/binascii.rst | 21 ++++-- Doc/whatsnew/3.15.rst | 17 +++++ .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 ++ Lib/base64.py | 21 +++--- Lib/email/base64mime.py | 19 +++-- Lib/email/contentmanager.py | 18 +---- Lib/plistlib.py | 9 +-- Lib/ssl.py | 7 +- ...-12-27-13-47-59.gh-issue-143214.gf6nZK.rst | 2 + Modules/binascii.c | 72 +++++++++++++------ Modules/clinic/binascii.c.h | 29 +++++--- 15 files changed, 155 insertions(+), 84 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-12-27-13-47-59.gh-issue-143214.gf6nZK.rst diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 2d901824335145..4e4cd0e5e87aae 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -64,6 +64,15 @@ POST request. May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2. Raises a :exc:`TypeError` if *altchars* is not a :term:`bytes-like object`. + If *wrapcol* is non-zero, the output will be represented in lines of + no more than *wrapcol* characters each, separated by a newline + (``b'\n'``) character. + If *wrapcol* is zero (default), the output will be represented as + a single line. + + .. versionchanged:: next + Added the *wrapcol* parameter. + .. function:: b64decode(s, altchars=None, validate=False) @@ -214,9 +223,11 @@ Refer to the documentation of the individual functions for more information. instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This feature is not supported by the "standard" Ascii85 encoding. 
- *wrapcol* controls whether the output should have newline (``b'\n'``) - characters added to it. If this is non-zero, each output line will be - at most this many characters long, excluding the trailing newline. + If *wrapcol* is non-zero, the output will be represented in lines of + no more than *wrapcol* characters each, separated by a newline + (``b'\n'``) character. + If *wrapcol* is zero (default), the output will be represented as + a single line. *pad* controls whether the input is padded to a multiple of 4 before encoding. Note that the ``btoa`` implementation always pads. diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 1bab785684bbab..f325f4bc6152fa 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -58,7 +58,7 @@ The :mod:`binascii` module defines the following functions: Valid base64: - * Conforms to :rfc:`3548`. + * Conforms to :rfc:`4648`. * Contains only characters from the base64 alphabet. * Contains no excess data after padding (including excess padding, newlines, etc.). * Does not start with a padding. @@ -67,15 +67,26 @@ The :mod:`binascii` module defines the following functions: Added the *strict_mode* parameter. -.. function:: b2a_base64(data, *, newline=True) +.. function:: b2a_base64(data, *, wrapcol=0, newline=True) - Convert binary data to a line of ASCII characters in base64 coding. The return - value is the converted line, including a newline char if *newline* is - true. The output of this function conforms to :rfc:`3548`. + Convert binary data to a line(s) of ASCII characters in base64 coding, + as specified in :rfc:`4648`. + + If *wrapcol* is non-zero, the output will be represented in lines of + no more than *wrapcol* characters each, separated by a newline + (``b'\n'``) character. + If *wrapcol* is zero (default), the output will be represented as + a single line. + + If *newline* is true (default), a newline character will be added + at the end of the output. .. 
versionchanged:: 3.6 Added the *newline* parameter. + .. versionchanged:: next + Added the *wrapcol* parameter. + .. function:: a2b_qp(data, header=False) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 0d35eed38f303d..2bf97172a5191c 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -428,6 +428,23 @@ argparse inline code when color output is enabled. (Contributed by Savannah Ostrowski in :gh:`142390`.) +base64 +------ + +* Added the *pad* parameter in :func:`~base64.z85encode`. + (Contributed by Hauke D in :gh:`143103`.) + +* Added the *wrapcol* parameter in :func:`~base64.b64encode`. + (Contributed by Serhiy Storchaka in :gh:``.) + + +binascii +-------- + +* Added the *wrapcol* parameter in :func:`~binascii.b2a_base64`. + (Contributed by Serhiy Storchaka in :gh:``.) + + calendar -------- diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index e625bf2fef1912..705721021e9f49 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -2142,6 +2142,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(which)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(who)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(withdata)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(wrapcol)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(writable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write_through)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 771f0f8cb4ad87..7c2f44ef6dbe7a 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -865,6 +865,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(which) STRUCT_FOR_ID(who) STRUCT_FOR_ID(withdata) + STRUCT_FOR_ID(wrapcol) 
STRUCT_FOR_ID(writable) STRUCT_FOR_ID(write) STRUCT_FOR_ID(write_through) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 499a2569b9a06c..6e7bad986dbeda 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -2140,6 +2140,7 @@ extern "C" { INIT_ID(which), \ INIT_ID(who), \ INIT_ID(withdata), \ + INIT_ID(wrapcol), \ INIT_ID(writable), \ INIT_ID(write), \ INIT_ID(write_through), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 1375f46018f943..660115931da0a0 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -3240,6 +3240,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(wrapcol); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(writable); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/base64.py b/Lib/base64.py index c2fdee8eab9690..3f3b9de04e8580 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -45,14 +45,17 @@ def _bytes_from_decode_data(s): # Base64 encoding/decoding uses binascii -def b64encode(s, altchars=None): +def b64encode(s, altchars=None, *, wrapcol=0): """Encode the bytes-like object s using Base64 and return a bytes object. Optional altchars should be a byte string of length 2 which specifies an alternative alphabet for the '+' and '/' characters. This allows an application to e.g. generate url or filesystem safe Base64 strings. 
+ + If wrapcol is non-zero, the output will be represented in lines of + no more than wrapcol characters each, separated by a newline character. """ - encoded = binascii.b2a_base64(s, newline=False) + encoded = binascii.b2a_base64(s, wrapcol=wrapcol, newline=False) if altchars is not None: assert len(altchars) == 2, repr(altchars) return encoded.translate(bytes.maketrans(b'+/', altchars)) @@ -327,9 +330,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This feature is not supported by the "standard" Adobe encoding. - wrapcol controls whether the output should have newline (b'\\n') characters - added to it. If this is non-zero, each output line will be at most this - many characters long, excluding the trailing newline. + If wrapcol is non-zero, the output will be represented in lines of + no more than wrapcol characters each, separated by a newline character. pad controls whether the input is padded to a multiple of 4 before encoding. Note that the btoa implementation always pads. @@ -566,11 +568,10 @@ def encodebytes(s): """Encode a bytestring into a bytes object containing multiple lines of base-64 data.""" _input_type_check(s) - pieces = [] - for i in range(0, len(s), MAXBINSIZE): - chunk = s[i : i + MAXBINSIZE] - pieces.append(binascii.b2a_base64(chunk)) - return b"".join(pieces) + result = binascii.b2a_base64(s, wrapcol=MAXLINESIZE) + if result == b'\n': + return b'' + return result def decodebytes(s): diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py index a5a3f737a97b51..5766f9ad655bc3 100644 --- a/Lib/email/base64mime.py +++ b/Lib/email/base64mime.py @@ -83,16 +83,15 @@ def body_encode(s, maxlinelen=76, eol=NL): if not s: return "" - encvec = [] - max_unencoded = maxlinelen * 3 // 4 - for i in range(0, len(s), max_unencoded): - # BAW: should encode() inherit b2a_base64()'s dubious behavior in - # adding a newline to the encoded string? 
- enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii") - if enc.endswith(NL) and eol != NL: - enc = enc[:-1] + eol - encvec.append(enc) - return EMPTYSTRING.join(encvec) + if not eol: + return b2a_base64(s, newline=False).decode("ascii") + + # BAW: should encode() inherit b2a_base64()'s dubious behavior in + # adding a newline to the encoded string? + enc = b2a_base64(s, wrapcol=maxlinelen).decode("ascii") + if eol != NL: + enc = enc.replace(NL, eol) + return enc def decode(string): diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py index 11d1536db27d79..13fcb9787f1f32 100644 --- a/Lib/email/contentmanager.py +++ b/Lib/email/contentmanager.py @@ -129,19 +129,6 @@ def _finalize_set(msg, disposition, filename, cid, params): msg.set_param(key, value) -# XXX: This is a cleaned-up version of base64mime.body_encode (including a bug -# fix in the calculation of unencoded_bytes_per_line). It would be nice to -# drop both this and quoprimime.body_encode in favor of enhanced binascii -# routines that accepted a max_line_length parameter. -def _encode_base64(data, max_line_length): - encoded_lines = [] - unencoded_bytes_per_line = max_line_length // 4 * 3 - for i in range(0, len(data), unencoded_bytes_per_line): - thisline = data[i:i+unencoded_bytes_per_line] - encoded_lines.append(binascii.b2a_base64(thisline).decode('ascii')) - return ''.join(encoded_lines) - - def _encode_text(string, charset, cte, policy): # If max_line_length is 0 or None, there is no limit. 
maxlen = policy.max_line_length or sys.maxsize @@ -176,7 +163,7 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n' data = quoprimime.body_encode(normal_body(lines).decode('latin-1'), maxlen) elif cte == 'base64': - data = _encode_base64(embedded_body(lines), maxlen) + data = binascii.b2a_base64(embedded_body(lines), wrapcol=maxlen).decode('ascii') else: raise ValueError("Unknown content transfer encoding {}".format(cte)) return cte, data @@ -234,7 +221,8 @@ def set_bytes_content(msg, data, maintype, subtype, cte='base64', params=None, headers=None): _prepare_set(msg, maintype, subtype, headers) if cte == 'base64': - data = _encode_base64(data, max_line_length=msg.policy.max_line_length) + data = binascii.b2a_base64(data, wrapcol=msg.policy.max_line_length) + data = data.decode('ascii') elif cte == 'quoted-printable': # XXX: quoprimime.body_encode won't encode newline characters in data, # so we can't use it. This means max_line_length is ignored. Another diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 655c51eea3da5d..e3d4f2e4bdd9d4 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -122,13 +122,7 @@ def __hash__(self): r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]") def _encode_base64(s, maxlinelength=76): - # copied from base64.encodebytes(), with added maxlinelength argument - maxbinsize = (maxlinelength//4)*3 - pieces = [] - for i in range(0, len(s), maxbinsize): - chunk = s[i : i + maxbinsize] - pieces.append(binascii.b2a_base64(chunk)) - return b''.join(pieces) + return binascii.b2a_base64(s, wrapcol=maxlinelength, newline=False) def _decode_base64(s): if isinstance(s, str): @@ -385,6 +379,7 @@ def write_bytes(self, data): maxlinelength = max( 16, 76 - len(self.indent.replace(b"\t", b" " * 8) * self._indent_level)) + maxlinelength = maxlinelength // 4 * 4 for line in _encode_base64(data, maxlinelength).split(b"\n"): if line: diff --git a/Lib/ssl.py b/Lib/ssl.py index 67a2990b2817e2..612b32cd0765ec 100644 --- a/Lib/ssl.py 
+++ b/Lib/ssl.py @@ -1534,11 +1534,8 @@ def DER_cert_to_PEM_cert(der_cert_bytes): """Takes a certificate in binary DER format and returns the PEM version of it as a string.""" - f = str(base64.standard_b64encode(der_cert_bytes), 'ASCII', 'strict') - ss = [PEM_HEADER] - ss += [f[i:i+64] for i in range(0, len(f), 64)] - ss.append(PEM_FOOTER + '\n') - return '\n'.join(ss) + f = str(base64.b64encode(der_cert_bytes, wrapcol=64), 'ASCII') + return f'{PEM_HEADER}\n{f}\n{PEM_FOOTER}\n' def PEM_cert_to_DER_cert(pem_cert_string): """Takes a certificate in ASCII PEM format and returns the diff --git a/Misc/NEWS.d/next/Library/2025-12-27-13-47-59.gh-issue-143214.gf6nZK.rst b/Misc/NEWS.d/next/Library/2025-12-27-13-47-59.gh-issue-143214.gf6nZK.rst new file mode 100644 index 00000000000000..0936c4d8e4f708 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-27-13-47-59.gh-issue-143214.gf6nZK.rst @@ -0,0 +1,2 @@ +Add the *wrapcol* parameter in :func:`binascii.b2a_base64` and +:func:`base64.b64encode`. diff --git a/Modules/binascii.c b/Modules/binascii.c index 13e4bc5be03ebd..2247be8a5d5aab 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -189,6 +189,28 @@ ascii_buffer_converter(PyObject *arg, Py_buffer *buf) return Py_CLEANUP_SUPPORTED; } +static Py_ssize_t +wraplines(unsigned char *data, Py_ssize_t size, size_t width) +{ + if ((size_t)size <= width) { + return size; + } + unsigned char *src = data + size; + Py_ssize_t newlines = (size - 1) / width; + Py_ssize_t line_len = size - newlines * width; + size += newlines; + unsigned char *dst = data + size; + + while ((src -= line_len) != data) { + dst -= line_len; + memmove(dst, src, line_len); + *--dst = '\n'; + line_len = width; + } + assert(dst == data + width); + return size; +} + #include "clinic/binascii.c.h" /*[clinic input] @@ -523,42 +545,40 @@ binascii.b2a_base64 data: Py_buffer / * + wrapcol: size_t = 0 newline: bool = True Base64-code line of data. 
[clinic start generated code]*/ static PyObject * -binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) -/*[clinic end generated code: output=4ad62c8e8485d3b3 input=0e20ff59c5f2e3e1]*/ +binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, size_t wrapcol, + int newline) +/*[clinic end generated code: output=2edc7311a9515eac input=2ee4214e6d489e2e]*/ { - const unsigned char *bin_data; int leftbits = 0; - unsigned char this_ch; unsigned int leftchar = 0; - Py_ssize_t bin_len; - binascii_state *state; - bin_data = data->buf; - bin_len = data->len; + const unsigned char *bin_data = data->buf; + Py_ssize_t bin_len = data->len; assert(bin_len >= 0); - if ( bin_len > BASE64_MAXBIN ) { - state = get_binascii_state(module); - if (state == NULL) { - return NULL; + size_t out_len = ((size_t)bin_len + 2u) / 3u * 4u; + if (out_len > PY_SSIZE_T_MAX) { + goto toolong; + } + if (wrapcol && out_len) { + out_len += (out_len - 1) / wrapcol; + if (out_len > PY_SSIZE_T_MAX) { + goto toolong; } - PyErr_SetString(state->Error, "Too much data for base64 line"); - return NULL; } - - /* We're lazy and allocate too much (fixed up later). - "+2" leaves room for up to two pad characters. - Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). 
*/ - Py_ssize_t out_len = bin_len*2 + 2; if (newline) { out_len++; + if (out_len > PY_SSIZE_T_MAX) { + goto toolong; + } } PyBytesWriter *writer = PyBytesWriter_Create(out_len); if (writer == NULL) { @@ -573,7 +593,7 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) /* See if there are 6-bit groups ready */ while ( leftbits >= 6 ) { - this_ch = (leftchar >> (leftbits-6)) & 0x3f; + unsigned char this_ch = (leftchar >> (leftbits-6)) & 0x3f; leftbits -= 6; *ascii_data++ = table_b2a_base64[this_ch]; } @@ -586,10 +606,22 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2]; *ascii_data++ = BASE64_PAD; } + if (wrapcol) { + unsigned char *start = PyBytesWriter_GetData(writer); + ascii_data = start + wraplines(start, ascii_data - start, wrapcol); + } if (newline) *ascii_data++ = '\n'; /* Append a courtesy newline */ return PyBytesWriter_FinishWithPointer(writer, ascii_data); + +toolong: + binascii_state *state = get_binascii_state(module); + if (state == NULL) { + return NULL; + } + PyErr_SetString(state->Error, "Too much data for base64 line"); + return NULL; } diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index ce29e0d11a45cd..524f5fc93d0c21 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_long.h" // _PyLong_Size_t_Converter() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(binascii_a2b_uu__doc__, @@ -193,7 +194,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } PyDoc_STRVAR(binascii_b2a_base64__doc__, -"b2a_base64($module, data, /, *, newline=True)\n" +"b2a_base64($module, data, /, *, wrapcol=0, newline=True)\n" "--\n" "\n" "Base64-code line of data."); @@ -202,7 +203,8 @@ PyDoc_STRVAR(binascii_b2a_base64__doc__, {"b2a_base64", 
_PyCFunction_CAST(binascii_b2a_base64), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base64__doc__}, static PyObject * -binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline); +binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, size_t wrapcol, + int newline); static PyObject * binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -210,7 +212,7 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 1 + #define NUM_KEYWORDS 2 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -219,7 +221,7 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(newline), }, + .ob_item = { &_Py_ID(wrapcol), &_Py_ID(newline), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -228,16 +230,17 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "newline", NULL}; + static const char * const _keywords[] = {"", "wrapcol", "newline", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "b2a_base64", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; + size_t wrapcol = 0; int newline = 1; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, @@ -251,12 +254,20 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P if (!noptargs) { goto skip_optional_kwonly; } - newline = PyObject_IsTrue(args[1]); + if (args[1]) { + if (!_PyLong_Size_t_Converter(args[1], &wrapcol)) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + newline = PyObject_IsTrue(args[2]); if (newline < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_b2a_base64_impl(module, &data, newline); + return_value = binascii_b2a_base64_impl(module, &data, wrapcol, newline); exit: /* Cleanup for data */ @@ -812,4 +823,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=fba6a71e0d7d092f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=644ccdc8e0d56e65 input=a9049054013a1b77]*/ From 50eb52dcdcb66d918dd3ecfc07076f6f113587f3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 27 Dec 2025 14:36:31 +0200 Subject: [PATCH 2/6] Fix errors. --- Doc/whatsnew/3.15.rst | 4 ++-- Modules/binascii.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 2bf97172a5191c..461b8e87868585 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -435,14 +435,14 @@ base64 (Contributed by Hauke D in :gh:`143103`.) * Added the *wrapcol* parameter in :func:`~base64.b64encode`. - (Contributed by Serhiy Storchaka in :gh:``.) + (Contributed by Serhiy Storchaka in :gh:`143214`.) binascii -------- * Added the *wrapcol* parameter in :func:`~binascii.b2a_base64`. - (Contributed by Serhiy Storchaka in :gh:``.) + (Contributed by Serhiy Storchaka in :gh:`143214`.) 
calendar diff --git a/Modules/binascii.c b/Modules/binascii.c index 2247be8a5d5aab..4a2b7ba33567ba 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -615,7 +615,7 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, size_t wrapcol, return PyBytesWriter_FinishWithPointer(writer, ascii_data); -toolong: +toolong:; binascii_state *state = get_binascii_state(module); if (state == NULL) { return NULL; From 7c7f2dc571dfa48c35e3ddba1194be7153f2dee2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 27 Dec 2025 14:59:39 +0200 Subject: [PATCH 3/6] Add tests. --- Lib/test/test_base64.py | 9 +++++++++ Lib/test/test_binascii.py | 24 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 288caf663e8321..eb1cc2dafb752d 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -172,6 +172,15 @@ def test_b64encode(self): b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT" b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==") + + eq(base64.b64encode(b"www.python.org", wrapcol=0), b'd3d3LnB5dGhvbi5vcmc=') + eq(base64.b64encode(b"www.python.org", wrapcol=8), b'd3d3LnB5\ndGhvbi5v\ncmc=') + eq(base64.b64encode(b"www.python.org", wrapcol=76), b'd3d3LnB5dGhvbi5vcmc=') + eq(base64.b64encode(b"www.python.org", wrapcol=1), + b'd\n3\nd\n3\nL\nn\nB\n5\nd\nG\nh\nv\nb\ni\n5\nv\nc\nm\nc\n=') + eq(base64.b64encode(b"", wrapcol=0), b'') + eq(base64.b64encode(b"", wrapcol=8), b'') + # Test with arbitrary alternative characters eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=b'*$'), b'01a*b$cd') eq(base64.b64encode(b'\xd3V\xbeo\xf7\x1d', altchars=bytearray(b'*$')), diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 7ed7d7c47b6de1..fc625f0d1b5b9a 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -479,6 +479,30 @@ def test_b2a_base64_newline(self): b'aGVsbG8=\n') self.assertEqual(binascii.b2a_base64(b, newline=False), 
b'aGVsbG8=') + b = self.type2test(b'') + self.assertEqual(binascii.b2a_base64(b), b'\n') + self.assertEqual(binascii.b2a_base64(b, newline=True), b'\n') + self.assertEqual(binascii.b2a_base64(b, newline=False), b'') + + def test_b2a_base64_wrapcol(self): + b = self.type2test(b'www.python.org') + self.assertEqual(binascii.b2a_base64(b), + b'd3d3LnB5dGhvbi5vcmc=\n') + self.assertEqual(binascii.b2a_base64(b, wrapcol=0), + b'd3d3LnB5dGhvbi5vcmc=\n') + self.assertEqual(binascii.b2a_base64(b, wrapcol=8), + b'd3d3LnB5\ndGhvbi5v\ncmc=\n') + self.assertEqual(binascii.b2a_base64(b, wrapcol=76), + b'd3d3LnB5dGhvbi5vcmc=\n') + self.assertEqual(binascii.b2a_base64(b, wrapcol=8, newline=False), + b'd3d3LnB5\ndGhvbi5v\ncmc=') + self.assertEqual(binascii.b2a_base64(b, wrapcol=1), + b'd\n3\nd\n3\nL\nn\nB\n5\nd\nG\nh\nv\nb\ni\n5\nv\nc\nm\nc\n=\n') + b = self.type2test(b'') + self.assertEqual(binascii.b2a_base64(b), b'\n') + self.assertEqual(binascii.b2a_base64(b, wrapcol=0), b'\n') + self.assertEqual(binascii.b2a_base64(b, wrapcol=8), b'\n') + self.assertEqual(binascii.b2a_base64(b, wrapcol=8, newline=False), b'') @hypothesis.given( binary=hypothesis.strategies.binary(), From e429f03e02f3da0fc8e33057087a976fa8d02d2e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 27 Dec 2025 16:14:11 +0200 Subject: [PATCH 4/6] =?UTF-8?q?Update=20the=20name=20of=20Hauke=20D=C3=A4m?= =?UTF-8?q?pfling.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Doc/whatsnew/3.15.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 461b8e87868585..d515cdf75c98cf 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -432,7 +432,7 @@ base64 ------ * Added the *pad* parameter in :func:`~base64.z85encode`. - (Contributed by Hauke D in :gh:`143103`.) + (Contributed by Hauke Dämpfling in :gh:`143103`.) * Added the *wrapcol* parameter in :func:`~base64.b64encode`. 
(Contributed by Serhiy Storchaka in :gh:`143214`.) From 46c6a25abd8a0dc3f8bc4fcc8fdbd00299d78f5f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 27 Dec 2025 21:35:58 +0200 Subject: [PATCH 5/6] Address review comments (signature, docstrings). --- Doc/library/base64.rst | 2 +- Lib/base64.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 4e4cd0e5e87aae..0b0cc02ee048d2 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -51,7 +51,7 @@ The :rfc:`4648` encodings are suitable for encoding binary data so that it can b safely sent by email, used as parts of URLs, or included as part of an HTTP POST request. -.. function:: b64encode(s, altchars=None) +.. function:: b64encode(s, altchars=None, *, wrapcol=0) Encode the :term:`bytes-like object` *s* using Base64 and return the encoded :class:`bytes`. diff --git a/Lib/base64.py b/Lib/base64.py index 3f3b9de04e8580..59e02e3a9c50db 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -53,7 +53,8 @@ def b64encode(s, altchars=None, *, wrapcol=0): application to e.g. generate url or filesystem safe Base64 strings. If wrapcol is non-zero, the output will be represented in lines of - no more than wrapcol characters each, separated by a newline character. + no more than wrapcol characters each, separated by a newline (b'\\n') + character. """ encoded = binascii.b2a_base64(s, wrapcol=wrapcol, newline=False) if altchars is not None: @@ -331,7 +332,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): feature is not supported by the "standard" Adobe encoding. If wrapcol is non-zero, the output will be represented in lines of - no more than wrapcol characters each, separated by a newline character. + no more than wrapcol characters each, separated by a newline (b'\\n') + character. pad controls whether the input is padded to a multiple of 4 before encoding. Note that the btoa implementation always pads. 
From 5062ae55c286429e68d5b1271bdab5c9472d6476 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 27 Dec 2025 23:29:13 +0200 Subject: [PATCH 6/6] Add more comments and tests. --- Lib/test/test_base64.py | 13 +++++++++++++ Lib/test/test_binascii.py | 13 +++++++++++++ Modules/binascii.c | 12 +++++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index eb1cc2dafb752d..125183a843c1f2 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -2,6 +2,7 @@ import base64 import binascii import string +import sys import os from array import array from test.support import cpython_only @@ -178,6 +179,18 @@ def test_b64encode(self): eq(base64.b64encode(b"www.python.org", wrapcol=76), b'd3d3LnB5dGhvbi5vcmc=') eq(base64.b64encode(b"www.python.org", wrapcol=1), b'd\n3\nd\n3\nL\nn\nB\n5\nd\nG\nh\nv\nb\ni\n5\nv\nc\nm\nc\n=') + eq(base64.b64encode(b"www.python.org", wrapcol=sys.maxsize), + b'd3d3LnB5dGhvbi5vcmc=') + eq(base64.b64encode(b"www.python.org", wrapcol=sys.maxsize*2), + b'd3d3LnB5dGhvbi5vcmc=') + with self.assertRaises(OverflowError): + base64.b64encode(b"www.python.org", wrapcol=2**1000) + with self.assertRaises(ValueError): + base64.b64encode(b"www.python.org", wrapcol=-8) + with self.assertRaises(TypeError): + base64.b64encode(b"www.python.org", wrapcol=8.0) + with self.assertRaises(TypeError): + base64.b64encode(b"www.python.org", wrapcol='8') eq(base64.b64encode(b"", wrapcol=0), b'') eq(base64.b64encode(b"", wrapcol=8), b'') diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index fc625f0d1b5b9a..9035681c8bf5ef 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -4,6 +4,7 @@ import binascii import array import re +import sys from test.support import bigmemtest, _1G, _4G from test.support.hypothesis_helper import hypothesis @@ -498,6 +499,18 @@ def test_b2a_base64_wrapcol(self): b'd3d3LnB5\ndGhvbi5v\ncmc=') 
self.assertEqual(binascii.b2a_base64(b, wrapcol=1), b'd\n3\nd\n3\nL\nn\nB\n5\nd\nG\nh\nv\nb\ni\n5\nv\nc\nm\nc\n=\n') + self.assertEqual(binascii.b2a_base64(b, wrapcol=sys.maxsize), + b'd3d3LnB5dGhvbi5vcmc=\n') + self.assertEqual(binascii.b2a_base64(b, wrapcol=sys.maxsize*2), + b'd3d3LnB5dGhvbi5vcmc=\n') + with self.assertRaises(OverflowError): + binascii.b2a_base64(b, wrapcol=2**1000) + with self.assertRaises(ValueError): + binascii.b2a_base64(b, wrapcol=-8) + with self.assertRaises(TypeError): + binascii.b2a_base64(b, wrapcol=8.0) + with self.assertRaises(TypeError): + binascii.b2a_base64(b, wrapcol='8') b = self.type2test(b'') self.assertEqual(binascii.b2a_base64(b), b'\n') self.assertEqual(binascii.b2a_base64(b, wrapcol=0), b'\n') diff --git a/Modules/binascii.c b/Modules/binascii.c index 4a2b7ba33567ba..96d416316d0853 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -189,6 +189,10 @@ ascii_buffer_converter(PyObject *arg, Py_buffer *buf) return Py_CLEANUP_SUPPORTED; } +/* The function inserts '\n' each width characters, moving the data right. + * It assumes that we allocated enough space for all of the newlines in data. + * Returns the size of the data including the newlines. + */ static Py_ssize_t wraplines(unsigned char *data, Py_ssize_t size, size_t width) { @@ -564,12 +568,18 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, size_t wrapcol, assert(bin_len >= 0); + /* Each group of 3 bytes (rounded up) gets encoded as 4 characters, + * not counting newlines. + * Note that 'b' gets encoded as 'Yg==' (1 in, 4 out). + * + * Use unsigned integer arithmetic to avoid signed integer overflow. + */ size_t out_len = ((size_t)bin_len + 2u) / 3u * 4u; if (out_len > PY_SSIZE_T_MAX) { goto toolong; } if (wrapcol && out_len) { - out_len += (out_len - 1) / wrapcol; + out_len += (out_len - 1u) / wrapcol; if (out_len > PY_SSIZE_T_MAX) { goto toolong; }