From 797b16fa23a7da57c30a85c5444e2bde91106d04 Mon Sep 17 00:00:00 2001 From: Pierre Sassoulas Date: Sat, 13 Jun 2026 18:33:58 +0200 Subject: [PATCH 1/3] [perf] Make ``PrettyPrinter`` format lazily so output can be budget-capped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``_format`` and the per-type helpers now ``yield`` their output as a stream of string chunks instead of writing to a file-like object, and ``pformat`` joins them. On top of that, ``pformat_lines`` pulls from the formatter only until a budget is reached: pformat_lines(obj, max_lines=None, max_chars=None) It stops on the first chunk that reaches *either* budget, so a huge collection costs O(budget) rather than O(N). Either dimension may be ``None`` (unbounded); with both ``None`` the whole object is formatted. Motivation ---------- Assertion diffs are truncated to a handful of lines/chars before being shown. Formatting the whole of a large ``==`` comparison and then throwing almost all of it away is pure waste. With a lazy formatter the truncating caller simply stops pulling once it has enough. Benchmark (``PrettyPrinter`` alone, width 80):: list(range(500_000)): pformat().splitlines() ~805 ms pformat_lines(max_lines=11) ~0.027 ms (~30000x) [8 small ints] (common small diff): pformat().splitlines() ~0.0133 ms pformat_lines(max_lines=11) ~0.0185 ms (+~5 us) ["x"*100_000] * 3 (flat, few huge elements): pformat_lines(max_chars=640) stops after ~100_000 chars (one element) instead of 300_000 Why a lazy generator rather than a fast path + budget stream ------------------------------------------------------------ An earlier approach kept a cheap ``pformat().splitlines()`` fast path guarded by ``len(obj) <= max_lines`` plus a flatness check, falling back to a write-intercepting budget-stream class for the rest. 
Two problems: * ``len(obj)`` is only a *lower* bound on the line count — one nested element (``[{...50 keys...}]``) expands to many lines — so the guard needed the flatness scan to stay correct, and even then it bounded only *lines*, never *chars*: a flat container of a few enormous strings has almost no lines but blows the char budget. * it was two code paths plus a stream class plus an exception used for control flow. Because the formatter is lazy, "stop pulling at the budget" is the whole optimisation: correct regardless of how lines/chars are distributed across elements, bounding both dimensions, with no ``len()`` proxy to get wrong and no fast/slow branch. The common small-diff case costs only ~5 us more than the unbounded path (it is never the bottleneck — a failing assertion isn't hot), while large comparisons drop by orders of magnitude. ``_pprint_set``/``_pprint_dict`` also try a plain ``sorted`` first and fall back to the ``_safe_key`` wrapper only for unorderable mixes. This diverges structurally from the upstream cpython ``pprint`` it was vendored from; the module header notes it is no longer kept in sync. Co-Authored-By: Claude Opus 4.7 (1M context) Co-Authored-By: Claude Opus 4.8 (1M context) --- src/_pytest/_io/pprint.py | 335 ++++++++++++++++++++------------------ testing/io/test_pprint.py | 84 ++++++++++ 2 files changed, 262 insertions(+), 157 deletions(-) diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py index ec41b449ddf..06caf436e60 100644 --- a/src/_pytest/_io/pprint.py +++ b/src/_pytest/_io/pprint.py @@ -3,6 +3,14 @@ # (https://github.com/python/cpython/) at commit # c5140945c723ae6c4b7ee81ff720ac8ea4b52cfd (python3.12). # +# It has since been adapted to emit its output lazily as a stream of +# string chunks (``_format`` and the per-type helpers are generators) +# rather than writing to a file-like object. 
This lets ``pformat_lines`` +# stop formatting as soon as a line/char budget is reached, so a huge +# collection a caller is going to truncate anyway is never fully built. +# As a result this copy has diverged structurally from upstream and is +# no longer kept in sync with it. +# # # Original Author: Fred L. Drake, Jr. # fdrake@acm.org @@ -17,13 +25,12 @@ import collections as _collections from collections.abc import Callable +from collections.abc import Iterable from collections.abc import Iterator import dataclasses as _dataclasses -from io import StringIO as _StringIO import re import types as _types from typing import Any -from typing import IO class _safe_key: @@ -87,28 +94,62 @@ def __init__( self._width = width def pformat(self, object: Any) -> str: - sio = _StringIO() - self._format(object, sio, 0, 0, set(), 0) - return sio.getvalue() + return "".join(self._format(object, 0, 0, set(), 0)) + + def pformat_lines( + self, + object: Any, + max_lines: int | None = None, + max_chars: int | None = None, + ) -> list[str]: + """Pretty-print ``object`` and return its lines. + + ``_format`` yields the output as a stream of chunks, so this can + stop pulling from it as soon as a budget is reached — useful when + a downstream truncator is going to drop everything past that + budget anyway. + + ``max_lines`` / ``max_chars`` bound the two truncation dimensions + independently; either may be ``None`` to leave that dimension + unbounded. With both ``None`` the whole object is formatted. The + budget is a stopping condition, not a precise cut: formatting + stops on the first chunk that reaches it, so the result may + slightly overshoot (the caller truncates to the exact limit). 
+ """ + if max_lines is None and max_chars is None: + return self.pformat(object).splitlines() + n_lines = 0 + n_chars = 0 + chunks: list[str] = [] + for chunk in self._format(object, 0, 0, set(), 0): + chunks.append(chunk) + if max_chars is not None: + n_chars += len(chunk) + if max_lines is not None: + n_lines += chunk.count("\n") + if (max_lines is not None and n_lines >= max_lines) or ( + max_chars is not None and n_chars >= max_chars + ): + break + return "".join(chunks).splitlines() def _format( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: + ) -> Iterator[str]: objid = id(object) if objid in context: - stream.write(_recursion(object)) + yield _recursion(object) return p = self._dispatch.get(type(object).__repr__, None) if p is not None: context.add(objid) - p(self, object, stream, indent, allowance, context, level + 1) + yield from p(self, object, indent, allowance, context, level + 1) context.remove(objid) elif ( _dataclasses.is_dataclass(object) @@ -120,125 +161,126 @@ def _format( and "__create_fn__" in object.__repr__.__wrapped__.__qualname__ ): context.add(objid) - self._pprint_dataclass( - object, stream, indent, allowance, context, level + 1 + yield from self._pprint_dataclass( + object, indent, allowance, context, level + 1 ) context.remove(objid) else: - stream.write(self._repr(object, context, level)) + yield self._repr(object, context, level) def _pprint_dataclass( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: + ) -> Iterator[str]: cls_name = object.__class__.__name__ items = [ (f.name, getattr(object, f.name)) for f in _dataclasses.fields(object) if f.repr ] - stream.write(cls_name + "(") - self._format_namespace_items(items, stream, indent, allowance, context, level) - stream.write(")") + yield cls_name + "(" + yield from self._format_namespace_items( + items, indent, allowance, context, level + ) + yield ")" 
_dispatch: dict[ Callable[..., str], - Callable[[PrettyPrinter, Any, IO[str], int, int, set[int], int], None], + Callable[[PrettyPrinter, Any, int, int, set[int], int], Iterator[str]], ] = {} def _pprint_dict( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - write = stream.write - write("{") - items = object.items() - self._format_dict_items(items, stream, indent, allowance, context, level) - write("}") + ) -> Iterator[str]: + yield "{" + yield from self._format_dict_items( + object.items(), indent, allowance, context, level + ) + yield "}" _dispatch[dict.__repr__] = _pprint_dict def _pprint_ordered_dict( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: + ) -> Iterator[str]: if not len(object): - stream.write(repr(object)) + yield repr(object) return cls = object.__class__ - stream.write(cls.__name__ + "(") - self._pprint_dict(object, stream, indent, allowance, context, level) - stream.write(")") + yield cls.__name__ + "(" + yield from self._pprint_dict(object, indent, allowance, context, level) + yield ")" _dispatch[_collections.OrderedDict.__repr__] = _pprint_ordered_dict def _pprint_list( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - stream.write("[") - self._format_items(object, stream, indent, allowance, context, level) - stream.write("]") + ) -> Iterator[str]: + yield "[" + yield from self._format_items(object, indent, allowance, context, level) + yield "]" _dispatch[list.__repr__] = _pprint_list def _pprint_tuple( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - stream.write("(") - self._format_items(object, stream, indent, allowance, context, level) - stream.write(")") + ) -> Iterator[str]: + yield "(" + yield from self._format_items(object, indent, allowance, context, level) + yield ")" 
_dispatch[tuple.__repr__] = _pprint_tuple def _pprint_set( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: + ) -> Iterator[str]: if not len(object): - stream.write(repr(object)) + yield repr(object) return typ = object.__class__ if typ is set: - stream.write("{") + yield "{" endchar = "}" else: - stream.write(typ.__name__ + "({") + yield typ.__name__ + "({" endchar = "})" - object = sorted(object, key=_safe_key) - self._format_items(object, stream, indent, allowance, context, level) - stream.write(endchar) + try: + object = sorted(object) + except TypeError: + # Heterogeneous element types — fall back to a key that + # tolerates unorderable pairs by string-comparing their types. + object = sorted(object, key=_safe_key) + yield from self._format_items(object, indent, allowance, context, level) + yield endchar _dispatch[set.__repr__] = _pprint_set _dispatch[frozenset.__repr__] = _pprint_set @@ -246,15 +288,13 @@ def _pprint_set( def _pprint_str( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - write = stream.write + ) -> Iterator[str]: if not len(object): - write(repr(object)) + yield repr(object) return chunks = [] lines = object.splitlines(True) @@ -289,90 +329,84 @@ def _pprint_str( if current: chunks.append(repr(current)) if len(chunks) == 1: - write(rep) + yield rep return if level == 1: - write("(") + yield "(" for i, rep in enumerate(chunks): if i > 0: - write("\n" + " " * indent) - write(rep) + yield "\n" + " " * indent + yield rep if level == 1: - write(")") + yield ")" _dispatch[str.__repr__] = _pprint_str def _pprint_bytes( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - write = stream.write + ) -> Iterator[str]: if len(object) <= 4: - write(repr(object)) + yield repr(object) return parens = level == 1 if parens: indent += 1 allowance += 1 - write("(") + yield "(" 
delim = "" for rep in _wrap_bytes_repr(object, self._width - indent, allowance): - write(delim) - write(rep) + yield delim + yield rep if not delim: delim = "\n" + " " * indent if parens: - write(")") + yield ")" _dispatch[bytes.__repr__] = _pprint_bytes def _pprint_bytearray( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - write = stream.write - write("bytearray(") - self._pprint_bytes( - bytes(object), stream, indent + 10, allowance + 1, context, level + 1 + ) -> Iterator[str]: + yield "bytearray(" + yield from self._pprint_bytes( + bytes(object), indent + 10, allowance + 1, context, level + 1 ) - write(")") + yield ")" _dispatch[bytearray.__repr__] = _pprint_bytearray def _pprint_mappingproxy( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - stream.write("mappingproxy(") - self._format(object.copy(), stream, indent, allowance, context, level) - stream.write(")") + ) -> Iterator[str]: + yield "mappingproxy(" + yield from self._format(object.copy(), indent, allowance, context, level) + yield ")" _dispatch[_types.MappingProxyType.__repr__] = _pprint_mappingproxy def _pprint_simplenamespace( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: + ) -> Iterator[str]: if type(object) is _types.SimpleNamespace: # The SimpleNamespace repr is "namespace" instead of the class # name, so we do the same here. For subclasses; use the class name. 
@@ -380,95 +414,89 @@ def _pprint_simplenamespace( else: cls_name = object.__class__.__name__ items = object.__dict__.items() - stream.write(cls_name + "(") - self._format_namespace_items(items, stream, indent, allowance, context, level) - stream.write(")") + yield cls_name + "(" + yield from self._format_namespace_items( + items, indent, allowance, context, level + ) + yield ")" _dispatch[_types.SimpleNamespace.__repr__] = _pprint_simplenamespace def _format_dict_items( self, - items: list[tuple[Any, Any]], - stream: IO[str], + items: Iterable[tuple[Any, Any]], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - if not items: - return - - write = stream.write + ) -> Iterator[str]: item_indent = indent + self._indent_per_level delimnl = "\n" + " " * item_indent + emitted = False for key, ent in items: - write(delimnl) - write(self._repr(key, context, level)) - write(": ") - self._format(ent, stream, item_indent, 1, context, level) - write(",") + emitted = True + yield delimnl + yield self._repr(key, context, level) + yield ": " + yield from self._format(ent, item_indent, 1, context, level) + yield "," - write("\n" + " " * indent) + if emitted: + yield "\n" + " " * indent def _format_namespace_items( self, - items: list[tuple[Any, Any]], - stream: IO[str], + items: Iterable[tuple[Any, Any]], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - if not items: - return - - write = stream.write + ) -> Iterator[str]: item_indent = indent + self._indent_per_level delimnl = "\n" + " " * item_indent + emitted = False for key, ent in items: - write(delimnl) - write(key) - write("=") + emitted = True + yield delimnl + yield key + yield "=" if id(ent) in context: # Special-case representation of recursion to match standard # recursive dataclass repr. - write("...") + yield "..." 
else: - self._format( + yield from self._format( ent, - stream, item_indent + len(key) + 1, 1, context, level, ) - write(",") + yield "," - write("\n" + " " * indent) + if emitted: + yield "\n" + " " * indent def _format_items( self, - items: list[Any], - stream: IO[str], + items: Iterable[Any], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - if not items: - return - - write = stream.write + ) -> Iterator[str]: item_indent = indent + self._indent_per_level delimnl = "\n" + " " * item_indent - + emitted = False for item in items: - write(delimnl) - self._format(item, stream, item_indent, 1, context, level) - write(",") + emitted = True + yield delimnl + yield from self._format(item, item_indent, 1, context, level) + yield "," - write("\n" + " " * indent) + if emitted: + yield "\n" + " " * indent def _repr(self, object: Any, context: set[int], level: int) -> str: return self._safe_repr(object, context.copy(), self._depth, level) @@ -476,114 +504,107 @@ def _repr(self, object: Any, context: set[int], level: int) -> str: def _pprint_default_dict( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: + ) -> Iterator[str]: rdf = self._repr(object.default_factory, context, level) - stream.write(f"{object.__class__.__name__}({rdf}, ") - self._pprint_dict(object, stream, indent, allowance, context, level) - stream.write(")") + yield f"{object.__class__.__name__}({rdf}, " + yield from self._pprint_dict(object, indent, allowance, context, level) + yield ")" _dispatch[_collections.defaultdict.__repr__] = _pprint_default_dict def _pprint_counter( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - stream.write(object.__class__.__name__ + "(") + ) -> Iterator[str]: + yield object.__class__.__name__ + "(" if object: - stream.write("{") + yield "{" items = object.most_common() - self._format_dict_items(items, stream, indent, allowance, 
context, level) - stream.write("}") + yield from self._format_dict_items(items, indent, allowance, context, level) + yield "}" - stream.write(")") + yield ")" _dispatch[_collections.Counter.__repr__] = _pprint_counter def _pprint_chain_map( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: + ) -> Iterator[str]: if not len(object.maps) or (len(object.maps) == 1 and not len(object.maps[0])): - stream.write(repr(object)) + yield repr(object) return - stream.write(object.__class__.__name__ + "(") - self._format_items(object.maps, stream, indent, allowance, context, level) - stream.write(")") + yield object.__class__.__name__ + "(" + yield from self._format_items(object.maps, indent, allowance, context, level) + yield ")" _dispatch[_collections.ChainMap.__repr__] = _pprint_chain_map def _pprint_deque( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - stream.write(object.__class__.__name__ + "(") + ) -> Iterator[str]: + yield object.__class__.__name__ + "(" if object.maxlen is not None: - stream.write(f"maxlen={object.maxlen}, ") - stream.write("[") + yield f"maxlen={object.maxlen}, " + yield "[" - self._format_items(object, stream, indent, allowance + 1, context, level) - stream.write("])") + yield from self._format_items(object, indent, allowance + 1, context, level) + yield "])" _dispatch[_collections.deque.__repr__] = _pprint_deque def _pprint_user_dict( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - self._format(object.data, stream, indent, allowance, context, level - 1) + ) -> Iterator[str]: + yield from self._format(object.data, indent, allowance, context, level - 1) _dispatch[_collections.UserDict.__repr__] = _pprint_user_dict def _pprint_user_list( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - 
self._format(object.data, stream, indent, allowance, context, level - 1) + ) -> Iterator[str]: + yield from self._format(object.data, indent, allowance, context, level - 1) _dispatch[_collections.UserList.__repr__] = _pprint_user_list def _pprint_user_string( self, object: Any, - stream: IO[str], indent: int, allowance: int, context: set[int], level: int, - ) -> None: - self._format(object.data, stream, indent, allowance, context, level - 1) + ) -> Iterator[str]: + yield from self._format(object.data, indent, allowance, context, level - 1) _dispatch[_collections.UserString.__repr__] = _pprint_user_string diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py index 1326ef34b2e..2c08734cf46 100644 --- a/testing/io/test_pprint.py +++ b/testing/io/test_pprint.py @@ -406,3 +406,87 @@ class DataclassWithTwoItems: ) def test_consistent_pretty_printer(data: Any, expected: str) -> None: assert PrettyPrinter().pformat(data) == textwrap.dedent(expected).strip() + + +class TestPformatLines: + """``pformat_lines`` returns the pretty-printed lines, pulling from + the lazy formatter only until a line/char budget is reached so an + input a downstream truncator will clip anyway is never fully built. + """ + + def test_no_budget_matches_pformat_splitlines(self) -> None: + pp = PrettyPrinter() + data = list(range(50)) + assert pp.pformat_lines(data) == pp.pformat(data).splitlines() + + def test_under_budget_is_complete_and_a_prefix(self) -> None: + # When the whole thing fits, the result is the full pformat, + # regardless of how the budget was reached. + pp = PrettyPrinter() + data = list(range(5)) + full = pp.pformat(data).splitlines() + assert pp.pformat_lines(data, max_lines=11) == full + assert pp.pformat_lines(data, max_chars=10_000) == full + + def test_line_budget_stops_early(self) -> None: + pp = PrettyPrinter() + # 50 scalars, one per line, budget well below 50. 
+ full = pp.pformat(list(range(50))).splitlines() + lines = pp.pformat_lines(list(range(50)), max_lines=11) + assert len(lines) <= 11 + 1 # budget, plus a trailing partial line + # everything but the last line (which may stop mid-line) is a + # prefix of the full output + assert lines[:-1] == full[: len(lines) - 1] + + def test_char_budget_stops_early(self) -> None: + # A *flat* container of huge strings has few lines but explodes on + # chars; a line-only budget wouldn't stop it. The char budget must. + pp = PrettyPrinter() + data = ["x" * 100_000, "y" * 100_000, "z" * 100_000] + lines = pp.pformat_lines(data, max_chars=640) + assert sum(len(line) for line in lines) < 200_000 # bailed, didn't format all 3 + + def test_nested_element_respects_line_budget(self) -> None: + # ``len(object)`` is only a *lower* bound on the line count: a + # single nested element expands to many lines. The lazy pull must + # stop regardless of the container's element count. + pp = PrettyPrinter() + for data in ([{i: "x" * 40 for i in range(50)}], {1: list(range(100))}): + lines = pp.pformat_lines(data, max_lines=11) + assert len(lines) <= 11 + 1 + + def test_nested_dataclass_element_respects_line_budget(self) -> None: + @dataclass + class Many: + a: int + b: int + c: int + d: int + e: int + f: int + g: int + h: int + + pp = PrettyPrinter() + lines = pp.pformat_lines([Many(*range(8))], max_lines=4) + assert len(lines) <= 4 + 1 + assert len(lines) < len(pp.pformat([Many(*range(8))]).splitlines()) + + def test_sized_non_iterable_does_not_raise(self) -> None: + class Sized: + def __len__(self) -> int: + return 3 + + pp = PrettyPrinter() + obj = Sized() + assert pp.pformat_lines(obj, max_lines=5) == pp.pformat(obj).splitlines() + + +def test_pformat_sorts_heterogeneous_set() -> None: + # The set sort tries a natural sort first and falls back to a key + # that compares the element types' names only for unorderable + # mixes; both must succeed. 
+ pp = PrettyPrinter() + assert pp.pformat({3, 1, 2}) == "{\n 1,\n 2,\n 3,\n}" + # Mixed unorderable types must not raise. + pp.pformat({1, "a", 2, "b"}) From d33f7548c1b0d4f871ec33a5d8955a18547a55db Mon Sep 17 00:00:00 2001 From: Pierre Sassoulas Date: Sat, 13 Jun 2026 18:34:16 +0200 Subject: [PATCH 2/3] [perf] Skip the newline count on chunks without a newline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In ``pformat_lines``'s budget loop, ``chunk.count("\n")`` ran on every chunk, but most chunks (brackets, indentation, item reprs) contain no newline. Guarding the call with ``"\n" in chunk`` skips it on those and recovers part of the per-chunk budget-tracking overhead: formatting an 8-element list under a budget drops from ~0.0185 ms to ~0.0163 ms (versus ~0.0132 ms for an uncapped ``pformat().splitlines()``, so the budget overhead shrinks by roughly 40%, from ~+5 us to ~+3 us). The win is small and only matters on the ``-v`` truncating path of a failing assertion (the default path doesn't format the diff at all), so this is kept as a separate commit — easy to drop if the extra branch isn't judged worth it. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/_pytest/_io/pprint.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py index 06caf436e60..d9fd6955032 100644 --- a/src/_pytest/_io/pprint.py +++ b/src/_pytest/_io/pprint.py @@ -125,7 +125,10 @@ def pformat_lines( chunks.append(chunk) if max_chars is not None: n_chars += len(chunk) - if max_lines is not None: + if max_lines is not None and "\n" in chunk: + # Guard the count: most chunks (brackets, indents, item + # reprs) have no newline, and skipping the call on them + # is meaningfully cheaper than counting every chunk. 
n_lines += chunk.count("\n") if (max_lines is not None and n_lines >= max_lines) or ( max_chars is not None and n_chars >= max_chars From abf49628a8e7b48374e3125ecc714384bd545e62 Mon Sep 17 00:00:00 2001 From: Pierre Sassoulas Date: Sun, 14 Jun 2026 11:00:40 +0200 Subject: [PATCH 3/3] [perf] pprint: apply review feedback on ``pformat_lines`` Addresses review on #14588: * make ``max_lines`` / ``max_chars`` keyword-only so they can't be confused at the call site. * drop the implementation detail (``_format``) and the "what the caller does" note from the docstring; describe the behaviour instead. * comment the set-sort fast path ("try a direct sort first, faster than the fallback"). * assert the heterogeneous-set output in the test rather than only checking it does not raise. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/_pytest/_io/pprint.py | 21 +++++++++++---------- testing/io/test_pprint.py | 5 +++-- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py index d9fd6955032..2685d838b68 100644 --- a/src/_pytest/_io/pprint.py +++ b/src/_pytest/_io/pprint.py @@ -99,22 +99,21 @@ def pformat(self, object: Any) -> str: def pformat_lines( self, object: Any, + *, max_lines: int | None = None, max_chars: int | None = None, ) -> list[str]: """Pretty-print ``object`` and return its lines. - ``_format`` yields the output as a stream of chunks, so this can - stop pulling from it as soon as a budget is reached — useful when - a downstream truncator is going to drop everything past that - budget anyway. - - ``max_lines`` / ``max_chars`` bound the two truncation dimensions + ``max_lines`` / ``max_chars`` bound the two output dimensions independently; either may be ``None`` to leave that dimension - unbounded. With both ``None`` the whole object is formatted. 
The - budget is a stopping condition, not a precise cut: formatting - stops on the first chunk that reaches it, so the result may - slightly overshoot (the caller truncates to the exact limit). + unbounded, and with both ``None`` the whole object is formatted. + When a bound is given the object is only formatted far enough to + reach it, so a huge object costs O(budget) rather than O(N). + + The budget is a stopping condition, not a precise cut: formatting + stops on the first piece of output that reaches it, so the result + may slightly overshoot the bound. """ if max_lines is None and max_chars is None: return self.pformat(object).splitlines() @@ -277,6 +276,8 @@ def _pprint_set( yield typ.__name__ + "({" endchar = "})" try: + # Try a direct sort first; it is faster than the fallback and + # works for the common homogeneous, orderable case. object = sorted(object) except TypeError: # Heterogeneous element types — fall back to a key that diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py index 2c08734cf46..805809b3778 100644 --- a/testing/io/test_pprint.py +++ b/testing/io/test_pprint.py @@ -488,5 +488,6 @@ def test_pformat_sorts_heterogeneous_set() -> None: # mixes; both must succeed. pp = PrettyPrinter() assert pp.pformat({3, 1, 2}) == "{\n 1,\n 2,\n 3,\n}" - # Mixed unorderable types must not raise. - pp.pformat({1, "a", 2, "b"}) + # Mixed unorderable types must not raise; the fallback orders by type + # name (ints before strs), then by value. + assert pp.pformat({1, "a", 2, "b"}) == "{\n 1,\n 2,\n 'a',\n 'b',\n}"