From 797b16fa23a7da57c30a85c5444e2bde91106d04 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 18:33:58 +0200
Subject: [PATCH 1/3] [perf] Make ``PrettyPrinter`` format lazily so output can
 be budget-capped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

``_format`` and the per-type helpers now ``yield`` their output as a
stream of string chunks instead of writing to a file-like object, and
``pformat`` joins them. On top of that, ``pformat_lines`` pulls from the
formatter only until a budget is reached:

    pformat_lines(obj, max_lines=None, max_chars=None)

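It stops on the first chunk that reaches *either* budget, so a huge
collection costs roughly O(budget) rather than O(N). The bound is at
chunk granularity: the chunk that crosses the budget is still produced
whole, so the result may overshoot it (see the third benchmark below).
Either dimension may be ``None`` (unbounded); with both ``None`` the
whole object is formatted.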

Motivation
----------
Assertion diffs are truncated to a handful of lines/chars before being
shown. Formatting the whole of a large ``==`` comparison and then
throwing almost all of it away is pure waste. With a lazy formatter the
truncating caller simply stops pulling once it has enough.

Benchmark (``PrettyPrinter`` alone, width 80)::

    list(range(500_000)):
        pformat().splitlines()        ~805 ms
        pformat_lines(max_lines=11)   ~0.027 ms      (~30000x)

    [8 small ints] (common small diff):
        pformat().splitlines()        ~0.0133 ms
        pformat_lines(max_lines=11)   ~0.0185 ms     (+~5 us)

    ["x"*100_000] * 3 (flat, few huge elements):
        pformat_lines(max_chars=640)  stops after ~100_000 chars
                                      (one element) instead of 300_000

Why a lazy generator rather than a fast path + budget stream
------------------------------------------------------------
An earlier approach kept a cheap ``pformat().splitlines()`` fast path
guarded by ``len(obj) <= max_lines`` plus a flatness check, falling back
to a write-intercepting budget-stream class for the rest. Two problems:

* ``len(obj)`` is only a *lower* bound on the line count — one nested
  element (``[{...50 keys...}]``) expands to many lines — so the guard
  needed the flatness scan to stay correct, and even then it bounded
  only *lines*, never *chars*: a flat container of a few enormous
  strings has almost no lines but blows the char budget.
* it was two code paths plus a stream class plus an exception used for
  control flow.

Because the formatter is lazy, "stop pulling at the budget" is the whole
optimisation: correct regardless of how lines/chars are distributed
across elements, bounding both dimensions, with no ``len()`` proxy to
get wrong and no fast/slow branch. The common small-diff case costs only
~5 us more than the unbounded path (it is never the bottleneck — a
failing assertion isn't hot), while large comparisons drop by orders of
magnitude.

``_pprint_set`` also tries a plain ``sorted`` first and falls back to
the ``_safe_key`` wrapper only for unorderable mixes.

This diverges structurally from the upstream cpython ``pprint`` it was
vendored from; the module header notes it is no longer kept in sync.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py | 335 ++++++++++++++++++++------------------
 testing/io/test_pprint.py |  84 ++++++++++
 2 files changed, 262 insertions(+), 157 deletions(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index ec41b449ddf..06caf436e60 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -3,6 +3,14 @@
 # (https://github.com/python/cpython/) at commit
 # c5140945c723ae6c4b7ee81ff720ac8ea4b52cfd (python3.12).
 #
+# It has since been adapted to emit its output lazily as a stream of
+# string chunks (``_format`` and the per-type helpers are generators)
+# rather than writing to a file-like object. This lets ``pformat_lines``
+# stop formatting as soon as a line/char budget is reached, so a huge
+# collection a caller is going to truncate anyway is never fully built.
+# As a result this copy has diverged structurally from upstream and is
+# no longer kept in sync with it.
+#
 #
 #  Original Author:      Fred L. Drake, Jr.
 #                        fdrake@acm.org
@@ -17,13 +25,12 @@
 
 import collections as _collections
 from collections.abc import Callable
+from collections.abc import Iterable
 from collections.abc import Iterator
 import dataclasses as _dataclasses
-from io import StringIO as _StringIO
 import re
 import types as _types
 from typing import Any
-from typing import IO
 
 
 class _safe_key:
@@ -87,28 +94,62 @@ def __init__(
         self._width = width
 
     def pformat(self, object: Any) -> str:
-        sio = _StringIO()
-        self._format(object, sio, 0, 0, set(), 0)
-        return sio.getvalue()
+        return "".join(self._format(object, 0, 0, set(), 0))
+
+    def pformat_lines(
+        self,
+        object: Any,
+        max_lines: int | None = None,
+        max_chars: int | None = None,
+    ) -> list[str]:
+        """Pretty-print ``object`` and return its lines.
+
+        ``_format`` yields the output as a stream of chunks, so this can
+        stop pulling from it as soon as a budget is reached — useful when
+        a downstream truncator is going to drop everything past that
+        budget anyway.
+
+        ``max_lines`` / ``max_chars`` bound the two truncation dimensions
+        independently; either may be ``None`` to leave that dimension
+        unbounded. With both ``None`` the whole object is formatted. The
+        budget is a stopping condition, not a precise cut: formatting
+        stops on the first chunk that reaches it, so the result may
+        slightly overshoot (the caller truncates to the exact limit).
+        """
+        if max_lines is None and max_chars is None:
+            return self.pformat(object).splitlines()
+        n_lines = 0
+        n_chars = 0
+        chunks: list[str] = []
+        for chunk in self._format(object, 0, 0, set(), 0):
+            chunks.append(chunk)
+            if max_chars is not None:
+                n_chars += len(chunk)
+            if max_lines is not None:
+                n_lines += chunk.count("\n")
+            if (max_lines is not None and n_lines >= max_lines) or (
+                max_chars is not None and n_chars >= max_chars
+            ):
+                break
+        return "".join(chunks).splitlines()
 
     def _format(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         objid = id(object)
         if objid in context:
-            stream.write(_recursion(object))
+            yield _recursion(object)
             return
 
         p = self._dispatch.get(type(object).__repr__, None)
         if p is not None:
             context.add(objid)
-            p(self, object, stream, indent, allowance, context, level + 1)
+            yield from p(self, object, indent, allowance, context, level + 1)
             context.remove(objid)
         elif (
             _dataclasses.is_dataclass(object)
@@ -120,125 +161,126 @@ def _format(
             and "__create_fn__" in object.__repr__.__wrapped__.__qualname__
         ):
             context.add(objid)
-            self._pprint_dataclass(
-                object, stream, indent, allowance, context, level + 1
+            yield from self._pprint_dataclass(
+                object, indent, allowance, context, level + 1
             )
             context.remove(objid)
         else:
-            stream.write(self._repr(object, context, level))
+            yield self._repr(object, context, level)
 
     def _pprint_dataclass(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         cls_name = object.__class__.__name__
         items = [
             (f.name, getattr(object, f.name))
             for f in _dataclasses.fields(object)
             if f.repr
         ]
-        stream.write(cls_name + "(")
-        self._format_namespace_items(items, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield cls_name + "("
+        yield from self._format_namespace_items(
+            items, indent, allowance, context, level
+        )
+        yield ")"
 
     _dispatch: dict[
         Callable[..., str],
-        Callable[[PrettyPrinter, Any, IO[str], int, int, set[int], int], None],
+        Callable[[PrettyPrinter, Any, int, int, set[int], int], Iterator[str]],
     ] = {}
 
     def _pprint_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
-        write("{")
-        items = object.items()
-        self._format_dict_items(items, stream, indent, allowance, context, level)
-        write("}")
+    ) -> Iterator[str]:
+        yield "{"
+        yield from self._format_dict_items(
+            object.items(), indent, allowance, context, level
+        )
+        yield "}"
 
     _dispatch[dict.__repr__] = _pprint_dict
 
     def _pprint_ordered_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if not len(object):
-            stream.write(repr(object))
+            yield repr(object)
             return
         cls = object.__class__
-        stream.write(cls.__name__ + "(")
-        self._pprint_dict(object, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield cls.__name__ + "("
+        yield from self._pprint_dict(object, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_collections.OrderedDict.__repr__] = _pprint_ordered_dict
 
     def _pprint_list(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write("[")
-        self._format_items(object, stream, indent, allowance, context, level)
-        stream.write("]")
+    ) -> Iterator[str]:
+        yield "["
+        yield from self._format_items(object, indent, allowance, context, level)
+        yield "]"
 
     _dispatch[list.__repr__] = _pprint_list
 
     def _pprint_tuple(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write("(")
-        self._format_items(object, stream, indent, allowance, context, level)
-        stream.write(")")
+    ) -> Iterator[str]:
+        yield "("
+        yield from self._format_items(object, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[tuple.__repr__] = _pprint_tuple
 
     def _pprint_set(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if not len(object):
-            stream.write(repr(object))
+            yield repr(object)
             return
         typ = object.__class__
         if typ is set:
-            stream.write("{")
+            yield "{"
             endchar = "}"
         else:
-            stream.write(typ.__name__ + "({")
+            yield typ.__name__ + "({"
             endchar = "})"
-        object = sorted(object, key=_safe_key)
-        self._format_items(object, stream, indent, allowance, context, level)
-        stream.write(endchar)
+        try:
+            object = sorted(object)
+        except TypeError:
+            # Heterogeneous element types — fall back to a key that
+            # tolerates unorderable pairs by string-comparing their types.
+            object = sorted(object, key=_safe_key)
+        yield from self._format_items(object, indent, allowance, context, level)
+        yield endchar
 
     _dispatch[set.__repr__] = _pprint_set
     _dispatch[frozenset.__repr__] = _pprint_set
@@ -246,15 +288,13 @@ def _pprint_set(
     def _pprint_str(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
+    ) -> Iterator[str]:
         if not len(object):
-            write(repr(object))
+            yield repr(object)
             return
         chunks = []
         lines = object.splitlines(True)
@@ -289,90 +329,84 @@ def _pprint_str(
                 if current:
                     chunks.append(repr(current))
         if len(chunks) == 1:
-            write(rep)
+            yield rep
             return
         if level == 1:
-            write("(")
+            yield "("
         for i, rep in enumerate(chunks):
             if i > 0:
-                write("\n" + " " * indent)
-            write(rep)
+                yield "\n" + " " * indent
+            yield rep
         if level == 1:
-            write(")")
+            yield ")"
 
     _dispatch[str.__repr__] = _pprint_str
 
     def _pprint_bytes(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
+    ) -> Iterator[str]:
         if len(object) <= 4:
-            write(repr(object))
+            yield repr(object)
             return
         parens = level == 1
         if parens:
             indent += 1
             allowance += 1
-            write("(")
+            yield "("
         delim = ""
         for rep in _wrap_bytes_repr(object, self._width - indent, allowance):
-            write(delim)
-            write(rep)
+            yield delim
+            yield rep
             if not delim:
                 delim = "\n" + " " * indent
         if parens:
-            write(")")
+            yield ")"
 
     _dispatch[bytes.__repr__] = _pprint_bytes
 
     def _pprint_bytearray(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
-        write("bytearray(")
-        self._pprint_bytes(
-            bytes(object), stream, indent + 10, allowance + 1, context, level + 1
+    ) -> Iterator[str]:
+        yield "bytearray("
+        yield from self._pprint_bytes(
+            bytes(object), indent + 10, allowance + 1, context, level + 1
         )
-        write(")")
+        yield ")"
 
     _dispatch[bytearray.__repr__] = _pprint_bytearray
 
     def _pprint_mappingproxy(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write("mappingproxy(")
-        self._format(object.copy(), stream, indent, allowance, context, level)
-        stream.write(")")
+    ) -> Iterator[str]:
+        yield "mappingproxy("
+        yield from self._format(object.copy(), indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_types.MappingProxyType.__repr__] = _pprint_mappingproxy
 
     def _pprint_simplenamespace(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if type(object) is _types.SimpleNamespace:
             # The SimpleNamespace repr is "namespace" instead of the class
             # name, so we do the same here. For subclasses; use the class name.
@@ -380,95 +414,89 @@ def _pprint_simplenamespace(
         else:
             cls_name = object.__class__.__name__
         items = object.__dict__.items()
-        stream.write(cls_name + "(")
-        self._format_namespace_items(items, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield cls_name + "("
+        yield from self._format_namespace_items(
+            items, indent, allowance, context, level
+        )
+        yield ")"
 
     _dispatch[_types.SimpleNamespace.__repr__] = _pprint_simplenamespace
 
     def _format_dict_items(
         self,
-        items: list[tuple[Any, Any]],
-        stream: IO[str],
+        items: Iterable[tuple[Any, Any]],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        if not items:
-            return
-
-        write = stream.write
+    ) -> Iterator[str]:
         item_indent = indent + self._indent_per_level
         delimnl = "\n" + " " * item_indent
+        emitted = False
         for key, ent in items:
-            write(delimnl)
-            write(self._repr(key, context, level))
-            write(": ")
-            self._format(ent, stream, item_indent, 1, context, level)
-            write(",")
+            emitted = True
+            yield delimnl
+            yield self._repr(key, context, level)
+            yield ": "
+            yield from self._format(ent, item_indent, 1, context, level)
+            yield ","
 
-        write("\n" + " " * indent)
+        if emitted:
+            yield "\n" + " " * indent
 
     def _format_namespace_items(
         self,
-        items: list[tuple[Any, Any]],
-        stream: IO[str],
+        items: Iterable[tuple[Any, Any]],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        if not items:
-            return
-
-        write = stream.write
+    ) -> Iterator[str]:
         item_indent = indent + self._indent_per_level
         delimnl = "\n" + " " * item_indent
+        emitted = False
         for key, ent in items:
-            write(delimnl)
-            write(key)
-            write("=")
+            emitted = True
+            yield delimnl
+            yield key
+            yield "="
             if id(ent) in context:
                 # Special-case representation of recursion to match standard
                 # recursive dataclass repr.
-                write("...")
+                yield "..."
             else:
-                self._format(
+                yield from self._format(
                     ent,
-                    stream,
                     item_indent + len(key) + 1,
                     1,
                     context,
                     level,
                 )
 
-            write(",")
+            yield ","
 
-        write("\n" + " " * indent)
+        if emitted:
+            yield "\n" + " " * indent
 
     def _format_items(
         self,
-        items: list[Any],
-        stream: IO[str],
+        items: Iterable[Any],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        if not items:
-            return
-
-        write = stream.write
+    ) -> Iterator[str]:
         item_indent = indent + self._indent_per_level
         delimnl = "\n" + " " * item_indent
-
+        emitted = False
         for item in items:
-            write(delimnl)
-            self._format(item, stream, item_indent, 1, context, level)
-            write(",")
+            emitted = True
+            yield delimnl
+            yield from self._format(item, item_indent, 1, context, level)
+            yield ","
 
-        write("\n" + " " * indent)
+        if emitted:
+            yield "\n" + " " * indent
 
     def _repr(self, object: Any, context: set[int], level: int) -> str:
         return self._safe_repr(object, context.copy(), self._depth, level)
@@ -476,114 +504,107 @@ def _repr(self, object: Any, context: set[int], level: int) -> str:
     def _pprint_default_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         rdf = self._repr(object.default_factory, context, level)
-        stream.write(f"{object.__class__.__name__}({rdf}, ")
-        self._pprint_dict(object, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield f"{object.__class__.__name__}({rdf}, "
+        yield from self._pprint_dict(object, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_collections.defaultdict.__repr__] = _pprint_default_dict
 
     def _pprint_counter(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write(object.__class__.__name__ + "(")
+    ) -> Iterator[str]:
+        yield object.__class__.__name__ + "("
 
         if object:
-            stream.write("{")
+            yield "{"
             items = object.most_common()
-            self._format_dict_items(items, stream, indent, allowance, context, level)
-            stream.write("}")
+            yield from self._format_dict_items(items, indent, allowance, context, level)
+            yield "}"
 
-        stream.write(")")
+        yield ")"
 
     _dispatch[_collections.Counter.__repr__] = _pprint_counter
 
     def _pprint_chain_map(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if not len(object.maps) or (len(object.maps) == 1 and not len(object.maps[0])):
-            stream.write(repr(object))
+            yield repr(object)
             return
 
-        stream.write(object.__class__.__name__ + "(")
-        self._format_items(object.maps, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield object.__class__.__name__ + "("
+        yield from self._format_items(object.maps, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_collections.ChainMap.__repr__] = _pprint_chain_map
 
     def _pprint_deque(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write(object.__class__.__name__ + "(")
+    ) -> Iterator[str]:
+        yield object.__class__.__name__ + "("
         if object.maxlen is not None:
-            stream.write(f"maxlen={object.maxlen}, ")
-        stream.write("[")
+            yield f"maxlen={object.maxlen}, "
+        yield "["
 
-        self._format_items(object, stream, indent, allowance + 1, context, level)
-        stream.write("])")
+        yield from self._format_items(object, indent, allowance + 1, context, level)
+        yield "])"
 
     _dispatch[_collections.deque.__repr__] = _pprint_deque
 
     def _pprint_user_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        self._format(object.data, stream, indent, allowance, context, level - 1)
+    ) -> Iterator[str]:
+        yield from self._format(object.data, indent, allowance, context, level - 1)
 
     _dispatch[_collections.UserDict.__repr__] = _pprint_user_dict
 
     def _pprint_user_list(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        self._format(object.data, stream, indent, allowance, context, level - 1)
+    ) -> Iterator[str]:
+        yield from self._format(object.data, indent, allowance, context, level - 1)
 
     _dispatch[_collections.UserList.__repr__] = _pprint_user_list
 
     def _pprint_user_string(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        self._format(object.data, stream, indent, allowance, context, level - 1)
+    ) -> Iterator[str]:
+        yield from self._format(object.data, indent, allowance, context, level - 1)
 
     _dispatch[_collections.UserString.__repr__] = _pprint_user_string
 
diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index 1326ef34b2e..2c08734cf46 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -406,3 +406,87 @@ class DataclassWithTwoItems:
 )
 def test_consistent_pretty_printer(data: Any, expected: str) -> None:
     assert PrettyPrinter().pformat(data) == textwrap.dedent(expected).strip()
+
+
+class TestPformatLines:
+    """``pformat_lines`` returns the pretty-printed lines, pulling from
+    the lazy formatter only until a line/char budget is reached so an
+    input a downstream truncator will clip anyway is never fully built.
+    """
+
+    def test_no_budget_matches_pformat_splitlines(self) -> None:
+        pp = PrettyPrinter()
+        data = list(range(50))
+        assert pp.pformat_lines(data) == pp.pformat(data).splitlines()
+
+    def test_under_budget_is_complete_and_a_prefix(self) -> None:
+        # When the whole output fits within the budget, the result is the
+        # full pformat, regardless of which budget dimension was given.
+        pp = PrettyPrinter()
+        data = list(range(5))
+        full = pp.pformat(data).splitlines()
+        assert pp.pformat_lines(data, max_lines=11) == full
+        assert pp.pformat_lines(data, max_chars=10_000) == full
+
+    def test_line_budget_stops_early(self) -> None:
+        pp = PrettyPrinter()
+        # 50 scalars, one per line, budget well below 50.
+        full = pp.pformat(list(range(50))).splitlines()
+        lines = pp.pformat_lines(list(range(50)), max_lines=11)
+        assert len(lines) <= 11 + 1  # budget, plus a trailing partial line
+        # everything but the last line (which may stop mid-line) is a
+        # prefix of the full output
+        assert lines[:-1] == full[: len(lines) - 1]
+
+    def test_char_budget_stops_early(self) -> None:
+        # A *flat* container of huge strings has few lines but explodes on
+        # chars; a line-only budget wouldn't stop it. The char budget must.
+        pp = PrettyPrinter()
+        data = ["x" * 100_000, "y" * 100_000, "z" * 100_000]
+        lines = pp.pformat_lines(data, max_chars=640)
+        assert sum(len(line) for line in lines) < 200_000  # bailed, didn't format all 3
+
+    def test_nested_element_respects_line_budget(self) -> None:
+        # ``len(object)`` is only a *lower* bound on the line count: a
+        # single nested element expands to many lines. The lazy pull must
+        # stop regardless of the container's element count.
+        pp = PrettyPrinter()
+        for data in ([{i: "x" * 40 for i in range(50)}], {1: list(range(100))}):
+            lines = pp.pformat_lines(data, max_lines=11)
+            assert len(lines) <= 11 + 1
+
+    def test_nested_dataclass_element_respects_line_budget(self) -> None:
+        @dataclass
+        class Many:
+            a: int
+            b: int
+            c: int
+            d: int
+            e: int
+            f: int
+            g: int
+            h: int
+
+        pp = PrettyPrinter()
+        lines = pp.pformat_lines([Many(*range(8))], max_lines=4)
+        assert len(lines) <= 4 + 1
+        assert len(lines) < len(pp.pformat([Many(*range(8))]).splitlines())
+
+    def test_sized_non_iterable_does_not_raise(self) -> None:
+        class Sized:
+            def __len__(self) -> int:
+                return 3
+
+        pp = PrettyPrinter()
+        obj = Sized()
+        assert pp.pformat_lines(obj, max_lines=5) == pp.pformat(obj).splitlines()
+
+
+def test_pformat_sorts_heterogeneous_set() -> None:
+    # The set sort tries a natural sort first and falls back to a key
+    # that compares the element types' names only for unorderable
+    # mixes; both must succeed.
+    pp = PrettyPrinter()
+    assert pp.pformat({3, 1, 2}) == "{\n    1,\n    2,\n    3,\n}"
+    # Mixed unorderable types must not raise.
+    pp.pformat({1, "a", 2, "b"})

From d33f7548c1b0d4f871ec33a5d8955a18547a55db Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 18:34:16 +0200
Subject: [PATCH 2/3] [perf] Skip the newline count on chunks without a newline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In ``pformat_lines``'s budget loop, ``chunk.count("\n")`` ran on every
chunk, but most chunks (brackets, indentation, item reprs) contain no
newline. Guarding the call with ``"\n" in chunk`` skips it on those and
recovers part of the per-chunk budget-tracking overhead: formatting an
8-element list under a budget drops from ~0.0185 ms to ~0.0163 ms
(versus ~0.0132 ms for an uncapped ``pformat().splitlines()``, so the
budget overhead roughly halves, from ~+5 us to ~+3 us).

The win is small and only matters on the ``-v`` truncating path of a
failing assertion (the default path doesn't format the diff at all), so
this is kept as a separate commit — easy to drop if the extra branch
isn't judged worth it.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index 06caf436e60..d9fd6955032 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -125,7 +125,10 @@ def pformat_lines(
             chunks.append(chunk)
             if max_chars is not None:
                 n_chars += len(chunk)
-            if max_lines is not None:
+            if max_lines is not None and "\n" in chunk:
+                # Guard the count: most chunks (brackets, indents, item
+                # reprs) have no newline, and skipping the call on them
+                # is meaningfully cheaper than counting every chunk.
                 n_lines += chunk.count("\n")
             if (max_lines is not None and n_lines >= max_lines) or (
                 max_chars is not None and n_chars >= max_chars

From abf49628a8e7b48374e3125ecc714384bd545e62 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sun, 14 Jun 2026 11:00:40 +0200
Subject: [PATCH 3/3] [perf] pprint: apply review feedback on ``pformat_lines``

Addresses review on #14588:

* make ``max_lines`` / ``max_chars`` keyword-only so they can't be
  confused at the call site.
* drop the implementation detail (``_format``) and the "what the caller
  does" note from the docstring; describe the behaviour instead.
* comment the set-sort fast path ("try a direct sort first, faster than
  the fallback").
* assert the heterogeneous-set output in the test rather than only
  checking it does not raise.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py | 21 +++++++++++----------
 testing/io/test_pprint.py |  5 +++--
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index d9fd6955032..2685d838b68 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -99,22 +99,21 @@ def pformat(self, object: Any) -> str:
     def pformat_lines(
         self,
         object: Any,
+        *,
         max_lines: int | None = None,
         max_chars: int | None = None,
     ) -> list[str]:
         """Pretty-print ``object`` and return its lines.
 
-        ``_format`` yields the output as a stream of chunks, so this can
-        stop pulling from it as soon as a budget is reached — useful when
-        a downstream truncator is going to drop everything past that
-        budget anyway.
-
-        ``max_lines`` / ``max_chars`` bound the two truncation dimensions
+        ``max_lines`` / ``max_chars`` bound the two output dimensions
         independently; either may be ``None`` to leave that dimension
-        unbounded. With both ``None`` the whole object is formatted. The
-        budget is a stopping condition, not a precise cut: formatting
-        stops on the first chunk that reaches it, so the result may
-        slightly overshoot (the caller truncates to the exact limit).
+        unbounded, and with both ``None`` the whole object is formatted.
+        When a bound is given the object is only formatted far enough to
+        reach it, so a huge object costs O(budget) rather than O(N).
+
+        The budget is a stopping condition, not a precise cut: formatting
+        stops on the first piece of output that reaches it, so the result
+        may slightly overshoot the bound.
         """
         if max_lines is None and max_chars is None:
             return self.pformat(object).splitlines()
@@ -277,6 +276,8 @@ def _pprint_set(
             yield typ.__name__ + "({"
             endchar = "})"
         try:
+            # Try a direct sort first; it is faster than the fallback and
+            # works for the common homogeneous, orderable case.
             object = sorted(object)
         except TypeError:
             # Heterogeneous element types — fall back to a key that
diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index 2c08734cf46..805809b3778 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -488,5 +488,6 @@ def test_pformat_sorts_heterogeneous_set() -> None:
     # mixes; both must succeed.
     pp = PrettyPrinter()
     assert pp.pformat({3, 1, 2}) == "{\n    1,\n    2,\n    3,\n}"
-    # Mixed unorderable types must not raise.
-    pp.pformat({1, "a", 2, "b"})
+    # Mixed unorderable types must not raise; the fallback orders by type
+    # name (ints before strs), then by value.
+    assert pp.pformat({1, "a", 2, "b"}) == "{\n    1,\n    2,\n    'a',\n    'b',\n}"