Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions changelog/14523.improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Assertion explanations are now built lazily, and the truncator stops
the comparison helpers as soon as it has enough output, so comparing
two large collections no longer builds the full diff only to discard
it. In a focused micro-benchmark of the worst-case scenario
(``set(range(500_000)) == set(range(1, 500_001))``), the run time drops
from ~2,200 ms to ~43 ms; realistic test suites with mostly small diffs
should be unchanged.

The truncation footer no longer reports the hidden-line count
(``...Full output truncated (N lines hidden), ...`` becomes
``...Full output truncated, ...``); diff lines now carry a redundant
``\x1b[0m`` reset prefix (invisible to terminals) because each diff
line is now highlighted and emitted one at a time.
2 changes: 1 addition & 1 deletion doc/en/example/reportingdemo.rst
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ Here is a nice run of several failures and how ``pytest`` presents things:
E 1
E 1...
E
E ...Full output truncated (7 lines hidden), use '-vv' to show
E ...Full output truncated, use '-vv' to show
failure_demo.py:62: AssertionError
_________________ TestSpecialisedExplanations.test_eq_list _________________
Expand Down
4 changes: 2 additions & 2 deletions doc/en/how-to/output.rst
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ Now we can increase pytest's verbosity:
E 'banana',
E 'apple',...
E
E ...Full output truncated (7 lines hidden), use '-vv' to show
E ...Full output truncated, use '-vv' to show
test_verbosity_example.py:8: AssertionError
____________________________ test_numbers_fail _____________________________
Expand All @@ -190,7 +190,7 @@ Now we can increase pytest's verbosity:
E {'10': 10, '20': 20, '30': 30, '40': 40}
E ...
E
E ...Full output truncated (16 lines hidden), use '-vv' to show
E ...Full output truncated, use '-vv' to show
test_verbosity_example.py:14: AssertionError
___________________________ test_long_text_fail ____________________________
Expand Down
135 changes: 134 additions & 1 deletion src/_pytest/_io/pprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import collections as _collections
from collections.abc import Callable
from collections.abc import Iterable
from collections.abc import Iterator
import dataclasses as _dataclasses
from io import StringIO as _StringIO
Expand Down Expand Up @@ -56,6 +57,51 @@ def _safe_tuple(t):
return _safe_key(t[0]), _safe_key(t[1])


class _LineBudgetExceeded(Exception):
    """Private control-flow signal: the line budget has been used up.

    ``_LineBudgetStream.write`` raises this once it has collected its
    configured number of lines. Because the raise happens inside a
    ``write`` call, the formatter's recursion is abandoned at that
    point; the code driving the formatter is expected to catch the
    exception and keep the lines gathered up to then.
    """


class _LineBudgetStream:
    r"""Write-only sink that splits incoming text into lines and aborts
    when ``max_lines`` complete lines have been gathered.

    Text written without a ``\n`` is buffered as the tail of the current
    line. A ``write`` containing ``\n`` closes the current line (and any
    further complete lines in the same chunk), after which the budget is
    checked; ``_LineBudgetExceeded`` is raised when the number of
    completed lines has reached the cap. The check only runs after a
    newline-bearing write, so a single chunk with several newlines may
    push the count past ``max_lines`` before the abort.
    """

    __slots__ = ("_lines", "_max", "_pending")

    def __init__(self, max_lines: int) -> None:
        # Cap on completed lines before ``write`` raises.
        self._max = max_lines
        # Completed lines, in output order.
        self._lines: list[str] = []
        # Fragments of the line currently being assembled.
        self._pending: list[str] = []

    def write(self, s: str) -> None:
        """Accept a chunk of text; raise ``_LineBudgetExceeded`` at the cap."""
        if "\n" in s:
            first, *middle, last = s.split("\n")
            # ``first`` terminates the line that was being assembled.
            self._pending.append(first)
            self._lines.append("".join(self._pending))
            # Anything between the first and last newline is already a
            # complete line of its own.
            self._lines.extend(middle)
            # Whatever follows the final newline starts the next line.
            self._pending = [last] if last else []
            if len(self._lines) >= self._max:
                raise _LineBudgetExceeded
        elif s:
            self._pending.append(s)

    def finish(self) -> list[str]:
        """Flush any unterminated trailing text and return all lines."""
        if self._pending:
            self._lines.append("".join(self._pending))
            self._pending = []
        return self._lines


class PrettyPrinter:
def __init__(
self,
Expand Down Expand Up @@ -91,6 +137,88 @@ def pformat(self, object: Any) -> str:
self._format(object, sio, 0, 0, set(), 0)
return sio.getvalue()

def pformat_lines(self, object: Any, max_lines: int | None = None) -> list[str]:
    """Pretty-print ``object`` and return the result as a list of lines.

    ``max_lines=None`` means "no limit": the result is simply
    ``self.pformat(object).splitlines()``.

    When ``max_lines`` is given, formatting normally goes through a
    ``_LineBudgetStream`` that aborts once that many lines have been
    collected, and the lines gathered up to the abort are returned.
    This lets a caller that is going to truncate the output anyway
    avoid formatting an entire huge collection.

    As a shortcut, an object whose ``len()`` is at most ``max_lines``
    and whose children all render on a single line (see
    ``_renders_flat``) is formatted through the unlimited
    ``pformat`` path instead, since its output is bounded by its
    size and the budget stream only adds per-write overhead.
    """
    if max_lines is not None:
        # ``len(object)`` by itself is not enough to bound the output:
        # one nested container child may expand to many lines. The
        # flatness check is what makes the shortcut safe.
        try:
            child_count = len(object)
        except TypeError:
            within_budget = False
        else:
            within_budget = 0 <= child_count <= max_lines
        if not (within_budget and self._renders_flat(object)):
            budget_stream = _LineBudgetStream(max_lines)
            try:
                # ``_format`` is annotated as taking ``IO[str]`` but only
                # ever calls ``write(str)``, which the budget stream
                # provides.
                self._format(object, budget_stream, 0, 0, set(), 0)  # type: ignore[arg-type]
            except _LineBudgetExceeded:
                # Budget reached: keep what was collected so far.
                pass
            return budget_stream.finish()
    return self.pformat(object).splitlines()

def _renders_inline(self, object: Any) -> bool:
"""Return ``True`` when ``_format`` writes ``object`` on one line.

Mirrors ``_format``'s dispatch: container types (in ``_dispatch``)
and dataclasses with a generated ``repr`` recurse and can span
multiple lines; everything else goes through ``_repr``, a single
``write``.
"""
if type(object).__repr__ in self._dispatch:
return False
return not (
_dataclasses.is_dataclass(object)
and not isinstance(object, type)
and object.__dataclass_params__.repr # type:ignore[attr-defined]
and hasattr(object.__repr__, "__wrapped__")
and "__create_fn__" in object.__repr__.__wrapped__.__qualname__
)

def _renders_flat(self, object: Any) -> bool:
"""Return ``True`` when every child of ``object`` renders inline.

Conservative: an unrecognised container shape (no ``items`` and
not iterable) returns ``False`` so the caller falls back to the
budget-aware stream rather than risk an unbounded fast path.
"""
if isinstance(object, _collections.abc.Mapping):
children: Iterable[Any] = (
*object.keys(),
*object.values(),
)
else:
try:
children = iter(object)
except TypeError:
return False
return all(self._renders_inline(child) for child in children)

def _format(
self,
object: Any,
Expand Down Expand Up @@ -236,7 +364,12 @@ def _pprint_set(
else:
stream.write(typ.__name__ + "({")
endchar = "})"
object = sorted(object, key=_safe_key)
try:
object = sorted(object)
except TypeError:
# Heterogeneous element types — fall back to a key that
# tolerates unorderable pairs by string-comparing their types.
object = sorted(object, key=_safe_key)
self._format_items(object, stream, indent, allowance, context, level)
stream.write(endchar)

Expand Down
61 changes: 45 additions & 16 deletions src/_pytest/assertion/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,21 @@ def callbinrepr(op, left: object, right: object) -> str | None:
config=item.config, op=op, left=left, right=right
)
for new_expl in hook_result:
# Plugin-supplied lists are truncated here; the built-in impl
# already truncates as it streams, so re-applying truncation
# to its output is a near no-op (the body fits the budget,
# only the footer line is re-emitted with the same wording).
# ``materialize_with_truncation`` can return ``[]`` when the
# input was a truthy-but-empty iterable, so re-check after
# materialising.
if new_expl:
new_expl = truncate.truncate_if_required(new_expl, item)
new_expl = [line.replace("\n", "\\n") for line in new_expl]
res = "\n~".join(new_expl)
if item.config.getvalue("assertmode") == "rewrite":
res = res.replace("%", "%%")
return res
new_expl = truncate.materialize_with_truncation(new_expl, item.config)
if new_expl:
new_expl = [line.replace("\n", "\\n") for line in new_expl]
res = "\n~".join(new_expl)
if item.config.getvalue("assertmode") == "rewrite":
res = res.replace("%", "%%")
return res
return None

saved_assert_hooks = util._reprcompare, util._assertion_pass
Expand Down Expand Up @@ -218,19 +226,40 @@ def pytest_sessionfinish(session: Session) -> None:
def pytest_assertrepr_compare(
config: Config, op: str, left: Any, right: Any
) -> list[str] | None:
"""Return an explanation for ``left op right``.

Internally ``util.assertrepr_compare`` is a generator; we feed it
through ``materialize_with_truncation`` so a huge comparison
short-circuits at the truncation threshold without building the
full diff, while still returning the ``list[str] | None`` shape
the hook spec advertises.
"""
if config.pluginmanager.has_plugin("terminalreporter"):
highlighter = config.get_terminal_writer()._highlight
else:
# Keep it plaintext when not using terminalreporter (#14377).
highlighter = util.dummy_highlighter
explanation = list(
util.assertrepr_compare(
op=op,
left=left,
right=right,
verbose=config.get_verbosity(Config.VERBOSITY_ASSERTIONS),
highlighter=highlighter,
assertion_text_diff_style=util.get_assertion_text_diff_style(config),
)
# When truncation is going to clip the explanation downstream, tell
# the comparison helpers to cap their pformat output at the same
# budget so they don't spend O(N) formatting lines we're about to
# drop. ``+ 3`` matches the truncator's own ``line_cap``: 2 lines
# for the truncation message it appends (blank + footer) plus 1
# for overshoot detection. ``difflib.ndiff`` over two K-line
# pformat outputs produces at least K output lines (more when the
# sides differ), and the truncator pulls at most ``trunc_lines +
# 3`` lines from the whole explanation, so a per-side pformat
# budget of ``trunc_lines + 3`` covers the worst case. With
# truncation disabled the cap stays ``None`` and the user gets the
# full diff.
should_truncate, trunc_lines, _ = truncate._get_truncation_parameters(config)
pformat_cap = trunc_lines + 3 if should_truncate and trunc_lines > 0 else None
lines = util.assertrepr_compare(
op=op,
left=left,
right=right,
verbose=config.get_verbosity(Config.VERBOSITY_ASSERTIONS),
highlighter=highlighter,
assertion_text_diff_style=util.get_assertion_text_diff_style(config),
pformat_cap=pformat_cap,
)
return explanation or None
return truncate.materialize_with_truncation(lines, config) or None
5 changes: 4 additions & 1 deletion src/_pytest/assertion/_compare_any.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def _compare_eq_any(
highlighter: _HighlightFunc,
verbose: int,
assertion_text_diff_style: _AssertionTextDiffStyle,
pformat_cap: int | None = None,
) -> Iterator[str]:
"""Yield the per-line explanation for ``left == right`` (without summary).

Expand Down Expand Up @@ -73,7 +74,9 @@ def _compare_eq_any(
yield from _compare_eq_mapping(left, right, highlighter, verbose)

if isiterable(left) and isiterable(right):
yield from _compare_eq_iterable(left, right, highlighter, verbose)
yield from _compare_eq_iterable(
left, right, highlighter, verbose, pformat_cap
)


def _compare_eq_cls(
Expand Down
26 changes: 18 additions & 8 deletions src/_pytest/assertion/_compare_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,36 @@ def _compare_eq_iterable(
right: Iterable[object],
highlighter: _HighlightFunc,
verbose: int = 0,
pformat_cap: int | None = None,
) -> Iterator[str]:
if verbose <= 0 and not running_on_ci():
yield "Use -v to get more diff"
return
# dynamic import to speedup pytest
import difflib

left_formatting = PrettyPrinter().pformat(left).splitlines()
right_formatting = PrettyPrinter().pformat(right).splitlines()
# ``pformat_cap`` is computed by the dispatcher from the
# truncator's ``truncation_limit_lines``: when truncation is going
# to drop everything past that budget anyway, we don't bother
# formatting more. ``None`` means no cap (``-vv`` or CI: the user
# wants the full diff).
pp = PrettyPrinter()
left_formatting = pp.pformat_lines(left, max_lines=pformat_cap)
right_formatting = pp.pformat_lines(right, max_lines=pformat_cap)

yield ""
yield "Full diff:"
# "right" is the expected base against which we compare "left",
# see https://github.com/pytest-dev/pytest/issues/3333
yield from highlighter(
"\n".join(
line.rstrip() for line in difflib.ndiff(right_formatting, left_formatting)
),
lexer="diff",
).splitlines()
#
# Yield each ndiff line through the highlighter individually so the
# streaming truncator can stop pulling from ``difflib.ndiff`` as
# soon as its budget is full. The diff lexer is line-oriented, so
# per-line highlighting is equivalent — it just adds a redundant
# ``\x1b[0m`` reset at the start of each line (invisible to the
# terminal).
for line in difflib.ndiff(right_formatting, left_formatting):
yield highlighter(line.rstrip(), lexer="diff")


def _compare_eq_sequence(
Expand Down
15 changes: 12 additions & 3 deletions src/_pytest/assertion/_compare_set.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

from collections.abc import Callable
from collections.abc import Iterable
from collections.abc import Iterator
from collections.abc import Set as AbstractSet
from typing import TypeAlias
Expand Down Expand Up @@ -77,14 +76,24 @@ def _compare_lt_set(

SetComparisonFunction: TypeAlias = Callable[
[AbstractSet[object], AbstractSet[object], _HighlightFunc, int],
Iterable[str],
Iterator[str],
]


def _both_sets_are_equal(
    left: AbstractSet[object],
    right: AbstractSet[object],
    highlighter: _HighlightFunc,
    verbose: int = 0,
) -> Iterator[str]:
    """Yield the single explanation line ``"Both sets are equal"``.

    The arguments are accepted so the signature lines up with the other
    set comparison helpers; none of them is consulted.
    """
    yield from ("Both sets are equal",)


SET_COMPARISON_FUNCTIONS: dict[str, SetComparisonFunction] = {
# == can't be done here without a prior refactor because there's an additional
# explanation for iterable in _compare_eq_any
# "==": _compare_eq_set,
"!=": lambda *a, **kw: ["Both sets are equal"],
"!=": _both_sets_are_equal,
">=": _compare_gte_set,
"<=": _compare_lte_set,
">": _compare_gt_set,
Expand Down
Loading
Loading