From fe8f264dff736d152aa558b451a155f7cc7ea819 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 27 Apr 2026 13:39:33 +0300 Subject: [PATCH 1/2] Compare dist sizes vs latest PyPI release Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- .github/compare-dist-sizes.py | 284 ++++++++++++++++++++++++++++++++++ .github/workflows/wheels.yml | 23 +++ 2 files changed, 307 insertions(+) create mode 100644 .github/compare-dist-sizes.py diff --git a/.github/compare-dist-sizes.py b/.github/compare-dist-sizes.py new file mode 100644 index 00000000000..90df39f78b6 --- /dev/null +++ b/.github/compare-dist-sizes.py @@ -0,0 +1,284 @@ +"""Compare sizes of newly-built dists against the latest release on PyPI. + +Fetches file sizes for the latest Pillow release from the PyPI JSON API +(no download required) and compares them to a directory of freshly-built +wheels and sdist. Outputs a table to stdout (and to +`$GITHUB_STEP_SUMMARY` if set). + +Usage: + `uv run .github/compare-dist-sizes.py ` +""" + +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "humanize", +# "prettytable>=3.16", +# "termcolor", +# ] +# /// + +from __future__ import annotations + +import argparse +import json +import os +import re +import sys +import urllib.request +from pathlib import Path + +import humanize +from prettytable import PrettyTable, TableStyle +from termcolor import colored + +PYPI_JSON_URL = "https://pypi.org/pypi/pillow/json" + +# Wheel filename: {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl +# sdist filename: {distribution}-{version}.tar.gz +WHEEL_RE = re.compile( + r"^(?P[^-]+)-(?P[^-]+)" + r"(?:-(?P\d[^-]*))?" + r"-(?P[^-]+)-(?P[^-]+)-(?P[^-]+)\.whl$", + re.IGNORECASE, +) +SDIST_RE = re.compile( + r"^(?P[^-]+)-(?P.+)\.tar\.gz$", + re.IGNORECASE, +) + + +def key_for(filename: str) -> str | None: + """Return a version-independent identifier for a dist file.""" + if m := WHEEL_RE.match(filename): + build = f"-{m['build']}" if m["build"] else "" + return f"wheel:{build}-{m['python']}-{m['abi']}-{m['platform']}" + if m := SDIST_RE.match(filename): + return "sdist" + msg = f"Unexpected dist name: {filename}" + raise ValueError(msg) + + +def display_for(filename: str) -> str: + """Strip the `pillow-{version}-` prefix for compact table display.""" + if m := WHEEL_RE.match(filename): + build = f"{m['build']}-" if m["build"] else "" + return f"{build}{m['python']}-{m['abi']}-{m['platform']}.whl" + if SDIST_RE.match(filename): + return "sdist (.tar.gz)" + return filename + + +def fetch_pypi_sizes() -> tuple[str, dict[str, tuple[str, int]]]: + """Return (version, {key: (filename, size)}) for the latest PyPI release.""" + with urllib.request.urlopen(PYPI_JSON_URL) as response: + data = json.load(response) + version = data["info"]["version"] + sizes: dict[str, tuple[str, int]] = {} + for entry in data.get("urls", []): + filename = entry["filename"] + key = key_for(filename) + if key is None: + continue + sizes[key] = (filename, entry["size"]) + return version, sizes + + +def collect_local_sizes(dist_dir: Path) -> dict[str, tuple[str, int]]: + sizes: dict[str, tuple[str, int]] = {} + for path in sorted(dist_dir.iterdir()): + if not path.is_file(): + continue + key = key_for(path.name) + if key is None: + continue + sizes[key] = (path.name, path.stat().st_size) + return sizes + + +def human(n: int | None) -> str: + if n is None: + return "n/a" + return humanize.naturalsize(n) + + +def pct_change(before: int | None, after: int | None) -> str: + if not before or after is None: + return "n/a" + delta = (after - before) / before * 100 + return f"{delta:+.2f}%" + + +def render_table( + baseline_label: str, + baseline_sizes: dict[str, tuple[str, int]], + local_sizes: dict[str, tuple[str, int]], + *, + markdown: bool, +) -> str: + color = not markdown + table = PrettyTable() + table.set_style(TableStyle.MARKDOWN if markdown else TableStyle.SINGLE_BORDER) + table.field_names = ["File", "Size before", "Size now", "Change"] + table.align = "r" + table.align["File"] = "l" + + def pct_severity(text: str) -> str | None: + """Return "good" / "warn" / "bad" based on the change percent.""" + if text == "n/a": + return None + pct = float(text.rstrip("%")) + if pct <= 0: + return "good" + if pct >= 5: + return "bad" + if pct >= 1: + return "warn" + return None + + ANSI_COLORS = {"good": "green", "warn": "yellow", "bad": "red"} + EMOJI = {"good": "🟢", "warn": "🟡", "bad": "🔴"} + + def style(cells: list[str], role: str) -> list[str]: + severity = pct_severity(cells[3]) + if markdown: + if severity: + cells[3] = f"{EMOJI[severity]} {cells[3]}" + if role == "orphan": + return [f"*{c}*" for c in cells] + if role == "summary": + return [f"**{c}**" for c in cells] + return cells + if role == "orphan": + return [colored(c, "dark_grey") for c in cells] + bold_attrs = ["bold"] if role == "summary" else [] + if severity: + cells[3] = colored(cells[3], ANSI_COLORS[severity], attrs=bold_attrs) + elif bold_attrs: + cells[3] = colored(cells[3], attrs=bold_attrs) + if bold_attrs: + cells[:3] = [colored(c, attrs=bold_attrs) for c in cells[:3]] + return cells + + keys = sorted(set(baseline_sizes) | set(local_sizes)) + # Put sdist first for readability + keys.sort(key=lambda k: (k != "sdist", k)) + + wheel_before = 0 + wheel_after = 0 + total_before = 0 + total_after = 0 + wheel_before_count = 0 + wheel_after_count = 0 + total_after_count = 0 + for i, key in enumerate(keys): + baseline_entry = baseline_sizes.get(key) + local_entry = local_sizes.get(key) + display_name = display_for((local_entry or baseline_entry)[0]) + before = baseline_entry[1] if baseline_entry else None + after = local_entry[1] if local_entry else None + if after is None: + # Removed since baseline: ignore in totals + role = "orphan" + else: + # Present locally (in both, or newly added): count in totals + total_after += after + total_after_count += 1 + if before is not None: + total_before += before + if key != "sdist": + wheel_after += after + wheel_after_count += 1 + if before is not None: + wheel_before += before + wheel_before_count += 1 + role = "data" + cells = [ + display_name, + human(before), + human(after), + pct_change(before, after), + ] + table.add_row(style(cells, role)) + + if not markdown: + table.add_divider() + + if wheel_after_count: + avg_before = wheel_before // wheel_before_count if wheel_before_count else None + table.add_row( + style( + [ + f"wheel average ({wheel_after_count} wheels)", + human(avg_before), + human(wheel_after // wheel_after_count), + pct_change(avg_before, wheel_after // wheel_after_count), + ], + "summary", + ) + ) + table.add_row( + style( + [ + f"wheel total ({wheel_after_count} wheels)", + human(wheel_before), + human(wheel_after), + pct_change(wheel_before, wheel_after), + ], + "summary", + ), + divider=not markdown, + ) + + if total_after_count: + table.add_row( + style( + [ + f"artifacts total ({total_after_count} artifacts)", + human(total_before), + human(total_after), + pct_change(total_before, total_after), + ], + "summary", + ) + ) + + title = f"## Dist size comparison vs {baseline_label}" + if color: + title = colored(title, attrs=["bold"]) + return f"{title}\n\n{table.get_string()}\n" + + +def main() -> int: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "dist_dir", + type=Path, + help="Directory containing newly-built wheels and sdist", + ) + args = parser.parse_args() + + if not args.dist_dir.is_dir(): + print(f"error: {args.dist_dir} is not a directory", file=sys.stderr) + return 1 + + baseline_version, baseline_sizes = fetch_pypi_sizes() + baseline_label = f"Pillow {baseline_version} on PyPI" + + local_sizes = collect_local_sizes(args.dist_dir) + + print(render_table(baseline_label, baseline_sizes, local_sizes, markdown=False)) + + if summary_path := os.environ.get("GITHUB_STEP_SUMMARY"): + with open(summary_path, "a", encoding="utf-8") as f: + f.write( + render_table(baseline_label, baseline_sizes, local_sizes, markdown=True) + ) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index fa3271de0c4..e2008ac6cf9 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -13,6 +13,7 @@ on: paths: &paths - ".ci/requirements-cibw.txt" - ".ci/requirements-sbom.txt" + - ".github/compare-dist-sizes.py" - ".github/dependencies.json" - ".github/generate-sbom.py" - ".github/workflows/wheels*" @@ -255,6 +256,28 @@ jobs: echo $files [ "$files" -eq $EXPECTED_DISTS ] || exit 1 + compare-dist-sizes: + needs: [build-native-wheels, windows, sdist] + runs-on: ubuntu-latest + name: Compare dist sizes vs PyPI + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 + with: + enable-cache: false + + - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + pattern: dist-* + path: dist + merge-multiple: true + + - name: Compare dist sizes vs latest PyPI release + run: uv run .github/compare-dist-sizes.py dist + scientific-python-nightly-wheels-publish: if: github.event.repository.fork == false && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') needs: count-dists From 260509628c3cbda2b3c5808abbbb0524f7907c5c Mon Sep 17 00:00:00 2001 From: Andrew Murray <3112309+radarhere@users.noreply.github.com> Date: Tue, 12 May 2026 02:14:59 +1000 Subject: [PATCH 2/2] Compare dist sizes (#172) Co-authored-by: Andrew Murray Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- .github/compare-dist-sizes.py | 117 +++++++++++++++------------------- 1 file changed, 52 insertions(+), 65 deletions(-) diff --git a/.github/compare-dist-sizes.py b/.github/compare-dist-sizes.py index 90df39f78b6..ed7b9be0ed6 100644 --- a/.github/compare-dist-sizes.py +++ b/.github/compare-dist-sizes.py @@ -13,7 +13,7 @@ # requires-python = ">=3.10" # dependencies = [ # "humanize", -# "prettytable>=3.16", +# "prettytable", # "termcolor", # ] # /// @@ -37,8 +37,7 @@ # Wheel filename: {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl # sdist filename: {distribution}-{version}.tar.gz WHEEL_RE = re.compile( - r"^(?P[^-]+)-(?P[^-]+)" - r"(?:-(?P\d[^-]*))?" + r"^[^-]+-[^-]+(?:-(?P\d[^-]*))?" r"-(?P[^-]+)-(?P[^-]+)-(?P[^-]+)\.whl$", re.IGNORECASE, ) @@ -48,12 +47,12 @@ ) -def key_for(filename: str) -> str | None: +def key_for(filename: str) -> str: """Return a version-independent identifier for a dist file.""" if m := WHEEL_RE.match(filename): - build = f"-{m['build']}" if m["build"] else "" - return f"wheel:{build}-{m['python']}-{m['abi']}-{m['platform']}" - if m := SDIST_RE.match(filename): + build = f"{m['build']}-" if m["build"] else "" + return f"wheel:{build}{m['python']}-{m['abi']}-{m['platform']}" + if SDIST_RE.match(filename): return "sdist" msg = f"Unexpected dist name: {filename}" raise ValueError(msg) @@ -78,8 +77,6 @@ def fetch_pypi_sizes() -> tuple[str, dict[str, tuple[str, int]]]: for entry in data.get("urls", []): filename = entry["filename"] key = key_for(filename) - if key is None: - continue sizes[key] = (filename, entry["size"]) return version, sizes @@ -90,8 +87,6 @@ def collect_local_sizes(dist_dir: Path) -> dict[str, tuple[str, int]]: if not path.is_file(): continue key = key_for(path.name) - if key is None: - continue sizes[key] = (path.name, path.stat().st_size) return sizes @@ -103,12 +98,25 @@ def human(n: int | None) -> str: def pct_change(before: int | None, after: int | None) -> str: - if not before or after is None: + if before is None or after is None: return "n/a" - delta = (after - before) / before * 100 + delta = 0 if before == 0 else (after - before) / before * 100 return f"{delta:+.2f}%" +def pct_severity(text: str) -> dict[str, str] | None: + """Return status indicators based on the change percent.""" + if text == "n/a": + return None + pct = float(text.rstrip("%")) + if pct >= 5: + return {"color": "red", "emoji": "🔴"} + if pct > 0: + return {"color": "yellow", "emoji": "🟡"} + else: + return {"color": "green", "emoji": "🟢"} + + def render_table( baseline_label: str, baseline_sizes: dict[str, tuple[str, int]], @@ -116,62 +124,44 @@ def render_table( *, markdown: bool, ) -> str: - color = not markdown table = PrettyTable() table.set_style(TableStyle.MARKDOWN if markdown else TableStyle.SINGLE_BORDER) table.field_names = ["File", "Size before", "Size now", "Change"] table.align = "r" table.align["File"] = "l" - def pct_severity(text: str) -> str | None: - """Return "good" / "warn" / "bad" based on the change percent.""" - if text == "n/a": - return None - pct = float(text.rstrip("%")) - if pct <= 0: - return "good" - if pct >= 5: - return "bad" - if pct >= 1: - return "warn" - return None - - ANSI_COLORS = {"good": "green", "warn": "yellow", "bad": "red"} - EMOJI = {"good": "🟢", "warn": "🟡", "bad": "🔴"} - def style(cells: list[str], role: str) -> list[str]: severity = pct_severity(cells[3]) if markdown: if severity: - cells[3] = f"{EMOJI[severity]} {cells[3]}" + cells[3] = f"{severity['emoji']} {cells[3]}" if role == "orphan": return [f"*{c}*" for c in cells] if role == "summary": return [f"**{c}**" for c in cells] return cells + if role == "orphan": return [colored(c, "dark_grey") for c in cells] + bold_attrs = ["bold"] if role == "summary" else [] + if bold_attrs: + cells[:3] = [colored(c, attrs=bold_attrs) for c in cells[:3]] if severity: - cells[3] = colored(cells[3], ANSI_COLORS[severity], attrs=bold_attrs) + cells[3] = colored(cells[3], severity["color"], attrs=bold_attrs) elif bold_attrs: cells[3] = colored(cells[3], attrs=bold_attrs) - if bold_attrs: - cells[:3] = [colored(c, attrs=bold_attrs) for c in cells[:3]] return cells - keys = sorted(set(baseline_sizes) | set(local_sizes)) + keys = list(set(baseline_sizes) | set(local_sizes)) # Put sdist first for readability keys.sort(key=lambda k: (k != "sdist", k)) - wheel_before = 0 - wheel_after = 0 - total_before = 0 - total_after = 0 - wheel_before_count = 0 - wheel_after_count = 0 - total_after_count = 0 - for i, key in enumerate(keys): + wheel_before = [] + wheel_after = [] + total_before = [] + total_after = [] + for key in keys: baseline_entry = baseline_sizes.get(key) local_entry = local_sizes.get(key) display_name = display_for((local_entry or baseline_entry)[0]) @@ -182,16 +172,13 @@ def style(cells: list[str], role: str) -> list[str]: role = "orphan" else: # Present locally (in both, or newly added): count in totals - total_after += after - total_after_count += 1 + total_after.append(after) if before is not None: - total_before += before + total_before.append(before) if key != "sdist": - wheel_after += after - wheel_after_count += 1 + wheel_after.append(after) if before is not None: - wheel_before += before - wheel_before_count += 1 + wheel_before.append(before) role = "data" cells = [ display_name, @@ -204,15 +191,15 @@ def style(cells: list[str], role: str) -> list[str]: if not markdown: table.add_divider() - if wheel_after_count: - avg_before = wheel_before // wheel_before_count if wheel_before_count else None + if wheel_after: + avg_before = sum(wheel_before) // len(wheel_before) if wheel_before else None table.add_row( style( [ - f"wheel average ({wheel_after_count} wheels)", + f"wheel average ({len(wheel_after)} wheels)", human(avg_before), - human(wheel_after // wheel_after_count), - pct_change(avg_before, wheel_after // wheel_after_count), + human(sum(wheel_after) // len(wheel_after)), + pct_change(avg_before, sum(wheel_after) // len(wheel_after)), ], "summary", ) @@ -220,31 +207,31 @@ def style(cells: list[str], role: str) -> list[str]: table.add_row( style( [ - f"wheel total ({wheel_after_count} wheels)", - human(wheel_before), - human(wheel_after), - pct_change(wheel_before, wheel_after), + f"wheel total ({len(wheel_after)} wheels)", + human(sum(wheel_before)), + human(sum(wheel_after)), + pct_change(sum(wheel_before), sum(wheel_after)), ], "summary", ), divider=not markdown, ) - if total_after_count: + if total_after: table.add_row( style( [ - f"artifacts total ({total_after_count} artifacts)", - human(total_before), - human(total_after), - pct_change(total_before, total_after), + f"artifacts total ({len(total_after)} artifacts)", + human(sum(total_before)), + human(sum(total_after)), + pct_change(sum(total_before), sum(total_after)), ], "summary", ) ) title = f"## Dist size comparison vs {baseline_label}" - if color: + if not markdown: title = colored(title, attrs=["bold"]) return f"{title}\n\n{table.get_string()}\n"