From fe8f264dff736d152aa558b451a155f7cc7ea819 Mon Sep 17 00:00:00 2001
From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
Date: Mon, 27 Apr 2026 13:39:33 +0300
Subject: [PATCH 1/2] Compare dist sizes vs latest PyPI release

Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com>
---
 .github/compare-dist-sizes.py | 284 ++++++++++++++++++++++++++++++++++
 .github/workflows/wheels.yml  |  23 +++
 2 files changed, 307 insertions(+)
 create mode 100644 .github/compare-dist-sizes.py
diff --git a/.github/compare-dist-sizes.py b/.github/compare-dist-sizes.py
new file mode 100644
index 00000000000..90df39f78b6
--- /dev/null
+++ b/.github/compare-dist-sizes.py
@@ -0,0 +1,284 @@
+"""Compare sizes of newly-built dists against the latest release on PyPI.
+
+Fetches file sizes for the latest Pillow release from the PyPI JSON API
+(no download required) and compares them to a directory of freshly-built
+wheels and sdist. Outputs a table to stdout (and to
+`$GITHUB_STEP_SUMMARY` if set).
+
+Usage:
+    `uv run .github/compare-dist-sizes.py <dist-dir>`
+"""
+
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "humanize",
+#     "prettytable>=3.16",
+#     "termcolor",
+# ]
+# ///
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import re
+import sys
+import urllib.request
+from pathlib import Path
+
+import humanize
+from prettytable import PrettyTable, TableStyle
+from termcolor import colored
+
+PYPI_JSON_URL = "https://pypi.org/pypi/pillow/json"
+
+# Wheel filename: {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl
+# sdist filename: {distribution}-{version}.tar.gz
+WHEEL_RE = re.compile(
+    r"^(?P<dist>[^-]+)-(?P<version>[^-]+)"
+    r"(?:-(?P<build>\d[^-]*))?"
+    r"-(?P<python>[^-]+)-(?P<abi>[^-]+)-(?P<platform>[^-]+)\.whl$",
+    re.IGNORECASE,
+)
+SDIST_RE = re.compile(
+    r"^(?P<dist>[^-]+)-(?P<version>.+)\.tar\.gz$",
+    re.IGNORECASE,
+)
+
+
+def key_for(filename: str) -> str | None:
+    """Return a version-independent identifier for a dist file."""
+    if m := WHEEL_RE.match(filename):
+        build = f"-{m['build']}" if m["build"] else ""
+        return f"wheel:{build}-{m['python']}-{m['abi']}-{m['platform']}"
+    if m := SDIST_RE.match(filename):
+        return "sdist"
+    msg = f"Unexpected dist name: {filename}"
+    raise ValueError(msg)
+
+
+def display_for(filename: str) -> str:
+    """Return a compact table label without the `pillow-{version}-` prefix."""
+    wheel = WHEEL_RE.match(filename)
+    if wheel is not None:
+        tags = [wheel["build"], wheel["python"], wheel["abi"], wheel["platform"]]
+        return "-".join(tag for tag in tags if tag) + ".whl"
+    # Anything that is neither a wheel nor an sdist is shown unchanged.
+    return "sdist (.tar.gz)" if SDIST_RE.match(filename) else filename
+
+
+def fetch_pypi_sizes() -> tuple[str, dict[str, tuple[str, int]]]:
+    """Return (version, {key: (filename, size)}) for the latest PyPI release."""
+    with urllib.request.urlopen(PYPI_JSON_URL, timeout=30) as response:
+        data = json.load(response)
+    version = data["info"]["version"]
+    sizes: dict[str, tuple[str, int]] = {}
+    for entry in data.get("urls", []):
+        filename = entry["filename"]
+        key = key_for(filename)
+        if key is None:
+            continue
+        sizes[key] = (filename, entry["size"])
+    return version, sizes
+
+
+def collect_local_sizes(dist_dir: Path) -> dict[str, tuple[str, int]]:
+    sizes: dict[str, tuple[str, int]] = {}
+    for path in sorted(dist_dir.iterdir()):
+        if not path.is_file():
+            continue
+        key = key_for(path.name)
+        if key is None:
+            continue
+        sizes[key] = (path.name, path.stat().st_size)
+    return sizes
+
+
+def human(n: int | None) -> str:
+    """Format a byte count for display; a missing size becomes "n/a"."""
+    # NOTE: 0 is a real size, so test against None rather than truthiness.
+    return "n/a" if n is None else humanize.naturalsize(n)
+
+
+def pct_change(before: int | None, after: int | None) -> str:
+    if not before or after is None:
+        return "n/a"
+    delta = (after - before) / before * 100
+    return f"{delta:+.2f}%"
+
+
+def render_table(
+    baseline_label: str,
+    baseline_sizes: dict[str, tuple[str, int]],
+    local_sizes: dict[str, tuple[str, int]],
+    *,
+    markdown: bool,
+) -> str:
+    color = not markdown
+    table = PrettyTable()
+    table.set_style(TableStyle.MARKDOWN if markdown else TableStyle.SINGLE_BORDER)
+    table.field_names = ["File", "Size before", "Size now", "Change"]
+    table.align = "r"
+    table.align["File"] = "l"
+
+    def pct_severity(text: str) -> str | None:
+        """Return "good" / "warn" / "bad" based on the change percent."""
+        if text == "n/a":
+            return None
+        pct = float(text.rstrip("%"))
+        if pct <= 0:
+            return "good"
+        if pct >= 5:
+            return "bad"
+        if pct >= 1:
+            return "warn"
+        return None
+
+    ANSI_COLORS = {"good": "green", "warn": "yellow", "bad": "red"}
+    EMOJI = {"good": "🟢", "warn": "🟡", "bad": "🔴"}
+
+    def style(cells: list[str], role: str) -> list[str]:
+        severity = pct_severity(cells[3])
+        if markdown:
+            if severity:
+                cells[3] = f"{EMOJI[severity]} {cells[3]}"
+            if role == "orphan":
+                return [f"*{c}*" for c in cells]
+            if role == "summary":
+                return [f"**{c}**" for c in cells]
+            return cells
+        if role == "orphan":
+            return [colored(c, "dark_grey") for c in cells]
+        bold_attrs = ["bold"] if role == "summary" else []
+        if severity:
+            cells[3] = colored(cells[3], ANSI_COLORS[severity], attrs=bold_attrs)
+        elif bold_attrs:
+            cells[3] = colored(cells[3], attrs=bold_attrs)
+        if bold_attrs:
+            cells[:3] = [colored(c, attrs=bold_attrs) for c in cells[:3]]
+        return cells
+
+    keys = sorted(set(baseline_sizes) | set(local_sizes))
+    # Put sdist first for readability
+    keys.sort(key=lambda k: (k != "sdist", k))
+
+    wheel_before = 0
+    wheel_after = 0
+    total_before = 0
+    total_after = 0
+    wheel_before_count = 0
+    wheel_after_count = 0
+    total_after_count = 0
+    for i, key in enumerate(keys):
+        baseline_entry = baseline_sizes.get(key)
+        local_entry = local_sizes.get(key)
+        display_name = display_for((local_entry or baseline_entry)[0])
+        before = baseline_entry[1] if baseline_entry else None
+        after = local_entry[1] if local_entry else None
+        if after is None:
+            # Removed since baseline: ignore in totals
+            role = "orphan"
+        else:
+            # Present locally (in both, or newly added): count in totals
+            total_after += after
+            total_after_count += 1
+            if before is not None:
+                total_before += before
+            if key != "sdist":
+                wheel_after += after
+                wheel_after_count += 1
+                if before is not None:
+                    wheel_before += before
+                    wheel_before_count += 1
+            role = "data"
+        cells = [
+            display_name,
+            human(before),
+            human(after),
+            pct_change(before, after),
+        ]
+        table.add_row(style(cells, role))
+
+    if not markdown:
+        table.add_divider()
+
+    if wheel_after_count:
+        avg_before = wheel_before // wheel_before_count if wheel_before_count else None
+        table.add_row(
+            style(
+                [
+                    f"wheel average ({wheel_after_count} wheels)",
+                    human(avg_before),
+                    human(wheel_after // wheel_after_count),
+                    pct_change(avg_before, wheel_after // wheel_after_count),
+                ],
+                "summary",
+            )
+        )
+        table.add_row(
+            style(
+                [
+                    f"wheel total ({wheel_after_count} wheels)",
+                    human(wheel_before),
+                    human(wheel_after),
+                    pct_change(wheel_before, wheel_after),
+                ],
+                "summary",
+            ),
+            divider=not markdown,
+        )
+
+    if total_after_count:
+        table.add_row(
+            style(
+                [
+                    f"artifacts total ({total_after_count} artifacts)",
+                    human(total_before),
+                    human(total_after),
+                    pct_change(total_before, total_after),
+                ],
+                "summary",
+            )
+        )
+
+    title = f"## Dist size comparison vs {baseline_label}"
+    if color:
+        title = colored(title, attrs=["bold"])
+    return f"{title}\n\n{table.get_string()}\n"
+
+
+def main() -> int:
+    """Compare a dist directory against PyPI and return the exit status."""
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        "dist_dir",
+        type=Path,
+        help="Directory containing newly-built wheels and sdist",
+    )
+    dist_dir = parser.parse_args().dist_dir
+    if not dist_dir.is_dir():
+        print(f"error: {dist_dir} is not a directory", file=sys.stderr)
+        return 1
+
+    version, baseline_sizes = fetch_pypi_sizes()
+    local_sizes = collect_local_sizes(dist_dir)
+    label = f"Pillow {version} on PyPI"
+
+    # Terminal-styled table on stdout.
+    print(render_table(label, baseline_sizes, local_sizes, markdown=False))
+
+    # Markdown table appended to the step summary when the variable is set.
+    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
+    if summary_path:
+        with open(summary_path, "a", encoding="utf-8") as summary:
+            report = render_table(label, baseline_sizes, local_sizes, markdown=True)
+            summary.write(report)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index fa3271de0c4..e2008ac6cf9 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -13,6 +13,7 @@ on:
     paths: &paths
       - ".ci/requirements-cibw.txt"
       - ".ci/requirements-sbom.txt"
+      - ".github/compare-dist-sizes.py"
       - ".github/dependencies.json"
       - ".github/generate-sbom.py"
       - ".github/workflows/wheels*"
@@ -255,6 +256,28 @@ jobs:
           echo $files
           [ "$files" -eq $EXPECTED_DISTS ] || exit 1
 
+  compare-dist-sizes:
+    needs: [build-native-wheels, windows, sdist]
+    runs-on: ubuntu-latest
+    name: Compare dist sizes vs PyPI
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+
+      - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
+        with:
+          enable-cache: false
+
+      - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        with:
+          pattern: dist-*
+          path: dist
+          merge-multiple: true
+
+      - name: Compare dist sizes vs latest PyPI release
+        run: uv run .github/compare-dist-sizes.py dist
+
   scientific-python-nightly-wheels-publish:
     if: github.event.repository.fork == false && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
     needs: count-dists

From 260509628c3cbda2b3c5808abbbb0524f7907c5c Mon Sep 17 00:00:00 2001
From: Andrew Murray <3112309+radarhere@users.noreply.github.com>
Date: Tue, 12 May 2026 02:14:59 +1000
Subject: [PATCH 2/2] Compare dist sizes (#172)

Co-authored-by: Andrew Murray <radarhere@users.noreply.github.com>
Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
---
 .github/compare-dist-sizes.py | 117 +++++++++++++++-------------------
 1 file changed, 52 insertions(+), 65 deletions(-)

diff --git a/.github/compare-dist-sizes.py b/.github/compare-dist-sizes.py
index 90df39f78b6..ed7b9be0ed6 100644
--- a/.github/compare-dist-sizes.py
+++ b/.github/compare-dist-sizes.py
@@ -13,7 +13,7 @@
 # requires-python = ">=3.10"
 # dependencies = [
 #     "humanize",
-#     "prettytable>=3.16",
+#     "prettytable",
 #     "termcolor",
 # ]
 # ///
@@ -37,8 +37,7 @@
 # Wheel filename: {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl
 # sdist filename: {distribution}-{version}.tar.gz
 WHEEL_RE = re.compile(
-    r"^(?P<dist>[^-]+)-(?P<version>[^-]+)"
-    r"(?:-(?P<build>\d[^-]*))?"
+    r"^[^-]+-[^-]+(?:-(?P<build>\d[^-]*))?"
     r"-(?P<python>[^-]+)-(?P<abi>[^-]+)-(?P<platform>[^-]+)\.whl$",
     re.IGNORECASE,
 )
@@ -48,12 +47,12 @@
 )
 
 
-def key_for(filename: str) -> str | None:
+def key_for(filename: str) -> str:
     """Return a version-independent identifier for a dist file."""
     if m := WHEEL_RE.match(filename):
-        build = f"-{m['build']}" if m["build"] else ""
-        return f"wheel:{build}-{m['python']}-{m['abi']}-{m['platform']}"
-    if m := SDIST_RE.match(filename):
+        build = f"{m['build']}-" if m["build"] else ""
+        return f"wheel:{build}{m['python']}-{m['abi']}-{m['platform']}"
+    if SDIST_RE.match(filename):
         return "sdist"
     msg = f"Unexpected dist name: {filename}"
     raise ValueError(msg)
@@ -78,8 +77,6 @@ def fetch_pypi_sizes() -> tuple[str, dict[str, tuple[str, int]]]:
     for entry in data.get("urls", []):
         filename = entry["filename"]
         key = key_for(filename)
-        if key is None:
-            continue
         sizes[key] = (filename, entry["size"])
     return version, sizes
 
@@ -90,8 +87,6 @@ def collect_local_sizes(dist_dir: Path) -> dict[str, tuple[str, int]]:
         if not path.is_file():
             continue
         key = key_for(path.name)
-        if key is None:
-            continue
         sizes[key] = (path.name, path.stat().st_size)
     return sizes
 
@@ -103,12 +98,25 @@ def human(n: int | None) -> str:
 
 
 def pct_change(before: int | None, after: int | None) -> str:
-    if not before or after is None:
+    if before is None or after is None or (before == 0 and after != 0):
         return "n/a"
-    delta = (after - before) / before * 100
+    delta = 0 if before == 0 else (after - before) / before * 100
     return f"{delta:+.2f}%"
 
 
+def pct_severity(text: str) -> dict[str, str] | None:
+    """Map a formatted percent change to its color and emoji, if any."""
+    # "n/a" carries no number to classify, so no indicator applies.
+    if text == "n/a":
+        return None
+    change = float(text.rstrip("%"))
+    if change >= 5:
+        return {"color": "red", "emoji": "🔴"}
+    if change > 0:
+        return {"color": "yellow", "emoji": "🟡"}
+    return {"color": "green", "emoji": "🟢"}
+
+
 def render_table(
     baseline_label: str,
     baseline_sizes: dict[str, tuple[str, int]],
@@ -116,62 +124,44 @@ def render_table(
     *,
     markdown: bool,
 ) -> str:
-    color = not markdown
     table = PrettyTable()
     table.set_style(TableStyle.MARKDOWN if markdown else TableStyle.SINGLE_BORDER)
     table.field_names = ["File", "Size before", "Size now", "Change"]
     table.align = "r"
     table.align["File"] = "l"
 
-    def pct_severity(text: str) -> str | None:
-        """Return "good" / "warn" / "bad" based on the change percent."""
-        if text == "n/a":
-            return None
-        pct = float(text.rstrip("%"))
-        if pct <= 0:
-            return "good"
-        if pct >= 5:
-            return "bad"
-        if pct >= 1:
-            return "warn"
-        return None
-
-    ANSI_COLORS = {"good": "green", "warn": "yellow", "bad": "red"}
-    EMOJI = {"good": "🟢", "warn": "🟡", "bad": "🔴"}
-
     def style(cells: list[str], role: str) -> list[str]:
         severity = pct_severity(cells[3])
         if markdown:
             if severity:
-                cells[3] = f"{EMOJI[severity]} {cells[3]}"
+                cells[3] = f"{severity['emoji']} {cells[3]}"
             if role == "orphan":
                 return [f"*{c}*" for c in cells]
             if role == "summary":
                 return [f"**{c}**" for c in cells]
             return cells
+
         if role == "orphan":
             return [colored(c, "dark_grey") for c in cells]
+
         bold_attrs = ["bold"] if role == "summary" else []
+        if bold_attrs:
+            cells[:3] = [colored(c, attrs=bold_attrs) for c in cells[:3]]
         if severity:
-            cells[3] = colored(cells[3], ANSI_COLORS[severity], attrs=bold_attrs)
+            cells[3] = colored(cells[3], severity["color"], attrs=bold_attrs)
         elif bold_attrs:
             cells[3] = colored(cells[3], attrs=bold_attrs)
-        if bold_attrs:
-            cells[:3] = [colored(c, attrs=bold_attrs) for c in cells[:3]]
         return cells
 
-    keys = sorted(set(baseline_sizes) | set(local_sizes))
+    keys = list(set(baseline_sizes) | set(local_sizes))
     # Put sdist first for readability
     keys.sort(key=lambda k: (k != "sdist", k))
 
-    wheel_before = 0
-    wheel_after = 0
-    total_before = 0
-    total_after = 0
-    wheel_before_count = 0
-    wheel_after_count = 0
-    total_after_count = 0
-    for i, key in enumerate(keys):
+    wheel_before = []
+    wheel_after = []
+    total_before = []
+    total_after = []
+    for key in keys:
         baseline_entry = baseline_sizes.get(key)
         local_entry = local_sizes.get(key)
         display_name = display_for((local_entry or baseline_entry)[0])
@@ -182,16 +172,13 @@ def style(cells: list[str], role: str) -> list[str]:
             role = "orphan"
         else:
             # Present locally (in both, or newly added): count in totals
-            total_after += after
-            total_after_count += 1
+            total_after.append(after)
             if before is not None:
-                total_before += before
+                total_before.append(before)
             if key != "sdist":
-                wheel_after += after
-                wheel_after_count += 1
+                wheel_after.append(after)
                 if before is not None:
-                    wheel_before += before
-                    wheel_before_count += 1
+                    wheel_before.append(before)
             role = "data"
         cells = [
             display_name,
@@ -204,15 +191,15 @@ def style(cells: list[str], role: str) -> list[str]:
     if not markdown:
         table.add_divider()
 
-    if wheel_after_count:
-        avg_before = wheel_before // wheel_before_count if wheel_before_count else None
+    if wheel_after:
+        avg_before = sum(wheel_before) // len(wheel_before) if wheel_before else None
         table.add_row(
             style(
                 [
-                    f"wheel average ({wheel_after_count} wheels)",
+                    f"wheel average ({len(wheel_after)} wheels)",
                     human(avg_before),
-                    human(wheel_after // wheel_after_count),
-                    pct_change(avg_before, wheel_after // wheel_after_count),
+                    human(sum(wheel_after) // len(wheel_after)),
+                    pct_change(avg_before, sum(wheel_after) // len(wheel_after)),
                 ],
                 "summary",
             )
@@ -220,31 +207,31 @@ def style(cells: list[str], role: str) -> list[str]:
         table.add_row(
             style(
                 [
-                    f"wheel total ({wheel_after_count} wheels)",
-                    human(wheel_before),
-                    human(wheel_after),
-                    pct_change(wheel_before, wheel_after),
+                    f"wheel total ({len(wheel_after)} wheels)",
+                    human(sum(wheel_before)),
+                    human(sum(wheel_after)),
+                    pct_change(sum(wheel_before), sum(wheel_after)),
                 ],
                 "summary",
             ),
             divider=not markdown,
         )
 
-    if total_after_count:
+    if total_after:
         table.add_row(
             style(
                 [
-                    f"artifacts total ({total_after_count} artifacts)",
-                    human(total_before),
-                    human(total_after),
-                    pct_change(total_before, total_after),
+                    f"artifacts total ({len(total_after)} artifacts)",
+                    human(sum(total_before)),
+                    human(sum(total_after)),
+                    pct_change(sum(total_before), sum(total_after)),
                 ],
                 "summary",
             )
         )
 
     title = f"## Dist size comparison vs {baseline_label}"
-    if color:
+    if not markdown:
         title = colored(title, attrs=["bold"])
     return f"{title}\n\n{table.get_string()}\n"