diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml
index 6cbac0cb1..557050731 100644
--- a/.github/workflows/build-and-release.yaml
+++ b/.github/workflows/build-and-release.yaml
@@ -11,7 +11,10 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-22.04, windows-2022, macos-14, macos-15]
+        # Keep the Apple Silicon release build on macos-14 and use an explicit
+        # Intel runner for the x86_64 wheel so each macOS job produces a
+        # distinct artifact.
+        os: [ubuntu-22.04, windows-2022, macos-14, macos-15-intel]
 
     steps:
       - uses: actions/checkout@v4
@@ -82,7 +85,9 @@ jobs:
           # Keep native arm64 builds on a portable CPU baseline instead of
           # tuning wheels to the hosted runner.
           CIBW_ENVIRONMENT: CMAKE_ARGS="-DGGML_NATIVE=off"
-          CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*"
+          # wheel.py-api = "py3" means one aarch64 build covers every supported
+          # CPython version; each extra selector just emits the same wheel name.
+          CIBW_BUILD: "cp38-*"
         with:
           output-dir: wheelhouse
 
@@ -93,27 +98,8 @@ jobs:
           path: ./wheelhouse/*.whl
 
   build_wheels_riscv64:
-    name: Build riscv64 wheels (${{ matrix.shard.name }})
+    name: Build riscv64 wheels
     runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        shard:
-          - name: cp310
-            build: "cp310-*"
-            artifact: wheels_riscv64_cp310
-          - name: cp311
-            build: "cp311-*"
-            artifact: wheels_riscv64_cp311
-          - name: cp312
-            build: "cp312-*"
-            artifact: wheels_riscv64_cp312
-          - name: cp313
-            build: "cp313-*"
-            artifact: wheels_riscv64_cp313
-          - name: cp314
-            build: "cp314-*"
-            artifact: wheels_riscv64_cp314
     steps:
       - uses: actions/checkout@v4
         with:
@@ -133,16 +119,16 @@ jobs:
           # Build riscv64 wheels against a conservative baseline instead of
           # enabling RVV-related extensions from the build container.
           CIBW_ENVIRONMENT: CMAKE_ARGS="-DGGML_NATIVE=off -DGGML_RVV=off -DGGML_RV_ZFH=off -DGGML_RV_ZVFH=off -DGGML_RV_ZICBOP=off -DGGML_RV_ZIHINTPAUSE=off"
-          # Split the emulated riscv64 build into one Python version per job
-          # to minimize wall-clock time without changing the release artifacts.
-          CIBW_BUILD: ${{ matrix.shard.build }}
+          # wheel.py-api = "py3" means one riscv64 build covers every supported
+          # CPython version, so sharding by interpreter only duplicates assets.
+          CIBW_BUILD: "cp310-*"
         with:
           output-dir: wheelhouse
 
       - name: Upload wheels as artifacts
         uses: actions/upload-artifact@v4
         with:
-          name: ${{ matrix.shard.artifact }}
+          name: wheels_riscv64
           path: ./wheelhouse/*.whl
 
   build_sdist:
diff --git a/.github/workflows/build-wheels-cuda.yaml b/.github/workflows/build-wheels-cuda.yaml
index 17daaa12a..548076300 100644
--- a/.github/workflows/build-wheels-cuda.yaml
+++ b/.github/workflows/build-wheels-cuda.yaml
@@ -21,7 +21,9 @@ jobs:
         run: |
           $matrix = @{
               'os' = @('ubuntu-22.04') #, 'windows-2022')
-              'pyver' = @("3.9", "3.10", "3.11", "3.12")
+              # wheel.py-api = "py3" makes the CUDA wheel interpreter-agnostic,
+              # so one builder per toolkit version is sufficient.
+              'pyver' = @("3.9")
               'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") #, "12.5.1", "12.6.1")
               'releasetag' = @("basic")
           }
diff --git a/.github/workflows/build-wheels-metal.yaml b/.github/workflows/build-wheels-metal.yaml
index 98f511e4a..4157469a2 100644
--- a/.github/workflows/build-wheels-metal.yaml
+++ b/.github/workflows/build-wheels-metal.yaml
@@ -38,14 +38,17 @@ jobs:
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_ARCHS: "arm64"
           CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on -DCMAKE_CROSSCOMPILING=ON"
-          CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
+          # wheel.py-api = "py3" means one Metal wheel covers every supported
+          # CPython version, so extra selectors only repeat the same artifact.
+          CIBW_BUILD: "cp39-*"
         with:
           package-dir: .
           output-dir: wheelhouse2
 
       - uses: actions/upload-artifact@v4
+        if: matrix.os == 'macos-14'
         with:
-          name: wheels-mac_${{ matrix.os }}
+          name: wheels-mac
           path: ./wheelhouse2/*.whl
 
   release:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9bc8d6654..6b214fe93 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- fix(ci): Avoid duplicate `py3-none` release builds across wheel workflows by @abetlen in #2172
+
 ## [0.3.20]
 
 - refactor: Replace deprecated llama.cpp references in library, docs, and examples by @abetlen in #2170