Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions .github/workflows/linux_cuda_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ jobs:
build_config: Release
architecture: x64
dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
docker_image_repo: onnxruntimecuda12manylinuxbuild
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --parallel --nvcc_threads 4 --flash_nvcc_threads 4 --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --enable_cuda_profiling --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251107.1'
docker_image_repo: onnxruntimecuda13manylinuxbuild
extra_build_flags: '--use_binskim_compliant_compile_flags --build_wheel --parallel --nvcc_threads 4 --flash_nvcc_threads 4 --cuda_version=13.0 --cuda_home=/usr/local/cuda-13.0 --cudnn_home=/usr/local/cuda-13.0 --enable_cuda_profiling --build_java --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH'
run_tests: false # <<< Do not run tests in this job
upload_build_output: true # <<< Upload the build/Release directory
Expand Down Expand Up @@ -57,8 +57,8 @@ jobs:
id: build_docker_image_step
with:
dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda12manylinuxbuild
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda13manylinuxbuild
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251107.1'
push: true
azure-container-registry-name: onnxruntimebuildcache
env:
Expand Down Expand Up @@ -91,6 +91,15 @@ jobs:
echo "Warning: perms.txt not found in artifact."
fi

# Verify the GPU is accessible inside Docker before running the full test suite.
# If the NVIDIA Container Toolkit fails to expose /dev/nvidia* devices,
# tests will fail with "CUDA failure 100" and waste 10+ minutes.
- name: Verify GPU access in Docker
run: |
docker run --rm --gpus all \
"${{ steps.build_docker_image_step.outputs.full-image-name }}" \
nvidia-smi

# --- Run Tests using the downloaded build ---
# The run-build-script-in-docker action mounts ${{ runner.temp }} to /onnxruntime_src/build
# So build.py --build_dir build/Release inside the container correctly finds the artifacts.
Expand All @@ -102,5 +111,5 @@ jobs:
build_config: Release
mode: 'test' # Set mode to test
execution_providers: 'cuda'
extra_build_flags: '--use_binskim_compliant_compile_flags --cuda_version=12.8 --cuda_home=/usr/local/cuda-12.8 --cudnn_home=/usr/local/cuda-12.8 --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
extra_build_flags: '--use_binskim_compliant_compile_flags --cuda_version=13.0 --cuda_home=/usr/local/cuda-13.0 --cudnn_home=/usr/local/cuda-13.0 --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 onnxruntime_BUILD_UNIT_TESTS=ON onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON'
python_path_prefix: 'PATH=/opt/python/cp310-cp310/bin:$PATH'
28 changes: 21 additions & 7 deletions .github/workflows/linux_cuda_plugin_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,17 @@ jobs:
build_config: Release
architecture: x64
dockerfile_path: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
docker_image_repo: onnxruntimecuda12manylinuxbuild
docker_build_args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251107.1'
docker_image_repo: onnxruntimecuda13manylinuxbuild
extra_build_flags: >-
--use_binskim_compliant_compile_flags
--build_wheel
--parallel
--nvcc_threads 4
--flash_nvcc_threads 4
--cuda_version=12.8
--cuda_home=/usr/local/cuda-12.8
--cudnn_home=/usr/local/cuda-12.8
--cuda_version=13.0
--cuda_home=/usr/local/cuda-13.0
--cudnn_home=/usr/local/cuda-13.0
--enable_cuda_profiling
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
--cmake_extra_defines onnxruntime_QUICK_BUILD=ON
Expand Down Expand Up @@ -67,8 +67,8 @@ jobs:
id: build_docker_image_step
with:
dockerfile: ${{ github.workspace }}/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda12manylinuxbuild
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc14:20251017.1'
image-name: ghcr.io/microsoft/onnxruntime/onnxruntimecuda13manylinuxbuild
build-args: '--build-arg BASEIMAGE=onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda13_x64_almalinux8_gcc14:20251107.1'
push: true
azure-container-registry-name: onnxruntimebuildcache
env:
Expand Down Expand Up @@ -100,6 +100,15 @@ jobs:
echo "Warning: perms.txt not found in artifact."
fi

# Verify the GPU is accessible inside Docker before running the full test suite.
# If the NVIDIA Container Toolkit fails to expose /dev/nvidia* devices,
# tests will fail with "CUDA failure 100" and waste 10+ minutes.
- name: Verify GPU access in Docker
run: |
docker run --rm --gpus all \
"${{ steps.build_docker_image_step.outputs.full-image-name }}" \
nvidia-smi

# --- Install the ORT wheel and run CUDA plugin EP tests ---
- name: Run CUDA Plugin EP Python Tests
run: |
Expand All @@ -111,6 +120,11 @@ jobs:
bash -c "
set -ex
export PATH=/opt/python/cp312-cp312/bin:\$PATH
# Ensure libcudart.so.13 is findable regardless of host-runner NVIDIA Container Toolkit configuration.
# The CUDA runtime library lives in the container image at /usr/local/cuda-13.0/lib64, but the
# LD_LIBRARY_PATH may not include this path when the runner's NVIDIA toolkit only mounts driver
# libraries at /usr/local/nvidia/lib64.
export LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64:\${LD_LIBRARY_PATH:-}

# Install the ORT wheel
python -m pip install /build/Release/Release/dist/onnxruntime*.whl
Expand Down
77 changes: 77 additions & 0 deletions .github/workflows/nightly_webgpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Nightly workflow: builds ONNX Runtime with the WebGPU execution provider on a
# self-hosted Windows GPU runner, runs the test suite with a shader dump file
# configured, and then hands that dump file to the shader-key validation action.
name: Nightly ONNX Runtime WebGPU Builds

on:
  schedule:
    - cron: '0 9 * * *'  # Daily at 09:00 UTC
  workflow_dispatch:

# One run per workflow/ref at a time; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  webgpu_shader_key_validation:
    # Runner labels: self-hosted pool selection plus a per-run/per-attempt JobId label.
    runs-on:
      - 'self-hosted'
      - '1ES.Pool=onnxruntime-github-Win2022-GPU-A10'
      - 'JobId=webgpu_shader_validation-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}'
    timeout-minutes: 90
    env:
      # Quoted so the values reach the job as strings, not YAML integers.
      ALLOW_RELEASED_ONNX_OPSET_ONLY: '0'
      ONNXRUNTIME_TEST_GPU_DEVICE_ID: '0'
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          submodules: none

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'
          architecture: x64

      # Repository-local action; NOTE(review): presumably configures the MSVC
      # build environment for x64 -- confirm against the action definition.
      - name: Locate vcvarsall and Setup Env
        uses: ./.github/actions/locate-vcvarsall-and-setup-env
        with:
          architecture: x64

      - name: Install python modules
        shell: cmd
        working-directory: ${{ github.workspace }}
        run: python -m pip install -r tools\ci_build\github\windows\python\requirements.txt

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: '24'

      # Single build.py invocation that updates, builds and tests in RelWithDebInfo.
      # ORT_WEBGPU_EP_SHADER_DUMP_FILE is set first so the dump lands at the path
      # the following steps read.
      - name: Build and Test
        shell: pwsh
        run: |
          $env:ORT_WEBGPU_EP_SHADER_DUMP_FILE = "${{ github.workspace }}\RelWithDebInfo\RelWithDebInfo\shader_dump.log"

          python.exe ${{ github.workspace }}\tools\ci_build\build.py `
          --config RelWithDebInfo `
          --build_dir ${{ github.workspace }} `
          --use_binskim_compliant_compile_flags `
          --cmake_generator "Visual Studio 17 2022" `
          --build_shared_lib `
          --use_webgpu `
          --wgsl_template static `
          --cmake_extra_defines onnxruntime_BUILD_DAWN_SHARED_LIBRARY=ON `
          --update `
          --build --parallel `
          --test

      # Lists the dump file; NOTE(review): presumably meant to surface a missing
      # log before validation runs -- confirm.
      - name: Check log file
        shell: cmd
        run: |
          dir ${{ github.workspace }}\RelWithDebInfo\RelWithDebInfo\shader_dump.log

      # Repository-local action that receives the shader dump log path.
      - name: Validate shader keys
        uses: ./.github/actions/webgpu-validate-shader-key
        with:
          log_file_path: ${{ github.workspace }}\RelWithDebInfo\RelWithDebInfo\shader_dump.log
8 changes: 8 additions & 0 deletions .github/workflows/windows_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ jobs:
runs-on: [
"self-hosted",
"1ES.Pool=onnxruntime-github-Win2022-GPU-A10",
"1ES.ImageOverride=onnxruntime-Win-CPU-VS2022-Latest-NVMe-x64-test",
"JobId=windows-cuda-test-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
]
steps:
Expand Down Expand Up @@ -222,6 +223,13 @@ jobs:
with:
whl-directory: ${{ runner.temp }}\build\RelWithDebInfo\RelWithDebInfo\dist

# Verify the GPU is accessible before running the full test suite.
# If the NVIDIA driver is not available, tests will fail with
# "CUDA failure 100" and waste significant time.
- name: Verify GPU access
shell: pwsh
run: nvidia-smi

- name: Run Tests
working-directory: ${{ runner.temp }}
run: |
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/windows_cuda_plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ jobs:
runs-on: [
"self-hosted",
"1ES.Pool=onnxruntime-github-Win2022-GPU-A10",
"1ES.ImageOverride=onnxruntime-Win-CPU-VS2022-Latest-NVMe-x64-test",
"JobId=windows-cuda-plugin-test-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}"
]
steps:
Expand Down Expand Up @@ -187,6 +188,13 @@ jobs:
with:
whl-directory: ${{ runner.temp }}\build\Release\Release\dist

# Verify the GPU is accessible before running the full test suite.
# If the NVIDIA driver is not available, tests will fail with
# "CUDA failure 100" and waste significant time.
- name: Verify GPU access
shell: pwsh
run: nvidia-smi

- name: Run CUDA Plugin EP Python Tests
working-directory: ${{ github.workspace }}\onnxruntime\test\python\transformers
shell: pwsh
Expand Down
1 change: 1 addition & 0 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ onnxruntime_add_static_library(onnxruntime_mlas
${MLAS_SRC_DIR}/qlutgemm.cpp
${MLAS_SRC_DIR}/sqnbitgemm_q8_block.h
${MLAS_SRC_DIR}/flashattn.cpp
${MLAS_SRC_DIR}/flashattn_qkv.cpp
${MLAS_SRC_DIR}/qkv_quant.cpp
${MLAS_SRC_DIR}/cast.cpp
${MLAS_SRC_DIR}/layernorm.cpp
Expand Down
1 change: 1 addition & 0 deletions cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ if (onnxruntime_USE_CUDA AND NOT WIN32)
)
include(cutlass)
target_include_directories(onnxruntime_pybind11_state PRIVATE ${cutlass_SOURCE_DIR}/include)
target_link_libraries(onnxruntime_pybind11_state PRIVATE CUDA::cudart)
endif()
if (onnxruntime_USE_CUDA AND WIN32)
target_compile_definitions(onnxruntime_pybind11_state PRIVATE ORT_NO_CUDA_IN_PYBIND)
Expand Down
Loading
Loading