Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 95 additions & 75 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,72 +18,78 @@ jobs:
- name: 'Dependencies'
run: |
apt-get update
apt-get install -y git python3.9 pip cudnn9-cuda-12
apt-get install -y git python3.9 pip cudnn9-cuda-12 ccache
pip install cmake==3.21.0 pybind11[global] ninja
- name: 'Checkout'
uses: actions/checkout@v3
with:
submodules: recursive
- name: ccache
uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad
- uses: actions/cache@v4
with:
path: /root/.ccache
key: ccache-${{ runner.os }}-core-${{ github.ref }}
restore-keys: ccache-${{ runner.os }}-core-
- name: 'Build'
run: NVTE_USE_CCACHE=1 NVTE_CCACHE_BIN=sccache pip install --no-build-isolation . -v
run: NVTE_USE_CCACHE=1 pip install --no-build-isolation . -v
env:
NVTE_FRAMEWORK: none
MAX_JOBS: 1
SCCACHE_GHA_ENABLED: "true"
- name: 'Sanity check'
run: python3 -c "import transformer_engine"
working-directory: /
pytorch:
name: 'PyTorch'
runs-on: ubuntu-latest
container:
image: ubuntu:24.04
options: --user root
steps:
- name: Move /var/lib/docker/
shell: bash -euxo pipefail {0}
run: sudo mv /var/lib/docker/ "${GITHUB_WORKSPACE}/docker"

- name: Maximize build space
uses: easimon/maximize-build-space@c28619d8999a147d5e09c1199f84ff6af6ad5794
with:
root-reserve-mb: 5120
temp-reserve-mb: 32
swap-size-mb: 10240
remove-dotnet: 'true'
remove-android: 'true'
remove-haskell: 'true'
remove-codeql: 'true'
build-mount-path: '/var/lib/docker/'

- name: Restore /var/lib/docker/
shell: bash -euxo pipefail {0}
run: sudo sh -c "mv ${GITHUB_WORKSPACE}/docker/* /var/lib/docker"

- name: 'Container dependencies'
run: apt-get update && apt-get install -y git python3 python3-pip ccache
- name: 'Checkout'
uses: actions/checkout@v3
with:
submodules: recursive

- name: Start named container
run: |
docker run -v $(pwd):$(pwd) -w $(pwd) --name builder -d nvcr.io/nvidia/cuda:12.8.0-devel-ubuntu22.04 sleep infinity
- name: "Disk space check before dependencies"
run: df -lh

- name: 'Dependencies'
run: |
docker exec builder bash -c '\
apt-get update && \
apt-get install -y git python3.9 pip cudnn9-cuda-12 && \
pip install cmake torch ninja pydantic importlib-metadata>=1.0 packaging pybind11 numpy einops onnxscript && \
apt-get clean \
'
export PIP_BREAK_SYSTEM_PACKAGES=1
pip install cmake ninja pybind11 numpy packaging
pip install torch --index-url https://download.pytorch.org/whl/cu130
pip install "nvidia-cuda-nvcc<13.1" "nvidia-cuda-cccl<13.1" "nvidia-cuda-crt<13.1" "nvidia-nvvm<13.1" "nvidia-cuda-profiler-api<13.1" "nvidia-nvml-dev<13.1"
export CUDA_PATH=$(python3 -c "import nvidia; print(list(nvidia.__path__)[0] + '/cu13')")
echo $CUDA_PATH
ln -s $CUDA_PATH/lib/libcudart.so.13 $CUDA_PATH/lib/libcudart.so
ln -s $CUDA_PATH/lib/libcublas.so.13 $CUDA_PATH/lib/libcublas.so
ln -s $CUDA_PATH/../nccl/lib/libnccl.so.2 $CUDA_PATH/../nccl/lib/libnccl.so
ln -s $CUDA_PATH/lib $CUDA_PATH/lib64

- name: "Disk space check after dependencies"
run: df -lh
- uses: actions/cache@v4
with:
path: /root/.ccache
key: ccache-${{ runner.os }}-pytorch-${{ github.ref }}
restore-keys: ccache-${{ runner.os }}-pytorch-
- name: "Disk space check after dependencies and ccache"
run: df -lh
- name: 'Build'
run: docker exec builder bash -c 'pip install --no-build-isolation . -v --no-deps'
run: |
export CUDA_PATH=$(python3 -c "import nvidia; print(list(nvidia.__path__)[0] + '/cu13')")
export CUDNN_PATH=$(python3 -c "import nvidia; print(list(nvidia.__path__)[0] + '/cudnn')")
export PATH=$CUDA_PATH/bin:$PATH
export NVTE_BUILD_USE_NVIDIA_WHEELS=1
export PIP_BREAK_SYSTEM_PACKAGES=1
NVTE_USE_CCACHE=1 pip install --no-build-isolation . -v
env:
NVTE_FRAMEWORK: pytorch
MAX_JOBS: 1
MAX_JOBS: 2
CCACHE_DIR: /root/.ccache
CCACHE_MAXSIZE: 2G
- name: 'Sanity check'
run: docker exec builder bash -c 'python3 tests/pytorch/test_sanity_import.py'
run: python3 tests/pytorch/test_sanity_import.py
jax:
name: 'JAX'
runs-on: ubuntu-latest
Expand All @@ -92,65 +98,79 @@ jobs:
options: --user root
steps:
- name: 'Dependencies'
run: pip install cmake==3.21.0 pybind11[global]
run: |
pip install cmake==3.21.0 pybind11[global]
apt-get update && apt-get install -y ccache
- name: 'Checkout'
uses: actions/checkout@v3
with:
submodules: recursive
- name: ccache
uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad
- uses: actions/cache@v4
with:
path: /root/.ccache
key: ccache-${{ runner.os }}-jax-${{ github.ref }}
restore-keys: ccache-${{ runner.os }}-jax-
- name: 'Build'
run: |
NVTE_CCACHE_BIN=sccache NVTE_USE_CCACHE=1 pip install --no-build-isolation . -v
NVTE_USE_CCACHE=1 pip install --no-build-isolation . -v
env:
NVTE_FRAMEWORK: jax
MAX_JOBS: 1
SCCACHE_GHA_ENABLED: "true"
- name: 'Sanity check'
run: python3 tests/jax/test_sanity_import.py
all:
name: 'All'
runs-on: ubuntu-latest
container:
image: ubuntu:24.04
options: --user root
steps:
- name: Move /var/lib/docker/
shell: bash -euxo pipefail {0}
run: sudo mv /var/lib/docker/ "${GITHUB_WORKSPACE}/docker"

- name: Maximize build space
uses: easimon/maximize-build-space@c28619d8999a147d5e09c1199f84ff6af6ad5794
with:
root-reserve-mb: 5120
temp-reserve-mb: 32
swap-size-mb: 10240
remove-dotnet: 'true'
remove-android: 'true'
remove-haskell: 'true'
remove-codeql: 'true'
build-mount-path: '/var/lib/docker/'

- name: Restore /var/lib/docker/
shell: bash -euxo pipefail {0}
run: sudo sh -c "mv ${GITHUB_WORKSPACE}/docker/* /var/lib/docker"

- name: 'Container dependencies'
run: apt-get update && apt-get install -y git python3 python3-pip ccache
- name: 'Checkout'
uses: actions/checkout@v3
with:
submodules: recursive

- name: Start named container
run: |
docker run -v $(pwd):$(pwd) -w $(pwd) --name builder -d ghcr.io/nvidia/jax:jax sleep infinity
- name: "Disk space check before dependencies"
run: df -lh

- name: 'Dependencies'
run: |
docker exec builder bash -c '\
pip install cmake==3.21.0 pybind11[global] einops onnxscript && \
pip install torch --no-cache-dir --index-url https://download.pytorch.org/whl/cu130
'
export PIP_BREAK_SYSTEM_PACKAGES=1
pip install cmake ninja pybind11 numpy packaging
pip install torch --index-url https://download.pytorch.org/whl/cu130
pip install jax[cuda13]
pip install "nvidia-cuda-nvcc<13.1" "nvidia-cuda-cccl<13.1" "nvidia-cuda-crt<13.1" "nvidia-nvvm<13.1" "nvidia-cuda-profiler-api<13.1" "nvidia-nvml-dev<13.1"
export CUDA_PATH=$(python3 -c "import nvidia; print(list(nvidia.__path__)[0] + '/cu13')")
echo $CUDA_PATH
ln -s $CUDA_PATH/lib/libcudart.so.13 $CUDA_PATH/lib/libcudart.so
ln -s $CUDA_PATH/lib/libcublas.so.13 $CUDA_PATH/lib/libcublas.so
ln -s $CUDA_PATH/../nccl/lib/libnccl.so.2 $CUDA_PATH/../nccl/lib/libnccl.so
ln -s $CUDA_PATH/lib $CUDA_PATH/lib64

- name: "Disk space check after dependencies"
run: df -lh
- uses: actions/cache@v4
with:
path: /root/.ccache
key: ccache-${{ runner.os }}-all-${{ github.ref }}
restore-keys: ccache-${{ runner.os }}-all-
- name: "Disk space check after dependencies and ccache"
run: df -lh
- name: 'Build'
run: docker exec builder bash -c 'pip install --no-cache-dir --no-build-isolation . -v --no-deps'
run: |
export CUDA_PATH=$(python3 -c "import nvidia; print(list(nvidia.__path__)[0] + '/cu13')")
export CUDNN_PATH=$(python3 -c "import nvidia; print(list(nvidia.__path__)[0] + '/cudnn')")
export PATH=$CUDA_PATH/bin:$PATH
export NVTE_BUILD_USE_NVIDIA_WHEELS=1
export PIP_BREAK_SYSTEM_PACKAGES=1
NVTE_USE_CCACHE=1 pip install --no-build-isolation . -v
env:
NVTE_FRAMEWORK: all
MAX_JOBS: 1
- name: 'Sanity check'
run: docker exec builder bash -c 'python3 tests/pytorch/test_sanity_import.py && python3 tests/jax/test_sanity_import.py'
MAX_JOBS: 2
CCACHE_DIR: /root/.ccache
CCACHE_MAXSIZE: 2G
- name: 'Sanity check (pytorch)'
run: python3 tests/pytorch/test_sanity_import.py
- name: 'Sanity check (jax)'
run: python3 tests/jax/test_sanity_import.py
11 changes: 10 additions & 1 deletion build_tools/jax.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@

import setuptools

from .utils import get_cuda_include_dirs, all_files_in_dir, debug_build_enabled
from .utils import (
get_cuda_include_dirs,
all_files_in_dir,
debug_build_enabled,
get_cuda_library_dirs,
)
from typing import List


Expand Down Expand Up @@ -92,6 +97,9 @@ def setup_jax_extension(
]
)

# Library dirs
library_dirs = get_cuda_library_dirs()

# Compile flags
cxx_flags = ["-O3"]
if debug_build_enabled():
Expand All @@ -109,4 +117,5 @@ def setup_jax_extension(
include_dirs=[str(path) for path in include_dirs],
extra_compile_args=cxx_flags,
libraries=["nccl"],
library_dirs=[str(path) for path in library_dirs],
)
34 changes: 34 additions & 0 deletions build_tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,33 @@ def get_cuda_include_dirs() -> Tuple[str, str]:
]


@functools.lru_cache(maxsize=None)
def get_cuda_library_dirs() -> list:
    """Return extra library search directories needed to link against CUDA.

    Returns:
        A list of ``pathlib.Path`` objects. The list is empty when
        ``cuda_toolkit_include_path()`` reports a toolkit installation and
        ``NVTE_BUILD_USE_NVIDIA_WHEELS`` is unset or ``0``; in that case no
        additional directories are contributed by this helper. Otherwise it
        holds the ``lib`` directory of every sub-package found under the
        ``nvidia`` pip package, in sorted order.

    Raises:
        RuntimeError: If the pip wheels have to be used but the ``nvidia``
            package cannot be imported.

    Note:
        The result is memoized by ``functools.lru_cache``, so every caller
        receives the same list object and should treat it as read-only.
    """

    force_wheels = bool(int(os.getenv("NVTE_BUILD_USE_NVIDIA_WHEELS", "0")))

    # With a toolkit installation (and wheels not forced) nothing extra is
    # returned from here.
    if not force_wheels and cuda_toolkit_include_path() is not None:
        return []

    # Otherwise collect the library directories shipped in the pip wheels.
    try:
        import nvidia
    except ModuleNotFoundError as e:
        raise RuntimeError("CUDA not found.") from e

    # A namespace package may have ``__file__`` set to None or lack it
    # entirely; fall back to the first ``__path__`` entry in that case.
    nvidia_file = getattr(nvidia, "__file__", None)
    if nvidia_file is not None:
        cuda_root = Path(nvidia_file).parent
    else:
        # Materialize first: the namespace path object is iterable, but
        # direct indexing is not relied upon here.
        cuda_root = Path(list(nvidia.__path__)[0])

    # Sort so that the library search order does not depend on the order in
    # which the filesystem happens to enumerate directories.
    return sorted(
        subdir / "lib"
        for subdir in cuda_root.iterdir()
        if subdir.is_dir() and (subdir / "lib").is_dir()
    )


@functools.lru_cache(maxsize=None)
def cuda_archs() -> str:
archs = os.getenv("NVTE_CUDA_ARCHS")
Expand Down Expand Up @@ -292,6 +319,13 @@ def cuda_version() -> Tuple[int, ...]:
version_str = get_version("nvidia-cuda-runtime-cu12")
version_tuple = tuple(int(part) for part in version_str.split(".") if part.isdigit())
return version_tuple
except:
pass

try:
version_str = get_version("nvidia-cuda-runtime")
version_tuple = tuple(int(part) for part in version_str.split(".") if part.isdigit())
return version_tuple
except importlib.metadata.PackageNotFoundError:
raise RuntimeError("Could neither find NVCC executable nor CUDA runtime Python package.")

Expand Down
4 changes: 2 additions & 2 deletions transformer_engine/common/util/logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
#include <cudnn.h>
#include <nvrtc.h>

#include "nccl.h"

#ifdef NVTE_WITH_CUBLASMP
#include <cublasmp.h>

#include "nccl.h"
#endif // NVTE_WITH_CUBLASMP

#include <iostream>
Expand Down
1 change: 1 addition & 0 deletions transformer_engine/jax/csrc/extensions/cgemm_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#ifndef TRANSFORMER_ENGINE_JAX_CGEMM_HELPER_H_
#define TRANSFORMER_ENGINE_JAX_CGEMM_HELPER_H_

#include <nccl.h>
#include <unistd.h>

#include <chrono>
Expand Down
Loading