diff --git a/.github/workflows/precompiled.yaml b/.github/workflows/precompiled.yaml index 21f25eab9..d9838a818 100644 --- a/.github/workflows/precompiled.yaml +++ b/.github/workflows/precompiled.yaml @@ -28,6 +28,7 @@ jobs: kernel_flavors: ${{ steps.extract_driver_branch.outputs.kernel_flavors }} dist: ${{ steps.extract_driver_branch.outputs.dist }} lts_kernel: ${{ steps.extract_driver_branch.outputs.lts_kernel }} + platforms: ${{ steps.extract_driver_branch.outputs.platforms }} steps: - name: Checkout code uses: actions/checkout@v6 @@ -54,6 +55,11 @@ jobs: lts_kernel_json=$(printf '%s\n' "${LTS_KERNEL[@]}" | jq -R . | jq -cs .) echo "lts_kernel=$lts_kernel_json" >> $GITHUB_OUTPUT + # platforms for precompiled build (amd64 always; arm64 for ubuntu24.04) + PLATFORMS=("amd64" "arm64") + platforms_json=$(printf '%s\n' "${PLATFORMS[@]}" | jq -R . | jq -cs .) + echo "platforms=$platforms_json" >> $GITHUB_OUTPUT + precompiled-build-image: needs: set-driver-version-matrix runs-on: linux-amd64-cpu4 @@ -63,6 +69,7 @@ jobs: flavor: ${{ fromJson(needs.set-driver-version-matrix.outputs.kernel_flavors) }} dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} lts_kernel: ${{ fromJson(needs.set-driver-version-matrix.outputs.lts_kernel) }} + platform: ${{ fromJson(needs.set-driver-version-matrix.outputs.platforms) }} exclude: - dist: ubuntu24.04 driver_branch: 535 @@ -70,6 +77,8 @@ jobs: dist: ubuntu24.04 - flavor: azure-fde dist: ubuntu22.04 + - dist: ubuntu22.04 + platform: arm64 steps: - uses: actions/checkout@v6 name: Check out code @@ -83,6 +92,7 @@ jobs: GENERATE_ARTIFACTS="false" echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV + echo "DOCKER_BUILD_PLATFORM_OPTIONS=--platform=linux/${{ matrix.platform }}" >> $GITHUB_ENV - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -100,6 +110,7 @@ jobs: env: IMAGE_NAME: ghcr.io/nvidia/driver LTS_KERNEL: ${{ matrix.lts_kernel }} + 
DOCKER_BUILD_PLATFORM_OPTIONS: --platform=linux/${{ matrix.platform }} run: | if [[ "${{ matrix.dist }}" == "ubuntu22.04" ]]; then BASE_TARGET="jammy" @@ -120,6 +131,7 @@ jobs: IMAGE_NAME: ghcr.io/nvidia/driver PRECOMPILED: "true" DIST: signed_${{ matrix.dist }} + DOCKER_BUILD_PLATFORM_OPTIONS: --platform=linux/${{ matrix.platform }} run: | source kernel_version.txt && \ make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver_branch }} build-${DIST}-${DRIVER_VERSION} @@ -136,34 +148,40 @@ jobs: elif [[ "${{ matrix.dist }}" == "ubuntu24.04" ]]; then BASE_TARGET="noble" fi - tar -cvf kernel-version-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar kernel_version.txt - docker save "${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${{ matrix.flavor }}-${{ matrix.driver_branch }}" \ - -o ./base-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar - docker save "${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}" \ - -o ./driver-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar - # set env for artifacts upload - echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV - echo "DIST=$DIST" >> $GITHUB_ENV + if [[ "${{ matrix.platform }}" == "amd64" ]]; then + PLATFORM_SUFFIX="" + else + PLATFORM_SUFFIX="-${{ matrix.platform }}" + fi + BASE_ARTIFACT="base-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${{ matrix.dist }}${PLATFORM_SUFFIX}" + DRIVER_ARTIFACT="driver-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${{ matrix.dist }}${PLATFORM_SUFFIX}" + KERNEL_VERSION_ARTIFACT="kernel-version-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${{ matrix.dist }}${PLATFORM_SUFFIX}" + echo "BASE_ARTIFACT_NAME=${BASE_ARTIFACT}" >> $GITHUB_ENV + echo "DRIVER_ARTIFACT_NAME=${DRIVER_ARTIFACT}" >> $GITHUB_ENV + echo "KERNEL_VERSION_ARTIFACT_NAME=${KERNEL_VERSION_ARTIFACT}" >> $GITHUB_ENV + tar -cvf "${KERNEL_VERSION_ARTIFACT}.tar" kernel_version.txt + docker save 
"${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${{ matrix.flavor }}-${{ matrix.driver_branch }}" -o "${BASE_ARTIFACT}.tar" + docker save "${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${KERNEL_VERSION}-${{ matrix.dist }}" -o "${DRIVER_ARTIFACT}.tar" - name: Upload base image as an artifact uses: actions/upload-artifact@v6 with: - name: base-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }} - path: ./base-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar + name: ${{ env.BASE_ARTIFACT_NAME }} + path: ./${{ env.BASE_ARTIFACT_NAME }}.tar retention-days: 1 - name: Upload build image as an artifact uses: actions/upload-artifact@v6 with: - name: driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }} - path: ./driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar + name: ${{ env.DRIVER_ARTIFACT_NAME }} + path: ./${{ env.DRIVER_ARTIFACT_NAME }}.tar retention-days: 1 - name: Upload kernel version as an artifact uses: actions/upload-artifact@v6 with: - name: kernel-version-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }} - path: ./kernel-version-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar + name: ${{ env.KERNEL_VERSION_ARTIFACT_NAME }} + path: ./${{ env.KERNEL_VERSION_ARTIFACT_NAME }}.tar retention-days: 1 determine-e2e-test-matrix: @@ -172,9 +190,12 @@ jobs: matrix: dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} lts_kernel: ${{ fromJson(needs.set-driver-version-matrix.outputs.lts_kernel) }} + platform: ${{ fromJson(needs.set-driver-version-matrix.outputs.platforms) }} exclude: - lts_kernel: 5.15 dist: ubuntu24.04 + - platform: arm64 + dist: ubuntu22.04 needs: - precompiled-build-image - set-driver-version-matrix @@ -193,7 +214,7 @@ jobs: with: pattern: kernel-version-* path: ./kernel-version-artifacts - merge-multiple: false + merge-multiple: true 
- name: Set kernel version env: @@ -205,7 +226,12 @@ jobs: KERNEL_FLAVORS=($(echo "$kernel_flavors_json" | jq -r '.[]')) driver_branch_json='${{ needs.set-driver-version-matrix.outputs.driver_branch }}' DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]')) - + if [[ "${{ matrix.platform }}" == "arm64" ]]; then + PLATFORM_SUFFIX="-arm64" + else + PLATFORM_SUFFIX="" + fi + echo "PLATFORM_SUFFIX=${PLATFORM_SUFFIX}" >> $GITHUB_ENV # remove 535 driver branch for ubuntu24.04 if [ "$DIST" == "ubuntu24.04" ]; then DRIVER_BRANCHES=($(for branch in "${DRIVER_BRANCHES[@]}"; do @@ -213,21 +239,21 @@ jobs: done)) fi source ./tests/scripts/ci-precompiled-helpers.sh - KERNEL_VERSIONS=($(get_kernel_versions_to_test KERNEL_FLAVORS[@] DRIVER_BRANCHES[@] $DIST $LTS_KERNEL)) + KERNEL_VERSIONS=($(get_kernel_versions_to_test KERNEL_FLAVORS[@] DRIVER_BRANCHES[@] $DIST $LTS_KERNEL $PLATFORM_SUFFIX)) if [ -z "$KERNEL_VERSIONS" ]; then # no new kernel release echo "Skipping e2e tests" exit 0 fi # Convert array to JSON format and assign - echo "[]" > ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}.json - printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . > ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}.json + echo "[]" > ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}${PLATFORM_SUFFIX}.json + printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . 
> ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}${PLATFORM_SUFFIX}.json - name: Upload kernel matrix values as artifacts uses: actions/upload-artifact@v6 with: - name: matrix-values-${{ matrix.dist }}-${{ matrix.lts_kernel }} - path: ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}.json + name: matrix-values-${{ matrix.dist }}-${{ matrix.lts_kernel }}${{ env.PLATFORM_SUFFIX }} + path: ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}${{ env.PLATFORM_SUFFIX }}.json retention-days: 1 collect-e2e-test-matrix: @@ -256,25 +282,13 @@ jobs: echo "matrix_values_not_empty=0" >> $GITHUB_OUTPUT kernel_versions=() - # Read and merge kernel_version values from dist files - dist_json='${{ needs.set-driver-version-matrix.outputs.dist }}' - DIST=($(echo "$dist_json" | jq -r '.[]')) - lts_kernel_json='${{ needs.set-driver-version-matrix.outputs.lts_kernel }}' - LTS_KERNEL=($(echo "$lts_kernel_json" | jq -r '.[]')) - for dist in "${DIST[@]}"; do - for kernel in "${LTS_KERNEL[@]}"; do - artifact_name="matrix-values-${dist}-${kernel}" - file_path="./matrix-values-artifacts/${artifact_name}/matrix_values_${dist}_${kernel}.json" - if [ -f "$file_path" ]; then - echo "Successfully found artifact: $artifact_name at $file_path" - value=$(jq -r '.[]' "$file_path") - kernel_versions+=($value) - echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT - else - echo "Artifact not found: $artifact_name" - fi - done + # Read and merge kernel_version values from all platform artifacts (amd64 and arm64) + for f in $(find ./matrix-values-artifacts -name "matrix_values_*.json" -type f 2>/dev/null); do + value=$(jq -r '.[]' "$f") + kernel_versions+=($value) + echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT done + kernel_versions=($(printf '%s\n' "${kernel_versions[@]}" | sort -u)) echo "Collected Kernel Versions: ${kernel_versions[@]}" combined_values=$(printf '%s\n' "${kernel_versions[@]}" | jq -R . | jq -s -c . 
| tr -d ' \n') echo "Combined Kernel Versions JSON: $combined_values" @@ -334,6 +348,16 @@ jobs: echo "${{ secrets.AWS_SSH_KEY }}" > ${{ github.workspace }}/key.pem && chmod 400 ${{ github.workspace }}/key.pem echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV KERNEL_VERSION="${{ matrix.kernel_version }}" + if [[ "${KERNEL_VERSION}" == *-arm64 ]]; then + PLATFORM=arm64 + PLATFORM_SUFFIX="-arm64" + KERNEL_VERSION="${KERNEL_VERSION%-arm64}" + else + PLATFORM=amd64 + PLATFORM_SUFFIX="" + fi + echo "PLATFORM=$PLATFORM" >> $GITHUB_ENV + echo "PLATFORM_SUFFIX=$PLATFORM_SUFFIX" >> $GITHUB_ENV # Extract the last segment after the last dash DIST=${KERNEL_VERSION##*-} echo "DIST=$DIST" >> $GITHUB_ENV @@ -342,22 +366,29 @@ jobs: driver_branch_json="${{ needs.set-driver-version-matrix.outputs.driver_branch }}" DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]')) echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV - - name: Set kernel version in holodeck_${{ env.DIST }}.yaml + - name: Set kernel version and architecture in holodeck_ubuntu.yaml run: | - yq eval '.spec += {"kernel": {"version": strenv(KERNEL_VERSION)}}' -i tests/holodeck_${{ env.DIST }}.yaml + yq eval '.spec += {"kernel": {"version": strenv(KERNEL_VERSION)}}' -i tests/holodeck_ubuntu.yaml + if [[ "$DIST" == "ubuntu24.04" ]]; then + yq eval '.spec.instance.os = "ubuntu-24.04"' -i tests/holodeck_ubuntu.yaml + fi + if [[ "$PLATFORM" == "arm64" ]]; then + yq eval '.spec.instance.image.architecture = strenv(PLATFORM)' -i tests/holodeck_ubuntu.yaml + yq eval '.spec.instance.type = "g5g.xlarge"' -i tests/holodeck_ubuntu.yaml + yq eval '.spec.instance.region = "us-west-2"' -i tests/holodeck_ubuntu.yaml + fi - name: Set up Holodeck - uses: NVIDIA/holodeck@v0.2.18 + uses: NVIDIA/holodeck@main  # NOTE(review): floating branch ref on a step that receives AWS credentials; pin to a release tag or full commit SHA. env: AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SSH_KEY: ${{ secrets.AWS_SSH_KEY }} - DIST: ${{ env.DIST }} with: aws_access_key_id: ${{ 
secrets.AWS_ACCESS_KEY_ID }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_ssh_key: ${{ secrets.AWS_SSH_KEY }} - holodeck_config: "tests/holodeck_${{ env.DIST }}.yaml" + holodeck_config: "tests/holodeck_ubuntu.yaml" - name: Get public dns name id: get_public_dns_name uses: mikefarah/yq@master @@ -394,7 +425,7 @@ jobs: TEST_CASE_ARGS="${GPU_OPERATOR_OPTIONS} --set driver.version=${DRIVER_VERSION}" # add escape character for space TEST_CASE_ARGS=$(printf '%q ' "$TEST_CASE_ARGS") - IMAGE_PATH="./tests/driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}.tar" + IMAGE_PATH="./tests/driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}${{ env.PLATFORM_SUFFIX }}.tar" ./tests/ci-run-e2e.sh "${TEST_CASE}" "${TEST_CASE_ARGS}" ${IMAGE_PATH} || status=$? if [ $status -eq 1 ]; then echo "e2e validation failed for driver version $DRIVER_VERSION with status $status" @@ -436,6 +467,11 @@ jobs: - name: Set image vars run: | echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV + KERNEL_VERSION="${{ matrix.kernel_version }}" + if [[ "${KERNEL_VERSION}" == *-arm64 ]]; then + KERNEL_VERSION="${KERNEL_VERSION%-arm64}" + fi + echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV - name: Download base image artifact if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }} @@ -447,9 +483,9 @@ jobs: - name: Publish base image if: ${{ ! 
(matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }} run: | - LTS_KERNEL=$(echo "${{ matrix.kernel_version }}" | sed -E 's/^([0-9]+\.[0-9]+)\..*/\1/') - KERNEL_FLAVOR=$(echo "${{ matrix.kernel_version }}" | sed -E 's/^[0-9]+\.[0-9]+\.[0-9]+-[0-9]+-(.*)-ubuntu[0-9]+\.[0-9]+$/\1/') - DIST=$(echo "${{ matrix.kernel_version }}" | sed -E 's/^.*-(ubuntu[0-9]+\.[0-9]+)$/\1/') + LTS_KERNEL=$(echo "${{ env.KERNEL_VERSION }}" | sed -E 's/^([0-9]+\.[0-9]+)\..*/\1/') + KERNEL_FLAVOR=$(echo "${{ env.KERNEL_VERSION }}" | sed -E 's/^[0-9]+\.[0-9]+\.[0-9]+-[0-9]+-(.*)-ubuntu[0-9]+\.[0-9]+$/\1/') + DIST=$(echo "${{ env.KERNEL_VERSION }}" | sed -E 's/^.*-(ubuntu[0-9]+\.[0-9]+)$/\1/') if [[ "${DIST}" == "ubuntu22.04" ]]; then BASE_TARGET="jammy" elif [[ "${DIST}" == "ubuntu24.04" ]]; then diff --git a/Makefile b/Makefile index 5572cf13b..ff097d4f4 100644 --- a/Makefile +++ b/Makefile @@ -215,6 +215,7 @@ $(BASE_BUILD_TARGETS): DOCKER_BUILDKIT=1 \ $(DOCKER) $(BUILDX) build --pull --no-cache \ $(DOCKER_BUILD_OPTIONS) \ + $(DOCKER_BUILD_PLATFORM_OPTIONS) \ --tag $(IMAGE) \ --target $(TARGET) \ --build-arg CUDA_VERSION="$(CUDA_VERSION)" \ diff --git a/base/generate-ci-config b/base/generate-ci-config index 94ba87943..490130235 100755 --- a/base/generate-ci-config +++ b/base/generate-ci-config @@ -16,7 +16,12 @@ export DEBIAN_FRONTEND=noninteractive apt-get update -y -qq > /dev/null # Generate a list of all kernel versions which have NVIDIA precompiled driver packages available. 
-SUPPORTED_KERNELS_LINUX_SIGNATURES_LIST=$(apt-cache search "linux-signatures-nvidia.*${LTS_KERNEL}.*${KERNEL_FLAVOR}" | awk '{print $1}' | sed -e "s/^.*${LTS_KERNEL}/${LTS_KERNEL}/" | sort -n -t'-' -k2| grep "${KERNEL_FLAVOR}$") +# linux-signatures-nvidia (secure boot signatures) is not available for arm64 +if [ "$(uname -m)" = "aarch64" ]; then + SUPPORTED_KERNELS_LINUX_LIST=$(apt-cache search "linux-objects-nvidia-${DRIVER_BRANCH}-server.*${LTS_KERNEL}.*${KERNEL_FLAVOR}" | awk '{print $1}' | grep -v "open" | sed -e "s/^.*${LTS_KERNEL}/${LTS_KERNEL}/" | sort -n -t'-' -k2) +else + SUPPORTED_KERNELS_LINUX_LIST=$(apt-cache search "linux-signatures-nvidia.*${LTS_KERNEL}.*${KERNEL_FLAVOR}" | awk '{print $1}' | sed -e "s/^.*${LTS_KERNEL}/${LTS_KERNEL}/" | sort -n -t'-' -k2| grep "${KERNEL_FLAVOR}$") +fi # Generate a list of all kernel versions which have linux-image packages available. SUPPORTED_KERNELS_LINUX_IMAGE_LIST=$(apt-cache search linux-image-${LTS_KERNEL}.*-${KERNEL_FLAVOR} | awk '{print $1}' | sed -e "s/^.*${LTS_KERNEL}/${LTS_KERNEL}/" | sort -n -t'-' -k2 | grep "${KERNEL_FLAVOR}$") @@ -26,7 +31,7 @@ DRIVER_VERSION=$(apt-cache show nvidia-utils-${DRIVER_BRANCH}-server |grep Versi # Get the latest kernel from linux-signatures-list and linux-images-list # As list is already sorted , compare the kernel version and find exact match # get the latest kernel version with tail -SK=$(grep -Fxf <(echo "$SUPPORTED_KERNELS_LINUX_SIGNATURES_LIST") <(echo "$SUPPORTED_KERNELS_LINUX_IMAGE_LIST") | tail -n1) +SK=$(grep -Fxf <(echo "$SUPPORTED_KERNELS_LINUX_LIST") <(echo "$SUPPORTED_KERNELS_LINUX_IMAGE_LIST") | tail -n1) # Write to file echo "export KERNEL_VERSION=$SK DRIVER_VERSION=$DRIVER_VERSION DRIVER_VERSIONS=$DRIVER_VERSION" > /var/kernel_version.txt diff --git a/multi-arch.mk b/multi-arch.mk index 263fa88fb..a9cf25035 100644 --- a/multi-arch.mk +++ b/multi-arch.mk @@ -27,5 +27,4 @@ $(DRIVER_PUSH_TARGETS): push-%: build-ubuntu18.04%: DOCKER_BUILD_PLATFORM_OPTIONS = 
--platform=linux/amd64 build-signed_ubuntu20.04%: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64 build-signed_ubuntu22.04%: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64 -build-signed_ubuntu24.04%: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64 build-sles%: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64 diff --git a/tests/holodeck_ubuntu22.04.yaml b/tests/holodeck_ubuntu.yaml similarity index 64% rename from tests/holodeck_ubuntu22.04.yaml rename to tests/holodeck_ubuntu.yaml index 0e870a7da..d4244e718 100644 --- a/tests/holodeck_ubuntu22.04.yaml +++ b/tests/holodeck_ubuntu.yaml @@ -11,19 +11,9 @@ spec: instance: type: g4dn.xlarge region: us-west-1 - ingressIpRanges: - - 18.190.12.32/32 - - 3.143.46.93/32 - - 52.15.119.136/32 - - 35.155.108.162/32 - - 35.162.190.51/32 - - 54.201.61.24/32 - - 52.24.205.48/32 - - 44.235.4.62/32 - - 44.230.241.223/32 + os: ubuntu-22.04 image: architecture: amd64 - imageId: ami-0007a86be89339c9f containerRuntime: install: true name: containerd diff --git a/tests/holodeck_ubuntu24.04.yaml b/tests/holodeck_ubuntu24.04.yaml deleted file mode 100644 index 7e22a2361..000000000 --- a/tests/holodeck_ubuntu24.04.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: holodeck.nvidia.com/v1alpha1 -kind: Environment -metadata: - name: HOLODECK_NAME - description: "end-to-end test infrastructure" -spec: - provider: aws - auth: - keyName: cnt-ci - privateKey: HOLODECK_PRIVATE_KEY - instance: - type: g4dn.xlarge - region: us-west-1 - ingressIpRanges: - - 18.190.12.32/32 - - 3.143.46.93/32 - - 52.15.119.136/32 - - 35.155.108.162/32 - - 35.162.190.51/32 - - 54.201.61.24/32 - - 52.24.205.48/32 - - 44.235.4.62/32 - - 44.230.241.223/32 - image: - architecture: amd64 - imageId: ami-00271c85bf8a52b84 - containerRuntime: - install: true - name: containerd - kubernetes: - install: true - installer: kubeadm - version: v1.33.0 - crictlVersion: v1.33.0 diff --git a/tests/scripts/ci-precompiled-helpers.sh 
b/tests/scripts/ci-precompiled-helpers.sh index 3050049db..bce9b55f9 100644 --- a/tests/scripts/ci-precompiled-helpers.sh +++ b/tests/scripts/ci-precompiled-helpers.sh @@ -1,6 +1,6 @@ get_kernel_versions_to_test() { - if [[ "$#" -ne 4 ]]; then - echo " Error:$0 must be called with KERNEL_FLAVORS DRIVER_BRANCHES DIST LTS_KERNEL" >&2 + if [[ "$#" -lt 4 || "$#" -gt 5 ]]; then + echo " Error:$0 must be called with KERNEL_FLAVORS DRIVER_BRANCHES DIST LTS_KERNEL or KERNEL_FLAVORS DRIVER_BRANCHES DIST LTS_KERNEL PLATFORM_SUFFIX" >&2 exit 1 fi @@ -8,11 +8,11 @@ get_kernel_versions_to_test() { local -a DRIVER_BRANCHES=("${!2}") local DIST="$3" local LTS_KERNEL="$4" - + local PLATFORM_SUFFIX="$5" kernel_versions=() for kernel_flavor in "${KERNEL_FLAVORS[@]}"; do for DRIVER_BRANCH in "${DRIVER_BRANCHES[@]}"; do - source ./tests/scripts/findkernelversion.sh "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST" "$LTS_KERNEL" >&2 + source ./tests/scripts/findkernelversion.sh "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST" "$LTS_KERNEL" "$PLATFORM_SUFFIX" >&2 if [[ "$should_continue" == true ]]; then break fi @@ -25,7 +25,7 @@ get_kernel_versions_to_test() { # Remove duplicates kernel_versions=($(printf "%s\n" "${kernel_versions[@]}" | sort -u)) for i in "${!kernel_versions[@]}"; do - kernel_versions[$i]="${kernel_versions[$i]}-$DIST" + kernel_versions[$i]="${kernel_versions[$i]}-$DIST$PLATFORM_SUFFIX" done echo "${kernel_versions[@]}" } diff --git a/tests/scripts/findkernelversion.sh b/tests/scripts/findkernelversion.sh index 9731a39c7..ec8f417c7 100755 --- a/tests/scripts/findkernelversion.sh +++ b/tests/scripts/findkernelversion.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [[ $# -ne 4 ]]; then - echo " KERNEL_FLAVOR DRIVER_BRANCH DIST LTS_KERNEL are required" +if [[ $# -lt 4 || $# -gt 5 ]]; then + echo " KERNEL_FLAVOR DRIVER_BRANCH DIST LTS_KERNEL or KERNEL_FLAVOR DRIVER_BRANCH DIST LTS_KERNEL PLATFORM_SUFFIX are required" exit 1 fi @@ -9,6 +9,7 @@ export KERNEL_FLAVOR="${1}" export 
DRIVER_BRANCH="${2}" export DIST="${3}" export LTS_KERNEL="${4}" +export PLATFORM_SUFFIX="${5}" export REGCTL_VERSION=v0.7.1 mkdir -p bin @@ -18,24 +19,26 @@ export PATH=$(pwd)/bin:${PATH} # calculate kernel version of latest image prefix="kernel-version-${DRIVER_BRANCH}-${LTS_KERNEL}" -suffix="${kernel_flavor}-${DIST}" +suffix="${KERNEL_FLAVOR}-${DIST}${PLATFORM_SUFFIX}" artifact_dir="./kernel-version-artifacts" -artifact=$(find "$artifact_dir" -maxdepth 1 -type d -name "${prefix}*-${suffix}" | head -1) -if [ -n "$artifact" ]; then - artifact_name=$(basename "$artifact") - if [ -f "$artifact/${artifact_name}.tar" ]; then - tar -xf "$artifact/${artifact_name}.tar" -C ./ - export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt) - rm -f kernel_version.txt - fi +artifact_file=$(find "$artifact_dir" -maxdepth 1 -type f -name "${prefix}*-${suffix}.tar" | head -1) +if [ -n "$artifact_file" ]; then + tar -xf "$artifact_file" -C ./ + export $(grep -oE 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt) + rm -f kernel_version.txt fi # calculate driver tag status_nvcr=0 status_ghcr=0 -regctl tag ls nvcr.io/nvidia/driver | grep "^${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}$" || status_nvcr=$? -regctl tag ls ghcr.io/nvidia/driver | grep "^${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}$" || status_ghcr=$? +# PLATFORM_SUFFIX is empty for amd64 or "-<arch>" otherwise: drop the leading dash and default to amd64 +PLATFORM="${PLATFORM_SUFFIX#-}" +PLATFORM="${PLATFORM:-amd64}" +# "manifest head" is used as an existence probe for the tag on the requested platform (NOTE(review): confirm "manifest inspect" is not a regctl subcommand) +regctl manifest head "nvcr.io/nvidia/driver:${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}" --platform="linux/${PLATFORM}" > /dev/null 2>&1 || status_nvcr=$? +regctl manifest head "ghcr.io/nvidia/driver:${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}" --platform="linux/${PLATFORM}" > /dev/null 2>&1 || status_ghcr=$? 
+ if [[ $status_nvcr -eq 0 || $status_ghcr -eq 0 ]]; then export should_continue=false else diff --git a/ubuntu24.04/precompiled/Dockerfile b/ubuntu24.04/precompiled/Dockerfile index c36043c7d..7c5d48016 100644 --- a/ubuntu24.04/precompiled/Dockerfile +++ b/ubuntu24.04/precompiled/Dockerfile @@ -17,7 +17,8 @@ ENV NVIDIA_VISIBLE_DEVICES=void RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections -RUN dpkg --add-architecture i386 && \ +# Add i386 architecture only for amd64 builds (not available on ARM) +RUN if [ "$TARGETARCH" = "amd64" ]; then dpkg --add-architecture i386; fi && \ apt-get update && apt-get install -y --no-install-recommends \ apt-utils \ build-essential \ @@ -31,11 +32,12 @@ RUN dpkg --add-architecture i386 && \ pkg-config && \ rm -rf /var/lib/apt/lists/* -# Fetch GPG keys for CUDA repo -RUN apt-key del 3bf863cc && \ +# Fetch GPG keys for CUDA repo (architecture-specific) +RUN CUDA_ARCH=$([ "$TARGETARCH" = "arm64" ] && echo "sbsa" || echo "x86_64") && \ + apt-key del 3bf863cc && \ rm /etc/apt/sources.list.d/cuda.list && \ - curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/3bf863cc.pub | gpg --dearmor -o /etc/apt/keyrings/cuda.pub && \ - echo "deb [signed-by=/etc/apt/keyrings/cuda.pub] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64 /" > /etc/apt/sources.list.d/cuda.list + curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${CUDA_ARCH}/3bf863cc.pub | gpg --dearmor -o /etc/apt/keyrings/cuda.pub && \ + echo "deb [signed-by=/etc/apt/keyrings/cuda.pub] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${CUDA_ARCH} /" > /etc/apt/sources.list.d/cuda.list RUN usermod -o -u 0 -g 0 _apt diff --git a/ubuntu24.04/precompiled/local-repo.sh b/ubuntu24.04/precompiled/local-repo.sh index 1e35de722..1771b0cd6 100755 --- a/ubuntu24.04/precompiled/local-repo.sh +++ b/ubuntu24.04/precompiled/local-repo.sh @@ -68,7 +68,12 @@ 
download_driver_package_deps () { pushd ${LOCAL_REPO_DIR} download_apt_with_dep linux-objects-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} - download_apt_with_dep linux-signatures-nvidia-${KERNEL_VERSION} + + # linux-signatures-nvidia (secure boot signatures) is not available for arm64 + if [ "$TARGETARCH" = "amd64" ]; then + download_apt_with_dep linux-signatures-nvidia-${KERNEL_VERSION} + fi + download_apt_with_dep linux-modules-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} download_apt_with_dep linux-modules-nvidia-${DRIVER_BRANCH}-server-open-${KERNEL_VERSION} download_apt_with_dep nvidia-utils-${DRIVER_BRANCH}-server @@ -76,7 +81,11 @@ download_driver_package_deps () { download_apt_with_dep libnvidia-decode-${DRIVER_BRANCH}-server download_apt_with_dep libnvidia-extra-${DRIVER_BRANCH}-server download_apt_with_dep libnvidia-encode-${DRIVER_BRANCH}-server - download_apt_with_dep libnvidia-fbc1-${DRIVER_BRANCH}-server + + # libnvidia-fbc1 (FrameBuffer Capture) is not available for arm64 + if [ "$TARGETARCH" = "amd64" ]; then + download_apt_with_dep libnvidia-fbc1-${DRIVER_BRANCH}-server + fi fabricmanager_download nscq_download diff --git a/ubuntu24.04/precompiled/nvidia-driver b/ubuntu24.04/precompiled/nvidia-driver index 824c0a1bc..571802183 100755 --- a/ubuntu24.04/precompiled/nvidia-driver +++ b/ubuntu24.04/precompiled/nvidia-driver @@ -400,21 +400,37 @@ _install_driver() { nvidia-headless-no-dkms-${DRIVER_BRANCH}-server \ libnvidia-decode-${DRIVER_BRANCH}-server \ libnvidia-extra-${DRIVER_BRANCH}-server \ - libnvidia-encode-${DRIVER_BRANCH}-server \ - libnvidia-fbc1-${DRIVER_BRANCH}-server + libnvidia-encode-${DRIVER_BRANCH}-server + + # libnvidia-fbc1 (FrameBuffer Capture) is not available for arm64 + if [ "$TARGETARCH" = "amd64" ]; then + apt-get install -y --no-install-recommends libnvidia-fbc1-${DRIVER_BRANCH}-server + fi # Now install the precompiled kernel module packages signed by Canonical + # linux-signatures-nvidia (secure boot signatures) 
is not available for arm64 if [ "$KERNEL_TYPE" = "kernel-open" ]; then echo "Installing Open NVIDIA driver kernel modules..." - apt-get install --no-install-recommends -y \ - linux-signatures-nvidia-${KERNEL_VERSION} \ - linux-modules-nvidia-${DRIVER_BRANCH}-server-open-${KERNEL_VERSION} + if [ "$TARGETARCH" = "arm64" ]; then + apt-get install --no-install-recommends -y \ + linux-modules-nvidia-${DRIVER_BRANCH}-server-open-${KERNEL_VERSION} + else + apt-get install --no-install-recommends -y \ + linux-signatures-nvidia-${KERNEL_VERSION} \ + linux-modules-nvidia-${DRIVER_BRANCH}-server-open-${KERNEL_VERSION} + fi else echo "Installing Closed NVIDIA driver kernel modules..." - apt-get install --no-install-recommends -y \ - linux-objects-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} \ - linux-signatures-nvidia-${KERNEL_VERSION} \ - linux-modules-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} + if [ "$TARGETARCH" = "arm64" ]; then + apt-get install --no-install-recommends -y \ + linux-objects-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} \ + linux-modules-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} + else + apt-get install --no-install-recommends -y \ + linux-objects-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} \ + linux-signatures-nvidia-${KERNEL_VERSION} \ + linux-modules-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} + fi fi }