-
Notifications
You must be signed in to change notification settings - Fork 75
Support Data Center precompiled driver container for Arm (Ubuntu 24.04) #533
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -28,6 +28,7 @@ jobs: | |||||
| kernel_flavors: ${{ steps.extract_driver_branch.outputs.kernel_flavors }} | ||||||
| dist: ${{ steps.extract_driver_branch.outputs.dist }} | ||||||
| lts_kernel: ${{ steps.extract_driver_branch.outputs.lts_kernel }} | ||||||
| platforms: ${{ steps.extract_driver_branch.outputs.platforms }} | ||||||
| steps: | ||||||
| - name: Checkout code | ||||||
| uses: actions/checkout@v6 | ||||||
|
|
@@ -54,6 +55,11 @@ jobs: | |||||
| lts_kernel_json=$(printf '%s\n' "${LTS_KERNEL[@]}" | jq -R . | jq -cs .) | ||||||
| echo "lts_kernel=$lts_kernel_json" >> $GITHUB_OUTPUT | ||||||
|
|
||||||
| # platforms for precompiled build (amd64 always; arm64 for ubuntu24.04) | ||||||
| PLATFORMS=("amd64" "arm64") | ||||||
| platforms_json=$(printf '%s\n' "${PLATFORMS[@]}" | jq -R . | jq -cs .) | ||||||
| echo "platforms=$platforms_json" >> $GITHUB_OUTPUT | ||||||
|
|
||||||
| precompiled-build-image: | ||||||
| needs: set-driver-version-matrix | ||||||
| runs-on: linux-amd64-cpu4 | ||||||
|
|
@@ -63,13 +69,16 @@ jobs: | |||||
| flavor: ${{ fromJson(needs.set-driver-version-matrix.outputs.kernel_flavors) }} | ||||||
| dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} | ||||||
| lts_kernel: ${{ fromJson(needs.set-driver-version-matrix.outputs.lts_kernel) }} | ||||||
| platform: ${{ fromJson(needs.set-driver-version-matrix.outputs.platforms) }} | ||||||
| exclude: | ||||||
| - dist: ubuntu24.04 | ||||||
| driver_branch: 535 | ||||||
| - lts_kernel: 5.15 | ||||||
| dist: ubuntu24.04 | ||||||
| - flavor: azure-fde | ||||||
| dist: ubuntu22.04 | ||||||
| - dist: ubuntu22.04 | ||||||
| platform: arm64 | ||||||
| steps: | ||||||
| - uses: actions/checkout@v6 | ||||||
| name: Check out code | ||||||
|
|
@@ -83,6 +92,7 @@ jobs: | |||||
| GENERATE_ARTIFACTS="false" | ||||||
| echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV | ||||||
| echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV | ||||||
| echo "DOCKER_BUILD_PLATFORM_OPTIONS=--platform=linux/${{ matrix.platform }}" >> $GITHUB_ENV | ||||||
|
|
||||||
| - name: Set up QEMU | ||||||
| uses: docker/setup-qemu-action@v3 | ||||||
|
|
@@ -100,6 +110,7 @@ jobs: | |||||
| env: | ||||||
| IMAGE_NAME: ghcr.io/nvidia/driver | ||||||
| LTS_KERNEL: ${{ matrix.lts_kernel }} | ||||||
| DOCKER_BUILD_PLATFORM_OPTIONS: --platform=linux/${{ matrix.platform }} | ||||||
| run: | | ||||||
| if [[ "${{ matrix.dist }}" == "ubuntu22.04" ]]; then | ||||||
| BASE_TARGET="jammy" | ||||||
|
|
@@ -120,6 +131,7 @@ jobs: | |||||
| IMAGE_NAME: ghcr.io/nvidia/driver | ||||||
| PRECOMPILED: "true" | ||||||
| DIST: signed_${{ matrix.dist }} | ||||||
| DOCKER_BUILD_PLATFORM_OPTIONS: --platform=linux/${{ matrix.platform }} | ||||||
| run: | | ||||||
| source kernel_version.txt && \ | ||||||
| make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver_branch }} build-${DIST}-${DRIVER_VERSION} | ||||||
|
|
@@ -136,34 +148,40 @@ jobs: | |||||
| elif [[ "${{ matrix.dist }}" == "ubuntu24.04" ]]; then | ||||||
| BASE_TARGET="noble" | ||||||
| fi | ||||||
| tar -cvf kernel-version-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar kernel_version.txt | ||||||
| docker save "${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${{ matrix.flavor }}-${{ matrix.driver_branch }}" \ | ||||||
| -o ./base-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar | ||||||
| docker save "${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}" \ | ||||||
| -o ./driver-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar | ||||||
| # set env for artifacts upload | ||||||
| echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV | ||||||
| echo "DIST=$DIST" >> $GITHUB_ENV | ||||||
| if [[ "${{ matrix.platform }}" == "amd64" ]]; then | ||||||
| PLATFORM_SUFFIX="" | ||||||
| else | ||||||
| PLATFORM_SUFFIX="-${{ matrix.platform }}" | ||||||
| fi | ||||||
| BASE_ARTIFACT="base-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${{ matrix.dist }}${PLATFORM_SUFFIX}" | ||||||
| DRIVER_ARTIFACT="driver-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${{ matrix.dist }}${PLATFORM_SUFFIX}" | ||||||
| KERNEL_VERSION_ARTIFACT="kernel-version-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${{ matrix.dist }}${PLATFORM_SUFFIX}" | ||||||
| echo "BASE_ARTIFACT_NAME=${BASE_ARTIFACT}" >> $GITHUB_ENV | ||||||
| echo "DRIVER_ARTIFACT_NAME=${DRIVER_ARTIFACT}" >> $GITHUB_ENV | ||||||
| echo "KERNEL_VERSION_ARTIFACT_NAME=${KERNEL_VERSION_ARTIFACT}" >> $GITHUB_ENV | ||||||
| tar -cvf "${KERNEL_VERSION_ARTIFACT}.tar" kernel_version.txt | ||||||
| docker save "${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${{ matrix.flavor }}-${{ matrix.driver_branch }}" -o "${BASE_ARTIFACT}.tar" | ||||||
| docker save "${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${KERNEL_VERSION}-${{ matrix.dist }}" -o "${DRIVER_ARTIFACT}.tar" | ||||||
|
|
||||||
| - name: Upload base image as an artifact | ||||||
| uses: actions/upload-artifact@v6 | ||||||
| with: | ||||||
| name: base-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }} | ||||||
| path: ./base-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar | ||||||
| name: ${{ env.BASE_ARTIFACT_NAME }} | ||||||
| path: ./${{ env.BASE_ARTIFACT_NAME }}.tar | ||||||
| retention-days: 1 | ||||||
|
|
||||||
| - name: Upload build image as an artifact | ||||||
| uses: actions/upload-artifact@v6 | ||||||
| with: | ||||||
| name: driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }} | ||||||
| path: ./driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar | ||||||
| name: ${{ env.DRIVER_ARTIFACT_NAME }} | ||||||
| path: ./${{ env.DRIVER_ARTIFACT_NAME }}.tar | ||||||
| retention-days: 1 | ||||||
|
|
||||||
| - name: Upload kernel version as an artifact | ||||||
| uses: actions/upload-artifact@v6 | ||||||
| with: | ||||||
| name: kernel-version-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }} | ||||||
| path: ./kernel-version-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar | ||||||
| name: ${{ env.KERNEL_VERSION_ARTIFACT_NAME }} | ||||||
| path: ./${{ env.KERNEL_VERSION_ARTIFACT_NAME }}.tar | ||||||
| retention-days: 1 | ||||||
|
|
||||||
| determine-e2e-test-matrix: | ||||||
|
|
@@ -172,9 +190,12 @@ jobs: | |||||
| matrix: | ||||||
| dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} | ||||||
| lts_kernel: ${{ fromJson(needs.set-driver-version-matrix.outputs.lts_kernel) }} | ||||||
| platform: ${{ fromJson(needs.set-driver-version-matrix.outputs.platforms) }} | ||||||
| exclude: | ||||||
| - lts_kernel: 5.15 | ||||||
| dist: ubuntu24.04 | ||||||
| - platform: arm64 | ||||||
| dist: ubuntu22.04 | ||||||
| needs: | ||||||
| - precompiled-build-image | ||||||
| - set-driver-version-matrix | ||||||
|
|
@@ -193,7 +214,7 @@ jobs: | |||||
| with: | ||||||
| pattern: kernel-version-* | ||||||
| path: ./kernel-version-artifacts | ||||||
| merge-multiple: false | ||||||
| merge-multiple: true | ||||||
|
|
||||||
| - name: Set kernel version | ||||||
| env: | ||||||
|
|
@@ -205,29 +226,34 @@ jobs: | |||||
| KERNEL_FLAVORS=($(echo "$kernel_flavors_json" | jq -r '.[]')) | ||||||
| driver_branch_json='${{ needs.set-driver-version-matrix.outputs.driver_branch }}' | ||||||
| DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]')) | ||||||
|
|
||||||
| if [[ "${{ matrix.platform }}" == "arm64" ]]; then | ||||||
| PLATFORM_SUFFIX="-arm64" | ||||||
| else | ||||||
| PLATFORM_SUFFIX="" | ||||||
| fi | ||||||
| echo "PLATFORM_SUFFIX=${PLATFORM_SUFFIX}" >> $GITHUB_ENV | ||||||
| # remove 535 driver branch for ubuntu24.04 | ||||||
| if [ "$DIST" == "ubuntu24.04" ]; then | ||||||
| DRIVER_BRANCHES=($(for branch in "${DRIVER_BRANCHES[@]}"; do | ||||||
| [[ $branch != "535" ]] && echo "$branch" | ||||||
| done)) | ||||||
| fi | ||||||
| source ./tests/scripts/ci-precompiled-helpers.sh | ||||||
| KERNEL_VERSIONS=($(get_kernel_versions_to_test KERNEL_FLAVORS[@] DRIVER_BRANCHES[@] $DIST $LTS_KERNEL)) | ||||||
| KERNEL_VERSIONS=($(get_kernel_versions_to_test KERNEL_FLAVORS[@] DRIVER_BRANCHES[@] $DIST $LTS_KERNEL $PLATFORM_SUFFIX)) | ||||||
| if [ -z "$KERNEL_VERSIONS" ]; then | ||||||
| # no new kernel release | ||||||
| echo "Skipping e2e tests" | ||||||
| exit 0 | ||||||
| fi | ||||||
| # Convert array to JSON format and assign | ||||||
| echo "[]" > ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}.json | ||||||
| printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . > ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}.json | ||||||
| echo "[]" > ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}${PLATFORM_SUFFIX}.json | ||||||
| printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . > ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}${PLATFORM_SUFFIX}.json | ||||||
|
|
||||||
| - name: Upload kernel matrix values as artifacts | ||||||
| uses: actions/upload-artifact@v6 | ||||||
| with: | ||||||
| name: matrix-values-${{ matrix.dist }}-${{ matrix.lts_kernel }} | ||||||
| path: ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}.json | ||||||
| name: matrix-values-${{ matrix.dist }}-${{ matrix.lts_kernel }}${{ env.PLATFORM_SUFFIX }} | ||||||
| path: ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}${{ env.PLATFORM_SUFFIX }}.json | ||||||
| retention-days: 1 | ||||||
|
|
||||||
| collect-e2e-test-matrix: | ||||||
|
|
@@ -256,24 +282,12 @@ jobs: | |||||
| echo "matrix_values_not_empty=0" >> $GITHUB_OUTPUT | ||||||
| kernel_versions=() | ||||||
|
|
||||||
| # Read and merge kernel_version values from dist files | ||||||
| dist_json='${{ needs.set-driver-version-matrix.outputs.dist }}' | ||||||
| DIST=($(echo "$dist_json" | jq -r '.[]')) | ||||||
| lts_kernel_json='${{ needs.set-driver-version-matrix.outputs.lts_kernel }}' | ||||||
| LTS_KERNEL=($(echo "$lts_kernel_json" | jq -r '.[]')) | ||||||
| for dist in "${DIST[@]}"; do | ||||||
| for kernel in "${LTS_KERNEL[@]}"; do | ||||||
| artifact_name="matrix-values-${dist}-${kernel}" | ||||||
| file_path="./matrix-values-artifacts/${artifact_name}/matrix_values_${dist}_${kernel}.json" | ||||||
| if [ -f "$file_path" ]; then | ||||||
| echo "Successfully found artifact: $artifact_name at $file_path" | ||||||
| value=$(jq -r '.[]' "$file_path") | ||||||
| kernel_versions+=($value) | ||||||
| echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT | ||||||
| else | ||||||
| echo "Artifact not found: $artifact_name" | ||||||
| fi | ||||||
| done | ||||||
| # Read and merge kernel_version values from all platform artifacts (amd64 and arm64) | ||||||
| for f in $(find ./matrix-values-artifacts -name "matrix_values_*.json" -type f 2>/dev/null); do | ||||||
| value=$(jq -r '.[]' "$f") | ||||||
| kernel_versions+=($value) | ||||||
| echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT | ||||||
| kernel_versions=($(printf '%s\n' "${kernel_versions[@]}" | sort -u)) | ||||||
| done | ||||||
| echo "Collected Kernel Versions: ${kernel_versions[@]}" | ||||||
| combined_values=$(printf '%s\n' "${kernel_versions[@]}" | jq -R . | jq -s -c . | tr -d ' \n') | ||||||
|
|
@@ -334,6 +348,16 @@ jobs: | |||||
| echo "${{ secrets.AWS_SSH_KEY }}" > ${{ github.workspace }}/key.pem && chmod 400 ${{ github.workspace }}/key.pem | ||||||
| echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV | ||||||
| KERNEL_VERSION="${{ matrix.kernel_version }}" | ||||||
| if [[ "${KERNEL_VERSION}" == *-arm64 ]]; then | ||||||
| PLATFORM=arm64 | ||||||
| PLATFORM_SUFFIX="-arm64" | ||||||
| KERNEL_VERSION="${KERNEL_VERSION%-arm64}" | ||||||
| else | ||||||
| PLATFORM=amd64 | ||||||
| PLATFORM_SUFFIX="" | ||||||
| fi | ||||||
| echo "PLATFORM=$PLATFORM" >> $GITHUB_ENV | ||||||
| echo "PLATFORM_SUFFIX=$PLATFORM_SUFFIX" >> $GITHUB_ENV | ||||||
| # Extract the last segment after the last dash | ||||||
| DIST=${KERNEL_VERSION##*-} | ||||||
| echo "DIST=$DIST" >> $GITHUB_ENV | ||||||
|
|
@@ -342,22 +366,29 @@ jobs: | |||||
| driver_branch_json="${{ needs.set-driver-version-matrix.outputs.driver_branch }}" | ||||||
| DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]')) | ||||||
| echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV | ||||||
| - name: Set kernel version in holodeck_${{ env.DIST }}.yaml | ||||||
| - name: Set kernel version and architecture in holodeck_ubuntu.yaml | ||||||
| run: | | ||||||
| yq eval '.spec += {"kernel": {"version": strenv(KERNEL_VERSION)}}' -i tests/holodeck_${{ env.DIST }}.yaml | ||||||
| yq eval '.spec += {"kernel": {"version": strenv(KERNEL_VERSION)}}' -i tests/holodeck_ubuntu.yaml | ||||||
| if [[ "$DIST" == "ubuntu24.04" ]]; then | ||||||
| yq eval '.spec.instance.os = "ubuntu-24.04"' -i tests/holodeck_ubuntu.yaml | ||||||
| fi | ||||||
| if [[ "$PLATFORM" == "arm64" ]]; then | ||||||
| yq eval '.spec.instance.image.architecture = strenv(PLATFORM)' -i tests/holodeck_ubuntu.yaml | ||||||
| yq eval '.spec.instance.type = "g5g.xlarge"' -i tests/holodeck_ubuntu.yaml | ||||||
| yq eval '.spec.instance.region = "us-west-2"' -i tests/holodeck_ubuntu.yaml | ||||||
| fi | ||||||
|
|
||||||
| - name: Set up Holodeck | ||||||
| uses: NVIDIA/holodeck@v0.2.18 | ||||||
| uses: NVIDIA/holodeck@main | ||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I will update it and specify the actual version once @ArangoGutierrez releases the new version of Holodeck.
|
||||||
| uses: NVIDIA/holodeck@main | |
| uses: NVIDIA/holodeck@v0.2.18 |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `sort -u` deduplication command is placed inside the `for` loop, which means it will be executed on every iteration and will lose accumulated values. The line should be moved outside the loop, after the `done` statement. Leaving it inside the loop will cause incorrect behavior where only the last file's kernel versions are kept after deduplication instead of all accumulated versions.