From c2a15d8f0217412383215c05c2cc3ae36e750443 Mon Sep 17 00:00:00 2001 From: Abigail McCarthy <20771501+a-mccarthy@users.noreply.github.com> Date: Fri, 22 May 2026 11:06:21 -0400 Subject: [PATCH 1/4] Updates for coco 1.1 Signed-off-by: Abigail McCarthy <20771501+a-mccarthy@users.noreply.github.com> --- .../confidential-containers-deploy.rst | 60 +++++++++---------- confidential-containers/release-notes.rst | 22 +++++++ .../supported-platforms.rst | 17 +++--- gpu-operator/deploy-kata-containers.rst | 41 ++++++++----- repo.toml | 5 +- 5 files changed, 89 insertions(+), 56 deletions(-) diff --git a/confidential-containers/confidential-containers-deploy.rst b/confidential-containers/confidential-containers-deploy.rst index e4c415e16..7d1a5db2a 100644 --- a/confidential-containers/confidential-containers-deploy.rst +++ b/confidential-containers/confidential-containers-deploy.rst @@ -142,21 +142,6 @@ Kubernetes Cluster * A Kubernetes cluster with cluster administrator privileges. Refer to the :ref:`Supported Software Components ` table for supported Kubernetes versions. -* containerd version 2.2.2 installed. - Refer to the `containerd Getting Started guide `_ for installation instructions. - - To verify the installed version, run the following command: - - .. code-block:: console - - $ containerd --version - - *Example Output:* - - .. code-block:: output - - containerd containerd.io 2.2.2 ... - * Helm installed. Use the command below to install Helm or refer to the `Helm documentation `_ for installation instructions. @@ -294,13 +279,13 @@ Install the Kata Containers Helm Chart Install Kata Containers using the ``kata-deploy`` Helm chart. The ``kata-deploy`` chart installs all required components from the Kata Containers project including the Kata Containers runtime binary, runtime configuration, UVM kernel, and images that NVIDIA uses for Confidential Containers and native Kata containers. -The minimum required version is 3.29.0. +The minimum required version is ${kata_version}. #. Set the chart version and registry path: .. code-block:: console - $ export VERSION="3.29.0" + $ export VERSION="${kata_version}" $ export CHART="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy" @@ -311,7 +296,6 @@ The minimum required version is 3.29.0. $ helm install kata-deploy "${CHART}" \ --namespace kata-system --create-namespace \ --set nfd.enabled=false \ - --wait --timeout 10m \ --version "${VERSION}" *Example Output:* @@ -325,33 +309,45 @@ The minimum required version is 3.29.0. DESCRIPTION: Install complete TEST SUITE: None - .. note:: - - The ``--wait`` flag in the install command instructs Helm to wait until the release is deployed before returning. - It can take a 2-3 minutes to return output. - - There is a `known Helm issue `_ on single node clusters, that may result in the Helm command finishing before all deployed pods are finished initializing. - If you are deploying to a single node cluster, you may need to wait for an additional few minutes after the Helm command completes for the ``kata-deploy`` pod to be in the Running state. - .. note:: Both ``kata-deploy`` and the GPU Operator deploy Node Feature Discovery (NFD) by default. The install command includes ``--set nfd.enabled=false`` to prevent ``kata-deploy`` from deploying NFD. The GPU Operator will deploy and manage NFD in the next step. + .. note:: + + The Helm install command returns as soon as the Kubernetes resources are created. + The ``kata-deploy`` DaemonSet then takes several minutes per node to extract artifacts, restart containerd, and label the node before its pods report ready. + You can use either of the optional verification steps below to confirm readiness before continuing. -#. Optional: Verify that the ``kata-deploy`` pod is running: + +#. Optional: Verify that the ``kata-deploy`` DaemonSet has finished rolling out on every node: .. code-block:: console - $ kubectl get pods -n kata-system | grep kata-deploy + $ kubectl -n kata-system rollout status ds/kata-deploy --timeout=20m + + *Example Output:* + + .. code-block:: output + + Waiting for daemon set "kata-deploy" rollout to finish: 0 of 1 updated pods are available... + daemon set "kata-deploy" successfully rolled out + + +#. Optional: Verify that the ``kata-deploy`` pods are running: + + .. code-block:: console + + $ kubectl get pods -n kata-system *Example Output:* .. code-block:: output - NAME READY STATUS RESTARTS AGE - kata-deploy-b2lzs 1/1 Running 0 6m37s + NAME READY STATUS RESTARTS AGE + kata-deploy-b2lzs 1/1 Running 0 6m37s #. Optional: Verify that the ``kata-qemu-nvidia-gpu``, ``kata-qemu-nvidia-gpu-snp``, and ``kata-qemu-nvidia-gpu-tdx`` runtime classes are available: @@ -415,7 +411,7 @@ Install the NVIDIA GPU Operator and configure it to deploy Confidential Containe --set sandboxWorkloads.mode=kata \ --set nfd.enabled=true \ --set nfd.nodefeaturerules=true \ - --version=v26.3.1 + --version=${gpu_operator_version} *Example Output:* @@ -701,7 +697,7 @@ The following example installs the GPU Operator with both ``P_GPU_ALIAS`` and `` --set kataSandboxDevicePlugin.env[0].value="" \ --set kataSandboxDevicePlugin.env[1].name=NVSWITCH_ALIAS \ --set kataSandboxDevicePlugin.env[1].value="" \ - --version=v26.3.1 + --version=${gpu_operator_version} After installing the GPU Operator, you can view the GPU or NVSwitch resource types available on a node by running the following command: diff --git a/confidential-containers/release-notes.rst b/confidential-containers/release-notes.rst index 5f7ddbe4f..9b2c733f9 100644 --- a/confidential-containers/release-notes.rst +++ b/confidential-containers/release-notes.rst @@ -26,6 +26,28 @@ This document describes the new features and known issues for the NVIDIA Confide ---- +.. _coco-v1.1.0: + +1.1.0 +===== + +This release expands hardware coverage and updates the validated software stack. + +New Features +------------ + +* Added support for the NVIDIA HGX B300 platform with both single-GPU and multi-GPU passthrough. + +* Added support for Ubuntu 26.04 as a host operating system. + +* Added support for the following software components: + + * Kata Containers 3.31.0 + * containerd 2.3.x + * NVIDIA GPU Operator v26.3.1 + +---- + .. _coco-v1.0.0: 1.0.0 diff --git a/confidential-containers/supported-platforms.rst b/confidential-containers/supported-platforms.rst index 986d170ef..13672b6b7 100644 --- a/confidential-containers/supported-platforms.rst +++ b/confidential-containers/supported-platforms.rst @@ -52,6 +52,9 @@ NVIDIA GPUs * - NVIDIA B200 - Single-GPU, Multi-GPU + * - NVIDIA HGX B300 + - Single-GPU, Multi-GPU + * - NVIDIA RTX Pro 6000 BSE - Single-GPU @@ -75,10 +78,10 @@ CPU Platforms - Operating System - Kernel Version * - AMD Genoa / Milan - - Ubuntu 25.10 + - Ubuntu 25.10 or 26.04 - 6.17+ * - Intel Emerald Rapids (ER) / Granite Rapids (GR) - - Ubuntu 25.10 + - Ubuntu 25.10 or 26.04 - 6.17+ For additional information on node configuration, refer to the `Confidential Computing Deployment Guide `_ for information about supported NVIDIA GPUs, such as the NVIDIA Hopper H100. @@ -88,7 +91,7 @@ The following topics in the deployment guide apply to a cloud-native environment * Hardware selection and initial hardware configuration, such as BIOS settings. * Host operating system selection, initial configuration, and validation. -When following the cloud-native sections in the deployment guide linked above, use Ubuntu 25.10 as the host OS with its default kernel version and configuration. +When following the cloud-native sections in the deployment guide linked above, use Ubuntu 25.10 or 26.04 as the host OS with its default kernel version and configuration. For additional resources on machine setup: @@ -114,15 +117,15 @@ Supported Software Components * - `QEMU `__ - 10.1 \+ Patches * - `Containerd `__ - - 2.2.2 + - 2.2.x or 2.3.x * - `Kubernetes `__ - 1.32 \+ * - `NVIDIA GPU Operator `__ and its components. - + Refer to the :ref:`GPU Operator Component Matrix ` for the list of components and versions included in each release. - - v26.3.1 and higher + - ${gpu_operator_version} and higher * - `Kata Containers `__ - - 3.29 (installed with ``kata-deploy`` Helm chart) + - ${kata_version} (installed with ``kata-deploy`` Helm chart) * - `Key Broker Service (KBS) protocol `__ - 0.4.0 * - `Kata Lifecycle Manager `__ diff --git a/gpu-operator/deploy-kata-containers.rst b/gpu-operator/deploy-kata-containers.rst index 6cd877c40..289b042d9 100644 --- a/gpu-operator/deploy-kata-containers.rst +++ b/gpu-operator/deploy-kata-containers.rst @@ -280,13 +280,13 @@ Install the Kata Containers Helm Chart Install Kata Containers using the ``kata-deploy`` Helm chart. The ``kata-deploy`` chart installs all required components from the Kata Containers project including the Kata Containers runtime binary, runtime configuration, UVM kernel, and images that NVIDIA uses for Kata Containers. -The minimum required version is 3.29.0. +The minimum required version is ${kata_version}. #. Set the chart version and registry path: .. code-block:: console - $ export VERSION="3.29.0" + $ export VERSION="${kata_version}" $ export CHART="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy" @@ -297,7 +297,6 @@ The minimum required version is 3.29.0. $ helm install kata-deploy "${CHART}" \ --namespace kata-system --create-namespace \ --set nfd.enabled=false \ - --wait --timeout 10m \ --version "${VERSION}" *Example Output:* @@ -311,33 +310,45 @@ The minimum required version is 3.29.0. DESCRIPTION: Install complete TEST SUITE: None - .. note:: - - The ``--wait`` flag in the install command instructs Helm to wait until the release is deployed before returning. - It can take a few minutes to return output. - - There is a `known Helm issue `_ on single node clusters, that may result in the Helm command finishing before all deployed pods are finished initializing. - If you are deploying to a single node cluster, you may need to wait for an additional few minutes after the Helm command completes for the ``kata-deploy`` pod to be in the Running state. - .. note:: Both ``kata-deploy`` and the GPU Operator deploy Node Feature Discovery (NFD) by default. The install command includes ``--set nfd.enabled=false`` to prevent ``kata-deploy`` from deploying NFD. The GPU Operator will deploy and manage NFD in the next step. + .. note:: -#. Optional: Verify that the ``kata-deploy`` pod is running: + The Helm install command returns as soon as the Kubernetes resources are created. + The ``kata-deploy`` DaemonSet then takes several minutes per node to extract artifacts, restart containerd, and label the node before its pods report ready. + You can use either of the optional verification steps below to confirm readiness before continuing. + + +#. Optional: Verify that the ``kata-deploy`` DaemonSet has finished rolling out on every node: .. code-block:: console - $ kubectl get pods -n kata-system | grep kata-deploy + $ kubectl -n kata-system rollout status ds/kata-deploy --timeout=20m + + *Example Output:* + + .. code-block:: output + + Waiting for daemon set "kata-deploy" rollout to finish: 0 of 1 updated pods are available... + daemon set "kata-deploy" successfully rolled out + + +#. Optional: Verify that the ``kata-deploy`` pods are running: + + .. code-block:: console + + $ kubectl get pods -n kata-system *Example Output:* .. code-block:: output - NAME READY STATUS RESTARTS AGE - kata-deploy-b2lzs 1/1 Running 0 6m37s + NAME READY STATUS RESTARTS AGE + kata-deploy-b2lzs 1/1 Running 0 6m37s #. Optional: Verify that the ``kata-qemu-nvidia-gpu`` runtime class is available: diff --git a/repo.toml b/repo.toml index 385a779ee..392597459 100644 --- a/repo.toml +++ b/repo.toml @@ -172,7 +172,7 @@ docs_root = "${root}/gpu-operator" project = "gpu-operator" name = "NVIDIA GPU Operator" version = "26.3" # Update repo_docs.projects.openshift.version to match latest patch version maj.min.patch -source_substitutions = { minor_version = "26.3", version = "v26.3.1", recommended = "580.126.20", dra_version = "25.12.0" } +source_substitutions = { minor_version = "26.3", version = "v26.3.1", recommended = "580.126.20", dra_version = "25.12.0", kata_version = "3.31.0" } copyright_start = 2020 sphinx_exclude_patterns = [ "life-cycle-policy.rst", @@ -209,7 +209,8 @@ output_format = "linkcheck" docs_root = "${root}/confidential-containers" project = "confidential-containers" name = "NVIDIA Confidential Containers Architecture" -version = "1.0.0" +version = "1.1.0" +source_substitutions = { kata_version = "3.31.0", gpu_operator_version = "v26.3.1", gpu_operator_minor_version = "26.3" } copyright_start = 2020 [repo_docs.projects.confidential-containers.builds.linkcheck] From 46c8027b92769c51626e0d7ea49f16e3ba43b4e6 Mon Sep 17 00:00:00 2001 From: Abigail McCarthy <20771501+a-mccarthy@users.noreply.github.com> Date: Wed, 27 May 2026 11:15:40 -0400 Subject: [PATCH 2/4] Add kata helm values file Signed-off-by: Abigail McCarthy <20771501+a-mccarthy@users.noreply.github.com> --- .../confidential-containers-deploy.rst | 14 +- confidential-containers/release-notes.rst | 1 - .../samples/kata-nvidia-gpu-values.yaml | 125 ++++++++++++++++++ .../supported-platforms.rst | 2 +- 4 files changed, 132 insertions(+), 10 deletions(-) create mode 100644 confidential-containers/samples/kata-nvidia-gpu-values.yaml diff --git a/confidential-containers/confidential-containers-deploy.rst b/confidential-containers/confidential-containers-deploy.rst index 7d1a5db2a..0fdc964a1 100644 --- a/confidential-containers/confidential-containers-deploy.rst +++ b/confidential-containers/confidential-containers-deploy.rst @@ -288,14 +288,18 @@ The minimum required version is ${kata_version}. $ export VERSION="${kata_version}" $ export CHART="oci://ghcr.io/kata-containers/kata-deploy-charts/kata-deploy" +#. Create a values file, such as ``kata-nvidia-gpu-values.yaml``, to configure the ``kata-deploy`` chart for NVIDIA Confidential Containers: -#. Install the kata-deploy Helm chart: + .. literalinclude:: ./samples/kata-nvidia-gpu-values.yaml + :language: yaml + +#. Install the kata-deploy Helm chart with the values file: .. code-block:: console $ helm install kata-deploy "${CHART}" \ --namespace kata-system --create-namespace \ - --set nfd.enabled=false \ + -f kata-nvidia-gpu-values.yaml \ --version "${VERSION}" *Example Output:* @@ -309,12 +313,6 @@ The minimum required version is ${kata_version}. DESCRIPTION: Install complete TEST SUITE: None - .. note:: - - Both ``kata-deploy`` and the GPU Operator deploy Node Feature Discovery (NFD) by default. - The install command includes ``--set nfd.enabled=false`` to prevent ``kata-deploy`` from deploying NFD. - The GPU Operator will deploy and manage NFD in the next step. - .. note:: The Helm install command returns as soon as the Kubernetes resources are created. diff --git a/confidential-containers/release-notes.rst b/confidential-containers/release-notes.rst index 9b2c733f9..7fef3fcb0 100644 --- a/confidential-containers/release-notes.rst +++ b/confidential-containers/release-notes.rst @@ -44,7 +44,6 @@ New Features * Kata Containers 3.31.0 * containerd 2.3.x - * NVIDIA GPU Operator v26.3.1 ---- diff --git a/confidential-containers/samples/kata-nvidia-gpu-values.yaml b/confidential-containers/samples/kata-nvidia-gpu-values.yaml new file mode 100644 index 000000000..662feb513 --- /dev/null +++ b/confidential-containers/samples/kata-nvidia-gpu-values.yaml @@ -0,0 +1,125 @@ +# Example values file to enable NVIDIA GPU shims for the NVIDIA +# Confidential Containers Reference Architecture. + +# Disable verbose debug logging in kata-deploy and the Kata runtime. Change this to true if you want to troubleshoot issues. +debug: false + +# Disable Node Feature Discovery (NFD) deployment by kata-deploy. +# Both kata-deploy and the GPU Operator deploy NFD by default. This +# reference architecture relies on the NFD instance that the GPU Operator +# deploys and manages, so the kata-deploy NFD is turned off to avoid a +# duplicate, conflicting deployment. +nfd: + enabled: false + +# Install the nydus snapshotter on each node alongside containerd. +# The confidential -snp and -tdx shims below use nydus to pull container +# images directly into the confidential VM (guest pull), which keeps image +# contents inside the trusted execution environment (TEE). +snapshotter: + setup: ["nydus"] + +# Disable every shim the kata-deploy chart enables by default, then opt in +# only to the NVIDIA GPU shims. +# The default chart installs a broad set of hypervisor and TEE shims +# (clh, dragonball, fc, qemu, qemu-sev, qemu-snp, qemu-tdx, stratovirt, +# and the -runtime-rs variants) that are not part of this reference +# architecture. +shims: + disableAll: true + + # Non-confidential NVIDIA GPU passthrough shim used when Confidential + # Computing mode is off on the node. The runtime class is restricted to + # nodes where the GPU Operator's Confidential Computing Manager has + # reported nvidia.com/cc.ready.state=false, so it will not schedule on + # CC-ready nodes. The empty containerd snapshotter falls back to the + # default (overlayfs); guest pull is not used for this non-confidential + # path. + qemu-nvidia-gpu: + enabled: true + supportedArches: + - amd64 + allowedHypervisorAnnotations: [] + containerd: + snapshotter: "" + runtimeClass: + # This label is automatically added by the GPU Operator. + nodeSelector: + nvidia.com/cc.ready.state: "false" + + # Note: the upstream kata-deploy chart also distributes -runtime-rs + # variants of the NVIDIA GPU shims (an alternative Rust-based runtime). + # They are not yet supported by this reference architecture and are + # left disabled by the disableAll setting above. + + # Confidential NVIDIA GPU passthrough shim for AMD SEV-SNP nodes. + # The runtime class is pinned to nodes where the GPU Operator has set + # nvidia.com/cc.ready.state=true (CC mode applied) AND where Node + # Feature Discovery has set amd.feature.node.kubernetes.io/snp=true + # (host CPU advertises SEV-SNP). The nydus containerd snapshotter and + # CRI-O guestPull pull container images directly into the confidential + # VM. Set agent.httpsProxy / agent.noProxy if the guest needs to reach + # the registry through a proxy. + qemu-nvidia-gpu-snp: + enabled: true + supportedArches: + - amd64 + allowedHypervisorAnnotations: [] + containerd: + snapshotter: "nydus" + forceGuestPull: false + crio: + guestPull: true + agent: + httpsProxy: "" + noProxy: "" + runtimeClass: + # These labels are automatically added by the GPU Operator and NFD + # respectively. + nodeSelector: + nvidia.com/cc.ready.state: "true" + amd.feature.node.kubernetes.io/snp: "true" + + # Confidential NVIDIA GPU passthrough shim for Intel TDX nodes. + # The runtime class is pinned to nodes where the GPU Operator has set + # nvidia.com/cc.ready.state=true (CC mode applied) AND where Node + # Feature Discovery has set intel.feature.node.kubernetes.io/tdx=true + # (host CPU advertises TDX). Snapshotter, guest pull, and proxy + # behavior match the SNP shim above. + qemu-nvidia-gpu-tdx: + enabled: true + supportedArches: + - amd64 + allowedHypervisorAnnotations: [] + containerd: + snapshotter: "nydus" + forceGuestPull: false + crio: + guestPull: true + agent: + httpsProxy: "" + noProxy: "" + runtimeClass: + # These labels are automatically added by the GPU Operator and NFD + # respectively. + nodeSelector: + nvidia.com/cc.ready.state: "true" + intel.feature.node.kubernetes.io/tdx: "true" + +# Default shim per architecture used by kata-deploy when a pod does not +# request a specific runtime class. Set to the non-confidential NVIDIA +# GPU shim so pods only run inside a confidential VM when they +# explicitly request the kata-qemu-nvidia-gpu-snp or +# kata-qemu-nvidia-gpu-tdx runtime class. +defaultShim: + amd64: qemu-nvidia-gpu # Can be changed to qemu-nvidia-gpu-snp or qemu-nvidia-gpu-tdx if preferred + +# Create one Kubernetes RuntimeClass per enabled shim above +# (kata-qemu-nvidia-gpu, kata-qemu-nvidia-gpu-snp, kata-qemu-nvidia-gpu-tdx). +# createDefault: false suppresses the generic "kata" RuntimeClass since +# you should always reference a specific NVIDIA shim +# by name in pod specs. +runtimeClasses: + enabled: true + createDefault: false + defaultName: "kata" diff --git a/confidential-containers/supported-platforms.rst b/confidential-containers/supported-platforms.rst index 13672b6b7..88c184c82 100644 --- a/confidential-containers/supported-platforms.rst +++ b/confidential-containers/supported-platforms.rst @@ -117,7 +117,7 @@ Supported Software Components * - `QEMU `__ - 10.1 \+ Patches * - `Containerd `__ - - 2.2.x or 2.3.x + - 2.2.2 or 2.3.x * - `Kubernetes `__ - 1.32 \+ * - `NVIDIA GPU Operator `__ and its components. From c8cc586e19c792c4396649fe21a12ac7a9f0f21b Mon Sep 17 00:00:00 2001 From: Abigail McCarthy <20771501+a-mccarthy@users.noreply.github.com> Date: Wed, 27 May 2026 14:22:06 -0400 Subject: [PATCH 3/4] Add docs changelog Signed-off-by: Abigail McCarthy <20771501+a-mccarthy@users.noreply.github.com> --- confidential-containers/release-notes.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/confidential-containers/release-notes.rst b/confidential-containers/release-notes.rst index 7fef3fcb0..072cb0634 100644 --- a/confidential-containers/release-notes.rst +++ b/confidential-containers/release-notes.rst @@ -45,6 +45,19 @@ New Features * Kata Containers 3.31.0 * containerd 2.3.x + +Docs Changelog +-------------- + +The :ref:`coco-install-kata-chart` procedure was updated for this release. +Changes include: + +* Installs ``kata-deploy`` with a values file instead of inline ``--set`` flags. + +* Includes a new sample values file, :file:`samples/kata-nvidia-gpu-values.yaml`, that configures the ``kata-deploy`` Helm chart for the NVIDIA Confidential Containers reference architecture (NVIDIA GPU shims only, NFD disabled, ``nydus`` snapshotter, and per-shim runtime class node selectors). + +* Adds a readiness verification step using ``kubectl rollout status ds/kata-deploy``. This step relies on the readiness reporting added in Kata Containers 3.31.0 and lets you confirm that ``kata-deploy`` has finished extracting artifacts and restarting containerd on every node before continuing. + ---- .. _coco-v1.0.0: From a41ea7eb3830ff845cbfad7442d2d6950ddcbf68 Mon Sep 17 00:00:00 2001 From: Abigail McCarthy <20771501+a-mccarthy@users.noreply.github.com> Date: Wed, 27 May 2026 14:55:34 -0400 Subject: [PATCH 4/4] Review sample comments Signed-off-by: Abigail McCarthy <20771501+a-mccarthy@users.noreply.github.com> --- .../samples/kata-nvidia-gpu-values.yaml | 44 ++++++------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/confidential-containers/samples/kata-nvidia-gpu-values.yaml b/confidential-containers/samples/kata-nvidia-gpu-values.yaml index 662feb513..cb7ebf608 100644 --- a/confidential-containers/samples/kata-nvidia-gpu-values.yaml +++ b/confidential-containers/samples/kata-nvidia-gpu-values.yaml @@ -1,7 +1,7 @@ # Example values file to enable NVIDIA GPU shims for the NVIDIA # Confidential Containers Reference Architecture. -# Disable verbose debug logging in kata-deploy and the Kata runtime. Change this to true if you want to troubleshoot issues. +# Set to true for verbose kata-deploy and Kata runtime logging. debug: false # Disable Node Feature Discovery (NFD) deployment by kata-deploy. @@ -19,12 +19,8 @@ nfd: snapshotter: setup: ["nydus"] -# Disable every shim the kata-deploy chart enables by default, then opt in -# only to the NVIDIA GPU shims. -# The default chart installs a broad set of hypervisor and TEE shims -# (clh, dragonball, fc, qemu, qemu-sev, qemu-snp, qemu-tdx, stratovirt, -# and the -runtime-rs variants) that are not part of this reference -# architecture. +# Disable the chart's default hypervisor/TEE shims and opt in only to +# the NVIDIA GPU shims supported by this reference architecture. shims: disableAll: true @@ -47,19 +43,10 @@ shims: nodeSelector: nvidia.com/cc.ready.state: "false" - # Note: the upstream kata-deploy chart also distributes -runtime-rs - # variants of the NVIDIA GPU shims (an alternative Rust-based runtime). - # They are not yet supported by this reference architecture and are - # left disabled by the disableAll setting above. - - # Confidential NVIDIA GPU passthrough shim for AMD SEV-SNP nodes. - # The runtime class is pinned to nodes where the GPU Operator has set - # nvidia.com/cc.ready.state=true (CC mode applied) AND where Node - # Feature Discovery has set amd.feature.node.kubernetes.io/snp=true - # (host CPU advertises SEV-SNP). The nydus containerd snapshotter and - # CRI-O guestPull pull container images directly into the confidential - # VM. Set agent.httpsProxy / agent.noProxy if the guest needs to reach - # the registry through a proxy. + # Confidential NVIDIA GPU passthrough for AMD SEV-SNP nodes. + # Scheduled where the GPU Operator reports CC mode is on AND NFD + # reports SEV-SNP support. Set agent.httpsProxy / agent.noProxy if + # the guest needs a proxy to reach the registry. qemu-nvidia-gpu-snp: enabled: true supportedArches: @@ -80,12 +67,9 @@ shims: nvidia.com/cc.ready.state: "true" amd.feature.node.kubernetes.io/snp: "true" - # Confidential NVIDIA GPU passthrough shim for Intel TDX nodes. - # The runtime class is pinned to nodes where the GPU Operator has set - # nvidia.com/cc.ready.state=true (CC mode applied) AND where Node - # Feature Discovery has set intel.feature.node.kubernetes.io/tdx=true - # (host CPU advertises TDX). Snapshotter, guest pull, and proxy - # behavior match the SNP shim above. + # Confidential NVIDIA GPU passthrough for Intel TDX nodes. + # Same selectors and snapshotter behavior as the SNP shim above, + # but pinned to TDX-capable hosts. qemu-nvidia-gpu-tdx: enabled: true supportedArches: @@ -106,11 +90,9 @@ shims: nvidia.com/cc.ready.state: "true" intel.feature.node.kubernetes.io/tdx: "true" -# Default shim per architecture used by kata-deploy when a pod does not -# request a specific runtime class. Set to the non-confidential NVIDIA -# GPU shim so pods only run inside a confidential VM when they -# explicitly request the kata-qemu-nvidia-gpu-snp or -# kata-qemu-nvidia-gpu-tdx runtime class. +# Default shim when a pod does not request a runtime class. Set to the +# non-confidential shim so pods only run in a confidential VM when +# they explicitly request the -snp or -tdx runtime class. defaultShim: amd64: qemu-nvidia-gpu # Can be changed to qemu-nvidia-gpu-snp or qemu-nvidia-gpu-tdx if preferred