From 3b4211b48d2a8d2af758f89617c31890b3fe7ca4 Mon Sep 17 00:00:00 2001
From: Rubin Du
Date: Tue, 31 Mar 2026 10:59:37 -0700
Subject: [PATCH] Add --query-vgpu-mode and --set-vgpu-mode options

Add support for querying and configuring vGPU mode via PRC knob 0x29 on
Hopper+ GPUs with Nova Core. The vGPU mode is toggled through the FSP RPC
interface and requires a reboot to take effect.

New CLI options:
  --query-vgpu-mode Query current vGPU mode (on/off/unsupported)
  --set-vgpu-mode Set vGPU mode to on or off

Signed-off-by: Rubin Du
---
 cli/main.py | 5 +++++
 cli/per_gpu.py | 23 +++++++++++++++++++++++
 gpu/prc.py | 2 +-
 nvidia_gpu_tools.py | 31 +++++++++++++++++++++++++++++++
 4 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/cli/main.py b/cli/main.py
index 10c3fd1..2882b97 100644
--- a/cli/main.py
+++ b/cli/main.py
@@ -137,6 +137,11 @@ def create_args():
                       "The GPU or switch needs to be reset to make the selected mode active. See --reset-after-ppcie-mode-switch for one way of doing it.")
     argp.add_argument("--test-ppcie-mode-switch", action='store_true', default=False,
                       help="Test switching PPCIE mode.")
+    argp.add_argument("--query-vgpu-mode", action='store_true', default=False,
+                      help="Query the current vGPU mode of the GPU. Supported on Hopper+ GPUs with Nova Core.")
+    argp.add_argument("--set-vgpu-mode", choices=["off", "on"],
+                      help="Configure vGPU mode on GPUs with Nova Core. The choices are off (disabled) or on (enabled). "
+                      "A reboot is required to activate the new mode.")
     argp.add_argument("--set-bar0-firewall-mode", choices=["off", "on"],
                       help="Configure BAR0 firewall mode. The choices are off (disabled) or on (enabled).")
     argp.add_argument("--query-bar0-firewall-mode", action='store_true', default=False,
diff --git a/cli/per_gpu.py b/cli/per_gpu.py
index c275e4f..4bc5cf1 100644
--- a/cli/per_gpu.py
+++ b/cli/per_gpu.py
@@ -222,6 +222,29 @@ def main_per_gpu(gpu, opts):
         cc_mode = gpu.query_cc_mode()
         info(f"{gpu} CC mode is {cc_mode}")
 
+    if opts.set_vgpu_mode:
+        if not gpu.is_gpu() or not gpu.is_hopper_plus:
+            error(f"Configuring vGPU mode is not supported on {gpu}")
+            return False
+
+        try:
+            gpu.set_vgpu_mode(opts.set_vgpu_mode)
+        except GpuError as err:
+            _, _, tb = sys.exc_info()
+            traceback.print_tb(tb)
+            gpu.debug_dump()
+            raise
+
+        info(f"{gpu} vGPU mode set to {opts.set_vgpu_mode}. A reboot is required to activate the new mode.")
+
+    if opts.query_vgpu_mode:
+        if not gpu.is_gpu() or not gpu.is_hopper_plus:
+            error(f"Querying vGPU mode is not supported on {gpu}")
+            return False
+
+        vgpu_mode = gpu.query_vgpu_mode()
+        info(f"{gpu} vGPU mode is {vgpu_mode}")
+
     if opts.query_bar0_firewall_mode:
         if not gpu.is_bar0_firewall_supported:
             error(f"Querying BAR0 firewall mode is not supported on {gpu}")
diff --git a/gpu/prc.py b/gpu/prc.py
index b72406c..df3a608 100644
--- a/gpu/prc.py
+++ b/gpu/prc.py
@@ -99,7 +99,7 @@ class PrcKnob(Enum):
 
     PRC_KNOB_ID_40 = 40
 
-    PRC_KNOB_ID_41 = 41
+    PRC_KNOB_ID_VGPU = 41
 
     PRC_KNOB_ID_42 = 42
 
diff --git a/nvidia_gpu_tools.py b/nvidia_gpu_tools.py
index 2446c12..914bed6 100755
--- a/nvidia_gpu_tools.py
+++ b/nvidia_gpu_tools.py
@@ -3835,6 +3835,37 @@ def set_cc_mode(self, mode):
         self.fsp_rpc.prc_knob_check_and_write(PrcKnob.PRC_KNOB_ID_CCD.value, cc_dev_mode)
         self.fsp_rpc.prc_knob_check_and_write(PrcKnob.PRC_KNOB_ID_CCM.value, cc_mode)
 
+    def query_vgpu_mode(self):
+        assert self.is_hopper_plus
+
+        self._init_fsp_rpc()
+
+        try:
+            knob_value = self.fsp_rpc.prc_knob_read(PrcKnob.PRC_KNOB_ID_VGPU.value)
+        except FspRpcError as err:
+            if err.is_invalid_knob_error:
+                return "unsupported"
+            raise
+
+        if knob_value == 0x1:
+            return "on"
+        else:
+            return "off"
+
+    def set_vgpu_mode(self, mode):
+        assert self.is_hopper_plus
+
+        if mode == "on":
+            vgpu_value = 0x1
+        elif mode == "off":
+            vgpu_value = 0x0
+        else:
+            raise ValueError(f"Invalid vGPU mode {mode}")
+
+        self._init_fsp_rpc()
+
+        self.fsp_rpc.prc_knob_check_and_write(PrcKnob.PRC_KNOB_ID_VGPU.value, vgpu_value)
+
     def query_bar0_firewall_mode(self):
         assert self.is_bar0_firewall_supported
 