Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,11 @@ def create_args():
"The GPU or switch needs to be reset to make the selected mode active. See --reset-after-ppcie-mode-switch for one way of doing it.")
argp.add_argument("--test-ppcie-mode-switch", action='store_true', default=False,
help="Test switching PPCIE mode.")
argp.add_argument("--query-vgpu-mode", action='store_true', default=False,
help="Query the current vGPU mode of the GPU. Supported on Hopper+ GPUs with Nova Core.")
argp.add_argument("--set-vgpu-mode", choices=["off", "on"],
help="Configure vGPU mode on GPUs with Nova Core. The choices are off (disabled) or on (enabled). "
"A reboot is required to activate the new mode.")
argp.add_argument("--set-bar0-firewall-mode", choices=["off", "on"],
help="Configure BAR0 firewall mode. The choices are off (disabled) or on (enabled).")
argp.add_argument("--query-bar0-firewall-mode", action='store_true', default=False,
Expand Down
23 changes: 23 additions & 0 deletions cli/per_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,29 @@ def main_per_gpu(gpu, opts):
cc_mode = gpu.query_cc_mode()
info(f"{gpu} CC mode is {cc_mode}")

if opts.set_vgpu_mode:
if not gpu.is_gpu() or not gpu.is_hopper_plus:
error(f"Configuring vGPU mode is not supported on {gpu}")
return False

try:
gpu.set_vgpu_mode(opts.set_vgpu_mode)
except GpuError as err:
_, _, tb = sys.exc_info()
traceback.print_tb(tb)
gpu.debug_dump()
raise

info(f"{gpu} vGPU mode set to {opts.set_vgpu_mode}. A reboot is required to activate the new mode.")

if opts.query_vgpu_mode:
if not gpu.is_gpu() or not gpu.is_hopper_plus:
error(f"Querying vGPU mode is not supported on {gpu}")
return False

vgpu_mode = gpu.query_vgpu_mode()
info(f"{gpu} vGPU mode is {vgpu_mode}")

if opts.query_bar0_firewall_mode:
if not gpu.is_bar0_firewall_supported:
error(f"Querying BAR0 firewall mode is not supported on {gpu}")
Expand Down
2 changes: 1 addition & 1 deletion gpu/prc.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class PrcKnob(Enum):

PRC_KNOB_ID_40 = 40

PRC_KNOB_ID_41 = 41
PRC_KNOB_ID_VGPU = 41

PRC_KNOB_ID_42 = 42

Expand Down
31 changes: 31 additions & 0 deletions nvidia_gpu_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3835,6 +3835,37 @@ def set_cc_mode(self, mode):
self.fsp_rpc.prc_knob_check_and_write(PrcKnob.PRC_KNOB_ID_CCD.value, cc_dev_mode)
self.fsp_rpc.prc_knob_check_and_write(PrcKnob.PRC_KNOB_ID_CCM.value, cc_mode)

def query_vgpu_mode(self):
    """Read the vGPU PRC knob over FSP RPC and report it as a string.

    Returns:
        "on" when the knob reads 0x1, "off" for any other value, or
        "unsupported" when the read fails with an invalid-knob error.

    Raises:
        FspRpcError: for any RPC failure other than an invalid knob.
    """
    assert self.is_hopper_plus

    self._init_fsp_rpc()

    try:
        raw_knob = self.fsp_rpc.prc_knob_read(PrcKnob.PRC_KNOB_ID_VGPU.value)
    except FspRpcError as rpc_err:
        # Only the "invalid knob" failure is translated; everything else
        # is passed up to the caller unchanged.
        if not rpc_err.is_invalid_knob_error:
            raise
        return "unsupported"

    return "on" if raw_knob == 0x1 else "off"

def set_vgpu_mode(self, mode):
    """Write the requested vGPU mode to the vGPU PRC knob over FSP RPC.

    Args:
        mode: "on" (knob value 0x1) or "off" (knob value 0x0).

    Raises:
        ValueError: if mode is neither "on" nor "off". This is raised
            before the FSP RPC channel is initialized.
    """
    assert self.is_hopper_plus

    # Reject unknown modes up front, before any FSP RPC setup happens.
    if mode not in ("on", "off"):
        raise ValueError(f"Invalid vGPU mode {mode}")

    knob_setting = 0x1 if mode == "on" else 0x0

    self._init_fsp_rpc()

    self.fsp_rpc.prc_knob_check_and_write(PrcKnob.PRC_KNOB_ID_VGPU.value, knob_setting)

def query_bar0_firewall_mode(self):
assert self.is_bar0_firewall_supported

Expand Down