From f466b9384ea623e3d4e799897a4cd08d5ece0073 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 15 Mar 2026 17:18:49 +0000 Subject: [PATCH 1/2] Initial plan From 9f6656dba0e2c8a8824629ccb73b0e92b44ad1da Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 15 Mar 2026 17:24:11 +0000 Subject: [PATCH 2/2] Fix AMD APU VIS_VRAM detection issues found in PR review Co-authored-by: Readon <3614708+Readon@users.noreply.github.com> --- gpustack_runtime/detector/amd.py | 37 +++++++------------ .../detector/pyrocmsmi/__init__.py | 8 ++-- 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/gpustack_runtime/detector/amd.py b/gpustack_runtime/detector/amd.py index bc19590..a044c68 100644 --- a/gpustack_runtime/detector/amd.py +++ b/gpustack_runtime/detector/amd.py @@ -177,19 +177,8 @@ def detect(self) -> Devices | None: dev_mem_status = DeviceMemoryStatusEnum.HEALTHY try: dev_gpu_vram_usage = pyamdsmi.amdsmi_get_gpu_vram_usage(dev) - dev_mem = dev_gpu_vram_usage.get("vram_total") - dev_mem_used = dev_gpu_vram_usage.get("vram_used") - # On APUs with unified memory (e.g., AMD Strix Halo), VRAM - # reports only the BIOS carveout (~512 MiB); VIS_VRAM reports - # the full usable system memory. Use VIS_VRAM when larger. - with contextlib.suppress(pyrocmsmi.ROCMSMIError): - dev_mem_vis_vram = byte_to_mebibyte( - pyrocmsmi.rsmi_dev_memory_total_get( - dev_idx, - pyrocmsmi.RSMI_MEM_TYPE_VIS_VRAM, - ), - ) - dev_mem = max(dev_mem, dev_mem_vis_vram) + dev_mem = dev_gpu_vram_usage.get("vram_total") or 0 + dev_mem_used = dev_gpu_vram_usage.get("vram_used") or 0 dev_ecc_count = pyamdsmi.amdsmi_get_gpu_ecc_count( dev, pyamdsmi.AmdSmiGpuBlock.UMC, @@ -200,17 +189,6 @@ def detect(self) -> Devices | None: dev_mem = byte_to_mebibyte( # byte to MiB pyrocmsmi.rsmi_dev_memory_total_get(dev_idx), ) - # On APUs with unified memory (e.g., AMD Strix Halo), VRAM - # reports only the BIOS carveout (~512 MiB); VIS_VRAM reports - # the full usable system memory. Use VIS_VRAM when larger. - with contextlib.suppress(pyrocmsmi.ROCMSMIError): - dev_mem_vis_vram = byte_to_mebibyte( - pyrocmsmi.rsmi_dev_memory_total_get( - dev_idx, - pyrocmsmi.RSMI_MEM_TYPE_VIS_VRAM, - ), - ) - dev_mem = max(dev_mem, dev_mem_vis_vram) dev_mem_used = byte_to_mebibyte( # byte to MiB pyrocmsmi.rsmi_dev_memory_usage_get(dev_idx), ) @@ -220,6 +198,17 @@ def detect(self) -> Devices | None: ) if dev_ecc_count.uncorrectable_err > 0: dev_mem_status = DeviceMemoryStatusEnum.UNHEALTHY + # On APUs with unified memory (e.g., AMD Strix Halo), VRAM + # reports only the BIOS carveout (~512 MiB); VIS_VRAM reports + # the full usable system memory. Use VIS_VRAM when larger. + with contextlib.suppress(pyrocmsmi.ROCMSMIError): + dev_mem_vis_vram = byte_to_mebibyte( + pyrocmsmi.rsmi_dev_memory_total_get( + dev_idx, + pyrocmsmi.RSMI_MEM_TYPE_VIS_VRAM, + ), + ) + dev_mem = max(dev_mem, dev_mem_vis_vram) dev_power = None dev_power_used = None diff --git a/gpustack_runtime/detector/pyrocmsmi/__init__.py b/gpustack_runtime/detector/pyrocmsmi/__init__.py index 088b33e..3fedd0e 100644 --- a/gpustack_runtime/detector/pyrocmsmi/__init__.py +++ b/gpustack_runtime/detector/pyrocmsmi/__init__.py @@ -64,8 +64,8 @@ ## Memory Types ## RSMI_MEM_TYPE_VRAM = 0 -RSMI_MEM_TYPE_GTT = 1 -RSMI_MEM_TYPE_VIS_VRAM = 2 +RSMI_MEM_TYPE_VIS_VRAM = 1 +RSMI_MEM_TYPE_GTT = 2 ## Error Codes ## ROCMSMI_ERROR_UNINITIALIZED = -99997 @@ -231,7 +231,7 @@ def rsmi_dev_busy_percent_get(device=0): def rsmi_dev_memory_usage_get(device=0, memory_type=None): if memory_type is None: - memory_type = rsmi_memory_type_t.RSMI_MEM_TYPE_VRAM + memory_type = RSMI_MEM_TYPE_VRAM c_used = c_uint64() fn = _rocmsmiGetFunctionPointer("rsmi_dev_memory_usage_get") ret = fn(device, memory_type, byref(c_used)) @@ -241,7 +241,7 @@ def rsmi_dev_memory_usage_get(device=0, memory_type=None): def rsmi_dev_memory_total_get(device=0, memory_type=None): if memory_type is None: - memory_type = rsmi_memory_type_t.RSMI_MEM_TYPE_VRAM + memory_type = RSMI_MEM_TYPE_VRAM c_total = c_uint64() fn = _rocmsmiGetFunctionPointer("rsmi_dev_memory_total_get") ret = fn(device, memory_type, byref(c_total))