From e109b8358202be39c91e20cf837567b294d6b756 Mon Sep 17 00:00:00 2001
From: Annie Ren
Date: Tue, 16 Jun 2026 23:51:28 -0600
Subject: [PATCH] Add mmq device table for RDNA3.5

---
 ggml/src/ggml-cuda/mmq.cuh | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh
index edf546d8f1e2..b58ac9e7b428 100644
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -140,6 +140,9 @@ static constexpr __device__ int get_mmq_x_max_device() {
 }
 
 static int get_mmq_y_host(const int cc) {
+    if (GGML_CUDA_CC_IS_RDNA3_5(cc)) {
+        return 64;
+    }
     return GGML_CUDA_CC_IS_AMD(cc) ? (GGML_CUDA_CC_IS_RDNA1(cc) ? 64 : 128) : ((GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA) ? 128 : 64);
 }
 
@@ -155,7 +158,9 @@ if (type == GGML_TYPE_NVFP4 || type == GGML_TYPE_MXFP4) {
 
 static constexpr __device__ int get_mmq_y_device() {
 #if defined(GGML_USE_HIP)
-#if defined(RDNA1)
+#if defined(RDNA3_5)
+    return 64;
+#elif defined(RDNA1)
     return 64;
 #else
     return 128;
@@ -296,6 +301,9 @@ static constexpr __device__ int mmq_get_granularity_device(const int /*mmq_x*/)
 
 #if defined(GGML_USE_HIP)
 static int mmq_get_nwarps_host(const int cc, const int warp_size) {
+    if (GGML_CUDA_CC_IS_RDNA3_5(cc)) {
+        return 4;
+    }
     return amd_mfma_available(cc) ? 8 : 256/warp_size;
 }
 #else
@@ -305,7 +313,9 @@ static int mmq_get_nwarps_host(const int /*cc*/, const int warp_size) {
 #endif // (GGML_USE_HIP)
 
 static constexpr __device__ int mmq_get_nwarps_device() {
-#if defined(AMD_MFMA_AVAILABLE) || defined(AMD_WMMA_AVAILABLE)
+#if defined(RDNA3_5)
+    return 4;
+#elif defined(AMD_MFMA_AVAILABLE) || defined(AMD_WMMA_AVAILABLE)
     return 8;
 #else
     return 256/ggml_cuda_get_physical_warp_size();