diff --git a/tests/cpp/operator/test_cublaslt_gemm.cu b/tests/cpp/operator/test_cublaslt_gemm.cu index 7fa630033..fd3742d99 100644 --- a/tests/cpp/operator/test_cublaslt_gemm.cu +++ b/tests/cpp/operator/test_cublaslt_gemm.cu @@ -108,6 +108,49 @@ static const ProdGemmConfig prod_gemm_sweep[] = { {"DeepSeek3_ExpertMLP_dn_wgrad_mbs1_NT", 2048, 7168, 4096, false, true}, {"DeepSeek3_ExpertMLP_dn_wgrad_mbs2_NT", 2048, 7168, 8192, false, true}, {"DeepSeek3_ExpertMLP_dn_wgrad_mbs4_NT", 2048, 7168, 16384, false, true}, + // DeepSeek4 (from https://amd-hub.atlassian.net/browse/AIHPBLAS-3861) + {"DeepSeek4_M6144_N32_K7168_TN", 6144, 32, 7168, true, false}, + {"DeepSeek4_M6144_N64_K7168_TN", 6144, 64, 7168, true, false}, + {"DeepSeek4_M6144_N96_K7168_TN", 6144, 96, 7168, true, false}, + {"DeepSeek4_M6144_N128_K7168_TN", 6144, 128, 7168, true, false}, + {"DeepSeek4_M6144_N160_K7168_TN", 6144, 160, 7168, true, false}, + {"DeepSeek4_M6144_N192_K7168_TN", 6144, 192, 7168, true, false}, + {"DeepSeek4_M6144_N224_K7168_TN", 6144, 224, 7168, true, false}, + {"DeepSeek4_M6144_N256_K7168_TN", 6144, 256, 7168, true, false}, + {"DeepSeek4_M6144_N288_K7168_TN", 6144, 288, 7168, true, false}, + {"DeepSeek4_M6144_N320_K7168_TN", 6144, 320, 7168, true, false}, + {"DeepSeek4_M6144_N352_K7168_TN", 6144, 352, 7168, true, false}, + {"DeepSeek4_M6144_N384_K7168_TN", 6144, 384, 7168, true, false}, + {"DeepSeek4_M6144_N416_K7168_TN", 6144, 416, 7168, true, false}, + {"DeepSeek4_M6144_N448_K7168_TN", 6144, 448, 7168, true, false}, + {"DeepSeek4_M6144_N480_K7168_TN", 6144, 480, 7168, true, false}, + {"DeepSeek4_M6144_N512_K7168_TN", 6144, 512, 7168, true, false}, + {"DeepSeek4_M6144_N544_K7168_TN", 6144, 544, 7168, true, false}, + {"DeepSeek4_M6144_N576_K7168_TN", 6144, 576, 7168, true, false}, + {"DeepSeek4_M6144_N640_K7168_TN", 6144, 640, 7168, true, false}, + {"DeepSeek4_M6144_N800_K7168_TN", 6144, 800, 7168, true, false}, + {"DeepSeek4_M6144_N832_K7168_TN", 6144, 832, 7168, true, false}, + {"DeepSeek4_M7168_N32_K3072_TN", 7168, 32, 3072, true, false}, + {"DeepSeek4_M7168_N64_K3072_TN", 7168, 64, 3072, true, false}, + {"DeepSeek4_M7168_N96_K3072_TN", 7168, 96, 3072, true, false}, + {"DeepSeek4_M7168_N128_K3072_TN", 7168, 128, 3072, true, false}, + {"DeepSeek4_M7168_N160_K3072_TN", 7168, 160, 3072, true, false}, + {"DeepSeek4_M7168_N192_K3072_TN", 7168, 192, 3072, true, false}, + {"DeepSeek4_M7168_N224_K3072_TN", 7168, 224, 3072, true, false}, + {"DeepSeek4_M7168_N256_K3072_TN", 7168, 256, 3072, true, false}, + {"DeepSeek4_M7168_N288_K3072_TN", 7168, 288, 3072, true, false}, + {"DeepSeek4_M7168_N320_K3072_TN", 7168, 320, 3072, true, false}, + {"DeepSeek4_M7168_N352_K3072_TN", 7168, 352, 3072, true, false}, + {"DeepSeek4_M7168_N384_K3072_TN", 7168, 384, 3072, true, false}, + {"DeepSeek4_M7168_N416_K3072_TN", 7168, 416, 3072, true, false}, + {"DeepSeek4_M7168_N448_K3072_TN", 7168, 448, 3072, true, false}, + {"DeepSeek4_M7168_N480_K3072_TN", 7168, 480, 3072, true, false}, + {"DeepSeek4_M7168_N512_K3072_TN", 7168, 512, 3072, true, false}, + {"DeepSeek4_M7168_N544_K3072_TN", 7168, 544, 3072, true, false}, + {"DeepSeek4_M7168_N576_K3072_TN", 7168, 576, 3072, true, false}, + {"DeepSeek4_M7168_N640_K3072_TN", 7168, 640, 3072, true, false}, + {"DeepSeek4_M7168_N800_K3072_TN", 7168, 800, 3072, true, false}, + {"DeepSeek4_M7168_N832_K3072_TN", 7168, 832, 3072, true, false}, // Qwen3 {"Qwen3_LNLinear_QKV_fwd_mbs1_TN", 4096, 9216, 4096, true, false}, {"Qwen3_LNLinear_QKV_fwd_mbs2_TN", 8192, 9216, 4096, true, false},