From 016686fc336a9fc1aab9504a5ccd09f352146628 Mon Sep 17 00:00:00 2001
From: ppalanga
Date: Wed, 17 Jun 2026 08:19:31 -0700
Subject: [PATCH 1/3] Update amd-master.yaml

Adding EP configurations
---
Review notes (this section is ignored by `git am`):
- The series was received with all newlines and indentation collapsed. It has
  been re-split into one record per line; hunk layout was re-derived from the
  @@ counts and the diffstat totals.
- Leading indentation inside hunks is restored by convention (2-space YAML,
  4-space shell continuation) -- TODO confirm against the target files before
  applying, since context lines must match byte-for-byte.
- Content of this patch is unchanged.

 .github/configs/amd-master.yaml | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml
index 606e3c2af..d5e334b8b 100644
--- a/.github/configs/amd-master.yaml
+++ b/.github/configs/amd-master.yaml
@@ -1,5 +1,5 @@
 dsr1-fp4-mi355x-sglang:
-  image: lmsysorg/sglang:v0.5.12-rocm700-mi35x
+  image: lmsysorg/sglang:v0.5.13-rocm700-mi35x
   model: amd/DeepSeek-R1-0528-MXFP4-Preview
   model-prefix: dsr1
   runner: mi355x
@@ -11,13 +11,18 @@ dsr1-fp4-mi355x-sglang:
   - isl: 1024
     osl: 1024
     search-space:
-    - { tp: 4, conc-start: 4, conc-end: 64 }
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 256 }
+    - { tp: 8, conc-start: 4, conc-end: 256 }
+    - { tp: 4, ep: 4, conc-start: 4, conc-end: 256 }
+    - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 }
   - isl: 8192
     osl: 1024
     search-space:
-    - { tp: 4, conc-start: 4, conc-end: 64 }
-    - { tp: 8, conc-start: 4, conc-end: 64 }
+    - { tp: 4, conc-start: 4, conc-end: 256 }
+    - { tp: 8, conc-start: 4, conc-end: 256 }
+    - { tp: 4, ep: 4, conc-start: 4, conc-end: 256 }
+    - { tp: 8, ep: 8, conc-start: 4, conc-end: 256 }
+
   # Agentic-coding sweep commented out for this image-bump PR — the
   # 10-conc agentic matrix amplifies sweep cost and the bump validation
   # only needs the fixed-seq-len throughput shape. Re-enable once the
From 98e80e6d5ca8a8aba1e71e46e7cfbe17edd3a48a Mon Sep 17 00:00:00 2001
From: ppalanga
Date: Wed, 17 Jun 2026 08:24:14 -0700
Subject: [PATCH 2/3] Update dsr1_fp4_mi355x.sh

---
Review notes (this section is ignored by `git am`):
- Added an explicit `PARALLEL_ARGS=()` before the EP check. The original hunk
  appended to and expanded PARALLEL_ARGS without initialising it, so a scalar
  PARALLEL_ARGS inherited from the environment would have been passed to
  sglang.launch_server as an extra argument.
- Hunk header and diffstat updated for the two added lines. The post-image
  blob id on the `index` line was not recomputed and is stale.

 benchmarks/single_node/fixed_seq_len/dsr1_fp4_mi355x.sh | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/benchmarks/single_node/fixed_seq_len/dsr1_fp4_mi355x.sh b/benchmarks/single_node/fixed_seq_len/dsr1_fp4_mi355x.sh
index bb6ce75cb..b7d758d29 100644
--- a/benchmarks/single_node/fixed_seq_len/dsr1_fp4_mi355x.sh
+++ b/benchmarks/single_node/fixed_seq_len/dsr1_fp4_mi355x.sh
@@ -5,6 +5,7 @@ source "$(dirname "$0")/../../benchmark_lib.sh"
 check_env_vars \
     MODEL \
     TP \
+    EP_SIZE \
     CONC \
     ISL \
     OSL \
@@ -37,16 +38,23 @@ fi
 # Start GPU monitoring (power, temperature, clocks every second)
 start_gpu_monitor
 
+# Extra parallelism flags for the launch command; stays empty unless EP_SIZE > 1.
+PARALLEL_ARGS=()
+if [ "${EP_SIZE:-1}" -gt 1 ]; then
+    PARALLEL_ARGS+=( --ep-size "$EP_SIZE" )
+fi
+
 set -x
 python3 -m sglang.launch_server --model-path=$MODEL --trust-remote-code \
 --host=0.0.0.0 --port=$PORT \
+"${PARALLEL_ARGS[@]}" \
 --tensor-parallel-size=$TP \
 --chunked-prefill-size=$PREFILL_SIZE \
 --mem-fraction-static=0.8 \
 --disable-radix-cache \
 --num-continuous-decode-steps=4 \
 --max-prefill-tokens=$PREFILL_SIZE \
---cuda-graph-max-bs=128 \
+--cuda-graph-max-bs=512 \
 --attention-backend aiter \
 --kv-cache-dtype fp8_e4m3 $EVAL_CONTEXT_ARGS > $SERVER_LOG 2>&1 &
 
From 008eeaa5d8afcadb678599144de5e0bdd17237c3 Mon Sep 17 00:00:00 2001
From: ppalanga
Date: Wed, 17 Jun 2026 08:30:01 -0700
Subject: [PATCH 3/3] Update perf-changelog.yaml

---
Review notes (this section is ignored by `git am`):
- The changelog entry now also records the image bump, the wider concurrency
  range and the larger CUDA-graph batch size that patches 1/3 and 2/3 make.
- Hunk header and diffstat updated for the two added lines. The post-image
  blob id on the `index` line was not recomputed and is stale.

 perf-changelog.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 1f88b47aa..8cae1e723 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -3919,3 +3919,10 @@
     - "Also update all configs for DSR1 TRTLLM FP8 to reflect latest released image usage"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1767
 
+- config-keys:
+    - dsr1-fp4-mi355x-sglang
+  description:
+    - "Introduce EP configurations"
+    - "Bump SGLang image to v0.5.13-rocm700-mi35x and raise conc-end from 64 to 256"
+    - "Raise --cuda-graph-max-bs from 128 to 512"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1811