From 75f8c2e23fcf91a6d009737cb259776f235c24b3 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Tue, 23 Jun 2026 07:53:47 +0800 Subject: [PATCH 1/2] chore: use MiniMax-M3 GB300 performance image --- .github/configs/nvidia-master.yaml | 2 +- .../1k1k/disagg-gb300-1p1d-dep2-tep8-3n.yaml | 2 +- .../1k1k/disagg-gb300-1p1d-dep2-tp4-marlin-2n.yaml | 2 +- .../1k1k/disagg-gb300-1p2d-dep2-dep4-3n.yaml | 2 +- .../1k1k/disagg-gb300-2p1d-dep2-dep8-3n.yaml | 2 +- .../1k1k/disagg-gb300-2p1d-dep2-tep8-3n.yaml | 2 +- .../1k1k/disagg-gb300-2p2d-dep2-tep8-5n.yaml | 2 +- .../1k1k/disagg-gb300-3p2d-dep2-tep8-6n.yaml | 2 +- .../8k1k/disagg-gb300-1p1d-dep2-tp4-marlin-2n.yaml | 2 +- .../8k1k/disagg-gb300-1p2d-dep2-dep8-5n.yaml | 2 +- .../8k1k/disagg-gb300-2p2d-dep2-dep8-5n.yaml | 2 +- .../8k1k/disagg-gb300-2p2d-dep2-tep8-5n.yaml | 2 +- .../8k1k/disagg-gb300-3p2d-dep2-dep8-6n.yaml | 2 +- .../8k1k/disagg-gb300-3p2d-dep2-tep8-6n.yaml | 2 +- .../8k1k/disagg-gb300-4p2d-dep2-dep8-6n.yaml | 2 +- .../8k1k/disagg-gb300-5p2d-dep2-tep8-7n.yaml | 2 +- perf-changelog.yaml | 7 +++++++ 17 files changed, 23 insertions(+), 16 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index c99f59634..1ee6e98d0 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -11609,7 +11609,7 @@ qwen3.5-fp8-h100-sglang-agentic: # DEP8, DEP4. 4 GPU/node (GB300 NVL72). 4p3d (3 decode workers) skipped. # kv-cache-dtype=fp8 added. srun_options mem=0 required. minimaxm3-fp8-gb300-dynamo-vllm: - image: vllm/vllm-openai:nightly-aarch64 + image: vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223 model: MiniMaxAI/MiniMax-M3-MXFP8 model-prefix: minimaxm3 runner: gb300-nv diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p1d-dep2-tep8-3n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p1d-dep2-tep8-3n.yaml index fd79fcee1..4b00b5660 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p1d-dep2-tep8-3n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p1d-dep2-tep8-3n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-1p1d-dep2-tep8-fp8-1k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p1d-dep2-tp4-marlin-2n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p1d-dep2-tp4-marlin-2n.yaml index 59612a695..26fa89b94 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p1d-dep2-tp4-marlin-2n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p1d-dep2-tp4-marlin-2n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-1p1d-dep2-tp4-marlin-fp8-1k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p2d-dep2-dep4-3n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p2d-dep2-dep4-3n.yaml index d5dc421ad..af5315c76 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p2d-dep2-dep4-3n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-1p2d-dep2-dep4-3n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-1p2d-dep2-dep4-fp8-1k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p1d-dep2-dep8-3n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p1d-dep2-dep8-3n.yaml index a1c7cdb6c..7cc5f50c4 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p1d-dep2-dep8-3n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p1d-dep2-dep8-3n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-2p1d-dep2-dep8-fp8-1k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p1d-dep2-tep8-3n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p1d-dep2-tep8-3n.yaml index 94709574a..0c4f3498c 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p1d-dep2-tep8-3n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p1d-dep2-tep8-3n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-2p1d-dep2-tep8-fp8-1k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p2d-dep2-tep8-5n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p2d-dep2-tep8-5n.yaml index 049893137..5babf0835 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p2d-dep2-tep8-5n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-2p2d-dep2-tep8-5n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-2p2d-dep2-tep8-fp8-1k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-3p2d-dep2-tep8-6n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-3p2d-dep2-tep8-6n.yaml index 95ef6e17d..d4176055a 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-3p2d-dep2-tep8-6n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/1k1k/disagg-gb300-3p2d-dep2-tep8-6n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-3p2d-dep2-tep8-fp8-1k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-1p1d-dep2-tp4-marlin-2n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-1p1d-dep2-tp4-marlin-2n.yaml index 4bb218ae8..4ee41241e 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-1p1d-dep2-tp4-marlin-2n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-1p1d-dep2-tp4-marlin-2n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-1p1d-dep2-tp4-marlin-fp8-8k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-1p2d-dep2-dep8-5n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-1p2d-dep2-dep8-5n.yaml index 88b923633..b56b65b26 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-1p2d-dep2-dep8-5n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-1p2d-dep2-dep8-5n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-1p2d-dep2-dep8-fp8-8k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-2p2d-dep2-dep8-5n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-2p2d-dep2-dep8-5n.yaml index 61bbddf4e..7beba3420 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-2p2d-dep2-dep8-5n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-2p2d-dep2-dep8-5n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-2p2d-dep2-dep8-fp8-8k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-2p2d-dep2-tep8-5n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-2p2d-dep2-tep8-5n.yaml index 428943e5f..1ea678ace 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-2p2d-dep2-tep8-5n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-2p2d-dep2-tep8-5n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-2p2d-dep2-tep8-fp8-8k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-3p2d-dep2-dep8-6n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-3p2d-dep2-dep8-6n.yaml index 7feaa1d18..f4e000a5f 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-3p2d-dep2-dep8-6n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-3p2d-dep2-dep8-6n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-3p2d-dep2-dep8-fp8-8k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-3p2d-dep2-tep8-6n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-3p2d-dep2-tep8-6n.yaml index b9276d154..35950dc32 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-3p2d-dep2-tep8-6n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-3p2d-dep2-tep8-6n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-3p2d-dep2-tep8-fp8-8k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-4p2d-dep2-dep8-6n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-4p2d-dep2-dep8-6n.yaml index 9d025f69d..1526cd7ad 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-4p2d-dep2-dep8-6n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-4p2d-dep2-dep8-6n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-4p2d-dep2-dep8-fp8-8k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-5p2d-dep2-tep8-7n.yaml b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-5p2d-dep2-tep8-7n.yaml index 2663a6178..dbc9c5c9a 100644 --- a/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-5p2d-dep2-tep8-7n.yaml +++ b/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8/8k1k/disagg-gb300-5p2d-dep2-tep8-7n.yaml @@ -5,7 +5,7 @@ name: "minimax-m3-vllm-disagg-gb300-5p2d-dep2-tep8-fp8-8k1k" model: path: "minimax-m3-mxfp8" - container: "vllm/vllm-openai:nightly-aarch64" + container: "vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" precision: "fp8" dynamo: diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 12cb29600..e0a231eeb 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -4036,3 +4036,10 @@ - "Image: lmsysorg/sglang:nightly-dev-cu13-20260608-303757cc" - "6 topologies across 1k/1k and 8k/1k: 1P1D TP4 STP + wide-EP (DEP4 prefill / DEP16 decode) from 1P1D up to 8P1D, recipes under benchmarks/multi_node/srt-slurm-recipes/sglang/qwen3.5/gb200-fp8/" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1810 + +- config-keys: + - minimaxm3-fp8-gb300-dynamo-vllm + description: + - "Update the GB300 MiniMax-M3 Dynamo-vLLM image to vllm/vllm-openai:minimax-m3-perf-arm64-13.0.1-7a67223" + - "Use the dedicated ARM64 MiniMax-M3 performance image; benchmark settings unchanged" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1888 From bc58405d4b5eb89288ef689733b1889af63b91d6 Mon Sep 17 00:00:00 2001 From: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com> Date: Tue, 23 Jun 2026 15:24:50 +0800 Subject: [PATCH 2/2] fix(vllm): size MNNVL workspace for one-shot all-reduce --- .../configs/minimax-m3-vllm-fixes.sh | 32 +++++++++++++++++++ runners/launch_gb300-nv.sh | 17 ++++++++-- 2 files changed, 46 insertions(+), 3 deletions(-) create mode 100755 benchmarks/multi_node/srt-slurm-recipes/configs/minimax-m3-vllm-fixes.sh diff --git a/benchmarks/multi_node/srt-slurm-recipes/configs/minimax-m3-vllm-fixes.sh b/benchmarks/multi_node/srt-slurm-recipes/configs/minimax-m3-vllm-fixes.sh new file mode 100755 index 000000000..7d5f52a97 --- /dev/null +++ b/benchmarks/multi_node/srt-slurm-recipes/configs/minimax-m3-vllm-fixes.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euo pipefail + +python3 - <<'PYEOF' +from importlib.util import find_spec +from pathlib import Path + +spec = find_spec("vllm") +if not spec or not spec.origin: + raise RuntimeError("vllm is not installed") +root = Path(spec.origin).parent +patches = { + root / "distributed/device_communicators/flashinfer_all_reduce.py": [ + ( + " comm_backend=comm_backend,\n" + " group=group,\n", + " comm_backend=comm_backend,\n" + ' force_oneshot_support=backend == "mnnvl",\n' + " group=group,\n", + ), + ], +} +for path, edits in patches.items(): + source = path.read_text() + for old, new in edits: + if new in source: + continue + if source.count(old) != 1: + raise RuntimeError(f"missing or ambiguous patch anchor in {path}") + source = source.replace(old, new, 1) + path.write_text(source) +PYEOF diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh index 66d1fbfe2..65825bdf1 100644 --- a/runners/launch_gb300-nv.sh +++ b/runners/launch_gb300-nv.sh @@ -106,6 +106,7 @@ export OSL="$OSL" echo "Cloning srt-slurm repository..." RUN_KEY=$(printf "%s" "${RESULT_FILENAME:-${RUNNER_NAME:-gb300-nv}}" | sha1sum | cut -c1-12) SRT_REPO_DIR="${GITHUB_WORKSPACE}/srt-slurm-${GITHUB_RUN_ID:-manual}-${GITHUB_RUN_ATTEMPT:-0}-${RUN_KEY}" +SRTCTL_SETUP_SCRIPT="" rm -rf "$SRT_REPO_DIR" if [[ "$IS_AGENTIC" == "1" ]]; then @@ -171,6 +172,10 @@ elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "minimaxm3" ]]; then git checkout main mkdir -p recipes/vllm/minimax-m3-gb300-fp8 cp -rT "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/vllm/minimax-m3-gb300-fp8" recipes/vllm/minimax-m3-gb300-fp8 + SRTCTL_SETUP_SCRIPT="minimax-m3-vllm-fixes.sh" + cp \ + "$GITHUB_WORKSPACE/benchmarks/multi_node/srt-slurm-recipes/configs/$SRTCTL_SETUP_SCRIPT" \ + "configs/$SRTCTL_SETUP_SCRIPT" elif [[ $FRAMEWORK == "dynamo-vllm" && $MODEL_PREFIX == "kimik2.5" && $PRECISION == "fp4" ]]; then git clone https://github.com/NVIDIA/srt-slurm.git "$SRT_REPO_DIR" cd "$SRT_REPO_DIR" @@ -274,12 +279,18 @@ sed -i "s/^name:.*/name: \"${RUNNER_NAME}\"/" "$CONFIG_FILE" # seq-len recipes still resolve model.path to an NFS-visible location # where the precheck is a useful sanity guard, so keep enforcement on # for them. -PREFLIGHT_FLAG="" +SRTCTL_APPLY_ARGS=( + -f "$CONFIG_FILE" + --tags "gb300,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" +) if [[ "$IS_AGENTIC" == "1" ]]; then - PREFLIGHT_FLAG="--no-preflight" + SRTCTL_APPLY_ARGS+=(--no-preflight) +fi +if [[ -n "$SRTCTL_SETUP_SCRIPT" ]]; then + SRTCTL_APPLY_ARGS+=(--setup-script "$SRTCTL_SETUP_SCRIPT") fi -SRTCTL_OUTPUT=$(srtctl apply $PREFLIGHT_FLAG -f "$CONFIG_FILE" --tags "gb300,${MODEL_PREFIX},${PRECISION},${ISL}x${OSL},infmax-$(date +%Y%m%d)" 2>&1) +SRTCTL_OUTPUT=$(srtctl apply "${SRTCTL_APPLY_ARGS[@]}" 2>&1) echo "$SRTCTL_OUTPUT" JOB_ID=$(echo "$SRTCTL_OUTPUT" | grep -oP '✅ Job \K[0-9]+' || echo "$SRTCTL_OUTPUT" | grep -oP 'Job \K[0-9]+')