From 47e97b893f518cfb467e2cc9ea746f10dd76744e Mon Sep 17 00:00:00 2001 From: learning-to-play <66660475+learning-to-play@users.noreply.github.com> Date: Mon, 18 May 2026 08:00:04 -0700 Subject: [PATCH] Enables batch size > 1 in vllm_decode.py --- src/maxtext/inference/vllm_decode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/maxtext/inference/vllm_decode.py b/src/maxtext/inference/vllm_decode.py index c2b1e5e5d2..67ebc88116 100644 --- a/src/maxtext/inference/vllm_decode.py +++ b/src/maxtext/inference/vllm_decode.py @@ -124,7 +124,7 @@ def decode_with_vllm(config: Config) -> None: token=config.hf_access_token, ) - prompts = [config.prompt] + prompts = [config.prompt] * int(config.per_device_batch_size) if config.use_chat_template: # Format the prompt using chat template if specified messages = [