diff --git a/src/maxtext/layers/moe.py b/src/maxtext/layers/moe.py index 70e67490f5..c8253d5203 100644 --- a/src/maxtext/layers/moe.py +++ b/src/maxtext/layers/moe.py @@ -368,7 +368,7 @@ def __init__( else: self._tensor_parallelism_name = "tensor" - if self.config.attention == "vllm_rpa": + if self.config.attention == "vllm_rpa" and self.config.enable_dp_attention: self._expert_parallelism_name = "attn_dp_expert" else: self._expert_parallelism_name = "expert"