diff --git a/fastdeploy/config.py b/fastdeploy/config.py index 50e0ba08a37..b15a6dc824b 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -2392,7 +2392,7 @@ def get_max_chunk_tokens(self, mm_max_tokens_per_item=None): num_tokens = self.scheduler_config.max_num_seqs else: num_tokens = self.scheduler_config.max_num_batched_tokens - if mm_max_tokens_per_item is not None: + if mm_max_tokens_per_item is not None and self.deploy_modality != DeployModality.TEXT: max_mm_tokens = max( mm_max_tokens_per_item.get("image", 0), mm_max_tokens_per_item.get("video", 0),