From fc843ed14ff14b6d04f3c5f545636bfd2765d641 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 30 Oct 2025 17:33:49 -0500 Subject: [PATCH 01/21] Add support to download models automatically if --model specified in vllm args --- vec_inf/client/_slurm_script_generator.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index a5ede3c1..e1f32c50 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -137,6 +137,13 @@ def _generate_launch_cmd(self) -> str: Server launch command. """ launcher_script = ["\n"] + + # Check if --model is specified in vllm_args to use HuggingFace model name + model_path = self.model_weights_path + vllm_args_copy = self.params["vllm_args"].copy() + if "--model" in vllm_args_copy: + model_path = vllm_args_copy.pop("--model") + if self.use_container: launcher_script.append( SLURM_SCRIPT_TEMPLATE["container_command"].format( @@ -151,12 +158,12 @@ def _generate_launch_cmd(self) -> str: ) launcher_script.append( "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=self.model_weights_path, + model_weights_path=model_path, model_name=self.params["model_name"], ) ) - for arg, value in self.params["vllm_args"].items(): + for arg, value in vllm_args_copy.items(): if isinstance(value, bool): launcher_script.append(f" {arg} \\") else: @@ -256,6 +263,12 @@ def _generate_model_launch_script(self, model_name: str) -> Path: model_name=model_name, ) ) + # Check if --model is specified in vllm_args to use HuggingFace model name + model_path = model_params["model_weights_path"] + vllm_args_copy = model_params["vllm_args"].copy() + if "--model" in vllm_args_copy: + model_path = vllm_args_copy.pop("--model") + if self.use_container: script_content.append( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format( @@ -265,11 +278,12 @@ def _generate_model_launch_script(self, model_name: str) -> Path: ) script_content.append( "\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=model_params["model_weights_path"], + model_weights_path=model_path, model_name=model_name, ) ) - for arg, value in model_params["vllm_args"].items(): + + for arg, value in vllm_args_copy.items(): if isinstance(value, bool): script_content.append(f" {arg} \\") else: From 5f790ffcce8dadc3c9edf79724eb378e6ee77d97 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 30 Oct 2025 18:21:34 -0500 Subject: [PATCH 02/21] create model dir if it doesn't exist --- vec_inf/client/_slurm_script_generator.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index e1f32c50..e52c94ba 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -38,9 +38,11 @@ def __init__(self, params: dict[str, Any]): self.additional_binds = self.params.get("bind", "") if self.additional_binds: self.additional_binds = f" --bind {self.additional_binds}" - self.model_weights_path = str( - Path(self.params["model_weights_parent_dir"], self.params["model_name"]) + model_weights_path = Path( + self.params["model_weights_parent_dir"], self.params["model_name"] ) + model_weights_path.mkdir(parents=True, exist_ok=True) + self.model_weights_path = str(model_weights_path) env_dict: dict[str, str] = self.params.get("env", {}) # Create string of environment variables self.env_str = "" @@ -210,11 +212,13 @@ def __init__(self, params: dict[str, Any]): self.params["models"][model_name]["additional_binds"] = ( f" --bind {self.params['models'][model_name]['bind']}" ) + model_weights_path = Path( + self.params["models"][model_name]["model_weights_parent_dir"], + model_name, + ) + model_weights_path.mkdir(parents=True, exist_ok=True) self.params["models"][model_name]["model_weights_path"] = str( - Path( - self.params["models"][model_name]["model_weights_parent_dir"], - model_name, - ) + model_weights_path ) def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path: From 0f22bec693f56dba6b15c2887934d5be0fb320d9 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:40:16 -0600 Subject: [PATCH 03/21] Check model weights existence before binding; use HF model name if missing --- .../client/test_slurm_script_generator.py | 31 +++++++++-- vec_inf/client/_slurm_script_generator.py | 52 +++++++++++++------ vec_inf/client/_slurm_templates.py | 8 +-- 3 files changed, 69 insertions(+), 22 deletions(-) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index 0b55f59b..bad302e2 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -12,6 +12,15 @@ ) +@pytest.fixture(autouse=True) +def patch_model_weights_exists(monkeypatch): + """Ensure model weights directory existence checks default to True.""" + + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", lambda self: True + ) + + class TestSlurmScriptGenerator: """Tests for SlurmScriptGenerator class.""" @@ -164,9 +173,8 @@ def test_generate_server_setup_singularity(self, singularity_params): setup = generator._generate_server_setup() assert "ray stop" in setup - assert ( - "module load " in setup - ) # Remove module name since it's inconsistent between clusters + # Note: module_load_cmd may be empty in some configs, so we don't assert it + # The container setup should still work without it def test_generate_launch_cmd_venv(self, basic_params): """Test launch command generation with virtual environment.""" @@ -190,6 +198,23 @@ def test_generate_launch_cmd_singularity(self, singularity_params): assert "--bind /scratch:/scratch,/data:/data" in launch_cmd assert "source" not in launch_cmd + def test_generate_launch_cmd_singularity_no_local_weights( + self, singularity_params, monkeypatch + ): + """Test container launch when model weights directory is missing.""" + + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", + lambda self: False, + ) + + generator = SlurmScriptGenerator(singularity_params) + launch_cmd = generator._generate_launch_cmd() + + assert "exec --nv" in launch_cmd + assert "--bind /path/to/model_weights/test-model" not in launch_cmd + assert "vllm serve test-model" in launch_cmd + def test_generate_launch_cmd_boolean_args(self, basic_params): """Test launch command with boolean vLLM arguments.""" params = basic_params.copy() diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index e52c94ba..215526eb 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -41,8 +41,14 @@ def __init__(self, params: dict[str, Any]): model_weights_path = Path( self.params["model_weights_parent_dir"], self.params["model_name"] ) - model_weights_path.mkdir(parents=True, exist_ok=True) + self.model_weights_exists = model_weights_path.exists() self.model_weights_path = str(model_weights_path) + self.model_source = ( + self.model_weights_path if self.model_weights_exists else self.params["model_name"] + ) + self.model_bind_option = ( + f" --bind {self.model_weights_path}" if self.model_weights_exists else "" + ) env_dict: dict[str, str] = self.params.get("env", {}) # Create string of environment variables self.env_str = "" @@ -53,6 +59,14 @@ def __init__(self, params: dict[str, Any]): self.env_str += "," self.env_str += key + "=" + val + # # Ensure CUDA_VISIBLE_DEVICES is passed through to the container + # if self.use_container and "CUDA_VISIBLE_DEVICES" not in env_dict: + # if len(self.env_str) == 0: + # self.env_str = "--env " + # else: + # self.env_str += "," + # self.env_str += "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" + def _generate_script_content(self) -> str: """Generate the complete Slurm script content. @@ -109,7 +123,7 @@ def _generate_server_setup(self) -> str: server_setup_str = server_setup_str.replace( "CONTAINER_PLACEHOLDER", SLURM_SCRIPT_TEMPLATE["container_command"].format( - model_weights_path=self.model_weights_path, + model_bind_option=self.model_bind_option, additional_binds=self.additional_binds, env_str=self.env_str, ), @@ -140,16 +154,15 @@ def _generate_launch_cmd(self) -> str: """ launcher_script = ["\n"] - # Check if --model is specified in vllm_args to use HuggingFace model name - model_path = self.model_weights_path vllm_args_copy = self.params["vllm_args"].copy() + model_source = self.model_source if "--model" in vllm_args_copy: - model_path = vllm_args_copy.pop("--model") + model_source = vllm_args_copy.pop("--model") if self.use_container: launcher_script.append( SLURM_SCRIPT_TEMPLATE["container_command"].format( - model_weights_path=self.model_weights_path, + model_bind_option=self.model_bind_option, additional_binds=self.additional_binds, env_str=self.env_str, ) @@ -160,7 +173,7 @@ def _generate_launch_cmd(self) -> str: ) launcher_script.append( "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=model_path, + model_source=model_source, model_name=self.params["model_name"], ) ) @@ -216,9 +229,19 @@ def __init__(self, params: dict[str, Any]): self.params["models"][model_name]["model_weights_parent_dir"], model_name, ) - model_weights_path.mkdir(parents=True, exist_ok=True) - self.params["models"][model_name]["model_weights_path"] = str( - model_weights_path + model_weights_exists = model_weights_path.exists() + model_weights_path_str = str(model_weights_path) + self.params["models"][model_name]["model_weights_path"] = ( + model_weights_path_str + ) + self.params["models"][model_name]["model_weights_exists"] = ( + model_weights_exists + ) + self.params["models"][model_name]["model_bind_option"] = ( + f" --bind {model_weights_path_str}" if model_weights_exists else "" + ) + self.params["models"][model_name]["model_source"] = ( + model_weights_path_str if model_weights_exists else model_name ) def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path: @@ -267,22 +290,21 @@ def _generate_model_launch_script(self, model_name: str) -> Path: model_name=model_name, ) ) - # Check if --model is specified in vllm_args to use HuggingFace model name - model_path = model_params["model_weights_path"] vllm_args_copy = model_params["vllm_args"].copy() + model_source = model_params.get("model_source", model_params["model_weights_path"]) if "--model" in vllm_args_copy: - model_path = vllm_args_copy.pop("--model") + model_source = vllm_args_copy.pop("--model") if self.use_container: script_content.append( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format( - model_weights_path=model_params["model_weights_path"], + model_bind_option=model_params.get("model_bind_option", ""), additional_binds=model_params["additional_binds"], ) ) script_content.append( "\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"]).format( - model_weights_path=model_path, + model_source=model_source, model_name=model_name, ) ) diff --git a/vec_inf/client/_slurm_templates.py b/vec_inf/client/_slurm_templates.py index 209534d8..47dc6527 100644 --- a/vec_inf/client/_slurm_templates.py +++ b/vec_inf/client/_slurm_templates.py @@ -99,7 +99,7 @@ class SlurmScriptTemplate(TypedDict): "env_vars": [ f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g')" ], - "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\", + "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}}{{model_bind_option}}{{additional_binds}} --containall {IMAGE_PATH} \\", "activate_venv": "source {venv}/bin/activate", "server_setup": { "single_node": [ @@ -147,7 +147,7 @@ class SlurmScriptTemplate(TypedDict): ' && mv temp.json "$json_path"', ], "launch_cmd": [ - "vllm serve {model_weights_path} \\", + "vllm serve {model_source} \\", " --served-model-name {model_name} \\", ' --host "0.0.0.0" \\', " --port $vllm_port_number \\", @@ -238,9 +238,9 @@ class BatchModelLaunchScriptTemplate(TypedDict): ' "$json_path" > temp_{model_name}.json \\', ' && mv temp_{model_name}.json "$json_path"\n', ], - "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\", + "container_command": f"{CONTAINER_MODULE_NAME} exec --nv{{model_bind_option}}{{additional_binds}} --containall {IMAGE_PATH} \\", "launch_cmd": [ - "vllm serve {model_weights_path} \\", + "vllm serve {model_source} \\", " --served-model-name {model_name} \\", ' --host "0.0.0.0" \\', " --port $vllm_port_number \\", From 9f2fdd21b7e22b1e557e6f1ee63cd7581a45dc9a Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:43:37 -0600 Subject: [PATCH 04/21] Remove commented code --- vec_inf/client/_slurm_script_generator.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 215526eb..4c1b0e5d 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -59,14 +59,6 @@ def __init__(self, params: dict[str, Any]): self.env_str += "," self.env_str += key + "=" + val - # # Ensure CUDA_VISIBLE_DEVICES is passed through to the container - # if self.use_container and "CUDA_VISIBLE_DEVICES" not in env_dict: - # if len(self.env_str) == 0: - # self.env_str = "--env " - # else: - # self.env_str += "," - # self.env_str += "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES" - def _generate_script_content(self) -> str: """Generate the complete Slurm script content. From 38011beecad2e5eb5e458a660373c960d3802704 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:47:57 -0600 Subject: [PATCH 05/21] Apply code formatting fixes from pre-commit --- tests/vec_inf/client/test_slurm_script_generator.py | 2 -- vec_inf/client/_slurm_script_generator.py | 10 +++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index bad302e2..0abd80bb 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -15,7 +15,6 @@ @pytest.fixture(autouse=True) def patch_model_weights_exists(monkeypatch): """Ensure model weights directory existence checks default to True.""" - monkeypatch.setattr( "vec_inf.client._slurm_script_generator.Path.exists", lambda self: True ) @@ -202,7 +201,6 @@ def test_generate_launch_cmd_singularity_no_local_weights( self, singularity_params, monkeypatch ): """Test container launch when model weights directory is missing.""" - monkeypatch.setattr( "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False, diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 4c1b0e5d..364dd6b8 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -44,7 +44,9 @@ def __init__(self, params: dict[str, Any]): self.model_weights_exists = model_weights_path.exists() self.model_weights_path = str(model_weights_path) self.model_source = ( - self.model_weights_path if self.model_weights_exists else self.params["model_name"] + self.model_weights_path + if self.model_weights_exists + else self.params["model_name"] ) self.model_bind_option = ( f" --bind {self.model_weights_path}" if self.model_weights_exists else "" @@ -283,7 +285,9 @@ def _generate_model_launch_script(self, model_name: str) -> Path: ) ) vllm_args_copy = model_params["vllm_args"].copy() - model_source = model_params.get("model_source", model_params["model_weights_path"]) + model_source = model_params.get( + "model_source", model_params["model_weights_path"] + ) if "--model" in vllm_args_copy: model_source = vllm_args_copy.pop("--model") @@ -300,7 +304,7 @@ def _generate_model_launch_script(self, model_name: str) -> Path: model_name=model_name, ) ) - + for arg, value in vllm_args_copy.items(): if isinstance(value, bool): script_content.append(f" {arg} \\") From 4de35635e7d8f9c2343372dc29ad59da983de832 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 16:57:21 -0600 Subject: [PATCH 06/21] revert unnecessary test change --- tests/vec_inf/client/test_slurm_script_generator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index 0abd80bb..62b36140 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -172,8 +172,9 @@ def test_generate_server_setup_singularity(self, singularity_params): setup = generator._generate_server_setup() assert "ray stop" in setup - # Note: module_load_cmd may be empty in some configs, so we don't assert it - # The container setup should still work without it + assert ( + "module load " in setup + ) # Remove module name since it's inconsistent between clusters def test_generate_launch_cmd_venv(self, basic_params): """Test launch command generation with virtual environment.""" From 8b6a2119da676c0d077af288f72f6dc199e03af1 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 17:05:28 -0600 Subject: [PATCH 07/21] Apply formatting fixes from pre-commit --- vec_inf/client/_slurm_script_generator.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 0f78fb18..01b786ea 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -121,7 +121,9 @@ def _generate_server_setup(self) -> str: server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"])) server_script.append( SLURM_SCRIPT_TEMPLATE["bind_path"].format( - model_weights_path=self.model_weights_path if self.model_weights_exists else "", + model_weights_path=self.model_weights_path + if self.model_weights_exists + else "", additional_binds=self.additional_binds, ) ) @@ -289,7 +291,9 @@ def _generate_model_launch_script(self, model_name: str) -> Path: script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"]) script_content.append( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format( - model_weights_path=model_params["model_weights_path"] if model_params.get("model_weights_exists", True) else "", + model_weights_path=model_params["model_weights_path"] + if model_params.get("model_weights_exists", True) + else "", additional_binds=model_params["additional_binds"], ) ) From c68cb3553a4c7abdff595e8fef08395f66076256 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Wed, 12 Nov 2025 17:21:10 -0600 Subject: [PATCH 08/21] Add tests for model weights existence coverage --- .../client/test_slurm_script_generator.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index b962f0f9..a81a962d 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -176,6 +176,21 @@ def test_generate_server_setup_singularity(self, singularity_params): "module load " in setup ) # Remove module name since it's inconsistent between clusters + def test_generate_server_setup_singularity_no_weights( + self, singularity_params, monkeypatch + ): + """Test server setup when model weights don't exist.""" + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", + lambda self: False, + ) + + generator = SlurmScriptGenerator(singularity_params) + setup = generator._generate_server_setup() + + assert "ray stop" in setup + assert "/path/to/model_weights/test-model" not in setup + def test_generate_launch_cmd_venv(self, basic_params): """Test launch command generation with virtual environment.""" generator = SlurmScriptGenerator(basic_params) @@ -415,6 +430,24 @@ def test_generate_model_launch_script_singularity( mock_touch.assert_called_once() mock_write_text.assert_called_once() + @patch("pathlib.Path.touch") + @patch("pathlib.Path.write_text") + def test_generate_model_launch_script_singularity_no_weights( + self, mock_write_text, mock_touch, batch_singularity_params, monkeypatch + ): + """Test batch model launch script when model weights don't exist.""" + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", + lambda self: False, + ) + + generator = BatchSlurmScriptGenerator(batch_singularity_params) + script_path = generator._generate_model_launch_script("model1") + + assert script_path.name == "launch_model1.sh" + call_args = mock_write_text.call_args[0][0] + assert "/path/to/model_weights/model1" not in call_args + @patch("vec_inf.client._slurm_script_generator.datetime") @patch("pathlib.Path.touch") @patch("pathlib.Path.write_text") From b610891a831aa6bd6511331fc68237927c6e0b70 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Mon, 5 Jan 2026 17:32:03 -0600 Subject: [PATCH 09/21] Remove redundant /dev/infiniband --- vec_inf/client/_slurm_templates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vec_inf/client/_slurm_templates.py b/vec_inf/client/_slurm_templates.py index ab607fa1..62697006 100644 --- a/vec_inf/client/_slurm_templates.py +++ b/vec_inf/client/_slurm_templates.py @@ -98,7 +98,7 @@ class SlurmScriptTemplate(TypedDict): f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop", ], "imports": "source {src_dir}/find_port.sh", - "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g'),/dev,/tmp{{model_weights_path}}{{additional_binds}}", + "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp{{model_weights_path}}{{additional_binds}}", "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {IMAGE_PATH} \\", "activate_venv": "source {venv}/bin/activate", "server_setup": { From a7a5debbed1d2c6d868a13c38e480efe64ebea4d Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Mon, 5 Jan 2026 17:35:28 -0600 Subject: [PATCH 10/21] Remove unused variable --- vec_inf/client/_slurm_script_generator.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 01b786ea..89bac418 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -47,9 +47,6 @@ def __init__(self, params: dict[str, Any]): if self.model_weights_exists else self.params["model_name"] ) - self.model_bind_option = ( - f",{self.model_weights_path}" if self.model_weights_exists else "" - ) self.env_str = self._generate_env_str() def _generate_env_str(self) -> str: From bb3142bb674b19658d5355ddb9a389c807208172 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Mon, 5 Jan 2026 17:45:44 -0600 Subject: [PATCH 11/21] Add warning if downloading weights and HF cache not set --- vec_inf/client/_slurm_script_generator.py | 13 ++++++++ vec_inf/client/_utils.py | 40 +++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 89bac418..0c86d688 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -15,6 +15,7 @@ SLURM_SCRIPT_TEMPLATE, ) from vec_inf.client._slurm_vars import CONTAINER_MODULE_NAME +from vec_inf.client._utils import check_and_warn_hf_cache class SlurmScriptGenerator: @@ -47,6 +48,11 @@ def __init__(self, params: dict[str, Any]): if self.model_weights_exists else self.params["model_name"] ) + check_and_warn_hf_cache( + self.model_weights_exists, + self.model_weights_path, + self.params.get("env", {}), + ) self.env_str = self._generate_env_str() def _generate_env_str(self) -> str: @@ -253,6 +259,13 @@ def __init__(self, params: dict[str, Any]): self.params["models"][model_name]["model_source"] = ( model_weights_path_str if model_weights_exists else model_name ) + check_and_warn_hf_cache( + model_weights_exists, + model_weights_path_str, + self.params["models"][model_name].get("env", {}), + model_name, + ) + def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path: """Write the generated Slurm script to the log directory. diff --git a/vec_inf/client/_utils.py b/vec_inf/client/_utils.py index 67d907ec..e39b28d9 100644 --- a/vec_inf/client/_utils.py +++ b/vec_inf/client/_utils.py @@ -456,3 +456,43 @@ def check_required_fields(params: dict[str, Any]) -> dict[str, Any]: f"{arg} is required, please set it in the command arguments or environment variables" ) return env_overrides + + +def check_and_warn_hf_cache( + model_weights_exists: bool, + model_weights_path: str, + env_dict: dict[str, str], + model_name: str | None = None, +) -> None: + """Warn if model weights don't exist and HuggingFace cache directory is not set. + + Parameters + ---------- + model_weights_exists : bool + Whether the model weights exist at the expected path. + model_weights_path : str + The expected path to the model weights. + env_dict : dict[str, str] + Dictionary of environment variables to check (from --env parameter). + model_name : str | None, optional + Optional model name to include in the warning message (for batch mode). + """ + if model_weights_exists: + return + + hf_cache_vars = ["HF_HOME", "HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE"] + hf_cache_set = any( + os.environ.get(var) or env_dict.get(var) for var in hf_cache_vars + ) + + if not hf_cache_set: + model_prefix = f"Model weights for '{model_name}' " if model_name else "Model weights " + warnings.warn( + f"{model_prefix}not found at '{model_weights_path}' and no " + f"HuggingFace cache directory is set (HF_HOME, HF_HUB_CACHE, or " + f"HUGGINGFACE_HUB_CACHE). The model may be downloaded to your home " + f"directory, which could consume your storage quota. Consider setting " + f"one of these environment variables to a shared cache location.", + UserWarning, + stacklevel=4, + ) From 2db9c6c860fc2857ca5e9d450587266ac10d9ff5 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Mon, 5 Jan 2026 17:49:34 -0600 Subject: [PATCH 12/21] format ONLY --- vec_inf/client/_slurm_script_generator.py | 1 - vec_inf/client/_utils.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 0c86d688..433a8f27 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -266,7 +266,6 @@ def __init__(self, params: dict[str, Any]): model_name, ) - def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path: """Write the generated Slurm script to the log directory. diff --git a/vec_inf/client/_utils.py b/vec_inf/client/_utils.py index e39b28d9..2f3b01d6 100644 --- a/vec_inf/client/_utils.py +++ b/vec_inf/client/_utils.py @@ -486,7 +486,9 @@ def check_and_warn_hf_cache( ) if not hf_cache_set: - model_prefix = f"Model weights for '{model_name}' " if model_name else "Model weights " + model_prefix = ( + f"Model weights for '{model_name}' " if model_name else "Model weights " + ) warnings.warn( f"{model_prefix}not found at '{model_weights_path}' and no " f"HuggingFace cache directory is set (HF_HOME, HF_HUB_CACHE, or " From 079c86a58726730b706db8d394db24afc5d308d6 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 29 Jan 2026 17:37:24 -0600 Subject: [PATCH 13/21] Add --hf-model CLI option and config field Adds hf_model field to ModelConfig and LaunchOptions to specify a HuggingFace model id for vLLM to download at runtime. --- vec_inf/cli/_cli.py | 11 +++++++++++ vec_inf/client/config.py | 10 ++++++++++ vec_inf/client/models.py | 4 ++++ 3 files changed, 25 insertions(+) diff --git a/vec_inf/cli/_cli.py b/vec_inf/cli/_cli.py index b3fe6248..076f2aec 100644 --- a/vec_inf/cli/_cli.py +++ b/vec_inf/cli/_cli.py @@ -132,6 +132,15 @@ def cli() -> None: type=str, help="Path to parent directory containing model weights", ) +@click.option( + "--hf-model", + type=str, + help=( + "Full HuggingFace model id/path to use for vLLM serve (e.g. " + "'meta-llama/Meta-Llama-3.1-8B-Instruct'). " + "Keeps model-name as the short identifier for config/logs/job naming." + ), +) @click.option( "--vllm-args", type=str, @@ -200,6 +209,8 @@ def launch( Path to SLURM log directory - model_weights_parent_dir : str, optional Path to model weights directory + - hf_model : str, optional + Full HuggingFace model id/path to use for vLLM serve - vllm_args : str, optional vLLM engine arguments - env : str, optional diff --git a/vec_inf/client/config.py b/vec_inf/client/config.py index 4bcbe508..edbdf4cb 100644 --- a/vec_inf/client/config.py +++ b/vec_inf/client/config.py @@ -66,6 +66,9 @@ class ModelConfig(BaseModel): Directory path for storing logs model_weights_parent_dir : Path, optional Base directory containing model weights + hf_model : str, optional + HuggingFace model id for vLLM to download (e.g. "meta-llama/Llama-3.1-8B"). + Used as model source when local weights don't exist. vllm_args : dict[str, Any], optional Additional arguments for vLLM engine configuration @@ -148,6 +151,13 @@ class ModelConfig(BaseModel): default=Path(DEFAULT_ARGS["model_weights_parent_dir"]), description="Base directory for model weights", ) + hf_model: Optional[str] = Field( + default=None, + description=( + "Full HuggingFace model id/path to use for vLLM serve (e.g. " + "'meta-llama/Meta-Llama-3.1-8B-Instruct')." + ), + ) vllm_args: Optional[dict[str, Any]] = Field( default={}, description="vLLM engine arguments" ) diff --git a/vec_inf/client/models.py b/vec_inf/client/models.py index 29ace42d..c53fae7f 100644 --- a/vec_inf/client/models.py +++ b/vec_inf/client/models.py @@ -222,6 +222,9 @@ class LaunchOptions: Directory for logs model_weights_parent_dir : str, optional Parent directory containing model weights + hf_model : str, optional + HuggingFace model id for vLLM to download (e.g. "meta-llama/Llama-3.1-8B"). + Used as model source when local weights don't exist. vllm_args : str, optional Additional arguments for vLLM env : str, optional @@ -250,6 +253,7 @@ class LaunchOptions: venv: Optional[str] = None log_dir: Optional[str] = None model_weights_parent_dir: Optional[str] = None + hf_model: Optional[str] = None vllm_args: Optional[str] = None env: Optional[str] = None config: Optional[str] = None From 20163da637308c45a11d55529e4f74b5aac9b7d1 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 29 Jan 2026 17:37:24 -0600 Subject: [PATCH 14/21] Use hf_model as model source when local weights missing Updates SlurmScriptGenerator and BatchSlurmScriptGenerator to use hf_model for vllm serve when local weights don't exist. Priority: local weights > hf_model > model name. --- vec_inf/client/_slurm_script_generator.py | 26 ++++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 433a8f27..badf6504 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -43,11 +43,13 @@ def __init__(self, params: dict[str, Any]): ) self.model_weights_exists = model_weights_path.exists() self.model_weights_path = str(model_weights_path) - self.model_source = ( - self.model_weights_path - if self.model_weights_exists - else self.params["model_name"] - ) + # Determine model source: local weights > hf_model > model name + if self.model_weights_exists: + self.model_source = self.model_weights_path + elif self.params.get("hf_model"): + self.model_source = self.params["hf_model"] + else: + self.model_source = self.params["model_name"] check_and_warn_hf_cache( self.model_weights_exists, self.model_weights_path, @@ -256,9 +258,17 @@ def __init__(self, params: dict[str, Any]): self.params["models"][model_name]["model_weights_exists"] = ( model_weights_exists ) - self.params["models"][model_name]["model_source"] = ( - model_weights_path_str if model_weights_exists else model_name - ) + # Determine model source: local weights > hf_model > model name + if model_weights_exists: + self.params["models"][model_name]["model_source"] = ( + model_weights_path_str + ) + elif self.params["models"][model_name].get("hf_model"): + self.params["models"][model_name]["model_source"] = self.params[ + "models" + ][model_name]["hf_model"] + else: + self.params["models"][model_name]["model_source"] = model_name check_and_warn_hf_cache( model_weights_exists, model_weights_path_str, From ed82b778d7b14b39c59328f57fdf76d6191f150b Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 29 Jan 2026 17:37:24 -0600 Subject: [PATCH 15/21] Pass hf_model from CLI to launch params --- vec_inf/client/_helper.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vec_inf/client/_helper.py b/vec_inf/client/_helper.py index ce6e156e..d2b82f4e 100644 --- a/vec_inf/client/_helper.py +++ b/vec_inf/client/_helper.py @@ -204,6 +204,10 @@ def _apply_cli_overrides(self, params: dict[str, Any]) -> None: params : dict[str, Any] Dictionary of launch parameters to override """ + if self.kwargs.get("hf_model"): + params["hf_model"] = self.kwargs["hf_model"] + del self.kwargs["hf_model"] + if self.kwargs.get("vllm_args"): vllm_args = self._process_vllm_args(self.kwargs["vllm_args"]) for key, value in vllm_args.items(): From d3f6772b9d440640f68a71e4b8960a819dcea8d7 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 29 Jan 2026 17:37:25 -0600 Subject: [PATCH 16/21] Add tests for hf_model override in slurm script generation --- .../client/test_slurm_script_generator.py | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index a81a962d..9f3914b9 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -202,6 +202,21 @@ def test_generate_launch_cmd_venv(self, basic_params): assert "--max-model-len 8192" in launch_cmd assert "--enforce-eager" in launch_cmd + def test_generate_launch_cmd_with_hf_model_override( + self, basic_params, monkeypatch + ): + """Test launch command uses hf_model when local weights don't exist.""" + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False + ) + params = basic_params.copy() + params["hf_model"] = "meta-llama/Meta-Llama-3.1-8B-Instruct" + generator = SlurmScriptGenerator(params) + launch_cmd = generator._generate_launch_cmd() + + assert "vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct" in launch_cmd + assert "vllm serve /path/to/model_weights/test-model" not in launch_cmd + def test_generate_launch_cmd_singularity(self, singularity_params): """Test launch command generation with Singularity.""" generator = SlurmScriptGenerator(singularity_params) @@ -416,6 +431,25 @@ def test_generate_model_launch_script_basic( mock_touch.assert_called_once() mock_write_text.assert_called_once() + @patch("pathlib.Path.touch") + @patch("pathlib.Path.write_text") + def test_generate_model_launch_script_with_hf_model_override( + self, mock_write_text, mock_touch, batch_params, monkeypatch + ): + """Test batch launch script uses hf_model when local weights don't exist.""" + monkeypatch.setattr( + "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False + ) + params = batch_params.copy() + params["models"] = {k: v.copy() for k, v in batch_params["models"].items()} + params["models"]["model1"]["hf_model"] = "meta-llama/Meta-Llama-3.1-8B-Instruct" + + generator = BatchSlurmScriptGenerator(params) + generator._generate_model_launch_script("model1") + + call_args = mock_write_text.call_args[0][0] + assert "vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct" in call_args + @patch("pathlib.Path.touch") @patch("pathlib.Path.write_text") def test_generate_model_launch_script_singularity( From 1c312dfadae69f8b3c997d5c3b7447672a727593 Mon Sep 17 00:00:00 2001 From: rohan-uiuc Date: Thu, 29 Jan 2026 17:37:25 -0600 Subject: [PATCH 17/21] Add documentation for --hf-model option --- docs/user_guide.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/user_guide.md b/docs/user_guide.md index c94313af..3ac97e4e 100644 --- a/docs/user_guide.md +++ b/docs/user_guide.md @@ -55,6 +55,17 @@ To overwrite default `vllm serve` arguments, you can specify the arguments in a vec-inf launch Meta-Llama-3.1-8B-Instruct --vllm-args '--max-model-len=65536,--compilation-config=3' ``` +To download models directly from HuggingFace Hub without needing local weights, use `--hf-model`: + +```bash +vec-inf launch Qwen2.5-3B-Instruct \ + --hf-model Qwen/Qwen2.5-3B-Instruct \ + --env 'HF_HOME=/path/to/cache' \ + --vllm-args '--max-model-len=4096' +``` + +Set `HF_HOME` via `--env` to control where models are cached. If local weights exist, they take priority over `--hf-model`. + For the full list of `vllm serve` arguments, you can find them [here](https://docs.vllm.ai/en/stable/serving/engine_args.html), make sure you select the correct vLLM version. #### Custom models From 03864a5639bb397f368b6579264d716eefcd3a1d Mon Sep 17 00:00:00 2001 From: XkunW Date: Mon, 30 Mar 2026 13:18:39 -0400 Subject: [PATCH 18/21] Fix typos --- vec_inf/client/_slurm_script_generator.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index d1a0c7e9..0c7aeaf7 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -36,10 +36,10 @@ def __init__(self, params: dict[str, Any]): self.is_multinode = int(self.params["num_nodes"]) > 1 self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME self.additional_binds = ( - {self.params['bind']} if self.params.get("bind") else "" + self.params["bind"] if self.params.get("bind") else "" ) - self.model_weights_path = Path( - self.params["model_weights_parent_dir"], self.params["model_name"] + self.model_weights_path = str( + Path(self.params["model_weights_parent_dir"], self.params["model_name"]) ) self.env_str = self._generate_env_str() @@ -187,7 +187,7 @@ def _generate_launch_cmd(self) -> str: launch_cmd.append( "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"][self.engine]).format( # type: ignore[literal-required] - model_weights_path=self.model_weights_path if not self.params.get("hf_model") else self.params["hf_model"], + model_weights_path=self.params.get("hf_model") or self.model_weights_path, model_name=self.params["model_name"], ) ) @@ -217,7 +217,7 @@ def _generate_multinode_sglang_launch_cmd(self) -> str: SLURM_SCRIPT_TEMPLATE["launch_cmd"]["sglang_multinode"] ).format( num_nodes=self.params["num_nodes"], - model_weights_path=self.model_weights_path if not self.params.get("hf_model") else self.params["hf_model"], + model_weights_path=self.params.get("hf_model") or self.model_weights_path, model_name=self.params["model_name"], ) @@ -277,7 +277,7 @@ def __init__(self, params: dict[str, Any]): self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME for model_name in self.params["models"]: self.params["models"][model_name]["additional_binds"] = ( - {self.params['models'][model_name]['bind']} + self.params["models"][model_name]["bind"] if self.params["models"][model_name].get("bind") else "" ) @@ -352,7 +352,7 @@ def _generate_model_launch_script(self, model_name: str) -> Path: "\n".join( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"][model_params["engine"]] ).format( - model_weights_path=model_params["model_weights_path"] if not model_params.get("hf_model") else model_params["hf_model"], + model_weights_path=model_params.get("hf_model") or model_params["model_weights_path"], model_name=model_name, ) ) From 292200f32ca2b068306e1d9b928dd926fe5324fe Mon Sep 17 00:00:00 2001 From: XkunW Date: Mon, 30 Mar 2026 13:18:49 -0400 Subject: [PATCH 19/21] Fix tests --- tests/vec_inf/client/test_engine_selection.py | 12 +++ tests/vec_inf/client/test_helper.py | 12 +++ .../client/test_slurm_script_generator.py | 76 +++++++------------ 3 files changed, 53 insertions(+), 47 deletions(-) diff --git a/tests/vec_inf/client/test_engine_selection.py b/tests/vec_inf/client/test_engine_selection.py index 8812e8ed..d4571086 100644 --- a/tests/vec_inf/client/test_engine_selection.py +++ b/tests/vec_inf/client/test_engine_selection.py @@ -24,6 +24,12 @@ def _set_required_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("VEC_INF_ACCOUNT", "test-account") monkeypatch.setenv("VEC_INF_WORK_DIR", "/tmp") + @pytest.fixture(autouse=True) + def _mock_validate_weights_path(self) -> None: + """Avoid disk checks for fake model paths in fixtures.""" + with patch("vec_inf.client._helper.utils.validate_weights_path"): + yield + @pytest.fixture def model_config_vllm(self) -> ModelConfig: """Fixture providing a vLLM model configuration.""" @@ -187,6 +193,12 @@ def _set_required_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("VEC_INF_ACCOUNT", "test-account") monkeypatch.setenv("VEC_INF_WORK_DIR", "/tmp") + @pytest.fixture(autouse=True) + def _mock_validate_weights_path(self) -> None: + """Avoid disk checks for fake model paths in fixtures.""" + with patch("vec_inf.client._helper.utils.validate_weights_path"): + yield + @pytest.fixture def model_config(self) -> ModelConfig: """Fixture providing a basic model configuration.""" diff --git a/tests/vec_inf/client/test_helper.py b/tests/vec_inf/client/test_helper.py index 7f910003..3a659856 100644 --- a/tests/vec_inf/client/test_helper.py +++ b/tests/vec_inf/client/test_helper.py @@ -32,6 +32,12 @@ class TestModelLauncher: """Tests for the ModelLauncher class.""" + @pytest.fixture(autouse=True) + def _mock_validate_weights_path(self) -> None: + """Avoid disk checks for fake model paths in fixtures.""" + with patch("vec_inf.client._helper.utils.validate_weights_path"): + yield + @pytest.fixture def model_config(self) -> ModelConfig: """Fixture providing a basic model configuration for tests.""" @@ -385,6 +391,12 @@ def test_launch_with_sglang_engine( class TestBatchModelLauncher: """Tests for the BatchModelLauncher class.""" + @pytest.fixture(autouse=True) + def _mock_validate_weights_path(self) -> None: + """Avoid disk checks for fake model paths in fixtures.""" + with patch("vec_inf.client._helper.utils.validate_weights_path"): + yield + @pytest.fixture def batch_model_configs(self) -> list[ModelConfig]: """Fixture providing batch model configurations for tests.""" diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index 60f66a8c..cad88f20 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -12,14 +12,6 @@ ) -@pytest.fixture(autouse=True) -def patch_model_weights_exists(monkeypatch): - """Ensure model weights directory existence checks default to True.""" - monkeypatch.setattr( - "vec_inf.client._slurm_script_generator.Path.exists", lambda self: True - ) - - class TestSlurmScriptGenerator: """Tests for SlurmScriptGenerator class.""" @@ -123,7 +115,7 @@ def test_init_singularity(self, singularity_params): assert generator.params == singularity_params assert generator.use_container assert not generator.is_multinode - assert generator.additional_binds == ",/scratch:/scratch,/data:/data" + assert generator.additional_binds == "/scratch:/scratch,/data:/data" assert generator.model_weights_path == "/path/to/model_weights/test-model" assert ( generator.env_str @@ -195,18 +187,16 @@ def test_generate_server_setup_singularity(self, singularity_params): ) # Remove module name since it's inconsistent between clusters def test_generate_server_setup_singularity_no_weights( - self, singularity_params, monkeypatch + self, singularity_params ): - """Test server setup when model weights don't exist.""" - monkeypatch.setattr( - "vec_inf.client._slurm_script_generator.Path.exists", - lambda self: False, - ) + """Test server setup when using hf_model (no local weights in bind path).""" + params = singularity_params.copy() + params["hf_model"] = "test-org/test-model" - generator = SlurmScriptGenerator(singularity_params) + generator = SlurmScriptGenerator(params) setup = generator._generate_server_setup() - assert "ray stop" in setup + assert "module load" in setup or "apptainer" in setup.lower() assert "/path/to/model_weights/test-model" not in setup def test_generate_launch_cmd_venv(self, basic_params): @@ -337,13 +327,8 @@ def test_generate_script_content_sglang(self, basic_params): assert "sglang.launch_server" in content assert "find_available_port" in content - def test_generate_launch_cmd_with_hf_model_override( - self, basic_params, monkeypatch - ): - """Test launch command uses hf_model when local weights don't exist.""" - monkeypatch.setattr( - "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False - ) + def test_generate_launch_cmd_with_hf_model_override(self, basic_params): + """Test launch command uses hf_model when specified.""" params = basic_params.copy() params["hf_model"] = "meta-llama/Meta-Llama-3.1-8B-Instruct" generator = SlurmScriptGenerator(params) @@ -361,20 +346,18 @@ def test_generate_launch_cmd_singularity(self, singularity_params): assert "source" not in launch_cmd def test_generate_launch_cmd_singularity_no_local_weights( - self, singularity_params, monkeypatch + self, singularity_params ): - """Test container launch when model weights directory is missing.""" - monkeypatch.setattr( - "vec_inf.client._slurm_script_generator.Path.exists", - lambda self: False, - ) + """Test container launch when using hf_model instead of local weights.""" + params = singularity_params.copy() + params["hf_model"] = "test-org/test-model" - generator = SlurmScriptGenerator(singularity_params) + generator = SlurmScriptGenerator(params) launch_cmd = generator._generate_launch_cmd() assert "exec --nv" in launch_cmd - assert "--bind /path/to/model_weights/test-model" not in launch_cmd - assert "vllm serve test-model" in launch_cmd + assert "vllm serve test-org/test-model" in launch_cmd + assert "vllm serve /path/to/model_weights/test-model" not in launch_cmd def test_generate_launch_cmd_boolean_args(self, basic_params): """Test launch command with boolean vLLM arguments.""" @@ -522,11 +505,11 @@ def test_init_singularity(self, batch_singularity_params): assert generator.use_container assert ( generator.params["models"]["model1"]["additional_binds"] - == ",/scratch:/scratch,/data:/data" + == "/scratch:/scratch,/data:/data" ) assert ( generator.params["models"]["model2"]["additional_binds"] - == ",/scratch:/scratch,/data:/data" + == "/scratch:/scratch,/data:/data" ) def test_init_singularity_no_bind(self, batch_params): @@ -571,12 +554,9 @@ def test_generate_model_launch_script_basic( @patch("pathlib.Path.touch") @patch("pathlib.Path.write_text") def test_generate_model_launch_script_with_hf_model_override( - self, mock_write_text, mock_touch, batch_params, monkeypatch + self, mock_write_text, mock_touch, batch_params ): - """Test batch launch script uses hf_model when local weights don't exist.""" - monkeypatch.setattr( - "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False - ) + """Test batch launch script uses hf_model when specified.""" params = batch_params.copy() params["models"] = {k: v.copy() for k, v in batch_params["models"].items()} params["models"]["model1"]["hf_model"] = "meta-llama/Meta-Llama-3.1-8B-Instruct" @@ -604,20 +584,22 @@ def test_generate_model_launch_script_singularity( @patch("pathlib.Path.touch") @patch("pathlib.Path.write_text") def test_generate_model_launch_script_singularity_no_weights( - self, mock_write_text, mock_touch, batch_singularity_params, monkeypatch + self, mock_write_text, mock_touch, batch_singularity_params ): - """Test batch model launch script when model weights don't exist.""" - monkeypatch.setattr( - "vec_inf.client._slurm_script_generator.Path.exists", - lambda self: False, - ) + """Test batch model launch script when using hf_model (no local weights).""" + params = batch_singularity_params.copy() + params["models"] = { + k: v.copy() for k, v in batch_singularity_params["models"].items() + } + params["models"]["model1"]["hf_model"] = "test-org/model1" - generator = BatchSlurmScriptGenerator(batch_singularity_params) + generator = BatchSlurmScriptGenerator(params) script_path = generator._generate_model_launch_script("model1") assert script_path.name == "launch_model1.sh" call_args = mock_write_text.call_args[0][0] assert "/path/to/model_weights/model1" not in call_args + assert "vllm serve test-org/model1" in call_args @patch("vec_inf.client._slurm_script_generator.datetime") @patch("pathlib.Path.touch") From ba9030fc6953718a7aabc98b61d7be1bf987caed Mon Sep 17 00:00:00 2001 From: XkunW Date: Mon, 30 Mar 2026 13:24:21 -0400 Subject: [PATCH 20/21] ruff check & format --- vec_inf/client/_helper.py | 3 +-- vec_inf/client/_slurm_script_generator.py | 10 +++++----- vec_inf/client/_utils.py | 12 ++++++------ vec_inf/client/config.py | 2 +- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/vec_inf/client/_helper.py b/vec_inf/client/_helper.py index b1fca2da..ba1c8887 100644 --- a/vec_inf/client/_helper.py +++ b/vec_inf/client/_helper.py @@ -5,7 +5,6 @@ """ import json -import os import time import warnings from pathlib import Path @@ -356,7 +355,7 @@ def _get_launch_params(self) -> dict[str, Any]: # Override config defaults with CLI arguments self._apply_cli_overrides(params) - # Validate that weights path exists or HF model provided, and check HF cache configuration + # Validate weights path exists or HF model provided, and check HF cache config utils.validate_weights_path(params, self.model_name) # Check for required fields without default vals, will raise an error if missing diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py index 0c7aeaf7..dcca77d8 100644 --- a/vec_inf/client/_slurm_script_generator.py +++ b/vec_inf/client/_slurm_script_generator.py @@ -35,9 +35,7 @@ def __init__(self, params: dict[str, Any]): self.engine = params.get("engine", "vllm") self.is_multinode = int(self.params["num_nodes"]) > 1 self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME - self.additional_binds = ( - self.params["bind"] if self.params.get("bind") else "" - ) + self.additional_binds = self.params["bind"] if self.params.get("bind") else "" self.model_weights_path = str( Path(self.params["model_weights_parent_dir"], self.params["model_name"]) ) @@ -187,7 +185,8 @@ def _generate_launch_cmd(self) -> str: launch_cmd.append( "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"][self.engine]).format( # type: ignore[literal-required] - model_weights_path=self.params.get("hf_model") or self.model_weights_path, + model_weights_path=self.params.get("hf_model") + or self.model_weights_path, model_name=self.params["model_name"], ) ) @@ -352,7 +351,8 @@ def _generate_model_launch_script(self, model_name: str) -> Path: "\n".join( BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"][model_params["engine"]] ).format( - model_weights_path=model_params.get("hf_model") or model_params["model_weights_path"], + model_weights_path=model_params.get("hf_model") + or model_params["model_weights_path"], model_name=model_name, ) ) diff --git a/vec_inf/client/_utils.py b/vec_inf/client/_utils.py index 62b4c7a5..bfa48ee4 100644 --- a/vec_inf/client/_utils.py +++ b/vec_inf/client/_utils.py @@ -523,9 +523,7 @@ def check_hf_cache_and_bind(params: dict[str, Any], model_name: str) -> None: """ hf_cache_vars = ["HF_HOME", "HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE"] env_vars = params.get("env", {}) - set_cache_values = { - env_vars[var] for var in hf_cache_vars if var in env_vars - } + set_cache_values = {env_vars[var] for var in hf_cache_vars if var in env_vars} if not set_cache_values: warnings.warn( @@ -540,9 +538,11 @@ def check_hf_cache_and_bind(params: dict[str, Any], model_name: str) -> None: return bind_str = params.get("bind", "") - existing_hosts = { - b.split(":")[0] for b in bind_str.split(",") if b.strip() - } if bind_str else set() + existing_hosts = ( + {b.split(":")[0] for b in bind_str.split(",") if b.strip()} + if bind_str + else set() + ) new_paths = set_cache_values - existing_hosts if new_paths: diff --git a/vec_inf/client/config.py b/vec_inf/client/config.py index f0d821d4..6a7c40a4 100644 --- a/vec_inf/client/config.py +++ b/vec_inf/client/config.py @@ -162,7 +162,7 @@ class ModelConfig(BaseModel): description=( "Full HuggingFace model id/path to use for vLLM serve (e.g. " "'meta-llama/Meta-Llama-3.1-8B-Instruct')." - ) + ), ) engine: Optional[str] = Field( default="vllm", From 8e66e261f13d5b017d82c47e5670ab8cd2cad7e3 Mon Sep 17 00:00:00 2001 From: XkunW Date: Mon, 30 Mar 2026 13:26:46 -0400 Subject: [PATCH 21/21] ruff format --- tests/vec_inf/client/test_slurm_script_generator.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py index cad88f20..4e8a364f 100644 --- a/tests/vec_inf/client/test_slurm_script_generator.py +++ b/tests/vec_inf/client/test_slurm_script_generator.py @@ -186,9 +186,7 @@ def test_generate_server_setup_singularity(self, singularity_params): "module load " in setup or "apptainer" in setup.lower() ) # Remove module name since it's inconsistent between clusters - def test_generate_server_setup_singularity_no_weights( - self, singularity_params - ): + def test_generate_server_setup_singularity_no_weights(self, singularity_params): """Test server setup when using hf_model (no local weights in bind path).""" params = singularity_params.copy() params["hf_model"] = "test-org/test-model" @@ -345,9 +343,7 @@ def test_generate_launch_cmd_singularity(self, singularity_params): assert "apptainer exec --nv" in launch_cmd assert "source" not in launch_cmd - def test_generate_launch_cmd_singularity_no_local_weights( - self, singularity_params - ): + def test_generate_launch_cmd_singularity_no_local_weights(self, singularity_params): """Test container launch when using hf_model instead of local weights.""" params = singularity_params.copy() params["hf_model"] = "test-org/test-model"