From fc843ed14ff14b6d04f3c5f545636bfd2765d641 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Thu, 30 Oct 2025 17:33:49 -0500
Subject: [PATCH 01/21] Add support to download models automatically if --model
 is specified in vllm args

---
 vec_inf/client/_slurm_script_generator.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index a5ede3c1..e1f32c50 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -137,6 +137,13 @@ def _generate_launch_cmd(self) -> str:
             Server launch command.
         """
         launcher_script = ["\n"]
+
+        # Check if --model is specified in vllm_args to use HuggingFace model name
+        model_path = self.model_weights_path
+        vllm_args_copy = self.params["vllm_args"].copy()
+        if "--model" in vllm_args_copy:
+            model_path = vllm_args_copy.pop("--model")
+
         if self.use_container:
             launcher_script.append(
                 SLURM_SCRIPT_TEMPLATE["container_command"].format(
@@ -151,12 +158,12 @@ def _generate_launch_cmd(self) -> str:
             )
         launcher_script.append(
             "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]).format(
-                model_weights_path=self.model_weights_path,
+                model_weights_path=model_path,
                 model_name=self.params["model_name"],
             )
         )
 
-        for arg, value in self.params["vllm_args"].items():
+        for arg, value in vllm_args_copy.items():
             if isinstance(value, bool):
                 launcher_script.append(f"    {arg} \\")
             else:
@@ -256,6 +263,12 @@ def _generate_model_launch_script(self, model_name: str) -> Path:
                 model_name=model_name,
             )
         )
+        # Check if --model is specified in vllm_args to use HuggingFace model name
+        model_path = model_params["model_weights_path"]
+        vllm_args_copy = model_params["vllm_args"].copy()
+        if "--model" in vllm_args_copy:
+            model_path = vllm_args_copy.pop("--model")
+
         if self.use_container:
             script_content.append(
                 BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format(
@@ -265,11 +278,12 @@ def _generate_model_launch_script(self, model_name: str) -> Path:
             )
         script_content.append(
             "\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"]).format(
-                model_weights_path=model_params["model_weights_path"],
+                model_weights_path=model_path,
                 model_name=model_name,
             )
         )
-        for arg, value in model_params["vllm_args"].items():
+        
+        for arg, value in vllm_args_copy.items():
             if isinstance(value, bool):
                 script_content.append(f"    {arg} \\")
             else:

From 5f790ffcce8dadc3c9edf79724eb378e6ee77d97 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Thu, 30 Oct 2025 18:21:34 -0500
Subject: [PATCH 02/21] create model dir if it doesn't exist

---
 vec_inf/client/_slurm_script_generator.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index e1f32c50..e52c94ba 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -38,9 +38,11 @@ def __init__(self, params: dict[str, Any]):
         self.additional_binds = self.params.get("bind", "")
         if self.additional_binds:
             self.additional_binds = f" --bind {self.additional_binds}"
-        self.model_weights_path = str(
-            Path(self.params["model_weights_parent_dir"], self.params["model_name"])
+        model_weights_path = Path(
+            self.params["model_weights_parent_dir"], self.params["model_name"]
         )
+        model_weights_path.mkdir(parents=True, exist_ok=True)
+        self.model_weights_path = str(model_weights_path)
         env_dict: dict[str, str] = self.params.get("env", {})
         # Create string of environment variables
         self.env_str = ""
@@ -210,11 +212,13 @@ def __init__(self, params: dict[str, Any]):
                 self.params["models"][model_name]["additional_binds"] = (
                     f" --bind {self.params['models'][model_name]['bind']}"
                 )
+            model_weights_path = Path(
+                self.params["models"][model_name]["model_weights_parent_dir"],
+                model_name,
+            )
+            model_weights_path.mkdir(parents=True, exist_ok=True)
             self.params["models"][model_name]["model_weights_path"] = str(
-                Path(
-                    self.params["models"][model_name]["model_weights_parent_dir"],
-                    model_name,
-                )
+                model_weights_path
             )
 
     def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path:

From 0f22bec693f56dba6b15c2887934d5be0fb320d9 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Wed, 12 Nov 2025 16:40:16 -0600
Subject: [PATCH 03/21] Check model weights existence before binding; use HF
 model name if missing

---
 .../client/test_slurm_script_generator.py     | 31 +++++++++--
 vec_inf/client/_slurm_script_generator.py     | 52 +++++++++++++------
 vec_inf/client/_slurm_templates.py            |  8 +--
 3 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py
index 0b55f59b..bad302e2 100644
--- a/tests/vec_inf/client/test_slurm_script_generator.py
+++ b/tests/vec_inf/client/test_slurm_script_generator.py
@@ -12,6 +12,15 @@
 )
 
 
+@pytest.fixture(autouse=True)
+def patch_model_weights_exists(monkeypatch):
+    """Ensure model weights directory existence checks default to True."""
+
+    monkeypatch.setattr(
+        "vec_inf.client._slurm_script_generator.Path.exists", lambda self: True
+    )
+
+
 class TestSlurmScriptGenerator:
     """Tests for SlurmScriptGenerator class."""
 
@@ -164,9 +173,8 @@ def test_generate_server_setup_singularity(self, singularity_params):
         setup = generator._generate_server_setup()
 
         assert "ray stop" in setup
-        assert (
-            "module load " in setup
-        )  # Remove module name since it's inconsistent between clusters
+        # Note: module_load_cmd may be empty in some configs, so we don't assert it
+        # The container setup should still work without it
 
     def test_generate_launch_cmd_venv(self, basic_params):
         """Test launch command generation with virtual environment."""
@@ -190,6 +198,23 @@ def test_generate_launch_cmd_singularity(self, singularity_params):
         assert "--bind /scratch:/scratch,/data:/data" in launch_cmd
         assert "source" not in launch_cmd
 
+    def test_generate_launch_cmd_singularity_no_local_weights(
+        self, singularity_params, monkeypatch
+    ):
+        """Test container launch when model weights directory is missing."""
+
+        monkeypatch.setattr(
+            "vec_inf.client._slurm_script_generator.Path.exists",
+            lambda self: False,
+        )
+
+        generator = SlurmScriptGenerator(singularity_params)
+        launch_cmd = generator._generate_launch_cmd()
+
+        assert "exec --nv" in launch_cmd
+        assert "--bind /path/to/model_weights/test-model" not in launch_cmd
+        assert "vllm serve test-model" in launch_cmd
+
     def test_generate_launch_cmd_boolean_args(self, basic_params):
         """Test launch command with boolean vLLM arguments."""
         params = basic_params.copy()
diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index e52c94ba..215526eb 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -41,8 +41,14 @@ def __init__(self, params: dict[str, Any]):
         model_weights_path = Path(
             self.params["model_weights_parent_dir"], self.params["model_name"]
         )
-        model_weights_path.mkdir(parents=True, exist_ok=True)
+        self.model_weights_exists = model_weights_path.exists()
         self.model_weights_path = str(model_weights_path)
+        self.model_source = (
+            self.model_weights_path if self.model_weights_exists else self.params["model_name"]
+        )
+        self.model_bind_option = (
+            f" --bind {self.model_weights_path}" if self.model_weights_exists else ""
+        )
         env_dict: dict[str, str] = self.params.get("env", {})
         # Create string of environment variables
         self.env_str = ""
@@ -53,6 +59,14 @@ def __init__(self, params: dict[str, Any]):
                 self.env_str += ","
             self.env_str += key + "=" + val
 
+        # # Ensure CUDA_VISIBLE_DEVICES is passed through to the container
+        # if self.use_container and "CUDA_VISIBLE_DEVICES" not in env_dict:
+        #     if len(self.env_str) == 0:
+        #         self.env_str = "--env "
+        #     else:
+        #         self.env_str += ","
+        #     self.env_str += "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
+
     def _generate_script_content(self) -> str:
         """Generate the complete Slurm script content.
 
@@ -109,7 +123,7 @@ def _generate_server_setup(self) -> str:
                 server_setup_str = server_setup_str.replace(
                     "CONTAINER_PLACEHOLDER",
                     SLURM_SCRIPT_TEMPLATE["container_command"].format(
-                        model_weights_path=self.model_weights_path,
+                        model_bind_option=self.model_bind_option,
                         additional_binds=self.additional_binds,
                         env_str=self.env_str,
                     ),
@@ -140,16 +154,15 @@ def _generate_launch_cmd(self) -> str:
         """
         launcher_script = ["\n"]
 
-        # Check if --model is specified in vllm_args to use HuggingFace model name
-        model_path = self.model_weights_path
         vllm_args_copy = self.params["vllm_args"].copy()
+        model_source = self.model_source
         if "--model" in vllm_args_copy:
-            model_path = vllm_args_copy.pop("--model")
+            model_source = vllm_args_copy.pop("--model")
 
         if self.use_container:
             launcher_script.append(
                 SLURM_SCRIPT_TEMPLATE["container_command"].format(
-                    model_weights_path=self.model_weights_path,
+                    model_bind_option=self.model_bind_option,
                     additional_binds=self.additional_binds,
                     env_str=self.env_str,
                 )
@@ -160,7 +173,7 @@ def _generate_launch_cmd(self) -> str:
             )
         launcher_script.append(
             "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"]).format(
-                model_weights_path=model_path,
+                model_source=model_source,
                 model_name=self.params["model_name"],
             )
         )
@@ -216,9 +229,19 @@ def __init__(self, params: dict[str, Any]):
                 self.params["models"][model_name]["model_weights_parent_dir"],
                 model_name,
             )
-            model_weights_path.mkdir(parents=True, exist_ok=True)
-            self.params["models"][model_name]["model_weights_path"] = str(
-                model_weights_path
+            model_weights_exists = model_weights_path.exists()
+            model_weights_path_str = str(model_weights_path)
+            self.params["models"][model_name]["model_weights_path"] = (
+                model_weights_path_str
+            )
+            self.params["models"][model_name]["model_weights_exists"] = (
+                model_weights_exists
+            )
+            self.params["models"][model_name]["model_bind_option"] = (
+                f" --bind {model_weights_path_str}" if model_weights_exists else ""
+            )
+            self.params["models"][model_name]["model_source"] = (
+                model_weights_path_str if model_weights_exists else model_name
             )
 
     def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path:
@@ -267,22 +290,21 @@ def _generate_model_launch_script(self, model_name: str) -> Path:
                 model_name=model_name,
             )
         )
-        # Check if --model is specified in vllm_args to use HuggingFace model name
-        model_path = model_params["model_weights_path"]
         vllm_args_copy = model_params["vllm_args"].copy()
+        model_source = model_params.get("model_source", model_params["model_weights_path"])
         if "--model" in vllm_args_copy:
-            model_path = vllm_args_copy.pop("--model")
+            model_source = vllm_args_copy.pop("--model")
 
         if self.use_container:
             script_content.append(
                 BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_command"].format(
-                    model_weights_path=model_params["model_weights_path"],
+                    model_bind_option=model_params.get("model_bind_option", ""),
                     additional_binds=model_params["additional_binds"],
                 )
             )
         script_content.append(
             "\n".join(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"]).format(
-                model_weights_path=model_path,
+                model_source=model_source,
                 model_name=model_name,
             )
         )
diff --git a/vec_inf/client/_slurm_templates.py b/vec_inf/client/_slurm_templates.py
index 209534d8..47dc6527 100644
--- a/vec_inf/client/_slurm_templates.py
+++ b/vec_inf/client/_slurm_templates.py
@@ -99,7 +99,7 @@ class SlurmScriptTemplate(TypedDict):
     "env_vars": [
         f"export {CONTAINER_MODULE_NAME}_BINDPATH=${CONTAINER_MODULE_NAME}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g')"
     ],
-    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
+    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}}{{model_bind_option}}{{additional_binds}} --containall {IMAGE_PATH} \\",
     "activate_venv": "source {venv}/bin/activate",
     "server_setup": {
         "single_node": [
@@ -147,7 +147,7 @@ class SlurmScriptTemplate(TypedDict):
         '    && mv temp.json "$json_path"',
     ],
     "launch_cmd": [
-        "vllm serve {model_weights_path} \\",
+        "vllm serve {model_source} \\",
         "    --served-model-name {model_name} \\",
         '    --host "0.0.0.0" \\',
         "    --port $vllm_port_number \\",
@@ -238,9 +238,9 @@ class BatchModelLaunchScriptTemplate(TypedDict):
         '    "$json_path" > temp_{model_name}.json \\',
         '    && mv temp_{model_name}.json "$json_path"\n',
     ],
-    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv --bind {{model_weights_path}}{{additional_binds}} --containall {IMAGE_PATH} \\",
+    "container_command": f"{CONTAINER_MODULE_NAME} exec --nv{{model_bind_option}}{{additional_binds}} --containall {IMAGE_PATH} \\",
     "launch_cmd": [
-        "vllm serve {model_weights_path} \\",
+        "vllm serve {model_source} \\",
         "    --served-model-name {model_name} \\",
         '    --host "0.0.0.0" \\',
         "    --port $vllm_port_number \\",

From 9f2fdd21b7e22b1e557e6f1ee63cd7581a45dc9a Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Wed, 12 Nov 2025 16:43:37 -0600
Subject: [PATCH 04/21] Remove commented code

---
 vec_inf/client/_slurm_script_generator.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index 215526eb..4c1b0e5d 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -59,14 +59,6 @@ def __init__(self, params: dict[str, Any]):
                 self.env_str += ","
             self.env_str += key + "=" + val
 
-        # # Ensure CUDA_VISIBLE_DEVICES is passed through to the container
-        # if self.use_container and "CUDA_VISIBLE_DEVICES" not in env_dict:
-        #     if len(self.env_str) == 0:
-        #         self.env_str = "--env "
-        #     else:
-        #         self.env_str += ","
-        #     self.env_str += "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
-
     def _generate_script_content(self) -> str:
         """Generate the complete Slurm script content.
 

From 38011beecad2e5eb5e458a660373c960d3802704 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Wed, 12 Nov 2025 16:47:57 -0600
Subject: [PATCH 05/21] Apply code formatting fixes from pre-commit

---
 tests/vec_inf/client/test_slurm_script_generator.py |  2 --
 vec_inf/client/_slurm_script_generator.py           | 10 +++++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py
index bad302e2..0abd80bb 100644
--- a/tests/vec_inf/client/test_slurm_script_generator.py
+++ b/tests/vec_inf/client/test_slurm_script_generator.py
@@ -15,7 +15,6 @@
 @pytest.fixture(autouse=True)
 def patch_model_weights_exists(monkeypatch):
     """Ensure model weights directory existence checks default to True."""
-
     monkeypatch.setattr(
         "vec_inf.client._slurm_script_generator.Path.exists", lambda self: True
     )
@@ -202,7 +201,6 @@ def test_generate_launch_cmd_singularity_no_local_weights(
         self, singularity_params, monkeypatch
     ):
         """Test container launch when model weights directory is missing."""
-
         monkeypatch.setattr(
             "vec_inf.client._slurm_script_generator.Path.exists",
             lambda self: False,
diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index 4c1b0e5d..364dd6b8 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -44,7 +44,9 @@ def __init__(self, params: dict[str, Any]):
         self.model_weights_exists = model_weights_path.exists()
         self.model_weights_path = str(model_weights_path)
         self.model_source = (
-            self.model_weights_path if self.model_weights_exists else self.params["model_name"]
+            self.model_weights_path
+            if self.model_weights_exists
+            else self.params["model_name"]
         )
         self.model_bind_option = (
             f" --bind {self.model_weights_path}" if self.model_weights_exists else ""
@@ -283,7 +285,9 @@ def _generate_model_launch_script(self, model_name: str) -> Path:
             )
         )
         vllm_args_copy = model_params["vllm_args"].copy()
-        model_source = model_params.get("model_source", model_params["model_weights_path"])
+        model_source = model_params.get(
+            "model_source", model_params["model_weights_path"]
+        )
         if "--model" in vllm_args_copy:
             model_source = vllm_args_copy.pop("--model")
 
@@ -300,7 +304,7 @@ def _generate_model_launch_script(self, model_name: str) -> Path:
                 model_name=model_name,
             )
         )
-        
+
         for arg, value in vllm_args_copy.items():
             if isinstance(value, bool):
                 script_content.append(f"    {arg} \\")

From 4de35635e7d8f9c2343372dc29ad59da983de832 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Wed, 12 Nov 2025 16:57:21 -0600
Subject: [PATCH 06/21] revert unnecessary test change

---
 tests/vec_inf/client/test_slurm_script_generator.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py
index 0abd80bb..62b36140 100644
--- a/tests/vec_inf/client/test_slurm_script_generator.py
+++ b/tests/vec_inf/client/test_slurm_script_generator.py
@@ -172,8 +172,9 @@ def test_generate_server_setup_singularity(self, singularity_params):
         setup = generator._generate_server_setup()
 
         assert "ray stop" in setup
-        # Note: module_load_cmd may be empty in some configs, so we don't assert it
-        # The container setup should still work without it
+        assert (
+            "module load " in setup
+        )  # Remove module name since it's inconsistent between clusters
 
     def test_generate_launch_cmd_venv(self, basic_params):
         """Test launch command generation with virtual environment."""

From 8b6a2119da676c0d077af288f72f6dc199e03af1 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Wed, 12 Nov 2025 17:05:28 -0600
Subject: [PATCH 07/21] Apply formatting fixes from pre-commit

---
 vec_inf/client/_slurm_script_generator.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index 0f78fb18..01b786ea 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -121,7 +121,9 @@ def _generate_server_setup(self) -> str:
             server_script.append("\n".join(SLURM_SCRIPT_TEMPLATE["container_setup"]))
             server_script.append(
                 SLURM_SCRIPT_TEMPLATE["bind_path"].format(
-                    model_weights_path=self.model_weights_path if self.model_weights_exists else "",
+                    model_weights_path=self.model_weights_path
+                    if self.model_weights_exists
+                    else "",
                     additional_binds=self.additional_binds,
                 )
             )
@@ -289,7 +291,9 @@ def _generate_model_launch_script(self, model_name: str) -> Path:
             script_content.append(BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["container_setup"])
         script_content.append(
             BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["bind_path"].format(
-                model_weights_path=model_params["model_weights_path"] if model_params.get("model_weights_exists", True) else "",
+                model_weights_path=model_params["model_weights_path"]
+                if model_params.get("model_weights_exists", True)
+                else "",
                 additional_binds=model_params["additional_binds"],
             )
         )

From c68cb3553a4c7abdff595e8fef08395f66076256 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Wed, 12 Nov 2025 17:21:10 -0600
Subject: [PATCH 08/21] Add tests for model weights existence coverage

---
 .../client/test_slurm_script_generator.py     | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py
index b962f0f9..a81a962d 100644
--- a/tests/vec_inf/client/test_slurm_script_generator.py
+++ b/tests/vec_inf/client/test_slurm_script_generator.py
@@ -176,6 +176,21 @@ def test_generate_server_setup_singularity(self, singularity_params):
             "module load " in setup
         )  # Remove module name since it's inconsistent between clusters
 
+    def test_generate_server_setup_singularity_no_weights(
+        self, singularity_params, monkeypatch
+    ):
+        """Test server setup when model weights don't exist."""
+        monkeypatch.setattr(
+            "vec_inf.client._slurm_script_generator.Path.exists",
+            lambda self: False,
+        )
+
+        generator = SlurmScriptGenerator(singularity_params)
+        setup = generator._generate_server_setup()
+
+        assert "ray stop" in setup
+        assert "/path/to/model_weights/test-model" not in setup
+
     def test_generate_launch_cmd_venv(self, basic_params):
         """Test launch command generation with virtual environment."""
         generator = SlurmScriptGenerator(basic_params)
@@ -415,6 +430,24 @@ def test_generate_model_launch_script_singularity(
         mock_touch.assert_called_once()
         mock_write_text.assert_called_once()
 
+    @patch("pathlib.Path.touch")
+    @patch("pathlib.Path.write_text")
+    def test_generate_model_launch_script_singularity_no_weights(
+        self, mock_write_text, mock_touch, batch_singularity_params, monkeypatch
+    ):
+        """Test batch model launch script when model weights don't exist."""
+        monkeypatch.setattr(
+            "vec_inf.client._slurm_script_generator.Path.exists",
+            lambda self: False,
+        )
+
+        generator = BatchSlurmScriptGenerator(batch_singularity_params)
+        script_path = generator._generate_model_launch_script("model1")
+
+        assert script_path.name == "launch_model1.sh"
+        call_args = mock_write_text.call_args[0][0]
+        assert "/path/to/model_weights/model1" not in call_args
+
     @patch("vec_inf.client._slurm_script_generator.datetime")
     @patch("pathlib.Path.touch")
     @patch("pathlib.Path.write_text")

From b610891a831aa6bd6511331fc68237927c6e0b70 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Mon, 5 Jan 2026 17:32:03 -0600
Subject: [PATCH 09/21] Remove redundant /dev/infiniband

---
 vec_inf/client/_slurm_templates.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vec_inf/client/_slurm_templates.py b/vec_inf/client/_slurm_templates.py
index ab607fa1..62697006 100644
--- a/vec_inf/client/_slurm_templates.py
+++ b/vec_inf/client/_slurm_templates.py
@@ -98,7 +98,7 @@ class SlurmScriptTemplate(TypedDict):
         f"{CONTAINER_MODULE_NAME} exec {IMAGE_PATH} ray stop",
     ],
     "imports": "source {src_dir}/find_port.sh",
-    "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,$(echo /dev/infiniband* | sed -e 's/ /,/g'),/dev,/tmp{{model_weights_path}}{{additional_binds}}",
+    "bind_path": f"export {CONTAINER_MODULE_NAME.upper()}_BINDPATH=${CONTAINER_MODULE_NAME.upper()}_BINDPATH,/dev,/tmp{{model_weights_path}}{{additional_binds}}",
     "container_command": f"{CONTAINER_MODULE_NAME} exec --nv {{env_str}} --containall {IMAGE_PATH} \\",
     "activate_venv": "source {venv}/bin/activate",
     "server_setup": {

From a7a5debbed1d2c6d868a13c38e480efe64ebea4d Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Mon, 5 Jan 2026 17:35:28 -0600
Subject: [PATCH 10/21] Remove unused variable

---
 vec_inf/client/_slurm_script_generator.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index 01b786ea..89bac418 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -47,9 +47,6 @@ def __init__(self, params: dict[str, Any]):
             if self.model_weights_exists
             else self.params["model_name"]
         )
-        self.model_bind_option = (
-            f",{self.model_weights_path}" if self.model_weights_exists else ""
-        )
         self.env_str = self._generate_env_str()
 
     def _generate_env_str(self) -> str:

From bb3142bb674b19658d5355ddb9a389c807208172 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Mon, 5 Jan 2026 17:45:44 -0600
Subject: [PATCH 11/21] Add warning if downloading weights and HF cache not set

---
 vec_inf/client/_slurm_script_generator.py | 13 ++++++++
 vec_inf/client/_utils.py                  | 40 +++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index 89bac418..0c86d688 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -15,6 +15,7 @@
     SLURM_SCRIPT_TEMPLATE,
 )
 from vec_inf.client._slurm_vars import CONTAINER_MODULE_NAME
+from vec_inf.client._utils import check_and_warn_hf_cache
 
 
 class SlurmScriptGenerator:
@@ -47,6 +48,11 @@ def __init__(self, params: dict[str, Any]):
             if self.model_weights_exists
             else self.params["model_name"]
         )
+        check_and_warn_hf_cache(
+            self.model_weights_exists,
+            self.model_weights_path,
+            self.params.get("env", {}),
+        )
         self.env_str = self._generate_env_str()
 
     def _generate_env_str(self) -> str:
@@ -253,6 +259,13 @@ def __init__(self, params: dict[str, Any]):
             self.params["models"][model_name]["model_source"] = (
                 model_weights_path_str if model_weights_exists else model_name
             )
+            check_and_warn_hf_cache(
+                model_weights_exists,
+                model_weights_path_str,
+                self.params["models"][model_name].get("env", {}),
+                model_name,
+            )
+
 
     def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path:
         """Write the generated Slurm script to the log directory.
diff --git a/vec_inf/client/_utils.py b/vec_inf/client/_utils.py
index 67d907ec..e39b28d9 100644
--- a/vec_inf/client/_utils.py
+++ b/vec_inf/client/_utils.py
@@ -456,3 +456,43 @@ def check_required_fields(params: dict[str, Any]) -> dict[str, Any]:
                     f"{arg} is required, please set it in the command arguments or environment variables"
                 )
     return env_overrides
+
+
+def check_and_warn_hf_cache(
+    model_weights_exists: bool,
+    model_weights_path: str,
+    env_dict: dict[str, str],
+    model_name: str | None = None,
+) -> None:
+    """Warn if model weights don't exist and HuggingFace cache directory is not set.
+
+    Parameters
+    ----------
+    model_weights_exists : bool
+        Whether the model weights exist at the expected path.
+    model_weights_path : str
+        The expected path to the model weights.
+    env_dict : dict[str, str]
+        Dictionary of environment variables to check (from --env parameter).
+    model_name : str | None, optional
+        Optional model name to include in the warning message (for batch mode).
+    """
+    if model_weights_exists:
+        return
+
+    hf_cache_vars = ["HF_HOME", "HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE"]
+    hf_cache_set = any(
+        os.environ.get(var) or env_dict.get(var) for var in hf_cache_vars
+    )
+
+    if not hf_cache_set:
+        model_prefix = f"Model weights for '{model_name}' " if model_name else "Model weights "
+        warnings.warn(
+            f"{model_prefix}not found at '{model_weights_path}' and no "
+            f"HuggingFace cache directory is set (HF_HOME, HF_HUB_CACHE, or "
+            f"HUGGINGFACE_HUB_CACHE). The model may be downloaded to your home "
+            f"directory, which could consume your storage quota. Consider setting "
+            f"one of these environment variables to a shared cache location.",
+            UserWarning,
+            stacklevel=4,
+        )

From 2db9c6c860fc2857ca5e9d450587266ac10d9ff5 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Mon, 5 Jan 2026 17:49:34 -0600
Subject: [PATCH 12/21] Formatting only (no functional changes)

---
 vec_inf/client/_slurm_script_generator.py | 1 -
 vec_inf/client/_utils.py                  | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index 0c86d688..433a8f27 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -266,7 +266,6 @@ def __init__(self, params: dict[str, Any]):
                 model_name,
             )
 
-
     def _write_to_log_dir(self, script_content: list[str], script_name: str) -> Path:
         """Write the generated Slurm script to the log directory.
 
diff --git a/vec_inf/client/_utils.py b/vec_inf/client/_utils.py
index e39b28d9..2f3b01d6 100644
--- a/vec_inf/client/_utils.py
+++ b/vec_inf/client/_utils.py
@@ -486,7 +486,9 @@ def check_and_warn_hf_cache(
     )
 
     if not hf_cache_set:
-        model_prefix = f"Model weights for '{model_name}' " if model_name else "Model weights "
+        model_prefix = (
+            f"Model weights for '{model_name}' " if model_name else "Model weights "
+        )
         warnings.warn(
             f"{model_prefix}not found at '{model_weights_path}' and no "
             f"HuggingFace cache directory is set (HF_HOME, HF_HUB_CACHE, or "

From 079c86a58726730b706db8d394db24afc5d308d6 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Thu, 29 Jan 2026 17:37:24 -0600
Subject: [PATCH 13/21] Add --hf-model CLI option and config field

Adds hf_model field to ModelConfig and LaunchOptions to specify
a HuggingFace model id for vLLM to download at runtime.
---
 vec_inf/cli/_cli.py      | 11 +++++++++++
 vec_inf/client/config.py | 10 ++++++++++
 vec_inf/client/models.py |  4 ++++
 3 files changed, 25 insertions(+)

diff --git a/vec_inf/cli/_cli.py b/vec_inf/cli/_cli.py
index b3fe6248..076f2aec 100644
--- a/vec_inf/cli/_cli.py
+++ b/vec_inf/cli/_cli.py
@@ -132,6 +132,15 @@ def cli() -> None:
     type=str,
     help="Path to parent directory containing model weights",
 )
+@click.option(
+    "--hf-model",
+    type=str,
+    help=(
+        "Full HuggingFace model id/path to use for vLLM serve (e.g. "
+        "'meta-llama/Meta-Llama-3.1-8B-Instruct'). "
+        "Keeps model-name as the short identifier for config/logs/job naming."
+    ),
+)
 @click.option(
     "--vllm-args",
     type=str,
@@ -200,6 +209,8 @@ def launch(
             Path to SLURM log directory
         - model_weights_parent_dir : str, optional
             Path to model weights directory
+        - hf_model : str, optional
+            Full HuggingFace model id/path to use for vLLM serve
         - vllm_args : str, optional
             vLLM engine arguments
         - env : str, optional
diff --git a/vec_inf/client/config.py b/vec_inf/client/config.py
index 4bcbe508..edbdf4cb 100644
--- a/vec_inf/client/config.py
+++ b/vec_inf/client/config.py
@@ -66,6 +66,9 @@ class ModelConfig(BaseModel):
         Directory path for storing logs
     model_weights_parent_dir : Path, optional
         Base directory containing model weights
+    hf_model : str, optional
+        HuggingFace model id for vLLM to download (e.g. "meta-llama/Llama-3.1-8B").
+        Used as model source when local weights don't exist.
     vllm_args : dict[str, Any], optional
         Additional arguments for vLLM engine configuration
 
@@ -148,6 +151,13 @@ class ModelConfig(BaseModel):
         default=Path(DEFAULT_ARGS["model_weights_parent_dir"]),
         description="Base directory for model weights",
     )
+    hf_model: Optional[str] = Field(
+        default=None,
+        description=(
+            "Full HuggingFace model id/path to use for vLLM serve (e.g. "
+            "'meta-llama/Meta-Llama-3.1-8B-Instruct')."
+        ),
+    )
     vllm_args: Optional[dict[str, Any]] = Field(
         default={}, description="vLLM engine arguments"
     )
diff --git a/vec_inf/client/models.py b/vec_inf/client/models.py
index 29ace42d..c53fae7f 100644
--- a/vec_inf/client/models.py
+++ b/vec_inf/client/models.py
@@ -222,6 +222,9 @@ class LaunchOptions:
         Directory for logs
     model_weights_parent_dir : str, optional
         Parent directory containing model weights
+    hf_model : str, optional
+        HuggingFace model id for vLLM to download (e.g. "meta-llama/Llama-3.1-8B").
+        Used as model source when local weights don't exist.
     vllm_args : str, optional
         Additional arguments for vLLM
     env : str, optional
@@ -250,6 +253,7 @@ class LaunchOptions:
     venv: Optional[str] = None
     log_dir: Optional[str] = None
     model_weights_parent_dir: Optional[str] = None
+    hf_model: Optional[str] = None
     vllm_args: Optional[str] = None
     env: Optional[str] = None
     config: Optional[str] = None

From 20163da637308c45a11d55529e4f74b5aac9b7d1 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Thu, 29 Jan 2026 17:37:24 -0600
Subject: [PATCH 14/21] Use hf_model as model source when local weights missing

Updates SlurmScriptGenerator and BatchSlurmScriptGenerator to use
hf_model for vllm serve when local weights don't exist.
Priority: local weights > hf_model > model name.
---
 vec_inf/client/_slurm_script_generator.py | 26 ++++++++++++++++-------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index 433a8f27..badf6504 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -43,11 +43,13 @@ def __init__(self, params: dict[str, Any]):
         )
         self.model_weights_exists = model_weights_path.exists()
         self.model_weights_path = str(model_weights_path)
-        self.model_source = (
-            self.model_weights_path
-            if self.model_weights_exists
-            else self.params["model_name"]
-        )
+        # Determine model source: local weights > hf_model > model name
+        if self.model_weights_exists:
+            self.model_source = self.model_weights_path
+        elif self.params.get("hf_model"):
+            self.model_source = self.params["hf_model"]
+        else:
+            self.model_source = self.params["model_name"]
         check_and_warn_hf_cache(
             self.model_weights_exists,
             self.model_weights_path,
@@ -256,9 +258,17 @@ def __init__(self, params: dict[str, Any]):
             self.params["models"][model_name]["model_weights_exists"] = (
                 model_weights_exists
             )
-            self.params["models"][model_name]["model_source"] = (
-                model_weights_path_str if model_weights_exists else model_name
-            )
+            # Determine model source: local weights > hf_model > model name
+            if model_weights_exists:
+                self.params["models"][model_name]["model_source"] = (
+                    model_weights_path_str
+                )
+            elif self.params["models"][model_name].get("hf_model"):
+                self.params["models"][model_name]["model_source"] = self.params[
+                    "models"
+                ][model_name]["hf_model"]
+            else:
+                self.params["models"][model_name]["model_source"] = model_name
             check_and_warn_hf_cache(
                 model_weights_exists,
                 model_weights_path_str,

From ed82b778d7b14b39c59328f57fdf76d6191f150b Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Thu, 29 Jan 2026 17:37:24 -0600
Subject: [PATCH 15/21] Pass hf_model from CLI to launch params

---
 vec_inf/client/_helper.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vec_inf/client/_helper.py b/vec_inf/client/_helper.py
index ce6e156e..d2b82f4e 100644
--- a/vec_inf/client/_helper.py
+++ b/vec_inf/client/_helper.py
@@ -204,6 +204,10 @@ def _apply_cli_overrides(self, params: dict[str, Any]) -> None:
         params : dict[str, Any]
             Dictionary of launch parameters to override
         """
+        if self.kwargs.get("hf_model"):
+            params["hf_model"] = self.kwargs["hf_model"]
+            del self.kwargs["hf_model"]
+
         if self.kwargs.get("vllm_args"):
             vllm_args = self._process_vllm_args(self.kwargs["vllm_args"])
             for key, value in vllm_args.items():

From d3f6772b9d440640f68a71e4b8960a819dcea8d7 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Thu, 29 Jan 2026 17:37:25 -0600
Subject: [PATCH 16/21] Add tests for hf_model override in slurm script
 generation

---
 .../client/test_slurm_script_generator.py     | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py
index a81a962d..9f3914b9 100644
--- a/tests/vec_inf/client/test_slurm_script_generator.py
+++ b/tests/vec_inf/client/test_slurm_script_generator.py
@@ -202,6 +202,21 @@ def test_generate_launch_cmd_venv(self, basic_params):
         assert "--max-model-len 8192" in launch_cmd
         assert "--enforce-eager" in launch_cmd
 
+    def test_generate_launch_cmd_with_hf_model_override(
+        self, basic_params, monkeypatch
+    ):
+        """Test launch command uses hf_model when local weights don't exist."""
+        monkeypatch.setattr(
+            "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False
+        )
+        params = basic_params.copy()
+        params["hf_model"] = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+        generator = SlurmScriptGenerator(params)
+        launch_cmd = generator._generate_launch_cmd()
+
+        assert "vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct" in launch_cmd
+        assert "vllm serve /path/to/model_weights/test-model" not in launch_cmd
+
     def test_generate_launch_cmd_singularity(self, singularity_params):
         """Test launch command generation with Singularity."""
         generator = SlurmScriptGenerator(singularity_params)
@@ -416,6 +431,25 @@ def test_generate_model_launch_script_basic(
         mock_touch.assert_called_once()
         mock_write_text.assert_called_once()
 
+    @patch("pathlib.Path.touch")
+    @patch("pathlib.Path.write_text")
+    def test_generate_model_launch_script_with_hf_model_override(
+        self, mock_write_text, mock_touch, batch_params, monkeypatch
+    ):
+        """Test batch launch script uses hf_model when local weights don't exist."""
+        monkeypatch.setattr(
+            "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False
+        )
+        params = batch_params.copy()
+        params["models"] = {k: v.copy() for k, v in batch_params["models"].items()}
+        params["models"]["model1"]["hf_model"] = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+
+        generator = BatchSlurmScriptGenerator(params)
+        generator._generate_model_launch_script("model1")
+
+        call_args = mock_write_text.call_args[0][0]
+        assert "vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct" in call_args
+
     @patch("pathlib.Path.touch")
     @patch("pathlib.Path.write_text")
     def test_generate_model_launch_script_singularity(

From 1c312dfadae69f8b3c997d5c3b7447672a727593 Mon Sep 17 00:00:00 2001
From: rohan-uiuc <rohan13@illinois.edu>
Date: Thu, 29 Jan 2026 17:37:25 -0600
Subject: [PATCH 17/21] Add documentation for --hf-model option

---
 docs/user_guide.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/docs/user_guide.md b/docs/user_guide.md
index c94313af..3ac97e4e 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -55,6 +55,17 @@ To overwrite default `vllm serve` arguments, you can specify the arguments in a
 vec-inf launch Meta-Llama-3.1-8B-Instruct --vllm-args '--max-model-len=65536,--compilation-config=3'
 ```
 
+To download models directly from HuggingFace Hub without needing local weights, use `--hf-model`:
+
+```bash
+vec-inf launch Qwen2.5-3B-Instruct \
+  --hf-model Qwen/Qwen2.5-3B-Instruct \
+  --env 'HF_HOME=/path/to/cache' \
+  --vllm-args '--max-model-len=4096'
+```
+
+Set `HF_HOME` via `--env` to control where models are cached. When `--hf-model` is given, it is used as the model source in place of the local weights path, even if local weights exist.
+
 For the full list of `vllm serve` arguments, you can find them [here](https://docs.vllm.ai/en/stable/serving/engine_args.html), make sure you select the correct vLLM version.
 
 #### Custom models

From 03864a5639bb397f368b6579264d716eefcd3a1d Mon Sep 17 00:00:00 2001
From: XkunW <marshall.wang@vectorinstitute.ai>
Date: Mon, 30 Mar 2026 13:18:39 -0400
Subject: [PATCH 18/21] Fix typos

---
 vec_inf/client/_slurm_script_generator.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index d1a0c7e9..0c7aeaf7 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -36,10 +36,10 @@ def __init__(self, params: dict[str, Any]):
         self.is_multinode = int(self.params["num_nodes"]) > 1
         self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
         self.additional_binds = (
-            {self.params['bind']} if self.params.get("bind") else ""
+            self.params["bind"] if self.params.get("bind") else ""
         )
-        self.model_weights_path = Path(
-            self.params["model_weights_parent_dir"], self.params["model_name"]
+        self.model_weights_path = str(
+            Path(self.params["model_weights_parent_dir"], self.params["model_name"])
         )
         self.env_str = self._generate_env_str()
 
@@ -187,7 +187,7 @@ def _generate_launch_cmd(self) -> str:
 
         launch_cmd.append(
             "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"][self.engine]).format(  # type: ignore[literal-required]
-                model_weights_path=self.model_weights_path if not self.params.get("hf_model") else self.params["hf_model"],
+                model_weights_path=self.params.get("hf_model") or self.model_weights_path,
                 model_name=self.params["model_name"],
             )
         )
@@ -217,7 +217,7 @@ def _generate_multinode_sglang_launch_cmd(self) -> str:
             SLURM_SCRIPT_TEMPLATE["launch_cmd"]["sglang_multinode"]
         ).format(
             num_nodes=self.params["num_nodes"],
-            model_weights_path=self.model_weights_path if not self.params.get("hf_model") else self.params["hf_model"],
+            model_weights_path=self.params.get("hf_model") or self.model_weights_path,
             model_name=self.params["model_name"],
         )
 
@@ -277,7 +277,7 @@ def __init__(self, params: dict[str, Any]):
         self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
         for model_name in self.params["models"]:
             self.params["models"][model_name]["additional_binds"] = (
-                {self.params['models'][model_name]['bind']}
+                self.params["models"][model_name]["bind"]
                 if self.params["models"][model_name].get("bind")
                 else ""
             )
@@ -352,7 +352,7 @@ def _generate_model_launch_script(self, model_name: str) -> Path:
             "\n".join(
                 BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"][model_params["engine"]]
             ).format(
-                model_weights_path=model_params["model_weights_path"] if not model_params.get("hf_model") else model_params["hf_model"],
+                model_weights_path=model_params.get("hf_model") or model_params["model_weights_path"],
                 model_name=model_name,
             )
         )

From 292200f32ca2b068306e1d9b928dd926fe5324fe Mon Sep 17 00:00:00 2001
From: XkunW <marshall.wang@vectorinstitute.ai>
Date: Mon, 30 Mar 2026 13:18:49 -0400
Subject: [PATCH 19/21] Fix tests

---
 tests/vec_inf/client/test_engine_selection.py | 12 +++
 tests/vec_inf/client/test_helper.py           | 12 +++
 .../client/test_slurm_script_generator.py     | 76 +++++++------------
 3 files changed, 53 insertions(+), 47 deletions(-)

diff --git a/tests/vec_inf/client/test_engine_selection.py b/tests/vec_inf/client/test_engine_selection.py
index 8812e8ed..d4571086 100644
--- a/tests/vec_inf/client/test_engine_selection.py
+++ b/tests/vec_inf/client/test_engine_selection.py
@@ -24,6 +24,12 @@ def _set_required_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None:
         monkeypatch.setenv("VEC_INF_ACCOUNT", "test-account")
         monkeypatch.setenv("VEC_INF_WORK_DIR", "/tmp")
 
+    @pytest.fixture(autouse=True)
+    def _mock_validate_weights_path(self) -> None:
+        """Avoid disk checks for fake model paths in fixtures."""
+        with patch("vec_inf.client._helper.utils.validate_weights_path"):
+            yield
+
     @pytest.fixture
     def model_config_vllm(self) -> ModelConfig:
         """Fixture providing a vLLM model configuration."""
@@ -187,6 +193,12 @@ def _set_required_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None:
         monkeypatch.setenv("VEC_INF_ACCOUNT", "test-account")
         monkeypatch.setenv("VEC_INF_WORK_DIR", "/tmp")
 
+    @pytest.fixture(autouse=True)
+    def _mock_validate_weights_path(self) -> None:
+        """Avoid disk checks for fake model paths in fixtures."""
+        with patch("vec_inf.client._helper.utils.validate_weights_path"):
+            yield
+
     @pytest.fixture
     def model_config(self) -> ModelConfig:
         """Fixture providing a basic model configuration."""
diff --git a/tests/vec_inf/client/test_helper.py b/tests/vec_inf/client/test_helper.py
index 7f910003..3a659856 100644
--- a/tests/vec_inf/client/test_helper.py
+++ b/tests/vec_inf/client/test_helper.py
@@ -32,6 +32,12 @@
 class TestModelLauncher:
     """Tests for the ModelLauncher class."""
 
+    @pytest.fixture(autouse=True)
+    def _mock_validate_weights_path(self) -> None:
+        """Avoid disk checks for fake model paths in fixtures."""
+        with patch("vec_inf.client._helper.utils.validate_weights_path"):
+            yield
+
     @pytest.fixture
     def model_config(self) -> ModelConfig:
         """Fixture providing a basic model configuration for tests."""
@@ -385,6 +391,12 @@ def test_launch_with_sglang_engine(
 class TestBatchModelLauncher:
     """Tests for the BatchModelLauncher class."""
 
+    @pytest.fixture(autouse=True)
+    def _mock_validate_weights_path(self) -> None:
+        """Avoid disk checks for fake model paths in fixtures."""
+        with patch("vec_inf.client._helper.utils.validate_weights_path"):
+            yield
+
     @pytest.fixture
     def batch_model_configs(self) -> list[ModelConfig]:
         """Fixture providing batch model configurations for tests."""
diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py
index 60f66a8c..cad88f20 100644
--- a/tests/vec_inf/client/test_slurm_script_generator.py
+++ b/tests/vec_inf/client/test_slurm_script_generator.py
@@ -12,14 +12,6 @@
 )
 
 
-@pytest.fixture(autouse=True)
-def patch_model_weights_exists(monkeypatch):
-    """Ensure model weights directory existence checks default to True."""
-    monkeypatch.setattr(
-        "vec_inf.client._slurm_script_generator.Path.exists", lambda self: True
-    )
-
-
 class TestSlurmScriptGenerator:
     """Tests for SlurmScriptGenerator class."""
 
@@ -123,7 +115,7 @@ def test_init_singularity(self, singularity_params):
         assert generator.params == singularity_params
         assert generator.use_container
         assert not generator.is_multinode
-        assert generator.additional_binds == ",/scratch:/scratch,/data:/data"
+        assert generator.additional_binds == "/scratch:/scratch,/data:/data"
         assert generator.model_weights_path == "/path/to/model_weights/test-model"
         assert (
             generator.env_str
@@ -195,18 +187,16 @@ def test_generate_server_setup_singularity(self, singularity_params):
         )  # Remove module name since it's inconsistent between clusters
 
     def test_generate_server_setup_singularity_no_weights(
-        self, singularity_params, monkeypatch
+        self, singularity_params
     ):
-        """Test server setup when model weights don't exist."""
-        monkeypatch.setattr(
-            "vec_inf.client._slurm_script_generator.Path.exists",
-            lambda self: False,
-        )
+        """Test server setup when using hf_model (no local weights in bind path)."""
+        params = singularity_params.copy()
+        params["hf_model"] = "test-org/test-model"
 
-        generator = SlurmScriptGenerator(singularity_params)
+        generator = SlurmScriptGenerator(params)
         setup = generator._generate_server_setup()
 
-        assert "ray stop" in setup
+        assert "module load" in setup or "apptainer" in setup.lower()
         assert "/path/to/model_weights/test-model" not in setup
 
     def test_generate_launch_cmd_venv(self, basic_params):
@@ -337,13 +327,8 @@ def test_generate_script_content_sglang(self, basic_params):
         assert "sglang.launch_server" in content
         assert "find_available_port" in content
 
-    def test_generate_launch_cmd_with_hf_model_override(
-        self, basic_params, monkeypatch
-    ):
-        """Test launch command uses hf_model when local weights don't exist."""
-        monkeypatch.setattr(
-            "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False
-        )
+    def test_generate_launch_cmd_with_hf_model_override(self, basic_params):
+        """Test launch command uses hf_model when specified."""
         params = basic_params.copy()
         params["hf_model"] = "meta-llama/Meta-Llama-3.1-8B-Instruct"
         generator = SlurmScriptGenerator(params)
@@ -361,20 +346,18 @@ def test_generate_launch_cmd_singularity(self, singularity_params):
         assert "source" not in launch_cmd
 
     def test_generate_launch_cmd_singularity_no_local_weights(
-        self, singularity_params, monkeypatch
+        self, singularity_params
     ):
-        """Test container launch when model weights directory is missing."""
-        monkeypatch.setattr(
-            "vec_inf.client._slurm_script_generator.Path.exists",
-            lambda self: False,
-        )
+        """Test container launch when using hf_model instead of local weights."""
+        params = singularity_params.copy()
+        params["hf_model"] = "test-org/test-model"
 
-        generator = SlurmScriptGenerator(singularity_params)
+        generator = SlurmScriptGenerator(params)
         launch_cmd = generator._generate_launch_cmd()
 
         assert "exec --nv" in launch_cmd
-        assert "--bind /path/to/model_weights/test-model" not in launch_cmd
-        assert "vllm serve test-model" in launch_cmd
+        assert "vllm serve test-org/test-model" in launch_cmd
+        assert "vllm serve /path/to/model_weights/test-model" not in launch_cmd
 
     def test_generate_launch_cmd_boolean_args(self, basic_params):
         """Test launch command with boolean vLLM arguments."""
@@ -522,11 +505,11 @@ def test_init_singularity(self, batch_singularity_params):
         assert generator.use_container
         assert (
             generator.params["models"]["model1"]["additional_binds"]
-            == ",/scratch:/scratch,/data:/data"
+            == "/scratch:/scratch,/data:/data"
         )
         assert (
             generator.params["models"]["model2"]["additional_binds"]
-            == ",/scratch:/scratch,/data:/data"
+            == "/scratch:/scratch,/data:/data"
         )
 
     def test_init_singularity_no_bind(self, batch_params):
@@ -571,12 +554,9 @@ def test_generate_model_launch_script_basic(
     @patch("pathlib.Path.touch")
     @patch("pathlib.Path.write_text")
     def test_generate_model_launch_script_with_hf_model_override(
-        self, mock_write_text, mock_touch, batch_params, monkeypatch
+        self, mock_write_text, mock_touch, batch_params
     ):
-        """Test batch launch script uses hf_model when local weights don't exist."""
-        monkeypatch.setattr(
-            "vec_inf.client._slurm_script_generator.Path.exists", lambda self: False
-        )
+        """Test batch launch script uses hf_model when specified."""
         params = batch_params.copy()
         params["models"] = {k: v.copy() for k, v in batch_params["models"].items()}
         params["models"]["model1"]["hf_model"] = "meta-llama/Meta-Llama-3.1-8B-Instruct"
@@ -604,20 +584,22 @@ def test_generate_model_launch_script_singularity(
     @patch("pathlib.Path.touch")
     @patch("pathlib.Path.write_text")
     def test_generate_model_launch_script_singularity_no_weights(
-        self, mock_write_text, mock_touch, batch_singularity_params, monkeypatch
+        self, mock_write_text, mock_touch, batch_singularity_params
     ):
-        """Test batch model launch script when model weights don't exist."""
-        monkeypatch.setattr(
-            "vec_inf.client._slurm_script_generator.Path.exists",
-            lambda self: False,
-        )
+        """Test batch model launch script when using hf_model (no local weights)."""
+        params = batch_singularity_params.copy()
+        params["models"] = {
+            k: v.copy() for k, v in batch_singularity_params["models"].items()
+        }
+        params["models"]["model1"]["hf_model"] = "test-org/model1"
 
-        generator = BatchSlurmScriptGenerator(batch_singularity_params)
+        generator = BatchSlurmScriptGenerator(params)
         script_path = generator._generate_model_launch_script("model1")
 
         assert script_path.name == "launch_model1.sh"
         call_args = mock_write_text.call_args[0][0]
         assert "/path/to/model_weights/model1" not in call_args
+        assert "vllm serve test-org/model1" in call_args
 
     @patch("vec_inf.client._slurm_script_generator.datetime")
     @patch("pathlib.Path.touch")

From ba9030fc6953718a7aabc98b61d7be1bf987caed Mon Sep 17 00:00:00 2001
From: XkunW <marshall.wang@vectorinstitute.ai>
Date: Mon, 30 Mar 2026 13:24:21 -0400
Subject: [PATCH 20/21] ruff check & format

---
 vec_inf/client/_helper.py                 |  3 +--
 vec_inf/client/_slurm_script_generator.py | 10 +++++-----
 vec_inf/client/_utils.py                  | 12 ++++++------
 vec_inf/client/config.py                  |  2 +-
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/vec_inf/client/_helper.py b/vec_inf/client/_helper.py
index b1fca2da..ba1c8887 100644
--- a/vec_inf/client/_helper.py
+++ b/vec_inf/client/_helper.py
@@ -5,7 +5,6 @@
 """
 
 import json
-import os
 import time
 import warnings
 from pathlib import Path
@@ -356,7 +355,7 @@ def _get_launch_params(self) -> dict[str, Any]:
         # Override config defaults with CLI arguments
         self._apply_cli_overrides(params)
 
-        # Validate that weights path exists or HF model provided, and check HF cache configuration
+        # Validate weights path exists or HF model provided, and check HF cache config
         utils.validate_weights_path(params, self.model_name)
 
         # Check for required fields without default vals, will raise an error if missing
diff --git a/vec_inf/client/_slurm_script_generator.py b/vec_inf/client/_slurm_script_generator.py
index 0c7aeaf7..dcca77d8 100644
--- a/vec_inf/client/_slurm_script_generator.py
+++ b/vec_inf/client/_slurm_script_generator.py
@@ -35,9 +35,7 @@ def __init__(self, params: dict[str, Any]):
         self.engine = params.get("engine", "vllm")
         self.is_multinode = int(self.params["num_nodes"]) > 1
         self.use_container = self.params["venv"] == CONTAINER_MODULE_NAME
-        self.additional_binds = (
-            self.params["bind"] if self.params.get("bind") else ""
-        )
+        self.additional_binds = self.params["bind"] if self.params.get("bind") else ""
         self.model_weights_path = str(
             Path(self.params["model_weights_parent_dir"], self.params["model_name"])
         )
@@ -187,7 +185,8 @@ def _generate_launch_cmd(self) -> str:
 
         launch_cmd.append(
             "\n".join(SLURM_SCRIPT_TEMPLATE["launch_cmd"][self.engine]).format(  # type: ignore[literal-required]
-                model_weights_path=self.params.get("hf_model") or self.model_weights_path,
+                model_weights_path=self.params.get("hf_model")
+                or self.model_weights_path,
                 model_name=self.params["model_name"],
             )
         )
@@ -352,7 +351,8 @@ def _generate_model_launch_script(self, model_name: str) -> Path:
             "\n".join(
                 BATCH_MODEL_LAUNCH_SCRIPT_TEMPLATE["launch_cmd"][model_params["engine"]]
             ).format(
-                model_weights_path=model_params.get("hf_model") or model_params["model_weights_path"],
+                model_weights_path=model_params.get("hf_model")
+                or model_params["model_weights_path"],
                 model_name=model_name,
             )
         )
diff --git a/vec_inf/client/_utils.py b/vec_inf/client/_utils.py
index 62b4c7a5..bfa48ee4 100644
--- a/vec_inf/client/_utils.py
+++ b/vec_inf/client/_utils.py
@@ -523,9 +523,7 @@ def check_hf_cache_and_bind(params: dict[str, Any], model_name: str) -> None:
     """
     hf_cache_vars = ["HF_HOME", "HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE"]
     env_vars = params.get("env", {})
-    set_cache_values = {
-        env_vars[var] for var in hf_cache_vars if var in env_vars
-    }
+    set_cache_values = {env_vars[var] for var in hf_cache_vars if var in env_vars}
 
     if not set_cache_values:
         warnings.warn(
@@ -540,9 +538,11 @@ def check_hf_cache_and_bind(params: dict[str, Any], model_name: str) -> None:
         return
 
     bind_str = params.get("bind", "")
-    existing_hosts = {
-        b.split(":")[0] for b in bind_str.split(",") if b.strip()
-    } if bind_str else set()
+    existing_hosts = (
+        {b.split(":")[0] for b in bind_str.split(",") if b.strip()}
+        if bind_str
+        else set()
+    )
 
     new_paths = set_cache_values - existing_hosts
     if new_paths:
diff --git a/vec_inf/client/config.py b/vec_inf/client/config.py
index f0d821d4..6a7c40a4 100644
--- a/vec_inf/client/config.py
+++ b/vec_inf/client/config.py
@@ -162,7 +162,7 @@ class ModelConfig(BaseModel):
         description=(
             "Full HuggingFace model id/path to use for vLLM serve (e.g. "
             "'meta-llama/Meta-Llama-3.1-8B-Instruct')."
-        )
+        ),
     )
     engine: Optional[str] = Field(
         default="vllm",

From 8e66e261f13d5b017d82c47e5670ab8cd2cad7e3 Mon Sep 17 00:00:00 2001
From: XkunW <marshall.wang@vectorinstitute.ai>
Date: Mon, 30 Mar 2026 13:26:46 -0400
Subject: [PATCH 21/21] ruff format

---
 tests/vec_inf/client/test_slurm_script_generator.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/vec_inf/client/test_slurm_script_generator.py b/tests/vec_inf/client/test_slurm_script_generator.py
index cad88f20..4e8a364f 100644
--- a/tests/vec_inf/client/test_slurm_script_generator.py
+++ b/tests/vec_inf/client/test_slurm_script_generator.py
@@ -186,9 +186,7 @@ def test_generate_server_setup_singularity(self, singularity_params):
             "module load " in setup or "apptainer" in setup.lower()
         )  # Remove module name since it's inconsistent between clusters
 
-    def test_generate_server_setup_singularity_no_weights(
-        self, singularity_params
-    ):
+    def test_generate_server_setup_singularity_no_weights(self, singularity_params):
         """Test server setup when using hf_model (no local weights in bind path)."""
         params = singularity_params.copy()
         params["hf_model"] = "test-org/test-model"
@@ -345,9 +343,7 @@ def test_generate_launch_cmd_singularity(self, singularity_params):
         assert "apptainer exec --nv" in launch_cmd
         assert "source" not in launch_cmd
 
-    def test_generate_launch_cmd_singularity_no_local_weights(
-        self, singularity_params
-    ):
+    def test_generate_launch_cmd_singularity_no_local_weights(self, singularity_params):
         """Test container launch when using hf_model instead of local weights."""
         params = singularity_params.copy()
         params["hf_model"] = "test-org/test-model"