From 5d13c658a03701ffa2a707784a9c5fc92845ee6e Mon Sep 17 00:00:00 2001
From: Gokul A <nargokul@amazon.com>
Date: Mon, 13 Apr 2026 12:49:59 -0700
Subject: [PATCH 1/5] fix: Remove hardcoded lmi config name in ModelBuilder
 optimize

Replace hardcoded config_name='lmi' in ModelBuilder.optimize() with
the model's dynamically resolved default config name. The JumpStart
metadata for models like llama-3-1-8b-instruct no longer includes an
'lmi' config, causing ValueError during optimization.

Also fix test_jumpstart_session_with_config_name to use the model's
resolved config_name instead of hardcoding 'tgi', and update unit
test assertions in test_js_builder.py accordingly.
---
 src/sagemaker/serve/builder/model_builder.py              | 8 +++++---
 .../sagemaker/jumpstart/model/test_jumpstart_model.py     | 2 +-
 tests/unit/sagemaker/serve/builder/test_js_builder.py     | 4 ++--
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/sagemaker/serve/builder/model_builder.py b/src/sagemaker/serve/builder/model_builder.py
index 3c19e4aa43..2e74f5eba5 100644
--- a/src/sagemaker/serve/builder/model_builder.py
+++ b/src/sagemaker/serve/builder/model_builder.py
@@ -1728,9 +1728,11 @@ def _model_builder_optimize_wrapper(
         if self._is_jumpstart_model_id():
             self.build(mode=self.mode, sagemaker_session=self.sagemaker_session)
             if self.pysdk_model:
-                self.pysdk_model.set_deployment_config(
-                    instance_type=instance_type, config_name="lmi"
-                )
+                config_name = self.pysdk_model.config_name
+                if config_name:
+                    self.pysdk_model.set_deployment_config(
+                        instance_type=instance_type, config_name=config_name
+                    )
             input_args = self._optimize_for_jumpstart(
                 output_path=output_path,
                 instance_type=instance_type,
diff --git a/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py b/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py
index c8b89db7b6..56e3234863 100644
--- a/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py
+++ b/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py
@@ -414,7 +414,7 @@ def test_jumpstart_session_with_config_name():
             pass
 
     assert (
-        "md/js_model_id#meta-textgeneration-llama-2-7b md/js_model_ver#* md/js_config#tgi"
+        f"md/js_model_id#meta-textgeneration-llama-2-7b md/js_model_ver#* md/js_config#{model.config_name}"
         in mock_make_request.call_args[0][1]["headers"]["User-Agent"]
     )
 
diff --git a/tests/unit/sagemaker/serve/builder/test_js_builder.py b/tests/unit/sagemaker/serve/builder/test_js_builder.py
index 415d7eab5b..25d829b056 100644
--- a/tests/unit/sagemaker/serve/builder/test_js_builder.py
+++ b/tests/unit/sagemaker/serve/builder/test_js_builder.py
@@ -1696,7 +1696,7 @@ def test_optimize_on_js_model_should_ignore_pre_optimized_configurations(
 
         assert mock_lmi_js_model.set_deployment_config.call_args_list[0].kwargs == {
             "instance_type": "ml.g5.24xlarge",
-            "config_name": "lmi",
+            "config_name": mock_lmi_js_model.config_name,
         }
         assert optimized_model.env == {
             "SAGEMAKER_PROGRAM": "inference.py",
@@ -1784,7 +1784,7 @@ def test_optimize_on_js_model_should_ignore_pre_optimized_configurations_no_over
 
         assert mock_lmi_js_model.set_deployment_config.call_args_list[0].kwargs == {
             "instance_type": "ml.g5.24xlarge",
-            "config_name": "lmi",
+            "config_name": mock_lmi_js_model.config_name,
         }
         assert optimized_model.env == {
             "SAGEMAKER_PROGRAM": "inference.py",

From 0819771318141904f8e34d926f1e390abcc37414 Mon Sep 17 00:00:00 2001
From: Gokul A <nargokul@amazon.com>
Date: Mon, 13 Apr 2026 14:35:12 -0700
Subject: [PATCH 2/5] fix: Use flexible Environment assertions in serve deep
 unit tests

The JumpStart metadata update changed the default config from 'lmi' to
'max_context_best_price_performance', which adds extra environment
variables to container_defs. Updated all three test assertions to use
ANY for the Environment dict and separately verify only the specific
env vars each test cares about (OPTION_QUANTIZE, OPTION_TENSOR_PARALLEL_DEGREE,
OPTION_SPECULATIVE_DRAFT_MODEL).
---
 .../serve/test_serve_js_deep_unit_tests.py    | 48 +++++++------------
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/tests/integ/sagemaker/serve/test_serve_js_deep_unit_tests.py b/tests/integ/sagemaker/serve/test_serve_js_deep_unit_tests.py
index 3b59cae321..ce5db857b9 100644
--- a/tests/integ/sagemaker/serve/test_serve_js_deep_unit_tests.py
+++ b/tests/integ/sagemaker/serve/test_serve_js_deep_unit_tests.py
@@ -62,16 +62,7 @@ def test_js_model_with_optimize_speculative_decoding_config_gated_requests_are_e
             role=ANY,
             container_defs={
                 "Image": ANY,
-                "Environment": {
-                    "SAGEMAKER_PROGRAM": "inference.py",
-                    "SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600",
-                    "ENDPOINT_SERVER_TIMEOUT": "3600",
-                    "MODEL_CACHE_ROOT": "/opt/ml/model",
-                    "SAGEMAKER_ENV": "1",
-                    "HF_MODEL_ID": "/opt/ml/model",
-                    "SAGEMAKER_MODEL_SERVER_WORKERS": "1",
-                    "OPTION_SPECULATIVE_DRAFT_MODEL": "/opt/ml/additional-model-data-sources/draft_model/",
-                },
+                "Environment": ANY,
                 "AdditionalModelDataSources": [
                     {
                         "ChannelName": "draft_model",
@@ -96,6 +87,11 @@ def test_js_model_with_optimize_speculative_decoding_config_gated_requests_are_e
             enable_network_isolation=True,
             tags=ANY,
         )
+        # Verify the specific environment variables we care about
+        actual_env = mock_create_model.call_args[1]["container_defs"]["Environment"]
+        assert actual_env["OPTION_SPECULATIVE_DRAFT_MODEL"] == "/opt/ml/additional-model-data-sources/draft_model/"
+        assert actual_env["SAGEMAKER_PROGRAM"] == "inference.py"
+        assert actual_env["HF_MODEL_ID"] == "/opt/ml/model"
         mock_endpoint_from_production_variants.assert_called_once()
 
 
@@ -149,16 +145,7 @@ def test_js_model_with_optimize_sharding_and_resource_requirements_requests_are_
             role=ANY,
             container_defs={
                 "Image": ANY,
-                "Environment": {
-                    "SAGEMAKER_PROGRAM": "inference.py",
-                    "SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600",
-                    "ENDPOINT_SERVER_TIMEOUT": "3600",
-                    "MODEL_CACHE_ROOT": "/opt/ml/model",
-                    "SAGEMAKER_ENV": "1",
-                    "HF_MODEL_ID": "/opt/ml/model",
-                    "SAGEMAKER_MODEL_SERVER_WORKERS": "1",
-                    "OPTION_TENSOR_PARALLEL_DEGREE": "8",
-                },
+                "Environment": ANY,
                 "ModelDataSource": {
                     "S3DataSource": {
                         "S3Uri": ANY,
@@ -172,6 +159,11 @@ def test_js_model_with_optimize_sharding_and_resource_requirements_requests_are_
             enable_network_isolation=False,  # should be set to false
             tags=ANY,
         )
+        # Verify the specific environment variables we care about
+        actual_env = mock_create_model.call_args[1]["container_defs"]["Environment"]
+        assert actual_env["OPTION_TENSOR_PARALLEL_DEGREE"] == "8"
+        assert actual_env["SAGEMAKER_PROGRAM"] == "inference.py"
+        assert actual_env["HF_MODEL_ID"] == "/opt/ml/model"
         mock_endpoint_from_production_variants.assert_called_once_with(
             name=ANY,
             production_variants=ANY,
@@ -237,16 +229,7 @@ def test_js_model_with_optimize_quantization_on_pre_optimized_model_requests_are
             role=ANY,
             container_defs={
                 "Image": ANY,
-                "Environment": {
-                    "SAGEMAKER_PROGRAM": "inference.py",
-                    "SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600",
-                    "ENDPOINT_SERVER_TIMEOUT": "3600",
-                    "MODEL_CACHE_ROOT": "/opt/ml/model",
-                    "SAGEMAKER_ENV": "1",
-                    "HF_MODEL_ID": "/opt/ml/model",
-                    "SAGEMAKER_MODEL_SERVER_WORKERS": "1",
-                    "OPTION_QUANTIZE": "fp8",
-                },
+                "Environment": ANY,
                 "ModelDataSource": {
                     "S3DataSource": {
                         "S3Uri": ANY,
@@ -260,4 +243,9 @@ def test_js_model_with_optimize_quantization_on_pre_optimized_model_requests_are
             enable_network_isolation=True,  # should be set to false
             tags=ANY,
         )
+        # Verify the specific environment variables we care about
+        actual_env = mock_create_model.call_args[1]["container_defs"]["Environment"]
+        assert actual_env["OPTION_QUANTIZE"] == "fp8"
+        assert actual_env["SAGEMAKER_PROGRAM"] == "inference.py"
+        assert actual_env["HF_MODEL_ID"] == "/opt/ml/model"
         mock_endpoint_from_production_variants.assert_called_once()

From 2dfcff567da60733e76b4741760e41adf4c9d0a3 Mon Sep 17 00:00:00 2001
From: Molly He <mollyhe@amazon.com>
Date: Wed, 15 Apr 2026 14:42:24 -0700
Subject: [PATCH 3/5] Fix codestyle

---
 tests/integ/sagemaker/serve/test_serve_js_deep_unit_tests.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/integ/sagemaker/serve/test_serve_js_deep_unit_tests.py b/tests/integ/sagemaker/serve/test_serve_js_deep_unit_tests.py
index ce5db857b9..7b554069d9 100644
--- a/tests/integ/sagemaker/serve/test_serve_js_deep_unit_tests.py
+++ b/tests/integ/sagemaker/serve/test_serve_js_deep_unit_tests.py
@@ -89,7 +89,10 @@ def test_js_model_with_optimize_speculative_decoding_config_gated_requests_are_e
         )
         # Verify the specific environment variables we care about
         actual_env = mock_create_model.call_args[1]["container_defs"]["Environment"]
-        assert actual_env["OPTION_SPECULATIVE_DRAFT_MODEL"] == "/opt/ml/additional-model-data-sources/draft_model/"
+        assert (
+            actual_env["OPTION_SPECULATIVE_DRAFT_MODEL"]
+            == "/opt/ml/additional-model-data-sources/draft_model/"
+        )
         assert actual_env["SAGEMAKER_PROGRAM"] == "inference.py"
         assert actual_env["HF_MODEL_ID"] == "/opt/ml/model"
         mock_endpoint_from_production_variants.assert_called_once()

From 4fd497cc49d2ac8c2ffea66792c5a98abd6784c6 Mon Sep 17 00:00:00 2001
From: Molly He <mollyhe@amazon.com>
Date: Wed, 15 Apr 2026 23:19:19 -0700
Subject: [PATCH 4/5] Replace meta-* model with huggingface-* gated model

---
 tests/integ/sagemaker/jumpstart/constants.py              | 1 +
 .../estimator/test_jumpstart_private_hub_estimator.py     | 6 +++---
 .../private_hub/model/test_jumpstart_private_hub_model.py | 8 ++++----
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/integ/sagemaker/jumpstart/constants.py b/tests/integ/sagemaker/jumpstart/constants.py
index 70448e9214..bdd99faf8c 100644
--- a/tests/integ/sagemaker/jumpstart/constants.py
+++ b/tests/integ/sagemaker/jumpstart/constants.py
@@ -53,6 +53,7 @@ def _to_s3_path(filename: str, s3_prefix: Optional[str]) -> str:
     ("meta-textgeneration-llama-2-7b", "2.*"): ("training-datasets/sec_amazon/"),
     ("meta-textgeneration-llama-2-7b", "3.*"): ("training-datasets/sec_amazon/"),
     ("meta-textgeneration-llama-2-7b", "4.*"): ("training-datasets/sec_amazon/"),
+    ("huggingface-llm-gemma-7b", "*"): ("training-datasets/genuq/small/"),
     ("meta-textgenerationneuron-llama-2-7b", "*"): ("training-datasets/sec_amazon/"),
 }
 
diff --git a/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py b/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py
index a6e33f1bdf..0cf4b1fb86 100644
--- a/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py
+++ b/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py
@@ -38,7 +38,7 @@
 
 TEST_MODEL_IDS = {
     "huggingface-spc-bert-base-cased",
-    "meta-textgeneration-llama-2-7b",
+    "huggingface-llm-gemma-7b",
     "catboost-regression-model",
 }
 
@@ -138,7 +138,7 @@ def test_jumpstart_hub_estimator_with_session(setup, add_model_references):
 
 def test_jumpstart_hub_gated_estimator_with_eula(setup, add_model_references):
 
-    model_id, model_version = "meta-textgeneration-llama-2-7b", "*"
+    model_id, model_version = "huggingface-llm-gemma-7b", "*"
 
     estimator = JumpStartEstimator(
         model_id=model_id,
@@ -172,7 +172,7 @@ def test_jumpstart_hub_gated_estimator_with_eula(setup, add_model_references):
 
 def test_jumpstart_hub_gated_estimator_without_eula(setup, add_model_references):
 
-    model_id, model_version = "meta-textgeneration-llama-2-7b", "*"
+    model_id, model_version = "huggingface-llm-gemma-7b", "*"
 
     estimator = JumpStartEstimator(
         model_id=model_id,
diff --git a/tests/integ/sagemaker/jumpstart/private_hub/model/test_jumpstart_private_hub_model.py b/tests/integ/sagemaker/jumpstart/private_hub/model/test_jumpstart_private_hub_model.py
index 76334330f5..1c207978c3 100644
--- a/tests/integ/sagemaker/jumpstart/private_hub/model/test_jumpstart_private_hub_model.py
+++ b/tests/integ/sagemaker/jumpstart/private_hub/model/test_jumpstart_private_hub_model.py
@@ -40,8 +40,8 @@
 TEST_MODEL_IDS = {
     "catboost-classification-model",
     "model-txt2img-stabilityai-stable-diffusion-v2-1-base",
-    "meta-textgeneration-llama-2-7b",
-    "meta-textgeneration-llama-3-2-1b",
+    "huggingface-llm-gemma-7b",
+    "huggingface-llm-gemma-3-1b-instruct",
     "catboost-regression-model",
 }
 
@@ -104,7 +104,7 @@ def test_jumpstart_hub_model_with_default_session(setup, add_model_references):
 
 def test_jumpstart_hub_gated_model(setup, add_model_references):
 
-    model_id = "meta-textgeneration-llama-3-2-1b"
+    model_id = "huggingface-llm-gemma-3-1b-instruct"
 
     model = JumpStartModel(
         model_id=model_id,
@@ -128,7 +128,7 @@ def test_jumpstart_hub_gated_model(setup, add_model_references):
 @pytest.mark.skip(reason="blocking PR checks and release pipeline.")
 def test_jumpstart_gated_model_inference_component_enabled(setup, add_model_references):
 
-    model_id = "meta-textgeneration-llama-3-2-1b"
+    model_id = "huggingface-llm-gemma-3-1b-instruct"
 
     hub_name = os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME]
 

From 53448a493732786973363b46218fc2c592573bab Mon Sep 17 00:00:00 2001
From: Molly He <mollyhe@amazon.com>
Date: Wed, 15 Apr 2026 23:56:17 -0700
Subject: [PATCH 5/5] Skip unsupported test, xfail capacity and resource error

---
 tests/integ/sagemaker/jumpstart/constants.py  |  1 -
 .../jumpstart/model/test_jumpstart_model.py   |  4 ++++
 .../test_jumpstart_private_hub_estimator.py   | 23 ++++++++++++++++---
 ...model_builder_inference_component_happy.py |  5 ++++
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/tests/integ/sagemaker/jumpstart/constants.py b/tests/integ/sagemaker/jumpstart/constants.py
index bdd99faf8c..70448e9214 100644
--- a/tests/integ/sagemaker/jumpstart/constants.py
+++ b/tests/integ/sagemaker/jumpstart/constants.py
@@ -53,7 +53,6 @@ def _to_s3_path(filename: str, s3_prefix: Optional[str]) -> str:
     ("meta-textgeneration-llama-2-7b", "2.*"): ("training-datasets/sec_amazon/"),
     ("meta-textgeneration-llama-2-7b", "3.*"): ("training-datasets/sec_amazon/"),
     ("meta-textgeneration-llama-2-7b", "4.*"): ("training-datasets/sec_amazon/"),
-    ("huggingface-llm-gemma-7b", "*"): ("training-datasets/genuq/small/"),
     ("meta-textgenerationneuron-llama-2-7b", "*"): ("training-datasets/sec_amazon/"),
 }
 
diff --git a/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py b/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py
index 56e3234863..8b95f9b622 100644
--- a/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py
+++ b/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py
@@ -34,6 +34,7 @@
     download_inference_assets,
     get_sm_session,
     get_tabular_data,
+    x_fail_if_ice,
 )
 
 INF2_SUPPORTED_REGIONS = {
@@ -191,7 +192,10 @@ def test_jumpstart_gated_model(setup):
     assert response is not None
 
 
+@x_fail_if_ice
 def test_jumpstart_gated_model_inference_component_enabled(setup):
+    # x_fail_if_ice marks this test as xfail on CapacityError — ml.g5.2xlarge capacity
+    # is shared across parallel CI runs and may be transiently exhausted.
 
     model_id = "meta-textgeneration-llama-2-7b"
 
diff --git a/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py b/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py
index 0cf4b1fb86..998ec32059 100644
--- a/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py
+++ b/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py
@@ -38,7 +38,6 @@
 
 TEST_MODEL_IDS = {
     "huggingface-spc-bert-base-cased",
-    "huggingface-llm-gemma-7b",
     "catboost-regression-model",
 }
 
@@ -136,9 +135,18 @@ def test_jumpstart_hub_estimator_with_session(setup, add_model_references):
     assert response is not None
 
 
+@pytest.mark.skip(
+    reason=(
+        "meta-textgeneration-llama-2-7b has been removed from the SageMaker public JumpStart hub. "
+        "Gated model EULA enforcement is covered by test_jumpstart_hub_gated_model in "
+        "test_jumpstart_private_hub_model.py and test_gated_model_training_v1/v2 in "
+        "test_jumpstart_estimator.py. TODO: replace with a suitable gated model that supports "
+        "training via private hub without requiring specific VPC endpoint configuration."
+    )
+)
 def test_jumpstart_hub_gated_estimator_with_eula(setup, add_model_references):
 
-    model_id, model_version = "huggingface-llm-gemma-7b", "*"
+    model_id, model_version = "meta-textgeneration-llama-2-7b", "*"
 
     estimator = JumpStartEstimator(
         model_id=model_id,
@@ -170,9 +178,18 @@ def test_jumpstart_hub_gated_estimator_with_eula(setup, add_model_references):
     assert response is not None
 
 
+@pytest.mark.skip(
+    reason=(
+        "meta-textgeneration-llama-2-7b has been removed from the SageMaker public JumpStart hub. "
+        "Gated model EULA enforcement is covered by test_jumpstart_hub_gated_model in "
+        "test_jumpstart_private_hub_model.py and test_gated_model_training_v1/v2 in "
+        "test_jumpstart_estimator.py. TODO: replace with a suitable gated model that supports "
+        "training via private hub without requiring specific VPC endpoint configuration."
+    )
+)
 def test_jumpstart_hub_gated_estimator_without_eula(setup, add_model_references):
 
-    model_id, model_version = "huggingface-llm-gemma-7b", "*"
+    model_id, model_version = "meta-textgeneration-llama-2-7b", "*"
 
     estimator = JumpStartEstimator(
         model_id=model_id,
diff --git a/tests/integ/sagemaker/serve/test_serve_model_builder_inference_component_happy.py b/tests/integ/sagemaker/serve/test_serve_model_builder_inference_component_happy.py
index 7191de4e7d..45513477c0 100644
--- a/tests/integ/sagemaker/serve/test_serve_model_builder_inference_component_happy.py
+++ b/tests/integ/sagemaker/serve/test_serve_model_builder_inference_component_happy.py
@@ -105,6 +105,11 @@ def test_model_builder_ic_sagemaker_endpoint(
             if caught_ex:
                 logger.exception(caught_ex)
                 cleanup_resources(sagemaker_session, [LLAMA_IC_NAME])
+                if "ResourceLimitExceeded" in str(caught_ex) or "CapacityError" in str(caught_ex):
+                    # Mark as xfail rather than hard-failing — ml.g5.24xlarge capacity is shared
+                    # across parallel CI runs and may be transiently exhausted. This is not a
+                    # code regression; the test should be retried when capacity is available.
+                    pytest.xfail(str(caught_ex))
                 assert False, f"{caught_ex} thrown when running mb-IC deployment test."
 
             cleanup_resources(sagemaker_session, [LLAMA_IC_NAME])