From 9a918bafcb9b51af4d7497c715bb71ed503672a2 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Tue, 18 Nov 2025 13:58:55 -0500 Subject: [PATCH 01/29] initial commit --- .../use_cases/model_endpoint_use_cases.py | 18 +++++ .../live_model_endpoint_infra_gateway.py | 21 ++++++ .../k8s_endpoint_resource_delegate.py | 70 +++++++++++++++++++ .../services/live_model_endpoint_service.py | 63 +++++++++++++++++ 4 files changed, 172 insertions(+) diff --git a/model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py b/model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py index ea8466430..eeb7910cb 100644 --- a/model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py +++ b/model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py @@ -603,6 +603,24 @@ async def execute(self, user: User, model_endpoint_id: str) -> DeleteModelEndpoi model_endpoint_id=model_endpoint_id ) if model_endpoint is None: + # Check for orphaned K8s resources + from model_engine_server.infra.services.live_model_endpoint_service import ( + LiveModelEndpointService, + ) + + if isinstance(self.model_endpoint_service, LiveModelEndpointService): + owner = await self.model_endpoint_service._cleanup_orphaned_k8s_resources( + model_endpoint_id + ) + if owner is not None: + # Verify authorization - user must match owner (created_by from K8s labels) + # Note: For team-based auth, we'd need to look up team_id from user_id, + # but for orphan cleanup, user_id match is sufficient + if user.user_id != owner and not user.is_privileged_user: + raise ObjectNotAuthorizedException + # Resources were cleaned up successfully + return DeleteModelEndpointV1Response(deleted=True) + # No orphaned resources found raise ObjectNotFoundException if not self.authz_module.check_access_write_owned_entity(user, model_endpoint): raise ObjectNotAuthorizedException diff --git a/model-engine/model_engine_server/infra/gateways/live_model_endpoint_infra_gateway.py b/model-engine/model_engine_server/infra/gateways/live_model_endpoint_infra_gateway.py index bca30e10a..4637fd57e 100644 --- a/model-engine/model_engine_server/infra/gateways/live_model_endpoint_infra_gateway.py +++ b/model-engine/model_engine_server/infra/gateways/live_model_endpoint_infra_gateway.py @@ -258,6 +258,27 @@ async def delete_model_endpoint_infra(self, model_endpoint_record: ModelEndpoint endpoint_type=endpoint_type, ) + async def delete_model_endpoint_infra_by_id( + self, endpoint_id: str, deployment_name: str, endpoint_type: ModelEndpointType + ) -> bool: + """ + Deletes model endpoint infrastructure when DB record doesn't exist (orphaned resources). + This method accepts minimal parameters extracted from K8s resources. + + Args: + endpoint_id: The endpoint ID + deployment_name: The deployment name (from K8s resource) + endpoint_type: The endpoint type (SYNC, STREAMING, or ASYNC) + + Returns: + True if resources were successfully deleted, False otherwise + """ + return await self.resource_gateway.delete_resources( + endpoint_id=endpoint_id, + deployment_name=deployment_name, + endpoint_type=endpoint_type, + ) + async def restart_model_endpoint_infra( self, model_endpoint_record: ModelEndpointRecord ) -> None: diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 45ab0d73e..475ce8ab6 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -1163,6 +1163,76 @@ async def _get_deployment(endpoint_id, deployment_name): raise return deployment_config + @staticmethod + async def _get_deployment_by_endpoint_id_label(endpoint_id: str) -> Optional[V1Deployment]: + """ + Gets a Deployment by querying K8s with endpoint_id label selector. + Used when DB record doesn't exist but K8s resources might (orphaned resources). + + Args: + endpoint_id: The endpoint_id to search for + + Returns: + The first deployment found with matching endpoint_id label, or None if not found + """ + apps_client = get_kubernetes_apps_client() + label_selector = f"endpoint_id={endpoint_id}" + try: + deployments = await apps_client.list_namespaced_deployment( + namespace=hmi_config.endpoint_namespace, + label_selector=label_selector, + ) + if deployments.items: + return deployments.items[0] + return None + except ApiException as e: + if e.status == 404: + return None + logger.exception(f"Error querying deployments by endpoint_id label {endpoint_id}") + raise + + @staticmethod + async def _determine_endpoint_type_from_k8s(endpoint_id: str) -> ModelEndpointType: + """ + Determines endpoint type by checking for HPA/KEDA (SYNC/STREAMING) vs ASYNC. + Defaults to STREAMING if unable to determine (common for MCPx endpoints). + + Args: + endpoint_id: The endpoint_id to check + + Returns: + The determined ModelEndpointType + """ + k8s_resource_group_name = _endpoint_id_to_k8s_resource_group_name(endpoint_id) + autoscaling_client = get_kubernetes_autoscaling_client() + custom_objects_client = get_kubernetes_custom_objects_client() + + # Check for HPA (indicates SYNC/STREAMING) + try: + await autoscaling_client.read_namespaced_horizontal_pod_autoscaler( + k8s_resource_group_name, hmi_config.endpoint_namespace + ) + return ModelEndpointType.STREAMING # Default to STREAMING for MCPx + except ApiException: + pass + + # Check for KEDA ScaledObject (indicates SYNC/STREAMING) + try: + await custom_objects_client.get_namespaced_custom_object( + group="keda.sh", + version="v1alpha1", + namespace=hmi_config.endpoint_namespace, + plural="scaledobjects", + name=k8s_resource_group_name, + ) + return ModelEndpointType.STREAMING # Default to STREAMING for MCPx + except ApiException: + pass + + # If no HPA/KEDA found, likely ASYNC + # But MCPx uses STREAMING, so default to that + return ModelEndpointType.STREAMING + @staticmethod async def _get_all_config_maps() -> ( List[kubernetes_asyncio.client.models.v1_config_map.V1ConfigMap] diff --git a/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py b/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py index 6c28f4990..c04e9863b 100644 --- a/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py +++ b/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py @@ -33,6 +33,9 @@ from model_engine_server.domain.services import ModelEndpointService from model_engine_server.domain.use_cases.model_endpoint_use_cases import MODEL_BUNDLE_CHANGED_KEY from model_engine_server.infra.gateways import ModelEndpointInfraGateway +from model_engine_server.infra.gateways.resources.k8s_endpoint_resource_delegate import ( + K8SEndpointResourceDelegate, +) from model_engine_server.infra.repositories import ModelEndpointCacheRepository from model_engine_server.infra.repositories.model_endpoint_record_repository import ( ModelEndpointRecordRepository, @@ -407,6 +410,66 @@ async def delete_model_endpoint(self, model_endpoint_id: str) -> None: logger.info(f"Endpoint delete released lock for {created_by}, {name}") + async def _cleanup_orphaned_k8s_resources(self, endpoint_id: str) -> Optional[str]: + """ + Cleans up orphaned K8s resources when DB record doesn't exist. + Returns the owner (created_by) from K8s labels if resources were found, None otherwise. + + Args: + endpoint_id: The endpoint_id to check for orphaned resources + + Returns: + The owner (created_by) from K8s labels if resources found, None otherwise + """ + try: + deployment = await K8SEndpointResourceDelegate._get_deployment_by_endpoint_id_label( + endpoint_id + ) + if deployment is None: + return None + + # Extract owner and deployment name from K8s labels + labels = deployment.metadata.labels or {} + owner = labels.get("created_by") or labels.get("user_id") + deployment_name = deployment.metadata.name + + if not owner: + logger.warning( + f"Found orphaned K8s resources for endpoint_id {endpoint_id} but no owner label" + ) + return None + + # Determine endpoint type + endpoint_type = await K8SEndpointResourceDelegate._determine_endpoint_type_from_k8s( + endpoint_id + ) + + # Clean up resources + logger.info( + f"Cleaning up orphaned K8s resources for endpoint_id {endpoint_id}, " + f"deployment_name {deployment_name}, owner {owner}" + ) + deleted = await self.model_endpoint_infra_gateway.delete_model_endpoint_infra_by_id( + endpoint_id=endpoint_id, + deployment_name=deployment_name, + endpoint_type=endpoint_type, + ) + if deleted: + logger.info( + f"Successfully cleaned up orphaned K8s resources for endpoint_id {endpoint_id}" + ) + else: + logger.warning( + f"Failed to clean up some orphaned K8s resources for endpoint_id {endpoint_id}" + ) + + return owner + except Exception as e: + logger.exception( + f"Error cleaning up orphaned K8s resources for endpoint_id {endpoint_id}: {e}" + ) + return None + async def restart_model_endpoint(self, model_endpoint_id: str) -> None: record = await self.model_endpoint_record_repository.get_model_endpoint_record( model_endpoint_id=model_endpoint_id From 841b6c191ac344cd574bcd73db11a464105fb162 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Thu, 18 Dec 2025 18:00:36 -0500 Subject: [PATCH 02/29] fix: Add timeout configuration for MCP servers - Add 5-minute Istio VirtualService timeout for MCP servers - Add 10-minute aiohttp ClientSession timeout for MCP passthrough forwarders - Fixes 30-second timeout errors causing MCP request failures MCP servers often have long-running operations that exceed the default 30-second Istio timeout. This change: 1. Detects MCP servers by checking if forwarder_type is 'passthrough' and routes contain '/mcp' 2. Sets Istio VirtualService timeout to 5 minutes for MCP servers 3. Sets aiohttp ClientSession timeout to 10 minutes for MCP passthrough forwarders 4. Keeps default timeouts for non-MCP endpoints --- .../service_template_config_map.yaml | 1 + .../inference/forwarding/forwarding.py | 23 +++++++++++++++++-- .../gateways/resources/k8s_resource_types.py | 22 ++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/charts/model-engine/templates/service_template_config_map.yaml b/charts/model-engine/templates/service_template_config_map.yaml index 311553813..4a29bb371 100644 --- a/charts/model-engine/templates/service_template_config_map.yaml +++ b/charts/model-engine/templates/service_template_config_map.yaml @@ -938,6 +938,7 @@ data: host: "${RESOURCE_NAME}.${NAMESPACE}.svc.cluster.local" port: number: 80 + ${MCP_TIMEOUT} {{- end }} {{- if .Values.destinationrule.enabled }} destination-rule.yaml: |- diff --git a/model-engine/model_engine_server/inference/forwarding/forwarding.py b/model-engine/model_engine_server/inference/forwarding/forwarding.py index 5183955b9..b1753dc50 100644 --- a/model-engine/model_engine_server/inference/forwarding/forwarding.py +++ b/model-engine/model_engine_server/inference/forwarding/forwarding.py @@ -630,6 +630,23 @@ def endpoint(route: str) -> str: @dataclass class PassthroughForwarder(ModelEngineSerializationMixin): passthrough_endpoint: str + + # Default timeout: 5 minutes (matching aiohttp default) + # MCP servers need longer timeout: 10 minutes to handle long-running operations + DEFAULT_TIMEOUT_SECONDS = 5 * 60 + MCP_TIMEOUT_SECONDS = 10 * 60 + + def _is_mcp_server(self) -> bool: + """Detect if this is an MCP server by checking if endpoint contains /mcp""" + return "/mcp" in self.passthrough_endpoint.lower() + + def _get_timeout(self) -> aiohttp.ClientTimeout: + """Get appropriate timeout based on server type""" + timeout_seconds = ( + self.MCP_TIMEOUT_SECONDS if self._is_mcp_server() + else self.DEFAULT_TIMEOUT_SECONDS + ) + return aiohttp.ClientTimeout(total=timeout_seconds) async def _make_request( self, request: Any, aioclient: aiohttp.ClientSession @@ -656,7 +673,8 @@ async def _make_request( ) async def forward_stream(self, request: Any): - async with aiohttp.ClientSession() as aioclient: + timeout = self._get_timeout() + async with aiohttp.ClientSession(timeout=timeout) as aioclient: response = await self._make_request(request, aioclient) response_headers = response.headers yield (response_headers, response.status) @@ -670,7 +688,8 @@ async def forward_stream(self, request: Any): yield await response.read() async def forward_sync(self, request: Any): - async with aiohttp.ClientSession() as aioclient: + timeout = self._get_timeout() + async with aiohttp.ClientSession(timeout=timeout) as aioclient: response = await self._make_request(request, aioclient) return response diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 6f0d3133c..4bc23d3df 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -382,6 +382,7 @@ class VirtualServiceArguments(_BaseEndpointArguments): """Keyword-arguments for substituting into virtual-service templates.""" DNS_HOST_DOMAIN: str + MCP_TIMEOUT: str # Timeout for MCP servers (e.g., "300s" for 5 minutes, "" for default) class LwsServiceEntryArguments(_BaseEndpointArguments): @@ -1361,6 +1362,26 @@ def get_endpoint_resource_arguments_from_request( SERVICE_NAME_OVERRIDE=service_name_override, ) elif endpoint_resource_name == "virtual-service": + # Detect MCP servers by checking if they use passthrough forwarder and have /mcp routes + # MCP servers need longer timeout (5 minutes) to handle long-running operations + is_mcp_server = False + if isinstance(flavor, RunnableImageLike): + # Check if forwarder type is passthrough (MCP servers use passthrough) + if flavor.forwarder_type == "passthrough": + # Check if any routes contain /mcp + all_routes = [] + if flavor.predict_route: + all_routes.append(flavor.predict_route) + if flavor.routes: + all_routes.extend(flavor.routes) + if flavor.extra_routes: + all_routes.extend(flavor.extra_routes) + # MCP servers have routes containing /mcp + is_mcp_server = any("/mcp" in route.lower() for route in all_routes) + + # Format timeout as YAML: "timeout: 300s" for MCP servers, empty string for others + mcp_timeout = "timeout: 300s" if is_mcp_server else "" + return VirtualServiceArguments( # Base resource arguments RESOURCE_NAME=k8s_resource_group_name, @@ -1373,6 +1394,7 @@ def get_endpoint_resource_arguments_from_request( OWNER=owner, GIT_TAG=GIT_TAG, DNS_HOST_DOMAIN=infra_config().dns_host_domain, + MCP_TIMEOUT=mcp_timeout, ) elif endpoint_resource_name == "destination-rule": return DestinationRuleArguments( From 35b20514e65b32214388a476f1f5fab769336f57 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 19 Dec 2025 12:16:15 -0500 Subject: [PATCH 03/29] Remove orphaned K8s resource cleanup code Revert orphaned resource cleanup logic that was added for handling cases where DB records don't exist but K8s resources do. This simplifies the delete endpoint logic to just raise ObjectNotFoundException when the endpoint doesn't exist in the DB. --- .../use_cases/model_endpoint_use_cases.py | 18 ------ .../k8s_endpoint_resource_delegate.py | 28 --------- .../services/live_model_endpoint_service.py | 60 ------------------- 3 files changed, 106 deletions(-) diff --git a/model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py b/model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py index eeb7910cb..ea8466430 100644 --- a/model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py +++ b/model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py @@ -603,24 +603,6 @@ async def execute(self, user: User, model_endpoint_id: str) -> DeleteModelEndpoi model_endpoint_id=model_endpoint_id ) if model_endpoint is None: - # Check for orphaned K8s resources - from model_engine_server.infra.services.live_model_endpoint_service import ( - LiveModelEndpointService, - ) - - if isinstance(self.model_endpoint_service, LiveModelEndpointService): - owner = await self.model_endpoint_service._cleanup_orphaned_k8s_resources( - model_endpoint_id - ) - if owner is not None: - # Verify authorization - user must match owner (created_by from K8s labels) - # Note: For team-based auth, we'd need to look up team_id from user_id, - # but for orphan cleanup, user_id match is sufficient - if user.user_id != owner and not user.is_privileged_user: - raise ObjectNotAuthorizedException - # Resources were cleaned up successfully - return DeleteModelEndpointV1Response(deleted=True) - # No orphaned resources found raise ObjectNotFoundException if not self.authz_module.check_access_write_owned_entity(user, model_endpoint): raise ObjectNotAuthorizedException diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 475ce8ab6..6cf53e612 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -1163,34 +1163,6 @@ async def _get_deployment(endpoint_id, deployment_name): raise return deployment_config - @staticmethod - async def _get_deployment_by_endpoint_id_label(endpoint_id: str) -> Optional[V1Deployment]: - """ - Gets a Deployment by querying K8s with endpoint_id label selector. - Used when DB record doesn't exist but K8s resources might (orphaned resources). - - Args: - endpoint_id: The endpoint_id to search for - - Returns: - The first deployment found with matching endpoint_id label, or None if not found - """ - apps_client = get_kubernetes_apps_client() - label_selector = f"endpoint_id={endpoint_id}" - try: - deployments = await apps_client.list_namespaced_deployment( - namespace=hmi_config.endpoint_namespace, - label_selector=label_selector, - ) - if deployments.items: - return deployments.items[0] - return None - except ApiException as e: - if e.status == 404: - return None - logger.exception(f"Error querying deployments by endpoint_id label {endpoint_id}") - raise - @staticmethod async def _determine_endpoint_type_from_k8s(endpoint_id: str) -> ModelEndpointType: """ diff --git a/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py b/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py index c04e9863b..12c026738 100644 --- a/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py +++ b/model-engine/model_engine_server/infra/services/live_model_endpoint_service.py @@ -410,66 +410,6 @@ async def delete_model_endpoint(self, model_endpoint_id: str) -> None: logger.info(f"Endpoint delete released lock for {created_by}, {name}") - async def _cleanup_orphaned_k8s_resources(self, endpoint_id: str) -> Optional[str]: - """ - Cleans up orphaned K8s resources when DB record doesn't exist. - Returns the owner (created_by) from K8s labels if resources were found, None otherwise. - - Args: - endpoint_id: The endpoint_id to check for orphaned resources - - Returns: - The owner (created_by) from K8s labels if resources found, None otherwise - """ - try: - deployment = await K8SEndpointResourceDelegate._get_deployment_by_endpoint_id_label( - endpoint_id - ) - if deployment is None: - return None - - # Extract owner and deployment name from K8s labels - labels = deployment.metadata.labels or {} - owner = labels.get("created_by") or labels.get("user_id") - deployment_name = deployment.metadata.name - - if not owner: - logger.warning( - f"Found orphaned K8s resources for endpoint_id {endpoint_id} but no owner label" - ) - return None - - # Determine endpoint type - endpoint_type = await K8SEndpointResourceDelegate._determine_endpoint_type_from_k8s( - endpoint_id - ) - - # Clean up resources - logger.info( - f"Cleaning up orphaned K8s resources for endpoint_id {endpoint_id}, " - f"deployment_name {deployment_name}, owner {owner}" - ) - deleted = await self.model_endpoint_infra_gateway.delete_model_endpoint_infra_by_id( - endpoint_id=endpoint_id, - deployment_name=deployment_name, - endpoint_type=endpoint_type, - ) - if deleted: - logger.info( - f"Successfully cleaned up orphaned K8s resources for endpoint_id {endpoint_id}" - ) - else: - logger.warning( - f"Failed to clean up some orphaned K8s resources for endpoint_id {endpoint_id}" - ) - - return owner - except Exception as e: - logger.exception( - f"Error cleaning up orphaned K8s resources for endpoint_id {endpoint_id}: {e}" - ) - return None - async def restart_model_endpoint(self, model_endpoint_id: str) -> None: record = await self.model_endpoint_record_repository.get_model_endpoint_record( model_endpoint_id=model_endpoint_id From 0ed67b8b081a0adf45313bad93e313dd5e5fd752 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 19 Dec 2025 12:19:40 -0500 Subject: [PATCH 04/29] Revert aiohttp timeout changes - only fix Istio VirtualService timeout --- .../inference/forwarding/forwarding.py | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/model-engine/model_engine_server/inference/forwarding/forwarding.py b/model-engine/model_engine_server/inference/forwarding/forwarding.py index b1753dc50..5183955b9 100644 --- a/model-engine/model_engine_server/inference/forwarding/forwarding.py +++ b/model-engine/model_engine_server/inference/forwarding/forwarding.py @@ -630,23 +630,6 @@ def endpoint(route: str) -> str: @dataclass class PassthroughForwarder(ModelEngineSerializationMixin): passthrough_endpoint: str - - # Default timeout: 5 minutes (matching aiohttp default) - # MCP servers need longer timeout: 10 minutes to handle long-running operations - DEFAULT_TIMEOUT_SECONDS = 5 * 60 - MCP_TIMEOUT_SECONDS = 10 * 60 - - def _is_mcp_server(self) -> bool: - """Detect if this is an MCP server by checking if endpoint contains /mcp""" - return "/mcp" in self.passthrough_endpoint.lower() - - def _get_timeout(self) -> aiohttp.ClientTimeout: - """Get appropriate timeout based on server type""" - timeout_seconds = ( - self.MCP_TIMEOUT_SECONDS if self._is_mcp_server() - else self.DEFAULT_TIMEOUT_SECONDS - ) - return aiohttp.ClientTimeout(total=timeout_seconds) async def _make_request( self, request: Any, aioclient: aiohttp.ClientSession @@ -673,8 +656,7 @@ async def _make_request( ) async def forward_stream(self, request: Any): - timeout = self._get_timeout() - async with aiohttp.ClientSession(timeout=timeout) as aioclient: + async with aiohttp.ClientSession() as aioclient: response = await self._make_request(request, aioclient) response_headers = response.headers yield (response_headers, response.status) @@ -688,8 +670,7 @@ async def forward_stream(self, request: Any): yield await response.read() async def forward_sync(self, request: Any): - timeout = self._get_timeout() - async with aiohttp.ClientSession(timeout=timeout) as aioclient: + async with aiohttp.ClientSession() as aioclient: response = await self._make_request(request, aioclient) return response From 13739e61d5e4a17c35691cfab1a149aeb1493b43 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 19 Dec 2025 12:22:13 -0500 Subject: [PATCH 05/29] Simplify timeout logic - just check passthrough forwarder type --- .../gateways/resources/k8s_resource_types.py | 27 +++++-------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 4bc23d3df..0c8e6dca2 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -382,7 +382,7 @@ class VirtualServiceArguments(_BaseEndpointArguments): """Keyword-arguments for substituting into virtual-service templates.""" DNS_HOST_DOMAIN: str - MCP_TIMEOUT: str # Timeout for MCP servers (e.g., "300s" for 5 minutes, "" for default) + MCP_TIMEOUT: str # Timeout for passthrough forwarders (e.g., "300s" for 5 minutes, "" for default) class LwsServiceEntryArguments(_BaseEndpointArguments): @@ -1362,25 +1362,10 @@ def get_endpoint_resource_arguments_from_request( SERVICE_NAME_OVERRIDE=service_name_override, ) elif endpoint_resource_name == "virtual-service": - # Detect MCP servers by checking if they use passthrough forwarder and have /mcp routes - # MCP servers need longer timeout (5 minutes) to handle long-running operations - is_mcp_server = False - if isinstance(flavor, RunnableImageLike): - # Check if forwarder type is passthrough (MCP servers use passthrough) - if flavor.forwarder_type == "passthrough": - # Check if any routes contain /mcp - all_routes = [] - if flavor.predict_route: - all_routes.append(flavor.predict_route) - if flavor.routes: - all_routes.extend(flavor.routes) - if flavor.extra_routes: - all_routes.extend(flavor.extra_routes) - # MCP servers have routes containing /mcp - is_mcp_server = any("/mcp" in route.lower() for route in all_routes) - - # Format timeout as YAML: "timeout: 300s" for MCP servers, empty string for others - mcp_timeout = "timeout: 300s" if is_mcp_server else "" + # Set 5-minute timeout for passthrough forwarders (used by MCP servers) + # to fix 30-second default timeout issue + is_passthrough = isinstance(flavor, RunnableImageLike) and flavor.forwarder_type == "passthrough" + timeout = "timeout: 300s" if is_passthrough else "" return VirtualServiceArguments( # Base resource arguments @@ -1394,7 +1379,7 @@ def get_endpoint_resource_arguments_from_request( OWNER=owner, GIT_TAG=GIT_TAG, DNS_HOST_DOMAIN=infra_config().dns_host_domain, - MCP_TIMEOUT=mcp_timeout, + MCP_TIMEOUT=timeout, ) elif endpoint_resource_name == "destination-rule": return DestinationRuleArguments( From e28f72cdba67fe05d3e054f3bc4e1ccc477c88c2 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 19 Dec 2025 12:25:35 -0500 Subject: [PATCH 06/29] Remove unused orphaned cleanup methods - not needed for timeout fix --- .../live_model_endpoint_infra_gateway.py | 21 ---------- .../k8s_endpoint_resource_delegate.py | 42 ------------------- 2 files changed, 63 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/live_model_endpoint_infra_gateway.py b/model-engine/model_engine_server/infra/gateways/live_model_endpoint_infra_gateway.py index 4637fd57e..bca30e10a 100644 --- a/model-engine/model_engine_server/infra/gateways/live_model_endpoint_infra_gateway.py +++ b/model-engine/model_engine_server/infra/gateways/live_model_endpoint_infra_gateway.py @@ -258,27 +258,6 @@ async def delete_model_endpoint_infra(self, model_endpoint_record: ModelEndpoint endpoint_type=endpoint_type, ) - async def delete_model_endpoint_infra_by_id( - self, endpoint_id: str, deployment_name: str, endpoint_type: ModelEndpointType - ) -> bool: - """ - Deletes model endpoint infrastructure when DB record doesn't exist (orphaned resources). - This method accepts minimal parameters extracted from K8s resources. - - Args: - endpoint_id: The endpoint ID - deployment_name: The deployment name (from K8s resource) - endpoint_type: The endpoint type (SYNC, STREAMING, or ASYNC) - - Returns: - True if resources were successfully deleted, False otherwise - """ - return await self.resource_gateway.delete_resources( - endpoint_id=endpoint_id, - deployment_name=deployment_name, - endpoint_type=endpoint_type, - ) - async def restart_model_endpoint_infra( self, model_endpoint_record: ModelEndpointRecord ) -> None: diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 6cf53e612..45ab0d73e 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -1163,48 +1163,6 @@ async def _get_deployment(endpoint_id, deployment_name): raise return deployment_config - @staticmethod - async def _determine_endpoint_type_from_k8s(endpoint_id: str) -> ModelEndpointType: - """ - Determines endpoint type by checking for HPA/KEDA (SYNC/STREAMING) vs ASYNC. - Defaults to STREAMING if unable to determine (common for MCPx endpoints). - - Args: - endpoint_id: The endpoint_id to check - - Returns: - The determined ModelEndpointType - """ - k8s_resource_group_name = _endpoint_id_to_k8s_resource_group_name(endpoint_id) - autoscaling_client = get_kubernetes_autoscaling_client() - custom_objects_client = get_kubernetes_custom_objects_client() - - # Check for HPA (indicates SYNC/STREAMING) - try: - await autoscaling_client.read_namespaced_horizontal_pod_autoscaler( - k8s_resource_group_name, hmi_config.endpoint_namespace - ) - return ModelEndpointType.STREAMING # Default to STREAMING for MCPx - except ApiException: - pass - - # Check for KEDA ScaledObject (indicates SYNC/STREAMING) - try: - await custom_objects_client.get_namespaced_custom_object( - group="keda.sh", - version="v1alpha1", - namespace=hmi_config.endpoint_namespace, - plural="scaledobjects", - name=k8s_resource_group_name, - ) - return ModelEndpointType.STREAMING # Default to STREAMING for MCPx - except ApiException: - pass - - # If no HPA/KEDA found, likely ASYNC - # But MCPx uses STREAMING, so default to that - return ModelEndpointType.STREAMING - @staticmethod async def _get_all_config_maps() -> ( List[kubernetes_asyncio.client.models.v1_config_map.V1ConfigMap] From 59295c73577dbd614d9140fa667241e241c87940 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 19 Dec 2025 12:26:08 -0500 Subject: [PATCH 07/29] Add back /mcp route check - passthrough alone is not sufficient --- .../gateways/resources/k8s_resource_types.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 0c8e6dca2..204b9d1a0 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1362,10 +1362,19 @@ def get_endpoint_resource_arguments_from_request( SERVICE_NAME_OVERRIDE=service_name_override, ) elif endpoint_resource_name == "virtual-service": - # Set 5-minute timeout for passthrough forwarders (used by MCP servers) - # to fix 30-second default timeout issue - is_passthrough = isinstance(flavor, RunnableImageLike) and flavor.forwarder_type == "passthrough" - timeout = "timeout: 300s" if is_passthrough else "" + # Set 5-minute timeout for MCP servers to fix 30-second default timeout issue + # MCP servers use passthrough forwarder and have routes containing /mcp + is_mcp_server = False + if isinstance(flavor, RunnableImageLike) and flavor.forwarder_type == "passthrough": + all_routes = [] + if flavor.predict_route: + all_routes.append(flavor.predict_route) + if flavor.routes: + all_routes.extend(flavor.routes) + if flavor.extra_routes: + all_routes.extend(flavor.extra_routes) + is_mcp_server = any("/mcp" in route.lower() for route in all_routes) + timeout = "timeout: 300s" if is_mcp_server else "" return VirtualServiceArguments( # Base resource arguments From 2b39d41d378a7aea096287b992ff656f28a7e056 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 19 Dec 2025 12:33:17 -0500 Subject: [PATCH 08/29] Update comment: clarify that empty string defaults to 30 seconds --- .../infra/gateways/resources/k8s_resource_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 204b9d1a0..447a8f127 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -382,7 +382,7 @@ class VirtualServiceArguments(_BaseEndpointArguments): """Keyword-arguments for substituting into virtual-service templates.""" DNS_HOST_DOMAIN: str - MCP_TIMEOUT: str # Timeout for passthrough forwarders (e.g., "300s" for 5 minutes, "" for default) + MCP_TIMEOUT: str # Timeout for passthrough forwarders (e.g., "300s" for 5 minutes, "" defaults to 30 seconds) class LwsServiceEntryArguments(_BaseEndpointArguments): From 6e01fb994a2e3b599d9639c874285f7fd1018cf6 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 19 Dec 2025 12:34:10 -0500 Subject: [PATCH 09/29] Update comment format --- .../infra/gateways/resources/k8s_resource_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 447a8f127..354de3d20 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -382,7 +382,7 @@ class VirtualServiceArguments(_BaseEndpointArguments): """Keyword-arguments for substituting into virtual-service templates.""" DNS_HOST_DOMAIN: str - MCP_TIMEOUT: str # Timeout for passthrough forwarders (e.g., "300s" for 5 minutes, "" defaults to 30 seconds) + MCP_TIMEOUT: str # Timeout for passthrough forwarders (e.g., "300s" for 5 minutes). "" (Default) is 30 seconds class LwsServiceEntryArguments(_BaseEndpointArguments): From 7769c54b9972f37ebf59910e6b08630fc128db35 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 19 Dec 2025 12:34:36 -0500 Subject: [PATCH 10/29] Simplify comment to just state default timeout --- .../infra/gateways/resources/k8s_resource_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 354de3d20..a5fcf886d 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -382,7 +382,7 @@ class VirtualServiceArguments(_BaseEndpointArguments): """Keyword-arguments for substituting into virtual-service templates.""" DNS_HOST_DOMAIN: str - MCP_TIMEOUT: str # Timeout for passthrough forwarders (e.g., "300s" for 5 minutes). "" (Default) is 30 seconds + MCP_TIMEOUT: str # "" (Default) is 30 seconds class LwsServiceEntryArguments(_BaseEndpointArguments): From 8b436c07581ecd3095542e4482ae595d16644081 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 19 Dec 2025 13:57:23 -0500 Subject: [PATCH 11/29] Fix black formatting - split long line --- .../infra/gateways/resources/k8s_resource_types.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 11e2e7077..736161ef3 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1365,7 +1365,10 @@ def get_endpoint_resource_arguments_from_request( # Set 5-minute timeout for MCP servers to fix 30-second default timeout issue # MCP servers use passthrough forwarder and have routes containing /mcp is_mcp_server = False - if isinstance(flavor, RunnableImageLike) and flavor.forwarder_type == "passthrough": + if ( + isinstance(flavor, RunnableImageLike) + and flavor.forwarder_type == "passthrough" + ): all_routes = [] if flavor.predict_route: all_routes.append(flavor.predict_route) From f93cce063a6db86e09fc810c2ccc448f44eb0e49 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 14:59:02 -0500 Subject: [PATCH 12/29] fix: Remove trailing whitespace in k8s_resource_types.py --- .../infra/gateways/resources/k8s_resource_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 736161ef3..ead91f7a6 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1378,7 +1378,7 @@ def get_endpoint_resource_arguments_from_request( all_routes.extend(flavor.extra_routes) is_mcp_server = any("/mcp" in route.lower() for route in all_routes) timeout = "timeout: 300s" if is_mcp_server else "" - + return VirtualServiceArguments( # Base resource arguments RESOURCE_NAME=k8s_resource_group_name, From b892af9a0d63ceea490d85dfdca857d0ad6bc18d Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:07:08 -0500 Subject: [PATCH 13/29] fix: Add proper indentation to MCP_TIMEOUT in YAML template --- .../infra/gateways/resources/k8s_resource_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index ead91f7a6..d6cce29c1 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1377,7 +1377,7 @@ def get_endpoint_resource_arguments_from_request( if flavor.extra_routes: all_routes.extend(flavor.extra_routes) is_mcp_server = any("/mcp" in route.lower() for route in all_routes) - timeout = "timeout: 300s" if is_mcp_server else "" + timeout = " timeout: 300s" if is_mcp_server else "" return VirtualServiceArguments( # Base resource arguments From 66f8e7707540ff638bb1194a41eac3d75d35cb51 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:18:57 -0500 Subject: [PATCH 14/29] fix: Add newline to MCP_TIMEOUT to ensure valid YAML formatting --- .../infra/gateways/resources/k8s_resource_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index d6cce29c1..5faae95a4 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1377,7 +1377,7 @@ def get_endpoint_resource_arguments_from_request( if flavor.extra_routes: all_routes.extend(flavor.extra_routes) is_mcp_server = any("/mcp" in route.lower() for route in all_routes) - timeout = " timeout: 300s" if is_mcp_server else "" + timeout = " timeout: 300s\n" if is_mcp_server else "" return VirtualServiceArguments( # Base resource arguments From df554018ef20bb946768df2e1f64a94ee67154cf Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:22:22 -0500 Subject: [PATCH 15/29] fix: Use YAML comment for empty MCP_TIMEOUT to avoid invalid blank line --- .../infra/gateways/resources/k8s_resource_types.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 5faae95a4..8fdac9778 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1377,7 +1377,8 @@ def get_endpoint_resource_arguments_from_request( if flavor.extra_routes: all_routes.extend(flavor.extra_routes) is_mcp_server = any("/mcp" in route.lower() for route in all_routes) - timeout = " timeout: 300s\n" if is_mcp_server else "" + # When empty, use a YAML comment to avoid invalid blank line + timeout = " timeout: 300s\n" if is_mcp_server else " # timeout: default (30s)\n" return VirtualServiceArguments( # Base resource arguments From 668eb3d03e35a1050b0d24b41a2fb066e90f5a2e Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:26:43 -0500 Subject: [PATCH 16/29] fix: Remove blank lines from YAML after template substitution to handle empty MCP_TIMEOUT --- .../infra/gateways/resources/k8s_endpoint_resource_delegate.py | 2 ++ .../infra/gateways/resources/k8s_resource_types.py | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 9c6de78c1..5a7207cfa 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -232,6 +232,8 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, template_str = config_map_str["data"][key] yaml_str = Template(template_str).substitute(**substitution_kwargs) + # Remove blank lines that result from empty template substitutions (e.g., MCP_TIMEOUT) + yaml_str = "\n".join(line for line in yaml_str.split("\n") if line.strip() or not line) try: yaml_obj = yaml.safe_load(yaml_str) except: diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 8fdac9778..d6cce29c1 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1377,8 +1377,7 @@ def get_endpoint_resource_arguments_from_request( if flavor.extra_routes: all_routes.extend(flavor.extra_routes) is_mcp_server = any("/mcp" in route.lower() for route in all_routes) - # When empty, use a YAML comment to avoid invalid blank line - timeout = " timeout: 300s\n" if is_mcp_server else " # timeout: default (30s)\n" + timeout = " timeout: 300s" if is_mcp_server else "" return VirtualServiceArguments( # Base resource arguments From 5bbe67f748e485f203db4c81003e2d8ab73dc703 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:26:51 -0500 Subject: [PATCH 17/29] fix: Improve blank line removal logic in YAML post-processing --- .../gateways/resources/k8s_endpoint_resource_delegate.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 5a7207cfa..1d6926f86 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -232,8 +232,9 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, template_str = config_map_str["data"][key] yaml_str = Template(template_str).substitute(**substitution_kwargs) - # Remove blank lines that result from empty template substitutions (e.g., MCP_TIMEOUT) - yaml_str = "\n".join(line for line in yaml_str.split("\n") if line.strip() or not line) + # Remove blank lines (whitespace-only) that result from empty template substitutions (e.g., MCP_TIMEOUT) + # Keep lines with content and truly empty lines (for YAML structure), but remove whitespace-only lines + yaml_str = "\n".join(line for line in yaml_str.split("\n") if line.strip() or line == "") try: yaml_obj = yaml.safe_load(yaml_str) except: From 9619cb512944da29c483c8e3e67b14bc1ead8146 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:31:10 -0500 Subject: [PATCH 18/29] fix: Remove all whitespace-only lines from YAML to fix empty MCP_TIMEOUT issue --- .../gateways/resources/k8s_endpoint_resource_delegate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 1d6926f86..b1a089d82 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -233,8 +233,8 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, yaml_str = Template(template_str).substitute(**substitution_kwargs) # Remove blank lines (whitespace-only) that result from empty template substitutions (e.g., MCP_TIMEOUT) - # Keep lines with content and truly empty lines (for YAML structure), but remove whitespace-only lines - yaml_str = "\n".join(line for line in yaml_str.split("\n") if line.strip() or line == "") + # Remove lines that are only whitespace (have indentation but no content) + yaml_str = "\n".join(line for line in yaml_str.split("\n") if line.strip()) try: yaml_obj = yaml.safe_load(yaml_str) except: From 9bc80c4d22bac68000319ac95615354df6b1685b Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:36:54 -0500 Subject: [PATCH 19/29] fix: Target specific blank line pattern for empty MCP_TIMEOUT instead of removing all whitespace lines --- .../resources/k8s_endpoint_resource_delegate.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index b1a089d82..0face4547 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -232,9 +232,19 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, template_str = config_map_str["data"][key] yaml_str = Template(template_str).substitute(**substitution_kwargs) - # Remove blank lines (whitespace-only) that result from empty template substitutions (e.g., MCP_TIMEOUT) - # Remove lines that are only whitespace (have indentation but no content) - yaml_str = "\n".join(line for line in yaml_str.split("\n") if line.strip()) + # Remove blank lines that result from empty template substitutions (e.g., MCP_TIMEOUT) + # Specifically target lines with 10 spaces (indentation level of MCP_TIMEOUT) and no content + lines = yaml_str.split("\n") + filtered_lines = [] + for i, line in enumerate(lines): + # Skip lines that are exactly 10 spaces (empty MCP_TIMEOUT substitution) + if line == " ": + continue + # Also skip lines that are whitespace-only at that indentation level + if line.startswith(" ") and not line[10:].strip(): + continue + filtered_lines.append(line) + yaml_str = "\n".join(filtered_lines) try: yaml_obj = yaml.safe_load(yaml_str) except: From a58ef4fe6513e9a2c30a4a048b04d6e3c88c2f20 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:42:41 -0500 Subject: [PATCH 20/29] fix: Use regex to remove blank lines from empty MCP_TIMEOUT substitution --- .../resources/k8s_endpoint_resource_delegate.py | 17 ++++------------- .../gateways/resources/k8s_resource_types.py | 4 +++- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 0face4547..d8579c4b0 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -232,19 +232,10 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, template_str = config_map_str["data"][key] yaml_str = Template(template_str).substitute(**substitution_kwargs) - # Remove blank lines that result from empty template substitutions (e.g., MCP_TIMEOUT) - # Specifically target lines with 10 spaces (indentation level of MCP_TIMEOUT) and no content - lines = yaml_str.split("\n") - filtered_lines = [] - for i, line in enumerate(lines): - # Skip lines that are exactly 10 spaces (empty MCP_TIMEOUT substitution) - if line == " ": - continue - # Also skip lines that are whitespace-only at that indentation level - if line.startswith(" ") and not line[10:].strip(): - continue - filtered_lines.append(line) - yaml_str = "\n".join(filtered_lines) + # Remove blank lines that result from empty MCP_TIMEOUT substitution + # Pattern: line with exactly 10 spaces (indentation) followed by newline + import re + yaml_str = re.sub(r"^ $\n", "", yaml_str, flags=re.MULTILINE) try: yaml_obj = yaml.safe_load(yaml_str) except: diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index d6cce29c1..2fd4b4f3c 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1377,7 +1377,9 @@ def get_endpoint_resource_arguments_from_request( if flavor.extra_routes: all_routes.extend(flavor.extra_routes) is_mcp_server = any("/mcp" in route.lower() for route in all_routes) - timeout = " timeout: 300s" if is_mcp_server else "" + # Format timeout with proper indentation and newline to ensure valid YAML + # When empty, use empty string which will be handled by template structure + timeout = " timeout: 300s\n" if is_mcp_server else "" return VirtualServiceArguments( # Base resource arguments From a811a5bffb77b44554dad14d17922aa6f6d6aef9 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:48:25 -0500 Subject: [PATCH 21/29] fix: Improve regex pattern to handle empty MCP_TIMEOUT lines with or without trailing newline --- .../infra/gateways/resources/k8s_endpoint_resource_delegate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index d8579c4b0..34ce7d30d 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -235,7 +235,8 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, # Remove blank lines that result from empty MCP_TIMEOUT substitution # Pattern: line with exactly 10 spaces (indentation) followed by newline import re - yaml_str = re.sub(r"^ $\n", "", yaml_str, flags=re.MULTILINE) + # Remove lines that are exactly 10 spaces (empty MCP_TIMEOUT) + yaml_str = re.sub(r"^ $\n?", "", yaml_str, flags=re.MULTILINE) try: yaml_obj = yaml.safe_load(yaml_str) except: From fea83d11cdbc5f22650811829631ad3ffb68348c Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:54:09 -0500 Subject: [PATCH 22/29] fix: Use simple line filtering to remove empty MCP_TIMEOUT lines --- .../gateways/resources/k8s_endpoint_resource_delegate.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 34ce7d30d..0d23e8cfe 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -1,5 +1,6 @@ import datetime import os +import re from string import Template from typing import Any, Dict, List, Optional, Tuple @@ -233,10 +234,10 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, yaml_str = Template(template_str).substitute(**substitution_kwargs) # Remove blank lines that result from empty MCP_TIMEOUT substitution - # Pattern: line with exactly 10 spaces (indentation) followed by newline - import re - # Remove lines that are exactly 10 spaces (empty MCP_TIMEOUT) - yaml_str = re.sub(r"^ $\n?", "", yaml_str, flags=re.MULTILINE) + # Remove lines that are exactly 10 spaces (empty MCP_TIMEOUT at that indentation level) + lines = yaml_str.split("\n") + filtered_lines = [line for line in lines if line != " "] + yaml_str = "\n".join(filtered_lines) try: yaml_obj = yaml.safe_load(yaml_str) except: From 630dab6bff7f591b4a799b9b9a0631242b0db0fe Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 15:58:54 -0500 Subject: [PATCH 23/29] fix: Filter out whitespace-only lines from YAML template substitutions --- .../resources/k8s_endpoint_resource_delegate.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 0d23e8cfe..38b8c9127 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -233,10 +233,18 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, template_str = config_map_str["data"][key] yaml_str = Template(template_str).substitute(**substitution_kwargs) - # Remove blank lines that result from empty MCP_TIMEOUT substitution - # Remove lines that are exactly 10 spaces (empty MCP_TIMEOUT at that indentation level) + # Remove blank lines that result from empty template substitutions (e.g., MCP_TIMEOUT) + # Specifically remove lines that are only whitespace (indentation but no content) + # This handles the case where MCP_TIMEOUT is empty and creates " " (10 spaces) lines = yaml_str.split("\n") - filtered_lines = [line for line in lines if line != " "] + filtered_lines = [] + for line in lines: + # Skip lines that are only whitespace (empty substitutions) + if line.strip(): + filtered_lines.append(line) + elif not line: # Keep truly empty lines (for YAML structure) + filtered_lines.append(line) + # Skip whitespace-only lines (empty substitutions) yaml_str = "\n".join(filtered_lines) try: yaml_obj = yaml.safe_load(yaml_str) From caa364c2d0d7e61877005c255997169252e10807 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 16:02:06 -0500 Subject: [PATCH 24/29] fix: Simplify blank line removal logic for empty template substitutions --- .../resources/k8s_endpoint_resource_delegate.py | 14 +++----------- .../infra/gateways/resources/k8s_resource_types.py | 6 +++--- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 38b8c9127..88a8370d6 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -234,21 +234,13 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, yaml_str = Template(template_str).substitute(**substitution_kwargs) # Remove blank lines that result from empty template substitutions (e.g., MCP_TIMEOUT) - # Specifically remove lines that are only whitespace (indentation but no content) - # This handles the case where MCP_TIMEOUT is empty and creates " " (10 spaces) + # Remove lines that contain only whitespace (empty substitutions create lines with just indentation) lines = yaml_str.split("\n") - filtered_lines = [] - for line in lines: - # Skip lines that are only whitespace (empty substitutions) - if line.strip(): - filtered_lines.append(line) - elif not line: # Keep truly empty lines (for YAML structure) - filtered_lines.append(line) - # Skip whitespace-only lines (empty substitutions) + filtered_lines = [line for line in lines if line.strip() or line == ""] yaml_str = "\n".join(filtered_lines) try: yaml_obj = yaml.safe_load(yaml_str) - except: + except Exception as e: logger.exception("Could not load yaml string: %s", yaml_str) raise return yaml_obj diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 2fd4b4f3c..6fee31716 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1377,9 +1377,9 @@ def get_endpoint_resource_arguments_from_request( if flavor.extra_routes: all_routes.extend(flavor.extra_routes) is_mcp_server = any("/mcp" in route.lower() for route in all_routes) - # Format timeout with proper indentation and newline to ensure valid YAML - # When empty, use empty string which will be handled by template structure - timeout = " timeout: 300s\n" if is_mcp_server else "" + # Format timeout with proper indentation + # When not MCP server, use empty string - the blank line will be removed in load_k8s_yaml + timeout = " timeout: 300s" if is_mcp_server else "" return VirtualServiceArguments( # Base resource arguments From 59e857430acc5510b6e03b3893c5e0510e8d6b6c Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 16:02:45 -0500 Subject: [PATCH 25/29] fix: Format timeout assignment to comply with Black formatting rules --- .../infra/gateways/resources/k8s_resource_types.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 6fee31716..e7297c05e 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1379,7 +1379,9 @@ def get_endpoint_resource_arguments_from_request( is_mcp_server = any("/mcp" in route.lower() for route in all_routes) # Format timeout with proper indentation # When not MCP server, use empty string - the blank line will be removed in load_k8s_yaml - timeout = " timeout: 300s" if is_mcp_server else "" + timeout = ( + " timeout: 300s" if is_mcp_server else "" + ) return VirtualServiceArguments( # Base resource arguments From 7c02088ed5dbf80fdc46264ea3609616c1c9185e Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 16:08:19 -0500 Subject: [PATCH 26/29] fix: Format timeout assignment as if/else to comply with Black --- .../infra/gateways/resources/k8s_resource_types.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index e7297c05e..6ec04880a 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1379,9 +1379,10 @@ def get_endpoint_resource_arguments_from_request( is_mcp_server = any("/mcp" in route.lower() for route in all_routes) # Format timeout with proper indentation # When not MCP server, use empty string - the blank line will be removed in load_k8s_yaml - timeout = ( - " timeout: 300s" if is_mcp_server else "" - ) + if is_mcp_server: + timeout = " timeout: 300s" + else: + timeout = "" return VirtualServiceArguments( # Base resource arguments From b6a6edd12dd9cef0bd44a4e837b6b370c5bb6974 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 16:09:14 -0500 Subject: [PATCH 27/29] fix: Apply Black formatting to k8s_resource_types.py --- .../infra/gateways/resources/k8s_resource_types.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index 6ec04880a..f298ffd14 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1365,10 +1365,7 @@ def get_endpoint_resource_arguments_from_request( # Set 5-minute timeout for MCP servers to fix 30-second default timeout issue # MCP servers use passthrough forwarder and have routes containing /mcp is_mcp_server = False - if ( - isinstance(flavor, RunnableImageLike) - and flavor.forwarder_type == "passthrough" - ): + if isinstance(flavor, RunnableImageLike) and flavor.forwarder_type == "passthrough": all_routes = [] if flavor.predict_route: all_routes.append(flavor.predict_route) From a634ed79e97929507f3a4701d2d4c704e0f0cf04 Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 16:13:02 -0500 Subject: [PATCH 28/29] revert: Remove blank line removal logic - keep original MCP timeout functionality only --- .../gateways/resources/k8s_endpoint_resource_delegate.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py index 88a8370d6..355aca6e6 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_endpoint_resource_delegate.py @@ -233,14 +233,9 @@ def load_k8s_yaml(key: str, substitution_kwargs: ResourceArguments) -> Dict[str, template_str = config_map_str["data"][key] yaml_str = Template(template_str).substitute(**substitution_kwargs) - # Remove blank lines that result from empty template substitutions (e.g., MCP_TIMEOUT) - # Remove lines that contain only whitespace (empty substitutions create lines with just indentation) - lines = yaml_str.split("\n") - filtered_lines = [line for line in lines if line.strip() or line == ""] - yaml_str = "\n".join(filtered_lines) try: yaml_obj = yaml.safe_load(yaml_str) - except Exception as e: + except: logger.exception("Could not load yaml string: %s", yaml_str) raise return yaml_obj From 8c63c356b9dc2c140ed2b2ee372dd63c8ca9d88c Mon Sep 17 00:00:00 2001 From: Ashwin Ranade Date: Fri, 16 Jan 2026 16:13:11 -0500 Subject: [PATCH 29/29] docs: Update comment to remove reference to removed blank line removal logic --- .../infra/gateways/resources/k8s_resource_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py index f298ffd14..836a82124 100644 --- a/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py +++ b/model-engine/model_engine_server/infra/gateways/resources/k8s_resource_types.py @@ -1374,8 +1374,8 @@ def get_endpoint_resource_arguments_from_request( if flavor.extra_routes: all_routes.extend(flavor.extra_routes) is_mcp_server = any("/mcp" in route.lower() for route in all_routes) - # Format timeout with proper indentation - # When not MCP server, use empty string - the blank line will be removed in load_k8s_yaml + # Format timeout with proper indentation (10 spaces to match YAML structure) + # When not MCP server, use empty string if is_mcp_server: timeout = " timeout: 300s" else: