diff --git a/src/strands/tools/mcp/mcp_client.py b/src/strands/tools/mcp/mcp_client.py index 51a627c7c..fa89d9123 100644 --- a/src/strands/tools/mcp/mcp_client.py +++ b/src/strands/tools/mcp/mcp_client.py @@ -114,6 +114,7 @@ def __init__( transport_callable: Callable[[], MCPTransport], *, startup_timeout: int = 30, + cleanup_timeout: float = 5.0, tool_filters: ToolFilters | None = None, prefix: str | None = None, elicitation_callback: ElicitationFnT | None = None, @@ -125,6 +126,8 @@ def __init__( transport_callable: A callable that returns an MCPTransport (read_stream, write_stream) tuple. startup_timeout: Timeout after which MCP server initialization should be cancelled. Defaults to 30. + cleanup_timeout: Maximum seconds to wait for the background thread to stop during cleanup. + Prevents hangs when the MCP server process is unresponsive. Defaults to 5.0. tool_filters: Optional filters to apply to tools. prefix: Optional prefix for tool names. elicitation_callback: Optional callback function to handle elicitation requests from the MCP server. @@ -133,6 +136,7 @@ def __init__( See TasksConfig for details. This feature is experimental and subject to change. """ self._startup_timeout = startup_timeout + self._cleanup_timeout = cleanup_timeout self._tool_filters = tool_filters self._prefix = prefix self._elicitation_callback = elicitation_callback @@ -349,12 +353,22 @@ async def _set_close_event() -> None: if self._close_future and not self._close_future.done(): self._close_future.set_result(None) - # Not calling _invoke_on_background_thread since the session does not need to exist - # we only need the thread and event loop to exist. - asyncio.run_coroutine_threadsafe(coro=_set_close_event(), loop=self._background_thread_event_loop) + try: + # Not calling _invoke_on_background_thread since the session does not need to exist + # we only need the thread and event loop to exist. + asyncio.run_coroutine_threadsafe(coro=_set_close_event(), loop=self._background_thread_event_loop) + except RuntimeError: + # Event loop may already be closed (e.g., during interpreter shutdown) + logger.debug("event loop already closed, cannot signal background thread") self._log_debug_with_thread("waiting for background thread to join") - self._background_thread.join() + self._background_thread.join(timeout=self._cleanup_timeout) + if self._background_thread.is_alive(): + logger.warning( + "background thread did not stop within %.1f seconds; " + "proceeding without waiting to avoid hanging the process", + self._cleanup_timeout, + ) if self._background_thread_event_loop is not None: self._background_thread_event_loop.close() diff --git a/tests/strands/tools/mcp/test_mcp_client.py b/tests/strands/tools/mcp/test_mcp_client.py index 5eedd1e33..4b7905658 100644 --- a/tests/strands/tools/mcp/test_mcp_client.py +++ b/tests/strands/tools/mcp/test_mcp_client.py @@ -516,7 +516,7 @@ def test_stop_with_background_thread_but_no_event_loop(): client.stop(None, None, None) # Verify thread was joined - mock_thread.join.assert_called_once() + mock_thread.join.assert_called_once_with(timeout=client._cleanup_timeout) # Verify cleanup occurred assert client._background_thread is None @@ -539,7 +539,7 @@ def test_stop_closes_event_loop(): client.stop(None, None, None) # Verify thread was joined - mock_thread.join.assert_called_once() + mock_thread.join.assert_called_once_with(timeout=client._cleanup_timeout) # Verify event loop was closed mock_event_loop.close.assert_called_once() @@ -549,7 +549,44 @@ def test_stop_closes_event_loop(): assert client._background_thread_event_loop is None -def test_mcp_client_state_reset_after_timeout(): +def test_stop_does_not_hang_when_background_thread_unresponsive(): + """Test that stop() does not hang indefinitely when the background thread fails to stop. + + Regression test for https://github.com/strands-agents/sdk-python/issues/1732. + """ + client = MCPClient(MagicMock(), cleanup_timeout=0.1) + + mock_thread = MagicMock() + # Simulate a thread that never stops + mock_thread.is_alive.return_value = True + client._background_thread = mock_thread + client._background_thread_event_loop = None + + client.stop(None, None, None) + + mock_thread.join.assert_called_once_with(timeout=0.1) + # Verify cleanup still occurred despite thread being alive + assert client._background_thread is None + + +def test_stop_handles_closed_event_loop(): + """Test that stop() handles RuntimeError from a closed event loop gracefully.""" + client = MCPClient(MagicMock()) + + mock_thread = MagicMock() + mock_thread.is_alive.return_value = False + mock_event_loop = MagicMock() + mock_event_loop.close = MagicMock() + + client._background_thread = mock_thread + client._background_thread_event_loop = mock_event_loop + + with patch("asyncio.run_coroutine_threadsafe", side_effect=RuntimeError("Event loop is closed")): + # Should not raise — RuntimeError is caught + client.stop(None, None, None) + + mock_thread.join.assert_called_once_with(timeout=client._cleanup_timeout) + assert client._background_thread is None """Test that all client state is properly reset after timeout.""" def slow_transport():