From a7dfb234304e0c6ef8a1cdfe0a170ea0b389402a Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 10 Apr 2026 15:10:22 +0200 Subject: [PATCH 1/3] . --- tests/integrations/litellm/test_litellm.py | 31 +++++++++++++--------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 0196ff413f..33b69fe506 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -465,7 +465,9 @@ def test_embeddings_no_pii( assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] -def test_exception_handling(sentry_init, capture_events): +def test_exception_handling( + reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response +): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, @@ -474,19 +476,22 @@ def test_exception_handling(sentry_init, capture_events): messages = [{"role": "user", "content": "Hello!"}] - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + client = OpenAI(api_key="z") - _input_callback(kwargs) - _failure_callback( - kwargs, - Exception("API rate limit reached"), - datetime.now(), - datetime.now(), - ) + model_response = get_rate_limit_model_response() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + with pytest.raises(litellm.RateLimitError): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) # Should have error event and transaction assert len(events) >= 1 From c8d86a8bbe98beaf6801c6a884b569e311aeccdb Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 10 Apr 2026 15:14:17 +0200 Subject: [PATCH 2/3] add fixture --- tests/conftest.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 796cfaf310..b8327622f4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1080,6 +1080,28 @@ def inner(response_content, serialize_pydantic=False, request_headers=None): return inner +@pytest.fixture +def get_rate_limit_model_response(): + def inner(request_headers=None): + if request_headers is None: + request_headers = {} + + model_request = HttpxRequest( + "POST", + "/responses", + headers=request_headers, + ) + + response = HttpxResponse( + 429, + request=model_request, + ) + + return response + + return inner + + @pytest.fixture def streaming_chat_completions_model_response(): return [ From 54925ab48aa2fe0ef6a3037225812d4d9f773e86 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 10 Apr 2026 16:30:41 +0200 Subject: [PATCH 3/3] patch completions client instead of embeddings --- tests/integrations/litellm/test_litellm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 33b69fe506..b73f73cbbc 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -481,7 +481,7 @@ def test_exception_handling( model_response = get_rate_limit_model_response() with mock.patch.object( - client.embeddings._client._client, + client.completions._client._client, "send", return_value=model_response, ):