From a7dfb234304e0c6ef8a1cdfe0a170ea0b389402a Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 10 Apr 2026 15:10:22 +0200
Subject: [PATCH 1/3] test(litellm): trigger exception handling via mocked 429 response

---
 tests/integrations/litellm/test_litellm.py | 31 +++++++++++++---------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 0196ff413f..33b69fe506 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -465,7 +465,9 @@ def test_embeddings_no_pii(
         assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]
 
 
-def test_exception_handling(sentry_init, capture_events):
+def test_exception_handling(
+    reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response
+):
     sentry_init(
         integrations=[LiteLLMIntegration()],
         traces_sample_rate=1.0,
@@ -474,19 +476,22 @@ def test_exception_handling(sentry_init, capture_events):
 
     messages = [{"role": "user", "content": "Hello!"}]
 
-    with start_transaction(name="litellm test"):
-        kwargs = {
-            "model": "gpt-3.5-turbo",
-            "messages": messages,
-        }
+    client = OpenAI(api_key="z")
 
-        _input_callback(kwargs)
-        _failure_callback(
-            kwargs,
-            Exception("API rate limit reached"),
-            datetime.now(),
-            datetime.now(),
-        )
+    model_response = get_rate_limit_model_response()
+
+    with mock.patch.object(
+        client.embeddings._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            with pytest.raises(litellm.RateLimitError):
+                litellm.completion(
+                    model="gpt-3.5-turbo",
+                    messages=messages,
+                    client=client,
+                )
 
     # Should have error event and transaction
     assert len(events) >= 1

From c8d86a8bbe98beaf6801c6a884b569e311aeccdb Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 10 Apr 2026 15:14:17 +0200
Subject: [PATCH 2/3] add get_rate_limit_model_response fixture

---
 tests/conftest.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tests/conftest.py b/tests/conftest.py
index 796cfaf310..b8327622f4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1080,6 +1080,28 @@ def inner(response_content, serialize_pydantic=False, request_headers=None):
     return inner
 
 
+@pytest.fixture
+def get_rate_limit_model_response():  # factory fixture: returns `inner`, which builds a status-429 response
+    def inner(request_headers=None):  # request_headers: optional headers for the synthetic request
+        if request_headers is None:  # None sentinel -> fresh dict per call, no shared mutable default
+            request_headers = {}
+
+        model_request = HttpxRequest(  # request object the response gets attached to below
+            "POST",
+            "/responses",  # NOTE(review): fixed path, also used by the chat-completion test - confirm it is irrelevant
+            headers=request_headers,
+        )
+
+        response = HttpxResponse(  # no body/content is supplied, only status and request
+            429,  # HTTP 429 Too Many Requests
+            request=model_request,
+        )
+
+        return response
+
+    return inner  # tests call the returned function to obtain the response
+
+
 @pytest.fixture
 def streaming_chat_completions_model_response():
     return [

From 54925ab48aa2fe0ef6a3037225812d4d9f773e86 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 10 Apr 2026 16:30:41 +0200
Subject: [PATCH 3/3] patch completions client instead of embeddings

---
 tests/integrations/litellm/test_litellm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 33b69fe506..b73f73cbbc 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -481,7 +481,7 @@ def test_exception_handling(
     model_response = get_rate_limit_model_response()
 
     with mock.patch.object(
-        client.embeddings._client._client,
+        client.completions._client._client,
         "send",
         return_value=model_response,
     ):