Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,6 +1080,28 @@ def inner(response_content, serialize_pydantic=False, request_headers=None):
return inner


@pytest.fixture
def get_rate_limit_model_response():
    """Factory fixture producing a mocked 429 (rate-limited) HTTP response.

    The returned callable accepts optional request headers and wires them
    into an httpx request/response pair, so tests can make a mocked model
    call fail with a rate-limit error.
    """

    def inner(request_headers=None):
        # Default to an empty header mapping when the caller passes nothing.
        headers = {} if request_headers is None else request_headers

        # Build the request the 429 response is attributed to.
        rate_limited_request = HttpxRequest(
            "POST",
            "/responses",
            headers=headers,
        )

        return HttpxResponse(429, request=rate_limited_request)

    return inner


@pytest.fixture
def streaming_chat_completions_model_response():
return [
Expand Down
31 changes: 18 additions & 13 deletions tests/integrations/litellm/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,9 @@ def test_embeddings_no_pii(
assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]


def test_exception_handling(sentry_init, capture_events):
def test_exception_handling(
reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response
):
sentry_init(
integrations=[LiteLLMIntegration()],
traces_sample_rate=1.0,
Expand All @@ -474,19 +476,22 @@ def test_exception_handling(sentry_init, capture_events):

messages = [{"role": "user", "content": "Hello!"}]

with start_transaction(name="litellm test"):
kwargs = {
"model": "gpt-3.5-turbo",
"messages": messages,
}
client = OpenAI(api_key="z")
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OpenAI client retries 429 causing unnecessary test delay

Low Severity

OpenAI(api_key="z") uses the default max_retries=2, so when the mock returns a 429 response, the OpenAI client will automatically retry twice with exponential backoff (~1.5 seconds of sleeping) before finally raising RateLimitError. The old test called _failure_callback directly and had no such delay. Creating the client with max_retries=0 would avoid unnecessary retries and keep the test fast.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 179f14b. Configure here.


_input_callback(kwargs)
_failure_callback(
kwargs,
Exception("API rate limit reached"),
datetime.now(),
datetime.now(),
)
model_response = get_rate_limit_model_response()

with mock.patch.object(
client.completions._client._client,
"send",
return_value=model_response,
):
with start_transaction(name="litellm test"):
with pytest.raises(litellm.RateLimitError):
litellm.completion(
model="gpt-3.5-turbo",
messages=messages,
client=client,
)

# Should have error event and transaction
assert len(events) >= 1
Expand Down
Loading