Skip to content

Commit 79fffdd

Browse files
jsondai authored and copybara-github committed
chore: GenAI Client(evals) - Improve retry budget, add jitter, and expand retryable errors
PiperOrigin-RevId: 896691317
1 parent 9e9dd70 commit 79fffdd

File tree

3 files changed

+245
-103
lines changed

3 files changed

+245
-103
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 135 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6504,6 +6504,8 @@ def test_predefined_metric_retry_fail_on_resource_exhausted(
65046504
genai_errors.ClientError(code=429, response_json=error_response_json),
65056505
genai_errors.ClientError(code=429, response_json=error_response_json),
65066506
genai_errors.ClientError(code=429, response_json=error_response_json),
6507+
genai_errors.ClientError(code=429, response_json=error_response_json),
6508+
genai_errors.ClientError(code=429, response_json=error_response_json),
65076509
]
65086510

65096511
result = _evals_common._execute_evaluation(
@@ -6512,18 +6514,13 @@ def test_predefined_metric_retry_fail_on_resource_exhausted(
65126514
metrics=[metric],
65136515
)
65146516

6515-
assert mock_private_evaluate_instances.call_count == 3
6516-
assert mock_sleep.call_count == 2
6517+
assert mock_private_evaluate_instances.call_count == 5
6518+
assert mock_sleep.call_count == 4
65176519
assert len(result.summary_metrics) == 1
65186520
summary_metric = result.summary_metrics[0]
65196521
assert summary_metric.metric_name == "summarization_quality"
65206522
assert summary_metric.mean_score is None
65216523
assert summary_metric.num_cases_error == 1
6522-
assert (
6523-
"Judge model resource exhausted after 3 retries"
6524-
) in result.eval_case_results[0].response_candidate_results[0].metric_results[
6525-
"summarization_quality"
6526-
].error_message
65276524

65286525

65296526
class TestEvaluationDataset:
@@ -7094,3 +7091,134 @@ def test_rate_limiter_no_sleep_when_enough_time_passed(self):
70947091
elapsed = real_time.time() - start
70957092
# 5 calls at 1000 QPS should take ~0.005s, certainly under 1s
70967093
assert elapsed < 1.0
7094+
7095+
7096+
class TestCallWithRetry:
7097+
"""Tests for the shared _call_with_retry helper."""
7098+
7099+
@mock.patch("time.sleep", return_value=None)
7100+
def test_call_with_retry_success_on_first_try(self, mock_sleep):
7101+
"""Tests that _call_with_retry returns immediately on success."""
7102+
fn = mock.Mock(return_value="success")
7103+
result = _evals_metric_handlers._call_with_retry(fn, "test_metric")
7104+
assert result == "success"
7105+
assert fn.call_count == 1
7106+
assert mock_sleep.call_count == 0
7107+
7108+
@mock.patch("time.sleep", return_value=None)
7109+
def test_call_with_retry_success_after_retries(self, mock_sleep):
7110+
"""Tests that _call_with_retry succeeds after transient failures."""
7111+
error_json = {"error": {"code": 429, "message": "exhausted"}}
7112+
fn = mock.Mock(
7113+
side_effect=[
7114+
genai_errors.ClientError(code=429, response_json=error_json),
7115+
genai_errors.ClientError(code=429, response_json=error_json),
7116+
"success",
7117+
]
7118+
)
7119+
result = _evals_metric_handlers._call_with_retry(fn, "test_metric")
7120+
assert result == "success"
7121+
assert fn.call_count == 3
7122+
assert mock_sleep.call_count == 2
7123+
7124+
@mock.patch("time.sleep", return_value=None)
7125+
def test_call_with_retry_raises_after_max_retries(self, mock_sleep):
7126+
"""Tests that _call_with_retry raises after exhausting retries."""
7127+
error_json = {"error": {"code": 429, "message": "exhausted"}}
7128+
fn = mock.Mock(
7129+
side_effect=genai_errors.ClientError(code=429, response_json=error_json)
7130+
)
7131+
with pytest.raises(genai_errors.ClientError):
7132+
_evals_metric_handlers._call_with_retry(fn, "test_metric")
7133+
assert fn.call_count == 5 # _MAX_RETRIES
7134+
assert mock_sleep.call_count == 4
7135+
7136+
@mock.patch("time.sleep", return_value=None)
7137+
def test_call_with_retry_retries_on_server_error(self, mock_sleep):
7138+
"""Tests retry on 503 ServiceUnavailable (ServerError)."""
7139+
error_json = {"error": {"code": 503, "message": "unavailable"}}
7140+
fn = mock.Mock(
7141+
side_effect=[
7142+
genai_errors.ServerError(code=503, response_json=error_json),
7143+
"success",
7144+
]
7145+
)
7146+
result = _evals_metric_handlers._call_with_retry(fn, "test_metric")
7147+
assert result == "success"
7148+
assert fn.call_count == 2
7149+
7150+
@mock.patch("time.sleep", return_value=None)
7151+
def test_call_with_retry_no_retry_on_non_retryable(self, mock_sleep):
7152+
"""Tests that non-retryable errors are raised immediately."""
7153+
error_json = {"error": {"code": 400, "message": "bad request"}}
7154+
fn = mock.Mock(
7155+
side_effect=genai_errors.ClientError(code=400, response_json=error_json)
7156+
)
7157+
with pytest.raises(genai_errors.ClientError):
7158+
_evals_metric_handlers._call_with_retry(fn, "test_metric")
7159+
assert fn.call_count == 1
7160+
assert mock_sleep.call_count == 0
7161+
7162+
7163+
class TestComputationMetricRetry:
7164+
"""Tests for retry behavior in ComputationMetricHandler."""
7165+
7166+
@mock.patch.object(
7167+
_evals_metric_handlers.ComputationMetricHandler,
7168+
"SUPPORTED_COMPUTATION_METRICS",
7169+
frozenset(["bleu"]),
7170+
)
7171+
@mock.patch("time.sleep", return_value=None)
7172+
# fmt: off
7173+
@mock.patch(
7174+
"vertexai._genai.evals.Evals.evaluate_instances"
7175+
)
7176+
# fmt: on
7177+
def test_computation_metric_retry_on_resource_exhausted(
7178+
self,
7179+
mock_evaluate_instances,
7180+
mock_sleep,
7181+
mock_api_client_fixture,
7182+
):
7183+
"""Tests that ComputationMetricHandler retries on 429."""
7184+
dataset_df = pd.DataFrame(
7185+
[
7186+
{
7187+
"prompt": "Test prompt",
7188+
"response": "Test response",
7189+
"reference": "Test reference",
7190+
}
7191+
]
7192+
)
7193+
input_dataset = vertexai_genai_types.EvaluationDataset(
7194+
eval_dataset_df=dataset_df
7195+
)
7196+
metric = vertexai_genai_types.Metric(name="bleu")
7197+
error_response_json = {
7198+
"error": {
7199+
"code": 429,
7200+
"message": "Resource exhausted.",
7201+
"status": "RESOURCE_EXHAUSTED",
7202+
}
7203+
}
7204+
mock_bleu_result = mock.MagicMock()
7205+
mock_bleu_result.model_dump.return_value = {
7206+
"bleu_results": {"bleu_metric_values": [{"score": 0.85}]}
7207+
}
7208+
mock_evaluate_instances.side_effect = [
7209+
genai_errors.ClientError(code=429, response_json=error_response_json),
7210+
genai_errors.ClientError(code=429, response_json=error_response_json),
7211+
mock_bleu_result,
7212+
]
7213+
7214+
result = _evals_common._execute_evaluation(
7215+
api_client=mock_api_client_fixture,
7216+
dataset=input_dataset,
7217+
metrics=[metric],
7218+
)
7219+
7220+
assert mock_evaluate_instances.call_count == 3
7221+
assert mock_sleep.call_count == 2
7222+
summary_metric = result.summary_metrics[0]
7223+
assert summary_metric.metric_name == "bleu"
7224+
assert summary_metric.mean_score == 0.85

0 commit comments

Comments
 (0)