@@ -6504,6 +6504,8 @@ def test_predefined_metric_retry_fail_on_resource_exhausted(
65046504 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
65056505 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
65066506 genai_errors .ClientError (code = 429 , response_json = error_response_json ),
6507+ genai_errors .ClientError (code = 429 , response_json = error_response_json ),
6508+ genai_errors .ClientError (code = 429 , response_json = error_response_json ),
65076509 ]
65086510
65096511 result = _evals_common ._execute_evaluation (
@@ -6512,18 +6514,13 @@ def test_predefined_metric_retry_fail_on_resource_exhausted(
65126514 metrics = [metric ],
65136515 )
65146516
6515- assert mock_private_evaluate_instances .call_count == 3
6516- assert mock_sleep .call_count == 2
6517+ assert mock_private_evaluate_instances .call_count == 5
6518+ assert mock_sleep .call_count == 4
65176519 assert len (result .summary_metrics ) == 1
65186520 summary_metric = result .summary_metrics [0 ]
65196521 assert summary_metric .metric_name == "summarization_quality"
65206522 assert summary_metric .mean_score is None
65216523 assert summary_metric .num_cases_error == 1
6522- assert (
6523- "Judge model resource exhausted after 3 retries"
6524- ) in result .eval_case_results [0 ].response_candidate_results [0 ].metric_results [
6525- "summarization_quality"
6526- ].error_message
65276524
65286525
65296526class TestEvaluationDataset :
@@ -7094,3 +7091,134 @@ def test_rate_limiter_no_sleep_when_enough_time_passed(self):
70947091 elapsed = real_time .time () - start
70957092 # 5 calls at 1000 QPS should take ~0.005s, certainly under 1s
70967093 assert elapsed < 1.0
7094+
7095+
class TestCallWithRetry:
    """Tests for the shared _call_with_retry helper.

    Covers the four interesting paths: immediate success, success after
    transient failures, exhaustion of the retry budget, retry on server-side
    (5xx) errors, and fail-fast on non-retryable client errors.
    """

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_success_on_first_try(self, mock_sleep):
        """Tests that _call_with_retry returns immediately on success."""
        fn = mock.Mock(return_value="success")
        result = _evals_metric_handlers._call_with_retry(fn, "test_metric")
        assert result == "success"
        assert fn.call_count == 1
        # No failure means no backoff sleep at all.
        assert mock_sleep.call_count == 0

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_success_after_retries(self, mock_sleep):
        """Tests that _call_with_retry succeeds after transient failures."""
        error_json = {"error": {"code": 429, "message": "exhausted"}}
        fn = mock.Mock(
            side_effect=[
                genai_errors.ClientError(code=429, response_json=error_json),
                genai_errors.ClientError(code=429, response_json=error_json),
                "success",
            ]
        )
        result = _evals_metric_handlers._call_with_retry(fn, "test_metric")
        assert result == "success"
        # Two failures then one success: three calls, one sleep per failure.
        assert fn.call_count == 3
        assert mock_sleep.call_count == 2

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_raises_after_max_retries(self, mock_sleep):
        """Tests that _call_with_retry raises after exhausting retries."""
        error_json = {"error": {"code": 429, "message": "exhausted"}}
        fn = mock.Mock(
            side_effect=genai_errors.ClientError(code=429, response_json=error_json)
        )
        with pytest.raises(genai_errors.ClientError):
            _evals_metric_handlers._call_with_retry(fn, "test_metric")
        assert fn.call_count == 5  # _MAX_RETRIES
        # No sleep after the final (re-raised) failure.
        assert mock_sleep.call_count == 4

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_retries_on_server_error(self, mock_sleep):
        """Tests retry on 503 ServiceUnavailable (ServerError)."""
        error_json = {"error": {"code": 503, "message": "unavailable"}}
        fn = mock.Mock(
            side_effect=[
                genai_errors.ServerError(code=503, response_json=error_json),
                "success",
            ]
        )
        result = _evals_metric_handlers._call_with_retry(fn, "test_metric")
        assert result == "success"
        assert fn.call_count == 2
        # One transient server error means exactly one backoff sleep,
        # mirroring the sleep-count assertions in the sibling tests.
        assert mock_sleep.call_count == 1

    @mock.patch("time.sleep", return_value=None)
    def test_call_with_retry_no_retry_on_non_retryable(self, mock_sleep):
        """Tests that non-retryable errors are raised immediately."""
        error_json = {"error": {"code": 400, "message": "bad request"}}
        fn = mock.Mock(
            side_effect=genai_errors.ClientError(code=400, response_json=error_json)
        )
        with pytest.raises(genai_errors.ClientError):
            _evals_metric_handlers._call_with_retry(fn, "test_metric")
        # A 400 is not retryable: a single call and no backoff.
        assert fn.call_count == 1
        assert mock_sleep.call_count == 0
7161+
7162+
class TestComputationMetricRetry:
    """Tests for retry behavior in ComputationMetricHandler."""

    @mock.patch.object(
        _evals_metric_handlers.ComputationMetricHandler,
        "SUPPORTED_COMPUTATION_METRICS",
        frozenset(["bleu"]),
    )
    @mock.patch("time.sleep", return_value=None)
    # fmt: off
    @mock.patch(
        "vertexai._genai.evals.Evals.evaluate_instances"
    )
    # fmt: on
    def test_computation_metric_retry_on_resource_exhausted(
        self,
        mock_evaluate_instances,
        mock_sleep,
        mock_api_client_fixture,
    ):
        """Tests that ComputationMetricHandler retries on 429."""
        # A single-row dataset is enough to drive one metric computation.
        eval_row = {
            "prompt": "Test prompt",
            "response": "Test response",
            "reference": "Test reference",
        }
        input_dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame([eval_row])
        )
        metric = vertexai_genai_types.Metric(name="bleu")

        # Successful bleu response returned once the retries clear.
        mock_bleu_result = mock.MagicMock()
        mock_bleu_result.model_dump.return_value = {
            "bleu_results": {"bleu_metric_values": [{"score": 0.85}]}
        }
        # Two quota failures, then success on the third attempt.
        quota_error = {
            "error": {
                "code": 429,
                "message": "Resource exhausted.",
                "status": "RESOURCE_EXHAUSTED",
            }
        }
        mock_evaluate_instances.side_effect = [
            genai_errors.ClientError(code=429, response_json=quota_error),
            genai_errors.ClientError(code=429, response_json=quota_error),
            mock_bleu_result,
        ]

        result = _evals_common._execute_evaluation(
            api_client=mock_api_client_fixture,
            dataset=input_dataset,
            metrics=[metric],
        )

        # Three calls total (two retried), one sleep per retry.
        assert mock_evaluate_instances.call_count == 3
        assert mock_sleep.call_count == 2
        summary_metric = result.summary_metrics[0]
        assert summary_metric.metric_name == "bleu"
        assert summary_metric.mean_score == 0.85
0 commit comments