Skip to content

Commit c6c76b1

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals) - auto-infer metric/candidate and validate inputs for generate_loss_clusters
PiperOrigin-RevId: 894079615
1 parent 2f2a211 commit c6c76b1

File tree

7 files changed

+1198
-15
lines changed

7 files changed

+1198
-15
lines changed
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
16+
17+
from tests.unit.vertexai.genai.replays import pytest_helper
18+
from vertexai._genai import types
19+
import pytest
20+
21+
22+
def _make_eval_result():
    """Builds the EvaluationResult fixture used by the loss-cluster tests.

    The fixture holds one eval case whose single response candidate scored
    0.0 on "multi_turn_task_success_v1", a one-case dataset containing a
    single conversation turn (a user event followed by a "travel-agent"
    event), and run metadata naming "travel-agent" as the only candidate.

    Returns:
        A populated types.EvaluationResult.
    """
    metric_name = "multi_turn_task_success_v1"
    agent_name = "travel-agent"

    # Per-case results: a single candidate with one failing metric.
    failed_metric = types.EvalCaseMetricResult(
        score=0.0,
        explanation="Failed tool invocation",
    )
    candidate_result = types.ResponseCandidateResult(
        response_index=0,
        metric_results={metric_name: failed_metric},
    )
    case_result = types.EvalCaseResult(
        eval_case_index=0,
        response_candidate_results=[candidate_result],
    )

    # Conversation trace: the user request, then the agent's reply.
    user_event = types.evals.AgentEvent(
        author="user",
        content={"parts": [{"text": "Book a flight to Paris."}]},
    )
    agent_event = types.evals.AgentEvent(
        author=agent_name,
        content={"parts": [{"text": "I can help with that."}]},
    )
    first_turn = types.evals.ConversationTurn(
        turn_index=0,
        events=[user_event, agent_event],
    )

    # Agent definition plus its conversation, wrapped into a one-case dataset.
    agent_config = types.evals.AgentConfig(
        agent_id=agent_name,
        agent_type="ToolUseAgent",
        description="A travel agent that can book flights.",
    )
    agent_data = types.evals.AgentData(
        agents={agent_name: agent_config},
        turns=[first_turn],
    )
    dataset = types.EvaluationDataset(
        eval_cases=[types.EvalCase(agent_data=agent_data)],
    )

    return types.EvaluationResult(
        eval_case_results=[case_result],
        evaluation_dataset=[dataset],
        metadata=types.EvaluationRunMetadata(candidate_names=[agent_name]),
    )
83+
84+
85+
def test_gen_loss_clusters(client):
    """Verifies the sync generate_loss_clusters() response shape and content.

    Args:
        client: Test client fixture exposing `evals.generate_loss_clusters`.
    """
    eval_result = _make_eval_result()
    loss_config = types.LossAnalysisConfig(
        metric="multi_turn_task_success_v1",
        candidate="travel-agent",
    )

    response = client.evals.generate_loss_clusters(
        eval_result=eval_result,
        config=loss_config,
    )

    assert isinstance(response, types.GenerateLossClustersResponse)
    assert len(response.results) >= 1

    # The first result must echo back the requested metric and candidate.
    first_result = response.results[0]
    assert first_result.config.metric == "multi_turn_task_success_v1"
    assert first_result.config.candidate == "travel-agent"

    # Every returned cluster needs an id and a categorized taxonomy entry.
    assert len(first_result.clusters) >= 1
    for loss_cluster in first_result.clusters:
        assert loss_cluster.cluster_id is not None
        assert loss_cluster.taxonomy_entry is not None
        assert loss_cluster.taxonomy_entry.l1_category is not None
105+
106+
107+
# Load the pytest-asyncio plugin, which provides the `pytest.mark.asyncio`
# marker used by the coroutine test in this module.
pytest_plugins = ("pytest_asyncio",)
108+
109+
110+
@pytest.mark.asyncio
async def test_gen_loss_clusters_async(client):
    """Verifies the async generate_loss_clusters() response shape and content.

    Performs the same checks as the synchronous test in this module so that
    both code paths are held to one standard: the echoed config must match
    the requested metric and candidate, and every cluster must carry an id
    and a taxonomy entry with an L1 category.

    Args:
        client: Test client fixture exposing `aio.evals.generate_loss_clusters`.
    """
    eval_result = _make_eval_result()
    response = await client.aio.evals.generate_loss_clusters(
        eval_result=eval_result,
        config=types.LossAnalysisConfig(
            metric="multi_turn_task_success_v1",
            candidate="travel-agent",
        ),
    )
    assert isinstance(response, types.GenerateLossClustersResponse)
    assert len(response.results) >= 1
    result = response.results[0]
    assert result.config.metric == "multi_turn_task_success_v1"
    # Previously unchecked on the async path: the candidate must round-trip.
    assert result.config.candidate == "travel-agent"
    assert len(result.clusters) >= 1
    # Previously unchecked on the async path: per-cluster structure.
    for cluster in result.clusters:
        assert cluster.cluster_id is not None
        assert cluster.taxonomy_entry is not None
        assert cluster.taxonomy_entry.l1_category is not None
126+
127+
128+
# Module-wide pytest marker built by the replay test helper for the
# "evals.generate_loss_clusters" method.
# NOTE(review): presumably this binds the tests above to their recorded
# replays -- confirm against pytest_helper.setup.
pytestmark = pytest_helper.setup(
    test_method="evals.generate_loss_clusters",
    file=__file__,
    globals_for_file=globals(),
)

0 commit comments

Comments
 (0)