Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 90 additions & 10 deletions tests/unit/vertexai/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,82 @@ def test_response_structure(self):
assert result.clusters[0].item_count == 3
assert result.clusters[1].cluster_id == "cluster-2"

def test_response_show_with_results(self, capsys):
def test_get_loss_analysis_html(self):
    """Tests that _get_loss_analysis_html generates valid HTML with data.

    Builds a minimal loss-clusters payload — one metric result with one
    cluster, one example (a single user turn) and one failed rubric —
    serializes it to JSON, and checks that the rendered page contains the
    expected structural markers.  Note the negative assertion: raw field
    values such as "test_metric" must NOT appear verbatim, because the
    payload is Base64-encoded before being embedded in the HTML.
    """
    # Local imports keep the project dependency scoped to this test.
    from vertexai._genai import _evals_visualization
    import json

    # Minimal payload mirroring the loss-analysis response schema:
    # results -> clusters -> examples, with a nested agent conversation
    # and the failed rubrics that explain why the example clustered here.
    data = {
        "results": [
            {
                "config": {
                    "metric": "test_metric",
                    "candidate": "test-candidate",
                },
                "clusters": [
                    {
                        "cluster_id": "c1",
                        # Two-level failure taxonomy (L1 category ->
                        # L2 subcategory) with a human-readable description.
                        "taxonomy_entry": {
                            "l1_category": "Tool Calling",
                            "l2_category": "Missing Invocation",
                            "description": "Agent failed to call the tool.",
                        },
                        "item_count": 5,
                        "examples": [
                            {
                                # One agent conversation: a single turn with
                                # a single user event ("Find flights to
                                # Paris") — the scenario the HTML previews.
                                "evaluation_result": {
                                    "request": {
                                        "prompt": {
                                            "agent_data": {
                                                "turns": [
                                                    {
                                                        "turn_index": 0,
                                                        "events": [
                                                            {
                                                                "author": "user",
                                                                "content": {
                                                                    "parts": [
                                                                        {
                                                                            "text": "Find flights to Paris"
                                                                        }
                                                                    ],
                                                                },
                                                            }
                                                        ],
                                                    }
                                                ],
                                            },
                                        },
                                    },
                                },
                                # Rubric(s) this example failed, with the
                                # classifier's rationale.
                                "failed_rubrics": [
                                    {
                                        "rubric_id": "tool_use",
                                        "classification_rationale": "Did not invoke find_flights.",
                                    }
                                ],
                            }
                        ],
                    },
                ],
            }
        ]
    }
    # The helper takes the payload as a JSON string, not a dict.
    html = _evals_visualization._get_loss_analysis_html(json.dumps(data))
    # Structural markers that must be present in the rendered page.
    assert "Loss Pattern Analysis" in html
    assert "test_metric" not in html  # data is Base64-encoded in the HTML
    assert "<!DOCTYPE html>" in html
    # Client-side helpers / CSS hooks the page relies on.
    assert "extractScenarioPreview" in html
    assert "example-scenario" in html
    assert "DOMPurify" in html  # uses DOMPurify for sanitization
    assert "example-section-label" in html  # labels for scenario/rubrics
    assert "Analysis Summary" in html  # summary heading

def test_display_loss_clusters_response_no_ipython(self):
"""Tests graceful fallback when not in IPython."""
from vertexai._genai import _evals_visualization
from unittest import mock

response = common_types.GenerateLossClustersResponse(
results=[
common_types.LossAnalysisResult(
Expand All @@ -541,12 +616,17 @@ def test_response_show_with_results(self, capsys):
)
],
)
response.show()
captured = capsys.readouterr()
assert "test_metric" in captured.out
assert "c1" in captured.out
with mock.patch.object(
_evals_visualization, "_is_ipython_env", return_value=False
):
# Should not raise, just log a warning
response.show()

def test_display_loss_analysis_result_no_ipython(self):
"""Tests graceful fallback for individual result when not in IPython."""
from vertexai._genai import _evals_visualization
from unittest import mock

def test_loss_analysis_result_show(self, capsys):
result = common_types.LossAnalysisResult(
config=common_types.LossAnalysisConfig(
metric="test_metric",
Expand All @@ -563,10 +643,10 @@ def test_loss_analysis_result_show(self, capsys):
),
],
)
result.show()
captured = capsys.readouterr()
assert "test_metric" in captured.out
assert "c1" in captured.out
with mock.patch.object(
_evals_visualization, "_is_ipython_env", return_value=False
):
result.show()


def _make_eval_result(
Expand Down
Loading
Loading