@@ -419,34 +419,181 @@ def _postprocess_user_scenarios_response(
def _display_loss_analysis_result(
    result: types.LossAnalysisResult,
) -> None:
  """Displays a LossAnalysisResult as a formatted pandas DataFrame.

  Flattens each loss cluster (and its optional taxonomy entry) into one row,
  then renders the resulting DataFrame with IPython's rich display when
  available, falling back to a plain-text print otherwise.

  Args:
    result: The LossAnalysisResult to display.
  """
  config = result.config
  metric_name = config.metric if config else None
  candidate_name = config.candidate if config else None

  rows: list[dict[str, Any]] = []
  for cluster in result.clusters or []:
    taxonomy = cluster.taxonomy_entry
    rows.append({
        "metric": metric_name,
        "candidate": candidate_name,
        "cluster_id": cluster.cluster_id,
        # Taxonomy entry is optional on a cluster; emit None columns when
        # it is absent so the DataFrame schema stays uniform.
        "l1_category": taxonomy.l1_category if taxonomy else None,
        "l2_category": taxonomy.l2_category if taxonomy else None,
        "description": taxonomy.description if taxonomy else None,
        "item_count": cluster.item_count,
    })

  if not rows:
    logger.info("No loss clusters found.")
    return

  frame = pd.DataFrame(rows)
  try:
    # Prefer rich notebook rendering when IPython is importable.
    from IPython.display import display  # pylint: disable=g-import-not-at-top
  except ImportError:
    print(frame.to_string())  # pylint: disable=print-function
  else:
    display(frame)
450+
451+
452+ def _resolve_metric_name (
453+ metric : Optional [Any ],
454+ ) -> Optional [str ]:
455+ """Extracts a metric name string from a metric argument.
456+
457+ Accepts a string, a Metric object, or a LazyLoadedPrebuiltMetric
458+ (RubricMetric) and returns the metric name as a string.
459+
460+ For LazyLoadedPrebuiltMetric (e.g., RubricMetric.MULTI_TURN_TASK_SUCCESS),
461+ this resolves to the API metric spec name (e.g.,
462+ "multi_turn_task_success_v1") so it matches the keys in eval results.
463+
464+ Args:
465+ metric: A metric name string, Metric object, RubricMetric enum value, or
466+ None.
467+
468+ Returns:
469+ The metric name as a string, or None if metric is None.
470+ """
471+ if metric is None :
472+ return None
473+ if isinstance (metric , str ):
474+ return metric
475+ # LazyLoadedPrebuiltMetric: resolve to versioned API spec name.
476+ if hasattr (metric , "_get_api_metric_spec_name" ):
477+ spec_name : Optional [str ] = metric ._get_api_metric_spec_name ()
478+ if spec_name :
479+ return spec_name
480+ # Metric objects and other types with a .name attribute.
481+ if hasattr (metric , "name" ):
482+ return str (metric .name )
483+ return str (metric )
484+
485+
def _resolve_loss_analysis_config(
    eval_result: types.EvaluationResult,
    config: Optional[types.LossAnalysisConfig] = None,
    metric: Optional[str] = None,
    candidate: Optional[str] = None,
) -> types.LossAnalysisConfig:
  """Resolves and validates the LossAnalysisConfig for generate_loss_clusters.

  Auto-infers `metric` and `candidate` from the EvaluationResult when not
  explicitly provided. Validates that provided values exist in the eval result.

  Args:
    eval_result: The EvaluationResult from client.evals.evaluate().
    config: Optional explicit LossAnalysisConfig. If provided, metric and
      candidate from config take precedence over the separate arguments.
    metric: Optional metric name override.
    candidate: Optional candidate name override.

  Returns:
    A resolved LossAnalysisConfig with metric and candidate populated.

  Raises:
    ValueError: If metric/candidate cannot be inferred or are invalid.
  """
  # Seed the resolved config: copy the caller's config (explicit args win),
  # or build a fresh one from the bare arguments.
  if config is None:
    resolved = types.LossAnalysisConfig(metric=metric, candidate=candidate)
  else:
    resolved = config.model_copy(
        update={
            "metric": metric or config.metric,
            "candidate": candidate or config.candidate,
        }
    )

  # Gather every metric name present in the per-case results.
  found_metrics: set[str] = set()
  for case_result in eval_result.eval_case_results or []:
    for cand_result in case_result.response_candidate_results or []:
      found_metrics.update((cand_result.metric_results or {}).keys())

  # Gather candidate names recorded in the result metadata, if any.
  found_candidates: list[str] = []
  metadata = eval_result.metadata
  if metadata and metadata.candidate_names:
    found_candidates = list(metadata.candidate_names)

  # Infer the metric when unset; unambiguous only with exactly one metric.
  if not resolved.metric:
    if not found_metrics:
      raise ValueError(
          "Cannot infer metric: no metric results found in eval_result."
          " Please provide metric explicitly via"
          " config=types.LossAnalysisConfig(metric='...')."
      )
    if len(found_metrics) > 1:
      raise ValueError(
          "Cannot infer metric: multiple metrics found in eval_result:"
          f" {sorted(found_metrics)}. Please provide metric"
          " explicitly via config=types.LossAnalysisConfig(metric='...')."
      )
    resolved = resolved.model_copy(
        update={"metric": next(iter(found_metrics))}
    )

  # An explicitly supplied metric must exist in the eval result (skipped
  # when no metric results are visible to validate against).
  if found_metrics and resolved.metric not in found_metrics:
    raise ValueError(
        f"Metric '{resolved.metric}' not found in eval_result."
        f" Available metrics: {sorted(found_metrics)}."
    )

  # Infer the candidate when unset.
  if not resolved.candidate:
    if len(found_candidates) > 1:
      raise ValueError(
          "Cannot infer candidate: multiple candidates found in"
          f" eval_result: {found_candidates}. Please provide"
          " candidate explicitly via"
          " config=types.LossAnalysisConfig(candidate='...')."
      )
    if found_candidates:
      resolved = resolved.model_copy(
          update={"candidate": found_candidates[0]}
      )
    else:
      # Fallback: use the SDK's default candidate naming convention.
      resolved = resolved.model_copy(update={"candidate": "candidate_1"})
      logger.warning(
          "No candidate names found in eval_result.metadata."
          " Defaulting to 'candidate_1'. If this is incorrect, provide"
          " candidate explicitly via"
          " config=types.LossAnalysisConfig(candidate='...')."
      )

  # An explicitly supplied candidate must exist when candidates are known.
  if found_candidates and resolved.candidate not in found_candidates:
    raise ValueError(
        f"Candidate '{resolved.candidate}' not found in"
        f" eval_result. Available candidates: {found_candidates}."
    )

  return resolved
450597
451598
452599def _poll_operation (
0 commit comments