|
24 | 24 | from logging import INFO |
25 | 25 | from typing import Callable, Dict, List, Literal, Optional, Sequence, Tuple, TypeVar, Union |
26 | 26 |
|
27 | | -from pydantic import ValidationError |
28 | | - |
29 | 27 | from humanloop import EvaluatorResponse, FlowResponse, PromptResponse, ToolResponse |
30 | 28 | from humanloop.core.api_error import ApiError |
31 | 29 | from humanloop.eval_utils.context import EvaluationContext |
|
59 | 57 | from humanloop.types.datapoint_response_target_value import DatapointResponseTargetValue |
60 | 58 | from humanloop.types.evaluation_run_response import EvaluationRunResponse |
61 | 59 | from humanloop.types.run_stats_response import RunStatsResponse |
| 60 | +from humanloop.types.validation_error import ValidationError |
62 | 61 |
|
63 | 62 | if typing.TYPE_CHECKING: |
64 | 63 | from humanloop.client import BaseHumanloop |
@@ -305,7 +304,6 @@ def run_eval( |
305 | 304 | file_dict = {**file_, **version} |
306 | 305 | hl_file: Union[PromptResponse, FlowResponse, ToolResponse, EvaluatorResponse] |
307 | 306 |
|
308 | | - # NOTE: This could be cleaner, use polymorphism to avoid the if-else |
309 | 307 | if type_ == "flow": |
310 | 308 | # Be more lenient with Flow versions as they are arbitrary json |
311 | 309 | try: |
@@ -716,7 +714,7 @@ def _check_evaluation_improvement( |
716 | 714 | return True, 0, 0 |
717 | 715 |
|
718 | 716 | previous_evaluator_stats_by_path = _get_evaluator_stats_by_path( |
719 | | - stat=stats.run_stats[1], |
| 717 | + stat=stats.run_stats[1], # Latest Run is at index 0; previous Run is at index 1 |
720 | 718 | evaluation=evaluation, |
721 | 719 | ) |
722 | 720 | if evaluator_path in latest_evaluator_stats_by_path and evaluator_path in previous_evaluator_stats_by_path: |
|
0 commit comments