From ada5711477c0cd8e88b0a0106bb4f3580b5b0d1c Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Thu, 25 Jun 2026 14:35:56 +0200 Subject: [PATCH 1/3] feat(evals): Forward/pick up `bt eval <...> --sample N` flag --- py/src/braintrust/logger.py | 7 +++ py/src/braintrust/test_logger.py | 101 +++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index c6f46802..7dd2210b 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -1,5 +1,6 @@ import atexit import base64 +import builtins import concurrent.futures import contextlib import contextvars @@ -1807,6 +1808,12 @@ def init_dataset( """ state = state or _state + sample_rate = getattr(builtins, "__bt_eval_sample_rate", None) + if isinstance(sample_rate, (int, float)) and not isinstance(sample_rate, bool): + if _internal_btql is None: + _internal_btql = {"sample": sample_rate} + elif "sample" not in _internal_btql: + _internal_btql = {**_internal_btql, "sample": sample_rate} def compute_metadata(): state.login(org_name=org_name, api_key=api_key, app_url=app_url) diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py index 1f3149e5..9a170089 100644 --- a/py/src/braintrust/test_logger.py +++ b/py/src/braintrust/test_logger.py @@ -1,6 +1,7 @@ # pyright: reportUnknownVariableType=false # pyright: reportPrivateUsage=false import asyncio +import builtins import json import logging import os @@ -3455,6 +3456,106 @@ def test_extract_attachments_with_json_attachment(self): class TestDatasetInternalBtql(TestCase): """Test that _internal_btql parameters (especially limit) are properly passed through to BTQL queries.""" + def test_init_dataset_applies_bt_eval_sample_runtime_value(self): + """Test that bt eval --sample is injected into dataset BTQL.""" + from braintrust.logger import init_dataset + + monkeypatch = pytest.MonkeyPatch() + monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False) + try: + dataset = init_dataset(project="test-project", name="test-dataset", use_output=False, state=MagicMock()) + + self.assertEqual(dataset._internal_btql, {"sample": 5}) + finally: + monkeypatch.undo() + + def test_init_dataset_merges_bt_eval_sample_with_internal_btql(self): + """Test that bt eval --sample is added to existing BTQL filters.""" + from braintrust.logger import init_dataset + + monkeypatch = pytest.MonkeyPatch() + monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False) + try: + internal_btql = {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}} + dataset = init_dataset( + project="test-project", + name="test-dataset", + use_output=False, + _internal_btql=internal_btql, + state=MagicMock(), + ) + + self.assertEqual( + dataset._internal_btql, + {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}, "sample": 5}, + ) + self.assertEqual(internal_btql, {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}}) + finally: + monkeypatch.undo() + + def test_init_dataset_preserves_explicit_internal_btql_sample(self): + """Test that an explicit BTQL sample overrides bt eval --sample.""" + from braintrust.logger import init_dataset + + monkeypatch = pytest.MonkeyPatch() + monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False) + try: + dataset = init_dataset( + project="test-project", + name="test-dataset", + use_output=False, + _internal_btql={"filter": "metadata.kind = 'synthetic'", "sample": 2}, + state=MagicMock(), + ) + + self.assertEqual(dataset._internal_btql, {"filter": "metadata.kind = 'synthetic'", "sample": 2}) + finally: + monkeypatch.undo() + + def test_init_dataset_keeps_btql_unchanged_without_eval_sample_runtime_value(self): + """Test that ordinary init_dataset calls are unchanged outside bt eval --sample.""" + from braintrust.logger import init_dataset + + monkeypatch = pytest.MonkeyPatch() + monkeypatch.delattr(builtins, "__bt_eval_sample_rate", raising=False) + try: + dataset = init_dataset(project="test-project", name="test-dataset", use_output=False, state=MagicMock()) + + self.assertIsNone(dataset._internal_btql) + finally: + monkeypatch.undo() + + def test_init_dataset_forwards_bt_eval_sample_runtime_value_to_fetch(self): + """Test that bt eval --sample is included in fetched dataset BTQL.""" + from braintrust.logger import init_dataset + + monkeypatch = pytest.MonkeyPatch() + monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False) + try: + mock_state = MagicMock() + mock_state.org_id = "test-org" + + mock_app_conn = MagicMock() + mock_app_conn.post_json.return_value = { + "project": {"id": "test-project-id", "name": "test-project"}, + "dataset": {"id": "test-dataset-id", "name": "test-dataset"}, + } + mock_state.app_conn.return_value = mock_app_conn + + mock_api_conn = MagicMock() + mock_response = MagicMock() + mock_response.json.return_value = {"data": [], "cursor": None} + mock_api_conn.post.return_value = mock_response + mock_state.api_conn.return_value = mock_api_conn + + dataset = init_dataset(project="test-project", name="test-dataset", use_output=False, state=mock_state) + list(dataset.fetch()) + + query_json = mock_api_conn.post.call_args[1]["json"]["query"] + self.assertEqual(query_json["sample"], 5) + finally: + monkeypatch.undo() + @patch("braintrust.logger.BraintrustState") def test_dataset_internal_btql_limit_not_overwritten(self, mock_state_class): """Test that custom limit in _internal_btql is not overwritten by DEFAULT_FETCH_BATCH_SIZE.""" From 72d9a6ad1644a75febcf957705ad76ed2acecfb0 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 30 Jun 2026 13:48:27 +0200 Subject: [PATCH 2/3] use generic btql --- py/src/braintrust/logger.py | 8 ++++---- py/src/braintrust/test_logger.py | 34 ++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index 7dd2210b..80a551b8 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -1808,12 +1808,12 @@ def init_dataset( """ state = state or _state - sample_rate = getattr(builtins, "__bt_eval_sample_rate", None) - if isinstance(sample_rate, (int, float)) and not isinstance(sample_rate, bool): + cli_internal_btql = getattr(builtins, "__bt_eval_internal_btql", None) + if isinstance(cli_internal_btql, Mapping): if _internal_btql is None: - _internal_btql = {"sample": sample_rate} + _internal_btql = dict(cli_internal_btql) elif "sample" not in _internal_btql: - _internal_btql = {**_internal_btql, "sample": sample_rate} + _internal_btql = {**_internal_btql, **cli_internal_btql} def compute_metadata(): state.login(org_name=org_name, api_key=api_key, app_url=app_url) diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py index 9a170089..c14ca83d 100644 --- a/py/src/braintrust/test_logger.py +++ b/py/src/braintrust/test_logger.py @@ -3456,12 +3456,12 @@ def test_extract_attachments_with_json_attachment(self): class TestDatasetInternalBtql(TestCase): """Test that _internal_btql parameters (especially limit) are properly passed through to BTQL queries.""" - def test_init_dataset_applies_bt_eval_sample_runtime_value(self): - """Test that bt eval --sample is injected into dataset BTQL.""" + def test_init_dataset_applies_bt_eval_internal_btql_runtime_value(self): + """Test that bt eval runtime BTQL is injected into dataset BTQL.""" from braintrust.logger import init_dataset monkeypatch = pytest.MonkeyPatch() - monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False) + monkeypatch.setattr(builtins, "__bt_eval_internal_btql", {"sample": 5}, raising=False) try: dataset = init_dataset(project="test-project", name="test-dataset", use_output=False, state=MagicMock()) @@ -3469,12 +3469,12 @@ def test_init_dataset_applies_bt_eval_sample_runtime_value(self): finally: monkeypatch.undo() - def test_init_dataset_merges_bt_eval_sample_with_internal_btql(self): - """Test that bt eval --sample is added to existing BTQL filters.""" + def test_init_dataset_merges_bt_eval_internal_btql_with_internal_btql(self): + """Test that bt eval runtime BTQL is added to existing BTQL filters.""" from braintrust.logger import init_dataset monkeypatch = pytest.MonkeyPatch() - monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False) + monkeypatch.setattr(builtins, "__bt_eval_internal_btql", {"sample": 5, "limit": 10}, raising=False) try: internal_btql = {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}} dataset = init_dataset( @@ -3487,18 +3487,22 @@ def test_init_dataset_merges_bt_eval_sample_with_internal_btql(self): self.assertEqual( dataset._internal_btql, - {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}, "sample": 5}, + { + "where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}, + "sample": 5, + "limit": 10, + }, ) self.assertEqual(internal_btql, {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}}) finally: monkeypatch.undo() def test_init_dataset_preserves_explicit_internal_btql_sample(self): - """Test that an explicit BTQL sample overrides bt eval --sample.""" + """Test that an explicit BTQL sample overrides bt eval runtime BTQL.""" from braintrust.logger import init_dataset monkeypatch = pytest.MonkeyPatch() - monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False) + monkeypatch.setattr(builtins, "__bt_eval_internal_btql", {"sample": 5, "limit": 10}, raising=False) try: dataset = init_dataset( project="test-project", @@ -3512,12 +3516,12 @@ def test_init_dataset_preserves_explicit_internal_btql_sample(self): finally: monkeypatch.undo() - def test_init_dataset_keeps_btql_unchanged_without_eval_sample_runtime_value(self): - """Test that ordinary init_dataset calls are unchanged outside bt eval --sample.""" + def test_init_dataset_keeps_btql_unchanged_without_eval_internal_btql_runtime_value(self): + """Test that ordinary init_dataset calls are unchanged without runtime BTQL.""" from braintrust.logger import init_dataset monkeypatch = pytest.MonkeyPatch() - monkeypatch.delattr(builtins, "__bt_eval_sample_rate", raising=False) + monkeypatch.delattr(builtins, "__bt_eval_internal_btql", raising=False) try: dataset = init_dataset(project="test-project", name="test-dataset", use_output=False, state=MagicMock()) @@ -3525,12 +3529,12 @@ def test_init_dataset_keeps_btql_unchanged_without_eval_sample_runtime_value(sel finally: monkeypatch.undo() - def test_init_dataset_forwards_bt_eval_sample_runtime_value_to_fetch(self): - """Test that bt eval --sample is included in fetched dataset BTQL.""" + def test_init_dataset_forwards_bt_eval_internal_btql_runtime_value_to_fetch(self): + """Test that bt eval runtime BTQL is included in fetched dataset BTQL.""" from braintrust.logger import init_dataset monkeypatch = pytest.MonkeyPatch() - monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False) + monkeypatch.setattr(builtins, "__bt_eval_internal_btql", {"sample": 5}, raising=False) try: mock_state = MagicMock() mock_state.org_id = "test-org" From f54039e1e64a7d4308d62d8ff93c3cfde4c59b9d Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 30 Jun 2026 14:04:03 +0200 Subject: [PATCH 3/3] simplify --- py/src/braintrust/logger.py | 4 ++-- py/src/braintrust/test_logger.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index 80a551b8..10e8bc00 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -1812,8 +1812,8 @@ def init_dataset( if isinstance(cli_internal_btql, Mapping): if _internal_btql is None: _internal_btql = dict(cli_internal_btql) - elif "sample" not in _internal_btql: - _internal_btql = {**_internal_btql, **cli_internal_btql} + else: + _internal_btql = {**cli_internal_btql, **_internal_btql} def compute_metadata(): state.login(org_name=org_name, api_key=api_key, app_url=app_url) diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py index c14ca83d..9c7cfe1b 100644 --- a/py/src/braintrust/test_logger.py +++ b/py/src/braintrust/test_logger.py @@ -3497,8 +3497,8 @@ def test_init_dataset_merges_bt_eval_internal_btql_with_internal_btql(self): finally: monkeypatch.undo() - def test_init_dataset_preserves_explicit_internal_btql_sample(self): - """Test that an explicit BTQL sample overrides bt eval runtime BTQL.""" + def test_init_dataset_merges_bt_eval_internal_btql_without_overriding_explicit_keys(self): + """Test that explicit BTQL keys override bt eval runtime BTQL.""" from braintrust.logger import init_dataset monkeypatch = pytest.MonkeyPatch() @@ -3512,7 +3512,10 @@ def test_init_dataset_preserves_explicit_internal_btql_sample(self): state=MagicMock(), ) - self.assertEqual(dataset._internal_btql, {"filter": "metadata.kind = 'synthetic'", "sample": 2}) + self.assertEqual( + dataset._internal_btql, + {"filter": "metadata.kind = 'synthetic'", "sample": 2, "limit": 10}, + ) finally: monkeypatch.undo()