From ada5711477c0cd8e88b0a0106bb4f3580b5b0d1c Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Thu, 25 Jun 2026 14:35:56 +0200
Subject: [PATCH 1/3] feat(evals): Forward/pick up `bt eval <...> --sample N`
 flag

---
 py/src/braintrust/logger.py      |   7 +++
 py/src/braintrust/test_logger.py | 101 +++++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+)

diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
index c6f46802..7dd2210b 100644
--- a/py/src/braintrust/logger.py
+++ b/py/src/braintrust/logger.py
@@ -1,5 +1,6 @@
 import atexit
 import base64
+import builtins
 import concurrent.futures
 import contextlib
 import contextvars
@@ -1807,6 +1808,12 @@ def init_dataset(
     """
 
     state = state or _state
+    sample_rate = getattr(builtins, "__bt_eval_sample_rate", None)
+    if isinstance(sample_rate, (int, float)) and not isinstance(sample_rate, bool):
+        if _internal_btql is None:
+            _internal_btql = {"sample": sample_rate}
+        elif "sample" not in _internal_btql:
+            _internal_btql = {**_internal_btql, "sample": sample_rate}
 
     def compute_metadata():
         state.login(org_name=org_name, api_key=api_key, app_url=app_url)
diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py
index 1f3149e5..9a170089 100644
--- a/py/src/braintrust/test_logger.py
+++ b/py/src/braintrust/test_logger.py
@@ -1,6 +1,7 @@
 # pyright: reportUnknownVariableType=false
 # pyright: reportPrivateUsage=false
 import asyncio
+import builtins
 import json
 import logging
 import os
@@ -3455,6 +3456,106 @@ def test_extract_attachments_with_json_attachment(self):
 class TestDatasetInternalBtql(TestCase):
     """Test that _internal_btql parameters (especially limit) are properly passed through to BTQL queries."""
 
+    def test_init_dataset_applies_bt_eval_sample_runtime_value(self):
+        """Test that bt eval --sample is injected into dataset BTQL."""
+        from braintrust.logger import init_dataset
+
+        monkeypatch = pytest.MonkeyPatch()
+        monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False)
+        try:
+            dataset = init_dataset(project="test-project", name="test-dataset", use_output=False, state=MagicMock())
+
+            self.assertEqual(dataset._internal_btql, {"sample": 5})
+        finally:
+            monkeypatch.undo()
+
+    def test_init_dataset_merges_bt_eval_sample_with_internal_btql(self):
+        """Test that bt eval --sample is added to existing BTQL filters."""
+        from braintrust.logger import init_dataset
+
+        monkeypatch = pytest.MonkeyPatch()
+        monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False)
+        try:
+            internal_btql = {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}}
+            dataset = init_dataset(
+                project="test-project",
+                name="test-dataset",
+                use_output=False,
+                _internal_btql=internal_btql,
+                state=MagicMock(),
+            )
+
+            self.assertEqual(
+                dataset._internal_btql,
+                {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}, "sample": 5},
+            )
+            self.assertEqual(internal_btql, {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}})
+        finally:
+            monkeypatch.undo()
+
+    def test_init_dataset_preserves_explicit_internal_btql_sample(self):
+        """Test that an explicit BTQL sample overrides bt eval --sample."""
+        from braintrust.logger import init_dataset
+
+        monkeypatch = pytest.MonkeyPatch()
+        monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False)
+        try:
+            dataset = init_dataset(
+                project="test-project",
+                name="test-dataset",
+                use_output=False,
+                _internal_btql={"filter": "metadata.kind = 'synthetic'", "sample": 2},
+                state=MagicMock(),
+            )
+
+            self.assertEqual(dataset._internal_btql, {"filter": "metadata.kind = 'synthetic'", "sample": 2})
+        finally:
+            monkeypatch.undo()
+
+    def test_init_dataset_keeps_btql_unchanged_without_eval_sample_runtime_value(self):
+        """Test that ordinary init_dataset calls are unchanged outside bt eval --sample."""
+        from braintrust.logger import init_dataset
+
+        monkeypatch = pytest.MonkeyPatch()
+        monkeypatch.delattr(builtins, "__bt_eval_sample_rate", raising=False)
+        try:
+            dataset = init_dataset(project="test-project", name="test-dataset", use_output=False, state=MagicMock())
+
+            self.assertIsNone(dataset._internal_btql)
+        finally:
+            monkeypatch.undo()
+
+    def test_init_dataset_forwards_bt_eval_sample_runtime_value_to_fetch(self):
+        """Test that bt eval --sample is included in fetched dataset BTQL."""
+        from braintrust.logger import init_dataset
+
+        monkeypatch = pytest.MonkeyPatch()
+        monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False)
+        try:
+            mock_state = MagicMock()
+            mock_state.org_id = "test-org"
+
+            mock_app_conn = MagicMock()
+            mock_app_conn.post_json.return_value = {
+                "project": {"id": "test-project-id", "name": "test-project"},
+                "dataset": {"id": "test-dataset-id", "name": "test-dataset"},
+            }
+            mock_state.app_conn.return_value = mock_app_conn
+
+            mock_api_conn = MagicMock()
+            mock_response = MagicMock()
+            mock_response.json.return_value = {"data": [], "cursor": None}
+            mock_api_conn.post.return_value = mock_response
+            mock_state.api_conn.return_value = mock_api_conn
+
+            dataset = init_dataset(project="test-project", name="test-dataset", use_output=False, state=mock_state)
+            list(dataset.fetch())
+
+            query_json = mock_api_conn.post.call_args[1]["json"]["query"]
+            self.assertEqual(query_json["sample"], 5)
+        finally:
+            monkeypatch.undo()
+
     @patch("braintrust.logger.BraintrustState")
     def test_dataset_internal_btql_limit_not_overwritten(self, mock_state_class):
         """Test that custom limit in _internal_btql is not overwritten by DEFAULT_FETCH_BATCH_SIZE."""

From 72d9a6ad1644a75febcf957705ad76ed2acecfb0 Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Tue, 30 Jun 2026 13:48:27 +0200
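Subject: [PATCH 2/3] refactor(evals): Pick up generic runtime BTQL mapping
 (`__bt_eval_internal_btql`) instead of `__bt_eval_sample_rate`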

---
 py/src/braintrust/logger.py      |  8 ++++----
 py/src/braintrust/test_logger.py | 34 ++++++++++++++++++--------------
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
index 7dd2210b..80a551b8 100644
--- a/py/src/braintrust/logger.py
+++ b/py/src/braintrust/logger.py
@@ -1808,12 +1808,12 @@ def init_dataset(
     """
 
     state = state or _state
-    sample_rate = getattr(builtins, "__bt_eval_sample_rate", None)
-    if isinstance(sample_rate, (int, float)) and not isinstance(sample_rate, bool):
+    cli_internal_btql = getattr(builtins, "__bt_eval_internal_btql", None)
+    if isinstance(cli_internal_btql, Mapping):
         if _internal_btql is None:
-            _internal_btql = {"sample": sample_rate}
+            _internal_btql = dict(cli_internal_btql)
         elif "sample" not in _internal_btql:
-            _internal_btql = {**_internal_btql, "sample": sample_rate}
+            _internal_btql = {**_internal_btql, **cli_internal_btql}
 
     def compute_metadata():
         state.login(org_name=org_name, api_key=api_key, app_url=app_url)
diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py
index 9a170089..c14ca83d 100644
--- a/py/src/braintrust/test_logger.py
+++ b/py/src/braintrust/test_logger.py
@@ -3456,12 +3456,12 @@ def test_extract_attachments_with_json_attachment(self):
 class TestDatasetInternalBtql(TestCase):
     """Test that _internal_btql parameters (especially limit) are properly passed through to BTQL queries."""
 
-    def test_init_dataset_applies_bt_eval_sample_runtime_value(self):
-        """Test that bt eval --sample is injected into dataset BTQL."""
+    def test_init_dataset_applies_bt_eval_internal_btql_runtime_value(self):
+        """Test that bt eval runtime BTQL is injected into dataset BTQL."""
         from braintrust.logger import init_dataset
 
         monkeypatch = pytest.MonkeyPatch()
-        monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False)
+        monkeypatch.setattr(builtins, "__bt_eval_internal_btql", {"sample": 5}, raising=False)
         try:
             dataset = init_dataset(project="test-project", name="test-dataset", use_output=False, state=MagicMock())
 
@@ -3469,12 +3469,12 @@ def test_init_dataset_applies_bt_eval_sample_runtime_value(self):
         finally:
             monkeypatch.undo()
 
-    def test_init_dataset_merges_bt_eval_sample_with_internal_btql(self):
-        """Test that bt eval --sample is added to existing BTQL filters."""
+    def test_init_dataset_merges_bt_eval_internal_btql_with_internal_btql(self):
+        """Test that bt eval runtime BTQL is added to existing BTQL filters."""
         from braintrust.logger import init_dataset
 
         monkeypatch = pytest.MonkeyPatch()
-        monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False)
+        monkeypatch.setattr(builtins, "__bt_eval_internal_btql", {"sample": 5, "limit": 10}, raising=False)
         try:
             internal_btql = {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}}
             dataset = init_dataset(
@@ -3487,18 +3487,22 @@ def test_init_dataset_merges_bt_eval_sample_with_internal_btql(self):
 
             self.assertEqual(
                 dataset._internal_btql,
-                {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}, "sample": 5},
+                {
+                    "where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"},
+                    "sample": 5,
+                    "limit": 10,
+                },
             )
             self.assertEqual(internal_btql, {"where": {"op": "eq", "left": "metadata.kind", "right": "synthetic"}})
         finally:
             monkeypatch.undo()
 
     def test_init_dataset_preserves_explicit_internal_btql_sample(self):
-        """Test that an explicit BTQL sample overrides bt eval --sample."""
+        """Test that an explicit BTQL sample overrides bt eval runtime BTQL."""
         from braintrust.logger import init_dataset
 
         monkeypatch = pytest.MonkeyPatch()
-        monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False)
+        monkeypatch.setattr(builtins, "__bt_eval_internal_btql", {"sample": 5, "limit": 10}, raising=False)
         try:
             dataset = init_dataset(
                 project="test-project",
@@ -3512,12 +3516,12 @@ def test_init_dataset_preserves_explicit_internal_btql_sample(self):
         finally:
             monkeypatch.undo()
 
-    def test_init_dataset_keeps_btql_unchanged_without_eval_sample_runtime_value(self):
-        """Test that ordinary init_dataset calls are unchanged outside bt eval --sample."""
+    def test_init_dataset_keeps_btql_unchanged_without_eval_internal_btql_runtime_value(self):
+        """Test that ordinary init_dataset calls are unchanged without runtime BTQL."""
         from braintrust.logger import init_dataset
 
         monkeypatch = pytest.MonkeyPatch()
-        monkeypatch.delattr(builtins, "__bt_eval_sample_rate", raising=False)
+        monkeypatch.delattr(builtins, "__bt_eval_internal_btql", raising=False)
         try:
             dataset = init_dataset(project="test-project", name="test-dataset", use_output=False, state=MagicMock())
 
@@ -3525,12 +3529,12 @@ def test_init_dataset_keeps_btql_unchanged_without_eval_sample_runtime_value(sel
         finally:
             monkeypatch.undo()
 
-    def test_init_dataset_forwards_bt_eval_sample_runtime_value_to_fetch(self):
-        """Test that bt eval --sample is included in fetched dataset BTQL."""
+    def test_init_dataset_forwards_bt_eval_internal_btql_runtime_value_to_fetch(self):
+        """Test that bt eval runtime BTQL is included in fetched dataset BTQL."""
         from braintrust.logger import init_dataset
 
         monkeypatch = pytest.MonkeyPatch()
-        monkeypatch.setattr(builtins, "__bt_eval_sample_rate", 5, raising=False)
+        monkeypatch.setattr(builtins, "__bt_eval_internal_btql", {"sample": 5}, raising=False)
         try:
             mock_state = MagicMock()
             mock_state.org_id = "test-org"

From f54039e1e64a7d4308d62d8ff93c3cfde4c59b9d Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Tue, 30 Jun 2026 14:04:03 +0200
Subject: [PATCH 3/3] fix(evals): Always merge runtime BTQL; explicit
 `_internal_btql` keys take precedence

---
 py/src/braintrust/logger.py      | 4 ++--
 py/src/braintrust/test_logger.py | 9 ++++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
index 80a551b8..10e8bc00 100644
--- a/py/src/braintrust/logger.py
+++ b/py/src/braintrust/logger.py
@@ -1812,8 +1812,8 @@ def init_dataset(
     if isinstance(cli_internal_btql, Mapping):
         if _internal_btql is None:
             _internal_btql = dict(cli_internal_btql)
-        elif "sample" not in _internal_btql:
-            _internal_btql = {**_internal_btql, **cli_internal_btql}
+        else:
+            _internal_btql = {**cli_internal_btql, **_internal_btql}
 
     def compute_metadata():
         state.login(org_name=org_name, api_key=api_key, app_url=app_url)
diff --git a/py/src/braintrust/test_logger.py b/py/src/braintrust/test_logger.py
index c14ca83d..9c7cfe1b 100644
--- a/py/src/braintrust/test_logger.py
+++ b/py/src/braintrust/test_logger.py
@@ -3497,8 +3497,8 @@ def test_init_dataset_merges_bt_eval_internal_btql_with_internal_btql(self):
         finally:
             monkeypatch.undo()
 
-    def test_init_dataset_preserves_explicit_internal_btql_sample(self):
-        """Test that an explicit BTQL sample overrides bt eval runtime BTQL."""
+    def test_init_dataset_merges_bt_eval_internal_btql_without_overriding_explicit_keys(self):
+        """Test that explicit BTQL keys override bt eval runtime BTQL."""
         from braintrust.logger import init_dataset
 
         monkeypatch = pytest.MonkeyPatch()
@@ -3512,7 +3512,10 @@ def test_init_dataset_preserves_explicit_internal_btql_sample(self):
                 state=MagicMock(),
             )
 
-            self.assertEqual(dataset._internal_btql, {"filter": "metadata.kind = 'synthetic'", "sample": 2})
+            self.assertEqual(
+                dataset._internal_btql,
+                {"filter": "metadata.kind = 'synthetic'", "sample": 2, "limit": 10},
+            )
         finally:
             monkeypatch.undo()