Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions crates/buzz-agent/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,21 @@ impl RunCtx<'_> {
// exactly the history that was just sent to `complete()` (the
// assistant response is appended below, after this point). Pairing
// them lets the gate add a conservative estimate for any history
// appended before the next request. Preserve both when a response
// omits usage (`None`) rather than clobbering — a one-off missing
// field shouldn't blind the gate or zero the growth baseline.
// appended before the next request. Uses `context_pressure_bytes`
// (the same measure the gate's `current_bytes` uses) so the
// `grown` delta is coherent — an image contributes its visual-
// token equivalent here, not its base64 length. Preserve both when
// a response omits usage (`None`) rather than clobbering — a
// one-off missing field shouldn't blind the gate or zero the
// growth baseline.
if let Some(tokens) = response.input_tokens {
*self.last_request_input_tokens = Some(tokens);
*self.last_request_history_bytes =
Some(self.history.iter().map(HistoryItem::estimated_bytes).sum());
*self.last_request_history_bytes = Some(
self.history
.iter()
.map(HistoryItem::context_pressure_bytes)
.sum(),
);
}

if !response.text.is_empty() {
Expand Down
13 changes: 10 additions & 3 deletions crates/buzz-agent/src/handoff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,11 @@ impl RunCtx<'_> {
// token estimate of the bytes added since the measurement.
Some(measured_tokens) => {
let measured_bytes = self.last_request_history_bytes.unwrap_or(0);
let current_bytes: usize =
self.history.iter().map(HistoryItem::estimated_bytes).sum();
let current_bytes: usize = self
.history
.iter()
.map(HistoryItem::context_pressure_bytes)
.sum();
let grown = current_bytes.saturating_sub(measured_bytes);
let projected = measured_tokens.saturating_add(estimate_tokens_from_bytes(grown));
projected
Expand All @@ -119,7 +122,11 @@ impl RunCtx<'_> {
// since a handoff re-adds the current prompt verbatim — that is a
// prompt-cap concern (MAX_PROMPT_BYTES), not this gate.
None => {
let bytes: usize = self.history.iter().map(HistoryItem::estimated_bytes).sum();
let bytes: usize = self
.history
.iter()
.map(HistoryItem::context_pressure_bytes)
.sum();
bytes
> byte_fallback_threshold(
self.cfg.max_context_tokens,
Expand Down
132 changes: 124 additions & 8 deletions crates/buzz-agent/src/types.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,51 @@
use serde::Deserialize;
use serde_json::Value;

/// Byte-equivalent charged to the handoff/context-pressure gate for a single
/// image tool result. The gate maps bytes to tokens at 1 byte/token (see
/// `handoff::CONSERVATIVE_BYTES_PER_TOKEN`), so this is also the per-image
/// token budget. Providers bill an image as visual *tiles*, not its base64
/// length: Anthropic caps at ~1600 tokens/image and OpenAI high-detail lands
/// ~1.1K–1.5K. We charge 16 KiB — a generous ceiling that still over-counts
/// the real ~2K cost, while being ~190× smaller than the base64 length of a
/// typical multi-MiB screenshot. Charging `data.len()` to the gate instead
/// made a single `view_image` (~3.1M base64 bytes) trip the handoff gate on a
/// fresh context.
///
/// Consumed by `ToolResultContent::context_pressure_bytes`, which adds the
/// length of the image's MIME-type string on top of this flat charge.
const IMAGE_CONTEXT_TOKEN_EQUIV: usize = 16 * 1024;

/// One piece of content carried by a tool result: either plain text or an
/// image. Sized two different ways by `estimated_bytes` (wire size) and
/// `context_pressure_bytes` (context-window pressure).
#[derive(Debug, Clone)]
pub enum ToolResultContent {
/// Textual content; sized by its byte length under both measures.
Text(String),
/// Image content. `data` is the base64 payload that is serialized into the
/// request; `mime_type` is its media type (e.g. `image/png`).
Image { data: String, mime_type: String },
}

impl ToolResultContent {
/// Real serialized size in bytes. Used by `truncate_history` to keep the
/// outgoing request body under `max_history_bytes` — an image rides the
/// wire as its full base64 string, so that string's length is what counts
/// here. For context-window/handoff pressure use
/// [`Self::context_pressure_bytes`] instead, which charges an image its
/// (far smaller) visual-token equivalent.
pub fn estimated_bytes(&self) -> usize {
    // Request-size accounting, not a visual-token estimate: an image is
    // charged for every base64 byte that will be serialized (plus its MIME
    // tag) so image-heavy sessions cannot silently exceed provider/body
    // caps. Text carries no tag, so its second component is zero.
    let (payload_len, tag_len) = match self {
        Self::Text(text) => (text.len(), 0),
        Self::Image { data, mime_type } => (data.len(), mime_type.len()),
    };
    payload_len + tag_len
}

/// Token-equivalent context-window pressure, in bytes (the handoff gate
/// maps bytes→tokens at 1:1). Identical to [`Self::estimated_bytes`] for
/// text, but an image is charged a flat [`IMAGE_CONTEXT_TOKEN_EQUIV`]
/// budget rather than its base64 length — providers bill it as visual
/// tiles (~2K tokens), so counting `data.len()` over-counts by ~1500× and
/// forces a handoff on a single image.
pub fn context_pressure_bytes(&self) -> usize {
    match self {
        // The base64 payload is deliberately ignored: an image is charged
        // the flat per-image budget plus the length of its MIME tag.
        Self::Image { mime_type, .. } => IMAGE_CONTEXT_TOKEN_EQUIV + mime_type.len(),
        // Text is charged its byte length, same as `estimated_bytes`.
        Self::Text(text) => text.len(),
    }
}

pub fn as_text_lossy(&self) -> String {
match self {
Self::Text(s) => s.clone(),
Expand All @@ -40,6 +68,19 @@ pub enum HistoryItem {

impl HistoryItem {
pub fn estimated_bytes(&self) -> usize {
self.size_with(ToolResultContent::estimated_bytes)
}

/// Token-equivalent context-window pressure, in bytes. Mirrors
/// [`Self::estimated_bytes`] but charges image tool results their visual-
/// token equivalent rather than their base64 length — see
/// [`ToolResultContent::context_pressure_bytes`]. The handoff gate uses
/// this; `truncate_history` (request-body sizing) uses `estimated_bytes`.
pub fn context_pressure_bytes(&self) -> usize {
self.size_with(ToolResultContent::context_pressure_bytes)
}

fn size_with(&self, content_size: fn(&ToolResultContent) -> usize) -> usize {
match self {
Self::User(s) => s.len(),
Self::Assistant { text, tool_calls } => {
Expand All @@ -56,11 +97,7 @@ impl HistoryItem {
.sum::<usize>()
}
Self::ToolResult(r) => {
r.provider_id.len()
+ r.content
.iter()
.map(ToolResultContent::estimated_bytes)
.sum::<usize>()
r.provider_id.len() + r.content.iter().map(content_size).sum::<usize>()
}
}
}
Expand Down Expand Up @@ -219,3 +256,82 @@ pub fn clamp(mut s: String, max: usize) -> String {
}
s
}

#[cfg(test)]
mod tests {
use super::*;

/// Builds a non-error tool-result history item holding exactly one PNG
/// image whose base64 payload is `base64_len` bytes of filler.
fn image_item(base64_len: usize) -> HistoryItem {
    let image = ToolResultContent::Image {
        data: "A".repeat(base64_len),
        mime_type: "image/png".into(),
    };
    let result = ToolResult {
        provider_id: "call_1".into(),
        content: vec![image],
        is_error: false,
    };
    HistoryItem::ToolResult(result)
}

#[test]
fn image_estimated_bytes_is_real_wire_size() {
    // `truncate_history` depends on this measure to keep the request body
    // under `max_history_bytes`, so an image has to report its complete
    // base64 length (plus its MIME tag).
    const PAYLOAD_LEN: usize = 3_000_000;
    const MIME: &str = "image/png";

    let image = ToolResultContent::Image {
        data: "A".repeat(PAYLOAD_LEN),
        mime_type: MIME.to_owned(),
    };

    let expected = PAYLOAD_LEN + MIME.len();
    assert_eq!(image.estimated_bytes(), expected);
}

#[test]
fn image_context_pressure_is_token_equivalent_not_base64_len() {
    // Helper: a PNG image whose base64 payload is `len` filler bytes.
    let png_with_payload = |len: usize| ToolResultContent::Image {
        data: "A".repeat(len),
        mime_type: "image/png".into(),
    };

    // The handoff gate has to charge the visual-token equivalent rather
    // than the base64 length; otherwise a single screenshot trips it.
    let img = png_with_payload(3_000_000);
    let charged = img.context_pressure_bytes();
    assert_eq!(charged, IMAGE_CONTEXT_TOKEN_EQUIV + "image/png".len());

    // A much larger payload must be charged exactly the same amount.
    let bigger = png_with_payload(10_000_000);
    assert_eq!(img.context_pressure_bytes(), bigger.context_pressure_bytes());
}

#[test]
fn single_image_does_not_trip_default_handoff_threshold() {
    // Regression guard: one ~3.1M-base64-byte `view_image` result on an
    // otherwise-empty history must stay at or under the default pre-usage
    // handoff cap. With the shipped defaults (max_context_tokens=200_000,
    // max_output_tokens=32_768) the byte-fallback threshold is
    // min(200_000*9/10, 200_000-32_768) = 167_232 "bytes". Prior to the
    // fix this item was counted at ~3.1M and tripped the gate at once.
    const DEFAULT_PRE_USAGE_THRESHOLD: usize = 167_232;
    const BASE64_LEN: usize = 3_118_884;

    let screenshot = image_item(BASE64_LEN);
    let pressure = screenshot.context_pressure_bytes();
    assert!(
        pressure <= DEFAULT_PRE_USAGE_THRESHOLD,
        "one image charged {} bytes of context pressure, over the {} threshold",
        pressure,
        DEFAULT_PRE_USAGE_THRESHOLD
    );

    // The wire-size measure, in contrast, still reports the whole payload.
    assert!(screenshot.estimated_bytes() >= BASE64_LEN);
}

#[test]
fn text_content_size_is_identical_for_both_measures() {
    // The two measures diverge for images only; text sizes must agree.
    let content = ToolResultContent::Text(String::from("hello world"));
    let (wire, pressure) = (content.estimated_bytes(), content.context_pressure_bytes());
    assert_eq!(wire, pressure);

    let user_turn = HistoryItem::User("a user message".into());
    let (wire, pressure) = (
        user_turn.estimated_bytes(),
        user_turn.context_pressure_bytes(),
    );
    assert_eq!(wire, pressure);
}
}
Loading