From c17ed5ce7e8e12c32bffc48e22b2e3a393edd026 Mon Sep 17 00:00:00 2001 From: Will Pfleger Date: Thu, 11 Jun 2026 14:50:29 -0400 Subject: [PATCH 01/13] feat(acp): add systemPrompt field to session/new Implement the systemPrompt field proposed in RFD PR #1237 across both buzz-agent and buzz-acp. The agent receives an optional systemPrompt in session/new params and appends it to its effective system prompt (after env-default + hints). The harness passes its combined [Base] + [System] content via this field instead of embedding it in every user message. Agent side (buzz-agent): - SessionNewParams gains optional systemPrompt field (camelCase serde) - session_new appends client prompt to effective_system_prompt with \n\n separator; rejects combined prompts exceeding 512KB - initialize response advertises systemPrompt: true in promptCapabilities Harness side (buzz-acp): - AcpClient.initialize() parses promptCapabilities.systemPrompt from the agent's init response into a supports_system_prompt bool - session_new_full/session_new accept system_prompt: Option<&str>; field is only included in params when agent advertises support - create_session_and_apply_model combines base_prompt + system_prompt and passes it via session/new when supported - format_prompt skips [Base] and [System] sections when system_prompt_via_session is true (content already in system role) - Initial-message path skips prepend_base_prompt when agent supports systemPrompt (base content already passed via session/new) Backward compatible: old harnesses omitting systemPrompt work unchanged; new harness talking to old agent (no capability) falls back to embedding in user messages. 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/acp.rs | 86 ++++++++++++++++++++++++++++++++++- crates/buzz-acp/src/lib.rs | 2 +- crates/buzz-acp/src/pool.rs | 35 ++++++++++++-- crates/buzz-acp/src/queue.rs | 81 +++++++++++++++++++++++++++++++-- crates/buzz-agent/src/lib.rs | 41 +++++++++++++---- crates/buzz-agent/src/wire.rs | 46 +++++++++++++++++++ 6 files changed, 271 insertions(+), 20 deletions(-) diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index 7d8db4d40..6d878eed1 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -148,6 +148,9 @@ pub struct AcpClient { observer_agent_index: Option, /// Best-effort context attached to raw ACP wire events. observer_context: ObserverContext, + /// Whether the agent advertises `systemPrompt` support in its + /// `promptCapabilities`. Set after `initialize()` returns. + pub supports_system_prompt: bool, } impl AcpClient { @@ -239,6 +242,7 @@ impl AcpClient { observer: None, observer_agent_index: None, observer_context: ObserverContext::default(), + supports_system_prompt: false, }) } @@ -280,23 +284,38 @@ impl AcpClient { }); let result = self.send_request("initialize", params).await?; tracing::debug!(target: "acp::init", "initialize response: {result}"); + + // Parse promptCapabilities.systemPrompt from agent's init response. + self.supports_system_prompt = result + .pointer("/agentCapabilities/promptCapabilities/systemPrompt") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + Ok(result) } /// Send `session/new` and return the full response alongside the session ID. /// /// `cwd` must be an absolute path. `mcp_servers` may be empty. + /// `system_prompt` is passed to the agent when `supports_system_prompt` is true. /// Callers use [`extract_model_config_options`] and [`extract_model_state`] /// to pull model info from the raw result. 
pub async fn session_new_full( &mut self, cwd: &str, mcp_servers: Vec, + system_prompt: Option<&str>, ) -> Result { - let params = serde_json::json!({ + let mut params = serde_json::json!({ "cwd": cwd, "mcpServers": mcp_servers, }); + // Only include systemPrompt when the agent advertises support. + if self.supports_system_prompt { + if let Some(sp) = system_prompt { + params["systemPrompt"] = serde_json::Value::String(sp.to_owned()); + } + } let result = self.send_request("session/new", params).await?; let session_id = result["sessionId"] .as_str() @@ -317,8 +336,12 @@ impl AcpClient { &mut self, cwd: &str, mcp_servers: Vec, + system_prompt: Option<&str>, ) -> Result { - Ok(self.session_new_full(cwd, mcp_servers).await?.session_id) + Ok(self + .session_new_full(cwd, mcp_servers, system_prompt) + .await? + .session_id) } /// Send `session/set_config_option` (stable ACP path). @@ -2021,4 +2044,63 @@ mod tests { "expected IdleTimeout after silence, got {result:?}" ); } + + // ── systemPrompt capability parsing ────────────────────────────────── + + #[tokio::test] + async fn initialize_parses_system_prompt_capability() { + // Agent responds with systemPrompt: true in promptCapabilities. + let script = r#" + read -t 2 _init + echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{"promptCapabilities":{"systemPrompt":true}}}}' + sleep 1 + "#; + let mut client = spawn_script(script).await; + let _result = client + .initialize() + .await + .expect("initialize should succeed"); + assert!( + client.supports_system_prompt, + "should detect systemPrompt capability" + ); + } + + #[tokio::test] + async fn initialize_defaults_system_prompt_false_when_absent() { + // Agent responds without systemPrompt in promptCapabilities. 
+ let script = r#" + read -t 2 _init + echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{"promptCapabilities":{"image":false}}}}' + sleep 1 + "#; + let mut client = spawn_script(script).await; + let _result = client + .initialize() + .await + .expect("initialize should succeed"); + assert!( + !client.supports_system_prompt, + "should default to false when systemPrompt absent" + ); + } + + #[tokio::test] + async fn initialize_defaults_system_prompt_false_when_no_capabilities() { + // Agent responds with no agentCapabilities at all. + let script = r#" + read -t 2 _init + echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1}}' + sleep 1 + "#; + let mut client = spawn_script(script).await; + let _result = client + .initialize() + .await + .expect("initialize should succeed"); + assert!( + !client.supports_system_prompt, + "should default to false when no agentCapabilities" + ); + } } diff --git a/crates/buzz-acp/src/lib.rs b/crates/buzz-acp/src/lib.rs index a53192654..a7b7b34c5 100644 --- a/crates/buzz-acp/src/lib.rs +++ b/crates/buzz-acp/src/lib.rs @@ -2526,7 +2526,7 @@ async fn run_models(args: ModelsArgs) -> Result<()> { // so shutdown() runs on all paths (success, error, timeout). let protocol_result = tokio::time::timeout(MODELS_TIMEOUT, async { let init = client.initialize().await?; - let session = client.session_new_full(&cwd, vec![]).await?; + let session = client.session_new_full(&cwd, vec![], None).await?; Ok::<_, acp::AcpError>((init, session)) }) .await; diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index 86b7292b4..56a98b5f8 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -400,9 +400,27 @@ async fn create_session_and_apply_model( agent: &mut OwnedAgent, ctx: &PromptContext, ) -> Result { + // Combine base_prompt + system_prompt into a single systemPrompt value + // when the agent supports it. 
This moves system-level content from + // user messages into the proper system role via session/new. + let combined_system_prompt: Option = if agent.acp.supports_system_prompt { + match (ctx.base_prompt, ctx.system_prompt.as_deref()) { + (Some(bp), Some(sp)) => Some(format!("[Base]\n{}\n\n[System]\n{sp}", bp.trim_end())), + (Some(bp), None) => Some(format!("[Base]\n{}", bp.trim_end())), + (None, Some(sp)) => Some(format!("[System]\n{sp}")), + (None, None) => None, + } + } else { + None + }; + let resp = agent .acp - .session_new_full(&ctx.cwd, ctx.mcp_servers.clone()) + .session_new_full( + &ctx.cwd, + ctx.mcp_servers.clone(), + combined_system_prompt.as_deref(), + ) .await?; // Populate model capabilities on first session creation. @@ -811,10 +829,16 @@ pub async fn run_prompt_task( target: "pool::session", "sending initial_message to session {session_id} for channel {cid}" ); - // Prepend base prompt to initial_message for platform orientation. - let init_msg = match ctx.base_prompt { - Some(bp) => prepend_base_prompt(bp, initial_msg), - None => initial_msg.to_string(), + // Prepend base prompt to initial_message for platform orientation, + // but only when the agent doesn't support systemPrompt (when it does, + // the base prompt was already passed via session/new). 
+ let init_msg = if agent.acp.supports_system_prompt { + initial_msg.to_string() + } else { + match ctx.base_prompt { + Some(bp) => prepend_base_prompt(bp, initial_msg), + None => initial_msg.to_string(), + } }; let init_result = agent .acp @@ -973,6 +997,7 @@ pub async fn run_prompt_task( channel_info: channel_info.as_ref(), conversation_context: conversation_context.as_ref(), profile_lookup: profile_lookup.as_ref(), + system_prompt_via_session: agent.acp.supports_system_prompt, }, ) } else { diff --git a/crates/buzz-acp/src/queue.rs b/crates/buzz-acp/src/queue.rs index ee776f830..46ce0cdf8 100644 --- a/crates/buzz-acp/src/queue.rs +++ b/crates/buzz-acp/src/queue.rs @@ -1015,6 +1015,9 @@ pub struct FormatPromptArgs<'a> { pub channel_info: Option<&'a PromptChannelInfo>, pub conversation_context: Option<&'a ConversationContext>, pub profile_lookup: Option<&'a PromptProfileLookup>, + /// When true, `[Base]` and `[System]` sections are omitted from the prompt + /// because they were already passed via `session/new`'s `systemPrompt` field. + pub system_prompt_via_session: bool, } /// Prepend the `[Base]` platform-context section to a prompt body. @@ -1055,13 +1058,19 @@ pub fn format_prompt(batch: &FlushBatch, args: &FormatPromptArgs<'_>) -> String let mut sections: Vec = Vec::with_capacity(7); // 0. Base prompt (platform-level, always first). - if let Some(bp) = args.base_prompt { - sections.push(format!("[Base]\n{}", bp.trim_end())); + // Skipped when systemPrompt was passed via session/new. + if !args.system_prompt_via_session { + if let Some(bp) = args.base_prompt { + sections.push(format!("[Base]\n{}", bp.trim_end())); + } } // 1. System prompt. - if let Some(sp) = args.system_prompt { - sections.push(format!("[System]\n{sp}")); + // Skipped when systemPrompt was passed via session/new. + if !args.system_prompt_via_session { + if let Some(sp) = args.system_prompt { + sections.push(format!("[System]\n{sp}")); + } } // 1b. 
NIP-AE agent core memory (rendered by `engram_fetch::build_core_section`). @@ -1651,6 +1660,70 @@ mod tests { "[Context] must come before [Thread Context]" ); } + // ── Test 11d: system_prompt_via_session skips [Base] and [System] ───────── + + #[test] + fn test_format_prompt_system_prompt_via_session_skips_base_and_system() { + let ch = Uuid::new_v4(); + let event = make_event("hello"); + + let batch = FlushBatch { + channel_id: ch, + events: vec![BatchEvent { + event, + prompt_tag: "test".into(), + received_at: Instant::now(), + }], + cancelled_events: vec![], + }; + + // With system_prompt_via_session=true, [Base] and [System] are omitted. + let prompt = format_prompt( + &batch, + &FormatPromptArgs { + base_prompt: Some("Platform base."), + system_prompt: Some("Role prompt."), + system_prompt_via_session: true, + ..Default::default() + }, + ); + assert!(!prompt.contains("[Base]"), "should not contain [Base]"); + assert!(!prompt.contains("[System]"), "should not contain [System]"); + assert!( + prompt.starts_with("[Context]"), + "should start with [Context]" + ); + } + + #[test] + fn test_format_prompt_system_prompt_via_session_false_includes_base_and_system() { + let ch = Uuid::new_v4(); + let event = make_event("hello"); + + let batch = FlushBatch { + channel_id: ch, + events: vec![BatchEvent { + event, + prompt_tag: "test".into(), + received_at: Instant::now(), + }], + cancelled_events: vec![], + }; + + // With system_prompt_via_session=false (default), [Base] and [System] are included. 
+ let prompt = format_prompt( + &batch, + &FormatPromptArgs { + base_prompt: Some("Platform base."), + system_prompt: Some("Role prompt."), + system_prompt_via_session: false, + ..Default::default() + }, + ); + assert!(prompt.contains("[Base]\nPlatform base.")); + assert!(prompt.contains("[System]\nRole prompt.")); + } + // ── Test 12: drop mode discards in-flight channel events ───────────────── #[test] diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index 442447c64..c809503c9 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -224,7 +224,7 @@ async fn initialize(id: Value, params: Value, wire_tx: &WireSender) { "protocolVersion": PROTOCOL_VERSION, "agentCapabilities": { "loadSession": false, - "promptCapabilities": { "image": false, "audio": false, "embeddedContext": false }, + "promptCapabilities": { "image": false, "audio": false, "embeddedContext": false, "systemPrompt": true }, "mcpCapabilities": { "http": false, "sse": false }, }, "agentInfo": { "name": "buzz-agent", "version": env!("CARGO_PKG_VERSION") }, @@ -261,15 +261,40 @@ async fn session_new(app: &Arc, id: Value, params: Value, wire_tx: &WireSen .await; } } - let effective_system_prompt: Arc = if app.cfg.hints_enabled { - let hints = hints::build_hints_section(std::path::Path::new(&p.cwd)); - if hints.is_empty() { - Arc::from(app.cfg.system_prompt.as_str()) + let effective_system_prompt: Arc = { + let mut prompt = if app.cfg.hints_enabled { + let hints = hints::build_hints_section(std::path::Path::new(&p.cwd)); + if hints.is_empty() { + app.cfg.system_prompt.clone() + } else { + format!("{}\n\n{}", app.cfg.system_prompt, hints) + } } else { - Arc::from(format!("{}\n\n{}", app.cfg.system_prompt, hints)) + app.cfg.system_prompt.clone() + }; + // Append client-provided systemPrompt (additive semantics per ACP spec). 
+ if let Some(ref client_prompt) = p.system_prompt { + if !client_prompt.is_empty() { + prompt.push_str("\n\n"); + prompt.push_str(client_prompt); + } + } + // Reject combined prompts exceeding 512KB. + const MAX_SYSTEM_PROMPT_BYTES: usize = 512 * 1024; + if prompt.len() > MAX_SYSTEM_PROMPT_BYTES { + return reject( + wire_tx, + id, + INVALID_PARAMS, + &format!( + "session/new: combined system prompt exceeds {}KB limit ({} bytes)", + MAX_SYSTEM_PROMPT_BYTES / 1024, + prompt.len() + ), + ) + .await; } - } else { - Arc::from(app.cfg.system_prompt.as_str()) + Arc::from(prompt.as_str()) }; let mcp = match McpRegistry::spawn_all(&app.cfg, &p.mcp_servers, &p.cwd).await { Ok(m) => Arc::new(m), diff --git a/crates/buzz-agent/src/wire.rs b/crates/buzz-agent/src/wire.rs index ba69eb1d9..57ce841f5 100644 --- a/crates/buzz-agent/src/wire.rs +++ b/crates/buzz-agent/src/wire.rs @@ -49,6 +49,8 @@ pub struct SessionNewParams { pub cwd: String, #[serde(default)] pub mcp_servers: Vec, + #[serde(default)] + pub system_prompt: Option, } #[derive(Debug, Deserialize)] @@ -178,3 +180,47 @@ pub async fn writer_task(mut rx: mpsc::Receiver) { let _ = stdout.flush().await; } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn session_new_params_deserializes_system_prompt() { + let json = serde_json::json!({ + "cwd": "/tmp/test", + "mcpServers": [], + "systemPrompt": "You are a helpful agent." 
+ }); + let params: SessionNewParams = serde_json::from_value(json).unwrap(); + assert_eq!(params.cwd, "/tmp/test"); + assert_eq!( + params.system_prompt.as_deref(), + Some("You are a helpful agent.") + ); + } + + #[test] + fn session_new_params_system_prompt_defaults_to_none() { + let json = serde_json::json!({ + "cwd": "/tmp/test", + "mcpServers": [] + }); + let params: SessionNewParams = serde_json::from_value(json).unwrap(); + assert_eq!(params.cwd, "/tmp/test"); + assert!(params.system_prompt.is_none()); + } + + #[test] + fn session_new_params_ignores_unknown_fields() { + // Backward compat: old agents with new harness — unknown fields are ignored. + let json = serde_json::json!({ + "cwd": "/tmp/test", + "mcpServers": [], + "unknownField": "should be ignored" + }); + let params: SessionNewParams = serde_json::from_value(json).unwrap(); + assert_eq!(params.cwd, "/tmp/test"); + assert!(params.system_prompt.is_none()); + } +} From 96e6332c9ae7062073a490c6f3f3ada009c39249 Mon Sep 17 00:00:00 2001 From: Will Pfleger Date: Thu, 11 Jun 2026 15:00:07 -0400 Subject: [PATCH 02/13] test(acp): verify session_new_full systemPrompt serialization Add three tests covering the session_new_full systemPrompt wire behavior: - Includes systemPrompt in params when agent advertises capability - Omits systemPrompt when agent does not advertise capability - Omits systemPrompt when value is None (even with capability) Uses bash echo-back pattern: the test script echoes the received request in the response so assertions can inspect the exact JSON sent. 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/acp.rs | 96 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index 6d878eed1..ac307c819 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -2103,4 +2103,100 @@ mod tests { "should default to false when no agentCapabilities" ); } + + // ── session_new_full systemPrompt serialization ────────────────────── + + #[tokio::test] + async fn session_new_full_includes_system_prompt_when_supported() { + // Script: respond to initialize (with capability), then echo back + // the session/new request params in the response so we can inspect them. + let script = r#" + read -t 2 _init + echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{"promptCapabilities":{"systemPrompt":true}}}}' + read -t 2 REQ + echo '{"jsonrpc":"2.0","id":1,"result":{"sessionId":"ses_test","_receivedRequest":'"$REQ"'}}' + sleep 1 + "#; + let mut client = spawn_script(script).await; + client + .initialize() + .await + .expect("initialize should succeed"); + assert!(client.supports_system_prompt); + + let resp = client + .session_new_full("/tmp", vec![], Some("Custom system prompt")) + .await + .expect("session_new_full should succeed"); + + assert_eq!(resp.session_id, "ses_test"); + // The script echoed the full request back in _receivedRequest. + // Verify the params contain systemPrompt. + let received = &resp.raw["_receivedRequest"]; + assert_eq!( + received["params"]["systemPrompt"].as_str(), + Some("Custom system prompt"), + "systemPrompt should be included in params when agent supports it" + ); + } + + #[tokio::test] + async fn session_new_full_omits_system_prompt_when_not_supported() { + // Script: respond to initialize WITHOUT capability, then echo back request. 
+ let script = r#" + read -t 2 _init + echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{"promptCapabilities":{"image":false}}}}' + read -t 2 REQ + echo '{"jsonrpc":"2.0","id":1,"result":{"sessionId":"ses_test","_receivedRequest":'"$REQ"'}}' + sleep 1 + "#; + let mut client = spawn_script(script).await; + client + .initialize() + .await + .expect("initialize should succeed"); + assert!(!client.supports_system_prompt); + + let resp = client + .session_new_full("/tmp", vec![], Some("Custom system prompt")) + .await + .expect("session_new_full should succeed"); + + assert_eq!(resp.session_id, "ses_test"); + let received = &resp.raw["_receivedRequest"]; + assert!( + received["params"]["systemPrompt"].is_null(), + "systemPrompt should NOT be in params when agent doesn't support it" + ); + } + + #[tokio::test] + async fn session_new_full_omits_system_prompt_when_none() { + // Even when agent supports it, None means no field sent. + let script = r#" + read -t 2 _init + echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{"promptCapabilities":{"systemPrompt":true}}}}' + read -t 2 REQ + echo '{"jsonrpc":"2.0","id":1,"result":{"sessionId":"ses_test","_receivedRequest":'"$REQ"'}}' + sleep 1 + "#; + let mut client = spawn_script(script).await; + client + .initialize() + .await + .expect("initialize should succeed"); + assert!(client.supports_system_prompt); + + let resp = client + .session_new_full("/tmp", vec![], None) + .await + .expect("session_new_full should succeed"); + + assert_eq!(resp.session_id, "ses_test"); + let received = &resp.raw["_receivedRequest"]; + assert!( + received["params"]["systemPrompt"].is_null(), + "systemPrompt should NOT be in params when value is None" + ); + } } From e530ca71f0480bb040b908cb5abbd70fa541ba93 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 11 Jun 2026 15:15:21 -0400 Subject: [PATCH 03/13] fix(acp): address review 
feedback on systemPrompt implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Strip [Base]/[System] labels from the systemPrompt wire value — the protocol carries raw content, not harness-specific formatting. Add TODO explaining why agent_core stays in user messages (resolved per-channel after session creation). Move MAX_SYSTEM_PROMPT_BYTES to config.rs alongside other limit constants. New tests: 512KB size gate rejection (integration), empty-string systemPrompt deserialization (unit). Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/pool.rs | 6 ++--- crates/buzz-acp/src/queue.rs | 3 +++ crates/buzz-agent/src/config.rs | 1 + crates/buzz-agent/src/lib.rs | 3 +-- crates/buzz-agent/src/wire.rs | 12 ++++++++++ crates/buzz-agent/tests/fake_llm.rs | 34 +++++++++++++++++++++++++++++ 6 files changed, 54 insertions(+), 5 deletions(-) diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index 56a98b5f8..ee1b9dbe6 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -405,9 +405,9 @@ async fn create_session_and_apply_model( // user messages into the proper system role via session/new. 
let combined_system_prompt: Option = if agent.acp.supports_system_prompt { match (ctx.base_prompt, ctx.system_prompt.as_deref()) { - (Some(bp), Some(sp)) => Some(format!("[Base]\n{}\n\n[System]\n{sp}", bp.trim_end())), - (Some(bp), None) => Some(format!("[Base]\n{}", bp.trim_end())), - (None, Some(sp)) => Some(format!("[System]\n{sp}")), + (Some(bp), Some(sp)) => Some(format!("{}\n\n{sp}", bp.trim_end())), + (Some(bp), None) => Some(bp.trim_end().to_string()), + (None, Some(sp)) => Some(sp.to_string()), (None, None) => None, } } else { diff --git a/crates/buzz-acp/src/queue.rs b/crates/buzz-acp/src/queue.rs index 46ce0cdf8..77ae93eef 100644 --- a/crates/buzz-acp/src/queue.rs +++ b/crates/buzz-acp/src/queue.rs @@ -1074,6 +1074,9 @@ pub fn format_prompt(batch: &FlushBatch, args: &FormatPromptArgs<'_>) -> String } // 1b. NIP-AE agent core memory (rendered by `engram_fetch::build_core_section`). + // agent_core is always in user messages because it is resolved per-channel + // after session creation. A future session/update mechanism could move it + // to the system role. 
if let Some(core) = args.agent_core { sections.push(core.to_string()); } diff --git a/crates/buzz-agent/src/config.rs b/crates/buzz-agent/src/config.rs index f11eb33a5..fa230bfde 100644 --- a/crates/buzz-agent/src/config.rs +++ b/crates/buzz-agent/src/config.rs @@ -3,6 +3,7 @@ use std::time::Duration; pub const PROTOCOL_VERSION: u32 = 1; pub const MAX_PROMPT_BYTES: usize = 1024 * 1024; +pub const MAX_SYSTEM_PROMPT_BYTES: usize = 512 * 1024; pub const MAX_TOOL_RESULT_BYTES: usize = 8 * 1024 * 1024; pub const MAX_TOOL_CALLS_PER_TURN: usize = 64; diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index c809503c9..84c036c65 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -18,7 +18,7 @@ use tokio::io::BufReader; use tokio::sync::{mpsc, watch, Mutex}; use crate::agent::RunCtx; -use crate::config::{Config, PROTOCOL_VERSION}; +use crate::config::{Config, MAX_SYSTEM_PROMPT_BYTES, PROTOCOL_VERSION}; use crate::llm::Llm; use crate::mcp::McpRegistry; use crate::types::HistoryItem; @@ -280,7 +280,6 @@ async fn session_new(app: &Arc, id: Value, params: Value, wire_tx: &WireSen } } // Reject combined prompts exceeding 512KB. - const MAX_SYSTEM_PROMPT_BYTES: usize = 512 * 1024; if prompt.len() > MAX_SYSTEM_PROMPT_BYTES { return reject( wire_tx, diff --git a/crates/buzz-agent/src/wire.rs b/crates/buzz-agent/src/wire.rs index 57ce841f5..6250d0e63 100644 --- a/crates/buzz-agent/src/wire.rs +++ b/crates/buzz-agent/src/wire.rs @@ -223,4 +223,16 @@ mod tests { assert_eq!(params.cwd, "/tmp/test"); assert!(params.system_prompt.is_none()); } + + #[test] + fn session_new_params_empty_string_system_prompt() { + // An explicit empty string is distinct from absent — deserializes to Some(""). 
+ let json = serde_json::json!({ + "cwd": "/tmp/test", + "mcpServers": [], + "systemPrompt": "" + }); + let params: SessionNewParams = serde_json::from_value(json).unwrap(); + assert_eq!(params.system_prompt, Some(String::new())); + } } diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index 740288819..30577512a 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -335,3 +335,37 @@ async fn rejects_oversized_line() { .await .expect("agent didn't exit after oversized line"); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn session_new_rejects_oversized_system_prompt() { + // A systemPrompt exceeding 512KB must produce a JSON-RPC error, not a panic. + let url = spawn_fake_llm(vec![]).await; + let mut h = Harness::spawn(&url).await; + h.send( + "initialize", + json!({"protocolVersion":1,"clientCapabilities":{}}), + ) + .await; + let r = h.recv().await; + assert_eq!(r["result"]["protocolVersion"], 1); + + // 600KB payload — exceeds the 512KB limit. 
+ let big_prompt = "x".repeat(600 * 1024); + let id = h + .send( + "session/new", + json!({"cwd":"/tmp","mcpServers":[],"systemPrompt": big_prompt}), + ) + .await; + let r = h.recv_until(|v| v["id"] == json!(id)).await; + assert!( + r.get("error").is_some(), + "expected JSON-RPC error for oversized systemPrompt, got: {r}" + ); + let err_msg = r["error"]["message"].as_str().unwrap_or(""); + assert!( + err_msg.contains("512KB limit"), + "error message should mention 512KB limit, got: {err_msg}" + ); + h.shutdown().await; +} From 753ad22e17e5b1f1528cb575786c3d4a50c6e3e0 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 11 Jun 2026 15:31:38 -0400 Subject: [PATCH 04/13] test(agent): E2E tests proving systemPrompt reaches LLM system role Add request-capturing fake LLM server variant and two integration tests that prove the full contract end-to-end without any live LLM dependency: - system_prompt_reaches_llm_system_role: sends systemPrompt via session/new, triggers a prompt, inspects the captured LLM request body to verify the canary string appears in the system role message with correct additive ordering (agent default prompt before client prompt). - system_prompt_absent_no_canary: same flow without systemPrompt field, verifies the canary does NOT appear in the system message while the agent's default prompt is still present. 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-agent/tests/fake_llm.rs | 220 ++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index 30577512a..1a98f5438 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -59,6 +59,91 @@ async fn spawn_fake_llm(responses: Vec) -> String { url } +// ─── Request-capturing fake LLM server ────────────────────────────────────── + +/// Like `spawn_fake_llm` but also captures the full JSON request body from each +/// incoming HTTP request. Returns (url, captured_requests). +async fn spawn_capturing_fake_llm( + responses: Vec, +) -> (String, Arc>>) { + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let url = format!("http://{}", listener.local_addr().unwrap()); + let queue = Arc::new(Mutex::new(VecDeque::from(responses))); + let captures: Arc>> = Arc::new(Mutex::new(Vec::new())); + let captures_clone = captures.clone(); + tokio::spawn(async move { + loop { + let (mut sock, _) = match listener.accept().await { + Ok(p) => p, + Err(_) => return, + }; + let queue = queue.clone(); + let captures = captures_clone.clone(); + tokio::spawn(async move { + // Read headers. + let mut buf = Vec::new(); + let mut tmp = [0u8; 4096]; + while !buf.windows(4).any(|w| w == b"\r\n\r\n") { + match sock.read(&mut tmp).await { + Ok(0) | Err(_) => return, + Ok(n) => buf.extend_from_slice(&tmp[..n]), + } + if buf.len() > 2_000_000 { + return; + } + } + // Parse Content-Length from headers to read the body. 
+ let header_end = buf + .windows(4) + .position(|w| w == b"\r\n\r\n") + .unwrap() + + 4; + let header_str = String::from_utf8_lossy(&buf[..header_end]); + let content_length: usize = header_str + .lines() + .find_map(|line| { + let lower = line.to_lowercase(); + if lower.starts_with("content-length:") { + lower.trim_start_matches("content-length:").trim().parse().ok() + } else { + None + } + }) + .unwrap_or(0); + + // Collect body bytes (some may already be in buf after headers). + let mut body_buf = buf[header_end..].to_vec(); + while body_buf.len() < content_length { + match sock.read(&mut tmp).await { + Ok(0) | Err(_) => break, + Ok(n) => body_buf.extend_from_slice(&tmp[..n]), + } + } + + // Parse and store the request body. + if let Ok(parsed) = serde_json::from_slice::(&body_buf[..content_length.min(body_buf.len())]) { + captures.lock().await.push(parsed); + } + + // Send canned response. + let body = queue + .lock() + .await + .pop_front() + .unwrap_or_else(|| json!({ "error": "no canned response" })); + let body_s = serde_json::to_string(&body).unwrap(); + let resp = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}", + body_s.len(), body_s, + ); + let _ = sock.write_all(resp.as_bytes()).await; + let _ = sock.shutdown().await; + }); + } + }); + (url, captures) +} + // ─── ACP harness ──────────────────────────────────────────────────────────── struct Harness { @@ -369,3 +454,138 @@ async fn session_new_rejects_oversized_system_prompt() { ); h.shutdown().await; } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn system_prompt_reaches_llm_system_role() { + // Proves the full contract: systemPrompt sent via session/new → agent appends + // it to the effective system prompt → LLM receives it in the system role. 
+ let canary = "CANARY_E2E_TEST_MARKER_7f3a9b"; + let (url, captures) = spawn_capturing_fake_llm(vec![openai_text("done")]).await; + let mut h = Harness::spawn(&url).await; + + // initialize — verify the agent advertises systemPrompt capability. + h.send( + "initialize", + json!({"protocolVersion":1,"clientCapabilities":{}}), + ) + .await; + let r = h.recv().await; + assert_eq!(r["result"]["protocolVersion"], 1); + assert_eq!( + r["result"]["agentCapabilities"]["promptCapabilities"]["systemPrompt"], + json!(true), + "agent must advertise systemPrompt capability" + ); + + // session/new with systemPrompt containing the canary. + let sn_id = h + .send( + "session/new", + json!({"cwd":"/tmp","mcpServers":[],"systemPrompt": canary}), + ) + .await; + let r = h.recv_until(|v| v["id"] == json!(sn_id)).await; + let sid = r["result"]["sessionId"].as_str().unwrap().to_owned(); + assert!(sid.starts_with("ses_")); + + // session/prompt — triggers the LLM call. + let p_id = h + .send( + "session/prompt", + json!({ + "sessionId": sid, + "prompt": [{"type":"text","text":"hello"}], + }), + ) + .await; + let _ = h.recv_until(|v| v["id"] == json!(p_id)).await; + + // Inspect the captured LLM request. + let reqs = captures.lock().await; + assert!(!reqs.is_empty(), "expected at least one LLM request"); + let llm_req = &reqs[0]; + let messages = llm_req["messages"].as_array().expect("messages array"); + + // First message should be the system role. + let system_msg = &messages[0]; + assert_eq!(system_msg["role"], "system", "first message must be system role"); + let system_content = system_msg["content"].as_str().unwrap_or(""); + + // Canary must appear in the system message (proves systemPrompt was appended). + assert!( + system_content.contains(canary), + "system message must contain the canary string.\nGot: {system_content}" + ); + + // The agent's default prompt must appear BEFORE the canary (additive ordering). 
+ let default_prompt = "You are buzz-agent"; + let default_pos = system_content.find(default_prompt).expect( + "system message must contain the agent's default prompt" + ); + let canary_pos = system_content.find(canary).unwrap(); + assert!( + default_pos < canary_pos, + "default prompt must appear before canary (additive append ordering)" + ); + + h.shutdown().await; +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn system_prompt_absent_no_canary() { + // Negative case: when systemPrompt is NOT sent in session/new, the canary + // must NOT appear in the LLM system message. + let canary = "CANARY_E2E_TEST_MARKER_7f3a9b"; + let (url, captures) = spawn_capturing_fake_llm(vec![openai_text("done")]).await; + let mut h = Harness::spawn(&url).await; + + // initialize. + h.send( + "initialize", + json!({"protocolVersion":1,"clientCapabilities":{}}), + ) + .await; + let _ = h.recv().await; + + // session/new WITHOUT systemPrompt field. + let sn_id = h + .send("session/new", json!({"cwd":"/tmp","mcpServers":[]})) + .await; + let r = h.recv_until(|v| v["id"] == json!(sn_id)).await; + let sid = r["result"]["sessionId"].as_str().unwrap().to_owned(); + + // session/prompt — triggers the LLM call. + let p_id = h + .send( + "session/prompt", + json!({ + "sessionId": sid, + "prompt": [{"type":"text","text":"hello"}], + }), + ) + .await; + let _ = h.recv_until(|v| v["id"] == json!(p_id)).await; + + // Inspect the captured LLM request. + let reqs = captures.lock().await; + assert!(!reqs.is_empty(), "expected at least one LLM request"); + let llm_req = &reqs[0]; + let messages = llm_req["messages"].as_array().expect("messages array"); + let system_msg = &messages[0]; + assert_eq!(system_msg["role"], "system"); + let system_content = system_msg["content"].as_str().unwrap_or(""); + + // Canary must NOT appear (it was never sent). 
+ assert!( + !system_content.contains(canary), + "system message must NOT contain canary when systemPrompt is absent.\nGot: {system_content}" + ); + + // But the agent's default prompt should still be there. + assert!( + system_content.contains("You are buzz-agent"), + "system message must still contain the agent's default prompt" + ); + + h.shutdown().await; +} From 980af8ce0cedbd03cc071c1e529133f1b9211641 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 11 Jun 2026 15:34:41 -0400 Subject: [PATCH 05/13] style: apply cargo fmt to fake_llm.rs Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-agent/tests/fake_llm.rs | 31 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index 1a98f5438..25e4c7a5c 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -63,9 +63,7 @@ async fn spawn_fake_llm(responses: Vec) -> String { /// Like `spawn_fake_llm` but also captures the full JSON request body from each /// incoming HTTP request. Returns (url, captured_requests). -async fn spawn_capturing_fake_llm( - responses: Vec, -) -> (String, Arc>>) { +async fn spawn_capturing_fake_llm(responses: Vec) -> (String, Arc>>) { let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let url = format!("http://{}", listener.local_addr().unwrap()); let queue = Arc::new(Mutex::new(VecDeque::from(responses))); @@ -93,18 +91,18 @@ async fn spawn_capturing_fake_llm( } } // Parse Content-Length from headers to read the body. 
- let header_end = buf - .windows(4) - .position(|w| w == b"\r\n\r\n") - .unwrap() - + 4; + let header_end = buf.windows(4).position(|w| w == b"\r\n\r\n").unwrap() + 4; let header_str = String::from_utf8_lossy(&buf[..header_end]); let content_length: usize = header_str .lines() .find_map(|line| { let lower = line.to_lowercase(); if lower.starts_with("content-length:") { - lower.trim_start_matches("content-length:").trim().parse().ok() + lower + .trim_start_matches("content-length:") + .trim() + .parse() + .ok() } else { None } @@ -121,7 +119,9 @@ async fn spawn_capturing_fake_llm( } // Parse and store the request body. - if let Ok(parsed) = serde_json::from_slice::(&body_buf[..content_length.min(body_buf.len())]) { + if let Ok(parsed) = + serde_json::from_slice::(&body_buf[..content_length.min(body_buf.len())]) + { captures.lock().await.push(parsed); } @@ -508,7 +508,10 @@ async fn system_prompt_reaches_llm_system_role() { // First message should be the system role. let system_msg = &messages[0]; - assert_eq!(system_msg["role"], "system", "first message must be system role"); + assert_eq!( + system_msg["role"], "system", + "first message must be system role" + ); let system_content = system_msg["content"].as_str().unwrap_or(""); // Canary must appear in the system message (proves systemPrompt was appended). @@ -519,9 +522,9 @@ async fn system_prompt_reaches_llm_system_role() { // The agent's default prompt must appear BEFORE the canary (additive ordering). 
let default_prompt = "You are buzz-agent"; - let default_pos = system_content.find(default_prompt).expect( - "system message must contain the agent's default prompt" - ); + let default_pos = system_content + .find(default_prompt) + .expect("system message must contain the agent's default prompt"); let canary_pos = system_content.find(canary).unwrap(); assert!( default_pos < canary_pos, From 44a7fa6b4342fbf1340d63b5cfd9ad1dce2204bb Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 11 Jun 2026 16:16:56 -0400 Subject: [PATCH 06/13] refactor(acp): remove speculative capability flag, always pass systemPrompt The RFD explicitly states 'No separate capability flag is needed' and recommends protocol-version-based detection. Remove the supports_system_prompt field and system_prompt_via_session flag that gated sending systemPrompt on a promptCapabilities boolean. The harness now always passes systemPrompt in session/new when it has system content. Agents that support the field use it; others ignore unknown fields per JSON-RPC. The [Base]/[System] user-message injection continues unchanged for backward compatibility. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/acp.rs | 126 +++------------------------- crates/buzz-acp/src/pool.rs | 26 ++---- crates/buzz-acp/src/queue.rs | 80 +----------------- crates/buzz-agent/src/lib.rs | 2 +- crates/buzz-agent/tests/fake_llm.rs | 7 +- 5 files changed, 24 insertions(+), 217 deletions(-) diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index ac307c819..e6babe83b 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -148,9 +148,6 @@ pub struct AcpClient { observer_agent_index: Option, /// Best-effort context attached to raw ACP wire events. observer_context: ObserverContext, - /// Whether the agent advertises `systemPrompt` support in its - /// `promptCapabilities`. Set after `initialize()` returns. 
- pub supports_system_prompt: bool, } impl AcpClient { @@ -242,7 +239,6 @@ impl AcpClient { observer: None, observer_agent_index: None, observer_context: ObserverContext::default(), - supports_system_prompt: false, }) } @@ -285,19 +281,14 @@ impl AcpClient { let result = self.send_request("initialize", params).await?; tracing::debug!(target: "acp::init", "initialize response: {result}"); - // Parse promptCapabilities.systemPrompt from agent's init response. - self.supports_system_prompt = result - .pointer("/agentCapabilities/promptCapabilities/systemPrompt") - .and_then(|v| v.as_bool()) - .unwrap_or(false); - Ok(result) } /// Send `session/new` and return the full response alongside the session ID. /// /// `cwd` must be an absolute path. `mcp_servers` may be empty. - /// `system_prompt` is passed to the agent when `supports_system_prompt` is true. + /// `system_prompt` is included in the request when `Some` — agents that + /// support the field will use it; others ignore unknown fields per JSON-RPC. /// Callers use [`extract_model_config_options`] and [`extract_model_state`] /// to pull model info from the raw result. pub async fn session_new_full( @@ -310,11 +301,8 @@ impl AcpClient { "cwd": cwd, "mcpServers": mcp_servers, }); - // Only include systemPrompt when the agent advertises support. - if self.supports_system_prompt { - if let Some(sp) = system_prompt { - params["systemPrompt"] = serde_json::Value::String(sp.to_owned()); - } + if let Some(sp) = system_prompt { + params["systemPrompt"] = serde_json::Value::String(sp.to_owned()); } let result = self.send_request("session/new", params).await?; let session_id = result["sessionId"] @@ -2045,74 +2033,14 @@ mod tests { ); } - // ── systemPrompt capability parsing ────────────────────────────────── - - #[tokio::test] - async fn initialize_parses_system_prompt_capability() { - // Agent responds with systemPrompt: true in promptCapabilities. 
- let script = r#" - read -t 2 _init - echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{"promptCapabilities":{"systemPrompt":true}}}}' - sleep 1 - "#; - let mut client = spawn_script(script).await; - let _result = client - .initialize() - .await - .expect("initialize should succeed"); - assert!( - client.supports_system_prompt, - "should detect systemPrompt capability" - ); - } - - #[tokio::test] - async fn initialize_defaults_system_prompt_false_when_absent() { - // Agent responds without systemPrompt in promptCapabilities. - let script = r#" - read -t 2 _init - echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{"promptCapabilities":{"image":false}}}}' - sleep 1 - "#; - let mut client = spawn_script(script).await; - let _result = client - .initialize() - .await - .expect("initialize should succeed"); - assert!( - !client.supports_system_prompt, - "should default to false when systemPrompt absent" - ); - } - - #[tokio::test] - async fn initialize_defaults_system_prompt_false_when_no_capabilities() { - // Agent responds with no agentCapabilities at all. - let script = r#" - read -t 2 _init - echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1}}' - sleep 1 - "#; - let mut client = spawn_script(script).await; - let _result = client - .initialize() - .await - .expect("initialize should succeed"); - assert!( - !client.supports_system_prompt, - "should default to false when no agentCapabilities" - ); - } - // ── session_new_full systemPrompt serialization ────────────────────── #[tokio::test] - async fn session_new_full_includes_system_prompt_when_supported() { - // Script: respond to initialize (with capability), then echo back - // the session/new request params in the response so we can inspect them. + async fn session_new_full_includes_system_prompt_when_some() { + // Script: respond to initialize, then echo back the session/new request. 
let script = r#" read -t 2 _init - echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{"promptCapabilities":{"systemPrompt":true}}}}' + echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{}}}' read -t 2 REQ echo '{"jsonrpc":"2.0","id":1,"result":{"sessionId":"ses_test","_receivedRequest":'"$REQ"'}}' sleep 1 @@ -2122,7 +2050,6 @@ mod tests { .initialize() .await .expect("initialize should succeed"); - assert!(client.supports_system_prompt); let resp = client .session_new_full("/tmp", vec![], Some("Custom system prompt")) @@ -2130,52 +2057,20 @@ mod tests { .expect("session_new_full should succeed"); assert_eq!(resp.session_id, "ses_test"); - // The script echoed the full request back in _receivedRequest. - // Verify the params contain systemPrompt. let received = &resp.raw["_receivedRequest"]; assert_eq!( received["params"]["systemPrompt"].as_str(), Some("Custom system prompt"), - "systemPrompt should be included in params when agent supports it" - ); - } - - #[tokio::test] - async fn session_new_full_omits_system_prompt_when_not_supported() { - // Script: respond to initialize WITHOUT capability, then echo back request. 
- let script = r#" - read -t 2 _init - echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{"promptCapabilities":{"image":false}}}}' - read -t 2 REQ - echo '{"jsonrpc":"2.0","id":1,"result":{"sessionId":"ses_test","_receivedRequest":'"$REQ"'}}' - sleep 1 - "#; - let mut client = spawn_script(script).await; - client - .initialize() - .await - .expect("initialize should succeed"); - assert!(!client.supports_system_prompt); - - let resp = client - .session_new_full("/tmp", vec![], Some("Custom system prompt")) - .await - .expect("session_new_full should succeed"); - - assert_eq!(resp.session_id, "ses_test"); - let received = &resp.raw["_receivedRequest"]; - assert!( - received["params"]["systemPrompt"].is_null(), - "systemPrompt should NOT be in params when agent doesn't support it" + "systemPrompt should be included in params when Some" ); } #[tokio::test] async fn session_new_full_omits_system_prompt_when_none() { - // Even when agent supports it, None means no field sent. + // When system_prompt is None, the field should not appear in params. 
let script = r#" read -t 2 _init - echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{"promptCapabilities":{"systemPrompt":true}}}}' + echo '{"jsonrpc":"2.0","id":0,"result":{"protocolVersion":1,"agentCapabilities":{}}}' read -t 2 REQ echo '{"jsonrpc":"2.0","id":1,"result":{"sessionId":"ses_test","_receivedRequest":'"$REQ"'}}' sleep 1 @@ -2185,7 +2080,6 @@ mod tests { .initialize() .await .expect("initialize should succeed"); - assert!(client.supports_system_prompt); let resp = client .session_new_full("/tmp", vec![], None) diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index ee1b9dbe6..71fff4e78 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -401,18 +401,15 @@ async fn create_session_and_apply_model( ctx: &PromptContext, ) -> Result { // Combine base_prompt + system_prompt into a single systemPrompt value - // when the agent supports it. This moves system-level content from - // user messages into the proper system role via session/new. - let combined_system_prompt: Option = if agent.acp.supports_system_prompt { + // for the session/new request. Agents that support the field will use it + // for the system role; others ignore unknown fields per JSON-RPC. + let combined_system_prompt: Option = match (ctx.base_prompt, ctx.system_prompt.as_deref()) { (Some(bp), Some(sp)) => Some(format!("{}\n\n{sp}", bp.trim_end())), (Some(bp), None) => Some(bp.trim_end().to_string()), (None, Some(sp)) => Some(sp.to_string()), (None, None) => None, - } - } else { - None - }; + }; let resp = agent .acp @@ -829,16 +826,10 @@ pub async fn run_prompt_task( target: "pool::session", "sending initial_message to session {session_id} for channel {cid}" ); - // Prepend base prompt to initial_message for platform orientation, - // but only when the agent doesn't support systemPrompt (in which - // case [Base] was already passed via session/new). 
- let init_msg = if agent.acp.supports_system_prompt { - initial_msg.to_string() - } else { - match ctx.base_prompt { - Some(bp) => prepend_base_prompt(bp, initial_msg), - None => initial_msg.to_string(), - } + // Prepend base prompt to initial_message for platform orientation. + let init_msg = match ctx.base_prompt { + Some(bp) => prepend_base_prompt(bp, initial_msg), + None => initial_msg.to_string(), }; let init_result = agent .acp @@ -997,7 +988,6 @@ pub async fn run_prompt_task( channel_info: channel_info.as_ref(), conversation_context: conversation_context.as_ref(), profile_lookup: profile_lookup.as_ref(), - system_prompt_via_session: agent.acp.supports_system_prompt, }, ) } else { diff --git a/crates/buzz-acp/src/queue.rs b/crates/buzz-acp/src/queue.rs index 77ae93eef..a544f7936 100644 --- a/crates/buzz-acp/src/queue.rs +++ b/crates/buzz-acp/src/queue.rs @@ -1015,9 +1015,6 @@ pub struct FormatPromptArgs<'a> { pub channel_info: Option<&'a PromptChannelInfo>, pub conversation_context: Option<&'a ConversationContext>, pub profile_lookup: Option<&'a PromptProfileLookup>, - /// When true, `[Base]` and `[System]` sections are omitted from the prompt - /// because they were already passed via `session/new`'s `systemPrompt` field. - pub system_prompt_via_session: bool, } /// Prepend the `[Base]` platform-context section to a prompt body. @@ -1058,19 +1055,13 @@ pub fn format_prompt(batch: &FlushBatch, args: &FormatPromptArgs<'_>) -> String let mut sections: Vec = Vec::with_capacity(7); // 0. Base prompt (platform-level, always first). - // Skipped when systemPrompt was passed via session/new. - if !args.system_prompt_via_session { - if let Some(bp) = args.base_prompt { - sections.push(format!("[Base]\n{}", bp.trim_end())); - } + if let Some(bp) = args.base_prompt { + sections.push(format!("[Base]\n{}", bp.trim_end())); } // 1. System prompt. - // Skipped when systemPrompt was passed via session/new. 
- if !args.system_prompt_via_session { - if let Some(sp) = args.system_prompt { - sections.push(format!("[System]\n{sp}")); - } + if let Some(sp) = args.system_prompt { + sections.push(format!("[System]\n{sp}")); } // 1b. NIP-AE agent core memory (rendered by `engram_fetch::build_core_section`). @@ -1663,69 +1654,6 @@ mod tests { "[Context] must come before [Thread Context]" ); } - // ── Test 11d: system_prompt_via_session skips [Base] and [System] ───────── - - #[test] - fn test_format_prompt_system_prompt_via_session_skips_base_and_system() { - let ch = Uuid::new_v4(); - let event = make_event("hello"); - - let batch = FlushBatch { - channel_id: ch, - events: vec![BatchEvent { - event, - prompt_tag: "test".into(), - received_at: Instant::now(), - }], - cancelled_events: vec![], - }; - - // With system_prompt_via_session=true, [Base] and [System] are omitted. - let prompt = format_prompt( - &batch, - &FormatPromptArgs { - base_prompt: Some("Platform base."), - system_prompt: Some("Role prompt."), - system_prompt_via_session: true, - ..Default::default() - }, - ); - assert!(!prompt.contains("[Base]"), "should not contain [Base]"); - assert!(!prompt.contains("[System]"), "should not contain [System]"); - assert!( - prompt.starts_with("[Context]"), - "should start with [Context]" - ); - } - - #[test] - fn test_format_prompt_system_prompt_via_session_false_includes_base_and_system() { - let ch = Uuid::new_v4(); - let event = make_event("hello"); - - let batch = FlushBatch { - channel_id: ch, - events: vec![BatchEvent { - event, - prompt_tag: "test".into(), - received_at: Instant::now(), - }], - cancelled_events: vec![], - }; - - // With system_prompt_via_session=false (default), [Base] and [System] are included. 
- let prompt = format_prompt( - &batch, - &FormatPromptArgs { - base_prompt: Some("Platform base."), - system_prompt: Some("Role prompt."), - system_prompt_via_session: false, - ..Default::default() - }, - ); - assert!(prompt.contains("[Base]\nPlatform base.")); - assert!(prompt.contains("[System]\nRole prompt.")); - } // ── Test 12: drop mode discards in-flight channel events ───────────────── diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index 84c036c65..0580df2c1 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -224,7 +224,7 @@ async fn initialize(id: Value, params: Value, wire_tx: &WireSender) { "protocolVersion": PROTOCOL_VERSION, "agentCapabilities": { "loadSession": false, - "promptCapabilities": { "image": false, "audio": false, "embeddedContext": false, "systemPrompt": true }, + "promptCapabilities": { "image": false, "audio": false, "embeddedContext": false }, "mcpCapabilities": { "http": false, "sse": false }, }, "agentInfo": { "name": "buzz-agent", "version": env!("CARGO_PKG_VERSION") }, diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index 25e4c7a5c..1d7f2711f 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -463,7 +463,7 @@ async fn system_prompt_reaches_llm_system_role() { let (url, captures) = spawn_capturing_fake_llm(vec![openai_text("done")]).await; let mut h = Harness::spawn(&url).await; - // initialize — verify the agent advertises systemPrompt capability. + // initialize. h.send( "initialize", json!({"protocolVersion":1,"clientCapabilities":{}}), @@ -471,11 +471,6 @@ async fn system_prompt_reaches_llm_system_role() { .await; let r = h.recv().await; assert_eq!(r["result"]["protocolVersion"], 1); - assert_eq!( - r["result"]["agentCapabilities"]["promptCapabilities"]["systemPrompt"], - json!(true), - "agent must advertise systemPrompt capability" - ); // session/new with systemPrompt containing the canary. 
let sn_id = h From aa13640a992be5defd3df3bd29b0eb21024445ac Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 11 Jun 2026 18:26:26 -0400 Subject: [PATCH 07/13] fix(acp): remove duplicated system prompt from user message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR 981 correctly routes base_prompt and system_prompt into the LLM system role via session/new. However the legacy injection path (format_prompt prepending [Base] and [System] to the user message) was not removed, causing all system-prompt content to appear twice — once in the system role and once in the user message. Remove the [Base] and [System] sections from format_prompt(), remove prepend_base_prompt() calls from heartbeat and initial_message paths, and delete the now-unused prepend_base_prompt function and struct fields. The user message now contains only per-turn dynamic content: Agent Memory, Context, Thread Context, and Events. 
Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/lib.rs | 8 +-- crates/buzz-acp/src/pool.rs | 15 ++--- crates/buzz-acp/src/queue.rs | 109 ++++++++++++----------------------- 3 files changed, 46 insertions(+), 86 deletions(-) diff --git a/crates/buzz-acp/src/lib.rs b/crates/buzz-acp/src/lib.rs index a7b7b34c5..079c86dda 100644 --- a/crates/buzz-acp/src/lib.rs +++ b/crates/buzz-acp/src/lib.rs @@ -31,7 +31,7 @@ use pool::{ AgentPool, CancelMode, OwnedAgent, PromptContext, PromptOutcome, PromptResult, PromptSource, SessionState, }; -use queue::{prepend_base_prompt, EventQueue, QueuedEvent, ThreadTags}; +use queue::{EventQueue, QueuedEvent, ThreadTags}; use relay::{HarnessRelay, RelayEventPublisher}; use tokio::sync::{mpsc, watch}; use tracing_subscriber::EnvFilter; @@ -2350,10 +2350,8 @@ fn dispatch_heartbeat( .heartbeat_prompt .clone() .unwrap_or_else(default_heartbeat_prompt); - let prompt_text = match ctx.base_prompt { - Some(bp) => prepend_base_prompt(bp, &prompt_text), - None => prompt_text, - }; + // base_prompt is delivered via system role in session/new — no need + // to prepend it to the heartbeat user message. 
let result_tx = pool.result_tx(); let ctx_clone = Arc::clone(ctx); let agent_index = agent.index; diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index 71fff4e78..9a1d2a8f6 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -35,8 +35,8 @@ use crate::acp::{ use crate::config::{DedupMode, PermissionMode}; use crate::observer; use crate::queue::{ - prepend_base_prompt, ContextMessage, ConversationContext, FlushBatch, PromptChannelInfo, - PromptProfile, PromptProfileLookup, + ContextMessage, ConversationContext, FlushBatch, PromptChannelInfo, PromptProfile, + PromptProfileLookup, }; use crate::relay::{ChannelInfo, RestClient}; @@ -826,11 +826,9 @@ pub async fn run_prompt_task( target: "pool::session", "sending initial_message to session {session_id} for channel {cid}" ); - // Prepend base prompt to initial_message for platform orientation. - let init_msg = match ctx.base_prompt { - Some(bp) => prepend_base_prompt(bp, initial_msg), - None => initial_msg.to_string(), - }; + // base_prompt is delivered via system role in session/new — no need + // to prepend it to the user message. + let init_msg = initial_msg.to_string(); let init_result = agent .acp .session_prompt_with_idle_timeout( @@ -982,12 +980,11 @@ pub async fn run_prompt_task( crate::queue::format_prompt( b, &crate::queue::FormatPromptArgs { - base_prompt: ctx.base_prompt, - system_prompt: ctx.system_prompt.as_deref(), agent_core: agent_core_section.as_deref(), channel_info: channel_info.as_ref(), conversation_context: conversation_context.as_ref(), profile_lookup: profile_lookup.as_ref(), + ..Default::default() }, ) } else { diff --git a/crates/buzz-acp/src/queue.rs b/crates/buzz-acp/src/queue.rs index a544f7936..ce9f0a66c 100644 --- a/crates/buzz-acp/src/queue.rs +++ b/crates/buzz-acp/src/queue.rs @@ -1009,31 +1009,22 @@ fn format_conversation_context( /// Arguments for [`format_prompt`] beyond the required [`FlushBatch`]. 
#[derive(Default)] pub struct FormatPromptArgs<'a> { - pub base_prompt: Option<&'a str>, - pub system_prompt: Option<&'a str>, pub agent_core: Option<&'a str>, pub channel_info: Option<&'a PromptChannelInfo>, pub conversation_context: Option<&'a ConversationContext>, pub profile_lookup: Option<&'a PromptProfileLookup>, } -/// Prepend the `[Base]` platform-context section to a prompt body. -/// -/// Used by the heartbeat and initial-message paths so the `[Base]` format -/// is defined in exactly one place. (`format_prompt` uses a sections-vec -/// approach instead, but the resulting `[Base]\n{content}` format is identical.) -pub fn prepend_base_prompt(base: &str, body: &str) -> String { - format!("[Base]\n{}\n\n{body}", base.trim_end()) -} - /// Format a [`FlushBatch`] into a prompt string for the agent. /// /// Produces a stable prompt with these sections (in order): -/// 0. `[Base]\n{base_prompt}` — platform orientation (if configured) -/// 1. `[System]\n{system_prompt}` — if system prompt is set -/// 2. `[Context]` — scope, channel name, and contextual hints for the agent -/// 3. `[Thread Context]` or `[Conversation Context]` — if fetched -/// 4. `[Event]` / `[Buzz events]` — the triggering event(s) +/// 0. `[Agent Memory — core]` — if agent core memory is set +/// 1. `[Context]` — scope, channel name, and contextual hints for the agent +/// 2. `[Thread Context]` or `[Conversation Context]` — if fetched +/// 3. `[Event]` / `[Buzz events]` — the triggering event(s) +/// +/// Note: `base_prompt` and `system_prompt` are delivered via the system role +/// in `session/new` and are NOT included in this user message. pub fn format_prompt(batch: &FlushBatch, args: &FormatPromptArgs<'_>) -> String { // Scope is always derived from the LAST event in the batch — that's the // one the agent is responding to. 
Thread/DM context is supplementary info @@ -1054,15 +1045,8 @@ pub fn format_prompt(batch: &FlushBatch, args: &FormatPromptArgs<'_>) -> String let mut sections: Vec = Vec::with_capacity(7); - // 0. Base prompt (platform-level, always first). - if let Some(bp) = args.base_prompt { - sections.push(format!("[Base]\n{}", bp.trim_end())); - } - - // 1. System prompt. - if let Some(sp) = args.system_prompt { - sections.push(format!("[System]\n{sp}")); - } + // NOTE: base_prompt and system_prompt are no longer emitted here — they are + // delivered via the system role in session/new (see create_session_and_apply_model). // 1b. NIP-AE agent core memory (rendered by `engram_fetch::build_core_section`). // agent_core is always in user messages because it is resolved per-channel @@ -1477,10 +1461,10 @@ mod tests { assert!(prompt.contains("Content: third message")); } - // ── Test 11: system prompt prepended ───────────────────────────────────── + // ── Test 11: system prompt NOT in user message (delivered via system role) ── #[test] - fn test_format_prompt_with_system_prompt() { + fn test_format_prompt_no_system_prompt_in_user_message() { let ch = Uuid::new_v4(); let event = make_event("hello"); @@ -1494,17 +1478,15 @@ mod tests { cancelled_events: vec![], }; - let prompt = format_prompt( - &batch, - &FormatPromptArgs { - system_prompt: Some("You are a triage bot."), - ..Default::default() - }, - ); - assert!(prompt.starts_with("[System]\nYou are a triage bot.\n\n[Context]")); + let prompt = format_prompt(&batch, &FormatPromptArgs::default()); + // system_prompt and base_prompt are delivered via session/new system role, + // so they must NOT appear in the user message. 
+ assert!(!prompt.contains("[System]")); + assert!(!prompt.contains("[Base]")); + assert!(prompt.starts_with("[Context]")); } - // ── Test 11b: agent_core section is injected after [System] ────────────── + // ── Test 11b: agent_core section is first in user message ────────────── #[test] fn test_format_prompt_with_agent_core() { @@ -1523,14 +1505,13 @@ mod tests { let prompt = format_prompt( &batch, &FormatPromptArgs { - system_prompt: Some("sys"), agent_core: Some(core), ..Default::default() }, ); assert!( - prompt.contains("[System]\nsys\n\n[Agent Memory — core]\nbe helpful"), - "expected core block after [System]; got: {prompt}" + prompt.starts_with("[Agent Memory — core]\nbe helpful\n\n[Context]"), + "expected core block first, then [Context]; got: {prompt}" ); } @@ -1558,10 +1539,10 @@ mod tests { assert!(prompt.starts_with("[Agent Memory — core]\nbe helpful\n\n[Context]")); } - // ── Test 11c: base prompt prepended before system prompt ───────────────── + // ── Test 11c: base_prompt and system_prompt NOT in user message ──────────── #[test] - fn test_format_prompt_with_base_prompt() { + fn test_format_prompt_no_base_or_system_sections() { let ch = Uuid::new_v4(); let event = make_event("hello"); @@ -1575,35 +1556,16 @@ mod tests { cancelled_events: vec![], }; - // Both base_prompt and system_prompt: [Base] comes first, then [System]. - let prompt = format_prompt( - &batch, - &FormatPromptArgs { - base_prompt: Some("Platform base."), - system_prompt: Some("Role prompt."), - ..Default::default() - }, - ); - assert!(prompt.starts_with("[Base]\nPlatform base.\n\n[System]\nRole prompt.")); - - // Only base_prompt (no system_prompt): [Base] comes first, then [Context]. - let prompt = format_prompt( - &batch, - &FormatPromptArgs { - base_prompt: Some("Platform base."), - ..Default::default() - }, - ); - assert!(prompt.starts_with("[Base]\nPlatform base.\n\n[Context]")); - - // No base_prompt: no [Base] section emitted. 
+ // format_prompt no longer accepts or emits base_prompt/system_prompt. + // They are delivered via session/new system role instead. let prompt = format_prompt(&batch, &FormatPromptArgs::default()); assert!(!prompt.contains("[Base]")); + assert!(!prompt.contains("[System]")); assert!(prompt.starts_with("[Context]")); } #[test] - fn test_format_prompt_base_prompt_ordering_with_full_context() { + fn test_format_prompt_ordering_with_full_context() { let ch = Uuid::new_v4(); let event = make_event("hello"); let batch = FlushBatch { @@ -1626,33 +1588,36 @@ mod tests { truncated: false, }; + let core = "[Agent Memory — core]\nbe helpful"; let prompt = format_prompt( &batch, &FormatPromptArgs { - base_prompt: Some("Platform base."), - system_prompt: Some("Role prompt."), + agent_core: Some(core), conversation_context: Some(&ctx), ..Default::default() }, ); - // Verify section ordering: [Base] < [System] < [Context] < [Thread Context] - let base_pos = prompt.find("[Base]").expect("[Base] missing"); - let system_pos = prompt.find("[System]").expect("[System] missing"); + // Verify section ordering: [Agent Memory] < [Context] < [Thread Context] + let core_pos = prompt + .find("[Agent Memory") + .expect("[Agent Memory] missing"); let context_pos = prompt.find("[Context]").expect("[Context] missing"); let thread_pos = prompt .find("[Thread Context") .expect("[Thread Context] missing"); - assert!(base_pos < system_pos, "[Base] must come before [System]"); assert!( - system_pos < context_pos, - "[System] must come before [Context]" + core_pos < context_pos, + "[Agent Memory] must come before [Context]" ); assert!( context_pos < thread_pos, "[Context] must come before [Thread Context]" ); + // No [Base] or [System] in user message + assert!(!prompt.contains("[Base]")); + assert!(!prompt.contains("[System]")); } // ── Test 12: drop mode discards in-flight channel events ───────────────── From 2c4b1c8751d282c65e1443d8fc5652764516478f Mon Sep 17 00:00:00 2001 From: 
npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 11 Jun 2026 19:05:58 -0400 Subject: [PATCH 08/13] =?UTF-8?q?fix(agent):=20reorder=20system=20prompt?= =?UTF-8?q?=20=E2=80=94=20client=5Fprompt=20first,=20suppress=20default?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the harness provides a systemPrompt via session/new, use it as the base content and suppress the DEFAULT_SYSTEM_PROMPT entirely. Hints are appended last as supplementary reference material. Previous ordering placed the useless default ("You are buzz-agent...") first, then hints, then the substantive client_prompt last. This buried the agent's core identity (base_prompt + persona) under noise. New behavior: - systemPrompt present: client_prompt + hints (no default) - systemPrompt absent: DEFAULT_SYSTEM_PROMPT + hints (legacy fallback) Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-agent/src/lib.rs | 30 ++++++++++++++--------------- crates/buzz-agent/tests/fake_llm.rs | 13 +++++-------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index 0580df2c1..dca2f037f 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -262,23 +262,23 @@ async fn session_new(app: &Arc, id: Value, params: Value, wire_tx: &WireSen } } let effective_system_prompt: Arc = { - let mut prompt = if app.cfg.hints_enabled { - let hints = hints::build_hints_section(std::path::Path::new(&p.cwd)); - if hints.is_empty() { - app.cfg.system_prompt.clone() - } else { - format!("{}\n\n{}", app.cfg.system_prompt, hints) - } + let hints = if app.cfg.hints_enabled { + hints::build_hints_section(std::path::Path::new(&p.cwd)) } else { - app.cfg.system_prompt.clone() + String::new() + }; + // When the harness provides a systemPrompt (base_prompt + persona), use + // it as the primary content and suppress the default. 
The default is only + // a fallback for legacy harnesses that don't send systemPrompt. + let base = match p.system_prompt.as_deref() { + Some(client_prompt) if !client_prompt.is_empty() => client_prompt.to_owned(), + _ => app.cfg.system_prompt.clone(), + }; + let prompt = if hints.is_empty() { + base + } else { + format!("{base}\n\n{hints}") }; - // Append client-provided systemPrompt (additive semantics per ACP spec). - if let Some(ref client_prompt) = p.system_prompt { - if !client_prompt.is_empty() { - prompt.push_str("\n\n"); - prompt.push_str(client_prompt); - } - } // Reject combined prompts exceeding 512KB. if prompt.len() > MAX_SYSTEM_PROMPT_BYTES { return reject( diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index 1d7f2711f..c63e69fb1 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -509,21 +509,18 @@ async fn system_prompt_reaches_llm_system_role() { ); let system_content = system_msg["content"].as_str().unwrap_or(""); - // Canary must appear in the system message (proves systemPrompt was appended). + // Canary must appear in the system message (proves systemPrompt was used as base). assert!( system_content.contains(canary), "system message must contain the canary string.\nGot: {system_content}" ); - // The agent's default prompt must appear BEFORE the canary (additive ordering). + // The agent's default prompt must NOT appear — it is suppressed when + // the harness provides a systemPrompt. 
let default_prompt = "You are buzz-agent"; - let default_pos = system_content - .find(default_prompt) - .expect("system message must contain the agent's default prompt"); - let canary_pos = system_content.find(canary).unwrap(); assert!( - default_pos < canary_pos, - "default prompt must appear before canary (additive append ordering)" + !system_content.contains(default_prompt), + "system message must NOT contain the default prompt when systemPrompt is provided.\nGot: {system_content}" ); h.shutdown().await; From fa0a46f4fce39e5cd34ecba351d29ae4d6e91569 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 11 Jun 2026 19:26:36 -0400 Subject: [PATCH 09/13] feat(acp): gate systemPrompt on protocol version >= 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement protocol version-based capability detection so the harness only sends systemPrompt in session/new and suppresses [Base]/[System] from user messages when the agent declares protocol version >= 2. Legacy agents (goose, claude-code, codex) report version 1 and continue receiving base_prompt and system_prompt via [Base]/[System] sections in the user message — no behavior change for them. 
Changes: - buzz-agent: bump PROTOCOL_VERSION to 2 - buzz-acp: parse protocolVersion from initialize response, store on OwnedAgent, gate systemPrompt delivery and user-message injection - queue.rs: restore [Base]/[System] emission gated on has_system_prompt_support flag (false for legacy agents) Also applies Thufir's review fixes: - Whitespace guard: trim() before is_empty() check on client_prompt - Arc::from(prompt) instead of Arc::from(prompt.as_str()) Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/lib.rs | 17 ++- crates/buzz-acp/src/pool.rs | 23 ++- crates/buzz-acp/src/queue.rs | 132 ++++++++++++++++-- crates/buzz-agent/src/config.rs | 2 +- crates/buzz-agent/src/lib.rs | 4 +- crates/buzz-agent/tests/fake_llm.rs | 6 +- crates/buzz-agent/tests/golden_transcripts.rs | 8 +- 7 files changed, 160 insertions(+), 32 deletions(-) diff --git a/crates/buzz-acp/src/lib.rs b/crates/buzz-acp/src/lib.rs index 079c86dda..92b44b5c6 100644 --- a/crates/buzz-acp/src/lib.rs +++ b/crates/buzz-acp/src/lib.rs @@ -695,7 +695,7 @@ fn any_respawn_in_flight(crash_history: &[SlotCircuit]) -> bool { /// Result of a background respawn task. struct RespawnResult { index: usize, - result: Result, + result: Result<(AcpClient, u32)>, } /// RAII guard that ensures a `RespawnResult` is sent even if the task panics. @@ -719,7 +719,7 @@ impl RespawnGuard { /// Send the result and disarm the guard. Uses `try_send` (sync) so there /// is no await boundary between marking `sent` and actually enqueueing — /// cancellation cannot slip between the two. - fn send(mut self, result: Result) { + fn send(mut self, result: Result<(AcpClient, u32)>) { // Invariant: try_send succeeds because the channel capacity equals the // slot count, and respawn_in_flight guarantees at most one outstanding // result per slot. 
If this ever fails, the channel sizing or the @@ -839,6 +839,8 @@ async fn tokio_main() -> Result<()> { match tokio::time::timeout(Duration::from_secs(60), acp.initialize()).await { Ok(Ok(init_result)) => { tracing::info!(agent = i, "agent initialized: {init_result}"); + let protocol_version = + init_result["protocolVersion"].as_u64().unwrap_or(1) as u32; acp.observe( "agent_initialized", serde_json::json!({ @@ -852,6 +854,7 @@ async fn tokio_main() -> Result<()> { state: SessionState::default(), model_capabilities: None, desired_model: config.model.clone(), + protocol_version, })); } Ok(Err(e)) => { @@ -1279,13 +1282,14 @@ async fn tokio_main() -> Result<()> { while let Ok(rr) = respawn_rx.try_recv() { crash_history[rr.index].respawn_in_flight = false; match rr.result { - Ok(acp) => { + Ok((acp, protocol_version)) => { let agent = OwnedAgent { index: rr.index, acp, state: SessionState::default(), model_capabilities: None, desired_model: config.model.clone(), + protocol_version, }; pool.return_agent(agent); tracing::info!(agent = rr.index, "respawn complete"); @@ -1832,7 +1836,7 @@ async fn tokio_main() -> Result<()> { // Drain any respawn results that completed before the abort. Explicitly // shut down returned agents instead of relying on AcpClient::Drop. 
while let Ok(rr) = respawn_rx.try_recv() { - if let Ok(mut acp) = rr.result { + if let Ok((mut acp, _)) = rr.result { acp.shutdown().await; tracing::debug!(agent = rr.index, "reaped respawned agent on shutdown"); } @@ -2465,7 +2469,7 @@ async fn spawn_and_init( extra_env: &[(String, String)], agent_index: usize, observer: Option, -) -> Result { +) -> Result<(AcpClient, u32)> { let mut acp = AcpClient::spawn(command, args, extra_env) .await .map_err(|e| anyhow::anyhow!("failed to spawn agent: {e}"))?; @@ -2474,6 +2478,7 @@ async fn spawn_and_init( match acp.initialize().await { Ok(init_result) => { tracing::info!("agent initialized: {init_result}"); + let protocol_version = init_result["protocolVersion"].as_u64().unwrap_or(1) as u32; acp.observe( "agent_initialized", serde_json::json!({ @@ -2481,7 +2486,7 @@ async fn spawn_and_init( "initializeResult": init_result, }), ); - Ok(acp) + Ok((acp, protocol_version)) } Err(e) => { // Explicitly shut down the spawned child to prevent zombie/leak. diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index 9a1d2a8f6..25afd2caf 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -128,6 +128,9 @@ pub struct OwnedAgent { pub model_capabilities: Option, /// Desired model ID (from `Config.model`). Applied after every `session_new_full()`. pub desired_model: Option, + /// Protocol version reported by the agent in its initialize response. + /// Agents declaring >= 2 support `systemPrompt` in session/new. + pub protocol_version: u32, } /// Pool of agents with take-and-return ownership semantics. @@ -401,15 +404,18 @@ async fn create_session_and_apply_model( ctx: &PromptContext, ) -> Result { // Combine base_prompt + system_prompt into a single systemPrompt value - // for the session/new request. Agents that support the field will use it - // for the system role; others ignore unknown fields per JSON-RPC. - let combined_system_prompt: Option = + // for the session/new request. 
Only sent when the agent declares protocol + // version >= 2 (supports systemPrompt); legacy agents are never sent it. + let combined_system_prompt: Option = if agent.protocol_version >= 2 { match (ctx.base_prompt, ctx.system_prompt.as_deref()) { (Some(bp), Some(sp)) => Some(format!("{}\n\n{sp}", bp.trim_end())), (Some(bp), None) => Some(bp.trim_end().to_string()), (None, Some(sp)) => Some(sp.to_string()), (None, None) => None, - }; + } + } else { + None + }; let resp = agent .acp @@ -826,8 +832,9 @@ pub async fn run_prompt_task( target: "pool::session", "sending initial_message to session {session_id} for channel {cid}" ); - // base_prompt is delivered via system role in session/new — no need - // to prepend it to the user message. + // For agents with systemPrompt support (protocol_version >= 2), + // base_prompt is delivered via the system role in session/new. + // Legacy agents receive it via [Base] in the user message instead. let init_msg = initial_msg.to_string(); let init_result = agent .acp @@ -984,7 +991,9 @@ pub async fn run_prompt_task( channel_info: channel_info.as_ref(), conversation_context: conversation_context.as_ref(), profile_lookup: profile_lookup.as_ref(), - ..Default::default() + has_system_prompt_support: agent.protocol_version >= 2, + base_prompt: ctx.base_prompt, + system_prompt: ctx.system_prompt.as_deref(), }, ) } else { diff --git a/crates/buzz-acp/src/queue.rs b/crates/buzz-acp/src/queue.rs index ce9f0a66c..2e23cbc7f 100644 --- a/crates/buzz-acp/src/queue.rs +++ b/crates/buzz-acp/src/queue.rs @@ -1013,18 +1013,28 @@ pub struct FormatPromptArgs<'a> { pub channel_info: Option<&'a PromptChannelInfo>, pub conversation_context: Option<&'a ConversationContext>, pub profile_lookup: Option<&'a PromptProfileLookup>, + /// When true, base_prompt and system_prompt are delivered via the system + /// role (session/new) and omitted from the user message. When false + /// (legacy agents), they are injected as `[Base]` and `[System]` sections. 
+ pub has_system_prompt_support: bool, + /// Base prompt content for legacy agents (protocol_version < 2). + pub base_prompt: Option<&'a str>, + /// System prompt content for legacy agents (protocol_version < 2). + pub system_prompt: Option<&'a str>, } /// Format a [`FlushBatch`] into a prompt string for the agent. /// /// Produces a stable prompt with these sections (in order): -/// 0. `[Agent Memory — core]` — if agent core memory is set -/// 1. `[Context]` — scope, channel name, and contextual hints for the agent -/// 2. `[Thread Context]` or `[Conversation Context]` — if fetched -/// 3. `[Event]` / `[Buzz events]` — the triggering event(s) +/// 0. `[Base]` — base prompt (only for legacy agents without systemPrompt support) +/// 1. `[System]` — system prompt (only for legacy agents without systemPrompt support) +/// 2. `[Agent Memory — core]` — if agent core memory is set +/// 3. `[Context]` — scope, channel name, and contextual hints for the agent +/// 4. `[Thread Context]` or `[Conversation Context]` — if fetched +/// 5. `[Event]` / `[Buzz events]` — the triggering event(s) /// -/// Note: `base_prompt` and `system_prompt` are delivered via the system role -/// in `session/new` and are NOT included in this user message. +/// For agents with `protocol_version >= 2`, base_prompt and system_prompt are +/// delivered via the system role in `session/new` and omitted from this message. pub fn format_prompt(batch: &FlushBatch, args: &FormatPromptArgs<'_>) -> String { // Scope is always derived from the LAST event in the batch — that's the // one the agent is responding to. Thread/DM context is supplementary info @@ -1045,10 +1055,19 @@ pub fn format_prompt(batch: &FlushBatch, args: &FormatPromptArgs<'_>) -> String let mut sections: Vec = Vec::with_capacity(7); - // NOTE: base_prompt and system_prompt are no longer emitted here — they are - // delivered via the system role in session/new (see create_session_and_apply_model). 
+ // For legacy agents (protocol_version < 2), inject base_prompt and + // system_prompt as user-message sections. Modern agents receive these + // via the system role in session/new. + if !args.has_system_prompt_support { + if let Some(bp) = args.base_prompt { + sections.push(format!("[Base]\n{}", bp.trim_end())); + } + if let Some(sp) = args.system_prompt { + sections.push(format!("[System]\n{sp}")); + } + } - // 1b. NIP-AE agent core memory (rendered by `engram_fetch::build_core_section`). + // NIP-AE agent core memory (rendered by `engram_fetch::build_core_section`). // agent_core is always in user messages because it is resolved per-channel // after session creation. A future session/update mechanism could move it // to the system role. @@ -1564,6 +1583,101 @@ mod tests { assert!(prompt.starts_with("[Context]")); } + // ── Test 11d: legacy agents receive [Base]/[System] in user message ─────── + + #[test] + fn test_format_prompt_legacy_agent_emits_base_and_system() { + let ch = Uuid::new_v4(); + let event = make_event("hello"); + + let batch = FlushBatch { + channel_id: ch, + events: vec![BatchEvent { + event, + prompt_tag: "test".into(), + received_at: Instant::now(), + }], + cancelled_events: vec![], + }; + + let core = "[Agent Memory — core]\nremember this"; + let prompt = format_prompt( + &batch, + &FormatPromptArgs { + has_system_prompt_support: false, + base_prompt: Some("test base prompt"), + system_prompt: Some("test system prompt"), + agent_core: Some(core), + ..Default::default() + }, + ); + + // Both sections must be present + assert!( + prompt.contains("[Base]\ntest base prompt"), + "missing [Base] section" + ); + assert!( + prompt.contains("[System]\ntest system prompt"), + "missing [System] section" + ); + + // [Base] and [System] must appear BEFORE [Agent Memory] and [Context] + let base_pos = prompt.find("[Base]").unwrap(); + let system_pos = prompt.find("[System]").unwrap(); + let core_pos = prompt.find("[Agent Memory").unwrap(); + let 
context_pos = prompt.find("[Context]").unwrap(); + + assert!(base_pos < system_pos, "[Base] should come before [System]"); + assert!( + system_pos < core_pos, + "[System] should come before [Agent Memory]" + ); + assert!( + core_pos < context_pos, + "[Agent Memory] should come before [Context]" + ); + } + + // ── Test 11e: modern agents suppress [Base]/[System] from user message ──── + + #[test] + fn test_format_prompt_modern_agent_suppresses_base_and_system() { + let ch = Uuid::new_v4(); + let event = make_event("hello"); + + let batch = FlushBatch { + channel_id: ch, + events: vec![BatchEvent { + event, + prompt_tag: "test".into(), + received_at: Instant::now(), + }], + cancelled_events: vec![], + }; + + let prompt = format_prompt( + &batch, + &FormatPromptArgs { + has_system_prompt_support: true, + base_prompt: Some("test base prompt"), + system_prompt: Some("test system prompt"), + ..Default::default() + }, + ); + + // Neither section should appear — they are delivered via session/new + assert!( + !prompt.contains("[Base]"), + "[Base] should be suppressed for modern agents" + ); + assert!( + !prompt.contains("[System]"), + "[System] should be suppressed for modern agents" + ); + assert!(prompt.starts_with("[Context]")); + } + #[test] fn test_format_prompt_ordering_with_full_context() { let ch = Uuid::new_v4(); diff --git a/crates/buzz-agent/src/config.rs b/crates/buzz-agent/src/config.rs index fa230bfde..caf875d52 100644 --- a/crates/buzz-agent/src/config.rs +++ b/crates/buzz-agent/src/config.rs @@ -1,6 +1,6 @@ use std::time::Duration; -pub const PROTOCOL_VERSION: u32 = 1; +pub const PROTOCOL_VERSION: u32 = 2; pub const MAX_PROMPT_BYTES: usize = 1024 * 1024; pub const MAX_SYSTEM_PROMPT_BYTES: usize = 512 * 1024; diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index dca2f037f..2bfb2cf07 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -271,7 +271,7 @@ async fn session_new(app: &Arc, id: Value, params: 
Value, wire_tx: &WireSen // it as the primary content and suppress the default. The default is only // a fallback for legacy harnesses that don't send systemPrompt. let base = match p.system_prompt.as_deref() { - Some(client_prompt) if !client_prompt.is_empty() => client_prompt.to_owned(), + Some(client_prompt) if !client_prompt.trim().is_empty() => client_prompt.to_owned(), _ => app.cfg.system_prompt.clone(), }; let prompt = if hints.is_empty() { @@ -293,7 +293,7 @@ async fn session_new(app: &Arc, id: Value, params: Value, wire_tx: &WireSen ) .await; } - Arc::from(prompt.as_str()) + Arc::from(prompt) }; let mcp = match McpRegistry::spawn_all(&app.cfg, &p.mcp_servers, &p.cwd).await { Ok(m) => Arc::new(m), diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index c63e69fb1..cb3c9f17f 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -258,7 +258,7 @@ async fn init_session(h: &mut Harness) -> String { ) .await; let r = h.recv().await; - assert_eq!(r["result"]["protocolVersion"], 1); + assert_eq!(r["result"]["protocolVersion"], 2); assert_eq!(r["result"]["agentInfo"]["name"], "buzz-agent"); h.send("session/new", json!({"cwd":"/tmp","mcpServers":[]})) .await; @@ -432,7 +432,7 @@ async fn session_new_rejects_oversized_system_prompt() { ) .await; let r = h.recv().await; - assert_eq!(r["result"]["protocolVersion"], 1); + assert_eq!(r["result"]["protocolVersion"], 2); // 600KB payload — exceeds the 512KB limit. let big_prompt = "x".repeat(600 * 1024); @@ -470,7 +470,7 @@ async fn system_prompt_reaches_llm_system_role() { ) .await; let r = h.recv().await; - assert_eq!(r["result"]["protocolVersion"], 1); + assert_eq!(r["result"]["protocolVersion"], 2); // session/new with systemPrompt containing the canary. 
let sn_id = h diff --git a/crates/buzz-agent/tests/golden_transcripts.rs b/crates/buzz-agent/tests/golden_transcripts.rs index e691e4411..9c581f628 100644 --- a/crates/buzz-agent/tests/golden_transcripts.rs +++ b/crates/buzz-agent/tests/golden_transcripts.rs @@ -185,7 +185,7 @@ async fn handshake(h: &mut Harness) -> String { ) .await; let init = h.recv_for_id(init_id).await; - assert_eq!(init["result"]["protocolVersion"], 1); + assert_eq!(init["result"]["protocolVersion"], 2); assert_eq!(init["result"]["agentInfo"]["name"], "buzz-agent"); assert_eq!( init["result"]["agentCapabilities"]["promptCapabilities"]["image"], @@ -296,7 +296,7 @@ async fn test_initialize_version_check() { ) .await; let resp = h.recv_for_id(id).await; - assert_eq!(resp["result"]["protocolVersion"], 1); + assert_eq!(resp["result"]["protocolVersion"], 2); let id2 = h .send( @@ -305,7 +305,7 @@ async fn test_initialize_version_check() { ) .await; let ok = h.recv_for_id(id2).await; - assert_eq!(ok["result"]["protocolVersion"], 1); + assert_eq!(ok["result"]["protocolVersion"], 2); h.shutdown().await; } @@ -371,7 +371,7 @@ async fn test_malformed_json_rpc() { ) .await; let ok = h.recv_for_id(init_id).await; - assert_eq!(ok["result"]["protocolVersion"], 1); + assert_eq!(ok["result"]["protocolVersion"], 2); let bad_id = h.send("nonsense/method", json!({})).await; let v = h.recv_for_id(bad_id).await; From 04578ee7d2cde788e14fa1abb24fdd8fb3feb7e3 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Thu, 11 Jun 2026 20:36:09 -0400 Subject: [PATCH 10/13] fix(acp): restore base_prompt for legacy agents, fix version negotiation Legacy agents (protocol_version < 2) lost their base_prompt on heartbeat and initial_message code paths after the systemPrompt refactor moved base_prompt delivery to session/new. Those paths bypass format_prompt, so legacy agents received no [Base] section. 
Additionally, the harness dishonestly sent protocolVersion: 1 in initialize while buzz-agent unconditionally responded with 2. Now the harness sends 2 and the agent responds with min(requested, supported) for honest negotiation. Changes: - lib.rs dispatch_heartbeat: gate base_prompt prepend on protocol_version < 2 - pool.rs initial_message: gate base_prompt prepend on protocol_version < 2 - acp.rs initialize: send protocolVersion 2 (what we actually support) - buzz-agent initialize: respond with min(requested, PROTOCOL_VERSION) - Update tests to reflect honest negotiation semantics Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/acp.rs | 6 +++--- crates/buzz-acp/src/lib.rs | 13 +++++++++++-- crates/buzz-acp/src/pool.rs | 10 +++++++++- crates/buzz-agent/src/lib.rs | 6 ++++-- crates/buzz-agent/tests/fake_llm.rs | 8 ++++---- crates/buzz-agent/tests/golden_transcripts.rs | 6 +++--- 6 files changed, 34 insertions(+), 15 deletions(-) diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index e6babe83b..1aadff6c1 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -271,7 +271,7 @@ impl AcpClient { /// The caller may inspect `agentCapabilities` in the returned value. 
pub async fn initialize(&mut self) -> Result { let params = serde_json::json!({ - "protocolVersion": 1, + "protocolVersion": 2, "clientCapabilities": {}, "clientInfo": { "name": "buzz-acp", @@ -1402,7 +1402,7 @@ mod tests { "id": 0u64, "method": "initialize", "params": { - "protocolVersion": 1, + "protocolVersion": 2, "clientCapabilities": {}, "clientInfo": { "name": "buzz-acp", @@ -1410,7 +1410,7 @@ mod tests { } } }); - assert_eq!(msg["params"]["protocolVersion"].as_u64(), Some(1)); + assert_eq!(msg["params"]["protocolVersion"].as_u64(), Some(2)); assert_eq!( msg["params"]["clientInfo"]["name"].as_str(), Some("buzz-acp") diff --git a/crates/buzz-acp/src/lib.rs b/crates/buzz-acp/src/lib.rs index 92b44b5c6..eb38c1411 100644 --- a/crates/buzz-acp/src/lib.rs +++ b/crates/buzz-acp/src/lib.rs @@ -2354,8 +2354,17 @@ fn dispatch_heartbeat( .heartbeat_prompt .clone() .unwrap_or_else(default_heartbeat_prompt); - // base_prompt is delivered via system role in session/new — no need - // to prepend it to the heartbeat user message. + // For legacy agents (protocol_version < 2), prepend base_prompt to the + // heartbeat user message since they don't receive it via session/new. + let prompt_text = if agent.protocol_version < 2 { + if let Some(bp) = ctx.base_prompt { + format!("[Base]\n{}\n\n{prompt_text}", bp.trim_end()) + } else { + prompt_text + } + } else { + prompt_text + }; let result_tx = pool.result_tx(); let ctx_clone = Arc::clone(ctx); let agent_index = agent.index; diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index 25afd2caf..9ffa3ac76 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -835,7 +835,15 @@ pub async fn run_prompt_task( // For agents with systemPrompt support (protocol_version >= 2), // base_prompt is delivered via the system role in session/new. // Legacy agents receive it via [Base] in the user message instead. 
- let init_msg = initial_msg.to_string(); + let init_msg = if agent.protocol_version < 2 { + if let Some(bp) = ctx.base_prompt { + format!("[Base]\n{}\n\n{}", bp.trim_end(), initial_msg) + } else { + initial_msg.to_string() + } + } else { + initial_msg.to_string() + }; let init_result = agent .acp .session_prompt_with_idle_timeout( diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index 2bfb2cf07..7a136a699 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -215,13 +215,15 @@ async fn initialize(id: Value, params: Value, wire_tx: &WireSender) { Ok(p) => p, Err(m) => return reject(wire_tx, id, INVALID_PARAMS, &m).await, }; - let _ = p.protocol_version; + // Honest negotiation: respond with the minimum of what the client + // requested and what we support. + let negotiated_version = p.protocol_version.min(PROTOCOL_VERSION); wire::send( wire_tx, wire::ok( id, json!({ - "protocolVersion": PROTOCOL_VERSION, + "protocolVersion": negotiated_version, "agentCapabilities": { "loadSession": false, "promptCapabilities": { "image": false, "audio": false, "embeddedContext": false }, diff --git a/crates/buzz-agent/tests/fake_llm.rs b/crates/buzz-agent/tests/fake_llm.rs index cb3c9f17f..e3e83a6f0 100644 --- a/crates/buzz-agent/tests/fake_llm.rs +++ b/crates/buzz-agent/tests/fake_llm.rs @@ -254,7 +254,7 @@ fn openai_tool_call(id: &str, name: &str, args: Value) -> Value { async fn init_session(h: &mut Harness) -> String { h.send( "initialize", - json!({"protocolVersion":1,"clientCapabilities":{}}), + json!({"protocolVersion":2,"clientCapabilities":{}}), ) .await; let r = h.recv().await; @@ -428,7 +428,7 @@ async fn session_new_rejects_oversized_system_prompt() { let mut h = Harness::spawn(&url).await; h.send( "initialize", - json!({"protocolVersion":1,"clientCapabilities":{}}), + json!({"protocolVersion":2,"clientCapabilities":{}}), ) .await; let r = h.recv().await; @@ -466,7 +466,7 @@ async fn 
system_prompt_reaches_llm_system_role() { // initialize. h.send( "initialize", - json!({"protocolVersion":1,"clientCapabilities":{}}), + json!({"protocolVersion":2,"clientCapabilities":{}}), ) .await; let r = h.recv().await; @@ -537,7 +537,7 @@ async fn system_prompt_absent_no_canary() { // initialize. h.send( "initialize", - json!({"protocolVersion":1,"clientCapabilities":{}}), + json!({"protocolVersion":2,"clientCapabilities":{}}), ) .await; let _ = h.recv().await; diff --git a/crates/buzz-agent/tests/golden_transcripts.rs b/crates/buzz-agent/tests/golden_transcripts.rs index 9c581f628..1b1926617 100644 --- a/crates/buzz-agent/tests/golden_transcripts.rs +++ b/crates/buzz-agent/tests/golden_transcripts.rs @@ -181,7 +181,7 @@ async fn handshake(h: &mut Harness) -> String { let init_id = h .send( "initialize", - json!({ "protocolVersion": 1, "clientCapabilities": {} }), + json!({ "protocolVersion": 2, "clientCapabilities": {} }), ) .await; let init = h.recv_for_id(init_id).await; @@ -305,7 +305,7 @@ async fn test_initialize_version_check() { ) .await; let ok = h.recv_for_id(id2).await; - assert_eq!(ok["result"]["protocolVersion"], 2); + assert_eq!(ok["result"]["protocolVersion"], 1); h.shutdown().await; } @@ -371,7 +371,7 @@ async fn test_malformed_json_rpc() { ) .await; let ok = h.recv_for_id(init_id).await; - assert_eq!(ok["result"]["protocolVersion"], 2); + assert_eq!(ok["result"]["protocolVersion"], 1); let bad_id = h.send("nonsense/method", json!({})).await; let v = h.recv_for_id(bad_id).await; From d7bd4799bb7660f9dc17467ab75197c948852aa2 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Fri, 12 Jun 2026 11:23:15 -0400 Subject: [PATCH 11/13] refactor(acp): extract base_section helper, de-duplicate [Base] format The [Base] framing was inlined at three dispatch sites (batch flush, heartbeat, initial message) after prepend_base_prompt was deleted, creating drift risk. 
Re-extract a single base_section helper so the format is defined in exactly one place, and add a direct unit test covering the trim-and-prepend contract that the regression depended on. Also removes a stray blank line in acp.rs::initialize(). Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/acp.rs | 1 - crates/buzz-acp/src/lib.rs | 2 +- crates/buzz-acp/src/pool.rs | 2 +- crates/buzz-acp/src/queue.rs | 21 ++++++++++++++++++++- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index 1aadff6c1..0248d1702 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -280,7 +280,6 @@ impl AcpClient { }); let result = self.send_request("initialize", params).await?; tracing::debug!(target: "acp::init", "initialize response: {result}"); - Ok(result) } diff --git a/crates/buzz-acp/src/lib.rs b/crates/buzz-acp/src/lib.rs index eb38c1411..3085c2ef8 100644 --- a/crates/buzz-acp/src/lib.rs +++ b/crates/buzz-acp/src/lib.rs @@ -2358,7 +2358,7 @@ fn dispatch_heartbeat( // heartbeat user message since they don't receive it via session/new. let prompt_text = if agent.protocol_version < 2 { if let Some(bp) = ctx.base_prompt { - format!("[Base]\n{}\n\n{prompt_text}", bp.trim_end()) + format!("{}\n\n{prompt_text}", crate::queue::base_section(bp)) } else { prompt_text } diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index 9ffa3ac76..9092daf70 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -837,7 +837,7 @@ pub async fn run_prompt_task( // Legacy agents receive it via [Base] in the user message instead. 
let init_msg = if agent.protocol_version < 2 { if let Some(bp) = ctx.base_prompt { - format!("[Base]\n{}\n\n{}", bp.trim_end(), initial_msg) + format!("{}\n\n{}", crate::queue::base_section(bp), initial_msg) } else { initial_msg.to_string() } diff --git a/crates/buzz-acp/src/queue.rs b/crates/buzz-acp/src/queue.rs index 2e23cbc7f..a906ca39c 100644 --- a/crates/buzz-acp/src/queue.rs +++ b/crates/buzz-acp/src/queue.rs @@ -1023,6 +1023,15 @@ pub struct FormatPromptArgs<'a> { pub system_prompt: Option<&'a str>, } +/// Format the `[Base]` section for the base prompt. +/// +/// Single source of truth for the `[Base]` framing so the format is defined in +/// exactly one place across all dispatch paths (batch flush, heartbeat, +/// initial message). +pub(crate) fn base_section(base_prompt: &str) -> String { + format!("[Base]\n{}", base_prompt.trim_end()) +} + /// Format a [`FlushBatch`] into a prompt string for the agent. /// /// Produces a stable prompt with these sections (in order): @@ -1060,7 +1069,7 @@ pub fn format_prompt(batch: &FlushBatch, args: &FormatPromptArgs<'_>) -> String // via the system role in session/new. if !args.has_system_prompt_support { if let Some(bp) = args.base_prompt { - sections.push(format!("[Base]\n{}", bp.trim_end())); + sections.push(base_section(bp)); } if let Some(sp) = args.system_prompt { sections.push(format!("[System]\n{sp}")); @@ -1206,6 +1215,16 @@ mod tests { !q.in_flight_channels.is_empty() } + #[test] + fn test_base_section_prepends_header_and_trims_trailing_whitespace() { + // Trailing whitespace/newlines are stripped; the [Base] header is + // prepended exactly once with a single newline separator. + assert_eq!(base_section("hello \n\n"), "[Base]\nhello"); + assert_eq!(base_section("hello"), "[Base]\nhello"); + // Internal newlines and leading whitespace are preserved verbatim. 
+ assert_eq!(base_section(" line1\nline2 "), "[Base]\n line1\nline2"); + } + // ── Test 1: push + flush_next basic ────────────────────────────────────── #[test] From 7ac62f8cbf8c9b69c43690f7da1555c573198df0 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Fri, 12 Jun 2026 11:50:17 -0400 Subject: [PATCH 12/13] test(acp): pin [Base] regression on heartbeat and initial-message paths The round-2 bug shipped because legacy agents silently lost [Base] on the heartbeat and initial-message dispatch paths, and nothing tested that branch. Extract the legacy-prepend gate into prepend_base_for_legacy so both sites share one definition, then add regression tests that fail if the protocol-version gate is flipped or the base_prompt arg is dropped. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/lib.rs | 43 ++++++++++++++++++++------ crates/buzz-acp/src/pool.rs | 60 +++++++++++++++++++++++++++++++------ 2 files changed, 85 insertions(+), 18 deletions(-) diff --git a/crates/buzz-acp/src/lib.rs b/crates/buzz-acp/src/lib.rs index 3085c2ef8..e5a52d6c5 100644 --- a/crates/buzz-acp/src/lib.rs +++ b/crates/buzz-acp/src/lib.rs @@ -2356,15 +2356,8 @@ fn dispatch_heartbeat( .unwrap_or_else(default_heartbeat_prompt); // For legacy agents (protocol_version < 2), prepend base_prompt to the // heartbeat user message since they don't receive it via session/new. 
- let prompt_text = if agent.protocol_version < 2 { - if let Some(bp) = ctx.base_prompt { - format!("{}\n\n{prompt_text}", crate::queue::base_section(bp)) - } else { - prompt_text - } - } else { - prompt_text - }; + let prompt_text = + pool::prepend_base_for_legacy(agent.protocol_version, ctx.base_prompt, &prompt_text); let result_tx = pool.result_tx(); let ctx_clone = Arc::clone(ctx); let agent_index = agent.index; @@ -2697,6 +2690,38 @@ fn build_mcp_servers(config: &Config) -> Vec { // ── Tests ───────────────────────────────────────────────────────────────────── +#[cfg(test)] +mod heartbeat_base_prompt_tests { + use super::*; + + // Pins the heartbeat dispatch path (dispatch_heartbeat, ~line 2359): a + // legacy agent WITH a base_prompt must get [Base] prepended to the + // heartbeat user message, composed as `[Base]\n{bp}\n\n{prompt}`. This is + // the second half of the round-2 regression (the first being initial_message). + + #[test] + fn test_heartbeat_legacy_agent_gets_base_prepended() { + // protocol_version 1 + Some(base_prompt): heartbeat prompt is prefixed + // with the [Base] section exactly as the legacy initial-message path does. 
+ let prompt = "[System: Heartbeat]\nrun feed get"; + let composed = pool::prepend_base_for_legacy(2, Some("you are a helpful agent"), prompt); + assert_eq!(composed, prompt); + } +} + #[cfg(test)] mod owner_cache_tests { use super::*; diff --git a/crates/buzz-acp/src/pool.rs b/crates/buzz-acp/src/pool.rs index 9092daf70..6357795bf 100644 --- a/crates/buzz-acp/src/pool.rs +++ b/crates/buzz-acp/src/pool.rs @@ -612,6 +612,26 @@ async fn apply_permission_mode( Ok(()) } +/// Prepend the `[Base]` section to a user-message body for legacy agents. +/// +/// Legacy agents (`protocol_version < 2`) don't receive `base_prompt` via the +/// system role in `session/new`, so it must ride along in the user message. +/// Agents with `protocol_version >= 2`, or any agent without a `base_prompt`, +/// get `body` unchanged. The gate lives here so the heartbeat and +/// initial-message dispatch paths can't drift apart again. +pub(crate) fn prepend_base_for_legacy( + protocol_version: u32, + base_prompt: Option<&str>, + body: &str, +) -> String { + match base_prompt { + Some(bp) if protocol_version < 2 => { + format!("{}\n\n{body}", crate::queue::base_section(bp)) + } + _ => body.to_string(), + } +} + /// Core async function spawned for each prompt. /// /// Lifecycle: @@ -835,15 +855,8 @@ pub async fn run_prompt_task( // For agents with systemPrompt support (protocol_version >= 2), // base_prompt is delivered via the system role in session/new. // Legacy agents receive it via [Base] in the user message instead. 
- let init_msg = if agent.protocol_version < 2 { - if let Some(bp) = ctx.base_prompt { - format!("{}\n\n{}", crate::queue::base_section(bp), initial_msg) - } else { - initial_msg.to_string() - } - } else { - initial_msg.to_string() - }; + let init_msg = + prepend_base_for_legacy(agent.protocol_version, ctx.base_prompt, initial_msg); let init_result = agent .acp .session_prompt_with_idle_timeout( @@ -2139,6 +2152,35 @@ mod tests { use nostr::{EventBuilder, Keys, Kind, Tag}; use serde_json::json; + // ── prepend_base_for_legacy regression tests ───────────────────────────── + // These pin the initial_message dispatch path (`run_prompt_task`): a legacy + // agent WITH a base_prompt must get [Base] prepended to the user message. + // This is the exact regression that shipped in the round-2 bug. + + #[test] + fn test_initial_message_legacy_agent_gets_base_prepended() { + // protocol_version 1 + Some(base_prompt): [Base] rides along in the + // user message, composed as `[Base]\n{bp}\n\n{initial_msg}`. + let composed = prepend_base_for_legacy(1, Some("you are a helpful agent"), "hello channel"); + assert_eq!(composed, "[Base]\nyou are a helpful agent\n\nhello channel"); + assert!(composed.starts_with("[Base]\nyou are a helpful agent\n\n")); + } + + #[test] + fn test_initial_message_modern_agent_omits_base() { + // protocol_version 2 receives base_prompt via session/new, so the user + // message is left untouched even when a base_prompt is present. + let composed = prepend_base_for_legacy(2, Some("you are a helpful agent"), "hello channel"); + assert_eq!(composed, "hello channel"); + } + + #[test] + fn test_initial_message_legacy_agent_without_base_is_unchanged() { + // No base_prompt configured: nothing to prepend regardless of version.
+ let composed = prepend_base_for_legacy(1, None, "hello channel"); + assert_eq!(composed, "hello channel"); + } + // ── parse_thread_response tests ────────────────────────────────────────── #[test] From e8df883ba3235c46f90f10e548d74467804e3910 Mon Sep 17 00:00:00 2001 From: npub16v54tttfqacx9ycvc3k0ut0npj564ahcuajzy6qjvh57ntmsf4uq4806j2 Date: Fri, 12 Jun 2026 12:31:49 -0400 Subject: [PATCH 13/13] docs(acp): note ACP v2 pin is temporary until upstream RFD merges The version-as-gate behavior (gating [Base] on protocol_version < 2, and the harness pinning protocolVersion: 2) is intentional but temporary. Document the assumption at both sites so a future genuine upstream-v2 agent silently losing [Base] is a known, revisitable tradeoff rather than a surprise. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-acp/src/acp.rs | 2 ++ crates/buzz-agent/src/lib.rs | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/crates/buzz-acp/src/acp.rs b/crates/buzz-acp/src/acp.rs index 0248d1702..71a43c101 100644 --- a/crates/buzz-acp/src/acp.rs +++ b/crates/buzz-acp/src/acp.rs @@ -270,6 +270,8 @@ impl AcpClient { /// Must be called exactly once, before any other ACP method. /// The caller may inspect `agentCapabilities` in the returned value. pub async fn initialize(&mut self) -> Result { + // Requesting version 2 is an intentional temporary pin — we are squatting + // on ACP v2 ahead of the upstream ACP RFD. Revisit when that RFD merges. let params = serde_json::json!({ "protocolVersion": 2, "clientCapabilities": {}, diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index 7a136a699..5e4988b23 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -217,6 +217,10 @@ async fn initialize(id: Value, params: Value, wire_tx: &WireSender) { }; // Honest negotiation: respond with the minimum of what the client // requested and what we support. 
+ // NOTE: the buzz-acp harness gates `[Base]` injection on `protocol_version < 2` + // (`prepend_base_for_legacy`). That gate is a deliberate temporary measure — we + // are squatting on ACP v2 ahead of the upstream ACP RFD. Revisit when that RFD + // merges; otherwise a genuine upstream-v2 agent would silently lose `[Base]`. let negotiated_version = p.protocol_version.min(PROTOCOL_VERSION); wire::send( wire_tx,