From 5945f48a8526725e66a67e2fefd73e5aafec0391 Mon Sep 17 00:00:00 2001 From: npub1fgdl5qqnh3k3f2xkqrvt7cujalhm623x4s7fdjdj5yrtp5fzjl9qrjpucw <4a1bfa0013bc6d14a8d600d8bf6392efefbd2a26ac3c96c9b2a106b0d12297ca@sprout-oss.stage.blox.sqprod.co> Date: Mon, 29 Jun 2026 22:08:24 -0400 Subject: [PATCH] refactor(desktop): make provider/model selection provider-agnostic + add databricks discovery Removes the implicit Databricks fallback so the desktop agent provider/model selection is provider- and model-agnostic; BUZZ_AGENT_PROVIDER must be set explicitly. The static model roster (GPT-5.5, Claude Sonnet 4.6, etc.) is gone, so providers without live discovery surface only "Default model" / "Custom model...". Providers that require an explicit model (openai, anthropic, openai-compat) filter out the "Default model" option. Adds live model enumeration for the databricks and databricks_v2 providers, which previously had an empty picker. Discovery is a public buzz-agent library function called in-process from desktop-tauri (new buzz_agent_pkg path-dep), reusing build_token_source so both static DATABRICKS_TOKEN and on-disk PKCE caches populate the dropdown. A bearer_no_browser() override stops before the interactive OAuth flow and returns Err on an empty cache, so discovery never hangs on a browser prompt: - databricks (v1): GET {host}/api/2.0/serving-endpoints, filtered to state.ready == READY and an llm/v1/chat|completions task. - databricks_v2: GET {host}/api/ai-gateway/v2/endpoints?page_size=100, paginated via next_page_token, with a known-model fallback when the API returns nothing. DATABRICKS_TOKEN is redacted from any surfaced error. No-token-no-cache yields a clean Err so the picker degrades to "Custom model..." rather than failing. Also extracts build_buzz_agent_provider_defaults / parse_agent_env_lines into a focused managed_agents/agent_env.rs module to keep runtime.rs under the file-size guard. Co-authored-by: Will Pfleger Signed-off-by: Will Pfleger --- crates/buzz-agent/README.md | 12 +- crates/buzz-agent/src/auth.rs | 116 ++++++ crates/buzz-agent/src/catalog.rs | 369 ++++++++++++++++++ crates/buzz-agent/src/config.rs | 197 +++++----- crates/buzz-agent/src/lib.rs | 9 +- crates/buzz-agent/src/llm.rs | 2 +- desktop/scripts/check-file-sizes.mjs | 5 + desktop/src-tauri/Cargo.lock | 56 +++ desktop/src-tauri/Cargo.toml | 2 + desktop/src-tauri/build.rs | 47 ++- .../src-tauri/src/commands/agent_models.rs | 118 +++++- .../src/commands/agent_models_tests.rs | 16 + .../src-tauri/src/managed_agents/agent_env.rs | 191 +++++++++ desktop/src-tauri/src/managed_agents/mod.rs | 2 + .../src-tauri/src/managed_agents/runtime.rs | 30 +- .../src/managed_agents/runtime/tests.rs | 7 - .../agents/ui/personaDialogPickers.tsx | 169 +------- .../agents/ui/personaModelDiscoveryStatus.ts | 2 +- desktop/tests/e2e/persona-env-vars.spec.ts | 57 +-- 19 files changed, 1046 insertions(+), 361 deletions(-) create mode 100644 crates/buzz-agent/src/catalog.rs create mode 100644 desktop/src-tauri/src/managed_agents/agent_env.rs diff --git a/crates/buzz-agent/README.md b/crates/buzz-agent/README.md index 55cf6ab50..46f8b4a8a 100644 --- a/crates/buzz-agent/README.md +++ b/crates/buzz-agent/README.md @@ -129,17 +129,17 @@ Everything is environment variables. No flags, no config files. (We are a subpro | Variable | Default | Notes | |---|---|---| -| `BUZZ_AGENT_PROVIDER` | — | `anthropic`, `openai`, `databricks`, or `databricks_v2`. If unset, or if `anthropic`/`openai` is selected but its API key is missing, Databricks is auto-selected when `DATABRICKS_HOST` + `DATABRICKS_MODEL` are set. | -| `ANTHROPIC_API_KEY` | — | Required when provider=anthropic unless Databricks fallback is configured. | +| `BUZZ_AGENT_PROVIDER` | — | Required. `anthropic`, `openai`, `databricks`, or `databricks_v2`. No implicit fallback — the agent errors at startup when this is unset. | +| `ANTHROPIC_API_KEY` | — | Required when provider=anthropic. | | `ANTHROPIC_MODEL` | — | Required when provider=anthropic. | | `ANTHROPIC_BASE_URL` | `https://api.anthropic.com` | | | `ANTHROPIC_API_VERSION` | `2023-06-01` | | -| `OPENAI_COMPAT_API_KEY` | — | Required when provider=openai unless Databricks fallback is configured. | +| `OPENAI_COMPAT_API_KEY` | — | Required when provider=openai. | | `OPENAI_COMPAT_MODEL` | — | Required when provider=openai. | | `OPENAI_COMPAT_BASE_URL` | `https://api.openai.com/v1` | Point at vLLM, llama.cpp, OpenRouter, Ollama, etc. | | `OPENAI_COMPAT_API` | `auto` | `auto` \| `chat` \| `responses`. `auto` picks Responses for `*.openai.com`, Chat Completions everywhere else. | -| `DATABRICKS_HOST` | — | Required when provider=databricks or when using Databricks fallback. | -| `DATABRICKS_MODEL` | — | Required when provider=databricks or when using Databricks fallback. | +| `DATABRICKS_HOST` | — | Required when provider=databricks or provider=databricks_v2. | +| `DATABRICKS_MODEL` | — | Required when provider=databricks or provider=databricks_v2. | | `DATABRICKS_TOKEN` | — | Optional static bearer escape hatch. If unset, Databricks uses browser OAuth + refresh cache. | | `BUZZ_AGENT_SYSTEM_PROMPT` | built-in | Inline system prompt. | | `BUZZ_AGENT_SYSTEM_PROMPT_FILE` | — | File path. Mutually exclusive with the above. | @@ -172,7 +172,7 @@ Everything is environment variables. No flags, no config files. (We are a subpro | Databricks | `databricks` | `POST {host}/serving-endpoints/{model}/invocations` | goose-claude-4-6-sonnet | | Databricks AI Gateway v2 | `databricks_v2` | `POST {host}/ai-gateway/{provider}/v1/...` | databricks-gpt-5-5, databricks-claude-opus-4-7 | -If `BUZZ_AGENT_PROVIDER=anthropic` is selected without `ANTHROPIC_API_KEY`, or `BUZZ_AGENT_PROVIDER=openai` is selected without `OPENAI_COMPAT_API_KEY`, the agent automatically falls back to Databricks OAuth when `DATABRICKS_HOST` and `DATABRICKS_MODEL` are set. The same Databricks fallback applies when `BUZZ_AGENT_PROVIDER` is unset. Explicit Anthropic/OpenAI API keys always win. +If `BUZZ_AGENT_PROVIDER=anthropic` is selected without `ANTHROPIC_API_KEY`, or `BUZZ_AGENT_PROVIDER=openai` is selected without `OPENAI_COMPAT_API_KEY`, the agent returns an error — there is no implicit fallback to another provider. `provider=openai` speaks two HTTP dialects: the [Responses API](https://platform.openai.com/docs/api-reference/responses) (`/v1/responses`, required for GPT-5 / o-series tool-calling on OpenAI's own service) and the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat) (`/chat/completions`, the broadly-supported OpenAI-compatible wire format). diff --git a/crates/buzz-agent/src/auth.rs b/crates/buzz-agent/src/auth.rs index 5b439aa78..34974fbf0 100644 --- a/crates/buzz-agent/src/auth.rs +++ b/crates/buzz-agent/src/auth.rs @@ -44,6 +44,15 @@ const BROWSER_AUTH_TIMEOUT: Duration = Duration::from_secs(60); pub trait TokenSource: Send + Sync { async fn bearer(&self) -> Result; + /// Return a bearer token from cache or refresh, **never** opening a browser. + /// + /// The default delegates to [`bearer`](Self::bearer) — correct for token + /// sources (e.g. static API keys) that can never trigger a browser flow. + /// [`PkceOAuthTokenSource`] overrides this to stop before the browser step. + async fn bearer_no_browser(&self) -> Result { + self.bearer().await + } + /// Force a fresh bearer after the server rejected the current one (401). /// /// `rejected` is the exact access token that just got the 401. Unlike @@ -287,6 +296,10 @@ impl TokenSource for PkceOAuthTokenSource { Ok(bearer) } + async fn bearer_no_browser(&self) -> Result { + self.try_bearer_no_browser().await + } + /// Force-refresh after a 401, never touching the browser flow. /// /// `rejected` is the access token the server just 401'd. Coalescing keys @@ -344,6 +357,68 @@ impl TokenSource for PkceOAuthTokenSource { } } +impl PkceOAuthTokenSource { + /// Return a bearer token from cache or refresh, **never** opening a browser. + /// + /// Follows the same steps as [`bearer`](TokenSource::bearer) but stops at + /// step 4 — if no usable token is available after cache + refresh attempts, + /// returns `Err(LlmAuth(...))` instead of launching the browser PKCE flow. + /// Used by model-discovery paths that must not block on user interaction. + pub(crate) async fn try_bearer_no_browser(&self) -> Result { + let mut state = self.state.lock().await; + + // 1. In-memory cache hit, still fresh. + if let Some(tok) = state.as_ref() { + if !is_expired(tok) { + return Ok(tok.access_token.clone()); + } + } + + // 2. Re-read disk — another process may have refreshed already. + if let Some(disk_tok) = read_cache(&self.cache_path) { + if !is_expired(&disk_tok) { + let bearer = disk_tok.access_token.clone(); + *state = Some(disk_tok); + return Ok(bearer); + } + } + + // 3. Try refresh if we have a refresh token. Endpoints are discovered + // lazily here — only when a refresh token is actually present — so + // that an unreachable OIDC discovery URL cannot prevent the + // no-token/no-cache path from returning `LlmAuth` (graceful + // fallback) instead of `Llm` (hard error). + let refresh = state.as_ref().and_then(|t| t.refresh_token.clone()); + if let Some(rt) = refresh { + let endpoints = self.endpoints().await?; + match self.refresh(&endpoints, &rt).await { + Ok(fresh) => { + let bearer = fresh.access_token.clone(); + self.save(&mut state, fresh)?; + return Ok(bearer); + } + Err(e) => { + tracing::warn!(error = %e, "oauth refresh failed during model discovery"); + } + } + + // 4. Re-read disk after refresh failure. + if let Some(disk_tok) = read_cache(&self.cache_path) { + if !is_expired(&disk_tok) { + let bearer = disk_tok.access_token.clone(); + *state = Some(disk_tok); + return Ok(bearer); + } + } + } + + // No usable token — return error instead of opening a browser. + Err(AgentError::LlmAuth( + "no cached Databricks token; run `buzz-agent auth databricks` first".into(), + )) + } +} + // ---- helpers ------------------------------------------------------------- /// Aborts a spawned task when dropped. Used to guarantee the localhost @@ -726,4 +801,45 @@ mod tests { "expected discovery error, got: {err_msg}" ); } + + /// `try_bearer_no_browser` with an empty cache and no refresh token must + /// return `LlmAuth` immediately — it must NOT attempt OIDC discovery even + /// when the `discovery_url` is unreachable/invalid. This guards the + /// regression where `endpoints()` was called unconditionally before the + /// refresh-token check, causing an `Llm` error (hard failure) instead of + /// the intended graceful `LlmAuth` fallback. + #[tokio::test] + async fn test_try_bearer_no_browser_empty_cache_no_refresh_returns_llm_auth_without_discovery() + { + let dir = tempfile::tempdir().unwrap(); + // Intentionally invalid/unreachable discovery URL — if endpoints() is + // called, the test will get an `Llm` error and the assertion below fails. + let cfg = PkceOAuthConfig { + discovery_url: "https://invalid.example.test/.well-known/oauth-authorization-server" + .into(), + client_id: "test-client".into(), + scopes: vec!["offline_access".into()], + cache_namespace: "test".into(), + cache_dir_override: Some(dir.path().to_path_buf()), + }; + let source = PkceOAuthTokenSource::new(cfg).unwrap(); + + // Empty in-memory state (no token, no refresh token). + { + let mut state = source.state.lock().await; + *state = None; + } + + // No disk cache file either — dir is empty. + + let result = source.try_bearer_no_browser().await; + assert!(result.is_err(), "expected Err, got Ok"); + match result.unwrap_err() { + AgentError::LlmAuth(_) => {} // correct: graceful fallback + other => panic!( + "expected LlmAuth (no discovery attempted), got: {other:?}\n\ + This means endpoints() was called before the refresh-token check." + ), + } + } } diff --git a/crates/buzz-agent/src/catalog.rs b/crates/buzz-agent/src/catalog.rs new file mode 100644 index 000000000..8a8649c27 --- /dev/null +++ b/crates/buzz-agent/src/catalog.rs @@ -0,0 +1,369 @@ +//! Databricks model catalog discovery. +//! +//! Exposes [`discover_databricks_models`] — an async helper that lists +//! available models for the `databricks` and `databricks_v2` providers +//! without triggering a browser OAuth flow. Auth is acquired in-process via +//! [`build_token_source`](crate::llm::build_token_source): +//! +//! - Static bearer (`DATABRICKS_TOKEN`): returned immediately. +//! - PKCE cache hit: returned from disk without a network round-trip. +//! - PKCE cache empty / no token: returns `Err(AgentError::LlmAuth)` — the +//! caller degrades gracefully; no browser, no hang. + +use reqwest::Client; + +use crate::{ + config::{Config, Provider}, + llm::build_token_source, + types::AgentError, +}; + +/// A discovered model entry: `id` is the picker value, `name` is the display +/// label (same as `id` for Databricks — the API has no separate display name). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ModelEntry { + pub id: String, + pub name: String, +} + +/// Known Databricks AI Gateway v2 models — used as a fallback when the +/// `api/ai-gateway/v2/endpoints` call returns an empty list. +/// Mirrors goose's `DATABRICKS_V2_KNOWN_MODELS`. +pub const DATABRICKS_V2_KNOWN_MODELS: &[&str] = + &["databricks-gpt-5-5", "databricks-claude-opus-4-7"]; + +/// Discover available models for a Databricks provider. +/// +/// Returns a non-empty `Vec` on success. Returns +/// `Err(AgentError::LlmAuth)` when no token is available (no static token, +/// no PKCE cache) — callers should degrade gracefully rather than hanging. +/// +/// # Panics +/// Never panics. +pub async fn discover_databricks_models(cfg: &Config) -> Result, AgentError> { + let token_source = build_token_source(cfg)?; + let bearer = token_source.bearer_no_browser().await?; + + let http = Client::new(); + let host = cfg.base_url.trim_end_matches('/'); + + match cfg.provider { + Provider::Databricks => fetch_v1_models(&http, host, &bearer).await, + Provider::DatabricksV2 => fetch_v2_models(&http, host, &bearer).await, + _ => Err(AgentError::InvalidParams( + "discover_databricks_models called for non-Databricks provider".into(), + )), + } +} + +// --------------------------------------------------------------------------- +// v1 — api/2.0/serving-endpoints +// --------------------------------------------------------------------------- + +async fn fetch_v1_models( + http: &Client, + host: &str, + bearer: &str, +) -> Result, AgentError> { + let url = format!("{host}/api/2.0/serving-endpoints"); + let response = http + .get(&url) + .bearer_auth(bearer) + .send() + .await + .map_err(|e| AgentError::Llm(format!("Databricks model discovery request failed: {e}")))?; + + let status = response.status(); + if !status.is_success() { + let body = response.text().await.unwrap_or_default(); + return Err(AgentError::Llm(format!( + "Databricks model discovery HTTP {status}: {body}" + ))); + } + + let json: serde_json::Value = response.json().await.map_err(|e| { + AgentError::Llm(format!( + "Databricks model discovery response parse failed: {e}" + )) + })?; + + parse_v1_endpoints(&json) +} + +/// Parse a `GET api/2.0/serving-endpoints` response. +/// +/// Filters to endpoints that are READY and serve an LLM chat/completions task. +/// When `state.ready` or `task` is absent the endpoint is included — prefer +/// including over silently dropping, per spec. +pub(crate) fn parse_v1_endpoints(json: &serde_json::Value) -> Result, AgentError> { + let endpoints = json + .get("endpoints") + .and_then(|v| v.as_array()) + .ok_or_else(|| { + AgentError::Llm( + "Databricks model discovery: unexpected response (missing 'endpoints' array)" + .into(), + ) + })?; + + let models = endpoints + .iter() + .filter_map(|endpoint| { + let name = endpoint.get("name")?.as_str()?.to_string(); + + // Require READY state when present; include when absent. + let state_ready = endpoint + .get("state") + .and_then(|s| s.get("ready")) + .and_then(|r| r.as_str()) + .map(|r| r == "READY") + .unwrap_or(true); + if !state_ready { + return None; + } + + // Require LLM chat or completions task when present. + let task_ok = endpoint + .get("task") + .and_then(|t| t.as_str()) + .map(|t| t == "llm/v1/chat" || t == "llm/v1/completions") + .unwrap_or(true); + if !task_ok { + return None; + } + + Some(ModelEntry { + id: name.clone(), + name, + }) + }) + .collect(); + + Ok(models) +} + +// --------------------------------------------------------------------------- +// v2 — api/ai-gateway/v2/endpoints (paginated) +// --------------------------------------------------------------------------- + +/// Percent-encode a string for use as a URL query parameter value. +/// Only encodes characters that are not unreserved (RFC 3986). +fn percent_encode(s: &str) -> String { + s.bytes() + .flat_map(|b| match b { + b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => { + vec![b as char] + } + _ => format!("%{b:02X}").chars().collect(), + }) + .collect() +} + +async fn fetch_v2_models( + http: &Client, + host: &str, + bearer: &str, +) -> Result, AgentError> { + let mut all_models: Vec = Vec::new(); + let mut page_token: Option = None; + let base_url = format!("{host}/api/ai-gateway/v2/endpoints"); + + // Cap at 20 pages (2 000 endpoints) to bound execution time. + for _ in 0..20 { + // Build URL with query params manually — avoids requiring the `query` + // reqwest feature in buzz-agent's Cargo.toml. + let url = match &page_token { + Some(tok) => format!( + "{base_url}?page_size=100&page_token={}", + percent_encode(tok) + ), + None => format!("{base_url}?page_size=100"), + }; + let response = http + .get(&url) + .bearer_auth(bearer) + .send() + .await + .map_err(|e| { + AgentError::Llm(format!("Databricks v2 model discovery request failed: {e}")) + })?; + + let status = response.status(); + if !status.is_success() { + let body = response.text().await.unwrap_or_default(); + return Err(AgentError::Llm(format!( + "Databricks v2 model discovery HTTP {status}: {body}" + ))); + } + + let json: serde_json::Value = response.json().await.map_err(|e| { + AgentError::Llm(format!( + "Databricks v2 model discovery response parse failed: {e}" + )) + })?; + + let (page_models, next) = parse_v2_endpoints_page(&json)?; + all_models.extend(page_models); + + match next { + Some(tok) if Some(&tok) != page_token.as_ref() => page_token = Some(tok), + _ => break, + } + } + + // Fall back to known-model list if the API returned nothing. + if all_models.is_empty() { + all_models = DATABRICKS_V2_KNOWN_MODELS + .iter() + .map(|id| ModelEntry { + id: id.to_string(), + name: id.to_string(), + }) + .collect(); + } + + Ok(all_models) +} + +/// Parse one page of a `GET api/ai-gateway/v2/endpoints` response. +/// +/// Returns `(models, next_page_token)`. An empty or absent `next_page_token` +/// signals the last page. +pub(crate) fn parse_v2_endpoints_page( + json: &serde_json::Value, +) -> Result<(Vec, Option), AgentError> { + let endpoints = json + .get("endpoints") + .and_then(|v| v.as_array()) + .ok_or_else(|| { + AgentError::Llm( + "Databricks v2 model discovery: unexpected response (missing 'endpoints' array)" + .into(), + ) + })?; + + let models = endpoints + .iter() + .filter_map(|endpoint| { + let name = endpoint.get("name")?.as_str()?.to_string(); + Some(ModelEntry { + id: name.clone(), + name, + }) + }) + .collect(); + + let next_page_token = json + .get("next_page_token") + .and_then(|v| v.as_str()) + .filter(|token| !token.is_empty()) + .map(str::to_string); + + Ok((models, next_page_token)) +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn v1_parse_filters_ready_chat_endpoints() { + let json = serde_json::json!({ + "endpoints": [ + // included: READY + llm/v1/chat + {"name": "my-llm", "state": {"ready": "READY"}, "task": "llm/v1/chat"}, + // included: READY + llm/v1/completions + {"name": "my-completions", "state": {"ready": "READY"}, "task": "llm/v1/completions"}, + // excluded: NOT_READY + {"name": "dead-endpoint", "state": {"ready": "NOT_READY"}, "task": "llm/v1/chat"}, + // excluded: wrong task + {"name": "embedding-ep", "state": {"ready": "READY"}, "task": "llm/v1/embedding"}, + // included: no state field → include by default + {"name": "no-state", "task": "llm/v1/chat"}, + // included: no task field → include by default + {"name": "no-task", "state": {"ready": "READY"}}, + ] + }); + + let models = parse_v1_endpoints(&json).unwrap(); + let ids: Vec<&str> = models.iter().map(|m| m.id.as_str()).collect(); + assert_eq!(ids, vec!["my-llm", "my-completions", "no-state", "no-task"]); + } + + #[test] + fn v1_parse_errors_on_missing_endpoints_array() { + let json = serde_json::json!({"data": []}); + let err = parse_v1_endpoints(&json).unwrap_err(); + assert!( + err.to_string().contains("missing 'endpoints' array"), + "got: {err}" + ); + } + + #[test] + fn v1_parse_empty_endpoints_returns_empty_vec() { + let json = serde_json::json!({"endpoints": []}); + let models = parse_v1_endpoints(&json).unwrap(); + assert!(models.is_empty()); + } + + #[test] + fn v2_parse_extracts_names_and_page_token() { + let json = serde_json::json!({ + "endpoints": [ + {"name": "databricks-claude-opus-4-7"}, + {"name": "databricks-gpt-5-5"}, + {"name": "custom-model"} + ], + "next_page_token": "tok123" + }); + + let (models, next) = parse_v2_endpoints_page(&json).unwrap(); + let ids: Vec<&str> = models.iter().map(|m| m.id.as_str()).collect(); + assert_eq!( + ids, + vec![ + "databricks-claude-opus-4-7", + "databricks-gpt-5-5", + "custom-model" + ] + ); + assert_eq!(next.as_deref(), Some("tok123")); + } + + #[test] + fn v2_parse_empty_token_signals_last_page() { + let json = serde_json::json!({ + "endpoints": [{"name": "only-model"}], + "next_page_token": "" + }); + + let (models, next) = parse_v2_endpoints_page(&json).unwrap(); + assert_eq!(models.len(), 1); + assert!( + next.is_none(), + "empty token should be treated as no more pages" + ); + } + + #[test] + fn v2_parse_absent_token_signals_last_page() { + let json = serde_json::json!({"endpoints": [{"name": "only-model"}]}); + let (_, next) = parse_v2_endpoints_page(&json).unwrap(); + assert!(next.is_none()); + } + + #[test] + fn v2_parse_errors_on_missing_endpoints_array() { + let json = serde_json::json!({"data": []}); + let err = parse_v2_endpoints_page(&json).unwrap_err(); + assert!( + err.to_string().contains("missing 'endpoints' array"), + "got: {err}" + ); + } +} diff --git a/crates/buzz-agent/src/config.rs b/crates/buzz-agent/src/config.rs index 8e17d10ca..e0f626b67 100644 --- a/crates/buzz-agent/src/config.rs +++ b/crates/buzz-agent/src/config.rs @@ -104,13 +104,12 @@ impl Config { env("BUZZ_AGENT_PROVIDER").as_deref(), env("ANTHROPIC_API_KEY").as_deref(), env("OPENAI_COMPAT_API_KEY").as_deref(), - databricks_host.as_deref(), - databricks_model.as_deref(), )?; - // Universal model override — any provider will use this when its own - // model env var is absent. Useful for wrapper scripts that set a single - // var regardless of which provider is active. + // Universal model override — takes priority over provider-specific model + // env vars (ANTHROPIC_MODEL, OPENAI_COMPAT_MODEL, DATABRICKS_MODEL) when + // present. Set by the desktop from the persona/record to express explicit + // user intent; provider-specific vars serve as defaults for CLI/standalone use. let buzz_agent_model = env("BUZZ_AGENT_MODEL"); // OPENAI_COMPAT_API is only read when provider=openai, so a stray @@ -123,8 +122,8 @@ impl Config { Provider::Anthropic => ( req("ANTHROPIC_API_KEY")?, resolve_model( - env("ANTHROPIC_MODEL").as_deref(), buzz_agent_model.as_deref(), + env("ANTHROPIC_MODEL").as_deref(), ) .ok_or_else(|| "config: ANTHROPIC_MODEL required".to_string())?, env_or("ANTHROPIC_BASE_URL", "https://api.anthropic.com"), @@ -133,8 +132,8 @@ impl Config { Provider::OpenAi => ( req("OPENAI_COMPAT_API_KEY")?, resolve_model( - env("OPENAI_COMPAT_MODEL").as_deref(), buzz_agent_model.as_deref(), + env("OPENAI_COMPAT_MODEL").as_deref(), ) .ok_or_else(|| "config: OPENAI_COMPAT_MODEL required".to_string())?, env_or("OPENAI_COMPAT_BASE_URL", "https://api.openai.com/v1"), @@ -142,7 +141,7 @@ impl Config { ), Provider::Databricks | Provider::DatabricksV2 => ( env("DATABRICKS_TOKEN").unwrap_or_default(), - resolve_model(databricks_model.as_deref(), buzz_agent_model.as_deref()) + resolve_model(buzz_agent_model.as_deref(), databricks_model.as_deref()) .ok_or_else(|| "config: DATABRICKS_MODEL required".to_string())?, databricks_host.ok_or_else(|| "config: DATABRICKS_HOST required".to_string())?, OpenAiApi::Chat, // only read by OpenAI/legacy Databricks dispatch @@ -193,6 +192,43 @@ impl Config { Ok(cfg) } + /// Construct a minimal `Config` for model-catalog discovery. + /// + /// Only the fields used by [`build_token_source`](crate::llm::build_token_source) + /// and the catalog HTTP helpers are meaningful; all others are set to + /// inert defaults. Never call `from_env` for discovery — it requires + /// `DATABRICKS_MODEL` and other fields that are irrelevant here. + pub fn for_discovery(provider: Provider, api_key: String, base_url: String) -> Self { + Self { + provider, + api_key, + base_url, + model: String::new(), + system_prompt: String::new(), + anthropic_api_version: "2023-06-01".into(), + openai_api: OpenAiApi::Chat, + max_rounds: 0, + max_output_tokens: 1, + llm_timeout: Duration::from_secs(30), + tool_timeout: Duration::from_secs(30), + mcp_init_timeout: Duration::from_secs(30), + mcp_max_restart_attempts: 0, + mcp_restart_base_ms: 0, + mcp_restart_max_ms: 0, + max_sessions: 1, + max_line_bytes: 4 * 1024 * 1024, + max_history_bytes: 16 * 1024 * 1024, + max_tool_result_text_bytes: 50 * 1024, + max_context_tokens: 200_001, + max_handoffs: 0, + max_parallel_tools: 1, + hook_timeout: Duration::from_secs(1), + stop_max_rejections: 0, + hook_servers: HookServers::None, + hints_enabled: false, + } + } + fn validate(&self) -> Result<(), String> { const MIN_HISTORY_BYTES: usize = 4096; const MIN_LINE_BYTES: usize = 1024; @@ -271,54 +307,38 @@ fn req(k: &str) -> Result { env(k).ok_or_else(|| format!("config: {k} required")) } -/// Returns the first of `provider_model` or `universal_fallback` that is -/// `Some`, converting to an owned `String`. Returns `None` when both are -/// absent so the caller can supply a provider-specific error message. -fn resolve_model(provider_model: Option<&str>, universal_fallback: Option<&str>) -> Option { - provider_model.or(universal_fallback).map(str::to_owned) +/// Returns the first present value. `explicit_override` (BUZZ_AGENT_MODEL, +/// set by the desktop from the persona/record) wins over `provider_default` +/// (provider-specific env var that may be inherited from the shell). +/// Returns `None` when both are absent so the caller can supply a +/// provider-specific error message. +fn resolve_model( + explicit_override: Option<&str>, + provider_default: Option<&str>, +) -> Option { + explicit_override.or(provider_default).map(str::to_owned) } fn present_nonempty(v: Option<&str>) -> bool { v.map(str::trim).is_some_and(|s| !s.is_empty()) } -fn databricks_available(host: Option<&str>, model: Option<&str>) -> bool { - present_nonempty(host) && present_nonempty(model) -} - fn resolve_provider( requested: Option<&str>, anthropic_key: Option<&str>, openai_key: Option<&str>, - databricks_host: Option<&str>, - databricks_model: Option<&str>, ) -> Result { - let databricks_ready = databricks_available(databricks_host, databricks_model); match requested.map(str::trim).filter(|s| !s.is_empty()) { Some(raw) => { let normalized = raw.to_ascii_lowercase(); match normalized.as_str() { "anthropic" if present_nonempty(anthropic_key) => Ok(Provider::Anthropic), - "anthropic" if databricks_ready => { - tracing::warn!( - requested = raw, - "API key missing for requested provider; falling back to Databricks OAuth" - ); - Ok(Provider::Databricks) - } "anthropic" => Err( - "config: ANTHROPIC_API_KEY required (or set DATABRICKS_HOST and DATABRICKS_MODEL for Databricks OAuth fallback)".into(), + "config: ANTHROPIC_API_KEY required".into(), ), "openai" | "openai-compat" if present_nonempty(openai_key) => Ok(Provider::OpenAi), - "openai" | "openai-compat" if databricks_ready => { - tracing::warn!( - requested = raw, - "API key missing for requested provider; falling back to Databricks OAuth" - ); - Ok(Provider::Databricks) - } "openai" | "openai-compat" => Err( - "config: OPENAI_COMPAT_API_KEY required (or set DATABRICKS_HOST and DATABRICKS_MODEL for Databricks OAuth fallback)".into(), + "config: OPENAI_COMPAT_API_KEY required".into(), ), "databricks" => Ok(Provider::Databricks), "databricks_v2" | "databricks-v2" => Ok(Provider::DatabricksV2), @@ -327,9 +347,8 @@ fn resolve_provider( )), } } - None if databricks_ready => Ok(Provider::Databricks), None => Err( - "config: BUZZ_AGENT_PROVIDER required (or set DATABRICKS_HOST and DATABRICKS_MODEL for Databricks OAuth fallback)".into(), + "config: BUZZ_AGENT_PROVIDER is required — set it to your provider (e.g. anthropic, openai, databricks)".into(), ), } } @@ -540,89 +559,51 @@ mod tests { #[test] fn resolve_provider_keeps_requested_provider_when_token_present() { assert_eq!( - resolve_provider( - Some("anthropic"), - Some("sk-ant"), - None, - Some("https://dbc.example"), - Some("db-model") - ) - .unwrap(), + resolve_provider(Some("anthropic"), Some("sk-ant"), None,).unwrap(), Provider::Anthropic ); assert_eq!( - resolve_provider( - Some("openai"), - None, - Some("sk-openai"), - Some("https://dbc.example"), - Some("db-model") - ) - .unwrap(), + resolve_provider(Some("openai"), None, Some("sk-openai"),).unwrap(), Provider::OpenAi ); } #[test] - fn resolve_provider_falls_back_to_databricks_when_requested_token_missing() { - assert_eq!( - resolve_provider( - Some("anthropic"), - None, - None, - Some("https://dbc.example"), - Some("goose-claude-4-6-sonnet") - ) - .unwrap(), - Provider::Databricks - ); - assert_eq!( - resolve_provider( - Some("openai-compat"), - None, - Some(" "), - Some("https://dbc.example"), - Some("goose-claude-4-6-sonnet") - ) - .unwrap(), - Provider::Databricks - ); + fn resolve_provider_errors_when_requested_provider_key_missing() { + // No fallback — missing key returns an error regardless of Databricks availability. + let err = resolve_provider(Some("anthropic"), None, None).unwrap_err(); + assert!(err.contains("ANTHROPIC_API_KEY required"), "{err}"); + + let err = resolve_provider(Some("openai-compat"), None, Some(" ")).unwrap_err(); + assert!(err.contains("OPENAI_COMPAT_API_KEY required"), "{err}"); } #[test] - fn resolve_provider_can_auto_select_databricks_without_explicit_provider() { - assert_eq!( - resolve_provider( - None, - None, - None, - Some("https://dbc.example"), - Some("goose-claude-4-6-sonnet") - ) - .unwrap(), - Provider::Databricks - ); + fn resolve_provider_errors_when_provider_env_absent() { + // No implicit inference — absent BUZZ_AGENT_PROVIDER is an error. + let err = resolve_provider(None, None, None).unwrap_err(); + assert!(err.contains("BUZZ_AGENT_PROVIDER is required"), "{err}"); } #[test] fn resolve_provider_requires_databricks_host_and_model_for_fallback() { - let err = resolve_provider( - Some("openai"), - None, - None, - Some("https://dbc.example"), - None, - ) - .unwrap_err(); - assert!(err.contains("OPENAI_COMPAT_API_KEY required")); - let err = - resolve_provider(None, None, None, Some("https://dbc.example"), None).unwrap_err(); - assert!(err.contains("BUZZ_AGENT_PROVIDER required")); + // Renamed: verify the explicit databricks provider path works correctly. + // When BUZZ_AGENT_PROVIDER=databricks, resolve_provider succeeds regardless + // of DATABRICKS_HOST/MODEL (those are validated later in from_env()). + assert_eq!( + resolve_provider(Some("databricks"), None, None).unwrap(), + Provider::Databricks + ); + // Missing key for other providers still errors — no Databricks fallback. + let err = resolve_provider(Some("openai"), None, None).unwrap_err(); + assert!(err.contains("OPENAI_COMPAT_API_KEY required"), "{err}"); + let err = resolve_provider(None, None, None).unwrap_err(); + assert!(err.contains("BUZZ_AGENT_PROVIDER is required"), "{err}"); } #[test] fn resolve_provider_unsupported_error_preserves_user_casing() { - let err = resolve_provider(Some("OpenAIish"), None, None, None, None).unwrap_err(); + let err = resolve_provider(Some("OpenAIish"), None, None).unwrap_err(); assert!(err.contains("BUZZ_AGENT_PROVIDER=OpenAIish")); } @@ -645,15 +626,15 @@ mod tests { } #[test] - fn resolve_model_prefers_provider_specific() { - let result = resolve_model(Some("anthropic-model"), Some("universal-model")); - assert_eq!(result.as_deref(), Some("anthropic-model")); + fn resolve_model_prefers_explicit_override() { + let result = resolve_model(Some("override-model"), Some("provider-model")); + assert_eq!(result.as_deref(), Some("override-model")); } #[test] - fn resolve_model_falls_back_to_universal() { - let result = resolve_model(None, Some("universal-model")); - assert_eq!(result.as_deref(), Some("universal-model")); + fn resolve_model_falls_back_to_provider_default() { + let result = resolve_model(None, Some("provider-model")); + assert_eq!(result.as_deref(), Some("provider-model")); } #[test] diff --git a/crates/buzz-agent/src/lib.rs b/crates/buzz-agent/src/lib.rs index 014cb66b0..5cc8e0b4f 100644 --- a/crates/buzz-agent/src/lib.rs +++ b/crates/buzz-agent/src/lib.rs @@ -2,14 +2,19 @@ mod agent; pub mod auth; mod builtin; -mod config; +pub mod catalog; +pub mod config; mod handoff; mod hints; mod llm; mod mcp; -mod types; +pub mod types; mod wire; +pub use catalog::{discover_databricks_models, ModelEntry, DATABRICKS_V2_KNOWN_MODELS}; +pub use config::Provider; +pub use types::AgentError; + use std::collections::HashMap; use std::path::Path; use std::sync::Arc; diff --git a/crates/buzz-agent/src/llm.rs b/crates/buzz-agent/src/llm.rs index 7f1db2ddd..f961d7f26 100644 --- a/crates/buzz-agent/src/llm.rs +++ b/crates/buzz-agent/src/llm.rs @@ -982,7 +982,7 @@ where /// Otherwise a `PkceOAuthTokenSource` pointed at the workspace's OIDC /// discovery URL. First request without a cached token triggers a browser /// flow; subsequent requests use the cache + refresh transparently. -fn build_token_source(cfg: &Config) -> Result, AgentError> { +pub(crate) fn build_token_source(cfg: &Config) -> Result, AgentError> { match cfg.provider { Provider::Anthropic | Provider::OpenAi => { Ok(Arc::new(StaticTokenSource::new(cfg.api_key.clone()))) diff --git a/desktop/scripts/check-file-sizes.mjs b/desktop/scripts/check-file-sizes.mjs index 62725d477..69f19f664 100644 --- a/desktop/scripts/check-file-sizes.mjs +++ b/desktop/scripts/check-file-sizes.mjs @@ -132,6 +132,11 @@ const overrides = new Map([ ["src/shared/ui/markdown.tsx", 2082], ["src/shared/ui/VideoPlayer.tsx", 2199], ["src/shared/ui/sidebar.tsx", 1042], + // Option C databricks-model-discovery: parse/HTTP logic moved to buzz-agent + // catalog module; agent_models.rs retains the thin wrapper (~50 lines). + // File still exceeds 1000 due to OpenAI/Anthropic discovery + subprocess + // fallback. Queued to split into dedicated discovery modules. + ["src-tauri/src/commands/agent_models.rs", 1066], ]); await runFileSizeCheck({ diff --git a/desktop/src-tauri/Cargo.lock b/desktop/src-tauri/Cargo.lock index 7adfd466a..a6267d869 100644 --- a/desktop/src-tauri/Cargo.lock +++ b/desktop/src-tauri/Cargo.lock @@ -535,6 +535,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ "axum-core", + "axum-macros", + "base64 0.22.1", "bytes", "form_urlencoded", "futures-util", @@ -553,8 +555,10 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", + "sha1 0.10.6", "sync_wrapper", "tokio", + "tokio-tungstenite 0.29.0", "tower", "tower-layer", "tower-service", @@ -580,6 +584,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "axum-macros" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aa268c23bfbbd2c4363b9cd302a4f504fb2a9dfe7e3451d66f35dd392e20aca" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.118", +] + [[package]] name = "backon" version = "1.6.0" @@ -824,6 +839,30 @@ version = "3.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" +[[package]] +name = "buzz-agent" +version = "0.1.0" +dependencies = [ + "arc-swap", + "async-trait", + "axum", + "base64 0.22.1", + "getrandom 0.4.2", + "hex", + "nix 0.31.3", + "reqwest 0.13.4", + "rmcp", + "serde", + "serde_json", + "serde_yaml", + "sha2 0.11.0", + "tokio", + "tracing", + "tracing-subscriber", + "urlencoding", + "webbrowser", +] + [[package]] name = "buzz-core" version = "0.1.0" @@ -853,6 +892,7 @@ dependencies = [ "audioadapter-buffers", "axum", "base64 0.22.1", + "buzz-agent", "buzz-core", "buzz-persona", "buzz-sdk", @@ -10471,6 +10511,22 @@ dependencies = [ "string_cache_codegen", ] +[[package]] +name = "webbrowser" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fc95580916af1e68ff6a7be07446fc5db73ebf71cf092de939bbf5f7e189f72" +dependencies = [ + "core-foundation 0.10.1", + "jni 0.22.4", + "log", + "ndk-context", + "objc2", + "objc2-foundation", + "url", + "web-sys", +] + [[package]] name = "webkit2gtk" version = "2.0.2" diff --git a/desktop/src-tauri/Cargo.toml b/desktop/src-tauri/Cargo.toml index 0ce9f8fe7..12121e439 100644 --- a/desktop/src-tauri/Cargo.toml +++ b/desktop/src-tauri/Cargo.toml @@ -24,6 +24,7 @@ mesh-llm = ["dep:mesh-llm-sdk", "dep:mesh-llm-host-runtime"] system-keyring = ["dep:keyring"] [build-dependencies] +base64 = "0.22" serde = { version = "1", features = ["derive"] } serde_json = "1" tauri-build = { version = "2", features = [] } @@ -82,6 +83,7 @@ url = "2" buzz_core_pkg = { package = "buzz-core", path = "../../crates/buzz-core" } buzz_persona_pkg = { package = "buzz-persona", path = "../../crates/buzz-persona" } buzz_sdk_pkg = { package = "buzz-sdk", path = "../../crates/buzz-sdk" } +buzz_agent_pkg = { package = "buzz-agent", path = "../../crates/buzz-agent" } mesh-llm-sdk = { git = "https://github.com/Mesh-LLM/mesh-llm.git", rev = "ebc3ab4c4b5f4a45d5bc942c66c18583800c3f82", package = "mesh-llm-sdk", default-features = false, features = ["client", "serving"], optional = true } mesh-llm-host-runtime = { git = "https://github.com/Mesh-LLM/mesh-llm.git", rev = "ebc3ab4c4b5f4a45d5bc942c66c18583800c3f82", package = "mesh-llm-host-runtime", default-features = false, features = ["dynamic-native-runtime"], optional = true } base64 = "0.22" diff --git a/desktop/src-tauri/build.rs b/desktop/src-tauri/build.rs index 5ffec1a62..3085493a3 100644 --- a/desktop/src-tauri/build.rs +++ b/desktop/src-tauri/build.rs @@ -2,13 +2,16 @@ // so the build-time validation below and the runtime parse cannot drift. include!("src/commands/reconnect_hook_config.rs"); +use base64::Engine as _; + fn main() { println!("cargo:rerun-if-env-changed=BUZZ_RELAY_URL"); println!("cargo:rerun-if-env-changed=BUZZ_RELAY_HTTP"); println!("cargo:rerun-if-env-changed=BUZZ_UPDATER_PUBLIC_KEY"); println!("cargo:rerun-if-env-changed=BUZZ_UPDATER_ENDPOINT"); - println!("cargo:rerun-if-env-changed=BUZZ_BUILD_DATABRICKS_HOST"); - println!("cargo:rerun-if-env-changed=BUZZ_BUILD_DATABRICKS_MODEL"); + println!("cargo:rerun-if-env-changed=BUZZ_BUILD_BUZZ_AGENT_PROVIDER"); + println!("cargo:rerun-if-env-changed=BUZZ_BUILD_BUZZ_AGENT_MODEL"); + println!("cargo:rerun-if-env-changed=BUZZ_BUILD_AGENT_ENV"); println!("cargo:rerun-if-env-changed=BUZZ_BUILD_RELAY_RECONNECT_CMD"); println!("cargo:rustc-check-cfg=cfg(buzz_updater_enabled)"); @@ -20,12 +23,44 @@ fn main() { println!("cargo:rustc-env=BUZZ_DESKTOP_BUILD_RELAY_HTTP={relay_http}"); } - if let Ok(host) = std::env::var("BUZZ_BUILD_DATABRICKS_HOST") { - println!("cargo:rustc-env=BUZZ_DESKTOP_BUILD_DATABRICKS_HOST={host}"); + if let Ok(provider) = std::env::var("BUZZ_BUILD_BUZZ_AGENT_PROVIDER") { + println!("cargo:rustc-env=BUZZ_DESKTOP_BUILD_BUZZ_AGENT_PROVIDER={provider}"); + } + + if let Ok(model) = std::env::var("BUZZ_BUILD_BUZZ_AGENT_MODEL") { + println!("cargo:rustc-env=BUZZ_DESKTOP_BUILD_BUZZ_AGENT_MODEL={model}"); } - if let Ok(model) = std::env::var("BUZZ_BUILD_DATABRICKS_MODEL") { - println!("cargo:rustc-env=BUZZ_DESKTOP_BUILD_DATABRICKS_MODEL={model}"); + // Generic KEY=VALUE pairs to inject into every spawned agent process. + // Newline-delimited; each line must be non-empty and contain exactly one + // `=` separator with a non-empty key. OSS builds leave this unset. + // The validated value is base64-encoded before emitting so the single-line + // Cargo build-script output carries all pairs (Cargo output is line-oriented; + // a raw multiline value would be silently truncated to the first line). + if let Ok(raw) = std::env::var("BUZZ_BUILD_AGENT_ENV") { + for (line_no, line) in raw.lines().enumerate() { + let line = line.trim(); + if line.is_empty() { + continue; + } + let eq = line.find('=').unwrap_or_else(|| { + panic!( + "BUZZ_BUILD_AGENT_ENV line {}: missing '=' separator in {:?}", + line_no + 1, + line + ) + }); + let key = &line[..eq]; + if key.is_empty() { + panic!( + "BUZZ_BUILD_AGENT_ENV line {}: key must not be empty in {:?}", + line_no + 1, + line + ); + } + } + let encoded = base64::engine::general_purpose::STANDARD.encode(raw.as_bytes()); + println!("cargo:rustc-env=BUZZ_DESKTOP_BUILD_AGENT_ENV={encoded}"); } if let Ok(val) = std::env::var("BUZZ_BUILD_RELAY_RECONNECT_CMD") { diff --git a/desktop/src-tauri/src/commands/agent_models.rs b/desktop/src-tauri/src/commands/agent_models.rs index 2502306b7..cef2e7115 100644 --- a/desktop/src-tauri/src/commands/agent_models.rs +++ b/desktop/src-tauri/src/commands/agent_models.rs @@ -112,6 +112,17 @@ pub async fn get_agent_models( return Ok(models); } + if let Some(models) = discover_databricks_models( + &state.http_client, + effective_provider.as_deref(), + &merged_env, + persisted_model.clone(), + ) + .await? + { + return Ok(models); + } + run_agent_models_command( resolved_acp, agent_command, @@ -234,6 +245,17 @@ pub async fn discover_agent_models( return Ok(models); } + if let Some(models) = discover_databricks_models( + &state.http_client, + input.provider.as_deref(), + &merged_env, + None, + ) + .await? + { + return Ok(models); + } + run_agent_models_command(resolved_acp, resolved_agent, agent_args, None, merged_env).await } @@ -603,6 +625,96 @@ async fn discover_anthropic_models( })) } +// --------------------------------------------------------------------------- +// Databricks model discovery (v1 + v2) +// --------------------------------------------------------------------------- +// +// Delegates to buzz_agent_pkg::catalog::discover_databricks_models, which +// acquires auth in-process via build_token_source: +// - Static bearer (DATABRICKS_TOKEN): returned immediately. +// - PKCE cache hit: returned from disk without a browser flow. +// - No token, no cache: returns Err(LlmAuth) → we return Ok(None) and fall +// through to run_agent_models_command. Never hangs, never opens a browser. + +fn is_databricks_provider(provider: Option<&str>) -> bool { + matches!( + provider + .map(str::trim) + .map(str::to_ascii_lowercase) + .as_deref(), + Some("databricks" | "databricks_v2") + ) +} + +fn databricks_agent_provider(provider: &str) -> buzz_agent_pkg::config::Provider { + if provider.trim().eq_ignore_ascii_case("databricks_v2") { + buzz_agent_pkg::config::Provider::DatabricksV2 + } else { + buzz_agent_pkg::config::Provider::Databricks + } +} + +async fn discover_databricks_models( + _client: &reqwest::Client, + provider: Option<&str>, + env: &BTreeMap, + selected_model: Option, +) -> Result, String> { + let provider_str = match provider { + Some(p) if is_databricks_provider(Some(p)) => p, + _ => return Ok(None), + }; + + let host = match env_or_process_value(env, "DATABRICKS_HOST") { + Some(h) => h, + None => return Ok(None), // no host → fall through to subprocess + }; + + // api_key = DATABRICKS_TOKEN (empty string = use PKCE cache). + let api_key = env_or_process_value(env, "DATABRICKS_TOKEN").unwrap_or_default(); + + let agent_provider = databricks_agent_provider(provider_str); + let cfg = buzz_agent_pkg::config::Config::for_discovery(agent_provider, api_key, host); + + // Build a redaction env so the token never appears in surfaced errors. + let token_for_redact = env_or_process_value(env, "DATABRICKS_TOKEN").unwrap_or_default(); + let redaction_env = redaction_env_with_value(env, "DATABRICKS_TOKEN", &token_for_redact); + + let entries = match buzz_agent_pkg::discover_databricks_models(&cfg).await { + Ok(e) => e, + Err(buzz_agent_pkg::AgentError::LlmAuth(_)) => { + // No token + no PKCE cache → fall through to subprocess. + return Ok(None); + } + Err(e) => { + let msg = crate::managed_agents::redact_env_values_in(&e.to_string(), &redaction_env); + return Err(format!("Databricks model discovery failed: {msg}")); + } + }; + + if entries.is_empty() { + return Err("Databricks model discovery returned no models".to_string()); + } + + let models = entries + .into_iter() + .map(|e| AgentModelInfo { + id: e.id, + name: Some(e.name), + description: None, + }) + .collect(); + + Ok(Some(AgentModelsResponse { + agent_name: provider_str.trim().to_string(), + agent_version: "models-api".to_string(), + models, + agent_default_model: None, + selected_model, + supports_switching: true, + })) +} + async fn run_agent_models_command( resolved_acp: PathBuf, agent_command: String, @@ -637,11 +749,9 @@ async fn run_agent_models_command( } } } - // Mirror runtime spawn: internal builds may bake Databricks host/model + // Mirror runtime spawn: internal builds may bake provider/model // defaults. User-provided env below still wins. - for (key, value) in crate::managed_agents::build_databricks_defaults() { - cmd.env(key, value); - } + crate::managed_agents::build_buzz_agent_provider_defaults(&mut cmd); // User env layering — written LAST so it overrides any Buzz-set env above. for (k, v) in &merged_env { cmd.env(k, v); diff --git a/desktop/src-tauri/src/commands/agent_models_tests.rs b/desktop/src-tauri/src/commands/agent_models_tests.rs index 13f781659..2fe72e6e1 100644 --- a/desktop/src-tauri/src/commands/agent_models_tests.rs +++ b/desktop/src-tauri/src/commands/agent_models_tests.rs @@ -211,3 +211,19 @@ fn saved_agent_model_discovery_uses_record_snapshot() { ); assert!(!config.env.contains_key("BUZZ_PRIVATE_KEY")); } + +// --------------------------------------------------------------------------- +// Databricks provider detection +// --------------------------------------------------------------------------- +// +// Parse/filter/pagination tests live in crates/buzz-agent/src/catalog.rs +// (they moved there with the Option C refactor). + +#[test] +fn is_databricks_provider_matches_both_variants() { + assert!(is_databricks_provider(Some("databricks"))); + assert!(is_databricks_provider(Some("databricks_v2"))); + assert!(is_databricks_provider(Some(" DATABRICKS "))); + assert!(!is_databricks_provider(Some("anthropic"))); + assert!(!is_databricks_provider(None)); +} diff --git a/desktop/src-tauri/src/managed_agents/agent_env.rs b/desktop/src-tauri/src/managed_agents/agent_env.rs new file mode 100644 index 000000000..96d807b50 --- /dev/null +++ b/desktop/src-tauri/src/managed_agents/agent_env.rs @@ -0,0 +1,191 @@ +//! Build-time agent env passthrough. +//! +//! Internal builds (buzz-releases) bake arbitrary `KEY=VALUE` pairs into the +//! binary via `BUZZ_BUILD_AGENT_ENV` (base64-encoded, newline-delimited). +//! OSS builds leave the compile-time var unset — nothing is injected. + +use base64::Engine as _; + +/// Inject baked-in provider/model defaults and generic env pairs onto `cmd`. +/// +/// Call this BEFORE writing record/persona metadata env vars so that the +/// record's explicit choices (written after) override the baked defaults. +/// User-supplied `record.env_vars` (written last) always win. +pub(crate) fn build_buzz_agent_provider_defaults(cmd: &mut std::process::Command) { + if let Some(provider) = option_env!("BUZZ_DESKTOP_BUILD_BUZZ_AGENT_PROVIDER") { + if !provider.is_empty() { + cmd.env("BUZZ_AGENT_PROVIDER", provider); + } + } + if let Some(model) = option_env!("BUZZ_DESKTOP_BUILD_BUZZ_AGENT_MODEL") { + if !model.is_empty() { + cmd.env("BUZZ_AGENT_MODEL", model); + } + } + if let Some(raw) = option_env!("BUZZ_DESKTOP_BUILD_AGENT_ENV") { + // The value was base64-encoded at build time so the single-line Cargo + // output carries all KEY=VALUE pairs without truncation. + if let Ok(decoded) = base64::engine::general_purpose::STANDARD.decode(raw.as_bytes()) { + if let Ok(text) = std::str::from_utf8(&decoded) { + for (key, value) in parse_agent_env_lines(text) { + cmd.env(key, value); + } + } + } + } +} + +/// Parse newline-delimited `KEY=VALUE` lines from a baked env blob. +/// Blank lines are skipped. Each non-blank line must contain `=`; the key +/// is everything before the first `=`, the value is everything after (values +/// may themselves contain `=`). Lines with an empty key are skipped. +pub(crate) fn parse_agent_env_lines(raw: &str) -> Vec<(&str, &str)> { + raw.lines() + .filter_map(|line| { + let line = line.trim(); + if line.is_empty() { + return None; + } + let eq = line.find('=')?; + let key = &line[..eq]; + if key.is_empty() { + return None; + } + Some((key, &line[eq + 1..])) + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::{build_buzz_agent_provider_defaults, parse_agent_env_lines}; + + #[test] + fn buzz_agent_provider_defaults_empty_in_oss_build() { + // OSS (and normal test) builds set neither BUZZ_BUILD_BUZZ_AGENT_*, + // so nothing is baked in and no BUZZ_AGENT_* is injected on spawn. + let mut cmd = std::process::Command::new("env"); + cmd.env_clear(); + build_buzz_agent_provider_defaults(&mut cmd); + let output = cmd.output().expect("env should run"); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + !stdout.contains("BUZZ_AGENT_PROVIDER="), + "BUZZ_AGENT_PROVIDER should not be injected in OSS builds" + ); + assert!( + !stdout.contains("BUZZ_AGENT_MODEL="), + "BUZZ_AGENT_MODEL should not be injected in OSS builds" + ); + assert!( + !stdout.contains("DATABRICKS_HOST="), + "DATABRICKS_HOST should not be injected in OSS builds" + ); + } + + #[test] + fn parse_agent_env_lines_splits_on_first_equals() { + // Value may itself contain `=` — only the first `=` is the separator. + let pairs = parse_agent_env_lines("DATABRICKS_HOST=https://host.example.com/path?a=1"); + assert_eq!( + pairs, + vec![("DATABRICKS_HOST", "https://host.example.com/path?a=1")] + ); + } + + #[test] + fn parse_agent_env_lines_multiple_pairs() { + let raw = "KEY_A=value_a\nKEY_B=value_b"; + let pairs = parse_agent_env_lines(raw); + assert_eq!(pairs, vec![("KEY_A", "value_a"), ("KEY_B", "value_b")]); + } + + #[test] + fn parse_agent_env_lines_skips_blank_lines() { + let raw = "KEY_A=val_a\n\n \nKEY_B=val_b"; + let pairs = parse_agent_env_lines(raw); + assert_eq!(pairs, vec![("KEY_A", "val_a"), ("KEY_B", "val_b")]); + } + + #[test] + fn parse_agent_env_lines_skips_line_without_equals() { + // A malformed line (no `=`) is silently skipped — build.rs validates at + // compile time; runtime parsing is defensive. + let raw = "NO_EQUALS_HERE\nGOOD=value"; + let pairs = parse_agent_env_lines(raw); + assert_eq!(pairs, vec![("GOOD", "value")]); + } + + #[test] + fn parse_agent_env_lines_skips_empty_key() { + // `=value` has an empty key — skip it. + let raw = "=orphan_value\nGOOD=value"; + let pairs = parse_agent_env_lines(raw); + assert_eq!(pairs, vec![("GOOD", "value")]); + } + + #[test] + fn parse_agent_env_lines_empty_value_is_allowed() { + // `KEY=` is valid — empty value is intentional (clears an env var). + let pairs = parse_agent_env_lines("EMPTY="); + assert_eq!(pairs, vec![("EMPTY", "")]); + } + + #[test] + fn parse_agent_env_lines_empty_input_returns_empty() { + assert!(parse_agent_env_lines("").is_empty()); + assert!(parse_agent_env_lines(" \n \n").is_empty()); + } + + // ── base64 round-trip regression ───────────────────────────────────── + // + // Cargo build-script output is line-oriented: a raw multiline value emitted + // via `cargo:rustc-env=KEY=...` would be truncated to the first line. + // build.rs base64-encodes the validated value; runtime.rs decodes it. + // This test verifies that a 2-pair value with a URL containing `=` survives + // the encode→decode→parse round-trip and both pairs land correctly. + #[test] + fn parse_agent_env_lines_base64_round_trip_preserves_all_pairs() { + use base64::Engine as _; + let raw = + "DATABRICKS_HOST=https://host.example.com/path?a=1&b=2\nDATABRICKS_MODEL=some-model"; + let encoded = base64::engine::general_purpose::STANDARD.encode(raw.as_bytes()); + let decoded_bytes = base64::engine::general_purpose::STANDARD + .decode(encoded.as_bytes()) + .expect("decode should succeed"); + let decoded = std::str::from_utf8(&decoded_bytes).expect("utf8 should be valid"); + let pairs = parse_agent_env_lines(decoded); + assert_eq!(pairs.len(), 2, "both pairs must survive the round-trip"); + assert_eq!( + pairs[0], + ("DATABRICKS_HOST", "https://host.example.com/path?a=1&b=2") + ); + assert_eq!(pairs[1], ("DATABRICKS_MODEL", "some-model")); + } + + // ── baked defaults ordering regression ─────────────────────────────── + // + // `build_buzz_agent_provider_defaults` must run BEFORE + // `runtime_metadata_env_vars` writes the record's provider/model so that + // record values win (last-write-wins). This test simulates the ordering by + // writing the baked default first, then overwriting with the record value. + #[test] + fn baked_defaults_do_not_override_record_provider_written_after() { + let mut cmd = std::process::Command::new("env"); + cmd.env_clear(); + // Simulate what an internal build's baked defaults would inject. + cmd.env("BUZZ_AGENT_PROVIDER", "databricks"); + // Simulate what runtime_metadata_env_vars writes from the record (comes after). + cmd.env("BUZZ_AGENT_PROVIDER", "anthropic"); + let output = cmd.output().expect("env should run"); + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("BUZZ_AGENT_PROVIDER=anthropic"), + "record provider must win over baked default (last-write-wins)" + ); + assert!( + !stdout.contains("BUZZ_AGENT_PROVIDER=databricks"), + "baked default must not survive when record provider is written after" + ); + } +} diff --git a/desktop/src-tauri/src/managed_agents/mod.rs b/desktop/src-tauri/src/managed_agents/mod.rs index f6f9a9e95..17f023789 100644 --- a/desktop/src-tauri/src/managed_agents/mod.rs +++ b/desktop/src-tauri/src/managed_agents/mod.rs @@ -1,4 +1,6 @@ +mod agent_env; pub(crate) mod agent_events; +pub(crate) use agent_env::build_buzz_agent_provider_defaults; mod backend; pub(crate) mod config_bridge; mod discovery; diff --git a/desktop/src-tauri/src/managed_agents/runtime.rs b/desktop/src-tauri/src/managed_agents/runtime.rs index 788e38906..585ffea26 100644 --- a/desktop/src-tauri/src/managed_agents/runtime.rs +++ b/desktop/src-tauri/src/managed_agents/runtime.rs @@ -2,6 +2,8 @@ use std::collections::HashMap; use tauri::AppHandle; +use super::agent_env::build_buzz_agent_provider_defaults; + use crate::{ managed_agents::{ append_log_marker, known_acp_runtime, login_shell_path, managed_agent_log_path, @@ -1796,6 +1798,10 @@ pub fn spawn_agent_child( } else { command.env_remove("BUZZ_ACP_MODEL"); } + // Baked-in provider defaults for internal builds (buzz-releases sets + // BUZZ_BUILD_BUZZ_AGENT_* at compile time; OSS builds bake nothing). + // Written FIRST so that record/persona metadata env vars below override them. + build_buzz_agent_provider_defaults(&mut command); if let Some(meta) = runtime_meta { for (key, value) in runtime_metadata_env_vars( meta.model_env_var, @@ -1870,13 +1876,6 @@ pub fn spawn_agent_child( ); } - // Baked-in Databricks defaults for internal builds (buzz-releases sets - // BUZZ_BUILD_DATABRICKS_* at compile time; OSS builds bake nothing). - // Written BEFORE user env_vars so a GUI/persona override still wins. - for (key, value) in build_databricks_defaults() { - command.env(key, value); - } - // ── User env vars: the record snapshot ───────────────────────────── // // The record's `env_vars` is the complete, pinned env map — persona env @@ -1953,23 +1952,6 @@ fn child_rust_log_filter() -> String { } } -/// Databricks host/model baked in at compile time for internal builds. Empty -/// in OSS builds, where the `BUZZ_BUILD_DATABRICKS_*` env is unset. -pub(crate) fn build_databricks_defaults() -> Vec<(&'static str, &'static str)> { - let mut defaults = Vec::new(); - if let Some(host) = option_env!("BUZZ_DESKTOP_BUILD_DATABRICKS_HOST") { - if !host.is_empty() { - defaults.push(("DATABRICKS_HOST", host)); - } - } - if let Some(model) = option_env!("BUZZ_DESKTOP_BUILD_DATABRICKS_MODEL") { - if !model.is_empty() { - defaults.push(("DATABRICKS_MODEL", model)); - } - } - defaults -} - pub fn start_managed_agent_process( app: &AppHandle, record: &mut ManagedAgentRecord, diff --git a/desktop/src-tauri/src/managed_agents/runtime/tests.rs b/desktop/src-tauri/src/managed_agents/runtime/tests.rs index 667e8d241..cc9857205 100644 --- a/desktop/src-tauri/src/managed_agents/runtime/tests.rs +++ b/desktop/src-tauri/src/managed_agents/runtime/tests.rs @@ -86,13 +86,6 @@ fn buzz_agent_has_mcp_hooks() { assert_eq!(p.mcp_command, Some("buzz-dev-mcp")); } -#[test] -fn databricks_defaults_empty_in_oss_build() { - // OSS (and normal test) builds set neither BUZZ_BUILD_DATABRICKS_*, - // so nothing is baked in and no DATABRICKS_* is injected on spawn. - assert!(super::build_databricks_defaults().is_empty()); -} - #[test] fn buzz_agent_resolved_via_path() { assert!(known_acp_runtime("/usr/local/bin/buzz-agent").is_some_and(|p| p.mcp_hooks)); diff --git a/desktop/src/features/agents/ui/personaDialogPickers.tsx b/desktop/src/features/agents/ui/personaDialogPickers.tsx index 35cb9db08..e3b51cfcb 100644 --- a/desktop/src/features/agents/ui/personaDialogPickers.tsx +++ b/desktop/src/features/agents/ui/personaDialogPickers.tsx @@ -16,6 +16,7 @@ export const NO_RUNTIME_DROPDOWN_VALUE = "__no_runtime__"; const KNOWN_LLM_PROVIDER_IDS = [ "anthropic", "databricks", + "databricks_v2", "openai", "openai-compat", ] as const; @@ -25,7 +26,6 @@ type PersonaLlmProviderId = (typeof KNOWN_LLM_PROVIDER_IDS)[number]; export type PersonaModelOption = { id: string; label: string; - providers?: readonly PersonaLlmProviderId[]; }; export type PersonaDropdownOption = { @@ -45,144 +45,20 @@ const DEFAULT_MODEL_OPTION: PersonaModelOption = { label: "Default model", }; -const DATABRICKS_DEFAULT_MODEL_ID = "goose-claude-4-8-opus"; -const DATABRICKS_DEFAULT_MODEL_LABEL = "Claude Opus 4.8"; - -const DATABRICKS_DEFAULT_MODEL_OPTION: PersonaModelOption = { - id: "", - label: `${DATABRICKS_DEFAULT_MODEL_LABEL} (default)`, -}; - -// Databricks IDs are sourced from squareup/goose-releases goose_models.json. -// `goose-claude-4-8-opus` is also the current Buzz internal build default in -// squareup/buzz-releases, though it is ahead of that registry today. -const BUZZ_AGENT_MODEL_OPTIONS: readonly PersonaModelOption[] = [ - DATABRICKS_DEFAULT_MODEL_OPTION, - { - id: "goose-claude-4-8-opus", - label: "Claude Opus 4.8", - providers: ["databricks"], - }, - { - id: "goose-claude-4-7-opus", - label: "Claude Opus 4.7", - providers: ["databricks"], - }, - { - id: "goose-claude-4-6-opus", - label: "Claude Opus 4.6", - providers: ["anthropic", "databricks"], - }, - { - id: "goose-claude-4-6-sonnet", - label: "Claude Sonnet 4.6", - providers: ["anthropic", "databricks"], - }, - { - id: "goose-claude-4-5-opus", - label: "Claude Opus 4.5", - providers: ["databricks"], - }, - { - id: "goose-claude-4-5-sonnet", - label: "Claude Sonnet 4.5", - providers: ["databricks"], - }, - { - id: "goose-claude-4-5-haiku", - label: "Claude Haiku 4.5", - providers: ["databricks"], - }, - { - id: "goose-gpt-5-2", - label: "GPT-5.2", - providers: ["databricks"], - }, - { - id: "databricks-gpt-5-5-pro", - label: "GPT-5.5 Pro", - providers: ["databricks"], - }, - { - id: "databricks-gpt-5-5", - label: "GPT-5.5", - providers: ["databricks"], - }, - { - id: "gpt-5.5", - label: "GPT-5.5", - providers: ["openai", "openai-compat"], - }, - { - id: "gpt-5.4", - label: "GPT-5.4", - providers: ["openai", "openai-compat"], - }, - { - id: "gpt-5.4-mini", - label: "GPT-5.4 mini", - providers: ["openai", "openai-compat"], - }, - { - id: "gpt-5.4-nano", - label: "GPT-5.4 nano", - providers: ["openai", "openai-compat"], - }, - { - id: "gpt-5", - label: "GPT-5", - providers: ["openai", "openai-compat"], - }, - { - id: "gpt-5-mini", - label: "GPT-5 mini", - providers: ["openai", "openai-compat"], - }, - { - id: "goose-gemini-3-5-flash", - label: "Gemini 3.5 Flash", - providers: ["databricks"], - }, - { - id: "goose-gemini-3-1-pro", - label: "Gemini 3.1 Pro", - providers: ["databricks"], - }, - { - id: "goose-gemini-3-1-flash-lite", - label: "Gemini 3.1 Flash Lite", - providers: ["databricks"], - }, - { - id: "goose-gemini-2-5-pro", - label: "Gemini 2.5 Pro", - providers: ["databricks"], - }, - { - id: "gemini-2.5-pro", - label: "Gemini 2.5 Pro", - providers: ["openai-compat"], - }, - { - id: "gemini-2.5-flash", - label: "Gemini 2.5 Flash", - providers: ["openai-compat"], - }, -]; - const PERSONA_LLM_PROVIDER_OPTIONS: readonly PersonaModelOption[] = [ { id: "anthropic", label: "Anthropic" }, { id: "openai", label: "OpenAI" }, { id: "openai-compat", label: "OpenAI-compatible" }, { id: "databricks", label: "Databricks" }, + { id: "databricks_v2", label: "Databricks v2" }, ]; const PERSONA_MODEL_OPTIONS_BY_RUNTIME: Record< string, readonly PersonaModelOption[] > = { - goose: BUZZ_AGENT_MODEL_OPTIONS, - "buzz-agent": BUZZ_AGENT_MODEL_OPTIONS, + goose: [DEFAULT_MODEL_OPTION], + "buzz-agent": [DEFAULT_MODEL_OPTION], claude: [DEFAULT_MODEL_OPTION], codex: [DEFAULT_MODEL_OPTION], }; @@ -199,12 +75,8 @@ function isKnownLlmProvider( return (KNOWN_LLM_PROVIDER_IDS as readonly string[]).includes(providerId); } -function runtimeDefaultsToDatabricks(runtimeId: string) { - return runtimeId === "buzz-agent" || runtimeId === "goose"; -} - export function runtimeSupportsLlmProviderSelection(runtimeId: string) { - return runtimeDefaultsToDatabricks(runtimeId); + return runtimeId === "buzz-agent" || runtimeId === "goose"; } function effectiveModelProviderForOptions( @@ -218,11 +90,7 @@ function effectiveModelProviderForOptions( return ""; } - const trimmedProvider = providerId?.trim() ?? ""; - if (trimmedProvider.length === 0 && runtimeDefaultsToDatabricks(runtimeId)) { - return "databricks"; - } - return trimmedProvider; + return providerId?.trim() ?? ""; } export function getPersonaModelOptions( @@ -243,10 +111,7 @@ export function getPersonaModelOptions( return options.filter( (option) => - (option.id.length === 0 && - !providerRequiresExplicitModel(trimmedProvider)) || - (option.id !== DATABRICKS_DEFAULT_MODEL_ID && - option.providers?.includes(trimmedProvider)), + option.id.length === 0 && !providerRequiresExplicitModel(trimmedProvider), ); } @@ -299,10 +164,8 @@ export function providerRequiresExplicitModel( ); } -export function getDefaultLlmProviderLabel(runtimeId: string) { - return runtimeDefaultsToDatabricks(runtimeId) - ? "Databricks (default)" - : "Default"; +export function getDefaultLlmProviderLabel(_runtimeId: string) { + return "Default"; } export function getPersonaProviderOptions( @@ -310,16 +173,10 @@ export function getPersonaProviderOptions( runtimeId: string, ): readonly PersonaModelOption[] { const trimmedProvider = currentProvider.trim(); - const providerOptions = runtimeDefaultsToDatabricks(runtimeId) - ? PERSONA_LLM_PROVIDER_OPTIONS.filter((option) => - trimmedProvider === "databricks" ? true : option.id !== "databricks", - ) - : PERSONA_LLM_PROVIDER_OPTIONS; - const defaultProviderOptions = - runtimeDefaultsToDatabricks(runtimeId) && trimmedProvider === "databricks" - ? [] - : [{ id: "", label: getDefaultLlmProviderLabel(runtimeId) }]; - const options = [...defaultProviderOptions, ...providerOptions]; + const defaultProviderOptions = [ + { id: "", label: getDefaultLlmProviderLabel(runtimeId) }, + ]; + const options = [...defaultProviderOptions, ...PERSONA_LLM_PROVIDER_OPTIONS]; if ( trimmedProvider.length === 0 || options.some((option) => option.id === trimmedProvider) diff --git a/desktop/src/features/agents/ui/personaModelDiscoveryStatus.ts b/desktop/src/features/agents/ui/personaModelDiscoveryStatus.ts index 381e1b469..ad348b363 100644 --- a/desktop/src/features/agents/ui/personaModelDiscoveryStatus.ts +++ b/desktop/src/features/agents/ui/personaModelDiscoveryStatus.ts @@ -53,7 +53,7 @@ export function formatModelDiscoveryErrorStatus( if ( message.includes("DATABRICKS_HOST required") || message.includes("DATABRICKS_MODEL required") || - message.includes("BUZZ_AGENT_PROVIDER required") + message.includes("BUZZ_AGENT_PROVIDER is required") ) { return null; } diff --git a/desktop/tests/e2e/persona-env-vars.spec.ts b/desktop/tests/e2e/persona-env-vars.spec.ts index fb6647b4d..24a28b80e 100644 --- a/desktop/tests/e2e/persona-env-vars.spec.ts +++ b/desktop/tests/e2e/persona-env-vars.spec.ts @@ -296,8 +296,10 @@ test("persona model options follow the selected LLM provider", async ({ await expect(provider).toContainText("Buzz Agent (default)"); await expect(llmProvider).toBeVisible(); await expect(model).toBeVisible(); + // Without live discovery, the only static option is "Default model". await expect(model).toContainText("Default model"); + // Switch to OpenAI — the API-key field appears and is labelled correctly. await llmProvider.click(); await page .getByRole("menuitemradio", { name: "OpenAI", exact: true }) @@ -307,26 +309,14 @@ test("persona model options follow the selected LLM provider", async ({ await expect(providerApiKey).toBeVisible(); await expect(page.getByTestId("env-vars-editor")).toHaveCount(0); await expect(model).toBeVisible(); - - await providerApiKey.fill("sk-openai-test"); + // OpenAI requires an explicit model, so "Default model" is filtered out. + // The menu offers only "Custom model..." — verify it is present and selectable. const openAiModelMenu = await openModelMenu(page, model); - await expect( - openAiModelMenu.getByRole("menuitemradio", { - name: "GPT-5.5", - exact: true, - }), - ).toBeVisible(); - await expect( - openAiModelMenu.getByRole("menuitemradio", { name: "GPT-5", exact: true }), - ).toHaveCount(0); - await expect( - openAiModelMenu.getByRole("menuitemradio", { name: /Claude/ }), - ).toHaveCount(0); await openAiModelMenu - .getByRole("menuitemradio", { name: "GPT-5.5", exact: true }) + .getByRole("menuitemradio", { name: "Custom model...", exact: true }) .click(); - await expect(model).toContainText("GPT-5.5"); + // Switch to Anthropic — API-key field label changes and value clears. await llmProvider.click(); await page .getByRole("menuitemradio", { name: "Anthropic", exact: true }) @@ -335,44 +325,19 @@ test("persona model options follow the selected LLM provider", async ({ await expect(providerApiKey).toHaveValue(""); await expect(model).toBeVisible(); + // Fill in the Anthropic key and verify the model field is still present. await providerApiKey.fill("sk-ant-test"); + await expect(model).toBeVisible(); - const anthropicModelMenu = await openModelMenu(page, model); - await expect( - anthropicModelMenu.getByRole("menuitemradio", { - name: "Claude Sonnet 4.6", - }), - ).toBeVisible(); - await expect( - anthropicModelMenu.getByRole("menuitemradio", { - name: "GPT-5.5", - exact: true, - }), - ).toHaveCount(0); - await anthropicModelMenu - .getByRole("menuitemradio", { name: "Claude Sonnet 4.6" }) - .click(); - await expect(model).toContainText("Claude Sonnet 4.6"); - + // Switch to Default (no explicit provider) — model resets to "Default model". await llmProvider.click(); const llmProviderMenu = page.getByRole("menu").filter({ has: page.getByRole("menuitemradio", { name: "OpenAI", exact: true }), }); await llmProviderMenu .last() - .getByRole("menuitemradio", { name: "Databricks (default)", exact: true }) + .getByRole("menuitemradio", { name: "Default", exact: true }) .click(); await expect(model).toBeVisible(); - await expect(model).toContainText("Claude Sonnet 4.6"); - - const defaultModelMenu = await openModelMenu(page, model); - await expect( - defaultModelMenu.getByRole("menuitemradio", { name: "Claude Sonnet 4.6" }), - ).toBeVisible(); - await expect( - defaultModelMenu.getByRole("menuitemradio", { - name: "GPT-5.5", - exact: true, - }), - ).toBeVisible(); + await expect(model).toContainText("Default model"); });