block · wpfleger96 · Jun 30, 2026 · Jun 30, 2026
diff --git a/crates/buzz-agent/README.md b/crates/buzz-agent/README.md
@@ -129,17 +129,17 @@ Everything is environment variables. No flags, no config files. (We are a subpro
 
 | Variable | Default | Notes |
 |---|---|---|
-| `BUZZ_AGENT_PROVIDER` | — | `anthropic`, `openai`, `databricks`, or `databricks_v2`. If unset, or if `anthropic`/`openai` is selected but its API key is missing, Databricks is auto-selected when `DATABRICKS_HOST` + `DATABRICKS_MODEL` are set. |
-| `ANTHROPIC_API_KEY` | — | Required when provider=anthropic unless Databricks fallback is configured. |
+| `BUZZ_AGENT_PROVIDER` | — | Required. `anthropic`, `openai`, `databricks`, or `databricks_v2`. No implicit fallback — the agent errors at startup when this is unset. |
+| `ANTHROPIC_API_KEY` | — | Required when provider=anthropic. |
 | `ANTHROPIC_MODEL` | — | Required when provider=anthropic. |
 | `ANTHROPIC_BASE_URL` | `https://api.anthropic.com` | |
 | `ANTHROPIC_API_VERSION` | `2023-06-01` | |
-| `OPENAI_COMPAT_API_KEY` | — | Required when provider=openai unless Databricks fallback is configured. |
+| `OPENAI_COMPAT_API_KEY` | — | Required when provider=openai. |
 | `OPENAI_COMPAT_MODEL` | — | Required when provider=openai. |
 | `OPENAI_COMPAT_BASE_URL` | `https://api.openai.com/v1` | Point at vLLM, llama.cpp, OpenRouter, Ollama, etc. |
 | `OPENAI_COMPAT_API` | `auto` | `auto` \| `chat` \| `responses`. `auto` picks Responses for `*.openai.com`, Chat Completions everywhere else. |
-| `DATABRICKS_HOST` | — | Required when provider=databricks or when using Databricks fallback. |
-| `DATABRICKS_MODEL` | — | Required when provider=databricks or when using Databricks fallback. |
+| `DATABRICKS_HOST` | — | Required when provider=databricks or provider=databricks_v2. |
+| `DATABRICKS_MODEL` | — | Required when provider=databricks or provider=databricks_v2. |
 | `DATABRICKS_TOKEN` | — | Optional static bearer escape hatch. If unset, Databricks uses browser OAuth + refresh cache. |
 | `BUZZ_AGENT_SYSTEM_PROMPT` | built-in | Inline system prompt. |
 | `BUZZ_AGENT_SYSTEM_PROMPT_FILE` | — | File path. Mutually exclusive with the above. |
@@ -172,7 +172,7 @@ Everything is environment variables. No flags, no config files. (We are a subpro
 | Databricks | `databricks` | `POST {host}/serving-endpoints/{model}/invocations` | goose-claude-4-6-sonnet |
 | Databricks AI Gateway v2 | `databricks_v2` | `POST {host}/ai-gateway/{provider}/v1/...` | databricks-gpt-5-5, databricks-claude-opus-4-7 |
 
-If `BUZZ_AGENT_PROVIDER=anthropic` is selected without `ANTHROPIC_API_KEY`, or `BUZZ_AGENT_PROVIDER=openai` is selected without `OPENAI_COMPAT_API_KEY`, the agent automatically falls back to Databricks OAuth when `DATABRICKS_HOST` and `DATABRICKS_MODEL` are set. The same Databricks fallback applies when `BUZZ_AGENT_PROVIDER` is unset. Explicit Anthropic/OpenAI API keys always win.
+If `BUZZ_AGENT_PROVIDER=anthropic` is selected without `ANTHROPIC_API_KEY`, or `BUZZ_AGENT_PROVIDER=openai` is selected without `OPENAI_COMPAT_API_KEY`, the agent returns an error — there is no implicit fallback to another provider.
 
 `provider=openai` speaks two HTTP dialects: the [Responses API](https://platform.openai.com/docs/api-reference/responses) (`/v1/responses`, required for GPT-5 / o-series tool-calling on OpenAI's own service) and the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat) (`/chat/completions`, the broadly-supported OpenAI-compatible wire format).
 

diff --git a/crates/buzz-agent/src/auth.rs b/crates/buzz-agent/src/auth.rs
@@ -44,6 +44,15 @@ const BROWSER_AUTH_TIMEOUT: Duration = Duration::from_secs(60);
 pub trait TokenSource: Send + Sync {
     async fn bearer(&self) -> Result<String, AgentError>;
 
+    /// Return a bearer token from cache or refresh, **never** opening a browser.
+    ///
+    /// The default just awaits [`bearer`](Self::bearer), so it is only correct
+    /// for sources whose `bearer` cannot start a browser flow (e.g. static API
+    /// keys). [`PkceOAuthTokenSource`] overrides it to stop before that step.
+    async fn bearer_no_browser(&self) -> Result<String, AgentError> {
+        self.bearer().await
+    }
+
     /// Force a fresh bearer after the server rejected the current one (401).
     ///
     /// `rejected` is the exact access token that just got the 401. Unlike
@@ -287,6 +296,10 @@ impl TokenSource for PkceOAuthTokenSource {
         Ok(bearer)
     }
 
+    async fn bearer_no_browser(&self) -> Result<String, AgentError> {
+        self.try_bearer_no_browser().await // cache + refresh only; never opens a browser
+    }
+
     /// Force-refresh after a 401, never touching the browser flow.
     ///
     /// `rejected` is the access token the server just 401'd. Coalescing keys
@@ -344,6 +357,68 @@ impl TokenSource for PkceOAuthTokenSource {
     }
 }
 
+impl PkceOAuthTokenSource {
+    /// Return a bearer token from cache or refresh, **never** opening a browser.
+    ///
+    /// Like [`bearer`](TokenSource::bearer) minus the browser step: tries the in-memory
+    /// token, the disk cache, a refresh, then the disk cache again, and returns
+    /// `Err(LlmAuth(...))` when none is usable. `endpoints()`/`save()` errors propagate
+    /// via `?`. Used by model-discovery paths that must not block on user interaction.
+    pub(crate) async fn try_bearer_no_browser(&self) -> Result<String, AgentError> {
+        let mut state = self.state.lock().await;
+
+        // 1. In-memory cache hit, still fresh.
+        if let Some(tok) = state.as_ref() {
+            if !is_expired(tok) {
+                return Ok(tok.access_token.clone());
+            }
+        }
+
+        // 2. Re-read disk — another process may have refreshed already.
+        if let Some(disk_tok) = read_cache(&self.cache_path) {
+            if !is_expired(&disk_tok) {
+                let bearer = disk_tok.access_token.clone();
+                *state = Some(disk_tok);
+                return Ok(bearer);
+            }
+        }
+
+        // 3. Try refresh if the in-memory state holds a refresh token (an expired
+        //    disk token from step 2 is not consulted).  Endpoints are discovered
+        //    lazily, only inside this branch, so an unreachable OIDC discovery
+        //    URL cannot turn the no-token/no-cache path's `LlmAuth` (graceful
+        //    fallback) into an `Llm` (hard error).
+        let refresh = state.as_ref().and_then(|t| t.refresh_token.clone());
+        if let Some(rt) = refresh {
+            let endpoints = self.endpoints().await?;
+            match self.refresh(&endpoints, &rt).await {
+                Ok(fresh) => {
+                    let bearer = fresh.access_token.clone();
+                    self.save(&mut state, fresh)?;
+                    return Ok(bearer);
+                }
+                Err(e) => {
+                    tracing::warn!(error = %e, "oauth refresh failed during model discovery");
+                }
+            }
+
+            // 4. Refresh failed (already logged) — check the disk cache once more.
+            if let Some(disk_tok) = read_cache(&self.cache_path) {
+                if !is_expired(&disk_tok) {
+                    let bearer = disk_tok.access_token.clone();
+                    *state = Some(disk_tok);
+                    return Ok(bearer);
+                }
+            }
+        }
+
+        // No usable token — return error instead of opening a browser.
+        Err(AgentError::LlmAuth(
+            "no cached Databricks token; run `buzz-agent auth databricks` first".into(),
+        ))
+    }
+}
+
 // ---- helpers -------------------------------------------------------------
 
 /// Aborts a spawned task when dropped. Used to guarantee the localhost
@@ -726,4 +801,45 @@ mod tests {
             "expected discovery error, got: {err_msg}"
         );
     }
+
+    /// `try_bearer_no_browser` with an empty cache and no refresh token must
+    /// return `LlmAuth` immediately — it must NOT attempt OIDC discovery even
+    /// when the `discovery_url` is unreachable/invalid.  This guards the
+    /// regression where `endpoints()` was called unconditionally before the
+    /// refresh-token check, causing an `Llm` error (hard failure) instead of
+    /// the intended graceful `LlmAuth` fallback.
+    #[tokio::test]
+    async fn test_try_bearer_no_browser_empty_cache_no_refresh_returns_llm_auth_without_discovery()
+    {
+        let dir = tempfile::tempdir().unwrap();
+        // Unreachable discovery URL (`.test` is a reserved TLD) — if endpoints()
+        // is called, the result is an `Llm` error and the match below panics.
+        let cfg = PkceOAuthConfig {
+            discovery_url: "https://invalid.example.test/.well-known/oauth-authorization-server"
+                .into(),
+            client_id: "test-client".into(),
+            scopes: vec!["offline_access".into()],
+            cache_namespace: "test".into(),
+            cache_dir_override: Some(dir.path().to_path_buf()),
+        };
+        let source = PkceOAuthTokenSource::new(cfg).unwrap();
+
+        // Force the in-memory state empty: no access token and no refresh token.
+        {
+            let mut state = source.state.lock().await;
+            *state = None;
+        }
+
+        // No disk cache file either — `dir` is a freshly created, empty tempdir.
+
+        let result = source.try_bearer_no_browser().await;
+        assert!(result.is_err(), "expected Err, got Ok");
+        match result.unwrap_err() {
+            AgentError::LlmAuth(_) => {} // correct: graceful fallback
+            other => panic!(
+                "expected LlmAuth (no discovery attempted), got: {other:?}\n\
+                 This means endpoints() was called before the refresh-token check."
+            ),
+        }
+    }
 }