feat(sdk): wire custom-provider precedence ladder and CodebuffClient option

vraj00222 · claude · vraj00222 · commit a7294ebcebc0 · 2026-05-15T23:38:57.000-07:00
In promptAiSdkStream, resolve baseUrl/apiKey across three layers (agent providerOptions > clientCustomProvider > env vars) and forward to getModelForRequest. When the custom-provider path is active: • maxRetries: 1 (one retry handles brief model-load stalls; no further fallback — would violate user intent re: privacy / cost) • Skip codebuff_metadata and OpenRouter routing keys in the request body (same as the existing ChatGPT-OAuth-direct branch) • Wrap connection failures and 404s in friendly messages pointing at the configured URL and model Plumbs CodebuffClient.{providerBaseUrl, providerApiKey} through runOnce → getAgentRuntimeImpl, which wraps promptAiSdkStream with a closure that injects clientCustomProvider on every call. Adds an integration test documenting the precedence contract. Resolves issue #678 (implementation; smoke verification follows). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/common/src/types/contracts/llm.ts b/common/src/types/contracts/llm.ts
@@ -56,6 +56,9 @@ export type PromptAiSdkStreamFn = (
     includeCacheControl?: boolean
     cacheDebugCorrelation?: string
     agentProviderOptions?: OpenRouterProviderRoutingOptions
+    /** Fallback custom-provider config injected by the SDK Client.
+     *  Lower precedence than an agent's own providerOptions.baseUrl. */
+    clientCustomProvider?: { baseUrl?: string; apiKey?: string }
     /** List of agents that can be spawned - used to transform agent tool calls */
     spawnableAgents?: string[]
     /** Map of locally available agent templates - used to transform agent tool calls */
diff --git a/sdk/src/impl/__tests__/model-provider-custom.test.ts b/sdk/src/impl/__tests__/model-provider-custom.test.ts
@@ -67,4 +67,28 @@ describe('getModelForRequest with customProvider', () => {
 
     expect(result.isCustomProvider).toBe(true)
   })
+
+  test('customProvider arg drives selection regardless of env (precedence contract)', async () => {
+    // This documents the contract: getModelForRequest receives the *resolved*
+    // customProvider — the caller (promptAiSdkStream) is responsible for
+    // applying the agent > client > env precedence ladder before calling.
+    process.env.CODEBUFF_BASE_URL = 'http://from-env:11434/v1'
+    process.env.CODEBUFF_PROVIDER_API_KEY = 'env-key'
+    try {
+      const { getModelForRequest } = await import('../model-provider')
+      const result = await getModelForRequest({
+        apiKey: 'cb-key',
+        model: 'gemma2:9b',
+        customProvider: {
+          baseUrl: 'http://from-agent:11434/v1',
+          apiKey: 'agent-key',
+        },
+      })
+      expect(result.isCustomProvider).toBe(true)
+      expect(result.model).toBeDefined()
+    } finally {
+      // Runs even when an expect() throws, so the overrides never leak.
+      delete process.env.CODEBUFF_BASE_URL
+      delete process.env.CODEBUFF_PROVIDER_API_KEY
+    }
+  })
 })
diff --git a/sdk/src/impl/agent-runtime.ts b/sdk/src/impl/agent-runtime.ts
@@ -29,6 +29,8 @@ export function getAgentRuntimeImpl(
     logger?: Logger
     apiKey: string
     clientEnv?: ClientEnv
+    /** Default custom provider used for runs that don't set one per-agent. */
+    clientCustomProvider?: { baseUrl?: string; apiKey?: string }
   } & Pick<
     AgentRuntimeScopedDeps,
     | 'handleStepsLogChunk'
@@ -44,6 +46,7 @@ export function getAgentRuntimeImpl(
     logger,
     apiKey,
     clientEnv = clientEnvDefault,
+    clientCustomProvider,
     handleStepsLogChunk,
     requestToolCall,
     requestMcpToolData,
@@ -87,7 +90,10 @@ export function getAgentRuntimeImpl(
       }),
 
     // LLM
-    promptAiSdkStream,
+    promptAiSdkStream: clientCustomProvider
+      ? (streamParams) =>
+          promptAiSdkStream({ ...streamParams, clientCustomProvider })
+      : promptAiSdkStream,
     promptAiSdk,
     promptAiSdkStructured,
 
diff --git a/sdk/src/impl/llm.ts b/sdk/src/impl/llm.ts
@@ -23,6 +23,10 @@ import {
   markChatGptOAuthRateLimited,
 } from './model-provider'
 import { refreshChatGptOAuthToken } from '../credentials'
+import {
+  getCustomProviderApiKeyFromEnv,
+  getCustomProviderBaseUrlFromEnv,
+} from '../env'
 import { getErrorStatusCode } from '../error-utils'
 
 import type { ModelRequestParams } from './model-provider'
@@ -130,6 +134,54 @@ type OpenRouterUsageAccounting = {
   }
 }
 
+/**
+ * Wrap raw errors from a custom OpenAI-compatible endpoint in a friendly,
+ * actionable message. Connection failures (provider down, wrong URL) are
+ * checked before model-not-found; unrecognized messages are returned as-is.
+ */
+function buildCustomProviderError(args: {
+  baseUrl: string
+  model: string
+  rawMessage: string
+}): string {
+  const lower = args.rawMessage.toLowerCase()
+  const model = args.model.trim().toLowerCase()
+  const isConnectionError =
+    /econnrefused|fetch failed|etimedout|enotfound|socket hang up/.test(lower)
+  // A bare "404" only counts as model-not-found when the message also names
+  // the model. A blank model id is excluded because includes('') is always
+  // true and would misclassify every 404 from the endpoint.
+  const isModelNotFound =
+    lower.includes('model not found') ||
+    lower.includes('does not exist') ||
+    (model !== '' && lower.includes('404') && lower.includes(model))
+
+  if (isConnectionError) {
+    return [
+      `Cannot reach LLM provider at ${args.baseUrl}.`,
+      ``,
+      `Check:`,
+      `  • Is the provider running? (e.g. \`ollama serve\` or LM Studio's Local Server)`,
+      `  • Is the URL correct? Currently configured: ${args.baseUrl}`,
+      `  • Is the model '${args.model}' loaded? (e.g. \`ollama list\`)`,
+      ``,
+      `Original error: ${args.rawMessage}`,
+    ].join('\n')
+  }
+  if (isModelNotFound) {
+    return [
+      `Model '${args.model}' not found at ${args.baseUrl}.`,
+      ``,
+      `Check:`,
+      `  • Pull the model first: \`ollama pull ${args.model}\``,
+      `  • Verify the exact tag with \`ollama list\``,
+      ``,
+      `Original error: ${args.rawMessage}`,
+    ].join('\n')
+  }
+  return args.rawMessage
+}
+
 /**
  * Check if an error is an OAuth rate limit error that should trigger fallback.
  */
@@ -303,13 +355,34 @@ export async function* promptAiSdkStream(
     return promptAborted('User cancelled input')
   }
 
+  // Resolve custom-provider precedence: agent > client option > env.
+  // apiKey is paired with whichever URL "wins" to avoid mixing sources.
+  const agentBaseUrl = params.agentProviderOptions?.baseUrl
+  const agentApiKey = params.agentProviderOptions?.apiKey
+  const clientBaseUrl = params.clientCustomProvider?.baseUrl
+  const clientApiKey = params.clientCustomProvider?.apiKey
+  const envBaseUrl = getCustomProviderBaseUrlFromEnv()
+  const envApiKey = getCustomProviderApiKeyFromEnv()
+  // `||`, not `??`: an empty-string URL counts as unset, same as the key ladder.
+  const resolvedBaseUrl = agentBaseUrl || clientBaseUrl || envBaseUrl
+  const resolvedApiKey = agentBaseUrl
+    ? agentApiKey
+    : clientBaseUrl
+      ? clientApiKey
+      : envBaseUrl
+        ? envApiKey
+        : undefined
+
   const modelParams: ModelRequestParams = {
     apiKey: params.apiKey,
     model: params.model,
     skipChatGptOAuth: params.skipChatGptOAuth,
     costMode: params.costMode,
+    ...(resolvedBaseUrl
+      ? { customProvider: { baseUrl: resolvedBaseUrl, apiKey: resolvedApiKey } }
+      : {}),
   }
-  const { model: aiSDKModel, isChatGptOAuth } =
+  const { model: aiSDKModel, isChatGptOAuth, isCustomProvider } =
     await getModelForRequest(modelParams)
 
   if (isChatGptOAuth) {
@@ -329,9 +402,14 @@ export async function* promptAiSdkStream(
     prompt: undefined,
     model: aiSDKModel,
     messages: convertCbToModelMessages(params),
-    ...(isChatGptOAuth && { maxRetries: 0 }),
-    // For ChatGPT OAuth direct, don't send codebuff metadata/provider options to OpenAI
-    ...(isChatGptOAuth
+    // ChatGPT OAuth: no retries (we fall back to Codebuff on first failure).
+    // Custom provider: one retry to handle brief model-load stalls without
+    // dragging out errors when the provider is actually down.
+    ...(isChatGptOAuth ? { maxRetries: 0 } : {}),
+    ...(isCustomProvider ? { maxRetries: 1 } : {}),
+    // Direct routes (ChatGPT OAuth, custom provider): skip codebuff_metadata
+    // and OpenRouter routing keys — neither belongs in those request bodies.
+    ...(isChatGptOAuth || isCustomProvider
       ? {}
       : {
         providerOptions: getProviderOptions({
@@ -458,7 +536,27 @@ export async function* promptAiSdkStream(
   // Track if we've yielded any content - if so, we can't safely fall back
   let hasYieldedContent = false
 
-  for await (const chunkValue of response.fullStream) {
+  // For custom-provider streams, a connection refusal at request init throws
+  // from the iterator before any error chunk is emitted. Rewrap recognized
+  // failures with a friendly message; rethrow the rest unchanged (aborts etc.).
+  const stream = isCustomProvider && resolvedBaseUrl
+    ? (async function* () {
+        try {
+          yield* response.fullStream
+        } catch (e) {
+          const rawMessage = e instanceof Error ? e.message : String(e)
+          const friendly = buildCustomProviderError({
+            baseUrl: resolvedBaseUrl,
+            model: params.model,
+            rawMessage,
+          })
+          if (e instanceof Error && friendly === rawMessage) throw e
+          throw new Error(friendly)
+        }
+      })()
+    : response.fullStream
+
+  for await (const chunkValue of stream) {
     if (chunkValue.type !== 'text-delta') {
       const flushed = stopSequenceHandler.flush()
       if (flushed) {
@@ -603,6 +701,18 @@ export async function* promptAiSdkStream(
         'Error in AI SDK stream',
       )
 
+      // For custom-provider failures, rewrap with a friendly, actionable message
+      // before throwing so users see "is Ollama running?" not raw "fetch failed".
+      if (isCustomProvider && resolvedBaseUrl) {
+        throw new Error(
+          buildCustomProviderError({
+            baseUrl: resolvedBaseUrl,
+            model: params.model,
+            rawMessage: errorMessage,
+          }),
+        )
+      }
+
       // For all other errors, throw them -- they are fatal.
       throw chunkValue.error
     }
diff --git a/sdk/src/run.ts b/sdk/src/run.ts
@@ -85,6 +85,16 @@ export type CodebuffClientOptions = {
   maxAgentSteps?: number
   env?: Record<string, string>
 
+  /**
+   * Default custom OpenAI-compatible provider base URL for runs that don't set
+   * one per-agent. Used for local models (Ollama, LM Studio) or self-hosted
+   * endpoints. Lower precedence than an agent's own providerOptions.baseUrl;
+   * higher precedence than the CODEBUFF_BASE_URL env var.
+   */
+  providerBaseUrl?: string
+  /** Default API key paired with providerBaseUrl. Ignored if providerBaseUrl is unset. */
+  providerApiKey?: string
+
   handleEvent?: (event: PrintModeEvent) => void | Promise<void>
   handleStreamChunk?: (
     chunk:
@@ -198,6 +208,8 @@ async function runOnce({
   agentDefinitions,
   maxAgentSteps = MAX_AGENT_STEPS_DEFAULT,
   env,
+  providerBaseUrl,
+  providerApiKey,
 
   handleEvent,
   handleStreamChunk,
@@ -376,6 +388,9 @@ async function runOnce({
   const agentRuntimeImpl = getAgentRuntimeImpl({
     logger,
     apiKey,
+    clientCustomProvider: providerBaseUrl
+      ? { baseUrl: providerBaseUrl, apiKey: providerApiKey }
+      : undefined,
     handleStepsLogChunk: () => {
       // Does nothing for now
     },