Skip to content

Commit a7294eb

Browse files
vraj00222claude
andcommitted
feat(sdk): wire custom-provider precedence ladder and CodebuffClient option
In promptAiSdkStream, resolve baseUrl/apiKey across three layers (agent providerOptions > clientCustomProvider > env vars) and forward the result to getModelForRequest.

When the custom-provider path is active:
• maxRetries: 1 — one retry handles brief model-load stalls; there is no further fallback, because falling back would violate user intent regarding privacy and cost.
• Skip codebuff_metadata and OpenRouter routing keys in the request body (same as the existing ChatGPT-OAuth-direct branch).
• Wrap connection failures and 404s in friendly messages pointing at the configured URL and model.

Plumbs CodebuffClient.{providerBaseUrl, providerApiKey} through runOnce → getAgentRuntimeImpl, which wraps promptAiSdkStream with a closure that injects clientCustomProvider on every call. Adds an integration test documenting the precedence contract.

Resolves issue #678 (implementation; smoke verification follows).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent d41bd17 commit a7294eb

5 files changed

Lines changed: 164 additions & 6 deletions

File tree

common/src/types/contracts/llm.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ export type PromptAiSdkStreamFn = (
5656
includeCacheControl?: boolean
5757
cacheDebugCorrelation?: string
5858
agentProviderOptions?: OpenRouterProviderRoutingOptions
59+
/** Fallback custom-provider config injected by the SDK Client.
60+
* Lower precedence than an agent's own providerOptions.baseUrl. */
61+
clientCustomProvider?: { baseUrl?: string; apiKey?: string }
5962
/** List of agents that can be spawned - used to transform agent tool calls */
6063
spawnableAgents?: string[]
6164
/** Map of locally available agent templates - used to transform agent tool calls */

sdk/src/impl/__tests__/model-provider-custom.test.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,28 @@ describe('getModelForRequest with customProvider', () => {
6767

6868
expect(result.isCustomProvider).toBe(true)
6969
})
70+
71+
test('customProvider arg drives selection regardless of env (precedence contract)', async () => {
  // This documents the contract: getModelForRequest receives the *resolved*
  // customProvider — the caller (promptAiSdkStream) is responsible for
  // applying the agent > client > env precedence ladder before calling.
  //
  // NOTE(review): the assertions below only prove that the custom-provider
  // path was selected; they do not inspect which base URL the returned model
  // targets. Tighten this if the result ever exposes the resolved URL.
  const envKeys = ['CODEBUFF_BASE_URL', 'CODEBUFF_PROVIDER_API_KEY'] as const
  // Snapshot whatever was set before so the test leaves the environment
  // exactly as it found it.
  const previousValues = envKeys.map((key) => process.env[key])

  process.env.CODEBUFF_BASE_URL = 'http://from-env:11434/v1'
  process.env.CODEBUFF_PROVIDER_API_KEY = 'env-key'

  try {
    const { getModelForRequest } = await import('../model-provider')
    const result = await getModelForRequest({
      apiKey: 'cb-key',
      model: 'gemma2:9b',
      customProvider: {
        baseUrl: 'http://from-agent:11434/v1',
        apiKey: 'agent-key',
      },
    })

    expect(result.isCustomProvider).toBe(true)
    expect(result.model).toBeDefined()
  } finally {
    // Always restore, even when an assertion or the import throws, so the
    // env overrides cannot leak into other tests in the same process.
    envKeys.forEach((key, index) => {
      const previous = previousValues[index]
      if (previous === undefined) {
        delete process.env[key]
      } else {
        process.env[key] = previous
      }
    })
  }
})
7094
})

sdk/src/impl/agent-runtime.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ export function getAgentRuntimeImpl(
2929
logger?: Logger
3030
apiKey: string
3131
clientEnv?: ClientEnv
32+
/** Default custom provider used for runs that don't set one per-agent. */
33+
clientCustomProvider?: { baseUrl?: string; apiKey?: string }
3234
} & Pick<
3335
AgentRuntimeScopedDeps,
3436
| 'handleStepsLogChunk'
@@ -44,6 +46,7 @@ export function getAgentRuntimeImpl(
4446
logger,
4547
apiKey,
4648
clientEnv = clientEnvDefault,
49+
clientCustomProvider,
4750
handleStepsLogChunk,
4851
requestToolCall,
4952
requestMcpToolData,
@@ -87,7 +90,10 @@ export function getAgentRuntimeImpl(
8790
}),
8891

8992
// LLM
90-
promptAiSdkStream,
93+
promptAiSdkStream: clientCustomProvider
94+
? (streamParams) =>
95+
promptAiSdkStream({ ...streamParams, clientCustomProvider })
96+
: promptAiSdkStream,
9197
promptAiSdk,
9298
promptAiSdkStructured,
9399

sdk/src/impl/llm.ts

Lines changed: 115 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ import {
2323
markChatGptOAuthRateLimited,
2424
} from './model-provider'
2525
import { refreshChatGptOAuthToken } from '../credentials'
26+
import {
27+
getCustomProviderApiKeyFromEnv,
28+
getCustomProviderBaseUrlFromEnv,
29+
} from '../env'
2630
import { getErrorStatusCode } from '../error-utils'
2731

2832
import type { ModelRequestParams } from './model-provider'
@@ -130,6 +134,54 @@ type OpenRouterUsageAccounting = {
130134
}
131135
}
132136

137+
/**
 * Wrap raw errors from a custom OpenAI-compatible endpoint in a friendly,
 * actionable message. Distinguishes connection failures (provider down,
 * wrong URL) from model-not-found errors.
 *
 * Classification is purely textual (case-insensitive substring / pattern
 * matching on `rawMessage`). Connection failures take priority over
 * model-not-found when both match. Messages that match neither category are
 * returned unchanged.
 *
 * @param args.baseUrl - The configured provider base URL, echoed in the message.
 * @param args.model - The requested model id, echoed in the message.
 * @param args.rawMessage - The original error text from the provider / transport.
 * @returns A multi-line friendly message, or `rawMessage` when unclassified.
 */
function buildCustomProviderError(args: {
  baseUrl: string
  model: string
  rawMessage: string
}): string {
  const { baseUrl, model, rawMessage } = args
  const lower = rawMessage.toLowerCase()
  const modelLower = model.toLowerCase()

  const connectionMarkers = [
    'econnrefused',
    'fetch failed',
    'etimedout',
    'enotfound',
    'socket hang up',
  ]
  const isConnectionError = connectionMarkers.some((marker) =>
    lower.includes(marker),
  )

  // Matches both the bare phrase "model not found" and the form with the
  // model name in between, e.g. `model "gemma2:9b" not found, try pulling it
  // first` (a plain substring check for "model not found" misses the latter).
  const modelNotFoundPattern = /\bmodel\s+(?:['"]?[^'"\s]+['"]?\s+)?not found\b/
  const isModelNotFound =
    modelNotFoundPattern.test(lower) ||
    lower.includes('does not exist') ||
    // Guard against an empty model id: ''.includes-style matching is always
    // true and would turn every message containing "404" into model-not-found.
    (modelLower !== '' && lower.includes('404') && lower.includes(modelLower))

  if (isConnectionError) {
    return [
      `Cannot reach LLM provider at ${baseUrl}.`,
      ``,
      `Check:`,
      ` • Is the provider running? (e.g. \`ollama serve\` or LM Studio's Local Server)`,
      ` • Is the URL correct? Currently configured: ${baseUrl}`,
      ` • Is the model '${model}' loaded? (e.g. \`ollama list\`)`,
      ``,
      `Original error: ${rawMessage}`,
    ].join('\n')
  }
  if (isModelNotFound) {
    return [
      `Model '${model}' not found at ${baseUrl}.`,
      ``,
      `Check:`,
      ` • Pull the model first: \`ollama pull ${model}\``,
      ` • Verify the exact tag with \`ollama list\``,
      ``,
      `Original error: ${rawMessage}`,
    ].join('\n')
  }
  return rawMessage
}
184+
133185
/**
134186
* Check if an error is an OAuth rate limit error that should trigger fallback.
135187
*/
@@ -303,13 +355,34 @@ export async function* promptAiSdkStream(
303355
return promptAborted('User cancelled input')
304356
}
305357

358+
// Resolve custom-provider precedence: agent > client option > env.
359+
// apiKey is paired with whichever URL "wins" to avoid mixing sources.
360+
const agentBaseUrl = params.agentProviderOptions?.baseUrl
361+
const agentApiKey = params.agentProviderOptions?.apiKey
362+
const clientBaseUrl = params.clientCustomProvider?.baseUrl
363+
const clientApiKey = params.clientCustomProvider?.apiKey
364+
const envBaseUrl = getCustomProviderBaseUrlFromEnv()
365+
const envApiKey = getCustomProviderApiKeyFromEnv()
366+
367+
const resolvedBaseUrl = agentBaseUrl ?? clientBaseUrl ?? envBaseUrl
368+
const resolvedApiKey = agentBaseUrl
369+
? agentApiKey
370+
: clientBaseUrl
371+
? clientApiKey
372+
: envBaseUrl
373+
? envApiKey
374+
: undefined
375+
306376
const modelParams: ModelRequestParams = {
307377
apiKey: params.apiKey,
308378
model: params.model,
309379
skipChatGptOAuth: params.skipChatGptOAuth,
310380
costMode: params.costMode,
381+
...(resolvedBaseUrl
382+
? { customProvider: { baseUrl: resolvedBaseUrl, apiKey: resolvedApiKey } }
383+
: {}),
311384
}
312-
const { model: aiSDKModel, isChatGptOAuth } =
385+
const { model: aiSDKModel, isChatGptOAuth, isCustomProvider } =
313386
await getModelForRequest(modelParams)
314387

315388
if (isChatGptOAuth) {
@@ -329,9 +402,14 @@ export async function* promptAiSdkStream(
329402
prompt: undefined,
330403
model: aiSDKModel,
331404
messages: convertCbToModelMessages(params),
332-
...(isChatGptOAuth && { maxRetries: 0 }),
333-
// For ChatGPT OAuth direct, don't send codebuff metadata/provider options to OpenAI
334-
...(isChatGptOAuth
405+
// ChatGPT OAuth: no retries (we fall back to Codebuff on first failure).
406+
// Custom provider: one retry to handle brief model-load stalls without
407+
// dragging out errors when the provider is actually down.
408+
...(isChatGptOAuth ? { maxRetries: 0 } : {}),
409+
...(isCustomProvider ? { maxRetries: 1 } : {}),
410+
// Direct routes (ChatGPT OAuth, custom provider): skip codebuff_metadata
411+
// and OpenRouter routing keys — neither belongs in those request bodies.
412+
...(isChatGptOAuth || isCustomProvider
335413
? {}
336414
: {
337415
providerOptions: getProviderOptions({
@@ -458,7 +536,27 @@ export async function* promptAiSdkStream(
458536
// Track if we've yielded any content - if so, we can't safely fall back
459537
let hasYieldedContent = false
460538

461-
for await (const chunkValue of response.fullStream) {
539+
// For custom-provider streams, a connection refusal at request init throws
540+
// from the iterator before any error chunk is emitted. Rewrap into a
541+
// friendly message so users see "is Ollama running?" not raw "fetch failed".
542+
const stream = isCustomProvider && resolvedBaseUrl
543+
? (async function* () {
544+
try {
545+
yield* response.fullStream
546+
} catch (e) {
547+
const rawMessage = e instanceof Error ? e.message : String(e)
548+
throw new Error(
549+
buildCustomProviderError({
550+
baseUrl: resolvedBaseUrl,
551+
model: params.model,
552+
rawMessage,
553+
}),
554+
)
555+
}
556+
})()
557+
: response.fullStream
558+
559+
for await (const chunkValue of stream) {
462560
if (chunkValue.type !== 'text-delta') {
463561
const flushed = stopSequenceHandler.flush()
464562
if (flushed) {
@@ -603,6 +701,18 @@ export async function* promptAiSdkStream(
603701
'Error in AI SDK stream',
604702
)
605703

704+
// For custom-provider failures, rewrap with a friendly, actionable message
705+
// before throwing so users see "is Ollama running?" not raw "fetch failed".
706+
if (isCustomProvider && resolvedBaseUrl) {
707+
throw new Error(
708+
buildCustomProviderError({
709+
baseUrl: resolvedBaseUrl,
710+
model: params.model,
711+
rawMessage: errorMessage,
712+
}),
713+
)
714+
}
715+
606716
// For all other errors, throw them -- they are fatal.
607717
throw chunkValue.error
608718
}

sdk/src/run.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,16 @@ export type CodebuffClientOptions = {
8585
maxAgentSteps?: number
8686
env?: Record<string, string>
8787

88+
/**
89+
* Default custom OpenAI-compatible provider base URL for runs that don't set
90+
* one per-agent. Used for local models (Ollama, LM Studio) or self-hosted
91+
* endpoints. Lower precedence than an agent's own providerOptions.baseUrl;
92+
* higher precedence than the CODEBUFF_BASE_URL env var.
93+
*/
94+
providerBaseUrl?: string
95+
/** Default API key paired with providerBaseUrl. Ignored if providerBaseUrl is unset. */
96+
providerApiKey?: string
97+
8898
handleEvent?: (event: PrintModeEvent) => void | Promise<void>
8999
handleStreamChunk?: (
90100
chunk:
@@ -198,6 +208,8 @@ async function runOnce({
198208
agentDefinitions,
199209
maxAgentSteps = MAX_AGENT_STEPS_DEFAULT,
200210
env,
211+
providerBaseUrl,
212+
providerApiKey,
201213

202214
handleEvent,
203215
handleStreamChunk,
@@ -376,6 +388,9 @@ async function runOnce({
376388
const agentRuntimeImpl = getAgentRuntimeImpl({
377389
logger,
378390
apiKey,
391+
clientCustomProvider: providerBaseUrl
392+
? { baseUrl: providerBaseUrl, apiKey: providerApiKey }
393+
: undefined,
379394
handleStepsLogChunk: () => {
380395
// Does nothing for now
381396
},

0 commit comments

Comments
 (0)