Skip to content
This repository was archived by the owner on May 15, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions packages/types/src/providers/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,27 @@ export const anthropicModels = {
},
],
},
// Claude Opus 4.7 via the direct Anthropic API: token limits, capabilities and
// per-million-token pricing, plus an extended-context pricing tier.
// NOTE(review): the Bedrock entry for this model sets `supportsTemperature: false`, and the
// Bedrock handler sends `thinking.type: "adaptive"` instead of `budget_tokens` for it. This
// entry declares neither — confirm whether the direct API needs the same treatment.
"claude-opus-4-7": {
maxTokens: 128_000, // Overridden to 8k if `enableReasoningEffort` is false.
contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag
supportsImages: true,
supportsPromptCache: true,
// Base pricing, applied while the request stays within the default context window.
inputPrice: 5.0, // $5 per million input tokens (≤200K context)
outputPrice: 25.0, // $25 per million output tokens (≤200K context)
cacheWritesPrice: 6.25, // $6.25 per million tokens
cacheReadsPrice: 0.5, // $0.50 per million tokens
supportsReasoningBudget: true,
// Tiered pricing for extended context (requires beta flag).
// The Anthropic handler switches to this tier when `anthropicBeta1MContext` is enabled.
tiers: [
{
contextWindow: 1_000_000, // 1M tokens with beta flag
inputPrice: 10.0, // $10 per million input tokens (>200K context)
outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
},
],
},
"claude-opus-4-5-20251101": {
maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false.
contextWindow: 200_000,
Expand Down
28 changes: 28 additions & 0 deletions packages/types/src/providers/bedrock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,31 @@ export const bedrockModels = {
},
],
},
// Claude Opus 4.7 on Amazon Bedrock: token limits, capabilities, prompt-cache settings and
// per-million-token pricing, plus an extended-context pricing tier.
"anthropic.claude-opus-4-7": {
maxTokens: 8192,
contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
supportsImages: true,
supportsPromptCache: true,
// Thinking is still gated on this flag; for this model the Bedrock handler then sends
// `thinking.type: "adaptive"` with `output_config.effort` rather than `budget_tokens`.
supportsReasoningBudget: true,
// Makes the Bedrock handler leave `temperature` out of `inferenceConfig` for this model.
supportsTemperature: false,
inputPrice: 5.0, // $5 per million input tokens (≤200K context)
outputPrice: 25.0, // $25 per million output tokens (≤200K context)
cacheWritesPrice: 6.25, // $6.25 per million tokens
cacheReadsPrice: 0.5, // $0.50 per million tokens
// Prompt-cache settings. Presumably: minimum tokens before a cache point is worthwhile, the
// maximum number of cache points per request, and the request sections that may carry
// them — TODO confirm against the cache-strategy code that reads these fields.
minTokensPerCachePoint: 1024,
maxCachePoints: 4,
cachableFields: ["system", "messages", "tools"],
// Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
tiers: [
{
contextWindow: 1_000_000, // 1M tokens with beta flag
inputPrice: 10.0, // $10 per million input tokens (>200K context)
outputPrice: 37.5, // $37.50 per million output tokens (>200K context)
cacheWritesPrice: 12.5, // $12.50 per million tokens (>200K context)
cacheReadsPrice: 1.0, // $1.00 per million tokens (>200K context)
},
],
},
"anthropic.claude-opus-4-5-20251101-v1:0": {
maxTokens: 8192,
contextWindow: 200_000,
Expand Down Expand Up @@ -525,6 +550,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
"anthropic.claude-sonnet-4-5-20250929-v1:0",
"anthropic.claude-sonnet-4-6",
"anthropic.claude-opus-4-6-v1",
"anthropic.claude-opus-4-7",
] as const

// Amazon Bedrock models that support Global Inference profiles
Expand All @@ -535,13 +561,15 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
// - Claude Haiku 4.5
// - Claude Opus 4.5
// - Claude Opus 4.6
// - Claude Opus 4.7
export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
"anthropic.claude-sonnet-4-20250514-v1:0", // Claude Sonnet 4
"anthropic.claude-sonnet-4-5-20250929-v1:0", // Claude Sonnet 4.5
"anthropic.claude-sonnet-4-6", // Claude Sonnet 4.6
"anthropic.claude-haiku-4-5-20251001-v1:0", // Claude Haiku 4.5
"anthropic.claude-opus-4-5-20251101-v1:0", // Claude Opus 4.5
"anthropic.claude-opus-4-6-v1", // Claude Opus 4.6
"anthropic.claude-opus-4-7", // Claude Opus 4.7
] as const // `as const` keeps every ID as a string-literal type in a readonly tuple

// Amazon Bedrock Service Tier types
Expand Down
2 changes: 2 additions & 0 deletions packages/types/src/providers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
"anthropic/claude-opus-4.1",
"anthropic/claude-opus-4.5",
"anthropic/claude-opus-4.6",
"anthropic/claude-opus-4.7",
"anthropic/claude-haiku-4.5",
"google/gemini-2.5-flash-preview",
"google/gemini-2.5-flash-preview:thinking",
Expand Down Expand Up @@ -74,6 +75,7 @@ export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
"anthropic/claude-opus-4.1",
"anthropic/claude-opus-4.5",
"anthropic/claude-opus-4.6",
"anthropic/claude-opus-4.7",
"anthropic/claude-sonnet-4",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-sonnet-4.6",
Expand Down
2 changes: 2 additions & 0 deletions packages/types/src/providers/vercel-ai-gateway.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export const VERCEL_AI_GATEWAY_PROMPT_CACHING_MODELS = new Set([
"anthropic/claude-opus-4.1",
"anthropic/claude-opus-4.5",
"anthropic/claude-opus-4.6",
"anthropic/claude-opus-4.7",
"anthropic/claude-sonnet-4",
"anthropic/claude-sonnet-4.6",
"openai/gpt-4.1",
Expand Down Expand Up @@ -55,6 +56,7 @@ export const VERCEL_AI_GATEWAY_VISION_AND_TOOLS_MODELS = new Set([
"anthropic/claude-opus-4.1",
"anthropic/claude-opus-4.5",
"anthropic/claude-opus-4.6",
"anthropic/claude-opus-4.7",
"anthropic/claude-sonnet-4",
"anthropic/claude-sonnet-4.6",
"google/gemini-1.5-flash",
Expand Down
104 changes: 104 additions & 0 deletions src/api/providers/__tests__/bedrock-reasoning.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,110 @@ describe("AwsBedrockHandler - Extended Thinking", () => {
expect(reasoningChunks[1].text).toBe(" about this problem.")
})

it("should use adaptive thinking for Opus 4.7 instead of enabled with budget_tokens", async () => {
	// Handler configured for Opus 4.7 with reasoning switched on through settings.
	handler = new AwsBedrockHandler({
		apiProvider: "bedrock",
		apiModelId: "anthropic.claude-opus-4-7",
		awsRegion: "us-east-1",
		enableReasoningEffort: true,
		modelMaxTokens: 8192,
		modelMaxThinkingTokens: 4096,
	})

	// Scripted Bedrock stream: message start, one thinking block, then usage metadata.
	async function* scriptedStream() {
		yield { messageStart: { role: "assistant" } }
		yield {
			contentBlockStart: {
				content_block: { type: "thinking", thinking: "Thinking adaptively..." },
				contentBlockIndex: 0,
			},
		}
		yield { metadata: { usage: { inputTokens: 100, outputTokens: 50 } } }
	}
	mockSend.mockResolvedValue({ stream: scriptedStream() })

	const conversation = [{ role: "user" as const, content: "Test message" }]
	const responseStream = handler.createMessage("System prompt", conversation)

	// Drain the stream so the request is actually issued and captured.
	const received = []
	for await (const piece of responseStream) {
		received.push(piece)
	}

	// Exactly one request, and it carried model-specific request fields.
	expect(mockSend).toHaveBeenCalledTimes(1)
	expect(capturedPayload).toBeDefined()
	const extraFields = capturedPayload.additionalModelRequestFields
	expect(extraFields).toBeDefined()

	// Opus 4.7 is expected to get thinking.type "adaptive" paired with output_config.effort.
	expect(extraFields.thinking).toEqual({
		type: "adaptive",
	})
	expect(extraFields.output_config).toEqual({
		effort: "high",
	})

	// budget_tokens must be absent (the handler treats it as a 400 error for Opus 4.7).
	expect(extraFields.thinking).not.toHaveProperty("budget_tokens")
})

it("should exclude temperature from inferenceConfig for Opus 4.7 (supportsTemperature: false)", async () => {
	// A temperature is configured on purpose: the handler is expected to drop it for this model.
	handler = new AwsBedrockHandler({
		apiProvider: "bedrock",
		apiModelId: "anthropic.claude-opus-4-7",
		awsRegion: "us-east-1",
		modelTemperature: 0.7,
	})

	// Minimal scripted stream: message start followed by usage metadata.
	async function* scriptedStream() {
		yield { messageStart: { role: "assistant" } }
		yield { metadata: { usage: { inputTokens: 100, outputTokens: 50 } } }
	}
	mockSend.mockResolvedValue({ stream: scriptedStream() })

	const conversation = [{ role: "user" as const, content: "Test message" }]
	const responseStream = handler.createMessage("System prompt", conversation)

	for await (const _piece of responseStream) {
		// nothing to inspect; iterating is what triggers the request
	}

	expect(mockSend).toHaveBeenCalledTimes(1)
	expect(capturedPayload).toBeDefined()

	const { inferenceConfig } = capturedPayload
	// No temperature key for Opus 4.7 ...
	expect(inferenceConfig).not.toHaveProperty("temperature")
	// ... while maxTokens is still sent.
	expect(inferenceConfig).toHaveProperty("maxTokens")
})

it("should include temperature in inferenceConfig for models that support it", async () => {
	// Sonnet 4 serves as the control: the configured temperature should pass through.
	handler = new AwsBedrockHandler({
		apiProvider: "bedrock",
		apiModelId: "anthropic.claude-sonnet-4-20250514-v1:0",
		awsRegion: "us-east-1",
		modelTemperature: 0.5,
	})

	// Minimal scripted stream: message start followed by usage metadata.
	async function* scriptedStream() {
		yield { messageStart: { role: "assistant" } }
		yield { metadata: { usage: { inputTokens: 100, outputTokens: 50 } } }
	}
	mockSend.mockResolvedValue({ stream: scriptedStream() })

	const conversation = [{ role: "user" as const, content: "Test message" }]
	const responseStream = handler.createMessage("System prompt", conversation)

	for await (const _piece of responseStream) {
		// nothing to inspect; iterating is what triggers the request
	}

	expect(mockSend).toHaveBeenCalledTimes(1)
	expect(capturedPayload).toBeDefined()

	// The configured value must reach the request unchanged.
	const { inferenceConfig } = capturedPayload
	expect(inferenceConfig).toHaveProperty("temperature", 0.5)
})

it("should support API key authentication", async () => {
handler = new AwsBedrockHandler({
apiProvider: "bedrock",
Expand Down
10 changes: 7 additions & 3 deletions src/api/providers/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,13 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
// Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API
const sanitizedMessages = filterNonAnthropicBlocks(messages)

// Add 1M context beta flag if enabled for supported models (Claude Sonnet 4/4.5/4.6, Opus 4.6)
// Add 1M context beta flag if enabled for supported models (Claude Sonnet 4/4.5/4.6, Opus 4.6/4.7)
if (
(modelId === "claude-sonnet-4-20250514" ||
modelId === "claude-sonnet-4-5" ||
modelId === "claude-sonnet-4-6" ||
modelId === "claude-opus-4-6") &&
modelId === "claude-opus-4-6" ||
modelId === "claude-opus-4-7") &&
this.options.anthropicBeta1MContext
) {
betas.push("context-1m-2025-08-07")
Expand All @@ -82,6 +83,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
case "claude-sonnet-4-6":
case "claude-sonnet-4-5":
case "claude-sonnet-4-20250514":
case "claude-opus-4-7":
case "claude-opus-4-6":
case "claude-opus-4-5-20251101":
case "claude-opus-4-1-20250805":
Expand Down Expand Up @@ -147,6 +149,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
case "claude-sonnet-4-6":
case "claude-sonnet-4-5":
case "claude-sonnet-4-20250514":
case "claude-opus-4-7":
case "claude-opus-4-6":
case "claude-opus-4-5-20251101":
case "claude-opus-4-1-20250805":
Expand Down Expand Up @@ -316,7 +319,8 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
(id === "claude-sonnet-4-20250514" ||
id === "claude-sonnet-4-5" ||
id === "claude-sonnet-4-6" ||
id === "claude-opus-4-6") &&
id === "claude-opus-4-6" ||
id === "claude-opus-4-7") &&
this.options.anthropicBeta1MContext
) {
// Use the tier pricing for 1M context
Expand Down
52 changes: 42 additions & 10 deletions src/api/providers/bedrock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,24 @@ interface BedrockInferenceConfig {
// Define interface for Bedrock additional model request fields
// This includes thinking configuration, 1M context beta, and other model-specific parameters
interface BedrockAdditionalModelFields {
thinking?: {
type: "enabled"
budget_tokens: number
thinking?:
| {
type: "enabled"
budget_tokens: number
}
| {
type: "adaptive"
}
output_config?: {
effort?: "low" | "medium" | "high"
}
anthropic_beta?: string[]
[key: string]: any // Add index signature to be compatible with DocumentType
}

// Models that only support thinking.type: "adaptive" (not "enabled" with budget_tokens).
// Entries are base model IDs: the handler compares them against the result of
// parseBaseModelId(modelConfig.id) before building the thinking request fields.
const BEDROCK_ADAPTIVE_THINKING_ONLY_MODEL_IDS = ["anthropic.claude-opus-4-7"] as const

// Define interface for Bedrock payload
interface BedrockPayload {
modelId: BedrockModelId | string
Expand Down Expand Up @@ -390,12 +400,28 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH

if ((isThinkingExplicitlyEnabled || isThinkingEnabledBySettings) && modelConfig.info.supportsReasoningBudget) {
thinkingEnabled = true
additionalModelRequestFields = {
thinking: {
type: "enabled",
budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
},

// Opus 4.7 only supports thinking.type: "adaptive" with output_config.effort
// (NOT "enabled" with budget_tokens which returns a 400 error)
const baseId = this.parseBaseModelId(modelConfig.id)
if (BEDROCK_ADAPTIVE_THINKING_ONLY_MODEL_IDS.includes(baseId as any)) {
additionalModelRequestFields = {
thinking: {
type: "adaptive",
},
output_config: {
effort: "high",
},
}
} else {
additionalModelRequestFields = {
thinking: {
type: "enabled",
budget_tokens: metadata?.thinking?.maxThinkingTokens || modelConfig.reasoningBudget || 4096,
},
}
}

logger.info("Extended thinking enabled for Bedrock request", {
ctx: "bedrock",
modelId: modelConfig.id,
Expand All @@ -405,7 +431,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH

const inferenceConfig: BedrockInferenceConfig = {
maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
// Send temperature only when the model accepts it; models flagged `supportsTemperature: false` (e.g. Opus 4.7) must not receive it
...(modelConfig.info.supportsTemperature !== false && {
temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
}),
}

// Check if 1M context is enabled for supported Claude 4 models
Expand Down Expand Up @@ -743,7 +772,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH

const inferenceConfig: BedrockInferenceConfig = {
maxTokens: modelConfig.maxTokens || (modelConfig.info.maxTokens as number),
temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
// Send temperature only when the model accepts it; models flagged `supportsTemperature: false` (e.g. Opus 4.7) must not receive it
...(modelConfig.info.supportsTemperature !== false && {
temperature: modelConfig.temperature ?? (this.options.modelTemperature as number),
}),
}

// For completePrompt, use a unique conversation ID based on the prompt
Expand Down
5 changes: 5 additions & 0 deletions src/api/providers/fetchers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,11 @@ export const parseOpenRouterModel = ({
modelInfo.maxTokens = anthropicModels["claude-opus-4-6"].maxTokens
}

// Set claude-opus-4.7 model to use the correct configuration
if (id === "anthropic/claude-opus-4.7") {
modelInfo.maxTokens = anthropicModels["claude-opus-4-7"].maxTokens
}

// Ensure correct reasoning handling for Claude Haiku 4.5 on OpenRouter
// Use budget control and disable effort-based reasoning fallback
if (id === "anthropic/claude-haiku-4.5") {
Expand Down
3 changes: 2 additions & 1 deletion webview-ui/src/components/ui/hooks/useSelectedModel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,8 @@ function getSelectedModel({
(id === "claude-sonnet-4-20250514" ||
id === "claude-sonnet-4-5" ||
id === "claude-sonnet-4-6" ||
id === "claude-opus-4-6") &&
id === "claude-opus-4-6" ||
id === "claude-opus-4-7") &&
apiConfiguration.anthropicBeta1MContext &&
baseInfo
) {
Expand Down
Loading