diff --git a/core/control-plane/schema.ts b/core/control-plane/schema.ts index 02901bda2fb..76c951788fd 100644 --- a/core/control-plane/schema.ts +++ b/core/control-plane/schema.ts @@ -20,6 +20,7 @@ const modelDescriptionSchema = z.object({ "nebius", "siliconflow", "tensorix", + "orcarouter", "scaleway", "watsonx", ]), diff --git a/core/llm/autodetect.ts b/core/llm/autodetect.ts index 736caebcc6e..2392b18fcf4 100644 --- a/core/llm/autodetect.ts +++ b/core/llm/autodetect.ts @@ -65,6 +65,7 @@ const PROVIDER_HANDLES_TEMPLATING: string[] = [ "nebius", "relace", "openrouter", + "orcarouter", "clawrouter", "deepseek", "xAI", @@ -124,6 +125,7 @@ const PROVIDER_SUPPORTS_IMAGES: string[] = [ "sagemaker", "continue-proxy", "openrouter", + "orcarouter", "clawrouter", "venice", "sambanova", diff --git a/core/llm/llms/OrcaRouter.ts b/core/llm/llms/OrcaRouter.ts new file mode 100644 index 00000000000..53ea97e8171 --- /dev/null +++ b/core/llm/llms/OrcaRouter.ts @@ -0,0 +1,138 @@ +import { ChatCompletionCreateParams } from "openai/resources/index"; + +import { ORCAROUTER_HEADERS } from "@continuedev/openai-adapters"; + +import { LLMOptions } from "../../index.js"; +import { osModelsEditPrompt } from "../templates/edit.js"; + +import OpenAI from "./OpenAI.js"; + +class OrcaRouter extends OpenAI { + static providerName = "orcarouter"; + protected supportsReasoningField = true; + protected supportsReasoningDetailsField = true; + static defaultOptions: Partial<LLMOptions> = { + apiBase: "https://api.orcarouter.ai/v1/", + model: "orcarouter/auto", + promptTemplates: { + edit: osModelsEditPrompt, + }, + useLegacyCompletionsEndpoint: false, + }; + + constructor(options: LLMOptions) { + super({ + ...options, + requestOptions: { + ...options.requestOptions, + headers: { + ...ORCAROUTER_HEADERS, + ...options.requestOptions?.headers, + }, + }, + }); + } + + private isAnthropicModel(model?: string): boolean { + if (!model) return false; + return model.toLowerCase().includes("claude"); + } + + private 
addCacheControlToContent(content: any, addCaching: boolean): any { + if (!addCaching) return content; + + if (typeof content === "string") { + return [ + { + type: "text", + text: content, + cache_control: { type: "ephemeral" }, + }, + ]; + } + + if (Array.isArray(content)) { + return content.map((part, idx) => { + if (part.type === "text" && idx === content.length - 1) { + return { + ...part, + cache_control: { type: "ephemeral" }, + }; + } + return part; + }); + } + + return content; + } + + protected modifyChatBody( + body: ChatCompletionCreateParams, + ): ChatCompletionCreateParams { + body = super.modifyChatBody(body); + + if ( + !this.isAnthropicModel(body.model) || + (!this.cacheBehavior && !this.completionOptions.promptCaching) + ) { + return body; + } + + const shouldCacheConversation = + this.cacheBehavior?.cacheConversation || + this.completionOptions.promptCaching; + const shouldCacheSystemMessage = + this.cacheBehavior?.cacheSystemMessage || + this.completionOptions.promptCaching; + + if (!shouldCacheConversation && !shouldCacheSystemMessage) { + return body; + } + + const filteredMessages = body.messages.filter( + (m: any) => m.role !== "system" && !!m.content, + ); + + const lastTwoUserMsgIndices = filteredMessages + .map((msg: any, index: number) => (msg.role === "user" ? 
index : -1)) + .filter((index: number) => index !== -1) + .slice(-2); + + let filteredIndex = 0; + const filteredToOriginalIndexMap: number[] = []; + body.messages.forEach((msg: any, originalIndex: number) => { + if (msg.role !== "system" && !!msg.content) { + filteredToOriginalIndexMap[filteredIndex] = originalIndex; + filteredIndex++; + } + }); + + body.messages = body.messages.map((message: any, idx) => { + if (message.role === "system" && shouldCacheSystemMessage) { + return { + ...message, + content: this.addCacheControlToContent(message.content, true), + }; + } + + const filteredIdx = filteredToOriginalIndexMap.indexOf(idx); + if ( + message.role === "user" && + shouldCacheConversation && + filteredIdx !== -1 && + lastTwoUserMsgIndices.includes(filteredIdx) + ) { + return { + ...message, + content: this.addCacheControlToContent(message.content, true), + }; + } + + return message; + }); + + return body; + } +} + +export default OrcaRouter; diff --git a/core/llm/llms/OrcaRouter.vitest.ts b/core/llm/llms/OrcaRouter.vitest.ts new file mode 100644 index 00000000000..8470cb8c735 --- /dev/null +++ b/core/llm/llms/OrcaRouter.vitest.ts @@ -0,0 +1,174 @@ +import { ChatCompletionCreateParams } from "openai/resources/index"; +import { describe, expect, it } from "vitest"; + +import OrcaRouter from "./OrcaRouter"; + +describe("OrcaRouter", () => { + it("uses the correct providerName and default apiBase", () => { + expect(OrcaRouter.providerName).toBe("orcarouter"); + expect(OrcaRouter.defaultOptions?.apiBase).toBe( + "https://api.orcarouter.ai/v1/", + ); + expect(OrcaRouter.defaultOptions?.model).toBe("orcarouter/auto"); + }); + + it("injects OrcaRouter attribution headers", () => { + const orcaRouter = new OrcaRouter({ + model: "orcarouter/auto", + apiKey: "sk-orca-test", + }); + + const headers = (orcaRouter as any).requestOptions?.headers ?? 
{}; + expect(headers["HTTP-Referer"]).toBe("https://www.continue.dev/"); + expect(headers["X-Title"]).toBe("Continue"); + expect(headers["X-Continue-Provider"]).toBe("orcarouter"); + }); + + it("allows user-provided headers to override defaults", () => { + const orcaRouter = new OrcaRouter({ + model: "orcarouter/auto", + apiKey: "sk-orca-test", + requestOptions: { + headers: { "X-Title": "MyApp" }, + }, + }); + + const headers = (orcaRouter as any).requestOptions?.headers ?? {}; + expect(headers["X-Title"]).toBe("MyApp"); + }); +}); + +describe("OrcaRouter Anthropic Caching", () => { + it("does not throw for Anthropic models without cacheBehavior", () => { + const orcaRouter = new OrcaRouter({ + model: "anthropic/claude-opus-4.7", + apiKey: "sk-orca-test", + }); + + const body: ChatCompletionCreateParams = { + model: "anthropic/claude-opus-4.7", + messages: [], + }; + + expect(() => orcaRouter["modifyChatBody"](body)).not.toThrow(); + }); + + it("adds cache_control to last two user messages when caching is enabled", () => { + const orcaRouter = new OrcaRouter({ + model: "anthropic/claude-opus-4.7", + apiKey: "sk-orca-test", + cacheBehavior: { + cacheConversation: true, + cacheSystemMessage: false, + }, + }); + + const body: ChatCompletionCreateParams = { + model: "anthropic/claude-opus-4.7", + messages: [ + { role: "user", content: "First message" }, + { role: "assistant", content: "Response" }, + { role: "user", content: "Second message" }, + { role: "assistant", content: "Another response" }, + { role: "user", content: "Third message" }, + ], + }; + + const modifiedBody = orcaRouter["modifyChatBody"](body); + const userMessages = modifiedBody.messages.filter( + (msg: any) => msg.role === "user", + ); + + expect(userMessages[0].content).toBe("First message"); + expect(userMessages[1].content).toEqual([ + { + type: "text", + text: "Second message", + cache_control: { type: "ephemeral" }, + }, + ]); + expect(userMessages[2].content).toEqual([ + { + type: "text", + 
text: "Third message", + cache_control: { type: "ephemeral" }, + }, + ]); + }); + + it("adds cache_control to system message when caching is enabled", () => { + const orcaRouter = new OrcaRouter({ + model: "anthropic/claude-opus-4.7", + apiKey: "sk-orca-test", + cacheBehavior: { + cacheConversation: false, + cacheSystemMessage: true, + }, + }); + + const body: ChatCompletionCreateParams = { + model: "anthropic/claude-opus-4.7", + messages: [ + { role: "system", content: "You are a helpful assistant" }, + { role: "user", content: "Hello" }, + ], + }; + + const modifiedBody = orcaRouter["modifyChatBody"](body); + + expect(modifiedBody.messages[0]).toEqual({ + role: "system", + content: [ + { + type: "text", + text: "You are a helpful assistant", + cache_control: { type: "ephemeral" }, + }, + ], + }); + expect(modifiedBody.messages[1]).toEqual({ + role: "user", + content: "Hello", + }); + }); + + it("does not modify messages for non-Anthropic models", () => { + const orcaRouter = new OrcaRouter({ + model: "openai/gpt-5.5", + apiKey: "sk-orca-test", + cacheBehavior: { + cacheConversation: true, + cacheSystemMessage: true, + }, + }); + + const body: ChatCompletionCreateParams = { + model: "openai/gpt-5.5", + messages: [ + { role: "system", content: "System message" }, + { role: "user", content: "User message" }, + ], + }; + + const modifiedBody = orcaRouter["modifyChatBody"](body); + expect(modifiedBody.messages).toEqual(body.messages); + }); + + it("does not modify messages when no caching is enabled", () => { + const orcaRouter = new OrcaRouter({ + model: "anthropic/claude-opus-4.7", + apiKey: "sk-orca-test", + }); + + const body: ChatCompletionCreateParams = { + model: "anthropic/claude-opus-4.7", + messages: [ + { role: "system", content: "System message" }, + { role: "user", content: "User message" }, + ], + }; + + const modifiedBody = orcaRouter["modifyChatBody"](body); + expect(modifiedBody.messages).toEqual(body.messages); + }); +}); diff --git 
a/core/llm/llms/index.ts b/core/llm/llms/index.ts index 453b2d90cd8..11cb4194534 100644 --- a/core/llm/llms/index.ts +++ b/core/llm/llms/index.ts @@ -50,6 +50,7 @@ import Nvidia from "./Nvidia"; import Ollama from "./Ollama"; import OpenAI from "./OpenAI"; import OpenRouter from "./OpenRouter"; +import OrcaRouter from "./OrcaRouter"; import ClawRouter from "./ClawRouter"; import OVHcloud from "./OVHcloud"; import { Relace } from "./Relace"; @@ -112,6 +113,7 @@ export const LLMClasses = [ Azure, WatsonX, OpenRouter, + OrcaRouter, ClawRouter, Nvidia, Vllm, diff --git a/core/llm/toolSupport.ts b/core/llm/toolSupport.ts index 88e92eeb7d2..08807cc2fed 100644 --- a/core/llm/toolSupport.ts +++ b/core/llm/toolSupport.ts @@ -399,6 +399,57 @@ export const PROVIDER_TOOL_SUPPORT: Record<string, (model: string) => boolean> = return false; }, + orcarouter: (model) => { + // OrcaRouter routes to various upstream providers via prefixed model names + // like openai/gpt-5, anthropic/claude-opus-4.7, deepseek/deepseek-v4-pro + const lower = model.toLowerCase(); + + // orcarouter/auto and other named routers - assume tool support + // (router pool should be configured to only include tool-capable upstreams + // when used with agent mode; see docs caveat) + if (lower.startsWith("orcarouter/")) { + return true; + } + + // Explicit skip: image-generation models that occasionally appear in chat + // routing pools (e.g. 
google/gemini-2.5-flash-image) — they reject tool calls + if ( + lower.includes("-image") || + lower.includes("imagen") || + lower.includes("dall-e") + ) { + return false; + } + + // Tool-supporting model name patterns across upstream vendors + const toolSupportingPatterns = [ + "claude", + "sonnet", + "opus", + "haiku", + "gemini", + "command-r", + "mistral", + "mixtral", + "llama-3.1", + "llama-3.2", + "llama-3.3", + "llama-4", + "qwen3", + "qwen-2.5", + "deepseek", + "kimi", + "glm-4", + "minimax", + ]; + + return ( + toolSupportingPatterns.some((pattern) => lower.includes(pattern)) || + !!lower.match(/gpt-[4-9]/) || + !!lower.match(/\bo[1-9]\b/) || + !!lower.match(/grok-[3-9]/) + ); + }, clawrouter: (model) => { // ClawRouter routes to various providers, so we check common tool-supporting patterns const lower = model.toLowerCase(); diff --git a/docs/customize/model-providers/more/orcarouter.mdx b/docs/customize/model-providers/more/orcarouter.mdx new file mode 100644 index 00000000000..37adbf66e0b --- /dev/null +++ b/docs/customize/model-providers/more/orcarouter.mdx @@ -0,0 +1,195 @@ +--- +title: "How to Configure OrcaRouter with Continue" +sidebarTitle: "OrcaRouter" +--- + + + [OrcaRouter](https://www.orcarouter.ai) is an OpenAI-compatible API gateway that aggregates ~120 chat models from OpenAI, Anthropic, Google, DeepSeek, xAI, Qwen, Kimi, MiniMax, Z-AI, and other vendors behind a single `sk-orca-` key. + + + + Sign up at [orcarouter.ai](https://www.orcarouter.ai) and obtain an API key from your [console](https://www.orcarouter.ai/console). 
+ + +## Quickstart + + + + ```yaml title="config.yaml" + name: My Config + version: 0.0.1 + schema: v1 + + models: + - name: OrcaRouter Auto + provider: orcarouter + model: orcarouter/auto + apiBase: https://api.orcarouter.ai/v1/ + apiKey: sk-orca-xxxxxxxxxxxxxxxxxx + ``` + + + ```json title="config.json" + { + "models": [ + { + "title": "OrcaRouter Auto", + "provider": "orcarouter", + "model": "orcarouter/auto", + "apiBase": "https://api.orcarouter.ai/v1/", + "apiKey": "sk-orca-xxxxxxxxxxxxxxxxxx" + } + ] + } + ``` + + + +`orcarouter/auto` is a virtual model that adaptively routes each request to a candidate upstream based on a configurable strategy (cheapest / balanced / quality / contextual bandit / difficulty-gated). Routing pools and reward weights are tunable from the [console](https://www.orcarouter.ai/console/routing) without changing client code. + +## Pinning a specific model + +You can pin any model from the [OrcaRouter catalog](https://www.orcarouter.ai/models) by passing its full ID: + + + + ```yaml title="config.yaml" + models: + - name: Claude Opus 4.7 + provider: orcarouter + model: anthropic/claude-opus-4.7 + apiBase: https://api.orcarouter.ai/v1/ + apiKey: ${{ secrets.ORCAROUTER_API_KEY }} + + - name: GPT-5.5 + provider: orcarouter + model: openai/gpt-5.5 + apiBase: https://api.orcarouter.ai/v1/ + apiKey: ${{ secrets.ORCAROUTER_API_KEY }} + + - name: DeepSeek V4 Pro + provider: orcarouter + model: deepseek/deepseek-v4-pro + apiBase: https://api.orcarouter.ai/v1/ + apiKey: ${{ secrets.ORCAROUTER_API_KEY }} + ``` + + + +## Reasoning controls + +OrcaRouter passes vendor-native reasoning controls through to the upstream: + +### OpenAI / Grok / Gemini / Qwen reasoning families + +Use the flat `reasoning_effort` field via `requestOptions.extraBodyProperties`: + +```yaml title="config.yaml" +models: + - name: GPT-5.5 (High Reasoning) + provider: orcarouter + model: openai/gpt-5.5 + apiBase: https://api.orcarouter.ai/v1/ + apiKey: ${{ 
secrets.ORCAROUTER_API_KEY }} + requestOptions: + extraBodyProperties: + reasoning_effort: high # "minimal" | "low" | "medium" | "high" +``` + +### Gemini reasoning caveat + +Some Gemini models (including `gemini-3-flash-preview`) are reasoning models that spend most of their `completion_tokens` budget on internal reasoning before producing the final reply. The streaming response stays silent for several seconds during that period, which can look like the chat is blank or stuck. + +For fast responses, pass `reasoning_effort: "minimal"`: + +```yaml title="config.yaml" +models: + - name: Gemini 3 Flash (Fast) + provider: orcarouter + model: google/gemini-3-flash-preview + apiBase: https://api.orcarouter.ai/v1/ + apiKey: ${{ secrets.ORCAROUTER_API_KEY }} + requestOptions: + extraBodyProperties: + reasoning_effort: minimal +``` + +### Anthropic Claude (thinking block) + +Anthropic reasoning models use a native `thinking` block: + +```yaml title="config.yaml" +models: + - name: Claude Opus 4.7 (Thinking) + provider: orcarouter + model: anthropic/claude-opus-4.7 + apiBase: https://api.orcarouter.ai/v1/ + apiKey: ${{ secrets.ORCAROUTER_API_KEY }} + requestOptions: + extraBodyProperties: + thinking: + type: enabled + budget_tokens: 2000 # >= 1024, must be < max_tokens +``` + + + Anthropic reasoning models (e.g. `claude-opus-4.7`) reject `temperature` and `top_k`. Omit them from `completionOptions` when targeting these models. 
+ + +## Fallback chain + +When the primary upstream fails, OrcaRouter can fall back to a configured list using the `extra_body` field: + +```yaml title="config.yaml" +models: + - name: OrcaRouter Auto with Fallback + provider: orcarouter + model: orcarouter/auto + apiBase: https://api.orcarouter.ai/v1/ + apiKey: ${{ secrets.ORCAROUTER_API_KEY }} + requestOptions: + extraBodyProperties: + extra_body: + models: [deepseek/deepseek-v4-pro] + route: fallback +``` + +## Agent / tool-calling caveat + +The default `orcarouter/auto` pool may include models that do not support function calling. If you use Continue's Agent mode (which sends a `tools` field), pin a specific tool-capable model like `anthropic/claude-opus-4.7` or `openai/gpt-5.5`, or adjust the AUTO routing pool from the [console](https://www.orcarouter.ai/console/routing) to only include tool-capable upstreams. + +```yaml title="config.yaml" +models: + - name: Claude Opus 4.7 (for Agent) + provider: orcarouter + model: anthropic/claude-opus-4.7 + apiBase: https://api.orcarouter.ai/v1/ + apiKey: ${{ secrets.ORCAROUTER_API_KEY }} + capabilities: + - tool_use + roles: + - chat + - edit +``` + +## Prompt caching + +For Anthropic Claude models, Continue automatically injects `cache_control: { type: "ephemeral" }` on the system message and the last two user turns when `cacheBehavior` or `promptCaching` is enabled: + +```yaml title="config.yaml" +models: + - name: Claude Opus 4.7 (Cached) + provider: orcarouter + model: anthropic/claude-opus-4.7 + apiBase: https://api.orcarouter.ai/v1/ + apiKey: ${{ secrets.ORCAROUTER_API_KEY }} + cacheBehavior: + cacheSystemMessage: true + cacheConversation: true +``` + +## See also + +- [Full model catalog](https://www.orcarouter.ai/models) +- [Routing configuration console](https://www.orcarouter.ai/console/routing) +- [API documentation](https://docs.orcarouter.ai) diff --git a/docs/customize/model-providers/overview.mdx b/docs/customize/model-providers/overview.mdx index 
7ba030dcb4d..a82dc81247d 100644 --- a/docs/customize/model-providers/overview.mdx +++ b/docs/customize/model-providers/overview.mdx @@ -34,6 +34,7 @@ Beyond the top-level providers, Continue supports many other options: | [Together AI](/customize/model-providers/more/together) | Platform for running a variety of open models | | [DeepInfra](/customize/model-providers/more/deepinfra) | Hosting for various open source models | | [OpenRouter](/customize/model-providers/top-level/openrouter) | Gateway to multiple model providers | +| [OrcaRouter](/customize/model-providers/more/orcarouter) | OpenAI-compatible gateway aggregating ~120 chat models with adaptive routing | | [ClawRouter](/customize/model-providers/more/clawrouter) | Open-source LLM router with automatic cost-optimized model selection | | [Tetrate Agent Router Service](/customize/model-providers/top-level/tetrate_agent_router_service) | Gateway with intelligent routing across multiple model providers | | [Cohere](/customize/model-providers/more/cohere) | Models specialized for semantic search and text generation | diff --git a/docs/docs.json b/docs/docs.json index 8556401ffc6..03a66f08fa6 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -180,6 +180,7 @@ "customize/model-providers/more/moonshot", "customize/model-providers/more/nous", "customize/model-providers/more/nvidia", + "customize/model-providers/more/orcarouter", "customize/model-providers/more/tensorix", "customize/model-providers/more/together", "customize/model-providers/more/xAI", diff --git a/extensions/vscode/config_schema.json b/extensions/vscode/config_schema.json index 6eb11282100..94016f716a2 100644 --- a/extensions/vscode/config_schema.json +++ b/extensions/vscode/config_schema.json @@ -216,6 +216,7 @@ "msty", "watsonx", "openrouter", + "orcarouter", "clawrouter", "sambanova", "nvidia", @@ -269,6 +270,7 @@ "### Msty\nMsty is the simplest way to get started with online or local LLMs on all desktop platforms - Windows, Mac, and Linux. 
No fussing around, one-click and you are up and running. To get started, follow these steps:\n1. Download from [Msty.app](https://msty.app/), open the application, and click 'Setup Local AI'.\n2. Go to the Local AI Module page and download a model of your choice.\n3. Once the model has finished downloading, you can start asking questions through Continue.\n> [Reference](https://continue.dev/docs/reference/Model%20Providers/Msty)", "### IBM watsonx\nwatsonx, developed by IBM, offers a variety of pre-trained AI foundation models that can be used for natural language processing (NLP), computer vision, and speech recognition tasks.", "### OpenRouter\nOpenRouter offers a single API to access almost any language model. To get started, obtain an API key from [their console](https://openrouter.ai/settings/keys).", + "### OrcaRouter\nOrcaRouter is an OpenAI-compatible API gateway that aggregates ~120 chat models from OpenAI, Anthropic, Google, DeepSeek, xAI, Qwen, Kimi, MiniMax, Z-AI, and others behind a single `sk-orca-` key. It also exposes an `orcarouter/auto` virtual model with configurable adaptive routing (cheapest / balanced / quality / contextual bandit / difficulty-gated).\nTo get started, sign up at [orcarouter.ai](https://www.orcarouter.ai) and obtain an API key from your [console](https://www.orcarouter.ai/console).\n> [Reference](https://docs.orcarouter.ai)", "### ClawRouter\nClawRouter is an open-source LLM router that automatically selects the cheapest capable model for each request based on prompt complexity, providing 78-96% cost savings. To get started, run `npx clawrouter` to start the router at localhost:1337. A wallet is auto-generated on first run - fund it with USDC (Solana/Base) to access premium models, or use `blockrun/free` tier without payment.\n> [Reference](https://github.com/BlockRunAI/ClawRouter)", "### SambaNova\n SambaNova provides fast inference of open-source language models with zero data retention. 
To get started, obtain an API key in [SambaNova Cloud](https://cloud.sambanova.ai/apis?utm_source=continue&utm_medium=external&utm_campaign=cloud_signup ).", "### NVIDIA NIMs\nNVIDIA offers a single API to access almost any language model. To find out more, visit the [LLM APIs Documentation](https://docs.api.nvidia.com/nim/reference/llm-apis).\nFor information specific to getting a key, please check out the [docs here](https://docs.nvidia.com/nim/large-language-models/latest/getting-started.html#option-1-from-api-catalog)", diff --git a/gui/public/logos/orcarouter.png b/gui/public/logos/orcarouter.png new file mode 100644 index 00000000000..d49325a75ee Binary files /dev/null and b/gui/public/logos/orcarouter.png differ diff --git a/gui/src/pages/AddNewModel/configs/models.ts b/gui/src/pages/AddNewModel/configs/models.ts index 1de77b13af1..85844a8d3c1 100644 --- a/gui/src/pages/AddNewModel/configs/models.ts +++ b/gui/src/pages/AddNewModel/configs/models.ts @@ -2802,6 +2802,107 @@ export const models: { [key: string]: ModelPackage } = { isOpenSource: true, }, + // OrcaRouter Models + orcarouterAuto: { + title: "OrcaRouter Auto", + description: + "Adaptive routing across upstream models with configurable strategy (cheapest / balanced / quality / contextual bandit / difficulty-gated). Routing pools and weights are tunable from the OrcaRouter console.", + params: { + title: "OrcaRouter Auto", + model: "orcarouter/auto", + contextLength: 128_000, + }, + icon: "orcarouter.png", + providerOptions: ["orcarouter"], + isOpenSource: false, + }, + orcarouterGpt55: { + title: "OpenAI: GPT-5.5", + description: "OpenAI GPT-5.5 routed through OrcaRouter.", + params: { + title: "OpenAI: GPT-5.5", + model: "openai/gpt-5.5", + contextLength: 400_000, + }, + icon: "orcarouter.png", + providerOptions: ["orcarouter"], + isOpenSource: false, + }, + orcarouterClaudeOpus47: { + title: "Anthropic: Claude Opus 4.7", + description: + "Anthropic Claude Opus 4.7 routed through OrcaRouter. 
Reasoning model — set `requestOptions.extraBodyProperties.thinking` to control thinking budget.", + params: { + title: "Anthropic: Claude Opus 4.7", + model: "anthropic/claude-opus-4.7", + contextLength: 200_000, + }, + icon: "orcarouter.png", + providerOptions: ["orcarouter"], + isOpenSource: false, + }, + orcarouterGemini3Flash: { + title: "Google: Gemini 3 Flash Preview", + description: + "Google Gemini 3 Flash Preview routed through OrcaRouter. Reasoning model — chat may appear blank for several seconds while the model reasons. Set `reasoning_effort: 'minimal'` in `requestOptions.extraBodyProperties` for fast responses.", + params: { + title: "Google: Gemini 3 Flash", + model: "google/gemini-3-flash-preview", + contextLength: 1_000_000, + }, + icon: "orcarouter.png", + providerOptions: ["orcarouter"], + isOpenSource: false, + }, + orcarouterDeepseekV4Pro: { + title: "DeepSeek: DeepSeek V4 Pro", + description: "DeepSeek V4 Pro routed through OrcaRouter.", + params: { + title: "DeepSeek: DeepSeek V4 Pro", + model: "deepseek/deepseek-v4-pro", + contextLength: 128_000, + }, + icon: "orcarouter.png", + providerOptions: ["orcarouter"], + isOpenSource: true, + }, + orcarouterGrok43: { + title: "xAI: Grok 4.3", + description: "xAI Grok 4.3 routed through OrcaRouter.", + params: { + title: "xAI: Grok 4.3", + model: "grok/grok-4.3", + contextLength: 256_000, + }, + icon: "orcarouter.png", + providerOptions: ["orcarouter"], + isOpenSource: false, + }, + orcarouterQwen36Flash: { + title: "Alibaba: Qwen 3.6 Flash", + description: "Alibaba Qwen 3.6 Flash routed through OrcaRouter.", + params: { + title: "Alibaba: Qwen 3.6 Flash", + model: "qwen/qwen3.6-flash", + contextLength: 128_000, + }, + icon: "orcarouter.png", + providerOptions: ["orcarouter"], + isOpenSource: true, + }, + orcarouterMinimaxM27: { + title: "MiniMax: MiniMax M2.7", + description: "MiniMax M2.7 routed through OrcaRouter.", + params: { + title: "MiniMax: MiniMax M2.7", + model: "minimax/minimax-m2.7", 
+ contextLength: 200_000, + }, + icon: "orcarouter.png", + providerOptions: ["orcarouter"], + isOpenSource: false, + }, + AUTODETECT: { title: "Autodetect", description: diff --git a/gui/src/pages/AddNewModel/configs/providers.ts b/gui/src/pages/AddNewModel/configs/providers.ts index 82f1e15c7a0..31dc9112b42 100644 --- a/gui/src/pages/AddNewModel/configs/providers.ts +++ b/gui/src/pages/AddNewModel/configs/providers.ts @@ -1315,6 +1315,57 @@ To get started, [register](https://dataplatform.cloud.ibm.com/registration/stepo ], apiKeyUrl: "https://api.router.tetrate.ai/", }, + orcarouter: { + title: "OrcaRouter", + provider: "orcarouter", + refPage: "orcarouter", + description: + "OpenAI-compatible API gateway aggregating ~120 chat models from OpenAI, Anthropic, Google, DeepSeek, xAI, Qwen, Kimi, MiniMax, Z-AI and others with adaptive routing.", + longDescription: `[OrcaRouter](https://www.orcarouter.ai) is an OpenAI-compatible API gateway that aggregates ~120 chat models behind a single \`sk-orca-\` key. It also exposes \`orcarouter/auto\`, a virtual model that adaptively routes each request using a configurable strategy (cheapest, balanced, quality, contextual bandit, or difficulty-gated). Routing pools and reward weights are tunable from the [console](https://www.orcarouter.ai/console/routing) without changing client code. + +To get started: +1. Sign up at [orcarouter.ai](https://www.orcarouter.ai) +2. Create an API key in your [console](https://www.orcarouter.ai/console) +3. Paste it below +4. Select a model preset, or use \`orcarouter/auto\` for adaptive routing + +For agent / tool-calling workloads, pin a specific tool-capable model (e.g. \`anthropic/claude-opus-4.7\`, \`openai/gpt-5.5\`) instead of \`orcarouter/auto\`, since the AUTO routing pool may include models that don't support function calling. 
+ +> [Reference](https://docs.orcarouter.ai) · [Models](https://www.orcarouter.ai/models)`, + icon: "orcarouter.png", + tags: [ModelProviderTags.RequiresApiKey], + params: { + apiBase: "https://api.orcarouter.ai/v1/", + }, + collectInputFor: [ + { + inputType: "text", + key: "apiKey", + label: "API Key", + placeholder: "Enter your OrcaRouter API key (sk-orca-...)", + required: true, + }, + ...completionParamsInputsConfigs, + ], + packages: [ + models.orcarouterAuto, + models.orcarouterGpt55, + models.orcarouterClaudeOpus47, + models.orcarouterGemini3Flash, + models.orcarouterDeepseekV4Pro, + models.orcarouterGrok43, + models.orcarouterQwen36Flash, + models.orcarouterMinimaxM27, + { + ...models.AUTODETECT, + params: { + ...models.AUTODETECT.params, + title: "OrcaRouter", + }, + }, + ], + apiKeyUrl: "https://www.orcarouter.ai/console", + }, clawrouter: { title: "ClawRouter", provider: "clawrouter", diff --git a/packages/openai-adapters/src/apis/AiSdk.ts b/packages/openai-adapters/src/apis/AiSdk.ts index c0f68689988..34c16b823d5 100644 --- a/packages/openai-adapters/src/apis/AiSdk.ts +++ b/packages/openai-adapters/src/apis/AiSdk.ts @@ -46,6 +46,11 @@ const PROVIDER_MAP: Record = { ...options, baseURL: options.baseURL ?? "http://localhost:1337/v1/", }), + orcarouter: (options) => + createOpenAI({ + ...options, + baseURL: options.baseURL ?? 
"https://api.orcarouter.ai/v1/", + }), }; export class AiSdkApi implements BaseLlmApi { diff --git a/packages/openai-adapters/src/apis/OrcaRouter.test.ts b/packages/openai-adapters/src/apis/OrcaRouter.test.ts new file mode 100644 index 00000000000..fd9e1ec3516 --- /dev/null +++ b/packages/openai-adapters/src/apis/OrcaRouter.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, it } from "vitest"; + +import { OrcaRouterApi } from "./OrcaRouter.js"; + +describe("OrcaRouterApi", () => { + const baseConfig = { + provider: "orcarouter" as const, + }; + + it("should use default apiBase when not provided", () => { + const api = new OrcaRouterApi(baseConfig); + expect(api["config"].apiBase).toBe("https://api.orcarouter.ai/v1/"); + }); + + it("should allow custom apiBase", () => { + const api = new OrcaRouterApi({ + ...baseConfig, + apiBase: "https://api.custom-orca.example.com/v1/", + }); + expect(api["config"].apiBase).toBe( + "https://api.custom-orca.example.com/v1/", + ); + }); + + it("should include Continue attribution headers", () => { + const api = new OrcaRouterApi(baseConfig); + const headers = api["getHeaders"](); + + expect(headers["HTTP-Referer"]).toBe("https://www.continue.dev/"); + expect(headers["X-Title"]).toBe("Continue"); + expect(headers["User-Agent"]).toBe("Continue/IDE"); + expect(headers["X-Continue-Provider"]).toBe("orcarouter"); + }); + + it("should include standard OpenAI headers", () => { + const api = new OrcaRouterApi({ + ...baseConfig, + apiKey: "sk-orca-test", + }); + const headers = api["getHeaders"](); + + expect(headers["Content-Type"]).toBe("application/json"); + expect(headers["Accept"]).toBe("application/json"); + expect(headers["Authorization"]).toBe("Bearer sk-orca-test"); + }); +}); diff --git a/packages/openai-adapters/src/apis/OrcaRouter.ts b/packages/openai-adapters/src/apis/OrcaRouter.ts new file mode 100644 index 00000000000..e0339fa351a --- /dev/null +++ b/packages/openai-adapters/src/apis/OrcaRouter.ts @@ -0,0 +1,61 @@ +import { 
ChatCompletionCreateParams } from "openai/resources/index"; + +import { OpenAIConfig } from "../types.js"; +import { OpenAIApi } from "./OpenAI.js"; +import { applyAnthropicCachingToOpenRouterBody } from "./OpenRouterCaching.js"; + +export interface OrcaRouterConfig extends OpenAIConfig { + cachingStrategy?: import("./AnthropicCachingStrategies.js").CachingStrategyName; +} + +export const ORCAROUTER_HEADERS: Record<string, string> = { + "HTTP-Referer": "https://www.continue.dev/", + "X-Title": "Continue", + "User-Agent": "Continue/IDE", + "X-Continue-Provider": "orcarouter", +}; + +export class OrcaRouterApi extends OpenAIApi { + constructor(config: OrcaRouterConfig) { + super({ + ...config, + apiBase: config.apiBase ?? "https://api.orcarouter.ai/v1/", + }); + } + + /** + * Override headers to include OrcaRouter attribution headers so the + * upstream router can identify Continue traffic and apply per-client + * analytics / routing decisions. + */ + protected override getHeaders(): Record<string, string> { + return { + ...super.getHeaders(), + ...ORCAROUTER_HEADERS, + }; + } + + private isAnthropicModel(model?: string): boolean { + if (!model) { + return false; + } + return model.toLowerCase().includes("claude"); + } + + override modifyChatBody<T extends ChatCompletionCreateParams>(body: T): T { + const modifiedBody = super.modifyChatBody(body); + + if (!this.isAnthropicModel(modifiedBody.model)) { + return modifiedBody; + } + + applyAnthropicCachingToOpenRouterBody( + modifiedBody as unknown as ChatCompletionCreateParams, + (this.config as OrcaRouterConfig).cachingStrategy ?? 
"systemAndTools", + ); + + return modifiedBody; + } +} + +export default OrcaRouterApi; diff --git a/packages/openai-adapters/src/index.ts b/packages/openai-adapters/src/index.ts index c9eb4da00fa..5e10c3da8a9 100644 --- a/packages/openai-adapters/src/index.ts +++ b/packages/openai-adapters/src/index.ts @@ -18,6 +18,7 @@ import { MockApi } from "./apis/Mock.js"; import { MoonshotApi } from "./apis/Moonshot.js"; import { OpenAIApi } from "./apis/OpenAI.js"; import { OpenRouterApi } from "./apis/OpenRouter.js"; +import { OrcaRouterApi } from "./apis/OrcaRouter.js"; import { ClawRouterApi } from "./apis/ClawRouter.js"; import { RelaceApi } from "./apis/Relace.js"; import { VertexAIApi } from "./apis/VertexAI.js"; @@ -180,6 +181,8 @@ export function constructLlmApi(config: LLMConfig): BaseLlmApi | undefined { return openAICompatible("https://api.tensorix.ai/v1/", config); case "openrouter": return new OpenRouterApi(config); + case "orcarouter": + return new OrcaRouterApi(config); case "clawrouter": return new ClawRouterApi(config); case "llama.cpp": @@ -244,4 +247,5 @@ export { export { isResponsesModel } from "./apis/openaiResponses.js"; export { OPENROUTER_HEADERS } from "./apis/OpenRouter.js"; +export { ORCAROUTER_HEADERS } from "./apis/OrcaRouter.js"; export { extractBase64FromDataUrl, parseDataUrl } from "./util/url.js"; diff --git a/packages/openai-adapters/src/types.ts b/packages/openai-adapters/src/types.ts index 3b324b0ac6b..7f3b21fd3e2 100644 --- a/packages/openai-adapters/src/types.ts +++ b/packages/openai-adapters/src/types.ts @@ -52,6 +52,7 @@ export const OpenAIConfigSchema = BasePlusConfig.extend({ z.literal("kindo"), z.literal("msty"), z.literal("openrouter"), + z.literal("orcarouter"), z.literal("clawrouter"), z.literal("sambanova"), z.literal("text-gen-webui"),