diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index c04feb6b4..5ed20a878 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -154,6 +154,17 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { passthrough_headers: &["x-model-id"], }; +static DEEPINFRA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "deepinfra", + default_base_url: "https://api.deepinfra.com/v1/openai", + protocols: OPENAI_PROTOCOLS, + credential_key_names: &["DEEPINFRA_API_KEY"], + base_url_config_keys: &["DEEPINFRA_BASE_URL"], + auth: AuthHeader::Bearer, + default_headers: &[], + passthrough_headers: &["x-model-id"], +}; + /// Canonicalize an inference provider type string to a well-known identifier. /// /// Returns `Some(canonical_name)` for recognized inference providers, @@ -166,6 +177,7 @@ pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> { "openai" => Some("openai"), "anthropic" => Some("anthropic"), "nvidia" => Some("nvidia"), + "deepinfra" => Some("deepinfra"), "google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => { Some("google-vertex-ai") } @@ -182,6 +194,7 @@ pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProf "openai" => Some(&OPENAI_PROFILE), "anthropic" => Some(&ANTHROPIC_PROFILE), "nvidia" => Some(&NVIDIA_PROFILE), + "deepinfra" => Some(&DEEPINFRA_PROFILE), "google-vertex-ai" => Some(&VERTEX_AI_PROFILE), _ => None, } @@ -302,9 +315,21 @@ mod tests { assert!(profile_for("openai").is_some()); assert!(profile_for("anthropic").is_some()); assert!(profile_for("nvidia").is_some()); + assert!(profile_for("deepinfra").is_some()); assert!(profile_for("OpenAI").is_some()); // case insensitive } + #[test] + fn profile_for_deepinfra() { + let profile = profile_for("deepinfra").expect("deepinfra profile should exist"); + assert_eq!(profile.provider_type, "deepinfra"); + assert_eq!( + profile.default_base_url, + "https://api.deepinfra.com/v1/openai" + ); + assert_eq!(profile.auth, AuthHeader::Bearer); + } + #[test] fn profile_for_unknown_types() { assert!(profile_for("github").is_none()); diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs index 1d0d5a192..1f0497327 100644 --- a/crates/openshell-providers/src/lib.rs +++ b/crates/openshell-providers/src/lib.rs @@ -116,6 +116,7 @@ impl ProviderRegistry { registry.register(providers::openai::SPEC); registry.register(providers::anthropic::SPEC); registry.register(providers::nvidia::SPEC); + registry.register(providers::deepinfra::SPEC); registry.register(providers::gitlab::SPEC); registry.register(providers::github::SPEC); registry.register(providers::outlook::OutlookProvider); diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs index 63a6b2eb3..81d649859 100644 --- a/crates/openshell-providers/src/profiles.rs +++ b/crates/openshell-providers/src/profiles.rs @@ -23,6 +23,7 @@ const BUILT_IN_PROFILE_YAMLS: &[&str] = &[ include_str!("../../../providers/cursor.yaml"), include_str!("../../../providers/github.yaml"), include_str!("../../../providers/google-vertex-ai.yaml"), + include_str!("../../../providers/deepinfra.yaml"), include_str!("../../../providers/nvidia.yaml"), include_str!("../../../providers/pypi.yaml"), ]; diff --git a/crates/openshell-providers/src/providers/deepinfra.rs b/crates/openshell-providers/src/providers/deepinfra.rs new file mode 100644 index 000000000..ad70856f8 --- /dev/null +++ b/crates/openshell-providers/src/providers/deepinfra.rs @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::ProviderDiscoverySpec; + +pub const SPEC: ProviderDiscoverySpec = ProviderDiscoverySpec { + id: "deepinfra", + credential_env_vars: &["DEEPINFRA_API_KEY"], +}; + +test_discovers_env_credential!( + discovers_deepinfra_env_credentials, + "DEEPINFRA_API_KEY", + "di-test123" +); diff --git a/crates/openshell-providers/src/providers/mod.rs b/crates/openshell-providers/src/providers/mod.rs index dfe5935a1..5b26e57ac 100644 --- a/crates/openshell-providers/src/providers/mod.rs +++ b/crates/openshell-providers/src/providers/mod.rs @@ -33,6 +33,7 @@ macro_rules! test_discovers_env_credential { pub mod anthropic; pub mod claude; pub mod codex; +pub mod deepinfra; pub mod copilot; pub mod generic; pub mod github; diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index 272da265a..efd1e1838 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -642,7 +642,11 @@ fn build_provider_url( fn build_backend_url(endpoint: &str, path: &str) -> String { let base = endpoint.trim_end_matches('/'); - if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) { + // Strip the /v1 prefix from the request path when the base URL already + // contains a /v1 segment — either ending with it (e.g. openai, nvidia) + // or containing it internally (e.g. deepinfra: /v1/openai). + let base_has_v1 = base.ends_with("/v1") || base.contains("/v1/"); + if base_has_v1 && (path == "/v1" || path.starts_with("/v1/")) { return format!("{base}{}", &path[3..]); } @@ -704,6 +708,19 @@ mod tests { ); } + #[test] + fn build_backend_url_dedupes_v1_for_base_with_v1_subpath() { + // DeepInfra base URL contains /v1/ internally — /v1 in the request + // path must still be stripped so chat/completions is not doubled. + assert_eq!( + build_backend_url( + "https://api.deepinfra.com/v1/openai", + "/v1/chat/completions" + ), + "https://api.deepinfra.com/v1/openai/chat/completions" + ); + } + fn test_route(endpoint: &str, protocols: &[&str], auth: AuthHeader) -> ResolvedRoute { ResolvedRoute { name: "inference.local".to_string(), diff --git a/docs/sandboxes/manage-providers.mdx b/docs/sandboxes/manage-providers.mdx index a6b9654d0..698fb885b 100644 --- a/docs/sandboxes/manage-providers.mdx +++ b/docs/sandboxes/manage-providers.mdx @@ -253,6 +253,7 @@ The following provider types are supported. | `generic` | User-defined | Any service with custom credentials | | `github` | `GITHUB_TOKEN`, `GH_TOKEN` | GitHub API and `gh` CLI. Refer to [GitHub Sandbox](/get-started/tutorials/github-sandbox). | | `gitlab` | `GITLAB_TOKEN`, `GLAB_TOKEN`, `CI_JOB_TOKEN` | GitLab API, `glab` CLI | +| `deepinfra` | `DEEPINFRA_API_KEY` | DeepInfra inference API | | `nvidia` | `NVIDIA_API_KEY` | NVIDIA API Catalog | | `openai` | `OPENAI_API_KEY` | Any OpenAI-compatible endpoint. Set `--config OPENAI_BASE_URL` to point to the provider. Refer to [Inference Routing](/sandboxes/inference-routing). | | `opencode` | `OPENCODE_API_KEY`, `OPENROUTER_API_KEY`, `OPENAI_API_KEY` | OpenCode | @@ -278,7 +279,7 @@ The following providers have been tested with `inference.local`. Any provider th | Google Vertex AI | `vertex-prod` | `google-vertex-ai` | Regional, global, or multi-region Vertex endpoint | `GOOGLE_VERTEX_AI_TOKEN` or `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN` | | Baseten | `baseten` | `openai` | `https://inference.baseten.co/v1` | `OPENAI_API_KEY` | | Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` | -| Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` | +| DeepInfra | `deepinfra` | `deepinfra` | `https://api.deepinfra.com/v1/openai` | `DEEPINFRA_API_KEY` | | Groq | `groq` | `openai` | `https://api.groq.com/openai/v1` | `OPENAI_API_KEY` | | Ollama (local) | `ollama` | `openai` | `http://host.openshell.internal:11434/v1` | `OPENAI_API_KEY` | | LM Studio (local) | `lmstudio` | `openai` | `http://host.openshell.internal:1234/v1` | `OPENAI_API_KEY` | diff --git a/docs/sandboxes/providers-v2.mdx b/docs/sandboxes/providers-v2.mdx index 3a1e0bde7..18135485f 100644 --- a/docs/sandboxes/providers-v2.mdx +++ b/docs/sandboxes/providers-v2.mdx @@ -98,6 +98,7 @@ Built-in Providers v2 profiles currently include: | `cursor` | `agent` | None | | `github` | `source_control` | `GITHUB_TOKEN`, `GH_TOKEN` | | `google-vertex-ai` | `inference` | `GOOGLE_SERVICE_ACCOUNT_KEY`, `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN`, `VERTEX_AI_SERVICE_ACCOUNT_TOKEN`, `GOOGLE_VERTEX_AI_TOKEN`, `VERTEX_AI_TOKEN` | +| `deepinfra` | `inference` | `DEEPINFRA_API_KEY` | | `nvidia` | `inference` | `NVIDIA_API_KEY` | | `pypi` | `data` | None | diff --git a/providers/deepinfra.yaml b/providers/deepinfra.yaml new file mode 100644 index 000000000..ffc2e3e41 --- /dev/null +++ b/providers/deepinfra.yaml @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +id: deepinfra +display_name: DeepInfra +description: DeepInfra inference endpoints +category: inference +inference_capable: true +credentials: + - name: api_key + description: DeepInfra API key + env_vars: [DEEPINFRA_API_KEY] + required: true + auth_style: bearer + header_name: authorization +discovery: + credentials: [api_key] +endpoints: + - host: api.deepinfra.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce +binaries: [/usr/bin/curl, /usr/local/bin/curl]