Skip to content

Commit e28079c

Browse files
TTS/STT: Integrated Sarvam STT and TTS (#621)
Co-authored-by: Prajna Prayas <gituprajna20@gmail.com>
1 parent 03a0568 commit e28079c

9 files changed

Lines changed: 1259 additions & 12 deletions

File tree

backend/app/core/providers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class Provider(str, Enum):
1313
AWS = "aws"
1414
LANGFUSE = "langfuse"
1515
GOOGLE = "google"
16+
SARVAMAI = "sarvamai"
1617

1718

1819
@dataclass
@@ -32,6 +33,7 @@ class ProviderConfig:
3233
required_fields=["secret_key", "public_key", "host"]
3334
),
3435
Provider.GOOGLE: ProviderConfig(required_fields=["api_key"]),
36+
Provider.SARVAMAI: ProviderConfig(required_fields=["api_key"]),
3537
}
3638

3739

backend/app/models/llm/request.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class TextLLMParams(SQLModel):
2222
description="Reasoning configuration or instructions",
2323
)
2424
temperature: float | None = Field(
25-
default=None,
25+
default=0.1,
2626
ge=0.0,
2727
le=2.0,
2828
)
@@ -35,17 +35,18 @@ class TextLLMParams(SQLModel):
3535

3636
class STTLLMParams(SQLModel):
3737
model: str
38-
instructions: str
38+
instructions: str | None = None
3939
input_language: str | None = None
4040
output_language: str | None = None
4141
response_format: Literal["text"] | None = Field(
4242
None,
4343
description="Currently supports text type",
4444
)
4545
temperature: float | None = Field(
46-
default=0.2,
46+
default=None,
4747
ge=0.0,
4848
le=2.0,
49+
description="Temperature parameter (not supported by all STT providers)",
4950
)
5051

5152

@@ -190,7 +191,7 @@ class NativeCompletionConfig(SQLModel):
190191
Supports any LLM provider's native API format.
191192
"""
192193

193-
provider: Literal["openai-native", "google-native"] = Field(
194+
provider: Literal["openai-native", "google-native", "sarvamai-native"] = Field(
194195
...,
195196
description="Native provider type (e.g., openai-native)",
196197
)
@@ -210,8 +211,8 @@ class KaapiCompletionConfig(SQLModel):
210211
Supports multiple providers: OpenAI, Claude, Gemini, etc.
211212
"""
212213

213-
provider: Literal["openai", "google"] = Field(
214-
..., description="LLM provider (openai)"
214+
provider: Literal["openai", "google", "sarvamai"] = Field(
215+
..., description="LLM provider (openai, google, sarvamai)"
215216
)
216217

217218
type: Literal["text", "stt", "tts"] = Field(

backend/app/services/llm/mappers.py

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,98 @@ def map_kaapi_to_google_params(kaapi_params: dict) -> tuple[dict, list[str]]:
142142
return google_params, warnings
143143

144144

145+
def map_kaapi_to_sarvam_params(kaapi_params: dict) -> tuple[dict, list[str]]:
    """Map Kaapi-abstracted parameters to SarvamAI API parameters.

    Handles both STTLLMParams and TTSLLMParams. The mode is inferred from the
    parameters present: a non-None ``voice`` selects TTS; otherwise a non-None
    ``input_language`` or ``output_language`` selects STT. If none of these
    are supplied, only ``model`` is mapped.

    STTLLMParams: model, instructions, input_language, output_language,
        response_format, temperature
    TTSLLMParams: model, voice, language, response_format

    Args:
        kaapi_params: Dictionary with standardized Kaapi parameters.

    Returns:
        Tuple of:
        - Dictionary of SarvamAI API parameters (empty when a required
          parameter is missing)
        - List of warnings for unsupported or ignored parameters (or a single
          error message when a required parameter is missing)
    """
    sarvam_params: dict = {}
    warnings: list[str] = []

    # Model is required for all completion types.
    model = kaapi_params.get("model")
    if not model:
        return {}, ["Missing required 'model' parameter"]
    sarvam_params["model"] = model

    # Determine STT vs. TTS based on which mode-specific params are present.
    voice = kaapi_params.get("voice")
    input_language = kaapi_params.get("input_language")
    output_language = kaapi_params.get("output_language")

    if voice is not None:
        # TTS mode - map TTSLLMParams.
        sarvam_params["speaker"] = voice

        language = kaapi_params.get("language")
        if not language:
            return {}, ["Missing required 'language' parameter for TTS"]
        sarvam_params["target_language_code"] = language

        response_format = kaapi_params.get("response_format")
        if response_format:
            # Kaapi format names that are passed through as the codec name.
            supported_codecs = ("mp3", "wav", "ogg")
            if response_format in supported_codecs:
                codec = response_format
            else:
                # Keep the "wav" fallback, but tell the caller instead of
                # silently returning a different format than requested.
                codec = "wav"
                warnings.append(
                    f"Unsupported 'response_format' value '{response_format}' "
                    "for SarvamAI TTS; falling back to 'wav'"
                )
            sarvam_params["output_audio_codec"] = codec

    elif input_language is not None or output_language is not None:
        # STT mode - map STTLLMParams.
        if input_language == "auto":
            # "auto" is sent to SarvamAI as the "unknown" language code.
            sarvam_params["language_code"] = "unknown"
        elif input_language:
            sarvam_params["language_code"] = input_language

        # Without an explicit output language, transcribe in the input language.
        if output_language is None:
            output_language = input_language

        transcription_mode = "transcribe"
        if output_language != input_language:
            if output_language == "en-IN":
                transcription_mode = "translate"
            else:
                # Only translation to "en-IN" is mapped; any other target
                # cannot be honoured, so surface that rather than drop it.
                warnings.append(
                    f"Parameter 'output_language' value '{output_language}' is "
                    "not supported by SarvamAI STT (only 'en-IN' translation "
                    "is mapped) and was ignored"
                )
        sarvam_params["mode"] = transcription_mode

        # Warn about unsupported STT parameters.
        if kaapi_params.get("instructions"):
            warnings.append(
                "Parameter 'instructions' is not supported by SarvamAI STT and was ignored"
            )

        if kaapi_params.get("temperature") is not None:
            warnings.append(
                "Parameter 'temperature' is not supported by SarvamAI STT and was ignored"
            )

        if kaapi_params.get("response_format"):
            warnings.append(
                "Parameter 'response_format' is not supported by SarvamAI STT and was ignored"
            )

    return sarvam_params, warnings
229+
230+
145231
def transform_kaapi_config_to_native(
146232
kaapi_config: KaapiCompletionConfig,
147233
) -> tuple[NativeCompletionConfig, list[str]]:
148234
"""Transform Kaapi completion config to native provider config with mapped parameters.
149235
150-
Supports OpenAI and Google AI providers.
236+
Supports OpenAI, Google AI, and Sarvam AI providers.
151237
152238
Args:
153239
kaapi_config: KaapiCompletionConfig with abstracted parameters
@@ -175,4 +261,13 @@ def transform_kaapi_config_to_native(
175261
warnings,
176262
)
177263

264+
if kaapi_config.provider == "sarvamai":
265+
mapped_params, warnings = map_kaapi_to_sarvam_params(kaapi_config.params)
266+
return (
267+
NativeCompletionConfig(
268+
provider="sarvamai-native", params=mapped_params, type=kaapi_config.type
269+
),
270+
warnings,
271+
)
272+
178273
raise ValueError(f"Unsupported provider: {kaapi_config.provider}")

backend/app/services/llm/providers/registry.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from app.services.llm.providers.base import BaseProvider
77
from app.services.llm.providers.oai import OpenAIProvider
88
from app.services.llm.providers.gai import GoogleAIProvider
9+
from app.services.llm.providers.sai import SarvamAIProvider
910

1011
logger = logging.getLogger(__name__)
1112

@@ -16,13 +17,15 @@ class LLMProvider:
1617
# Future constants for native providers:
1718
# CLAUDE_NATIVE = "claude-native"
1819
GOOGLE_NATIVE = "google-native"
20+
SARVAMAI_NATIVE = "sarvamai-native"
1921

2022
_registry: dict[str, type[BaseProvider]] = {
2123
OPENAI_NATIVE: OpenAIProvider,
2224
OPENAI: OpenAIProvider,
2325
# Future native providers:
2426
# CLAUDE_NATIVE: ClaudeProvider,
2527
GOOGLE_NATIVE: GoogleAIProvider,
28+
SARVAMAI_NATIVE: SarvamAIProvider,
2629
}
2730

2831
@classmethod

0 commit comments

Comments
 (0)