diff --git a/backend/Dockerfile b/backend/Dockerfile index 0d5527d5..c0ba3e28 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -13,6 +13,7 @@ WORKDIR /app/ RUN apt-get update && apt-get install -y \ curl \ poppler-utils \ + ffmpeg \ && rm -rf /var/lib/apt/lists/* # Install uv package manager diff --git a/backend/app/core/audio_utils.py b/backend/app/core/audio_utils.py new file mode 100644 index 00000000..d3b4912a --- /dev/null +++ b/backend/app/core/audio_utils.py @@ -0,0 +1,45 @@ +""" +Audio processing utilities for format conversion. + +This module provides utilities for converting audio between different formats, +particularly for TTS output post-processing. +""" +import io +import logging +from pydub import AudioSegment + + +logger = logging.getLogger(__name__) + + +def convert_pcm_to_mp3( + pcm_bytes: bytes, sample_rate: int = 24000 +) -> tuple[bytes | None, str | None]: + try: + audio = AudioSegment( + data=pcm_bytes, sample_width=2, frame_rate=sample_rate, channels=1 + ) + + output_buffer = io.BytesIO() + audio.export(output_buffer, format="mp3", bitrate="192k") + return output_buffer.getvalue(), None + except Exception as e: + return None, str(e) + + +def convert_pcm_to_ogg( + pcm_bytes: bytes, sample_rate: int = 24000 +) -> tuple[bytes | None, str | None]: + """Convert raw PCM to OGG with Opus codec.""" + try: + audio = AudioSegment( + data=pcm_bytes, sample_width=2, frame_rate=sample_rate, channels=1 + ) + + output_buffer = io.BytesIO() + audio.export( + output_buffer, format="ogg", codec="libopus", parameters=["-b:a", "64k"] + ) + return output_buffer.getvalue(), None + except Exception as e: + return None, str(e) diff --git a/backend/app/crud/llm.py b/backend/app/crud/llm.py index b5c23cd6..c1e01e7e 100644 --- a/backend/app/crud/llm.py +++ b/backend/app/crud/llm.py @@ -1,18 +1,10 @@ -""" -CRUD operations for LLM calls. - -This module handles database operations for LLM calls including: -1. Creating new LLM call records -2. Updating LLM call responses -3. Fetching LLM calls by ID -""" - import logging from typing import Any, Literal from uuid import UUID from sqlmodel import Session, select from app.core.util import now +import base64 import json from app.models.llm import LlmCall, LLMCallRequest, ConfigBlob from app.models.llm.request import ( @@ -41,7 +33,8 @@ def serialize_input(query_input: QueryInput | str) -> str: "type": "audio", "format": query_input.content.format, "mime_type": query_input.content.mime_type, - "size_bytes": len(query_input.content.value), + # approximate byte size from b64encoded value + "size_bytes": len(query_input.content.value) * 3 // 4, } ) else: @@ -74,8 +67,10 @@ def create_llm_call( """ # Determine input/output types based on completion config type completion_config = resolved_config.completion - completion_type = completion_config.type or getattr( - completion_config.params, "type", "text" + completion_type = completion_config.type or ( + completion_config.params.get("type", "text") + if isinstance(completion_config.params, dict) + else getattr(completion_config.params, "type", "text") ) input_type: Literal["text", "audio", "image"] @@ -92,9 +87,9 @@ def create_llm_call( output_type = "text" model = ( - completion_config.params.model - if hasattr(completion_config.params, "model") - else completion_config.params.get("model", "") + completion_config.params.get("model", "") + if isinstance(completion_config.params, dict) + else getattr(completion_config.params, "model", "") ) # Build config dict for storage @@ -174,8 +169,23 @@ def update_llm_call_response( if provider_response_id is not None: db_llm_call.provider_response_id = provider_response_id + if content is not None: + # For audio outputs (AudioOutput model): calculate size metadata from base64 content + # AudioOutput serializes as: {"type": "audio", "content": {"format": "base64", "value": "...", "mime_type": "..."}} + if content.get("type") == "audio": + audio_value = content.get("content", {}).get("value") + if audio_value: + try: + audio_data = base64.b64decode(audio_value) + content["audio_size_bytes"] = len(audio_data) + except Exception as e: + logger.warning( + f"[update_llm_call_response] Failed to calculate audio size: {e}" + ) + db_llm_call.content = content + if usage is not None: db_llm_call.usage = usage if conversation_id is not None: diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py index b90fb622..0991aeba 100644 --- a/backend/app/models/llm/request.py +++ b/backend/app/models/llm/request.py @@ -1,14 +1,11 @@ +import sqlalchemy as sa from typing import Annotated, Any, Literal, Union - from uuid import UUID, uuid4 -from sqlmodel import Field, SQLModel -from pydantic import Discriminator, model_validator, HttpUrl +from pydantic import model_validator, HttpUrl from datetime import datetime -from app.core.util import now - -import sqlalchemy as sa from sqlalchemy.dialects.postgresql import JSONB from sqlmodel import Field, SQLModel, Index, text +from app.core.util import now class TextLLMParams(SQLModel): @@ -70,8 +67,8 @@ class TextContent(SQLModel): class AudioContent(SQLModel): format: Literal["base64"] = "base64" - value: str = Field(..., min_length=1, description="Base64 encoded audio") - # keeping the mime_type liberal here, since does not affect transcription type + value: str = Field(..., description="Base64 encoded audio") + # keeping the mime_type liberal here, since does not affect base64 encoding mime_type: str | None = Field( None, description="MIME type of the audio (e.g., audio/wav, audio/mp3, audio/ogg)", @@ -487,8 +484,13 @@ class LlmCall(SQLModel, table=True): updated_at: datetime = Field( default_factory=now, - nullable=False, - sa_column_kwargs={"comment": "Timestamp when the LLM call was last updated"}, + sa_column=sa.Column( + sa.DateTime, + default=now, + nullable=False, + onupdate=now, + comment="Timestamp when the LLM call was last updated", + ), ) deleted_at: datetime | None = Field( diff --git a/backend/app/models/llm/response.py b/backend/app/models/llm/response.py index 7b13e301..780ba233 100644 --- a/backend/app/models/llm/response.py +++ b/backend/app/models/llm/response.py @@ -3,7 +3,6 @@ This module contains structured response models for LLM API calls. """ - from sqlmodel import SQLModel, Field from typing import Literal, Annotated from app.models.llm.request import AudioContent, TextContent @@ -27,7 +26,7 @@ class AudioOutput(SQLModel): # Type alias for LLM output (discriminated union) -LLMOutput = Annotated[TextOutput | AudioOutput | None, Field(discriminator="type")] +LLMOutput = Annotated[TextOutput | AudioOutput, Field(discriminator="type")] class LLMResponse(SQLModel): @@ -45,7 +44,7 @@ class LLMResponse(SQLModel): model: str = Field( ..., description="Model used by the provider (e.g., gpt-4-turbo)." ) - output: LLMOutput = Field( + output: LLMOutput | None = Field( ..., description="Structured output containing text and optional additional data.", ) diff --git a/backend/app/services/llm/input_resolver.py b/backend/app/services/llm/input_resolver.py deleted file mode 100644 index 69679c00..00000000 --- a/backend/app/services/llm/input_resolver.py +++ /dev/null @@ -1,87 +0,0 @@ -import base64 -import logging -import tempfile -from pathlib import Path - -from app.models.llm.request import ( - TextInput, - AudioInput, - QueryInput, -) - - -logger = logging.getLogger(__name__) - - -def get_file_extension(mime_type: str) -> str: - """Map MIME type to file extension.""" - mime_to_ext = { - "audio/wav": ".wav", - "audio/wave": ".wav", - "audio/x-wav": ".wav", - "audio/mp3": ".mp3", - "audio/mpeg": ".mp3", - "audio/ogg": ".ogg", - "audio/flac": ".flac", - "audio/webm": ".webm", - "audio/mp4": ".mp4", - "audio/m4a": ".m4a", - } - return mime_to_ext.get(mime_type, ".audio") - - -# important!! -def resolve_input(query_input: QueryInput) -> tuple[str, str | None]: - """Resolve discriminated union input to content string. - - Args: - query_input: The input from QueryParams (TextInput or AudioInput) - - Returns: - (content_string, None) on success - for text returns content value, for audio returns temp file path - ("", error_message) on failure - """ - try: - if isinstance(query_input, TextInput): - return query_input.content.value, None - - elif isinstance(query_input, AudioInput): - # AudioInput content is base64-encoded audio - mime_type = query_input.content.mime_type or "audio/wav" - return resolve_audio_base64(query_input.content.value, mime_type) - - else: - return "", f"Unknown input type: {type(query_input)}" - - except Exception as e: - logger.error(f"[resolve_input] Failed to resolve input: {e}", exc_info=True) - return "", f"Failed to resolve input: {str(e)}" - - -def resolve_audio_base64(data: str, mime_type: str) -> tuple[str, str | None]: - """Decode base64 audio and write to temp file. Returns (file_path, error).""" - try: - audio_bytes = base64.b64decode(data) - except Exception as e: - return "", f"Invalid base64 audio data: {str(e)}" - - ext = get_file_extension(mime_type) - try: - with tempfile.NamedTemporaryFile( - suffix=ext, delete=False, prefix="audio_" - ) as tmp: - tmp.write(audio_bytes) - temp_path = tmp.name - - logger.info(f"[resolve_audio_base64] Wrote audio to temp file: {temp_path}") - return temp_path, None - except Exception as e: - return "", f"Failed to write audio to temp file: {str(e)}" - - -def cleanup_temp_file(file_path: str) -> None: - """Clean up a temporary file if it exists.""" - try: - Path(file_path).unlink(missing_ok=True) - except Exception as e: - logger.warning(f"[cleanup_temp_file] Failed to delete temp file: {e}") diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index c6997a08..33aff370 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -1,6 +1,7 @@ import logging +from contextlib import contextmanager +from typing import Any from uuid import UUID - from asgi_correlation_id import correlation_id from fastapi import HTTPException from sqlmodel import Session @@ -18,6 +19,7 @@ LLMCallConfig, KaapiCompletionConfig, TextInput, + AudioInput, ) from app.models.llm.response import TextOutput from app.services.llm.guardrails import ( @@ -26,9 +28,7 @@ ) from app.services.llm.providers.registry import get_llm_provider from app.services.llm.mappers import transform_kaapi_config_to_native -from app.services.llm.input_resolver import resolve_input, cleanup_temp_file - -from app.utils import APIResponse, send_callback +from app.utils import APIResponse, send_callback, resolve_input, cleanup_temp_file logger = logging.getLogger(__name__) @@ -101,6 +101,74 @@ def handle_job_error( return callback_response.model_dump() +@contextmanager +def resolved_input_context(query_input: TextInput | AudioInput): + """Context manager for resolving and cleaning up input resources. + + Ensures temporary files (e.g., downloaded audio) are cleaned up + even if errors occur during LLM execution. + """ + resolved_input, error = resolve_input(query_input) + if error: + raise ValueError(error) + + try: + yield resolved_input + finally: + # Clean up temp files for audio inputs + if resolved_input and isinstance(query_input, AudioInput): + cleanup_temp_file(resolved_input) + + +def validate_text_with_guardrails( + text: str, + guardrails: list[dict[str, Any]], + job_id: UUID, + project_id: int, + organization_id: int, + guardrail_type: str, # "input" or "output" +) -> tuple[str | None, str | None]: + """Validate text against guardrails. + + Returns: + (validated_text, error_message) + - If successful: (modified_text, None) + - If failed: (None, error_message) + - If bypassed: (original_text, None) + """ + safe_result = run_guardrails_validation( + text, + guardrails, + job_id, + project_id, + organization_id, + suppress_pass_logs=True, + ) + + logger.info( + f"[validate_text_with_guardrails] {guardrail_type.capitalize()} guardrail validation | " + f"success={safe_result['success']}, job_id={job_id}" + ) + + if safe_result.get("bypassed"): + logger.info( + f"[validate_text_with_guardrails] Guardrails bypassed (service unavailable) | " + f"job_id={job_id}" + ) + return text, None + + if safe_result["success"]: + validated_text = safe_result["data"]["safe_text"] + + # Special case for output guardrails: check if rephrase is needed + if guardrail_type == "output" and safe_result["data"].get("rephrase_needed"): + return None, "Output requires rephrasing" + + return validated_text, None + + return None, safe_result["error"] + + def resolve_config_blob( config_crud: ConfigVersionCrud, config: LLMCallConfig ) -> tuple[ConfigBlob | None, str | None]: @@ -151,7 +219,8 @@ def execute_job( """ request = LLMCallRequest(**request_data) - job_id: UUID = UUID(job_id) + job_uuid = UUID(job_id) # Renamed to avoid shadowing parameter + callback_url_str = str(request.callback_url) if request.callback_url else None config = request.config callback_response = None @@ -161,33 +230,15 @@ def execute_job( llm_call_id: UUID | None = None # Track the LLM call record logger.info( - f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, " + f"[execute_job] Starting LLM job execution | job_id={job_uuid}, task_id={task_id}" ) try: with Session(engine) as session: # Update job status to PROCESSING job_crud = JobCrud(session=session) - logger.info(f"[execute_job] Attempting to fetch job | job_id={job_id}") - job = session.get(Job, job_id) - if not job: - # Log all jobs to see what's in the database - from sqlmodel import select - - all_jobs = session.exec( - select(Job).order_by(Job.created_at.desc()).limit(5) - ).all() - logger.error( - f"[execute_job] Job not found! | job_id={job_id} | " - f"Recent jobs in DB: {[(j.id, j.status) for j in all_jobs]}" - ) - else: - logger.info( - f"[execute_job] Found job | job_id={job_id}, status={job.status}" - ) - job_crud.update( - job_id=job_id, job_update=JobUpdate(status=JobStatus.PROCESSING) + job_id=job_uuid, job_update=JobUpdate(status=JobStatus.PROCESSING) ) # if stored config, fetch blob from DB @@ -205,7 +256,7 @@ def execute_job( metadata=request.request_metadata, ) return handle_job_error( - job_id, request.callback_url, callback_response + job_uuid, callback_url_str, callback_response ) else: @@ -224,44 +275,29 @@ def execute_job( if not isinstance(request.query.input, TextInput): logger.info( "[execute_job] Skipping input guardrails for non-text input. " - f"job_id={job_id}, input_type={getattr(request.query.input, 'type', type(request.query.input).__name__)}" + f"job_id={job_uuid}, input_type={getattr(request.query.input, 'type', type(request.query.input).__name__)}" ) else: - safe_input = run_guardrails_validation( + validated_text, error = validate_text_with_guardrails( request.query.input.content.value, input_guardrails, - job_id, + job_uuid, project_id, organization_id, - suppress_pass_logs=True, + guardrail_type="input", ) - logger.info( - f"[execute_job] Input guardrail validation | success={safe_input['success']}." - ) - - if safe_input.get("bypassed"): - logger.info( - "[execute_job] Guardrails bypassed (service unavailable)" - ) - - elif safe_input["success"]: - request.query.input.content.value = safe_input["data"][ - "safe_text" - ] - else: - # Update the text value with error message - request.query.input.content.value = safe_input["error"] - + if error: callback_response = APIResponse.failure_response( - error=safe_input["error"], + error=error, metadata=request.request_metadata, ) return handle_job_error( - job_id, request.callback_url, callback_response + job_uuid, callback_url_str, callback_response ) - user_sent_config_provider = "" + # Update input with validated text + request.query.input.content.value = validated_text try: # Transform Kaapi config to native config if needed (before getting provider) completion_config = config_blob.completion @@ -278,14 +314,13 @@ def execute_job( if request.request_metadata is None: request.request_metadata = {} request.request_metadata.setdefault("warnings", []).extend(warnings) - else: - pass + except Exception as e: callback_response = APIResponse.failure_response( error=f"Error processing configuration: {str(e)}", metadata=request.request_metadata, ) - return handle_job_error(job_id, request.callback_url, callback_response) + return handle_job_error(job_uuid, callback_url_str, callback_response) # Create LLM call record before execution try: @@ -299,7 +334,7 @@ def execute_job( llm_call = create_llm_call( session, request=request, - job_id=job_id, + job_id=job_uuid, project_id=project_id, organization_id=organization_id, resolved_config=resolved_config_blob, @@ -307,18 +342,18 @@ def execute_job( ) llm_call_id = llm_call.id logger.info( - f"[execute_job] Created LLM call record | llm_call_id={llm_call_id}, job_id={job_id}" + f"[execute_job] Created LLM call record | llm_call_id={llm_call_id}, job_id={job_uuid}" ) except Exception as e: logger.error( - f"[execute_job] Failed to create LLM call record: {str(e)} | job_id={job_id}", + f"[execute_job] Failed to create LLM call record: {str(e)} | job_id={job_uuid}", exc_info=True, ) callback_response = APIResponse.failure_response( error=f"Failed to create LLM call record: {str(e)}", metadata=request.request_metadata, ) - return handle_job_error(job_id, request.callback_url, callback_response) + return handle_job_error(job_uuid, callback_url_str, callback_response) try: provider_instance = get_llm_provider( @@ -332,7 +367,7 @@ def execute_job( error=str(ve), metadata=request.request_metadata, ) - return handle_job_error(job_id, request.callback_url, callback_response) + return handle_job_error(job_uuid, callback_url_str, callback_response) langfuse_credentials = get_provider_credential( session=session, @@ -346,91 +381,64 @@ def execute_job( if request.query.conversation and request.query.conversation.id: conversation_id = request.query.conversation.id - # Resolve input (handles text, audio_base64, audio_url) - resolved_input, resolve_error = resolve_input(request.query.input) - if resolve_error: - callback_response = APIResponse.failure_response( - error=resolve_error, - metadata=request.request_metadata, - ) - return handle_job_error(job_id, request.callback_url, callback_response) - # Apply Langfuse observability decorator to provider execute method decorated_execute = observe_llm_execution( credentials=langfuse_credentials, session_id=conversation_id, )(provider_instance.execute) + # Resolve input and execute LLM (context manager handles cleanup) try: - response, error = decorated_execute( - completion_config=completion_config, - query=request.query, - resolved_input=resolved_input, - include_provider_raw_response=request.include_provider_raw_response, + with resolved_input_context(request.query.input) as resolved_input: + response, error = decorated_execute( + completion_config=completion_config, + query=request.query, + resolved_input=resolved_input, + include_provider_raw_response=request.include_provider_raw_response, + ) + except ValueError as ve: + # Handle input resolution errors from context manager + callback_response = APIResponse.failure_response( + error=str(ve), + metadata=request.request_metadata, ) - finally: - # Clean up temp files for audio inputs - if resolved_input and resolved_input != request.query.input: - cleanup_temp_file(resolved_input) + return handle_job_error(job_uuid, callback_url_str, callback_response) if response: if output_guardrails: if not isinstance(response.response.output, TextOutput): logger.info( "[execute_job] Skipping output guardrails for non-text output. " - f"job_id={job_id}, output_type={getattr(response.response.output, 'type', type(response.response.output).__name__)}" + f"job_id={job_uuid}, output_type={getattr(response.response.output, 'type', type(response.response.output).__name__)}" ) else: output_text = response.response.output.content.value - safe_output = run_guardrails_validation( + validated_text, error = validate_text_with_guardrails( output_text, output_guardrails, - job_id, + job_uuid, project_id, organization_id, - suppress_pass_logs=True, - ) - - logger.info( - f"[execute_job] Output guardrail validation | success={safe_output['success']}." + guardrail_type="output", ) - if safe_output.get("bypassed"): - logger.info( - "[execute_job] Guardrails bypassed (service unavailable)" - ) - - elif safe_output["success"]: - response.response.output.content.value = safe_output["data"][ - "safe_text" - ] - - if safe_output["data"]["rephrase_needed"] == True: - callback_response = APIResponse.failure_response( - error=request.query.input, - metadata=request.request_metadata, - ) - return handle_job_error( - job_id, request.callback_url, callback_response - ) - - else: - response.response.output.content.value = safe_output["error"] - + if error: callback_response = APIResponse.failure_response( - error=safe_output["error"], + error=error, metadata=request.request_metadata, ) return handle_job_error( - job_id, request.callback_url, callback_response + job_uuid, callback_url_str, callback_response ) + # Update output with validated text + response.response.output.content.value = validated_text callback_response = APIResponse.success_response( data=response, metadata=request.request_metadata ) - if request.callback_url: + if callback_url_str: send_callback( - callback_url=request.callback_url, + callback_url=callback_url_str, data=callback_response.model_dump(), ) @@ -459,10 +467,10 @@ def execute_job( # Don't fail the job if updating the record fails job_crud.update( - job_id=job_id, job_update=JobUpdate(status=JobStatus.SUCCESS) + job_id=job_uuid, job_update=JobUpdate(status=JobStatus.SUCCESS) ) logger.info( - f"[execute_job] Successfully completed LLM job | job_id={job_id}, " + f"[execute_job] Successfully completed LLM job | job_id={job_uuid}, " f"provider_response_id={response.response.provider_response_id}, tokens={response.usage.total_tokens}" ) return callback_response.model_dump() @@ -471,15 +479,16 @@ def execute_job( error=error or "Unknown error occurred", metadata=request.request_metadata, ) - return handle_job_error(job_id, request.callback_url, callback_response) + return handle_job_error(job_uuid, callback_url_str, callback_response) except Exception as e: + error_type = type(e).__name__ callback_response = APIResponse.failure_response( - error=f"Unexpected error occurred", + error=f"Unexpected error during LLM execution: {error_type}", metadata=request.request_metadata, ) logger.error( - f"[execute_job] Unknown error occurred: {str(e)} | job_id={job_id}, task_id={task_id}", + f"[execute_job] Unexpected error: {str(e)} | job_id={job_uuid}, task_id={task_id}", exc_info=True, ) - return handle_job_error(job_id, request.callback_url, callback_response) + return handle_job_error(job_uuid, callback_url_str, callback_response) diff --git a/backend/app/services/llm/mappers.py b/backend/app/services/llm/mappers.py index 4b982b60..8b0b895e 100644 --- a/backend/app/services/llm/mappers.py +++ b/backend/app/services/llm/mappers.py @@ -99,6 +99,10 @@ def map_kaapi_to_google_params(kaapi_params: dict) -> tuple[dict, list[str]]: warnings = [] # Model is present in all param types + model = kaapi_params.get("model") + if not model: + return {}, ["Missing required 'model' parameter"] + google_params["model"] = kaapi_params.get("model") # Instructions for STT prompts @@ -111,6 +115,18 @@ def map_kaapi_to_google_params(kaapi_params: dict) -> tuple[dict, list[str]]: if temperature is not None: google_params["temperature"] = temperature + # TTS Config + voice = kaapi_params.get("voice") + if voice: + google_params["voice"] = voice + + language = kaapi_params.get("language") + if language: + google_params["language"] = language + + response_format = kaapi_params.get("response_format") + if response_format: + google_params["response_format"] = response_format # Warn about unsupported parameters if kaapi_params.get("knowledge_base_ids"): warnings.append( diff --git a/backend/app/services/llm/providers/gai.py b/backend/app/services/llm/providers/gai.py index 0c848fb7..ce9bf6ad 100644 --- a/backend/app/services/llm/providers/gai.py +++ b/backend/app/services/llm/providers/gai.py @@ -1,12 +1,16 @@ import logging +import base64 +from typing import Any from google import genai from google.genai.types import ( GenerateContentResponse, GenerateContentConfig, ThinkingConfig, + SpeechConfig, + VoiceConfig, + PrebuiltVoiceConfig, ) -from typing import Any from app.models.llm import ( NativeCompletionConfig, @@ -17,8 +21,9 @@ TextOutput, TextContent, ) +from app.models.llm.response import AudioOutput, AudioContent from app.services.llm.providers.base import BaseProvider - +from app.core.audio_utils import convert_pcm_to_mp3, convert_pcm_to_ogg logger = logging.getLogger(__name__) @@ -155,6 +160,170 @@ def _execute_stt( return llm_response, None + def _execute_tts( + self, + completion_config: NativeCompletionConfig, + resolved_input: str, + include_provider_raw_response: bool = False, + ) -> tuple[LLMCallResponse | None, str | None]: + """Execute text-to-speech completion using Google AI. + + Args: + completion_config: Configuration for the completion request + resolved_input: Text string to synthesize + include_provider_raw_response: Whether to include raw provider response + + Returns: + Tuple of (LLMCallResponse, error_message) + """ + provider = completion_config.provider + generation_params = completion_config.params + + # Validate input is a text string + if not isinstance(resolved_input, str): + return None, f"{provider} TTS requires text string as input" + + if not resolved_input.strip(): + return None, "Text input cannot be empty" + + # Extract required params + model = generation_params.get("model") + if not model: + return None, "Missing 'model' in native params" + + voice = generation_params.get("voice") + if not voice: + return None, "Missing 'voice' in native params" + + language = generation_params.get("language") + if not language: + return None, "Missing 'language' in native params" + + # Extract optional params + response_format = generation_params.get("response_format", "wav") + + # Extract Gemini-specific params from provider_specific.gemini + provider_specific = generation_params.get("provider_specific", {}) + gemini_params = provider_specific.get("gemini", {}) + + director_notes = gemini_params.get("director_notes") + # Build Gemini TTS config + config_kwargs = { + "response_modalities": ["AUDIO"], + "speech_config": SpeechConfig( + voice_config=VoiceConfig( + prebuilt_voice_config=PrebuiltVoiceConfig(voice_name=voice) + ), + language_code=language, + ), + } + + if director_notes: + config_kwargs["system_instruction"] = director_notes + + config = GenerateContentConfig(**config_kwargs) + + # Execute TTS + response: GenerateContentResponse = self.client.models.generate_content( + model=model, contents=resolved_input, config=config + ) + if not response.response_id: + return None, "Google AI response missing response_id" + try: + raw_audio_bytes = response.candidates[0].content.parts[0].inline_data.data + + except (IndexError, AttributeError) as e: + return None, f"Failed to extract audio from response: {str(e)}" + + if not raw_audio_bytes: + return None, "Google AI response missing audio data" + + # Post-process audio format conversion if needed + # Gemini TTS natively outputs 24kHz 16-bit PCM (WAV format) + actual_format = "wav" # Native Gemini TTS output format + encoded_content = base64.b64encode(raw_audio_bytes or b"").decode("ascii") + + if response_format and response_format != "wav": + # Need to convert from WAV to requested format + logger.info( + f"[GoogleAIProvider._execute_tts] Converting audio from WAV to {response_format}" + ) + + if response_format == "mp3": + converted_bytes, convert_error = convert_pcm_to_mp3(raw_audio_bytes) + if convert_error: + return None, f"Failed to convert audio to MP3: {convert_error}" + encoded_content = base64.b64encode(converted_bytes or b"").decode( + "ascii" + ) + actual_format = "mp3" + + elif response_format == "ogg": + converted_bytes, convert_error = convert_pcm_to_ogg(raw_audio_bytes) + if convert_error: + return None, f"Failed to convert audio to OGG: {convert_error}" + encoded_content = base64.b64encode(converted_bytes or b"").decode( + "ascii" + ) + actual_format = "ogg" + else: + logger.warning( + f"[GoogleAIProvider._execute_tts] Unsupported response_format '{response_format}', returning native WAV" + ) + response_format = "wav" + logger.info( + f"[GoogleAIProvider._execute_tts] Audio conversion successful: {actual_format.upper()} ({len(raw_audio_bytes)} bytes)" + ) + response_mime_type = f"audio/{response_format}" + + # Extract usage metadata + if response.usage_metadata: + input_tokens = response.usage_metadata.prompt_token_count or 0 + output_tokens = response.usage_metadata.candidates_token_count or 0 + total_tokens = response.usage_metadata.total_token_count or 0 + reasoning_tokens = response.usage_metadata.thoughts_token_count or 0 + else: + logger.warning( + f"[GoogleAIProvider._execute_tts] Response missing usage_metadata, using zeros" + ) + input_tokens = 0 + output_tokens = 0 + total_tokens = 0 + reasoning_tokens = 0 + + # Build response + llm_response = LLMCallResponse( + response=LLMResponse( + provider_response_id=response.response_id, + model=response.model_version or model, + provider=provider, + # output=LLMOutput(audio_bytes=audio_bytes, audio_format=actual_format), + output=AudioOutput( + content=AudioContent( + format="base64", + value=encoded_content, + mime_type=response_mime_type, + ) + ), + ), + usage=Usage( + input_tokens=input_tokens, + output_tokens=output_tokens, + total_tokens=total_tokens, + reasoning_tokens=reasoning_tokens, + ), + ) + + if include_provider_raw_response: + llm_response.provider_raw_response = response.model_dump() + + logger.info( + f"[GoogleAIProvider._execute_tts] Successfully generated TTS response: " + f"{response.response_id}, audio_size={len(raw_audio_bytes)} bytes" + ) + + return llm_response, None + def execute( self, completion_config: NativeCompletionConfig, @@ -171,6 +340,12 @@ def execute( resolved_input=resolved_input, include_provider_raw_response=include_provider_raw_response, ) + elif completion_type == "tts": + return self._execute_tts( + completion_config=completion_config, + resolved_input=resolved_input, + include_provider_raw_response=include_provider_raw_response, + ) else: return ( None, diff --git a/backend/app/services/llm/providers/oai.py b/backend/app/services/llm/providers/oai.py index 676a420c..83c0aa8d 100644 --- a/backend/app/services/llm/providers/oai.py +++ b/backend/app/services/llm/providers/oai.py @@ -1,10 +1,10 @@ import logging +from typing import Any import openai from openai import OpenAI from openai.types.responses.response import Response -from typing import Any from app.models.llm import ( NativeCompletionConfig, LLMCallResponse, diff --git a/backend/app/services/llm/providers/registry.py b/backend/app/services/llm/providers/registry.py index 70167f73..15236b8d 100644 --- a/backend/app/services/llm/providers/registry.py +++ b/backend/app/services/llm/providers/registry.py @@ -1,30 +1,13 @@ import os from dotenv import load_dotenv import logging - from sqlmodel import Session -from openai import OpenAI from app.crud import get_provider_credential from app.services.llm.providers.base import BaseProvider from app.services.llm.providers.oai import OpenAIProvider from app.services.llm.providers.gai import GoogleAIProvider -from google.genai.types import GenerateContentConfig - -# temporary import - -from app.models.llm import ( - NativeCompletionConfig, - LLMCallResponse, - QueryParams, - LLMOutput, - LLMResponse, - Usage, -) - -load_dotenv() - logger = logging.getLogger(__name__) @@ -65,9 +48,6 @@ def get_llm_provider( ) -> BaseProvider: provider_class = LLMProvider.get_provider_class(provider_type) - # e.g "openai-native" -> "openai", "claude-native" -> "claude" - credential_provider = provider_type.replace("-native", "") - # e.g., "openai-native" → "openai", "claude-native" → "claude" credential_provider = provider_type.replace("-native", "") diff --git a/backend/app/tests/core/test_audio_utils.py b/backend/app/tests/core/test_audio_utils.py new file mode 100644 index 00000000..580fcce1 --- /dev/null +++ b/backend/app/tests/core/test_audio_utils.py @@ -0,0 +1,108 @@ +"""Tests for audio utility functions.""" +import subprocess +import pytest +from app.core.audio_utils import convert_pcm_to_mp3, convert_pcm_to_ogg + + +def _is_ffmpeg_available() -> bool: + """Check if ffmpeg is available in the system.""" + try: + subprocess.run( + ["ffmpeg", "-version"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=True, + ) + return True + except (FileNotFoundError, subprocess.CalledProcessError): + return False + + +# Skip all tests in this module if ffmpeg is not available +pytestmark = pytest.mark.skipif( + not _is_ffmpeg_available(), + reason="ffmpeg not available in environment", +) + + +def test_convert_pcm_to_mp3_success() -> None: + """Test successful PCM to MP3 conversion.""" + # Create minimal valid PCM data (1 second of silence at 24kHz, 16-bit, mono) + sample_rate = 24000 + duration_seconds = 1 + num_samples = sample_rate * duration_seconds + pcm_bytes = b"\x00\x00" * num_samples # 16-bit silence + + result, error = convert_pcm_to_mp3(pcm_bytes, sample_rate=sample_rate) + + assert error is None + assert result is not None + assert isinstance(result, bytes) + assert len(result) > 0 + + +def test_convert_pcm_to_mp3_custom_sample_rate() -> None: + """Test PCM to MP3 conversion with custom sample rate.""" + sample_rate = 16000 + num_samples = sample_rate # 1 second + pcm_bytes = b"\x00\x00" * num_samples + + result, error = convert_pcm_to_mp3(pcm_bytes, sample_rate=sample_rate) + + assert error is None + assert result is not None + assert isinstance(result, bytes) + + +def test_convert_pcm_to_mp3_short_audio() -> None: + """Test PCM to MP3 conversion with short audio.""" + # Very short audio (100ms) + sample_rate = 24000 + num_samples = int(sample_rate * 0.1) + pcm_bytes = b"\x00\x00" * num_samples + + result, error = convert_pcm_to_mp3(pcm_bytes, sample_rate=sample_rate) + + assert error is None + assert result is not None + + +def test_convert_pcm_to_ogg_success() -> None: + """Test successful PCM to OGG conversion.""" + sample_rate = 24000 + duration_seconds = 1 + num_samples = sample_rate * duration_seconds + pcm_bytes = b"\x00\x00" * num_samples + + result, error = convert_pcm_to_ogg(pcm_bytes, sample_rate=sample_rate) + + assert error is None + assert result is not None + assert isinstance(result, bytes) + assert len(result) > 0 + + +def test_convert_pcm_to_ogg_custom_sample_rate() -> None: + """Test PCM to OGG conversion with custom sample rate.""" + sample_rate = 16000 + num_samples = sample_rate # 1 second + pcm_bytes = b"\x00\x00" * num_samples + + result, error = convert_pcm_to_ogg(pcm_bytes, sample_rate=sample_rate) + + assert error is None + assert result is not None + assert isinstance(result, bytes) + + +def test_convert_pcm_to_ogg_short_audio() -> None: + """Test PCM to OGG conversion with short audio.""" + # Very short audio (100ms) + sample_rate = 24000 + num_samples = int(sample_rate * 0.1) + pcm_bytes = b"\x00\x00" * num_samples + + result, error = convert_pcm_to_ogg(pcm_bytes, sample_rate=sample_rate) + + assert error is None + assert result is not None diff --git a/backend/app/tests/crud/test_llm.py b/backend/app/tests/crud/test_llm.py index 2251755f..ce6bb2e6 100644 --- a/backend/app/tests/crud/test_llm.py +++ b/backend/app/tests/crud/test_llm.py @@ -411,3 +411,139 @@ def test_update_llm_call_response_not_found(db: Session) -> None: llm_call_id=fake_id, provider_response_id="resp_123", ) + + +def test_update_llm_call_response_with_audio_content( + db: Session, + test_job, + test_project: Project, + test_organization: Organization, + tts_config_blob: ConfigBlob, +) -> None: + """Test updating LLM call with audio content calculates size.""" + import base64 + + request = LLMCallRequest( + query=QueryParams(input="Test input"), + config=LLMCallConfig(blob=tts_config_blob), + ) + + created = create_llm_call( + db, + request=request, + job_id=test_job.id, + project_id=test_project.id, + organization_id=test_organization.id, + resolved_config=tts_config_blob, + original_provider="openai", + ) + + # Create valid audio content with base64 data + audio_bytes = b"fake audio data for testing" + audio_base64 = base64.b64encode(audio_bytes).decode("utf-8") + + audio_content = { + "type": "audio", + "content": { + "format": "base64", + "value": audio_base64, + "mime_type": "audio/mp3", + }, + } + + updated = update_llm_call_response( + db, + llm_call_id=created.id, + content=audio_content, + ) + + # Verify audio size was calculated and added + assert updated.content is not None + assert updated.content["audio_size_bytes"] == len(audio_bytes) + + +def test_update_llm_call_response_with_invalid_audio_base64( + db: Session, + test_job, + test_project: Project, + test_organization: Organization, + tts_config_blob: ConfigBlob, +) -> None: + """Test updating LLM call with invalid audio base64 logs warning but continues.""" + request = LLMCallRequest( + query=QueryParams(input="Test input"), + config=LLMCallConfig(blob=tts_config_blob), + ) + + created = create_llm_call( + db, + request=request, + job_id=test_job.id, + project_id=test_project.id, + organization_id=test_organization.id, + resolved_config=tts_config_blob, + original_provider="openai", + ) + + # Invalid base64 data + audio_content = { + "type": "audio", + "content": { + "format": "base64", + "value": "not-valid-base64!!!", + "mime_type": "audio/mp3", + }, + } + + # Should not raise error, just log warning + updated = update_llm_call_response( + db, + llm_call_id=created.id, + content=audio_content, + ) + + # Content should still be updated, just without audio_size_bytes + assert updated.content is not None + assert updated.content["type"] == "audio" + assert "audio_size_bytes" not in updated.content + + +def test_update_llm_call_response_with_text_content_no_size_calculation( + db: Session, + test_job, + test_project: Project, + test_organization: Organization, + text_config_blob: ConfigBlob, +) -> None: + """Test updating LLM call with text content does not calculate audio size.""" + request = LLMCallRequest( + query=QueryParams(input="Test input"), + config=LLMCallConfig(blob=text_config_blob), + ) + + created = create_llm_call( + db, + request=request, + job_id=test_job.id, + project_id=test_project.id, + organization_id=test_organization.id, + resolved_config=text_config_blob, + original_provider="openai", + ) + + text_content = { + "type": "text", + "content": { + "value": "This is a text response", + }, + } + + updated = update_llm_call_response( + db, + llm_call_id=created.id, + content=text_content, + ) + + # Should not have audio_size_bytes + assert updated.content is not None + assert "audio_size_bytes" not in updated.content diff --git a/backend/app/tests/services/llm/providers/test_gai.py b/backend/app/tests/services/llm/providers/test_gai.py index 63bcce07..09bc6453 100644 --- a/backend/app/tests/services/llm/providers/test_gai.py +++ b/backend/app/tests/services/llm/providers/test_gai.py @@ -1,9 +1,11 @@ """ -Tests for the Google AI provider (STT). +Tests for the Google AI provider (STT and TTS). """ +import base64 + import pytest -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch from types import SimpleNamespace from app.models.llm import ( @@ -247,3 +249,433 @@ def test_stt_with_valid_file_path( assert error is None assert result is not None assert result.response.output.content.value == "Valid transcription" + + +def mock_tts_google_response( + audio_bytes: bytes = b"\x00\x01\x02\x03", + model: str = "gemini-2.5-pro-preview-tts", + response_id: str = "resp_tts_123", +) -> SimpleNamespace: + """Create a mock Google AI TTS response object with audio data.""" + usage = SimpleNamespace( + prompt_token_count=10, + candidates_token_count=0, + total_token_count=10, + thoughts_token_count=0, + ) + + inline_data = SimpleNamespace(data=audio_bytes) + part = SimpleNamespace(inline_data=inline_data) + content = SimpleNamespace(parts=[part]) + candidate = SimpleNamespace(content=content) + + response = SimpleNamespace( + response_id=response_id, + model_version=model, + candidates=[candidate], + usage_metadata=usage, + model_dump=lambda: { + "response_id": response_id, + "model_version": model, + "usage_metadata": { + "prompt_token_count": 10, + "candidates_token_count": 0, + "total_token_count": 10, + "thoughts_token_count": 0, + }, + }, + ) + return response + + +SAMPLE_PCM_BYTES = b"\x00\x01" * 1000 + + +class TestGoogleAIProviderTTS: + """Test cases for GoogleAIProvider TTS functionality.""" + + @pytest.fixture + def mock_client(self): + """Create a mock Google AI client.""" + return MagicMock() + + @pytest.fixture + def provider(self, mock_client): + """Create a GoogleAIProvider instance with mock client.""" + return GoogleAIProvider(client=mock_client) + + @pytest.fixture + def tts_config(self): + """Create a basic TTS completion config.""" + return NativeCompletionConfig( + provider="google-native", + type="tts", + params={ + "model": "gemini-2.5-pro-preview-tts", + "voice": "Kore", + "language": "en-US", + }, + ) + + @pytest.fixture + def query_params(self): + """Create basic query parameters.""" + return QueryParams(input="Hello world") + + def test_tts_success_wav_default( + self, provider, mock_client, tts_config, query_params + ): + """Test successful TTS execution with default WAV format.""" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello world") + + assert error is None + assert result is not None + assert result.response.output.type == "audio" + assert result.response.output.content.format == "base64" + assert result.response.output.content.mime_type == "audio/wav" + # Verify the base64 content decodes back to original bytes + decoded = base64.b64decode(result.response.output.content.value) + assert decoded == SAMPLE_PCM_BYTES + assert result.response.provider_response_id == "resp_tts_123" + assert result.response.model == "gemini-2.5-pro-preview-tts" + assert result.response.provider == "google-native" + assert result.usage.input_tokens == 10 + assert result.usage.total_tokens == 10 + + def test_tts_success_mp3_format( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS with MP3 response format conversion.""" + tts_config.params["response_format"] = "mp3" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + fake_mp3_bytes = b"fake-mp3-content" + with patch( + "app.services.llm.providers.gai.convert_pcm_to_mp3", + return_value=(fake_mp3_bytes, None), + ) as mock_convert: + result, error = provider.execute(tts_config, query_params, "Hello world") + + assert error is None + assert result is not None + assert result.response.output.content.mime_type == "audio/mp3" + decoded = base64.b64decode(result.response.output.content.value) + assert decoded == fake_mp3_bytes + mock_convert.assert_called_once_with(SAMPLE_PCM_BYTES) + + def test_tts_success_ogg_format( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS with OGG response format conversion.""" + tts_config.params["response_format"] = "ogg" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + fake_ogg_bytes = b"fake-ogg-content" + with patch( + "app.services.llm.providers.gai.convert_pcm_to_ogg", + return_value=(fake_ogg_bytes, None), + ) as mock_convert: + result, error = provider.execute(tts_config, query_params, "Hello world") + + assert error is None + assert result is not None + assert result.response.output.content.mime_type == "audio/ogg" + decoded = base64.b64decode(result.response.output.content.value) + assert decoded == fake_ogg_bytes + mock_convert.assert_called_once_with(SAMPLE_PCM_BYTES) + + def test_tts_mp3_conversion_failure( + self, provider, mock_client, tts_config, query_params + ): + """Test error when MP3 conversion fails.""" + tts_config.params["response_format"] = "mp3" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + with patch( + "app.services.llm.providers.gai.convert_pcm_to_mp3", + return_value=(None, "ffmpeg not found"), + ): + result, error = provider.execute(tts_config, query_params, "Hello world") + + assert result is None + assert "Failed to convert audio to MP3" in error + assert "ffmpeg not found" in error + + def test_tts_ogg_conversion_failure( + self, provider, mock_client, tts_config, query_params + ): + """Test error when OGG conversion fails.""" + tts_config.params["response_format"] = "ogg" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + with patch( + "app.services.llm.providers.gai.convert_pcm_to_ogg", + return_value=(None, "codec error"), + ): + result, error = provider.execute(tts_config, query_params, "Hello world") + + assert result is None + assert "Failed to convert audio to OGG" in error + + def test_tts_empty_input(self, provider, mock_client, tts_config, query_params): + """Test error when text input is empty.""" + result, error = provider.execute(tts_config, query_params, " ") + + assert result is None + assert error == "Text input cannot be empty" + mock_client.models.generate_content.assert_not_called() + + def test_tts_non_string_input( + self, provider, mock_client, tts_config, query_params + ): + """Test error when input is not a string.""" + result, error = provider.execute(tts_config, query_params, {"invalid": "data"}) + + assert result is None + assert "TTS requires text string as input" in error + + def test_tts_missing_model(self, provider, mock_client, query_params): + """Test error when model parameter is missing.""" + config = NativeCompletionConfig( + provider="google-native", + type="tts", + params={"voice": "Kore", "language": "en-US"}, + ) + + result, error = provider.execute(config, query_params, "Hello") + + assert result is None + assert error == "Missing 'model' in native params" + + def test_tts_missing_voice(self, provider, mock_client, query_params): + """Test error when voice parameter is missing.""" + config = NativeCompletionConfig( + provider="google-native", + type="tts", + params={"model": "gemini-2.5-pro-preview-tts", "language": "en-US"}, + ) + + result, error = provider.execute(config, query_params, "Hello") + + assert result is None + assert error == "Missing 'voice' in native params" + + def test_tts_missing_language(self, provider, mock_client, query_params): + """Test error when language parameter is missing.""" + config = NativeCompletionConfig( + provider="google-native", + type="tts", + params={"model": "gemini-2.5-pro-preview-tts", "voice": "Kore"}, + ) + + result, error = provider.execute(config, query_params, "Hello") + + assert result is None + assert error == "Missing 'language' in native params" + + def test_tts_missing_response_id( + self, provider, mock_client, tts_config, query_params + ): + """Test error when response has no response_id.""" + mock_response = mock_tts_google_response() + mock_response.response_id = None + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert result is None + assert error == "Google AI response missing response_id" + + def test_tts_missing_audio_data( + self, provider, mock_client, tts_config, query_params + ): + """Test error when response has no audio data in parts.""" + mock_response = mock_tts_google_response(audio_bytes=None) + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert result is None + assert error == "Google AI response missing audio data" + + def test_tts_empty_candidates( + self, provider, mock_client, tts_config, query_params + ): + """Test error when response has empty candidates list.""" + mock_response = mock_tts_google_response() + mock_response.candidates = [] + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert result is None + assert "Failed to extract audio from response" in error + + def test_tts_missing_inline_data( + self, provider, mock_client, tts_config, query_params + ): + """Test error when candidate part has no inline_data.""" + mock_response = mock_tts_google_response() + mock_response.candidates[0].content.parts[0].inline_data = None + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert result is None + assert "Failed to extract audio from response" in error + + def test_tts_no_usage_metadata( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS response when usage_metadata is None (defaults to zeros).""" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_response.usage_metadata = None + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert error is None + assert result is not None + assert result.usage.input_tokens == 0 + assert result.usage.output_tokens == 0 + assert result.usage.total_tokens == 0 + assert result.usage.reasoning_tokens == 0 + + def test_tts_include_provider_raw_response( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS with include_provider_raw_response=True.""" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute( + tts_config, query_params, "Hello", include_provider_raw_response=True + ) + + assert error is None + assert result is not None + assert result.provider_raw_response is not None + assert isinstance(result.provider_raw_response, dict) + + def test_tts_without_provider_raw_response( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS without raw response (default).""" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert error is None + assert result.provider_raw_response is None + + def test_tts_with_director_notes( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS with Gemini-specific director_notes parameter.""" + tts_config.params["provider_specific"] = { + "gemini": {"director_notes": "Speak in a cheerful tone"} + } + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert error is None + assert result is not None + # Verify config was passed with system_instruction + call_args = mock_client.models.generate_content.call_args + config_arg = call_args[1]["config"] + assert config_arg.system_instruction == "Speak in a cheerful tone" + + def test_tts_without_director_notes( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS without director_notes (no system_instruction in config).""" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert error is None + call_args = mock_client.models.generate_content.call_args + config_arg = call_args[1]["config"] + assert ( + not hasattr(config_arg, "system_instruction") + or config_arg.system_instruction is None + ) + + def test_tts_model_version_fallback( + self, provider, mock_client, tts_config, query_params + ): + """Test that model falls back to config model when model_version is None.""" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_response.model_version = None + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert error is None + assert result.response.model == "gemini-2.5-pro-preview-tts" + + def test_tts_generic_exception( + self, provider, mock_client, tts_config, query_params + ): + """Test handling of unexpected exceptions.""" + mock_client.models.generate_content.side_effect = Exception("API unavailable") + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert result is None + assert error == "Unexpected error occurred" + + def test_tts_type_error(self, provider, mock_client, tts_config, query_params): + """Test handling of TypeError (invalid parameters).""" + mock_client.models.generate_content.side_effect = TypeError( + "unexpected keyword argument" + ) + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert result is None + assert "Invalid or unexpected parameter in Config" in error + + def test_tts_passes_correct_model_and_content( + self, provider, mock_client, tts_config, query_params + ): + """Test that the correct model and text content are passed to the API.""" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Say this text") + + assert error is None + call_args = mock_client.models.generate_content.call_args + assert call_args[1]["model"] == "gemini-2.5-pro-preview-tts" + assert call_args[1]["contents"] == "Say this text" + + def test_tts_passes_correct_voice_config( + self, provider, mock_client, tts_config, query_params + ): + """Test that voice and language are correctly configured.""" + mock_response = mock_tts_google_response(audio_bytes=SAMPLE_PCM_BYTES) + mock_client.models.generate_content.return_value = mock_response + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert error is None + call_args = mock_client.models.generate_content.call_args + config_arg = call_args[1]["config"] + assert config_arg.response_modalities == ["AUDIO"] + voice_name = ( + config_arg.speech_config.voice_config.prebuilt_voice_config.voice_name + ) + assert voice_name == "Kore" + assert config_arg.speech_config.language_code == "en-US" diff --git a/backend/app/tests/services/llm/test_input_resolver.py b/backend/app/tests/services/llm/test_input_resolver.py index 5443f8fe..ffc0b74b 100644 --- a/backend/app/tests/services/llm/test_input_resolver.py +++ b/backend/app/tests/services/llm/test_input_resolver.py @@ -12,7 +12,7 @@ import pytest from app.models.llm.request import TextInput, AudioInput, TextContent, AudioContent -from app.services.llm.input_resolver import ( +from app.utils import ( get_file_extension, resolve_input, resolve_audio_base64, diff --git a/backend/app/tests/services/llm/test_jobs.py b/backend/app/tests/services/llm/test_jobs.py index 27bb0384..60456e00 100644 --- a/backend/app/tests/services/llm/test_jobs.py +++ b/backend/app/tests/services/llm/test_jobs.py @@ -367,7 +367,7 @@ def test_exception_during_execution( result = self._execute_job(job_for_execution, db, request_data) assert not result["success"] - assert "Unexpected error occurred" in result["error"] + assert "Unexpected error during LLM execution" in result["error"] def test_exception_during_provider_retrieval( self, db, job_env, job_for_execution, request_data diff --git a/backend/app/utils.py b/backend/app/utils.py index 78877d35..37cd9705 100644 --- a/backend/app/utils.py +++ b/backend/app/utils.py @@ -1,6 +1,8 @@ +import base64 import functools as ft import ipaddress import logging +import tempfile from dataclasses import dataclass from datetime import datetime, timedelta, timezone from pathlib import Path @@ -401,3 +403,79 @@ def load_description(filename: Path) -> str: @load_description.register def _(filename: str) -> str: return load_description(Path(filename)) + + +# Input resolver functions moved from app.services.llm.input_resolver +def get_file_extension(mime_type: str) -> str: + """Map MIME type to file extension.""" + mime_to_ext = { + "audio/wav": ".wav", + "audio/wave": ".wav", + "audio/x-wav": ".wav", + "audio/mp3": ".mp3", + "audio/mpeg": ".mp3", + "audio/ogg": ".ogg", + "audio/flac": ".flac", + "audio/webm": ".webm", + "audio/mp4": ".mp4", + "audio/m4a": ".m4a", + } + return mime_to_ext.get(mime_type, ".audio") + + +def resolve_audio_base64(data: str, mime_type: str) -> tuple[str, str | None]: + """Decode base64 audio and write to temp file. Returns (file_path, error).""" + try: + audio_bytes = base64.b64decode(data) + except Exception as e: + return "", f"Invalid base64 audio data: {str(e)}" + + ext = get_file_extension(mime_type) + try: + with tempfile.NamedTemporaryFile( + suffix=ext, delete=False, prefix="audio_" + ) as tmp: + tmp.write(audio_bytes) + temp_path = tmp.name + + logger.info(f"[resolve_audio_base64] Wrote audio to temp file: {temp_path}") + return temp_path, None + except Exception as e: + return "", f"Failed to write audio to temp file: {str(e)}" + + +def resolve_input(query_input) -> tuple[str, str | None]: + """Resolve discriminated union input to content string. + + Args: + query_input: The input from QueryParams (TextInput or AudioInput) + + Returns: + (content_string, None) on success - for text returns content value, for audio returns temp file path + ("", error_message) on failure + """ + from app.models.llm.request import TextInput, AudioInput + + try: + if isinstance(query_input, TextInput): + return query_input.content.value, None + + elif isinstance(query_input, AudioInput): + # AudioInput content is base64-encoded audio + mime_type = query_input.content.mime_type or "audio/wav" + return resolve_audio_base64(query_input.content.value, mime_type) + + else: + return "", f"Unknown input type: {type(query_input)}" + + except Exception as e: + logger.error(f"[resolve_input] Failed to resolve input: {e}", exc_info=True) + return "", f"Failed to resolve input: {str(e)}" + + +def cleanup_temp_file(file_path: str) -> None: + """Clean up a temporary file if it exists.""" + try: + Path(file_path).unlink(missing_ok=True) + except Exception as e: + logger.warning(f"[cleanup_temp_file] Failed to delete temp file: {e}") diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 19819dec..18799eeb 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -37,6 +37,7 @@ dependencies = [ "redis>=5.0.0,<6.0.0", "flower>=2.0.1", "google-genai>=1.59.0", + "pydub>=0.25.1", ] [tool.uv] diff --git a/backend/uv.lock b/backend/uv.lock index 019f97f6..a37bfe63 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -227,6 +227,7 @@ dependencies = [ { name = "py-zerox" }, { name = "pydantic" }, { name = "pydantic-settings" }, + { name = "pydub" }, { name = "pyjwt" }, { name = "pytest" }, { name = "python-multipart" }, @@ -274,6 +275,7 @@ requires-dist = [ { name = "py-zerox", specifier = ">=0.0.7,<1.0.0" }, { name = "pydantic", specifier = ">2.0" }, { name = "pydantic-settings", specifier = ">=2.2.1,<3.0.0" }, + { name = "pydub", specifier = ">=0.25.1" }, { name = "pyjwt", specifier = ">=2.8.0,<3.0.0" }, { name = "pytest", specifier = ">=7.4.4" }, { name = "python-multipart", specifier = ">=0.0.22,<1.0.0" }, @@ -686,55 +688,55 @@ wheels = [ [[package]] name = "cryptography" -version = "46.0.5" +version = "46.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/81/b0bb27f2ba931a65409c6b8a8b358a7f03c0e46eceacddff55f7c84b1f3b/cryptography-46.0.5-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad", size = 7176289, upload-time = "2026-02-10T19:17:08.274Z" }, - { url = "https://files.pythonhosted.org/packages/ff/9e/6b4397a3e3d15123de3b1806ef342522393d50736c13b20ec4c9ea6693a6/cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", size = 4275637, upload-time = "2026-02-10T19:17:10.53Z" }, - { url = "https://files.pythonhosted.org/packages/63/e7/471ab61099a3920b0c77852ea3f0ea611c9702f651600397ac567848b897/cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", size = 4424742, upload-time = "2026-02-10T19:17:12.388Z" }, - { url = "https://files.pythonhosted.org/packages/37/53/a18500f270342d66bf7e4d9f091114e31e5ee9e7375a5aba2e85a91e0044/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", size = 4277528, upload-time = "2026-02-10T19:17:13.853Z" }, - { url = "https://files.pythonhosted.org/packages/22/29/c2e812ebc38c57b40e7c583895e73c8c5adb4d1e4a0cc4c5a4fdab2b1acc/cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", size = 4947993, upload-time = "2026-02-10T19:17:15.618Z" }, - { url = "https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", size = 4456855, upload-time = "2026-02-10T19:17:17.221Z" }, - { url = "https://files.pythonhosted.org/packages/2d/87/fc628a7ad85b81206738abbd213b07702bcbdada1dd43f72236ef3cffbb5/cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", size = 3984635, upload-time = "2026-02-10T19:17:18.792Z" }, - { url = "https://files.pythonhosted.org/packages/84/29/65b55622bde135aedf4565dc509d99b560ee4095e56989e815f8fd2aa910/cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", size = 4277038, upload-time = "2026-02-10T19:17:20.256Z" }, - { url = "https://files.pythonhosted.org/packages/bc/36/45e76c68d7311432741faf1fbf7fac8a196a0a735ca21f504c75d37e2558/cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", size = 4912181, upload-time = "2026-02-10T19:17:21.825Z" }, - { url = "https://files.pythonhosted.org/packages/6d/1a/c1ba8fead184d6e3d5afcf03d569acac5ad063f3ac9fb7258af158f7e378/cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", size = 4456482, upload-time = "2026-02-10T19:17:25.133Z" }, - { url = "https://files.pythonhosted.org/packages/f9/e5/3fb22e37f66827ced3b902cf895e6a6bc1d095b5b26be26bd13c441fdf19/cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", size = 4405497, upload-time = "2026-02-10T19:17:26.66Z" }, - { url = "https://files.pythonhosted.org/packages/1a/df/9d58bb32b1121a8a2f27383fabae4d63080c7ca60b9b5c88be742be04ee7/cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", size = 4667819, upload-time = "2026-02-10T19:17:28.569Z" }, - { url = "https://files.pythonhosted.org/packages/ea/ed/325d2a490c5e94038cdb0117da9397ece1f11201f425c4e9c57fe5b9f08b/cryptography-46.0.5-cp311-abi3-win32.whl", hash = "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48", size = 3028230, upload-time = "2026-02-10T19:17:30.518Z" }, - { url = "https://files.pythonhosted.org/packages/e9/5a/ac0f49e48063ab4255d9e3b79f5def51697fce1a95ea1370f03dc9db76f6/cryptography-46.0.5-cp311-abi3-win_amd64.whl", hash = "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4", size = 3480909, upload-time = "2026-02-10T19:17:32.083Z" }, - { url = "https://files.pythonhosted.org/packages/00/13/3d278bfa7a15a96b9dc22db5a12ad1e48a9eb3d40e1827ef66a5df75d0d0/cryptography-46.0.5-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2", size = 7119287, upload-time = "2026-02-10T19:17:33.801Z" }, - { url = "https://files.pythonhosted.org/packages/67/c8/581a6702e14f0898a0848105cbefd20c058099e2c2d22ef4e476dfec75d7/cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678", size = 4265728, upload-time = "2026-02-10T19:17:35.569Z" }, - { url = "https://files.pythonhosted.org/packages/dd/4a/ba1a65ce8fc65435e5a849558379896c957870dd64fecea97b1ad5f46a37/cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87", size = 4408287, upload-time = "2026-02-10T19:17:36.938Z" }, - { url = "https://files.pythonhosted.org/packages/f8/67/8ffdbf7b65ed1ac224d1c2df3943553766914a8ca718747ee3871da6107e/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee", size = 4270291, upload-time = "2026-02-10T19:17:38.748Z" }, - { url = "https://files.pythonhosted.org/packages/f8/e5/f52377ee93bc2f2bba55a41a886fd208c15276ffbd2569f2ddc89d50e2c5/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981", size = 4927539, upload-time = "2026-02-10T19:17:40.241Z" }, - { url = "https://files.pythonhosted.org/packages/3b/02/cfe39181b02419bbbbcf3abdd16c1c5c8541f03ca8bda240debc467d5a12/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9", size = 4442199, upload-time = "2026-02-10T19:17:41.789Z" }, - { url = "https://files.pythonhosted.org/packages/c0/96/2fcaeb4873e536cf71421a388a6c11b5bc846e986b2b069c79363dc1648e/cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648", size = 3960131, upload-time = "2026-02-10T19:17:43.379Z" }, - { url = "https://files.pythonhosted.org/packages/d8/d2/b27631f401ddd644e94c5cf33c9a4069f72011821cf3dc7309546b0642a0/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4", size = 4270072, upload-time = "2026-02-10T19:17:45.481Z" }, - { url = "https://files.pythonhosted.org/packages/f4/a7/60d32b0370dae0b4ebe55ffa10e8599a2a59935b5ece1b9f06edb73abdeb/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0", size = 4892170, upload-time = "2026-02-10T19:17:46.997Z" }, - { url = "https://files.pythonhosted.org/packages/d2/b9/cf73ddf8ef1164330eb0b199a589103c363afa0cf794218c24d524a58eab/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663", size = 4441741, upload-time = "2026-02-10T19:17:48.661Z" }, - { url = "https://files.pythonhosted.org/packages/5f/eb/eee00b28c84c726fe8fa0158c65afe312d9c3b78d9d01daf700f1f6e37ff/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826", size = 4396728, upload-time = "2026-02-10T19:17:50.058Z" }, - { url = "https://files.pythonhosted.org/packages/65/f4/6bc1a9ed5aef7145045114b75b77c2a8261b4d38717bd8dea111a63c3442/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d", size = 4652001, upload-time = "2026-02-10T19:17:51.54Z" }, - { url = "https://files.pythonhosted.org/packages/86/ef/5d00ef966ddd71ac2e6951d278884a84a40ffbd88948ef0e294b214ae9e4/cryptography-46.0.5-cp314-cp314t-win32.whl", hash = "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a", size = 3003637, upload-time = "2026-02-10T19:17:52.997Z" }, - { url = "https://files.pythonhosted.org/packages/b7/57/f3f4160123da6d098db78350fdfd9705057aad21de7388eacb2401dceab9/cryptography-46.0.5-cp314-cp314t-win_amd64.whl", hash = "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4", size = 3469487, upload-time = "2026-02-10T19:17:54.549Z" }, - { url = "https://files.pythonhosted.org/packages/e2/fa/a66aa722105ad6a458bebd64086ca2b72cdd361fed31763d20390f6f1389/cryptography-46.0.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31", size = 7170514, upload-time = "2026-02-10T19:17:56.267Z" }, - { url = "https://files.pythonhosted.org/packages/0f/04/c85bdeab78c8bc77b701bf0d9bdcf514c044e18a46dcff330df5448631b0/cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", size = 4275349, upload-time = "2026-02-10T19:17:58.419Z" }, - { url = "https://files.pythonhosted.org/packages/5c/32/9b87132a2f91ee7f5223b091dc963055503e9b442c98fc0b8a5ca765fab0/cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", size = 4420667, upload-time = "2026-02-10T19:18:00.619Z" }, - { url = "https://files.pythonhosted.org/packages/a1/a6/a7cb7010bec4b7c5692ca6f024150371b295ee1c108bdc1c400e4c44562b/cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", size = 4276980, upload-time = "2026-02-10T19:18:02.379Z" }, - { url = "https://files.pythonhosted.org/packages/8e/7c/c4f45e0eeff9b91e3f12dbd0e165fcf2a38847288fcfd889deea99fb7b6d/cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", size = 4939143, upload-time = "2026-02-10T19:18:03.964Z" }, - { url = "https://files.pythonhosted.org/packages/37/19/e1b8f964a834eddb44fa1b9a9976f4e414cbb7aa62809b6760c8803d22d1/cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", size = 4453674, upload-time = "2026-02-10T19:18:05.588Z" }, - { url = "https://files.pythonhosted.org/packages/db/ed/db15d3956f65264ca204625597c410d420e26530c4e2943e05a0d2f24d51/cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", size = 3978801, upload-time = "2026-02-10T19:18:07.167Z" }, - { url = "https://files.pythonhosted.org/packages/41/e2/df40a31d82df0a70a0daf69791f91dbb70e47644c58581d654879b382d11/cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", size = 4276755, upload-time = "2026-02-10T19:18:09.813Z" }, - { url = "https://files.pythonhosted.org/packages/33/45/726809d1176959f4a896b86907b98ff4391a8aa29c0aaaf9450a8a10630e/cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", size = 4901539, upload-time = "2026-02-10T19:18:11.263Z" }, - { url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", size = 4452794, upload-time = "2026-02-10T19:18:12.914Z" }, - { url = "https://files.pythonhosted.org/packages/02/ef/ffeb542d3683d24194a38f66ca17c0a4b8bf10631feef44a7ef64e631b1a/cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", size = 4404160, upload-time = "2026-02-10T19:18:14.375Z" }, - { url = "https://files.pythonhosted.org/packages/96/93/682d2b43c1d5f1406ed048f377c0fc9fc8f7b0447a478d5c65ab3d3a66eb/cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", size = 4667123, upload-time = "2026-02-10T19:18:15.886Z" }, - { url = "https://files.pythonhosted.org/packages/45/2d/9c5f2926cb5300a8eefc3f4f0b3f3df39db7f7ce40c8365444c49363cbda/cryptography-46.0.5-cp38-abi3-win32.whl", hash = "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72", size = 3010220, upload-time = "2026-02-10T19:18:17.361Z" }, - { url = "https://files.pythonhosted.org/packages/48/ef/0c2f4a8e31018a986949d34a01115dd057bf536905dca38897bacd21fac3/cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595", size = 3467050, upload-time = "2026-02-10T19:18:18.899Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/78/19/f748958276519adf6a0c1e79e7b8860b4830dda55ccdf29f2719b5fc499c/cryptography-46.0.4.tar.gz", hash = "sha256:bfd019f60f8abc2ed1b9be4ddc21cfef059c841d86d710bb69909a688cbb8f59", size = 749301, upload-time = "2026-01-28T00:24:37.379Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/99/157aae7949a5f30d51fcb1a9851e8ebd5c74bf99b5285d8bb4b8b9ee641e/cryptography-46.0.4-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:281526e865ed4166009e235afadf3a4c4cba6056f99336a99efba65336fd5485", size = 7173686, upload-time = "2026-01-28T00:23:07.515Z" }, + { url = "https://files.pythonhosted.org/packages/87/91/874b8910903159043b5c6a123b7e79c4559ddd1896e38967567942635778/cryptography-46.0.4-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f14fba5bf6f4390d7ff8f086c566454bff0411f6d8aa7af79c88b6f9267aecc", size = 4275871, upload-time = "2026-01-28T00:23:09.439Z" }, + { url = "https://files.pythonhosted.org/packages/c0/35/690e809be77896111f5b195ede56e4b4ed0435b428c2f2b6d35046fbb5e8/cryptography-46.0.4-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47bcd19517e6389132f76e2d5303ded6cf3f78903da2158a671be8de024f4cd0", size = 4423124, upload-time = "2026-01-28T00:23:11.529Z" }, + { url = "https://files.pythonhosted.org/packages/1a/5b/a26407d4f79d61ca4bebaa9213feafdd8806dc69d3d290ce24996d3cfe43/cryptography-46.0.4-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:01df4f50f314fbe7009f54046e908d1754f19d0c6d3070df1e6268c5a4af09fa", size = 4277090, upload-time = "2026-01-28T00:23:13.123Z" }, + { url = "https://files.pythonhosted.org/packages/0c/d8/4bb7aec442a9049827aa34cee1aa83803e528fa55da9a9d45d01d1bb933e/cryptography-46.0.4-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5aa3e463596b0087b3da0dbe2b2487e9fc261d25da85754e30e3b40637d61f81", size = 4947652, upload-time = "2026-01-28T00:23:14.554Z" }, + { url = "https://files.pythonhosted.org/packages/2b/08/f83e2e0814248b844265802d081f2fac2f1cbe6cd258e72ba14ff006823a/cryptography-46.0.4-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0a9ad24359fee86f131836a9ac3bffc9329e956624a2d379b613f8f8abaf5255", size = 4455157, upload-time = "2026-01-28T00:23:16.443Z" }, + { url = "https://files.pythonhosted.org/packages/0a/05/19d849cf4096448779d2dcc9bb27d097457dac36f7273ffa875a93b5884c/cryptography-46.0.4-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:dc1272e25ef673efe72f2096e92ae39dea1a1a450dd44918b15351f72c5a168e", size = 3981078, upload-time = "2026-01-28T00:23:17.838Z" }, + { url = "https://files.pythonhosted.org/packages/e6/89/f7bac81d66ba7cde867a743ea5b37537b32b5c633c473002b26a226f703f/cryptography-46.0.4-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:de0f5f4ec8711ebc555f54735d4c673fc34b65c44283895f1a08c2b49d2fd99c", size = 4276213, upload-time = "2026-01-28T00:23:19.257Z" }, + { url = "https://files.pythonhosted.org/packages/da/9f/7133e41f24edd827020ad21b068736e792bc68eecf66d93c924ad4719fb3/cryptography-46.0.4-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:eeeb2e33d8dbcccc34d64651f00a98cb41b2dc69cef866771a5717e6734dfa32", size = 4912190, upload-time = "2026-01-28T00:23:21.244Z" }, + { url = "https://files.pythonhosted.org/packages/a6/f7/6d43cbaddf6f65b24816e4af187d211f0bc536a29961f69faedc48501d8e/cryptography-46.0.4-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:3d425eacbc9aceafd2cb429e42f4e5d5633c6f873f5e567077043ef1b9bbf616", size = 4454641, upload-time = "2026-01-28T00:23:22.866Z" }, + { url = "https://files.pythonhosted.org/packages/9e/4f/ebd0473ad656a0ac912a16bd07db0f5d85184924e14fc88feecae2492834/cryptography-46.0.4-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91627ebf691d1ea3976a031b61fb7bac1ccd745afa03602275dda443e11c8de0", size = 4405159, upload-time = "2026-01-28T00:23:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d1/f7/7923886f32dc47e27adeff8246e976d77258fd2aa3efdd1754e4e323bf49/cryptography-46.0.4-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2d08bc22efd73e8854b0b7caff402d735b354862f1145d7be3b9c0f740fef6a0", size = 4666059, upload-time = "2026-01-28T00:23:26.766Z" }, + { url = "https://files.pythonhosted.org/packages/eb/a7/0fca0fd3591dffc297278a61813d7f661a14243dd60f499a7a5b48acb52a/cryptography-46.0.4-cp311-abi3-win32.whl", hash = "sha256:82a62483daf20b8134f6e92898da70d04d0ef9a75829d732ea1018678185f4f5", size = 3026378, upload-time = "2026-01-28T00:23:28.317Z" }, + { url = "https://files.pythonhosted.org/packages/2d/12/652c84b6f9873f0909374864a57b003686c642ea48c84d6c7e2c515e6da5/cryptography-46.0.4-cp311-abi3-win_amd64.whl", hash = "sha256:6225d3ebe26a55dbc8ead5ad1265c0403552a63336499564675b29eb3184c09b", size = 3478614, upload-time = "2026-01-28T00:23:30.275Z" }, + { url = "https://files.pythonhosted.org/packages/b9/27/542b029f293a5cce59349d799d4d8484b3b1654a7b9a0585c266e974a488/cryptography-46.0.4-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:485e2b65d25ec0d901bca7bcae0f53b00133bf3173916d8e421f6fddde103908", size = 7116417, upload-time = "2026-01-28T00:23:31.958Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f5/559c25b77f40b6bf828eabaf988efb8b0e17b573545edb503368ca0a2a03/cryptography-46.0.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:078e5f06bd2fa5aea5a324f2a09f914b1484f1d0c2a4d6a8a28c74e72f65f2da", size = 4264508, upload-time = "2026-01-28T00:23:34.264Z" }, + { url = "https://files.pythonhosted.org/packages/49/a1/551fa162d33074b660dc35c9bc3616fefa21a0e8c1edd27b92559902e408/cryptography-46.0.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dce1e4f068f03008da7fa51cc7abc6ddc5e5de3e3d1550334eaf8393982a5829", size = 4409080, upload-time = "2026-01-28T00:23:35.793Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6a/4d8d129a755f5d6df1bbee69ea2f35ebfa954fa1847690d1db2e8bca46a5/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:2067461c80271f422ee7bdbe79b9b4be54a5162e90345f86a23445a0cf3fd8a2", size = 4270039, upload-time = "2026-01-28T00:23:37.263Z" }, + { url = "https://files.pythonhosted.org/packages/4c/f5/ed3fcddd0a5e39321e595e144615399e47e7c153a1fb8c4862aec3151ff9/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:c92010b58a51196a5f41c3795190203ac52edfd5dc3ff99149b4659eba9d2085", size = 4926748, upload-time = "2026-01-28T00:23:38.884Z" }, + { url = "https://files.pythonhosted.org/packages/43/ae/9f03d5f0c0c00e85ecb34f06d3b79599f20630e4db91b8a6e56e8f83d410/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:829c2b12bbc5428ab02d6b7f7e9bbfd53e33efd6672d21341f2177470171ad8b", size = 4442307, upload-time = "2026-01-28T00:23:40.56Z" }, + { url = "https://files.pythonhosted.org/packages/8b/22/e0f9f2dae8040695103369cf2283ef9ac8abe4d51f68710bec2afd232609/cryptography-46.0.4-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:62217ba44bf81b30abaeda1488686a04a702a261e26f87db51ff61d9d3510abd", size = 3959253, upload-time = "2026-01-28T00:23:42.827Z" }, + { url = "https://files.pythonhosted.org/packages/01/5b/6a43fcccc51dae4d101ac7d378a8724d1ba3de628a24e11bf2f4f43cba4d/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:9c2da296c8d3415b93e6053f5a728649a87a48ce084a9aaf51d6e46c87c7f2d2", size = 4269372, upload-time = "2026-01-28T00:23:44.655Z" }, + { url = "https://files.pythonhosted.org/packages/17/b7/0f6b8c1dd0779df2b526e78978ff00462355e31c0a6f6cff8a3e99889c90/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:9b34d8ba84454641a6bf4d6762d15847ecbd85c1316c0a7984e6e4e9f748ec2e", size = 4891908, upload-time = "2026-01-28T00:23:46.48Z" }, + { url = "https://files.pythonhosted.org/packages/83/17/259409b8349aa10535358807a472c6a695cf84f106022268d31cea2b6c97/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:df4a817fa7138dd0c96c8c8c20f04b8aaa1fac3bbf610913dcad8ea82e1bfd3f", size = 4441254, upload-time = "2026-01-28T00:23:48.403Z" }, + { url = "https://files.pythonhosted.org/packages/9c/fe/e4a1b0c989b00cee5ffa0764401767e2d1cf59f45530963b894129fd5dce/cryptography-46.0.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b1de0ebf7587f28f9190b9cb526e901bf448c9e6a99655d2b07fff60e8212a82", size = 4396520, upload-time = "2026-01-28T00:23:50.26Z" }, + { url = "https://files.pythonhosted.org/packages/b3/81/ba8fd9657d27076eb40d6a2f941b23429a3c3d2f56f5a921d6b936a27bc9/cryptography-46.0.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9b4d17bc7bd7cdd98e3af40b441feaea4c68225e2eb2341026c84511ad246c0c", size = 4651479, upload-time = "2026-01-28T00:23:51.674Z" }, + { url = "https://files.pythonhosted.org/packages/00/03/0de4ed43c71c31e4fe954edd50b9d28d658fef56555eba7641696370a8e2/cryptography-46.0.4-cp314-cp314t-win32.whl", hash = "sha256:c411f16275b0dea722d76544a61d6421e2cc829ad76eec79280dbdc9ddf50061", size = 3001986, upload-time = "2026-01-28T00:23:53.485Z" }, + { url = "https://files.pythonhosted.org/packages/5c/70/81830b59df7682917d7a10f833c4dab2a5574cd664e86d18139f2b421329/cryptography-46.0.4-cp314-cp314t-win_amd64.whl", hash = "sha256:728fedc529efc1439eb6107b677f7f7558adab4553ef8669f0d02d42d7b959a7", size = 3468288, upload-time = "2026-01-28T00:23:55.09Z" }, + { url = "https://files.pythonhosted.org/packages/56/f7/f648fdbb61d0d45902d3f374217451385edc7e7768d1b03ff1d0e5ffc17b/cryptography-46.0.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a9556ba711f7c23f77b151d5798f3ac44a13455cc68db7697a1096e6d0563cab", size = 7169583, upload-time = "2026-01-28T00:23:56.558Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cc/8f3224cbb2a928de7298d6ed4790f5ebc48114e02bdc9559196bfb12435d/cryptography-46.0.4-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8bf75b0259e87fa70bddc0b8b4078b76e7fd512fd9afae6c1193bcf440a4dbef", size = 4275419, upload-time = "2026-01-28T00:23:58.364Z" }, + { url = "https://files.pythonhosted.org/packages/17/43/4a18faa7a872d00e4264855134ba82d23546c850a70ff209e04ee200e76f/cryptography-46.0.4-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3c268a3490df22270955966ba236d6bc4a8f9b6e4ffddb78aac535f1a5ea471d", size = 4419058, upload-time = "2026-01-28T00:23:59.867Z" }, + { url = "https://files.pythonhosted.org/packages/ee/64/6651969409821d791ba12346a124f55e1b76f66a819254ae840a965d4b9c/cryptography-46.0.4-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:812815182f6a0c1d49a37893a303b44eaac827d7f0d582cecfc81b6427f22973", size = 4278151, upload-time = "2026-01-28T00:24:01.731Z" }, + { url = "https://files.pythonhosted.org/packages/20/0b/a7fce65ee08c3c02f7a8310cc090a732344066b990ac63a9dfd0a655d321/cryptography-46.0.4-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:a90e43e3ef65e6dcf969dfe3bb40cbf5aef0d523dff95bfa24256be172a845f4", size = 4939441, upload-time = "2026-01-28T00:24:03.175Z" }, + { url = "https://files.pythonhosted.org/packages/db/a7/20c5701e2cd3e1dfd7a19d2290c522a5f435dd30957d431dcb531d0f1413/cryptography-46.0.4-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a05177ff6296644ef2876fce50518dffb5bcdf903c85250974fc8bc85d54c0af", size = 4451617, upload-time = "2026-01-28T00:24:05.403Z" }, + { url = "https://files.pythonhosted.org/packages/00/dc/3e16030ea9aa47b63af6524c354933b4fb0e352257c792c4deeb0edae367/cryptography-46.0.4-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:daa392191f626d50f1b136c9b4cf08af69ca8279d110ea24f5c2700054d2e263", size = 3977774, upload-time = "2026-01-28T00:24:06.851Z" }, + { url = "https://files.pythonhosted.org/packages/42/c8/ad93f14118252717b465880368721c963975ac4b941b7ef88f3c56bf2897/cryptography-46.0.4-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e07ea39c5b048e085f15923511d8121e4a9dc45cee4e3b970ca4f0d338f23095", size = 4277008, upload-time = "2026-01-28T00:24:08.926Z" }, + { url = "https://files.pythonhosted.org/packages/00/cf/89c99698151c00a4631fbfcfcf459d308213ac29e321b0ff44ceeeac82f1/cryptography-46.0.4-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:d5a45ddc256f492ce42a4e35879c5e5528c09cd9ad12420828c972951d8e016b", size = 4903339, upload-time = "2026-01-28T00:24:12.009Z" }, + { url = "https://files.pythonhosted.org/packages/03/c3/c90a2cb358de4ac9309b26acf49b2a100957e1ff5cc1e98e6c4996576710/cryptography-46.0.4-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:6bb5157bf6a350e5b28aee23beb2d84ae6f5be390b2f8ee7ea179cda077e1019", size = 4451216, upload-time = "2026-01-28T00:24:13.975Z" }, + { url = "https://files.pythonhosted.org/packages/96/2c/8d7f4171388a10208671e181ca43cdc0e596d8259ebacbbcfbd16de593da/cryptography-46.0.4-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:dd5aba870a2c40f87a3af043e0dee7d9eb02d4aff88a797b48f2b43eff8c3ab4", size = 4404299, upload-time = "2026-01-28T00:24:16.169Z" }, + { url = "https://files.pythonhosted.org/packages/e9/23/cbb2036e450980f65c6e0a173b73a56ff3bccd8998965dea5cc9ddd424a5/cryptography-46.0.4-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:93d8291da8d71024379ab2cb0b5c57915300155ad42e07f76bea6ad838d7e59b", size = 4664837, upload-time = "2026-01-28T00:24:17.629Z" }, + { url = "https://files.pythonhosted.org/packages/0a/21/f7433d18fe6d5845329cbdc597e30caf983229c7a245bcf54afecc555938/cryptography-46.0.4-cp38-abi3-win32.whl", hash = "sha256:0563655cb3c6d05fb2afe693340bc050c30f9f34e15763361cf08e94749401fc", size = 3009779, upload-time = "2026-01-28T00:24:20.198Z" }, + { url = "https://files.pythonhosted.org/packages/3a/6a/bd2e7caa2facffedf172a45c1a02e551e6d7d4828658c9a245516a598d94/cryptography-46.0.4-cp38-abi3-win_amd64.whl", hash = "sha256:fa0900b9ef9c49728887d1576fd8d9e7e3ea872fa9b25ef9b64888adc434e976", size = 3466633, upload-time = "2026-01-28T00:24:21.851Z" }, ] [[package]] @@ -2597,6 +2599,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, ] +[[package]] +name = "pydub" +version = "0.25.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326, upload-time = "2021-03-10T02:09:54.659Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327, upload-time = "2021-03-10T02:09:53.503Z" }, +] + [[package]] name = "pygments" version = "2.19.2"