Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions api/config/embedder.litellm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"embedder_ollama": {
"client_class": "OllamaClient",
"model_kwargs": {
"model": "nomic-embed-text"
}
},
"embedder": {
"client_class": "LiteLLMClient",
"initialize_kwargs": {
"api_key": "${LITELLM_API_KEY}",
"base_url": "${LITELLM_BASE_URL}"
},
"batch_size": 10,
Comment on lines +9 to +14
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The `initialize_kwargs` block explicitly passes `api_key` and `base_url` using environment-variable placeholders. However, if these environment variables are not set, the configuration loader (`replace_env_placeholders` in `api/config.py`) leaves the literal placeholder strings `"${LITELLM_API_KEY}"` and `"${LITELLM_BASE_URL}"` intact. `LiteLLMClient` is then initialized with these invalid literal strings instead of falling back to its built-in defaults or its own environment-variable lookups.

Since `LiteLLMClient` already reads `LITELLM_API_KEY` and `LITELLM_BASE_URL` from the environment itself (with sensible fallbacks such as `"dummy"` and `"http://localhost:4000"`), you can safely remove the `initialize_kwargs` block entirely, which makes the configuration more robust and less redundant. The affected lines would then read:

    "client_class": "LiteLLMClient",
    "batch_size": 10,

"model_kwargs": {
"model": "nomic-embed-text"
}
},
"retriever": {
"top_k": 20
},
"text_splitter": {
"split_by": "word",
"chunk_size": 350,
"chunk_overlap": 100
}
}
220 changes: 220 additions & 0 deletions api/config/generator.litellm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
{
"default_provider": "litellm",
"providers": {
"dashscope": {
"default_model": "qwen-plus",
"supportsCustomModel": true,
"models": {
"qwen-plus": {
"temperature": 0.7,
"top_p": 0.8
},
"qwen-turbo": {
"temperature": 0.7,
"top_p": 0.8
},
"deepseek-r1": {
"temperature": 0.7,
"top_p": 0.8
}
}
},
"google": {
"default_model": "gemini-2.5-flash",
"supportsCustomModel": true,
"models": {
"gemini-2.5-flash": {
"temperature": 1.0,
"top_p": 0.8,
"top_k": 20
},
"gemini-2.5-flash-lite": {
"temperature": 1.0,
"top_p": 0.8,
"top_k": 20
},
"gemini-2.5-pro": {
"temperature": 1.0,
"top_p": 0.8,
"top_k": 20
}
}
},
"openai": {
"default_model": "gpt-5-nano",
"supportsCustomModel": true,
"models": {
"gpt-5": {
"temperature": 1.0
},
"gpt-5-nano": {
"temperature": 1.0
},
"gpt-5-mini": {
"temperature": 1.0
},
"gpt-4o": {
"temperature": 0.7,
"top_p": 0.8
},
"gpt-4.1": {
"temperature": 0.7,
"top_p": 0.8
},
"o1": {
"temperature": 0.7,
"top_p": 0.8
},
"o3": {
"temperature": 1.0
},
"o4-mini": {
"temperature": 1.0
}
}
},
"litellm": {
"default_model": "qwen3:1.7b",
"supportsCustomModel": true,
"models": {
"qwen3:1.7b": {
"temperature": 0.7,
"top_p": 0.8,
"num_ctx": 32000
},
"llama3:8b": {
"temperature": 0.7,
"top_p": 0.8,
"num_ctx": 8000
},
"qwen3:8b": {
"temperature": 0.7,
"top_p": 0.8,
"num_ctx": 32000
}
Comment on lines +80 to +94
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The `litellm` provider models include the `num_ctx` parameter. Since `LiteLLMClient` inherits from `OpenAIClient` and uses the standard `openai` Python SDK under the hood, passing a non-standard parameter such as `num_ctx` directly in `model_kwargs` will raise a `TypeError` (unexpected keyword argument) on the client side when `chat.completions.create` is called.

Additionally, the context window size (`num_ctx`) is typically configured on the LiteLLM server side rather than per request. Removing `num_ctx` eliminates this runtime risk, and correcting the indentation to match the rest of the file (10 spaces for properties) keeps the configuration clean.

        "qwen3:1.7b": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "llama3:8b": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "qwen3:8b": {
          "temperature": 0.7,
          "top_p": 0.8
        }

}
},
"openrouter": {
"default_model": "openai/gpt-5-nano",
"supportsCustomModel": true,
"models": {
"openai/gpt-5-nano": {
"temperature": 0.7,
"top_p": 0.8
},
"openai/gpt-4o": {
"temperature": 0.7,
"top_p": 0.8
},
"deepseek/deepseek-r1": {
"temperature": 0.7,
"top_p": 0.8
},
"openai/gpt-4.1": {
"temperature": 0.7,
"top_p": 0.8
},
"openai/o1": {
"temperature": 0.7,
"top_p": 0.8
},
"openai/o3": {
"temperature": 1.0
},
"openai/o4-mini": {
"temperature": 1.0
},
"anthropic/claude-3.7-sonnet": {
"temperature": 0.7,
"top_p": 0.8
},
"anthropic/claude-3.5-sonnet": {
"temperature": 0.7,
"top_p": 0.8
}
}
},
"ollama": {
"default_model": "qwen3:1.7b",
"supportsCustomModel": true,
"models": {
"qwen3:1.7b": {
"options": {
"temperature": 0.7,
"top_p": 0.8,
"num_ctx": 32000
}
},
"llama3:8b": {
"options": {
"temperature": 0.7,
"top_p": 0.8,
"num_ctx": 8000
}
},
"qwen3:8b": {
"options": {
"temperature": 0.7,
"top_p": 0.8,
"num_ctx": 32000
}
}
}
},
"bedrock": {
"client_class": "BedrockClient",
"default_model": "anthropic.claude-3-sonnet-20240229-v1:0",
"supportsCustomModel": true,
"models": {
"anthropic.claude-3-sonnet-20240229-v1:0": {
"temperature": 0.7,
"top_p": 0.8
},
"anthropic.claude-3-haiku-20240307-v1:0": {
"temperature": 0.7,
"top_p": 0.8
},
"anthropic.claude-3-opus-20240229-v1:0": {
"temperature": 0.7,
"top_p": 0.8
},
"amazon.titan-text-express-v1": {
"temperature": 0.7,
"top_p": 0.8
},
"cohere.command-r-v1:0": {
"temperature": 0.7,
"top_p": 0.8
},
"ai21.j2-ultra-v1": {
"temperature": 0.7,
"top_p": 0.8
}
}
},
"azure": {
"client_class": "AzureAIClient",
"default_model": "gpt-4o",
"supportsCustomModel": true,
"models": {
"gpt-4o": {
"temperature": 0.7,
"top_p": 0.8
},
"gpt-4": {
"temperature": 0.7,
"top_p": 0.8
},
"gpt-35-turbo": {
"temperature": 0.7,
"top_p": 0.8
},
"gpt-4-turbo": {
"temperature": 0.7,
"top_p": 0.8
}
}
}
}
}

3 changes: 3 additions & 0 deletions docker-compose-litellm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ services:
volumes:
- ~/.adalflow:/root/.adalflow # Persist repository and embedding data
- ./api/logs:/app/api/logs # Persist log files across container restarts
# LiteLLM-specific config overrides
- ./api/config/generator.litellm.json:/app/api/config/generator.json:ro
- ./api/config/embedder.litellm.json:/app/api/config/embedder.json:ro
# Resource limits for docker-compose up (not Swarm mode)
mem_limit: 6g
mem_reservation: 2g
Expand Down
Loading