Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 40 additions & 5 deletions backend/app/services/llm_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,16 @@ def __init__(
async def _get_client(self) -> httpx.AsyncClient:
    """Get or create the cached HTTP client, rebuilding it if closed.

    Returns:
        An open ``httpx.AsyncClient`` configured with this instance's
        timeout, redirect following, and any endpoint-specific headers.
    """
    if self._client is None or self._client.is_closed:
        # Removed stale duplicate AsyncClient construction that was
        # immediately overwritten (leaking an unclosed client).
        # Kimi For Coding requires a Coding Agent User-Agent.
        headers: dict[str, str] = {}
        if self.base_url and "kimi.com/coding" in self.base_url:
            headers["User-Agent"] = "Kimi-Code/1.0"
            headers["x-kimi-code"] = "true"
        self._client = httpx.AsyncClient(
            timeout=self.timeout,
            follow_redirects=True,
            headers=headers,
        )
    return self._client

def _get_headers(self) -> dict[str, str]:
Expand All @@ -230,6 +239,17 @@ def _normalize_base_url(self) -> str:
url = url[: -len("/chat/completions")]
return url

def _get_chat_endpoint(self) -> str:
"""Get the chat completions endpoint path.

Some custom endpoints (like api.kimi.com/coding/) use different paths.
"""
base = self._normalize_base_url()
# Special handling for kimi.com/coding endpoint which uses /v1/chat/completions
if "kimi.com/coding" in base and not base.endswith("/v1"):
return f"{base}/v1/chat/completions"
return f"{base}/chat/completions"

def _build_payload(
self,
messages: list[LLMMessage],
Expand All @@ -240,6 +260,11 @@ def _build_payload(
**kwargs: Any,
) -> dict[str, Any]:
"""Build request payload."""
# Kimi k2.5 models only support temperature=1
model_name = (self.model or "").lower()
if "kimi-k2.5" in model_name or "kimi-k2-5" in model_name:
temperature = 1.0

payload: dict[str, Any] = {
"model": self.model,
"messages": [m.to_openai_format() for m in messages],
Expand Down Expand Up @@ -403,7 +428,7 @@ async def complete(
**kwargs: Any,
) -> LLMResponse:
"""Non-streaming completion."""
url = f"{self._normalize_base_url()}/chat/completions"
url = self._get_chat_endpoint()
payload = self._build_payload(messages, tools, temperature, max_tokens, stream=False, **kwargs)

client = await self._get_client()
Expand Down Expand Up @@ -440,7 +465,7 @@ async def stream(
**kwargs: Any,
) -> LLMResponse:
"""Streaming completion."""
url = f"{self._normalize_base_url()}/chat/completions"
url = self._get_chat_endpoint()
payload = self._build_payload(messages, tools, temperature, max_tokens, stream=True, **kwargs)

full_content = ""
Expand Down Expand Up @@ -1345,11 +1370,14 @@ async def _get_client(self) -> httpx.AsyncClient:
return self._client

def _get_headers(self) -> dict[str, str]:
return {
headers = {
"Content-Type": "application/json",
"x-api-key": self.api_key,
"anthropic-version": self.API_VERSION,
}
# Kimi for Coding doesn't need anthropic-version header
if self.base_url and "kimi.com" not in self.base_url:
headers["anthropic-version"] = self.API_VERSION
return headers

def _build_payload(
self,
Expand Down Expand Up @@ -1725,6 +1753,13 @@ class ProviderSpec:
default_base_url="https://api.moonshot.cn/v1",
default_max_tokens=8192,
),
"kimi-coding": ProviderSpec(
provider="kimi-coding",
display_name="Kimi for Coding",
protocol="anthropic",
default_base_url="https://api.kimi.com/coding",
default_max_tokens=32768,
),
"vllm": ProviderSpec(
provider="vllm",
display_name="vLLM",
Expand Down