diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py index 1bdf8f342..369c2232d 100644 --- a/lightllm/server/api_cli.py +++ b/lightllm/server/api_cli.py @@ -1,4 +1,15 @@ import argparse +import json + + +def _json_dict(value: str) -> dict: + try: + parsed = json.loads(value) + except json.JSONDecodeError as e: + raise argparse.ArgumentTypeError(f"invalid JSON object: {e}") from e + if not isinstance(parsed, dict): + raise argparse.ArgumentTypeError("value must be a JSON object") + return parsed def make_argument_parser() -> argparse.ArgumentParser: @@ -202,6 +213,16 @@ def make_argument_parser() -> argparse.ArgumentParser: "- /test/chat_template/tool_chat_template_deepseekr1.jinja" ), ) + parser.add_argument( + "--default_chat_template_kwargs", + "--default-chat-template-kwargs", + type=_json_dict, + default=None, + help=( + "Default JSON kwargs passed to tokenizer.apply_chat_template for every chat request. " + 'For example: \'{"preserve_thinking": true}\'. Request chat_template_kwargs override these values.' + ), + ) parser.add_argument( "--running_max_req_size", type=int, default=256, help="the max size for forward requests in the same time" diff --git a/lightllm/server/api_openai.py b/lightllm/server/api_openai.py index 0d934c44c..570ac54b5 100644 --- a/lightllm/server/api_openai.py +++ b/lightllm/server/api_openai.py @@ -12,7 +12,7 @@ from lightllm.server.reasoning_parser import ReasoningParser from .function_call_parser import TOOLS_TAG_LIST, FunctionCallParser, ToolCallItem -from .build_prompt import build_prompt, init_tokenizer +from .build_prompt import build_prompt, get_effective_chat_template_kwargs, init_tokenizer asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) import ujson as json @@ -163,13 +163,14 @@ def _is_force_thinking_mode(request: ChatCompletionRequest) -> bool: reasoning_parser = get_env_start_args().reasoning_parser if not reasoning_parser: return False + chat_template_kwargs = get_effective_chat_template_kwargs(request) if reasoning_parser in ["qwen3-thinking", "gpt-oss", "minimax"]: return True if reasoning_parser in ["deepseek-v3"]: - return request.chat_template_kwargs is not None and request.chat_template_kwargs.get("thinking") is True + return chat_template_kwargs.get("thinking") is True if reasoning_parser in ["qwen3", "glm45", "nano_v3", "interns1", "gemma4"]: # qwen3, glm45, nano_v3, interns1, and gemma4 are reasoning by default; - return not request.chat_template_kwargs or request.chat_template_kwargs.get("enable_thinking", True) is True + return chat_template_kwargs.get("enable_thinking", True) is True return True # default diff --git a/lightllm/server/build_prompt.py b/lightllm/server/build_prompt.py index 54d22a0d0..1c6cc5884 100644 --- a/lightllm/server/build_prompt.py +++ b/lightllm/server/build_prompt.py @@ -4,6 +4,7 @@ from lightllm.utils.log_utils import init_logger from functools import lru_cache from lightllm.utils.config_utils import get_model_type_v1 +from lightllm.utils.envs_utils import get_env_start_args logger = init_logger(__name__) @@ -126,6 +127,16 @@ def _normalize_multimodal_content_types(messages: list) -> None: part["type"] = "audio" +def get_effective_chat_template_kwargs(request) -> dict: + kwargs = {} + default_kwargs = getattr(get_env_start_args(), "default_chat_template_kwargs", None) + if default_kwargs: + kwargs.update(default_kwargs) + if request.chat_template_kwargs: + kwargs.update(request.chat_template_kwargs) + return kwargs + + async def build_prompt(request, tools) -> str: # pydantic格式转成dict, 否则,当根据tokenizer_config.json拼template时,Jinja判断无法识别 messages = [m.model_dump(by_alias=True, exclude_none=True) for m in request.messages] @@ -141,8 +152,9 @@ async def build_prompt(request, tools) -> str: if request.role_settings: kwargs["role_setting"] = request.role_settings - if request.chat_template_kwargs: - kwargs.update(request.chat_template_kwargs) + chat_template_kwargs = get_effective_chat_template_kwargs(request) + if chat_template_kwargs: + kwargs.update(chat_template_kwargs) # 修复一些parser类型是默认打开thinking,但是 tokenizer有时候不知道打开了thinking。导致 # 构建的reasoning parser 和 tokenizer 的行为不对齐导致的问题。 diff --git a/lightllm/server/core/objs/start_args_type.py b/lightllm/server/core/objs/start_args_type.py index 40c802815..68402102f 100644 --- a/lightllm/server/core/objs/start_args_type.py +++ b/lightllm/server/core/objs/start_args_type.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import List, Optional, Tuple +from typing import Dict, List, Optional, Tuple # 只是为了更好的编程提示 @@ -57,6 +57,7 @@ class StartArgs: }, ) chat_template: Optional[str] = field(default=None) + default_chat_template_kwargs: Optional[Dict] = field(default=None) running_max_req_size: int = field(default=512) tp: int = field(default=1) dp: int = field(default=1)