Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
653 changes: 653 additions & 0 deletions fastdeploy/input/base_processor.py

Large diffs are not rendered by default.

495 changes: 19 additions & 476 deletions fastdeploy/input/ernie4_5_processor.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def __init__(
self.tool_parser_dict = dict()
self.decode_status = dict()
self.model_status_dict = dict()
self.tokenizer_type = "ernie4_5"
self._load_tokenizer()

# Generation config
Expand Down
21 changes: 9 additions & 12 deletions fastdeploy/input/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,28 +82,25 @@ def create_processor(self):
except Exception as e:
logger.info(f"Plugin input processor not available ({e}), using built-in processor")
if not self.model_config.enable_mm:
if not ErnieArchitectures.contains_ernie_arch(architecture):
if not envs.ENABLE_V1_DATA_PROCESSOR:
from fastdeploy.input.text_processor import DataProcessor
else:
from fastdeploy.input.v1.text_processor import DataProcessor
if not envs.ENABLE_V1_DATA_PROCESSOR:
from fastdeploy.input.text_processor import TextProcessor

self.processor = DataProcessor(
tokenizer_type = "ernie4_5" if ErnieArchitectures.contains_ernie_arch(architecture) else "auto"
self.processor = TextProcessor(
model_name_or_path=self.model_name_or_path,
tokenizer_type=tokenizer_type,
reasoning_parser_obj=reasoning_parser_obj,
tool_parser_obj=tool_parser_obj,
Comment on lines +88 to 93
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里新增了根据架构选择 tokenizer_type(ernie4_5 vs auto)并走 TextProcessor 的分支,但当前单测只覆盖了“非 Ernie、非多模态”的路径。建议补充一个覆盖 Ernie 架构的用例(例如 architecture 为 ERNIE4.5 相关字符串),断言此时 tokenizer_type 被设置为 ernie4_5,并验证 TextProcessor 的构造参数。

Copilot uses AI. Check for mistakes.
)
else:
if not envs.ENABLE_V1_DATA_PROCESSOR:
from fastdeploy.input.ernie4_5_processor import (
Ernie4_5Processor,
)
if not ErnieArchitectures.contains_ernie_arch(architecture):
from fastdeploy.input.v1.text_processor import DataProcessor
else:
from fastdeploy.input.v1.ernie4_5_processor import (
Ernie4_5Processor,
Ernie4_5Processor as DataProcessor,
)

self.processor = Ernie4_5Processor(
self.processor = DataProcessor(
Comment on lines +85 to +103
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PR 描述目前未填写 Motivation/Modifications/Usage/Tests 等关键信息;另外,建议标题按模板加空格,写成“[Optimization] Merge Text processor”。建议补充:为何要合并 Text processor、兼容性/行为差异(如 HF tokenizer streaming 返回值变化),以及相关测试/回归验证方式。

Copilot uses AI. Check for mistakes.
model_name_or_path=self.model_name_or_path,
reasoning_parser_obj=reasoning_parser_obj,
tool_parser_obj=tool_parser_obj,
Expand Down
Loading
Loading