volcengine · yaozheng-fang · Jan 17, 2026 · Jan 17, 2026 · Jan 17, 2026 · Jan 17, 2026
diff --git a/pyproject.toml b/pyproject.toml
@@ -97,4 +97,4 @@ include-package-data = true
 exclude = [
     "veadk/integrations/ve_faas/template/*",
     "veadk/integrations/ve_faas/web_template/*"
-]
+]
diff --git a/veadk/agent.py b/veadk/agent.py
@@ -15,7 +15,7 @@
 from __future__ import annotations
 
 import os
-from typing import Dict, Optional, Union, Literal
+from typing import Dict, Literal, Optional, Union
 
 # If user didn't set LITELLM_LOCAL_MODEL_COST_MAP, set it to True
 # to enable local model cost map.
@@ -231,6 +231,16 @@ def model_post_init(self, __context: Any) -> None:
             )
             self.tools.append(load_knowledgebase_tool)
 
+            if self.knowledgebase.enable_profile:
+                logger.debug(
+                    f"Knowledgebase {self.knowledgebase.index} profile enabled"
+                )
+                from veadk.tools.builtin_tools.load_kb_queries import (
+                    load_kb_queries,
+                )
+
+                self.tools.append(load_kb_queries)
+
         if self.long_term_memory is not None:
             from google.adk.tools import load_memory
 
@@ -333,7 +343,11 @@ def load_skills(self):
                     f"- name: {skill.name}\n- description: {skill.description}\n\n"
                 )
 
-            if self.skills_mode not in ["skills_sandbox", "aio_sandbox", "local"]:
+            if self.skills_mode not in [
+                "skills_sandbox",
+                "aio_sandbox",
+                "local",
+            ]:
                 raise ValueError(
                     f"Unsupported skill mode {self.skills_mode}, use `skills_sandbox`, `aio_sandbox` or `local` instead."
                 )

diff --git a/veadk/knowledgebase/knowledgebase.py b/veadk/knowledgebase/knowledgebase.py
@@ -20,6 +20,7 @@
 
 from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend
 from veadk.knowledgebase.entry import KnowledgebaseEntry
+from veadk.knowledgebase.types import KnowledgebaseProfile
 from veadk.utils.logger import get_logger
 
 logger = get_logger(__name__)
@@ -86,84 +87,6 @@ class KnowledgeBase(BaseModel):
 
     Notes:
         Please ensure that you have set the embedding-related configurations in environment variables.
-
-    Examples:
-        ### Simple backend
-
-        Create a local knowledgebase:
-
-        ```python
-        from veadk import Agent, Runner
-        from veadk.knowledgebase.knowledgebase import KnowledgeBase
-        from veadk.memory.short_term_memory import ShortTermMemory
-
-        app_name = "veadk_playground_app"
-        user_id = "veadk_playground_user"
-        session_id = "veadk_playground_session"
-
-
-        knowledgebase = KnowledgeBase(backend="opensearch", app_name=app_name)
-        knowledgebase.add_from_files(files=[knowledgebase_file])
-
-        agent = Agent(knowledgebase=knowledgebase)
-
-        runner = Runner(
-            agent=agent,
-            short_term_memory=ShortTermMemory(),
-            app_name=app_name,
-            user_id=user_id,
-        )
-
-        response = await runner.run(
-            messages="Tell me the secret of green.", session_id=session_id
-        )
-        print(response)
-        ```
-
-        ### Initialize knowledgebase with metadata
-
-        ```python
-        from veadk.knowledgebase import KnowledgeBase
-
-        knowledgebase = KnowledgeBase(
-            name="user_data",
-            description="A knowledgebase contains user hobbies.",
-            index="my_app",
-        )
-        ```
-
-        ### Initialize knowledgebase with backend instance
-
-        ```python
-        import veadk.config  # noqa
-
-        from veadk.knowledgebase import KnowledgeBase
-        from veadk.knowledgebase.backends.in_memory_backend import InMemoryKnowledgeBackend
-
-        backend = InMemoryKnowledgeBackend(
-            index="my_app",
-            embedding_config=...,
-        )
-
-        knowledgebase = KnowledgeBase(
-            name="user_data",
-            description="A knowledgebase contains user hobbies.",
-            backend=backend,
-        )
-        ```
-
-        ### Initialize knowledgebase with backend config
-
-        ```python
-        from veadk.knowledgebase import KnowledgeBase
-
-        knowledgebase = KnowledgeBase(
-            name="user_data",
-            description="A knowledgebase contains user hobbies.",
-            backend="local",
-            backend_config={"index": "user_app"},
-        )
-        ```
     """
 
     name: str = "user_knowledgebase"
@@ -183,6 +106,8 @@ class KnowledgeBase(BaseModel):
 
     index: str = ""
 
+    enable_profile: bool = False
+
     def model_post_init(self, __context: Any) -> None:
         if isinstance(self.backend, BaseKnowledgebaseBackend):
             self._backend = self.backend
@@ -312,3 +237,65 @@ def __getattr__(self, name) -> Callable:
         For example, knowledgebase.delete(...) -> self._backend.delete(...)
         """
         return getattr(self._backend, name)
+
+    async def generate_profiles(
+        self, files: list[str], profile_path: str = ""
+    ) -> list[KnowledgebaseProfile]:
+        """Generate one profile per file with an LLM and persist them as JSON.
+
+        Each file is read from disk and sent to a dedicated `profile_generator`
+        agent. The reply is parsed into a `KnowledgebaseProfile`; replies that
+        cannot be parsed are logged and skipped. Every parsed profile is written
+        to `<profile_path>/profile_<profile name>.json`, and the names of all
+        parsed profiles are written to `<profile_path>/profile_list.json`.
+
+        Args:
+            files (list[str]): Paths of the files to generate profiles for.
+            profile_path (str, optional): Directory to store the generated
+                profiles in. If empty, `./profiles/knowledgebase/profiles_<index>`
+                is used, where `<index>` is this knowledgebase's `index`.
+
+        Returns:
+            list[KnowledgebaseProfile]: The profiles that were generated and
+                parsed successfully. It may be shorter than `files`.
+        """
+        import json
+        from pathlib import Path
+
+        from veadk import Agent, Runner
+        from veadk.utils.misc import write_string_to_file
+
+        # Resolve the output directory up front. Doing it lazily inside the
+        # write loop left `profile_path` empty when no profile was produced,
+        # which sent `profile_list.json` to the filesystem root.
+        # NOTE(review): the profile loaders visible in this change read from
+        # the default directory only; confirm a custom `profile_path` is
+        # honoured on the reading side.
+        if not profile_path:
+            profile_path = f"./profiles/knowledgebase/profiles_{self.index}"
+
+        file_contents = [Path(file).read_text() for file in files]
+
+        agent = Agent(
+            name="profile_generator",
+            model_name="deepseek-v3-2-251201",
+            # model_extra_config={
+            #     "extra_body": {"thinking": {"type": "disabled"}},
+            # },
+            description="A generator for generating knowledgebase profiles for the given files.",
+            instruction='Generate JSON-formatted profile for the given file content. The corresponding language should be consistent with the file content. Respond ONLY with a JSON object containing the capitalized fields. Format: {"name": "", "description": "", "tags": [], "keywords": []} (3-5 tags, 3-5 keywords)',
+            output_schema=KnowledgebaseProfile,
+        )
+        runner = Runner(agent=agent)
+
+        profiles = []
+        for idx, (file, file_content) in enumerate(zip(files, file_contents)):
+            response = await runner.run(
+                messages="file content: " + file_content,
+                session_id=f"profile_{idx}",
+            )
+            try:
+                profiles.append(KnowledgebaseProfile(**json.loads(response)))
+            except (ValueError, TypeError) as e:
+                # ValueError covers both malformed JSON (JSONDecodeError) and
+                # schema validation failures; TypeError covers replies that
+                # are not a JSON object (or not a string at all). One bad
+                # reply must not abort the remaining files.
+                logger.error(
+                    f"Failed to parse profile response for file {file}: {e}. Response: {response}. Skip for this file."
+                )
+                continue
+
+        logger.debug(f"Generated {len(profiles)} profiles: {profiles}.")
+
+        # NOTE(review): `profile.name` is produced by the model and is used
+        # verbatim as part of the file name; confirm it cannot contain path
+        # separators.
+        for profile in profiles:
+            write_string_to_file(
+                profile_path + f"/profile_{profile.name}.json",
+                json.dumps(profile.model_dump(), indent=4, ensure_ascii=False),
+            )
+
+        profile_names = [profile.name for profile in profiles]
+
+        write_string_to_file(
+            profile_path + "/profile_list.json",
+            json.dumps(profile_names, indent=4, ensure_ascii=False),
+        )
+
+        return profiles
diff --git a/veadk/knowledgebase/types.py b/veadk/knowledgebase/types.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pydantic import BaseModel, Field
+
+
+class KnowledgebaseProfile(BaseModel):
+    # Structured profile record with four required fields. Documentation is
+    # kept in `#` comments on purpose so that the model's generated schema
+    # (which carries the `description` strings below) is left untouched.
+
+    # Profile name.
+    name: str = Field(description="The name of the knowledgebase.")
+
+    # Free-text description of what the profile covers.
+    description: str = Field(description="The description of the knowledgebase.")
+
+    # Category labels; the description asks for roughly 3-5 entries, but no
+    # length constraint is enforced by the field itself.
+    tags: list[str] = Field(
+        description="Some tags of the knowledgebase. It represents the category of the knowledgebase. About 3-5 tags should be provided."
+    )
+
+    # Suggested query keywords; roughly 3-5 entries are requested, again
+    # without an enforced length constraint.
+    keywords: list[str] = Field(
+        description="Recommanded query keywords of the knowledgebase. About 3-5 keywords should be provided."
+    )
diff --git a/veadk/tools/builtin_tools/load_kb_queries.py b/veadk/tools/builtin_tools/load_kb_queries.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+from pathlib import Path
+
+from google.adk.tools.tool_context import ToolContext
+
+from veadk import Agent
+from veadk.utils.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+def load_profile(profile_path: Path) -> dict:
+    """Read a knowledgebase profile JSON file and return its parsed content.
+
+    Args:
+        profile_path: Location of the profile JSON file.
+
+    Returns:
+        The decoded JSON document.
+    """
+    # Decode straight from the file handle; the file is opened in text mode
+    # with the platform default encoding.
+    with open(profile_path) as profile_file:
+        return json.load(profile_file)
+
+
+def load_kb_queries(profile_names: list[str], tool_context: ToolContext) -> list[str]:
+    """Load recommended knowledgebase queries based on the knowledgebase profiles.
+
+    Args:
+        profile_names: The list of knowledgebase profile names to load the recommended query keywords for.
+
+    Returns:
+        A list of recommended query keywords collected from the requested profiles. Profiles that cannot be loaded are skipped. If the agent cannot provide a knowledgebase, a single-item list holding an error message is returned.
+    """
+    logger.info(f"Loading knowledgebase profiles: {profile_names}")
+
+    # Resolve the calling agent once instead of repeating the attribute chain.
+    agent = tool_context._invocation_context.agent
+
+    if not isinstance(agent, Agent):
+        logger.warning("Agent is not VeADK Agent, cannot load knowledgebase profile")
+        return ["Error: Agent is not VeADK Agent, cannot load knowledgebase profile"]
+
+    if not agent.knowledgebase:
+        logger.warning("Agent has no knowledgebase, cannot load knowledgebase profile")
+        return ["Error: Agent has no knowledgebase, cannot load knowledgebase profile"]
+
+    index = agent.knowledgebase.index
+    profiles_dir = Path(f"./profiles/knowledgebase/profiles_{index}")
+    profiles_root = profiles_dir.resolve()
+
+    recommended_queries = []
+    for profile_name in profile_names:
+        profile_path = profiles_dir / f"profile_{profile_name}.json"
+        try:
+            # The names are supplied by the model, so refuse anything that
+            # resolves outside of this knowledgebase's profile directory.
+            if not profile_path.resolve().is_relative_to(profiles_root):
+                raise ValueError("profile path escapes the profiles directory")
+            profile = load_profile(profile_path)
+        except (OSError, ValueError) as e:
+            # OSError: missing or unreadable file (e.g. an invented profile
+            # name). ValueError: malformed JSON, undecodable bytes, or the
+            # containment check above. Skip the profile rather than failing
+            # the whole tool call.
+            logger.warning(
+                f"Failed to load knowledgebase profile {profile_name}: {e}. Skip for this profile."
+            )
+            continue
+
+        # A profile file that is not a JSON object carries no keywords.
+        keywords = profile.get("keywords", []) if isinstance(profile, dict) else []
+        recommended_queries.extend(keywords)
+        logger.debug(f"Loaded keywords from profile {profile_name}: {keywords}")
+    logger.debug(
+        f"Loaded total keywords for knowledgebase {index}: {recommended_queries}"
+    )
+    return recommended_queries
diff --git a/veadk/tools/builtin_tools/load_knowledgebase.py b/veadk/tools/builtin_tools/load_knowledgebase.py
@@ -14,6 +14,8 @@
 
 from __future__ import annotations
 
+import json
+
 from google.adk.models.llm_request import LlmRequest
 from google.adk.tools.function_tool import FunctionTool
 from google.adk.tools.tool_context import ToolContext
@@ -23,6 +25,7 @@
 
 from veadk.knowledgebase import KnowledgeBase
 from veadk.knowledgebase.entry import KnowledgebaseEntry
+from veadk.tools.builtin_tools.load_kb_queries import load_profile
 from veadk.utils.logger import get_logger
 
 logger = get_logger(__name__)
@@ -70,16 +73,57 @@ async def process_llm_request(
         await super().process_llm_request(
             tool_context=tool_context, llm_request=llm_request
         )
+
+        index = self.knowledgebase.index
+        if self.knowledgebase.enable_profile:
+            from pathlib import Path
+
+            profile_names = []
+            profile_descriptions = []
+
+            with open(
+                f"./profiles/knowledgebase/profiles_{index}/profile_list.json",
+                "r",
+            ) as f:
+                profile_names = json.load(f)
+
+            for profile_name in profile_names:
+                profile_descriptions.append(
+                    load_profile(
+                        Path(
+                            f"./profiles/knowledgebase/profiles_{index}/profile_{profile_name}.json"
+                        ),
+                    )["description"]
+                )
+
+            profiles_text = "\n".join(
+                f"- profile_name: {name}\n  profile_description: {profile_descriptions[idx]}"
+                for idx, name in enumerate(profile_names)
+            )
+
         # Tell the model about the knowledgebase.
         llm_request.append_instructions(
             [
                 f"""
 You have a knowledgebase (knowledegebase name is `{self.knowledgebase.name}`, knowledgebase description is `{self.knowledgebase.description}`). You can use it to answer questions. If any questions need
 you to look up the knowledgebase, you should call load_knowledgebase function with a query.
 """
-            ]
+            ],
         )
 
+        if self.knowledgebase.enable_profile:
+            llm_request.append_instructions(
+                [
+                    f"""
+The knowledgebase is divided into the following profiles: 
+
+{profiles_text}
+
+You should choose some profiles which are relevant to the user question. Before load the knowledgebase, you must call `load_kb_queries` to load the recommanded queries of the knowledgebase profiles. You should generate final knowledgebase queries based on the user question and recommanded queries.
+"""
+                ]
+            )
+
     async def load_knowledgebase(
         self, query: str, tool_context: ToolContext
     ) -> LoadKnowledgebaseResponse: