diff --git a/pyproject.toml b/pyproject.toml index 85f6fe03..eb1bfdf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,4 +97,4 @@ include-package-data = true exclude = [ "veadk/integrations/ve_faas/template/*", "veadk/integrations/ve_faas/web_template/*" -] +] \ No newline at end of file diff --git a/veadk/agent.py b/veadk/agent.py index 71329c8d..e67344aa 100644 --- a/veadk/agent.py +++ b/veadk/agent.py @@ -15,7 +15,7 @@ from __future__ import annotations import os -from typing import Dict, Optional, Union, Literal +from typing import Dict, Literal, Optional, Union # If user didn't set LITELLM_LOCAL_MODEL_COST_MAP, set it to True # to enable local model cost map. @@ -231,6 +231,16 @@ def model_post_init(self, __context: Any) -> None: ) self.tools.append(load_knowledgebase_tool) + if self.knowledgebase.enable_profile: + logger.debug( + f"Knowledgebase {self.knowledgebase.index} profile enabled" + ) + from veadk.tools.builtin_tools.load_kb_queries import ( + load_kb_queries, + ) + + self.tools.append(load_kb_queries) + if self.long_term_memory is not None: from google.adk.tools import load_memory @@ -333,7 +343,11 @@ def load_skills(self): f"- name: {skill.name}\n- description: {skill.description}\n\n" ) - if self.skills_mode not in ["skills_sandbox", "aio_sandbox", "local"]: + if self.skills_mode not in [ + "skills_sandbox", + "aio_sandbox", + "local", + ]: raise ValueError( f"Unsupported skill mode {self.skills_mode}, use `skills_sandbox`, `aio_sandbox` or `local` instead." 
) diff --git a/veadk/knowledgebase/knowledgebase.py b/veadk/knowledgebase/knowledgebase.py index ba105ca8..98890aad 100644 --- a/veadk/knowledgebase/knowledgebase.py +++ b/veadk/knowledgebase/knowledgebase.py @@ -20,6 +20,7 @@ from veadk.knowledgebase.backends.base_backend import BaseKnowledgebaseBackend from veadk.knowledgebase.entry import KnowledgebaseEntry +from veadk.knowledgebase.types import KnowledgebaseProfile from veadk.utils.logger import get_logger logger = get_logger(__name__) @@ -86,84 +87,6 @@ class KnowledgeBase(BaseModel): Notes: Please ensure that you have set the embedding-related configurations in environment variables. - - Examples: - ### Simple backend - - Create a local knowledgebase: - - ```python - from veadk import Agent, Runner - from veadk.knowledgebase.knowledgebase import KnowledgeBase - from veadk.memory.short_term_memory import ShortTermMemory - - app_name = "veadk_playground_app" - user_id = "veadk_playground_user" - session_id = "veadk_playground_session" - - - knowledgebase = KnowledgeBase(backend="opensearch", app_name=app_name) - knowledgebase.add_from_files(files=[knowledgebase_file]) - - agent = Agent(knowledgebase=knowledgebase) - - runner = Runner( - agent=agent, - short_term_memory=ShortTermMemory(), - app_name=app_name, - user_id=user_id, - ) - - response = await runner.run( - messages="Tell me the secret of green.", session_id=session_id - ) - print(response) - ``` - - ### Initialize knowledgebase with metadata - - ```python - from veadk.knowledgebase import KnowledgeBase - - knowledgebase = KnowledgeBase( - name="user_data", - description="A knowledgebase contains user hobbies.", - index="my_app", - ) - ``` - - ### Initialize knowledgebase with backend instance - - ```python - import veadk.config # noqa - - from veadk.knowledgebase import KnowledgeBase - from veadk.knowledgebase.backends.in_memory_backend import InMemoryKnowledgeBackend - - backend = InMemoryKnowledgeBackend( - index="my_app", - embedding_config=..., - ) 
async def generate_profiles(self, files: list[str], profile_path: str = ""):
    """Generate knowledgebase profiles for the given files and store them as JSON.

    One profile is requested from an LLM agent per file. Every profile that
    can be parsed is written to ``<profile_path>/profile_<profile name>.json``,
    and the names of all stored profiles are written to
    ``<profile_path>/profile_list.json``.

    Args:
        files (list[str]): Paths of the files to generate profiles for.
        profile_path (str, optional): The directory to store the generated
            profiles. If empty, ``./profiles/knowledgebase/profiles_<index>``
            is used, where ``<index>`` is the index of this knowledgebase.

    Returns:
        list[KnowledgebaseProfile]: The generated profiles. Files whose model
        response is not valid JSON, or does not match the profile schema, are
        skipped.
    """
    import json
    from pathlib import Path

    from pydantic import ValidationError

    from veadk import Agent, Runner
    from veadk.utils.misc import write_string_to_file

    # Resolve the output directory once, before anything is written. Doing it
    # lazily inside the write loop left it empty when no profile was produced,
    # which sent profile_list.json to the filesystem root.
    # NOTE: the `load_kb_queries` tool and the knowledgebase tool read profiles
    # from this default directory only, so profiles stored under a custom
    # `profile_path` are not picked up by them.
    if not profile_path:
        profile_path = f"./profiles/knowledgebase/profiles_{self.index}"

    # Read every file up front so an unreadable path fails before any model
    # call is made. Decode explicitly: the platform default is not always UTF-8.
    file_contents = [Path(file).read_text(encoding="utf-8") for file in files]

    agent = Agent(
        name="profile_generator",
        model_name="deepseek-v3-2-251201",
        description="A generator for generating knowledgebase profiles for the given files.",
        instruction='Generate JSON-formatted profile for the given file content. The corresponding language should be consistent with the file content. Respond ONLY with a JSON object containing the capitalized fields. Format: {"name": "", "description": "", "tags": [], "keywords": []} (3-5 tags, 3-5 keywords)',
        output_schema=KnowledgebaseProfile,
    )
    runner = Runner(agent=agent)

    profiles = []
    for idx, (file, file_content) in enumerate(zip(files, file_contents)):
        response = await runner.run(
            messages="file content: " + file_content,
            session_id=f"profile_{idx}",
        )
        try:
            profile = KnowledgebaseProfile.model_validate(json.loads(response))
        except (json.JSONDecodeError, ValidationError):
            # Malformed JSON and well-formed JSON with the wrong shape are
            # both treated as "no usable profile for this file".
            logger.error(
                f"Failed to parse JSON response for file {file}: {response}. Skip for this file."
            )
            continue
        profiles.append(profile)

    logger.debug(f"Generated {len(profiles)} profiles: {profiles}.")

    # NOTE(review): profile names are produced by the model and are used
    # verbatim in file names; consider sanitizing them.
    for profile in profiles:
        write_string_to_file(
            profile_path + f"/profile_{profile.name}.json",
            json.dumps(profile.model_dump(), indent=4, ensure_ascii=False),
        )

    profile_names = [profile.name for profile in profiles]
    write_string_to_file(
        profile_path + "/profile_list.json",
        json.dumps(profile_names, indent=4, ensure_ascii=False),
    )

    return profiles
def load_profile(profile_path: Path) -> dict:
    """Read a knowledgebase profile JSON file and return its parsed content.

    Args:
        profile_path: Path of the profile JSON file.

    Returns:
        The parsed JSON content of the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` / ``json.JSONDecodeError``).
    """
    # Profiles are stored as UTF-8 with non-ASCII characters kept verbatim, so
    # decode explicitly instead of relying on the platform default encoding.
    with open(profile_path, "r", encoding="utf-8") as f:
        return json.load(f)


def load_kb_queries(profile_names: list[str], tool_context: ToolContext) -> list[str]:
    """Load recommended knowledgebase queries based on the knowledgebase profiles.

    Args:
        profile_names: The list of knowledgebase profile names to load the profile for.

    Returns:
        The recommended query keywords of the requested profiles. Profiles that cannot be loaded are skipped. If the agent has no usable knowledgebase, a single-element list holding an error message is returned.
    """
    logger.info(f"Loading knowledgebase profiles: {profile_names}")

    agent = tool_context._invocation_context.agent

    if not isinstance(agent, Agent):
        logger.warning("Agent is not VeADK Agent, cannot load knowledgebase profile")
        return ["Error: Agent is not VeADK Agent, cannot load knowledgebase profile"]

    if not agent.knowledgebase:
        logger.warning("Agent has no knowledgebase, cannot load knowledgebase profile")
        return ["Error: Agent has no knowledgebase, cannot load knowledgebase profile"]

    index = agent.knowledgebase.index
    profile_dir = Path(f"./profiles/knowledgebase/profiles_{index}")
    profile_root = profile_dir.resolve()

    recommended_queries: list[str] = []
    for profile_name in profile_names:
        profile_path = profile_dir / f"profile_{profile_name}.json"

        # Profile names are chosen by the model, so refuse any name that would
        # resolve to a file outside of this knowledgebase's profile directory.
        if not profile_path.resolve().is_relative_to(profile_root):
            logger.warning(
                f"Profile {profile_name} is outside of the profile directory. Skip for this profile."
            )
            continue

        try:
            profile = load_profile(profile_path)
        except (OSError, ValueError) as e:
            # An unknown or corrupt profile must not abort the whole tool call.
            logger.warning(
                f"Failed to load knowledgebase profile {profile_name}: {e}. Skip for this profile."
            )
            continue

        keywords = profile.get("keywords", []) if isinstance(profile, dict) else []
        recommended_queries.extend(keywords)
        logger.debug(f"Loaded keywords from profile {profile_name}: {keywords}")

    logger.debug(
        f"Loaded total keywords for knowledgebase {index}: {recommended_queries}"
    )
    return recommended_queries
@@ async def process_llm_request( await super().process_llm_request( tool_context=tool_context, llm_request=llm_request ) + + index = self.knowledgebase.index + if self.knowledgebase.enable_profile: + from pathlib import Path + + profile_names = [] + profile_descriptions = [] + + with open( + f"./profiles/knowledgebase/profiles_{index}/profile_list.json", + "r", + ) as f: + profile_names = json.load(f) + + for profile_name in profile_names: + profile_descriptions.append( + load_profile( + Path( + f"./profiles/knowledgebase/profiles_{index}/profile_{profile_name}.json" + ), + )["description"] + ) + + profiles_text = "\n".join( + f"- profile_name: {name}\n profile_description: {profile_descriptions[idx]}" + for idx, name in enumerate(profile_names) + ) + # Tell the model about the knowledgebase. llm_request.append_instructions( [ @@ -77,9 +108,22 @@ async def process_llm_request( You have a knowledgebase (knowledegebase name is `{self.knowledgebase.name}`, knowledgebase description is `{self.knowledgebase.description}`). You can use it to answer questions. If any questions need you to look up the knowledgebase, you should call load_knowledgebase function with a query. """ - ] + ], ) + if self.knowledgebase.enable_profile: + llm_request.append_instructions( + [ + f""" +The knowledgebase is divided into the following profiles: + +{profiles_text} + +You should choose some profiles which are relevant to the user question. Before load the knowledgebase, you must call `load_kb_queries` to load the recommanded queries of the knowledgebase profiles. You should generate final knowledgebase queries based on the user question and recommanded queries. 
def write_string_to_file(file_path: str, content: str):
    """Write ``content`` to ``file_path`` as UTF-8 text.

    Missing parent directories are created first. An existing file at
    ``file_path`` is overwritten.

    Args:
        file_path: Destination path of the file.
        content: Text to store in the file.
    """
    parent_dir, _ = os.path.split(file_path)

    # A bare file name has no directory component, so there is nothing to create.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, mode="w", encoding="utf-8") as out_file:
        out_file.write(content)