diff --git a/README.md b/README.md index 7e26dbd..43adeca 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ StackOne AI provides a unified interface for accessing various SaaS tools throug - Glob pattern filtering with patterns like `"hris_*"` and exclusions `"!hris_delete_*"` - Provider and action filtering - Multi-account support +- **Semantic Search**: AI-powered tool discovery using natural language queries - **Utility Tools** (Beta): Dynamic tool discovery and execution based on natural language queries - Integration with popular AI frameworks: - OpenAI Functions @@ -325,6 +326,57 @@ execute_tool = utility_tools.get_tool("tool_execute") result = execute_tool.call(toolName="hris_list_employees", params={"limit": 10}) ``` +## Semantic Search + +Semantic search enables tool discovery using natural language instead of exact keyword matching. It understands intent and synonyms, so queries like "onboard new hire" or "check my to-do list" resolve to the right StackOne actions. + +**How it works:** Your query is matched against all StackOne actions using semantic vector search. Results are automatically filtered to only the connectors available in your linked accounts, so you only get tools you can actually use. + +### `search_tools()` — Recommended + +High-level method that returns a `Tools` collection ready for any framework: + +```python +from stackone_ai import StackOneToolSet + +toolset = StackOneToolSet() + +# Natural language search — no need to know exact tool names +tools = toolset.search_tools("manage employee records", top_k=5) + +# Use with any framework +langchain_tools = tools.to_langchain() + +# Filter by connector +tools = toolset.search_tools("create time off request", connector="bamboohr", top_k=3) +``` + +### `search_action_names()` — Lightweight + +Returns action names and similarity scores without fetching full tool definitions. 
Useful for inspecting results before committing: + +```python +results = toolset.search_action_names("time off requests", top_k=5) +for r in results: + print(f"{r.action_name} ({r.connector_key}): {r.similarity_score:.2f}") +``` + +### Utility Tools with Semantic Search + +For agent loops using `tool_search` / `tool_execute`, pass `semantic_client` to upgrade from local keyword matching to semantic search: + +```python +tools = toolset.fetch_tools() +utility = tools.utility_tools(semantic_client=toolset.semantic_client) + +search_tool = utility.get_tool("tool_search") +results = search_tool.call(query="onboard a new team member", limit=5) +``` + +> `tool_search` queries the full backend catalog (all connectors), not just your linked accounts, so make sure `fetch_tools()` covers the actions you expect to execute. + +See [Semantic Search Example](examples/semantic_search_example.py) for complete patterns including OpenAI and LangChain integration. + ## Examples For more examples, check out the [examples/](examples/) directory: @@ -335,6 +387,7 @@ For more examples, check out the [examples/](examples/) directory: - [LangChain Integration](examples/langchain_integration.py) - [CrewAI Integration](examples/crewai_integration.py) - [Utility Tools](examples/utility_tools_example.py) +- [Semantic Search](examples/semantic_search_example.py) ## Development diff --git a/examples/crewai_integration.py b/examples/crewai_integration.py index 6cc1604..a3d6e0a 100644 --- a/examples/crewai_integration.py +++ b/examples/crewai_integration.py @@ -1,6 +1,9 @@ """ This example demonstrates how to use StackOne tools with CrewAI. +Note: This example is Python only. CrewAI does not have an official +TypeScript/Node.js library. + CrewAI uses LangChain tools natively. ```bash diff --git a/examples/crewai_semantic_search.py b/examples/crewai_semantic_search.py new file mode 100644 index 0000000..5ea4d4d --- /dev/null +++ b/examples/crewai_semantic_search.py @@ -0,0 +1,145 @@ +""" +CrewAI meeting booking agent powered by semantic search.
+ +Note: This example is Python only. CrewAI does not have an official +TypeScript/Node.js library. + +Instead of hardcoding tool names, this example uses semantic search to discover +scheduling tools (e.g., Calendly) from natural language queries like "book a +meeting" or "check availability". + +Prerequisites: +- STACKONE_API_KEY environment variable set +- STACKONE_ACCOUNT_ID environment variable set (Calendly-linked account) +- OPENAI_API_KEY environment variable set (for CrewAI's LLM) + +```bash +uv run examples/crewai_semantic_search.py +``` +""" + +import os +from typing import Any + +from crewai import Agent, Crew, Task +from crewai.tools.base_tool import BaseTool as CrewAIBaseTool +from dotenv import load_dotenv +from pydantic import BaseModel, Field + +from stackone_ai import StackOneToolSet +from stackone_ai.models import StackOneTool + +load_dotenv() + +_account_ids = [aid.strip() for aid in os.getenv("STACKONE_ACCOUNT_ID", "").split(",") if aid.strip()] + + +def _to_crewai_tool(tool: StackOneTool) -> CrewAIBaseTool: + """Wrap a StackOneTool as a CrewAI BaseTool. + + CrewAI has its own BaseTool (not LangChain's), so we create a + lightweight wrapper that delegates execution to the StackOne tool. 
+ """ + schema_props: dict[str, Any] = {} + annotations: dict[str, Any] = {} + + for name, details in tool.parameters.properties.items(): + python_type: type = str + if isinstance(details, dict): + type_str = details.get("type", "string") + if type_str == "number": + python_type = float + elif type_str == "integer": + python_type = int + elif type_str == "boolean": + python_type = bool + field = Field(description=details.get("description", "")) + else: + field = Field(description="") + + schema_props[name] = field + annotations[name] = python_type + + _schema = type( + f"{tool.name.title().replace('_', '')}Args", + (BaseModel,), + {"__annotations__": annotations, "__module__": __name__, **schema_props}, + ) + + _parent = tool + _name = tool.name + _description = tool.description + + class WrappedTool(CrewAIBaseTool): + name: str = _name + description: str = _description + args_schema: type[BaseModel] = _schema + + def _run(self, **kwargs: Any) -> Any: + return _parent.execute(kwargs) + + return WrappedTool() + + +def crewai_semantic_search() -> None: + toolset = StackOneToolSet() + + # Step 1: Preview — lightweight search returning action names and scores + # search_action_names() queries the semantic API without fetching full + # tool definitions. Useful for inspecting what's available before committing. + preview = toolset.search_action_names( + "book a meeting or check availability", + account_ids=_account_ids, + ) + print("Semantic search preview (action names only):") + for r in preview: + print(f" [{r.similarity_score:.2f}] {r.action_name} ({r.connector_key})") + print() + + # Step 2: Full discovery — fetch matching tools ready for framework use + # search_tools() fetches tools from linked accounts, runs semantic search, + # and returns only tools the user has access to. 
+ tools = toolset.search_tools( + "schedule meetings, check availability, list events", + connector="calendly", + account_ids=_account_ids, + ) + assert len(tools) > 0, "Expected at least one scheduling tool" + + print(f"Discovered {len(tools)} scheduling tools:") + for tool in tools: + print(f" - {tool.name}: {tool.description[:80]}...") + print() + + # Step 3: Convert to CrewAI format + crewai_tools = [_to_crewai_tool(t) for t in tools] + + # Step 4: Create a CrewAI meeting booking agent + agent = Agent( + role="Meeting Booking Agent", + goal="Help users manage their calendar by discovering and booking meetings, " + "checking availability, and listing upcoming events.", + backstory="You are an AI assistant specialized in calendar management. " + "You have access to scheduling tools discovered via semantic search " + "and can help users with all meeting-related tasks.", + llm="gpt-4o-mini", + tools=crewai_tools, + max_iter=2, + verbose=True, + ) + + task = Task( + description="List upcoming scheduled events to give an overview of the calendar.", + agent=agent, + expected_output="A summary of upcoming events or a confirmation that events were retrieved.", + ) + + crew = Crew(agents=[agent], tasks=[task]) + + result = crew.kickoff() + assert result is not None, "Expected result to be returned" + print(f"\nCrew result: {result}") + + +if __name__ == "__main__": + crewai_semantic_search() diff --git a/examples/semantic_search_example.py b/examples/semantic_search_example.py new file mode 100644 index 0000000..927fd59 --- /dev/null +++ b/examples/semantic_search_example.py @@ -0,0 +1,390 @@ +#!/usr/bin/env python +""" +Example demonstrating semantic search for AI-powered tool discovery. + +Semantic search understands natural language intent and synonyms, so queries like +"book a meeting" or "cancel an event" resolve to the right StackOne actions — +unlike keyword matching which requires exact tool names. 
+ +This example uses a Calendly-linked account to demonstrate how semantic search +discovers scheduling, event, and organization management tools from natural +language queries. + + +How Semantic Search Works (Overview) +===================================== + +The SDK provides three paths for semantic tool discovery, each with a different +trade-off between speed, filtering, and completeness: + +1. search_tools(query) — Full discovery (recommended for agent frameworks) + + This is the method you should use when integrating with OpenAI, LangChain, + CrewAI, or any other agent framework. It works in these steps: + + a) Fetch ALL tools from the user's linked accounts via MCP + b) Extract the set of available connectors (e.g. {bamboohr, calendly}) + c) Query the semantic search API with the natural language query + d) Filter results to only connectors the user has access to + e) Deduplicate across API versions (keep highest score per action) + f) Match results back to the fetched tool definitions + g) Return a Tools collection sorted by relevance score + + Key point: tools are fetched first, semantic search runs second, and only + the intersection (tools the user has AND that match the query) is returned. + If the semantic API is unavailable, the SDK falls back to local BM25+TF-IDF + search automatically. + +2. search_action_names(query) — Lightweight preview + + Queries the semantic API directly and returns metadata (name, connector, + score, description) without fetching full tool definitions. Useful for + inspecting results before committing to a full fetch. When account_ids are + provided, results are filtered to the user's available connectors. + +3. utility_tools(semantic_client=...) — Agent-loop pattern + + Creates tool_search and tool_execute utility tools that agents can call + inside an agentic loop. The agent searches, inspects, and executes tools + dynamically. 
Note: utility tool search queries the full backend catalog + (all connectors), not just the user's linked accounts. + + +This example is runnable with the following command: +```bash +uv run examples/semantic_search_example.py +``` + +Prerequisites: +- STACKONE_API_KEY environment variable set +- STACKONE_ACCOUNT_ID environment variable set (required for examples that fetch tools) +- At least one linked account in StackOne (this example uses Calendly) + +Note: search_action_names() works with just STACKONE_API_KEY — no account ID needed. +""" + +import logging +import os + +from dotenv import load_dotenv + +from stackone_ai import StackOneToolSet + +load_dotenv() + +# Show SDK warnings (e.g., semantic search fallback to local search) +logging.basicConfig(level=logging.WARNING) + +# Read account IDs from environment — supports comma-separated values +_account_ids = [aid.strip() for aid in os.getenv("STACKONE_ACCOUNT_ID", "").split(",") if aid.strip()] + + +def example_search_action_names(): + """Lightweight search returning action names and scores without fetching tools. + + search_action_names() queries the semantic search API directly — it does NOT + need account IDs or MCP. This makes it the simplest way to try semantic search. + + When called without account_ids, results come from the full StackOne catalog + (all connectors). When called with account_ids, results are filtered to only + connectors available in your linked accounts. 
+ """ + print("=" * 60) + print("Example 1: search_action_names() — lightweight discovery") + print("=" * 60) + print() + print("This searches the StackOne action catalog using semantic vectors.") + print("No account ID needed — results come from all available connectors.") + print() + + toolset = StackOneToolSet() + + query = "get user schedule" + print(f'Searching for: "{query}"') + print() + + results = toolset.search_action_names(query, top_k=5) + + print(f"Top {len(results)} matches from the full catalog:") + for r in results: + print(f" [{r.similarity_score:.2f}] {r.action_name} ({r.connector_key})") + print(f" {r.description}") + print() + + # Show filtering effect when account_ids are available + if _account_ids: + print(f"Now filtering to your linked accounts ({', '.join(_account_ids)})...") + filtered = toolset.search_action_names(query, account_ids=_account_ids, top_k=5) + print(f"Filtered to {len(filtered)} matches (only your connectors):") + for r in filtered: + print(f" [{r.similarity_score:.2f}] {r.action_name} ({r.connector_key})") + else: + print("Tip: Set STACKONE_ACCOUNT_ID to see results filtered to your linked connectors.") + + print() + + +def example_search_tools(): + """High-level semantic search returning a Tools collection. + + search_tools() is the recommended way to use semantic search. It: + 1. Fetches tool definitions from your linked accounts via MCP + 2. Queries the semantic search API with your natural language query + 3. Matches semantic results to available tools (filtering out connectors you don't have) + 4. Returns a Tools collection ready for any framework (.to_openai(), .to_langchain(), etc.)
+ """ + print("=" * 60) + print("Example 2: search_tools() — full tool discovery") + print("=" * 60) + print() + + toolset = StackOneToolSet() + + query = "cancel an event" + print(f'Step 1: Searching for "{query}" via semantic search...') + print() + + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=5) + + connectors = {t.name.split("_")[0] for t in tools} + print(f"Found {len(tools)} tools from your linked account(s) ({', '.join(sorted(connectors))}):") + for tool in tools: + print(f" - {tool.name}") + print(f" {tool.description}") + print() + + # Show OpenAI conversion + print("Step 2: Converting to OpenAI function-calling format...") + openai_tools = tools.to_openai() + print(f"Created {len(openai_tools)} OpenAI function definitions:") + for fn in openai_tools: + func = fn["function"] + param_names = list(func["parameters"].get("properties", {}).keys()) + print(f" - {func['name']}({', '.join(param_names[:3])}{'...' if len(param_names) > 3 else ''})") + print() + + +def example_search_tools_with_connector(): + """Semantic search filtered by connector. + + Use the connector parameter to scope results to a specific provider, + for example when you know the user works with Calendly. + """ + print("=" * 60) + print("Example 3: search_tools() with connector filter") + print("=" * 60) + print() + + toolset = StackOneToolSet() + + query = "book a meeting" + connector = "calendly" + print(f'Searching for "{query}" filtered to connector="{connector}"...') + print() + + tools = toolset.search_tools( + query, + connector=connector, + account_ids=_account_ids, + top_k=3, + ) + + print(f"Found {len(tools)} {connector} tools:") + for tool in tools: + print(f" - {tool.name}") + print(f" {tool.description}") + print() + + +def example_utility_tools_semantic(): + """Using utility tools with semantic search for agent loops. 
+ + When building agent loops (search -> select -> execute), pass + semantic_client to utility_tools() to upgrade tool_search from + local BM25+TF-IDF to cloud-based semantic search. + + Note: tool_search queries the full backend catalog (all connectors), + not just the ones in your linked accounts. + """ + print("=" * 60) + print("Example 4: Utility tools with semantic search") + print("=" * 60) + print() + + toolset = StackOneToolSet() + + print("Step 1: Fetching tools from your linked accounts via MCP...") + tools = toolset.fetch_tools(account_ids=_account_ids) + print(f"Loaded {len(tools)} tools.") + print() + + print("Step 2: Creating utility tools with semantic search enabled...") + print(" Passing semantic_client upgrades tool_search from local keyword") + print(" matching (BM25+TF-IDF) to cloud-based semantic vector search.") + utility = tools.utility_tools(semantic_client=toolset.semantic_client) + + search_tool = utility.get_tool("tool_search") + if search_tool: + query = "cancel an event or meeting" + print() + print(f'Step 3: Calling tool_search with query="{query}"...') + print(" (This searches the full StackOne catalog, not just your linked tools)") + print() + result = search_tool.call(query=query, limit=5) + tools_data = result.get("tools", []) + print(f"tool_search returned {len(tools_data)} results:") + for tool_info in tools_data: + print(f" [{tool_info['score']:.2f}] {tool_info['name']}") + print(f" {tool_info['description']}") + + print() + + +def example_openai_agent_loop(): + """Complete agent loop: semantic search -> OpenAI -> execute. + + This demonstrates the full pattern for building an AI agent that + discovers tools via semantic search and executes them via OpenAI. + """ + print("=" * 60) + print("Example 5: OpenAI agent loop with semantic search") + print("=" * 60) + print() + + try: + from openai import OpenAI + except ImportError: + print("Skipped: OpenAI library not installed. 
Install with: pip install openai") + print() + return + + if not os.getenv("OPENAI_API_KEY"): + print("Skipped: Set OPENAI_API_KEY to run this example.") + print() + return + + client = OpenAI() + toolset = StackOneToolSet() + + query = "list upcoming events" + print(f'Step 1: Discovering tools for "{query}" via semantic search...') + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=3) + print(f"Found {len(tools)} tools:") + for tool in tools: + print(f" - {tool.name}") + print() + + print("Step 2: Sending tools to OpenAI as function definitions...") + openai_tools = tools.to_openai() + + messages = [ + {"role": "system", "content": "You are a helpful scheduling assistant."}, + {"role": "user", "content": "Can you show me my upcoming events?"}, + ] + + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=messages, + tools=openai_tools, + tool_choice="auto", + ) + + if response.choices[0].message.tool_calls: + print("Step 3: OpenAI chose to call these tools:") + for tool_call in response.choices[0].message.tool_calls: + print(f" - {tool_call.function.name}({tool_call.function.arguments})") + + tool = tools.get_tool(tool_call.function.name) + if tool: + result = tool.execute(tool_call.function.arguments) + print( + f" Response keys: {list(result.keys()) if isinstance(result, dict) else type(result)}" + ) + else: + print(f"OpenAI responded with text: {response.choices[0].message.content}") + + print() + + +def example_langchain_semantic(): + """Semantic search with LangChain tools. + + search_tools() returns a Tools collection that converts directly + to LangChain format — no extra steps needed. + """ + print("=" * 60) + print("Example 6: Semantic search with LangChain") + print("=" * 60) + print() + + try: + from langchain_core.tools import BaseTool # noqa: F401 + except ImportError: + print("Skipped: LangChain not installed. 
Install with: pip install langchain-core") + print() + return + + toolset = StackOneToolSet() + + query = "remove a user from the team" + print(f'Step 1: Searching for "{query}" via semantic search...') + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=5) + print(f"Found {len(tools)} tools.") + print() + + print("Step 2: Converting to LangChain tools...") + langchain_tools = tools.to_langchain() + + print(f"Created {len(langchain_tools)} LangChain tools (ready for use with agents):") + for tool in langchain_tools: + print(f" - {tool.name} (type: {type(tool).__name__})") + print(f" {tool.description}") + + print() + + +def main(): + """Run all semantic search examples.""" + print() + print("############################################################") + print("# StackOne AI SDK — Semantic Search Examples #") + print("############################################################") + print() + + if not os.getenv("STACKONE_API_KEY"): + print("Set STACKONE_API_KEY to run these examples.") + return + + # --- Examples that work without account IDs --- + example_search_action_names() + + # --- Examples that require account IDs (MCP needs x-account-id) --- + if not _account_ids: + print("=" * 60) + print("Remaining examples require STACKONE_ACCOUNT_ID") + print("=" * 60) + print() + print("Set STACKONE_ACCOUNT_ID (comma-separated for multiple) to run") + print("examples that fetch full tool definitions from your linked accounts:") + print(" - search_tools() with natural language queries") + print(" - search_tools() with connector filter") + print(" - Utility tools with semantic search") + print(" - OpenAI agent loop") + print(" - LangChain integration") + return + + example_search_tools() + example_search_tools_with_connector() + example_utility_tools_semantic() + + # Framework integration patterns + example_openai_agent_loop() + example_langchain_semantic() + + print("############################################################") + print("# All examples 
completed! #") + print("############################################################") + + +if __name__ == "__main__": + main() diff --git a/examples/test_examples.py b/examples/test_examples.py index 45d631e..36fc7ba 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -31,6 +31,7 @@ def get_example_files() -> list[str]: "file_uploads.py": ["mcp"], "stackone_account_ids.py": ["mcp"], "utility_tools_example.py": ["mcp"], + "semantic_search_example.py": ["mcp"], "mcp_server.py": ["mcp"], } diff --git a/examples/utility_tools_example.py b/examples/utility_tools_example.py index 3291f7e..7a21bbe 100644 --- a/examples/utility_tools_example.py +++ b/examples/utility_tools_example.py @@ -80,9 +80,39 @@ def example_utility_tools_with_execution(): print() +def example_utility_tools_semantic(): + """Semantic search variant of utility tools. + + By passing semantic_client to utility_tools(), tool_search switches from + local BM25+TF-IDF to cloud-based semantic search for better natural language + understanding. See examples/semantic_search_example.py for more patterns. 
+ """ + print("Example 3: Utility tools with semantic search\n") + + toolset = StackOneToolSet() + + # Fetch tools — these define the available tool catalog + all_tools = toolset.fetch_tools(actions=["bamboohr_*"]) + print(f"Total BambooHR tools available: {len(all_tools)}") + + # Pass semantic_client to switch from local BM25 to cloud semantic search + utility_tools = all_tools.utility_tools(semantic_client=toolset.semantic_client) + + filter_tool = utility_tools.get_tool("tool_search") + if filter_tool: + # Semantic search understands intent — "onboard new hire" finds onboarding tools + result = filter_tool.call(query="onboard a new team member", limit=5, minScore=0.0) + + print("Found relevant tools (semantic search):") + for tool in result.get("tools", []): + print(f" - {tool['name']} (score: {tool['score']:.2f}): {tool['description']}") + + print() + + def example_with_openai(): """Example of using utility tools with OpenAI""" - print("Example 3: Using utility tools with OpenAI\n") + print("Example 4: Using utility tools with OpenAI\n") try: from openai import OpenAI @@ -131,7 +161,7 @@ def example_with_openai(): def example_with_langchain(): """Example of using tools with LangChain""" - print("Example 4: Using tools with LangChain\n") + print("Example 5: Using tools with LangChain\n") try: from langchain.agents import AgentExecutor, create_tool_calling_agent @@ -197,6 +227,7 @@ def main(): # Basic examples that work without external APIs example_utility_tools_basic() example_utility_tools_with_execution() + example_utility_tools_semantic() # Examples that require OpenAI API if os.getenv("OPENAI_API_KEY"): diff --git a/stackone_ai/__init__.py b/stackone_ai/__init__.py index f7a0aba..434e318 100644 --- a/stackone_ai/__init__.py +++ b/stackone_ai/__init__.py @@ -1,11 +1,22 @@ """StackOne AI SDK""" -from .models import StackOneTool, Tools -from .toolset import StackOneToolSet +from stackone_ai.models import StackOneTool, Tools +from stackone_ai.semantic_search 
import ( + SemanticSearchClient, + SemanticSearchError, + SemanticSearchResponse, + SemanticSearchResult, +) +from stackone_ai.toolset import StackOneToolSet __all__ = [ "StackOneToolSet", "StackOneTool", "Tools", + # Semantic search + "SemanticSearchClient", + "SemanticSearchResult", + "SemanticSearchResponse", + "SemanticSearchError", ] __version__ = "2.3.1" diff --git a/stackone_ai/models.py b/stackone_ai/models.py index fcd32d7..a3f50e4 100644 --- a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -6,7 +6,10 @@ from collections.abc import Sequence from datetime import datetime, timezone from enum import Enum -from typing import Annotated, Any, ClassVar, TypeAlias, cast +from typing import TYPE_CHECKING, Annotated, Any, ClassVar, TypeAlias, cast + +if TYPE_CHECKING: + from stackone_ai.semantic_search import SemanticSearchClient from urllib.parse import quote import httpx @@ -98,6 +101,18 @@ class StackOneTool(BaseModel): "feedback_metadata", } + @property + def connector(self) -> str: + """Extract connector from tool name. + + Tool names follow the format: {connector}_{action}_{entity} + e.g., 'bamboohr_create_employee' -> 'bamboohr' + + Returns: + Connector name in lowercase + """ + return self.name.split("_")[0].lower() + def __init__( self, description: str, @@ -514,6 +529,19 @@ def get_account_id(self) -> str | None: return account_id return None + def get_connectors(self) -> set[str]: + """Get unique connector names from all tools. 
+ + Returns: + Set of connector names (lowercase) + + Example: + tools = toolset.fetch_tools() + connectors = tools.get_connectors() + # {'bamboohr', 'hibob', 'slack', ...} + """ + return {tool.connector for tool in self.tools} + def to_openai(self) -> list[JsonDict]: """Convert all tools to OpenAI function format @@ -530,34 +558,56 @@ def to_langchain(self) -> Sequence[BaseTool]: """ return [tool.to_langchain() for tool in self.tools] - def utility_tools(self, hybrid_alpha: float | None = None) -> Tools: + def utility_tools( + self, + hybrid_alpha: float | None = None, + semantic_client: SemanticSearchClient | None = None, + ) -> Tools: """Return utility tools for tool discovery and execution - Utility tools enable dynamic tool discovery and execution based on natural language queries - using hybrid BM25 + TF-IDF search. + Utility tools enable dynamic tool discovery and execution based on natural language queries. + By default, uses local hybrid BM25 + TF-IDF search. When a semantic_client is provided, + uses cloud-based semantic search for higher accuracy on natural language queries. Args: - hybrid_alpha: Weight for BM25 in hybrid search (0-1). If not provided, uses - ToolIndex.DEFAULT_HYBRID_ALPHA (0.2), which gives more weight to BM25 scoring - and has been shown to provide better tool discovery accuracy - (10.8% improvement in validation testing). + hybrid_alpha: Weight for BM25 in hybrid search (0-1). Only used when + semantic_client is not provided. If not provided, uses DEFAULT_HYBRID_ALPHA (0.2), + which gives more weight to BM25 scoring. + semantic_client: SemanticSearchClient instance for cloud-based semantic search. + When provided, semantic search is used instead of local BM25+TF-IDF. + Can be obtained from StackOneToolSet.semantic_client. 
Returns: Tools collection containing tool_search and tool_execute Note: This feature is in beta and may change in future versions + + Example: + # Local search (default) + utility = tools.utility_tools() + + # Semantic search (requires toolset) + from stackone_ai import StackOneToolSet + toolset = StackOneToolSet() + tools = toolset.fetch_tools() + utility = tools.utility_tools( + semantic_client=toolset.semantic_client, + ) """ - from stackone_ai.utility_tools import ( - ToolIndex, - create_tool_execute, - create_tool_search, - ) + from stackone_ai.utility_tools import create_tool_execute - # Create search index with hybrid search - index = ToolIndex(self.tools, hybrid_alpha=hybrid_alpha) + if semantic_client is not None: + from stackone_ai.utility_tools import create_semantic_tool_search + + search_tool = create_semantic_tool_search(semantic_client) + execute_tool = create_tool_execute(self) + return Tools([search_tool, execute_tool]) - # Create utility tools + # Default: local BM25+TF-IDF search + from stackone_ai.utility_tools import ToolIndex, create_tool_search + + index = ToolIndex(self.tools, hybrid_alpha=hybrid_alpha) filter_tool = create_tool_search(index) execute_tool = create_tool_execute(self) diff --git a/stackone_ai/semantic_search.py b/stackone_ai/semantic_search.py new file mode 100644 index 0000000..7821663 --- /dev/null +++ b/stackone_ai/semantic_search.py @@ -0,0 +1,236 @@ +"""Semantic search client for StackOne action search API. + +How Semantic Search Works +========================= + +The SDK provides three ways to discover tools using semantic search. +Each path trades off between speed, filtering, and completeness. + +1. ``search_tools(query)`` — Full tool discovery (recommended for agent frameworks) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the primary method used when integrating with OpenAI, LangChain, or CrewAI. +The internal flow is: + +:: + + User query (e.g. 
"create an employee") + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 1: Fetch ALL tools from linked accounts via MCP │ + │ (uses account_ids to scope the request) │ + └────────────────────────┬────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 2: Extract available connectors from the │ + │ fetched tools (e.g. {bamboohr, hibob}) │ + └────────────────────────┬────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 3: Query the semantic search API (/actions/ │ + │ search) with the natural language query │ + └────────────────────────┬────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 4: Filter results — keep only connectors the │ + │ user has access to + apply min_score cutoff │ + │ │ + │ If not enough results, make per-connector │ + │ fallback queries for missing connectors │ + └────────────────────────┬────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 5: Deduplicate by normalized action name │ + │ (strips API version suffixes, keeps highest │ + │ scoring version of each action) │ + └────────────────────────┬────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 6: Match semantic results back to the fetched │ + │ tool definitions from Step 1 │ + │ Return Tools sorted by relevance score │ + └─────────────────────────────────────────────────────┘ + +Key point: tools are fetched first, semantic search runs second, and only +tools that exist in the user's linked accounts AND match the semantic query +are returned. This prevents suggesting tools the user cannot execute. + +If the semantic API is unavailable, the SDK falls back to a local +BM25 + TF-IDF hybrid search over the fetched tools (unless +``fallback_to_local=False``). + + +2. 
class SemanticSearchClient:
    """Thin HTTP client for the StackOne semantic search endpoint.

    Queries are sent to ``POST {base_url}/actions/search`` and the JSON reply
    is parsed into a ``SemanticSearchResponse``.

    Example:
        client = SemanticSearchClient(api_key="sk-xxx")
        response = client.search("create employee", connector="bamboohr", top_k=5)
        for result in response.results:
            print(f"{result.action_name}: {result.similarity_score:.2f}")
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = "https://api.stackone.com",
        timeout: float = 30.0,
    ) -> None:
        """Store connection settings; no request is made here.

        Args:
            api_key: StackOne API key
            base_url: Base URL for API requests (a trailing slash is removed)
            timeout: Request timeout in seconds
        """
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _build_auth_header(self) -> str:
        """Return the Basic auth header value (API key as user, empty password)."""
        credentials = f"{self.api_key}:".encode()
        return "Basic " + base64.b64encode(credentials).decode()

    def search(
        self,
        query: str,
        connector: str | None = None,
        top_k: int | None = None,
    ) -> "SemanticSearchResponse":
        """Run one semantic search request.

        Args:
            query: Natural language query describing what tools/actions you need
            connector: Optional connector/provider filter (e.g., "bamboohr", "slack")
            top_k: Maximum number of results to return. Omitted from the
                request when None so the backend default applies.

        Returns:
            SemanticSearchResponse containing matching actions with similarity scores

        Raises:
            SemanticSearchError: If the request, the HTTP status check, or the
                response parsing fails

        Example:
            response = client.search("onboard a new team member", top_k=5)
            for result in response.results:
                print(f"{result.action_name}: {result.similarity_score:.2f}")
        """
        endpoint = f"{self.base_url}/actions/search"
        request_headers = {
            "Authorization": self._build_auth_header(),
            "Content-Type": "application/json",
        }

        # Optional fields are only sent when the caller supplied them.
        body: dict[str, Any] = {"query": query}
        if top_k is not None:
            body["top_k"] = top_k
        if connector:
            body["connector"] = connector

        try:
            reply = httpx.post(endpoint, json=body, headers=request_headers, timeout=self.timeout)
            reply.raise_for_status()
            return SemanticSearchResponse(**reply.json())
        except httpx.HTTPStatusError as exc:
            raise SemanticSearchError(
                f"API error: {exc.response.status_code} - {exc.response.text}"
            ) from exc
        except httpx.RequestError as exc:
            raise SemanticSearchError(f"Request failed: {exc}") from exc
        except Exception as exc:
            # Anything else (bad JSON, schema mismatch) surfaces as the same error type.
            raise SemanticSearchError(f"Search failed: {exc}") from exc

    def search_action_names(
        self,
        query: str,
        connector: str | None = None,
        top_k: int | None = None,
        min_score: float = 0.0,
    ) -> list[str]:
        """Return only the action names of a search, in response order.

        Args:
            query: Natural language query
            connector: Optional connector/provider filter
            top_k: Maximum number of results. If not provided, uses the backend default.
            min_score: Minimum similarity score threshold (0-1)

        Returns:
            Action names whose similarity score is at least ``min_score``

        Example:
            action_names = client.search_action_names(
                "create employee",
                connector="bamboohr",
                min_score=0.5
            )
        """
        names: list[str] = []
        for hit in self.search(query, connector, top_k).results:
            if hit.similarity_score >= min_score:
                names.append(hit.action_name)
        return names
# Matches '<connector>_<dotted version>_<mcp tool name>_global' and captures the tool name.
_VERSIONED_ACTION_RE = re.compile(r"^[a-z][a-z0-9]*_\d+(?:\.\d+)+_(.+)_global$")


def _normalize_action_name(action_name: str) -> str:
    """Map a semantic search API action name to its MCP tool name.

    API: 'calendly_1.0.0_calendly_create_scheduling_link_global'
    MCP: 'calendly_create_scheduling_link'

    Names that do not follow the versioned pattern are returned unchanged.
    """
    versioned = _VERSIONED_ACTION_RE.match(action_name)
    if versioned is None:
        return action_name
    return versioned.group(1)
def search_tools(
    self,
    query: str,
    *,
    connector: str | None = None,
    top_k: int | None = None,
    min_score: float = 0.0,
    account_ids: list[str] | None = None,
    fallback_to_local: bool = True,
) -> Tools:
    """Search for and fetch tools using semantic search.

    Tools are fetched first, the semantic search API is queried second, and
    only tools that exist in the fetched set AND match the query are
    returned, ordered by similarity score.

    Args:
        query: Natural language description of needed functionality
            (e.g., "create employee", "send a message")
        connector: Optional provider/connector filter (e.g., "bamboohr", "slack")
        top_k: Maximum number of tools to return. If None, uses the backend default.
        min_score: Minimum similarity score threshold 0-1 (default: 0.0)
        account_ids: Optional account IDs (uses set_accounts() if not provided)
        fallback_to_local: If True, fall back to local BM25+TF-IDF search on API failure

    Returns:
        Tools collection with semantically matched tools from linked accounts

    Raises:
        SemanticSearchError: If the API call fails and fallback_to_local is False

    Examples:
        # Basic semantic search
        tools = toolset.search_tools("manage employee records", top_k=5)

        # Filter by connector
        tools = toolset.search_tools(
            "create time off request",
            connector="bamboohr",
            min_score=0.5
        )

        # With account filtering
        tools = toolset.search_tools(
            "send message",
            account_ids=["acc-123"],
            top_k=3
        )
    """
    # Step 1: Fetch all tools to learn which connectors are available.
    # Done before the try block so the fallback handler below can always
    # rely on `all_tools` and `available_connectors` being bound.
    all_tools = self.fetch_tools(account_ids=account_ids)
    # Lower-cased once so every comparison below is case-insensitive,
    # matching what search_action_names() does.
    available_connectors = {c.lower() for c in all_tools.get_connectors()}

    if not available_connectors:
        return Tools([])

    try:
        # Step 2: Query the semantic API without top_k, then filter client-side.
        response = self.semantic_client.search(
            query=query,
            connector=connector,
        )

        # Step 3: Keep only available connectors that meet min_score.
        filtered_results = [
            r
            for r in response.results
            if r.connector_key.lower() in available_connectors and r.similarity_score >= min_score
        ]

        # Step 3b: If not enough results, query each connector that produced none.
        if not connector and (top_k is None or len(filtered_results) < top_k):
            found_connectors = {r.connector_key.lower() for r in filtered_results}
            # One running set instead of rebuilding it for every candidate.
            seen_actions = {r.action_name for r in filtered_results}
            # Sorted so the outcome is deterministic when top_k ends the loop early.
            for missing in sorted(available_connectors - found_connectors):
                if top_k is not None and len(filtered_results) >= top_k:
                    break
                try:
                    extra = self.semantic_client.search(query=query, connector=missing, top_k=top_k)
                except SemanticSearchError:
                    # Best effort: one failing connector must not sink the whole search.
                    continue
                for r in extra.results:
                    if r.similarity_score >= min_score and r.action_name not in seen_actions:
                        seen_actions.add(r.action_name)
                        filtered_results.append(r)
                        if top_k is not None and len(filtered_results) >= top_k:
                            break

        # Sort by score (stable) so the dedupe below keeps the best-scoring
        # version of each action even after merging several responses.
        filtered_results.sort(key=lambda r: r.similarity_score, reverse=True)

        # Deduplicate by normalized MCP name; the first occurrence has the highest score.
        seen_names: set[str] = set()
        deduped: list[SemanticSearchResult] = []
        for r in filtered_results:
            norm = _normalize_action_name(r.action_name)
            if norm not in seen_names:
                seen_names.add(norm)
                deduped.append(r)
        filtered_results = deduped[:top_k] if top_k is not None else deduped

        if not filtered_results:
            return Tools([])

        # Step 4: Map results back to the already-fetched tool definitions,
        # ordered by their rank in the semantic results.
        action_order = {_normalize_action_name(r.action_name): i for i, r in enumerate(filtered_results)}
        matched_tools = [t for t in all_tools if t.name in action_order]
        matched_tools.sort(key=lambda t: action_order[t.name])

        return Tools(matched_tools)

    except SemanticSearchError as e:
        if not fallback_to_local:
            raise

        logger.warning("Semantic search failed (%s), falling back to local BM25+TF-IDF search", e)
        utility = all_tools.utility_tools()
        search_tool = utility.get_tool("tool_search")

        if search_tool:
            # Over-fetch because the connector filter below may discard hits.
            fallback_limit = top_k * 3 if top_k is not None else 100
            result = search_tool.execute(
                {
                    "query": query,
                    "limit": fallback_limit,
                    "minScore": min_score,
                }
            )
            matched_names = [t["name"] for t in result.get("tools", [])]
            # Filter by available connectors and preserve relevance order
            tool_map = {t.name: t for t in all_tools}
            filter_connectors = {connector.lower()} if connector else available_connectors
            matched_tools = [
                tool_map[name]
                for name in matched_names
                if name in tool_map and name.split("_")[0].lower() in filter_connectors
            ]
            return Tools(matched_tools[:top_k] if top_k is not None else matched_tools)

        return all_tools
def search_action_names(
    self,
    query: str,
    *,
    connector: str | None = None,
    account_ids: list[str] | None = None,
    top_k: int | None = None,
    min_score: float = 0.0,
) -> list[SemanticSearchResult]:
    """Search for action names and return result metadata instead of tools.

    Useful when you need to inspect search results before fetching,
    or when building custom filtering logic. Note that when account IDs
    are in effect (argument or set_accounts()), tools ARE fetched once,
    solely to learn which connectors are available.

    Args:
        query: Natural language description of needed functionality
        connector: Optional provider/connector filter (single connector)
        account_ids: Optional account IDs to scope results to connectors
            available in those accounts (uses set_accounts() if not provided).
            When provided, results are filtered to only matching connectors.
        top_k: Maximum number of results. If None, uses the backend default.
        min_score: Minimum similarity score threshold 0-1 (default: 0.0)

    Returns:
        List of SemanticSearchResult with normalized action names, scores,
        and metadata, highest score first. An empty list is returned (and a
        warning logged) when the initial semantic search request fails.

    Examples:
        # Lightweight: inspect results before fetching
        results = toolset.search_action_names("manage employees")
        for r in results:
            print(f"{r.action_name}: {r.similarity_score:.2f}")

        # Account-scoped: only results for connectors in linked accounts
        results = toolset.search_action_names(
            "create employee",
            account_ids=["acc-123"],
            top_k=5
        )

        # Then fetch specific high-scoring actions
        selected = [r.action_name for r in results if r.similarity_score > 0.7]
        tools = toolset.fetch_tools(actions=selected)
    """
    # Resolve available connectors from account IDs (same pattern as search_tools).
    available_connectors: set[str] | None = None
    effective_account_ids = account_ids or self._account_ids
    if effective_account_ids:
        all_tools = self.fetch_tools(account_ids=effective_account_ids)
        available_connectors = {c.lower() for c in all_tools.get_connectors()}
        if not available_connectors:
            return []

    try:
        response = self.semantic_client.search(
            query=query,
            connector=connector,
            top_k=top_k,
        )
    except SemanticSearchError as e:
        # Deliberate best effort: discovery degrades to "no results".
        logger.warning("Semantic search failed: %s", e)
        return []

    # Filter by min_score
    results = [r for r in response.results if r.similarity_score >= min_score]

    # Filter by available connectors if resolved from accounts
    if available_connectors:
        results = [r for r in results if r.connector_key.lower() in available_connectors]

        # If not enough results, query each connector that produced none.
        if not connector and (top_k is None or len(results) < top_k):
            found_connectors = {r.connector_key.lower() for r in results}
            # One running set instead of rebuilding it for every candidate.
            seen_actions = {r.action_name for r in results}
            # Sorted so the outcome is deterministic when top_k ends the loop early.
            for missing in sorted(available_connectors - found_connectors):
                if top_k is not None and len(results) >= top_k:
                    break
                try:
                    extra = self.semantic_client.search(query=query, connector=missing, top_k=top_k)
                except SemanticSearchError:
                    continue
                for r in extra.results:
                    if r.similarity_score >= min_score and r.action_name not in seen_actions:
                        seen_actions.add(r.action_name)
                        results.append(r)
                        if top_k is not None and len(results) >= top_k:
                            break

    # Sort by score (stable) so the dedupe below keeps the best-scoring
    # version of each action even after merging several responses.
    results.sort(key=lambda r: r.similarity_score, reverse=True)

    # Normalize and deduplicate by MCP name; the first occurrence has the highest score.
    seen: set[str] = set()
    normalized: list[SemanticSearchResult] = []
    for r in results:
        norm_name = _normalize_action_name(r.action_name)
        if norm_name not in seen:
            seen.add(norm_name)
            normalized.append(
                SemanticSearchResult(
                    action_name=norm_name,
                    connector_key=r.connector_key,
                    similarity_score=r.similarity_score,
                    label=r.label,
                    description=r.description,
                )
            )
    return normalized[:top_k] if top_k is not None else normalized
def create_semantic_tool_search(semantic_client: SemanticSearchClient) -> StackOneTool:
    """Build the ``tool_search`` utility tool backed by the semantic search API.

    This is the cloud counterpart of the local BM25+TF-IDF ``tool_search``:
    each call forwards the query to ``semantic_client.search`` and returns
    normalized, de-duplicated tool names with their scores.

    Args:
        semantic_client: Initialized SemanticSearchClient instance

    Returns:
        Utility tool for searching relevant tools using semantic search

    Raises:
        TypeError: If ``semantic_client`` is not a SemanticSearchClient
    """
    # Imported here (not at module top) where the top-level import is TYPE_CHECKING-only.
    from stackone_ai.semantic_search import SemanticSearchClient  # noqa: F811
    from stackone_ai.toolset import _normalize_action_name

    if not isinstance(semantic_client, SemanticSearchClient):
        raise TypeError("semantic_client must be a SemanticSearchClient instance")

    tool_name = "tool_search"
    tool_description = (
        "Searches for relevant tools based on a natural language query using "
        "semantic vector search. Call this first to discover "
        "available tools before executing them."
    )

    query_schema = {
        "type": "string",
        "description": (
            "Natural language query describing what tools you need "
            '(e.g., "onboard a new team member", "request vacation days")'
        ),
    }
    limit_schema = {
        "type": "number",
        "description": "Maximum number of tools to return (default: 5)",
        "default": 5,
        "nullable": True,
    }
    min_score_schema = {
        "type": "number",
        "description": "Minimum similarity score (0-1) to filter results (default: 0.0)",
        "default": 0.0,
        "nullable": True,
    }
    connector_schema = {
        "type": "string",
        "description": "Optional: filter by connector/provider (e.g., 'bamboohr', 'slack')",
        "nullable": True,
    }
    tool_parameters = ToolParameters(
        type="object",
        properties={
            "query": query_schema,
            "limit": limit_schema,
            "minScore": min_score_schema,
            "connector": connector_schema,
        },
    )

    def execute_search(arguments: str | JsonDict | None = None) -> JsonDict:
        """Run one semantic search and shape the reply for the agent."""
        kwargs = json.loads(arguments) if isinstance(arguments, str) else (arguments or {})

        # An explicit null for limit/minScore is treated like an absent value.
        raw_limit = kwargs.get("limit")
        raw_min_score = kwargs.get("minScore")
        limit = 5 if raw_limit is None else int(kwargs["limit"])
        min_score = 0.0 if raw_min_score is None else float(kwargs["minScore"])

        response = semantic_client.search(
            query=kwargs.get("query", ""),
            connector=kwargs.get("connector"),
            top_k=limit,
        )

        emitted: set[str] = set()
        matches: list[dict[str, object]] = []
        for hit in response.results:
            if not (hit.similarity_score >= min_score):
                continue
            mcp_name = _normalize_action_name(hit.action_name)
            if mcp_name in emitted:
                # Several API versions of one action collapse to the first seen.
                continue
            emitted.add(mcp_name)
            matches.append(
                {
                    "name": mcp_name,
                    "description": hit.description,
                    "score": hit.similarity_score,
                    "connector": hit.connector_key,
                }
            )

        return {"tools": matches[:limit]}

    tool_execute_config = ExecuteConfig(
        name=tool_name,
        method="POST",
        url="",  # Utility tools don't make HTTP requests
        headers={},
    )

    class SemanticToolSearchTool(StackOneTool):
        """Utility tool for searching relevant tools using semantic search"""

        def __init__(self) -> None:
            super().__init__(
                description=tool_description,
                parameters=tool_parameters,
                _execute_config=tool_execute_config,
                _api_key="",  # Utility tools don't need API key
                _account_id=None,
            )

        def execute(
            self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None
        ) -> JsonDict:
            # `options` is accepted for signature compatibility and not used.
            return execute_search(arguments)

    return SemanticToolSearchTool()
result.action_name == "bamboohr_create_employee" + assert result.connector_key == "bamboohr" + assert result.similarity_score == 0.92 + assert result.label == "Create Employee" + assert result.description == "Creates a new employee in BambooHR" + + +class TestSemanticSearchResponse: + """Tests for SemanticSearchResponse model.""" + + def test_create_response(self) -> None: + """Test creating a search response.""" + results = [ + SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="hibob_create_employee", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates a new employee", + ), + ] + response = SemanticSearchResponse( + results=results, + total_count=2, + query="create employee", + ) + + assert len(response.results) == 2 + assert response.total_count == 2 + assert response.query == "create employee" + + +class TestSemanticSearchClient: + """Tests for SemanticSearchClient.""" + + def test_init(self) -> None: + """Test client initialization.""" + client = SemanticSearchClient(api_key="test-key") + + assert client.api_key == "test-key" + assert client.base_url == "https://api.stackone.com" + assert client.timeout == 30.0 + + def test_init_custom_base_url(self) -> None: + """Test client initialization with custom base URL.""" + client = SemanticSearchClient( + api_key="test-key", + base_url="https://custom.api.com/", + ) + + assert client.base_url == "https://custom.api.com" # Trailing slash stripped + + def test_build_auth_header(self) -> None: + """Test building the authorization header.""" + client = SemanticSearchClient(api_key="test-key") + header = client._build_auth_header() + + # test-key: encoded in base64 = dGVzdC1rZXk6 + assert header == "Basic dGVzdC1rZXk6" + + @patch("httpx.post") + def test_search_success(self, mock_post: MagicMock) -> 
None: + """Test successful search request.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [ + { + "action_name": "bamboohr_create_employee", + "connector_key": "bamboohr", + "similarity_score": 0.92, + "label": "Create Employee", + "description": "Creates a new employee", + } + ], + "total_count": 1, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + response = client.search("create employee", top_k=5) + + assert len(response.results) == 1 + assert response.results[0].action_name == "bamboohr_create_employee" + assert response.total_count == 1 + assert response.query == "create employee" + + # Verify request was made correctly + mock_post.assert_called_once() + call_kwargs = mock_post.call_args + assert call_kwargs.kwargs["json"] == {"query": "create employee", "top_k": 5} + assert "Authorization" in call_kwargs.kwargs["headers"] + + @patch("httpx.post") + def test_search_with_connector(self, mock_post: MagicMock) -> None: + """Test search with connector filter.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [], + "total_count": 0, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + client.search("create employee", connector="bamboohr", top_k=10) + + call_kwargs = mock_post.call_args + assert call_kwargs.kwargs["json"] == { + "query": "create employee", + "connector": "bamboohr", + "top_k": 10, + } + + @patch("httpx.post") + def test_search_http_error(self, mock_post: MagicMock) -> None: + """Test search with HTTP error.""" + mock_response = MagicMock() + mock_response.status_code = 401 + mock_response.text = "Unauthorized" + mock_post.return_value = mock_response + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + 
"Unauthorized", + request=MagicMock(), + response=mock_response, + ) + + client = SemanticSearchClient(api_key="invalid-key") + + with pytest.raises(SemanticSearchError) as exc_info: + client.search("create employee") + + assert "API error: 401" in str(exc_info.value) + + @patch("httpx.post") + def test_search_request_error(self, mock_post: MagicMock) -> None: + """Test search with request error.""" + mock_post.side_effect = httpx.RequestError("Connection failed") + + client = SemanticSearchClient(api_key="test-key") + + with pytest.raises(SemanticSearchError) as exc_info: + client.search("create employee") + + assert "Request failed" in str(exc_info.value) + + @patch("httpx.post") + def test_search_action_names(self, mock_post: MagicMock) -> None: + """Test search_action_names convenience method.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [ + { + "action_name": "bamboohr_create_employee", + "connector_key": "bamboohr", + "similarity_score": 0.92, + "label": "Create Employee", + "description": "Creates a new employee", + }, + { + "action_name": "hibob_create_employee", + "connector_key": "hibob", + "similarity_score": 0.45, + "label": "Create Employee", + "description": "Creates a new employee", + }, + ], + "total_count": 2, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + + # Without min_score filter + names = client.search_action_names("create employee") + assert len(names) == 2 + assert "bamboohr_create_employee" in names + assert "hibob_create_employee" in names + + # With min_score filter + names = client.search_action_names("create employee", min_score=0.5) + assert len(names) == 1 + assert "bamboohr_create_employee" in names + + +class TestSemanticSearchIntegration: + """Integration tests for semantic search with toolset.""" + + def test_toolset_semantic_client_lazy_init(self) -> None: + 
"""Test that semantic_client is lazily initialized.""" + from stackone_ai import StackOneToolSet + + toolset = StackOneToolSet(api_key="test-key") + + # Access semantic_client + client = toolset.semantic_client + assert isinstance(client, SemanticSearchClient) + assert client.api_key == "test-key" + + # Same instance on second access + assert toolset.semantic_client is client + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test toolset.search_tools() method with connector filtering.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Mock semantic search to return versioned API names (including some for unavailable connectors) + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.95, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="workday_1.0.0_workday_create_worker_global", + connector_key="workday", # User doesn't have this connector + similarity_score=0.90, + label="Create Worker", + description="Creates a new worker", + ), + SemanticSearchResult( + action_name="hibob_1.0.0_hibob_create_employee_global", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=3, + query="create employee", + ) + + # Mock MCP fetch to return only bamboohr and hibob tools (user's linked accounts) + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="hibob_create_employee", + description="Creates a new employee", + 
input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", top_k=5) + + # Should only return tools for available connectors (bamboohr, hibob) + # workday_create_worker should be filtered out + assert len(tools) == 2 + tool_names = [t.name for t in tools] + assert "bamboohr_create_employee" in tool_names + assert "hibob_create_employee" in tool_names + assert "workday_create_worker" not in tool_names # Filtered out - connector not available + + # Results should be sorted by semantic score + assert tools[0].name == "bamboohr_create_employee" # score 0.95 + assert tools[1].name == "hibob_create_employee" # score 0.85 + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search_tools() fallback when semantic search fails.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Semantic search raises an error to trigger fallback + mock_search.side_effect = SemanticSearchError("API unavailable") + + # Mock MCP fetch to return tools from multiple connectors + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists all employees in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="workday_create_worker", + description="Creates a new worker in Workday", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = 
StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", top_k=5, fallback_to_local=True) + + # Should return results from the local BM25+TF-IDF fallback + assert len(tools) > 0 + tool_names = [t.name for t in tools] + # Should only include tools for available connectors (bamboohr, workday) + for name in tool_names: + connector = name.split("_")[0] + assert connector in {"bamboohr", "workday"} + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback_respects_connector( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test BM25 fallback filters to the requested connector.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.side_effect = SemanticSearchError("API unavailable") + + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists all employees in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="workday_create_worker", + description="Creates a new worker in Workday", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", connector="bamboohr", fallback_to_local=True) + + assert len(tools) > 0 + tool_names = [t.name for t in tools] + for name in tool_names: + assert name.split("_")[0] == "bamboohr" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback_disabled( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search_tools() raises when fallback is disabled.""" + from stackone_ai 
import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.side_effect = SemanticSearchError("API unavailable") + # Must provide tools so the flow reaches the semantic search call + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + with pytest.raises(SemanticSearchError): + toolset.search_tools("create employee", fallback_to_local=False) + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_action_names( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test toolset.search_action_names() method.""" + from stackone_ai import StackOneToolSet + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="hibob_1.0.0_hibob_create_employee_global", + connector_key="hibob", + similarity_score=0.45, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=2, + query="create employee", + ) + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("create employee", min_score=0.5) + + # Should filter by min_score and normalize action names + assert len(results) == 1 + assert results[0].action_name == "bamboohr_create_employee" + + def test_utility_tools_semantic_search(self) -> None: + """Test utility_tools with semantic search.""" + from stackone_ai.models import StackOneTool, Tools + + # Create a mock tools collection + tool = MagicMock(spec=StackOneTool) + tool.name = "test_tool" + tool.description = "Test tool" + tools = Tools([tool]) + + # 
Without semantic search - should use local search + # Patch ToolIndex in utility_tools module where it's imported + with ( + patch("stackone_ai.utility_tools.ToolIndex"), + patch("stackone_ai.utility_tools.create_tool_search") as mock_create_search, + patch("stackone_ai.utility_tools.create_tool_execute") as mock_create_execute, + ): + mock_search_tool = MagicMock(spec=StackOneTool) + mock_search_tool.name = "tool_search" + mock_execute_tool = MagicMock(spec=StackOneTool) + mock_execute_tool.name = "tool_execute" + mock_create_search.return_value = mock_search_tool + mock_create_execute.return_value = mock_execute_tool + utility = tools.utility_tools() + assert len(utility) == 2 # tool_search + tool_execute + + # With semantic search - presence of semantic_client enables it + mock_client = MagicMock(spec=SemanticSearchClient) + with ( + patch("stackone_ai.utility_tools.create_semantic_tool_search") as mock_create, + patch("stackone_ai.utility_tools.create_tool_execute") as mock_create_execute, + ): + mock_search_tool = MagicMock(spec=StackOneTool) + mock_search_tool.name = "tool_search" + mock_execute_tool = MagicMock(spec=StackOneTool) + mock_execute_tool.name = "tool_execute" + mock_create.return_value = mock_search_tool + mock_create_execute.return_value = mock_execute_tool + utility = tools.utility_tools(semantic_client=mock_client) + assert len(utility) == 2 + mock_create.assert_called_once_with(mock_client) + + +class TestSemanticToolSearch: + """Tests for create_semantic_tool_search utility.""" + + def test_create_semantic_tool_search_type_error(self) -> None: + """Test that invalid client raises TypeError.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + with pytest.raises(TypeError) as exc_info: + create_semantic_tool_search("not a client") # type: ignore + + assert "SemanticSearchClient instance" in str(exc_info.value) + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_execute(self, mock_search: 
MagicMock) -> None: + """Test executing semantic tool search.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=1, + query="create employee", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + result = tool.execute({"query": "create employee", "limit": 5}) + + assert "tools" in result + assert len(result["tools"]) == 1 + # Name should be normalized from versioned API format to MCP format + assert result["tools"][0]["name"] == "bamboohr_create_employee" + assert result["tools"][0]["score"] == 0.92 + assert result["tools"][0]["connector"] == "bamboohr" + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_with_min_score(self, mock_search: MagicMock) -> None: + """Test semantic tool search with min_score filter.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="high_score_action", + connector_key="test", + similarity_score=0.9, + label="High Score", + description="High scoring action", + ), + SemanticSearchResult( + action_name="low_score_action", + connector_key="test", + similarity_score=0.3, + label="Low Score", + description="Low scoring action", + ), + ], + total_count=2, + query="test", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + result = tool.execute({"query": "test", "limit": 10, "minScore": 0.5}) + + assert len(result["tools"]) == 1 + assert result["tools"][0]["name"] == "high_score_action" + + @patch.object(SemanticSearchClient, "search") + def 
test_semantic_tool_search_with_connector(self, mock_search: MagicMock) -> None: + """Test semantic tool search with connector filter.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[], + total_count=0, + query="create employee", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + tool.execute({"query": "create employee", "connector": "bamboohr"}) + + mock_search.assert_called_once_with( + query="create employee", + connector="bamboohr", + top_k=5, # default limit + ) + + def test_semantic_tool_search_has_correct_parameters(self) -> None: + """Test that semantic tool has the expected parameter schema.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + assert tool.name == "tool_search" + assert "semantic" in tool.description.lower() + + props = tool.parameters.properties + assert "query" in props + assert "limit" in props + assert "minScore" in props + assert "connector" in props + + +class TestConnectorProperty: + """Tests for StackOneTool.connector property.""" + + def test_connector_extracts_from_name(self) -> None: + """Test that connector is extracted from tool name.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="bamboohr_create_employee", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Creates employee", + parameters=ToolParameters(type="object", properties={}), + _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "bamboohr" + + def test_connector_is_lowercase(self) -> None: + """Test that connector is always lowercase.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = 
ExecuteConfig( + name="BambooHR_Create_Employee", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Creates employee", + parameters=ToolParameters(type="object", properties={}), + _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "bamboohr" + + def test_connector_with_single_word_name(self) -> None: + """Test connector extraction with single-word tool name.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="utility", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Utility tool", + parameters=ToolParameters(type="object", properties={}), + _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "utility" + + +class TestToolsConnectorHelpers: + """Tests for Tools.get_connectors().""" + + def test_get_connectors(self) -> None: + """Test getting unique connectors from tools collection.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters, Tools + + def make_tool(name: str) -> StackOneTool: + return StackOneTool( + description=f"Tool {name}", + parameters=ToolParameters(type="object", properties={}), + _execute_config=ExecuteConfig(name=name, method="POST", url="", headers={}), + _api_key="test-key", + ) + + tools = Tools( + [ + make_tool("bamboohr_create_employee"), + make_tool("bamboohr_list_employees"), + make_tool("hibob_create_employee"), + make_tool("slack_send_message"), + ] + ) + + connectors = tools.get_connectors() + + assert connectors == {"bamboohr", "hibob", "slack"} + + def test_get_connectors_empty(self) -> None: + """Test get_connectors with empty tools collection.""" + from stackone_ai.models import Tools + + tools = Tools([]) + assert tools.get_connectors() == set() + + +class TestSearchActionNamesWithAccountIds: + """Tests for search_action_names with account_ids 
parameter.""" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_filters_by_account_connectors(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that results are filtered to connectors available in linked accounts.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.95, + label="Create Employee", + description="Creates employee", + ), + SemanticSearchResult( + action_name="workday_1.0.0_workday_create_worker_global", + connector_key="workday", + similarity_score=0.90, + label="Create Worker", + description="Creates worker", + ), + SemanticSearchResult( + action_name="hibob_1.0.0_hibob_create_employee_global", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=3, + query="create employee", + ) + + # Mock MCP to return only bamboohr and hibob tools (user's linked accounts) + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="hibob_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names( + "create employee", + account_ids=["acc-123"], + top_k=10, + ) + + # workday should be filtered out (not in linked accounts) + # Names should be normalized from versioned API format + assert len(results) == 2 + action_names = [r.action_name for r in results] + assert "bamboohr_create_employee" in action_names + assert "hibob_create_employee" in action_names + assert 
"workday_create_worker" not in action_names + + @patch.object(SemanticSearchClient, "search") + def test_search_action_names_returns_empty_on_failure(self, mock_search: MagicMock) -> None: + """Test that search_action_names returns [] when semantic search fails.""" + from stackone_ai import StackOneToolSet + + mock_search.side_effect = SemanticSearchError("API unavailable") + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("create employee") + + assert results == [] + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_fetches_max_then_falls_back_per_connector( + self, mock_fetch: MagicMock, mock_search: MagicMock + ) -> None: + """Test that API fetches max results first, then per-connector if not enough.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.return_value = SemanticSearchResponse( + results=[], + total_count=0, + query="test", + ) + + # Mock MCP to return a bamboohr tool + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + toolset.search_action_names( + "test", + account_ids=["acc-123"], + top_k=5, + ) + + # First call: passes user's top_k to backend + # Second call: per-connector fallback for "bamboohr" since first returned nothing + assert mock_search.call_count == 2 + first_call = mock_search.call_args_list[0].kwargs + assert first_call["top_k"] == 5 + assert first_call["connector"] is None + second_call = mock_search.call_args_list[1].kwargs + assert second_call["connector"] == "bamboohr" + assert second_call["top_k"] == 5 + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_respects_top_k_after_filtering(self, mock_fetch: MagicMock, mock_search: MagicMock) -> 
None: + """Test that results are limited to top_k after filtering.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Return more results than top_k using versioned API names + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name=f"bamboohr_1.0.0_bamboohr_action_{i}_global", + connector_key="bamboohr", + similarity_score=0.9 - i * 0.1, + label=f"Action {i}", + description=f"Action {i}", + ) + for i in range(10) + ], + total_count=10, + query="test", + ) + + # Mock MCP to return bamboohr tools + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_action_0", + description="Action 0", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names( + "test", + account_ids=["acc-123"], + top_k=3, + ) + + # Should be limited to top_k after normalization + assert len(results) == 3 + # Names should be normalized + assert results[0].action_name == "bamboohr_action_0" + + +class TestNormalizeActionName: + """Tests for _normalize_action_name() function.""" + + def test_versioned_name_is_normalized(self) -> None: + """Test that versioned API names are normalized to MCP format.""" + from stackone_ai.toolset import _normalize_action_name + + assert ( + _normalize_action_name("calendly_1.0.0_calendly_create_scheduling_link_global") + == "calendly_create_scheduling_link" + ) + + def test_multi_segment_version(self) -> None: + """Test normalization with multi-segment semver.""" + from stackone_ai.toolset import _normalize_action_name + + assert ( + _normalize_action_name("breathehr_1.0.1_breathehr_list_employees_global") + == "breathehr_list_employees" + ) + + def test_already_normalized_name_unchanged(self) -> None: + """Test that MCP-format names pass through unchanged.""" + from stackone_ai.toolset import _normalize_action_name + + assert 
_normalize_action_name("bamboohr_create_employee") == "bamboohr_create_employee" + + def test_non_matching_name_unchanged(self) -> None: + """Test that names that don't match the pattern pass through unchanged.""" + from stackone_ai.toolset import _normalize_action_name + + assert _normalize_action_name("some_random_tool") == "some_random_tool" + + def test_empty_string(self) -> None: + """Test empty string input.""" + from stackone_ai.toolset import _normalize_action_name + + assert _normalize_action_name("") == "" + + def test_multiple_versions_normalize_to_same(self) -> None: + """Test that different versions of the same action normalize identically.""" + from stackone_ai.toolset import _normalize_action_name + + name_v1 = _normalize_action_name("breathehr_1.0.0_breathehr_list_employees_global") + name_v2 = _normalize_action_name("breathehr_1.0.1_breathehr_list_employees_global") + assert name_v1 == name_v2 == "breathehr_list_employees" + + +class TestSemanticSearchDeduplication: + """Tests for deduplication after name normalization.""" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_search_tools_deduplicates_versions(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that search_tools deduplicates multiple API versions of the same action.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + SemanticSearchResult( + 
action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.85, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=3, + query="list employees", + ) + + mock_fetch.return_value = [ + _McpToolDefinition( + name="breathehr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("list employees", top_k=5) + + # Should deduplicate: both breathehr versions -> breathehr_list_employees + tool_names = [t.name for t in tools] + assert tool_names.count("breathehr_list_employees") == 1 + assert "bamboohr_create_employee" in tool_names + assert len(tools) == 2 + + @patch.object(SemanticSearchClient, "search") + def test_search_action_names_deduplicates_versions(self, mock_search: MagicMock) -> None: + """Test that search_action_names deduplicates multiple API versions.""" + from stackone_ai import StackOneToolSet + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + ], + total_count=2, + query="list employees", + ) + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("list employees", top_k=5) + + # Should deduplicate: only one result for breathehr_list_employees + assert len(results) == 1 + assert results[0].action_name == "breathehr_list_employees" 
+ # Should keep the highest score (first seen, already sorted by score) + assert results[0].similarity_score == 0.95 + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_deduplicates_versions(self, mock_search: MagicMock) -> None: + """Test that create_semantic_tool_search deduplicates API versions.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + ], + total_count=2, + query="list employees", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + result = tool.execute({"query": "list employees", "limit": 10}) + + # Should deduplicate: only one result + assert len(result["tools"]) == 1 + assert result["tools"][0]["name"] == "breathehr_list_employees" + assert result["tools"][0]["score"] == 0.95