diff --git a/README.md b/README.md index 7e26dbd..e7ffbf5 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ StackOne AI provides a unified interface for accessing various SaaS tools throug - Glob pattern filtering with patterns like `"hris_*"` and exclusions `"!hris_delete_*"` - Provider and action filtering - Multi-account support +- **Semantic Search**: AI-powered tool discovery using natural language queries - **Utility Tools** (Beta): Dynamic tool discovery and execution based on natural language queries - Integration with popular AI frameworks: - OpenAI Functions @@ -325,6 +326,25 @@ execute_tool = utility_tools.get_tool("tool_execute") result = execute_tool.call(toolName="hris_list_employees", params={"limit": 10}) ``` +## Semantic Search + +Discover tools using natural language instead of exact names. Queries like "onboard new hire" resolve to the right actions even when the tool is called `hris_create_employee`. + +```python +from stackone_ai import StackOneToolSet + +toolset = StackOneToolSet() + +# Search by intent — returns Tools collection ready for any framework +tools = toolset.search_tools("manage employee records", account_ids=["your-account-id"], top_k=5) +openai_tools = tools.to_openai() + +# Lightweight: inspect results without fetching full tool definitions +results = toolset.search_action_names("time off requests", top_k=5) +``` + +`search_tools` results are automatically scoped to connectors in your linked accounts; `search_action_names` searches the full catalog unless `account_ids` are passed or `set_accounts()` was called. See [Semantic Search Example](examples/semantic_search_example.py) for utility tools integration, OpenAI, and LangChain patterns. 
+ ## Examples For more examples, check out the [examples/](examples/) directory: @@ -335,6 +355,7 @@ For more examples, check out the [examples/](examples/) directory: - [LangChain Integration](examples/langchain_integration.py) - [CrewAI Integration](examples/crewai_integration.py) - [Utility Tools](examples/utility_tools_example.py) +- [Semantic Search](examples/semantic_search_example.py) ## Development diff --git a/examples/semantic_search_example.py b/examples/semantic_search_example.py new file mode 100644 index 0000000..8842de6 --- /dev/null +++ b/examples/semantic_search_example.py @@ -0,0 +1,413 @@ +#!/usr/bin/env python +""" +Example demonstrating semantic search for AI-powered tool discovery. + +Semantic search understands natural language intent and synonyms, so queries like +"book a meeting" or "cancel an event" resolve to the right StackOne actions — +unlike keyword matching which requires exact tool names. + +This example uses a Calendly-linked account to demonstrate how semantic search +discovers scheduling, event, and organization management tools from natural +language queries. + + +How Semantic Search Works (Overview) +===================================== + +The SDK provides three paths for semantic tool discovery, each with a different +trade-off between speed, filtering, and completeness: + +1. search_tools(query) — Full discovery (recommended for agent frameworks) + + This is the method you should use when integrating with OpenAI, LangChain, + CrewAI, or any other agent framework. It works in these steps: + + a) Fetch tools from the user's linked accounts via MCP + b) Extract available connectors (e.g. 
{bamboohr, calendly}) + c) Search EACH connector in parallel via the semantic search API + d) Collect results, sort by relevance score + e) If top_k was specified, keep only the top K results + f) Match results back to the fetched tool definitions + g) Return a Tools collection sorted by relevance score + + Key point: only the user's own connectors are searched — no wasted results + from connectors the user doesn't have. When top_k is not specified, the + backend decides how many results to return per connector. If the semantic + API is unavailable, the SDK falls back to local BM25+TF-IDF search + automatically. + +2. search_action_names(query) — Lightweight preview + + Queries the semantic API directly and returns metadata (name, connector, + score, description) without fetching full tool definitions. Useful for + inspecting results before committing to a full fetch. When account_ids are + provided, each connector is searched in parallel (same as search_tools). + +3. utility_tools() — Agent-loop pattern + + Creates tool_search and tool_execute utility tools that agents can call + inside an agentic loop. Pass semantic_client=toolset.semantic_client to + enable cloud-based semantic search; without it, local BM25+TF-IDF is + used. When created via utility_tools(), tool_search is automatically + scoped to the user's linked connectors. The agent searches, inspects, + and executes tools dynamically. + + +This example is runnable with the following command: +```bash +uv run examples/semantic_search_example.py +``` + +Prerequisites: +- STACKONE_API_KEY environment variable set +- STACKONE_ACCOUNT_ID environment variable set (required for examples that fetch tools) +- At least one linked account in StackOne (this example uses Calendly) + +Note: search_action_names() works with just STACKONE_API_KEY — no account ID needed. 
+""" + +import logging +import os + +from dotenv import load_dotenv + +from stackone_ai import StackOneToolSet + +load_dotenv() + +# Show SDK warnings (e.g., semantic search fallback to local search) +logging.basicConfig(level=logging.WARNING) + +# Read account IDs from environment — supports comma-separated values +_account_ids = [aid.strip() for aid in os.getenv("STACKONE_ACCOUNT_ID", "").split(",") if aid.strip()] + + +def example_search_action_names(): + """Lightweight search returning action names and scores without fetching tools. + + search_action_names() queries the semantic search API directly — it does NOT + need account IDs or MCP. This makes it the simplest way to try semantic search. + + When called without account_ids, results come from the full StackOne catalog + (all connectors). When called with account_ids, results are filtered to only + connectors available in your linked accounts. + """ + print("=" * 60) + print("Example 1: search_action_names() — lightweight discovery") + print("=" * 60) + print() + print("This searches the StackOne action catalog using semantic vectors.") + print("No account ID needed — results come from all available connectors.") + print() + + toolset = StackOneToolSet() + + query = "get user schedule" + + # --- top_k behavior --- + # When top_k is NOT specified, the backend decides how many results to return. + # When top_k IS specified, results are explicitly limited to that number. 
+ print(f'Searching for: "{query}" (no top_k — backend decides count)') + results_default = toolset.search_action_names(query) + print(f" Backend returned {len(results_default)} results (its default)") + print() + + print(f'Searching for: "{query}" (top_k=3 — explicitly limited)') + results_limited = toolset.search_action_names(query, top_k=3) + print(f" Got exactly {len(results_limited)} results") + print() + + # Show the limited results + print(f"Top {len(results_limited)} matches from the full catalog:") + for r in results_limited: + print(f" [{r.similarity_score:.2f}] {r.action_name} ({r.connector_key})") + print(f" {r.description}") + print() + + # Show filtering effect when account_ids are available + if _account_ids: + print(f"Now filtering to your linked accounts ({', '.join(_account_ids)})...") + print(" (Each connector is searched in parallel — only your connectors are queried)") + filtered = toolset.search_action_names(query, account_ids=_account_ids, top_k=5) + print(f" Filtered to {len(filtered)} matches (only your connectors):") + for r in filtered: + print(f" [{r.similarity_score:.2f}] {r.action_name} ({r.connector_key})") + else: + print("Tip: Set STACKONE_ACCOUNT_ID to see results filtered to your linked connectors.") + + print() + + +def example_search_tools(): + """High-level semantic search returning a Tools collection. + + search_tools() is the recommended way to use semantic search. It: + 1. Fetches tool definitions from your linked accounts via MCP + 2. Searches each of your connectors in parallel via the semantic search API + 3. Sorts results by relevance and matches back to tool definitions + 4. Returns a Tools collection ready for any framework (.to_openai(), .to_langchain(), etc.) 
+ """ + print("=" * 60) + print("Example 2: search_tools() — full tool discovery") + print("=" * 60) + print() + + toolset = StackOneToolSet() + + query = "cancel an event" + print(f'Step 1: Searching for "{query}" via semantic search...') + print() + + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=5) + + connectors = tools.get_connectors() + print(f"Found {len(tools)} tools from your linked account(s) ({', '.join(sorted(connectors))}):") + for tool in tools: + print(f" - {tool.name}") + print(f" {tool.description}") + print() + + +def example_search_tools_with_connector(): + """Semantic search filtered by connector. + + Use the connector parameter to scope results to a specific provider, + for example when you know the user works with Calendly. + """ + print("=" * 60) + print("Example 3: search_tools() with connector filter") + print("=" * 60) + print() + + toolset = StackOneToolSet() + + query = "book a meeting" + connector = "calendly" + print(f'Searching for "{query}" filtered to connector="{connector}"...') + print() + + tools = toolset.search_tools( + query, + connector=connector, + account_ids=_account_ids, + top_k=3, + ) + + print(f"Found {len(tools)} {connector} tools:") + for tool in tools: + print(f" - {tool.name}") + print(f" {tool.description}") + print() + + +def example_utility_tools_semantic(): + """Using utility tools with semantic search for agent loops. + + Pass semantic_client=toolset.semantic_client to utility_tools() to enable + cloud-based semantic search. Without it, utility_tools() uses local + BM25+TF-IDF search instead. + + When created via utility_tools(), tool_search is automatically scoped to + the connectors available in your fetched tools collection. 
+ """ + print("=" * 60) + print("Example 4: Utility tools with semantic search") + print("=" * 60) + print() + + toolset = StackOneToolSet() + + print("Step 1: Fetching tools from your linked accounts via MCP...") + tools = toolset.fetch_tools(account_ids=_account_ids) + print(f"Loaded {len(tools)} tools.") + print() + + print("Step 2: Creating utility tools with semantic search enabled...") + print(" Pass semantic_client=toolset.semantic_client to enable semantic search.") + utility = tools.utility_tools(semantic_client=toolset.semantic_client) + + search_tool = utility.get_tool("tool_search") + if search_tool: + query = "cancel an event or meeting" + print() + print(f'Step 3: Calling tool_search with query="{query}"...') + print(" (Searches are scoped to your linked connectors)") + print() + result = search_tool.call(query=query, limit=5) + tools_data = result.get("tools", []) + print(f"tool_search returned {len(tools_data)} results:") + for tool_info in tools_data: + print(f" [{tool_info['score']:.2f}] {tool_info['name']}") + print(f" {tool_info['description']}") + + print() + + +def example_openai_agent_loop(): + """Complete agent loop: semantic search -> LLM -> execute. + + This demonstrates the full pattern for building an AI agent that + discovers tools via semantic search and executes them via an LLM. + + Supports both OpenAI and Google Gemini (via its OpenAI-compatible API). + Set OPENAI_API_KEY for OpenAI, or GOOGLE_API_KEY for Gemini. + """ + print("=" * 60) + print("Example 5: LLM agent loop with semantic search") + print("=" * 60) + print() + + try: + from openai import OpenAI + except ImportError: + print("Skipped: OpenAI library not installed. 
Install with: pip install openai") + print() + return + + # Support both OpenAI and Gemini (via OpenAI-compatible endpoint) + openai_key = os.getenv("OPENAI_API_KEY") + google_key = os.getenv("GOOGLE_API_KEY") + + if openai_key: + client = OpenAI() + model = "gpt-4o-mini" + provider = "OpenAI" + elif google_key: + client = OpenAI( + api_key=google_key, + base_url="https://generativelanguage.googleapis.com/v1beta/openai/", + ) + model = "gemini-2.5-flash" + provider = "Gemini" + else: + print("Skipped: Set OPENAI_API_KEY or GOOGLE_API_KEY to run this example.") + print() + return + + print(f"Using {provider} ({model})") + print() + + toolset = StackOneToolSet() + + query = "list upcoming events" + print(f'Step 1: Discovering tools for "{query}" via semantic search...') + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=3) + print(f"Found {len(tools)} tools:") + for tool in tools: + print(f" - {tool.name}") + print() + + print(f"Step 2: Sending tools to {provider} as function definitions...") + openai_tools = tools.to_openai() + + messages = [ + {"role": "system", "content": "You are a helpful scheduling assistant."}, + {"role": "user", "content": "Can you show me my upcoming events?"}, + ] + + response = client.chat.completions.create( + model=model, + messages=messages, + tools=openai_tools, + tool_choice="auto", + ) + + if response.choices[0].message.tool_calls: + print(f"Step 3: {provider} chose to call these tools:") + for tool_call in response.choices[0].message.tool_calls: + print(f" - {tool_call.function.name}({tool_call.function.arguments})") + + tool = tools.get_tool(tool_call.function.name) + if tool: + result = tool.execute(tool_call.function.arguments) + print( + f" Response keys: {list(result.keys()) if isinstance(result, dict) else type(result)}" + ) + else: + print(f"{provider} responded with text: {response.choices[0].message.content}") + + print() + + +def example_langchain_semantic(): + """Semantic search with LangChain tools. 
+ + search_tools() returns a Tools collection that converts directly + to LangChain format — no extra steps needed. + """ + print("=" * 60) + print("Example 6: Semantic search with LangChain") + print("=" * 60) + print() + + try: + from langchain_core.tools import BaseTool # noqa: F401 + except ImportError: + print("Skipped: LangChain not installed. Install with: pip install langchain-core") + print() + return + + toolset = StackOneToolSet() + + query = "remove a user from the team" + print(f'Step 1: Searching for "{query}" via semantic search...') + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=5) + print(f"Found {len(tools)} tools.") + print() + + print("Step 2: Converting to LangChain tools...") + langchain_tools = tools.to_langchain() + + print(f"Created {len(langchain_tools)} LangChain tools (ready for use with agents):") + for tool in langchain_tools: + print(f" - {tool.name} (type: {type(tool).__name__})") + print(f" {tool.description}") + + print() + + +def main(): + """Run all semantic search examples.""" + print() + print("############################################################") + print("# StackOne AI SDK — Semantic Search Examples #") + print("############################################################") + print() + + if not os.getenv("STACKONE_API_KEY"): + print("Set STACKONE_API_KEY to run these examples.") + return + + # --- Examples that work without account IDs --- + example_search_action_names() + + # --- Examples that require account IDs (MCP needs x-account-id) --- + if not _account_ids: + print("=" * 60) + print("Remaining examples require STACKONE_ACCOUNT_ID") + print("=" * 60) + print() + print("Set STACKONE_ACCOUNT_ID (comma-separated for multiple) to run") + print("examples that fetch full tool definitions from your linked accounts:") + print(" - search_tools() with natural language queries") + print(" - search_tools() with connector filter") + print(" - Utility tools with semantic search") + print(" - OpenAI 
agent loop") + print(" - LangChain integration") + return + + example_search_tools() + example_search_tools_with_connector() + example_utility_tools_semantic() + + # Framework integration patterns + example_openai_agent_loop() + example_langchain_semantic() + + print("############################################################") + print("# All examples completed! #") + print("############################################################") + + +if __name__ == "__main__": + main() diff --git a/examples/test_examples.py b/examples/test_examples.py index 45d631e..36fc7ba 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -31,6 +31,7 @@ def get_example_files() -> list[str]: "file_uploads.py": ["mcp"], "stackone_account_ids.py": ["mcp"], "utility_tools_example.py": ["mcp"], + "semantic_search_example.py": ["mcp"], "mcp_server.py": ["mcp"], } diff --git a/stackone_ai/__init__.py b/stackone_ai/__init__.py index f7a0aba..434e318 100644 --- a/stackone_ai/__init__.py +++ b/stackone_ai/__init__.py @@ -1,11 +1,22 @@ """StackOne AI SDK""" -from .models import StackOneTool, Tools -from .toolset import StackOneToolSet +from stackone_ai.models import StackOneTool, Tools +from stackone_ai.semantic_search import ( + SemanticSearchClient, + SemanticSearchError, + SemanticSearchResponse, + SemanticSearchResult, +) +from stackone_ai.toolset import StackOneToolSet __all__ = [ "StackOneToolSet", "StackOneTool", "Tools", + # Semantic search + "SemanticSearchClient", + "SemanticSearchResult", + "SemanticSearchResponse", + "SemanticSearchError", ] __version__ = "2.3.1" diff --git a/stackone_ai/models.py b/stackone_ai/models.py index fcd32d7..2e3ac02 100644 --- a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -13,6 +13,8 @@ from langchain_core.tools import BaseTool from pydantic import BaseModel, BeforeValidator, Field, PrivateAttr +from stackone_ai.semantic_search import SemanticSearchClient + # Type aliases for common types JsonDict: TypeAlias = dict[str, Any] 
Headers: TypeAlias = dict[str, str] @@ -98,6 +100,18 @@ class StackOneTool(BaseModel): "feedback_metadata", } + @property + def connector(self) -> str: + """Extract connector from tool name. + + Tool names follow the format: {connector}_{action}_{entity} + e.g., 'bamboohr_create_employee' -> 'bamboohr' + + Returns: + Connector name in lowercase + """ + return self.name.split("_")[0].lower() + def __init__( self, description: str, @@ -514,6 +528,19 @@ def get_account_id(self) -> str | None: return account_id return None + def get_connectors(self) -> set[str]: + """Get unique connector names from all tools. + + Returns: + Set of connector names (lowercase) + + Example: + tools = toolset.fetch_tools() + connectors = tools.get_connectors() + # {'bamboohr', 'hibob', 'slack', ...} + """ + return {tool.connector for tool in self.tools} + def to_openai(self) -> list[JsonDict]: """Convert all tools to OpenAI function format @@ -530,34 +557,54 @@ def to_langchain(self) -> Sequence[BaseTool]: """ return [tool.to_langchain() for tool in self.tools] - def utility_tools(self, hybrid_alpha: float | None = None) -> Tools: + def utility_tools( + self, + hybrid_alpha: float | None = None, + semantic_client: SemanticSearchClient | None = None, + ) -> Tools: """Return utility tools for tool discovery and execution - Utility tools enable dynamic tool discovery and execution based on natural language queries - using hybrid BM25 + TF-IDF search. + Utility tools enable dynamic tool discovery and execution based on natural language queries. + By default, uses local hybrid BM25 + TF-IDF search. When a semantic_client is provided, + uses cloud-based semantic search for higher accuracy on natural language queries. Args: - hybrid_alpha: Weight for BM25 in hybrid search (0-1). If not provided, uses - ToolIndex.DEFAULT_HYBRID_ALPHA (0.2), which gives more weight to BM25 scoring - and has been shown to provide better tool discovery accuracy - (10.8% improvement in validation testing). 
+ hybrid_alpha: Weight for BM25 in hybrid search (0-1). Only used when + semantic_client is not provided. If not provided, uses DEFAULT_HYBRID_ALPHA (0.2), + which gives more weight to BM25 scoring. + semantic_client: Optional SemanticSearchClient instance. Pass + toolset.semantic_client to enable cloud-based semantic search. Returns: Tools collection containing tool_search and tool_execute Note: This feature is in beta and may change in future versions + + Example: + # Semantic search (pass semantic_client explicitly) + toolset = StackOneToolSet() + tools = toolset.fetch_tools() + utility = tools.utility_tools(semantic_client=toolset.semantic_client) + + # Local BM25+TF-IDF search (default, no semantic_client) + utility = tools.utility_tools() """ - from stackone_ai.utility_tools import ( - ToolIndex, - create_tool_execute, - create_tool_search, - ) + from stackone_ai.utility_tools import create_tool_execute - # Create search index with hybrid search - index = ToolIndex(self.tools, hybrid_alpha=hybrid_alpha) + if semantic_client is not None: + from stackone_ai.utility_tools import create_semantic_tool_search + + search_tool = create_semantic_tool_search( + semantic_client, available_connectors=self.get_connectors() + ) + execute_tool = create_tool_execute(self) + return Tools([search_tool, execute_tool]) - # Create utility tools + # Default: local BM25+TF-IDF search + from stackone_ai.utility_tools import ToolIndex, create_tool_search + + index = ToolIndex(self.tools, hybrid_alpha=hybrid_alpha) filter_tool = create_tool_search(index) execute_tool = create_tool_execute(self) diff --git a/stackone_ai/semantic_search.py b/stackone_ai/semantic_search.py new file mode 100644 index 0000000..2030a2b --- /dev/null +++ b/stackone_ai/semantic_search.py @@ -0,0 +1,210 @@ +"""Semantic search client for StackOne action search API. + +How Semantic Search Works +========================= + +The SDK provides three ways to discover tools using semantic search. 
+Each path trades off between speed, filtering, and completeness. + +1. ``search_tools(query)`` — Full tool discovery (recommended for agent frameworks) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the primary method used when integrating with OpenAI, LangChain, or CrewAI. +The internal flow is: + +1. Fetch ALL tools from linked accounts via MCP (uses account_ids to scope the request) +2. Extract available connectors from the fetched tools (e.g. {bamboohr, hibob}) +3. Search EACH connector in parallel via the semantic search API (/actions/search) +4. Collect results, sort by relevance score, apply top_k if specified +5. Match semantic results back to the fetched tool definitions +6. Return Tools sorted by relevance score + +Key point: only the user's own connectors are searched — no wasted results +from connectors the user doesn't have. Tools are fetched first, semantic +search runs second, and only tools that exist in the user's linked +accounts AND match the semantic query are returned. This prevents +suggesting tools the user cannot execute. + +If the semantic API is unavailable, the SDK falls back to a local +BM25 + TF-IDF hybrid search over the fetched tools (unless +``fallback_to_local=False``). + + +2. ``search_action_names(query)`` — Lightweight discovery +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Queries the semantic API directly and returns action name metadata +(name, connector, score, description) **without** fetching full tool +definitions. This is useful for previewing results before committing +to a full fetch. + +When ``account_ids`` are provided, each connector is searched in +parallel (same as ``search_tools``). Without ``account_ids``, results +come from the full StackOne catalog. + + +3. 
``utility_tools(semantic_client=...)`` — Agent-loop search + execute +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Creates a ``tool_search`` utility tool that agents can call inside a +loop. The agent searches for tools, inspects results, then calls +``tool_execute`` to run the chosen tool. When ``semantic_client`` is +passed, ``tool_search`` uses cloud-based semantic vectors instead of +local BM25 + TF-IDF. + +When created via ``utility_tools()``, ``tool_search`` is automatically +scoped to the user's linked connectors (extracted from the fetched tools). +""" + +from __future__ import annotations + +import base64 +from typing import Any + +import httpx +from pydantic import BaseModel + + +class SemanticSearchError(Exception): + """Raised when semantic search fails.""" + + pass + + +class SemanticSearchResult(BaseModel): + """Single result from semantic search API.""" + + action_name: str + connector_key: str + similarity_score: float + label: str + description: str + project_id: str = "global" + + +class SemanticSearchResponse(BaseModel): + """Response from /actions/search endpoint.""" + + results: list[SemanticSearchResult] + total_count: int + query: str + connector_filter: str | None = None + project_filter: str | None = None + + +class SemanticSearchClient: + """Client for StackOne semantic search API. + + This client provides access to the semantic search endpoint which uses + enhanced embeddings for higher accuracy than local BM25+TF-IDF search. + + Example: + client = SemanticSearchClient(api_key="sk-xxx") + response = client.search("create employee", connector="bamboohr", top_k=5) + for result in response.results: + print(f"{result.action_name}: {result.similarity_score:.2f}") + """ + + def __init__( + self, + api_key: str, + base_url: str = "https://api.stackone.com", + timeout: float = 30.0, + ) -> None: + """Initialize the semantic search client. 
+ + Args: + api_key: StackOne API key + base_url: Base URL for API requests + timeout: Request timeout in seconds + """ + self.api_key = api_key + self.base_url = base_url.rstrip("/") + self.timeout = timeout + + def _build_auth_header(self) -> str: + """Build the Basic auth header.""" + token = base64.b64encode(f"{self.api_key}:".encode()).decode() + return f"Basic {token}" + + def search( + self, + query: str, + connector: str | None = None, + top_k: int | None = None, + project_id: str | None = None, + ) -> SemanticSearchResponse: + """Search for relevant actions using semantic search. + + Args: + query: Natural language query describing what tools/actions you need + connector: Optional connector/provider filter (e.g., "bamboohr", "slack") + top_k: Maximum number of results to return. If not provided, uses the backend default. + project_id: Optional project scope (e.g., "103/dev-56501"). When provided, + results include both global actions and project-specific actions. + + Returns: + SemanticSearchResponse containing matching actions with similarity scores + + Raises: + SemanticSearchError: If the API call fails + + Example: + response = client.search("onboard a new team member", top_k=5) + for result in response.results: + print(f"{result.action_name}: {result.similarity_score:.2f}") + """ + url = f"{self.base_url}/actions/search" + headers = { + "Authorization": self._build_auth_header(), + "Content-Type": "application/json", + } + payload: dict[str, Any] = {"query": query} + if top_k is not None: + payload["top_k"] = top_k + if connector: + payload["connector"] = connector + if project_id: + payload["project_id"] = project_id + + try: + response = httpx.post(url, json=payload, headers=headers, timeout=self.timeout) + response.raise_for_status() + data = response.json() + return SemanticSearchResponse(**data) + except httpx.HTTPStatusError as e: + raise SemanticSearchError(f"API error: {e.response.status_code} - {e.response.text}") from e + except 
httpx.RequestError as e: + raise SemanticSearchError(f"Request failed: {e}") from e + except Exception as e: + raise SemanticSearchError(f"Search failed: {e}") from e + + def search_action_names( + self, + query: str, + connector: str | None = None, + top_k: int | None = None, + min_score: float = 0.0, + project_id: str | None = None, + ) -> list[str]: + """Convenience method returning just action names. + + Args: + query: Natural language query + connector: Optional connector/provider filter + top_k: Maximum number of results. If not provided, uses the backend default. + min_score: Minimum similarity score threshold (0-1) + project_id: Optional project scope for multi-tenant filtering + + Returns: + List of action names sorted by relevance + + Example: + action_names = client.search_action_names( + "create employee", + connector="bamboohr", + min_score=0.5 + ) + """ + response = self.search(query, connector, top_k, project_id) + return [r.action_name for r in response.results if r.similarity_score >= min_score] diff --git a/stackone_ai/toolset.py b/stackone_ai/toolset.py index 126078a..fa8dd5c 100644 --- a/stackone_ai/toolset.py +++ b/stackone_ai/toolset.py @@ -2,8 +2,10 @@ import asyncio import base64 +import concurrent.futures import fnmatch import json +import logging import os import threading from collections.abc import Coroutine @@ -18,6 +20,14 @@ ToolParameters, Tools, ) +from stackone_ai.semantic_search import ( + SemanticSearchClient, + SemanticSearchError, + SemanticSearchResult, +) +from stackone_ai.utils.normalize import _normalize_action_name + +logger = logging.getLogger("stackone.tools") try: _SDK_VERSION = metadata.version("stackone-ai") @@ -34,6 +44,7 @@ } _USER_AGENT = f"stackone-ai-python/{_SDK_VERSION}" + T = TypeVar("T") @@ -251,6 +262,7 @@ def __init__( self.account_id = account_id self.base_url = base_url or DEFAULT_BASE_URL self._account_ids: list[str] = [] + self._semantic_client: SemanticSearchClient | None = None def set_accounts(self, 
account_ids: list[str]) -> StackOneToolSet: """Set account IDs for filtering tools @@ -264,6 +276,252 @@ def set_accounts(self, account_ids: list[str]) -> StackOneToolSet: self._account_ids = account_ids return self + @property + def semantic_client(self) -> SemanticSearchClient: + """Lazy initialization of semantic search client. + + Returns: + SemanticSearchClient instance configured with the toolset's API key and base URL + """ + if self._semantic_client is None: + self._semantic_client = SemanticSearchClient( + api_key=self.api_key, + base_url=self.base_url, + ) + return self._semantic_client + + def search_tools( + self, + query: str, + *, + connector: str | None = None, + top_k: int | None = None, + min_score: float = 0.0, + account_ids: list[str] | None = None, + fallback_to_local: bool = True, + ) -> Tools: + """Search for and fetch tools using semantic search. + + This method uses the StackOne semantic search API to find relevant tools + based on natural language queries. It optimizes results by filtering to + only connectors available in linked accounts. + + Args: + query: Natural language description of needed functionality + (e.g., "create employee", "send a message") + connector: Optional provider/connector filter (e.g., "bamboohr", "slack") + top_k: Maximum number of tools to return. If None, uses the backend default. 
+ min_score: Minimum similarity score threshold 0-1 (default: 0.0) + account_ids: Optional account IDs (uses set_accounts() if not provided) + fallback_to_local: If True, fall back to local BM25+TF-IDF search on API failure + + Returns: + Tools collection with semantically matched tools from linked accounts + + Raises: + SemanticSearchError: If the API call fails and fallback_to_local is False + + Examples: + # Basic semantic search + tools = toolset.search_tools("manage employee records", top_k=5) + + # Filter by connector + tools = toolset.search_tools( + "create time off request", + connector="bamboohr", + min_score=0.5 + ) + + # With account filtering + tools = toolset.search_tools( + "send message", + account_ids=["acc-123"], + top_k=3 + ) + """ + all_tools = self.fetch_tools(account_ids=account_ids) + available_connectors = all_tools.get_connectors() + + if not available_connectors: + return Tools([]) + + try: + # Step 2: Determine which connectors to search + if connector: + connectors_to_search = {connector.lower()} & available_connectors + if not connectors_to_search: + return Tools([]) + else: + connectors_to_search = available_connectors + + # Step 3: Search each connector in parallel + def _search_one(c: str) -> list[SemanticSearchResult]: + resp = self.semantic_client.search(query=query, connector=c, top_k=top_k) + return [r for r in resp.results if r.similarity_score >= min_score] + + all_results: list[SemanticSearchResult] = [] + last_error: SemanticSearchError | None = None + max_workers = min(len(connectors_to_search), 10) + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool: + futures = {pool.submit(_search_one, c): c for c in connectors_to_search} + for future in concurrent.futures.as_completed(futures): + try: + all_results.extend(future.result()) + except SemanticSearchError as e: + last_error = e + + # If ALL connector searches failed, re-raise to trigger fallback + if not all_results and last_error is not None: + 
raise last_error + + # Step 4: Sort by score, apply top_k + all_results.sort(key=lambda r: r.similarity_score, reverse=True) + if top_k is not None: + all_results = all_results[:top_k] + + if not all_results: + return Tools([]) + + # Step 5: Match back to fetched tool definitions + action_names = {_normalize_action_name(r.action_name) for r in all_results} + matched_tools = [t for t in all_tools if t.name in action_names] + + # Sort matched tools by semantic search score order + action_order = {_normalize_action_name(r.action_name): i for i, r in enumerate(all_results)} + matched_tools.sort(key=lambda t: action_order.get(t.name, float("inf"))) + + return Tools(matched_tools) + + except SemanticSearchError as e: + if not fallback_to_local: + raise + + logger.warning("Semantic search failed (%s), falling back to local BM25+TF-IDF search", e) + utility = all_tools.utility_tools() + search_tool = utility.get_tool("tool_search") + + if search_tool: + result = search_tool.execute( + { + "query": query, + "limit": top_k, + "minScore": min_score, + } + ) + matched_names = [t["name"] for t in result.get("tools", [])] + # Filter by available connectors and preserve relevance order + tool_map = {t.name: t for t in all_tools} + filter_connectors = {connector.lower()} if connector else available_connectors + matched_tools = [ + tool_map[name] + for name in matched_names + if name in tool_map and name.split("_")[0].lower() in filter_connectors + ] + return Tools(matched_tools[:top_k] if top_k is not None else matched_tools) + + return all_tools + + def search_action_names( + self, + query: str, + *, + connector: str | None = None, + account_ids: list[str] | None = None, + top_k: int | None = None, + min_score: float = 0.0, + ) -> list[SemanticSearchResult]: + """Search for action names without fetching tools. + + Useful when you need to inspect search results before fetching, + or when building custom filtering logic. 
+ + Args: + query: Natural language description of needed functionality + connector: Optional provider/connector filter (single connector) + account_ids: Optional account IDs to scope results to connectors + available in those accounts (uses set_accounts() if not provided). + When provided, results are filtered to only matching connectors. + top_k: Maximum number of results. If None, uses the backend default. + min_score: Minimum similarity score threshold 0-1 (default: 0.0) + + Returns: + List of SemanticSearchResult with action names, scores, and metadata. + Versioned API names are normalized to MCP format but results are NOT + deduplicated — multiple API versions of the same action may appear + with their individual scores. + + Examples: + # Lightweight: inspect results before fetching + results = toolset.search_action_names("manage employees") + for r in results: + print(f"{r.action_name}: {r.similarity_score:.2f}") + + # Account-scoped: only results for connectors in linked accounts + results = toolset.search_action_names( + "create employee", + account_ids=["acc-123"], + top_k=5 + ) + + # Then fetch specific high-scoring actions + selected = [r.action_name for r in results if r.similarity_score > 0.7] + tools = toolset.fetch_tools(actions=selected) + """ + # Resolve available connectors from account_ids (same pattern as search_tools) + available_connectors: set[str] | None = None + effective_account_ids = account_ids or self._account_ids + if effective_account_ids: + all_tools = self.fetch_tools(account_ids=effective_account_ids) + available_connectors = all_tools.get_connectors() + if not available_connectors: + return [] + + try: + if available_connectors: + # Parallel per-connector search (only user's connectors) + if connector: + connectors_to_search = {connector.lower()} & available_connectors + else: + connectors_to_search = available_connectors + + def _search_one(c: str) -> list[SemanticSearchResult]: + try: + resp = 
self.semantic_client.search(query=query, connector=c, top_k=top_k) + return [r for r in resp.results if r.similarity_score >= min_score] + except SemanticSearchError: + return [] + + all_results: list[SemanticSearchResult] = [] + if connectors_to_search: + max_workers = min(len(connectors_to_search), 10) + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool: + futures = [pool.submit(_search_one, c) for c in connectors_to_search] + for future in concurrent.futures.as_completed(futures): + all_results.extend(future.result()) + else: + # No account filtering — single global search + response = self.semantic_client.search(query=query, connector=connector, top_k=top_k) + all_results = [r for r in response.results if r.similarity_score >= min_score] + + except SemanticSearchError as e: + logger.warning("Semantic search failed: %s", e) + return [] + + # Sort by score, normalize action names + all_results.sort(key=lambda r: r.similarity_score, reverse=True) + normalized: list[SemanticSearchResult] = [] + for r in all_results: + normalized.append( + SemanticSearchResult( + action_name=_normalize_action_name(r.action_name), + connector_key=r.connector_key, + similarity_score=r.similarity_score, + label=r.label, + description=r.description, + ) + ) + return normalized[:top_k] if top_k is not None else normalized + def _filter_by_provider(self, tool_name: str, providers: list[str]) -> bool: """Check if a tool name matches any of the provider filters diff --git a/stackone_ai/utility_tools.py b/stackone_ai/utility_tools.py index 0d9a209..1d7f2a7 100644 --- a/stackone_ai/utility_tools.py +++ b/stackone_ai/utility_tools.py @@ -2,20 +2,19 @@ from __future__ import annotations +import concurrent.futures import json -from typing import TYPE_CHECKING import bm25s import numpy as np from pydantic import BaseModel from stackone_ai.constants import DEFAULT_HYBRID_ALPHA -from stackone_ai.models import ExecuteConfig, JsonDict, StackOneTool, ToolParameters +from 
stackone_ai.models import ExecuteConfig, JsonDict, StackOneTool, ToolParameters, Tools +from stackone_ai.semantic_search import SemanticSearchClient, SemanticSearchError, SemanticSearchResult +from stackone_ai.utils.normalize import _normalize_action_name from stackone_ai.utils.tfidf_index import TfidfDocument, TfidfIndex -if TYPE_CHECKING: - from stackone_ai.models import Tools - class ToolSearchResult(BaseModel): """Result from tool_search""" @@ -201,11 +200,13 @@ def create_tool_search(index: ToolIndex) -> StackOneTool: "type": "number", "description": "Maximum number of tools to return (default: 5)", "default": 5, + "nullable": True, }, "minScore": { "type": "number", "description": "Minimum relevance score (0-1) to filter results (default: 0.0)", "default": 0.0, + "nullable": True, }, }, ) @@ -219,8 +220,8 @@ def execute_filter(arguments: str | JsonDict | None = None) -> JsonDict: kwargs = arguments or {} query = kwargs.get("query", "") - limit = int(kwargs.get("limit", 5)) - min_score = float(kwargs.get("minScore", 0.0)) + limit = int(kwargs["limit"]) if kwargs.get("limit") is not None else 5 + min_score = float(kwargs["minScore"]) if kwargs.get("minScore") is not None else 0.0 # Search for tools results = index.search(query, limit, min_score) @@ -266,6 +267,154 @@ def execute( return ToolSearchTool() +def create_semantic_tool_search( + semantic_client: SemanticSearchClient, + available_connectors: set[str] | None = None, +) -> StackOneTool: + """Create a semantic search variant of tool_search. + + Uses cloud semantic search API instead of local BM25+TF-IDF for + improved natural language tool discovery. + + Args: + semantic_client: Initialized SemanticSearchClient instance + available_connectors: Optional set of connector names to scope searches to. + When provided, searches each connector in parallel and only returns + results for those connectors. When None, queries the full catalog. 
+ + Returns: + Utility tool for searching relevant tools using semantic search + """ + if not isinstance(semantic_client, SemanticSearchClient): + raise TypeError("semantic_client must be a SemanticSearchClient instance") + + name = "tool_search" + description = ( + "Searches for relevant tools based on a natural language query using " + "semantic vector search. Call this first to discover " + "available tools before executing them." + ) + + parameters = ToolParameters( + type="object", + properties={ + "query": { + "type": "string", + "description": ( + "Natural language query describing what tools you need " + '(e.g., "onboard a new team member", "request vacation days")' + ), + }, + "limit": { + "type": "number", + "description": "Maximum number of tools to return (default: 5)", + "default": 5, + "nullable": True, + }, + "minScore": { + "type": "number", + "description": "Minimum similarity score (0-1) to filter results (default: 0.0)", + "default": 0.0, + "nullable": True, + }, + "connector": { + "type": "string", + "description": "Optional: filter by connector/provider (e.g., 'bamboohr', 'slack')", + "nullable": True, + }, + }, + ) + + def execute_search(arguments: str | JsonDict | None = None) -> JsonDict: + """Execute the semantic search tool""" + if isinstance(arguments, str): + kwargs = json.loads(arguments) + else: + kwargs = arguments or {} + + query = kwargs.get("query", "") + limit = int(kwargs["limit"]) if kwargs.get("limit") is not None else 5 + min_score = float(kwargs["minScore"]) if kwargs.get("minScore") is not None else 0.0 + connector = kwargs.get("connector") + + all_results: list[SemanticSearchResult] = [] + + if available_connectors is not None and available_connectors: + # Scoped search: query each connector in parallel + if connector: + connectors_to_search = {connector.lower()} & available_connectors + else: + connectors_to_search = available_connectors + + if connectors_to_search: + max_workers = min(len(connectors_to_search), 10) + with 
concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool: + futures = { + pool.submit(semantic_client.search, query=query, connector=c, top_k=limit): c + for c in connectors_to_search + } + for future in concurrent.futures.as_completed(futures): + try: + resp = future.result() + all_results.extend(resp.results) + except SemanticSearchError: + pass # Partial failures: skip failed connectors + else: + # No connector scoping: query full catalog (backwards compat) + response = semantic_client.search( + query=query, + connector=connector, + top_k=limit, + ) + all_results = list(response.results) + + # Sort by score, deduplicate, filter by min_score, apply limit + all_results.sort(key=lambda r: r.similarity_score, reverse=True) + seen: set[str] = set() + tools_data: list[dict[str, object]] = [] + for r in all_results: + if r.similarity_score >= min_score: + norm_name = _normalize_action_name(r.action_name) + if norm_name not in seen: + seen.add(norm_name) + tools_data.append( + { + "name": norm_name, + "description": r.description, + "score": r.similarity_score, + "connector": r.connector_key, + } + ) + + return {"tools": tools_data[:limit]} + + execute_config = ExecuteConfig( + name=name, + method="POST", + url="", # Utility tools don't make HTTP requests + headers={}, + ) + + class SemanticToolSearchTool(StackOneTool): + """Utility tool for searching relevant tools using semantic search""" + + def __init__(self) -> None: + super().__init__( + description=description, + parameters=parameters, + _execute_config=execute_config, + _api_key="", # Utility tools don't need API key + _account_id=None, + ) + + def execute( + self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None + ) -> JsonDict: + return execute_search(arguments) + + return SemanticToolSearchTool() + + def create_tool_execute(tools_collection: Tools) -> StackOneTool: """Create the tool_execute tool diff --git a/stackone_ai/utils/normalize.py 
b/stackone_ai/utils/normalize.py new file mode 100644 index 0000000..e6ff0d8 --- /dev/null +++ b/stackone_ai/utils/normalize.py @@ -0,0 +1,17 @@ +"""Action name normalization utilities.""" + +from __future__ import annotations + +import re + +_VERSIONED_ACTION_RE = re.compile(r"^[a-z][a-z0-9]*_\d+(?:\.\d+)+_(.+)_global$") + + +def _normalize_action_name(action_name: str) -> str: + """Convert semantic search API action name to MCP tool name. + + API: 'calendly_1.0.0_calendly_create_scheduling_link_global' + MCP: 'calendly_create_scheduling_link' + """ + match = _VERSIONED_ACTION_RE.match(action_name) + return match.group(1) if match else action_name diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py new file mode 100644 index 0000000..c3aebf2 --- /dev/null +++ b/tests/test_semantic_search.py @@ -0,0 +1,1211 @@ +"""Tests for semantic search client and integration.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +from stackone_ai.semantic_search import ( + SemanticSearchClient, + SemanticSearchError, + SemanticSearchResponse, + SemanticSearchResult, +) + + +class TestSemanticSearchResult: + """Tests for SemanticSearchResult model.""" + + def test_create_result(self) -> None: + """Test creating a search result.""" + result = SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee in BambooHR", + ) + + assert result.action_name == "bamboohr_create_employee" + assert result.connector_key == "bamboohr" + assert result.similarity_score == 0.92 + assert result.label == "Create Employee" + assert result.description == "Creates a new employee in BambooHR" + + +class TestSemanticSearchResponse: + """Tests for SemanticSearchResponse model.""" + + def test_create_response(self) -> None: + """Test creating a search response.""" + results = [ + 
SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="hibob_create_employee", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates a new employee", + ), + ] + response = SemanticSearchResponse( + results=results, + total_count=2, + query="create employee", + ) + + assert len(response.results) == 2 + assert response.total_count == 2 + assert response.query == "create employee" + + +class TestSemanticSearchClient: + """Tests for SemanticSearchClient.""" + + def test_init(self) -> None: + """Test client initialization.""" + client = SemanticSearchClient(api_key="test-key") + + assert client.api_key == "test-key" + assert client.base_url == "https://api.stackone.com" + assert client.timeout == 30.0 + + def test_init_custom_base_url(self) -> None: + """Test client initialization with custom base URL.""" + client = SemanticSearchClient( + api_key="test-key", + base_url="https://custom.api.com/", + ) + + assert client.base_url == "https://custom.api.com" # Trailing slash stripped + + def test_build_auth_header(self) -> None: + """Test building the authorization header.""" + client = SemanticSearchClient(api_key="test-key") + header = client._build_auth_header() + + # test-key: encoded in base64 = dGVzdC1rZXk6 + assert header == "Basic dGVzdC1rZXk6" + + @patch("httpx.post") + def test_search_success(self, mock_post: MagicMock) -> None: + """Test successful search request.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [ + { + "action_name": "bamboohr_create_employee", + "connector_key": "bamboohr", + "similarity_score": 0.92, + "label": "Create Employee", + "description": "Creates a new employee", + } + ], + "total_count": 1, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + 
mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + response = client.search("create employee", top_k=5) + + assert len(response.results) == 1 + assert response.results[0].action_name == "bamboohr_create_employee" + assert response.total_count == 1 + assert response.query == "create employee" + + # Verify request was made correctly + mock_post.assert_called_once() + call_kwargs = mock_post.call_args + assert call_kwargs.kwargs["json"] == {"query": "create employee", "top_k": 5} + assert "Authorization" in call_kwargs.kwargs["headers"] + + @patch("httpx.post") + def test_search_with_connector(self, mock_post: MagicMock) -> None: + """Test search with connector filter.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [], + "total_count": 0, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + client.search("create employee", connector="bamboohr", top_k=10) + + call_kwargs = mock_post.call_args + assert call_kwargs.kwargs["json"] == { + "query": "create employee", + "connector": "bamboohr", + "top_k": 10, + } + + @patch("httpx.post") + def test_search_http_error(self, mock_post: MagicMock) -> None: + """Test search with HTTP error.""" + mock_response = MagicMock() + mock_response.status_code = 401 + mock_response.text = "Unauthorized" + mock_post.return_value = mock_response + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "Unauthorized", + request=MagicMock(), + response=mock_response, + ) + + client = SemanticSearchClient(api_key="invalid-key") + + with pytest.raises(SemanticSearchError) as exc_info: + client.search("create employee") + + assert "API error: 401" in str(exc_info.value) + + @patch("httpx.post") + def test_search_request_error(self, mock_post: MagicMock) -> None: + """Test search with request error.""" + mock_post.side_effect = 
httpx.RequestError("Connection failed") + + client = SemanticSearchClient(api_key="test-key") + + with pytest.raises(SemanticSearchError) as exc_info: + client.search("create employee") + + assert "Request failed" in str(exc_info.value) + + @patch("httpx.post") + def test_search_action_names(self, mock_post: MagicMock) -> None: + """Test search_action_names convenience method.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [ + { + "action_name": "bamboohr_create_employee", + "connector_key": "bamboohr", + "similarity_score": 0.92, + "label": "Create Employee", + "description": "Creates a new employee", + }, + { + "action_name": "hibob_create_employee", + "connector_key": "hibob", + "similarity_score": 0.45, + "label": "Create Employee", + "description": "Creates a new employee", + }, + ], + "total_count": 2, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + + # Without min_score filter + names = client.search_action_names("create employee") + assert len(names) == 2 + assert "bamboohr_create_employee" in names + assert "hibob_create_employee" in names + + # With min_score filter + names = client.search_action_names("create employee", min_score=0.5) + assert len(names) == 1 + assert "bamboohr_create_employee" in names + + +class TestSemanticSearchIntegration: + """Integration tests for semantic search with toolset.""" + + def test_toolset_semantic_client_lazy_init(self) -> None: + """Test that semantic_client is lazily initialized.""" + from stackone_ai import StackOneToolSet + + toolset = StackOneToolSet(api_key="test-key") + + # Access semantic_client + client = toolset.semantic_client + assert isinstance(client, SemanticSearchClient) + assert client.api_key == "test-key" + + # Same instance on second access + assert toolset.semantic_client is client + + @patch.object(SemanticSearchClient, "search") + 
@patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test toolset.search_tools() method with connector filtering.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Mock semantic search to return versioned API names (including some for unavailable connectors) + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.95, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="workday_1.0.0_workday_create_worker_global", + connector_key="workday", # User doesn't have this connector + similarity_score=0.90, + label="Create Worker", + description="Creates a new worker", + ), + SemanticSearchResult( + action_name="hibob_1.0.0_hibob_create_employee_global", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=3, + query="create employee", + ) + + # Mock MCP fetch to return only bamboohr and hibob tools (user's linked accounts) + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="hibob_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", top_k=5) + + # Should only return tools for available connectors (bamboohr, hibob) + # workday_create_worker should be filtered out + 
assert len(tools) == 2 + tool_names = [t.name for t in tools] + assert "bamboohr_create_employee" in tool_names + assert "hibob_create_employee" in tool_names + assert "workday_create_worker" not in tool_names # Filtered out - connector not available + + # Results should be sorted by semantic score + assert tools[0].name == "bamboohr_create_employee" # score 0.95 + assert tools[1].name == "hibob_create_employee" # score 0.85 + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search_tools() fallback when semantic search fails.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Semantic search raises an error to trigger fallback + mock_search.side_effect = SemanticSearchError("API unavailable") + + # Mock MCP fetch to return tools from multiple connectors + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists all employees in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="workday_create_worker", + description="Creates a new worker in Workday", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", top_k=5, fallback_to_local=True) + + # Should return results from the local BM25+TF-IDF fallback + assert len(tools) > 0 + tool_names = [t.name for t in tools] + # Should only include tools for available connectors (bamboohr, workday) + for name in tool_names: + connector = name.split("_")[0] + assert connector in {"bamboohr", "workday"} + + @patch.object(SemanticSearchClient, 
"search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback_respects_connector( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test BM25 fallback filters to the requested connector.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.side_effect = SemanticSearchError("API unavailable") + + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists all employees in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="workday_create_worker", + description="Creates a new worker in Workday", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", connector="bamboohr", fallback_to_local=True) + + assert len(tools) > 0 + tool_names = [t.name for t in tools] + for name in tool_names: + assert name.split("_")[0] == "bamboohr" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback_disabled( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search_tools() raises when fallback is disabled.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.side_effect = SemanticSearchError("API unavailable") + # Must provide tools so the flow reaches the semantic search call + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + with 
pytest.raises(SemanticSearchError): + toolset.search_tools("create employee", fallback_to_local=False) + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_action_names( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test toolset.search_action_names() method.""" + from stackone_ai import StackOneToolSet + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="hibob_1.0.0_hibob_create_employee_global", + connector_key="hibob", + similarity_score=0.45, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=2, + query="create employee", + ) + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("create employee", min_score=0.5) + + # Should filter by min_score and normalize action names + assert len(results) == 1 + assert results[0].action_name == "bamboohr_create_employee" + + def test_utility_tools_semantic_search(self) -> None: + """Test utility_tools with semantic search.""" + from stackone_ai.models import StackOneTool, Tools + + # Create a mock tools collection + tool = MagicMock(spec=StackOneTool) + tool.name = "test_tool" + tool.description = "Test tool" + tool.connector = "test" + tools = Tools([tool]) + + # Without semantic search - should use local search + # Patch ToolIndex in utility_tools module where it's imported + with ( + patch("stackone_ai.utility_tools.ToolIndex"), + patch("stackone_ai.utility_tools.create_tool_search") as mock_create_search, + patch("stackone_ai.utility_tools.create_tool_execute") as mock_create_execute, + ): + mock_search_tool = MagicMock(spec=StackOneTool) + mock_search_tool.name = "tool_search" + 
mock_execute_tool = MagicMock(spec=StackOneTool) + mock_execute_tool.name = "tool_execute" + mock_create_search.return_value = mock_search_tool + mock_create_execute.return_value = mock_execute_tool + utility = tools.utility_tools() + assert len(utility) == 2 # tool_search + tool_execute + + # With semantic search - presence of semantic_client enables it + mock_client = MagicMock(spec=SemanticSearchClient) + with ( + patch("stackone_ai.utility_tools.create_semantic_tool_search") as mock_create, + patch("stackone_ai.utility_tools.create_tool_execute") as mock_create_execute, + ): + mock_search_tool = MagicMock(spec=StackOneTool) + mock_search_tool.name = "tool_search" + mock_execute_tool = MagicMock(spec=StackOneTool) + mock_execute_tool.name = "tool_execute" + mock_create.return_value = mock_search_tool + mock_create_execute.return_value = mock_execute_tool + utility = tools.utility_tools(semantic_client=mock_client) + assert len(utility) == 2 + # Should pass available connectors from the tools collection + mock_create.assert_called_once_with(mock_client, available_connectors={"test"}) + + +class TestSemanticToolSearch: + """Tests for create_semantic_tool_search utility.""" + + def test_create_semantic_tool_search_type_error(self) -> None: + """Test that invalid client raises TypeError.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + with pytest.raises(TypeError) as exc_info: + create_semantic_tool_search("not a client") # type: ignore + + assert "SemanticSearchClient instance" in str(exc_info.value) + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_execute(self, mock_search: MagicMock) -> None: + """Test executing semantic tool search.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + 
similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=1, + query="create employee", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + result = tool.execute({"query": "create employee", "limit": 5}) + + assert "tools" in result + assert len(result["tools"]) == 1 + # Name should be normalized from versioned API format to MCP format + assert result["tools"][0]["name"] == "bamboohr_create_employee" + assert result["tools"][0]["score"] == 0.92 + assert result["tools"][0]["connector"] == "bamboohr" + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_with_min_score(self, mock_search: MagicMock) -> None: + """Test semantic tool search with min_score filter.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="high_score_action", + connector_key="test", + similarity_score=0.9, + label="High Score", + description="High scoring action", + ), + SemanticSearchResult( + action_name="low_score_action", + connector_key="test", + similarity_score=0.3, + label="Low Score", + description="Low scoring action", + ), + ], + total_count=2, + query="test", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + result = tool.execute({"query": "test", "limit": 10, "minScore": 0.5}) + + assert len(result["tools"]) == 1 + assert result["tools"][0]["name"] == "high_score_action" + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_with_connector(self, mock_search: MagicMock) -> None: + """Test semantic tool search with connector filter.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[], + total_count=0, + query="create employee", + ) + + client = 
SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + tool.execute({"query": "create employee", "connector": "bamboohr"}) + + mock_search.assert_called_once_with( + query="create employee", + connector="bamboohr", + top_k=5, # default limit + ) + + def test_semantic_tool_search_has_correct_parameters(self) -> None: + """Test that semantic tool has the expected parameter schema.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + assert tool.name == "tool_search" + assert "semantic" in tool.description.lower() + + props = tool.parameters.properties + assert "query" in props + assert "limit" in props + assert "minScore" in props + assert "connector" in props + + +class TestSemanticToolSearchScoping: + """Tests for connector scoping in create_semantic_tool_search.""" + + @patch.object(SemanticSearchClient, "search") + def test_scoped_searches_each_connector_in_parallel(self, mock_search: MagicMock) -> None: + """Test that available_connectors triggers per-connector parallel searches.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + def _search_side_effect( + query: str, connector: str | None = None, top_k: int | None = None + ) -> SemanticSearchResponse: + if connector == "bamboohr": + return SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.95, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=1, + query=query, + ) + elif connector == "hibob": + return SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="hibob_create_employee", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=1, + query=query, + ) + return SemanticSearchResponse(results=[], 
total_count=0, query=query) + + mock_search.side_effect = _search_side_effect + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client, available_connectors={"bamboohr", "hibob"}) + + result = tool.execute({"query": "create employee", "limit": 10}) + + # Should have searched each connector separately + assert mock_search.call_count == 2 + called_connectors = {call.kwargs.get("connector") for call in mock_search.call_args_list} + assert called_connectors == {"bamboohr", "hibob"} + + # Should return results from both connectors + assert len(result["tools"]) == 2 + names = [t["name"] for t in result["tools"]] + assert "bamboohr_create_employee" in names + assert "hibob_create_employee" in names + + @patch.object(SemanticSearchClient, "search") + def test_scoped_agent_connector_intersects_with_available(self, mock_search: MagicMock) -> None: + """Test that agent's connector param is intersected with available_connectors.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.95, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=1, + query="create employee", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client, available_connectors={"bamboohr", "hibob"}) + + # Agent requests connector="bamboohr" — should only search bamboohr + tool.execute({"query": "create employee", "connector": "bamboohr"}) + + assert mock_search.call_count == 1 + assert mock_search.call_args.kwargs["connector"] == "bamboohr" + + @patch.object(SemanticSearchClient, "search") + def test_scoped_agent_connector_not_available_returns_empty(self, mock_search: MagicMock) -> None: + """Test that requesting an unavailable connector returns empty results.""" + from stackone_ai.utility_tools 
import create_semantic_tool_search + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client, available_connectors={"bamboohr", "hibob"}) + + # Agent requests connector="workday" — not in available_connectors + result = tool.execute({"query": "create employee", "connector": "workday"}) + + # Should not call API at all + mock_search.assert_not_called() + assert result["tools"] == [] + + @patch.object(SemanticSearchClient, "search") + def test_no_connectors_queries_full_catalog(self, mock_search: MagicMock) -> None: + """Test that available_connectors=None preserves full catalog behavior (backwards compat).""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="workday_create_worker", + connector_key="workday", + similarity_score=0.90, + label="Create Worker", + description="Creates worker", + ), + ], + total_count=1, + query="create employee", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) # No available_connectors + + result = tool.execute({"query": "create employee", "limit": 5}) + + # Should make a single call without connector scoping + mock_search.assert_called_once_with( + query="create employee", + connector=None, + top_k=5, + ) + assert len(result["tools"]) == 1 + assert result["tools"][0]["name"] == "workday_create_worker" + + +class TestConnectorProperty: + """Tests for StackOneTool.connector property.""" + + def test_connector_extracts_from_name(self) -> None: + """Test that connector is extracted from tool name.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="bamboohr_create_employee", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Creates employee", + parameters=ToolParameters(type="object", properties={}), 
+ _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "bamboohr" + + def test_connector_is_lowercase(self) -> None: + """Test that connector is always lowercase.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="BambooHR_Create_Employee", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Creates employee", + parameters=ToolParameters(type="object", properties={}), + _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "bamboohr" + + def test_connector_with_single_word_name(self) -> None: + """Test connector extraction with single-word tool name.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="utility", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Utility tool", + parameters=ToolParameters(type="object", properties={}), + _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "utility" + + +class TestToolsConnectorHelpers: + """Tests for Tools.get_connectors().""" + + def test_get_connectors(self) -> None: + """Test getting unique connectors from tools collection.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters, Tools + + def make_tool(name: str) -> StackOneTool: + return StackOneTool( + description=f"Tool {name}", + parameters=ToolParameters(type="object", properties={}), + _execute_config=ExecuteConfig(name=name, method="POST", url="", headers={}), + _api_key="test-key", + ) + + tools = Tools( + [ + make_tool("bamboohr_create_employee"), + make_tool("bamboohr_list_employees"), + make_tool("hibob_create_employee"), + make_tool("slack_send_message"), + ] + ) + + connectors = tools.get_connectors() + + assert connectors == {"bamboohr", "hibob", "slack"} + + def 
test_get_connectors_empty(self) -> None: + """Test get_connectors with empty tools collection.""" + from stackone_ai.models import Tools + + tools = Tools([]) + assert tools.get_connectors() == set() + + +class TestSearchActionNamesWithAccountIds: + """Tests for search_action_names with account_ids parameter.""" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_filters_by_account_connectors(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that only connectors from linked accounts are searched (per-connector parallel).""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + def _search_side_effect( + query: str, connector: str | None = None, top_k: int | None = None + ) -> SemanticSearchResponse: + if connector == "bamboohr": + return SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.95, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=1, + query=query, + ) + elif connector == "hibob": + return SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="hibob_1.0.0_hibob_create_employee_global", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=1, + query=query, + ) + return SemanticSearchResponse(results=[], total_count=0, query=query) + + mock_search.side_effect = _search_side_effect + + # Mock MCP to return only bamboohr and hibob tools (user's linked accounts) + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="hibob_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + 
] + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names( + "create employee", + account_ids=["acc-123"], + top_k=10, + ) + + # Only bamboohr and hibob searched (workday never queried) + assert len(results) == 2 + action_names = [r.action_name for r in results] + assert "bamboohr_create_employee" in action_names + assert "hibob_create_employee" in action_names + # Verify only per-connector calls were made (no global call) + assert mock_search.call_count == 2 + called_connectors = {call.kwargs.get("connector") for call in mock_search.call_args_list} + assert called_connectors == {"bamboohr", "hibob"} + + @patch.object(SemanticSearchClient, "search") + def test_search_action_names_returns_empty_on_failure(self, mock_search: MagicMock) -> None: + """Test that search_action_names returns [] when semantic search fails.""" + from stackone_ai import StackOneToolSet + + mock_search.side_effect = SemanticSearchError("API unavailable") + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("create employee") + + assert results == [] + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_searches_all_connectors_in_parallel(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that all available connectors are searched directly (no global call + fallback).""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.return_value = SemanticSearchResponse( + results=[], + total_count=0, + query="test", + ) + + # Mock MCP to return tools from two connectors + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="hibob_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset 
= StackOneToolSet(api_key="test-key") + toolset.search_action_names( + "test", + account_ids=["acc-123"], + top_k=5, + ) + + # Each connector gets its own search call (parallel, not sequential fallback) + assert mock_search.call_count == 2 + called_connectors = {call.kwargs.get("connector") for call in mock_search.call_args_list} + assert called_connectors == {"bamboohr", "hibob"} + # top_k is passed to each per-connector call + for call in mock_search.call_args_list: + assert call.kwargs["top_k"] == 5 + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_respects_top_k_after_filtering(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that results are limited to top_k after filtering.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Return more results than top_k using versioned API names + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name=f"bamboohr_1.0.0_bamboohr_action_{i}_global", + connector_key="bamboohr", + similarity_score=0.9 - i * 0.1, + label=f"Action {i}", + description=f"Action {i}", + ) + for i in range(10) + ], + total_count=10, + query="test", + ) + + # Mock MCP to return bamboohr tools + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_action_0", + description="Action 0", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names( + "test", + account_ids=["acc-123"], + top_k=3, + ) + + # Should be limited to top_k after normalization + assert len(results) == 3 + # Names should be normalized + assert results[0].action_name == "bamboohr_action_0" + + +class TestNormalizeActionName: + """Tests for _normalize_action_name() function.""" + + def test_versioned_name_is_normalized(self) -> None: + """Test that versioned API names are normalized to MCP format.""" + 
from stackone_ai.utils.normalize import _normalize_action_name + + assert ( + _normalize_action_name("calendly_1.0.0_calendly_create_scheduling_link_global") + == "calendly_create_scheduling_link" + ) + + def test_multi_segment_version(self) -> None: + """Test normalization with multi-segment semver.""" + from stackone_ai.utils.normalize import _normalize_action_name + + assert ( + _normalize_action_name("breathehr_1.0.1_breathehr_list_employees_global") + == "breathehr_list_employees" + ) + + def test_already_normalized_name_unchanged(self) -> None: + """Test that MCP-format names pass through unchanged.""" + from stackone_ai.utils.normalize import _normalize_action_name + + assert _normalize_action_name("bamboohr_create_employee") == "bamboohr_create_employee" + + def test_non_matching_name_unchanged(self) -> None: + """Test that names that don't match the pattern pass through unchanged.""" + from stackone_ai.utils.normalize import _normalize_action_name + + assert _normalize_action_name("some_random_tool") == "some_random_tool" + + def test_empty_string(self) -> None: + """Test empty string input.""" + from stackone_ai.utils.normalize import _normalize_action_name + + assert _normalize_action_name("") == "" + + def test_multiple_versions_normalize_to_same(self) -> None: + """Test that different versions of the same action normalize identically.""" + from stackone_ai.utils.normalize import _normalize_action_name + + name_v1 = _normalize_action_name("breathehr_1.0.0_breathehr_list_employees_global") + name_v2 = _normalize_action_name("breathehr_1.0.1_breathehr_list_employees_global") + assert name_v1 == name_v2 == "breathehr_list_employees" + + +class TestSemanticSearchDeduplication: + """Tests for deduplication after name normalization.""" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_search_tools_deduplicates_versions(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that 
search_tools deduplicates multiple API versions of the same action.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.85, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=3, + query="list employees", + ) + + mock_fetch.return_value = [ + _McpToolDefinition( + name="breathehr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("list employees", top_k=5) + + # Should deduplicate: both breathehr versions -> breathehr_list_employees + tool_names = [t.name for t in tools] + assert tool_names.count("breathehr_list_employees") == 1 + assert "bamboohr_create_employee" in tool_names + assert len(tools) == 2 + + @patch.object(SemanticSearchClient, "search") + def test_search_action_names_normalizes_versions(self, mock_search: MagicMock) -> None: + """Test that search_action_names normalizes versioned API names.""" + from stackone_ai import StackOneToolSet + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + 
action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + ], + total_count=2, + query="list employees", + ) + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("list employees", top_k=5) + + # Both results are returned with normalized names (no dedup in global path) + assert len(results) == 2 + assert results[0].action_name == "breathehr_list_employees" + assert results[1].action_name == "breathehr_list_employees" + # Sorted by score descending + assert results[0].similarity_score == 0.95 + assert results[1].similarity_score == 0.90 + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_deduplicates_versions(self, mock_search: MagicMock) -> None: + """Test that create_semantic_tool_search deduplicates API versions.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + ], + total_count=2, + query="list employees", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + result = tool.execute({"query": "list employees", "limit": 10}) + + # Should deduplicate: only one result + assert len(result["tools"]) == 1 + assert 
result["tools"][0]["name"] == "breathehr_list_employees" + assert result["tools"][0]["score"] == 0.95