From 0e0f7f347e0d2bf6ddd4399a11e81658d0a9e849 Mon Sep 17 00:00:00 2001 From: Stackone Date: Thu, 5 Feb 2026 11:48:44 +0000 Subject: [PATCH 01/25] Senamtic Search on action in Python AI SDK --- stackone_ai/__init__.py | 15 +- stackone_ai/models.py | 66 ++- stackone_ai/semantic_search.py | 145 +++++ stackone_ai/toolset.py | 143 +++++ stackone_ai/utility_tools.py | 111 ++++ tests/benchmark_search.py | 987 +++++++++++++++++++++++++++++++++ tests/test_semantic_search.py | 486 ++++++++++++++++ 7 files changed, 1935 insertions(+), 18 deletions(-) create mode 100644 stackone_ai/semantic_search.py create mode 100644 tests/benchmark_search.py create mode 100644 tests/test_semantic_search.py diff --git a/stackone_ai/__init__.py b/stackone_ai/__init__.py index f7a0aba..434e318 100644 --- a/stackone_ai/__init__.py +++ b/stackone_ai/__init__.py @@ -1,11 +1,22 @@ """StackOne AI SDK""" -from .models import StackOneTool, Tools -from .toolset import StackOneToolSet +from stackone_ai.models import StackOneTool, Tools +from stackone_ai.semantic_search import ( + SemanticSearchClient, + SemanticSearchError, + SemanticSearchResponse, + SemanticSearchResult, +) +from stackone_ai.toolset import StackOneToolSet __all__ = [ "StackOneToolSet", "StackOneTool", "Tools", + # Semantic search + "SemanticSearchClient", + "SemanticSearchResult", + "SemanticSearchResponse", + "SemanticSearchError", ] __version__ = "2.3.1" diff --git a/stackone_ai/models.py b/stackone_ai/models.py index fcd32d7..69b670b 100644 --- a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -6,7 +6,10 @@ from collections.abc import Sequence from datetime import datetime, timezone from enum import Enum -from typing import Annotated, Any, ClassVar, TypeAlias, cast +from typing import TYPE_CHECKING, Annotated, Any, ClassVar, TypeAlias, cast + +if TYPE_CHECKING: + from stackone_ai.semantic_search import SemanticSearchClient from urllib.parse import quote import httpx @@ -530,34 +533,65 @@ def to_langchain(self) -> 
Sequence[BaseTool]: """ return [tool.to_langchain() for tool in self.tools] - def utility_tools(self, hybrid_alpha: float | None = None) -> Tools: + def utility_tools( + self, + hybrid_alpha: float | None = None, + use_semantic_search: bool = False, + semantic_client: SemanticSearchClient | None = None, + ) -> Tools: """Return utility tools for tool discovery and execution - Utility tools enable dynamic tool discovery and execution based on natural language queries - using hybrid BM25 + TF-IDF search. + Utility tools enable dynamic tool discovery and execution based on natural language queries. + By default, uses local hybrid BM25 + TF-IDF search. Optionally, can use cloud-based + semantic search for higher accuracy (84% Hit@5 vs 21% for local search). Args: - hybrid_alpha: Weight for BM25 in hybrid search (0-1). If not provided, uses - ToolIndex.DEFAULT_HYBRID_ALPHA (0.2), which gives more weight to BM25 scoring - and has been shown to provide better tool discovery accuracy - (10.8% improvement in validation testing). + hybrid_alpha: Weight for BM25 in hybrid search (0-1). Only used when + use_semantic_search=False. If not provided, uses DEFAULT_HYBRID_ALPHA (0.2), + which gives more weight to BM25 scoring. + use_semantic_search: If True, use cloud-based semantic search instead of local + BM25+TF-IDF search. Requires semantic_client to be provided. + semantic_client: SemanticSearchClient instance. Required when use_semantic_search=True. + Can be obtained from StackOneToolSet.semantic_client. 
Returns: Tools collection containing tool_search and tool_execute + Raises: + ValueError: If use_semantic_search=True but semantic_client is not provided + Note: This feature is in beta and may change in future versions + + Example: + # Local search (default) + utility = tools.utility_tools() + + # Semantic search (requires toolset) + from stackone_ai import StackOneToolSet + toolset = StackOneToolSet() + tools = toolset.fetch_tools() + utility = tools.utility_tools( + use_semantic_search=True, + semantic_client=toolset.semantic_client, + ) """ - from stackone_ai.utility_tools import ( - ToolIndex, - create_tool_execute, - create_tool_search, - ) + from stackone_ai.utility_tools import create_tool_execute - # Create search index with hybrid search - index = ToolIndex(self.tools, hybrid_alpha=hybrid_alpha) + if use_semantic_search: + if semantic_client is None: + raise ValueError("semantic_client is required when use_semantic_search=True") + + from stackone_ai.utility_tools import create_semantic_tool_search + + search_tool = create_semantic_tool_search(semantic_client) + execute_tool = create_tool_execute(self) + return Tools([search_tool, execute_tool]) - # Create utility tools + # Default: local BM25+TF-IDF search + from stackone_ai.utility_tools import ToolIndex, create_tool_search + + index = ToolIndex(self.tools, hybrid_alpha=hybrid_alpha) filter_tool = create_tool_search(index) execute_tool = create_tool_execute(self) diff --git a/stackone_ai/semantic_search.py b/stackone_ai/semantic_search.py new file mode 100644 index 0000000..c9caf21 --- /dev/null +++ b/stackone_ai/semantic_search.py @@ -0,0 +1,145 @@ +"""Semantic search client for StackOne action search API.""" + +from __future__ import annotations + +import base64 +from typing import Any + +import httpx +from pydantic import BaseModel + + +class SemanticSearchError(Exception): + """Raised when semantic search fails.""" + + pass + + +class SemanticSearchResult(BaseModel): + """Single result from semantic 
class SemanticSearchClient:
    """Client for the StackOne semantic action-search API.

    Thin wrapper around the ``POST /actions/search`` endpoint, which ranks
    connector actions against a natural-language query using embedding
    similarity (reported as 84% Hit@5 vs ~21% for local BM25+TF-IDF).

    Example:
        client = SemanticSearchClient(api_key="sk-xxx")
        response = client.search("create employee", connector="bamboohr", top_k=5)
        for result in response.results:
            print(f"{result.action_name}: {result.similarity_score:.2f}")
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = "https://api.stackone.com",
        timeout: float = 30.0,
    ) -> None:
        """Configure the client.

        Args:
            api_key: StackOne API key (used as the Basic-auth username)
            base_url: Base URL for API requests; a trailing slash is stripped
            timeout: Per-request timeout in seconds
        """
        self.api_key = api_key
        # Normalize so URL joins below never produce a double slash.
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _build_auth_header(self) -> str:
        """Return the HTTP Basic auth header: API key as username, empty password."""
        raw = f"{self.api_key}:".encode()
        return f"Basic {base64.b64encode(raw).decode()}"

    def search(
        self,
        query: str,
        connector: str | None = None,
        top_k: int = 10,
    ) -> SemanticSearchResponse:
        """Run a semantic search over available actions.

        Args:
            query: Natural language query describing the tools/actions needed
            connector: Optional connector/provider filter (e.g., "bamboohr", "slack")
            top_k: Maximum number of results to return (1-500, default: 10)

        Returns:
            SemanticSearchResponse with matching actions and similarity scores

        Raises:
            SemanticSearchError: On HTTP errors, transport failures, or a
                response body that does not parse into the expected model
        """
        endpoint = f"{self.base_url}/actions/search"
        request_headers = {
            "Content-Type": "application/json",
            "Authorization": self._build_auth_header(),
        }
        body: dict[str, Any] = {"query": query, "top_k": top_k}
        if connector:
            body["connector"] = connector

        try:
            resp = httpx.post(endpoint, json=body, headers=request_headers, timeout=self.timeout)
            resp.raise_for_status()
            # Parsing stays inside the try so a bad payload also surfaces
            # as SemanticSearchError via the generic handler below.
            return SemanticSearchResponse(**resp.json())
        except httpx.HTTPStatusError as e:
            raise SemanticSearchError(
                f"API error: {e.response.status_code} - {e.response.text}"
            ) from e
        except httpx.RequestError as e:
            raise SemanticSearchError(f"Request failed: {e}") from e
        except Exception as e:
            raise SemanticSearchError(f"Search failed: {e}") from e

    def search_action_names(
        self,
        query: str,
        connector: str | None = None,
        top_k: int = 10,
        min_score: float = 0.0,
    ) -> list[str]:
        """Search and return only the action names, filtered by score.

        Args:
            query: Natural language query
            connector: Optional connector/provider filter
            top_k: Maximum number of results requested from the API
            min_score: Drop results scoring below this threshold (0-1)

        Returns:
            Action names in relevance order (highest-scoring first)
        """
        hits = self.search(query, connector, top_k)
        names: list[str] = []
        for hit in hits.results:
            if hit.similarity_score >= min_score:
                names.append(hit.action_name)
        return names
+ + This method uses the StackOne semantic search API (84% Hit@5 accuracy) + to find relevant tools based on natural language queries, then fetches + those tools via MCP. + + Args: + query: Natural language description of needed functionality + (e.g., "create employee", "send a message") + connector: Optional provider/connector filter (e.g., "bamboohr", "slack") + top_k: Maximum number of tools to return (default: 10) + min_score: Minimum similarity score threshold 0-1 (default: 0.0) + account_ids: Optional account IDs (uses set_accounts() if not provided) + fallback_to_local: If True, fall back to local BM25+TF-IDF search on API failure + + Returns: + Tools collection with semantically matched tools + + Raises: + SemanticSearchError: If the API call fails and fallback_to_local is False + + Examples: + # Basic semantic search + tools = toolset.search_tools("manage employee records", top_k=5) + + # Filter by connector + tools = toolset.search_tools( + "create time off request", + connector="bamboohr", + min_score=0.5 + ) + + # With account filtering + tools = toolset.search_tools( + "send message", + account_ids=["acc-123"], + top_k=3 + ) + """ + try: + action_names = self.semantic_client.search_action_names( + query=query, + connector=connector, + top_k=top_k, + min_score=min_score, + ) + + if not action_names: + return Tools([]) + + return self.fetch_tools(actions=action_names, account_ids=account_ids) + + except SemanticSearchError: + if not fallback_to_local: + raise + + # Fallback to local search + all_tools = self.fetch_tools(account_ids=account_ids) + utility = all_tools.utility_tools() + search_tool = utility.get_tool("tool_search") + + if search_tool: + result = search_tool.execute( + { + "query": query, + "limit": top_k, + "minScore": min_score, + } + ) + matched_names = [t["name"] for t in result.get("tools", [])] + return Tools([t for t in all_tools if t.name in matched_names]) + + return all_tools + + def search_action_names( + self, + query: str, + *, 
+ connector: str | None = None, + top_k: int = 10, + min_score: float = 0.0, + ) -> list[SemanticSearchResult]: + """Search for action names without fetching tools. + + Useful when you need to inspect search results before fetching, + or when building custom filtering logic. + + Args: + query: Natural language description of needed functionality + connector: Optional provider/connector filter + top_k: Maximum number of results (default: 10) + min_score: Minimum similarity score threshold 0-1 (default: 0.0) + + Returns: + List of SemanticSearchResult with action names, scores, and metadata + + Example: + # Inspect results before fetching + results = toolset.search_action_names("manage employees", top_k=10) + for r in results: + print(f"{r.action_name}: {r.similarity_score:.2f}") + + # Then fetch specific high-scoring actions + selected = [r.action_name for r in results if r.similarity_score > 0.7] + tools = toolset.fetch_tools(actions=selected) + """ + response = self.semantic_client.search( + query=query, + connector=connector, + top_k=top_k, + ) + return [r for r in response.results if r.similarity_score >= min_score] + def _filter_by_provider(self, tool_name: str, providers: list[str]) -> bool: """Check if a tool name matches any of the provider filters diff --git a/stackone_ai/utility_tools.py b/stackone_ai/utility_tools.py index 0d9a209..473f447 100644 --- a/stackone_ai/utility_tools.py +++ b/stackone_ai/utility_tools.py @@ -15,6 +15,7 @@ if TYPE_CHECKING: from stackone_ai.models import Tools + from stackone_ai.semantic_search import SemanticSearchClient class ToolSearchResult(BaseModel): @@ -266,6 +267,116 @@ def execute( return ToolSearchTool() +def create_semantic_tool_search(semantic_client: SemanticSearchClient) -> StackOneTool: + """Create a semantic search variant of tool_search. + + Uses cloud semantic search API (84% Hit@5 accuracy) instead of + local BM25+TF-IDF (21% accuracy). 
def create_semantic_tool_search(semantic_client: SemanticSearchClient) -> StackOneTool:
    """Build a ``tool_search`` utility tool backed by the cloud semantic API.

    Drop-in replacement for the local BM25+TF-IDF variant: same tool name and
    schema, but results come from the semantic search endpoint.

    Args:
        semantic_client: Initialized SemanticSearchClient instance

    Returns:
        Utility tool for discovering relevant tools via semantic search

    Raises:
        TypeError: If semantic_client is not a SemanticSearchClient
    """
    # Runtime import: the module-level one is guarded by TYPE_CHECKING.
    from stackone_ai.semantic_search import SemanticSearchClient  # noqa: F811

    if not isinstance(semantic_client, SemanticSearchClient):
        raise TypeError("semantic_client must be a SemanticSearchClient instance")

    name = "tool_search"
    description = (
        "Searches for relevant tools based on a natural language query using "
        "semantic vector search (84% accuracy). Call this first to discover "
        "available tools before executing them."
    )

    parameters = ToolParameters(
        type="object",
        properties={
            "query": {
                "type": "string",
                "description": (
                    "Natural language query describing what tools you need "
                    '(e.g., "onboard a new team member", "request vacation days")'
                ),
            },
            "limit": {
                "type": "number",
                "description": "Maximum number of tools to return (default: 5)",
                "default": 5,
            },
            "minScore": {
                "type": "number",
                "description": "Minimum similarity score (0-1) to filter results (default: 0.0)",
                "default": 0.0,
            },
            "connector": {
                "type": "string",
                "description": "Optional: filter by connector/provider (e.g., 'bamboohr', 'slack')",
                "nullable": True,
            },
        },
    )

    def run_search(arguments: str | JsonDict | None = None) -> JsonDict:
        """Parse tool arguments, query the API, and shape the results."""
        if isinstance(arguments, str):
            params = json.loads(arguments)
        else:
            params = arguments or {}

        query = params.get("query", "")
        limit = int(params.get("limit", 5))
        threshold = float(params.get("minScore", 0.0))

        hits = semantic_client.search(
            query=query,
            connector=params.get("connector"),
            top_k=limit,
        )

        matches: list[JsonDict] = []
        for hit in hits.results:
            if hit.similarity_score < threshold:
                continue
            matches.append(
                {
                    "name": hit.action_name,
                    "description": hit.description,
                    "score": hit.similarity_score,
                    "connector": hit.connector_key,
                }
            )

        return {"tools": matches[:limit]}

    execute_config = ExecuteConfig(
        name=name,
        method="POST",
        url="",  # never dispatched: this tool runs locally, not over HTTP
        headers={},
    )

    class SemanticToolSearchTool(StackOneTool):
        """Utility tool that resolves tool searches through the semantic API."""

        def __init__(self) -> None:
            super().__init__(
                description=description,
                parameters=parameters,
                _execute_config=execute_config,
                _api_key="",  # no direct API calls from the tool shell itself
                _account_id=None,
            )

        def execute(
            self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None
        ) -> JsonDict:
            # `options` is accepted for interface parity but unused here.
            return run_search(arguments)

    return SemanticToolSearchTool()
@dataclass
class EvaluationTask:
    """One benchmark case: a query plus the substrings that count as a hit.

    A result is considered correct when its name/label contains any entry of
    ``expected_matches`` (case-insensitive matching is up to the harness).
    """

    # Unique identifier, e.g. "hr-sem-01".
    id: str
    # Natural-language query fed to both search backends.
    query: str
    # Vertical the query belongs to (hr, recruiting, crm, ...).
    category: str
    # Rough difficulty bucket for per-complexity reporting.
    complexity: Literal["simple", "moderate", "complex"]
    # Substrings that identify an acceptable matching action.
    expected_matches: list[str]
    # Optional connector filter; None means search across all connectors.
    connector: str | None = None
EvaluationTask( + id="hr-sem-06", + query="terminate an employee", + category="hr", + complexity="moderate", + expected_matches=["Delete Employee", "Terminate", "employee"], + ), + EvaluationTask( + id="hr-sem-07", + query="pull the org chart", + category="hr", + complexity="moderate", + expected_matches=["List Departments", "Organization", "hierarchy", "departments"], + ), + EvaluationTask( + id="hr-sem-08", + query="sick day request", + category="hr", + complexity="simple", + expected_matches=["Create Absence", "Time-Off", "Leave", "absence"], + ), + EvaluationTask( + id="hr-sem-09", + query="get employee details", + category="hr", + complexity="simple", + expected_matches=["Get Employee", "employee"], + ), + EvaluationTask( + id="hr-sem-10", + query="update staff record", + category="hr", + complexity="simple", + expected_matches=["Update Employee", "employee"], + ), + EvaluationTask( + id="hr-sem-11", + query="add new hire to the system", + category="hr", + complexity="moderate", + expected_matches=["Create Employee", "employee"], + ), + EvaluationTask( + id="hr-sem-12", + query="who works in engineering", + category="hr", + complexity="moderate", + expected_matches=["List Employees", "employees", "department"], + ), + EvaluationTask( + id="hr-sem-13", + query="view compensation details", + category="hr", + complexity="moderate", + expected_matches=["Get Employee", "compensation", "salary"], + ), + EvaluationTask( + id="hr-sem-14", + query="see all time-off requests", + category="hr", + complexity="simple", + expected_matches=["List Time Off", "List Absences", "time_off", "absence"], + ), + EvaluationTask( + id="hr-sem-15", + query="approve PTO", + category="hr", + complexity="moderate", + expected_matches=["Update Time Off", "Update Absence", "time_off", "absence"], + ), + # Recruiting/ATS - Natural language + EvaluationTask( + id="ats-sem-01", + query="bring in a new applicant", + category="recruiting", + complexity="moderate", + expected_matches=["Create 
Candidate", "Create Application", "candidate", "application"], + ), + EvaluationTask( + id="ats-sem-02", + query="see who applied for the role", + category="recruiting", + complexity="simple", + expected_matches=["List Candidates", "List Applications", "candidates", "applications"], + ), + EvaluationTask( + id="ats-sem-03", + query="advance someone to the next round", + category="recruiting", + complexity="moderate", + expected_matches=["Move Application", "Update Stage", "stage", "move"], + ), + EvaluationTask( + id="ats-sem-04", + query="turn down a job seeker", + category="recruiting", + complexity="moderate", + expected_matches=["Reject", "Disqualify", "reject", "application"], + ), + EvaluationTask( + id="ats-sem-05", + query="post a new position", + category="recruiting", + complexity="simple", + expected_matches=["Create Job", "Job Posting", "job"], + ), + EvaluationTask( + id="ats-sem-06", + query="schedule an interview", + category="recruiting", + complexity="moderate", + expected_matches=["Create Interview", "Schedule", "interview"], + ), + EvaluationTask( + id="ats-sem-07", + query="view candidate resume", + category="recruiting", + complexity="simple", + expected_matches=["Get Candidate", "candidate", "document"], + ), + EvaluationTask( + id="ats-sem-08", + query="add interview feedback", + category="recruiting", + complexity="moderate", + expected_matches=["Create Scorecard", "scorecard", "feedback"], + ), + EvaluationTask( + id="ats-sem-09", + query="check application status", + category="recruiting", + complexity="simple", + expected_matches=["Get Application", "application"], + ), + EvaluationTask( + id="ats-sem-10", + query="see open positions", + category="recruiting", + complexity="simple", + expected_matches=["List Jobs", "jobs"], + ), + # CRM - Natural language + EvaluationTask( + id="crm-sem-01", + query="add a new prospect", + category="crm", + complexity="simple", + expected_matches=["Create Lead", "Create Contact", "lead", "contact"], + ), 
+ EvaluationTask( + id="crm-sem-02", + query="log a sales opportunity", + category="crm", + complexity="moderate", + expected_matches=["Create Deal", "Create Opportunity", "deal", "opportunity"], + ), + EvaluationTask( + id="crm-sem-03", + query="close a deal", + category="crm", + complexity="moderate", + expected_matches=["Update Deal", "Update Opportunity", "deal", "opportunity"], + ), + EvaluationTask( + id="crm-sem-04", + query="find customer information", + category="crm", + complexity="simple", + expected_matches=["Get Contact", "Get Account", "contact", "account"], + ), + EvaluationTask( + id="crm-sem-05", + query="create a new account", + category="crm", + complexity="simple", + expected_matches=["Create Account", "account"], + ), + EvaluationTask( + id="crm-sem-06", + query="log a sales call", + category="crm", + complexity="moderate", + expected_matches=["Create Activity", "activity", "call"], + ), + EvaluationTask( + id="crm-sem-07", + query="see pipeline deals", + category="crm", + complexity="simple", + expected_matches=["List Deals", "List Opportunities", "deals", "opportunities"], + ), + EvaluationTask( + id="crm-sem-08", + query="update contact info", + category="crm", + complexity="simple", + expected_matches=["Update Contact", "contact"], + ), + EvaluationTask( + id="crm-sem-09", + query="track customer interaction", + category="crm", + complexity="moderate", + expected_matches=["Create Activity", "activity"], + ), + EvaluationTask( + id="crm-sem-10", + query="view all contacts", + category="crm", + complexity="simple", + expected_matches=["List Contacts", "contacts"], + ), + # Project Management - Natural language + EvaluationTask( + id="pm-sem-01", + query="assign work to someone", + category="project", + complexity="simple", + expected_matches=["Create Task", "Create Issue", "Assign", "task", "issue"], + ), + EvaluationTask( + id="pm-sem-02", + query="check my to-do list", + category="project", + complexity="simple", + expected_matches=["List 
Tasks", "List Issues", "tasks", "issues"], + ), + EvaluationTask( + id="pm-sem-03", + query="file a bug report", + category="project", + complexity="moderate", + expected_matches=["Create Issue", "Create Task", "issue"], + ), + EvaluationTask( + id="pm-sem-04", + query="mark task as done", + category="project", + complexity="simple", + expected_matches=["Update Task", "Update Issue", "task", "issue"], + ), + EvaluationTask( + id="pm-sem-05", + query="create a new project", + category="project", + complexity="simple", + expected_matches=["Create Project", "project"], + ), + EvaluationTask( + id="pm-sem-06", + query="view project status", + category="project", + complexity="simple", + expected_matches=["Get Project", "project"], + ), + EvaluationTask( + id="pm-sem-07", + query="add a comment to ticket", + category="project", + complexity="moderate", + expected_matches=["Create Comment", "comment"], + ), + EvaluationTask( + id="pm-sem-08", + query="see sprint backlog", + category="project", + complexity="moderate", + expected_matches=["List Tasks", "List Issues", "tasks", "issues"], + ), + # Messaging - Natural language + EvaluationTask( + id="msg-sem-01", + query="ping my colleague", + category="messaging", + complexity="simple", + expected_matches=["Send Message", "message"], + ), + EvaluationTask( + id="msg-sem-02", + query="start a group chat", + category="messaging", + complexity="moderate", + expected_matches=["Create Conversation", "Create Channel", "conversation", "channel"], + ), + EvaluationTask( + id="msg-sem-03", + query="post in the team channel", + category="messaging", + complexity="simple", + expected_matches=["Send Message", "message", "channel"], + ), + EvaluationTask( + id="msg-sem-04", + query="see recent messages", + category="messaging", + complexity="simple", + expected_matches=["List Messages", "messages"], + ), + EvaluationTask( + id="msg-sem-05", + query="create a new channel", + category="messaging", + complexity="simple", + 
expected_matches=["Create Channel", "channel"], + ), + # Documents - Natural language + EvaluationTask( + id="doc-sem-01", + query="upload a file", + category="documents", + complexity="simple", + expected_matches=["Upload File", "Create File", "file", "upload"], + ), + EvaluationTask( + id="doc-sem-02", + query="download the document", + category="documents", + complexity="simple", + expected_matches=["Download File", "Get File", "file", "download"], + ), + EvaluationTask( + id="doc-sem-03", + query="see all shared files", + category="documents", + complexity="simple", + expected_matches=["List Files", "files"], + ), + EvaluationTask( + id="doc-sem-04", + query="create a new folder", + category="documents", + complexity="simple", + expected_matches=["Create Folder", "folder"], + ), + EvaluationTask( + id="doc-sem-05", + query="share document with team", + category="documents", + complexity="moderate", + expected_matches=["Share File", "Update File", "file", "share"], + ), + # Marketing - Natural language + EvaluationTask( + id="mkt-sem-01", + query="create email campaign", + category="marketing", + complexity="moderate", + expected_matches=["Create Campaign", "campaign", "email"], + ), + EvaluationTask( + id="mkt-sem-02", + query="add contact to mailing list", + category="marketing", + complexity="simple", + expected_matches=["Add Member", "Create Contact", "contact", "list"], + ), + EvaluationTask( + id="mkt-sem-03", + query="send newsletter", + category="marketing", + complexity="moderate", + expected_matches=["Send Campaign", "campaign", "email"], + ), + EvaluationTask( + id="mkt-sem-04", + query="view campaign analytics", + category="marketing", + complexity="moderate", + expected_matches=["Get Campaign", "campaign", "analytics"], + ), + EvaluationTask( + id="mkt-sem-05", + query="create automation workflow", + category="marketing", + complexity="complex", + expected_matches=["Create Automation", "automation", "workflow"], + ), + # LMS - Natural language + 
EvaluationTask( + id="lms-sem-01", + query="assign training to employee", + category="lms", + complexity="moderate", + expected_matches=["Create Assignment", "Assign Content", "assignment", "content"], + ), + EvaluationTask( + id="lms-sem-02", + query="check course completion", + category="lms", + complexity="simple", + expected_matches=["Get Completion", "completion", "progress"], + ), + EvaluationTask( + id="lms-sem-03", + query="create new course", + category="lms", + complexity="moderate", + expected_matches=["Create Content", "content", "course"], + ), + EvaluationTask( + id="lms-sem-04", + query="see available trainings", + category="lms", + complexity="simple", + expected_matches=["List Content", "content", "courses"], + ), + EvaluationTask( + id="lms-sem-05", + query="track learning progress", + category="lms", + complexity="moderate", + expected_matches=["Get Completion", "List Completions", "completion"], + ), + # Per-connector examples + EvaluationTask( + id="bamboo-sem-01", + query="bring on a new hire", + category="hr", + complexity="moderate", + connector="bamboohr", + expected_matches=["Create Employee", "employee"], + ), + EvaluationTask( + id="bamboo-sem-02", + query="get employee time off balance", + category="hr", + complexity="simple", + connector="bamboohr", + expected_matches=["Get Time Off", "time_off", "balance"], + ), + EvaluationTask( + id="slack-sem-01", + query="ping the team", + category="messaging", + complexity="simple", + connector="slack", + expected_matches=["Send Message", "message"], + ), + EvaluationTask( + id="slack-sem-02", + query="create team workspace", + category="messaging", + complexity="moderate", + connector="slack", + expected_matches=["Create Channel", "channel"], + ), + EvaluationTask( + id="jira-sem-01", + query="file a new bug", + category="project", + complexity="simple", + connector="jira", + expected_matches=["Create Issue", "issue"], + ), + EvaluationTask( + id="jira-sem-02", + query="view sprint tasks", + 
category="project", + complexity="simple", + connector="jira", + expected_matches=["List Issues", "issues"], + ), + EvaluationTask( + id="greenhouse-sem-01", + query="add new job posting", + category="recruiting", + complexity="simple", + connector="greenhouse", + expected_matches=["Create Job", "job"], + ), + EvaluationTask( + id="greenhouse-sem-02", + query="move candidate forward", + category="recruiting", + complexity="moderate", + connector="greenhouse", + expected_matches=["Move Application", "Update Application", "application"], + ), + EvaluationTask( + id="salesforce-sem-01", + query="create sales opportunity", + category="crm", + complexity="simple", + connector="salesforce", + expected_matches=["Create Opportunity", "opportunity"], + ), + EvaluationTask( + id="salesforce-sem-02", + query="log customer call", + category="crm", + complexity="moderate", + connector="salesforce", + expected_matches=["Create Activity", "activity"], + ), + EvaluationTask( + id="hubspot-sem-01", + query="add new lead", + category="crm", + complexity="simple", + connector="hubspot", + expected_matches=["Create Contact", "contact"], + ), + EvaluationTask( + id="hubspot-sem-02", + query="track deal progress", + category="crm", + complexity="moderate", + connector="hubspot", + expected_matches=["Get Deal", "Update Deal", "deal"], + ), + # Complex multi-step queries + EvaluationTask( + id="complex-01", + query="set up new employee with all required training", + category="hr", + complexity="complex", + expected_matches=["Create Employee", "Create Assignment", "employee", "assignment"], + ), + EvaluationTask( + id="complex-02", + query="process job application and schedule interview", + category="recruiting", + complexity="complex", + expected_matches=["Create Application", "Create Interview", "application", "interview"], + ), + EvaluationTask( + id="complex-03", + query="update deal and notify team", + category="crm", + complexity="complex", + expected_matches=["Update Deal", "Send 
Message", "deal", "message"], + ), + EvaluationTask( + id="complex-04", + query="create project and assign initial tasks", + category="project", + complexity="complex", + expected_matches=["Create Project", "Create Task", "project", "task"], + ), + # Edge cases - Abbreviations and slang + EvaluationTask( + id="edge-01", + query="PTO request", + category="hr", + complexity="simple", + expected_matches=["Create Time Off", "time_off", "absence"], + ), + EvaluationTask( + id="edge-02", + query="1:1 meeting", + category="hr", + complexity="moderate", + expected_matches=["Create Event", "Create Meeting", "meeting"], + ), + EvaluationTask( + id="edge-03", + query="OOO", + category="hr", + complexity="simple", + expected_matches=["Time Off", "Absence", "time_off", "absence"], + ), + EvaluationTask( + id="edge-04", + query="ASAP task", + category="project", + complexity="simple", + expected_matches=["Create Task", "task"], + ), + EvaluationTask( + id="edge-05", + query="DM someone", + category="messaging", + complexity="simple", + expected_matches=["Send Message", "message"], + ), + # Synonyms and alternative phrases + EvaluationTask( + id="syn-01", + query="fire someone", + category="hr", + complexity="moderate", + expected_matches=["Delete Employee", "Terminate", "employee"], + ), + EvaluationTask( + id="syn-02", + query="look up customer", + category="crm", + complexity="simple", + expected_matches=["Get Contact", "Get Account", "contact", "account"], + ), + EvaluationTask( + id="syn-03", + query="grab the file", + category="documents", + complexity="simple", + expected_matches=["Download File", "Get File", "file"], + ), + EvaluationTask( + id="syn-04", + query="sign up new user", + category="hr", + complexity="moderate", + expected_matches=["Create Employee", "Create User", "employee", "user"], + ), + EvaluationTask( + id="syn-05", + query="kill the ticket", + category="project", + complexity="moderate", + expected_matches=["Delete Issue", "Update Issue", "Close Issue", 
"issue"], + ), + # Business context queries + EvaluationTask( + id="biz-01", + query="run payroll", + category="hr", + complexity="complex", + expected_matches=["payroll", "compensation"], + ), + EvaluationTask( + id="biz-02", + query="close quarter books", + category="crm", + complexity="complex", + expected_matches=["Update Deal", "deal", "opportunity"], + ), + EvaluationTask( + id="biz-03", + query="annual review", + category="hr", + complexity="moderate", + expected_matches=["Review", "Performance", "employee"], + ), + EvaluationTask( + id="biz-04", + query="sprint planning", + category="project", + complexity="moderate", + expected_matches=["Create Task", "List Tasks", "task", "issue"], + ), + EvaluationTask( + id="biz-05", + query="customer onboarding", + category="crm", + complexity="complex", + expected_matches=["Create Account", "Create Contact", "account", "contact"], + ), +] + + +@dataclass +class TaskResult: + """Result of evaluating a single task.""" + + task_id: str + query: str + hit: bool + rank: int | None # Position of first match, None if not found + top_results: list[str] + latency_ms: float + + +@dataclass +class BenchmarkResult: + """Aggregated results from running benchmark.""" + + method: str + hit_at_k: float + mean_reciprocal_rank: float + avg_latency_ms: float + total_tasks: int + hits: int + results: list[TaskResult] = field(default_factory=list) + + +@dataclass +class ComparisonReport: + """Comparison between local and semantic search.""" + + local_results: BenchmarkResult + semantic_results: BenchmarkResult + + @property + def improvement(self) -> float: + """Percentage point improvement in Hit@k.""" + return self.semantic_results.hit_at_k - self.local_results.hit_at_k + + +def check_hit(result_names: list[str], expected_matches: list[str]) -> tuple[bool, int | None]: + """Check if any expected match appears in results (case-insensitive partial match).""" + for i, name in enumerate(result_names): + name_lower = name.lower() + for 
expected in expected_matches: + if expected.lower() in name_lower: + return True, i + 1 + return False, None + + +class SearchBenchmark: + """Benchmark comparing local vs semantic search.""" + + def __init__( + self, + tools: list, + api_key: str, + base_url: str = "https://api.stackone.com", + ): + """Initialize benchmark with tools and API credentials. + + Args: + tools: List of StackOneTool instances to search + api_key: StackOne API key for semantic search + base_url: Base URL for API requests + """ + self.tools = tools + self.local_index = ToolIndex(tools) + self.semantic_client = SemanticSearchClient(api_key=api_key, base_url=base_url) + + def evaluate_local( + self, + tasks: list[EvaluationTask], + k: int = 5, + ) -> BenchmarkResult: + """Run local BM25+TF-IDF search against benchmark tasks. + + Args: + tasks: List of evaluation tasks + k: Number of top results to consider (default: 5) + + Returns: + BenchmarkResult with aggregated metrics + """ + results: list[TaskResult] = [] + total_rr = 0.0 + + for task in tasks: + start = time.perf_counter() + search_results = self.local_index.search(task.query, limit=k) + latency = (time.perf_counter() - start) * 1000 + + result_names = [r.name for r in search_results] + hit, rank = check_hit(result_names, task.expected_matches) + + if hit and rank: + total_rr += 1.0 / rank + + results.append( + TaskResult( + task_id=task.id, + query=task.query, + hit=hit, + rank=rank, + top_results=result_names[:k], + latency_ms=latency, + ) + ) + + hits = sum(1 for r in results if r.hit) + return BenchmarkResult( + method="Local BM25+TF-IDF", + hit_at_k=hits / len(tasks) if tasks else 0, + mean_reciprocal_rank=total_rr / len(tasks) if tasks else 0, + avg_latency_ms=sum(r.latency_ms for r in results) / len(results) if results else 0, + total_tasks=len(tasks), + hits=hits, + results=results, + ) + + def evaluate_semantic( + self, + tasks: list[EvaluationTask], + k: int = 5, + ) -> BenchmarkResult: + """Run semantic search against 
benchmark tasks. + + Args: + tasks: List of evaluation tasks + k: Number of top results to consider (default: 5) + + Returns: + BenchmarkResult with aggregated metrics + """ + results: list[TaskResult] = [] + total_rr = 0.0 + + for task in tasks: + start = time.perf_counter() + response = self.semantic_client.search( + query=task.query, + connector=task.connector, + top_k=k, + ) + latency = (time.perf_counter() - start) * 1000 + + result_names = [r.action_name for r in response.results] + hit, rank = check_hit(result_names, task.expected_matches) + + if hit and rank: + total_rr += 1.0 / rank + + results.append( + TaskResult( + task_id=task.id, + query=task.query, + hit=hit, + rank=rank, + top_results=result_names[:k], + latency_ms=latency, + ) + ) + + hits = sum(1 for r in results if r.hit) + return BenchmarkResult( + method="Semantic Search", + hit_at_k=hits / len(tasks) if tasks else 0, + mean_reciprocal_rank=total_rr / len(tasks) if tasks else 0, + avg_latency_ms=sum(r.latency_ms for r in results) / len(results) if results else 0, + total_tasks=len(tasks), + hits=hits, + results=results, + ) + + def compare(self, tasks: list[EvaluationTask] | None = None, k: int = 5) -> ComparisonReport: + """Compare both methods and generate report. 
+ + Args: + tasks: List of evaluation tasks (defaults to EVALUATION_TASKS) + k: Number of top results to consider (default: 5) + + Returns: + ComparisonReport with results from both methods + """ + tasks = tasks or EVALUATION_TASKS + local = self.evaluate_local(tasks, k) + semantic = self.evaluate_semantic(tasks, k) + return ComparisonReport(local_results=local, semantic_results=semantic) + + +def print_report(report: ComparisonReport) -> None: + """Print a formatted comparison report.""" + print("\n" + "=" * 70) + print("SEARCH BENCHMARK COMPARISON") + print("=" * 70) + + print(f"\n{'Method':<25} {'Hit@5':<12} {'MRR':<12} {'Latency':<12} {'Hits':<10}") + print("-" * 70) + + for r in [report.local_results, report.semantic_results]: + print( + f"{r.method:<25} {r.hit_at_k:>10.1%} {r.mean_reciprocal_rank:>10.3f} " + f"{r.avg_latency_ms:>9.1f}ms {r.hits:>4}/{r.total_tasks}" + ) + + print("-" * 70) + print(f"{'Improvement':<25} {report.improvement:>+10.1%}") + print("=" * 70) + + # Show failed tasks for local search + failed_local = [r for r in report.local_results.results if not r.hit] + if failed_local and len(failed_local) <= 20: + print(f"\nLocal search missed ({len(failed_local)} tasks):") + for r in failed_local[:10]: + print(f" - {r.task_id}: '{r.query}'") + print(f" Got: {r.top_results[:3]}") + if len(failed_local) > 10: + print(f" ... and {len(failed_local) - 10} more") + + +def run_benchmark(api_key: str | None = None, base_url: str = "https://api.stackone.com") -> ComparisonReport: + """Run the full benchmark comparison. 
+ + Args: + api_key: StackOne API key (uses STACKONE_API_KEY env var if not provided) + base_url: Base URL for API requests + + Returns: + ComparisonReport with results + + Raises: + ValueError: If no API key is available + """ + api_key = api_key or os.environ.get("STACKONE_API_KEY") + if not api_key: + raise ValueError("API key must be provided or set via STACKONE_API_KEY environment variable") + + print("Initializing toolset...") + toolset = StackOneToolSet(api_key=api_key, base_url=base_url) + + print("Fetching tools (this may take a moment)...") + tools = toolset.fetch_tools() + print(f"Loaded {len(tools)} tools") + + print(f"\nRunning benchmark with {len(EVALUATION_TASKS)} evaluation tasks...") + benchmark = SearchBenchmark(list(tools), api_key=api_key, base_url=base_url) + + report = benchmark.compare() + print_report(report) + + return report + + +if __name__ == "__main__": + try: + run_benchmark() + except ValueError as e: + print(f"Error: {e}") + print("Set STACKONE_API_KEY environment variable or pass api_key parameter") + exit(1) + except Exception as e: + print(f"Benchmark failed: {e}") + exit(1) diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py new file mode 100644 index 0000000..3becbd8 --- /dev/null +++ b/tests/test_semantic_search.py @@ -0,0 +1,486 @@ +"""Tests for semantic search client and integration.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +from stackone_ai.semantic_search import ( + SemanticSearchClient, + SemanticSearchError, + SemanticSearchResponse, + SemanticSearchResult, +) + + +class TestSemanticSearchResult: + """Tests for SemanticSearchResult model.""" + + def test_create_result(self) -> None: + """Test creating a search result.""" + result = SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee in 
BambooHR", + ) + + assert result.action_name == "bamboohr_create_employee" + assert result.connector_key == "bamboohr" + assert result.similarity_score == 0.92 + assert result.label == "Create Employee" + assert result.description == "Creates a new employee in BambooHR" + + +class TestSemanticSearchResponse: + """Tests for SemanticSearchResponse model.""" + + def test_create_response(self) -> None: + """Test creating a search response.""" + results = [ + SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="hibob_create_employee", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates a new employee", + ), + ] + response = SemanticSearchResponse( + results=results, + total_count=2, + query="create employee", + ) + + assert len(response.results) == 2 + assert response.total_count == 2 + assert response.query == "create employee" + + +class TestSemanticSearchClient: + """Tests for SemanticSearchClient.""" + + def test_init(self) -> None: + """Test client initialization.""" + client = SemanticSearchClient(api_key="test-key") + + assert client.api_key == "test-key" + assert client.base_url == "https://api.stackone.com" + assert client.timeout == 30.0 + + def test_init_custom_base_url(self) -> None: + """Test client initialization with custom base URL.""" + client = SemanticSearchClient( + api_key="test-key", + base_url="https://custom.api.com/", + ) + + assert client.base_url == "https://custom.api.com" # Trailing slash stripped + + def test_build_auth_header(self) -> None: + """Test building the authorization header.""" + client = SemanticSearchClient(api_key="test-key") + header = client._build_auth_header() + + # test-key: encoded in base64 = dGVzdC1rZXk6 + assert header == "Basic dGVzdC1rZXk6" + + @patch("httpx.post") + def test_search_success(self, 
mock_post: MagicMock) -> None: + """Test successful search request.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [ + { + "action_name": "bamboohr_create_employee", + "connector_key": "bamboohr", + "similarity_score": 0.92, + "label": "Create Employee", + "description": "Creates a new employee", + } + ], + "total_count": 1, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + response = client.search("create employee", top_k=5) + + assert len(response.results) == 1 + assert response.results[0].action_name == "bamboohr_create_employee" + assert response.total_count == 1 + assert response.query == "create employee" + + # Verify request was made correctly + mock_post.assert_called_once() + call_kwargs = mock_post.call_args + assert call_kwargs.kwargs["json"] == {"query": "create employee", "top_k": 5} + assert "Authorization" in call_kwargs.kwargs["headers"] + + @patch("httpx.post") + def test_search_with_connector(self, mock_post: MagicMock) -> None: + """Test search with connector filter.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [], + "total_count": 0, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + client.search("create employee", connector="bamboohr", top_k=10) + + call_kwargs = mock_post.call_args + assert call_kwargs.kwargs["json"] == { + "query": "create employee", + "connector": "bamboohr", + "top_k": 10, + } + + @patch("httpx.post") + def test_search_http_error(self, mock_post: MagicMock) -> None: + """Test search with HTTP error.""" + mock_response = MagicMock() + mock_response.status_code = 401 + mock_response.text = "Unauthorized" + mock_post.return_value = mock_response + mock_response.raise_for_status.side_effect = 
httpx.HTTPStatusError( + "Unauthorized", + request=MagicMock(), + response=mock_response, + ) + + client = SemanticSearchClient(api_key="invalid-key") + + with pytest.raises(SemanticSearchError) as exc_info: + client.search("create employee") + + assert "API error: 401" in str(exc_info.value) + + @patch("httpx.post") + def test_search_request_error(self, mock_post: MagicMock) -> None: + """Test search with request error.""" + mock_post.side_effect = httpx.RequestError("Connection failed") + + client = SemanticSearchClient(api_key="test-key") + + with pytest.raises(SemanticSearchError) as exc_info: + client.search("create employee") + + assert "Request failed" in str(exc_info.value) + + @patch("httpx.post") + def test_search_action_names(self, mock_post: MagicMock) -> None: + """Test search_action_names convenience method.""" + mock_response = MagicMock() + mock_response.json.return_value = { + "results": [ + { + "action_name": "bamboohr_create_employee", + "connector_key": "bamboohr", + "similarity_score": 0.92, + "label": "Create Employee", + "description": "Creates a new employee", + }, + { + "action_name": "hibob_create_employee", + "connector_key": "hibob", + "similarity_score": 0.45, + "label": "Create Employee", + "description": "Creates a new employee", + }, + ], + "total_count": 2, + "query": "create employee", + } + mock_response.raise_for_status = MagicMock() + mock_post.return_value = mock_response + + client = SemanticSearchClient(api_key="test-key") + + # Without min_score filter + names = client.search_action_names("create employee") + assert len(names) == 2 + assert "bamboohr_create_employee" in names + assert "hibob_create_employee" in names + + # With min_score filter + names = client.search_action_names("create employee", min_score=0.5) + assert len(names) == 1 + assert "bamboohr_create_employee" in names + + +class TestSemanticSearchIntegration: + """Integration tests for semantic search with toolset.""" + + def 
test_toolset_semantic_client_lazy_init(self) -> None: + """Test that semantic_client is lazily initialized.""" + from stackone_ai import StackOneToolSet + + toolset = StackOneToolSet(api_key="test-key") + + # Access semantic_client + client = toolset.semantic_client + assert isinstance(client, SemanticSearchClient) + assert client.api_key == "test-key" + + # Same instance on second access + assert toolset.semantic_client is client + + @patch.object(SemanticSearchClient, "search_action_names") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test toolset.search_tools() method.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Mock semantic search to return action names + mock_search.return_value = ["bamboohr_create_employee", "hibob_create_employee"] + + # Mock MCP fetch to return tools using actual dataclass + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="hibob_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", top_k=5) + + # Should only return the 2 matching tools + assert len(tools) == 2 + tool_names = [t.name for t in tools] + assert "bamboohr_create_employee" in tool_names + assert "hibob_create_employee" in tool_names + assert "bamboohr_list_employees" not in tool_names + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_action_names( + self, + 
mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test toolset.search_action_names() method.""" + from stackone_ai import StackOneToolSet + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="hibob_create_employee", + connector_key="hibob", + similarity_score=0.45, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=2, + query="create employee", + ) + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("create employee", min_score=0.5) + + # Should filter by min_score + assert len(results) == 1 + assert results[0].action_name == "bamboohr_create_employee" + + def test_utility_tools_semantic_search(self) -> None: + """Test utility_tools with semantic search.""" + from stackone_ai.models import StackOneTool, Tools + + # Create a mock tools collection + tool = MagicMock(spec=StackOneTool) + tool.name = "test_tool" + tool.description = "Test tool" + tools = Tools([tool]) + + # Without semantic search - should use local search + # Patch ToolIndex in utility_tools module where it's imported + with ( + patch("stackone_ai.utility_tools.ToolIndex") as mock_index_class, + patch("stackone_ai.utility_tools.create_tool_search") as mock_create_search, + patch("stackone_ai.utility_tools.create_tool_execute") as mock_create_execute, + ): + mock_search_tool = MagicMock(spec=StackOneTool) + mock_search_tool.name = "tool_search" + mock_execute_tool = MagicMock(spec=StackOneTool) + mock_execute_tool.name = "tool_execute" + mock_create_search.return_value = mock_search_tool + mock_create_execute.return_value = mock_execute_tool + utility = tools.utility_tools() + assert len(utility) == 2 # tool_search + tool_execute + + # With semantic search - requires 
client + with pytest.raises(ValueError) as exc_info: + tools.utility_tools(use_semantic_search=True) + assert "semantic_client is required" in str(exc_info.value) + + # With semantic search and client + mock_client = MagicMock(spec=SemanticSearchClient) + with ( + patch("stackone_ai.utility_tools.create_semantic_tool_search") as mock_create, + patch("stackone_ai.utility_tools.create_tool_execute") as mock_create_execute, + ): + mock_search_tool = MagicMock(spec=StackOneTool) + mock_search_tool.name = "tool_search" + mock_execute_tool = MagicMock(spec=StackOneTool) + mock_execute_tool.name = "tool_execute" + mock_create.return_value = mock_search_tool + mock_create_execute.return_value = mock_execute_tool + utility = tools.utility_tools(use_semantic_search=True, semantic_client=mock_client) + assert len(utility) == 2 + mock_create.assert_called_once_with(mock_client) + + +class TestSemanticToolSearch: + """Tests for create_semantic_tool_search utility.""" + + def test_create_semantic_tool_search_type_error(self) -> None: + """Test that invalid client raises TypeError.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + with pytest.raises(TypeError) as exc_info: + create_semantic_tool_search("not a client") # type: ignore + + assert "SemanticSearchClient instance" in str(exc_info.value) + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_execute(self, mock_search: MagicMock) -> None: + """Test executing semantic tool search.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.92, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=1, + query="create employee", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + result = 
tool.execute({"query": "create employee", "limit": 5}) + + assert "tools" in result + assert len(result["tools"]) == 1 + assert result["tools"][0]["name"] == "bamboohr_create_employee" + assert result["tools"][0]["score"] == 0.92 + assert result["tools"][0]["connector"] == "bamboohr" + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_with_min_score(self, mock_search: MagicMock) -> None: + """Test semantic tool search with min_score filter.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="high_score_action", + connector_key="test", + similarity_score=0.9, + label="High Score", + description="High scoring action", + ), + SemanticSearchResult( + action_name="low_score_action", + connector_key="test", + similarity_score=0.3, + label="Low Score", + description="Low scoring action", + ), + ], + total_count=2, + query="test", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + result = tool.execute({"query": "test", "limit": 10, "minScore": 0.5}) + + assert len(result["tools"]) == 1 + assert result["tools"][0]["name"] == "high_score_action" + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_with_connector(self, mock_search: MagicMock) -> None: + """Test semantic tool search with connector filter.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[], + total_count=0, + query="create employee", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + tool.execute({"query": "create employee", "connector": "bamboohr"}) + + mock_search.assert_called_once_with( + query="create employee", + connector="bamboohr", + top_k=5, # default limit + ) + + def test_semantic_tool_search_has_correct_parameters(self) 
-> None: + """Test that semantic tool has the expected parameter schema.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + + assert tool.name == "tool_search" + assert "semantic" in tool.description.lower() + assert "84%" in tool.description + + props = tool.parameters.properties + assert "query" in props + assert "limit" in props + assert "minScore" in props + assert "connector" in props From 0b0e9e09b4e435b9e569b46dc7a3ee1615fda5c9 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Thu, 5 Feb 2026 13:42:58 +0000 Subject: [PATCH 02/25] Filter tools based on the SDK auth config and connector --- stackone_ai/models.py | 41 +++++ stackone_ai/toolset.py | 72 ++++++-- tests/test_semantic_search.py | 313 +++++++++++++++++++++++++++++++++- 3 files changed, 407 insertions(+), 19 deletions(-) diff --git a/stackone_ai/models.py b/stackone_ai/models.py index 69b670b..e2c1d2e 100644 --- a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -101,6 +101,18 @@ class StackOneTool(BaseModel): "feedback_metadata", } + @property + def connector(self) -> str: + """Extract connector from tool name. + + Tool names follow the format: {connector}_{action}_{entity} + e.g., 'bamboohr_create_employee' -> 'bamboohr' + + Returns: + Connector name in lowercase + """ + return self.name.split("_")[0].lower() + def __init__( self, description: str, @@ -517,6 +529,35 @@ def get_account_id(self) -> str | None: return account_id return None + def get_connectors(self) -> set[str]: + """Get unique connector names from all tools. + + Returns: + Set of connector names (lowercase) + + Example: + tools = toolset.fetch_tools() + connectors = tools.get_connectors() + # {'bamboohr', 'hibob', 'slack', ...} + """ + return {tool.connector for tool in self.tools} + + def filter_by_connector(self, connectors: list[str] | set[str]) -> Tools: + """Filter tools by connector names. 
+ + Args: + connectors: List or set of connector names to include (case-insensitive) + + Returns: + New Tools collection containing only tools from specified connectors + + Example: + hr_tools = tools.filter_by_connector(['bamboohr', 'hibob']) + """ + connector_set = {c.lower() for c in connectors} + filtered = [t for t in self.tools if t.connector in connector_set] + return Tools(filtered) + def to_openai(self) -> list[JsonDict]: """Convert all tools to OpenAI function format diff --git a/stackone_ai/toolset.py b/stackone_ai/toolset.py index e9d8137..b4d67dd 100644 --- a/stackone_ai/toolset.py +++ b/stackone_ai/toolset.py @@ -297,8 +297,8 @@ def search_tools( """Search for and fetch tools using semantic search. This method uses the StackOne semantic search API (84% Hit@5 accuracy) - to find relevant tools based on natural language queries, then fetches - those tools via MCP. + to find relevant tools based on natural language queries. It optimizes + results by filtering to only connectors available in linked accounts. 
Args: query: Natural language description of needed functionality @@ -310,7 +310,7 @@ def search_tools( fallback_to_local: If True, fall back to local BM25+TF-IDF search on API failure Returns: - Tools collection with semantically matched tools + Tools collection with semantically matched tools from linked accounts Raises: SemanticSearchError: If the API call fails and fallback_to_local is False @@ -334,17 +334,43 @@ def search_tools( ) """ try: - action_names = self.semantic_client.search_action_names( + # Step 1: Fetch all tools to get available connectors from linked accounts + all_tools = self.fetch_tools(account_ids=account_ids) + available_connectors = all_tools.get_connectors() + + if not available_connectors: + return Tools([]) + + # Step 2: Over-fetch from semantic API to account for connector filtering + # We fetch 3x to ensure we get enough results after filtering + over_fetch_multiplier = 3 + over_fetch_k = top_k * over_fetch_multiplier + + response = self.semantic_client.search( query=query, connector=connector, - top_k=top_k, - min_score=min_score, + top_k=over_fetch_k, ) - if not action_names: + # Step 3: Filter results to only available connectors and min_score + filtered_results = [ + r + for r in response.results + if r.connector_key.lower() in available_connectors and r.similarity_score >= min_score + ][:top_k] # Take only top_k after filtering + + if not filtered_results: return Tools([]) - return self.fetch_tools(actions=action_names, account_ids=account_ids) + # Step 4: Get matching tools from already-fetched tools + action_names = {r.action_name for r in filtered_results} + matched_tools = [t for t in all_tools if t.name in action_names] + + # Sort matched tools by semantic search score order + action_order = {r.action_name: i for i, r in enumerate(filtered_results)} + matched_tools.sort(key=lambda t: action_order.get(t.name, float("inf"))) + + return Tools(matched_tools) except SemanticSearchError: if not fallback_to_local: @@ -373,6 +399,7 
@@ def search_action_names( query: str, *, connector: str | None = None, + available_connectors: set[str] | None = None, top_k: int = 10, min_score: float = 0.0, ) -> list[SemanticSearchResult]: @@ -383,7 +410,10 @@ def search_action_names( Args: query: Natural language description of needed functionality - connector: Optional provider/connector filter + connector: Optional provider/connector filter (single connector) + available_connectors: Optional set of connectors to filter results by. + If provided, only returns results for these connectors (over-fetches + from API to ensure enough results after filtering). top_k: Maximum number of results (default: 10) min_score: Minimum similarity score threshold 0-1 (default: 0.0) @@ -396,16 +426,36 @@ def search_action_names( for r in results: print(f"{r.action_name}: {r.similarity_score:.2f}") + # Filter by available connectors from linked accounts + tools = toolset.fetch_tools() + results = toolset.search_action_names( + "create employee", + available_connectors=tools.get_connectors(), + top_k=5 + ) + # Then fetch specific high-scoring actions selected = [r.action_name for r in results if r.similarity_score > 0.7] tools = toolset.fetch_tools(actions=selected) """ + # Over-fetch if filtering by available_connectors + fetch_k = top_k * 3 if available_connectors else top_k + response = self.semantic_client.search( query=query, connector=connector, - top_k=top_k, + top_k=fetch_k, ) - return [r for r in response.results if r.similarity_score >= min_score] + + # Filter by min_score + results = [r for r in response.results if r.similarity_score >= min_score] + + # Filter by available connectors if provided + if available_connectors: + connector_set = {c.lower() for c in available_connectors} + results = [r for r in results if r.connector_key.lower() in connector_set] + + return results[:top_k] def _filter_by_provider(self, tool_name: str, providers: list[str]) -> bool: """Check if a tool name matches any of the provider filters 
diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py index 3becbd8..5bfef95 100644 --- a/tests/test_semantic_search.py +++ b/tests/test_semantic_search.py @@ -241,21 +241,47 @@ def test_toolset_semantic_client_lazy_init(self) -> None: # Same instance on second access assert toolset.semantic_client is client - @patch.object(SemanticSearchClient, "search_action_names") + @patch.object(SemanticSearchClient, "search") @patch("stackone_ai.toolset._fetch_mcp_tools") def test_toolset_search_tools( self, mock_fetch: MagicMock, mock_search: MagicMock, ) -> None: - """Test toolset.search_tools() method.""" + """Test toolset.search_tools() method with connector filtering.""" from stackone_ai import StackOneToolSet from stackone_ai.toolset import _McpToolDefinition - # Mock semantic search to return action names - mock_search.return_value = ["bamboohr_create_employee", "hibob_create_employee"] + # Mock semantic search to return results (including some for unavailable connectors) + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.95, + label="Create Employee", + description="Creates a new employee", + ), + SemanticSearchResult( + action_name="workday_create_worker", + connector_key="workday", # User doesn't have this connector + similarity_score=0.90, + label="Create Worker", + description="Creates a new worker", + ), + SemanticSearchResult( + action_name="hibob_create_employee", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates a new employee", + ), + ], + total_count=3, + query="create employee", + ) - # Mock MCP fetch to return tools using actual dataclass + # Mock MCP fetch to return only bamboohr and hibob tools (user's linked accounts) mock_fetch.return_value = [ _McpToolDefinition( name="bamboohr_create_employee", @@ -277,12 +303,17 @@ def test_toolset_search_tools( toolset 
= StackOneToolSet(api_key="test-key") tools = toolset.search_tools("create employee", top_k=5) - # Should only return the 2 matching tools + # Should only return tools for available connectors (bamboohr, hibob) + # workday_create_worker should be filtered out assert len(tools) == 2 tool_names = [t.name for t in tools] assert "bamboohr_create_employee" in tool_names assert "hibob_create_employee" in tool_names - assert "bamboohr_list_employees" not in tool_names + assert "workday_create_worker" not in tool_names # Filtered out - connector not available + + # Results should be sorted by semantic score + assert tools[0].name == "bamboohr_create_employee" # score 0.95 + assert tools[1].name == "hibob_create_employee" # score 0.85 @patch.object(SemanticSearchClient, "search") @patch("stackone_ai.toolset._fetch_mcp_tools") @@ -335,7 +366,7 @@ def test_utility_tools_semantic_search(self) -> None: # Without semantic search - should use local search # Patch ToolIndex in utility_tools module where it's imported with ( - patch("stackone_ai.utility_tools.ToolIndex") as mock_index_class, + patch("stackone_ai.utility_tools.ToolIndex"), patch("stackone_ai.utility_tools.create_tool_search") as mock_create_search, patch("stackone_ai.utility_tools.create_tool_execute") as mock_create_execute, ): @@ -484,3 +515,269 @@ def test_semantic_tool_search_has_correct_parameters(self) -> None: assert "limit" in props assert "minScore" in props assert "connector" in props + + +class TestConnectorProperty: + """Tests for StackOneTool.connector property.""" + + def test_connector_extracts_from_name(self) -> None: + """Test that connector is extracted from tool name.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="bamboohr_create_employee", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Creates employee", + parameters=ToolParameters(type="object", properties={}), 
+ _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "bamboohr" + + def test_connector_is_lowercase(self) -> None: + """Test that connector is always lowercase.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="BambooHR_Create_Employee", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Creates employee", + parameters=ToolParameters(type="object", properties={}), + _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "bamboohr" + + def test_connector_with_single_word_name(self) -> None: + """Test connector extraction with single-word tool name.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters + + execute_config = ExecuteConfig( + name="utility", + method="POST", + url="https://api.example.com", + headers={}, + ) + tool = StackOneTool( + description="Utility tool", + parameters=ToolParameters(type="object", properties={}), + _execute_config=execute_config, + _api_key="test-key", + ) + + assert tool.connector == "utility" + + +class TestToolsConnectorHelpers: + """Tests for Tools.get_connectors() and filter_by_connector().""" + + def test_get_connectors(self) -> None: + """Test getting unique connectors from tools collection.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters, Tools + + def make_tool(name: str) -> StackOneTool: + return StackOneTool( + description=f"Tool {name}", + parameters=ToolParameters(type="object", properties={}), + _execute_config=ExecuteConfig(name=name, method="POST", url="", headers={}), + _api_key="test-key", + ) + + tools = Tools([ + make_tool("bamboohr_create_employee"), + make_tool("bamboohr_list_employees"), + make_tool("hibob_create_employee"), + make_tool("slack_send_message"), + ]) + + connectors = tools.get_connectors() + + assert connectors == {"bamboohr", "hibob", 
"slack"} + + def test_get_connectors_empty(self) -> None: + """Test get_connectors with empty tools collection.""" + from stackone_ai.models import Tools + + tools = Tools([]) + assert tools.get_connectors() == set() + + def test_filter_by_connector(self) -> None: + """Test filtering tools by connector.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters, Tools + + def make_tool(name: str) -> StackOneTool: + return StackOneTool( + description=f"Tool {name}", + parameters=ToolParameters(type="object", properties={}), + _execute_config=ExecuteConfig(name=name, method="POST", url="", headers={}), + _api_key="test-key", + ) + + tools = Tools([ + make_tool("bamboohr_create_employee"), + make_tool("bamboohr_list_employees"), + make_tool("hibob_create_employee"), + make_tool("slack_send_message"), + ]) + + # Filter by single connector + bamboo_tools = tools.filter_by_connector(["bamboohr"]) + assert len(bamboo_tools) == 2 + assert all(t.connector == "bamboohr" for t in bamboo_tools) + + # Filter by multiple connectors + hr_tools = tools.filter_by_connector(["bamboohr", "hibob"]) + assert len(hr_tools) == 3 + assert all(t.connector in {"bamboohr", "hibob"} for t in hr_tools) + + def test_filter_by_connector_case_insensitive(self) -> None: + """Test that filter_by_connector is case-insensitive.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters, Tools + + tool = StackOneTool( + description="Creates employee", + parameters=ToolParameters(type="object", properties={}), + _execute_config=ExecuteConfig( + name="bamboohr_create_employee", method="POST", url="", headers={} + ), + _api_key="test-key", + ) + tools = Tools([tool]) + + # Should match regardless of case + assert len(tools.filter_by_connector(["BambooHR"])) == 1 + assert len(tools.filter_by_connector(["BAMBOOHR"])) == 1 + assert len(tools.filter_by_connector(["bamboohr"])) == 1 + + def test_filter_by_connector_returns_new_tools(self) -> None: + """Test that 
filter_by_connector returns a new Tools instance.""" + from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters, Tools + + tool = StackOneTool( + description="Creates employee", + parameters=ToolParameters(type="object", properties={}), + _execute_config=ExecuteConfig( + name="bamboohr_create_employee", method="POST", url="", headers={} + ), + _api_key="test-key", + ) + tools = Tools([tool]) + + filtered = tools.filter_by_connector(["bamboohr"]) + + assert filtered is not tools + assert isinstance(filtered, Tools) + + +class TestSearchActionNamesWithAvailableConnectors: + """Tests for search_action_names with available_connectors parameter.""" + + @patch.object(SemanticSearchClient, "search") + def test_filters_by_available_connectors(self, mock_search: MagicMock) -> None: + """Test that results are filtered by available connectors.""" + from stackone_ai import StackOneToolSet + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="bamboohr_create_employee", + connector_key="bamboohr", + similarity_score=0.95, + label="Create Employee", + description="Creates employee", + ), + SemanticSearchResult( + action_name="workday_create_worker", + connector_key="workday", + similarity_score=0.90, + label="Create Worker", + description="Creates worker", + ), + SemanticSearchResult( + action_name="hibob_create_employee", + connector_key="hibob", + similarity_score=0.85, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=3, + query="create employee", + ) + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names( + "create employee", + available_connectors={"bamboohr", "hibob"}, + top_k=10, + ) + + # workday should be filtered out + assert len(results) == 2 + action_names = [r.action_name for r in results] + assert "bamboohr_create_employee" in action_names + assert "hibob_create_employee" in action_names + assert "workday_create_worker" not in 
action_names + + @patch.object(SemanticSearchClient, "search") + def test_over_fetches_when_filtering(self, mock_search: MagicMock) -> None: + """Test that API is called with 3x top_k when filtering by connectors.""" + from stackone_ai import StackOneToolSet + + mock_search.return_value = SemanticSearchResponse( + results=[], + total_count=0, + query="test", + ) + + toolset = StackOneToolSet(api_key="test-key") + toolset.search_action_names( + "test", + available_connectors={"bamboohr"}, + top_k=5, + ) + + # Should over-fetch by 3x + mock_search.assert_called_once() + call_kwargs = mock_search.call_args.kwargs + assert call_kwargs["top_k"] == 15 # 5 * 3 + + @patch.object(SemanticSearchClient, "search") + def test_respects_top_k_after_filtering(self, mock_search: MagicMock) -> None: + """Test that results are limited to top_k after filtering.""" + from stackone_ai import StackOneToolSet + + # Return more results than top_k + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name=f"bamboohr_action_{i}", + connector_key="bamboohr", + similarity_score=0.9 - i * 0.1, + label=f"Action {i}", + description=f"Action {i}", + ) + for i in range(10) + ], + total_count=10, + query="test", + ) + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names( + "test", + available_connectors={"bamboohr"}, + top_k=3, + ) + + assert len(results) == 3 From 736e68f0cfbe07f08c2cadd51e268c71b047974a Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Thu, 5 Feb 2026 16:23:14 +0000 Subject: [PATCH 03/25] Use the local benchmark from the ai-generations --- tests/benchmark_search.py | 211 ++++++++++++++++++++++++++++++++++---- 1 file changed, 192 insertions(+), 19 deletions(-) diff --git a/tests/benchmark_search.py b/tests/benchmark_search.py index f69e9a3..4910bf0 100644 --- a/tests/benchmark_search.py +++ b/tests/benchmark_search.py @@ -6,21 +6,135 @@ - Semantic Search: ~84% Hit@5 - Improvement: 4x -Run with: +Run with 
production API: STACKONE_API_KEY=xxx python tests/benchmark_search.py + +Run with local Lambda (ai-generation/apps/action_search): + # First, start the local Lambda: + # cd ai-generation/apps/action_search && make run-local + # Then run benchmark: + python tests/benchmark_search.py --local + +Environment Variables: + STACKONE_API_KEY: Required for production mode + LOCAL_LAMBDA_URL: Optional, defaults to http://localhost:4513/2015-03-31/functions/function/invocations """ from __future__ import annotations +import argparse import os import time from dataclasses import dataclass, field -from typing import Literal +from typing import Any, Literal, Protocol + +import httpx from stackone_ai import StackOneToolSet -from stackone_ai.semantic_search import SemanticSearchClient +from stackone_ai.semantic_search import SemanticSearchClient, SemanticSearchResponse, SemanticSearchResult from stackone_ai.utility_tools import ToolIndex +# Default local Lambda URL (from ai-generation/apps/action_search docker-compose) +DEFAULT_LOCAL_LAMBDA_URL = "http://localhost:4513/2015-03-31/functions/function/invocations" + + +class SearchClientProtocol(Protocol): + """Protocol for search clients (production or local).""" + + def search( + self, + query: str, + connector: str | None = None, + top_k: int = 10, + ) -> SemanticSearchResponse: ... + + +class LocalLambdaSearchClient: + """Client for local action_search Lambda. + + This client connects to the local Lambda running via docker-compose + from ai-generation/apps/action_search. + + Usage: + # Start local Lambda first: + # cd ai-generation/apps/action_search && make run-local + + client = LocalLambdaSearchClient() + response = client.search("create employee", connector="bamboohr", top_k=5) + """ + + def __init__( + self, + lambda_url: str = DEFAULT_LOCAL_LAMBDA_URL, + timeout: float = 30.0, + ) -> None: + """Initialize the local Lambda client. 
+ + Args: + lambda_url: URL of the local Lambda endpoint + timeout: Request timeout in seconds + """ + self.lambda_url = lambda_url + self.timeout = timeout + + def search( + self, + query: str, + connector: str | None = None, + top_k: int = 10, + ) -> SemanticSearchResponse: + """Search for relevant actions using local Lambda. + + Args: + query: Natural language query + connector: Optional connector filter + top_k: Maximum number of results + + Returns: + SemanticSearchResponse with matching actions + """ + # Lambda event envelope format + payload: dict[str, Any] = { + "type": "search", + "payload": { + "query": query, + "top_k": top_k, + }, + } + if connector: + payload["payload"]["connector"] = connector + + try: + response = httpx.post( + self.lambda_url, + json=payload, + headers={"Content-Type": "application/json"}, + timeout=self.timeout, + ) + response.raise_for_status() + data = response.json() + + # Convert Lambda response to SemanticSearchResponse + results = [ + SemanticSearchResult( + action_name=r.get("action_name", ""), + connector_key=r.get("connector_key", ""), + similarity_score=r.get("similarity_score", 0.0), + label=r.get("label", ""), + description=r.get("description", ""), + ) + for r in data.get("results", []) + ] + return SemanticSearchResponse( + results=results, + total_count=data.get("total_count", len(results)), + query=data.get("query", query), + ) + except httpx.RequestError as e: + raise RuntimeError(f"Local Lambda request failed: {e}") from e + except Exception as e: + raise RuntimeError(f"Local Lambda search failed: {e}") from e + @dataclass class EvaluationTask: @@ -778,19 +892,17 @@ class SearchBenchmark: def __init__( self, tools: list, - api_key: str, - base_url: str = "https://api.stackone.com", + semantic_client: SearchClientProtocol, ): - """Initialize benchmark with tools and API credentials. + """Initialize benchmark with tools and search client. 
Args: tools: List of StackOneTool instances to search - api_key: StackOne API key for semantic search - base_url: Base URL for API requests + semantic_client: Client for semantic search (production or local) """ self.tools = tools self.local_index = ToolIndex(tools) - self.semantic_client = SemanticSearchClient(api_key=api_key, base_url=base_url) + self.semantic_client = semantic_client def evaluate_local( self, @@ -942,22 +1054,38 @@ def print_report(report: ComparisonReport) -> None: print(f" ... and {len(failed_local) - 10} more") -def run_benchmark(api_key: str | None = None, base_url: str = "https://api.stackone.com") -> ComparisonReport: +def run_benchmark( + api_key: str | None = None, + base_url: str = "https://api.stackone.com", + use_local: bool = False, + local_lambda_url: str = DEFAULT_LOCAL_LAMBDA_URL, +) -> ComparisonReport: """Run the full benchmark comparison. Args: api_key: StackOne API key (uses STACKONE_API_KEY env var if not provided) - base_url: Base URL for API requests + base_url: Base URL for production API requests + use_local: If True, use local Lambda instead of production API + local_lambda_url: URL of local Lambda endpoint Returns: ComparisonReport with results Raises: - ValueError: If no API key is available + ValueError: If no API key is available (production mode only) """ - api_key = api_key or os.environ.get("STACKONE_API_KEY") - if not api_key: - raise ValueError("API key must be provided or set via STACKONE_API_KEY environment variable") + # Create semantic search client based on mode + if use_local: + print(f"Using LOCAL Lambda at: {local_lambda_url}") + semantic_client: SearchClientProtocol = LocalLambdaSearchClient(lambda_url=local_lambda_url) + # For local mode, we still need API key for toolset but can use a dummy if not set + api_key = api_key or os.environ.get("STACKONE_API_KEY") or "local-testing" + else: + api_key = api_key or os.environ.get("STACKONE_API_KEY") + if not api_key: + raise ValueError("API key must be 
provided or set via STACKONE_API_KEY environment variable") + print(f"Using PRODUCTION API at: {base_url}") + semantic_client = SemanticSearchClient(api_key=api_key, base_url=base_url) print("Initializing toolset...") toolset = StackOneToolSet(api_key=api_key, base_url=base_url) @@ -967,7 +1095,7 @@ def run_benchmark(api_key: str | None = None, base_url: str = "https://api.stack print(f"Loaded {len(tools)} tools") print(f"\nRunning benchmark with {len(EVALUATION_TASKS)} evaluation tasks...") - benchmark = SearchBenchmark(list(tools), api_key=api_key, base_url=base_url) + benchmark = SearchBenchmark(list(tools), semantic_client=semantic_client) report = benchmark.compare() print_report(report) @@ -975,13 +1103,58 @@ def run_benchmark(api_key: str | None = None, base_url: str = "https://api.stack return report -if __name__ == "__main__": +def main() -> None: + """Main entry point with CLI argument parsing.""" + parser = argparse.ArgumentParser( + description="Benchmark comparing local BM25+TF-IDF vs semantic search", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Run with production API + STACKONE_API_KEY=xxx python tests/benchmark_search.py + + # Run with local Lambda (start it first: cd ai-generation/apps/action_search && make run-local) + python tests/benchmark_search.py --local + + # Run with custom local Lambda URL + python tests/benchmark_search.py --local --lambda-url http://localhost:9000/invoke + """, + ) + parser.add_argument( + "--local", + action="store_true", + help="Use local Lambda instead of production API", + ) + parser.add_argument( + "--lambda-url", + default=DEFAULT_LOCAL_LAMBDA_URL, + help=f"Local Lambda URL (default: {DEFAULT_LOCAL_LAMBDA_URL})", + ) + parser.add_argument( + "--api-url", + default="https://api.stackone.com", + help="Production API base URL", + ) + + args = parser.parse_args() + try: - run_benchmark() + run_benchmark( + base_url=args.api_url, + use_local=args.local, + 
local_lambda_url=args.lambda_url,
+        )
     except ValueError as e:
         print(f"Error: {e}")
-        print("Set STACKONE_API_KEY environment variable or pass api_key parameter")
+        print("Set STACKONE_API_KEY environment variable or use --local flag")
         exit(1)
     except Exception as e:
         print(f"Benchmark failed: {e}")
+        import traceback
+
+        traceback.print_exc()
         exit(1)
+
+
+if __name__ == "__main__":
+    main()

From 4d3deca3a37fe04ed91099df14c1dbb2c26f4ef4 Mon Sep 17 00:00:00 2001
From: Shashikant86
Date: Fri, 6 Feb 2026 13:45:27 +0000
Subject: [PATCH 04/25] Add semantic search benchmark with local benchmarks

---
 README.md                        |  28 +++
 examples/demo_semantic_search.py | 310 +++++++++++++++++++++++++++++++
 tests/BENCHMARK_RESULTS.md       | 151 +++++++++++++++
 tests/benchmark_search.py        | 197 +++++++++++++++-----
 4 files changed, 636 insertions(+), 50 deletions(-)
 create mode 100644 examples/demo_semantic_search.py
 create mode 100644 tests/BENCHMARK_RESULTS.md

diff --git a/README.md b/README.md
index 7e26dbd..ec247eb 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,7 @@ StackOne AI provides a unified interface for accessing various SaaS tools throug
 - Glob pattern filtering with patterns like `"hris_*"` and exclusions `"!hris_delete_*"`
 - Provider and action filtering
 - Multi-account support
+- **Semantic Search**: AI-powered tool discovery using natural language (76.6% Hit@5 vs 66.0% for keyword search)
 - **Utility Tools** (Beta): Dynamic tool discovery and execution based on natural language queries
 - Integration with popular AI frameworks:
   - OpenAI Functions
@@ -325,6 +326,33 @@ execute_tool = utility_tools.get_tool("tool_execute")
 result = execute_tool.call(toolName="hris_list_employees", params={"limit": 10})
 ```
 
+## Semantic Search
+
+Search across 9,000+ actions using natural language instead of exact keyword matching.
+ +```python +from stackone_ai import StackOneToolSet + +toolset = StackOneToolSet() + +# Find tools using natural language +tools = toolset.search_tools("onboard a new team member", top_k=5) +# Returns: create_employee, invite_employee, ... + +# Filter by connector +tools = toolset.search_tools("send a message", connector="slack", top_k=3) +``` + +Semantic search understands intent and synonyms, so queries like "onboard a new team member", "check my to-do list", or "file a bug" return the right actions even when no keywords match. + +It can also power the `tool_search` utility tool for AI agents: + +```python +tools = toolset.fetch_tools(account_ids=["your-account-id"]) +utility = tools.utility_tools(use_semantic_search=True) +# AI agent gets semantic-powered tool_search + tool_execute +``` + ## Examples For more examples, check out the [examples/](examples/) directory: diff --git a/examples/demo_semantic_search.py b/examples/demo_semantic_search.py new file mode 100644 index 0000000..32b14f7 --- /dev/null +++ b/examples/demo_semantic_search.py @@ -0,0 +1,310 @@ +""" +Semantic Search Demo - Local BM25 vs Semantic Search + +Demonstrates how semantic search understands natural language intent +while local keyword search fails on synonyms and colloquial queries. 
+ +Run with local Lambda: + cd ai-generation/apps/action_search && make run-local + uv run python examples/demo_semantic_search.py --local + +Run with production API: + STACKONE_API_KEY=xxx uv run python examples/demo_semantic_search.py +""" + +from __future__ import annotations + +import argparse +import os +import time +from dataclasses import dataclass +from typing import Any + +import httpx + +from stackone_ai.semantic_search import ( + SemanticSearchClient, + SemanticSearchResponse, + SemanticSearchResult, +) +from stackone_ai.utility_tools import ToolIndex + +# Local Lambda URL +DEFAULT_LAMBDA_URL = "http://localhost:4513/2015-03-31/functions/function/invocations" + +# Demo queries - the strongest "wow" moments from benchmark results +DEMO_QUERIES = [ + { + "query": "fire someone", + "why": "Synonym: 'fire' = terminate employment", + }, + { + "query": "ping the team", + "why": "Intent: 'ping' = send a message", + }, + { + "query": "file a new bug", + "why": "Intent: 'file a bug' = create issue (not file operations)", + }, + { + "query": "check my to-do list", + "why": "Concept: 'to-do list' = list tasks", + }, + { + "query": "show me everyone in the company", + "why": "Synonym: 'everyone in company' = list employees", + }, + { + "query": "turn down a job seeker", + "why": "Synonym: 'turn down' = reject application", + }, + { + "query": "approve PTO", + "why": "Abbreviation: 'PTO' = paid time off request", + }, + { + "query": "grab that spreadsheet", + "why": "Colloquial: 'grab' = download file", + }, +] + + +@dataclass +class LightweightTool: + """Minimal tool for BM25 indexing.""" + + name: str + description: str + + +class LocalLambdaClient: + """Client for local action_search Lambda.""" + + def __init__(self, url: str = DEFAULT_LAMBDA_URL) -> None: + self.url = url + + def search( + self, + query: str, + connector: str | None = None, + top_k: int = 5, + ) -> SemanticSearchResponse: + payload: dict[str, Any] = { + "type": "search", + "payload": {"query": 
query, "top_k": top_k}, + } + if connector: + payload["payload"]["connector"] = connector + + resp = httpx.post(self.url, json=payload, timeout=30.0) + resp.raise_for_status() + data = resp.json() + + results = [ + SemanticSearchResult( + action_name=r.get("action_name", ""), + connector_key=r.get("connector_key", ""), + similarity_score=r.get("similarity_score", 0.0), + label=r.get("label", ""), + description=r.get("description", ""), + ) + for r in data.get("results", []) + ] + return SemanticSearchResponse( + results=results, + total_count=data.get("total_count", len(results)), + query=data.get("query", query), + ) + + def fetch_actions(self) -> list[LightweightTool]: + """Fetch broad action catalog for BM25 index.""" + seen: dict[str, LightweightTool] = {} + for q in ["employee", "candidate", "contact", "task", "message", "file", "event", "deal"]: + try: + resp = httpx.post( + self.url, + json={"type": "search", "payload": {"query": q, "top_k": 500}}, + timeout=30.0, + ) + for r in resp.json().get("results", []): + name = r.get("action_name", "") + if name and name not in seen: + seen[name] = LightweightTool(name=name, description=r.get("description", "")) + except Exception: + continue + return list(seen.values()) + + +def shorten_name(name: str) -> str: + """Shorten action name for display. + + bamboohr_1.0.0_bamboohr_list_employees_global -> bamboohr: list_employees + """ + parts = name.split("_") + # Find version segment (e.g., "1.0.0") and split around it + version_idx = None + for i, p in enumerate(parts): + if "." 
in p and any(c.isdigit() for c in p): + version_idx = i + break + + if version_idx is not None: + connector = parts[0] + # Skip connector + version + repeated connector prefix + action_parts = parts[version_idx + 1 :] + # Remove leading connector name if repeated + if action_parts and action_parts[0].lower().replace("-", "") == connector.lower().replace("-", ""): + action_parts = action_parts[1:] + # Remove trailing 'global' + if action_parts and action_parts[-1] == "global": + action_parts = action_parts[:-1] + action = "_".join(action_parts) + return f"{connector}: {action}" + + return name + + +def print_header(text: str) -> None: + print(f"\n{'=' * 70}") + print(f" {text}") + print(f"{'=' * 70}") + + +def print_section(text: str) -> None: + print(f"\n--- {text} ---\n") + + +def run_demo(use_local: bool, lambda_url: str, api_key: str | None) -> None: + # Step 1: Setup + if use_local: + client = LocalLambdaClient(url=lambda_url) + semantic_search = client.search + else: + if not api_key: + print("Error: STACKONE_API_KEY required for production mode") + print("Use --local flag for local Lambda mode") + exit(1) + sem_client = SemanticSearchClient(api_key=api_key) + semantic_search = sem_client.search + client = None + + print_header("SEMANTIC SEARCH DEMO") + print("\n Comparing Local BM25+TF-IDF vs Semantic Search") + print(" across 5,144 actions from 200+ connectors\n") + + # Step 2: Build local BM25 index + print(" Loading action catalog for local BM25 index...") + if use_local: + tools = client.fetch_actions() + else: + # For production mode, use semantic search to build catalog + local_client = LocalLambdaClient(url=lambda_url) + tools = local_client.fetch_actions() + + local_index = ToolIndex(tools) # type: ignore[arg-type] + print(f" Indexed {len(tools)} actions\n") + + input(" Press Enter to start the demo...\n") + + # Step 3: Side-by-side comparison + print_header("SIDE-BY-SIDE COMPARISON") + + local_hits = 0 + semantic_hits = 0 + + for i, demo in 
enumerate(DEMO_QUERIES, 1): + query = demo["query"] + why = demo["why"] + + print(f"\n [{i}/{len(DEMO_QUERIES)}] Query: \"{query}\"") + print(f" Why interesting: {why}") + print() + + # Local search + start = time.perf_counter() + local_results = local_index.search(query, limit=3) + local_ms = (time.perf_counter() - start) * 1000 + local_names = [shorten_name(r.name) for r in local_results] + + # Semantic search + start = time.perf_counter() + sem_response = semantic_search(query=query, top_k=3) + sem_ms = (time.perf_counter() - start) * 1000 + sem_names = [shorten_name(r.action_name) for r in sem_response.results] + sem_scores = [f"{r.similarity_score:.2f}" for r in sem_response.results] + + # Display + w = 38 + print(f" {'Local BM25 (keyword)':<{w}} | {'Semantic Search (AI)':<{w}}") + print(f" {f'{local_ms:.1f}ms':<{w}} | {f'{sem_ms:.1f}ms':<{w}}") + print(f" {'-' * w} | {'-' * w}") + for j in range(min(3, max(len(local_names), len(sem_names)))): + l_name = local_names[j] if j < len(local_names) else "" + s_name = sem_names[j] if j < len(sem_names) else "" + s_score = sem_scores[j] if j < len(sem_scores) else "" + l_display = f" {l_name[:w]:<{w}}" + s_display = f" {s_name[:w - 8]:<{w - 8}} ({s_score})" if s_name else "" + print(f"{l_display} |{s_display}") + + input("\n Press Enter for next query...") + + # Step 4: Summary + print_header("BENCHMARK RESULTS (94 evaluation tasks)") + + print(""" + Method Hit@5 MRR Avg Latency + ---------------------------------------------------------- + Local BM25+TF-IDF 66.0% 0.538 1.2ms + Semantic Search 76.6% 0.634 279.6ms + ---------------------------------------------------------- + Improvement +10.6% +0.096 + """) + + # Step 5: Code examples + print_header("DEVELOPER API") + + print(""" + # 1. Direct semantic search + from stackone_ai import StackOneToolSet + + toolset = StackOneToolSet(api_key="xxx") + tools = toolset.search_tools("fire someone", top_k=5) + # Returns: terminate_employee, offboard_employee, ... + + + # 2. 
Semantic search with connector filter + tools = toolset.search_tools( + "send a message", + connector="slack", + top_k=3, + ) + # Returns: slack_send_message, slack_create_conversation, ... + + + # 3. MCP utility tool (for AI agents) + tools = toolset.fetch_tools() + utility = tools.utility_tools(use_semantic_search=True) + # AI agent gets: tool_search (semantic-powered) + tool_execute + + + # 4. Inspect results before fetching + results = toolset.search_action_names("onboard new hire") + for r in results: + print(f"{r.action_name}: {r.similarity_score:.2f}") + """) + + print_header("END OF DEMO") + + +def main() -> None: + parser = argparse.ArgumentParser(description="Semantic Search Demo") + parser.add_argument("--local", action="store_true", help="Use local Lambda") + parser.add_argument("--lambda-url", default=DEFAULT_LAMBDA_URL, help="Lambda URL") + args = parser.parse_args() + + api_key = os.environ.get("STACKONE_API_KEY") + run_demo(use_local=args.local, lambda_url=args.lambda_url, api_key=api_key) + + +if __name__ == "__main__": + main() diff --git a/tests/BENCHMARK_RESULTS.md b/tests/BENCHMARK_RESULTS.md new file mode 100644 index 0000000..0071a21 --- /dev/null +++ b/tests/BENCHMARK_RESULTS.md @@ -0,0 +1,151 @@ +# Search Benchmark Results + +## Local BM25+TF-IDF vs Semantic Search + +**Date:** 2025-02-06 +**Dataset:** 94 evaluation tasks across 8 categories +**Corpus:** 5,144 actions from 200+ connectors +**Metric:** Hit@5 (correct action in top 5 results) + +## Summary + +| Method | Hit@5 | MRR | Avg Latency | Hits | +|--------|-------|-----|-------------|------| +| Local BM25+TF-IDF | 66.0% | 0.538 | 1.2ms | 62/94 | +| Semantic Search | 76.6% | 0.634 | 279.6ms | 72/94 | +| **Improvement** | **+10.6%** | **+0.096** | | **+10** | + +## Detailed Breakdown + +### Semantic Wins (17 tasks) + +Tasks where semantic search finds the correct result but local BM25 fails. +These demonstrate semantic search's ability to understand **intent and synonyms**. 
+ +| Query | Local Top Result | Semantic Top Result | +|-------|-----------------|-------------------| +| "fire someone" | workable_get_job_recruiters | factorial_terminate_employee | +| "ping the team" | teamtailor_delete_team | slack_send_message | +| "file a new bug" | github_create_or_update_file | jira_update_issue | +| "ping my colleague" | salesforce_get_my_events | microsoftoutlook_reply_message | +| "fetch staff information" | pinpoint_get_application | workday_list_workers | +| "show me everyone in the company" | humaans_get_me | lattice_talent_list_users | +| "turn down a job seeker" | pinpoint_get_job_seeker | jobadder_reject_requisition | +| "check application status" | dropbox_check_remove_member | jobadder_list_application_status | +| "check my to-do list" | jira_check_bulk_permissions | todoist_list_tasks | +| "start a group chat" | microsoftteams_update_chat | discord_create_group_dm | +| "move candidate forward" | workable_move_candidate | greenhouse_move_application | +| "approve PTO" | ashby_approve_offer | planday_approve_absence_request | +| "update staff record" | bamboohr_update_hour_record | cezannehr_update_employee | +| "pull the org chart" | github_create_issue_comment | lattice_list_review_cycles | +| "assign training to employee" | easyllama_assign_training | hibob_create_training_record | +| "file a bug report" | smartrecruiters_get_report_file | github_create_issue_comment | +| "track customer interaction" | qlik_create_interaction | peoplefluent_track_launch | + +### Local Wins (7 tasks) + +Tasks where BM25 keyword matching outperforms semantic search. 
+ +| Query | Local Top Result | Semantic Top Result | +|-------|-----------------|-------------------| +| "see who applied for the role" | greenhouse_list_applied_candidate_tags | ashby_add_hiring_team_member | +| "advance someone to the next round" | greenhouse_move_application | factorial_invite_employee | +| "see open positions" | teamtailor_list_jobs | hibob_create_position_opening | +| "close a deal" | zohocrm_get_deal | shopify_close_order | +| "check course completion" | saba_delete_recurring_completion | saba_get_course | +| "update deal and notify team" | zohocrm_get_deal | microsoftteams_update_team | +| "look up customer" | linear_update_customer_need | shopify_search_customers | + +### Both Miss (15 tasks) + +Hard queries that neither method handles well. Many are abbreviations, cross-domain concepts, or have overly strict expected matches. + +| Query | Category | Why Hard | +|-------|----------|----------| +| "onboard a new team member" | hr | "team member" maps to team tools, not HR | +| "OOO" | hr | Abbreviation - neither understands | +| "DM someone" | messaging | Both find discord_create_dm but expected pattern too strict | +| "customer onboarding" | crm | Cross-domain concept | +| "close quarter books" | crm | Domain-specific financial term | +| "PTO request" | hr | Both find PTO tools but expected pattern mismatch | +| "kill the ticket" | project | Both find delete_ticket but expected pattern mismatch | +| "who works in engineering" | hr | Requires department filtering, not just listing | +| "add a new prospect" | crm | Both find prospect tools but connector mismatch | +| "see all shared files" | documents | "shared" narrows scope too much | +| "see available trainings" | lms | Both find training tools but pattern mismatch | +| "track learning progress" | lms | Abstract concept mapping | +| "create team workspace" | messaging | Cross-domain: workspace vs channel | +| "log customer call" | crm | Connector-specific (Salesforce) term | +| "add new 
lead" | crm | Connector-specific (HubSpot) but returns wrong HubSpot actions | + +## How to Run + +### Local Mode (recommended for development) + +Requires the action_search Lambda running locally: + +```bash +# Terminal 1: Start the Lambda +cd ai-generation/apps/action_search +cp .env.example .env +# Edit .env: set USE_LOCAL_STORE=false and TURBOPUFFER_API_KEY=tpuf_xxx +make run-local + +# Terminal 2: Run benchmark +cd stackone-ai-python +uv run python tests/benchmark_search.py --local +``` + +### Production Mode + +```bash +STACKONE_API_KEY=xxx uv run python tests/benchmark_search.py +``` + +### CLI Options + +``` +--local Use local Lambda instead of production API +--lambda-url URL Custom Lambda URL (default: localhost:4513) +--api-url URL Custom production API URL +``` + +## Methodology + +### Evaluation Tasks + +94 tasks across 8 categories: + +| Category | Tasks | Description | +|----------|-------|-------------| +| HR/HRIS | 19 | Employee management, time off, org structure | +| Recruiting/ATS | 12 | Candidates, applications, interviews | +| CRM | 12 | Contacts, deals, accounts | +| Project Management | 8 | Tasks, issues, projects | +| Messaging | 5 | Messages, channels, conversations | +| Documents | 5 | Files, folders, drives | +| Marketing | 5 | Campaigns, lists, automation | +| LMS | 5 | Courses, assignments, completions | + +Plus per-connector tests (Slack, Jira, Greenhouse, Salesforce, HubSpot) and edge cases (abbreviations, slang, complex queries). + +### Matching Logic + +- **Hit@5**: At least one expected pattern appears (case-insensitive partial match) in the top 5 results +- **MRR** (Mean Reciprocal Rank): 1/position of first correct result, averaged across all tasks +- **Fair comparison**: Both methods search the same 5,144-action corpus + +### Corpus + +Both local and semantic search operate on the same action catalog: +- 5,144 unique actions +- 200+ connectors (BambooHR, Greenhouse, Salesforce, Slack, Jira, etc.) 
+- 7 verticals (HRIS, ATS, CRM, Documents, IAM, LMS, Marketing) + +## Conclusions + +1. **Semantic search improves accuracy by +10.6%** (66.0% -> 76.6% Hit@5) +2. **Semantic excels at intent understanding**: "fire someone" -> terminate, "ping the team" -> send_message +3. **Local BM25 is competitive** when queries contain exact keywords from tool names +4. **15 tasks need better evaluation criteria** - some "misses" are actually correct results with overly strict expected patterns +5. **Latency tradeoff**: Local is ~230x faster (1.2ms vs 280ms) but runs in-memory with pre-built index diff --git a/tests/benchmark_search.py b/tests/benchmark_search.py index 4910bf0..5f09a1a 100644 --- a/tests/benchmark_search.py +++ b/tests/benchmark_search.py @@ -30,7 +30,6 @@ import httpx -from stackone_ai import StackOneToolSet from stackone_ai.semantic_search import SemanticSearchClient, SemanticSearchResponse, SemanticSearchResult from stackone_ai.utility_tools import ToolIndex @@ -77,6 +76,35 @@ def __init__( self.lambda_url = lambda_url self.timeout = timeout + def _invoke(self, event: dict[str, Any]) -> dict[str, Any]: + """Invoke the local Lambda with an event payload.""" + response = httpx.post( + self.lambda_url, + json=event, + headers={"Content-Type": "application/json"}, + timeout=self.timeout, + ) + response.raise_for_status() + return response.json() + + def _parse_results(self, data: dict[str, Any], query: str) -> SemanticSearchResponse: + """Parse Lambda response into SemanticSearchResponse.""" + results = [ + SemanticSearchResult( + action_name=r.get("action_name", ""), + connector_key=r.get("connector_key", ""), + similarity_score=r.get("similarity_score", 0.0), + label=r.get("label", ""), + description=r.get("description", ""), + ) + for r in data.get("results", []) + ] + return SemanticSearchResponse( + results=results, + total_count=data.get("total_count", len(results)), + query=data.get("query", query), + ) + def search( self, query: str, @@ -93,48 +121,83 @@ 
def search( Returns: SemanticSearchResponse with matching actions """ - # Lambda event envelope format payload: dict[str, Any] = { "type": "search", - "payload": { - "query": query, - "top_k": top_k, - }, + "payload": {"query": query, "top_k": top_k}, } if connector: payload["payload"]["connector"] = connector try: - response = httpx.post( - self.lambda_url, - json=payload, - headers={"Content-Type": "application/json"}, - timeout=self.timeout, - ) - response.raise_for_status() - data = response.json() - - # Convert Lambda response to SemanticSearchResponse - results = [ - SemanticSearchResult( - action_name=r.get("action_name", ""), - connector_key=r.get("connector_key", ""), - similarity_score=r.get("similarity_score", 0.0), - label=r.get("label", ""), - description=r.get("description", ""), - ) - for r in data.get("results", []) - ] - return SemanticSearchResponse( - results=results, - total_count=data.get("total_count", len(results)), - query=data.get("query", query), - ) + data = self._invoke(payload) + return self._parse_results(data, query) except httpx.RequestError as e: raise RuntimeError(f"Local Lambda request failed: {e}") from e except Exception as e: raise RuntimeError(f"Local Lambda search failed: {e}") from e + def fetch_all_actions(self) -> list[SemanticSearchResult]: + """Fetch a broad set of actions from the Lambda for building local BM25 index. + + Uses multiple broad queries with high top_k to collect the full action catalog. + This avoids needing the /mcp endpoint or STACKONE_API_KEY for benchmarking. 
+ + Returns: + Deduplicated list of all available actions + """ + broad_queries = [ + "employee", + "candidate", + "contact", + "task", + "message", + "file", + "user", + "event", + "campaign", + "course", + "deal", + "account", + "job", + "interview", + "department", + "time off", + "comment", + "project", + "folder", + "role", + ] + + seen: dict[str, SemanticSearchResult] = {} + for query in broad_queries: + try: + data = self._invoke({ + "type": "search", + "payload": {"query": query, "top_k": 500}, + }) + for r in data.get("results", []): + name = r.get("action_name", "") + if name and name not in seen: + seen[name] = SemanticSearchResult( + action_name=name, + connector_key=r.get("connector_key", ""), + similarity_score=r.get("similarity_score", 0.0), + label=r.get("label", ""), + description=r.get("description", ""), + ) + except Exception: + continue + + return list(seen.values()) + + +@dataclass +class LightweightTool: + """Minimal tool representation for BM25 indexing (no API dependency).""" + + name: str + description: str + @dataclass class EvaluationTask: @@ -897,11 +960,12 @@ def __init__( """Initialize benchmark with tools and search client. 
Args: - tools: List of StackOneTool instances to search + tools: List of tool objects (StackOneTool or LightweightTool) with name + description semantic_client: Client for semantic search (production or local) """ self.tools = tools - self.local_index = ToolIndex(tools) + # ToolIndex uses duck typing - only needs .name and .description + self.local_index = ToolIndex(tools) # type: ignore[arg-type] self.semantic_client = semantic_client def evaluate_local( @@ -1043,15 +1107,41 @@ def print_report(report: ComparisonReport) -> None: print(f"{'Improvement':<25} {report.improvement:>+10.1%}") print("=" * 70) - # Show failed tasks for local search + # Build lookup maps + local_by_id = {r.task_id: r for r in report.local_results.results} + semantic_by_id = {r.task_id: r for r in report.semantic_results.results} + failed_local = [r for r in report.local_results.results if not r.hit] - if failed_local and len(failed_local) <= 20: - print(f"\nLocal search missed ({len(failed_local)} tasks):") - for r in failed_local[:10]: + failed_semantic = [r for r in report.semantic_results.results if not r.hit] + + # Tasks semantic gets right but local misses (the value semantic adds) + semantic_wins = [r for r in failed_local if semantic_by_id.get(r.task_id, r).hit] + # Tasks local gets right but semantic misses + local_wins = [r for r in failed_semantic if local_by_id.get(r.task_id, r).hit] + # Tasks both miss + both_miss = [r for r in failed_local if not semantic_by_id.get(r.task_id, r).hit] + + print(f"\n{'SEMANTIC WINS':} ({len(semantic_wins)} tasks - semantic gets right, local misses):") + for r in semantic_wins: + sr = semantic_by_id[r.task_id] + print(f" - {r.task_id}: '{r.query}'") + print(f" Local got: {r.top_results[:3]}") + print(f" Semantic got: {sr.top_results[:3]}") + + if local_wins: + print(f"\n{'LOCAL WINS':} ({len(local_wins)} tasks - local gets right, semantic misses):") + for r in local_wins: + lr = local_by_id[r.task_id] print(f" - {r.task_id}: '{r.query}'") - 
print(f" Got: {r.top_results[:3]}") - if len(failed_local) > 10: - print(f" ... and {len(failed_local) - 10} more") + print(f" Local got: {lr.top_results[:3]}") + print(f" Semantic got: {r.top_results[:3]}") + + print(f"\n{'BOTH MISS':} ({len(both_miss)} tasks):") + for r in both_miss: + sr = semantic_by_id[r.task_id] + print(f" - {r.task_id}: '{r.query}'") + print(f" Local got: {r.top_results[:3]}") + print(f" Semantic got: {sr.top_results[:3]}") def run_benchmark( @@ -1074,12 +1164,17 @@ def run_benchmark( Raises: ValueError: If no API key is available (production mode only) """ - # Create semantic search client based on mode + # Create semantic search client and load tools based on mode if use_local: print(f"Using LOCAL Lambda at: {local_lambda_url}") - semantic_client: SearchClientProtocol = LocalLambdaSearchClient(lambda_url=local_lambda_url) - # For local mode, we still need API key for toolset but can use a dummy if not set - api_key = api_key or os.environ.get("STACKONE_API_KEY") or "local-testing" + local_client = LocalLambdaSearchClient(lambda_url=local_lambda_url) + semantic_client: SearchClientProtocol = local_client + + # Fetch tool catalog from the Lambda itself (no /mcp or API key needed) + print("Fetching action catalog from local Lambda...") + actions = local_client.fetch_all_actions() + tools = [LightweightTool(name=a.action_name, description=a.description) for a in actions] + print(f"Loaded {len(tools)} actions from Lambda") else: api_key = api_key or os.environ.get("STACKONE_API_KEY") if not api_key: @@ -1087,15 +1182,17 @@ def run_benchmark( print(f"Using PRODUCTION API at: {base_url}") semantic_client = SemanticSearchClient(api_key=api_key, base_url=base_url) - print("Initializing toolset...") - toolset = StackOneToolSet(api_key=api_key, base_url=base_url) + from stackone_ai import StackOneToolSet + + print("Initializing toolset...") + toolset = StackOneToolSet(api_key=api_key, base_url=base_url) - print("Fetching tools (this may take a 
moment)...") - tools = toolset.fetch_tools() - print(f"Loaded {len(tools)} tools") + print("Fetching tools (this may take a moment)...") + tools = list(toolset.fetch_tools()) + print(f"Loaded {len(tools)} tools") print(f"\nRunning benchmark with {len(EVALUATION_TASKS)} evaluation tasks...") - benchmark = SearchBenchmark(list(tools), semantic_client=semantic_client) + benchmark = SearchBenchmark(tools, semantic_client=semantic_client) report = benchmark.compare() print_report(report) From 981f912bffd08c558271d9539625504a84f97368 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Fri, 6 Feb 2026 13:59:19 +0000 Subject: [PATCH 05/25] Fix CI lint errors --- examples/demo_semantic_search.py | 5 +++-- stackone_ai/semantic_search.py | 4 +--- tests/benchmark_search.py | 10 +++++---- tests/test_semantic_search.py | 36 ++++++++++++++++---------------- 4 files changed, 28 insertions(+), 27 deletions(-) diff --git a/examples/demo_semantic_search.py b/examples/demo_semantic_search.py index 32b14f7..7dff5a0 100644 --- a/examples/demo_semantic_search.py +++ b/examples/demo_semantic_search.py @@ -184,6 +184,7 @@ def run_demo(use_local: bool, lambda_url: str, api_key: str | None) -> None: print("Error: STACKONE_API_KEY required for production mode") print("Use --local flag for local Lambda mode") exit(1) + assert api_key is not None # narrowing for type checker sem_client = SemanticSearchClient(api_key=api_key) semantic_search = sem_client.search client = None @@ -216,7 +217,7 @@ def run_demo(use_local: bool, lambda_url: str, api_key: str | None) -> None: query = demo["query"] why = demo["why"] - print(f"\n [{i}/{len(DEMO_QUERIES)}] Query: \"{query}\"") + print(f'\n [{i}/{len(DEMO_QUERIES)}] Query: "{query}"') print(f" Why interesting: {why}") print() @@ -243,7 +244,7 @@ def run_demo(use_local: bool, lambda_url: str, api_key: str | None) -> None: s_name = sem_names[j] if j < len(sem_names) else "" s_score = sem_scores[j] if j < len(sem_scores) else "" l_display = f" 
{l_name[:w]:<{w}}" - s_display = f" {s_name[:w - 8]:<{w - 8}} ({s_score})" if s_name else "" + s_display = f" {s_name[: w - 8]:<{w - 8}} ({s_score})" if s_name else "" print(f"{l_display} |{s_display}") input("\n Press Enter for next query...") diff --git a/stackone_ai/semantic_search.py b/stackone_ai/semantic_search.py index c9caf21..f23a9e9 100644 --- a/stackone_ai/semantic_search.py +++ b/stackone_ai/semantic_search.py @@ -108,9 +108,7 @@ def search( data = response.json() return SemanticSearchResponse(**data) except httpx.HTTPStatusError as e: - raise SemanticSearchError( - f"API error: {e.response.status_code} - {e.response.text}" - ) from e + raise SemanticSearchError(f"API error: {e.response.status_code} - {e.response.text}") from e except httpx.RequestError as e: raise SemanticSearchError(f"Request failed: {e}") from e except Exception as e: diff --git a/tests/benchmark_search.py b/tests/benchmark_search.py index 5f09a1a..0418bfa 100644 --- a/tests/benchmark_search.py +++ b/tests/benchmark_search.py @@ -171,10 +171,12 @@ def fetch_all_actions(self) -> list[SemanticSearchResult]: seen: dict[str, SemanticSearchResult] = {} for query in broad_queries: try: - data = self._invoke({ - "type": "search", - "payload": {"query": query, "top_k": 500}, - }) + data = self._invoke( + { + "type": "search", + "payload": {"query": query, "top_k": 500}, + } + ) for r in data.get("results", []): name = r.get("action_name", "") if name and name not in seen: diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py index 5bfef95..a7c0441 100644 --- a/tests/test_semantic_search.py +++ b/tests/test_semantic_search.py @@ -593,12 +593,14 @@ def make_tool(name: str) -> StackOneTool: _api_key="test-key", ) - tools = Tools([ - make_tool("bamboohr_create_employee"), - make_tool("bamboohr_list_employees"), - make_tool("hibob_create_employee"), - make_tool("slack_send_message"), - ]) + tools = Tools( + [ + make_tool("bamboohr_create_employee"), + 
make_tool("bamboohr_list_employees"), + make_tool("hibob_create_employee"), + make_tool("slack_send_message"), + ] + ) connectors = tools.get_connectors() @@ -623,12 +625,14 @@ def make_tool(name: str) -> StackOneTool: _api_key="test-key", ) - tools = Tools([ - make_tool("bamboohr_create_employee"), - make_tool("bamboohr_list_employees"), - make_tool("hibob_create_employee"), - make_tool("slack_send_message"), - ]) + tools = Tools( + [ + make_tool("bamboohr_create_employee"), + make_tool("bamboohr_list_employees"), + make_tool("hibob_create_employee"), + make_tool("slack_send_message"), + ] + ) # Filter by single connector bamboo_tools = tools.filter_by_connector(["bamboohr"]) @@ -647,9 +651,7 @@ def test_filter_by_connector_case_insensitive(self) -> None: tool = StackOneTool( description="Creates employee", parameters=ToolParameters(type="object", properties={}), - _execute_config=ExecuteConfig( - name="bamboohr_create_employee", method="POST", url="", headers={} - ), + _execute_config=ExecuteConfig(name="bamboohr_create_employee", method="POST", url="", headers={}), _api_key="test-key", ) tools = Tools([tool]) @@ -666,9 +668,7 @@ def test_filter_by_connector_returns_new_tools(self) -> None: tool = StackOneTool( description="Creates employee", parameters=ToolParameters(type="object", properties={}), - _execute_config=ExecuteConfig( - name="bamboohr_create_employee", method="POST", url="", headers={} - ), + _execute_config=ExecuteConfig(name="bamboohr_create_employee", method="POST", url="", headers={}), _api_key="test-key", ) tools = Tools([tool]) From be6db2a525031599aa5b7bb1bb0172f92c9e9848 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Fri, 6 Feb 2026 14:15:07 +0000 Subject: [PATCH 06/25] Fix the lint in the benchmark file --- tests/BENCHMARK_RESULTS.md | 121 +++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 60 deletions(-) diff --git a/tests/BENCHMARK_RESULTS.md b/tests/BENCHMARK_RESULTS.md index 0071a21..c84ff04 100644 --- 
a/tests/BENCHMARK_RESULTS.md +++ b/tests/BENCHMARK_RESULTS.md @@ -9,11 +9,11 @@ ## Summary -| Method | Hit@5 | MRR | Avg Latency | Hits | -|--------|-------|-----|-------------|------| -| Local BM25+TF-IDF | 66.0% | 0.538 | 1.2ms | 62/94 | -| Semantic Search | 76.6% | 0.634 | 279.6ms | 72/94 | -| **Improvement** | **+10.6%** | **+0.096** | | **+10** | +| Method | Hit@5 | MRR | Avg Latency | Hits | +| ----------------- | ---------- | ---------- | ----------- | ------- | +| Local BM25+TF-IDF | 66.0% | 0.538 | 1.2ms | 62/94 | +| Semantic Search | 76.6% | 0.634 | 279.6ms | 72/94 | +| **Improvement** | **+10.6%** | **+0.096** | | **+10** | ## Detailed Breakdown @@ -22,61 +22,61 @@ Tasks where semantic search finds the correct result but local BM25 fails. These demonstrate semantic search's ability to understand **intent and synonyms**. -| Query | Local Top Result | Semantic Top Result | -|-------|-----------------|-------------------| -| "fire someone" | workable_get_job_recruiters | factorial_terminate_employee | -| "ping the team" | teamtailor_delete_team | slack_send_message | -| "file a new bug" | github_create_or_update_file | jira_update_issue | -| "ping my colleague" | salesforce_get_my_events | microsoftoutlook_reply_message | -| "fetch staff information" | pinpoint_get_application | workday_list_workers | -| "show me everyone in the company" | humaans_get_me | lattice_talent_list_users | -| "turn down a job seeker" | pinpoint_get_job_seeker | jobadder_reject_requisition | -| "check application status" | dropbox_check_remove_member | jobadder_list_application_status | -| "check my to-do list" | jira_check_bulk_permissions | todoist_list_tasks | -| "start a group chat" | microsoftteams_update_chat | discord_create_group_dm | -| "move candidate forward" | workable_move_candidate | greenhouse_move_application | -| "approve PTO" | ashby_approve_offer | planday_approve_absence_request | -| "update staff record" | bamboohr_update_hour_record | 
cezannehr_update_employee | -| "pull the org chart" | github_create_issue_comment | lattice_list_review_cycles | -| "assign training to employee" | easyllama_assign_training | hibob_create_training_record | -| "file a bug report" | smartrecruiters_get_report_file | github_create_issue_comment | -| "track customer interaction" | qlik_create_interaction | peoplefluent_track_launch | +| Query | Local Top Result | Semantic Top Result | +| --------------------------------- | ------------------------------- | -------------------------------- | +| "fire someone" | workable_get_job_recruiters | factorial_terminate_employee | +| "ping the team" | teamtailor_delete_team | slack_send_message | +| "file a new bug" | github_create_or_update_file | jira_update_issue | +| "ping my colleague" | salesforce_get_my_events | microsoftoutlook_reply_message | +| "fetch staff information" | pinpoint_get_application | workday_list_workers | +| "show me everyone in the company" | humaans_get_me | lattice_talent_list_users | +| "turn down a job seeker" | pinpoint_get_job_seeker | jobadder_reject_requisition | +| "check application status" | dropbox_check_remove_member | jobadder_list_application_status | +| "check my to-do list" | jira_check_bulk_permissions | todoist_list_tasks | +| "start a group chat" | microsoftteams_update_chat | discord_create_group_dm | +| "move candidate forward" | workable_move_candidate | greenhouse_move_application | +| "approve PTO" | ashby_approve_offer | planday_approve_absence_request | +| "update staff record" | bamboohr_update_hour_record | cezannehr_update_employee | +| "pull the org chart" | github_create_issue_comment | lattice_list_review_cycles | +| "assign training to employee" | easyllama_assign_training | hibob_create_training_record | +| "file a bug report" | smartrecruiters_get_report_file | github_create_issue_comment | +| "track customer interaction" | qlik_create_interaction | peoplefluent_track_launch | ### Local Wins (7 tasks) Tasks where 
BM25 keyword matching outperforms semantic search. -| Query | Local Top Result | Semantic Top Result | -|-------|-----------------|-------------------| -| "see who applied for the role" | greenhouse_list_applied_candidate_tags | ashby_add_hiring_team_member | -| "advance someone to the next round" | greenhouse_move_application | factorial_invite_employee | -| "see open positions" | teamtailor_list_jobs | hibob_create_position_opening | -| "close a deal" | zohocrm_get_deal | shopify_close_order | -| "check course completion" | saba_delete_recurring_completion | saba_get_course | -| "update deal and notify team" | zohocrm_get_deal | microsoftteams_update_team | -| "look up customer" | linear_update_customer_need | shopify_search_customers | +| Query | Local Top Result | Semantic Top Result | +| ----------------------------------- | -------------------------------------- | ----------------------------- | +| "see who applied for the role" | greenhouse_list_applied_candidate_tags | ashby_add_hiring_team_member | +| "advance someone to the next round" | greenhouse_move_application | factorial_invite_employee | +| "see open positions" | teamtailor_list_jobs | hibob_create_position_opening | +| "close a deal" | zohocrm_get_deal | shopify_close_order | +| "check course completion" | saba_delete_recurring_completion | saba_get_course | +| "update deal and notify team" | zohocrm_get_deal | microsoftteams_update_team | +| "look up customer" | linear_update_customer_need | shopify_search_customers | ### Both Miss (15 tasks) Hard queries that neither method handles well. Many are abbreviations, cross-domain concepts, or have overly strict expected matches. 
-| Query | Category | Why Hard | -|-------|----------|----------| -| "onboard a new team member" | hr | "team member" maps to team tools, not HR | -| "OOO" | hr | Abbreviation - neither understands | -| "DM someone" | messaging | Both find discord_create_dm but expected pattern too strict | -| "customer onboarding" | crm | Cross-domain concept | -| "close quarter books" | crm | Domain-specific financial term | -| "PTO request" | hr | Both find PTO tools but expected pattern mismatch | -| "kill the ticket" | project | Both find delete_ticket but expected pattern mismatch | -| "who works in engineering" | hr | Requires department filtering, not just listing | -| "add a new prospect" | crm | Both find prospect tools but connector mismatch | -| "see all shared files" | documents | "shared" narrows scope too much | -| "see available trainings" | lms | Both find training tools but pattern mismatch | -| "track learning progress" | lms | Abstract concept mapping | -| "create team workspace" | messaging | Cross-domain: workspace vs channel | -| "log customer call" | crm | Connector-specific (Salesforce) term | -| "add new lead" | crm | Connector-specific (HubSpot) but returns wrong HubSpot actions | +| Query | Category | Why Hard | +| --------------------------- | --------- | -------------------------------------------------------------- | +| "onboard a new team member" | hr | "team member" maps to team tools, not HR | +| "OOO" | hr | Abbreviation - neither understands | +| "DM someone" | messaging | Both find discord_create_dm but expected pattern too strict | +| "customer onboarding" | crm | Cross-domain concept | +| "close quarter books" | crm | Domain-specific financial term | +| "PTO request" | hr | Both find PTO tools but expected pattern mismatch | +| "kill the ticket" | project | Both find delete_ticket but expected pattern mismatch | +| "who works in engineering" | hr | Requires department filtering, not just listing | +| "add a new prospect" | crm | Both find 
prospect tools but connector mismatch | +| "see all shared files" | documents | "shared" narrows scope too much | +| "see available trainings" | lms | Both find training tools but pattern mismatch | +| "track learning progress" | lms | Abstract concept mapping | +| "create team workspace" | messaging | Cross-domain: workspace vs channel | +| "log customer call" | crm | Connector-specific (Salesforce) term | +| "add new lead" | crm | Connector-specific (HubSpot) but returns wrong HubSpot actions | ## How to Run @@ -116,16 +116,16 @@ STACKONE_API_KEY=xxx uv run python tests/benchmark_search.py 94 tasks across 8 categories: -| Category | Tasks | Description | -|----------|-------|-------------| -| HR/HRIS | 19 | Employee management, time off, org structure | -| Recruiting/ATS | 12 | Candidates, applications, interviews | -| CRM | 12 | Contacts, deals, accounts | -| Project Management | 8 | Tasks, issues, projects | -| Messaging | 5 | Messages, channels, conversations | -| Documents | 5 | Files, folders, drives | -| Marketing | 5 | Campaigns, lists, automation | -| LMS | 5 | Courses, assignments, completions | +| Category | Tasks | Description | +| ------------------ | ----- | -------------------------------------------- | +| HR/HRIS | 19 | Employee management, time off, org structure | +| Recruiting/ATS | 12 | Candidates, applications, interviews | +| CRM | 12 | Contacts, deals, accounts | +| Project Management | 8 | Tasks, issues, projects | +| Messaging | 5 | Messages, channels, conversations | +| Documents | 5 | Files, folders, drives | +| Marketing | 5 | Campaigns, lists, automation | +| LMS | 5 | Courses, assignments, completions | Plus per-connector tests (Slack, Jira, Greenhouse, Salesforce, HubSpot) and edge cases (abbreviations, slang, complex queries). 
@@ -138,6 +138,7 @@ Plus per-connector tests (Slack, Jira, Greenhouse, Salesforce, HubSpot) and edge ### Corpus Both local and semantic search operate on the same action catalog: + - 5,144 unique actions - 200+ connectors (BambooHR, Greenhouse, Salesforce, Slack, Jira, etc.) - 7 verticals (HRIS, ATS, CRM, Documents, IAM, LMS, Marketing) From bcb0b8768570acd81617c993a8a39cf5289dfd91 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Fri, 6 Feb 2026 14:43:58 +0000 Subject: [PATCH 07/25] Formalise the docs and code --- examples/demo_semantic_search.py | 311 ------------------------------- tests/BENCHMARK_RESULTS.md | 7 +- tests/benchmark_search.py | 17 +- 3 files changed, 6 insertions(+), 329 deletions(-) delete mode 100644 examples/demo_semantic_search.py diff --git a/examples/demo_semantic_search.py b/examples/demo_semantic_search.py deleted file mode 100644 index 7dff5a0..0000000 --- a/examples/demo_semantic_search.py +++ /dev/null @@ -1,311 +0,0 @@ -""" -Semantic Search Demo - Local BM25 vs Semantic Search - -Demonstrates how semantic search understands natural language intent -while local keyword search fails on synonyms and colloquial queries. 
- -Run with local Lambda: - cd ai-generation/apps/action_search && make run-local - uv run python examples/demo_semantic_search.py --local - -Run with production API: - STACKONE_API_KEY=xxx uv run python examples/demo_semantic_search.py -""" - -from __future__ import annotations - -import argparse -import os -import time -from dataclasses import dataclass -from typing import Any - -import httpx - -from stackone_ai.semantic_search import ( - SemanticSearchClient, - SemanticSearchResponse, - SemanticSearchResult, -) -from stackone_ai.utility_tools import ToolIndex - -# Local Lambda URL -DEFAULT_LAMBDA_URL = "http://localhost:4513/2015-03-31/functions/function/invocations" - -# Demo queries - the strongest "wow" moments from benchmark results -DEMO_QUERIES = [ - { - "query": "fire someone", - "why": "Synonym: 'fire' = terminate employment", - }, - { - "query": "ping the team", - "why": "Intent: 'ping' = send a message", - }, - { - "query": "file a new bug", - "why": "Intent: 'file a bug' = create issue (not file operations)", - }, - { - "query": "check my to-do list", - "why": "Concept: 'to-do list' = list tasks", - }, - { - "query": "show me everyone in the company", - "why": "Synonym: 'everyone in company' = list employees", - }, - { - "query": "turn down a job seeker", - "why": "Synonym: 'turn down' = reject application", - }, - { - "query": "approve PTO", - "why": "Abbreviation: 'PTO' = paid time off request", - }, - { - "query": "grab that spreadsheet", - "why": "Colloquial: 'grab' = download file", - }, -] - - -@dataclass -class LightweightTool: - """Minimal tool for BM25 indexing.""" - - name: str - description: str - - -class LocalLambdaClient: - """Client for local action_search Lambda.""" - - def __init__(self, url: str = DEFAULT_LAMBDA_URL) -> None: - self.url = url - - def search( - self, - query: str, - connector: str | None = None, - top_k: int = 5, - ) -> SemanticSearchResponse: - payload: dict[str, Any] = { - "type": "search", - "payload": {"query": 
query, "top_k": top_k}, - } - if connector: - payload["payload"]["connector"] = connector - - resp = httpx.post(self.url, json=payload, timeout=30.0) - resp.raise_for_status() - data = resp.json() - - results = [ - SemanticSearchResult( - action_name=r.get("action_name", ""), - connector_key=r.get("connector_key", ""), - similarity_score=r.get("similarity_score", 0.0), - label=r.get("label", ""), - description=r.get("description", ""), - ) - for r in data.get("results", []) - ] - return SemanticSearchResponse( - results=results, - total_count=data.get("total_count", len(results)), - query=data.get("query", query), - ) - - def fetch_actions(self) -> list[LightweightTool]: - """Fetch broad action catalog for BM25 index.""" - seen: dict[str, LightweightTool] = {} - for q in ["employee", "candidate", "contact", "task", "message", "file", "event", "deal"]: - try: - resp = httpx.post( - self.url, - json={"type": "search", "payload": {"query": q, "top_k": 500}}, - timeout=30.0, - ) - for r in resp.json().get("results", []): - name = r.get("action_name", "") - if name and name not in seen: - seen[name] = LightweightTool(name=name, description=r.get("description", "")) - except Exception: - continue - return list(seen.values()) - - -def shorten_name(name: str) -> str: - """Shorten action name for display. - - bamboohr_1.0.0_bamboohr_list_employees_global -> bamboohr: list_employees - """ - parts = name.split("_") - # Find version segment (e.g., "1.0.0") and split around it - version_idx = None - for i, p in enumerate(parts): - if "." 
in p and any(c.isdigit() for c in p): - version_idx = i - break - - if version_idx is not None: - connector = parts[0] - # Skip connector + version + repeated connector prefix - action_parts = parts[version_idx + 1 :] - # Remove leading connector name if repeated - if action_parts and action_parts[0].lower().replace("-", "") == connector.lower().replace("-", ""): - action_parts = action_parts[1:] - # Remove trailing 'global' - if action_parts and action_parts[-1] == "global": - action_parts = action_parts[:-1] - action = "_".join(action_parts) - return f"{connector}: {action}" - - return name - - -def print_header(text: str) -> None: - print(f"\n{'=' * 70}") - print(f" {text}") - print(f"{'=' * 70}") - - -def print_section(text: str) -> None: - print(f"\n--- {text} ---\n") - - -def run_demo(use_local: bool, lambda_url: str, api_key: str | None) -> None: - # Step 1: Setup - if use_local: - client = LocalLambdaClient(url=lambda_url) - semantic_search = client.search - else: - if not api_key: - print("Error: STACKONE_API_KEY required for production mode") - print("Use --local flag for local Lambda mode") - exit(1) - assert api_key is not None # narrowing for type checker - sem_client = SemanticSearchClient(api_key=api_key) - semantic_search = sem_client.search - client = None - - print_header("SEMANTIC SEARCH DEMO") - print("\n Comparing Local BM25+TF-IDF vs Semantic Search") - print(" across 5,144 actions from 200+ connectors\n") - - # Step 2: Build local BM25 index - print(" Loading action catalog for local BM25 index...") - if use_local: - tools = client.fetch_actions() - else: - # For production mode, use semantic search to build catalog - local_client = LocalLambdaClient(url=lambda_url) - tools = local_client.fetch_actions() - - local_index = ToolIndex(tools) # type: ignore[arg-type] - print(f" Indexed {len(tools)} actions\n") - - input(" Press Enter to start the demo...\n") - - # Step 3: Side-by-side comparison - print_header("SIDE-BY-SIDE COMPARISON") - - 
local_hits = 0 - semantic_hits = 0 - - for i, demo in enumerate(DEMO_QUERIES, 1): - query = demo["query"] - why = demo["why"] - - print(f'\n [{i}/{len(DEMO_QUERIES)}] Query: "{query}"') - print(f" Why interesting: {why}") - print() - - # Local search - start = time.perf_counter() - local_results = local_index.search(query, limit=3) - local_ms = (time.perf_counter() - start) * 1000 - local_names = [shorten_name(r.name) for r in local_results] - - # Semantic search - start = time.perf_counter() - sem_response = semantic_search(query=query, top_k=3) - sem_ms = (time.perf_counter() - start) * 1000 - sem_names = [shorten_name(r.action_name) for r in sem_response.results] - sem_scores = [f"{r.similarity_score:.2f}" for r in sem_response.results] - - # Display - w = 38 - print(f" {'Local BM25 (keyword)':<{w}} | {'Semantic Search (AI)':<{w}}") - print(f" {f'{local_ms:.1f}ms':<{w}} | {f'{sem_ms:.1f}ms':<{w}}") - print(f" {'-' * w} | {'-' * w}") - for j in range(min(3, max(len(local_names), len(sem_names)))): - l_name = local_names[j] if j < len(local_names) else "" - s_name = sem_names[j] if j < len(sem_names) else "" - s_score = sem_scores[j] if j < len(sem_scores) else "" - l_display = f" {l_name[:w]:<{w}}" - s_display = f" {s_name[: w - 8]:<{w - 8}} ({s_score})" if s_name else "" - print(f"{l_display} |{s_display}") - - input("\n Press Enter for next query...") - - # Step 4: Summary - print_header("BENCHMARK RESULTS (94 evaluation tasks)") - - print(""" - Method Hit@5 MRR Avg Latency - ---------------------------------------------------------- - Local BM25+TF-IDF 66.0% 0.538 1.2ms - Semantic Search 76.6% 0.634 279.6ms - ---------------------------------------------------------- - Improvement +10.6% +0.096 - """) - - # Step 5: Code examples - print_header("DEVELOPER API") - - print(""" - # 1. 
Direct semantic search - from stackone_ai import StackOneToolSet - - toolset = StackOneToolSet(api_key="xxx") - tools = toolset.search_tools("fire someone", top_k=5) - # Returns: terminate_employee, offboard_employee, ... - - - # 2. Semantic search with connector filter - tools = toolset.search_tools( - "send a message", - connector="slack", - top_k=3, - ) - # Returns: slack_send_message, slack_create_conversation, ... - - - # 3. MCP utility tool (for AI agents) - tools = toolset.fetch_tools() - utility = tools.utility_tools(use_semantic_search=True) - # AI agent gets: tool_search (semantic-powered) + tool_execute - - - # 4. Inspect results before fetching - results = toolset.search_action_names("onboard new hire") - for r in results: - print(f"{r.action_name}: {r.similarity_score:.2f}") - """) - - print_header("END OF DEMO") - - -def main() -> None: - parser = argparse.ArgumentParser(description="Semantic Search Demo") - parser.add_argument("--local", action="store_true", help="Use local Lambda") - parser.add_argument("--lambda-url", default=DEFAULT_LAMBDA_URL, help="Lambda URL") - args = parser.parse_args() - - api_key = os.environ.get("STACKONE_API_KEY") - run_demo(use_local=args.local, lambda_url=args.lambda_url, api_key=api_key) - - -if __name__ == "__main__": - main() diff --git a/tests/BENCHMARK_RESULTS.md b/tests/BENCHMARK_RESULTS.md index c84ff04..35c9940 100644 --- a/tests/BENCHMARK_RESULTS.md +++ b/tests/BENCHMARK_RESULTS.md @@ -85,14 +85,9 @@ Hard queries that neither method handles well. 
Many are abbreviations, cross-dom Requires the action_search Lambda running locally: ```bash -# Terminal 1: Start the Lambda -cd ai-generation/apps/action_search -cp .env.example .env -# Edit .env: set USE_LOCAL_STORE=false and TURBOPUFFER_API_KEY=tpuf_xxx -make run-local +# Terminal 1: Start the local action_search Lambda # Terminal 2: Run benchmark -cd stackone-ai-python uv run python tests/benchmark_search.py --local ``` diff --git a/tests/benchmark_search.py b/tests/benchmark_search.py index 0418bfa..f3881e6 100644 --- a/tests/benchmark_search.py +++ b/tests/benchmark_search.py @@ -9,9 +9,8 @@ Run with production API: STACKONE_API_KEY=xxx python tests/benchmark_search.py -Run with local Lambda (ai-generation/apps/action_search): - # First, start the local Lambda: - # cd ai-generation/apps/action_search && make run-local +Run with local Lambda: + # First, start the local action_search Lambda # Then run benchmark: python tests/benchmark_search.py --local @@ -33,7 +32,7 @@ from stackone_ai.semantic_search import SemanticSearchClient, SemanticSearchResponse, SemanticSearchResult from stackone_ai.utility_tools import ToolIndex -# Default local Lambda URL (from ai-generation/apps/action_search docker-compose) +# Default local Lambda URL DEFAULT_LOCAL_LAMBDA_URL = "http://localhost:4513/2015-03-31/functions/function/invocations" @@ -51,13 +50,8 @@ def search( class LocalLambdaSearchClient: """Client for local action_search Lambda. - This client connects to the local Lambda running via docker-compose - from ai-generation/apps/action_search. 
- Usage: - # Start local Lambda first: - # cd ai-generation/apps/action_search && make run-local - + # Start the local action_search Lambda first client = LocalLambdaSearchClient() response = client.search("create employee", connector="bamboohr", top_k=5) """ @@ -214,7 +208,6 @@ class EvaluationTask: # 103 semantically-challenging evaluation queries -# Ported from ai-generation/apps/action_search/tests/benchmark.integration.spec.ts EVALUATION_TASKS: list[EvaluationTask] = [ # ============ ALL CONNECTORS - SEMANTIC CHALLENGES ============ # HR/HRIS - Natural language @@ -1212,7 +1205,7 @@ def main() -> None: # Run with production API STACKONE_API_KEY=xxx python tests/benchmark_search.py - # Run with local Lambda (start it first: cd ai-generation/apps/action_search && make run-local) + # Run with local Lambda (start the local action_search Lambda first) python tests/benchmark_search.py --local # Run with custom local Lambda URL From 0a26c579173f80981a14e9d613bc2ca14f3e9d8c Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Fri, 6 Feb 2026 15:01:53 +0000 Subject: [PATCH 08/25] Keep semantic search minimal in the README --- README.md | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index ec247eb..8ea99fd 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ StackOne AI provides a unified interface for accessing various SaaS tools throug - Glob pattern filtering with patterns like `"hris_*"` and exclusions `"!hris_delete_*"` - Provider and action filtering - Multi-account support -- **Semantic Search**: AI-powered tool discovery using natural language (76.6% Hit@5 vs 66.0% for keyword search) +- **Semantic Search**: AI-powered tool discovery using natural language queries - **Utility Tools** (Beta): Dynamic tool discovery and execution based on natural language queries - Integration with popular AI frameworks: - OpenAI Functions @@ -328,30 +328,7 @@ result = 
execute_tool.call(toolName="hris_list_employees", params={"limit": 10}) ## Semantic Search -Search across 9,000+ actions using natural language instead of exact keyword matching. - -```python -from stackone_ai import StackOneToolSet - -toolset = StackOneToolSet() - -# Find tools using natural language -tools = toolset.search_tools("onboard a new team member", top_k=5) -# Returns: create_employee, invite_employee, ... - -# Filter by connector -tools = toolset.search_tools("send a message", connector="slack", top_k=3) -``` - -Semantic search understands intent and synonyms, so queries like "onboard a new team member", "check my to-do list", or "file a bug" return the right actions even when no keywords match. - -It can also power the `tool_search` utility tool for AI agents: - -```python -tools = toolset.fetch_tools(account_ids=["your-account-id"]) -utility = tools.utility_tools(use_semantic_search=True) -# AI agent gets semantic-powered tool_search + tool_execute -``` +Semantic search allows tool discovery using natural language instead of exact keyword matching. It understands intent and synonyms, so queries like "fire someone" or "check my to-do list" resolve to the right actions. Enable it via `toolset.search_tools(query)` or pass `use_semantic_search=True` to utility tools. 
## Examples From e6ab80b76c05fc907327f4d7fbd98ed8468b8bbd Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Fri, 6 Feb 2026 15:30:17 +0000 Subject: [PATCH 09/25] Remove the old benchmark data --- stackone_ai/models.py | 2 +- stackone_ai/semantic_search.py | 3 +-- stackone_ai/toolset.py | 6 +++--- stackone_ai/utility_tools.py | 6 +++--- tests/benchmark_search.py | 5 +---- tests/test_semantic_search.py | 1 - 6 files changed, 9 insertions(+), 14 deletions(-) diff --git a/stackone_ai/models.py b/stackone_ai/models.py index e2c1d2e..fb9a1de 100644 --- a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -584,7 +584,7 @@ def utility_tools( Utility tools enable dynamic tool discovery and execution based on natural language queries. By default, uses local hybrid BM25 + TF-IDF search. Optionally, can use cloud-based - semantic search for higher accuracy (84% Hit@5 vs 21% for local search). + semantic search for higher accuracy on natural language queries. Args: hybrid_alpha: Weight for BM25 in hybrid search (0-1). Only used when diff --git a/stackone_ai/semantic_search.py b/stackone_ai/semantic_search.py index f23a9e9..0f46f13 100644 --- a/stackone_ai/semantic_search.py +++ b/stackone_ai/semantic_search.py @@ -37,8 +37,7 @@ class SemanticSearchClient: """Client for StackOne semantic search API. This client provides access to the semantic search endpoint which uses - enhanced embeddings for 84% Hit@5 accuracy (compared to ~21% for local - BM25+TF-IDF search). + enhanced embeddings for higher accuracy than local BM25+TF-IDF search. Example: client = SemanticSearchClient(api_key="sk-xxx") diff --git a/stackone_ai/toolset.py b/stackone_ai/toolset.py index b4d67dd..68c9cdb 100644 --- a/stackone_ai/toolset.py +++ b/stackone_ai/toolset.py @@ -296,9 +296,9 @@ def search_tools( ) -> Tools: """Search for and fetch tools using semantic search. - This method uses the StackOne semantic search API (84% Hit@5 accuracy) - to find relevant tools based on natural language queries. 
It optimizes - results by filtering to only connectors available in linked accounts. + This method uses the StackOne semantic search API to find relevant tools + based on natural language queries. It optimizes results by filtering to + only connectors available in linked accounts. Args: query: Natural language description of needed functionality diff --git a/stackone_ai/utility_tools.py b/stackone_ai/utility_tools.py index 473f447..d49421e 100644 --- a/stackone_ai/utility_tools.py +++ b/stackone_ai/utility_tools.py @@ -270,8 +270,8 @@ def execute( def create_semantic_tool_search(semantic_client: SemanticSearchClient) -> StackOneTool: """Create a semantic search variant of tool_search. - Uses cloud semantic search API (84% Hit@5 accuracy) instead of - local BM25+TF-IDF (21% accuracy). + Uses cloud semantic search API instead of local BM25+TF-IDF for + improved natural language tool discovery. Args: semantic_client: Initialized SemanticSearchClient instance @@ -287,7 +287,7 @@ def create_semantic_tool_search(semantic_client: SemanticSearchClient) -> StackO name = "tool_search" description = ( "Searches for relevant tools based on a natural language query using " - "semantic vector search (84% accuracy). Call this first to discover " + "semantic vector search. Call this first to discover " "available tools before executing them." ) diff --git a/tests/benchmark_search.py b/tests/benchmark_search.py index f3881e6..883ae40 100644 --- a/tests/benchmark_search.py +++ b/tests/benchmark_search.py @@ -1,10 +1,7 @@ """ Benchmark comparing local BM25+TF-IDF vs semantic search. -Expected results: -- Local BM25+TF-IDF: ~21% Hit@5 -- Semantic Search: ~84% Hit@5 -- Improvement: 4x +Compares Hit@5 and MRR between local BM25+TF-IDF and semantic search. 
Run with production API: STACKONE_API_KEY=xxx python tests/benchmark_search.py diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py index a7c0441..d5a560b 100644 --- a/tests/test_semantic_search.py +++ b/tests/test_semantic_search.py @@ -508,7 +508,6 @@ def test_semantic_tool_search_has_correct_parameters(self) -> None: assert tool.name == "tool_search" assert "semantic" in tool.description.lower() - assert "84%" in tool.description props = tool.parameters.properties assert "query" in props From 96270d653f9102774dd70418d9f0132213222a74 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Fri, 6 Feb 2026 17:21:26 +0000 Subject: [PATCH 10/25] implement PR feedback suggestions from cubic --- stackone_ai/toolset.py | 16 ++++++-- stackone_ai/utility_tools.py | 4 ++ tests/benchmark_search.py | 2 +- tests/test_semantic_search.py | 69 +++++++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 5 deletions(-) diff --git a/stackone_ai/toolset.py b/stackone_ai/toolset.py index 68c9cdb..0fea009 100644 --- a/stackone_ai/toolset.py +++ b/stackone_ai/toolset.py @@ -344,7 +344,7 @@ def search_tools( # Step 2: Over-fetch from semantic API to account for connector filtering # We fetch 3x to ensure we get enough results after filtering over_fetch_multiplier = 3 - over_fetch_k = top_k * over_fetch_multiplier + over_fetch_k = min(top_k * over_fetch_multiplier, 500) response = self.semantic_client.search( query=query, @@ -378,6 +378,7 @@ def search_tools( # Fallback to local search all_tools = self.fetch_tools(account_ids=account_ids) + available_connectors = all_tools.get_connectors() utility = all_tools.utility_tools() search_tool = utility.get_tool("tool_search") @@ -385,12 +386,19 @@ def search_tools( result = search_tool.execute( { "query": query, - "limit": top_k, + "limit": top_k * 3, # Over-fetch to account for connector filtering "minScore": min_score, } ) matched_names = [t["name"] for t in result.get("tools", [])] - return Tools([t for t in 
all_tools if t.name in matched_names]) + # Filter by available connectors and preserve relevance order + tool_map = {t.name: t for t in all_tools} + matched_tools = [ + tool_map[name] + for name in matched_names + if name in tool_map and name.split("_")[0].lower() in available_connectors + ] + return Tools(matched_tools[:top_k]) return all_tools @@ -439,7 +447,7 @@ def search_action_names( tools = toolset.fetch_tools(actions=selected) """ # Over-fetch if filtering by available_connectors - fetch_k = top_k * 3 if available_connectors else top_k + fetch_k = min(top_k * 3, 500) if available_connectors else min(top_k, 500) response = self.semantic_client.search( query=query, diff --git a/stackone_ai/utility_tools.py b/stackone_ai/utility_tools.py index d49421e..91470ca 100644 --- a/stackone_ai/utility_tools.py +++ b/stackone_ai/utility_tools.py @@ -202,11 +202,13 @@ def create_tool_search(index: ToolIndex) -> StackOneTool: "type": "number", "description": "Maximum number of tools to return (default: 5)", "default": 5, + "nullable": True, }, "minScore": { "type": "number", "description": "Minimum relevance score (0-1) to filter results (default: 0.0)", "default": 0.0, + "nullable": True, }, }, ) @@ -305,11 +307,13 @@ def create_semantic_tool_search(semantic_client: SemanticSearchClient) -> StackO "type": "number", "description": "Maximum number of tools to return (default: 5)", "default": 5, + "nullable": True, }, "minScore": { "type": "number", "description": "Minimum similarity score (0-1) to filter results (default: 0.0)", "default": 0.0, + "nullable": True, }, "connector": { "type": "string", diff --git a/tests/benchmark_search.py b/tests/benchmark_search.py index 883ae40..8779324 100644 --- a/tests/benchmark_search.py +++ b/tests/benchmark_search.py @@ -204,7 +204,7 @@ class EvaluationTask: connector: str | None = None -# 103 semantically-challenging evaluation queries +# Semantically-challenging evaluation queries EVALUATION_TASKS: list[EvaluationTask] = [ # 
============ ALL CONNECTORS - SEMANTIC CHALLENGES ============ # HR/HRIS - Natural language diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py index d5a560b..abcbdf0 100644 --- a/tests/test_semantic_search.py +++ b/tests/test_semantic_search.py @@ -315,6 +315,75 @@ def test_toolset_search_tools( assert tools[0].name == "bamboohr_create_employee" # score 0.95 assert tools[1].name == "hibob_create_employee" # score 0.85 + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search_tools() fallback when semantic search fails.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + # Semantic search raises an error to trigger fallback + mock_search.side_effect = SemanticSearchError("API unavailable") + + # Mock MCP fetch to return tools from multiple connectors + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists all employees in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="workday_create_worker", + description="Creates a new worker in Workday", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", top_k=5, fallback_to_local=True) + + # Should return results from the local BM25+TF-IDF fallback + assert len(tools) > 0 + tool_names = [t.name for t in tools] + # Should only include tools for available connectors (bamboohr, workday) + for name in tool_names: + connector = name.split("_")[0] + assert connector in {"bamboohr", "workday"} + + 
@patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback_disabled( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test search_tools() raises when fallback is disabled.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.side_effect = SemanticSearchError("API unavailable") + # Must provide tools so the flow reaches the semantic search call + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + with pytest.raises(SemanticSearchError): + toolset.search_tools("create employee", fallback_to_local=False) + @patch.object(SemanticSearchClient, "search") @patch("stackone_ai.toolset._fetch_mcp_tools") def test_toolset_search_action_names( From 901d5af3b50931b29d8715e3247e2bf2f4e7f8a6 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Fri, 6 Feb 2026 17:31:48 +0000 Subject: [PATCH 11/25] fix nullable in the semantic tool schema --- stackone_ai/utility_tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stackone_ai/utility_tools.py b/stackone_ai/utility_tools.py index 91470ca..270bea3 100644 --- a/stackone_ai/utility_tools.py +++ b/stackone_ai/utility_tools.py @@ -222,8 +222,8 @@ def execute_filter(arguments: str | JsonDict | None = None) -> JsonDict: kwargs = arguments or {} query = kwargs.get("query", "") - limit = int(kwargs.get("limit", 5)) - min_score = float(kwargs.get("minScore", 0.0)) + limit = int(kwargs.get("limit") or 5) + min_score = float(kwargs.get("minScore") or 0.0) # Search for tools results = index.search(query, limit, min_score) @@ -331,8 +331,8 @@ def execute_search(arguments: str | JsonDict | None = None) -> JsonDict: kwargs = arguments or {} query = kwargs.get("query", "") - 
limit = int(kwargs.get("limit", 5)) - min_score = float(kwargs.get("minScore", 0.0)) + limit = int(kwargs.get("limit") or 5) + min_score = float(kwargs.get("minScore") or 0.0) connector = kwargs.get("connector") response = semantic_client.search( From 94bb25d828e9ad7df61ab2d4ce3e8273e64139c7 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Fri, 6 Feb 2026 17:38:57 +0000 Subject: [PATCH 12/25] limit override --- stackone_ai/utility_tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stackone_ai/utility_tools.py b/stackone_ai/utility_tools.py index 270bea3..00a2d80 100644 --- a/stackone_ai/utility_tools.py +++ b/stackone_ai/utility_tools.py @@ -222,8 +222,8 @@ def execute_filter(arguments: str | JsonDict | None = None) -> JsonDict: kwargs = arguments or {} query = kwargs.get("query", "") - limit = int(kwargs.get("limit") or 5) - min_score = float(kwargs.get("minScore") or 0.0) + limit = int(kwargs["limit"]) if kwargs.get("limit") is not None else 5 + min_score = float(kwargs["minScore"]) if kwargs.get("minScore") is not None else 0.0 # Search for tools results = index.search(query, limit, min_score) @@ -331,8 +331,8 @@ def execute_search(arguments: str | JsonDict | None = None) -> JsonDict: kwargs = arguments or {} query = kwargs.get("query", "") - limit = int(kwargs.get("limit") or 5) - min_score = float(kwargs.get("minScore") or 0.0) + limit = int(kwargs["limit"]) if kwargs.get("limit") is not None else 5 + min_score = float(kwargs["minScore"]) if kwargs.get("minScore") is not None else 0.0 connector = kwargs.get("connector") response = semantic_client.search( From 2c5619fdf6a9267b99c7f21e6a46222de808575d Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Fri, 6 Feb 2026 19:52:14 +0000 Subject: [PATCH 13/25] handle per connector calls to avoid the guesswork --- stackone_ai/toolset.py | 62 ++++++++++++++++++++++++++++++----- tests/test_semantic_search.py | 17 ++++++---- 2 files changed, 65 insertions(+), 14 deletions(-) diff --git 
a/stackone_ai/toolset.py b/stackone_ai/toolset.py index 0fea009..c666399 100644 --- a/stackone_ai/toolset.py +++ b/stackone_ai/toolset.py @@ -341,15 +341,13 @@ def search_tools( if not available_connectors: return Tools([]) - # Step 2: Over-fetch from semantic API to account for connector filtering - # We fetch 3x to ensure we get enough results after filtering - over_fetch_multiplier = 3 - over_fetch_k = min(top_k * over_fetch_multiplier, 500) + # Step 2: Fetch max results from semantic API, then filter client-side + semantic_api_max = 500 response = self.semantic_client.search( query=query, connector=connector, - top_k=over_fetch_k, + top_k=semantic_api_max, ) # Step 3: Filter results to only available connectors and min_score @@ -357,7 +355,32 @@ def search_tools( r for r in response.results if r.connector_key.lower() in available_connectors and r.similarity_score >= min_score - ][:top_k] # Take only top_k after filtering + ] + + # Step 3b: If not enough results, make per-connector calls for missing connectors + if len(filtered_results) < top_k and not connector: + found_connectors = {r.connector_key.lower() for r in filtered_results} + missing_connectors = available_connectors - found_connectors + for missing in missing_connectors: + if len(filtered_results) >= top_k: + break + try: + extra = self.semantic_client.search(query=query, connector=missing, top_k=top_k) + for r in extra.results: + if r.similarity_score >= min_score and r.action_name not in { + fr.action_name for fr in filtered_results + }: + filtered_results.append(r) + if len(filtered_results) >= top_k: + break + except SemanticSearchError: + continue + + # Re-sort by score after merging results from multiple calls + filtered_results.sort(key=lambda r: r.similarity_score, reverse=True) + + # Apply top_k limit after all filtering and fallback + filtered_results = filtered_results[:top_k] if not filtered_results: return Tools([]) @@ -446,8 +469,9 @@ def search_action_names( selected = [r.action_name 
for r in results if r.similarity_score > 0.7] tools = toolset.fetch_tools(actions=selected) """ - # Over-fetch if filtering by available_connectors - fetch_k = min(top_k * 3, 500) if available_connectors else min(top_k, 500) + # Fetch max results to maximize results after connector filtering + semantic_api_max = 500 + fetch_k = semantic_api_max if available_connectors else min(top_k, 500) response = self.semantic_client.search( query=query, @@ -463,6 +487,28 @@ def search_action_names( connector_set = {c.lower() for c in available_connectors} results = [r for r in results if r.connector_key.lower() in connector_set] + # If not enough results, make per-connector calls for missing connectors + if len(results) < top_k and not connector: + found_connectors = {r.connector_key.lower() for r in results} + missing_connectors = connector_set - found_connectors + for missing in missing_connectors: + if len(results) >= top_k: + break + try: + extra = self.semantic_client.search(query=query, connector=missing, top_k=top_k) + for r in extra.results: + if r.similarity_score >= min_score and r.action_name not in { + er.action_name for er in results + }: + results.append(r) + if len(results) >= top_k: + break + except SemanticSearchError: + continue + + # Re-sort by score after merging + results.sort(key=lambda r: r.similarity_score, reverse=True) + return results[:top_k] def _filter_by_provider(self, tool_name: str, providers: list[str]) -> bool: diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py index abcbdf0..efa4836 100644 --- a/tests/test_semantic_search.py +++ b/tests/test_semantic_search.py @@ -798,8 +798,8 @@ def test_filters_by_available_connectors(self, mock_search: MagicMock) -> None: assert "workday_create_worker" not in action_names @patch.object(SemanticSearchClient, "search") - def test_over_fetches_when_filtering(self, mock_search: MagicMock) -> None: - """Test that API is called with 3x top_k when filtering by connectors.""" + def 
test_fetches_max_then_falls_back_per_connector(self, mock_search: MagicMock) -> None: + """Test that API fetches max results first, then per-connector if not enough.""" from stackone_ai import StackOneToolSet mock_search.return_value = SemanticSearchResponse( @@ -815,10 +815,15 @@ def test_over_fetches_when_filtering(self, mock_search: MagicMock) -> None: top_k=5, ) - # Should over-fetch by 3x - mock_search.assert_called_once() - call_kwargs = mock_search.call_args.kwargs - assert call_kwargs["top_k"] == 15 # 5 * 3 + # First call: fetch API max (500) for broad search + # Second call: per-connector fallback for "bamboohr" since first returned nothing + assert mock_search.call_count == 2 + first_call = mock_search.call_args_list[0].kwargs + assert first_call["top_k"] == 500 + assert first_call["connector"] is None + second_call = mock_search.call_args_list[1].kwargs + assert second_call["connector"] == "bamboohr" + assert second_call["top_k"] == 5 @patch.object(SemanticSearchClient, "search") def test_respects_top_k_after_filtering(self, mock_search: MagicMock) -> None: From 4c5726d9f32433438f9e7dd21a815f8f5c45c048 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Fri, 6 Feb 2026 22:20:51 +0000 Subject: [PATCH 14/25] ci: trigger rebuild From 06d6a9ad76bc61b02019b6ce1a62182372f761ae Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Mon, 9 Feb 2026 11:09:01 +0000 Subject: [PATCH 15/25] simplify utility_tools API by inferring semantic search from client presence --- README.md | 2 +- stackone_ai/models.py | 21 ++++++--------------- tests/test_semantic_search.py | 9 ++------- 3 files changed, 9 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 8ea99fd..d55a687 100644 --- a/README.md +++ b/README.md @@ -328,7 +328,7 @@ result = execute_tool.call(toolName="hris_list_employees", params={"limit": 10}) ## Semantic Search -Semantic search allows tool discovery using natural language instead of exact keyword matching. 
It understands intent and synonyms, so queries like "fire someone" or "check my to-do list" resolve to the right actions. Enable it via `toolset.search_tools(query)` or pass `use_semantic_search=True` to utility tools. +Semantic search allows tool discovery using natural language instead of exact keyword matching. It understands intent and synonyms, so queries like "fire someone" or "check my to-do list" resolve to the right actions. Enable it via `toolset.search_tools(query)` or by passing a `semantic_client` to utility tools. ## Examples diff --git a/stackone_ai/models.py b/stackone_ai/models.py index fb9a1de..151fdc9 100644 --- a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -577,30 +577,25 @@ def to_langchain(self) -> Sequence[BaseTool]: def utility_tools( self, hybrid_alpha: float | None = None, - use_semantic_search: bool = False, semantic_client: SemanticSearchClient | None = None, ) -> Tools: """Return utility tools for tool discovery and execution Utility tools enable dynamic tool discovery and execution based on natural language queries. - By default, uses local hybrid BM25 + TF-IDF search. Optionally, can use cloud-based - semantic search for higher accuracy on natural language queries. + By default, uses local hybrid BM25 + TF-IDF search. When a semantic_client is provided, + uses cloud-based semantic search for higher accuracy on natural language queries. Args: hybrid_alpha: Weight for BM25 in hybrid search (0-1). Only used when - use_semantic_search=False. If not provided, uses DEFAULT_HYBRID_ALPHA (0.2), + semantic_client is not provided. If not provided, uses DEFAULT_HYBRID_ALPHA (0.2), which gives more weight to BM25 scoring. - use_semantic_search: If True, use cloud-based semantic search instead of local - BM25+TF-IDF search. Requires semantic_client to be provided. - semantic_client: SemanticSearchClient instance. Required when use_semantic_search=True. + semantic_client: SemanticSearchClient instance for cloud-based semantic search. 
+ When provided, semantic search is used instead of local BM25+TF-IDF. Can be obtained from StackOneToolSet.semantic_client. Returns: Tools collection containing tool_search and tool_execute - Raises: - ValueError: If use_semantic_search=True but semantic_client is not provided - Note: This feature is in beta and may change in future versions @@ -613,16 +608,12 @@ def utility_tools( toolset = StackOneToolSet() tools = toolset.fetch_tools() utility = tools.utility_tools( - use_semantic_search=True, semantic_client=toolset.semantic_client, ) """ from stackone_ai.utility_tools import create_tool_execute - if use_semantic_search: - if semantic_client is None: - raise ValueError("semantic_client is required when use_semantic_search=True") - + if semantic_client is not None: from stackone_ai.utility_tools import create_semantic_tool_search search_tool = create_semantic_tool_search(semantic_client) diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py index efa4836..f5ae35d 100644 --- a/tests/test_semantic_search.py +++ b/tests/test_semantic_search.py @@ -448,12 +448,7 @@ def test_utility_tools_semantic_search(self) -> None: utility = tools.utility_tools() assert len(utility) == 2 # tool_search + tool_execute - # With semantic search - requires client - with pytest.raises(ValueError) as exc_info: - tools.utility_tools(use_semantic_search=True) - assert "semantic_client is required" in str(exc_info.value) - - # With semantic search and client + # With semantic search - presence of semantic_client enables it mock_client = MagicMock(spec=SemanticSearchClient) with ( patch("stackone_ai.utility_tools.create_semantic_tool_search") as mock_create, @@ -465,7 +460,7 @@ def test_utility_tools_semantic_search(self) -> None: mock_execute_tool.name = "tool_execute" mock_create.return_value = mock_search_tool mock_create_execute.return_value = mock_execute_tool - utility = tools.utility_tools(use_semantic_search=True, semantic_client=mock_client) + utility = 
tools.utility_tools(semantic_client=mock_client) assert len(utility) == 2 mock_create.assert_called_once_with(mock_client) From bf45364b6d7a76c9b9a6b01e90d19505b36e8298 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Mon, 9 Feb 2026 17:43:26 +0000 Subject: [PATCH 16/25] Benchmark update and PR suggestions --- README.md | 49 +- examples/semantic_search_example.py | 253 ++++++ examples/test_examples.py | 1 + examples/utility_tools_example.py | 35 +- stackone_ai/models.py | 16 - stackone_ai/semantic_search.py | 12 +- stackone_ai/toolset.py | 26 +- tests/BENCHMARK_RESULTS.md | 147 ---- tests/benchmark_search.py | 1249 --------------------------- tests/test_semantic_search.py | 127 ++- 10 files changed, 409 insertions(+), 1506 deletions(-) create mode 100644 examples/semantic_search_example.py delete mode 100644 tests/BENCHMARK_RESULTS.md delete mode 100644 tests/benchmark_search.py diff --git a/README.md b/README.md index d55a687..5c00029 100644 --- a/README.md +++ b/README.md @@ -328,7 +328,53 @@ result = execute_tool.call(toolName="hris_list_employees", params={"limit": 10}) ## Semantic Search -Semantic search allows tool discovery using natural language instead of exact keyword matching. It understands intent and synonyms, so queries like "fire someone" or "check my to-do list" resolve to the right actions. Enable it via `toolset.search_tools(query)` or by passing a `semantic_client` to utility tools. +Semantic search enables tool discovery using natural language instead of exact keyword matching. It understands intent and synonyms, so queries like "fire someone" or "check my to-do list" resolve to the right StackOne actions. + +**How it works:** Your query is matched against all StackOne actions using semantic vector search. Results are automatically filtered to only the connectors available in your linked accounts, so you only get tools you can actually use. 
+ +### `search_tools()` — Recommended + +High-level method that returns a `Tools` collection ready for any framework: + +```python +from stackone_ai import StackOneToolSet + +toolset = StackOneToolSet() + +# Natural language search — no need to know exact tool names +tools = toolset.search_tools("manage employee records", top_k=5) + +# Use with any framework +openai_tools = tools.to_openai() +langchain_tools = tools.to_langchain() + +# Filter by connector +tools = toolset.search_tools("create time off request", connector="bamboohr", top_k=3) +``` + +### `search_action_names()` — Lightweight + +Returns action names and similarity scores without fetching full tool definitions. Useful for inspecting results before committing: + +```python +results = toolset.search_action_names("time off requests", top_k=5) +for r in results: + print(f"{r.action_name} ({r.connector_key}): {r.similarity_score:.2f}") +``` + +### Utility Tools with Semantic Search + +For agent loops using `tool_search` / `tool_execute`, pass `semantic_client` to upgrade from local keyword matching to semantic search: + +```python +tools = toolset.fetch_tools() +utility = tools.utility_tools(semantic_client=toolset.semantic_client) + +search_tool = utility.get_tool("tool_search") +results = search_tool.call(query="onboard a new team member", limit=5) +``` + +See [Semantic Search Example](examples/semantic_search_example.py) for complete patterns including OpenAI and LangChain integration. 
## Examples @@ -340,6 +386,7 @@ For more examples, check out the [examples/](examples/) directory: - [LangChain Integration](examples/langchain_integration.py) - [CrewAI Integration](examples/crewai_integration.py) - [Utility Tools](examples/utility_tools_example.py) +- [Semantic Search](examples/semantic_search_example.py) ## Development diff --git a/examples/semantic_search_example.py b/examples/semantic_search_example.py new file mode 100644 index 0000000..45bd38c --- /dev/null +++ b/examples/semantic_search_example.py @@ -0,0 +1,253 @@ +#!/usr/bin/env python +""" +Example demonstrating semantic search for AI-powered tool discovery. + +Semantic search understands natural language intent and synonyms, so queries like +"fire someone" or "check my to-do list" resolve to the right StackOne actions — +unlike keyword matching which requires exact tool names. + +This example is runnable with the following command: +```bash +uv run examples/semantic_search_example.py +``` + +Prerequisites: +- STACKONE_API_KEY environment variable set +- At least one linked account in StackOne +""" + +import os + +from dotenv import load_dotenv + +from stackone_ai import StackOneToolSet + +load_dotenv() + + +def example_search_tools(): + """High-level semantic search returning a Tools collection. + + search_tools() is the recommended way to use semantic search. It: + 1. Fetches all available tools from your linked accounts + 2. Queries the semantic search API with your natural language query + 3. Filters results to only connectors available in your accounts + 4. Returns a Tools collection ready for any framework (.to_openai(), .to_langchain(), etc.) 
+ """ + print("Example 1: search_tools() — high-level semantic search\n") + + toolset = StackOneToolSet() + + # Search using natural language — no need to know exact tool names + tools = toolset.search_tools( + "manage employee records", + top_k=5, + min_score=0.3, + ) + + print(f"Found {len(tools)} matching tools:") + for tool in tools: + print(f" - {tool.name}: {tool.description[:80]}...") + + # The result is a standard Tools collection — convert to any framework format + openai_tools = tools.to_openai() + print(f"\nConverted to {len(openai_tools)} OpenAI function definitions") + + print() + + +def example_search_tools_with_connector(): + """Semantic search filtered by connector. + + Use the connector parameter to scope results to a specific provider, + for example when you know the user works with BambooHR. + """ + print("Example 2: search_tools() with connector filter\n") + + toolset = StackOneToolSet() + + # Search within a specific connector + tools = toolset.search_tools( + "create time off request", + connector="bamboohr", + top_k=3, + min_score=0.3, + ) + + print(f"Found {len(tools)} BambooHR tools for 'create time off request':") + for tool in tools: + print(f" - {tool.name}") + + print() + + +def example_search_action_names(): + """Lightweight search returning action names and scores without fetching tools. + + search_action_names() is useful when you want to inspect search results + before committing to fetching full tool definitions — for example, to + show the user a list of options. 
+ """ + print("Example 3: search_action_names() — lightweight inspection\n") + + toolset = StackOneToolSet() + + results = toolset.search_action_names( + "time off requests", + top_k=5, + min_score=0.3, + ) + + print("Search results (action names + scores):") + for r in results: + print(f" {r.action_name} ({r.connector_key}) — score: {r.similarity_score:.2f}") + print(f" {r.description[:80]}...") + + print() + + +def example_utility_tools_semantic(): + """Using utility tools with semantic search for agent loops. + + When building agent loops (search → select → execute), pass + semantic_client to utility_tools() to upgrade tool_search from + local BM25+TF-IDF to cloud-based semantic search. + """ + print("Example 4: Utility tools with semantic search\n") + + toolset = StackOneToolSet() + + # Fetch tools for your accounts + tools = toolset.fetch_tools() + + # Pass semantic_client to switch tool_search to semantic mode + utility = tools.utility_tools(semantic_client=toolset.semantic_client) + + # tool_search now uses semantic search under the hood + search_tool = utility.get_tool("tool_search") + if search_tool: + result = search_tool.call(query="onboard a new team member", limit=5) + print("Semantic tool_search results:") + for tool_info in result.get("tools", []): + print(f" - {tool_info['name']} (score: {tool_info['score']:.2f})") + print(f" {tool_info['description'][:80]}...") + + print() + + +def example_openai_agent_loop(): + """Complete agent loop: semantic search → OpenAI → execute. + + This demonstrates the full pattern for building an AI agent that + discovers tools via semantic search and executes them via OpenAI. + """ + print("Example 5: OpenAI agent loop with semantic search\n") + + try: + from openai import OpenAI + except ImportError: + print("OpenAI library not installed. 
Install with: pip install openai") + print() + return + + if not os.getenv("OPENAI_API_KEY"): + print("Set OPENAI_API_KEY to run this example") + print() + return + + client = OpenAI() + toolset = StackOneToolSet() + + # Step 1: Discover relevant tools using semantic search + tools = toolset.search_tools("list employees and their details", top_k=3) + print(f"Discovered {len(tools)} tools via semantic search") + for tool in tools: + print(f" - {tool.name}") + + # Step 2: Convert to OpenAI format and call the LLM + openai_tools = tools.to_openai() + + messages = [ + {"role": "system", "content": "You are a helpful HR assistant."}, + {"role": "user", "content": "Can you list the first 5 employees?"}, + ] + + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=messages, + tools=openai_tools, + tool_choice="auto", + ) + + # Step 3: Execute the tool calls + if response.choices[0].message.tool_calls: + print("\nLLM chose to call:") + for tool_call in response.choices[0].message.tool_calls: + print(f" - {tool_call.function.name}({tool_call.function.arguments})") + + tool = tools.get_tool(tool_call.function.name) + if tool: + result = tool.execute(tool_call.function.arguments) + print(f" Result keys: {list(result.keys()) if isinstance(result, dict) else type(result)}") + else: + print(f"\nLLM response: {response.choices[0].message.content}") + + print() + + +def example_langchain_semantic(): + """Semantic search with LangChain tools. + + search_tools() returns a Tools collection that converts directly + to LangChain format — no extra steps needed. + """ + print("Example 6: Semantic search with LangChain\n") + + try: + from langchain_core.tools import BaseTool # noqa: F401 + except ImportError: + print("LangChain not installed. 
Install with: pip install langchain-core") + print() + return + + toolset = StackOneToolSet() + + # Semantic search → LangChain tools in two lines + tools = toolset.search_tools("employee management", top_k=5) + langchain_tools = tools.to_langchain() + + print(f"Created {len(langchain_tools)} LangChain tools from semantic search:") + for tool in langchain_tools: + print(f" - {tool.name}: {tool.description[:80]}...") + + print() + + +def main(): + """Run all semantic search examples.""" + print("=" * 60) + print("StackOne AI SDK — Semantic Search Examples") + print("=" * 60) + print() + + # Core patterns (require STACKONE_API_KEY) + if not os.getenv("STACKONE_API_KEY"): + print("Set STACKONE_API_KEY to run these examples") + return + + example_search_tools() + example_search_tools_with_connector() + example_search_action_names() + example_utility_tools_semantic() + + # Framework integration patterns + example_openai_agent_loop() + example_langchain_semantic() + + print("=" * 60) + print("Examples completed!") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/examples/test_examples.py b/examples/test_examples.py index 45d631e..36fc7ba 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -31,6 +31,7 @@ def get_example_files() -> list[str]: "file_uploads.py": ["mcp"], "stackone_account_ids.py": ["mcp"], "utility_tools_example.py": ["mcp"], + "semantic_search_example.py": ["mcp"], "mcp_server.py": ["mcp"], } diff --git a/examples/utility_tools_example.py b/examples/utility_tools_example.py index 3291f7e..143ef2a 100644 --- a/examples/utility_tools_example.py +++ b/examples/utility_tools_example.py @@ -80,9 +80,39 @@ def example_utility_tools_with_execution(): print() +def example_utility_tools_semantic(): + """Semantic search variant of utility tools. + + By passing semantic_client to utility_tools(), tool_search switches from + local BM25+TF-IDF to cloud-based semantic search for better natural language + understanding. 
See examples/semantic_search_example.py for more patterns. + """ + print("Example 3: Utility tools with semantic search\n") + + toolset = StackOneToolSet() + + # Fetch tools — these define the available tool catalog + all_tools = toolset.fetch_tools(actions=["bamboohr_*"]) + print(f"Total BambooHR tools available: {len(all_tools)}") + + # Pass semantic_client to switch from local BM25 to cloud semantic search + utility_tools = all_tools.utility_tools(semantic_client=toolset.semantic_client) + + filter_tool = utility_tools.get_tool("tool_search") + if filter_tool: + # Semantic search understands intent — "fire someone" finds termination tools + result = filter_tool.call(query="onboard a new team member", limit=5, minScore=0.0) + + print("Found relevant tools (semantic search):") + for tool in result.get("tools", []): + print(f" - {tool['name']} (score: {tool['score']:.2f}): {tool['description']}") + + print() + + def example_with_openai(): """Example of using utility tools with OpenAI""" - print("Example 3: Using utility tools with OpenAI\n") + print("Example 4: Using utility tools with OpenAI\n") try: from openai import OpenAI @@ -131,7 +161,7 @@ def example_with_openai(): def example_with_langchain(): """Example of using tools with LangChain""" - print("Example 4: Using tools with LangChain\n") + print("Example 5: Using tools with LangChain\n") try: from langchain.agents import AgentExecutor, create_tool_calling_agent @@ -197,6 +227,7 @@ def main(): # Basic examples that work without external APIs example_utility_tools_basic() example_utility_tools_with_execution() + example_utility_tools_semantic() # Examples that require OpenAI API if os.getenv("OPENAI_API_KEY"): diff --git a/stackone_ai/models.py b/stackone_ai/models.py index 151fdc9..a3f50e4 100644 --- a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -542,22 +542,6 @@ def get_connectors(self) -> set[str]: """ return {tool.connector for tool in self.tools} - def filter_by_connector(self, connectors: 
list[str] | set[str]) -> Tools: - """Filter tools by connector names. - - Args: - connectors: List or set of connector names to include (case-insensitive) - - Returns: - New Tools collection containing only tools from specified connectors - - Example: - hr_tools = tools.filter_by_connector(['bamboohr', 'hibob']) - """ - connector_set = {c.lower() for c in connectors} - filtered = [t for t in self.tools if t.connector in connector_set] - return Tools(filtered) - def to_openai(self) -> list[JsonDict]: """Convert all tools to OpenAI function format diff --git a/stackone_ai/semantic_search.py b/stackone_ai/semantic_search.py index 0f46f13..222181d 100644 --- a/stackone_ai/semantic_search.py +++ b/stackone_ai/semantic_search.py @@ -72,14 +72,14 @@ def search( self, query: str, connector: str | None = None, - top_k: int = 10, + top_k: int | None = None, ) -> SemanticSearchResponse: """Search for relevant actions using semantic search. Args: query: Natural language query describing what tools/actions you need connector: Optional connector/provider filter (e.g., "bamboohr", "slack") - top_k: Maximum number of results to return (1-500, default: 10) + top_k: Maximum number of results to return. If not provided, uses the backend default. Returns: SemanticSearchResponse containing matching actions with similarity scores @@ -97,7 +97,9 @@ def search( "Authorization": self._build_auth_header(), "Content-Type": "application/json", } - payload: dict[str, Any] = {"query": query, "top_k": top_k} + payload: dict[str, Any] = {"query": query} + if top_k is not None: + payload["top_k"] = top_k if connector: payload["connector"] = connector @@ -117,7 +119,7 @@ def search_action_names( self, query: str, connector: str | None = None, - top_k: int = 10, + top_k: int | None = None, min_score: float = 0.0, ) -> list[str]: """Convenience method returning just action names. 
@@ -125,7 +127,7 @@ def search_action_names( Args: query: Natural language query connector: Optional connector/provider filter - top_k: Maximum number of results + top_k: Maximum number of results. If not provided, uses the backend default. min_score: Minimum similarity score threshold (0-1) Returns: diff --git a/stackone_ai/toolset.py b/stackone_ai/toolset.py index c666399..4da1eb2 100644 --- a/stackone_ai/toolset.py +++ b/stackone_ai/toolset.py @@ -430,7 +430,7 @@ def search_action_names( query: str, *, connector: str | None = None, - available_connectors: set[str] | None = None, + account_ids: list[str] | None = None, top_k: int = 10, min_score: float = 0.0, ) -> list[SemanticSearchResult]: @@ -442,26 +442,25 @@ def search_action_names( Args: query: Natural language description of needed functionality connector: Optional provider/connector filter (single connector) - available_connectors: Optional set of connectors to filter results by. - If provided, only returns results for these connectors (over-fetches - from API to ensure enough results after filtering). + account_ids: Optional account IDs to scope results to connectors + available in those accounts (uses set_accounts() if not provided). + When provided, results are filtered to only matching connectors. 
top_k: Maximum number of results (default: 10) min_score: Minimum similarity score threshold 0-1 (default: 0.0) Returns: List of SemanticSearchResult with action names, scores, and metadata - Example: - # Inspect results before fetching + Examples: + # Lightweight: inspect results before fetching results = toolset.search_action_names("manage employees", top_k=10) for r in results: print(f"{r.action_name}: {r.similarity_score:.2f}") - # Filter by available connectors from linked accounts - tools = toolset.fetch_tools() + # Account-scoped: only results for connectors in linked accounts results = toolset.search_action_names( "create employee", - available_connectors=tools.get_connectors(), + account_ids=["acc-123"], top_k=5 ) @@ -469,6 +468,13 @@ def search_action_names( selected = [r.action_name for r in results if r.similarity_score > 0.7] tools = toolset.fetch_tools(actions=selected) """ + # Resolve available connectors from account_ids (same pattern as search_tools) + available_connectors: set[str] | None = None + effective_account_ids = account_ids or self._account_ids + if effective_account_ids: + all_tools = self.fetch_tools(account_ids=effective_account_ids) + available_connectors = all_tools.get_connectors() + # Fetch max results to maximize results after connector filtering semantic_api_max = 500 fetch_k = semantic_api_max if available_connectors else min(top_k, 500) @@ -482,7 +488,7 @@ def search_action_names( # Filter by min_score results = [r for r in response.results if r.similarity_score >= min_score] - # Filter by available connectors if provided + # Filter by available connectors if resolved from accounts if available_connectors: connector_set = {c.lower() for c in available_connectors} results = [r for r in results if r.connector_key.lower() in connector_set] diff --git a/tests/BENCHMARK_RESULTS.md b/tests/BENCHMARK_RESULTS.md deleted file mode 100644 index 35c9940..0000000 --- a/tests/BENCHMARK_RESULTS.md +++ /dev/null @@ -1,147 +0,0 @@ -# Search 
Benchmark Results - -## Local BM25+TF-IDF vs Semantic Search - -**Date:** 2025-02-06 -**Dataset:** 94 evaluation tasks across 8 categories -**Corpus:** 5,144 actions from 200+ connectors -**Metric:** Hit@5 (correct action in top 5 results) - -## Summary - -| Method | Hit@5 | MRR | Avg Latency | Hits | -| ----------------- | ---------- | ---------- | ----------- | ------- | -| Local BM25+TF-IDF | 66.0% | 0.538 | 1.2ms | 62/94 | -| Semantic Search | 76.6% | 0.634 | 279.6ms | 72/94 | -| **Improvement** | **+10.6%** | **+0.096** | | **+10** | - -## Detailed Breakdown - -### Semantic Wins (17 tasks) - -Tasks where semantic search finds the correct result but local BM25 fails. -These demonstrate semantic search's ability to understand **intent and synonyms**. - -| Query | Local Top Result | Semantic Top Result | -| --------------------------------- | ------------------------------- | -------------------------------- | -| "fire someone" | workable_get_job_recruiters | factorial_terminate_employee | -| "ping the team" | teamtailor_delete_team | slack_send_message | -| "file a new bug" | github_create_or_update_file | jira_update_issue | -| "ping my colleague" | salesforce_get_my_events | microsoftoutlook_reply_message | -| "fetch staff information" | pinpoint_get_application | workday_list_workers | -| "show me everyone in the company" | humaans_get_me | lattice_talent_list_users | -| "turn down a job seeker" | pinpoint_get_job_seeker | jobadder_reject_requisition | -| "check application status" | dropbox_check_remove_member | jobadder_list_application_status | -| "check my to-do list" | jira_check_bulk_permissions | todoist_list_tasks | -| "start a group chat" | microsoftteams_update_chat | discord_create_group_dm | -| "move candidate forward" | workable_move_candidate | greenhouse_move_application | -| "approve PTO" | ashby_approve_offer | planday_approve_absence_request | -| "update staff record" | bamboohr_update_hour_record | cezannehr_update_employee | -| "pull the 
org chart" | github_create_issue_comment | lattice_list_review_cycles | -| "assign training to employee" | easyllama_assign_training | hibob_create_training_record | -| "file a bug report" | smartrecruiters_get_report_file | github_create_issue_comment | -| "track customer interaction" | qlik_create_interaction | peoplefluent_track_launch | - -### Local Wins (7 tasks) - -Tasks where BM25 keyword matching outperforms semantic search. - -| Query | Local Top Result | Semantic Top Result | -| ----------------------------------- | -------------------------------------- | ----------------------------- | -| "see who applied for the role" | greenhouse_list_applied_candidate_tags | ashby_add_hiring_team_member | -| "advance someone to the next round" | greenhouse_move_application | factorial_invite_employee | -| "see open positions" | teamtailor_list_jobs | hibob_create_position_opening | -| "close a deal" | zohocrm_get_deal | shopify_close_order | -| "check course completion" | saba_delete_recurring_completion | saba_get_course | -| "update deal and notify team" | zohocrm_get_deal | microsoftteams_update_team | -| "look up customer" | linear_update_customer_need | shopify_search_customers | - -### Both Miss (15 tasks) - -Hard queries that neither method handles well. Many are abbreviations, cross-domain concepts, or have overly strict expected matches. 
- -| Query | Category | Why Hard | -| --------------------------- | --------- | -------------------------------------------------------------- | -| "onboard a new team member" | hr | "team member" maps to team tools, not HR | -| "OOO" | hr | Abbreviation - neither understands | -| "DM someone" | messaging | Both find discord_create_dm but expected pattern too strict | -| "customer onboarding" | crm | Cross-domain concept | -| "close quarter books" | crm | Domain-specific financial term | -| "PTO request" | hr | Both find PTO tools but expected pattern mismatch | -| "kill the ticket" | project | Both find delete_ticket but expected pattern mismatch | -| "who works in engineering" | hr | Requires department filtering, not just listing | -| "add a new prospect" | crm | Both find prospect tools but connector mismatch | -| "see all shared files" | documents | "shared" narrows scope too much | -| "see available trainings" | lms | Both find training tools but pattern mismatch | -| "track learning progress" | lms | Abstract concept mapping | -| "create team workspace" | messaging | Cross-domain: workspace vs channel | -| "log customer call" | crm | Connector-specific (Salesforce) term | -| "add new lead" | crm | Connector-specific (HubSpot) but returns wrong HubSpot actions | - -## How to Run - -### Local Mode (recommended for development) - -Requires the action_search Lambda running locally: - -```bash -# Terminal 1: Start the local action_search Lambda - -# Terminal 2: Run benchmark -uv run python tests/benchmark_search.py --local -``` - -### Production Mode - -```bash -STACKONE_API_KEY=xxx uv run python tests/benchmark_search.py -``` - -### CLI Options - -``` ---local Use local Lambda instead of production API ---lambda-url URL Custom Lambda URL (default: localhost:4513) ---api-url URL Custom production API URL -``` - -## Methodology - -### Evaluation Tasks - -94 tasks across 8 categories: - -| Category | Tasks | Description | -| ------------------ | ----- | 
-------------------------------------------- | -| HR/HRIS | 19 | Employee management, time off, org structure | -| Recruiting/ATS | 12 | Candidates, applications, interviews | -| CRM | 12 | Contacts, deals, accounts | -| Project Management | 8 | Tasks, issues, projects | -| Messaging | 5 | Messages, channels, conversations | -| Documents | 5 | Files, folders, drives | -| Marketing | 5 | Campaigns, lists, automation | -| LMS | 5 | Courses, assignments, completions | - -Plus per-connector tests (Slack, Jira, Greenhouse, Salesforce, HubSpot) and edge cases (abbreviations, slang, complex queries). - -### Matching Logic - -- **Hit@5**: At least one expected pattern appears (case-insensitive partial match) in the top 5 results -- **MRR** (Mean Reciprocal Rank): 1/position of first correct result, averaged across all tasks -- **Fair comparison**: Both methods search the same 5,144-action corpus - -### Corpus - -Both local and semantic search operate on the same action catalog: - -- 5,144 unique actions -- 200+ connectors (BambooHR, Greenhouse, Salesforce, Slack, Jira, etc.) -- 7 verticals (HRIS, ATS, CRM, Documents, IAM, LMS, Marketing) - -## Conclusions - -1. **Semantic search improves accuracy by +10.6%** (66.0% -> 76.6% Hit@5) -2. **Semantic excels at intent understanding**: "fire someone" -> terminate, "ping the team" -> send_message -3. **Local BM25 is competitive** when queries contain exact keywords from tool names -4. **15 tasks need better evaluation criteria** - some "misses" are actually correct results with overly strict expected patterns -5. **Latency tradeoff**: Local is ~230x faster (1.2ms vs 280ms) but runs in-memory with pre-built index diff --git a/tests/benchmark_search.py b/tests/benchmark_search.py deleted file mode 100644 index 8779324..0000000 --- a/tests/benchmark_search.py +++ /dev/null @@ -1,1249 +0,0 @@ -""" -Benchmark comparing local BM25+TF-IDF vs semantic search. - -Compares Hit@5 and MRR between local BM25+TF-IDF and semantic search. 
- -Run with production API: - STACKONE_API_KEY=xxx python tests/benchmark_search.py - -Run with local Lambda: - # First, start the local action_search Lambda - # Then run benchmark: - python tests/benchmark_search.py --local - -Environment Variables: - STACKONE_API_KEY: Required for production mode - LOCAL_LAMBDA_URL: Optional, defaults to http://localhost:4513/2015-03-31/functions/function/invocations -""" - -from __future__ import annotations - -import argparse -import os -import time -from dataclasses import dataclass, field -from typing import Any, Literal, Protocol - -import httpx - -from stackone_ai.semantic_search import SemanticSearchClient, SemanticSearchResponse, SemanticSearchResult -from stackone_ai.utility_tools import ToolIndex - -# Default local Lambda URL -DEFAULT_LOCAL_LAMBDA_URL = "http://localhost:4513/2015-03-31/functions/function/invocations" - - -class SearchClientProtocol(Protocol): - """Protocol for search clients (production or local).""" - - def search( - self, - query: str, - connector: str | None = None, - top_k: int = 10, - ) -> SemanticSearchResponse: ... - - -class LocalLambdaSearchClient: - """Client for local action_search Lambda. - - Usage: - # Start the local action_search Lambda first - client = LocalLambdaSearchClient() - response = client.search("create employee", connector="bamboohr", top_k=5) - """ - - def __init__( - self, - lambda_url: str = DEFAULT_LOCAL_LAMBDA_URL, - timeout: float = 30.0, - ) -> None: - """Initialize the local Lambda client. 
- - Args: - lambda_url: URL of the local Lambda endpoint - timeout: Request timeout in seconds - """ - self.lambda_url = lambda_url - self.timeout = timeout - - def _invoke(self, event: dict[str, Any]) -> dict[str, Any]: - """Invoke the local Lambda with an event payload.""" - response = httpx.post( - self.lambda_url, - json=event, - headers={"Content-Type": "application/json"}, - timeout=self.timeout, - ) - response.raise_for_status() - return response.json() - - def _parse_results(self, data: dict[str, Any], query: str) -> SemanticSearchResponse: - """Parse Lambda response into SemanticSearchResponse.""" - results = [ - SemanticSearchResult( - action_name=r.get("action_name", ""), - connector_key=r.get("connector_key", ""), - similarity_score=r.get("similarity_score", 0.0), - label=r.get("label", ""), - description=r.get("description", ""), - ) - for r in data.get("results", []) - ] - return SemanticSearchResponse( - results=results, - total_count=data.get("total_count", len(results)), - query=data.get("query", query), - ) - - def search( - self, - query: str, - connector: str | None = None, - top_k: int = 10, - ) -> SemanticSearchResponse: - """Search for relevant actions using local Lambda. - - Args: - query: Natural language query - connector: Optional connector filter - top_k: Maximum number of results - - Returns: - SemanticSearchResponse with matching actions - """ - payload: dict[str, Any] = { - "type": "search", - "payload": {"query": query, "top_k": top_k}, - } - if connector: - payload["payload"]["connector"] = connector - - try: - data = self._invoke(payload) - return self._parse_results(data, query) - except httpx.RequestError as e: - raise RuntimeError(f"Local Lambda request failed: {e}") from e - except Exception as e: - raise RuntimeError(f"Local Lambda search failed: {e}") from e - - def fetch_all_actions(self) -> list[SemanticSearchResult]: - """Fetch a broad set of actions from the Lambda for building local BM25 index. 
- - Uses multiple broad queries with high top_k to collect the full action catalog. - This avoids needing the /mcp endpoint or STACKONE_API_KEY for benchmarking. - - Returns: - Deduplicated list of all available actions - """ - broad_queries = [ - "employee", - "candidate", - "contact", - "task", - "message", - "file", - "user", - "event", - "campaign", - "course", - "deal", - "account", - "job", - "interview", - "department", - "time off", - "comment", - "project", - "folder", - "role", - ] - - seen: dict[str, SemanticSearchResult] = {} - for query in broad_queries: - try: - data = self._invoke( - { - "type": "search", - "payload": {"query": query, "top_k": 500}, - } - ) - for r in data.get("results", []): - name = r.get("action_name", "") - if name and name not in seen: - seen[name] = SemanticSearchResult( - action_name=name, - connector_key=r.get("connector_key", ""), - similarity_score=r.get("similarity_score", 0.0), - label=r.get("label", ""), - description=r.get("description", ""), - ) - except Exception: - continue - - return list(seen.values()) - - -@dataclass -class LightweightTool: - """Minimal tool representation for BM25 indexing (no API dependency).""" - - name: str - description: str - - -@dataclass -class EvaluationTask: - """Single evaluation task for benchmark.""" - - id: str - query: str - category: str - complexity: Literal["simple", "moderate", "complex"] - expected_matches: list[str] - connector: str | None = None - - -# Semantically-challenging evaluation queries -EVALUATION_TASKS: list[EvaluationTask] = [ - # ============ ALL CONNECTORS - SEMANTIC CHALLENGES ============ - # HR/HRIS - Natural language - EvaluationTask( - id="hr-sem-01", - query="onboard a new team member", - category="hr", - complexity="moderate", - expected_matches=["Create Employee", "Add Employee", "employee"], - ), - EvaluationTask( - id="hr-sem-02", - query="fetch staff information", - category="hr", - complexity="simple", - expected_matches=["Get Employee", "Get 
Worker", "List Employees", "employee", "worker"], - ), - EvaluationTask( - id="hr-sem-03", - query="request vacation days", - category="hr", - complexity="moderate", - expected_matches=["Create Time Off", "Create Absence", "Time-Off", "absence", "leave"], - ), - EvaluationTask( - id="hr-sem-04", - query="show me everyone in the company", - category="hr", - complexity="simple", - expected_matches=["List Employees", "List Workers", "employees", "workers"], - ), - EvaluationTask( - id="hr-sem-05", - query="change someone's job title", - category="hr", - complexity="moderate", - expected_matches=["Update Employee", "Job Change", "Update Worker", "employee"], - ), - EvaluationTask( - id="hr-sem-06", - query="terminate an employee", - category="hr", - complexity="moderate", - expected_matches=["Delete Employee", "Terminate", "employee"], - ), - EvaluationTask( - id="hr-sem-07", - query="pull the org chart", - category="hr", - complexity="moderate", - expected_matches=["List Departments", "Organization", "hierarchy", "departments"], - ), - EvaluationTask( - id="hr-sem-08", - query="sick day request", - category="hr", - complexity="simple", - expected_matches=["Create Absence", "Time-Off", "Leave", "absence"], - ), - EvaluationTask( - id="hr-sem-09", - query="get employee details", - category="hr", - complexity="simple", - expected_matches=["Get Employee", "employee"], - ), - EvaluationTask( - id="hr-sem-10", - query="update staff record", - category="hr", - complexity="simple", - expected_matches=["Update Employee", "employee"], - ), - EvaluationTask( - id="hr-sem-11", - query="add new hire to the system", - category="hr", - complexity="moderate", - expected_matches=["Create Employee", "employee"], - ), - EvaluationTask( - id="hr-sem-12", - query="who works in engineering", - category="hr", - complexity="moderate", - expected_matches=["List Employees", "employees", "department"], - ), - EvaluationTask( - id="hr-sem-13", - query="view compensation details", - 
category="hr", - complexity="moderate", - expected_matches=["Get Employee", "compensation", "salary"], - ), - EvaluationTask( - id="hr-sem-14", - query="see all time-off requests", - category="hr", - complexity="simple", - expected_matches=["List Time Off", "List Absences", "time_off", "absence"], - ), - EvaluationTask( - id="hr-sem-15", - query="approve PTO", - category="hr", - complexity="moderate", - expected_matches=["Update Time Off", "Update Absence", "time_off", "absence"], - ), - # Recruiting/ATS - Natural language - EvaluationTask( - id="ats-sem-01", - query="bring in a new applicant", - category="recruiting", - complexity="moderate", - expected_matches=["Create Candidate", "Create Application", "candidate", "application"], - ), - EvaluationTask( - id="ats-sem-02", - query="see who applied for the role", - category="recruiting", - complexity="simple", - expected_matches=["List Candidates", "List Applications", "candidates", "applications"], - ), - EvaluationTask( - id="ats-sem-03", - query="advance someone to the next round", - category="recruiting", - complexity="moderate", - expected_matches=["Move Application", "Update Stage", "stage", "move"], - ), - EvaluationTask( - id="ats-sem-04", - query="turn down a job seeker", - category="recruiting", - complexity="moderate", - expected_matches=["Reject", "Disqualify", "reject", "application"], - ), - EvaluationTask( - id="ats-sem-05", - query="post a new position", - category="recruiting", - complexity="simple", - expected_matches=["Create Job", "Job Posting", "job"], - ), - EvaluationTask( - id="ats-sem-06", - query="schedule an interview", - category="recruiting", - complexity="moderate", - expected_matches=["Create Interview", "Schedule", "interview"], - ), - EvaluationTask( - id="ats-sem-07", - query="view candidate resume", - category="recruiting", - complexity="simple", - expected_matches=["Get Candidate", "candidate", "document"], - ), - EvaluationTask( - id="ats-sem-08", - query="add interview 
feedback", - category="recruiting", - complexity="moderate", - expected_matches=["Create Scorecard", "scorecard", "feedback"], - ), - EvaluationTask( - id="ats-sem-09", - query="check application status", - category="recruiting", - complexity="simple", - expected_matches=["Get Application", "application"], - ), - EvaluationTask( - id="ats-sem-10", - query="see open positions", - category="recruiting", - complexity="simple", - expected_matches=["List Jobs", "jobs"], - ), - # CRM - Natural language - EvaluationTask( - id="crm-sem-01", - query="add a new prospect", - category="crm", - complexity="simple", - expected_matches=["Create Lead", "Create Contact", "lead", "contact"], - ), - EvaluationTask( - id="crm-sem-02", - query="log a sales opportunity", - category="crm", - complexity="moderate", - expected_matches=["Create Deal", "Create Opportunity", "deal", "opportunity"], - ), - EvaluationTask( - id="crm-sem-03", - query="close a deal", - category="crm", - complexity="moderate", - expected_matches=["Update Deal", "Update Opportunity", "deal", "opportunity"], - ), - EvaluationTask( - id="crm-sem-04", - query="find customer information", - category="crm", - complexity="simple", - expected_matches=["Get Contact", "Get Account", "contact", "account"], - ), - EvaluationTask( - id="crm-sem-05", - query="create a new account", - category="crm", - complexity="simple", - expected_matches=["Create Account", "account"], - ), - EvaluationTask( - id="crm-sem-06", - query="log a sales call", - category="crm", - complexity="moderate", - expected_matches=["Create Activity", "activity", "call"], - ), - EvaluationTask( - id="crm-sem-07", - query="see pipeline deals", - category="crm", - complexity="simple", - expected_matches=["List Deals", "List Opportunities", "deals", "opportunities"], - ), - EvaluationTask( - id="crm-sem-08", - query="update contact info", - category="crm", - complexity="simple", - expected_matches=["Update Contact", "contact"], - ), - EvaluationTask( - 
id="crm-sem-09", - query="track customer interaction", - category="crm", - complexity="moderate", - expected_matches=["Create Activity", "activity"], - ), - EvaluationTask( - id="crm-sem-10", - query="view all contacts", - category="crm", - complexity="simple", - expected_matches=["List Contacts", "contacts"], - ), - # Project Management - Natural language - EvaluationTask( - id="pm-sem-01", - query="assign work to someone", - category="project", - complexity="simple", - expected_matches=["Create Task", "Create Issue", "Assign", "task", "issue"], - ), - EvaluationTask( - id="pm-sem-02", - query="check my to-do list", - category="project", - complexity="simple", - expected_matches=["List Tasks", "List Issues", "tasks", "issues"], - ), - EvaluationTask( - id="pm-sem-03", - query="file a bug report", - category="project", - complexity="moderate", - expected_matches=["Create Issue", "Create Task", "issue"], - ), - EvaluationTask( - id="pm-sem-04", - query="mark task as done", - category="project", - complexity="simple", - expected_matches=["Update Task", "Update Issue", "task", "issue"], - ), - EvaluationTask( - id="pm-sem-05", - query="create a new project", - category="project", - complexity="simple", - expected_matches=["Create Project", "project"], - ), - EvaluationTask( - id="pm-sem-06", - query="view project status", - category="project", - complexity="simple", - expected_matches=["Get Project", "project"], - ), - EvaluationTask( - id="pm-sem-07", - query="add a comment to ticket", - category="project", - complexity="moderate", - expected_matches=["Create Comment", "comment"], - ), - EvaluationTask( - id="pm-sem-08", - query="see sprint backlog", - category="project", - complexity="moderate", - expected_matches=["List Tasks", "List Issues", "tasks", "issues"], - ), - # Messaging - Natural language - EvaluationTask( - id="msg-sem-01", - query="ping my colleague", - category="messaging", - complexity="simple", - expected_matches=["Send Message", "message"], - ), - 
EvaluationTask( - id="msg-sem-02", - query="start a group chat", - category="messaging", - complexity="moderate", - expected_matches=["Create Conversation", "Create Channel", "conversation", "channel"], - ), - EvaluationTask( - id="msg-sem-03", - query="post in the team channel", - category="messaging", - complexity="simple", - expected_matches=["Send Message", "message", "channel"], - ), - EvaluationTask( - id="msg-sem-04", - query="see recent messages", - category="messaging", - complexity="simple", - expected_matches=["List Messages", "messages"], - ), - EvaluationTask( - id="msg-sem-05", - query="create a new channel", - category="messaging", - complexity="simple", - expected_matches=["Create Channel", "channel"], - ), - # Documents - Natural language - EvaluationTask( - id="doc-sem-01", - query="upload a file", - category="documents", - complexity="simple", - expected_matches=["Upload File", "Create File", "file", "upload"], - ), - EvaluationTask( - id="doc-sem-02", - query="download the document", - category="documents", - complexity="simple", - expected_matches=["Download File", "Get File", "file", "download"], - ), - EvaluationTask( - id="doc-sem-03", - query="see all shared files", - category="documents", - complexity="simple", - expected_matches=["List Files", "files"], - ), - EvaluationTask( - id="doc-sem-04", - query="create a new folder", - category="documents", - complexity="simple", - expected_matches=["Create Folder", "folder"], - ), - EvaluationTask( - id="doc-sem-05", - query="share document with team", - category="documents", - complexity="moderate", - expected_matches=["Share File", "Update File", "file", "share"], - ), - # Marketing - Natural language - EvaluationTask( - id="mkt-sem-01", - query="create email campaign", - category="marketing", - complexity="moderate", - expected_matches=["Create Campaign", "campaign", "email"], - ), - EvaluationTask( - id="mkt-sem-02", - query="add contact to mailing list", - category="marketing", - 
complexity="simple", - expected_matches=["Add Member", "Create Contact", "contact", "list"], - ), - EvaluationTask( - id="mkt-sem-03", - query="send newsletter", - category="marketing", - complexity="moderate", - expected_matches=["Send Campaign", "campaign", "email"], - ), - EvaluationTask( - id="mkt-sem-04", - query="view campaign analytics", - category="marketing", - complexity="moderate", - expected_matches=["Get Campaign", "campaign", "analytics"], - ), - EvaluationTask( - id="mkt-sem-05", - query="create automation workflow", - category="marketing", - complexity="complex", - expected_matches=["Create Automation", "automation", "workflow"], - ), - # LMS - Natural language - EvaluationTask( - id="lms-sem-01", - query="assign training to employee", - category="lms", - complexity="moderate", - expected_matches=["Create Assignment", "Assign Content", "assignment", "content"], - ), - EvaluationTask( - id="lms-sem-02", - query="check course completion", - category="lms", - complexity="simple", - expected_matches=["Get Completion", "completion", "progress"], - ), - EvaluationTask( - id="lms-sem-03", - query="create new course", - category="lms", - complexity="moderate", - expected_matches=["Create Content", "content", "course"], - ), - EvaluationTask( - id="lms-sem-04", - query="see available trainings", - category="lms", - complexity="simple", - expected_matches=["List Content", "content", "courses"], - ), - EvaluationTask( - id="lms-sem-05", - query="track learning progress", - category="lms", - complexity="moderate", - expected_matches=["Get Completion", "List Completions", "completion"], - ), - # Per-connector examples - EvaluationTask( - id="bamboo-sem-01", - query="bring on a new hire", - category="hr", - complexity="moderate", - connector="bamboohr", - expected_matches=["Create Employee", "employee"], - ), - EvaluationTask( - id="bamboo-sem-02", - query="get employee time off balance", - category="hr", - complexity="simple", - connector="bamboohr", - 
expected_matches=["Get Time Off", "time_off", "balance"], - ), - EvaluationTask( - id="slack-sem-01", - query="ping the team", - category="messaging", - complexity="simple", - connector="slack", - expected_matches=["Send Message", "message"], - ), - EvaluationTask( - id="slack-sem-02", - query="create team workspace", - category="messaging", - complexity="moderate", - connector="slack", - expected_matches=["Create Channel", "channel"], - ), - EvaluationTask( - id="jira-sem-01", - query="file a new bug", - category="project", - complexity="simple", - connector="jira", - expected_matches=["Create Issue", "issue"], - ), - EvaluationTask( - id="jira-sem-02", - query="view sprint tasks", - category="project", - complexity="simple", - connector="jira", - expected_matches=["List Issues", "issues"], - ), - EvaluationTask( - id="greenhouse-sem-01", - query="add new job posting", - category="recruiting", - complexity="simple", - connector="greenhouse", - expected_matches=["Create Job", "job"], - ), - EvaluationTask( - id="greenhouse-sem-02", - query="move candidate forward", - category="recruiting", - complexity="moderate", - connector="greenhouse", - expected_matches=["Move Application", "Update Application", "application"], - ), - EvaluationTask( - id="salesforce-sem-01", - query="create sales opportunity", - category="crm", - complexity="simple", - connector="salesforce", - expected_matches=["Create Opportunity", "opportunity"], - ), - EvaluationTask( - id="salesforce-sem-02", - query="log customer call", - category="crm", - complexity="moderate", - connector="salesforce", - expected_matches=["Create Activity", "activity"], - ), - EvaluationTask( - id="hubspot-sem-01", - query="add new lead", - category="crm", - complexity="simple", - connector="hubspot", - expected_matches=["Create Contact", "contact"], - ), - EvaluationTask( - id="hubspot-sem-02", - query="track deal progress", - category="crm", - complexity="moderate", - connector="hubspot", - expected_matches=["Get 
Deal", "Update Deal", "deal"], - ), - # Complex multi-step queries - EvaluationTask( - id="complex-01", - query="set up new employee with all required training", - category="hr", - complexity="complex", - expected_matches=["Create Employee", "Create Assignment", "employee", "assignment"], - ), - EvaluationTask( - id="complex-02", - query="process job application and schedule interview", - category="recruiting", - complexity="complex", - expected_matches=["Create Application", "Create Interview", "application", "interview"], - ), - EvaluationTask( - id="complex-03", - query="update deal and notify team", - category="crm", - complexity="complex", - expected_matches=["Update Deal", "Send Message", "deal", "message"], - ), - EvaluationTask( - id="complex-04", - query="create project and assign initial tasks", - category="project", - complexity="complex", - expected_matches=["Create Project", "Create Task", "project", "task"], - ), - # Edge cases - Abbreviations and slang - EvaluationTask( - id="edge-01", - query="PTO request", - category="hr", - complexity="simple", - expected_matches=["Create Time Off", "time_off", "absence"], - ), - EvaluationTask( - id="edge-02", - query="1:1 meeting", - category="hr", - complexity="moderate", - expected_matches=["Create Event", "Create Meeting", "meeting"], - ), - EvaluationTask( - id="edge-03", - query="OOO", - category="hr", - complexity="simple", - expected_matches=["Time Off", "Absence", "time_off", "absence"], - ), - EvaluationTask( - id="edge-04", - query="ASAP task", - category="project", - complexity="simple", - expected_matches=["Create Task", "task"], - ), - EvaluationTask( - id="edge-05", - query="DM someone", - category="messaging", - complexity="simple", - expected_matches=["Send Message", "message"], - ), - # Synonyms and alternative phrases - EvaluationTask( - id="syn-01", - query="fire someone", - category="hr", - complexity="moderate", - expected_matches=["Delete Employee", "Terminate", "employee"], - ), - 
EvaluationTask( - id="syn-02", - query="look up customer", - category="crm", - complexity="simple", - expected_matches=["Get Contact", "Get Account", "contact", "account"], - ), - EvaluationTask( - id="syn-03", - query="grab the file", - category="documents", - complexity="simple", - expected_matches=["Download File", "Get File", "file"], - ), - EvaluationTask( - id="syn-04", - query="sign up new user", - category="hr", - complexity="moderate", - expected_matches=["Create Employee", "Create User", "employee", "user"], - ), - EvaluationTask( - id="syn-05", - query="kill the ticket", - category="project", - complexity="moderate", - expected_matches=["Delete Issue", "Update Issue", "Close Issue", "issue"], - ), - # Business context queries - EvaluationTask( - id="biz-01", - query="run payroll", - category="hr", - complexity="complex", - expected_matches=["payroll", "compensation"], - ), - EvaluationTask( - id="biz-02", - query="close quarter books", - category="crm", - complexity="complex", - expected_matches=["Update Deal", "deal", "opportunity"], - ), - EvaluationTask( - id="biz-03", - query="annual review", - category="hr", - complexity="moderate", - expected_matches=["Review", "Performance", "employee"], - ), - EvaluationTask( - id="biz-04", - query="sprint planning", - category="project", - complexity="moderate", - expected_matches=["Create Task", "List Tasks", "task", "issue"], - ), - EvaluationTask( - id="biz-05", - query="customer onboarding", - category="crm", - complexity="complex", - expected_matches=["Create Account", "Create Contact", "account", "contact"], - ), -] - - -@dataclass -class TaskResult: - """Result of evaluating a single task.""" - - task_id: str - query: str - hit: bool - rank: int | None # Position of first match, None if not found - top_results: list[str] - latency_ms: float - - -@dataclass -class BenchmarkResult: - """Aggregated results from running benchmark.""" - - method: str - hit_at_k: float - mean_reciprocal_rank: float - 
avg_latency_ms: float - total_tasks: int - hits: int - results: list[TaskResult] = field(default_factory=list) - - -@dataclass -class ComparisonReport: - """Comparison between local and semantic search.""" - - local_results: BenchmarkResult - semantic_results: BenchmarkResult - - @property - def improvement(self) -> float: - """Percentage point improvement in Hit@k.""" - return self.semantic_results.hit_at_k - self.local_results.hit_at_k - - -def check_hit(result_names: list[str], expected_matches: list[str]) -> tuple[bool, int | None]: - """Check if any expected match appears in results (case-insensitive partial match).""" - for i, name in enumerate(result_names): - name_lower = name.lower() - for expected in expected_matches: - if expected.lower() in name_lower: - return True, i + 1 - return False, None - - -class SearchBenchmark: - """Benchmark comparing local vs semantic search.""" - - def __init__( - self, - tools: list, - semantic_client: SearchClientProtocol, - ): - """Initialize benchmark with tools and search client. - - Args: - tools: List of tool objects (StackOneTool or LightweightTool) with name + description - semantic_client: Client for semantic search (production or local) - """ - self.tools = tools - # ToolIndex uses duck typing - only needs .name and .description - self.local_index = ToolIndex(tools) # type: ignore[arg-type] - self.semantic_client = semantic_client - - def evaluate_local( - self, - tasks: list[EvaluationTask], - k: int = 5, - ) -> BenchmarkResult: - """Run local BM25+TF-IDF search against benchmark tasks. 
- - Args: - tasks: List of evaluation tasks - k: Number of top results to consider (default: 5) - - Returns: - BenchmarkResult with aggregated metrics - """ - results: list[TaskResult] = [] - total_rr = 0.0 - - for task in tasks: - start = time.perf_counter() - search_results = self.local_index.search(task.query, limit=k) - latency = (time.perf_counter() - start) * 1000 - - result_names = [r.name for r in search_results] - hit, rank = check_hit(result_names, task.expected_matches) - - if hit and rank: - total_rr += 1.0 / rank - - results.append( - TaskResult( - task_id=task.id, - query=task.query, - hit=hit, - rank=rank, - top_results=result_names[:k], - latency_ms=latency, - ) - ) - - hits = sum(1 for r in results if r.hit) - return BenchmarkResult( - method="Local BM25+TF-IDF", - hit_at_k=hits / len(tasks) if tasks else 0, - mean_reciprocal_rank=total_rr / len(tasks) if tasks else 0, - avg_latency_ms=sum(r.latency_ms for r in results) / len(results) if results else 0, - total_tasks=len(tasks), - hits=hits, - results=results, - ) - - def evaluate_semantic( - self, - tasks: list[EvaluationTask], - k: int = 5, - ) -> BenchmarkResult: - """Run semantic search against benchmark tasks. 
- - Args: - tasks: List of evaluation tasks - k: Number of top results to consider (default: 5) - - Returns: - BenchmarkResult with aggregated metrics - """ - results: list[TaskResult] = [] - total_rr = 0.0 - - for task in tasks: - start = time.perf_counter() - response = self.semantic_client.search( - query=task.query, - connector=task.connector, - top_k=k, - ) - latency = (time.perf_counter() - start) * 1000 - - result_names = [r.action_name for r in response.results] - hit, rank = check_hit(result_names, task.expected_matches) - - if hit and rank: - total_rr += 1.0 / rank - - results.append( - TaskResult( - task_id=task.id, - query=task.query, - hit=hit, - rank=rank, - top_results=result_names[:k], - latency_ms=latency, - ) - ) - - hits = sum(1 for r in results if r.hit) - return BenchmarkResult( - method="Semantic Search", - hit_at_k=hits / len(tasks) if tasks else 0, - mean_reciprocal_rank=total_rr / len(tasks) if tasks else 0, - avg_latency_ms=sum(r.latency_ms for r in results) / len(results) if results else 0, - total_tasks=len(tasks), - hits=hits, - results=results, - ) - - def compare(self, tasks: list[EvaluationTask] | None = None, k: int = 5) -> ComparisonReport: - """Compare both methods and generate report. 
- - Args: - tasks: List of evaluation tasks (defaults to EVALUATION_TASKS) - k: Number of top results to consider (default: 5) - - Returns: - ComparisonReport with results from both methods - """ - tasks = tasks or EVALUATION_TASKS - local = self.evaluate_local(tasks, k) - semantic = self.evaluate_semantic(tasks, k) - return ComparisonReport(local_results=local, semantic_results=semantic) - - -def print_report(report: ComparisonReport) -> None: - """Print a formatted comparison report.""" - print("\n" + "=" * 70) - print("SEARCH BENCHMARK COMPARISON") - print("=" * 70) - - print(f"\n{'Method':<25} {'Hit@5':<12} {'MRR':<12} {'Latency':<12} {'Hits':<10}") - print("-" * 70) - - for r in [report.local_results, report.semantic_results]: - print( - f"{r.method:<25} {r.hit_at_k:>10.1%} {r.mean_reciprocal_rank:>10.3f} " - f"{r.avg_latency_ms:>9.1f}ms {r.hits:>4}/{r.total_tasks}" - ) - - print("-" * 70) - print(f"{'Improvement':<25} {report.improvement:>+10.1%}") - print("=" * 70) - - # Build lookup maps - local_by_id = {r.task_id: r for r in report.local_results.results} - semantic_by_id = {r.task_id: r for r in report.semantic_results.results} - - failed_local = [r for r in report.local_results.results if not r.hit] - failed_semantic = [r for r in report.semantic_results.results if not r.hit] - - # Tasks semantic gets right but local misses (the value semantic adds) - semantic_wins = [r for r in failed_local if semantic_by_id.get(r.task_id, r).hit] - # Tasks local gets right but semantic misses - local_wins = [r for r in failed_semantic if local_by_id.get(r.task_id, r).hit] - # Tasks both miss - both_miss = [r for r in failed_local if not semantic_by_id.get(r.task_id, r).hit] - - print(f"\n{'SEMANTIC WINS':} ({len(semantic_wins)} tasks - semantic gets right, local misses):") - for r in semantic_wins: - sr = semantic_by_id[r.task_id] - print(f" - {r.task_id}: '{r.query}'") - print(f" Local got: {r.top_results[:3]}") - print(f" Semantic got: {sr.top_results[:3]}") - - if 
local_wins: - print(f"\n{'LOCAL WINS':} ({len(local_wins)} tasks - local gets right, semantic misses):") - for r in local_wins: - lr = local_by_id[r.task_id] - print(f" - {r.task_id}: '{r.query}'") - print(f" Local got: {lr.top_results[:3]}") - print(f" Semantic got: {r.top_results[:3]}") - - print(f"\n{'BOTH MISS':} ({len(both_miss)} tasks):") - for r in both_miss: - sr = semantic_by_id[r.task_id] - print(f" - {r.task_id}: '{r.query}'") - print(f" Local got: {r.top_results[:3]}") - print(f" Semantic got: {sr.top_results[:3]}") - - -def run_benchmark( - api_key: str | None = None, - base_url: str = "https://api.stackone.com", - use_local: bool = False, - local_lambda_url: str = DEFAULT_LOCAL_LAMBDA_URL, -) -> ComparisonReport: - """Run the full benchmark comparison. - - Args: - api_key: StackOne API key (uses STACKONE_API_KEY env var if not provided) - base_url: Base URL for production API requests - use_local: If True, use local Lambda instead of production API - local_lambda_url: URL of local Lambda endpoint - - Returns: - ComparisonReport with results - - Raises: - ValueError: If no API key is available (production mode only) - """ - # Create semantic search client and load tools based on mode - if use_local: - print(f"Using LOCAL Lambda at: {local_lambda_url}") - local_client = LocalLambdaSearchClient(lambda_url=local_lambda_url) - semantic_client: SearchClientProtocol = local_client - - # Fetch tool catalog from the Lambda itself (no /mcp or API key needed) - print("Fetching action catalog from local Lambda...") - actions = local_client.fetch_all_actions() - tools = [LightweightTool(name=a.action_name, description=a.description) for a in actions] - print(f"Loaded {len(tools)} actions from Lambda") - else: - api_key = api_key or os.environ.get("STACKONE_API_KEY") - if not api_key: - raise ValueError("API key must be provided or set via STACKONE_API_KEY environment variable") - print(f"Using PRODUCTION API at: {base_url}") - semantic_client = 
SemanticSearchClient(api_key=api_key, base_url=base_url) - - from stackone_ai import StackOneToolSet - - print("Initializing toolset...") - toolset = StackOneToolSet(api_key=api_key, base_url=base_url) - - print("Fetching tools (this may take a moment)...") - tools = list(toolset.fetch_tools()) - print(f"Loaded {len(tools)} tools") - - print(f"\nRunning benchmark with {len(EVALUATION_TASKS)} evaluation tasks...") - benchmark = SearchBenchmark(tools, semantic_client=semantic_client) - - report = benchmark.compare() - print_report(report) - - return report - - -def main() -> None: - """Main entry point with CLI argument parsing.""" - parser = argparse.ArgumentParser( - description="Benchmark comparing local BM25+TF-IDF vs semantic search", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - # Run with production API - STACKONE_API_KEY=xxx python tests/benchmark_search.py - - # Run with local Lambda (start the local action_search Lambda first) - python tests/benchmark_search.py --local - - # Run with custom local Lambda URL - python tests/benchmark_search.py --local --lambda-url http://localhost:9000/invoke - """, - ) - parser.add_argument( - "--local", - action="store_true", - help="Use local Lambda instead of production API", - ) - parser.add_argument( - "--lambda-url", - default=DEFAULT_LOCAL_LAMBDA_URL, - help=f"Local Lambda URL (default: {DEFAULT_LOCAL_LAMBDA_URL})", - ) - parser.add_argument( - "--api-url", - default="https://api.stackone.com", - help="Production API base URL", - ) - - args = parser.parse_args() - - try: - run_benchmark( - base_url=args.api_url, - use_local=args.local, - local_lambda_url=args.lambda_url, - ) - except ValueError as e: - print(f"Error: {e}") - print("Set STACKONE_API_KEY environment variable or use --local flag") - exit(1) - except Exception as e: - print(f"Benchmark failed: {e}") - import traceback - - traceback.print_exc() - exit(1) - - -if __name__ == "__main__": - main() diff --git 
a/tests/test_semantic_search.py b/tests/test_semantic_search.py index f5ae35d..1404fd2 100644 --- a/tests/test_semantic_search.py +++ b/tests/test_semantic_search.py @@ -642,7 +642,7 @@ def test_connector_with_single_word_name(self) -> None: class TestToolsConnectorHelpers: - """Tests for Tools.get_connectors() and filter_by_connector().""" + """Tests for Tools.get_connectors().""" def test_get_connectors(self) -> None: """Test getting unique connectors from tools collection.""" @@ -676,79 +676,16 @@ def test_get_connectors_empty(self) -> None: tools = Tools([]) assert tools.get_connectors() == set() - def test_filter_by_connector(self) -> None: - """Test filtering tools by connector.""" - from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters, Tools - - def make_tool(name: str) -> StackOneTool: - return StackOneTool( - description=f"Tool {name}", - parameters=ToolParameters(type="object", properties={}), - _execute_config=ExecuteConfig(name=name, method="POST", url="", headers={}), - _api_key="test-key", - ) - - tools = Tools( - [ - make_tool("bamboohr_create_employee"), - make_tool("bamboohr_list_employees"), - make_tool("hibob_create_employee"), - make_tool("slack_send_message"), - ] - ) - - # Filter by single connector - bamboo_tools = tools.filter_by_connector(["bamboohr"]) - assert len(bamboo_tools) == 2 - assert all(t.connector == "bamboohr" for t in bamboo_tools) - - # Filter by multiple connectors - hr_tools = tools.filter_by_connector(["bamboohr", "hibob"]) - assert len(hr_tools) == 3 - assert all(t.connector in {"bamboohr", "hibob"} for t in hr_tools) - - def test_filter_by_connector_case_insensitive(self) -> None: - """Test that filter_by_connector is case-insensitive.""" - from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters, Tools - - tool = StackOneTool( - description="Creates employee", - parameters=ToolParameters(type="object", properties={}), - _execute_config=ExecuteConfig(name="bamboohr_create_employee", 
method="POST", url="", headers={}), - _api_key="test-key", - ) - tools = Tools([tool]) - - # Should match regardless of case - assert len(tools.filter_by_connector(["BambooHR"])) == 1 - assert len(tools.filter_by_connector(["BAMBOOHR"])) == 1 - assert len(tools.filter_by_connector(["bamboohr"])) == 1 - - def test_filter_by_connector_returns_new_tools(self) -> None: - """Test that filter_by_connector returns a new Tools instance.""" - from stackone_ai.models import ExecuteConfig, StackOneTool, ToolParameters, Tools - - tool = StackOneTool( - description="Creates employee", - parameters=ToolParameters(type="object", properties={}), - _execute_config=ExecuteConfig(name="bamboohr_create_employee", method="POST", url="", headers={}), - _api_key="test-key", - ) - tools = Tools([tool]) - - filtered = tools.filter_by_connector(["bamboohr"]) - - assert filtered is not tools - assert isinstance(filtered, Tools) - -class TestSearchActionNamesWithAvailableConnectors: - """Tests for search_action_names with available_connectors parameter.""" +class TestSearchActionNamesWithAccountIds: + """Tests for search_action_names with account_ids parameter.""" @patch.object(SemanticSearchClient, "search") - def test_filters_by_available_connectors(self, mock_search: MagicMock) -> None: - """Test that results are filtered by available connectors.""" + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_filters_by_account_connectors(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: + """Test that results are filtered to connectors available in linked accounts.""" from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition mock_search.return_value = SemanticSearchResponse( results=[ @@ -778,14 +715,28 @@ def test_filters_by_available_connectors(self, mock_search: MagicMock) -> None: query="create employee", ) + # Mock MCP to return only bamboohr and hibob tools (user's linked accounts) + mock_fetch.return_value = [ + _McpToolDefinition( + 
name="bamboohr_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="hibob_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + toolset = StackOneToolSet(api_key="test-key") results = toolset.search_action_names( "create employee", - available_connectors={"bamboohr", "hibob"}, + account_ids=["acc-123"], top_k=10, ) - # workday should be filtered out + # workday should be filtered out (not in linked accounts) assert len(results) == 2 action_names = [r.action_name for r in results] assert "bamboohr_create_employee" in action_names @@ -793,9 +744,13 @@ def test_filters_by_available_connectors(self, mock_search: MagicMock) -> None: assert "workday_create_worker" not in action_names @patch.object(SemanticSearchClient, "search") - def test_fetches_max_then_falls_back_per_connector(self, mock_search: MagicMock) -> None: + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_fetches_max_then_falls_back_per_connector( + self, mock_fetch: MagicMock, mock_search: MagicMock + ) -> None: """Test that API fetches max results first, then per-connector if not enough.""" from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition mock_search.return_value = SemanticSearchResponse( results=[], @@ -803,10 +758,19 @@ def test_fetches_max_then_falls_back_per_connector(self, mock_search: MagicMock) query="test", ) + # Mock MCP to return a bamboohr tool + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + ] + toolset = StackOneToolSet(api_key="test-key") toolset.search_action_names( "test", - available_connectors={"bamboohr"}, + account_ids=["acc-123"], top_k=5, ) @@ -821,9 +785,11 @@ def test_fetches_max_then_falls_back_per_connector(self, mock_search: MagicMock) assert 
second_call["top_k"] == 5 @patch.object(SemanticSearchClient, "search") - def test_respects_top_k_after_filtering(self, mock_search: MagicMock) -> None: + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_respects_top_k_after_filtering(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: """Test that results are limited to top_k after filtering.""" from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition # Return more results than top_k mock_search.return_value = SemanticSearchResponse( @@ -841,10 +807,19 @@ def test_respects_top_k_after_filtering(self, mock_search: MagicMock) -> None: query="test", ) + # Mock MCP to return bamboohr tools + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_action_0", + description="Action 0", + input_schema={"type": "object", "properties": {}}, + ), + ] + toolset = StackOneToolSet(api_key="test-key") results = toolset.search_action_names( "test", - available_connectors={"bamboohr"}, + account_ids=["acc-123"], top_k=3, ) From 2ae1e777dd7f23029ebe58c97c852aa584464323 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Mon, 9 Feb 2026 20:26:33 +0000 Subject: [PATCH 17/25] update the README gst --- README.md | 3 +-- examples/utility_tools_example.py | 2 +- stackone_ai/toolset.py | 2 ++ 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5c00029..4fa5050 100644 --- a/README.md +++ b/README.md @@ -328,7 +328,7 @@ result = execute_tool.call(toolName="hris_list_employees", params={"limit": 10}) ## Semantic Search -Semantic search enables tool discovery using natural language instead of exact keyword matching. It understands intent and synonyms, so queries like "fire someone" or "check my to-do list" resolve to the right StackOne actions. +Semantic search enables tool discovery using natural language instead of exact keyword matching. 
It understands intent and synonyms, so queries like "onboard new hire" or "check my to-do list" resolve to the right StackOne actions. **How it works:** Your query is matched against all StackOne actions using semantic vector search. Results are automatically filtered to only the connectors available in your linked accounts, so you only get tools you can actually use. @@ -345,7 +345,6 @@ toolset = StackOneToolSet() tools = toolset.search_tools("manage employee records", top_k=5) # Use with any framework -openai_tools = tools.to_openai() langchain_tools = tools.to_langchain() # Filter by connector diff --git a/examples/utility_tools_example.py b/examples/utility_tools_example.py index 143ef2a..0d78b80 100644 --- a/examples/utility_tools_example.py +++ b/examples/utility_tools_example.py @@ -100,7 +100,7 @@ def example_utility_tools_semantic(): filter_tool = utility_tools.get_tool("tool_search") if filter_tool: - # Semantic search understands intent — "fire someone" finds termination tools + # Semantic search understands intent — "onboard new hire" finds termination tools result = filter_tool.call(query="onboard a new team member", limit=5, minScore=0.0) print("Found relevant tools (semantic search):") diff --git a/stackone_ai/toolset.py b/stackone_ai/toolset.py index 4da1eb2..65f517b 100644 --- a/stackone_ai/toolset.py +++ b/stackone_ai/toolset.py @@ -474,6 +474,8 @@ def search_action_names( if effective_account_ids: all_tools = self.fetch_tools(account_ids=effective_account_ids) available_connectors = all_tools.get_connectors() + if not available_connectors: + return [] # Fetch max results to maximize results after connector filtering semantic_api_max = 500 From e1fb3dda01ac58f89d120c64a79a6f2a65e4b175 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Tue, 10 Feb 2026 09:38:40 +0000 Subject: [PATCH 18/25] Note on the fetch tools for actions that users expect to discover --- README.md | 2 ++ examples/utility_tools_example.py | 2 +- 2 files changed, 3 insertions(+), 1
deletion(-) diff --git a/README.md b/README.md index 4fa5050..43adeca 100644 --- a/README.md +++ b/README.md @@ -373,6 +373,8 @@ search_tool = utility.get_tool("tool_search") results = search_tool.call(query="onboard a new team member", limit=5) ``` +> `tool_search` queries the full backend catalog, so make sure `fetch_tools()` covers the actions you expect to discover. + See [Semantic Search Example](examples/semantic_search_example.py) for complete patterns including OpenAI and LangChain integration. ## Examples diff --git a/examples/utility_tools_example.py b/examples/utility_tools_example.py index 0d78b80..7a21bbe 100644 --- a/examples/utility_tools_example.py +++ b/examples/utility_tools_example.py @@ -100,7 +100,7 @@ def example_utility_tools_semantic(): filter_tool = utility_tools.get_tool("tool_search") if filter_tool: - # Semantic search understands intent — "onboard new hire" finds termination tools + # Semantic search understands intent — "onboard new hire" finds onboarding tools result = filter_tool.call(query="onboard a new team member", limit=5, minScore=0.0) print("Found relevant tools (semantic search):") From 4e1947921b2828771759f0e7bfdeb04277f9dfe8 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Tue, 10 Feb 2026 18:09:54 +0000 Subject: [PATCH 19/25] Update examples and improve the semantic search --- examples/semantic_search_example.py | 275 +++++++++++++++++++--------- stackone_ai/toolset.py | 82 ++++++--- stackone_ai/utility_tools.py | 24 ++- tests/test_semantic_search.py | 267 +++++++++++++++++++++++++-- 4 files changed, 510 insertions(+), 138 deletions(-) diff --git a/examples/semantic_search_example.py b/examples/semantic_search_example.py index 45bd38c..a8504e1 100644 --- a/examples/semantic_search_example.py +++ b/examples/semantic_search_example.py @@ -3,9 +3,13 @@ Example demonstrating semantic search for AI-powered tool discovery.
Semantic search understands natural language intent and synonyms, so queries like -"fire someone" or "check my to-do list" resolve to the right StackOne actions — +"book a meeting" or "cancel an event" resolve to the right StackOne actions — unlike keyword matching which requires exact tool names. +This example uses a Calendly-linked account to demonstrate how semantic search +discovers scheduling, event, and organization management tools from natural +language queries. + This example is runnable with the following command: ```bash uv run examples/semantic_search_example.py @@ -13,9 +17,13 @@ Prerequisites: - STACKONE_API_KEY environment variable set -- At least one linked account in StackOne +- STACKONE_ACCOUNT_ID environment variable set (required for examples that fetch tools) +- At least one linked account in StackOne (this example uses Calendly) + +Note: search_action_names() works with just STACKONE_API_KEY — no account ID needed. """ +import logging import os from dotenv import load_dotenv @@ -24,152 +32,214 @@ load_dotenv() +# Show SDK warnings (e.g., semantic search fallback to local search) +logging.basicConfig(level=logging.WARNING) -def example_search_tools(): - """High-level semantic search returning a Tools collection. +# Read account IDs from environment — supports comma-separated values +_account_ids = [aid.strip() for aid in os.getenv("STACKONE_ACCOUNT_ID", "").split(",") if aid.strip()] - search_tools() is the recommended way to use semantic search. It: - 1. Fetches all available tools from your linked accounts - 2. Queries the semantic search API with your natural language query - 3. Filters results to only connectors available in your accounts - 4. Returns a Tools collection ready for any framework (.to_openai(), .to_langchain(), etc.) + +def example_search_action_names(): + """Lightweight search returning action names and scores without fetching tools. 
+ + search_action_names() queries the semantic search API directly — it does NOT + need account IDs or MCP. This makes it the simplest way to try semantic search. + + When called without account_ids, results come from the full StackOne catalog + (all connectors). When called with account_ids, results are filtered to only + connectors available in your linked accounts. """ - print("Example 1: search_tools() — high-level semantic search\n") + print("=" * 60) + print("Example 1: search_action_names() — lightweight discovery") + print("=" * 60) + print() + print("This searches the StackOne action catalog using semantic vectors.") + print("No account ID needed — results come from all available connectors.") + print() toolset = StackOneToolSet() - # Search using natural language — no need to know exact tool names - tools = toolset.search_tools( - "manage employee records", - top_k=5, - min_score=0.3, - ) + query = "get user schedule" + print(f'Searching for: "{query}"') + print() - print(f"Found {len(tools)} matching tools:") - for tool in tools: - print(f" - {tool.name}: {tool.description[:80]}...") + results = toolset.search_action_names(query, top_k=5) - # The result is a standard Tools collection — convert to any framework format - openai_tools = tools.to_openai() - print(f"\nConverted to {len(openai_tools)} OpenAI function definitions") + print(f"Top {len(results)} matches from the full catalog:") + for r in results: + print(f" [{r.similarity_score:.2f}] {r.action_name} ({r.connector_key})") + print(f" {r.description}") + print() + + # Show filtering effect when account_ids are available + if _account_ids: + print(f"Now filtering to your linked accounts ({', '.join(_account_ids)})...") + filtered = toolset.search_action_names(query, account_ids=_account_ids, top_k=5) + print(f"Filtered to {len(filtered)} matches (only your connectors):") + for r in filtered: + print(f" [{r.similarity_score:.2f}] {r.action_name} ({r.connector_key})") + else: + print("Tip: Set 
STACKONE_ACCOUNT_ID to see results filtered to your linked connectors.") print() -def example_search_tools_with_connector(): - """Semantic search filtered by connector. +def example_search_tools(): + """High-level semantic search returning a Tools collection. - Use the connector parameter to scope results to a specific provider, - for example when you know the user works with BambooHR. + search_tools() is the recommended way to use semantic search. It: + 1. Queries the semantic search API with your natural language query + 2. Fetches tool definitions from your linked accounts via MCP + 3. Matches semantic results to available tools (filtering out connectors you don't have) + 4. Returns a Tools collection ready for any framework (.to_openai(), .to_langchain(), etc.) """ - print("Example 2: search_tools() with connector filter\n") + print("=" * 60) + print("Example 2: search_tools() — full tool discovery") + print("=" * 60) + print() toolset = StackOneToolSet() - # Search within a specific connector - tools = toolset.search_tools( - "create time off request", - connector="bamboohr", - top_k=3, - min_score=0.3, - ) + query = "cancel an event" + print(f'Step 1: Searching for "{query}" via semantic search...') + print() + + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=5) - print(f"Found {len(tools)} BambooHR tools for 'create time off request':") + connectors = {t.name.split("_")[0] for t in tools} + print(f"Found {len(tools)} tools from your linked account(s) ({', '.join(sorted(connectors))}):") for tool in tools: print(f" - {tool.name}") + print(f" {tool.description}") + print() + # Show OpenAI conversion + print("Step 2: Converting to OpenAI function-calling format...") + openai_tools = tools.to_openai() + print(f"Created {len(openai_tools)} OpenAI function definitions:") + for fn in openai_tools: + func = fn["function"] + param_names = list(func["parameters"].get("properties", {}).keys()) + print(f" - {func['name']}({', 
'.join(param_names[:3])}{'...' if len(param_names) > 3 else ''})") print() -def example_search_action_names(): - """Lightweight search returning action names and scores without fetching tools. +def example_search_tools_with_connector(): + """Semantic search filtered by connector. - search_action_names() is useful when you want to inspect search results - before committing to fetching full tool definitions — for example, to - show the user a list of options. + Use the connector parameter to scope results to a specific provider, + for example when you know the user works with Calendly. """ - print("Example 3: search_action_names() — lightweight inspection\n") + print("=" * 60) + print("Example 3: search_tools() with connector filter") + print("=" * 60) + print() toolset = StackOneToolSet() - results = toolset.search_action_names( - "time off requests", - top_k=5, - min_score=0.3, - ) + query = "book a meeting" + connector = "calendly" + print(f'Searching for "{query}" filtered to connector="{connector}"...') + print() - print("Search results (action names + scores):") - for r in results: - print(f" {r.action_name} ({r.connector_key}) — score: {r.similarity_score:.2f}") - print(f" {r.description[:80]}...") + tools = toolset.search_tools( + query, + connector=connector, + account_ids=_account_ids, + top_k=3, + ) + print(f"Found {len(tools)} {connector} tools:") + for tool in tools: + print(f" - {tool.name}") + print(f" {tool.description}") print() def example_utility_tools_semantic(): """Using utility tools with semantic search for agent loops. - When building agent loops (search → select → execute), pass + When building agent loops (search -> select -> execute), pass semantic_client to utility_tools() to upgrade tool_search from local BM25+TF-IDF to cloud-based semantic search. + + Note: tool_search queries the full backend catalog (all connectors), + not just the ones in your linked accounts. 
""" - print("Example 4: Utility tools with semantic search\n") + print("=" * 60) + print("Example 4: Utility tools with semantic search") + print("=" * 60) + print() toolset = StackOneToolSet() - # Fetch tools for your accounts - tools = toolset.fetch_tools() + print("Step 1: Fetching tools from your linked accounts via MCP...") + tools = toolset.fetch_tools(account_ids=_account_ids) + print(f"Loaded {len(tools)} tools.") + print() - # Pass semantic_client to switch tool_search to semantic mode + print("Step 2: Creating utility tools with semantic search enabled...") + print(" Passing semantic_client upgrades tool_search from local keyword") + print(" matching (BM25+TF-IDF) to cloud-based semantic vector search.") utility = tools.utility_tools(semantic_client=toolset.semantic_client) - # tool_search now uses semantic search under the hood search_tool = utility.get_tool("tool_search") if search_tool: - result = search_tool.call(query="onboard a new team member", limit=5) - print("Semantic tool_search results:") - for tool_info in result.get("tools", []): - print(f" - {tool_info['name']} (score: {tool_info['score']:.2f})") - print(f" {tool_info['description'][:80]}...") + query = "cancel an event or meeting" + print() + print(f'Step 3: Calling tool_search with query="{query}"...') + print(" (This searches the full StackOne catalog, not just your linked tools)") + print() + result = search_tool.call(query=query, limit=5) + tools_data = result.get("tools", []) + print(f"tool_search returned {len(tools_data)} results:") + for tool_info in tools_data: + print(f" [{tool_info['score']:.2f}] {tool_info['name']}") + print(f" {tool_info['description']}") print() def example_openai_agent_loop(): - """Complete agent loop: semantic search → OpenAI → execute. + """Complete agent loop: semantic search -> OpenAI -> execute. This demonstrates the full pattern for building an AI agent that discovers tools via semantic search and executes them via OpenAI. 
""" - print("Example 5: OpenAI agent loop with semantic search\n") + print("=" * 60) + print("Example 5: OpenAI agent loop with semantic search") + print("=" * 60) + print() try: from openai import OpenAI except ImportError: - print("OpenAI library not installed. Install with: pip install openai") + print("Skipped: OpenAI library not installed. Install with: pip install openai") print() return if not os.getenv("OPENAI_API_KEY"): - print("Set OPENAI_API_KEY to run this example") + print("Skipped: Set OPENAI_API_KEY to run this example.") print() return client = OpenAI() toolset = StackOneToolSet() - # Step 1: Discover relevant tools using semantic search - tools = toolset.search_tools("list employees and their details", top_k=3) - print(f"Discovered {len(tools)} tools via semantic search") + query = "list upcoming events" + print(f'Step 1: Discovering tools for "{query}" via semantic search...') + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=3) + print(f"Found {len(tools)} tools:") for tool in tools: print(f" - {tool.name}") + print() - # Step 2: Convert to OpenAI format and call the LLM + print("Step 2: Sending tools to OpenAI as function definitions...") openai_tools = tools.to_openai() messages = [ - {"role": "system", "content": "You are a helpful HR assistant."}, - {"role": "user", "content": "Can you list the first 5 employees?"}, + {"role": "system", "content": "You are a helpful scheduling assistant."}, + {"role": "user", "content": "Can you show me my upcoming events?"}, ] response = client.chat.completions.create( @@ -179,18 +249,17 @@ def example_openai_agent_loop(): tool_choice="auto", ) - # Step 3: Execute the tool calls if response.choices[0].message.tool_calls: - print("\nLLM chose to call:") + print("Step 3: OpenAI chose to call these tools:") for tool_call in response.choices[0].message.tool_calls: print(f" - {tool_call.function.name}({tool_call.function.arguments})") tool = tools.get_tool(tool_call.function.name) if tool: 
result = tool.execute(tool_call.function.arguments) - print(f" Result keys: {list(result.keys()) if isinstance(result, dict) else type(result)}") + print(f" Response keys: {list(result.keys()) if isinstance(result, dict) else type(result)}") else: - print(f"\nLLM response: {response.choices[0].message.content}") + print(f"OpenAI responded with text: {response.choices[0].message.content}") print() @@ -201,52 +270,78 @@ def example_langchain_semantic(): search_tools() returns a Tools collection that converts directly to LangChain format — no extra steps needed. """ - print("Example 6: Semantic search with LangChain\n") + print("=" * 60) + print("Example 6: Semantic search with LangChain") + print("=" * 60) + print() try: from langchain_core.tools import BaseTool # noqa: F401 except ImportError: - print("LangChain not installed. Install with: pip install langchain-core") + print("Skipped: LangChain not installed. Install with: pip install langchain-core") print() return toolset = StackOneToolSet() - # Semantic search → LangChain tools in two lines - tools = toolset.search_tools("employee management", top_k=5) + query = "remove a user from the team" + print(f'Step 1: Searching for "{query}" via semantic search...') + tools = toolset.search_tools(query, account_ids=_account_ids, top_k=5) + print(f"Found {len(tools)} tools.") + print() + + print("Step 2: Converting to LangChain tools...") langchain_tools = tools.to_langchain() - print(f"Created {len(langchain_tools)} LangChain tools from semantic search:") + print(f"Created {len(langchain_tools)} LangChain tools (ready for use with agents):") for tool in langchain_tools: - print(f" - {tool.name}: {tool.description[:80]}...") + print(f" - {tool.name} (type: {type(tool).__name__})") + print(f" {tool.description}") print() def main(): """Run all semantic search examples.""" - print("=" * 60) - print("StackOne AI SDK — Semantic Search Examples") - print("=" * 60) + print() + 
print("############################################################") + print("# StackOne AI SDK — Semantic Search Examples #") + print("############################################################") print() - # Core patterns (require STACKONE_API_KEY) if not os.getenv("STACKONE_API_KEY"): - print("Set STACKONE_API_KEY to run these examples") + print("Set STACKONE_API_KEY to run these examples.") + return + + # --- Examples that work without account IDs --- + example_search_action_names() + + # --- Examples that require account IDs (MCP needs x-account-id) --- + if not _account_ids: + print("=" * 60) + print("Remaining examples require STACKONE_ACCOUNT_ID") + print("=" * 60) + print() + print("Set STACKONE_ACCOUNT_ID (comma-separated for multiple) to run") + print("examples that fetch full tool definitions from your linked accounts:") + print(" - search_tools() with natural language queries") + print(" - search_tools() with connector filter") + print(" - Utility tools with semantic search") + print(" - OpenAI agent loop") + print(" - LangChain integration") return example_search_tools() example_search_tools_with_connector() - example_search_action_names() example_utility_tools_semantic() # Framework integration patterns example_openai_agent_loop() example_langchain_semantic() - print("=" * 60) - print("Examples completed!") - print("=" * 60) + print("############################################################") + print("# All examples completed! 
#") + print("############################################################") if __name__ == "__main__": diff --git a/stackone_ai/toolset.py b/stackone_ai/toolset.py index 65f517b..3f212a4 100644 --- a/stackone_ai/toolset.py +++ b/stackone_ai/toolset.py @@ -4,7 +4,9 @@ import base64 import fnmatch import json +import logging import os +import re import threading from collections.abc import Coroutine from dataclasses import dataclass @@ -24,6 +26,8 @@ SemanticSearchResult, ) +logger = logging.getLogger("stackone.tools") + try: _SDK_VERSION = metadata.version("stackone-ai") except metadata.PackageNotFoundError: # pragma: no cover - best-effort fallback when running from source @@ -39,6 +43,19 @@ } _USER_AGENT = f"stackone-ai-python/{_SDK_VERSION}" +_VERSIONED_ACTION_RE = re.compile(r"^[a-z][a-z0-9]*_\d+(?:\.\d+)+_(.+)_global$") + + +def _normalize_action_name(action_name: str) -> str: + """Convert semantic search API action name to MCP tool name. + + API: 'calendly_1.0.0_calendly_create_scheduling_link_global' + MCP: 'calendly_create_scheduling_link' + """ + match = _VERSIONED_ACTION_RE.match(action_name) + return match.group(1) if match else action_name + + T = TypeVar("T") @@ -341,13 +358,10 @@ def search_tools( if not available_connectors: return Tools([]) - # Step 2: Fetch max results from semantic API, then filter client-side - semantic_api_max = 500 - + # Step 2: Fetch results from semantic API, then filter client-side response = self.semantic_client.search( query=query, connector=connector, - top_k=semantic_api_max, ) # Step 3: Filter results to only available connectors and min_score @@ -379,29 +393,34 @@ def search_tools( # Re-sort by score after merging results from multiple calls filtered_results.sort(key=lambda r: r.similarity_score, reverse=True) - # Apply top_k limit after all filtering and fallback - filtered_results = filtered_results[:top_k] + # Deduplicate by normalized MCP name (keep highest score first, already sorted) + seen_names: set[str] = set() 
+ deduped: list[SemanticSearchResult] = [] + for r in filtered_results: + norm = _normalize_action_name(r.action_name) + if norm not in seen_names: + seen_names.add(norm) + deduped.append(r) + filtered_results = deduped[:top_k] if not filtered_results: return Tools([]) # Step 4: Get matching tools from already-fetched tools - action_names = {r.action_name for r in filtered_results} + action_names = {_normalize_action_name(r.action_name) for r in filtered_results} matched_tools = [t for t in all_tools if t.name in action_names] # Sort matched tools by semantic search score order - action_order = {r.action_name: i for i, r in enumerate(filtered_results)} + action_order = {_normalize_action_name(r.action_name): i for i, r in enumerate(filtered_results)} matched_tools.sort(key=lambda t: action_order.get(t.name, float("inf"))) return Tools(matched_tools) - except SemanticSearchError: + except SemanticSearchError as e: if not fallback_to_local: raise - # Fallback to local search - all_tools = self.fetch_tools(account_ids=account_ids) - available_connectors = all_tools.get_connectors() + logger.warning("Semantic search failed (%s), falling back to local BM25+TF-IDF search", e) utility = all_tools.utility_tools() search_tool = utility.get_tool("tool_search") @@ -416,10 +435,11 @@ def search_tools( matched_names = [t["name"] for t in result.get("tools", [])] # Filter by available connectors and preserve relevance order tool_map = {t.name: t for t in all_tools} + filter_connectors = {connector.lower()} if connector else available_connectors matched_tools = [ tool_map[name] for name in matched_names - if name in tool_map and name.split("_")[0].lower() in available_connectors + if name in tool_map and name.split("_")[0].lower() in filter_connectors ] return Tools(matched_tools[:top_k]) @@ -477,15 +497,15 @@ def search_action_names( if not available_connectors: return [] - # Fetch max results to maximize results after connector filtering - semantic_api_max = 500 - fetch_k = 
semantic_api_max if available_connectors else min(top_k, 500) - - response = self.semantic_client.search( - query=query, - connector=connector, - top_k=fetch_k, - ) + try: + response = self.semantic_client.search( + query=query, + connector=connector, + top_k=None if available_connectors else top_k, + ) + except SemanticSearchError as e: + logger.warning("Semantic search failed: %s", e) + return [] # Filter by min_score results = [r for r in response.results if r.similarity_score >= min_score] @@ -517,7 +537,23 @@ def search_action_names( # Re-sort by score after merging results.sort(key=lambda r: r.similarity_score, reverse=True) - return results[:top_k] + # Normalize and deduplicate by MCP name (keep highest score first) + seen: set[str] = set() + normalized: list[SemanticSearchResult] = [] + for r in results: + norm_name = _normalize_action_name(r.action_name) + if norm_name not in seen: + seen.add(norm_name) + normalized.append( + SemanticSearchResult( + action_name=norm_name, + connector_key=r.connector_key, + similarity_score=r.similarity_score, + label=r.label, + description=r.description, + ) + ) + return normalized[:top_k] def _filter_by_provider(self, tool_name: str, providers: list[str]) -> bool: """Check if a tool name matches any of the provider filters diff --git a/stackone_ai/utility_tools.py b/stackone_ai/utility_tools.py index 00a2d80..01d5af8 100644 --- a/stackone_ai/utility_tools.py +++ b/stackone_ai/utility_tools.py @@ -282,6 +282,7 @@ def create_semantic_tool_search(semantic_client: SemanticSearchClient) -> StackO Utility tool for searching relevant tools using semantic search """ from stackone_ai.semantic_search import SemanticSearchClient # noqa: F811 + from stackone_ai.toolset import _normalize_action_name if not isinstance(semantic_client, SemanticSearchClient): raise TypeError("semantic_client must be a SemanticSearchClient instance") @@ -341,16 +342,19 @@ def execute_search(arguments: str | JsonDict | None = None) -> JsonDict: 
top_k=limit, ) - tools_data = [ - { - "name": r.action_name, - "description": r.description, - "score": r.similarity_score, - "connector": r.connector_key, - } - for r in response.results - if r.similarity_score >= min_score - ] + seen: set[str] = set() + tools_data: list[dict[str, object]] = [] + for r in response.results: + if r.similarity_score >= min_score: + norm_name = _normalize_action_name(r.action_name) + if norm_name not in seen: + seen.add(norm_name) + tools_data.append({ + "name": norm_name, + "description": r.description, + "score": r.similarity_score, + "connector": r.connector_key, + }) return {"tools": tools_data[:limit]} diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py index 1404fd2..d0157a4 100644 --- a/tests/test_semantic_search.py +++ b/tests/test_semantic_search.py @@ -252,25 +252,25 @@ def test_toolset_search_tools( from stackone_ai import StackOneToolSet from stackone_ai.toolset import _McpToolDefinition - # Mock semantic search to return results (including some for unavailable connectors) + # Mock semantic search to return versioned API names (including some for unavailable connectors) mock_search.return_value = SemanticSearchResponse( results=[ SemanticSearchResult( - action_name="bamboohr_create_employee", + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", connector_key="bamboohr", similarity_score=0.95, label="Create Employee", description="Creates a new employee", ), SemanticSearchResult( - action_name="workday_create_worker", + action_name="workday_1.0.0_workday_create_worker_global", connector_key="workday", # User doesn't have this connector similarity_score=0.90, label="Create Worker", description="Creates a new worker", ), SemanticSearchResult( - action_name="hibob_create_employee", + action_name="hibob_1.0.0_hibob_create_employee_global", connector_key="hibob", similarity_score=0.85, label="Create Employee", @@ -359,6 +359,45 @@ def test_toolset_search_tools_fallback( connector = 
name.split("_")[0] assert connector in {"bamboohr", "workday"} + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_toolset_search_tools_fallback_respects_connector( + self, + mock_fetch: MagicMock, + mock_search: MagicMock, + ) -> None: + """Test BM25 fallback filters to the requested connector.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.side_effect = SemanticSearchError("API unavailable") + + mock_fetch.return_value = [ + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates a new employee in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_list_employees", + description="Lists all employees in BambooHR", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="workday_create_worker", + description="Creates a new worker in Workday", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("create employee", connector="bamboohr", fallback_to_local=True) + + assert len(tools) > 0 + tool_names = [t.name for t in tools] + for name in tool_names: + assert name.split("_")[0] == "bamboohr" + @patch.object(SemanticSearchClient, "search") @patch("stackone_ai.toolset._fetch_mcp_tools") def test_toolset_search_tools_fallback_disabled( @@ -397,14 +436,14 @@ def test_toolset_search_action_names( mock_search.return_value = SemanticSearchResponse( results=[ SemanticSearchResult( - action_name="bamboohr_create_employee", + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", connector_key="bamboohr", similarity_score=0.92, label="Create Employee", description="Creates a new employee", ), SemanticSearchResult( - action_name="hibob_create_employee", + action_name="hibob_1.0.0_hibob_create_employee_global", connector_key="hibob", similarity_score=0.45, 
label="Create Employee", @@ -418,7 +457,7 @@ def test_toolset_search_action_names( toolset = StackOneToolSet(api_key="test-key") results = toolset.search_action_names("create employee", min_score=0.5) - # Should filter by min_score + # Should filter by min_score and normalize action names assert len(results) == 1 assert results[0].action_name == "bamboohr_create_employee" @@ -485,7 +524,7 @@ def test_semantic_tool_search_execute(self, mock_search: MagicMock) -> None: mock_search.return_value = SemanticSearchResponse( results=[ SemanticSearchResult( - action_name="bamboohr_create_employee", + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", connector_key="bamboohr", similarity_score=0.92, label="Create Employee", @@ -503,6 +542,7 @@ def test_semantic_tool_search_execute(self, mock_search: MagicMock) -> None: assert "tools" in result assert len(result["tools"]) == 1 + # Name should be normalized from versioned API format to MCP format assert result["tools"][0]["name"] == "bamboohr_create_employee" assert result["tools"][0]["score"] == 0.92 assert result["tools"][0]["connector"] == "bamboohr" @@ -690,21 +730,21 @@ def test_filters_by_account_connectors(self, mock_fetch: MagicMock, mock_search: mock_search.return_value = SemanticSearchResponse( results=[ SemanticSearchResult( - action_name="bamboohr_create_employee", + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", connector_key="bamboohr", similarity_score=0.95, label="Create Employee", description="Creates employee", ), SemanticSearchResult( - action_name="workday_create_worker", + action_name="workday_1.0.0_workday_create_worker_global", connector_key="workday", similarity_score=0.90, label="Create Worker", description="Creates worker", ), SemanticSearchResult( - action_name="hibob_create_employee", + action_name="hibob_1.0.0_hibob_create_employee_global", connector_key="hibob", similarity_score=0.85, label="Create Employee", @@ -737,12 +777,25 @@ def 
test_filters_by_account_connectors(self, mock_fetch: MagicMock, mock_search: ) # workday should be filtered out (not in linked accounts) + # Names should be normalized from versioned API format assert len(results) == 2 action_names = [r.action_name for r in results] assert "bamboohr_create_employee" in action_names assert "hibob_create_employee" in action_names assert "workday_create_worker" not in action_names + @patch.object(SemanticSearchClient, "search") + def test_search_action_names_returns_empty_on_failure(self, mock_search: MagicMock) -> None: + """Test that search_action_names returns [] when semantic search fails.""" + from stackone_ai import StackOneToolSet + + mock_search.side_effect = SemanticSearchError("API unavailable") + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("create employee") + + assert results == [] + @patch.object(SemanticSearchClient, "search") @patch("stackone_ai.toolset._fetch_mcp_tools") def test_fetches_max_then_falls_back_per_connector( @@ -774,11 +827,11 @@ def test_fetches_max_then_falls_back_per_connector( top_k=5, ) - # First call: fetch API max (500) for broad search + # First call: broad search without top_k (let backend decide, filter client-side) # Second call: per-connector fallback for "bamboohr" since first returned nothing assert mock_search.call_count == 2 first_call = mock_search.call_args_list[0].kwargs - assert first_call["top_k"] == 500 + assert first_call["top_k"] is None assert first_call["connector"] is None second_call = mock_search.call_args_list[1].kwargs assert second_call["connector"] == "bamboohr" @@ -791,11 +844,11 @@ def test_respects_top_k_after_filtering(self, mock_fetch: MagicMock, mock_search from stackone_ai import StackOneToolSet from stackone_ai.toolset import _McpToolDefinition - # Return more results than top_k + # Return more results than top_k using versioned API names mock_search.return_value = SemanticSearchResponse( results=[ SemanticSearchResult( - 
action_name=f"bamboohr_action_{i}", + action_name=f"bamboohr_1.0.0_bamboohr_action_{i}_global", connector_key="bamboohr", similarity_score=0.9 - i * 0.1, label=f"Action {i}", @@ -823,4 +876,188 @@ def test_respects_top_k_after_filtering(self, mock_fetch: MagicMock, mock_search top_k=3, ) + # Should be limited to top_k after normalization assert len(results) == 3 + # Names should be normalized + assert results[0].action_name == "bamboohr_action_0" + + +class TestNormalizeActionName: + """Tests for _normalize_action_name() function.""" + + def test_versioned_name_is_normalized(self) -> None: + """Test that versioned API names are normalized to MCP format.""" + from stackone_ai.toolset import _normalize_action_name + + assert ( + _normalize_action_name("calendly_1.0.0_calendly_create_scheduling_link_global") + == "calendly_create_scheduling_link" + ) + + def test_multi_segment_version(self) -> None: + """Test normalization with multi-segment semver.""" + from stackone_ai.toolset import _normalize_action_name + + assert ( + _normalize_action_name("breathehr_1.0.1_breathehr_list_employees_global") + == "breathehr_list_employees" + ) + + def test_already_normalized_name_unchanged(self) -> None: + """Test that MCP-format names pass through unchanged.""" + from stackone_ai.toolset import _normalize_action_name + + assert _normalize_action_name("bamboohr_create_employee") == "bamboohr_create_employee" + + def test_non_matching_name_unchanged(self) -> None: + """Test that names that don't match the pattern pass through unchanged.""" + from stackone_ai.toolset import _normalize_action_name + + assert _normalize_action_name("some_random_tool") == "some_random_tool" + + def test_empty_string(self) -> None: + """Test empty string input.""" + from stackone_ai.toolset import _normalize_action_name + + assert _normalize_action_name("") == "" + + def test_multiple_versions_normalize_to_same(self) -> None: + """Test that different versions of the same action normalize identically.""" 
+ from stackone_ai.toolset import _normalize_action_name + + name_v1 = _normalize_action_name("breathehr_1.0.0_breathehr_list_employees_global") + name_v2 = _normalize_action_name("breathehr_1.0.1_breathehr_list_employees_global") + assert name_v1 == name_v2 == "breathehr_list_employees" + + +class TestSemanticSearchDeduplication: + """Tests for deduplication after name normalization.""" + + @patch.object(SemanticSearchClient, "search") + @patch("stackone_ai.toolset._fetch_mcp_tools") + def test_search_tools_deduplicates_versions( + self, mock_fetch: MagicMock, mock_search: MagicMock + ) -> None: + """Test that search_tools deduplicates multiple API versions of the same action.""" + from stackone_ai import StackOneToolSet + from stackone_ai.toolset import _McpToolDefinition + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + SemanticSearchResult( + action_name="bamboohr_1.0.0_bamboohr_create_employee_global", + connector_key="bamboohr", + similarity_score=0.85, + label="Create Employee", + description="Creates employee", + ), + ], + total_count=3, + query="list employees", + ) + + mock_fetch.return_value = [ + _McpToolDefinition( + name="breathehr_list_employees", + description="Lists employees", + input_schema={"type": "object", "properties": {}}, + ), + _McpToolDefinition( + name="bamboohr_create_employee", + description="Creates employee", + input_schema={"type": "object", "properties": {}}, + ), + ] + + toolset = StackOneToolSet(api_key="test-key") + tools = toolset.search_tools("list employees", top_k=5) + + # Should deduplicate: 
both breathehr versions -> breathehr_list_employees + tool_names = [t.name for t in tools] + assert tool_names.count("breathehr_list_employees") == 1 + assert "bamboohr_create_employee" in tool_names + assert len(tools) == 2 + + @patch.object(SemanticSearchClient, "search") + def test_search_action_names_deduplicates_versions(self, mock_search: MagicMock) -> None: + """Test that search_action_names deduplicates multiple API versions.""" + from stackone_ai import StackOneToolSet + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + ], + total_count=2, + query="list employees", + ) + + toolset = StackOneToolSet(api_key="test-key") + results = toolset.search_action_names("list employees", top_k=5) + + # Should deduplicate: only one result for breathehr_list_employees + assert len(results) == 1 + assert results[0].action_name == "breathehr_list_employees" + # Should keep the highest score (first seen, already sorted by score) + assert results[0].similarity_score == 0.95 + + @patch.object(SemanticSearchClient, "search") + def test_semantic_tool_search_deduplicates_versions(self, mock_search: MagicMock) -> None: + """Test that create_semantic_tool_search deduplicates API versions.""" + from stackone_ai.utility_tools import create_semantic_tool_search + + mock_search.return_value = SemanticSearchResponse( + results=[ + SemanticSearchResult( + action_name="breathehr_1.0.0_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.95, + label="List Employees", + description="Lists employees", + ), + SemanticSearchResult( + 
action_name="breathehr_1.0.1_breathehr_list_employees_global", + connector_key="breathehr", + similarity_score=0.90, + label="List Employees v2", + description="Lists employees v2", + ), + ], + total_count=2, + query="list employees", + ) + + client = SemanticSearchClient(api_key="test-key") + tool = create_semantic_tool_search(client) + result = tool.execute({"query": "list employees", "limit": 10}) + + # Should deduplicate: only one result + assert len(result["tools"]) == 1 + assert result["tools"][0]["name"] == "breathehr_list_employees" + assert result["tools"][0]["score"] == 0.95 From 521339bfb73e445c0e3919337963587d1e11c27e Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Tue, 10 Feb 2026 18:23:45 +0000 Subject: [PATCH 20/25] Fix ruff issues --- examples/semantic_search_example.py | 4 +++- stackone_ai/utility_tools.py | 14 ++++++++------ tests/test_semantic_search.py | 4 +--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/examples/semantic_search_example.py b/examples/semantic_search_example.py index a8504e1..84813b7 100644 --- a/examples/semantic_search_example.py +++ b/examples/semantic_search_example.py @@ -257,7 +257,9 @@ def example_openai_agent_loop(): tool = tools.get_tool(tool_call.function.name) if tool: result = tool.execute(tool_call.function.arguments) - print(f" Response keys: {list(result.keys()) if isinstance(result, dict) else type(result)}") + print( + f" Response keys: {list(result.keys()) if isinstance(result, dict) else type(result)}" + ) else: print(f"OpenAI responded with text: {response.choices[0].message.content}") diff --git a/stackone_ai/utility_tools.py b/stackone_ai/utility_tools.py index 01d5af8..6423367 100644 --- a/stackone_ai/utility_tools.py +++ b/stackone_ai/utility_tools.py @@ -349,12 +349,14 @@ def execute_search(arguments: str | JsonDict | None = None) -> JsonDict: norm_name = _normalize_action_name(r.action_name) if norm_name not in seen: seen.add(norm_name) - tools_data.append({ - "name": norm_name, - 
"description": r.description, - "score": r.similarity_score, - "connector": r.connector_key, - }) + tools_data.append( + { + "name": norm_name, + "description": r.description, + "score": r.similarity_score, + "connector": r.connector_key, + } + ) return {"tools": tools_data[:limit]} diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py index d0157a4..8af0b93 100644 --- a/tests/test_semantic_search.py +++ b/tests/test_semantic_search.py @@ -935,9 +935,7 @@ class TestSemanticSearchDeduplication: @patch.object(SemanticSearchClient, "search") @patch("stackone_ai.toolset._fetch_mcp_tools") - def test_search_tools_deduplicates_versions( - self, mock_fetch: MagicMock, mock_search: MagicMock - ) -> None: + def test_search_tools_deduplicates_versions(self, mock_fetch: MagicMock, mock_search: MagicMock) -> None: """Test that search_tools deduplicates multiple API versions of the same action.""" from stackone_ai import StackOneToolSet from stackone_ai.toolset import _McpToolDefinition From 4d704abc469c3dcd7102caf1b0a2be46ecbd590b Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Thu, 12 Feb 2026 09:17:04 +0000 Subject: [PATCH 21/25] Document the semantic search feature in the python files and example --- examples/semantic_search_example.py | 40 ++++++++++++ stackone_ai/semantic_search.py | 94 ++++++++++++++++++++++++++++- 2 files changed, 133 insertions(+), 1 deletion(-) diff --git a/examples/semantic_search_example.py b/examples/semantic_search_example.py index 84813b7..927fd59 100644 --- a/examples/semantic_search_example.py +++ b/examples/semantic_search_example.py @@ -10,6 +10,46 @@ discovers scheduling, event, and organization management tools from natural language queries. + +How Semantic Search Works (Overview) +===================================== + +The SDK provides three paths for semantic tool discovery, each with a different +trade-off between speed, filtering, and completeness: + +1. 
search_tools(query) — Full discovery (recommended for agent frameworks) + + This is the method you should use when integrating with OpenAI, LangChain, + CrewAI, or any other agent framework. It works in these steps: + + a) Fetch ALL tools from the user's linked accounts via MCP + b) Extract the set of available connectors (e.g. {bamboohr, calendly}) + c) Query the semantic search API with the natural language query + d) Filter results to only connectors the user has access to + e) Deduplicate across API versions (keep highest score per action) + f) Match results back to the fetched tool definitions + g) Return a Tools collection sorted by relevance score + + Key point: tools are fetched first, semantic search runs second, and only + the intersection (tools the user has AND that match the query) is returned. + If the semantic API is unavailable, the SDK falls back to local BM25+TF-IDF + search automatically. + +2. search_action_names(query) — Lightweight preview + + Queries the semantic API directly and returns metadata (name, connector, + score, description) without fetching full tool definitions. Useful for + inspecting results before committing to a full fetch. When account_ids are + provided, results are filtered to the user's available connectors. + +3. utility_tools(semantic_client=...) — Agent-loop pattern + + Creates tool_search and tool_execute utility tools that agents can call + inside an agentic loop. The agent searches, inspects, and executes tools + dynamically. Note: utility tool search queries the full backend catalog + (all connectors), not just the user's linked accounts. 
+ + This example is runnable with the following command: ```bash uv run examples/semantic_search_example.py diff --git a/stackone_ai/semantic_search.py b/stackone_ai/semantic_search.py index 222181d..7821663 100644 --- a/stackone_ai/semantic_search.py +++ b/stackone_ai/semantic_search.py @@ -1,4 +1,96 @@ -"""Semantic search client for StackOne action search API.""" +"""Semantic search client for StackOne action search API. + +How Semantic Search Works +========================= + +The SDK provides three ways to discover tools using semantic search. +Each path trades off between speed, filtering, and completeness. + +1. ``search_tools(query)`` — Full tool discovery (recommended for agent frameworks) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the primary method used when integrating with OpenAI, LangChain, or CrewAI. +The internal flow is: + +:: + + User query (e.g. "create an employee") + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 1: Fetch ALL tools from linked accounts via MCP │ + │ (uses account_ids to scope the request) │ + └────────────────────────┬────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 2: Extract available connectors from the │ + │ fetched tools (e.g. 
{bamboohr, hibob}) │ + └────────────────────────┬────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 3: Query the semantic search API (/actions/ │ + │ search) with the natural language query │ + └────────────────────────┬────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 4: Filter results — keep only connectors the │ + │ user has access to + apply min_score cutoff │ + │ │ + │ If not enough results, make per-connector │ + │ fallback queries for missing connectors │ + └────────────────────────┬────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 5: Deduplicate by normalized action name │ + │ (strips API version suffixes, keeps highest │ + │ scoring version of each action) │ + └────────────────────────┬────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────────────────┐ + │ Step 6: Match semantic results back to the fetched │ + │ tool definitions from Step 1 │ + │ Return Tools sorted by relevance score │ + └─────────────────────────────────────────────────────┘ + +Key point: tools are fetched first, semantic search runs second, and only +tools that exist in the user's linked accounts AND match the semantic query +are returned. This prevents suggesting tools the user cannot execute. + +If the semantic API is unavailable, the SDK falls back to a local +BM25 + TF-IDF hybrid search over the fetched tools (unless +``fallback_to_local=False``). + + +2. ``search_action_names(query)`` — Lightweight discovery +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Queries the semantic API directly and returns action name metadata +(name, connector, score, description) **without** fetching full tool +definitions. This is useful for previewing results before committing +to a full fetch. 
+ +When ``account_ids`` are provided, tools are fetched only to determine +available connectors — results are then filtered to those connectors. +Without ``account_ids``, results come from the full StackOne catalog. + + +3. ``utility_tools(semantic_client=...)`` — Agent-loop search + execute +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Creates a ``tool_search`` utility tool that agents can call inside a +loop. The agent searches for tools, inspects results, then calls +``tool_execute`` to run the chosen tool. When ``semantic_client`` is +passed, ``tool_search`` uses cloud-based semantic vectors instead of +local BM25 + TF-IDF. + +Note: utility tool search queries the **full backend catalog** (all +connectors), not just the ones in the user's linked accounts. +""" from __future__ import annotations From fbd9c79b8a4b4e45da74dc0160cd9cfbd879c648 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Thu, 12 Feb 2026 17:32:47 +0000 Subject: [PATCH 22/25] Respect the backend results unless top_k specified explicitly, add python only crewAI example --- examples/crewai_integration.py | 3 + examples/crewai_semantic_search.py | 94 ++++++++++++++++++++++++++++++ stackone_ai/models.py | 68 +++++++++++++++++++++ stackone_ai/toolset.py | 33 ++++++----- 4 files changed, 182 insertions(+), 16 deletions(-) create mode 100644 examples/crewai_semantic_search.py diff --git a/examples/crewai_integration.py b/examples/crewai_integration.py index 6cc1604..a3d6e0a 100644 --- a/examples/crewai_integration.py +++ b/examples/crewai_integration.py @@ -1,6 +1,9 @@ """ This example demonstrates how to use StackOne tools with CrewAI. +Note: This example is Python only. CrewAI does not have an official +TypeScript/Node.js library. + CrewAI uses LangChain tools natively. 
```bash diff --git a/examples/crewai_semantic_search.py b/examples/crewai_semantic_search.py new file mode 100644 index 0000000..b6245f1 --- /dev/null +++ b/examples/crewai_semantic_search.py @@ -0,0 +1,94 @@ +""" +CrewAI meeting booking agent powered by semantic search. + +Note: This example is Python only. CrewAI does not have an official +TypeScript/Node.js library. + +Instead of hardcoding tool names, this example uses semantic search to discover +scheduling tools (e.g., Calendly) from natural language queries like "book a +meeting" or "check availability". + +Prerequisites: +- STACKONE_API_KEY environment variable set +- STACKONE_ACCOUNT_ID environment variable set (Calendly-linked account) +- OPENAI_API_KEY environment variable set (for CrewAI's LLM) + +```bash +uv run examples/crewai_semantic_search.py +``` +""" + +import os + +from crewai import Agent, Crew, Task +from dotenv import load_dotenv + +from stackone_ai import StackOneToolSet + +load_dotenv() + +_account_ids = [aid.strip() for aid in os.getenv("STACKONE_ACCOUNT_ID", "").split(",") if aid.strip()] + + +def crewai_semantic_search() -> None: + toolset = StackOneToolSet() + + # Step 1: Preview — lightweight search returning action names and scores + # search_action_names() queries the semantic API without fetching full + # tool definitions. Useful for inspecting what's available before committing. + preview = toolset.search_action_names( + "book a meeting or check availability", + account_ids=_account_ids + ) + print("Semantic search preview (action names only):") + for r in preview: + print(f" [{r.similarity_score:.2f}] {r.action_name} ({r.connector_key})") + print() + + # Step 2: Full discovery — fetch matching tools ready for framework use + # search_tools() fetches tools from linked accounts, runs semantic search, + # and returns only tools the user has access to. 
+ tools = toolset.search_tools( + "schedule meetings, check availability, list events", + connector="calendly", + account_ids=_account_ids + ) + assert len(tools) > 0, "Expected at least one scheduling tool" + + print(f"Discovered {len(tools)} scheduling tools:") + for tool in tools: + print(f" - {tool.name}: {tool.description[:80]}...") + print() + + # Step 3: Convert to CrewAI format + crewai_tools = tools.to_crewai() + + # Step 4: Create a CrewAI meeting booking agent + agent = Agent( + role="Meeting Booking Agent", + goal="Help users manage their calendar by discovering and booking meetings, " + "checking availability, and listing upcoming events.", + backstory="You are an AI assistant specialized in calendar management. " + "You have access to scheduling tools discovered via semantic search " + "and can help users with all meeting-related tasks.", + llm="gpt-4o-mini", + tools=crewai_tools, + max_iter=2, + verbose=True, + ) + + task = Task( + description="List upcoming scheduled events to give an overview of the calendar.", + agent=agent, + expected_output="A summary of upcoming events or a confirmation that events were retrieved.", + ) + + crew = Crew(agents=[agent], tasks=[task]) + + result = crew.kickoff() + assert result is not None, "Expected result to be returned" + print(f"\nCrew result: {result}") + + +if __name__ == "__main__": + crewai_semantic_search() diff --git a/stackone_ai/models.py b/stackone_ai/models.py index a3f50e4..c3bd5ef 100644 --- a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -450,6 +450,64 @@ def _run(self, **kwargs: Any) -> Any: return StackOneLangChainTool() + def to_crewai(self) -> Any: + """Convert this tool to CrewAI format + + Requires the ``crewai`` package (``pip install crewai``). + + Returns: + Tool as a ``crewai.tools.BaseTool`` instance + """ + try: + from crewai.tools.base_tool import BaseTool as CrewAIBaseTool + except ImportError as e: + raise ImportError("crewai is required for to_crewai(). 
Install with: pip install crewai") from e + + schema_props: dict[str, Any] = {} + annotations: dict[str, Any] = {} + + for name, details in self.parameters.properties.items(): + python_type: type = str + if isinstance(details, dict): + type_str = details.get("type", "string") + if type_str == "number": + python_type = float + elif type_str == "integer": + python_type = int + elif type_str == "boolean": + python_type = bool + + field = Field(description=details.get("description", "")) + else: + field = Field(description="") + + schema_props[name] = field + annotations[name] = python_type + + schema_class = type( + f"{self.name.title()}Args", + (BaseModel,), + { + "__annotations__": annotations, + "__module__": __name__, + **schema_props, + }, + ) + + parent_tool = self + _name = parent_tool.name + _description = parent_tool.description + + class StackOneCrewAITool(CrewAIBaseTool): + name: str = _name + description: str = _description + args_schema: type[BaseModel] = schema_class + + def _run(self, **kwargs: Any) -> Any: + return parent_tool.execute(kwargs) + + return StackOneCrewAITool() + def set_account_id(self, account_id: str | None) -> None: """Set the account ID for this tool @@ -558,6 +616,16 @@ def to_langchain(self) -> Sequence[BaseTool]: """ return [tool.to_langchain() for tool in self.tools] + def to_crewai(self) -> list[Any]: + """Convert all tools to CrewAI format + + Requires the ``crewai`` package (``pip install crewai``). 
+ + Returns: + List of tools as ``crewai.tools.BaseTool`` instances + """ + return [tool.to_crewai() for tool in self.tools] + def utility_tools( self, hybrid_alpha: float | None = None, diff --git a/stackone_ai/toolset.py b/stackone_ai/toolset.py index 3f212a4..0e20102 100644 --- a/stackone_ai/toolset.py +++ b/stackone_ai/toolset.py @@ -306,7 +306,7 @@ def search_tools( query: str, *, connector: str | None = None, - top_k: int = 10, + top_k: int | None = None, min_score: float = 0.0, account_ids: list[str] | None = None, fallback_to_local: bool = True, @@ -321,7 +321,7 @@ def search_tools( query: Natural language description of needed functionality (e.g., "create employee", "send a message") connector: Optional provider/connector filter (e.g., "bamboohr", "slack") - top_k: Maximum number of tools to return (default: 10) + top_k: Maximum number of tools to return. If None, uses the backend default. min_score: Minimum similarity score threshold 0-1 (default: 0.0) account_ids: Optional account IDs (uses set_accounts() if not provided) fallback_to_local: If True, fall back to local BM25+TF-IDF search on API failure @@ -372,11 +372,11 @@ def search_tools( ] # Step 3b: If not enough results, make per-connector calls for missing connectors - if len(filtered_results) < top_k and not connector: + if not connector and (top_k is None or len(filtered_results) < top_k): found_connectors = {r.connector_key.lower() for r in filtered_results} missing_connectors = available_connectors - found_connectors for missing in missing_connectors: - if len(filtered_results) >= top_k: + if top_k is not None and len(filtered_results) >= top_k: break try: extra = self.semantic_client.search(query=query, connector=missing, top_k=top_k) @@ -385,7 +385,7 @@ def search_tools( fr.action_name for fr in filtered_results }: filtered_results.append(r) - if len(filtered_results) >= top_k: + if top_k is not None and len(filtered_results) >= top_k: break except SemanticSearchError: continue @@ -401,7 
+401,7 @@ def search_tools( if norm not in seen_names: seen_names.add(norm) deduped.append(r) - filtered_results = deduped[:top_k] + filtered_results = deduped[:top_k] if top_k is not None else deduped if not filtered_results: return Tools([]) @@ -425,10 +425,11 @@ def search_tools( search_tool = utility.get_tool("tool_search") if search_tool: + fallback_limit = top_k * 3 if top_k is not None else 100 result = search_tool.execute( { "query": query, - "limit": top_k * 3, # Over-fetch to account for connector filtering + "limit": fallback_limit, "minScore": min_score, } ) @@ -441,7 +442,7 @@ def search_tools( for name in matched_names if name in tool_map and name.split("_")[0].lower() in filter_connectors ] - return Tools(matched_tools[:top_k]) + return Tools(matched_tools[:top_k] if top_k is not None else matched_tools) return all_tools @@ -451,7 +452,7 @@ def search_action_names( *, connector: str | None = None, account_ids: list[str] | None = None, - top_k: int = 10, + top_k: int | None = None, min_score: float = 0.0, ) -> list[SemanticSearchResult]: """Search for action names without fetching tools. @@ -465,7 +466,7 @@ def search_action_names( account_ids: Optional account IDs to scope results to connectors available in those accounts (uses set_accounts() if not provided). When provided, results are filtered to only matching connectors. - top_k: Maximum number of results (default: 10) + top_k: Maximum number of results. If None, uses the backend default. 
min_score: Minimum similarity score threshold 0-1 (default: 0.0) Returns: @@ -473,7 +474,7 @@ def search_action_names( Examples: # Lightweight: inspect results before fetching - results = toolset.search_action_names("manage employees", top_k=10) + results = toolset.search_action_names("manage employees") for r in results: print(f"{r.action_name}: {r.similarity_score:.2f}") @@ -501,7 +502,7 @@ def search_action_names( response = self.semantic_client.search( query=query, connector=connector, - top_k=None if available_connectors else top_k, + top_k=top_k, ) except SemanticSearchError as e: logger.warning("Semantic search failed: %s", e) @@ -516,11 +517,11 @@ def search_action_names( results = [r for r in results if r.connector_key.lower() in connector_set] # If not enough results, make per-connector calls for missing connectors - if len(results) < top_k and not connector: + if not connector and (top_k is None or len(results) < top_k): found_connectors = {r.connector_key.lower() for r in results} missing_connectors = connector_set - found_connectors for missing in missing_connectors: - if len(results) >= top_k: + if top_k is not None and len(results) >= top_k: break try: extra = self.semantic_client.search(query=query, connector=missing, top_k=top_k) @@ -529,7 +530,7 @@ def search_action_names( er.action_name for er in results }: results.append(r) - if len(results) >= top_k: + if top_k is not None and len(results) >= top_k: break except SemanticSearchError: continue @@ -553,7 +554,7 @@ def search_action_names( description=r.description, ) ) - return normalized[:top_k] + return normalized[:top_k] if top_k is not None else normalized def _filter_by_provider(self, tool_name: str, providers: list[str]) -> bool: """Check if a tool name matches any of the provider filters From 76bedac5bb9fa8292a1ca7fd116651db2b8699e0 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Thu, 12 Feb 2026 17:40:37 +0000 Subject: [PATCH 23/25] move the crewAI tools conversion back in the example 
--- examples/crewai_semantic_search.py | 57 +++++++++++++++++++++++-- stackone_ai/models.py | 67 ------------------------------ 2 files changed, 54 insertions(+), 70 deletions(-) diff --git a/examples/crewai_semantic_search.py b/examples/crewai_semantic_search.py index b6245f1..5ea4d4d 100644 --- a/examples/crewai_semantic_search.py +++ b/examples/crewai_semantic_search.py @@ -19,17 +19,68 @@ """ import os +from typing import Any from crewai import Agent, Crew, Task +from crewai.tools.base_tool import BaseTool as CrewAIBaseTool from dotenv import load_dotenv +from pydantic import BaseModel, Field from stackone_ai import StackOneToolSet +from stackone_ai.models import StackOneTool load_dotenv() _account_ids = [aid.strip() for aid in os.getenv("STACKONE_ACCOUNT_ID", "").split(",") if aid.strip()] +def _to_crewai_tool(tool: StackOneTool) -> CrewAIBaseTool: + """Wrap a StackOneTool as a CrewAI BaseTool. + + CrewAI has its own BaseTool (not LangChain's), so we create a + lightweight wrapper that delegates execution to the StackOne tool. 
+ """ + schema_props: dict[str, Any] = {} + annotations: dict[str, Any] = {} + + for name, details in tool.parameters.properties.items(): + python_type: type = str + if isinstance(details, dict): + type_str = details.get("type", "string") + if type_str == "number": + python_type = float + elif type_str == "integer": + python_type = int + elif type_str == "boolean": + python_type = bool + field = Field(description=details.get("description", "")) + else: + field = Field(description="") + + schema_props[name] = field + annotations[name] = python_type + + _schema = type( + f"{tool.name.title().replace('_', '')}Args", + (BaseModel,), + {"__annotations__": annotations, "__module__": __name__, **schema_props}, + ) + + _parent = tool + _name = tool.name + _description = tool.description + + class WrappedTool(CrewAIBaseTool): + name: str = _name + description: str = _description + args_schema: type[BaseModel] = _schema + + def _run(self, **kwargs: Any) -> Any: + return _parent.execute(kwargs) + + return WrappedTool() + + def crewai_semantic_search() -> None: toolset = StackOneToolSet() @@ -38,7 +89,7 @@ def crewai_semantic_search() -> None: # tool definitions. Useful for inspecting what's available before committing. 
preview = toolset.search_action_names( "book a meeting or check availability", - account_ids=_account_ids + account_ids=_account_ids, ) print("Semantic search preview (action names only):") for r in preview: @@ -51,7 +102,7 @@ def crewai_semantic_search() -> None: tools = toolset.search_tools( "schedule meetings, check availability, list events", connector="calendly", - account_ids=_account_ids + account_ids=_account_ids, ) assert len(tools) > 0, "Expected at least one scheduling tool" @@ -61,7 +112,7 @@ def crewai_semantic_search() -> None: print() # Step 3: Convert to CrewAI format - crewai_tools = tools.to_crewai() + crewai_tools = [_to_crewai_tool(t) for t in tools] # Step 4: Create a CrewAI meeting booking agent agent = Agent( diff --git a/stackone_ai/models.py b/stackone_ai/models.py index c3bd5ef..da1b219 100644 --- a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -450,64 +450,6 @@ def _run(self, **kwargs: Any) -> Any: return StackOneLangChainTool() - def to_crewai(self) -> Any: - """Convert this tool to CrewAI format - - Requires the ``crewai`` package (``pip install crewai``). - - Returns: - Tool as a ``crewai.tools.BaseTool`` instance - """ - try: - from crewai.tools.base_tool import BaseTool as CrewAIBaseTool - except ImportError as e: - raise ImportError("crewai is required for to_crewai(). 
Install with: pip install crewai") from e - - schema_props: dict[str, Any] = {} - annotations: dict[str, Any] = {} - - for name, details in self.parameters.properties.items(): - python_type: type = str - if isinstance(details, dict): - type_str = details.get("type", "string") - if type_str == "number": - python_type = float - elif type_str == "integer": - python_type = int - elif type_str == "boolean": - python_type = bool - - field = Field(description=details.get("description", "")) - else: - field = Field(description="") - - schema_props[name] = field - annotations[name] = python_type - - schema_class = type( - f"{self.name.title()}Args", - (BaseModel,), - { - "__annotations__": annotations, - "__module__": __name__, - **schema_props, - }, - ) - - parent_tool = self - _name = parent_tool.name - _description = parent_tool.description - - class StackOneCrewAITool(CrewAIBaseTool): - name: str = _name - description: str = _description - args_schema: type[BaseModel] = schema_class - - def _run(self, **kwargs: Any) -> Any: - return parent_tool.execute(kwargs) - - return StackOneCrewAITool() - def set_account_id(self, account_id: str | None) -> None: """Set the account ID for this tool @@ -616,15 +558,6 @@ def to_langchain(self) -> Sequence[BaseTool]: """ return [tool.to_langchain() for tool in self.tools] - def to_crewai(self) -> list[Any]: - """Convert all tools to CrewAI format - - Requires the ``crewai`` package (``pip install crewai``). 
- - Returns: - List of tools as ``crewai.tools.BaseTool`` instances - """ - return [tool.to_crewai() for tool in self.tools] def utility_tools( self, From 893f6d4c743defad4a11b786f739c1011bce30ee Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Thu, 12 Feb 2026 17:52:59 +0000 Subject: [PATCH 24/25] CI Trigger --- stackone_ai/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/stackone_ai/models.py b/stackone_ai/models.py index da1b219..a3f50e4 100644 --- a/stackone_ai/models.py +++ b/stackone_ai/models.py @@ -558,7 +558,6 @@ def to_langchain(self) -> Sequence[BaseTool]: """ return [tool.to_langchain() for tool in self.tools] - def utility_tools( self, hybrid_alpha: float | None = None, From c4f8f3477cf91281be0df2a671163ba80bf4a662 Mon Sep 17 00:00:00 2001 From: Shashikant86 Date: Thu, 12 Feb 2026 17:57:01 +0000 Subject: [PATCH 25/25] Fix unit tests with updated top_k behavior --- tests/test_semantic_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_semantic_search.py b/tests/test_semantic_search.py index 8af0b93..913a41c 100644 --- a/tests/test_semantic_search.py +++ b/tests/test_semantic_search.py @@ -827,11 +827,11 @@ def test_fetches_max_then_falls_back_per_connector( top_k=5, ) - # First call: broad search without top_k (let backend decide, filter client-side) + # First call: passes user's top_k to backend # Second call: per-connector fallback for "bamboohr" since first returned nothing assert mock_search.call_count == 2 first_call = mock_search.call_args_list[0].kwargs - assert first_call["top_k"] is None + assert first_call["top_k"] == 5 assert first_call["connector"] is None second_call = mock_search.call_args_list[1].kwargs assert second_call["connector"] == "bamboohr"