Skip to content

Commit cd635e6

Browse files
feat(search-optimization): cache tool catalog and parallelize per-account MCP fetches (#173)
* Add caching to fetch_tools to boost search performance
* Address Copilot review comments
* Add a search benchmark example to the repo
* Fix issues spotted by ruff
1 parent 17fc35b commit cd635e6

5 files changed

Lines changed: 406 additions & 13 deletions

File tree

examples/benchmark_search.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
"""Benchmark: measure SDK search latency with caching.
2+
3+
Runs fetch_tools, local (BM25+TF-IDF) search, and semantic search N times,
4+
reports cold vs warm average latency and the speedup from caching.
5+
6+
Prerequisites:
7+
- STACKONE_API_KEY environment variable
8+
- STACKONE_ACCOUNT_ID environment variable
9+
10+
Run with:
11+
uv run python examples/benchmark_search.py # default 100 iterations
12+
uv run python examples/benchmark_search.py -n 50 # fewer for a quick check
13+
"""
14+
15+
from __future__ import annotations
16+
17+
import argparse
18+
import os
19+
import sys
20+
import time
21+
22+
try:
23+
from dotenv import load_dotenv
24+
25+
load_dotenv()
26+
except ModuleNotFoundError:
27+
pass
28+
29+
from stackone_ai import StackOneToolSet
30+
31+
# Representative natural-language queries cycled through (round-robin) by the
# search benchmarks; variety avoids measuring a single memoized query.
QUERIES = [
    "list events",
    "cancel a meeting",
    "send a message",
    "get current user",
    "list employees",
]
38+
39+
40+
def bench(fn, n: int) -> tuple[float, float, list[float]]:
    """Time ``n`` sequential calls of ``fn()``.

    Args:
        fn: Zero-argument callable to benchmark.
        n: Number of iterations; must be >= 1.

    Returns:
        ``(cold, warm_avg, all_times)`` where ``cold`` is the first call's
        duration, ``warm_avg`` is the mean of the remaining calls (or ``cold``
        when ``n == 1``), and ``all_times`` holds every per-call duration in
        seconds.

    Raises:
        ValueError: If ``n`` < 1 — there would be no cold call to measure
            (previously this surfaced as an opaque IndexError when the CLI
            was run with ``-n 0``).
    """
    if n < 1:
        raise ValueError("n must be >= 1")
    times: list[float] = []
    for _ in range(n):
        start = time.perf_counter()
        fn()
        times.append(time.perf_counter() - start)

    cold = times[0]
    warm_times = times[1:]
    # With a single iteration there is no warm sample; report the cold time.
    warm_avg = sum(warm_times) / len(warm_times) if warm_times else cold
    return cold, warm_avg, times
52+
53+
54+
def fmt_ms(seconds: float) -> str:
    """Render a duration given in seconds as right-aligned milliseconds."""
    millis = seconds * 1000
    return f"{millis:8.1f}ms"
56+
57+
58+
def main() -> int:
    """Run the three latency benchmarks and print a summary table.

    Returns:
        Process exit status: 0 on success, 1 when a required environment
        variable (STACKONE_API_KEY / STACKONE_ACCOUNT_ID) is missing.
    """
    parser = argparse.ArgumentParser(description="Benchmark SDK search latency")
    parser.add_argument(
        "--iterations", "-n", type=int, default=100, help="iterations per benchmark (default 100)"
    )
    args = parser.parse_args()
    n = args.iterations

    api_key = os.getenv("STACKONE_API_KEY")
    account_id = os.getenv("STACKONE_ACCOUNT_ID")

    if not api_key:
        print("Set STACKONE_API_KEY to run this benchmark.")
        return 1
    if not account_id:
        print("Set STACKONE_ACCOUNT_ID to run this benchmark.")
        return 1

    print(f"Benchmarking with account {account_id[:8]}..., {n} iterations each\n")

    ts = StackOneToolSet(
        api_key=api_key,
        account_id=account_id,
        search={"method": "auto", "top_k": 5},
    )

    results: list[tuple[str, float, float, float]] = []
    query_idx = 0

    def next_query() -> str:
        # Cycle through QUERIES so warm iterations don't all hit one query.
        nonlocal query_idx
        q = QUERIES[query_idx % len(QUERIES)]
        query_idx += 1
        return q

    def run_case(label: str, header: str, call) -> None:
        # Shared measurement harness for all three benchmarks. Each case
        # starts from a cleared catalog cache so the first call is a true
        # cold measurement and the remaining n-1 calls show cache benefit.
        # Resetting query_idx here is a no-op for the fetch_tools case
        # (it is still 0 at that point) and matches the original per-case
        # reset for the two search cases.
        nonlocal query_idx
        print(header)
        ts.clear_catalog_cache()
        query_idx = 0
        cold, warm_avg, _ = bench(call, n)
        speedup = cold / warm_avg if warm_avg > 0 else float("inf")
        results.append((label, cold, warm_avg, speedup))
        print(f" cold={fmt_ms(cold)} warm_avg={fmt_ms(warm_avg)} speedup={speedup:.0f}x")

    # --- 1. fetch_tools ---
    run_case("fetch_tools", f"[1/3] fetch_tools x{n} ...", lambda: ts.fetch_tools())

    # --- 2. local search (BM25 + TF-IDF) ---
    run_case(
        "search (local/BM25)",
        f"[2/3] search_tools (local) x{n} ...",
        lambda: ts.search_tools(next_query(), search="local"),
    )

    # --- 3. semantic search (auto) ---
    run_case(
        "search (semantic)",
        f"[3/3] search_tools (semantic/auto) x{n} ...",
        lambda: ts.search_tools(next_query(), search="auto"),
    )

    # --- Summary ---
    print("\n" + "=" * 65)
    print(f"{'Benchmark':<22} {'Cold':>10} {'Warm (avg)':>10} {'Speedup':>10}")
    print("-" * 65)
    for name, c, w, s in results:
        print(f"{name:<22} {fmt_ms(c):>10} {fmt_ms(w):>10} {s:>9.0f}x")
    print("=" * 65)

    print(f"\nWarm = average of {n - 1} calls after the first (cold) call.")
    print("Speedup = cold / warm_avg — shows the benefit of caching.\n")

    return 0
131+
132+
133+
if __name__ == "__main__":
    # Propagate main()'s status (0 on success, 1 on missing env vars) as the
    # process exit code.
    sys.exit(main())

examples/test_examples.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def get_example_files() -> list[str]:
3434
"semantic_search_example.py": ["mcp"],
3535
"mcp_server.py": ["mcp"],
3636
"workday_integration.py": ["openai", "mcp"],
37+
"benchmark_search.py": ["mcp"],
3738
}
3839

3940

stackone_ai/toolset.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,6 @@ class _ExecuteTool(StackOneTool):
170170
"""LLM-callable tool that executes a StackOne tool by name."""
171171

172172
_toolset: Any = PrivateAttr(default=None)
173-
_cached_tools: Any = PrivateAttr(default=None)
174173

175174
def execute(
176175
self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None
@@ -185,10 +184,8 @@ def execute(
185184
parsed = _ExecuteInput(**raw_params)
186185
tool_name = parsed.tool_name
187186

188-
if self._cached_tools is None:
189-
self._cached_tools = self._toolset.fetch_tools(account_ids=self._toolset._account_ids)
190-
191-
target = self._cached_tools.get_tool(parsed.tool_name)
187+
tools = self._toolset.fetch_tools(account_ids=self._toolset._account_ids)
188+
target = tools.get_tool(parsed.tool_name)
192189

193190
if target is None:
194191
return {
@@ -602,6 +599,8 @@ def __init__(
602599
execute_timeout = execute.get("timeout") if execute else None
603600
self._timeout: float = timeout if timeout is not None else (execute_timeout or 60.0)
604601
self._tools_cache: Tools | None = None
602+
self._catalog_cache: dict[tuple[Any, ...], Tools] = {}
603+
self._tool_index_cache: tuple[int, Any] | None = None
605604

606605
def set_accounts(self, account_ids: list[str]) -> StackOneToolSet:
607606
"""Set account IDs for filtering tools
@@ -613,8 +612,18 @@ def set_accounts(self, account_ids: list[str]) -> StackOneToolSet:
613612
This toolset instance for chaining
614613
"""
615614
self._account_ids = account_ids
615+
self.clear_catalog_cache()
616616
return self
617617

618+
def clear_catalog_cache(self) -> None:
619+
"""Invalidate cached tool catalog and local search index.
620+
621+
Call when linked accounts change outside of ``set_accounts`` or when
622+
you need to force a fresh fetch from the StackOne MCP endpoint.
623+
"""
624+
self._catalog_cache.clear()
625+
self._tool_index_cache = None
626+
618627
def get_search_tool(self, *, search: SearchMode | None = None) -> SearchTool:
619628
"""Get a callable search tool that returns Tools collections.
620629
@@ -802,7 +811,10 @@ def _local_search(
802811
if not available_connectors:
803812
return Tools([])
804813

805-
index = ToolIndex(list(all_tools))
814+
cache_key = id(all_tools)
815+
if self._tool_index_cache is None or self._tool_index_cache[0] != cache_key:
816+
self._tool_index_cache = (cache_key, ToolIndex(list(all_tools)))
817+
index = self._tool_index_cache[1]
806818
results = index.search(
807819
query,
808820
limit=top_k if top_k is not None else 5,
@@ -1171,22 +1183,41 @@ def fetch_tools(
11711183
else:
11721184
account_scope = [None]
11731185

1186+
cache_key = (
1187+
tuple(sorted(account_scope, key=lambda a: (a is None, a))),
1188+
tuple(sorted(p.lower() for p in providers)) if providers else None,
1189+
tuple(sorted(actions)) if actions else None,
1190+
)
1191+
cached = self._catalog_cache.get(cache_key)
1192+
if cached is not None:
1193+
return cached
1194+
11741195
endpoint = f"{self.base_url.rstrip('/')}/mcp"
1175-
all_tools: list[StackOneTool] = []
11761196

1177-
for account in account_scope:
1197+
def _fetch_for_account(account: str | None) -> list[StackOneTool]:
11781198
headers = self._build_mcp_headers(account)
11791199
catalog = _fetch_mcp_tools(endpoint, headers)
1180-
for tool_def in catalog:
1181-
all_tools.append(self._create_rpc_tool(tool_def, account))
1200+
return [self._create_rpc_tool(tool_def, account) for tool_def in catalog]
1201+
1202+
all_tools: list[StackOneTool] = []
1203+
if len(account_scope) == 1:
1204+
all_tools.extend(_fetch_for_account(account_scope[0]))
1205+
else:
1206+
max_workers = min(len(account_scope), 10)
1207+
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
1208+
futures = [pool.submit(_fetch_for_account, acc) for acc in account_scope]
1209+
for future in futures:
1210+
all_tools.extend(future.result())
11821211

11831212
if providers:
11841213
all_tools = [tool for tool in all_tools if self._filter_by_provider(tool.name, providers)]
11851214

11861215
if actions:
11871216
all_tools = [tool for tool in all_tools if self._filter_by_action(tool.name, actions)]
11881217

1189-
return Tools(all_tools)
1218+
result = Tools(all_tools)
1219+
self._catalog_cache[cache_key] = result
1220+
return result
11901221

11911222
except ToolsetError:
11921223
raise

tests/test_agent_tools.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,11 @@ def test_invalid_json_returns_error_dict(self):
268268

269269
assert "error" in result
270270

271-
def test_caches_fetched_tools(self):
271+
def test_delegates_catalog_lookup_to_toolset(self):
272+
# _ExecuteTool no longer holds a local cache; the toolset's catalog
273+
# cache (see StackOneToolSet._catalog_cache) is the single source of
274+
# truth. Verify execute always defers to the toolset so it benefits
275+
# from that shared cache.
272276
toolset = MagicMock()
273277
toolset.api_key = "test-key"
274278
toolset._account_ids = []
@@ -286,7 +290,8 @@ def test_caches_fetched_tools(self):
286290
execute.execute({"tool_name": "test_tool"})
287291
execute.execute({"tool_name": "test_tool"})
288292

289-
toolset.fetch_tools.assert_called_once()
293+
assert toolset.fetch_tools.call_count == 2
294+
toolset.fetch_tools.assert_called_with(account_ids=[])
290295

291296
def test_passes_account_ids_from_toolset(self):
292297
toolset = MagicMock()

0 commit comments

Comments
 (0)