From f9765d05755ae8968f79bcd3402e1a72d671fede Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Sun, 12 Apr 2026 17:15:52 +0800 Subject: [PATCH 01/12] =?UTF-8?q?feat:=20=E7=AC=AC=E4=B8=80=E7=89=88?= =?UTF-8?q?=E5=A4=A7=E9=87=8D=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bun.lock | 39 ++ docs/extensibility/mcp-configuration.mdx | 346 ++++++++++++ docs/extensibility/mcp-protocol.mdx | 278 ++++++++-- mint.json | 17 +- package.json | 2 + packages/agent-tools/package.json | 11 + .../agent-tools/src/__tests__/compat.test.ts | 34 ++ .../src/__tests__/registry.test.ts | 63 +++ packages/agent-tools/src/index.ts | 18 + packages/agent-tools/src/registry.ts | 21 + packages/agent-tools/src/types.ts | 221 ++++++++ packages/mcp-client/package.json | 16 + .../src/__tests__/InProcessTransport.test.ts | 80 +++ .../mcp-client/src/__tests__/cache.test.ts | 80 +++ .../src/__tests__/connection.test.ts | 84 +++ .../src/__tests__/discovery.test.ts | 162 ++++++ .../mcp-client/src/__tests__/errors.test.ts | 69 +++ .../src/__tests__/execution.test.ts | 144 +++++ .../mcp-client/src/__tests__/manager.test.ts | 113 ++++ .../src/__tests__/sanitization.test.ts | 51 ++ .../mcp-client/src/__tests__/strings.test.ts | 101 ++++ packages/mcp-client/src/cache.ts | 58 ++ packages/mcp-client/src/connection.ts | 519 ++++++++++++++++++ packages/mcp-client/src/discovery.ts | 143 +++++ packages/mcp-client/src/errors.ts | 80 +++ packages/mcp-client/src/execution.ts | 182 ++++++ packages/mcp-client/src/index.ts | 124 +++++ packages/mcp-client/src/interfaces.ts | 74 +++ packages/mcp-client/src/manager.ts | 241 ++++++++ packages/mcp-client/src/sanitization.ts | 31 ++ packages/mcp-client/src/strings.ts | 86 +++ .../src/transport/InProcessTransport.ts | 63 +++ packages/mcp-client/src/types.ts | 240 ++++++++ src/services/mcp/adapter/analytics.ts | 18 + src/services/mcp/adapter/auth.ts | 28 + src/services/mcp/adapter/featureGate.ts | 15 + 
src/services/mcp/adapter/httpConfig.ts | 15 + src/services/mcp/adapter/imageProcessor.ts | 16 + src/services/mcp/adapter/index.ts | 32 ++ src/services/mcp/adapter/logger.ts | 38 ++ src/services/mcp/adapter/proxy.ts | 30 + src/services/mcp/adapter/storage.ts | 20 + src/services/mcp/adapter/subprocessEnv.ts | 15 + src/services/mcp/client.ts | 31 +- src/services/mcp/types.ts | 2 +- 45 files changed, 3988 insertions(+), 63 deletions(-) create mode 100644 docs/extensibility/mcp-configuration.mdx create mode 100644 packages/agent-tools/package.json create mode 100644 packages/agent-tools/src/__tests__/compat.test.ts create mode 100644 packages/agent-tools/src/__tests__/registry.test.ts create mode 100644 packages/agent-tools/src/index.ts create mode 100644 packages/agent-tools/src/registry.ts create mode 100644 packages/agent-tools/src/types.ts create mode 100644 packages/mcp-client/package.json create mode 100644 packages/mcp-client/src/__tests__/InProcessTransport.test.ts create mode 100644 packages/mcp-client/src/__tests__/cache.test.ts create mode 100644 packages/mcp-client/src/__tests__/connection.test.ts create mode 100644 packages/mcp-client/src/__tests__/discovery.test.ts create mode 100644 packages/mcp-client/src/__tests__/errors.test.ts create mode 100644 packages/mcp-client/src/__tests__/execution.test.ts create mode 100644 packages/mcp-client/src/__tests__/manager.test.ts create mode 100644 packages/mcp-client/src/__tests__/sanitization.test.ts create mode 100644 packages/mcp-client/src/__tests__/strings.test.ts create mode 100644 packages/mcp-client/src/cache.ts create mode 100644 packages/mcp-client/src/connection.ts create mode 100644 packages/mcp-client/src/discovery.ts create mode 100644 packages/mcp-client/src/errors.ts create mode 100644 packages/mcp-client/src/execution.ts create mode 100644 packages/mcp-client/src/index.ts create mode 100644 packages/mcp-client/src/interfaces.ts create mode 100644 packages/mcp-client/src/manager.ts create mode 100644 
packages/mcp-client/src/sanitization.ts create mode 100644 packages/mcp-client/src/strings.ts create mode 100644 packages/mcp-client/src/transport/InProcessTransport.ts create mode 100644 packages/mcp-client/src/types.ts create mode 100644 src/services/mcp/adapter/analytics.ts create mode 100644 src/services/mcp/adapter/auth.ts create mode 100644 src/services/mcp/adapter/featureGate.ts create mode 100644 src/services/mcp/adapter/httpConfig.ts create mode 100644 src/services/mcp/adapter/imageProcessor.ts create mode 100644 src/services/mcp/adapter/index.ts create mode 100644 src/services/mcp/adapter/logger.ts create mode 100644 src/services/mcp/adapter/proxy.ts create mode 100644 src/services/mcp/adapter/storage.ts create mode 100644 src/services/mcp/adapter/subprocessEnv.ts diff --git a/bun.lock b/bun.lock index 15bdb4caa..7a173ae16 100644 --- a/bun.lock +++ b/bun.lock @@ -70,6 +70,7 @@ "@types/stack-utils": "^2.0.3", "@types/turndown": "^5.0.6", "@types/ws": "^8.18.1", + "agent-tools": "workspace:*", "ajv": "^8.18.0", "asciichart": "^1.5.25", "audio-capture-napi": "workspace:*", @@ -103,6 +104,7 @@ "lodash-es": "^4.17.23", "lru-cache": "^11.2.7", "marked": "^17.0.5", + "mcp-client": "workspace:*", "modifiers-napi": "workspace:*", "openai": "^6.33.0", "p-map": "^7.0.4", @@ -176,6 +178,13 @@ "wrap-ansi": "^10.0.0", }, }, + "packages/agent-tools": { + "name": "agent-tools", + "version": "1.0.0", + "dependencies": { + "zod": "^3.25.0", + }, + }, "packages/audio-capture-napi": { "name": "audio-capture-napi", "version": "1.0.0", @@ -194,6 +203,18 @@ "sharp": "^0.33.5", }, }, + "packages/mcp-client": { + "name": "mcp-client", + "version": "1.0.0", + "dependencies": { + "@modelcontextprotocol/sdk": "^1.29.0", + "agent-tools": "workspace:*", + "lodash-es": "^4.17.21", + "lru-cache": "^10.0.0", + "p-map": "^4.0.0", + "zod": "^3.25.0", + }, + }, "packages/modifiers-napi": { "name": "modifiers-napi", "version": "1.0.0", @@ -1074,6 +1095,10 @@ "agent-base": 
["agent-base@8.0.0", "https://registry.npmmirror.com/agent-base/-/agent-base-8.0.0.tgz", {}, "sha512-QT8i0hCz6C/KQ+KTAbSNwCHDGdmUJl2tp2ZpNlGSWCfhUNVbYG2WLE3MdZGBAgXPV4GAvjGMxo+C1hroyxmZEg=="], + "agent-tools": ["agent-tools@workspace:packages/agent-tools"], + + "aggregate-error": ["aggregate-error@3.1.0", "https://registry.npmmirror.com/aggregate-error/-/aggregate-error-3.1.0.tgz", { "dependencies": { "clean-stack": "^2.0.0", "indent-string": "^4.0.0" } }, "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA=="], + "ajv": ["ajv@8.18.0", "https://registry.npmmirror.com/ajv/-/ajv-8.18.0.tgz", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A=="], "ajv-formats": ["ajv-formats@3.0.1", "https://registry.npmmirror.com/ajv-formats/-/ajv-formats-3.0.1.tgz", { "dependencies": { "ajv": "^8.0.0" } }, "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ=="], @@ -1160,6 +1185,8 @@ "cjs-module-lexer": ["cjs-module-lexer@2.2.0", "https://registry.npmmirror.com/cjs-module-lexer/-/cjs-module-lexer-2.2.0.tgz", {}, "sha512-4bHTS2YuzUvtoLjdy+98ykbNB5jS0+07EvFNXerqZQJ89F7DI6ET7OQo/HJuW6K0aVsKA9hj9/RVb2kQVOrPDQ=="], + "clean-stack": ["clean-stack@2.2.0", "https://registry.npmmirror.com/clean-stack/-/clean-stack-2.2.0.tgz", {}, "sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A=="], + "cli-boxes": ["cli-boxes@4.0.1", "https://registry.npmmirror.com/cli-boxes/-/cli-boxes-4.0.1.tgz", {}, "sha512-5IOn+jcCEHEraYolBPs/sT4BxYCe2nHg374OPiItB1O96KZFseS2gthU4twyYzeDcFew4DaUM/xwc5BQf08JJw=="], "cli-highlight": ["cli-highlight@2.1.11", "https://registry.npmmirror.com/cli-highlight/-/cli-highlight-2.1.11.tgz", { "dependencies": { "chalk": "^4.0.0", "highlight.js": "^10.7.1", "mz": "^2.4.0", 
"parse5": "^5.1.1", "parse5-htmlparser2-tree-adapter": "^6.0.0", "yargs": "^16.0.0" }, "bin": { "highlight": "bin/highlight" } }, "sha512-9KDcoEVwyUXrjcJNvHD0NFc/hiwe/WPVYIleQh2O1N2Zro5gWJZ/K+3DGn8w8P/F6FxOgzyC5bxDyHIgCSPhGg=="], @@ -1558,6 +1585,8 @@ "mcp-chrome-bridge": ["mcp-chrome-bridge@1.0.31", "https://registry.npmmirror.com/mcp-chrome-bridge/-/mcp-chrome-bridge-1.0.31.tgz", { "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.1.69", "@fastify/cors": "^11.0.1", "@modelcontextprotocol/sdk": "^1.11.0", "@types/node-fetch": "2", "better-sqlite3": "^11.6.0", "chalk": "^5.4.1", "chrome-devtools-frontend": "^1.0.1299282", "chrome-mcp-shared": "1.0.2", "commander": "^13.1.0", "drizzle-orm": "^0.38.2", "fastify": "^5.3.2", "is-admin": "^4.0.0", "node-fetch": "2", "pino": "^9.6.0", "uuid": "^11.1.0" }, "bin": { "mcp-chrome-bridge": "dist/cli.js", "chrome-mcp-bridge": "dist/cli.js", "mcp-chrome-stdio": "dist/mcp/mcp-server-stdio.js" } }, "sha512-bcl4POvdXhf9PX0+EIJ9guR+n6oVPNfbSBnhwf0LVg9MWwMJYpdvLszUT77NG2gBJCJF+JV/+CNz5xHnt9GwFg=="], + "mcp-client": ["mcp-client@workspace:packages/mcp-client"], + "media-typer": ["media-typer@1.1.0", "https://registry.npmmirror.com/media-typer/-/media-typer-1.1.0.tgz", {}, "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw=="], "merge-descriptors": ["merge-descriptors@2.0.0", "https://registry.npmmirror.com/merge-descriptors/-/merge-descriptors-2.0.0.tgz", {}, "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g=="], @@ -2282,6 +2311,10 @@ "@typespec/ts-http-runtime/https-proxy-agent": ["https-proxy-agent@7.0.6", "https://registry.npmmirror.com/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", { "dependencies": { "agent-base": "^7.1.2", "debug": "4" } }, "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw=="], + "agent-tools/zod": ["zod@3.25.76", "https://registry.npmmirror.com/zod/-/zod-3.25.76.tgz", 
{}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="], + + "aggregate-error/indent-string": ["indent-string@4.0.0", "https://registry.npmmirror.com/indent-string/-/indent-string-4.0.0.tgz", {}, "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg=="], + "ansi-escapes/type-fest": ["type-fest@0.21.3", "https://registry.npmmirror.com/type-fest/-/type-fest-0.21.3.tgz", {}, "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w=="], "chrome-mcp-shared/zod": ["zod@3.25.76", "https://registry.npmmirror.com/zod/-/zod-3.25.76.tgz", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="], @@ -2318,6 +2351,12 @@ "mcp-chrome-bridge/@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.1.77", "https://registry.npmmirror.com/@anthropic-ai/claude-agent-sdk/-/claude-agent-sdk-0.1.77.tgz", { "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.33.5", "@img/sharp-darwin-x64": "^0.33.5", "@img/sharp-linux-arm": "^0.33.5", "@img/sharp-linux-arm64": "^0.33.5", "@img/sharp-linux-x64": "^0.33.5", "@img/sharp-linuxmusl-arm64": "^0.33.5", "@img/sharp-linuxmusl-x64": "^0.33.5", "@img/sharp-win32-x64": "^0.33.5" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" } }, "sha512-ZEjWQtkoB2MEY6K16DWMmF+8OhywAynH0m08V265cerbZ8xPD/2Ng2jPzbbO40mPeFSsMDJboShL+a3aObP0Jg=="], + "mcp-client/lru-cache": ["lru-cache@10.4.3", "https://registry.npmmirror.com/lru-cache/-/lru-cache-10.4.3.tgz", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="], + + "mcp-client/p-map": ["p-map@4.0.0", "https://registry.npmmirror.com/p-map/-/p-map-4.0.0.tgz", { "dependencies": { "aggregate-error": "^3.0.0" } }, "sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ=="], + + "mcp-client/zod": ["zod@3.25.76", 
"https://registry.npmmirror.com/zod/-/zod-3.25.76.tgz", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="], + "micromatch/picomatch": ["picomatch@2.3.2", "https://registry.npmmirror.com/picomatch/-/picomatch-2.3.2.tgz", {}, "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA=="], "minipass-flush/minipass": ["minipass@3.3.6", "https://registry.npmmirror.com/minipass/-/minipass-3.3.6.tgz", { "dependencies": { "yallist": "^4.0.0" } }, "sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw=="], diff --git a/docs/extensibility/mcp-configuration.mdx b/docs/extensibility/mcp-configuration.mdx new file mode 100644 index 000000000..c696096f9 --- /dev/null +++ b/docs/extensibility/mcp-configuration.mdx @@ -0,0 +1,346 @@ +--- +title: "MCP 配置 - 多来源合并、作用域与策略管控" +description: "详细说明 Claude Code MCP 配置的来源层次、合并优先级、传输类型、企业策略管控、插件集成和保留名称机制。" +keywords: ["MCP", "配置", "settings.json", ".mcp.json", "企业策略", "插件"] +--- + +## 配置来源与作用域 + +Claude Code 的 MCP 配置来自多个来源,每个来源对应一个 `scope`(作用域)。配置按优先级合并,高优先级来源的同名配置覆盖低优先级。 + +### 来源列表 + +| 来源 | Scope | 文件/接口 | 说明 | +|------|-------|----------|------| +| 企业管控 | `enterprise` | 系统管理路径 `managed-mcp.json` | **排他模式**:存在时忽略所有其他来源 | +| 本地项目 | `local` | `/.claude/settings.local.json` | 项目级私有配置(不提交到 VCS) | +| 项目配置 | `project` | `/.mcp.json` | 项目级共享配置(可提交到 VCS) | +| 用户全局 | `user` | `~/.claude/settings.json` | 用户级配置,所有项目共享 | +| 插件 | `dynamic` | 插件 manifest 中 `.mcp.json` / `.mcpb` | 插件提供的 MCP 服务器 | +| claude.ai | `claudeai` | 通过 API 获取 | claude.ai 网页端配置的连接器 | +| 内置动态 | `dynamic` | 代码中注册 | Computer Use / Chrome 等内置服务器 | +| IDE SDK | `sdk` | IDE 传入 | VS Code / JetBrains 嵌入模式 | + +### 合并优先级(从低到高) + +``` +claude.ai 连接器 ← 最低优先级 + ↓ 去重 +插件服务器 + ↓ 去重 +用户全局配置 + ↓ +项目配置(.mcp.json) ← 需要用户审批 + ↓ +本地项目配置 + ↓ +动态配置(内置 MCP) ← 最高优先级 +``` + +`Object.assign({}, dedupedPluginServers, userServers, approvedProjectServers, localServers)` 
实现合并——后出现的同名键覆盖前者。 + +## 企业管控模式 + +当 `managed-mcp.json` 文件存在时,进入 **排他模式**: + +```typescript +// config.ts:1084 +if (doesEnterpriseMcpConfigExist()) { + // 只返回企业配置,忽略所有用户/项目/插件/claude.ai 配置 + return { servers: filtered, errors: [] } +} +``` + +特性: +- 路径由系统管理决定(`getManagedFilePath()` + `managed-mcp.json`) +- 覆盖所有用户级、项目级、插件和 claude.ai 配置 +- 仍然应用策略过滤(allowlist/denylist) +- 无法通过 CLI 添加新服务器(`addMcpConfig` 会拒绝) + +## 传输类型与配置 Schema + +### stdio(默认) + +启动子进程,通过 stdin/stdout JSON-RPC 通信。 + +```json +{ + "my-server": { + "command": "npx", + "args": ["-y", "@my-org/mcp-server"], + "env": { "API_KEY": "..." } + } +} +``` + +`type` 字段可省略(默认为 `stdio`)。环境变量通过 `env` 传递给子进程,会与当前进程环境合并。 + +**Windows 注意**:使用 `npx` 需要包装为 `cmd /c npx`,否则会报错。 + +### SSE(Server-Sent Events) + +通过 HTTP SSE 连接远程 MCP 服务器。 + +```json +{ + "my-remote": { + "type": "sse", + "url": "https://mcp.example.com/sse", + "headers": { "Authorization": "Bearer ..." }, + "oauth": { + "clientId": "...", + "authServerMetadataUrl": "https://auth.example.com/.well-known/oauth-authorization-server" + } + } +} +``` + +支持 OAuth 认证流程。认证失败时进入 `needs-auth` 状态,15 分钟 TTL 缓存避免重复提示。 + +### HTTP(Streamable HTTP) + +HTTP 流式传输。 + +```json +{ + "my-http": { + "type": "http", + "url": "https://mcp.example.com/mcp", + "headers": { "X-API-Key": "..." 
} + } +} +``` + +支持与 SSE 相同的 OAuth 配置。 + +### WebSocket + +```json +{ + "my-ws": { + "type": "ws", + "url": "wss://mcp.example.com/ws" + } +} +``` + +### IDE 专用类型(内部) + +`sse-ide` 和 `ws-ide` 是 IDE 扩展专用类型,不由用户直接配置。 + +- `sse-ide`:使用 lockfile token 认证 +- `ws-ide`:使用 `X-Claude-Code-Ide-Authorization` header + +### SDK 类型(内部) + +`type: "sdk"` 由 IDE 嵌入模式传入,不经过保留名称检查和企业管控排他限制。 + +### claude.ai 代理类型(内部) + +`type: "claudeai-proxy"` 由 claude.ai 网页端配置的连接器使用,通过 OAuth bearer token 认证并支持 401 重试。 + +## 配置操作 + +### 添加 MCP 服务器 + +通过 CLI 命令 `claude mcp add` 或 API 调用 `addMcpConfig()`: + +```bash +# 添加到用户配置 +claude mcp add my-server -s user -- npx @my-org/mcp-server + +# 添加到项目配置 +claude mcp add my-server -s project -- npx @my-org/mcp-server + +# 添加 HTTP 类型 +claude mcp add my-remote -s user -t http -u https://mcp.example.com/mcp +``` + +添加时的验证流程: + +1. **名称校验**:只允许字母、数字、连字符和下划线 +2. **保留名检查**:`claude-in-chrome` 和 `computer-use` 被保留 +3. **企业管控检查**:企业模式下拒绝添加 +4. **Schema 验证**:Zod 校验配置格式 +5. **策略检查**:denylist 拒绝、allowlist 验证 + +### 移除 MCP 服务器 + +```bash +claude mcp remove my-server -s user +``` + +### 列出 MCP 服务器 + +```bash +claude mcp list +``` + +## 项目配置审批 + +`.mcp.json` 中的项目配置需要用户显式审批才能生效: + +```typescript +// config.ts:1166 +const approvedProjectServers: Record = {} +for (const [name, config] of Object.entries(projectServers)) { + if (getProjectMcpServerStatus(name) === 'approved') { + approvedProjectServers[name] = config + } +} +``` + +首次打开项目时,Claude Code 会提示用户审批 `.mcp.json` 中的每个服务器。审批状态持久化在本地配置中。 + +## 插件 MCP 集成 + +插件通过 manifest 中的 `.mcp.json` 或 `.mcpb` 文件声明 MCP 服务器: + +```typescript +// 插件 MCP 加载流程 +const pluginResult = await loadAllPluginsCacheOnly() +const pluginServerResults = await Promise.all( + pluginResult.enabled.map(plugin => getPluginMcpServers(plugin, mcpErrors)) +) +``` + +### 插件命名空间 + +插件 MCP 服务器名格式为 `plugin::`,不会与手动配置的名称冲突。 + +### 去重机制 + +插件服务器通过内容签名去重(`dedupPluginMcpServers`): + +- **stdio 类型**:签名 = `stdio:` + JSON.stringify([command, ...args]) +- **URL 类型**:签名 = 
`url:` + 原始 URL(unwrap CCR proxy URL) +- **sdk 类型**:签名为 null,不去重 + +去重规则: +1. 手动配置优先于插件配置 +2. 先加载的插件优先于后加载的 +3. 被抑制的插件服务器在 `/plugin` UI 中显示提示 + +### claude.ai 连接器去重 + +claude.ai 连接器使用相同的内容签名机制去重(`dedupClaudeAiMcpServers`): +- 仅启用的手动配置参与去重(禁用的手动配置不应抑制连接器) +- 连接器名格式为 `claude.ai ` + +## 策略管控 + +### Allowlist / Denylist + +企业策略通过 allowlist 和 denylist 控制可用的 MCP 服务器: + +```typescript +// config.ts:1243 - 最终策略过滤 +for (const [name, serverConfig] of Object.entries(configs)) { + if (!isMcpServerAllowedByPolicy(name, serverConfig)) { + continue // 跳过策略禁止的服务器 + } + filtered[name] = serverConfig +} +``` + +策略检查考虑: +- 服务器名称匹配 +- stdio 类型的 command + args 匹配 +- URL 类型的 URL 模式匹配(支持通配符) + +### 插件专用模式 + +`isRestrictedToPluginOnly('mcp')` 启用时,只允许插件提供的 MCP 服务器——用户/项目级配置被忽略。 + +## 环境变量展开 + +MCP 配置中的环境变量支持 `$VAR` 和 `${VAR}` 语法展开: + +```json +{ + "my-server": { + "command": "npx", + "args": ["@my-org/mcp-server"], + "env": { + "API_KEY": "$MY_API_KEY", + "DB_URL": "${DATABASE_URL}" + } + } +} +``` + +展开时缺失的变量会生成警告信息,但不阻止配置加载。 + +## 内置 MCP 动态注册 + +内置 MCP 服务器在 `main.tsx` 启动流程中动态注入配置: + +### Computer Use MCP + +```typescript +// src/utils/computerUse/setup.ts +export function setupComputerUseMCP(): { + mcpConfig: Record + allowedTools: string[] +} { + return { + mcpConfig: { + "computer-use": { + type: "stdio", + command: process.execPath, + args: ["--computer-use-mcp"], + scope: "dynamic", + } + }, + allowedTools: ["mcp__computer-use__screenshot", ...] 
+ } +} +``` + +启用条件: +- Feature flag `CHICAGO_MCP` 开启 +- `getPlatform() !== "unknown"`(macOS/Windows/Linux) +- 非非交互式会话 +- GrowthBook gate `getChicagoEnabled()` 返回 true + +### Claude in Chrome MCP + +```typescript +// 类似 Computer Use,在 main.tsx 中注册 +const { mcpConfig, allowedTools, systemPrompt } = setupClaudeInChrome() +dynamicMcpConfig = { ...dynamicMcpConfig, ...mcpConfig } +``` + +启用条件: +- `--chrome` 参数或 `claudeInChromeDefaultEnabled` 配置 +- Chrome 扩展已安装 + +### VSCode SDK MCP + +IDE 嵌入模式通过初始化消息传入 `type:'sdk'` 的配置,由 `setupVscodeSdkMcp()` 设置双向通知。 + +## 保留名称 + +以下 MCP 服务器名称被保留,用户无法手动配置同名服务器: + +| 名称 | 用途 | 检查条件 | +|------|------|---------| +| `claude-in-chrome` | Chrome 浏览器控制 | 始终检查 | +| `computer-use` | 桌面自动化 | `CHICAGO_MCP` feature flag 开启时检查 | +| `claude-vscode` | VSCode IDE 集成 | 由 SDK 传入,不经过名称检查 | + +保留名检查在两个位置: +1. `addMcpConfig()`(`config.ts:636-648`)— 运行时拒绝 +2. `main.tsx` 启动检查(`main.tsx:2351-2368`)— 启动时退出 + +## 关键源文件索引 + +| 文件 | 职责 | +|------|------| +| `src/services/mcp/config.ts` | 配置管理核心:合并、去重、策略、添加/删除 | +| `src/services/mcp/types.ts` | Zod Schema 定义、类型声明 | +| `src/services/mcp/client.ts` | 连接管理、传输层选择 | +| `src/utils/plugins/mcpPluginIntegration.ts` | 插件 MCP 配置加载 | +| `src/utils/computerUse/setup.ts` | Computer Use 动态注册 | +| `src/utils/claudeInChrome/common.ts` | Chrome MCP 保留名与工具名 | +| `src/services/mcp/vscodeSdkMcp.ts` | VSCode SDK 双向通知 | diff --git a/docs/extensibility/mcp-protocol.mdx b/docs/extensibility/mcp-protocol.mdx index 2b8d26719..cbf106463 100644 --- a/docs/extensibility/mcp-protocol.mdx +++ b/docs/extensibility/mcp-protocol.mdx @@ -1,25 +1,32 @@ --- title: "MCP 协议 - 连接管理、工具发现与执行链路" -description: "从源码角度解析 Claude Code 的 MCP 集成:7 种传输层实现、connectToServer 的 memoize 缓存、工具发现的 LRU 策略、认证状态机、以及 MCP 工具如何进入权限检查链路。" -keywords: ["MCP", "Model Context Protocol", "工具扩展", "MCP 客户端", "工具发现"] +description: "从源码角度解析 Claude Code 的 MCP 集成:内置 MCP 与外部 MCP 的区别、7 种传输层实现、connectToServer 的 memoize 缓存、工具发现的 LRU 策略、认证状态机、以及 MCP 工具如何进入权限检查链路。" +keywords: ["MCP", "Model 
Context Protocol", "工具扩展", "MCP 客户端", "工具发现", "内置 MCP", "外部 MCP"] --- -{/* 本章目标:从源码角度揭示 MCP 客户端的连接管理、工具发现协议和执行链路 */} +{/* 本章目标:从源码角度揭示 MCP 客户端的两种运行模式(内置/外部)、连接管理、工具发现协议和执行链路 */} ## 架构总览:从配置到可用工具 ``` -settings.json: { mcpServers: { "my-db": { command: "npx", args: [...] } } } +配置层(多来源合并) + ├── settings.json: { mcpServers: { "my-db": { command: "npx", args: [...] } } } ← 外部 + ├── .mcp.json: 项目级 MCP 配置 ← 外部 + ├── 插件 manifest (.mcp.json / .mcpb) ← 外部(插件) + ├── claude.ai connectors ← 外部(远程) + ├── enterprise managed-mcp.json ← 外部(企业管控) + ├── setupComputerUseMCP() / setupClaudeInChrome() ← 内置(动态注册) + └── SDK 传入 (type:'sdk') ← 内置(IDE 嵌入) ↓ -getAllMcpConfigs() ← enterprise 独占或合并 user/project/local + plugin + claude.ai +getAllMcpConfigs() ← enterprise 独占 或 合并 user/project/local + plugin + claude.ai ↓ useManageMCPConnections() ← React Hook 管理连接生命周期 ↓ connectToServer(name, config) ← memoize 缓存(lodash memoize) - ├── 创建 Transport(stdio/sse/http/...) - ├── new Client() ← @modelcontextprotocol/sdk - ├── client.connect(transport) ← 超时控制(MCP_TIMEOUT, 默认 30s) - └── 返回 MCPServerConnection ← { connected | failed | needs-auth | pending } + ├── 判断:内置 MCP → InProcessTransport(同进程) + ├── 判断:外部 stdio → StdioClientTransport(子进程) + ├── 判断:远程 SSE/HTTP/WS → 网络传输 + └── 返回 MCPServerConnection ← { connected | failed | needs-auth | pending | disabled } ↓ fetchToolsForClient(client) ← LRU(20) 缓存 ├── client.request({ method: 'tools/list' }) @@ -30,19 +37,208 @@ assembleToolPool() ← 合并内置工具 + MCP 工具 工具名格式: mcp____ ← buildMcpToolName() ``` +## 两种 MCP 模式:内置 vs 外部 + +Claude Code 的 MCP 实现区分 **内置 MCP 服务器** 和 **外部 MCP 服务器**。两者使用相同的客户端协议和工具发现机制,但在连接方式、生命周期管理和配置来源上完全不同。 + +### 内置 MCP 服务器 + +内置 MCP 服务器由 Claude Code 自身提供,无需用户手动配置。它们在启动时自动注册为 `dynamic` scope 的配置,并在同进程内运行。 + +| 服务器 | 名称 | 包路径 | Feature Flag | 启用方式 | +|--------|------|--------|-------------|---------| +| Computer Use | `computer-use` | `@ant/computer-use-mcp` | `CHICAGO_MCP` | GrowthBook gate + 受支持平台(macOS/Windows/Linux) + interactive | +| Claude in Chrome | 
`claude-in-chrome` | `@ant/claude-for-chrome-mcp` | — | `--chrome` 参数或 `claudeInChromeDefaultEnabled` 配置 | +| VSCode SDK | `claude-vscode` | — | — | IDE 嵌入模式 (type:`sdk`) | + +#### InProcessTransport:零开销同进程通信 + +内置服务器通过 `InProcessTransport`(`src/services/mcp/InProcessTransport.ts`)运行,**不启动子进程**: + +```typescript +// 创建一对 linked transport —— 消息在两端之间直接传递 +const [clientTransport, serverTransport] = createLinkedTransportPair() + +// server 端连接到 serverTransport +inProcessServer = await createComputerUseMcpServerForCli() +await inProcessServer.connect(serverTransport) + +// client 端使用 clientTransport(与外部 MCP 的 Client 相同接口) +transport = clientTransport +``` + +`InProcessTransport` 的核心设计: +- `send()` 通过 `queueMicrotask()` 异步投递消息到对端,避免同步请求/响应的栈深度问题 +- `close()` 双向关闭,任一端关闭都会触发两端的 `onclose` 回调 +- 无网络开销、无 IPC 序列化、无进程启动时间 + +#### 动态注册流程 + +内置服务器在 `main.tsx` 的启动流程中注册,注入 `dynamicMcpConfig`: + +```typescript +// main.tsx: Computer Use MCP 动态注册 +if (feature("CHICAGO_MCP") && getPlatform() !== "unknown" && !getIsNonInteractiveSession()) { + const { getChicagoEnabled } = await import("src/utils/computerUse/gates.js") + if (getChicagoEnabled()) { + const { setupComputerUseMCP } = await import("src/utils/computerUse/setup.js") + const { mcpConfig, allowedTools: cuTools } = setupComputerUseMCP() + dynamicMcpConfig = { ...dynamicMcpConfig, ...mcpConfig } + allowedTools.push(...cuTools) + } +} +``` + +`setupComputerUseMCP()` 返回的配置(`src/utils/computerUse/setup.ts`): + +```typescript +{ + "computer-use": { + type: "stdio", // 类型标记为 stdio(但 client.ts 会拦截为 InProcessTransport) + command: process.execPath, + args: ["--computer-use-mcp"], + scope: "dynamic", // 动态作用域,不持久化 + } +} +``` + +#### 连接时拦截 + +`connectToServer()` 在 `client.ts:906-944` 中根据服务器名拦截内置服务器: + +```typescript +// Chrome MCP — 在 process 内运行,避免 ~325MB 子进程 +if (isClaudeInChromeMCPServer(name)) { + const { createChromeContext } = await import('../../utils/claudeInChrome/mcpServer.js') + const { createClaudeForChromeMcpServer } = await 
import('@ant/claude-for-chrome-mcp') + const { createLinkedTransportPair } = await import('./InProcessTransport.js') + const context = createChromeContext(config.env) + inProcessServer = createClaudeForChromeMcpServer(context) + const [clientTransport, serverTransport] = createLinkedTransportPair() + await inProcessServer.connect(serverTransport) + transport = clientTransport +} + +// Computer Use MCP — 同理 +if (feature('CHICAGO_MCP') && isComputerUseMCPServer(name)) { + const { createComputerUseMcpServerForCli } = await import('../../utils/computerUse/mcpServer.js') + const { createLinkedTransportPair } = await import('./InProcessTransport.js') + inProcessServer = await createComputerUseMcpServerForCli() + const [clientTransport, serverTransport] = createLinkedTransportPair() + await inProcessServer.connect(serverTransport) + transport = clientTransport +} +``` + +#### 保留名称保护 + +内置服务器的名称被保留,用户无法手动添加同名配置(`config.ts:636-648`): + +```typescript +// 添加 MCP 配置时检查保留名 +if (isClaudeInChromeMCPServer(name)) { + throw new Error(`Cannot add MCP server "${name}": this name is reserved.`) +} +if (feature('CHICAGO_MCP') && isComputerUseMCPServer(name)) { + throw new Error(`Cannot add MCP server "${name}": this name is reserved.`) +} +``` + +启动时也有全局检查(`main.tsx:2351-2368`):如果用户配置中包含保留名(非 `type:'sdk'`),直接 `process.exit(1)`。 + +#### VSCode SDK MCP + +VSCode SDK MCP 是特殊的内置模式。IDE(如 VS Code、JetBrains)通过嵌入方式启动 Claude Code,并传入 `type:'sdk'` 的 MCP 配置。这类配置: +- 不经过保留名称检查(IDE 可以使用任意名称) +- 不参与 enterprise MCP 的排他控制 +- 通过 VSCode SDK transport 连接 +- 支持双向通知(如 `file_updated`、`experiment_gates`) + +```typescript +// src/services/mcp/vscodeSdkMcp.ts +export function setupVscodeSdkMcp(sdkClients: MCPServerConnection[]): void { + const client = sdkClients.find(client => client.name === 'claude-vscode') + if (client && client.type === 'connected') { + // 注册 log_event 通知处理器 + client.client.setNotificationHandler(LogEventNotificationSchema(), ...) 
+ // 发送实验门控到 VSCode + client.client.notification({ method: 'experiment_gates', params: { gates } }) + } +} +``` + +### 外部 MCP 服务器 + +外部 MCP 服务器由用户在配置文件中声明,通过子进程或网络连接运行。 + +#### 配置来源 + +| 来源 | Scope | 文件位置 | 优先级 | +|------|-------|---------|--------| +| 项目配置 | `project` | `/.mcp.json` | 高(需用户审批后生效) | +| 本地配置 | `local` | `/.claude/settings.local.json` | 最高(同名覆盖) | +| 用户配置 | `user` | `~/.claude/settings.json` | 中 | +| 插件 | `dynamic` | 插件 manifest 中 `.mcp.json` | 低 | +| claude.ai | `claudeai` | 通过 API 获取 | 最低 | +| 企业管控 | `enterprise` | 系统管理路径 `managed-mcp.json` | 排他(存在时覆盖全部) | + +#### 配置示例 + +```json +// settings.json / .mcp.json 中的 MCP 配置 +{ + "mcpServers": { + // stdio 类型 — 启动子进程 + "my-database": { + "command": "npx", + "args": ["@my-org/db-mcp-server"], + "env": { "DB_URL": "postgres://..." } + }, + + // HTTP 流类型 — 远程服务器 + "remote-api": { + "type": "http", + "url": "https://api.example.com/mcp" + }, + + // SSE 类型 — Server-Sent Events + "realtime-feed": { + "type": "sse", + "url": "https://feed.example.com/sse" + }, + + // WebSocket 类型 + "ws-service": { + "type": "ws", + "url": "wss://ws.example.com/mcp" + } + } +} +``` + +#### 配置合并与去重 + +`getAllMcpConfigs()`(`config.ts`)按优先级合并多个来源的配置: + +1. 企业管控配置存在时,**独占返回**(忽略所有其他来源) +2. 否则按优先级从低到高合并:claude.ai → plugin → user → project → local(后者覆盖前者的同名项) +3. 插件与手动配置去重:通过 `getMcpServerSignature()` 生成内容签名(基于 command/args/url),与手动配置签名相同的插件配置会被抑制 +4. 
`addScopeToServers()` 为每个配置项标注来源 scope + ## 7 种传输层实现 `connectToServer()`(`client.ts:596-1643`)根据 `config.type` 分发到不同的 Transport 实现: | 传输类型 | Transport 类 | 适用场景 | 认证方式 | |----------|-------------|---------|---------| -| `stdio`(默认) | `StdioClientTransport` | 本地子进程 | 无 | +| `stdio`(默认) | `StdioClientTransport` | 外部本地子进程 | 无 | | `sse` | `SSEClientTransport` | 远程 SSE 服务 | `ClaudeAuthProvider` + OAuth | | `http` | `StreamableHTTPClientTransport` | HTTP 流 | `ClaudeAuthProvider` + OAuth | | `sse-ide` | `SSEClientTransport` | IDE 集成 | lockfile token | | `ws-ide` | `WebSocketTransport` | IDE WebSocket | `X-Claude-Code-Ide-Authorization` | | `ws` | `WebSocketTransport` | WebSocket 服务 | session ingress token | | `claudeai-proxy` | `StreamableHTTPClientTransport` | claude.ai 代理 | OAuth bearer + 401 重试 | +| InProcess(内置) | `InProcessTransport` | Computer Use / Chrome | 无(同进程) | ### stdio 传输的进程管理 @@ -112,9 +308,17 @@ timer.unref?.() // 不阻止进程退出 ```typescript const fullyQualifiedName = buildMcpToolName(client.name, tool.name) -// 结果: "mcp__my-db__query" +// 结果: "mcp__my-database__query" ``` +### 内置 MCP 的工具发现 + +内置 MCP 服务器虽然使用 InProcessTransport,但工具发现流程与外部服务器完全一致: + +- **Computer Use**:`createComputerUseMcpServerForCli()` 在 `src/utils/computerUse/mcpServer.ts` 中构建 MCP Server 对象,注册 `ListToolsRequestSchema` handler。工具描述包含平台特定的已安装应用列表(1s 超时枚举)。 +- **Claude in Chrome**:`createClaudeForChromeMcpServer()` 在 `@ant/claude-for-chrome-mcp` 包中构建 Server,提供 17+ 个浏览器控制工具。 +- **VSCode SDK**:由 IDE 端提供工具列表,通过 SDK transport 传递。 + ### 工具描述截断 MCP 工具描述上限 2048 字符(`MAX_MCP_DESCRIPTION_LENGTH`)。OpenAPI 生成的 MCP 服务器曾观察到 15-60KB 的描述文档。 @@ -134,6 +338,8 @@ MCP 工具描述上限 2048 字符(`MAX_MCP_DESCRIPTION_LENGTH`)。OpenAPI MCP 工具默认返回 `{ behavior: 'passthrough' }`(`client.ts:1816-1834`),意味着它们始终进入权限确认流程。工具名使用 `mcp__` 前缀精确匹配权限规则。 +内置 MCP 服务器的工具通过 `allowedTools` 列表自动授权——在 `main.tsx` 启动时加入,绕过普通权限提示。例如 Computer Use 工具的 `request_access` 自行处理会话级审批。 + ## MCP 工具的执行链路 ``` @@ -169,23 +375,33 @@ 
getRemoteMcpServerConnectionBatchSize() // 默认 20 本地 MCP 服务器(stdio)是重量级的子进程,默认限制 3 个并发连接。远程服务器是轻量级 HTTP 请求,允许 20 个并发。 -## 实际配置示例 - -```json -// settings.json 中的 MCP 配置 -{ - "mcpServers": { - "my-database": { - "command": "npx", - "args": ["@my-org/db-mcp-server"], - "env": { "DB_URL": "postgres://..." } - }, - "remote-api": { - "type": "http", - "url": "https://api.example.com/mcp" - } - } -} -``` - -配置后,AI 的工具列表中会出现 `mcp__my-database__query` 和 `mcp__remote-api__*` 工具——与内置工具使用相同的权限检查链路和 UI 渲染。 +## 内置 vs 外部 MCP 对比总结 + +| 维度 | 内置 MCP | 外部 MCP | +|------|---------|---------| +| **Transport** | `InProcessTransport`(同进程) | stdio / SSE / HTTP / WebSocket | +| **配置来源** | `setupComputerUseMCP()` / `setupClaudeInChrome()` 等动态注册 | settings.json / .mcp.json / 插件 / claude.ai | +| **Scope** | `dynamic` | `user` / `project` / `local` / `enterprise` / `claudeai` | +| **进程模型** | 同进程,零开销 | 子进程(stdio)或网络连接 | +| **名称保护** | 保留名,用户不可添加同名 | 自由命名(字母数字 + `-_`) | +| **生命周期** | 随 CLI 启停 | 连接缓存 + 按需重连 | +| **权限** | `allowedTools` 自动授权 | `passthrough` 进入权限确认 | +| **Feature Flag** | `CHICAGO_MCP`(Computer Use)等 | 无(始终可用) | +| **工具发现** | 与外部相同(MCP 协议) | 标准 MCP `tools/list` | +| **清理** | `inProcessServer.close()` | 信号升级策略 SIGINT→SIGTERM→SIGKILL | + +## 关键源文件索引 + +| 文件 | 职责 | +|------|------| +| `src/services/mcp/client.ts` | 核心客户端:connectToServer、fetchToolsForClient、MCPTool.call | +| `src/services/mcp/config.ts` | 配置管理:getAllMcpConfigs、addMcpConfig、removeMcpConfig | +| `src/services/mcp/types.ts` | 类型定义:配置 Schema、连接状态类型 | +| `src/services/mcp/InProcessTransport.ts` | 内置 MCP 传输层:linked transport pair | +| `src/services/mcp/vscodeSdkMcp.ts` | VSCode SDK MCP:双向通知、实验门控 | +| `src/services/mcp/useManageMCPConnections.ts` | React Hook:连接生命周期、重连 | +| `src/utils/computerUse/mcpServer.ts` | Computer Use MCP Server 构建 | +| `src/utils/computerUse/setup.ts` | Computer Use 动态注册 | +| `src/utils/claudeInChrome/mcpServer.ts` | Chrome MCP Server 构建 + Bridge 配置 | +| `src/tools/MCPTool/MCPTool.ts` | MCP 工具包装:统一 Tool 
接口 | +| `src/entrypoints/mcp.ts` | MCP server 入口(Claude Code 作为 MCP server) | diff --git a/mint.json b/mint.json index 277167e1c..b341d2632 100644 --- a/mint.json +++ b/mint.json @@ -86,6 +86,7 @@ "group": "可扩展性", "pages": [ "docs/extensibility/mcp-protocol", + "docs/extensibility/mcp-configuration", "docs/extensibility/hooks", "docs/extensibility/skills", "docs/extensibility/custom-agents" @@ -177,21 +178,7 @@ ] } ], - "excludes": [ - "docs/test-plans/**", - "docs/testing-spec.md", - "docs/REVISION-PLAN.md", - "docs/feature-exploration-plan.md", - "docs/ultraplan-implementation.md", - "docs/features/feature-flags-audit-complete.md", - "docs/features/feature-flags-codex-review.md", - "docs/features/growthbook-enablement-plan.md", - "docs/features/computer-use-architecture-v2.md", - "docs/features/computer-use-mcp-test-report.md", - "docs/features/computer-use-tools-reference.md", - "docs/features/computer-use-windows-enhancement.md", - "docs/features/lan-pipes-implementation.md" - ], + "excludes": [], "footerSocials": { "github": "https://github.com/anthropics/claude-code" } diff --git a/package.json b/package.json index 7549723d6..58616a8d0 100644 --- a/package.json +++ b/package.json @@ -74,6 +74,8 @@ "@anthropic-ai/sdk": "^0.80.0", "@anthropic-ai/vertex-sdk": "^0.14.4", "@anthropic/ink": "workspace:*", + "agent-tools": "workspace:*", + "mcp-client": "workspace:*", "@aws-sdk/client-bedrock": "^3.1020.0", "@aws-sdk/client-bedrock-runtime": "^3.1020.0", "@aws-sdk/client-sts": "^3.1020.0", diff --git a/packages/agent-tools/package.json b/packages/agent-tools/package.json new file mode 100644 index 000000000..27d06d159 --- /dev/null +++ b/packages/agent-tools/package.json @@ -0,0 +1,11 @@ +{ + "name": "agent-tools", + "version": "1.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "dependencies": { + "zod": "^3.25.0" + } +} diff --git a/packages/agent-tools/src/__tests__/compat.test.ts 
b/packages/agent-tools/src/__tests__/compat.test.ts new file mode 100644 index 000000000..5a5885dd9 --- /dev/null +++ b/packages/agent-tools/src/__tests__/compat.test.ts @@ -0,0 +1,34 @@ +import { describe, expect, test } from 'bun:test' +import type { CoreTool, Tool, Tools, AnyObject, ToolResult, ValidationResult, PermissionResult } from 'agent-tools' +import type { Tool as HostTool } from '../../src/Tool.js' + +describe('agent-tools compatibility', () => { + test('CoreTool structural compatibility with host Tool', () => { + // The host's Tool should structurally satisfy CoreTool + // because it has all required fields (name, call, description, etc.) + // This test verifies the type-level compatibility at runtime + const mockHostTool: HostTool = { + name: 'test', + aliases: [], + searchHint: 'test tool', + inputSchema: {} as any, + async call() { return { data: 'ok' } as any }, + async description() { return 'test' }, + async prompt() { return 'test prompt' }, + isConcurrencySafe: () => false, + isEnabled: () => true, + isReadOnly: () => false, + async checkPermissions() { return { behavior: 'allow' as const, updatedInput: {} } }, + toAutoClassifierInput: () => '', + userFacingName: () => 'test', + maxResultSizeChars: 100000, + mapToolResultToToolResultBlockParam: () => ({ type: 'tool_result', tool_use_id: '1', content: 'ok' }), + renderToolUseMessage: () => null, + } + + // This assignment should work if HostTool structurally extends CoreTool + const coreTool: CoreTool = mockHostTool as CoreTool + expect(coreTool.name).toBe('test') + expect(coreTool.isEnabled()).toBe(true) + }) +}) diff --git a/packages/agent-tools/src/__tests__/registry.test.ts b/packages/agent-tools/src/__tests__/registry.test.ts new file mode 100644 index 000000000..c35aa9d1e --- /dev/null +++ b/packages/agent-tools/src/__tests__/registry.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, test } from 'bun:test' +import { findToolByName, toolMatchesName } from '../registry.js' +import type { 
CoreTool, Tools } from '../types.js' + +describe('toolMatchesName', () => { + test('matches primary name', () => { + expect(toolMatchesName({ name: 'bash' }, 'bash')).toBe(true) + }) + + test('does not match different name', () => { + expect(toolMatchesName({ name: 'bash' }, 'read')).toBe(false) + }) + + test('matches alias', () => { + expect(toolMatchesName({ name: 'bash', aliases: ['shell', 'sh'] }, 'shell')).toBe(true) + expect(toolMatchesName({ name: 'bash', aliases: ['shell', 'sh'] }, 'sh')).toBe(true) + }) + + test('handles empty aliases', () => { + expect(toolMatchesName({ name: 'bash', aliases: [] }, 'bash')).toBe(true) + expect(toolMatchesName({ name: 'bash', aliases: [] }, 'shell')).toBe(false) + }) + + test('handles undefined aliases', () => { + expect(toolMatchesName({ name: 'bash' }, 'bash')).toBe(true) + expect(toolMatchesName({ name: 'bash' }, 'shell')).toBe(false) + }) +}) + +describe('findToolByName', () => { + const tools: Tools = [ + { name: 'bash' } as CoreTool, + { name: 'read', aliases: ['cat'] } as CoreTool, + { name: 'write', aliases: ['edit'] } as CoreTool, + ] + + test('finds tool by primary name', () => { + expect(findToolByName(tools, 'bash')?.name).toBe('bash') + }) + + test('finds tool by alias', () => { + expect(findToolByName(tools, 'cat')?.name).toBe('read') + expect(findToolByName(tools, 'edit')?.name).toBe('write') + }) + + test('returns undefined for unknown name', () => { + expect(findToolByName(tools, 'unknown')).toBeUndefined() + }) + + test('handles empty tools array', () => { + expect(findToolByName([], 'bash')).toBeUndefined() + }) + + test('returns first match for duplicate names', () => { + const dupTools: Tools = [ + { name: 'tool', aliases: ['a'] } as CoreTool, + { name: 'tool', aliases: ['b'] } as CoreTool, + ] + const found = findToolByName(dupTools, 'tool') + expect(found).toBeDefined() + expect(found!.aliases).toContain('a') + }) +}) diff --git a/packages/agent-tools/src/index.ts b/packages/agent-tools/src/index.ts 
new file mode 100644 index 000000000..3a9ce1334 --- /dev/null +++ b/packages/agent-tools/src/index.ts @@ -0,0 +1,18 @@ +// agent-tools — Tool interface definitions and registry utilities +// Pure types + pure functions, zero runtime dependencies + +export type { + AnyObject, + ToolInputJSONSchema, + ToolProgressData, + ToolProgress, + ToolCallProgress, + ToolResult, + ValidationResult, + PermissionResult, + CoreTool, + Tool, + Tools, +} from './types.js' + +export { findToolByName, toolMatchesName } from './registry.js' diff --git a/packages/agent-tools/src/registry.ts b/packages/agent-tools/src/registry.ts new file mode 100644 index 000000000..e1038bc8c --- /dev/null +++ b/packages/agent-tools/src/registry.ts @@ -0,0 +1,21 @@ +import type { CoreTool, Tools } from './types.js' + +/** + * Checks if a tool matches the given name (primary name or alias). + */ +export function toolMatchesName( + tool: { name: string; aliases?: string[] }, + name: string, +): boolean { + return tool.name === name || (tool.aliases?.includes(name) ?? false) +} + +/** + * Finds a tool by name or alias from a list of tools. + */ +export function findToolByName( + tools: Tools, + name: string, +): CoreTool | undefined { + return tools.find(t => toolMatchesName(t, name)) +} diff --git a/packages/agent-tools/src/types.ts b/packages/agent-tools/src/types.ts new file mode 100644 index 000000000..611be167e --- /dev/null +++ b/packages/agent-tools/src/types.ts @@ -0,0 +1,221 @@ +// agent-tools — Core Tool interface definitions +// Protocol-level types, independent of any host framework + +import type { z } from 'zod/v4' + +// ============================================================================ +// Schema types +// ============================================================================ + +/** + * Zod schema type for any object with string keys. + * Used as the Input generic constraint for Tool. 
+ */ +export type AnyObject = z.ZodType<{ [key: string]: unknown }> + +/** + * JSON Schema format for MCP tool input schemas. + * MCP servers provide this directly instead of Zod schemas. + */ +export type ToolInputJSONSchema = { + [x: string]: unknown + type: 'object' + properties?: { + [x: string]: unknown + } +} + +// ============================================================================ +// Progress types +// ============================================================================ + +/** + * Progress data from a running tool. Host defines concrete subtypes. + * Typed as `any` at the protocol level — the host assigns real shapes. + */ +export type ToolProgressData = any + +/** + * A progress event from a tool execution. + */ +export type ToolProgress

= { + toolUseID: string + data: P +} + +/** + * Callback for receiving progress updates during tool execution. + */ +export type ToolCallProgress

= ( + progress: ToolProgress

, +) => void + +// ============================================================================ +// Result types +// ============================================================================ + +/** + * Result returned by a tool's call() method. + * @template T - The output data type + * @template Message - The message type (host-specific, defaults to unknown) + */ +export type ToolResult = { + data: T + newMessages?: Message[] + contextModifier?: (context: any) => any + /** MCP protocol metadata (structuredContent, _meta) */ + mcpMeta?: { + _meta?: Record + structuredContent?: Record + } +} + +// ============================================================================ +// Validation & Permission types +// ============================================================================ + +/** + * Result of tool input validation. + */ +export type ValidationResult = + | { result: true } + | { result: false; message: string; errorCode: number } + +/** + * Result of a permission check for a tool invocation. + */ +export type PermissionResult = + | { behavior: 'allow'; updatedInput: Record } + | { behavior: 'deny'; message: string } + | { behavior: 'passthrough' } + +// ============================================================================ +// Core Tool interface +// ============================================================================ + +/** + * The host-agnostic core Tool interface. + * + * This defines the protocol-level contract for any tool — independent of + * React rendering, specific context types, or host infrastructure. + * + * The host (Claude Code) extends this with render methods, richer context + * types, and other host-specific features. Host tools structurally satisfy + * this interface because they implement all required fields. 
+ * + * @template Input - Zod schema type for tool input + * @template Output - Tool output data type + * @template P - Tool progress data type + * @template Context - Tool execution context type (host-specific) + */ +export interface CoreTool< + Input extends AnyObject = AnyObject, + Output = unknown, + P extends ToolProgressData = ToolProgressData, + Context = unknown, +> { + // ── Identity ── + readonly name: string + aliases?: string[] + searchHint?: string + + // ── Schema ── + readonly inputSchema: Input + readonly inputJSONSchema?: ToolInputJSONSchema + outputSchema?: z.ZodType + + // ── Execution ── + call( + args: z.infer, + context: Context, + canUseTool: (...args: any[]) => Promise, + parentMessage: any, + onProgress?: ToolCallProgress

, + ): Promise> + + // ── Description ── + description( + input: z.infer, + options: { + isNonInteractiveSession: boolean + toolPermissionContext: any + tools: readonly CoreTool[] + }, + ): Promise + + prompt(options: { + getToolPermissionContext: () => Promise + tools: readonly CoreTool[] + agents: any[] + allowedAgentTypes?: string[] + }): Promise + + // ── Behavioral properties ── + isConcurrencySafe(input: z.infer): boolean + isEnabled(): boolean + isReadOnly(input: z.infer): boolean + isDestructive?(input: z.infer): boolean + isOpenWorld?(input: z.infer): boolean + interruptBehavior?(): 'cancel' | 'block' + requiresUserInteraction?(): boolean + + // ── MCP markers ── + isMcp?: boolean + isLsp?: boolean + readonly shouldDefer?: boolean + readonly alwaysLoad?: boolean + mcpInfo?: { serverName: string; toolName: string } + + // ── Permissions ── + validateInput?( + input: z.infer, + context: Context, + ): Promise + + checkPermissions( + input: z.infer, + context: Context, + ): Promise + + // ── Utility ── + inputsEquivalent?(a: z.infer, b: z.infer): boolean + getPath?(input: z.infer): string + toAutoClassifierInput(input: z.infer): unknown + backfillObservableInput?(input: Record): void + + // ── Output ── + maxResultSizeChars: number + userFacingName(input: Partial> | undefined): string + mapToolResultToToolResultBlockParam( + content: Output, + toolUseID: string, + ): any + + // ── Optional output helpers ── + isResultTruncated?(output: Output): boolean + getToolUseSummary?(input: Partial> | undefined): string | null + getActivityDescription?( + input: Partial> | undefined, + ): string | null + isTransparentWrapper?(): boolean + isSearchOrReadCommand?(input: z.infer): { + isSearch: boolean + isRead: boolean + isList?: boolean + } +} + +/** + * A tool with a generic context type. + * This is the default export — hosts can specify their own Context type. 
+ */ +export type Tool< + Input extends AnyObject = AnyObject, + Output = unknown, + P extends ToolProgressData = ToolProgressData, +> = CoreTool + +/** + * A collection of tools. + */ +export type Tools = readonly CoreTool[] diff --git a/packages/mcp-client/package.json b/packages/mcp-client/package.json new file mode 100644 index 000000000..f47801ecf --- /dev/null +++ b/packages/mcp-client/package.json @@ -0,0 +1,16 @@ +{ + "name": "mcp-client", + "version": "1.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "dependencies": { + "@modelcontextprotocol/sdk": "^1.29.0", + "agent-tools": "workspace:*", + "lru-cache": "^10.0.0", + "lodash-es": "^4.17.21", + "p-map": "^4.0.0", + "zod": "^3.25.0" + } +} diff --git a/packages/mcp-client/src/__tests__/InProcessTransport.test.ts b/packages/mcp-client/src/__tests__/InProcessTransport.test.ts new file mode 100644 index 000000000..f9ee89a4e --- /dev/null +++ b/packages/mcp-client/src/__tests__/InProcessTransport.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, test } from 'bun:test' +import { createLinkedTransportPair } from '../transport/InProcessTransport.js' +import type { JSONRPCMessage } from '@modelcontextprotocol/sdk/types.js' + +describe('InProcessTransport', () => { + test('creates linked pair', () => { + const [client, server] = createLinkedTransportPair() + expect(client).toBeDefined() + expect(server).toBeDefined() + }) + + test('delivers messages from client to server', async () => { + const [client, server] = createLinkedTransportPair() + + let received: JSONRPCMessage | null = null + server.onmessage = (msg) => { received = msg } + + const message: JSONRPCMessage = { + jsonrpc: '2.0', + method: 'test', + params: {}, + id: 1, + } + + await client.send(message) + + // Wait for queueMicrotask to deliver + await new Promise(resolve => setTimeout(resolve, 10)) + + expect(received).not.toBeNull() + expect(received!.jsonrpc).toBe('2.0') + expect((received as 
any).method).toBe('test') + }) + + test('delivers messages from server to client', async () => { + const [client, server] = createLinkedTransportPair() + + let received: JSONRPCMessage | null = null + client.onmessage = (msg) => { received = msg } + + await server.send({ jsonrpc: '2.0', result: 42, id: 1 }) + + await new Promise(resolve => setTimeout(resolve, 10)) + + expect(received).not.toBeNull() + }) + + test('close triggers onclose on both sides', async () => { + const [client, server] = createLinkedTransportPair() + + let clientClosed = false + let serverClosed = false + client.onclose = () => { clientClosed = true } + server.onclose = () => { serverClosed = true } + + await client.close() + + expect(clientClosed).toBe(true) + expect(serverClosed).toBe(true) + }) + + test('close is idempotent', async () => { + const [client] = createLinkedTransportPair() + + let closeCount = 0 + client.onclose = () => { closeCount++ } + + await client.close() + await client.close() + + expect(closeCount).toBe(1) + }) + + test('send after close throws', async () => { + const [client] = createLinkedTransportPair() + await client.close() + + expect(client.send({ jsonrpc: '2.0', method: 'test' } as any)).rejects.toThrow('Transport is closed') + }) +}) diff --git a/packages/mcp-client/src/__tests__/cache.test.ts b/packages/mcp-client/src/__tests__/cache.test.ts new file mode 100644 index 000000000..b5e6fe049 --- /dev/null +++ b/packages/mcp-client/src/__tests__/cache.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, test } from 'bun:test' +import { memoizeWithLRU } from '../cache.js' + +describe('memoizeWithLRU', () => { + test('caches results', () => { + let callCount = 0 + const fn = memoizeWithLRU( + (x: number) => { callCount++; return x * 2 }, + (x) => `key-${x}`, + 10, + ) + + expect(fn(5)).toBe(10) + expect(callCount).toBe(1) + expect(fn(5)).toBe(10) + expect(callCount).toBe(1) // cached, no new call + }) + + test('evicts least recently used entries', () => { + const fn 
= memoizeWithLRU( + (x: number) => x, + (x) => `key-${x}`, + 2, + ) + + fn(1) + fn(2) + fn(3) // should evict key-1 + + expect(fn.cache.size()).toBe(2) + expect(fn.cache.has('key-1')).toBe(false) + expect(fn.cache.has('key-2')).toBe(true) + expect(fn.cache.has('key-3')).toBe(true) + }) + + test('cache.clear removes all entries', () => { + const fn = memoizeWithLRU( + (x: number) => x, + (x) => `key-${x}`, + 10, + ) + + fn(1) + fn(2) + expect(fn.cache.size()).toBe(2) + + fn.cache.clear() + expect(fn.cache.size()).toBe(0) + }) + + test('cache.delete removes specific entry', () => { + const fn = memoizeWithLRU( + (x: number) => x, + (x) => `key-${x}`, + 10, + ) + + fn(1) + fn(2) + expect(fn.cache.delete('key-1')).toBe(true) + expect(fn.cache.has('key-1')).toBe(false) + expect(fn.cache.has('key-2')).toBe(true) + }) + + test('cache.get returns value without promoting', () => { + const fn = memoizeWithLRU( + (x: number) => x * 10, + (x) => `key-${x}`, + 2, + ) + + fn(1) + fn(2) + // key-1 is LRU, but get() should not promote it + expect(fn.cache.get('key-1')).toBe(10) + // Adding key-3 should still evict key-1 (not promoted by get) + fn(3) + expect(fn.cache.has('key-1')).toBe(false) + }) +}) diff --git a/packages/mcp-client/src/__tests__/connection.test.ts b/packages/mcp-client/src/__tests__/connection.test.ts new file mode 100644 index 000000000..7f8f78c62 --- /dev/null +++ b/packages/mcp-client/src/__tests__/connection.test.ts @@ -0,0 +1,84 @@ +import { describe, expect, test } from 'bun:test' +import { + DEFAULT_CONNECTION_TIMEOUT_MS, + MAX_MCP_DESCRIPTION_LENGTH, + MAX_ERRORS_BEFORE_RECONNECT, + isTerminalConnectionError, + isMcpSessionExpiredError, +} from '../connection.js' + +describe('connection constants', () => { + test('has reasonable defaults', () => { + expect(DEFAULT_CONNECTION_TIMEOUT_MS).toBe(30_000) + expect(MAX_MCP_DESCRIPTION_LENGTH).toBe(2048) + expect(MAX_ERRORS_BEFORE_RECONNECT).toBe(3) + }) +}) + +describe('isTerminalConnectionError', () => { + 
test('detects ECONNRESET', () => { + expect(isTerminalConnectionError('Connection reset: ECONNRESET')).toBe(true) + }) + + test('detects ETIMEDOUT', () => { + expect(isTerminalConnectionError('Connection timed out: ETIMEDOUT')).toBe(true) + }) + + test('detects EPIPE', () => { + expect(isTerminalConnectionError('Broken pipe: EPIPE')).toBe(true) + }) + + test('detects EHOSTUNREACH', () => { + expect(isTerminalConnectionError('Host unreachable: EHOSTUNREACH')).toBe(true) + }) + + test('detects ECONNREFUSED', () => { + expect(isTerminalConnectionError('Connection refused: ECONNREFUSED')).toBe(true) + }) + + test('detects SSE disconnection messages', () => { + expect(isTerminalConnectionError('SSE stream disconnected')).toBe(true) + expect(isTerminalConnectionError('Failed to reconnect SSE stream')).toBe(true) + }) + + test('detects terminated', () => { + expect(isTerminalConnectionError('Process terminated')).toBe(true) + }) + + test('rejects non-terminal errors', () => { + expect(isTerminalConnectionError('some random error')).toBe(false) + expect(isTerminalConnectionError('')).toBe(false) + expect(isTerminalConnectionError('timeout waiting for response')).toBe(false) + }) +}) + +describe('isMcpSessionExpiredError', () => { + test('detects 404 with JSON-RPC session-not-found code', () => { + const error = new Error('Not found: {"code":-32001,"message":"Session not found"}') + Object.assign(error, { code: 404 }) + expect(isMcpSessionExpiredError(error)).toBe(true) + }) + + test('detects 404 with spaced JSON-RPC code', () => { + const error = new Error('Not found: {"code": -32001}') + Object.assign(error, { code: 404 }) + expect(isMcpSessionExpiredError(error)).toBe(true) + }) + + test('rejects non-404 errors', () => { + const error = new Error('{"code":-32001}') + Object.assign(error, { code: 500 }) + expect(isMcpSessionExpiredError(error)).toBe(false) + }) + + test('rejects 404 without session code', () => { + const error = new Error('Not found') + 
Object.assign(error, { code: 404 }) + expect(isMcpSessionExpiredError(error)).toBe(false) + }) + + test('rejects errors without code property', () => { + const error = new Error('Session not found') + expect(isMcpSessionExpiredError(error)).toBe(false) + }) +}) diff --git a/packages/mcp-client/src/__tests__/discovery.test.ts b/packages/mcp-client/src/__tests__/discovery.test.ts new file mode 100644 index 000000000..a43d3472f --- /dev/null +++ b/packages/mcp-client/src/__tests__/discovery.test.ts @@ -0,0 +1,162 @@ +import { describe, expect, test, mock } from 'bun:test' +import { discoverTools, createCachedToolDiscovery } from '../discovery.js' +import type { DiscoveryOptions } from '../discovery.js' +import type { ConnectedMCPServer } from '../types.js' +import type { McpClientDependencies } from '../interfaces.js' + +function createMockDeps(): McpClientDependencies { + return { + logger: { + debug: mock(() => {}), + info: mock(() => {}), + warn: mock(() => {}), + error: mock(() => {}), + }, + httpConfig: { + getUserAgent: () => 'test-agent/1.0', + }, + } +} + +describe('discoverTools', () => { + test('returns empty array when capabilities.tools is missing', async () => { + const result = await discoverTools({ + serverName: 'test', + client: {} as any, + capabilities: {}, + deps: createMockDeps(), + }) + expect(result).toEqual([]) + }) + + test('fetches and transforms tools from server', async () => { + const mockClient = { + request: mock(() => + Promise.resolve({ + tools: [ + { + name: 'search', + description: 'Search for items', + inputSchema: { type: 'object' }, + annotations: { readOnlyHint: true, title: 'Search Items' }, + }, + ], + }), + ), + } + + const result = await discoverTools({ + serverName: 'my-server', + client: mockClient as any, + capabilities: { tools: {} }, + deps: createMockDeps(), + }) + + expect(result).toHaveLength(1) + const tool = result[0] + expect(tool.name).toBe('mcp__my-server__search') + expect(tool.mcpInfo).toEqual({ serverName: 
'my-server', toolName: 'search' }) + expect(tool.isMcp).toBe(true) + expect(tool.isReadOnly()).toBe(true) + expect(tool.userFacingName()).toBe('Search Items') + expect(await tool.description()).toBe('Search for items') + }) + + test('respects skipPrefix option', async () => { + const mockClient = { + request: mock(() => + Promise.resolve({ + tools: [{ name: 'search', description: 'Search' }], + }), + ), + } + + const result = await discoverTools({ + serverName: 'my-server', + client: mockClient as any, + capabilities: { tools: {} }, + skipPrefix: true, + deps: createMockDeps(), + }) + + expect(result[0].name).toBe('search') + }) + + test('returns empty array on fetch error', async () => { + const mockClient = { + request: mock(() => Promise.reject(new Error('Connection lost'))), + } + const deps = createMockDeps() + + const result = await discoverTools({ + serverName: 'failing-server', + client: mockClient as any, + capabilities: { tools: {} }, + deps, + }) + + expect(result).toEqual([]) + expect(deps.logger.warn).toHaveBeenCalled() + }) + + test('sanitizes tool data', async () => { + const mockClient = { + request: mock(() => + Promise.resolve({ + tools: [ + { + name: 'tool\x00with\x07control', + description: 'desc', + }, + ], + }), + ), + } + + const result = await discoverTools({ + serverName: 'test', + client: mockClient as any, + capabilities: { tools: {} }, + deps: createMockDeps(), + }) + + expect(result[0].name).not.toContain('\x00') + }) +}) + +describe('createCachedToolDiscovery', () => { + test('caches results by server name', async () => { + const deps = createMockDeps() + const { discover, cache } = createCachedToolDiscovery(deps) + + const mockConn = { + type: 'connected' as const, + name: 'cached-server', + client: { + request: mock(() => + Promise.resolve({ + tools: [{ name: 'tool1', description: 'Tool 1' }], + }), + ), + }, + capabilities: { tools: {} }, + } as unknown as ConnectedMCPServer + + // First call — should fetch + const result1 = await 
discover(mockConn) + expect(result1).toHaveLength(1) + + // Second call — should use cache + const result2 = await discover(mockConn) + expect(result2).toHaveLength(1) + + // Request was called only once + expect(mockConn.client.request).toHaveBeenCalledTimes(1) + + // Cache delete works + cache.delete('cached-server') + const result3 = await discover(mockConn) + expect(result3).toHaveLength(1) + expect(mockConn.client.request).toHaveBeenCalledTimes(2) + }) +}) diff --git a/packages/mcp-client/src/__tests__/errors.test.ts b/packages/mcp-client/src/__tests__/errors.test.ts new file mode 100644 index 000000000..9201794a8 --- /dev/null +++ b/packages/mcp-client/src/__tests__/errors.test.ts @@ -0,0 +1,69 @@ +import { describe, expect, test } from 'bun:test' +import { + McpError, + McpConnectionError, + McpAuthError, + McpTimeoutError, + McpToolCallError, + McpSessionExpiredError, +} from '../errors.js' + +describe('McpError', () => { + test('has correct properties', () => { + const err = new McpError('test message', 'my-server', 'TEST_CODE') + expect(err.message).toBe('test message') + expect(err.serverName).toBe('my-server') + expect(err.code).toBe('TEST_CODE') + expect(err.name).toBe('McpError') + expect(err).toBeInstanceOf(Error) + }) +}) + +describe('McpConnectionError', () => { + test('inherits from McpError', () => { + const cause = new Error('ECONNREFUSED') + const err = new McpConnectionError('my-server', 'Connection failed', cause) + expect(err).toBeInstanceOf(McpError) + expect(err).toBeInstanceOf(Error) + expect(err.code).toBe('CONNECTION_FAILED') + expect(err.serverName).toBe('my-server') + expect(err.cause).toBe(cause) + }) + + test('works without cause', () => { + const err = new McpConnectionError('my-server', 'Failed') + expect(err.cause).toBeUndefined() + }) +}) + +describe('McpAuthError', () => { + test('has AUTH_REQUIRED code', () => { + const err = new McpAuthError('my-server', 'Auth needed') + expect(err.code).toBe('AUTH_REQUIRED') + 
expect(err).toBeInstanceOf(McpError) + }) +}) + +describe('McpTimeoutError', () => { + test('has timeout info in message', () => { + const err = new McpTimeoutError('my-server', 5000) + expect(err.code).toBe('TIMEOUT') + expect(err.timeoutMs).toBe(5000) + expect(err.message).toContain('5000') + }) +}) + +describe('McpToolCallError', () => { + test('has tool name', () => { + const err = new McpToolCallError('my-server', 'query', 'Tool failed') + expect(err.code).toBe('TOOL_CALL_FAILED') + expect(err.toolName).toBe('query') + }) +}) + +describe('McpSessionExpiredError', () => { + test('has SESSION_EXPIRED code', () => { + const err = new McpSessionExpiredError('my-server') + expect(err.code).toBe('SESSION_EXPIRED') + }) +}) diff --git a/packages/mcp-client/src/__tests__/execution.test.ts b/packages/mcp-client/src/__tests__/execution.test.ts new file mode 100644 index 000000000..c70053c89 --- /dev/null +++ b/packages/mcp-client/src/__tests__/execution.test.ts @@ -0,0 +1,144 @@ +import { describe, expect, test, mock } from 'bun:test' +import { callMcpTool } from '../execution.js' +import type { ConnectedMCPServer } from '../types.js' +import type { McpClientDependencies } from '../interfaces.js' +import { McpAuthError, McpToolCallError } from '../errors.js' + +function createMockDeps(): McpClientDependencies { + return { + logger: { + debug: mock(() => {}), + info: mock(() => {}), + warn: mock(() => {}), + error: mock(() => {}), + }, + httpConfig: { + getUserAgent: () => 'test-agent/1.0', + }, + } +} + +describe('callMcpTool', () => { + test('calls tool and returns result', async () => { + const mockResult = { + content: [{ type: 'text', text: 'result data' }], + _meta: { requestId: '123' }, + } + + const mockConn = { + name: 'test-server', + client: { + callTool: mock(() => Promise.resolve(mockResult)), + }, + type: 'connected' as const, + } as unknown as ConnectedMCPServer + + const result = await callMcpTool( + { + client: mockConn, + tool: 'search', + args: { 
query: 'test' }, + signal: new AbortController().signal, + }, + createMockDeps(), + ) + + expect(result.content).toBeDefined() + }) + + test('throws McpToolCallError when result has isError=true', async () => { + const mockResult = { + isError: true, + content: [{ type: 'text', text: 'Something went wrong' }], + } + + const mockConn = { + name: 'test-server', + client: { + callTool: mock(() => Promise.resolve(mockResult)), + }, + type: 'connected' as const, + } as unknown as ConnectedMCPServer + + await expect( + callMcpTool( + { + client: mockConn, + tool: 'fail-tool', + args: {}, + signal: new AbortController().signal, + }, + createMockDeps(), + ), + ).rejects.toThrow() + + try { + await callMcpTool( + { + client: mockConn, + tool: 'fail-tool', + args: {}, + signal: new AbortController().signal, + }, + createMockDeps(), + ) + } catch (e) { + expect(e).toBeInstanceOf(McpToolCallError) + expect((e as McpToolCallError).serverName).toBe('test-server') + expect((e as McpToolCallError).toolName).toBe('fail-tool') + } + }) + + test('throws McpAuthError on 401 response', async () => { + const error = new Error('Unauthorized') + Object.assign(error, { code: 401 }) + + const mockConn = { + name: 'auth-server', + client: { + callTool: mock(() => Promise.reject(error)), + }, + type: 'connected' as const, + } as unknown as ConnectedMCPServer + + await expect( + callMcpTool( + { + client: mockConn, + tool: 'protected-tool', + args: {}, + signal: new AbortController().signal, + }, + createMockDeps(), + ), + ).rejects.toThrow(McpAuthError) + }) + + test('passes metadata to the client', async () => { + const mockResult = { content: [{ type: 'text', text: 'ok' }] } + const callToolMock = mock(() => Promise.resolve(mockResult)) + + const mockConn = { + name: 'test-server', + client: { + callTool: callToolMock, + }, + type: 'connected' as const, + } as unknown as ConnectedMCPServer + + await callMcpTool( + { + client: mockConn, + tool: 'my-tool', + args: { key: 'value' }, + meta: { 
'custom-key': 'custom-value' }, + signal: new AbortController().signal, + }, + createMockDeps(), + ) + + expect(callToolMock).toHaveBeenCalled() + const callArgs = callToolMock.mock.calls[0] as any[] + expect(callArgs[0]._meta).toEqual({ 'custom-key': 'custom-value' }) + }) +}) diff --git a/packages/mcp-client/src/__tests__/manager.test.ts b/packages/mcp-client/src/__tests__/manager.test.ts new file mode 100644 index 000000000..f067ffa2e --- /dev/null +++ b/packages/mcp-client/src/__tests__/manager.test.ts @@ -0,0 +1,113 @@ +import { describe, expect, test, mock } from 'bun:test' +import { createMcpManager } from '../manager.js' +import type { McpManager } from '../manager.js' +import type { McpClientDependencies } from '../interfaces.js' +import type { ScopedMcpServerConfig, MCPServerConnection, ConnectedMCPServer } from '../types.js' +import type { Client } from '@modelcontextprotocol/sdk/client/index.js' + +function createMockDeps(): McpClientDependencies { + return { + logger: { + debug: mock(() => {}), + info: mock(() => {}), + warn: mock(() => {}), + error: mock(() => {}), + }, + httpConfig: { + getUserAgent: () => 'test-agent/1.0', + getSessionId: () => 'test-session', + }, + } +} + +describe('createMcpManager', () => { + test('creates a manager instance', () => { + const manager = createMcpManager(createMockDeps()) + expect(manager).toBeDefined() + expect(manager.getConnections).toBeTypeOf('function') + expect(manager.connect).toBeTypeOf('function') + expect(manager.disconnect).toBeTypeOf('function') + expect(manager.getTools).toBeTypeOf('function') + expect(manager.getAllTools).toBeTypeOf('function') + expect(manager.callTool).toBeTypeOf('function') + expect(manager.on).toBeTypeOf('function') + expect(manager.off).toBeTypeOf('function') + }) + + test('connect throws if connectFn not set', async () => { + const manager = createMcpManager(createMockDeps()) + await expect(manager.connect('test', { command: 'npx', args: [] })) + .rejects.toThrow('connectFn not 
set') + }) + + test('connect calls connectFn and emits connected event', async () => { + const manager = createMcpManager(createMockDeps()) as any + let connectedEvent: string | null = null + manager.on('connected', (name: string) => { connectedEvent = name }) + + const mockConnection: ConnectedMCPServer = { + type: 'connected', + name: 'test-server', + client: { + request: mock(() => Promise.resolve({ tools: [] })), + onclose: null, + } as unknown as Client, + capabilities: {}, + config: { command: 'npx', args: [], scope: 'dynamic' } as ScopedMcpServerConfig, + cleanup: mock(() => Promise.resolve()), + } + + manager.setConnectFn(async (name: string, config: ScopedMcpServerConfig) => { + expect(name).toBe('test-server') + expect(config.scope).toBe('dynamic') + return mockConnection + }) + + const result = await manager.connect('test-server', { command: 'npx', args: [] }) + expect(result.type).toBe('connected') + expect(connectedEvent).toBe('test-server') + }) + + test('disconnect calls cleanup and emits disconnected', async () => { + const manager = createMcpManager(createMockDeps()) as any + let disconnected = false + manager.on('disconnected', () => { disconnected = true }) + + const mockCleanup = mock(() => Promise.resolve()) + const mockConnection: ConnectedMCPServer = { + type: 'connected', + name: 'test-server', + client: { request: mock(() => Promise.resolve({ tools: [] })) } as unknown as Client, + capabilities: {}, + config: { command: 'npx', args: [], scope: 'dynamic' } as ScopedMcpServerConfig, + cleanup: mockCleanup, + } + + manager.setConnectFn(async () => mockConnection) + await manager.connect('test-server', { command: 'npx', args: [] }) + + await manager.disconnect('test-server') + expect(mockCleanup).toHaveBeenCalled() + expect(disconnected).toBe(true) + expect(manager.getConnections().size).toBe(0) + }) + + test('on/off event handling', () => { + const manager = createMcpManager(createMockDeps()) as any + const handler = mock(() => {}) + 
manager.on('error', handler) + manager.off('error', handler) + // No crash — just verifying it works + expect(true).toBe(true) + }) + + test('getTools returns empty array for unknown server', () => { + const manager = createMcpManager(createMockDeps()) + expect(manager.getTools('unknown')).toEqual([]) + }) + + test('getAllTools returns empty array initially', () => { + const manager = createMcpManager(createMockDeps()) + expect(manager.getAllTools()).toEqual([]) + }) +}) diff --git a/packages/mcp-client/src/__tests__/sanitization.test.ts b/packages/mcp-client/src/__tests__/sanitization.test.ts new file mode 100644 index 000000000..b254a638f --- /dev/null +++ b/packages/mcp-client/src/__tests__/sanitization.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, test } from 'bun:test' +import { recursivelySanitizeUnicode } from '../sanitization.js' + +describe('recursivelySanitizeUnicode', () => { + test('passes through clean strings', () => { + expect(recursivelySanitizeUnicode('hello world')).toBe('hello world') + expect(recursivelySanitizeUnicode('')).toBe('') + }) + + test('removes control characters', () => { + expect(recursivelySanitizeUnicode('hello\x00world')).toBe('helloworld') + expect(recursivelySanitizeUnicode('test\x07bell')).toBe('testbell') + }) + + test('preserves allowed whitespace', () => { + expect(recursivelySanitizeUnicode('hello\tworld')).toBe('hello\tworld') + expect(recursivelySanitizeUnicode('hello\nworld')).toBe('hello\nworld') + expect(recursivelySanitizeUnicode('hello\rworld')).toBe('hello\rworld') + }) + + test('removes replacement character', () => { + expect(recursivelySanitizeUnicode('hello\uFFFDworld')).toBe('helloworld') + }) + + test('normalizes to NFC', () => { + // é can be composed (U+00E9) or decomposed (U+0065 + U+0301) + const decomposed = 'e\u0301' + const result = recursivelySanitizeUnicode(decomposed) + expect(result).toBe('é') + }) + + test('sanitizes arrays recursively', () => { + const input = ['hello\x00world', 'clean'] + 
expect(recursivelySanitizeUnicode(input)).toEqual(['helloworld', 'clean']) + }) + + test('sanitizes objects recursively', () => { + const input = { name: 'test\x07', nested: { value: 'a\x00b' } } + expect(recursivelySanitizeUnicode(input)).toEqual({ + name: 'test', + nested: { value: 'ab' }, + }) + }) + + test('handles null and non-string primitives', () => { + expect(recursivelySanitizeUnicode(null)).toBe(null) + expect(recursivelySanitizeUnicode(42)).toBe(42) + expect(recursivelySanitizeUnicode(true)).toBe(true) + expect(recursivelySanitizeUnicode(undefined)).toBe(undefined) + }) +}) diff --git a/packages/mcp-client/src/__tests__/strings.test.ts b/packages/mcp-client/src/__tests__/strings.test.ts new file mode 100644 index 000000000..9a8e031e8 --- /dev/null +++ b/packages/mcp-client/src/__tests__/strings.test.ts @@ -0,0 +1,101 @@ +import { describe, expect, test } from 'bun:test' +import { + buildMcpToolName, + normalizeNameForMCP, + mcpInfoFromString, + getMcpPrefix, + getToolNameForPermissionCheck, + getMcpDisplayName, + extractMcpToolDisplayName, +} from '../strings.js' + +describe('normalizeNameForMCP', () => { + test('keeps valid names unchanged', () => { + expect(normalizeNameForMCP('my-server')).toBe('my-server') + expect(normalizeNameForMCP('my_server')).toBe('my_server') + expect(normalizeNameForMCP('server123')).toBe('server123') + }) + + test('replaces dots and spaces with underscores', () => { + expect(normalizeNameForMCP('test.server')).toBe('test_server') + expect(normalizeNameForMCP('test server')).toBe('test_server') + }) + + test('collapses underscores for claude.ai prefix', () => { + expect(normalizeNameForMCP('claude.ai Slack')).toBe('claude_ai_Slack') + expect(normalizeNameForMCP('claude.ai My Server')).toBe('claude_ai_My_Server') + }) +}) + +describe('buildMcpToolName', () => { + test('builds fully qualified name', () => { + expect(buildMcpToolName('my-server', 'query')).toBe('mcp__my-server__query') + }) + + test('normalizes server name with 
dots', () => { + expect(buildMcpToolName('test.server', 'tool')).toBe('mcp__test_server__tool') + }) +}) + +describe('mcpInfoFromString', () => { + test('parses valid MCP tool name', () => { + const info = mcpInfoFromString('mcp__my-server__query') + expect(info).toEqual({ serverName: 'my-server', toolName: 'query' }) + }) + + test('returns null for non-MCP names', () => { + expect(mcpInfoFromString('bash')).toBeNull() + expect(mcpInfoFromString('mcp__')).toBeNull() + expect(mcpInfoFromString('')).toBeNull() + }) + + test('handles tool names with double underscores', () => { + const info = mcpInfoFromString('mcp__server__tool__part') + expect(info).toEqual({ serverName: 'server', toolName: 'tool__part' }) + }) + + test('handles server-only (no tool name)', () => { + const info = mcpInfoFromString('mcp__server') + expect(info).toEqual({ serverName: 'server', toolName: undefined }) + }) +}) + +describe('getMcpPrefix', () => { + test('returns correct prefix', () => { + expect(getMcpPrefix('my-server')).toBe('mcp__my-server__') + }) +}) + +describe('getToolNameForPermissionCheck', () => { + test('uses mcp prefix for MCP tools', () => { + expect(getToolNameForPermissionCheck({ + name: 'query', + mcpInfo: { serverName: 'my-server', toolName: 'query' }, + })).toBe('mcp__my-server__query') + }) + + test('uses raw name for non-MCP tools', () => { + expect(getToolNameForPermissionCheck({ name: 'bash' })).toBe('bash') + }) +}) + +describe('getMcpDisplayName', () => { + test('strips MCP prefix', () => { + // getMcpDisplayName normalizes server name before building prefix + expect(getMcpDisplayName('mcp__my_server__query', 'my.server')).toBe('query') + }) +}) + +describe('extractMcpToolDisplayName', () => { + test('removes MCP suffix', () => { + expect(extractMcpToolDisplayName('github - Add comment (MCP)')).toBe('Add comment') + }) + + test('handles no dash', () => { + expect(extractMcpToolDisplayName('Add comment (MCP)')).toBe('Add comment') + }) + + test('handles no suffix', 
() => { + expect(extractMcpToolDisplayName('github - Add comment')).toBe('Add comment') + }) +}) diff --git a/packages/mcp-client/src/cache.ts b/packages/mcp-client/src/cache.ts new file mode 100644 index 000000000..b434cb506 --- /dev/null +++ b/packages/mcp-client/src/cache.ts @@ -0,0 +1,58 @@ +// LRU memoization cache for MCP tool discovery +// Adapted from src/utils/memoize.ts — only memoizeWithLRU needed + +import { LRUCache } from 'lru-cache' + +type LRUMemoizedFunction = { + (...args: Args): Result + cache: { + clear: () => void + size: () => number + delete: (key: string) => boolean + get: (key: string) => Result | undefined + has: (key: string) => boolean + } +} + +/** + * Creates a memoized function with LRU eviction policy. + * Prevents unbounded memory growth by evicting least recently used entries. + * + * @param f The function to memoize + * @param cacheFn Key generation function + * @param maxCacheSize Maximum cache entries (default 100) + */ +export function memoizeWithLRU< + Args extends unknown[], + Result extends NonNullable, +>( + f: (...args: Args) => Result, + cacheFn: (...args: Args) => string, + maxCacheSize: number = 100, +): LRUMemoizedFunction { + const cache = new LRUCache({ + max: maxCacheSize, + }) + + const memoized = (...args: Args): Result => { + const key = cacheFn(...args) + const cached = cache.get(key) + if (cached !== undefined) { + return cached + } + + const result = f(...args) + cache.set(key, result) + return result + } + + memoized.cache = { + clear: () => cache.clear(), + size: () => cache.size, + delete: (key: string) => cache.delete(key), + get: (key: string) => cache.peek(key), + has: (key: string) => cache.has(key), + } + + return memoized +} diff --git a/packages/mcp-client/src/connection.ts b/packages/mcp-client/src/connection.ts new file mode 100644 index 000000000..30f3b18d6 --- /dev/null +++ b/packages/mcp-client/src/connection.ts @@ -0,0 +1,519 @@ +// MCP connection utilities — protocol-level helpers for 
establishing and managing connections +// These are building blocks used by the host's connectToServer implementation. + +import { Client } from '@modelcontextprotocol/sdk/client/index.js' +import { ListRootsRequestSchema } from '@modelcontextprotocol/sdk/types.js' +import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js' +import type { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js' +import type { McpClientDependencies } from './interfaces.js' +import type { ConnectedMCPServer, ScopedMcpServerConfig } from './types.js' + +// ============================================================================ +// Constants +// ============================================================================ + +/** Default connection timeout in milliseconds */ +export const DEFAULT_CONNECTION_TIMEOUT_MS = 30_000 + +/** Maximum length for MCP descriptions/instructions */ +export const MAX_MCP_DESCRIPTION_LENGTH = 2048 + +/** Maximum consecutive terminal errors before triggering reconnection */ +export const MAX_ERRORS_BEFORE_RECONNECT = 3 + +// ============================================================================ +// Client creation +// ============================================================================ + +export interface CreateClientOptions { + /** Client name (e.g., "claude-code") */ + name: string + /** Client title */ + title?: string + /** Client version */ + version: string + /** Client description */ + description?: string + /** Client website URL */ + websiteUrl?: string + /** Root URI for ListRoots requests (defaults to current working directory) */ + rootUri?: string +} + +/** + * Creates a configured MCP Client instance with standard capabilities and handlers. + * The host can further customize the client before connecting. + */ +export function createMcpClient(options: CreateClientOptions): Client { + const client = new Client( + { + name: options.name, + title: options.title ?? 
options.name, + version: options.version, + description: options.description, + websiteUrl: options.websiteUrl, + }, + { + capabilities: { + roots: {}, + elicitation: {}, + }, + }, + ) + + // Register default ListRoots handler + client.setRequestHandler(ListRootsRequestSchema, async () => ({ + roots: [ + { + uri: options.rootUri ?? `file://${process.cwd()}`, + }, + ], + })) + + return client +} + +// ============================================================================ +// Connection timeout +// ============================================================================ + +/** + * Wraps a connection promise with a timeout. + * Returns the result of connectPromise or rejects with a timeout error. + */ +export async function withConnectionTimeout( + connectPromise: Promise, + timeoutMs: number, + onTimeout: () => Promise | void, +): Promise { + const startTime = Date.now() + + const timeoutPromise = new Promise((_, reject) => { + const timeoutId = setTimeout(async () => { + await onTimeout() + reject( + new Error( + `MCP connection timed out after ${timeoutMs}ms`, + ), + ) + }, timeoutMs) + + // Clean up timeout if connect resolves or rejects + connectPromise.then( + () => clearTimeout(timeoutId), + () => clearTimeout(timeoutId), + ) + }) + + return Promise.race([connectPromise, timeoutPromise]) +} + +// ============================================================================ +// Stderr capture +// ============================================================================ + +/** + * Sets up stderr capture for stdio transports. + * Returns the stderr output accumulator and cleanup function. 
+ */ +export function captureStderr( + transport: StdioClientTransport, + maxSize = 64 * 1024 * 1024, +): { getOutput: () => string; clearOutput: () => void; removeHandler: () => void } { + let stderrOutput = '' + + const handler = (data: Buffer) => { + if (stderrOutput.length < maxSize) { + try { + stderrOutput += data.toString() + } catch { + // Ignore errors from exceeding max string length + } + } + } + + transport.stderr?.on('data', handler) + + return { + getOutput: () => stderrOutput, + clearOutput: () => { stderrOutput = '' }, + removeHandler: () => { transport.stderr?.off('data', handler) }, + } +} + +// ============================================================================ +// Error/close handlers +// ============================================================================ + +/** + * Terminal connection error patterns that indicate the connection is broken. + */ +export function isTerminalConnectionError(msg: string): boolean { + return ( + msg.includes('ECONNRESET') || + msg.includes('ETIMEDOUT') || + msg.includes('EPIPE') || + msg.includes('EHOSTUNREACH') || + msg.includes('ECONNREFUSED') || + msg.includes('Body Timeout Error') || + msg.includes('terminated') || + msg.includes('SSE stream disconnected') || + msg.includes('Failed to reconnect SSE stream') + ) +} + +/** + * Detects MCP "Session not found" errors (HTTP 404 + JSON-RPC code -32001). + */ +export function isMcpSessionExpiredError(error: Error): boolean { + const httpStatus = + 'code' in error ? 
(error as Error & { code?: number }).code : undefined + if (httpStatus !== 404) { + return false + } + return ( + error.message.includes('"code":-32001') || + error.message.includes('"code": -32001') + ) +} + +export interface ConnectionMonitorOptions { + serverName: string + transportType: string + logger: McpClientDependencies['logger'] + /** Called when the transport should be closed to trigger reconnection */ + closeTransport: () => void + /** Called to clear connection caches on close */ + onConnectionClosed?: () => void +} + +/** + * Installs enhanced error and close handlers on an MCP Client for + * connection drop detection and automatic reconnection. + * + * Returns the cleanup function to remove handlers. + */ +export function installConnectionMonitor( + client: Client, + options: ConnectionMonitorOptions, +): () => void { + const { serverName, transportType, logger, closeTransport, onConnectionClosed } = options + const connectionStartTime = Date.now() + let hasErrorOccurred = false + let consecutiveConnectionErrors = 0 + let hasTriggeredClose = false + + const originalOnerror = client.onerror + const originalOnclose = client.onclose + + const safeClose = (reason: string) => { + if (hasTriggeredClose) return + hasTriggeredClose = true + logger.debug(`[${serverName}] Closing transport (${reason})`) + void client.close().catch(e => { + logger.debug(`[${serverName}] Error during close: ${e}`) + }) + } + + // Error handler + client.onerror = (error: Error) => { + const uptime = Date.now() - connectionStartTime + hasErrorOccurred = true + + logger.debug( + `[${serverName}] ${transportType.toUpperCase()} connection dropped after ${Math.floor(uptime / 1000)}s uptime`, + ) + + // Session expiry for HTTP transports + if ( + (transportType === 'http' || transportType === 'claudeai-proxy') && + isMcpSessionExpiredError(error) + ) { + logger.debug( + `[${serverName}] MCP session expired, triggering reconnection`, + ) + safeClose('session expired') + 
originalOnerror?.(error) + return + } + + // Terminal error tracking for remote transports + if ( + transportType === 'sse' || + transportType === 'http' || + transportType === 'claudeai-proxy' + ) { + if (error.message.includes('Maximum reconnection attempts')) { + safeClose('SSE reconnection exhausted') + originalOnerror?.(error) + return + } + + if (isTerminalConnectionError(error.message)) { + consecutiveConnectionErrors++ + logger.debug( + `[${serverName}] Terminal connection error ${consecutiveConnectionErrors}/${MAX_ERRORS_BEFORE_RECONNECT}`, + ) + + if (consecutiveConnectionErrors >= MAX_ERRORS_BEFORE_RECONNECT) { + consecutiveConnectionErrors = 0 + safeClose('max consecutive terminal errors') + } + } else { + consecutiveConnectionErrors = 0 + } + } + + originalOnerror?.(error) + } + + // Close handler + client.onclose = () => { + const uptime = Date.now() - connectionStartTime + logger.debug( + `[${serverName}] ${transportType.toUpperCase()} connection closed after ${Math.floor(uptime / 1000)}s (${hasErrorOccurred ? 
'with errors' : 'cleanly'})`, + ) + + onConnectionClosed?.() + originalOnclose?.() + } + + // Return cleanup function + return () => { + client.onerror = originalOnerror + client.onclose = originalOnclose + } +} + +// ============================================================================ +// Signal escalation for stdio cleanup +// ============================================================================ + +/** + * Terminates a stdio child process with escalating signals: + * SIGINT (100ms) → SIGTERM (400ms) → SIGKILL + * + * Total maximum cleanup time: ~500ms + */ +export async function terminateWithSignalEscalation( + childPid: number, + logger: McpClientDependencies['logger'], + serverName: string, +): Promise { + try { + logger.debug(`[${serverName}] Sending SIGINT to MCP server process`) + + try { + process.kill(childPid, 'SIGINT') + } catch (error) { + logger.debug(`[${serverName}] Error sending SIGINT: ${error}`) + return + } + + await new Promise(async resolve => { + let resolved = false + + const checkInterval = setInterval(() => { + try { + process.kill(childPid, 0) + } catch { + if (!resolved) { + resolved = true + clearInterval(checkInterval) + clearTimeout(failsafeTimeout) + logger.debug(`[${serverName}] MCP server process exited cleanly`) + resolve() + } + } + }, 50) + + const failsafeTimeout = setTimeout(() => { + if (!resolved) { + resolved = true + clearInterval(checkInterval) + logger.debug(`[${serverName}] Cleanup timeout reached, stopping process monitoring`) + resolve() + } + }, 600) + + try { + // Wait 100ms for SIGINT to work + await sleep(100) + + if (!resolved) { + try { + process.kill(childPid, 0) + // Process still exists, try SIGTERM + logger.debug(`[${serverName}] SIGINT failed, sending SIGTERM`) + try { + process.kill(childPid, 'SIGTERM') + } catch (termError) { + logger.debug(`[${serverName}] Error sending SIGTERM: ${termError}`) + resolved = true + clearInterval(checkInterval) + clearTimeout(failsafeTimeout) + resolve() + 
return + } + } catch { + resolved = true + clearInterval(checkInterval) + clearTimeout(failsafeTimeout) + resolve() + return + } + + // Wait 400ms for SIGTERM + await sleep(400) + + if (!resolved) { + try { + process.kill(childPid, 0) + logger.debug(`[${serverName}] SIGTERM failed, sending SIGKILL`) + try { + process.kill(childPid, 'SIGKILL') + } catch (killError) { + logger.debug(`[${serverName}] Error sending SIGKILL: ${killError}`) + } + } catch { + resolved = true + clearInterval(checkInterval) + clearTimeout(failsafeTimeout) + resolve() + } + } + } + + if (!resolved) { + resolved = true + clearInterval(checkInterval) + clearTimeout(failsafeTimeout) + resolve() + } + } catch { + if (!resolved) { + resolved = true + clearInterval(checkInterval) + clearTimeout(failsafeTimeout) + resolve() + } + } + }) + } catch (processError) { + logger.debug(`[${serverName}] Error terminating process: ${processError}`) + } +} + +/** Simple sleep utility (avoids importing from host) */ +function sleep(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)) +} + +// ============================================================================ +// Cleanup factory +// ============================================================================ + +export interface CleanupOptions { + client: Client + transport: Transport + transportType: string + childPid?: number + inProcessServer?: { close(): Promise } + stderrCleanup?: { removeHandler: () => void } + logger: McpClientDependencies['logger'] + serverName: string +} + +/** + * Creates a cleanup function for an MCP connection. + * Handles in-process servers, stderr listener removal, signal escalation, and client close. 
+ */ +export function createCleanup(options: CleanupOptions): () => Promise { + const { + client, + transport, + transportType, + childPid, + inProcessServer, + stderrCleanup, + logger, + serverName, + } = options + + return async () => { + // In-process servers + if (inProcessServer) { + try { + await inProcessServer.close() + } catch (error) { + logger.debug(`[${serverName}] Error closing in-process server: ${error}`) + } + try { + await client.close() + } catch (error) { + logger.debug(`[${serverName}] Error closing client: ${error}`) + } + return + } + + // Remove stderr listener + stderrCleanup?.removeHandler() + + // Signal escalation for stdio + if (transportType === 'stdio' && childPid) { + await terminateWithSignalEscalation(childPid, logger, serverName) + } + + // Close the client connection (which also closes the transport) + try { + await client.close() + } catch (error) { + logger.debug(`[${serverName}] Error closing client: ${error}`) + } + } +} + +// ============================================================================ +// Connected server result builder +// ============================================================================ + +export interface BuildConnectedServerOptions { + name: string + client: Client + config: ScopedMcpServerConfig + cleanup: () => Promise +} + +/** + * Builds a ConnectedMCPServer result from a connected client. + * Truncates server instructions if they exceed MAX_MCP_DESCRIPTION_LENGTH. + */ +export function buildConnectedServer( + options: BuildConnectedServerOptions, + logger: McpClientDependencies['logger'], +): ConnectedMCPServer { + const { name, client, config, cleanup } = options + + const capabilities = client.getServerCapabilities() ?? 
{} + const serverVersion = client.getServerVersion() + const rawInstructions = client.getInstructions() + + let instructions = rawInstructions + if (rawInstructions && rawInstructions.length > MAX_MCP_DESCRIPTION_LENGTH) { + instructions = rawInstructions.slice(0, MAX_MCP_DESCRIPTION_LENGTH) + '… [truncated]' + logger.debug( + `[${name}] Server instructions truncated from ${rawInstructions.length} to ${MAX_MCP_DESCRIPTION_LENGTH} chars`, + ) + } + + return { + name, + client, + type: 'connected' as const, + capabilities, + serverInfo: serverVersion, + instructions, + config, + cleanup, + } +} diff --git a/packages/mcp-client/src/discovery.ts b/packages/mcp-client/src/discovery.ts new file mode 100644 index 000000000..217f73bc0 --- /dev/null +++ b/packages/mcp-client/src/discovery.ts @@ -0,0 +1,143 @@ +// MCP tool discovery — fetch and process tools from connected MCP servers +// Extracted from src/services/mcp/client.ts (fetchToolsForClient) + +import type { Client } from '@modelcontextprotocol/sdk/client/index.js' +import { + ListToolsResultSchema, + type ListToolsResult, +} from '@modelcontextprotocol/sdk/types.js' +import type { CoreTool } from 'agent-tools' +import type { ConnectedMCPServer } from './types.js' +import type { McpClientDependencies } from './interfaces.js' +import { buildMcpToolName } from './strings.js' +import { memoizeWithLRU } from './cache.js' +import { recursivelySanitizeUnicode } from './sanitization.js' + +// ============================================================================ +// Constants +// ============================================================================ + +/** Default max cache size for tool discovery (keyed by server name) */ +export const MCP_FETCH_CACHE_SIZE = 20 + +/** Maximum description length before truncation */ +const MAX_MCP_DESCRIPTION_LENGTH = 2048 + +// ============================================================================ +// Tool discovery +// 
============================================================================ + +export interface DiscoveryOptions { + /** Server name for logging and tool naming */ + serverName: string + /** Connected MCP server client */ + client: Client + /** Server capabilities (checked before fetching) */ + capabilities: Record + /** Whether to skip the mcp__ prefix for tool names */ + skipPrefix?: boolean + /** Host dependencies for logging */ + deps: McpClientDependencies +} + +/** + * Fetches tools from a connected MCP server and converts them to CoreTool format. + * Returns empty array if the server doesn't support tools or if fetching fails. + */ +export async function discoverTools(options: DiscoveryOptions): Promise { + const { serverName, client, capabilities, skipPrefix, deps } = options + + if (!capabilities?.tools) { + return [] + } + + try { + const result = (await client.request( + { method: 'tools/list' }, + ListToolsResultSchema, + )) as ListToolsResult + + // Sanitize tool data from MCP server + const toolsToProcess = recursivelySanitizeUnicode(result.tools) + + return toolsToProcess.map((tool): CoreTool => { + const fullyQualifiedName = buildMcpToolName(serverName, tool.name) + const effectiveName = skipPrefix ? tool.name : fullyQualifiedName + + return { + name: effectiveName, + mcpInfo: { serverName, toolName: tool.name }, + isMcp: true, + inputJSONSchema: tool.inputSchema as CoreTool['inputJSONSchema'], + async description() { + return tool.description ?? '' + }, + async prompt() { + const desc = tool.description ?? '' + return desc.length > MAX_MCP_DESCRIPTION_LENGTH + ? desc.slice(0, MAX_MCP_DESCRIPTION_LENGTH) + '… [truncated]' + : desc + }, + isConcurrencySafe: () => tool.annotations?.readOnlyHint ?? false, + isReadOnly: () => tool.annotations?.readOnlyHint ?? false, + isDestructive: () => tool.annotations?.destructiveHint ?? false, + isOpenWorld: () => tool.annotations?.openWorldHint ?? 
false, + isEnabled: () => true, + async checkPermissions() { + return { behavior: 'passthrough' as const } + }, + toAutoClassifierInput: () => '', + userFacingName: () => tool.annotations?.title ?? tool.name, + maxResultSizeChars: 100_000, + mapToolResultToToolResultBlockParam: (content: unknown, id: string) => ({ + type: 'tool_result' as const, + tool_use_id: id, + content, + }), + async call() { + throw new Error('Use manager.callTool() instead') + }, + inputSchema: {} as CoreTool['inputSchema'], + } satisfies CoreTool + }) + } catch (error) { + deps.logger.warn(`Failed to fetch tools for ${serverName}:`, error) + return [] + } +} + +// ============================================================================ +// Cached tool discovery (LRU by server name) +// ============================================================================ + +/** + * Creates a memoized tool discovery function with LRU caching. + * Cache is keyed by server name (stable across reconnects). + */ +export function createCachedToolDiscovery( + deps: McpClientDependencies, + cacheSize: number = MCP_FETCH_CACHE_SIZE, +): { + discover: (server: ConnectedMCPServer, skipPrefix?: boolean) => Promise + cache: { delete(key: string): void; clear(): void } +} { + const discover = memoizeWithLRU( + async (server: ConnectedMCPServer, skipPrefix?: boolean): Promise => { + if (server.type !== 'connected') return [] + return discoverTools({ + serverName: server.name, + client: server.client, + capabilities: server.capabilities ?? {}, + skipPrefix, + deps, + }) + }, + (server: ConnectedMCPServer) => server.name, + cacheSize, + ) + + return { + discover, + cache: discover.cache, + } +} diff --git a/packages/mcp-client/src/errors.ts b/packages/mcp-client/src/errors.ts new file mode 100644 index 000000000..f917c8cd6 --- /dev/null +++ b/packages/mcp-client/src/errors.ts @@ -0,0 +1,80 @@ +// MCP typed error hierarchy + +/** + * Base error class for all MCP-related errors. 
+ */ +export class McpError extends Error { + constructor( + message: string, + public readonly serverName: string, + public readonly code: string, + ) { + super(message) + this.name = 'McpError' + } +} + +/** + * Error thrown when connection to an MCP server fails. + */ +export class McpConnectionError extends McpError { + constructor( + serverName: string, + message: string, + public readonly cause?: Error, + ) { + super(message, serverName, 'CONNECTION_FAILED') + this.name = 'McpConnectionError' + } +} + +/** + * Error thrown when authentication is required but not available. + */ +export class McpAuthError extends McpError { + constructor(serverName: string, message: string) { + super(message, serverName, 'AUTH_REQUIRED') + this.name = 'McpAuthError' + } +} + +/** + * Error thrown when a connection or request times out. + */ +export class McpTimeoutError extends McpError { + constructor( + serverName: string, + public readonly timeoutMs: number, + ) { + super( + `Connection to ${serverName} timed out after ${timeoutMs}ms`, + serverName, + 'TIMEOUT', + ) + this.name = 'McpTimeoutError' + } +} + +/** + * Error thrown when an MCP tool call fails. + */ +export class McpToolCallError extends McpError { + constructor( + serverName: string, + public readonly toolName: string, + message: string, + ) { + super(message, serverName, 'TOOL_CALL_FAILED') + this.name = 'McpToolCallError' + } +} + +/** + * Error thrown when an MCP session has expired. 
+ */ +export class McpSessionExpiredError extends McpError { + constructor(serverName: string) { + super(`Session expired for ${serverName}`, serverName, 'SESSION_EXPIRED') + this.name = 'McpSessionExpiredError' + } +} diff --git a/packages/mcp-client/src/execution.ts b/packages/mcp-client/src/execution.ts new file mode 100644 index 000000000..381818840 --- /dev/null +++ b/packages/mcp-client/src/execution.ts @@ -0,0 +1,182 @@ +// MCP tool execution — call tools on connected MCP servers +// Extracted from src/services/mcp/client.ts (callMCPTool) + +import { + CallToolResultSchema, +} from '@modelcontextprotocol/sdk/types.js' +import type { ConnectedMCPServer } from './types.js' +import type { McpClientDependencies } from './interfaces.js' +import { + McpToolCallError, + McpAuthError, +} from './errors.js' + +// ============================================================================ +// Constants +// ============================================================================ + +/** Default timeout for MCP tool calls (~27.8 hours — effectively infinite) */ +const DEFAULT_MCP_TOOL_TIMEOUT_MS = 100_000_000 + +// ============================================================================ +// Tool execution +// ============================================================================ + +export interface CallToolOptions { + /** The connected MCP server to call */ + client: ConnectedMCPServer + /** Tool name (as registered on the server, not the fully qualified name) */ + tool: string + /** Tool arguments */ + args: Record + /** Optional metadata to send with the call */ + meta?: Record + /** Abort signal for cancellation */ + signal: AbortSignal + /** Progress callback */ + onProgress?: (data: { progress?: number; total?: number; message?: string }) => void + /** Tool call timeout in ms (defaults to ~27.8 hours) */ + timeoutMs?: number +} + +export interface CallToolResult { + content: unknown + _meta?: Record + structuredContent?: Record + isError?: boolean +} 
/**
 * Largest delay Node.js timers honor. A larger (or non-positive) delay is
 * coerced to 1 ms, which would make a "very long" timeout fire immediately.
 */
const MAX_TIMER_DELAY_MS = 2_147_483_647

/** Interval between "still running" debug messages for long tool calls. */
const PROGRESS_LOG_INTERVAL_MS = 30_000

/** A pending timeout rejection together with the means to cancel it. */
interface ToolTimeout {
  /** Never resolves; rejects once the timeout elapses (unless cancelled). */
  promise: Promise<never>
  /** Clears the underlying timer. Safe to call more than once. */
  cancel: () => void
}

/**
 * Call a tool on a connected MCP server with timeout and progress handling.
 *
 * This is the protocol-level tool execution function. The host is responsible for:
 * - Session management (reconnection on expiry)
 * - Result transformation (content processing, truncation, persistence)
 * - Error wrapping for telemetry
 *
 * @param options Target server, tool name, arguments and call controls.
 * @param deps Host dependencies; only `deps.logger.debug` is used here.
 * @returns The raw SDK result as `content`, plus its `_meta` and
 *   `structuredContent` fields.
 * @throws McpToolCallError when the server reports `isError` in its result.
 * @throws McpAuthError when the failure carries `code === 401`.
 * @throws Error when the call does not settle within the effective timeout;
 *   any other failure from the SDK client is rethrown unchanged.
 */
export async function callMcpTool(
  options: CallToolOptions,
  deps: McpClientDependencies,
): Promise<CallToolResult> {
  const { client, tool, args, meta, signal, onProgress, timeoutMs } = options
  const { name: serverName, client: mcpClient } = client
  const effectiveTimeout = resolveToolTimeoutMs(timeoutMs)

  let progressInterval: ReturnType<typeof setInterval> | undefined
  let timeout: ToolTimeout | undefined

  try {
    deps.logger.debug(`[${serverName}] Calling MCP tool: ${tool}`)

    // Progress logging for long-running tools (every 30 seconds)
    progressInterval = setInterval(() => {
      deps.logger.debug(`[${serverName}] Tool '${tool}' still running`)
    }, PROGRESS_LOG_INTERVAL_MS)

    timeout = startToolTimeout(serverName, tool, effectiveTimeout)

    const result = await Promise.race([
      mcpClient.callTool(
        {
          name: tool,
          arguments: args,
          _meta: meta,
        },
        CallToolResultSchema,
        {
          signal,
          timeout: effectiveTimeout,
          onprogress: onProgress,
        },
      ),
      timeout.promise,
    ])

    // A tool-level failure is reported in-band via `isError`, not as a rejection.
    if ('isError' in result && result.isError) {
      throw new McpToolCallError(serverName, tool, extractErrorDetails(result))
    }

    return {
      content: result,
      _meta: result._meta as Record<string, unknown> | undefined,
      structuredContent: result.structuredContent as
        | Record<string, unknown>
        | undefined,
    }
  } catch (e) {
    // Aborts are caller-initiated, so they are not logged as failures.
    if (e instanceof Error && e.name !== 'AbortError') {
      deps.logger.debug(`[${serverName}] Tool '${tool}' failed: ${e.message}`)
    }

    // Surface 401s as a dedicated error so the host can trigger re-auth.
    if (e instanceof Error && (e as { code?: unknown }).code === 401) {
      throw new McpAuthError(
        serverName,
        `MCP server "${serverName}" requires re-authorization (token expired)`,
      )
    }

    throw e
  } finally {
    // Single cleanup point for both timers, on success and on failure.
    if (progressInterval !== undefined) {
      clearInterval(progressInterval)
    }
    timeout?.cancel()
  }
}

// ============================================================================
// Helpers
// ============================================================================

/**
 * Reads the default tool timeout from `MCP_TOOL_TIMEOUT` (milliseconds).
 * Missing, non-numeric, zero and negative values fall back to
 * `DEFAULT_MCP_TOOL_TIMEOUT_MS`.
 */
function getMcpToolTimeoutMs(): number {
  const parsed = Number.parseInt(process.env.MCP_TOOL_TIMEOUT ?? '', 10)
  return Number.isFinite(parsed) && parsed > 0
    ? parsed
    : DEFAULT_MCP_TOOL_TIMEOUT_MS
}

/**
 * Picks the timeout for one call: the caller's value when it is a positive
 * finite number, otherwise the environment/default value. The result is
 * capped at the largest delay Node.js timers support.
 */
function resolveToolTimeoutMs(requestedMs?: number): number {
  const candidate =
    requestedMs !== undefined && Number.isFinite(requestedMs) && requestedMs > 0
      ? requestedMs
      : getMcpToolTimeoutMs()
  return Math.min(candidate, MAX_TIMER_DELAY_MS)
}

/**
 * Starts the timer that rejects a tool call once `timeoutMs` has elapsed.
 * The timer is unref'd so it never keeps the process alive, and the returned
 * `cancel` lets the caller release it as soon as the call settles.
 */
function startToolTimeout(
  serverName: string,
  tool: string,
  timeoutMs: number,
): ToolTimeout {
  let timeoutId: ReturnType<typeof setTimeout> | undefined
  const promise = new Promise<never>((_, reject) => {
    timeoutId = setTimeout(() => {
      reject(
        new Error(
          `MCP server "${serverName}" tool "${tool}" timed out after ${Math.floor(timeoutMs / 1000)}s`,
        ),
      )
    }, timeoutMs)
    timeoutId.unref?.()
  })
  return {
    promise,
    cancel: () => {
      if (timeoutId !== undefined) {
        clearTimeout(timeoutId)
        timeoutId = undefined
      }
    },
  }
}

/**
 * Extracts a human-readable message from an `isError` result: the `text` of
 * the first content item when it is a string, otherwise 'Unknown error'.
 */
function extractErrorDetails(result: unknown): string {
  const content = (result as { content?: unknown }).content
  if (Array.isArray(content) && content.length > 0) {
    const first: unknown = content[0]
    if (first && typeof first === 'object' && 'text' in first) {
      const text = (first as { text: unknown }).text
      if (typeof text === 'string') {
        return text
      }
    }
  }
  return 'Unknown error'
}
McpWebSocketIDEServerConfig, + McpHTTPServerConfig, + McpWebSocketServerConfig, + McpSdkServerConfig, + McpClaudeAIProxyServerConfig, + McpServerConfig, + ScopedMcpServerConfig, + McpJsonConfig, + MCPServerConnection, + ConnectedMCPServer, + FailedMCPServer, + NeedsAuthMCPServer, + PendingMCPServer, + DisabledMCPServer, + ServerResource, + SerializedTool, + SerializedClient, + MCPCliState, +} from './types.js' + +// Errors +export { + McpError, + McpConnectionError, + McpAuthError, + McpTimeoutError, + McpToolCallError, + McpSessionExpiredError, +} from './errors.js' + +// Interfaces (host dependency injection) +export type { + Logger, + AnalyticsSink, + FeatureGate, + AuthProvider, + ProxyConfig, + ContentStorage, + ImageProcessor, + HttpConfig, + SubprocessEnvProvider, + McpClientDependencies, +} from './interfaces.js' + +// Transport +export { createLinkedTransportPair } from './transport/InProcessTransport.js' + +// String utilities +export { + buildMcpToolName, + normalizeNameForMCP, + mcpInfoFromString, + getMcpPrefix, + getToolNameForPermissionCheck, + getMcpDisplayName, + extractMcpToolDisplayName, +} from './strings.js' + +// Cache +export { memoizeWithLRU } from './cache.js' + +// Sanitization +export { recursivelySanitizeUnicode } from './sanitization.js' + +// Connection utilities +export { + DEFAULT_CONNECTION_TIMEOUT_MS, + MAX_MCP_DESCRIPTION_LENGTH, + MAX_ERRORS_BEFORE_RECONNECT, + createMcpClient, + withConnectionTimeout, + captureStderr, + isTerminalConnectionError, + isMcpSessionExpiredError, + installConnectionMonitor, + terminateWithSignalEscalation, + createCleanup, + buildConnectedServer, +} from './connection.js' +export type { + CreateClientOptions, + ConnectionMonitorOptions, + CleanupOptions, + BuildConnectedServerOptions, +} from './connection.js' + +// Tool discovery +export { + MCP_FETCH_CACHE_SIZE, + discoverTools, + createCachedToolDiscovery, +} from './discovery.js' +export type { DiscoveryOptions } from './discovery.js' + +// Tool 
execution +export { callMcpTool } from './execution.js' +export type { CallToolOptions, CallToolResult } from './execution.js' + +// Manager (main API) +export { createMcpManager } from './manager.js' +export type { McpManager } from './manager.js' diff --git a/packages/mcp-client/src/interfaces.ts b/packages/mcp-client/src/interfaces.ts new file mode 100644 index 000000000..9e4fdb753 --- /dev/null +++ b/packages/mcp-client/src/interfaces.ts @@ -0,0 +1,74 @@ +// Host dependency injection interfaces +// The MCP client package uses these interfaces to decouple from host infrastructure. + +/** Logging interface */ +export interface Logger { + debug(message: string, ...args: unknown[]): void + info(message: string, ...args: unknown[]): void + warn(message: string, ...args: unknown[]): void + error(message: string, ...args: unknown[]): void +} + +/** Analytics/telemetry callback */ +export interface AnalyticsSink { + trackEvent(event: string, metadata: Record): void +} + +/** Feature flag check */ +export interface FeatureGate { + isEnabled(flag: string): boolean +} + +/** OAuth token provider */ +export interface AuthProvider { + getTokens(): Promise<{ accessToken: string } | null> + refreshTokens(): Promise + handleOAuthError?(error: unknown): Promise +} + +/** HTTP/WebSocket proxy configuration */ +export interface ProxyConfig { + getFetchOptions?(): Record + getWebSocketAgent?(url: string): unknown + getWebSocketUrl?(url: string): string | undefined + getTLSOptions?(): Record | undefined +} + +/** Binary/image content persistence */ +export interface ContentStorage { + persistBinaryContent(data: Buffer, ext: string): Promise + persistToolResult?(toolUseId: string, content: unknown): Promise +} + +/** Image processing (resize, downsample) */ +export interface ImageProcessor { + resizeAndDownsample?(buffer: Buffer): Promise +} + +/** HTTP configuration (user agent, session ID) */ +export interface HttpConfig { + getUserAgent(): string + getSessionId?(): string +} + 
+/** Subprocess environment variable provider */ +export interface SubprocessEnvProvider { + getEnv(additional?: Record): Record +} + +/** + * Complete set of host dependencies required by the MCP client. + * All fields except `logger` and `httpConfig` are optional — + * the client degrades gracefully when they're not provided. + */ +export interface McpClientDependencies { + logger: Logger + analytics?: AnalyticsSink + featureGate?: FeatureGate + auth?: AuthProvider + proxy?: ProxyConfig + storage?: ContentStorage + imageProcessor?: ImageProcessor + httpConfig: HttpConfig + subprocessEnv?: SubprocessEnvProvider +} diff --git a/packages/mcp-client/src/manager.ts b/packages/mcp-client/src/manager.ts new file mode 100644 index 000000000..3695882f0 --- /dev/null +++ b/packages/mcp-client/src/manager.ts @@ -0,0 +1,241 @@ +// McpManager — imperative API for MCP protocol client +// Factory function that creates a manager instance with event-based notifications + +import type { Client } from '@modelcontextprotocol/sdk/client/index.js' +import type { + ListToolsResult, +} from '@modelcontextprotocol/sdk/types.js' +import memoize from 'lodash-es/memoize.js' +import { buildMcpToolName } from './strings.js' +import type { CoreTool } from 'agent-tools' +import type { + McpServerConfig, + ScopedMcpServerConfig, + MCPServerConnection, + ConnectedMCPServer, + FailedMCPServer, + NeedsAuthMCPServer, +} from './types.js' +import type { McpClientDependencies } from './interfaces.js' +import { + McpConnectionError, + McpAuthError, + McpTimeoutError, +} from './errors.js' +import { memoizeWithLRU } from './cache.js' +import { discoverTools } from './discovery.js' +import { callMcpTool } from './execution.js' + +// ============================================================================ +// Event types +// ============================================================================ + +export type McpManagerEvents = { + connected: (name: string) => void + disconnected: (name: string, 
// ============================================================================
// Event types
// ============================================================================

/** Events emitted by the manager, keyed by name, with their handler signatures. */
export type McpManagerEvents = {
  connected: (name: string) => void
  disconnected: (name: string, error?: Error) => void
  toolsChanged: (serverName: string, tools: CoreTool[]) => void
  error: (name: string, error: Error) => void
  authRequired: (name: string) => void
}

// Handlers for different events have unrelated signatures, so the listener
// registry stores them under one permissive function type.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type EventHandler = (...args: any[]) => void

/** Host-supplied function that performs the transport-level connection. */
type ConnectFn = (
  name: string,
  config: ScopedMcpServerConfig,
) => Promise<MCPServerConnection>

// ============================================================================
// Manager interface
// ============================================================================

export interface McpManager {
  /**
   * Installs the host's transport-level connect function. Must be called
   * before `connect()`; it is part of the public type because the factory's
   * own usage example relies on it.
   */
  setConnectFn(fn: ConnectFn): void
  /** Connects (or reconnects) the named server and caches its tools. */
  connect(name: string, config: McpServerConfig): Promise<MCPServerConnection>
  /** Cleans up and forgets the named server; a no-op for unknown names. */
  disconnect(name: string): Promise<void>
  /** Disconnects every known server. */
  disconnectAll(): Promise<void>
  /** Returns a snapshot copy of the current connections. */
  getConnections(): Map<string, MCPServerConnection>
  /** Returns the cached tools of one server (empty when unknown). */
  getTools(serverName: string): CoreTool[]
  /** Returns the cached tools of all servers. */
  getAllTools(): CoreTool[]
  /** Calls a tool on a connected server. */
  callTool(serverName: string, toolName: string, args: unknown): Promise<unknown>
  on<E extends keyof McpManagerEvents>(event: E, handler: McpManagerEvents[E]): void
  off(event: string, handler: EventHandler): void
}

// ============================================================================
// Default timeout
// ============================================================================

// NOTE(review): neither constant is referenced by the code in this module.
const MCP_TIMEOUT_MS = 30_000
const MCP_REQUEST_TIMEOUT_MS = 60_000

// ============================================================================
// Manager implementation
// ============================================================================

class McpManagerImpl implements McpManager {
  private readonly connections = new Map<string, MCPServerConnection>()
  private readonly toolsCache = new Map<string, CoreTool[]>()
  private readonly listeners = new Map<string, Set<EventHandler>>()
  private readonly deps: McpClientDependencies
  private connectFn: ConnectFn | null = null

  constructor(deps: McpClientDependencies) {
    this.deps = deps
  }

  /** Set the connect function — the host provides this with all transport logic */
  setConnectFn(fn: ConnectFn): void {
    this.connectFn = fn
  }

  /**
   * Connects `name` through the host's connect function and records the
   * resulting state. A previously stored *different* live connection under
   * the same name is cleaned up so it is not orphaned, and cached tools are
   * dropped when the new state is not `connected`.
   *
   * @throws Error when no connect function has been installed; any failure
   *   of the connect function is emitted as an `error` event and rethrown.
   */
  async connect(name: string, config: McpServerConfig): Promise<MCPServerConnection> {
    if (!this.connectFn) {
      throw new Error('McpManager: connectFn not set. Call setConnectFn() first.')
    }

    const scopedConfig: ScopedMcpServerConfig = { ...config, scope: 'dynamic' }

    try {
      const connection = await this.connectFn(name, scopedConfig)

      // Read the previous entry only after the await so a concurrent
      // connect/disconnect is observed. The identity check matters because
      // the host may hand back the same connection object again.
      const previous = this.connections.get(name)
      this.connections.set(name, connection)
      if (previous && previous !== connection && previous.type === 'connected') {
        await this.cleanupConnection(name, previous)
      }

      if (connection.type === 'connected') {
        this.emit('connected', name)
        // Fetch tools for this server
        await this.refreshTools(name, connection)
      } else {
        // Tools of an earlier live connection must not outlive it.
        this.toolsCache.delete(name)
        if (connection.type === 'needs-auth') {
          this.emit('authRequired', name)
        }
      }

      return connection
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err))
      this.emit('error', name, error)
      throw err
    }
  }

  async disconnect(name: string): Promise<void> {
    const conn = this.connections.get(name)
    if (!conn) return

    if (conn.type === 'connected') {
      await this.cleanupConnection(name, conn)
    }

    this.connections.delete(name)
    this.toolsCache.delete(name)
    this.emit('disconnected', name)
  }

  async disconnectAll(): Promise<void> {
    const names = [...this.connections.keys()]
    await Promise.all(names.map(name => this.disconnect(name)))
  }

  getConnections(): Map<string, MCPServerConnection> {
    return new Map(this.connections)
  }

  getTools(serverName: string): CoreTool[] {
    return this.toolsCache.get(serverName) ?? []
  }

  getAllTools(): CoreTool[] {
    return [...this.toolsCache.values()].flat()
  }

  /**
   * Calls `toolName` on a connected server.
   *
   * @throws McpConnectionError when the server is unknown or not connected.
   */
  async callTool(serverName: string, toolName: string, args: unknown): Promise<unknown> {
    const conn = this.connections.get(serverName)
    if (!conn || conn.type !== 'connected') {
      throw new McpConnectionError(serverName, `Server ${serverName} is not connected`)
    }

    return callMcpTool(
      {
        client: conn,
        tool: toolName,
        // Tools without parameters may be invoked with no arguments at all.
        args: (args ?? {}) as Record<string, unknown>,
        signal: new AbortController().signal,
      },
      this.deps,
    )
  }

  on<E extends keyof McpManagerEvents>(event: E, handler: McpManagerEvents[E]): void {
    let handlers = this.listeners.get(event)
    if (!handlers) {
      handlers = new Set()
      this.listeners.set(event, handlers)
    }
    handlers.add(handler)
  }

  off(event: string, handler: EventHandler): void {
    this.listeners.get(event)?.delete(handler)
  }

  // ── Private ──

  /** Invokes every handler of `event`; a throwing handler is logged, not propagated. */
  private emit<E extends keyof McpManagerEvents>(
    event: E,
    ...args: Parameters<McpManagerEvents[E]>
  ): void {
    this.listeners.get(event)?.forEach(handler => {
      try {
        handler(...args)
      } catch (err) {
        this.deps.logger.error(`Error in ${event} handler:`, err)
      }
    })
  }

  /** Runs a connection's cleanup, logging (not throwing) on failure. */
  private async cleanupConnection(name: string, conn: ConnectedMCPServer): Promise<void> {
    try {
      await conn.cleanup()
    } catch (err) {
      // The `[server]` prefix matches the logging convention of callMcpTool.
      this.deps.logger.warn(`[${name}] Error disconnecting:`, err)
    }
  }

  /** Discovers the server's tools and publishes them, unless `conn` was superseded meanwhile. */
  private async refreshTools(name: string, conn: ConnectedMCPServer): Promise<void> {
    try {
      const tools = await discoverTools({
        serverName: name,
        client: conn.client,
        capabilities: conn.capabilities ?? {},
        deps: this.deps,
      })

      // The server may have been disconnected or replaced while discovery
      // was in flight; do not resurrect a cache entry for it.
      if (this.connections.get(name) !== conn) {
        return
      }

      this.toolsCache.set(name, tools)
      this.emit('toolsChanged', name, tools)
    } catch (err) {
      this.deps.logger.warn(`[${name}] Failed to fetch tools:`, err)
    }
  }
}

// ============================================================================
// Factory function
// ============================================================================

/**
 * Creates a new MCP manager instance.
 *
 * The manager handles connection lifecycle, tool discovery, and event notification.
 * The host must call `setConnectFn()` to provide the transport-level connection logic.
 *
 * @param deps Host dependency injections (logger, auth, proxy, etc.)
 * @returns McpManager instance
 *
 * @example
 * ```typescript
 * const manager = createMcpManager({
 *   logger: console,
 *   httpConfig: { getUserAgent: () => 'my-app/1.0' },
 * })
 *
 * manager.setConnectFn(async (name, config) => {
 *   // Transport-level connection logic here
 * })
 *
 * manager.on('connected', (name) => console.log(`Connected to ${name}`))
 * manager.on('toolsChanged', (name, tools) => console.log(`${name}: ${tools.length} tools`))
 *
 * await manager.connect('my-server', { command: 'npx', args: ['my-mcp-server'] })
 * const tools = manager.getAllTools()
 * ```
 */
export function createMcpManager(deps: McpClientDependencies): McpManager {
  return new McpManagerImpl(deps)
}
/** C0 control characters other than \t, \n and \r, plus DEL (U+007F). */
const CONTROL_CHARACTERS = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g

/** U+FFFD REPLACEMENT CHARACTER. */
const REPLACEMENT_CHARACTER = /\uFFFD/g

/**
 * Recursively sanitizes Unicode characters in MCP server responses.
 *
 * - Strings: control characters (except \t, \n, \r) and U+FFFD are removed,
 *   then the text is NFC-normalized.
 * - Arrays: each element is sanitized into a new array.
 * - Other non-null objects: own enumerable string-keyed properties are copied
 *   into a new plain object with sanitized values. Keys are left unchanged.
 * - Everything else is returned as-is.
 *
 * The copy is built with `Object.fromEntries`, which defines own properties.
 * A plain `result[key] = value` assignment would treat an own `"__proto__"`
 * key (which `JSON.parse` can produce) as a prototype change on the result.
 *
 * @param data Value to sanitize; the input itself is not mutated.
 * @returns A sanitized copy with the same shape as `data`.
 */
export function recursivelySanitizeUnicode<T>(data: T): T {
  if (typeof data === 'string') {
    return data
      .replace(CONTROL_CHARACTERS, '')
      .replace(REPLACEMENT_CHARACTER, '')
      .normalize('NFC') as unknown as T
  }

  if (Array.isArray(data)) {
    return data.map(item => recursivelySanitizeUnicode(item)) as unknown as T
  }

  if (data !== null && typeof data === 'object') {
    const sanitizedEntries = Object.entries(data as Record<string, unknown>).map(
      ([key, value]): [string, unknown] => [key, recursivelySanitizeUnicode(value)],
    )
    return Object.fromEntries(sanitizedEntries) as T
  }

  return data
}

// MCP string utility functions — pure, no dependencies
// Extracted from src/services/mcp/mcpStringUtils.ts and normalization.ts

// Claude.ai server names are prefixed with this string
const CLAUDEAI_SERVER_PREFIX = 'claude.ai '

/**
 * Normalize server names to be compatible with the API pattern ^[a-zA-Z0-9_-]{1,64}$
 * Replaces any invalid characters (including dots and spaces) with underscores.
 *
 * For names starting with the claude.ai prefix, runs of underscores are
 * additionally collapsed and one leading/trailing underscore is trimmed.
 * The length is not altered: no truncation or padding is applied here.
 */
export function normalizeNameForMCP(name: string): string {
  const replaced = name.replace(/[^a-zA-Z0-9_-]/g, '_')
  if (!name.startsWith(CLAUDEAI_SERVER_PREFIX)) {
    return replaced
  }
  return replaced.replace(/_+/g, '_').replace(/^_|_$/g, '')
}

/**
 * Generates the MCP tool/command name prefix for a given server
 */
export function getMcpPrefix(serverName: string): string {
  return `mcp__${normalizeNameForMCP(serverName)}__`
}
/**
 * Builds a fully qualified MCP tool name from server and tool names.
 * Inverse of mcpInfoFromString().
 */
export function buildMcpToolName(serverName: string, toolName: string): string {
  return `${getMcpPrefix(serverName)}${normalizeNameForMCP(toolName)}`
}

/**
 * Extracts MCP server information from a tool name string.
 *
 * The string is split on `__`: the first part must be `mcp`, the second is
 * the server name, and everything after it is re-joined as the tool name.
 * A server name that itself contains `__` therefore cannot be recovered.
 *
 * @param toolString Expected format: "mcp__serverName__toolName"
 * @returns The parsed parts (`toolName` is `undefined` when the string has
 *   no tool segment), or `null` when the string is not an MCP name.
 */
export function mcpInfoFromString(toolString: string): {
  serverName: string
  toolName: string | undefined
} | null {
  const [marker, serverName, ...toolSegments] = toolString.split('__')
  if (marker !== 'mcp' || !serverName) {
    return null
  }
  return {
    serverName,
    toolName: toolSegments.length > 0 ? toolSegments.join('__') : undefined,
  }
}

/**
 * Returns the name to use for permission rule matching: the fully qualified
 * MCP name when the tool carries `mcpInfo`, otherwise its plain `name`.
 */
export function getToolNameForPermissionCheck(tool: {
  name: string
  mcpInfo?: { serverName: string; toolName: string }
}): string {
  const { mcpInfo } = tool
  if (!mcpInfo) {
    return tool.name
  }
  return buildMcpToolName(mcpInfo.serverName, mcpInfo.toolName)
}

/**
 * Extracts the display name from an MCP tool/command name by stripping the
 * server's `mcp__<server>__` prefix.
 *
 * Only a *leading* prefix is removed; a name that merely contains the prefix
 * text somewhere in the middle is returned unchanged.
 */
export function getMcpDisplayName(
  fullName: string,
  serverName: string,
): string {
  // Reuse the single source of truth for the prefix format.
  const prefix = getMcpPrefix(serverName)
  return fullName.startsWith(prefix) ? fullName.slice(prefix.length) : fullName
}

/**
 * Extracts just the tool/command display name from a userFacingName:
 * drops a trailing "(MCP)" marker, then returns what follows the first
 * " - " separator (or the whole remainder when there is none).
 */
export function extractMcpToolDisplayName(userFacingName: string): string {
  const withoutSuffix = userFacingName.replace(/\s*\(MCP\)\s*$/, '').trim()
  const separatorIndex = withoutSuffix.indexOf(' - ')
  return separatorIndex === -1
    ? withoutSuffix
    : withoutSuffix.substring(separatorIndex + 3).trim()
}
/**
 * In-process linked transport pair for running an MCP server and client
 * in the same process without spawning a subprocess.
 *
 * `send()` on one side delivers to `onmessage` on the other.
 * `close()` on either side calls `onclose` on both.
 *
 * Once a side is closed it receives no further messages: anything still
 * queued for it at that moment is dropped.
 */
class InProcessTransport implements Transport {
  private peer: InProcessTransport | undefined
  private closed = false

  onclose?: () => void
  onerror?: (error: Error) => void
  onmessage?: (message: JSONRPCMessage) => void

  /** @internal Links this transport to the one that receives its messages. */
  _setPeer(peer: InProcessTransport): void {
    this.peer = peer
  }

  /** No-op: an in-process pair needs no connection setup. */
  async start(): Promise<void> {}

  /**
   * Queues `message` for delivery to the peer's `onmessage`.
   *
   * @throws Error when this transport has already been closed.
   */
  async send(message: JSONRPCMessage): Promise<void> {
    if (this.closed) {
      throw new Error('Transport is closed')
    }
    // Deliver to the other side asynchronously to avoid stack depth issues
    // with synchronous request/response cycles
    queueMicrotask(() => {
      const receiver = this.peer
      // The pair may have been closed between send() and this microtask;
      // a closed side has already seen onclose and must stay silent.
      if (!receiver || receiver.closed) {
        return
      }
      receiver.onmessage?.(message)
    })
  }

  /**
   * Closes both sides of the pair, invoking each side's `onclose` once.
   * Calling it again is a no-op. The peer is closed even when this side's
   * `onclose` handler throws (the exception still propagates afterwards).
   */
  async close(): Promise<void> {
    if (this.closed) {
      return
    }
    this.closed = true
    try {
      this.onclose?.()
    } finally {
      // Close the peer if it hasn't already closed
      const other = this.peer
      if (other && !other.closed) {
        other.closed = true
        other.onclose?.()
      }
    }
  }
}
+ * + * @returns [clientTransport, serverTransport] + */ +export function createLinkedTransportPair(): [Transport, Transport] { + const a = new InProcessTransport() + const b = new InProcessTransport() + a._setPeer(b) + b._setPeer(a) + return [a, b] +} diff --git a/packages/mcp-client/src/types.ts b/packages/mcp-client/src/types.ts new file mode 100644 index 000000000..5204a654a --- /dev/null +++ b/packages/mcp-client/src/types.ts @@ -0,0 +1,240 @@ +// MCP configuration types, schemas, and connection state types +// Adapted from src/services/mcp/types.ts — uses zod directly instead of lazySchema + +import type { Client } from '@modelcontextprotocol/sdk/client/index.js' +import type { + Resource, + ServerCapabilities, +} from '@modelcontextprotocol/sdk/types.js' +import { z } from 'zod/v4' + +// ============================================================================ +// Configuration scope +// ============================================================================ + +export const ConfigScope = z.enum([ + 'local', + 'user', + 'project', + 'dynamic', + 'enterprise', + 'claudeai', + 'managed', +]) +export type ConfigScope = z.infer + +// ============================================================================ +// Transport type +// ============================================================================ + +export const TransportType = z.enum([ + 'stdio', + 'sse', + 'sse-ide', + 'http', + 'ws', + 'sdk', + 'claudeai-proxy', +]) +export type Transport = z.infer + +// ============================================================================ +// Server configuration schemas +// ============================================================================ + +export const McpStdioServerConfigSchema = z.object({ + type: z.literal('stdio').optional(), + command: z.string().min(1, 'Command cannot be empty'), + args: z.array(z.string()).default([]), + env: z.record(z.string(), z.string()).optional(), +}) + +const McpOAuthConfigSchema = z.object({ + clientId: 
z.string().optional(), + callbackPort: z.number().int().positive().optional(), + authServerMetadataUrl: z + .string() + .url() + .startsWith('https://', { + message: 'authServerMetadataUrl must use https://', + }) + .optional(), + xaa: z.boolean().optional(), +}) + +export const McpSSEServerConfigSchema = z.object({ + type: z.literal('sse'), + url: z.string(), + headers: z.record(z.string(), z.string()).optional(), + headersHelper: z.string().optional(), + oauth: McpOAuthConfigSchema.optional(), +}) + +export const McpSSEIDEServerConfigSchema = z.object({ + type: z.literal('sse-ide'), + url: z.string(), + ideName: z.string(), + ideRunningInWindows: z.boolean().optional(), +}) + +export const McpWebSocketIDEServerConfigSchema = z.object({ + type: z.literal('ws-ide'), + url: z.string(), + ideName: z.string(), + authToken: z.string().optional(), + ideRunningInWindows: z.boolean().optional(), +}) + +export const McpHTTPServerConfigSchema = z.object({ + type: z.literal('http'), + url: z.string(), + headers: z.record(z.string(), z.string()).optional(), + headersHelper: z.string().optional(), + oauth: McpOAuthConfigSchema.optional(), +}) + +export const McpWebSocketServerConfigSchema = z.object({ + type: z.literal('ws'), + url: z.string(), + headers: z.record(z.string(), z.string()).optional(), + headersHelper: z.string().optional(), +}) + +export const McpSdkServerConfigSchema = z.object({ + type: z.literal('sdk'), + name: z.string(), +}) + +export const McpClaudeAIProxyServerConfigSchema = z.object({ + type: z.literal('claudeai-proxy'), + url: z.string(), + id: z.string(), +}) + +export const McpServerConfigSchema = z.union([ + McpStdioServerConfigSchema, + McpSSEServerConfigSchema, + McpSSEIDEServerConfigSchema, + McpWebSocketIDEServerConfigSchema, + McpHTTPServerConfigSchema, + McpWebSocketServerConfigSchema, + McpSdkServerConfigSchema, + McpClaudeAIProxyServerConfigSchema, +]) + +// ============================================================================ +// 
Inferred config types +// ============================================================================ + +export type McpStdioServerConfig = z.infer +export type McpSSEServerConfig = z.infer +export type McpSSEIDEServerConfig = z.infer +export type McpWebSocketIDEServerConfig = z.infer< + typeof McpWebSocketIDEServerConfigSchema +> +export type McpHTTPServerConfig = z.infer +export type McpWebSocketServerConfig = z.infer< + typeof McpWebSocketServerConfigSchema +> +export type McpSdkServerConfig = z.infer +export type McpClaudeAIProxyServerConfig = z.infer< + typeof McpClaudeAIProxyServerConfigSchema +> +export type McpServerConfig = z.infer + +export type ScopedMcpServerConfig = McpServerConfig & { + scope: ConfigScope + pluginSource?: string +} + +export const McpJsonConfigSchema = z.object({ + mcpServers: z.record(z.string(), McpServerConfigSchema), +}) + +export type McpJsonConfig = z.infer + +// ============================================================================ +// Server connection state types +// ============================================================================ + +export type ConnectedMCPServer = { + client: Client + name: string + type: 'connected' + capabilities: ServerCapabilities + serverInfo?: { + name: string + version: string + } + instructions?: string + config: ScopedMcpServerConfig + cleanup: () => Promise +} + +export type FailedMCPServer = { + name: string + type: 'failed' + config: ScopedMcpServerConfig + error?: string +} + +export type NeedsAuthMCPServer = { + name: string + type: 'needs-auth' + config: ScopedMcpServerConfig +} + +export type PendingMCPServer = { + name: string + type: 'pending' + config: ScopedMcpServerConfig + reconnectAttempt?: number + maxReconnectAttempts?: number +} + +export type DisabledMCPServer = { + name: string + type: 'disabled' + config: ScopedMcpServerConfig +} + +export type MCPServerConnection = + | ConnectedMCPServer + | FailedMCPServer + | NeedsAuthMCPServer + | PendingMCPServer + | 
DisabledMCPServer + +// ============================================================================ +// Resource and serialization types +// ============================================================================ + +export type ServerResource = Resource & { server: string } + +export interface SerializedTool { + name: string + description: string + inputJSONSchema?: { + [x: string]: unknown + type: 'object' + properties?: { + [x: string]: unknown + } + } + isMcp?: boolean + originalToolName?: string +} + +export interface SerializedClient { + name: string + type: 'connected' | 'failed' | 'needs-auth' | 'pending' | 'disabled' + capabilities?: ServerCapabilities +} + +export interface MCPCliState { + clients: SerializedClient[] + configs: Record + tools: SerializedTool[] + resources: Record + normalizedNames?: Record +} diff --git a/src/services/mcp/adapter/analytics.ts b/src/services/mcp/adapter/analytics.ts new file mode 100644 index 000000000..40bbb2b84 --- /dev/null +++ b/src/services/mcp/adapter/analytics.ts @@ -0,0 +1,18 @@ +// Host analytics adapter — bridges logEvent to mcp-client's AnalyticsSink interface + +import type { AnalyticsSink } from 'mcp-client' +import { + type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + logEvent, +} from '../../analytics/index.js' + +/** + * Creates an AnalyticsSink implementation that delegates to the host's logEvent. 
+ */ +export function createMcpAnalytics(): AnalyticsSink { + return { + trackEvent(event: string, metadata: Record) { + logEvent(event, metadata as Record) + }, + } +} diff --git a/src/services/mcp/adapter/auth.ts b/src/services/mcp/adapter/auth.ts new file mode 100644 index 000000000..7d68f7661 --- /dev/null +++ b/src/services/mcp/adapter/auth.ts @@ -0,0 +1,28 @@ +// Host auth provider adapter — bridges OAuth token management to mcp-client's AuthProvider interface + +import type { AuthProvider } from 'mcp-client' +import { + getClaudeAIOAuthTokens, + checkAndRefreshOAuthTokenIfNeeded, + handleOAuth401Error, +} from '../../../utils/auth.js' + +/** + * Creates an AuthProvider implementation using the host's OAuth token management. + */ +export function createMcpAuth(): AuthProvider { + return { + async getTokens() { + const tokens = getClaudeAIOAuthTokens() + if (!tokens) return null + return { accessToken: tokens.accessToken } + }, + async refreshTokens() { + await checkAndRefreshOAuthTokenIfNeeded() + }, + async handleOAuthError(error: unknown) { + const currentToken = getClaudeAIOAuthTokens()?.accessToken ?? '' + await handleOAuth401Error(currentToken) + }, + } +} diff --git a/src/services/mcp/adapter/featureGate.ts b/src/services/mcp/adapter/featureGate.ts new file mode 100644 index 000000000..32e0d391b --- /dev/null +++ b/src/services/mcp/adapter/featureGate.ts @@ -0,0 +1,15 @@ +// Host feature gate adapter — bridges feature() to mcp-client's FeatureGate interface + +import type { FeatureGate } from 'mcp-client' +import { feature } from 'bun:bundle' + +/** + * Creates a FeatureGate implementation using the host's feature flag system. 
+ */ +export function createMcpFeatureGate(): FeatureGate { + return { + isEnabled(flag: string) { + return feature(flag) + }, + } +} diff --git a/src/services/mcp/adapter/httpConfig.ts b/src/services/mcp/adapter/httpConfig.ts new file mode 100644 index 000000000..7e633990c --- /dev/null +++ b/src/services/mcp/adapter/httpConfig.ts @@ -0,0 +1,15 @@ +// Host HTTP config adapter — bridges getUserAgent/getSessionId to mcp-client's HttpConfig interface + +import type { HttpConfig } from 'mcp-client' +import { getMCPUserAgent } from '../../../utils/http.js' +import { getSessionId } from '../../../bootstrap/state.js' + +/** + * Creates an HttpConfig implementation using the host's user agent and session ID. + */ +export function createMcpHttpConfig(): HttpConfig { + return { + getUserAgent: () => getMCPUserAgent(), + getSessionId: () => getSessionId(), + } +} diff --git a/src/services/mcp/adapter/imageProcessor.ts b/src/services/mcp/adapter/imageProcessor.ts new file mode 100644 index 000000000..fe5041b07 --- /dev/null +++ b/src/services/mcp/adapter/imageProcessor.ts @@ -0,0 +1,16 @@ +// Host image processor adapter — bridges maybeResizeAndDownsampleImageBuffer to mcp-client's ImageProcessor interface + +import type { ImageProcessor } from 'mcp-client' +import { maybeResizeAndDownsampleImageBuffer } from '../../../utils/imageResizer.js' + +/** + * Creates an ImageProcessor implementation using the host's image resizing. 
+ */ +export function createMcpImageProcessor(): ImageProcessor { + return { + async resizeAndDownsample(buffer: Buffer) { + const result = await maybeResizeAndDownsampleImageBuffer(buffer, buffer.length, 'png') + return result.buffer + }, + } +} diff --git a/src/services/mcp/adapter/index.ts b/src/services/mcp/adapter/index.ts new file mode 100644 index 000000000..4ef4f1604 --- /dev/null +++ b/src/services/mcp/adapter/index.ts @@ -0,0 +1,32 @@ +// Host dependency injection — assembles McpClientDependencies from host infrastructure +// This is the single entry point for creating the dependencies object used by createMcpManager() + +import type { McpClientDependencies } from 'mcp-client' +import { createMcpLogger } from './logger.js' +import { createMcpHttpConfig } from './httpConfig.js' +import { createMcpProxyConfig } from './proxy.js' +import { createMcpAnalytics } from './analytics.js' +import { createMcpSubprocessEnv } from './subprocessEnv.js' +import { createMcpStorage } from './storage.js' +import { createMcpImageProcessor } from './imageProcessor.js' +import { createMcpAuth } from './auth.js' +/** + * Creates the full set of MCP client dependencies using host infrastructure. + * All adapters are lazy — they only call into host modules when invoked. + * + * Note: featureGate is omitted because Bun's feature() requires string-literal + * arguments at compile time and cannot accept runtime variables. The interface + * field is optional and the mcp-client package does not use it currently. 
/**
 * Creates the full set of MCP client dependencies using host infrastructure.
 * All adapters are lazy — they only call into host modules when invoked.
 *
 * Note: featureGate is omitted because Bun's feature() requires string-literal
 * arguments at compile time and cannot accept runtime variables. The interface
 * field is optional and the mcp-client package does not use it currently.
 */
export function createMcpDependencies(): McpClientDependencies {
  return {
    logger: createMcpLogger(),
    httpConfig: createMcpHttpConfig(),
    proxy: createMcpProxyConfig(),
    analytics: createMcpAnalytics(),
    subprocessEnv: createMcpSubprocessEnv(),
    storage: createMcpStorage(),
    imageProcessor: createMcpImageProcessor(),
    auth: createMcpAuth(),
  }
}

// Host logger adapter — bridges logMCPDebug/logMCPError to mcp-client's Logger interface

/**
 * Matches "[serverName] rest of message". `[\s\S]*` (rather than `.*`) keeps
 * every line of a multi-line message instead of stopping at the first newline.
 */
const SERVER_PREFIX_PATTERN = /^\[([^\]]+)\]\s*([\s\S]*)/

/** Splits a "[serverName] text" message; returns null when there is no prefix. */
function parseServerPrefix(
  message: string,
): { serverName: string; text: string } | null {
  const match = SERVER_PREFIX_PATTERN.exec(message)
  return match ? { serverName: match[1], text: match[2] } : null
}

/**
 * Renders extra logger arguments as a suffix (with a leading space), or ''
 * when there are none. Errors contribute their message, strings are used
 * verbatim, and other values are JSON-encoded when possible.
 */
function formatLogArgs(args: readonly unknown[]): string {
  if (args.length === 0) {
    return ''
  }
  const rendered = args.map(arg => {
    if (arg instanceof Error) return arg.message
    if (typeof arg === 'string') return arg
    try {
      return JSON.stringify(arg) ?? String(arg)
    } catch {
      // e.g. circular structures or BigInt values
      return String(arg)
    }
  })
  return ` ${rendered.join(' ')}`
}

/**
 * Creates a Logger implementation that delegates to the host's MCP logging system.
 *
 * The host loggers are keyed by server name, so only messages that start with
 * a `[serverName]` prefix are forwarded; messages without one are ignored.
 * Extra arguments are appended to the forwarded text for `debug`, `info`
 * and `warn`. `error` forwards its first extra argument (typically the
 * error object) when one is given, otherwise the message itself.
 */
export function createMcpLogger(): Logger {
  const forwardDebug = (message: string, args: readonly unknown[]): void => {
    const parsed = parseServerPrefix(message)
    if (parsed) {
      logMCPDebug(parsed.serverName, `${parsed.text}${formatLogArgs(args)}`)
    }
    // Silently ignore messages without server name prefix
  }

  return {
    debug(message: string, ...args: unknown[]) {
      forwardDebug(message, args)
    },
    info(message: string, ...args: unknown[]) {
      forwardDebug(message, args)
    },
    warn(message: string, ...args: unknown[]) {
      const parsed = parseServerPrefix(message)
      if (parsed) {
        logMCPError(parsed.serverName, `${message}${formatLogArgs(args)}`)
      }
    },
    error(message: string, ...args: unknown[]) {
      const parsed = parseServerPrefix(message)
      if (parsed) {
        logMCPError(parsed.serverName, args[0] ?? message)
      }
    },
  }
}

// Host proxy config adapter — bridges proxy/MTLS to mcp-client's ProxyConfig interface

/**
 * Creates a ProxyConfig implementation using the host's proxy and TLS settings.
 * Each method is a thin pass-through to the corresponding host helper.
 */
export function createMcpProxyConfig(): ProxyConfig {
  return {
    getFetchOptions: () => getProxyFetchOptions() as Record<string, unknown>,
    getWebSocketAgent: (url: string) => getWebSocketProxyAgent(url),
    getWebSocketUrl: (url: string) => getWebSocketProxyUrl(url),
    getTLSOptions: () =>
      getWebSocketTLSOptions() as Record<string, unknown> | undefined,
  }
}
+ */ +export function createMcpStorage(): ContentStorage { + return { + async persistBinaryContent(data: Buffer, ext: string) { + const result = await persistBinaryContent(data, ext, `mcp-adapter-${Date.now()}`) + if ('error' in result) { + throw new Error(result.error) + } + return result.filepath + }, + } +} diff --git a/src/services/mcp/adapter/subprocessEnv.ts b/src/services/mcp/adapter/subprocessEnv.ts new file mode 100644 index 000000000..a6358af33 --- /dev/null +++ b/src/services/mcp/adapter/subprocessEnv.ts @@ -0,0 +1,15 @@ +// Host subprocess environment adapter + +import type { SubprocessEnvProvider } from 'mcp-client' +import { subprocessEnv } from '../../../utils/subprocessEnv.js' + +/** + * Creates a SubprocessEnvProvider using the host's subprocess environment logic. + */ +export function createMcpSubprocessEnv(): SubprocessEnvProvider { + return { + getEnv(additional?: Record) { + return { ...subprocessEnv(), ...additional } as Record + }, + } +} diff --git a/src/services/mcp/client.ts b/src/services/mcp/client.ts index d6db09b38..63c90077a 100644 --- a/src/services/mcp/client.ts +++ b/src/services/mcp/client.ts @@ -93,7 +93,6 @@ import { getWebSocketProxyAgent, getWebSocketProxyUrl, } from '../../utils/proxy.js' -import { recursivelySanitizeUnicode } from '../../utils/sanitization.js' import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js' import { subprocessEnv } from '../../utils/subprocessEnv.js' import { @@ -113,6 +112,19 @@ import { buildMcpToolName } from './mcpStringUtils.js' import { normalizeNameForMCP } from './normalization.js' import { getLoggingSafeMcpBaseUrl } from './utils.js' +// Package imports — delegate to mcp-client package utilities where applicable +import { + createMcpClient as createMcpClientFromPackage, + captureStderr, + isMcpSessionExpiredError as isMcpSessionExpiredErrorFromPackage, + installConnectionMonitor, + createCleanup as createCleanupFromPackage, + buildConnectedServer, + 
DEFAULT_CONNECTION_TIMEOUT_MS, + MAX_MCP_DESCRIPTION_LENGTH as PKG_MAX_MCP_DESCRIPTION_LENGTH, +} from 'mcp-client' +import { recursivelySanitizeUnicode } from 'mcp-client' + /* eslint-disable @typescript-eslint/no-require-imports */ const fetchMcpSkillsForClient = feature('MCP_SKILLS') ? ( @@ -191,20 +203,7 @@ export class McpToolCallError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS extends T * Per the MCP spec, servers return 404 when a session ID is no longer valid. * We check both signals to avoid false positives from generic 404s (wrong URL, server gone, etc.). */ -export function isMcpSessionExpiredError(error: Error): boolean { - const httpStatus = - 'code' in error ? (error as Error & { code?: number }).code : undefined - if (httpStatus !== 404) { - return false - } - // The SDK embeds the response body text in the error message. - // MCP servers return: {"error":{"code":-32001,"message":"Session not found"},...} - // Check for the JSON-RPC error code to distinguish from generic web server 404s. - return ( - error.message.includes('"code":-32001') || - error.message.includes('"code": -32001') - ) -} +export const isMcpSessionExpiredError = isMcpSessionExpiredErrorFromPackage /** * Default timeout for MCP tool calls (effectively infinite - ~27.8 hours). @@ -216,7 +215,7 @@ const DEFAULT_MCP_TOOL_TIMEOUT_MS = 100_000_000 * OpenAPI-generated MCP servers have been observed dumping 15-60KB of endpoint * docs into tool.description; this caps the p95 tail without losing the intent. */ -const MAX_MCP_DESCRIPTION_LENGTH = 2048 +const MAX_MCP_DESCRIPTION_LENGTH = PKG_MAX_MCP_DESCRIPTION_LENGTH /** * Gets the timeout for MCP tool calls in milliseconds. 
diff --git a/src/services/mcp/types.ts b/src/services/mcp/types.ts index b98821dee..9f64d5666 100644 --- a/src/services/mcp/types.ts +++ b/src/services/mcp/types.ts @@ -21,7 +21,7 @@ export const ConfigScopeSchema = lazySchema(() => export type ConfigScope = z.infer> export const TransportSchema = lazySchema(() => - z.enum(['stdio', 'sse', 'sse-ide', 'http', 'ws', 'sdk']), + z.enum(['stdio', 'sse', 'sse-ide', 'http', 'ws', 'sdk', 'claudeai-proxy']), ) export type Transport = z.infer> From 069daa15dcbb13bc249f3b5bea3f17bec1ece54b Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Sun, 12 Apr 2026 22:19:54 +0800 Subject: [PATCH 02/12] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E7=B1=BB?= =?UTF-8?q?=E5=9E=8B=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/entrypoints/sdk/coreSchemas.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/entrypoints/sdk/coreSchemas.ts b/src/entrypoints/sdk/coreSchemas.ts index 4d5b9d0a0..c1aab5bc1 100644 --- a/src/entrypoints/sdk/coreSchemas.ts +++ b/src/entrypoints/sdk/coreSchemas.ts @@ -336,14 +336,15 @@ export const PermissionResultSchema = lazySchema(() => export const PermissionModeSchema = lazySchema(() => z - .enum(['default', 'acceptEdits', 'bypassPermissions', 'plan', 'dontAsk']) + .enum(['default', 'acceptEdits', 'bypassPermissions', 'plan', 'dontAsk', 'auto']) .describe( 'Permission mode for controlling how tool executions are handled. ' + "'default' - Standard behavior, prompts for dangerous operations. " + "'acceptEdits' - Auto-accept file edit operations. " + "'bypassPermissions' - Bypass all permission checks (requires allowDangerouslySkipPermissions). " + "'plan' - Planning mode, no actual tool execution. " + - "'dontAsk' - Don't prompt for permissions, deny if not pre-approved.", + "'dontAsk' - Don't prompt for permissions, deny if not pre-approved. 
" + + "'auto' - Automatic mode (transcript classifier).", ), ) From f8e7e114d8a5f895136df2f8610bcecfbe7c8a76 Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Sun, 12 Apr 2026 22:47:03 +0800 Subject: [PATCH 03/12] =?UTF-8?q?chore:=20=E6=9B=B4=E6=96=B0=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E5=88=B0=201.3.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 +++-- build.ts | 16 +++++++++++++++- package.json | 11 ++++++----- scripts/setup-chrome-mcp.mjs | 13 +++---------- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 77c9407ae..54d8e0224 100644 --- a/README.md +++ b/README.md @@ -42,8 +42,9 @@ ```sh bun i -g claude-code-best bun pm -g trust claude-code-best -ccb # 直接打开 claude code -CLAUDE_BRIDGE_BASE_URL=https://remote-control.claude-code-best.win/ CLAUDE_BRIDGE_OAUTH_TOKEN=test-my-key bun run dev --remote-control # 我们有自部署的远程控制 +ccb # 以 nodejs 打开 claude code +ccb-bun # 以 bun 形态打开 +CLAUDE_BRIDGE_BASE_URL=https://remote-control.claude-code-best.win/ CLAUDE_BRIDGE_OAUTH_TOKEN=test-my-key ccb --remote-control # 我们有自部署的远程控制 ``` ## ⚡ 快速开始(源码版) diff --git a/build.ts b/build.ts index ec4b0091e..7b2169e0a 100644 --- a/build.ts +++ b/build.ts @@ -36,7 +36,7 @@ const DEFAULT_BUILD_FEATURES = [ 'CONTEXT_COLLAPSE', 'MONITOR_TOOL', 'FORK_SUBAGENT', - 'UDS_INBOX', +// 'UDS_INBOX', 'KAIROS', 'COORDINATOR_MODE', 'LAN_PIPES', @@ -112,3 +112,17 @@ if (!rgScript.success) { } else { console.log(`Bundled download-ripgrep script to ${outdir}/`) } + +// Step 6: Generate cli-bun and cli-node executable entry points +const cliBun = join(outdir, 'cli-bun.js') +const cliNode = join(outdir, 'cli-node.js') + +await writeFile(cliBun, '#!/usr/bin/env bun\nimport "./cli.js"\n') +await writeFile(cliNode, '#!/usr/bin/env node\nimport "./cli.js"\n') + +// Make both executable +const { chmodSync } = await import('fs') +chmodSync(cliBun, 0o755) +chmodSync(cliNode, 0o755) + +console.log(`Generated ${cliBun} 
(shebang: bun) and ${cliNode} (shebang: node)`) diff --git a/package.json b/package.json index 58616a8d0..7dbd9ad4e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "claude-code-best", - "version": "1.2.1", + "version": "1.3.2", "description": "Reverse-engineered Anthropic Claude Code CLI — interactive AI coding assistant in the terminal", "type": "module", "author": "claude-code-best ", @@ -25,8 +25,9 @@ "bun": ">=1.2.0" }, "bin": { - "ccb": "dist/cli.js", - "claude-code-best": "dist/cli.js" + "ccb": "dist/cli-node.js", + "ccb-bun": "dist/cli-bun.js", + "claude-code-best": "dist/cli-node.js" }, "workspaces": [ "packages/*", @@ -34,8 +35,8 @@ ], "files": [ "dist", - "scripts/download-ripgrep.ts", - "scripts/postinstall.cjs" + "scripts/postinstall.cjs", + "scripts/setup-chrome-mcp.mjs" ], "scripts": { "build": "bun run build.ts", diff --git a/scripts/setup-chrome-mcp.mjs b/scripts/setup-chrome-mcp.mjs index da10b4218..485457e70 100644 --- a/scripts/setup-chrome-mcp.mjs +++ b/scripts/setup-chrome-mcp.mjs @@ -9,18 +9,11 @@ */ import { execFileSync } from "node:child_process"; +import { createRequire } from "node:module"; import { dirname, join } from "node:path"; -import { fileURLToPath } from "node:url"; -const __dirname = dirname(fileURLToPath(import.meta.url)); -const cliPath = join( - __dirname, - "..", - "node_modules", - "mcp-chrome-bridge", - "dist", - "cli.js", -); +const require = createRequire(import.meta.url); +const cliPath = require.resolve("mcp-chrome-bridge/dist/cli.js"); const userArgs = process.argv.slice(2); From e8ebb2272de9ec35fb374886af484b4dfb70164e Mon Sep 17 00:00:00 2001 From: Eric-Guo Date: Sun, 12 Apr 2026 10:34:08 +0800 Subject: [PATCH 04/12] Add brave as alternative WebSearchTool --- docs/external-dependencies.md | 15 +- docs/features/web-search-tool.md | 44 +-- docs/tools/search-and-navigation.mdx | 10 +- .../__tests__/adapterFactory.test.ts | 70 +++++ .../__tests__/braveAdapter.extract.test.ts | 106 +++++++ 
.../__tests__/braveAdapter.integration.ts | 91 ++++++ .../__tests__/braveAdapter.test.ts | 273 ++++++++++++++++++ .../WebSearchTool/adapters/braveAdapter.ts | 169 +++++++++++ src/tools/WebSearchTool/adapters/index.ts | 52 ++-- 9 files changed, 777 insertions(+), 53 deletions(-) create mode 100644 src/tools/WebSearchTool/__tests__/adapterFactory.test.ts create mode 100644 src/tools/WebSearchTool/__tests__/braveAdapter.extract.test.ts create mode 100644 src/tools/WebSearchTool/__tests__/braveAdapter.integration.ts create mode 100644 src/tools/WebSearchTool/__tests__/braveAdapter.test.ts create mode 100644 src/tools/WebSearchTool/adapters/braveAdapter.ts diff --git a/docs/external-dependencies.md b/docs/external-dependencies.md index a28447e7c..756be5144 100644 --- a/docs/external-dependencies.md +++ b/docs/external-dependencies.md @@ -19,7 +19,7 @@ | 11 | BigQuery Metrics | `api.anthropic.com/api/claude_code/metrics` | HTTPS | 默认启用 | | 12 | MCP Proxy | `mcp-proxy.anthropic.com` | HTTPS+WS | 使用 MCP 工具时 | | 13 | MCP Registry | `api.anthropic.com/mcp-registry` | HTTPS | 查询 MCP 服务器时 | -| 14 | Bing Search | `www.bing.com` | HTTPS | WebSearch 工具 | +| 14 | Web Search Pages | `www.bing.com`, `api.search.brave.com` | HTTPS | WebSearch 工具,可通过 `WEB_SEARCH_ADAPTER=bing\|brave` 切换 | | 15 | Google Cloud Storage (更新) | `storage.googleapis.com` | HTTPS | 版本检查 | | 16 | GitHub Raw (Changelog/Stats) | `raw.githubusercontent.com` | HTTPS | 更新提示 | | 17 | Claude in Chrome Bridge | `bridge.claudeusercontent.com` | WSS | Chrome 集成 | @@ -121,12 +121,16 @@ Anthropic 托管的 MCP 服务器代理。 - **端点**: `https://api.anthropic.com/mcp-registry/v0/servers?version=latest&visibility=commercial` - **文件**: `src/services/mcp/officialRegistry.ts` -### 14. Bing Search +### 14.
Web Search Pages -WebSearch 工具的默认适配器,抓取 Bing 搜索结果。 +WebSearch 工具支持直接抓取 Bing 搜索结果页面,也支持通过 Brave 的 LLM Context API +获取搜索上下文;可通过 `WEB_SEARCH_ADAPTER=bing|brave` 显式切换后端。 -- **端点**: `https://www.bing.com/search?q={query}&setmkt=en-US` -- **文件**: `src/tools/WebSearchTool/adapters/bingAdapter.ts` +- **Bing 端点**: `https://www.bing.com/search?q={query}&setmkt=en-US` +- **Brave 端点**: `https://api.search.brave.com/res/v1/llm/context?q={query}` +- **文件**: + - `src/tools/WebSearchTool/adapters/bingAdapter.ts` + - `src/tools/WebSearchTool/adapters/braveAdapter.ts` 另外还有 Domain Blocklist 查询: - **端点**: `https://api.anthropic.com/api/web/domain_info?domain={domain}` @@ -201,6 +205,7 @@ WebSearch 工具的默认适配器,抓取 Bing 搜索结果。 | `{region}-aiplatform.googleapis.com` | Google Vertex AI | HTTPS | | `{resource}.services.ai.azure.com` | Azure Foundry | HTTPS | | `www.bing.com` | Bing 搜索 | HTTPS | +| `api.search.brave.com` | Brave 搜索 | HTTPS | | `storage.googleapis.com` | 自动更新 | HTTPS | | `raw.githubusercontent.com` | Changelog / 插件统计 | HTTPS | | `bridge.claudeusercontent.com` | Chrome Bridge | WSS | diff --git a/docs/features/web-search-tool.md b/docs/features/web-search-tool.md index 84802cc2b..5a6db8c34 100644 --- a/docs/features/web-search-tool.md +++ b/docs/features/web-search-tool.md @@ -1,11 +1,11 @@ # WEB_SEARCH_TOOL — 网页搜索工具 -> 实现状态:适配器架构完成,Bing 适配器为当前默认后端 +> 实现状态:适配器架构完成,支持 API / Bing / Brave 三种后端 > 引用数:核心工具,无 feature flag 门控(始终启用) ## 一、功能概述 -WebSearchTool 让模型可以搜索互联网获取最新信息。原始实现仅支持 Anthropic API 服务端搜索(`web_search_20250305` server tool),在第三方代理端点下不可用。现已重构为适配器架构,新增 Bing 搜索页面解析作为 fallback,确保任何 API 端点都能使用搜索功能。 +WebSearchTool 让模型可以搜索互联网获取最新信息。原始实现仅支持 Anthropic API 服务端搜索(`web_search_20250305` server tool),在第三方代理端点下不可用。现已重构为适配器架构,支持 API 服务端搜索,以及 Bing HTML 页面解析和 Brave LLM Context API 两个后端,确保任何 API 端点都能使用搜索功能。 ## 二、实现架构 @@ -21,9 +21,13 @@ WebSearchTool.call() │ └── 使用 web_search_20250305 server tool │ 通过 queryModelWithStreaming 二次调用 API │ - └── BingSearchAdapter — Bing HTML 抓取 + 正则提取(当前默认) - └── 直接抓取 Bing 搜索页 HTML -
正则提取 b_algo 块中的标题/URL/摘要 + ├── BingSearchAdapter — Bing HTML 抓取 + 正则提取 + │ └── 直接抓取 Bing 搜索页 HTML + │ 正则提取 b_algo 块中的标题/URL/摘要 + │ + └── BraveSearchAdapter — Brave LLM Context API + └── 调用 Brave HTTPS GET 接口 + 将 grounding payload 映射为标题/URL/摘要 ``` ### 2.2 模块结构 @@ -37,8 +41,9 @@ WebSearchTool.call() | 适配器工厂 | `src/tools/WebSearchTool/adapters/index.ts` | `createAdapter()` 工厂函数,选择后端 | | API 适配器 | `src/tools/WebSearchTool/adapters/apiAdapter.ts` | 封装原有 `queryModelWithStreaming` 逻辑,使用 server tool | | Bing 适配器 | `src/tools/WebSearchTool/adapters/bingAdapter.ts` | Bing HTML 抓取 + 正则解析 | -| 单元测试 | `src/tools/WebSearchTool/__tests__/bingAdapter.test.ts` | 32 个测试用例 | -| 集成测试 | `src/tools/WebSearchTool/__tests__/bingAdapter.integration.ts` | 真实网络请求验证 | +| Brave 适配器 | `src/tools/WebSearchTool/adapters/braveAdapter.ts` | Brave LLM Context API 适配与结果映射 | +| 单元测试 | `src/tools/WebSearchTool/__tests__/bingAdapter.test.ts`, `src/tools/WebSearchTool/__tests__/braveAdapter*.test.ts`, `src/tools/WebSearchTool/__tests__/adapterFactory.test.ts` | Bing / Brave 解析与工厂逻辑测试 | +| 集成测试 | `src/tools/WebSearchTool/__tests__/bingAdapter.integration.ts`, `src/tools/WebSearchTool/__tests__/braveAdapter.integration.ts` | 真实网络请求验证 | ### 2.3 数据流 @@ -49,20 +54,18 @@ WebSearchTool.call() validateInput() — 校验 query 非空、allowed/block 不共存 │ ▼ - createAdapter() → BingSearchAdapter(当前硬编码) + createAdapter() → ApiSearchAdapter | BingSearchAdapter | BraveSearchAdapter │ ▼ adapter.search(query, { allowedDomains, blockedDomains, signal, onProgress }) │ ├── onProgress({ type: 'query_update', query }) │ - ├── axios.get(bing.com/search?q=...&setmkt=en-US) - │ └── 13 个 Edge 浏览器请求头 + ├── axios.get(search-engine-url) + │ └── API 鉴权请求头 │ - ├── extractBingResults(html) — 正则提取

  • 块 - │ ├── resolveBingUrl() — 解码 base64 重定向 URL - │ ├── extractSnippet() — 三级降级摘要提取 - │ └── decodeHtmlEntities() — he.decode + ├── extractResults(payload) — 按后端提取结果 + │ └── grounding → SearchResult[] 映射 │ ├── 客户端域名过滤 (allowedDomains / blockedDomains) │ @@ -117,19 +120,18 @@ Bing 返回的重定向 URL 格式:`bing.com/ck/a?...&u=a1aHR0cHM6Ly9...` ## 四、适配器选择逻辑 -当前 `createAdapter()` 硬编码返回 `BingSearchAdapter`,原逻辑已注释保留: +`createAdapter()` 按以下优先级选择后端,并按选中的后端 key 缓存适配器实例: ```typescript export function createAdapter(): WebSearchAdapter { - return new BingSearchAdapter() - // 注释保留的选择逻辑: - // 1. WEB_SEARCH_ADAPTER 环境变量强制指定 api|bing - // 2. isFirstPartyAnthropicBaseUrl() → API 适配器 - // 3. 第三方端点 → Bing 适配器 + // 1. WEB_SEARCH_ADAPTER=api|bing|brave 显式指定 + // 2. Anthropic 官方 API Base URL → ApiSearchAdapter + // 3. 第三方代理 / 非官方端点 → BingSearchAdapter } ``` -恢复自动选择:取消 `index.ts` 中的注释即可。 +显式指定 `WEB_SEARCH_ADAPTER=brave` 时,会改用 Brave LLM Context API 后端,并要求 +`BRAVE_SEARCH_API_KEY` 或 `BRAVE_API_KEY`。 ## 五、接口定义 diff --git a/docs/tools/search-and-navigation.mdx b/docs/tools/search-and-navigation.mdx index 99393748e..9422ea177 100644 --- a/docs/tools/search-and-navigation.mdx +++ b/docs/tools/search-and-navigation.mdx @@ -146,14 +146,15 @@ AI 的信息获取不局限于本地代码: ### WebSearch 实现机制 -WebSearch 通过适配器模式支持两种搜索后端,由 `src/tools/WebSearchTool/adapters/` 中的工厂函数 `createAdapter()` 选择: +WebSearch 通过适配器模式支持三种搜索后端,由 `src/tools/WebSearchTool/adapters/` 中的工厂函数 `createAdapter()` 选择: ``` 适配器架构: WebSearchTool.call() → createAdapter() 选择后端 ├─ ApiSearchAdapter — Anthropic API 服务端搜索(需官方 API 密钥) - └─ BingSearchAdapter — 直接抓取 Bing 搜索页面解析(无需 API 密钥) + ├─ BingSearchAdapter — 直接抓取 Bing 搜索页面解析(无需 API 密钥) + └─ BraveSearchAdapter — 调用 Brave LLM Context API 解析(需 Brave API 密钥) → adapter.search(query, options) → 转换为统一 SearchResult[] 格式返回 ``` @@ -166,8 +167,9 @@ WebSearch 通过适配器模式支持两种搜索后端,由 `src/tools/WebSear |--------|------|--------| | 1 | 环境变量 `WEB_SEARCH_ADAPTER=api` | `ApiSearchAdapter` | | 2 | 环境变量 `WEB_SEARCH_ADAPTER=bing` | 
`BingSearchAdapter` | -| 3 | API Base URL 指向 Anthropic 官方 | `ApiSearchAdapter` | -| 4 | 第三方代理 / 非官方端点 | `BingSearchAdapter` | +| 3 | 环境变量 `WEB_SEARCH_ADAPTER=brave` | `BraveSearchAdapter` | +| 4 | API Base URL 指向 Anthropic 官方 | `ApiSearchAdapter` | +| 5 | 第三方代理 / 非官方端点 | `BingSearchAdapter` | 适配器是无状态的,同一会话内缓存复用。 diff --git a/src/tools/WebSearchTool/__tests__/adapterFactory.test.ts b/src/tools/WebSearchTool/__tests__/adapterFactory.test.ts new file mode 100644 index 000000000..d93b255b4 --- /dev/null +++ b/src/tools/WebSearchTool/__tests__/adapterFactory.test.ts @@ -0,0 +1,70 @@ +import { afterEach, describe, expect, mock, test } from 'bun:test' + +let isFirstPartyBaseUrl = true + +mock.module('../adapters/apiAdapter.js', () => ({ + ApiSearchAdapter: class ApiSearchAdapter {}, +})) + +mock.module('../adapters/bingAdapter.js', () => ({ + BingSearchAdapter: class BingSearchAdapter {}, +})) + +mock.module('../adapters/braveAdapter.js', () => ({ + BraveSearchAdapter: class BraveSearchAdapter {}, +})) + +mock.module('../../../utils/model/providers.js', () => ({ + isFirstPartyAnthropicBaseUrl: () => isFirstPartyBaseUrl, +})) + +const { createAdapter } = await import('../adapters/index') + +const originalWebSearchAdapter = process.env.WEB_SEARCH_ADAPTER + +afterEach(() => { + isFirstPartyBaseUrl = true + + if (originalWebSearchAdapter === undefined) { + delete process.env.WEB_SEARCH_ADAPTER + } else { + process.env.WEB_SEARCH_ADAPTER = originalWebSearchAdapter + } +}) + +describe('createAdapter', () => { + test('reuses the same instance when the selected backend does not change', () => { + process.env.WEB_SEARCH_ADAPTER = 'brave' + + const firstAdapter = createAdapter() + const secondAdapter = createAdapter() + + expect(firstAdapter).toBe(secondAdapter) + expect(firstAdapter.constructor.name).toBe('BraveSearchAdapter') + }) + + test('rebuilds the adapter when WEB_SEARCH_ADAPTER changes', () => { + process.env.WEB_SEARCH_ADAPTER = 'brave' + const braveAdapter = 
createAdapter() + + process.env.WEB_SEARCH_ADAPTER = 'bing' + const bingAdapter = createAdapter() + + expect(bingAdapter).not.toBe(braveAdapter) + expect(bingAdapter.constructor.name).toBe('BingSearchAdapter') + }) + + test('selects the API adapter for first-party Anthropic URLs', () => { + delete process.env.WEB_SEARCH_ADAPTER + isFirstPartyBaseUrl = true + + expect(createAdapter().constructor.name).toBe('ApiSearchAdapter') + }) + + test('selects the Bing adapter for third-party Anthropic base URLs', () => { + delete process.env.WEB_SEARCH_ADAPTER + isFirstPartyBaseUrl = false + + expect(createAdapter().constructor.name).toBe('BingSearchAdapter') + }) +}) diff --git a/src/tools/WebSearchTool/__tests__/braveAdapter.extract.test.ts b/src/tools/WebSearchTool/__tests__/braveAdapter.extract.test.ts new file mode 100644 index 000000000..f891ce3ca --- /dev/null +++ b/src/tools/WebSearchTool/__tests__/braveAdapter.extract.test.ts @@ -0,0 +1,106 @@ +import { describe, expect, test } from 'bun:test' +import { extractBraveResults } from '../adapters/braveAdapter' + +describe('extractBraveResults', () => { + test('extracts generic grounding results', () => { + const results = extractBraveResults({ + grounding: { + generic: [ + { + title: 'Example Title 1', + url: 'https://example.com/page1', + snippets: ['First result description'], + }, + { + title: 'Example Title 2', + url: 'https://example.com/page2', + snippets: ['Second result description'], + }, + ], + }, + }) + + expect(results).toEqual([ + { + title: 'Example Title 1', + url: 'https://example.com/page1', + snippet: 'First result description', + }, + { + title: 'Example Title 2', + url: 'https://example.com/page2', + snippet: 'Second result description', + }, + ]) + }) + + test('combines generic, poi, and map grounding results', () => { + const results = extractBraveResults({ + grounding: { + generic: [{ title: 'Generic', url: 'https://example.com/generic' }], + poi: { title: 'POI', url: 'https://maps.example.com/poi' 
}, + map: [{ title: 'Map', url: 'https://maps.example.com/map' }], + }, + }) + + expect(results).toEqual([ + { title: 'Generic', url: 'https://example.com/generic', snippet: undefined }, + { title: 'POI', url: 'https://maps.example.com/poi', snippet: undefined }, + { title: 'Map', url: 'https://maps.example.com/map', snippet: undefined }, + ]) + }) + + test('joins multiple snippets into one summary string', () => { + const results = extractBraveResults({ + grounding: { + generic: [ + { + title: 'Joined Snippets', + url: 'https://example.com/joined', + snippets: ['First snippet.', 'Second snippet.'], + }, + ], + }, + }) + + expect(results[0].snippet).toBe('First snippet. Second snippet.') + }) + + test('skips entries without a title or URL', () => { + const results = extractBraveResults({ + grounding: { + generic: [ + { title: 'Missing URL' }, + { url: 'https://example.com/missing-title' }, + { title: 'Valid', url: 'https://example.com/valid' }, + ], + }, + }) + + expect(results).toEqual([ + { title: 'Valid', url: 'https://example.com/valid', snippet: undefined }, + ]) + }) + + test('deduplicates repeated URLs across grounding buckets', () => { + const results = extractBraveResults({ + grounding: { + generic: [{ title: 'First', url: 'https://example.com/dup' }], + poi: { title: 'Second', url: 'https://example.com/dup' }, + map: [{ title: 'Third', url: 'https://example.com/dup' }], + }, + }) + + expect(results).toEqual([ + { title: 'First', url: 'https://example.com/dup', snippet: undefined }, + ]) + }) + + test('returns empty array when grounding is missing', () => { + expect(extractBraveResults({})).toEqual([]) + }) + + test('returns empty array when grounding arrays are absent', () => { + expect(extractBraveResults({ grounding: {} })).toEqual([]) + }) +}) diff --git a/src/tools/WebSearchTool/__tests__/braveAdapter.integration.ts b/src/tools/WebSearchTool/__tests__/braveAdapter.integration.ts new file mode 100644 index 000000000..f7dc6e653 --- /dev/null +++ 
b/src/tools/WebSearchTool/__tests__/braveAdapter.integration.ts @@ -0,0 +1,91 @@ +/** + * Integration test for BraveSearchAdapter — hits Brave's LLM context API. + * + * Usage: + * BRAVE_SEARCH_API_KEY=... bun run src/tools/WebSearchTool/__tests__/braveAdapter.integration.ts + * + * Optional env vars: + * BRAVE_QUERY — search query (default: "Claude AI Anthropic") + * BRAVE_API_KEY — fallback key env var + */ + +if (!globalThis.MACRO) { + globalThis.MACRO = { VERSION: '0.0.0-test', BUILD_TIME: '0' } as any +} + +import { BraveSearchAdapter } from '../adapters/braveAdapter' + +const query = process.env.BRAVE_QUERY || 'Claude AI Anthropic' + +async function main() { + if (!process.env.BRAVE_SEARCH_API_KEY && !process.env.BRAVE_API_KEY) { + console.error( + '❌ Missing Brave API key. Set BRAVE_SEARCH_API_KEY or BRAVE_API_KEY.', + ) + process.exit(1) + } + + console.log(`\n🔍 Searching Brave for: "${query}"\n`) + + const adapter = new BraveSearchAdapter() + const startTime = Date.now() + + const results = await adapter.search(query, { + onProgress: p => { + if (p.type === 'query_update') { + console.log(` → Query sent: ${p.query}`) + } + if (p.type === 'search_results_received') { + console.log(` → Received ${p.resultCount} results`) + } + }, + }) + + const elapsed = Date.now() - startTime + console.log(`\n✅ Done in ${elapsed}ms — ${results.length} result(s)\n`) + + if (results.length === 0) { + console.log('⚠️ No results returned. Possible causes:') + console.log(' - Brave returned no grounding data for the query') + console.log(' - Network/firewall issue') + console.log(' - Invalid or rate-limited Brave API key\n') + process.exit(1) + } + + for (const [i, r] of results.entries()) { + console.log(` ${i + 1}. ${r.title}`) + console.log(` ${r.url}`) + if (r.snippet) { + const snippet = r.snippet.replace(/\n/g, ' ') + console.log( + ` ${snippet.slice(0, 150)}${snippet.length > 150 ? 
'…' : ''}`, + ) + } + console.log() + } + + let passed = true + for (const [i, r] of results.entries()) { + if (!r.title || typeof r.title !== 'string') { + console.error(`❌ Result ${i + 1}: missing or non-string title`, r) + passed = false + } + if (!r.url || !r.url.startsWith('http')) { + console.error(`❌ Result ${i + 1}: missing or non-http url`, r) + passed = false + } + } + + if (passed) { + console.log('✅ All results have valid structure.\n') + } else { + process.exit(1) + } +} + +if (import.meta.main) { + main().catch(e => { + console.error('❌ Fatal error:', e) + process.exit(1) + }) +} diff --git a/src/tools/WebSearchTool/__tests__/braveAdapter.test.ts b/src/tools/WebSearchTool/__tests__/braveAdapter.test.ts new file mode 100644 index 000000000..8158e6dde --- /dev/null +++ b/src/tools/WebSearchTool/__tests__/braveAdapter.test.ts @@ -0,0 +1,273 @@ +import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test' + +const originalBraveSearchApiKey = process.env.BRAVE_SEARCH_API_KEY +const originalBraveApiKey = process.env.BRAVE_API_KEY + +describe('BraveSearchAdapter.search', () => { + const createAdapter = async () => { + const { BraveSearchAdapter } = await import('../adapters/braveAdapter') + return new BraveSearchAdapter() + } + + const SAMPLE_RESPONSE = { + grounding: { + generic: [ + { + title: 'Result One', + url: 'https://example.com/result1', + snippets: ['Snippet one'], + }, + { + title: 'Result Two', + url: 'https://example.com/result2', + snippets: ['Snippet two'], + }, + ], + }, + } + + beforeEach(() => { + process.env.BRAVE_SEARCH_API_KEY = 'test-brave-key' + delete process.env.BRAVE_API_KEY + }) + + afterEach(() => { + mock.restore() + + if (originalBraveSearchApiKey === undefined) { + delete process.env.BRAVE_SEARCH_API_KEY + } else { + process.env.BRAVE_SEARCH_API_KEY = originalBraveSearchApiKey + } + + if (originalBraveApiKey === undefined) { + delete process.env.BRAVE_API_KEY + } else { + process.env.BRAVE_API_KEY = 
originalBraveApiKey + } + }) + + test('returns parsed results from Brave LLM context payload', async () => { + mock.module('axios', () => ({ + default: { + get: mock(() => Promise.resolve({ data: SAMPLE_RESPONSE })), + isCancel: () => false, + }, + })) + + const adapter = await createAdapter() + const results = await adapter.search('test query', {}) + + expect(results).toHaveLength(2) + expect(results[0]).toEqual({ + title: 'Result One', + url: 'https://example.com/result1', + snippet: 'Snippet one', + }) + expect(results[1].title).toBe('Result Two') + }) + + test('calls onProgress with query_update and search_results_received', async () => { + mock.module('axios', () => ({ + default: { + get: mock(() => Promise.resolve({ data: SAMPLE_RESPONSE })), + isCancel: () => false, + }, + })) + + const progressCalls: any[] = [] + const onProgress = (p: any) => progressCalls.push(p) + + const adapter = await createAdapter() + await adapter.search('test', { onProgress }) + + expect(progressCalls).toHaveLength(2) + expect(progressCalls[0]).toEqual({ + type: 'query_update', + query: 'test', + }) + expect(progressCalls[1]).toEqual({ + type: 'search_results_received', + resultCount: 2, + query: 'test', + }) + }) + + test('filters results by allowedDomains', async () => { + const mixedResponse = { + grounding: { + generic: [ + { title: 'Allowed', url: 'https://allowed.com/a' }, + { title: 'Blocked', url: 'https://blocked.com/b' }, + ], + }, + } + + mock.module('axios', () => ({ + default: { + get: mock(() => Promise.resolve({ data: mixedResponse })), + isCancel: () => false, + }, + })) + + const adapter = await createAdapter() + const results = await adapter.search('test', { + allowedDomains: ['allowed.com'], + }) + + expect(results).toHaveLength(1) + expect(results[0].url).toBe('https://allowed.com/a') + }) + + test('filters results by blockedDomains', async () => { + const mixedResponse = { + grounding: { + generic: [ + { title: 'Good', url: 'https://good.com/a' }, + { title: 
'Spam', url: 'https://spam.com/b' }, + ], + }, + } + + mock.module('axios', () => ({ + default: { + get: mock(() => Promise.resolve({ data: mixedResponse })), + isCancel: () => false, + }, + })) + + const adapter = await createAdapter() + const results = await adapter.search('test', { + blockedDomains: ['spam.com'], + }) + + expect(results).toHaveLength(1) + expect(results[0].url).toBe('https://good.com/a') + }) + + test('filters subdomains with allowedDomains', async () => { + const response = { + grounding: { + generic: [ + { title: 'Subdomain', url: 'https://docs.example.com/page' }, + { title: 'Other', url: 'https://other.com/page' }, + ], + }, + } + + mock.module('axios', () => ({ + default: { + get: mock(() => Promise.resolve({ data: response })), + isCancel: () => false, + }, + })) + + const adapter = await createAdapter() + const results = await adapter.search('test', { + allowedDomains: ['example.com'], + }) + + expect(results).toHaveLength(1) + expect(results[0].url).toBe('https://docs.example.com/page') + }) + + test('throws AbortError when signal is already aborted', async () => { + mock.module('axios', () => ({ + default: { + get: mock((_url: string, config: any) => { + if (config?.signal?.aborted) { + const err = new Error('canceled') + ;(err as any).__CANCEL__ = true + return Promise.reject(err) + } + return Promise.resolve({ data: SAMPLE_RESPONSE }) + }), + isCancel: (e: any) => e?.__CANCEL__ === true, + }, + })) + + const adapter = await createAdapter() + const controller = new AbortController() + controller.abort() + + const { AbortError } = await import('../../../utils/errors') + await expect( + adapter.search('test', { signal: controller.signal }), + ).rejects.toThrow(AbortError) + }) + + test('re-throws non-abort axios errors', async () => { + const networkError = new Error('Network error') + mock.module('axios', () => ({ + default: { + get: mock(() => Promise.reject(networkError)), + isCancel: () => false, + }, + })) + + const adapter = await 
createAdapter() + await expect(adapter.search('test', {})).rejects.toThrow('Network error') + }) + + test('sends the documented HTTPS endpoint with query params and auth header', async () => { + const axiosGet = mock(() => Promise.resolve({ data: SAMPLE_RESPONSE })) + mock.module('axios', () => ({ + default: { + get: axiosGet, + isCancel: () => false, + }, + })) + + const adapter = await createAdapter() + await adapter.search('hello world & special=chars', {}) + + expect(axiosGet.mock.calls).toHaveLength(1) + expect((axiosGet.mock.calls as any[][])[0][0]).toBe( + 'https://api.search.brave.com/res/v1/llm/context', + ) + expect((axiosGet.mock.calls as any[][])[0][1]).toMatchObject({ + params: { q: 'hello world & special=chars' }, + headers: { + Accept: 'application/json', + 'X-Subscription-Token': 'test-brave-key', + }, + }) + }) + + test('accepts BRAVE_API_KEY as a fallback env var', async () => { + delete process.env.BRAVE_SEARCH_API_KEY + process.env.BRAVE_API_KEY = 'fallback-key' + + const axiosGet = mock(() => Promise.resolve({ data: SAMPLE_RESPONSE })) + mock.module('axios', () => ({ + default: { + get: axiosGet, + isCancel: () => false, + }, + })) + + const adapter = await createAdapter() + await adapter.search('test', {}) + + expect((axiosGet.mock.calls as any[][])[0][1].headers).toMatchObject({ + 'X-Subscription-Token': 'fallback-key', + }) + }) + + test('throws when no Brave API key is configured', async () => { + delete process.env.BRAVE_SEARCH_API_KEY + delete process.env.BRAVE_API_KEY + + mock.module('axios', () => ({ + default: { + get: mock(() => Promise.resolve({ data: SAMPLE_RESPONSE })), + isCancel: () => false, + }, + })) + + const adapter = await createAdapter() + await expect(adapter.search('test', {})).rejects.toThrow( + 'BraveSearchAdapter requires BRAVE_SEARCH_API_KEY or BRAVE_API_KEY', + ) + }) +}) diff --git a/src/tools/WebSearchTool/adapters/braveAdapter.ts b/src/tools/WebSearchTool/adapters/braveAdapter.ts new file mode 100644 index 
000000000..fbfc6e7da --- /dev/null +++ b/src/tools/WebSearchTool/adapters/braveAdapter.ts @@ -0,0 +1,169 @@ +/** + * Brave-based search adapter — fetches Brave's LLM context API and maps the + * grounding payload into SearchResult objects. + */ + +import axios from 'axios' +import { AbortError } from '../../../utils/errors.js' +import type { SearchResult, SearchOptions, WebSearchAdapter } from './types.js' + +const FETCH_TIMEOUT_MS = 30_000 +const BRAVE_LLM_CONTEXT_URL = 'https://api.search.brave.com/res/v1/llm/context' +const BRAVE_API_KEY_ENV_VARS = ['BRAVE_SEARCH_API_KEY', 'BRAVE_API_KEY'] as const + +interface BraveGroundingResult { + title?: string + url?: string + snippets?: string[] +} + +interface BraveSearchResponse { + grounding?: { + generic?: BraveGroundingResult[] + map?: BraveGroundingResult[] + poi?: BraveGroundingResult | null + } +} + +export class BraveSearchAdapter implements WebSearchAdapter { + async search( + query: string, + options: SearchOptions, + ): Promise { + const { signal, onProgress, allowedDomains, blockedDomains } = options + + if (signal?.aborted) { + throw new AbortError() + } + + onProgress?.({ type: 'query_update', query }) + + const abortController = new AbortController() + if (signal) { + signal.addEventListener('abort', () => abortController.abort(), { + once: true, + }) + } + + let payload: BraveSearchResponse + try { + const response = await axios.get( + BRAVE_LLM_CONTEXT_URL, + { + signal: abortController.signal, + timeout: FETCH_TIMEOUT_MS, + responseType: 'json', + headers: { + Accept: 'application/json', + 'X-Subscription-Token': getBraveApiKey(), + }, + params: { q: query }, + }, + ) + payload = response.data + } catch (e) { + if (axios.isCancel(e) || abortController.signal.aborted) { + throw new AbortError() + } + throw e + } + + if (abortController.signal.aborted) { + throw new AbortError() + } + + const rawResults = extractBraveResults(payload) + const results = rawResults.filter(r => { + try { + const hostname = 
new URL(r.url).hostname + if ( + allowedDomains?.length && + !allowedDomains.some( + d => hostname === d || hostname.endsWith('.' + d), + ) + ) { + return false + } + if ( + blockedDomains?.length && + blockedDomains.some(d => hostname === d || hostname.endsWith('.' + d)) + ) { + return false + } + } catch { + return false + } + return true + }) + + onProgress?.({ + type: 'search_results_received', + resultCount: results.length, + query, + }) + + return results + } +} + +export function extractBraveResults( + payload: BraveSearchResponse, +): SearchResult[] { + const grounding = payload.grounding + if (!grounding) { + return [] + } + + const entries = [ + ...(Array.isArray(grounding.generic) ? grounding.generic : []), + ...(grounding.poi ? [grounding.poi] : []), + ...(Array.isArray(grounding.map) ? grounding.map : []), + ] + + const seenUrls = new Set() + const results: SearchResult[] = [] + + for (const entry of entries) { + if (!entry?.url || !entry.title || seenUrls.has(entry.url)) { + continue + } + + seenUrls.add(entry.url) + results.push({ + title: entry.title, + url: entry.url, + snippet: normalizeSnippet(entry.snippets), + }) + } + + return results +} + +function normalizeSnippet(snippets: string[] | undefined): string | undefined { + if (!Array.isArray(snippets)) { + return undefined + } + + const normalized = snippets + .map(snippet => snippet.trim()) + .filter(snippet => snippet.length > 0) + + if (normalized.length === 0) { + return undefined + } + + return normalized.join(' ') +} + +function getBraveApiKey(): string { + for (const envVar of BRAVE_API_KEY_ENV_VARS) { + const value = process.env[envVar]?.trim() + if (value) { + return value + } + } + + throw new Error( + 'BraveSearchAdapter requires BRAVE_SEARCH_API_KEY or BRAVE_API_KEY', + ) +} diff --git a/src/tools/WebSearchTool/adapters/index.ts b/src/tools/WebSearchTool/adapters/index.ts index 49bf07ed9..16c5b6c50 100644 --- a/src/tools/WebSearchTool/adapters/index.ts +++ 
b/src/tools/WebSearchTool/adapters/index.ts @@ -6,36 +6,42 @@ import { isFirstPartyAnthropicBaseUrl } from '../../../utils/model/providers.js' import { ApiSearchAdapter } from './apiAdapter.js' import { BingSearchAdapter } from './bingAdapter.js' +import { BraveSearchAdapter } from './braveAdapter.js' import type { WebSearchAdapter } from './types.js' -export type { SearchResult, SearchOptions, SearchProgress, WebSearchAdapter } from './types.js' +export type { + SearchResult, + SearchOptions, + SearchProgress, + WebSearchAdapter, +} from './types.js' let cachedAdapter: WebSearchAdapter | null = null +let cachedAdapterKey: 'api' | 'bing' | 'brave' | null = null export function createAdapter(): WebSearchAdapter { - // 直接用 bing 适配器,跳过 API 适配器的选择逻辑 - return new BingSearchAdapter() -// // Adapter is stateless — safe to reuse across calls within a session -// if (cachedAdapter) return cachedAdapter + const envAdapter = process.env.WEB_SEARCH_ADAPTER + const adapterKey = + envAdapter === 'api' || envAdapter === 'bing' || envAdapter === 'brave' + ? envAdapter + : isFirstPartyAnthropicBaseUrl() + ? 
'api' + : 'bing' -// // Env override: WEB_SEARCH_ADAPTER=api|bing forces specific backend -// const envAdapter = process.env.WEB_SEARCH_ADAPTER -// if (envAdapter === 'api') { -// cachedAdapter = new ApiSearchAdapter() -// return cachedAdapter -// } -// if (envAdapter === 'bing') { -// cachedAdapter = new BingSearchAdapter() -// return cachedAdapter -// } + if (cachedAdapter && cachedAdapterKey === adapterKey) return cachedAdapter -// // Anthropic official URL → API server-side search -// if (isFirstPartyAnthropicBaseUrl()) { -// cachedAdapter = new ApiSearchAdapter() -// return cachedAdapter -// } + if (adapterKey === 'api') { + cachedAdapter = new ApiSearchAdapter() + cachedAdapterKey = 'api' + return cachedAdapter + } + if (adapterKey === 'bing') { + cachedAdapter = new BingSearchAdapter() + cachedAdapterKey = 'bing' + return cachedAdapter + } -// // Third-party proxies / non-Anthropic endpoints → Bing fallback -// cachedAdapter = new BingSearchAdapter() -// return cachedAdapter + cachedAdapter = new BraveSearchAdapter() + cachedAdapterKey = 'brave' + return cachedAdapter } From 451978735092ec1c3e1451eda6ed76569e05e99a Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Sun, 12 Apr 2026 23:12:09 +0800 Subject: [PATCH 05/12] =?UTF-8?q?fix:=20=E4=BF=AE=E6=AD=A3=E9=A1=BA?= =?UTF-8?q?=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tools/WebSearchTool/adapters/index.ts | 12 ++++++------ src/utils/model/providers.ts | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/tools/WebSearchTool/adapters/index.ts b/src/tools/WebSearchTool/adapters/index.ts index 16c5b6c50..2a42aac42 100644 --- a/src/tools/WebSearchTool/adapters/index.ts +++ b/src/tools/WebSearchTool/adapters/index.ts @@ -35,13 +35,13 @@ export function createAdapter(): WebSearchAdapter { cachedAdapterKey = 'api' return cachedAdapter } - if (adapterKey === 'bing') { - cachedAdapter = new BingSearchAdapter() - cachedAdapterKey = 
'bing' - return cachedAdapter + if (adapterKey === 'brave') { + cachedAdapter = new BraveSearchAdapter() + cachedAdapterKey = 'brave' + return cachedAdapter } - cachedAdapter = new BraveSearchAdapter() - cachedAdapterKey = 'brave' + cachedAdapter = new BingSearchAdapter() + cachedAdapterKey = 'bing' return cachedAdapter } diff --git a/src/utils/model/providers.ts b/src/utils/model/providers.ts index 823384f2d..79572d42e 100644 --- a/src/utils/model/providers.ts +++ b/src/utils/model/providers.ts @@ -39,6 +39,7 @@ export function getAPIProviderForStatsig(): AnalyticsMetadata_I_VERIFIED_THIS_IS */ export function isFirstPartyAnthropicBaseUrl(): boolean { const baseUrl = process.env.ANTHROPIC_BASE_URL + // TODO: 这里会有问题, 只配置了 openai 协议的用户, 按理说会为 true 导致问题 if (!baseUrl) { return true } From 1b6bf2c95dae221cfe40d4247930325918f10edc Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Sun, 12 Apr 2026 23:24:12 +0800 Subject: [PATCH 06/12] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=AF=B9?= =?UTF-8?q?=E7=A9=B7=E9=AC=BC=E6=A8=A1=E5=BC=8F=E7=9A=84=20auto=20dream=20?= =?UTF-8?q?=E5=92=8C=20session=20memory=20=E8=B6=8A=E8=BF=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/query/stopHooks.ts | 2 +- src/services/SessionMemory/sessionMemory.ts | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/query/stopHooks.ts b/src/query/stopHooks.ts index 10e268dfb..73aa62df6 100644 --- a/src/query/stopHooks.ts +++ b/src/query/stopHooks.ts @@ -159,7 +159,7 @@ export async function* handleStopHooks( toolUseContext.appendSystemMessage as ((msg: import('../types/message.js').SystemMessage) => void) | undefined, ) } - if (!toolUseContext.agentId) { + if (!toolUseContext.agentId && !poorMode) { void executeAutoDream(stopHookContext, toolUseContext.appendSystemMessage) } } diff --git a/src/services/SessionMemory/sessionMemory.ts b/src/services/SessionMemory/sessionMemory.ts index 32e42af8b..7be2da4b6 100644 --- 
a/src/services/SessionMemory/sessionMemory.ts +++ b/src/services/SessionMemory/sessionMemory.ts @@ -6,6 +6,7 @@ import { writeFile } from 'fs/promises' import memoize from 'lodash-es/memoize.js' +import { feature } from 'bun:bundle' import { getIsRemoteMode } from '../../bootstrap/state.js' import { getSystemPrompt } from '../../constants/prompts.js' import { getSystemContext, getUserContext } from '../../context.js' @@ -280,6 +281,12 @@ const extractSessionMemory = sequential(async function ( return } + // Poor mode: skip to reduce token consumption + if (feature('POOR')) { + const { isPoorModeActive } = await import('../../commands/poor/poorMode.js') + if (isPoorModeActive()) return + } + // Check gate lazily when hook runs (cached, non-blocking) if (!isSessionMemoryGateEnabled()) { // Log gate failure once per session (ant-only) From 96b543444700464eb940764853cfaffb485843bd Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Sun, 12 Apr 2026 23:42:53 +0800 Subject: [PATCH 07/12] =?UTF-8?q?feat:=20=E7=A9=B7=E9=AC=BC=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E5=8E=BB=E9=99=A4=20session-summary?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/services/AgentSummary/agentSummary.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/services/AgentSummary/agentSummary.ts b/src/services/AgentSummary/agentSummary.ts index 26a9ece65..9041eba4e 100644 --- a/src/services/AgentSummary/agentSummary.ts +++ b/src/services/AgentSummary/agentSummary.ts @@ -11,6 +11,7 @@ */ import type { TaskContext } from '../../Task.js' +import { isPoorModeActive } from '../../commands/poor/poorMode.js' import { updateAgentSummary } from '../../tasks/LocalAgentTask/LocalAgentTask.js' import { filterIncompleteToolCalls } from '../../tools/AgentTool/runAgent.js' import type { AgentId } from '../../types/ids.js' @@ -60,6 +61,11 @@ export function startAgentSummarization( async function runSummary(): Promise { if (stopped) return + if 
(isPoorModeActive()) { + logForDebugging('[AgentSummary] Skipping summary — poor mode active') + scheduleNext() + return + } logForDebugging(`[AgentSummary] Timer fired for agent ${agentId}`) From f8a46a24113610a5b1b9164eacddc40978689d19 Mon Sep 17 00:00:00 2001 From: claude-code-best Date: Mon, 13 Apr 2026 09:22:13 +0800 Subject: [PATCH 08/12] =?UTF-8?q?feat:=20=E5=88=9B=E5=BB=BA=20builtin-tool?= =?UTF-8?q?s=20=E5=8C=85=EF=BC=8C=E6=90=AC=E8=BF=90=E6=89=80=E6=9C=89?= =?UTF-8?q?=E5=B7=A5=E5=85=B7=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 将 src/tools/ 下的全部 60 个工具目录迁移至 packages/builtin-tools/src/tools/, 内部导入路径已更新为 src/ alias 模式。 Co-Authored-By: Claude Opus 4.6 --- packages/builtin-tools/package.json | 16 + packages/builtin-tools/src/index.ts | 70 + .../src/tools/AgentTool/AgentTool.tsx | 1836 ++++++++++++ .../builtin-tools/src/tools/AgentTool/UI.tsx | 1138 +++++++ .../AgentTool/__tests__/agentDisplay.test.ts | 136 + .../__tests__/agentToolUtils.test.ts | 253 ++ .../src/tools/AgentTool/agentColorManager.ts | 66 + .../src/tools/AgentTool/agentDisplay.ts | 104 + .../src/tools/AgentTool/agentMemory.ts | 177 ++ .../tools/AgentTool/agentMemorySnapshot.ts | 197 ++ .../src/tools/AgentTool/agentToolUtils.ts | 687 +++++ .../built-in/claudeCodeGuideAgent.ts | 205 ++ .../tools/AgentTool/built-in/exploreAgent.ts | 83 + .../AgentTool/built-in/generalPurposeAgent.ts | 34 + .../src/tools/AgentTool/built-in/planAgent.ts | 92 + .../built-in/src/tools/BashTool/toolName.ts | 2 + .../src/tools/ExitPlanModeTool/constants.ts | 2 + .../src/tools/FileEditTool/constants.ts | 2 + .../built-in/src/tools/FileReadTool/prompt.ts | 2 + .../src/tools/FileWriteTool/prompt.ts | 2 + .../built-in/src/tools/GlobTool/prompt.ts | 2 + .../built-in/src/tools/GrepTool/prompt.ts | 2 + .../src/tools/NotebookEditTool/constants.ts | 2 + .../src/tools/SendMessageTool/constants.ts | 2 + .../built-in/src/tools/WebFetchTool/prompt.ts | 2 + 
.../src/tools/WebSearchTool/prompt.ts | 2 + .../AgentTool/built-in/src/utils/auth.ts | 2 + .../built-in/src/utils/embeddedTools.ts | 2 + .../built-in/src/utils/settings/settings.ts | 2 + .../AgentTool/built-in/statuslineSetup.ts | 144 + .../AgentTool/built-in/verificationAgent.ts | 152 + .../src/tools/AgentTool/builtInAgents.ts | 72 + .../src/tools/AgentTool/constants.ts | 12 + .../src/tools/AgentTool/forkSubagent.ts | 210 ++ .../src/tools/AgentTool/loadAgentsDir.ts | 755 +++++ .../src/tools/AgentTool/prompt.ts | 287 ++ .../src/tools/AgentTool/resumeAgent.ts | 265 ++ .../src/tools/AgentTool/runAgent.ts | 1000 +++++++ .../src/tools/AgentTool/src/Tool.ts | 4 + .../components/ConfigurableShortcutHint.ts | 2 + .../AgentTool/src/components/CtrlOToExpand.ts | 3 + .../src/components/design-system/Byline.ts | 2 + .../design-system/KeyboardShortcutHint.ts | 2 + .../src/tools/AgentTool/src/types/message.ts | 3 + .../src/tools/AgentTool/src/utils/debug.ts | 2 + .../AgentTool/src/utils/promptCategory.ts | 2 + .../AgentTool/src/utils/settings/constants.ts | 2 + .../AskUserQuestionTool.tsx | 342 +++ .../src/tools/AskUserQuestionTool/prompt.ts | 44 + .../src/bootstrap/state.ts | 3 + .../src/components/MessageResponse.ts | 2 + .../src/constants/figures.ts | 2 + .../src/utils/permissions/PermissionMode.ts | 2 + .../src/tools/BashTool/BashTool.tsx | 1472 +++++++++ .../tools/BashTool/BashToolResultMessage.tsx | 129 + .../builtin-tools/src/tools/BashTool/UI.tsx | 213 ++ .../__tests__/commandSemantics.test.ts | 87 + .../destructiveCommandWarning.test.ts | 112 + .../src/tools/BashTool/bashCommandHelpers.ts | 265 ++ .../src/tools/BashTool/bashPermissions.ts | 2621 +++++++++++++++++ .../src/tools/BashTool/bashSecurity.ts | 2592 ++++++++++++++++ .../src/tools/BashTool/commandSemantics.ts | 140 + .../src/tools/BashTool/commentLabel.ts | 13 + .../BashTool/destructiveCommandWarning.ts | 102 + .../src/tools/BashTool/modeValidation.ts | 115 + .../src/tools/BashTool/pathValidation.ts | 1303 
++++++++ .../src/tools/BashTool/prompt.ts | 369 +++ .../src/tools/BashTool/readOnlyValidation.ts | 1990 +++++++++++++ .../src/tools/BashTool/sedEditParser.ts | 322 ++ .../src/tools/BashTool/sedValidation.ts | 684 +++++ .../src/tools/BashTool/shouldUseSandbox.ts | 153 + .../src/tools/BashTool/src/Tool.ts | 2 + .../src/tools/BashTool/src/bootstrap/state.ts | 2 + .../tools/BashTool/src/hooks/useCanUseTool.ts | 2 + .../src/services/analytics/growthbook.ts | 2 + .../BashTool/src/services/analytics/index.ts | 2 + .../src/tools/BashTool/src/state/AppState.ts | 2 + .../src/tools/BashTool/src/utils/Shell.ts | 2 + .../src/tools/BashTool/src/utils/cwd.ts | 2 + .../src/utils/permissions/filesystem.ts | 2 + .../src/utils/sandbox/sandbox-ui-utils.ts | 2 + .../src/tools/BashTool/toolName.ts | 2 + .../builtin-tools/src/tools/BashTool/utils.ts | 223 ++ .../src/tools/BriefTool/BriefTool.ts | 204 ++ .../builtin-tools/src/tools/BriefTool/UI.tsx | 104 + .../src/tools/BriefTool/attachments.ts | 110 + .../src/tools/BriefTool/prompt.ts | 22 + .../src/tools/BriefTool/upload.ts | 174 ++ .../src/tools/ConfigTool/ConfigTool.ts | 467 +++ .../builtin-tools/src/tools/ConfigTool/UI.tsx | 48 + .../src/tools/ConfigTool/constants.ts | 1 + .../src/tools/ConfigTool/prompt.ts | 93 + .../src/tools/ConfigTool/supportedSettings.ts | 211 ++ .../tools/CtxInspectTool/CtxInspectTool.ts | 80 + .../src/tools/DiscoverSkillsTool/prompt.ts | 3 + .../EnterPlanModeTool/EnterPlanModeTool.ts | 126 + .../src/tools/EnterPlanModeTool/UI.tsx | 41 + .../src/tools/EnterPlanModeTool/constants.ts | 1 + .../src/tools/EnterPlanModeTool/prompt.ts | 170 ++ .../src/constants/figures.ts | 2 + .../src/utils/permissions/PermissionMode.ts | 2 + .../EnterWorktreeTool/EnterWorktreeTool.ts | 127 + .../src/tools/EnterWorktreeTool/UI.tsx | 25 + .../src/tools/EnterWorktreeTool/constants.ts | 1 + .../src/tools/EnterWorktreeTool/prompt.ts | 30 + .../ExitPlanModeTool/ExitPlanModeV2Tool.ts | 493 ++++ .../src/tools/ExitPlanModeTool/UI.tsx | 88 + 
.../src/tools/ExitPlanModeTool/constants.ts | 2 + .../src/tools/ExitPlanModeTool/prompt.ts | 29 + .../src/components/Markdown.ts | 2 + .../src/components/MessageResponse.ts | 2 + .../RejectedPlanMessage.ts | 2 + .../ExitPlanModeTool/src/constants/figures.ts | 2 + .../src/utils/permissions/PermissionMode.ts | 2 + .../ExitWorktreeTool/ExitWorktreeTool.ts | 329 +++ .../src/tools/ExitWorktreeTool/UI.tsx | 33 + .../src/tools/ExitWorktreeTool/constants.ts | 1 + .../src/tools/ExitWorktreeTool/prompt.ts | 32 + .../src/tools/FileEditTool/FileEditTool.ts | 625 ++++ .../src/tools/FileEditTool/UI.tsx | 323 ++ .../FileEditTool/__tests__/utils.test.ts | 208 ++ .../src/tools/FileEditTool/constants.ts | 11 + .../src/tools/FileEditTool/prompt.ts | 28 + .../FileEditToolUseRejectedMessage.ts | 2 + .../src/components/MessageResponse.ts | 2 + .../src/services/analytics/index.ts | 2 + .../src/tools/FileEditTool/src/utils/log.ts | 2 + .../tools/FileEditTool/src/utils/messages.ts | 2 + .../src/tools/FileEditTool/src/utils/path.ts | 2 + .../FileEditTool/src/utils/stringUtils.ts | 2 + .../src/tools/FileEditTool/types.ts | 85 + .../src/tools/FileEditTool/utils.ts | 775 +++++ .../src/tools/FileReadTool/FileReadTool.ts | 1183 ++++++++ .../src/tools/FileReadTool/UI.tsx | 202 ++ .../src/tools/FileReadTool/imageProcessor.ts | 94 + .../src/tools/FileReadTool/limits.ts | 92 + .../src/tools/FileReadTool/prompt.ts | 49 + .../src/services/analytics/growthbook.ts | 2 + .../src/tools/FileReadTool/src/utils/file.ts | 2 + .../tools/FileReadTool/src/utils/messages.ts | 2 + .../src/tools/FileWriteTool/FileWriteTool.ts | 434 +++ .../src/tools/FileWriteTool/UI.tsx | 336 +++ .../src/tools/FileWriteTool/prompt.ts | 18 + .../src/components/MessageResponse.ts | 2 + .../src/services/analytics/index.ts | 2 + .../tools/FileWriteTool/src/utils/messages.ts | 2 + .../src/tools/GlobTool/GlobTool.ts | 198 ++ .../builtin-tools/src/tools/GlobTool/UI.tsx | 65 + .../src/tools/GlobTool/prompt.ts | 7 + 
.../src/components/MessageResponse.ts | 2 + .../src/tools/GlobTool/src/utils/messages.ts | 2 + .../src/tools/GrepTool/GrepTool.ts | 577 ++++ .../builtin-tools/src/tools/GrepTool/UI.tsx | 190 ++ .../src/tools/GrepTool/prompt.ts | 18 + .../src/tools/LSPTool/LSPTool.ts | 860 ++++++ .../builtin-tools/src/tools/LSPTool/UI.tsx | 203 ++ .../LSPTool/__tests__/formatters.test.ts | 197 ++ .../tools/LSPTool/__tests__/schemas.test.ts | 37 + .../src/tools/LSPTool/formatters.ts | 592 ++++ .../builtin-tools/src/tools/LSPTool/prompt.ts | 21 + .../src/tools/LSPTool/schemas.ts | 215 ++ .../src/tools/LSPTool/symbolContext.ts | 90 + .../ListMcpResourcesTool.ts | 123 + .../src/tools/ListMcpResourcesTool/UI.tsx | 35 + .../src/tools/ListMcpResourcesTool/prompt.ts | 20 + .../src/tools/ListPeersTool/ListPeersTool.ts | 107 + .../src/tools/MCPTool/MCPTool.ts | 77 + .../builtin-tools/src/tools/MCPTool/UI.tsx | 395 +++ .../__tests__/classifyForCollapse.test.ts | 146 + .../src/tools/MCPTool/classifyForCollapse.ts | 604 ++++ .../builtin-tools/src/tools/MCPTool/prompt.ts | 3 + .../src/tools/McpAuthTool/McpAuthTool.ts | 215 ++ .../src/tools/MonitorTool/MonitorTool.tsx | 190 ++ .../NotebookEditTool/NotebookEditTool.ts | 490 +++ .../src/tools/NotebookEditTool/UI.tsx | 125 + .../src/tools/NotebookEditTool/constants.ts | 2 + .../src/tools/NotebookEditTool/prompt.ts | 3 + .../NotebookEditTool/src/types/message.ts | 3 + .../NotebookEditTool/src/utils/fileHistory.ts | 3 + .../NotebookEditTool/src/utils/messages.ts | 2 + .../tools/NotebookEditTool/src/utils/theme.ts | 2 + .../OverflowTestTool/OverflowTestTool.ts | 3 + .../tools/PowerShellTool/PowerShellTool.tsx | 1267 ++++++++ .../src/tools/PowerShellTool/UI.tsx | 181 ++ .../__tests__/commandSemantics.test.ts | 147 + .../destructiveCommandWarning.test.ts | 208 ++ .../__tests__/gitSafety.test.ts | 130 + .../__tests__/powershellSecurity.test.ts | 294 ++ .../src/tools/PowerShellTool/clmTypes.ts | 211 ++ .../tools/PowerShellTool/commandSemantics.ts | 142 + 
.../tools/PowerShellTool/commonParameters.ts | 30 + .../destructiveCommandWarning.ts | 109 + .../src/tools/PowerShellTool/gitSafety.ts | 176 ++ .../tools/PowerShellTool/modeValidation.ts | 404 +++ .../tools/PowerShellTool/pathValidation.ts | 2049 +++++++++++++ .../PowerShellTool/powershellPermissions.ts | 1648 +++++++++++ .../PowerShellTool/powershellSecurity.ts | 1090 +++++++ .../src/tools/PowerShellTool/prompt.ts | 145 + .../PowerShellTool/readOnlyValidation.ts | 1823 ++++++++++++ .../PowerShellTool/src/hooks/useCanUseTool.ts | 2 + .../PowerShellTool/src/state/AppState.ts | 2 + .../src/tools/PowerShellTool/toolName.ts | 2 + .../PushNotificationTool.ts | 87 + .../src/tools/REPLTool/REPLTool.ts | 89 + .../src/tools/REPLTool/constants.ts | 46 + .../src/tools/REPLTool/primitiveTools.ts | 39 + .../ReadMcpResourceTool.ts | 158 + .../src/tools/ReadMcpResourceTool/UI.tsx | 44 + .../src/tools/ReadMcpResourceTool/prompt.ts | 16 + .../RemoteTriggerTool/RemoteTriggerTool.ts | 161 + .../src/tools/RemoteTriggerTool/UI.tsx | 20 + .../src/tools/RemoteTriggerTool/prompt.ts | 15 + .../ReviewArtifactTool/ReviewArtifactTool.ts | 142 + .../tools/ScheduleCronTool/CronCreateTool.ts | 157 + .../tools/ScheduleCronTool/CronDeleteTool.ts | 95 + .../tools/ScheduleCronTool/CronListTool.ts | 97 + .../src/tools/ScheduleCronTool/UI.tsx | 75 + .../src/tools/ScheduleCronTool/prompt.ts | 128 + .../tools/SendMessageTool/SendMessageTool.ts | 970 ++++++ .../src/tools/SendMessageTool/UI.tsx | 40 + .../src/tools/SendMessageTool/constants.ts | 1 + .../src/tools/SendMessageTool/prompt.ts | 49 + .../SendUserFileTool/SendUserFileTool.ts | 84 + .../src/tools/SendUserFileTool/prompt.ts | 1 + .../src/tools/SkillTool/SkillTool.ts | 1109 +++++++ .../builtin-tools/src/tools/SkillTool/UI.tsx | 181 ++ .../src/tools/SkillTool/constants.ts | 1 + .../src/tools/SkillTool/prompt.ts | 241 ++ .../src/tools/SkillTool/src/Tool.ts | 8 + .../tools/SkillTool/src/bootstrap/state.ts | 2 + .../src/tools/SkillTool/src/commands.ts 
| 9 + .../SkillTool/src/components/CtrlOToExpand.ts | 2 + .../components/FallbackToolUseErrorMessage.ts | 2 + .../FallbackToolUseRejectedMessage.ts | 2 + .../src/tools/SkillTool/src/types/command.ts | 2 + .../src/tools/SkillTool/src/types/message.ts | 6 + .../src/tools/SkillTool/src/utils/debug.ts | 2 + .../src/utils/permissions/PermissionResult.ts | 2 + .../src/utils/permissions/permissions.ts | 2 + .../src/utils/plugins/pluginIdentifier.ts | 3 + .../src/utils/telemetry/pluginTelemetry.ts | 2 + .../src/tools/SleepTool/SleepTool.ts | 134 + .../src/tools/SleepTool/prompt.ts | 17 + .../src/tools/SnipTool/SnipTool.ts | 92 + .../src/tools/SnipTool/prompt.ts | 1 + .../tools/SubscribePRTool/SubscribePRTool.ts | 88 + .../SuggestBackgroundPRTool.ts | 84 + .../SyntheticOutputTool.ts | 163 + .../tools/TaskCreateTool/TaskCreateTool.ts | 138 + .../src/tools/TaskCreateTool/constants.ts | 1 + .../src/tools/TaskCreateTool/prompt.ts | 56 + .../src/tools/TaskGetTool/TaskGetTool.ts | 128 + .../src/tools/TaskGetTool/constants.ts | 1 + .../src/tools/TaskGetTool/prompt.ts | 24 + .../src/tools/TaskListTool/TaskListTool.ts | 116 + .../src/tools/TaskListTool/constants.ts | 1 + .../src/tools/TaskListTool/prompt.ts | 49 + .../tools/TaskOutputTool/TaskOutputTool.tsx | 564 ++++ .../src/tools/TaskOutputTool/constants.ts | 1 + .../src/tools/TaskStopTool/TaskStopTool.ts | 131 + .../src/tools/TaskStopTool/UI.tsx | 50 + .../src/tools/TaskStopTool/prompt.ts | 8 + .../tools/TaskUpdateTool/TaskUpdateTool.ts | 406 +++ .../src/tools/TaskUpdateTool/constants.ts | 1 + .../src/tools/TaskUpdateTool/prompt.ts | 77 + .../tools/TeamCreateTool/TeamCreateTool.ts | 240 ++ .../src/tools/TeamCreateTool/UI.tsx | 6 + .../src/tools/TeamCreateTool/constants.ts | 1 + .../src/tools/TeamCreateTool/prompt.ts | 113 + .../tools/TeamDeleteTool/TeamDeleteTool.ts | 139 + .../src/tools/TeamDeleteTool/UI.tsx | 25 + .../src/tools/TeamDeleteTool/constants.ts | 1 + .../src/tools/TeamDeleteTool/prompt.ts | 16 + 
.../TerminalCaptureTool.ts | 82 + .../src/tools/TerminalCaptureTool/prompt.ts | 1 + .../src/tools/TodoWriteTool/TodoWriteTool.ts | 115 + .../src/tools/TodoWriteTool/constants.ts | 1 + .../src/tools/TodoWriteTool/prompt.ts | 184 ++ .../tools/ToolSearchTool/ToolSearchTool.ts | 471 +++ .../src/tools/ToolSearchTool/constants.ts | 1 + .../src/tools/ToolSearchTool/prompt.ts | 121 + .../tools/TungstenTool/TungstenLiveMonitor.ts | 2 + .../src/tools/TungstenTool/TungstenTool.js | 7 + .../src/tools/TungstenTool/TungstenTool.ts | 6 + .../VerifyPlanExecutionTool.ts | 93 + .../VerifyPlanExecutionTool/constants.ts | 1 + .../tools/WebBrowserTool/WebBrowserPanel.ts | 3 + .../tools/WebBrowserTool/WebBrowserTool.ts | 97 + .../src/tools/WebFetchTool/UI.tsx | 67 + .../src/tools/WebFetchTool/WebFetchTool.ts | 318 ++ .../__tests__/preapproved.test.ts | 78 + .../__tests__/urlValidation.test.ts | 149 + .../src/tools/WebFetchTool/preapproved.ts | 166 ++ .../src/tools/WebFetchTool/prompt.ts | 46 + .../src/tools/WebFetchTool/utils.ts | 530 ++++ .../src/tools/WebSearchTool/UI.tsx | 127 + .../src/tools/WebSearchTool/WebSearchTool.ts | 221 ++ .../__tests__/adapterFactory.test.ts | 59 + .../__tests__/bingAdapter.integration.ts | 82 + .../__tests__/bingAdapter.test.ts | 499 ++++ .../__tests__/braveAdapter.extract.test.ts | 106 + .../__tests__/braveAdapter.integration.ts | 91 + .../__tests__/braveAdapter.test.ts | 273 ++ .../WebSearchTool/adapters/apiAdapter.ts | 173 ++ .../WebSearchTool/adapters/bingAdapter.ts | 204 ++ .../WebSearchTool/adapters/braveAdapter.ts | 169 ++ .../src/tools/WebSearchTool/adapters/index.ts | 47 + .../src/tools/WebSearchTool/adapters/types.ts | 22 + .../src/tools/WebSearchTool/prompt.ts | 34 + .../WebSearchTool/src/constants/common.ts | 2 + .../src/utils/model/providers.ts | 2 + .../src/utils/permissions/PermissionResult.ts | 2 + .../WorkflowPermissionRequest.tsx | 166 ++ .../src/tools/WorkflowTool/WorkflowTool.ts | 74 + .../src/tools/WorkflowTool/bundled/index.ts | 15 + 
.../src/tools/WorkflowTool/constants.ts | 3 + .../WorkflowTool/createWorkflowCommand.ts | 41 + .../__tests__/gitOperationTracking.test.ts | 195 ++ .../src/tools/shared/gitOperationTracking.ts | 277 ++ .../src/tools/shared/spawnMultiAgent.ts | 1093 +++++++ .../src/tools/src/types/message.ts | 5 + .../tools/testing/TestingPermissionTool.tsx | 76 + packages/builtin-tools/src/tools/utils.ts | 40 + 323 files changed, 58392 insertions(+) create mode 100644 packages/builtin-tools/package.json create mode 100644 packages/builtin-tools/src/index.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/AgentTool.tsx create mode 100644 packages/builtin-tools/src/tools/AgentTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/AgentTool/__tests__/agentDisplay.test.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/__tests__/agentToolUtils.test.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/agentColorManager.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/agentDisplay.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/agentMemory.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/agentMemorySnapshot.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/agentToolUtils.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/claudeCodeGuideAgent.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/exploreAgent.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/generalPurposeAgent.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/planAgent.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/BashTool/toolName.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/ExitPlanModeTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/FileEditTool/constants.ts create mode 100644 
packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/FileReadTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/FileWriteTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/GlobTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/GrepTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/NotebookEditTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/SendMessageTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/WebFetchTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/WebSearchTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/auth.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/embeddedTools.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/settings/settings.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/statuslineSetup.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/built-in/verificationAgent.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/builtInAgents.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/forkSubagent.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/loadAgentsDir.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/resumeAgent.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/runAgent.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/src/Tool.ts create mode 100644 
packages/builtin-tools/src/tools/AgentTool/src/components/ConfigurableShortcutHint.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/src/components/CtrlOToExpand.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/src/components/design-system/Byline.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/src/components/design-system/KeyboardShortcutHint.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/src/types/message.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/src/utils/debug.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/src/utils/promptCategory.ts create mode 100644 packages/builtin-tools/src/tools/AgentTool/src/utils/settings/constants.ts create mode 100644 packages/builtin-tools/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx create mode 100644 packages/builtin-tools/src/tools/AskUserQuestionTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/AskUserQuestionTool/src/bootstrap/state.ts create mode 100644 packages/builtin-tools/src/tools/AskUserQuestionTool/src/components/MessageResponse.ts create mode 100644 packages/builtin-tools/src/tools/AskUserQuestionTool/src/constants/figures.ts create mode 100644 packages/builtin-tools/src/tools/AskUserQuestionTool/src/utils/permissions/PermissionMode.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/BashTool.tsx create mode 100644 packages/builtin-tools/src/tools/BashTool/BashToolResultMessage.tsx create mode 100644 packages/builtin-tools/src/tools/BashTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/BashTool/__tests__/commandSemantics.test.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/__tests__/destructiveCommandWarning.test.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/bashCommandHelpers.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/bashPermissions.ts create mode 100644 
packages/builtin-tools/src/tools/BashTool/bashSecurity.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/commandSemantics.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/commentLabel.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/destructiveCommandWarning.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/modeValidation.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/pathValidation.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/readOnlyValidation.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/sedEditParser.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/sedValidation.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/shouldUseSandbox.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/src/Tool.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/src/bootstrap/state.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/src/hooks/useCanUseTool.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/src/services/analytics/growthbook.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/src/services/analytics/index.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/src/state/AppState.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/src/utils/Shell.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/src/utils/cwd.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/src/utils/permissions/filesystem.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/src/utils/sandbox/sandbox-ui-utils.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/toolName.ts create mode 100644 packages/builtin-tools/src/tools/BashTool/utils.ts create mode 100644 packages/builtin-tools/src/tools/BriefTool/BriefTool.ts create mode 100644 
packages/builtin-tools/src/tools/BriefTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/BriefTool/attachments.ts create mode 100644 packages/builtin-tools/src/tools/BriefTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/BriefTool/upload.ts create mode 100644 packages/builtin-tools/src/tools/ConfigTool/ConfigTool.ts create mode 100644 packages/builtin-tools/src/tools/ConfigTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/ConfigTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/ConfigTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/ConfigTool/supportedSettings.ts create mode 100644 packages/builtin-tools/src/tools/CtxInspectTool/CtxInspectTool.ts create mode 100644 packages/builtin-tools/src/tools/DiscoverSkillsTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/EnterPlanModeTool/EnterPlanModeTool.ts create mode 100644 packages/builtin-tools/src/tools/EnterPlanModeTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/EnterPlanModeTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/EnterPlanModeTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/EnterPlanModeTool/src/constants/figures.ts create mode 100644 packages/builtin-tools/src/tools/EnterPlanModeTool/src/utils/permissions/PermissionMode.ts create mode 100644 packages/builtin-tools/src/tools/EnterWorktreeTool/EnterWorktreeTool.ts create mode 100644 packages/builtin-tools/src/tools/EnterWorktreeTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/EnterWorktreeTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/EnterWorktreeTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/ExitPlanModeTool/ExitPlanModeV2Tool.ts create mode 100644 packages/builtin-tools/src/tools/ExitPlanModeTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/ExitPlanModeTool/constants.ts create mode 100644 
packages/builtin-tools/src/tools/ExitPlanModeTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/ExitPlanModeTool/src/components/Markdown.ts create mode 100644 packages/builtin-tools/src/tools/ExitPlanModeTool/src/components/MessageResponse.ts create mode 100644 packages/builtin-tools/src/tools/ExitPlanModeTool/src/components/messages/UserToolResultMessage/RejectedPlanMessage.ts create mode 100644 packages/builtin-tools/src/tools/ExitPlanModeTool/src/constants/figures.ts create mode 100644 packages/builtin-tools/src/tools/ExitPlanModeTool/src/utils/permissions/PermissionMode.ts create mode 100644 packages/builtin-tools/src/tools/ExitWorktreeTool/ExitWorktreeTool.ts create mode 100644 packages/builtin-tools/src/tools/ExitWorktreeTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/ExitWorktreeTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/ExitWorktreeTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/FileEditTool.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/FileEditTool/__tests__/utils.test.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/src/components/FileEditToolUseRejectedMessage.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/src/components/MessageResponse.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/src/services/analytics/index.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/src/utils/log.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/src/utils/messages.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/src/utils/path.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/src/utils/stringUtils.ts create mode 100644 
packages/builtin-tools/src/tools/FileEditTool/types.ts create mode 100644 packages/builtin-tools/src/tools/FileEditTool/utils.ts create mode 100644 packages/builtin-tools/src/tools/FileReadTool/FileReadTool.ts create mode 100644 packages/builtin-tools/src/tools/FileReadTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/FileReadTool/imageProcessor.ts create mode 100644 packages/builtin-tools/src/tools/FileReadTool/limits.ts create mode 100644 packages/builtin-tools/src/tools/FileReadTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/FileReadTool/src/services/analytics/growthbook.ts create mode 100644 packages/builtin-tools/src/tools/FileReadTool/src/utils/file.ts create mode 100644 packages/builtin-tools/src/tools/FileReadTool/src/utils/messages.ts create mode 100644 packages/builtin-tools/src/tools/FileWriteTool/FileWriteTool.ts create mode 100644 packages/builtin-tools/src/tools/FileWriteTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/FileWriteTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/FileWriteTool/src/components/MessageResponse.ts create mode 100644 packages/builtin-tools/src/tools/FileWriteTool/src/services/analytics/index.ts create mode 100644 packages/builtin-tools/src/tools/FileWriteTool/src/utils/messages.ts create mode 100644 packages/builtin-tools/src/tools/GlobTool/GlobTool.ts create mode 100644 packages/builtin-tools/src/tools/GlobTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/GlobTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/GlobTool/src/components/MessageResponse.ts create mode 100644 packages/builtin-tools/src/tools/GlobTool/src/utils/messages.ts create mode 100644 packages/builtin-tools/src/tools/GrepTool/GrepTool.ts create mode 100644 packages/builtin-tools/src/tools/GrepTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/GrepTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/LSPTool/LSPTool.ts create mode 100644 
packages/builtin-tools/src/tools/LSPTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/LSPTool/__tests__/formatters.test.ts create mode 100644 packages/builtin-tools/src/tools/LSPTool/__tests__/schemas.test.ts create mode 100644 packages/builtin-tools/src/tools/LSPTool/formatters.ts create mode 100644 packages/builtin-tools/src/tools/LSPTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/LSPTool/schemas.ts create mode 100644 packages/builtin-tools/src/tools/LSPTool/symbolContext.ts create mode 100644 packages/builtin-tools/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts create mode 100644 packages/builtin-tools/src/tools/ListMcpResourcesTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/ListMcpResourcesTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/ListPeersTool/ListPeersTool.ts create mode 100644 packages/builtin-tools/src/tools/MCPTool/MCPTool.ts create mode 100644 packages/builtin-tools/src/tools/MCPTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/MCPTool/__tests__/classifyForCollapse.test.ts create mode 100644 packages/builtin-tools/src/tools/MCPTool/classifyForCollapse.ts create mode 100644 packages/builtin-tools/src/tools/MCPTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/McpAuthTool/McpAuthTool.ts create mode 100644 packages/builtin-tools/src/tools/MonitorTool/MonitorTool.tsx create mode 100644 packages/builtin-tools/src/tools/NotebookEditTool/NotebookEditTool.ts create mode 100644 packages/builtin-tools/src/tools/NotebookEditTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/NotebookEditTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/NotebookEditTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/NotebookEditTool/src/types/message.ts create mode 100644 packages/builtin-tools/src/tools/NotebookEditTool/src/utils/fileHistory.ts create mode 100644 
packages/builtin-tools/src/tools/NotebookEditTool/src/utils/messages.ts create mode 100644 packages/builtin-tools/src/tools/NotebookEditTool/src/utils/theme.ts create mode 100644 packages/builtin-tools/src/tools/OverflowTestTool/OverflowTestTool.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/PowerShellTool.tsx create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/__tests__/commandSemantics.test.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/__tests__/destructiveCommandWarning.test.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/__tests__/gitSafety.test.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/__tests__/powershellSecurity.test.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/clmTypes.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/commandSemantics.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/commonParameters.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/destructiveCommandWarning.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/gitSafety.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/modeValidation.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/pathValidation.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/powershellPermissions.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/powershellSecurity.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/readOnlyValidation.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/src/hooks/useCanUseTool.ts create mode 100644 packages/builtin-tools/src/tools/PowerShellTool/src/state/AppState.ts create mode 100644 
packages/builtin-tools/src/tools/PowerShellTool/toolName.ts create mode 100644 packages/builtin-tools/src/tools/PushNotificationTool/PushNotificationTool.ts create mode 100644 packages/builtin-tools/src/tools/REPLTool/REPLTool.ts create mode 100644 packages/builtin-tools/src/tools/REPLTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/REPLTool/primitiveTools.ts create mode 100644 packages/builtin-tools/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts create mode 100644 packages/builtin-tools/src/tools/ReadMcpResourceTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/ReadMcpResourceTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/RemoteTriggerTool/RemoteTriggerTool.ts create mode 100644 packages/builtin-tools/src/tools/RemoteTriggerTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/RemoteTriggerTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/ReviewArtifactTool/ReviewArtifactTool.ts create mode 100644 packages/builtin-tools/src/tools/ScheduleCronTool/CronCreateTool.ts create mode 100644 packages/builtin-tools/src/tools/ScheduleCronTool/CronDeleteTool.ts create mode 100644 packages/builtin-tools/src/tools/ScheduleCronTool/CronListTool.ts create mode 100644 packages/builtin-tools/src/tools/ScheduleCronTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/ScheduleCronTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/SendMessageTool/SendMessageTool.ts create mode 100644 packages/builtin-tools/src/tools/SendMessageTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/SendMessageTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/SendMessageTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/SendUserFileTool/SendUserFileTool.ts create mode 100644 packages/builtin-tools/src/tools/SendUserFileTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/SkillTool.ts create mode 100644 
packages/builtin-tools/src/tools/SkillTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/SkillTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/Tool.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/bootstrap/state.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/commands.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/components/CtrlOToExpand.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/components/FallbackToolUseErrorMessage.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/components/FallbackToolUseRejectedMessage.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/types/command.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/types/message.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/utils/debug.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/utils/permissions/PermissionResult.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/utils/permissions/permissions.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/utils/plugins/pluginIdentifier.ts create mode 100644 packages/builtin-tools/src/tools/SkillTool/src/utils/telemetry/pluginTelemetry.ts create mode 100644 packages/builtin-tools/src/tools/SleepTool/SleepTool.ts create mode 100644 packages/builtin-tools/src/tools/SleepTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/SnipTool/SnipTool.ts create mode 100644 packages/builtin-tools/src/tools/SnipTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/SubscribePRTool/SubscribePRTool.ts create mode 100644 packages/builtin-tools/src/tools/SuggestBackgroundPRTool/SuggestBackgroundPRTool.ts create mode 100644 packages/builtin-tools/src/tools/SyntheticOutputTool/SyntheticOutputTool.ts create mode 100644 
packages/builtin-tools/src/tools/TaskCreateTool/TaskCreateTool.ts create mode 100644 packages/builtin-tools/src/tools/TaskCreateTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/TaskCreateTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/TaskGetTool/TaskGetTool.ts create mode 100644 packages/builtin-tools/src/tools/TaskGetTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/TaskGetTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/TaskListTool/TaskListTool.ts create mode 100644 packages/builtin-tools/src/tools/TaskListTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/TaskListTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/TaskOutputTool/TaskOutputTool.tsx create mode 100644 packages/builtin-tools/src/tools/TaskOutputTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/TaskStopTool/TaskStopTool.ts create mode 100644 packages/builtin-tools/src/tools/TaskStopTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/TaskStopTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/TaskUpdateTool/TaskUpdateTool.ts create mode 100644 packages/builtin-tools/src/tools/TaskUpdateTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/TaskUpdateTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/TeamCreateTool/TeamCreateTool.ts create mode 100644 packages/builtin-tools/src/tools/TeamCreateTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/TeamCreateTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/TeamCreateTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/TeamDeleteTool/TeamDeleteTool.ts create mode 100644 packages/builtin-tools/src/tools/TeamDeleteTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/TeamDeleteTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/TeamDeleteTool/prompt.ts create mode 100644 
packages/builtin-tools/src/tools/TerminalCaptureTool/TerminalCaptureTool.ts create mode 100644 packages/builtin-tools/src/tools/TerminalCaptureTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/TodoWriteTool/TodoWriteTool.ts create mode 100644 packages/builtin-tools/src/tools/TodoWriteTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/TodoWriteTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/ToolSearchTool/ToolSearchTool.ts create mode 100644 packages/builtin-tools/src/tools/ToolSearchTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/ToolSearchTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/TungstenTool/TungstenLiveMonitor.ts create mode 100644 packages/builtin-tools/src/tools/TungstenTool/TungstenTool.js create mode 100644 packages/builtin-tools/src/tools/TungstenTool/TungstenTool.ts create mode 100644 packages/builtin-tools/src/tools/VerifyPlanExecutionTool/VerifyPlanExecutionTool.ts create mode 100644 packages/builtin-tools/src/tools/VerifyPlanExecutionTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/WebBrowserTool/WebBrowserPanel.ts create mode 100644 packages/builtin-tools/src/tools/WebBrowserTool/WebBrowserTool.ts create mode 100644 packages/builtin-tools/src/tools/WebFetchTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/WebFetchTool/WebFetchTool.ts create mode 100644 packages/builtin-tools/src/tools/WebFetchTool/__tests__/preapproved.test.ts create mode 100644 packages/builtin-tools/src/tools/WebFetchTool/__tests__/urlValidation.test.ts create mode 100644 packages/builtin-tools/src/tools/WebFetchTool/preapproved.ts create mode 100644 packages/builtin-tools/src/tools/WebFetchTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/WebFetchTool/utils.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/UI.tsx create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/WebSearchTool.ts create mode 100644 
packages/builtin-tools/src/tools/WebSearchTool/__tests__/adapterFactory.test.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/__tests__/bingAdapter.integration.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/__tests__/bingAdapter.test.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/__tests__/braveAdapter.extract.test.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/__tests__/braveAdapter.integration.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/__tests__/braveAdapter.test.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/adapters/apiAdapter.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/adapters/bingAdapter.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/adapters/braveAdapter.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/adapters/index.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/adapters/types.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/prompt.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/src/constants/common.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/src/utils/model/providers.ts create mode 100644 packages/builtin-tools/src/tools/WebSearchTool/src/utils/permissions/PermissionResult.ts create mode 100644 packages/builtin-tools/src/tools/WorkflowTool/WorkflowPermissionRequest.tsx create mode 100644 packages/builtin-tools/src/tools/WorkflowTool/WorkflowTool.ts create mode 100644 packages/builtin-tools/src/tools/WorkflowTool/bundled/index.ts create mode 100644 packages/builtin-tools/src/tools/WorkflowTool/constants.ts create mode 100644 packages/builtin-tools/src/tools/WorkflowTool/createWorkflowCommand.ts create mode 100644 packages/builtin-tools/src/tools/shared/__tests__/gitOperationTracking.test.ts create mode 100644 packages/builtin-tools/src/tools/shared/gitOperationTracking.ts 
create mode 100644 packages/builtin-tools/src/tools/shared/spawnMultiAgent.ts create mode 100644 packages/builtin-tools/src/tools/src/types/message.ts create mode 100644 packages/builtin-tools/src/tools/testing/TestingPermissionTool.tsx create mode 100644 packages/builtin-tools/src/tools/utils.ts diff --git a/packages/builtin-tools/package.json b/packages/builtin-tools/package.json new file mode 100644 index 000000000..770526ef5 --- /dev/null +++ b/packages/builtin-tools/package.json @@ -0,0 +1,16 @@ +{ + "name": "builtin-tools", + "version": "1.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts", + "./tools/*": "./src/tools/*", + "./utils": "./src/utils.ts" + }, + "dependencies": { + "agent-tools": "workspace:*" + } +} diff --git a/packages/builtin-tools/src/index.ts b/packages/builtin-tools/src/index.ts new file mode 100644 index 000000000..8609978c6 --- /dev/null +++ b/packages/builtin-tools/src/index.ts @@ -0,0 +1,70 @@ +// builtin-tools — All tool implementations for Claude Code +// This barrel file re-exports the main tool constants and utilities. 
+// For specific submodules, use deep imports: 'builtin-tools/tools/XTool/XTool.js' + +// ============================================================================= +// Main tool exports (used by src/tools.ts) +// ============================================================================= + +// Core tools +export { AgentTool } from './tools/AgentTool/AgentTool.js' +export { AskUserQuestionTool } from './tools/AskUserQuestionTool/AskUserQuestionTool.js' +export { BashTool } from './tools/BashTool/BashTool.js' +export { BriefTool } from './tools/BriefTool/BriefTool.js' +export { ConfigTool } from './tools/ConfigTool/ConfigTool.js' +export { EnterPlanModeTool } from './tools/EnterPlanModeTool/EnterPlanModeTool.js' +export { EnterWorktreeTool } from './tools/EnterWorktreeTool/EnterWorktreeTool.js' +export { ExitPlanModeV2Tool } from './tools/ExitPlanModeTool/ExitPlanModeV2Tool.js' +export { ExitWorktreeTool } from './tools/ExitWorktreeTool/ExitWorktreeTool.js' +export { FileEditTool } from './tools/FileEditTool/FileEditTool.js' +export { FileReadTool } from './tools/FileReadTool/FileReadTool.js' +export { FileWriteTool } from './tools/FileWriteTool/FileWriteTool.js' +export { GlobTool } from './tools/GlobTool/GlobTool.js' +export { GrepTool } from './tools/GrepTool/GrepTool.js' +export { LSPTool } from './tools/LSPTool/LSPTool.js' +export { ListMcpResourcesTool } from './tools/ListMcpResourcesTool/ListMcpResourcesTool.js' +export { ReadMcpResourceTool } from './tools/ReadMcpResourceTool/ReadMcpResourceTool.js' +export { NotebookEditTool } from './tools/NotebookEditTool/NotebookEditTool.js' +export { SkillTool } from './tools/SkillTool/SkillTool.js' +export { TaskOutputTool } from './tools/TaskOutputTool/TaskOutputTool.js' +export { TaskStopTool } from './tools/TaskStopTool/TaskStopTool.js' +export { TodoWriteTool } from './tools/TodoWriteTool/TodoWriteTool.js' +export { ToolSearchTool } from './tools/ToolSearchTool/ToolSearchTool.js' +export { TungstenTool } from 
'./tools/TungstenTool/TungstenTool.js' +export { WebFetchTool } from './tools/WebFetchTool/WebFetchTool.js' +export { WebSearchTool } from './tools/WebSearchTool/WebSearchTool.js' +export { TestingPermissionTool } from './tools/testing/TestingPermissionTool.js' + +// Feature-gated tools +export { OVERFLOW_TEST_TOOL_NAME } from './tools/OverflowTestTool/OverflowTestTool.js' +export { CtxInspectTool } from './tools/CtxInspectTool/CtxInspectTool.js' +export { ListPeersTool } from './tools/ListPeersTool/ListPeersTool.js' +export { MonitorTool } from './tools/MonitorTool/MonitorTool.js' +export { PowerShellTool } from './tools/PowerShellTool/PowerShellTool.js' +export { PushNotificationTool } from './tools/PushNotificationTool/PushNotificationTool.js' +export { REPLTool } from './tools/REPLTool/REPLTool.js' +export { RemoteTriggerTool } from './tools/RemoteTriggerTool/RemoteTriggerTool.js' +export { ReviewArtifactTool } from './tools/ReviewArtifactTool/ReviewArtifactTool.js' +export { CronCreateTool } from './tools/ScheduleCronTool/CronCreateTool.js' +export { CronDeleteTool } from './tools/ScheduleCronTool/CronDeleteTool.js' +export { CronListTool } from './tools/ScheduleCronTool/CronListTool.js' +export { SendMessageTool } from './tools/SendMessageTool/SendMessageTool.js' +export { SendUserFileTool } from './tools/SendUserFileTool/SendUserFileTool.js' +export { SleepTool } from './tools/SleepTool/SleepTool.js' +export { SnipTool } from './tools/SnipTool/SnipTool.js' +export { SubscribePRTool } from './tools/SubscribePRTool/SubscribePRTool.js' +export { SuggestBackgroundPRTool } from './tools/SuggestBackgroundPRTool/SuggestBackgroundPRTool.js' +export { TeamCreateTool } from './tools/TeamCreateTool/TeamCreateTool.js' +export { TeamDeleteTool } from './tools/TeamDeleteTool/TeamDeleteTool.js' +export { TerminalCaptureTool } from './tools/TerminalCaptureTool/TerminalCaptureTool.js' +export { VerifyPlanExecutionTool } from 
'./tools/VerifyPlanExecutionTool/VerifyPlanExecutionTool.js' +export { WebBrowserTool } from './tools/WebBrowserTool/WebBrowserTool.js' +export { WorkflowTool } from './tools/WorkflowTool/WorkflowTool.js' +export { initBundledWorkflows } from './tools/WorkflowTool/bundled/index.js' +export { getWorkflowCommands } from './tools/WorkflowTool/createWorkflowCommand.js' + +// Constants +export { SYNTHETIC_OUTPUT_TOOL_NAME, createSyntheticOutputTool } from './tools/SyntheticOutputTool/SyntheticOutputTool.js' + +// Shared utilities +export { tagMessagesWithToolUseID, getToolUseIDFromParentMessage } from './tools/utils.js' diff --git a/packages/builtin-tools/src/tools/AgentTool/AgentTool.tsx b/packages/builtin-tools/src/tools/AgentTool/AgentTool.tsx new file mode 100644 index 000000000..82b729386 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/AgentTool.tsx @@ -0,0 +1,1836 @@ +import { feature } from 'bun:bundle' +import * as React from 'react' +import { buildTool, type ToolDef, toolMatchesName } from 'src/Tool.js' +import type { + AssistantMessage, + Message as MessageType, + NormalizedUserMessage, +} from 'src/types/message.js' +import { getQuerySourceForAgent } from 'src/utils/promptCategory.js' +import { z } from 'zod/v4' +import { + clearInvokedSkillsForAgent, + getSdkAgentProgressSummariesEnabled, +} from 'src/bootstrap/state.js' +import { + enhanceSystemPromptWithEnvDetails, + getSystemPrompt, +} from 'src/constants/prompts.js' +import { isCoordinatorMode } from 'src/coordinator/coordinatorMode.js' +import { startAgentSummarization } from 'src/services/AgentSummary/agentSummary.js' +import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js' +import { + type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + logEvent, +} from 'src/services/analytics/index.js' +import { clearDumpState } from 'src/services/api/dumpPrompts.js' +import { + completeAgentTask as completeAsyncAgent, + 
createActivityDescriptionResolver, + createProgressTracker, + enqueueAgentNotification, + failAgentTask as failAsyncAgent, + getProgressUpdate, + getTokenCountFromTracker, + isLocalAgentTask, + killAsyncAgent, + registerAgentForeground, + registerAsyncAgent, + unregisterAgentForeground, + updateAgentProgress as updateAsyncAgentProgress, + updateProgressFromMessage, +} from 'src/tasks/LocalAgentTask/LocalAgentTask.js' +import { + checkRemoteAgentEligibility, + formatPreconditionError, + getRemoteTaskSessionUrl, + registerRemoteAgentTask, + type BackgroundRemoteSessionPrecondition, +} from 'src/tasks/RemoteAgentTask/RemoteAgentTask.js' +import { assembleToolPool } from 'src/tools.js' +import { asAgentId } from 'src/types/ids.js' +import { runWithAgentContext, type SubagentContext } from 'src/utils/agentContext.js' +import { isAgentSwarmsEnabled } from 'src/utils/agentSwarmsEnabled.js' +import { getCwd, runWithCwdOverride } from 'src/utils/cwd.js' +import { logForDebugging } from 'src/utils/debug.js' +import { isEnvTruthy } from 'src/utils/envUtils.js' +import { AbortError, errorMessage, toError } from 'src/utils/errors.js' +import type { CacheSafeParams } from 'src/utils/forkedAgent.js' +import { lazySchema } from 'src/utils/lazySchema.js' +import { + createUserMessage, + extractTextContent, + isSyntheticMessage, + normalizeMessages, +} from 'src/utils/messages.js' +import { getAgentModel } from 'src/utils/model/agent.js' +import { permissionModeSchema } from 'src/utils/permissions/PermissionMode.js' +import type { PermissionResult } from 'src/utils/permissions/PermissionResult.js' +import { + filterDeniedAgents, + getDenyRuleForAgent, +} from 'src/utils/permissions/permissions.js' +import { enqueueSdkEvent } from 'src/utils/sdkEventQueue.js' +import { writeAgentMetadata } from 'src/utils/sessionStorage.js' +import { sleep } from 'src/utils/sleep.js' +import { buildEffectiveSystemPrompt } from 'src/utils/systemPrompt.js' +import { asSystemPrompt } from 
'src/utils/systemPromptType.js' +import { getTaskOutputPath } from 'src/utils/task/diskOutput.js' +import { getParentSessionId, isTeammate } from 'src/utils/teammate.js' +import { isInProcessTeammate } from 'src/utils/teammateContext.js' +import { teleportToRemote } from 'src/utils/teleport.js' +import { getAssistantMessageContentLength } from 'src/utils/tokens.js' +import { createAgentId } from 'src/utils/uuid.js' +import { + createAgentWorktree, + hasWorktreeChanges, + removeAgentWorktree, +} from 'src/utils/worktree.js' +import { BASH_TOOL_NAME } from '../BashTool/toolName.js' +import { BackgroundHint } from '../BashTool/UI.js' +import { FILE_READ_TOOL_NAME } from '../FileReadTool/prompt.js' +import { spawnTeammate } from '../shared/spawnMultiAgent.js' +import { setAgentColor } from './agentColorManager.js' +import { + agentToolResultSchema, + classifyHandoffIfNeeded, + emitTaskProgress, + extractPartialResult, + finalizeAgentTool, + getLastToolUseName, + runAsyncAgentLifecycle, +} from './agentToolUtils.js' +import { GENERAL_PURPOSE_AGENT } from './built-in/generalPurposeAgent.js' +import { + AGENT_TOOL_NAME, + LEGACY_AGENT_TOOL_NAME, + ONE_SHOT_BUILTIN_AGENT_TYPES, +} from './constants.js' +import { + buildForkedMessages, + buildWorktreeNotice, + FORK_AGENT, + isForkSubagentEnabled, + isInForkChild, +} from './forkSubagent.js' +import type { AgentDefinition } from './loadAgentsDir.js' +import { + filterAgentsByMcpRequirements, + hasRequiredMcpServers, + isBuiltInAgent, +} from './loadAgentsDir.js' +import { getPrompt } from './prompt.js' +import { runAgent } from './runAgent.js' +import { + renderGroupedAgentToolUse, + renderToolResultMessage, + renderToolUseErrorMessage, + renderToolUseMessage, + renderToolUseProgressMessage, + renderToolUseRejectedMessage, + renderToolUseTag, + userFacingName, + userFacingNameBackgroundColor, +} from './UI.js' + +/* eslint-disable @typescript-eslint/no-require-imports */ +const proactiveModule = + feature('PROACTIVE') || 
// Delay in milliseconds after which a running agent task is automatically
// moved to the background; 0 means auto-backgrounding is disabled.
//
// The feature counts as enabled when the CLAUDE_AUTO_BACKGROUND_TASKS env var
// is truthy or the `tengu_auto_background_agents` gate reads true. Both are
// read on every call instead of once at module load, because the gate value
// may not be ready that early.
function getAutoBackgroundMs(): number {
  const enabledByEnv = isEnvTruthy(process.env.CLAUDE_AUTO_BACKGROUND_TASKS)
  // `||` short-circuits: the gate is only consulted when the env var is unset.
  const autoBackgroundEnabled =
    enabledByEnv ||
    getFeatureValue_CACHED_MAY_BE_STALE('tengu_auto_background_agents', false)
  return autoBackgroundEnabled ? 120_000 : 0
}
// Full schema combining base + multi-agent params + isolation.
//
// Everything is added with one .extend() call: Zod 4 deprecates
// ZodObject.merge() in favour of extending with the extra fields directly.
// Key order is unchanged (base fields, multi-agent fields, isolation, cwd),
// and unknown-key handling stays at the object default on both sides, so the
// resulting schema accepts and rejects exactly what the merged one did.
const fullInputSchema = lazySchema(() => {
  // Multi-agent parameters, kept as a plain shape so they can be spread into
  // .extend(). The spread is unconditional, so field types are inferred
  // normally.
  const multiAgentFields = {
    name: z
      .string()
      .optional()
      .describe(
        'Name for the spawned agent. Makes it addressable via SendMessage({to: name}) while running.',
      ),
    team_name: z
      .string()
      .optional()
      .describe(
        'Team name for spawning. Uses current team context if omitted.',
      ),
    mode: permissionModeSchema()
      .optional()
      .describe(
        'Permission mode for spawned teammate (e.g., "plan" to require plan approval).',
      ),
  }

  return baseInputSchema().extend({
    ...multiAgentFields,
    // NOTE(review): the USER_TYPE comparison is deliberately repeated inline
    // rather than hoisted into a local — presumably it is substituted at
    // build time so the "remote" variant can be dropped from external
    // builds. Confirm before refactoring.
    isolation: (process.env.USER_TYPE === 'ant'
      ? z.enum(['worktree', 'remote'])
      : z.enum(['worktree'])
    )
      .optional()
      .describe(
        process.env.USER_TYPE === 'ant'
          ? 'Isolation mode. "worktree" creates a temporary git worktree so the agent works on an isolated copy of the repo. "remote" launches the agent in a remote CCR environment (always runs in background).'
          : 'Isolation mode. "worktree" creates a temporary git worktree so the agent works on an isolated copy of the repo.',
      ),
    cwd: z
      .string()
      .optional()
      .describe(
        'Absolute path to run the agent in. Overrides the working directory for all filesystem and shell operations within this agent. Mutually exclusive with isolation: "worktree".',
      ),
  })
})
fullInputSchema() + : fullInputSchema().omit({ cwd: true }) + + // GrowthBook-in-lazySchema is acceptable here (unlike subagent_type, which + // was removed in 906da6c723): the divergence window is one-session-per- + // gate-flip via _CACHED_MAY_BE_STALE disk read, and worst case is either + // "schema shows a no-op param" (gate flips on mid-session: param ignored + // by forceAsync) or "schema hides a param that would've worked" (gate + // flips off mid-session: everything still runs async via memoized + // forceAsync). No Zod rejection, no crash — unlike required→optional. + return isBackgroundTasksDisabled || isForkSubagentEnabled() + ? schema.omit({ run_in_background: true }) + : schema +}) +type InputSchema = ReturnType + +// Explicit type widens the schema inference to always include all optional +// fields even when .omit() strips them for gating (cwd, run_in_background). +// subagent_type is optional; call() defaults it to general-purpose when the +// fork gate is off, or routes to the fork path when the gate is on. 
// Output schema for the two result shapes this tool advertises publicly:
// a completed foreground run and a background launch. The teammate-spawned
// and remote-launched results returned by call() are cast to the output type
// and are intentionally not members of this union.
export const outputSchema = lazySchema(() => {
  // Foreground run that finished: the shared agent result fields plus the
  // prompt that was executed.
  const completedResult = agentToolResultSchema().extend({
    status: z.literal('completed'),
    prompt: z.string(),
  })

  // Background launch: identifies the agent and where its output is written.
  const launchedResult = z.object({
    status: z.literal('async_launched'),
    agentId: z.string().describe('The ID of the async agent'),
    description: z.string().describe('The description of the task'),
    prompt: z.string().describe('The prompt for the agent'),
    outputFile: z
      .string()
      .describe('Path to the output file for checking agent progress'),
    canReadOutputFile: z
      .boolean()
      .optional()
      .describe(
        'Whether the calling agent has Read/Bash tools to check progress',
      ),
  })

  return z.union([completedResult, launchedResult])
})
+export type RemoteLaunchedOutput = { + status: 'remote_launched' + taskId: string + sessionUrl: string + description: string + prompt: string + outputFile: string +} + +type InternalOutput = Output | TeammateSpawnedOutput | RemoteLaunchedOutput + +import type { AgentToolProgress, ShellProgress } from 'src/types/tools.js' +// AgentTool forwards both its own progress events and shell progress +// events from the sub-agent so the SDK receives tool_progress updates during bash/powershell runs. +export type Progress = AgentToolProgress | ShellProgress + +export const AgentTool = buildTool({ + async prompt({ agents, tools, getToolPermissionContext, allowedAgentTypes }) { + const toolPermissionContext = await getToolPermissionContext() + + // Get MCP servers that have tools available + const mcpServersWithTools: string[] = [] + for (const tool of tools) { + if (tool.name?.startsWith('mcp__')) { + const parts = tool.name.split('__') + const serverName = parts[1] + if (serverName && !mcpServersWithTools.includes(serverName)) { + mcpServersWithTools.push(serverName) + } + } + } + + // Filter agents: first by MCP requirements, then by permission rules + const agentsWithMcpRequirementsMet = filterAgentsByMcpRequirements( + agents, + mcpServersWithTools, + ) + const filteredAgents = filterDeniedAgents( + agentsWithMcpRequirementsMet, + toolPermissionContext, + AGENT_TOOL_NAME, + ) + + // Use inline env check instead of coordinatorModule to avoid circular + // dependency issues during test module loading. + const isCoordinator = feature('COORDINATOR_MODE') + ? 
isEnvTruthy(process.env.CLAUDE_CODE_COORDINATOR_MODE) + : false + return await getPrompt(filteredAgents, isCoordinator, allowedAgentTypes) + }, + name: AGENT_TOOL_NAME, + searchHint: 'delegate work to a subagent', + aliases: [LEGACY_AGENT_TOOL_NAME], + maxResultSizeChars: 100_000, + async description() { + return 'Launch a new agent' + }, + get inputSchema(): InputSchema { + return inputSchema() + }, + get outputSchema(): OutputSchema { + return outputSchema() + }, + async call( + { + prompt, + subagent_type, + description, + model: modelParam, + run_in_background, + name, + team_name, + mode: spawnMode, + isolation, + cwd, + }: AgentToolInput, + toolUseContext, + canUseTool, + assistantMessage, + onProgress?, + ) { + const startTime = Date.now() + const model = isCoordinatorMode() ? undefined : modelParam + + // Get app state for permission mode and agent filtering + const appState = toolUseContext.getAppState() + const permissionMode = appState.toolPermissionContext.mode + // In-process teammates get a no-op setAppState; setAppStateForTasks + // reaches the root store so task registration/progress/kill stay visible. + const rootSetAppState = + toolUseContext.setAppStateForTasks ?? toolUseContext.setAppState + + // Check if user is trying to use agent teams without access + if (team_name && !isAgentSwarmsEnabled()) { + throw new Error('Agent Teams is not yet available on your plan.') + } + + // Teammates (in-process or tmux) passing `name` would trigger spawnTeammate() + // below, but TeamFile.members is a flat array with one leadAgentId — nested + // teammates land in the roster with no provenance and confuse the lead. + const teamName = resolveTeamName({ team_name }, appState) + if (isTeammate() && teamName && name) { + throw new Error( + 'Teammates cannot spawn other teammates — the team roster is flat. 
To spawn a subagent instead, omit the `name` parameter.', + ) + } + // In-process teammates cannot spawn background agents (their lifecycle is + // tied to the leader's process). Tmux teammates are separate processes and + // can manage their own background agents. + if (isInProcessTeammate() && teamName && run_in_background === true) { + throw new Error( + 'In-process teammates cannot spawn background agents. Use run_in_background=false for synchronous subagents.', + ) + } + + // Check if this is a multi-agent spawn request + // Spawn is triggered when team_name is set (from param or context) and name is provided + if (teamName && name) { + // Set agent definition color for grouped UI display before spawning + const agentDef = subagent_type + ? toolUseContext.options.agentDefinitions.activeAgents.find( + a => a.agentType === subagent_type, + ) + : undefined + if (agentDef?.color) { + setAgentColor(subagent_type!, agentDef.color) + } + const result = await spawnTeammate( + { + name, + prompt, + description, + team_name: teamName, + use_splitpane: true, + plan_mode_required: spawnMode === 'plan', + model: model ?? agentDef?.model, + agent_type: subagent_type, + invokingRequestId: assistantMessage?.requestId as string | undefined, + }, + toolUseContext, + ) + + // Type assertion uses TeammateSpawnedOutput (defined above) instead of any. + // This type is excluded from the exported outputSchema for dead code elimination. + // Cast through unknown because TeammateSpawnedOutput is intentionally + // not part of the exported Output union (for dead code elimination purposes). 
+ const spawnResult: TeammateSpawnedOutput = { + status: 'teammate_spawned' as const, + prompt, + ...result.data, + } + return { data: spawnResult } as unknown as { data: Output } + } + + // Fork subagent experiment routing: + // - subagent_type set: use it (explicit wins) + // - subagent_type omitted, gate on: fork path (undefined) + // - subagent_type omitted, gate off: default general-purpose + const effectiveType = + subagent_type ?? + (isForkSubagentEnabled() ? undefined : GENERAL_PURPOSE_AGENT.agentType) + const isForkPath = effectiveType === undefined + + let selectedAgent: AgentDefinition + if (isForkPath) { + // Recursive fork guard: fork children keep the Agent tool in their + // pool for cache-identical tool defs, so reject fork attempts at call + // time. Primary check is querySource (compaction-resistant — set on + // context.options at spawn time, survives autocompact's message + // rewrite). Message-scan fallback catches any path where querySource + // wasn't threaded. + if ( + toolUseContext.options.querySource === + `agent:builtin:${FORK_AGENT.agentType}` || + isInForkChild(toolUseContext.messages) + ) { + throw new Error( + 'Fork is not available inside a forked worker. Complete your task directly using your tools.', + ) + } + selectedAgent = FORK_AGENT + } else { + // Filter agents to exclude those denied via Agent(AgentName) syntax + const allAgents = toolUseContext.options.agentDefinitions.activeAgents + const { allowedAgentTypes } = toolUseContext.options.agentDefinitions + const agents = filterDeniedAgents( + // When allowedAgentTypes is set (from Agent(x,y) tool spec), restrict to those types + allowedAgentTypes + ? 
allAgents.filter(a => allowedAgentTypes.includes(a.agentType)) + : allAgents, + appState.toolPermissionContext, + AGENT_TOOL_NAME, + ) + + const found = agents.find(agent => agent.agentType === effectiveType) + if (!found) { + // Check if the agent exists but is denied by permission rules + const agentExistsButDenied = allAgents.find( + agent => agent.agentType === effectiveType, + ) + if (agentExistsButDenied) { + const denyRule = getDenyRuleForAgent( + appState.toolPermissionContext, + AGENT_TOOL_NAME, + effectiveType, + ) + throw new Error( + `Agent type '${effectiveType}' has been denied by permission rule '${AGENT_TOOL_NAME}(${effectiveType})' from ${denyRule?.source ?? 'settings'}.`, + ) + } + throw new Error( + `Agent type '${effectiveType}' not found. Available agents: ${agents + .map(a => a.agentType) + .join(', ')}`, + ) + } + selectedAgent = found + } + + // Same lifecycle constraint as the run_in_background guard above, but for + // agent definitions that force background via `background: true`. Checked + // here because selectedAgent is only now resolved. + if ( + isInProcessTeammate() && + teamName && + selectedAgent.background === true + ) { + throw new Error( + `In-process teammates cannot spawn background agents. Agent '${selectedAgent.agentType}' has background: true in its definition.`, + ) + } + + // Capture for type narrowing — `let selectedAgent` prevents TS from + // narrowing property types across the if-else assignment above. + const requiredMcpServers = selectedAgent.requiredMcpServers + + // Check if required MCP servers have tools available + // A server that's connected but not authenticated won't have any tools + if (requiredMcpServers?.length) { + // If any required servers are still pending (connecting), wait for them + // before checking tool availability. This avoids a race condition where + // the agent is invoked before MCP servers finish connecting. 
+ const hasPendingRequiredServers = appState.mcp.clients.some( + c => + c.type === 'pending' && + requiredMcpServers.some(pattern => + c.name.toLowerCase().includes(pattern.toLowerCase()), + ), + ) + + let currentAppState = appState + if (hasPendingRequiredServers) { + const MAX_WAIT_MS = 30_000 + const POLL_INTERVAL_MS = 500 + const deadline = Date.now() + MAX_WAIT_MS + + while (Date.now() < deadline) { + await sleep(POLL_INTERVAL_MS) + currentAppState = toolUseContext.getAppState() + + // Early exit: if any required server has already failed, no point + // waiting for other pending servers — the check will fail regardless. + const hasFailedRequiredServer = currentAppState.mcp.clients.some( + c => + c.type === 'failed' && + requiredMcpServers.some(pattern => + c.name.toLowerCase().includes(pattern.toLowerCase()), + ), + ) + if (hasFailedRequiredServer) break + + const stillPending = currentAppState.mcp.clients.some( + c => + c.type === 'pending' && + requiredMcpServers.some(pattern => + c.name.toLowerCase().includes(pattern.toLowerCase()), + ), + ) + if (!stillPending) break + } + } + + // Get servers that actually have tools (meaning they're connected AND authenticated) + const serversWithTools: string[] = [] + for (const tool of currentAppState.mcp.tools) { + if (tool.name?.startsWith('mcp__')) { + // Extract server name from tool name (format: mcp__serverName__toolName) + const parts = tool.name.split('__') + const serverName = parts[1] + if (serverName && !serversWithTools.includes(serverName)) { + serversWithTools.push(serverName) + } + } + } + + if (!hasRequiredMcpServers(selectedAgent, serversWithTools)) { + const missing = requiredMcpServers.filter( + pattern => + !serversWithTools.some(server => + server.toLowerCase().includes(pattern.toLowerCase()), + ), + ) + throw new Error( + `Agent '${selectedAgent.agentType}' requires MCP servers matching: ${missing.join(', ')}. ` + + `MCP servers with tools: ${serversWithTools.length > 0 ? 
serversWithTools.join(', ') : 'none'}. ` + + `Use /mcp to configure and authenticate the required MCP servers.`, + ) + } + } + + // Initialize the color for this agent if it has a predefined one + if (selectedAgent.color) { + setAgentColor(selectedAgent.agentType, selectedAgent.color) + } + + // Resolve agent params for logging (these are already resolved in runAgent) + const resolvedAgentModel = getAgentModel( + selectedAgent.model, + toolUseContext.options.mainLoopModel, + isForkPath ? undefined : model, + permissionMode, + ) + + logEvent('tengu_agent_tool_selected', { + agent_type: + selectedAgent.agentType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + model: + resolvedAgentModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + source: + selectedAgent.source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + color: + selectedAgent.color as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + is_built_in_agent: isBuiltInAgent(selectedAgent), + is_resume: false, + is_async: + (run_in_background === true || selectedAgent.background === true) && + !isBackgroundTasksDisabled, + is_fork: isForkPath, + }) + + // Resolve effective isolation mode (explicit param overrides agent def) + const effectiveIsolation = isolation ?? selectedAgent.isolation + + // Remote isolation: delegate to CCR. Gated ant-only — the guard enables + // dead code elimination of the entire block for external builds. 
+ if (process.env.USER_TYPE === 'ant' && effectiveIsolation === 'remote') { + const eligibility = await checkRemoteAgentEligibility() + if (!eligibility.eligible) { + const reasons = (eligibility as { eligible: false; errors: BackgroundRemoteSessionPrecondition[] }).errors + .map(formatPreconditionError) + .join('\n') + throw new Error(`Cannot launch remote agent:\n${reasons}`) + } + + let bundleFailHint: string | undefined + const session = await teleportToRemote({ + initialMessage: prompt, + description, + signal: toolUseContext.abortController.signal, + onBundleFail: msg => { + bundleFailHint = msg + }, + }) + if (!session) { + throw new Error(bundleFailHint ?? 'Failed to create remote session') + } + + const { taskId, sessionId } = registerRemoteAgentTask({ + remoteTaskType: 'remote-agent', + session: { id: session.id, title: session.title || description }, + command: prompt, + context: toolUseContext, + toolUseId: toolUseContext.toolUseId, + }) + + logEvent('tengu_agent_tool_remote_launched', { + agent_type: + selectedAgent.agentType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + }) + + const remoteResult: RemoteLaunchedOutput = { + status: 'remote_launched', + taskId, + sessionUrl: getRemoteTaskSessionUrl(sessionId), + description, + prompt, + outputFile: getTaskOutputPath(taskId), + } + return { data: remoteResult } as unknown as { data: Output } + } + // System prompt + prompt messages: branch on fork path. + // + // Fork path: child inherits the PARENT's system prompt (not FORK_AGENT's) + // for cache-identical API request prefixes. Prompt messages are built via + // buildForkedMessages() which clones the parent's full assistant message + // (all tool_use blocks) + placeholder tool_results + per-child directive. + // + // Normal path: build the selected agent's own system prompt with env + // details, and use a simple user message for the prompt. 
+ let enhancedSystemPrompt: string[] | undefined + let forkParentSystemPrompt: + | ReturnType + | undefined + let promptMessages: MessageType[] + + if (isForkPath) { + if (toolUseContext.renderedSystemPrompt) { + forkParentSystemPrompt = toolUseContext.renderedSystemPrompt + } else { + // Fallback: recompute. May diverge from parent's cached bytes if + // GrowthBook state changed between parent turn-start and fork spawn. + const mainThreadAgentDefinition = appState.agent + ? appState.agentDefinitions.activeAgents.find( + a => a.agentType === appState.agent, + ) + : undefined + const additionalWorkingDirectories = Array.from( + appState.toolPermissionContext.additionalWorkingDirectories.keys(), + ) + const defaultSystemPrompt = await getSystemPrompt( + toolUseContext.options.tools, + toolUseContext.options.mainLoopModel, + additionalWorkingDirectories, + toolUseContext.options.mcpClients, + ) + forkParentSystemPrompt = buildEffectiveSystemPrompt({ + mainThreadAgentDefinition, + toolUseContext, + customSystemPrompt: toolUseContext.options.customSystemPrompt, + defaultSystemPrompt, + appendSystemPrompt: toolUseContext.options.appendSystemPrompt, + }) + } + promptMessages = buildForkedMessages(prompt, assistantMessage) + } else { + try { + const additionalWorkingDirectories = Array.from( + appState.toolPermissionContext.additionalWorkingDirectories.keys(), + ) + + // All agents have getSystemPrompt - pass toolUseContext to all + const agentPrompt = selectedAgent.getSystemPrompt({ toolUseContext }) + + // Log agent memory loaded event for subagents + if (selectedAgent.memory) { + logEvent('tengu_agent_memory_loaded', { + ...(process.env.USER_TYPE === 'ant' && { + agent_type: + selectedAgent.agentType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + }), + scope: + selectedAgent.memory as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + source: + 'subagent' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + }) + } + + // Apply 
environment details enhancement + enhancedSystemPrompt = await enhanceSystemPromptWithEnvDetails( + [agentPrompt], + resolvedAgentModel, + additionalWorkingDirectories, + ) + } catch (error) { + logForDebugging( + `Failed to get system prompt for agent ${selectedAgent.agentType}: ${errorMessage(error)}`, + ) + } + promptMessages = [createUserMessage({ content: prompt })] + } + + const metadata = { + prompt, + resolvedAgentModel, + isBuiltInAgent: isBuiltInAgent(selectedAgent), + startTime, + agentType: selectedAgent.agentType, + isAsync: + (run_in_background === true || selectedAgent.background === true) && + !isBackgroundTasksDisabled, + } + + // Use inline env check instead of coordinatorModule to avoid circular + // dependency issues during test module loading. + const isCoordinator = feature('COORDINATOR_MODE') + ? isEnvTruthy(process.env.CLAUDE_CODE_COORDINATOR_MODE) + : false + + // Fork subagent experiment: force ALL spawns async for a unified + // interaction model (not just fork spawns — all of them). + const forceAsync = isForkSubagentEnabled() + + // Assistant mode: force all agents async. Synchronous subagents hold the + // main loop's turn open until they complete — the daemon's inputQueue + // backs up, and the first overdue cron catch-up on spawn becomes N + // serial subagent turns blocking all user input. Same gate as + // executeForkedSlashCommand's fire-and-forget path; the + // re-entry there is handled by the else branch + // below (registerAsyncAgentTask + notifyOnCompletion). + const assistantForceAsync = feature('KAIROS') + ? appState.kairosEnabled + : false + + const shouldRunAsync = + (run_in_background === true || + selectedAgent.background === true || + isCoordinator || + forceAsync || + assistantForceAsync || + (proactiveModule?.isProactiveActive() ?? false)) && + !isBackgroundTasksDisabled + // Assemble the worker's tool pool independently of the parent's. 
+ // Workers always get their tools from assembleToolPool with their own + // permission mode, so they aren't affected by the parent's tool + // restrictions. This is computed here so that runAgent doesn't need to + // import from tools.ts (which would create a circular dependency). + const workerPermissionContext = { + ...appState.toolPermissionContext, + mode: selectedAgent.permissionMode ?? 'acceptEdits', + } + const workerTools = assembleToolPool( + workerPermissionContext, + appState.mcp.tools, + ) + + // Create a stable agent ID early so it can be used for worktree slug + const earlyAgentId = createAgentId() + + // Set up worktree isolation if requested + let worktreeInfo: { + worktreePath: string + worktreeBranch?: string + headCommit?: string + gitRoot?: string + hookBased?: boolean + } | null = null + + if (effectiveIsolation === 'worktree') { + const slug = `agent-${earlyAgentId.slice(0, 8)}` + worktreeInfo = await createAgentWorktree(slug) + } + + // Fork + worktree: inject a notice telling the child to translate paths + // and re-read potentially stale files. Appended after the fork directive + // so it appears as the most recent guidance the child sees. + if (isForkPath && worktreeInfo) { + promptMessages.push( + createUserMessage({ + content: buildWorktreeNotice(getCwd(), worktreeInfo.worktreePath), + }), + ) + } + + const runAgentParams: Parameters[0] = { + agentDefinition: selectedAgent, + promptMessages, + toolUseContext, + canUseTool, + isAsync: shouldRunAsync, + querySource: + toolUseContext.options.querySource ?? + getQuerySourceForAgent( + selectedAgent.agentType, + isBuiltInAgent(selectedAgent), + ), + model: isForkPath ? undefined : model, + // Fork path: pass parent's system prompt AND parent's exact tool + // array (cache-identical prefix). workerTools is rebuilt under + // permissionMode 'bubble' which differs from the parent's mode, so + // its tool-def serialization diverges and breaks cache at the first + // differing tool. 
useExactTools also inherits the parent's + // thinkingConfig and isNonInteractiveSession (see runAgent.ts). + // + // Normal path: when a cwd override is in effect (worktree isolation + // or explicit cwd), skip the pre-built system prompt so runAgent's + // buildAgentSystemPrompt() runs inside wrapWithCwd where getCwd() + // returns the override path. + override: isForkPath + ? { systemPrompt: forkParentSystemPrompt } + : enhancedSystemPrompt && !worktreeInfo && !cwd + ? { systemPrompt: asSystemPrompt(enhancedSystemPrompt) } + : undefined, + availableTools: isForkPath ? toolUseContext.options.tools : workerTools, + // Pass parent conversation when the fork-subagent path needs full + // context. useExactTools inherits thinkingConfig (runAgent.ts:624). + forkContextMessages: isForkPath ? toolUseContext.messages : undefined, + ...(isForkPath && { useExactTools: true }), + worktreePath: worktreeInfo?.worktreePath, + description, + } + + // Helper to wrap execution with a cwd override: explicit cwd arg (KAIROS) + // takes precedence over worktree isolation path. + const cwdOverridePath = cwd ?? worktreeInfo?.worktreePath + const wrapWithCwd = (fn: () => T): T => + cwdOverridePath ? 
runWithCwdOverride(cwdOverridePath, fn) : fn() + + // Helper to clean up worktree after agent completes + const cleanupWorktreeIfNeeded = async (): Promise<{ + worktreePath?: string + worktreeBranch?: string + }> => { + if (!worktreeInfo) return {} + const { worktreePath, worktreeBranch, headCommit, gitRoot, hookBased } = + worktreeInfo + // Null out to make idempotent — guards against double-call if code + // between cleanup and end of try throws into catch + worktreeInfo = null + if (hookBased) { + // Hook-based worktrees are always kept since we can't detect VCS changes + logForDebugging(`Hook-based agent worktree kept at: ${worktreePath}`) + return { worktreePath } + } + if (headCommit) { + const changed = await hasWorktreeChanges(worktreePath, headCommit) + if (!changed) { + await removeAgentWorktree(worktreePath, worktreeBranch, gitRoot) + // Clear worktreePath from metadata so resume doesn't try to use + // a deleted directory. Fire-and-forget to match runAgent's + // writeAgentMetadata handling. + void writeAgentMetadata(asAgentId(earlyAgentId), { + agentType: selectedAgent.agentType, + description, + }).catch(_err => + logForDebugging(`Failed to clear worktree metadata: ${_err}`), + ) + return {} + } + } + logForDebugging(`Agent worktree has changes, keeping: ${worktreePath}`) + return { worktreePath, worktreeBranch } + } + + if (shouldRunAsync) { + const asyncAgentId = earlyAgentId + const agentBackgroundTask = registerAsyncAgent({ + agentId: asyncAgentId, + description, + prompt, + selectedAgent, + setAppState: rootSetAppState, + // Don't link to parent's abort controller -- background agents should + // survive when the user presses ESC to cancel the main thread. + // They are killed explicitly via chat:killAgents. + toolUseId: toolUseContext.toolUseId, + }) + + // Register name → agentId for SendMessage routing. Post-registerAsyncAgent + // so we don't leave a stale entry if spawn fails. 
Sync agents skipped — + // coordinator is blocked, so SendMessage routing doesn't apply. + if (name) { + rootSetAppState(prev => { + const next = new Map(prev.agentNameRegistry) + next.set(name, asAgentId(asyncAgentId)) + return { ...prev, agentNameRegistry: next } + }) + } + + // Wrap async agent execution in agent context for analytics attribution + const asyncAgentContext: SubagentContext = { + agentId: asyncAgentId, + // For subagents from teammates: use team lead's session + // For subagents from main REPL: undefined (no parent session) + parentSessionId: getParentSessionId(), + agentType: 'subagent' as const, + subagentName: selectedAgent.agentType, + isBuiltIn: isBuiltInAgent(selectedAgent), + invokingRequestId: assistantMessage?.requestId as string | undefined, + invocationKind: 'spawn' as const, + invocationEmitted: false, + } + + // Workload propagation: handlePromptSubmit wraps the entire turn in + // runWithWorkload (AsyncLocalStorage). ALS context is captured at + // invocation time — when this `void` fires — and survives every await + // inside. No capture/restore needed; the detached closure sees the + // parent turn's workload automatically, isolated from its finally. 
+ void runWithAgentContext(asyncAgentContext, () => + wrapWithCwd(() => + runAsyncAgentLifecycle({ + taskId: agentBackgroundTask.agentId, + abortController: agentBackgroundTask.abortController!, + makeStream: onCacheSafeParams => + runAgent({ + ...runAgentParams, + override: { + ...runAgentParams.override, + agentId: asAgentId(agentBackgroundTask.agentId), + abortController: agentBackgroundTask.abortController!, + }, + onCacheSafeParams, + }), + metadata, + description, + toolUseContext, + rootSetAppState, + agentIdForCleanup: asyncAgentId, + enableSummarization: + isCoordinator || + isForkSubagentEnabled() || + getSdkAgentProgressSummariesEnabled(), + getWorktreeResult: cleanupWorktreeIfNeeded, + }), + ), + ) + + const canReadOutputFile = toolUseContext.options.tools.some( + t => + toolMatchesName(t, FILE_READ_TOOL_NAME) || + toolMatchesName(t, BASH_TOOL_NAME), + ) + return { + data: { + isAsync: true as const, + status: 'async_launched' as const, + agentId: agentBackgroundTask.agentId, + description: description, + prompt: prompt, + outputFile: getTaskOutputPath(agentBackgroundTask.agentId), + canReadOutputFile, + }, + } + } else { + // Create an explicit agentId for sync agents + const syncAgentId = asAgentId(earlyAgentId) + + // Set up agent context for sync execution (for analytics attribution) + const syncAgentContext: SubagentContext = { + agentId: syncAgentId, + // For subagents from teammates: use team lead's session + // For subagents from main REPL: undefined (no parent session) + parentSessionId: getParentSessionId(), + agentType: 'subagent' as const, + subagentName: selectedAgent.agentType, + isBuiltIn: isBuiltInAgent(selectedAgent), + invokingRequestId: assistantMessage?.requestId as string | undefined, + invocationKind: 'spawn' as const, + invocationEmitted: false, + } + + // Wrap entire sync agent execution in context for analytics attribution + // and optionally in a worktree cwd override for filesystem isolation + return 
runWithAgentContext(syncAgentContext, () => + wrapWithCwd(async () => { + const agentMessages: MessageType[] = [] + const agentStartTime = Date.now() + const syncTracker = createProgressTracker() + const syncResolveActivity = createActivityDescriptionResolver( + toolUseContext.options.tools, + ) + + // Yield initial progress message to carry metadata (prompt) + if (promptMessages.length > 0) { + const normalizedPromptMessages = normalizeMessages(promptMessages) + const normalizedFirstMessage = normalizedPromptMessages.find( + (m): m is NormalizedUserMessage => m.type === 'user', + ) + if ( + normalizedFirstMessage && + normalizedFirstMessage.type === 'user' && + onProgress + ) { + onProgress({ + toolUseID: `agent_${assistantMessage.message.id}`, + data: { + message: normalizedFirstMessage, + type: 'agent_progress', + prompt, + agentId: syncAgentId, + }, + }) + } + } + + // Register as foreground task immediately so it can be backgrounded at any time + // Skip registration if background tasks are disabled + let foregroundTaskId: string | undefined + // Create the background race promise once outside the loop — otherwise + // each iteration adds a new .then() reaction to the same pending + // promise, accumulating callbacks for the lifetime of the agent. 
+ let backgroundPromise: Promise<{ type: 'background' }> | undefined + let cancelAutoBackground: (() => void) | undefined + if (!isBackgroundTasksDisabled) { + const registration = registerAgentForeground({ + agentId: syncAgentId, + description, + prompt, + selectedAgent, + setAppState: rootSetAppState, + toolUseId: toolUseContext.toolUseId, + autoBackgroundMs: getAutoBackgroundMs() || undefined, + }) + foregroundTaskId = registration.taskId + backgroundPromise = registration.backgroundSignal.then(() => ({ + type: 'background' as const, + })) + cancelAutoBackground = registration.cancelAutoBackground + } + + // Track if we've shown the background hint UI + let backgroundHintShown = false + // Track if the agent was backgrounded (cleanup handled by backgrounded finally) + let wasBackgrounded = false + // Per-scope stop function — NOT shared with the backgrounded closure. + // idempotent: startAgentSummarization's stop() checks `stopped` flag. + let stopForegroundSummarization: (() => void) | undefined + // const capture for sound type narrowing inside the callback below + const summaryTaskId = foregroundTaskId + + // Get async iterator for the agent + const agentIterator = runAgent({ + ...runAgentParams, + override: { + ...runAgentParams.override, + agentId: syncAgentId, + }, + onCacheSafeParams: + summaryTaskId && getSdkAgentProgressSummariesEnabled() + ? 
(params: CacheSafeParams) => { + const { stop } = startAgentSummarization( + summaryTaskId, + syncAgentId, + params, + rootSetAppState, + ) + stopForegroundSummarization = stop + } + : undefined, + })[Symbol.asyncIterator]() + + // Track if an error occurred during iteration + let syncAgentError: Error | undefined + let wasAborted = false + let worktreeResult: { + worktreePath?: string + worktreeBranch?: string + } = {} + + try { + while (true) { + const elapsed = Date.now() - agentStartTime + + // Show background hint after threshold (but task is already registered) + // Skip if background tasks are disabled + if ( + !isBackgroundTasksDisabled && + !backgroundHintShown && + elapsed >= PROGRESS_THRESHOLD_MS && + toolUseContext.setToolJSX + ) { + backgroundHintShown = true + toolUseContext.setToolJSX({ + jsx: , + shouldHidePromptInput: false, + shouldContinueAnimation: true, + showSpinner: true, + }) + } + + // Race between next message and background signal + // If background tasks are disabled, just await the next message directly + const nextMessagePromise = agentIterator.next() + const raceResult = backgroundPromise + ? 
await Promise.race([ + nextMessagePromise.then(r => ({ + type: 'message' as const, + result: r, + })), + backgroundPromise, + ]) + : { + type: 'message' as const, + result: await nextMessagePromise, + } + + // Check if we were backgrounded via backgroundAll() + // foregroundTaskId is guaranteed to be defined if raceResult.type is 'background' + // because backgroundPromise is only defined when foregroundTaskId is defined + if (raceResult.type === 'background' && foregroundTaskId) { + const appState = toolUseContext.getAppState() + const task = appState.tasks[foregroundTaskId] + if (isLocalAgentTask(task) && task.isBackgrounded) { + // Capture the taskId for use in the async callback + const backgroundedTaskId = foregroundTaskId + wasBackgrounded = true + // Stop foreground summarization; the backgrounded closure + // below owns its own independent stop function. + stopForegroundSummarization?.() + + // Workload: inherited via ALS at `void` invocation time, + // same as the async-from-start path above. + // Continue agent in background and return async result + void runWithAgentContext(syncAgentContext, async () => { + let stopBackgroundedSummarization: (() => void) | undefined + try { + // Clean up the foreground iterator so its finally block runs + // (releases MCP connections, session hooks, prompt cache tracking, etc.) + // Timeout prevents blocking if MCP server cleanup hangs. + // .catch() prevents unhandled rejection if timeout wins the race. 
+ await Promise.race([ + agentIterator.return(undefined).catch(() => {}), + sleep(1000), + ]) + // Initialize progress tracking from existing messages + const tracker = createProgressTracker() + const resolveActivity2 = + createActivityDescriptionResolver( + toolUseContext.options.tools, + ) + for (const existingMsg of agentMessages) { + updateProgressFromMessage( + tracker, + existingMsg, + resolveActivity2, + toolUseContext.options.tools, + ) + } + for await (const msg of runAgent({ + ...runAgentParams, + isAsync: true, // Agent is now running in background + override: { + ...runAgentParams.override, + agentId: asAgentId(backgroundedTaskId), + abortController: task.abortController, + }, + onCacheSafeParams: getSdkAgentProgressSummariesEnabled() + ? (params: CacheSafeParams) => { + const { stop } = startAgentSummarization( + backgroundedTaskId, + asAgentId(backgroundedTaskId), + params, + rootSetAppState, + ) + stopBackgroundedSummarization = stop + } + : undefined, + })) { + agentMessages.push(msg) + + // Track progress for backgrounded agents + updateProgressFromMessage( + tracker, + msg, + resolveActivity2, + toolUseContext.options.tools, + ) + updateAsyncAgentProgress( + backgroundedTaskId, + getProgressUpdate(tracker), + rootSetAppState, + ) + + const lastToolName = getLastToolUseName(msg) + if (lastToolName) { + emitTaskProgress( + tracker, + backgroundedTaskId, + toolUseContext.toolUseId, + description, + startTime, + lastToolName, + ) + } + } + const agentResult = finalizeAgentTool( + agentMessages, + backgroundedTaskId, + metadata, + ) + + // Mark task completed FIRST so TaskOutput(block=true) + // unblocks immediately. classifyHandoffIfNeeded and + // cleanupWorktreeIfNeeded can hang — they must not gate + // the status transition (gh-20236). 
+ completeAsyncAgent(agentResult, rootSetAppState) + + // Extract text from agent result content for the notification + let finalMessage = extractTextContent( + agentResult.content, + '\n', + ) + + if (feature('TRANSCRIPT_CLASSIFIER')) { + const backgroundedAppState = + toolUseContext.getAppState() + const handoffWarning = await classifyHandoffIfNeeded({ + agentMessages, + tools: toolUseContext.options.tools, + toolPermissionContext: + backgroundedAppState.toolPermissionContext, + abortSignal: task.abortController!.signal, + subagentType: selectedAgent.agentType, + totalToolUseCount: agentResult.totalToolUseCount, + }) + if (handoffWarning) { + finalMessage = `${handoffWarning}\n\n${finalMessage}` + } + } + + // Clean up worktree before notification so we can include it + const worktreeResult = await cleanupWorktreeIfNeeded() + + enqueueAgentNotification({ + taskId: backgroundedTaskId, + description, + status: 'completed', + setAppState: rootSetAppState, + finalMessage, + usage: { + totalTokens: getTokenCountFromTracker(tracker), + toolUses: agentResult.totalToolUseCount, + durationMs: agentResult.totalDurationMs, + }, + toolUseId: toolUseContext.toolUseId, + ...worktreeResult, + }) + } catch (error) { + if (error instanceof AbortError) { + // Transition status BEFORE worktree cleanup so + // TaskOutput unblocks even if git hangs (gh-20236). 
+ killAsyncAgent(backgroundedTaskId, rootSetAppState) + logEvent('tengu_agent_tool_terminated', { + agent_type: + metadata.agentType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + model: + metadata.resolvedAgentModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + duration_ms: Date.now() - metadata.startTime, + is_async: true, + is_built_in_agent: metadata.isBuiltInAgent, + reason: + 'user_cancel_background' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + }) + const worktreeResult = await cleanupWorktreeIfNeeded() + const partialResult = + extractPartialResult(agentMessages) + enqueueAgentNotification({ + taskId: backgroundedTaskId, + description, + status: 'killed', + setAppState: rootSetAppState, + toolUseId: toolUseContext.toolUseId, + finalMessage: partialResult, + ...worktreeResult, + }) + return + } + const errMsg = errorMessage(error) + failAsyncAgent( + backgroundedTaskId, + errMsg, + rootSetAppState, + ) + const worktreeResult = await cleanupWorktreeIfNeeded() + enqueueAgentNotification({ + taskId: backgroundedTaskId, + description, + status: 'failed', + error: errMsg, + setAppState: rootSetAppState, + toolUseId: toolUseContext.toolUseId, + ...worktreeResult, + }) + } finally { + stopBackgroundedSummarization?.() + clearInvokedSkillsForAgent(syncAgentId) + clearDumpState(syncAgentId) + // Note: worktree cleanup is done before enqueueAgentNotification + // in both try and catch paths so we can include worktree info + } + }) + + // Return async_launched result immediately + const canReadOutputFile = toolUseContext.options.tools.some( + t => + toolMatchesName(t, FILE_READ_TOOL_NAME) || + toolMatchesName(t, BASH_TOOL_NAME), + ) + return { + data: { + isAsync: true as const, + status: 'async_launched' as const, + agentId: backgroundedTaskId, + description: description, + prompt: prompt, + outputFile: getTaskOutputPath(backgroundedTaskId), + canReadOutputFile, + }, + } + } + } + + // Process the message from 
the race result + if (raceResult.type !== 'message') { + // This shouldn't happen - background case handled above + continue + } + const { result } = raceResult + if (result.done) break + const message = result.value as MessageType + + agentMessages.push(message) + + // Emit task_progress for the VS Code subagent panel + updateProgressFromMessage( + syncTracker, + message, + syncResolveActivity, + toolUseContext.options.tools, + ) + if (foregroundTaskId) { + const lastToolName = getLastToolUseName(message) + if (lastToolName) { + emitTaskProgress( + syncTracker, + foregroundTaskId, + toolUseContext.toolUseId, + description, + agentStartTime, + lastToolName, + ) + // Keep AppState task.progress in sync when SDK summaries are + // enabled, so updateAgentSummary reads correct token/tool counts + // instead of zeros. + if (getSdkAgentProgressSummariesEnabled()) { + updateAsyncAgentProgress( + foregroundTaskId, + getProgressUpdate(syncTracker), + rootSetAppState, + ) + } + } + } + + // Forward bash_progress events from sub-agent to parent so the SDK + // receives tool_progress events just as it does for the main agent. 
+ if ( + message.type === 'progress' && + ((message.data as { type: string })?.type === 'bash_progress' || + (message.data as { type: string })?.type === 'powershell_progress') && + onProgress + ) { + onProgress({ + toolUseID: message.toolUseID as string, + data: message.data, + }) + } + + if (message.type !== 'assistant' && message.type !== 'user') { + continue + } + + // Increment token count in spinner for assistant messages + // Subagent streaming events are filtered out in runAgent.ts, so we + // need to count tokens from completed messages here + if (message.type === 'assistant') { + const contentLength = getAssistantMessageContentLength(message as AssistantMessage) + if (contentLength > 0) { + toolUseContext.setResponseLength(len => len + contentLength) + } + } + + const normalizedNew = normalizeMessages([message]) + for (const m of normalizedNew) { + for (const content of (m.message?.content ?? []) as readonly { readonly type: string }[]) { + if ( + content.type !== 'tool_use' && + content.type !== 'tool_result' + ) { + continue + } + + // Forward progress updates + if (onProgress) { + onProgress({ + toolUseID: `agent_${assistantMessage.message.id}`, + data: { + message: m, + type: 'agent_progress', + // prompt only needed on first progress message (UI.tsx:624 + // reads progressMessages[0]). Omit here to avoid duplication. 
+ prompt: '', + agentId: syncAgentId, + }, + }) + } + } + } + } + } catch (error) { + // Handle errors from the sync agent loop + // AbortError should be re-thrown for proper interruption handling + if (error instanceof AbortError) { + wasAborted = true + logEvent('tengu_agent_tool_terminated', { + agent_type: + metadata.agentType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + model: + metadata.resolvedAgentModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + duration_ms: Date.now() - metadata.startTime, + is_async: false, + is_built_in_agent: metadata.isBuiltInAgent, + reason: + 'user_cancel_sync' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + }) + throw error + } + + // Log the error for debugging + logForDebugging(`Sync agent error: ${errorMessage(error)}`, { + level: 'error', + }) + + // Store the error to handle after cleanup + syncAgentError = toError(error) + } finally { + // Clear the background hint UI + if (toolUseContext.setToolJSX) { + toolUseContext.setToolJSX(null) + } + + // Stop foreground summarization. Idempotent — if already stopped at + // the backgrounding transition, this is a no-op. The backgrounded + // closure owns a separate stop function (stopBackgroundedSummarization). + stopForegroundSummarization?.() + + // Unregister foreground task if agent completed without being backgrounded + if (foregroundTaskId) { + unregisterAgentForeground(foregroundTaskId, rootSetAppState) + // Notify SDK consumers (e.g. VS Code subagent panel) that this + // foreground agent is done. Goes through drainSdkEvents() — does + // NOT trigger the print.ts XML task_notification parser or the LLM loop. + if (!wasBackgrounded) { + const progress = getProgressUpdate(syncTracker) + enqueueSdkEvent({ + type: 'system', + subtype: 'task_notification', + task_id: foregroundTaskId, + tool_use_id: toolUseContext.toolUseId, + status: syncAgentError + ? 'failed' + : wasAborted + ? 
'stopped' + : 'completed', + output_file: '', + summary: description, + usage: { + total_tokens: progress.tokenCount, + tool_uses: progress.toolUseCount, + duration_ms: Date.now() - agentStartTime, + }, + }) + } + } + + // Clean up scoped skills so they don't accumulate in the global map + clearInvokedSkillsForAgent(syncAgentId) + + // Clean up dumpState entry for this agent to prevent unbounded growth + // Skip if backgrounded — the backgrounded agent's finally handles cleanup + if (!wasBackgrounded) { + clearDumpState(syncAgentId) + } + + // Cancel auto-background timer if agent completed before it fired + cancelAutoBackground?.() + + // Clean up worktree if applicable (in finally to handle abort/error paths) + // Skip if backgrounded — the background continuation is still running in it + if (!wasBackgrounded) { + worktreeResult = await cleanupWorktreeIfNeeded() + } + } + + // Re-throw abort errors + // TODO: Find a cleaner way to express this + const lastMessage = agentMessages.findLast( + _ => _.type !== 'system' && _.type !== 'progress', + ) + if (lastMessage && isSyntheticMessage(lastMessage)) { + logEvent('tengu_agent_tool_terminated', { + agent_type: + metadata.agentType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + model: + metadata.resolvedAgentModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + duration_ms: Date.now() - metadata.startTime, + is_async: false, + is_built_in_agent: metadata.isBuiltInAgent, + reason: + 'user_cancel_sync' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + }) + throw new AbortError() + } + + // If an error occurred during iteration, try to return a result with + // whatever messages we have. If we have no assistant messages, + // re-throw the error so it's properly handled by the tool framework. 
+ if (syncAgentError) { + // Check if we have any assistant messages to return + const hasAssistantMessages = agentMessages.some( + msg => msg.type === 'assistant', + ) + + if (!hasAssistantMessages) { + // No messages collected, re-throw the error + throw syncAgentError + } + + // We have some messages, try to finalize and return them + // This allows the parent agent to see partial progress even after an error + logForDebugging( + `Sync agent recovering from error with ${agentMessages.length} messages`, + ) + } + + const agentResult = finalizeAgentTool( + agentMessages, + syncAgentId, + metadata, + ) + + if (feature('TRANSCRIPT_CLASSIFIER')) { + const currentAppState = toolUseContext.getAppState() + const handoffWarning = await classifyHandoffIfNeeded({ + agentMessages, + tools: toolUseContext.options.tools, + toolPermissionContext: currentAppState.toolPermissionContext, + abortSignal: toolUseContext.abortController.signal, + subagentType: selectedAgent.agentType, + totalToolUseCount: agentResult.totalToolUseCount, + }) + if (handoffWarning) { + agentResult.content = [ + { type: 'text' as const, text: handoffWarning }, + ...agentResult.content, + ] + } + } + + return { + data: { + status: 'completed' as const, + prompt, + ...agentResult, + ...worktreeResult, + }, + } + }), + ) + } + }, + isReadOnly() { + return true // delegates permission checks to its underlying tools + }, + toAutoClassifierInput(input) { + const i = input as AgentToolInput + const tags = [ + i.subagent_type, + i.mode ? `mode=${i.mode}` : undefined, + ].filter((t): t is string => t !== undefined) + const prefix = tags.length > 0 ? `(${tags.join(', ')}): ` : ': ' + return `${prefix}${i.prompt}` + }, + isConcurrencySafe() { + return true + }, + userFacingName, + userFacingNameBackgroundColor, + getActivityDescription(input) { + return input?.description ?? 
'Running task' + }, + async checkPermissions(input, context): Promise { + const appState = context.getAppState() + + // Only route through auto mode classifier when in auto mode + // In all other modes, auto-approve sub-agent generation + // Note: process.env.USER_TYPE === 'ant' guard enables dead code elimination for external builds + if ( + process.env.USER_TYPE === 'ant' && + appState.toolPermissionContext.mode === 'auto' + ) { + return { + behavior: 'passthrough', + message: 'Agent tool requires permission to spawn sub-agents.', + } + } + + return { behavior: 'allow', updatedInput: input } + }, + mapToolResultToToolResultBlockParam(data, toolUseID) { + // Multi-agent spawn result + const internalData = data as InternalOutput + if ( + typeof internalData === 'object' && + internalData !== null && + 'status' in internalData && + internalData.status === 'teammate_spawned' + ) { + const spawnData = internalData as TeammateSpawnedOutput + return { + tool_use_id: toolUseID, + type: 'tool_result', + content: [ + { + type: 'text', + text: `Spawned successfully. +agent_id: ${spawnData.teammate_id} +name: ${spawnData.name} +team_name: ${spawnData.team_name} +The agent is now running and will receive instructions via mailbox.`, + }, + ], + } + } + if ('status' in internalData && internalData.status === 'remote_launched') { + const r = internalData + return { + tool_use_id: toolUseID, + type: 'tool_result', + content: [ + { + type: 'text', + text: `Remote agent launched in CCR.\ntaskId: ${r.taskId}\nsession_url: ${r.sessionUrl}\noutput_file: ${r.outputFile}\nThe agent is running remotely. You will be notified automatically when it completes.\nBriefly tell the user what you launched and end your response.`, + }, + ], + } + } + if (data.status === 'async_launched') { + const prefix = `Async agent launched successfully.\nagentId: ${data.agentId} (internal ID - do not mention to user. 
Use SendMessage with to: '${data.agentId}' to continue this agent.)\nThe agent is working in the background. You will be notified automatically when it completes.` + const instructions = data.canReadOutputFile + ? `Do not duplicate this agent's work — avoid working with the same files or topics it is using. Work on non-overlapping tasks, or briefly tell the user what you launched and end your response.\noutput_file: ${data.outputFile}\nIf asked, you can check progress before completion by using ${FILE_READ_TOOL_NAME} or ${BASH_TOOL_NAME} tail on the output file.` + : `Briefly tell the user what you launched and end your response. Do not generate any other text — agent results will arrive in a subsequent message.` + const text = `${prefix}\n${instructions}` + return { + tool_use_id: toolUseID, + type: 'tool_result', + content: [ + { + type: 'text', + text, + }, + ], + } + } + if (data.status === 'completed') { + const worktreeData = data as Record + const worktreeInfoText = worktreeData.worktreePath + ? `\nworktreePath: ${worktreeData.worktreePath}\nworktreeBranch: ${worktreeData.worktreeBranch}` + : '' + // If the subagent completes with no content, the tool_result is just the + // agentId/usage trailer below — a metadata-only block at the prompt tail. + // Some models read that as "nothing to act on" and end their turn + // immediately. Say so explicitly so the parent has something to react to. + const contentOrMarker = + data.content.length > 0 + ? data.content + : [ + { + type: 'text' as const, + text: '(Subagent completed but returned no output.)', + }, + ] + // One-shot built-ins (Explore, Plan) are never continued via SendMessage + // — the agentId hint and block are dead weight (~135 chars × + // 34M Explore runs/week ≈ 1-2 Gtok/week). Telemetry doesn't parse this + // block (it uses logEvent in finalizeAgentTool), so dropping is safe. + // agentType is optional for resume compat — missing means show trailer. 
+ if ( + data.agentType && + ONE_SHOT_BUILTIN_AGENT_TYPES.has(data.agentType) && + !worktreeInfoText + ) { + return { + tool_use_id: toolUseID, + type: 'tool_result', + content: contentOrMarker, + } + } + return { + tool_use_id: toolUseID, + type: 'tool_result', + content: [ + ...contentOrMarker, + { + type: 'text', + text: `agentId: ${data.agentId} (use SendMessage with to: '${data.agentId}' to continue this agent)${worktreeInfoText} +total_tokens: ${data.totalTokens} +tool_uses: ${data.totalToolUseCount} +duration_ms: ${data.totalDurationMs}`, + }, + ], + } + } + data satisfies never + throw new Error( + `Unexpected agent tool result status: ${(data as { status: string }).status}`, + ) + }, + renderToolResultMessage, + renderToolUseMessage, + renderToolUseTag, + renderToolUseProgressMessage, + renderToolUseRejectedMessage, + renderToolUseErrorMessage, + renderGroupedToolUse: renderGroupedAgentToolUse, +} satisfies ToolDef) + +function resolveTeamName( + input: { team_name?: string }, + appState: { teamContext?: { teamName: string } }, +): string | undefined { + if (!isAgentSwarmsEnabled()) return undefined + return input.team_name || appState.teamContext?.teamName +} diff --git a/packages/builtin-tools/src/tools/AgentTool/UI.tsx b/packages/builtin-tools/src/tools/AgentTool/UI.tsx new file mode 100644 index 000000000..4ba99149a --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/UI.tsx @@ -0,0 +1,1138 @@ +import type { + ContentBlock, + ToolResultBlockParam, + ToolUseBlockParam, +} from '@anthropic-ai/sdk/resources/index.mjs' +type BetaContentBlock = ContentBlock | ToolResultBlockParam +import * as React from 'react' +import { ConfigurableShortcutHint } from 'src/components/ConfigurableShortcutHint.js' +import { + CtrlOToExpand, + SubAgentProvider, +} from 'src/components/CtrlOToExpand.js' +import { Byline, KeyboardShortcutHint } from '@anthropic/ink' +import type { z } from 'zod/v4' +import { AgentProgressLine } from 
'src/components/AgentProgressLine.js' +import { FallbackToolUseErrorMessage } from 'src/components/FallbackToolUseErrorMessage.js' +import { FallbackToolUseRejectedMessage } from 'src/components/FallbackToolUseRejectedMessage.js' +import { Markdown } from 'src/components/Markdown.js' +import { Message as MessageComponent } from 'src/components/Message.js' +import { MessageResponse } from 'src/components/MessageResponse.js' +import { ToolUseLoader } from 'src/components/ToolUseLoader.js' +import { Box, Text } from '@anthropic/ink' +import { getDumpPromptsPath } from 'src/services/api/dumpPrompts.js' +import { findToolByName, type Tools } from 'src/Tool.js' +import type { Message, ProgressMessage } from 'src/types/message.js' +import type { AgentToolProgress } from 'src/types/tools.js' +import { count } from 'src/utils/array.js' +import { + getSearchOrReadFromContent, + getSearchReadSummaryText, +} from 'src/utils/collapseReadSearch.js' +import { getDisplayPath } from 'src/utils/file.js' +import { formatDuration, formatNumber } from 'src/utils/format.js' +import { + buildSubagentLookups, + createAssistantMessage, + EMPTY_LOOKUPS, +} from 'src/utils/messages.js' +import type { ModelAlias } from 'src/utils/model/aliases.js' +import { + getMainLoopModel, + parseUserSpecifiedModel, + renderModelName, +} from 'src/utils/model/model.js' +import type { Theme, ThemeName } from 'src/utils/theme.js' +import type { + outputSchema, + Progress, + RemoteLaunchedOutput, +} from './AgentTool.js' +import { inputSchema } from './AgentTool.js' +import { getAgentColor } from './agentColorManager.js' +import { GENERAL_PURPOSE_AGENT } from './built-in/generalPurposeAgent.js' +import { BetaUsage } from '@anthropic-ai/sdk/resources/beta.mjs' + +const MAX_PROGRESS_MESSAGES_TO_SHOW = 3 + +/** + * Guard: checks if progress data has a `message` field (agent_progress or + * skill_progress). Other progress types (e.g. 
bash_progress forwarded from + * sub-agents) lack this field and must be skipped by UI helpers. + */ +function hasProgressMessage(data: Progress): data is AgentToolProgress { + if (!('message' in data)) { + return false + } + const msg = (data as AgentToolProgress).message + return msg != null && typeof msg === 'object' && 'type' in msg +} + +/** + * Check if a progress message is a search/read/REPL operation (tool use or result). + * Returns { isSearch, isRead, isREPL } if it's a collapsible operation, null otherwise. + * + * For tool_result messages, uses the provided `toolUseByID` map to find the + * corresponding tool_use block instead of relying on `normalizedMessages`. + */ +function getSearchOrReadInfo( + progressMessage: ProgressMessage, + tools: Tools, + toolUseByID: Map, +): { isSearch: boolean; isRead: boolean; isREPL: boolean } | null { + if (!hasProgressMessage(progressMessage.data)) { + return null + } + const message = progressMessage.data.message + + // Check tool_use (assistant message) + if (message.type === 'assistant') { + return getSearchOrReadFromContent(message.message.content[0], tools) + } + + // Check tool_result (user message) - find corresponding tool use from the map + if (message.type === 'user') { + const content = message.message.content[0] + if (content?.type === 'tool_result') { + const toolUse = toolUseByID.get(content.tool_use_id) + if (toolUse) { + return getSearchOrReadFromContent(toolUse, tools) + } + } + } + + return null +} + +type SummaryMessage = { + type: 'summary' + searchCount: number + readCount: number + replCount: number + uuid: string + isActive: boolean // true if still in progress (last message was tool_use, not tool_result) +} + +type ProcessedMessage = + | { type: 'original'; message: ProgressMessage } + | SummaryMessage + +/** + * Process progress messages to group consecutive search/read operations into summaries. + * For ants only - returns original messages for non-ants. 
+ * @param isAgentRunning - If true, the last group is always marked as active (in progress) + */ +function processProgressMessages( + messages: ProgressMessage[], + tools: Tools, + isAgentRunning: boolean, +): ProcessedMessage[] { + // Only process for ants + if (process.env.USER_TYPE !== 'ant') { + return messages + .filter( + (m): m is ProgressMessage => + hasProgressMessage(m.data) && m.data.message.type !== 'user', + ) + .map(m => ({ type: 'original', message: m })) + } + + const result: ProcessedMessage[] = [] + let currentGroup: { + searchCount: number + readCount: number + replCount: number + startUuid: string + } | null = null + + function flushGroup(isActive: boolean): void { + if ( + currentGroup && + (currentGroup.searchCount > 0 || + currentGroup.readCount > 0 || + currentGroup.replCount > 0) + ) { + result.push({ + type: 'summary', + searchCount: currentGroup.searchCount, + readCount: currentGroup.readCount, + replCount: currentGroup.replCount, + uuid: `summary-${currentGroup.startUuid}`, + isActive, + }) + } + currentGroup = null + } + + const agentMessages = messages.filter( + (m): m is ProgressMessage => hasProgressMessage(m.data), + ) + + // Build tool_use lookup incrementally as we iterate + const toolUseByID = new Map() + for (const msg of agentMessages) { + // Track tool_use blocks as we see them + if (msg.data.message.type === 'assistant') { + for (const c of msg.data.message.message.content) { + if (c.type === 'tool_use') { + toolUseByID.set(c.id, c as ToolUseBlockParam) + } + } + } + const info = getSearchOrReadInfo(msg, tools, toolUseByID) + + if (info && (info.isSearch || info.isRead || info.isREPL)) { + // This is a search/read/REPL operation - add to current group + if (!currentGroup) { + currentGroup = { + searchCount: 0, + readCount: 0, + replCount: 0, + startUuid: msg.uuid, + } + } + // Only count tool_result messages (not tool_use) to avoid double counting + if (msg.data.message.type === 'user') { + if (info.isSearch) { + 
currentGroup.searchCount++ + } else if (info.isREPL) { + currentGroup.replCount++ + } else if (info.isRead) { + currentGroup.readCount++ + } + } + } else { + // Non-search/read/REPL message - flush current group (completed) and add this message + flushGroup(false) + // Skip user tool_result messages — subagent progress messages lack + // toolUseResult, so UserToolSuccessMessage returns null and the + // height=1 Box in renderToolUseProgressMessage shows as a blank line. + if (msg.data.message.type !== 'user') { + result.push({ type: 'original', message: msg }) + } + } + } + + // Flush any remaining group - it's active if the agent is still running + flushGroup(isAgentRunning) + + return result +} + +const ESTIMATED_LINES_PER_TOOL = 9 +const TERMINAL_BUFFER_LINES = 7 + +type Output = z.input> + +export function AgentPromptDisplay({ + prompt, + dim: _dim = false, +}: { + prompt: string + theme?: ThemeName // deprecated, kept for compatibility - Markdown uses useTheme internally + dim?: boolean // deprecated, kept for compatibility - dimColor cannot be applied to Box (Markdown returns Box) +}): React.ReactNode { + return ( + + + Prompt: + + + {prompt} + + + ) +} + +export function AgentResponseDisplay({ + content, +}: { + content: { type: string; text: string }[] + theme?: ThemeName // deprecated, kept for compatibility - Markdown uses useTheme internally +}): React.ReactNode { + return ( + + + Response: + + {content.map((block: { type: string; text: string }, index: number) => ( + + {block.text} + + ))} + + ) +} + +type VerboseAgentTranscriptProps = { + progressMessages: ProgressMessage[] + tools: Tools + verbose: boolean +} + +function VerboseAgentTranscript({ + progressMessages, + tools, + verbose, +}: VerboseAgentTranscriptProps): React.ReactNode { + const { lookups: agentLookups, inProgressToolUseIDs } = buildSubagentLookups( + progressMessages + .filter((pm): pm is ProgressMessage => + hasProgressMessage(pm.data), + ) + .map(pm => pm.data), + ) + + // Filter out 
user tool_result messages that lack toolUseResult. + // Subagent progress messages don't carry the parsed tool output, + // so UserToolSuccessMessage returns null and MessageResponse renders + // a bare ⎿ with no content. + const filteredMessages = progressMessages.filter( + (pm): pm is ProgressMessage => { + if (!hasProgressMessage(pm.data)) { + return false + } + const msg = pm.data.message + if (msg.type === 'user' && msg.toolUseResult === undefined) { + return false + } + return true + }, + ) + + return ( + <> + {filteredMessages.map(progressMessage => ( + + + + ))} + + ) +} + +export function renderToolResultMessage( + data: Output, + progressMessagesForMessage: ProgressMessage[], + { + tools, + verbose, + theme, + isTranscriptMode = false, + }: { + tools: Tools + verbose: boolean + theme: ThemeName + isTranscriptMode?: boolean + }, +): React.ReactNode { + // Remote-launched agents (ant-only) use a private output type not in the + // public schema. Narrow via the internal discriminant. + const internal = data as Output | RemoteLaunchedOutput + if (internal.status === 'remote_launched') { + return ( + + + + Remote agent launched{' '} + + · {internal.taskId} · {internal.sessionUrl} + + + + + ) + } + if (data.status === 'async_launched') { + const { prompt } = data + return ( + + + + Backgrounded agent + {!isTranscriptMode && ( + + {' ('} + + + {prompt && ( + + )} + + {')'} + + )} + + + {isTranscriptMode && prompt && ( + + + + )} + + ) + } + + if (data.status !== 'completed') { + return null + } + + const { + agentId, + totalDurationMs, + totalToolUseCount, + totalTokens, + usage, + content, + prompt, + } = data + const result = [ + totalToolUseCount === 1 ? 
'1 tool use' : `${totalToolUseCount} tool uses`, + formatNumber(totalTokens) + ' tokens', + formatDuration(totalDurationMs), + ] + + const completionMessage = `Done (${result.join(' · ')})` + + const finalAssistantMessage = createAssistantMessage({ + content: completionMessage, + usage: { ...usage, inference_geo: null, iterations: null, speed: null } as unknown as BetaUsage, + }) + + return ( + + {process.env.USER_TYPE === 'ant' && ( + + + [ANT-ONLY] API calls: {getDisplayPath(getDumpPromptsPath(agentId))} + + + )} + {isTranscriptMode && prompt && ( + + + + )} + {isTranscriptMode ? ( + + + + ) : null} + {isTranscriptMode && content && content.length > 0 && ( + + + + )} + + + + {!isTranscriptMode && ( + + {' '} + + + )} + + ) +} + +export function renderToolUseMessage({ + description, + prompt, +}: Partial<{ + description: string + prompt: string +}>): React.ReactNode { + if (!description || !prompt) { + return null + } + return description +} + +export function renderToolUseTag( + input: Partial<{ + description: string + prompt: string + subagent_type: string + model?: ModelAlias + }>, +): React.ReactNode { + const tags: React.ReactNode[] = [] + + if (input.model) { + const mainModel = getMainLoopModel() + const agentModel = parseUserSpecifiedModel(input.model) + if (agentModel !== mainModel) { + tags.push( + + {renderModelName(agentModel)} + , + ) + } + } + + if (tags.length === 0) { + return null + } + + return <>{tags} +} + +const INITIALIZING_TEXT = 'Initializing…' + +export function renderToolUseProgressMessage( + progressMessages: ProgressMessage[], + { + tools, + verbose, + terminalSize, + inProgressToolCallCount, + isTranscriptMode = false, + }: { + tools: Tools + verbose: boolean + terminalSize?: { columns: number; rows: number } + inProgressToolCallCount?: number + isTranscriptMode?: boolean + }, +): React.ReactNode { + if (!progressMessages.length) { + return ( + + {INITIALIZING_TEXT} + + ) + } + + // Checks to see if we should show a super condensed 
progress message summary. + // This prevents flickers when the terminal size is too small to render all the dynamic content + const toolToolRenderLinesEstimate = + (inProgressToolCallCount ?? 1) * ESTIMATED_LINES_PER_TOOL + + TERMINAL_BUFFER_LINES + const shouldUseCondensedMode = + !isTranscriptMode && + terminalSize && + terminalSize.rows && + terminalSize.rows < toolToolRenderLinesEstimate + + const getProgressStats = () => { + const toolUseCount = count(progressMessages, msg => { + if (!hasProgressMessage(msg.data)) { + return false + } + const message = msg.data.message + return message.message.content.some( + (content: BetaContentBlock) => content.type === 'tool_use', + ) + }) + + const latestAssistant = progressMessages.findLast( + (msg): msg is ProgressMessage => + hasProgressMessage(msg.data) && msg.data.message.type === 'assistant', + ) + + let tokens = null + if (latestAssistant?.data.message.type === 'assistant') { + const usage = latestAssistant.data.message.message.usage + tokens = + (usage.cache_creation_input_tokens ?? 0) + + (usage.cache_read_input_tokens ?? 0) + + usage.input_tokens + + usage.output_tokens + } + + return { toolUseCount, tokens } + } + + if (shouldUseCondensedMode) { + const { toolUseCount, tokens } = getProgressStats() + + return ( + + + In progress… · {toolUseCount} tool{' '} + {toolUseCount === 1 ? 'use' : 'uses'} + {tokens && ` · ${formatNumber(tokens)} tokens`} ·{' '} + + + + ) + } + + // Process messages to group consecutive search/read operations into summaries (ants only) + // isAgentRunning=true since this is the progress view while the agent is still running + const processedMessages = processProgressMessages( + progressMessages, + tools, + true, + ) + + // For display, take the last few processed messages + const displayedMessages = isTranscriptMode + ? 
processedMessages + : processedMessages.slice(-MAX_PROGRESS_MESSAGES_TO_SHOW) + + // Count hidden tool uses specifically (not all messages) to match the + // final "Done (N tool uses)" count. Each tool use generates multiple + // progress messages (tool_use + tool_result + text), so counting all + // hidden messages inflates the number shown to the user. + const hiddenMessages = isTranscriptMode + ? [] + : processedMessages.slice( + 0, + Math.max(0, processedMessages.length - MAX_PROGRESS_MESSAGES_TO_SHOW), + ) + const hiddenToolUseCount = count(hiddenMessages, m => { + if (m.type === 'summary') { + return m.searchCount + m.readCount + m.replCount > 0 + } + const data = m.message.data + if (!hasProgressMessage(data)) { + return false + } + return data.message.message.content.some( + (content: BetaContentBlock) => content.type === 'tool_use', + ) + }) + + const firstData = progressMessages[0]?.data + const prompt = + firstData && hasProgressMessage(firstData) ? firstData.prompt : undefined + + // After grouping, displayedMessages can be empty when the only progress so + // far is an assistant tool_use for a search/read op (grouped but not yet + // counted, since counts increment on tool_result). Fall back to the + // initializing text so MessageResponse doesn't render a bare ⎿. 
+ if (displayedMessages.length === 0 && !(isTranscriptMode && prompt)) { + return ( + + {INITIALIZING_TEXT} + + ) + } + + const { + lookups: subagentLookups, + inProgressToolUseIDs: collapsedInProgressIDs, + } = buildSubagentLookups( + progressMessages + .filter((pm): pm is ProgressMessage => + hasProgressMessage(pm.data), + ) + .map(pm => pm.data), + ) + + return ( + + + + {isTranscriptMode && prompt && ( + + + + )} + {displayedMessages.map(processed => { + if (processed.type === 'summary') { + // Render summary for grouped search/read/REPL operations using shared formatting + const summaryText = getSearchReadSummaryText( + processed.searchCount, + processed.readCount, + processed.isActive, + processed.replCount, + ) + return ( + + {summaryText} + + ) + } + // Render original message without height=1 wrapper so null + // content (tool not found, renderToolUseMessage returns null) + // doesn't leave a blank line. Tool call headers are single-line + // anyway so truncation isn't needed. + return ( + + ) + })} + + {hiddenToolUseCount > 0 && ( + + +{hiddenToolUseCount} more tool{' '} + {hiddenToolUseCount === 1 ? 'use' : 'uses'} + + )} + + + ) +} + +export function renderToolUseRejectedMessage( + _input: { description: string; prompt: string; subagent_type: string }, + { + progressMessagesForMessage, + tools, + verbose, + isTranscriptMode, + }: { + columns: number + messages: Message[] + style?: 'condensed' + theme: ThemeName + progressMessagesForMessage: ProgressMessage[] + tools: Tools + verbose: boolean + isTranscriptMode?: boolean + }, +): React.ReactNode { + // Get agentId from progress messages if available (agent was running before rejection) + const firstData = progressMessagesForMessage[0]?.data + const agentId = + firstData && hasProgressMessage(firstData) ? 
firstData.agentId : undefined + + return ( + <> + {process.env.USER_TYPE === 'ant' && agentId && ( + + + [ANT-ONLY] API calls: {getDisplayPath(getDumpPromptsPath(agentId))} + + + )} + {renderToolUseProgressMessage(progressMessagesForMessage, { + tools, + verbose, + isTranscriptMode, + })} + + + ) +} + +export function renderToolUseErrorMessage( + result: ToolResultBlockParam['content'], + { + progressMessagesForMessage, + tools, + verbose, + isTranscriptMode, + }: { + progressMessagesForMessage: ProgressMessage[] + tools: Tools + verbose: boolean + isTranscriptMode?: boolean + }, +): React.ReactNode { + return ( + <> + {renderToolUseProgressMessage(progressMessagesForMessage, { + tools, + verbose, + isTranscriptMode, + })} + + + ) +} + +function calculateAgentStats(progressMessages: ProgressMessage[]): { + toolUseCount: number + tokens: number | null +} { + const toolUseCount = count(progressMessages, msg => { + if (!hasProgressMessage(msg.data)) { + return false + } + const message = msg.data.message + return ( + message.type === 'user' && + message.message.content.some((content: BetaContentBlock) => content.type === 'tool_result') + ) + }) + + const latestAssistant = progressMessages.findLast( + (msg): msg is ProgressMessage => + hasProgressMessage(msg.data) && msg.data.message.type === 'assistant', + ) + + let tokens = null + if (latestAssistant?.data.message.type === 'assistant') { + const usage = latestAssistant.data.message.message.usage + tokens = + (usage.cache_creation_input_tokens ?? 0) + + (usage.cache_read_input_tokens ?? 
0) + + usage.input_tokens + + usage.output_tokens + } + + return { toolUseCount, tokens } +} + +export function renderGroupedAgentToolUse( + toolUses: Array<{ + param: ToolUseBlockParam + isResolved: boolean + isError: boolean + isInProgress: boolean + progressMessages: ProgressMessage[] + result?: { + param: ToolResultBlockParam + output: Output + } + }>, + options: { + shouldAnimate: boolean + tools: Tools + }, +): React.ReactNode | null { + const { shouldAnimate, tools } = options + + // Calculate stats for each agent + const agentStats = toolUses.map( + ({ param, isResolved, isError, progressMessages, result }) => { + const stats = calculateAgentStats(progressMessages) + const lastToolInfo = extractLastToolInfo(progressMessages, tools) + const parsedInput = inputSchema().safeParse(param.input) + + // teammate_spawned is not part of the exported Output type (cast through unknown + // for dead code elimination), so check via string comparison on the raw value + const isTeammateSpawn = + (result?.output?.status as string) === 'teammate_spawned' + + // For teammate spawns, show @name with type in parens and description as status + let agentType: string + let description: string | undefined + let color: keyof Theme | undefined + let descriptionColor: keyof Theme | undefined + let taskDescription: string | undefined + if (isTeammateSpawn && parsedInput.success && parsedInput.data.name) { + agentType = `@${parsedInput.data.name}` + const subagentType = parsedInput.data.subagent_type + description = isCustomSubagentType(subagentType) + ? subagentType + : undefined + taskDescription = parsedInput.data.description + // Use the custom agent definition's color on the type, not the name + descriptionColor = isCustomSubagentType(subagentType) + ? getAgentColor(subagentType) + : undefined + } else { + agentType = parsedInput.success + ? userFacingName(parsedInput.data) + : 'Agent' + description = parsedInput.success + ? 
parsedInput.data.description + : undefined + color = parsedInput.success + ? userFacingNameBackgroundColor(parsedInput.data) + : undefined + taskDescription = undefined + } + + // Check if this was launched as a background agent OR backgrounded mid-execution + const launchedAsAsync = + parsedInput.success && + 'run_in_background' in parsedInput.data && + parsedInput.data.run_in_background === true + const outputStatus = (result?.output as { status?: string } | undefined) + ?.status + const backgroundedMidExecution = + outputStatus === 'async_launched' || outputStatus === 'remote_launched' + const isAsync = + launchedAsAsync || backgroundedMidExecution || isTeammateSpawn + + const name = parsedInput.success ? parsedInput.data.name : undefined + + return { + id: param.id, + agentType, + description, + toolUseCount: stats.toolUseCount, + tokens: stats.tokens, + isResolved, + isError, + isAsync, + color, + descriptionColor, + lastToolInfo, + taskDescription, + name, + } + }, + ) + + const anyUnresolved = toolUses.some(t => !t.isResolved) + const anyError = toolUses.some(t => t.isError) + const allComplete = !anyUnresolved + + // Check if all agents are the same type + const allSameType = + agentStats.length > 0 && + agentStats.every(stat => stat.agentType === agentStats[0]?.agentType) + const commonType = + allSameType && agentStats[0]?.agentType !== 'Agent' + ? agentStats[0]?.agentType + : null + + // Check if all resolved agents are async (background) + const allAsync = agentStats.every(stat => stat.isAsync) + + return ( + + + + + {allComplete ? ( + allAsync ? ( + <> + {toolUses.length} background agents launched{' '} + + + + + ) : ( + <> + {toolUses.length}{' '} + {commonType ? `${commonType} agents` : 'agents'} finished + + ) + ) : ( + <> + Running {toolUses.length}{' '} + {commonType ? 
`${commonType} agents` : 'agents'}… + + )}{' '} + + {!allAsync && } + + {agentStats.map((stat, index) => ( + + ))} + + ) +} + +export function userFacingName( + input: + | Partial<{ + description: string + prompt: string + subagent_type: string + name: string + team_name: string + }> + | undefined, +): string { + if ( + input?.subagent_type && + input.subagent_type !== GENERAL_PURPOSE_AGENT.agentType + ) { + // Display "worker" agents as "Agent" for cleaner UI + if (input.subagent_type === 'worker') { + return 'Agent' + } + return input.subagent_type + } + return 'Agent' +} + +export function userFacingNameBackgroundColor( + input: + | Partial<{ description: string; prompt: string; subagent_type: string }> + | undefined, +): keyof Theme | undefined { + if (!input?.subagent_type) { + return undefined + } + + // Get the color for this agent + return getAgentColor(input.subagent_type) +} + +export function extractLastToolInfo( + progressMessages: ProgressMessage[], + tools: Tools, +): string | null { + // Build tool_use lookup from all progress messages (needed for reverse iteration) + const toolUseByID = new Map() + for (const pm of progressMessages) { + if (!hasProgressMessage(pm.data)) { + continue + } + if (pm.data.message.type === 'assistant') { + for (const c of pm.data.message.message.content) { + if (c.type === 'tool_use') { + toolUseByID.set(c.id, c as ToolUseBlockParam) + } + } + } + } + + // Count trailing consecutive search/read operations from the end + let searchCount = 0 + let readCount = 0 + for (let i = progressMessages.length - 1; i >= 0; i--) { + const msg = progressMessages[i]! 
+ if (!hasProgressMessage(msg.data)) { + continue + } + const info = getSearchOrReadInfo(msg, tools, toolUseByID) + if (info && (info.isSearch || info.isRead)) { + // Only count tool_result messages to avoid double counting + if (msg.data.message.type === 'user') { + if (info.isSearch) { + searchCount++ + } else if (info.isRead) { + readCount++ + } + } + } else { + break + } + } + + if (searchCount + readCount >= 2) { + return getSearchReadSummaryText(searchCount, readCount, true) + } + + // Find the last tool_result message + const lastToolResult = progressMessages.findLast( + (msg): msg is ProgressMessage => { + if (!hasProgressMessage(msg.data)) { + return false + } + const message = msg.data.message + return ( + message.type === 'user' && + message.message.content.some((c: BetaContentBlock) => c.type === 'tool_result') + ) + }, + ) + + if (lastToolResult?.data.message.type === 'user') { + const toolResultBlock = lastToolResult.data.message.message.content.find( + (c: BetaContentBlock) => c.type === 'tool_result', + ) + + if (toolResultBlock?.type === 'tool_result') { + // Look up the corresponding tool_use — already indexed above + const toolUseBlock = toolUseByID.get(toolResultBlock.tool_use_id) + + if (toolUseBlock) { + const tool = findToolByName(tools, toolUseBlock.name) + if (!tool) { + return toolUseBlock.name // Fallback to raw name + } + + const input = toolUseBlock.input as Record + const parsedInput = tool.inputSchema.safeParse(input) + + // Get user-facing tool name + const userFacingToolName = tool.userFacingName( + parsedInput.success ? parsedInput.data : undefined, + ) + + // Try to get summary from the tool itself + if (tool.getToolUseSummary) { + const summary = tool.getToolUseSummary( + parsedInput.success ? 
parsedInput.data : undefined, + ) + if (summary) { + return `${userFacingToolName}: ${summary}` + } + } + + // Default: just show user-facing tool name + return userFacingToolName + } + } + } + + return null +} + +function isCustomSubagentType( + subagentType: string | undefined, +): subagentType is string { + return ( + !!subagentType && + subagentType !== GENERAL_PURPOSE_AGENT.agentType && + subagentType !== 'worker' + ) +} diff --git a/packages/builtin-tools/src/tools/AgentTool/__tests__/agentDisplay.test.ts b/packages/builtin-tools/src/tools/AgentTool/__tests__/agentDisplay.test.ts new file mode 100644 index 000000000..072b48c26 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/__tests__/agentDisplay.test.ts @@ -0,0 +1,136 @@ +import { mock, describe, expect, test } from "bun:test"; + +// Mock heavy deps +mock.module("src/utils/model/agent.js", () => ({ + getDefaultSubagentModel: () => undefined, +})); + +mock.module("src/utils/settings/constants.js", () => ({ + getSourceDisplayName: (source: string) => source, +})); + +const { + resolveAgentOverrides, + compareAgentsByName, + AGENT_SOURCE_GROUPS, +} = await import("../agentDisplay"); + +function makeAgent(agentType: string, source: string): any { + return { agentType, source, name: agentType }; +} + +describe("resolveAgentOverrides", () => { + test("marks no overrides when all agents active", () => { + const agents = [makeAgent("builder", "userSettings")]; + const result = resolveAgentOverrides(agents, agents); + expect(result).toHaveLength(1); + expect(result[0].overriddenBy).toBeUndefined(); + }); + + test("marks inactive agent as overridden", () => { + const allAgents = [ + makeAgent("builder", "projectSettings"), + makeAgent("builder", "userSettings"), + ]; + const activeAgents = [makeAgent("builder", "userSettings")]; + const result = resolveAgentOverrides(allAgents, activeAgents); + const projectAgent = result.find( + (a: any) => a.source === "projectSettings", + ); + 
expect(projectAgent?.overriddenBy).toBe("userSettings"); + }); + + test("overriddenBy shows the overriding agent source", () => { + const allAgents = [makeAgent("tester", "localSettings")]; + const activeAgents = [makeAgent("tester", "policySettings")]; + const result = resolveAgentOverrides(allAgents, activeAgents); + expect(result[0].overriddenBy).toBe("policySettings"); + }); + + test("deduplicates agents by (agentType, source)", () => { + const agents = [ + makeAgent("builder", "userSettings"), + makeAgent("builder", "userSettings"), // duplicate + ]; + const result = resolveAgentOverrides(agents, agents.slice(0, 1)); + expect(result).toHaveLength(1); + }); + + test("preserves agent definition properties", () => { + const agents = [{ agentType: "a", source: "userSettings", name: "Agent A" }] as any[]; + const result = resolveAgentOverrides(agents, agents); + expect((result[0] as any).name).toBe("Agent A"); + expect(result[0].agentType).toBe("a"); + }); + + test("handles empty arrays", () => { + expect(resolveAgentOverrides([], [])).toEqual([]); + }); + + test("handles agent from git worktree (duplicate detection)", () => { + const agents = [ + makeAgent("builder", "projectSettings"), + makeAgent("builder", "projectSettings"), + makeAgent("builder", "localSettings"), + ]; + const result = resolveAgentOverrides(agents, agents.slice(0, 1)); + // Deduped: projectSettings appears once, localSettings once + expect(result).toHaveLength(2); + }); +}); + +describe("compareAgentsByName", () => { + test("sorts alphabetically ascending", () => { + const a = makeAgent("alpha", "userSettings"); + const b = makeAgent("beta", "userSettings"); + expect(compareAgentsByName(a, b)).toBeLessThan(0); + }); + + test("returns negative when a.name < b.name", () => { + const a = makeAgent("a", "s"); + const b = makeAgent("b", "s"); + expect(compareAgentsByName(a, b)).toBeLessThan(0); + }); + + test("returns positive when a.name > b.name", () => { + const a = makeAgent("z", "s"); + const 
b = makeAgent("a", "s"); + expect(compareAgentsByName(a, b)).toBeGreaterThan(0); + }); + + test("returns 0 for same name", () => { + const a = makeAgent("same", "s"); + const b = makeAgent("same", "s"); + expect(compareAgentsByName(a, b)).toBe(0); + }); + + test("is case-insensitive (sensitivity: base)", () => { + const a = makeAgent("Alpha", "s"); + const b = makeAgent("alpha", "s"); + expect(compareAgentsByName(a, b)).toBe(0); + }); +}); + +describe("AGENT_SOURCE_GROUPS", () => { + test("contains expected source groups in order", () => { + expect(AGENT_SOURCE_GROUPS).toHaveLength(7); + expect(AGENT_SOURCE_GROUPS[0]).toEqual({ + label: "User agents", + source: "userSettings", + }); + expect(AGENT_SOURCE_GROUPS[6]).toEqual({ + label: "Built-in agents", + source: "built-in", + }); + }); + + test("has unique labels", () => { + const labels = AGENT_SOURCE_GROUPS.map((g) => g.label); + expect(new Set(labels).size).toBe(labels.length); + }); + + test("has unique sources", () => { + const sources = AGENT_SOURCE_GROUPS.map((g) => g.source); + expect(new Set(sources).size).toBe(sources.length); + }); +}); diff --git a/packages/builtin-tools/src/tools/AgentTool/__tests__/agentToolUtils.test.ts b/packages/builtin-tools/src/tools/AgentTool/__tests__/agentToolUtils.test.ts new file mode 100644 index 000000000..460bff67f --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/__tests__/agentToolUtils.test.ts @@ -0,0 +1,253 @@ +import { mock, describe, expect, test } from "bun:test"; + +// ─── Mocks for agentToolUtils.ts dependencies ─── +// Only mock modules that are truly unavailable or cause side effects. +// Do NOT mock common/shared modules (zod/v4, bootstrap/state, etc.) to avoid +// corrupting the module cache for other test files in the same Bun process. 
+ +const noop = () => {}; + +mock.module("bun:bundle", () => ({ feature: () => false })); + +mock.module("src/constants/tools.js", () => ({ + ALL_AGENT_DISALLOWED_TOOLS: new Set(), + ASYNC_AGENT_ALLOWED_TOOLS: new Set(), + CUSTOM_AGENT_DISALLOWED_TOOLS: new Set(), + IN_PROCESS_TEAMMATE_ALLOWED_TOOLS: new Set(), +})); + +mock.module("src/services/AgentSummary/agentSummary.js", () => ({ + startAgentSummarization: noop, +})); + +mock.module("src/services/analytics/index.js", () => ({ + logEvent: noop, + logEventAsync: async () => {}, + stripProtoFields: (v: any) => v, + attachAnalyticsSink: noop, + _resetForTesting: noop, + AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS: undefined, +})); + +mock.module("src/services/api/dumpPrompts.js", () => ({ + clearDumpState: noop, +})); + +mock.module("src/Tool.js", () => ({ + toolMatchesName: () => false, + findToolByName: noop, +})); + +// messages.ts is complex - provide stubs for all named exports +mock.module("src/utils/messages.ts", () => ({ + extractTextContent: (content: any[]) => + content?.filter?.((b: any) => b.type === "text")?.map?.((b: any) => b.text)?.join("") ?? 
"", + getLastAssistantMessage: () => null, + SYNTHETIC_MESSAGES: new Set(), + INTERRUPT_MESSAGE: "", + INTERRUPT_MESSAGE_FOR_TOOL_USE: "", + CANCEL_MESSAGE: "", + REJECT_MESSAGE: "", + REJECT_MESSAGE_WITH_REASON_PREFIX: "", + SUBAGENT_REJECT_MESSAGE: "", + SUBAGENT_REJECT_MESSAGE_WITH_REASON_PREFIX: "", + PLAN_REJECTION_PREFIX: "", + DENIAL_WORKAROUND_GUIDANCE: "", + NO_RESPONSE_REQUESTED: "", + SYNTHETIC_TOOL_RESULT_PLACEHOLDER: "", + SYNTHETIC_MODEL: "", + AUTO_REJECT_MESSAGE: noop, + DONT_ASK_REJECT_MESSAGE: noop, + withMemoryCorrectionHint: (s: string) => s, + deriveShortMessageId: () => "", + isClassifierDenial: () => false, + buildYoloRejectionMessage: () => "", + buildClassifierUnavailableMessage: () => "", + isEmptyMessageText: () => true, + createAssistantMessage: noop, + createAssistantAPIErrorMessage: noop, + createUserMessage: noop, + prepareUserContent: noop, + createUserInterruptionMessage: noop, + createSyntheticUserCaveatMessage: noop, + formatCommandInputTags: noop, +})); + +mock.module("src/tasks/LocalAgentTask/LocalAgentTask.js", () => ({ + completeAgentTask: noop, + createActivityDescriptionResolver: () => ({}), + createProgressTracker: () => ({}), + enqueueAgentNotification: noop, + failAgentTask: noop, + getProgressUpdate: () => ({ tokenCount: 0, toolUseCount: 0 }), + getTokenCountFromTracker: () => 0, + isLocalAgentTask: () => false, + killAsyncAgent: noop, + updateAgentProgress: noop, + updateProgressFromMessage: noop, +})); + +mock.module("src/utils/debug.js", () => ({ + getMinDebugLogLevel: () => "warn", + isDebugMode: () => false, + enableDebugLogging: () => false, + getDebugFilter: () => null, + isDebugToStdErr: () => false, + getDebugFilePath: () => null, + setHasFormattedOutput: noop, + getHasFormattedOutput: () => false, + flushDebugLogs: async () => {}, + logForDebugging: noop, + getDebugLogPath: () => "", + logAntError: noop, +})); + +mock.module("src/utils/errors.js", () => ({ + ClaudeError: class extends Error {}, + 
MalformedCommandError: class extends Error {}, + AbortError: class extends Error {}, + ConfigParseError: class extends Error {}, + ShellError: class extends Error {}, + TeleportOperationError: class extends Error {}, + TelemetrySafeError_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS: class extends Error {}, + isAbortError: () => false, + hasExactErrorMessage: () => false, + toError: (e: any) => e instanceof Error ? e : new Error(String(e)), + errorMessage: (e: any) => String(e), + getErrnoCode: () => undefined, + isENOENT: () => false, + getErrnoPath: () => undefined, + shortErrorStack: () => "", + isFsInaccessible: () => false, + classifyAxiosError: () => ({ category: "unknown" }), +})); + +mock.module("src/utils/forkedAgent.js", () => ({})); + +mock.module("src/utils/permissions/yoloClassifier.js", () => ({ + buildTranscriptForClassifier: () => "", + classifyYoloAction: () => null, +})); + +mock.module("src/utils/task/sdkProgress.js", () => ({ + emitTaskProgress: noop, +})); + +mock.module("src/utils/tokens.js", () => ({ + getTokenCountFromUsage: () => 0, +})); + +mock.module("src/tools/ExitPlanModeTool/constants.js", () => ({ + EXIT_PLAN_MODE_V2_TOOL_NAME: "exit_plan_mode", +})); + +mock.module("src/tools/AgentTool/constants.js", () => ({ + AGENT_TOOL_NAME: "agent", + LEGACY_AGENT_TOOL_NAME: "task", +})); + +mock.module("src/tools/AgentTool/loadAgentsDir.js", () => ({})); + +mock.module("src/state/AppState.js", () => ({})); + +mock.module("src/types/ids.js", () => ({ + asAgentId: (id: string) => id, +})); + +// Break circular dep +mock.module("src/tools/AgentTool/AgentTool.tsx", () => ({ + AgentTool: {}, + inputSchema: {}, + outputSchema: {}, + default: {}, +})); + +const { + countToolUses, + getLastToolUseName, +} = await import("../agentToolUtils"); + +function makeAssistantMessage(content: any[]): any { + return { type: "assistant", message: { content } }; +} + +function makeUserMessage(text: string): any { + return { type: "user", message: { content: text } }; +} 
+ +describe("countToolUses", () => { + test("counts tool_use blocks in messages", () => { + const messages = [ + makeAssistantMessage([ + { type: "tool_use", name: "Read" }, + { type: "text", text: "hello" }, + ]), + ]; + expect(countToolUses(messages)).toBe(1); + }); + + test("returns 0 for messages without tool_use", () => { + const messages = [ + makeAssistantMessage([{ type: "text", text: "hello" }]), + ]; + expect(countToolUses(messages)).toBe(0); + }); + + test("returns 0 for empty array", () => { + expect(countToolUses([])).toBe(0); + }); + + test("counts multiple tool_use blocks across messages", () => { + const messages = [ + makeAssistantMessage([{ type: "tool_use", name: "Read" }]), + makeUserMessage("ok"), + makeAssistantMessage([{ type: "tool_use", name: "Write" }]), + ]; + expect(countToolUses(messages)).toBe(2); + }); + + test("counts tool_use in single message with multiple blocks", () => { + const messages = [ + makeAssistantMessage([ + { type: "tool_use", name: "Read" }, + { type: "tool_use", name: "Grep" }, + { type: "tool_use", name: "Write" }, + ]), + ]; + expect(countToolUses(messages)).toBe(3); + }); +}); + +describe("getLastToolUseName", () => { + test("returns last tool name from assistant message", () => { + const msg = makeAssistantMessage([ + { type: "tool_use", name: "Read" }, + { type: "tool_use", name: "Write" }, + ]); + expect(getLastToolUseName(msg)).toBe("Write"); + }); + + test("returns undefined for message without tool_use", () => { + const msg = makeAssistantMessage([{ type: "text", text: "hello" }]); + expect(getLastToolUseName(msg)).toBeUndefined(); + }); + + test("returns the last tool when multiple tool_uses present", () => { + const msg = makeAssistantMessage([ + { type: "tool_use", name: "Read" }, + { type: "tool_use", name: "Grep" }, + { type: "tool_use", name: "Edit" }, + ]); + expect(getLastToolUseName(msg)).toBe("Edit"); + }); + + test("returns undefined for non-assistant message", () => { + const msg = 
makeUserMessage("hello"); + expect(getLastToolUseName(msg)).toBeUndefined(); + }); + + test("handles message with null content", () => { + const msg = { type: "assistant", message: { content: null } } as any; + expect(getLastToolUseName(msg)).toBeUndefined(); + }); +}); diff --git a/packages/builtin-tools/src/tools/AgentTool/agentColorManager.ts b/packages/builtin-tools/src/tools/AgentTool/agentColorManager.ts new file mode 100644 index 000000000..ba6da1ddc --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/agentColorManager.ts @@ -0,0 +1,66 @@ +import { getAgentColorMap } from 'src/bootstrap/state.js' +import type { Theme } from 'src/utils/theme.js' + +export type AgentColorName = + | 'red' + | 'blue' + | 'green' + | 'yellow' + | 'purple' + | 'orange' + | 'pink' + | 'cyan' + +export const AGENT_COLORS: readonly AgentColorName[] = [ + 'red', + 'blue', + 'green', + 'yellow', + 'purple', + 'orange', + 'pink', + 'cyan', +] as const + +export const AGENT_COLOR_TO_THEME_COLOR = { + red: 'red_FOR_SUBAGENTS_ONLY', + blue: 'blue_FOR_SUBAGENTS_ONLY', + green: 'green_FOR_SUBAGENTS_ONLY', + yellow: 'yellow_FOR_SUBAGENTS_ONLY', + purple: 'purple_FOR_SUBAGENTS_ONLY', + orange: 'orange_FOR_SUBAGENTS_ONLY', + pink: 'pink_FOR_SUBAGENTS_ONLY', + cyan: 'cyan_FOR_SUBAGENTS_ONLY', +} as const satisfies Record + +export function getAgentColor(agentType: string): keyof Theme | undefined { + if (agentType === 'general-purpose') { + return undefined + } + + const agentColorMap = getAgentColorMap() + + // Check if color already assigned + const existingColor = agentColorMap.get(agentType) + if (existingColor && AGENT_COLORS.includes(existingColor)) { + return AGENT_COLOR_TO_THEME_COLOR[existingColor] + } + + return undefined +} + +export function setAgentColor( + agentType: string, + color: AgentColorName | undefined, +): void { + const agentColorMap = getAgentColorMap() + + if (!color) { + agentColorMap.delete(agentType) + return + } + + if (AGENT_COLORS.includes(color)) { + 
agentColorMap.set(agentType, color) + } +} diff --git a/packages/builtin-tools/src/tools/AgentTool/agentDisplay.ts b/packages/builtin-tools/src/tools/AgentTool/agentDisplay.ts new file mode 100644 index 000000000..1da7b7a4f --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/agentDisplay.ts @@ -0,0 +1,104 @@ +/** + * Shared utilities for displaying agent information. + * Used by both the CLI `claude agents` handler and the interactive `/agents` command. + */ + +import { getDefaultSubagentModel } from 'src/utils/model/agent.js' +import { + getSourceDisplayName, + type SettingSource, +} from 'src/utils/settings/constants.js' +import type { AgentDefinition } from './loadAgentsDir.js' + +type AgentSource = SettingSource | 'built-in' | 'plugin' + +export type AgentSourceGroup = { + label: string + source: AgentSource +} + +/** + * Ordered list of agent source groups for display. + * Both the CLI and interactive UI should use this to ensure consistent ordering. + */ +export const AGENT_SOURCE_GROUPS: AgentSourceGroup[] = [ + { label: 'User agents', source: 'userSettings' }, + { label: 'Project agents', source: 'projectSettings' }, + { label: 'Local agents', source: 'localSettings' }, + { label: 'Managed agents', source: 'policySettings' }, + { label: 'Plugin agents', source: 'plugin' }, + { label: 'CLI arg agents', source: 'flagSettings' }, + { label: 'Built-in agents', source: 'built-in' }, +] + +export type ResolvedAgent = AgentDefinition & { + overriddenBy?: AgentSource +} + +/** + * Annotate agents with override information by comparing against the active + * (winning) agent list. An agent is "overridden" when another agent with the + * same type from a higher-priority source takes precedence. + * + * Also deduplicates by (agentType, source) to handle git worktree duplicates + * where the same agent file is loaded from both the worktree and main repo. 
+ */
+export function resolveAgentOverrides(
+  allAgents: AgentDefinition[],
+  activeAgents: AgentDefinition[],
+): ResolvedAgent[] {
+  // Index the winning definitions by agent type. If activeAgents contains the
+  // same type twice, the later entry replaces the earlier one.
+  const activeMap = new Map<string, AgentDefinition>()
+  for (const agent of activeAgents) {
+    activeMap.set(agent.agentType, agent)
+  }
+
+  const seen = new Set<string>()
+  const resolved: ResolvedAgent[] = []
+
+  // Iterate allAgents, annotating each with override info from activeAgents.
+  // Deduplicate by (agentType, source) to handle git worktree duplicates.
+  for (const agent of allAgents) {
+    const key = `${agent.agentType}:${agent.source}`
+    if (seen.has(key)) continue
+    seen.add(key)
+
+    // overriddenBy is only set when the active definition for this type comes
+    // from a different source than this entry.
+    const active = activeMap.get(agent.agentType)
+    const overriddenBy =
+      active && active.source !== agent.source ? active.source : undefined
+    resolved.push({ ...agent, overriddenBy })
+  }
+
+  return resolved
+}
+
+/**
+ * Resolve the display model string for an agent.
+ * Uses the agent's own `model` when set, otherwise the value returned by
+ * getDefaultSubagentModel(). Returns undefined when neither yields a value;
+ * the string 'inherit' is passed through unchanged.
+ */
+export function resolveAgentModelDisplay(
+  agent: AgentDefinition,
+): string | undefined {
+  const model = agent.model || getDefaultSubagentModel()
+  if (!model) return undefined
+  return model
+}
+
+/**
+ * Get a human-readable label for the source that overrides an agent.
+ * Returns lowercase, e.g. "user", "project", "managed".
+ */
+export function getOverrideSourceLabel(source: AgentSource): string {
+  return getSourceDisplayName(source).toLowerCase()
+}
+
+/**
+ * Compare agents alphabetically by name (case-insensitive).
+ */ +export function compareAgentsByName( + a: AgentDefinition, + b: AgentDefinition, +): number { + return a.agentType.localeCompare(b.agentType, undefined, { + sensitivity: 'base', + }) +} diff --git a/packages/builtin-tools/src/tools/AgentTool/agentMemory.ts b/packages/builtin-tools/src/tools/AgentTool/agentMemory.ts new file mode 100644 index 000000000..d1b16f1e2 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/agentMemory.ts @@ -0,0 +1,177 @@ +import { join, normalize, sep } from 'path' +import { getProjectRoot } from 'src/bootstrap/state.js' +import { + buildMemoryPrompt, + ensureMemoryDirExists, +} from 'src/memdir/memdir.js' +import { getMemoryBaseDir } from 'src/memdir/paths.js' +import { getCwd } from 'src/utils/cwd.js' +import { findCanonicalGitRoot } from 'src/utils/git.js' +import { sanitizePath } from 'src/utils/path.js' + +// Persistent agent memory scope: 'user' (~/.claude/agent-memory/), 'project' (.claude/agent-memory/), or 'local' (.claude/agent-memory-local/) +export type AgentMemoryScope = 'user' | 'project' | 'local' + +/** + * Sanitize an agent type name for use as a directory name. + * Replaces colons (invalid on Windows, used in plugin-namespaced agent + * types like "my-plugin:my-agent") with dashes. + */ +function sanitizeAgentTypeForPath(agentType: string): string { + return agentType.replace(/:/g, '-') +} + +/** + * Returns the local agent memory directory, which is project-specific and not checked into VCS. + * When CLAUDE_CODE_REMOTE_MEMORY_DIR is set, persists to the mount with project namespacing. + * Otherwise, uses /.claude/agent-memory-local//. + */ +function getLocalAgentMemoryDir(dirName: string): string { + if (process.env.CLAUDE_CODE_REMOTE_MEMORY_DIR) { + return ( + join( + process.env.CLAUDE_CODE_REMOTE_MEMORY_DIR, + 'projects', + sanitizePath( + findCanonicalGitRoot(getProjectRoot()) ?? 
getProjectRoot(), + ), + 'agent-memory-local', + dirName, + ) + sep + ) + } + return join(getCwd(), '.claude', 'agent-memory-local', dirName) + sep +} + +/** + * Returns the agent memory directory for a given agent type and scope. + * - 'user' scope: /agent-memory// + * - 'project' scope: /.claude/agent-memory// + * - 'local' scope: see getLocalAgentMemoryDir() + */ +export function getAgentMemoryDir( + agentType: string, + scope: AgentMemoryScope, +): string { + const dirName = sanitizeAgentTypeForPath(agentType) + switch (scope) { + case 'project': + return join(getCwd(), '.claude', 'agent-memory', dirName) + sep + case 'local': + return getLocalAgentMemoryDir(dirName) + case 'user': + return join(getMemoryBaseDir(), 'agent-memory', dirName) + sep + } +} + +// Check if file is within an agent memory directory (any scope). +export function isAgentMemoryPath(absolutePath: string): boolean { + // SECURITY: Normalize to prevent path traversal bypasses via .. segments + const normalizedPath = normalize(absolutePath) + const memoryBase = getMemoryBaseDir() + + // User scope: check memory base (may be custom dir or config home) + if (normalizedPath.startsWith(join(memoryBase, 'agent-memory') + sep)) { + return true + } + + // Project scope: always cwd-based (not redirected) + if ( + normalizedPath.startsWith(join(getCwd(), '.claude', 'agent-memory') + sep) + ) { + return true + } + + // Local scope: persisted to mount when CLAUDE_CODE_REMOTE_MEMORY_DIR is set, otherwise cwd-based + if (process.env.CLAUDE_CODE_REMOTE_MEMORY_DIR) { + if ( + normalizedPath.includes(sep + 'agent-memory-local' + sep) && + normalizedPath.startsWith( + join(process.env.CLAUDE_CODE_REMOTE_MEMORY_DIR, 'projects') + sep, + ) + ) { + return true + } + } else if ( + normalizedPath.startsWith( + join(getCwd(), '.claude', 'agent-memory-local') + sep, + ) + ) { + return true + } + + return false +} + +/** + * Returns the agent memory file path for a given agent type and scope. 
+ */
+export function getAgentMemoryEntrypoint(
+  agentType: string,
+  scope: AgentMemoryScope,
+): string {
+  return join(getAgentMemoryDir(agentType, scope), 'MEMORY.md')
+}
+
+/**
+ * Human-readable label for a memory scope, including the directory it maps to.
+ * Returns 'None' when no scope is set.
+ */
+export function getMemoryScopeDisplay(
+  memory: AgentMemoryScope | undefined,
+): string {
+  switch (memory) {
+    case 'user':
+      return `User (${join(getMemoryBaseDir(), 'agent-memory')}/)`
+    case 'project':
+      return 'Project (.claude/agent-memory/)'
+    case 'local':
+      return `Local (${getLocalAgentMemoryDir('...')})`
+    default:
+      return 'None'
+  }
+}
+
+/**
+ * Load persistent memory for an agent with memory enabled.
+ * Kicks off creation of the memory directory (not awaited) and returns the
+ * prompt built by buildMemoryPrompt() for that directory.
+ *
+ * @param agentType The agent's type name (used to derive the directory name)
+ * @param scope 'user', 'project' or 'local'; selects the directory via
+ *   getAgentMemoryDir() and the scope-specific guideline added to the prompt
+ */
+export function loadAgentMemoryPrompt(
+  agentType: string,
+  scope: AgentMemoryScope,
+): string {
+  let scopeNote: string
+  switch (scope) {
+    case 'user':
+      scopeNote =
+        '- Since this memory is user-scope, keep learnings general since they apply across all projects'
+      break
+    case 'project':
+      scopeNote =
+        '- Since this memory is project-scope and shared with your team via version control, tailor your memories to this project'
+      break
+    case 'local':
+      scopeNote =
+        '- Since this memory is local-scope (not checked into version control), tailor your memories to this project and machine'
+      break
+  }
+
+  const memoryDir = getAgentMemoryDir(agentType, scope)
+
+  // Fire-and-forget: this runs at agent-spawn time inside a sync
+  // getSystemPrompt() callback (called from React render in AgentDetail.tsx,
+  // so it cannot be async). The spawned agent won't try to Write until after
+  // a full API round-trip, by which time mkdir will have completed. Even if
+  // it hasn't, FileWriteTool does its own mkdir of the parent directory.
+  void ensureMemoryDirExists(memoryDir)
+
+  // Extra guidelines from the environment are appended only when non-blank.
+  const coworkExtraGuidelines =
+    process.env.CLAUDE_COWORK_MEMORY_EXTRA_GUIDELINES
+  return buildMemoryPrompt({
+    displayName: 'Persistent Agent Memory',
+    memoryDir,
+    extraGuidelines:
+      coworkExtraGuidelines && coworkExtraGuidelines.trim().length > 0
+        ? [scopeNote, coworkExtraGuidelines]
+        : [scopeNote],
+  })
+}
diff --git a/packages/builtin-tools/src/tools/AgentTool/agentMemorySnapshot.ts b/packages/builtin-tools/src/tools/AgentTool/agentMemorySnapshot.ts
new file mode 100644
index 000000000..b08fea5cf
--- /dev/null
+++ b/packages/builtin-tools/src/tools/AgentTool/agentMemorySnapshot.ts
@@ -0,0 +1,197 @@
+import { mkdir, readdir, readFile, unlink, writeFile } from 'fs/promises'
+import { join } from 'path'
+import { z } from 'zod/v4'
+import { getCwd } from 'src/utils/cwd.js'
+import { logForDebugging } from 'src/utils/debug.js'
+import { lazySchema } from 'src/utils/lazySchema.js'
+import { jsonParse, jsonStringify } from 'src/utils/slowOperations.js'
+import { type AgentMemoryScope, getAgentMemoryDir } from './agentMemory.js'
+
+const SNAPSHOT_BASE = 'agent-memory-snapshots'
+const SNAPSHOT_JSON = 'snapshot.json'
+const SYNCED_JSON = '.snapshot-synced.json'
+
+const snapshotMetaSchema = lazySchema(() =>
+  z.object({
+    updatedAt: z.string().min(1),
+  }),
+)
+
+const syncedMetaSchema = lazySchema(() =>
+  z.object({
+    syncedFrom: z.string().min(1),
+  }),
+)
+type SyncedMeta = z.infer<ReturnType<typeof syncedMetaSchema>>
+
+/**
+ * Returns the path to the snapshot directory for an agent in the current project.
+ * e.g., <cwd>/.claude/agent-memory-snapshots/<agentType>/
+ */
+export function getSnapshotDirForAgent(agentType: string): string {
+  return join(getCwd(), '.claude', SNAPSHOT_BASE, agentType)
+}
+
+/** Path of the snapshot metadata file (snapshot.json) for an agent. */
+function getSnapshotJsonPath(agentType: string): string {
+  return join(getSnapshotDirForAgent(agentType), SNAPSHOT_JSON)
+}
+
+/**
+ * Path of the sync marker (.snapshot-synced.json) inside the agent's memory
+ * directory for the given scope.
+ */
+function getSyncedJsonPath(agentType: string, scope: AgentMemoryScope): string {
+  return join(getAgentMemoryDir(agentType, scope), SYNCED_JSON)
+}
+
+/**
+ * Read a JSON file and validate it against `schema`.
+ * Returns the validated value, or null when reading/parsing throws or the
+ * schema rejects the content.
+ */
+async function readJsonFile<T>(
+  path: string,
+  schema: z.ZodType<T>,
+): Promise<T | null> {
+  try {
+    const content = await readFile(path, { encoding: 'utf-8' })
+    const result = schema.safeParse(jsonParse(content))
+    return result.success ? result.data : null
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Copy every regular file in the agent's snapshot directory, except
+ * snapshot.json, into the agent's memory directory for `scope`.
+ * The target directory is created first (a mkdir failure propagates);
+ * failures while listing or copying are logged and swallowed.
+ */
+async function copySnapshotToLocal(
+  agentType: string,
+  scope: AgentMemoryScope,
+): Promise<void> {
+  const snapshotMemDir = getSnapshotDirForAgent(agentType)
+  const localMemDir = getAgentMemoryDir(agentType, scope)
+
+  await mkdir(localMemDir, { recursive: true })
+
+  try {
+    const files = await readdir(snapshotMemDir, { withFileTypes: true })
+    for (const dirent of files) {
+      // Skip sub-directories and the snapshot metadata file itself.
+      if (!dirent.isFile() || dirent.name === SNAPSHOT_JSON) continue
+      const content = await readFile(join(snapshotMemDir, dirent.name), {
+        encoding: 'utf-8',
+      })
+      await writeFile(join(localMemDir, dirent.name), content)
+    }
+  } catch (e) {
+    logForDebugging(`Failed to copy snapshot to local agent memory: ${e}`)
+  }
+}
+
+/**
+ * Record, in the sync marker file, the snapshot timestamp that local memory
+ * was last synced from. The memory directory is created first (a mkdir
+ * failure propagates); a failed write is logged and swallowed.
+ */
+async function saveSyncedMeta(
+  agentType: string,
+  scope: AgentMemoryScope,
+  snapshotTimestamp: string,
+): Promise<void> {
+  const syncedPath = getSyncedJsonPath(agentType, scope)
+  const localMemDir = getAgentMemoryDir(agentType, scope)
+  await mkdir(localMemDir, { recursive: true })
+  const meta: SyncedMeta = { syncedFrom: snapshotTimestamp }
+  try {
+    await writeFile(syncedPath, jsonStringify(meta))
+  } catch (e) {
+    logForDebugging(`Failed to save snapshot sync metadata: ${e}`)
+  }
+}
+
+/**
+ * Check if a snapshot exists and whether it's
newer than what we last synced.
+ *
+ * Resolves to:
+ * - `none` when there is no readable, valid snapshot.json, or the sync
+ *   marker is at least as recent as the snapshot;
+ * - `initialize` when the local memory directory has no `.md` files (or
+ *   cannot be listed);
+ * - `prompt-update` when the sync marker is missing/invalid or older than
+ *   the snapshot's `updatedAt`.
+ */
+export async function checkAgentMemorySnapshot(
+  agentType: string,
+  scope: AgentMemoryScope,
+): Promise<{
+  action: 'none' | 'initialize' | 'prompt-update'
+  snapshotTimestamp?: string
+}> {
+  const snapshotMeta = await readJsonFile(
+    getSnapshotJsonPath(agentType),
+    snapshotMetaSchema(),
+  )
+
+  if (!snapshotMeta) {
+    return { action: 'none' }
+  }
+
+  const localMemDir = getAgentMemoryDir(agentType, scope)
+
+  let hasLocalMemory = false
+  try {
+    const dirents = await readdir(localMemDir, { withFileTypes: true })
+    hasLocalMemory = dirents.some(d => d.isFile() && d.name.endsWith('.md'))
+  } catch {
+    // Directory doesn't exist
+  }
+
+  if (!hasLocalMemory) {
+    return { action: 'initialize', snapshotTimestamp: snapshotMeta.updatedAt }
+  }
+
+  const syncedMeta = await readJsonFile(
+    getSyncedJsonPath(agentType, scope),
+    syncedMetaSchema(),
+  )
+
+  if (
+    !syncedMeta ||
+    new Date(snapshotMeta.updatedAt) > new Date(syncedMeta.syncedFrom)
+  ) {
+    return {
+      action: 'prompt-update',
+      snapshotTimestamp: snapshotMeta.updatedAt,
+    }
+  }
+
+  return { action: 'none' }
+}
+
+/**
+ * Initialize local agent memory from a snapshot (first-time setup).
+ * Copies the snapshot files into the memory directory for `scope`, then
+ * records `snapshotTimestamp` as the synced version.
+ */
+export async function initializeFromSnapshot(
+  agentType: string,
+  scope: AgentMemoryScope,
+  snapshotTimestamp: string,
+): Promise<void> {
+  logForDebugging(
+    `Initializing agent memory for ${agentType} from project snapshot`,
+  )
+  await copySnapshotToLocal(agentType, scope)
+  await saveSyncedMeta(agentType, scope, snapshotTimestamp)
+}
+
+/**
+ * Replace local agent memory with the snapshot.
+ */ +export async function replaceFromSnapshot( + agentType: string, + scope: AgentMemoryScope, + snapshotTimestamp: string, +): Promise { + logForDebugging( + `Replacing agent memory for ${agentType} with project snapshot`, + ) + // Remove existing .md files before copying to avoid orphans + const localMemDir = getAgentMemoryDir(agentType, scope) + try { + const existing = await readdir(localMemDir, { withFileTypes: true }) + for (const dirent of existing) { + if (dirent.isFile() && dirent.name.endsWith('.md')) { + await unlink(join(localMemDir, dirent.name)) + } + } + } catch { + // Directory may not exist yet + } + await copySnapshotToLocal(agentType, scope) + await saveSyncedMeta(agentType, scope, snapshotTimestamp) +} + +/** + * Mark the current snapshot as synced without changing local memory. + */ +export async function markSnapshotSynced( + agentType: string, + scope: AgentMemoryScope, + snapshotTimestamp: string, +): Promise { + await saveSyncedMeta(agentType, scope, snapshotTimestamp) +} diff --git a/packages/builtin-tools/src/tools/AgentTool/agentToolUtils.ts b/packages/builtin-tools/src/tools/AgentTool/agentToolUtils.ts new file mode 100644 index 000000000..e8cf493f8 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/agentToolUtils.ts @@ -0,0 +1,687 @@ +import { feature } from 'bun:bundle' +import { z } from 'zod/v4' +import { clearInvokedSkillsForAgent } from 'src/bootstrap/state.js' +import { + ALL_AGENT_DISALLOWED_TOOLS, + ASYNC_AGENT_ALLOWED_TOOLS, + CUSTOM_AGENT_DISALLOWED_TOOLS, + IN_PROCESS_TEAMMATE_ALLOWED_TOOLS, +} from 'src/constants/tools.js' +import { startAgentSummarization } from 'src/services/AgentSummary/agentSummary.js' +import { + type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + logEvent, +} from 'src/services/analytics/index.js' +import { clearDumpState } from 'src/services/api/dumpPrompts.js' +import type { AppState } from 'src/state/AppState.js' +import type { + Tool, + ToolPermissionContext, + Tools, 
+ ToolUseContext, +} from 'src/Tool.js' +import { toolMatchesName } from 'src/Tool.js' +import { + completeAgentTask as completeAsyncAgent, + createActivityDescriptionResolver, + createProgressTracker, + enqueueAgentNotification, + failAgentTask as failAsyncAgent, + getProgressUpdate, + getTokenCountFromTracker, + isLocalAgentTask, + killAsyncAgent, + type ProgressTracker, + updateAgentProgress as updateAsyncAgentProgress, + updateProgressFromMessage, +} from 'src/tasks/LocalAgentTask/LocalAgentTask.js' +import { asAgentId } from 'src/types/ids.js' +import type { Message as MessageType, ContentItem } from 'src/types/message.js' +import { isAgentSwarmsEnabled } from 'src/utils/agentSwarmsEnabled.js' +import { logForDebugging } from 'src/utils/debug.js' +import { isInProtectedNamespace } from 'src/utils/envUtils.js' +import { AbortError, errorMessage } from 'src/utils/errors.js' +import type { CacheSafeParams } from 'src/utils/forkedAgent.js' +import { lazySchema } from 'src/utils/lazySchema.js' +import { + extractTextContent, + getLastAssistantMessage, +} from 'src/utils/messages.js' +import type { PermissionMode } from 'src/utils/permissions/PermissionMode.js' +import { permissionRuleValueFromString } from 'src/utils/permissions/permissionRuleParser.js' +import { + buildTranscriptForClassifier, + classifyYoloAction, +} from 'src/utils/permissions/yoloClassifier.js' +import { emitTaskProgress as emitTaskProgressEvent } from 'src/utils/task/sdkProgress.js' +import { isInProcessTeammate } from 'src/utils/teammateContext.js' +import { getTokenCountFromUsage } from 'src/utils/tokens.js' +import { EXIT_PLAN_MODE_V2_TOOL_NAME } from '../ExitPlanModeTool/constants.js' +import { AGENT_TOOL_NAME, LEGACY_AGENT_TOOL_NAME } from './constants.js' +import type { AgentDefinition } from './loadAgentsDir.js' +export type ResolvedAgentTools = { + hasWildcard: boolean + validTools: string[] + invalidTools: string[] + resolvedTools: Tools + allowedAgentTypes?: string[] +} + +export 
function filterToolsForAgent({ + tools, + isBuiltIn, + isAsync = false, + permissionMode, +}: { + tools: Tools + isBuiltIn: boolean + isAsync?: boolean + permissionMode?: PermissionMode +}): Tools { + return tools.filter(tool => { + // Allow MCP tools for all agents + if (tool.name.startsWith('mcp__')) { + return true + } + // Allow ExitPlanMode for agents in plan mode (e.g., in-process teammates) + // This bypasses both the ALL_AGENT_DISALLOWED_TOOLS and async tool filters + if ( + toolMatchesName(tool, EXIT_PLAN_MODE_V2_TOOL_NAME) && + permissionMode === 'plan' + ) { + return true + } + if (ALL_AGENT_DISALLOWED_TOOLS.has(tool.name)) { + return false + } + if (!isBuiltIn && CUSTOM_AGENT_DISALLOWED_TOOLS.has(tool.name)) { + return false + } + if (isAsync && !ASYNC_AGENT_ALLOWED_TOOLS.has(tool.name)) { + if (isAgentSwarmsEnabled() && isInProcessTeammate()) { + // Allow AgentTool for in-process teammates to spawn sync subagents. + // Validation in AgentTool.call() prevents background agents and teammate spawning. + if (toolMatchesName(tool, AGENT_TOOL_NAME)) { + return true + } + // Allow task tools for in-process teammates to coordinate via shared task list + if (IN_PROCESS_TEAMMATE_ALLOWED_TOOLS.has(tool.name)) { + return true + } + } + return false + } + return true + }) +} + +/** + * Resolves and validates agent tools against available tools + * Handles wildcard expansion and validation in one place + */ +export function resolveAgentTools( + agentDefinition: Pick< + AgentDefinition, + 'tools' | 'disallowedTools' | 'source' | 'permissionMode' + >, + availableTools: Tools, + isAsync = false, + isMainThread = false, +): ResolvedAgentTools { + const { + tools: agentTools, + disallowedTools, + source, + permissionMode, + } = agentDefinition + // When isMainThread is true, skip filterToolsForAgent entirely — the main + // thread's tool pool is already properly assembled by useMergedTools(), so + // the sub-agent disallow lists shouldn't apply. 
+ const filteredAvailableTools = isMainThread + ? availableTools + : filterToolsForAgent({ + tools: availableTools, + isBuiltIn: source === 'built-in', + isAsync, + permissionMode, + }) + + // Create a set of disallowed tool names for quick lookup + const disallowedToolSet = new Set( + disallowedTools?.map(toolSpec => { + const { toolName } = permissionRuleValueFromString(toolSpec) + return toolName + }) ?? [], + ) + + // Filter available tools based on disallowed list + const allowedAvailableTools = filteredAvailableTools.filter( + tool => !disallowedToolSet.has(tool.name), + ) + + // If tools is undefined or ['*'], allow all tools (after filtering disallowed) + const hasWildcard = + agentTools === undefined || + (agentTools.length === 1 && agentTools[0] === '*') + if (hasWildcard) { + return { + hasWildcard: true, + validTools: [], + invalidTools: [], + resolvedTools: allowedAvailableTools, + } + } + + const availableToolMap = new Map() + for (const tool of allowedAvailableTools) { + availableToolMap.set(tool.name, tool) + } + + const validTools: string[] = [] + const invalidTools: string[] = [] + const resolved: Tool[] = [] + const resolvedToolsSet = new Set() + let allowedAgentTypes: string[] | undefined + + for (const toolSpec of agentTools) { + // Parse the tool spec to extract the base tool name and any permission pattern + const { toolName, ruleContent } = permissionRuleValueFromString(toolSpec) + + // Special case: Agent tool carries allowedAgentTypes metadata in its spec + if (toolName === AGENT_TOOL_NAME) { + if (ruleContent) { + // Parse comma-separated agent types: "worker, researcher" → ["worker", "researcher"] + allowedAgentTypes = ruleContent.split(',').map(s => s.trim()) + } + // For sub-agents, Agent is excluded by filterToolsForAgent — mark the spec + // valid for allowedAgentTypes tracking but skip tool resolution. 
+ if (!isMainThread) { + validTools.push(toolSpec) + continue + } + // For main thread, filtering was skipped so Agent is in availableToolMap — + // fall through to normal resolution below. + } + + const tool = availableToolMap.get(toolName) + if (tool) { + validTools.push(toolSpec) + if (!resolvedToolsSet.has(tool)) { + resolved.push(tool) + resolvedToolsSet.add(tool) + } + } else { + invalidTools.push(toolSpec) + } + } + + return { + hasWildcard: false, + validTools, + invalidTools, + resolvedTools: resolved, + allowedAgentTypes, + } +} + +export const agentToolResultSchema = lazySchema(() => + z.object({ + agentId: z.string(), + // Optional: older persisted sessions won't have this (resume replays + // results verbatim without re-validation). Used to gate the sync + // result trailer — one-shot built-ins skip the SendMessage hint. + agentType: z.string().optional(), + content: z.array(z.object({ type: z.literal('text'), text: z.string() })), + totalToolUseCount: z.number(), + totalDurationMs: z.number(), + totalTokens: z.number(), + usage: z.object({ + input_tokens: z.number(), + output_tokens: z.number(), + cache_creation_input_tokens: z.number().nullable(), + cache_read_input_tokens: z.number().nullable(), + server_tool_use: z + .object({ + web_search_requests: z.number(), + web_fetch_requests: z.number(), + }) + .nullable(), + service_tier: z.enum(['standard', 'priority', 'batch']).nullable(), + cache_creation: z + .object({ + ephemeral_1h_input_tokens: z.number(), + ephemeral_5m_input_tokens: z.number(), + }) + .nullable(), + }), + }), +) + +export type AgentToolResult = z.input> + +export function countToolUses(messages: MessageType[]): number { + let count = 0 + for (const m of messages) { + if (m.type === 'assistant') { + const content = m.message?.content as ContentItem[] | undefined + for (const block of content ?? 
[]) { + if (block.type === 'tool_use') { + count++ + } + } + } + } + return count +} + +export function finalizeAgentTool( + agentMessages: MessageType[], + agentId: string, + metadata: { + prompt: string + resolvedAgentModel: string + isBuiltInAgent: boolean + startTime: number + agentType: string + isAsync: boolean + }, +): AgentToolResult { + const { + prompt, + resolvedAgentModel, + isBuiltInAgent, + startTime, + agentType, + isAsync, + } = metadata + + const lastAssistantMessage = getLastAssistantMessage(agentMessages) + if (lastAssistantMessage === undefined) { + throw new Error('No assistant messages found') + } + // Extract text content from the agent's response. If the final assistant + // message is a pure tool_use block (loop exited mid-turn), fall back to + // the most recent assistant message that has text content. + let content = (lastAssistantMessage.message?.content as ContentItem[] ?? []).filter( + _ => _.type === 'text', + ) + if (content.length === 0) { + for (let i = agentMessages.length - 1; i >= 0; i--) { + const m = agentMessages[i]! + if (m.type !== 'assistant') continue + const textBlocks = (m.message?.content as ContentItem[] ?? 
[]).filter(_ => _.type === 'text') + if (textBlocks.length > 0) { + content = textBlocks + break + } + } + } + + const totalTokens = getTokenCountFromUsage(lastAssistantMessage.message?.usage as Parameters[0]) + const totalToolUseCount = countToolUses(agentMessages) + + logEvent('tengu_agent_tool_completed', { + agent_type: + agentType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + model: + resolvedAgentModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + prompt_char_count: prompt.length, + response_char_count: content.length, + assistant_message_count: agentMessages.length, + total_tool_uses: totalToolUseCount, + duration_ms: Date.now() - startTime, + total_tokens: totalTokens, + is_built_in_agent: isBuiltInAgent, + is_async: isAsync, + }) + + // Signal to inference that this subagent's cache chain can be evicted. + const lastRequestId = lastAssistantMessage.requestId + if (lastRequestId) { + logEvent('tengu_cache_eviction_hint', { + scope: + 'subagent_end' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + last_request_id: + lastRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + }) + } + + return { + agentId, + agentType, + content, + totalDurationMs: Date.now() - startTime, + totalTokens, + totalToolUseCount, + usage: lastAssistantMessage.message?.usage as AgentToolResult['usage'], + } +} + +/** + * Returns the name of the last tool_use block in an assistant message, + * or undefined if the message is not an assistant message with tool_use. + */ +export function getLastToolUseName(message: MessageType): string | undefined { + if (message.type !== 'assistant') return undefined + const block = (message.message?.content as ContentItem[] ?? []).findLast(b => b.type === 'tool_use') + return block?.type === 'tool_use' ? 
block.name : undefined +} + +export function emitTaskProgress( + tracker: ProgressTracker, + taskId: string, + toolUseId: string | undefined, + description: string, + startTime: number, + lastToolName: string, +): void { + const progress = getProgressUpdate(tracker) + emitTaskProgressEvent({ + taskId, + toolUseId, + description: progress.lastActivity?.activityDescription ?? description, + startTime, + totalTokens: progress.tokenCount, + toolUses: progress.toolUseCount, + lastToolName, + }) +} + +export async function classifyHandoffIfNeeded({ + agentMessages, + tools, + toolPermissionContext, + abortSignal, + subagentType, + totalToolUseCount, +}: { + agentMessages: MessageType[] + tools: Tools + toolPermissionContext: AppState['toolPermissionContext'] + abortSignal: AbortSignal + subagentType: string + totalToolUseCount: number +}): Promise { + if (feature('TRANSCRIPT_CLASSIFIER')) { + if (toolPermissionContext.mode !== 'auto') return null + + const agentTranscript = buildTranscriptForClassifier(agentMessages, tools) + if (!agentTranscript) return null + + const classifierResult = await classifyYoloAction( + agentMessages, + { + role: 'user', + content: [ + { + type: 'text', + text: "Sub-agent has finished and is handing back control to the main agent. Review the sub-agent's work based on the block rules and let the main agent know if any file is dangerous (the main agent will see the reason).", + }, + ], + }, + tools, + toolPermissionContext as ToolPermissionContext, + abortSignal, + ) + + const handoffDecision = classifierResult.unavailable + ? 'unavailable' + : classifierResult.shouldBlock + ? 
'blocked' + : 'allowed' + logEvent('tengu_auto_mode_decision', { + decision: + handoffDecision as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + toolName: + // Use legacy name for analytics continuity across the Task→Agent rename + LEGACY_AGENT_TOOL_NAME as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + inProtectedNamespace: isInProtectedNamespace(), + classifierModel: + classifierResult.model as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + agentType: + subagentType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + toolUseCount: totalToolUseCount, + isHandoff: true, + // For handoff, the relevant agent completion is the subagent's final + // assistant message — the last thing the classifier transcript shows + // before the handoff review prompt. + agentMsgId: getLastAssistantMessage(agentMessages)?.message + .id as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + classifierStage: + classifierResult.stage as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + classifierStage1RequestId: + classifierResult.stage1RequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + classifierStage1MsgId: + classifierResult.stage1MsgId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + classifierStage2RequestId: + classifierResult.stage2RequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + classifierStage2MsgId: + classifierResult.stage2MsgId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + }) + + if (classifierResult.shouldBlock) { + // When classifier is unavailable, still propagate the sub-agent's + // results but with a warning so the parent agent can verify the work. + if (classifierResult.unavailable) { + logForDebugging( + 'Handoff classifier unavailable, allowing sub-agent output with warning', + { level: 'warn' }, + ) + return `Note: The safety classifier was unavailable when reviewing this sub-agent's work. 
/**
 * Extract a partial result string from an agent's accumulated messages.
 * Used when an async agent is killed to preserve what it accomplished.
 *
 * Scans from the newest message backwards and returns the text of the first
 * assistant message whose extracted text is non-empty.
 *
 * @param messages - Messages accumulated by the agent, oldest first.
 * @returns The joined text content (newline-separated), or `undefined` if no
 *   assistant message yields any text.
 */
export function extractPartialResult(
  messages: MessageType[],
): string | undefined {
  let index = messages.length
  while (index-- > 0) {
    const candidate = messages[index]!
    if (candidate.type === 'assistant') {
      // A missing message/content is treated as an empty content list.
      const content = (candidate.message?.content ?? []) as ContentItem[]
      const joined = extractTextContent(content, '\n')
      if (joined) {
        return joined
      }
    }
  }
  return undefined
}
/**
 * Drives a background agent from spawn to terminal notification.
 * Shared between AgentTool's async-from-start path and resumeAgentBackground.
 *
 * Consumes the message stream produced by `makeStream`, updating progress as
 * each message arrives, then finalizes the task and enqueues exactly one
 * notification for the path taken: `completed`, `killed` (on `AbortError`),
 * or `failed` (any other error). Per-agent skill and dump state is cleared in
 * `finally` on every path.
 *
 * @param taskId - Id of the background task being driven.
 * @param abortController - Its signal is handed to the handoff classifier.
 * @param makeStream - Starts the agent and yields its messages. Receives the
 *   summarization hook, or `undefined` when summarization is disabled.
 * @param metadata - Third argument of `finalizeAgentTool`; also the source of
 *   `startTime`, `agentType`, `resolvedAgentModel` and `isBuiltInAgent`.
 * @param description - Task description used in progress events and
 *   notifications.
 * @param toolUseContext - Supplies the tool list, `toolUseId` and app state.
 * @param rootSetAppState - State updater used for every task-state change.
 * @param agentIdForCleanup - Agent id whose skill/dump state is cleared.
 * @param enableSummarization - Whether to start agent summarization once
 *   cache-safe params are available.
 * @param getWorktreeResult - Resolves worktree info that is spread into the
 *   notification.
 */
export async function runAsyncAgentLifecycle({
  taskId,
  abortController,
  makeStream,
  metadata,
  description,
  toolUseContext,
  rootSetAppState,
  agentIdForCleanup,
  enableSummarization,
  getWorktreeResult,
}: {
  taskId: string
  abortController: AbortController
  makeStream: (
    onCacheSafeParams: ((p: CacheSafeParams) => void) | undefined,
  ) => AsyncGenerator<MessageType>
  metadata: Parameters<typeof finalizeAgentTool>[2]
  description: string
  toolUseContext: ToolUseContext
  rootSetAppState: SetAppState
  agentIdForCleanup: string
  enableSummarization: boolean
  getWorktreeResult: () => Promise<{
    worktreePath?: string
    worktreeBranch?: string
  }>
}): Promise<void> {
  let stopSummarization: (() => void) | undefined
  const agentMessages: MessageType[] = []
  try {
    const tracker = createProgressTracker()
    const resolveActivity = createActivityDescriptionResolver(
      toolUseContext.options.tools,
    )
    const onCacheSafeParams = enableSummarization
      ? (params: CacheSafeParams) => {
          const { stop } = startAgentSummarization(
            taskId,
            asAgentId(taskId),
            params,
            rootSetAppState,
          )
          stopSummarization = stop
        }
      : undefined
    for await (const message of makeStream(onCacheSafeParams)) {
      agentMessages.push(message)
      // Append immediately when UI holds the task (retain). Bootstrap reads
      // disk in parallel and UUID-merges the prefix — disk-write-before-yield
      // means live is always a suffix of disk, so merge is order-correct.
      rootSetAppState(prev => {
        const t = prev.tasks[taskId]
        if (!isLocalAgentTask(t) || !t.retain) return prev
        const base = t.messages ?? []
        return {
          ...prev,
          tasks: {
            ...prev.tasks,
            [taskId]: { ...t, messages: [...base, message] },
          },
        }
      })
      updateProgressFromMessage(
        tracker,
        message,
        resolveActivity,
        toolUseContext.options.tools,
      )
      updateAsyncAgentProgress(
        taskId,
        getProgressUpdate(tracker),
        rootSetAppState,
      )
      // Only messages that contain a tool use produce a progress event.
      const lastToolName = getLastToolUseName(message)
      if (lastToolName) {
        emitTaskProgress(
          tracker,
          taskId,
          toolUseContext.toolUseId,
          description,
          metadata.startTime,
          lastToolName,
        )
      }
    }

    stopSummarization?.()

    const agentResult = finalizeAgentTool(agentMessages, taskId, metadata)

    // Mark task completed FIRST so TaskOutput(block=true) unblocks
    // immediately. classifyHandoffIfNeeded (API call) and getWorktreeResult
    // (git exec) are notification embellishments that can hang — they must
    // not gate the status transition (gh-20236).
    completeAsyncAgent(agentResult, rootSetAppState)

    let finalMessage = extractTextContent(agentResult.content, '\n')

    if (feature('TRANSCRIPT_CLASSIFIER')) {
      const handoffWarning = await classifyHandoffIfNeeded({
        agentMessages,
        tools: toolUseContext.options.tools,
        toolPermissionContext:
          toolUseContext.getAppState().toolPermissionContext,
        abortSignal: abortController.signal,
        subagentType: metadata.agentType,
        totalToolUseCount: agentResult.totalToolUseCount,
      })
      if (handoffWarning) {
        // The warning is prepended so it precedes the agent's own output.
        finalMessage = `${handoffWarning}\n\n${finalMessage}`
      }
    }

    const worktreeResult = await getWorktreeResult()

    enqueueAgentNotification({
      taskId,
      description,
      status: 'completed',
      setAppState: rootSetAppState,
      finalMessage,
      usage: {
        totalTokens: getTokenCountFromTracker(tracker),
        toolUses: agentResult.totalToolUseCount,
        durationMs: agentResult.totalDurationMs,
      },
      toolUseId: toolUseContext.toolUseId,
      ...worktreeResult,
    })
  } catch (error) {
    stopSummarization?.()
    if (error instanceof AbortError) {
      // killAsyncAgent is a no-op if TaskStop already set status='killed' —
      // but only this catch handler has agentMessages, so the notification
      // must fire unconditionally. Transition status BEFORE worktree cleanup
      // so TaskOutput unblocks even if git hangs (gh-20236).
      killAsyncAgent(taskId, rootSetAppState)
      logEvent('tengu_agent_tool_terminated', {
        agent_type:
          metadata.agentType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        model:
          metadata.resolvedAgentModel as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        duration_ms: Date.now() - metadata.startTime,
        is_async: true,
        is_built_in_agent: metadata.isBuiltInAgent,
        reason:
          'user_kill_async' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      const worktreeResult = await getWorktreeResult()
      // Preserve whatever text the agent produced before it was killed.
      const partialResult = extractPartialResult(agentMessages)
      enqueueAgentNotification({
        taskId,
        description,
        status: 'killed',
        setAppState: rootSetAppState,
        toolUseId: toolUseContext.toolUseId,
        finalMessage: partialResult,
        ...worktreeResult,
      })
      return
    }
    // Any non-abort error: mark the task failed before the worktree lookup,
    // mirroring the ordering used on the other two paths.
    const msg = errorMessage(error)
    failAsyncAgent(taskId, msg, rootSetAppState)
    const worktreeResult = await getWorktreeResult()
    enqueueAgentNotification({
      taskId,
      description,
      status: 'failed',
      error: msg,
      setAppState: rootSetAppState,
      toolUseId: toolUseContext.toolUseId,
      ...worktreeResult,
    })
  } finally {
    clearInvokedSkillsForAgent(agentIdForCleanup)
    clearDumpState(agentIdForCleanup)
  }
}
'builtin-tools/tools/GlobTool/prompt.js' +import { GREP_TOOL_NAME } from 'builtin-tools/tools/GrepTool/prompt.js' +import { SEND_MESSAGE_TOOL_NAME } from 'builtin-tools/tools/SendMessageTool/constants.js' +import { WEB_FETCH_TOOL_NAME } from 'builtin-tools/tools/WebFetchTool/prompt.js' +import { WEB_SEARCH_TOOL_NAME } from 'builtin-tools/tools/WebSearchTool/prompt.js' +import { isUsing3PServices } from 'src/utils/auth.js' +import { hasEmbeddedSearchTools } from 'src/utils/embeddedTools.js' +import { getSettings_DEPRECATED } from 'src/utils/settings/settings.js' +import { jsonStringify } from 'src/utils/slowOperations.js' +import type { + AgentDefinition, + BuiltInAgentDefinition, +} from '../loadAgentsDir.js' + +const CLAUDE_CODE_DOCS_MAP_URL = + 'https://code.claude.com/docs/en/claude_code_docs_map.md' +const CDP_DOCS_MAP_URL = 'https://platform.claude.com/llms.txt' + +export const CLAUDE_CODE_GUIDE_AGENT_TYPE = 'claude-code-guide' + +function getClaudeCodeGuideBasePrompt(): string { + // Ant-native builds alias find/grep to embedded bfs/ugrep and remove the + // dedicated Glob/Grep tools, so point at find/grep instead. + const localSearchHint = hasEmbeddedSearchTools() + ? `${FILE_READ_TOOL_NAME}, \`find\`, and \`grep\`` + : `${FILE_READ_TOOL_NAME}, ${GLOB_TOOL_NAME}, and ${GREP_TOOL_NAME}` + + return `You are the Claude guide agent. Your primary responsibility is helping users understand and use Claude Code, the Claude Agent SDK, and the Claude API (formerly the Anthropic API) effectively. + +**Your expertise spans three domains:** + +1. **Claude Code** (the CLI tool): Installation, configuration, hooks, skills, MCP servers, keyboard shortcuts, IDE integrations, settings, and workflows. + +2. **Claude Agent SDK**: A framework for building custom AI agents based on Claude Code technology. Available for Node.js/TypeScript and Python. + +3. **Claude API**: The Claude API (formerly known as the Anthropic API) for direct model interaction, tool use, and integrations. 
+ +**Documentation sources:** + +- **Claude Code docs** (${CLAUDE_CODE_DOCS_MAP_URL}): Fetch this for questions about the Claude Code CLI tool, including: + - Installation, setup, and getting started + - Hooks (pre/post command execution) + - Custom skills + - MCP server configuration + - IDE integrations (VS Code, JetBrains) + - Settings files and configuration + - Keyboard shortcuts and hotkeys + - Subagents and plugins + - Sandboxing and security + +- **Claude Agent SDK docs** (${CDP_DOCS_MAP_URL}): Fetch this for questions about building agents with the SDK, including: + - SDK overview and getting started (Python and TypeScript) + - Agent configuration + custom tools + - Session management and permissions + - MCP integration in agents + - Hosting and deployment + - Cost tracking and context management + Note: Agent SDK docs are part of the Claude API documentation at the same URL. + +- **Claude API docs** (${CDP_DOCS_MAP_URL}): Fetch this for questions about the Claude API (formerly the Anthropic API), including: + - Messages API and streaming + - Tool use (function calling) and Anthropic-defined tools (computer use, code execution, web search, text editor, bash, programmatic tool calling, tool search tool, context editing, Files API, structured outputs) + - Vision, PDF support, and citations + - Extended thinking and structured outputs + - MCP connector for remote MCP servers + - Cloud provider integrations (Bedrock, Vertex AI, Foundry) + +**Approach:** +1. Determine which domain the user's question falls into +2. Use ${WEB_FETCH_TOOL_NAME} to fetch the appropriate docs map +3. Identify the most relevant documentation URLs from the map +4. Fetch the specific documentation pages +5. Provide clear, actionable guidance based on official documentation +6. Use ${WEB_SEARCH_TOOL_NAME} if docs don't cover the topic +7. 
/**
 * Returns the prompt bullet telling the agent where to send users when no
 * answer can be found.
 *
 * For 3P services (Bedrock/Vertex/Foundry) the /feedback command is disabled,
 * so users are pointed at `MACRO.ISSUES_EXPLAINER` instead.
 *
 * @returns A single markdown list item (no trailing newline).
 */
function getFeedbackGuideline(): string {
  return isUsing3PServices()
    ? `- When you cannot find an answer or the feature doesn't exist, direct the user to ${MACRO.ISSUES_EXPLAINER}`
    : "- When you cannot find an answer or the feature doesn't exist, direct the user to use /feedback to report a feature request or bug"
}
[ + BASH_TOOL_NAME, + FILE_READ_TOOL_NAME, + WEB_FETCH_TOOL_NAME, + WEB_SEARCH_TOOL_NAME, + ] + : [ + GLOB_TOOL_NAME, + GREP_TOOL_NAME, + FILE_READ_TOOL_NAME, + WEB_FETCH_TOOL_NAME, + WEB_SEARCH_TOOL_NAME, + ], + source: 'built-in', + baseDir: 'built-in', + model: 'haiku', + permissionMode: 'dontAsk', + getSystemPrompt({ toolUseContext }) { + const commands = toolUseContext.options.commands + + // Build context sections + const contextSections: string[] = [] + + // 1. Custom skills + const customCommands = commands.filter(cmd => cmd.type === 'prompt') + if (customCommands.length > 0) { + const commandList = customCommands + .map(cmd => `- /${cmd.name}: ${cmd.description}`) + .join('\n') + contextSections.push( + `**Available custom skills in this project:**\n${commandList}`, + ) + } + + // 2. Custom agents from .claude/agents/ + const customAgents = + toolUseContext.options.agentDefinitions.activeAgents.filter( + (a: AgentDefinition) => a.source !== 'built-in', + ) + if (customAgents.length > 0) { + const agentList = customAgents + .map((a: AgentDefinition) => `- ${a.agentType}: ${a.whenToUse}`) + .join('\n') + contextSections.push( + `**Available custom agents configured:**\n${agentList}`, + ) + } + + // 3. MCP servers + const mcpClients = toolUseContext.options.mcpClients + if (mcpClients && mcpClients.length > 0) { + const mcpList = mcpClients + .map((client: { name: string }) => `- ${client.name}`) + .join('\n') + contextSections.push(`**Configured MCP servers:**\n${mcpList}`) + } + + // 4. Plugin commands + const pluginCommands = commands.filter( + cmd => cmd.type === 'prompt' && cmd.source === 'plugin', + ) + if (pluginCommands.length > 0) { + const pluginList = pluginCommands + .map(cmd => `- /${cmd.name}: ${cmd.description}`) + .join('\n') + contextSections.push(`**Available plugin skills:**\n${pluginList}`) + } + + // 5. 
User settings + const settings = getSettings_DEPRECATED() + if (Object.keys(settings).length > 0) { + // eslint-disable-next-line no-restricted-syntax -- human-facing UI, not tool_result + const settingsJson = jsonStringify(settings, null, 2) + contextSections.push( + `**User's settings.json:**\n\`\`\`json\n${settingsJson}\n\`\`\``, + ) + } + + // Add the feedback guideline (conditional based on whether user is using 3P services) + const feedbackGuideline = getFeedbackGuideline() + const basePromptWithFeedback = `${getClaudeCodeGuideBasePrompt()} +${feedbackGuideline}` + + // If we have any context to add, append it to the base system prompt + if (contextSections.length > 0) { + return `${basePromptWithFeedback} + +--- + +# User's Current Configuration + +The user has the following custom setup in their environment: + +${contextSections.join('\n\n')} + +When answering questions, consider these configured features and proactively suggest them when relevant.` + } + + // Return the base prompt if no context to add + return basePromptWithFeedback + }, +} diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/exploreAgent.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/exploreAgent.ts new file mode 100644 index 000000000..77c4c5952 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/exploreAgent.ts @@ -0,0 +1,83 @@ +import { BASH_TOOL_NAME } from 'builtin-tools/tools/BashTool/toolName.js' +import { EXIT_PLAN_MODE_TOOL_NAME } from 'builtin-tools/tools/ExitPlanModeTool/constants.js' +import { FILE_EDIT_TOOL_NAME } from 'builtin-tools/tools/FileEditTool/constants.js' +import { FILE_READ_TOOL_NAME } from 'builtin-tools/tools/FileReadTool/prompt.js' +import { FILE_WRITE_TOOL_NAME } from 'builtin-tools/tools/FileWriteTool/prompt.js' +import { GLOB_TOOL_NAME } from 'builtin-tools/tools/GlobTool/prompt.js' +import { GREP_TOOL_NAME } from 'builtin-tools/tools/GrepTool/prompt.js' +import { NOTEBOOK_EDIT_TOOL_NAME } from 
'builtin-tools/tools/NotebookEditTool/constants.js' +import { hasEmbeddedSearchTools } from 'src/utils/embeddedTools.js' +import { AGENT_TOOL_NAME } from '../constants.js' +import type { BuiltInAgentDefinition } from '../loadAgentsDir.js' + +function getExploreSystemPrompt(): string { + // Ant-native builds alias find/grep to embedded bfs/ugrep and remove the + // dedicated Glob/Grep tools, so point at find/grep via Bash instead. + const embedded = hasEmbeddedSearchTools() + const globGuidance = embedded + ? `- Use \`find\` via ${BASH_TOOL_NAME} for broad file pattern matching` + : `- Use ${GLOB_TOOL_NAME} for broad file pattern matching` + const grepGuidance = embedded + ? `- Use \`grep\` via ${BASH_TOOL_NAME} for searching file contents with regex` + : `- Use ${GREP_TOOL_NAME} for searching file contents with regex` + + return `You are a file search specialist for Claude Code, Anthropic's official CLI for Claude. You excel at thoroughly navigating and exploring codebases. + +=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS === +This is a READ-ONLY exploration task. You are STRICTLY PROHIBITED from: +- Creating new files (no Write, touch, or file creation of any kind) +- Modifying existing files (no Edit operations) +- Deleting files (no rm or deletion) +- Moving or copying files (no mv or cp) +- Creating temporary files anywhere, including /tmp +- Using redirect operators (>, >>, |) or heredocs to write to files +- Running ANY commands that change system state + +Your role is EXCLUSIVELY to search and analyze existing code. You do NOT have access to file editing tools - attempting to edit files will fail. 
+ +Your strengths: +- Rapidly finding files using glob patterns +- Searching code and text with powerful regex patterns +- Reading and analyzing file contents + +Guidelines: +${globGuidance} +${grepGuidance} +- Use ${FILE_READ_TOOL_NAME} when you know the specific file path you need to read +- Use ${BASH_TOOL_NAME} ONLY for read-only operations (ls, git status, git log, git diff, find${embedded ? ', grep' : ''}, cat, head, tail) +- NEVER use ${BASH_TOOL_NAME} for: mkdir, touch, rm, cp, mv, git add, git commit, npm install, pip install, or any file creation/modification +- Adapt your search approach based on the thoroughness level specified by the caller +- Communicate your final report directly as a regular message - do NOT attempt to create files + +NOTE: You are meant to be a fast agent that returns output as quickly as possible. In order to achieve this you must: +- Make efficient use of the tools that you have at your disposal: be smart about how you search for files and implementations +- Wherever possible you should try to spawn multiple parallel tool calls for grepping and reading files + +Complete the user's search request efficiently and report your findings clearly.` +} + +export const EXPLORE_AGENT_MIN_QUERIES = 3 + +const EXPLORE_WHEN_TO_USE = + 'Fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (eg. "src/components/**/*.tsx"), search code for keywords (eg. "API endpoints"), or answer questions about the codebase (eg. "how do API endpoints work?"). When calling this agent, specify the desired thoroughness level: "quick" for basic searches, "medium" for moderate exploration, or "very thorough" for comprehensive analysis across multiple locations and naming conventions.' 
/**
 * Built-in "Explore" agent definition.
 *
 * Uses `EXPLORE_WHEN_TO_USE` as its routing description and builds its system
 * prompt from `getExploreSystemPrompt()` each time `getSystemPrompt` is
 * called.
 */
export const EXPLORE_AGENT: BuiltInAgentDefinition = {
  agentType: 'Explore',
  whenToUse: EXPLORE_WHEN_TO_USE,
  // Sub-agent spawning, plan-mode exit, and every file/notebook editing tool
  // are explicitly disallowed for this agent.
  disallowedTools: [
    AGENT_TOOL_NAME,
    EXIT_PLAN_MODE_TOOL_NAME,
    FILE_EDIT_TOOL_NAME,
    FILE_WRITE_TOOL_NAME,
    NOTEBOOK_EDIT_TOOL_NAME,
  ],
  source: 'built-in',
  baseDir: 'built-in',
  // Ants get inherit to use the main agent's model; external users get haiku for speed
  // Note: For ants, getAgentModel() checks tengu_explore_agent GrowthBook flag at runtime
  // The USER_TYPE check below is evaluated once, when this module is loaded.
  model: process.env.USER_TYPE === 'ant' ? 'inherit' : 'haiku',
  // Explore is a fast read-only search agent — it doesn't need commit/PR/lint
  // rules from CLAUDE.md. The main agent has full context and interprets results.
  omitClaudeMd: true,
  getSystemPrompt: () => getExploreSystemPrompt(),
}
/**
 * Builds the system prompt for the general-purpose agent: the shared prefix
 * followed by the reporting instruction, a blank line, then the shared
 * guidelines.
 *
 * Note: absolute-path + emoji guidance is appended by enhanceSystemPromptWithEnvDetails.
 */
function getGeneralPurposeSystemPrompt(): string {
  const opening = `${SHARED_PREFIX} When you complete the task, respond with a concise report covering what was done and any key findings — the caller will relay this to the user, so it only needs the essentials.`
  // Joining with two newlines leaves exactly one empty line between sections.
  return [opening, SHARED_GUIDELINES].join('\n\n')
}
+ getSystemPrompt: getGeneralPurposeSystemPrompt, +} diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/planAgent.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/planAgent.ts new file mode 100644 index 000000000..054482750 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/planAgent.ts @@ -0,0 +1,92 @@ +import { BASH_TOOL_NAME } from 'builtin-tools/tools/BashTool/toolName.js' +import { EXIT_PLAN_MODE_TOOL_NAME } from 'builtin-tools/tools/ExitPlanModeTool/constants.js' +import { FILE_EDIT_TOOL_NAME } from 'builtin-tools/tools/FileEditTool/constants.js' +import { FILE_READ_TOOL_NAME } from 'builtin-tools/tools/FileReadTool/prompt.js' +import { FILE_WRITE_TOOL_NAME } from 'builtin-tools/tools/FileWriteTool/prompt.js' +import { GLOB_TOOL_NAME } from 'builtin-tools/tools/GlobTool/prompt.js' +import { GREP_TOOL_NAME } from 'builtin-tools/tools/GrepTool/prompt.js' +import { NOTEBOOK_EDIT_TOOL_NAME } from 'builtin-tools/tools/NotebookEditTool/constants.js' +import { hasEmbeddedSearchTools } from 'src/utils/embeddedTools.js' +import { AGENT_TOOL_NAME } from '../constants.js' +import type { BuiltInAgentDefinition } from '../loadAgentsDir.js' +import { EXPLORE_AGENT } from './exploreAgent.js' + +function getPlanV2SystemPrompt(): string { + // Ant-native builds alias find/grep to embedded bfs/ugrep and remove the + // dedicated Glob/Grep tools, so point at find/grep instead. + const searchToolsHint = hasEmbeddedSearchTools() + ? `\`find\`, \`grep\`, and ${FILE_READ_TOOL_NAME}` + : `${GLOB_TOOL_NAME}, ${GREP_TOOL_NAME}, and ${FILE_READ_TOOL_NAME}` + + return `You are a software architect and planning specialist for Claude Code. Your role is to explore the codebase and design implementation plans. + +=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS === +This is a READ-ONLY planning task. 
You are STRICTLY PROHIBITED from: +- Creating new files (no Write, touch, or file creation of any kind) +- Modifying existing files (no Edit operations) +- Deleting files (no rm or deletion) +- Moving or copying files (no mv or cp) +- Creating temporary files anywhere, including /tmp +- Using redirect operators (>, >>, |) or heredocs to write to files +- Running ANY commands that change system state + +Your role is EXCLUSIVELY to explore the codebase and design implementation plans. You do NOT have access to file editing tools - attempting to edit files will fail. + +You will be provided with a set of requirements and optionally a perspective on how to approach the design process. + +## Your Process + +1. **Understand Requirements**: Focus on the requirements provided and apply your assigned perspective throughout the design process. + +2. **Explore Thoroughly**: + - Read any files provided to you in the initial prompt + - Find existing patterns and conventions using ${searchToolsHint} + - Understand the current architecture + - Identify similar features as reference + - Trace through relevant code paths + - Use ${BASH_TOOL_NAME} ONLY for read-only operations (ls, git status, git log, git diff, find${hasEmbeddedSearchTools() ? ', grep' : ''}, cat, head, tail) + - NEVER use ${BASH_TOOL_NAME} for: mkdir, touch, rm, cp, mv, git add, git commit, npm install, pip install, or any file creation/modification + +3. **Design Solution**: + - Create implementation approach based on your assigned perspective + - Consider trade-offs and architectural decisions + - Follow existing patterns where appropriate + +4. 
/**
 * Built-in "Plan" agent definition.
 *
 * Carries the same `disallowedTools` list as the Explore agent and builds its
 * system prompt from `getPlanV2SystemPrompt()` each time `getSystemPrompt` is
 * called.
 */
export const PLAN_AGENT: BuiltInAgentDefinition = {
  agentType: 'Plan',
  whenToUse:
    'Software architect agent for designing implementation plans. Use this when you need to plan the implementation strategy for a task. Returns step-by-step plans, identifies critical files, and considers architectural trade-offs.',
  // Sub-agent spawning, plan-mode exit, and every file/notebook editing tool
  // are explicitly disallowed for this agent.
  disallowedTools: [
    AGENT_TOOL_NAME,
    EXIT_PLAN_MODE_TOOL_NAME,
    FILE_EDIT_TOOL_NAME,
    FILE_WRITE_TOOL_NAME,
    NOTEBOOK_EDIT_TOOL_NAME,
  ],
  source: 'built-in',
  // NOTE(review): the EXPLORE_AGENT literal in exploreAgent.ts declares no
  // `tools` property, so this looks like it evaluates to `undefined` — confirm
  // how the agent loader treats an undefined `tools` list.
  tools: EXPLORE_AGENT.tools,
  baseDir: 'built-in',
  model: 'inherit',
  // Plan is read-only and can Read CLAUDE.md directly if it needs conventions.
  // Dropping it from context saves tokens without blocking access.
  omitClaudeMd: true,
  getSystemPrompt: () => getPlanV2SystemPrompt(),
}
a/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/FileWriteTool/prompt.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/FileWriteTool/prompt.ts new file mode 100644 index 000000000..e69299d74 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/FileWriteTool/prompt.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type FILE_WRITE_TOOL_NAME = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/GlobTool/prompt.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/GlobTool/prompt.ts new file mode 100644 index 000000000..060caf29c --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/GlobTool/prompt.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type GLOB_TOOL_NAME = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/GrepTool/prompt.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/GrepTool/prompt.ts new file mode 100644 index 000000000..08b8a8d29 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/GrepTool/prompt.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type GREP_TOOL_NAME = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/NotebookEditTool/constants.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/NotebookEditTool/constants.ts new file mode 100644 index 000000000..6c6c94bad --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/NotebookEditTool/constants.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type NOTEBOOK_EDIT_TOOL_NAME = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/SendMessageTool/constants.ts 
b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/SendMessageTool/constants.ts new file mode 100644 index 000000000..efd60265b --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/SendMessageTool/constants.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type SEND_MESSAGE_TOOL_NAME = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/WebFetchTool/prompt.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/WebFetchTool/prompt.ts new file mode 100644 index 000000000..63b342a25 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/WebFetchTool/prompt.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type WEB_FETCH_TOOL_NAME = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/WebSearchTool/prompt.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/WebSearchTool/prompt.ts new file mode 100644 index 000000000..38871a0ba --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/src/tools/WebSearchTool/prompt.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type WEB_SEARCH_TOOL_NAME = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/auth.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/auth.ts new file mode 100644 index 000000000..909e31047 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/auth.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type isUsing3PServices = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/embeddedTools.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/embeddedTools.ts new file mode 100644 index 000000000..c0160dbf9 --- /dev/null +++ 
b/packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/embeddedTools.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type hasEmbeddedSearchTools = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/settings/settings.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/settings/settings.ts new file mode 100644 index 000000000..4b9b819d5 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/src/utils/settings/settings.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type getSettings_DEPRECATED = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/statuslineSetup.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/statuslineSetup.ts new file mode 100644 index 000000000..ba58c1905 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/statuslineSetup.ts @@ -0,0 +1,144 @@ +import type { BuiltInAgentDefinition } from '../loadAgentsDir.js' + +const STATUSLINE_SYSTEM_PROMPT = `You are a status line setup agent for Claude Code. Your job is to create or update the statusLine command in the user's Claude Code settings. + +When asked to convert the user's shell PS1 configuration, follow these steps: +1. Read the user's shell configuration files in this order of preference: + - ~/.zshrc + - ~/.bashrc + - ~/.bash_profile + - ~/.profile + +2. Extract the PS1 value using this regex pattern: /(?:^|\\n)\\s*(?:export\\s+)?PS1\\s*=\\s*["']([^"']+)["']/m + +3. Convert PS1 escape sequences to shell commands: + - \\u → $(whoami) + - \\h → $(hostname -s) + - \\H → $(hostname) + - \\w → $(pwd) + - \\W → $(basename "$(pwd)") + - \\$ → $ + - \\n → \\n + - \\t → $(date +%H:%M:%S) + - \\d → $(date "+%a %b %d") + - \\@ → $(date +%I:%M%p) + - \\# → # + - \\! → ! + +4. When using ANSI color codes, be sure to use \`printf\`. Do not remove colors. 
Note that the status line will be printed in a terminal using dimmed colors. + +5. If the imported PS1 would have trailing "$" or ">" characters in the output, you MUST remove them. + +6. If no PS1 is found and user did not provide other instructions, ask for further instructions. + +How to use the statusLine command: +1. The statusLine command will receive the following JSON input via stdin: + { + "session_id": "string", // Unique session ID + "session_name": "string", // Optional: Human-readable session name set via /rename + "transcript_path": "string", // Path to the conversation transcript + "cwd": "string", // Current working directory + "model": { + "id": "string", // Model ID (e.g., "claude-3-5-sonnet-20241022") + "display_name": "string" // Display name (e.g., "Claude 3.5 Sonnet") + }, + "workspace": { + "current_dir": "string", // Current working directory path + "project_dir": "string", // Project root directory path + "added_dirs": ["string"] // Directories added via /add-dir + }, + "version": "string", // Claude Code app version (e.g., "1.0.71") + "output_style": { + "name": "string", // Output style name (e.g., "default", "Explanatory", "Learning") + }, + "context_window": { + "total_input_tokens": number, // Total input tokens used in session (cumulative) + "total_output_tokens": number, // Total output tokens used in session (cumulative) + "context_window_size": number, // Context window size for current model (e.g., 200000) + "current_usage": { // Token usage from last API call (null if no messages yet) + "input_tokens": number, // Input tokens for current context + "output_tokens": number, // Output tokens generated + "cache_creation_input_tokens": number, // Tokens written to cache + "cache_read_input_tokens": number // Tokens read from cache + } | null, + "used_percentage": number | null, // Pre-calculated: % of context used (0-100), null if no messages yet + "remaining_percentage": number | null // Pre-calculated: % of context remaining 
(0-100), null if no messages yet + }, + "rate_limits": { // Optional: Claude.ai subscription usage limits. Only present for subscribers after first API response. + "five_hour": { // Optional: 5-hour session limit (may be absent) + "used_percentage": number, // Percentage of limit used (0-100) + "resets_at": number // Unix epoch seconds when this window resets + }, + "seven_day": { // Optional: 7-day weekly limit (may be absent) + "used_percentage": number, // Percentage of limit used (0-100) + "resets_at": number // Unix epoch seconds when this window resets + } + }, + "vim": { // Optional, only present when vim mode is enabled + "mode": "INSERT" | "NORMAL" // Current vim editor mode + }, + "agent": { // Optional, only present when Claude is started with --agent flag + "name": "string", // Agent name (e.g., "code-architect", "test-runner") + "type": "string" // Optional: Agent type identifier + }, + "worktree": { // Optional, only present when in a --worktree session + "name": "string", // Worktree name/slug (e.g., "my-feature") + "path": "string", // Full path to the worktree directory + "branch": "string", // Optional: Git branch name for the worktree + "original_cwd": "string", // The directory Claude was in before entering the worktree + "original_branch": "string" // Optional: Branch that was checked out before entering the worktree + } + } + + You can use this JSON data in your command like: + - $(cat | jq -r '.model.display_name') + - $(cat | jq -r '.workspace.current_dir') + - $(cat | jq -r '.output_style.name') + + Or store it in a variable first: + - input=$(cat); echo "$(echo "$input" | jq -r '.model.display_name') in $(echo "$input" | jq -r '.workspace.current_dir')" + + To display context remaining percentage (simplest approach using pre-calculated field): + - input=$(cat); remaining=$(echo "$input" | jq -r '.context_window.remaining_percentage // empty'); [ -n "$remaining" ] && echo "Context: $remaining% remaining" + + Or to display context used 
percentage: + - input=$(cat); used=$(echo "$input" | jq -r '.context_window.used_percentage // empty'); [ -n "$used" ] && echo "Context: $used% used" + + To display Claude.ai subscription rate limit usage (5-hour session limit): + - input=$(cat); pct=$(echo "$input" | jq -r '.rate_limits.five_hour.used_percentage // empty'); [ -n "$pct" ] && printf "5h: %.0f%%" "$pct" + + To display both 5-hour and 7-day limits when available: + - input=$(cat); five=$(echo "$input" | jq -r '.rate_limits.five_hour.used_percentage // empty'); week=$(echo "$input" | jq -r '.rate_limits.seven_day.used_percentage // empty'); out=""; [ -n "$five" ] && out="5h:$(printf '%.0f' "$five")%"; [ -n "$week" ] && out="$out 7d:$(printf '%.0f' "$week")%"; echo "$out" + +2. For longer commands, you can save a new file in the user's ~/.claude directory, e.g.: + - ~/.claude/statusline-command.sh and reference that file in the settings. + +3. Update the user's ~/.claude/settings.json with: + { + "statusLine": { + "type": "command", + "command": "your_command_here" + } + } + +4. If ~/.claude/settings.json is a symlink, update the target file instead. + +Guidelines: +- Preserve existing settings when updating +- Return a summary of what was configured, including the name of the script file if used +- If the script includes git commands, they should skip optional locks +- IMPORTANT: At the end of your response, inform the parent agent that this "statusline-setup" agent must be used for further status line changes. + Also ensure that the user is informed that they can ask Claude to continue to make changes to the status line. 
+` + +export const STATUSLINE_SETUP_AGENT: BuiltInAgentDefinition = { + agentType: 'statusline-setup', + whenToUse: + "Use this agent to configure the user's Claude Code status line setting.", + tools: ['Read', 'Edit'], + source: 'built-in', + baseDir: 'built-in', + model: 'sonnet', + color: 'orange', + getSystemPrompt: () => STATUSLINE_SYSTEM_PROMPT, +} diff --git a/packages/builtin-tools/src/tools/AgentTool/built-in/verificationAgent.ts b/packages/builtin-tools/src/tools/AgentTool/built-in/verificationAgent.ts new file mode 100644 index 000000000..3f7f2842a --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/built-in/verificationAgent.ts @@ -0,0 +1,152 @@ +import { BASH_TOOL_NAME } from 'builtin-tools/tools/BashTool/toolName.js' +import { EXIT_PLAN_MODE_TOOL_NAME } from 'builtin-tools/tools/ExitPlanModeTool/constants.js' +import { FILE_EDIT_TOOL_NAME } from 'builtin-tools/tools/FileEditTool/constants.js' +import { FILE_WRITE_TOOL_NAME } from 'builtin-tools/tools/FileWriteTool/prompt.js' +import { NOTEBOOK_EDIT_TOOL_NAME } from 'builtin-tools/tools/NotebookEditTool/constants.js' +import { WEB_FETCH_TOOL_NAME } from 'builtin-tools/tools/WebFetchTool/prompt.js' +import { AGENT_TOOL_NAME } from '../constants.js' +import type { BuiltInAgentDefinition } from '../loadAgentsDir.js' + +const VERIFICATION_SYSTEM_PROMPT = `You are a verification specialist. Your job is not to confirm the implementation works — it's to try to break it. + +You have two documented failure patterns. First, verification avoidance: when faced with a check, you find reasons not to run it — you read code, narrate what you would test, write "PASS," and move on. Second, being seduced by the first 80%: you see a polished UI or a passing test suite and feel inclined to pass it, not noticing half the buttons do nothing, the state vanishes on refresh, or the backend crashes on bad input. The first 80% is the easy part. Your entire value is in finding the last 20%. 
The caller may spot-check your commands by re-running them — if a PASS step has no command output, or output that doesn't match re-execution, your report gets rejected. + +=== CRITICAL: DO NOT MODIFY THE PROJECT === +You are STRICTLY PROHIBITED from: +- Creating, modifying, or deleting any files IN THE PROJECT DIRECTORY +- Installing dependencies or packages +- Running git write operations (add, commit, push) + +You MAY write ephemeral test scripts to a temp directory (/tmp or $TMPDIR) via ${BASH_TOOL_NAME} redirection when inline commands aren't sufficient — e.g., a multi-step race harness or a Playwright test. Clean up after yourself. + +Check your ACTUAL available tools rather than assuming from this prompt. You may have browser automation (mcp__claude-in-chrome__*, mcp__playwright__*), ${WEB_FETCH_TOOL_NAME}, or other MCP tools depending on the session — do not skip capabilities you didn't think to check for. + +=== WHAT YOU RECEIVE === +You will receive: the original task description, files changed, approach taken, and optionally a plan file path. 
+ +=== VERIFICATION STRATEGY === +Adapt your strategy based on what was changed: + +**Frontend changes**: Start dev server → check your tools for browser automation (mcp__claude-in-chrome__*, mcp__playwright__*) and USE them to navigate, screenshot, click, and read console — do NOT say "needs a real browser" without attempting → curl a sample of page subresources (image-optimizer URLs like /_next/image, same-origin API routes, static assets) since HTML can serve 200 while everything it references fails → run frontend tests +**Backend/API changes**: Start server → curl/fetch endpoints → verify response shapes against expected values (not just status codes) → test error handling → check edge cases +**CLI/script changes**: Run with representative inputs → verify stdout/stderr/exit codes → test edge inputs (empty, malformed, boundary) → verify --help / usage output is accurate +**Infrastructure/config changes**: Validate syntax → dry-run where possible (terraform plan, kubectl apply --dry-run=server, docker build, nginx -t) → check env vars / secrets are actually referenced, not just defined +**Library/package changes**: Build → full test suite → import the library from a fresh context and exercise the public API as a consumer would → verify exported types match README/docs examples +**Bug fixes**: Reproduce the original bug → verify fix → run regression tests → check related functionality for side effects +**Mobile (iOS/Android)**: Clean build → install on simulator/emulator → dump accessibility/UI tree (idb ui describe-all / uiautomator dump), find elements by label, tap by tree coords, re-dump to verify; screenshots secondary → kill and relaunch to test persistence → check crash logs (logcat / device console) +**Data/ML pipeline**: Run with sample input → verify output shape/schema/types → test empty input, single row, NaN/null handling → check for silent data loss (row counts in vs out) +**Database migrations**: Run migration up → verify schema matches intent → run 
migration down (reversibility) → test against existing data, not just empty DB +**Refactoring (no behavior change)**: Existing test suite MUST pass unchanged → diff the public API surface (no new/removed exports) → spot-check observable behavior is identical (same inputs → same outputs) +**Other change types**: The pattern is always the same — (a) figure out how to exercise this change directly (run/call/invoke/deploy it), (b) check outputs against expectations, (c) try to break it with inputs/conditions the implementer didn't test. The strategies above are worked examples for common cases. + +=== REQUIRED STEPS (universal baseline) === +1. Read the project's CLAUDE.md / README for build/test commands and conventions. Check package.json / Makefile / pyproject.toml for script names. If the implementer pointed you to a plan or spec file, read it — that's the success criteria. +2. Run the build (if applicable). A broken build is an automatic FAIL. +3. Run the project's test suite (if it has one). Failing tests are an automatic FAIL. +4. Run linters/type-checkers if configured (eslint, tsc, mypy, etc.). +5. Check for regressions in related code. + +Then apply the type-specific strategy above. Match rigor to stakes: a one-off script doesn't need race-condition probes; production payments code needs everything. + +Test suite results are context, not evidence. Run the suite, note pass/fail, then move on to your real verification. The implementer is an LLM too — its tests may be heavy on mocks, circular assertions, or happy-path coverage that proves nothing about whether the system actually works end-to-end. + +=== RECOGNIZE YOUR OWN RATIONALIZATIONS === +You will feel the urge to skip checks. These are the exact excuses you reach for — recognize them and do the opposite: +- "The code looks correct based on my reading" — reading is not verification. Run it. +- "The implementer's tests already pass" — the implementer is an LLM. Verify independently. 
+- "This is probably fine" — probably is not verified. Run it. +- "Let me start the server and check the code" — no. Start the server and hit the endpoint. +- "I don't have a browser" — did you actually check for mcp__claude-in-chrome__* / mcp__playwright__*? If present, use them. If an MCP tool fails, troubleshoot (server running? selector right?). The fallback exists so you don't invent your own "can't do this" story. +- "This would take too long" — not your call. +If you catch yourself writing an explanation instead of a command, stop. Run the command. + +=== ADVERSARIAL PROBES (adapt to the change type) === +Functional tests confirm the happy path. Also try to break it: +- **Concurrency** (servers/APIs): parallel requests to create-if-not-exists paths — duplicate sessions? lost writes? +- **Boundary values**: 0, -1, empty string, very long strings, unicode, MAX_INT +- **Idempotency**: same mutating request twice — duplicate created? error? correct no-op? +- **Orphan operations**: delete/reference IDs that don't exist +These are seeds, not a checklist — pick the ones that fit what you're verifying. + +=== BEFORE ISSUING PASS === +Your report must include at least one adversarial probe you ran (concurrency, boundary, idempotency, orphan op, or similar) and its result — even if the result was "handled correctly." If all your checks are "returns 200" or "test suite passes," you have confirmed the happy path, not verified correctness. Go back and try to break something. + +=== BEFORE ISSUING FAIL === +You found something that looks broken. Before reporting FAIL, check you haven't missed why it's actually fine: +- **Already handled**: is there defensive code elsewhere (validation upstream, error recovery downstream) that prevents this? +- **Intentional**: does CLAUDE.md / comments / commit message explain this as deliberate? 
+- **Not actionable**: is this a real limitation but unfixable without breaking an external contract (stable API, protocol spec, backwards compat)? If so, note it as an observation, not a FAIL — a "bug" that can't be fixed isn't actionable. +Don't use these as excuses to wave away real issues — but don't FAIL on intentional behavior either. + +=== OUTPUT FORMAT (REQUIRED) === +Every check MUST follow this structure. A check without a Command run block is not a PASS — it's a skip. + +\`\`\` +### Check: [what you're verifying] +**Command run:** + [exact command you executed] +**Output observed:** + [actual terminal output — copy-paste, not paraphrased. Truncate if very long but keep the relevant part.] +**Result: PASS** (or FAIL — with Expected vs Actual) +\`\`\` + +Bad (rejected): +\`\`\` +### Check: POST /api/register validation +**Result: PASS** +Evidence: Reviewed the route handler in routes/auth.py. The logic correctly validates +email format and password length before DB insert. +\`\`\` +(No command run. Reading code is not verification.) + +Good: +\`\`\` +### Check: POST /api/register rejects short password +**Command run:** + curl -s -X POST localhost:8000/api/register -H 'Content-Type: application/json' \\ + -d '{"email":"t@t.co","password":"short"}' | python3 -m json.tool +**Output observed:** + { + "error": "password must be at least 8 characters" + } + (HTTP 400) +**Expected vs Actual:** Expected 400 with password-length error. Got exactly that. +**Result: PASS** +\`\`\` + +End with exactly this line (parsed by caller): + +VERDICT: PASS +or +VERDICT: FAIL +or +VERDICT: PARTIAL + +PARTIAL is for environmental limitations only (no test framework, tool unavailable, server can't start) — not for "I'm unsure whether this is a bug." If you can run the check, you must decide PASS or FAIL. + +Use the literal string \`VERDICT: \` followed by exactly one of \`PASS\`, \`FAIL\`, \`PARTIAL\`. No markdown bold, no punctuation, no variation. 
+- **FAIL**: include what failed, exact error output, reproduction steps. +- **PARTIAL**: what was verified, what could not be and why (missing tool/env), what the implementer should know.` + +const VERIFICATION_WHEN_TO_USE = + 'Use this agent to verify that implementation work is correct before reporting completion. Invoke after non-trivial tasks (3+ file edits, backend/API changes, infrastructure changes). Pass the ORIGINAL user task description, list of files changed, and approach taken. The agent runs builds, tests, linters, and checks to produce a PASS/FAIL/PARTIAL verdict with evidence.' + +export const VERIFICATION_AGENT: BuiltInAgentDefinition = { + agentType: 'verification', + whenToUse: VERIFICATION_WHEN_TO_USE, + color: 'red', + background: true, + disallowedTools: [ + AGENT_TOOL_NAME, + EXIT_PLAN_MODE_TOOL_NAME, + FILE_EDIT_TOOL_NAME, + FILE_WRITE_TOOL_NAME, + NOTEBOOK_EDIT_TOOL_NAME, + ], + source: 'built-in', + baseDir: 'built-in', + model: 'inherit', + getSystemPrompt: () => VERIFICATION_SYSTEM_PROMPT, + criticalSystemReminder_EXPERIMENTAL: + 'CRITICAL: This is a VERIFICATION-ONLY task. You CANNOT edit, write, or create files IN THE PROJECT DIRECTORY (tmp is allowed for ephemeral test scripts). 
You MUST end with VERDICT: PASS, VERDICT: FAIL, or VERDICT: PARTIAL.', +} diff --git a/packages/builtin-tools/src/tools/AgentTool/builtInAgents.ts b/packages/builtin-tools/src/tools/AgentTool/builtInAgents.ts new file mode 100644 index 000000000..5735a4692 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/builtInAgents.ts @@ -0,0 +1,72 @@ +import { feature } from 'bun:bundle' +import { getIsNonInteractiveSession } from 'src/bootstrap/state.js' +import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js' +import { isEnvTruthy } from 'src/utils/envUtils.js' +import { CLAUDE_CODE_GUIDE_AGENT } from './built-in/claudeCodeGuideAgent.js' +import { EXPLORE_AGENT } from './built-in/exploreAgent.js' +import { GENERAL_PURPOSE_AGENT } from './built-in/generalPurposeAgent.js' +import { PLAN_AGENT } from './built-in/planAgent.js' +import { STATUSLINE_SETUP_AGENT } from './built-in/statuslineSetup.js' +import { VERIFICATION_AGENT } from './built-in/verificationAgent.js' +import type { AgentDefinition } from './loadAgentsDir.js' + +export function areExplorePlanAgentsEnabled(): boolean { + if (feature('BUILTIN_EXPLORE_PLAN_AGENTS')) { + // 3P default: true — Bedrock/Vertex keep agents enabled (matches pre-experiment + // external behavior). A/B test treatment sets false to measure impact of removal. + return getFeatureValue_CACHED_MAY_BE_STALE('tengu_amber_stoat', true) + } + return false +} + +export function getBuiltInAgents(): AgentDefinition[] { + // Allow disabling all built-in agents via env var (useful for SDK users who want a blank slate) + // Only applies in noninteractive mode (SDK/API usage) + if ( + isEnvTruthy(process.env.CLAUDE_AGENT_SDK_DISABLE_BUILTIN_AGENTS) && + getIsNonInteractiveSession() + ) { + return [] + } + + // Use lazy require inside the function body to avoid circular dependency + // issues at module init time. The coordinatorMode module depends on tools + // which depend on AgentTool which imports this file. 
+ if (feature('COORDINATOR_MODE')) { + if (isEnvTruthy(process.env.CLAUDE_CODE_COORDINATOR_MODE)) { + /* eslint-disable @typescript-eslint/no-require-imports */ + const { getCoordinatorAgents } = + require('src/coordinator/workerAgent.js') as typeof import('src/coordinator/workerAgent.js') + /* eslint-enable @typescript-eslint/no-require-imports */ + return getCoordinatorAgents() + } + } + + const agents: AgentDefinition[] = [ + GENERAL_PURPOSE_AGENT, + STATUSLINE_SETUP_AGENT, + ] + + if (areExplorePlanAgentsEnabled()) { + agents.push(EXPLORE_AGENT, PLAN_AGENT) + } + + // Include Code Guide agent for non-SDK entrypoints + const isNonSdkEntrypoint = + process.env.CLAUDE_CODE_ENTRYPOINT !== 'sdk-ts' && + process.env.CLAUDE_CODE_ENTRYPOINT !== 'sdk-py' && + process.env.CLAUDE_CODE_ENTRYPOINT !== 'sdk-cli' + + if (isNonSdkEntrypoint) { + agents.push(CLAUDE_CODE_GUIDE_AGENT) + } + + if ( + feature('VERIFICATION_AGENT') && + getFeatureValue_CACHED_MAY_BE_STALE('tengu_hive_evidence', false) + ) { + agents.push(VERIFICATION_AGENT) + } + + return agents +} diff --git a/packages/builtin-tools/src/tools/AgentTool/constants.ts b/packages/builtin-tools/src/tools/AgentTool/constants.ts new file mode 100644 index 000000000..b712dc23d --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/constants.ts @@ -0,0 +1,12 @@ +export const AGENT_TOOL_NAME = 'Agent' +// Legacy wire name for backward compat (permission rules, hooks, resumed sessions) +export const LEGACY_AGENT_TOOL_NAME = 'Task' +export const VERIFICATION_AGENT_TYPE = 'verification' + +// Built-in agents that run once and return a report — the parent never +// SendMessages back to continue them. Skip the agentId/SendMessage/usage +// trailer for these to save tokens (~135 chars × 34M Explore runs/week). 
+export const ONE_SHOT_BUILTIN_AGENT_TYPES: ReadonlySet<string> = new Set([ + 'Explore', + 'Plan', +]) diff --git a/packages/builtin-tools/src/tools/AgentTool/forkSubagent.ts b/packages/builtin-tools/src/tools/AgentTool/forkSubagent.ts new file mode 100644 index 000000000..4ab95e66a --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/forkSubagent.ts @@ -0,0 +1,210 @@ +import { feature } from 'bun:bundle' +import type { BetaToolUseBlock } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs' +import { randomUUID } from 'crypto' +import { getIsNonInteractiveSession } from 'src/bootstrap/state.js' +import { + FORK_BOILERPLATE_TAG, + FORK_DIRECTIVE_PREFIX, +} from 'src/constants/xml.js' +import { isCoordinatorMode } from 'src/coordinator/coordinatorMode.js' +import type { + AssistantMessage, + Message as MessageType, +} from 'src/types/message.js' +import { logForDebugging } from 'src/utils/debug.js' +import { createUserMessage } from 'src/utils/messages.js' +import type { BuiltInAgentDefinition } from './loadAgentsDir.js' + +/** + * Fork subagent feature gate. + * + * When enabled: + * - `subagent_type` becomes optional on the Agent tool schema + * - Omitting `subagent_type` triggers an implicit fork: the child inherits + * the parent's full conversation context and system prompt + * - All agent spawns run in the background (async) for a unified + * `` interaction model + * - `/fork <directive>` slash command is available + * + * Mutually exclusive with coordinator mode — coordinator already owns the + * orchestration role and has its own delegation model. + */ +export function isForkSubagentEnabled(): boolean { + if (feature('FORK_SUBAGENT')) { + if (isCoordinatorMode()) return false + if (getIsNonInteractiveSession()) return false + return true + } + return false +} + +/** Synthetic agent type name used for analytics when the fork path fires. */ +export const FORK_SUBAGENT_TYPE = 'fork' + +/** + * Synthetic agent definition for the fork path.
+ * + * Not registered in builtInAgents — used only when `!subagent_type` and the + * experiment is active. `tools: ['*']` with `useExactTools` means the fork + * child receives the parent's exact tool pool (for cache-identical API + * prefixes). `permissionMode: 'bubble'` surfaces permission prompts to the + * parent terminal. `model: 'inherit'` keeps the parent's model for context + * length parity. + * + * The getSystemPrompt here is unused: the fork path passes + * `override.systemPrompt` with the parent's already-rendered system prompt + * bytes, threaded via `toolUseContext.renderedSystemPrompt`. Reconstructing + * by re-calling getSystemPrompt() can diverge (GrowthBook cold→warm) and + * bust the prompt cache; threading the rendered bytes is byte-exact. + */ +export const FORK_AGENT = { + agentType: FORK_SUBAGENT_TYPE, + whenToUse: + 'Implicit fork — inherits full conversation context. Not selectable via subagent_type; triggered by omitting subagent_type when the fork experiment is active.', + tools: ['*'], + maxTurns: 200, + model: 'inherit', + permissionMode: 'bubble', + source: 'built-in', + baseDir: 'built-in', + getSystemPrompt: () => '', +} satisfies BuiltInAgentDefinition + +/** + * Guard against recursive forking. Fork children keep the Agent tool in their + * tool pool for cache-identical tool definitions, so we reject fork attempts + * at call time by detecting the fork boilerplate tag in conversation history. + */ +export function isInForkChild(messages: MessageType[]): boolean { + return messages.some(m => { + if (m.type !== 'user') return false + const content = m.message?.content + if (!Array.isArray(content)) return false + return content.some( + block => + block.type === 'text' && + block.text.includes(`<${FORK_BOILERPLATE_TAG}>`), + ) + }) +} + +/** Placeholder text used for all tool_result blocks in the fork prefix. + * Must be identical across all fork children for prompt cache sharing.
*/ +const FORK_PLACEHOLDER_RESULT = 'Fork started — processing in background' + +/** + * Build the forked conversation messages for the child agent. + * + * For prompt cache sharing, all fork children must produce byte-identical + * API request prefixes. This function: + * 1. Keeps the full parent assistant message (all tool_use blocks, thinking, text) + * 2. Builds a single user message with tool_results for every tool_use block + * using an identical placeholder, then appends a per-child directive text block + * + * Result: [...history, assistant(all_tool_uses), user(placeholder_results..., directive)] + * Only the final text block differs per child, maximizing cache hits. + */ +export function buildForkedMessages( + directive: string, + assistantMessage: AssistantMessage, +): MessageType[] { + // Clone the assistant message to avoid mutating the original, keeping all + // content blocks (thinking, text, and every tool_use) + const fullAssistantMessage: AssistantMessage = { + ...assistantMessage, + uuid: randomUUID(), + message: { + ...assistantMessage.message, + content: [...(Array.isArray(assistantMessage.message.content) ? assistantMessage.message.content : [])], + }, + } + + // Collect all tool_use blocks from the assistant message + const toolUseBlocks = (Array.isArray(assistantMessage.message.content) ? 
assistantMessage.message.content : []).filter( + (block): block is BetaToolUseBlock => block.type === 'tool_use', + ) + + if (toolUseBlocks.length === 0) { + logForDebugging( + `No tool_use blocks found in assistant message for fork directive: ${directive.slice(0, 50)}...`, + { level: 'error' }, + ) + return [ + createUserMessage({ + content: [ + { type: 'text' as const, text: buildChildMessage(directive) }, + ], + }), + ] + } + + // Build tool_result blocks for every tool_use, all with identical placeholder text + const toolResultBlocks = toolUseBlocks.map(block => ({ + type: 'tool_result' as const, + tool_use_id: block.id, + content: [ + { + type: 'text' as const, + text: FORK_PLACEHOLDER_RESULT, + }, + ], + })) + + // Build a single user message: all placeholder tool_results + the per-child directive + // TODO(smoosh): this text sibling creates a [tool_result, text] pattern on the wire + // (renders as \n\nHuman:). One-off per-child construction, + // not a repeated teacher, so low-priority. If we ever care, use smooshIntoToolResult + // from src/utils/messages.ts to fold the directive into the last tool_result.content. + const toolResultMessage = createUserMessage({ + content: [ + ...toolResultBlocks, + { + type: 'text' as const, + text: buildChildMessage(directive), + }, + ], + }) + + return [fullAssistantMessage, toolResultMessage] +} + +export function buildChildMessage(directive: string): string { + return `<${FORK_BOILERPLATE_TAG}> +STOP. READ THIS FIRST. + +You are a forked worker process. You are NOT the main agent. + +RULES (non-negotiable): +1. Your system prompt says "default to forking." IGNORE IT \u2014 that's for the parent. You ARE the fork. Do NOT spawn sub-agents; execute directly. +2. Do NOT converse, ask questions, or suggest next steps +3. Do NOT editorialize or add meta-commentary +4. USE your tools directly: Bash, Read, Write, etc. +5. If you modify files, commit your changes before reporting. Include the commit hash in your report. +6. 
Do NOT emit text between tool calls. Use tools silently, then report once at the end. +7. Stay strictly within your directive's scope. If you discover related systems outside your scope, mention them in one sentence at most — other workers cover those areas. +8. Keep your report under 500 words unless the directive specifies otherwise. Be factual and concise. +9. Your response MUST begin with "Scope:". No preamble, no thinking-out-loud. +10. REPORT structured facts, then stop + +Output format (plain text labels, not markdown headers): + Scope: + Result: + Key files: + Files changed: + Issues: +</${FORK_BOILERPLATE_TAG}> + +${FORK_DIRECTIVE_PREFIX}${directive}` +} + +/** + * Notice injected into fork children running in an isolated worktree. + * Tells the child to translate paths from the inherited context, re-read + * potentially stale files, and that its changes are isolated. + */ +export function buildWorktreeNotice( + parentCwd: string, + worktreeCwd: string, +): string { + return `You've inherited the conversation context above from a parent agent working in ${parentCwd}. You are operating in an isolated git worktree at ${worktreeCwd} — same repository, same relative file structure, separate working copy. Paths in the inherited context refer to the parent's working directory; translate them to your worktree root. Re-read files before editing if the parent may have modified them since they appear in the context.
Your changes stay in this worktree and will not affect the parent's files.` +} diff --git a/packages/builtin-tools/src/tools/AgentTool/loadAgentsDir.ts b/packages/builtin-tools/src/tools/AgentTool/loadAgentsDir.ts new file mode 100644 index 000000000..30cf8bb91 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/loadAgentsDir.ts @@ -0,0 +1,755 @@ +import { feature } from 'bun:bundle' +import memoize from 'lodash-es/memoize.js' +import { basename } from 'path' +import type { SettingSource } from 'src/utils/settings/constants.js' +import { z } from 'zod/v4' +import { isAutoMemoryEnabled } from 'src/memdir/paths.js' +import { + type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + logEvent, +} from 'src/services/analytics/index.js' +import { + type McpServerConfig, + McpServerConfigSchema, +} from 'src/services/mcp/types.js' +import type { ToolUseContext } from 'src/Tool.js' +import { logForDebugging } from 'src/utils/debug.js' +import { + EFFORT_LEVELS, + type EffortValue, + parseEffortValue, +} from 'src/utils/effort.js' +import { isEnvTruthy } from 'src/utils/envUtils.js' +import { parsePositiveIntFromFrontmatter } from 'src/utils/frontmatterParser.js' +import { lazySchema } from 'src/utils/lazySchema.js' +import { logError } from 'src/utils/log.js' +import { + loadMarkdownFilesForSubdir, + parseAgentToolsFromFrontmatter, + parseSlashCommandToolsFromFrontmatter, +} from 'src/utils/markdownConfigLoader.js' +import { + PERMISSION_MODES, + type PermissionMode, +} from 'src/utils/permissions/PermissionMode.js' +import { + clearPluginAgentCache, + loadPluginAgents, +} from 'src/utils/plugins/loadPluginAgents.js' +import { HooksSchema, type HooksSettings } from 'src/utils/settings/types.js' +import { jsonStringify } from 'src/utils/slowOperations.js' +import { FILE_EDIT_TOOL_NAME } from '../FileEditTool/constants.js' +import { FILE_READ_TOOL_NAME } from '../FileReadTool/prompt.js' +import { FILE_WRITE_TOOL_NAME } from '../FileWriteTool/prompt.js' 
+import { + AGENT_COLORS, + type AgentColorName, + setAgentColor, +} from './agentColorManager.js' +import { type AgentMemoryScope, loadAgentMemoryPrompt } from './agentMemory.js' +import { + checkAgentMemorySnapshot, + initializeFromSnapshot, +} from './agentMemorySnapshot.js' +import { getBuiltInAgents } from './builtInAgents.js' + +// Type for MCP server specification in agent definitions +// Can be either a reference to an existing server by name, or an inline definition as { [name]: config } +export type AgentMcpServerSpec = + | string // Reference to existing server by name (e.g., "slack") + | { [name: string]: McpServerConfig } // Inline definition as { name: config } + +// Zod schema for agent MCP server specs +const AgentMcpServerSpecSchema = lazySchema(() => + z.union([ + z.string(), // Reference by name + z.record(z.string(), McpServerConfigSchema()), // Inline as { name: config } + ]), +) + +// Zod schemas for JSON agent validation +// Note: HooksSchema is lazy so the circular chain AppState -> loadAgentsDir -> settings/types +// is broken at module load time +const AgentJsonSchema = lazySchema(() => + z.object({ + description: z.string().min(1, 'Description cannot be empty'), + tools: z.array(z.string()).optional(), + disallowedTools: z.array(z.string()).optional(), + prompt: z.string().min(1, 'Prompt cannot be empty'), + model: z + .string() + .trim() + .min(1, 'Model cannot be empty') + .transform(m => (m.toLowerCase() === 'inherit' ? 
'inherit' : m)) + .optional(), + effort: z.union([z.enum(EFFORT_LEVELS), z.number().int()]).optional(), + permissionMode: z.enum(PERMISSION_MODES).optional(), + mcpServers: z.array(AgentMcpServerSpecSchema()).optional(), + hooks: HooksSchema().optional(), + maxTurns: z.number().int().positive().optional(), + skills: z.array(z.string()).optional(), + initialPrompt: z.string().optional(), + memory: z.enum(['user', 'project', 'local']).optional(), + background: z.boolean().optional(), + isolation: (process.env.USER_TYPE === 'ant' + ? z.enum(['worktree', 'remote']) + : z.enum(['worktree']) + ).optional(), + }), +) + +const AgentsJsonSchema = lazySchema(() => + z.record(z.string(), AgentJsonSchema()), +) + +// Base type with common fields for all agents +export type BaseAgentDefinition = { + agentType: string + whenToUse: string + tools?: string[] + disallowedTools?: string[] + skills?: string[] // Skill names to preload (parsed from comma-separated frontmatter) + mcpServers?: AgentMcpServerSpec[] // MCP servers specific to this agent + hooks?: HooksSettings // Session-scoped hooks registered when agent starts + color?: AgentColorName + model?: string + effort?: EffortValue + permissionMode?: PermissionMode + maxTurns?: number // Maximum number of agentic turns before stopping + filename?: string // Original filename without .md extension (for user/project/managed agents) + baseDir?: string + criticalSystemReminder_EXPERIMENTAL?: string // Short message re-injected at every user turn + requiredMcpServers?: string[] // MCP server name patterns that must be configured for agent to be available + background?: boolean // Always run as background task when spawned + initialPrompt?: string // Prepended to the first user turn (slash commands work) + memory?: AgentMemoryScope // Persistent memory scope + isolation?: 'worktree' | 'remote' // Run in an isolated git worktree, or remotely in CCR (ant-only) + pendingSnapshotUpdate?: { snapshotTimestamp: string } + /** Omit CLAUDE.md 
hierarchy from the agent's userContext. Read-only agents + * (Explore, Plan) don't need commit/PR/lint guidelines — the main agent has + * full CLAUDE.md and interprets their output. Saves ~5-15 Gtok/week across + * 34M+ Explore spawns. Kill-switch: tengu_slim_subagent_claudemd. */ + omitClaudeMd?: boolean +} + +// Built-in agents - dynamic prompts only, no static systemPrompt field +export type BuiltInAgentDefinition = BaseAgentDefinition & { + source: 'built-in' + baseDir: 'built-in' + callback?: () => void + getSystemPrompt: (params: { + toolUseContext: Pick + }) => string +} + +// Custom agents from user/project/policy settings - prompt stored via closure +export type CustomAgentDefinition = BaseAgentDefinition & { + getSystemPrompt: () => string + source: SettingSource + filename?: string + baseDir?: string +} + +// Plugin agents - similar to custom but with plugin metadata, prompt stored via closure +export type PluginAgentDefinition = BaseAgentDefinition & { + getSystemPrompt: () => string + source: 'plugin' + filename?: string + plugin: string +} + +// Union type for all agent types +export type AgentDefinition = + | BuiltInAgentDefinition + | CustomAgentDefinition + | PluginAgentDefinition + +// Type guards for runtime type checking +export function isBuiltInAgent( + agent: AgentDefinition, +): agent is BuiltInAgentDefinition { + return agent.source === 'built-in' +} + +export function isCustomAgent( + agent: AgentDefinition, +): agent is CustomAgentDefinition { + return agent.source !== 'built-in' && agent.source !== 'plugin' +} + +export function isPluginAgent( + agent: AgentDefinition, +): agent is PluginAgentDefinition { + return agent.source === 'plugin' +} + +export type AgentDefinitionsResult = { + activeAgents: AgentDefinition[] + allAgents: AgentDefinition[] + failedFiles?: Array<{ path: string; error: string }> + allowedAgentTypes?: string[] +} + +export function getActiveAgentsFromList( + allAgents: AgentDefinition[], +): AgentDefinition[] { + 
const builtInAgents = allAgents.filter(a => a.source === 'built-in') + const pluginAgents = allAgents.filter(a => a.source === 'plugin') + const userAgents = allAgents.filter(a => a.source === 'userSettings') + const projectAgents = allAgents.filter(a => a.source === 'projectSettings') + const managedAgents = allAgents.filter(a => a.source === 'policySettings') + const flagAgents = allAgents.filter(a => a.source === 'flagSettings') + + const agentGroups = [ + builtInAgents, + pluginAgents, + userAgents, + projectAgents, + flagAgents, + managedAgents, + ] + + const agentMap = new Map() + + for (const agents of agentGroups) { + for (const agent of agents) { + agentMap.set(agent.agentType, agent) + } + } + + return Array.from(agentMap.values()) +} + +/** + * Checks if an agent's required MCP servers are available. + * Returns true if no requirements or all requirements are met. + * @param agent The agent to check + * @param availableServers List of available MCP server names (e.g., from mcp.clients) + */ +export function hasRequiredMcpServers( + agent: AgentDefinition, + availableServers: string[], +): boolean { + if (!agent.requiredMcpServers || agent.requiredMcpServers.length === 0) { + return true + } + // Each required pattern must match at least one available server (case-insensitive) + return agent.requiredMcpServers.every(pattern => + availableServers.some(server => + server.toLowerCase().includes(pattern.toLowerCase()), + ), + ) +} + +/** + * Filters agents based on MCP server requirements. + * Only returns agents whose required MCP servers are available. + * @param agents List of agents to filter + * @param availableServers List of available MCP server names + */ +export function filterAgentsByMcpRequirements( + agents: AgentDefinition[], + availableServers: string[], +): AgentDefinition[] { + return agents.filter(agent => hasRequiredMcpServers(agent, availableServers)) +} + +/** + * Check for and initialize agent memory from project snapshots. 
+ * For agents with memory enabled, copies snapshot to local if no local memory exists. + * For agents with newer snapshots, logs a debug message (user prompt TODO). + */ +async function initializeAgentMemorySnapshots( + agents: CustomAgentDefinition[], +): Promise { + await Promise.all( + agents.map(async agent => { + if (agent.memory !== 'user') return + const result = await checkAgentMemorySnapshot( + agent.agentType, + agent.memory, + ) + switch (result.action) { + case 'initialize': + logForDebugging( + `Initializing ${agent.agentType} memory from project snapshot`, + ) + await initializeFromSnapshot( + agent.agentType, + agent.memory, + result.snapshotTimestamp!, + ) + break + case 'prompt-update': + agent.pendingSnapshotUpdate = { + snapshotTimestamp: result.snapshotTimestamp!, + } + logForDebugging( + `Newer snapshot available for ${agent.agentType} memory (snapshot: ${result.snapshotTimestamp})`, + ) + break + } + }), + ) +} + +export const getAgentDefinitionsWithOverrides = memoize( + async (cwd: string): Promise => { + // Simple mode: skip custom agents, only return built-ins + if (isEnvTruthy(process.env.CLAUDE_CODE_SIMPLE)) { + const builtInAgents = getBuiltInAgents() + return { + activeAgents: builtInAgents, + allAgents: builtInAgents, + } + } + + try { + const markdownFiles = await loadMarkdownFilesForSubdir('agents', cwd) + + const failedFiles: Array<{ path: string; error: string }> = [] + const customAgents = markdownFiles + .map(({ filePath, baseDir, frontmatter, content, source }) => { + const agent = parseAgentFromMarkdown( + filePath, + baseDir, + frontmatter, + content, + source, + ) + if (!agent) { + // Skip non-agent markdown files silently (e.g., reference docs + // co-located with agent definitions). Only report errors for files + // that look like agent attempts (have a 'name' field in frontmatter). 
+ if (!frontmatter['name']) { + return null + } + const errorMsg = getParseError(frontmatter) + failedFiles.push({ path: filePath, error: errorMsg }) + logForDebugging( + `Failed to parse agent from ${filePath}: ${errorMsg}`, + ) + logEvent('tengu_agent_parse_error', { + error: + errorMsg as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + location: + source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, + }) + return null + } + return agent + }) + .filter(agent => agent !== null) + + // Kick off plugin agent loading concurrently with memory snapshot init — + // loadPluginAgents is memoized and takes no args, so it's independent. + // Join both so neither becomes a floating promise if the other throws. + let pluginAgentsPromise = loadPluginAgents() + if (feature('AGENT_MEMORY_SNAPSHOT') && isAutoMemoryEnabled()) { + const [pluginAgents_] = await Promise.all([ + pluginAgentsPromise, + initializeAgentMemorySnapshots(customAgents), + ]) + pluginAgentsPromise = Promise.resolve(pluginAgents_) + } + const pluginAgents = await pluginAgentsPromise + + const builtInAgents = getBuiltInAgents() + + const allAgentsList: AgentDefinition[] = [ + ...builtInAgents, + ...pluginAgents, + ...customAgents, + ] + + const activeAgents = getActiveAgentsFromList(allAgentsList) + + // Initialize colors for all active agents + for (const agent of activeAgents) { + if (agent.color) { + setAgentColor(agent.agentType, agent.color) + } + } + + return { + activeAgents, + allAgents: allAgentsList, + failedFiles: failedFiles.length > 0 ? failedFiles : undefined, + } + } catch (error) { + const errorMessage = + error instanceof Error ? 
error.message : String(error) + logForDebugging(`Error loading agent definitions: ${errorMessage}`) + logError(error) + // Even on error, return the built-in agents + const builtInAgents = getBuiltInAgents() + return { + activeAgents: builtInAgents, + allAgents: builtInAgents, + failedFiles: [{ path: 'unknown', error: errorMessage }], + } + } + }, +) + +export function clearAgentDefinitionsCache(): void { + getAgentDefinitionsWithOverrides.cache.clear?.() + clearPluginAgentCache() +} + +/** + * Helper to determine the specific parsing error for an agent file + */ +function getParseError(frontmatter: Record): string { + const agentType = frontmatter['name'] + const description = frontmatter['description'] + + if (!agentType || typeof agentType !== 'string') { + return 'Missing required "name" field in frontmatter' + } + + if (!description || typeof description !== 'string') { + return 'Missing required "description" field in frontmatter' + } + + return 'Unknown parsing error' +} + +/** + * Parse hooks from frontmatter using the HooksSchema + * @param frontmatter The frontmatter object containing potential hooks + * @param agentType The agent type for logging purposes + * @returns Parsed hooks settings or undefined if invalid/missing + */ +function parseHooksFromFrontmatter( + frontmatter: Record, + agentType: string, +): HooksSettings | undefined { + if (!frontmatter.hooks) { + return undefined + } + + const result = HooksSchema().safeParse(frontmatter.hooks) + if (!result.success) { + logForDebugging( + `Invalid hooks in agent '${agentType}': ${result.error.message}`, + ) + return undefined + } + return result.data +} + +/** + * Parses agent definition from JSON data + */ +export function parseAgentFromJson( + name: string, + definition: unknown, + source: SettingSource = 'flagSettings', +): CustomAgentDefinition | null { + try { + const parsed = AgentJsonSchema().parse(definition) + + let tools = parseAgentToolsFromFrontmatter(parsed.tools) + + // If memory is 
enabled, inject Write/Edit/Read tools for memory access + if (isAutoMemoryEnabled() && parsed.memory && tools !== undefined) { + const toolSet = new Set(tools) + for (const tool of [ + FILE_WRITE_TOOL_NAME, + FILE_EDIT_TOOL_NAME, + FILE_READ_TOOL_NAME, + ]) { + if (!toolSet.has(tool)) { + tools = [...tools, tool] + } + } + } + + const disallowedTools = + parsed.disallowedTools !== undefined + ? parseAgentToolsFromFrontmatter(parsed.disallowedTools) + : undefined + + const systemPrompt = parsed.prompt + + const agent: CustomAgentDefinition = { + agentType: name, + whenToUse: parsed.description, + ...(tools !== undefined ? { tools } : {}), + ...(disallowedTools !== undefined ? { disallowedTools } : {}), + getSystemPrompt: () => { + if (isAutoMemoryEnabled() && parsed.memory) { + return ( + systemPrompt + '\n\n' + loadAgentMemoryPrompt(name, parsed.memory) + ) + } + return systemPrompt + }, + source, + ...(parsed.model ? { model: parsed.model } : {}), + ...(parsed.effort !== undefined ? { effort: parsed.effort } : {}), + ...(parsed.permissionMode + ? { permissionMode: parsed.permissionMode } + : {}), + ...(parsed.mcpServers && parsed.mcpServers.length > 0 + ? { mcpServers: parsed.mcpServers } + : {}), + ...(parsed.hooks ? { hooks: parsed.hooks } : {}), + ...(parsed.maxTurns !== undefined ? { maxTurns: parsed.maxTurns } : {}), + ...(parsed.skills && parsed.skills.length > 0 + ? { skills: parsed.skills } + : {}), + ...(parsed.initialPrompt ? { initialPrompt: parsed.initialPrompt } : {}), + ...(parsed.background ? { background: parsed.background } : {}), + ...(parsed.memory ? { memory: parsed.memory } : {}), + ...(parsed.isolation ? { isolation: parsed.isolation } : {}), + } + + return agent + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error) + logForDebugging(`Error parsing agent '${name}' from JSON: ${errorMessage}`) + logError(error) + return null + } +} + +/** + * Parses multiple agents from a JSON object + */ +export function parseAgentsFromJson( + agentsJson: unknown, + source: SettingSource = 'flagSettings', +): AgentDefinition[] { + try { + const parsed = AgentsJsonSchema().parse(agentsJson) + return Object.entries(parsed) + .map(([name, def]) => parseAgentFromJson(name, def, source)) + .filter((agent): agent is CustomAgentDefinition => agent !== null) + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + logForDebugging(`Error parsing agents from JSON: ${errorMessage}`) + logError(error) + return [] + } +} + +/** + * Parses agent definition from markdown file data + */ +export function parseAgentFromMarkdown( + filePath: string, + baseDir: string, + frontmatter: Record, + content: string, + source: SettingSource, +): CustomAgentDefinition | null { + try { + const agentType = frontmatter['name'] + let whenToUse = frontmatter['description'] as string + + // Validate required fields — silently skip files without any agent + // frontmatter (they're likely co-located reference documentation) + if (!agentType || typeof agentType !== 'string') { + return null + } + if (!whenToUse || typeof whenToUse !== 'string') { + logForDebugging( + `Agent file ${filePath} is missing required 'description' in frontmatter`, + ) + return null + } + + // Unescape newlines in whenToUse that were escaped for YAML parsing + whenToUse = whenToUse.replace(/\\n/g, '\n') + + const color = frontmatter['color'] as AgentColorName | undefined + const modelRaw = frontmatter['model'] + let model: string | undefined + if (typeof modelRaw === 'string' && modelRaw.trim().length > 0) { + const trimmed = modelRaw.trim() + model = trimmed.toLowerCase() === 'inherit' ? 
'inherit' : trimmed + } + + // Parse background flag + const backgroundRaw = frontmatter['background'] + + if ( + backgroundRaw !== undefined && + backgroundRaw !== 'true' && + backgroundRaw !== 'false' && + backgroundRaw !== true && + backgroundRaw !== false + ) { + logForDebugging( + `Agent file ${filePath} has invalid background value '${backgroundRaw}'. Must be 'true', 'false', or omitted.`, + ) + } + + const background = + backgroundRaw === 'true' || backgroundRaw === true ? true : undefined + + // Parse memory scope + const VALID_MEMORY_SCOPES: AgentMemoryScope[] = ['user', 'project', 'local'] + const memoryRaw = frontmatter['memory'] as string | undefined + let memory: AgentMemoryScope | undefined + if (memoryRaw !== undefined) { + if (VALID_MEMORY_SCOPES.includes(memoryRaw as AgentMemoryScope)) { + memory = memoryRaw as AgentMemoryScope + } else { + logForDebugging( + `Agent file ${filePath} has invalid memory value '${memoryRaw}'. Valid options: ${VALID_MEMORY_SCOPES.join(', ')}`, + ) + } + } + + // Parse isolation mode. 'remote' is ant-only; external builds reject it at parse time. + type IsolationMode = 'worktree' | 'remote' + const VALID_ISOLATION_MODES: readonly IsolationMode[] = + process.env.USER_TYPE === 'ant' ? ['worktree', 'remote'] : ['worktree'] + const isolationRaw = frontmatter['isolation'] as string | undefined + let isolation: IsolationMode | undefined + if (isolationRaw !== undefined) { + if (VALID_ISOLATION_MODES.includes(isolationRaw as IsolationMode)) { + isolation = isolationRaw as IsolationMode + } else { + logForDebugging( + `Agent file ${filePath} has invalid isolation value '${isolationRaw}'. Valid options: ${VALID_ISOLATION_MODES.join(', ')}`, + ) + } + } + + // Parse effort from frontmatter (supports string levels and integers) + const effortRaw = frontmatter['effort'] + const parsedEffort = + effortRaw !== undefined ? 
parseEffortValue(effortRaw) : undefined + + if (effortRaw !== undefined && parsedEffort === undefined) { + logForDebugging( + `Agent file ${filePath} has invalid effort '${effortRaw}'. Valid options: ${EFFORT_LEVELS.join(', ')} or an integer`, + ) + } + + // Parse permissionMode from frontmatter + const permissionModeRaw = frontmatter['permissionMode'] as + | string + | undefined + const isValidPermissionMode = + permissionModeRaw && + (PERMISSION_MODES as readonly string[]).includes(permissionModeRaw) + + if (permissionModeRaw && !isValidPermissionMode) { + const errorMsg = `Agent file ${filePath} has invalid permissionMode '${permissionModeRaw}'. Valid options: ${PERMISSION_MODES.join(', ')}` + logForDebugging(errorMsg) + } + + // Parse maxTurns from frontmatter + const maxTurnsRaw = frontmatter['maxTurns'] + const maxTurns = parsePositiveIntFromFrontmatter(maxTurnsRaw) + if (maxTurnsRaw !== undefined && maxTurns === undefined) { + logForDebugging( + `Agent file ${filePath} has invalid maxTurns '${maxTurnsRaw}'. Must be a positive integer.`, + ) + } + + // Extract filename without extension + const filename = basename(filePath, '.md') + + // Parse tools from frontmatter + let tools = parseAgentToolsFromFrontmatter(frontmatter['tools']) + + // If memory is enabled, inject Write/Edit/Read tools for memory access + if (isAutoMemoryEnabled() && memory && tools !== undefined) { + const toolSet = new Set(tools) + for (const tool of [ + FILE_WRITE_TOOL_NAME, + FILE_EDIT_TOOL_NAME, + FILE_READ_TOOL_NAME, + ]) { + if (!toolSet.has(tool)) { + tools = [...tools, tool] + } + } + } + + // Parse disallowedTools from frontmatter + const disallowedToolsRaw = frontmatter['disallowedTools'] + const disallowedTools = + disallowedToolsRaw !== undefined + ? 
parseAgentToolsFromFrontmatter(disallowedToolsRaw) + : undefined + + // Parse skills from frontmatter + const skills = parseSlashCommandToolsFromFrontmatter(frontmatter['skills']) + + const initialPromptRaw = frontmatter['initialPrompt'] + const initialPrompt = + typeof initialPromptRaw === 'string' && initialPromptRaw.trim() + ? initialPromptRaw + : undefined + + // Parse mcpServers from frontmatter using same Zod validation as JSON agents + const mcpServersRaw = frontmatter['mcpServers'] + let mcpServers: AgentMcpServerSpec[] | undefined + if (Array.isArray(mcpServersRaw)) { + mcpServers = mcpServersRaw + .map(item => { + const result = AgentMcpServerSpecSchema().safeParse(item) + if (result.success) { + return result.data + } + logForDebugging( + `Agent file ${filePath} has invalid mcpServers item: ${jsonStringify(item)}. Error: ${result.error.message}`, + ) + return null + }) + .filter((item): item is AgentMcpServerSpec => item !== null) + } + + // Parse hooks from frontmatter + const hooks = parseHooksFromFrontmatter(frontmatter, agentType) + + const systemPrompt = content.trim() + const agentDef: CustomAgentDefinition = { + baseDir, + agentType: agentType, + whenToUse: whenToUse, + ...(tools !== undefined ? { tools } : {}), + ...(disallowedTools !== undefined ? { disallowedTools } : {}), + ...(skills !== undefined ? { skills } : {}), + ...(initialPrompt !== undefined ? { initialPrompt } : {}), + ...(mcpServers !== undefined && mcpServers.length > 0 + ? { mcpServers } + : {}), + ...(hooks !== undefined ? { hooks } : {}), + getSystemPrompt: () => { + if (isAutoMemoryEnabled() && memory) { + const memoryPrompt = loadAgentMemoryPrompt(agentType, memory) + return systemPrompt + '\n\n' + memoryPrompt + } + return systemPrompt + }, + source, + filename, + ...(color && typeof color === 'string' && AGENT_COLORS.includes(color) + ? { color } + : {}), + ...(model !== undefined ? { model } : {}), + ...(parsedEffort !== undefined ? 
{ effort: parsedEffort } : {}), + ...(isValidPermissionMode + ? { permissionMode: permissionModeRaw as PermissionMode } + : {}), + ...(maxTurns !== undefined ? { maxTurns } : {}), + ...(background ? { background } : {}), + ...(memory ? { memory } : {}), + ...(isolation ? { isolation } : {}), + } + return agentDef + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + logForDebugging(`Error parsing agent from ${filePath}: ${errorMessage}`) + logError(error) + return null + } +} diff --git a/packages/builtin-tools/src/tools/AgentTool/prompt.ts b/packages/builtin-tools/src/tools/AgentTool/prompt.ts new file mode 100644 index 000000000..4198859a4 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/prompt.ts @@ -0,0 +1,287 @@ +import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js' +import { getSubscriptionType } from 'src/utils/auth.js' +import { hasEmbeddedSearchTools } from 'src/utils/embeddedTools.js' +import { isEnvDefinedFalsy, isEnvTruthy } from 'src/utils/envUtils.js' +import { isTeammate } from 'src/utils/teammate.js' +import { isInProcessTeammate } from 'src/utils/teammateContext.js' +import { FILE_READ_TOOL_NAME } from '../FileReadTool/prompt.js' +import { FILE_WRITE_TOOL_NAME } from '../FileWriteTool/prompt.js' +import { GLOB_TOOL_NAME } from '../GlobTool/prompt.js' +import { SEND_MESSAGE_TOOL_NAME } from '../SendMessageTool/constants.js' +import { AGENT_TOOL_NAME } from './constants.js' +import { isForkSubagentEnabled } from './forkSubagent.js' +import type { AgentDefinition } from './loadAgentsDir.js' + +function getToolsDescription(agent: AgentDefinition): string { + const { tools, disallowedTools } = agent + const hasAllowlist = tools && tools.length > 0 + const hasDenylist = disallowedTools && disallowedTools.length > 0 + + if (hasAllowlist && hasDenylist) { + // Both defined: filter allowlist by denylist to match runtime behavior + const denySet = new 
Set(disallowedTools) + const effectiveTools = tools.filter(t => !denySet.has(t)) + if (effectiveTools.length === 0) { + return 'None' + } + return effectiveTools.join(', ') + } else if (hasAllowlist) { + // Allowlist only: show the specific tools available + return tools.join(', ') + } else if (hasDenylist) { + // Denylist only: show "All tools except X, Y, Z" + return `All tools except ${disallowedTools.join(', ')}` + } + // No restrictions + return 'All tools' +} + +/** + * Format one agent line for the agent_listing_delta attachment message: + * `- type: whenToUse (Tools: ...)`. + */ +export function formatAgentLine(agent: AgentDefinition): string { + const toolsDescription = getToolsDescription(agent) + return `- ${agent.agentType}: ${agent.whenToUse} (Tools: ${toolsDescription})` +} + +/** + * Whether the agent list should be injected as an attachment message instead + * of embedded in the tool description. When true, getPrompt() returns a static + * description and attachments.ts emits an agent_listing_delta attachment. + * + * The dynamic agent list was ~10.2% of fleet cache_creation tokens: MCP async + * connect, /reload-plugins, or permission-mode changes mutate the list → + * description changes → full tool-schema cache bust. + * + * Override with CLAUDE_CODE_AGENT_LIST_IN_MESSAGES=true/false for testing. + */ +export function shouldInjectAgentListInMessages(): boolean { + if (isEnvTruthy(process.env.CLAUDE_CODE_AGENT_LIST_IN_MESSAGES)) return true + if (isEnvDefinedFalsy(process.env.CLAUDE_CODE_AGENT_LIST_IN_MESSAGES)) + return false + return getFeatureValue_CACHED_MAY_BE_STALE('tengu_agent_list_attach', false) +} + +export async function getPrompt( + agentDefinitions: AgentDefinition[], + isCoordinator?: boolean, + allowedAgentTypes?: string[], +): Promise { + // Filter agents by allowed types when Agent(x,y) restricts which agents can be spawned + const effectiveAgents = allowedAgentTypes + ? 
agentDefinitions.filter(a => allowedAgentTypes.includes(a.agentType)) + : agentDefinitions + + // Fork subagent feature: when enabled, insert the "When to fork" section + // (fork semantics, directive-style prompts) and swap in fork-aware examples. + const forkEnabled = isForkSubagentEnabled() + + const whenToForkSection = forkEnabled + ? ` + +## When to fork + +Fork yourself (omit \`subagent_type\`) when the intermediate tool output isn't worth keeping in your context. The criterion is qualitative \u2014 "will I need this output again" \u2014 not task size. +- **Research**: fork open-ended questions. If research can be broken into independent questions, launch parallel forks in one message. A fork beats a fresh subagent for this \u2014 it inherits context and shares your cache. +- **Implementation**: prefer to fork implementation work that requires more than a couple of edits. Do research before jumping to implementation. + +Forks are cheap because they share your prompt cache. Don't set \`model\` on a fork \u2014 a different model can't reuse the parent's cache. Pass a short \`name\` (one or two words, lowercase) so the user can see the fork in the teams panel and steer it mid-run. + +**Don't peek.** The tool result includes an \`output_file\` path — do not Read or tail it unless the user explicitly asks for a progress check. You get a completion notification; trust it. Reading the transcript mid-flight pulls the fork's tool noise into your context, which defeats the point of forking. + +**Don't race.** After launching, you know nothing about what the fork found. Never fabricate or predict fork results in any format — not as prose, summary, or structured output. The notification arrives as a user-role message in a later turn; it is never something you write yourself. If the user asks a follow-up before the notification lands, tell them the fork is still running — give status, not a guess. 
+ +**Writing a fork prompt.** Since the fork inherits your context, the prompt is a *directive* — what to do, not what the situation is. Be specific about scope: what's in, what's out, what another agent is handling. Don't re-explain background. +` + : '' + + const writingThePromptSection = ` + +## Writing the prompt + +${forkEnabled ? 'When spawning a fresh agent (with a `subagent_type`), it starts with zero context. ' : ''}Brief the agent like a smart colleague who just walked into the room — it hasn't seen this conversation, doesn't know what you've tried, doesn't understand why this task matters. +- Explain what you're trying to accomplish and why. +- Describe what you've already learned or ruled out. +- Give enough context about the surrounding problem that the agent can make judgment calls rather than just following a narrow instruction. +- If you need a short response, say so ("report in under 200 words"). +- Lookups: hand over the exact command. Investigations: hand over the question — prescribed steps become dead weight when the premise is wrong. + +${forkEnabled ? 'For fresh agents, terse' : 'Terse'} command-style prompts produce shallow, generic work. + +**Never delegate understanding.** Don't write "based on your findings, fix the bug" or "based on the research, implement it." Those phrases push synthesis onto the agent instead of doing it yourself. Write prompts that prove you understood: include file paths, line numbers, what specifically to change. +` + + const forkExamples = `Example usage: + + +user: "What's left on this branch before we can ship?" +assistant: Forking this \u2014 it's a survey question. I want the punch list, not the git output in my context. +${AGENT_TOOL_NAME}({ + name: "ship-audit", + description: "Branch ship-readiness audit", + prompt: "Audit what's left before this branch can ship. 
Check: uncommitted changes, commits ahead of main, whether tests exist, whether the GrowthBook gate is wired up, whether CI-relevant files changed. Report a punch list \u2014 done vs. missing. Under 200 words." +}) +assistant: Ship-readiness audit running. + +Turn ends here. The coordinator knows nothing about the findings yet. What follows is a SEPARATE turn \u2014 the notification arrives from outside, as a user-role message. It is not something the coordinator writes. + +[later turn \u2014 notification arrives as user message] +assistant: Audit's back. Three blockers: no tests for the new prompt path, GrowthBook gate wired but not in build_flags.yaml, and one uncommitted file. + + + +user: "so is the gate wired up or not" + +User asks mid-wait. The audit fork was launched to answer exactly this, and it hasn't returned. The coordinator does not have this answer. Give status, not a fabricated result. + +assistant: Still waiting on the audit \u2014 that's one of the things it's checking. Should land shortly. + + + +user: "Can you get a second opinion on whether this migration is safe?" +assistant: I'll ask the code-reviewer agent — it won't see my analysis, so it can give an independent read. + +A subagent_type is specified, so the agent starts fresh. It needs full context in the prompt. The briefing explains what to assess and why. + +${AGENT_TOOL_NAME}({ + name: "migration-review", + description: "Independent migration review", + subagent_type: "code-reviewer", + prompt: "Review migration 0042_user_schema.sql for safety. Context: we're adding a NOT NULL column to a 50M-row table. Existing rows get a backfill default. I want a second opinion on whether the backfill approach is safe under concurrent writes — I've checked locking behavior but want independent verification. Report: is this safe, and if not, what specifically breaks?" 
+}) + +` + + const currentExamples = `Example usage: + + +"test-runner": use this agent after you are done writing code to run tests +"greeting-responder": use this agent to respond to user greetings with a friendly joke + + + +user: "Please write a function that checks if a number is prime" +assistant: I'm going to use the ${FILE_WRITE_TOOL_NAME} tool to write the following code: + +function isPrime(n) { + if (n <= 1) return false + for (let i = 2; i * i <= n; i++) { + if (n % i === 0) return false + } + return true +} + + +Since a significant piece of code was written and the task was completed, now use the test-runner agent to run the tests + +assistant: Uses the ${AGENT_TOOL_NAME} tool to launch the test-runner agent + + + +user: "Hello" + +Since the user is greeting, use the greeting-responder agent to respond with a friendly joke + +assistant: "I'm going to use the ${AGENT_TOOL_NAME} tool to launch the greeting-responder agent" + +` + + // When the gate is on, the agent list lives in an agent_listing_delta + // attachment (see attachments.ts) instead of inline here. This keeps the + // tool description static across MCP/plugin/permission changes so the + // tools-block prompt cache doesn't bust every time an agent loads. + const listViaAttachment = shouldInjectAgentListInMessages() + + const agentListSection = listViaAttachment + ? `Available agent types are listed in messages in the conversation.` + : `Available agent types and the tools they have access to: +${effectiveAgents.map(agent => formatAgentLine(agent)).join('\n')}` + + // Shared core prompt used by both coordinator and non-coordinator modes + const shared = `Launch a new agent to handle complex, multi-step tasks autonomously. + +The ${AGENT_TOOL_NAME} tool launches specialized agents (subprocesses) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it. + +${agentListSection} + +${ + forkEnabled + ? 
`When using the ${AGENT_TOOL_NAME} tool, specify a subagent_type to use a specialized agent, or omit it to fork yourself — a fork inherits your full conversation context.` + : `When using the ${AGENT_TOOL_NAME} tool, specify a subagent_type parameter to select which agent type to use. If omitted, the general-purpose agent is used.` +}` + + // Coordinator mode gets the slim prompt -- the coordinator system prompt + // already covers usage notes, examples, and when-not-to-use guidance. + if (isCoordinator) { + return shared + } + + // Ant-native builds alias find/grep to embedded bfs/ugrep and remove the + // dedicated Glob/Grep tools, so point at find via Bash instead. + const embedded = hasEmbeddedSearchTools() + const fileSearchHint = embedded + ? '`find` via the Bash tool' + : `the ${GLOB_TOOL_NAME} tool` + // The "class Foo" example is about content search. Non-embedded stays Glob + // (original intent: find-the-file-containing). Embedded gets grep because + // find -name doesn't look at file contents. + const contentSearchHint = embedded + ? '`grep` via the Bash tool' + : `the ${GLOB_TOOL_NAME} tool` + const whenNotToUseSection = forkEnabled + ? '' + : ` +When NOT to use the ${AGENT_TOOL_NAME} tool: +- If you want to read a specific file path, use the ${FILE_READ_TOOL_NAME} tool or ${fileSearchHint} instead of the ${AGENT_TOOL_NAME} tool, to find the match more quickly +- If you are searching for a specific class definition like "class Foo", use ${contentSearchHint} instead, to find the match more quickly +- If you are searching for code within a specific file or set of 2-3 files, use the ${FILE_READ_TOOL_NAME} tool instead of the ${AGENT_TOOL_NAME} tool, to find the match more quickly +- Other tasks that are not related to the agent descriptions above +` + + // When listing via attachment, the "launch multiple agents" note is in the + // attachment message (conditioned on subscription there). 
When inline, keep + // the existing per-call getSubscriptionType() check. + const concurrencyNote = + !listViaAttachment && getSubscriptionType() !== 'pro' + ? ` +- Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses` + : '' + + // Non-coordinator gets the full prompt with all sections + return `${shared} +${whenNotToUseSection} + +Usage notes: +- Always include a short description (3-5 words) summarizing what the agent will do${concurrencyNote} +- When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.${ + // eslint-disable-next-line custom-rules/no-process-env-top-level + !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_BACKGROUND_TASKS) && + !isInProcessTeammate() && + !forkEnabled + ? ` +- You can optionally run agents in the background using the run_in_background parameter. When an agent runs in the background, you will be automatically notified when it completes — do NOT sleep, poll, or proactively check on its progress. Continue with other work or respond to the user instead. +- **Foreground vs background**: Use foreground (default) when you need the agent's results before you can proceed — e.g., research agents whose findings inform your next steps. Use background when you have genuinely independent work to do in parallel.` + : '' + } +- To continue a previously spawned agent, use ${SEND_MESSAGE_TOOL_NAME} with the agent's ID or name as the \`to\` field. The agent resumes with its full context preserved. ${forkEnabled ? 'Each fresh Agent invocation with a subagent_type starts without context — provide a complete task description.' 
: 'Each Agent invocation starts fresh — provide a complete task description.'} +- The agent's outputs should generally be trusted +- Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches, etc.)${forkEnabled ? '' : ", since it is not aware of the user's intent"} +- If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement. +- If the user specifies that they want you to run agents "in parallel", you MUST send a single message with multiple ${AGENT_TOOL_NAME} tool use content blocks. For example, if you need to launch both a build-validator agent and a test-runner agent in parallel, send a single message with both tool calls. +- You can optionally set \`isolation: "worktree"\` to run the agent in a temporary git worktree, giving it an isolated copy of the repository. The worktree is automatically cleaned up if the agent makes no changes; if changes are made, the worktree path and branch are returned in the result.${ + process.env.USER_TYPE === 'ant' + ? `\n- You can set \`isolation: "remote"\` to run the agent in a remote CCR environment. This is always a background task; you'll be notified when it completes. Use for long-running tasks that need a fresh sandbox.` + : '' + }${ + isInProcessTeammate() + ? ` +- The run_in_background, name, team_name, and mode parameters are not available in this context. Only synchronous subagents are supported.` + : isTeammate() + ? ` +- The name, team_name, and mode parameters are not available in this context — teammates cannot spawn other teammates. Omit them to spawn a subagent.` + : '' + }${whenToForkSection}${writingThePromptSection} + +${forkEnabled ? 
forkExamples : currentExamples}` +} diff --git a/packages/builtin-tools/src/tools/AgentTool/resumeAgent.ts b/packages/builtin-tools/src/tools/AgentTool/resumeAgent.ts new file mode 100644 index 000000000..de6591e90 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/resumeAgent.ts @@ -0,0 +1,265 @@ +import { promises as fsp } from 'fs' +import { getSdkAgentProgressSummariesEnabled } from 'src/bootstrap/state.js' +import { getSystemPrompt } from 'src/constants/prompts.js' +import { isCoordinatorMode } from 'src/coordinator/coordinatorMode.js' +import type { CanUseToolFn } from 'src/hooks/useCanUseTool.js' +import type { ToolUseContext } from 'src/Tool.js' +import { registerAsyncAgent } from 'src/tasks/LocalAgentTask/LocalAgentTask.js' +import { assembleToolPool } from 'src/tools.js' +import { asAgentId } from 'src/types/ids.js' +import { runWithAgentContext } from 'src/utils/agentContext.js' +import { runWithCwdOverride } from 'src/utils/cwd.js' +import { logForDebugging } from 'src/utils/debug.js' +import { + createUserMessage, + filterOrphanedThinkingOnlyMessages, + filterUnresolvedToolUses, + filterWhitespaceOnlyAssistantMessages, +} from 'src/utils/messages.js' +import { getAgentModel } from 'src/utils/model/agent.js' +import { getQuerySourceForAgent } from 'src/utils/promptCategory.js' +import { + getAgentTranscript, + readAgentMetadata, +} from 'src/utils/sessionStorage.js' +import { buildEffectiveSystemPrompt } from 'src/utils/systemPrompt.js' +import type { SystemPrompt } from 'src/utils/systemPromptType.js' +import { getTaskOutputPath } from 'src/utils/task/diskOutput.js' +import { getParentSessionId } from 'src/utils/teammate.js' +import { reconstructForSubagentResume } from 'src/utils/toolResultStorage.js' +import { runAsyncAgentLifecycle } from './agentToolUtils.js' +import { GENERAL_PURPOSE_AGENT } from './built-in/generalPurposeAgent.js' +import { FORK_AGENT, isForkSubagentEnabled } from './forkSubagent.js' +import type { AgentDefinition } 
from './loadAgentsDir.js'
+import { isBuiltInAgent } from './loadAgentsDir.js'
+import { runAgent } from './runAgent.js'
+
+export type ResumeAgentResult = {
+  agentId: string
+  description: string
+  outputFile: string
+}
+export async function resumeAgentBackground({
+  agentId,
+  prompt,
+  toolUseContext,
+  canUseTool,
+  invokingRequestId,
+}: {
+  agentId: string
+  prompt: string
+  toolUseContext: ToolUseContext
+  canUseTool: CanUseToolFn
+  invokingRequestId?: string
+}): Promise<ResumeAgentResult> {
+  const startTime = Date.now()
+  const appState = toolUseContext.getAppState()
+  // In-process teammates get a no-op setAppState; setAppStateForTasks
+  // reaches the root store so task registration/progress/kill stay visible.
+  const rootSetAppState =
+    toolUseContext.setAppStateForTasks ?? toolUseContext.setAppState
+  const permissionMode = appState.toolPermissionContext.mode
+
+  const [transcript, meta] = await Promise.all([
+    getAgentTranscript(asAgentId(agentId)),
+    readAgentMetadata(asAgentId(agentId)),
+  ])
+  if (!transcript) {
+    throw new Error(`No transcript found for agent ID: ${agentId}`)
+  }
+  const resumedMessages = filterWhitespaceOnlyAssistantMessages(
+    filterOrphanedThinkingOnlyMessages(
+      filterUnresolvedToolUses(transcript.messages),
+    ),
+  )
+  const resumedReplacementState = reconstructForSubagentResume(
+    toolUseContext.contentReplacementState,
+    resumedMessages,
+    transcript.contentReplacements,
+  )
+  // Best-effort: if the original worktree was removed externally, fall back
+  // to parent cwd rather than crashing on chdir later.
+  const resumedWorktreePath = meta?.worktreePath
+    ? await fsp.stat(meta.worktreePath).then(
+        s => (s.isDirectory() ? meta.worktreePath : undefined),
+        () => {
+          logForDebugging(
+            `Resumed worktree ${meta.worktreePath} no longer exists; falling back to parent cwd`,
+          )
+          return undefined
+        },
+      )
+    : undefined
+  if (resumedWorktreePath) {
+    // Bump mtime so stale-worktree cleanup doesn't delete a just-resumed worktree (#22355)
+    const now = new Date()
+    await fsp.utimes(resumedWorktreePath, now, now)
+  }
+
+  // Skip filterDeniedAgents re-gating — original spawn already passed permission checks
+  let selectedAgent: AgentDefinition
+  let isResumedFork = false
+  if (meta?.agentType === FORK_AGENT.agentType) {
+    selectedAgent = FORK_AGENT
+    isResumedFork = true
+  } else if (meta?.agentType) {
+    const found = toolUseContext.options.agentDefinitions.activeAgents.find(
+      a => a.agentType === meta.agentType,
+    )
+    selectedAgent = found ?? GENERAL_PURPOSE_AGENT
+  } else {
+    selectedAgent = GENERAL_PURPOSE_AGENT
+  }
+
+  const uiDescription = meta?.description ?? '(resumed)'
+
+  let forkParentSystemPrompt: SystemPrompt | undefined
+  if (isResumedFork) {
+    if (toolUseContext.renderedSystemPrompt) {
+      forkParentSystemPrompt = toolUseContext.renderedSystemPrompt
+    } else {
+      const mainThreadAgentDefinition = appState.agent
+        ? appState.agentDefinitions.activeAgents.find(
+            a => a.agentType === appState.agent,
+          )
+        : undefined
+      const additionalWorkingDirectories = Array.from(
+        appState.toolPermissionContext.additionalWorkingDirectories.keys(),
+      )
+      const defaultSystemPrompt = await getSystemPrompt(
+        toolUseContext.options.tools,
+        toolUseContext.options.mainLoopModel,
+        additionalWorkingDirectories,
+        toolUseContext.options.mcpClients,
+      )
+      forkParentSystemPrompt = buildEffectiveSystemPrompt({
+        mainThreadAgentDefinition,
+        toolUseContext,
+        customSystemPrompt: toolUseContext.options.customSystemPrompt,
+        defaultSystemPrompt,
+        appendSystemPrompt: toolUseContext.options.appendSystemPrompt,
+      })
+    }
+    if (!forkParentSystemPrompt) {
+      throw new Error(
+        'Cannot resume fork agent: unable to reconstruct parent system prompt',
+      )
+    }
+  }
+
+  // Resolve model for analytics metadata (runAgent resolves its own internally)
+  const resolvedAgentModel = getAgentModel(
+    selectedAgent.model,
+    toolUseContext.options.mainLoopModel,
+    undefined,
+    permissionMode,
+  )
+
+  const workerPermissionContext = {
+    ...appState.toolPermissionContext,
+    mode: selectedAgent.permissionMode ?? 'acceptEdits',
+  }
+  const workerTools = isResumedFork
+    ? toolUseContext.options.tools
+    : assembleToolPool(workerPermissionContext, appState.mcp.tools)
+
+  const runAgentParams: Parameters<typeof runAgent>[0] = {
+    agentDefinition: selectedAgent,
+    promptMessages: [
+      ...resumedMessages,
+      createUserMessage({ content: prompt }),
+    ],
+    toolUseContext,
+    canUseTool,
+    isAsync: true,
+    querySource: getQuerySourceForAgent(
+      selectedAgent.agentType,
+      isBuiltInAgent(selectedAgent),
+    ),
+    model: undefined,
+    // Fork resume: pass parent's system prompt (cache-identical prefix).
+    // Non-fork: undefined → runAgent recomputes under wrapWithCwd so
+    // getCwd() sees resumedWorktreePath.
+    override: isResumedFork
+      ? { systemPrompt: forkParentSystemPrompt }
+      : undefined,
+    availableTools: workerTools,
+    // Transcript already contains the parent context slice from the
+    // original fork. Re-supplying it would cause duplicate tool_use IDs.
+    forkContextMessages: undefined,
+    ...(isResumedFork && { useExactTools: true }),
+    // Re-persist so metadata survives runAgent's writeAgentMetadata overwrite
+    worktreePath: resumedWorktreePath,
+    description: meta?.description,
+    contentReplacementState: resumedReplacementState,
+  }
+
+  // Skip name-registry write — original entry persists from the initial spawn
+  const agentBackgroundTask = registerAsyncAgent({
+    agentId,
+    description: uiDescription,
+    prompt,
+    selectedAgent,
+    setAppState: rootSetAppState,
+    toolUseId: toolUseContext.toolUseId,
+  })
+
+  const metadata = {
+    prompt,
+    resolvedAgentModel,
+    isBuiltInAgent: isBuiltInAgent(selectedAgent),
+    startTime,
+    agentType: selectedAgent.agentType,
+    isAsync: true,
+  }
+
+  const asyncAgentContext = {
+    agentId,
+    parentSessionId: getParentSessionId(),
+    agentType: 'subagent' as const,
+    subagentName: selectedAgent.agentType,
+    isBuiltIn: isBuiltInAgent(selectedAgent),
+    invokingRequestId,
+    invocationKind: 'resume' as const,
+    invocationEmitted: false,
+  }
+
+  const wrapWithCwd = <T>(fn: () => T): T =>
+    resumedWorktreePath ? runWithCwdOverride(resumedWorktreePath, fn) : fn()
+
+  void runWithAgentContext(asyncAgentContext, () =>
+    wrapWithCwd(() =>
+      runAsyncAgentLifecycle({
+        taskId: agentBackgroundTask.agentId,
+        abortController: agentBackgroundTask.abortController!,
+        makeStream: onCacheSafeParams =>
+          runAgent({
+            ...runAgentParams,
+            override: {
+              ...runAgentParams.override,
+              agentId: asAgentId(agentBackgroundTask.agentId),
+              abortController: agentBackgroundTask.abortController!,
+            },
+            onCacheSafeParams,
+          }),
+        metadata,
+        description: uiDescription,
+        toolUseContext,
+        rootSetAppState,
+        agentIdForCleanup: agentId,
+        enableSummarization:
+          isCoordinatorMode() ||
+          isForkSubagentEnabled() ||
+          getSdkAgentProgressSummariesEnabled(),
+        getWorktreeResult: async () =>
+          resumedWorktreePath ? { worktreePath: resumedWorktreePath } : {},
+      }),
+    ),
+  )
+
+  return {
+    agentId,
+    description: uiDescription,
+    outputFile: getTaskOutputPath(agentId),
+  }
+}
diff --git a/packages/builtin-tools/src/tools/AgentTool/runAgent.ts b/packages/builtin-tools/src/tools/AgentTool/runAgent.ts
new file mode 100644
index 000000000..baeed9022
--- /dev/null
+++ b/packages/builtin-tools/src/tools/AgentTool/runAgent.ts
@@ -0,0 +1,1000 @@
+import { feature } from 'bun:bundle'
+import type { UUID } from 'crypto'
+import { randomUUID } from 'crypto'
+import uniqBy from 'lodash-es/uniqBy.js'
+import { logForDebugging } from 'src/utils/debug.js'
+import { getProjectRoot, getSessionId } from 'src/bootstrap/state.js'
+import { getCommand, getSkillToolCommands, hasCommand } from 'src/commands.js'
+import {
+  DEFAULT_AGENT_PROMPT,
+  enhanceSystemPromptWithEnvDetails,
+} from 'src/constants/prompts.js'
+import type { QuerySource } from 'src/constants/querySource.js'
+import { getSystemContext, getUserContext } from 'src/context.js'
+import type { CanUseToolFn } from 'src/hooks/useCanUseTool.js'
+import { query } from 'src/query.js'
+import { getFeatureValue_CACHED_MAY_BE_STALE } from
'src/services/analytics/growthbook.js' +import { getDumpPromptsPath } from 'src/services/api/dumpPrompts.js' +import { cleanupAgentTracking } from 'src/services/api/promptCacheBreakDetection.js' +import { + connectToServer, + fetchToolsForClient, +} from 'src/services/mcp/client.js' +import { getMcpConfigByName } from 'src/services/mcp/config.js' +import type { + MCPServerConnection, + ScopedMcpServerConfig, +} from 'src/services/mcp/types.js' +import type { Tool, Tools, ToolUseContext } from 'src/Tool.js' +import { killShellTasksForAgent } from 'src/tasks/LocalShellTask/killShellTasks.js' +import type { Command } from 'src/types/command.js' +import type { AgentId } from 'src/types/ids.js' +import type { + AssistantMessage, + Message, + ProgressMessage, + RequestStartEvent, + StreamEvent, + SystemCompactBoundaryMessage, + TombstoneMessage, + ToolUseSummaryMessage, + UserMessage, +} from 'src/types/message.js' +import { createAttachmentMessage } from 'src/utils/attachments.js' +import { AbortError } from 'src/utils/errors.js' +import { getDisplayPath } from 'src/utils/file.js' +import { + cloneFileStateCache, + createFileStateCacheWithSizeLimit, + READ_FILE_STATE_CACHE_SIZE, +} from 'src/utils/fileStateCache.js' +import { + type CacheSafeParams, + createSubagentContext, +} from 'src/utils/forkedAgent.js' +import { registerFrontmatterHooks } from 'src/utils/hooks/registerFrontmatterHooks.js' +import { clearSessionHooks } from 'src/utils/hooks/sessionHooks.js' +import { executeSubagentStartHooks } from 'src/utils/hooks.js' +import { createUserMessage } from 'src/utils/messages.js' +import { getAgentModel } from 'src/utils/model/agent.js' +import { getAPIProvider } from 'src/utils/model/providers.js' +import { + createSubagentTrace, + endTrace, + isLangfuseEnabled, +} from 'src/services/langfuse/index.js' +import type { ModelAlias } from 'src/utils/model/aliases.js' +import { + clearAgentTranscriptSubdir, + recordSidechainTranscript, + setAgentTranscriptSubdir, + 
writeAgentMetadata,
+} from 'src/utils/sessionStorage.js'
+import {
+  isRestrictedToPluginOnly,
+  isSourceAdminTrusted,
+} from 'src/utils/settings/pluginOnlyPolicy.js'
+import {
+  asSystemPrompt,
+  type SystemPrompt,
+} from 'src/utils/systemPromptType.js'
+import {
+  isPerfettoTracingEnabled,
+  registerAgent as registerPerfettoAgent,
+  unregisterAgent as unregisterPerfettoAgent,
+} from 'src/utils/telemetry/perfettoTracing.js'
+import type { ContentReplacementState } from 'src/utils/toolResultStorage.js'
+import { createAgentId } from 'src/utils/uuid.js'
+import { resolveAgentTools } from './agentToolUtils.js'
+import { type AgentDefinition, isBuiltInAgent } from './loadAgentsDir.js'
+
+/**
+ * Initialize agent-specific MCP servers
+ * Agents can define their own MCP servers in their frontmatter that are additive
+ * to the parent's MCP clients. These servers are connected when the agent starts
+ * and cleaned up when the agent finishes.
+ *
+ * @param agentDefinition The agent definition with optional mcpServers
+ * @param parentClients MCP clients inherited from parent context
+ * @returns Merged clients (parent + agent-specific), agent MCP tools, and cleanup function
+ */
+async function initializeAgentMcpServers(
+  agentDefinition: AgentDefinition,
+  parentClients: MCPServerConnection[],
+): Promise<{
+  clients: MCPServerConnection[]
+  tools: Tools
+  cleanup: () => Promise<void>
+}> {
+  // If no agent-specific servers defined, return parent clients as-is
+  if (!agentDefinition.mcpServers?.length) {
+    return {
+      clients: parentClients,
+      tools: [],
+      cleanup: async () => {},
+    }
+  }
+
+  // When MCP is locked to plugin-only, skip frontmatter MCP servers for
+  // USER-CONTROLLED agents only. Plugin, built-in, and policySettings agents
+  // are admin-trusted — their frontmatter MCP is part of the admin-approved
+  // surface. Blocking them (as the first cut did) breaks plugin agents that
+  // legitimately need MCP, contradicting "plugin-provided always loads."
+  const agentIsAdminTrusted = isSourceAdminTrusted(agentDefinition.source)
+  if (isRestrictedToPluginOnly('mcp') && !agentIsAdminTrusted) {
+    logForDebugging(
+      `[Agent: ${agentDefinition.agentType}] Skipping MCP servers: strictPluginOnlyCustomization locks MCP to plugin-only (agent source: ${agentDefinition.source})`,
+    )
+    return {
+      clients: parentClients,
+      tools: [],
+      cleanup: async () => {},
+    }
+  }
+
+  const agentClients: MCPServerConnection[] = []
+  // Track which clients were newly created (inline definitions) vs. shared from parent
+  // Only newly created clients should be cleaned up when the agent finishes
+  const newlyCreatedClients: MCPServerConnection[] = []
+  const agentTools: Tool[] = []
+
+  for (const spec of agentDefinition.mcpServers) {
+    let config: ScopedMcpServerConfig | null = null
+    let name: string
+    let isNewlyCreated = false
+
+    if (typeof spec === 'string') {
+      // Reference by name - look up in existing MCP configs
+      // This uses the memoized connectToServer, so we may get a shared client
+      name = spec
+      config = getMcpConfigByName(spec)
+      if (!config) {
+        logForDebugging(
+          `[Agent: ${agentDefinition.agentType}] MCP server not found: ${spec}`,
+          { level: 'warn' },
+        )
+        continue
+      }
+    } else {
+      // Inline definition as { [name]: config }
+      // These are agent-specific servers that should be cleaned up
+      const entries = Object.entries(spec)
+      if (entries.length !== 1) {
+        logForDebugging(
+          `[Agent: ${agentDefinition.agentType}] Invalid MCP server spec: expected exactly one key`,
+          { level: 'warn' },
+        )
+        continue
+      }
+      const [serverName, serverConfig] = entries[0]!
+      name = serverName
+      config = {
+        ...serverConfig,
+        scope: 'dynamic' as const,
+      } as ScopedMcpServerConfig
+      isNewlyCreated = true
+    }
+
+    // Connect to the server
+    const client = await connectToServer(name, config)
+    agentClients.push(client)
+    if (isNewlyCreated) {
+      newlyCreatedClients.push(client)
+    }
+
+    // Fetch tools if connected
+    if (client.type === 'connected') {
+      const tools = await fetchToolsForClient(client)
+      agentTools.push(...tools)
+      logForDebugging(
+        `[Agent: ${agentDefinition.agentType}] Connected to MCP server '${name}' with ${tools.length} tools`,
+      )
+    } else {
+      logForDebugging(
+        `[Agent: ${agentDefinition.agentType}] Failed to connect to MCP server '${name}': ${client.type}`,
+        { level: 'warn' },
+      )
+    }
+  }
+
+  // Create cleanup function for agent-specific servers
+  // Only clean up newly created clients (inline definitions), not shared/referenced ones
+  // Shared clients (referenced by string name) are memoized and used by the parent context
+  const cleanup = async () => {
+    for (const client of newlyCreatedClients) {
+      if (client.type === 'connected') {
+        try {
+          await client.cleanup()
+        } catch (error) {
+          logForDebugging(
+            `[Agent: ${agentDefinition.agentType}] Error cleaning up MCP server '${client.name}': ${error}`,
+            { level: 'warn' },
+          )
+        }
+      }
+    }
+  }
+
+  // Return merged clients (parent + agent-specific) and agent tools
+  return {
+    clients: [...parentClients, ...agentClients],
+    tools: agentTools,
+    cleanup,
+  }
+}
+
+type QueryMessage =
+  | StreamEvent
+  | RequestStartEvent
+  | Message
+  | ToolUseSummaryMessage
+  | TombstoneMessage
+
+/**
+ * Type guard to check if a message from query() is a recordable Message type.
+ * Matches the types we want to record: assistant, user, progress, or system compact_boundary.
+ */ +function isRecordableMessage( + msg: QueryMessage, +): msg is + | AssistantMessage + | UserMessage + | ProgressMessage + | SystemCompactBoundaryMessage { + return ( + msg.type === 'assistant' || + msg.type === 'user' || + msg.type === 'progress' || + (msg.type === 'system' && + 'subtype' in msg && + msg.subtype === 'compact_boundary') + ) +} + +export async function* runAgent({ + agentDefinition, + promptMessages, + toolUseContext, + canUseTool, + isAsync, + canShowPermissionPrompts, + forkContextMessages, + querySource, + override, + model, + maxTurns, + preserveToolUseResults, + availableTools, + allowedTools, + onCacheSafeParams, + contentReplacementState, + useExactTools, + worktreePath, + description, + transcriptSubdir, + onQueryProgress, +}: { + agentDefinition: AgentDefinition + promptMessages: Message[] + toolUseContext: ToolUseContext + canUseTool: CanUseToolFn + isAsync: boolean + /** Whether this agent can show permission prompts. Defaults to !isAsync. + * Set to true for in-process teammates that run async but share the terminal. */ + canShowPermissionPrompts?: boolean + forkContextMessages?: Message[] + querySource: QuerySource + override?: { + userContext?: { [k: string]: string } + systemContext?: { [k: string]: string } + systemPrompt?: SystemPrompt + abortController?: AbortController + agentId?: AgentId + } + model?: ModelAlias + maxTurns?: number + /** Preserve toolUseResult on messages for subagents with viewable transcripts */ + preserveToolUseResults?: boolean + /** Precomputed tool pool for the worker agent. Computed by the caller + * (AgentTool.tsx) to avoid a circular dependency between runAgent and tools.ts. + * Always contains the full tool pool assembled with the worker's own permission + * mode, independent of the parent's tool restrictions. */ + availableTools: Tools + /** Tool permission rules to add to the agent's session allow rules. 
+ * When provided, replaces ALL allow rules so the agent only has what's + * explicitly listed (parent approvals don't leak through). */ + allowedTools?: string[] + /** Optional callback invoked with CacheSafeParams after constructing the agent's + * system prompt, context, and tools. Used by background summarization to fork + * the agent's conversation for periodic progress summaries. */ + onCacheSafeParams?: (params: CacheSafeParams) => void + /** Replacement state reconstructed from a resumed sidechain transcript so + * the same tool results are re-replaced (prompt cache stability). When + * omitted, createSubagentContext clones the parent's state. */ + contentReplacementState?: ContentReplacementState + /** When true, use availableTools directly without filtering through + * resolveAgentTools(). Also inherits the parent's thinkingConfig and + * isNonInteractiveSession instead of overriding them. Used by the fork + * subagent path to produce byte-identical API request prefixes for + * prompt cache hits. */ + useExactTools?: boolean + /** Worktree path if the agent was spawned with isolation: "worktree". + * Persisted to metadata so resume can restore the correct cwd. */ + worktreePath?: string + /** Original task description from AgentTool input. Persisted to metadata + * so a resumed agent's notification can show the original description. */ + description?: string + /** Optional subdirectory under subagents/ to group this agent's transcript + * with related ones (e.g. workflows/ for workflow subagents). */ + transcriptSubdir?: string + /** Optional callback fired on every message yielded by query() — including + * stream_event deltas that runAgent otherwise drops. Use to detect liveness + * during long single-block streams (e.g. thinking) where no assistant + * message is yielded for >60s. 
*/ + onQueryProgress?: () => void +}): AsyncGenerator { + // Track subagent usage for feature discovery + + const appState = toolUseContext.getAppState() + const permissionMode = appState.toolPermissionContext.mode + // Always-shared channel to the root AppState store. toolUseContext.setAppState + // is a no-op when the *parent* is itself an async agent (nested async→async), + // so session-scoped writes (hooks, bash tasks) must go through this instead. + const rootSetAppState = + toolUseContext.setAppStateForTasks ?? toolUseContext.setAppState + + const resolvedAgentModel = getAgentModel( + agentDefinition.model, + toolUseContext.options.mainLoopModel, + model, + permissionMode, + ) + + const agentId = override?.agentId ? override.agentId : createAgentId() + + // Route this agent's transcript into a grouping subdirectory if requested + // (e.g. workflow subagents write to subagents/workflows//). + if (transcriptSubdir) { + setAgentTranscriptSubdir(agentId, transcriptSubdir) + } + + // Register agent in Perfetto trace for hierarchy visualization + if (isPerfettoTracingEnabled()) { + const parentId = toolUseContext.agentId ?? getSessionId() + registerPerfettoAgent(agentId, agentDefinition.agentType, parentId) + } + + // Log API calls path for subagents (ant-only) + if (process.env.USER_TYPE === 'ant') { + logForDebugging( + `[Subagent ${agentDefinition.agentType}] API calls: ${getDisplayPath(getDumpPromptsPath(agentId))}`, + ) + } + + // Handle message forking for context sharing + // Filter out incomplete tool calls from parent messages to avoid API errors + const contextMessages: Message[] = forkContextMessages + ? filterIncompleteToolCalls(forkContextMessages) + : [] + const initialMessages: Message[] = [...contextMessages, ...promptMessages] + + const agentReadFileState = + forkContextMessages !== undefined + ? 
cloneFileStateCache(toolUseContext.readFileState) + : createFileStateCacheWithSizeLimit(READ_FILE_STATE_CACHE_SIZE) + + const [baseUserContext, baseSystemContext] = await Promise.all([ + override?.userContext ?? getUserContext(), + override?.systemContext ?? getSystemContext(), + ]) + + // Read-only agents (Explore, Plan) don't act on commit/PR/lint rules from + // CLAUDE.md — the main agent has full context and interprets their output. + // Dropping claudeMd here saves ~5-15 Gtok/week across 34M+ Explore spawns. + // Explicit override.userContext from callers is preserved untouched. + // Kill-switch defaults true; flip tengu_slim_subagent_claudemd=false to revert. + const shouldOmitClaudeMd = + agentDefinition.omitClaudeMd && + !override?.userContext && + getFeatureValue_CACHED_MAY_BE_STALE('tengu_slim_subagent_claudemd', true) + const { claudeMd: _omittedClaudeMd, ...userContextNoClaudeMd } = + baseUserContext + const resolvedUserContext = shouldOmitClaudeMd + ? userContextNoClaudeMd + : baseUserContext + + // Explore/Plan are read-only search agents — the parent-session-start + // gitStatus (up to 40KB, explicitly labeled stale) is dead weight. If they + // need git info they run `git status` themselves and get fresh data. + // Saves ~1-3 Gtok/week fleet-wide. + const { gitStatus: _omittedGitStatus, ...systemContextNoGit } = + baseSystemContext + const resolvedSystemContext = + agentDefinition.agentType === 'Explore' || + agentDefinition.agentType === 'Plan' + ? 
systemContextNoGit + : baseSystemContext + + // Override permission mode if agent defines one + // However, don't override if parent is in bypassPermissions or acceptEdits mode - those should always take precedence + // For async agents, also set shouldAvoidPermissionPrompts since they can't show UI + const agentPermissionMode = agentDefinition.permissionMode + const agentGetAppState = () => { + const state = toolUseContext.getAppState() + let toolPermissionContext = state.toolPermissionContext + + // Override permission mode if agent defines one (unless parent is bypassPermissions, acceptEdits, or auto) + if ( + agentPermissionMode && + state.toolPermissionContext.mode !== 'bypassPermissions' && + state.toolPermissionContext.mode !== 'acceptEdits' && + !( + feature('TRANSCRIPT_CLASSIFIER') && + state.toolPermissionContext.mode === 'auto' + ) + ) { + toolPermissionContext = { + ...toolPermissionContext, + mode: agentPermissionMode, + } + } + + // Set flag to auto-deny prompts for agents that can't show UI + // Use explicit canShowPermissionPrompts if provided, otherwise: + // - bubble mode: always show prompts (bubbles to parent terminal) + // - default: !isAsync (sync agents show prompts, async agents don't) + const shouldAvoidPrompts = + canShowPermissionPrompts !== undefined + ? !canShowPermissionPrompts + : agentPermissionMode === 'bubble' + ? false + : isAsync + if (shouldAvoidPrompts) { + toolPermissionContext = { + ...toolPermissionContext, + shouldAvoidPermissionPrompts: true, + } + } + + // For background agents that can show prompts, await automated checks + // (classifier, permission hooks) before showing the permission dialog. + // Since these are background agents, waiting is fine — the user should + // only be interrupted when automated checks can't resolve the permission. + // This applies to bubble mode (always) and explicit canShowPermissionPrompts. 
+ if (isAsync && !shouldAvoidPrompts) { + toolPermissionContext = { + ...toolPermissionContext, + awaitAutomatedChecksBeforeDialog: true, + } + } + + // Scope tool permissions: when allowedTools is provided, use them as session rules. + // IMPORTANT: Preserve cliArg rules (from SDK's --allowedTools) since those are + // explicit permissions from the SDK consumer that should apply to all agents. + // Only clear session-level rules from the parent to prevent unintended leakage. + if (allowedTools !== undefined) { + toolPermissionContext = { + ...toolPermissionContext, + alwaysAllowRules: { + // Preserve SDK-level permissions from --allowedTools + cliArg: state.toolPermissionContext.alwaysAllowRules.cliArg, + // Use the provided allowedTools as session-level permissions + session: [...allowedTools], + }, + } + } + + // Override effort level if agent defines one + const effortValue = + agentDefinition.effort !== undefined + ? agentDefinition.effort + : state.effortValue + + if ( + toolPermissionContext === state.toolPermissionContext && + effortValue === state.effortValue + ) { + return state + } + return { + ...state, + toolPermissionContext, + effortValue, + } + } + + const resolvedTools = useExactTools + ? availableTools + : resolveAgentTools(agentDefinition, availableTools, isAsync).resolvedTools + + const additionalWorkingDirectories = Array.from( + appState.toolPermissionContext.additionalWorkingDirectories.keys(), + ) + + const agentSystemPrompt = override?.systemPrompt + ? override.systemPrompt + : asSystemPrompt( + await getAgentSystemPrompt( + agentDefinition, + toolUseContext, + resolvedAgentModel, + additionalWorkingDirectories, + resolvedTools, + ), + ) + + // Determine abortController: + // - Override takes precedence + // - Async agents get a new unlinked controller (runs independently) + // - Sync agents share parent's controller + const agentAbortController = override?.abortController + ? override.abortController + : isAsync + ? 
new AbortController() + : toolUseContext.abortController + + // Execute SubagentStart hooks and collect additional context + const additionalContexts: string[] = [] + for await (const hookResult of executeSubagentStartHooks( + agentId, + agentDefinition.agentType, + agentAbortController.signal, + )) { + if ( + hookResult.additionalContexts && + hookResult.additionalContexts.length > 0 + ) { + additionalContexts.push(...hookResult.additionalContexts) + } + } + + // Add SubagentStart hook context as a user message (consistent with SessionStart/UserPromptSubmit) + if (additionalContexts.length > 0) { + const contextMessage = createAttachmentMessage({ + type: 'hook_additional_context', + content: additionalContexts, + hookName: 'SubagentStart', + toolUseID: randomUUID(), + hookEvent: 'SubagentStart', + }) + initialMessages.push(contextMessage) + } + + // Register agent's frontmatter hooks (scoped to agent lifecycle) + // Pass isAgent=true to convert Stop hooks to SubagentStop (since subagents trigger SubagentStop) + // Same admin-trusted gate for frontmatter hooks: under ["hooks"] alone + // (skills/agents not locked), user agents still load — block their + // frontmatter-hook REGISTRATION here where source is known, rather than + // blanket-blocking all session hooks at execution time (which would + // also kill plugin agents' hooks). + const hooksAllowedForThisAgent = + !isRestrictedToPluginOnly('hooks') || + isSourceAdminTrusted(agentDefinition.source) + if (agentDefinition.hooks && hooksAllowedForThisAgent) { + registerFrontmatterHooks( + rootSetAppState, + agentId, + agentDefinition.hooks, + `agent '${agentDefinition.agentType}'`, + true, // isAgent - converts Stop to SubagentStop + ) + } + + // Preload skills from agent frontmatter + const skillsToPreload = agentDefinition.skills ?? 
[] + if (skillsToPreload.length > 0) { + const allSkills = await getSkillToolCommands(getProjectRoot()) + + // Filter valid skills and warn about missing ones + const validSkills: Array<{ + skillName: string + skill: (typeof allSkills)[0] & { type: 'prompt' } + }> = [] + + for (const skillName of skillsToPreload) { + // Resolve the skill name, trying multiple strategies: + // 1. Exact match (hasCommand checks name, userFacingName, aliases) + // 2. Fully-qualified with agent's plugin prefix (e.g., "my-skill" → "plugin:my-skill") + // 3. Suffix match on ":skillName" for plugin-namespaced skills + const resolvedName = resolveSkillName( + skillName, + allSkills, + agentDefinition, + ) + if (!resolvedName) { + logForDebugging( + `[Agent: ${agentDefinition.agentType}] Warning: Skill '${skillName}' specified in frontmatter was not found`, + { level: 'warn' }, + ) + continue + } + + const skill = getCommand(resolvedName, allSkills) + if (skill.type !== 'prompt') { + logForDebugging( + `[Agent: ${agentDefinition.agentType}] Warning: Skill '${skillName}' is not a prompt-based skill`, + { level: 'warn' }, + ) + continue + } + validSkills.push({ skillName, skill }) + } + + // Load all skill contents concurrently and add to initial messages + const { formatSkillLoadingMetadata } = await import( + 'src/utils/processUserInput/processSlashCommand.js' + ) + const loaded = await Promise.all( + validSkills.map(async ({ skillName, skill }) => ({ + skillName, + skill, + content: await skill.getPromptForCommand('', toolUseContext), + })), + ) + for (const { skillName, skill, content } of loaded) { + logForDebugging( + `[Agent: ${agentDefinition.agentType}] Preloaded skill '${skillName}'`, + ) + + // Add command-message metadata so the UI shows which skill is loading + const metadata = formatSkillLoadingMetadata( + skillName, + skill.progressMessage, + ) + + initialMessages.push( + createUserMessage({ + content: [{ type: 'text', text: metadata }, ...content], + isMeta: true, + }), + ) + 
} + } + + // Initialize agent-specific MCP servers (additive to parent's servers) + const { + clients: mergedMcpClients, + tools: agentMcpTools, + cleanup: mcpCleanup, + } = await initializeAgentMcpServers( + agentDefinition, + toolUseContext.options.mcpClients, + ) + + // Merge agent MCP tools with resolved agent tools, deduplicating by name. + // resolvedTools is already deduplicated (see resolveAgentTools), so skip + // the spread + uniqBy overhead when there are no agent-specific MCP tools. + const allTools = + agentMcpTools.length > 0 + ? uniqBy([...resolvedTools, ...agentMcpTools], 'name') + : resolvedTools + + // Build agent-specific options + const agentOptions: ToolUseContext['options'] = { + isNonInteractiveSession: useExactTools + ? toolUseContext.options.isNonInteractiveSession + : isAsync + ? true + : (toolUseContext.options.isNonInteractiveSession ?? false), + appendSystemPrompt: toolUseContext.options.appendSystemPrompt, + tools: allTools, + commands: [], + debug: toolUseContext.options.debug, + verbose: toolUseContext.options.verbose, + mainLoopModel: resolvedAgentModel, + // For fork children (useExactTools), inherit thinking config to match the + // parent's API request prefix for prompt cache hits. For regular + // sub-agents, disable thinking to control output token costs. + thinkingConfig: useExactTools + ? toolUseContext.options.thinkingConfig + : { type: 'disabled' as const }, + mcpClients: mergedMcpClients, + mcpResources: toolUseContext.options.mcpResources, + agentDefinitions: toolUseContext.options.agentDefinitions, + // Fork children (useExactTools path) need querySource on context.options + // for the recursive-fork guard at AgentTool.tsx call() — it checks + // options.querySource === 'agent:builtin:fork'. This survives autocompact + // (which rewrites messages, not context.options). 
Without this, the guard + // reads undefined and only the message-scan fallback fires — which + // autocompact defeats by replacing the fork-boilerplate message. + ...(useExactTools && { querySource }), + } + + // Create subagent context using shared helper + // - Sync agents share setAppState, setResponseLength, abortController with parent + // - Async agents are fully isolated (but with explicit unlinked abortController) + const agentToolUseContext = createSubagentContext(toolUseContext, { + options: agentOptions, + agentId, + agentType: agentDefinition.agentType, + messages: initialMessages, + readFileState: agentReadFileState, + abortController: agentAbortController, + getAppState: agentGetAppState, + // Sync agents share these callbacks with parent + shareSetAppState: !isAsync, + shareSetResponseLength: true, // Both sync and async contribute to response metrics + criticalSystemReminder_EXPERIMENTAL: + agentDefinition.criticalSystemReminder_EXPERIMENTAL, + contentReplacementState, + }) + + // Preserve tool use results for subagents with viewable transcripts (in-process teammates) + if (preserveToolUseResults) { + agentToolUseContext.preserveToolUseResults = true + } + + // Expose cache-safe params for background summarization (prompt cache sharing) + if (onCacheSafeParams) { + onCacheSafeParams({ + systemPrompt: agentSystemPrompt, + userContext: resolvedUserContext, + systemContext: resolvedSystemContext, + toolUseContext: agentToolUseContext, + forkContextMessages: initialMessages, + }) + } + + // Record initial messages before the query loop starts, plus the agentType + // so resume can route correctly when subagent_type is omitted. Both writes + // are fire-and-forget — persistence failure shouldn't block the agent. 
+ void recordSidechainTranscript(initialMessages, agentId).catch(_err => + logForDebugging(`Failed to record sidechain transcript: ${_err}`), + ) + void writeAgentMetadata(agentId, { + agentType: agentDefinition.agentType, + ...(worktreePath && { worktreePath }), + ...(description && { description }), + }).catch(_err => logForDebugging(`Failed to write agent metadata: ${_err}`)) + + // Track the last recorded message UUID for parent chain continuity + let lastRecordedUuid: UUID | null = initialMessages.at(-1)?.uuid ?? null + + // Create Langfuse sub-agent trace (no-op if not configured). + // Sub-agent trace shares the same sessionId as the parent, so Langfuse + // groups them under the same Session view. + const subTrace = isLangfuseEnabled() + ? createSubagentTrace({ + sessionId: getSessionId(), + agentType: agentDefinition.agentType, + agentId, + model: resolvedAgentModel, + provider: getAPIProvider(), + input: initialMessages, + }) + : null + + // Attach sub-agent trace to toolUseContext so query() reuses it + if (subTrace) { + agentToolUseContext.langfuseTrace = subTrace + } + + try { + for await (const message of query({ + messages: initialMessages, + systemPrompt: agentSystemPrompt, + userContext: resolvedUserContext, + systemContext: resolvedSystemContext, + canUseTool, + toolUseContext: agentToolUseContext, + querySource, + maxTurns: maxTurns ?? agentDefinition.maxTurns, + })) { + onQueryProgress?.() + // Forward subagent API request starts to parent's metrics display + // so TTFT/OTPS update during subagent execution. 
+ if ( + message.type === 'stream_event' && + (message as any).event.type === 'message_start' && + (message as any).ttftMs != null + ) { + toolUseContext.pushApiMetricsEntry?.((message as any).ttftMs) + continue + } + + // Yield attachment messages (e.g., structured_output) without recording them + if (message.type === 'attachment') { + // Handle max turns reached signal from query.ts + if ((message as any).attachment.type === 'max_turns_reached') { + logForDebugging( + `[Agent +: $ +{ + agentDefinition.agentType +} +] Reached max turns limit ($ +{ + (message as any).attachment.maxTurns +} +)`, + ) + break + } + yield message as Message + continue + } + + if (isRecordableMessage(message)) { + // Record only the new message with correct parent (O(1) per message) + await recordSidechainTranscript( + [message], + agentId, + lastRecordedUuid, + ).catch(err => + logForDebugging(`Failed to record sidechain transcript: ${err}`), + ) + if (message.type !== 'progress') { + lastRecordedUuid = message.uuid + } + yield message + } + } + + if (agentAbortController.signal.aborted) { + throw new AbortError() + } + + // Run callback if provided (only built-in agents have callbacks) + if (isBuiltInAgent(agentDefinition) && agentDefinition.callback) { + agentDefinition.callback() + } + } finally { + // End Langfuse sub-agent trace (no-op if not configured) + endTrace(subTrace) + // Clean up agent-specific MCP servers (runs on normal completion, abort, or error) + await mcpCleanup() + // Clean up agent's session hooks + if (agentDefinition.hooks) { + clearSessionHooks(rootSetAppState, agentId) + } + // Clean up prompt cache tracking state for this agent + if (feature('PROMPT_CACHE_BREAK_DETECTION')) { + cleanupAgentTracking(agentId) + } + // Release cloned file state cache memory + agentToolUseContext.readFileState.clear() + // Release the cloned fork context messages + initialMessages.length = 0 + // Release perfetto agent registry entry + unregisterPerfettoAgent(agentId) + // 
Release transcript subdir mapping + clearAgentTranscriptSubdir(agentId) + // Release this agent's todos entry. Without this, every subagent that + // called TodoWrite leaves a key in AppState.todos forever (even after all + // items complete, the value is [] but the key stays). Whale sessions + // spawn hundreds of agents; each orphaned key is a small leak that adds up. + rootSetAppState(prev => { + if (!(agentId in prev.todos)) return prev + const { [agentId]: _removed, ...todos } = prev.todos + return { ...prev, todos } + }) + // Kill any background bash tasks this agent spawned. Without this, a + // `run_in_background` shell loop (e.g. test fixture fake-logs.sh) outlives + // the agent as a PPID=1 zombie once the main session eventually exits. + killShellTasksForAgent(agentId, toolUseContext.getAppState, rootSetAppState) + /* eslint-disable @typescript-eslint/no-require-imports */ + if (feature('MONITOR_TOOL')) { + const mcpMod = + require('src/tasks/MonitorMcpTask/MonitorMcpTask.js') as typeof import('src/tasks/MonitorMcpTask/MonitorMcpTask.js') + mcpMod.killMonitorMcpTasksForAgent( + agentId, + toolUseContext.getAppState, + rootSetAppState, + ) + } + /* eslint-enable @typescript-eslint/no-require-imports */ + } +} + +/** + * Filters out assistant messages with incomplete tool calls (tool uses without results). + * This prevents API errors when sending messages with orphaned tool calls. 
*/
export function filterIncompleteToolCalls(messages: Message[]): Message[] {
  // Pass 1: collect the id of every tool_use that already has a tool_result
  // block in some user message.
  const toolUseIdsWithResults = new Set<string>()
  for (const message of messages) {
    if (message?.type !== 'user') continue
    const content = (message as UserMessage).message.content
    if (!Array.isArray(content)) continue
    for (const block of content) {
      if (block.type === 'tool_result' && block.tool_use_id) {
        toolUseIdsWithResults.add(block.tool_use_id)
      }
    }
  }

  // Pass 2: drop every assistant message that carries at least one tool_use
  // whose id was not seen above. The whole message is removed, not only the
  // unresolved block; all other messages are kept in their original order.
  // NOTE(review): when an assistant message mixes resolved and unresolved
  // tool_use blocks, removing it leaves the matching tool_result blocks in
  // later user messages without a tool_use — confirm callers never produce
  // such transcripts or that downstream tolerates it.
  return messages.filter(message => {
    if (message?.type !== 'assistant') return true
    const content = (message as AssistantMessage).message.content
    if (!Array.isArray(content)) return true
    return !content.some(
      block =>
        block.type === 'tool_use' &&
        block.id &&
        !toolUseIdsWithResults.has(block.id),
    )
  })
}

/**
 * Builds the system prompt for a subagent.
 *
 * The agent definition's own prompt is passed through
 * enhanceSystemPromptWithEnvDetails together with the resolved model, the
 * additional working directories and the names of the enabled tools. If any
 * of that throws, the failure is logged and DEFAULT_AGENT_PROMPT is enhanced
 * instead, so a faulty agent definition does not prevent the agent from
 * starting.
 *
 * NOTE(review): the type arguments of `Pick` and `Promise` were missing in
 * the copy under review (angle-bracket content was stripped). The ones below
 * are the presumed originals — confirm against the real file.
 *
 * @param agentDefinition Definition whose getSystemPrompt() supplies the base prompt.
 * @param toolUseContext Context forwarded to getSystemPrompt().
 * @param resolvedAgentModel Model identifier forwarded to the enhancer.
 * @param additionalWorkingDirectories Extra directories forwarded to the enhancer.
 * @param resolvedTools Tools available to the agent; only their names are used.
 * @returns The enhanced system prompt.
 */
async function getAgentSystemPrompt(
  agentDefinition: AgentDefinition,
  toolUseContext: Pick<ToolUseContext, 'options'>,
  resolvedAgentModel: string,
  additionalWorkingDirectories: string[],
  resolvedTools: readonly Tool[],
): Promise<string[]> {
  const enabledToolNames = new Set(resolvedTools.map(t => t.name))
  try {
    const agentPrompt = agentDefinition.getSystemPrompt({ toolUseContext })

    return await enhanceSystemPromptWithEnvDetails(
      [agentPrompt],
      resolvedAgentModel,
      additionalWorkingDirectories,
      enabledToolNames,
    )
  } catch (error) {
    // Best-effort fallback, but leave a trace: silently swapping in the
    // default prompt makes a broken agent definition very hard to diagnose.
    const reason = error instanceof Error ? error.message : String(error)
    logForDebugging(
      `[Agent: ${agentDefinition.agentType}] Failed to build system prompt, falling back to default: ${reason}`,
      { level: 'warn' },
    )
    return enhanceSystemPromptWithEnvDetails(
      [DEFAULT_AGENT_PROMPT],
      resolvedAgentModel,
      additionalWorkingDirectories,
      enabledToolNames,
    )
  }
}

/**
 * Resolve a skill name from agent
frontmatter to a registered command name.
 *
 * Plugin skills are registered with namespaced names (e.g., "my-plugin:my-skill")
 * but agents reference them with bare names (e.g., "my-skill"). This function
 * tries multiple resolution strategies, in order:
 *
 * 1. Exact match via hasCommand (name, userFacingName, aliases)
 * 2. Prefix with agent's plugin name (e.g., "my-skill" → "my-plugin:my-skill").
 *    Only attempted when the agent type is itself namespaced
 *    ("pluginName:agentName"); a bare agent type has no plugin prefix.
 * 3. Suffix match — the first command in allSkills whose name ends with
 *    ":skillName"
 *
 * @param skillName Skill name as written in the agent's frontmatter.
 * @param allSkills Commands to search.
 * @param agentDefinition Agent whose agentType may carry a plugin prefix.
 * @returns The registered command name, or null when no strategy matches.
 */
function resolveSkillName(
  skillName: string,
  allSkills: Command[],
  agentDefinition: AgentDefinition,
): string | null {
  // 1. Direct match
  if (hasCommand(skillName, allSkills)) {
    return skillName
  }

  // 2. Try prefixing with the agent's plugin name.
  // Plugin agents have agentType like "pluginName:agentName". Without a
  // separator there is no plugin, so do not invent a prefix from the bare
  // agent type (split(':')[0] would return the whole agentType).
  const separatorIndex = agentDefinition.agentType.indexOf(':')
  if (separatorIndex > 0) {
    const pluginPrefix = agentDefinition.agentType.slice(0, separatorIndex)
    const qualifiedName = `${pluginPrefix}:${skillName}`
    if (hasCommand(qualifiedName, allSkills)) {
      return qualifiedName
    }
  }

  // 3. Suffix match — find a skill whose name ends with ":skillName"
  const suffix = `:${skillName}`
  const match = allSkills.find(cmd => cmd.name.endsWith(suffix))
  return match ? match.name : null
}
diff --git a/packages/builtin-tools/src/tools/AgentTool/src/Tool.ts b/packages/builtin-tools/src/tools/AgentTool/src/Tool.ts new file mode 100644 index 000000000..7e33e7efc --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/src/Tool.ts @@ -0,0 +1,4 @@ +// Auto-generated type stub — replace with real implementation +export type buildTool = any; +export type ToolDef = any; +export type toolMatchesName = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/src/components/ConfigurableShortcutHint.ts b/packages/builtin-tools/src/tools/AgentTool/src/components/ConfigurableShortcutHint.ts new file mode 100644 index 000000000..d68e6f6e0 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/src/components/ConfigurableShortcutHint.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type ConfigurableShortcutHint = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/src/components/CtrlOToExpand.ts b/packages/builtin-tools/src/tools/AgentTool/src/components/CtrlOToExpand.ts new file mode 100644 index 000000000..b8e3b0a62 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/src/components/CtrlOToExpand.ts @@ -0,0 +1,3 @@ +// Auto-generated type stub — replace with real implementation +export type CtrlOToExpand = any; +export type SubAgentProvider = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/src/components/design-system/Byline.ts b/packages/builtin-tools/src/tools/AgentTool/src/components/design-system/Byline.ts new file mode 100644 index 000000000..ed8c71384 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/src/components/design-system/Byline.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type Byline = 
any; diff --git a/packages/builtin-tools/src/tools/AgentTool/src/components/design-system/KeyboardShortcutHint.ts b/packages/builtin-tools/src/tools/AgentTool/src/components/design-system/KeyboardShortcutHint.ts new file mode 100644 index 000000000..ab506bb31 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/src/components/design-system/KeyboardShortcutHint.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type KeyboardShortcutHint = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/src/types/message.ts b/packages/builtin-tools/src/tools/AgentTool/src/types/message.ts new file mode 100644 index 000000000..4b0a33f37 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/src/types/message.ts @@ -0,0 +1,3 @@ +// Auto-generated type stub — replace with real implementation +export type Message = any; +export type NormalizedUserMessage = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/src/utils/debug.ts b/packages/builtin-tools/src/tools/AgentTool/src/utils/debug.ts new file mode 100644 index 000000000..c64d5960c --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/src/utils/debug.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type logForDebugging = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/src/utils/promptCategory.ts b/packages/builtin-tools/src/tools/AgentTool/src/utils/promptCategory.ts new file mode 100644 index 000000000..207db7233 --- /dev/null +++ b/packages/builtin-tools/src/tools/AgentTool/src/utils/promptCategory.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type getQuerySourceForAgent = any; diff --git a/packages/builtin-tools/src/tools/AgentTool/src/utils/settings/constants.ts b/packages/builtin-tools/src/tools/AgentTool/src/utils/settings/constants.ts new file mode 100644 index 000000000..b82138d6a --- /dev/null +++ 
b/packages/builtin-tools/src/tools/AgentTool/src/utils/settings/constants.ts @@ -0,0 +1,2 @@ +// Auto-generated type stub — replace with real implementation +export type SettingSource = any; diff --git a/packages/builtin-tools/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx b/packages/builtin-tools/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx new file mode 100644 index 000000000..c2c964d97 --- /dev/null +++ b/packages/builtin-tools/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx @@ -0,0 +1,342 @@ +import { feature } from 'bun:bundle' +import * as React from 'react' +import { + getAllowedChannels, + getQuestionPreviewFormat, +} from 'src/bootstrap/state.js' +import { MessageResponse } from 'src/components/MessageResponse.js' +import { BLACK_CIRCLE } from 'src/constants/figures.js' +import { getModeColor } from 'src/utils/permissions/PermissionMode.js' +import { z } from 'zod/v4' +import { Box, Text } from '@anthropic/ink' +import type { Tool } from 'src/Tool.js' +import { buildTool, type ToolDef } from 'src/Tool.js' +import { lazySchema } from 'src/utils/lazySchema.js' +import { + ASK_USER_QUESTION_TOOL_CHIP_WIDTH, + ASK_USER_QUESTION_TOOL_NAME, + ASK_USER_QUESTION_TOOL_PROMPT, + DESCRIPTION, + PREVIEW_FEATURE_PROMPT, +} from './prompt.js' + +const questionOptionSchema = lazySchema(() => + z.object({ + label: z + .string() + .describe( + 'The display text for this option that the user will see and select. Should be concise (1-5 words) and clearly describe the choice.', + ), + description: z + .string() + .describe( + 'Explanation of what this option means or what will happen if chosen. Useful for providing context about trade-offs or implications.', + ), + preview: z + .string() + .optional() + .describe( + 'Optional preview content rendered when this option is focused. Use for mockups, code snippets, or visual comparisons that help users compare options. 
See the tool description for the expected content format.',
      ),
  }),
)

/**
 * Lazily-built schema for one question put to the user: the question text, a
 * short chip label, two to four options, and a multiSelect flag that defaults
 * to false.
 */
const questionSchema = lazySchema(() => {
  // Each field is built separately, in the same order the object lists them.
  const questionField = z
    .string()
    .describe(
      'The complete question to ask the user. Should be clear, specific, and end with a question mark. Example: "Which library should we use for date formatting?" If multiSelect is true, phrase it accordingly, e.g. "Which features do you want to enable?"',
    )

  const headerField = z
    .string()
    .describe(
      `Very short label displayed as a chip/tag (max ${ASK_USER_QUESTION_TOOL_CHIP_WIDTH} chars). Examples: "Auth method", "Library", "Approach".`,
    )

  const optionsField = z
    .array(questionOptionSchema())
    .min(2)
    .max(4)
    .describe(
      `The available choices for this question. Must have 2-4 options. Each option should be a distinct, mutually exclusive choice (unless multiSelect is enabled). There should be no 'Other' option, that will be provided automatically.`,
    )

  const multiSelectField = z
    .boolean()
    .default(false)
    .describe(
      'Set to true to allow the user to select multiple options instead of just one. Use when choices are not mutually exclusive.',
    )

  return z.object({
    question: questionField,
    header: headerField,
    options: optionsField,
    multiSelect: multiSelectField,
  })
})

const annotationsSchema = lazySchema(() => {
  const annotationSchema = z.object({
    preview: z
      .string()
      .optional()
      .describe(
        'The preview content of the selected option, if the question used previews.',
      ),
    notes: z
      .string()
      .optional()
      .describe('Free-text notes the user added to their selection.'),
  })

  return z
    .record(z.string(), annotationSchema)
    .optional()
    .describe(
      'Optional per-question annotations from the user (e.g., notes on preview selections). 
Keyed by question text.',
    )
})

/**
 * Refinement used by the input schema: `check` returns true only when no two
 * questions share the same text and, within each question, no two options
 * share the same label. `message` is the error text reported when it fails.
 */
const UNIQUENESS_REFINE = {
  check: (data: {
    questions: { question: string; options: { label: string }[] }[]
  }) => {
    // A list has no duplicates exactly when de-duplicating keeps its length.
    const isDuplicateFree = (values: string[]) =>
      new Set(values).size === values.length

    return (
      isDuplicateFree(data.questions.map(entry => entry.question)) &&
      data.questions.every(entry =>
        isDuplicateFree(entry.options.map(option => option.label)),
      )
    )
  },
  message:
    'Question texts must be unique, option labels must be unique within each question',
} as const

const commonFields = lazySchema(() => ({
  answers: z
    .record(z.string(), z.string())
    .optional()
    .describe('User answers collected by the permission component'),
  annotations: annotationsSchema(),
  metadata: z
    .object({
      source: z
        .string()
        .optional()
        .describe(
          'Optional identifier for the source of this question (e.g., "remember" for /remember command). Used for analytics tracking.',
        ),
    })
    .optional()
    .describe(
      'Optional metadata for tracking and analytics purposes. 
Not displayed to user.', + ), +})) + +const inputSchema = lazySchema(() => + z + .strictObject({ + questions: z + .array(questionSchema()) + .min(1) + .max(4) + .describe('Questions to ask the user (1-4 questions)'), + ...commonFields(), + }) + .refine(UNIQUENESS_REFINE.check, { + message: UNIQUENESS_REFINE.message, + }), +) +type InputSchema = ReturnType + +const outputSchema = lazySchema(() => + z.object({ + questions: z + .array(questionSchema()) + .describe('The questions that were asked'), + answers: z + .record(z.string(), z.string()) + .describe( + 'The answers provided by the user (question text -> answer string; multi-select answers are comma-separated)', + ), + annotations: annotationsSchema(), + }), +) +type OutputSchema = ReturnType + +// SDK schemas are identical to internal schemas now that `preview` and +// `annotations` are public (configurable via `toolConfig.askUserQuestion`). +export const _sdkInputSchema = inputSchema +export const _sdkOutputSchema = outputSchema + +export type Question = z.infer> +export type QuestionOption = z.infer> +export type Output = z.infer + +function AskUserQuestionResultMessage({ + answers, +}: { + answers: Output['answers'] +}): React.ReactNode { + return ( + + + {BLACK_CIRCLE}  + User answered Claude's questions: + + + + {Object.entries(answers).map(([questionText, answer]) => ( + + · {questionText} → {answer} + + ))} + + + + ) +} + +export const AskUserQuestionTool: Tool = buildTool({ + name: ASK_USER_QUESTION_TOOL_NAME, + searchHint: 'prompt the user with a multiple-choice question', + maxResultSizeChars: 100_000, + shouldDefer: true, + async description() { + return DESCRIPTION + }, + async prompt() { + const format = getQuestionPreviewFormat() + if (format === undefined) { + // SDK consumer that hasn't opted into a preview format — omit preview + // guidance (they may not render the field at all). 
+ return ASK_USER_QUESTION_TOOL_PROMPT + } + return ASK_USER_QUESTION_TOOL_PROMPT + PREVIEW_FEATURE_PROMPT[format] + }, + get inputSchema(): InputSchema { + return inputSchema() + }, + get outputSchema(): OutputSchema { + return outputSchema() + }, + userFacingName() { + return '' + }, + isEnabled() { + // When --channels is active the user is likely on Telegram/Discord, not + // watching the TUI. The multiple-choice dialog would hang with nobody at + // the keyboard. Channel permission relay already skips + // requiresUserInteraction() tools (interactiveHandler.ts) so there's + // no alternate approval path. + if ( + (feature('KAIROS') || feature('KAIROS_CHANNELS')) && + getAllowedChannels().length > 0 + ) { + return false + } + return true + }, + isConcurrencySafe() { + return true + }, + isReadOnly() { + return true + }, + toAutoClassifierInput(input) { + return input.questions.map(q => q.question).join(' | ') + }, + requiresUserInteraction() { + return true + }, + async validateInput({ questions }) { + if (getQuestionPreviewFormat() !== 'html') { + return { result: true } + } + for (const q of questions) { + for (const opt of q.options) { + const err = validateHtmlPreview(opt.preview) + if (err) { + return { + result: false, + message: `Option "${opt.label}" in question "${q.question}": ${err}`, + errorCode: 1, + } + } + } + } + return { result: true } + }, + async checkPermissions(input) { + return { + behavior: 'ask' as const, + message: 'Answer questions?', + updatedInput: input, + } + }, + renderToolUseMessage() { + return null + }, + renderToolUseProgressMessage() { + return null + }, + renderToolResultMessage({ answers }, _toolUseID) { + return + }, + renderToolUseRejectedMessage() { + return ( + + {BLACK_CIRCLE}  + User declined to answer questions + + ) + }, + renderToolUseErrorMessage() { + return null + }, + async call({ questions, answers = {}, annotations }, _context) { + return { + data: { questions, answers, ...(annotations && { annotations }) }, 
+ } + }, + mapToolResultToToolResultBlockParam({ answers, annotations }, toolUseID) { + const answersText = Object.entries(answers) + .map(([questionText, answer]) => { + const annotation = annotations?.[questionText] + const parts = [`"${questionText}"="${answer}"`] + if (annotation?.preview) { + parts.push(`selected preview:\n${annotation.preview}`) + } + if (annotation?.notes) { + parts.push(`user notes: ${annotation.notes}`) + } + return parts.join(' ') + }) + .join(', ') + + return { + type: 'tool_result', + content: `User has answered your questions: ${answersText}. You can now continue with the user's answers in mind.`, + tool_use_id: toolUseID, + } + }, +} satisfies ToolDef) + +// Lightweight HTML fragment check. Not a parser — HTML5 parsers are +// error-recovering by spec and accept anything. We're checking model intent +// (did it emit HTML?) and catching the specific things we told it not to do. +function validateHtmlPreview(preview: string | undefined): string | null { + if (preview === undefined) return null + if (/<\s*(html|body|!doctype)\b/i.test(preview)) { + return 'preview must be an HTML fragment, not a full document (no , , or )' + } + // SDK consumers typically set this via innerHTML — disallow executable/style + // tags so a preview can't run code or restyle the host page. Inline event + // handlers (onclick etc.) are still possible; consumers should sanitize. + if (/<\s*(script|style)\b/i.test(preview)) { + return 'preview must not contain