diff --git a/.changeset/glob-truncation-and-backslash.md b/.changeset/glob-truncation-and-backslash.md new file mode 100644 index 00000000..a179ecf3 --- /dev/null +++ b/.changeset/glob-truncation-and-backslash.md @@ -0,0 +1,7 @@ +--- +"@moonshot-ai/kaos": patch +"@moonshot-ai/agent-core": patch +"@moonshot-ai/kimi-code": patch +--- + +Fix glob pattern backslash escaping and include match count in truncation messages. diff --git a/apps/kimi-code/src/tui/components/messages/tool-call.ts b/apps/kimi-code/src/tui/components/messages/tool-call.ts index d0fcc1f6..04992223 100644 --- a/apps/kimi-code/src/tui/components/messages/tool-call.ts +++ b/apps/kimi-code/src/tui/components/messages/tool-call.ts @@ -394,6 +394,22 @@ function extractKeyArgument( Agent: ['description', 'prompt'], }; + // Glob: concatenate multiple args into a single summary so the header + // shows pattern, optional explicit path, and include_dirs override. + if (toolName === 'Glob') { + const pattern = args['pattern']; + if (typeof pattern !== 'string' || pattern.length === 0) return null; + let summary = pattern; + const path = args['path']; + if (typeof path === 'string' && path.length > 0) { + summary += ` · ${makeWorkspaceRelativePath(path, workspaceDir)}`; + } + if (args['include_dirs'] === false) { + summary += ' · no dirs'; + } + return truncateArgValue('pattern', summary); + } + const candidates = keyMap[toolName] ?? Object.keys(args); for (const key of candidates) { const val = args[key]; diff --git a/packages/agent-core/src/tools/builtin/file/glob.md b/packages/agent-core/src/tools/builtin/file/glob.md index f26cc8a0..769164bf 100644 --- a/packages/agent-core/src/tools/builtin/file/glob.md +++ b/packages/agent-core/src/tools/builtin/file/glob.md @@ -2,15 +2,14 @@ Find files (and optionally directories) by glob pattern, sorted by modification Good patterns: - `*.ts` — files in the current directory matching an extension -- `src/**/*.ts` — recursive with a subdirectory anchor and extension -- `test_*.py` — files whose name starts with a literal prefix +- `src/**/*.ts` — recursive walk with a subdirectory anchor and extension +- `**/*.py` — recursive walk from the search root for an extension +- `*.{ts,tsx}` — brace expansion is supported; expanded into `*.ts` and `*.tsx` before walking +- `{src,test}/**/*.ts` — cartesian brace expansion is supported too -Rejected patterns (no literal anchor — nothing bounds the result set): -- `**`, `**/*`, `*/*` — pure wildcards. Add an extension or subdirectory to give the walk a concrete target. -- Anything that starts with `**/` (e.g. `**/*.md`, `**/main/*.py`). The leading `**/` has no literal anchor in front of it. Anchor it with a top-level subdirectory like `src/**/*.md`. -- `*.{ts,tsx}` — brace expansion is not supported. Issue two calls: `*.ts` and `*.tsx`. +Results are capped at the first 100 matching paths (walk order, not global modification-time order). If a search would return more, a truncation marker is appended with the count of matches seen so far. Refine the pattern (extension, subdirectory) when 100 is not enough, or call again with a narrower anchor. -Large-directory warning — avoid recursing into dependency/build output even with an anchor: +Large-directory caveat — avoid recursing into dependency / build output even with an anchor: - `node_modules/**/*.js`, `.venv/**/*.py`, `__pycache__/**`, `target/**` all match technically but typically produce thousands of results that truncate at the match cap and waste the caller context. - Prefer specific subpaths like `node_modules/react/src/**/*.js`. \ No newline at end of file + Prefer specific subpaths like `node_modules/react/src/**/*.js`. diff --git a/packages/agent-core/src/tools/builtin/file/glob.ts b/packages/agent-core/src/tools/builtin/file/glob.ts index c3787b3b..6b0da554 100644 --- a/packages/agent-core/src/tools/builtin/file/glob.ts +++ b/packages/agent-core/src/tools/builtin/file/glob.ts @@ -8,24 +8,26 @@ * search base to save tokens; `output.paths` keeps absolute paths so * downstream Read/Edit can consume them directly. * - * Safety rails: - * - Pure-wildcard patterns (nothing but `*` / `?` / `/`) are rejected - * because they have no literal anchor — they would enumerate every - * file under the search root and exhaust the caller's context on - * large trees. Examples: `**`, `** / *`, `** / **`, `* / *`. - * Constrained patterns (with any literal anchor such as an extension - * or subdirectory) are allowed — the literal bounds the result set. - * - Patterns using brace expansion (`{a,b,c}`) are rejected up-front - * because the underlying `_globWalk` treats `{` / `}` as literals, - * so such patterns would silently match zero files. + * Behaviour: + * - Brace expansion (`*.{ts,tsx}`, `{src,test}/**`) is expanded at + * this layer into a list of sub-patterns before handing each to + * `kaos.glob`. The kaos walker treats `{` / `}` as literals, so the + * fan-out has to happen here for any results to come back. Cartesian + * and one level of nesting are supported; unbalanced or comma-less + * braces fall through as literals. * - `path` is validated by `resolvePathAccess` in strict mode. Explicit * paths must be absolute and within the workspace roots. - * - match count is capped at `MAX_MATCHES`; a separate `YIELD_SAFETY_CAP` - * (MAX_MATCHES × 2) on the raw yield stream is a secondary belt that + * - Match count is capped at `MAX_MATCHES` (unique paths). A separate + * `YIELD_SAFETY_CAP` on the raw yield stream is a secondary belt that * still terminates the stream if the kaos layer's own symlink-cycle * detection were ever absent or bypassed. Primary cycle defense lives * in `packages/kaos/src/local.ts:_globWalk` via a path-local visited - * inode set. + * inode set. With brace expansion the legitimate yield volume scales + * with the number of sub-patterns, so the safety cap scales too. + * - Pre-rejection of pure-wildcard / `**`-leading patterns has been + * removed; the 100-match cap is the only safety against runaway + * enumeration. Callers are expected to add an anchor (extension, + * subdirectory) when 100 results would not be enough. */ import type { Kaos } from '@moonshot-ai/kaos'; @@ -38,7 +40,6 @@ import type { ExecutableToolResult, ToolExecution } from '../../../loop/types'; import { resolvePathAccessPath } from '../../policies/path-access'; import type { PathClass } from '../../policies/path-access'; import { toInputJsonSchema } from '../../support/input-schema'; -import { listDirectory } from '../../support/list-directory'; import { literalRulePattern, matchesGlobRuleSubject } from '../../support/rule-match'; import type { WorkspaceConfig } from '../../support/workspace'; import GLOB_DESCRIPTION from './glob.md'; @@ -62,7 +63,19 @@ export const GlobInputSchema = z.object({ export type GlobInput = z.Infer; -export const MAX_MATCHES = 1000; +export const MAX_MATCHES = 100; + +/** + * Hard upper bound on the number of sub-patterns a single brace expansion + * is allowed to produce. Generous enough for the common LLM patterns + * (`*.{ts,tsx,js,jsx,mjs,cjs}` etc.) while still keeping pathological + * cartesian inputs like `{a,b}{c,d}{e,f}{g,h}{i,j}{k,l}` (= 64) from + * fanning out unboundedly. Beyond this we fall through with the original + * pattern unexpanded — kaos would then treat the braces as literals and + * match zero, which is the right "obvious failure" signal for a pattern + * the model probably did not mean. + */ +const MAX_BRACE_EXPANSIONS = 64; /** * Path-shape hint appended to the tool description only on a Windows @@ -84,12 +97,10 @@ const S_IFDIR = 0o040000; /** * Tool-level description shown to the LLM at tool declaration time. - * Tells the model — before any round-trip — which patterns are - * accepted, which are rejected, and which directories are too large to - * recurse into. Patterns with a literal anchor before a double-star are - * allowed; pure-wildcard patterns (a bare double-star or a double-star - * followed by `/`) are rejected outright. On a Windows backend - * the description also carries `WINDOWS_PATH_HINT` (path-shape guidance). + * Tells the model — before any round-trip — which patterns are accepted, + * how brace expansion is handled, and which directories are too large to + * recurse into. On a Windows backend the description also carries + * `WINDOWS_PATH_HINT` (path-shape guidance). */ export class GlobTool implements BuiltinTool { readonly name = 'Glob' as const; @@ -116,10 +127,25 @@ export class GlobTool implements BuiltinTool { }); } const searchRoots = [path ?? this.workspace.workspaceDir]; + + const detailParts: string[] = []; + detailParts.push(`pattern: ${args.pattern}`); + if (args.path !== undefined) { + detailParts.push(`path: ${args.path}`); + } + if (args.include_dirs === false) { + detailParts.push('include_dirs: false'); + } + return { accesses: ToolAccesses.searchTree(searchRoots[0]!), description: `Searching ${args.pattern}`, - display: { kind: 'file_io', operation: 'glob', path: searchRoots[0]! }, + display: { + kind: 'file_io', + operation: 'glob', + path: searchRoots[0]!, + detail: detailParts.join(', '), + }, approvalRule: literalRulePattern(this.name, args.pattern), matchesRule: (ruleArgs) => matchesGlobRuleSubject(ruleArgs, args.pattern), execute: () => this.execution(args, searchRoots), @@ -127,58 +153,14 @@ export class GlobTool implements BuiltinTool { } private async execution(args: GlobInput, searchRoots: string[]): Promise { - if (startsWithDoubleStarPrefix(args.pattern)) { - let tree: string; - try { - tree = await listDirectory(this.kaos, this.workspace.workspaceDir); - } catch { - tree = '(listing unavailable)'; - } - return { - isError: true, - output: - `Pattern "${args.pattern}" starts with '**' which is not allowed — ` + - `the leading '**/' has no literal anchor in front of it and would ` + - `enumerate every file under the search root, typically exhausting ` + - `the caller's context on large trees. Use more specific patterns ` + - `instead, such as "src/**/*.py" or "test/**/*.py".\n\n` + - `Top of ${this.workspace.workspaceDir}:\n${tree}`, - }; - } - - if (isPureWildcard(args.pattern)) { - const allowedRoots = [this.workspace.workspaceDir, ...this.workspace.additionalDirs]; - const rootList = allowedRoots.map((d) => ` - ${d}`).join('\n'); - let tree: string; - try { - tree = await listDirectory(this.kaos, this.workspace.workspaceDir); - } catch { - tree = '(listing unavailable)'; - } - return { - isError: true, - output: - `Pattern "${args.pattern}" is a pure wildcard (only \`*\`, \`?\`, \`**\`, \`/\`) ` + - `and would enumerate every file under the search root — with no literal ` + - `anchor to bound the result set, this typically exhausts your context on ` + - `large trees. Add an extension ` + - `("${args.pattern === '**' || args.pattern === '**/*' ? '**/*.ts' : '**/*.md'}") ` + - `or a subdirectory ("src/**/*.ts") to constrain the walk.\n\n` + - `Allowed roots for explicit path searches:\n${rootList}\n\n` + - `Top of ${this.workspace.workspaceDir}:\n${tree}`, - }; - } - - if (containsBraceExpansion(args.pattern)) { - return { - isError: true, - output: - `Pattern "${args.pattern}" uses brace expansion (\`{a,b,...}\`), which ` + - `is not supported by this Glob tool. Split it into separate calls, ` + - `one pattern per alternative. For example, instead of "*.{ts,tsx}" ` + - `issue two calls: "*.ts" and "*.tsx".`, - }; - } + // Expand brace alternations into a list of sub-patterns the kaos + // walker can actually understand. `*.{ts,tsx}` → ["*.ts", "*.tsx"]; + // unbalanced or comma-less braces (`{abc}`, `{a,b`) fall through as + // a single-element list with the original pattern. When the fan-out + // would exceed MAX_BRACE_EXPANSIONS we also return the original so + // the caller sees an obvious zero-match outcome rather than a silent + // partial walk. + const subPatterns = expandBraces(args.pattern); // Default true. When false, directories yielded by kaos are // filtered out using the same stat that fuels the mtime sort @@ -229,41 +211,46 @@ export class GlobTool implements BuiltinTool { // file. `yielded` still terminates the stream if that primary // defense were ever absent or bypassed (e.g. a future kaos // backend without inode tracking), so the tool layer doesn't - // depend on the kaos implementation for cycle safety. + // depend on the kaos implementation for cycle safety. With + // brace expansion the legitimate yield volume scales with the + // number of sub-patterns (each is its own walk), so the cap + // scales too. const seen = new Set(); const entries: Array<{ path: string; mtime: number }> = []; - const YIELD_SAFETY_CAP = MAX_MATCHES * 2; + const YIELD_SAFETY_CAP = MAX_MATCHES * 2 * subPatterns.length; let yielded = 0; let truncated = false; outer: for (const root of searchRoots) { - for await (const filePath of this.kaos.glob(root, args.pattern)) { - yielded++; - if (yielded >= YIELD_SAFETY_CAP) { - truncated = true; - break outer; - } - if (seen.has(filePath)) continue; - if (entries.length >= MAX_MATCHES) { - truncated = true; - break outer; + for (const subPattern of subPatterns) { + for await (const filePath of this.kaos.glob(root, subPattern)) { + yielded++; + if (yielded >= YIELD_SAFETY_CAP) { + truncated = true; + break outer; + } + if (seen.has(filePath)) continue; + if (entries.length >= MAX_MATCHES) { + truncated = true; + break outer; + } + seen.add(filePath); + let mtime = 0; + let isDir = false; + try { + const st = await this.kaos.stat(filePath); + mtime = st.stMtime ?? 0; + isDir = (st.stMode & S_IFMT) === S_IFDIR; + } catch { + // stat failure — use 0 mtime / assume file so it still surfaces + } + // Apply include_dirs *after* marking seen so a filtered dir + // doesn't re-enter via a later duplicate yield, and *before* + // pushing to entries so MAX_MATCHES continues to cap output + // (not pre-filter) size. + if (!includeDirs && isDir) continue; + entries.push({ path: filePath, mtime }); } - seen.add(filePath); - let mtime = 0; - let isDir = false; - try { - const st = await this.kaos.stat(filePath); - mtime = st.stMtime ?? 0; - isDir = (st.stMode & S_IFMT) === S_IFDIR; - } catch { - // stat failure — use 0 mtime / assume file so it still surfaces - } - // Apply include_dirs *after* marking seen so a filtered dir - // doesn't re-enter via a later duplicate yield, and *before* - // pushing to entries so MAX_MATCHES continues to cap output - // (not pre-filter) size. - if (!includeDirs && isDir) continue; - entries.push({ path: filePath, mtime }); } } @@ -282,7 +269,7 @@ export class GlobTool implements BuiltinTool { } const lines: string[] = []; if (truncated) { - lines.push(`[Truncated at ${String(MAX_MATCHES)} matches — use a more specific pattern]`); + lines.push(`[Truncated at ${String(MAX_MATCHES)} matches — ${String(seen.size)} matched so far, use a more specific pattern]`); lines.push(`Only the first ${String(MAX_MATCHES)} matches are returned.`); } lines.push(...displayLines); @@ -325,41 +312,38 @@ function relativizeIfUnder(candidate: string, base: string, pathClass: PathClass return normCandidate; } -// Return true iff `pattern` begins with the literal sequence `**` followed -// by a `/`. Such patterns have no literal anchor in front of the recursive -// wildcard, so the walk has nothing to bound it on the left and would -// descend into every top-level directory of the search root before any -// suffix constraint can filter. Rejected up-front to match the Python Glob -// behavior — callers must anchor with a top-level subdirectory. -function startsWithDoubleStarPrefix(pattern: string): boolean { - return pattern.startsWith('**/'); -} - /** - * Return true if `pattern` is pure wildcards — only `*`, `?`, `**`, `/`. - * Such patterns have no literal anchor and would enumerate every file - * under the search root. Backslash-escaped characters (`\X`) count as - * literals so `\*` or `\?` still means "pattern has an anchor". + * Expand brace alternations (`{a,b,c}`, `{src,test}/**`) into a flat list + * of sub-patterns. Recursive — handles cartesian products (`{a,b}/{c,d}.ts` + * → 4 patterns) and one or more levels of nesting (`{a,{b,c}}.ts`). + * + * Falls through with the original pattern as a single-element list when: + * - the pattern contains no `{...}` group at all; + * - the pattern contains `{...}` groups but none have a top-level comma + * (e.g. `{abc}` — bash treats those as literal); + * - braces are unbalanced (a stray `{` with no matching `}`, etc.); + * - expansion would produce more than `MAX_BRACE_EXPANSIONS` patterns — + * pathological cartesian inputs (`{a,b}{c,d}{e,f}{g,h}{i,j}{k,l,m}` + * ≥ 192) bail out rather than fan out unboundedly. + * + * Backslash-escaped braces (`\{`, `\}`) are treated as literals and skip + * the structural recognition so a user can opt out of expansion. */ -function isPureWildcard(pattern: string): boolean { - if (pattern === '') return false; - for (let i = 0; i < pattern.length; i++) { - const ch = pattern[i]; - if (ch === '\\' && i + 1 < pattern.length) { - // escaped literal — pattern has an anchor - return false; - } - if (ch !== '*' && ch !== '?' && ch !== '/') { - return false; - } +export function expandBraces(pattern: string): string[] { + const out: string[] = []; + if (!expandInto(pattern, out, MAX_BRACE_EXPANSIONS)) { + // Cap exceeded somewhere down the recursion — discard partial + // fan-out and report the original. Letting half the alternatives + // through would be a silent footgun. + return [pattern]; } - return true; + return out; } -/** Return true iff `pattern` looks like it uses `{a,b,c}` brace expansion. */ -function containsBraceExpansion(pattern: string): boolean { - let inBrace = false; - let sawCommaInsideBrace = false; +function expandInto(pattern: string, out: string[], cap: number): boolean { + // Find the first balanced `{...}` group containing a top-level comma. + let depth = 0; + let start = -1; for (let i = 0; i < pattern.length; i++) { const ch = pattern[i]; if (ch === '\\' && i + 1 < pattern.length) { @@ -367,16 +351,72 @@ function containsBraceExpansion(pattern: string): boolean { continue; } if (ch === '{') { - inBrace = true; - sawCommaInsideBrace = false; + if (depth === 0) start = i; + depth++; continue; } if (ch === '}') { - if (inBrace && sawCommaInsideBrace) return true; - inBrace = false; + if (depth === 0) { + // Stray `}` — treat the whole pattern as literal. + return pushLiteral(pattern, out, cap); + } + depth--; + if (depth === 0 && start !== -1) { + const inner = pattern.slice(start + 1, i); + const parts = splitTopLevelCommas(inner); + if (parts.length < 2) { + // No commas at the top level → literal group; skip past it + // and keep scanning for a real alternation further right. + start = -1; + continue; + } + const prefix = pattern.slice(0, start); + const suffix = pattern.slice(i + 1); + for (const part of parts) { + if (out.length >= cap) return false; + if (!expandInto(prefix + part + suffix, out, cap)) return false; + } + return true; + } + } + } + + if (depth !== 0) { + // Unbalanced `{` — treat the whole pattern as literal. + return pushLiteral(pattern, out, cap); + } + + return pushLiteral(pattern, out, cap); +} + +function pushLiteral(pattern: string, out: string[], cap: number): boolean { + if (out.length >= cap) return false; + out.push(pattern); + return true; +} + +/** + * Split on commas that sit at brace depth zero. Used by `expandBraces` + * to slice a `{a,{b,c},d}` group into `["a", "{b,c}", "d"]` rather than + * `["a", "{b", "c}", "d"]`. + */ +function splitTopLevelCommas(s: string): string[] { + const parts: string[] = []; + let depth = 0; + let last = 0; + for (let i = 0; i < s.length; i++) { + const ch = s[i]; + if (ch === '\\' && i + 1 < s.length) { + i++; continue; } - if (ch === ',' && inBrace) sawCommaInsideBrace = true; + if (ch === '{') depth++; + else if (ch === '}') depth--; + else if (ch === ',' && depth === 0) { + parts.push(s.slice(last, i)); + last = i + 1; + } } - return false; + parts.push(s.slice(last)); + return parts; } diff --git a/packages/agent-core/test/tools/builtin-current.test.ts b/packages/agent-core/test/tools/builtin-current.test.ts index 9f2c50ff..ad44a1fb 100644 --- a/packages/agent-core/test/tools/builtin-current.test.ts +++ b/packages/agent-core/test/tools/builtin-current.test.ts @@ -153,8 +153,21 @@ describe('current builtin file and shell tools', () => { expect(result.output).toContain('old_string not found'); }); - it('Glob exposes parameters and rejects pure wildcard patterns', async () => { - const tool = new GlobTool(createFakeKaos(), workspace); + it('Glob exposes parameters and walks pure-wildcard patterns capped at MAX_MATCHES', async () => { + // Pure wildcards used to be rejected up-front; now they walk like + // any other pattern and the 100-match cap is the only safety. + const glob = vi.fn().mockReturnValue( + (async function* () { + yield '/workspace/a.ts'; + })(), + ); + const tool = new GlobTool( + createFakeKaos({ + glob, + stat: vi.fn().mockResolvedValue({ stMtime: 1, stMode: 0o100000 }), + }), + workspace, + ); expect(GlobInputSchema.safeParse({ pattern: '*.ts' }).success).toBe(true); expect(tool.parameters).toMatchObject({ @@ -163,8 +176,9 @@ describe('current builtin file and shell tools', () => { }); const result = await executeTool(tool, context({ pattern: '**' })); - expect(result).toMatchObject({ isError: true }); - expect(result.output).toContain('pure wildcard'); + expect(result.isError).toBeFalsy(); + expect(glob).toHaveBeenCalledWith('/workspace', '**'); + expect(result.output).toContain('a.ts'); }); it('Grep exposes parameters and rejects relative workspace escapes before spawning rg', async () => { diff --git a/packages/agent-core/test/tools/glob.test.ts b/packages/agent-core/test/tools/glob.test.ts index c0d5091f..6879d396 100644 --- a/packages/agent-core/test/tools/glob.test.ts +++ b/packages/agent-core/test/tools/glob.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it, vi } from 'vitest'; -import { type GlobInput, GlobInputSchema, GlobTool, MAX_MATCHES } from '../../src/tools/builtin/file/glob'; +import { expandBraces, type GlobInput, GlobInputSchema, GlobTool, MAX_MATCHES } from '../../src/tools/builtin/file/glob'; import type { WorkspaceConfig } from '../../src/tools/support/workspace'; import { createFakeKaos } from './fixtures/fake-kaos'; import { executeTool } from './fixtures/execute-tool'; @@ -96,35 +96,52 @@ describe('GlobTool', () => { expect(glob).toHaveBeenCalledWith('C:/WORKSPACE', 'src/**/*.ts'); }); - it('rejects pure wildcard patterns before walking the tree', async () => { - const glob = vi.fn(); + it('walks pure-wildcard patterns instead of rejecting them, capping at MAX_MATCHES', async () => { + // Previously rejected up-front; now the 100-match cap is the only + // safety. Verifies the pattern reaches kaos and the cap fires. + const paths = Array.from({ length: MAX_MATCHES + 5 }, (_, i) => `/workspace/${String(i)}.ts`); + const glob = vi.fn().mockReturnValue(asyncPaths(paths)); const tool = new GlobTool( createFakeKaos({ glob, - iterdir: vi.fn().mockReturnValue(asyncPaths(['/workspace/src'])), - stat: vi.fn().mockResolvedValue(stat(0, 0o040000)), + iterdir: vi.fn().mockReturnValue(asyncPaths(['/workspace/0.ts'])), + stat: vi.fn().mockResolvedValue(stat(1)), }), workspace, ); const result = await executeTool(tool, context({ pattern: '**' })); - expect(result).toMatchObject({ isError: true }); - expect(result.output).toContain('pure wildcard'); - expect(result.output).toContain('/workspace'); - expect(glob).not.toHaveBeenCalled(); + expect(result.isError).toBeFalsy(); + expect(glob).toHaveBeenCalledWith('/workspace', '**'); + expect(result.output).toContain(`[Truncated at ${String(MAX_MATCHES)} matches`); }); - it('rejects brace expansion patterns with a clear split-call hint', async () => { - const glob = vi.fn(); - const tool = new GlobTool(createFakeKaos({ glob }), workspace); + it('expands brace patterns into multiple sub-pattern walks and dedups paths', async () => { + // `*.{ts,tsx}` → two kaos.glob calls with `*.ts` and `*.tsx`. Shared + // hits are deduped so the same file does not appear twice. + const glob = vi.fn((_root: string, pattern: string) => { + if (pattern === '*.ts') return asyncPaths(['/workspace/a.ts', '/workspace/shared.ts']); + if (pattern === '*.tsx') return asyncPaths(['/workspace/shared.tsx', '/workspace/shared.ts']); + return asyncPaths([]); + }); + const tool = new GlobTool( + createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), + workspace, + ); const result = await executeTool(tool, context({ pattern: '*.{ts,tsx}' })); - expect(result).toMatchObject({ isError: true }); - expect(result.output).toContain('brace expansion'); - expect(result.output).toContain('Split it into separate calls'); - expect(glob).not.toHaveBeenCalled(); + expect(result.isError).toBeFalsy(); + expect(glob).toHaveBeenCalledWith('/workspace', '*.ts'); + expect(glob).toHaveBeenCalledWith('/workspace', '*.tsx'); + const output = typeof result.output === 'string' ? result.output : ''; + const lines = output.split('\n').filter((l) => l.endsWith('.ts') || l.endsWith('.tsx')); + expect(lines).toContain('a.ts'); + expect(lines).toContain('shared.ts'); + expect(lines).toContain('shared.tsx'); + // Dedup: shared.ts appears only once even though both sub-patterns yielded it. + expect(lines.filter((l) => l === 'shared.ts')).toHaveLength(1); }); it('searches only the current workspace when path is omitted', async () => { @@ -190,7 +207,7 @@ describe('GlobTool', () => { const result = await executeTool(tool, context({ pattern: '*.ts' })); - expect(result.output).toContain(`[Truncated at ${String(MAX_MATCHES)} matches`); + expect(result.output).toContain(`[Truncated at ${String(MAX_MATCHES)} matches — ${String(MAX_MATCHES)} matched so far, use a more specific pattern]`); expect(result.output).toContain('0.ts'); expect(result.output).not.toContain(`${String(MAX_MATCHES)}.ts`); }); @@ -264,25 +281,23 @@ describe('GlobTool', () => { }); }); - it('rejects "**/" prefix patterns even with a literal anchor', async () => { - // py rejects every pattern starting with "**/". TS only rejects pure- - // wildcard patterns and accepts "**/*.py" because the `.py` literal - // anchors the walk. Test as a lockdown of the py contract — expected - // to fail under the current TS policy. - const glob = vi.fn(); + it('walks "**/" prefix patterns with a literal anchor instead of rejecting them', async () => { + // Previously a hard reject; now `**/*.py` reaches kaos like any + // other pattern and the 100-match cap is the only safety. + const glob = vi + .fn() + .mockReturnValue(asyncPaths(['/workspace/a.py', '/workspace/sub/b.py'])); const tool = new GlobTool( - createFakeKaos({ - glob, - iterdir: vi.fn().mockReturnValue(asyncPaths([])), - stat: vi.fn().mockResolvedValue(stat(0, 0o040000)), - }), + createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), workspace, ); const result = await executeTool(tool, context({ pattern: '**/*.py' })); - expect(result).toMatchObject({ isError: true }); - expect(result.output).toMatch(/starts with '\*\*' which is not allowed/); + expect(result.isError).toBeFalsy(); + expect(glob).toHaveBeenCalledWith('/workspace', '**/*.py'); + expect(result.output).toContain('a.py'); + expect(result.output).toContain('sub/b.py'); }); it('walks safe recursive patterns with a literal subdirectory anchor', async () => { @@ -381,32 +396,6 @@ describe('GlobTool', () => { expect(result.output).toContain(`Only the first ${String(MAX_MATCHES)} matches are returned`); }); - it('includes a directory listing in the rejection message for "**/" patterns', async () => { - // py rejection includes the top-level directory listing as a hint. - const iterdir = vi - .fn() - .mockReturnValue(asyncPaths(['/workspace/file1.txt', '/workspace/file2.py', '/workspace/src', '/workspace/docs'])); - const glob = vi.fn(); - const tool = new GlobTool( - createFakeKaos({ - glob, - iterdir, - stat: vi.fn().mockResolvedValue(stat(0, 0o100000)), - }), - workspace, - ); - - const result = await executeTool(tool, context({ pattern: '**/*.txt' })); - - expect(result).toMatchObject({ isError: true }); - expect(result.output).toMatch(/starts with '\*\*' which is not allowed/); - expect(result.output).toContain('Use more specific patterns instead'); - expect(result.output).toContain('file1.txt'); - expect(result.output).toContain('file2.py'); - expect(result.output).toContain('src'); - expect(result.output).toContain('docs'); - }); - it('returns a "Found N matches" footer at exactly MAX_MATCHES without truncation', async () => { const paths = Array.from( { length: MAX_MATCHES }, @@ -426,22 +415,22 @@ describe('GlobTool', () => { expect(result.output).toContain(`Found ${String(MAX_MATCHES)} matches`); }); - it('rejects "**/" patterns with literal subdirectory anchors after the prefix', async () => { - const glob = vi.fn(); + it('walks "**/" patterns with literal subdirectory anchors after the prefix', async () => { + // Previously rejected up-front; now `**/main/*.py` walks like any + // other anchored pattern. + const glob = vi + .fn() + .mockReturnValue(asyncPaths(['/workspace/src/main/app.py'])); const tool = new GlobTool( - createFakeKaos({ - glob, - iterdir: vi.fn().mockReturnValue(asyncPaths([])), - stat: vi.fn().mockResolvedValue(stat(0, 0o040000)), - }), + createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }), workspace, ); const result = await executeTool(tool, context({ pattern: '**/main/*.py' })); - expect(result).toMatchObject({ isError: true }); - expect(result.output).toMatch(/starts with '\*\*' which is not allowed/); - expect(glob).not.toHaveBeenCalled(); + expect(result.isError).toBeFalsy(); + expect(glob).toHaveBeenCalledWith('/workspace', '**/main/*.py'); + expect(result.output).toContain('src/main/app.py'); }); it('matches dotfiles like .gitlab-ci.yml under a simple "*.yml" pattern', async () => { @@ -565,11 +554,12 @@ describe('GlobTool', () => { expect(result.output).toMatch(/outside the workspace|outside the working directory/); }); - it('locks down rejection phrasing and large-directory caveats in the description', () => { + it('locks down brace-expansion mention and large-directory caveats in the description', () => { const tool = new GlobTool(createFakeKaos(), workspace); expect(tool.description).toContain('**'); expect(tool.description).toMatch(/\*\*\/\*\.py/); + expect(tool.description).toContain('brace expansion'); expect(tool.description).toContain('node_modules'); expect(tool.description).not.toContain('On Windows'); }); @@ -586,3 +576,49 @@ describe('GlobTool', () => { expect(tool.description).toContain('/c/Users/foo'); }); }); + +describe('expandBraces', () => { + it('returns the original pattern unchanged when there is no brace group', () => { + expect(expandBraces('src/**/*.ts')).toEqual(['src/**/*.ts']); + }); + + it('expands a single top-level brace group into one pattern per alternative', () => { + expect(expandBraces('*.{ts,tsx}')).toEqual(['*.ts', '*.tsx']); + }); + + it('produces the cartesian product when more than one brace group appears', () => { + expect(expandBraces('{src,test}/{a,b}.ts')).toEqual([ + 'src/a.ts', + 'src/b.ts', + 'test/a.ts', + 'test/b.ts', + ]); + }); + + it('recursively expands nested brace groups', () => { + expect(expandBraces('{a,{b,c}}.ts')).toEqual(['a.ts', 'b.ts', 'c.ts']); + }); + + it('falls through with the literal pattern when a brace group has no top-level comma', () => { + // bash also treats `{abc}` as a literal; we follow the same rule. + expect(expandBraces('{abc}.ts')).toEqual(['{abc}.ts']); + }); + + it('falls through with the literal pattern when braces are unbalanced', () => { + expect(expandBraces('{a,b.ts')).toEqual(['{a,b.ts']); + expect(expandBraces('a,b}.ts')).toEqual(['a,b}.ts']); + }); + + it('treats backslash-escaped braces as literals and does not expand them', () => { + expect(expandBraces('\\{a,b\\}.ts')).toEqual(['\\{a,b\\}.ts']); + }); + + it('falls back to the original pattern when expansion would exceed the fan-out cap', () => { + // Seven groups of 3 alternatives = 3^7 = 2187 patterns, well above + // the MAX_BRACE_EXPANSIONS = 64 cap. Falling back is preferred over + // silently dropping alternatives. + const pathological = '{a,b,c}{d,e,f}{g,h,i}{j,k,l}{m,n,o}{p,q,r}{s,t,u}'; + expect(expandBraces(pathological)).toEqual([pathological]); + }); +}); + diff --git a/packages/kaos/src/internal.ts b/packages/kaos/src/internal.ts index 323a1245..fc57e990 100644 --- a/packages/kaos/src/internal.ts +++ b/packages/kaos/src/internal.ts @@ -203,6 +203,9 @@ export function globPatternToRegex(pattern: string, caseSensitive: boolean): Reg // leading `^` must remain literal even though JS regex char // classes treat it as negation in the first position. let charClass = pattern.slice(i + 1, end); + // Escape backslashes inside the class so a trailing backslash + // does not accidentally escape the closing `]`. + charClass = charClass.replace(/\\/g, '\\\\'); if (charClass.startsWith('!')) { charClass = '^' + charClass.slice(1); } else if (charClass.startsWith('^')) { @@ -213,8 +216,20 @@ export function globPatternToRegex(pattern: string, caseSensitive: boolean): Reg } break; } + case '\\': { + if (i + 1 < pattern.length) { + const next = pattern.charAt(i + 1); + regex += next.replaceAll(/[{}()+.\\[\]^$|]/g, '\\$&'); + // Advance past the escaped character so it is not processed + // again as a regex metacharacter. match literally. + i++; + } else { + regex += '\\\\'; + } + break; + } default: - regex += ch.replaceAll(/[{}()+.\\^$|]/g, '\\$&'); + regex += ch.replaceAll(/[{}()+.\\[\]^$|]/g, '\\$&'); } } regex += '$'; diff --git a/packages/kaos/test/internal.test.ts b/packages/kaos/test/internal.test.ts index 058f63db..5a271216 100644 --- a/packages/kaos/test/internal.test.ts +++ b/packages/kaos/test/internal.test.ts @@ -189,6 +189,23 @@ describe('globPatternToRegex additional cases', () => { // Without escaping, `+` would make `ab` match via one-or-more repetition. expect(regex.test('ab.c$d(e)')).toBe(false); }); + + it('treats backslash as an escape character for the next literal', () => { + // `\[` and `\]` should match literal brackets in the filename. + const regex = globPatternToRegex('special \\[bracket\\].ts', true); + expect(regex.test('special [bracket].ts')).toBe(true); + expect(regex.test('special bracket.ts')).toBe(false); + }); + + it('escapes backslashes inside character classes to avoid an unterminated regex', () => { + // A trailing backslash inside a glob char class (`[abc\\]`) would + // escape the closing `]` in the resulting regex. We escape it so the + // regex stays valid and the backslash is treated as a literal member. + const regex = globPatternToRegex('file[abc\\].txt', true); + expect(regex.test('filea.txt')).toBe(true); + expect(regex.test('file\\.txt')).toBe(true); + expect(regex.test('filez.txt')).toBe(false); + }); }); describe('globPatternToRegex', () => {