Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/glob-truncation-and-backslash.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@moonshot-ai/kaos": patch
"@moonshot-ai/agent-core": patch
"@moonshot-ai/kimi-code": patch
---

Fix glob pattern backslash escaping and include match count in truncation messages.
16 changes: 16 additions & 0 deletions apps/kimi-code/src/tui/components/messages/tool-call.ts
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,22 @@ function extractKeyArgument(
Agent: ['description', 'prompt'],
};

// Glob: concatenate multiple args into a single summary so the header
// shows pattern, optional explicit path, and include_dirs override.
if (toolName === 'Glob') {
const pattern = args['pattern'];
if (typeof pattern !== 'string' || pattern.length === 0) return null;
let summary = pattern;
const path = args['path'];
if (typeof path === 'string' && path.length > 0) {
summary += ` · ${makeWorkspaceRelativePath(path, workspaceDir)}`;
}
if (args['include_dirs'] === false) {
summary += ' · no dirs';
}
return truncateArgValue('pattern', summary);
}

const candidates = keyMap[toolName] ?? Object.keys(args);
for (const key of candidates) {
const val = args[key];
Expand Down
15 changes: 7 additions & 8 deletions packages/agent-core/src/tools/builtin/file/glob.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ Find files (and optionally directories) by glob pattern, sorted by modification

Good patterns:
- `*.ts` — files in the current directory matching an extension
- `src/**/*.ts` — recursive with a subdirectory anchor and extension
- `test_*.py` — files whose name starts with a literal prefix
- `src/**/*.ts` — recursive walk with a subdirectory anchor and extension
- `**/*.py` — recursive walk from the search root for an extension
- `*.{ts,tsx}` — brace expansion is supported; expanded into `*.ts` and `*.tsx` before walking
- `{src,test}/**/*.ts` — cartesian brace expansion is supported too

Rejected patterns (no literal anchor — nothing bounds the result set):
- `**`, `**/*`, `*/*` — pure wildcards. Add an extension or subdirectory to give the walk a concrete target.
- Anything that starts with `**/` (e.g. `**/*.md`, `**/main/*.py`). The leading `**/` has no literal anchor in front of it. Anchor it with a top-level subdirectory like `src/**/*.md`.
- `*.{ts,tsx}` — brace expansion is not supported. Issue two calls: `*.ts` and `*.tsx`.
Results are capped at the first 100 matching paths (walk order, not global modification-time order). If a search would return more, a truncation marker is appended with the count of matches seen so far. Refine the pattern (extension, subdirectory) when 100 is not enough, or call again with a narrower anchor.

Large-directory warning — avoid recursing into dependency/build output even with an anchor:
Large-directory caveat — avoid recursing into dependency / build output even with an anchor:
- `node_modules/**/*.js`, `.venv/**/*.py`, `__pycache__/**`, `target/**` all match technically but
typically produce thousands of results that truncate at the match cap and waste the caller context.
Prefer specific subpaths like `node_modules/react/src/**/*.js`.
Prefer specific subpaths like `node_modules/react/src/**/*.js`.
318 changes: 179 additions & 139 deletions packages/agent-core/src/tools/builtin/file/glob.ts

Large diffs are not rendered by default.

22 changes: 18 additions & 4 deletions packages/agent-core/test/tools/builtin-current.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,21 @@ describe('current builtin file and shell tools', () => {
expect(result.output).toContain('old_string not found');
});

it('Glob exposes parameters and rejects pure wildcard patterns', async () => {
const tool = new GlobTool(createFakeKaos(), workspace);
it('Glob exposes parameters and walks pure-wildcard patterns capped at MAX_MATCHES', async () => {
// Pure wildcards used to be rejected up-front; now they walk like
// any other pattern and the 100-match cap is the only safety.
const glob = vi.fn().mockReturnValue(
(async function* () {
yield '/workspace/a.ts';
})(),
);
const tool = new GlobTool(
createFakeKaos({
glob,
stat: vi.fn().mockResolvedValue({ stMtime: 1, stMode: 0o100000 }),
}),
workspace,
);

expect(GlobInputSchema.safeParse({ pattern: '*.ts' }).success).toBe(true);
expect(tool.parameters).toMatchObject({
Expand All @@ -163,8 +176,9 @@ describe('current builtin file and shell tools', () => {
});

const result = await executeTool(tool, context({ pattern: '**' }));
expect(result).toMatchObject({ isError: true });
expect(result.output).toContain('pure wildcard');
expect(result.isError).toBeFalsy();
expect(glob).toHaveBeenCalledWith('/workspace', '**');
expect(result.output).toContain('a.ts');
});

it('Grep exposes parameters and rejects relative workspace escapes before spawning rg', async () => {
Expand Down
170 changes: 103 additions & 67 deletions packages/agent-core/test/tools/glob.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, expect, it, vi } from 'vitest';

import { type GlobInput, GlobInputSchema, GlobTool, MAX_MATCHES } from '../../src/tools/builtin/file/glob';
import { expandBraces, type GlobInput, GlobInputSchema, GlobTool, MAX_MATCHES } from '../../src/tools/builtin/file/glob';
import type { WorkspaceConfig } from '../../src/tools/support/workspace';
import { createFakeKaos } from './fixtures/fake-kaos';
import { executeTool } from './fixtures/execute-tool';
Expand Down Expand Up @@ -96,35 +96,52 @@ describe('GlobTool', () => {
expect(glob).toHaveBeenCalledWith('C:/WORKSPACE', 'src/**/*.ts');
});

it('rejects pure wildcard patterns before walking the tree', async () => {
const glob = vi.fn();
it('walks pure-wildcard patterns instead of rejecting them, capping at MAX_MATCHES', async () => {
// Previously rejected up-front; now the 100-match cap is the only
// safety. Verifies the pattern reaches kaos and the cap fires.
const paths = Array.from({ length: MAX_MATCHES + 5 }, (_, i) => `/workspace/${String(i)}.ts`);
const glob = vi.fn().mockReturnValue(asyncPaths(paths));
const tool = new GlobTool(
createFakeKaos({
glob,
iterdir: vi.fn().mockReturnValue(asyncPaths(['/workspace/src'])),
stat: vi.fn().mockResolvedValue(stat(0, 0o040000)),
iterdir: vi.fn().mockReturnValue(asyncPaths(['/workspace/0.ts'])),
stat: vi.fn().mockResolvedValue(stat(1)),
}),
workspace,
);

const result = await executeTool(tool, context({ pattern: '**' }));

expect(result).toMatchObject({ isError: true });
expect(result.output).toContain('pure wildcard');
expect(result.output).toContain('/workspace');
expect(glob).not.toHaveBeenCalled();
expect(result.isError).toBeFalsy();
expect(glob).toHaveBeenCalledWith('/workspace', '**');
expect(result.output).toContain(`[Truncated at ${String(MAX_MATCHES)} matches`);
});

it('rejects brace expansion patterns with a clear split-call hint', async () => {
const glob = vi.fn();
const tool = new GlobTool(createFakeKaos({ glob }), workspace);
it('expands brace patterns into multiple sub-pattern walks and dedups paths', async () => {
// `*.{ts,tsx}` → two kaos.glob calls with `*.ts` and `*.tsx`. Shared
// hits are deduped so the same file does not appear twice.
const glob = vi.fn((_root: string, pattern: string) => {
if (pattern === '*.ts') return asyncPaths(['/workspace/a.ts', '/workspace/shared.ts']);
if (pattern === '*.tsx') return asyncPaths(['/workspace/shared.tsx', '/workspace/shared.ts']);
return asyncPaths([]);
});
const tool = new GlobTool(
createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }),
workspace,
);

const result = await executeTool(tool, context({ pattern: '*.{ts,tsx}' }));

expect(result).toMatchObject({ isError: true });
expect(result.output).toContain('brace expansion');
expect(result.output).toContain('Split it into separate calls');
expect(glob).not.toHaveBeenCalled();
expect(result.isError).toBeFalsy();
expect(glob).toHaveBeenCalledWith('/workspace', '*.ts');
expect(glob).toHaveBeenCalledWith('/workspace', '*.tsx');
const output = typeof result.output === 'string' ? result.output : '';
const lines = output.split('\n').filter((l) => l.endsWith('.ts') || l.endsWith('.tsx'));
expect(lines).toContain('a.ts');
expect(lines).toContain('shared.ts');
expect(lines).toContain('shared.tsx');
// Dedup: shared.ts appears only once even though both sub-patterns yielded it.
expect(lines.filter((l) => l === 'shared.ts')).toHaveLength(1);
});

it('searches only the current workspace when path is omitted', async () => {
Expand Down Expand Up @@ -190,7 +207,7 @@ describe('GlobTool', () => {

const result = await executeTool(tool, context({ pattern: '*.ts' }));

expect(result.output).toContain(`[Truncated at ${String(MAX_MATCHES)} matches`);
expect(result.output).toContain(`[Truncated at ${String(MAX_MATCHES)} matches — ${String(MAX_MATCHES)} matched so far, use a more specific pattern]`);
expect(result.output).toContain('0.ts');
expect(result.output).not.toContain(`${String(MAX_MATCHES)}.ts`);
});
Expand Down Expand Up @@ -264,25 +281,23 @@ describe('GlobTool', () => {
});
});

it('rejects "**/" prefix patterns even with a literal anchor', async () => {
// py rejects every pattern starting with "**/". TS only rejects pure-
// wildcard patterns and accepts "**/*.py" because the `.py` literal
// anchors the walk. Test as a lockdown of the py contract — expected
// to fail under the current TS policy.
const glob = vi.fn();
it('walks "**/" prefix patterns with a literal anchor instead of rejecting them', async () => {
// Previously a hard reject; now `**/*.py` reaches kaos like any
// other pattern and the 100-match cap is the only safety.
const glob = vi
.fn()
.mockReturnValue(asyncPaths(['/workspace/a.py', '/workspace/sub/b.py']));
const tool = new GlobTool(
createFakeKaos({
glob,
iterdir: vi.fn().mockReturnValue(asyncPaths([])),
stat: vi.fn().mockResolvedValue(stat(0, 0o040000)),
}),
createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }),
workspace,
);

const result = await executeTool(tool, context({ pattern: '**/*.py' }));

expect(result).toMatchObject({ isError: true });
expect(result.output).toMatch(/starts with '\*\*' which is not allowed/);
expect(result.isError).toBeFalsy();
expect(glob).toHaveBeenCalledWith('/workspace', '**/*.py');
expect(result.output).toContain('a.py');
expect(result.output).toContain('sub/b.py');
});

it('walks safe recursive patterns with a literal subdirectory anchor', async () => {
Expand Down Expand Up @@ -381,32 +396,6 @@ describe('GlobTool', () => {
expect(result.output).toContain(`Only the first ${String(MAX_MATCHES)} matches are returned`);
});

it('includes a directory listing in the rejection message for "**/" patterns', async () => {
// py rejection includes the top-level directory listing as a hint.
const iterdir = vi
.fn()
.mockReturnValue(asyncPaths(['/workspace/file1.txt', '/workspace/file2.py', '/workspace/src', '/workspace/docs']));
const glob = vi.fn();
const tool = new GlobTool(
createFakeKaos({
glob,
iterdir,
stat: vi.fn().mockResolvedValue(stat(0, 0o100000)),
}),
workspace,
);

const result = await executeTool(tool, context({ pattern: '**/*.txt' }));

expect(result).toMatchObject({ isError: true });
expect(result.output).toMatch(/starts with '\*\*' which is not allowed/);
expect(result.output).toContain('Use more specific patterns instead');
expect(result.output).toContain('file1.txt');
expect(result.output).toContain('file2.py');
expect(result.output).toContain('src');
expect(result.output).toContain('docs');
});

it('returns a "Found N matches" footer at exactly MAX_MATCHES without truncation', async () => {
const paths = Array.from(
{ length: MAX_MATCHES },
Expand All @@ -426,22 +415,22 @@ describe('GlobTool', () => {
expect(result.output).toContain(`Found ${String(MAX_MATCHES)} matches`);
});

it('rejects "**/" patterns with literal subdirectory anchors after the prefix', async () => {
const glob = vi.fn();
it('walks "**/" patterns with literal subdirectory anchors after the prefix', async () => {
// Previously rejected up-front; now `**/main/*.py` walks like any
// other anchored pattern.
const glob = vi
.fn()
.mockReturnValue(asyncPaths(['/workspace/src/main/app.py']));
const tool = new GlobTool(
createFakeKaos({
glob,
iterdir: vi.fn().mockReturnValue(asyncPaths([])),
stat: vi.fn().mockResolvedValue(stat(0, 0o040000)),
}),
createFakeKaos({ glob, stat: vi.fn().mockResolvedValue(stat(1)) }),
workspace,
);

const result = await executeTool(tool, context({ pattern: '**/main/*.py' }));

expect(result).toMatchObject({ isError: true });
expect(result.output).toMatch(/starts with '\*\*' which is not allowed/);
expect(glob).not.toHaveBeenCalled();
expect(result.isError).toBeFalsy();
expect(glob).toHaveBeenCalledWith('/workspace', '**/main/*.py');
expect(result.output).toContain('src/main/app.py');
});

it('matches dotfiles like .gitlab-ci.yml under a simple "*.yml" pattern', async () => {
Expand Down Expand Up @@ -565,11 +554,12 @@ describe('GlobTool', () => {
expect(result.output).toMatch(/outside the workspace|outside the working directory/);
});

it('locks down rejection phrasing and large-directory caveats in the description', () => {
it('locks down brace-expansion mention and large-directory caveats in the description', () => {
const tool = new GlobTool(createFakeKaos(), workspace);

expect(tool.description).toContain('**');
expect(tool.description).toMatch(/\*\*\/\*\.py/);
expect(tool.description).toContain('brace expansion');
expect(tool.description).toContain('node_modules');
expect(tool.description).not.toContain('On Windows');
});
Expand All @@ -586,3 +576,49 @@ describe('GlobTool', () => {
expect(tool.description).toContain('/c/Users/foo');
});
});

describe('expandBraces', () => {
it('returns the original pattern unchanged when there is no brace group', () => {
expect(expandBraces('src/**/*.ts')).toEqual(['src/**/*.ts']);
});

it('expands a single top-level brace group into one pattern per alternative', () => {
expect(expandBraces('*.{ts,tsx}')).toEqual(['*.ts', '*.tsx']);
});

it('produces the cartesian product when more than one brace group appears', () => {
expect(expandBraces('{src,test}/{a,b}.ts')).toEqual([
'src/a.ts',
'src/b.ts',
'test/a.ts',
'test/b.ts',
]);
});

it('recursively expands nested brace groups', () => {
expect(expandBraces('{a,{b,c}}.ts')).toEqual(['a.ts', 'b.ts', 'c.ts']);
});

it('falls through with the literal pattern when a brace group has no top-level comma', () => {
// bash also treats `{abc}` as a literal; we follow the same rule.
expect(expandBraces('{abc}.ts')).toEqual(['{abc}.ts']);
});

it('falls through with the literal pattern when braces are unbalanced', () => {
expect(expandBraces('{a,b.ts')).toEqual(['{a,b.ts']);
expect(expandBraces('a,b}.ts')).toEqual(['a,b}.ts']);
});

it('treats backslash-escaped braces as literals and does not expand them', () => {
expect(expandBraces('\\{a,b\\}.ts')).toEqual(['\\{a,b\\}.ts']);
});

it('falls back to the original pattern when expansion would exceed the fan-out cap', () => {
// Seven groups of 3 alternatives = 3^7 = 2187 patterns, well above
// the MAX_BRACE_EXPANSIONS = 64 cap. Falling back is preferred over
// silently dropping alternatives.
const pathological = '{a,b,c}{d,e,f}{g,h,i}{j,k,l}{m,n,o}{p,q,r}{s,t,u}';
expect(expandBraces(pathological)).toEqual([pathological]);
});
});

17 changes: 16 additions & 1 deletion packages/kaos/src/internal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@
// leading `^` must remain literal even though JS regex char
// classes treat it as negation in the first position.
let charClass = pattern.slice(i + 1, end);
// Escape backslashes inside the class so a trailing backslash
// does not accidentally escape the closing `]`.
charClass = charClass.replace(/\\/g, '\\\\');

Check warning on line 208 in packages/kaos/src/internal.ts

View workflow job for this annotation

GitHub Actions / lint

eslint-plugin-unicorn(prefer-string-replace-all)

Prefer `String#replaceAll()` over `String#replace()` when using a regex with the global flag.
if (charClass.startsWith('!')) {
charClass = '^' + charClass.slice(1);
} else if (charClass.startsWith('^')) {
Expand All @@ -213,8 +216,20 @@
}
break;
}
case '\\': {
if (i + 1 < pattern.length) {
const next = pattern.charAt(i + 1);
regex += next.replaceAll(/[{}()+.\\[\]^$|]/g, '\\$&');
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Escape escaped wildcard metacharacters

When a glob pattern uses a backslash to match a literal wildcard, e.g. \*.ts or \?name, this branch appends the raw */? into the regex because the escape set omits those regex metacharacters. That makes new RegExp('^*$') / new RegExp('^?$') throw “Nothing to repeat”, so kaos.glob errors instead of matching files whose names literally contain * or ?; include * and ? in the escaped-character regex here.

Useful? React with 👍 / 👎.

// Advance past the escaped character so it is not processed
// again as a regex metacharacter. match literally.
i++;
} else {
regex += '\\\\';
}
break;
}
default:
regex += ch.replaceAll(/[{}()+.\\^$|]/g, '\\$&');
regex += ch.replaceAll(/[{}()+.\\[\]^$|]/g, '\\$&');
}
}
regex += '$';
Expand Down
17 changes: 17 additions & 0 deletions packages/kaos/test/internal.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,23 @@ describe('globPatternToRegex additional cases', () => {
// Without escaping, `+` would make `ab` match via one-or-more repetition.
expect(regex.test('ab.c$d(e)')).toBe(false);
});

it('treats backslash as an escape character for the next literal', () => {
// `\[` and `\]` should match literal brackets in the filename.
const regex = globPatternToRegex('special \\[bracket\\].ts', true);
expect(regex.test('special [bracket].ts')).toBe(true);
expect(regex.test('special bracket.ts')).toBe(false);
});

it('escapes backslashes inside character classes to avoid an unterminated regex', () => {
// A trailing backslash inside a glob char class (`[abc\\]`) would
// escape the closing `]` in the resulting regex. We escape it so the
// regex stays valid and the backslash is treated as a literal member.
const regex = globPatternToRegex('file[abc\\].txt', true);
expect(regex.test('filea.txt')).toBe(true);
expect(regex.test('file\\.txt')).toBe(true);
expect(regex.test('filez.txt')).toBe(false);
});
});

describe('globPatternToRegex', () => {
Expand Down
Loading