Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import {
1. Skips non-assistant messages
2. Intercepts text that looks like a tool call in a markdown code block format:
```tool
TOOL_NAME: example_tool
example_tool
BEGIN_ARG: arg1
value
END_ARG
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ describe("createSystemMessageExampleCall", () => {
);
Comment thread
cubic-dev-ai[bot] marked this conversation as resolved.
expect(result).includes("Use this tool to test things");
expect(result).includes("```tool");
expect(result).includes("TOOL_NAME: test_tool");
expect(result).includes("\ntest_tool\n");
expect(result).not.toContain("TOOL_NAME:");
expect(result).includes("```");
});

Expand All @@ -42,7 +43,8 @@ describe("createSystemMessageExampleCall", () => {

expect(result).includes("Use this tool to test things");
expect(result).includes("```tool");
expect(result).includes("TOOL_NAME: test_tool");
expect(result).includes("\ntest_tool\n");
expect(result).not.toContain("TOOL_NAME:");
expect(result).includes("BEGIN_ARG: arg1");
expect(result).includes("value1");
expect(result).includes("END_ARG");
Expand Down Expand Up @@ -96,6 +98,7 @@ describe("generateToolsSystemMessage", () => {
// Check structure rather than exact text
expect(result).includes(TOOL_INSTRUCTIONS_TAG);
expect(result).includes(customDescription);
expect(result).not.toContain("TOOL_NAME:");
expect(result).includes(closeTag(TOOL_INSTRUCTIONS_TAG));

// Check for general section about available tools without requiring exact wording
Expand Down Expand Up @@ -131,7 +134,7 @@ describe("generateToolsSystemMessage", () => {
const result = generateToolsSystemMessage(tools, framework);

// Check for key elements without requiring exact wording
expect(result).includes(`TOOL_NAME: ${toolName}`);
expect(result).includes(`\n${toolName}\n`);
expect(result).includes("TOOL_DESCRIPTION:");
expect(result).includes(toolDesc);
expect(result).includes("TOOL_ARG: param1 (string, required)");
Expand Down Expand Up @@ -183,7 +186,7 @@ describe("generateToolsSystemMessage", () => {

// Check for both types of tools
expect(result).includes(customMsg);
expect(result).includes("TOOL_NAME: tool_without_description");
expect(result).includes("\ntool_without_description\n");

// Verify structure without exact text matching
const hasAvailableToolsSection = /tools are available to you/i.test(result);
Expand Down Expand Up @@ -213,7 +216,7 @@ describe("generateToolsSystemMessage", () => {

// Check for example sections without exact text
expect(result).includes("```tool_definition");
expect(result).includes("TOOL_NAME: example_tool");
expect(result).includes("\nexample_tool\n");
expect(result).includes("```tool");

// Check for example format structure
Expand Down Expand Up @@ -262,6 +265,6 @@ describe("addSystemMessageToolsToSystemMessage", () => {

expect(result.startsWith(baseMessage)).toBe(true);
expect(result).includes(TOOL_INSTRUCTIONS_TAG);
expect(result).includes(`TOOL_NAME: ${toolName}`);
expect(result).includes(`\n${toolName}\n`);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { detectToolCallStart } from "../detectToolCallStart";
describe("detectToolCallStart", () => {
let framework = new SystemMessageToolCodeblocksFramework();
it("detects standard tool call start", () => {
const buffer = "```tool\nTOOL_NAME: example_tool";
const buffer = "```tool\nexample_tool";
const result = detectToolCallStart(buffer, framework);

expect(result.isInToolCall).toBe(true);
Expand All @@ -19,11 +19,11 @@ describe("detectToolCallStart", () => {

expect(result.isInToolCall).toBe(true);
expect(result.isInPartialStart).toBe(false);
expect(result.modifiedBuffer).toBe("```tool\nTOOL_NAME: example_tool");
expect(result.modifiedBuffer).toBe("```tool\n example_tool");
});

it("detects case-insensitive tool call start", () => {
const buffer = "```ToOl\nTOOL_NAME: example_tool";
const buffer = "```ToOl\nexample_tool";
const result = detectToolCallStart(buffer, framework);

expect(result.isInToolCall).toBe(true);
Expand All @@ -37,7 +37,7 @@ describe("detectToolCallStart", () => {

expect(result.isInToolCall).toBe(true);
expect(result.isInPartialStart).toBe(false);
expect(result.modifiedBuffer).toBe("```tool\nTOOL_NAME: example_tool");
expect(result.modifiedBuffer).toBe("```tool\n example_tool");
});

it("identifies partial tool call start", () => {
Expand Down
12 changes: 6 additions & 6 deletions core/tools/systemMessageTools/toolCodeblocks/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ export class SystemMessageToolCodeblocksFramework
// Poor models are really bad at following instructions, alternate starts allowed:
acceptedToolCallStarts: [string, string][] = [
["```tool\n", "```tool\n"],
["tool_name:", "```tool\nTOOL_NAME:"],
["tool_name:", "```tool\n"],
];

toolCallStateToSystemToolCall(state: ToolCallState): string {
let parts = ["```tool"];
parts.push(`TOOL_NAME: ${state.toolCall.function.name}`);
parts.push(state.toolCall.function.name);
try {
for (const arg in state.parsedArgs) {
parts.push(`BEGIN_ARG: ${arg}`);
Expand All @@ -31,7 +31,7 @@ export class SystemMessageToolCodeblocksFramework
handleToolCallBuffer = handleToolCallBuffer;

toolToSystemToolDefinition(tool: Tool): string {
let toolDefinition = `\`\`\`tool_definition\nTOOL_NAME: ${tool.function.name}\n`;
let toolDefinition = `\`\`\`tool_definition\n${tool.function.name}\n`;

if (tool.function.description) {
toolDefinition += `TOOL_DESCRIPTION:\n${tool.function.description}\n`;
Expand Down Expand Up @@ -75,7 +75,7 @@ You can only call ONE tool at at time. The tool codeblock should be the last thi

exampleDynamicToolDefinition = `
\`\`\`tool_definition
TOOL_NAME: example_tool
example_tool
TOOL_ARG: arg_1 (string, required)
Description of the first argument
END_ARG
Expand All @@ -85,7 +85,7 @@ END_ARG

exampleDynamicToolCall = `
\`\`\`tool
TOOL_NAME: example_tool
example_tool
BEGIN_ARG: arg_1
The value
of arg 1
Expand All @@ -101,7 +101,7 @@ END_ARG
exampleArgs: Array<[string, string | number]> = [],
) {
let callExample = `\`\`\`tool
TOOL_NAME: ${toolName}`;
${toolName}`;

// Add each argument dynamically
for (const [argName, argValue] of exampleArgs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ describe("interceptSystemToolCalls", () => {
},
],
[{ role: "assistant", content: "```tool\n" }],
[{ role: "assistant", content: "TOOL_NAME: test_tool\n" }],
[{ role: "assistant", content: "test_tool\n" }],
[{ role: "assistant", content: "BEGIN_ARG: arg1\n" }],
[{ role: "assistant", content: "value1\n" }],
[{ role: "assistant", content: "END_ARG\n" }],
Expand Down Expand Up @@ -182,7 +182,7 @@ describe("interceptSystemToolCalls", () => {
it("processes tool_name without codeblock format", async () => {
const messages: ChatMessage[][] = [
[{ role: "assistant", content: "I'll help you with that.\n" }],
[{ role: "assistant", content: "TOOL_NAME: test_tool\n" }],
[{ role: "assistant", content: "tool_name: test_tool\n" }],
[{ role: "assistant", content: "BEGIN_ARG: arg1\n" }],
[{ role: "assistant", content: "value1\n" }],
[{ role: "assistant", content: "END_ARG\n" }],
Expand Down Expand Up @@ -242,10 +242,46 @@ describe("interceptSystemToolCalls", () => {
).toBe("}");
});

it("does not intercept quoted tool syntax in explanatory text", async () => {
  // Tool-looking lines that arrive with no ```tool fence in front of them.
  const messages: ChatMessage[][] = [
    [{ role: "assistant", content: "Here is the syntax:\n" }],
    [{ role: "assistant", content: "read_file\n" }],
    [{ role: "assistant", content: "BEGIN_ARG: filepath\n" }],
    [{ role: "assistant", content: "path/to/the_file.txt\n" }],
    [{ role: "assistant", content: "END_ARG\n" }],
  ];

  const generator = interceptSystemToolCalls(
    createAsyncGenerator(messages),
    abortController,
    framework,
  );

  // Drain the generator, stopping at completion or at the first empty yield.
  const outputChunks: string[] = [];
  for (
    let step = await generator.next();
    !step.done && step.value;
    step = await generator.next()
  ) {
    const firstMessage = (step.value as AssistantChatMessage[])[0];
    const textParts = firstMessage.content as {
      type: "text";
      text: string;
    }[];

    outputChunks.push(textParts[0].text);
    // No yielded message may carry a tool call.
    expect(firstMessage.toolCalls).toBeFalsy();
  }

  // The concatenated output must equal the input text, character for character.
  const streamedText = outputChunks.join("");
  expect(streamedText).toBe(
    "Here is the syntax:\nread_file\nBEGIN_ARG: filepath\npath/to/the_file.txt\nEND_ARG\n",
  );
});
it("ignores content after a tool call", async () => {
const messages: ChatMessage[][] = [
[{ role: "assistant", content: "```tool\n" }],
[{ role: "assistant", content: "TOOL_NAME: test_tool\n" }],
[{ role: "assistant", content: "test_tool\n" }],
[{ role: "assistant", content: "BEGIN_ARG: arg1\n" }],
[{ role: "assistant", content: "value1\n" }],
[{ role: "assistant", content: "END_ARG\n" }],
Expand Down Expand Up @@ -273,7 +309,7 @@ describe("interceptSystemToolCalls", () => {
it("stops processing when aborted", async () => {
const messages: ChatMessage[][] = [
[{ role: "assistant", content: "```tool\n" }],
[{ role: "assistant", content: "TOOL_NAME: test_tool\n" }],
[{ role: "assistant", content: "test_tool\n" }],
];

const generator = interceptSystemToolCalls(
Expand All @@ -296,7 +332,7 @@ describe("interceptSystemToolCalls", () => {
it("handles JSON parsing for argument values", async () => {
const messages: ChatMessage[][] = [
[{ role: "assistant", content: "```tool\n" }],
[{ role: "assistant", content: "TOOL_NAME: test_tool\n" }],
[{ role: "assistant", content: "test_tool\n" }],
[{ role: "assistant", content: "BEGIN_ARG: number_arg\n" }],
[{ role: "assistant", content: "123\n" }],
[{ role: "assistant", content: "END_ARG\n" }],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ export function handleToolCallBuffer(
// Tool name line - process once line 2 is reached
case 1:
if (isNewLine) {
const name = (line.split(/tool_?name:/i)[1] ?? "").trim();
const hasToolNameTag = /tool_?name:/i.test(line);
const name = hasToolNameTag
? (line.split(/tool_?name:/i)[1] ?? "").trim()
: line.trim();
if (!name) {
throw new Error("Invalid tool name");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,24 @@ describe("handleToolCallBuffer", () => {
state = getInitialToolCallParseState();
});

it("handles the ```tool\ntool_name the name", () => {
handleToolCallBuffer("```tool\ntool_name: my_name", state);
it("handles the ```tool\\n<tool_name> format", () => {
handleToolCallBuffer("```tool\nmy_name", state);
Comment thread
cubic-dev-ai[bot] marked this conversation as resolved.
expect(state.currentLineIndex).toBe(1);

const result = handleToolCallBuffer("\n", state);
expect(result).toEqual({
type: "function",
function: {
name: "my_name",
arguments: "",
},
id: expect.any(String),
});
expect(state.currentLineIndex).toBe(2);
});

it("handles the ```tool\\nTOOL_NAME: <tool_name> format", () => {
handleToolCallBuffer("```tool\nTOOL_NAME: my_name", state);
expect(state.currentLineIndex).toBe(1);

const result = handleToolCallBuffer("\n", state);
Expand Down Expand Up @@ -45,6 +61,24 @@ describe("handleToolCallBuffer", () => {
expect(state.currentLineIndex).toBe(2);
});

it("handles tool name line without prefix", () => {
  // Position the parser on the tool-name line.
  state.currentLineIndex = 1;

  // The bare name on its own produces no result yet.
  const beforeNewline = handleToolCallBuffer("test_tool", state);
  expect(beforeNewline).toBeUndefined();

  // The newline that ends the line yields the tool call with the bare name.
  const afterNewline = handleToolCallBuffer("\n", state);
  const expectedCall = {
    type: "function",
    function: {
      name: "test_tool",
      arguments: "",
    },
    id: expect.any(String),
  };

  expect(afterNewline).toEqual(expectedCall);
});

it("handles case-insensitive tool name line", () => {
state.currentLineIndex = 1;

Expand All @@ -63,6 +97,15 @@ describe("handleToolCallBuffer", () => {
});
});

it("rejects tagged tool name lines with empty values", () => {
  // Feed a name line that has the TOOL_NAME: tag but nothing after it.
  state.currentLineIndex = 1;
  handleToolCallBuffer("TOOL_NAME:", state);

  // Ending that line with a newline is expected to throw.
  const finishNameLine = () => handleToolCallBuffer("\n", state);
  expect(finishNameLine).toThrow("Invalid tool name");
});

it("begins an argument correctly", () => {
state.currentLineIndex = 2;

Expand Down
Loading