From 550b153878058a088bdbe375a49e0865c5758887 Mon Sep 17 00:00:00 2001
From: Alexei Chmelev
Date: Fri, 15 May 2026 07:47:51 +0200
Subject: [PATCH 1/3] fix: prevent thinking content from leaking into LLM
 output

Add renderChatMessageWithoutThinking() to messageContent.ts; it returns
an empty string for role:"thinking" chunks instead of rendering them as
plain text.

Replace renderChatMessage() with renderChatMessageWithoutThinking() at
all call sites where thinking content must not appear in output:

- streamLines() in diff/util.ts (Apply and Edit-with-rules path)
- _streamComplete() in all provider implementations (Edit-without-rules
  path)
- BaseLLM.chat() in llm/index.ts (title generation, repo map
  summarisation, context retrieval tool selection, next-edit prediction,
  conversation compaction)
---
 core/diff/util.ts           | 6 ++++--
 core/llm/index.ts           | 7 +++++--
 core/llm/llms/Anthropic.ts  | 8 ++++++--
 core/llm/llms/Bedrock.ts    | 7 +++++--
 core/llm/llms/Cloudflare.ts | 4 ++--
 core/llm/llms/Cohere.ts     | 7 +++++--
 core/llm/llms/CustomLLM.ts  | 4 ++--
 core/llm/llms/Flowise.ts    | 4 ++--
 core/llm/llms/Gemini.ts     | 7 +++++--
 core/llm/llms/OpenAI.ts     | 7 +++++--
 core/llm/llms/VertexAI.ts   | 7 +++++--
 core/util/messageContent.ts | 5 +++++
 12 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/core/diff/util.ts b/core/diff/util.ts
index 571ede7b977..927fde501f0 100644
--- a/core/diff/util.ts
+++ b/core/diff/util.ts
@@ -1,7 +1,7 @@
 import { distance } from "fastest-levenshtein";
 
 import { ChatMessage } from "../index.js";
-import { renderChatMessage } from "../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../util/messageContent.js";
 
 export type LineStream = AsyncGenerator<string>;
 
@@ -108,7 +108,9 @@ export async function* streamLines(
   try {
     for await (const update of streamCompletion) {
       const chunk =
-        typeof update === "string" ? update : renderChatMessage(update);
+        typeof update === "string"
+          ? update
+          : renderChatMessageWithoutThinking(update);
       buffer += chunk;
       const lines = buffer.split("\n");
       buffer = lines.pop() ?? "";
diff --git a/core/llm/index.ts b/core/llm/index.ts
index f7d97b73e3c..13411528412 100644
--- a/core/llm/index.ts
+++ b/core/llm/index.ts
@@ -35,7 +35,10 @@ import { isAbortError } from "../util/isAbortError.js";
 import { isLemonadeInstalled } from "../util/lemonadeHelper.js";
 import { Logger } from "../util/Logger.js";
 import mergeJson from "../util/merge.js";
-import { renderChatMessage } from "../util/messageContent.js";
+import {
+  renderChatMessage,
+  renderChatMessageWithoutThinking,
+} from "../util/messageContent.js";
 import { isOllamaInstalled } from "../util/ollamaHelper.js";
 import { TokensBatchingService } from "../util/TokensBatchingService.js";
 import { withExponentialBackoff } from "../util/withExponentialBackoff.js";
@@ -964,7 +967,7 @@ export abstract class BaseLLM implements ILLM {
   ) {
     let completion = "";
     for await (const message of this.streamChat(messages, signal, options)) {
-      completion += renderChatMessage(message);
+      completion += renderChatMessageWithoutThinking(message);
     }
     return { role: "assistant" as const, content: completion };
   }
diff --git a/core/llm/llms/Anthropic.ts b/core/llm/llms/Anthropic.ts
index e33f92f203f..33b109e63ee 100644
--- a/core/llm/llms/Anthropic.ts
+++ b/core/llm/llms/Anthropic.ts
@@ -27,7 +27,11 @@ import {
   Usage,
 } from "../../index.js";
 import { safeParseToolCallArgs } from "../../tools/parseArgs.js";
-import { renderChatMessage, stripImages } from "../../util/messageContent.js";
+import {
+  renderChatMessage,
+  renderChatMessageWithoutThinking,
+  stripImages,
+} from "../../util/messageContent.js";
 import { extractBase64FromDataUrl } from "../../util/url.js";
 import { DEFAULT_REASONING_TOKENS } from "../constants.js";
 import { BaseLLM } from "../index.js";
@@ -258,7 +262,7 @@ class Anthropic extends BaseLLM {
   ): AsyncGenerator<string> {
     const messages = [{ role: "user" as const, content: prompt }];
     for await (const update of this._streamChat(messages, signal, options)) {
-      yield renderChatMessage(update);
+      yield renderChatMessageWithoutThinking(update);
     }
   }
diff --git a/core/llm/llms/Bedrock.ts b/core/llm/llms/Bedrock.ts
index bc9566cbecc..151fab087f1 100644
--- a/core/llm/llms/Bedrock.ts
+++ b/core/llm/llms/Bedrock.ts
@@ -20,7 +20,10 @@ import { fromNodeProviderChain } from "@aws-sdk/credential-providers";
 import type { CompletionOptions } from "../../index.js";
 import { ChatMessage, Chunk, LLMOptions, MessageContent } from "../../index.js";
 import { safeParseToolCallArgs } from "../../tools/parseArgs.js";
-import { renderChatMessage, stripImages } from "../../util/messageContent.js";
+import {
+  renderChatMessageWithoutThinking,
+  stripImages,
+} from "../../util/messageContent.js";
 import { parseDataUrl } from "../../util/url.js";
 import { BaseLLM } from "../index.js";
 import { PROVIDER_TOOL_SUPPORT } from "../toolSupport.js";
@@ -100,7 +103,7 @@ class Bedrock extends BaseLLM {
   ): AsyncGenerator<string> {
     const messages = [{ role: "user" as const, content: prompt }];
     for await (const update of this._streamChat(messages, signal, options)) {
-      yield renderChatMessage(update);
+      yield renderChatMessageWithoutThinking(update);
     }
   }
diff --git a/core/llm/llms/Cloudflare.ts b/core/llm/llms/Cloudflare.ts
index cba4204ef0b..2674133c74c 100644
--- a/core/llm/llms/Cloudflare.ts
+++ b/core/llm/llms/Cloudflare.ts
@@ -1,6 +1,6 @@
 import { streamSse } from "@continuedev/fetch";
 import { ChatMessage, CompletionOptions } from "../../index.js";
-import { renderChatMessage } from "../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 
 export default class Cloudflare extends BaseLLM {
@@ -59,7 +59,7 @@ export default class Cloudflare extends BaseLLM {
       signal,
       options,
     )) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   }
 }
diff --git a/core/llm/llms/Cohere.ts b/core/llm/llms/Cohere.ts
index b0a27b6cf8c..34b4e7c8f95 100644
--- a/core/llm/llms/Cohere.ts
+++ b/core/llm/llms/Cohere.ts
@@ -5,7 +5,10 @@ import {
   ChatMessage,
   CompletionOptions,
   LLMOptions,
 } from "../../index.js";
-import { renderChatMessage, stripImages } from "../../util/messageContent.js";
+import {
+  renderChatMessageWithoutThinking,
+  stripImages,
+} from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 import { DEFAULT_REASONING_TOKENS } from "../constants.js";
@@ -148,7 +151,7 @@
   ): AsyncGenerator<string> {
     const messages = [{ role: "user" as const, content: prompt }];
     for await (const update of this._streamChat(messages, signal, options)) {
-      yield renderChatMessage(update);
+      yield renderChatMessageWithoutThinking(update);
     }
   }
diff --git a/core/llm/llms/CustomLLM.ts b/core/llm/llms/CustomLLM.ts
index e04ea0ab739..e3967b6ec7b 100644
--- a/core/llm/llms/CustomLLM.ts
+++ b/core/llm/llms/CustomLLM.ts
@@ -1,5 +1,5 @@
 import { ChatMessage, CompletionOptions, CustomLLM } from "../../index.js";
-import { renderChatMessage } from "../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 
 class CustomLLMClass extends BaseLLM {
@@ -76,7 +76,7 @@ class CustomLLMClass extends BaseLLM {
         if (typeof content === "string") {
           yield content;
         } else {
-          yield renderChatMessage(content);
+          yield renderChatMessageWithoutThinking(content);
         }
       }
     } else {
diff --git a/core/llm/llms/Flowise.ts b/core/llm/llms/Flowise.ts
index 57bb18ccc66..19fcd6c4517 100644
--- a/core/llm/llms/Flowise.ts
+++ b/core/llm/llms/Flowise.ts
@@ -1,7 +1,7 @@
 import socketIOClient, { Socket } from "socket.io-client";
 
 import { ChatMessage, CompletionOptions, LLMOptions } from "../../index.js";
-import { renderChatMessage } from "../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 
 interface IFlowiseApiOptions {
@@ -121,7 +121,7 @@ class Flowise extends BaseLLM {
   ): AsyncGenerator<string> {
     const message: ChatMessage = { role: "user", content: prompt };
     for await (const chunk of this._streamChat([message], signal, options)) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   }
diff --git a/core/llm/llms/Gemini.ts b/core/llm/llms/Gemini.ts
index fb080fb44f2..3d1bc37caaa 100644
--- a/core/llm/llms/Gemini.ts
+++ b/core/llm/llms/Gemini.ts
@@ -10,7 +10,10 @@ import {
   ToolCallDelta,
 } from "../../index.js";
 import { safeParseToolCallArgs } from "../../tools/parseArgs.js";
-import { renderChatMessage, stripImages } from "../../util/messageContent.js";
+import {
+  renderChatMessageWithoutThinking,
+  stripImages,
+} from "../../util/messageContent.js";
 import { extractBase64FromDataUrl } from "../../util/url.js";
 import { BaseLLM } from "../index.js";
 import { LlmApiRequestType } from "../openaiTypeConverters.js";
@@ -89,7 +92,7 @@ class Gemini extends BaseLLM {
       signal,
       options,
     )) {
-      yield renderChatMessage(message);
+      yield renderChatMessageWithoutThinking(message);
     }
   }
diff --git a/core/llm/llms/OpenAI.ts b/core/llm/llms/OpenAI.ts
index c65b55dc1a5..53e306780f1 100644
--- a/core/llm/llms/OpenAI.ts
+++ b/core/llm/llms/OpenAI.ts
@@ -16,7 +16,10 @@ import {
   LLMOptions,
   Tool,
 } from "../../index.js";
-import { renderChatMessage } from "../../util/messageContent.js";
+import {
+  renderChatMessage,
+  renderChatMessageWithoutThinking,
+} from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 import {
   fromChatCompletionChunk,
@@ -430,7 +433,7 @@ class OpenAI extends BaseLLM {
       signal,
       options,
     )) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   }
diff --git a/core/llm/llms/VertexAI.ts b/core/llm/llms/VertexAI.ts
index 2bb5fa13d9d..aa4bc73e123 100644
--- a/core/llm/llms/VertexAI.ts
+++ b/core/llm/llms/VertexAI.ts
@@ -2,7 +2,10 @@ import { AuthClient, GoogleAuth, JWT, auth } from "google-auth-library";
 
 import { streamResponse, streamSse } from "@continuedev/fetch";
 
 import { ChatMessage, CompletionOptions, LLMOptions } from "../../index.js";
-import { renderChatMessage, stripImages } from "../../util/messageContent.js";
+import {
+  renderChatMessageWithoutThinking,
+  stripImages,
+} from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 import { LlmApiRequestType } from "../openaiTypeConverters.js";
@@ -489,7 +492,7 @@ class VertexAI extends BaseLLM {
       signal,
      options,
     )) {
-      yield renderChatMessage(message);
+      yield renderChatMessageWithoutThinking(message);
     }
   }
diff --git a/core/util/messageContent.ts b/core/util/messageContent.ts
index 5a0bc95bf78..635372777bb 100644
--- a/core/util/messageContent.ts
+++ b/core/util/messageContent.ts
@@ -17,6 +17,11 @@ export function stripImages(messageContent: MessageContent): string {
     .join("\n");
 }
 
+export function renderChatMessageWithoutThinking(message: ChatMessage): string {
+  if (message.role === "thinking") return "";
+  return renderChatMessage(message);
+}
+
 export function renderChatMessage(message: ChatMessage): string {
   switch (message?.role) {
     case "user":

From 9522c8b044bb94e4781ec597c91258a3ade8860f Mon Sep 17 00:00:00 2001
From: Alexei Chmelev
Date: Fri, 15 May 2026 15:53:38 +0200
Subject: [PATCH 2/3] fix: prevent thinking content from leaking in edit,
 history export, and legacy slash commands

Extend the renderChatMessageWithoutThinking fix to six remaining call
sites: recursiveStream (diff buffer), toMarkDown (history export), and
the four built-in-legacy slash commands (commit, review, draftIssue,
onboard) that streamed chunks directly to the UI.
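
A minimal sketch of the helper's contract at each converted call site
(message shape abbreviated; renderChatMessageWithoutThinking is
introduced in PATCH 1/3):

    // Streamed chunks can arrive with role "thinking"; rendering them as
    // plain text is what leaked chain-of-thought into command output and
    // history exports.
    const chunk: ChatMessage = { role: "thinking", content: "chain of thought" };
    renderChatMessage(chunk);                // "chain of thought" (leaks)
    renderChatMessageWithoutThinking(chunk); // "" (dropped)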
---
 core/commands/slash/built-in-legacy/commit.ts     | 4 ++--
 core/commands/slash/built-in-legacy/draftIssue.ts | 4 ++--
 core/commands/slash/built-in-legacy/onboard.ts    | 4 ++--
 core/commands/slash/built-in-legacy/review.ts     | 4 ++--
 core/edit/recursiveStream.ts                      | 4 ++--
 core/util/historyUtils.ts                         | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/core/commands/slash/built-in-legacy/commit.ts b/core/commands/slash/built-in-legacy/commit.ts
index 1f7e713437b..5b7acaa704e 100644
--- a/core/commands/slash/built-in-legacy/commit.ts
+++ b/core/commands/slash/built-in-legacy/commit.ts
@@ -1,5 +1,5 @@
 import { SlashCommand } from "../../../index.js";
-import { renderChatMessage } from "../../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../../util/messageContent.js";
 
 const CommitMessageCommand: SlashCommand = {
   name: "commit",
@@ -18,7 +18,7 @@ const CommitMessageCommand: SlashCommand = {
       [{ role: "user", content: prompt }],
       abortController.signal,
     )) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   },
 };
diff --git a/core/commands/slash/built-in-legacy/draftIssue.ts b/core/commands/slash/built-in-legacy/draftIssue.ts
index 9a1c5bdf9bb..448e5e1d8a0 100644
--- a/core/commands/slash/built-in-legacy/draftIssue.ts
+++ b/core/commands/slash/built-in-legacy/draftIssue.ts
@@ -1,6 +1,6 @@
 import { ChatMessage, SlashCommand } from "../../../index.js";
 import { removeQuotesAndEscapes } from "../../../util/index.js";
-import { renderChatMessage } from "../../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../../util/messageContent.js";
 
 const PROMPT = (
   input: string,
@@ -49,7 +49,7 @@ const DraftIssueCommand: SlashCommand = {
       abortController.signal,
     )) {
       body += chunk.content;
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
 
     const url = `${params.repositoryUrl}/issues/new?title=${encodeURIComponent(
diff --git a/core/commands/slash/built-in-legacy/onboard.ts b/core/commands/slash/built-in-legacy/onboard.ts
index 500781450df..aef89b32587 100644
--- a/core/commands/slash/built-in-legacy/onboard.ts
+++ b/core/commands/slash/built-in-legacy/onboard.ts
@@ -3,7 +3,7 @@ import ignore from "ignore";
 import type { FileType, IDE, SlashCommand } from "../../..";
 import { getGlobalContinueIgArray } from "../../../indexing/continueignore";
 import { DEFAULT_IGNORE, gitIgArrayFromFile } from "../../../indexing/ignore";
-import { renderChatMessage } from "../../../util/messageContent";
+import { renderChatMessageWithoutThinking } from "../../../util/messageContent";
 import {
   findUriInDirs,
   getUriPathBasename,
@@ -48,7 +48,7 @@ const OnboardSlashCommand: SlashCommand = {
       [{ role: "user", content: prompt }],
       abortController.signal,
     )) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   },
 };
diff --git a/core/commands/slash/built-in-legacy/review.ts b/core/commands/slash/built-in-legacy/review.ts
index 37c280b3414..b17f4235419 100644
--- a/core/commands/slash/built-in-legacy/review.ts
+++ b/core/commands/slash/built-in-legacy/review.ts
@@ -1,5 +1,5 @@
 import { ChatMessage, SlashCommand } from "../../../index.js";
-import { renderChatMessage } from "../../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../../util/messageContent.js";
 
 const prompt = `
   Review the following code, focusing on Readability, Maintainability, Code Smells, Speed, and Memory Performance. Provide feedback with these guidelines:
@@ -47,7 +47,7 @@ const ReviewMessageCommand: SlashCommand = {
       [{ role: "user", content: content }],
       abortController.signal,
     )) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   },
 };
diff --git a/core/edit/recursiveStream.ts b/core/edit/recursiveStream.ts
index 5c9f2fef6a6..fd80f0f67e0 100644
--- a/core/edit/recursiveStream.ts
+++ b/core/edit/recursiveStream.ts
@@ -7,7 +7,7 @@ import {
 } from "..";
 import { DEFAULT_MAX_TOKENS } from "../llm/constants";
 import { countTokens } from "../llm/countTokens";
-import { renderChatMessage } from "../util/messageContent";
+import { renderChatMessageWithoutThinking } from "../util/messageContent";
 import { APPLY_UNIQUE_TOKEN } from "./constants.js";
 
 const INFINITE_STREAM_SAFETY = 0.9;
@@ -85,7 +85,7 @@ export async function* recursiveStream(
   for await (const chunk of generator) {
     yield chunk;
 
-    const rendered = renderChatMessage(chunk);
+    const rendered = renderChatMessageWithoutThinking(chunk);
     buffer += rendered;
     totalTokens += countTokens(chunk.content);
diff --git a/core/util/historyUtils.ts b/core/util/historyUtils.ts
index fc0939c7bcf..3ab09739e97 100644
--- a/core/util/historyUtils.ts
+++ b/core/util/historyUtils.ts
@@ -5,7 +5,7 @@ import path from "path";
 
 import { languageForFilepath } from "../autocomplete/constants/AutocompleteLanguageInfo.js";
 import { ChatMessage, IDE } from "../index.js";
-import { renderChatMessage } from "../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../util/messageContent.js";
 import { getContinueGlobalPath } from "../util/paths.js";
 
 // If useful elsewhere, helper funcs should move to core/util/index.ts or similar
@@ -45,7 +45,7 @@ export function toMarkDown(history: ChatMessage[], time?: Date): string {
   let content = `### [Continue](https://continue.dev) session transcript\n  Exported: ${time.toLocaleString()}`;
 
   for (const msg of history) {
-    let msgText = renderChatMessage(msg);
+    let msgText = renderChatMessageWithoutThinking(msg);
     if (!msgText) {
       continue; // Skip messages without content
     }

From ef340a0a6c868bf0f0a6bc147cbdf11ac5ea4b68 Mon Sep 17 00:00:00 2001
From: Alexei Chmelev
Date: Fri, 15 May 2026 16:24:06 +0200
Subject: [PATCH 3/3] revert: restore renderChatMessage in recursiveStream to
 preserve buffer integrity

The buffer in recursiveStream must be a faithful copy of all model output
so the recursive continuation path can resume from the correct position.
Thinking content is already filtered downstream by streamLines() in
diff/util.ts, so no thinking leaks to the diff pipeline or UI.
---
 core/edit/recursiveStream.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/edit/recursiveStream.ts b/core/edit/recursiveStream.ts
index fd80f0f67e0..5c9f2fef6a6 100644
--- a/core/edit/recursiveStream.ts
+++ b/core/edit/recursiveStream.ts
@@ -7,7 +7,7 @@ import {
 } from "..";
 import { DEFAULT_MAX_TOKENS } from "../llm/constants";
 import { countTokens } from "../llm/countTokens";
-import { renderChatMessageWithoutThinking } from "../util/messageContent";
+import { renderChatMessage } from "../util/messageContent";
 import { APPLY_UNIQUE_TOKEN } from "./constants.js";
 
 const INFINITE_STREAM_SAFETY = 0.9;
@@ -85,7 +85,7 @@ export async function* recursiveStream(
   for await (const chunk of generator) {
     yield chunk;
 
-    const rendered = renderChatMessageWithoutThinking(chunk);
+    const rendered = renderChatMessage(chunk);
     buffer += rendered;
     totalTokens += countTokens(chunk.content);
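
A quick check of the net behavior after the series (hypothetical snippet,
not part of the patches; import paths are illustrative, and it assumes
renderChatMessage returns assistant string content unchanged, as the
existing call sites rely on):

    import { ChatMessage } from "./core/index.js"; // assumed path
    import {
      renderChatMessage,
      renderChatMessageWithoutThinking,
    } from "./core/util/messageContent.js";

    const thinking: ChatMessage = { role: "thinking", content: "hidden reasoning" };
    const answer: ChatMessage = { role: "assistant", content: "visible answer" };

    // User-facing paths drop thinking chunks entirely...
    renderChatMessageWithoutThinking(thinking); // ""
    renderChatMessageWithoutThinking(answer);   // "visible answer"

    // ...while recursiveStream's buffer keeps the faithful transcript it
    // needs to resume a continuation from the exact output position.
    renderChatMessage(thinking);                // "hidden reasoning"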