From 550b153878058a088bdbe375a49e0865c5758887 Mon Sep 17 00:00:00 2001
From: Alexei Chmelev
Date: Fri, 15 May 2026 07:47:51 +0200
Subject: [PATCH 1/3] fix: prevent thinking content from leaking into LLM
 output

Add renderChatMessageWithoutThinking() to messageContent.ts; it returns
an empty string for role:"thinking" chunks instead of rendering them as
plain text.

Replace renderChatMessage() with renderChatMessageWithoutThinking() at
all call sites where thinking content must not appear in output:

- streamLines() in diff/util.ts (Apply and Edit-with-rules path)
- _streamComplete() in all provider implementations (Edit-without-rules
  path)
- BaseLLM.chat() in llm/index.ts (title generation, repo map
  summarisation, context retrieval tool selection, next-edit prediction,
  conversation compaction)
---
 core/diff/util.ts           | 6 ++++--
 core/llm/index.ts           | 7 +++++--
 core/llm/llms/Anthropic.ts  | 8 ++++++--
 core/llm/llms/Bedrock.ts    | 7 +++++--
 core/llm/llms/Cloudflare.ts | 4 ++--
 core/llm/llms/Cohere.ts     | 7 +++++--
 core/llm/llms/CustomLLM.ts  | 4 ++--
 core/llm/llms/Flowise.ts    | 4 ++--
 core/llm/llms/Gemini.ts     | 7 +++++--
 core/llm/llms/OpenAI.ts     | 7 +++++--
 core/llm/llms/VertexAI.ts   | 7 +++++--
 core/util/messageContent.ts | 5 +++++
 12 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/core/diff/util.ts b/core/diff/util.ts
index 571ede7b977..927fde501f0 100644
--- a/core/diff/util.ts
+++ b/core/diff/util.ts
@@ -1,7 +1,7 @@
 import { distance } from "fastest-levenshtein";
 
 import { ChatMessage } from "../index.js";
-import { renderChatMessage } from "../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../util/messageContent.js";
 
 export type LineStream = AsyncGenerator<string>;
 
@@ -108,7 +108,9 @@ export async function* streamLines(
   try {
     for await (const update of streamCompletion) {
       const chunk =
-        typeof update === "string" ? update : renderChatMessage(update);
+        typeof update === "string"
+          ? update
+          : renderChatMessageWithoutThinking(update);
       buffer += chunk;
       const lines = buffer.split("\n");
       buffer = lines.pop() ?? "";
diff --git a/core/llm/index.ts b/core/llm/index.ts
index f7d97b73e3c..13411528412 100644
--- a/core/llm/index.ts
+++ b/core/llm/index.ts
@@ -35,7 +35,10 @@ import { isAbortError } from "../util/isAbortError.js";
 import { isLemonadeInstalled } from "../util/lemonadeHelper.js";
 import { Logger } from "../util/Logger.js";
 import mergeJson from "../util/merge.js";
-import { renderChatMessage } from "../util/messageContent.js";
+import {
+  renderChatMessage,
+  renderChatMessageWithoutThinking,
+} from "../util/messageContent.js";
 import { isOllamaInstalled } from "../util/ollamaHelper.js";
 import { TokensBatchingService } from "../util/TokensBatchingService.js";
 import { withExponentialBackoff } from "../util/withExponentialBackoff.js";
@@ -964,7 +967,7 @@ export abstract class BaseLLM implements ILLM {
   ) {
     let completion = "";
     for await (const message of this.streamChat(messages, signal, options)) {
-      completion += renderChatMessage(message);
+      completion += renderChatMessageWithoutThinking(message);
     }
     return { role: "assistant" as const, content: completion };
   }
diff --git a/core/llm/llms/Anthropic.ts b/core/llm/llms/Anthropic.ts
index e33f92f203f..33b109e63ee 100644
--- a/core/llm/llms/Anthropic.ts
+++ b/core/llm/llms/Anthropic.ts
@@ -27,7 +27,11 @@ import {
   Usage,
 } from "../../index.js";
 import { safeParseToolCallArgs } from "../../tools/parseArgs.js";
-import { renderChatMessage, stripImages } from "../../util/messageContent.js";
+import {
+  renderChatMessage,
+  renderChatMessageWithoutThinking,
+  stripImages,
+} from "../../util/messageContent.js";
 import { extractBase64FromDataUrl } from "../../util/url.js";
 import { DEFAULT_REASONING_TOKENS } from "../constants.js";
 import { BaseLLM } from "../index.js";
@@ -258,7 +262,7 @@ class Anthropic extends BaseLLM {
   ): AsyncGenerator<string> {
     const messages = [{ role: "user" as const, content: prompt }];
     for await (const update of this._streamChat(messages, signal, options)) {
-      yield renderChatMessage(update);
+      yield renderChatMessageWithoutThinking(update);
     }
   }
diff --git a/core/llm/llms/Bedrock.ts b/core/llm/llms/Bedrock.ts
index bc9566cbecc..151fab087f1 100644
--- a/core/llm/llms/Bedrock.ts
+++ b/core/llm/llms/Bedrock.ts
@@ -20,7 +20,10 @@ import { fromNodeProviderChain } from "@aws-sdk/credential-providers";
 import type { CompletionOptions } from "../../index.js";
 import { ChatMessage, Chunk, LLMOptions, MessageContent } from "../../index.js";
 import { safeParseToolCallArgs } from "../../tools/parseArgs.js";
-import { renderChatMessage, stripImages } from "../../util/messageContent.js";
+import {
+  renderChatMessageWithoutThinking,
+  stripImages,
+} from "../../util/messageContent.js";
 import { parseDataUrl } from "../../util/url.js";
 import { BaseLLM } from "../index.js";
 import { PROVIDER_TOOL_SUPPORT } from "../toolSupport.js";
@@ -100,7 +103,7 @@ class Bedrock extends BaseLLM {
   ): AsyncGenerator<string> {
     const messages = [{ role: "user" as const, content: prompt }];
     for await (const update of this._streamChat(messages, signal, options)) {
-      yield renderChatMessage(update);
+      yield renderChatMessageWithoutThinking(update);
     }
   }
diff --git a/core/llm/llms/Cloudflare.ts b/core/llm/llms/Cloudflare.ts
index cba4204ef0b..2674133c74c 100644
--- a/core/llm/llms/Cloudflare.ts
+++ b/core/llm/llms/Cloudflare.ts
@@ -1,6 +1,6 @@
 import { streamSse } from "@continuedev/fetch";
 import { ChatMessage, CompletionOptions } from "../../index.js";
-import { renderChatMessage } from "../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 
 export default class Cloudflare extends BaseLLM {
@@ -59,7 +59,7 @@ export default class Cloudflare extends BaseLLM {
       signal,
       options,
     )) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   }
 }
diff --git a/core/llm/llms/Cohere.ts b/core/llm/llms/Cohere.ts
index b0a27b6cf8c..34b4e7c8f95 100644
--- a/core/llm/llms/Cohere.ts
+++ b/core/llm/llms/Cohere.ts
@@ -5,7 +5,10 @@ import {
   ChatMessage,
   CompletionOptions,
   LLMOptions,
 } from "../../index.js";
-import { renderChatMessage, stripImages } from "../../util/messageContent.js";
+import {
+  renderChatMessageWithoutThinking,
+  stripImages,
+} from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 import { DEFAULT_REASONING_TOKENS } from "../constants.js";
@@ -148,7 +151,7 @@
   ): AsyncGenerator<string> {
     const messages = [{ role: "user" as const, content: prompt }];
     for await (const update of this._streamChat(messages, signal, options)) {
-      yield renderChatMessage(update);
+      yield renderChatMessageWithoutThinking(update);
     }
   }
diff --git a/core/llm/llms/CustomLLM.ts b/core/llm/llms/CustomLLM.ts
index e04ea0ab739..e3967b6ec7b 100644
--- a/core/llm/llms/CustomLLM.ts
+++ b/core/llm/llms/CustomLLM.ts
@@ -1,5 +1,5 @@
 import { ChatMessage, CompletionOptions, CustomLLM } from "../../index.js";
-import { renderChatMessage } from "../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 
 class CustomLLMClass extends BaseLLM {
@@ -76,7 +76,7 @@ class CustomLLMClass extends BaseLLM {
         if (typeof content === "string") {
           yield content;
         } else {
-          yield renderChatMessage(content);
+          yield renderChatMessageWithoutThinking(content);
         }
       }
     } else {
diff --git a/core/llm/llms/Flowise.ts b/core/llm/llms/Flowise.ts
index 57bb18ccc66..19fcd6c4517 100644
--- a/core/llm/llms/Flowise.ts
+++ b/core/llm/llms/Flowise.ts
@@ -1,7 +1,7 @@
 import socketIOClient, { Socket } from "socket.io-client";
 
 import { ChatMessage, CompletionOptions, LLMOptions } from "../../index.js";
-import { renderChatMessage } from "../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 
 interface IFlowiseApiOptions {
@@ -121,7 +121,7 @@ class Flowise extends BaseLLM {
   ): AsyncGenerator<string> {
     const message: ChatMessage = { role: "user", content: prompt };
     for await (const chunk of this._streamChat([message], signal, options)) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   }
diff --git a/core/llm/llms/Gemini.ts b/core/llm/llms/Gemini.ts
index fb080fb44f2..3d1bc37caaa 100644
--- a/core/llm/llms/Gemini.ts
+++ b/core/llm/llms/Gemini.ts
@@ -10,7 +10,10 @@ import {
   ToolCallDelta,
 } from "../../index.js";
 import { safeParseToolCallArgs } from "../../tools/parseArgs.js";
-import { renderChatMessage, stripImages } from "../../util/messageContent.js";
+import {
+  renderChatMessageWithoutThinking,
+  stripImages,
+} from "../../util/messageContent.js";
 import { extractBase64FromDataUrl } from "../../util/url.js";
 import { BaseLLM } from "../index.js";
 import { LlmApiRequestType } from "../openaiTypeConverters.js";
@@ -89,7 +92,7 @@ class Gemini extends BaseLLM {
       signal,
       options,
     )) {
-      yield renderChatMessage(message);
+      yield renderChatMessageWithoutThinking(message);
     }
   }
diff --git a/core/llm/llms/OpenAI.ts b/core/llm/llms/OpenAI.ts
index c65b55dc1a5..53e306780f1 100644
--- a/core/llm/llms/OpenAI.ts
+++ b/core/llm/llms/OpenAI.ts
@@ -16,7 +16,10 @@ import {
   LLMOptions,
   Tool,
 } from "../../index.js";
-import { renderChatMessage } from "../../util/messageContent.js";
+import {
+  renderChatMessage,
+  renderChatMessageWithoutThinking,
+} from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 import {
   fromChatCompletionChunk,
@@ -430,7 +433,7 @@ class OpenAI extends BaseLLM {
       signal,
       options,
     )) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   }
diff --git a/core/llm/llms/VertexAI.ts b/core/llm/llms/VertexAI.ts
index 2bb5fa13d9d..aa4bc73e123 100644
--- a/core/llm/llms/VertexAI.ts
+++ b/core/llm/llms/VertexAI.ts
@@ -2,7 +2,10 @@ import { AuthClient, GoogleAuth, JWT, auth } from "google-auth-library";
 
 import { streamResponse, streamSse } from "@continuedev/fetch";
 
 import { ChatMessage, CompletionOptions, LLMOptions } from "../../index.js";
-import { renderChatMessage, stripImages } from "../../util/messageContent.js";
+import {
+  renderChatMessageWithoutThinking,
+  stripImages,
+} from "../../util/messageContent.js";
 import { BaseLLM } from "../index.js";
 import { LlmApiRequestType } from "../openaiTypeConverters.js";
@@ -489,7 +492,7 @@ class VertexAI extends BaseLLM {
       signal,
      options,
     )) {
-      yield renderChatMessage(message);
+      yield renderChatMessageWithoutThinking(message);
     }
   }
diff --git a/core/util/messageContent.ts b/core/util/messageContent.ts
index 5a0bc95bf78..635372777bb 100644
--- a/core/util/messageContent.ts
+++ b/core/util/messageContent.ts
@@ -17,6 +17,11 @@ export function stripImages(messageContent: MessageContent): string {
     .join("\n");
 }
 
+export function renderChatMessageWithoutThinking(message: ChatMessage): string {
+  if (message.role === "thinking") return "";
+  return renderChatMessage(message);
+}
+
 export function renderChatMessage(message: ChatMessage): string {
   switch (message?.role) {
     case "user":

From 9522c8b044bb94e4781ec597c91258a3ade8860f Mon Sep 17 00:00:00 2001
From: Alexei Chmelev
Date: Fri, 15 May 2026 15:53:38 +0200
Subject: [PATCH 2/3] fix: prevent thinking content from leaking in edit,
 history export, and legacy slash commands

Extend the renderChatMessageWithoutThinking fix to six remaining call
sites: recursiveStream (diff buffer), toMarkDown (history export), and
the four built-in-legacy slash commands (commit, review, draftIssue,
onboard) that streamed chunks directly to the UI.
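
A minimal sketch of the helper's contract at each converted call site
(message shape abbreviated; renderChatMessageWithoutThinking is
introduced in PATCH 1/3):

    // Streamed chunks can arrive with role "thinking"; rendering them as
    // plain text is what leaked chain-of-thought into command output and
    // history exports.
    const chunk: ChatMessage = { role: "thinking", content: "chain of thought" };
    renderChatMessage(chunk);                // "chain of thought" (leaks)
    renderChatMessageWithoutThinking(chunk); // "" (dropped)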
---
 core/commands/slash/built-in-legacy/commit.ts     | 4 ++--
 core/commands/slash/built-in-legacy/draftIssue.ts | 4 ++--
 core/commands/slash/built-in-legacy/onboard.ts    | 4 ++--
 core/commands/slash/built-in-legacy/review.ts     | 4 ++--
 core/edit/recursiveStream.ts                      | 4 ++--
 core/util/historyUtils.ts                         | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/core/commands/slash/built-in-legacy/commit.ts b/core/commands/slash/built-in-legacy/commit.ts
index 1f7e713437b..5b7acaa704e 100644
--- a/core/commands/slash/built-in-legacy/commit.ts
+++ b/core/commands/slash/built-in-legacy/commit.ts
@@ -1,5 +1,5 @@
 import { SlashCommand } from "../../../index.js";
-import { renderChatMessage } from "../../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../../util/messageContent.js";
 
 const CommitMessageCommand: SlashCommand = {
   name: "commit",
@@ -18,7 +18,7 @@ const CommitMessageCommand: SlashCommand = {
       [{ role: "user", content: prompt }],
       abortController.signal,
     )) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   },
 };
diff --git a/core/commands/slash/built-in-legacy/draftIssue.ts b/core/commands/slash/built-in-legacy/draftIssue.ts
index 9a1c5bdf9bb..448e5e1d8a0 100644
--- a/core/commands/slash/built-in-legacy/draftIssue.ts
+++ b/core/commands/slash/built-in-legacy/draftIssue.ts
@@ -1,6 +1,6 @@
 import { ChatMessage, SlashCommand } from "../../../index.js";
 import { removeQuotesAndEscapes } from "../../../util/index.js";
-import { renderChatMessage } from "../../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../../util/messageContent.js";
 
 const PROMPT = (
   input: string,
@@ -49,7 +49,7 @@ const DraftIssueCommand: SlashCommand = {
       abortController.signal,
     )) {
       body += chunk.content;
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
 
     const url = `${params.repositoryUrl}/issues/new?title=${encodeURIComponent(
diff --git a/core/commands/slash/built-in-legacy/onboard.ts b/core/commands/slash/built-in-legacy/onboard.ts
index 500781450df..aef89b32587 100644
--- a/core/commands/slash/built-in-legacy/onboard.ts
+++ b/core/commands/slash/built-in-legacy/onboard.ts
@@ -3,7 +3,7 @@ import ignore from "ignore";
 import type { FileType, IDE, SlashCommand } from "../../..";
 import { getGlobalContinueIgArray } from "../../../indexing/continueignore";
 import { DEFAULT_IGNORE, gitIgArrayFromFile } from "../../../indexing/ignore";
-import { renderChatMessage } from "../../../util/messageContent";
+import { renderChatMessageWithoutThinking } from "../../../util/messageContent";
 import {
   findUriInDirs,
   getUriPathBasename,
@@ -48,7 +48,7 @@ const OnboardSlashCommand: SlashCommand = {
       [{ role: "user", content: prompt }],
       abortController.signal,
     )) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   },
 };
diff --git a/core/commands/slash/built-in-legacy/review.ts b/core/commands/slash/built-in-legacy/review.ts
index 37c280b3414..b17f4235419 100644
--- a/core/commands/slash/built-in-legacy/review.ts
+++ b/core/commands/slash/built-in-legacy/review.ts
@@ -1,5 +1,5 @@
 import { ChatMessage, SlashCommand } from "../../../index.js";
-import { renderChatMessage } from "../../../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../../../util/messageContent.js";
 
 const prompt = `
   Review the following code, focusing on Readability, Maintainability, Code Smells, Speed, and Memory Performance. Provide feedback with these guidelines:
@@ -47,7 +47,7 @@ const ReviewMessageCommand: SlashCommand = {
       [{ role: "user", content: content }],
       abortController.signal,
     )) {
-      yield renderChatMessage(chunk);
+      yield renderChatMessageWithoutThinking(chunk);
     }
   },
 };
diff --git a/core/edit/recursiveStream.ts b/core/edit/recursiveStream.ts
index 5c9f2fef6a6..fd80f0f67e0 100644
--- a/core/edit/recursiveStream.ts
+++ b/core/edit/recursiveStream.ts
@@ -7,7 +7,7 @@ import {
 } from "..";
 import { DEFAULT_MAX_TOKENS } from "../llm/constants";
 import { countTokens } from "../llm/countTokens";
-import { renderChatMessage } from "../util/messageContent";
+import { renderChatMessageWithoutThinking } from "../util/messageContent";
 import { APPLY_UNIQUE_TOKEN } from "./constants.js";
 
 const INFINITE_STREAM_SAFETY = 0.9;
@@ -85,7 +85,7 @@ export async function* recursiveStream(
   for await (const chunk of generator) {
     yield chunk;
 
-    const rendered = renderChatMessage(chunk);
+    const rendered = renderChatMessageWithoutThinking(chunk);
     buffer += rendered;
     totalTokens += countTokens(chunk.content);
diff --git a/core/util/historyUtils.ts b/core/util/historyUtils.ts
index fc0939c7bcf..3ab09739e97 100644
--- a/core/util/historyUtils.ts
+++ b/core/util/historyUtils.ts
@@ -5,7 +5,7 @@ import path from "path";
 
 import { languageForFilepath } from "../autocomplete/constants/AutocompleteLanguageInfo.js";
 import { ChatMessage, IDE } from "../index.js";
-import { renderChatMessage } from "../util/messageContent.js";
+import { renderChatMessageWithoutThinking } from "../util/messageContent.js";
 import { getContinueGlobalPath } from "../util/paths.js";
 
 // If useful elsewhere, helper funcs should move to core/util/index.ts or similar
@@ -45,7 +45,7 @@ export function toMarkDown(history: ChatMessage[], time?: Date): string {
   let content = `### [Continue](https://continue.dev) session transcript\n  Exported: ${time.toLocaleString()}`;
 
   for (const msg of history) {
-    let msgText = renderChatMessage(msg);
+    let msgText = renderChatMessageWithoutThinking(msg);
     if (!msgText) {
       continue; // Skip messages without content
     }

From ef340a0a6c868bf0f0a6bc147cbdf11ac5ea4b68 Mon Sep 17 00:00:00 2001
From: Alexei Chmelev
Date: Fri, 15 May 2026 16:24:06 +0200
Subject: [PATCH 3/3] revert: restore renderChatMessage in recursiveStream to
 preserve buffer integrity

The buffer in recursiveStream must be a faithful copy of all model output
so the recursive continuation path can resume from the correct position.
Thinking content is already filtered downstream by streamLines() in
diff/util.ts, so no thinking leaks to the diff pipeline or UI.
---
 core/edit/recursiveStream.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/edit/recursiveStream.ts b/core/edit/recursiveStream.ts
index fd80f0f67e0..5c9f2fef6a6 100644
--- a/core/edit/recursiveStream.ts
+++ b/core/edit/recursiveStream.ts
@@ -7,7 +7,7 @@ import {
 } from "..";
 import { DEFAULT_MAX_TOKENS } from "../llm/constants";
 import { countTokens } from "../llm/countTokens";
-import { renderChatMessageWithoutThinking } from "../util/messageContent";
+import { renderChatMessage } from "../util/messageContent";
 import { APPLY_UNIQUE_TOKEN } from "./constants.js";
 
 const INFINITE_STREAM_SAFETY = 0.9;
@@ -85,7 +85,7 @@ export async function* recursiveStream(
   for await (const chunk of generator) {
     yield chunk;
 
-    const rendered = renderChatMessageWithoutThinking(chunk);
+    const rendered = renderChatMessage(chunk);
     buffer += rendered;
     totalTokens += countTokens(chunk.content);
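
A quick check of the net behavior after the series (hypothetical snippet,
not part of the patches; import paths are illustrative, and it assumes
renderChatMessage returns assistant string content unchanged, as the
existing call sites rely on):

    import { ChatMessage } from "./core/index.js"; // assumed path
    import {
      renderChatMessage,
      renderChatMessageWithoutThinking,
    } from "./core/util/messageContent.js";

    const thinking: ChatMessage = { role: "thinking", content: "hidden reasoning" };
    const answer: ChatMessage = { role: "assistant", content: "visible answer" };

    // User-facing paths drop thinking chunks entirely...
    renderChatMessageWithoutThinking(thinking); // ""
    renderChatMessageWithoutThinking(answer);   // "visible answer"

    // ...while recursiveStream's buffer keeps the faithful transcript it
    // needs to resume a continuation from the exact output position.
    renderChatMessage(thinking);                // "hidden reasoning"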