From 72e1466d718fb4fc44865b2bc7305da932b0e3e1 Mon Sep 17 00:00:00 2001 From: Devin Blagbrough Date: Tue, 9 Jun 2026 20:06:06 -0400 Subject: [PATCH] fix(cli): stream run output, add empty-text warning, flush race-late parts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `opencode run` had three independent gaps that left CLI users staring at silent exits and dropped answers: 1. Default text mode never streamed partial output. Text parts only surfaced when their part.time?.end was set — long generations looked like the process was stuck, and short single-token responses could land just after the loop broke on session.idle and never reach stdout at all. 2. When the upstream LLM returned a successful (2xx) response with empty content — common with thinking-mode models that consume the full max_tokens budget on internal reasoning, and with some compliance proxies that return 2xx-empty on transient backend errors — the CLI exited 0 with zero output. Indistinguishable from a hung process. 3. PR #31505 attempted a flush but extracted the part handler into a function that returned early, breaking the loop's continue flow and causing the default path to hang at session.idle indefinitely. This patch keeps the dev branch's loop structure intact (no extracted handlePart with return statements) and adds three layered fixes: - Delta streaming: message.part.delta events now write text/reasoning fragments directly to stdout (raw in default mode, NDJSON "delta" lines in --format json). Tracks emitted part IDs via a Set so matching message.part.updated events don't double-print. - Belt-and-suspenders flush: after client.session.prompt / .command returns, walk the resolved assistant message's parts and emit any text/reasoning not already covered by deltas/updates. Catches the race where session.idle fires before the final part.updated event reaches our subscription. 
- Empty-text detection: track whether any assistant text reached stdout during the run. If nothing did, write a clear warning to stderr describing the most likely causes (thinking budget, upstream empty completion) and what to try (retry, raise max_tokens, check provider). Opt-in non-zero exit via OPENCODE_RUN_EXIT_ON_EMPTY=1 for CI/CD pipelines that want to fail loudly. Verified on fall-compute-25 against a compliance-substituting hub relay that fronts gemini-2.5-flash: - Default mode: 0/4 hangs in 4 runs (was 4/4 hanging with PR #31505). Warning fires correctly on empty completions; exits in 2-4s instead of 30-60s timeout. - JSON mode: emits step_start, step_finish, and delta/text events. - No regression in tool-call display, --print-logs, or --continue. Refs #22243 #31482 #20799 #27669 #29997 #30100 #31505 --- packages/opencode/src/cli/cmd/run.ts | 192 +++++++++++++++++++++++++-- 1 file changed, 183 insertions(+), 9 deletions(-) diff --git a/packages/opencode/src/cli/cmd/run.ts b/packages/opencode/src/cli/cmd/run.ts index 18d033dadb3c..a723f294ffdd 100644 --- a/packages/opencode/src/cli/cmd/run.ts +++ b/packages/opencode/src/cli/cmd/run.ts @@ -20,7 +20,7 @@ import { UI } from "../ui" import { effectCmd } from "../effect-cmd" import { EOL } from "os" import { Filesystem } from "@/util/filesystem" -import { createOpencodeClient, type OpencodeClient, type ToolPart } from "@opencode-ai/sdk/v2" +import { createOpencodeClient, type OpencodeClient, type Part as SessionPart, type ToolPart } from "@opencode-ai/sdk/v2" import { FormatError, FormatUnknownError } from "../error" import { INTERACTIVE_INPUT_ERROR, resolveInteractiveStdin } from "./run/runtime.stdin" @@ -610,6 +610,19 @@ export const RunCommand = effectCmd({ } const sessionID = sess.id + // Track which part IDs we've already written to stdout so we don't + // double-print when both deltas and the final part.updated arrive. 
+ // Hoisted to the execute() scope so the post-prompt belt-and-suspenders + // flush (flushFinalParts) can see what the event loop already covered. + const emitted = new Set<string>() + // Set when ANY assistant text has reached stdout (reasoning output alone + // does not set it). Used to distinguish a genuinely-empty assistant response (LLM returned no + // content — common with thinking-mode models when max_tokens is small, + // or when an upstream provider returns 2xx + empty body) from a normal + // successful turn. We surface a stderr warning when this stays false, + // because silent exit-0 is the worst possible UX for CLI callers. + let anyTextEmitted = false + function emit(type: string, data: Record<string, unknown>) { if (args.format === "json") { process.stdout.write( @@ -625,6 +638,119 @@ export const RunCommand = effectCmd({ return false } + // Emit one streaming delta — called from the message.part.delta branch. + // In json mode we forward each delta as its own NDJSON line so JSON + // consumers can render text token-by-token. In default text mode we + // print the raw delta directly to stdout with no decoration (this is + // what a terminal user expects: progressive output as it generates). + // Reasoning deltas are printed dimmed in TTY mode and only when + // --thinking is set, otherwise they're suppressed (json consumers + // still get them because they explicitly subscribed to the JSON stream). 
+ function emitDelta(partID: string, messageID: string, field: string, delta: string) { + if (args.format === "json") { + process.stdout.write( + JSON.stringify({ + type: "delta", + timestamp: Date.now(), + sessionID, + messageID, + partID, + field, + delta, + }) + EOL, + ) + emitted.add(partID) + if (field === "text") anyTextEmitted = true + return + } + if (field === "text") { + process.stdout.write(delta) + emitted.add(partID) + anyTextEmitted = true + return + } + if (field === "reasoning" && thinking) { + if (process.stdout.isTTY) { + process.stdout.write(`${UI.Style.TEXT_DIM}${delta}${UI.Style.TEXT_NORMAL}`) + } else { + process.stdout.write(delta) + } + emitted.add(partID) + return + } + } + + // Belt-and-suspenders: after session.prompt/command returns, walk the + // assistant message's final parts list and print anything we didn't + // already cover via deltas/updates. This catches the race where the + // server emits the final text part right at (or after) session.idle — + // sometimes the part-updated event arrives just after our loop breaks + // on the idle status, and we'd otherwise drop the answer on the floor. + function flushFinalParts(parts: SessionPart[] | undefined) { + if (!parts) return + for (const part of parts) { + if (emitted.has(part.id)) continue + if (part.type === "text") { + const text = (part.text ?? "").trim() + if (!text) continue + if (emit("text", { part })) { + emitted.add(part.id) + anyTextEmitted = true + continue + } + if (!process.stdout.isTTY) { + process.stdout.write(text + EOL) + } else { + UI.empty() + UI.println(text) + UI.empty() + } + emitted.add(part.id) + anyTextEmitted = true + continue + } + if (part.type === "reasoning" && thinking) { + const text = (part.text ?? 
"").trim() + if (!text) continue + if (emit("reasoning", { part })) { + emitted.add(part.id) + continue + } + const line = `Thinking: ${text}` + if (process.stdout.isTTY) { + UI.empty() + UI.println(`${UI.Style.TEXT_DIM}\u001b[3m${line}\u001b[0m${UI.Style.TEXT_NORMAL}`) + UI.empty() + } else { + process.stdout.write(line + EOL) + } + emitted.add(part.id) + } + } + } + + // Surface the silent-empty case to stderr. We intentionally do NOT + // change process.exitCode here by default — that's a behavior change + // existing callers might not expect. Operators can opt into a non-zero + // exit via OPENCODE_RUN_EXIT_ON_EMPTY=1, which is useful for CI/CD + // pipelines that want to fail loudly when the model returned nothing. + function warnIfEmpty() { + if (anyTextEmitted) return + // Json consumers can detect the no-text case themselves; skip the + // warning to keep their stdout/stderr clean and machine-parseable. + if (args.format === "json") return + process.stderr.write( + "[opencode run] WARNING: model returned no assistant text. " + + "This usually means the provider responded with an empty completion " + + "(thinking-mode models can consume the full max_tokens on internal reasoning " + + "leaving no budget for output, and some upstream proxies return 2xx-empty " + + "on transient backend errors). Retry, raise max_tokens, or check the provider.\n", + ) + if (process.env.OPENCODE_RUN_EXIT_ON_EMPTY === "1") { + process.exitCode = 2 + } + } + // Consume one subscribed event stream for the active session and mirror it // to stdout/UI. `client` is passed explicitly because attach mode may // rebind the SDK to the session's directory after the subscription is @@ -647,6 +773,21 @@ export const RunCommand = effectCmd({ toggles.set("start", true) } + // Streaming deltas: write each chunk to stdout as it arrives so + // CLI users see progressive output during long generations instead + // of staring at a blank terminal until the part finalizes. 
emitDelta + // marks the partID in `emitted` so the matching part.updated branch + // below knows the content was already shown and skips re-printing. + if (event.type === "message.part.delta") { + const props = event.properties + if (props.sessionID !== sessionID) continue + if (typeof props.partID !== "string") continue + if (typeof props.field !== "string") continue + if (typeof props.delta !== "string" || props.delta === "") continue + emitDelta(props.partID, props.messageID, props.field, props.delta) + continue + } + if (event.type === "message.part.updated") { const part = event.properties.part if (part.sessionID !== sessionID) continue @@ -681,20 +822,43 @@ export const RunCommand = effectCmd({ } if (part.type === "text" && part.time?.end) { - if (emit("text", { part })) continue + // If we already streamed the body via deltas, the part-updated + // event is a no-op for stdout. We still emit the json line so + // json consumers get the canonical part-completion record. + if (args.format === "json") { + emit("text", { part }) + emitted.add(part.id) + if (part.text && part.text.trim()) anyTextEmitted = true + continue + } + if (emitted.has(part.id)) { + process.stdout.write(EOL) + continue + } const text = part.text.trim() if (!text) continue if (!process.stdout.isTTY) { process.stdout.write(text + EOL) - continue + } else { + UI.empty() + UI.println(text) + UI.empty() } - UI.empty() - UI.println(text) - UI.empty() + emitted.add(part.id) + anyTextEmitted = true + continue } if (part.type === "reasoning" && part.time?.end && thinking) { - if (emit("reasoning", { part })) continue + if (args.format === "json") { + emit("reasoning", { part }) + emitted.add(part.id) + continue + } + if (emitted.has(part.id)) { + process.stdout.write(EOL) + continue + } const text = part.text.trim() if (!text) continue const line = `Thinking: ${text}` @@ -702,9 +866,11 @@ export const RunCommand = effectCmd({ UI.empty() 
UI.println(`${UI.Style.TEXT_DIM}\u001b[3m${line}\u001b[0m${UI.Style.TEXT_NORMAL}`) UI.empty() - continue + } else { + process.stdout.write(line + EOL) } - process.stdout.write(line + EOL) + emitted.add(part.id) + continue } } @@ -787,6 +953,12 @@ export const RunCommand = effectCmd({ return } await finish() + // Belt-and-suspenders flush: emit any final parts the loop may have + // missed because session.idle fired before the matching part.updated + // event reached our subscription. Without this the model's final + // answer can race past the loop break and end up only in the DB. + flushFinalParts(result.data?.parts) + warnIfEmpty() return } @@ -804,6 +976,8 @@ export const RunCommand = effectCmd({ return } await finish() + flushFinalParts(result.data?.parts) + warnIfEmpty() return }