Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 183 additions & 9 deletions packages/opencode/src/cli/cmd/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import { UI } from "../ui"
import { effectCmd } from "../effect-cmd"
import { EOL } from "os"
import { Filesystem } from "@/util/filesystem"
import { createOpencodeClient, type OpencodeClient, type ToolPart } from "@opencode-ai/sdk/v2"
import { createOpencodeClient, type OpencodeClient, type Part as SessionPart, type ToolPart } from "@opencode-ai/sdk/v2"
import { FormatError, FormatUnknownError } from "../error"
import { INTERACTIVE_INPUT_ERROR, resolveInteractiveStdin } from "./run/runtime.stdin"

Expand Down Expand Up @@ -610,6 +610,19 @@ export const RunCommand = effectCmd({
}
const sessionID = sess.id

// Track which part IDs we've already written to stdout so we don't
// double-print when both deltas and the final part.updated arrive.
// Hoisted to the execute() scope so the post-prompt belt-and-suspenders
// flush (flushFinalParts) can see what the event loop already covered.
const emitted = new Set<string>()
// Set when ANY assistant text or reasoning has reached stdout. Used to
// distinguish a genuinely-empty assistant response (LLM returned no
// content — common with thinking-mode models when max_tokens is small,
// or when an upstream provider returns 2xx + empty body) from a normal
// successful turn. We surface a stderr warning when this stays false,
// because silent exit-0 is the worst possible UX for CLI callers.
let anyTextEmitted = false

function emit(type: string, data: Record<string, unknown>) {
if (args.format === "json") {
process.stdout.write(
Expand All @@ -625,6 +638,119 @@ export const RunCommand = effectCmd({
return false
}

// Emit one streaming delta — called from the message.part.delta branch.
//
// json format: every delta (whatever its field) is forwarded as its own
// NDJSON line so JSON consumers can render token-by-token. Reasoning is not
// gated on --thinking here because the consumer explicitly subscribed to
// the JSON stream.
//
// default text format:
//   - "text" deltas are written to stdout verbatim, with no decoration.
//   - "reasoning" deltas are written only when --thinking is set, dimmed on
//     a TTY. The first delta of each reasoning part is prefixed with
//     "Thinking: " so streamed reasoning carries the same label as the
//     non-streamed rendering and stays distinguishable from the answer when
//     stdout is not a TTY (where no dim styling is applied).
//   - deltas for any other field are ignored.
//
// Every delta that reaches stdout records its partID in `emitted`; only
// "text" deltas flip `anyTextEmitted`.
//
// NOTE(review): this assumes the server reports `field === "reasoning"` for
// reasoning parts — confirm against the message.part.delta payload.
function emitDelta(partID: string, messageID: string, field: string, delta: string) {
  if (args.format === "json") {
    process.stdout.write(
      JSON.stringify({
        type: "delta",
        timestamp: Date.now(),
        sessionID,
        messageID,
        partID,
        field,
        delta,
      }) + EOL,
    )
    emitted.add(partID)
    if (field === "text") anyTextEmitted = true
    return
  }
  if (field === "text") {
    process.stdout.write(delta)
    emitted.add(partID)
    anyTextEmitted = true
    return
  }
  if (field === "reasoning" && thinking) {
    // `emitted` does not contain the part yet on its first delta, which is
    // exactly when the label has to be written.
    const chunk = emitted.has(partID) ? delta : `Thinking: ${delta}`
    if (process.stdout.isTTY) {
      process.stdout.write(`${UI.Style.TEXT_DIM}${chunk}${UI.Style.TEXT_NORMAL}`)
    } else {
      process.stdout.write(chunk)
    }
    emitted.add(partID)
    return
  }
}

// Safety-net flush, run after session.prompt/command has returned: walk the
// assistant message's final parts and print whatever the event loop did not
// already put on stdout (tracked by part id in `emitted`). This covers the
// race where the final part-updated event lands at, or just after, the
// session.idle that ends the loop, which would otherwise drop the answer.
//
// Text parts are always flushed and flip `anyTextEmitted`; reasoning parts
// are flushed only when `thinking` is set. Parts whose trimmed text is empty
// are skipped and stay unmarked. When emit() reports it handled the part
// (json format) nothing further is printed for it.
function flushFinalParts(parts: SessionPart[] | undefined) {
  for (const part of parts ?? []) {
    if (emitted.has(part.id)) continue

    switch (part.type) {
      case "text": {
        const body = (part.text ?? "").trim()
        if (body === "") break
        if (!emit("text", { part })) {
          if (process.stdout.isTTY) {
            UI.empty()
            UI.println(body)
            UI.empty()
          } else {
            process.stdout.write(body + EOL)
          }
        }
        emitted.add(part.id)
        anyTextEmitted = true
        break
      }
      case "reasoning": {
        if (!thinking) break
        const body = (part.text ?? "").trim()
        if (body === "") break
        if (!emit("reasoning", { part })) {
          const labelled = `Thinking: ${body}`
          if (process.stdout.isTTY) {
            UI.empty()
            UI.println(`${UI.Style.TEXT_DIM}\u001b[3m${labelled}\u001b[0m${UI.Style.TEXT_NORMAL}`)
            UI.empty()
          } else {
            process.stdout.write(labelled + EOL)
          }
        }
        emitted.add(part.id)
        break
      }
    }
  }
}

// Surface the silent-empty case: no assistant text reached stdout during
// the turn (`anyTextEmitted` is still false).
//
// Exit code: left untouched by default, since a non-zero exit would be a
// behavior change existing callers might not expect. Operators opt in with
// OPENCODE_RUN_EXIT_ON_EMPTY=1, which sets process.exitCode to 2. The opt-in
// is honored for every output format, json included, so CI/CD pipelines
// that consume the JSON stream can still fail loudly on an empty response.
//
// Warning: written to stderr in text mode only. Json consumers can detect
// the no-text case themselves, so the message is skipped there to keep
// their stdout/stderr clean and machine-parseable.
function warnIfEmpty() {
  if (anyTextEmitted) return
  // Apply the explicit opt-in before the format check so json callers are
  // not silently excluded from it.
  if (process.env.OPENCODE_RUN_EXIT_ON_EMPTY === "1") {
    process.exitCode = 2
  }
  if (args.format === "json") return
  process.stderr.write(
    "[opencode run] WARNING: model returned no assistant text. " +
      "This usually means the provider responded with an empty completion " +
      "(thinking-mode models can consume the full max_tokens on internal reasoning " +
      "leaving no budget for output, and some upstream proxies return 2xx-empty " +
      "on transient backend errors). Retry, raise max_tokens, or check the provider.\n",
  )
}

// Consume one subscribed event stream for the active session and mirror it
// to stdout/UI. `client` is passed explicitly because attach mode may
// rebind the SDK to the session's directory after the subscription is
Expand All @@ -647,6 +773,21 @@ export const RunCommand = effectCmd({
toggles.set("start", true)
}

// Streaming deltas: write each chunk to stdout as it arrives so
// CLI users see progressive output during long generations instead
// of staring at a blank terminal until the part finalizes. emitDelta
// marks the partID in `emitted` so the matching part.updated branch
// below knows the content was already shown and skips re-printing.
if (event.type === "message.part.delta") {
const props = event.properties
if (props.sessionID !== sessionID) continue
if (typeof props.partID !== "string") continue
if (typeof props.field !== "string") continue
if (typeof props.delta !== "string" || props.delta === "") continue
emitDelta(props.partID, props.messageID, props.field, props.delta)
continue
}

if (event.type === "message.part.updated") {
const part = event.properties.part
if (part.sessionID !== sessionID) continue
Expand Down Expand Up @@ -681,30 +822,55 @@ export const RunCommand = effectCmd({
}

if (part.type === "text" && part.time?.end) {
if (emit("text", { part })) continue
// In json mode we always emit the canonical part-completion record, even
// when the body was already streamed as deltas. In text mode a part that
// was streamed via deltas only needs its line terminated with EOL.
if (args.format === "json") {
emit("text", { part })
emitted.add(part.id)
if (part.text && part.text.trim()) anyTextEmitted = true
continue
}
if (emitted.has(part.id)) {
process.stdout.write(EOL)
continue
}
const text = part.text.trim()
if (!text) continue
if (!process.stdout.isTTY) {
process.stdout.write(text + EOL)
continue
} else {
UI.empty()
UI.println(text)
UI.empty()
}
UI.empty()
UI.println(text)
UI.empty()
emitted.add(part.id)
anyTextEmitted = true
continue
}

if (part.type === "reasoning" && part.time?.end && thinking) {
if (emit("reasoning", { part })) continue
if (args.format === "json") {
emit("reasoning", { part })
emitted.add(part.id)
continue
}
if (emitted.has(part.id)) {
process.stdout.write(EOL)
continue
}
const text = part.text.trim()
if (!text) continue
const line = `Thinking: ${text}`
if (process.stdout.isTTY) {
UI.empty()
UI.println(`${UI.Style.TEXT_DIM}\u001b[3m${line}\u001b[0m${UI.Style.TEXT_NORMAL}`)
UI.empty()
continue
} else {
process.stdout.write(line + EOL)
}
process.stdout.write(line + EOL)
emitted.add(part.id)
continue
}
}

Expand Down Expand Up @@ -787,6 +953,12 @@ export const RunCommand = effectCmd({
return
}
await finish()
// Belt-and-suspenders flush: emit any final parts the loop may have
// missed because session.idle fired before the matching part.updated
// event reached our subscription. Without this the model's final
// answer can race past the loop break and end up only in the DB.
flushFinalParts(result.data?.parts)
warnIfEmpty()
return
}

Expand All @@ -804,6 +976,8 @@ export const RunCommand = effectCmd({
return
}
await finish()
flushFinalParts(result.data?.parts)
warnIfEmpty()
return
}

Expand Down
Loading