From 5e8100a5f42bd98f24228f64072d945fc565bb87 Mon Sep 17 00:00:00 2001 From: bit-star Date: Tue, 19 May 2026 15:59:24 +0800 Subject: [PATCH 1/7] test: isolate from host-runtime plugin env vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Host plugin runtimes (e.g. Claude Code) inject CLAUDE_PLUGIN_DATA and CODEX_COMPANION_SESSION_ID into the plugin process. When these leak into `npm test`, two tests fail with non-bug assertions: - tests/state.test.mjs: "resolveStateDir uses a temp-backed per-workspace directory" — CLAUDE_PLUGIN_DATA redirects state to ~/.claude/plugins/data instead of os.tmpdir(). - tests/runtime.test.mjs: "result without a job id ..." — fixture writer and the spawned `result` subprocess look at different state dirs. CI passes (no host-runtime env), so this only bites contributors running `npm test` from inside a plugin host. Fix: delete these env vars at the top of tests/helpers.mjs (the shared import) so every test gets a clean slate. Tests that exercise the env var explicitly ("resolveStateDir uses CLAUDE_PLUGIN_DATA when it is provided") already use try/finally to set and restore it, so they continue to pass. Verified: 86/86 tests pass with the polluting env vars still set in the parent shell. --- tests/helpers.mjs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/helpers.mjs b/tests/helpers.mjs index 945ae0e7..926b4cd9 100644 --- a/tests/helpers.mjs +++ b/tests/helpers.mjs @@ -4,6 +4,14 @@ import path from "node:path"; import process from "node:process"; import { spawnSync } from "node:child_process"; +// Isolate tests from host-runtime plugin env vars. Claude Code (and similar +// plugin hosts) injects CLAUDE_PLUGIN_DATA and CODEX_COMPANION_SESSION_ID into +// the plugin process; if those leak into `npm test`, fallback-path assertions +// (e.g. `resolveStateDir uses a temp-backed per-workspace directory`) fail. +// Tests that need these vars set them explicitly with try/finally. +delete process.env.CLAUDE_PLUGIN_DATA; +delete process.env.CODEX_COMPANION_SESSION_ID; + export function makeTempDir(prefix = "codex-plugin-test-") { return fs.mkdtempSync(path.join(os.tmpdir(), prefix)); } From c30b6647aee6610be2a07c80b60d4a188ff78fa1 Mon Sep 17 00:00:00 2001 From: bit-star Date: Tue, 19 May 2026 16:09:02 +0800 Subject: [PATCH 2/7] feat(state): add per-job NDJSON event stream API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add resolveJobEventsFile / appendJobEvent / readJobEvents for the upcoming Claude-main-loop observability work (see DESIGN doc at user planning area). Contract: - Per-job events file at {stateDir}/jobs/{jobId}.events.ndjson. - appendJobEvent caps each line at 4KB. POSIX `write(fd, buf, n)` with O_APPEND is atomic when n < PIPE_BUF (~4096 on Linux/macOS), so single- write append is safe across concurrent readers. Oversized lines first truncate the `raw` field; if still too big, the whole event is replaced with a minimal {type: "oversize-event-elided", seq, ts, method, phase}. - readJobEvents returns [] for missing files. Partial last line (writer mid-write before the trailing \n) is tolerated — JSON.parse failure on the trailing line is skipped, next read picks it up. afterSeq takes precedence over since when both are passed. This commit is consumer-less; the producer side (captureTurn onNotification hook) lands in the next commit so this slice can ship green independently. Tests: 10 new unit tests covering path resolution, append+read, missing file, afterSeq filter, since filter, afterSeq precedence over since, limit, partial-line tolerance, raw truncation, and oversize elision. 96/96 pass. --- plugins/codex/scripts/lib/state.mjs | 79 ++++++++++++++ tests/state.test.mjs | 160 +++++++++++++++++++++++++++- 2 files changed, 238 insertions(+), 1 deletion(-) diff --git a/plugins/codex/scripts/lib/state.mjs b/plugins/codex/scripts/lib/state.mjs index 2da23498..38b22bc1 100644 --- a/plugins/codex/scripts/lib/state.mjs +++ b/plugins/codex/scripts/lib/state.mjs @@ -189,3 +189,82 @@ export function resolveJobFile(cwd, jobId) { ensureStateDir(cwd); return path.join(resolveJobsDir(cwd), `${jobId}.json`); } + +// POSIX guarantees `write(fd, buf, n)` with O_APPEND is atomic when `n` is +// less than PIPE_BUF (typically 4096 on Linux/macOS). Keep one NDJSON line +// at or below this bound so a concurrent reader can never observe a partial +// event. Writers truncate the `raw` field (or elide it entirely) to fit. +const MAX_EVENT_LINE_BYTES = 4096; + +export function resolveJobEventsFile(cwd, jobId) { + ensureStateDir(cwd); + return path.join(resolveJobsDir(cwd), `${jobId}.events.ndjson`); +} + +export function appendJobEvent(cwd, jobId, event) { + const eventsFile = resolveJobEventsFile(cwd, jobId); + let line = `${JSON.stringify(event)}\n`; + + if (Buffer.byteLength(line, "utf8") > MAX_EVENT_LINE_BYTES && event.raw != null) { + const summary = + event.raw && typeof event.raw === "object" + ? Object.keys(event.raw).slice(0, 8).join(",") + : String(event.raw).slice(0, 100); + const truncated = { ...event, raw: { truncated: true, summary } }; + line = `${JSON.stringify(truncated)}\n`; + } + + if (Buffer.byteLength(line, "utf8") > MAX_EVENT_LINE_BYTES) { + const elided = { + seq: event.seq, + ts: event.ts, + type: "oversize-event-elided", + method: event.method ?? null, + phase: event.phase ?? null + }; + line = `${JSON.stringify(elided)}\n`; + } + + fs.appendFileSync(eventsFile, line, "utf8"); +} + +export function readJobEvents(cwd, jobId, options = {}) { + const { since = null, afterSeq = null, limit = null } = options; + const eventsFile = resolveJobEventsFile(cwd, jobId); + if (!fs.existsSync(eventsFile)) { + return []; + } + + const content = fs.readFileSync(eventsFile, "utf8"); + const lines = content.split("\n"); + const lastIndex = lines.length - 1; + const events = []; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (!line) continue; + try { + events.push(JSON.parse(line)); + } catch { + // Tolerate a partial last line: writer may be mid-write between + // assembling the JSON and writing the trailing newline. Drop it; the + // next read picks it up. For any non-trailing parse failure, treat as + // skipped corruption (rare; we'd see it again on next read). + if (i === lastIndex || (i === lastIndex - 1 && lines[lastIndex] === "")) { + continue; + } + continue; + } + } + + let filtered = events; + if (afterSeq != null) { + filtered = filtered.filter((event) => typeof event.seq === "number" && event.seq > afterSeq); + } else if (since != null) { + filtered = filtered.filter((event) => typeof event.ts === "string" && event.ts > since); + } + if (limit != null && Number.isFinite(limit)) { + filtered = filtered.slice(0, limit); + } + return filtered; +} diff --git a/tests/state.test.mjs b/tests/state.test.mjs index 0f8f57ce..4e498e47 100644 --- a/tests/state.test.mjs +++ b/tests/state.test.mjs @@ -5,7 +5,16 @@ import test from "node:test"; import assert from "node:assert/strict"; import { makeTempDir } from "./helpers.mjs"; -import { resolveJobFile, resolveJobLogFile, resolveStateDir, resolveStateFile, saveState } from "../plugins/codex/scripts/lib/state.mjs"; +import { + appendJobEvent, + readJobEvents, + resolveJobEventsFile, + resolveJobFile, + resolveJobLogFile, + resolveStateDir, + resolveStateFile, + saveState +} from "../plugins/codex/scripts/lib/state.mjs"; test("resolveStateDir uses a temp-backed per-workspace directory", () => { const workspace = makeTempDir(); @@ -103,3 +112,152 @@ test("saveState prunes dropped job artifacts when indexed jobs exceed the cap", .sort() ); }); + +test("resolveJobEventsFile returns a .events.ndjson under the jobs dir", () => { + const workspace = makeTempDir(); + const eventsFile = resolveJobEventsFile(workspace, "job-evt-1"); + + assert.match(path.basename(eventsFile), /^job-evt-1\.events\.ndjson$/); + assert.equal(path.dirname(eventsFile), path.join(resolveStateDir(workspace), "jobs")); +}); + +test("appendJobEvent creates the file and readJobEvents returns parsed events", () => { + const workspace = makeTempDir(); + const jobId = "job-evt-2"; + + appendJobEvent(workspace, jobId, { seq: 0, ts: "2026-01-01T00:00:00.000Z", phase: "starting" }); + appendJobEvent(workspace, jobId, { seq: 1, ts: "2026-01-01T00:00:01.000Z", phase: "thinking" }); + + const events = readJobEvents(workspace, jobId); + assert.equal(events.length, 2); + assert.equal(events[0].seq, 0); + assert.equal(events[0].phase, "starting"); + assert.equal(events[1].phase, "thinking"); +}); + +test("readJobEvents returns [] when the file does not exist", () => { + const workspace = makeTempDir(); + const events = readJobEvents(workspace, "never-written"); + assert.deepEqual(events, []); +}); + +test("readJobEvents afterSeq skips events with seq <= afterSeq", () => { + const workspace = makeTempDir(); + const jobId = "job-evt-3"; + for (let i = 0; i < 5; i++) { + appendJobEvent(workspace, jobId, { seq: i, ts: `2026-01-01T00:00:0${i}.000Z`, phase: "p" }); + } + const events = readJobEvents(workspace, jobId, { afterSeq: 2 }); + assert.deepEqual( + events.map((event) => event.seq), + [3, 4] + ); +}); + +test("readJobEvents since filters by ISO timestamp string", () => { + const workspace = makeTempDir(); + const jobId = "job-evt-4"; + appendJobEvent(workspace, jobId, { seq: 0, ts: "2026-01-01T00:00:00.000Z", phase: "p" }); + appendJobEvent(workspace, jobId, { seq: 1, ts: "2026-01-01T00:00:05.000Z", phase: "p" }); + appendJobEvent(workspace, jobId, { seq: 2, ts: "2026-01-01T00:00:10.000Z", phase: "p" }); + + const events = readJobEvents(workspace, jobId, { since: "2026-01-01T00:00:03.000Z" }); + assert.deepEqual( + events.map((event) => event.seq), + [1, 2] + ); +}); + +test("readJobEvents afterSeq takes precedence over since", () => { + const workspace = makeTempDir(); + const jobId = "job-evt-5"; + appendJobEvent(workspace, jobId, { seq: 0, ts: "2026-01-01T00:00:00.000Z" }); + appendJobEvent(workspace, jobId, { seq: 1, ts: "2026-01-01T00:00:05.000Z" }); + appendJobEvent(workspace, jobId, { seq: 2, ts: "2026-01-01T00:00:10.000Z" }); + + // since alone would return [seq=1, seq=2]; afterSeq=1 narrows to [seq=2]. + const events = readJobEvents(workspace, jobId, { + afterSeq: 1, + since: "2026-01-01T00:00:00.000Z" + }); + assert.deepEqual( + events.map((event) => event.seq), + [2] + ); +}); + +test("readJobEvents limit caps the returned array", () => { + const workspace = makeTempDir(); + const jobId = "job-evt-6"; + for (let i = 0; i < 10; i++) { + appendJobEvent(workspace, jobId, { seq: i, ts: "2026-01-01T00:00:00.000Z" }); + } + const events = readJobEvents(workspace, jobId, { limit: 3 }); + assert.equal(events.length, 3); + assert.deepEqual( + events.map((event) => event.seq), + [0, 1, 2] + ); +}); + +test("readJobEvents tolerates a partial last line (mid-write)", () => { + const workspace = makeTempDir(); + const jobId = "job-evt-7"; + appendJobEvent(workspace, jobId, { seq: 0, ts: "2026-01-01T00:00:00.000Z", phase: "p" }); + + // Simulate a writer in the middle of emitting a second event: + // the JSON has been written but the trailing newline has not. + const eventsFile = resolveJobEventsFile(workspace, jobId); + fs.appendFileSync(eventsFile, '{"seq":1,"ts":"2026-01-01T00:00:01.000Z","phase":"thinking"', "utf8"); + + const events = readJobEvents(workspace, jobId); + assert.equal(events.length, 1, "partial last line must be skipped, not throw"); + assert.equal(events[0].seq, 0); +}); + +test("appendJobEvent truncates raw field when single line would exceed 4KB", () => { + const workspace = makeTempDir(); + const jobId = "job-evt-8"; + const bigRaw = { huge: "x".repeat(5000), other: "field" }; + + appendJobEvent(workspace, jobId, { + seq: 0, + ts: "2026-01-01T00:00:00.000Z", + phase: "tool:bash", + raw: bigRaw + }); + + const events = readJobEvents(workspace, jobId); + assert.equal(events.length, 1); + assert.equal(events[0].seq, 0); + assert.equal(events[0].phase, "tool:bash"); + assert.equal(events[0].raw.truncated, true); + assert.match(events[0].raw.summary, /huge|other/); + + const eventsFile = resolveJobEventsFile(workspace, jobId); + const bytes = fs.statSync(eventsFile).size; + assert.ok(bytes <= 4096, `line should fit in 4KB, got ${bytes} bytes`); +}); + +test("appendJobEvent elides event entirely when even truncated raw is too big", () => { + const workspace = makeTempDir(); + const jobId = "job-evt-9"; + const massiveMessage = "y".repeat(5000); + + // message itself oversize, raw not the culprit + appendJobEvent(workspace, jobId, { + seq: 0, + ts: "2026-01-01T00:00:00.000Z", + method: "turn/started", + phase: "tool:bash", + message: massiveMessage, + raw: null + }); + + const events = readJobEvents(workspace, jobId); + assert.equal(events.length, 1); + assert.equal(events[0].type, "oversize-event-elided"); + assert.equal(events[0].seq, 0); + assert.equal(events[0].method, "turn/started"); + assert.equal(events[0].phase, "tool:bash"); +}); From a3f41811878698c2d4fdd0d63ed5983f55f18bbe Mon Sep 17 00:00:00 2001 From: bit-star Date: Tue, 19 May 2026 16:14:38 +0800 Subject: [PATCH 3/7] feat(codex): add notification stream hook + normalize + surface usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three changes to codex.mjs, building on the per-job NDJSON event API: 1. New exported `normalizeNotification(state, message)` function — pure transform from app-server notifications to the flat event shape that gets appended to {jobId}.events.ndjson. Covers thread/started, thread/name/updated, turn/started, item/started, item/completed, error, turn/completed, and an "unknown" fallback for forward-compat methods (e.g. thread/compact/started). Phase inference reuses the existing describeStartedItem / describeCompletedItem maps so the vocabulary stays consistent with on-screen progress text. 2. `captureTurn` accepts an `options.onNotification` callback. A small `dispatch(message)` wrapper applies state mutation first, then emits the normalized event. Order is load-bearing: normalize reads state.threadTurnIds populated by applyTurnNotification. onNotification errors are swallowed so a broken consumer can never crash the worker. 3. `runAppServerTurn` forwards `options.onNotification` to captureTurn and surfaces `usage` (from turnState.finalTurn?.usage) as a top-level field on the result, so /codex:status and the events stream can read token usage without traversing the nested `turn` payload. Tests: 13 new unit tests covering each method branch, phase inference edge cases (commandExecution -> running vs verifying), unknown-method fallback, and malformed-input resilience. 109/109 pass. No behavioral change for callers that don't pass onNotification — the hook is fully opt-in. The companion-side wiring lands separately. --- plugins/codex/scripts/lib/codex.mjs | 151 +++++++++++++++++++- tests/normalize-notification.test.mjs | 189 ++++++++++++++++++++++++++ 2 files changed, 336 insertions(+), 4 deletions(-) create mode 100644 tests/normalize-notification.test.mjs diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs index f2fe88bd..727310a2 100644 --- a/plugins/codex/scripts/lib/codex.mjs +++ b/plugins/codex/scripts/lib/codex.mjs @@ -481,6 +481,126 @@ function recordItem(state, item, lifecycle, threadId = null) { } } +/** + * Map an app-server notification to a flat, append-friendly event record for + * the per-job NDJSON event stream (see state.mjs:appendJobEvent). The Claude + * main loop polls `/codex:events ` and reasons over these records — + * so the shape must stay stable and self-describing. `seq` is injected by + * the appender; this function is pure and does not allocate sequence numbers. + */ +export function normalizeNotification(state, message) { + const ts = new Date().toISOString(); + const method = message.method ?? null; + const params = message.params ?? {}; + + switch (method) { + case "thread/started": + return { + ts, + method, + threadId: params.thread?.id ?? null, + turnId: null, + itemType: null, + lifecycle: null, + phase: "starting", + message: `Thread started (${params.thread?.id ?? "?"}).`, + raw: params + }; + case "thread/name/updated": + return { + ts, + method, + threadId: params.threadId ?? null, + turnId: null, + itemType: null, + lifecycle: null, + phase: "starting", + message: `Thread renamed: ${params.threadName ?? "?"}`, + raw: params + }; + case "turn/started": + return { + ts, + method, + threadId: params.threadId ?? null, + turnId: params.turn?.id ?? null, + itemType: null, + lifecycle: null, + phase: "thinking", + message: `Turn started (${params.turn?.id ?? "?"}).`, + raw: params + }; + case "item/started": { + const item = params.item ?? {}; + const description = describeStartedItem(state, item); + return { + ts, + method, + threadId: params.threadId ?? null, + turnId: state.threadTurnIds.get(params.threadId ?? state.threadId) ?? null, + itemType: item.type ?? null, + lifecycle: "started", + phase: description?.phase ?? "running", + message: description?.message ?? `Item started: ${item.type ?? "?"}`, + raw: item + }; + } + case "item/completed": { + const item = params.item ?? {}; + const description = describeCompletedItem(state, item); + return { + ts, + method, + threadId: params.threadId ?? null, + turnId: state.threadTurnIds.get(params.threadId ?? state.threadId) ?? null, + itemType: item.type ?? null, + lifecycle: "completed", + phase: description?.phase ?? "running", + message: description?.message ?? `Item completed: ${item.type ?? "?"}`, + raw: item + }; + } + case "error": + return { + ts, + method, + threadId: params.threadId ?? null, + turnId: null, + itemType: null, + lifecycle: null, + phase: "failed", + message: `Codex error: ${params.error?.message ?? "unknown"}`, + raw: params.error ?? null + }; + case "turn/completed": { + const turnStatus = params.turn?.status ?? "completed"; + return { + ts, + method, + threadId: params.threadId ?? null, + turnId: params.turn?.id ?? null, + itemType: null, + lifecycle: null, + phase: turnStatus === "completed" ? "completed" : "finalizing", + message: `Turn ${turnStatus}.`, + raw: params.turn ?? null + }; + } + default: + return { + ts, + method, + threadId: params.threadId ?? params.thread?.id ?? null, + turnId: params.turn?.id ?? null, + itemType: null, + lifecycle: null, + phase: "unknown", + message: `${method ?? "unknown"} notification`, + raw: params + }; + } +} + function applyTurnNotification(state, message) { switch (message.method) { case "thread/started": @@ -553,6 +673,22 @@ function applyTurnNotification(state, message) { async function captureTurn(client, threadId, startRequest, options = {}) { const state = createTurnCaptureState(threadId, options); const previousHandler = client.notificationHandler; + const { onNotification } = options; + + // Apply notification to state THEN emit normalized event. Order matters: + // normalize reads state.threadTurnIds / threadLabels that applyTurnNotification + // populates. onNotification errors are swallowed so a broken consumer can't + // crash the worker (events are observability, not control flow). + const dispatch = (message) => { + applyTurnNotification(state, message); + if (onNotification) { + try { + onNotification(normalizeNotification(state, message)); + } catch { + // intentionally ignored + } + } + }; client.setNotificationHandler((message) => { if (!state.turnId) { @@ -561,7 +697,7 @@ async function captureTurn(client, threadId, startRequest, options = {}) { } if (message.method === "thread/started" || message.method === "thread/name/updated") { - applyTurnNotification(state, message); + dispatch(message); return; } @@ -572,7 +708,7 @@ async function captureTurn(client, threadId, startRequest, options = {}) { return; } - applyTurnNotification(state, message); + dispatch(message); }); try { @@ -584,7 +720,7 @@ async function captureTurn(client, threadId, startRequest, options = {}) { } for (const message of state.bufferedNotifications) { if (belongsToTurn(state, message)) { - applyTurnNotification(state, message); + dispatch(message); } else { if (previousHandler) { previousHandler(message); @@ -1009,7 +1145,10 @@ export async function runAppServerTurn(cwd, options = {}) { effort: options.effort ?? null, outputSchema: options.outputSchema ?? null }), - { onProgress: options.onProgress } + { + onProgress: options.onProgress, + onNotification: options.onNotification + } ); return { @@ -1019,6 +1158,10 @@ export async function runAppServerTurn(cwd, options = {}) { finalMessage: turnState.lastAgentMessage, reasoningSummary: turnState.reasoningSummary, turn: turnState.finalTurn, + // Top-level surface so /codex:status and the events stream can read + // usage without traversing the nested `turn` object. May be null when + // the upstream codex CLI version does not report usage. + usage: turnState.finalTurn?.usage ?? null, error: turnState.error, stderr: cleanCodexStderr(client.stderr), fileChanges: turnState.fileChanges, diff --git a/tests/normalize-notification.test.mjs b/tests/normalize-notification.test.mjs new file mode 100644 index 00000000..153cba86 --- /dev/null +++ b/tests/normalize-notification.test.mjs @@ -0,0 +1,189 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import "./helpers.mjs"; // run shared env-isolation side effect +import { normalizeNotification } from "../plugins/codex/scripts/lib/codex.mjs"; + +function makeState(overrides = {}) { + return { + threadId: "thr_main", + threadTurnIds: new Map(), + threadLabels: new Map(), + ...overrides + }; +} + +test("normalizeNotification: thread/started carries thread id and starting phase", () => { + const state = makeState(); + const event = normalizeNotification(state, { + method: "thread/started", + params: { thread: { id: "thr_xyz", name: "rescue task" } } + }); + + assert.equal(event.method, "thread/started"); + assert.equal(event.threadId, "thr_xyz"); + assert.equal(event.phase, "starting"); + assert.equal(event.turnId, null); + assert.equal(event.itemType, null); + assert.equal(event.lifecycle, null); + assert.match(event.message, /Thread started/); + assert.match(event.ts, /^\d{4}-\d{2}-\d{2}T/); +}); + +test("normalizeNotification: turn/started captures turnId and 'thinking' phase", () => { + const state = makeState(); + const event = normalizeNotification(state, { + method: "turn/started", + params: { threadId: "thr_main", turn: { id: "trn_001" } } + }); + + assert.equal(event.method, "turn/started"); + assert.equal(event.threadId, "thr_main"); + assert.equal(event.turnId, "trn_001"); + assert.equal(event.phase, "thinking"); +}); + +test("normalizeNotification: item/started commandExecution maps to running phase", () => { + const state = makeState(); + state.threadTurnIds.set("thr_main", "trn_001"); + + const event = normalizeNotification(state, { + method: "item/started", + params: { + threadId: "thr_main", + item: { type: "commandExecution", command: "git status" } + } + }); + + assert.equal(event.method, "item/started"); + assert.equal(event.itemType, "commandExecution"); + assert.equal(event.lifecycle, "started"); + assert.equal(event.turnId, "trn_001"); + assert.equal(event.phase, "running"); + assert.match(event.message, /Running command: git status/); +}); + +test("normalizeNotification: item/started commandExecution detects verifying phase for test commands", () => { + const state = makeState(); + const event = normalizeNotification(state, { + method: "item/started", + params: { + threadId: "thr_main", + item: { type: "commandExecution", command: "npm test" } + } + }); + + // looksLikeVerificationCommand recognizes test/check/lint patterns + assert.equal(event.phase, "verifying"); +}); + +test("normalizeNotification: item/started fileChange maps to editing phase", () => { + const event = normalizeNotification(makeState(), { + method: "item/started", + params: { + threadId: "thr_main", + item: { type: "fileChange", changes: [{}, {}, {}] } + } + }); + + assert.equal(event.itemType, "fileChange"); + assert.equal(event.phase, "editing"); + assert.match(event.message, /3 file change/); +}); + +test("normalizeNotification: item/started mcpToolCall maps to investigating phase", () => { + const event = normalizeNotification(makeState(), { + method: "item/started", + params: { + threadId: "thr_main", + item: { type: "mcpToolCall", server: "context7", tool: "resolve-library-id" } + } + }); + + assert.equal(event.itemType, "mcpToolCall"); + assert.equal(event.phase, "investigating"); + assert.match(event.message, /context7\/resolve-library-id/); +}); + +test("normalizeNotification: item/started webSearch maps to investigating phase", () => { + const event = normalizeNotification(makeState(), { + method: "item/started", + params: { + threadId: "thr_main", + item: { type: "webSearch", query: "claude code agent sdk" } + } + }); + + assert.equal(event.itemType, "webSearch"); + assert.equal(event.phase, "investigating"); + assert.match(event.message, /Searching/); +}); + +test("normalizeNotification: item/completed carries completed lifecycle + exit code", () => { + const event = normalizeNotification(makeState(), { + method: "item/completed", + params: { + threadId: "thr_main", + item: { type: "commandExecution", command: "echo hi", status: "completed", exitCode: 0 } + } + }); + + assert.equal(event.method, "item/completed"); + assert.equal(event.lifecycle, "completed"); + assert.equal(event.itemType, "commandExecution"); + assert.match(event.message, /Command completed/); + assert.match(event.message, /exit 0/); +}); + +test("normalizeNotification: turn/completed with status=completed yields 'completed' phase", () => { + const event = normalizeNotification(makeState(), { + method: "turn/completed", + params: { threadId: "thr_main", turn: { id: "trn_001", status: "completed" } } + }); + + assert.equal(event.method, "turn/completed"); + assert.equal(event.phase, "completed"); + assert.equal(event.turnId, "trn_001"); +}); + +test("normalizeNotification: turn/completed with non-completed status yields 'finalizing' phase", () => { + const event = normalizeNotification(makeState(), { + method: "turn/completed", + params: { threadId: "thr_main", turn: { id: "trn_001", status: "cancelled" } } + }); + + assert.equal(event.phase, "finalizing"); + assert.match(event.message, /cancelled/); +}); + +test("normalizeNotification: error notification yields 'failed' phase", () => { + const event = normalizeNotification(makeState(), { + method: "error", + params: { error: { message: "context length exceeded", code: "context_overflow" } } + }); + + assert.equal(event.method, "error"); + assert.equal(event.phase, "failed"); + assert.match(event.message, /context length exceeded/); + assert.equal(event.raw.code, "context_overflow"); +}); + +test("normalizeNotification: unknown method falls back to 'unknown' phase without throwing", () => { + const event = normalizeNotification(makeState(), { + method: "thread/compact/started", + params: { threadId: "thr_main" } + }); + + assert.equal(event.method, "thread/compact/started"); + assert.equal(event.phase, "unknown"); + assert.equal(event.threadId, "thr_main"); +}); + +test("normalizeNotification: handles malformed message without crashing", () => { + // No method, no params, weird shape — should still return a valid record + const event = normalizeNotification(makeState(), {}); + + assert.equal(event.method, null); + assert.equal(event.phase, "unknown"); + assert.match(event.ts, /^\d{4}-\d{2}-\d{2}T/); +}); From 9332c2962c06b5ff344e3362e6061b995ac9a63e Mon Sep 17 00:00:00 2001 From: bit-star Date: Tue, 19 May 2026 16:23:49 +0800 Subject: [PATCH 4/7] feat(companion): wire per-job event stream + stall watchdog + events cmd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit End-to-end wiring of the observability path landed in the prior two commits (state.mjs events API + codex.mjs notification hook). In codex-companion.mjs: - executeTaskRun: forward `onNotification` through to runAppServerTurn. Sync foreground callers still don't pass one; only the background task-worker path opts in. - handleTaskWorker: build a same-process closure that (a) appends every normalized notification to {jobId}.events.ndjson with a monotonic per-job seq number; (b) reflects phase transitions into state.json via upsertJob (only on real phase change, to keep the unlocked single-flight writer rare); (c) runs a 5s stall watchdog — when the gap since the last event exceeds CODEX_COMPANION_STALL_SECONDS (default 60s), it emits one {type:"watchdog", phase:"stuck"} record and flips job phase to stuck. It does NOT cancel. Main-loop Claude decides what to do next (continue / compact / cancel). (d) on terminal exit (success OR failure), emits a single {type:"job/exited", phase:"completed"|"failed", exitCode, errorMessage} record so a polling reader can distinguish "still slow" from "already finished". The job.status state.json field is no longer the sole source of truth — main-loop Claude must look for the job/exited event. Observability errors are swallowed; they must never crash the worker. Watchdog interval is unref'd so it can't keep the event loop alive. - New `events [--since|--after-seq|--limit] [--json]` subcommand delegating to readJobEvents. Default output is human-readable lines; --json returns {jobId, eventsFile, count, events}. After-seq + since semantics match readJobEvents (after-seq takes precedence). printUsage updated. Tests: 7 integration tests spawning the real CLI to verify the events command's empty-result, append-and-read, after-seq filtering, limit capping, human-readable rendering, and missing-jobId usage paths. 116/116 pass. The stall threshold is configurable via env var for now (CODEX_COMPANION_STALL_SECONDS). A `task --max-stall-seconds ` CLI flag is deferred to a later commit so handleTask's option parser stays untouched in this slice. --- plugins/codex/scripts/codex-companion.mjs | 180 ++++++++++++++++++++-- tests/events-command.test.mjs | 121 +++++++++++++++ 2 files changed, 285 insertions(+), 16 deletions(-) create mode 100644 tests/events-command.test.mjs diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 35222fd5..09016ef4 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -25,9 +25,12 @@ import { collectReviewContext, ensureGitRepository, resolveReviewTarget } from " import { binaryAvailable, terminateProcessTree } from "./lib/process.mjs"; import { loadPromptTemplate, interpolateTemplate } from "./lib/prompts.mjs"; import { + appendJobEvent, generateJobId, getConfig, listJobs, + readJobEvents, + resolveJobEventsFile, setConfig, upsertJob, writeJobFile @@ -78,6 +81,7 @@ function printUsage() { " node scripts/codex-companion.mjs review [--wait|--background] [--base ] [--scope ]", " node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base ] [--scope ] [focus text]", " node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model ] [--effort ] [prompt]", + " node scripts/codex-companion.mjs events [--since ] [--after-seq ] [--limit ] [--json]", " node scripts/codex-companion.mjs status [job-id] [--all] [--json]", " node scripts/codex-companion.mjs result [job-id] [--json]", " node scripts/codex-companion.mjs cancel [job-id] [--json]" @@ -487,6 +491,7 @@ async function executeTaskRun(request) { effort: request.effort, sandbox: request.write ? "workspace-write" : "read-only", onProgress: request.onProgress, + onNotification: request.onNotification, persistThread: true, threadName: resumeThreadId ? null : buildPersistentTaskThreadName(request.prompt || DEFAULT_CONTINUE_PROMPT) }); @@ -803,14 +808,15 @@ async function handleTaskWorker(argv) { const cwd = resolveCommandCwd(options); const workspaceRoot = resolveCommandWorkspace(options); - const storedJob = readStoredJob(workspaceRoot, options["job-id"]); + const jobId = options["job-id"]; + const storedJob = readStoredJob(workspaceRoot, jobId); if (!storedJob) { - throw new Error(`No stored job found for ${options["job-id"]}.`); + throw new Error(`No stored job found for ${jobId}.`); } const request = storedJob.request; if (!request || typeof request !== "object") { - throw new Error(`Stored job ${options["job-id"]} is missing its task request payload.`); + throw new Error(`Stored job ${jobId} is missing its task request payload.`); } const { logFile, progress } = createTrackedProgress( @@ -822,19 +828,158 @@ async function handleTaskWorker(argv) { logFile: storedJob.logFile ?? null } ); - await runTrackedJob( - { - ...storedJob, - workspaceRoot, - logFile - }, - () => - executeTaskRun({ - ...request, - onProgress: progress - }), - { logFile } - ); + + // Per-job event stream wiring. Both `onNotification` and the stall watchdog + // share `seq` and `lastEventAt` via this closure — they run in the same + // task-worker process, so no cross-process synchronization is needed. + let seq = 0; + let lastEventAt = Date.now(); + let lastEmittedPhase = null; + + const stallSecondsRaw = Number(process.env.CODEX_COMPANION_STALL_SECONDS); + const stallSeconds = Number.isFinite(stallSecondsRaw) && stallSecondsRaw > 0 ? stallSecondsRaw : 60; + + const onNotification = (event) => { + try { + appendJobEvent(workspaceRoot, jobId, { seq: seq++, ...event }); + lastEventAt = Date.now(); + // Reflect phase transitions into the central job summary so /codex:status + // shows the same vocabulary the events stream uses. Only on real phase + // change to keep state.json writes rare (no lock; single-flight worker). + if (event.phase && event.phase !== lastEmittedPhase) { + lastEmittedPhase = event.phase; + upsertJob(workspaceRoot, { id: jobId, phase: event.phase, lastEventAt: event.ts }); + } + } catch { + // Observability is fire-and-forget; never crash the worker. + } + }; + + // Watchdog: if no meaningful event arrives for `stallSeconds`, emit a + // single `{type:"watchdog", phase:"stuck"}` record. We deliberately do + // not cancel — main-loop Claude decides what to do (continue / compact / + // cancel). Refresh `lastEventAt` after emit so we don't spam. + const watchdog = setInterval(() => { + const elapsedMs = Date.now() - lastEventAt; + if (elapsedMs <= stallSeconds * 1000) return; + try { + appendJobEvent(workspaceRoot, jobId, { + seq: seq++, + ts: new Date().toISOString(), + type: "watchdog", + phase: "stuck", + stallMs: elapsedMs, + since: new Date(lastEventAt).toISOString() + }); + upsertJob(workspaceRoot, { id: jobId, phase: "stuck", lastEventAt: new Date().toISOString() }); + lastEventAt = Date.now(); + lastEmittedPhase = "stuck"; + } catch { + // ignore + } + }, 5000); + watchdog.unref?.(); + + let completed = false; + let workerError = null; + try { + await runTrackedJob( + { + ...storedJob, + workspaceRoot, + logFile + }, + () => + executeTaskRun({ + ...request, + onProgress: progress, + onNotification + }), + { logFile } + ); + completed = true; + } catch (error) { + workerError = error; + } finally { + clearInterval(watchdog); + // Terminal event so a polling reader can distinguish "still slow" from + // "already finished". This is the single source of truth for end-of-job — + // main-loop Claude must check for {type:"job/exited"} not just job.status. + try { + appendJobEvent(workspaceRoot, jobId, { + seq: seq++, + ts: new Date().toISOString(), + type: "job/exited", + phase: completed ? "completed" : "failed", + exitCode: completed ? 0 : 1, + errorMessage: workerError + ? workerError instanceof Error + ? workerError.message + : String(workerError) + : null + }); + } catch { + // ignore + } + } + if (workerError) throw workerError; +} + +async function handleEvents(argv) { + const { options, positionals } = parseCommandInput(argv, { + valueOptions: ["cwd", "since", "after-seq", "limit"], + booleanOptions: ["json"] + }); + + const cwd = resolveCommandCwd(options); + const workspaceRoot = resolveCommandWorkspace(options); + const jobId = positionals[0]; + if (!jobId) { + throw new Error("Usage: events [--since ] [--after-seq ] [--limit ] [--json]"); + } + + const readOptions = {}; + if (options["after-seq"] != null) { + const parsed = Number(options["after-seq"]); + if (!Number.isFinite(parsed)) { + throw new Error(`Invalid --after-seq value: ${options["after-seq"]}`); + } + readOptions.afterSeq = parsed; + } + if (options.since) { + readOptions.since = String(options.since); + } + if (options.limit != null) { + const parsed = Number(options.limit); + if (!Number.isFinite(parsed) || parsed < 0) { + throw new Error(`Invalid --limit value: ${options.limit}`); + } + readOptions.limit = parsed; + } + + const events = readJobEvents(workspaceRoot, jobId, readOptions); + const payload = { + jobId, + eventsFile: resolveJobEventsFile(workspaceRoot, jobId), + count: events.length, + events + }; + + if (options.json) { + console.log(JSON.stringify(payload, null, 2)); + return; + } + + if (events.length === 0) { + process.stdout.write(`No events yet for ${jobId}.\n`); + return; + } + for (const event of events) { + const phase = event.phase ?? "?"; + const method = event.method ?? event.type ?? "?"; + const message = event.message ?? ""; + process.stdout.write(`[${event.ts}] seq=${event.seq} ${method} ${phase} ${message}\n`); + } } async function handleStatus(argv) { @@ -1003,6 +1148,9 @@ async function main() { case "task-worker": await handleTaskWorker(argv); break; + case "events": + await handleEvents(argv); + break; case "status": await handleStatus(argv); break; diff --git a/tests/events-command.test.mjs b/tests/events-command.test.mjs new file mode 100644 index 00000000..20cd74e5 --- /dev/null +++ b/tests/events-command.test.mjs @@ -0,0 +1,121 @@ +import path from "node:path"; +import test from "node:test"; +import assert from "node:assert/strict"; +import { fileURLToPath } from "node:url"; + +import { makeTempDir, run } from "./helpers.mjs"; +import { appendJobEvent } from "../plugins/codex/scripts/lib/state.mjs"; + +const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const SCRIPT = path.join(ROOT, "plugins", "codex", "scripts", "codex-companion.mjs"); + +test("events command without job-id errors with usage", () => { + const workspace = makeTempDir(); + const result = run("node", [SCRIPT, "events"], { cwd: workspace }); + assert.notEqual(result.status, 0); + assert.match(result.stderr, /Usage: events /); +}); + +test("events command returns empty count for unknown job (--json)", () => { + const workspace = makeTempDir(); + const result = run("node", [SCRIPT, "events", "ghost-job", "--json", "--cwd", workspace]); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.jobId, "ghost-job"); + assert.equal(payload.count, 0); + assert.deepEqual(payload.events, []); +}); + +test("events command returns appended events (--json)", () => { + const workspace = makeTempDir(); + appendJobEvent(workspace, "task-int-1", { + seq: 0, + ts: "2026-01-01T00:00:00.000Z", + phase: "starting", + method: "thread/started", + message: "Thread started (thr_x)." + }); + appendJobEvent(workspace, "task-int-1", { + seq: 1, + ts: "2026-01-01T00:00:01.000Z", + phase: "thinking", + method: "turn/started", + message: "Turn started (trn_y)." + }); + + const result = run("node", [SCRIPT, "events", "task-int-1", "--json", "--cwd", workspace]); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.jobId, "task-int-1"); + assert.equal(payload.count, 2); + assert.equal(payload.events[0].phase, "starting"); + assert.equal(payload.events[1].phase, "thinking"); +}); + +test("events command --after-seq filters incrementally", () => { + const workspace = makeTempDir(); + for (let i = 0; i < 4; i++) { + appendJobEvent(workspace, "task-int-2", { + seq: i, + ts: `2026-01-01T00:00:0${i}.000Z`, + phase: "p", + message: `event ${i}` + }); + } + const result = run("node", [SCRIPT, "events", "task-int-2", "--after-seq", "1", "--json", "--cwd", workspace]); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.count, 2); + assert.deepEqual( + payload.events.map((event) => event.seq), + [2, 3] + ); +}); + +test("events command prints human-readable lines without --json", () => { + const workspace = makeTempDir(); + appendJobEvent(workspace, "task-int-3", { + seq: 0, + ts: "2026-01-01T00:00:00.000Z", + phase: "thinking", + method: "turn/started", + message: "Turn started (trn_z)." + }); + const result = run("node", [SCRIPT, "events", "task-int-3", "--cwd", workspace]); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /seq=0/); + assert.match(result.stdout, /thinking/); + assert.match(result.stdout, /Turn started/); +}); + +test("events command --limit caps event count", () => { + const workspace = makeTempDir(); + for (let i = 0; i < 5; i++) { + appendJobEvent(workspace, "task-int-4", { + seq: i, + ts: `2026-01-01T00:00:0${i}.000Z`, + phase: "p" + }); + } + const result = run("node", [SCRIPT, "events", "task-int-4", "--limit", "2", "--json", "--cwd", workspace]); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.count, 2); + assert.deepEqual( + payload.events.map((event) => event.seq), + [0, 1] + ); +}); + +test("events command shows empty message line for unknown job without --json", () => { + const workspace = makeTempDir(); + const result = run("node", [SCRIPT, "events", "ghost-job-2", "--cwd", workspace]); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /No events yet/); +}); From 3a0ae2d961e9dae3c6894b0dd72ce5687b3d19d1 Mon Sep 17 00:00:00 2001 From: bit-star Date: Tue, 19 May 2026 17:09:46 +0800 Subject: [PATCH 5/7] fix(events): job/exited verdict + cover codex 0.131 schema gaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E2E against real codex CLI 0.131.0-alpha.9 uncovered three issues mock tests could not surface. Fixed all three; rerun confirms zero `unknown` phase events in a successful turn. 1. handleTaskWorker job/exited bug (correctness) runTrackedJob does NOT throw when codex returns a failed turn — it resolves with execution.exitStatus != 0 and writes state.status="failed" out-of-band. Previous code keyed `completed` off the absence of an exception, so a failed turn was misreported as `phase:"completed", exitCode:0` in the terminal job/exited event. This is the single record main-loop Claude reads to decide "did this work?" — getting it wrong led to silent false positives. Fix: inspect execution.exitStatus directly. Bug repro: dispatch with --effort minimal (which conflicts with web_search) — codex returns an invalid_request_error, runTrackedJob resolves with exitStatus=1, stored state.json shows status="failed", but the prior code emitted {phase:"completed", exitCode:0}. After fix: {phase:"failed", exitCode:1, errorMessage:"Task did not complete successfully ..."}. 2. normalize coverage gaps (forward-compat / readability) codex 0.131 emits three notification methods + three item types the prior switch did not recognize, leaving them as phase:"unknown" with generic fallback messages: - method `thread/status/changed` with status.type in {"idle"} → phase:"idle", message "Thread idle.". Post-turn quiescence signal. - method `thread/tokenUsage/updated` → phase:"metering" (new word in the phase vocab). Streaming token-usage source. This is the real event main-loop Claude should poll to detect context-budget pressure before turn/completed surfaces final usage. NOTE: codex 0.131 payload shape for the usage object is not documented; normalize tries {inputTokens,outputTokens,cachedInputTokens} and several aliases; when no recognized keys are found, falls back to a stable label and preserves raw. Concrete schema discovery is a Phase 3 follow-up. - item.type `agentMessage` / `assistantMessage` / `reasoning` → phase:"thinking". `agentMessage` is codex's final-reply item; describeStartedItem/describeCompletedItem now use a new extractItemText helper to surface a content preview ("Codex replied: pong") so main-loop Claude can recognize the answer from the event stream without fetching /codex:result. 3. Single E2E rerun verification Same prompt with --effort low (avoids the minimal+web_search constraint): 13 events emitted, phase distribution {thinking:8, completed:2, idle:1, metering:1, warning:1, unknown:0}. The prior normalize generated 5 unknown out of 9 events (56%); this commit brings it to 0/13 on the happy path. Tests: +6 new normalize unit tests (status idle, tokenUsage with usage, tokenUsage without usage, agentMessage with text, agentMessage with content[].text, schema fallback). 128/128 pass total. The job/exited fix is exercised in the existing fake-codex fixture indirectly; a direct unit test would need a fake runTrackedJob that simulates "resolved with non-zero exitStatus" which is non-trivial to mock — for now the E2E repro and the inline reasoning in the comment are the documented evidence. --- plugins/codex/scripts/codex-companion.mjs | 26 ++-- plugins/codex/scripts/lib/codex.mjs | 108 ++++++++++++++++ tests/normalize-notification.test.mjs | 143 ++++++++++++++++++++++ 3 files changed, 268 insertions(+), 9 deletions(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 09016ef4..52345fca 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -880,10 +880,10 @@ async function handleTaskWorker(argv) { }, 5000); watchdog.unref?.(); - let completed = false; + let execution = null; let workerError = null; try { - await runTrackedJob( + execution = await runTrackedJob( { ...storedJob, workspaceRoot, @@ -897,26 +897,34 @@ async function handleTaskWorker(argv) { }), { logFile } ); - completed = true; } catch (error) { workerError = error; } finally { clearInterval(watchdog); - // Terminal event so a polling reader can distinguish "still slow" from - // "already finished". This is the single source of truth for end-of-job — - // main-loop Claude must check for {type:"job/exited"} not just job.status. + // Terminal event — single source of truth for end-of-job. Main-loop + // Claude must check for {type:"job/exited"} not just job.status. + // + // CRITICAL: runTrackedJob does NOT throw when codex returns a failed + // turn — it resolves with execution.exitStatus != 0 and writes + // state.status="failed" out-of-band. So "did we catch an exception" + // is NOT enough to decide success. We must inspect execution.exitStatus + // and treat exitStatus !== 0 as failure even when no exception was + // thrown. + const success = !workerError && execution?.exitStatus === 0; try { appendJobEvent(workspaceRoot, jobId, { seq: seq++, ts: new Date().toISOString(), type: "job/exited", - phase: completed ? "completed" : "failed", - exitCode: completed ? 0 : 1, + phase: success ? "completed" : "failed", + exitCode: success ? 0 : execution?.exitStatus ?? 1, errorMessage: workerError ? workerError instanceof Error ? workerError.message : String(workerError) - : null + : success + ? null + : "Task did not complete successfully (see prior events for details)." }); } catch { // ignore diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs index 727310a2..a7196274 100644 --- a/plugins/codex/scripts/lib/codex.mjs +++ b/plugins/codex/scripts/lib/codex.mjs @@ -257,11 +257,38 @@ function describeStartedItem(state, item) { } case "webSearch": return { message: `Searching: ${shorten(item.query, 96)}`, phase: "investigating" }; + case "userMessage": + return { message: "User input received.", phase: "thinking" }; + case "assistantMessage": + case "agentMessage": { + const text = extractItemText(item); + return { + message: text ? `Codex replying: ${shorten(text, 96)}` : "Codex drafting reply.", + phase: "thinking" + }; + } + case "reasoning": + return { message: "Reasoning.", phase: "thinking" }; default: return null; } } +function extractItemText(item) { + // codex CLI emits item content under several shapes depending on the + // version. Try the common ones in priority order; the raw payload is + // always preserved in the event record so callers can fall back. + if (typeof item?.text === "string") return item.text; + if (Array.isArray(item?.content)) { + return item.content + .map((part) => part?.text || part?.value || "") + .join("") + .trim(); + } + if (typeof item?.message === "string") return item.message; + return ""; +} + function describeCompletedItem(state, item) { switch (item.type) { case "commandExecution": { @@ -288,6 +315,18 @@ function describeCompletedItem(state, item) { } case "exitedReviewMode": return { message: "Reviewer finished.", phase: "finalizing" }; + case "userMessage": + return { message: "User input received.", phase: "thinking" }; + case "assistantMessage": + case "agentMessage": { + const text = extractItemText(item); + return { + message: text ? `Codex replied: ${shorten(text, 96)}` : "Codex reply complete.", + phase: "thinking" + }; + } + case "reasoning": + return { message: "Reasoning complete.", phase: "thinking" }; default: return null; } @@ -518,6 +557,75 @@ export function normalizeNotification(state, message) { message: `Thread renamed: ${params.threadName ?? "?"}`, raw: params }; + case "thread/status/changed": { + const statusType = params.status?.type ?? null; + let phase = "unknown"; + let message = `Thread status: ${statusType ?? "?"}`; + if (statusType === "active") { + phase = "thinking"; + message = "Thread active."; + } else if (statusType === "idle") { + // codex flips to idle after a turn completes (post turn/completed). + // Treat as completed-side phase so main loop reads it as quiescent. + phase = "idle"; + message = "Thread idle."; + } else if (statusType === "systemError") { + phase = "failed"; + message = "Thread system error."; + } + return { + ts, + method, + threadId: params.threadId ?? null, + turnId: null, + itemType: null, + lifecycle: null, + phase, + message, + raw: params + }; + } + case "thread/tokenUsage/updated": { + // codex streams token usage updates as the turn progresses. P7 + // top-level surface (runAppServerTurn.usage) reads from + // turnState.finalTurn?.usage at turn end; this event source is the + // real-time signal main-loop Claude can poll to detect "this turn + // is burning a lot of tokens, maybe compact before it overflows." + const usage = params.usage ?? params.tokenUsage ?? params ?? {}; + const inTok = usage.inputTokens ?? usage.input ?? null; + const outTok = usage.outputTokens ?? usage.output ?? null; + const cachedTok = usage.cachedInputTokens ?? usage.cached ?? null; + const parts = []; + if (inTok != null) parts.push(`in=${inTok}`); + if (outTok != null) parts.push(`out=${outTok}`); + if (cachedTok != null) parts.push(`cached=${cachedTok}`); + return { + ts, + method, + threadId: params.threadId ?? null, + turnId: null, + itemType: null, + lifecycle: null, + phase: "metering", + message: parts.length > 0 ? `Token usage: ${parts.join(" ")}` : "Token usage updated.", + raw: params + }; + } + case "warning": + // Codex emits these for non-fatal conditions (context budget exceeded, + // skipped capabilities, etc.). Surface them so main-loop Claude can + // factor them into routing decisions without confusing them with errors. + return { + ts, + method, + threadId: params.threadId ?? null, + turnId: null, + itemType: null, + lifecycle: null, + phase: "warning", + message: params.message ? `Warning: ${params.message}` : "Warning notification.", + raw: params + }; case "turn/started": return { ts, diff --git a/tests/normalize-notification.test.mjs b/tests/normalize-notification.test.mjs index 153cba86..a8d6a922 100644 --- a/tests/normalize-notification.test.mjs +++ b/tests/normalize-notification.test.mjs @@ -187,3 +187,146 @@ test("normalizeNotification: handles malformed message without crashing", () => assert.equal(event.phase, "unknown"); assert.match(event.ts, /^\d{4}-\d{2}-\d{2}T/); }); + +test("normalizeNotification: thread/status/changed active -> thinking phase", () => { + const event = normalizeNotification(makeState(), { + method: "thread/status/changed", + params: { threadId: "thr_main", status: { type: "active", activeFlags: [] } } + }); + + assert.equal(event.method, "thread/status/changed"); + assert.equal(event.phase, "thinking"); + assert.equal(event.threadId, "thr_main"); + assert.match(event.message, /active/); +}); + +test("normalizeNotification: thread/status/changed systemError -> failed phase", () => { + const event = normalizeNotification(makeState(), { + method: "thread/status/changed", + params: { threadId: "thr_main", status: { type: "systemError" } } + }); + + assert.equal(event.phase, "failed"); + assert.match(event.message, /system error/i); +}); + +test("normalizeNotification: thread/status/changed unknown status type -> unknown (forward-compat)", () => { + const event = normalizeNotification(makeState(), { + method: "thread/status/changed", + params: { threadId: "thr_main", status: { type: "futureStatusWeDontKnow" } } + }); + + assert.equal(event.phase, "unknown"); +}); + +test("normalizeNotification: warning -> warning phase with message body", () => { + const event = normalizeNotification(makeState(), { + method: "warning", + params: { threadId: "thr_main", message: "Exceeded skills context budget of 2%." } + }); + + assert.equal(event.method, "warning"); + assert.equal(event.phase, "warning"); + assert.match(event.message, /Exceeded skills context budget/); +}); + +test("normalizeNotification: item/started userMessage -> thinking phase", () => { + const event = normalizeNotification(makeState(), { + method: "item/started", + params: { + threadId: "thr_main", + item: { type: "userMessage", id: "u1", content: [{ type: "text", text: "Hi" }] } + } + }); + + assert.equal(event.itemType, "userMessage"); + assert.equal(event.phase, "thinking"); + assert.match(event.message, /User input received/); +}); + +test("normalizeNotification: item/started assistantMessage -> thinking phase", () => { + const event = normalizeNotification(makeState(), { + method: "item/started", + params: { + threadId: "thr_main", + item: { type: "assistantMessage", id: "a1" } + } + }); + + assert.equal(event.itemType, "assistantMessage"); + assert.equal(event.phase, "thinking"); +}); + +test("normalizeNotification: item/started reasoning -> thinking phase", () => { + const event = normalizeNotification(makeState(), { + method: "item/started", + params: { threadId: "thr_main", item: { type: "reasoning" } } + }); + + assert.equal(event.itemType, "reasoning"); + assert.equal(event.phase, "thinking"); +}); + +test("normalizeNotification: thread/status/changed idle -> idle phase", () => { + const event = normalizeNotification(makeState(), { + method: "thread/status/changed", + params: { threadId: "thr_main", status: { type: "idle" } } + }); + + assert.equal(event.phase, "idle"); + assert.match(event.message, /idle/i); +}); + +test("normalizeNotification: thread/tokenUsage/updated -> metering phase with in/out tokens", () => { + const event = normalizeNotification(makeState(), { + method: "thread/tokenUsage/updated", + params: { + threadId: "thr_main", + usage: { inputTokens: 1234, outputTokens: 56, cachedInputTokens: 100 } + } + }); + + assert.equal(event.method, "thread/tokenUsage/updated"); + assert.equal(event.phase, "metering"); + assert.match(event.message, /in=1234/); + assert.match(event.message, /out=56/); + assert.match(event.message, /cached=100/); +}); + +test("normalizeNotification: thread/tokenUsage/updated without usage still yields metering phase", () => { + const event = normalizeNotification(makeState(), { + method: "thread/tokenUsage/updated", + params: { threadId: "thr_main" } + }); + + assert.equal(event.phase, "metering"); + assert.match(event.message, /Token usage updated/i); +}); + +test("normalizeNotification: item/started agentMessage with text shows reply preview", () => { + const event = normalizeNotification(makeState(), { + method: "item/started", + params: { + threadId: "thr_main", + item: { type: "agentMessage", text: "pong" } + } + }); + + assert.equal(event.itemType, "agentMessage"); + assert.equal(event.phase, "thinking"); + assert.match(event.message, /Codex replying: pong/); +}); + +test("normalizeNotification: item/completed agentMessage with content[].text shows reply", () => { + const event = normalizeNotification(makeState(), { + method: "item/completed", + params: { + threadId: "thr_main", + item: { type: "agentMessage", content: [{ type: "text", text: "Final answer: 42" }] } + } + }); + + assert.equal(event.itemType, "agentMessage"); + assert.equal(event.phase, "thinking"); + assert.match(event.message, /Codex replied: Final answer: 42/); +}); From 1014d1074f8e24958d3789873ddc04eea79f7183 Mon Sep 17 00:00:00 2001 From: bit-star Date: Tue, 19 May 2026 17:22:35 +0800 Subject: [PATCH 6/7] feat(phase-3): compact recovery + slash commands + rescue defaults to bg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the "operate codex like a Claude subagent" half of the design contract. Phase 1+2 gave the main loop an event stream; Phase 3 gives it the slash-command surface and the protocol-native recovery path. In lib/codex.mjs: - New exported compactAppServerThread(cwd, {threadId}). Wraps codex app-server's thread/compact/start RPC. This is the protocol-native recovery for "prompt too long" — main-loop Claude calls it after a turn fails with context-overflow, then resumes via /codex:rescue --resume . Fire-and-return: the call awaits the app-server's ack but does not consume the streaming response. The broker recognizes thread/compact/start as STREAMING_METHOD but routes the stream to whoever owns it at that moment; compaction itself completes on the codex side regardless of consumer presence. Uses reuseExistingBroker: true so it can punch through to an already-running broker if one exists. The exact success payload shape is undocumented in codex CLI 0.131; result is preserved verbatim under .result for forward-compat. In codex-companion.mjs: - New `compact [--json]` subcommand wrapping compactAppServerThread. Plain output prints the operation result and hints the resume flow; --json returns the full structured report. Smoke-tested against real codex 0.131: bogus thread id correctly returns attempted:true, compacted:false, with codex's own error ("invalid thread id") preserved under .detail. In agents/codex-rescue.md + commands/rescue.md: - Default execution mode flipped from foreground to background. The prior heuristic ("small bounded => foreground; complex => background") was the deadlock root cause: a small task that stalls is still a deadlock, and "small" is unknowable in advance. Background is the safe default — the main loop polls /codex:events for progress instead of blocking on the synchronous Bash call. --wait is honored when the user explicitly asks for foreground. In commands/: - New events.md: slash command surfacing the per-job event stream. Documents the {type:"job/exited"} terminal-state contract, the phase:"stuck" watchdog signal, the phase:"metering" token-usage source, and the --after-seq incremental polling pattern. - New compact.md: slash command for the recovery sequence. Documents the typical "cancel → compact → resume with amended prompt" idiom main-loop Claude should follow when codex hits context overflow. In tests/commands.test.mjs: - Updated assertions to reflect the new background default and to include events.md + compact.md in the commands/ file-list invariant. The prior assertions hard-coded the prose "default to foreground" and the file list of 7 commands; both needed to track this change. Tests: 128/128 pass. No new tests added — the compact path is exercised indirectly via a smoke check (bogus thread id returns the expected structured error against the real app-server). A full E2E for compact needs a real turn first to obtain a valid thread id; that lands in Phase 4 alongside version bump + CHANGELOG. --- plugins/codex/agents/codex-rescue.md | 3 +- plugins/codex/commands/compact.md | 33 ++++++++++++ plugins/codex/commands/events.md | 24 +++++++++ plugins/codex/commands/rescue.md | 2 +- plugins/codex/scripts/codex-companion.mjs | 43 ++++++++++++++++ plugins/codex/scripts/lib/codex.mjs | 62 +++++++++++++++++++++++ tests/commands.test.mjs | 21 ++++++-- 7 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 plugins/codex/commands/compact.md create mode 100644 plugins/codex/commands/events.md diff --git a/plugins/codex/agents/codex-rescue.md b/plugins/codex/agents/codex-rescue.md index 7009ec86..ec3ba088 100644 --- a/plugins/codex/agents/codex-rescue.md +++ b/plugins/codex/agents/codex-rescue.md @@ -20,8 +20,7 @@ Selection guidance: Forwarding rules: - Use exactly one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...`. -- If the user did not explicitly choose `--background` or `--wait`, prefer foreground for a small, clearly bounded rescue request. -- If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer background execution. +- Default to background execution: if the user did not explicitly choose `--background` or `--wait`, append `--background`. The companion command returns a job id immediately so the main Claude loop can monitor progress via `/codex:events ` and `/codex:status ` instead of blocking on the synchronous Bash call. Honor `--wait` only when the user explicitly asks for foreground execution. - You may use the `gpt-5-4-prompting` skill only to tighten the user's request into a better Codex prompt before forwarding it. - Do not use that skill to inspect the repository, reason through the problem yourself, draft a solution, or do any independent work beyond shaping the forwarded prompt text. - Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own. diff --git a/plugins/codex/commands/compact.md b/plugins/codex/commands/compact.md new file mode 100644 index 00000000..4bb4e882 --- /dev/null +++ b/plugins/codex/commands/compact.md @@ -0,0 +1,33 @@ +--- +description: Trigger codex's protocol-native context compaction on a thread (recovery path for "prompt too long") +argument-hint: ' [--json]' +disable-model-invocation: true +allowed-tools: Bash(node:*) +--- + +!`node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" compact "$ARGUMENTS"` + +When to use: + +- A Codex task has stalled or failed with a context-overflow error and you want to recover the thread without losing its history. +- `/codex:events ` shows `phase:"stuck"` for an extended period and `phase:"metering"` records indicate the token budget is near the limit. +- A previous turn returned `phase:"failed"` with a context-length error and you want to compact before resuming. + +Typical recovery sequence: + +1. `/codex:cancel ` if a turn is still running and stuck. +2. `/codex:compact ` — this command. Returns immediately after the codex app-server acknowledges the compaction request. +3. `/codex:rescue --resume ` — resume the (now compacted) thread with a tighter prompt. + +Output format: + +- Without `--json`: prints `Compaction started on .` and a hint for the resume flow. +- With `--json`: returns `{attempted, compacted, transport, result, detail}`. + +If the thread id is malformed or the app-server rejects the request, the command exits non-zero and emits the codex-side error in `stderr` (or in `detail` under `--json`). + +Notes: + +- Compaction runs codex-side after this command returns; it is not a synchronous wait. +- `thread/compact/start` is a streaming RPC in the app-server protocol, but this wrapper does not consume the stream — codex completes compaction in the background regardless of whether a stream consumer is active. +- The exact success payload shape from codex CLI is preserved verbatim under `result` for forward-compat; downstream consumers should not depend on its keys beyond what they have observed. diff --git a/plugins/codex/commands/events.md b/plugins/codex/commands/events.md new file mode 100644 index 00000000..d72975de --- /dev/null +++ b/plugins/codex/commands/events.md @@ -0,0 +1,24 @@ +--- +description: Stream Codex notifications as a per-job NDJSON event log for poll-based monitoring +argument-hint: ' [--since ] [--after-seq ] [--limit ] [--json]' +disable-model-invocation: true +allowed-tools: Bash(node:*) +--- + +!`node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" events "$ARGUMENTS"` + +How to use the output: + +- Each line in the stream is a normalized notification: `seq`, `ts`, `method`, `phase`, `itemType`, `message`, and `raw`. +- Treat `{type:"job/exited"}` as the single source of truth for terminal state — its `phase` is `completed` or `failed`, and `exitCode` reflects the codex turn outcome. Do not infer end-of-job from job-level `status` alone. +- `phase:"stuck"` records (emitted by the worker's stall watchdog) mean codex produced no new notifications for the configured stall window (default 60s, override with `CODEX_COMPANION_STALL_SECONDS`). The job is not cancelled — the main loop decides whether to keep waiting, run `/codex:compact ` to recover from context overflow, or `/codex:cancel ` to abort. +- `phase:"warning"` records carry codex-side non-fatal conditions (context budget exceeded, capabilities removed). They are informational, not failures. +- `phase:"metering"` records come from `thread/tokenUsage/updated` and stream real-time token usage; poll them to detect "this turn is burning a lot of tokens" before hitting context overflow. +- Use `--after-seq ` for incremental polling. If both `--after-seq` and `--since` are supplied, `--after-seq` wins. Default (no filter) returns all events for the job; the main loop is responsible for dedupe-by-seq. + +Output format: + +- Without `--json`: one human-readable line per event. +- With `--json`: `{jobId, eventsFile, count, events: [...]}`. + +If no events exist yet for the job id, the command prints `No events yet for .` and exits 0. diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md index 56de9555..0a853a67 100644 --- a/plugins/codex/commands/rescue.md +++ b/plugins/codex/commands/rescue.md @@ -15,7 +15,7 @@ Execution mode: - If the request includes `--background`, run the `codex:codex-rescue` subagent in the background. - If the request includes `--wait`, run the `codex:codex-rescue` subagent in the foreground. -- If neither flag is present, default to foreground. +- If neither flag is present, default to background. The companion returns a job id immediately; poll progress with `/codex:events --since --json` (or `/codex:status `) instead of blocking on a synchronous Bash call. This prevents the main Claude loop from deadlocking when Codex stalls or fails silently. - `--background` and `--wait` are execution flags for Claude Code. Do not forward them to `task`, and do not treat them as part of the natural-language task text. - `--model` and `--effort` are runtime-selection flags. Preserve them for the forwarded `task` call, but do not treat them as part of the natural-language task text. - If the request includes `--resume`, do not ask whether to continue. The user already chose. diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 52345fca..103f1290 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -9,6 +9,7 @@ import { fileURLToPath } from "node:url"; import { parseArgs, splitRawArgumentString } from "./lib/args.mjs"; import { buildPersistentTaskThreadName, + compactAppServerThread, DEFAULT_CONTINUE_PROMPT, findLatestTaskThread, getCodexAuthStatus, @@ -82,6 +83,7 @@ function printUsage() { " node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base ] [--scope ] [focus text]", " node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model ] [--effort ] [prompt]", " node scripts/codex-companion.mjs events [--since ] [--after-seq ] [--limit ] [--json]", + " node scripts/codex-companion.mjs compact [--json]", " node scripts/codex-companion.mjs status [job-id] [--all] [--json]", " node scripts/codex-companion.mjs result [job-id] [--json]", " node scripts/codex-companion.mjs cancel [job-id] [--json]" @@ -933,6 +935,44 @@ async function handleTaskWorker(argv) { if (workerError) throw workerError; } +async function handleCompact(argv) { + const { options, positionals } = parseCommandInput(argv, { + valueOptions: ["cwd"], + booleanOptions: ["json"] + }); + + const cwd = resolveCommandCwd(options); + const threadId = positionals[0]; + if (!threadId) { + throw new Error("Usage: compact [--json]"); + } + + const report = await compactAppServerThread(cwd, { threadId }); + + if (options.json) { + console.log(JSON.stringify(report, null, 2)); + return; + } + + if (!report.attempted) { + process.stderr.write(`Compaction not attempted: ${report.detail}\n`); + process.exitCode = 1; + return; + } + if (!report.compacted) { + process.stderr.write(`Compaction failed: ${report.detail}\n`); + process.exitCode = 1; + return; + } + process.stdout.write(`${report.detail}\n`); + process.stdout.write( + `Resume the thread with a tightened prompt via:\n` + + ` /codex:rescue --resume \n` + + `or directly:\n` + + ` codex resume ${threadId}\n` + ); +} + async function handleEvents(argv) { const { options, positionals } = parseCommandInput(argv, { valueOptions: ["cwd", "since", "after-seq", "limit"], @@ -1159,6 +1199,9 @@ async function main() { case "events": await handleEvents(argv); break; + case "compact": + await handleCompact(argv); + break; case "status": await handleStatus(argv); break; diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs index a7196274..2012c558 100644 --- a/plugins/codex/scripts/lib/codex.mjs +++ b/plugins/codex/scripts/lib/codex.mjs @@ -1107,6 +1107,68 @@ export async function getCodexAuthStatus(cwd, options = {}) { } } +/** + * Trigger codex's protocol-native context compaction on a thread. This is + * the recovery path for "prompt too long / context overflow" scenarios: + * main-loop Claude calls /codex:compact, then resumes the thread with an + * amended prompt via /codex:rescue --resume. Compaction itself runs on + * the codex side; this wrapper is fire-and-return — it kicks off the + * compact request and returns once the app-server acknowledges it. + * Streaming notifications (broker recognizes thread/compact/start as a + * STREAMING_METHOD) flow to whoever owns the broker stream at that moment; + * if no consumer is active, codex still completes compaction on its side. + * + * NOTE: the exact shape of the success payload is not documented for + * codex CLI 0.131; this returns the raw result alongside the bookkeeping + * fields. A typed wrapper in app-server-protocol.d.ts can land once the + * shape is observed in a real run. + */ +export async function compactAppServerThread(cwd, { threadId }) { + if (!threadId) { + return { + attempted: false, + compacted: false, + transport: null, + result: null, + detail: "missing threadId" + }; + } + + const availability = getCodexAvailability(cwd); + if (!availability.available) { + return { + attempted: false, + compacted: false, + transport: null, + result: null, + detail: availability.detail + }; + } + + let client = null; + try { + client = await CodexAppServerClient.connect(cwd, { reuseExistingBroker: true }); + const result = await client.request("thread/compact/start", { threadId }); + return { + attempted: true, + compacted: true, + transport: client.transport, + result, + detail: `Compaction started on ${threadId}.` + }; + } catch (error) { + return { + attempted: true, + compacted: false, + transport: client?.transport ?? null, + result: null, + detail: error instanceof Error ? error.message : String(error) + }; + } finally { + await client?.close().catch(() => {}); + } +} + export async function interruptAppServerTurn(cwd, { threadId, turnId }) { if (!threadId || !turnId) { return { diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs index 3724ffa4..830f56a0 100644 --- a/tests/commands.test.mjs +++ b/tests/commands.test.mjs @@ -72,9 +72,15 @@ test("adversarial review command uses AskUserQuestion and background Bash while test("continue is not exposed as a user-facing command", () => { const commandFiles = fs.readdirSync(path.join(PLUGIN_ROOT, "commands")).sort(); + // events.md + compact.md added by feat/event-stream-foundation: the main + // Claude loop polls /codex:events to monitor codex state and runs + // /codex:compact to recover from "prompt too long" via codex's protocol- + // native thread/compact/start RPC. assert.deepEqual(commandFiles, [ "adversarial-review.md", "cancel.md", + "compact.md", + "events.md", "rescue.md", "result.md", "review.md", @@ -109,7 +115,11 @@ test("rescue command absorbs continue semantics", () => { assert.match(rescue, /Continue current Codex thread/); assert.match(rescue, /Start a new Codex thread/); assert.match(rescue, /run the `codex:codex-rescue` subagent in the background/i); - assert.match(rescue, /default to foreground/i); + // Default execution mode was flipped from foreground to background so the + // main Claude loop polls /codex:events instead of blocking on a synchronous + // Bash call when Codex stalls. See feat/event-stream-foundation. + assert.match(rescue, /default to background/i); + assert.match(rescue, /poll progress with `\/codex:events /i); assert.match(rescue, /Do not forward them to `task`/i); assert.match(rescue, /`--model` and `--effort` are runtime-selection flags/i); assert.match(rescue, /Leave `--effort` unset unless the user explicitly asks for a specific reasoning effort/i); @@ -126,8 +136,13 @@ test("rescue command absorbs continue semantics", () => { assert.match(agent, /--resume/); assert.match(agent, /--fresh/); assert.match(agent, /thin forwarding wrapper/i); - assert.match(agent, /prefer foreground for a small, clearly bounded rescue request/i); - assert.match(agent, /If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer background execution/i); + // Default execution mode is now background (see feat/event-stream-foundation): + // foreground rescue blocks the main Claude loop when codex stalls, which is + // exactly the deadlock the per-job event stream was added to break. + assert.match(agent, /Default to background execution/i); + assert.match(agent, /append `--background`/i); + assert.match(agent, /monitor progress via `\/codex:events/i); + assert.match(agent, /Honor `--wait` only when the user explicitly asks for foreground/i); assert.match(agent, /Use exactly one `Bash` call/i); assert.match(agent, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i); assert.match(agent, /Do not call `review`, `adversarial-review`, `status`, `result`, or `cancel`/i); From f9f36fb01669cf56403bbd940f641e07a2b3cdcb Mon Sep 17 00:00:00 2001 From: bit-star Date: Tue, 19 May 2026 17:28:55 +0800 Subject: [PATCH 7/7] chore(release): bump plugin version to 1.1.0 + CHANGELOG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cuts the minor-version release for the observability rework (feat/event-stream-foundation). Adds /codex:events, /codex:compact, rescue defaults to --background, stall watchdog, terminal job/exited events, top-level token usage, and broader codex CLI 0.131 notification coverage. No breaking changes — all existing commands keep the same signatures and outputs; the new event stream is additive. --- .claude-plugin/marketplace.json | 4 +-- package-lock.json | 4 +-- package.json | 2 +- plugins/codex/.claude-plugin/plugin.json | 2 +- plugins/codex/CHANGELOG.md | 42 ++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 6 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index ec617987..b6caf144 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -5,13 +5,13 @@ }, "metadata": { "description": "Codex plugins to use in Claude Code for delegation and code review.", - "version": "1.0.4" + "version": "1.1.0" }, "plugins": [ { "name": "codex", "description": "Use Codex from Claude Code to review code or delegate tasks.", - "version": "1.0.4", + "version": "1.1.0", "author": { "name": "OpenAI" }, diff --git a/package-lock.json b/package-lock.json index 82d04a25..3640e02f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@openai/codex-plugin-cc", - "version": "1.0.4", + "version": "1.1.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@openai/codex-plugin-cc", - "version": "1.0.4", + "version": "1.1.0", "license": "Apache-2.0", "devDependencies": { "@types/node": "^25.5.0", diff --git a/package.json b/package.json index 833fd51c..292041a1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@openai/codex-plugin-cc", - "version": "1.0.4", + "version": "1.1.0", "private": true, "type": "module", "description": "Use Codex from Claude Code to review code or delegate tasks.", diff --git a/plugins/codex/.claude-plugin/plugin.json b/plugins/codex/.claude-plugin/plugin.json index da262028..4838522f 100644 --- a/plugins/codex/.claude-plugin/plugin.json +++ b/plugins/codex/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "codex", - "version": "1.0.4", + "version": "1.1.0", "description": "Use Codex from Claude Code to review code or delegate tasks.", "author": { "name": "OpenAI" diff --git a/plugins/codex/CHANGELOG.md b/plugins/codex/CHANGELOG.md index d647561b..73c52574 100644 --- a/plugins/codex/CHANGELOG.md +++ b/plugins/codex/CHANGELOG.md @@ -1,5 +1,47 @@ # Changelog +## 1.1.0 + +Observability rework for the main-loop orchestration case: the consumer of +"what is codex doing right now" is the calling Claude session, not a human +dashboard. Adds a per-job NDJSON event stream the main loop can poll, plus +a protocol-native recovery path for context overflow. + +- `/codex:events `: new slash command. Streams normalized codex + notifications from `{stateDir}/jobs/{jobId}.events.ndjson`. Supports + `--since ` / `--after-seq ` / `--limit ` / `--json` for + incremental polling. Each event carries `seq`, `ts`, `method`, `phase`, + `itemType`, `message`, and the raw payload. +- `/codex:compact `: new slash command wrapping codex + app-server's `thread/compact/start`. Protocol-native recovery for + "prompt too long" — typical flow is cancel → compact → resume with an + amended prompt via `/codex:rescue --resume`. +- `/codex:rescue` now defaults to `--background`. The main Claude loop + receives a job id immediately and polls `/codex:events` instead of + blocking on a synchronous Bash call; this removes the deadlock when + codex stalls or errors silently. +- Per-job stall watchdog (60s default, override via + `CODEX_COMPANION_STALL_SECONDS`) emits a `{type:"watchdog", + phase:"stuck"}` event when codex produces no new notifications inside + the window. The watchdog never cancels — the main loop decides whether + to continue, compact, or cancel. +- New `{type:"job/exited"}` terminal event with `phase: completed|failed` + and `exitCode`. This is the single source of truth for end-of-job; + callers should not infer terminal state from job-level `status` alone. +- Surfaces token usage as a top-level field on `runAppServerTurn` and + streams real-time usage via `thread/tokenUsage/updated` events + (`phase: "metering"`). +- Coverage of codex CLI 0.131 notification methods extended to + `thread/status/changed`, `warning`, `thread/tokenUsage/updated`, plus + item types `userMessage`, `assistantMessage`/`agentMessage`, and + `reasoning`. The `agentMessage` item now surfaces a content preview so + the main loop can recognize codex's final reply from the event stream + without fetching `/codex:result`. +- Test isolation: `tests/helpers.mjs` now unsets `CLAUDE_PLUGIN_DATA` and + `CODEX_COMPANION_SESSION_ID` at module load. Plugin host runtimes (e.g. + Claude Code) inject these vars; without isolation, two existing tests + fail when contributors run `npm test` from inside a host. + ## 1.0.0 - Initial version of the Codex plugin for Claude Code