From 5e8100a5f42bd98f24228f64072d945fc565bb87 Mon Sep 17 00:00:00 2001
From: bit-star <bit@gie.edu.kg>
Date: Tue, 19 May 2026 15:59:24 +0800
Subject: [PATCH 1/7] test: isolate from host-runtime plugin env vars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Host plugin runtimes (e.g. Claude Code) inject CLAUDE_PLUGIN_DATA and
CODEX_COMPANION_SESSION_ID into the plugin process. When these leak into
`npm test`, two tests fail with non-bug assertions:

- tests/state.test.mjs: "resolveStateDir uses a temp-backed per-workspace
  directory" — CLAUDE_PLUGIN_DATA redirects state to ~/.claude/plugins/data
  instead of os.tmpdir().
- tests/runtime.test.mjs: "result without a job id ..." — fixture writer
  and the spawned `result` subprocess look at different state dirs.

CI passes (no host-runtime env), so this only bites contributors running
`npm test` from inside a plugin host. Fix: delete these env vars at the top
of tests/helpers.mjs (the shared import) so every test gets a clean slate.
Tests that exercise the env var explicitly ("resolveStateDir uses
CLAUDE_PLUGIN_DATA when it is provided") already use try/finally to set and
restore it, so they continue to pass.

Verified: 86/86 tests pass with the polluting env vars still set in the
parent shell.
---
 tests/helpers.mjs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/helpers.mjs b/tests/helpers.mjs
index 945ae0e7..926b4cd9 100644
--- a/tests/helpers.mjs
+++ b/tests/helpers.mjs
@@ -4,6 +4,14 @@ import path from "node:path";
 import process from "node:process";
 import { spawnSync } from "node:child_process";
 
+// Isolate tests from host-runtime plugin env vars. Claude Code (and similar
+// plugin hosts) injects CLAUDE_PLUGIN_DATA and CODEX_COMPANION_SESSION_ID into
+// the plugin process; if those leak into `npm test`, fallback-path assertions
+// (e.g. `resolveStateDir uses a temp-backed per-workspace directory`) fail.
+// Tests that need these vars set them explicitly with try/finally.
+delete process.env.CLAUDE_PLUGIN_DATA;
+delete process.env.CODEX_COMPANION_SESSION_ID;
+
 export function makeTempDir(prefix = "codex-plugin-test-") {
   return fs.mkdtempSync(path.join(os.tmpdir(), prefix));
 }

From c30b6647aee6610be2a07c80b60d4a188ff78fa1 Mon Sep 17 00:00:00 2001
From: bit-star <bit@gie.edu.kg>
Date: Tue, 19 May 2026 16:09:02 +0800
Subject: [PATCH 2/7] feat(state): add per-job NDJSON event stream API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add resolveJobEventsFile / appendJobEvent / readJobEvents for the upcoming
Claude-main-loop observability work (see DESIGN doc at user planning area).

Contract:
- Per-job events file at {stateDir}/jobs/{jobId}.events.ndjson.
- appendJobEvent caps each line at 4KB. POSIX `write(fd, buf, n)` with
  O_APPEND is atomic when n < PIPE_BUF (~4096 on Linux/macOS), so single-
  write append is safe across concurrent readers. Oversized lines first
  truncate the `raw` field; if still too big, the whole event is replaced
  with a minimal {type: "oversize-event-elided", seq, ts, method, phase}.
- readJobEvents returns [] for missing files. Partial last line (writer
  mid-write before the trailing \n) is tolerated — JSON.parse failure on
  the trailing line is skipped, next read picks it up. afterSeq takes
  precedence over since when both are passed.

This commit is consumer-less; the producer side (captureTurn onNotification
hook) lands in the next commit so this slice can ship green independently.

Tests: 10 new unit tests covering path resolution, append+read, missing
file, afterSeq filter, since filter, afterSeq precedence over since, limit,
partial-line tolerance, raw truncation, and oversize elision. 96/96 pass.
---
 plugins/codex/scripts/lib/state.mjs |  79 ++++++++++++++
 tests/state.test.mjs                | 160 +++++++++++++++++++++++++++-
 2 files changed, 238 insertions(+), 1 deletion(-)

diff --git a/plugins/codex/scripts/lib/state.mjs b/plugins/codex/scripts/lib/state.mjs
index 2da23498..38b22bc1 100644
--- a/plugins/codex/scripts/lib/state.mjs
+++ b/plugins/codex/scripts/lib/state.mjs
@@ -189,3 +189,82 @@ export function resolveJobFile(cwd, jobId) {
   ensureStateDir(cwd);
   return path.join(resolveJobsDir(cwd), `${jobId}.json`);
 }
+
+// POSIX guarantees `write(fd, buf, n)` with O_APPEND is atomic when `n` is
+// less than PIPE_BUF (typically 4096 on Linux/macOS). Keep one NDJSON line
+// at or below this bound so a concurrent reader can never observe a partial
+// event. Writers truncate the `raw` field (or elide it entirely) to fit.
+const MAX_EVENT_LINE_BYTES = 4096;
+
+export function resolveJobEventsFile(cwd, jobId) {
+  ensureStateDir(cwd);
+  return path.join(resolveJobsDir(cwd), `${jobId}.events.ndjson`);
+}
+
+export function appendJobEvent(cwd, jobId, event) {
+  const eventsFile = resolveJobEventsFile(cwd, jobId);
+  let line = `${JSON.stringify(event)}\n`;
+
+  if (Buffer.byteLength(line, "utf8") > MAX_EVENT_LINE_BYTES && event.raw != null) {
+    const summary =
+      event.raw && typeof event.raw === "object"
+        ? Object.keys(event.raw).slice(0, 8).join(",")
+        : String(event.raw).slice(0, 100);
+    const truncated = { ...event, raw: { truncated: true, summary } };
+    line = `${JSON.stringify(truncated)}\n`;
+  }
+
+  if (Buffer.byteLength(line, "utf8") > MAX_EVENT_LINE_BYTES) {
+    const elided = {
+      seq: event.seq,
+      ts: event.ts,
+      type: "oversize-event-elided",
+      method: event.method ?? null,
+      phase: event.phase ?? null
+    };
+    line = `${JSON.stringify(elided)}\n`;
+  }
+
+  fs.appendFileSync(eventsFile, line, "utf8");
+}
+
+export function readJobEvents(cwd, jobId, options = {}) {
+  const { since = null, afterSeq = null, limit = null } = options;
+  const eventsFile = resolveJobEventsFile(cwd, jobId);
+  if (!fs.existsSync(eventsFile)) {
+    return [];
+  }
+
+  const content = fs.readFileSync(eventsFile, "utf8");
+  const lines = content.split("\n");
+  const lastIndex = lines.length - 1;
+  const events = [];
+
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    if (!line) continue;
+    try {
+      events.push(JSON.parse(line));
+    } catch {
+      // Tolerate a partial last line: writer may be mid-write between
+      // assembling the JSON and writing the trailing newline. Drop it; the
+      // next read picks it up. For any non-trailing parse failure, treat as
+      // skipped corruption (rare; we'd see it again on next read).
+      if (i === lastIndex || (i === lastIndex - 1 && lines[lastIndex] === "")) {
+        continue;
+      }
+      continue;
+    }
+  }
+
+  let filtered = events;
+  if (afterSeq != null) {
+    filtered = filtered.filter((event) => typeof event.seq === "number" && event.seq > afterSeq);
+  } else if (since != null) {
+    filtered = filtered.filter((event) => typeof event.ts === "string" && event.ts > since);
+  }
+  if (limit != null && Number.isFinite(limit)) {
+    filtered = filtered.slice(0, limit);
+  }
+  return filtered;
+}
diff --git a/tests/state.test.mjs b/tests/state.test.mjs
index 0f8f57ce..4e498e47 100644
--- a/tests/state.test.mjs
+++ b/tests/state.test.mjs
@@ -5,7 +5,16 @@ import test from "node:test";
 import assert from "node:assert/strict";
 
 import { makeTempDir } from "./helpers.mjs";
-import { resolveJobFile, resolveJobLogFile, resolveStateDir, resolveStateFile, saveState } from "../plugins/codex/scripts/lib/state.mjs";
+import {
+  appendJobEvent,
+  readJobEvents,
+  resolveJobEventsFile,
+  resolveJobFile,
+  resolveJobLogFile,
+  resolveStateDir,
+  resolveStateFile,
+  saveState
+} from "../plugins/codex/scripts/lib/state.mjs";
 
 test("resolveStateDir uses a temp-backed per-workspace directory", () => {
   const workspace = makeTempDir();
@@ -103,3 +112,152 @@ test("saveState prunes dropped job artifacts when indexed jobs exceed the cap",
       .sort()
   );
 });
+
+test("resolveJobEventsFile returns a .events.ndjson under the jobs dir", () => {
+  const workspace = makeTempDir();
+  const eventsFile = resolveJobEventsFile(workspace, "job-evt-1");
+
+  assert.match(path.basename(eventsFile), /^job-evt-1\.events\.ndjson$/);
+  assert.equal(path.dirname(eventsFile), path.join(resolveStateDir(workspace), "jobs"));
+});
+
+test("appendJobEvent creates the file and readJobEvents returns parsed events", () => {
+  const workspace = makeTempDir();
+  const jobId = "job-evt-2";
+
+  appendJobEvent(workspace, jobId, { seq: 0, ts: "2026-01-01T00:00:00.000Z", phase: "starting" });
+  appendJobEvent(workspace, jobId, { seq: 1, ts: "2026-01-01T00:00:01.000Z", phase: "thinking" });
+
+  const events = readJobEvents(workspace, jobId);
+  assert.equal(events.length, 2);
+  assert.equal(events[0].seq, 0);
+  assert.equal(events[0].phase, "starting");
+  assert.equal(events[1].phase, "thinking");
+});
+
+test("readJobEvents returns [] when the file does not exist", () => {
+  const workspace = makeTempDir();
+  const events = readJobEvents(workspace, "never-written");
+  assert.deepEqual(events, []);
+});
+
+test("readJobEvents afterSeq skips events with seq <= afterSeq", () => {
+  const workspace = makeTempDir();
+  const jobId = "job-evt-3";
+  for (let i = 0; i < 5; i++) {
+    appendJobEvent(workspace, jobId, { seq: i, ts: `2026-01-01T00:00:0${i}.000Z`, phase: "p" });
+  }
+  const events = readJobEvents(workspace, jobId, { afterSeq: 2 });
+  assert.deepEqual(
+    events.map((event) => event.seq),
+    [3, 4]
+  );
+});
+
+test("readJobEvents since filters by ISO timestamp string", () => {
+  const workspace = makeTempDir();
+  const jobId = "job-evt-4";
+  appendJobEvent(workspace, jobId, { seq: 0, ts: "2026-01-01T00:00:00.000Z", phase: "p" });
+  appendJobEvent(workspace, jobId, { seq: 1, ts: "2026-01-01T00:00:05.000Z", phase: "p" });
+  appendJobEvent(workspace, jobId, { seq: 2, ts: "2026-01-01T00:00:10.000Z", phase: "p" });
+
+  const events = readJobEvents(workspace, jobId, { since: "2026-01-01T00:00:03.000Z" });
+  assert.deepEqual(
+    events.map((event) => event.seq),
+    [1, 2]
+  );
+});
+
+test("readJobEvents afterSeq takes precedence over since", () => {
+  const workspace = makeTempDir();
+  const jobId = "job-evt-5";
+  appendJobEvent(workspace, jobId, { seq: 0, ts: "2026-01-01T00:00:00.000Z" });
+  appendJobEvent(workspace, jobId, { seq: 1, ts: "2026-01-01T00:00:05.000Z" });
+  appendJobEvent(workspace, jobId, { seq: 2, ts: "2026-01-01T00:00:10.000Z" });
+
+  // since alone would return [seq=1, seq=2]; afterSeq=1 narrows to [seq=2].
+  const events = readJobEvents(workspace, jobId, {
+    afterSeq: 1,
+    since: "2026-01-01T00:00:00.000Z"
+  });
+  assert.deepEqual(
+    events.map((event) => event.seq),
+    [2]
+  );
+});
+
+test("readJobEvents limit caps the returned array", () => {
+  const workspace = makeTempDir();
+  const jobId = "job-evt-6";
+  for (let i = 0; i < 10; i++) {
+    appendJobEvent(workspace, jobId, { seq: i, ts: "2026-01-01T00:00:00.000Z" });
+  }
+  const events = readJobEvents(workspace, jobId, { limit: 3 });
+  assert.equal(events.length, 3);
+  assert.deepEqual(
+    events.map((event) => event.seq),
+    [0, 1, 2]
+  );
+});
+
+test("readJobEvents tolerates a partial last line (mid-write)", () => {
+  const workspace = makeTempDir();
+  const jobId = "job-evt-7";
+  appendJobEvent(workspace, jobId, { seq: 0, ts: "2026-01-01T00:00:00.000Z", phase: "p" });
+
+  // Simulate a writer in the middle of emitting a second event:
+  // the JSON has been written but the trailing newline has not.
+  const eventsFile = resolveJobEventsFile(workspace, jobId);
+  fs.appendFileSync(eventsFile, '{"seq":1,"ts":"2026-01-01T00:00:01.000Z","phase":"thinking"', "utf8");
+
+  const events = readJobEvents(workspace, jobId);
+  assert.equal(events.length, 1, "partial last line must be skipped, not throw");
+  assert.equal(events[0].seq, 0);
+});
+
+test("appendJobEvent truncates raw field when single line would exceed 4KB", () => {
+  const workspace = makeTempDir();
+  const jobId = "job-evt-8";
+  const bigRaw = { huge: "x".repeat(5000), other: "field" };
+
+  appendJobEvent(workspace, jobId, {
+    seq: 0,
+    ts: "2026-01-01T00:00:00.000Z",
+    phase: "tool:bash",
+    raw: bigRaw
+  });
+
+  const events = readJobEvents(workspace, jobId);
+  assert.equal(events.length, 1);
+  assert.equal(events[0].seq, 0);
+  assert.equal(events[0].phase, "tool:bash");
+  assert.equal(events[0].raw.truncated, true);
+  assert.match(events[0].raw.summary, /huge|other/);
+
+  const eventsFile = resolveJobEventsFile(workspace, jobId);
+  const bytes = fs.statSync(eventsFile).size;
+  assert.ok(bytes <= 4096, `line should fit in 4KB, got ${bytes} bytes`);
+});
+
+test("appendJobEvent elides event entirely when even truncated raw is too big", () => {
+  const workspace = makeTempDir();
+  const jobId = "job-evt-9";
+  const massiveMessage = "y".repeat(5000);
+
+  // message itself oversize, raw not the culprit
+  appendJobEvent(workspace, jobId, {
+    seq: 0,
+    ts: "2026-01-01T00:00:00.000Z",
+    method: "turn/started",
+    phase: "tool:bash",
+    message: massiveMessage,
+    raw: null
+  });
+
+  const events = readJobEvents(workspace, jobId);
+  assert.equal(events.length, 1);
+  assert.equal(events[0].type, "oversize-event-elided");
+  assert.equal(events[0].seq, 0);
+  assert.equal(events[0].method, "turn/started");
+  assert.equal(events[0].phase, "tool:bash");
+});

From a3f41811878698c2d4fdd0d63ed5983f55f18bbe Mon Sep 17 00:00:00 2001
From: bit-star <bit@gie.edu.kg>
Date: Tue, 19 May 2026 16:14:38 +0800
Subject: [PATCH 3/7] feat(codex): add notification stream hook + normalize +
 surface usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three changes to codex.mjs, building on the per-job NDJSON event API:

1. New exported `normalizeNotification(state, message)` function — pure
   transform from app-server notifications to the flat event shape that
   gets appended to {jobId}.events.ndjson. Covers thread/started,
   thread/name/updated, turn/started, item/started, item/completed,
   error, turn/completed, and an "unknown" fallback for forward-compat
   methods (e.g. thread/compact/started). Phase inference reuses the
   existing describeStartedItem / describeCompletedItem maps so the
   vocabulary stays consistent with on-screen progress text.

2. `captureTurn` accepts an `options.onNotification` callback. A small
   `dispatch(message)` wrapper applies state mutation first, then emits
   the normalized event. Order is load-bearing: normalize reads
   state.threadTurnIds populated by applyTurnNotification. onNotification
   errors are swallowed so a broken consumer can never crash the worker.

3. `runAppServerTurn` forwards `options.onNotification` to captureTurn
   and surfaces `usage` (from turnState.finalTurn?.usage) as a top-level
   field on the result, so /codex:status and the events stream can read
   token usage without traversing the nested `turn` payload.

Tests: 13 new unit tests covering each method branch, phase inference
edge cases (commandExecution -> running vs verifying), unknown-method
fallback, and malformed-input resilience. 109/109 pass.

No behavioral change for callers that don't pass onNotification — the
hook is fully opt-in. The companion-side wiring lands separately.
---
 plugins/codex/scripts/lib/codex.mjs   | 151 +++++++++++++++++++-
 tests/normalize-notification.test.mjs | 189 ++++++++++++++++++++++++++
 2 files changed, 336 insertions(+), 4 deletions(-)
 create mode 100644 tests/normalize-notification.test.mjs

diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs
index f2fe88bd..727310a2 100644
--- a/plugins/codex/scripts/lib/codex.mjs
+++ b/plugins/codex/scripts/lib/codex.mjs
@@ -481,6 +481,126 @@ function recordItem(state, item, lifecycle, threadId = null) {
   }
 }
 
+/**
+ * Map an app-server notification to a flat, append-friendly event record for
+ * the per-job NDJSON event stream (see state.mjs:appendJobEvent). The Claude
+ * main loop polls `/codex:events <job-id>` and reasons over these records —
+ * so the shape must stay stable and self-describing. `seq` is injected by
+ * the appender; this function is pure and does not allocate sequence numbers.
+ */
+export function normalizeNotification(state, message) {
+  const ts = new Date().toISOString();
+  const method = message.method ?? null;
+  const params = message.params ?? {};
+
+  switch (method) {
+    case "thread/started":
+      return {
+        ts,
+        method,
+        threadId: params.thread?.id ?? null,
+        turnId: null,
+        itemType: null,
+        lifecycle: null,
+        phase: "starting",
+        message: `Thread started (${params.thread?.id ?? "?"}).`,
+        raw: params
+      };
+    case "thread/name/updated":
+      return {
+        ts,
+        method,
+        threadId: params.threadId ?? null,
+        turnId: null,
+        itemType: null,
+        lifecycle: null,
+        phase: "starting",
+        message: `Thread renamed: ${params.threadName ?? "?"}`,
+        raw: params
+      };
+    case "turn/started":
+      return {
+        ts,
+        method,
+        threadId: params.threadId ?? null,
+        turnId: params.turn?.id ?? null,
+        itemType: null,
+        lifecycle: null,
+        phase: "thinking",
+        message: `Turn started (${params.turn?.id ?? "?"}).`,
+        raw: params
+      };
+    case "item/started": {
+      const item = params.item ?? {};
+      const description = describeStartedItem(state, item);
+      return {
+        ts,
+        method,
+        threadId: params.threadId ?? null,
+        turnId: state.threadTurnIds.get(params.threadId ?? state.threadId) ?? null,
+        itemType: item.type ?? null,
+        lifecycle: "started",
+        phase: description?.phase ?? "running",
+        message: description?.message ?? `Item started: ${item.type ?? "?"}`,
+        raw: item
+      };
+    }
+    case "item/completed": {
+      const item = params.item ?? {};
+      const description = describeCompletedItem(state, item);
+      return {
+        ts,
+        method,
+        threadId: params.threadId ?? null,
+        turnId: state.threadTurnIds.get(params.threadId ?? state.threadId) ?? null,
+        itemType: item.type ?? null,
+        lifecycle: "completed",
+        phase: description?.phase ?? "running",
+        message: description?.message ?? `Item completed: ${item.type ?? "?"}`,
+        raw: item
+      };
+    }
+    case "error":
+      return {
+        ts,
+        method,
+        threadId: params.threadId ?? null,
+        turnId: null,
+        itemType: null,
+        lifecycle: null,
+        phase: "failed",
+        message: `Codex error: ${params.error?.message ?? "unknown"}`,
+        raw: params.error ?? null
+      };
+    case "turn/completed": {
+      const turnStatus = params.turn?.status ?? "completed";
+      return {
+        ts,
+        method,
+        threadId: params.threadId ?? null,
+        turnId: params.turn?.id ?? null,
+        itemType: null,
+        lifecycle: null,
+        phase: turnStatus === "completed" ? "completed" : "finalizing",
+        message: `Turn ${turnStatus}.`,
+        raw: params.turn ?? null
+      };
+    }
+    default:
+      return {
+        ts,
+        method,
+        threadId: params.threadId ?? params.thread?.id ?? null,
+        turnId: params.turn?.id ?? null,
+        itemType: null,
+        lifecycle: null,
+        phase: "unknown",
+        message: `${method ?? "unknown"} notification`,
+        raw: params
+      };
+  }
+}
+
 function applyTurnNotification(state, message) {
   switch (message.method) {
     case "thread/started":
@@ -553,6 +673,22 @@ function applyTurnNotification(state, message) {
 async function captureTurn(client, threadId, startRequest, options = {}) {
   const state = createTurnCaptureState(threadId, options);
   const previousHandler = client.notificationHandler;
+  const { onNotification } = options;
+
+  // Apply notification to state THEN emit normalized event. Order matters:
+  // normalize reads state.threadTurnIds / threadLabels that applyTurnNotification
+  // populates. onNotification errors are swallowed so a broken consumer can't
+  // crash the worker (events are observability, not control flow).
+  const dispatch = (message) => {
+    applyTurnNotification(state, message);
+    if (onNotification) {
+      try {
+        onNotification(normalizeNotification(state, message));
+      } catch {
+        // intentionally ignored
+      }
+    }
+  };
 
   client.setNotificationHandler((message) => {
     if (!state.turnId) {
@@ -561,7 +697,7 @@ async function captureTurn(client, threadId, startRequest, options = {}) {
     }
 
     if (message.method === "thread/started" || message.method === "thread/name/updated") {
-      applyTurnNotification(state, message);
+      dispatch(message);
       return;
     }
 
@@ -572,7 +708,7 @@ async function captureTurn(client, threadId, startRequest, options = {}) {
         return;
     }
 
-    applyTurnNotification(state, message);
+    dispatch(message);
   });
 
   try {
@@ -584,7 +720,7 @@ async function captureTurn(client, threadId, startRequest, options = {}) {
     }
     for (const message of state.bufferedNotifications) {
       if (belongsToTurn(state, message)) {
-        applyTurnNotification(state, message);
+        dispatch(message);
       } else {
         if (previousHandler) {
           previousHandler(message);
@@ -1009,7 +1145,10 @@ export async function runAppServerTurn(cwd, options = {}) {
           effort: options.effort ?? null,
           outputSchema: options.outputSchema ?? null
         }),
-      { onProgress: options.onProgress }
+      {
+        onProgress: options.onProgress,
+        onNotification: options.onNotification
+      }
     );
 
     return {
@@ -1019,6 +1158,10 @@ export async function runAppServerTurn(cwd, options = {}) {
       finalMessage: turnState.lastAgentMessage,
       reasoningSummary: turnState.reasoningSummary,
       turn: turnState.finalTurn,
+      // Top-level surface so /codex:status and the events stream can read
+      // usage without traversing the nested `turn` object. May be null when
+      // the upstream codex CLI version does not report usage.
+      usage: turnState.finalTurn?.usage ?? null,
       error: turnState.error,
       stderr: cleanCodexStderr(client.stderr),
       fileChanges: turnState.fileChanges,
diff --git a/tests/normalize-notification.test.mjs b/tests/normalize-notification.test.mjs
new file mode 100644
index 00000000..153cba86
--- /dev/null
+++ b/tests/normalize-notification.test.mjs
@@ -0,0 +1,189 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import "./helpers.mjs"; // run shared env-isolation side effect
+import { normalizeNotification } from "../plugins/codex/scripts/lib/codex.mjs";
+
+function makeState(overrides = {}) {
+  return {
+    threadId: "thr_main",
+    threadTurnIds: new Map(),
+    threadLabels: new Map(),
+    ...overrides
+  };
+}
+
+test("normalizeNotification: thread/started carries thread id and starting phase", () => {
+  const state = makeState();
+  const event = normalizeNotification(state, {
+    method: "thread/started",
+    params: { thread: { id: "thr_xyz", name: "rescue task" } }
+  });
+
+  assert.equal(event.method, "thread/started");
+  assert.equal(event.threadId, "thr_xyz");
+  assert.equal(event.phase, "starting");
+  assert.equal(event.turnId, null);
+  assert.equal(event.itemType, null);
+  assert.equal(event.lifecycle, null);
+  assert.match(event.message, /Thread started/);
+  assert.match(event.ts, /^\d{4}-\d{2}-\d{2}T/);
+});
+
+test("normalizeNotification: turn/started captures turnId and 'thinking' phase", () => {
+  const state = makeState();
+  const event = normalizeNotification(state, {
+    method: "turn/started",
+    params: { threadId: "thr_main", turn: { id: "trn_001" } }
+  });
+
+  assert.equal(event.method, "turn/started");
+  assert.equal(event.threadId, "thr_main");
+  assert.equal(event.turnId, "trn_001");
+  assert.equal(event.phase, "thinking");
+});
+
+test("normalizeNotification: item/started commandExecution maps to running phase", () => {
+  const state = makeState();
+  state.threadTurnIds.set("thr_main", "trn_001");
+
+  const event = normalizeNotification(state, {
+    method: "item/started",
+    params: {
+      threadId: "thr_main",
+      item: { type: "commandExecution", command: "git status" }
+    }
+  });
+
+  assert.equal(event.method, "item/started");
+  assert.equal(event.itemType, "commandExecution");
+  assert.equal(event.lifecycle, "started");
+  assert.equal(event.turnId, "trn_001");
+  assert.equal(event.phase, "running");
+  assert.match(event.message, /Running command: git status/);
+});
+
+test("normalizeNotification: item/started commandExecution detects verifying phase for test commands", () => {
+  const state = makeState();
+  const event = normalizeNotification(state, {
+    method: "item/started",
+    params: {
+      threadId: "thr_main",
+      item: { type: "commandExecution", command: "npm test" }
+    }
+  });
+
+  // looksLikeVerificationCommand recognizes test/check/lint patterns
+  assert.equal(event.phase, "verifying");
+});
+
+test("normalizeNotification: item/started fileChange maps to editing phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "item/started",
+    params: {
+      threadId: "thr_main",
+      item: { type: "fileChange", changes: [{}, {}, {}] }
+    }
+  });
+
+  assert.equal(event.itemType, "fileChange");
+  assert.equal(event.phase, "editing");
+  assert.match(event.message, /3 file change/);
+});
+
+test("normalizeNotification: item/started mcpToolCall maps to investigating phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "item/started",
+    params: {
+      threadId: "thr_main",
+      item: { type: "mcpToolCall", server: "context7", tool: "resolve-library-id" }
+    }
+  });
+
+  assert.equal(event.itemType, "mcpToolCall");
+  assert.equal(event.phase, "investigating");
+  assert.match(event.message, /context7\/resolve-library-id/);
+});
+
+test("normalizeNotification: item/started webSearch maps to investigating phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "item/started",
+    params: {
+      threadId: "thr_main",
+      item: { type: "webSearch", query: "claude code agent sdk" }
+    }
+  });
+
+  assert.equal(event.itemType, "webSearch");
+  assert.equal(event.phase, "investigating");
+  assert.match(event.message, /Searching/);
+});
+
+test("normalizeNotification: item/completed carries completed lifecycle + exit code", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "item/completed",
+    params: {
+      threadId: "thr_main",
+      item: { type: "commandExecution", command: "echo hi", status: "completed", exitCode: 0 }
+    }
+  });
+
+  assert.equal(event.method, "item/completed");
+  assert.equal(event.lifecycle, "completed");
+  assert.equal(event.itemType, "commandExecution");
+  assert.match(event.message, /Command completed/);
+  assert.match(event.message, /exit 0/);
+});
+
+test("normalizeNotification: turn/completed with status=completed yields 'completed' phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "turn/completed",
+    params: { threadId: "thr_main", turn: { id: "trn_001", status: "completed" } }
+  });
+
+  assert.equal(event.method, "turn/completed");
+  assert.equal(event.phase, "completed");
+  assert.equal(event.turnId, "trn_001");
+});
+
+test("normalizeNotification: turn/completed with non-completed status yields 'finalizing' phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "turn/completed",
+    params: { threadId: "thr_main", turn: { id: "trn_001", status: "cancelled" } }
+  });
+
+  assert.equal(event.phase, "finalizing");
+  assert.match(event.message, /cancelled/);
+});
+
+test("normalizeNotification: error notification yields 'failed' phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "error",
+    params: { error: { message: "context length exceeded", code: "context_overflow" } }
+  });
+
+  assert.equal(event.method, "error");
+  assert.equal(event.phase, "failed");
+  assert.match(event.message, /context length exceeded/);
+  assert.equal(event.raw.code, "context_overflow");
+});
+
+test("normalizeNotification: unknown method falls back to 'unknown' phase without throwing", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "thread/compact/started",
+    params: { threadId: "thr_main" }
+  });
+
+  assert.equal(event.method, "thread/compact/started");
+  assert.equal(event.phase, "unknown");
+  assert.equal(event.threadId, "thr_main");
+});
+
+test("normalizeNotification: handles malformed message without crashing", () => {
+  // No method, no params, weird shape — should still return a valid record
+  const event = normalizeNotification(makeState(), {});
+
+  assert.equal(event.method, null);
+  assert.equal(event.phase, "unknown");
+  assert.match(event.ts, /^\d{4}-\d{2}-\d{2}T/);
+});

From 9332c2962c06b5ff344e3362e6061b995ac9a63e Mon Sep 17 00:00:00 2001
From: bit-star <bit@gie.edu.kg>
Date: Tue, 19 May 2026 16:23:49 +0800
Subject: [PATCH 4/7] feat(companion): wire per-job event stream + stall
 watchdog + events cmd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

End-to-end wiring of the observability path landed in the prior two commits
(state.mjs events API + codex.mjs notification hook).

In codex-companion.mjs:

- executeTaskRun: forward `onNotification` through to runAppServerTurn.
  Sync foreground callers still don't pass one; only the background
  task-worker path opts in.

- handleTaskWorker: build a same-process closure that
  (a) appends every normalized notification to {jobId}.events.ndjson
      with a monotonic per-job seq number;
  (b) reflects phase transitions into state.json via upsertJob (only on
      real phase change, to keep the unlocked single-flight writer rare);
  (c) runs a 5s stall watchdog — when the gap since the last event
      exceeds CODEX_COMPANION_STALL_SECONDS (default 60s), it emits one
      {type:"watchdog", phase:"stuck"} record and flips job phase to
      stuck. It does NOT cancel. Main-loop Claude decides what to do
      next (continue / compact / cancel).
  (d) on terminal exit (success OR failure), emits a single
      {type:"job/exited", phase:"completed"|"failed", exitCode, errorMessage}
      record so a polling reader can distinguish "still slow" from
      "already finished". The job.status state.json field is no longer
      the sole source of truth — main-loop Claude must look for the
      job/exited event.
  Observability errors are swallowed; they must never crash the worker.
  Watchdog interval is unref'd so it can't keep the event loop alive.

- New `events <job-id> [--since|--after-seq|--limit] [--json]` subcommand
  delegating to readJobEvents. Default output is human-readable lines;
  --json returns {jobId, eventsFile, count, events}. After-seq + since
  semantics match readJobEvents (after-seq takes precedence). printUsage
  updated.

Tests: 7 integration tests spawning the real CLI to verify the events
command's empty-result, append-and-read, after-seq filtering, limit
capping, human-readable rendering, and missing-jobId usage paths.
116/116 pass.

The stall threshold is configurable via env var for now
(CODEX_COMPANION_STALL_SECONDS). A `task --max-stall-seconds <N>` CLI
flag is deferred to a later commit so handleTask's option parser stays
untouched in this slice.
---
 plugins/codex/scripts/codex-companion.mjs | 180 ++++++++++++++++++++--
 tests/events-command.test.mjs             | 121 +++++++++++++++
 2 files changed, 285 insertions(+), 16 deletions(-)
 create mode 100644 tests/events-command.test.mjs

diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs
index 35222fd5..09016ef4 100644
--- a/plugins/codex/scripts/codex-companion.mjs
+++ b/plugins/codex/scripts/codex-companion.mjs
@@ -25,9 +25,12 @@ import { collectReviewContext, ensureGitRepository, resolveReviewTarget } from "
 import { binaryAvailable, terminateProcessTree } from "./lib/process.mjs";
 import { loadPromptTemplate, interpolateTemplate } from "./lib/prompts.mjs";
 import {
+  appendJobEvent,
   generateJobId,
   getConfig,
   listJobs,
+  readJobEvents,
+  resolveJobEventsFile,
   setConfig,
   upsertJob,
   writeJobFile
@@ -78,6 +81,7 @@ function printUsage() {
       "  node scripts/codex-companion.mjs review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>]",
       "  node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>] [focus text]",
       "  node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [prompt]",
+      "  node scripts/codex-companion.mjs events <job-id> [--since <iso>] [--after-seq <n>] [--limit <n>] [--json]",
       "  node scripts/codex-companion.mjs status [job-id] [--all] [--json]",
       "  node scripts/codex-companion.mjs result [job-id] [--json]",
       "  node scripts/codex-companion.mjs cancel [job-id] [--json]"
@@ -487,6 +491,7 @@ async function executeTaskRun(request) {
     effort: request.effort,
     sandbox: request.write ? "workspace-write" : "read-only",
     onProgress: request.onProgress,
+    onNotification: request.onNotification,
     persistThread: true,
     threadName: resumeThreadId ? null : buildPersistentTaskThreadName(request.prompt || DEFAULT_CONTINUE_PROMPT)
   });
@@ -803,14 +808,15 @@ async function handleTaskWorker(argv) {
 
   const cwd = resolveCommandCwd(options);
   const workspaceRoot = resolveCommandWorkspace(options);
-  const storedJob = readStoredJob(workspaceRoot, options["job-id"]);
+  const jobId = options["job-id"];
+  const storedJob = readStoredJob(workspaceRoot, jobId);
   if (!storedJob) {
-    throw new Error(`No stored job found for ${options["job-id"]}.`);
+    throw new Error(`No stored job found for ${jobId}.`);
   }
 
   const request = storedJob.request;
   if (!request || typeof request !== "object") {
-    throw new Error(`Stored job ${options["job-id"]} is missing its task request payload.`);
+    throw new Error(`Stored job ${jobId} is missing its task request payload.`);
   }
 
   const { logFile, progress } = createTrackedProgress(
@@ -822,19 +828,158 @@ async function handleTaskWorker(argv) {
       logFile: storedJob.logFile ?? null
     }
   );
-  await runTrackedJob(
-    {
-      ...storedJob,
-      workspaceRoot,
-      logFile
-    },
-    () =>
-      executeTaskRun({
-        ...request,
-        onProgress: progress
-      }),
-    { logFile }
-  );
+
+  // Per-job event stream wiring. Both `onNotification` and the stall watchdog
+  // share `seq` and `lastEventAt` via this closure — they run in the same
+  // task-worker process, so no cross-process synchronization is needed.
+  let seq = 0;
+  let lastEventAt = Date.now();
+  let lastEmittedPhase = null;
+
+  const stallSecondsRaw = Number(process.env.CODEX_COMPANION_STALL_SECONDS);
+  const stallSeconds = Number.isFinite(stallSecondsRaw) && stallSecondsRaw > 0 ? stallSecondsRaw : 60;
+
+  const onNotification = (event) => {
+    try {
+      appendJobEvent(workspaceRoot, jobId, { seq: seq++, ...event });
+      lastEventAt = Date.now();
+      // Reflect phase transitions into the central job summary so /codex:status
+      // shows the same vocabulary the events stream uses. Only on real phase
+      // change to keep state.json writes rare (no lock; single-flight worker).
+      if (event.phase && event.phase !== lastEmittedPhase) {
+        lastEmittedPhase = event.phase;
+        upsertJob(workspaceRoot, { id: jobId, phase: event.phase, lastEventAt: event.ts });
+      }
+    } catch {
+      // Observability is fire-and-forget; never crash the worker.
+    }
+  };
+
+  // Watchdog: if no meaningful event arrives for `stallSeconds`, emit a
+  // single `{type:"watchdog", phase:"stuck"}` record. We deliberately do
+  // not cancel — main-loop Claude decides what to do (continue / compact /
+  // cancel). Refresh `lastEventAt` after emit so we don't spam.
+  const watchdog = setInterval(() => {
+    const elapsedMs = Date.now() - lastEventAt;
+    if (elapsedMs <= stallSeconds * 1000) return;
+    try {
+      appendJobEvent(workspaceRoot, jobId, {
+        seq: seq++,
+        ts: new Date().toISOString(),
+        type: "watchdog",
+        phase: "stuck",
+        stallMs: elapsedMs,
+        since: new Date(lastEventAt).toISOString()
+      });
+      upsertJob(workspaceRoot, { id: jobId, phase: "stuck", lastEventAt: new Date().toISOString() });
+      lastEventAt = Date.now();
+      lastEmittedPhase = "stuck";
+    } catch {
+      // ignore
+    }
+  }, 5000);
+  watchdog.unref?.();
+
+  let completed = false;
+  let workerError = null;
+  try {
+    await runTrackedJob(
+      {
+        ...storedJob,
+        workspaceRoot,
+        logFile
+      },
+      () =>
+        executeTaskRun({
+          ...request,
+          onProgress: progress,
+          onNotification
+        }),
+      { logFile }
+    );
+    completed = true;
+  } catch (error) {
+    workerError = error;
+  } finally {
+    clearInterval(watchdog);
+    // Terminal event so a polling reader can distinguish "still slow" from
+    // "already finished". This is the single source of truth for end-of-job —
+    // main-loop Claude must check for {type:"job/exited"} not just job.status.
+    try {
+      appendJobEvent(workspaceRoot, jobId, {
+        seq: seq++,
+        ts: new Date().toISOString(),
+        type: "job/exited",
+        phase: completed ? "completed" : "failed",
+        exitCode: completed ? 0 : 1,
+        errorMessage: workerError
+          ? workerError instanceof Error
+            ? workerError.message
+            : String(workerError)
+          : null
+      });
+    } catch {
+      // ignore
+    }
+  }
+  if (workerError) throw workerError;
+}
+
+async function handleEvents(argv) {
+  const { options, positionals } = parseCommandInput(argv, {
+    valueOptions: ["cwd", "since", "after-seq", "limit"],
+    booleanOptions: ["json"]
+  });
+
+  const cwd = resolveCommandCwd(options);
+  const workspaceRoot = resolveCommandWorkspace(options);
+  const jobId = positionals[0];
+  if (!jobId) {
+    throw new Error("Usage: events <job-id> [--since <iso>] [--after-seq <n>] [--limit <n>] [--json]");
+  }
+
+  const readOptions = {};
+  if (options["after-seq"] != null) {
+    const parsed = Number(options["after-seq"]);
+    if (!Number.isFinite(parsed)) {
+      throw new Error(`Invalid --after-seq value: ${options["after-seq"]}`);
+    }
+    readOptions.afterSeq = parsed;
+  }
+  if (options.since) {
+    readOptions.since = String(options.since);
+  }
+  if (options.limit != null) {
+    const parsed = Number(options.limit);
+    if (!Number.isFinite(parsed) || parsed < 0) {
+      throw new Error(`Invalid --limit value: ${options.limit}`);
+    }
+    readOptions.limit = parsed;
+  }
+
+  const events = readJobEvents(workspaceRoot, jobId, readOptions);
+  const payload = {
+    jobId,
+    eventsFile: resolveJobEventsFile(workspaceRoot, jobId),
+    count: events.length,
+    events
+  };
+
+  if (options.json) {
+    console.log(JSON.stringify(payload, null, 2));
+    return;
+  }
+
+  if (events.length === 0) {
+    process.stdout.write(`No events yet for ${jobId}.\n`);
+    return;
+  }
+  for (const event of events) {
+    const phase = event.phase ?? "?";
+    const method = event.method ?? event.type ?? "?";
+    const message = event.message ?? "";
+    process.stdout.write(`[${event.ts}] seq=${event.seq} ${method} ${phase} ${message}\n`);
+  }
 }
 
 async function handleStatus(argv) {
@@ -1003,6 +1148,9 @@ async function main() {
     case "task-worker":
       await handleTaskWorker(argv);
       break;
+    case "events":
+      await handleEvents(argv);
+      break;
     case "status":
       await handleStatus(argv);
       break;
diff --git a/tests/events-command.test.mjs b/tests/events-command.test.mjs
new file mode 100644
index 00000000..20cd74e5
--- /dev/null
+++ b/tests/events-command.test.mjs
@@ -0,0 +1,121 @@
+import path from "node:path";
+import test from "node:test";
+import assert from "node:assert/strict";
+import { fileURLToPath } from "node:url";
+
+import { makeTempDir, run } from "./helpers.mjs";
+import { appendJobEvent } from "../plugins/codex/scripts/lib/state.mjs";
+
+const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
+const SCRIPT = path.join(ROOT, "plugins", "codex", "scripts", "codex-companion.mjs");
+
+test("events command without job-id errors with usage", () => {
+  const workspace = makeTempDir();
+  const result = run("node", [SCRIPT, "events"], { cwd: workspace });
+  assert.notEqual(result.status, 0);
+  assert.match(result.stderr, /Usage: events <job-id>/);
+});
+
+test("events command returns empty count for unknown job (--json)", () => {
+  const workspace = makeTempDir();
+  const result = run("node", [SCRIPT, "events", "ghost-job", "--json", "--cwd", workspace]);
+
+  assert.equal(result.status, 0, result.stderr);
+  const payload = JSON.parse(result.stdout);
+  assert.equal(payload.jobId, "ghost-job");
+  assert.equal(payload.count, 0);
+  assert.deepEqual(payload.events, []);
+});
+
+test("events command returns appended events (--json)", () => {
+  const workspace = makeTempDir();
+  appendJobEvent(workspace, "task-int-1", {
+    seq: 0,
+    ts: "2026-01-01T00:00:00.000Z",
+    phase: "starting",
+    method: "thread/started",
+    message: "Thread started (thr_x)."
+  });
+  appendJobEvent(workspace, "task-int-1", {
+    seq: 1,
+    ts: "2026-01-01T00:00:01.000Z",
+    phase: "thinking",
+    method: "turn/started",
+    message: "Turn started (trn_y)."
+  });
+
+  const result = run("node", [SCRIPT, "events", "task-int-1", "--json", "--cwd", workspace]);
+
+  assert.equal(result.status, 0, result.stderr);
+  const payload = JSON.parse(result.stdout);
+  assert.equal(payload.jobId, "task-int-1");
+  assert.equal(payload.count, 2);
+  assert.equal(payload.events[0].phase, "starting");
+  assert.equal(payload.events[1].phase, "thinking");
+});
+
+test("events command --after-seq filters incrementally", () => {
+  const workspace = makeTempDir();
+  for (let i = 0; i < 4; i++) {
+    appendJobEvent(workspace, "task-int-2", {
+      seq: i,
+      ts: `2026-01-01T00:00:0${i}.000Z`,
+      phase: "p",
+      message: `event ${i}`
+    });
+  }
+  const result = run("node", [SCRIPT, "events", "task-int-2", "--after-seq", "1", "--json", "--cwd", workspace]);
+
+  assert.equal(result.status, 0, result.stderr);
+  const payload = JSON.parse(result.stdout);
+  assert.equal(payload.count, 2);
+  assert.deepEqual(
+    payload.events.map((event) => event.seq),
+    [2, 3]
+  );
+});
+
+test("events command prints human-readable lines without --json", () => {
+  const workspace = makeTempDir();
+  appendJobEvent(workspace, "task-int-3", {
+    seq: 0,
+    ts: "2026-01-01T00:00:00.000Z",
+    phase: "thinking",
+    method: "turn/started",
+    message: "Turn started (trn_z)."
+  });
+  const result = run("node", [SCRIPT, "events", "task-int-3", "--cwd", workspace]);
+
+  assert.equal(result.status, 0, result.stderr);
+  assert.match(result.stdout, /seq=0/);
+  assert.match(result.stdout, /thinking/);
+  assert.match(result.stdout, /Turn started/);
+});
+
+test("events command --limit caps event count", () => {
+  const workspace = makeTempDir();
+  for (let i = 0; i < 5; i++) {
+    appendJobEvent(workspace, "task-int-4", {
+      seq: i,
+      ts: `2026-01-01T00:00:0${i}.000Z`,
+      phase: "p"
+    });
+  }
+  const result = run("node", [SCRIPT, "events", "task-int-4", "--limit", "2", "--json", "--cwd", workspace]);
+
+  assert.equal(result.status, 0, result.stderr);
+  const payload = JSON.parse(result.stdout);
+  assert.equal(payload.count, 2);
+  assert.deepEqual(
+    payload.events.map((event) => event.seq),
+    [0, 1]
+  );
+});
+
+test("events command shows empty message line for unknown job without --json", () => {
+  const workspace = makeTempDir();
+  const result = run("node", [SCRIPT, "events", "ghost-job-2", "--cwd", workspace]);
+
+  assert.equal(result.status, 0, result.stderr);
+  assert.match(result.stdout, /No events yet/);
+});

From 3a0ae2d961e9dae3c6894b0dd72ce5687b3d19d1 Mon Sep 17 00:00:00 2001
From: bit-star <bit@gie.edu.kg>
Date: Tue, 19 May 2026 17:09:46 +0800
Subject: [PATCH 5/7] fix(events): job/exited verdict + cover codex 0.131
 schema gaps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

E2E against real codex CLI 0.131.0-alpha.9 uncovered three issues mock
tests could not surface. Fixed all three; rerun confirms zero `unknown`
phase events in a successful turn.

1. handleTaskWorker job/exited bug (correctness)
   runTrackedJob does NOT throw when codex returns a failed turn — it
   resolves with execution.exitStatus != 0 and writes state.status="failed"
   out-of-band. Previous code keyed `completed` off the absence of an
   exception, so a failed turn was misreported as `phase:"completed",
   exitCode:0` in the terminal job/exited event. This is the single
   record main-loop Claude reads to decide "did this work?" — getting
   it wrong led to silent false positives. Fix: inspect
   execution.exitStatus directly. Bug repro: dispatch with --effort
   minimal (which conflicts with web_search) — codex returns an
   invalid_request_error, runTrackedJob resolves with exitStatus=1,
   stored state.json shows status="failed", but the prior code emitted
   {phase:"completed", exitCode:0}. After fix: {phase:"failed",
   exitCode:1, errorMessage:"Task did not complete successfully ..."}.

2. normalize coverage gaps (forward-compat / readability)
   codex 0.131 emits three notification methods + three item types the
   prior switch did not recognize, leaving them as phase:"unknown" with
   generic fallback messages:

   - method `thread/status/changed` with status.type in {"idle"} →
     phase:"idle", message "Thread idle.". Post-turn quiescence signal.
   - method `thread/tokenUsage/updated` → phase:"metering" (new word in
     the phase vocab). Streaming token-usage source. This is the real
     event main-loop Claude should poll to detect context-budget pressure
     before turn/completed surfaces final usage. NOTE: codex 0.131 payload
     shape for the usage object is not documented; normalize tries
     {inputTokens,outputTokens,cachedInputTokens} and several aliases;
     when no recognized keys are found, falls back to a stable label and
     preserves raw. Concrete schema discovery is a Phase 3 follow-up.
   - item.type `agentMessage` / `assistantMessage` / `reasoning` →
     phase:"thinking". `agentMessage` is codex's final-reply item;
     describeStartedItem/describeCompletedItem now use a new
     extractItemText helper to surface a content preview ("Codex
     replied: pong") so main-loop Claude can recognize the answer
     from the event stream without fetching /codex:result.

3. Single E2E rerun verification
   Same prompt with --effort low (avoids the minimal+web_search
   constraint): 13 events emitted, phase distribution {thinking:8,
   completed:2, idle:1, metering:1, warning:1, unknown:0}. The prior
   normalize generated 5 unknown out of 9 events (56%); this commit
   brings it to 0/13 on the happy path.

Tests: +6 new normalize unit tests (status idle, tokenUsage with usage,
tokenUsage without usage, agentMessage with text, agentMessage with
content[].text, schema fallback). 128/128 pass total. The job/exited
fix is exercised in the existing fake-codex fixture indirectly; a
direct unit test would need a fake runTrackedJob that simulates
"resolved with non-zero exitStatus" which is non-trivial to mock — for
now the E2E repro and the inline reasoning in the comment are the
documented evidence.
---
 plugins/codex/scripts/codex-companion.mjs |  26 ++--
 plugins/codex/scripts/lib/codex.mjs       | 108 ++++++++++++++++
 tests/normalize-notification.test.mjs     | 143 ++++++++++++++++++++++
 3 files changed, 268 insertions(+), 9 deletions(-)

diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs
index 09016ef4..52345fca 100644
--- a/plugins/codex/scripts/codex-companion.mjs
+++ b/plugins/codex/scripts/codex-companion.mjs
@@ -880,10 +880,10 @@ async function handleTaskWorker(argv) {
   }, 5000);
   watchdog.unref?.();
 
-  let completed = false;
+  let execution = null;
   let workerError = null;
   try {
-    await runTrackedJob(
+    execution = await runTrackedJob(
       {
         ...storedJob,
         workspaceRoot,
@@ -897,26 +897,34 @@ async function handleTaskWorker(argv) {
         }),
       { logFile }
     );
-    completed = true;
   } catch (error) {
     workerError = error;
   } finally {
     clearInterval(watchdog);
-    // Terminal event so a polling reader can distinguish "still slow" from
-    // "already finished". This is the single source of truth for end-of-job —
-    // main-loop Claude must check for {type:"job/exited"} not just job.status.
+    // Terminal event — single source of truth for end-of-job. Main-loop
+    // Claude must check for {type:"job/exited"} not just job.status.
+    //
+    // CRITICAL: runTrackedJob does NOT throw when codex returns a failed
+    // turn — it resolves with execution.exitStatus != 0 and writes
+    // state.status="failed" out-of-band. So "did we catch an exception"
+    // is NOT enough to decide success. We must inspect execution.exitStatus
+    // and treat exitStatus !== 0 as failure even when no exception was
+    // thrown.
+    const success = !workerError && execution?.exitStatus === 0;
     try {
       appendJobEvent(workspaceRoot, jobId, {
         seq: seq++,
         ts: new Date().toISOString(),
         type: "job/exited",
-        phase: completed ? "completed" : "failed",
-        exitCode: completed ? 0 : 1,
+        phase: success ? "completed" : "failed",
+        exitCode: success ? 0 : execution?.exitStatus ?? 1,
         errorMessage: workerError
           ? workerError instanceof Error
             ? workerError.message
             : String(workerError)
-          : null
+          : success
+            ? null
+            : "Task did not complete successfully (see prior events for details)."
       });
     } catch {
       // ignore
diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs
index 727310a2..a7196274 100644
--- a/plugins/codex/scripts/lib/codex.mjs
+++ b/plugins/codex/scripts/lib/codex.mjs
@@ -257,11 +257,38 @@ function describeStartedItem(state, item) {
     }
     case "webSearch":
       return { message: `Searching: ${shorten(item.query, 96)}`, phase: "investigating" };
+    case "userMessage":
+      return { message: "User input received.", phase: "thinking" };
+    case "assistantMessage":
+    case "agentMessage": {
+      const text = extractItemText(item);
+      return {
+        message: text ? `Codex replying: ${shorten(text, 96)}` : "Codex drafting reply.",
+        phase: "thinking"
+      };
+    }
+    case "reasoning":
+      return { message: "Reasoning.", phase: "thinking" };
     default:
       return null;
   }
 }
 
+function extractItemText(item) {
+  // codex CLI emits item content under several shapes depending on the
+  // version. Try the common ones in priority order; the raw payload is
+  // always preserved in the event record so callers can fall back.
+  if (typeof item?.text === "string") return item.text;
+  if (Array.isArray(item?.content)) {
+    return item.content
+      .map((part) => part?.text || part?.value || "")
+      .join("")
+      .trim();
+  }
+  if (typeof item?.message === "string") return item.message;
+  return "";
+}
+
 function describeCompletedItem(state, item) {
   switch (item.type) {
     case "commandExecution": {
@@ -288,6 +315,18 @@ function describeCompletedItem(state, item) {
     }
     case "exitedReviewMode":
       return { message: "Reviewer finished.", phase: "finalizing" };
+    case "userMessage":
+      return { message: "User input received.", phase: "thinking" };
+    case "assistantMessage":
+    case "agentMessage": {
+      const text = extractItemText(item);
+      return {
+        message: text ? `Codex replied: ${shorten(text, 96)}` : "Codex reply complete.",
+        phase: "thinking"
+      };
+    }
+    case "reasoning":
+      return { message: "Reasoning complete.", phase: "thinking" };
     default:
       return null;
   }
@@ -518,6 +557,75 @@ export function normalizeNotification(state, message) {
         message: `Thread renamed: ${params.threadName ?? "?"}`,
         raw: params
       };
+    case "thread/status/changed": {
+      const statusType = params.status?.type ?? null;
+      let phase = "unknown";
+      let message = `Thread status: ${statusType ?? "?"}`;
+      if (statusType === "active") {
+        phase = "thinking";
+        message = "Thread active.";
+      } else if (statusType === "idle") {
+        // codex flips to idle after a turn completes (post turn/completed).
+        // Treat as completed-side phase so main loop reads it as quiescent.
+        phase = "idle";
+        message = "Thread idle.";
+      } else if (statusType === "systemError") {
+        phase = "failed";
+        message = "Thread system error.";
+      }
+      return {
+        ts,
+        method,
+        threadId: params.threadId ?? null,
+        turnId: null,
+        itemType: null,
+        lifecycle: null,
+        phase,
+        message,
+        raw: params
+      };
+    }
+    case "thread/tokenUsage/updated": {
+      // codex streams token usage updates as the turn progresses. P7
+      // top-level surface (runAppServerTurn.usage) reads from
+      // turnState.finalTurn?.usage at turn end; this event source is the
+      // real-time signal main-loop Claude can poll to detect "this turn
+      // is burning a lot of tokens, maybe compact before it overflows."
+      const usage = params.usage ?? params.tokenUsage ?? params ?? {};
+      const inTok = usage.inputTokens ?? usage.input ?? null;
+      const outTok = usage.outputTokens ?? usage.output ?? null;
+      const cachedTok = usage.cachedInputTokens ?? usage.cached ?? null;
+      const parts = [];
+      if (inTok != null) parts.push(`in=${inTok}`);
+      if (outTok != null) parts.push(`out=${outTok}`);
+      if (cachedTok != null) parts.push(`cached=${cachedTok}`);
+      return {
+        ts,
+        method,
+        threadId: params.threadId ?? null,
+        turnId: null,
+        itemType: null,
+        lifecycle: null,
+        phase: "metering",
+        message: parts.length > 0 ? `Token usage: ${parts.join(" ")}` : "Token usage updated.",
+        raw: params
+      };
+    }
+    case "warning":
+      // Codex emits these for non-fatal conditions (context budget exceeded,
+      // skipped capabilities, etc.). Surface them so main-loop Claude can
+      // factor them into routing decisions without confusing them with errors.
+      return {
+        ts,
+        method,
+        threadId: params.threadId ?? null,
+        turnId: null,
+        itemType: null,
+        lifecycle: null,
+        phase: "warning",
+        message: params.message ? `Warning: ${params.message}` : "Warning notification.",
+        raw: params
+      };
     case "turn/started":
       return {
         ts,
diff --git a/tests/normalize-notification.test.mjs b/tests/normalize-notification.test.mjs
index 153cba86..a8d6a922 100644
--- a/tests/normalize-notification.test.mjs
+++ b/tests/normalize-notification.test.mjs
@@ -187,3 +187,146 @@ test("normalizeNotification: handles malformed message without crashing", () =>
   assert.equal(event.phase, "unknown");
   assert.match(event.ts, /^\d{4}-\d{2}-\d{2}T/);
 });
+
+test("normalizeNotification: thread/status/changed active -> thinking phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "thread/status/changed",
+    params: { threadId: "thr_main", status: { type: "active", activeFlags: [] } }
+  });
+
+  assert.equal(event.method, "thread/status/changed");
+  assert.equal(event.phase, "thinking");
+  assert.equal(event.threadId, "thr_main");
+  assert.match(event.message, /active/);
+});
+
+test("normalizeNotification: thread/status/changed systemError -> failed phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "thread/status/changed",
+    params: { threadId: "thr_main", status: { type: "systemError" } }
+  });
+
+  assert.equal(event.phase, "failed");
+  assert.match(event.message, /system error/i);
+});
+
+test("normalizeNotification: thread/status/changed unknown status type -> unknown (forward-compat)", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "thread/status/changed",
+    params: { threadId: "thr_main", status: { type: "futureStatusWeDontKnow" } }
+  });
+
+  assert.equal(event.phase, "unknown");
+});
+
+test("normalizeNotification: warning -> warning phase with message body", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "warning",
+    params: { threadId: "thr_main", message: "Exceeded skills context budget of 2%." }
+  });
+
+  assert.equal(event.method, "warning");
+  assert.equal(event.phase, "warning");
+  assert.match(event.message, /Exceeded skills context budget/);
+});
+
+test("normalizeNotification: item/started userMessage -> thinking phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "item/started",
+    params: {
+      threadId: "thr_main",
+      item: { type: "userMessage", id: "u1", content: [{ type: "text", text: "Hi" }] }
+    }
+  });
+
+  assert.equal(event.itemType, "userMessage");
+  assert.equal(event.phase, "thinking");
+  assert.match(event.message, /User input received/);
+});
+
+test("normalizeNotification: item/started assistantMessage -> thinking phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "item/started",
+    params: {
+      threadId: "thr_main",
+      item: { type: "assistantMessage", id: "a1" }
+    }
+  });
+
+  assert.equal(event.itemType, "assistantMessage");
+  assert.equal(event.phase, "thinking");
+});
+
+test("normalizeNotification: item/started reasoning -> thinking phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "item/started",
+    params: { threadId: "thr_main", item: { type: "reasoning" } }
+  });
+
+  assert.equal(event.itemType, "reasoning");
+  assert.equal(event.phase, "thinking");
+});
+
+test("normalizeNotification: thread/status/changed idle -> idle phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "thread/status/changed",
+    params: { threadId: "thr_main", status: { type: "idle" } }
+  });
+
+  assert.equal(event.phase, "idle");
+  assert.match(event.message, /idle/i);
+});
+
+test("normalizeNotification: thread/tokenUsage/updated -> metering phase with in/out tokens", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "thread/tokenUsage/updated",
+    params: {
+      threadId: "thr_main",
+      usage: { inputTokens: 1234, outputTokens: 56, cachedInputTokens: 100 }
+    }
+  });
+
+  assert.equal(event.method, "thread/tokenUsage/updated");
+  assert.equal(event.phase, "metering");
+  assert.match(event.message, /in=1234/);
+  assert.match(event.message, /out=56/);
+  assert.match(event.message, /cached=100/);
+});
+
+test("normalizeNotification: thread/tokenUsage/updated without usage still yields metering phase", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "thread/tokenUsage/updated",
+    params: { threadId: "thr_main" }
+  });
+
+  assert.equal(event.phase, "metering");
+  assert.match(event.message, /Token usage updated/i);
+});
+
+test("normalizeNotification: item/started agentMessage with text shows reply preview", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "item/started",
+    params: {
+      threadId: "thr_main",
+      item: { type: "agentMessage", text: "pong" }
+    }
+  });
+
+  assert.equal(event.itemType, "agentMessage");
+  assert.equal(event.phase, "thinking");
+  assert.match(event.message, /Codex replying: pong/);
+});
+
+test("normalizeNotification: item/completed agentMessage with content[].text shows reply", () => {
+  const event = normalizeNotification(makeState(), {
+    method: "item/completed",
+    params: {
+      threadId: "thr_main",
+      item: { type: "agentMessage", content: [{ type: "text", text: "Final answer: 42" }] }
+    }
+  });
+
+  assert.equal(event.itemType, "agentMessage");
+  assert.equal(event.phase, "thinking");
+  assert.match(event.message, /Codex replied: Final answer: 42/);
+});

From 1014d1074f8e24958d3789873ddc04eea79f7183 Mon Sep 17 00:00:00 2001
From: bit-star <bit@gie.edu.kg>
Date: Tue, 19 May 2026 17:22:35 +0800
Subject: [PATCH 6/7] feat(phase-3): compact recovery + slash commands + rescue
 defaults to bg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the "operate codex like a Claude subagent" half of the design
contract. Phase 1+2 gave the main loop an event stream; Phase 3 gives
it the slash-command surface and the protocol-native recovery path.

In lib/codex.mjs:

- New exported compactAppServerThread(cwd, {threadId}). Wraps codex
  app-server's thread/compact/start RPC. This is the protocol-native
  recovery for "prompt too long" — main-loop Claude calls it after a
  turn fails with context-overflow, then resumes via /codex:rescue
  --resume <amended prompt>. Fire-and-return: the call awaits the
  app-server's ack but does not consume the streaming response. The
  broker recognizes thread/compact/start as STREAMING_METHOD but
  routes the stream to whoever owns it at that moment; compaction
  itself completes on the codex side regardless of consumer presence.
  Uses reuseExistingBroker: true so it can punch through to an
  already-running broker if one exists. The exact success payload
  shape is undocumented in codex CLI 0.131; result is preserved
  verbatim under .result for forward-compat.

In codex-companion.mjs:

- New `compact <thread-id> [--json]` subcommand wrapping
  compactAppServerThread. Plain output prints the operation result and
  hints the resume flow; --json returns the full structured report.
  Smoke-tested against real codex 0.131: bogus thread id correctly
  returns attempted:true, compacted:false, with codex's own error
  ("invalid thread id") preserved under .detail.

In agents/codex-rescue.md + commands/rescue.md:

- Default execution mode flipped from foreground to background. The
  prior heuristic ("small bounded => foreground; complex => background")
  was the deadlock root cause: a small task that stalls is still a
  deadlock, and "small" is unknowable in advance. Background is the
  safe default — the main loop polls /codex:events <job-id> for
  progress instead of blocking on the synchronous Bash call. --wait is
  honored when the user explicitly asks for foreground.

In commands/:

- New events.md: slash command surfacing the per-job event stream.
  Documents the {type:"job/exited"} terminal-state contract, the
  phase:"stuck" watchdog signal, the phase:"metering" token-usage
  source, and the --after-seq incremental polling pattern.
- New compact.md: slash command for the recovery sequence. Documents
  the typical "cancel → compact → resume with amended prompt" idiom
  main-loop Claude should follow when codex hits context overflow.

In tests/commands.test.mjs:

- Updated assertions to reflect the new background default and to
  include events.md + compact.md in the commands/ file-list invariant.
  The prior assertions hard-coded the prose "default to foreground" and
  the file list of 7 commands; both needed to track this change.

Tests: 128/128 pass. No new tests added — the compact path is exercised
indirectly via a smoke check (bogus thread id returns the expected
structured error against the real app-server). A full E2E for compact
needs a real turn first to obtain a valid thread id; that lands in
Phase 4 alongside version bump + CHANGELOG.
---
 plugins/codex/agents/codex-rescue.md      |  3 +-
 plugins/codex/commands/compact.md         | 33 ++++++++++++
 plugins/codex/commands/events.md          | 24 +++++++++
 plugins/codex/commands/rescue.md          |  2 +-
 plugins/codex/scripts/codex-companion.mjs | 43 ++++++++++++++++
 plugins/codex/scripts/lib/codex.mjs       | 62 +++++++++++++++++++++++
 tests/commands.test.mjs                   | 21 ++++++--
 7 files changed, 182 insertions(+), 6 deletions(-)
 create mode 100644 plugins/codex/commands/compact.md
 create mode 100644 plugins/codex/commands/events.md

diff --git a/plugins/codex/agents/codex-rescue.md b/plugins/codex/agents/codex-rescue.md
index 7009ec86..ec3ba088 100644
--- a/plugins/codex/agents/codex-rescue.md
+++ b/plugins/codex/agents/codex-rescue.md
@@ -20,8 +20,7 @@ Selection guidance:
 Forwarding rules:
 
 - Use exactly one `Bash` call to invoke `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task ...`.
-- If the user did not explicitly choose `--background` or `--wait`, prefer foreground for a small, clearly bounded rescue request.
-- If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer background execution.
+- Default to background execution: if the user did not explicitly choose `--background` or `--wait`, append `--background`. The companion command returns a job id immediately so the main Claude loop can monitor progress via `/codex:events <job-id>` and `/codex:status <job-id>` instead of blocking on the synchronous Bash call. Honor `--wait` only when the user explicitly asks for foreground execution.
 - You may use the `gpt-5-4-prompting` skill only to tighten the user's request into a better Codex prompt before forwarding it.
 - Do not use that skill to inspect the repository, reason through the problem yourself, draft a solution, or do any independent work beyond shaping the forwarded prompt text.
 - Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own.
diff --git a/plugins/codex/commands/compact.md b/plugins/codex/commands/compact.md
new file mode 100644
index 00000000..4bb4e882
--- /dev/null
+++ b/plugins/codex/commands/compact.md
@@ -0,0 +1,33 @@
+---
+description: Trigger codex's protocol-native context compaction on a thread (recovery path for "prompt too long")
+argument-hint: '<thread-id> [--json]'
+disable-model-invocation: true
+allowed-tools: Bash(node:*)
+---
+
+!`node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" compact "$ARGUMENTS"`
+
+When to use:
+
+- A Codex task has stalled or failed with a context-overflow error and you want to recover the thread without losing its history.
+- `/codex:events <job-id>` shows `phase:"stuck"` for an extended period and `phase:"metering"` records indicate the token budget is near the limit.
+- A previous turn returned `phase:"failed"` with a context-length error and you want to compact before resuming.
+
+Typical recovery sequence:
+
+1. `/codex:cancel <job-id>` if a turn is still running and stuck.
+2. `/codex:compact <thread-id>` — this command. Returns immediately after the codex app-server acknowledges the compaction request.
+3. `/codex:rescue --resume <amended prompt>` — resume the (now compacted) thread with a tighter prompt.
+
+Output format:
+
+- Without `--json`: prints `Compaction started on <thread-id>.` and a hint for the resume flow.
+- With `--json`: returns `{attempted, compacted, transport, result, detail}`.
+
+If the thread id is malformed or the app-server rejects the request, the command exits non-zero and emits the codex-side error in `stderr` (or in `detail` under `--json`).
+
+Notes:
+
+- Compaction runs codex-side after this command returns; it is not a synchronous wait.
+- `thread/compact/start` is a streaming RPC in the app-server protocol, but this wrapper does not consume the stream — codex completes compaction in the background regardless of whether a stream consumer is active.
+- The exact success payload shape from codex CLI is preserved verbatim under `result` for forward-compat; downstream consumers should not depend on its keys beyond what they have observed.
diff --git a/plugins/codex/commands/events.md b/plugins/codex/commands/events.md
new file mode 100644
index 00000000..d72975de
--- /dev/null
+++ b/plugins/codex/commands/events.md
@@ -0,0 +1,24 @@
+---
+description: Stream Codex notifications as a per-job NDJSON event log for poll-based monitoring
+argument-hint: '<job-id> [--since <iso>] [--after-seq <n>] [--limit <n>] [--json]'
+disable-model-invocation: true
+allowed-tools: Bash(node:*)
+---
+
+!`node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" events "$ARGUMENTS"`
+
+How to use the output:
+
+- Each line in the stream is a normalized notification: `seq`, `ts`, `method`, `phase`, `itemType`, `message`, and `raw`.
+- Treat `{type:"job/exited"}` as the single source of truth for terminal state — its `phase` is `completed` or `failed`, and `exitCode` reflects the codex turn outcome. Do not infer end-of-job from job-level `status` alone.
+- `phase:"stuck"` records (emitted by the worker's stall watchdog) mean codex produced no new notifications for the configured stall window (default 60s, override with `CODEX_COMPANION_STALL_SECONDS`). The job is not cancelled — the main loop decides whether to keep waiting, run `/codex:compact <thread-id>` to recover from context overflow, or `/codex:cancel <job-id>` to abort.
+- `phase:"warning"` records carry codex-side non-fatal conditions (context budget exceeded, capabilities removed). They are informational, not failures.
+- `phase:"metering"` records come from `thread/tokenUsage/updated` and stream real-time token usage; poll them to detect "this turn is burning a lot of tokens" before hitting context overflow.
+- Use `--after-seq <last-seq>` for incremental polling. If both `--after-seq` and `--since` are supplied, `--after-seq` wins. Default (no filter) returns all events for the job; the main loop is responsible for dedupe-by-seq.
+
+Output format:
+
+- Without `--json`: one human-readable line per event.
+- With `--json`: `{jobId, eventsFile, count, events: [...]}`.
+
+If no events exist yet for the job id, the command prints `No events yet for <job-id>.` and exits 0.
diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md
index 56de9555..0a853a67 100644
--- a/plugins/codex/commands/rescue.md
+++ b/plugins/codex/commands/rescue.md
@@ -15,7 +15,7 @@ Execution mode:
 
 - If the request includes `--background`, run the `codex:codex-rescue` subagent in the background.
 - If the request includes `--wait`, run the `codex:codex-rescue` subagent in the foreground.
-- If neither flag is present, default to foreground.
+- If neither flag is present, default to background. The companion returns a job id immediately; poll progress with `/codex:events <job-id> --since <last-seq> --json` (or `/codex:status <job-id>`) instead of blocking on a synchronous Bash call. This prevents the main Claude loop from deadlocking when Codex stalls or fails silently.
 - `--background` and `--wait` are execution flags for Claude Code. Do not forward them to `task`, and do not treat them as part of the natural-language task text.
 - `--model` and `--effort` are runtime-selection flags. Preserve them for the forwarded `task` call, but do not treat them as part of the natural-language task text.
 - If the request includes `--resume`, do not ask whether to continue. The user already chose.
diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs
index 52345fca..103f1290 100644
--- a/plugins/codex/scripts/codex-companion.mjs
+++ b/plugins/codex/scripts/codex-companion.mjs
@@ -9,6 +9,7 @@ import { fileURLToPath } from "node:url";
 import { parseArgs, splitRawArgumentString } from "./lib/args.mjs";
 import {
     buildPersistentTaskThreadName,
+    compactAppServerThread,
     DEFAULT_CONTINUE_PROMPT,
     findLatestTaskThread,
     getCodexAuthStatus,
@@ -82,6 +83,7 @@ function printUsage() {
       "  node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>] [focus text]",
       "  node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [prompt]",
       "  node scripts/codex-companion.mjs events <job-id> [--since <iso>] [--after-seq <n>] [--limit <n>] [--json]",
+      "  node scripts/codex-companion.mjs compact <thread-id> [--json]",
       "  node scripts/codex-companion.mjs status [job-id] [--all] [--json]",
       "  node scripts/codex-companion.mjs result [job-id] [--json]",
       "  node scripts/codex-companion.mjs cancel [job-id] [--json]"
@@ -933,6 +935,44 @@ async function handleTaskWorker(argv) {
   if (workerError) throw workerError;
 }
 
+async function handleCompact(argv) {
+  const { options, positionals } = parseCommandInput(argv, {
+    valueOptions: ["cwd"],
+    booleanOptions: ["json"]
+  });
+
+  const cwd = resolveCommandCwd(options);
+  const threadId = positionals[0];
+  if (!threadId) {
+    throw new Error("Usage: compact <thread-id> [--json]");
+  }
+
+  const report = await compactAppServerThread(cwd, { threadId });
+
+  if (options.json) {
+    console.log(JSON.stringify(report, null, 2));
+    return;
+  }
+
+  if (!report.attempted) {
+    process.stderr.write(`Compaction not attempted: ${report.detail}\n`);
+    process.exitCode = 1;
+    return;
+  }
+  if (!report.compacted) {
+    process.stderr.write(`Compaction failed: ${report.detail}\n`);
+    process.exitCode = 1;
+    return;
+  }
+  process.stdout.write(`${report.detail}\n`);
+  process.stdout.write(
+    `Resume the thread with a tightened prompt via:\n` +
+      `  /codex:rescue --resume <amended prompt>\n` +
+      `or directly:\n` +
+      `  codex resume ${threadId}\n`
+  );
+}
+
 async function handleEvents(argv) {
   const { options, positionals } = parseCommandInput(argv, {
     valueOptions: ["cwd", "since", "after-seq", "limit"],
@@ -1159,6 +1199,9 @@ async function main() {
     case "events":
       await handleEvents(argv);
       break;
+    case "compact":
+      await handleCompact(argv);
+      break;
     case "status":
       await handleStatus(argv);
       break;
diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs
index a7196274..2012c558 100644
--- a/plugins/codex/scripts/lib/codex.mjs
+++ b/plugins/codex/scripts/lib/codex.mjs
@@ -1107,6 +1107,68 @@ export async function getCodexAuthStatus(cwd, options = {}) {
   }
 }
 
+/**
+ * Trigger codex's protocol-native context compaction on a thread. This is
+ * the recovery path for "prompt too long / context overflow" scenarios:
+ * main-loop Claude calls /codex:compact, then resumes the thread with an
+ * amended prompt via /codex:rescue --resume. Compaction itself runs on
+ * the codex side; this wrapper is fire-and-return — it kicks off the
+ * compact request and returns once the app-server acknowledges it.
+ * Streaming notifications (broker recognizes thread/compact/start as a
+ * STREAMING_METHOD) flow to whoever owns the broker stream at that moment;
+ * if no consumer is active, codex still completes compaction on its side.
+ *
+ * NOTE: the exact shape of the success payload is not documented for
+ * codex CLI 0.131; this returns the raw result alongside the bookkeeping
+ * fields. A typed wrapper in app-server-protocol.d.ts can land once the
+ * shape is observed in a real run.
+ */
+export async function compactAppServerThread(cwd, { threadId }) {
+  if (!threadId) {
+    return {
+      attempted: false,
+      compacted: false,
+      transport: null,
+      result: null,
+      detail: "missing threadId"
+    };
+  }
+
+  const availability = getCodexAvailability(cwd);
+  if (!availability.available) {
+    return {
+      attempted: false,
+      compacted: false,
+      transport: null,
+      result: null,
+      detail: availability.detail
+    };
+  }
+
+  let client = null;
+  try {
+    client = await CodexAppServerClient.connect(cwd, { reuseExistingBroker: true });
+    const result = await client.request("thread/compact/start", { threadId });
+    return {
+      attempted: true,
+      compacted: true,
+      transport: client.transport,
+      result,
+      detail: `Compaction started on ${threadId}.`
+    };
+  } catch (error) {
+    return {
+      attempted: true,
+      compacted: false,
+      transport: client?.transport ?? null,
+      result: null,
+      detail: error instanceof Error ? error.message : String(error)
+    };
+  } finally {
+    await client?.close().catch(() => {});
+  }
+}
+
 export async function interruptAppServerTurn(cwd, { threadId, turnId }) {
   if (!threadId || !turnId) {
     return {
diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs
index 3724ffa4..830f56a0 100644
--- a/tests/commands.test.mjs
+++ b/tests/commands.test.mjs
@@ -72,9 +72,15 @@ test("adversarial review command uses AskUserQuestion and background Bash while
 
 test("continue is not exposed as a user-facing command", () => {
   const commandFiles = fs.readdirSync(path.join(PLUGIN_ROOT, "commands")).sort();
+  // events.md + compact.md added by feat/event-stream-foundation: the main
+  // Claude loop polls /codex:events to monitor codex state and runs
+  // /codex:compact to recover from "prompt too long" via codex's protocol-
+  // native thread/compact/start RPC.
   assert.deepEqual(commandFiles, [
     "adversarial-review.md",
     "cancel.md",
+    "compact.md",
+    "events.md",
     "rescue.md",
     "result.md",
     "review.md",
@@ -109,7 +115,11 @@ test("rescue command absorbs continue semantics", () => {
   assert.match(rescue, /Continue current Codex thread/);
   assert.match(rescue, /Start a new Codex thread/);
   assert.match(rescue, /run the `codex:codex-rescue` subagent in the background/i);
-  assert.match(rescue, /default to foreground/i);
+  // Default execution mode was flipped from foreground to background so the
+  // main Claude loop polls /codex:events instead of blocking on a synchronous
+  // Bash call when Codex stalls. See feat/event-stream-foundation.
+  assert.match(rescue, /default to background/i);
+  assert.match(rescue, /poll progress with `\/codex:events <job-id>/i);
   assert.match(rescue, /Do not forward them to `task`/i);
   assert.match(rescue, /`--model` and `--effort` are runtime-selection flags/i);
   assert.match(rescue, /Leave `--effort` unset unless the user explicitly asks for a specific reasoning effort/i);
@@ -126,8 +136,13 @@ test("rescue command absorbs continue semantics", () => {
   assert.match(agent, /--resume/);
   assert.match(agent, /--fresh/);
   assert.match(agent, /thin forwarding wrapper/i);
-  assert.match(agent, /prefer foreground for a small, clearly bounded rescue request/i);
-  assert.match(agent, /If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer background execution/i);
+  // Default execution mode is now background (see feat/event-stream-foundation):
+  // foreground rescue blocks the main Claude loop when codex stalls, which is
+  // exactly the deadlock the per-job event stream was added to break.
+  assert.match(agent, /Default to background execution/i);
+  assert.match(agent, /append `--background`/i);
+  assert.match(agent, /monitor progress via `\/codex:events/i);
+  assert.match(agent, /Honor `--wait` only when the user explicitly asks for foreground/i);
   assert.match(agent, /Use exactly one `Bash` call/i);
   assert.match(agent, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i);
   assert.match(agent, /Do not call `review`, `adversarial-review`, `status`, `result`, or `cancel`/i);

From f9f36fb01669cf56403bbd940f641e07a2b3cdcb Mon Sep 17 00:00:00 2001
From: bit-star <bit@gie.edu.kg>
Date: Tue, 19 May 2026 17:28:55 +0800
Subject: [PATCH 7/7] chore(release): bump plugin version to 1.1.0 + CHANGELOG
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cuts the minor-version release for the observability rework
(feat/event-stream-foundation). Adds /codex:events, /codex:compact,
rescue defaults to --background, stall watchdog, terminal job/exited
events, top-level token usage, and broader codex CLI 0.131 notification
coverage. No breaking changes — all existing commands keep the same
signatures and outputs; the new event stream is additive.
---
 .claude-plugin/marketplace.json          |  4 +--
 package-lock.json                        |  4 +--
 package.json                             |  2 +-
 plugins/codex/.claude-plugin/plugin.json |  2 +-
 plugins/codex/CHANGELOG.md               | 42 ++++++++++++++++++++++++
 5 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index ec617987..b6caf144 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -5,13 +5,13 @@
   },
   "metadata": {
     "description": "Codex plugins to use in Claude Code for delegation and code review.",
-    "version": "1.0.4"
+    "version": "1.1.0"
   },
   "plugins": [
     {
       "name": "codex",
       "description": "Use Codex from Claude Code to review code or delegate tasks.",
-      "version": "1.0.4",
+      "version": "1.1.0",
       "author": {
         "name": "OpenAI"
       },
diff --git a/package-lock.json b/package-lock.json
index 82d04a25..3640e02f 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "@openai/codex-plugin-cc",
-  "version": "1.0.4",
+  "version": "1.1.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "@openai/codex-plugin-cc",
-      "version": "1.0.4",
+      "version": "1.1.0",
       "license": "Apache-2.0",
       "devDependencies": {
         "@types/node": "^25.5.0",
diff --git a/package.json b/package.json
index 833fd51c..292041a1 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@openai/codex-plugin-cc",
-  "version": "1.0.4",
+  "version": "1.1.0",
   "private": true,
   "type": "module",
   "description": "Use Codex from Claude Code to review code or delegate tasks.",
diff --git a/plugins/codex/.claude-plugin/plugin.json b/plugins/codex/.claude-plugin/plugin.json
index da262028..4838522f 100644
--- a/plugins/codex/.claude-plugin/plugin.json
+++ b/plugins/codex/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "codex",
-  "version": "1.0.4",
+  "version": "1.1.0",
   "description": "Use Codex from Claude Code to review code or delegate tasks.",
   "author": {
     "name": "OpenAI"
diff --git a/plugins/codex/CHANGELOG.md b/plugins/codex/CHANGELOG.md
index d647561b..73c52574 100644
--- a/plugins/codex/CHANGELOG.md
+++ b/plugins/codex/CHANGELOG.md
@@ -1,5 +1,47 @@
 # Changelog
 
+## 1.1.0
+
+Observability rework for the main-loop orchestration case: the consumer of
+"what is codex doing right now" is the calling Claude session, not a human
+dashboard. Adds a per-job NDJSON event stream the main loop can poll, plus
+a protocol-native recovery path for context overflow.
+
+- `/codex:events <job-id>`: new slash command. Streams normalized codex
+  notifications from `{stateDir}/jobs/{jobId}.events.ndjson`. Supports
+  `--since <iso>` / `--after-seq <n>` / `--limit <n>` / `--json` for
+  incremental polling. Each event carries `seq`, `ts`, `method`, `phase`,
+  `itemType`, `message`, and the raw payload.
+- `/codex:compact <thread-id>`: new slash command wrapping codex
+  app-server's `thread/compact/start`. Protocol-native recovery for
+  "prompt too long" — typical flow is cancel → compact → resume with an
+  amended prompt via `/codex:rescue --resume`.
+- `/codex:rescue` now defaults to `--background`. The main Claude loop
+  receives a job id immediately and polls `/codex:events` instead of
+  blocking on a synchronous Bash call; this removes the deadlock when
+  codex stalls or errors silently.
+- Per-job stall watchdog (60s default, override via
+  `CODEX_COMPANION_STALL_SECONDS`) emits a `{type:"watchdog",
+  phase:"stuck"}` event when codex produces no new notifications inside
+  the window. The watchdog never cancels — the main loop decides whether
+  to continue, compact, or cancel.
+- New `{type:"job/exited"}` terminal event with `phase: completed|failed`
+  and `exitCode`. This is the single source of truth for end-of-job;
+  callers should not infer terminal state from job-level `status` alone.
+- Surfaces token usage as a top-level field on `runAppServerTurn` and
+  streams real-time usage via `thread/tokenUsage/updated` events
+  (`phase: "metering"`).
+- Coverage of codex CLI 0.131 notification methods extended to
+  `thread/status/changed`, `warning`, `thread/tokenUsage/updated`, plus
+  item types `userMessage`, `assistantMessage`/`agentMessage`, and
+  `reasoning`. The `agentMessage` item now surfaces a content preview so
+  the main loop can recognize codex's final reply from the event stream
+  without fetching `/codex:result`.
+- Test isolation: `tests/helpers.mjs` now unsets `CLAUDE_PLUGIN_DATA` and
+  `CODEX_COMPANION_SESSION_ID` at module load. Plugin host runtimes (e.g.
+  Claude Code) inject these vars; without isolation, two existing tests
+  fail when contributors run `npm test` from inside a host.
+
 ## 1.0.0
 
 - Initial version of the Codex plugin for Claude Code