From 14ae8f931d2bcbd7ed792996f7ef9b65082b5054 Mon Sep 17 00:00:00 2001 From: Elad Ariel Date: Wed, 20 May 2026 12:50:22 +0300 Subject: [PATCH 1/3] feat: add Antigravity CLI and Gemini environment support --- commands/adversarial-review.toml | 11 + commands/cancel.toml | 11 + commands/rescue.toml | 21 + commands/result.toml | 11 + commands/review.toml | 11 + commands/setup.toml | 14 + commands/status.toml | 11 + gemini-extension.json | 6 + package-lock.json | 4 +- package.json | 4 +- plugins/codex/scripts/codex-companion.mjs | 6 +- plugins/codex/scripts/lib/app-server.mjs | 4 +- plugins/codex/scripts/lib/codex.mjs | 4 +- plugins/codex/scripts/lib/job-control.mjs | 3 +- plugins/codex/scripts/lib/state.mjs | 2 +- plugins/codex/scripts/lib/tracked-jobs.mjs | 2 +- .../codex/scripts/session-lifecycle-hook.mjs | 7 +- .../codex/scripts/stop-review-gate-hook.mjs | 10 +- tests/tests/broker-endpoint.test.mjs | 22 + tests/tests/bump-version.test.mjs | 88 + tests/tests/commands.test.mjs | 219 ++ tests/tests/fake-codex-fixture.mjs | 594 +++++ tests/tests/git.test.mjs | 183 ++ tests/tests/helpers.mjs | 32 + tests/tests/process.test.mjs | 55 + tests/tests/render.test.mjs | 59 + tests/tests/runtime.test.mjs | 2139 +++++++++++++++++ tests/tests/state.test.mjs | 105 + 28 files changed, 3616 insertions(+), 22 deletions(-) create mode 100644 commands/adversarial-review.toml create mode 100644 commands/cancel.toml create mode 100644 commands/rescue.toml create mode 100644 commands/result.toml create mode 100644 commands/review.toml create mode 100644 commands/setup.toml create mode 100644 commands/status.toml create mode 100644 gemini-extension.json create mode 100644 tests/tests/broker-endpoint.test.mjs create mode 100644 tests/tests/bump-version.test.mjs create mode 100644 tests/tests/commands.test.mjs create mode 100644 tests/tests/fake-codex-fixture.mjs create mode 100644 tests/tests/git.test.mjs create mode 100644 tests/tests/helpers.mjs create mode 100644 
tests/tests/process.test.mjs create mode 100644 tests/tests/render.test.mjs create mode 100644 tests/tests/runtime.test.mjs create mode 100644 tests/tests/state.test.mjs diff --git a/commands/adversarial-review.toml b/commands/adversarial-review.toml new file mode 100644 index 00000000..d82f1a90 --- /dev/null +++ b/commands/adversarial-review.toml @@ -0,0 +1,11 @@ +description = "Runs a steerable review that questions the chosen implementation and design" +prompt = """ +You are the adversarial-review command handler for the Codex plugin on Antigravity CLI. +Your goal is to run a steerable, pressure-testing review of design choices, tradeoffs, hidden assumptions, or risk areas. + +Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Execute the following command using the `run_command` tool: +`node "" adversarial-review {{args}}` + +Present the review results exactly as-is to the user. +""" diff --git a/commands/cancel.toml b/commands/cancel.toml new file mode 100644 index 00000000..7c386db1 --- /dev/null +++ b/commands/cancel.toml @@ -0,0 +1,11 @@ +description = "Cancel an ongoing background Codex job" +prompt = """ +You are the cancel command handler for the Codex plugin on Antigravity CLI. +Your goal is to cancel a running background job. + +Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Execute the following command using the `run_command` tool: +`node "" cancel {{args}}` + +Present the output exactly as-is to the user. 
+""" diff --git a/commands/rescue.toml b/commands/rescue.toml new file mode 100644 index 00000000..ae739e9c --- /dev/null +++ b/commands/rescue.toml @@ -0,0 +1,21 @@ +description = "Delegate investigation, an explicit fix request, or follow-up rescue work to the Codex rescue process" +prompt = """ +You are the rescue command handler for the Codex plugin on Antigravity CLI. +Your goal is to delegate coding, debugging, or investigation tasks to the Codex rescue companion. + +Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. + +First, check if there is a resumable rescue candidate from the current session by running: +`node "" task-resume-candidate --json` + +If the result shows a candidate is available, ask the user if they want to: +1. "Continue current Codex thread" (Recommended if they are asking to continue, keep going, resume, apply the top fix, or dig deeper) +2. "Start a new Codex thread" (Recommended otherwise) + +Then, run the task using the `run_command` tool: +`node "" task {{args}}` +(Add `--resume-last` if they chose to continue the thread, or `--fresh` for a new one). + +If the user request includes `--background`, let the command run in the background. +Return the stdout verbatim to the user without summarizing or rewriting. +""" diff --git a/commands/result.toml b/commands/result.toml new file mode 100644 index 00000000..6149afc7 --- /dev/null +++ b/commands/result.toml @@ -0,0 +1,11 @@ +description = "Fetch and display the final result of a completed background Codex job" +prompt = """ +You are the result command handler for the Codex plugin on Antigravity CLI. +Your goal is to fetch and print the final result of a background job. + +Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. 
+Execute the following command using the `run_command` tool: +`node "" result {{args}}` + +Present the output exactly as-is to the user. +""" diff --git a/commands/review.toml b/commands/review.toml new file mode 100644 index 00000000..7ac12d2d --- /dev/null +++ b/commands/review.toml @@ -0,0 +1,11 @@ +description = "Runs a normal Codex review on your current work" +prompt = """ +You are the review command handler for the Codex plugin on Antigravity CLI. +Your goal is to run a normal, read-only Codex review on the current uncommitted changes or branch comparison. + +Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Execute the following command using the `run_command` tool: +`node "" review {{args}}` + +Present the review results exactly as-is to the user. +""" diff --git a/commands/setup.toml b/commands/setup.toml new file mode 100644 index 00000000..901d79b4 --- /dev/null +++ b/commands/setup.toml @@ -0,0 +1,14 @@ +description = "Check whether the local Codex CLI is ready and optionally toggle the stop-time review gate" +prompt = """ +You are the setup command handler for the Codex plugin on Antigravity CLI. +Your goal is to run the setup process. + +Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Execute the following command using the `run_command` tool: +`node "" setup --json {{args}}` + +After executing the command: +1. Parse the JSON output returned by the command. +2. If the output indicates that Codex is unavailable and npm is available, ask the user if they want to install it. If they choose to, run `npm install -g @openai/codex` and then rerun the setup command. +3. Present the setup results beautifully to the user. 
+""" diff --git a/commands/status.toml b/commands/status.toml new file mode 100644 index 00000000..4044d257 --- /dev/null +++ b/commands/status.toml @@ -0,0 +1,11 @@ +description = "Check progress on ongoing or completed background Codex jobs" +prompt = """ +You are the status command handler for the Codex plugin on Antigravity CLI. +Your goal is to check the progress or status of background jobs. + +Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Execute the following command using the `run_command` tool: +`node "" status {{args}}` + +Present the output exactly as-is to the user. +""" diff --git a/gemini-extension.json b/gemini-extension.json new file mode 100644 index 00000000..a02fa069 --- /dev/null +++ b/gemini-extension.json @@ -0,0 +1,6 @@ +{ + "name": "codex", + "version": "1.0.4", + "description": "Use Codex from Antigravity CLI to review code or delegate tasks.", + "contextFileName": "GEMINI.md" +} diff --git a/package-lock.json b/package-lock.json index 82d04a25..8a565358 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,11 +1,11 @@ { - "name": "@openai/codex-plugin-cc", + "name": "@openai/codex-plugin-agy", "version": "1.0.4", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "@openai/codex-plugin-cc", + "name": "@openai/codex-plugin-agy", "version": "1.0.4", "license": "Apache-2.0", "devDependencies": { diff --git a/package.json b/package.json index 833fd51c..75ff77ca 100644 --- a/package.json +++ b/package.json @@ -1,9 +1,9 @@ { - "name": "@openai/codex-plugin-cc", + "name": "@openai/codex-plugin-agy", "version": "1.0.4", "private": true, "type": "module", - "description": "Use Codex from Claude Code to review code or delegate tasks.", + "description": "Use Codex from Antigravity CLI to review code or delegate tasks.", "license": "Apache-2.0", "engines": { "node": ">=18.18.0" diff --git 
a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 35222fd5..1a0a79d2 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -68,7 +68,7 @@ const DEFAULT_STATUS_WAIT_TIMEOUT_MS = 240000; const DEFAULT_STATUS_POLL_INTERVAL_MS = 2000; const VALID_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]); const MODEL_ALIASES = new Map([["spark", "gpt-5.3-codex-spark"]]); -const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Claude turn."; +const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Antigravity turn."; function printUsage() { console.log( @@ -289,7 +289,7 @@ function isActiveJobStatus(status) { } function getCurrentClaudeSessionId() { - return process.env[SESSION_ID_ENV] ?? null; + return process.env[SESSION_ID_ENV] ?? process.env.ANTIGRAVITY_TRAJECTORY_ID ?? process.env.GEMINI_TRAJECTORY_ID ?? null; } function filterJobsForCurrentClaudeSession(jobs) { @@ -538,7 +538,7 @@ function buildTaskRunMetadata({ prompt, resumeLast = false }) { if (!resumeLast && String(prompt ?? 
"").includes(STOP_REVIEW_TASK_MARKER)) { return { title: "Codex Stop Gate Review", - summary: "Stop-gate review of previous Claude turn" + summary: "Stop-gate review of previous Antigravity turn" }; } diff --git a/plugins/codex/scripts/lib/app-server.mjs b/plugins/codex/scripts/lib/app-server.mjs index 127c8376..ae31df1c 100644 --- a/plugins/codex/scripts/lib/app-server.mjs +++ b/plugins/codex/scripts/lib/app-server.mjs @@ -16,7 +16,7 @@ import { parseBrokerEndpoint } from "./broker-endpoint.mjs"; import { ensureBrokerSession, loadBrokerSession } from "./broker-lifecycle.mjs"; import { terminateProcessTree } from "./process.mjs"; -const PLUGIN_MANIFEST_URL = new URL("../../.claude-plugin/plugin.json", import.meta.url); +const PLUGIN_MANIFEST_URL = new URL("../../../../gemini-extension.json", import.meta.url); const PLUGIN_MANIFEST = JSON.parse(fs.readFileSync(PLUGIN_MANIFEST_URL, "utf8")); export const BROKER_ENDPOINT_ENV = "CODEX_COMPANION_APP_SERVER_ENDPOINT"; @@ -25,7 +25,7 @@ export const BROKER_BUSY_RPC_CODE = -32001; /** @type {ClientInfo} */ const DEFAULT_CLIENT_INFO = { title: "Codex Plugin", - name: "Claude Code", + name: "Antigravity CLI", version: PLUGIN_MANIFEST.version ?? "0.0.0" }; diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs index f2fe88bd..91c02f5c 100644 --- a/plugins/codex/scripts/lib/codex.mjs +++ b/plugins/codex/scripts/lib/codex.mjs @@ -39,7 +39,7 @@ import { BROKER_BUSY_RPC_CODE, BROKER_ENDPOINT_ENV, CodexAppServerClient } from import { loadBrokerSession } from "./broker-lifecycle.mjs"; import { binaryAvailable } from "./process.mjs"; -const SERVICE_NAME = "claude_code_codex_plugin"; +const SERVICE_NAME = "antigravity_cli_codex_plugin"; const TASK_THREAD_PREFIX = "Codex Companion Task"; const DEFAULT_CONTINUE_PROMPT = "Continue from the current thread state. 
Pick the next highest-value step and follow through until the task is resolved."; @@ -815,7 +815,7 @@ export function getSessionRuntimeStatus(env = process.env, cwd = process.cwd()) return { mode: "shared", label: "shared session", - detail: "This Claude session is configured to reuse one shared Codex runtime.", + detail: "This Antigravity session is configured to reuse one shared Codex runtime.", endpoint }; } diff --git a/plugins/codex/scripts/lib/job-control.mjs b/plugins/codex/scripts/lib/job-control.mjs index ad152c15..5cdfa68f 100644 --- a/plugins/codex/scripts/lib/job-control.mjs +++ b/plugins/codex/scripts/lib/job-control.mjs @@ -13,7 +13,8 @@ export function sortJobsNewestFirst(jobs) { } function getCurrentSessionId(options = {}) { - return options.env?.[SESSION_ID_ENV] ?? process.env[SESSION_ID_ENV] ?? null; + const env = options.env ?? process.env; + return env[SESSION_ID_ENV] ?? env.ANTIGRAVITY_TRAJECTORY_ID ?? env.GEMINI_TRAJECTORY_ID ?? null; } function filterJobsForCurrentSession(jobs, options = {}) { diff --git a/plugins/codex/scripts/lib/state.mjs b/plugins/codex/scripts/lib/state.mjs index 2da23498..6fe1777f 100644 --- a/plugins/codex/scripts/lib/state.mjs +++ b/plugins/codex/scripts/lib/state.mjs @@ -6,7 +6,7 @@ import path from "node:path"; import { resolveWorkspaceRoot } from "./workspace.mjs"; const STATE_VERSION = 1; -const PLUGIN_DATA_ENV = "CLAUDE_PLUGIN_DATA"; +const PLUGIN_DATA_ENV = process.env.ANTIGRAVITY_PLUGIN_DATA ? "ANTIGRAVITY_PLUGIN_DATA" : (process.env.GEMINI_PLUGIN_DATA ? 
"GEMINI_PLUGIN_DATA" : "CLAUDE_PLUGIN_DATA"); const FALLBACK_STATE_ROOT_DIR = path.join(os.tmpdir(), "codex-companion"); const STATE_FILE_NAME = "state.json"; const JOBS_DIR_NAME = "jobs"; diff --git a/plugins/codex/scripts/lib/tracked-jobs.mjs b/plugins/codex/scripts/lib/tracked-jobs.mjs index 90286901..e808c884 100644 --- a/plugins/codex/scripts/lib/tracked-jobs.mjs +++ b/plugins/codex/scripts/lib/tracked-jobs.mjs @@ -59,7 +59,7 @@ export function createJobLogFile(workspaceRoot, jobId, title) { export function createJobRecord(base, options = {}) { const env = options.env ?? process.env; - const sessionId = env[options.sessionIdEnv ?? SESSION_ID_ENV]; + const sessionId = env[options.sessionIdEnv ?? SESSION_ID_ENV] ?? env.ANTIGRAVITY_TRAJECTORY_ID ?? env.GEMINI_TRAJECTORY_ID ?? null; return { ...base, createdAt: nowIso(), diff --git a/plugins/codex/scripts/session-lifecycle-hook.mjs b/plugins/codex/scripts/session-lifecycle-hook.mjs index 9655eaef..cf829b7e 100644 --- a/plugins/codex/scripts/session-lifecycle-hook.mjs +++ b/plugins/codex/scripts/session-lifecycle-hook.mjs @@ -17,7 +17,7 @@ import { loadState, resolveStateFile, saveState } from "./lib/state.mjs"; import { resolveWorkspaceRoot } from "./lib/workspace.mjs"; export const SESSION_ID_ENV = "CODEX_COMPANION_SESSION_ID"; -const PLUGIN_DATA_ENV = "CLAUDE_PLUGIN_DATA"; +const PLUGIN_DATA_ENV = process.env.ANTIGRAVITY_PLUGIN_DATA ? "ANTIGRAVITY_PLUGIN_DATA" : (process.env.GEMINI_PLUGIN_DATA ? 
"GEMINI_PLUGIN_DATA" : "CLAUDE_PLUGIN_DATA"); function readHookInput() { const raw = fs.readFileSync(0, "utf8").trim(); @@ -32,10 +32,11 @@ function shellEscape(value) { } function appendEnvVar(name, value) { - if (!process.env.CLAUDE_ENV_FILE || value == null || value === "") { + const envFile = process.env.ANTIGRAVITY_ENV_FILE || process.env.GEMINI_ENV_FILE || process.env.CLAUDE_ENV_FILE; + if (!envFile || value == null || value === "") { return; } - fs.appendFileSync(process.env.CLAUDE_ENV_FILE, `export ${name}=${shellEscape(value)}\n`, "utf8"); + fs.appendFileSync(envFile, `export ${name}=${shellEscape(value)}\n`, "utf8"); } function cleanupSessionJobs(cwd, sessionId) { diff --git a/plugins/codex/scripts/stop-review-gate-hook.mjs b/plugins/codex/scripts/stop-review-gate-hook.mjs index 2346bdcf..9877e5a8 100644 --- a/plugins/codex/scripts/stop-review-gate-hook.mjs +++ b/plugins/codex/scripts/stop-review-gate-hook.mjs @@ -16,7 +16,7 @@ import { resolveWorkspaceRoot } from "./lib/workspace.mjs"; const STOP_REVIEW_TIMEOUT_MS = 15 * 60 * 1000; const SCRIPT_DIR = path.dirname(fileURLToPath(import.meta.url)); const ROOT_DIR = path.resolve(SCRIPT_DIR, ".."); -const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Claude turn."; +const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Antigravity turn."; function readHookInput() { const raw = fs.readFileSync(0, "utf8").trim(); @@ -48,11 +48,11 @@ function filterJobsForCurrentSession(jobs, input = {}) { function buildStopReviewPrompt(input = {}) { const lastAssistantMessage = String(input.last_assistant_message ?? "").trim(); const template = loadPromptTemplate(ROOT_DIR, "stop-review-gate"); - const claudeResponseBlock = lastAssistantMessage - ? ["Previous Claude response:", lastAssistantMessage].join("\n") + const antigravityResponseBlock = lastAssistantMessage + ? 
["Previous Antigravity response:", lastAssistantMessage].join("\n") : ""; return interpolateTemplate(template, { - CLAUDE_RESPONSE_BLOCK: claudeResponseBlock + ANTIGRAVITY_RESPONSE_BLOCK: antigravityResponseBlock }); } @@ -141,7 +141,7 @@ function runStopReview(cwd, input = {}) { function main() { const input = readHookInput(); - const cwd = input.cwd || process.env.CLAUDE_PROJECT_DIR || process.cwd(); + const cwd = input.cwd || process.env.ANTIGRAVITY_PROJECT_DIR || process.env.GEMINI_PROJECT_DIR || process.env.CLAUDE_PROJECT_DIR || process.cwd(); const workspaceRoot = resolveWorkspaceRoot(cwd); const config = getConfig(workspaceRoot); diff --git a/tests/tests/broker-endpoint.test.mjs b/tests/tests/broker-endpoint.test.mjs new file mode 100644 index 00000000..b3fc1146 --- /dev/null +++ b/tests/tests/broker-endpoint.test.mjs @@ -0,0 +1,22 @@ +import test from "node:test"; +import assert from "node:assert/strict"; + +import { createBrokerEndpoint, parseBrokerEndpoint } from "../plugins/codex/scripts/lib/broker-endpoint.mjs"; + +test("createBrokerEndpoint uses Unix sockets on non-Windows platforms", () => { + const endpoint = createBrokerEndpoint("/tmp/cxc-12345", "darwin"); + assert.equal(endpoint, "unix:/tmp/cxc-12345/broker.sock"); + assert.deepEqual(parseBrokerEndpoint(endpoint), { + kind: "unix", + path: "/tmp/cxc-12345/broker.sock" + }); +}); + +test("createBrokerEndpoint uses named pipes on Windows", () => { + const endpoint = createBrokerEndpoint("C:\\\\Temp\\\\cxc-12345", "win32"); + assert.equal(endpoint, "pipe:\\\\.\\pipe\\cxc-12345-codex-app-server"); + assert.deepEqual(parseBrokerEndpoint(endpoint), { + kind: "pipe", + path: "\\\\.\\pipe\\cxc-12345-codex-app-server" + }); +}); diff --git a/tests/tests/bump-version.test.mjs b/tests/tests/bump-version.test.mjs new file mode 100644 index 00000000..205b0e9f --- /dev/null +++ b/tests/tests/bump-version.test.mjs @@ -0,0 +1,88 @@ +import fs from "node:fs"; +import path from "node:path"; +import test from 
"node:test"; +import assert from "node:assert/strict"; +import { fileURLToPath } from "node:url"; + +import { makeTempDir, run } from "./helpers.mjs"; + +const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const SCRIPT = path.join(ROOT, "scripts", "bump-version.mjs"); + +function writeJson(filePath, json) { + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + fs.writeFileSync(filePath, `${JSON.stringify(json, null, 2)}\n`); +} + +function readJson(filePath) { + return JSON.parse(fs.readFileSync(filePath, "utf8")); +} + +function makeVersionFixture() { + const root = makeTempDir(); + + writeJson(path.join(root, "package.json"), { + name: "@openai/codex-plugin-cc", + version: "1.0.2" + }); + writeJson(path.join(root, "package-lock.json"), { + name: "@openai/codex-plugin-cc", + version: "1.0.2", + lockfileVersion: 3, + packages: { + "": { + name: "@openai/codex-plugin-cc", + version: "1.0.2" + } + } + }); + writeJson(path.join(root, "plugins", "codex", ".claude-plugin", "plugin.json"), { + name: "codex", + version: "1.0.2" + }); + writeJson(path.join(root, ".claude-plugin", "marketplace.json"), { + metadata: { + version: "1.0.2" + }, + plugins: [ + { + name: "codex", + version: "1.0.2" + } + ] + }); + + return root; +} + +test("bump-version updates every release manifest", () => { + const root = makeVersionFixture(); + + const result = run("node", [SCRIPT, "--root", root, "1.2.3"], { + cwd: ROOT + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(readJson(path.join(root, "package.json")).version, "1.2.3"); + assert.equal(readJson(path.join(root, "package-lock.json")).version, "1.2.3"); + assert.equal(readJson(path.join(root, "package-lock.json")).packages[""].version, "1.2.3"); + assert.equal(readJson(path.join(root, "plugins", "codex", ".claude-plugin", "plugin.json")).version, "1.2.3"); + assert.equal(readJson(path.join(root, ".claude-plugin", "marketplace.json")).metadata.version, "1.2.3"); + 
assert.equal(readJson(path.join(root, ".claude-plugin", "marketplace.json")).plugins[0].version, "1.2.3"); +}); + +test("bump-version check mode reports stale metadata", () => { + const root = makeVersionFixture(); + writeJson(path.join(root, "package.json"), { + name: "@openai/codex-plugin-cc", + version: "1.0.3" + }); + + const result = run("node", [SCRIPT, "--root", root, "--check"], { + cwd: ROOT + }); + + assert.notEqual(result.status, 0); + assert.match(result.stderr, /plugins\/codex\/\.claude-plugin\/plugin\.json version/); + assert.match(result.stderr, /\.claude-plugin\/marketplace\.json metadata\.version/); +}); diff --git a/tests/tests/commands.test.mjs b/tests/tests/commands.test.mjs new file mode 100644 index 00000000..3724ffa4 --- /dev/null +++ b/tests/tests/commands.test.mjs @@ -0,0 +1,219 @@ +import fs from "node:fs"; +import path from "node:path"; +import test from "node:test"; +import assert from "node:assert/strict"; +import { fileURLToPath } from "node:url"; + +const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const PLUGIN_ROOT = path.join(ROOT, "plugins", "codex"); + +function read(relativePath) { + return fs.readFileSync(path.join(PLUGIN_ROOT, relativePath), "utf8"); +} + +test("review command uses AskUserQuestion and background Bash while staying review-only", () => { + const source = read("commands/review.md"); + assert.match(source, /AskUserQuestion/); + assert.match(source, /\bBash\(/); + assert.match(source, /Do not fix issues/i); + assert.match(source, /review-only/i); + assert.match(source, /return Codex's output verbatim to the user/i); + assert.match(source, /```bash/); + assert.match(source, /```typescript/); + assert.match(source, /review "\$ARGUMENTS"/); + assert.match(source, /\[--scope auto\|working-tree\|branch\]/); + assert.match(source, /run_in_background:\s*true/); + assert.match(source, /command:\s*`node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/codex-companion\.mjs" review "\$ARGUMENTS"`/); + 
assert.match(source, /description:\s*"Codex review"/); + assert.match(source, /Do not call `BashOutput`/); + assert.match(source, /Return the command stdout verbatim, exactly as-is/i); + assert.match(source, /git status --short --untracked-files=all/); + assert.match(source, /git diff --shortstat/); + assert.match(source, /Treat untracked files or directories as reviewable work/i); + assert.match(source, /Recommend waiting only when the review is clearly tiny, roughly 1-2 files total/i); + assert.match(source, /In every other case, including unclear size, recommend background/i); + assert.match(source, /The companion script parses `--wait` and `--background`/i); + assert.match(source, /Claude Code's `Bash\(..., run_in_background: true\)` is what actually detaches the run/i); + assert.match(source, /When in doubt, run the review/i); + assert.match(source, /\(Recommended\)/); + assert.match(source, /does not support staged-only review, unstaged-only review, or extra focus text/i); +}); + +test("adversarial review command uses AskUserQuestion and background Bash while staying review-only", () => { + const source = read("commands/adversarial-review.md"); + assert.match(source, /AskUserQuestion/); + assert.match(source, /\bBash\(/); + assert.match(source, /Do not fix issues/i); + assert.match(source, /review-only/i); + assert.match(source, /return Codex's output verbatim to the user/i); + assert.match(source, /```bash/); + assert.match(source, /```typescript/); + assert.match(source, /adversarial-review "\$ARGUMENTS"/); + assert.match(source, /\[--scope auto\|working-tree\|branch\] \[focus \.\.\.\]/); + assert.match(source, /run_in_background:\s*true/); + assert.match(source, /command:\s*`node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/codex-companion\.mjs" adversarial-review "\$ARGUMENTS"`/); + assert.match(source, /description:\s*"Codex adversarial review"/); + assert.match(source, /Do not call `BashOutput`/); + assert.match(source, /Return the command stdout verbatim, 
exactly as-is/i); + assert.match(source, /git status --short --untracked-files=all/); + assert.match(source, /git diff --shortstat/); + assert.match(source, /Treat untracked files or directories as reviewable work/i); + assert.match(source, /Recommend waiting only when the scoped review is clearly tiny, roughly 1-2 files total/i); + assert.match(source, /In every other case, including unclear size, recommend background/i); + assert.match(source, /The companion script parses `--wait` and `--background`/i); + assert.match(source, /Claude Code's `Bash\(..., run_in_background: true\)` is what actually detaches the run/i); + assert.match(source, /When in doubt, run the review/i); + assert.match(source, /\(Recommended\)/); + assert.match(source, /uses the same review target selection as `\/codex:review`/i); + assert.match(source, /supports working-tree review, branch review, and `--base `/i); + assert.match(source, /does not support `--scope staged` or `--scope unstaged`/i); + assert.match(source, /can still take extra focus text after the flags/i); +}); + +test("continue is not exposed as a user-facing command", () => { + const commandFiles = fs.readdirSync(path.join(PLUGIN_ROOT, "commands")).sort(); + assert.deepEqual(commandFiles, [ + "adversarial-review.md", + "cancel.md", + "rescue.md", + "result.md", + "review.md", + "setup.md", + "status.md" + ]); +}); + +test("rescue command absorbs continue semantics", () => { + const rescue = read("commands/rescue.md"); + const agent = read("agents/codex-rescue.md"); + const readme = fs.readFileSync(path.join(ROOT, "README.md"), "utf8"); + const runtimeSkill = read("skills/codex-cli-runtime/SKILL.md"); + + assert.match(rescue, /The final user-visible response must be Codex's output verbatim/i); + assert.match(rescue, /allowed-tools:\s*Bash\(node:\*\),\s*AskUserQuestion,\s*Agent/); + // Regression for #234: `Skill(codex:rescue)` from the main agent recursed + // because rescue.md named the routing with ambiguous prose ("Route 
this + // request to the `codex:codex-rescue` subagent") while running under + // `context: fork` — forked general-purpose subagents do not expose the + // `Agent` tool, so the fork fell back to `Skill` and re-entered this + // command. Pin the explicit transport and the inline (no-fork) execution. + assert.match(rescue, /subagent_type: "codex:codex-rescue"/); + assert.match(rescue, /do not call `Skill\(codex:codex-rescue\)`/i); + assert.doesNotMatch(rescue, /^context:\s*fork\b/m); + assert.match(rescue, /--background\|--wait/); + assert.match(rescue, /--resume\|--fresh/); + assert.match(rescue, /--model /); + assert.match(rescue, /--effort /); + assert.match(rescue, /task-resume-candidate --json/); + assert.match(rescue, /AskUserQuestion/); + assert.match(rescue, /Continue current Codex thread/); + assert.match(rescue, /Start a new Codex thread/); + assert.match(rescue, /run the `codex:codex-rescue` subagent in the background/i); + assert.match(rescue, /default to foreground/i); + assert.match(rescue, /Do not forward them to `task`/i); + assert.match(rescue, /`--model` and `--effort` are runtime-selection flags/i); + assert.match(rescue, /Leave `--effort` unset unless the user explicitly asks for a specific reasoning effort/i); + assert.match(rescue, /If they ask for `spark`, map it to `gpt-5\.3-codex-spark`/i); + assert.match(rescue, /If the request includes `--resume`, do not ask whether to continue/i); + assert.match(rescue, /If the request includes `--fresh`, do not ask whether to continue/i); + assert.match(rescue, /If the user chooses continue, add `--resume`/i); + assert.match(rescue, /If the user chooses a new thread, add `--fresh`/i); + assert.match(rescue, /thin forwarder only/i); + assert.match(rescue, /Return the Codex companion stdout verbatim to the user/i); + assert.match(rescue, /Do not paraphrase, summarize, rewrite, or add commentary before or after it/i); + assert.match(rescue, /return that command's stdout as-is/i); + assert.match(rescue, 
/Leave `--resume` and `--fresh` in the forwarded request/i); + assert.match(agent, /--resume/); + assert.match(agent, /--fresh/); + assert.match(agent, /thin forwarding wrapper/i); + assert.match(agent, /prefer foreground for a small, clearly bounded rescue request/i); + assert.match(agent, /If the user did not explicitly choose `--background` or `--wait` and the task looks complicated, open-ended, multi-step, or likely to keep Codex running for a long time, prefer background execution/i); + assert.match(agent, /Use exactly one `Bash` call/i); + assert.match(agent, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i); + assert.match(agent, /Do not call `review`, `adversarial-review`, `status`, `result`, or `cancel`/i); + assert.match(agent, /Leave `--effort` unset unless the user explicitly requests a specific reasoning effort/i); + assert.match(agent, /Leave model unset by default/i); + assert.match(agent, /If the user asks for `spark`, map that to `--model gpt-5\.3-codex-spark`/i); + assert.match(agent, /If the user asks for a concrete model name such as `gpt-5\.4-mini`, pass it through with `--model`/i); + assert.match(agent, /Return the stdout of the `codex-companion` command exactly as-is/i); + assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return nothing/i); + assert.match(agent, /gpt-5-4-prompting/); + assert.match(agent, /only to tighten the user's request into a better Codex prompt/i); + assert.match(agent, /Do not use that skill to inspect the repository, reason through the problem yourself, draft a solution, or do any independent work/i); + assert.match(runtimeSkill, /only job is to invoke `task` once and return that stdout unchanged/i); + assert.match(runtimeSkill, /Do not call `setup`, `review`, `adversarial-review`, `status`, `result`, or `cancel`/i); + assert.match(runtimeSkill, /use the `gpt-5-4-prompting` skill 
to rewrite the user's request into a tighter Codex prompt/i); + assert.match(runtimeSkill, /That prompt drafting is the only Claude-side work allowed/i); + assert.match(runtimeSkill, /Leave `--effort` unset unless the user explicitly requests a specific effort/i); + assert.match(runtimeSkill, /Leave model unset by default/i); + assert.match(runtimeSkill, /Map `spark` to `--model gpt-5\.3-codex-spark`/i); + assert.match(runtimeSkill, /If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only/i); + assert.match(runtimeSkill, /Strip it before calling `task`/i); + assert.match(runtimeSkill, /`--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`/i); + assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i); + assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return nothing/i); + assert.match(readme, /`codex:codex-rescue` subagent/i); + assert.match(readme, /if you do not pass `--model` or `--effort`, Codex chooses its own defaults/i); + assert.match(readme, /--model gpt-5\.4-mini --effort medium/i); + assert.match(readme, /`spark`, the plugin maps that to `gpt-5\.3-codex-spark`/i); + assert.match(readme, /continue a previous Codex task/i); + assert.match(readme, /### `\/codex:setup`/); + assert.match(readme, /### `\/codex:review`/); + assert.match(readme, /### `\/codex:adversarial-review`/); + assert.match(readme, /uses the same review target selection as `\/codex:review`/i); + assert.match(readme, /--base main challenge whether this was the right caching and retry design/); + assert.match(readme, /### `\/codex:rescue`/); + assert.match(readme, /### `\/codex:status`/); + assert.match(readme, /### `\/codex:result`/); + assert.match(readme, /### `\/codex:cancel`/); +}); + +test("result and cancel commands are exposed as 
// hooks/hooks.json must keep referencing both lifecycle events and both hook scripts.
test("hooks keep session-end cleanup and stop gating enabled", () => {
  const hooksConfig = read("hooks/hooks.json");

  // Checked in this order so the first missing entry is the one reported.
  const requiredPatterns = [
    /SessionStart/,
    /SessionEnd/,
    /stop-review-gate-hook\.mjs/,
    /session-lifecycle-hook\.mjs/
  ];

  for (const pattern of requiredPatterns) {
    assert.match(hooksConfig, pattern);
  }
});
/**
 * Report whether a request uses an experimental-only field without the
 * experimental API having been enabled.
 *
 * @param {string} field - Name of the request parameter to look for.
 * @param {object} message - Incoming JSON-RPC message; params may be absent.
 * @param {object} state - Fake-server state; capabilities is read from it.
 * @returns {boolean} true when the field is present in message.params and
 *   state.capabilities.experimentalApi is not exactly true; false when the
 *   field is absent.
 */
function requiresExperimental(field, message, state) {
  const params = message.params || {};
  if (field in params) {
    const capabilities = state.capabilities;
    // Only a strict boolean true opts in; missing capabilities never do.
    return !(capabilities && capabilities.experimentalApi === true);
  }
  return false;
}
/**
 * Build the thread object this fake server sends to clients from a stored
 * thread record.
 *
 * Only id, preview, ephemeral, createdAt, updatedAt, cwd and name are taken
 * from the record; every other field is a fixed placeholder value.
 *
 * @param {object} thread - Stored thread record.
 * @returns {object} Thread payload with an always-empty turns array.
 */
function buildThread(thread) {
  const { id, preview, ephemeral, createdAt, updatedAt, cwd, name } = thread;

  return {
    id,
    preview: preview || "",
    ephemeral: Boolean(ephemeral),
    modelProvider: "openai",
    createdAt,
    updatedAt,
    status: { type: "idle" },
    path: null,
    cwd,
    cliVersion: "fake-codex",
    source: "appServer",
    agentNickname: null,
    agentRole: null,
    gitInfo: null,
    name: name || null,
    turns: []
  };
}
/**
 * Produce the canned review text returned for a native review request.
 *
 * @param {object} target - Review target; type selects the wording and
 *   branch is interpolated for the "baseBranch" type.
 * @returns {string} Two-line review text that always reports no issues.
 */
function nativeReviewText(target) {
  switch (target.type) {
    case "baseBranch":
      return "Reviewed changes against " + target.branch + ".\\nNo material issues found.";
    case "custom":
      return "Reviewed custom target.\\nNo material issues found.";
    default:
      // Any other target type is reported as an uncommitted-changes review.
      return "Reviewed uncommitted changes.\\nNo material issues found.";
  }
}
/**
 * Pick the canned final message for a task turn.
 *
 * @param {string} prompt - Joined text of the turn input.
 * @param {*} resume - Truthy when the caller already decided this is a resumed run.
 * @returns {string} An ALLOW/BLOCK verdict for review-the-previous-turn prompts,
 *   otherwise a "resumed" or "handled" acknowledgement.
 */
function taskPayload(prompt, resume) {
  const previousTurnMarkers = [
    "Only review the work from the previous Claude turn.",
    "Only review the work from the previous Antigravity turn."
  ];
  // NOTE(review): includes("") is true for every string, so this check never
  // filters anything. It looks like a marker literal was lost here - confirm
  // the intended text before relying on it.
  const hasTaskMarker = prompt.includes("");

  if (hasTaskMarker && previousTurnMarkers.some((marker) => prompt.includes(marker))) {
    return BEHAVIOR === "adversarial-clean"
      ? "ALLOW: No blocking issues found in the previous turn."
      : "BLOCK: Missing empty-state guard in src/app.js:4-6.";
  }

  const resumeHints = ["Continue from the current thread state", "follow up"];
  if (resume || resumeHints.some((hint) => prompt.includes(hint))) {
    return "Resumed the prior run.\\nFollow-up prompt accepted.";
  }

  return "Handled the requested task.\\nTask prompt accepted.";
}
+ switch (message.method) { + case "initialize": + state.capabilities = message.params.capabilities || null; + saveState(state); + send({ id: message.id, result: { userAgent: "fake-codex-app-server" } }); + break; + + case "initialized": + break; + + case "account/read": + send({ id: message.id, result: buildAccountReadResult() }); + break; + + case "config/read": + if (BEHAVIOR === "config-read-fails") { + throw new Error("config/read failed for cwd"); + } + send({ id: message.id, result: buildConfigReadResult() }); + break; + + case "thread/start": { + if (BEHAVIOR === "auth-run-fails") { + throw new Error("authentication expired; run codex login"); + } + if (requiresExperimental("persistExtendedHistory", message, state) || requiresExperimental("persistFullHistory", message, state)) { + throw new Error("thread/start.persistFullHistory requires experimentalApi capability"); + } + const thread = nextThread(state, message.params.cwd, message.params.ephemeral); + send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model || "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } }); + send({ method: "thread/started", params: { thread: { id: thread.id } } }); + break; + } + + case "thread/name/set": { + const thread = ensureThread(state, message.params.threadId); + thread.name = message.params.name; + thread.updatedAt = now(); + saveState(state); + send({ id: message.id, result: {} }); + break; + } + + case "thread/list": { + let threads = state.threads.slice(); + if (message.params.cwd) { + threads = threads.filter((thread) => thread.cwd === message.params.cwd); + } + if (message.params.searchTerm) { + threads = threads.filter((thread) => (thread.name || "").includes(message.params.searchTerm)); + } + threads.sort((left, right) => right.updatedAt - left.updatedAt); + send({ id: message.id, 
result: { data: threads.map(buildThread), nextCursor: null } }); + break; + } + + case "thread/resume": { + if (requiresExperimental("persistExtendedHistory", message, state) || requiresExperimental("persistFullHistory", message, state)) { + throw new Error("thread/resume.persistFullHistory requires experimentalApi capability"); + } + const thread = ensureThread(state, message.params.threadId); + thread.updatedAt = now(); + saveState(state); + send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model || "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } }); + break; + } + + case "review/start": { + const thread = ensureThread(state, message.params.threadId); + let reviewThread = thread; + if (message.params.delivery === "detached") { + reviewThread = nextThread(state, thread.cwd, true); + send({ method: "thread/started", params: { thread: { id: reviewThread.id } } }); + } + const turnId = nextTurnId(state); + send({ id: message.id, result: { turn: buildTurn(turnId), reviewThreadId: reviewThread.id } }); + emitTurnCompleted(reviewThread.id, turnId, [ + { + started: { type: "enteredReviewMode", id: turnId, review: "current changes" } + }, + ...(BEHAVIOR === "with-reasoning" + ? [ + { + completed: { + type: "reasoning", + id: "reasoning_" + turnId, + summary: [{ text: "Reviewed the changed files and checked the likely regression paths." 
}], + content: [] + } + } + ] + : []), + { + completed: { type: "exitedReviewMode", id: turnId, review: nativeReviewText(message.params.target) } + } + ]); + break; + } + + case "turn/start": { + const thread = ensureThread(state, message.params.threadId); + const prompt = (message.params.input || []) + .filter((item) => item.type === "text") + .map((item) => item.text) + .join("\\n"); + const turnId = nextTurnId(state); + thread.updatedAt = now(); + state.lastTurnStart = { + threadId: message.params.threadId, + turnId, + model: message.params.model ?? null, + effort: message.params.effort ?? null, + prompt + }; + saveState(state); + send({ id: message.id, result: { turn: buildTurn(turnId) } }); + + const payload = message.params.outputSchema && message.params.outputSchema.properties && message.params.outputSchema.properties.verdict + ? structuredReviewPayload(prompt) + : taskPayload(prompt, thread.name && thread.name.startsWith("Codex Companion Task") && prompt.includes("Continue from the current thread state")); + + if ( + BEHAVIOR === "with-subagent" || + BEHAVIOR === "with-late-subagent-message" || + BEHAVIOR === "with-subagent-no-main-turn-completed" + ) { + const subThread = nextThread(state, thread.cwd, true); + const subThreadRecord = ensureThread(state, subThread.id); + subThreadRecord.name = "design-challenger"; + saveState(state); + const subTurnId = nextTurnId(state); + + send({ method: "thread/started", params: { thread: { ...buildThread(subThreadRecord), name: "design-challenger", agentNickname: "design-challenger" } } }); + send({ method: "turn/started", params: { threadId: thread.id, turn: buildTurn(turnId) } }); + send({ + method: "item/started", + params: { + threadId: thread.id, + turnId, + item: { + type: "collabAgentToolCall", + id: "collab_" + turnId, + tool: "wait", + status: "inProgress", + senderThreadId: thread.id, + receiverThreadIds: [subThread.id], + prompt: "Challenge the implementation approach", + model: null, + reasoningEffort: 
null, + agentsStates: { + [subThread.id]: { status: "inProgress", message: "Investigating design tradeoffs" } + } + } + } + }); + if (BEHAVIOR === "with-late-subagent-message") { + send({ + method: "item/completed", + params: { + threadId: thread.id, + turnId, + item: { type: "agentMessage", id: "msg_" + turnId, text: payload, phase: "final_answer" } + } + }); + } + send({ method: "turn/started", params: { threadId: subThread.id, turn: buildTurn(subTurnId) } }); + send({ + method: "item/completed", + params: { + threadId: subThread.id, + turnId: subTurnId, + item: { + type: "reasoning", + id: "reasoning_" + subTurnId, + summary: [{ text: "Questioned the retry strategy and the cache invalidation boundaries." }], + content: [] + } + } + }); + send({ + method: "item/completed", + params: { + threadId: subThread.id, + turnId: subTurnId, + item: { + type: "agentMessage", + id: "msg_" + subTurnId, + text: "The design assumes retries are harmless, but they can duplicate side effects without stronger idempotency guarantees.", + phase: "analysis" + } + } + }); + send({ method: "turn/completed", params: { threadId: subThread.id, turn: buildTurn(subTurnId, "completed") } }); + send({ + method: "item/completed", + params: { + threadId: thread.id, + turnId, + item: { + type: "collabAgentToolCall", + id: "collab_" + turnId, + tool: "wait", + status: "completed", + senderThreadId: thread.id, + receiverThreadIds: [subThread.id], + prompt: "Challenge the implementation approach", + model: null, + reasoningEffort: null, + agentsStates: { + [subThread.id]: { status: "completed", message: "Finished" } + } + } + } + }); + if (BEHAVIOR !== "with-late-subagent-message") { + send({ + method: "item/completed", + params: { + threadId: thread.id, + turnId, + item: { type: "agentMessage", id: "msg_" + turnId, text: payload, phase: "final_answer" } + } + }); + } + if (BEHAVIOR !== "with-subagent-no-main-turn-completed") { + send({ method: "turn/completed", params: { threadId: thread.id, turn: 
buildTurn(turnId, "completed") } }); + } + break; + } + + const items = [ + ...(BEHAVIOR === "with-reasoning" + ? [ + { + completed: { + type: "reasoning", + id: "reasoning_" + turnId, + summary: [{ text: "Inspected the prompt, gathered evidence, and checked the highest-risk paths first." }], + content: [] + } + } + ] + : []), + { + completed: { type: "agentMessage", id: "msg_" + turnId, text: payload, phase: "final_answer" } + } + ]; + + if (BEHAVIOR === "interruptible-slow-task") { + send({ method: "turn/started", params: { threadId: thread.id, turn: buildTurn(turnId) } }); + const timer = setTimeout(() => { + if (!interruptibleTurns.has(turnId)) { + return; + } + interruptibleTurns.delete(turnId); + for (const entry of items) { + if (entry && entry.completed) { + send({ method: "item/completed", params: { threadId: thread.id, turnId, item: entry.completed } }); + } + } + send({ method: "turn/completed", params: { threadId: thread.id, turn: buildTurn(turnId, "completed") } }); + }, 5000); + interruptibleTurns.set(turnId, { threadId: thread.id, timer }); + } else if (BEHAVIOR === "slow-task") { + emitTurnCompletedLater(thread.id, turnId, items, 400); + } else { + emitTurnCompleted(thread.id, turnId, items); + } + break; + } + + case "turn/interrupt": { + state.lastInterrupt = { + threadId: message.params.threadId, + turnId: message.params.turnId + }; + saveState(state); + const pending = interruptibleTurns.get(message.params.turnId); + if (pending) { + clearTimeout(pending.timer); + interruptibleTurns.delete(message.params.turnId); + send({ + method: "turn/completed", + params: { + threadId: pending.threadId, + turn: buildTurn(message.params.turnId, "interrupted") + } + }); + } + send({ id: message.id, result: {} }); + break; + } + + default: + send({ id: message.id, error: { code: -32601, message: "Unsupported method: " + message.method } }); + break; + } + } catch (error) { + send({ id: message.id, error: { code: -32000, message: error.message } }); + } +}); +`; 
/**
 * Build the environment for a child process that must resolve the fake
 * `codex` binary first and must not inherit a session from the parent.
 *
 * `process.env` itself is never modified; a copy is returned.
 *
 * @param {string} binDir - Directory containing the fake `codex` executable.
 * @returns {Record<string, string | undefined>} Copy of `process.env` without
 *   the session/trajectory variables and with `binDir` prepended to `PATH`.
 */
export function buildEnv(binDir) {
  // Session identifiers leaked from the invoking terminal must not reach the child.
  const sessionKeys = [
    "ANTIGRAVITY_TRAJECTORY_ID",
    "GEMINI_TRAJECTORY_ID",
    "CLAUDE_SESSION_ID",
    "CODEX_COMPANION_SESSION_ID"
  ];

  const env = { ...process.env };
  for (const key of sessionKeys) {
    delete env[key];
  }

  // When PATH is unset or empty, use binDir alone: interpolating it would add
  // a literal "undefined" entry (unset) or an empty entry (empty PATH).
  const inheritedPath = process.env.PATH;
  env.PATH = inheritedPath ? `${binDir}${path.delimiter}${inheritedPath}` : binDir;

  return env;
}
// An explicit base option selects branch mode and is reported back as baseRef.
test("resolveReviewTarget honors explicit base overrides", () => {
  const repoDir = makeTempDir();
  const appFile = path.join(repoDir, "app.js");
  const git = (...gitArgs) => run("git", gitArgs, { cwd: repoDir });

  initGitRepo(repoDir);

  // Baseline commit on the initial branch.
  fs.writeFileSync(appFile, "console.log('v1');\n");
  git("add", "app.js");
  git("commit", "-m", "init");

  // One committed change on a feature branch, leaving the tree clean.
  git("checkout", "-b", "feature/test");
  fs.writeFileSync(appFile, "console.log('v2');\n");
  git("add", "app.js");
  git("commit", "-m", "change");

  const { mode, baseRef } = resolveReviewTarget(repoDir, { base: "main" });

  assert.equal(mode, "branch");
  assert.equal(baseRef, "main");
});
// A one-file working-tree change is expected to be passed through as an inline diff.
test("collectReviewContext keeps inline diffs for tiny adversarial reviews", () => {
  const repoDir = makeTempDir();
  const appFile = path.join(repoDir, "app.js");
  const git = (...gitArgs) => run("git", gitArgs, { cwd: repoDir });

  initGitRepo(repoDir);
  fs.writeFileSync(appFile, "console.log('v1');\n");
  git("add", "app.js");
  git("commit", "-m", "init");

  // Uncommitted edit carrying a marker that must show up in the collected content.
  fs.writeFileSync(appFile, "console.log('INLINE_MARKER');\n");

  const reviewTarget = resolveReviewTarget(repoDir, {});
  const reviewContext = collectReviewContext(repoDir, reviewTarget);

  assert.equal(reviewContext.inputMode, "inline-diff");
  assert.equal(reviewContext.fileCount, 1);
  assert.match(reviewContext.collectionGuidance, /primary evidence/i);
  assert.match(reviewContext.content, /INLINE_MARKER/);
});
// With three modified files the context is expected to switch to self-collect
// mode and omit the diff bodies.
test("collectReviewContext falls back to lightweight context for larger adversarial reviews", () => {
  const repoDir = makeTempDir();
  const git = (...gitArgs) => run("git", gitArgs, { cwd: repoDir });
  const fileNames = ["a.js", "b.js", "c.js"];

  initGitRepo(repoDir);
  for (const name of fileNames) {
    fs.writeFileSync(path.join(repoDir, name), `export const value = "${name}-v1";\n`);
  }
  git("add", ...fileNames);
  git("commit", "-m", "init");

  // Uncommitted edits; the markers must NOT leak into the lightweight context.
  const pendingEdits = [
    ["a.js", 'export const value = "SELF_COLLECT_MARKER_A";\n'],
    ["b.js", 'export const value = "SELF_COLLECT_MARKER_B";\n'],
    ["c.js", 'export const value = "SELF_COLLECT_MARKER_C";\n']
  ];
  for (const [name, contents] of pendingEdits) {
    fs.writeFileSync(path.join(repoDir, name), contents);
  }

  const reviewTarget = resolveReviewTarget(repoDir, {});
  const reviewContext = collectReviewContext(repoDir, reviewTarget);

  assert.equal(reviewContext.inputMode, "self-collect");
  assert.equal(reviewContext.fileCount, 3);
  assert.match(reviewContext.collectionGuidance, /lightweight summary/i);
  assert.match(reviewContext.collectionGuidance, /read-only git commands/i);
  assert.doesNotMatch(reviewContext.content, /SELF_COLLECT_MARKER_[ABC]/);
  assert.match(reviewContext.content, /## Changed Files/);
});
/**
 * Write `source` to `filePath` and make the file executable (mode 0o755).
 *
 * @param {string} filePath - Destination path; an existing file is overwritten.
 * @param {string} source - File contents, written as UTF-8.
 * @returns {void}
 * @throws {Error} Propagates any error from the underlying fs calls.
 */
export function writeExecutable(filePath, source) {
  fs.writeFileSync(filePath, source, { encoding: "utf8", mode: 0o755 });
  // The `mode` option only applies when the file is created and is filtered
  // by the process umask, so set the permission bits explicitly as well.
  fs.chmodSync(filePath, 0o755);
}
// On win32 the process tree must be killed through taskkill, never through the kill fallback.
test("terminateProcessTree uses taskkill on Windows", () => {
  let lastInvocation = null;

  // Stub command runner: remembers the latest call and reports success.
  const recordAndSucceed = (command, args) => {
    lastInvocation = { command, args };
    return { command, args, status: 0, signal: null, stdout: "", stderr: "", error: null };
  };
  const failIfKillUsed = () => {
    throw new Error("kill fallback should not run");
  };

  const outcome = terminateProcessTree(1234, {
    platform: "win32",
    runCommandImpl: recordAndSucceed,
    killImpl: failIfKillUsed
  });

  assert.deepEqual(lastInvocation, {
    command: "taskkill",
    args: ["/PID", "1234", "/T", "/F"]
  });
  assert.equal(outcome.delivered, true);
  assert.equal(outcome.method, "taskkill");
});
"node:assert/strict"; + +import { renderReviewResult, renderStoredJobResult } from "../plugins/codex/scripts/lib/render.mjs"; + +test("renderReviewResult degrades gracefully when JSON is missing required review fields", () => { + const output = renderReviewResult( + { + parsed: { + verdict: "approve", + summary: "Looks fine." + }, + rawOutput: JSON.stringify({ + verdict: "approve", + summary: "Looks fine." + }), + parseError: null + }, + { + reviewLabel: "Adversarial Review", + targetLabel: "working tree diff" + } + ); + + assert.match(output, /Codex returned JSON with an unexpected review shape\./); + assert.match(output, /Missing array `findings`\./); + assert.match(output, /Raw final message:/); +}); + +test("renderStoredJobResult prefers rendered output for structured review jobs", () => { + const output = renderStoredJobResult( + { + id: "review-123", + status: "completed", + title: "Codex Adversarial Review", + jobClass: "review", + threadId: "thr_123" + }, + { + threadId: "thr_123", + rendered: "# Codex Adversarial Review\n\nTarget: working tree diff\nVerdict: needs-attention\n", + result: { + result: { + verdict: "needs-attention", + summary: "One issue.", + findings: [], + next_steps: [] + }, + rawOutput: + '{"verdict":"needs-attention","summary":"One issue.","findings":[],"next_steps":[]}' + } + } + ); + + assert.match(output, /^# Codex Adversarial Review/); + assert.doesNotMatch(output, /^\{/); + assert.match(output, /Codex session ID: thr_123/); + assert.match(output, /Resume in Codex: codex resume thr_123/); +}); diff --git a/tests/tests/runtime.test.mjs b/tests/tests/runtime.test.mjs new file mode 100644 index 00000000..5e547825 --- /dev/null +++ b/tests/tests/runtime.test.mjs @@ -0,0 +1,2139 @@ +import fs from "node:fs"; +import path from "node:path"; +import test from "node:test"; +import assert from "node:assert/strict"; + +// Isolate tests from parent terminal environments +delete process.env.ANTIGRAVITY_TRAJECTORY_ID; +delete 
process.env.GEMINI_TRAJECTORY_ID; +delete process.env.CLAUDE_SESSION_ID; +delete process.env.CODEX_COMPANION_SESSION_ID; +delete process.env.ANTIGRAVITY_ENV_FILE; +delete process.env.GEMINI_ENV_FILE; +delete process.env.CLAUDE_ENV_FILE; +delete process.env.ANTIGRAVITY_PROJECT_DIR; +delete process.env.GEMINI_PROJECT_DIR; +delete process.env.CLAUDE_PROJECT_DIR; +delete process.env.ANTIGRAVITY_PLUGIN_DATA; +delete process.env.GEMINI_PLUGIN_DATA; +delete process.env.CLAUDE_PLUGIN_DATA; + +import { spawn } from "node:child_process"; +import { fileURLToPath } from "node:url"; + +import { buildEnv, installFakeCodex } from "./fake-codex-fixture.mjs"; +import { initGitRepo, makeTempDir, run } from "./helpers.mjs"; +import { loadBrokerSession, saveBrokerSession } from "../plugins/codex/scripts/lib/broker-lifecycle.mjs"; +import { resolveStateDir } from "../plugins/codex/scripts/lib/state.mjs"; + +const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const PLUGIN_ROOT = path.join(ROOT, "plugins", "codex"); +const SCRIPT = path.join(PLUGIN_ROOT, "scripts", "codex-companion.mjs"); +const STOP_HOOK = path.join(PLUGIN_ROOT, "scripts", "stop-review-gate-hook.mjs"); +const SESSION_HOOK = path.join(PLUGIN_ROOT, "scripts", "session-lifecycle-hook.mjs"); + +async function waitFor(predicate, { timeoutMs = 5000, intervalMs = 50 } = {}) { + const start = Date.now(); + while (Date.now() - start < timeoutMs) { + const value = await predicate(); + if (value) { + return value; + } + await new Promise((resolve) => setTimeout(resolve, intervalMs)); + } + throw new Error("Timed out waiting for condition."); +} + +test("setup reports ready when fake codex is installed and authenticated", () => { + const binDir = makeTempDir(); + installFakeCodex(binDir); + + const result = run("node", [SCRIPT, "setup", "--json"], { + cwd: ROOT, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0); + const payload = JSON.parse(result.stdout); + assert.equal(payload.ready, 
true); + assert.match(payload.codex.detail, /advanced runtime available/); + assert.equal(payload.sessionRuntime.mode, "direct"); +}); + +test("setup is ready without npm when Codex is already installed and authenticated", () => { + const binDir = makeTempDir(); + installFakeCodex(binDir); + fs.symlinkSync(process.execPath, path.join(binDir, "node")); + + const result = run("node", [SCRIPT, "setup", "--json"], { + cwd: ROOT, + env: { + ...process.env, + PATH: binDir + } + }); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.ready, true); + assert.equal(payload.npm.available, false); + assert.equal(payload.codex.available, true); + assert.equal(payload.auth.loggedIn, true); +}); + +test("setup trusts app-server API key auth even when login status alone would fail", () => { + const binDir = makeTempDir(); + installFakeCodex(binDir, "api-key-account-only"); + + const result = run("node", [SCRIPT, "setup", "--json"], { + cwd: ROOT, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.ready, true); + assert.equal(payload.auth.loggedIn, true); + assert.equal(payload.auth.authMethod, "apiKey"); + assert.equal(payload.auth.source, "app-server"); + assert.match(payload.auth.detail, /API key configured \(unverified\)/); +}); + +test("setup is ready when the active provider does not require OpenAI login", () => { + const binDir = makeTempDir(); + installFakeCodex(binDir, "provider-no-auth"); + + const result = run("node", [SCRIPT, "setup", "--json"], { + cwd: ROOT, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.ready, true); + assert.equal(payload.auth.loggedIn, true); + assert.equal(payload.auth.authMethod, null); + assert.equal(payload.auth.source, "app-server"); + assert.match(payload.auth.detail, /configured 
and does not require OpenAI authentication/i); +}); + +test("setup treats custom providers with app-server-ready config as ready", () => { + const binDir = makeTempDir(); + installFakeCodex(binDir, "env-key-provider"); + + const result = run("node", [SCRIPT, "setup", "--json"], { + cwd: ROOT, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.ready, true); + assert.equal(payload.auth.loggedIn, true); + assert.equal(payload.auth.authMethod, null); + assert.equal(payload.auth.source, "app-server"); + assert.match(payload.auth.detail, /configured and does not require OpenAI authentication/i); +}); + +test("setup reports not ready when app-server config read fails", () => { + const binDir = makeTempDir(); + installFakeCodex(binDir, "config-read-fails"); + + const result = run("node", [SCRIPT, "setup", "--json"], { + cwd: ROOT, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.ready, false); + assert.equal(payload.auth.loggedIn, false); + assert.equal(payload.auth.source, "app-server"); + assert.match(payload.auth.detail, /config\/read failed for cwd/); +}); + +test("review renders a no-findings result from app-server review/start", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = 1;\n"); + run("git", ["add", "src/app.js"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = 2;\n"); + + const result = run("node", [SCRIPT, "review"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0); + assert.match(result.stdout, /Reviewed uncommitted changes/); + assert.match(result.stdout, 
/No material issues found/); +}); + +test("task runs when the active provider does not require OpenAI login", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "provider-no-auth"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "check auth preflight"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /Handled the requested task/); +}); + +test("task runs without auth preflight so Codex can refresh an expired session", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "refreshable-auth"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "check refreshable auth"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /Handled the requested task/); +}); + +test("task reports the actual Codex auth error when the run is rejected", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "auth-run-fails"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "check failed auth"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.notEqual(result.status, 0); + assert.match(result.stderr, /authentication expired; run codex login/); +}); + +test("review accepts the quoted raw argument style for built-in base-branch review", () => { + const 
repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = 1;\n"); + run("git", ["add", "src/app.js"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = 2;\n"); + + const result = run("node", [SCRIPT, "review", "--base main"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0); + assert.match(result.stdout, /Reviewed changes against main/); + assert.match(result.stdout, /No material issues found/); +}); + +test("adversarial review renders structured findings over app-server turn/start", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0];\n"); + run("git", ["add", "src/app.js"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0].id;\n"); + + const result = run("node", [SCRIPT, "adversarial-review"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0); + assert.match(result.stdout, /Missing empty-state guard/); +}); + +test("adversarial review accepts the same base-branch targeting as review", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0];\n"); + run("git", ["add", "src/app.js"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0].id;\n"); + + const result = run("node", 
[SCRIPT, "adversarial-review", "--base", "main"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /Branch review against main|against main/i); + assert.match(result.stdout, /Missing empty-state guard/); +}); + +test("adversarial review asks Codex to inspect larger diffs itself", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + for (const name of ["a.js", "b.js", "c.js"]) { + fs.writeFileSync(path.join(repo, "src", name), `export const value = "${name}-v1";\n`); + } + run("git", ["add", "src/a.js", "src/b.js", "src/c.js"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "a.js"), 'export const value = "PROMPT_SELF_COLLECT_A";\n'); + fs.writeFileSync(path.join(repo, "src", "b.js"), 'export const value = "PROMPT_SELF_COLLECT_B";\n'); + fs.writeFileSync(path.join(repo, "src", "c.js"), 'export const value = "PROMPT_SELF_COLLECT_C";\n'); + + const result = run("node", [SCRIPT, "adversarial-review"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8")); + assert.match(state.lastTurnStart.prompt, /lightweight summary/i); + assert.match(state.lastTurnStart.prompt, /read-only git commands/i); + assert.doesNotMatch(state.lastTurnStart.prompt, /PROMPT_SELF_COLLECT_[ABC]/); +}); + +test("review includes reasoning output when the app server returns it", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "with-reasoning"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, 
"README.md"), "hello again\n"); + + const result = run("node", [SCRIPT, "review"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /Reasoning:/); + assert.match(result.stdout, /Reviewed the changed files and checked the likely regression paths first|Reviewed the changed files and checked the likely regression paths/i); +}); + +test("review logs reasoning summaries and review output to the job log", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "with-reasoning"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + + const result = run("node", [SCRIPT, "review"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const stateDir = resolveStateDir(repo); + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const log = fs.readFileSync(state.jobs[0].logFile, "utf8"); + assert.match(log, /Reasoning summary/); + assert.match(log, /Reviewed the changed files and checked the likely regression paths/); + assert.match(log, /Review output/); + assert.match(log, /Reviewed uncommitted changes\./); +}); + +test("task --resume-last resumes the latest persisted task thread", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const firstRun = run("node", [SCRIPT, "task", "initial task"], { + cwd: repo, + env: buildEnv(binDir) + }); + assert.equal(firstRun.status, 0, firstRun.stderr); + + const result = run("node", [SCRIPT, "task", 
"--resume-last", "follow up"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, "Resumed the prior run.\nFollow-up prompt accepted.\n"); +}); + +test("task-resume-candidate returns the latest rescue thread from the current session", () => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "task-current", + status: "completed", + title: "Codex Task", + jobClass: "task", + sessionId: "sess-current", + threadId: "thr_current", + summary: "Investigate the flaky test", + updatedAt: "2026-03-24T20:00:00.000Z" + }, + { + id: "task-other-session", + status: "completed", + title: "Codex Task", + jobClass: "task", + sessionId: "sess-other", + threadId: "thr_other", + summary: "Old rescue run", + updatedAt: "2026-03-24T20:05:00.000Z" + }, + { + id: "review-current", + status: "completed", + title: "Codex Review", + jobClass: "review", + sessionId: "sess-current", + threadId: "thr_review", + summary: "Review main...HEAD", + updatedAt: "2026-03-24T20:10:00.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SCRIPT, "task-resume-candidate", "--json"], { + cwd: workspace, + env: { + ...process.env, + CODEX_COMPANION_SESSION_ID: "sess-current" + } + }); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.available, true); + assert.equal(payload.sessionId, "sess-current"); + assert.equal(payload.candidate.id, "task-current"); + assert.equal(payload.candidate.threadId, "thr_current"); +}); + +test("task --resume-last does not resume a task from another Claude session", () => { + const repo = makeTempDir(); + const binDir = 
makeTempDir(); + const statePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const otherEnv = { + ...buildEnv(binDir), + CODEX_COMPANION_SESSION_ID: "sess-other" + }; + const currentEnv = { + ...buildEnv(binDir), + CODEX_COMPANION_SESSION_ID: "sess-current" + }; + + const firstRun = run("node", [SCRIPT, "task", "initial task"], { + cwd: repo, + env: otherEnv + }); + assert.equal(firstRun.status, 0, firstRun.stderr); + + const candidate = run("node", [SCRIPT, "task-resume-candidate", "--json"], { + cwd: repo, + env: currentEnv + }); + assert.equal(candidate.status, 0, candidate.stderr); + assert.equal(JSON.parse(candidate.stdout).available, false); + + const resume = run("node", [SCRIPT, "task", "--resume-last", "follow up"], { + cwd: repo, + env: currentEnv + }); + assert.equal(resume.status, 1); + assert.match(resume.stderr, /No previous Codex task thread was found for this repository\./); + + const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8")); + assert.equal(fakeState.lastTurnStart.threadId, "thr_1"); + assert.equal(fakeState.lastTurnStart.prompt, "initial task"); +}); + +test("task --resume-last ignores running tasks from other Claude sessions", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const stateDir = resolveStateDir(repo); + fs.mkdirSync(path.join(stateDir, "jobs"), { recursive: true }); + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "task-other-running", + status: "running", + 
title: "Codex Task", + jobClass: "task", + sessionId: "sess-other", + threadId: "thr_other", + summary: "Other session active task", + updatedAt: "2026-03-24T20:05:00.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const env = { + ...buildEnv(binDir), + CODEX_COMPANION_SESSION_ID: "sess-current" + }; + const status = run("node", [SCRIPT, "status", "--json"], { + cwd: repo, + env + }); + assert.equal(status.status, 0, status.stderr); + assert.deepEqual(JSON.parse(status.stdout).running, []); + + const resume = run("node", [SCRIPT, "task", "--resume-last", "follow up"], { + cwd: repo, + env + }); + assert.equal(resume.status, 1); + assert.match(resume.stderr, /No previous Codex task thread was found for this repository\./); +}); + +test("session start hook exports the Claude session id and plugin data dir for later commands", () => { + const repo = makeTempDir(); + const envFile = path.join(makeTempDir(), "claude-env.sh"); + fs.writeFileSync(envFile, "", "utf8"); + const pluginDataDir = makeTempDir(); + + const result = run("node", [SESSION_HOOK, "SessionStart"], { + cwd: repo, + env: { + ...process.env, + CLAUDE_ENV_FILE: envFile, + CLAUDE_PLUGIN_DATA: pluginDataDir + }, + input: JSON.stringify({ + hook_event_name: "SessionStart", + session_id: "sess-current", + cwd: repo + }) + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal( + fs.readFileSync(envFile, "utf8"), + `export CODEX_COMPANION_SESSION_ID='sess-current'\nexport CLAUDE_PLUGIN_DATA='${pluginDataDir}'\n` + ); +}); + +test("write task output focuses on the Codex result without generic follow-up hints", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "--write", "fix the failing test"], { + cwd: 
repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n"); +}); + +test("task --resume acts like --resume-last without leaking the flag into the prompt", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const statePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const firstRun = run("node", [SCRIPT, "task", "initial task"], { + cwd: repo, + env: buildEnv(binDir) + }); + assert.equal(firstRun.status, 0, firstRun.stderr); + + const result = run("node", [SCRIPT, "task", "--resume", "follow up"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8")); + assert.equal(fakeState.lastTurnStart.threadId, "thr_1"); + assert.equal(fakeState.lastTurnStart.prompt, "follow up"); +}); + +test("task --fresh is treated as routing control and does not leak into the prompt", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const statePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "--fresh", "diagnose the flaky test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8")); + assert.equal(fakeState.lastTurnStart.prompt, "diagnose the flaky test"); +}); + +test("task forwards model selection and reasoning effort to app-server 
turn/start", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const statePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "--model", "spark", "--effort", "low", "diagnose the failing test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const fakeState = JSON.parse(fs.readFileSync(statePath, "utf8")); + assert.equal(fakeState.lastTurnStart.model, "gpt-5.3-codex-spark"); + assert.equal(fakeState.lastTurnStart.effort, "low"); +}); + +test("task logs reasoning summaries and assistant messages to the job log", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "with-reasoning"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "investigate the failing test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const stateDir = resolveStateDir(repo); + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const log = fs.readFileSync(state.jobs[0].logFile, "utf8"); + assert.match(log, /Reasoning summary/); + assert.match(log, /Inspected the prompt, gathered evidence, and checked the highest-risk paths first/); + assert.match(log, /Assistant message/); + assert.match(log, /Handled the requested task/); +}); + +test("task logs subagent reasoning and messages with a subagent prefix", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "with-subagent"); + initGitRepo(repo); + 
fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "challenge the current design"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const stateDir = resolveStateDir(repo); + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const log = fs.readFileSync(state.jobs[0].logFile, "utf8"); + assert.match(log, /Starting subagent design-challenger via collaboration tool: wait\./); + assert.match(log, /Subagent design-challenger reasoning:/); + assert.match(log, /Questioned the retry strategy and the cache invalidation boundaries\./); + assert.match(log, /Subagent design-challenger:/); + assert.match( + log, + /The design assumes retries are harmless, but they can duplicate side effects without stronger idempotency guarantees\./ + ); +}); + +test("task waits for the main thread to complete before returning the final result", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "with-subagent"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "challenge the current design"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n"); +}); + +test("task ignores later subagent messages when choosing the final returned output", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "with-late-subagent-message"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + 
run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "challenge the current design"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n"); +}); + +test("task can finish after subagent work even if the parent turn/completed event is missing", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "with-subagent-no-main-turn-completed"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "challenge the current design"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n"); +}); + +test("task using the shared broker still completes when Codex spawns subagents", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "with-subagent"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + + const env = buildEnv(binDir); + const review = run("node", [SCRIPT, "review"], { + cwd: repo, + env + }); + assert.equal(review.status, 0, review.stderr); + + if (!loadBrokerSession(repo)) { + return; + } + + const result = run("node", [SCRIPT, "task", "challenge the current design"], { + cwd: repo, + env + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(result.stdout, "Handled the requested task.\nTask prompt accepted.\n"); +}); + +test("task --background enqueues a detached worker and exposes 
per-job status", async () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "slow-task"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const launched = run("node", [SCRIPT, "task", "--background", "--json", "investigate the failing test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(launched.status, 0, launched.stderr); + const launchPayload = JSON.parse(launched.stdout); + assert.equal(launchPayload.status, "queued"); + assert.match(launchPayload.jobId, /^task-/); + + const waitedStatus = run( + "node", + [SCRIPT, "status", launchPayload.jobId, "--wait", "--timeout-ms", "15000", "--json"], + { + cwd: repo, + env: buildEnv(binDir) + } + ); + + assert.equal(waitedStatus.status, 0, waitedStatus.stderr); + const waitedPayload = JSON.parse(waitedStatus.stdout); + assert.equal(waitedPayload.job.id, launchPayload.jobId); + assert.equal(waitedPayload.job.status, "completed"); + + const resultPayload = await waitFor(() => { + const result = run("node", [SCRIPT, "result", launchPayload.jobId, "--json"], { + cwd: repo, + env: buildEnv(binDir) + }); + if (result.status !== 0) { + return null; + } + return JSON.parse(result.stdout); + }); + + assert.equal(resultPayload.job.id, launchPayload.jobId); + assert.equal(resultPayload.job.status, "completed"); + assert.match(resultPayload.storedJob.rendered, /Handled the requested task/); +}); + +test("review rejects focus text because it is native-review only", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + + const result = 
run("node", [SCRIPT, "review", "--scope working-tree focus on auth"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status > 0, true); + assert.match(result.stderr, /does not support custom focus text/i); + assert.match(result.stderr, /\/codex:adversarial-review focus on auth/i); +}); + +test("review rejects staged-only scope because it is native-review only", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + run("git", ["add", "README.md"], { cwd: repo }); + + const result = run("node", [SCRIPT, "review", "--scope", "staged"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status > 0, true); + assert.match(result.stderr, /Unsupported review scope "staged"/i); + assert.match(result.stderr, /Use one of: auto, working-tree, branch, or pass --base /i); +}); + +test("adversarial review rejects staged-only scope to match review target selection", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + run("git", ["add", "README.md"], { cwd: repo }); + + const result = run("node", [SCRIPT, "adversarial-review", "--scope", "staged"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status > 0, true); + assert.match(result.stderr, /Unsupported review scope "staged"/i); + assert.match(result.stderr, /Use one of: auto, working-tree, branch, or pass --base /i); +}); + +test("review accepts --background while still 
running as a tracked review job", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + + const launched = run("node", [SCRIPT, "review", "--background", "--json"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(launched.status, 0, launched.stderr); + const launchPayload = JSON.parse(launched.stdout); + assert.equal(launchPayload.review, "Review"); + assert.match(launchPayload.codex.stdout, /No material issues found/); + + const status = run("node", [SCRIPT, "status"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(status.status, 0, status.stderr); + assert.match(status.stdout, /# Codex Status/); + assert.match(status.stdout, /Codex Review/); + assert.match(status.stdout, /completed/); +}); + +test("status shows phases, hints, and the latest finished job", () => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const logFile = path.join(jobsDir, "review-live.log"); + fs.writeFileSync( + logFile, + [ + "[2026-03-18T15:30:00.000Z] Starting Codex Review.", + "[2026-03-18T15:30:01.000Z] Thread ready (thr_1).", + "[2026-03-18T15:30:02.000Z] Turn started (turn_1).", + "[2026-03-18T15:30:03.000Z] Reviewer started: current changes" + ].join("\n"), + "utf8" + ); + + const finishedJobFile = path.join(jobsDir, "review-done.json"); + fs.writeFileSync( + finishedJobFile, + JSON.stringify( + { + id: "review-done", + status: "completed", + title: "Codex Review", + rendered: "# Codex Review\n\nReviewed uncommitted changes.\nNo material issues found.\n" + }, + null, + 2 + ), + "utf8" + ); + + fs.writeFileSync( + 
path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "review-live", + kind: "review", + kindLabel: "review", + status: "running", + title: "Codex Review", + jobClass: "review", + phase: "reviewing", + threadId: "thr_1", + summary: "Review working tree diff", + logFile, + createdAt: "2026-03-18T15:30:00.000Z", + updatedAt: "2026-03-18T15:30:03.000Z" + }, + { + id: "review-done", + status: "completed", + title: "Codex Review", + jobClass: "review", + threadId: "thr_done", + summary: "Review main...HEAD", + createdAt: "2026-03-18T15:10:00.000Z", + startedAt: "2026-03-18T15:10:05.000Z", + completedAt: "2026-03-18T15:11:10.000Z", + updatedAt: "2026-03-18T15:11:10.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SCRIPT, "status"], { + cwd: workspace + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /Active jobs:/); + assert.match(result.stdout, /\| Job \| Kind \| Status \| Phase \| Elapsed \| Codex Session ID \| Summary \| Actions \|/); + assert.match(result.stdout, /\| review-live \| review \| running \| reviewing \| .* \| thr_1 \| Review working tree diff \|/); + assert.match(result.stdout, /`\/codex:status review-live`<br>`\/codex:cancel review-live`/);
+ assert.match(result.stdout, /Live details:/); + assert.match(result.stdout, /Latest finished:/); + assert.match(result.stdout, /Progress:/); + assert.match(result.stdout, /Session runtime: direct startup/); + assert.match(result.stdout, /Phase: reviewing/); + assert.match(result.stdout, /Codex session ID: thr_1/); + assert.match(result.stdout, /Resume in Codex: codex resume thr_1/); + assert.match(result.stdout, /Thread ready \(thr_1\)\./); + assert.match(result.stdout, /Reviewer started: current changes/); + assert.match(result.stdout, /Duration: 1m 5s/); + assert.match(result.stdout, /Codex session ID: thr_done/); + assert.match(result.stdout, /Resume in Codex: codex resume thr_done/); +}); + +test("status without a job id only shows jobs from the current Claude session", () => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const currentLog = path.join(jobsDir, "review-current.log"); + const otherLog = path.join(jobsDir, "review-other.log"); + fs.writeFileSync(currentLog, "[2026-03-18T15:30:00.000Z] Reviewer started: current changes\n", "utf8"); + fs.writeFileSync(otherLog, "[2026-03-18T15:31:00.000Z] Reviewer started: old changes\n", "utf8"); + + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "review-current", + kind: "review", + kindLabel: "review", + status: "running", + title: "Codex Review", + jobClass: "review", + phase: "reviewing", + sessionId: "sess-current", + threadId: "thr_current", + summary: "Current session review", + logFile: currentLog, + createdAt: "2026-03-18T15:30:00.000Z", + updatedAt: "2026-03-18T15:30:00.000Z" + }, + { + id: "review-other", + kind: "review", + kindLabel: "review", + status: "completed", + title: "Codex Review", + jobClass: "review", + sessionId:
"sess-other", + threadId: "thr_other", + summary: "Previous session review", + createdAt: "2026-03-18T15:20:00.000Z", + startedAt: "2026-03-18T15:20:05.000Z", + completedAt: "2026-03-18T15:21:00.000Z", + updatedAt: "2026-03-18T15:21:00.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SCRIPT, "status"], { + cwd: workspace, + env: { + ...process.env, + CODEX_COMPANION_SESSION_ID: "sess-current" + } + }); + + assert.equal(result.status, 0, result.stderr); + assert.deepEqual( + [...new Set(result.stdout.match(/review-(?:current|other)/g) ?? [])], + ["review-current"] + ); +}); + +test("status preserves adversarial review kind labels", () => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const logFile = path.join(jobsDir, "review-adv.log"); + fs.writeFileSync(logFile, "[2026-03-18T15:30:00.000Z] Reviewer started: adversarial review\n", "utf8"); + + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "review-adv-live", + kind: "adversarial-review", + status: "running", + title: "Codex Adversarial Review", + jobClass: "review", + phase: "reviewing", + threadId: "thr_adv_live", + summary: "Adversarial review current changes", + logFile, + createdAt: "2026-03-18T15:30:00.000Z", + updatedAt: "2026-03-18T15:30:00.000Z" + }, + { + id: "review-adv", + kind: "adversarial-review", + status: "completed", + title: "Codex Adversarial Review", + jobClass: "review", + threadId: "thr_adv_done", + summary: "Adversarial review working tree diff", + createdAt: "2026-03-18T15:10:00.000Z", + startedAt: "2026-03-18T15:10:05.000Z", + completedAt: "2026-03-18T15:11:10.000Z", + updatedAt: "2026-03-18T15:11:10.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SCRIPT, "status"], { + cwd: 
workspace + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /\| review-adv-live \| adversarial-review \| running \| reviewing \|/); + assert.match(result.stdout, /- review-adv \| completed \| adversarial-review \| Codex Adversarial Review/); + assert.match(result.stdout, /Codex session ID: thr_adv_live/); + assert.match(result.stdout, /Codex session ID: thr_adv_done/); +}); + +test("status --wait times out cleanly when a job is still active", () => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const logFile = path.join(jobsDir, "task-live.log"); + fs.writeFileSync(logFile, "[2026-03-18T15:30:00.000Z] Starting Codex Task.\n", "utf8"); + fs.writeFileSync( + path.join(jobsDir, "task-live.json"), + JSON.stringify( + { + id: "task-live", + status: "running", + title: "Codex Task", + logFile + }, + null, + 2 + ), + "utf8" + ); + + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "task-live", + status: "running", + title: "Codex Task", + jobClass: "task", + summary: "Investigate flaky test", + logFile, + createdAt: "2026-03-18T15:30:00.000Z", + startedAt: "2026-03-18T15:30:01.000Z", + updatedAt: "2026-03-18T15:30:02.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SCRIPT, "status", "task-live", "--wait", "--timeout-ms", "25", "--json"], { + cwd: workspace + }); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.job.id, "task-live"); + assert.equal(payload.job.status, "running"); + assert.equal(payload.waitTimedOut, true); +}); + +test("result returns the stored output for the latest finished job by default", () => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + 
const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + fs.writeFileSync( + path.join(jobsDir, "review-finished.json"), + JSON.stringify( + { + id: "review-finished", + status: "completed", + title: "Codex Review", + rendered: "# Codex Review\n\nReviewed uncommitted changes.\nNo material issues found.\n", + result: { + codex: { + stdout: "Reviewed uncommitted changes.\nNo material issues found." + } + }, + threadId: "thr_review_finished" + }, + null, + 2 + ), + "utf8" + ); + + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "review-finished", + status: "completed", + title: "Codex Review", + jobClass: "review", + threadId: "thr_review_finished", + summary: "Review working tree diff", + createdAt: "2026-03-18T15:00:00.000Z", + updatedAt: "2026-03-18T15:01:00.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SCRIPT, "result"], { + cwd: workspace + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal( + result.stdout, + "Reviewed uncommitted changes.\nNo material issues found.\n\nCodex session ID: thr_review_finished\nResume in Codex: codex resume thr_review_finished\n" + ); +}); + +test("result without a job id prefers the latest finished job from the current Claude session", () => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + fs.writeFileSync( + path.join(jobsDir, "review-current.json"), + JSON.stringify( + { + id: "review-current", + status: "completed", + title: "Codex Review", + threadId: "thr_current", + result: { + codex: { + stdout: "Current session output." 
+ } + } + }, + null, + 2 + ), + "utf8" + ); + + fs.writeFileSync( + path.join(jobsDir, "review-other.json"), + JSON.stringify( + { + id: "review-other", + status: "completed", + title: "Codex Review", + threadId: "thr_other", + result: { + codex: { + stdout: "Old session output." + } + } + }, + null, + 2 + ), + "utf8" + ); + + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "review-current", + status: "completed", + title: "Codex Review", + jobClass: "review", + sessionId: "sess-current", + threadId: "thr_current", + summary: "Current session review", + createdAt: "2026-03-18T15:10:00.000Z", + updatedAt: "2026-03-18T15:11:00.000Z" + }, + { + id: "review-other", + status: "completed", + title: "Codex Review", + jobClass: "review", + sessionId: "sess-other", + threadId: "thr_other", + summary: "Old session review", + createdAt: "2026-03-18T15:20:00.000Z", + updatedAt: "2026-03-18T15:21:00.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SCRIPT, "result"], { + cwd: workspace, + env: { + ...process.env, + CODEX_COMPANION_SESSION_ID: "sess-current" + } + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal( + result.stdout, + "Current session output.\n\nCodex session ID: thr_current\nResume in Codex: codex resume thr_current\n" + ); +}); + +test("result for a finished write-capable task returns the raw Codex final response", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const taskRun = run("node", [SCRIPT, "task", "--write", "fix the flaky integration test"], { + cwd: repo, + env: buildEnv(binDir) + }); + assert.equal(taskRun.status, 0, taskRun.stderr); + + const result = 
run("node", [SCRIPT, "result"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /^Handled the requested task\.\nTask prompt accepted\.\n/); + assert.match(result.stdout, /Codex session ID: thr_[a-z0-9]+/i); + assert.match(result.stdout, /Resume in Codex: codex resume thr_[a-z0-9]+/i); +}); + +test("cancel stops an active background job and marks it cancelled", async (t) => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const sleeper = spawn(process.execPath, ["-e", "setInterval(() => {}, 1000)"], { + cwd: workspace, + detached: true, + stdio: "ignore" + }); + sleeper.unref(); + + t.after(() => { + try { + process.kill(-sleeper.pid, "SIGTERM"); + } catch { + try { + process.kill(sleeper.pid, "SIGTERM"); + } catch { + // Ignore missing process. + } + } + }); + + const logFile = path.join(jobsDir, "task-live.log"); + const jobFile = path.join(jobsDir, "task-live.json"); + fs.writeFileSync(logFile, "[2026-03-18T15:30:00.000Z] Starting Codex Task.\n", "utf8"); + fs.writeFileSync( + jobFile, + JSON.stringify( + { + id: "task-live", + status: "running", + title: "Codex Task", + logFile + }, + null, + 2 + ), + "utf8" + ); + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "task-live", + status: "running", + title: "Codex Task", + jobClass: "task", + summary: "Investigate flaky test", + pid: sleeper.pid, + logFile, + createdAt: "2026-03-18T15:30:00.000Z", + startedAt: "2026-03-18T15:30:01.000Z", + updatedAt: "2026-03-18T15:30:02.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const cancelResult = run("node", [SCRIPT, "cancel", "task-live", "--json"], { + cwd: workspace + }); + + assert.equal(cancelResult.status, 0, cancelResult.stderr); + 
assert.equal(JSON.parse(cancelResult.stdout).status, "cancelled"); + + await waitFor(() => { + try { + process.kill(sleeper.pid, 0); + return false; + } catch (error) { + return error?.code === "ESRCH"; + } + }); + + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const cancelled = state.jobs.find((job) => job.id === "task-live"); + assert.equal(cancelled.status, "cancelled"); + assert.equal(cancelled.pid, null); + + const stored = JSON.parse(fs.readFileSync(jobFile, "utf8")); + assert.equal(stored.status, "cancelled"); + assert.match(fs.readFileSync(logFile, "utf8"), /Cancelled by user/); +}); + +test("cancel without a job id ignores active jobs from other Claude sessions", () => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const logFile = path.join(jobsDir, "task-other.log"); + fs.writeFileSync(logFile, "", "utf8"); + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "task-other", + status: "running", + title: "Codex Task", + jobClass: "task", + sessionId: "sess-other", + summary: "Other session run", + updatedAt: "2026-03-24T20:05:00.000Z", + logFile + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const env = { + ...process.env, + CODEX_COMPANION_SESSION_ID: "sess-current" + }; + const status = run("node", [SCRIPT, "status", "--json"], { + cwd: workspace, + env + }); + assert.equal(status.status, 0, status.stderr); + assert.deepEqual(JSON.parse(status.stdout).running, []); + + const cancel = run("node", [SCRIPT, "cancel", "--json"], { + cwd: workspace, + env + }); + assert.equal(cancel.status, 1); + assert.match(cancel.stderr, /No active Codex jobs to cancel for this session\./); + + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + 
assert.equal(state.jobs[0].status, "running"); +}); + +test("cancel with a job id can still target an active job from another Claude session", () => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const logFile = path.join(jobsDir, "task-other.log"); + fs.writeFileSync(logFile, "", "utf8"); + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "task-other", + status: "running", + title: "Codex Task", + jobClass: "task", + sessionId: "sess-other", + summary: "Other session run", + updatedAt: "2026-03-24T20:05:00.000Z", + logFile + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const env = { + ...process.env, + CODEX_COMPANION_SESSION_ID: "sess-current" + }; + const cancel = run("node", [SCRIPT, "cancel", "task-other", "--json"], { + cwd: workspace, + env + }); + assert.equal(cancel.status, 0, cancel.stderr); + assert.equal(JSON.parse(cancel.stdout).jobId, "task-other"); + + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + assert.equal(state.jobs[0].status, "cancelled"); +}); + +test("cancel sends turn interrupt to the shared app-server before killing a brokered task", async () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const fakeStatePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir, "interruptible-slow-task"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const env = buildEnv(binDir); + const launched = run("node", [SCRIPT, "task", "--background", "--json", "investigate the flaky worker timeout"], { + cwd: repo, + env + }); + + assert.equal(launched.status, 0, launched.stderr); + const launchPayload = 
JSON.parse(launched.stdout); + const jobId = launchPayload.jobId; + assert.ok(jobId); + + const stateDir = resolveStateDir(repo); + const runningJob = await waitFor(() => { + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + const job = state.jobs.find((candidate) => candidate.id === jobId); + if (job?.status === "running" && job.threadId && job.turnId) { + return job; + } + return null; + }, { timeoutMs: 15000 }); + + const cancelResult = run("node", [SCRIPT, "cancel", jobId, "--json"], { + cwd: repo, + env + }); + + assert.equal(cancelResult.status, 0, cancelResult.stderr); + const cancelPayload = JSON.parse(cancelResult.stdout); + assert.equal(cancelPayload.status, "cancelled"); + assert.equal(cancelPayload.turnInterruptAttempted, true); + assert.equal(cancelPayload.turnInterrupted, true); + + await waitFor(() => { + const fakeState = JSON.parse(fs.readFileSync(fakeStatePath, "utf8")); + return fakeState.lastInterrupt ?? null; + }); + + const fakeState = JSON.parse(fs.readFileSync(fakeStatePath, "utf8")); + assert.deepEqual(fakeState.lastInterrupt, { + threadId: runningJob.threadId, + turnId: runningJob.turnId + }); + + const cleanup = run("node", [SESSION_HOOK, "SessionEnd"], { + cwd: repo, + env, + input: JSON.stringify({ + hook_event_name: "SessionEnd", + cwd: repo + }) + }); + assert.equal(cleanup.status, 0, cleanup.stderr); +}); + +test("session end fully cleans up jobs for the ending session", async (t) => { + const repo = makeTempDir(); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const stateDir = resolveStateDir(repo); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const completedLog = path.join(jobsDir, "completed.log"); + const runningLog = path.join(jobsDir, "running.log"); + const otherSessionLog = path.join(jobsDir, 
"other.log"); + const completedJobFile = path.join(jobsDir, "review-completed.json"); + const runningJobFile = path.join(jobsDir, "review-running.json"); + const otherJobFile = path.join(jobsDir, "review-other.json"); + fs.writeFileSync(completedLog, "completed\n", "utf8"); + fs.writeFileSync(runningLog, "running\n", "utf8"); + fs.writeFileSync(otherSessionLog, "other\n", "utf8"); + fs.writeFileSync(completedJobFile, JSON.stringify({ id: "review-completed" }, null, 2), "utf8"); + fs.writeFileSync(otherJobFile, JSON.stringify({ id: "review-other" }, null, 2), "utf8"); + + const sleeper = spawn(process.execPath, ["-e", "setInterval(() => {}, 1000)"], { + cwd: repo, + detached: true, + stdio: "ignore" + }); + sleeper.unref(); + fs.writeFileSync(runningJobFile, JSON.stringify({ id: "review-running" }, null, 2), "utf8"); + + t.after(() => { + try { + process.kill(-sleeper.pid, "SIGTERM"); + } catch { + try { + process.kill(sleeper.pid, "SIGTERM"); + } catch { + // Ignore missing process. + } + } + }); + + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "review-completed", + status: "completed", + title: "Codex Review", + sessionId: "sess-current", + logFile: completedLog, + createdAt: "2026-03-18T15:30:00.000Z", + updatedAt: "2026-03-18T15:31:00.000Z" + }, + { + id: "review-running", + status: "running", + title: "Codex Review", + sessionId: "sess-current", + pid: sleeper.pid, + logFile: runningLog, + createdAt: "2026-03-18T15:32:00.000Z", + updatedAt: "2026-03-18T15:33:00.000Z" + }, + { + id: "review-other", + status: "completed", + title: "Codex Review", + sessionId: "sess-other", + logFile: otherSessionLog, + createdAt: "2026-03-18T15:34:00.000Z", + updatedAt: "2026-03-18T15:35:00.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SESSION_HOOK, "SessionEnd"], { + cwd: repo, + env: { + ...process.env, + CODEX_COMPANION_SESSION_ID: 
"sess-current" + }, + input: JSON.stringify({ + hook_event_name: "SessionEnd", + session_id: "sess-current", + cwd: repo + }) + }); + + assert.equal(result.status, 0, result.stderr); + assert.equal(fs.existsSync(otherSessionLog), true); + assert.equal(fs.existsSync(otherJobFile), true); + assert.deepEqual( + fs.readdirSync(path.dirname(otherJobFile)).sort(), + [path.basename(otherJobFile), path.basename(otherSessionLog)].sort() + ); + + await waitFor(() => { + try { + process.kill(sleeper.pid, 0); + return false; + } catch (error) { + return error?.code === "ESRCH"; + } + }); + + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + assert.deepEqual(state.jobs.map((job) => job.id), ["review-other"]); + const otherJob = state.jobs[0]; + assert.equal(otherJob.logFile, otherSessionLog); +}); + +test("stop hook runs a stop-time review task and blocks on findings when the review gate is enabled", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const fakeStatePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const setup = run("node", [SCRIPT, "setup", "--enable-review-gate", "--json"], { + cwd: repo, + env: buildEnv(binDir) + }); + assert.equal(setup.status, 0, setup.stderr); + const setupPayload = JSON.parse(setup.stdout); + assert.equal(setupPayload.reviewGateEnabled, true); + + const taskResult = run("node", [SCRIPT, "task", "--write", "fix the issue"], { + cwd: repo, + env: buildEnv(binDir) + }); + assert.equal(taskResult.status, 0, taskResult.stderr); + + const blocked = run("node", [STOP_HOOK], { + cwd: repo, + env: buildEnv(binDir), + input: JSON.stringify({ + cwd: repo, + session_id: "sess-stop-review", + last_assistant_message: "I completed the refactor and updated the retry logic." 
+ }) + }); + assert.equal(blocked.status, 0, blocked.stderr); + const blockedPayload = JSON.parse(blocked.stdout); + assert.equal(blockedPayload.decision, "block"); + assert.match(blockedPayload.reason, /Codex stop-time review found issues that still need fixes/i); + assert.match(blockedPayload.reason, /Missing empty-state guard/i); + + const fakeState = JSON.parse(fs.readFileSync(fakeStatePath, "utf8")); + assert.match(fakeState.lastTurnStart.prompt, /<task>/i); + assert.match(fakeState.lastTurnStart.prompt, /<compact_output_contract>/i); + assert.match(fakeState.lastTurnStart.prompt, /Only review the work from the previous (Claude|Antigravity) turn/i); + assert.match(fakeState.lastTurnStart.prompt, /I completed the refactor and updated the retry logic\./); + + const status = run("node", [SCRIPT, "status"], { + cwd: repo, + env: { + ...buildEnv(binDir), + CODEX_COMPANION_SESSION_ID: "sess-stop-review" + } + }); + assert.equal(status.status, 0, status.stderr); + assert.match(status.stdout, /Codex Stop Gate Review/); +}); + +test("stop hook logs running tasks to stderr without blocking when the review gate is disabled", () => { + const repo = makeTempDir(); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const stateDir = resolveStateDir(repo); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const runningLog = path.join(jobsDir, "task-running.log"); + fs.writeFileSync(runningLog, "running\n", "utf8"); + + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { + stopReviewGate: false + }, + jobs: [ + { + id: "task-live", + status: "running", + title: "Codex Task", + jobClass: "task", + sessionId: "sess-current", + logFile: runningLog, + createdAt: "2026-03-18T15:32:00.000Z", + updatedAt: "2026-03-18T15:33:00.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" +
); + + const blocked = run("node", [STOP_HOOK], { + cwd: repo, + env: { + ...process.env, + CODEX_COMPANION_SESSION_ID: "sess-current" + }, + input: JSON.stringify({ cwd: repo }) + }); + + assert.equal(blocked.status, 0, blocked.stderr); + assert.equal(blocked.stdout.trim(), ""); + assert.match(blocked.stderr, /Codex task task-live is still running/i); + assert.match(blocked.stderr, /\/codex:status/i); + assert.match(blocked.stderr, /\/codex:cancel task-live/i); +}); + +test("stop hook allows the stop when the review gate is enabled and the stop-time review task is clean", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "adversarial-clean"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const setup = run("node", [SCRIPT, "setup", "--enable-review-gate", "--json"], { + cwd: repo, + env: buildEnv(binDir) + }); + assert.equal(setup.status, 0, setup.stderr); + + const allowed = run("node", [STOP_HOOK], { + cwd: repo, + env: buildEnv(binDir), + input: JSON.stringify({ cwd: repo, session_id: "sess-stop-clean" }) + }); + + assert.equal(allowed.status, 0, allowed.stderr); + assert.equal(allowed.stdout.trim(), ""); +}); + +test("stop hook does not block when Codex is unavailable even if the review gate is enabled", () => { + const repo = makeTempDir(); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const setup = run(process.execPath, [SCRIPT, "setup", "--enable-review-gate", "--json"], { + cwd: repo + }); + assert.equal(setup.status, 0, setup.stderr); + + const allowed = run(process.execPath, [STOP_HOOK], { + cwd: repo, + env: { + ...process.env, + PATH: "" + }, + input: JSON.stringify({ cwd: repo }) + }); + + assert.equal(allowed.status, 
0, allowed.stderr); + assert.equal(allowed.stdout.trim(), ""); + assert.match(allowed.stderr, /Codex is not set up for the review gate/i); + assert.match(allowed.stderr, /Run \/codex:setup/i); +}); + +test("stop hook runs the actual task when auth status looks stale", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "refreshable-auth"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const setup = run("node", [SCRIPT, "setup", "--enable-review-gate", "--json"], { + cwd: repo, + env: buildEnv(binDir) + }); + assert.equal(setup.status, 0, setup.stderr); + + const allowed = run("node", [STOP_HOOK], { + cwd: repo, + env: buildEnv(binDir), + input: JSON.stringify({ cwd: repo }) + }); + + assert.equal(allowed.status, 0, allowed.stderr); + assert.doesNotMatch(allowed.stderr, /Codex is not set up for the review gate/i); + const payload = JSON.parse(allowed.stdout); + assert.equal(payload.decision, "block"); + assert.match(payload.reason, /Missing empty-state guard/i); +}); + +test("commands lazily start and reuse one shared app-server after first use", async () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const fakeStatePath = path.join(binDir, "fake-codex-state.json"); + + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + + const env = buildEnv(binDir); + + const review = run("node", [SCRIPT, "review"], { + cwd: repo, + env + }); + assert.equal(review.status, 0, review.stderr); + + const brokerSession = loadBrokerSession(repo); + if (!brokerSession) { + return; + } + + const adversarial = run("node", [SCRIPT, "adversarial-review"], 
{ + cwd: repo, + env + }); + assert.equal(adversarial.status, 0, adversarial.stderr); + + const fakeState = JSON.parse(fs.readFileSync(fakeStatePath, "utf8")); + assert.equal(fakeState.appServerStarts, 1); + + const cleanup = run("node", [SESSION_HOOK, "SessionEnd"], { + cwd: repo, + env, + input: JSON.stringify({ + hook_event_name: "SessionEnd", + cwd: repo + }) + }); + assert.equal(cleanup.status, 0, cleanup.stderr); +}); + +test("setup reuses an existing shared app-server without starting another one", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const fakeStatePath = path.join(binDir, "fake-codex-state.json"); + + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + + const env = buildEnv(binDir); + + const review = run("node", [SCRIPT, "review"], { + cwd: repo, + env + }); + assert.equal(review.status, 0, review.stderr); + + const brokerSession = loadBrokerSession(repo); + if (!brokerSession) { + return; + } + + const setup = run("node", [SCRIPT, "setup", "--json"], { + cwd: repo, + env + }); + assert.equal(setup.status, 0, setup.stderr); + + const fakeState = JSON.parse(fs.readFileSync(fakeStatePath, "utf8")); + assert.equal(fakeState.appServerStarts, 1); + + const cleanup = run("node", [SESSION_HOOK, "SessionEnd"], { + cwd: repo, + env, + input: JSON.stringify({ + hook_event_name: "SessionEnd", + cwd: repo + }) + }); + assert.equal(cleanup.status, 0, cleanup.stderr); +}); + +test("status reports shared session runtime when a lazy broker is active", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", 
"init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + + const review = run("node", [SCRIPT, "review"], { + cwd: repo, + env: buildEnv(binDir) + }); + assert.equal(review.status, 0, review.stderr); + + if (!loadBrokerSession(repo)) { + return; + } + + const result = run("node", [SCRIPT, "status"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /Session runtime: shared session/); +}); + +test("setup and status honor --cwd when reading shared session runtime", () => { + const targetWorkspace = makeTempDir(); + const invocationWorkspace = makeTempDir(); + + saveBrokerSession(targetWorkspace, { + endpoint: "unix:/tmp/fake-broker.sock" + }); + + const status = run("node", [SCRIPT, "status", "--cwd", targetWorkspace], { + cwd: invocationWorkspace + }); + assert.equal(status.status, 0, status.stderr); + assert.match(status.stdout, /Session runtime: shared session/); + + const setup = run("node", [SCRIPT, "setup", "--cwd", targetWorkspace, "--json"], { + cwd: invocationWorkspace + }); + assert.equal(setup.status, 0, setup.stderr); + const payload = JSON.parse(setup.stdout); + assert.equal(payload.sessionRuntime.mode, "shared"); + assert.equal(payload.sessionRuntime.endpoint, "unix:/tmp/fake-broker.sock"); +}); diff --git a/tests/tests/state.test.mjs b/tests/tests/state.test.mjs new file mode 100644 index 00000000..0f8f57ce --- /dev/null +++ b/tests/tests/state.test.mjs @@ -0,0 +1,105 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import test from "node:test"; +import assert from "node:assert/strict"; + +import { makeTempDir } from "./helpers.mjs"; +import { resolveJobFile, resolveJobLogFile, resolveStateDir, resolveStateFile, saveState } from "../plugins/codex/scripts/lib/state.mjs"; + +test("resolveStateDir uses a temp-backed per-workspace directory", () => { + const workspace = makeTempDir(); + const stateDir = 
resolveStateDir(workspace); + + assert.equal(stateDir.startsWith(os.tmpdir()), true); + assert.match(path.basename(stateDir), /.+-[a-f0-9]{16}$/); + assert.match(stateDir, new RegExp(`^${os.tmpdir().replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`)); +}); + +test("resolveStateDir uses CLAUDE_PLUGIN_DATA when it is provided", () => { + const workspace = makeTempDir(); + const pluginDataDir = makeTempDir(); + const previousPluginDataDir = process.env.CLAUDE_PLUGIN_DATA; + process.env.CLAUDE_PLUGIN_DATA = pluginDataDir; + + try { + const stateDir = resolveStateDir(workspace); + + assert.equal(stateDir.startsWith(path.join(pluginDataDir, "state")), true); + assert.match(path.basename(stateDir), /.+-[a-f0-9]{16}$/); + assert.match( + stateDir, + new RegExp(`^${path.join(pluginDataDir, "state").replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`) + ); + } finally { + if (previousPluginDataDir == null) { + delete process.env.CLAUDE_PLUGIN_DATA; + } else { + process.env.CLAUDE_PLUGIN_DATA = previousPluginDataDir; + } + } +}); + +test("saveState prunes dropped job artifacts when indexed jobs exceed the cap", () => { + const workspace = makeTempDir(); + const stateFile = resolveStateFile(workspace); + fs.mkdirSync(path.dirname(stateFile), { recursive: true }); + + const jobs = Array.from({ length: 51 }, (_, index) => { + const jobId = `job-${index}`; + const updatedAt = new Date(Date.UTC(2026, 0, 1, 0, index, 0)).toISOString(); + const logFile = resolveJobLogFile(workspace, jobId); + const jobFile = resolveJobFile(workspace, jobId); + fs.writeFileSync(logFile, `log ${jobId}\n`, "utf8"); + fs.writeFileSync(jobFile, JSON.stringify({ id: jobId, status: "completed" }, null, 2), "utf8"); + return { + id: jobId, + status: "completed", + logFile, + updatedAt, + createdAt: updatedAt + }; + }); + + fs.writeFileSync( + stateFile, + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs + }, + null, + 2 + )}\n`, + "utf8" + ); + + saveState(workspace, { + version: 1, + config: { 
stopReviewGate: false }, + jobs + }); + + const prunedJobFile = resolveJobFile(workspace, "job-0"); + const prunedLogFile = resolveJobLogFile(workspace, "job-0"); + const retainedJobFile = resolveJobFile(workspace, "job-50"); + const retainedLogFile = resolveJobLogFile(workspace, "job-50"); + const jobsDir = path.dirname(prunedJobFile); + + assert.equal(fs.existsSync(retainedJobFile), true); + assert.equal(fs.existsSync(retainedLogFile), true); + + const savedState = JSON.parse(fs.readFileSync(stateFile, "utf8")); + assert.equal(savedState.jobs.length, 50); + assert.deepEqual( + savedState.jobs.map((job) => job.id), + Array.from({ length: 50 }, (_, index) => `job-${50 - index}`) + ); + assert.deepEqual( + fs.readdirSync(jobsDir).sort(), + Array.from({ length: 50 }, (_, index) => `job-${index + 1}`) + .flatMap((jobId) => [`${jobId}.json`, `${jobId}.log`]) + .sort() + ); +}); From 81c1c84ab210cf39d707e971f832e49ec285904c Mon Sep 17 00:00:00 2001 From: Elad Ariel Date: Wed, 20 May 2026 13:29:33 +0300 Subject: [PATCH 2/3] fix: address review feedback for antigravity support --- plugins/codex/scripts/lib/app-server.mjs | 2 +- plugins/codex/scripts/stop-review-gate-hook.mjs | 2 +- tests/runtime.test.mjs | 15 ++++++++++++++- tests/tests/runtime.test.mjs | 15 ++++++++++++++- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/plugins/codex/scripts/lib/app-server.mjs b/plugins/codex/scripts/lib/app-server.mjs index ae31df1c..e9b6e095 100644 --- a/plugins/codex/scripts/lib/app-server.mjs +++ b/plugins/codex/scripts/lib/app-server.mjs @@ -16,7 +16,7 @@ import { parseBrokerEndpoint } from "./broker-endpoint.mjs"; import { ensureBrokerSession, loadBrokerSession } from "./broker-lifecycle.mjs"; import { terminateProcessTree } from "./process.mjs"; -const PLUGIN_MANIFEST_URL = new URL("../../../../gemini-extension.json", import.meta.url); +const PLUGIN_MANIFEST_URL = new URL("../../.claude-plugin/plugin.json", import.meta.url); const PLUGIN_MANIFEST = 
JSON.parse(fs.readFileSync(PLUGIN_MANIFEST_URL, "utf8")); export const BROKER_ENDPOINT_ENV = "CODEX_COMPANION_APP_SERVER_ENDPOINT"; diff --git a/plugins/codex/scripts/stop-review-gate-hook.mjs b/plugins/codex/scripts/stop-review-gate-hook.mjs index 9877e5a8..d74b155a 100644 --- a/plugins/codex/scripts/stop-review-gate-hook.mjs +++ b/plugins/codex/scripts/stop-review-gate-hook.mjs @@ -52,7 +52,7 @@ function buildStopReviewPrompt(input = {}) { ? ["Previous Antigravity response:", lastAssistantMessage].join("\n") : ""; return interpolateTemplate(template, { - ANTIGRAVITY_RESPONSE_BLOCK: antigravityResponseBlock + CLAUDE_RESPONSE_BLOCK: antigravityResponseBlock }); } diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 90408372..96ba4f8f 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -3,7 +3,7 @@ import path from "node:path"; import test from "node:test"; import assert from "node:assert/strict"; import { spawn } from "node:child_process"; -import { fileURLToPath } from "node:url"; +import { fileURLToPath, pathToFileURL } from "node:url"; import { buildEnv, installFakeCodex } from "./fake-codex-fixture.mjs"; import { initGitRepo, makeTempDir, run } from "./helpers.mjs"; @@ -44,6 +44,18 @@ test("setup reports ready when fake codex is installed and authenticated", () => assert.equal(payload.sessionRuntime.mode, "direct"); }); +test("app-server module loads from the shipped plugin directory", async () => { + const installRoot = makeTempDir(); + const installedPluginRoot = path.join(installRoot, "codex"); + fs.cpSync(PLUGIN_ROOT, installedPluginRoot, { recursive: true }); + + const moduleUrl = pathToFileURL(path.join(installedPluginRoot, "scripts", "lib", "app-server.mjs")); + moduleUrl.search = `?test=${Date.now()}`; + const appServerModule = await import(moduleUrl.href); + + assert.equal(typeof appServerModule.CodexAppServerClient?.connect, "function"); +}); + test("setup is ready without npm when Codex is already installed and 
authenticated", () => { const binDir = makeTempDir(); installFakeCodex(binDir); @@ -1830,6 +1842,7 @@ test("stop hook runs a stop-time review task and blocks on findings when the rev assert.match(fakeState.lastTurnStart.prompt, //i); assert.match(fakeState.lastTurnStart.prompt, //i); assert.match(fakeState.lastTurnStart.prompt, /Only review the work from the previous Claude turn/i); + assert.match(fakeState.lastTurnStart.prompt, /Previous Antigravity response:/); assert.match(fakeState.lastTurnStart.prompt, /I completed the refactor and updated the retry logic\./); const status = run("node", [SCRIPT, "status"], { diff --git a/tests/tests/runtime.test.mjs b/tests/tests/runtime.test.mjs index 5e547825..eede33cb 100644 --- a/tests/tests/runtime.test.mjs +++ b/tests/tests/runtime.test.mjs @@ -19,7 +19,7 @@ delete process.env.GEMINI_PLUGIN_DATA; delete process.env.CLAUDE_PLUGIN_DATA; import { spawn } from "node:child_process"; -import { fileURLToPath } from "node:url"; +import { fileURLToPath, pathToFileURL } from "node:url"; import { buildEnv, installFakeCodex } from "./fake-codex-fixture.mjs"; import { initGitRepo, makeTempDir, run } from "./helpers.mjs"; @@ -60,6 +60,18 @@ test("setup reports ready when fake codex is installed and authenticated", () => assert.equal(payload.sessionRuntime.mode, "direct"); }); +test("app-server module loads from the shipped plugin directory", async () => { + const installRoot = makeTempDir(); + const installedPluginRoot = path.join(installRoot, "codex"); + fs.cpSync(PLUGIN_ROOT, installedPluginRoot, { recursive: true }); + + const moduleUrl = pathToFileURL(path.join(installedPluginRoot, "scripts", "lib", "app-server.mjs")); + moduleUrl.search = `?test=${Date.now()}`; + const appServerModule = await import(moduleUrl.href); + + assert.equal(typeof appServerModule.CodexAppServerClient?.connect, "function"); +}); + test("setup is ready without npm when Codex is already installed and authenticated", () => { const binDir = makeTempDir(); 
installFakeCodex(binDir); @@ -1846,6 +1858,7 @@ test("stop hook runs a stop-time review task and blocks on findings when the rev assert.match(fakeState.lastTurnStart.prompt, //i); assert.match(fakeState.lastTurnStart.prompt, //i); assert.match(fakeState.lastTurnStart.prompt, /Only review the work from the previous (Claude|Antigravity) turn/i); + assert.match(fakeState.lastTurnStart.prompt, /Previous Antigravity response:/); assert.match(fakeState.lastTurnStart.prompt, /I completed the refactor and updated the retry logic\./); const status = run("node", [SCRIPT, "status"], { From e93d99d5bee782e21d2703bb1db83dd8b6d390cc Mon Sep 17 00:00:00 2001 From: Elad Ariel Date: Wed, 20 May 2026 14:04:37 +0300 Subject: [PATCH 3/3] fix: align Antigravity stop-gate command prompts --- commands/adversarial-review.toml | 2 +- commands/cancel.toml | 2 +- commands/rescue.toml | 2 +- commands/result.toml | 2 +- commands/review.toml | 2 +- commands/setup.toml | 2 +- commands/status.toml | 2 +- plugins/codex/prompts/stop-review-gate.md | 10 +++++----- tests/fake-codex-fixture.mjs | 2 +- tests/runtime.test.mjs | 2 +- tests/tests/fake-codex-fixture.mjs | 2 +- tests/tests/runtime.test.mjs | 2 +- 12 files changed, 16 insertions(+), 16 deletions(-) diff --git a/commands/adversarial-review.toml b/commands/adversarial-review.toml index d82f1a90..1b0bff8e 100644 --- a/commands/adversarial-review.toml +++ b/commands/adversarial-review.toml @@ -3,7 +3,7 @@ prompt = """ You are the adversarial-review command handler for the Codex plugin on Antigravity CLI. Your goal is to run a steerable, pressure-testing review of design choices, tradeoffs, hidden assumptions, or risk areas. -Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Please find the companion script `codex-companion.mjs` under the `plugins/codex/scripts/` directory in the current workspace. 
Execute the following command using the `run_command` tool: `node "" adversarial-review {{args}}` diff --git a/commands/cancel.toml b/commands/cancel.toml index 7c386db1..00157808 100644 --- a/commands/cancel.toml +++ b/commands/cancel.toml @@ -3,7 +3,7 @@ prompt = """ You are the cancel command handler for the Codex plugin on Antigravity CLI. Your goal is to cancel a running background job. -Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Please find the companion script `codex-companion.mjs` under the `plugins/codex/scripts/` directory in the current workspace. Execute the following command using the `run_command` tool: `node "" cancel {{args}}` diff --git a/commands/rescue.toml b/commands/rescue.toml index ae739e9c..873611fe 100644 --- a/commands/rescue.toml +++ b/commands/rescue.toml @@ -3,7 +3,7 @@ prompt = """ You are the rescue command handler for the Codex plugin on Antigravity CLI. Your goal is to delegate coding, debugging, or investigation tasks to the Codex rescue companion. -Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Please find the companion script `codex-companion.mjs` under the `plugins/codex/scripts/` directory in the current workspace. First, check if there is a resumable rescue candidate from the current session by running: `node "" task-resume-candidate --json` diff --git a/commands/result.toml b/commands/result.toml index 6149afc7..6e8c17d3 100644 --- a/commands/result.toml +++ b/commands/result.toml @@ -3,7 +3,7 @@ prompt = """ You are the result command handler for the Codex plugin on Antigravity CLI. Your goal is to fetch and print the final result of a background job. -Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. 
+Please find the companion script `codex-companion.mjs` under the `plugins/codex/scripts/` directory in the current workspace. Execute the following command using the `run_command` tool: `node "" result {{args}}` diff --git a/commands/review.toml b/commands/review.toml index 7ac12d2d..c1def2d9 100644 --- a/commands/review.toml +++ b/commands/review.toml @@ -3,7 +3,7 @@ prompt = """ You are the review command handler for the Codex plugin on Antigravity CLI. Your goal is to run a normal, read-only Codex review on the current uncommitted changes or branch comparison. -Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Please find the companion script `codex-companion.mjs` under the `plugins/codex/scripts/` directory in the current workspace. Execute the following command using the `run_command` tool: `node "" review {{args}}` diff --git a/commands/setup.toml b/commands/setup.toml index 901d79b4..ffceea76 100644 --- a/commands/setup.toml +++ b/commands/setup.toml @@ -3,7 +3,7 @@ prompt = """ You are the setup command handler for the Codex plugin on Antigravity CLI. Your goal is to run the setup process. -Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Please find the companion script `codex-companion.mjs` under the `plugins/codex/scripts/` directory in the current workspace. Execute the following command using the `run_command` tool: `node "" setup --json {{args}}` diff --git a/commands/status.toml b/commands/status.toml index 4044d257..a9ef00bb 100644 --- a/commands/status.toml +++ b/commands/status.toml @@ -3,7 +3,7 @@ prompt = """ You are the status command handler for the Codex plugin on Antigravity CLI. Your goal is to check the progress or status of background jobs. 
-Please find the companion script `codex-companion.mjs` under the `codex-plugin-agy/plugins/codex/scripts/` directory in the current workspace. +Please find the companion script `codex-companion.mjs` under the `plugins/codex/scripts/` directory in the current workspace. Execute the following command using the `run_command` tool: `node "" status {{args}}` diff --git a/plugins/codex/prompts/stop-review-gate.md b/plugins/codex/prompts/stop-review-gate.md index 8ed4d129..179e4314 100644 --- a/plugins/codex/prompts/stop-review-gate.md +++ b/plugins/codex/prompts/stop-review-gate.md @@ -1,11 +1,11 @@ -Run a stop-gate review of the previous Claude turn. -Only review the work from the previous Claude turn. -Only review it if Claude actually did code changes in that turn. +Run a stop-gate review of the previous Antigravity turn. +Only review the work from the previous Antigravity turn. +Only review it if Antigravity actually did code changes in that turn. Pure status, setup, or reporting output does not count as reviewable work. For example, the output of /codex:setup or /codex:status does not count. Only direct edits made in that specific turn count. -If the previous Claude turn was only a status update, a summary, a setup/login check, a review result, or output from a command that did not itself make direct edits in that turn, return ALLOW immediately and do no further work. +If the previous Antigravity turn was only a status update, a summary, a setup/login check, a review result, or output from a command that did not itself make direct edits in that turn, return ALLOW immediately and do no further work. Challenge whether that specific work and its design choices should ship. {{CLAUDE_RESPONSE_BLOCK}} @@ -27,7 +27,7 @@ Use BLOCK only if the previous turn made code changes and you found something th Ground every blocking claim in the repository context or tool outputs you inspected during this run. 
-Do not treat the previous Claude response as proof that code changes happened; verify that from the repository state before you block. +Do not treat the previous Antigravity response as proof that code changes happened; verify that from the repository state before you block. Do not block based on older edits from earlier turns when the immediately previous turn did not itself make direct edits. diff --git a/tests/fake-codex-fixture.mjs b/tests/fake-codex-fixture.mjs index debcadce..62a23a51 100644 --- a/tests/fake-codex-fixture.mjs +++ b/tests/fake-codex-fixture.mjs @@ -217,7 +217,7 @@ function structuredReviewPayload(prompt) { } function taskPayload(prompt, resume) { - if (prompt.includes("") && prompt.includes("Only review the work from the previous Claude turn.")) { + if (prompt.includes("") && prompt.includes("Only review the work from the previous Antigravity turn.")) { if (BEHAVIOR === "adversarial-clean") { return "ALLOW: No blocking issues found in the previous turn."; } diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 96ba4f8f..288d3a1d 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -1841,7 +1841,7 @@ test("stop hook runs a stop-time review task and blocks on findings when the rev const fakeState = JSON.parse(fs.readFileSync(fakeStatePath, "utf8")); assert.match(fakeState.lastTurnStart.prompt, //i); assert.match(fakeState.lastTurnStart.prompt, //i); - assert.match(fakeState.lastTurnStart.prompt, /Only review the work from the previous Claude turn/i); + assert.match(fakeState.lastTurnStart.prompt, /Only review the work from the previous Antigravity turn/i); assert.match(fakeState.lastTurnStart.prompt, /Previous Antigravity response:/); assert.match(fakeState.lastTurnStart.prompt, /I completed the refactor and updated the retry logic\./); diff --git a/tests/tests/fake-codex-fixture.mjs b/tests/tests/fake-codex-fixture.mjs index 71c0ae2a..213c53d7 100644 --- a/tests/tests/fake-codex-fixture.mjs +++ 
b/tests/tests/fake-codex-fixture.mjs @@ -217,7 +217,7 @@ function structuredReviewPayload(prompt) { } function taskPayload(prompt, resume) { - if (prompt.includes("") && (prompt.includes("Only review the work from the previous Claude turn.") || prompt.includes("Only review the work from the previous Antigravity turn."))) { + if (prompt.includes("") && prompt.includes("Only review the work from the previous Antigravity turn.")) { if (BEHAVIOR === "adversarial-clean") { return "ALLOW: No blocking issues found in the previous turn."; } diff --git a/tests/tests/runtime.test.mjs b/tests/tests/runtime.test.mjs index eede33cb..6fb00781 100644 --- a/tests/tests/runtime.test.mjs +++ b/tests/tests/runtime.test.mjs @@ -1857,7 +1857,7 @@ test("stop hook runs a stop-time review task and blocks on findings when the rev const fakeState = JSON.parse(fs.readFileSync(fakeStatePath, "utf8")); assert.match(fakeState.lastTurnStart.prompt, //i); assert.match(fakeState.lastTurnStart.prompt, //i); - assert.match(fakeState.lastTurnStart.prompt, /Only review the work from the previous (Claude|Antigravity) turn/i); + assert.match(fakeState.lastTurnStart.prompt, /Only review the work from the previous Antigravity turn/i); assert.match(fakeState.lastTurnStart.prompt, /Previous Antigravity response:/); assert.match(fakeState.lastTurnStart.prompt, /I completed the refactor and updated the retry logic\./);