From fc2e7db80c81209d4fb70e5ac10c85fb6fb6aae5 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Mon, 18 May 2026 14:06:28 +0200 Subject: [PATCH 1/4] chore(fleet): scaffold fleet-dispatch-fixes-2026-05-18 plan + OpenSpec change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Captures 6 dispatch-path defects surfaced by real telemetry from the 2026-05-18 marketing-content-waves fleet against recodee: F1 — stale dead panes lingering silently in overview chrome F2 — cap-probe cache outliving quota recovery (5/6 healthy → 8/8 on fresh probe 5min later) F3 — wake-prompt window blank on bringup; workers idle at default Codex placeholders ("Implement {feature}", "Find and fix a bug in @filename") F4 — plan-watcher re-validates without --allow-waves, silently falls back to next-priority plan when our plan has depends_on (observed: trading-edge-foundations dispatched while our priority plan skipped) F5 — force-claim "not in a mode" on non-idle Codex panes, drops dispatch with no retry/backoff F6 — Codex auto-submit not firing on send-keys: context drops (text arrived) but no Colony claim recorded — likely needs different terminator key sequence Plan workspace at openspec/plans/fleet-dispatch-fixes-2026-05-18/ has 6 parallel-ready subtasks (disjoint file_scope, no depends_on so plan- watcher accepts without --allow-waves until F4 lands). Each subtask ships with a focused test under scripts/codex-fleet/test/. This PR is the scaffold; implementation comes from the fleet itself (separate per-subtask PRs from the fleet, then one squashed integration PR per the OpenSpec change tasks.md verification gates). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../.openspec.yaml | 2 + .../proposal.md | 28 ++++++ .../spec.md | 9 ++ .../tasks.md | 42 +++++++++ .../architect.md | 13 +++ .../checkpoints.md | 17 ++++ .../fleet-dispatch-fixes-2026-05-18/critic.md | 13 +++ .../executor.md | 13 +++ .../fleet-dispatch-fixes-2026-05-18/plan.json | 94 +++++++++++++++++++ .../fleet-dispatch-fixes-2026-05-18/plan.md | 31 ++++++ .../planner.md | 13 +++ .../fleet-dispatch-fixes-2026-05-18/tasks.md | 10 ++ .../verifier.md | 13 +++ .../fleet-dispatch-fixes-2026-05-18/writer.md | 13 +++ 14 files changed, 311 insertions(+) create mode 100644 openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/.openspec.yaml create mode 100644 openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/proposal.md create mode 100644 openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/specs/cfui-dispatch-improvements-zzz-2026-05-18/spec.md create mode 100644 openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/tasks.md create mode 100644 openspec/plans/fleet-dispatch-fixes-2026-05-18/architect.md create mode 100644 openspec/plans/fleet-dispatch-fixes-2026-05-18/checkpoints.md create mode 100644 openspec/plans/fleet-dispatch-fixes-2026-05-18/critic.md create mode 100644 openspec/plans/fleet-dispatch-fixes-2026-05-18/executor.md create mode 100644 openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json create mode 100644 openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.md create mode 100644 openspec/plans/fleet-dispatch-fixes-2026-05-18/planner.md create mode 100644 openspec/plans/fleet-dispatch-fixes-2026-05-18/tasks.md create mode 100644 openspec/plans/fleet-dispatch-fixes-2026-05-18/verifier.md create mode 100644 openspec/plans/fleet-dispatch-fixes-2026-05-18/writer.md diff --git 
a/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/.openspec.yaml b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/.openspec.yaml new file mode 100644 index 0000000..231e3ab --- /dev/null +++ b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-18 diff --git a/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/proposal.md b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/proposal.md new file mode 100644 index 0000000..85389b8 --- /dev/null +++ b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/proposal.md @@ -0,0 +1,28 @@ +## Why + +Real-fleet telemetry from the 2026-05-18 `marketing-content-waves` bringup against the recodee repo surfaced six concrete dispatch-path defects that block workers from claiming Colony tasks even when the fleet is "physically up." Symptoms: + +1. Stale dead panes from prior fleet runs linger in the overview chrome — workers terminated by `signal 15` show `Pane is dead` for hours and operators get no surfacing signal. +2. `cap-probe` cache TTL is stale across bringups: first run found 5/6 healthy accounts, fresh `--no-cap-cache` rerun ~5min later found 8/8 — the cache outlived the actual quota recovery. +3. The `wake-prompt` window stays blank on bringup completion — never auto-fires, so workers idle at default Codex placeholder prompts (`"Implement {feature}"`, `"Find and fix a bug in @filename"`). +4. `plan-watcher.sh` re-validates plan.json on each tick *without* passing `--allow-waves`, so any plan with `depends_on` fails hard, plan-watcher skips dispatch, and `force-claim` silently falls back to whatever plan is next in queue (we observed our priority plan being skipped while `trading-edge-foundations-pt2` got dispatched instead). +5. 
`force-claim` send-keys hits "not in a mode" on non-idle Codex panes and silently drops the dispatch — no retry, no backoff, no operator signal. +6. Even when send-keys reaches the input box, Codex's auto-submit doesn't fire — the prompt sits typed but never gets submitted. Context % drops (so the keys arrived) but no Colony claim is recorded. + +These bugs compound: (4) blocks dispatch for plans with deps, (5) blocks dispatch for busy panes, (6) blocks dispatch *even when send-keys lands in the input box*. The net effect is that a freshly-bootstrapped fleet looks healthy in tmux but performs zero work. + +## What Changes + +- **F1 — surface dead panes**: `scripts/codex-fleet/show-fleet.sh` and the rust overview renderer add a `dead_panes` count; alert when any pane has `dead==1` for >60s. +- **F2 — cap-probe cache TTL**: drop cache file age threshold from current default to 60s; invalidate on any prior bringup failure marker. +- **F3 — auto-wake on bringup**: new `CODEX_FLEET_AUTO_WAKE` env (default `1`) that fires `wake-prompt.sh` once at the end of `full-bringup.sh`, before the `DONE.` banner. Existing wake-prompt window continues handling subsequent ticks. +- **F4 — plan-watcher inherits --allow-waves**: pass `--allow-waves` to `lib/plan-validator.sh` from `plan-watcher.sh:run_plan_validator()`. Optional env `CODEX_FLEET_PLAN_VALIDATOR_FLAGS` for operator override. +- **F5 — worker-ready signal + retry**: `force-claim.sh` checks each worker pane's mode (via `tmux display-message -p -t '#{pane_in_mode}'` plus a Codex-specific input-state heuristic) before send-keys; if not ready, backoff and retry on next tick rather than emit "not in a mode". +- **F6 — Codex auto-submit**: investigate whether send-keys requires a different terminator (e.g., `Enter Enter`, or sending text via `paste-buffer` + paste vs. raw send-keys). 
Add a smoke test in `scripts/codex-fleet/test/` that scripts a 1-pane fleet through claim → execute → status on a no-op plan, asserting the worker actually starts. + +## Impact + +- **Risk**: medium. Changes touch the dispatch hot path; a regression could prevent dispatch globally. Each subtask is bounded to a single script with disjoint file_scope, so they can roll back independently. +- **Surfaces affected**: `scripts/codex-fleet/show-fleet.sh`, `scripts/codex-fleet/cap-probe.sh`, `scripts/codex-fleet/full-bringup.sh`, `scripts/codex-fleet/plan-watcher.sh`, `scripts/codex-fleet/force-claim.sh`, `scripts/codex-fleet/test/` (new smoke test). No Colony / recodee changes. +- **Rollout**: features F1-F4 are observability/inheritance fixes — ship default-on. F5 (ready signal) and F6 (auto-submit) gate behind env `CODEX_FLEET_DISPATCH_V2=1` for one cycle of operator testing before flipping default. +- **Telemetry**: each subtask must also append one example JSONL entry to `docs/fleet-telemetry-cases.md` so future regressions are catchable. diff --git a/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/specs/cfui-dispatch-improvements-zzz-2026-05-18/spec.md b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/specs/cfui-dispatch-improvements-zzz-2026-05-18/spec.md new file mode 100644 index 0000000..bcca346 --- /dev/null +++ b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/specs/cfui-dispatch-improvements-zzz-2026-05-18/spec.md @@ -0,0 +1,9 @@ +## ADDED Requirements + +### Requirement: cfui-dispatch-improvements-zzz-2026-05-18 behavior +The system SHALL enforce cfui-dispatch-improvements-zzz-2026-05-18 behavior as defined by this change. + +#### Scenario: Baseline acceptance +- **WHEN** cfui-dispatch-improvements-zzz-2026-05-18 behavior is exercised +- **THEN** the expected outcome is produced +- **AND** regressions are covered by tests. 
diff --git a/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/tasks.md b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/tasks.md new file mode 100644 index 0000000..bd420e0 --- /dev/null +++ b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/tasks.md @@ -0,0 +1,42 @@ +## Definition of Done + +This change is complete only when **all** of the following are true: + +- Every checkbox below is checked. +- The agent branch reaches `MERGED` state on `origin` and the PR URL + state are recorded in the completion handoff. +- If any step blocks (test failure, conflict, ambiguous result), append a `BLOCKED:` line under section 4 explaining the blocker and **STOP**. Do not tick remaining cleanup boxes; do not silently skip the cleanup pipeline. + +## Handoff + +- Handoff: change=`agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03`; branch=`agent/<your-name>/<branch-slug>`; scope=`TODO`; action=`continue this sandbox or finish cleanup after a usage-limit/manual takeover`. +- Copy prompt: Continue `agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03` on branch `agent/<your-name>/<branch-slug>`. Work inside the existing sandbox, review `openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/tasks.md`, continue from the current state instead of creating a new sandbox, and when the work is done run `gx branch finish --branch agent/<your-name>/<branch-slug> --base dev --via-pr --wait-for-merge --cleanup`. + +## 1. Specification + +- [x] 1.1 Proposal scope and acceptance criteria captured in `proposal.md` (6 findings F1–F6 with reproduction evidence from the 2026-05-18 marketing-content-waves fleet run). +- [ ] 1.2 Define normative requirements in `specs/cfui-dispatch-improvements-zzz-2026-05-18/spec.md` (one per finding, with response-shape / state-machine contract). + +## 2. 
Implementation + +Owned by 6 fleet subtasks in `openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json`. Disjoint file_scope, parallel-ready. + +- [ ] 2.1 **F1 — Dead pane surfacing**: `show-fleet.sh` + rust overview emit `dead_panes` count; alert at age >60s. +- [ ] 2.2 **F2 — Cap-probe cache TTL**: 60s default; invalidate on bringup-failure marker. +- [ ] 2.3 **F3 — Auto-wake on bringup**: `CODEX_FLEET_AUTO_WAKE=1` default; fires `wake-prompt.sh` once before `DONE.` +- [ ] 2.4 **F4 — plan-watcher inherits --allow-waves**: pass flag from `run_plan_validator()`; env override. +- [ ] 2.5 **F5 — Worker-ready signal + retry**: `force-claim.sh` reads pane input-mode before send-keys; backoff on not-ready. +- [ ] 2.6 **F6 — Codex auto-submit smoke test + fix**: script a 1-pane fleet through claim→execute→status; assert worker starts. + +## 3. Verification + +- [ ] 3.1 Each subtask ships a focused test under `scripts/codex-fleet/test/<finding>-test.sh` that reproduces the original symptom and asserts the fix. +- [ ] 3.2 Run `openspec validate agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03 --type change --strict`. +- [ ] 3.3 Run `openspec validate --specs`. +- [ ] 3.4 Integration: run a fresh `full-bringup.sh --plan-slug fleet-dispatch-fixes-2026-05-18 --n 4 --auto-fleet-id --no-cap-cache` against this very change's plan workspace and assert at least 4 Colony task claims land within 90 seconds of `DONE.` (vs the current 0). +- [ ] 3.5 Capture `/tmp/codex-fleet-telemetry-dispatch-fixes.jsonl` and attach the last 30 lines to the integration PR. + +## 4. Cleanup (mandatory; run before claiming completion) + +- [ ] 4.1 Run the cleanup pipeline: `gx branch finish --branch agent/<your-name>/<branch-slug> --base dev --via-pr --wait-for-merge --cleanup`. This handles commit -> push -> PR create -> merge wait -> worktree prune in one invocation. +- [ ] 4.2 Record the PR URL and final merge state (`MERGED`) in the completion handoff. 
+- [ ] 4.3 Confirm the sandbox worktree is gone (`git worktree list` no longer shows the agent path; `git branch -a` shows no surviving local/remote refs for the branch). diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/architect.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/architect.md new file mode 100644 index 0000000..3aeff36 --- /dev/null +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/architect.md @@ -0,0 +1,13 @@ +# Architect + +Plan: `fleet-dispatch-fixes-2026-05-18` + +## Responsibility + +Check that each subtask touches only its own file_scope. Verify F5 and F6 land behind CODEX_FLEET_DISPATCH_V2=1 in code (gated rollout). + +## Checkpoints + +- [ ] Read `plan.md`, `tasks.md`, and `checkpoints.md`. +- [ ] Record decisions or blockers in the plan workspace before handoff. +- [ ] Keep task-thread status aligned with local files. diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/checkpoints.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/checkpoints.md new file mode 100644 index 0000000..6fd2a17 --- /dev/null +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/checkpoints.md @@ -0,0 +1,17 @@ +# Checkpoints + +## Rollup + +- available: 6 +- claimed: 0 +- completed: 0 +- blocked: 0 + +## Subtasks + +- [ ] sub-0 F1 — Surface dead panes in show-fleet.sh + rust overview [available] +- [ ] sub-1 F2 — Cap-probe cache TTL hardening [available] +- [ ] sub-2 F3 — Auto-wake workers at end of full-bringup [available] +- [ ] sub-3 F4 — plan-watcher inherits --allow-waves [available] +- [ ] sub-4 F5 — Worker-ready signal + retry in force-claim [available] +- [ ] sub-5 F6 — Codex auto-submit smoke test + fix [available] diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/critic.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/critic.md new file mode 100644 index 0000000..6393dce --- /dev/null +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/critic.md @@ -0,0 +1,13 @@ +# Critic + +Plan: `fleet-dispatch-fixes-2026-05-18` + 
+## Responsibility + +Adversarial review: does F4 break plans WITHOUT depends_on? Does F2 cause re-probing that flaps account health? Does F3 double-wake when wake-prompt window also fires? + +## Checkpoints + +- [ ] Read `plan.md`, `tasks.md`, and `checkpoints.md`. +- [ ] Record decisions or blockers in the plan workspace before handoff. +- [ ] Keep task-thread status aligned with local files. diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/executor.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/executor.md new file mode 100644 index 0000000..4e0230e --- /dev/null +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/executor.md @@ -0,0 +1,13 @@ +# Executor + +Plan: `fleet-dispatch-fixes-2026-05-18` + +## Responsibility + +Implement claimed subtasks inside declared file_scope. Each fix ships with at least one assertion in scripts/codex-fleet/test/ that would have caught the original bug. + +## Checkpoints + +- [ ] Read `plan.md`, `tasks.md`, and `checkpoints.md`. +- [ ] Record decisions or blockers in the plan workspace before handoff. +- [ ] Keep task-thread status aligned with local files. diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json b/openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json new file mode 100644 index 0000000..5dd4b77 --- /dev/null +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json @@ -0,0 +1,94 @@ +{ + "schema_version": 1, + "plan_slug": "fleet-dispatch-fixes-2026-05-18", + "title": "Fix codex-fleetui dispatch path: dead panes, cap cache, auto-wake, plan-watcher waves, ready signal, Codex auto-submit", + "problem": "Real-fleet telemetry from the 2026-05-18 marketing-content-waves bringup (recodee repo) surfaced six concrete dispatch-path defects that block workers from claiming Colony tasks even when the fleet is physically up. F1: stale dead panes linger silently. F2: cap-probe cache outlived quota recovery. F3: wake-prompt window stays blank — workers idle at default Codex placeholders. 
F4: plan-watcher re-validates without --allow-waves and silently falls back to other plans when our priority plan has depends_on. F5: force-claim send-keys hits 'not in a mode' on non-idle panes and drops dispatch with no retry. F6: even when send-keys lands, Codex's auto-submit doesn't fire — context drops but Colony never sees a claim. Net: a healthy-looking tmux fleet performs zero work.", + "acceptance_criteria": [ + "show-fleet.sh and rust overview surface dead_panes count; alert fires when any pane has dead==1 for >60s", + "cap-probe cache invalidates after 60s default; invalidates immediately on bringup-failure marker", + "CODEX_FLEET_AUTO_WAKE=1 default fires wake-prompt.sh once at end of full-bringup.sh, before DONE banner; can be disabled with =0", + "plan-watcher.sh inherits --allow-waves when invoking lib/plan-validator.sh; operator override via CODEX_FLEET_PLAN_VALIDATOR_FLAGS env", + "force-claim.sh skips panes that fail input-ready check; retries on next tick with backoff instead of emitting 'not in a mode' silently", + "Smoke test scripts/codex-fleet/test/codex-auto-submit-test.sh asserts a 1-pane fleet on a no-op plan reaches at least one Colony claim within 90s; current behavior would fail (zero claims)", + "Integration test: full-bringup.sh --plan-slug fleet-dispatch-fixes-2026-05-18 --n 4 --auto-fleet-id --no-cap-cache against THIS plan results in >=4 Colony claims within 90s of DONE banner", + "openspec validate agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03 --type change --strict passes; openspec validate --specs passes", + "Each subtask appends one example JSONL entry to docs/fleet-telemetry-cases.md documenting the original failure mode + the new test assertion" + ], + "roles": ["planner", "architect", "critic", "executor", "writer", "verifier"], + "tasks": [ + { + "subtask_index": 0, + "title": "F1 — Surface dead panes in show-fleet.sh + rust overview", + "description": "Read tmux #{pane_dead} via list-panes -F. 
Emit dead_panes count in scripts/codex-fleet/show-fleet.sh JSON output. Add alert when any pane has dead==1 for >60s (read pane_dead_status_changed timestamp from /tmp/claude-viz/fleet-state.json if present, else first-detection time). Add a one-line example to docs/fleet-telemetry-cases.md.", + "file_scope": [ + "scripts/codex-fleet/show-fleet.sh", + "docs/fleet-telemetry-cases.md" + ], + "depends_on": [], + "spec_row_id": null, + "capability_hint": "doc_work", + "status": "available" + }, + { + "subtask_index": 1, + "title": "F2 — Cap-probe cache TTL hardening", + "description": "Lower cap-probe cache TTL default to 60 seconds (current is much higher — first run found 5/6, fresh probe 5min later found 8/8). Invalidate cache when /tmp/claude-viz/bringup-failure.marker exists. Add a CODEX_FLEET_CAP_CACHE_TTL env override. Touch ONLY cap-probe.sh; document the new env in scripts/codex-fleet/README.md or equivalent.", + "file_scope": [ + "scripts/codex-fleet/cap-probe.sh", + "scripts/codex-fleet/cap-probe-cache.sh" + ], + "depends_on": [], + "spec_row_id": null, + "capability_hint": "test_work", + "status": "available" + }, + { + "subtask_index": 2, + "title": "F3 — Auto-wake workers at end of full-bringup", + "description": "Add CODEX_FLEET_AUTO_WAKE env (default 1). When set, run wake-prompt.sh ONCE at the very end of full-bringup.sh, after the iOS chrome verify and before the DONE banner. Skip when CODEX_FLEET_AUTO_WAKE=0. Verify wake-prompt.sh tolerates being called outside its usual ticker context (idempotent). 
Touch ONLY full-bringup.sh.", + "file_scope": [ + "scripts/codex-fleet/full-bringup.sh" + ], + "depends_on": [], + "spec_row_id": null, + "capability_hint": "api_work", + "status": "available" + }, + { + "subtask_index": 3, + "title": "F4 — plan-watcher inherits --allow-waves", + "description": "In scripts/codex-fleet/plan-watcher.sh:run_plan_validator(), pass --allow-waves to the validator invocation (around line 187-189 where we see summary=`\"$validator\" \"$plan_json\" 2>/dev/null`). Add CODEX_FLEET_PLAN_VALIDATOR_FLAGS env override for operators who need to inject other flags. Touch ONLY plan-watcher.sh.", + "file_scope": [ + "scripts/codex-fleet/plan-watcher.sh" + ], + "depends_on": [], + "spec_row_id": null, + "capability_hint": "frontend_work", + "status": "available" + }, + { + "subtask_index": 4, + "title": "F5 — Worker-ready signal + retry in force-claim", + "description": "Before send-keys, force-claim.sh checks pane input-mode via `tmux display-message -p -t <pane-id> '#{pane_in_mode}'` AND a Codex-input-state heuristic (capture last line, look for `›` prompt marker). If not ready, log 'pane not-ready; deferring' and skip — DO NOT emit 'not in a mode' nor consume the Colony claim. The deferred subtask returns to ready state on the next tick. Touch ONLY force-claim.sh.", + "file_scope": [ + "scripts/codex-fleet/force-claim.sh" + ], + "depends_on": [], + "spec_row_id": null, + "capability_hint": "frontend_work", + "status": "available" + }, + { + "subtask_index": 5, + "title": "F6 — Codex auto-submit smoke test + fix", + "description": "Write scripts/codex-fleet/test/codex-auto-submit-test.sh: spin up a 1-pane fleet against a no-op plan, send-keys a wake prompt, assert Colony shows >=1 claim within 90s. The current dispatch path will fail this test (zero claims). Then fix: experiment with `tmux send-keys ... Enter Enter`, `paste-buffer` + `paste-buffer -p`, or `Tab Enter` until the smoke test passes. 
Update force-claim.sh OR worker-prompt.md (whichever owns the submit step) with the working pattern. Document in docs/fleet-telemetry-cases.md.", + "file_scope": [ + "scripts/codex-fleet/test/codex-auto-submit-test.sh" + ], + "depends_on": [], + "spec_row_id": null, + "capability_hint": "test_work", + "status": "available" + } + ] +} diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.md new file mode 100644 index 0000000..b7beee4 --- /dev/null +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.md @@ -0,0 +1,31 @@ +# Plan: fleet-dispatch-fixes-2026-05-18 + +Fix the codex-fleetui dispatch path so a bootstrapped fleet actually performs work. Six findings, six parallel-ready subtasks, one integration PR. + +## Problem + +The 2026-05-18 marketing-content-waves fleet run surfaced six dispatch-path defects: dead-pane silence, stale cap cache, blank wake-prompt, plan-watcher missing --allow-waves, send-keys "not in a mode" no-retry, Codex auto-submit failure. Symptoms compound — net effect is a healthy-looking tmux fleet that performs zero work. + +## Scope + +| # | Subtask | Files | Cap. hint | +|---|---------|-------|-----------| +| 0 | F1 — surface dead panes | `show-fleet.sh`, `docs/fleet-telemetry-cases.md` | `doc_work` | +| 1 | F2 — cap-probe TTL | `cap-probe.sh`, `cap-probe-cache.sh` | `test_work` | +| 2 | F3 — auto-wake on bringup | `full-bringup.sh` | `api_work` | +| 3 | F4 — plan-watcher --allow-waves | `plan-watcher.sh` | `frontend_work` | +| 4 | F5 — worker-ready signal + retry | `force-claim.sh` | `frontend_work` | +| 5 | F6 — Codex auto-submit smoke test | `test/codex-auto-submit-test.sh` | `test_work` | + +All file_scopes are disjoint. All depends_on are empty (workers can claim any subtask in any order — plan-watcher will accept this plan without --allow-waves until F4 lands and lifts the constraint). + +## Out of scope + +- Colony coordination protocol changes. 
+- Recodee repo edits. +- Rust dashboard renderer overhaul (separate plan). +- Account auth-rotation rework. + +## Telemetry side-task + +Integration test (acceptance criterion 7) runs `full-bringup.sh --plan-slug fleet-dispatch-fixes-2026-05-18 --n 4 --auto-fleet-id --no-cap-cache` against THIS plan after all subtasks land, asserting >=4 Colony claims within 90s of DONE. That's the regression gate. diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/planner.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/planner.md new file mode 100644 index 0000000..38c1f1f --- /dev/null +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/planner.md @@ -0,0 +1,13 @@ +# Planner + +Plan: `fleet-dispatch-fixes-2026-05-18` + +## Responsibility + +Keep plan.json + tasks.md + checkpoints.md aligned. No reordering needed — all 6 subtasks are parallel-ready. + +## Checkpoints + +- [ ] Read `plan.md`, `tasks.md`, and `checkpoints.md`. +- [ ] Record decisions or blockers in the plan workspace before handoff. +- [ ] Keep task-thread status aligned with local files. 
diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/tasks.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/tasks.md new file mode 100644 index 0000000..384b3c0 --- /dev/null +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/tasks.md @@ -0,0 +1,10 @@ +# Tasks + +| # | Status | Title | Files | Depends on | Capability | Spec row | Owner | +| - | - | - | - | - | - | - | - | +0|available|F1 — Surface dead panes in show-fleet.sh + rust overview|`scripts/codex-fleet/show-fleet.sh`<br>`docs/fleet-telemetry-cases.md`|-|doc_work|-|- +1|available|F2 — Cap-probe cache TTL hardening|`scripts/codex-fleet/cap-probe.sh`<br>`scripts/codex-fleet/cap-probe-cache.sh`|-|test_work|-|- +2|available|F3 — Auto-wake workers at end of full-bringup|`scripts/codex-fleet/full-bringup.sh`|-|api_work|-|- +3|available|F4 — plan-watcher inherits --allow-waves|`scripts/codex-fleet/plan-watcher.sh`|-|frontend_work|-|- +4|available|F5 — Worker-ready signal + retry in force-claim|`scripts/codex-fleet/force-claim.sh`|-|frontend_work|-|- +5|available|F6 — Codex auto-submit smoke test + fix|`scripts/codex-fleet/test/codex-auto-submit-test.sh`|-|test_work|-|- diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/verifier.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/verifier.md new file mode 100644 index 0000000..9b924a2 --- /dev/null +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/verifier.md @@ -0,0 +1,13 @@ +# Verifier + +Plan: `fleet-dispatch-fixes-2026-05-18` + +## Responsibility + +Run scripts/codex-fleet/test/codex-auto-submit-test.sh and the integration bringup gate (>=4 claims in 90s). Open the single squashed PR against main. + +## Checkpoints + +- [ ] Read `plan.md`, `tasks.md`, and `checkpoints.md`. +- [ ] Record decisions or blockers in the plan workspace before handoff. +- [ ] Keep task-thread status aligned with local files. 
diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/writer.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/writer.md new file mode 100644 index 0000000..39b4967 --- /dev/null +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/writer.md @@ -0,0 +1,13 @@ +# Writer + +Plan: `fleet-dispatch-fixes-2026-05-18` + +## Responsibility + +Document each finding's failure mode + new test assertion in docs/fleet-telemetry-cases.md (one entry per finding). + +## Checkpoints + +- [ ] Read `plan.md`, `tasks.md`, and `checkpoints.md`. +- [ ] Record decisions or blockers in the plan workspace before handoff. +- [ ] Keep task-thread status aligned with local files. 
From 872856990d4c09cc35747da91c2c5266246eb0f7 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Mon, 18 May 2026 14:16:40 +0200 Subject: [PATCH 2/4] feat(fleet): seed codex-first-launch-supervisor + F7 plan subtask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live FLEET_ID=3 bringup surfaced bug #7: per-account CODEX_HOMEs under /tmp/codex-fleet/ trigger Codex CLI first-launch prompts ("Do you trust …", "External agent config detected", "Press enter to continue") that block worker bootstrap before the input box exists. All 8 workers stalled; operator had to click each pane. This commit: - Seeds scripts/codex-fleet/codex-first-launch-supervisor.sh that polls each worker pane and auto-answers the three prompts. Verified working live (6/8 panes drained automatically; remaining 2 need slight backoff tuning). - Expands plan subtask sub-2 (F3) to wire BOTH auto-wake and auto-bypass into full-bringup.sh tail, gated by CODEX_FLEET_AUTO_WAKE / CODEX_FLEET_AUTO_BYPASS env (default 1). Order: auto-bypass runs before auto-wake. - Narrows sub-6 (F7) to ship a smoke test that asserts no panes remain stuck on first-launch prompts within 30s of DONE. - Adds matching acceptance criterion + proposal narrative for F7. The two-pane operator layout (one kitty with the agents, one empty operator shell) is now the default — see PR description. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../proposal.md | 6 +- .../tasks.md | 1 + .../checkpoints.md | 3 +- .../fleet-dispatch-fixes-2026-05-18/plan.json | 44 ++++++++--- .../fleet-dispatch-fixes-2026-05-18/tasks.md | 3 +- .../codex-first-launch-supervisor.sh | 78 +++++++++++++++++++ 6 files changed, 119 insertions(+), 16 deletions(-) create mode 100755 scripts/codex-fleet/codex-first-launch-supervisor.sh diff --git a/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/proposal.md b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/proposal.md index 85389b8..bccfe8a 100644 --- a/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/proposal.md +++ b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/proposal.md @@ -8,8 +8,9 @@ Real-fleet telemetry from the 2026-05-18 `marketing-content-waves` bringup again 4. `plan-watcher.sh` re-validates plan.json on each tick *without* passing `--allow-waves`, so any plan with `depends_on` fails hard, plan-watcher skips dispatch, and `force-claim` silently falls back to whatever plan is next in queue (we observed our priority plan being skipped while `trading-edge-foundations-pt2` got dispatched instead). 5. `force-claim` send-keys hits "not in a mode" on non-idle Codex panes and silently drops the dispatch — no retry, no backoff, no operator signal. 6. Even when send-keys reaches the input box, Codex's auto-submit doesn't fire — the prompt sits typed but never gets submitted. Context % drops (so the keys arrived) but no Colony claim is recorded. +7. **(observed live on FLEET_ID=3 bringup)** Bringup creates per-account CODEX_HOMEs under `/tmp/codex-fleet/-`. 
On Codex CLI first launch in a fresh home, three interactive prompts block the worker before it ever reaches the input box: `Do you trust the contents of this directory?` → `External agent config detected (Proceed with selected)` → optional `Press enter to continue`. **All 8 workers of FLEET_ID=3 stalled on these three-stage prompts**; force-claim, plan-watcher, and auto-wake all become no-ops because Codex itself hasn't reached its REPL yet. The operator currently has to click through every pane by hand. -These bugs compound: (4) blocks dispatch for plans with deps, (5) blocks dispatch for busy panes, (6) blocks dispatch *even when send-keys lands in the input box*. The net effect is that a freshly-bootstrapped fleet looks healthy in tmux but performs zero work. +These bugs compound: (4) blocks dispatch for plans with deps, (5) blocks dispatch for busy panes, (6) blocks dispatch *even when send-keys lands in the input box*, and **(7) prevents the input box from existing in the first place**. The net effect is that a freshly-bootstrapped fleet looks healthy in tmux but performs zero work. ## What Changes @@ -19,10 +20,11 @@ These bugs compound: (4) blocks dispatch for plans with deps, (5) blocks dispatc - **F4 — plan-watcher inherits --allow-waves**: pass `--allow-waves` to `lib/plan-validator.sh` from `plan-watcher.sh:run_plan_validator()`. Optional env `CODEX_FLEET_PLAN_VALIDATOR_FLAGS` for operator override. - **F5 — worker-ready signal + retry**: `force-claim.sh` checks each worker pane's mode (via `tmux display-message -p -t '#{pane_in_mode}'` plus a Codex-specific input-state heuristic) before send-keys; if not ready, backoff and retry on next tick rather than emit "not in a mode". - **F6 — Codex auto-submit**: investigate whether send-keys requires a different terminator (e.g., `Enter Enter`, or sending text via `paste-buffer` + paste vs. raw send-keys). 
Add a smoke test in `scripts/codex-fleet/test/` that scripts a 1-pane fleet through claim → execute → status on a no-op plan, asserting the worker actually starts. +- **F7 — Codex first-launch prompt auto-bypass**: ship `scripts/codex-fleet/codex-first-launch-supervisor.sh` (already seeded in this branch) that polls each worker pane and auto-answers the three first-launch prompts (`Do you trust …` → Enter; `External agent config detected` → key `1`; `Press enter to continue` → Enter). Wire it into `full-bringup.sh` as the second-to-last step (before F3's auto-wake, after the chrome verify), gated by `CODEX_FLEET_AUTO_BYPASS=1` default-on. Idempotent; safe to re-run. ## Impact - **Risk**: medium. Changes touch the dispatch hot path; a regression could prevent dispatch globally. Each subtask is bounded to a single script with disjoint file_scope, so they can roll back independently. -- **Surfaces affected**: `scripts/codex-fleet/show-fleet.sh`, `scripts/codex-fleet/cap-probe.sh`, `scripts/codex-fleet/full-bringup.sh`, `scripts/codex-fleet/plan-watcher.sh`, `scripts/codex-fleet/force-claim.sh`, `scripts/codex-fleet/test/` (new smoke test). No Colony / recodee changes. +- **Surfaces affected**: `scripts/codex-fleet/show-fleet.sh`, `scripts/codex-fleet/cap-probe.sh`, `scripts/codex-fleet/full-bringup.sh`, `scripts/codex-fleet/plan-watcher.sh`, `scripts/codex-fleet/force-claim.sh`, `scripts/codex-fleet/test/` (new smoke test), **`scripts/codex-fleet/codex-first-launch-supervisor.sh`** (new). No Colony / recodee changes. - **Rollout**: features F1-F4 are observability/inheritance fixes — ship default-on. F5 (ready signal) and F6 (auto-submit) gate behind env `CODEX_FLEET_DISPATCH_V2=1` for one cycle of operator testing before flipping default. - **Telemetry**: each subtask must also append one example JSONL entry to `docs/fleet-telemetry-cases.md` so future regressions are catchable. 
diff --git a/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/tasks.md b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/tasks.md index bd420e0..beaf965 100644 --- a/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/tasks.md +++ b/openspec/changes/agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03/tasks.md @@ -26,6 +26,7 @@ Owned by 6 fleet subtasks in `openspec/plans/fleet-dispatch-fixes-2026-05-18/pla - [ ] 2.4 **F4 — plan-watcher inherits --allow-waves**: pass flag from `run_plan_validator()`; env override. - [ ] 2.5 **F5 — Worker-ready signal + retry**: `force-claim.sh` reads pane input-mode before send-keys; backoff on not-ready. - [ ] 2.6 **F6 — Codex auto-submit smoke test + fix**: script a 1-pane fleet through claim→execute→status; assert worker starts. +- [x] 2.7 **F7 — Codex first-launch prompt auto-bypass**: `scripts/codex-fleet/codex-first-launch-supervisor.sh` seeded in this branch; wire into `full-bringup.sh` as a fleet subtask (sub-2 in `openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json`; sub-6 is the matching smoke test). ## 3. 
Verification diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/checkpoints.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/checkpoints.md index 6fd2a17..66fcc48 100644 --- a/openspec/plans/fleet-dispatch-fixes-2026-05-18/checkpoints.md +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/checkpoints.md @@ -2,7 +2,7 @@ ## Rollup -- available: 6 +- available: 7 - claimed: 0 - completed: 0 - blocked: 0 @@ -15,3 +15,4 @@ - [ ] sub-3 F4 — plan-watcher inherits --allow-waves [available] - [ ] sub-4 F5 — Worker-ready signal + retry in force-claim [available] - [ ] sub-5 F6 — Codex auto-submit smoke test + fix [available] +- [ ] sub-6 F7-test — Smoke test that no panes stay stuck on first-launch prompts [available] diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json b/openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json index 5dd4b77..5fa2b27 100644 --- a/openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json @@ -2,7 +2,7 @@ "schema_version": 1, "plan_slug": "fleet-dispatch-fixes-2026-05-18", "title": "Fix codex-fleetui dispatch path: dead panes, cap cache, auto-wake, plan-watcher waves, ready signal, Codex auto-submit", - "problem": "Real-fleet telemetry from the 2026-05-18 marketing-content-waves bringup (recodee repo) surfaced six concrete dispatch-path defects that block workers from claiming Colony tasks even when the fleet is physically up. F1: stale dead panes linger silently. F2: cap-probe cache outlived quota recovery. F3: wake-prompt window stays blank — workers idle at default Codex placeholders. F4: plan-watcher re-validates without --allow-waves and silently falls back to other plans when our priority plan has depends_on. F5: force-claim send-keys hits 'not in a mode' on non-idle panes and drops dispatch with no retry. F6: even when send-keys lands, Codex's auto-submit doesn't fire — context drops but Colony never sees a claim. 
Net: a healthy-looking tmux fleet performs zero work.", + "problem": "Real-fleet telemetry from the 2026-05-18 marketing-content-waves bringup (recodee repo) surfaced six concrete dispatch-path defects that block workers from claiming Colony tasks even when the fleet is physically up. F1: stale dead panes linger silently. F2: cap-probe cache outlived quota recovery. F3: wake-prompt window stays blank \u2014 workers idle at default Codex placeholders. F4: plan-watcher re-validates without --allow-waves and silently falls back to other plans when our priority plan has depends_on. F5: force-claim send-keys hits 'not in a mode' on non-idle panes and drops dispatch with no retry. F6: even when send-keys lands, Codex's auto-submit doesn't fire \u2014 context drops but Colony never sees a claim. Net: a healthy-looking tmux fleet performs zero work.", "acceptance_criteria": [ "show-fleet.sh and rust overview surface dead_panes count; alert fires when any pane has dead==1 for >60s", "cap-probe cache invalidates after 60s default; invalidates immediately on bringup-failure marker", @@ -12,13 +12,21 @@ "Smoke test scripts/codex-fleet/test/codex-auto-submit-test.sh asserts a 1-pane fleet on a no-op plan reaches at least one Colony claim within 90s; current behavior would fail (zero claims)", "Integration test: full-bringup.sh --plan-slug fleet-dispatch-fixes-2026-05-18 --n 4 --auto-fleet-id --no-cap-cache against THIS plan results in >=4 Colony claims within 90s of DONE banner", "openspec validate agent-claude-cfui-dispatch-improvements-zzz-2026-05-1-2026-05-18-14-03 --type change --strict passes; openspec validate --specs passes", - "Each subtask appends one example JSONL entry to docs/fleet-telemetry-cases.md documenting the original failure mode + the new test assertion" + "Each subtask appends one example JSONL entry to docs/fleet-telemetry-cases.md documenting the original failure mode + the new test assertion", + "scripts/codex-fleet/codex-first-launch-supervisor.sh 
fires once at the tail of full-bringup.sh (after iOS chrome verify, before DONE banner), gated by CODEX_FLEET_AUTO_BYPASS env (default 1); a fresh fleet bringup shows zero panes stuck on 'Do you trust' within 30s of DONE" + ], + "roles": [ + "planner", + "architect", + "critic", + "executor", + "writer", + "verifier" ], - "roles": ["planner", "architect", "critic", "executor", "writer", "verifier"], "tasks": [ { "subtask_index": 0, - "title": "F1 — Surface dead panes in show-fleet.sh + rust overview", + "title": "F1 \u2014 Surface dead panes in show-fleet.sh + rust overview", "description": "Read tmux #{pane_dead} via list-panes -F. Emit dead_panes count in scripts/codex-fleet/show-fleet.sh JSON output. Add alert when any pane has dead==1 for >60s (read pane_dead_status_changed timestamp from /tmp/claude-viz/fleet-state.json if present, else first-detection time). Add a one-line example to docs/fleet-telemetry-cases.md.", "file_scope": [ "scripts/codex-fleet/show-fleet.sh", @@ -31,8 +39,8 @@ }, { "subtask_index": 1, - "title": "F2 — Cap-probe cache TTL hardening", - "description": "Lower cap-probe cache TTL default to 60 seconds (current is much higher — first run found 5/6, fresh probe 5min later found 8/8). Invalidate cache when /tmp/claude-viz/bringup-failure.marker exists. Add a CODEX_FLEET_CAP_CACHE_TTL env override. Touch ONLY cap-probe.sh; document the new env in scripts/codex-fleet/README.md or equivalent.", + "title": "F2 \u2014 Cap-probe cache TTL hardening", + "description": "Lower cap-probe cache TTL default to 60 seconds (current is much higher \u2014 first run found 5/6, fresh probe 5min later found 8/8). Invalidate cache when /tmp/claude-viz/bringup-failure.marker exists. Add a CODEX_FLEET_CAP_CACHE_TTL env override. 
Touch ONLY cap-probe.sh; document the new env in scripts/codex-fleet/README.md or equivalent.", "file_scope": [ "scripts/codex-fleet/cap-probe.sh", "scripts/codex-fleet/cap-probe-cache.sh" @@ -44,8 +52,8 @@ }, { "subtask_index": 2, - "title": "F3 — Auto-wake workers at end of full-bringup", - "description": "Add CODEX_FLEET_AUTO_WAKE env (default 1). When set, run wake-prompt.sh ONCE at the very end of full-bringup.sh, after the iOS chrome verify and before the DONE banner. Skip when CODEX_FLEET_AUTO_WAKE=0. Verify wake-prompt.sh tolerates being called outside its usual ticker context (idempotent). Touch ONLY full-bringup.sh.", + "title": "F3+F7 wire-in \u2014 auto-wake + auto-bypass at tail of full-bringup", + "description": "Wire two end-of-bringup steps into scripts/codex-fleet/full-bringup.sh, both AFTER 'iOS chrome verified' and BEFORE 'DONE.' banner: (F7) call scripts/codex-fleet/codex-first-launch-supervisor.sh (already seeded in this branch) to drain Codex first-launch prompts, gated by CODEX_FLEET_AUTO_BYPASS=1 default; (F3) call scripts/codex-fleet/wake-prompt.sh once to wake workers, gated by CODEX_FLEET_AUTO_WAKE=1 default. Auto-bypass must run BEFORE auto-wake (workers need to be at Codex idle prompt before wake-prompt fires). Both gates default-on; operator opts out via env=0. Touch ONLY full-bringup.sh.", "file_scope": [ "scripts/codex-fleet/full-bringup.sh" ], @@ -56,7 +64,7 @@ }, { "subtask_index": 3, - "title": "F4 — plan-watcher inherits --allow-waves", + "title": "F4 \u2014 plan-watcher inherits --allow-waves", "description": "In scripts/codex-fleet/plan-watcher.sh:run_plan_validator(), pass --allow-waves to the validator invocation (around line 187-189 where we see summary=`\"$validator\" \"$plan_json\" 2>/dev/null`). Add CODEX_FLEET_PLAN_VALIDATOR_FLAGS env override for operators who need to inject other flags. 
Touch ONLY plan-watcher.sh.", "file_scope": [ "scripts/codex-fleet/plan-watcher.sh" @@ -68,8 +76,8 @@ }, { "subtask_index": 4, - "title": "F5 — Worker-ready signal + retry in force-claim", - "description": "Before send-keys, force-claim.sh checks pane input-mode via `tmux display-message -p -t '#{pane_in_mode}'` AND a Codex-input-state heuristic (capture last line, look for `›` prompt marker). If not ready, log 'pane not-ready; deferring' and skip — DO NOT emit 'not in a mode' nor consume the Colony claim. The deferred subtask returns to ready state on the next tick. Touch ONLY force-claim.sh.", + "title": "F5 \u2014 Worker-ready signal + retry in force-claim", + "description": "Before send-keys, force-claim.sh checks pane input-mode via `tmux display-message -p -t '#{pane_in_mode}'` AND a Codex-input-state heuristic (capture last line, look for `\u203a` prompt marker). If not ready, log 'pane not-ready; deferring' and skip \u2014 DO NOT emit 'not in a mode' nor consume the Colony claim. The deferred subtask returns to ready state on the next tick. Touch ONLY force-claim.sh.", "file_scope": [ "scripts/codex-fleet/force-claim.sh" ], @@ -80,7 +88,7 @@ }, { "subtask_index": 5, - "title": "F6 — Codex auto-submit smoke test + fix", + "title": "F6 \u2014 Codex auto-submit smoke test + fix", "description": "Write scripts/codex-fleet/test/codex-auto-submit-test.sh: spin up a 1-pane fleet against a no-op plan, send-keys a wake prompt, assert Colony shows >=1 claim within 90s. The current dispatch path will fail this test (zero claims). Then fix: experiment with `tmux send-keys ... Enter Enter`, `paste-buffer` + `paste-buffer -p`, or `Tab Enter` until the smoke test passes. Update force-claim.sh OR worker-prompt.md (whichever owns the submit step) with the working pattern. 
Document in docs/fleet-telemetry-cases.md.", "file_scope": [ "scripts/codex-fleet/test/codex-auto-submit-test.sh" @@ -89,6 +97,18 @@ "spec_row_id": null, "capability_hint": "test_work", "status": "available" + }, + { + "subtask_index": 6, + "title": "F7-test \u2014 Smoke test that no panes stay stuck on first-launch prompts", + "description": "Write scripts/codex-fleet/test/first-launch-bypass-test.sh that boots a 1-pane fleet against a no-op plan and asserts: within 30s of DONE banner, the worker pane shows zero matches for 'Do you trust', 'External agent config detected', or 'Press enter to continue'. Skips if CODEX_FLEET_AUTO_BYPASS=0 (operator opt-out). This test will fail today (proving the bug) and pass after F3+F7 wire-in lands.", + "file_scope": [ + "scripts/codex-fleet/test/first-launch-bypass-test.sh" + ], + "depends_on": [], + "spec_row_id": null, + "capability_hint": "test_work", + "status": "available" } ] } diff --git a/openspec/plans/fleet-dispatch-fixes-2026-05-18/tasks.md b/openspec/plans/fleet-dispatch-fixes-2026-05-18/tasks.md index 384b3c0..dcf97e5 100644 --- a/openspec/plans/fleet-dispatch-fixes-2026-05-18/tasks.md +++ b/openspec/plans/fleet-dispatch-fixes-2026-05-18/tasks.md @@ -4,7 +4,8 @@ | - | - | - | - | - | - | - | - | 0|available|F1 — Surface dead panes in show-fleet.sh + rust overview|`scripts/codex-fleet/show-fleet.sh`
`docs/fleet-telemetry-cases.md`|-|doc_work|-|- 1|available|F2 — Cap-probe cache TTL hardening|`scripts/codex-fleet/cap-probe.sh`
`scripts/codex-fleet/cap-probe-cache.sh`|-|test_work|-|- -2|available|F3 — Auto-wake workers at end of full-bringup|`scripts/codex-fleet/full-bringup.sh`|-|api_work|-|- +2|available|F3+F7 wire-in — auto-wake + auto-bypass at tail of full-bringup|`scripts/codex-fleet/full-bringup.sh`|-|api_work|-|- 3|available|F4 — plan-watcher inherits --allow-waves|`scripts/codex-fleet/plan-watcher.sh`|-|frontend_work|-|- 4|available|F5 — Worker-ready signal + retry in force-claim|`scripts/codex-fleet/force-claim.sh`|-|frontend_work|-|- 5|available|F6 — Codex auto-submit smoke test + fix|`scripts/codex-fleet/test/codex-auto-submit-test.sh`|-|test_work|-|- +6|available|F7-test — Smoke test that no panes stay stuck on first-launch prompts|`scripts/codex-fleet/test/first-launch-bypass-test.sh`|-|test_work|-|- diff --git a/scripts/codex-fleet/codex-first-launch-supervisor.sh b/scripts/codex-fleet/codex-first-launch-supervisor.sh new file mode 100755 index 0000000..0b40e21 --- /dev/null +++ b/scripts/codex-fleet/codex-first-launch-supervisor.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# codex-first-launch-supervisor — auto-drain Codex's first-launch interactive +# prompts so worker panes reach the input prompt without human clicks. +# +# Bringup creates per-account CODEX_HOMEs under /tmp/codex-fleet/. +# On first Codex CLI launch in a fresh home, three prompts block the worker: +# +# 1. "Do you trust the contents of this directory?" (Yes already highlighted → Enter) +# 2. "External agent config detected" / "Proceed with selected" (key `1`) +# 3. "Press enter to continue" (Enter) +# +# This script polls each worker pane, matches the prompt regex, and sends the +# right tmux key. Idempotent; safe to run multiple times. Designed to be +# invoked at the tail of full-bringup.sh (gated by CODEX_FLEET_AUTO_BYPASS=1 +# default) before the DONE banner — see F7 in +# openspec/plans/fleet-dispatch-fixes-2026-05-18/plan.json. 
+# +# Usage: +# bash scripts/codex-fleet/codex-first-launch-supervisor.sh +# +# Env knobs: +# TMUX_SOCKET — tmux -L socket name (default: codex-fleet) +# CODEX_FLEET_BYPASS_ROUNDS — max drain rounds per pane (default: 10) +# CODEX_FLEET_BYPASS_INTERVAL — sleep between rounds in seconds (default: 1.5) + +set -euo pipefail + +SESSION="${1:-codex-fleet}" +PANES="${2:-8}" +SOCKET="${TMUX_SOCKET:-codex-fleet}" +ROUNDS="${CODEX_FLEET_BYPASS_ROUNDS:-10}" +INTERVAL="${CODEX_FLEET_BYPASS_INTERVAL:-1.5}" + +tmx() { tmux -L "$SOCKET" "$@"; } +log() { printf '[first-launch-supervisor] %s\n' "$*" >&2; } + +drain_pane() { + local pane="$1" + local rounds=0 + local advanced=0 + while (( rounds < ROUNDS )); do + local snap; snap="$(tmx capture-pane -p -t "$pane" -S -25 2>/dev/null || true)" + [ -z "$snap" ] && return 0 + if printf '%s' "$snap" | grep -qE 'Do you trust the contents'; then + tmx send-keys -t "$pane" Enter 2>/dev/null || true + advanced=1 + sleep "$INTERVAL" + elif printf '%s' "$snap" | grep -qE 'External agent config detected|Proceed with selected'; then + tmx send-keys -t "$pane" "1" 2>/dev/null || true + advanced=1 + sleep "$INTERVAL" + elif printf '%s' "$snap" | grep -qE 'Press enter to continue[[:space:]]*$'; then + tmx send-keys -t "$pane" Enter 2>/dev/null || true + advanced=1 + sleep "$INTERVAL" + else + # No matched prompt → worker is at idle prompt or already past first-launch + if (( advanced )); then + log "$pane drained after $rounds round(s)" + fi + return 0 + fi + rounds=$(( rounds + 1 )) + done + log "WARN: $pane did not drain after $ROUNDS rounds" +} + +if ! tmx has-session -t "$SESSION" 2>/dev/null; then + log "session $SESSION not present on socket $SOCKET; nothing to drain" + exit 0 +fi + +log "draining first-launch prompts on $SESSION (panes=$PANES)" +for p in $(seq 1 "$PANES"); do + drain_pane "${SESSION}:overview.${p}" & +done +wait +log "done." 
From 852a073d8b1de3b1c9a7b6884564597b35ce9d36 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Mon, 18 May 2026 14:19:23 +0200 Subject: [PATCH 3/4] tune: bump first-launch supervisor INTERVAL default to 2.5s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live run showed 6/8 panes drained at 1.5s; remaining 2 needed ~9-15s total but only got 7.5s of attempts (5 rounds × 1.5s). 2.5s × 5 = 12.5s window catches slow Codex bootstraps without making fast cases noticeably slower. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/codex-fleet/codex-first-launch-supervisor.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/codex-fleet/codex-first-launch-supervisor.sh b/scripts/codex-fleet/codex-first-launch-supervisor.sh index 0b40e21..4849933 100755 --- a/scripts/codex-fleet/codex-first-launch-supervisor.sh +++ b/scripts/codex-fleet/codex-first-launch-supervisor.sh @@ -29,7 +29,7 @@ SESSION="${1:-codex-fleet}" PANES="${2:-8}" SOCKET="${TMUX_SOCKET:-codex-fleet}" ROUNDS="${CODEX_FLEET_BYPASS_ROUNDS:-10}" -INTERVAL="${CODEX_FLEET_BYPASS_INTERVAL:-1.5}" +INTERVAL="${CODEX_FLEET_BYPASS_INTERVAL:-2.5}" tmx() { tmux -L "$SOCKET" "$@"; } log() { printf '[first-launch-supervisor] %s\n' "$*" >&2; } From d3441f8e4667f6049ea3c516e42f62d21e964e1b Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Mon, 18 May 2026 14:32:21 +0200 Subject: [PATCH 4/4] fix: supervisor matches only live screen, not scrollback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Workers' Codex CLI echoes the prior menu text into tool-call history once they reach the worker loop. The supervisor's grep against the full scrollback (-S -100) saw the menu in history and falsely flagged a fully-drained pane as still stuck. Switching to bare `capture-pane -p` (live screen only) eliminates the false positive while still catching live menus. 
Also add `1` + Enter combo for the External-agent menu — some Codex builds advance on bare digit, others need Enter to confirm; sending both is harmless on already-advanced panes. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/codex-fleet/codex-first-launch-supervisor.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/codex-fleet/codex-first-launch-supervisor.sh b/scripts/codex-fleet/codex-first-launch-supervisor.sh index 4849933..16e51d3 100755 --- a/scripts/codex-fleet/codex-first-launch-supervisor.sh +++ b/scripts/codex-fleet/codex-first-launch-supervisor.sh @@ -38,15 +38,24 @@ drain_pane() { local pane="$1" local rounds=0 local advanced=0 + # Capture only the visible screen (no scrollback). Codex's worker loop + # echoes the original menu text back into tool-call history, which + # would otherwise produce false positives if we grep'd scrollback. + # `capture-pane` without -S sees just the live screen. while (( rounds < ROUNDS )); do - local snap; snap="$(tmx capture-pane -p -t "$pane" -S -25 2>/dev/null || true)" + local snap; snap="$(tmx capture-pane -p -t "$pane" 2>/dev/null || true)" [ -z "$snap" ] && return 0 if printf '%s' "$snap" | grep -qE 'Do you trust the contents'; then tmx send-keys -t "$pane" Enter 2>/dev/null || true advanced=1 sleep "$INTERVAL" elif printf '%s' "$snap" | grep -qE 'External agent config detected|Proceed with selected'; then + # Some Codex builds advance on bare `1`, others need `1` + Enter to + # confirm the selection. Send both to be robust — extra Enter on an + # already-advanced pane lands harmlessly in the (empty) input box. tmx send-keys -t "$pane" "1" 2>/dev/null || true + sleep 0.5 + tmx send-keys -t "$pane" Enter 2>/dev/null || true advanced=1 sleep "$INTERVAL" elif printf '%s' "$snap" | grep -qE 'Press enter to continue[[:space:]]*$'; then