From 864ff26ef329ce783d0a1e1717e3b52f4727bb45 Mon Sep 17 00:00:00 2001 From: QuentinCody <33259999+QuentinCody@users.noreply.github.com> Date: Wed, 13 May 2026 12:57:27 -0400 Subject: [PATCH 1/6] fix: pin dependencies and stabilize CI --- package-lock.json | 16 ++++----- package.json | 16 ++++----- src/commands/__tests__/verify.test.ts | 50 +++++++++++++++------------ src/harness/signatures.ts | 10 +++--- 4 files changed, 49 insertions(+), 43 deletions(-) diff --git a/package-lock.json b/package-lock.json index 531e8bf..e4f6d0c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,20 +13,20 @@ "linux" ], "dependencies": { - "commander": "^12.0.0" + "commander": "12.1.0" }, "bin": { "interlinked": "dist/index.js", "interlinked-hook": "dist/hook-entry.js" }, "devDependencies": { - "@biomejs/biome": "^2.4.13", - "@types/node": "^20.0.0", - "fast-check": "^4.7.0", - "tsup": "^8.0.0", - "tsx": "^4.0.0", - "typescript": "^5.5.0", - "vitest": "^3.0.0" + "@biomejs/biome": "2.4.13", + "@types/node": "20.19.33", + "fast-check": "4.7.0", + "tsup": "8.5.1", + "tsx": "4.21.0", + "typescript": "5.9.3", + "vitest": "3.2.4" }, "engines": { "node": ">=22.0.0" diff --git a/package.json b/package.json index 28f91a0..b538b45 100644 --- a/package.json +++ b/package.json @@ -82,18 +82,18 @@ }, "packageManager": "npm@10.0.0", "dependencies": { - "commander": "^12.0.0" + "commander": "12.1.0" }, "optionalDependencies": { "@typescript/native-preview": "7.0.0-dev.20260421.2" }, "devDependencies": { - "@biomejs/biome": "^2.4.13", - "@types/node": "^20.0.0", - "fast-check": "^4.7.0", - "tsup": "^8.0.0", - "tsx": "^4.0.0", - "typescript": "^5.5.0", - "vitest": "^3.0.0" + "@biomejs/biome": "2.4.13", + "@types/node": "20.19.33", + "fast-check": "4.7.0", + "tsup": "8.5.1", + "tsx": "4.21.0", + "typescript": "5.9.3", + "vitest": "3.2.4" } } diff --git a/src/commands/__tests__/verify.test.ts b/src/commands/__tests__/verify.test.ts index e4ad46e..59448c0 100644 --- 
a/src/commands/__tests__/verify.test.ts +++ b/src/commands/__tests__/verify.test.ts @@ -141,31 +141,35 @@ describe("scored suggestions", () => { }); describe("suppression detection", () => { - it("ignores suppression markers that only appear inside string literals", async () => { - const { verifyCommand } = await import("../verify.js"); + it( + "ignores suppression markers that only appear inside string literals", + async () => { + const { verifyCommand } = await import("../verify.js"); - // Build the literal token at runtime so this test file's own source - // doesn't contain a raw "@ts-expect-error" — the suppressions check would - // (correctly) nag every edit if it did. The fixture file written below - // still contains the literal token, which is the point of the test. - const tsIgnore = `@ts-${"ignore"}`; - writeFileSync( - join(tempDir, "fixture.ts"), - [ - "export function buildFixture() {", - ` const code = "// ${tsIgnore}\\nconst x = 1;";`, - ` return code.includes("${tsIgnore}");`, - "}", - "", - ].join("\n"), - ); + // Build the literal token at runtime so this test file's own source + // doesn't contain a raw "@ts-expect-error" — the suppressions check would + // (correctly) nag every edit if it did. The fixture file written below + // still contains the literal token, which is the point of the test. 
+ const tsIgnore = `@ts-${"ignore"}`; + writeFileSync( + join(tempDir, "fixture.ts"), + [ + "export function buildFixture() {", + ` const code = "// ${tsIgnore}\\nconst x = 1;";`, + ` return code.includes("${tsIgnore}");`, + "}", + "", + ].join("\n"), + ); - const captured = await captureStd(async () => { - await verifyCommand({ target: tempDir, json: true }); - }); - const result = JSON.parse(captured.stdout); - expect(result.suppressions.issues).toBe(0); - }); + const captured = await captureStd(async () => { + await verifyCommand({ target: tempDir, json: true }); + }); + const result = JSON.parse(captured.stdout); + expect(result.suppressions.issues).toBe(0); + }, + 60_000, + ); }); // Pins the invariant for the tail "X / Y files flagged" summary: diff --git a/src/harness/signatures.ts b/src/harness/signatures.ts index f921253..9403b05 100644 --- a/src/harness/signatures.ts +++ b/src/harness/signatures.ts @@ -521,7 +521,7 @@ const SECRETS_RULES: SignatureRule[] = [ category: "secrets_detection", severity: "critical", description: "GitLab Personal Access Token", - patterns: [/\bglpat-[A-Za-z0-9_-]{20}\b/], + patterns: [/(? Date: Thu, 14 May 2026 12:24:27 -0400 Subject: [PATCH 2/6] docs: metacoding agent design plan (v2 with six review iterations) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-prompt metacoder that runs synchronously on UserPromptSubmit before the coding agent's first tool call. Emits a session-scoped overlay of guard rules (action: "block" only, tighten-only against the floor) plus an optional system_prompt_addendum. Same-tier model as the coding agent: Opus 4.7 max for Claude, GPT-5.5 xhigh for Codex. Both transports use the user's CLI subscription via `claude -p` / `codex exec` — no API keys required. 
The doc captures the locked-in decisions (model tier, hard-block enforcement, replace semantics across multi-prompt sessions), the floor/overlay invariant (action constraint + regex validation + ReDoS guards + append-after ordering), recursion-guard mechanics, hook timeout coordination, and the per-runner rollout contract. Six rounds of external review folded in at §reviewer-P1..P5 markers. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/design/metacoding-agent-plan.md | 828 +++++++++++++++++++++++++++ 1 file changed, 828 insertions(+) create mode 100644 docs/design/metacoding-agent-plan.md diff --git a/docs/design/metacoding-agent-plan.md b/docs/design/metacoding-agent-plan.md new file mode 100644 index 0000000..643658b --- /dev/null +++ b/docs/design/metacoding-agent-plan.md @@ -0,0 +1,828 @@ +# Metacoding Agent — Implementation Plan (v2) + +**Status:** Proposed — v2 after follow-up reviewer pass. Awaiting reviewer signoff before implementation begins. +**Author:** Q. Cody, drafted via Claude Code session 2026-05-13. Revised same-session in response to reviewer findings. +**Scope:** v1 prototype for **Claude Code and Codex CLI only**. Cursor / Copilot / Gemini support deferred (see §9). + +--- + +## Changes from v1 + +Reviewer flagged five issues in v1; all five are folded in. Two open +questions are now resolved. A follow-up v2 pass flagged six more plan +precision issues; those are folded in below as well. 
+ +| Change | Location | Origin | +|---|---|---| +| Scope narrowed to Claude Code + Codex only | Throughout | User scoping decision | +| Hook adapter path `defaultTimeoutForPhase` raises `user-prompt` to 35s; metacoder internal timeout remains 30s | §2.4, §3, §6 | Reviewer #1 (High) + v2 review #4 | +| Overlay rules constrained to `action: "block"` only; floor rules iterate before overlay | §2.3, §5 | Reviewer #2 (High) | +| Regex validation on every overlay regex (length cap, flag whitelist, try/catch, ReDoS shape reject) | §2.3, §5 | Reviewer #3 (High) | +| Codex adapter's `encodeCodexAllow` emits `hookSpecificOutput.additionalContext` for UserPromptSubmit (not stderr) | §3, §7 | Reviewer #4 (Medium) | +| Metacoder receives `scanResult.redacted` when the PII scanner finds content; otherwise receives `event.prompt` directly because no redaction was needed | §6 | Reviewer #5 (Medium) + v2 review #6 | +| Subprocess recursion guard via `INTERLINKED_METACODER_SUBPROCESS=1` env sentinel, carried through `UnifiedHookEvent` and `toLegacyHarnessEvent` | §2.5, §3 | Reviewer open Q2 + v2 review #2 | +| Codex `Stop` vs Claude `SessionEnd` / `Stop` cleanup parity documented and tested with correct phase names | §7, §8.2 | Reviewer open Q1 + v2 review #5 | +| Multi-prompt sessions explicitly replace prior overlays instead of merging | §1.1, §8.9 | v2 review #1 | +| Overlay v1 drops `extra_exceptions` / `additional_patterns`; exceptions use `negate: true` rule patterns | §2.3, §5, §6 | v2 review #3 | + +--- + +## 1. Concept + +On every `UserPromptSubmit` (i.e. every time the user sends a message to a +coding agent), a **metacoder** LLM call runs synchronously *before* the +coding agent's first tool call: + +1. Reads the user's prompt (PII-scanner-redacted when the scanner finds + content; otherwise unchanged, see §6) +2. Reads `AGENTS.md` / `CLAUDE.md` project instructions +3. 
Reads cached codebase context (existing project graph / structural + cache maintained by the harness — no fresh research in v1) +4. Emits a **session-scoped overlay** that constrains the coding agent + for the lifetime of that session: + - `.interlinked/sessions/<session_id>/overlay-rules.json` — additional + guard rules + - `.interlinked/sessions/<session_id>/system-prompt.md` — appended to + the coding agent's context via hook stdout +5. The overlay is loaded by the harness **synchronously** so the very + next `PreToolUse` from the coding agent already evaluates against the + tighter ruleset. +6. On session end (`SessionEnd` for Claude, `Stop` for Codex), the + overlay directory is evicted. + +Net effect: hooks are **compiled output** of a per-prompt planner, not +hand-maintained source. The user's framing: "no one should have to *use* +hooks — they should be created and customized for each new prompt, by a +metacoding agent." + +### 1.1 Multi-prompt sessions + +`UserPromptSubmit` can fire multiple times inside one long-running agent +session. v1 uses **replace semantics**: + +- Each `UserPromptSubmit` fully replaces the previous overlay for that + `session_id` (rules and addendum). Overlays do not merge across prompts. +- `writeOverlayArtifacts` writes tmp files and renames them onto the same + `.interlinked/sessions/<session_id>/overlay-rules.json` and + `system-prompt.md` paths, overwriting the previous prompt's artifacts + atomically. +- `sessionRules.set(session_id, loadRules(cwd, session_id))` overwrites + the prior in-memory rules entry. After prompt B, prompt A's overlay + rules are no longer active. +- The new `system_prompt_addendum` is injected fresh on the new + `UserPromptSubmit`. Prior addenda can remain in the coding agent's + conversation history because the harness cannot delete already-injected + model context. That is a known runner limitation; the authoritative + enforced rules are always the latest overlay in `sessionRules`.
+ +Regression test: prompt A emits a rule blocking `src/legacy/payments/`; +prompt B asks to fix `src/legacy/payments/migrate.ts` and emits a +different overlay. Assert A's rule no longer blocks after B's +`UserPromptSubmit`, and B's addendum is the one returned to the hook. + +--- + +## 2. Decisions locked in + +These are non-default choices the user has explicitly made. Reviewers +should push back on any of these that look wrong. + +### 2.1 Metacoder model: same tier as the coding agent + +- **Claude Code session** (`agent_source === "claude"`): use **Opus 4.7 + with maximum reasoning effort**. Invoked via the existing `claude -p` + subprocess pattern (mirrors `policy-classifier.ts::callViaClaudeCode`). + Reuses the user's existing Claude Code subscription — no separate API + key required. +- **Codex session** (`agent_source === "codex"`): use **GPT-5.5 with + `model_reasoning_effort: "xhigh"`** (user's framing: "x-high"; the + CLI's actual config value is `xhigh`, one word — verified live). + Invoked via the `codex exec` subprocess pattern, which uses the + developer's existing Codex CLI subscription via `codex login`. **No + OpenAI API key required.** Symmetric with the Claude path: both + metacoder transports reuse the developer's existing CLI subscription + rather than billing a separate API account. +- **Cursor / Copilot / Gemini sessions:** out of scope for v1. The + metacoder is not invoked for those runners; the coding agent runs + against floor rules only. See §9 for the rationale. + +**Rationale (user):** "If the main coding agent is going to be +constrained by the metacoder's output, the metacoder shouldn't be a +dumber model that under-constrains." The metacoder is a peer of the +coding agent, not a cheap upstream filter. + +**Latency cost:** Opus 4.7 max-effort + Codex xhigh are 5–30s per prompt. +This is visible to the user as a delay between hitting Enter and the +coding agent starting work.
**This is intentional** — the user's design +explicitly accepts the latency in exchange for per-prompt tailoring of +the harness. + +**Monetary cost:** Opus 4.7 max per prompt is in the range of $0.05–$0.30 +depending on prompt length, system prompt size, and thinking budget. +For heavy users this can be hundreds of dollars per month. Out of scope +for v1: per-budget tracking / monthly caps. Document as a known cost. + +### 2.2 Enforcement: hard-block + +When an overlay rule matches a `PreToolUse`, the harness **blocks** the +tool call with the rule's reason, identically to how built-in floor +rules block. Overlay rules participate in the same evaluator pipeline; +the only distinction is the rule's `source` provenance. + +**Rationale (user):** matches the design intent of "the harness should +constrain". Warn-only mode would let the coding agent route around +overlay rules just as it routes around AGENTS.md today, defeating the +purpose. + +**Failure mode this creates:** if the metacoder emits a wrong-shaped +constraint, the coding agent gets stuck in a worse local optimum until +session end. v1 mitigates this only via the floor/overlay invariant +(below) — the agent can always reach the floor's allowed actions. + +### 2.3 Floor / overlay invariant: tighten-only + +- **Floor** = built-in rules (105) + `.interlinked/guard-rules.json` + (team) + `.interlinked/guard-rules.local.json` (personal) + distilled + rules from `/enforce`. Hand-authored, immutable per session. +- **Overlay** = metacoder-emitted, session-scoped. Can only **ADD** + constraints to the floor. Never relaxes. + +Concrete enforcement (in `overlay-loader.ts`): + +| Loader behavior | Why | +|---|---| +| Reject any `disabled_rules` field in overlay | Disabling a floor rule = relaxing. | +| Reject any overlay rule whose `id` collides with a floor id | Replacing a floor rule = relaxing. | +| Require `id` prefix `overlay:<session_id>:` | Namespacing; prevents id squatting.
| +| **Reject any rule whose `action` is not `"block"`** | Only `block` matches the "blocks exactly like floor rules" contract. `ask` / `soft_block` return early from the evaluator with weaker-than-block decisions (`pre-tool.ts:373, 395`); if iteration order put overlay first, the overlay would *relax* the effective decision. `rewrite` mutates input — too powerful for an LLM-emitted rule. `warn` doesn't return early but is informational, not constraining. | +| **Append overlay rules AFTER floor rules in the merged list** | Belt-and-suspenders so floor `block` always iterates before any overlay rule that matches the same input. With both the action constraint above and append-after, an overlay rule can only fire when no floor rule matched first. | +| Reject top-level `extra_exceptions` and `additional_patterns` fields entirely | These fields are not part of overlay v1. `extra_exceptions` is command-substring-only in the current matcher and would be misleading for file-path rules; `additional_patterns` is not a current `GuardRulesConfig` field. Overlay exceptions must be expressed as `negate: true` patterns inside the overlay rule itself. | +| Cap rule count (≤20 per overlay) | Defensive against a runaway LLM emitting 200 rules. | + +Rejected fields/rules are dropped with a `[interlinked:overlay]` stderr +warning. The rest of the overlay still loads. This is the same +fail-soft pattern used elsewhere in the loader. + +**Regex validation (overlay-only).** Floor rules are admin-authored and +trusted; `rule-matching.ts::getCachedRegex` does not validate input +because the existing rule corpus is hand-curated (explicit comment at +`rule-matching.ts:53–57`). LLM-emitted regexes break that assumption. +The overlay loader runs these checks at load time on every regex in +`patterns[].regex` and `active_when.file_scope`, and drops any rule +that fails: + +| Check | Reason | +|---|---| +| `pattern.length ≤ 200` chars | Bounds compilation time and complexity. 
| +| `rule.patterns.length ≤ 10` | Bounds per-rule evaluation cost. | +| `flags ∈ {"i", "m", "s", ""}` only | Reject `g`, `y` (stateful, break shared cache), `u` (Unicode rules) for predictable matching. | +| Wrap `new RegExp(pattern, flags)` in try/catch | Invalid regex throws on every PreToolUse otherwise; drop the rule with a warning. | +| Reject patterns containing nested unbounded quantifiers (`(a+)+`, `(a*)*`, `(a|a)*`) | Catastrophic backtracking (ReDoS) risk. Cheap structural check: `/\([^)]*[+*][^)]*\)[+*]/` catches the nested-quantifier shapes (`(a+)+`, `(a*)*`); it does not match overlapping alternation such as `(a|a)*`, which has no quantifier inside the group and needs its own check. Not exhaustive — see §10 risk #9 — but sufficient for v1. | + +### 2.4 Synchronous before first tool call + +The metacoder is awaited inside the harness's `UserPromptSubmit` +handler. The hook script's socket call blocks until the handler returns. +By the time the coding agent's runtime resumes and issues its first tool +call, the overlay is already in the harness's in-memory per-session +rule cache. + +Implication: the harness does NOT rely on the 2-second `fs.watchFile` +polling path for per-session overlays. Polling is fine for floor rule +edits (team/local files); for session overlays we use the in-memory cache. + +**Hook timeout amendment.** The adapter path at +`src/hook-entry.ts:30` defines `DEFAULT_HOOK_TIMEOUT_MS = 2000` and +`defaultTimeoutForPhase` at L215 returns it for every phase except +`pre-tool`. A 30s metacoder would be killed at 2s, fall through to +cold fallback, and never write the overlay. The plan adds a +`user-prompt` phase branch: + +```ts +const DEFAULT_USER_PROMPT_TIMEOUT_MS = 35_000; + +function defaultTimeoutForPhase(event: UnifiedHookEvent): number { + if (event.phase === PHASE_PRE_TOOL) return DEFAULT_LEGACY_PRE_TOOL_TIMEOUT_MS; + if (event.phase === PHASE_USER_PROMPT) return DEFAULT_USER_PROMPT_TIMEOUT_MS; + return DEFAULT_HOOK_TIMEOUT_MS; +} +``` + +The metacoder's internal timeout remains 30s.
The hook timeout is 35s so +the harness has a 5s buffer to convert a clean metacoder timeout into an +`allow` decision instead of racing the hook's own timeout and producing a +spurious cold fallback. The legacy `.mjs` script has its own per-phase +timeouts; align it to the same 35s user-prompt hook budget. + +### 2.5 Subprocess recursion guard (new in v2) + +The metacoder spawns `claude -p` to call Opus 4.7. The subprocess +inherits the user's `.claude/settings.json` hooks → its first prompt +fires `UserPromptSubmit` → harness sees it → metacoder fires recursively +→ infinite loop. + +The existing `policy-classifier.ts::callViaClaudeCode` does not address +this because it runs on `PreToolUse`, not `UserPromptSubmit`. v1 +introduces a sentinel env var: + +- `metacoder-client.ts` sets `INTERLINKED_METACODER_SUBPROCESS=1` on + the spawned subprocess env, alongside the existing + `--disallowed-tools`, `--no-session-persistence`, etc. +- The hook script (both `hook-entry.ts` and the legacy `.mjs`) reads + this env at startup and, when set, forwards + `metacoder_subprocess: true` on the event envelope sent to the + harness socket. +- For the adapter path, `metacoder_subprocess?: boolean` is added to + `UnifiedHookEvent`; `hook-entry.ts` sets it before sending the RPC, and + `legacy-client.ts::toLegacyHarnessEvent` explicitly copies it through + to `HarnessEvent`. Without the `legacy-client.ts` copy, the framed + adapter path strips the sentinel before `server.ts` can see it. +- The harness's `UserPromptSubmit` branch short-circuits when set: + returns `{ decision: "allow" }` immediately, no metacoder call. + +This breaks the recursion at the earliest deterministic point. The same +env also short-circuits the activity-jsonl write (we don't need to log +the metacoder's own prompts as user activity). + +**Trust note:** the env var is set by the harness's own subprocess +spawn, not by the agent. 
An agent that controls its own env (rare) or +a compromised hook script could forge it, suppressing metacoder +evaluation. See §10 risk #8. + +--- + +## 3. File-by-file change list + +### New files + +| Path | Purpose | +|---|---| +| `src/harness/metacoder/types.ts` | `OverlayRulesFile`, `MetacoderInputContext`, `MetacoderOutcome`, `MetacoderConfig` types | +| `src/harness/metacoder/overlay-loader.ts` | Read overlay JSON, enforce tighten-only invariant (including action-block-only and regex validation), return validated rules + warnings | +| `src/harness/metacoder/regex-validator.ts` | The five checks from §2.3 as pure functions. Used by overlay-loader; testable in isolation. | +| `src/harness/metacoder/prompt-builder.ts` | Assemble `MetacoderInputContext` from `promptForMeta` (scanner-redacted when findings exist, see §6) + AGENTS.md/CLAUDE.md + floor rule ids + project graph summary. Caps total at ~20kB. | +| `src/harness/metacoder/metacoder-client.ts` | LLM call. Routes by `agent_source`. Claude → `claude -p` subprocess (Opus 4.7, maximum reasoning effort) with `INTERLINKED_METACODER_SUBPROCESS=1` env. Codex → `codex exec` subprocess (GPT-5.5, `model_reasoning_effort: "xhigh"`); no OpenAI API key, per §2.1. Fail-open on all error modes. | +| `src/harness/metacoder/metacoder-writer.ts` | tmp-then-rename atomic writes to `.interlinked/sessions/<session_id>/`. Writes to stable per-session paths so each prompt overwrites the prior overlay artifacts atomically. Uses existing `sanitizeSessionId`. | +| `src/harness/metacoder/index.ts` | Barrel exporting `runMetacoderForPrompt(event, config, cwd, promptForMeta)` — single entry point called from `server.ts`.
| +| `src/harness/__tests__/metacoder-overlay.test.ts` | Floor invariant + regex validation tests (see §8.1) | +| `src/harness/__tests__/metacoder-session-lifecycle.test.ts` | Claude `SessionEnd`, Claude `Stop`, and Codex `Stop` all evict overlay | +| `src/harness/__tests__/metacoder-multiprompt.test.ts` | Prompt B replaces prompt A overlay and returns B's addendum | +| `src/harness/__tests__/metacoder-multiclient.test.ts` | Claude + Codex envelopes both produce identical `MetacoderInputContext` | +| `src/harness/__tests__/metacoder-failure.test.ts` | Mocked client throws → fall back to floor; malformed JSON → no overlay merged; missing API key → skipped; recursion guard short-circuits | +| `src/harness/__tests__/metacoder-privacy.test.ts` | Redacted prompt is passed to metacoder when scanner fires | + +### Modified files + +| Path | Change | +|---|---| +| `src/harness/rules-loader.ts` | Extend `loadRules(cwd, sessionId?)` to optionally merge the current per-session overlay rules. Overlay rules appended AFTER floor in merged list. Existing call sites (no `sessionId`) unchanged. | +| `src/harness/server.ts` | (a) UserPromptSubmit handler at L903: short-circuit if `event.metacoder_subprocess === true`; otherwise after existing PII scan, await `runMetacoderForPrompt(event, config, CWD, promptForMeta)`, populate per-session rule cache, return `allow + additional_context`. (b) Add `sessionRules: Map` near L313 alongside existing `classifierSessions` / `autoCoordStates`. (c) In `SessionEnd` / `Stop` handler at L755: call `evictOverlayForSession(CWD, event.session_id)` and `sessionRules.delete(event.session_id)`. (d) Route evaluator calls to use `rulesForSession(session_id)` instead of global `rules` for PreToolUse. (e) Build `promptForMeta = scanResult ? scanResult.redacted : event.prompt`, so scanner-flagged raw spans do not leave the process. | +| `src/harness/types.ts` | Add `metacoder?: MetacoderConfig` to `GuardRulesConfig`. 
Add `metacoder_subprocess?: boolean` to `HarnessEvent`. | +| `src/harness/unified-event.ts` | Add `metacoder_subprocess?: boolean` to `UnifiedHookEvent` so the adapter path can carry the recursion sentinel through the framed RPC envelope. | +| `src/harness/legacy-client.ts` | In `toLegacyHarnessEvent`, copy `event.metacoder_subprocess === true` to `out.metacoder_subprocess = true`; otherwise the production framed path strips the sentinel before `server.ts`. | +| `src/hook-entry.ts` | (a) Add `PHASE_USER_PROMPT = "user-prompt"` constant and `DEFAULT_USER_PROMPT_TIMEOUT_MS = 35_000`. (b) Extend `defaultTimeoutForPhase` per §2.4. (c) Read `INTERLINKED_METACODER_SUBPROCESS` from `opts.env` and set `event.metacoder_subprocess = true` before sending the RPC when present. | +| `src/harness/adapters/codex.ts` | `encodeCodexAllow`: when `event.runner_native_event === "UserPromptSubmit"` and `decision.additional_context` is set, emit `{ hookSpecificOutput: { hookEventName: "UserPromptSubmit", additionalContext: decision.additional_context } }` on **stdout**, not stderr. Current behavior at L230–234 writes to stderr; change moves it to stdout's `hookSpecificOutput` channel to match Claude's contract (per `docs/hooks-ecosystem-comparison.md:81`). | +| `src/harness/adapters/claude-code.ts` | Verify `encodeDecision` emits `hookSpecificOutput.additionalContext` for UserPromptSubmit allow with a `decision.additional_context` field. If the current adapter doesn't already handle this, add the branch. (One-line check at implementation time.) | +| `src/lib/hook-template-chunks/provider-responses.ts` | Add `user_prompt_advice` response type to `formatClaudeResponse` and `formatCodexResponse` only. Drop Cursor/Copilot from this change set entirely. | +| `src/lib/hooks-template.ts` | In `UserPromptSubmit` branch of generated `.mjs`: (a) forward `INTERLINKED_METACODER_SUBPROCESS` env value to the harness socket payload. 
(b) After receiving harness decision, if `decision.additional_context` present, emit via `formatProviderResponse("user_prompt_advice", { summary: decision.additional_context })` to stdout. | + +No new hook-installer work needed. `UserPromptSubmit` is already wired +for Claude Code and Codex per `hook-installers.ts:36–50` and +`adapters/codex.ts:51–58`. + +--- + +## 4. Hook event flow + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ User types prompt. Agent (Claude / Codex) fires UserPromptSubmit │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ Hook entry (src/hook-entry.ts): │ +│ 1. Detect adapter (Claude vs Codex) via env / runner arg │ +│ 2. Build UnifiedHookEvent via adapter.parseHookInput │ +│ 3. Forward INTERLINKED_METACODER_SUBPROCESS env onto event (if set) │ +│ 4. Send RPC to daemon with timeout = 35s (user-prompt phase) │ +│ 5. AWAIT response (blocks agent's prompt processing) │ +└─────────────────────────────────────────────────────────────────────────┘ + │ (Unix socket) + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ Harness server.ts UserPromptSubmit branch: │ +│ 1. SHORT-CIRCUIT: if event.metacoder_subprocess → return allow now │ +│ 2. (Existing) PII scan → redacted_prompt for activity.jsonl │ +│ 3. (NEW) runMetacoderForPrompt(event, config, CWD, promptForMeta): │ +│ a. buildMetacoderContext(promptForMeta, AGENTS.md, floor ids, ...) │ +│ — promptForMeta = scanResult ? scanResult.redacted : event.prompt │ +│ b. callMetacoder(ctx, config): │ +│ - Claude: spawn `claude -p` with │ +│ INTERLINKED_METACODER_SUBPROCESS=1 in env │ +│ - Codex: spawn `codex exec` │ +│ c. validate JSON against OverlayRulesFile schema │ +│ d. validate every regex (length, flags, try/catch, ReDoS shape) │ +│ e. reject rules with action !== "block" │ +│ f.
writeOverlayArtifacts(cwd, session_id, overlay) │ +│ g. sessionRules.set(session_id, loadRules(cwd, session_id)) │ +│ (overlay rules APPENDED after floor) │ +│ 4. Return { decision: allow, redacted_prompt?, additional_context? } │ +│ where additional_context = overlay.system_prompt_addendum │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ Adapter encodes decision to provider-specific stdout JSON: │ +│ - Claude: hookSpecificOutput.additionalContext = system_prompt │ +│ - Codex: hookSpecificOutput.additionalContext = system_prompt │ +│ (was stderr in current adapter — fixed in §3) │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ Agent receives prompt + injected system_prompt_addendum in context. │ +│ Agent issues first PreToolUse. │ +└─────────────────────────────────────────────────────────────────────────┘ + │ (Unix socket) + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ Harness PreToolUse evaluates against rulesForSession(session_id): │ +│ = floor rules ∪ overlay rules (in that order) │ +│ Floor rules iterate first → floor block always wins over overlay │ +│ Overlay rules are action: "block" only → can't downgrade a decision │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ▼ + ... session continues ... + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ Session end: │ +│ - Claude fires SessionEnd and may also fire Stop; Codex fires Stop. │ +│ Phase names differ, but legacy event names reach the same server case.│ +│ 1. (Existing) save trajectory, releaseAllForAgent, sessions.remove, │ +│ classifierSessions.delete, autoCoordStates.delete │ +│ 2. 
(NEW) evictOverlayForSession(cwd, session_id) → │ +│ rm -rf .interlinked/sessions/<session_id>/ │ +│ 3. (NEW) sessionRules.delete(session_id) │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +Per-client envelope at `UserPromptSubmit`: + +| Client | Native event | Prompt field | Adapter file | +|---|---|---|---| +| Claude Code | `UserPromptSubmit` | `prompt` | `src/harness/adapters/claude-code.ts` | +| Codex | `UserPromptSubmit` (same shape, optional `turn_id`) | `prompt` | `src/harness/adapters/codex.ts` | + +--- + +## 5. Overlay schema + +`.interlinked/sessions/<session_id>/overlay-rules.json`: + +```json +{ + "version": 1, + "session_id": "abc123", + "generated_at": "2026-05-13T14:00:00Z", + "generated_by": "metacoder", + "source_prompt_sha256": "deadbeef…", + "system_prompt_addendum": "You are working on the payment-flow refactor. Touching src/legacy/payments is out of scope unless you call out the deviation first.", + "rules": [ + { + "id": "overlay:abc123:0", + "enabled": true, + "trigger": "PreToolUse", + "tool_match": ["Edit", "Write", "MultiEdit"], + "action": "block", + "patterns": [ + { "field": "file_path", "regex": "src/legacy/payments/" }, + { "field": "file_path", "regex": "src/legacy/payments/migrate\\.ts", "negate": true } + ], + "reason": "Touching legacy/payments is out of scope for this session.", + "severity": "high" + } + ] +} +``` + +Validation rules in `overlay-loader.ts` (executed in order): + +1. Reject `disabled_rules` field entirely. +2. Reject any rule whose `id` collides with a floor id. +3. Reject rules whose `id` does not start with `overlay:<session_id>:`. +4. **Reject any rule whose `action` is not `"block"`.** +5. Reject top-level `extra_exceptions` and `additional_patterns` fields + entirely. Overlay exceptions must use `negate: true` patterns inside + the overlay rule. +6. Cap rule count at 20; drop excess with a warning. +7.
For each surviving rule's regexes (in `patterns[].regex` and any + `active_when.file_scope`): run the five-check regex validator from + §2.3. Drop the rule on any failure. +8. **Append surviving overlay rules AFTER floor rules in the merged + `rules.rules` array** so floor blocks always iterate before overlay + rules for the same tool input. + +### What "tighten-only" guarantees concretely + +For any prompt P and tool call T: + +> `Floor(T) == block` ⟹ `(Floor ∪ Overlay(P))(T) == block` + +That is, the loaded ruleset after merging the overlay is a strict +superset of the floor *in terms of block coverage*. The action +constraint (rule 4) plus the iteration order (rule 8) together ensure +an overlay can never produce a strictly weaker decision than the floor +would have produced alone. + +--- + +## 6. Metacoder LLM contract + +### System prompt (constant, embedded in `metacoder-client.ts`) + +``` +You are a session-scoped policy author for an AI coding agent. + +You receive: a user prompt (possibly with PII redacted as