From d84411636077f4f6c43caec01b0dbc303caf3dc3 Mon Sep 17 00:00:00 2001
From: Drew Stone <drewstone329@gmail.com>
Date: Wed, 20 May 2026 12:24:36 +0300
Subject: [PATCH] =?UTF-8?q?feat(0.30.0):=20FindingSubject=20=E2=80=94=20ty?=
 =?UTF-8?q?ped=20grammar=20+=20parser=20+=20Zod=20boundary?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the substrate gap that turned every per-vertical ImprovementAdapter into dead code: the analyst kinds' actor prompts documented a subject grammar (`agent-knowledge:wiki:<slug>`, `system-prompt:<section>`, ...) but `subject` was an unvalidated `z.string().optional()` and the LLM could emit prose like `subject: "fix the prompt"` which downstream `startsWith(...)` routing silently dropped.

This PR makes the grammar load-bearing:

1. **`src/analyst/finding-subject.ts`** — discriminated-union `FindingSubject`, `parseFindingSubject(raw)` parser, `renderFindingSubject(s)` inverse, and a `FINDING_SUBJECT_GRAMMAR_PROMPT` constant kinds can embed as the single source of truth.

   Variants cover every locus the substrate routes on:
   - `agent-knowledge:{wiki,claim,raw,stale}:<locus>` → `KnowledgeAdapter`
   - `system-prompt`, `tool-doc`, `new-tool`, `rag`, `memory`, `scaffolding`, `output-schema` → `ImprovementAdapter`
   - `websearch.outdated`, `prior-run-summary` → stale signals
   - `cluster` → failure-mode-only kebab-case labels (no prefix, ≤ 80 chars)

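   Slugs / wiki headings are constrained to lowercase kebab (`[a-z0-9][a-z0-9-]*`); tool ids, new-tool names and rag corpora additionally allow underscores (`[a-z0-9][a-z0-9_-]*`); topics / sections / keys allow free-form text, trimmed.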

2. **`KIND_EXPECTED_SUBJECTS`** — per-kind allow-list. failure-mode emits ONLY `cluster`; knowledge-gap can't sneak in a `new-tool:*` or `scaffolding:*` (the improvement-analyst's job); improvement can't emit stale signals. Enforced at the kind factory boundary.

3. **`RawAnalystFindingSchema.subject`** — Zod `.refine` that runs the parser. Malformed subjects fail the row at Zod parse time with a clear log message instead of being silently lifted with a free-form string.

4. **`kind-factory.ts`** — after `parseRawFinding`, the factory checks the parsed subject against the kind's allow-list. Wrong-kind subjects (e.g. an improvement finding pointing at `cluster:foo`) are logged + counted in `rejected_wrong_subject` and excluded from `out`. Visible to operators in the `analyst.kind <id> done` log line.

5. **Tests**: 38 new cases on `parseFindingSubject` cover every variant (positive + malformed), boundary inputs (null / empty / whitespace / prose), round-trip via `renderFindingSubject`, and the `KIND_EXPECTED_SUBJECTS` truth table (failure-mode is the ONLY kind that emits cluster; improvement excludes stale signals; etc.). Updated the legacy `'tool:foo'` fixtures in `kinds.test.ts` to canonical `'tool-doc:foo'`.

Result: every downstream consumer (agent-runtime's `KnowledgeAdapter` / `ImprovementAdapter`, per-vertical wiring) can now narrow on `FindingSubject['kind']` instead of `startsWith('agent-knowledge:wiki:')` — no more silent skips, no more fabricated paths, no more theater.

Tests: 1196/1196 pass (38 new). Typecheck clean. Bumps to 0.30.0 (npm + pypi + python `__version__`).
---
 clients/python/pyproject.toml                 |   2 +-
 clients/python/src/agent_eval_rpc/__init__.py |   2 +-
 package.json                                  |   2 +-
 src/analyst/finding-signature.ts              |  20 +-
 src/analyst/finding-subject.test.ts           | 272 +++++++++++++++
 src/analyst/finding-subject.ts                | 317 ++++++++++++++++++
 src/analyst/kind-factory.ts                   |  30 ++
 src/analyst/kinds/kinds.test.ts               |  10 +-
 src/index.ts                                  |  12 +
 9 files changed, 658 insertions(+), 9 deletions(-)
 create mode 100644 src/analyst/finding-subject.test.ts
 create mode 100644 src/analyst/finding-subject.ts
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
index 2e0496d..ef79c26 100644
--- a/clients/python/pyproject.toml
+++ b/clients/python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "agent-eval-rpc"
-version = "0.29.1"
+version = "0.30.0"
 description = "Python RPC client for @tangle-network/agent-eval — judge content against rubrics over HTTP or stdio RPC. Eval logic runs in the Node runtime; this package is a thin wire client."
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/clients/python/src/agent_eval_rpc/__init__.py b/clients/python/src/agent_eval_rpc/__init__.py
index 9b09e09..5498239 100644
--- a/clients/python/src/agent_eval_rpc/__init__.py
+++ b/clients/python/src/agent_eval_rpc/__init__.py
@@ -48,7 +48,7 @@
 try:
     __version__ = version("agent-eval-rpc")
 except PackageNotFoundError:
-    __version__ = "0.29.1"
+    __version__ = "0.30.0"
 
 __all__ = [
     "Client",
diff --git a/package.json b/package.json
index 7c5c275..fa0e15e 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-eval",
-  "version": "0.29.1",
+  "version": "0.30.0",
   "description": "Substrate for self-improving agents: traces, verifiable rewards, preferences, GEPA / reflective mutation, auto-research, replay, sequential anytime-valid stats, and release gates.",
   "homepage": "https://github.com/tangle-network/agent-eval#readme",
   "repository": {
diff --git a/src/analyst/finding-signature.ts b/src/analyst/finding-signature.ts
index 5dc3420..923b36b 100644
--- a/src/analyst/finding-signature.ts
+++ b/src/analyst/finding-signature.ts
@@ -16,6 +16,7 @@
  */
 
 import { z } from 'zod'
+import { parseFindingSubject } from './finding-subject'
 
 export const ANALYST_SEVERITIES = ['critical', 'high', 'medium', 'low', 'info'] as const
 
@@ -23,7 +24,24 @@ export const RawAnalystFindingSchema = z
   .object({
     severity: z.enum(ANALYST_SEVERITIES),
     claim: z.string().min(1).max(2000),
-    subject: z.string().max(400).optional(),
+    /**
+     * Subject locus the finding is about. Validated at parse time
+     * against the documented grammar (`finding-subject.ts`). Findings
+     * with a malformed subject are rejected — they would have been
+     * silently skipped by every downstream adapter, so failing loud at
+     * parse time turns a hidden no-op into a kind-prompt audit signal.
+     *
+     * Optional because purely descriptive findings (no actionable
+     * locus) are legitimate; they just don't route through the
+     * KnowledgeAdapter / ImprovementAdapter.
+     */
+    subject: z
+      .string()
+      .max(400)
+      .refine((s) => parseFindingSubject(s) !== null, {
+        message: 'subject does not match the finding-subject grammar',
+      })
+      .optional(),
     evidence_uri: z.string().min(1).max(2000),
     evidence_excerpt: z.string().max(2000).optional(),
     confidence: z.number().min(0).max(1),
diff --git a/src/analyst/finding-subject.test.ts b/src/analyst/finding-subject.test.ts
new file mode 100644
index 0000000..d0f9179
--- /dev/null
+++ b/src/analyst/finding-subject.test.ts
@@ -0,0 +1,272 @@
+import { describe, expect, it } from 'vitest'
+import {
+  FINDING_SUBJECT_KINDS,
+  type FindingSubject,
+  KIND_EXPECTED_SUBJECTS,
+  parseFindingSubject,
+  renderFindingSubject,
+} from './finding-subject'
+
+describe('parseFindingSubject — knowledge loci', () => {
+  it('parses agent-knowledge:wiki:<slug>', () => {
+    expect(parseFindingSubject('agent-knowledge:wiki:invoice-shape')).toEqual({
+      kind: 'knowledge.wiki',
+      slug: 'invoice-shape',
+    })
+  })
+
+  it('parses agent-knowledge:wiki:<slug>#<heading>', () => {
+    expect(parseFindingSubject('agent-knowledge:wiki:invoice-shape#line-items')).toEqual({
+      kind: 'knowledge.wiki',
+      slug: 'invoice-shape',
+      heading: 'line-items',
+    })
+  })
+
+  it('parses agent-knowledge:claim:<topic>', () => {
+    expect(parseFindingSubject('agent-knowledge:claim:cap-table-shape')).toEqual({
+      kind: 'knowledge.claim',
+      topic: 'cap-table-shape',
+    })
+  })
+
+  it('parses agent-knowledge:raw:<source-id>', () => {
+    expect(parseFindingSubject('agent-knowledge:raw:irs-pub-501-2024')).toEqual({
+      kind: 'knowledge.raw',
+      sourceId: 'irs-pub-501-2024',
+    })
+  })
+
+  it('parses agent-knowledge:stale:<slug>', () => {
+    expect(parseFindingSubject('agent-knowledge:stale:old-vat-rates')).toEqual({
+      kind: 'knowledge.stale',
+      slug: 'old-vat-rates',
+    })
+  })
+
+  it('rejects malformed wiki slug (uppercase / underscore)', () => {
+    expect(parseFindingSubject('agent-knowledge:wiki:InvoiceShape')).toBeNull()
+    expect(parseFindingSubject('agent-knowledge:wiki:invoice_shape')).toBeNull()
+  })
+
+  it('rejects malformed wiki anchor heading', () => {
+    expect(parseFindingSubject('agent-knowledge:wiki:slug#Heading_With_Caps')).toBeNull()
+  })
+})
+
+describe('parseFindingSubject — runtime surfaces', () => {
+  it('parses system-prompt:<section> with kebab section', () => {
+    expect(parseFindingSubject('system-prompt:request-classification')).toEqual({
+      kind: 'system-prompt',
+      section: 'request-classification',
+    })
+  })
+
+  it('parses system-prompt:<section> with free-form section text', () => {
+    expect(parseFindingSubject('system-prompt:Tool Selection')).toEqual({
+      kind: 'system-prompt',
+      section: 'Tool Selection',
+    })
+  })
+
+  it('parses tool-doc:<tool>', () => {
+    expect(parseFindingSubject('tool-doc:list_invoices')).toEqual({
+      kind: 'tool-doc',
+      tool: 'list_invoices',
+    })
+  })
+
+  it('parses tool-doc:<tool>:<aspect>', () => {
+    expect(parseFindingSubject('tool-doc:list_invoices:examples')).toEqual({
+      kind: 'tool-doc',
+      tool: 'list_invoices',
+      aspect: 'examples',
+    })
+  })
+
+  it('parses new-tool:<name>', () => {
+    expect(parseFindingSubject('new-tool:diff_csv')).toEqual({
+      kind: 'new-tool',
+      name: 'diff_csv',
+    })
+  })
+
+  it('parses rag:<corpus>:<doc>', () => {
+    expect(parseFindingSubject('rag:irs-rulings:rev-rul-2024-12')).toEqual({
+      kind: 'rag',
+      corpus: 'irs-rulings',
+      docId: 'rev-rul-2024-12',
+    })
+  })
+
+  it('parses memory:<key>', () => {
+    expect(parseFindingSubject('memory:last-customer-id')).toEqual({
+      kind: 'memory',
+      key: 'last-customer-id',
+    })
+  })
+
+  it('parses scaffolding:<concern>', () => {
+    expect(parseFindingSubject('scaffolding:retry-policy')).toEqual({
+      kind: 'scaffolding',
+      concern: 'retry-policy',
+    })
+  })
+
+  it('parses output-schema:<field>', () => {
+    expect(parseFindingSubject('output-schema:filing_year')).toEqual({
+      kind: 'output-schema',
+      field: 'filing_year',
+    })
+  })
+
+  it('rejects tool-doc with uppercase tool name', () => {
+    expect(parseFindingSubject('tool-doc:ListInvoices')).toBeNull()
+  })
+
+  it('rejects new-tool with empty name', () => {
+    expect(parseFindingSubject('new-tool:')).toBeNull()
+  })
+
+  it('rejects rag without corpus or doc id', () => {
+    expect(parseFindingSubject('rag:irs-rulings')).toBeNull()
+    expect(parseFindingSubject('rag:irs-rulings:')).toBeNull()
+  })
+})
+
+describe('parseFindingSubject — stale signals', () => {
+  it('parses websearch:outdated:<topic>', () => {
+    expect(parseFindingSubject('websearch:outdated:capital-gains-rates-2023')).toEqual({
+      kind: 'websearch.outdated',
+      topic: 'capital-gains-rates-2023',
+    })
+  })
+
+  it('parses prior-run-summary:<topic>', () => {
+    expect(parseFindingSubject('prior-run-summary:cost-basis-method')).toEqual({
+      kind: 'prior-run-summary',
+      topic: 'cost-basis-method',
+    })
+  })
+})
+
+describe('parseFindingSubject — cluster labels (failure-mode)', () => {
+  it('parses a kebab-case cluster label', () => {
+    expect(parseFindingSubject('tool-call-loop')).toEqual({
+      kind: 'cluster',
+      label: 'tool-call-loop',
+    })
+  })
+
+  it('parses a long but valid label', () => {
+    expect(parseFindingSubject('auth-revoked-mid-run')).toEqual({
+      kind: 'cluster',
+      label: 'auth-revoked-mid-run',
+    })
+  })
+
+  it('rejects a cluster label with whitespace', () => {
+    expect(parseFindingSubject('tool call loop')).toBeNull()
+  })
+
+  it('rejects a cluster label with uppercase letters', () => {
+    expect(parseFindingSubject('ToolCallLoop')).toBeNull()
+  })
+
+  it('rejects an overly long label', () => {
+    expect(parseFindingSubject('a'.repeat(81))).toBeNull()
+  })
+})
+
+describe('parseFindingSubject — boundary cases', () => {
+  it('returns null for undefined', () => {
+    expect(parseFindingSubject(undefined)).toBeNull()
+  })
+
+  it('returns null for null', () => {
+    expect(parseFindingSubject(null)).toBeNull()
+  })
+
+  it('returns null for empty string', () => {
+    expect(parseFindingSubject('')).toBeNull()
+  })
+
+  it('returns null for whitespace-only string', () => {
+    expect(parseFindingSubject('   ')).toBeNull()
+  })
+
+  it('returns null for prose subject ("fix the prompt")', () => {
+    expect(parseFindingSubject('fix the prompt')).toBeNull()
+  })
+
+  it('returns null for unknown prefix', () => {
+    expect(parseFindingSubject('unknown-prefix:foo')).toBeNull()
+  })
+
+  it('trims leading/trailing whitespace before parsing', () => {
+    expect(parseFindingSubject('  system-prompt:request-classification  ')).toEqual({
+      kind: 'system-prompt',
+      section: 'request-classification',
+    })
+  })
+})
+
+describe('renderFindingSubject', () => {
+  it('round-trips every parseable subject', () => {
+    const cases: Array<FindingSubject> = [
+      { kind: 'knowledge.wiki', slug: 'invoice-shape' },
+      { kind: 'knowledge.wiki', slug: 'invoice-shape', heading: 'line-items' },
+      { kind: 'knowledge.claim', topic: 'cap-table-shape' },
+      { kind: 'knowledge.raw', sourceId: 'irs-pub-501-2024' },
+      { kind: 'knowledge.stale', slug: 'old-vat-rates' },
+      { kind: 'system-prompt', section: 'request-classification' },
+      { kind: 'tool-doc', tool: 'list_invoices' },
+      { kind: 'tool-doc', tool: 'list_invoices', aspect: 'examples' },
+      { kind: 'new-tool', name: 'diff_csv' },
+      { kind: 'rag', corpus: 'irs-rulings', docId: 'rev-rul-2024-12' },
+      { kind: 'memory', key: 'last-customer-id' },
+      { kind: 'scaffolding', concern: 'retry-policy' },
+      { kind: 'output-schema', field: 'filing_year' },
+      { kind: 'websearch.outdated', topic: 'capital-gains-rates-2023' },
+      { kind: 'prior-run-summary', topic: 'cost-basis-method' },
+      { kind: 'cluster', label: 'tool-call-loop' },
+    ]
+    for (const s of cases) {
+      const rendered = renderFindingSubject(s)
+      const reparsed = parseFindingSubject(rendered)
+      expect(reparsed).toEqual(s)
+    }
+  })
+})
+
+describe('KIND_EXPECTED_SUBJECTS', () => {
+  it('covers every emitted kind in DEFAULT_TRACE_ANALYST_KINDS', () => {
+    expect(Object.keys(KIND_EXPECTED_SUBJECTS).sort()).toEqual(
+      ['failure-mode', 'improvement', 'knowledge-gap', 'knowledge-poisoning'].sort(),
+    )
+  })
+
+  it('failure-mode is the ONLY kind that emits cluster subjects', () => {
+    for (const [kindId, allowed] of Object.entries(KIND_EXPECTED_SUBJECTS)) {
+      if (kindId === 'failure-mode') {
+        expect(allowed).toContain('cluster')
+      } else {
+        expect(allowed).not.toContain('cluster')
+      }
+    }
+  })
+
+  it('every expected variant is a known FindingSubject kind', () => {
+    for (const allowed of Object.values(KIND_EXPECTED_SUBJECTS)) {
+      for (const variant of allowed) {
+        expect(FINDING_SUBJECT_KINDS).toContain(variant)
+      }
+    }
+  })
+
+  it('improvement does not include websearch.outdated / prior-run-summary (stale signals are a knowledge-poisoning concern)', () => {
+    const improvement = KIND_EXPECTED_SUBJECTS.improvement!
+    expect(improvement).not.toContain('websearch.outdated')
+    expect(improvement).not.toContain('prior-run-summary')
+  })
+})
diff --git a/src/analyst/finding-subject.ts b/src/analyst/finding-subject.ts
new file mode 100644
index 0000000..40ab41d
--- /dev/null
+++ b/src/analyst/finding-subject.ts
@@ -0,0 +1,317 @@
+/**
+ * Typed `FindingSubject` — the canonical grammar every analyst kind emits.
+ *
+ * Background: kind actor prompts have always documented a subject grammar
+ * (e.g. `system-prompt:<section>`, `agent-knowledge:wiki:<slug>`) but the
+ * LLM was unconstrained — it could emit `subject: "fix the prompt"`
+ * (prose) and downstream adapters routed on `startsWith(...)` would
+ * silently skip it. Every per-vertical `ImprovementAdapter` had a
+ * routing table that mostly caught nothing.
+ *
+ * This module fixes that:
+ *   - `parseFindingSubject(raw)` — returns the typed `FindingSubject`
+ *     when `raw` matches the grammar, else `null`. Used at the
+ *     `RawAnalystFindingSchema` boundary so malformed subjects are
+ *     rejected loudly instead of silently lifted into the registry.
+ *   - `FindingSubjectKind` — the union of valid locus categories. Each
+ *     variant carries the typed components downstream adapters resolve
+ *     against the agent's surface manifest (no string parsing in the
+ *     adapter).
+ *   - `FINDING_SUBJECT_GRAMMAR_PROMPT` — single source of truth for the
+ *     grammar string embedded in kind actor prompts. Drift between
+ *     prompt and parser is impossible if every kind imports this.
+ *
+ * The grammar is intentionally NARROW — only loci the substrate's
+ * default `ImprovementAdapter` / `KnowledgeAdapter` can act on. A
+ * finding with a subject outside this set fails the parser; the kind
+ * author either extends the grammar here (and adds adapter routing)
+ * or rephrases the prompt to map onto an existing variant.
+ *
+ * `failure-mode` is the one exception — its subjects are free-form
+ * cluster labels, not loci. The schema preserves them as
+ * `{ kind: 'cluster', label }` and the adapters skip them (cluster
+ * findings are evidence, not actionable mutations).
+ */
+
+import { z } from 'zod'
+
+// ── canonical grammar ─────────────────────────────────────────────────
+
+/**
+ * Discriminated union of every locus the substrate can route findings to.
+ *
+ * Adapters narrow on `kind` and use the typed components (no string
+ * parsing). Adding a variant here REQUIRES updating the parser, the
+ * grammar prompt, and at least one adapter — by design.
+ */
+export type FindingSubject =
+  // ── agent-knowledge:* — routed to the KnowledgeAdapter ──
+  | { kind: 'knowledge.wiki'; slug: string; heading?: string }
+  | { kind: 'knowledge.claim'; topic: string }
+  | { kind: 'knowledge.raw'; sourceId: string }
+  | { kind: 'knowledge.stale'; slug: string }
+  // ── system-prompt / tool / new-tool / rag / memory / scaffolding / output-schema ──
+  // routed to the ImprovementAdapter
+  | { kind: 'system-prompt'; section: string }
+  | { kind: 'tool-doc'; tool: string; aspect?: string }
+  | { kind: 'new-tool'; name: string }
+  | { kind: 'rag'; corpus: string; docId: string }
+  | { kind: 'memory'; key: string }
+  | { kind: 'scaffolding'; concern: string }
+  | { kind: 'output-schema'; field: string }
+  // ── websearch / prior-run-summary — routed to the KnowledgeAdapter as stale signals
+  | { kind: 'websearch.outdated'; topic: string }
+  | { kind: 'prior-run-summary'; topic: string }
+  // ── failure-mode cluster label — preserved verbatim, not routed
+  | { kind: 'cluster'; label: string }
+
+export type FindingSubjectKind = FindingSubject['kind']
+
+export const FINDING_SUBJECT_KINDS: ReadonlyArray<FindingSubjectKind> = [
+  'knowledge.wiki',
+  'knowledge.claim',
+  'knowledge.raw',
+  'knowledge.stale',
+  'system-prompt',
+  'tool-doc',
+  'new-tool',
+  'rag',
+  'memory',
+  'scaffolding',
+  'output-schema',
+  'websearch.outdated',
+  'prior-run-summary',
+  'cluster',
+]
+
+// ── parser ────────────────────────────────────────────────────────────
+
+/**
+ * Parse a raw subject string emitted by an analyst kind's actor.
+ *
+ * Returns the typed `FindingSubject` when `raw` matches the grammar,
+ * else `null`. Callers treat a `null` return as a rejected subject:
+ * `RawAnalystFindingSchema` fails the row at parse time. Prefix-less
+ * kebab-case labels (failure-mode) are NOT `null` — they parse to
+ * `{ kind: 'cluster', label }`.
+ *
+ * Slugs are constrained to `[a-z0-9][a-z0-9-]*` (lowercase kebab) to
+ * keep file paths sane downstream; tool ids, new-tool names and rag
+ * corpora also allow `_`. Other components are free-form, trimmed.
+ *
+ * Empty / whitespace-only inputs return `null`. `null` / `undefined`
+ * return `null` too.
+ */
+export function parseFindingSubject(raw: string | null | undefined): FindingSubject | null {
+  if (raw === null || raw === undefined) return null
+  const trimmed = raw.trim()
+  if (trimmed.length === 0) return null
+
+  // agent-knowledge:wiki:<slug>[#<heading>]
+  const wiki = trimmed.match(/^agent-knowledge:wiki:([a-z0-9][a-z0-9-]*)(?:#([a-z0-9][a-z0-9-]*))?$/)
+  if (wiki) return { kind: 'knowledge.wiki', slug: wiki[1]!, ...(wiki[2] ? { heading: wiki[2] } : {}) }
+
+  // agent-knowledge:claim:<topic>
+  const claim = trimmed.match(/^agent-knowledge:claim:(.+)$/)
+  if (claim && claim[1]!.trim().length > 0) return { kind: 'knowledge.claim', topic: claim[1]!.trim() }
+
+  // agent-knowledge:raw:<source-id>
+  const raw_ = trimmed.match(/^agent-knowledge:raw:(.+)$/)
+  if (raw_ && raw_[1]!.trim().length > 0) return { kind: 'knowledge.raw', sourceId: raw_[1]!.trim() }
+
+  // agent-knowledge:stale:<slug>
+  const stale = trimmed.match(/^agent-knowledge:stale:([a-z0-9][a-z0-9-]*)$/)
+  if (stale) return { kind: 'knowledge.stale', slug: stale[1]! }
+
+  // system-prompt:<section>
+  const sp = trimmed.match(/^system-prompt:(.+)$/)
+  if (sp && sp[1]!.trim().length > 0) return { kind: 'system-prompt', section: sp[1]!.trim() }
+
+  // tool-doc:<tool>[:<aspect>]
+  const tdAspect = trimmed.match(/^tool-doc:([a-z0-9][a-z0-9_-]*):(.+)$/)
+  if (tdAspect && tdAspect[2]!.trim().length > 0) {
+    return { kind: 'tool-doc', tool: tdAspect[1]!, aspect: tdAspect[2]!.trim() }
+  }
+  const td = trimmed.match(/^tool-doc:([a-z0-9][a-z0-9_-]*)$/)
+  if (td) return { kind: 'tool-doc', tool: td[1]! }
+
+  // new-tool:<name>
+  const nt = trimmed.match(/^new-tool:([a-z0-9][a-z0-9_-]*)$/)
+  if (nt) return { kind: 'new-tool', name: nt[1]! }
+
+  // rag:<corpus>:<doc-id>
+  const rag = trimmed.match(/^rag:([a-z0-9][a-z0-9_-]*):(.+)$/)
+  if (rag && rag[2]!.trim().length > 0) {
+    return { kind: 'rag', corpus: rag[1]!, docId: rag[2]!.trim() }
+  }
+
+  // memory:<key>
+  const mem = trimmed.match(/^memory:(.+)$/)
+  if (mem && mem[1]!.trim().length > 0) return { kind: 'memory', key: mem[1]!.trim() }
+
+  // scaffolding:<concern>
+  const sc = trimmed.match(/^scaffolding:(.+)$/)
+  if (sc && sc[1]!.trim().length > 0) return { kind: 'scaffolding', concern: sc[1]!.trim() }
+
+  // output-schema:<field>
+  const os = trimmed.match(/^output-schema:(.+)$/)
+  if (os && os[1]!.trim().length > 0) return { kind: 'output-schema', field: os[1]!.trim() }
+
+  // websearch:outdated:<topic>
+  const ws = trimmed.match(/^websearch:outdated:(.+)$/)
+  if (ws && ws[1]!.trim().length > 0) return { kind: 'websearch.outdated', topic: ws[1]!.trim() }
+
+  // prior-run-summary:<topic>
+  const prs = trimmed.match(/^prior-run-summary:(.+)$/)
+  if (prs && prs[1]!.trim().length > 0) return { kind: 'prior-run-summary', topic: prs[1]!.trim() }
+
+  // cluster (no prefix — failure-mode emits short labels)
+  if (/^[a-z0-9][a-z0-9-]*$/.test(trimmed) && trimmed.length <= 80) {
+    return { kind: 'cluster', label: trimmed }
+  }
+
+  return null
+}
+
+/**
+ * Render the parsed subject back to its canonical string form. Inverse
+ * of `parseFindingSubject`; useful when the substrate constructs new
+ * findings programmatically (e.g. for tests, replays, or
+ * `id_basis` carry-forward).
+ */
+export function renderFindingSubject(s: FindingSubject): string {
+  switch (s.kind) {
+    case 'knowledge.wiki':
+      return s.heading
+        ? `agent-knowledge:wiki:${s.slug}#${s.heading}`
+        : `agent-knowledge:wiki:${s.slug}`
+    case 'knowledge.claim':
+      return `agent-knowledge:claim:${s.topic}`
+    case 'knowledge.raw':
+      return `agent-knowledge:raw:${s.sourceId}`
+    case 'knowledge.stale':
+      return `agent-knowledge:stale:${s.slug}`
+    case 'system-prompt':
+      return `system-prompt:${s.section}`
+    case 'tool-doc':
+      return s.aspect ? `tool-doc:${s.tool}:${s.aspect}` : `tool-doc:${s.tool}`
+    case 'new-tool':
+      return `new-tool:${s.name}`
+    case 'rag':
+      return `rag:${s.corpus}:${s.docId}`
+    case 'memory':
+      return `memory:${s.key}`
+    case 'scaffolding':
+      return `scaffolding:${s.concern}`
+    case 'output-schema':
+      return `output-schema:${s.field}`
+    case 'websearch.outdated':
+      return `websearch:outdated:${s.topic}`
+    case 'prior-run-summary':
+      return `prior-run-summary:${s.topic}`
+    case 'cluster':
+      return s.label
+  }
+}
+
+// ── grammar prompt — single source of truth for actor instructions ──
+
+/**
+ * The grammar text embedded into kind actor prompts. Kinds opt into
+ * the subset of variants they emit (e.g. `improvement` excludes the
+ * cluster variant; `failure-mode` includes ONLY the cluster variant).
+ *
+ * Keep this text in lockstep with `parseFindingSubject`: the character
+ * classes quoted on the last line mirror the parser's regexes, and the
+ * per-kind allow-list lives in `KIND_EXPECTED_SUBJECTS`.
+ */
+export const FINDING_SUBJECT_GRAMMAR_PROMPT = [
+  'Subjects MUST match this grammar — anything else is rejected at parse time and your work is wasted:',
+  '',
+  '  Knowledge loci (write to the agent-knowledge base):',
+  '    agent-knowledge:wiki:<slug>[#<heading>]   create / update a wiki page',
+  '    agent-knowledge:claim:<topic>             draft a claim / relation triple',
+  '    agent-knowledge:raw:<source-id>           lift a raw source into a curated page',
+  '    agent-knowledge:stale:<slug>              mark a page superseded',
+  '',
+  '  Runtime mutable surfaces (write to prompts / tools / scaffolding):',
+  '    system-prompt:<section>                   add / replace a system-prompt section',
+  '    tool-doc:<tool>[:<aspect>]                rewrite a tool description',
+  '    new-tool:<name>                           propose a new tool surface',
+  '    rag:<corpus>:<doc-id>                     ingest / correct a RAG document',
+  '    memory:<key>                              invalidate / set a memory entry',
+  '    scaffolding:<concern>                     change a precondition / retry / verifier',
+  '    output-schema:<field>                     constrain the agent output shape',
+  '',
+  '  Stale signals (knowledge-poisoning only):',
+  '    websearch:outdated:<topic>                stale web result',
+  '    prior-run-summary:<topic>                 stale prior-run summary',
+  '',
+  '  Cluster label (failure-mode only):',
+  '    <kebab-case-label>                        short cluster id, e.g. "tool-call-loop"',
+  '',
+  'Slugs / headings: [a-z0-9-]+ (lowercase kebab). Tool ids / new-tool names / rag corpora: [a-z0-9_-]+ (underscores allowed). Topics / keys / sections: free-form, trimmed.',
+].join('\n')
+
+// ── kind expects sets ─────────────────────────────────────────────────
+
+/**
+ * The variants each kind is allowed to emit. Used at the kind factory
+ * boundary so a knowledge-gap finding can't sneak in a `new-tool:*`
+ * subject (the improvement-analyst's job) and vice versa.
+ *
+ * `failure-mode` is restricted to `cluster` — the only kind that emits
+ * a non-locus subject.
+ */
+export const KIND_EXPECTED_SUBJECTS: Record<string, ReadonlyArray<FindingSubjectKind>> = {
+  'failure-mode': ['cluster'],
+  'knowledge-gap': [
+    'knowledge.wiki',
+    'knowledge.claim',
+    'knowledge.raw',
+    'knowledge.stale',
+    'tool-doc',
+    'system-prompt',
+    'memory',
+    'websearch.outdated',
+    'prior-run-summary',
+  ],
+  'knowledge-poisoning': [
+    'knowledge.wiki',
+    'knowledge.claim',
+    'knowledge.raw',
+    'tool-doc',
+    'system-prompt',
+    'memory',
+    'websearch.outdated',
+    'prior-run-summary',
+  ],
+  improvement: [
+    'system-prompt',
+    'tool-doc',
+    'new-tool',
+    'rag',
+    'memory',
+    'scaffolding',
+    'output-schema',
+    'knowledge.wiki',
+    'knowledge.claim',
+  ],
+}
+
+// ── Zod schema for boundary validation ───────────────────────────────
+
+/**
+ * Zod schema that validates a raw subject string against the grammar.
+ * It is a `refine`, not a `transform`: the parsed output is still the
+ * original string, so callers re-run `parseFindingSubject` to get the
+ * typed locus. `RawAnalystFindingSchema.subject` applies the same
+ * check inline and adds `.max(400)` / `.optional()`.
+ *
+ * This schema itself is required; wrap it in `.optional()` where the
+ * subject may be absent (descriptive findings). When present, it MUST
+ * parse — a malformed subject is a contract violation, not a soft signal.
+ */
+export const FindingSubjectStringSchema = z.string().refine((s) => parseFindingSubject(s) !== null, {
+  message: 'subject does not match the finding-subject grammar',
+})
diff --git a/src/analyst/kind-factory.ts b/src/analyst/kind-factory.ts
index 2567116..e6cd68f 100644
--- a/src/analyst/kind-factory.ts
+++ b/src/analyst/kind-factory.ts
@@ -34,6 +34,7 @@ import {
   RAW_FINDING_SCHEMA_PROMPT,
   type RawAnalystFinding,
 } from './finding-signature'
+import { KIND_EXPECTED_SUBJECTS, parseFindingSubject } from './finding-subject'
 import type { Analyst, AnalystContext, AnalystCost, AnalystFinding } from './types'
 import { makeFinding } from './types'
 
@@ -172,11 +173,39 @@ export function createTraceAnalystKind(
 
       const result = await ax.forward(opts.ai, { question: deriveQuestion(ctx, spec) })
 
+      const expectedSubjects = KIND_EXPECTED_SUBJECTS[spec.id]
       const out: AnalystFinding[] = []
       const rawRows = Array.isArray(result.findings) ? result.findings : []
+      let rejectedWrongKind = 0
       for (const row of rawRows) {
         const parsed = parseRawFinding(row, ctx.log)
         if (!parsed) continue
+        // Subject-grammar check: if the kind has a declared expects-set
+        // (every shipped kind does), the finding's subject MUST parse to
+        // one of the declared variants. A wrong-kind subject is a
+        // contract violation — the actor's prompt drifted from the
+        // grammar — and we count it for prompt-audit visibility.
+        if (expectedSubjects && parsed.subject !== undefined) {
+          const parsedSubject = parseFindingSubject(parsed.subject)
+          if (parsedSubject === null) {
+            ctx.log?.('finding rejected: subject failed to parse', {
+              kind: spec.id,
+              subject: parsed.subject,
+            })
+            rejectedWrongKind += 1
+            continue
+          }
+          if (!expectedSubjects.includes(parsedSubject.kind)) {
+            ctx.log?.('finding rejected: subject variant not allowed for this kind', {
+              kind: spec.id,
+              subject_kind: parsedSubject.kind,
+              subject: parsed.subject,
+              allowed: expectedSubjects,
+            })
+            rejectedWrongKind += 1
+            continue
+          }
+        }
         const postProcessed = spec.postProcess?.(parsed, ctx) ?? parsed
         if (!postProcessed) continue
         out.push(toAnalystFinding(spec, postProcessed))
@@ -185,6 +214,7 @@ export function createTraceAnalystKind(
       ctx.log?.(`analyst.kind ${spec.id} done`, {
         emitted: rawRows.length,
         accepted: out.length,
+        rejected_wrong_subject: rejectedWrongKind,
       })
       return out
     },
diff --git a/src/analyst/kinds/kinds.test.ts b/src/analyst/kinds/kinds.test.ts
index 5aba9d6..681f5d9 100644
--- a/src/analyst/kinds/kinds.test.ts
+++ b/src/analyst/kinds/kinds.test.ts
@@ -16,7 +16,7 @@ describe('RawAnalystFindingSchema', () => {
     const parsed = RawAnalystFindingSchema.safeParse({
       severity: 'high',
       claim: 'agent looped on tool foo',
-      subject: 'tool:foo',
+      subject: 'tool-doc:foo',
       evidence_uri: 'span://abc/def',
       evidence_excerpt: 'foo() called 11 times with same args',
       confidence: 0.9,
@@ -217,7 +217,7 @@ describe('createTraceAnalystKind wires the spec into the Analyst contract', () =
         makeFinding({
           analyst_id: 'failure-mode',
           area: 'failure-mode',
-          subject: 'tool:foo',
+          subject: 'tool-doc:foo',
           claim: 'tool foo loops on identical args',
           severity: 'high',
           confidence: 0.9,
@@ -241,7 +241,7 @@ describe('createTraceAnalystKind wires the spec into the Analyst contract', () =
       if (!first || !second) throw new Error('test setup invariant')
       expect(out).toContain(`id=${first.finding_id}`)
       expect(out).toContain(`id=${second.finding_id}`)
-      expect(out).toContain('[tool:foo]')
+      expect(out).toContain('[tool-doc:foo]')
       expect(out).toContain('[auth]')
     })
 
@@ -267,13 +267,13 @@ describe('createTraceAnalystKind wires the spec into the Analyst contract', () =
     const a = computeFindingId({
       analyst_id: 'failure-mode',
       area: 'failure-mode',
-      subject: 'tool:foo',
+      subject: 'tool-doc:foo',
       claim: 'tool foo loops on identical args',
     })
     const b = computeFindingId({
       analyst_id: 'failure-mode',
       area: 'failure-mode',
-      subject: 'tool:foo',
+      subject: 'tool-doc:foo',
       claim: 'tool foo loops on identical args.',
     })
     expect(a).toBe(b)
diff --git a/src/index.ts b/src/index.ts
index a559561..f9b4ad3 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -38,6 +38,18 @@ export {
   RAW_FINDING_SCHEMA_PROMPT,
   RawAnalystFindingSchema,
 } from './analyst/finding-signature'
+export type {
+  FindingSubject,
+  FindingSubjectKind,
+} from './analyst/finding-subject'
+export {
+  FINDING_SUBJECT_GRAMMAR_PROMPT,
+  FINDING_SUBJECT_KINDS,
+  FindingSubjectStringSchema,
+  KIND_EXPECTED_SUBJECTS,
+  parseFindingSubject,
+  renderFindingSubject,
+} from './analyst/finding-subject'
 export type { DiffPolicy, FindingsDiff, PersistedFinding } from './analyst/findings-store'
 export { defaultIsMaterial, diffFindings, FindingsStore } from './analyst/findings-store'
 export type {