From 4163588fb11f6f7b01d80fdfdaa4786749a1a678 Mon Sep 17 00:00:00 2001
From: Andrea Gorletta <andrea.alice.gorletta@gmail.com>
Date: Wed, 1 Jul 2026 11:16:51 +0200
Subject: [PATCH 1/3] feat(spec-analyze): MCP knowledge-graph cache in front of
 discovery (FASE 1)

- Context lookup phase: reuse indexed repo-map/+comments.md on a fresh
  (commit_sha==HEAD) hit and skip discovery; miss => original flow unchanged
- Index write-back phase: ingest cartographer/crawler output (non-blocking)
- repo_map_to_bundle.py: deterministic md/JSON -> ingest bundle transform
- machine-readable sidecars (repo-map/index.json, comments.index.json)
- useIndex/workspace args; workspace sanitized to [a-z0-9-]
- docs in workflows/MCP-CACHE.md; FASE 2 (per-area staleness) deferred

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 workflows/MCP-CACHE.md          |  91 ++++++++++++++
 workflows/repo_map_to_bundle.py | 209 ++++++++++++++++++++++++++++++++
 workflows/spec-analyze.js       | 144 ++++++++++++++++++++--
 3 files changed, 435 insertions(+), 9 deletions(-)
 create mode 100644 workflows/MCP-CACHE.md
 create mode 100644 workflows/repo_map_to_bundle.py
diff --git a/workflows/MCP-CACHE.md b/workflows/MCP-CACHE.md
new file mode 100644
index 0000000..b2b1ada
--- /dev/null
+++ b/workflows/MCP-CACHE.md
@@ -0,0 +1,91 @@
+# MCP knowledge-graph cache for `spec-analyze`
+
+Wires the `spec-analyze` workflow (`workflows/spec-analyze.js`) to the **vibingwithclaude**
+MCP knowledge-graph as a cache in front of the discovery phase.
+
+- **On start**, look up indexed context and, on a **fresh** hit, reuse it and skip discovery.
+- **On a miss**, run discovery as usual and **populate** the graph from its output.
+
+Design constraint: the Workflow JS body can only call `agent()/parallel()/pipeline()/phase()/log()`;
+the MCP tools are reachable **only inside spawned agents**. So every MCP read/write lives in a
+dedicated agent (`context-broker`, `indexer`), never in the JS body. (Verified by spike, 2026-07.)
+
+## Flow
+
+```
+Context lookup ──fresh?──► reuse: broker materializes repo-map/ + comments.md → SKIP discovery
+      │ miss / MCP down / useIndex=false
+      ▼
+Context (cartographer ‖ crawler)  →  Index write-back (ingest into the graph, non-blocking)
+      ▼
+Analysis → Verification → Reverse diff → SRS improved → Report   (unchanged)
+```
+
+The `Context` barrier is preserved: `repo-map/` + `comments.md` must exist before the Analysis
+pipeline, whether produced by discovery or materialized from the cache.
+
+## Config (`args`)
+
+| arg | default | meaning |
+|---|---|---|
+| `useIndex` | `true` | set `false` to bypass the MCP entirely (flow is then exactly the original) |
+| `workspace` | `repo` | tenant scope; **sanitized** to `[a-z0-9-]` (the MCP rejects slashes), e.g. `pagopa/interop-be-monorepo` → `pagopa-interop-be-monorepo` |
+
+If the MCP is unreachable the workflow degrades to the original discovery flow — no regressions.
+
+## Node / workspace contract
+
+Workspace = sanitized `owner/repo` (repo-scoped, reusable across features/slugs). Staleness is
+**repo-level** (FASE 1): the indexed bundle's `commit_sha` (stored in `node.extra`) is compared to
+the current branch HEAD; equal ⇒ fresh, else miss. `get_context` returns `card.extra.commit_sha`
+and `body_md` verbatim, so no server change is needed.
+
+Bundles are built deterministically by `workflows/repo_map_to_bundle.py`:
+
+- **cartographer bundle** (`source_kind: cartographer`): one `area` node per area —
+  `name=area_key`, `summary=purpose`, `keywords=paths`, `body_md=<area>.md` (verbatim),
+  `extra={commit_sha, branch, repo, paths, dependsOn, artifact}`, `links: dependsOn→area`.
+- **crawler bundle** (`source_kind: crawler`): one `pr` node per enriched PR — `name=pr-<n>`,
+  `extra={commit_sha, branch, repo, number, state, signals, paths, artifact}`, `links:
+  touches→area` (mapped by path prefix); plus one doc node `comments-md` (type `pr`, number 0)
+  whose `body_md` is the **entire `comments.md` verbatim**, so a fresh hit restores it exactly.
+
+Materialization on a fresh hit rebuilds `repo-map/index.md` (table from the area cards),
+`repo-map/<area>.md` (each area card's `body_md`), and `comments.md` (the `comments-md` node's
+`body_md`). The broker **self-verifies** all targets exist and are non-empty before reporting
+`fresh` — the JS body cannot check the filesystem, so the broker is the only guard.
+
+## Machine-readable sidecars (M4)
+
+For deterministic parsing, cartographer/crawler also emit JSON sidecars alongside the prose:
+- `repo-map/index.json` — `{ "areas": [{ area_key, purpose, paths[], dependsOn[] }] }`
+  (`area_key` = the `<area>.md` filename stem, so JSON and node file join).
+- `comments.index.json` — `{ "prs": [{ number, title, state, signals, paths[] }] }` (enriched PRs only).
+
+## Robustness invariants
+
+- The cache never overrides as-is truth: any doubt ⇒ miss ⇒ full discovery.
+- Write-back is best-effort/non-fatal: an MCP failure logs a warning, never aborts the analysis.
+- `ingest_bundle` is the primary write path (`schema_version: "1.0"`, validated); the indexer
+  falls back to `upsert_node`/`add_link` if `ingest_bundle` is unavailable.
+- `replace_edges` is asserted only for bundles that actually carry the nodes being re-linked, and
+  empty bundles are omitted, so a partial/failed discovery never wipes prior edges.
+
+## Status
+
+**FASE 1 — implemented, reviewed, spikes green (2026-07).** Repo-level fresh/miss, write-back,
+sidecars, deterministic transform. Still pending: merge to `main` and a live E2E run on a real repo.
+
+## FASE 2 backlog (deferred)
+
+Per-area staleness instead of whole-repo fresh/miss:
+- diff changed paths (`gh api repos/<owner>/<repo>/compare/<indexedSha>...<HEAD>`) ∩ each area's key paths → stale areas only.
+- **Requires new contracts that do not exist today**: a cartographer mode for *scoped* re-generation
+  of only the stale areas, and a deterministic re-synthesis of `repo-map/index.md` merging fresh +
+  cached areas. Crawler staleness = PRs merged since `indexedSha`.
+- Configurable tolerance (`maxStaleAreas` / commit budget).
+
+Other open items:
+- `workflows/run_cost.py` `ROLE_PHASE`/`PHASE_ORDER` are stale (old Italian role names, missing
+  `SRS improved`) and don't know the new phases → the RR-5 cost breakdown misclassifies the MCP
+  phases. Pre-existing drift; fix separately.
diff --git a/workflows/repo_map_to_bundle.py b/workflows/repo_map_to_bundle.py
new file mode 100644
index 0000000..0e11f0a
--- /dev/null
+++ b/workflows/repo_map_to_bundle.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python3
+"""
+repo_map_to_bundle.py — deterministic md/JSON -> MCP ingest bundle transform (M3).
+
+Turns the machine-readable discovery artifacts produced by the cartographer and the
+crawler into two vibingwithclaude `ingest_bundle` payloads, so the `indexer` agent only
+has to shell out here and forward the result to the MCP (no fragile in-agent parsing).
+
+Inputs (all produced by the Context phase of workflows/spec-analyze.js):
+  - <repo-map-dir>/index.json      (M4 cartographer sidecar)  + one <area_key>.md per area
+  - <comments-index>               (M4 crawler sidecar, comments.index.json)
+  - <comments-md>                  (crawler comments.md, stored verbatim for faithful reuse)
+
+Output: { "bundles": [ <cartographer bundle>, <crawler bundle> ] } as JSON on stdout (or --out);
+a zero-node bundle is omitted, so 0-2 entries. Each is a ready-to-send ingest_bundle payload. The
+node modeling is the contract the context-broker relies on to MATERIALIZE artifacts on a fresh hit:
+  - area node:  name=area_key, body_md=<area_key>.md content, extra.paths/dependsOn
+                -> broker rebuilds repo-map/index.md (from purposes) + repo-map/<area_key>.md (from body_md)
+  - pr node:    name=pr-<number>, extra.{number,state,signals,paths}, links touches->area
+  - doc node:   name=comments-md (type pr, number 0), body_md=<entire comments.md>
+                -> broker rebuilds comments.md verbatim from this single node
+
+Stdlib only (like fetch_atlassian.py / run_cost.py). No network, no third-party deps.
+"""
+import argparse
+import json
+import os
+import sys
+
+
+def _load_json(path, default):
+    """Parse the JSON document at `path`; yield `default` if it is unset, absent or unparsable."""
+    if path and os.path.isfile(path):
+        try:
+            with open(path, encoding="utf-8") as src:
+                return json.load(src)
+        except (OSError, ValueError) as err:
+            sys.stderr.write(f"WARN: could not read {path}: {err}\n")
+    return default
+
+
+def _read_text(path):
+    """Return the UTF-8 text of `path`, or "" when it is unset, missing or unreadable (warns)."""
+    if path and os.path.isfile(path):
+        try:
+            with open(path, encoding="utf-8") as fh:
+                return fh.read()
+        except (OSError, UnicodeDecodeError) as e:  # bad bytes must not abort the best-effort run
+            sys.stderr.write(f"WARN: could not read {path}: {e}\n")
+    return ""
+
+
+def _touches_area(pr_paths, areas_by_key):
+    """List the area_keys whose key paths overlap a PR's touched paths.
+
+    Overlap is by path prefix in either direction: a touched path equals or sits under an area
+    path, or an area path sits under a touched path. Empty (or slash-only) area paths and empty
+    touched paths are ignored. Keys come back in `areas_by_key` order, each at most once.
+    """
+
+    def _overlaps(area_path):
+        root = area_path.rstrip("/")
+        if not root:
+            return False
+        return any(
+            p == root or p.startswith(root + "/") or root.startswith(p.rstrip("/") + "/")
+            for p in pr_paths if p
+        )
+
+    # dict keys are unique, so the result needs no extra de-duplication pass
+    return [key for key, area_paths in areas_by_key.items() if any(_overlaps(ap) for ap in area_paths)]
+
+
+def build(args):
+    """Assemble {"bundles": [...]} holding the cartographer/crawler ingest bundles that have nodes."""
+    common_extra = {"commit_sha": args.commit_sha, "branch": args.branch, "repo": args.repo}
+    sha_tag = (args.commit_sha or "nosha")[:12]
+
+    # ---- cartographer bundle (areas) ----
+    index = _load_json(os.path.join(args.repo_map_dir, "index.json"), {"areas": []})
+    raw_areas = index.get("areas") if isinstance(index, dict) else None
+    # Sidecars are agent-written: tolerate a null/non-list container and non-object entries.
+    areas = [a for a in raw_areas if isinstance(a, dict)] if isinstance(raw_areas, list) else []
+    areas_by_key = {}
+    nodes_by_key = {}
+    for a in areas:
+        key = str(a.get("area_key", "")).strip()
+        # first declaration wins: a repeated area_key would ship two nodes with the same name
+        if not key or key in nodes_by_key:
+            continue
+        paths = [p for p in (a.get("paths") or []) if p]
+        areas_by_key[key] = paths
+        body_md = _read_text(os.path.join(args.repo_map_dir, f"{key}.md"))
+        nodes_by_key[key] = {
+            "type": "area",
+            "name": key,
+            "title": key,
+            "summary": str(a.get("purpose", "")).strip(),
+            "keywords": paths,
+            "body_md": body_md,
+            "extra": dict(common_extra, paths=paths, dependsOn=[d for d in (a.get("dependsOn") or []) if d],
+                          artifact="repo-map"),
+        }
+    # dependsOn links, only to areas that actually exist (deduped, in declaration order)
+    for node in nodes_by_key.values():
+        deps = dict.fromkeys(str(d).strip() for d in node["extra"]["dependsOn"])
+        links = [{"relation": "dependsOn", "target_type": "area", "target_name": d}
+                 for d in deps if d in areas_by_key]
+        if links:
+            node["links"] = links
+
+    cartographer_bundle = {
+        "schema_version": args.schema_version,
+        "bundle_id": f"{args.workspace}-cartographer-{sha_tag}",
+        "source_kind": "cartographer",
+        "workspace": args.workspace,
+        "commit_sha": args.commit_sha,
+        "branch": args.branch,
+        "replace_edges": True,
+        "nodes": list(nodes_by_key.values()),
+    }
+
+    # ---- crawler bundle (PRs + full comments.md doc node) ----
+    cidx = _load_json(args.comments_index, {"prs": []})
+    raw_prs = cidx.get("prs") if isinstance(cidx, dict) else None
+    prs = [pr for pr in raw_prs if isinstance(pr, dict)] if isinstance(raw_prs, list) else []
+    pr_nodes = []
+    for pr in prs:
+        num = pr.get("number")
+        try:
+            num = int(num)
+        except (TypeError, ValueError):
+            sys.stderr.write(f"WARN: skipping PR with non-integer number {num!r}\n")
+            continue
+        pr_paths = [p for p in (pr.get("paths") or []) if p]
+        node = {
+            "type": "pr",
+            "name": f"pr-{num}",
+            "title": str(pr.get("title", "")).strip() or f"PR {num}",
+            "summary": str(pr.get("title", "")).strip(),
+            "extra": dict(common_extra, number=num, state=pr.get("state", ""),
+                          signals=pr.get("signals", ""), paths=pr_paths, artifact="pr"),
+        }
+        links = [{"relation": "touches", "target_type": "area", "target_name": ak}
+                 for ak in _touches_area(pr_paths, areas_by_key)]
+        if links:
+            node["links"] = links
+        pr_nodes.append(node)
+
+    # one doc node carrying the full comments.md verbatim (faithful reuse on fresh hit)
+    comments_md = _read_text(args.comments_md)
+    if comments_md.strip():
+        pr_nodes.append({
+            "type": "pr",
+            "name": "comments-md",
+            "title": "comments.md (full crawler output)",
+            "summary": "verbatim crawler comments.md for faithful cache reuse",
+            "body_md": comments_md,
+            "extra": dict(common_extra, number=0, artifact="comments-md"),
+        })
+
+    # replace_edges wipes existing edges for these nodes before re-adding them. Only assert it for
+    # the crawler bundle when we actually have PR nodes to re-link: a comments-md-only bundle (e.g.
+    # comments.index.json missing) must NOT wipe prior `touches` edges it cannot re-establish.
+    has_pr_nodes = any(n["name"] != "comments-md" for n in pr_nodes)
+    crawler_bundle = {
+        "schema_version": args.schema_version,
+        "bundle_id": f"{args.workspace}-crawler-{sha_tag}",
+        "source_kind": "crawler",
+        "workspace": args.workspace,
+        "commit_sha": args.commit_sha,
+        "branch": args.branch,
+        "replace_edges": has_pr_nodes,
+        "nodes": pr_nodes,
+    }
+
+    # Omit a bundle with zero nodes so a failed/empty discovery never ships an empty,
+    # edge-wiping bundle to the MCP (the indexer also skips empties defensively).
+    bundles = [b for b in (cartographer_bundle, crawler_bundle) if b["nodes"]]
+    return {"bundles": bundles}
+
+
+def main():
+    """CLI entry point: parse args, build the bundles, write them to --out or print to stdout."""
+    ap = argparse.ArgumentParser(description="Build MCP ingest bundles from repo-map/ + comments artifacts.")
+    ap.add_argument("--repo", required=True)
+    ap.add_argument("--branch", default="main")
+    ap.add_argument("--commit-sha", default="", dest="commit_sha")
+    ap.add_argument("--workspace", required=True, help="sanitized [a-z0-9-] workspace (owner-repo)")
+    ap.add_argument("--repo-map-dir", required=True, dest="repo_map_dir")
+    ap.add_argument("--comments-index", default="", dest="comments_index")
+    ap.add_argument("--comments-md", default="", dest="comments_md")
+    ap.add_argument("--schema-version", default="1.0", dest="schema_version")
+    ap.add_argument("--out", default="", help="output file (default stdout)")
+    args = ap.parse_args()
+    result = build(args)
+    payload = json.dumps(result, ensure_ascii=False, indent=2)
+    if args.out:
+        with open(args.out, "w", encoding="utf-8") as fh:
+            fh.write(payload)
+        # brief summary to stderr for the calling agent's log; build() omits empty bundles, so
+        # count per source_kind instead of unpacking exactly two entries (0, 1 or 2 may be present)
+        n = {b["source_kind"]: len(b["nodes"]) for b in result["bundles"]}
+        sys.stderr.write(f"wrote {args.out}: {n.get('cartographer', 0)} area nodes, {n.get('crawler', 0)} crawler nodes\n")
+    else:
+        print(payload)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/workflows/spec-analyze.js b/workflows/spec-analyze.js
index 079f48a..ccad9ab 100644
--- a/workflows/spec-analyze.js
+++ b/workflows/spec-analyze.js
@@ -3,7 +3,9 @@ export const meta = {
   description: 'Spec-vs-code as a dynamic workflow, A/B-parameterized by args.variant ("prescriptive" default | "goals"): context in parallel, fan-out of N analyzers from the SRS sections, adversarial verification + bounded rework in a pipeline, reverse diff, report. The orchestration skeleton is IDENTICAL across variants — only the two experimental axes differ (axis a = spec style: step-by-step PROCEDURE vs Objective/Contract/Guardrail; axis b = discovery freedom: fixed numeric caps vs budget+judgment).',
   whenToUse: 'After the interactive preflight (credentials + gh check, fetch_atlassian.py, RF-FLOW-2 confirmation and SRS segmentation into <=10 units). Inputs are passed via args (set args.variant to pick the A/B arm). It does NOT run the fetch nor the user confirmation itself. For an A/B comparison, run both variants on the SAME repo/branch/units, into SEPARATE output dirs.',
   phases: [
-    { title: 'Context', detail: 'cartographer || crawler (parallel, independent)', model: 'haiku' },
+    { title: 'Context lookup', detail: 'MCP cache read: reuse indexed repo-map/+comments.md if FRESH (commit_sha==HEAD), else miss (skipped when useIndex=false)', model: 'haiku' },
+    { title: 'Context', detail: 'cartographer || crawler (parallel, independent) — skipped on a fresh cache hit', model: 'haiku' },
+    { title: 'Index write-back', detail: 'MCP cache populate: ingest cartographer/crawler output after a real discovery (non-blocking)', model: 'haiku' },
     { title: 'Analysis', detail: 'fan-out: one analyzer per SRS unit', model: 'opus' },
     { title: 'Verification', detail: 'verifier (opus) per finding + bounded rework (opus, <=1 round)', model: 'opus' },
     { title: 'Reverse diff', detail: 'reverse-scout over the definitive findings', model: 'sonnet' },
@@ -58,6 +60,20 @@ const cardsPath = A.cardsPath || null
 const units = Array.isArray(A.units) ? A.units : []
 const mergeNote = A.mergeNote || null
 
+// ---------------------------------------------------------------------------
+// MCP knowledge-graph cache (vibingwithclaude) — added on top of the original spec.
+// The workflow can (1) look up indexed context BEFORE discovery and reuse it when it is
+// FRESH (repo-level: the indexed cartographer/crawler bundle's commit_sha == current HEAD),
+// and (2) populate the index AFTER a real discovery. All MCP I/O happens inside agents
+// (the Workflow JS body cannot call MCP tools directly); this is best-effort and NEVER
+// blocks the analysis — if the MCP is down or useIndex=false, the flow is exactly the
+// original one. FASE 1 is repo-level fresh/miss only (no per-area staleness/merge yet).
+const useIndex = A.useIndex !== false // default ON; set args.useIndex=false to bypass the MCP entirely
+// The MCP validates `workspace` to [a-z0-9-] (lowercased) and REJECTS slashes, so owner/repo
+// must be sanitized (e.g. pagopa/interop-be-monorepo -> pagopa-interop-be-monorepo).
+const sanitizeWorkspace = (s) => String(s || '').toLowerCase().replace(/[^a-z0-9-]+/g, '-').replace(/^-+|-+$/g, '')
+const workspace = sanitizeWorkspace(A.workspace) || sanitizeWorkspace(repo) // an override that sanitizes to '' (e.g. "///") falls back to the repo scope
+
 if (!repo || !slug || units.length === 0) {
   throw new Error('Missing args: repo, slug and a non-empty units[] are required. Run the interactive driver first (preflight + fetch + confirmation + segmentation).')
 }
@@ -116,6 +132,8 @@ const TAIL_RESERVE = REPORT_RESERVE + EDITOR_RESERVE // editor (parallel) + repo
 // run — this is surfaced in RR-5.
 // ---------------------------------------------------------------------------
 const MODELS = {
+  contextBroker: 'haiku', // MCP cache lookup (read) + fresh-materialization — cheap/mechanical
+  indexer: 'haiku',       // MCP write-back (populate) after a real discovery — cheap/mechanical
   cartographer: 'haiku',
   crawler: 'haiku',
   analyzer: 'opus',
@@ -310,6 +328,22 @@ const REPORT_SCHEMA = {
   },
 }
 
+// ---------------------------------------------------------------------------
+// M4 — machine-readable sidecars for the MCP indexer (deterministic parsing by
+// workflows/repo_map_to_bundle.py). Shared by BOTH variants so they never drift.
+// The sidecars DUPLICATE the human index (index.md table / comments.md head index)
+// in a stable JSON shape; the prose artifacts stay the contract toward the analyzers.
+// ---------------------------------------------------------------------------
+const CARTO_SIDECAR = `
+MACHINE-READABLE SIDECAR (ALSO write, for the MCP indexer): ${outputDir}/repo-map/index.json
+{ "areas": [ { "area_key": "<EXACTLY the <area>.md filename stem, no extension>", "purpose": "<one line>", "paths": ["<key path>", ...], "dependsOn": ["<area_key>", ...] } ] }
+area_key MUST equal the <area>.md filename stem so the JSON and the node file can be joined. Include EVERY area; no file content.`.trim()
+
+const CRAWLER_SIDECAR = `
+MACHINE-READABLE SIDECAR (ALSO write, for the MCP indexer): ${base}/comments.index.json
+{ "prs": [ { "number": <int>, "title": "<str>", "state": "<str>", "signals": "<S1..S4 or free>", "paths": ["<touched path>", ...] } ] }
+List ONLY the enriched (kept) PRs, mirroring the head index table of comments.md; discarded PRs are excluded.`.trim()
+
 // ---------------------------------------------------------------------------
 // Role prompts (inlined, faithful to the prescriptive variant's agents/*.md)
 // ---------------------------------------------------------------------------
@@ -326,6 +360,7 @@ PROCEDURE
 4. Write ${outputDir}/repo-map/index.md as the first consultable thing: a | area | node | purpose | table listing every node.
 
 OUTPUT: write ONLY inside ${outputDir}/repo-map/. No file content, compact nodes, as-is ${branch}, no secrets.
+${CARTO_SIDECAR}
 Return a short text summary (number of areas, index path).`
 
 const crawlerPrompt = `${COMMON}
@@ -344,6 +379,7 @@ PROCEDURE
 4. LIMIT: at most ~30 enriched PRs; beyond that keep the most relevant ones and FLAG the cut.
 
 OUTPUT: ${base}/comments.md. At the HEAD the PR->path index: | PR | Title | State | Signals | Touched paths |. Then one section per PR with the selected comments (author, file:line, text). At the tail the discarded PRs with reason.
+${CRAWLER_SIDECAR}
 Discovery does NOT stop at Jira links; open PRs = context; no secrets.
 Return a short summary (candidate PRs, enriched, discarded, any cut).`
 
@@ -433,6 +469,7 @@ TOOLS: Bash, Read, Write only.
 
 OBJECTIVE: produce enough as-is orientation of the repo that the other roles can locate code without re-reading everything. Orientation only - do NOT read file contents.
 OUTPUT CONTRACT: a segmented repo-map/ under ${outputDir}/repo-map/ - index.md FIRST (a | area | node | purpose | table, the first consultable thing) plus one compact <area>.md node per coherent area (purpose, key paths WITHOUT content, optional dependsOn). Write ONLY inside ${outputDir}/repo-map/.
+${CARTO_SIDECAR}
 GUARDRAILS/INVARIANTS: no file content; compact nodes; repo-scoped & reusable; as-is truth of ${branch}; no secrets; minimal tools. HOW you build it (which gh calls, how you group the areas, aim ~5-15 nodes) is YOUR judgment.
 Return a short text summary (number of areas, index path).`
 
@@ -443,6 +480,7 @@ TOOLS: Bash, Read, Write only.
 
 OBJECTIVE: a pre-localization overview of the PRs and comments relevant to this feature/domain.
 OUTPUT CONTRACT: ${base}/comments.md with, at the HEAD, an index that makes the comments consultable BY PATH (at least: | PR | Title | State | Signals | Touched paths |) and, below, the selected comments per PR (author, file:line, text); the discarded PRs listed at the tail with a one-line reason.
+${CRAWLER_SIDECAR}
 GUARDRAILS/INVARIANTS: discovery does NOT stop at Jira links - draw on AT LEAST Jira remote links, issue keys, feature terms from ${srsPath}, OPEN PRs against ${branch}, AND any other useful signal, by judgment (the concrete queries/commands are YOURS to choose); dedupe (same author+text) and filter noise (bots, LGTM, CI); OPEN PRs = context, NOT coverage. COST BUDGET, NO FIXED NUMERIC CEILING: keep the number of enriched PRs reasonable, prioritize by relevance, and FLAG every cut - never a silent truncation. Judgment decides how many PRs deserve enrichment. No secrets.
 Return a short summary (candidate PRs, enriched, discarded, any flagged cut).`
 
@@ -503,6 +541,70 @@ const P = (VARIANT === 'goals')
   ? { cartographer: cartographerPromptGoals, crawler: crawlerPromptGoals, analyzer: analyzerPromptGoals, verifier: verifierPromptGoals, rework: reworkPromptGoals, reverseScout: reverseScoutPromptGoals }
   : { cartographer: cartographerPrompt, crawler: crawlerPrompt, analyzer: analyzerPrompt, verifier: verifierPrompt, rework: reworkPrompt, reverseScout: reverseScoutPrompt }
 
+// ---------------------------------------------------------------------------
+// MCP cache roles (variant-agnostic): context-broker (read+materialize) and indexer
+// (populate). Both reach the vibingwithclaude MCP via ToolSearch — MCP tools cannot be
+// called from the Workflow JS body, only from inside an agent. Verified reachable by spike S0.
+// ---------------------------------------------------------------------------
+const CONTEXT_LOOKUP_SCHEMA = {
+  type: 'object',
+  additionalProperties: true,
+  required: ['status', 'materialized'],
+  properties: {
+    status: { type: 'string', enum: ['fresh', 'miss'], description: 'fresh = indexed bundle commit_sha == current HEAD AND artifacts fully materialized; else miss' },
+    materialized: { type: 'boolean', description: 'true only if repo-map/ AND comments.md were fully rewritten from the index' },
+    headSha: { type: 'string' },
+    indexedSha: { type: 'string', description: 'the commit_sha found on the indexed area nodes (empty if none)' },
+    notes: { type: 'string' },
+  },
+}
+
+const unitTitles = units.map((u) => u.titolo ?? u.title).filter(Boolean).join('; ')
+
+// READ: look up the indexed context and, on a FRESH repo-level hit, rebuild repo-map/ + comments.md.
+const contextBrokerPrompt = `${COMMON}
+
+ROLE: CONTEXT-BROKER (MCP cache lookup, FASE 1 repo-level fresh/miss). Run ONCE, BEFORE discovery.
+TOOLS: Bash, Read, Write, AND the "vibingwithclaude" MCP tools — load them first with ToolSearch query "select:mcp__vibingwithclaude__get_context,mcp__vibingwithclaude__report_miss". (These MCP tools are the ONE exception to the no-extra-tools rule for this role.)
+
+OBJECTIVE: decide if the knowledge-graph already holds a FRESH map+comments for this repo and, if so, materialize them on disk so discovery can be skipped. Repo-level only: no per-area merge.
+
+PROCEDURE
+1. HEAD sha: sha=$(gh api "repos/${repo}/commits/${branch}" --jq .sha) (fallback: gh api with -H 'Accept: application/vnd.github+json'). Record it as headSha. If it cannot be resolved (empty), the answer is MISS.
+2. get_context(text="${repo} ${unitTitles}", workspace="${workspace}", limit=50). Inspect the returned cards.
+3. FRESHNESS (repo-level): among the matches, collect cards of type "area" and read card.extra.commit_sha. The index is FRESH iff there is >=1 area card AND their commit_sha equals headSha AND a card named "comments-md" (type pr) with the SAME commit_sha is present (it carries the full comments.md). Otherwise it is a MISS. On ANY doubt, choose MISS (the cache must never override as-is truth).
+4a. If MISS: write NOTHING; call report_miss(query="${repo} ${unitTitles}", notes="no fresh indexed bundle") for telemetry; return { status:"miss", materialized:false, headSha, indexedSha:"<or empty>", notes }.
+4b. If FRESH: MATERIALIZE (mkdir -p "${outputDir}/repo-map" first), then SELF-VERIFY, then return:
+    - ${outputDir}/repo-map/<area_name>.md  <- each area card's body_md (verbatim).
+    - ${outputDir}/repo-map/index.md        <- a "| area | node | purpose |" table built from the area cards (node = <area_name>.md), so it matches the cartographer contract the analyzers read.
+    - ${base}/comments.md                   <- the body_md of the card named "comments-md" (verbatim).
+    SELF-VERIFY before returning fresh: run  wc -c "${outputDir}/repo-map/index.md" "${base}/comments.md"  and  ls "${outputDir}/repo-map"/*.md  — every target MUST exist and be NON-empty and there must be one <area>.md per area card. The JS body cannot check the filesystem, so YOU are the only guard: if the comments-md card lacks body_md, OR any target is missing/empty, OR the mkdir/writes failed, return { status:"miss", materialized:false, ... } so a full discovery runs. Return { status:"fresh", materialized:true, ... } ONLY when the self-verify passes for ALL targets.
+INVARIANTS: as-is truth wins over cache; write only under ${outputDir}/repo-map/ and ${base}/; no secrets. Return the structured object.`
+
+// POPULATE: after a real discovery, ingest the cartographer/crawler artifacts into the graph.
+const indexerPrompt = `${COMMON}
+
+ROLE: INDEXER (MCP cache write-back). Run ONCE, AFTER a real discovery (miss path). Best-effort, NON-blocking.
+TOOLS: Bash (gh, python3), Read, AND the "vibingwithclaude" MCP tools — load them with ToolSearch query "select:mcp__vibingwithclaude__ingest_bundle,mcp__vibingwithclaude__upsert_node,mcp__vibingwithclaude__add_link".
+
+OBJECTIVE: index the just-produced repo-map/ + comments.md into the graph under workspace "${workspace}", tagged with the current HEAD commit_sha, so a later run can reuse it.
+
+PROCEDURE
+1. HEAD sha: sha=$(gh api "repos/${repo}/commits/${branch}" --jq .sha). If $sha is empty, log it and skip write-back (non-fatal): nodes without a commit_sha can never be verified as fresh.
+2. Locate the transform script deterministically: root=$(git rev-parse --show-toplevel 2>/dev/null || pwd); script="$root/workflows/repo_map_to_bundle.py"; if [ ! -f "$script" ]; then script=$(find "$root" -name repo_map_to_bundle.py -not -path '*/node_modules/*' | head -1); fi. If still not found, log it and skip write-back (non-fatal).
+3. Build the bundles deterministically:
+   python3 "$script" --repo "${repo}" --branch "${branch}" --commit-sha "$sha" --workspace "${workspace}" \\
+     --repo-map-dir "${outputDir}/repo-map" --comments-index "${base}/comments.index.json" \\
+     --comments-md "${base}/comments.md" --out "${base}/mcp-bundles.json"
+   (If ${base}/comments.index.json is missing because the crawler didn't emit the sidecar, proceed anyway: the script still emits the crawler bundle carrying only the comments-md node — ingest it too, the context-broker needs that node for a fresh hit; note the missing sidecar.)
+4. Read ${base}/mcp-bundles.json. For EACH object in .bundles, call ingest_bundle with its fields (schema_version, bundle_id, source_kind, workspace, commit_sha, branch, replace_edges, nodes).
+5. FALLBACK (ingest_bundle unavailable or schema-rejected): for each node call upsert_node(type,name,title,summary,body_md,keywords,extra,links,workspace="${workspace}"), then add_link for each node.links entry. (upsert_node is proven-good.)
+6. NON-FATAL: if anything fails, log it and return a short summary; NEVER raise. Return a short text summary (bundles sent, nodes ingested, method used, any fallback/skip).
+INVARIANTS: no secrets in nodes; workspace exactly "${workspace}".`
+
+P.contextBroker = contextBrokerPrompt
+P.indexer = indexerPrompt
+
 // Editor: markdown in, markdown out. The improved SRS is a derivative PROPOSAL
 // the user reviews and imports into a NEW Confluence page (Insert > Markup > Markdown).
 // Variant-agnostic restructuring, now ENRICHED with the reverse-diff (code->spec)
@@ -661,15 +763,39 @@ As-is truth; enums to the letter; no secrets. Return the structured object (repo
 // ---------------------------------------------------------------------------
 log(`spec-analyze (${VARIANT}) · style=${AXES.style} · ${repo}@${branch} · ${units.length} SRS units · out ${base}`)
 
+// MCP cache lookup (M1, FASE 1) — reuse indexed repo-map/ + comments.md if FRESH.
+// On a fresh hit the broker materializes both artifacts and we SKIP discovery entirely.
+// Any doubt / MCP down / useIndex=false => miss => the original discovery runs unchanged.
+let cacheFresh = false
+if (useIndex) {
+  phase('Context lookup')
+  const lk = await agent(P.contextBroker, { label: 'context-broker', phase: 'Context lookup', schema: CONTEXT_LOOKUP_SCHEMA, model: MODELS.contextBroker })
+  cacheFresh = Boolean(lk && lk.status === 'fresh' && lk.materialized)
+  if (lk) log(`context lookup: ${lk.status}${lk.headSha ? ` @ ${String(lk.headSha).slice(0, 8)}` : ''}${cacheFresh ? ' — reusing indexed repo-map/ + comments.md, SKIPPING discovery' : ' — running full discovery'}`)
+  else log('context lookup: broker failed — running full discovery')
+}
+
 // RF-FLOW-3 — Context in parallel (barrier: analyzers depend on repo-map/ and comments.md)
-phase('Context')
-const [mapResult, crawlResult] = await parallel([
-  () => agent(P.cartographer, { label: 'cartographer', phase: 'Context', model: MODELS.cartographer }),
-  () => agent(P.crawler, { label: 'crawler', phase: 'Context', model: MODELS.crawler }),
-])
-if (!mapResult || !crawlResult) {
-  const failed = [!mapResult && 'cartographer (repo-map/)', !crawlResult && 'crawler (comments.md)'].filter(Boolean).join(' and ')
-  throw new Error(`Context phase aborted: ${failed} failed — downstream analysis depends on it and cannot proceed reliably.`)
+// Skipped on a fresh cache hit (the broker already wrote both artifacts).
+if (!cacheFresh) {
+  phase('Context')
+  const [mapResult, crawlResult] = await parallel([
+    () => agent(P.cartographer, { label: 'cartographer', phase: 'Context', model: MODELS.cartographer }),
+    () => agent(P.crawler, { label: 'crawler', phase: 'Context', model: MODELS.crawler }),
+  ])
+  if (!mapResult || !crawlResult) {
+    const failed = [!mapResult && 'cartographer (repo-map/)', !crawlResult && 'crawler (comments.md)'].filter(Boolean).join(' and ')
+    throw new Error(`Context phase aborted: ${failed} failed — downstream analysis depends on it and cannot proceed reliably.`)
+  }
+
+  // Index write-back (M2) — populate the graph from the fresh discovery. Best-effort,
+  // NON-blocking: a failure here must never abort the analysis (invariant: single-artifact
+  // errors are warnings). Skipped on a fresh cache hit (nothing new to index).
+  if (useIndex) {
+    phase('Index write-back')
+    const idx = await agent(P.indexer, { label: 'indexer', phase: 'Index write-back', model: MODELS.indexer })
+    if (!idx) log('WARNING: index write-back failed (non-fatal) — the MCP cache was not updated this run.')
+  }
 }
 
 // RF-FLOW-4/5 — Fan-out analyzers -> verifier -> bounded rework, in a pipeline (no barrier between units)

From b76f3bc68efa92a86fb2d10c1f85489370a8629e Mon Sep 17 00:00:00 2001
From: Andrea Gorletta <andrea.alice.gorletta@gmail.com>
Date: Wed, 1 Jul 2026 11:22:33 +0200
Subject: [PATCH 2/3] fix(spec-analyze): harden MCP cache after 2nd review
 round

- repo_map_to_bundle.py: fix crash when writing the --out summary with
  0 or 1 bundles (regression from omitting empty bundles); the indexer
  always uses --out, so an empty/partial discovery would have crashed
- workspace: fall back to sanitized repo if a caller passes a workspace
  that sanitizes to empty (never send an empty workspace to the MCP)
- context-broker prompt: make purpose->card.summary mapping explicit for
  fresh-hit index.md materialization

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 workflows/repo_map_to_bundle.py | 7 ++++---
 workflows/spec-analyze.js       | 6 ++++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/workflows/repo_map_to_bundle.py b/workflows/repo_map_to_bundle.py
index 0e11f0a..1cbb9be 100644
--- a/workflows/repo_map_to_bundle.py
+++ b/workflows/repo_map_to_bundle.py
@@ -198,9 +198,10 @@ def main():
     if args.out:
         with open(args.out, "w", encoding="utf-8") as fh:
             fh.write(payload)
-        # brief summary to stderr for the calling agent's log
-        cb, wb = result["bundles"]
-        sys.stderr.write(f"wrote {args.out}: {len(cb['nodes'])} area nodes, {len(wb['nodes'])} crawler nodes\n")
+        # brief summary to stderr for the calling agent's log. Robust to 0/1/2 bundles:
+        # empty bundles are omitted upstream, so result["bundles"] may have fewer than two.
+        counts = ", ".join(f"{b['source_kind']}={len(b['nodes'])}" for b in result["bundles"])
+        sys.stderr.write(f"wrote {args.out}: {counts or 'no bundles (empty discovery)'}\n")
     else:
         print(payload)
 
diff --git a/workflows/spec-analyze.js b/workflows/spec-analyze.js
index ccad9ab..c002751 100644
--- a/workflows/spec-analyze.js
+++ b/workflows/spec-analyze.js
@@ -72,7 +72,9 @@ const useIndex = A.useIndex !== false // default ON; set args.useIndex=false to
 // The MCP validates `workspace` to [a-z0-9-] (lowercased) and REJECTS slashes, so owner/repo
 // must be sanitized (e.g. pagopa/interop-be-monorepo -> pagopa-interop-be-monorepo).
 const sanitizeWorkspace = (s) => String(s || '').toLowerCase().replace(/[^a-z0-9-]+/g, '-').replace(/^-+|-+$/g, '')
-const workspace = sanitizeWorkspace(A.workspace || repo)
+// Fall back to the (always-present) repo if a caller passes a workspace that sanitizes to empty
+// (e.g. only punctuation), so we never send an empty workspace to the MCP.
+const workspace = sanitizeWorkspace(A.workspace || repo) || sanitizeWorkspace(repo)
 
 if (!repo || !slug || units.length === 0) {
   throw new Error('Missing args: repo, slug and a non-empty units[] are required. Run the interactive driver first (preflight + fetch + confirmation + segmentation).')
@@ -576,7 +578,7 @@ PROCEDURE
 4a. If MISS: write NOTHING; call report_miss(query="${repo} ${unitTitles}", notes="no fresh indexed bundle") for telemetry; return { status:"miss", materialized:false, headSha, indexedSha:"<or empty>", notes }.
 4b. If FRESH: MATERIALIZE (mkdir -p "${outputDir}/repo-map" first), then SELF-VERIFY, then return:
     - ${outputDir}/repo-map/<area_name>.md  <- each area card's body_md (verbatim).
-    - ${outputDir}/repo-map/index.md        <- a "| area | node | purpose |" table built from the area cards (node = <area_name>.md), so it matches the cartographer contract the analyzers read.
+    - ${outputDir}/repo-map/index.md        <- a "| area | node | purpose |" table built from the area cards (area = card.name, node = <card.name>.md, purpose = card.summary), so it matches the cartographer contract the analyzers read.
     - ${base}/comments.md                   <- the body_md of the card named "comments-md" (verbatim).
     SELF-VERIFY before returning fresh: run  wc -c "${outputDir}/repo-map/index.md" "${base}/comments.md"  and  ls "${outputDir}/repo-map"/*.md  — every target MUST exist and be NON-empty and there must be one <area>.md per area card. The JS body cannot check the filesystem, so YOU are the only guard: if the comments-md card lacks body_md, OR any target is missing/empty, OR the mkdir/writes failed, return { status:"miss", materialized:false, ... } so a full discovery runs. Return { status:"fresh", materialized:true, ... } ONLY when the self-verify passes for ALL targets.
 INVARIANTS: as-is truth wins over cache; write only under ${outputDir}/repo-map/ and ${base}/; no secrets. Return the structured object.`

From 38c8aa0a421c759683fae52d456723698031048c Mon Sep 17 00:00:00 2001
From: Andrea Gorletta <andrea.alice.gorletta@gmail.com>
Date: Wed, 1 Jul 2026 15:12:41 +0200
Subject: [PATCH 3/3] feat(spec-analyze): .env-driven config + MCP preflight
 check

- workflows/check_mcp.py (stdlib): single-source-of-truth root .env for
  MCP_URL/MCP_API_KEY (never hardcoded in the workflow). Missing .env ->
  create template + .gitignore and stop (like the Atlassian preflight);
  present -> idempotently (re)register the MCP server from .env and ping it
  (JSON-RPC initialize). Unreachable = warning (workflow degrades to
  no-cache via useIndex), never blocking. Prints resolved SPEC_OUTPUT_DIR.
- default outputDir -> ./output[-goals]; launcher passes SPEC_OUTPUT_DIR
  (absolute) as args.outputDir so output always lands in this repo.
- .gitignore + .env.example; README + MCP-CACHE.md preflight docs.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .env.example              |  16 +++
 .gitignore                |  14 +++
 README.md                 |  25 +++--
 workflows/MCP-CACHE.md    |  22 ++++
 workflows/check_mcp.py    | 219 ++++++++++++++++++++++++++++++++++++++
 workflows/spec-analyze.js |   5 +-
 6 files changed, 291 insertions(+), 10 deletions(-)
 create mode 100644 .env.example
 create mode 100644 .gitignore
 create mode 100644 workflows/check_mcp.py

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..7be5968
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,16 @@
+# spec-code-analyzer configuration — copy to .env (gitignored) and fill in.
+# Single source of truth for the whole preflight. Load it with:  set -a; . ./.env; set +a
+
+# Atlassian (Confluence/Jira fetch — workflows/fetch_atlassian.py)
+ATLASSIAN_BASE_URL=https://your-org.atlassian.net
+ATLASSIAN_EMAIL=you@example.com
+ATLASSIAN_API_TOKEN=
+
+# vibingwithclaude MCP knowledge-graph cache (workflows/check_mcp.py, MCP-CACHE.md).
+# NEVER hardcoded in the workflow — change the url/key here and the preflight re-registers the server.
+MCP_URL=https://mcp.vibingwithclaude.it/mcp
+MCP_API_KEY=
+
+# Where the workflow writes output. Absolute path recommended so it always lands in this repo
+# regardless of the launch cwd (passed to the workflow as args.outputDir).
+SPEC_OUTPUT_DIR=./output
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d7f5f43
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+# spec-code-analyzer secrets & outputs — never commit
+.env
+.env.*
+!.env.example
+
+# workflow output dirs
+output/
+output-goals/
+.spec-analyze*
+
+# misc
+.DS_Store
+node_modules/
+workflows/__pycache__/
diff --git a/README.md b/README.md
index 3352b81..85cb0a6 100644
--- a/README.md
+++ b/README.md
@@ -41,34 +41,38 @@ Le due varianti scrivono in directory separate (`./.spec-analyze` vs `./.spec-an
 ## Componenti
 
 - **`workflows/spec-analyze.js`** — il workflow di orchestrazione (descritto sopra).
-- **`workflows/fetch_atlassian.py`** — fetch deterministico di Confluence/Jira (solo stdlib Python 3, nessuna dipendenza). Produce `srs.md` (+ eventuale `cards.md`). Credenziali da env o `<out>/.env`, mai stampate.
+- **`workflows/fetch_atlassian.py`** — fetch deterministico di Confluence/Jira (solo stdlib Python 3, nessuna dipendenza). Produce `srs.md` (+ eventuale `cards.md`). Credenziali da env o `.env`, mai stampate.
+- **`workflows/check_mcp.py`** — preflight della cache MCP (solo stdlib): legge `MCP_URL`/`MCP_API_KEY` dal `.env` di root (mai hardcodati nel workflow), (ri)registra il server MCP da `.env` e ne verifica la raggiungibilità. Config mancante → crea il template `.env` e si ferma; MCP irraggiungibile → warning (il workflow degrada a no-cache). Vedi `workflows/MCP-CACHE.md`.
 - **`workflows/run_cost.py`** — post-processing del costo reale per-agent/per-fase, ricostruito dai transcript JSONL della run (il workflow internamente vede solo il totale output-token; questo script recupera input + cache per il breakdown della RR-5).
 
 ## Come si usa
 
 Il workflow **non** esegue da solo il fetch né la conferma utente: presuppone un preflight interattivo. Flusso tipico:
 
-**1. Preflight** — verifica credenziali Atlassian e `gh` autenticato:
+Tutta la configurazione vive in un **unico `.env` a root** (gitignored): `ATLASSIAN_*`, `MCP_URL`, `MCP_API_KEY`, `SPEC_OUTPUT_DIR`. Se manca, `check_mcp.py` lo crea come template e si ferma; caricalo nell'ambiente con `set -a; . ./.env; set +a` così le variabili valgono per tutti gli script del preflight.
+
+**1. Preflight** — config `.env`, cache MCP, credenziali Atlassian, `gh`:
 
 ```bash
-export ATLASSIAN_BASE_URL="https://<org>.atlassian.net"
-export ATLASSIAN_EMAIL="tu@example.com"
-export ATLASSIAN_API_TOKEN="..."        # oppure in <out>/.env (gitignored)
+python3 workflows/check_mcp.py        # crea/valida .env, (ri)registra e pinga l'MCP; stampa SPEC_OUTPUT_DIR
+set -a; . ./.env; set +a              # carica ATLASSIAN_*, MCP_*, SPEC_OUTPUT_DIR nell'ambiente
 gh auth status
 ```
 
+`check_mcp.py` esce con **2** (bloccante) se il `.env` manca o `MCP_*` è incompleto; con **0** anche se l'MCP è irraggiungibile (warning: il workflow degrada a no-cache via `useIndex`). L'`MCP_URL` non è mai hardcodato: cambialo nel `.env` e il preflight ri-registra il server.
+
 **2. Fetch della specifica** da Confluence (+ eventuale card Jira):
 
 ```bash
 python3 workflows/fetch_atlassian.py \
   --confluence <id|url> \
   --jira <KEY|url> \
-  --out ./.spec-analyze/<slug>
+  --out "$SPEC_OUTPUT_DIR/<slug>"
 ```
 
 **3. Segmentazione** dell'SRS in **≤10 unità** (sezioni), confermata con l'utente.
 
-**4. Lancio del workflow** (dal main-loop di Claude Code) passando gli `args`:
+**4. Lancio del workflow** (dal main-loop di Claude Code) passando gli `args` (`outputDir` = `SPEC_OUTPUT_DIR`, assoluto → output sempre in questo repo):
 
 ```jsonc
 {
@@ -76,8 +80,11 @@ python3 workflows/fetch_atlassian.py \
   "repo": "owner/repo",
   "branch": "develop",
   "slug": "draft-srs-...",
-  "srsPath": "./.spec-analyze/<slug>/srs.md",
-  "cardsPath": "./.spec-analyze/<slug>/cards.md",  // o null
+  "outputDir": "<SPEC_OUTPUT_DIR>",     // da check_mcp.py; assoluto consigliato
+  "workspace": "owner/repo",            // opzionale; sanificato a [a-z0-9-] per l'MCP
+  "useIndex": true,                     // false = bypassa la cache MCP
+  "srsPath": "<SPEC_OUTPUT_DIR>/<slug>/srs.md",
+  "cardsPath": "<SPEC_OUTPUT_DIR>/<slug>/cards.md",  // o null
   "units": [ { "idx": "01", "titolo": "...", "prose": "..." } ],
   "mergeNote": "eventuali merge di sezioni eseguiti"  // o null
 }
diff --git a/workflows/MCP-CACHE.md b/workflows/MCP-CACHE.md
index b2b1ada..36688f0 100644
--- a/workflows/MCP-CACHE.md
+++ b/workflows/MCP-CACHE.md
@@ -30,9 +30,31 @@ pipeline, whether produced by discovery or materialized from the cache.
 |---|---|---|
 | `useIndex` | `true` | set `false` to bypass the MCP entirely (flow is then exactly the original) |
 | `workspace` | `repo` | tenant scope; **sanitized** to `[a-z0-9-]` (the MCP rejects slashes), e.g. `pagopa/interop-be-monorepo` → `pagopa-interop-be-monorepo` |
+| `outputDir` | `./output` | where output lands; the preflight passes `SPEC_OUTPUT_DIR` (absolute → always this repo) |
 
 If the MCP is unreachable the workflow degrades to the original discovery flow — no regressions.
 
+## Config & preflight (`.env` + `check_mcp.py`)
+
+The MCP url/key are **never hardcoded** in the workflow — the single source of truth is the root
+`.env` (gitignored; see `.env.example`):
+
+```
+MCP_URL=…      MCP_API_KEY=…      SPEC_OUTPUT_DIR=…      ATLASSIAN_*=…
+```
+
+`workflows/check_mcp.py` (stdlib) is the preflight step:
+- **`.env` missing** → creates the template + ensures `.gitignore`, **exits 2** (fill it and re-run).
+- **`MCP_*` incomplete** → exits 2.
+- **otherwise** → idempotently **(re)registers** the `vibingwithclaude` Claude MCP server *from `.env`*
+  (so changing the url/key in `.env` propagates to the agents, which reach the server by name), then
+  **pings** it (JSON-RPC `initialize`). Reachable → exit 0; unreachable/unauthorized → **warning**,
+  exit 0 (the workflow degrades to no-cache via `useIndex`, so a down MCP never aborts an analysis).
+- prints the resolved absolute `SPEC_OUTPUT_DIR` on stdout for the launcher to pass as `args.outputDir`.
+
+The workflow JS body cannot read `.env`/env, so `outputDir` (and the useIndex/workspace knobs) arrive
+via `args`; the preflight is what reads `.env` and wires them in.
+
 ## Node / workspace contract
 
 Workspace = sanitized `owner/repo` (repo-scoped, reusable across features/slugs). Staleness is
diff --git a/workflows/check_mcp.py b/workflows/check_mcp.py
new file mode 100644
index 0000000..bb4992d
--- /dev/null
+++ b/workflows/check_mcp.py
@@ -0,0 +1,219 @@
+#!/usr/bin/env python3
+"""
+check_mcp.py — preflight: config + reachability of the vibingwithclaude MCP (stdlib only).
+
+The MCP url/key are NEVER hardcoded in the workflow: the single source of truth is the root
+`.env`. Behavior mirrors the Atlassian preflight (fetch_atlassian.py): if `.env` is missing it is
+CREATED as a template and the run STOPS so you can fill it in; if present it is read.
+
+Steps:
+  1. Locate `.env` at the repo root (default: two levels up from this script; override --env-file).
+     Missing -> write a template with all keys + ensure `.gitignore`, then EXIT 2 (blocking).
+  2. Load MCP_URL + MCP_API_KEY (real environment overrides `.env`). Missing/empty -> EXIT 2.
+  3. Sync the Claude MCP server registration FROM `.env` (idempotent `claude mcp add`), so a url/key
+     change in `.env` propagates to the agents that reach the server by name. Best-effort + logged;
+     skip with --no-register.
+  4. Ping the endpoint (JSON-RPC `initialize`) to verify reachability + auth.
+       reachable+authorized -> print OK, EXIT 0.
+       unreachable/unauthorized -> print WARNING, EXIT 0 (NON-blocking: the workflow degrades to
+       no-cache via useIndex, so a down MCP must never abort the whole analysis).
+Also prints the resolved SPEC_OUTPUT_DIR (absolute) so the launcher can pass it as args.outputDir.
+
+Exit codes: 0 = ok/degraded (proceed), 2 = blocking config problem (fill .env and re-run).
+"""
+import argparse
+import json
+import os
+import subprocess
+import sys
+import urllib.error
+import urllib.request
+
+SERVER_NAME = "vibingwithclaude"
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+# Template for the single root .env (MCP_* + SPEC_OUTPUT_DIR are new; ATLASSIAN_* kept so one file covers
+# the whole preflight). Values must stay shell-safe: the file is also loaded with `. ./.env` (no bare < > & ;).
+ENV_TEMPLATE = """\
+# spec-code-analyzer configuration — NOT committed (gitignored). Fill in and re-run the preflight.
+
+# Atlassian (Confluence/Jira fetch — see workflows/fetch_atlassian.py)
+ATLASSIAN_BASE_URL=https://your-org.atlassian.net
+ATLASSIAN_EMAIL=you@example.com
+ATLASSIAN_API_TOKEN=
+
+# vibingwithclaude MCP knowledge-graph cache (see workflows/check_mcp.py, MCP-CACHE.md)
+MCP_URL=https://mcp.vibingwithclaude.it/mcp
+MCP_API_KEY=
+
+# Where the workflow writes its output (absolute path recommended so it always lands here)
+SPEC_OUTPUT_DIR=./output
+"""
+
+
+def load_env(path):
+    """Parse a KEY=VALUE .env into a dict; a missing file yields {} (comments/blanks skipped, outer quotes stripped)."""
+    env = {}
+    if not os.path.isfile(path):
+        return env
+    with open(path, encoding="utf-8") as fh:
+        for line in fh:
+            line = line.strip()
+            if not line or line.startswith("#") or "=" not in line:
+                continue
+            k, v = line.split("=", 1)  # split on the first '=' only, so a value may itself contain '='
+            env[k.strip()] = v.strip().strip('"').strip("'")
+    return env
+
+
+def ensure_gitignore(root):
+    """Ensure .env and the output dirs are gitignored (idempotent). Only whole pattern lines count, not comment words."""
+    gi = os.path.join(root, ".gitignore")
+    have = ""
+    if os.path.isfile(gi):
+        with open(gi, encoding="utf-8") as fh:
+            have = fh.read()
+    present = {ln.strip() for ln in have.splitlines() if not ln.lstrip().startswith("#")}
+    missing = [w for w in (".env", "output/", "output-goals/", ".spec-analyze*") if w not in present]
+    if missing:
+        with open(gi, "a", encoding="utf-8") as fh:
+            if have and not have.endswith("\n"):
+                fh.write("\n")
+            fh.write("# spec-code-analyzer secrets & outputs\n" + "\n".join(missing) + "\n")
+
+
+def create_template(path, root):  # write ENV_TEMPLATE to `path`, then make sure `root`/.gitignore covers it
+    with open(path, "w", encoding="utf-8") as fh:
+        fh.write(ENV_TEMPLATE)
+    os.chmod(path, 0o600)  # owner read/write only — the template has slots for an API key and token
+    ensure_gitignore(root)
+
+
+def sync_registration(url, key):
+    """Idempotently register the MCP server from .env so nothing pins the url but .env. Outcome is only printed."""
+    if not _have_claude_cli():
+        print("  ~ 'claude' CLI not found — skipping server registration sync "
+              "(the workflow agents use whatever is already registered).", file=sys.stderr)
+        return
+    # remove any stale local-scope registration, then add from .env; the remove result is deliberately ignored
+    subprocess.run(["claude", "mcp", "remove", SERVER_NAME, "-s", "local"],
+                   capture_output=True, text=True)
+    add = subprocess.run(  # the API key travels as an Authorization header argument on the CLI command line
+        ["claude", "mcp", "add", "--transport", "http", SERVER_NAME, url,
+         "--header", f"Authorization: Bearer {key}", "-s", "local"],
+        capture_output=True, text=True)
+    if add.returncode == 0:
+        print(f"  + registered MCP server '{SERVER_NAME}' from .env (url={url}).", file=sys.stderr)
+    else:  # a failed add is reported on stderr but not raised
+        print(f"  ! could not register '{SERVER_NAME}': {add.stderr.strip() or add.stdout.strip()}",
+              file=sys.stderr)
+
+
+def _have_claude_cli():  # True when the `claude` executable can be launched; its exit status is ignored
+    try:
+        subprocess.run(["claude", "--version"], capture_output=True, text=True)
+    except OSError:  # FileNotFoundError (CLI not installed) is a subclass of OSError
+        return False
+    return True
+
+
+def ping(url, key, timeout=10):
+    """JSON-RPC initialize against the MCP. Returns (ok, detail); never raises, even for a malformed url."""
+    payload = {
+        "jsonrpc": "2.0", "id": 1, "method": "initialize",
+        "params": {
+            "protocolVersion": "2025-06-18",
+            "capabilities": {},
+            "clientInfo": {"name": "spec-analyze-preflight", "version": "1"},
+        },
+    }
+    try:  # Request() raises ValueError on a scheme-less url, so build it inside the guard as well
+        req = urllib.request.Request(
+            url, data=json.dumps(payload).encode("utf-8"), method="POST",
+            headers={
+                "Authorization": f"Bearer {key}",
+                "Content-Type": "application/json",
+                "Accept": "application/json, text/event-stream",
+            })
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            raw = resp.read().decode("utf-8", "replace")
+        # streamable-HTTP may answer as SSE: pull the first `data:` JSON line
+        data = raw
+        if "data:" in raw and not raw.lstrip().startswith("{"):
+            for line in raw.splitlines():
+                if line.startswith("data:"):
+                    data = line[len("data:"):].strip()
+                    break
+        try:
+            obj = json.loads(data)
+        except ValueError:
+            return True, "reachable (non-JSON body; MCP handshake accepted the request)"
+        if isinstance(obj, dict) and obj.get("error"):
+            return False, f"server returned error: {obj['error']}"
+        info = (obj.get("result", {}) or {}).get("serverInfo", {}) if isinstance(obj, dict) else {}
+        return True, f"reachable; serverInfo={info or '(none)'}"
+    except urllib.error.HTTPError as e:  # must precede URLError: HTTPError is a subclass of it
+        if e.code in (401, 403):
+            return False, f"HTTP {e.code} — auth rejected (check MCP_API_KEY)"
+        return False, f"HTTP {e.code}"
+    except urllib.error.URLError as e:
+        return False, f"unreachable: {e.reason}"
+    except Exception as e:  # noqa: BLE001 — preflight must never crash the caller
+        return False, f"error: {e}"
+
+
+def main():
+    """Run the preflight; returns the exit code (0 = proceed, possibly degraded; 2 = blocking config problem)."""
+    ap = argparse.ArgumentParser(description="Preflight config + reachability check for the MCP cache.")
+    ap.add_argument("--env-file", default=os.path.join(REPO_ROOT, ".env"))
+    ap.add_argument("--no-register", action="store_true", help="skip syncing the Claude MCP registration")
+    ap.add_argument("--print-output-dir", action="store_true", help="only print resolved SPEC_OUTPUT_DIR and exit")
+    args = ap.parse_args()
+    env_path = args.env_file
+    root = os.path.dirname(os.path.abspath(env_path)) or REPO_ROOT
+
+    # Step 1 — .env presence (create template + stop if missing, like the Atlassian preflight)
+    if not os.path.isfile(env_path):
+        create_template(env_path, root)
+        print(f"Created {env_path} template (and .gitignore). Fill in MCP_API_KEY / ATLASSIAN_* "
+              f"/ SPEC_OUTPUT_DIR, then re-run the preflight.", file=sys.stderr)
+        return 2
+
+    env = load_env(env_path)
+    # real environment overrides the file; an empty SPEC_OUTPUT_DIR falls back to ./output and "~" is expanded
+    url = os.environ.get("MCP_URL") or env.get("MCP_URL", "")
+    key = os.environ.get("MCP_API_KEY") or env.get("MCP_API_KEY", "")
+    out_dir = os.path.expanduser(os.environ.get("SPEC_OUTPUT_DIR") or env.get("SPEC_OUTPUT_DIR") or "./output")
+    out_abs = out_dir if os.path.isabs(out_dir) else os.path.normpath(os.path.join(root, out_dir))
+
+    if args.print_output_dir:
+        print(out_abs)
+        return 0
+
+    # Step 2 — required MCP config (blocking if incomplete)
+    if not url or not key:
+        missing = ", ".join(k for k, v in (("MCP_URL", url), ("MCP_API_KEY", key)) if not v)
+        print(f"Incomplete MCP config in {env_path}: missing {missing}. Fill it and re-run.", file=sys.stderr)
+        return 2
+
+    print(f"MCP preflight — url={url}  ·  output-dir={out_abs}", file=sys.stderr)
+
+    # Step 3 — sync registration from .env (so the url is pinned only in .env)
+    if not args.no_register:
+        sync_registration(url, key)
+
+    # Step 4 — reachability (NON-blocking: the workflow degrades to no-cache if this fails)
+    ok, detail = ping(url, key)
+    if ok:
+        print(f"  ✓ MCP reachable & authorized — {detail}", file=sys.stderr)
+    else:
+        print(f"  ! MCP NOT reachable — {detail}", file=sys.stderr)
+        print("    Proceeding anyway: the workflow degrades to no-cache (useIndex miss). "
+              "Fix the MCP or run with args.useIndex=false to silence.", file=sys.stderr)
+    # print the resolved output dir on stdout for the launcher to capture (args.outputDir)
+    print(out_abs)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/workflows/spec-analyze.js b/workflows/spec-analyze.js
index c002751..02fde1d 100644
--- a/workflows/spec-analyze.js
+++ b/workflows/spec-analyze.js
@@ -53,7 +53,10 @@ const branch = A.branch || 'main'
 const VARIANT = (A.variant === 'goals') ? 'goals' : 'prescriptive'
 // Default output-dir is variant-aware so the two arms NEVER mix artifacts (compartmentalization,
 // like the original two plugins). The driver may still override outputDir explicitly.
-const outputDir = A.outputDir || (VARIANT === 'goals' ? './.spec-analyze-goals' : './.spec-analyze')
+// The launcher/preflight resolves SPEC_OUTPUT_DIR (from the root .env, see workflows/check_mcp.py)
+// and passes it as args.outputDir — pass an ABSOLUTE path there so output always lands in this repo
+// regardless of the launch cwd. This is only the fallback default when args.outputDir is absent.
+const outputDir = A.outputDir || (VARIANT === 'goals' ? './output-goals' : './output')
 const slug = A.slug
 const srsPath = A.srsPath || `${outputDir}/${slug}/srs.md`
 const cardsPath = A.cardsPath || null