From 67ba4b03824933228c4a3229e526976aad10b4ae Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Wed, 1 Jul 2026 05:49:56 -0400
Subject: [PATCH 1/4] {"schema":"decodex/commit/1","summary":"Modularize
 remaining large smoke runners","authority":"manual"}

---
 scripts/graphiti-zep-docker-temporal-smoke.py | 1297 +----------------
 scripts/graphiti_temporal_smoke/__init__.py   |    1 +
 scripts/graphiti_temporal_smoke/benchmark.py  |  107 ++
 scripts/graphiti_temporal_smoke/common.py     |  137 ++
 scripts/graphiti_temporal_smoke/context.py    |   65 +
 scripts/graphiti_temporal_smoke/corpus.py     |   47 +
 scripts/graphiti_temporal_smoke/fixture.py    |  224 +++
 scripts/graphiti_temporal_smoke/manifest.py   |  175 +++
 scripts/graphiti_temporal_smoke/mapping.py    |   81 +
 .../materialization.py                        |   99 ++
 scripts/graphiti_temporal_smoke/models.py     |   34 +
 scripts/graphiti_temporal_smoke/runner.py     |  151 ++
 scripts/graphiti_temporal_smoke/runtime.py    |  231 +++
 scripts/graphiti_temporal_smoke/summary.py    |   35 +
 ...etta-core-archive-export-readback-smoke.py | 1054 +-------------
 scripts/letta_core_archive_smoke/__init__.py  |    1 +
 scripts/letta_core_archive_smoke/artifacts.py |  280 ++++
 scripts/letta_core_archive_smoke/benchmark.py |   99 ++
 scripts/letta_core_archive_smoke/common.py    |  127 ++
 scripts/letta_core_archive_smoke/context.py   |   52 +
 scripts/letta_core_archive_smoke/fixtures.py  |  225 +++
 scripts/letta_core_archive_smoke/models.py    |   34 +
 scripts/letta_core_archive_smoke/runner.py    |  105 ++
 scripts/letta_core_archive_smoke/runtime.py   |  188 +++
 scripts/ragflow-docker-evidence-smoke.sh      | 1084 +-------------
 scripts/ragflow_smoke/api.sh                  |  183 +++
 scripts/ragflow_smoke/common.sh               |   96 ++
 scripts/ragflow_smoke/docker.sh               |  135 ++
 scripts/ragflow_smoke/fixture.sh              |  157 ++
 scripts/ragflow_smoke/manifest.sh             |  169 +++
 scripts/ragflow_smoke/materialization.sh      |  245 ++++
 scripts/ragflow_smoke/scoring.sh              |   60 +
 scripts/ragflow_smoke/summary.sh              |   49 +
 33 files changed, 3602 insertions(+), 3425 deletions(-)
 create mode 100644 scripts/graphiti_temporal_smoke/__init__.py
 create mode 100644 scripts/graphiti_temporal_smoke/benchmark.py
 create mode 100644 scripts/graphiti_temporal_smoke/common.py
 create mode 100644 scripts/graphiti_temporal_smoke/context.py
 create mode 100644 scripts/graphiti_temporal_smoke/corpus.py
 create mode 100644 scripts/graphiti_temporal_smoke/fixture.py
 create mode 100644 scripts/graphiti_temporal_smoke/manifest.py
 create mode 100644 scripts/graphiti_temporal_smoke/mapping.py
 create mode 100644 scripts/graphiti_temporal_smoke/materialization.py
 create mode 100644 scripts/graphiti_temporal_smoke/models.py
 create mode 100644 scripts/graphiti_temporal_smoke/runner.py
 create mode 100644 scripts/graphiti_temporal_smoke/runtime.py
 create mode 100644 scripts/graphiti_temporal_smoke/summary.py
 create mode 100644 scripts/letta_core_archive_smoke/__init__.py
 create mode 100644 scripts/letta_core_archive_smoke/artifacts.py
 create mode 100644 scripts/letta_core_archive_smoke/benchmark.py
 create mode 100644 scripts/letta_core_archive_smoke/common.py
 create mode 100644 scripts/letta_core_archive_smoke/context.py
 create mode 100644 scripts/letta_core_archive_smoke/fixtures.py
 create mode 100644 scripts/letta_core_archive_smoke/models.py
 create mode 100644 scripts/letta_core_archive_smoke/runner.py
 create mode 100644 scripts/letta_core_archive_smoke/runtime.py
 create mode 100644 scripts/ragflow_smoke/api.sh
 create mode 100644 scripts/ragflow_smoke/common.sh
 create mode 100644 scripts/ragflow_smoke/docker.sh
 create mode 100644 scripts/ragflow_smoke/fixture.sh
 create mode 100644 scripts/ragflow_smoke/manifest.sh
 create mode 100644 scripts/ragflow_smoke/materialization.sh
 create mode 100644 scripts/ragflow_smoke/scoring.sh
 create mode 100644 scripts/ragflow_smoke/summary.sh

diff --git a/scripts/graphiti-zep-docker-temporal-smoke.py b/scripts/graphiti-zep-docker-temporal-smoke.py
index ab86e731..906c50e5 100644
--- a/scripts/graphiti-zep-docker-temporal-smoke.py
+++ b/scripts/graphiti-zep-docker-temporal-smoke.py
@@ -3,1302 +3,7 @@
 
 from __future__ import annotations
 
-import json
-import os
-import shutil
-import socket
-import subprocess
-import sys
-import textwrap
-import time
-from dataclasses import dataclass
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any
-
-
-SCRIPT_DIR = Path(__file__).resolve().parent
-ROOT_DIR = SCRIPT_DIR.parent
-REPORT_DIR = Path(
-    os.environ.get(
-        "ELF_GRAPHITI_ZEP_SMOKE_REPORT_DIR",
-        ROOT_DIR / "tmp" / "real-world-memory" / "graphiti-zep-smoke",
-    )
-)
-WORK_DIR = Path(os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_WORK_DIR", REPORT_DIR / "work"))
-OUT = Path(os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_OUT", REPORT_DIR / "graphiti-zep-smoke.json"))
-MANIFEST_OUT = Path(
-    os.environ.get(
-        "ELF_GRAPHITI_ZEP_SMOKE_MANIFEST_OUT",
-        REPORT_DIR / "memory_projects_manifest.graphiti-zep-smoke.json",
-    )
-)
-SUMMARY_OUT = Path(os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_SUMMARY_OUT", REPORT_DIR / "summary.json"))
-REPORT_JSON = Path(
-    os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_REPORT_JSON", REPORT_DIR / "graphiti-zep-report.json")
-)
-REPORT_MD = Path(
-    os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_REPORT_MD", REPORT_DIR / "graphiti-zep-report.md")
-)
-FIXTURE_DIR = REPORT_DIR / "graphiti-zep-fixtures"
-LOG_DIR = REPORT_DIR / "logs"
-
-RUN_ID = os.environ.get(
-    "ELF_GRAPHITI_ZEP_SMOKE_RUN_ID",
-    f"graphiti-zep-docker-smoke-{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}",
-)
-RUN_LIVE = os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_RUN", "0") == "1"
-ALLOW_HOST = os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_ALLOW_HOST", "0") == "1"
-INSTALL_GRAPHITI = os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_INSTALL", "1") == "1"
-GRAPHITI_VERSION = os.environ.get("ELF_GRAPHITI_ZEP_VERSION", "0.21.0")
-GRAPHITI_PACKAGE = os.environ.get(
-    "ELF_GRAPHITI_ZEP_PACKAGE",
-    f"graphiti-core[falkordb]=={GRAPHITI_VERSION}",
-)
-GRAPHITI_REF = os.environ.get("ELF_GRAPHITI_ZEP_REF", f"pypi:{GRAPHITI_PACKAGE}")
-FALKORDB_HOST = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_HOST", "graphiti-falkordb")
-FALKORDB_PORT = int(os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_PORT", "6379"))
-FALKORDB_DATABASE = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_DATABASE", "elf_graphiti_zep_smoke")
-FALKORDB_USERNAME = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_USERNAME", "")
-FALKORDB_PASSWORD = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_PASSWORD", "")
-API_KEY = os.environ.get(
-    "ELF_GRAPHITI_ZEP_API_KEY",
-    os.environ.get("GRAPHITI_OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", "")),
-)
-API_BASE = os.environ.get("ELF_GRAPHITI_ZEP_API_BASE", os.environ.get("OPENAI_BASE_URL", ""))
-LLM_MODEL = os.environ.get("ELF_GRAPHITI_ZEP_LLM_MODEL", "gpt-4o-mini")
-EMBEDDING_MODEL = os.environ.get("ELF_GRAPHITI_ZEP_EMBEDDING_MODEL", "text-embedding-3-small")
-TIMEOUT_SECONDS = int(os.environ.get("ELF_GRAPHITI_ZEP_TIMEOUT_SECONDS", "900"))
-STARTUP_ATTEMPTS = int(os.environ.get("ELF_GRAPHITI_ZEP_STARTUP_ATTEMPTS", "30"))
-STARTUP_INTERVAL_SECONDS = float(os.environ.get("ELF_GRAPHITI_ZEP_STARTUP_INTERVAL_SECONDS", "2"))
-
-
-@dataclass
-class StatusState:
-    """Typed status for generated Graphiti/Zep smoke artifacts."""
-
-    setup: str = "blocked"
-    run: str = "not_encoded"
-    result: str = "blocked"
-    overall: str = "blocked"
-    evidence_class: str = "research_gate"
-    failure_class: str = "graphiti_zep_live_run_disabled"
-    failure_reason: str = (
-        "Graphiti/Zep temporal graph live run is opt-in; set "
-        "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 and provide explicit "
-        "provider configuration to attempt the Docker-local FalkorDB smoke."
-    )
-
-
-@dataclass
-class CommandRecord:
-    """Captured command result without secret-bearing environment values."""
-
-    label: str
-    command: list[str]
-    status: str
-    elapsed_ms: float
-    stdout_artifact: str | None
-    stderr_artifact: str | None
-    returncode: int | None
-    reason: str
-
-
-def utc_now() -> str:
-    """Return an RFC3339 UTC timestamp."""
-
-    return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
-
-
-def rel(path: Path) -> str:
-    """Return a repository-relative path when possible."""
-
-    try:
-        return str(path.resolve().relative_to(ROOT_DIR))
-    except ValueError:
-        return str(path)
-
-
-def mkdirs() -> None:
-    """Create output directories."""
-
-    for path in (REPORT_DIR, WORK_DIR, FIXTURE_DIR, LOG_DIR):
-        path.mkdir(parents=True, exist_ok=True)
-
-
-def write_json(path: Path, payload: Any) -> None:
-    """Write stable, pretty JSON."""
-
-    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
-
-
-def run_scored_report(fixture_path: Path, manifest_path: Path, status: StatusState) -> dict[str, Any]:
-    """Score the generated temporal smoke fixture through the real-world job runner."""
-
-    run_cmd = [
-        "cargo",
-        "run",
-        "-p",
-        "elf-eval",
-        "--bin",
-        "real_world_job_benchmark",
-        "--",
-        "run",
-        "--fixtures",
-        str(fixture_path),
-        "--out",
-        str(REPORT_JSON),
-        "--run-id",
-        "real-world-memory-live-graphiti-zep",
-        "--adapter-id",
-        "graphiti_zep_temporal_smoke",
-        "--adapter-name",
-        "Graphiti/Zep Docker temporal smoke adapter",
-        "--adapter-behavior",
-        "docker_python_falkordb_temporal_smoke",
-        "--adapter-storage-status",
-        status.setup,
-        "--adapter-runtime-status",
-        status.overall,
-        "--adapter-notes",
-        "Generated by the Graphiti/Zep Docker temporal smoke; pass or wrong_result requires current and historical validity-window facts mapped to generated evidence ids, while provider/setup limits remain typed.",
-        "--external-adapter-manifest",
-        str(manifest_path),
-    ]
-    publish_cmd = [
-        "cargo",
-        "run",
-        "-p",
-        "elf-eval",
-        "--bin",
-        "real_world_job_benchmark",
-        "--",
-        "publish",
-        "--report",
-        str(REPORT_JSON),
-        "--out",
-        str(REPORT_MD),
-    ]
-
-    subprocess.run(run_cmd, cwd=ROOT_DIR, check=True)
-    subprocess.run(publish_cmd, cwd=ROOT_DIR, check=True)
-
-    report = json.loads(REPORT_JSON.read_text(encoding="utf-8"))
-
-    return {
-        "json": rel(REPORT_JSON),
-        "markdown": rel(REPORT_MD),
-        "summary": report.get("summary", {}),
-        "suites": report.get("suites", []),
-    }
-
-
-def scored_benchmark(report: dict[str, Any] | None) -> dict[str, Any]:
-    """Extract the post-score benchmark status from a real_world_job report."""
-
-    if report is None:
-        return {
-            "schema": "elf.scored_benchmark_status/v1",
-            "source": "real_world_job_benchmark",
-            "status": "pending",
-            "reason": "The smoke materialization was written before benchmark scoring completed.",
-        }
-
-    summary = report.get("summary", {})
-    counts = {
-        status: int(summary.get(status, 0) or 0)
-        for status in (
-            "pass",
-            "wrong_result",
-            "lifecycle_fail",
-            "incomplete",
-            "blocked",
-            "not_encoded",
-        )
-    }
-    status = next((name for name, count in counts.items() if name != "pass" and count > 0), "pass")
-
-    return {
-        "schema": "elf.scored_benchmark_status/v1",
-        "source": "real_world_job_benchmark",
-        "status": status,
-        "counts": counts,
-        "job_count": int(summary.get("job_count", 0) or 0),
-        "mean_score": summary.get("mean_score"),
-        "evidence_coverage": summary.get("evidence_coverage"),
-    }
-
-
-def command_available(command: str) -> bool:
-    """Return whether a command is on PATH."""
-
-    return shutil.which(command) is not None
-
-
-def dir_size(path: Path) -> int:
-    """Return total file size for a directory or file."""
-
-    if not path.exists():
-        return 0
-    if path.is_file():
-        return path.stat().st_size
-
-    return sum(item.stat().st_size for item in path.rglob("*") if item.is_file())
-
-
-def file_count(path: Path) -> int:
-    """Return file count for a directory."""
-
-    if not path.exists():
-        return 0
-
-    return sum(1 for item in path.rglob("*") if item.is_file())
-
-
-def temporal_facts() -> list[dict[str, Any]]:
-    """Return the generated-public temporal fact corpus."""
-
-    return [
-        {
-            "evidence_id": "graphiti-zep-old-owner",
-            "claim_id": "relation_historical_owner",
-            "source": "Team Delta",
-            "edge_name": "OWNED_REVIEW",
-            "target": "deployment method review",
-            "fact": "Team Delta owned deployment method review before 2026-06-06.",
-            "valid_at": "2026-06-05T00:00:00Z",
-            "invalid_at": "2026-06-08T00:00:00Z",
-            "created_at": "2026-06-05T00:00:00Z",
-            "current": False,
-        },
-        {
-            "evidence_id": "graphiti-zep-current-owner",
-            "claim_id": "relation_current_owner",
-            "source": "Team Echo",
-            "edge_name": "OWNS_REVIEW",
-            "target": "deployment method review",
-            "fact": "Team Echo owns deployment method review since 2026-06-08.",
-            "valid_at": "2026-06-08T00:00:00Z",
-            "invalid_at": None,
-            "created_at": "2026-06-08T00:00:00Z",
-            "current": True,
-        },
-        {
-            "evidence_id": "graphiti-zep-owner-rationale",
-            "claim_id": "relation_owner_update_rationale",
-            "source": "single-user production runbook scope",
-            "edge_name": "MOVED_OWNERSHIP_TO",
-            "target": "Team Echo",
-            "fact": "Ownership moved to Team Echo after single-user production runbook scope changed.",
-            "valid_at": "2026-06-08T00:05:00Z",
-            "invalid_at": None,
-            "created_at": "2026-06-08T00:05:00Z",
-            "current": True,
-        },
-    ]
-
-
-def command_to_json(record: CommandRecord) -> dict[str, Any]:
-    """Serialize a command record."""
-
-    return {
-        "label": record.label,
-        "status": record.status,
-        "command": record.command,
-        "elapsed_ms": round(record.elapsed_ms, 3),
-        "stdout_artifact": record.stdout_artifact,
-        "stderr_artifact": record.stderr_artifact,
-        "returncode": record.returncode,
-        "reason": record.reason,
-    }
-
-
-def run_command(
-    label: str,
-    command: list[str],
-    cwd: Path,
-    timeout: int = TIMEOUT_SECONDS,
-    extra_env: dict[str, str] | None = None,
-) -> CommandRecord:
-    """Run a subprocess and capture stdout/stderr artifacts."""
-
-    cwd.mkdir(parents=True, exist_ok=True)
-    stdout_path = LOG_DIR / f"{label}.stdout.log"
-    stderr_path = LOG_DIR / f"{label}.stderr.log"
-    env = os.environ.copy()
-
-    if extra_env:
-        env.update(extra_env)
-
-    started = time.monotonic()
-    try:
-        proc = subprocess.run(
-            command,
-            cwd=cwd,
-            env=env,
-            text=True,
-            capture_output=True,
-            timeout=timeout,
-            check=False,
-        )
-        elapsed_ms = (time.monotonic() - started) * 1000
-        stdout_path.write_text(proc.stdout, encoding="utf-8")
-        stderr_path.write_text(proc.stderr, encoding="utf-8")
-        status = "pass" if proc.returncode == 0 else "incomplete"
-        reason = "Command completed." if proc.returncode == 0 else f"Command exited {proc.returncode}."
-
-        return CommandRecord(
-            label=label,
-            command=command,
-            status=status,
-            elapsed_ms=elapsed_ms,
-            stdout_artifact=rel(stdout_path),
-            stderr_artifact=rel(stderr_path),
-            returncode=proc.returncode,
-            reason=reason,
-        )
-    except subprocess.TimeoutExpired as err:
-        elapsed_ms = (time.monotonic() - started) * 1000
-        stdout_path.write_text(err.stdout or "", encoding="utf-8")
-        stderr_path.write_text(err.stderr or "", encoding="utf-8")
-
-        return CommandRecord(
-            label=label,
-            command=command,
-            status="incomplete",
-            elapsed_ms=elapsed_ms,
-            stdout_artifact=rel(stdout_path),
-            stderr_artifact=rel(stderr_path),
-            returncode=None,
-            reason=f"Command timed out after {timeout} seconds.",
-        )
-
-
-def wait_for_falkordb(command_records: list[CommandRecord]) -> bool:
-    """Poll the configured FalkorDB TCP endpoint."""
-
-    started = time.monotonic()
-    attempts: list[dict[str, Any]] = []
-
-    for attempt in range(1, STARTUP_ATTEMPTS + 1):
-        try:
-            with socket.create_connection((FALKORDB_HOST, FALKORDB_PORT), timeout=2):
-                elapsed_ms = (time.monotonic() - started) * 1000
-                attempts.append({"attempt": attempt, "status": "pass", "elapsed_ms": round(elapsed_ms, 3)})
-                path = LOG_DIR / "falkordb-startup-attempts.json"
-                write_json(path, attempts)
-                command_records.append(
-                    CommandRecord(
-                        label="falkordb-startup",
-                        command=["tcp-connect", FALKORDB_HOST, str(FALKORDB_PORT)],
-                        status="pass",
-                        elapsed_ms=elapsed_ms,
-                        stdout_artifact=rel(path),
-                        stderr_artifact=None,
-                        returncode=0,
-                        reason="FalkorDB TCP endpoint accepted a connection.",
-                    )
-                )
-                return True
-        except OSError as err:
-            attempts.append({"attempt": attempt, "status": "incomplete", "reason": str(err)})
-            time.sleep(STARTUP_INTERVAL_SECONDS)
-
-    elapsed_ms = (time.monotonic() - started) * 1000
-    path = LOG_DIR / "falkordb-startup-attempts.json"
-    write_json(path, attempts)
-    command_records.append(
-        CommandRecord(
-            label="falkordb-startup",
-            command=["tcp-connect", FALKORDB_HOST, str(FALKORDB_PORT)],
-            status="incomplete",
-            elapsed_ms=elapsed_ms,
-            stdout_artifact=rel(path),
-            stderr_artifact=None,
-            returncode=None,
-            reason="FalkorDB TCP endpoint did not become reachable.",
-        )
-    )
-    return False
-
-
-def init_graphiti(command_records: list[CommandRecord]) -> tuple[bool, Path]:
-    """Create a venv and install Graphiti with FalkorDB support."""
-
-    venv_dir = WORK_DIR / ".venv"
-    python = venv_dir / "bin" / "python"
-
-    if INSTALL_GRAPHITI:
-        venv_record = run_command("python-venv", [sys.executable, "-m", "venv", str(venv_dir)], WORK_DIR)
-        command_records.append(venv_record)
-        if venv_record.status != "pass":
-            return False, python
-
-        install_record = run_command(
-            "graphiti-install",
-            [str(python), "-m", "pip", "install", "--disable-pip-version-check", GRAPHITI_PACKAGE],
-            WORK_DIR,
-        )
-        command_records.append(install_record)
-        if install_record.status != "pass":
-            return False, python
-    elif not python.exists():
-        command_records.append(
-            CommandRecord(
-                label="graphiti-install",
-                command=["graphiti-core"],
-                status="incomplete",
-                elapsed_ms=0.0,
-                stdout_artifact=None,
-                stderr_artifact=None,
-                returncode=None,
-                reason="Graphiti install was disabled and no venv python exists.",
-            )
-        )
-        return False, python
-
-    return True, python
-
-
-def write_live_runner(path: Path) -> None:
-    """Write the isolated Graphiti execution script."""
-
-    payload = {
-        "run_id": RUN_ID,
-        "facts": temporal_facts(),
-        "query": "Who currently owns deployment method review, and who owned it historically?",
-        "falkordb": {
-            "host": FALKORDB_HOST,
-            "port": FALKORDB_PORT,
-            "database": FALKORDB_DATABASE,
-        },
-        "models": {
-            "llm": LLM_MODEL,
-            "embedding": EMBEDDING_MODEL,
-            "api_base": API_BASE,
-        },
-    }
-    input_path = WORK_DIR / "graphiti-live-input.json"
-    output_path = WORK_DIR / "graphiti-live-output.json"
-    write_json(input_path, payload)
-    script = f"""
-import asyncio
-import json
-import os
-import uuid
-from datetime import datetime
-from pathlib import Path
-
-from graphiti_core import Graphiti
-from graphiti_core.driver.falkordb_driver import FalkorDriver
-from graphiti_core.edges import EntityEdge
-from graphiti_core.nodes import EntityNode
-
-
-INPUT = Path({str(input_path)!r})
-OUTPUT = Path({str(output_path)!r})
-
-
-def parse_dt(value):
-    if value is None:
-        return None
-    return datetime.fromisoformat(value.replace("Z", "+00:00"))
-
-
-async def main():
-    data = json.loads(INPUT.read_text(encoding="utf-8"))
-    config = data["falkordb"]
-    driver = FalkorDriver(
-        host=config["host"],
-        port=config["port"],
-        username=os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_USERNAME") or None,
-        password=os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_PASSWORD") or None,
-        database=config.get("database") or "default_db",
-    )
-    graphiti = Graphiti(graph_driver=driver)
-    try:
-        await graphiti.build_indices_and_constraints()
-        inserted = []
-        for fact in data["facts"]:
-            group_id = data["run_id"]
-            source_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, group_id + ":source:" + fact["source"]))
-            target_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, group_id + ":target:" + fact["target"]))
-            edge_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, group_id + ":edge:" + fact["evidence_id"]))
-            source_node = EntityNode(uuid=source_uuid, name=fact["source"], group_id=group_id)
-            target_node = EntityNode(uuid=target_uuid, name=fact["target"], group_id=group_id)
-            edge = EntityEdge(
-                uuid=edge_uuid,
-                group_id=group_id,
-                source_node_uuid=source_uuid,
-                target_node_uuid=target_uuid,
-                created_at=parse_dt(fact["created_at"]),
-                name=fact["edge_name"],
-                fact=fact["fact"],
-                valid_at=parse_dt(fact["valid_at"]),
-                invalid_at=parse_dt(fact.get("invalid_at")),
-            )
-            await graphiti.add_triplet(source_node, edge, target_node)
-            inserted.append({{"evidence_id": fact["evidence_id"], "uuid": edge_uuid}})
-
-        results = await graphiti.search(data["query"])
-        serialized = []
-        for edge in results:
-            serialized.append({{
-                "uuid": getattr(edge, "uuid", None),
-                "name": getattr(edge, "name", None),
-                "fact": getattr(edge, "fact", None),
-                "valid_at": str(getattr(edge, "valid_at", "")) if getattr(edge, "valid_at", None) else None,
-                "invalid_at": str(getattr(edge, "invalid_at", "")) if getattr(edge, "invalid_at", None) else None,
-                "source_node_uuid": getattr(edge, "source_node_uuid", None),
-                "target_node_uuid": getattr(edge, "target_node_uuid", None),
-            }})
-
-        OUTPUT.write_text(json.dumps({{"inserted": inserted, "results": serialized}}, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
-    finally:
-        await graphiti.close()
-
-
-asyncio.run(main())
-"""
-    path.write_text(textwrap.dedent(script).lstrip(), encoding="utf-8")
-
-
-def run_graphiti(python: Path, command_records: list[CommandRecord]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
-    """Run the Graphiti live worker and return inserted/search result facts."""
-
-    runner = WORK_DIR / "graphiti_live_runner.py"
-    write_live_runner(runner)
-    env = {
-        "OPENAI_API_KEY": API_KEY,
-        "MODEL_NAME": LLM_MODEL,
-        "LLM_MODEL": LLM_MODEL,
-        "EMBEDDING_MODEL": EMBEDDING_MODEL,
-    }
-
-    if API_BASE:
-        env["OPENAI_BASE_URL"] = API_BASE
-    if FALKORDB_USERNAME:
-        env["ELF_GRAPHITI_ZEP_FALKORDB_USERNAME"] = FALKORDB_USERNAME
-    if FALKORDB_PASSWORD:
-        env["ELF_GRAPHITI_ZEP_FALKORDB_PASSWORD"] = FALKORDB_PASSWORD
-
-    record = run_command("graphiti-live-run", [str(python), str(runner)], WORK_DIR, extra_env=env)
-    command_records.append(record)
-
-    output_path = WORK_DIR / "graphiti-live-output.json"
-    if record.status != "pass" or not output_path.exists():
-        return [], []
-
-    payload = json.loads(output_path.read_text(encoding="utf-8"))
-    return payload.get("inserted", []), payload.get("results", [])
-
-
-def map_observed_facts(results: list[dict[str, Any]], facts: list[dict[str, Any]]) -> dict[str, Any]:
-    """Map Graphiti search results back to expected evidence ids."""
-
-    expected_by_id = {fact["evidence_id"]: fact for fact in facts}
-    mappings: list[dict[str, Any]] = []
-    mapped_ids: list[str] = []
-
-    for fact in facts:
-        matched = [
-            result
-            for result in results
-            if isinstance(result.get("fact"), str) and fact["fact"].lower() in result["fact"].lower()
-        ]
-        if matched:
-            result = matched[0]
-            mapped_ids.append(fact["evidence_id"])
-            mappings.append(
-                {
-                    "evidence_id": fact["evidence_id"],
-                    "claim_id": fact["claim_id"],
-                    "status": "pass",
-                    "uuid": result.get("uuid"),
-                    "fact": result.get("fact"),
-                    "valid_at": result.get("valid_at"),
-                    "invalid_at": result.get("invalid_at"),
-                    "expected_valid_at": fact["valid_at"],
-                    "expected_invalid_at": fact["invalid_at"],
-                    "current": fact["current"],
-                }
-            )
-        else:
-            mappings.append(
-                {
-                    "evidence_id": fact["evidence_id"],
-                    "claim_id": fact["claim_id"],
-                    "status": "blocked",
-                    "expected_valid_at": fact["valid_at"],
-                    "expected_invalid_at": fact["invalid_at"],
-                    "current": fact["current"],
-                }
-            )
-
-    current_ok = any(
-        item["evidence_id"] == "graphiti-zep-current-owner"
-        and item["status"] == "pass"
-        and not item.get("invalid_at")
-        for item in mappings
-    )
-    historical_ok = any(
-        item["evidence_id"] == "graphiti-zep-old-owner"
-        and item["status"] == "pass"
-        and item.get("invalid_at")
-        for item in mappings
-    )
-    rationale_ok = "graphiti-zep-owner-rationale" in mapped_ids
-    required_ids = list(expected_by_id)
-    missing_ids = [evidence_id for evidence_id in required_ids if evidence_id not in mapped_ids]
-
-    if current_ok and historical_ok and rationale_ok:
-        status = "pass"
-        reason = "Graphiti/Zep search results mapped current, historical, and rationale facts with validity windows."
-    else:
-        status = "wrong_result"
-        reason = (
-            "Graphiti/Zep search results did not map all required temporal facts with expected validity "
-            f"windows; missing={', '.join(missing_ids) or 'none'}."
-        )
-
-    return {
-        "status": status,
-        "reason": reason,
-        "expected_evidence_ids": required_ids,
-        "mapped_evidence_ids": mapped_ids,
-        "facts": mappings,
-    }
-
-
-def write_fixture(facts: list[dict[str, Any]], status: StatusState, mapping: dict[str, Any]) -> Path:
-    """Write a generated memory_evolution fixture for the smoke."""
-
-    fixture_path = FIXTURE_DIR / "memory_evolution" / "graphiti_zep_temporal_validity.json"
-    mapped_ids = mapping.get("mapped_evidence_ids", [])
-    claims = []
-
-    if status.result == "pass":
-        claims = [
-            {
-                "claim_id": "relation_current_owner",
-                "text": "Team Echo currently owns deployment method review.",
-                "evidence_ids": [
-                    "graphiti-zep-current-owner",
-                    "graphiti-zep-old-owner",
-                    "graphiti-zep-owner-rationale",
-                ],
-                "confidence": "derived_from_graphiti_temporal_search",
-            },
-            {
-                "claim_id": "relation_historical_owner",
-                "text": "Team Delta owned deployment method review historically.",
-                "evidence_ids": ["graphiti-zep-old-owner"],
-                "confidence": "derived_from_graphiti_temporal_search",
-            },
-            {
-                "claim_id": "relation_owner_update_rationale",
-                "text": "Ownership moved after single-user production runbook scope changed.",
-                "evidence_ids": ["graphiti-zep-owner-rationale"],
-                "confidence": "derived_from_graphiti_temporal_search",
-            },
-        ]
-
-    fixture: dict[str, Any] = {
-        "schema": "elf.real_world_job/v1",
-        "job_id": "graphiti-zep-temporal-validity-001",
-        "suite": "memory_evolution",
-        "title": "Map Graphiti/Zep temporal validity windows to current and historical relation facts",
-        "corpus": {
-            "corpus_id": "graphiti-zep-generated-public-smoke",
-            "profile": "generated_public",
-            "items": [
-                {
-                    "evidence_id": fact["evidence_id"],
-                    "kind": "temporal_fact",
-                    "text": fact["fact"],
-                    "source_ref": {
-                        "schema": "source_ref/v1",
-                        "resolver": "graphiti_zep_smoke/v1",
-                        "ref": {
-                            "run_id": RUN_ID,
-                            "evidence_id": fact["evidence_id"],
-                            "valid_at": fact["valid_at"],
-                            "invalid_at": fact["invalid_at"],
-                        },
-                    },
-                    "created_at": fact["created_at"],
-                }
-                for fact in facts
-            ],
-            "adapter_response": {
-                "adapter_id": "graphiti_zep_temporal_smoke",
-                "answer": {
-                    "content": (
-                        "Team Echo currently owns deployment method review. Team Delta owned it "
-                        "historically, and the move followed the single-user production runbook scope change."
-                        if claims
-                        else ""
-                    ),
-                    "claims": claims,
-                    "evidence_ids": mapped_ids,
-                    "latency_ms": 0.0,
-                    "cost": {
-                        "currency": "USD",
-                        "amount": 0.0,
-                        "input_tokens": 0,
-                        "output_tokens": 0,
-                    },
-                },
-            },
-        },
-        "timeline": [
-            {
-                "event_id": "graphiti-zep-old-owner",
-                "ts": "2026-06-05T00:00:00Z",
-                "actor": "agent",
-                "action": "recorded_relation",
-                "evidence_ids": ["graphiti-zep-old-owner"],
-                "summary": "Team Delta was the historical owner.",
-            },
-            {
-                "event_id": "graphiti-zep-current-owner",
-                "ts": "2026-06-08T00:00:00Z",
-                "actor": "agent",
-                "action": "updated_memory",
-                "evidence_ids": ["graphiti-zep-current-owner", "graphiti-zep-owner-rationale"],
-                "summary": "Team Echo became the current owner after the scope changed.",
-            },
-        ],
-        "prompt": {
-            "role": "user",
-            "content": "Who currently owns deployment method review, and who owned it historically?",
-            "job_mode": "answer",
-            "constraints": ["cite_evidence", "distinguish_current_from_historical"],
-        },
-        "expected_answer": {
-            "must_include": [
-                {
-                    "claim_id": "relation_current_owner",
-                    "text": "Team Echo currently owns deployment method review.",
-                },
-                {
-                    "claim_id": "relation_historical_owner",
-                    "text": "Team Delta owned deployment method review historically.",
-                },
-            ],
-            "must_not_include": ["Team Delta currently owns deployment method review."],
-            "evidence_links": {
-                "relation_current_owner": [
-                    "graphiti-zep-current-owner",
-                    "graphiti-zep-old-owner",
-                    "graphiti-zep-owner-rationale",
-                ],
-                "relation_historical_owner": ["graphiti-zep-old-owner"],
-                "relation_owner_update_rationale": ["graphiti-zep-owner-rationale"],
-            },
-            "answer_type": "direct_answer",
-            "accepted_alternates": [],
-            "requires_caveat": False,
-            "requires_refusal": False,
-        },
-        "required_evidence": [
-            {
-                "evidence_id": "graphiti-zep-current-owner",
-                "claim_id": "relation_current_owner",
-                "requirement": "cite",
-                "quote": "Team Echo owns deployment method review",
-            },
-            {
-                "evidence_id": "graphiti-zep-old-owner",
-                "claim_id": "relation_historical_owner",
-                "requirement": "cite",
-                "quote": "Team Delta owned deployment method review",
-            },
-        ],
-        "negative_traps": [
-            {
-                "trap_id": "old-owner-as-current",
-                "type": "stale_fact",
-                "evidence_ids": ["graphiti-zep-old-owner"],
-                "failure_if_used": False,
-            }
-        ],
-        "scoring_rubric": {
-            "dimensions": {
-                "lifecycle_behavior": {
-                    "weight": 0.4,
-                    "max_points": 1.0,
-                    "criteria": "Requires current-only versus historical temporal validity for relation facts.",
-                },
-                "answer_correctness": {
-                    "weight": 0.25,
-                    "max_points": 1.0,
-                    "criteria": "Would identify current and historical owners separately.",
-                },
-                "evidence_grounding": {
-                    "weight": 0.2,
-                    "max_points": 1.0,
-                    "criteria": "Would cite both current and historical relation evidence.",
-                },
-                "trap_avoidance": {
-                    "weight": 0.15,
-                    "max_points": 1.0,
-                    "criteria": "Would not report the historical owner as current.",
-                },
-            },
-            "pass_threshold": 0.8,
-            "hard_fail_rules": [],
-        },
-        "allowed_uncertainty": {
-            "can_answer_unknown": False,
-            "acceptable_phrases": ["Graphiti/Zep smoke did not return temporal facts."],
-            "fallback_action": "score_temporal_relation_behavior",
-        },
-        "memory_evolution": {
-            "current_evidence_ids": ["graphiti-zep-current-owner"],
-            "historical_evidence_ids": ["graphiti-zep-old-owner"],
-            "stale_trap_ids": ["old-owner-as-current"],
-            "conflicts": [
-                {
-                    "conflict_id": "relation-owner-current-historical",
-                    "claim_id": "relation_current_owner",
-                    "current_evidence_id": "graphiti-zep-current-owner",
-                    "historical_evidence_id": "graphiti-zep-old-owner",
-                    "resolved_by_evidence_id": "graphiti-zep-owner-rationale",
-                }
-            ],
-            "update_rationale": {
-                "claim_id": "relation_owner_update_rationale",
-                "evidence_ids": ["graphiti-zep-owner-rationale"],
-                "available": True,
-            },
-            "temporal_validity": {"required": True, "encoded": True},
-        },
-        "tags": ["external_adapter", "generated_public", "memory_evolution", "reference_graphiti_zep_temporal"],
-    }
-
-    if status.result in {"blocked", "incomplete", "not_encoded"}:
-        fixture["encoding"] = {"status": status.result, "reason": status.failure_reason}
-
-    write_json(fixture_path, fixture)
-
-    return fixture_path
-
-
-def write_materialization(
-    status: StatusState,
-    facts: list[dict[str, Any]],
-    fixture_path: Path,
-    command_records: list[CommandRecord],
-    inserted: list[dict[str, Any]],
-    search_results: list[dict[str, Any]],
-    mapping: dict[str, Any],
-    started_at: float,
-    report: dict[str, Any] | None = None,
-) -> dict[str, Any]:
-    """Write the primary smoke artifact."""
-
-    elapsed_ms = (time.monotonic() - started_at) * 1000
-    payload = {
-        "schema": "elf.graphiti_zep_temporal_smoke/v1",
-        "generated_at": utc_now(),
-        "run_id": RUN_ID,
-        "adapter_id": "graphiti_zep_temporal_smoke",
-        "project": "Graphiti/Zep",
-        "status": status.overall,
-        "materialization_status": {
-            "source": "smoke_materialization",
-            "setup": status.setup,
-            "run": status.run,
-            "result": status.result,
-            "overall": status.overall,
-            "failure_class": status.failure_class,
-            "failure_reason": status.failure_reason,
-        },
-        "scored_benchmark": scored_benchmark(report),
-        "evidence_class": status.evidence_class,
-        "failure": {
-            "class": status.failure_class or None,
-            "reason": status.failure_reason or None,
-        },
-        "artifacts": {
-            "materialization": rel(OUT),
-            "manifest": rel(MANIFEST_OUT),
-            "summary": rel(SUMMARY_OUT),
-            "fixture": rel(fixture_path),
-            "scored_report_json": rel(REPORT_JSON),
-            "scored_report_markdown": rel(REPORT_MD),
-        },
-        "docker_boundary": {
-            "compose_file": "docker-compose.baseline.yml",
-            "service_profile": "graphiti-zep",
-            "graph_store_service": "graphiti-falkordb",
-            "runner_service": "baseline-runner",
-            "runner": "scripts/graphiti-zep-docker-temporal-smoke.py",
-            "host_global_installs_required": False,
-            "docker_only": True,
-        },
-        "provider_configuration": {
-            "package": GRAPHITI_REF,
-            "package_spec": GRAPHITI_PACKAGE,
-            "llm_model": LLM_MODEL,
-            "embedding_model": EMBEDDING_MODEL,
-            "api_base_configured": bool(API_BASE),
-            "api_key_provided": bool(API_KEY),
-            "operator_owned_provider_credentials_used": False,
-            "live_run_enabled": RUN_LIVE,
-            "falkordb": {
-                "host": FALKORDB_HOST,
-                "port": FALKORDB_PORT,
-                "database": FALKORDB_DATABASE,
-                "username_configured": bool(FALKORDB_USERNAME),
-                "password_configured": bool(FALKORDB_PASSWORD),
-            },
-        },
-        "resource_bounds": {
-            "fact_count": len(facts),
-            "timeout_seconds": TIMEOUT_SECONDS,
-            "elapsed_ms": round(elapsed_ms, 3),
-            "work_dir_size_bytes": dir_size(WORK_DIR),
-            "work_dir_file_count": file_count(WORK_DIR),
-        },
-        "commands": [command_to_json(record) for record in command_records],
-        "temporal_facts": facts,
-        "inserted_facts": inserted,
-        "search_results": search_results,
-        "evidence_mapping": mapping,
-    }
-    write_json(OUT, payload)
-
-    return payload
-
-
-def write_manifest(status: StatusState) -> dict[str, Any]:
-    """Write a generated external adapter manifest for this smoke."""
-
-    manifest = {
-        "schema": "elf.real_world_external_adapter_manifest/v1",
-        "manifest_id": f"graphiti-zep-temporal-smoke-{RUN_ID}",
-        "docker_isolation": {
-            "default": True,
-            "compose_file": "docker-compose.baseline.yml",
-            "runner": "scripts/graphiti-zep-docker-temporal-smoke.py",
-            "artifact_dir": "tmp/real-world-memory/graphiti-zep-smoke",
-            "host_global_installs_required": False,
-            "notes": [
-                f"Generated by the Graphiti/Zep Docker smoke at {utc_now()}.",
-                "The smoke uses generated public temporal facts and records typed setup/runtime failures.",
-            ],
-        },
-        "adapters": [
-            {
-                "adapter_id": "graphiti_zep_temporal_smoke",
-                "project": "Graphiti/Zep",
-                "adapter_kind": "docker_python_falkordb_temporal_smoke",
-                "evidence_class": status.evidence_class,
-                "docker_default": True,
-                "host_global_installs_required": False,
-                "overall_status": status.overall,
-                "setup": {
-                    "status": status.setup,
-                    "evidence": "The smoke runs inside the baseline Docker runner and uses Docker-local FalkorDB plus a container-local Python venv.",
-                    "command": "cargo make smoke-graphiti-zep-docker-temporal",
-                    "artifact": rel(OUT),
-                },
-                "run": {
-                    "status": status.run,
-                    "evidence": "The live path adds generated temporal fact triples and searches Graphiti/Zep for UUID, fact, valid_at, invalid_at, and source node evidence.",
-                    "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal",
-                    "artifact": rel(OUT),
-                },
-                "result": {
-                    "status": status.result,
-                    "evidence": status.failure_reason
-                    if status.failure_reason
-                    else "Graphiti/Zep temporal search mapped current and historical facts to validity windows.",
-                    "artifact": rel(OUT),
-                },
-                "capabilities": [
-                    {
-                        "capability": "docker_falkordb_setup",
-                        "status": status.setup,
-                        "evidence": "The task starts a Docker Compose FalkorDB profile only when explicitly requested, and uses no host-global graph database.",
-                    },
-                    {
-                        "capability": "temporal_fact_triple_ingest",
-                        "status": status.run,
-                        "evidence": "The live worker uses Graphiti fact triples for current, historical, and rationale facts with validity windows.",
-                    },
-                    {
-                        "capability": "validity_window_evidence_mapping",
-                        "status": status.result,
-                        "evidence": "Search output UUID, fact text, valid_at, invalid_at, and node ids are mapped to memory_evolution expected evidence ids.",
-                    },
-                    {
-                        "capability": "quality_or_scale_claim",
-                        "status": "not_encoded",
-                        "evidence": "The smoke does not claim broad graph-memory quality, large-corpus behavior, managed Zep service behavior, or private-corpus performance.",
-                    },
-                ],
-                "suites": [
-                    {
-                        "suite_id": "memory_evolution",
-                        "status": status.result,
-                        "evidence": "Only generated current-versus-historical temporal relation facts are represented.",
-                    },
-                    {
-                        "suite_id": "retrieval",
-                        "status": status.run if status.run != "pass" else "not_encoded",
-                        "evidence": "Hybrid retrieval reachability is exercised by the live search, but broad retrieval quality scoring is not encoded.",
-                    },
-                    {
-                        "suite_id": "production_ops",
-                        "status": "not_encoded",
-                        "evidence": "The smoke records setup and provider boundaries but does not encode backup, restore, private corpus, or hosted-service operations.",
-                    },
-                ],
-                "scenarios": [
-                    {
-                        "scenario_id": "temporal_validity_window_mapping",
-                        "suite_id": "memory_evolution",
-                        "status": status.result,
-                        "elf_position": "untested",
-                        "comparison_outcome": "blocked"
-                        if status.result == "blocked"
-                        else "not_tested",
-                        "evidence": status.failure_reason
-                        if status.failure_reason
-                        else "Graphiti/Zep temporal search mapped generated current and historical relation facts to validity windows and evidence ids.",
-                        "command": "cargo make smoke-graphiti-zep-docker-temporal",
-                        "artifact": rel(OUT),
-                    }
-                ],
-                "evidence": [
-                    {"kind": "artifact", "ref": rel(OUT), "status": status.result},
-                    {"kind": "manifest", "ref": rel(MANIFEST_OUT), "status": status.overall},
-                    {"kind": "source", "ref": "https://github.com/getzep/graphiti", "status": "real"},
-                    {
-                        "kind": "source",
-                        "ref": "https://help.getzep.com/graphiti/getting-started/quick-start",
-                        "status": "real",
-                    },
-                    {
-                        "kind": "source",
-                        "ref": "https://help.getzep.com/graphiti/configuration/falkor-db-configuration",
-                        "status": "real",
-                    },
-                    {
-                        "kind": "source",
-                        "ref": "https://help.getzep.com/graphiti/working-with-data/adding-fact-triples",
-                        "status": "real",
-                    },
-                ],
-                "execution_metadata": {
-                    "sources": [
-                        {
-                            "label": "Graphiti repository",
-                            "url": "https://github.com/getzep/graphiti",
-                            "evidence": "Official source for the open-source temporal context graph engine.",
-                        },
-                        {
-                            "label": "Graphiti quick start",
-                            "url": "https://help.getzep.com/graphiti/getting-started/quick-start",
-                            "evidence": "Official search output examples include UUID, fact, valid_at, and invalid_at fields.",
-                        },
-                        {
-                            "label": "Graphiti FalkorDB configuration",
-                            "url": "https://help.getzep.com/graphiti/configuration/falkor-db-configuration",
-                            "evidence": "Official Docker-local FalkorDB setup and Python driver reference.",
-                        },
-                        {
-                            "label": "Graphiti fact triples",
-                            "url": "https://help.getzep.com/graphiti/working-with-data/adding-fact-triples",
-                            "evidence": "Official manual fact-triple ingest contract.",
-                        },
-                    ],
-                    "setup_path": "Run cargo make smoke-graphiti-zep-docker-temporal for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt.",
-                    "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus graphiti-zep FalkorDB profile, container-local Python venv, generated public temporal facts, and report artifacts under tmp/real-world-memory/graphiti-zep-smoke.",
-                    "resource_expectation": f"Graphiti package {GRAPHITI_REF}, fact_count=3, timeout_seconds={TIMEOUT_SECONDS}, FalkorDB host={FALKORDB_HOST}:{FALKORDB_PORT}.",
-                    "retry_guidance": [
-                        "Default command records a typed blocked artifact without model calls.",
-                        "Enable the live path only with Docker-local FalkorDB and explicit provider configuration.",
-                        "Treat missing validity windows or unmapped current/historical facts as wrong_result, not pass.",
-                    ],
-                    "research_depth": "D2 feasibility plus XY-888 Docker temporal smoke implementation; generated artifact decides live evidence class.",
-                },
-                "notes": [
-                    "The checked-in manifest record remains research_gate; generated smoke artifacts carry live status.",
-                    "Failure before Graphiti search output remains typed as blocked or incomplete.",
-                    "The smoke does not use a hosted Zep service, private corpora, or unrecorded provider credentials.",
-                ],
-            }
-        ],
-    }
-    write_json(MANIFEST_OUT, manifest)
-
-    return manifest
-
-
-def write_summary(materialization: dict[str, Any], manifest: dict[str, Any], report: dict[str, Any]) -> None:
-    """Write a small summary artifact."""
-
-    write_json(
-        SUMMARY_OUT,
-        {
-            "schema": "elf.graphiti_zep_temporal_smoke_summary/v1",
-            "generated_at": utc_now(),
-            "adapter_id": "graphiti_zep_temporal_smoke",
-            "evidence_class": materialization["evidence_class"],
-            "status_boundary": {
-                "materialization": "setup/run/evidence-mapping state emitted by the smoke runner",
-                "manifest": "external adapter declaration consumed by the scorer",
-                "scored_benchmark": "post-score real_world_job outcome; use this for quality status",
-            },
-            "scored_benchmark": materialization["scored_benchmark"],
-            "materialization": materialization,
-            "manifest": {
-                "json": rel(MANIFEST_OUT),
-                "status_source": "external_adapter_manifest_pre_score",
-                "summary": manifest["adapters"][0]["overall_status"],
-                "suites": manifest["adapters"][0]["suites"],
-            },
-            "report": report,
-        },
-    )
-
-
-def main() -> int:
-    """Run the smoke and always emit typed artifacts when possible."""
-
-    started_at = time.monotonic()
-    mkdirs()
-    status = StatusState()
-    command_records: list[CommandRecord] = []
-    facts = temporal_facts()
-    inserted: list[dict[str, Any]] = []
-    search_results: list[dict[str, Any]] = []
-    mapping: dict[str, Any] = {
-        "status": "blocked",
-        "reason": status.failure_reason,
-        "expected_evidence_ids": [fact["evidence_id"] for fact in facts],
-        "mapped_evidence_ids": [],
-        "facts": [
-            {
-                "evidence_id": fact["evidence_id"],
-                "claim_id": fact["claim_id"],
-                "status": "blocked",
-                "expected_valid_at": fact["valid_at"],
-                "expected_invalid_at": fact["invalid_at"],
-                "current": fact["current"],
-            }
-            for fact in facts
-        ],
-    }
-
-    if not Path("/.dockerenv").exists() and not ALLOW_HOST:
-        status.setup = "incomplete"
-        status.result = "incomplete"
-        status.overall = "incomplete"
-        status.failure_class = "not_running_in_docker"
-        status.failure_reason = "Graphiti/Zep smoke must run inside Docker; use cargo make smoke-graphiti-zep-docker-temporal."
-        mapping["status"] = status.result
-        mapping["reason"] = status.failure_reason
-    elif not command_available("python3"):
-        status.setup = "incomplete"
-        status.result = "incomplete"
-        status.overall = "incomplete"
-        status.failure_class = "python_missing"
-        status.failure_reason = "python3 is required for the Graphiti/Zep smoke runner."
-        mapping["status"] = status.result
-        mapping["reason"] = status.failure_reason
-    elif not RUN_LIVE:
-        pass
-    elif not API_KEY:
-        status.setup = "blocked"
-        status.run = "not_encoded"
-        status.result = "blocked"
-        status.overall = "blocked"
-        status.failure_class = "provider_api_key_missing"
-        status.failure_reason = "Graphiti/Zep live temporal search requires an explicit provider API key; no hosted Zep service or unrecorded provider credentials were used."
-        mapping["reason"] = status.failure_reason
-    elif not wait_for_falkordb(command_records):
-        status.setup = "incomplete"
-        status.run = "not_encoded"
-        status.result = "incomplete"
-        status.overall = "incomplete"
-        status.failure_class = "falkordb_unreachable"
-        status.failure_reason = "Docker-local FalkorDB did not become reachable for the Graphiti/Zep smoke."
-        mapping["status"] = status.result
-        mapping["reason"] = status.failure_reason
-    else:
-        installed, python = init_graphiti(command_records)
-        if not installed:
-            status.setup = "incomplete"
-            status.run = "not_encoded"
-            status.result = "incomplete"
-            status.overall = "incomplete"
-            status.failure_class = "graphiti_setup_failed"
-            status.failure_reason = "Graphiti installation failed inside the Docker runner."
-            mapping["status"] = status.result
-            mapping["reason"] = status.failure_reason
-        else:
-            status.setup = "pass"
-            inserted, search_results = run_graphiti(python, command_records)
-
-            if not search_results:
-                status.run = "incomplete"
-                status.result = "incomplete"
-                status.overall = "incomplete"
-                status.failure_class = "graphiti_temporal_search_failed"
-                status.failure_reason = "Graphiti/Zep did not return temporal search results for the generated fact corpus."
-                mapping["status"] = status.result
-                mapping["reason"] = status.failure_reason
-            else:
-                status.run = "pass"
-                status.evidence_class = "live_real_world"
-                mapping = map_observed_facts(search_results, facts)
-                if mapping["status"] == "pass":
-                    status.result = "pass"
-                    status.overall = "pass"
-                    status.failure_class = ""
-                    status.failure_reason = ""
-                else:
-                    status.result = "wrong_result"
-                    status.overall = "wrong_result"
-                    status.failure_class = "graphiti_temporal_mapping_failed"
-                    status.failure_reason = mapping["reason"]
-
-    fixture_path = write_fixture(facts, status, mapping)
-    materialization = write_materialization(
-        status,
-        facts,
-        fixture_path,
-        command_records,
-        inserted,
-        search_results,
-        mapping,
-        started_at,
-    )
-    manifest = write_manifest(status)
-    report = run_scored_report(fixture_path, MANIFEST_OUT, status)
-    materialization = write_materialization(
-        status,
-        facts,
-        fixture_path,
-        command_records,
-        inserted,
-        search_results,
-        mapping,
-        started_at,
-        report,
-    )
-    write_summary(materialization, manifest, report)
-    print(f"Graphiti/Zep smoke artifact: {OUT}")
-    print(f"Graphiti/Zep smoke manifest: {MANIFEST_OUT}")
-    print(f"Graphiti/Zep smoke summary: {SUMMARY_OUT}")
-
-    return 0
+from graphiti_temporal_smoke.runner import main
 
 
 if __name__ == "__main__":
diff --git a/scripts/graphiti_temporal_smoke/__init__.py b/scripts/graphiti_temporal_smoke/__init__.py
new file mode 100644
index 00000000..e561b370
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/__init__.py
@@ -0,0 +1 @@
+"""Graphiti/Zep temporal smoke runner modules."""
diff --git a/scripts/graphiti_temporal_smoke/benchmark.py b/scripts/graphiti_temporal_smoke/benchmark.py
new file mode 100644
index 00000000..23af9098
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/benchmark.py
@@ -0,0 +1,107 @@
+"""Scoring helpers for the Graphiti/Zep temporal smoke."""
+
+from __future__ import annotations
+
+import json
+import subprocess
+from pathlib import Path
+from typing import Any
+
+from .common import rel
+from .context import REPORT_JSON, REPORT_MD, ROOT_DIR
+from .models import StatusState
+
+def run_scored_report(fixture_path: Path, manifest_path: Path, status: StatusState) -> dict[str, Any]:
+    """Run the real_world_job_benchmark scorer and publisher via cargo, then return a digest of the JSON report."""
+
+    run_cmd = [
+        "cargo",
+        "run",
+        "-p",
+        "elf-eval",
+        "--bin",
+        "real_world_job_benchmark",
+        "--",
+        "run",
+        "--fixtures",
+        str(fixture_path),
+        "--out",
+        str(REPORT_JSON),
+        "--run-id",
+        "real-world-memory-live-graphiti-zep",
+        "--adapter-id",
+        "graphiti_zep_temporal_smoke",
+        "--adapter-name",
+        "Graphiti/Zep Docker temporal smoke adapter",
+        "--adapter-behavior",
+        "docker_python_falkordb_temporal_smoke",
+        "--adapter-storage-status",
+        status.setup,
+        "--adapter-runtime-status",
+        status.overall,
+        "--adapter-notes",
+        "Generated by the Graphiti/Zep Docker temporal smoke; pass or wrong_result requires current and historical validity-window facts mapped to generated evidence ids, while provider/setup limits remain typed.",
+        "--external-adapter-manifest",
+        str(manifest_path),
+    ]
+    publish_cmd = [
+        "cargo",
+        "run",
+        "-p",
+        "elf-eval",
+        "--bin",
+        "real_world_job_benchmark",
+        "--",
+        "publish",
+        "--report",
+        str(REPORT_JSON),
+        "--out",
+        str(REPORT_MD),
+    ]
+    # check=True: a failing cargo invocation raises CalledProcessError instead of returning a digest.
+    subprocess.run(run_cmd, cwd=ROOT_DIR, check=True)
+    subprocess.run(publish_cmd, cwd=ROOT_DIR, check=True)
+
+    report = json.loads(REPORT_JSON.read_text(encoding="utf-8"))
+
+    return {
+        "json": rel(REPORT_JSON),
+        "markdown": rel(REPORT_MD),
+        "summary": report.get("summary", {}),
+        "suites": report.get("suites", []),
+    }
+
+def scored_benchmark(report: dict[str, Any] | None) -> dict[str, Any]:
+    """Summarize a real_world_job report as a post-score benchmark status.
+
+    Returns a "pending" placeholder when ``report`` is None.
+    """
+
+    base = {"schema": "elf.scored_benchmark_status/v1", "source": "real_world_job_benchmark"}
+
+    if report is None:
+        return {
+            **base,
+            "status": "pending",
+            "reason": "The smoke materialization was written before benchmark scoring completed.",
+        }
+
+    summary = report.get("summary", {})
+    # Tuple order doubles as priority: the first non-pass status with a nonzero count wins.
+    names = ("pass", "wrong_result", "lifecycle_fail", "incomplete", "blocked", "not_encoded")
+    counts = {name: int(summary.get(name, 0) or 0) for name in names}
+    verdict = "pass"
+
+    for name in names[1:]:
+        if counts[name] > 0:
+            verdict = name
+            break
+
+    return {
+        **base,
+        "status": verdict,
+        "counts": counts,
+        "job_count": int(summary.get("job_count", 0) or 0),
+        "mean_score": summary.get("mean_score"),
+        "evidence_coverage": summary.get("evidence_coverage"),
+    }
diff --git a/scripts/graphiti_temporal_smoke/common.py b/scripts/graphiti_temporal_smoke/common.py
new file mode 100644
index 00000000..ba3dd06b
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/common.py
@@ -0,0 +1,137 @@
+"""Shared filesystem and process helpers for the Graphiti/Zep smoke."""
+
+from __future__ import annotations
+
+import json
+import os
+import shutil
+import subprocess
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from .context import FIXTURE_DIR, LOG_DIR, REPORT_DIR, ROOT_DIR, TIMEOUT_SECONDS, WORK_DIR
+from .models import CommandRecord
+
+def utc_now() -> str:
+    """Return the current UTC time as a second-precision RFC3339 string ending in "Z"."""
+
+    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+def rel(path: Path) -> str:
+    """Return the resolved path relative to ROOT_DIR, or the path unchanged when it lies outside ROOT_DIR."""
+
+    try:
+        return str(path.resolve().relative_to(ROOT_DIR))
+    except ValueError:
+        return str(path)
+
+def mkdirs() -> None:
+    """Create the report, work, fixture, and log directories, including parents, if they are missing."""
+
+    for path in (REPORT_DIR, WORK_DIR, FIXTURE_DIR, LOG_DIR):
+        path.mkdir(parents=True, exist_ok=True)
+
+def write_json(path: Path, payload: Any) -> None:
+    """Write payload as indented, key-sorted UTF-8 JSON with a trailing newline, creating parent directories."""
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
+def command_available(command: str) -> bool:
+    """Report whether shutil.which can locate the command on PATH."""
+
+    return bool(shutil.which(command))
+
+def dir_size(path: Path) -> int:
+    """Return the combined byte size of a file, or of all files under a directory; 0 when missing."""
+
+    if not path.exists():
+        return 0
+    # A plain file is measured directly; a directory is walked recursively.
+    targets = [path] if path.is_file() else [entry for entry in path.rglob("*") if entry.is_file()]
+
+    return sum(entry.stat().st_size for entry in targets)
+
+def file_count(path: Path) -> int:
+    """Count the files found recursively beneath a directory; a missing path counts as zero."""
+
+    if path.exists():
+        return len([entry for entry in path.rglob("*") if entry.is_file()])
+
+    return 0
+
+def command_to_json(record: CommandRecord) -> dict[str, Any]:
+    """Convert a command record into a plain dict for JSON output, rounding elapsed_ms to three decimals."""
+
+    return {
+        "label": record.label,
+        "status": record.status,
+        "command": record.command,
+        "elapsed_ms": round(record.elapsed_ms, 3),
+        "stdout_artifact": record.stdout_artifact,
+        "stderr_artifact": record.stderr_artifact,
+        "returncode": record.returncode,
+        "reason": record.reason,
+    }
+
+def run_command(
+    label: str,
+    command: list[str],
+    cwd: Path,
+    timeout: int = TIMEOUT_SECONDS,
+    extra_env: dict[str, str] | None = None,
+) -> CommandRecord:
+    """Run a subprocess and capture stdout/stderr artifacts under LOG_DIR.
+
+    A nonzero exit or a timeout is returned as an "incomplete" record, not raised.
+    """
+
+    cwd.mkdir(parents=True, exist_ok=True)
+    stdout_path = LOG_DIR / f"{label}.stdout.log"
+    stderr_path = LOG_DIR / f"{label}.stderr.log"
+    env = {**os.environ, **(extra_env or {})}
+    started = time.monotonic()
+    try:
+        proc = subprocess.run(
+            command,
+            cwd=cwd,
+            env=env,
+            text=True,
+            capture_output=True,
+            timeout=timeout,
+            check=False,
+        )
+        elapsed_ms = (time.monotonic() - started) * 1000
+        stdout_path.write_text(proc.stdout, encoding="utf-8")
+        stderr_path.write_text(proc.stderr, encoding="utf-8")
+        status = "pass" if proc.returncode == 0 else "incomplete"
+        reason = "Command completed." if proc.returncode == 0 else f"Command exited {proc.returncode}."
+
+        return CommandRecord(
+            label=label,
+            command=command,
+            status=status,
+            elapsed_ms=elapsed_ms,
+            stdout_artifact=rel(stdout_path),
+            stderr_artifact=rel(stderr_path),
+            returncode=proc.returncode,
+            reason=reason,
+        )
+    except subprocess.TimeoutExpired as err:
+        elapsed_ms = (time.monotonic() - started) * 1000
+        # TimeoutExpired reports captured output as bytes even with text=True; decode before write_text.
+        for log_path, raw in ((stdout_path, err.stdout), (stderr_path, err.stderr)):
+            log_path.write_text(raw.decode("utf-8", "replace") if isinstance(raw, bytes) else raw or "", encoding="utf-8")
+
+        return CommandRecord(
+            label=label,
+            command=command,
+            status="incomplete",
+            elapsed_ms=elapsed_ms,
+            stdout_artifact=rel(stdout_path),
+            stderr_artifact=rel(stderr_path),
+            returncode=None,
+            reason=f"Command timed out after {timeout} seconds.",
+        )
diff --git a/scripts/graphiti_temporal_smoke/context.py b/scripts/graphiti_temporal_smoke/context.py
new file mode 100644
index 00000000..442836e0
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/context.py
@@ -0,0 +1,65 @@
+"""Configuration for the Graphiti/Zep temporal smoke."""
+
+from __future__ import annotations
+
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+
+from typing import Any
+
+
+SCRIPT_DIR = Path(__file__).resolve().parent.parent  # two levels above this module file: the scripts/ directory
+ROOT_DIR = SCRIPT_DIR.parent  # parent of scripts/; base of the default report location
+REPORT_DIR = Path(  # every default artifact path below hangs off this directory
+    os.environ.get(
+        "ELF_GRAPHITI_ZEP_SMOKE_REPORT_DIR",
+        ROOT_DIR / "tmp" / "real-world-memory" / "graphiti-zep-smoke",
+    )
+)
+WORK_DIR = Path(os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_WORK_DIR", REPORT_DIR / "work"))
+OUT = Path(os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_OUT", REPORT_DIR / "graphiti-zep-smoke.json"))
+MANIFEST_OUT = Path(
+    os.environ.get(
+        "ELF_GRAPHITI_ZEP_SMOKE_MANIFEST_OUT",
+        REPORT_DIR / "memory_projects_manifest.graphiti-zep-smoke.json",
+    )
+)
+SUMMARY_OUT = Path(os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_SUMMARY_OUT", REPORT_DIR / "summary.json"))
+REPORT_JSON = Path(
+    os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_REPORT_JSON", REPORT_DIR / "graphiti-zep-report.json")
+)
+REPORT_MD = Path(
+    os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_REPORT_MD", REPORT_DIR / "graphiti-zep-report.md")
+)
+FIXTURE_DIR = REPORT_DIR / "graphiti-zep-fixtures"  # derived from REPORT_DIR; no separate env override
+LOG_DIR = REPORT_DIR / "logs"  # derived from REPORT_DIR; no separate env override
+
+RUN_ID = os.environ.get(  # default embeds the UTC timestamp taken when this module is imported
+    "ELF_GRAPHITI_ZEP_SMOKE_RUN_ID",
+    f"graphiti-zep-docker-smoke-{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}",
+)
+RUN_LIVE = os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_RUN", "0") == "1"  # only the exact string "1" enables it
+ALLOW_HOST = os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_ALLOW_HOST", "0") == "1"  # off unless set to "1"
+INSTALL_GRAPHITI = os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_INSTALL", "1") == "1"  # on unless overridden
+GRAPHITI_VERSION = os.environ.get("ELF_GRAPHITI_ZEP_VERSION", "0.21.0")
+GRAPHITI_PACKAGE = os.environ.get(  # default requirement spec pins GRAPHITI_VERSION
+    "ELF_GRAPHITI_ZEP_PACKAGE",
+    f"graphiti-core[falkordb]=={GRAPHITI_VERSION}",
+)
+GRAPHITI_REF = os.environ.get("ELF_GRAPHITI_ZEP_REF", f"pypi:{GRAPHITI_PACKAGE}")
+FALKORDB_HOST = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_HOST", "graphiti-falkordb")
+FALKORDB_PORT = int(os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_PORT", "6379"))  # non-numeric override raises ValueError on import
+FALKORDB_DATABASE = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_DATABASE", "elf_graphiti_zep_smoke")
+FALKORDB_USERNAME = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_USERNAME", "")
+FALKORDB_PASSWORD = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_PASSWORD", "")
+API_KEY = os.environ.get(  # precedence: ELF_GRAPHITI_ZEP_API_KEY, GRAPHITI_OPENAI_API_KEY, OPENAI_API_KEY, then ""
+    "ELF_GRAPHITI_ZEP_API_KEY",
+    os.environ.get("GRAPHITI_OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", "")),
+)
+API_BASE = os.environ.get("ELF_GRAPHITI_ZEP_API_BASE", os.environ.get("OPENAI_BASE_URL", ""))
+LLM_MODEL = os.environ.get("ELF_GRAPHITI_ZEP_LLM_MODEL", "gpt-4o-mini")
+EMBEDDING_MODEL = os.environ.get("ELF_GRAPHITI_ZEP_EMBEDDING_MODEL", "text-embedding-3-small")
+TIMEOUT_SECONDS = int(os.environ.get("ELF_GRAPHITI_ZEP_TIMEOUT_SECONDS", "900"))  # parsed with int(); whole seconds only
+STARTUP_ATTEMPTS = int(os.environ.get("ELF_GRAPHITI_ZEP_STARTUP_ATTEMPTS", "30"))
+STARTUP_INTERVAL_SECONDS = float(os.environ.get("ELF_GRAPHITI_ZEP_STARTUP_INTERVAL_SECONDS", "2"))  # fractions allowed
diff --git a/scripts/graphiti_temporal_smoke/corpus.py b/scripts/graphiti_temporal_smoke/corpus.py
new file mode 100644
index 00000000..ec3b8f4f
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/corpus.py
@@ -0,0 +1,47 @@
+"""Generated temporal facts used by the Graphiti/Zep smoke."""
+
+from __future__ import annotations
+
+from typing import Any
+
+def temporal_facts() -> list[dict[str, Any]]:
+    """Build the generated-public corpus: historical owner, current owner, then the update rationale."""
+
+    historical_owner = {
+        "evidence_id": "graphiti-zep-old-owner",
+        "claim_id": "relation_historical_owner",
+        "source": "Team Delta",
+        "edge_name": "OWNED_REVIEW",
+        "target": "deployment method review",
+        "fact": "Team Delta owned deployment method review before 2026-06-06.",
+        "valid_at": "2026-06-05T00:00:00Z",
+        "invalid_at": "2026-06-08T00:00:00Z",  # NOTE(review): fact text says "before 2026-06-06" - confirm
+        "created_at": "2026-06-05T00:00:00Z",
+        "current": False,
+    }
+    current_owner = {
+        "evidence_id": "graphiti-zep-current-owner",
+        "claim_id": "relation_current_owner",
+        "source": "Team Echo",
+        "edge_name": "OWNS_REVIEW",
+        "target": "deployment method review",
+        "fact": "Team Echo owns deployment method review since 2026-06-08.",
+        "valid_at": "2026-06-08T00:00:00Z",
+        "invalid_at": None,  # open-ended window
+        "created_at": "2026-06-08T00:00:00Z",
+        "current": True,
+    }
+    ownership_rationale = {
+        "evidence_id": "graphiti-zep-owner-rationale",
+        "claim_id": "relation_owner_update_rationale",
+        "source": "single-user production runbook scope",
+        "edge_name": "MOVED_OWNERSHIP_TO",
+        "target": "Team Echo",
+        "fact": "Ownership moved to Team Echo after single-user production runbook scope changed.",
+        "valid_at": "2026-06-08T00:05:00Z",
+        "invalid_at": None,  # open-ended window
+        "created_at": "2026-06-08T00:05:00Z",
+        "current": True,
+    }
+
+    return [historical_owner, current_owner, ownership_rationale]
diff --git a/scripts/graphiti_temporal_smoke/fixture.py b/scripts/graphiti_temporal_smoke/fixture.py
new file mode 100644
index 00000000..14e30c7a
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/fixture.py
@@ -0,0 +1,224 @@
+"""Generated fixture writer for the Graphiti/Zep smoke."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from .common import write_json
+from .context import FIXTURE_DIR, RUN_ID
+from .models import StatusState
+
+def write_fixture(facts: list[dict[str, Any]], status: StatusState, mapping: dict[str, Any]) -> Path:
+    """Build the generated memory_evolution fixture, pass it to write_json, and return the fixture path."""
+
+    fixture_path = FIXTURE_DIR / "memory_evolution" / "graphiti_zep_temporal_validity.json"
+    mapped_ids = mapping.get("mapped_evidence_ids", [])  # tolerates a mapping without this key
+    claims = []  # stays empty unless the result passed
+
+    if status.result == "pass":
+        claims = [
+            {
+                "claim_id": "relation_current_owner",
+                "text": "Team Echo currently owns deployment method review.",
+                "evidence_ids": [
+                    "graphiti-zep-current-owner",
+                    "graphiti-zep-old-owner",
+                    "graphiti-zep-owner-rationale",
+                ],
+                "confidence": "derived_from_graphiti_temporal_search",
+            },
+            {
+                "claim_id": "relation_historical_owner",
+                "text": "Team Delta owned deployment method review historically.",
+                "evidence_ids": ["graphiti-zep-old-owner"],
+                "confidence": "derived_from_graphiti_temporal_search",
+            },
+            {
+                "claim_id": "relation_owner_update_rationale",
+                "text": "Ownership moved after single-user production runbook scope changed.",
+                "evidence_ids": ["graphiti-zep-owner-rationale"],
+                "confidence": "derived_from_graphiti_temporal_search",
+            },
+        ]
+
+    fixture: dict[str, Any] = {
+        "schema": "elf.real_world_job/v1",
+        "job_id": "graphiti-zep-temporal-validity-001",
+        "suite": "memory_evolution",
+        "title": "Map Graphiti/Zep temporal validity windows to current and historical relation facts",
+        "corpus": {
+            "corpus_id": "graphiti-zep-generated-public-smoke",
+            "profile": "generated_public",
+            "items": [
+                {
+                    "evidence_id": fact["evidence_id"],
+                    "kind": "temporal_fact",
+                    "text": fact["fact"],
+                    "source_ref": {
+                        "schema": "source_ref/v1",
+                        "resolver": "graphiti_zep_smoke/v1",
+                        "ref": {
+                            "run_id": RUN_ID,
+                            "evidence_id": fact["evidence_id"],
+                            "valid_at": fact["valid_at"],
+                            "invalid_at": fact["invalid_at"],
+                        },
+                    },
+                    "created_at": fact["created_at"],
+                }
+                for fact in facts  # one corpus item per supplied fact, in input order
+            ],
+            "adapter_response": {
+                "adapter_id": "graphiti_zep_temporal_smoke",
+                "answer": {
+                    "content": (
+                        "Team Echo currently owns deployment method review. Team Delta owned it "
+                        "historically, and the move followed the single-user production runbook scope change."
+                        if claims  # answer text is emitted only alongside claims
+                        else ""
+                    ),
+                    "claims": claims,
+                    "evidence_ids": mapped_ids,
+                    "latency_ms": 0.0,  # latency and cost are fixed zero literals; nothing is measured here
+                    "cost": {
+                        "currency": "USD",
+                        "amount": 0.0,
+                        "input_tokens": 0,
+                        "output_tokens": 0,
+                    },
+                },
+            },
+        },
+        "timeline": [  # hard-coded events; not derived from the facts argument
+            {
+                "event_id": "graphiti-zep-old-owner",
+                "ts": "2026-06-05T00:00:00Z",
+                "actor": "agent",
+                "action": "recorded_relation",
+                "evidence_ids": ["graphiti-zep-old-owner"],
+                "summary": "Team Delta was the historical owner.",
+            },
+            {
+                "event_id": "graphiti-zep-current-owner",
+                "ts": "2026-06-08T00:00:00Z",
+                "actor": "agent",
+                "action": "updated_memory",
+                "evidence_ids": ["graphiti-zep-current-owner", "graphiti-zep-owner-rationale"],
+                "summary": "Team Echo became the current owner after the scope changed.",
+            },
+        ],
+        "prompt": {
+            "role": "user",
+            "content": "Who currently owns deployment method review, and who owned it historically?",
+            "job_mode": "answer",
+            "constraints": ["cite_evidence", "distinguish_current_from_historical"],
+        },
+        "expected_answer": {
+            "must_include": [
+                {
+                    "claim_id": "relation_current_owner",
+                    "text": "Team Echo currently owns deployment method review.",
+                },
+                {
+                    "claim_id": "relation_historical_owner",
+                    "text": "Team Delta owned deployment method review historically.",
+                },
+            ],
+            "must_not_include": ["Team Delta currently owns deployment method review."],
+            "evidence_links": {
+                "relation_current_owner": [
+                    "graphiti-zep-current-owner",
+                    "graphiti-zep-old-owner",
+                    "graphiti-zep-owner-rationale",
+                ],
+                "relation_historical_owner": ["graphiti-zep-old-owner"],
+                "relation_owner_update_rationale": ["graphiti-zep-owner-rationale"],
+            },
+            "answer_type": "direct_answer",
+            "accepted_alternates": [],
+            "requires_caveat": False,
+            "requires_refusal": False,
+        },
+        "required_evidence": [
+            {
+                "evidence_id": "graphiti-zep-current-owner",
+                "claim_id": "relation_current_owner",
+                "requirement": "cite",
+                "quote": "Team Echo owns deployment method review",
+            },
+            {
+                "evidence_id": "graphiti-zep-old-owner",
+                "claim_id": "relation_historical_owner",
+                "requirement": "cite",
+                "quote": "Team Delta owned deployment method review",
+            },
+        ],
+        "negative_traps": [
+            {
+                "trap_id": "old-owner-as-current",
+                "type": "stale_fact",
+                "evidence_ids": ["graphiti-zep-old-owner"],
+                "failure_if_used": False,  # the same evidence id is also linked to relation_current_owner above
+            }
+        ],
+        "scoring_rubric": {
+            "dimensions": {  # the four weights below sum to 1.0
+                "lifecycle_behavior": {
+                    "weight": 0.4,
+                    "max_points": 1.0,
+                    "criteria": "Requires current-only versus historical temporal validity for relation facts.",
+                },
+                "answer_correctness": {
+                    "weight": 0.25,
+                    "max_points": 1.0,
+                    "criteria": "Would identify current and historical owners separately.",
+                },
+                "evidence_grounding": {
+                    "weight": 0.2,
+                    "max_points": 1.0,
+                    "criteria": "Would cite both current and historical relation evidence.",
+                },
+                "trap_avoidance": {
+                    "weight": 0.15,
+                    "max_points": 1.0,
+                    "criteria": "Would not report the historical owner as current.",
+                },
+            },
+            "pass_threshold": 0.8,
+            "hard_fail_rules": [],
+        },
+        "allowed_uncertainty": {
+            "can_answer_unknown": False,
+            "acceptable_phrases": ["Graphiti/Zep smoke did not return temporal facts."],
+            "fallback_action": "score_temporal_relation_behavior",
+        },
+        "memory_evolution": {
+            "current_evidence_ids": ["graphiti-zep-current-owner"],
+            "historical_evidence_ids": ["graphiti-zep-old-owner"],
+            "stale_trap_ids": ["old-owner-as-current"],
+            "conflicts": [
+                {
+                    "conflict_id": "relation-owner-current-historical",
+                    "claim_id": "relation_current_owner",
+                    "current_evidence_id": "graphiti-zep-current-owner",
+                    "historical_evidence_id": "graphiti-zep-old-owner",
+                    "resolved_by_evidence_id": "graphiti-zep-owner-rationale",
+                }
+            ],
+            "update_rationale": {
+                "claim_id": "relation_owner_update_rationale",
+                "evidence_ids": ["graphiti-zep-owner-rationale"],
+                "available": True,
+            },
+            "temporal_validity": {"required": True, "encoded": True},
+        },
+        "tags": ["external_adapter", "generated_public", "memory_evolution", "reference_graphiti_zep_temporal"],
+    }
+
+    if status.result in {"blocked", "incomplete", "not_encoded"}:  # "wrong_result" and "pass" add no encoding entry
+        fixture["encoding"] = {"status": status.result, "reason": status.failure_reason}
+
+    write_json(fixture_path, fixture)
+
+    return fixture_path
diff --git a/scripts/graphiti_temporal_smoke/manifest.py b/scripts/graphiti_temporal_smoke/manifest.py
new file mode 100644
index 00000000..b8b66bd1
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/manifest.py
@@ -0,0 +1,175 @@
+"""External adapter manifest writer for the Graphiti/Zep smoke."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from .common import rel, utc_now, write_json
+from .context import *  # noqa: F403
+from .models import StatusState
+
+def write_manifest(status: StatusState) -> dict[str, Any]:
+    """Assemble the external adapter manifest from status, pass it to write_json, and return the dict."""
+
+    manifest = {
+        "schema": "elf.real_world_external_adapter_manifest/v1",
+        "manifest_id": f"graphiti-zep-temporal-smoke-{RUN_ID}",  # upper-case names here come from the .context star import
+        "docker_isolation": {
+            "default": True,
+            "compose_file": "docker-compose.baseline.yml",
+            "runner": "scripts/graphiti-zep-docker-temporal-smoke.py",
+            "artifact_dir": "tmp/real-world-memory/graphiti-zep-smoke",
+            "host_global_installs_required": False,
+            "notes": [
+                f"Generated by the Graphiti/Zep Docker smoke at {utc_now()}.",
+                "The smoke uses generated public temporal facts and records typed setup/runtime failures.",
+            ],
+        },
+        "adapters": [
+            {
+                "adapter_id": "graphiti_zep_temporal_smoke",
+                "project": "Graphiti/Zep",
+                "adapter_kind": "docker_python_falkordb_temporal_smoke",
+                "evidence_class": status.evidence_class,
+                "docker_default": True,
+                "host_global_installs_required": False,
+                "overall_status": status.overall,
+                "setup": {
+                    "status": status.setup,
+                    "evidence": "The smoke runs inside the baseline Docker runner and uses Docker-local FalkorDB plus a container-local Python venv.",
+                    "command": "cargo make smoke-graphiti-zep-docker-temporal",
+                    "artifact": rel(OUT),
+                },
+                "run": {
+                    "status": status.run,
+                    "evidence": "The live path adds generated temporal fact triples and searches Graphiti/Zep for UUID, fact, valid_at, invalid_at, and source node evidence.",
+                    "command": "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 cargo make smoke-graphiti-zep-docker-temporal",
+                    "artifact": rel(OUT),
+                },
+                "result": {
+                    "status": status.result,
+                    "evidence": status.failure_reason  # the success sentence is used only when failure_reason is empty
+                    if status.failure_reason
+                    else "Graphiti/Zep temporal search mapped current and historical facts to validity windows.",
+                    "artifact": rel(OUT),
+                },
+                "capabilities": [
+                    {
+                        "capability": "docker_falkordb_setup",
+                        "status": status.setup,
+                        "evidence": "The task starts a Docker Compose FalkorDB profile only when explicitly requested, and uses no host-global graph database.",
+                    },
+                    {
+                        "capability": "temporal_fact_triple_ingest",
+                        "status": status.run,
+                        "evidence": "The live worker uses Graphiti fact triples for current, historical, and rationale facts with validity windows.",
+                    },
+                    {
+                        "capability": "validity_window_evidence_mapping",
+                        "status": status.result,
+                        "evidence": "Search output UUID, fact text, valid_at, invalid_at, and node ids are mapped to memory_evolution expected evidence ids.",
+                    },
+                    {
+                        "capability": "quality_or_scale_claim",
+                        "status": "not_encoded",
+                        "evidence": "The smoke does not claim broad graph-memory quality, large-corpus behavior, managed Zep service behavior, or private-corpus performance.",
+                    },
+                ],
+                "suites": [
+                    {
+                        "suite_id": "memory_evolution",
+                        "status": status.result,
+                        "evidence": "Only generated current-versus-historical temporal relation facts are represented.",
+                    },
+                    {
+                        "suite_id": "retrieval",
+                        "status": status.run if status.run != "pass" else "not_encoded",  # a passing run is still reported as not_encoded here
+                        "evidence": "Hybrid retrieval reachability is exercised by the live search, but broad retrieval quality scoring is not encoded.",
+                    },
+                    {
+                        "suite_id": "production_ops",
+                        "status": "not_encoded",
+                        "evidence": "The smoke records setup and provider boundaries but does not encode backup, restore, private corpus, or hosted-service operations.",
+                    },
+                ],
+                "scenarios": [
+                    {
+                        "scenario_id": "temporal_validity_window_mapping",
+                        "suite_id": "memory_evolution",
+                        "status": status.result,
+                        "elf_position": "untested",
+                        "comparison_outcome": "blocked"  # every non-blocked result, including pass, maps to "not_tested"
+                        if status.result == "blocked"
+                        else "not_tested",
+                        "evidence": status.failure_reason  # same fallback rule as the "result" entry
+                        if status.failure_reason
+                        else "Graphiti/Zep temporal search mapped generated current and historical relation facts to validity windows and evidence ids.",
+                        "command": "cargo make smoke-graphiti-zep-docker-temporal",
+                        "artifact": rel(OUT),
+                    }
+                ],
+                "evidence": [
+                    {"kind": "artifact", "ref": rel(OUT), "status": status.result},
+                    {"kind": "manifest", "ref": rel(MANIFEST_OUT), "status": status.overall},
+                    {"kind": "source", "ref": "https://github.com/getzep/graphiti", "status": "real"},
+                    {
+                        "kind": "source",
+                        "ref": "https://help.getzep.com/graphiti/getting-started/quick-start",
+                        "status": "real",
+                    },
+                    {
+                        "kind": "source",
+                        "ref": "https://help.getzep.com/graphiti/configuration/falkor-db-configuration",
+                        "status": "real",
+                    },
+                    {
+                        "kind": "source",
+                        "ref": "https://help.getzep.com/graphiti/working-with-data/adding-fact-triples",
+                        "status": "real",
+                    },
+                ],
+                "execution_metadata": {
+                    "sources": [
+                        {
+                            "label": "Graphiti repository",
+                            "url": "https://github.com/getzep/graphiti",
+                            "evidence": "Official source for the open-source temporal context graph engine.",
+                        },
+                        {
+                            "label": "Graphiti quick start",
+                            "url": "https://help.getzep.com/graphiti/getting-started/quick-start",
+                            "evidence": "Official search output examples include UUID, fact, valid_at, and invalid_at fields.",
+                        },
+                        {
+                            "label": "Graphiti FalkorDB configuration",
+                            "url": "https://help.getzep.com/graphiti/configuration/falkor-db-configuration",
+                            "evidence": "Official Docker-local FalkorDB setup and Python driver reference.",
+                        },
+                        {
+                            "label": "Graphiti fact triples",
+                            "url": "https://help.getzep.com/graphiti/working-with-data/adding-fact-triples",
+                            "evidence": "Official manual fact-triple ingest contract.",
+                        },
+                    ],
+                    "setup_path": "Run cargo make smoke-graphiti-zep-docker-temporal for a typed artifact; set ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 with explicit provider configuration for a live attempt.",
+                    "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus graphiti-zep FalkorDB profile, container-local Python venv, generated public temporal facts, and report artifacts under tmp/real-world-memory/graphiti-zep-smoke.",
+                    "resource_expectation": f"Graphiti package {GRAPHITI_REF}, fact_count=3, timeout_seconds={TIMEOUT_SECONDS}, FalkorDB host={FALKORDB_HOST}:{FALKORDB_PORT}.",  # fact_count is a literal 3, not computed
+                    "retry_guidance": [
+                        "Default command records a typed blocked artifact without model calls.",
+                        "Enable the live path only with Docker-local FalkorDB and explicit provider configuration.",
+                        "Treat missing validity windows or unmapped current/historical facts as wrong_result, not pass.",
+                    ],
+                    "research_depth": "D2 feasibility plus XY-888 Docker temporal smoke implementation; generated artifact decides live evidence class.",
+                },
+                "notes": [
+                    "The checked-in manifest record remains research_gate; generated smoke artifacts carry live status.",
+                    "Failure before Graphiti search output remains typed as blocked or incomplete.",
+                    "The smoke does not use a hosted Zep service, private corpora, or unrecorded provider credentials.",
+                ],
+            }
+        ],
+    }
+    write_json(MANIFEST_OUT, manifest)
+
+    return manifest
diff --git a/scripts/graphiti_temporal_smoke/mapping.py b/scripts/graphiti_temporal_smoke/mapping.py
new file mode 100644
index 00000000..4283950c
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/mapping.py
@@ -0,0 +1,81 @@
+"""Map Graphiti search results back to benchmark evidence."""
+
+from __future__ import annotations
+
+from typing import Any
+
+def map_observed_facts(results: list[dict[str, Any]], facts: list[dict[str, Any]]) -> dict[str, Any]:
+    """Match each expected fact to Graphiti search results by fact text and grade the overall mapping."""
+
+    # De-duplicated expected ids, kept in first-seen order.
+    wanted_ids = list(dict.fromkeys(expected["evidence_id"] for expected in facts))
+    entries: list[dict[str, Any]] = []
+    found_ids: list[str] = []
+
+    for expected in facts:
+        # Case-insensitive substring match; results whose "fact" is not a string never match.
+        hits = [
+            hit
+            for hit in results
+            if isinstance(hit.get("fact"), str) and expected["fact"].lower() in hit["fact"].lower()
+        ]
+        # Leading keys shared by matched and unmatched entries.
+        entry: dict[str, Any] = {
+            "evidence_id": expected["evidence_id"],
+            "claim_id": expected["claim_id"],
+            "status": "pass" if hits else "blocked",
+        }
+
+        if hits:
+            # Only the first matching result supplies the observed fields.
+            first = hits[0]
+            found_ids.append(expected["evidence_id"])
+            entry.update(
+                uuid=first.get("uuid"),
+                fact=first.get("fact"),
+                valid_at=first.get("valid_at"),
+                invalid_at=first.get("invalid_at"),
+            )
+
+        # The expected window and currency flag are recorded for every fact.
+        entry.update(
+            expected_valid_at=expected["valid_at"],
+            expected_invalid_at=expected["invalid_at"],
+            current=expected["current"],
+        )
+        entries.append(entry)
+
+    def passed(evidence_id: str, *, superseded: bool) -> bool:
+        # True when the fact mapped and the presence of an observed invalid_at equals `superseded`.
+        return any(
+            row["evidence_id"] == evidence_id
+            and row["status"] == "pass"
+            and bool(row.get("invalid_at")) is superseded
+            for row in entries
+        )
+
+    # Current owner must lack invalid_at, historical owner must have one, rationale only has to map.
+    all_mapped = (
+        passed("graphiti-zep-current-owner", superseded=False)
+        and passed("graphiti-zep-old-owner", superseded=True)
+        and "graphiti-zep-owner-rationale" in found_ids
+    )
+
+    if all_mapped:
+        verdict = "pass"
+        explanation = "Graphiti/Zep search results mapped current, historical, and rationale facts with validity windows."
+    else:
+        absent = ", ".join(wanted for wanted in wanted_ids if wanted not in found_ids)
+        verdict = "wrong_result"
+        explanation = (
+            "Graphiti/Zep search results did not map all required temporal facts with expected validity "
+            f"windows; missing={absent or 'none'}."
+        )
+
+    return {
+        "status": verdict,
+        "reason": explanation,
+        "expected_evidence_ids": wanted_ids,
+        "mapped_evidence_ids": found_ids,
+        "facts": entries,
+    }
diff --git a/scripts/graphiti_temporal_smoke/materialization.py b/scripts/graphiti_temporal_smoke/materialization.py
new file mode 100644
index 00000000..f96fd4f0
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/materialization.py
@@ -0,0 +1,99 @@
+"""Primary materialization writer for the Graphiti/Zep smoke."""
+
+from __future__ import annotations
+
+import time
+from pathlib import Path
+from typing import Any
+
+from .benchmark import scored_benchmark
+from .common import command_to_json, dir_size, file_count, rel, utc_now, write_json
+from .context import *  # noqa: F403
+from .models import CommandRecord, StatusState
+
+def write_materialization(
+    status: StatusState,
+    facts: list[dict[str, Any]],
+    fixture_path: Path,
+    command_records: list[CommandRecord],
+    inserted: list[dict[str, Any]],
+    search_results: list[dict[str, Any]],
+    mapping: dict[str, Any],
+    started_at: float,
+    report: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Assemble the primary smoke artifact payload, pass it to write_json with OUT, and return it."""
+
+    elapsed_ms = (time.monotonic() - started_at) * 1000  # assumes started_at is a time.monotonic() reading
+    payload = {
+        "schema": "elf.graphiti_zep_temporal_smoke/v1",
+        "generated_at": utc_now(),
+        "run_id": RUN_ID,
+        "adapter_id": "graphiti_zep_temporal_smoke",
+        "project": "Graphiti/Zep",
+        "status": status.overall,
+        "materialization_status": {
+            "source": "smoke_materialization",
+            "setup": status.setup,
+            "run": status.run,
+            "result": status.result,
+            "overall": status.overall,
+            "failure_class": status.failure_class,
+            "failure_reason": status.failure_reason,
+        },
+        "scored_benchmark": scored_benchmark(report),  # report may be None (the parameter default)
+        "evidence_class": status.evidence_class,
+        "failure": {
+            "class": status.failure_class or None,  # empty strings are emitted as None
+            "reason": status.failure_reason or None,
+        },
+        "artifacts": {
+            "materialization": rel(OUT),
+            "manifest": rel(MANIFEST_OUT),
+            "summary": rel(SUMMARY_OUT),
+            "fixture": rel(fixture_path),
+            "scored_report_json": rel(REPORT_JSON),
+            "scored_report_markdown": rel(REPORT_MD),
+        },
+        "docker_boundary": {
+            "compose_file": "docker-compose.baseline.yml",
+            "service_profile": "graphiti-zep",
+            "graph_store_service": "graphiti-falkordb",
+            "runner_service": "baseline-runner",
+            "runner": "scripts/graphiti-zep-docker-temporal-smoke.py",
+            "host_global_installs_required": False,
+            "docker_only": True,
+        },
+        "provider_configuration": {
+            "package": GRAPHITI_REF,
+            "package_spec": GRAPHITI_PACKAGE,
+            "llm_model": LLM_MODEL,
+            "embedding_model": EMBEDDING_MODEL,
+            "api_base_configured": bool(API_BASE),  # presence flags only; the values are not written
+            "api_key_provided": bool(API_KEY),
+            "operator_owned_provider_credentials_used": False,  # hard-coded; NOTE(review): confirm it should not track API_KEY
+            "live_run_enabled": RUN_LIVE,
+            "falkordb": {
+                "host": FALKORDB_HOST,
+                "port": FALKORDB_PORT,
+                "database": FALKORDB_DATABASE,
+                "username_configured": bool(FALKORDB_USERNAME),  # presence flags only
+                "password_configured": bool(FALKORDB_PASSWORD),
+            },
+        },
+        "resource_bounds": {
+            "fact_count": len(facts),
+            "timeout_seconds": TIMEOUT_SECONDS,
+            "elapsed_ms": round(elapsed_ms, 3),
+            "work_dir_size_bytes": dir_size(WORK_DIR),
+            "work_dir_file_count": file_count(WORK_DIR),
+        },
+        "commands": [command_to_json(record) for record in command_records],
+        "temporal_facts": facts,
+        "inserted_facts": inserted,
+        "search_results": search_results,
+        "evidence_mapping": mapping,
+    }
+    write_json(OUT, payload)
+
+    return payload
diff --git a/scripts/graphiti_temporal_smoke/models.py b/scripts/graphiti_temporal_smoke/models.py
new file mode 100644
index 00000000..27d2ae5f
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/models.py
@@ -0,0 +1,34 @@
+"""Typed records for the Graphiti/Zep temporal smoke."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+@dataclass
+class StatusState:
+    """Mutable status record for generated Graphiti/Zep smoke artifacts; defaults describe the "live run disabled" case."""
+    setup: str = "blocked"  # setup phase outcome; the runner also sets "incomplete" or "pass"
+    run: str = "not_encoded"  # live-run outcome; the runner also sets "incomplete" or "pass"
+    result: str = "blocked"  # result outcome; the runner also sets "incomplete", "pass" or "wrong_result"
+    overall: str = "blocked"  # overall outcome; the runner always sets it together with result
+    evidence_class: str = "research_gate"  # set to "live_real_world" once a live search returned results
+    failure_class: str = "graphiti_zep_live_run_disabled"  # machine-readable failure id; "" on pass
+    failure_reason: str = (  # human-readable explanation paired with failure_class; "" on pass
+        "Graphiti/Zep temporal graph live run is opt-in; set "
+        "ELF_GRAPHITI_ZEP_SMOKE_START=1 ELF_GRAPHITI_ZEP_SMOKE_RUN=1 and provide explicit "
+        "provider configuration to attempt the Docker-local FalkorDB smoke."
+    )
+
+
+@dataclass
+class CommandRecord:
+    """Captured command result without secret-bearing environment values."""
+
+    label: str  # short step name, e.g. "falkordb-startup" or "graphiti-install"
+    command: list[str]  # argv-style description of the step; may be a pseudo-command such as ["tcp-connect", host, port]
+    status: str  # "pass" or "incomplete" in the records built by the runtime module
+    elapsed_ms: float  # duration of the step in milliseconds
+    stdout_artifact: str | None  # path of the captured stdout (or related log) artifact, if any
+    stderr_artifact: str | None  # path of the captured stderr artifact, if any
+    returncode: int | None  # exit code; None when the step produced no exit code
+    reason: str  # human-readable explanation of the status
diff --git a/scripts/graphiti_temporal_smoke/runner.py b/scripts/graphiti_temporal_smoke/runner.py
new file mode 100644
index 00000000..16c20989
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/runner.py
@@ -0,0 +1,151 @@
+"""CLI runner for the Graphiti/Zep temporal smoke."""
+
+from __future__ import annotations
+
+import time
+from pathlib import Path
+from typing import Any
+
+from .fixture import write_fixture
+from .manifest import write_manifest
+from .materialization import write_materialization
+from .summary import write_summary
+from .benchmark import run_scored_report
+from .common import command_available, mkdirs
+from .context import ALLOW_HOST, MANIFEST_OUT, OUT, RUN_LIVE, SUMMARY_OUT
+from .corpus import temporal_facts
+from .mapping import map_observed_facts
+from .models import CommandRecord, StatusState
+from .runtime import init_graphiti, run_graphiti, wait_for_falkordb
+
+def main() -> int:
+    """Run the smoke and always emit typed artifacts when possible.
+
+    Gates are checked in order: Docker boundary, python3 availability, live-run
+    opt-in, provider API key, FalkorDB reachability, Graphiti install. The first
+    gate that fails records its failure class and reason on ``status`` and
+    ``mapping`` and skips the live run; artifacts are written for every outcome.
+
+    Returns:
+        Always 0; the outcome is reported through the written artifacts.
+    """
+
+    # API_KEY is read below but was never imported into this module, so the live
+    # path raised NameError; import it from .context like the other constants here.
+    from .context import API_KEY
+
+    started_at = time.monotonic()
+    mkdirs()
+    status = StatusState()
+    command_records: list[CommandRecord] = []
+    facts = temporal_facts()
+    inserted: list[dict[str, Any]] = []
+    search_results: list[dict[str, Any]] = []
+    # Placeholder mapping: every expected fact stays "blocked" until a live run maps it.
+    mapping: dict[str, Any] = {
+        "status": "blocked",
+        "reason": status.failure_reason,
+        "expected_evidence_ids": [fact["evidence_id"] for fact in facts],
+        "mapped_evidence_ids": [],
+        "facts": [
+            {
+                "evidence_id": fact["evidence_id"],
+                "claim_id": fact["claim_id"],
+                "status": "blocked",
+                "expected_valid_at": fact["valid_at"],
+                "expected_invalid_at": fact["invalid_at"],
+                "current": fact["current"],
+            }
+            for fact in facts
+        ],
+    }
+
+    if not Path("/.dockerenv").exists() and not ALLOW_HOST:
+        status.setup = "incomplete"
+        status.result = "incomplete"
+        status.overall = "incomplete"
+        status.failure_class = "not_running_in_docker"
+        status.failure_reason = "Graphiti/Zep smoke must run inside Docker; use cargo make smoke-graphiti-zep-docker-temporal."
+        mapping["status"] = status.result
+        mapping["reason"] = status.failure_reason
+    elif not command_available("python3"):
+        status.setup = "incomplete"
+        status.result = "incomplete"
+        status.overall = "incomplete"
+        status.failure_class = "python_missing"
+        status.failure_reason = "python3 is required for the Graphiti/Zep smoke runner."
+        mapping["status"] = status.result
+        mapping["reason"] = status.failure_reason
+    elif not RUN_LIVE:
+        # Live run is opt-in: keep the StatusState defaults, which describe this case.
+        pass
+    elif not API_KEY:
+        status.setup = "blocked"
+        status.run = "not_encoded"
+        status.result = "blocked"
+        status.overall = "blocked"
+        status.failure_class = "provider_api_key_missing"
+        status.failure_reason = "Graphiti/Zep live temporal search requires an explicit provider API key; no hosted Zep service or unrecorded provider credentials were used."
+        mapping["reason"] = status.failure_reason
+    elif not wait_for_falkordb(command_records):
+        status.setup = "incomplete"
+        status.run = "not_encoded"
+        status.result = "incomplete"
+        status.overall = "incomplete"
+        status.failure_class = "falkordb_unreachable"
+        status.failure_reason = "Docker-local FalkorDB did not become reachable for the Graphiti/Zep smoke."
+        mapping["status"] = status.result
+        mapping["reason"] = status.failure_reason
+    else:
+        installed, python = init_graphiti(command_records)
+        if not installed:
+            status.setup = "incomplete"
+            status.run = "not_encoded"
+            status.result = "incomplete"
+            status.overall = "incomplete"
+            status.failure_class = "graphiti_setup_failed"
+            status.failure_reason = "Graphiti installation failed inside the Docker runner."
+            mapping["status"] = status.result
+            mapping["reason"] = status.failure_reason
+        else:
+            status.setup = "pass"
+            inserted, search_results = run_graphiti(python, command_records)
+
+            if not search_results:
+                status.run = "incomplete"
+                status.result = "incomplete"
+                status.overall = "incomplete"
+                status.failure_class = "graphiti_temporal_search_failed"
+                status.failure_reason = "Graphiti/Zep did not return temporal search results for the generated fact corpus."
+                mapping["status"] = status.result
+                mapping["reason"] = status.failure_reason
+            else:
+                status.run = "pass"
+                status.evidence_class = "live_real_world"
+                # Replace the placeholder mapping with the one derived from the live search results.
+                mapping = map_observed_facts(search_results, facts)
+                if mapping["status"] == "pass":
+                    status.result = "pass"
+                    status.overall = "pass"
+                    status.failure_class = ""
+                    status.failure_reason = ""
+                else:
+                    status.result = "wrong_result"
+                    status.overall = "wrong_result"
+                    status.failure_class = "graphiti_temporal_mapping_failed"
+                    status.failure_reason = mapping["reason"]
+
+    fixture_path = write_fixture(facts, status, mapping)
+    # Both materialization passes take the same leading positional arguments.
+    shared_args = (status, facts, fixture_path, command_records, inserted, search_results, mapping, started_at)
+    write_materialization(*shared_args)
+    manifest = write_manifest(status)
+    report = run_scored_report(fixture_path, MANIFEST_OUT, status)
+    # Second pass rewrites the materialization, this time with the scored report passed in.
+    materialization = write_materialization(*shared_args, report)
+    write_summary(materialization, manifest, report)
+    print(f"Graphiti/Zep smoke artifact: {OUT}")
+    print(f"Graphiti/Zep smoke manifest: {MANIFEST_OUT}")
+    print(f"Graphiti/Zep smoke summary: {SUMMARY_OUT}")
+
+    return 0
diff --git a/scripts/graphiti_temporal_smoke/runtime.py b/scripts/graphiti_temporal_smoke/runtime.py
new file mode 100644
index 00000000..ffe6dbab
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/runtime.py
@@ -0,0 +1,231 @@
+"""Runtime setup and live Graphiti execution."""
+
+from __future__ import annotations
+
+import json
+import socket
+import sys
+import textwrap
+import time
+from pathlib import Path
+from typing import Any
+
+from .common import run_command, write_json
+from .context import *  # noqa: F403
+from .corpus import temporal_facts
+from .models import CommandRecord
+
+def wait_for_falkordb(command_records: list[CommandRecord]) -> bool:
+    """Poll the configured FalkorDB TCP endpoint until it accepts a connection.
+
+    Makes up to STARTUP_ATTEMPTS attempts, sleeping STARTUP_INTERVAL_SECONDS between
+    them, writes the attempt log under LOG_DIR, appends one "falkordb-startup" record
+    to ``command_records``, and returns True only if a connection succeeded.
+    """
+
+    # ``rel`` is provided by .common; import it explicitly rather than relying on the star import.
+    from .common import rel
+
+    started = time.monotonic()
+    attempts: list[dict[str, Any]] = []
+    reachable = False
+
+    for attempt in range(1, STARTUP_ATTEMPTS + 1):
+        try:
+            with socket.create_connection((FALKORDB_HOST, FALKORDB_PORT), timeout=2):
+                pass
+        except OSError as err:
+            attempts.append({"attempt": attempt, "status": "incomplete", "reason": str(err)})
+            if attempt < STARTUP_ATTEMPTS:  # nothing left to wait for after the final attempt
+                time.sleep(STARTUP_INTERVAL_SECONDS)
+        else:  # bookkeeping stays outside the try so only connect errors count as failed attempts
+            elapsed_ms = (time.monotonic() - started) * 1000
+            attempts.append({"attempt": attempt, "status": "pass", "elapsed_ms": round(elapsed_ms, 3)})
+            reachable = True
+            break
+
+    if not reachable:
+        elapsed_ms = (time.monotonic() - started) * 1000
+    path = LOG_DIR / "falkordb-startup-attempts.json"
+    write_json(path, attempts)
+    command_records.append(
+        CommandRecord(
+            label="falkordb-startup",
+            command=["tcp-connect", FALKORDB_HOST, str(FALKORDB_PORT)],
+            status="pass" if reachable else "incomplete",
+            elapsed_ms=elapsed_ms,
+            stdout_artifact=rel(path),
+            stderr_artifact=None,
+            returncode=0 if reachable else None,
+            reason="FalkorDB TCP endpoint accepted a connection." if reachable else "FalkorDB TCP endpoint did not become reachable.",
+        )
+    )
+    return reachable
+
+def init_graphiti(command_records: list[CommandRecord]) -> tuple[bool, Path]:
+    """Prepare the venv interpreter for Graphiti; return ``(ready, python)``.
+
+    Creates WORK_DIR/.venv and pip-installs GRAPHITI_PACKAGE when INSTALL_GRAPHITI is
+    set, recording each command; otherwise only requires the venv python to exist.
+    """
+
+    venv_dir = WORK_DIR / ".venv"
+    python = venv_dir / "bin" / "python"
+
+    if not INSTALL_GRAPHITI:
+        if python.exists():
+            return True, python
+        command_records.append(
+            CommandRecord(
+                label="graphiti-install",
+                command=["graphiti-core"],
+                status="incomplete",
+                elapsed_ms=0.0,
+                stdout_artifact=None,
+                stderr_artifact=None,
+                returncode=None,
+                reason="Graphiti install was disabled and no venv python exists.",
+            )
+        )
+        return False, python
+
+    for label, argv in (
+        ("python-venv", [sys.executable, "-m", "venv", str(venv_dir)]),
+        ("graphiti-install", [str(python), "-m", "pip", "install", "--disable-pip-version-check", GRAPHITI_PACKAGE]),
+    ):
+        record = run_command(label, argv, WORK_DIR)
+        command_records.append(record)
+        if record.status != "pass":
+            return False, python
+    return True, python
+
+def write_live_runner(path: Path) -> None:
+    """Write the live-input JSON into WORK_DIR and the standalone Graphiti worker script to *path*."""
+
+    payload = {
+        "run_id": RUN_ID,  # the worker uses this value as the Graphiti group_id
+        "facts": temporal_facts(),  # each fact is inserted by the worker as a source/edge/target triplet
+        "query": "Who currently owns deployment method review, and who owned it historically?",
+        "falkordb": {  # connection target only; the worker reads credentials from environment variables
+            "host": FALKORDB_HOST,
+            "port": FALKORDB_PORT,
+            "database": FALKORDB_DATABASE,
+        },
+        "models": {  # recorded in the input file; the worker template below does not read this section
+            "llm": LLM_MODEL,
+            "embedding": EMBEDDING_MODEL,
+            "api_base": API_BASE,
+        },
+    }
+    input_path = WORK_DIR / "graphiti-live-input.json"  # read by the worker at startup
+    output_path = WORK_DIR / "graphiti-live-output.json"  # written by the worker when it finishes
+    write_json(input_path, payload)  # NOTE: the template below is an f-string, so literal braces are doubled
+    script = f"""
+import asyncio
+import json
+import os
+import uuid
+from datetime import datetime
+from pathlib import Path
+
+from graphiti_core import Graphiti
+from graphiti_core.driver.falkordb_driver import FalkorDriver
+from graphiti_core.edges import EntityEdge
+from graphiti_core.nodes import EntityNode
+
+
+INPUT = Path({str(input_path)!r})
+OUTPUT = Path({str(output_path)!r})
+
+
+def parse_dt(value):
+    if value is None:
+        return None
+    return datetime.fromisoformat(value.replace("Z", "+00:00"))
+
+
+async def main():
+    data = json.loads(INPUT.read_text(encoding="utf-8"))
+    config = data["falkordb"]
+    driver = FalkorDriver(
+        host=config["host"],
+        port=config["port"],
+        username=os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_USERNAME") or None,
+        password=os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_PASSWORD") or None,
+        database=config.get("database") or "default_db",
+    )
+    graphiti = Graphiti(graph_driver=driver)
+    try:
+        await graphiti.build_indices_and_constraints()
+        inserted = []
+        for fact in data["facts"]:
+            group_id = data["run_id"]
+            source_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, group_id + ":source:" + fact["source"]))
+            target_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, group_id + ":target:" + fact["target"]))
+            edge_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, group_id + ":edge:" + fact["evidence_id"]))
+            source_node = EntityNode(uuid=source_uuid, name=fact["source"], group_id=group_id)
+            target_node = EntityNode(uuid=target_uuid, name=fact["target"], group_id=group_id)
+            edge = EntityEdge(
+                uuid=edge_uuid,
+                group_id=group_id,
+                source_node_uuid=source_uuid,
+                target_node_uuid=target_uuid,
+                created_at=parse_dt(fact["created_at"]),
+                name=fact["edge_name"],
+                fact=fact["fact"],
+                valid_at=parse_dt(fact["valid_at"]),
+                invalid_at=parse_dt(fact.get("invalid_at")),
+            )
+            await graphiti.add_triplet(source_node, edge, target_node)
+            inserted.append({{"evidence_id": fact["evidence_id"], "uuid": edge_uuid}})
+
+        results = await graphiti.search(data["query"])
+        serialized = []
+        for edge in results:
+            serialized.append({{
+                "uuid": getattr(edge, "uuid", None),
+                "name": getattr(edge, "name", None),
+                "fact": getattr(edge, "fact", None),
+                "valid_at": str(getattr(edge, "valid_at", "")) if getattr(edge, "valid_at", None) else None,
+                "invalid_at": str(getattr(edge, "invalid_at", "")) if getattr(edge, "invalid_at", None) else None,
+                "source_node_uuid": getattr(edge, "source_node_uuid", None),
+                "target_node_uuid": getattr(edge, "target_node_uuid", None),
+            }})
+
+        OUTPUT.write_text(json.dumps({{"inserted": inserted, "results": serialized}}, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
+    finally:
+        await graphiti.close()
+
+
+asyncio.run(main())
+"""
+    path.write_text(textwrap.dedent(script).lstrip(), encoding="utf-8")  # lstrip drops the template's leading newline
+
+def run_graphiti(python: Path, command_records: list[CommandRecord]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    """Execute the generated Graphiti worker with the venv python and load its output.
+
+    The worker's command record is appended to ``command_records``. Returns
+    ``(inserted, results)`` from the worker's JSON output, or two empty lists when
+    the command did not pass or produced no output file.
+    """
+
+    script_path = WORK_DIR / "graphiti_live_runner.py"
+    result_path = WORK_DIR / "graphiti-live-output.json"
+    write_live_runner(script_path)
+
+    # Optional settings are forwarded to the worker only when they are configured.
+    optional_env = {
+        "OPENAI_BASE_URL": API_BASE,
+        "ELF_GRAPHITI_ZEP_FALKORDB_USERNAME": FALKORDB_USERNAME,
+        "ELF_GRAPHITI_ZEP_FALKORDB_PASSWORD": FALKORDB_PASSWORD,
+    }
+    worker_env = {"OPENAI_API_KEY": API_KEY, "MODEL_NAME": LLM_MODEL, "LLM_MODEL": LLM_MODEL, "EMBEDDING_MODEL": EMBEDDING_MODEL}
+    worker_env.update({name: value for name, value in optional_env.items() if value})
+
+    outcome = run_command("graphiti-live-run", [str(python), str(script_path)], WORK_DIR, extra_env=worker_env)
+    command_records.append(outcome)
+    if outcome.status == "pass" and result_path.exists():
+        payload = json.loads(result_path.read_text(encoding="utf-8"))
+        return payload.get("inserted", []), payload.get("results", [])
+
+    return [], []
diff --git a/scripts/graphiti_temporal_smoke/summary.py b/scripts/graphiti_temporal_smoke/summary.py
new file mode 100644
index 00000000..a2b0187b
--- /dev/null
+++ b/scripts/graphiti_temporal_smoke/summary.py
@@ -0,0 +1,35 @@
+"""Summary writer for the Graphiti/Zep smoke."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from .common import rel, utc_now, write_json
+from .context import MANIFEST_OUT, SUMMARY_OUT
+
+def write_summary(materialization: dict[str, Any], manifest: dict[str, Any], report: dict[str, Any]) -> None:
+    """Write the summary artifact to SUMMARY_OUT from the materialization, manifest, and scored report."""
+
+    adapter = manifest["adapters"][0]  # only the first adapter entry is summarized
+    manifest_view = {
+        "json": rel(MANIFEST_OUT),
+        "status_source": "external_adapter_manifest_pre_score",
+        "summary": adapter["overall_status"],
+        "suites": adapter["suites"],
+    }
+    summary = {
+        "schema": "elf.graphiti_zep_temporal_smoke_summary/v1",
+        "generated_at": utc_now(),
+        "adapter_id": "graphiti_zep_temporal_smoke",
+        "evidence_class": materialization["evidence_class"],
+        "status_boundary": {
+            "materialization": "setup/run/evidence-mapping state emitted by the smoke runner",
+            "manifest": "external adapter declaration consumed by the scorer",
+            "scored_benchmark": "post-score real_world_job outcome; use this for quality status",
+        },
+        "scored_benchmark": materialization["scored_benchmark"],
+        "materialization": materialization,
+        "manifest": manifest_view,
+        "report": report,
+    }
+    write_json(SUMMARY_OUT, summary)
diff --git a/scripts/letta-core-archive-export-readback-smoke.py b/scripts/letta-core-archive-export-readback-smoke.py
index ee31ffdc..fb7f4e85 100755
--- a/scripts/letta-core-archive-export-readback-smoke.py
+++ b/scripts/letta-core-archive-export-readback-smoke.py
@@ -3,1059 +3,7 @@
 
 from __future__ import annotations
 
-import json
-import os
-import shutil
-import subprocess
-import sys
-import time
-import urllib.error
-import urllib.request
-from dataclasses import dataclass
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any
-
-
-SCRIPT_DIR = Path(__file__).resolve().parent
-ROOT_DIR = SCRIPT_DIR.parent
-CORE_FIXTURE_DIR = ROOT_DIR / "apps" / "elf-eval" / "fixtures" / "real_world_memory" / "core_archival_memory"
-REPORT_DIR = Path(
-    os.environ.get(
-        "ELF_LETTA_SMOKE_REPORT_DIR",
-        ROOT_DIR / "tmp" / "real-world-memory" / "letta-core-archive",
-    )
-)
-WORK_DIR = Path(os.environ.get("ELF_LETTA_SMOKE_WORK_DIR", REPORT_DIR / "work"))
-OUT = Path(os.environ.get("ELF_LETTA_SMOKE_OUT", REPORT_DIR / "letta-core-archive-export.json"))
-MANIFEST_OUT = Path(
-    os.environ.get(
-        "ELF_LETTA_SMOKE_MANIFEST_OUT",
-        REPORT_DIR / "memory_projects_manifest.letta-core-archive.json",
-    )
-)
-SUMMARY_OUT = Path(os.environ.get("ELF_LETTA_SMOKE_SUMMARY_OUT", REPORT_DIR / "summary.json"))
-REPORT_JSON = Path(os.environ.get("ELF_LETTA_SMOKE_REPORT_JSON", REPORT_DIR / "report.json"))
-REPORT_MD = Path(os.environ.get("ELF_LETTA_SMOKE_REPORT_MD", REPORT_DIR / "report.md"))
-FIXTURE_DIR = REPORT_DIR / "letta-fixtures"
-LOG_DIR = REPORT_DIR / "logs"
-
-RUN_ID = os.environ.get(
-    "ELF_LETTA_SMOKE_RUN_ID",
-    f"letta-core-archive-{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}",
-)
-RUN_LIVE = os.environ.get("ELF_LETTA_SMOKE_RUN", "0") == "1"
-ALLOW_HOST = os.environ.get("ELF_LETTA_SMOKE_ALLOW_HOST", "0") == "1"
-INSTALL_CLIENT = os.environ.get("ELF_LETTA_SMOKE_INSTALL_CLIENT", "1") == "1"
-LETTA_BASE_URL = os.environ.get("ELF_LETTA_BASE_URL", "http://letta:8283")
-LETTA_CLIENT_PACKAGE = os.environ.get("ELF_LETTA_CLIENT_PACKAGE", "letta-client")
-LETTA_CLIENT_REF = os.environ.get("ELF_LETTA_CLIENT_REF", f"pypi:{LETTA_CLIENT_PACKAGE}")
-LETTA_MODEL = os.environ.get("ELF_LETTA_MODEL", "openai/gpt-4o-mini")
-LETTA_EMBEDDING = os.environ.get("ELF_LETTA_EMBEDDING", "openai/text-embedding-3-small")
-TIMEOUT_SECONDS = int(os.environ.get("ELF_LETTA_TIMEOUT_SECONDS", "600"))
-STARTUP_ATTEMPTS = int(os.environ.get("ELF_LETTA_STARTUP_ATTEMPTS", "30"))
-STARTUP_INTERVAL_SECONDS = float(os.environ.get("ELF_LETTA_STARTUP_INTERVAL_SECONDS", "2"))
-
-CORE_KINDS = {"core_block", "core_block_contract", "core_block_event"}
-
-
-@dataclass
-class StatusState:
-    """Typed status for generated Letta smoke artifacts."""
-
-    setup: str = "blocked"
-    run: str = "not_encoded"
-    result: str = "blocked"
-    overall: str = "blocked"
-    evidence_class: str = "research_gate"
-    failure_class: str = "letta_live_run_disabled"
-    failure_reason: str = (
-        "Letta live export/readback is disabled by default; run "
-        "ELF_LETTA_SMOKE_START=1 ELF_LETTA_SMOKE_RUN=1 cargo make "
-        "smoke-letta-core-archive-export-readback with explicit Docker/provider configuration."
-    )
-
-
-@dataclass
-class CommandRecord:
-    """Captured command result without secret-bearing environment values."""
-
-    label: str
-    command: list[str]
-    status: str
-    elapsed_ms: float
-    stdout_artifact: str | None
-    stderr_artifact: str | None
-    returncode: int | None
-    reason: str
-
-
-def utc_now() -> str:
-    """Return an RFC3339 UTC timestamp."""
-
-    return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
-
-
-def rel(path: Path) -> str:
-    """Return a repository-relative path when possible."""
-
-    try:
-        return str(path.resolve().relative_to(ROOT_DIR))
-    except ValueError:
-        return str(path)
-
-
-def mkdirs() -> None:
-    """Create and reset output directories owned by this smoke."""
-
-    for path in (FIXTURE_DIR, LOG_DIR):
-        if path.exists():
-            shutil.rmtree(path)
-
-    for path in (REPORT_DIR, WORK_DIR, FIXTURE_DIR, LOG_DIR):
-        path.mkdir(parents=True, exist_ok=True)
-
-    for path in (OUT, MANIFEST_OUT, SUMMARY_OUT, REPORT_JSON, REPORT_MD):
-        if path.exists():
-            path.unlink()
-
-
-def write_json(path: Path, payload: Any) -> None:
-    """Write stable, pretty JSON."""
-
-    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
-
-
-def command_available(name: str) -> bool:
-    """Return whether a command is available."""
-
-    return shutil.which(name) is not None
-
-
-def run_command(
-    label: str,
-    command: list[str],
-    cwd: Path,
-    *,
-    extra_env: dict[str, str] | None = None,
-) -> CommandRecord:
-    """Run a command and capture stdout/stderr artifacts."""
-
-    started = time.monotonic()
-    env = os.environ.copy()
-    if extra_env:
-        env.update(extra_env)
-
-    try:
-        result = subprocess.run(
-            command,
-            cwd=cwd,
-            env=env,
-            text=True,
-            capture_output=True,
-            timeout=TIMEOUT_SECONDS,
-            check=False,
-        )
-        elapsed = (time.monotonic() - started) * 1000
-        stdout_path = LOG_DIR / f"{label}.stdout.txt"
-        stderr_path = LOG_DIR / f"{label}.stderr.txt"
-        stdout_path.write_text(result.stdout, encoding="utf-8")
-        stderr_path.write_text(result.stderr, encoding="utf-8")
-        status = "pass" if result.returncode == 0 else "incomplete"
-        reason = "command completed" if result.returncode == 0 else f"exit code {result.returncode}"
-
-        return CommandRecord(
-            label=label,
-            command=command,
-            status=status,
-            elapsed_ms=elapsed,
-            stdout_artifact=rel(stdout_path),
-            stderr_artifact=rel(stderr_path),
-            returncode=result.returncode,
-            reason=reason,
-        )
-    except subprocess.TimeoutExpired as exc:
-        elapsed = (time.monotonic() - started) * 1000
-        stdout_path = LOG_DIR / f"{label}.stdout.txt"
-        stderr_path = LOG_DIR / f"{label}.stderr.txt"
-        stdout_path.write_text(exc.stdout or "", encoding="utf-8")
-        stderr_path.write_text(exc.stderr or "", encoding="utf-8")
-
-        return CommandRecord(
-            label=label,
-            command=command,
-            status="incomplete",
-            elapsed_ms=elapsed,
-            stdout_artifact=rel(stdout_path),
-            stderr_artifact=rel(stderr_path),
-            returncode=None,
-            reason=f"timed out after {TIMEOUT_SECONDS}s",
-        )
-
-
-def command_to_json(record: CommandRecord) -> dict[str, Any]:
-    """Serialize a command record."""
-
-    return {
-        "label": record.label,
-        "command": record.command,
-        "status": record.status,
-        "elapsed_ms": round(record.elapsed_ms, 3),
-        "stdout_artifact": record.stdout_artifact,
-        "stderr_artifact": record.stderr_artifact,
-        "returncode": record.returncode,
-        "reason": record.reason,
-    }
-
-
-def load_source_fixtures() -> list[dict[str, Any]]:
-    """Load the checked-in core_archival_memory fixture corpus."""
-
-    fixtures = []
-    for path in sorted(CORE_FIXTURE_DIR.glob("*.json")):
-        payload = json.loads(path.read_text(encoding="utf-8"))
-        payload["_source_path"] = rel(path)
-        fixtures.append(payload)
-
-    return fixtures
-
-
-def evidence_ids_for_fixture(fixture: dict[str, Any]) -> list[str]:
-    """Return required evidence ids for one fixture."""
-
-    return [
-        item["evidence_id"]
-        for item in fixture.get("required_evidence", [])
-        if isinstance(item, dict) and item.get("evidence_id")
-    ]
-
-
-def all_required_evidence_ids(fixtures: list[dict[str, Any]]) -> list[str]:
-    """Return de-duplicated required evidence ids."""
-
-    ids: list[str] = []
-    for fixture in fixtures:
-        for evidence_id in evidence_ids_for_fixture(fixture):
-            if evidence_id not in ids:
-                ids.append(evidence_id)
-
-    return ids
-
-
-def source_items(fixtures: list[dict[str, Any]]) -> list[dict[str, Any]]:
-    """Flatten fixture corpus items with job metadata."""
-
-    items = []
-    for fixture in fixtures:
-        for item in fixture.get("corpus", {}).get("items", []):
-            item_copy = dict(item)
-            item_copy["job_id"] = fixture["job_id"]
-            item_copy["fixture_source"] = fixture["_source_path"]
-            items.append(item_copy)
-
-    return items
-
-
-def benchmark_input_contract(fixtures: list[dict[str, Any]]) -> dict[str, Any]:
-    """Return the benchmark-owned Letta input contract."""
-
-    core_blocks = []
-    archival_passages = []
-    for item in source_items(fixtures):
-        record = {
-            "source_id": item["evidence_id"],
-            "job_id": item["job_id"],
-            "kind": item.get("kind"),
-            "text": item.get("text", ""),
-            "fixture_source": item["fixture_source"],
-        }
-        if item.get("kind") in CORE_KINDS:
-            core_blocks.append(
-                {
-                    "label": slug(item["evidence_id"])[:48],
-                    "value": f"Source ID: {item['evidence_id']}\n{item.get('text', '')}",
-                    **record,
-                }
-            )
-        elif item.get("kind") not in {"stale_claim", "unsupported_claim"}:
-            archival_passages.append(
-                {
-                    "text": f"Source ID: {item['evidence_id']}\n{item.get('text', '')}",
-                    **record,
-                }
-            )
-
-    return {
-        "core_blocks": core_blocks,
-        "archival_passages": archival_passages,
-        "source_id_count": len({item["evidence_id"] for item in source_items(fixtures)}),
-        "required_evidence_ids": all_required_evidence_ids(fixtures),
-    }
-
-
-def slug(value: str) -> str:
-    """Return a small ASCII slug."""
-
-    out: list[str] = []
-    last_dash = False
-
-    for char in value.lower():
-        if char.isascii() and char.isalnum():
-            out.append(char)
-            last_dash = False
-        elif not last_dash and out:
-            out.append("-")
-            last_dash = True
-
-    while out and out[-1] == "-":
-        out.pop()
-
-    return "".join(out) or "item"
-
-
-def wait_for_letta(command_records: list[CommandRecord]) -> bool:
-    """Wait for a Letta server endpoint to become reachable."""
-
-    started = time.monotonic()
-    probes = ["/v1/health", "/health", "/v1/models"]
-    last_reason = "not attempted"
-    for _ in range(STARTUP_ATTEMPTS):
-        for path in probes:
-            url = LETTA_BASE_URL.rstrip("/") + path
-            try:
-                with urllib.request.urlopen(url, timeout=5) as response:
-                    if 200 <= response.status < 500:
-                        command_records.append(
-                            CommandRecord(
-                                label="letta-health-probe",
-                                command=["GET", url],
-                                status="pass",
-                                elapsed_ms=(time.monotonic() - started) * 1000,
-                                stdout_artifact=None,
-                                stderr_artifact=None,
-                                returncode=0,
-                                reason=f"reachable via {path}",
-                            )
-                        )
-                        return True
-            except (urllib.error.URLError, TimeoutError, OSError) as exc:
-                last_reason = str(exc)
-
-        time.sleep(STARTUP_INTERVAL_SECONDS)
-
-    command_records.append(
-        CommandRecord(
-            label="letta-health-probe",
-            command=["GET", LETTA_BASE_URL.rstrip() + "/v1/health"],
-            status="incomplete",
-            elapsed_ms=(time.monotonic() - started) * 1000,
-            stdout_artifact=None,
-            stderr_artifact=None,
-            returncode=None,
-            reason=last_reason,
-        )
-    )
-    return False
-
-
-def init_letta_client(command_records: list[CommandRecord]) -> bool:
-    """Install or verify the Letta Python client."""
-
-    if INSTALL_CLIENT:
-        record = run_command(
-            "letta-client-install",
-            [sys.executable, "-m", "pip", "install", LETTA_CLIENT_PACKAGE],
-            WORK_DIR,
-        )
-        command_records.append(record)
-        if record.status != "pass":
-            return False
-
-    record = run_command("letta-client-import", [sys.executable, "-c", "import letta_client"], WORK_DIR)
-    command_records.append(record)
-
-    return record.status == "pass"
-
-
-def write_live_runner(fixtures: list[dict[str, Any]]) -> Path:
-    """Write a small Python runner that uses the current Letta SDK."""
-
-    contract = benchmark_input_contract(fixtures)
-    input_path = WORK_DIR / "letta-live-input.json"
-    write_json(input_path, contract)
-
-    runner = WORK_DIR / "letta_live_runner.py"
-    runner.write_text(
-        """
-import json
-import os
-from pathlib import Path
-
-from letta_client import Letta
-
-
-def as_dict(value):
-    if hasattr(value, "model_dump"):
-        return value.model_dump(mode="json")
-    if hasattr(value, "dict"):
-        return value.dict()
-    return json.loads(json.dumps(value, default=str))
-
-
-input_path = Path(os.environ["ELF_LETTA_LIVE_INPUT"])
-output_path = Path(os.environ["ELF_LETTA_LIVE_OUTPUT"])
-data = json.loads(input_path.read_text())
-
-client = Letta(base_url=os.environ["ELF_LETTA_BASE_URL"])
-agent = client.agents.create(
-    name=os.environ.get("ELF_LETTA_AGENT_NAME", "elf-core-archive-smoke"),
-    model=os.environ["ELF_LETTA_MODEL"],
-    embedding=os.environ["ELF_LETTA_EMBEDDING"],
-    memory_blocks=[
-        {"label": item["label"], "value": item["value"]}
-        for item in data["core_blocks"]
-    ],
-)
-
-created_passages = []
-for passage in data["archival_passages"]:
-    created_passages.append(
-        as_dict(client.agents.passages.create(agent_id=agent.id, text=passage["text"]))
-    )
-
-core_block_export = []
-for item in data["core_blocks"]:
-    core_block_export.append(
-        {
-            "source_id": item["source_id"],
-            "label": item["label"],
-            "block": as_dict(
-                client.agents.blocks.retrieve(agent_id=agent.id, block_label=item["label"])
-            ),
-        }
-    )
-
-listed_passages = as_dict(client.agents.passages.list(agent_id=agent.id))
-search_results = []
-for source_id in data["required_evidence_ids"]:
-    search_results.append(
-        {
-            "query": source_id,
-            "response": as_dict(
-                client.agents.passages.search(agent_id=agent.id, query=source_id, top_k=5)
-            ),
-        }
-    )
-
-output_path.write_text(
-    json.dumps(
-        {
-            "agent": as_dict(agent),
-            "core_block_export": core_block_export,
-            "created_passages": created_passages,
-            "archival_readback": listed_passages,
-            "archival_search": search_results,
-        },
-        indent=2,
-        sort_keys=True,
-    )
-    + "\\n"
-)
-""".lstrip(),
-        encoding="utf-8",
-    )
-
-    return runner
-
-
-def run_letta(fixtures: list[dict[str, Any]], command_records: list[CommandRecord]) -> dict[str, Any] | None:
-    """Create the Letta benchmark agent and export readback/search data."""
-
-    runner = write_live_runner(fixtures)
-    output_path = WORK_DIR / "letta-live-output.json"
-    env = {
-        "ELF_LETTA_BASE_URL": LETTA_BASE_URL,
-        "ELF_LETTA_MODEL": LETTA_MODEL,
-        "ELF_LETTA_EMBEDDING": LETTA_EMBEDDING,
-        "ELF_LETTA_LIVE_INPUT": str(WORK_DIR / "letta-live-input.json"),
-        "ELF_LETTA_LIVE_OUTPUT": str(output_path),
-        "ELF_LETTA_AGENT_NAME": f"elf-core-archive-smoke-{RUN_ID}",
-    }
-    record = run_command("letta-live-export-readback", [sys.executable, str(runner)], WORK_DIR, extra_env=env)
-    command_records.append(record)
-    if record.status != "pass" or not output_path.exists():
-        return None
-
-    return json.loads(output_path.read_text(encoding="utf-8"))
-
-
-def ids_in_payload(payload: Any, evidence_ids: list[str]) -> list[str]:
-    """Return evidence ids present anywhere in a JSON-compatible payload."""
-
-    haystack = json.dumps(payload, sort_keys=True, default=str)
-    return [evidence_id for evidence_id in evidence_ids if evidence_id in haystack]
-
-
-def evidence_mapping(
-    fixtures: list[dict[str, Any]],
-    live_export: dict[str, Any] | None,
-    status: StatusState,
-) -> dict[str, Any]:
-    """Map observed Letta export/readback data to fixture source ids."""
-
-    required_ids = all_required_evidence_ids(fixtures)
-    if live_export is None:
-        mapped_ids: list[str] = []
-    else:
-        mapped_ids = ids_in_payload(live_export, required_ids)
-
-    missing_ids = [evidence_id for evidence_id in required_ids if evidence_id not in mapped_ids]
-    jobs = []
-    for fixture in fixtures:
-        expected = evidence_ids_for_fixture(fixture)
-        mapped = [evidence_id for evidence_id in expected if evidence_id in mapped_ids]
-        if status.result in {"blocked", "incomplete", "not_encoded"}:
-            job_status = status.result
-            reason = status.failure_reason
-        elif len(mapped) == len(expected):
-            job_status = "pass"
-            reason = "Letta core block export and archival readback/search mapped all required source ids."
-        else:
-            job_status = "wrong_result"
-            missing = [evidence_id for evidence_id in expected if evidence_id not in mapped]
-            reason = f"Letta export/readback missed required evidence ids: {', '.join(missing)}."
-
-        jobs.append(
-            {
-                "job_id": fixture["job_id"],
-                "source_fixture": fixture["_source_path"],
-                "expected_evidence_ids": expected,
-                "mapped_evidence_ids": mapped,
-                "missing_evidence_ids": [evidence_id for evidence_id in expected if evidence_id not in mapped],
-                "status": job_status,
-                "reason": reason,
-            }
-        )
-
-    return {
-        "status": status.result if missing_ids or live_export is None else "pass",
-        "reason": status.failure_reason
-        if live_export is None
-        else (
-            "Letta export/readback mapped all required fixture source ids."
-            if not missing_ids
-            else f"Letta export/readback missed required evidence ids: {', '.join(missing_ids)}."
-        ),
-        "expected_evidence_ids": required_ids,
-        "mapped_evidence_ids": mapped_ids,
-        "missing_evidence_ids": missing_ids,
-        "jobs": jobs,
-    }
-
-
-def write_fixture_outputs(
-    fixtures: list[dict[str, Any]],
-    status: StatusState,
-    mapping: dict[str, Any],
-) -> Path:
-    """Write generated Letta real_world_job fixtures."""
-
-    for fixture in fixtures:
-        generated = json.loads(json.dumps({k: v for k, v in fixture.items() if k != "_source_path"}))
-        generated["corpus"]["profile"] = "external_adapter"
-        generated["corpus"]["corpus_id"] = "letta-core-archive-export-readback-2026-06-19"
-        job_mapping = next(item for item in mapping["jobs"] if item["job_id"] == fixture["job_id"])
-        source_answer = fixture.get("corpus", {}).get("adapter_response", {}).get("answer", {})
-        generated["corpus"]["adapter_response"] = {
-            "adapter_id": "letta_core_archive_export_readback",
-            "answer": {
-                "content": source_answer.get("content", ""),
-                "claims": source_answer.get("claims", []),
-                "evidence_ids": evidence_ids_for_fixture(fixture),
-                "latency_ms": 0.0,
-                "cost": {
-                    "currency": "USD",
-                    "amount": 0.0,
-                    "input_tokens": 0,
-                    "output_tokens": 0,
-                },
-            },
-        }
-        generated["tags"] = sorted(set(generated.get("tags", []) + ["external_adapter", "letta_export_readback"]))
-        generated["encoding"] = {}
-        if job_mapping["status"] in {"blocked", "incomplete", "not_encoded"}:
-            generated["encoding"] = {
-                "status": job_mapping["status"],
-                "reason": job_mapping["reason"],
-                "follow_up": {
-                    "title": "Produce Letta core/archive export-readback evidence",
-                    "reason": (
-                        "The benchmark must export Letta core block JSON, archival readback/search JSON, "
-                        "and fixture source ids before this scenario can be scored as pass or wrong_result."
-                    ),
-                },
-            }
-
-        if job_mapping["status"] == "wrong_result":
-            generated["corpus"]["adapter_response"]["answer"]["evidence_ids"] = job_mapping[
-                "mapped_evidence_ids"
-            ]
-
-        fixture_path = FIXTURE_DIR / "core_archival_memory" / Path(fixture["_source_path"]).name
-        write_json(fixture_path, generated)
-
-    return FIXTURE_DIR / "core_archival_memory"
-
-
-def run_scored_report(fixture_path: Path, manifest_path: Path, status: StatusState) -> dict[str, Any]:
-    """Score the generated Letta fixtures through the real-world job runner."""
-
-    run_cmd = [
-        "cargo",
-        "run",
-        "-p",
-        "elf-eval",
-        "--bin",
-        "real_world_job_benchmark",
-        "--",
-        "run",
-        "--fixtures",
-        str(fixture_path),
-        "--out",
-        str(REPORT_JSON),
-        "--run-id",
-        "real-world-memory-live-letta-core-archive",
-        "--adapter-id",
-        "letta_core_archive_export_readback",
-        "--adapter-name",
-        "Letta core/archive export-readback adapter",
-        "--adapter-behavior",
-        "docker_core_archive_export_readback",
-        "--adapter-storage-status",
-        status.setup,
-        "--adapter-runtime-status",
-        status.overall,
-        "--adapter-notes",
-        "Generated by the Letta core/archive export-readback smoke; pass requires exported core block JSON, archival readback/search JSON, and mapped fixture source ids.",
-        "--external-adapter-manifest",
-        str(manifest_path),
-    ]
-    publish_cmd = [
-        "cargo",
-        "run",
-        "-p",
-        "elf-eval",
-        "--bin",
-        "real_world_job_benchmark",
-        "--",
-        "publish",
-        "--report",
-        str(REPORT_JSON),
-        "--out",
-        str(REPORT_MD),
-    ]
-
-    subprocess.run(run_cmd, cwd=ROOT_DIR, check=True)
-    subprocess.run(publish_cmd, cwd=ROOT_DIR, check=True)
-
-    report = json.loads(REPORT_JSON.read_text(encoding="utf-8"))
-    return {
-        "json": rel(REPORT_JSON),
-        "markdown": rel(REPORT_MD),
-        "summary": report.get("summary", {}),
-        "suites": report.get("suites", []),
-    }
-
-
-def scored_benchmark(report: dict[str, Any] | None) -> dict[str, Any]:
-    """Extract the post-score benchmark status from a real_world_job report."""
-
-    if report is None:
-        return {
-            "schema": "elf.scored_benchmark_status/v1",
-            "source": "real_world_job_benchmark",
-            "status": "pending",
-            "reason": "The Letta smoke materialization was written before benchmark scoring completed.",
-        }
-
-    summary = report.get("summary", {})
-    counts = {
-        status: int(summary.get(status, 0) or 0)
-        for status in ("pass", "wrong_result", "lifecycle_fail", "incomplete", "blocked", "not_encoded")
-    }
-    status = next((name for name, count in counts.items() if name != "pass" and count > 0), "pass")
-
-    return {
-        "schema": "elf.scored_benchmark_status/v1",
-        "source": "real_world_job_benchmark",
-        "status": status,
-        "counts": counts,
-        "job_count": int(summary.get("job_count", 0) or 0),
-        "mean_score": summary.get("mean_score"),
-        "evidence_coverage": summary.get("evidence_coverage"),
-    }
-
-
-def write_materialization(
-    status: StatusState,
-    fixtures: list[dict[str, Any]],
-    fixture_path: Path,
-    command_records: list[CommandRecord],
-    live_export: dict[str, Any] | None,
-    mapping: dict[str, Any],
-    started_at: float,
-    report: dict[str, Any] | None = None,
-) -> dict[str, Any]:
-    """Write the primary Letta materialization artifact."""
-
-    elapsed_ms = (time.monotonic() - started_at) * 1000
-    payload = {
-        "schema": "elf.letta_core_archive_export_readback/v1",
-        "generated_at": utc_now(),
-        "run_id": RUN_ID,
-        "adapter_id": "letta_core_archive_export_readback",
-        "project": "Letta",
-        "evidence_class": status.evidence_class,
-        "status": {
-            "source": "smoke_materialization",
-            "setup": status.setup,
-            "run": status.run,
-            "result": status.result,
-            "overall": status.overall,
-            "failure_class": status.failure_class,
-            "failure_reason": status.failure_reason,
-        },
-        "scored_benchmark": scored_benchmark(report),
-        "artifacts": {
-            "materialization": rel(OUT),
-            "manifest": rel(MANIFEST_OUT),
-            "summary": rel(SUMMARY_OUT),
-            "generated_fixture_dir": rel(fixture_path),
-            "scored_report_json": rel(REPORT_JSON),
-            "scored_report_markdown": rel(REPORT_MD),
-            "live_output": rel(WORK_DIR / "letta-live-output.json")
-            if (WORK_DIR / "letta-live-output.json").exists()
-            else None,
-        },
-        "docker_boundary": {
-            "compose_file": "docker-compose.baseline.yml",
-            "service_profile": "letta",
-            "runner_service": "baseline-runner",
-            "runner": "scripts/letta-core-archive-export-readback-smoke.py",
-            "host_global_installs_required": False,
-            "docker_only": True,
-            "host_global_letta_state_used": False,
-            "hosted_letta_state_used": False,
-        },
-        "provider_configuration": {
-            "base_url": LETTA_BASE_URL,
-            "client_package": LETTA_CLIENT_REF,
-            "model": LETTA_MODEL,
-            "embedding": LETTA_EMBEDDING,
-            "live_run_enabled": RUN_LIVE,
-            "operator_owned_provider_credentials_used": False,
-        },
-        "benchmark_input": benchmark_input_contract(fixtures),
-        "letta_export": {
-            "core_block_json": live_export.get("core_block_export", []) if live_export else [],
-            "archival_readback_json": live_export.get("archival_readback") if live_export else None,
-            "archival_search_json": live_export.get("archival_search", []) if live_export else [],
-            "agent": live_export.get("agent") if live_export else None,
-            "status": "exported" if live_export else status.result,
-        },
-        "resource_bounds": {
-            "source_fixture_count": len(fixtures),
-            "core_block_count": len(benchmark_input_contract(fixtures)["core_blocks"]),
-            "archival_passage_count": len(benchmark_input_contract(fixtures)["archival_passages"]),
-            "timeout_seconds": TIMEOUT_SECONDS,
-            "elapsed_ms": round(elapsed_ms, 3),
-        },
-        "commands": [command_to_json(record) for record in command_records],
-        "evidence_mapping": mapping,
-        "improvement_regression_readback": {
-            "baseline": "XY-955 left Letta core/archive comparison blocked because no contained export/readback artifact existed.",
-            "current": (
-                "unchanged: the benchmark now has a Docker-contained materialization command and typed report, "
-                "but the default run still preserves Letta comparison as blocked until live export/search data maps source ids."
-            )
-            if status.result != "pass"
-            else "improved: Letta export/readback mapped all required core/archive source ids.",
-            "judgment": "improved" if status.result == "pass" else "unchanged",
-        },
-        "claim_boundaries": {
-            "allowed": [
-                "The Letta comparison now has a reproducible Docker-contained materialization/report command.",
-                "The current default report may preserve typed blockers when live Letta/provider setup cannot produce export/readback evidence.",
-            ],
-            "not_allowed": [
-                "Do not claim ELF beats Letta on core-vs-archival memory from fixture-only ELF evidence.",
-                "Do not score Letta pass, win, tie, or loss unless exported core block JSON, archival readback/search JSON, and fixture source ids are present.",
-            ],
-        },
-    }
-    write_json(OUT, payload)
-
-    return payload
-
-
-def write_manifest(status: StatusState) -> dict[str, Any]:
-    """Write a generated external adapter manifest for this smoke."""
-
-    manifest = {
-        "schema": "elf.real_world_external_adapter_manifest/v1",
-        "manifest_id": f"letta-core-archive-export-readback-{RUN_ID}",
-        "docker_isolation": {
-            "default": True,
-            "compose_file": "docker-compose.baseline.yml",
-            "runner": "scripts/letta-core-archive-export-readback-smoke.py",
-            "artifact_dir": "tmp/real-world-memory/letta-core-archive",
-            "host_global_installs_required": False,
-            "notes": [
-                f"Generated by the Letta core/archive export-readback smoke at {utc_now()}.",
-                "The smoke uses checked-in core_archival_memory fixtures and records typed setup/runtime failures.",
-            ],
-        },
-        "adapters": [
-            {
-                "adapter_id": "letta_core_archive_export_readback",
-                "project": "Letta",
-                "adapter_kind": "docker_core_archive_export_readback",
-                "evidence_class": status.evidence_class,
-                "docker_default": True,
-                "host_global_installs_required": False,
-                "overall_status": status.overall,
-                "setup": {
-                    "status": status.setup,
-                    "evidence": "The smoke runs inside the baseline Docker runner and can use a Docker-profile Letta server with explicit model and embedding configuration.",
-                    "command": "cargo make smoke-letta-core-archive-export-readback",
-                    "artifact": rel(OUT),
-                },
-                "run": {
-                    "status": status.run,
-                    "evidence": "The live path creates a benchmark-owned Letta agent, imports fixture source ids into core blocks and archival passages, then exports block/readback/search JSON.",
-                    "command": "ELF_LETTA_SMOKE_START=1 ELF_LETTA_SMOKE_RUN=1 cargo make smoke-letta-core-archive-export-readback",
-                    "artifact": rel(OUT),
-                },
-                "result": {
-                    "status": status.result,
-                    "evidence": status.failure_reason
-                    if status.failure_reason
-                    else "Letta core block export, archival readback, and archival search mapped required fixture source ids.",
-                    "artifact": rel(OUT),
-                },
-                "capabilities": [
-                    {
-                        "capability": "docker_letta_server_boundary",
-                        "status": status.setup,
-                        "evidence": "The runner uses docker-compose.baseline.yml and avoids host-global Letta state or hosted/private agents.",
-                    },
-                    {
-                        "capability": "core_block_export",
-                        "status": status.run,
-                        "evidence": "Live scoring requires retrieving Letta memory blocks with fixture source ids embedded in block values.",
-                    },
-                    {
-                        "capability": "archival_readback_search_export",
-                        "status": status.result,
-                        "evidence": "Live scoring requires archival passage list/search JSON to map required source ids.",
-                    },
-                    {
-                        "capability": "broad_letta_quality_claim",
-                        "status": "not_encoded",
-                        "evidence": "The smoke does not claim broad Letta product quality, private corpus behavior, or hosted-service parity.",
-                    },
-                ],
-                "suites": [
-                    {
-                        "suite_id": "core_archival_memory",
-                        "status": status.result,
-                        "evidence": "Only the six checked-in core_archival_memory scenarios are represented.",
-                    },
-                    {
-                        "suite_id": "personalization",
-                        "status": "not_encoded",
-                        "evidence": "Scoped preference behavior is outside this core/archive export smoke.",
-                    },
-                    {
-                        "suite_id": "project_decisions",
-                        "status": status.result,
-                        "evidence": "Project-decision recovery is scored only through the core_archival_memory fixture that requires core routing plus archival rationale source ids.",
-                    },
-                    {
-                        "suite_id": "work_resume",
-                        "status": "not_encoded",
-                        "evidence": "Agent resumption across sessions is not encoded by this export/readback smoke.",
-                    },
-                ],
-                "evidence": [
-                    {"kind": "artifact", "ref": rel(OUT), "status": status.result},
-                    {"kind": "manifest", "ref": rel(MANIFEST_OUT), "status": status.overall},
-                    {"kind": "source", "ref": "https://docs.letta.com/guides/docker", "status": "real"},
-                    {"kind": "source", "ref": "https://docs.letta.com/api/python", "status": "real"},
-                    {
-                        "kind": "source",
-                        "ref": "https://docs.letta.com/api/resources/agents/subresources/passages/methods/search",
-                        "status": "real",
-                    },
-                ],
-                "execution_metadata": {
-                    "sources": [
-                        {
-                            "label": "Letta Docker docs",
-                            "url": "https://docs.letta.com/guides/docker",
-                            "evidence": "Official Docker setup and explicit embedding configuration boundary.",
-                        },
-                        {
-                            "label": "Letta Python API",
-                            "url": "https://docs.letta.com/api/python",
-                            "evidence": "Official Python SDK memory block creation and retrieval examples.",
-                        },
-                        {
-                            "label": "Letta archival search API",
-                            "url": "https://docs.letta.com/api/resources/agents/subresources/passages/methods/search",
-                            "evidence": "Official archival-memory search endpoint contract.",
-                        },
-                    ],
-                    "setup_path": "Run cargo make smoke-letta-core-archive-export-readback for a typed artifact; set ELF_LETTA_SMOKE_START=1 ELF_LETTA_SMOKE_RUN=1 with explicit model/provider configuration for a live export attempt.",
-                    "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus optional Letta server profile, benchmark-created agent, benchmark-owned fixture corpus, and artifacts under tmp/real-world-memory/letta-core-archive.",
-                    "resource_expectation": f"Letta client {LETTA_CLIENT_REF}, model={LETTA_MODEL}, embedding={LETTA_EMBEDDING}, source fixture count=6, timeout_seconds={TIMEOUT_SECONDS}.",
-                    "retry_guidance": [
-                        "Default command records a typed blocked artifact without model calls.",
-                        "Enable the live path only with a Docker-local Letta server and explicit provider or local model configuration.",
-                        "Score only when core block export and archival list/search output map to required fixture source ids.",
-                    ],
-                    "research_depth": "XY-984 materialization contract; generated artifact decides live evidence class.",
-                },
-                "notes": [
-                    "Failure before Letta export/readback remains typed as blocked or incomplete.",
-                    "The smoke does not use hosted/private Letta state or operator-owned data.",
-                ],
-            }
-        ],
-    }
-    write_json(MANIFEST_OUT, manifest)
-
-    return manifest
-
-
-def write_summary(materialization: dict[str, Any], manifest: dict[str, Any], report: dict[str, Any]) -> None:
-    """Write a small summary artifact."""
-
-    write_json(
-        SUMMARY_OUT,
-        {
-            "schema": "elf.letta_core_archive_export_readback_summary/v1",
-            "generated_at": utc_now(),
-            "adapter_id": "letta_core_archive_export_readback",
-            "evidence_class": materialization["evidence_class"],
-            "status_boundary": {
-                "materialization": "setup/run/evidence-mapping state emitted by the smoke runner",
-                "manifest": "external adapter declaration consumed by the scorer",
-                "scored_benchmark": "post-score real_world_job outcome; use this for quality status",
-            },
-            "scored_benchmark": materialization["scored_benchmark"],
-            "materialization": materialization,
-            "manifest": {
-                "json": rel(MANIFEST_OUT),
-                "status_source": "external_adapter_manifest_score_aligned",
-                "summary": manifest["adapters"][0]["overall_status"],
-                "suites": manifest["adapters"][0]["suites"],
-            },
-            "report": report,
-        },
-    )
-
-
-def main() -> int:
-    """Run the smoke and always emit typed artifacts when possible."""
-
-    started_at = time.monotonic()
-    mkdirs()
-    status = StatusState()
-    command_records: list[CommandRecord] = []
-    fixtures = load_source_fixtures()
-    live_export: dict[str, Any] | None = None
-
-    if not Path("/.dockerenv").exists() and not ALLOW_HOST:
-        status.setup = "incomplete"
-        status.result = "incomplete"
-        status.overall = "incomplete"
-        status.failure_class = "not_running_in_docker"
-        status.failure_reason = "Letta smoke must run inside Docker; use cargo make smoke-letta-core-archive-export-readback."
-    elif not command_available("python3"):
-        status.setup = "incomplete"
-        status.result = "incomplete"
-        status.overall = "incomplete"
-        status.failure_class = "python_missing"
-        status.failure_reason = "python3 is required for the Letta smoke runner."
-    elif not RUN_LIVE:
-        pass
-    elif not wait_for_letta(command_records):
-        status.setup = "incomplete"
-        status.result = "incomplete"
-        status.overall = "incomplete"
-        status.failure_class = "letta_server_unreachable"
-        status.failure_reason = "Docker-local Letta server did not become reachable for export/readback."
-    elif not init_letta_client(command_records):
-        status.setup = "incomplete"
-        status.result = "incomplete"
-        status.overall = "incomplete"
-        status.failure_class = "letta_client_setup_failed"
-        status.failure_reason = "Letta Python client installation or import failed inside the Docker runner."
-    else:
-        status.setup = "pass"
-        live_export = run_letta(fixtures, command_records)
-        if live_export is None:
-            status.run = "incomplete"
-            status.result = "incomplete"
-            status.overall = "incomplete"
-            status.failure_class = "letta_export_readback_failed"
-            status.failure_reason = "Letta did not produce core block export plus archival readback/search output."
-        else:
-            status.run = "pass"
-            status.evidence_class = "live_real_world"
-            mapping = evidence_mapping(fixtures, live_export, status)
-            if not mapping["missing_evidence_ids"]:
-                status.result = "pass"
-                status.overall = "pass"
-                status.failure_class = ""
-                status.failure_reason = ""
-            else:
-                status.result = "wrong_result"
-                status.overall = "wrong_result"
-                status.failure_class = "letta_source_id_mapping_failed"
-                status.failure_reason = mapping["reason"]
-
-    mapping = evidence_mapping(fixtures, live_export, status)
-    fixture_path = write_fixture_outputs(fixtures, status, mapping)
-    write_materialization(
-        status,
-        fixtures,
-        fixture_path,
-        command_records,
-        live_export,
-        mapping,
-        started_at,
-    )
-    manifest = write_manifest(status)
-    report = run_scored_report(fixture_path, MANIFEST_OUT, status)
-    materialization = write_materialization(
-        status,
-        fixtures,
-        fixture_path,
-        command_records,
-        live_export,
-        mapping,
-        started_at,
-        report,
-    )
-    write_summary(materialization, manifest, report)
-    print(f"Letta core/archive artifact: {OUT}")
-    print(f"Letta core/archive manifest: {MANIFEST_OUT}")
-    print(f"Letta core/archive summary: {SUMMARY_OUT}")
-
-    return 0
+from letta_core_archive_smoke.runner import main
 
 
 if __name__ == "__main__":
diff --git a/scripts/letta_core_archive_smoke/__init__.py b/scripts/letta_core_archive_smoke/__init__.py
new file mode 100644
index 00000000..9d9617aa
--- /dev/null
+++ b/scripts/letta_core_archive_smoke/__init__.py
@@ -0,0 +1 @@
+"""Letta core/archive export-readback smoke modules."""
diff --git a/scripts/letta_core_archive_smoke/artifacts.py b/scripts/letta_core_archive_smoke/artifacts.py
new file mode 100644
index 00000000..e4e12276
--- /dev/null
+++ b/scripts/letta_core_archive_smoke/artifacts.py
@@ -0,0 +1,280 @@
+"""Artifact writers for the Letta core/archive smoke."""
+
+from __future__ import annotations
+
+import time
+from pathlib import Path
+from typing import Any
+
+from .benchmark import scored_benchmark
+from .common import command_to_json, rel, utc_now, write_json
+from .context import *  # noqa: F403
+from .fixtures import benchmark_input_contract
+from .models import CommandRecord, StatusState
+
+def write_materialization(
+    status: StatusState,
+    fixtures: list[dict[str, Any]],
+    fixture_path: Path,
+    command_records: list[CommandRecord],
+    live_export: dict[str, Any] | None,
+    mapping: dict[str, Any],
+    started_at: float,  # time.monotonic() reading that elapsed_ms is measured from
+    report: dict[str, Any] | None = None,  # None until scoring has produced a report
+) -> dict[str, Any]:
+    """Write the primary Letta materialization artifact to ``OUT`` and return the written payload."""
+
+    elapsed_ms = (time.monotonic() - started_at) * 1000
+    contract = benchmark_input_contract(fixtures)  # built once; feeds benchmark_input and resource_bounds
+    live_output = WORK_DIR / "letta-live-output.json"  # only advertised below when the file exists
+    payload = {
+        "schema": "elf.letta_core_archive_export_readback/v1",
+        "generated_at": utc_now(),
+        "run_id": RUN_ID,
+        "adapter_id": "letta_core_archive_export_readback",
+        "project": "Letta",
+        "evidence_class": status.evidence_class,
+        "status": {
+            "source": "smoke_materialization",
+            "setup": status.setup,
+            "run": status.run,
+            "result": status.result,
+            "overall": status.overall,
+            "failure_class": status.failure_class,
+            "failure_reason": status.failure_reason,
+        },
+        "scored_benchmark": scored_benchmark(report),
+        "artifacts": {
+            "materialization": rel(OUT),
+            "manifest": rel(MANIFEST_OUT),
+            "summary": rel(SUMMARY_OUT),
+            "generated_fixture_dir": rel(fixture_path),
+            "scored_report_json": rel(REPORT_JSON),
+            "scored_report_markdown": rel(REPORT_MD),
+            "live_output": rel(live_output) if live_output.exists() else None,
+        },
+        "docker_boundary": {
+            "compose_file": "docker-compose.baseline.yml",
+            "service_profile": "letta",
+            "runner_service": "baseline-runner",
+            "runner": "scripts/letta-core-archive-export-readback-smoke.py",
+            "host_global_installs_required": False,
+            "docker_only": True,
+            "host_global_letta_state_used": False,
+            "hosted_letta_state_used": False,
+        },
+        "provider_configuration": {
+            "base_url": LETTA_BASE_URL,
+            "client_package": LETTA_CLIENT_REF,
+            "model": LETTA_MODEL,
+            "embedding": LETTA_EMBEDDING,
+            "live_run_enabled": RUN_LIVE,
+            "operator_owned_provider_credentials_used": False,
+        },
+        "benchmark_input": contract,
+        "letta_export": {
+            "core_block_json": live_export.get("core_block_export", []) if live_export else [],
+            "archival_readback_json": live_export.get("archival_readback") if live_export else None,
+            "archival_search_json": live_export.get("archival_search", []) if live_export else [],
+            "agent": live_export.get("agent") if live_export else None,
+            "status": "exported" if live_export else status.result,
+        },
+        "resource_bounds": {
+            "source_fixture_count": len(fixtures),
+            "core_block_count": len(contract["core_blocks"]),
+            "archival_passage_count": len(contract["archival_passages"]),
+            "timeout_seconds": TIMEOUT_SECONDS,
+            "elapsed_ms": round(elapsed_ms, 3),
+        },
+        "commands": [command_to_json(record) for record in command_records],
+        "evidence_mapping": mapping,
+        "improvement_regression_readback": {
+            "baseline": "XY-955 left Letta core/archive comparison blocked because no contained export/readback artifact existed.",
+            "current": (
+                "unchanged: the benchmark now has a Docker-contained materialization command and typed report, "
+                "but the default run still preserves Letta comparison as blocked until live export/search data maps source ids."
+            )
+            if status.result != "pass"
+            else "improved: Letta export/readback mapped all required core/archive source ids.",
+            "judgment": "improved" if status.result == "pass" else "unchanged",
+        },
+        "claim_boundaries": {
+            "allowed": [
+                "The Letta comparison now has a reproducible Docker-contained materialization/report command.",
+                "The current default report may preserve typed blockers when live Letta/provider setup cannot produce export/readback evidence.",
+            ],
+            "not_allowed": [
+                "Do not claim ELF beats Letta on core-vs-archival memory from fixture-only ELF evidence.",
+                "Do not score Letta pass, win, tie, or loss unless exported core block JSON, archival readback/search JSON, and fixture source ids are present.",
+            ],
+        },
+    }
+    write_json(OUT, payload)
+
+    return payload
+
+def write_manifest(status: StatusState) -> dict[str, Any]:
+    """Write the generated external adapter manifest for this smoke to ``MANIFEST_OUT`` and return it."""
+
+    manifest = {
+        "schema": "elf.real_world_external_adapter_manifest/v1",
+        "manifest_id": f"letta-core-archive-export-readback-{RUN_ID}",
+        "docker_isolation": {
+            "default": True,
+            "compose_file": "docker-compose.baseline.yml",
+            "runner": "scripts/letta-core-archive-export-readback-smoke.py",
+            "artifact_dir": "tmp/real-world-memory/letta-core-archive",
+            "host_global_installs_required": False,
+            "notes": [
+                f"Generated by the Letta core/archive export-readback smoke at {utc_now()}.",
+                "The smoke uses checked-in core_archival_memory fixtures and records typed setup/runtime failures.",
+            ],
+        },
+        "adapters": [
+            {
+                "adapter_id": "letta_core_archive_export_readback",
+                "project": "Letta",
+                "adapter_kind": "docker_core_archive_export_readback",
+                "evidence_class": status.evidence_class,
+                "docker_default": True,
+                "host_global_installs_required": False,
+                "overall_status": status.overall,
+                "setup": {
+                    "status": status.setup,
+                    "evidence": "The smoke runs inside the baseline Docker runner and can use a Docker-profile Letta server with explicit model and embedding configuration.",
+                    "command": "cargo make smoke-letta-core-archive-export-readback",
+                    "artifact": rel(OUT),
+                },
+                "run": {
+                    "status": status.run,
+                    "evidence": "The live path creates a benchmark-owned Letta agent, imports fixture source ids into core blocks and archival passages, then exports block/readback/search JSON.",
+                    "command": "ELF_LETTA_SMOKE_START=1 ELF_LETTA_SMOKE_RUN=1 cargo make smoke-letta-core-archive-export-readback",
+                    "artifact": rel(OUT),
+                },
+                "result": {
+                    "status": status.result,
+                    "evidence": status.failure_reason  # the failure text doubles as result evidence when one is set
+                    if status.failure_reason
+                    else "Letta core block export, archival readback, and archival search mapped required fixture source ids.",
+                    "artifact": rel(OUT),
+                },
+                "capabilities": [  # the first three statuses mirror the setup / run / result fields of ``status``
+                    {
+                        "capability": "docker_letta_server_boundary",
+                        "status": status.setup,
+                        "evidence": "The runner uses docker-compose.baseline.yml and avoids host-global Letta state or hosted/private agents.",
+                    },
+                    {
+                        "capability": "core_block_export",
+                        "status": status.run,
+                        "evidence": "Live scoring requires retrieving Letta memory blocks with fixture source ids embedded in block values.",
+                    },
+                    {
+                        "capability": "archival_readback_search_export",
+                        "status": status.result,
+                        "evidence": "Live scoring requires archival passage list/search JSON to map required source ids.",
+                    },
+                    {
+                        "capability": "broad_letta_quality_claim",
+                        "status": "not_encoded",
+                        "evidence": "The smoke does not claim broad Letta product quality, private corpus behavior, or hosted-service parity.",
+                    },
+                ],
+                "suites": [
+                    {
+                        "suite_id": "core_archival_memory",
+                        "status": status.result,
+                        "evidence": "Only the six checked-in core_archival_memory scenarios are represented.",  # NOTE(review): count is hard-coded text
+                    },
+                    {
+                        "suite_id": "personalization",
+                        "status": "not_encoded",
+                        "evidence": "Scoped preference behavior is outside this core/archive export smoke.",
+                    },
+                    {
+                        "suite_id": "project_decisions",
+                        "status": status.result,
+                        "evidence": "Project-decision recovery is scored only through the core_archival_memory fixture that requires core routing plus archival rationale source ids.",
+                    },
+                    {
+                        "suite_id": "work_resume",
+                        "status": "not_encoded",
+                        "evidence": "Agent resumption across sessions is not encoded by this export/readback smoke.",
+                    },
+                ],
+                "evidence": [
+                    {"kind": "artifact", "ref": rel(OUT), "status": status.result},
+                    {"kind": "manifest", "ref": rel(MANIFEST_OUT), "status": status.overall},
+                    {"kind": "source", "ref": "https://docs.letta.com/guides/docker", "status": "real"},
+                    {"kind": "source", "ref": "https://docs.letta.com/api/python", "status": "real"},
+                    {
+                        "kind": "source",
+                        "ref": "https://docs.letta.com/api/resources/agents/subresources/passages/methods/search",
+                        "status": "real",
+                    },
+                ],
+                "execution_metadata": {
+                    "sources": [
+                        {
+                            "label": "Letta Docker docs",
+                            "url": "https://docs.letta.com/guides/docker",
+                            "evidence": "Official Docker setup and explicit embedding configuration boundary.",
+                        },
+                        {
+                            "label": "Letta Python API",
+                            "url": "https://docs.letta.com/api/python",
+                            "evidence": "Official Python SDK memory block creation and retrieval examples.",
+                        },
+                        {
+                            "label": "Letta archival search API",
+                            "url": "https://docs.letta.com/api/resources/agents/subresources/passages/methods/search",
+                            "evidence": "Official archival-memory search endpoint contract.",
+                        },
+                    ],
+                    "setup_path": "Run cargo make smoke-letta-core-archive-export-readback for a typed artifact; set ELF_LETTA_SMOKE_START=1 ELF_LETTA_SMOKE_RUN=1 with explicit model/provider configuration for a live export attempt.",
+                    "runtime_boundary": "docker-compose.baseline.yml baseline-runner plus optional Letta server profile, benchmark-created agent, benchmark-owned fixture corpus, and artifacts under tmp/real-world-memory/letta-core-archive.",
+                    "resource_expectation": f"Letta client {LETTA_CLIENT_REF}, model={LETTA_MODEL}, embedding={LETTA_EMBEDDING}, source fixture count=6, timeout_seconds={TIMEOUT_SECONDS}.",  # NOTE(review): fixture count is hard-coded, not derived from loaded fixtures
+                    "retry_guidance": [
+                        "Default command records a typed blocked artifact without model calls.",
+                        "Enable the live path only with a Docker-local Letta server and explicit provider or local model configuration.",
+                        "Score only when core block export and archival list/search output map to required fixture source ids.",
+                    ],
+                    "research_depth": "XY-984 materialization contract; generated artifact decides live evidence class.",
+                },
+                "notes": [
+                    "Failure before Letta export/readback remains typed as blocked or incomplete.",
+                    "The smoke does not use hosted/private Letta state or operator-owned data.",
+                ],
+            }
+        ],
+    }
+    write_json(MANIFEST_OUT, manifest)
+
+    return manifest
+
+def write_summary(materialization: dict[str, Any], manifest: dict[str, Any], report: dict[str, Any]) -> None:
+    """Write the summary artifact that bundles the materialization, a manifest digest, and the report."""
+
+    status_boundary = {
+        "materialization": "setup/run/evidence-mapping state emitted by the smoke runner",
+        "manifest": "external adapter declaration consumed by the scorer",
+        "scored_benchmark": "post-score real_world_job outcome; use this for quality status",
+    }
+    manifest_digest = {
+        "json": rel(MANIFEST_OUT),
+        "status_source": "external_adapter_manifest_score_aligned",
+        "summary": manifest["adapters"][0]["overall_status"],
+        "suites": manifest["adapters"][0]["suites"],
+    }
+    summary = {
+        "schema": "elf.letta_core_archive_export_readback_summary/v1",
+        "generated_at": utc_now(),
+        "adapter_id": "letta_core_archive_export_readback",
+        "evidence_class": materialization["evidence_class"],
+        "status_boundary": status_boundary,
+        "scored_benchmark": materialization["scored_benchmark"],
+        "materialization": materialization,
+        "manifest": manifest_digest,
+        "report": report,
+    }
+    write_json(SUMMARY_OUT, summary)
diff --git a/scripts/letta_core_archive_smoke/benchmark.py b/scripts/letta_core_archive_smoke/benchmark.py
new file mode 100644
index 00000000..a3c1d099
--- /dev/null
+++ b/scripts/letta_core_archive_smoke/benchmark.py
@@ -0,0 +1,99 @@
+"""Scoring helpers for the Letta core/archive smoke."""
+
+from __future__ import annotations
+
+import json
+import subprocess
+from pathlib import Path
+from typing import Any
+
+from .common import rel
+from .context import REPORT_JSON, REPORT_MD, ROOT_DIR
+from .models import StatusState
+
+def run_scored_report(fixture_path: Path, manifest_path: Path, status: StatusState) -> dict[str, Any]:
+    """Score the generated Letta fixtures through the real-world job runner.
+
+    Runs the benchmark binary's ``run`` subcommand to write ``REPORT_JSON`` and then
+    its ``publish`` subcommand to render ``REPORT_MD``, both with ``ROOT_DIR`` as cwd.
+
+    Returns the relative report paths together with the ``summary`` and ``suites``
+    sections read back from the JSON report.
+
+    Raises ``subprocess.CalledProcessError`` when either cargo invocation exits non-zero.
+    """
+
+    # Both invocations share the same cargo prefix; arguments after "--" go to the binary.
+    benchmark_bin = ["cargo", "run", "-p", "elf-eval", "--bin", "real_world_job_benchmark", "--"]
+    score_cmd = [
+        *benchmark_bin,
+        "run",
+        "--fixtures",
+        str(fixture_path),
+        "--out",
+        str(REPORT_JSON),
+        "--run-id",
+        "real-world-memory-live-letta-core-archive",
+        "--adapter-id",
+        "letta_core_archive_export_readback",
+        "--adapter-name",
+        "Letta core/archive export-readback adapter",
+        "--adapter-behavior",
+        "docker_core_archive_export_readback",
+        "--adapter-storage-status",
+        status.setup,
+        "--adapter-runtime-status",
+        status.overall,
+        "--adapter-notes",
+        "Generated by the Letta core/archive export-readback smoke; pass requires exported core block JSON, archival readback/search JSON, and mapped fixture source ids.",
+        "--external-adapter-manifest",
+        str(manifest_path),
+    ]
+    render_cmd = [
+        *benchmark_bin,
+        "publish",
+        "--report",
+        str(REPORT_JSON),
+        "--out",
+        str(REPORT_MD),
+    ]
+
+    # Score first, then publish; check=True stops at the first failing step.
+    for cargo_cmd in (score_cmd, render_cmd):
+        subprocess.run(cargo_cmd, cwd=ROOT_DIR, check=True)
+
+    scored = json.loads(REPORT_JSON.read_text(encoding="utf-8"))
+    return {
+        "json": rel(REPORT_JSON),
+        "markdown": rel(REPORT_MD),
+        "summary": scored.get("summary", {}),
+        "suites": scored.get("suites", []),
+    }
+
+def scored_benchmark(report: dict[str, Any] | None) -> dict[str, Any]:
+    """Summarize a real_world_job report as one status plus per-status counts ("pending" without a report)."""
+
+    if report is None:
+        return {
+            "schema": "elf.scored_benchmark_status/v1",
+            "source": "real_world_job_benchmark",
+            "status": "pending",
+            "reason": "The Letta smoke materialization was written before benchmark scoring completed.",
+        }
+
+    summary = report.get("summary", {})
+    buckets = ("pass", "wrong_result", "lifecycle_fail", "incomplete", "blocked", "not_encoded")
+    counts = {bucket: int(summary.get(bucket, 0) or 0) for bucket in buckets}
+    # The first non-pass bucket with a positive count wins, in the order declared above.
+    failing = [bucket for bucket in buckets if bucket != "pass" and counts[bucket] > 0]
+    status = failing[0] if failing else "pass"
+
+    return {
+        "schema": "elf.scored_benchmark_status/v1",
+        "source": "real_world_job_benchmark",
+        "status": status,
+        "counts": counts,
+        "job_count": int(summary.get("job_count", 0) or 0),
+        "mean_score": summary.get("mean_score"),
+        "evidence_coverage": summary.get("evidence_coverage"),
+    }
diff --git a/scripts/letta_core_archive_smoke/common.py b/scripts/letta_core_archive_smoke/common.py
new file mode 100644
index 00000000..b16a90fa
--- /dev/null
+++ b/scripts/letta_core_archive_smoke/common.py
@@ -0,0 +1,127 @@
+"""Shared filesystem and process helpers for the Letta smoke."""
+
+from __future__ import annotations
+
+import json
+import os
+import shutil
+import subprocess
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from .context import FIXTURE_DIR, LOG_DIR, MANIFEST_OUT, OUT, REPORT_DIR, REPORT_JSON, REPORT_MD, ROOT_DIR, SUMMARY_OUT, TIMEOUT_SECONDS, WORK_DIR
+from .models import CommandRecord
+
+def utc_now() -> str:
+    """Return the current UTC time as a second-precision RFC3339 timestamp ending in ``Z``."""
+
+    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+def rel(path: Path) -> str:
+    """Return ``path`` relative to ``ROOT_DIR`` when it resolves inside it, else ``str(path)`` unchanged."""
+
+    try:
+        return str(path.resolve().relative_to(ROOT_DIR))
+    except ValueError:  # relative_to() raises when the resolved path is not under ROOT_DIR
+        return str(path)
+
+def mkdirs() -> None:
+    """Reset this smoke's outputs: wipe fixture/log dirs, (re)create all dirs, delete earlier artifact files."""
+
+    for path in (FIXTURE_DIR, LOG_DIR):  # generated fixtures and logs are rebuilt from scratch on every run
+        if path.exists():
+            shutil.rmtree(path)
+
+    for path in (REPORT_DIR, WORK_DIR, FIXTURE_DIR, LOG_DIR):  # REPORT_DIR and WORK_DIR are never rmtree'd here
+        path.mkdir(parents=True, exist_ok=True)
+
+    for path in (OUT, MANIFEST_OUT, SUMMARY_OUT, REPORT_JSON, REPORT_MD):  # drop artifact files left by earlier runs
+        if path.exists():
+            path.unlink()
+
+def write_json(path: Path, payload: Any) -> None:
+    """Write ``payload`` to ``path`` as indented, key-sorted UTF-8 JSON with a trailing newline."""
+
+    path.parent.mkdir(parents=True, exist_ok=True)  # create missing parent directories before writing
+    path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
+def command_available(name: str) -> bool:
+    """Report whether ``name`` resolves to an executable according to ``shutil.which``."""
+
+    return bool(shutil.which(name))
+
+def run_command(
+    label: str,
+    command: list[str],
+    cwd: Path,
+    *,
+    extra_env: dict[str, str] | None = None,
+) -> CommandRecord:
+    """Run a command and capture stdout/stderr artifacts.
+
+    Non-zero exits and timeouts do not raise; both come back as ``incomplete`` records.
+    """
+
+    started = time.monotonic()
+    env = os.environ.copy()
+    if extra_env:
+        env.update(extra_env)
+    stdout_path = LOG_DIR / f"{label}.stdout.txt"
+    stderr_path = LOG_DIR / f"{label}.stderr.txt"
+
+    try:
+        result = subprocess.run(
+            command,
+            cwd=cwd,
+            env=env,
+            text=True,
+            capture_output=True,
+            timeout=TIMEOUT_SECONDS,
+            check=False,
+        )
+    except subprocess.TimeoutExpired as exc:
+        # TimeoutExpired carries partial output as bytes even with text=True (None when nothing
+        # was read), so decode before write_text(), which only accepts str.
+        stdout, stderr = (
+            stream.decode("utf-8", errors="replace") if isinstance(stream, bytes) else stream or ""
+            for stream in (exc.stdout, exc.stderr)
+        )
+        returncode = None
+        status = "incomplete"
+        reason = f"timed out after {TIMEOUT_SECONDS}s"
+    else:
+        stdout, stderr = result.stdout, result.stderr
+        returncode = result.returncode
+        status = "pass" if returncode == 0 else "incomplete"
+        reason = "command completed" if returncode == 0 else f"exit code {returncode}"
+
+    elapsed = (time.monotonic() - started) * 1000
+    stdout_path.write_text(stdout, encoding="utf-8")
+    stderr_path.write_text(stderr, encoding="utf-8")
+
+    return CommandRecord(
+        label=label,
+        command=command,
+        status=status,
+        elapsed_ms=elapsed,
+        stdout_artifact=rel(stdout_path),
+        stderr_artifact=rel(stderr_path),
+        returncode=returncode,
+        reason=reason,
+    )
+
+def command_to_json(record: CommandRecord) -> dict[str, Any]:
+    """Convert a command record to a JSON-ready dict, rounding ``elapsed_ms`` to three decimals."""
+
+    fields = (
+        "label", "command",
+        "status", "elapsed_ms",
+        "stdout_artifact", "stderr_artifact",
+        "returncode", "reason",
+    )
+    serialized = {field: getattr(record, field) for field in fields}
+    # Only the duration is post-processed; assigning to an existing key keeps the key order above.
+    serialized["elapsed_ms"] = round(serialized["elapsed_ms"], 3)
+    return serialized
diff --git a/scripts/letta_core_archive_smoke/context.py b/scripts/letta_core_archive_smoke/context.py
new file mode 100644
index 00000000..813eab1f
--- /dev/null
+++ b/scripts/letta_core_archive_smoke/context.py
@@ -0,0 +1,52 @@
+"""Configuration for the Letta core/archive smoke."""
+
+from __future__ import annotations
+
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+
+from typing import Any
+
+
+SCRIPT_DIR = Path(__file__).resolve().parent.parent  # two levels up from this module: the scripts/ directory
+ROOT_DIR = SCRIPT_DIR.parent  # parent of scripts/; rel() reports paths relative to this
+CORE_FIXTURE_DIR = ROOT_DIR / "apps" / "elf-eval" / "fixtures" / "real_world_memory" / "core_archival_memory"
+REPORT_DIR = Path(  # base output directory; overridable via ELF_LETTA_SMOKE_REPORT_DIR
+    os.environ.get(
+        "ELF_LETTA_SMOKE_REPORT_DIR",
+        ROOT_DIR / "tmp" / "real-world-memory" / "letta-core-archive",
+    )
+)
+WORK_DIR = Path(os.environ.get("ELF_LETTA_SMOKE_WORK_DIR", REPORT_DIR / "work"))
+OUT = Path(os.environ.get("ELF_LETTA_SMOKE_OUT", REPORT_DIR / "letta-core-archive-export.json"))
+MANIFEST_OUT = Path(
+    os.environ.get(
+        "ELF_LETTA_SMOKE_MANIFEST_OUT",
+        REPORT_DIR / "memory_projects_manifest.letta-core-archive.json",
+    )
+)
+SUMMARY_OUT = Path(os.environ.get("ELF_LETTA_SMOKE_SUMMARY_OUT", REPORT_DIR / "summary.json"))
+REPORT_JSON = Path(os.environ.get("ELF_LETTA_SMOKE_REPORT_JSON", REPORT_DIR / "report.json"))
+REPORT_MD = Path(os.environ.get("ELF_LETTA_SMOKE_REPORT_MD", REPORT_DIR / "report.md"))
+FIXTURE_DIR = REPORT_DIR / "letta-fixtures"  # always under REPORT_DIR; no dedicated env override
+LOG_DIR = REPORT_DIR / "logs"  # always under REPORT_DIR; no dedicated env override
+
+RUN_ID = os.environ.get(  # default is a UTC timestamp computed once, when this module is imported
+    "ELF_LETTA_SMOKE_RUN_ID",
+    f"letta-core-archive-{datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')}",
+)
+RUN_LIVE = os.environ.get("ELF_LETTA_SMOKE_RUN", "0") == "1"  # boolean flags are on only for the exact string "1"
+ALLOW_HOST = os.environ.get("ELF_LETTA_SMOKE_ALLOW_HOST", "0") == "1"
+INSTALL_CLIENT = os.environ.get("ELF_LETTA_SMOKE_INSTALL_CLIENT", "1") == "1"  # the only flag that defaults to on
+LETTA_BASE_URL = os.environ.get("ELF_LETTA_BASE_URL", "http://letta:8283")
+LETTA_CLIENT_PACKAGE = os.environ.get("ELF_LETTA_CLIENT_PACKAGE", "letta-client")
+LETTA_CLIENT_REF = os.environ.get("ELF_LETTA_CLIENT_REF", f"pypi:{LETTA_CLIENT_PACKAGE}")
+LETTA_MODEL = os.environ.get("ELF_LETTA_MODEL", "openai/gpt-4o-mini")
+LETTA_EMBEDDING = os.environ.get("ELF_LETTA_EMBEDDING", "openai/text-embedding-3-small")
+TIMEOUT_SECONDS = int(os.environ.get("ELF_LETTA_TIMEOUT_SECONDS", "600"))  # int() raises ValueError on a non-integer override
+STARTUP_ATTEMPTS = int(os.environ.get("ELF_LETTA_STARTUP_ATTEMPTS", "30"))
+STARTUP_INTERVAL_SECONDS = float(os.environ.get("ELF_LETTA_STARTUP_INTERVAL_SECONDS", "2"))
+
+CORE_KINDS = {"core_block", "core_block_contract", "core_block_event"}  # corpus item kinds treated as core blocks
+
diff --git a/scripts/letta_core_archive_smoke/fixtures.py b/scripts/letta_core_archive_smoke/fixtures.py
new file mode 100644
index 00000000..464d8801
--- /dev/null
+++ b/scripts/letta_core_archive_smoke/fixtures.py
@@ -0,0 +1,225 @@
+"""Fixture loading, evidence mapping, and generated fixture output."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+from .common import rel, write_json
+from .context import CORE_FIXTURE_DIR, CORE_KINDS, FIXTURE_DIR
+from .models import StatusState
+
+def load_source_fixtures() -> list[dict[str, Any]]:
+    """Load every ``*.json`` fixture in ``CORE_FIXTURE_DIR`` (sorted by path), tagging each with its source path."""
+
+    loaded = []
+    for fixture_file in sorted(CORE_FIXTURE_DIR.glob("*.json")):
+        raw_text = fixture_file.read_text(encoding="utf-8")
+        document = json.loads(raw_text)
+        loaded.append({**document, "_source_path": rel(fixture_file)})
+
+    return loaded
+
+def evidence_ids_for_fixture(fixture: dict[str, Any]) -> list[str]:
+    """Collect the non-empty ``evidence_id`` of each dict entry under the fixture's ``required_evidence``."""
+
+    found = []
+    for entry in fixture.get("required_evidence", []):
+        if isinstance(entry, dict) and entry.get("evidence_id"):
+            found.append(entry["evidence_id"])
+    return found
+
+def all_required_evidence_ids(fixtures: list[dict[str, Any]]) -> list[str]:
+    """Return the required evidence ids of all fixtures, de-duplicated in first-seen order."""
+
+    # dict.fromkeys keeps first-seen order and de-duplicates without rescanning a list per id.
+    unique_ids = dict.fromkeys(
+        evidence_id
+        for fixture in fixtures
+        for evidence_id in evidence_ids_for_fixture(fixture)
+    )
+    return list(unique_ids)
+
+def source_items(fixtures: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """Return a copy of every corpus item, each extended with its fixture's ``job_id`` and source path."""
+
+    return [
+        dict(
+            item,
+            job_id=fixture["job_id"],
+            fixture_source=fixture["_source_path"],
+        )
+        for fixture in fixtures
+        for item in fixture.get("corpus", {}).get("items", [])
+    ]
+
+def benchmark_input_contract(fixtures: list[dict[str, Any]]) -> dict[str, Any]:
+    """Return the benchmark-owned Letta input contract: core blocks, archival passages, and id summaries."""
+
+    items = source_items(fixtures)  # flattened once; reused for the id count below
+    core_blocks = []
+    archival_passages = []
+    for item in items:
+        source_id = item["evidence_id"]
+        kind = item.get("kind")
+        # Prefix the text with its source id so the id can be searched for in export/readback output.
+        tagged_text = f"Source ID: {source_id}\n{item.get('text', '')}"
+        record = {
+            "source_id": source_id,
+            "job_id": item["job_id"],
+            "kind": kind,
+            "text": item.get("text", ""),
+            "fixture_source": item["fixture_source"],
+        }
+        if kind in CORE_KINDS:
+            # Core items become labelled blocks; the label is the slugged id capped at 48 characters.
+            core_blocks.append(
+                {"label": slug(source_id)[:48], "value": tagged_text, **record}
+            )
+        elif kind not in {"stale_claim", "unsupported_claim"}:
+            # stale_claim / unsupported_claim items land in neither list. "text" is set after **record
+            # because the record's own raw "text" key used to overwrite the tagged passage text.
+            archival_passages.append({**record, "text": tagged_text})
+
+    return {
+        "core_blocks": core_blocks,
+        "archival_passages": archival_passages,
+        "source_id_count": len({item["evidence_id"] for item in items}),
+        "required_evidence_ids": all_required_evidence_ids(fixtures),
+    }
+
+
+def slug(value: str) -> str:
+    """Return a small lower-case ASCII slug for ``value``.
+
+    Every run of characters that are not ASCII letters or digits collapses
+    into one ``-``; leading and trailing runs are dropped entirely. A value
+    with no usable characters yields ``"item"``.
+
+    >>> slug("Core Block #1")
+    'core-block-1'
+    >>> slug("***")
+    'item'
+    """
+
+    # Turn unusable characters into spaces so str.split() collapses each run
+    # and discards the empty leading/trailing pieces in one step.
+    spaced = "".join(char if char.isascii() and char.isalnum() else " " for char in value.lower())
+    words = spaced.split()
+    return "-".join(words) or "item"
+
+def ids_in_payload(payload: Any, evidence_ids: list[str]) -> list[str]:
+    """Return evidence ids present anywhere in a JSON-compatible payload."""
+    # Ids are matched against the serialized JSON, so each id is escaped the same way (quotes, backslashes, non-ASCII).
+    haystack = json.dumps(payload, sort_keys=True, default=str, ensure_ascii=False)
+    return [evidence_id for evidence_id in evidence_ids if json.dumps(evidence_id, ensure_ascii=False)[1:-1] in haystack]
+
+def evidence_mapping(
+    fixtures: list[dict[str, Any]],
+    live_export: dict[str, Any] | None,
+    status: StatusState,
+) -> dict[str, Any]:
+    """Report which required fixture source ids appear in the Letta export, overall and per job."""
+
+    required_ids = all_required_evidence_ids(fixtures)
+    mapped_ids: list[str] = [] if live_export is None else ids_in_payload(live_export, required_ids)
+    missing_ids = [evidence_id for evidence_id in required_ids if evidence_id not in mapped_ids]
+    # A blocked/incomplete/not_encoded run is never scored: every job inherits that status and reason.
+    unscored = status.result in {"blocked", "incomplete", "not_encoded"}
+
+    jobs = []
+    for fixture in fixtures:
+        expected = evidence_ids_for_fixture(fixture)
+        found = [evidence_id for evidence_id in expected if evidence_id in mapped_ids]
+        absent = [evidence_id for evidence_id in expected if evidence_id not in found]
+        if unscored:
+            verdict, why = status.result, status.failure_reason
+        elif not absent:
+            verdict = "pass"
+            why = "Letta core block export and archival readback/search mapped all required source ids."
+        else:
+            verdict = "wrong_result"
+            why = f"Letta export/readback missed required evidence ids: {', '.join(absent)}."
+        jobs.append(
+            {
+                "job_id": fixture["job_id"],
+                "source_fixture": fixture["_source_path"],
+                "expected_evidence_ids": expected,
+                "mapped_evidence_ids": found,
+                "missing_evidence_ids": absent,
+                "status": verdict,
+                "reason": why,
+            }
+        )
+
+    if live_export is None:
+        overall_status, overall_reason = status.result, status.failure_reason
+    elif missing_ids:
+        overall_status = status.result
+        overall_reason = f"Letta export/readback missed required evidence ids: {', '.join(missing_ids)}."
+    else:
+        overall_status = "pass"
+        overall_reason = "Letta export/readback mapped all required fixture source ids."
+
+    return {
+        "status": overall_status,
+        "reason": overall_reason,
+        "expected_evidence_ids": required_ids,
+        "mapped_evidence_ids": mapped_ids,
+        "missing_evidence_ids": missing_ids,
+        "jobs": jobs,
+    }
+
+def write_fixture_outputs(
+    fixtures: list[dict[str, Any]],
+    status: StatusState,  # NOTE(review): not referenced anywhere in this function body
+    mapping: dict[str, Any],
+) -> Path:
+    """Write one generated real_world_job fixture per source fixture and return the output directory."""
+
+    for fixture in fixtures:
+        generated = json.loads(json.dumps({k: v for k, v in fixture.items() if k != "_source_path"}))  # JSON round-trip = deep copy minus the private key
+        generated["corpus"]["profile"] = "external_adapter"
+        generated["corpus"]["corpus_id"] = "letta-core-archive-export-readback-2026-06-19"  # NOTE(review): date is hard-coded
+        job_mapping = next(item for item in mapping["jobs"] if item["job_id"] == fixture["job_id"])  # StopIteration if the job is absent
+        source_answer = fixture.get("corpus", {}).get("adapter_response", {}).get("answer", {})
+        generated["corpus"]["adapter_response"] = {
+            "adapter_id": "letta_core_archive_export_readback",
+            "answer": {
+                "content": source_answer.get("content", ""),
+                "claims": source_answer.get("claims", []),
+                "evidence_ids": evidence_ids_for_fixture(fixture),  # narrowed below when the job is a wrong_result
+                "latency_ms": 0.0,
+                "cost": {
+                    "currency": "USD",
+                    "amount": 0.0,
+                    "input_tokens": 0,
+                    "output_tokens": 0,
+                },
+            },
+        }
+        generated["tags"] = sorted(set(generated.get("tags", []) + ["external_adapter", "letta_export_readback"]))
+        generated["encoding"] = {}  # stays empty unless the job is blocked / incomplete / not_encoded
+        if job_mapping["status"] in {"blocked", "incomplete", "not_encoded"}:
+            generated["encoding"] = {
+                "status": job_mapping["status"],
+                "reason": job_mapping["reason"],
+                "follow_up": {
+                    "title": "Produce Letta core/archive export-readback evidence",
+                    "reason": (
+                        "The benchmark must export Letta core block JSON, archival readback/search JSON, "
+                        "and fixture source ids before this scenario can be scored as pass or wrong_result."
+                    ),
+                },
+            }
+
+        if job_mapping["status"] == "wrong_result":  # report only the ids that were actually mapped
+            generated["corpus"]["adapter_response"]["answer"]["evidence_ids"] = job_mapping[
+                "mapped_evidence_ids"
+            ]
+
+        fixture_path = FIXTURE_DIR / "core_archival_memory" / Path(fixture["_source_path"]).name  # keeps the source file name
+        write_json(fixture_path, generated)
+
+    return FIXTURE_DIR / "core_archival_memory"
diff --git a/scripts/letta_core_archive_smoke/models.py b/scripts/letta_core_archive_smoke/models.py
new file mode 100644
index 00000000..98a632fc
--- /dev/null
+++ b/scripts/letta_core_archive_smoke/models.py
@@ -0,0 +1,34 @@
+"""Typed records for the Letta core/archive smoke."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+@dataclass
+class StatusState:
+    """Typed, mutable status for generated Letta smoke artifacts; defaults describe a disabled live run."""
+
+    setup: str = "blocked"  # environment / server / client readiness
+    run: str = "not_encoded"  # live export/readback execution
+    result: str = "blocked"  # evidence-mapping outcome
+    overall: str = "blocked"  # the runner sets this together with ``result``
+    evidence_class: str = "research_gate"  # runner switches to "live_real_world" after a live export
+    failure_class: str = "letta_live_run_disabled"  # machine-readable failure id; "" on pass
+    failure_reason: str = (
+        "Letta live export/readback is disabled by default; run "
+        "ELF_LETTA_SMOKE_START=1 ELF_LETTA_SMOKE_RUN=1 cargo make "
+        "smoke-letta-core-archive-export-readback with explicit Docker/provider configuration."
+    )
+
+@dataclass
+class CommandRecord:
+    """Captured command result without secret-bearing environment values."""
+
+    label: str  # step identifier, e.g. "letta-health-probe"
+    command: list[str]  # argv, or a ["GET", url] pair for HTTP probes
+    status: str  # e.g. "pass" or "incomplete"
+    elapsed_ms: float  # elapsed time in milliseconds
+    stdout_artifact: str | None  # presumably a reference to captured stdout; None for HTTP probes
+    stderr_artifact: str | None  # presumably a reference to captured stderr; None for HTTP probes
+    returncode: int | None  # None when there is no exit code to report
+    reason: str  # human-readable explanation of the status
diff --git a/scripts/letta_core_archive_smoke/runner.py b/scripts/letta_core_archive_smoke/runner.py
new file mode 100644
index 00000000..53e71668
--- /dev/null
+++ b/scripts/letta_core_archive_smoke/runner.py
@@ -0,0 +1,105 @@
+"""CLI runner for the Letta core/archive smoke."""
+
+from __future__ import annotations
+
+import time
+from pathlib import Path
+from typing import Any
+
+from .artifacts import write_manifest, write_materialization, write_summary
+from .benchmark import run_scored_report
+from .common import command_available, mkdirs
+from .context import ALLOW_HOST, MANIFEST_OUT, OUT, RUN_LIVE, SUMMARY_OUT
+from .fixtures import evidence_mapping, load_source_fixtures, write_fixture_outputs
+from .models import CommandRecord, StatusState
+from .runtime import init_letta_client, run_letta, wait_for_letta
+
+def main() -> int:
+    """Run the smoke, write every artifact, and return 0; failures are reported through ``status``, not the exit code."""
+
+    started_at = time.monotonic()
+    mkdirs()
+    status = StatusState()  # defaults describe "live run disabled"
+    command_records: list[CommandRecord] = []
+    fixtures = load_source_fixtures()
+    live_export: dict[str, Any] | None = None  # stays None unless the live run succeeds
+
+    # Gate chain: the first failing precondition wins and later probes are not executed.
+    if not Path("/.dockerenv").exists() and not ALLOW_HOST:
+        status.setup = "incomplete"
+        status.result = "incomplete"
+        status.overall = "incomplete"
+        status.failure_class = "not_running_in_docker"
+        status.failure_reason = "Letta smoke must run inside Docker; use cargo make smoke-letta-core-archive-export-readback."
+    elif not command_available("python3"):
+        status.setup = "incomplete"
+        status.result = "incomplete"
+        status.overall = "incomplete"
+        status.failure_class = "python_missing"
+        status.failure_reason = "python3 is required for the Letta smoke runner."
+    elif not RUN_LIVE:
+        pass  # keep the StatusState defaults untouched
+    elif not wait_for_letta(command_records):
+        status.setup = "incomplete"
+        status.result = "incomplete"
+        status.overall = "incomplete"
+        status.failure_class = "letta_server_unreachable"
+        status.failure_reason = "Docker-local Letta server did not become reachable for export/readback."
+    elif not init_letta_client(command_records):
+        status.setup = "incomplete"
+        status.result = "incomplete"
+        status.overall = "incomplete"
+        status.failure_class = "letta_client_setup_failed"
+        status.failure_reason = "Letta Python client installation or import failed inside the Docker runner."
+    else:
+        status.setup = "pass"
+        live_export = run_letta(fixtures, command_records)
+        if live_export is None:
+            status.run = "incomplete"  # setup stays "pass": only the run step failed
+            status.result = "incomplete"
+            status.overall = "incomplete"
+            status.failure_class = "letta_export_readback_failed"
+            status.failure_reason = "Letta did not produce core block export plus archival readback/search output."
+        else:
+            status.run = "pass"
+            status.evidence_class = "live_real_world"
+            mapping = evidence_mapping(fixtures, live_export, status)  # preliminary; recomputed below
+            if not mapping["missing_evidence_ids"]:
+                status.result = "pass"
+                status.overall = "pass"
+                status.failure_class = ""
+                status.failure_reason = ""
+            else:
+                status.result = "wrong_result"
+                status.overall = "wrong_result"
+                status.failure_class = "letta_source_id_mapping_failed"
+                status.failure_reason = mapping["reason"]
+
+    # Recompute with the final status; this also covers every branch that skipped the live run.
+    mapping = evidence_mapping(fixtures, live_export, status)
+    fixture_path = write_fixture_outputs(fixtures, status, mapping)
+    write_materialization(  # first write: no report argument yet
+        status,
+        fixtures,
+        fixture_path,
+        command_records,
+        live_export,
+        mapping,
+        started_at,
+    )
+    manifest = write_manifest(status)
+    report = run_scored_report(fixture_path, MANIFEST_OUT, status)
+    materialization = write_materialization(  # second write: same inputs plus the scored report
+        status,
+        fixtures,
+        fixture_path,
+        command_records,
+        live_export,
+        mapping,
+        started_at,
+        report,
+    )
+    write_summary(materialization, manifest, report)
+    print(f"Letta core/archive artifact: {OUT}")
+    print(f"Letta core/archive manifest: {MANIFEST_OUT}")
+    print(f"Letta core/archive summary: {SUMMARY_OUT}")
+
+    return 0
diff --git a/scripts/letta_core_archive_smoke/runtime.py b/scripts/letta_core_archive_smoke/runtime.py
new file mode 100644
index 00000000..5eddd2b7
--- /dev/null
+++ b/scripts/letta_core_archive_smoke/runtime.py
@@ -0,0 +1,188 @@
+"""Runtime setup and live Letta execution."""
+
+from __future__ import annotations
+
+import json
+import textwrap
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+from .common import run_command, write_json
+from .context import *  # noqa: F403
+from .fixtures import benchmark_input_contract, slug
+from .models import CommandRecord
+
+def wait_for_letta(command_records: list[CommandRecord]) -> bool:
+    """Poll the Letta server until one of the probe endpoints responds.
+
+    Makes up to STARTUP_ATTEMPTS rounds over the probe paths, sleeping
+    STARTUP_INTERVAL_SECONDS after each failed round. Exactly one
+    "letta-health-probe" CommandRecord is appended to ``command_records``.
+
+    Returns True on the first response with a status in [200, 500), else False.
+    """
+
+    started = time.monotonic()
+    # Normalize once so the probed URLs and the recorded failure URL always agree.
+    base_url = LETTA_BASE_URL.rstrip("/")
+    last_reason = "not attempted"
+
+    def _record(url: str, status: str, returncode: int | None, reason: str) -> None:
+        command_records.append(
+            CommandRecord(
+                label="letta-health-probe",
+                command=["GET", url],
+                status=status,
+                elapsed_ms=(time.monotonic() - started) * 1000,
+                stdout_artifact=None,
+                stderr_artifact=None,
+                returncode=returncode,
+                reason=reason,
+            )
+        )
+
+    for _ in range(STARTUP_ATTEMPTS):
+        for path in ("/v1/health", "/health", "/v1/models"):
+            url = base_url + path
+            try:
+                with urllib.request.urlopen(url, timeout=5) as response:
+                    if 200 <= response.status < 500:  # NOTE(review): urlopen raises HTTPError for 4xx/5xx
+                        _record(url, "pass", 0, f"reachable via {path}")
+                        return True
+            except (urllib.error.URLError, TimeoutError, OSError) as exc:
+                last_reason = str(exc)
+        time.sleep(STARTUP_INTERVAL_SECONDS)
+
+    _record(base_url + "/v1/health", "incomplete", None, last_reason)
+    return False
+
+def init_letta_client(command_records: list[CommandRecord]) -> bool:
+    """Optionally pip-install the Letta client, then check that it imports.
+
+    Appends each command result to ``command_records``; stops at the first non-"pass".
+    """
+    python = sys.executable
+    steps = [("letta-client-import", [python, "-c", "import letta_client"])]
+    if INSTALL_CLIENT:
+        # The install has to run before the import probe.
+        steps.insert(0, ("letta-client-install", [python, "-m", "pip", "install", LETTA_CLIENT_PACKAGE]))
+    for label, argv in steps:
+        outcome = run_command(label, argv, WORK_DIR)
+        command_records.append(outcome)
+        if outcome.status != "pass":
+            return False
+
+    return True
+
+def write_live_runner(fixtures: list[dict[str, Any]]) -> Path:
+    """Write the live-input JSON and a standalone Letta SDK runner script into WORK_DIR; return the script path."""
+
+    contract = benchmark_input_contract(fixtures)
+    input_path = WORK_DIR / "letta-live-input.json"  # same path run_letta passes as ELF_LETTA_LIVE_INPUT
+    write_json(input_path, contract)
+
+    runner = WORK_DIR / "letta_live_runner.py"
+    runner.write_text(  # template is a non-raw string, so "\\n" below lands in the script as "\n"
+        """
+import json
+import os
+from pathlib import Path
+
+from letta_client import Letta
+
+
+def as_dict(value):
+    if hasattr(value, "model_dump"):
+        return value.model_dump(mode="json")
+    if hasattr(value, "dict"):
+        return value.dict()
+    return json.loads(json.dumps(value, default=str))
+
+
+input_path = Path(os.environ["ELF_LETTA_LIVE_INPUT"])
+output_path = Path(os.environ["ELF_LETTA_LIVE_OUTPUT"])
+data = json.loads(input_path.read_text())
+
+client = Letta(base_url=os.environ["ELF_LETTA_BASE_URL"])
+agent = client.agents.create(
+    name=os.environ.get("ELF_LETTA_AGENT_NAME", "elf-core-archive-smoke"),
+    model=os.environ["ELF_LETTA_MODEL"],
+    embedding=os.environ["ELF_LETTA_EMBEDDING"],
+    memory_blocks=[
+        {"label": item["label"], "value": item["value"]}
+        for item in data["core_blocks"]
+    ],
+)
+
+created_passages = []
+for passage in data["archival_passages"]:
+    created_passages.append(
+        as_dict(client.agents.passages.create(agent_id=agent.id, text=passage["text"]))
+    )
+
+core_block_export = []
+for item in data["core_blocks"]:
+    core_block_export.append(
+        {
+            "source_id": item["source_id"],
+            "label": item["label"],
+            "block": as_dict(
+                client.agents.blocks.retrieve(agent_id=agent.id, block_label=item["label"])
+            ),
+        }
+    )
+
+listed_passages = as_dict(client.agents.passages.list(agent_id=agent.id))
+search_results = []
+for source_id in data["required_evidence_ids"]:
+    search_results.append(
+        {
+            "query": source_id,
+            "response": as_dict(
+                client.agents.passages.search(agent_id=agent.id, query=source_id, top_k=5)
+            ),
+        }
+    )
+
+output_path.write_text(
+    json.dumps(
+        {
+            "agent": as_dict(agent),
+            "core_block_export": core_block_export,
+            "created_passages": created_passages,
+            "archival_readback": listed_passages,
+            "archival_search": search_results,
+        },
+        indent=2,
+        sort_keys=True,
+    )
+    + "\\n"
+)
+""".lstrip(),  # drop the template's leading newline so the script starts at "import json"
+        encoding="utf-8",
+    )
+
+    return runner
+
+def run_letta(fixtures: list[dict[str, Any]], command_records: list[CommandRecord]) -> dict[str, Any] | None:
+    """Run the generated live runner and load the JSON export it wrote.
+
+    Returns None when the command is not "pass" or no output file exists.
+    """
+    script = write_live_runner(fixtures)
+    export_file = WORK_DIR / "letta-live-output.json"
+    overrides = {
+        "ELF_LETTA_BASE_URL": LETTA_BASE_URL,
+        "ELF_LETTA_MODEL": LETTA_MODEL,
+        "ELF_LETTA_EMBEDDING": LETTA_EMBEDDING,
+        "ELF_LETTA_LIVE_INPUT": str(WORK_DIR / "letta-live-input.json"),
+        "ELF_LETTA_LIVE_OUTPUT": str(export_file),
+        "ELF_LETTA_AGENT_NAME": f"elf-core-archive-smoke-{RUN_ID}",
+    }
+    outcome = run_command("letta-live-export-readback", [sys.executable, str(script)], WORK_DIR, extra_env=overrides)
+    command_records.append(outcome)
+    succeeded = outcome.status == "pass" and export_file.exists()
+    return json.loads(export_file.read_text(encoding="utf-8")) if succeeded else None
diff --git a/scripts/ragflow-docker-evidence-smoke.sh b/scripts/ragflow-docker-evidence-smoke.sh
index 17dd572f..aa9da8f8 100755
--- a/scripts/ragflow-docker-evidence-smoke.sh
+++ b/scripts/ragflow-docker-evidence-smoke.sh
@@ -104,1083 +104,15 @@ DOCUMENT_STEP_STATUS="not_encoded"
 CHUNK_STEP_STATUS="not_encoded"
 RETRIEVAL_STEP_STATUS="not_encoded"
 
-required_command() {
-	local cmd="$1"
-	if ! command -v "${cmd}" >/dev/null 2>&1; then
-		echo "Missing ${cmd}; cannot write RAGFlow smoke artifacts." >&2
-		exit 1
-	fi
-}
-
-optional_command_status() {
-	local cmd="$1"
-	if command -v "${cmd}" >/dev/null 2>&1; then
-		printf 'available'
-	else
-		printf 'missing'
-	fi
-}
-
-relative_path() {
-	local path="$1"
-	if [[ "${path}" == "${ROOT_DIR}/"* ]]; then
-		printf '%s' "${path#"${ROOT_DIR}/"}"
-	else
-		printf '%s' "${path}"
-	fi
-}
-
-json_status() {
-	local status="$1"
-	case "${status}" in
-		real | mocked | unsupported | blocked | incomplete | wrong_result | lifecycle_fail | pass | not_encoded)
-			printf '%s' "${status}"
-			;;
-		*)
-			printf 'incomplete'
-			;;
-	esac
-}
-
-capture_docker_info() {
-	if docker info --format '{{json .}}' >"${DOCKER_INFO}" 2>"${ARTIFACT_DIR}/docker-info.stderr"; then
-		return 0
-	fi
-
-	jq -n --rawfile stderr "${ARTIFACT_DIR}/docker-info.stderr" '{
-		error: "docker_info_failed",
-		stderr: $stderr
-	}' >"${DOCKER_INFO}"
-	return 1
-}
-
-capture_disk_info() {
-	docker system df >"${DOCKER_DF}" 2>/dev/null || true
-}
-
-capture_vm_max_map_count() {
-	if VM_MAX_MAP_COUNT="$(sysctl -n vm.max_map_count 2>/dev/null)"; then
-		if [[ "${VM_MAX_MAP_COUNT}" =~ ^[0-9]+$ ]] && [[ "${VM_MAX_MAP_COUNT}" -ge 262144 ]]; then
-			VM_MAX_MAP_COUNT_STATUS="pass"
-		elif [[ "${VM_MAX_MAP_COUNT}" =~ ^[0-9]+$ ]]; then
-			VM_MAX_MAP_COUNT_STATUS="blocked"
-		else
-			VM_MAX_MAP_COUNT_STATUS="not_observed"
-		fi
-	else
-		VM_MAX_MAP_COUNT=""
-		VM_MAX_MAP_COUNT_STATUS="not_observed"
-	fi
-}
-
-capture_image_info() {
-	if [[ "${PULL_IMAGE}" == "1" && "${ACCEPT_RESOURCE_ENVELOPE}" == "1" ]]; then
-		docker pull "${RAGFLOW_IMAGE}" >"${ARTIFACT_DIR}/docker-pull.log" 2>&1 || true
-	fi
-
-	if docker image inspect "${RAGFLOW_IMAGE}" >"${IMAGE_INSPECT}" 2>/dev/null; then
-		IMAGE_PRESENT="true"
-		IMAGE_SIZE_BYTES="$(jq -r '.[0].Size // ""' "${IMAGE_INSPECT}")"
-	else
-		printf '[]\n' >"${IMAGE_INSPECT}"
-	fi
-}
-
-update_env_var() {
-	local file="$1"
-	local key="$2"
-	local value="$3"
-
-	if grep -q "^${key}=" "${file}"; then
-		sed -i.bak "s|^${key}=.*|${key}=${value}|" "${file}"
-	else
-		printf '\n%s=%s\n' "${key}" "${value}" >>"${file}"
-	fi
-}
-
-prepare_official_ragflow_repo() {
-	local repo_dir="${WORK_DIR}/ragflow"
-
-	if [[ ! -d "${repo_dir}/.git" ]]; then
-		rm -rf "${repo_dir}"
-		git clone --depth 1 --branch "${RAGFLOW_REF}" "${RAGFLOW_REPO_URL}" "${repo_dir}" \
-			>"${ARTIFACT_DIR}/ragflow-git-clone.log" 2>&1
-	else
-		git -C "${repo_dir}" fetch --depth 1 origin "${RAGFLOW_REF}" \
-			>"${ARTIFACT_DIR}/ragflow-git-fetch.log" 2>&1
-		git -C "${repo_dir}" checkout -f FETCH_HEAD \
-			>"${ARTIFACT_DIR}/ragflow-git-checkout.log" 2>&1
-	fi
-
-	update_env_var "${repo_dir}/docker/.env" "DEVICE" "${CPU_GPU_MODE}"
-	update_env_var "${repo_dir}/docker/.env" "SVR_WEB_HTTP_PORT" "${ELF_RAGFLOW_WEB_HTTP_PORT:-18080}"
-	update_env_var "${repo_dir}/docker/.env" "SVR_WEB_HTTPS_PORT" "${ELF_RAGFLOW_WEB_HTTPS_PORT:-18443}"
-	update_env_var "${repo_dir}/docker/.env" "SVR_HTTP_PORT" "${API_PORT}"
-	update_env_var "${repo_dir}/docker/.env" "ADMIN_SVR_HTTP_PORT" "${ELF_RAGFLOW_ADMIN_PORT:-19381}"
-	update_env_var "${repo_dir}/docker/.env" "SVR_MCP_PORT" "${ELF_RAGFLOW_MCP_PORT:-19382}"
-	update_env_var "${repo_dir}/docker/.env" "GO_HTTP_PORT" "${ELF_RAGFLOW_GO_HTTP_PORT:-19384}"
-	update_env_var "${repo_dir}/docker/.env" "GO_ADMIN_PORT" "${ELF_RAGFLOW_GO_ADMIN_PORT:-19383}"
-	update_env_var "${repo_dir}/docker/.env" "EXPOSE_MYSQL_PORT" "${ELF_RAGFLOW_MYSQL_PORT:-13306}"
-	update_env_var "${repo_dir}/docker/.env" "MINIO_CONSOLE_PORT" "${ELF_RAGFLOW_MINIO_CONSOLE_PORT:-19001}"
-	update_env_var "${repo_dir}/docker/.env" "MINIO_PORT" "${ELF_RAGFLOW_MINIO_PORT:-19000}"
-	update_env_var "${repo_dir}/docker/.env" "REDIS_PORT" "${ELF_RAGFLOW_REDIS_PORT:-16379}"
-	update_env_var "${repo_dir}/docker/.env" "ES_PORT" "${ELF_RAGFLOW_ES_PORT:-11200}"
-	update_env_var "${repo_dir}/docker/.env" "OS_PORT" "${ELF_RAGFLOW_OS_PORT:-11201}"
-	update_env_var "${repo_dir}/docker/.env" "RAGFLOW_IMAGE" "${RAGFLOW_IMAGE}"
-
-	printf '%s' "${repo_dir}"
-}
-
-run_with_timeout_if_available() {
-	local seconds="$1"
-	shift
-
-	if command -v timeout >/dev/null 2>&1; then
-		timeout "${seconds}" "$@"
-	else
-		"$@"
-	fi
-}
-
-start_ragflow_stack() {
-	local repo_dir="$1"
-	local started_at ended_at
-	started_at="$(date +%s)"
-
-	if (
-		cd "${repo_dir}/docker"
-		run_with_timeout_if_available "${COMPOSE_TIMEOUT_SECONDS}" \
-			docker compose -p "${COMPOSE_PROJECT}" -f docker-compose.yml up -d
-	) >"${COMPOSE_UP_LOG}" 2>&1; then
-		STARTED="true"
-		SETUP_STATUS="pass"
-		FAILURE_CLASS=""
-		FAILURE_REASON=""
-	else
-		SETUP_STATUS="incomplete"
-		OVERALL_STATUS="incomplete"
-		RESULT_STATUS="incomplete"
-		FAILURE_CLASS="ragflow_compose_start_failed"
-		FAILURE_REASON="Official RAGFlow Docker Compose did not start successfully; see compose-up.log in the artifact directory."
-	fi
-
-	ended_at="$(date +%s)"
-	STARTUP_TIME_MS="$(((ended_at - started_at) * 1000))"
-}
-
-wait_for_ragflow_api() {
-	local attempt code
-
-	for attempt in $(seq 1 "${STARTUP_ATTEMPTS}"); do
-		code="$(curl -sS -o /dev/null -w '%{http_code}' "${API_BASE}/api/v1/system/healthz" 2>/dev/null || true)"
-		jq -nc --argjson attempt "${attempt}" --arg code "${code}" --arg url "${API_BASE}/api/v1/system/healthz" '{
-			attempt: $attempt,
-			url: $url,
-			http_code: $code
-		}' >>"${STARTUP_ATTEMPTS_JSONL}"
-
-		if [[ "${code}" == "200" ]]; then
-			return 0
-		fi
-
-		sleep "${STARTUP_INTERVAL_SECONDS}"
-	done
-
-	return 1
-}
-
-api_json_request() {
-	local method="$1"
-	local path="$2"
-	local request_file="$3"
-	local response_file="$4"
-	local stderr_file="${response_file}.stderr"
-	local code
-
-	code="$(curl -sS -X "${method}" \
-		-o "${response_file}" \
-		-w '%{http_code}' \
-		-H 'Content-Type: application/json' \
-		-H "Authorization: Bearer ${API_KEY}" \
-		--data-binary @"${request_file}" \
-		"${API_BASE}${path}" 2>"${stderr_file}" || true)"
-
-	jq -n --arg code "${code}" --rawfile stderr "${stderr_file}" '{
-		http_code: $code,
-		stderr: $stderr
-	}' >"${response_file}.meta.json"
-
-	[[ "${code}" =~ ^2 ]]
-}
-
-response_code_ok() {
-	local response_file="$1"
-
-	jq -e '(.code? == 0) or (.id? != null) or (.data? != null)' "${response_file}" >/dev/null 2>&1
-}
-
-extract_id() {
-	local response_file="$1"
-	jq -r '
-		.data.id
-		// .data[0].id
-		// .data.document_id
-		// .data.chunk_id
-		// .id
-		// empty
-	' "${response_file}"
-}
-
-run_api_smoke() {
-	local dataset_name="${RUN_ID}"
-
-	jq -n --arg name "${dataset_name}" '{
-		name: $name,
-		description: "Generated public ELF RAGFlow Docker evidence smoke corpus.",
-		permission: "me",
-		chunk_method: "manual",
-		parser_config: {"raptor": {"use_raptor": false}}
-	}' >"${DATASET_REQUEST}"
-
-	if api_json_request POST "/api/v1/datasets" "${DATASET_REQUEST}" "${DATASET_RESPONSE}" \
-		&& response_code_ok "${DATASET_RESPONSE}"; then
-		DATASET_STEP_STATUS="pass"
-		DATASET_ID="$(extract_id "${DATASET_RESPONSE}")"
-	else
-		DATASET_STEP_STATUS="incomplete"
-		RUN_STATUS="incomplete"
-		RESULT_STATUS="incomplete"
-		OVERALL_STATUS="incomplete"
-		FAILURE_CLASS="ragflow_dataset_create_failed"
-		FAILURE_REASON="RAGFlow dataset creation did not return a successful response."
-		return 0
-	fi
-
-	if [[ -z "${DATASET_ID}" ]]; then
-		DATASET_STEP_STATUS="incomplete"
-		RUN_STATUS="incomplete"
-		RESULT_STATUS="incomplete"
-		OVERALL_STATUS="incomplete"
-		FAILURE_CLASS="ragflow_dataset_id_missing"
-		FAILURE_REASON="RAGFlow dataset creation succeeded but no dataset id was found in the response."
-		return 0
-	fi
-
-	jq -n --arg name "${DOCUMENT_NAME}" '{name: $name}' >"${DOCUMENT_REQUEST}"
-
-	if api_json_request POST "/api/v1/datasets/${DATASET_ID}/documents?type=empty" \
-		"${DOCUMENT_REQUEST}" "${DOCUMENT_RESPONSE}" \
-		&& response_code_ok "${DOCUMENT_RESPONSE}"; then
-		DOCUMENT_STEP_STATUS="pass"
-		DOCUMENT_ID="$(extract_id "${DOCUMENT_RESPONSE}")"
-	else
-		DOCUMENT_STEP_STATUS="incomplete"
-		RUN_STATUS="incomplete"
-		RESULT_STATUS="incomplete"
-		OVERALL_STATUS="incomplete"
-		FAILURE_CLASS="ragflow_document_create_failed"
-		FAILURE_REASON="RAGFlow empty document creation did not return a successful response."
-		return 0
-	fi
-
-	if [[ -z "${DOCUMENT_ID}" ]]; then
-		DOCUMENT_STEP_STATUS="incomplete"
-		RUN_STATUS="incomplete"
-		RESULT_STATUS="incomplete"
-		OVERALL_STATUS="incomplete"
-		FAILURE_CLASS="ragflow_document_id_missing"
-		FAILURE_REASON="RAGFlow empty document creation succeeded but no document id was found in the response."
-		return 0
-	fi
-
-	jq -n \
-		--arg content "${CORPUS_TEXT}" \
-		--arg token "${EVIDENCE_TOKEN}" \
-		'{
-			content: $content,
-			important_keywords: [$token],
-			questions: ["Which evidence token should map to ragflow-smoke-anchor?"]
-		}' >"${CHUNK_REQUEST}"
-
-	if api_json_request POST "/api/v1/datasets/${DATASET_ID}/documents/${DOCUMENT_ID}/chunks" \
-		"${CHUNK_REQUEST}" "${CHUNK_RESPONSE}" \
-		&& response_code_ok "${CHUNK_RESPONSE}"; then
-		CHUNK_STEP_STATUS="pass"
-		CHUNK_ID="$(extract_id "${CHUNK_RESPONSE}")"
-	else
-		CHUNK_STEP_STATUS="incomplete"
-		RUN_STATUS="incomplete"
-		RESULT_STATUS="incomplete"
-		OVERALL_STATUS="incomplete"
-		FAILURE_CLASS="ragflow_chunk_create_failed"
-		FAILURE_REASON="RAGFlow chunk creation did not return a successful response."
-		return 0
-	fi
-
-	jq -n \
-		--arg question "Which RAGFlow smoke evidence token maps to ragflow-smoke-anchor?" \
-		--arg dataset_id "${DATASET_ID}" \
-		--arg document_id "${DOCUMENT_ID}" \
-		'{
-			question: $question,
-			dataset_ids: [$dataset_id],
-			document_ids: [$document_id],
-			page: 1,
-			page_size: 5,
-			similarity_threshold: 0.0,
-			vector_similarity_weight: 0.0,
-			top_k: 5,
-			keyword: true,
-			highlight: false
-		}' >"${RETRIEVAL_REQUEST}"
-
-	if api_json_request POST "/api/v1/retrieval" "${RETRIEVAL_REQUEST}" "${RETRIEVAL_RESPONSE}" \
-		&& response_code_ok "${RETRIEVAL_RESPONSE}"; then
-		RETRIEVAL_STEP_STATUS="pass"
-	else
-		RETRIEVAL_STEP_STATUS="incomplete"
-		RUN_STATUS="incomplete"
-		RESULT_STATUS="incomplete"
-		OVERALL_STATUS="incomplete"
-		FAILURE_CLASS="ragflow_retrieval_failed"
-		FAILURE_REASON="RAGFlow retrieval did not return a successful response."
-		return 0
-	fi
-
-	jq \
-		--arg evidence_id "${EVIDENCE_ID}" \
-		--arg token "${EVIDENCE_TOKEN}" \
-		--arg document_name "${DOCUMENT_NAME}" '
-		def chunk_array:
-			if (.data.chunks? | type) == "array" then .data.chunks
-			elif (.reference.chunks? | type) == "array" then .reference.chunks
-			else [] end;
-		chunk_array
-		| map({
-			chunk_id: (.id // .chunk_id // ""),
-			content: (.content // .content_with_weight // ""),
-			document_id: (.document_id // .doc_id // ""),
-			document_name: (.document_name // .document_keyword // .doc_name // .docnm_kwd // ""),
-			dataset_id: (.dataset_id // .kb_id // ""),
-			positions: (.positions // []),
-			similarity: (.similarity // null),
-			vector_similarity: (.vector_similarity // null),
-			term_similarity: (.term_similarity // null),
-			evidence_ids: (
-				if (((.content // .content_with_weight // "") | contains($token))
-					or ((.document_name // .document_keyword // .doc_name // .docnm_kwd // "") == $document_name))
-				then [$evidence_id]
-				else []
-				end
-			),
-			mapping_status: (
-				if ((.content // .content_with_weight // "") | contains($token)) then "matched_content"
-				elif ((.document_name // .document_keyword // .doc_name // .docnm_kwd // "") == $document_name) then "matched_document"
-				else "unmatched"
-				end
-			)
-		})' "${RETRIEVAL_RESPONSE}" >"${REFERENCE_MAPPING}"
-
-	RUN_STATUS="pass"
-	EVIDENCE_CLASS="live_real_world"
-
-	if jq -e --arg evidence_id "${EVIDENCE_ID}" '
-		length > 0 and any(.[]; (.evidence_ids // []) | index($evidence_id))
-	' "${REFERENCE_MAPPING}" >/dev/null; then
-		RESULT_STATUS="pass"
-		OVERALL_STATUS="pass"
-		FAILURE_CLASS=""
-		FAILURE_REASON=""
-	else
-		RESULT_STATUS="wrong_result"
-		OVERALL_STATUS="wrong_result"
-		FAILURE_CLASS="ragflow_reference_mapping_missing"
-		FAILURE_REASON="RAGFlow retrieval returned chunks but none mapped to the generated evidence id."
-	fi
-}
-
-cleanup_stack() {
-	local repo_dir="${WORK_DIR}/ragflow"
-
-	if [[ "${STARTED}" != "true" || "${CLEANUP}" != "1" || ! -d "${repo_dir}/docker" ]]; then
-		return 0
-	fi
-
-	(
-		cd "${repo_dir}/docker"
-		docker compose -p "${COMPOSE_PROJECT}" -f docker-compose.yml down -v
-	) >"${COMPOSE_DOWN_LOG}" 2>&1 || true
-}
-
-write_scored_benchmark() {
-	if [[ -s "${REPORT_JSON}" ]]; then
-		jq 'def count($key): (.summary[$key] // 0);
-			def scored_status:
-				if count("wrong_result") > 0 then "wrong_result"
-				elif count("lifecycle_fail") > 0 then "lifecycle_fail"
-				elif count("incomplete") > 0 then "incomplete"
-				elif count("blocked") > 0 then "blocked"
-				elif count("not_encoded") > 0 then "not_encoded"
-				elif count("pass") > 0 then "pass"
-				else "not_encoded"
-				end;
-			{
-				schema: "elf.scored_benchmark_status/v1",
-				source: "real_world_job_benchmark",
-				status: scored_status,
-				counts: {
-					pass: count("pass"),
-					wrong_result: count("wrong_result"),
-					lifecycle_fail: count("lifecycle_fail"),
-					incomplete: count("incomplete"),
-					blocked: count("blocked"),
-					not_encoded: count("not_encoded")
-				},
-				job_count: (.summary.job_count // 0),
-				mean_score: (.summary.mean_score // null),
-				evidence_coverage: (.summary.evidence_coverage // null)
-			}' "${REPORT_JSON}" >"${SCORED_BENCHMARK}"
-	else
-		jq -n '{
-			schema: "elf.scored_benchmark_status/v1",
-			source: "real_world_job_benchmark",
-			status: "pending",
-			reason: "The smoke materialization was written before benchmark scoring completed."
-		}' >"${SCORED_BENCHMARK}"
-	fi
-}
-
-write_artifact() {
-	local generated_at out_rel manifest_rel fixture_rel report_json_rel report_md_rel docker_status git_status curl_status jq_status
-	generated_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
-	out_rel="$(relative_path "${OUT}")"
-	manifest_rel="$(relative_path "${MANIFEST_OUT}")"
-	fixture_rel="$(relative_path "${FIXTURE_PATH}")"
-	report_json_rel="$(relative_path "${REPORT_JSON}")"
-	report_md_rel="$(relative_path "${REPORT_MD}")"
-	docker_status="$(optional_command_status docker)"
-	git_status="$(optional_command_status git)"
-	curl_status="$(optional_command_status curl)"
-	jq_status="$(optional_command_status jq)"
-
-	jq -n \
-		--arg schema "elf.ragflow_docker_evidence_smoke/v1" \
-		--arg run_id "${RUN_ID}" \
-		--arg generated_at "${generated_at}" \
-		--arg adapter_id "ragflow_docker_evidence_smoke" \
-		--arg evidence_class "${EVIDENCE_CLASS}" \
-		--arg overall_status "$(json_status "${OVERALL_STATUS}")" \
-		--arg setup_status "$(json_status "${SETUP_STATUS}")" \
-		--arg run_status "$(json_status "${RUN_STATUS}")" \
-		--arg result_status "$(json_status "${RESULT_STATUS}")" \
-		--arg failure_class "${FAILURE_CLASS}" \
-		--arg failure_reason "${FAILURE_REASON}" \
-		--arg out_rel "${out_rel}" \
-		--arg manifest_rel "${manifest_rel}" \
-		--arg fixture_rel "${fixture_rel}" \
-		--arg report_json_rel "${report_json_rel}" \
-		--arg report_md_rel "${report_md_rel}" \
-		--arg artifact_dir "$(relative_path "${ARTIFACT_DIR}")" \
-		--arg work_dir "$(relative_path "${WORK_DIR}")" \
-		--arg repo_url "${RAGFLOW_REPO_URL}" \
-		--arg ragflow_ref "${RAGFLOW_REF}" \
-		--arg ragflow_image "${RAGFLOW_IMAGE}" \
-		--arg compose_project "${COMPOSE_PROJECT}" \
-		--arg cpu_gpu_mode "${CPU_GPU_MODE}" \
-		--arg start_enabled "${START_RAGFLOW}" \
-		--arg accept_resource_envelope "${ACCEPT_RESOURCE_ENVELOPE}" \
-		--arg allow_arm "${ALLOW_ARM}" \
-		--arg pull_image "${PULL_IMAGE}" \
-		--arg cleanup "${CLEANUP}" \
-		--arg api_base "${API_BASE}" \
-		--arg api_key_provided "$([[ -n "${API_KEY}" ]] && printf true || printf false)" \
-		--arg startup_time_ms "${STARTUP_TIME_MS}" \
-		--arg started "${STARTED}" \
-		--arg startup_attempt_count "${STARTUP_ATTEMPTS}" \
-		--arg startup_interval_seconds "${STARTUP_INTERVAL_SECONDS}" \
-		--arg compose_timeout_seconds "${COMPOSE_TIMEOUT_SECONDS}" \
-		--arg evidence_id "${EVIDENCE_ID}" \
-		--arg document_name "${DOCUMENT_NAME}" \
-		--arg evidence_token "${EVIDENCE_TOKEN}" \
-		--arg corpus_text "${CORPUS_TEXT}" \
-		--arg dataset_id "${DATASET_ID}" \
-		--arg document_id "${DOCUMENT_ID}" \
-		--arg chunk_id "${CHUNK_ID}" \
-		--arg vm_max_map_count "${VM_MAX_MAP_COUNT}" \
-		--arg vm_max_map_count_status "${VM_MAX_MAP_COUNT_STATUS}" \
-		--arg vm_max_map_count_action "${VM_MAX_MAP_COUNT_ACTION}" \
-		--arg image_present "${IMAGE_PRESENT}" \
-		--arg image_size_bytes "${IMAGE_SIZE_BYTES}" \
-		--arg host_global_installs_required "${HOST_GLOBAL_INSTALLS_REQUIRED}" \
-		--arg docker_status "${docker_status}" \
-		--arg git_status "${git_status}" \
-		--arg curl_status "${curl_status}" \
-		--arg jq_status "${jq_status}" \
-		--arg dataset_step_status "$(json_status "${DATASET_STEP_STATUS}")" \
-		--arg document_step_status "$(json_status "${DOCUMENT_STEP_STATUS}")" \
-		--arg chunk_step_status "$(json_status "${CHUNK_STEP_STATUS}")" \
-		--arg retrieval_step_status "$(json_status "${RETRIEVAL_STEP_STATUS}")" \
-		--slurpfile docker_info "${DOCKER_INFO}" \
-		--slurpfile image_inspect "${IMAGE_INSPECT}" \
-		--slurpfile reference_mapping "${REFERENCE_MAPPING}" \
-		--rawfile docker_df "${DOCKER_DF}" \
-		--rawfile compose_up_log "${COMPOSE_UP_LOG}" \
-		--rawfile compose_down_log "${COMPOSE_DOWN_LOG}" \
-		--slurpfile dataset_response "${DATASET_RESPONSE}" \
-		--slurpfile document_response "${DOCUMENT_RESPONSE}" \
-		--slurpfile chunk_response "${CHUNK_RESPONSE}" \
-		--slurpfile retrieval_response "${RETRIEVAL_RESPONSE}" \
-		--slurpfile scored_benchmark "${SCORED_BENCHMARK}" \
-		--slurpfile startup_attempts <(jq -s '.' "${STARTUP_ATTEMPTS_JSONL}") \
-		'{
-			schema: $schema,
-			run_id: $run_id,
-			generated_at: $generated_at,
-			adapter_id: $adapter_id,
-			evidence_class: $evidence_class,
-			overall_status: $overall_status,
-			status_source: "smoke_materialization",
-			scored_benchmark: $scored_benchmark[0],
-			no_quality_claim: true,
-			failure: (
-				if $failure_class == "" then null
-				else {
-					class: $failure_class,
-					reason: $failure_reason
-				}
-				end
-			),
-			artifacts: {
-				smoke: $out_rel,
-				external_adapter_manifest: $manifest_rel,
-				generated_fixture: $fixture_rel,
-				scored_report_json: $report_json_rel,
-				scored_report_markdown: $report_md_rel,
-				artifact_dir: $artifact_dir,
-				work_dir: $work_dir
-			},
-			upstream: {
-				repository: $repo_url,
-				ref: $ragflow_ref,
-				quickstart: "https://ragflow.io/docs/",
-				http_api_reference: "https://raw.githubusercontent.com/infiniflow/ragflow/main/docs/references/http_api_reference.md",
-				api_key_guide: "https://ragflow.io/docs/acquire_ragflow_api_key"
-			},
-			docker_boundary: {
-				status: $setup_status,
-				official_compose_path: "ragflow/docker/docker-compose.yml",
-				compose_project: $compose_project,
-				image: $ragflow_image,
-				device: $cpu_gpu_mode,
-				start_enabled: ($start_enabled == "1"),
-				resource_envelope_accepted: ($accept_resource_envelope == "1"),
-				allow_arm: ($allow_arm == "1"),
-				pull_image_requested: ($pull_image == "1"),
-				cleanup_requested: ($cleanup == "1"),
-				host_global_installs_required: ($host_global_installs_required == "true"),
-				tooling: {
-					docker: $docker_status,
-					git: $git_status,
-					curl: $curl_status,
-					jq: $jq_status
-				}
-			},
-			setup: {
-				status: $setup_status,
-				command: "cargo make smoke-ragflow-docker",
-				live_command: "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker",
-				started: ($started == "true"),
-				startup_time_ms: (if $startup_time_ms == "" then null else ($startup_time_ms | tonumber) end),
-				vm_max_map_count: {
-					status: $vm_max_map_count_status,
-					observed: (if $vm_max_map_count == "" then null else $vm_max_map_count end),
-					required_min: 262144,
-					action: $vm_max_map_count_action
-				},
-				image: {
-					present: ($image_present == "true"),
-					size_bytes: (if $image_size_bytes == "" then null else ($image_size_bytes | tonumber) end),
-					official_compressed_size_note: "RAGFlow quickstart lists the stable image at about 2 GB compressed.",
-					official_expanded_size_note: "RAGFlow quickstart says the image expands to about 7 GB once unpacked.",
-					inspect: ($image_inspect[0] // [])
-				},
-				resource_envelope: {
-					official_min_cpu_cores: 4,
-					official_min_ram_gb: 16,
-					official_min_disk_gb: 50,
-					docker_info: ($docker_info[0] // {}),
-					docker_system_df: $docker_df
-				},
-				provider_boundaries: {
-					ragflow_api_base: $api_base,
-					ragflow_api_key_provided: ($api_key_provided == "true"),
-					operator_owned_provider_credentials_used: false,
-					private_corpus_used: false,
-					generated_public_corpus_only: true,
-					external_llm_quality_scoring_claimed: false
-				},
-				retry_behavior: {
-					startup_poll_attempts_configured: ($startup_attempt_count | tonumber),
-					startup_interval_seconds: ($startup_interval_seconds | tonumber),
-					compose_timeout_seconds: ($compose_timeout_seconds | tonumber),
-					startup_attempts: ($startup_attempts[0] // [])
-				},
-				log_excerpt: {
-					compose_up: ($compose_up_log | split("\n") | .[0:40]),
-					compose_down: ($compose_down_log | split("\n") | .[0:20])
-				}
-			},
-			corpus: {
-				profile: "generated_public",
-				evidence_id: $evidence_id,
-				document_name: $document_name,
-				evidence_token: $evidence_token,
-				text: $corpus_text,
-				dataset_id: (if $dataset_id == "" then null else $dataset_id end),
-				document_id: (if $document_id == "" then null else $document_id end),
-				chunk_id: (if $chunk_id == "" then null else $chunk_id end)
-			},
-			run: {
-				status: $run_status,
-				steps: {
-					dataset_creation: {
-						status: $dataset_step_status,
-						request_artifact: "dataset-create-request.json",
-						response_artifact: "dataset-create-response.json",
-						response: ($dataset_response[0] // null)
-					},
-					document_creation: {
-						status: $document_step_status,
-						request_artifact: "document-create-request.json",
-						response_artifact: "document-create-response.json",
-						response: ($document_response[0] // null)
-					},
-					chunk_ingest: {
-						status: $chunk_step_status,
-						request_artifact: "chunk-create-request.json",
-						response_artifact: "chunk-create-response.json",
-						response: ($chunk_response[0] // null)
-					},
-					retrieval_query: {
-						status: $retrieval_step_status,
-						request_artifact: "retrieval-request.json",
-						response_artifact: "retrieval-response.json",
-						response: ($retrieval_response[0] // null)
-					}
-				}
-			},
-			result: {
-				status: $result_status,
-				evidence: "RAGFlow retrieval reference chunks are mapped to real_world_job evidence ids when content or document metadata matches the generated public corpus.",
-				reference_chunk_count: (($reference_mapping[0] // []) | length),
-				mapped_reference_chunk_count: (($reference_mapping[0] // []) | map(select((.evidence_ids // []) | length > 0)) | length)
-			},
-			evidence_mapping: {
-				expected_evidence_ids: [$evidence_id],
-				reference_chunks: ($reference_mapping[0] // []),
-				field_mapping: {
-					"id": "chunk_id",
-					"document_id": "document_id",
-					"document_name_or_document_keyword": "document_name",
-					"dataset_id_or_kb_id": "dataset_id",
-					"content_or_content_with_weight": "content",
-					"positions": "positions",
-					"similarity": "similarity",
-					"vector_similarity": "vector_similarity",
-					"term_similarity": "term_similarity"
-				}
-			}
-		}' >"${OUT}"
-}
-
-write_manifest() {
-	local generated_at out_rel manifest_rel retrieval_suite_status production_ops_status capability_retrieval_status capability_setup_status
-	generated_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
-	out_rel="$(relative_path "${OUT}")"
-	manifest_rel="$(relative_path "${MANIFEST_OUT}")"
-	retrieval_suite_status="$(json_status "${RESULT_STATUS}")"
-	capability_retrieval_status="$(json_status "${RESULT_STATUS}")"
-	capability_setup_status="$(json_status "${SETUP_STATUS}")"
-	production_ops_status="not_encoded"
-
-	jq -n \
-		--arg generated_at "${generated_at}" \
-		--arg manifest_id "ragflow-docker-evidence-smoke-${RUN_ID}" \
-		--arg out_rel "${out_rel}" \
-		--arg manifest_rel "${manifest_rel}" \
-		--arg evidence_class "${EVIDENCE_CLASS}" \
-		--arg overall_status "$(json_status "${OVERALL_STATUS}")" \
-		--arg setup_status "$(json_status "${SETUP_STATUS}")" \
-		--arg run_status "$(json_status "${RUN_STATUS}")" \
-		--arg result_status "$(json_status "${RESULT_STATUS}")" \
-		--arg retrieval_suite_status "${retrieval_suite_status}" \
-		--arg production_ops_status "${production_ops_status}" \
-		--arg capability_setup_status "${capability_setup_status}" \
-		--arg capability_retrieval_status "${capability_retrieval_status}" \
-		--arg ragflow_image "${RAGFLOW_IMAGE}" \
-		--arg cpu_gpu_mode "${CPU_GPU_MODE}" \
-		--arg failure_reason "${FAILURE_REASON}" \
-		--arg host_global_installs_required "${HOST_GLOBAL_INSTALLS_REQUIRED}" \
-		'{
-			schema: "elf.real_world_external_adapter_manifest/v1",
-			manifest_id: $manifest_id,
-			docker_isolation: {
-				default: true,
-				compose_file: "official RAGFlow docker/docker-compose.yml",
-				runner: "scripts/ragflow-docker-evidence-smoke.sh",
-				artifact_dir: "tmp/real-world-memory/ragflow-smoke",
-				host_global_installs_required: ($host_global_installs_required == "true"),
-				notes: [
-					"Generated by the RAGFlow evidence-smoke script at " + $generated_at + ".",
-					"The smoke uses a generated public corpus and does not use private corpus or operator-owned provider credentials."
-				]
-			},
-			adapters: [
-				{
-					adapter_id: "ragflow_docker_evidence_smoke",
-					project: "RAGFlow",
-					adapter_kind: "docker_service_evidence_smoke",
-					evidence_class: $evidence_class,
-					docker_default: true,
-					host_global_installs_required: ($host_global_installs_required == "true"),
-					overall_status: $overall_status,
-					setup: {
-						status: $setup_status,
-						evidence: "Official RAGFlow Docker Compose boundary and resource envelope were evaluated for the tiny evidence smoke.",
-						command: "cargo make smoke-ragflow-docker",
-						artifact: $out_rel
-					},
-					run: {
-						status: $run_status,
-						evidence: "The smoke attempts dataset creation, empty-document corpus ingest, chunk insert, retrieval query, and reference chunk extraction.",
-						command: "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker",
-						artifact: $out_rel
-					},
-					result: {
-						status: $result_status,
-						evidence: (
-							if $failure_reason == "" then "Returned RAGFlow reference chunks were mapped to generated real_world_job evidence ids for the smoke only."
-							else $failure_reason
-							end
-						),
-						artifact: $out_rel
-					},
-					capabilities: [
-						{
-							capability: "official_docker_service_boundary",
-							status: $capability_setup_status,
-							evidence: "The script uses the official RAGFlow Docker Compose setup and records image, disk, startup, CPU/GPU, and vm.max_map_count evidence."
-						},
-						{
-							capability: "dataset_or_chunk_ingest",
-							status: $run_status,
-							evidence: "The live path creates a generated public dataset, empty document, and chunk before querying."
-						},
-						{
-							capability: "retrieval_reference_mapping",
-							status: $capability_retrieval_status,
-							evidence: "The script maps returned chunk id, document id, document name, dataset id, positions, and similarity fields to benchmark evidence ids."
-						},
-						{
-							capability: "quality_or_scale_claim",
-							status: "not_encoded",
-							evidence: "The smoke does not run broad RAGFlow quality scoring, scale tests, private corpora, or comparative ranking claims."
-						}
-					],
-					suites: [
-						{
-							suite_id: "retrieval",
-							status: $retrieval_suite_status,
-							evidence: "Only the generated-public RAGFlow evidence-smoke retrieval path is represented."
-						},
-						{
-							suite_id: "production_ops",
-							status: $production_ops_status,
-							evidence: "Resource envelope evidence is recorded, but no production-ops suite scoring is encoded."
-						},
-						{
-							suite_id: "knowledge_compilation",
-							status: "not_encoded",
-							evidence: "RAGFlow page or knowledge-compilation behavior is not part of this smoke."
-						}
-					],
-					evidence: [
-						{
-							kind: "artifact",
-							ref: $out_rel,
-							status: $result_status
-						},
-						{
-							kind: "manifest",
-							ref: $manifest_rel,
-							status: $overall_status
-						},
-						{
-							kind: "source",
-							ref: "https://ragflow.io/docs/",
-							status: "real"
-						},
-						{
-							kind: "source",
-							ref: "https://raw.githubusercontent.com/infiniflow/ragflow/main/docs/references/http_api_reference.md",
-							status: "real"
-						}
-					],
-					execution_metadata: {
-						sources: [
-							{
-								label: "RAGFlow quickstart",
-								url: "https://ragflow.io/docs/",
-								evidence: "Official Docker startup, resource envelope, vm.max_map_count, and provider configuration guidance."
-							},
-							{
-								label: "RAGFlow HTTP API reference",
-								url: "https://raw.githubusercontent.com/infiniflow/ragflow/main/docs/references/http_api_reference.md",
-								evidence: "Official dataset, document, chunk, retrieval, and reference-chunk field contract."
-							}
-						],
-						setup_path: "Run the official RAGFlow Docker Compose stack with generated public corpus only.",
-						runtime_boundary: "Official RAGFlow Docker Compose service boundary; no host-global RAGFlow install.",
-						resource_expectation: (
-							"RAGFlow image " + $ragflow_image + ", CPU/GPU mode " + $cpu_gpu_mode + ", official minimums 4 CPU cores, 16 GB RAM, 50 GB disk, and vm.max_map_count >= 262144."
-						),
-						retry_guidance: [
-							"Default command records a typed blocked preflight unless resource-heavy startup is explicitly enabled.",
-							"Set ELF_RAGFLOW_SMOKE_START=1 and ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 for a live Docker startup attempt.",
-							"Provide only a local self-hosted RAGFlow API key; do not use private corpora or operator-owned model provider credentials for this smoke."
-						],
-						research_depth: "D2 feasibility plus XY-885 evidence-smoke implementation; generated artifact decides live evidence class."
-					},
-					notes: [
-						"This adapter record is generated by a smoke artifact and must not be generalized into broad RAGFlow quality evidence.",
-						"Failure before query output remains typed as blocked, incomplete, or not_encoded."
-					]
-				}
-			]
-		}' >"${MANIFEST_OUT}"
-}
-
-write_fixture() {
-	local result_status reason
-	result_status="$(json_status "${RESULT_STATUS}")"
-	reason="${FAILURE_REASON}"
-
-	jq -n \
-		--arg run_id "${RUN_ID}" \
-		--arg evidence_id "${EVIDENCE_ID}" \
-		--arg evidence_token "${EVIDENCE_TOKEN}" \
-		--arg corpus_text "${CORPUS_TEXT}" \
-		--arg result_status "${result_status}" \
-		--arg failure_reason "${reason}" \
-		'{
-			schema: "elf.real_world_job/v1",
-			job_id: "ragflow-evidence-smoke-001",
-			suite: "retrieval",
-			title: "Map RAGFlow reference chunks to generated evidence",
-			corpus: {
-				corpus_id: "ragflow-generated-public-smoke",
-				profile: "generated_public",
-				items: [
-					{
-						evidence_id: $evidence_id,
-						kind: "document",
-						text: $corpus_text,
-						source_ref: {
-							schema: "source_ref/v1",
-							resolver: "ragflow_smoke/v1",
-							ref: {
-								run_id: $run_id,
-								evidence_token: $evidence_token
-							}
-						},
-						created_at: "2026-06-10T00:00:00Z"
-					}
-				],
-				adapter_response: {
-					adapter_id: "ragflow_docker_evidence_smoke",
-					answer: {
-						content: (
-							if $result_status == "pass" then
-								"RAGFlow returned reference chunks that map to the generated ragflow-smoke-anchor evidence id."
-							else
-								""
-							end
-						),
-						claims: (
-							if $result_status == "pass" then
-								[
-									{
-										claim_id: "ragflow_reference_mapping",
-										text: "RAGFlow reference chunks map to the generated ragflow-smoke-anchor evidence id.",
-										evidence_ids: [$evidence_id],
-										confidence: "derived_from_ragflow_reference_chunk_mapping"
-									}
-								]
-							else
-								[]
-							end
-						),
-						evidence_ids: (if $result_status == "pass" then [$evidence_id] else [] end),
-						latency_ms: 0.0,
-						cost: {
-							currency: "USD",
-							amount: 0.0,
-							input_tokens: 0,
-							output_tokens: 0
-						}
-					}
-				}
-			},
-			timeline: [
-				{
-					event_id: "ragflow-smoke-corpus-generated",
-					ts: "2026-06-10T00:00:00Z",
-					actor: "system",
-					action: "generated_public_corpus",
-					evidence_ids: [$evidence_id],
-					summary: "The RAGFlow smoke generated a tiny public corpus for reference chunk mapping."
-				}
-			],
-			prompt: {
-				role: "user",
-				content: "Which RAGFlow smoke evidence token maps to the generated reference chunk?",
-				job_mode: "answer",
-				constraints: ["cite_evidence", "avoid_broad_quality_claims"]
-			},
-			expected_answer: {
-				must_include: [
-					{
-						claim_id: "ragflow_reference_mapping",
-						text: "RAGFlow reference chunks map to the generated ragflow-smoke-anchor evidence id."
-					}
-				],
-				must_not_include: ["RAGFlow passed a broad graph/RAG quality benchmark."],
-				evidence_links: {
-					ragflow_reference_mapping: [$evidence_id]
-				},
-				answer_type: "direct_answer",
-				accepted_alternates: [],
-				requires_caveat: true,
-				requires_refusal: false
-			},
-			required_evidence: [
-				{
-					evidence_id: $evidence_id,
-					claim_id: "ragflow_reference_mapping",
-					requirement: "cite",
-					quote: "ragflow-smoke-anchor evidence id"
-				}
-			],
-			negative_traps: [],
-			scoring_rubric: {
-				dimensions: {
-					answer_correctness: {
-						weight: 0.3,
-						max_points: 1.0,
-						criteria: "States the generated evidence mapping without broad quality claims."
-					},
-					evidence_grounding: {
-						weight: 0.45,
-						max_points: 1.0,
-						criteria: "Maps returned RAGFlow reference chunks to the generated evidence id."
-					},
-					trap_avoidance: {
-						weight: 0.15,
-						max_points: 1.0,
-						criteria: "Does not claim broad RAGFlow quality from the tiny smoke."
-					},
-					latency_resource: {
-						weight: 0.1,
-						max_points: 1.0,
-						criteria: "Records setup, resource, provider, and reference-mapping boundaries."
-					}
-				},
-				pass_threshold: 0.75,
-				hard_fail_rules: []
-			},
-			allowed_uncertainty: {
-				can_answer_unknown: false,
-				acceptable_phrases: ["tiny generated corpus", "reference chunk smoke only"],
-				fallback_action: "state_blocker"
-			},
-			operator_debug: null,
-			encoding: {},
-			memory_evolution: null,
-			tags: ["external_adapter", "generated_public", "ragflow", "no_live_claim"]
-		}
-		| if ["blocked", "incomplete", "not_encoded"] | index($result_status) then
-			.encoding = {status: $result_status, reason: $failure_reason}
-		else
-			.
-		end' >"${FIXTURE_PATH}"
-}
-
-write_scored_report() {
-	(
-		cd "${ROOT_DIR}"
-		cargo run -p elf-eval --bin real_world_job_benchmark -- run \
-			--fixtures "${FIXTURE_PATH}" \
-			--out "${REPORT_JSON}" \
-			--run-id real-world-memory-live-ragflow \
-			--adapter-id ragflow_docker_evidence_smoke \
-			--adapter-name "RAGFlow Docker evidence smoke adapter" \
-			--adapter-behavior docker_service_evidence_smoke \
-			--adapter-storage-status "$(json_status "${SETUP_STATUS}")" \
-			--adapter-runtime-status "$(json_status "${OVERALL_STATUS}")" \
-			--adapter-notes "Generated by the RAGFlow Docker evidence smoke; pass or wrong_result requires reference chunks mapped to generated evidence ids, while resource/setup/API-key limits remain typed." \
-			--external-adapter-manifest "${MANIFEST_OUT}"
-		cargo run -p elf-eval --bin real_world_job_benchmark -- publish \
-			--report "${REPORT_JSON}" \
-			--out "${REPORT_MD}"
-	)
-}
-
-write_summary() {
-	jq -n \
-		--slurpfile materialization "${OUT}" \
-		--slurpfile manifest "${MANIFEST_OUT}" \
-		--slurpfile report "${REPORT_JSON}" \
-		'{
-			schema: "elf.ragflow_docker_smoke_summary/v1",
-				generated_at: (now | todateiso8601),
-				adapter_id: "ragflow_docker_evidence_smoke",
-				evidence_class: $materialization[0].evidence_class,
-				status_boundary: {
-					materialization: "setup/run/evidence-mapping state emitted by the smoke runner",
-					manifest: "external adapter declaration consumed by the scorer",
-					scored_benchmark: "post-score real_world_job outcome; use this for quality status"
-				},
-				scored_benchmark: $materialization[0].scored_benchmark,
-				materialization: $materialization[0],
-				manifest: {
-					json: ($materialization[0].artifacts.external_adapter_manifest // "tmp/real-world-memory/ragflow-smoke/memory_projects_manifest.ragflow-smoke.json"),
-					status_source: "external_adapter_manifest_pre_score",
-					summary: $manifest[0].adapters[0].overall_status,
-					suites: $manifest[0].adapters[0].suites
-			},
-			report: {
-				json: ($materialization[0].artifacts.scored_report_json // "tmp/real-world-memory/ragflow-smoke/ragflow-report.json"),
-				markdown: ($materialization[0].artifacts.scored_report_markdown // "tmp/real-world-memory/ragflow-smoke/ragflow-report.md"),
-				summary: $report[0].summary,
-				suites: $report[0].suites
-			}
-		}' >"${SUMMARY_OUT}"
-}
 
-write_outputs() {
-	write_scored_benchmark
-	write_artifact
-	write_manifest
-	write_fixture
-	write_scored_report
-	write_scored_benchmark
-	write_artifact
-	write_summary
-	echo "RAGFlow smoke artifact: ${OUT}"
-	echo "RAGFlow smoke manifest: ${MANIFEST_OUT}"
-	echo "RAGFlow smoke report: ${REPORT_JSON}"
-	echo "RAGFlow smoke summary: ${SUMMARY_OUT}"
-}
+source "${ROOT_DIR}/scripts/ragflow_smoke/common.sh"
+source "${ROOT_DIR}/scripts/ragflow_smoke/docker.sh"
+source "${ROOT_DIR}/scripts/ragflow_smoke/api.sh"
+source "${ROOT_DIR}/scripts/ragflow_smoke/scoring.sh"
+source "${ROOT_DIR}/scripts/ragflow_smoke/materialization.sh"
+source "${ROOT_DIR}/scripts/ragflow_smoke/manifest.sh"
+source "${ROOT_DIR}/scripts/ragflow_smoke/fixture.sh"
+source "${ROOT_DIR}/scripts/ragflow_smoke/summary.sh"
 
 for cmd in jq curl; do
 	required_command "${cmd}"
diff --git a/scripts/ragflow_smoke/api.sh b/scripts/ragflow_smoke/api.sh
new file mode 100644
index 00000000..fb15cad3
--- /dev/null
+++ b/scripts/ragflow_smoke/api.sh
@@ -0,0 +1,183 @@
+# RAGFlow Docker evidence smoke helper functions.
+# Sourced by scripts/ragflow-docker-evidence-smoke.sh.
+
+run_api_smoke() {
+	local dataset_name="${RUN_ID}"
+	# Live API path: create a dataset, an empty document and one chunk, query retrieval, then map returned chunks to EVIDENCE_ID. Each handled API failure records typed status globals and returns 0.
+	jq -n --arg name "${dataset_name}" '{
+		name: $name,
+		description: "Generated public ELF RAGFlow Docker evidence smoke corpus.",
+		permission: "me",
+		chunk_method: "manual",
+		parser_config: {"raptor": {"use_raptor": false}}
+	}' >"${DATASET_REQUEST}"
+
+	if api_json_request POST "/api/v1/datasets" "${DATASET_REQUEST}" "${DATASET_RESPONSE}" \
+		&& response_code_ok "${DATASET_RESPONSE}"; then
+		DATASET_STEP_STATUS="pass"
+		DATASET_ID="$(extract_id "${DATASET_RESPONSE}")"
+	else
+		DATASET_STEP_STATUS="incomplete"
+		RUN_STATUS="incomplete"
+		RESULT_STATUS="incomplete"
+		OVERALL_STATUS="incomplete"
+		FAILURE_CLASS="ragflow_dataset_create_failed"
+		FAILURE_REASON="RAGFlow dataset creation did not return a successful response."
+		return 0
+	fi
+
+	if [[ -z "${DATASET_ID}" ]]; then
+		DATASET_STEP_STATUS="incomplete"
+		RUN_STATUS="incomplete"
+		RESULT_STATUS="incomplete"
+		OVERALL_STATUS="incomplete"
+		FAILURE_CLASS="ragflow_dataset_id_missing"
+		FAILURE_REASON="RAGFlow dataset creation succeeded but no dataset id was found in the response."
+		return 0
+	fi
+
+	jq -n --arg name "${DOCUMENT_NAME}" '{name: $name}' >"${DOCUMENT_REQUEST}"
+
+	if api_json_request POST "/api/v1/datasets/${DATASET_ID}/documents?type=empty" \
+		"${DOCUMENT_REQUEST}" "${DOCUMENT_RESPONSE}" \
+		&& response_code_ok "${DOCUMENT_RESPONSE}"; then
+		DOCUMENT_STEP_STATUS="pass"
+		DOCUMENT_ID="$(extract_id "${DOCUMENT_RESPONSE}")"
+	else
+		DOCUMENT_STEP_STATUS="incomplete"
+		RUN_STATUS="incomplete"
+		RESULT_STATUS="incomplete"
+		OVERALL_STATUS="incomplete"
+		FAILURE_CLASS="ragflow_document_create_failed"
+		FAILURE_REASON="RAGFlow empty document creation did not return a successful response."
+		return 0
+	fi
+
+	if [[ -z "${DOCUMENT_ID}" ]]; then
+		DOCUMENT_STEP_STATUS="incomplete"
+		RUN_STATUS="incomplete"
+		RESULT_STATUS="incomplete"
+		OVERALL_STATUS="incomplete"
+		FAILURE_CLASS="ragflow_document_id_missing"
+		FAILURE_REASON="RAGFlow empty document creation succeeded but no document id was found in the response."
+		return 0
+	fi
+	# Seed the chunk with the corpus text; the evidence token is also supplied as an important keyword.
+	jq -n \
+		--arg content "${CORPUS_TEXT}" \
+		--arg token "${EVIDENCE_TOKEN}" \
+		'{
+			content: $content,
+			important_keywords: [$token],
+			questions: ["Which evidence token should map to ragflow-smoke-anchor?"]
+		}' >"${CHUNK_REQUEST}"
+
+	if api_json_request POST "/api/v1/datasets/${DATASET_ID}/documents/${DOCUMENT_ID}/chunks" \
+		"${CHUNK_REQUEST}" "${CHUNK_RESPONSE}" \
+		&& response_code_ok "${CHUNK_RESPONSE}"; then
+		CHUNK_STEP_STATUS="pass"
+		CHUNK_ID="$(extract_id "${CHUNK_RESPONSE}")"
+	else
+		CHUNK_STEP_STATUS="incomplete"
+		RUN_STATUS="incomplete"
+		RESULT_STATUS="incomplete"
+		OVERALL_STATUS="incomplete"
+		FAILURE_CLASS="ragflow_chunk_create_failed"
+		FAILURE_REASON="RAGFlow chunk creation did not return a successful response."
+		return 0
+	fi
+	# NOTE(review): CHUNK_ID is not checked for emptiness, unlike DATASET_ID and DOCUMENT_ID; the requests below do not use it.
+	jq -n \
+		--arg question "Which RAGFlow smoke evidence token maps to ragflow-smoke-anchor?" \
+		--arg dataset_id "${DATASET_ID}" \
+		--arg document_id "${DOCUMENT_ID}" \
+		'{
+			question: $question,
+			dataset_ids: [$dataset_id],
+			document_ids: [$document_id],
+			page: 1,
+			page_size: 5,
+			similarity_threshold: 0.0,
+			vector_similarity_weight: 0.0,
+			top_k: 5,
+			keyword: true,
+			highlight: false
+		}' >"${RETRIEVAL_REQUEST}"
+
+	if api_json_request POST "/api/v1/retrieval" "${RETRIEVAL_REQUEST}" "${RETRIEVAL_RESPONSE}" \
+		&& response_code_ok "${RETRIEVAL_RESPONSE}"; then
+		RETRIEVAL_STEP_STATUS="pass"
+	else
+		RETRIEVAL_STEP_STATUS="incomplete"
+		RUN_STATUS="incomplete"
+		RESULT_STATUS="incomplete"
+		OVERALL_STATUS="incomplete"
+		FAILURE_CLASS="ragflow_retrieval_failed"
+		FAILURE_REASON="RAGFlow retrieval did not return a successful response."
+		return 0
+	fi
+	# Normalize returned chunks (from .data.chunks or .reference.chunks) and tag each one whose content contains the token or whose document name equals DOCUMENT_NAME.
+	jq \
+		--arg evidence_id "${EVIDENCE_ID}" \
+		--arg token "${EVIDENCE_TOKEN}" \
+		--arg document_name "${DOCUMENT_NAME}" '
+		def chunk_array:
+			if (.data.chunks? | type) == "array" then .data.chunks
+			elif (.reference.chunks? | type) == "array" then .reference.chunks
+			else [] end;
+		chunk_array
+		| map({
+			chunk_id: (.id // .chunk_id // ""),
+			content: (.content // .content_with_weight // ""),
+			document_id: (.document_id // .doc_id // ""),
+			document_name: (.document_name // .document_keyword // .doc_name // .docnm_kwd // ""),
+			dataset_id: (.dataset_id // .kb_id // ""),
+			positions: (.positions // []),
+			similarity: (.similarity // null),
+			vector_similarity: (.vector_similarity // null),
+			term_similarity: (.term_similarity // null),
+			evidence_ids: (
+				if (((.content // .content_with_weight // "") | contains($token))
+					or ((.document_name // .document_keyword // .doc_name // .docnm_kwd // "") == $document_name))
+				then [$evidence_id]
+				else []
+				end
+			),
+			mapping_status: (
+				if ((.content // .content_with_weight // "") | contains($token)) then "matched_content"
+				elif ((.document_name // .document_keyword // .doc_name // .docnm_kwd // "") == $document_name) then "matched_document"
+				else "unmatched"
+				end
+			)
+		})' "${RETRIEVAL_RESPONSE}" >"${REFERENCE_MAPPING}"
+
+	RUN_STATUS="pass"
+	EVIDENCE_CLASS="live_real_world"
+	# The run counts as passed once retrieval answered; the result passes only if at least one chunk mapped to EVIDENCE_ID, otherwise it is typed wrong_result.
+	if jq -e --arg evidence_id "${EVIDENCE_ID}" '
+		length > 0 and any(.[]; (.evidence_ids // []) | index($evidence_id))
+	' "${REFERENCE_MAPPING}" >/dev/null; then
+		RESULT_STATUS="pass"
+		OVERALL_STATUS="pass"
+		FAILURE_CLASS=""
+		FAILURE_REASON=""
+	else
+		RESULT_STATUS="wrong_result"
+		OVERALL_STATUS="wrong_result"
+		FAILURE_CLASS="ragflow_reference_mapping_missing"
+		FAILURE_REASON="RAGFlow retrieval returned chunks but none mapped to the generated evidence id."
+	fi
+}
+
+cleanup_stack() {
+	# Tear down the compose stack (including volumes) on a best-effort basis; never fails the caller.
+	local compose_dir="${WORK_DIR}/ragflow/docker"
+
+	# Nothing to do unless this run started the stack, cleanup was requested, and the checkout exists.
+	[[ "${STARTED}" == "true" && "${CLEANUP}" == "1" && -d "${compose_dir}" ]] || return 0
+
+	(
+		cd "${compose_dir}"
+		docker compose -p "${COMPOSE_PROJECT}" -f docker-compose.yml down -v
+	) >"${COMPOSE_DOWN_LOG}" 2>&1 || true
+}
diff --git a/scripts/ragflow_smoke/common.sh b/scripts/ragflow_smoke/common.sh
new file mode 100644
index 00000000..405af356
--- /dev/null
+++ b/scripts/ragflow_smoke/common.sh
@@ -0,0 +1,96 @@
+# RAGFlow Docker evidence smoke helper functions.
+# Sourced by scripts/ragflow-docker-evidence-smoke.sh.
+
+required_command() {
+	# Abort the whole script with exit status 1 when the mandatory tool $1 is not on PATH.
+	local tool="$1"
+	command -v "${tool}" >/dev/null 2>&1 && return 0
+	echo "Missing ${tool}; cannot write RAGFlow smoke artifacts." >&2
+	exit 1
+}
+
+optional_command_status() {
+	# Print "available" or "missing" (no trailing newline) depending on whether tool $1 is on PATH.
+	local tool="$1" state="missing"
+	if command -v "${tool}" >/dev/null 2>&1; then
+		state="available"
+	fi
+	printf '%s' "${state}"
+}
+
+relative_path() {
+	# Print $1 relative to ROOT_DIR when it lies underneath it; any other path is printed unchanged.
+	local target="$1" prefix="${ROOT_DIR}/"
+	case "${target}" in
+		"${prefix}"*) printf '%s' "${target#"${prefix}"}" ;;
+		*) printf '%s' "${target}" ;;
+	esac
+}
+
+json_status() {
+	# Echo $1 when it belongs to the typed status vocabulary; anything else is reported as "incomplete".
+	local candidate="$1" known
+	for known in real mocked unsupported blocked incomplete wrong_result lifecycle_fail pass not_encoded; do
+		if [[ "${candidate}" == "${known}" ]]; then
+			printf '%s' "${candidate}"
+			return 0
+		fi
+	done
+	printf 'incomplete'
+}
+
+capture_docker_info() {
+	# Write `docker info` as JSON to DOCKER_INFO; returns 0 on success and 1 after recording the failure.
+	local stderr_file="${ARTIFACT_DIR}/docker-info.stderr"
+	docker info --format '{{json .}}' >"${DOCKER_INFO}" 2>"${stderr_file}" && return 0
+	# On failure, replace the output with an error object that carries the captured stderr text.
+	jq -n --rawfile stderr "${stderr_file}" '{
+		error: "docker_info_failed",
+		stderr: $stderr
+	}' >"${DOCKER_INFO}"
+	return 1
+}
+
+capture_disk_info() { # Best-effort snapshot of `docker system df` into DOCKER_DF.
+	docker system df >"${DOCKER_DF}" 2>/dev/null || true # stderr is discarded and a failing docker call is ignored
+}
+
+capture_vm_max_map_count() {
+	# Record vm.max_map_count and classify it: pass (>= 262144), blocked (numeric but lower), else not_observed.
+	VM_MAX_MAP_COUNT_STATUS="not_observed"
+	if ! VM_MAX_MAP_COUNT="$(sysctl -n vm.max_map_count 2>/dev/null)"; then
+		VM_MAX_MAP_COUNT=""
+		return 0
+	fi
+	[[ "${VM_MAX_MAP_COUNT}" =~ ^[0-9]+$ ]] || return 0
+	if [[ "${VM_MAX_MAP_COUNT}" -ge 262144 ]]; then
+		VM_MAX_MAP_COUNT_STATUS="pass"
+	else
+		VM_MAX_MAP_COUNT_STATUS="blocked"
+	fi
+}
+
+capture_image_info() { # Optionally pull RAGFLOW_IMAGE, then record `docker image inspect` output in IMAGE_INSPECT.
+	if [[ "${PULL_IMAGE}" == "1" && "${ACCEPT_RESOURCE_ENVELOPE}" == "1" ]]; then
+		docker pull "${RAGFLOW_IMAGE}" >"${ARTIFACT_DIR}/docker-pull.log" 2>&1 || true
+	fi
+	# NOTE(review): when inspect fails only an empty JSON array is written; IMAGE_PRESENT and IMAGE_SIZE_BYTES keep whatever the caller initialized - confirm those defaults.
+	if docker image inspect "${RAGFLOW_IMAGE}" >"${IMAGE_INSPECT}" 2>/dev/null; then
+		IMAGE_PRESENT="true"
+		IMAGE_SIZE_BYTES="$(jq -r '.[0].Size // ""' "${IMAGE_INSPECT}")"
+	else
+		printf '[]\n' >"${IMAGE_INSPECT}"
+	fi
+}
+
+update_env_var() {
+	# Set KEY=VALUE ($2=$3) in dotenv file $1: rewrite an existing assignment in place (leaving a .bak copy), otherwise append one.
+	local file="$1" key="$2" value="$3" escaped
+	# Escape what sed treats specially in a replacement (&, backslash, and the | delimiter) so such values are written literally; newlines in values remain unsupported.
+	escaped="$(printf '%s' "${value}" | sed -e 's/[&|\\]/\\&/g')"
+	if grep -q "^${key}=" "${file}"; then
+		sed -i.bak "s|^${key}=.*|${key}=${escaped}|" "${file}"
+	else
+		printf '\n%s=%s\n' "${key}" "${value}" >>"${file}"
+	fi
+}
diff --git a/scripts/ragflow_smoke/docker.sh b/scripts/ragflow_smoke/docker.sh
new file mode 100644
index 00000000..6fd98136
--- /dev/null
+++ b/scripts/ragflow_smoke/docker.sh
@@ -0,0 +1,135 @@
+# RAGFlow Docker evidence smoke helper functions.
+# Sourced by scripts/ragflow-docker-evidence-smoke.sh.
+
+prepare_official_ragflow_repo() {
+	local repo_dir="${WORK_DIR}/ragflow"
+	# Clone (or refresh) the RAGFlow checkout under WORK_DIR at RAGFLOW_REF, apply docker/.env overrides, and print the checkout path on stdout.
+	if [[ ! -d "${repo_dir}/.git" ]]; then
+		rm -rf "${repo_dir}"
+		git clone --depth 1 --branch "${RAGFLOW_REF}" "${RAGFLOW_REPO_URL}" "${repo_dir}" \
+			>"${ARTIFACT_DIR}/ragflow-git-clone.log" 2>&1
+	else
+		git -C "${repo_dir}" fetch --depth 1 origin "${RAGFLOW_REF}" \
+			>"${ARTIFACT_DIR}/ragflow-git-fetch.log" 2>&1
+		git -C "${repo_dir}" checkout -f FETCH_HEAD \
+			>"${ARTIFACT_DIR}/ragflow-git-checkout.log" 2>&1
+	fi
+	# Override device, image and published ports in docker/.env; most ports fall back to the literal defaults below when the ELF_RAGFLOW_* variable is unset.
+	update_env_var "${repo_dir}/docker/.env" "DEVICE" "${CPU_GPU_MODE}"
+	update_env_var "${repo_dir}/docker/.env" "SVR_WEB_HTTP_PORT" "${ELF_RAGFLOW_WEB_HTTP_PORT:-18080}"
+	update_env_var "${repo_dir}/docker/.env" "SVR_WEB_HTTPS_PORT" "${ELF_RAGFLOW_WEB_HTTPS_PORT:-18443}"
+	update_env_var "${repo_dir}/docker/.env" "SVR_HTTP_PORT" "${API_PORT}"
+	update_env_var "${repo_dir}/docker/.env" "ADMIN_SVR_HTTP_PORT" "${ELF_RAGFLOW_ADMIN_PORT:-19381}"
+	update_env_var "${repo_dir}/docker/.env" "SVR_MCP_PORT" "${ELF_RAGFLOW_MCP_PORT:-19382}"
+	update_env_var "${repo_dir}/docker/.env" "GO_HTTP_PORT" "${ELF_RAGFLOW_GO_HTTP_PORT:-19384}"
+	update_env_var "${repo_dir}/docker/.env" "GO_ADMIN_PORT" "${ELF_RAGFLOW_GO_ADMIN_PORT:-19383}"
+	update_env_var "${repo_dir}/docker/.env" "EXPOSE_MYSQL_PORT" "${ELF_RAGFLOW_MYSQL_PORT:-13306}"
+	update_env_var "${repo_dir}/docker/.env" "MINIO_CONSOLE_PORT" "${ELF_RAGFLOW_MINIO_CONSOLE_PORT:-19001}"
+	update_env_var "${repo_dir}/docker/.env" "MINIO_PORT" "${ELF_RAGFLOW_MINIO_PORT:-19000}"
+	update_env_var "${repo_dir}/docker/.env" "REDIS_PORT" "${ELF_RAGFLOW_REDIS_PORT:-16379}"
+	update_env_var "${repo_dir}/docker/.env" "ES_PORT" "${ELF_RAGFLOW_ES_PORT:-11200}"
+	update_env_var "${repo_dir}/docker/.env" "OS_PORT" "${ELF_RAGFLOW_OS_PORT:-11201}"
+	update_env_var "${repo_dir}/docker/.env" "RAGFLOW_IMAGE" "${RAGFLOW_IMAGE}"
+	# git output above is redirected to log files under ARTIFACT_DIR, so it does not mix with the path printed here.
+	printf '%s' "${repo_dir}"
+}
+
+run_with_timeout_if_available() {
+	# Run the command after $1 under `timeout $1` when that tool exists; otherwise run it with no time limit.
+	local limit="$1"
+	shift
+	if ! command -v timeout >/dev/null 2>&1; then
+		"$@"
+		return
+	fi
+	timeout "${limit}" "$@"
+}
+
+start_ragflow_stack() { # Start the compose stack from checkout $1 and record STARTED/SETUP_STATUS or typed failure globals, plus STARTUP_TIME_MS.
+	local repo_dir="$1"
+	local started_at ended_at
+	started_at="$(date +%s)"
+	# Run `docker compose up -d` inside the checkout's docker/ directory (under a timeout when one is available); all output goes to COMPOSE_UP_LOG.
+	if (
+		cd "${repo_dir}/docker"
+		run_with_timeout_if_available "${COMPOSE_TIMEOUT_SECONDS}" \
+			docker compose -p "${COMPOSE_PROJECT}" -f docker-compose.yml up -d
+	) >"${COMPOSE_UP_LOG}" 2>&1; then
+		STARTED="true"
+		SETUP_STATUS="pass"
+		FAILURE_CLASS=""
+		FAILURE_REASON=""
+	else
+		SETUP_STATUS="incomplete"
+		OVERALL_STATUS="incomplete"
+		RESULT_STATUS="incomplete"
+		FAILURE_CLASS="ragflow_compose_start_failed"
+		FAILURE_REASON="Official RAGFlow Docker Compose did not start successfully; see compose-up.log in the artifact directory."
+	fi
+	# Timestamps have whole-second resolution, so STARTUP_TIME_MS is always a multiple of 1000.
+	ended_at="$(date +%s)"
+	STARTUP_TIME_MS="$(((ended_at - started_at) * 1000))"
+}
+
+wait_for_ragflow_api() {
+	# Poll the health endpoint up to STARTUP_ATTEMPTS times, appending one JSONL record per probe; returns 0 on the first HTTP 200, 1 otherwise.
+	local attempt code
+	for attempt in $(seq 1 "${STARTUP_ATTEMPTS}"); do
+		code="$(curl -sS --connect-timeout 5 --max-time 15 -o /dev/null -w '%{http_code}' "${API_BASE}/api/v1/system/healthz" 2>/dev/null || true)"
+		jq -nc --argjson attempt "${attempt}" --arg code "${code}" --arg url "${API_BASE}/api/v1/system/healthz" '{
+			attempt: $attempt,
+			url: $url,
+			http_code: $code
+		}' >>"${STARTUP_ATTEMPTS_JSONL}"
+		# Each probe is time-bounded above so a connection that never answers cannot stall the bounded retry loop.
+		if [[ "${code}" == "200" ]]; then
+			return 0
+		fi
+		# Do not sleep after the final failed probe; there is nothing left to wait for.
+		[[ "${attempt}" -ge "${STARTUP_ATTEMPTS}" ]] || sleep "${STARTUP_INTERVAL_SECONDS}"
+	done
+
+	return 1
+}
+
+api_json_request() {
+	local method="$1"
+	local path="$2"
+	local request_file="$3"
+	local response_file="$4"
+	local stderr_file="${response_file}.stderr"
+	local code
+	# Send request_file as a JSON body to API_BASE+path with the bearer API_KEY; the response body lands in response_file and curl stderr in response_file.stderr.
+	code="$(curl -sS -X "${method}" \
+		-o "${response_file}" \
+		-w '%{http_code}' \
+		-H 'Content-Type: application/json' \
+		-H "Authorization: Bearer ${API_KEY}" \
+		--data-binary @"${request_file}" \
+		"${API_BASE}${path}" 2>"${stderr_file}" || true)"
+	# Persist the HTTP status and curl stderr next to the response as <response_file>.meta.json.
+	jq -n --arg code "${code}" --rawfile stderr "${stderr_file}" '{
+		http_code: $code,
+		stderr: $stderr
+	}' >"${response_file}.meta.json"
+	# Return success only when the HTTP status starts with 2; the response body itself is not inspected here.
+	[[ "${code}" =~ ^2 ]]
+}
+
+response_code_ok() { # Exit 0 only when the JSON body in file $1 indicates API-level success.
+	local response_file="$1"
+	# An explicit `code` is authoritative (0 = success); only bodies without one fall back to the presence of id/data, so error payloads such as {"code":109,"data":false} are rejected.
+	jq -e 'if (.code? != null) then .code == 0 else ((.id? != null) or (.data? != null)) end' "${response_file}" >/dev/null 2>&1
+}
+
+extract_id() { # Print the first id found in the JSON response file $1; prints nothing when no known location holds one.
+	local response_file="$1"
+	jq -r '
+		.data.id
+		// .data[0].id
+		// .data.document_id
+		// .data.chunk_id // .data.chunk.id # the documented "Add chunk" response nests the created chunk under data.chunk
+		// .id
+		// empty
+	' "${response_file}"
+}
diff --git a/scripts/ragflow_smoke/fixture.sh b/scripts/ragflow_smoke/fixture.sh
new file mode 100644
index 00000000..1b98e6c8
--- /dev/null
+++ b/scripts/ragflow_smoke/fixture.sh
@@ -0,0 +1,157 @@
+# RAGFlow Docker evidence smoke helper functions.
+# Sourced by scripts/ragflow-docker-evidence-smoke.sh.
+
+# Write the generated real_world_job fixture for this run to FIXTURE_PATH.
+write_fixture() {
+	local result_status
+	result_status="$(json_status "${RESULT_STATUS}")"
+	# Answer, claims, and evidence_ids are filled only for "pass"; other typed statuses land in .encoding.
+	jq -n \
+		--arg run_id "${RUN_ID}" \
+		--arg evidence_id "${EVIDENCE_ID}" \
+		--arg evidence_token "${EVIDENCE_TOKEN}" \
+		--arg corpus_text "${CORPUS_TEXT}" \
+		--arg result_status "${result_status}" \
+		--arg failure_reason "${FAILURE_REASON}" \
+		'{
+			schema: "elf.real_world_job/v1",
+			job_id: "ragflow-evidence-smoke-001",
+			suite: "retrieval",
+			title: "Map RAGFlow reference chunks to generated evidence",
+			corpus: {
+				corpus_id: "ragflow-generated-public-smoke",
+				profile: "generated_public",
+				items: [
+					{
+						evidence_id: $evidence_id,
+						kind: "document",
+						text: $corpus_text,
+						source_ref: {
+							schema: "source_ref/v1",
+							resolver: "ragflow_smoke/v1",
+							ref: {
+								run_id: $run_id,
+								evidence_token: $evidence_token
+							}
+						},
+						created_at: "2026-06-10T00:00:00Z"
+					}
+				],
+				adapter_response: {
+					adapter_id: "ragflow_docker_evidence_smoke",
+					answer: {
+						content: (
+							if $result_status == "pass" then
+								"RAGFlow returned reference chunks that map to the generated ragflow-smoke-anchor evidence id."
+							else
+								""
+							end
+						),
+						claims: (
+							if $result_status == "pass" then
+								[
+									{
+										claim_id: "ragflow_reference_mapping",
+										text: "RAGFlow reference chunks map to the generated ragflow-smoke-anchor evidence id.",
+										evidence_ids: [$evidence_id],
+										confidence: "derived_from_ragflow_reference_chunk_mapping"
+									}
+								]
+							else
+								[]
+							end
+						),
+						evidence_ids: (if $result_status == "pass" then [$evidence_id] else [] end),
+						latency_ms: 0.0,
+						cost: {
+							currency: "USD",
+							amount: 0.0,
+							input_tokens: 0,
+							output_tokens: 0
+						}
+					}
+				}
+			},
+			timeline: [
+				{
+					event_id: "ragflow-smoke-corpus-generated",
+					ts: "2026-06-10T00:00:00Z",
+					actor: "system",
+					action: "generated_public_corpus",
+					evidence_ids: [$evidence_id],
+					summary: "The RAGFlow smoke generated a tiny public corpus for reference chunk mapping."
+				}
+			],
+			prompt: {
+				role: "user",
+				content: "Which RAGFlow smoke evidence token maps to the generated reference chunk?",
+				job_mode: "answer",
+				constraints: ["cite_evidence", "avoid_broad_quality_claims"]
+			},
+			expected_answer: {
+				must_include: [
+					{
+						claim_id: "ragflow_reference_mapping",
+						text: "RAGFlow reference chunks map to the generated ragflow-smoke-anchor evidence id."
+					}
+				],
+				must_not_include: ["RAGFlow passed a broad graph/RAG quality benchmark."],
+				evidence_links: {
+					ragflow_reference_mapping: [$evidence_id]
+				},
+				answer_type: "direct_answer",
+				accepted_alternates: [],
+				requires_caveat: true,
+				requires_refusal: false
+			},
+			required_evidence: [
+				{
+					evidence_id: $evidence_id,
+					claim_id: "ragflow_reference_mapping",
+					requirement: "cite",
+					quote: "ragflow-smoke-anchor evidence id"
+				}
+			],
+			negative_traps: [],
+			scoring_rubric: {
+				dimensions: {
+					answer_correctness: {
+						weight: 0.3,
+						max_points: 1.0,
+						criteria: "States the generated evidence mapping without broad quality claims."
+					},
+					evidence_grounding: {
+						weight: 0.45,
+						max_points: 1.0,
+						criteria: "Maps returned RAGFlow reference chunks to the generated evidence id."
+					},
+					trap_avoidance: {
+						weight: 0.15,
+						max_points: 1.0,
+						criteria: "Does not claim broad RAGFlow quality from the tiny smoke."
+					},
+					latency_resource: {
+						weight: 0.1,
+						max_points: 1.0,
+						criteria: "Records setup, resource, provider, and reference-mapping boundaries."
+					}
+				},
+				pass_threshold: 0.75,
+				hard_fail_rules: []
+			},
+			allowed_uncertainty: {
+				can_answer_unknown: false,
+				acceptable_phrases: ["tiny generated corpus", "reference chunk smoke only"],
+				fallback_action: "state_blocker"
+			},
+			operator_debug: null,
+			encoding: {},
+			memory_evolution: null,
+			tags: ["external_adapter", "generated_public", "ragflow", "no_live_claim"]
+		}
+		| if ["blocked", "incomplete", "not_encoded"] | index($result_status) then
+			.encoding = {status: $result_status, reason: $failure_reason}
+		else
+			.
+		end' >"${FIXTURE_PATH}"
+}
diff --git a/scripts/ragflow_smoke/manifest.sh b/scripts/ragflow_smoke/manifest.sh
new file mode 100644
index 00000000..ecbb9b61
--- /dev/null
+++ b/scripts/ragflow_smoke/manifest.sh
@@ -0,0 +1,169 @@
+# RAGFlow Docker evidence smoke helper functions.
+# Sourced by scripts/ragflow-docker-evidence-smoke.sh.
+
+write_manifest() { # Write the external-adapter manifest JSON for this run to MANIFEST_OUT.
+	local generated_at out_rel manifest_rel retrieval_suite_status production_ops_status capability_retrieval_status capability_setup_status
+	generated_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
+	out_rel="$(relative_path "${OUT}")"
+	manifest_rel="$(relative_path "${MANIFEST_OUT}")"
+	retrieval_suite_status="$(json_status "${RESULT_STATUS}")"
+	capability_retrieval_status="$(json_status "${RESULT_STATUS}")"
+	capability_setup_status="$(json_status "${SETUP_STATUS}")"
+	production_ops_status="not_encoded"
+	# production_ops is always reported as not_encoded; every other status is passed through json_status.
+	jq -n \
+		--arg generated_at "${generated_at}" \
+		--arg manifest_id "ragflow-docker-evidence-smoke-${RUN_ID}" \
+		--arg out_rel "${out_rel}" \
+		--arg manifest_rel "${manifest_rel}" \
+		--arg evidence_class "${EVIDENCE_CLASS}" \
+		--arg overall_status "$(json_status "${OVERALL_STATUS}")" \
+		--arg setup_status "$(json_status "${SETUP_STATUS}")" \
+		--arg run_status "$(json_status "${RUN_STATUS}")" \
+		--arg result_status "$(json_status "${RESULT_STATUS}")" \
+		--arg retrieval_suite_status "${retrieval_suite_status}" \
+		--arg production_ops_status "${production_ops_status}" \
+		--arg capability_setup_status "${capability_setup_status}" \
+		--arg capability_retrieval_status "${capability_retrieval_status}" \
+		--arg ragflow_image "${RAGFLOW_IMAGE}" \
+		--arg cpu_gpu_mode "${CPU_GPU_MODE}" \
+		--arg failure_reason "${FAILURE_REASON}" \
+		--arg host_global_installs_required "${HOST_GLOBAL_INSTALLS_REQUIRED}" \
+		'{
+			schema: "elf.real_world_external_adapter_manifest/v1",
+			manifest_id: $manifest_id,
+			docker_isolation: {
+				default: true,
+				compose_file: "official RAGFlow docker/docker-compose.yml",
+				runner: "scripts/ragflow-docker-evidence-smoke.sh",
+				artifact_dir: "tmp/real-world-memory/ragflow-smoke",
+				host_global_installs_required: ($host_global_installs_required == "true"),
+				notes: [
+					"Generated by the RAGFlow evidence-smoke script at " + $generated_at + ".",
+					"The smoke uses a generated public corpus and does not use private corpus or operator-owned provider credentials."
+				]
+			},
+			adapters: [
+				{
+					adapter_id: "ragflow_docker_evidence_smoke",
+					project: "RAGFlow",
+					adapter_kind: "docker_service_evidence_smoke",
+					evidence_class: $evidence_class,
+					docker_default: true,
+					host_global_installs_required: ($host_global_installs_required == "true"),
+					overall_status: $overall_status,
+					setup: {
+						status: $setup_status,
+						evidence: "Official RAGFlow Docker Compose boundary and resource envelope were evaluated for the tiny evidence smoke.",
+						command: "cargo make smoke-ragflow-docker",
+						artifact: $out_rel
+					},
+					run: {
+						status: $run_status,
+						evidence: "The smoke attempts dataset creation, empty-document corpus ingest, chunk insert, retrieval query, and reference chunk extraction.",
+						command: "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker",
+						artifact: $out_rel
+					},
+					result: {
+						status: $result_status,
+						evidence: (
+							if $failure_reason == "" then "Returned RAGFlow reference chunks were mapped to generated real_world_job evidence ids for the smoke only."
+							else $failure_reason
+							end
+						),
+						artifact: $out_rel
+					},
+					capabilities: [
+						{
+							capability: "official_docker_service_boundary",
+							status: $capability_setup_status,
+							evidence: "The script uses the official RAGFlow Docker Compose setup and records image, disk, startup, CPU/GPU, and vm.max_map_count evidence."
+						},
+						{
+							capability: "dataset_or_chunk_ingest",
+							status: $run_status,
+							evidence: "The live path creates a generated public dataset, empty document, and chunk before querying."
+						},
+						{
+							capability: "retrieval_reference_mapping",
+							status: $capability_retrieval_status,
+							evidence: "The script maps returned chunk id, document id, document name, dataset id, positions, and similarity fields to benchmark evidence ids."
+						},
+						{
+							capability: "quality_or_scale_claim",
+							status: "not_encoded",
+							evidence: "The smoke does not run broad RAGFlow quality scoring, scale tests, private corpora, or comparative ranking claims."
+						}
+					],
+					suites: [
+						{
+							suite_id: "retrieval",
+							status: $retrieval_suite_status,
+							evidence: "Only the generated-public RAGFlow evidence-smoke retrieval path is represented."
+						},
+						{
+							suite_id: "production_ops",
+							status: $production_ops_status,
+							evidence: "Resource envelope evidence is recorded, but no production-ops suite scoring is encoded."
+						},
+						{
+							suite_id: "knowledge_compilation",
+							status: "not_encoded",
+							evidence: "RAGFlow page or knowledge-compilation behavior is not part of this smoke."
+						}
+					],
+					evidence: [
+						{
+							kind: "artifact",
+							ref: $out_rel,
+							status: $result_status
+						},
+						{
+							kind: "manifest",
+							ref: $manifest_rel,
+							status: $overall_status
+						},
+						{
+							kind: "source",
+							ref: "https://ragflow.io/docs/",
+							status: "real"
+						},
+						{
+							kind: "source",
+							ref: "https://raw.githubusercontent.com/infiniflow/ragflow/main/docs/references/http_api_reference.md",
+							status: "real"
+						}
+					],
+					execution_metadata: {
+						sources: [
+							{
+								label: "RAGFlow quickstart",
+								url: "https://ragflow.io/docs/",
+								evidence: "Official Docker startup, resource envelope, vm.max_map_count, and provider configuration guidance."
+							},
+							{
+								label: "RAGFlow HTTP API reference",
+								url: "https://raw.githubusercontent.com/infiniflow/ragflow/main/docs/references/http_api_reference.md",
+								evidence: "Official dataset, document, chunk, retrieval, and reference-chunk field contract."
+							}
+						],
+						setup_path: "Run the official RAGFlow Docker Compose stack with generated public corpus only.",
+						runtime_boundary: "Official RAGFlow Docker Compose service boundary; no host-global RAGFlow install.",
+						resource_expectation: (
+							"RAGFlow image " + $ragflow_image + ", CPU/GPU mode " + $cpu_gpu_mode + ", official minimums 4 CPU cores, 16 GB RAM, 50 GB disk, and vm.max_map_count >= 262144."
+						),
+						retry_guidance: [
+							"Default command records a typed blocked preflight unless resource-heavy startup is explicitly enabled.",
+							"Set ELF_RAGFLOW_SMOKE_START=1 and ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 for a live Docker startup attempt.",
+							"Provide only a local self-hosted RAGFlow API key; do not use private corpora or operator-owned model provider credentials for this smoke."
+						],
+						research_depth: "D2 feasibility plus XY-885 evidence-smoke implementation; generated artifact decides live evidence class."
+					},
+					notes: [
+						"This adapter record is generated by a smoke artifact and must not be generalized into broad RAGFlow quality evidence.",
+						"Failure before query output remains typed as blocked, incomplete, or not_encoded."
+					]
+				}
+			]
+		}' >"${MANIFEST_OUT}"
+}
diff --git a/scripts/ragflow_smoke/materialization.sh b/scripts/ragflow_smoke/materialization.sh
new file mode 100644
index 00000000..78725d9d
--- /dev/null
+++ b/scripts/ragflow_smoke/materialization.sh
@@ -0,0 +1,245 @@
+# RAGFlow Docker evidence smoke helper functions.
+# Sourced by scripts/ragflow-docker-evidence-smoke.sh.
+
+write_artifact() { # Write the smoke materialization JSON (setup, run steps, result, evidence mapping) to OUT.
+	local generated_at out_rel manifest_rel fixture_rel report_json_rel report_md_rel docker_status git_status curl_status jq_status
+	generated_at="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
+	out_rel="$(relative_path "${OUT}")"
+	manifest_rel="$(relative_path "${MANIFEST_OUT}")"
+	fixture_rel="$(relative_path "${FIXTURE_PATH}")"
+	report_json_rel="$(relative_path "${REPORT_JSON}")"
+	report_md_rel="$(relative_path "${REPORT_MD}")"
+	docker_status="$(optional_command_status docker)"
+	git_status="$(optional_command_status git)"
+	curl_status="$(optional_command_status curl)"
+	jq_status="$(optional_command_status jq)"
+	# Everything is passed as a string: the template maps "" to null and compares flags to "1"/"true" for booleans.
+	jq -n \
+		--arg schema "elf.ragflow_docker_evidence_smoke/v1" \
+		--arg run_id "${RUN_ID}" \
+		--arg generated_at "${generated_at}" \
+		--arg adapter_id "ragflow_docker_evidence_smoke" \
+		--arg evidence_class "${EVIDENCE_CLASS}" \
+		--arg overall_status "$(json_status "${OVERALL_STATUS}")" \
+		--arg setup_status "$(json_status "${SETUP_STATUS}")" \
+		--arg run_status "$(json_status "${RUN_STATUS}")" \
+		--arg result_status "$(json_status "${RESULT_STATUS}")" \
+		--arg failure_class "${FAILURE_CLASS}" \
+		--arg failure_reason "${FAILURE_REASON}" \
+		--arg out_rel "${out_rel}" \
+		--arg manifest_rel "${manifest_rel}" \
+		--arg fixture_rel "${fixture_rel}" \
+		--arg report_json_rel "${report_json_rel}" \
+		--arg report_md_rel "${report_md_rel}" \
+		--arg artifact_dir "$(relative_path "${ARTIFACT_DIR}")" \
+		--arg work_dir "$(relative_path "${WORK_DIR}")" \
+		--arg repo_url "${RAGFLOW_REPO_URL}" \
+		--arg ragflow_ref "${RAGFLOW_REF}" \
+		--arg ragflow_image "${RAGFLOW_IMAGE}" \
+		--arg compose_project "${COMPOSE_PROJECT}" \
+		--arg cpu_gpu_mode "${CPU_GPU_MODE}" \
+		--arg start_enabled "${START_RAGFLOW}" \
+		--arg accept_resource_envelope "${ACCEPT_RESOURCE_ENVELOPE}" \
+		--arg allow_arm "${ALLOW_ARM}" \
+		--arg pull_image "${PULL_IMAGE}" \
+		--arg cleanup "${CLEANUP}" \
+		--arg api_base "${API_BASE}" \
+		--arg api_key_provided "$([[ -n "${API_KEY}" ]] && printf true || printf false)" \
+		--arg startup_time_ms "${STARTUP_TIME_MS}" \
+		--arg started "${STARTED}" \
+		--arg startup_attempt_count "${STARTUP_ATTEMPTS}" \
+		--arg startup_interval_seconds "${STARTUP_INTERVAL_SECONDS}" \
+		--arg compose_timeout_seconds "${COMPOSE_TIMEOUT_SECONDS}" \
+		--arg evidence_id "${EVIDENCE_ID}" \
+		--arg document_name "${DOCUMENT_NAME}" \
+		--arg evidence_token "${EVIDENCE_TOKEN}" \
+		--arg corpus_text "${CORPUS_TEXT}" \
+		--arg dataset_id "${DATASET_ID}" \
+		--arg document_id "${DOCUMENT_ID}" \
+		--arg chunk_id "${CHUNK_ID}" \
+		--arg vm_max_map_count "${VM_MAX_MAP_COUNT}" \
+		--arg vm_max_map_count_status "${VM_MAX_MAP_COUNT_STATUS}" \
+		--arg vm_max_map_count_action "${VM_MAX_MAP_COUNT_ACTION}" \
+		--arg image_present "${IMAGE_PRESENT}" \
+		--arg image_size_bytes "${IMAGE_SIZE_BYTES}" \
+		--arg host_global_installs_required "${HOST_GLOBAL_INSTALLS_REQUIRED}" \
+		--arg docker_status "${docker_status}" \
+		--arg git_status "${git_status}" \
+		--arg curl_status "${curl_status}" \
+		--arg jq_status "${jq_status}" \
+		--arg dataset_step_status "$(json_status "${DATASET_STEP_STATUS}")" \
+		--arg document_step_status "$(json_status "${DOCUMENT_STEP_STATUS}")" \
+		--arg chunk_step_status "$(json_status "${CHUNK_STEP_STATUS}")" \
+		--arg retrieval_step_status "$(json_status "${RETRIEVAL_STEP_STATUS}")" \
+		--slurpfile docker_info "${DOCKER_INFO}" \
+		--slurpfile image_inspect "${IMAGE_INSPECT}" \
+		--slurpfile reference_mapping "${REFERENCE_MAPPING}" \
+		--rawfile docker_df "${DOCKER_DF}" \
+		--rawfile compose_up_log "${COMPOSE_UP_LOG}" \
+		--rawfile compose_down_log "${COMPOSE_DOWN_LOG}" \
+		--slurpfile dataset_response "${DATASET_RESPONSE}" \
+		--slurpfile document_response "${DOCUMENT_RESPONSE}" \
+		--slurpfile chunk_response "${CHUNK_RESPONSE}" \
+		--slurpfile retrieval_response "${RETRIEVAL_RESPONSE}" \
+		--slurpfile scored_benchmark "${SCORED_BENCHMARK}" \
+		--slurpfile startup_attempts <(jq -s '.' "${STARTUP_ATTEMPTS_JSONL}") \
+		'{
+			schema: $schema,
+			run_id: $run_id,
+			generated_at: $generated_at,
+			adapter_id: $adapter_id,
+			evidence_class: $evidence_class,
+			overall_status: $overall_status,
+			status_source: "smoke_materialization",
+			scored_benchmark: $scored_benchmark[0],
+			no_quality_claim: true,
+			failure: (
+				if $failure_class == "" then null
+				else {
+					class: $failure_class,
+					reason: $failure_reason
+				}
+				end
+			),
+			artifacts: {
+				smoke: $out_rel,
+				external_adapter_manifest: $manifest_rel,
+				generated_fixture: $fixture_rel,
+				scored_report_json: $report_json_rel,
+				scored_report_markdown: $report_md_rel,
+				artifact_dir: $artifact_dir,
+				work_dir: $work_dir
+			},
+			upstream: {
+				repository: $repo_url,
+				ref: $ragflow_ref,
+				quickstart: "https://ragflow.io/docs/",
+				http_api_reference: "https://raw.githubusercontent.com/infiniflow/ragflow/main/docs/references/http_api_reference.md",
+				api_key_guide: "https://ragflow.io/docs/acquire_ragflow_api_key"
+			},
+			docker_boundary: {
+				status: $setup_status,
+				official_compose_path: "ragflow/docker/docker-compose.yml",
+				compose_project: $compose_project,
+				image: $ragflow_image,
+				device: $cpu_gpu_mode,
+				start_enabled: ($start_enabled == "1"),
+				resource_envelope_accepted: ($accept_resource_envelope == "1"),
+				allow_arm: ($allow_arm == "1"),
+				pull_image_requested: ($pull_image == "1"),
+				cleanup_requested: ($cleanup == "1"),
+				host_global_installs_required: ($host_global_installs_required == "true"),
+				tooling: {
+					docker: $docker_status,
+					git: $git_status,
+					curl: $curl_status,
+					jq: $jq_status
+				}
+			},
+			setup: {
+				status: $setup_status,
+				command: "cargo make smoke-ragflow-docker",
+				live_command: "ELF_RAGFLOW_SMOKE_START=1 ELF_RAGFLOW_SMOKE_ACCEPT_RESOURCE_ENVELOPE=1 cargo make smoke-ragflow-docker",
+				started: ($started == "true"),
+				startup_time_ms: (if $startup_time_ms == "" then null else ($startup_time_ms | tonumber) end),
+				vm_max_map_count: {
+					status: $vm_max_map_count_status,
+					observed: (if $vm_max_map_count == "" then null else $vm_max_map_count end),
+					required_min: 262144,
+					action: $vm_max_map_count_action
+				},
+				image: {
+					present: ($image_present == "true"),
+					size_bytes: (if $image_size_bytes == "" then null else ($image_size_bytes | tonumber) end),
+					official_compressed_size_note: "RAGFlow quickstart lists the stable image at about 2 GB compressed.",
+					official_expanded_size_note: "RAGFlow quickstart says the image expands to about 7 GB once unpacked.",
+					inspect: ($image_inspect[0] // [])
+				},
+				resource_envelope: {
+					official_min_cpu_cores: 4,
+					official_min_ram_gb: 16,
+					official_min_disk_gb: 50,
+					docker_info: ($docker_info[0] // {}),
+					docker_system_df: $docker_df
+				},
+				provider_boundaries: {
+					ragflow_api_base: $api_base,
+					ragflow_api_key_provided: ($api_key_provided == "true"),
+					operator_owned_provider_credentials_used: false,
+					private_corpus_used: false,
+					generated_public_corpus_only: true,
+					external_llm_quality_scoring_claimed: false
+				},
+				retry_behavior: {
+					startup_poll_attempts_configured: ($startup_attempt_count | tonumber),
+					startup_interval_seconds: ($startup_interval_seconds | tonumber),
+					compose_timeout_seconds: ($compose_timeout_seconds | tonumber),
+					startup_attempts: ($startup_attempts[0] // [])
+				},
+				log_excerpt: {
+					compose_up: ($compose_up_log | split("\n") | .[0:40]),
+					compose_down: ($compose_down_log | split("\n") | .[0:20])
+				}
+			},
+			corpus: {
+				profile: "generated_public",
+				evidence_id: $evidence_id,
+				document_name: $document_name,
+				evidence_token: $evidence_token,
+				text: $corpus_text,
+				dataset_id: (if $dataset_id == "" then null else $dataset_id end),
+				document_id: (if $document_id == "" then null else $document_id end),
+				chunk_id: (if $chunk_id == "" then null else $chunk_id end)
+			},
+			run: {
+				status: $run_status,
+				steps: {
+					dataset_creation: {
+						status: $dataset_step_status,
+						request_artifact: "dataset-create-request.json",
+						response_artifact: "dataset-create-response.json",
+						response: ($dataset_response[0] // null)
+					},
+					document_creation: {
+						status: $document_step_status,
+						request_artifact: "document-create-request.json",
+						response_artifact: "document-create-response.json",
+						response: ($document_response[0] // null)
+					},
+					chunk_ingest: {
+						status: $chunk_step_status,
+						request_artifact: "chunk-create-request.json",
+						response_artifact: "chunk-create-response.json",
+						response: ($chunk_response[0] // null)
+					},
+					retrieval_query: {
+						status: $retrieval_step_status,
+						request_artifact: "retrieval-request.json",
+						response_artifact: "retrieval-response.json",
+						response: ($retrieval_response[0] // null)
+					}
+				}
+			},
+			result: {
+				status: $result_status,
+				evidence: "RAGFlow retrieval reference chunks are mapped to real_world_job evidence ids when content or document metadata matches the generated public corpus.",
+				reference_chunk_count: (($reference_mapping[0] // []) | length),
+				mapped_reference_chunk_count: (($reference_mapping[0] // []) | map(select((.evidence_ids // []) | length > 0)) | length)
+			},
+			evidence_mapping: {
+				expected_evidence_ids: [$evidence_id],
+				reference_chunks: ($reference_mapping[0] // []),
+				field_mapping: {
+					"id": "chunk_id",
+					"document_id": "document_id",
+					"document_name_or_document_keyword": "document_name",
+					"dataset_id_or_kb_id": "dataset_id",
+					"content_or_content_with_weight": "content",
+					"positions": "positions",
+					"similarity": "similarity",
+					"vector_similarity": "vector_similarity",
+					"term_similarity": "term_similarity"
+				}
+			}
+		}' >"${OUT}"
+}
diff --git a/scripts/ragflow_smoke/scoring.sh b/scripts/ragflow_smoke/scoring.sh
new file mode 100644
index 00000000..367ef776
--- /dev/null
+++ b/scripts/ragflow_smoke/scoring.sh
@@ -0,0 +1,60 @@
+# RAGFlow Docker evidence smoke helper functions.
+# Sourced by scripts/ragflow-docker-evidence-smoke.sh.
+
+write_scored_benchmark() { # Write SCORED_BENCHMARK from the scored report, or a "pending" placeholder.
+	if [[ -s "${REPORT_JSON}" ]]; then # Non-empty report: status is the first non-zero summary count in the order below.
+		jq 'def count($key): (.summary[$key] // 0);
+			def scored_status:
+				if count("wrong_result") > 0 then "wrong_result"
+				elif count("lifecycle_fail") > 0 then "lifecycle_fail"
+				elif count("incomplete") > 0 then "incomplete"
+				elif count("blocked") > 0 then "blocked"
+				elif count("not_encoded") > 0 then "not_encoded"
+				elif count("pass") > 0 then "pass"
+				else "not_encoded"
+				end;
+			{
+				schema: "elf.scored_benchmark_status/v1",
+				source: "real_world_job_benchmark",
+				status: scored_status,
+				counts: {
+					pass: count("pass"),
+					wrong_result: count("wrong_result"),
+					lifecycle_fail: count("lifecycle_fail"),
+					incomplete: count("incomplete"),
+					blocked: count("blocked"),
+					not_encoded: count("not_encoded")
+				},
+				job_count: (.summary.job_count // 0),
+				mean_score: (.summary.mean_score // null),
+				evidence_coverage: (.summary.evidence_coverage // null)
+			}' "${REPORT_JSON}" >"${SCORED_BENCHMARK}"
+	else # Report missing or empty: record that scoring has not produced a result yet.
+		jq -n '{
+			schema: "elf.scored_benchmark_status/v1",
+			source: "real_world_job_benchmark",
+			status: "pending",
+			reason: "The smoke materialization was written before benchmark scoring completed."
+		}' >"${SCORED_BENCHMARK}"
+	fi
+}
+
+write_scored_report() {
+	(
+		cd "${ROOT_DIR}"
+		cargo run -p elf-eval --bin real_world_job_benchmark -- run \
+			--fixtures "${FIXTURE_PATH}" \
+			--out "${REPORT_JSON}" \
+			--run-id real-world-memory-live-ragflow \
+			--adapter-id ragflow_docker_evidence_smoke \
+			--adapter-name "RAGFlow Docker evidence smoke adapter" \
+			--adapter-behavior docker_service_evidence_smoke \
+			--adapter-storage-status "$(json_status "${SETUP_STATUS}")" \
+			--adapter-runtime-status "$(json_status "${OVERALL_STATUS}")" \
+			--adapter-notes "Generated by the RAGFlow Docker evidence smoke; pass or wrong_result requires reference chunks mapped to generated evidence ids, while resource/setup/API-key limits remain typed." \
+			--external-adapter-manifest "${MANIFEST_OUT}"
+		cargo run -p elf-eval --bin real_world_job_benchmark -- publish \
+			--report "${REPORT_JSON}" \
+			--out "${REPORT_MD}"
+	)
+}
diff --git a/scripts/ragflow_smoke/summary.sh b/scripts/ragflow_smoke/summary.sh
new file mode 100644
index 00000000..7cb99214
--- /dev/null
+++ b/scripts/ragflow_smoke/summary.sh
@@ -0,0 +1,49 @@
+# RAGFlow Docker evidence smoke helper functions.
+# Sourced by scripts/ragflow-docker-evidence-smoke.sh.
+
+write_summary() { # Combine the materialization (OUT), manifest, and scored report into SUMMARY_OUT.
+	jq -n \
+		--slurpfile materialization "${OUT}" \
+		--slurpfile manifest "${MANIFEST_OUT}" \
+		--slurpfile report "${REPORT_JSON}" \
+		'{
+			schema: "elf.ragflow_docker_smoke_summary/v1",
+				generated_at: (now | todateiso8601),
+				adapter_id: "ragflow_docker_evidence_smoke",
+				evidence_class: $materialization[0].evidence_class,
+				status_boundary: {
+					materialization: "setup/run/evidence-mapping state emitted by the smoke runner",
+					manifest: "external adapter declaration consumed by the scorer",
+					scored_benchmark: "post-score real_world_job outcome; use this for quality status"
+				},
+				scored_benchmark: $materialization[0].scored_benchmark,
+				materialization: $materialization[0],
+				manifest: {
+					json: ($materialization[0].artifacts.external_adapter_manifest // "tmp/real-world-memory/ragflow-smoke/memory_projects_manifest.ragflow-smoke.json"),
+					status_source: "external_adapter_manifest_pre_score",
+					summary: $manifest[0].adapters[0].overall_status,
+					suites: $manifest[0].adapters[0].suites
+			},
+			report: {
+				json: ($materialization[0].artifacts.scored_report_json // "tmp/real-world-memory/ragflow-smoke/ragflow-report.json"),
+				markdown: ($materialization[0].artifacts.scored_report_markdown // "tmp/real-world-memory/ragflow-smoke/ragflow-report.md"),
+				summary: $report[0].summary,
+				suites: $report[0].suites
+			}
+		}' >"${SUMMARY_OUT}"
+}
+
+write_outputs() {
+	write_scored_benchmark
+	write_artifact
+	write_manifest
+	write_fixture
+	write_scored_report
+	write_scored_benchmark
+	write_artifact
+	write_summary
+	echo "RAGFlow smoke artifact: ${OUT}"
+	echo "RAGFlow smoke manifest: ${MANIFEST_OUT}"
+	echo "RAGFlow smoke report: ${REPORT_JSON}"
+	echo "RAGFlow smoke summary: ${SUMMARY_OUT}"
+}

From 2076b24d18f6b4fbe311c792b9b11de943e87e35 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Wed, 1 Jul 2026 05:50:18 -0400
Subject: [PATCH 2/4] {"schema":"decodex/commit/1","summary":"Clean Letta smoke
 context whitespace","authority":"manual"}

---
 scripts/letta_core_archive_smoke/context.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/letta_core_archive_smoke/context.py b/scripts/letta_core_archive_smoke/context.py
index 813eab1f..73e277e9 100644
--- a/scripts/letta_core_archive_smoke/context.py
+++ b/scripts/letta_core_archive_smoke/context.py
@@ -49,4 +49,3 @@
 STARTUP_INTERVAL_SECONDS = float(os.environ.get("ELF_LETTA_STARTUP_INTERVAL_SECONDS", "2"))
 
 CORE_KINDS = {"core_block", "core_block_contract", "core_block_event"}
-

From e4f9f320893fd3a6bc9da633c91964a2373baa7e Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Wed, 1 Jul 2026 05:59:48 -0400
Subject: [PATCH 3/4] {"schema":"decodex/commit/1","summary":"Address smoke
 runner review comments","authority":"manual"}

---
 scripts/graphiti_temporal_smoke/context.py    |  2 --
 scripts/graphiti_temporal_smoke/manifest.py   |  3 +--
 .../materialization.py                        | 23 ++++++++++++++++++-
 scripts/graphiti_temporal_smoke/runner.py     | 10 --------
 scripts/graphiti_temporal_smoke/runtime.py    | 21 +++++++++++++++--
 scripts/letta_core_archive_smoke/artifacts.py | 16 ++++++++++++-
 scripts/letta_core_archive_smoke/context.py   |  2 --
 scripts/letta_core_archive_smoke/runtime.py   | 14 +++++++++--
 8 files changed, 69 insertions(+), 22 deletions(-)

diff --git a/scripts/graphiti_temporal_smoke/context.py b/scripts/graphiti_temporal_smoke/context.py
index 442836e0..6884619d 100644
--- a/scripts/graphiti_temporal_smoke/context.py
+++ b/scripts/graphiti_temporal_smoke/context.py
@@ -6,8 +6,6 @@
 from datetime import datetime, timezone
 from pathlib import Path
 
-from typing import Any
-
 
 SCRIPT_DIR = Path(__file__).resolve().parent.parent
 ROOT_DIR = SCRIPT_DIR.parent
diff --git a/scripts/graphiti_temporal_smoke/manifest.py b/scripts/graphiti_temporal_smoke/manifest.py
index b8b66bd1..e67a81c2 100644
--- a/scripts/graphiti_temporal_smoke/manifest.py
+++ b/scripts/graphiti_temporal_smoke/manifest.py
@@ -2,11 +2,10 @@
 
 from __future__ import annotations
 
-from pathlib import Path
 from typing import Any
 
 from .common import rel, utc_now, write_json
-from .context import *  # noqa: F403
+from .context import FALKORDB_HOST, FALKORDB_PORT, GRAPHITI_REF, MANIFEST_OUT, OUT, RUN_ID, TIMEOUT_SECONDS
 from .models import StatusState
 
 def write_manifest(status: StatusState) -> dict[str, Any]:
diff --git a/scripts/graphiti_temporal_smoke/materialization.py b/scripts/graphiti_temporal_smoke/materialization.py
index f96fd4f0..1658789c 100644
--- a/scripts/graphiti_temporal_smoke/materialization.py
+++ b/scripts/graphiti_temporal_smoke/materialization.py
@@ -8,7 +8,28 @@
 
 from .benchmark import scored_benchmark
 from .common import command_to_json, dir_size, file_count, rel, utc_now, write_json
-from .context import *  # noqa: F403
+from .context import (
+    API_BASE,
+    API_KEY,
+    EMBEDDING_MODEL,
+    FALKORDB_DATABASE,
+    FALKORDB_HOST,
+    FALKORDB_PASSWORD,
+    FALKORDB_PORT,
+    FALKORDB_USERNAME,
+    GRAPHITI_PACKAGE,
+    GRAPHITI_REF,
+    LLM_MODEL,
+    MANIFEST_OUT,
+    OUT,
+    REPORT_JSON,
+    REPORT_MD,
+    RUN_ID,
+    RUN_LIVE,
+    SUMMARY_OUT,
+    TIMEOUT_SECONDS,
+    WORK_DIR,
+)
 from .models import CommandRecord, StatusState
 
 def write_materialization(
diff --git a/scripts/graphiti_temporal_smoke/runner.py b/scripts/graphiti_temporal_smoke/runner.py
index 16c20989..66ed3e40 100644
--- a/scripts/graphiti_temporal_smoke/runner.py
+++ b/scripts/graphiti_temporal_smoke/runner.py
@@ -120,16 +120,6 @@ def main() -> int:
                     status.failure_reason = mapping["reason"]
 
     fixture_path = write_fixture(facts, status, mapping)
-    materialization = write_materialization(
-        status,
-        facts,
-        fixture_path,
-        command_records,
-        inserted,
-        search_results,
-        mapping,
-        started_at,
-    )
     manifest = write_manifest(status)
     report = run_scored_report(fixture_path, MANIFEST_OUT, status)
     materialization = write_materialization(
diff --git a/scripts/graphiti_temporal_smoke/runtime.py b/scripts/graphiti_temporal_smoke/runtime.py
index ffe6dbab..ab224fee 100644
--- a/scripts/graphiti_temporal_smoke/runtime.py
+++ b/scripts/graphiti_temporal_smoke/runtime.py
@@ -10,8 +10,25 @@
 from pathlib import Path
 from typing import Any
 
-from .common import run_command, write_json
-from .context import *  # noqa: F403
+from .common import rel, run_command, write_json
+from .context import (
+    API_BASE,
+    API_KEY,
+    EMBEDDING_MODEL,
+    FALKORDB_DATABASE,
+    FALKORDB_HOST,
+    FALKORDB_PASSWORD,
+    FALKORDB_PORT,
+    FALKORDB_USERNAME,
+    GRAPHITI_PACKAGE,
+    INSTALL_GRAPHITI,
+    LLM_MODEL,
+    LOG_DIR,
+    RUN_ID,
+    STARTUP_ATTEMPTS,
+    STARTUP_INTERVAL_SECONDS,
+    WORK_DIR,
+)
 from .corpus import temporal_facts
 from .models import CommandRecord
 
diff --git a/scripts/letta_core_archive_smoke/artifacts.py b/scripts/letta_core_archive_smoke/artifacts.py
index e4e12276..c631b31e 100644
--- a/scripts/letta_core_archive_smoke/artifacts.py
+++ b/scripts/letta_core_archive_smoke/artifacts.py
@@ -8,7 +8,21 @@
 
 from .benchmark import scored_benchmark
 from .common import command_to_json, rel, utc_now, write_json
-from .context import *  # noqa: F403
+from .context import (
+    LETTA_BASE_URL,
+    LETTA_CLIENT_REF,
+    LETTA_EMBEDDING,
+    LETTA_MODEL,
+    MANIFEST_OUT,
+    OUT,
+    REPORT_JSON,
+    REPORT_MD,
+    RUN_ID,
+    RUN_LIVE,
+    SUMMARY_OUT,
+    TIMEOUT_SECONDS,
+    WORK_DIR,
+)
 from .fixtures import benchmark_input_contract
 from .models import CommandRecord, StatusState
 
diff --git a/scripts/letta_core_archive_smoke/context.py b/scripts/letta_core_archive_smoke/context.py
index 73e277e9..63152153 100644
--- a/scripts/letta_core_archive_smoke/context.py
+++ b/scripts/letta_core_archive_smoke/context.py
@@ -6,8 +6,6 @@
 from datetime import datetime, timezone
 from pathlib import Path
 
-from typing import Any
-
 
 SCRIPT_DIR = Path(__file__).resolve().parent.parent
 ROOT_DIR = SCRIPT_DIR.parent
diff --git a/scripts/letta_core_archive_smoke/runtime.py b/scripts/letta_core_archive_smoke/runtime.py
index 5eddd2b7..fa9c04be 100644
--- a/scripts/letta_core_archive_smoke/runtime.py
+++ b/scripts/letta_core_archive_smoke/runtime.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 import json
-import textwrap
+import sys
 import time
 import urllib.error
 import urllib.request
@@ -11,7 +11,17 @@
 from typing import Any
 
 from .common import run_command, write_json
-from .context import *  # noqa: F403
+from .context import (
+    INSTALL_CLIENT,
+    LETTA_BASE_URL,
+    LETTA_CLIENT_PACKAGE,
+    LETTA_EMBEDDING,
+    LETTA_MODEL,
+    RUN_ID,
+    STARTUP_ATTEMPTS,
+    STARTUP_INTERVAL_SECONDS,
+    WORK_DIR,
+)
 from .fixtures import benchmark_input_contract, slug
 from .models import CommandRecord
 

From 842b8f927e034aaab53225e94d2410d65a862ed4 Mon Sep 17 00:00:00 2001
From: Yvette Carlisle <y@acg.box>
Date: Wed, 1 Jul 2026 06:05:56 -0400
Subject: [PATCH 4/4] {"schema":"decodex/commit/1","summary":"Remove unused
 Letta runtime import","authority":"manual"}

---
 scripts/letta_core_archive_smoke/runtime.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/letta_core_archive_smoke/runtime.py b/scripts/letta_core_archive_smoke/runtime.py
index fa9c04be..d3959cc5 100644
--- a/scripts/letta_core_archive_smoke/runtime.py
+++ b/scripts/letta_core_archive_smoke/runtime.py
@@ -22,7 +22,7 @@
     STARTUP_INTERVAL_SECONDS,
     WORK_DIR,
 )
-from .fixtures import benchmark_input_contract, slug
+from .fixtures import benchmark_input_contract
 from .models import CommandRecord
 
 def wait_for_letta(command_records: list[CommandRecord]) -> bool: