Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,297 changes: 1 addition & 1,296 deletions scripts/graphiti-zep-docker-temporal-smoke.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions scripts/graphiti_temporal_smoke/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Graphiti/Zep temporal smoke runner modules."""
107 changes: 107 additions & 0 deletions scripts/graphiti_temporal_smoke/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""Scoring helpers for the Graphiti/Zep temporal smoke."""

from __future__ import annotations

import json
import subprocess
from pathlib import Path
from typing import Any

from .common import rel
from .context import REPORT_JSON, REPORT_MD, ROOT_DIR
from .models import StatusState

def run_scored_report(fixture_path: Path, manifest_path: Path, status: StatusState) -> dict[str, Any]:
    """Score the smoke fixture with ``real_world_job_benchmark`` and publish the result.

    Two ``cargo run`` steps are executed from ``ROOT_DIR``: ``run`` scores
    ``fixture_path`` into ``REPORT_JSON`` (tagging the adapter with
    ``status.setup`` and ``status.overall``), then ``publish`` renders that JSON
    into ``REPORT_MD``. Both steps use ``check=True``, so a non-zero exit raises
    ``subprocess.CalledProcessError``.

    Returns:
        A mapping with the ``rel``-formatted report paths under ``"json"`` and
        ``"markdown"``, plus the ``"summary"`` and ``"suites"`` sections read
        back from the JSON report (``{}`` / ``[]`` when absent).
    """
    benchmark = ["cargo", "run", "-p", "elf-eval", "--bin", "real_world_job_benchmark", "--"]
    json_target = str(REPORT_JSON)

    # Flag/value pairs for the scoring step, kept in command-line order.
    run_options = (
        ("--fixtures", str(fixture_path)),
        ("--out", json_target),
        ("--run-id", "real-world-memory-live-graphiti-zep"),
        ("--adapter-id", "graphiti_zep_temporal_smoke"),
        ("--adapter-name", "Graphiti/Zep Docker temporal smoke adapter"),
        ("--adapter-behavior", "docker_python_falkordb_temporal_smoke"),
        ("--adapter-storage-status", status.setup),
        ("--adapter-runtime-status", status.overall),
        (
            "--adapter-notes",
            "Generated by the Graphiti/Zep Docker temporal smoke; pass or wrong_result requires current and historical validity-window facts mapped to generated evidence ids, while provider/setup limits remain typed.",
        ),
        ("--external-adapter-manifest", str(manifest_path)),
    )
    score_step = [*benchmark, "run", *(token for option in run_options for token in option)]
    publish_step = [*benchmark, "publish", "--report", json_target, "--out", str(REPORT_MD)]

    # Publishing reads the JSON written by scoring, so the order matters.
    for step in (score_step, publish_step):
        subprocess.run(step, cwd=ROOT_DIR, check=True)

    scored = json.loads(REPORT_JSON.read_text(encoding="utf-8"))

    return {
        "json": rel(REPORT_JSON),
        "markdown": rel(REPORT_MD),
        "summary": scored.get("summary", {}),
        "suites": scored.get("suites", []),
    }

def scored_benchmark(report: dict[str, Any] | None) -> dict[str, Any]:
    """Summarize a real_world_job report as a scored-benchmark status record.

    Args:
        report: The report mapping, or ``None`` when scoring has not run yet.

    Returns:
        A ``elf.scored_benchmark_status/v1`` record. With no report the status
        is ``"pending"`` and a reason is attached. Otherwise the status is the
        first non-``pass`` outcome (in the fixed order below) with a positive
        count, or ``"pass"`` when none has one, alongside the per-outcome
        counts, job count, mean score and evidence coverage from the summary.
    """
    envelope = {
        "schema": "elf.scored_benchmark_status/v1",
        "source": "real_world_job_benchmark",
    }

    if report is None:
        return {
            **envelope,
            "status": "pending",
            "reason": "The smoke materialization was written before benchmark scoring completed.",
        }

    summary = report.get("summary", {})

    # Missing or falsy entries (None, 0, "") all count as zero.
    counts: dict[str, int] = {}
    for outcome in ("pass", "wrong_result", "lifecycle_fail", "incomplete", "blocked", "not_encoded"):
        counts[outcome] = int(summary.get(outcome, 0) or 0)

    # The tuple order above doubles as the priority order for the verdict.
    verdict = "pass"
    for outcome, tally in counts.items():
        if outcome != "pass" and tally > 0:
            verdict = outcome
            break

    return {
        **envelope,
        "status": verdict,
        "counts": counts,
        "job_count": int(summary.get("job_count", 0) or 0),
        "mean_score": summary.get("mean_score"),
        "evidence_coverage": summary.get("evidence_coverage"),
    }
137 changes: 137 additions & 0 deletions scripts/graphiti_temporal_smoke/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""Shared filesystem and process helpers for the Graphiti/Zep smoke."""

from __future__ import annotations

import json
import os
import shutil
import subprocess
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from .context import FIXTURE_DIR, LOG_DIR, REPORT_DIR, ROOT_DIR, TIMEOUT_SECONDS, WORK_DIR
from .models import CommandRecord

def utc_now() -> str:
    """Return the current UTC time as an RFC3339 string with second precision.

    The result always ends in ``Z`` (for example ``2026-06-08T00:00:00Z``).
    """
    moment = datetime.now(timezone.utc)
    return moment.strftime("%Y-%m-%dT%H:%M:%SZ")

def rel(path: Path) -> str:
    """Format *path* relative to ``ROOT_DIR`` when it lies beneath it.

    The path is resolved before the comparison; when the resolved path is not
    under ``ROOT_DIR`` the original (unresolved) path is returned as a string.
    """
    try:
        inside_root = path.resolve().relative_to(ROOT_DIR)
    except ValueError:
        # Not under the repository root: fall back to the path as given.
        return str(path)

    return str(inside_root)

def mkdirs() -> None:
    """Ensure the report, work, fixture and log directories exist.

    Missing parents are created and already-existing directories are left
    untouched.
    """
    output_dirs = (REPORT_DIR, WORK_DIR, FIXTURE_DIR, LOG_DIR)

    for directory in output_dirs:
        directory.mkdir(parents=True, exist_ok=True)

def write_json(path: Path, payload: Any) -> None:
    """Serialize *payload* to *path* as deterministic, human-readable JSON.

    Keys are sorted, the indent is two spaces, and the file ends with a single
    trailing newline. The parent directory is created when missing.
    """
    rendered = json.dumps(payload, indent=2, sort_keys=True)

    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(rendered + "\n")

def command_available(command: str) -> bool:
    """Report whether *command* can be located via ``shutil.which``."""
    located = shutil.which(command)
    return located is not None

def dir_size(path: Path) -> int:
    """Return the size in bytes of a file, or the summed size of a directory tree.

    A regular file yields its own size, a missing path yields ``0``, and a
    directory yields the total size of every regular file found recursively
    beneath it.
    """
    if path.is_file():
        return path.stat().st_size
    if not path.exists():
        return 0

    total_bytes = 0
    for entry in path.rglob("*"):
        if entry.is_file():
            total_bytes += entry.stat().st_size

    return total_bytes

def file_count(path: Path) -> int:
    """Count the regular files found recursively beneath *path*.

    A missing path yields ``0``; directories themselves are not counted.
    """
    if not path.exists():
        return 0

    found = 0
    for entry in path.rglob("*"):
        if entry.is_file():
            found += 1

    return found

def command_to_json(record: CommandRecord) -> dict[str, Any]:
    """Convert a command record into a JSON-ready mapping.

    Every field is copied through unchanged except ``elapsed_ms``, which is
    rounded to three decimal places.
    """
    field_order = (
        "label",
        "status",
        "command",
        "elapsed_ms",
        "stdout_artifact",
        "stderr_artifact",
        "returncode",
        "reason",
    )
    payload: dict[str, Any] = {name: getattr(record, name) for name in field_order}
    # Overwriting in place keeps the key at its original position.
    payload["elapsed_ms"] = round(payload["elapsed_ms"], 3)

    return payload

def _captured_text(stream: str | bytes | None) -> str:
    """Normalize captured subprocess output to ``str`` for log artifacts.

    ``subprocess.TimeoutExpired`` may carry ``None`` (nothing captured) or raw
    ``bytes`` even when the process was started with ``text=True``, so both
    cases are folded into text here.
    """
    if stream is None:
        return ""
    if isinstance(stream, bytes):
        import locale

        # Output cut off by a timeout can end mid-character; never fail on it.
        return stream.decode(locale.getpreferredencoding(False), errors="replace")

    return stream


def run_command(
    label: str,
    command: list[str],
    cwd: Path,
    timeout: int = TIMEOUT_SECONDS,
    extra_env: dict[str, str] | None = None,
) -> CommandRecord:
    """Run a subprocess and persist its stdout/stderr as log artifacts.

    Args:
        label: Name used for the record and for the ``<label>.stdout.log`` /
            ``<label>.stderr.log`` files under ``LOG_DIR``.
        command: Argument vector to execute (no shell is involved).
        cwd: Working directory; created when missing.
        timeout: Seconds to wait before the command is treated as timed out.
        extra_env: Variables layered on top of a copy of ``os.environ``.

    Returns:
        A ``CommandRecord`` with status ``"pass"`` for exit code 0 and
        ``"incomplete"`` for a non-zero exit or a timeout (``returncode`` is
        ``None`` on timeout).
    """
    cwd.mkdir(parents=True, exist_ok=True)
    stdout_path = LOG_DIR / f"{label}.stdout.log"
    stderr_path = LOG_DIR / f"{label}.stderr.log"
    # Do not depend on mkdirs() having run before the first command.
    stdout_path.parent.mkdir(parents=True, exist_ok=True)

    env = os.environ.copy()
    if extra_env:
        env.update(extra_env)

    started = time.monotonic()
    try:
        proc = subprocess.run(
            command,
            cwd=cwd,
            env=env,
            text=True,
            capture_output=True,
            timeout=timeout,
            check=False,
        )
    except subprocess.TimeoutExpired as err:
        elapsed_ms = (time.monotonic() - started) * 1000
        # Partial output on timeout may be bytes despite text=True; writing it
        # unconverted would raise TypeError and lose the timeout record.
        stdout_path.write_text(_captured_text(err.stdout), encoding="utf-8")
        stderr_path.write_text(_captured_text(err.stderr), encoding="utf-8")

        return CommandRecord(
            label=label,
            command=command,
            status="incomplete",
            elapsed_ms=elapsed_ms,
            stdout_artifact=rel(stdout_path),
            stderr_artifact=rel(stderr_path),
            returncode=None,
            reason=f"Command timed out after {timeout} seconds.",
        )

    elapsed_ms = (time.monotonic() - started) * 1000
    stdout_path.write_text(proc.stdout, encoding="utf-8")
    stderr_path.write_text(proc.stderr, encoding="utf-8")
    succeeded = proc.returncode == 0

    return CommandRecord(
        label=label,
        command=command,
        status="pass" if succeeded else "incomplete",
        elapsed_ms=elapsed_ms,
        stdout_artifact=rel(stdout_path),
        stderr_artifact=rel(stderr_path),
        returncode=proc.returncode,
        reason="Command completed." if succeeded else f"Command exited {proc.returncode}.",
    )
63 changes: 63 additions & 0 deletions scripts/graphiti_temporal_smoke/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Configuration for the Graphiti/Zep temporal smoke."""

from __future__ import annotations

import os
from datetime import datetime, timezone
from pathlib import Path


def _env_path(variable: str, fallback: Path) -> Path:
    """Return the path named by *variable*, or *fallback* when it is unset."""
    return Path(os.environ.get(variable, fallback))


def _env_flag(variable: str, fallback: str) -> bool:
    """Return whether *variable* (or *fallback* when unset) is exactly ``"1"``."""
    return os.environ.get(variable, fallback) == "1"


# Filesystem layout: two levels above this file, then one more for the root.
SCRIPT_DIR = Path(__file__).resolve().parents[1]
ROOT_DIR = SCRIPT_DIR.parent

# Output locations; each can be redirected through its own environment variable.
REPORT_DIR = _env_path(
    "ELF_GRAPHITI_ZEP_SMOKE_REPORT_DIR",
    ROOT_DIR / "tmp" / "real-world-memory" / "graphiti-zep-smoke",
)
WORK_DIR = _env_path("ELF_GRAPHITI_ZEP_SMOKE_WORK_DIR", REPORT_DIR / "work")
OUT = _env_path("ELF_GRAPHITI_ZEP_SMOKE_OUT", REPORT_DIR / "graphiti-zep-smoke.json")
MANIFEST_OUT = _env_path(
    "ELF_GRAPHITI_ZEP_SMOKE_MANIFEST_OUT",
    REPORT_DIR / "memory_projects_manifest.graphiti-zep-smoke.json",
)
SUMMARY_OUT = _env_path("ELF_GRAPHITI_ZEP_SMOKE_SUMMARY_OUT", REPORT_DIR / "summary.json")
REPORT_JSON = _env_path("ELF_GRAPHITI_ZEP_SMOKE_REPORT_JSON", REPORT_DIR / "graphiti-zep-report.json")
REPORT_MD = _env_path("ELF_GRAPHITI_ZEP_SMOKE_REPORT_MD", REPORT_DIR / "graphiti-zep-report.md")
# These two always live under REPORT_DIR and have no override of their own.
FIXTURE_DIR = REPORT_DIR / "graphiti-zep-fixtures"
LOG_DIR = REPORT_DIR / "logs"

# Run identity and mode switches. The default run id embeds the import-time UTC
# timestamp; a flag is on only when its variable is exactly "1".
_import_stamp = datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')
RUN_ID = os.environ.get("ELF_GRAPHITI_ZEP_SMOKE_RUN_ID", f"graphiti-zep-docker-smoke-{_import_stamp}")
RUN_LIVE = _env_flag("ELF_GRAPHITI_ZEP_SMOKE_RUN", "0")
ALLOW_HOST = _env_flag("ELF_GRAPHITI_ZEP_SMOKE_ALLOW_HOST", "0")
INSTALL_GRAPHITI = _env_flag("ELF_GRAPHITI_ZEP_SMOKE_INSTALL", "1")

# Graphiti package selection; the package and ref defaults derive from the version.
GRAPHITI_VERSION = os.environ.get("ELF_GRAPHITI_ZEP_VERSION", "0.21.0")
GRAPHITI_PACKAGE = os.environ.get("ELF_GRAPHITI_ZEP_PACKAGE", f"graphiti-core[falkordb]=={GRAPHITI_VERSION}")
GRAPHITI_REF = os.environ.get("ELF_GRAPHITI_ZEP_REF", f"pypi:{GRAPHITI_PACKAGE}")

# FalkorDB connection settings.
FALKORDB_HOST = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_HOST", "graphiti-falkordb")
FALKORDB_PORT = int(os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_PORT", "6379"))
FALKORDB_DATABASE = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_DATABASE", "elf_graphiti_zep_smoke")
FALKORDB_USERNAME = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_USERNAME", "")
FALKORDB_PASSWORD = os.environ.get("ELF_GRAPHITI_ZEP_FALKORDB_PASSWORD", "")

# Provider credentials: the most specific variable that is set wins, even when
# it is set to an empty string.
_openai_key = os.environ.get("OPENAI_API_KEY", "")
_graphiti_key = os.environ.get("GRAPHITI_OPENAI_API_KEY", _openai_key)
API_KEY = os.environ.get("ELF_GRAPHITI_ZEP_API_KEY", _graphiti_key)
API_BASE = os.environ.get("ELF_GRAPHITI_ZEP_API_BASE", os.environ.get("OPENAI_BASE_URL", ""))
LLM_MODEL = os.environ.get("ELF_GRAPHITI_ZEP_LLM_MODEL", "gpt-4o-mini")
EMBEDDING_MODEL = os.environ.get("ELF_GRAPHITI_ZEP_EMBEDDING_MODEL", "text-embedding-3-small")

# Timing knobs.
TIMEOUT_SECONDS = int(os.environ.get("ELF_GRAPHITI_ZEP_TIMEOUT_SECONDS", "900"))
STARTUP_ATTEMPTS = int(os.environ.get("ELF_GRAPHITI_ZEP_STARTUP_ATTEMPTS", "30"))
STARTUP_INTERVAL_SECONDS = float(os.environ.get("ELF_GRAPHITI_ZEP_STARTUP_INTERVAL_SECONDS", "2"))
47 changes: 47 additions & 0 deletions scripts/graphiti_temporal_smoke/corpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Generated temporal facts used by the Graphiti/Zep smoke."""

from __future__ import annotations

from typing import Any

def temporal_facts() -> list[dict[str, Any]]:
    """Build the generated temporal fact corpus for the smoke run.

    Returns:
        A fresh list of three fact mappings: the historical owner, the current
        owner, and the rationale for the ownership change. Each carries an
        evidence id, claim id, source/edge/target triple, fact text, validity
        timestamps (``invalid_at`` is ``None`` for the two current facts), a
        creation timestamp and a ``current`` flag.
    """
    historical_owner = dict(
        evidence_id="graphiti-zep-old-owner",
        claim_id="relation_historical_owner",
        source="Team Delta",
        edge_name="OWNED_REVIEW",
        target="deployment method review",
        fact="Team Delta owned deployment method review before 2026-06-06.",
        valid_at="2026-06-05T00:00:00Z",
        invalid_at="2026-06-08T00:00:00Z",
        created_at="2026-06-05T00:00:00Z",
        current=False,
    )
    current_owner = dict(
        evidence_id="graphiti-zep-current-owner",
        claim_id="relation_current_owner",
        source="Team Echo",
        edge_name="OWNS_REVIEW",
        target="deployment method review",
        fact="Team Echo owns deployment method review since 2026-06-08.",
        valid_at="2026-06-08T00:00:00Z",
        invalid_at=None,
        created_at="2026-06-08T00:00:00Z",
        current=True,
    )
    owner_rationale = dict(
        evidence_id="graphiti-zep-owner-rationale",
        claim_id="relation_owner_update_rationale",
        source="single-user production runbook scope",
        edge_name="MOVED_OWNERSHIP_TO",
        target="Team Echo",
        fact="Ownership moved to Team Echo after single-user production runbook scope changed.",
        valid_at="2026-06-08T00:05:00Z",
        invalid_at=None,
        created_at="2026-06-08T00:05:00Z",
        current=True,
    )

    return [historical_owner, current_owner, owner_rationale]
Loading