Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,477 changes: 1 addition & 1,476 deletions scripts/graphify-docker-graph-report-smoke.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions scripts/graphify_smoke/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""graphify Docker graph/report smoke modules."""
480 changes: 480 additions & 0 deletions scripts/graphify_smoke/artifacts.py

Large diffs are not rendered by default.

142 changes: 142 additions & 0 deletions scripts/graphify_smoke/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from __future__ import annotations

import json
import subprocess
from dataclasses import replace
from pathlib import Path
from typing import Any

from .common import rel
from .context import REPORT_JSON, REPORT_MD, ROOT_DIR
from .models import StatusState



def run_scored_report(fixture_path: Path, manifest_path: Path, status: StatusState) -> dict[str, Any]:
    """Score and publish the graphify fixture with the real-world job runner.

    Two ``cargo run`` invocations are executed from ``ROOT_DIR``: the ``run``
    subcommand scores ``fixture_path`` into ``REPORT_JSON`` and the ``publish``
    subcommand renders that report to ``REPORT_MD``. Both use ``check=True``,
    so a non-zero exit raises ``subprocess.CalledProcessError``.

    Returns a dict holding the ``rel``-formatted report paths together with
    the ``summary`` and ``suites`` sections read back from the JSON report.
    """

    # Everything up to the subcommand is identical for both invocations.
    benchmark_bin = ["cargo", "run", "-p", "elf-eval", "--bin", "real_world_job_benchmark", "--"]

    # Flag/value pairs in the exact order they are passed on the command line.
    run_options = (
        ("--fixtures", str(fixture_path)),
        ("--out", str(REPORT_JSON)),
        ("--run-id", "real-world-memory-live-graphify"),
        ("--adapter-id", "graphify_docker_smoke"),
        ("--adapter-name", "graphify Docker graph/report smoke adapter"),
        ("--adapter-behavior", "docker_cli_graph_report_smoke"),
        ("--adapter-storage-status", status.setup),
        ("--adapter-runtime-status", status.overall),
        (
            "--adapter-notes",
            "Generated by the graphify Docker graph/report smoke; pass or wrong_result requires graph.json, GRAPH_REPORT.md, and query output mapped to generated evidence ids, while setup/runtime limits remain typed.",
        ),
        ("--external-adapter-manifest", str(manifest_path)),
    )
    publish_options = (
        ("--report", str(REPORT_JSON)),
        ("--out", str(REPORT_MD)),
    )

    score_command = [*benchmark_bin, "run", *(token for pair in run_options for token in pair)]
    render_command = [*benchmark_bin, "publish", *(token for pair in publish_options for token in pair)]

    # Scoring must finish before publishing, which consumes the JSON report.
    for command in (score_command, render_command):
        subprocess.run(command, cwd=ROOT_DIR, check=True)

    with REPORT_JSON.open(encoding="utf-8") as handle:
        scored = json.load(handle)

    return {
        "json": rel(REPORT_JSON),
        "markdown": rel(REPORT_MD),
        "summary": scored.get("summary", {}),
        "suites": scored.get("suites", []),
    }


def scored_benchmark(report: dict[str, Any] | None) -> dict[str, Any]:
"""Extract the post-score benchmark status from a real_world_job report."""

if report is None:
return {
"schema": "elf.scored_benchmark_status/v1",
"source": "real_world_job_benchmark",
"status": "pending",
"reason": "The smoke materialization was written before benchmark scoring completed.",
}

summary = report.get("summary", {})
counts = {
status: int(summary.get(status, 0) or 0)
for status in (
"pass",
"wrong_result",
"lifecycle_fail",
"incomplete",
"blocked",
"not_encoded",
)
}
status = next((name for name, count in counts.items() if name != "pass" and count > 0), "pass")

return {
"schema": "elf.scored_benchmark_status/v1",
"source": "real_world_job_benchmark",
"status": status,
"counts": counts,
"job_count": int(summary.get("job_count", 0) or 0),
"mean_score": summary.get("mean_score"),
"evidence_coverage": summary.get("evidence_coverage"),
}


def status_with_scored_result(status: StatusState, report: dict[str, Any]) -> StatusState:
    """Derive the manifest status from the scored real_world_job outcome.

    The incoming ``status`` is returned untouched when the scored status is
    not one of the known outcomes. Otherwise a copy made with
    ``dataclasses.replace`` gets ``result`` and ``overall`` set to the scored
    status. Only ``pass`` and ``wrong_result`` rewrite ``failure_class`` and
    ``failure_reason``; for the remaining outcomes those fields keep the
    values carried over from ``status``.
    """

    outcome = scored_benchmark(report).get("status")
    known_outcomes = frozenset(
        ("pass", "wrong_result", "lifecycle_fail", "incomplete", "blocked", "not_encoded")
    )
    if outcome not in known_outcomes:
        return status

    # Outcomes that overwrite the failure fields: (failure_class, failure_reason).
    failure_details = {
        "pass": ("", ""),
        "wrong_result": (
            "scored_benchmark_wrong_result",
            "The graphify smoke materialized graph/report evidence, but the scored "
            "real_world_job outcome is wrong_result; inspect graphify-report.json for "
            "wrong-result signals.",
        ),
    }

    updated = replace(status)
    updated.result = updated.overall = str(outcome)
    if outcome in failure_details:
        updated.failure_class, updated.failure_reason = failure_details[outcome]

    return updated
112 changes: 112 additions & 0 deletions scripts/graphify_smoke/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
from __future__ import annotations

import json
import shutil
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

from .context import (
FIXTURE_DIR,
LOG_DIR,
MANIFEST_OUT,
OUT,
OUTPUT_CAPTURE_DIR,
REPORT_DIR,
REPORT_JSON,
REPORT_MD,
ROOT_DIR,
SUMMARY_OUT,
WORK_DIR,
)



def utc_now() -> str:
    """Return the current UTC time as an RFC3339 string, to the second.

    Microseconds are dropped and the ``+00:00`` offset emitted by
    ``isoformat`` is rewritten as ``Z``.
    """

    whole_seconds = datetime.now(timezone.utc).replace(microsecond=0)
    stamp = whole_seconds.isoformat()
    return stamp.replace("+00:00", "Z")


def rel(path: Path) -> str:
    """Format *path* relative to ``ROOT_DIR`` when it lies inside it.

    If the resolved path cannot be expressed relative to ``ROOT_DIR`` (a
    ``ValueError``), the path is returned as given, converted to ``str``.
    """

    try:
        inside_repo = path.resolve().relative_to(ROOT_DIR)
    except ValueError:
        return str(path)

    return str(inside_repo)


def mkdirs() -> None:
    """Reset the output tree owned by this smoke to a clean state.

    The fixture, output-capture and log directories are removed if present,
    every required directory is (re)created, and any existing output file
    from an earlier run is deleted.
    """

    # Directories whose previous contents must not leak into this run.
    owned_dirs = (FIXTURE_DIR, OUTPUT_CAPTURE_DIR, LOG_DIR)
    for stale_dir in owned_dirs:
        if stale_dir.exists():
            shutil.rmtree(stale_dir)

    # REPORT_DIR and WORK_DIR are kept as they are and only created if missing.
    for needed_dir in (REPORT_DIR, WORK_DIR, *owned_dirs):
        needed_dir.mkdir(parents=True, exist_ok=True)

    stale_files = (
        OUT,
        MANIFEST_OUT,
        SUMMARY_OUT,
        REPORT_JSON,
        REPORT_MD,
        REPORT_DIR / "generated-corpus.csv",
    )
    for stale_file in stale_files:
        if stale_file.exists():
            stale_file.unlink()


def write_json(path: Path, payload: Any) -> None:
    """Serialize *payload* to *path* as indented, key-sorted JSON.

    Missing parent directories are created first. The file is written as
    UTF-8 with a trailing newline after the JSON text.
    """

    path.parent.mkdir(parents=True, exist_ok=True)

    # Sorted keys and a fixed indent keep the output stable between runs.
    rendered = json.dumps(payload, indent=2, sort_keys=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(rendered + "\n")


def dir_size(path: Path) -> int:
    """Return the size in bytes of *path*.

    A missing path counts as 0, a regular file reports its own size, and any
    other existing path reports the summed size of every file found
    recursively beneath it.
    """

    if not path.exists():
        return 0
    if path.is_file():
        return path.stat().st_size

    total = 0
    for entry in path.rglob("*"):
        if entry.is_file():
            total += entry.stat().st_size

    return total


def file_count(path: Path) -> int:
    """Return how many files live anywhere beneath *path*.

    A missing path yields 0. Only entries for which ``is_file()`` is true are
    counted, so directories themselves do not contribute.
    """

    if not path.exists():
        return 0

    files = [entry for entry in path.rglob("*") if entry.is_file()]
    return len(files)


def command_available(command: str) -> bool:
    """Report whether ``shutil.which`` can locate *command*."""

    located = shutil.which(command)
    return located is not None


def slug(value: str) -> str:
    """Return a small ASCII slug for *value*.

    The input is lower-cased, ASCII letters and digits are kept, and every
    run of other characters becomes a single dash. Leading and trailing
    dashes are dropped; if nothing is left the slug is ``"item"``.
    """

    # Map every rejected character to a space so that ``split`` both collapses
    # runs of separators and discards the ones at either end.
    spaced = "".join(
        char if char.isascii() and char.isalnum() else " "
        for char in value.lower()
    )
    words = spaced.split()

    return "-".join(words) or "item"
41 changes: 41 additions & 0 deletions scripts/graphify_smoke/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from __future__ import annotations

import os
from datetime import datetime, timezone
from pathlib import Path

# Locations derived from this file: SCRIPT_DIR is two levels above it and
# ROOT_DIR one level above that.
SCRIPT_DIR = Path(__file__).resolve().parent.parent
ROOT_DIR = SCRIPT_DIR.parent

# Output locations. Each one can be overridden through its environment
# variable; the defaults all live under REPORT_DIR.
REPORT_DIR = Path(
    os.getenv("ELF_GRAPHIFY_SMOKE_REPORT_DIR", ROOT_DIR / "tmp" / "real-world-memory" / "graphify-smoke")
)
WORK_DIR = Path(os.getenv("ELF_GRAPHIFY_SMOKE_WORK_DIR", REPORT_DIR / "work"))
OUT = Path(os.getenv("ELF_GRAPHIFY_SMOKE_OUT", REPORT_DIR / "graphify-smoke.json"))
MANIFEST_OUT = Path(
    os.getenv("ELF_GRAPHIFY_SMOKE_MANIFEST_OUT", REPORT_DIR / "memory_projects_manifest.graphify-smoke.json")
)
SUMMARY_OUT = Path(os.getenv("ELF_GRAPHIFY_SMOKE_SUMMARY_OUT", REPORT_DIR / "summary.json"))
REPORT_JSON = Path(os.getenv("ELF_GRAPHIFY_SMOKE_REPORT_JSON", REPORT_DIR / "graphify-report.json"))
REPORT_MD = Path(os.getenv("ELF_GRAPHIFY_SMOKE_REPORT_MD", REPORT_DIR / "graphify-report.md"))

# Fixed sub-directories; these have no environment override of their own.
FIXTURE_DIR = REPORT_DIR / "graphify-fixtures"
CORPUS_DIR = WORK_DIR / "generated-public-corpus"
OUTPUT_CAPTURE_DIR = REPORT_DIR / "graphify-out"
LOG_DIR = REPORT_DIR / "logs"

# Run identity: defaults to a UTC timestamp taken at import time.
RUN_ID = os.getenv(
    "ELF_GRAPHIFY_SMOKE_RUN_ID",
    "graphify-docker-smoke-" + datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S"),
)

# Boolean switches: only the exact string "1" enables a flag.
RUN_GRAPHIFY = os.getenv("ELF_GRAPHIFY_SMOKE_RUN", "1") == "1"
ALLOW_HOST = os.getenv("ELF_GRAPHIFY_SMOKE_ALLOW_HOST", "0") == "1"
INSTALL_GRAPHIFY = os.getenv("ELF_GRAPHIFY_SMOKE_INSTALL", "1") == "1"

# Package selection; the reference defaults to the PyPI form of the package.
GRAPHIFY_PACKAGE = os.getenv("ELF_GRAPHIFY_PACKAGE", "graphifyy")
GRAPHIFY_REF = os.getenv("ELF_GRAPHIFY_REF", f"pypi:{GRAPHIFY_PACKAGE}")

# Integer limits; a non-integer override raises ValueError at import time.
TIMEOUT_SECONDS = int(os.getenv("ELF_GRAPHIFY_TIMEOUT_SECONDS", "600"))
QUERY_BUDGET = int(os.getenv("ELF_GRAPHIFY_QUERY_BUDGET", "1200"))
Loading