diff --git a/Benchmarks/Statistics/.gitignore b/Benchmarks/Statistics/.gitignore new file mode 100644 index 00000000..945831c2 --- /dev/null +++ b/Benchmarks/Statistics/.gitignore @@ -0,0 +1,3 @@ +.venv/ +__pycache__/ +*.svg diff --git a/Benchmarks/Statistics/README.md b/Benchmarks/Statistics/README.md new file mode 100644 index 00000000..6a6bfb3e --- /dev/null +++ b/Benchmarks/Statistics/README.md @@ -0,0 +1,88 @@ +# Benchmarks/Statistics + +A small, general-purpose Python project for **modelling and plotting measured +benchmark data** for the proving stacks — currently **Aiur** (FFT-cost) and +**Zisk** (zkVM cycles). Two regimes per system: + +1. **profiled features → cost** — predict the deterministic cost (Aiur FFT / + Zisk cycles) from cheap out-of-circuit profiler counters (`hb`, `subst`, + `defeq`, `bytes`), so you can pick benchmark targets by budget before running. +2. **cost → runtime** — predict execution / proving **time, throughput, and RAM** + from the cost. + +## Layout + +``` +benchstats/ # the package: load.py, fit.py, plot.py, __main__.py (CLI) +data/ + aiur/ cost.csv FFT cost + exec/prove time + peak RAM, per constant + features.csv native profiler features + measured FFT + predicted_vs_actual.csv + zisk/ single_shard.csv full-closure single-constant: features + cycles + multi_shard.csv per-shard: features + cycles + prove_real.csv real GPU proves: steps, prove time, peak RAM +docs/ aiur-cost-dataset.md FFT → exec/prove/RAM models + the full table + aiur-fft-cost-model.md predicting FFT from features (cost drivers, ceiling) + aiur-predicted-vs-actual.md per-constant predicted vs actual + zisk-prove-validation.md shard.rs model vs real GPU proves (R²) +figures/ # rendered PNGs (gitignored — regenerate and view with `kitten icat`) +``` + +## Usage + +The `nix develop` dev shell provides `python3` with matplotlib, so just: + +```bash +python3 -m benchstats all # render every graph to figures/ +python3 -m benchstats aiur-runtime # or one graph; add --svg for vector +``` + +Outside Nix, `pip install matplotlib` into a venv first. + +## Graph catalogue + +| command | graph | data | status | +|---|---|---|---| +| `aiur-predictor` | profiled `hb/subst/defeq/bytes` vs **FFT** (one panel each, power-law fit) | `data/aiur/features.csv` | ✅ n=65 | +| `aiur-runtime` | **FFT** vs exec {time, throughput, RAM}; and vs prove {time, throughput, RAM} (two figures, stacked panels) | `data/aiur/cost.csv` | ✅ n=98; RAM is the *process* peak (execute+prove — for completers the peak is in prove, so exec-phase RAM isn't separated) | +| `zisk-predictor` | profiled features vs **cycles**, single-shard **and** multi-shard (two figures) | `data/zisk/{single,multi}_shard.csv` | ✅ single n=66, multi n=12 | +| `zisk-runtime` | **cycles** vs prove {time, throughput, RAM} — projection of the `shard.rs` model | constants read from `shard.rs` | ✅ model projection (no fit) | +| `zisk-prove-validate` | the **actual `shard.rs` model** (`50+33·B` RAM, `54+158·B` time) vs **real GPU proves**, with R² | `data/zisk/prove_real.csv` | ✅ n=31 leaves (single + sharded, witness 5 & 10); RAM R²=0.88, time R²=0.95; witness=10 ≈ no RAM change — see `docs/zisk-prove-validation.md` | + +> `zisk-runtime` and `zisk-prove-validate` read the model **constants from the +> in-repo planner** (`crates/kernel/src/shard.rs`, the single source of truth) via +> `benchstats/shard_model.py` — they plot the code's model, never a fitted one. +> Override with `--shard-rs ` (defaults to the in-repo file; falls back to +> `~/ix/crates/kernel/src/shard.rs` outside a full worktree). + +## Figures + +Rendered to `figures/` by `python3 -m benchstats all` and committed for reference +(regenerate any time — they're deterministic from the data + `shard.rs`). + +### Aiur +| profiled features → FFT | FFT → execution | FFT → proving | +|---|---|---| +| ![](figures/aiur_features_vs_fft.png) | ![](figures/aiur_exec_vs_fft.png) | ![](figures/aiur_prove_vs_fft.png) | + +### Zisk +| features → cycles (single-shard) | features → cycles (multi-shard) | +|---|---| +| ![](figures/zisk_single_features_vs_cycles.png) | ![](figures/zisk_multi_features_vs_cycles.png) | + +| cycles → proving (`shard.rs` model projection) | **`shard.rs` model vs real GPU proves** | +|---|---| +| ![](figures/zisk_prove_vs_cycles.png) | ![](figures/zisk_prove_validation.png) | + +## Data provenance + +- **Aiur FFT cost** (`computeStats.totalFftCost`) and exec/prove/RAM: the on-tree + `bench-typecheck --ixe` / `ix check --stats-out` (full-closure `verify_claim`). +- **Native profiler features** (`hb/subst/defeq/bytes`): the `ix` repo's + `only_const_profile` example (`IX_PROFILE_COUNTERS=1`, out of circuit). +- **Zisk cycles** and the prove-time/RAM model: `zisk-host --execute` and the + shard-prover measurements — see `ix` repo `docs/zisk-cycle-cost-model.md`, + the companion to `docs/aiur-fft-cost-model.md`. + +Regenerate the underlying data with the commands in those docs; this project +just models and plots it. diff --git a/Benchmarks/Statistics/benchstats/__init__.py b/Benchmarks/Statistics/benchstats/__init__.py new file mode 100644 index 00000000..607c58e4 --- /dev/null +++ b/Benchmarks/Statistics/benchstats/__init__.py @@ -0,0 +1,4 @@ +"""benchstats — model & plot measured Aiur / Zisk benchmark data. + +See README.md for the graph catalogue. Run `python -m benchstats --help`. +""" diff --git a/Benchmarks/Statistics/benchstats/__main__.py b/Benchmarks/Statistics/benchstats/__main__.py new file mode 100644 index 00000000..45403d54 --- /dev/null +++ b/Benchmarks/Statistics/benchstats/__main__.py @@ -0,0 +1,125 @@ +"""CLI: render the benchmark cost-model graphs. + + python -m benchstats all # every graph + python -m benchstats aiur-predictor # profiled features -> FFT + python -m benchstats aiur-runtime # FFT -> exec/prove time, throughput, RAM + python -m benchstats zisk-predictor # profiled features -> cycles (single + multi shard) + python -m benchstats zisk-runtime # cycles -> prove time/throughput/RAM (shard.rs model) + python -m benchstats zisk-prove-validate # shard.rs model vs real GPU proves (with R²) + +Figures are written to `figures/` (gitignored — view with `kitten icat`). +""" +import argparse + +from . import load, plot +from .plot import ENV_COLOR + + +def _project(rows, keep): + """Keep only `keep` columns (drop incidental ones like shard/blocks).""" + return [{k: r[k] for k in keep if k in r} for r in rows] + + +def aiur_predictor(a): + rows = load.load("aiur/features.csv") + pts = _project(rows, ["name", "env", "hb", "subst", "defeq", "bytes", "fft"]) + plot.feature_panels(pts, "fft", "Aiur FFT cost", "aiur_features_vs_fft.png", + "Aiur — profiled features vs FFT cost", a.svg) + + +def aiur_runtime(a): + rows = load.load("aiur/cost.csv") + fft, exec_us, prove_ms, rss_kb, kept = load.numeric( + rows, "fft_cost", "exec_us", "prove_ms", "peak_rss_kb") + colors = [ENV_COLOR.get(r.get("env", ""), "#1f77b4") for r in kept] + exec_ms = [v / 1e3 for v in exec_us] + exec_tput = [f / (e / 1e6) for f, e in zip(fft, exec_us)] # FFT-units / s + prove_tput = [f / (p / 1e3) for f, p in zip(fft, prove_ms)] # FFT-units / s + ram_gb = [v / 1024 / 1024 for v in rss_kb] + plot.cost_vs_runtime(fft, "Aiur FFT cost", [ + dict(label="exec time", y=exec_ms, ylabel="exec time (ms)", yscale="log", fit="loglog"), + dict(label="exec throughput", y=exec_tput, ylabel="exec throughput\n(FFT-units/s)", yscale="linear"), + dict(label="peak RAM", y=ram_gb, ylabel="peak RAM (GB)", yscale="log", fit="loglog", + note="process peak RSS (execute+prove; peak is in prove for completers)"), + ], "aiur_exec_vs_fft.png", "Aiur — FFT cost vs execution", colors, a.svg) + plot.cost_vs_runtime(fft, "Aiur FFT cost", [ + dict(label="prove time", y=prove_ms, ylabel="prove time (ms)", yscale="log", fit="loglog"), + dict(label="prove throughput", y=prove_tput, ylabel="prove throughput\n(FFT-units/s)", yscale="linear"), + dict(label="peak RAM", y=ram_gb, ylabel="peak RAM (GB)", yscale="log", fit="loglog"), + ], "aiur_prove_vs_fft.png", "Aiur — FFT cost vs proving", colors, a.svg) + + +def zisk_predictor(a): + single = _project(load.load("zisk/single_shard.csv"), + ["name", "hb", "bytes", "subst", "cycles"]) + plot.feature_panels(single, "cycles", "Zisk cycles (steps)", + "zisk_single_features_vs_cycles.png", + "Zisk single-shard (full-closure) — features vs cycles", a.svg) + multi = _project(load.load("zisk/multi_shard.csv"), + ["shard", "hb", "bytes", "subst", "cycles"]) + plot.feature_panels(multi, "cycles", "Zisk cycles (steps)", + "zisk_multi_features_vs_cycles.png", + "Zisk multi-shard (per shard) — features vs cycles", a.svg) + + +def zisk_runtime(a): + # Project the actual shard.rs prove model (constants read from the code) over + # a cycle range. x = steps; B = steps in billions. See zisk-prove-validate for + # the same model checked against real GPU proves. + from . import shard_model + k = shard_model.read_constants(a.shard_rs) + B = lambda s: s / 1e9 + plot.model_lines((2.7e8, 5.0e9), [ + dict(label=f"{k['RAM_BASE_GIB']:.0f} + {k['RAM_GIB_PER_BCYCLE']:.0f}·B", + fn=lambda s: k["RAM_BASE_GIB"] + k["RAM_GIB_PER_BCYCLE"] * B(s), + ylabel="peak host RAM (GiB)", yscale="linear"), + dict(label=f"{k['PROVE_SETUP_SECS']:.0f} + {k['PROVE_SECS_PER_BCYCLE']:.0f}·B", + fn=lambda s: k["PROVE_SETUP_SECS"] + k["PROVE_SECS_PER_BCYCLE"] * B(s), + ylabel="GPU prove time (s)", yscale="linear"), + dict(label="steps/s", + fn=lambda s: (s / 1e6) / (k["PROVE_SETUP_SECS"] + k["PROVE_SECS_PER_BCYCLE"] * B(s)), + ylabel="prove throughput\n(M steps/s)", yscale="linear"), + ], "zisk_prove_vs_cycles.png", + "Zisk — cycles vs proving (shard.rs model projection)", a.svg) + + +def zisk_prove_validate(a): + """Real GPU proves (zisk-host, RTX PRO 6000) vs the actual shard.rs model.""" + from . import shard_model + k = shard_model.read_constants(a.shard_rs) + steps, ram, leaf, wall, kept = load.numeric( + load.load("zisk/prove_real.csv"), "steps", "peak_ram_gib", "leaf_secs", "wall_secs") + kinds = [f"{r.get('kind','single')} w{r.get('witness','5')}" for r in kept] + ram_model = lambda s: k["RAM_BASE_GIB"] + k["RAM_GIB_PER_BCYCLE"] * s / 1e9 + time_model = lambda s: k["PROVE_SETUP_SECS"] + k["PROVE_SECS_PER_BCYCLE"] * s / 1e9 + plot.model_vs_data([ + dict(x=steps, y=ram, model=ram_model, ylabel="peak host RAM (GiB)", groups=kinds, + formula=f"{k['RAM_BASE_GIB']:.0f} + {k['RAM_GIB_PER_BCYCLE']:.0f}·Bsteps"), + dict(x=steps, y=leaf, model=time_model, ylabel="prove time (s)", groups=kinds, + y2=wall, y2label="measured (wall, incl. setup)", + formula=f"{k['PROVE_SETUP_SECS']:.0f} + {k['PROVE_SECS_PER_BCYCLE']:.0f}·Bsteps"), + ], "zisk_prove_validation.png", + "Zisk shard.rs cost model vs real GPU proves (RTX PRO 6000 Blackwell)", a.svg) + + +CMDS = {"aiur-predictor": aiur_predictor, "aiur-runtime": aiur_runtime, + "zisk-predictor": zisk_predictor, "zisk-runtime": zisk_runtime, + "zisk-prove-validate": zisk_prove_validate} + + +def main(): + ap = argparse.ArgumentParser(prog="benchstats", description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + ap.add_argument("graph", choices=["all"] + list(CMDS), help="which graph(s) to render") + ap.add_argument("--svg", action="store_true", help="also write .svg") + ap.add_argument("--shard-rs", default=None, + help="path to the ix repo's crates/kernel/src/shard.rs " + "(model constants source; default ~/ix/...)") + a = ap.parse_args() + todo = CMDS.values() if a.graph == "all" else [CMDS[a.graph]] + for fn in todo: + fn(a) + + +if __name__ == "__main__": + main() diff --git a/Benchmarks/Statistics/benchstats/fit.py b/Benchmarks/Statistics/benchstats/fit.py new file mode 100644 index 00000000..2a6a1956 --- /dev/null +++ b/Benchmarks/Statistics/benchstats/fit.py @@ -0,0 +1,69 @@ +"""Least-squares estimators shared by the Aiur and Zisk cost models. + +Two regimes show up across the benchmarks: + * profiled features -> cost (predict FFT / cycles from cheap counters) + * cost -> runtime/RAM (predict time / throughput / RAM from cost) + +Costs span several orders of magnitude, so fits minimise *relative* error +(WLS with 1/y^2 weights) and we also expose a log-log power-law fit. +""" +import math + + +def nlogn(x: float) -> float: + """`x * log2(x)` feature — the natural form for Aiur FFT (= sum w*h*log2 h).""" + return x * math.log2(x + 2.0) + + +def _solve(A, b): + n = len(A) + M = [row[:] + [b[i]] for i, row in enumerate(A)] + for i in range(n): + piv = max(range(i, n), key=lambda r: abs(M[r][i])) + M[i], M[piv] = M[piv], M[i] + d = M[i][i] + if abs(d) < 1e-300: + raise ValueError("singular normal matrix") + M[i] = [v / d for v in M[i]] + for r in range(n): + if r != i: + f = M[r][i] + M[r] = [a - f * c for a, c in zip(M[r], M[i])] + return [M[i][n] for i in range(n)] + + +def wls(feature_cols, y, relative=True, intercept=True): + """Weighted least squares. `feature_cols` is a list of equal-length columns. + + Returns dict(coef, r2 (weighted), mape (%), pred). With `relative`, weights + are 1/y^2 so the fit targets relative error (right for multi-decade costs). + """ + n = len(y) + p = len(feature_cols) + (1 if intercept else 0) + rows = [[col[i] for col in feature_cols] + ([1.0] if intercept else []) for i in range(n)] + w = [1.0 / (v * v) if (relative and v) else 1.0 for v in y] + A = [[sum(w[i] * rows[i][j] * rows[i][k] for i in range(n)) for k in range(p)] for j in range(p)] + c = [sum(w[i] * rows[i][j] * y[i] for i in range(n)) for j in range(p)] + coef = _solve(A, c) + pred = [sum(coef[j] * rows[i][j] for j in range(p)) for i in range(n)] + ybar = sum(w[i] * y[i] for i in range(n)) / sum(w) + sst = sum(w[i] * (y[i] - ybar) ** 2 for i in range(n)) + ssr = sum(w[i] * (y[i] - pred[i]) ** 2 for i in range(n)) + mape = sum(abs(pred[i] - y[i]) / y[i] for i in range(n) if y[i]) / n * 100 + return dict(coef=coef, r2=(1 - ssr / sst if sst else 0.0), mape=mape, pred=pred) + + +def loglog(x, y): + """Power-law fit y = a * x^b by OLS in log space. Returns (a, b, r2_log).""" + lx = [math.log(v) for v in x] + ly = [math.log(v) for v in y] + n = len(x) + sx, sy = sum(lx), sum(ly) + sxx = sum(v * v for v in lx) + sxy = sum(a * b for a, b in zip(lx, ly)) + b = (n * sxy - sx * sy) / (n * sxx - sx * sx) + a = (sy - b * sx) / n + yb = sy / n + sst = sum((v - yb) ** 2 for v in ly) + ssr = sum((ly[i] - (a + b * lx[i])) ** 2 for i in range(n)) + return math.exp(a), b, (1 - ssr / sst if sst else 0.0) diff --git a/Benchmarks/Statistics/benchstats/load.py b/Benchmarks/Statistics/benchstats/load.py new file mode 100644 index 00000000..769bc608 --- /dev/null +++ b/Benchmarks/Statistics/benchstats/load.py @@ -0,0 +1,35 @@ +"""CSV loading for the benchmark datasets under `data/`.""" +import csv +import os + +ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +DATA = os.path.join(ROOT, "data") +FIGURES = os.path.join(ROOT, "figures") + + +def load(rel_path): + """Load `data/` as a list of dict rows.""" + with open(os.path.join(DATA, rel_path)) as f: + return list(csv.DictReader(f)) + + +def numeric(rows, *names, require=True): + """Return parallel float columns for `names`, dropping any row where a + requested field is missing / non-numeric (e.g. DNF). Returns (cols..., kept_rows).""" + cols = [[] for _ in names] + kept = [] + for r in rows: + vals = [] + ok = True + for nm in names: + v = r.get(nm, "") + try: + vals.append(float(v)) + except (TypeError, ValueError): + ok = False + break + if ok: + for i, v in enumerate(vals): + cols[i].append(v) + kept.append(r) + return (*cols, kept) diff --git a/Benchmarks/Statistics/benchstats/plot.py b/Benchmarks/Statistics/benchstats/plot.py new file mode 100644 index 00000000..c0224275 --- /dev/null +++ b/Benchmarks/Statistics/benchstats/plot.py @@ -0,0 +1,165 @@ +"""Reusable matplotlib builders for the benchmark cost-model graphs.""" +import math +import os + +import matplotlib +matplotlib.use("Agg") # headless: render to file +import matplotlib.pyplot as plt + +from . import fit +from .load import FIGURES + +# stable per-library colours (Aiur env column); falls back to grey +ENV_COLOR = { + "Init": "#1f77b4", "Std": "#2ca02c", "Lean": "#9467bd", + "Mathlib": "#d62728", "IxVM": "#7f7f7f", +} + + +def _save(figpath, svg): + os.makedirs(FIGURES, exist_ok=True) + out = os.path.join(FIGURES, figpath) + plt.gcf().savefig(out, dpi=140, bbox_inches="tight") + if svg: + s = out.rsplit(".", 1)[0] + ".svg" + plt.gcf().savefig(s) + plt.close() + print(f"wrote {out}") + + +def feature_panels(points, target_name, target_label, out, title, svg=False): + """Small-multiples: one log-log panel per profiled feature vs the target cost, + each with a power-law fit (y = a*x^b) overlaid. `points` is a list of dicts; + feature columns are every key except name/env/the target.""" + feats = [k for k in points[0] if k not in ("name", "env", target_name)] + ncol = 2 + nrow = (len(feats) + ncol - 1) // ncol + fig, axes = plt.subplots(nrow, ncol, figsize=(11, 3.2 * nrow), squeeze=False) + for idx, feat in enumerate(feats): + ax = axes[idx // ncol][idx % ncol] + xs, ys, cols = [], [], [] + for p in points: + try: + x = float(p[feat]); y = float(p[target_name]) + except (ValueError, KeyError): + continue + if x > 0 and y > 0: + xs.append(x); ys.append(y); cols.append(ENV_COLOR.get(p.get("env", ""), "#1f77b4")) + ax.scatter(xs, ys, s=14, c=cols, alpha=0.7, edgecolors="none") + if len(xs) >= 3: + a, b, r2 = fit.loglog(xs, ys) + gx = [min(xs), max(xs)] + ax.plot(gx, [a * v ** b for v in gx], "k-", lw=1) + ax.set_title(f"{feat}: y≈{a:.2g}·{feat}^{b:.2f} R²(log)={r2:.3f}", fontsize=9) + ax.set_xscale("log"); ax.set_yscale("log") + ax.set_xlabel(feat); ax.set_ylabel(target_label, fontsize=8) + ax.grid(True, which="both", ls=":", lw=0.4, alpha=0.5) + for j in range(len(feats), nrow * ncol): + axes[j // ncol][j % ncol].axis("off") + fig.suptitle(title, fontsize=12) + fig.tight_layout(rect=[0, 0, 1, 0.97]) + _save(out, svg) + + +def cost_vs_runtime(x, xlabel, panels, out, title, colors=None, svg=False): + """Stacked shared-x panels (one figure = "one graph"): cost on x, each panel a + runtime measure (time / throughput / RAM). `panels` = list of + dict(label, y, ylabel, yscale, fit). `fit` in {None,'loglog','affine'}.""" + fig, axes = plt.subplots(len(panels), 1, figsize=(9, 2.6 * len(panels)), + sharex=True, squeeze=False) + for i, pan in enumerate(panels): + ax = axes[i][0] + xs = [x[k] for k in range(len(x)) if x[k] is not None and pan["y"][k] is not None and pan["y"][k] > 0] + ys = [pan["y"][k] for k in range(len(x)) if x[k] is not None and pan["y"][k] is not None and pan["y"][k] > 0] + cs = ([colors[k] for k in range(len(x)) if x[k] is not None and pan["y"][k] is not None and pan["y"][k] > 0] + if colors else "#1f77b4") + ax.scatter(xs, ys, s=16, c=cs, alpha=0.75, edgecolors="none") + if pan.get("fit") == "loglog" and len(xs) >= 3: + a, b, r2 = fit.loglog(xs, ys) + gx = sorted(xs) + ax.plot(gx, [a * v ** b for v in gx], "k-", lw=1, + label=f"y≈{a:.2g}·x^{b:.2f} (R²log {r2:.3f})") + ax.legend(fontsize=7, loc="upper left") + elif pan.get("fit") == "affine" and len(xs) >= 3: + r = fit.wls([xs], ys, relative=False) + b, a = r["coef"] + gx = sorted(xs) + ax.plot(gx, [a + b * v for v in gx], "k-", lw=1, + label=f"y≈{a:.3g}+{b:.3g}·x (R² {r['r2']:.3f})") + ax.legend(fontsize=7, loc="upper left") + ax.set_xscale("log") + ax.set_yscale(pan.get("yscale", "log")) + ax.set_ylabel(pan["ylabel"], fontsize=9) + ax.grid(True, which="both", ls=":", lw=0.4, alpha=0.5) + if pan.get("note"): + ax.text(0.99, 0.05, pan["note"], transform=ax.transAxes, fontsize=7, + ha="right", va="bottom", color="#888") + axes[-1][0].set_xlabel(xlabel) + fig.suptitle(title, fontsize=12) + fig.tight_layout(rect=[0, 0, 1, 0.97]) + _save(out, svg) + + +def model_vs_data(panels, out, title, svg=False): + """Overlay a *fixed* model line (no fitted params) on measured points and + annotate the model's R² and MAPE against the data. `panels` = list of + dict(x, y, model, ylabel, formula[, y2, y2label]). `model` is a callable + x->ŷ (e.g. read from shard.rs); R² = 1 − Σ(y−ŷ)²/Σ(y−ȳ)².""" + fig, axes = plt.subplots(len(panels), 1, figsize=(8.5, 4.0 * len(panels)), + squeeze=False) + for i, pan in enumerate(panels): + ax = axes[i][0] + x, y, model = pan["x"], pan["y"], pan["model"] + groups = pan.get("groups") + if groups: + styles = {"single w5": ("o", "#1f77b4"), "sharded w5": ("^", "#2ca02c"), + "sharded w10": ("s", "#ff7f0e"), "single w10": ("D", "#9467bd")} + for g in dict.fromkeys(groups): + xs_g = [x[i] for i in range(len(x)) if groups[i] == g] + ys_g = [y[i] for i in range(len(x)) if groups[i] == g] + mk, col = styles.get(g, ("o", "#1f77b4")) + ax.scatter(xs_g, ys_g, s=38, c=col, marker=mk, zorder=3, + label=f"measured ({g} leaf)") + else: + ax.scatter(x, y, s=34, c="#1f77b4", zorder=3, label="measured (leaf)") + if pan.get("y2") is not None: + ax.scatter(x, pan["y2"], s=20, c="#7f7f7f", marker="x", zorder=3, + label=pan.get("y2label", "measured (2)")) + gx = [min(x) * (max(x) / min(x)) ** (k / 100) for k in range(101)] + ax.plot(gx, [model(v) for v in gx], "-", lw=1.8, color="#d62728", zorder=2, + label=f"shard.rs model: {pan['formula']}") + yhat = [model(v) for v in x] + ybar = sum(y) / len(y) + ss_res = sum((a - b) ** 2 for a, b in zip(y, yhat)) + ss_tot = sum((a - ybar) ** 2 for a in y) + r2 = 1 - ss_res / ss_tot if ss_tot else float("nan") + mape = sum(abs(a - b) / a for a, b in zip(y, yhat)) / len(y) * 100 + ax.set_xscale("log"); ax.set_yscale("log") + ax.set_xlabel("guest STEPS (cycles)"); ax.set_ylabel(pan["ylabel"]) + ax.set_title(f"{pan['ylabel']} — model R²={r2:.3f}, MAPE={mape:.0f}%", fontsize=10) + ax.grid(True, which="both", ls=":", lw=0.4, alpha=0.5) + ax.legend(fontsize=8, loc="upper left") + fig.suptitle(title, fontsize=12) + fig.tight_layout(rect=[0, 0, 1, 0.97]) + _save(out, svg) + + +def model_lines(xrange, panels, out, title, svg=False): + """Draw documented model lines over a cost range when no raw points exist + (Zisk prove model). `panels` = list of dict(label, fn, ylabel, yscale).""" + xs = [xrange[0] * (xrange[1] / xrange[0]) ** (k / 60) for k in range(61)] + fig, axes = plt.subplots(len(panels), 1, figsize=(9, 2.6 * len(panels)), + sharex=True, squeeze=False) + for i, pan in enumerate(panels): + ax = axes[i][0] + ax.plot(xs, [pan["fn"](v) for v in xs], "-", lw=1.6, color="#d62728", + label=pan.get("label", "")) + ax.set_xscale("log"); ax.set_yscale(pan.get("yscale", "linear")) + ax.set_ylabel(pan["ylabel"], fontsize=9) + ax.grid(True, which="both", ls=":", lw=0.4, alpha=0.5) + if pan.get("label"): + ax.legend(fontsize=7, loc="upper left") + axes[-1][0].set_xlabel("cycles (steps)") + fig.suptitle(title, fontsize=12) + fig.tight_layout(rect=[0, 0, 1, 0.97]) + _save(out, svg) diff --git a/Benchmarks/Statistics/benchstats/shard_model.py b/Benchmarks/Statistics/benchstats/shard_model.py new file mode 100644 index 00000000..a2d8f330 --- /dev/null +++ b/Benchmarks/Statistics/benchstats/shard_model.py @@ -0,0 +1,38 @@ +"""Read the load-bearing Zisk cost-model constants straight from the planner +(`crates/kernel/src/shard.rs`). + +We deliberately do NOT define the model here. `shard.rs` is the single source of +truth; this just parses its calibrated `pub const`s so the validation plots +overlay the *actual* code model against measured data (no Python-fit function). +""" +import os +import re + +# The planner is vendored in this repo; resolve it relative to this file +# (Benchmarks/Statistics/benchstats/ -> repo root). Fall back to a sibling +# `~/ix` checkout for anyone running outside a full worktree. +_HERE = os.path.dirname(os.path.abspath(__file__)) +_IN_REPO = os.path.normpath(os.path.join(_HERE, "..", "..", "..", "crates", "kernel", "src", "shard.rs")) +_SIBLING = os.path.expanduser("~/ix/crates/kernel/src/shard.rs") +DEFAULT_SHARD_RS = _IN_REPO if os.path.exists(_IN_REPO) else _SIBLING + +NAMES = [ + "STEPS_PER_HEARTBEAT", "STEPS_PER_SUBST", "STEPS_PER_INGRESS_BYTE", + "SHARD_STEP_FLOOR", "RAM_BASE_GIB", "RAM_GIB_PER_BCYCLE", + "PROVE_SETUP_SECS", "PROVE_SECS_PER_BCYCLE", +] + + +def read_constants(path=None): + """Parse the planner constants from shard.rs. Returns {NAME: float}.""" + path = path or DEFAULT_SHARD_RS + if not os.path.exists(path): + raise SystemExit(f"shard.rs not found at {path}; pass --shard-rs ") + txt = open(path).read() + out = {} + for n in NAMES: + m = re.search(rf"pub const {n}\s*:\s*\w+\s*=\s*([0-9_.]+)", txt) + if not m: + raise SystemExit(f"constant {n} not found in {path}") + out[n] = float(m.group(1).replace("_", "")) + return out diff --git a/Benchmarks/Statistics/data/aiur/cost.csv b/Benchmarks/Statistics/data/aiur/cost.csv new file mode 100644 index 00000000..b2f1da4c --- /dev/null +++ b/Benchmarks/Statistics/data/aiur/cost.csv @@ -0,0 +1,99 @@ +name,fft_cost,exec_us,prove_ms,proof_bytes,peak_rss_kb,source,env +HEq,1716582,2709.036000,1511.967441,,,orig41,Init +Nat,1857523,2295.765000,1451.002140,,,orig41,Init +Eq.rec,2575400,2491.636000,1696.609309,,,orig41,Init +IxVMInd.Tree,2633415,2961.094000,2091.893683,,,orig41,IxVM +HEq.rec,2692988,2749.444000,1864.444050,,,orig41,Init +Trans.mk,2911629,2815.188000,2269.840758,,,orig41,Init +IxVMInd.Tree.rec,4858321,4005.020000,2349.184884,,,orig41,IxVM +IxVMPrim.nat_succ_lit,7330826,4590.689000,2080.253773,,,orig41,IxVM +Nat.add,13343000,8235.306000,3006.046391,,,orig41,Init +IxVMPrim.nat_pred_lit,14804098,8008.256000,3344.730015,,,orig41,IxVM +IxVMPrim.nat_ble_lit,23016526,12205.690000,2889.459036,,,orig41,IxVM +IxVMPrim.nat_mul_big,24580879,12603.493000,2871.700646,,,orig41,IxVM +IxVMPrim.nat_beq_lit,24752029,12961.951000,3041.012543,,,orig41,IxVM +IxVMPrim.nat_mul_lit,25101067,13312.845000,2919.447789,,,orig41,IxVM +IxVMInd.Odd,26245849,13382.838000,3229.612840,,,orig41,IxVM +IxVMInd.Even,26482492,13312.525000,3081.631162,,,orig41,IxVM +IxVMPrim.nat_add_lit,28639807,15834.989000,3066.531031,,,orig41,IxVM +IxVMInd.Odd.rec,32163380,16289.550000,3264.522357,,,orig41,IxVM +IxVMInd.Even.rec,32164273,15633.328000,2942.773558,,,orig41,IxVM +IxVMPrim.nat_sub_lit,34436244,16745.382000,2838.455329,,,orig41,IxVM +IxVMPrim.nat_shl_lit,35417490,17565.795000,3935.793510,,,orig41,IxVM +Nat.add_comm,56084908,26649.125000,3301.877833,,,orig41,Init +Nat.decEq,71921625,32966.190000,3163.015799,,,orig41,Init +IxVMPrim.nat_dec_eq,86118842,42111.659000,3456.470853,,,orig41,IxVM +Nat.decLe,209641496,84878.629000,3534.855399,,,orig41,Init +IxVMPrim.nat_dec_le,216661883,87646.127000,4331.074015,,,orig41,IxVM +IxVMPrim.nat_dec_lt,220751034,92852.029000,3491.598835,,,orig41,IxVM +IxVMPrim.nat_div_lit,405607545,164727.388000,4705.115177,,,orig41,IxVM +IxVMPrim.nat_shr_lit,411128901,167162.878000,4576.064308,,,orig41,IxVM +IxVMPrim.nat_mod_lit,414695549,177135.922000,4354.325562,,,orig41,IxVM +Nat.sub_le_of_le_add,567575653,231292.967000,5009.295195,,,orig41,Init +IxVMPrim.bv_to_nat_lit,635780327,283418.638000,5208.181412,,,orig41,IxVM +IxVMPrim.nat_gcd_lit,665518356,264990.052000,5203.121133,,,orig41,IxVM +String.Internal.append,793580333,335119.396000,5273.904282,,,orig41,Init +IxVMPrim.str_size_lit,802563877,337779.280000,6393.169343,,,orig41,IxVM +IxVMPrim.nat_land_lit,1138665214,497387.819000,6952.455222,,,orig41,IxVM +IxVMPrim.nat_lor_lit,1139887801,446084.293000,6843.340563,,,orig41,IxVM +IxVMPrim.nat_xor_lit,1149371965,527053.141000,7006.902619,,,orig41,IxVM +_private.Init.Prelude.0.Lean.extractMainModule._unsafe_rec,1197925029,577981.226000,6904.757023,,,orig41,Init +Array.append_assoc,3938574533,1756291.622000,16390.973236,,,orig41,Init +Vector.append,4023268168,1718585.829000,14612.319714,,,orig41,Init +Int.emod_emod_of_dvd,3856852693,1861331.911000,12239.866458,33868729,13765596,pr_mined,Init +Vector.extract_append,61830646478,30695507.402000,137766.872018,34208521,150139236,pr_mined,Init +List.mergeSort,13825318985,6346116.814000,31659.065544,34032225,36816268,pr_mined,Init +Array.qsort,15781689533,7322515.498000,36384.228506,34032225,41617024,pr_mined,Init +Fin.foldl,10853255199,4970172.837000,26124.926977,33948877,30540212,probe_unique,Init +Array.binSearch,14397133548,6453202.376000,33123.702388,34032225,37736200,probe_unique,Init +String.split,19578088286,9255175.622000,48419.866594,34032225,56381116,probe_unique,Init +Acc.rec,3505064,3870.067000,1575.785962,33580137,1870988,batch,Init +Array.toList,3332563,3717.096000,1873.389537,33580137,1890228,batch,Init +Prod.map,6904183,5989.784000,1785.964875,33580137,1889320,batch,Init +Sum.elim,5589905,5359.944000,1513.962962,33580137,1895648,batch,Init +Except.bind,7667869,6487.139000,1639.463674,33580137,1934128,batch,Init +Option.bind,7329183,6232.448000,1664.409360,33580137,1926576,batch,Init +WellFounded.fix,10125144,7777.851000,1337.247593,33580137,1907148,batch,Init +List.zipWith,20439088,13681.522000,1622.200089,33580137,2005244,batch,Init +List.dropLast,19509718,12780.138000,1815.338384,33580137,1951680,batch,Init +List.foldr,18579757,12715.948000,1935.755914,33580137,1980088,batch,Init +List.filterMap,25335779,16666.258000,1680.626926,33580137,1985764,batch,Init +List.range,20251801,13874.818000,2327.316771,33580137,1957696,batch,Init +List.foldlM,39202740,23516.705000,2105.941388,33580137,2050940,batch,Init +BitVec.toFin,50437466,29421.349000,2178.496318,33580137,2038400,batch,Init +Int.add,44714703,26202.108000,2156.141557,33580137,2036632,batch,Init +UInt32.toNat,59331806,33833.456000,1743.415643,33580137,2136644,batch,Init +USize.toNat,71607481,39501.772000,2095.931852,33580137,2178892,batch,Init +Nat.strongRecOn,273068854,124130.524000,2777.081482,33580137,2784704,batch,Init +Int.emod,422940733,197871.636000,3157.906850,33647485,3495440,batch,Init +Array.foldlM,434577494,200947.045000,3298.235832,33647485,3678052,batch,Init +Int.ediv,430476738,197321.002000,3404.851868,33647485,3515716,batch,Init +Array.foldl,449323126,204778.063000,3712.239807,33647485,3603224,batch,Init +Array.filter,464818232,210196.681000,4371.871365,33647485,3637196,batch,Init +Int.gcd,657502637,311227.329000,4257.207621,33718033,4093360,batch,Init +BitVec.add,617113462,286846.869000,3568.536969,33718033,3938940,batch,Init +Nat.toDigits,663606297,318489.523000,4740.233077,33718033,4497380,batch,Init +Array.zipWith,736658636,341122.293000,7432.528628,33718033,4840880,batch,Init +Array.map,734574964,371822.898000,4888.655202,33718033,4928464,batch,Init +BitVec.umod,926177790,423203.207000,5012.763637,33718033,5212184,batch,Init +Lean.Name.hash,861742653,388630.740000,5366.628320,33718033,5124012,batch,Init +Nat.repr,966452765,425124.125000,5104.063182,33718033,5250984,batch,Init +Int.repr,993792541,465887.179000,5148.619131,33718033,5285656,batch,Init +String.intercalate,1089240518,483233.812000,5925.904923,33718033,5505476,batch,Init +Char.toLower,1198467414,647832.393000,8104.520180,33718033,5562768,batch,Init +Lean.Expr.replace,859625514,424191.396000,5485.857483,33718033,5157932,batch,Lean +Array.qsortOrd,15841062472,7895491.170000,46996.389859,34032225,41654568,batch,Init +Nat.factorial,33562426,29997.000000,2341.706000,,4515412,mathlib,Mathlib +Nat.choose,29018862,42322.000000,3283.352000,,4516096,mathlib,Mathlib +Nat.gcd_comm,1954958779,1027521.000000,7624.748000,,10236924,mathlib,Init +Finset.sum,3045189408,1415541.000000,10396.109000,,13414292,mathlib,Mathlib +Finset.prod,3045165822,1427049.000000,9874.716000,,13433008,mathlib,Mathlib +Polynomial.eval,5342731754,2509535.000000,15517.503000,,20890920,mathlib,Mathlib +Nat.Prime.two_le,1504045298,812137.000000,6519.006000,,9527972,mathlib,Mathlib +Multiset.sort,18670960624,9385244.000000,51109.624000,,56416568,mathlib,Mathlib +List.Sorted,9578666,12938.000000,1461.827000,,4516448,mathlib,Mathlib +GCDMonoid.gcd,1005736276,533479.000000,5620.519000,,7528544,mathlib,Mathlib +Nat.fib,34171209,30586.000000,1881.654000,,4515464,mathlib,Mathlib +ByteSlice.ofByteArray,107574377,76222.000000,2292.200000,,4494308,std,Std +Std.Time.Month.Offset.ofNat,3607673,6701.000000,1649.519000,,4492220,std,Std +Std.Time.Week.Offset.ofMilliseconds,24577209792,12290074.000000,56386.402000,,64601332,std,Std diff --git a/Benchmarks/Statistics/data/aiur/features.csv b/Benchmarks/Statistics/data/aiur/features.csv new file mode 100644 index 00000000..0ae34624 --- /dev/null +++ b/Benchmarks/Statistics/data/aiur/features.csv @@ -0,0 +1,66 @@ +name,env,hb,subst,defeq,bytes,fft +Nat,Init,3,0,0,113,1857523 +Eq.rec,Init,5,197,25,206,2575400 +HEq.rec,Init,5,268,32,221,2692988 +Trans.mk,Init,11,1634,39,88,2911629 +Nat.add,Init,61,1074,174,1889,13343000 +Nat.add_comm,Init,360,3998,817,6908,56084908 +Nat.decEq,Init,266,6747,1084,8116,71921625 +Nat.decLe,Init,709,18105,2462,21222,209641496 +Nat.sub_le_of_le_add,Init,1915,48701,7249,54845,567575653 +Array.append_assoc,Init,6056,227605,24124,199166,3938574533 +Vector.append,Init,6260,231194,24875,205557,4023268168 +Int.emod_emod_of_dvd,Init,11951,226571,46799,345084,3856852693 +Vector.extract_append,Init,93512,1468359,284163,1447559,61830646478 +List.mergeSort,Init,32915,642232,129128,912942,13825318985 +Array.qsort,Init,41060,739383,146682,1001670,15781689533 +Fin.foldl,Init,24695,536883,107132,778754,10853255199 +Array.binSearch,Init,34358,646852,128045,891352,14397133548 +String.split,Init,39939,928723,159378,1141622,19578088286 +Acc.rec,Init,10,770,39,315,3505064 +Array.toList,Init,20,160,18,281,3332563 +Prod.map,Init,19,1170,105,804,6904183 +Sum.elim,Init,15,745,75,693,5589905 +Except.bind,Init,26,1283,130,1004,7667869 +Option.bind,Init,30,1052,105,945,7329183 +WellFounded.fix,Init,34,2199,161,1108,10125144 +List.zipWith,Init,91,2928,290,2378,20439088 +List.dropLast,Init,86,2197,262,2460,19509718 +List.foldr,Init,79,2026,235,2383,18579757 +List.filterMap,Init,106,2612,323,3273,25335779 +List.range,Init,83,1316,220,2727,20251801 +BitVec.toFin,Init,176,3055,512,5913,50437466 +Int.add,Init,170,3290,538,6034,44714703 +UInt32.toNat,Init,182,3099,566,6967,59331806 +USize.toNat,Init,198,3652,642,8391,71607481 +Nat.strongRecOn,Init,764,28682,3271,24974,273068854 +Int.emod,Init,1405,34415,4908,42899,422940733 +Int.ediv,Init,1417,34507,4931,43384,430476738 +Array.foldl,Init,1284,38282,4743,41477,449323126 +Array.filter,Init,1322,39529,4907,43185,464818232 +Int.gcd,Init,2158,52973,7779,65618,657502637 +BitVec.add,Init,2023,47795,7010,61393,617113462 +Nat.toDigits,Init,1895,45229,7191,64755,663606297 +Array.zipWith,Init,2420,54985,8043,69839,736658636 +Array.map,Init,2415,54424,8019,69838,734574964 +BitVec.umod,Init,2585,71781,10486,89060,926177790 +Lean.Name.hash,Init,2451,57157,8714,82181,861742653 +Nat.repr,Init,2658,62863,9949,91470,966452765 +Int.repr,Init,2693,63371,10062,93136,993792541 +String.intercalate,Init,3004,73875,11525,99770,1089240518 +Char.toLower,Init,3588,82453,13103,115675,1198467414 +Array.qsortOrd,Init,41119,739993,146849,1004447,15841062472 +Nat.factorial,Mathlib,135,2318,383,4549,33562426 +Nat.choose,Mathlib,117,2271,350,3747,29018862 +Nat.gcd_comm,Init,6235,142140,23659,182667,1954958779 +Finset.sum,Mathlib,4973,233536,23920,185897,3045189408 +Finset.prod,Mathlib,4973,233536,23920,185897,3045165822 +Polynomial.eval,Mathlib,7690,450001,40747,304011,5342731754 +Nat.Prime.two_le,Mathlib,4416,102249,15229,131423,1504045298 +Multiset.sort,Mathlib,43220,899845,162155,1113846,18670960624 +List.Sorted,Mathlib,49,1776,89,796,9578666 +GCDMonoid.gcd,Mathlib,2757,79856,9980,85512,1005736276 +Nat.fib,Mathlib,119,2513,364,4284,34171209 +ByteSlice.ofByteArray,Std,323,5758,991,12074,107574377 +Std.Time.Month.Offset.ofNat,Std,9,3,5,383,3607673 +Std.Time.Week.Offset.ofMilliseconds,Std,33688,646214,138123,987960,24577209792 diff --git a/Benchmarks/Statistics/data/aiur/predicted_vs_actual.csv b/Benchmarks/Statistics/data/aiur/predicted_vs_actual.csv new file mode 100644 index 00000000..b7edc89d --- /dev/null +++ b/Benchmarks/Statistics/data/aiur/predicted_vs_actual.csv @@ -0,0 +1,66 @@ +name,env,actual_fft,predicted_fft +Nat,Init,1857523,1913777 +Eq.rec,Init,2575400,2618644 +HEq.rec,Init,2692988,2818184 +Trans.mk,Init,2911629,3172060 +Array.toList,Init,3332563,2795425 +Acc.rec,Init,3505064,3557025 +Std.Time.Month.Offset.ofNat,Std,3607673,3001879 +Sum.elim,Init,5589905,5851932 +Prod.map,Init,6904183,7190959 +Option.bind,Init,7329183,7791742 +Except.bind,Init,7667869,8723770 +List.Sorted,Mathlib,9578666,7173089 +WellFounded.fix,Init,10125144,10426904 +Nat.add,Init,13343000,14007538 +List.foldr,Init,18579757,18544657 +List.dropLast,Init,19509718,19694567 +List.range,Init,20251801,19634460 +List.zipWith,Init,20439088,20370498 +List.filterMap,Init,25335779,25841377 +Nat.choose,Mathlib,29018862,28940757 +Nat.factorial,Mathlib,33562426,34336981 +Nat.fib,Mathlib,34171209,32465875 +Int.add,Init,44714703,47564092 +BitVec.toFin,Init,50437466,46021725 +Nat.add_comm,Init,56084908,60794437 +UInt32.toNat,Init,59331806,53727232 +USize.toNat,Init,71607481,64709054 +Nat.decEq,Init,71921625,77473249 +ByteSlice.ofByteArray,Std,107574377,98677420 +Nat.decLe,Init,209641496,211779321 +Nat.strongRecOn,Init,273068854,271198826 +Int.emod,Init,422940733,455184097 +Int.ediv,Init,430476738,459485153 +Array.foldl,Init,449323126,442489492 +Array.filter,Init,464818232,461196360 +Nat.sub_le_of_le_add,Init,567575653,634461296 +BitVec.add,Init,617113462,672758257 +Int.gcd,Init,657502637,735498708 +Nat.toDigits,Init,663606297,701775836 +Array.map,Init,734574964,776415628 +Array.zipWith,Init,736658636,777748639 +Lean.Name.hash,Init,861742653,895622891 +BitVec.umod,Init,926177790,1024619382 +Nat.repr,Init,966452765,1015475836 +Int.repr,Init,993792541,1032652579 +GCDMonoid.gcd,Mathlib,1005736276,985927330 +String.intercalate,Init,1089240518,1146173038 +Char.toLower,Init,1198467414,1334500616 +Nat.Prime.two_le,Mathlib,1504045298,1554338678 +Nat.gcd_comm,Init,1954958779,2324336182 +Finset.prod,Mathlib,3045165822,2442491797 +Finset.sum,Mathlib,3045189408,2442491797 +Int.emod_emod_of_dvd,Init,3856852693,4688687459 +Array.append_assoc,Init,3938574533,2551361279 +Vector.append,Init,4023268168,2636160685 +Polynomial.eval,Mathlib,5342731754,4300016404 +Fin.foldl,Init,10853255199,11397983888 +List.mergeSort,Init,13825318985,13705192825 +Array.binSearch,Init,14397133548,13466760466 +Array.qsort,Init,15781689533,15413615828 +Array.qsortOrd,Init,15841062472,15447012613 +Multiset.sort,Mathlib,18670960624,17312899573 +String.split,Init,19578088286,17462986906 +Std.Time.Week.Offset.ofMilliseconds,Std,24577209792,14799116898 +Vector.extract_append,Init,61830646478,26830910000 diff --git a/Benchmarks/Statistics/data/zisk/multi_shard.csv b/Benchmarks/Statistics/data/zisk/multi_shard.csv new file mode 100644 index 00000000..b6ffa06b --- /dev/null +++ b/Benchmarks/Statistics/data/zisk/multi_shard.csv @@ -0,0 +1,13 @@ +shard,blocks,hb,bytes,subst,cycles +803,3,3117,23174,6785,771527739 +806,6,4586,32062,10479,810732054 +502,2,4057,24927,15452,1034769720 +68,37,2806,115271,67589,1127754269 +957,278,2672,397663,145185,1382497091 +290,19,4540,72883,75048,1397057638 +383,87,3370,162554,170516,1584528514 +870,311,4151,255573,173497,1773123833 +273,101,4911,159997,227820,1782171986 +815,428,3823,378779,253556,2526432086 +634,1,32843,2308,513,5695914962 +630,1,32843,2309,513,5697160198 diff --git a/Benchmarks/Statistics/data/zisk/multishard_failed.csv b/Benchmarks/Statistics/data/zisk/multishard_failed.csv new file mode 100644 index 00000000..c9012645 --- /dev/null +++ b/Benchmarks/Statistics/data/zisk/multishard_failed.csv @@ -0,0 +1,3 @@ +name,env,witness,work_items,heartbeats,pred_steps_from_hb,peak_ram_gib,outcome +initstd-shard1,initStd,10,1,83063,13483482557,90.99,prover_crash_execute (single atomic block too large for one leaf; cannot be sharded) +initstd-shard2,initStd,10,1,14807,2403354973,235.0,OOM_during_prove (watchdog-killed >235GB; model predicts ~129GB → under-predicts ~2x) diff --git a/Benchmarks/Statistics/data/zisk/prove_real.csv b/Benchmarks/Statistics/data/zisk/prove_real.csv new file mode 100644 index 00000000..9ff47518 --- /dev/null +++ b/Benchmarks/Statistics/data/zisk/prove_real.csv @@ -0,0 +1,32 @@ +name,kind,witness,steps,leaf_secs,wall_secs,peak_ram_gib +nataddcomm,single,5,54594862,12.53,27.36,40.0286 +natreclinear,single,5,61944311,18.22,33.23,40.16 +natmulcomm,single,5,102546519,19.95,35.09,43.82 +stringappend,single,5,607134299,104.75,119.51,58.0974 +natgcdcomm,single,5,1164384528,198.31,213.19,72.6304 +stringappendassoc,single,5,1748739518,305.27,321.37,92.3206 +arraysizemap,single,5,1923985757,329.65,345.24,96.4352 +natfoldsucc,single,5,4689387273,809.12,828.86,194.30 +mergesort-s1,sharded,5,1662509003,288.49,306.27,88.6702 +mergesort-s2,sharded,5,1554408672,265.82,281.46,83.201 +mergesort-s3,sharded,5,1415421503,240.87,256.41,79.0282 +mergesort-s4,sharded,5,1219294461,207.43,223.17,73.6785 +mergesort-s5,sharded,5,1356906024,234.03,252.24,75.6543 +mergesort-s6,sharded,5,548508025,96.83,113.96,58.2065 +mergesort-s6-w10,sharded,10,548508025,92.30,109.45,59.8251 +mergesort-s4-w10,sharded,10,1219294461,198.69,217.15,77.2888 +mergesort-s1-w10,sharded,10,1662509003,273.48,292.02,90.5705 +rbmap-s1,sharded,10,1120266962,198.30,279.22,75.2428 +rbmap-s4,sharded,10,1012883213,166.07,183.03,72.437 +rbmap-s8,sharded,10,898178083,149.93,167.67,68.2036 +binsearch-s1,sharded,10,1327416992,223.01,240.22,82.3585 +binsearch-s5,sharded,10,884038553,145.16,162.85,73.1912 +binsearch-s9,sharded,10,151407011,28.26,45.36,48.6717 +nataddcomm-w10,single,10,54594862,11.97,28.79,42.3264 +natreclinear-w10,single,10,61944311,13.35,30.18,42.9078 +natmulcomm-w10,single,10,102546519,19.41,35.68,45.3372 +stringappend-w10,single,10,607134299,100.25,117.75,63.4575 +natgcdcomm-w10,single,10,1164384528,188.74,206.23,78.6362 +stringappendassoc-w10,single,10,1748739518,286.17,303.35,91.7812 +arraysizemap-w10,single,10,1923985757,320.48,338.73,103.114 +natfoldsucc-w10,single,10,4689387273,765.43,785.55,196.734 diff --git a/Benchmarks/Statistics/data/zisk/single_shard.csv b/Benchmarks/Statistics/data/zisk/single_shard.csv new file mode 100644 index 00000000..b61a2f28 --- /dev/null +++ b/Benchmarks/Statistics/data/zisk/single_shard.csv @@ -0,0 +1,67 @@ +name,hb,bytes,subst,cycles +Nat,3,113,0,975244 +Eq.rec,5,206,197,2348520 +Acc.rec,10,315,770,5105888 +Sum.elim,15,693,745,6618130 +Prod.map,19,804,1170,8177456 +Option.bind,30,945,1052,7440608 +WellFounded.fix,34,1108,2199,13415585 +Nat.add,61,1889,1074,10606339 +List.foldr,79,2383,2026,16707100 +List.range,83,2727,1316,13666491 +Array.toList,20,281,160,2580844 +Int.add,170,6034,3290,27635032 +BitVec.toFin,176,5913,3055,28681028 +Nat.add_comm,360,6908,3998,53239676 +USize.toNat,198,8391,3652,35811906 +Nat.decEq,266,8116,6747,57411966 +Nat.decLe,709,21222,18105,143391161 +Nat.strongRecOn,764,24974,28682,190849758 +Int.emod,1405,42899,34415,269380418 +Array.foldl,1284,41477,38282,278537034 +Nat.sub_le_of_le_add,1915,54845,48701,373184538 +Int.gcd,2158,65618,52973,409112011 +Array.map,2415,69838,54424,443199245 +Lean.Name.hash,2451,82181,57157,447441591 +BitVec.umod,2585,89060,71781,526467117 +Nat.repr,2658,91470,62863,498729913 +String.intercalate,3004,99770,73875,599428829 +Char.toLower,3588,115675,82453,665920824 +Int.emod_emod_of_dvd,11951,345084,226571,2201588182 +Vector.append,6260,205557,231194,1614275115 +Fin.foldl,24695,778754,536883,5110854190 +List.mergeSort,32915,912942,642232,6706906294 +Array.binSearch,34358,891352,646852,6785827470 +Array.qsort,41060,1001670,739383,7199288749 +String.split,39939,1141622,928723,8657387499 +HEq.rec,5,221,268,2727278 +Trans.mk,11,88,1634,7229214 +Except.bind,26,1004,1283,9427477 +List.zipWith,91,2378,2928,20229977 +List.dropLast,86,2460,2197,17522863 +List.filterMap,106,3273,2612,21435279 +UInt32.toNat,182,6967,3099,29980254 +Int.ediv,1417,43384,34507,270987292 +Array.filter,1322,43185,39529,285847515 +BitVec.add,2023,61393,47795,373772656 +Nat.toDigits,1895,64755,45229,357145741 +Array.zipWith,2420,69839,54985,445195121 +Int.repr,2693,93136,63371,504234535 +Array.qsortOrd,41119,1004447,739993,7206704674 +Nat.gcd_comm,6235,182667,142140,1144352360 +Array.append_assoc,6056,199166,227605,1570256148 +ByteSlice.ofByteArray,323,12074,5758,53555107 +Std.Time.Month.Offset.ofNat,9,383,3,1493508 +Std.Time.Week.Offset.ofMilliseconds,33688,987960,646214,6653972854 +nataddcomm,366,6664,4098,54599876 +natmulcomm,751,14050,8121,102557633 +natgcdcomm,6308,182038,144930,1164570401 +natreclinear,384,2731,1135,61949444 +natfoldsucc,23598,725195,491485,4690178680 +stringappend,3045,98323,75119,607234867 +stringappendassoc,6730,230949,249233,1749121257 +stringtakewhile,43346,1142421,878100,8701352526 +binsearch,34692,892413,650543,6880105406 +mergesort,33195,914047,648183,6797757410 +rbmap,31326,928782,748213,6626926364 +arraysizemap,7307,240119,273397,1924406487 diff --git a/Benchmarks/Statistics/docs/aiur-cost-dataset.md b/Benchmarks/Statistics/docs/aiur-cost-dataset.md new file mode 100644 index 00000000..c931fc2c --- /dev/null +++ b/Benchmarks/Statistics/docs/aiur-cost-dataset.md @@ -0,0 +1,135 @@ +# Aiur FFT cost → execution time, proving time, and RAM + +This benchmark relates an Aiur computation's **FFT cost** — a cheap, deterministic structural measure (`Ix.Aiur.Statistics.computeStats.totalFftCost`, summed over all constrained circuits) — to the wall-clock cost of running and proving it. The workload is a **full-closure typecheck** through the IxVM kernel's `verify_claim` entrypoint (`Ix.Claim.check addr none`), which re-typechecks every constant in the target's transitive closure. Each row is one Lean constant. Measured on `main` (`ad7e383`), a 32-core / 250 GB host. + +**Why it's useful:** from a constant's FFT cost alone you can predict its execution time, proving time, and peak RAM before running it — useful for choosing benchmark targets that fit a timeout or memory budget. + +**Predicting the FFT cost itself** (without running the Aiur prover) — from cheap out-of-circuit profiler features (`hb`/`subst`/`defeq`/`bytes`) — is documented in the companion **`aiur-fft-cost-model.md`** (this directory): the exact formula `Σ width·h·log₂h`, the per-circuit cost drivers (BLAKE3←bytes, instantiation←subst, memory←defeq, plus the unpredicted big-Nat `klimbs` family), and a cheap `n·log₂n` model at R²≈0.976. + +**Dataset:** 61 points, each a distinct constant *shape*, spanning FFT cost **1.86e+06 → 6.18e+10**, plus 6 that exceeded available RAM. The `env` column is the defining **library** (40 Init, 3 Std, 6 Mathlib, 1 Lean, 11 IxVM test fixtures) — by each constant's *defining module*, which can differ from its name's namespace (e.g. `Lean.Name.hash` is `Init.Prelude`; `ByteSlice.ofByteArray` is `Std.Data.ByteSlice`). + +## Models (n=61) + +| quantity | formula | R² | notes | +|---|---|---|---| +| Execution time | `exec_ms ≈ 4.903e-07 · fft` (through origin) | 0.9989 | ≈ 0.490 ms per million FFT-units. Near-perfectly linear. | +| Proving time | `prove_ms ≈ 2865 + 2.211e-06 · fft` | 0.9956 | ≈ 2.21 ms per million FFT-units above a ~1.45 s floor (FRI/commitment setup). Mildly sublinear. | +| Peak RAM | `peakGB ≈ 3.27 + 2.333e-09 · fft` | 0.9940 | ≈ **2.44 GB per billion FFT-units** (n=39). Full-closure ceiling ~90–100 B FFT on a 250 GB host. | + +Library doesn't matter to cost — Init/Std/Lean/Mathlib all land on the same lines; the cost is the closure's checking work (lazy `.ixe` load just avoids the ~104 GB eager whole-env load that would otherwise gate Mathlib). + +## Data points (sorted by FFT cost) + +| # | constant | env | shape | FFT cost | FFT (×10⁹) | exec (ms) | prove (ms) | peak RAM (GB) | +|--:|---|---|---|--:|--:|--:|--:|--:| +| 1 | `Nat` | Init | inductive type | 1,857,523 | 0.002 | 2.3 | 1,451.0 | — | +| 2 | `Eq.rec` | Init | recursor | 2,575,400 | 0.003 | 2.5 | 1,696.6 | — | +| 3 | `IxVMInd.Tree` | IxVM | nested inductive | 2,633,415 | 0.003 | 3.0 | 2,091.9 | — | +| 4 | `Trans.mk` | Init | structure constructor | 2,911,629 | 0.003 | 2.8 | 2,269.8 | — | +| 5 | `Array.toList` | Init | array→list conversion | 3,332,563 | 0.003 | 3.7 | 1,873.4 | 1.8 | +| 6 | `Acc.rec` | Init | accessibility recursor | 3,505,064 | 0.004 | 3.9 | 1,575.8 | 1.8 | +| 7 | `Std.Time.Month.Offset.ofNat` | Std | calendar offset | 3,607,673 | 0.004 | 6.7 | 1,649.5 | 4.3 | +| 8 | `IxVMInd.Tree.rec` | IxVM | nested recursor | 4,858,321 | 0.005 | 4.0 | 2,349.2 | — | +| 9 | `Sum.elim` | Init | sum eliminator | 5,589,905 | 0.006 | 5.4 | 1,514.0 | 1.8 | +| 10 | `Prod.map` | Init | product map | 6,904,183 | 0.007 | 6.0 | 1,786.0 | 1.8 | +| 11 | `Option.bind` | Init | monadic bind | 7,329,183 | 0.007 | 6.2 | 1,664.4 | 1.8 | +| 12 | `IxVMPrim.nat_succ_lit` | IxVM | primitive: successor | 7,330,826 | 0.007 | 4.6 | 2,080.3 | — | +| 13 | `WellFounded.fix` | Init | well-founded fixpoint | 10,125,144 | 0.010 | 7.8 | 1,337.2 | 1.8 | +| 14 | `Nat.add` | Init | function reduction | 13,343,000 | 0.013 | 8.2 | 3,006.0 | — | +| 15 | `List.foldr` | Init | list fold | 18,579,757 | 0.019 | 12.7 | 1,935.8 | 1.9 | +| 16 | `List.range` | Init | list generator | 20,251,801 | 0.020 | 13.9 | 2,327.3 | 1.9 | +| 17 | `IxVMPrim.nat_ble_lit` | IxVM | primitive: Bool predicate (≤) | 23,016,526 | 0.023 | 12.2 | 2,889.5 | — | +| 18 | `IxVMPrim.nat_mul_big` | IxVM | primitive: multiplication | 24,580,879 | 0.025 | 12.6 | 2,871.7 | — | +| 19 | `IxVMInd.Even` | IxVM | mutual inductive | 26,482,492 | 0.026 | 13.3 | 3,081.6 | — | +| 20 | `IxVMInd.Even.rec` | IxVM | mutual recursor | 32,164,273 | 0.032 | 15.6 | 2,942.8 | — | +| 21 | `Nat.factorial` | Mathlib | factorial | 33,562,426 | 0.034 | 30.0 | 2,341.7 | 4.3 | +| 22 | `Int.add` | Init | integer addition | 44,714,703 | 0.045 | 26.2 | 2,156.1 | 1.9 | +| 23 | `BitVec.toFin` | Init | bitvec→fin | 50,437,466 | 0.050 | 29.4 | 2,178.5 | 1.9 | +| 24 | `Nat.add_comm` | Init | arithmetic theorem | 56,084,908 | 0.056 | 26.6 | 3,301.9 | — | +| 25 | `USize.toNat` | Init | fixed-width→nat | 71,607,481 | 0.072 | 39.5 | 2,095.9 | 2.1 | +| 26 | `Nat.decEq` | Init | decidable equality | 71,921,625 | 0.072 | 33.0 | 3,163.0 | — | +| 27 | `ByteSlice.ofByteArray` | Std | byte-slice construction | 107,574,377 | 0.108 | 76.2 | 2,292.2 | 4.3 | +| 28 | `Nat.decLe` | Init | decidable order | 209,641,496 | 0.210 | 84.9 | 3,534.9 | — | +| 29 | `Nat.strongRecOn` | Init | strong recursion | 273,068,854 | 0.273 | 124.1 | 2,777.1 | 2.7 | +| 30 | `IxVMPrim.nat_div_lit` | IxVM | primitive: division | 405,607,545 | 0.406 | 164.7 | 4,705.1 | — | +| 31 | `Int.emod` | Init | integer mod | 422,940,733 | 0.423 | 197.9 | 3,157.9 | 3.3 | +| 32 | `Array.foldl` | Init | array fold | 449,323,126 | 0.449 | 204.8 | 3,712.2 | 3.4 | +| 33 | `Nat.sub_le_of_le_add` | Init | order theorem | 567,575,653 | 0.568 | 231.3 | 5,009.3 | — | +| 34 | `IxVMPrim.bv_to_nat_lit` | IxVM | primitive: BitVec→Nat | 635,780,327 | 0.636 | 283.4 | 5,208.2 | — | +| 35 | `Int.gcd` | Init | integer gcd | 657,502,637 | 0.658 | 311.2 | 4,257.2 | 3.9 | +| 36 | `IxVMPrim.nat_gcd_lit` | IxVM | primitive: gcd (recursive) | 665,518,356 | 0.666 | 265.0 | 5,203.1 | — | +| 37 | `Array.map` | Init | array map | 734,574,964 | 0.735 | 371.8 | 4,888.7 | 4.7 | +| 38 | `String.Internal.append` | Init | string concatenation | 793,580,333 | 0.794 | 335.1 | 5,273.9 | — | +| 39 | `Lean.Expr.replace` | Lean | expr traversal | 859,625,514 | 0.860 | 424.2 | 5,485.9 | 4.9 | +| 40 | `Lean.Name.hash` | Init | name hashing | 861,742,653 | 0.862 | 388.6 | 5,366.6 | 4.9 | +| 41 | `BitVec.umod` | Init | bitvec modulo | 926,177,790 | 0.926 | 423.2 | 5,012.8 | 5.0 | +| 42 | `Nat.repr` | Init | number→string | 966,452,765 | 0.966 | 425.1 | 5,104.1 | 5.0 | +| 43 | `GCDMonoid.gcd` | Mathlib | monoid gcd | 1,005,736,276 | 1.006 | 533.5 | 5,620.5 | 7.2 | +| 44 | `String.intercalate` | Init | string join | 1,089,240,518 | 1.089 | 483.2 | 5,925.9 | 5.3 | +| 45 | `IxVMPrim.nat_land_lit` | IxVM | primitive: bitwise-and | 1,138,665,214 | 1.139 | 497.4 | 6,952.5 | — | +| 46 | `_private.Init.Prelude.0.Lean.extractMainModule._unsafe_rec` | Init | unsafe recursor | 1,197,925,029 | 1.198 | 578.0 | 6,904.8 | — | +| 47 | `Char.toLower` | Init | char case map | 1,198,467,414 | 1.198 | 647.8 | 8,104.5 | 5.3 | +| 48 | `Nat.Prime.two_le` | Mathlib | primality proof | 1,504,045,298 | 1.504 | 812.1 | 6,519.0 | 9.1 | +| 49 | `Nat.gcd_comm` | Init | gcd-commutativity theorem | 1,954,958,779 | 1.955 | 1,027.5 | 7,624.7 | 9.8 | +| 50 | `Finset.sum` | Mathlib | finite sum | 3,045,189,408 | 3.045 | 1,415.5 | 10,396.1 | 12.8 | +| 51 | `Int.emod_emod_of_dvd` | Init | omega/inference theorem | 3,856,852,693 | 3.857 | 1,861.3 | 12,239.9 | 13.1 | +| 52 | `Vector.append` | Init | vector append lemma | 4,023,268,168 | 4.023 | 1,718.6 | 14,612.3 | — | +| 53 | `Polynomial.eval` | Mathlib | polynomial evaluation | 5,342,731,754 | 5.343 | 2,509.5 | 15,517.5 | 19.9 | +| 54 | `Fin.foldl` | Init | finite-range fold | 10,853,255,199 | 10.853 | 4,970.2 | 26,124.9 | 29.1 | +| 55 | `List.mergeSort` | Init | merge sort | 13,825,318,985 | 13.825 | 6,346.1 | 31,659.1 | 35.1 | +| 56 | `Array.binSearch` | Init | binary search | 14,397,133,548 | 14.397 | 6,453.2 | 33,123.7 | 36.0 | +| 57 | `Array.qsort` | Init | quicksort | 15,781,689,533 | 15.782 | 7,322.5 | 36,384.2 | 39.7 | +| 58 | `Multiset.sort` | Mathlib | multiset sort | 18,670,960,624 | 18.671 | 9,385.2 | 51,109.6 | 53.8 | +| 59 | `String.split` | Init | string splitting | 19,578,088,286 | 19.578 | 9,255.2 | 48,419.9 | 53.8 | +| 60 | `Std.Time.Week.Offset.ofMilliseconds` | Std | time-unit conversion (ms→week) | 24,577,209,792 | 24.577 | 12,290.1 | 56,386.4 | 61.6 | +| 61 | `Vector.extract_append` | Init | vector extract/append lemma | 61,830,646,478 | 61.831 | 30,695.5 | 137,766.9 | 143.2 | + +Peak RAM is the full-process high-water RSS (execute + prove), reported where captured. + +### Provenance + +FFT cost is deterministic *for a given toolchain*. Rows measured on current HEAD (`ad7e383`) reproduce exactly via `bench-typecheck` — verified bit-identical for `List.mergeSort`/`Array.qsort`/`String.split`, and all Std/Mathlib rows. The foundational small-`Nat`, `IxVM*`, and append rows were measured on a **pre-`#450` snapshot** and read **~1 % high** vs current main (`#450` "Nat-layer FFT cuts", the current HEAD commit, lowered them after — e.g. `Nat.add_comm` 56.08 M here vs **55.50 M** now). This ≲1 % shift on a few small constants doesn't affect the fits. + +## Exceeded available memory + +These could not complete: the full-closure typecheck grew past the host's safe memory limit and was stopped before exhausting RAM. What drives memory is the total reduction work of the whole closure — not the headline constant (e.g. `Matrix.det` pulls the whole commutative-algebra hierarchy; `Std.HashMap.insert` pulls the `DHashMap`/`AssocList` well-founded-recursion proofs). `Std.HashMap.insert` OOMs the same way **even over the lazy `.ixe`** (the cost is checking work, not env loading), which is why `aiur-bench.yml` excludes it. These can be characterized with the native Rust kernel (`ix check-rs`) or out-of-circuit (`ix profile`), or measured in Aiur via **subject-only** checking (`verify_const`, deps trusted) — a different, much cheaper workload. + +| constant | env | peak RAM before stop (GB) | +|---|---|--:| +| `Std.Tactic.BVDecide.BVExpr.bitblast.goCache_Inv_of_Inv._mutual` | Std | 186 | +| `String.toList` | Init | 212 | +| `Matrix.det` | Mathlib | 215 | +| `Substring.toString` | Init | 215 | +| `Std.Time.PlainTime.addMinutes` | Std | 215 | +| `String.foldl` | Init | 216 | + +## Reproducing with main's benchmark infra + +Reproducible with the on-tree `bench-typecheck` (the same tool `aiur-bench.yml` drives) over a lazily-loaded `.ixe`: + +``` +lake build ix bench-typecheck +# Init/Std env (also has the Std.* and ByteSlice/Std.Time constants): +ix compile Benchmarks/Compile/CompileInitStd.lean --out initstd.ixe +bench-typecheck --ixe initstd.ixe Array.qsort String.split Std.Time.Week.Offset.ofMilliseconds --json out.json --texray +# Mathlib env (lazy load keeps RAM modest — Multiset.sort 18.7B fft -> ~54 GB): +ix compile Benchmarks/Compile/CompileMathlib.lean --out mathlib.ixe +bench-typecheck --ixe mathlib.ixe Finset.sum Polynomial.eval Multiset.sort --json out.json --texray +python3 fit_fft.py ../data/aiur/cost.csv +``` + +Pass the bare constant name — **not** a library-prefixed one: the library isn't part of the `Lean.Name`, so `Init.Nat.add_comm` / `Std.Array.qsort` resolve to "not found". `bench-typecheck` emits `fft-cost` (deterministic), `execute-time`, `prove-time`, `constants` (closure size), and — with `--texray` — the proving-phase `peak-rss` that `aiur-bench.yml` tracks. The `IxVM*` rows are test fixtures from `Tests/Ix/IxVM.lean` (reproducible via `ix check ` over the IxTests env, pinned in `kernelCheckEntries`). + +Columns in `../data/aiur/cost.csv`: `name, fft_cost, exec_us, prove_ms, proof_bytes, peak_rss_kb, source, env`. Fit script: `python -m benchstats`. + +## Plotting the relationships + +`python3 -m benchstats aiur-runtime` renders two figures (points coloured by `env`): + +- `figures/aiur_exec_vs_fft.png` — FFT cost vs **execution**: time, throughput, peak RAM (stacked panels sharing the FFT x-axis). +- `figures/aiur_prove_vs_fft.png` — FFT cost vs **proving**: time, throughput, peak RAM. + +![FFT cost vs execution](../figures/aiur_exec_vs_fft.png) +![FFT cost vs proving](../figures/aiur_prove_vs_fft.png) + +The `nix develop` dev shell provides `python3` with matplotlib (outside Nix, `pip install matplotlib` into a venv). `benchstats` fits the **full raw CSV** (n=98), so its overlaid coefficients (e.g. `prove ≈ 2958 + 2.23e-6·fft`) differ slightly from the collapsed n=61 headline models above, which are fit on the deduplicated distinct-shape set. diff --git a/Benchmarks/Statistics/docs/aiur-fft-cost-model.md b/Benchmarks/Statistics/docs/aiur-fft-cost-model.md new file mode 100644 index 00000000..8a1f5737 --- /dev/null +++ b/Benchmarks/Statistics/docs/aiur-fft-cost-model.md @@ -0,0 +1,171 @@ +# Aiur FFT cost predictor + +Predicts **Aiur FFT cost** (`Ix.Aiur.Statistics.computeStats.totalFftCost`) — the +quantity that drives Aiur prover work and RAM — for a full-closure typecheck of a +Lean constant, from **cheap out-of-circuit profiler features** (no Aiur prover +run). Companion to this directory's `aiur-cost-dataset.md` (which goes the other way: FFT +cost → exec/prove/RAM), and to the zkVM-cycle predictor in the `ix` repo +(`docs/zisk-cycle-cost-model.md`). **The Zisk and Aiur models do NOT transfer** +(§4). + +Profiling inputs: `~/benchdata/prof/` (per-name native features) and +`~/benchdata/fft/` (per-circuit stats). Aiur FFT costs: `../data/aiur/cost.csv` here. +Measured 2026-06-26, 250 GiB host. + +--- + +![profiled features vs FFT cost](../figures/aiur_features_vs_fft.png) + +## 1. What FFT cost actually is (exact, from source) + +`Ix/Aiur/Statistics.lean` → `computeStats`: + +``` +totalFftCost = Σ_circuit width_c · height_c · log₂(max(height_c, 2)) +``` + +- **726 circuits** (constrained Aiur functions + memory circuits), total width 33,765. +- `width_c = layout.totalWidth` — **structural, identical across all constants** + (same compiled IxVM kernel). Only the heights vary. +- `height_c = queryCounts[c].uniqueRows` — the **deduplicated** (memoized) row + count for that circuit during execute. (`cacheHits = totalHits − uniqueRows`; + "saved cost" runs 35–42%.) +- **No power-of-two padding** — smooth `h·log₂h` on the raw unique-row count. + +This sum reconstructs the printed `Total FFT cost` to **0.000%** for every +constant (verified). So FFT is *exactly* a function of the per-circuit height +vector; the only modelling question is predicting those heights. + +Per-circuit breakdown for a constant: +`ix check --ixe .ixe --stats-out out.txt` (out-of-circuit execute, no +prove; OOMs on the heaviest constants — see `aiur-cost-dataset.md`). + +--- + +## 2. Cost drivers (per-circuit decomposition) + +A handful of circuits dominate, and each one's height is an almost-perfect linear +function of **one cheap native profiler counter** (`hb`/`subst`/`defeq`/`bytes`, +recorded out of circuit by the `ix` repo's `only_const_profile` example): + +| family | top circuits | share | counter | per-circuit R² | +|---|---|--:|---|--:| +| **BLAKE3 hashing** (content-addressing) | `blake3_g_function` (w=224), `blake3_compress_*`, `read_byte`, `get_expr` | ~30% | `block_bytes` | **1.00** | +| **Expr instantiation** (substitution) | `expr_inst1`/`_walk`, `expr_inst_many`, `list_lookup`, `address_eq` | ~20% | `subst` | 0.99 | +| **Memory / dedup** | `memory[3]` | ~4% | `defeq` | 0.999 | +| **Big-Nat arithmetic** | `klimbs_*`, `u64_sub_with_borrow`, `compare_klimbs`, `bytes_to_u64_limb`, `get_u64_le` | ~0.5%* | **none** | **0.14** | + +\* but **37%** for `Std.Time.Week.Offset.ofMilliseconds` (time-unit conversions do +multi-limb division). This one unpredicted family is the entire accuracy ceiling. + +Why `block_bytes` is the strongest single feature: `blake3_g_function`'s height +≈ `block_bytes` (it hashes the serialized closure, ~1 g-call per byte) and it's +the #1 circuit. The FFT is *dominated by hashing the serialized env*. + +--- + +## 3. The predictor model (cheap features → FFT) + +Because FFT = `Σ w·h·log₂h`, the right functional form is **`n·log₂(n)`** in each +counter (a linear model under-predicts the expensive tail 2–3× by missing the +`log(height)` factor). Fit by weighted least squares (weights `1/fft²`, relative +error), n=55 paired constants, FFT 1.9 M – 61.8 B: + +| model | R² | MAPE | note | +|---|--:|--:|---| +| linear `hb + bytes + defeq` | 0.93 | 18% | wrong form — under-predicts tail | +| `bytes·log₂bytes` **alone** | 0.966 | 10% | dead-simple, blake3-dominated | +| **`hb·log₂hb + bytes·log₂bytes + defeq·log₂defeq`** | **0.976** | **9%** | recommended cheap model | +| + (measured) nat-arith term | 0.993 | 6% | needs execute — see §5 | +| `Σ width·h·log₂h` (per-circuit heights) | **1.000** | 0% | exact; needs execute | + +Recommended cheap predictor: **`bytes·log₂bytes + subst·log₂subst + defeq·log₂defeq`** +(≈ the three predictable driver families). Coefficients shift with n; regenerate +on the current paired data rather than hard-coding. Inputs are all from the `ix` +repo's `only_const_profile` (native, no Aiur run, no OOM). + +The table above is the n=55 Init/Std development calibration. The deployed +pipeline (`python -m benchstats aiur-predictor`) fits this same form on the current **n=65** paired +set (52 Init + 10 Mathlib + 3 Std) at **MAPE 9.3%** — confirming it transfers +across libraries; see `aiur-predicted-vs-actual.md` for the per-constant table. + +--- + +## 4. Why the Zisk cycle model does NOT transfer + +Zisk full-closure cost is `cycles ≈ 0.67M + 94,300·hb + 4,390·subst` +(reduction-dominated; serialized `bytes` adds nothing). **Aiur FFT is the +opposite:** + +- `subst` alone is **useless** for Aiur (R² 0.45; adds nothing to `hb`). +- serialized **`bytes` is dominant** (R² 0.91 alone). +- the **Aiur/Zisk-cycle ratio ranges 0.33–4.09** (mean 1.65) — not a constant. + +**Mechanistic reason:** Aiur FFT ≈ circuit *width × height*. `bytes`/ingress sets +the **width** (how much env is ingested into the trace); `hb`/`defeq` set the +**height** (reduction depth). Zisk full-closure pays only for trace *rows* +(height/reduction), so bytes are irrelevant there. Aiur pays for both dimensions. + +--- + +## 5. The ceiling: big-Nat arithmetic (`klimbs`) + +The cheap model caps at **~0.98** purely because of the big-Nat arithmetic +circuits. They're negligible (~0.5%) for almost every constant but dominate +arithmetic-heavy ones (`Std.Time.*` conversions). No `hb`/`subst`/`defeq`/`bytes` +counter predicts them (R² 0.14). NB: the whole-model 0.98 is high *despite* +nat-arith being unmodeled — it is **not** a 0.98 model *of* nat-arith. + +**Negative result — a native `nat_arith` counter does not work.** Experiment +(uncommitted, `ix` repo `kernel-riscv`): added `bump_nat_arith(work)` to +`crates/kernel/src/profile.rs` + `OpCounts` + `only_const_profile`, charging +op-weighted limb-work (`la·lb` for mul/div/mod/gcd, `max(la,lb)` for +add/sub/bitwise/shift) in `whnf.rs::compute_nat_bin`. Result: lifts the whole +model only **0.976 → 0.980**, and is *backwards* on the discriminating case +(`Week.Offset.ofMilliseconds` NAT=318 vs `Array.qsort` NAT=695 — the reverse of +their 37% vs 0.8% Aiur klimbs share). + +Root cause: the **native kernel does big-Nat arithmetic in `BigUint`** (a few +`compute_nat_bin` calls), but the **Aiur kernel does it limb-by-limb in gadgets** +— its cost is dominated by per-limb deserialization/comparison/division rows with +*no native analog*. The quantity you'd need is "number of Aiur limb-rows," which +only exists once you run the Aiur kernel. So there is **no cheap native predictor +for klimbs cost**; the 0.993 above used the *measured* klimbs heights (i.e. it +required execute). + +**Paths to R²→1.0:** (a) the exact model — per-circuit heights from execute +(`ix check --stats-out`); or (b) model the Aiur `klimbs` gadget cost directly from +the IxVM kernel circuit definitions (weighting div/deserialization correctly) — +untried, more involved. + +--- + +## 6. Data & reproduce + +```bash +# per-name native features (hb, block_bytes, subenv_bytes, subst, whnf, defeq[, nat]) +# in ~/ix (kernel-riscv): +IX_PROFILE_COUNTERS=1 cargo run -q --release -p ix-kernel --example only_const_profile -- \ + initstd.ixe Nat.add_comm Array.qsort String.split ... + +# per-circuit Aiur FFT breakdown for one constant (width, height, fftCost per circuit) +# in ~/ix-aiur: +ix check --ixe initstd.ixe --stats-out stats.txt + +# fit: join features with ../data/aiur/cost.csv, WLS on n·log₂n features (python, ad hoc) +``` + +- Per-name features: `~/benchdata/prof/onlyconst_feat_fc.tsv`, `new_feat.tsv`. +- Per-circuit stats: `~/benchdata/fft/circ/*.txt`, `~/benchdata/fft/stats_*.txt`. +- Aiur FFT + exec/prove/RAM dataset & shapes: this directory + (`aiur-cost-dataset.md`, `../data/aiur/cost.csv`, `python -m benchstats`). + +--- + +## TL;DR for a new agent +1. FFT = `Σ width·h·log₂h` over 726 circuits; widths fixed, heights from execute. Exact. +2. Cheap predictor: `n·log₂n` of `bytes` (BLAKE3), `subst` (instantiation), `defeq` + (memory) → **R² ≈ 0.976**, MAPE ~9%. `bytes·log₂bytes` alone gets 0.966. +3. Do **not** reuse Zisk's `hb+subst` model — for Aiur, bytes dominate and subst is useless. +4. The unmodeled ~2% is big-Nat `klimbs` arithmetic (matters only for arithmetic-heavy + constants like `Std.Time.*`); it has no cheap native proxy. Exact requires execute. diff --git a/Benchmarks/Statistics/docs/aiur-predicted-vs-actual.md b/Benchmarks/Statistics/docs/aiur-predicted-vs-actual.md new file mode 100644 index 00000000..0bcd0c38 --- /dev/null +++ b/Benchmarks/Statistics/docs/aiur-predicted-vs-actual.md @@ -0,0 +1,107 @@ +# Predicted vs actual Aiur FFT cost + +Cheap predictor `fft ≈ k₁·bytes·log₂bytes + k₂·subst·log₂subst + k₃·defeq·log₂defeq` (fit by `python -m benchstats aiur-predictor`, in-sample), evaluated per constant. `—` = no native profile (the `IxVM*` rows are test fixtures, not in any `.ixe`; a few Init/Lean constants weren't profiled). Generated from `../data/aiur/predicted_vs_actual.csv`. + +| constant | env | actual FFT | predicted FFT | pred/actual | +|---|---|--:|--:|--:| +| `HEq` | Init | 1,716,582 | — | — | +| `Nat` | Init | 1,857,523 | 1,913,777 | 1.03× | +| `Eq.rec` | Init | 2,575,400 | 2,618,644 | 1.02× | +| `IxVMInd.Tree` | IxVM | 2,633,415 | — | — | +| `HEq.rec` | Init | 2,692,988 | 2,818,184 | 1.05× | +| `Trans.mk` | Init | 2,911,629 | 3,172,060 | 1.09× | +| `Array.toList` | Init | 3,332,563 | 2,795,425 | 0.84× | +| `Acc.rec` | Init | 3,505,064 | 3,557,025 | 1.01× | +| `Std.Time.Month.Offset.ofNat` | Std | 3,607,673 | 3,001,879 | 0.83× | +| `IxVMInd.Tree.rec` | IxVM | 4,858,321 | — | — | +| `Sum.elim` | Init | 5,589,905 | 5,851,932 | 1.05× | +| `Prod.map` | Init | 6,904,183 | 7,190,959 | 1.04× | +| `Option.bind` | Init | 7,329,183 | 7,791,742 | 1.06× | +| `IxVMPrim.nat_succ_lit` | IxVM | 7,330,826 | — | — | +| `Except.bind` | Init | 7,667,869 | 8,723,770 | 1.14× | +| `List.Sorted` | Mathlib | 9,578,666 | 7,173,089 | 0.75× | +| `WellFounded.fix` | Init | 10,125,144 | 10,426,904 | 1.03× | +| `Nat.add` | Init | 13,343,000 | 14,007,538 | 1.05× | +| `IxVMPrim.nat_pred_lit` | IxVM | 14,804,098 | — | — | +| `List.foldr` | Init | 18,579,757 | 18,544,657 | 1.00× | +| `List.dropLast` | Init | 19,509,718 | 19,694,567 | 1.01× | +| `List.range` | Init | 20,251,801 | 19,634,460 | 0.97× | +| `List.zipWith` | Init | 20,439,088 | 20,370,498 | 1.00× | +| `IxVMPrim.nat_ble_lit` | IxVM | 23,016,526 | — | — | +| `IxVMPrim.nat_mul_big` | IxVM | 24,580,879 | — | — | +| `IxVMPrim.nat_beq_lit` | IxVM | 24,752,029 | — | — | +| `IxVMPrim.nat_mul_lit` | IxVM | 25,101,067 | — | — | +| `List.filterMap` | Init | 25,335,779 | 25,841,377 | 1.02× | +| `IxVMInd.Odd` | IxVM | 26,245,849 | — | — | +| `IxVMInd.Even` | IxVM | 26,482,492 | — | — | +| `IxVMPrim.nat_add_lit` | IxVM | 28,639,807 | — | — | +| `Nat.choose` | Mathlib | 29,018,862 | 28,940,757 | 1.00× | +| `IxVMInd.Odd.rec` | IxVM | 32,163,380 | — | — | +| `IxVMInd.Even.rec` | IxVM | 32,164,273 | — | — | +| `Nat.factorial` | Mathlib | 33,562,426 | 34,336,981 | 1.02× | +| `Nat.fib` | Mathlib | 34,171,209 | 32,465,875 | 0.95× | +| `IxVMPrim.nat_sub_lit` | IxVM | 34,436,244 | — | — | +| `IxVMPrim.nat_shl_lit` | IxVM | 35,417,490 | — | — | +| `List.foldlM` | Init | 39,202,740 | — | — | +| `Int.add` | Init | 44,714,703 | 47,564,092 | 1.06× | +| `BitVec.toFin` | Init | 50,437,466 | 46,021,725 | 0.91× | +| `Nat.add_comm` | Init | 56,084,908 | 60,794,437 | 1.08× | +| `UInt32.toNat` | Init | 59,331,806 | 53,727,232 | 0.91× | +| `USize.toNat` | Init | 71,607,481 | 64,709,054 | 0.90× | +| `Nat.decEq` | Init | 71,921,625 | 77,473,249 | 1.08× | +| `IxVMPrim.nat_dec_eq` | IxVM | 86,118,842 | — | — | +| `ByteSlice.ofByteArray` | Std | 107,574,377 | 98,677,420 | 0.92× | +| `Nat.decLe` | Init | 209,641,496 | 211,779,321 | 1.01× | +| `IxVMPrim.nat_dec_le` | IxVM | 216,661,883 | — | — | +| `IxVMPrim.nat_dec_lt` | IxVM | 220,751,034 | — | — | +| `Nat.strongRecOn` | Init | 273,068,854 | 271,198,826 | 0.99× | +| `IxVMPrim.nat_div_lit` | IxVM | 405,607,545 | — | — | +| `IxVMPrim.nat_shr_lit` | IxVM | 411,128,901 | — | — | +| `IxVMPrim.nat_mod_lit` | IxVM | 414,695,549 | — | — | +| `Int.emod` | Init | 422,940,733 | 455,184,097 | 1.08× | +| `Int.ediv` | Init | 430,476,738 | 459,485,153 | 1.07× | +| `Array.foldlM` | Init | 434,577,494 | — | — | +| `Array.foldl` | Init | 449,323,126 | 442,489,492 | 0.98× | +| `Array.filter` | Init | 464,818,232 | 461,196,360 | 0.99× | +| `Nat.sub_le_of_le_add` | Init | 567,575,653 | 634,461,296 | 1.12× | +| `BitVec.add` | Init | 617,113,462 | 672,758,257 | 1.09× | +| `IxVMPrim.bv_to_nat_lit` | IxVM | 635,780,327 | — | — | +| `Int.gcd` | Init | 657,502,637 | 735,498,708 | 1.12× | +| `Nat.toDigits` | Init | 663,606,297 | 701,775,836 | 1.06× | +| `IxVMPrim.nat_gcd_lit` | IxVM | 665,518,356 | — | — | +| `Array.map` | Init | 734,574,964 | 776,415,628 | 1.06× | +| `Array.zipWith` | Init | 736,658,636 | 777,748,639 | 1.06× | +| `String.Internal.append` | Init | 793,580,333 | — | — | +| `IxVMPrim.str_size_lit` | IxVM | 802,563,877 | — | — | +| `Lean.Expr.replace` | Lean | 859,625,514 | — | — | +| `Lean.Name.hash` | Init | 861,742,653 | 895,622,891 | 1.04× | +| `BitVec.umod` | Init | 926,177,790 | 1,024,619,382 | 1.11× | +| `Nat.repr` | Init | 966,452,765 | 1,015,475,836 | 1.05× | +| `Int.repr` | Init | 993,792,541 | 1,032,652,579 | 1.04× | +| `GCDMonoid.gcd` | Mathlib | 1,005,736,276 | 985,927,330 | 0.98× | +| `String.intercalate` | Init | 1,089,240,518 | 1,146,173,038 | 1.05× | +| `IxVMPrim.nat_land_lit` | IxVM | 1,138,665,214 | — | — | +| `IxVMPrim.nat_lor_lit` | IxVM | 1,139,887,801 | — | — | +| `IxVMPrim.nat_xor_lit` | IxVM | 1,149,371,965 | — | — | +| `_private.Init.Prelude.0.Lean.extractMainModule._unsafe_rec` | Init | 1,197,925,029 | — | — | +| `Char.toLower` | Init | 1,198,467,414 | 1,334,500,616 | 1.11× | +| `Nat.Prime.two_le` | Mathlib | 1,504,045,298 | 1,554,338,678 | 1.03× | +| `Nat.gcd_comm` | Init | 1,954,958,779 | 2,324,336,182 | 1.19× | +| `Finset.prod` | Mathlib | 3,045,165,822 | 2,442,491,797 | 0.80× | +| `Finset.sum` | Mathlib | 3,045,189,408 | 2,442,491,797 | 0.80× | +| `Int.emod_emod_of_dvd` | Init | 3,856,852,693 | 4,688,687,459 | 1.22× | +| `Array.append_assoc` | Init | 3,938,574,533 | 2,551,361,279 | 0.65× | +| `Vector.append` | Init | 4,023,268,168 | 2,636,160,685 | 0.66× | +| `Polynomial.eval` | Mathlib | 5,342,731,754 | 4,300,016,404 | 0.80× | +| `Fin.foldl` | Init | 10,853,255,199 | 11,397,983,888 | 1.05× | +| `List.mergeSort` | Init | 13,825,318,985 | 13,705,192,825 | 0.99× | +| `Array.binSearch` | Init | 14,397,133,548 | 13,466,760,466 | 0.94× | +| `Array.qsort` | Init | 15,781,689,533 | 15,413,615,828 | 0.98× | +| `Array.qsortOrd` | Init | 15,841,062,472 | 15,447,012,613 | 0.98× | +| `Multiset.sort` | Mathlib | 18,670,960,624 | 17,312,899,573 | 0.93× | +| `String.split` | Init | 19,578,088,286 | 17,462,986,906 | 0.89× | +| `Std.Time.Week.Offset.ofMilliseconds` | Std | 24,577,209,792 | 14,799,116,898 | 0.60× | +| `Vector.extract_append` | Init | 61,830,646,478 | 26,830,910,000 | 0.43× | + +**Coverage:** 65 of 98 constants predicted (33 without a native profile). In-sample MAPE 9.3%. +Worst residuals are the documented structural outliers — `Vector.extract_append`, the append-proof family, and the big-Nat-arithmetic (`Std.Time.*`) family; see `aiur-fft-cost-model.md` §5. diff --git a/Benchmarks/Statistics/docs/zisk-prove-validation.md b/Benchmarks/Statistics/docs/zisk-prove-validation.md new file mode 100644 index 00000000..f836bb6f --- /dev/null +++ b/Benchmarks/Statistics/docs/zisk-prove-validation.md @@ -0,0 +1,100 @@ +# Validating the Zisk shard-planner cost model against real GPU proves + +This checks the **actual load-bearing model** in the in-repo planner +(`crates/kernel/src/shard.rs`) against real measurements — it does **not** fit a +new function. The constants below are read straight from `shard.rs` by +`benchstats/shard_model.py`, so the plotted lines are the code's. + +## The model (shard.rs, single source of truth) + +| quantity | formula | shard.rs | +|---|---|---| +| guest STEPS / block | `162_339·hb + 4_276·subst + 652·bytes` | `block_step_cost` (L1633–1650) | +| per-shard STEP floor | `+ 180_000_000` | `SHARD_STEP_FLOOR` (L1638) | +| peak host RAM (GiB) | `50 + 33·B_steps` | `ram_gib_for_steps` (L1667–1676) | +| leaf prove time (s) | `54 + 158·B_steps` | `shard_prove_secs` (L1691–1696) | + +`B_steps` = STEPS / 1e9. The RAM model is the *prediction*; the safety margin is +separate (`RAM_USABLE_FRAC = 0.85` in `cycle_cap_for_ram`). + +## Real data (generated via the CLI, not Python) + +For each single-subject env: real steps + peak RAM via the emulator, then a real +single-leaf GPU prove (RTX PRO 6000 Blackwell, 250 GB host) wrapped in +`/usr/bin/time -v` for peak RSS. The prover default is `--max-witness-stored 5` +— **the same setting the model was calibrated at**, so this is apples-to-apples. + +```bash +# steps (safe, emulator RAM is setup-bound ~30 GB regardless of size): +zisk-host --execute --ixe .ixe +# real leaf prove → steps, prove time; /usr/bin/time -v → peak RAM: +/usr/bin/time -v zisk-host --gpu --ixe .ixe +``` + +`data/zisk/prove_real.csv` — **31 real leaves**, 0.055–4.69 B steps, `witness` +column = `--max-witness-stored`: + +* **8 single-subject** leaves (`witness=5`): `nataddcomm` (54.6 M, 12.5 s, 40.0 GiB) + … `natfoldsucc` (4.689 B, 809 s, **194.3 GiB** — the high-end anchor, proven + under a watchdog). Self-contained programs, no per-shard floor/ingress. +* **6 mergesort shards** (`witness=5`, `kind=sharded`): the real planner regime — + RAM-aware sharded via `ix shard --max-ram 130` → 6 leaves of 0.55–1.66 B, each + carrying the 180 M floor + cross-ingress. They land on the **same** RAM/time + envelope as the single-subject leaves (floor/ingress don't shift RAM). +* **17 leaves at `witness=10`**: 8 single-subject (re-proved, 0.055–4.69 B) + + 3 mergesort + 3 rbmap + 3 binsearch shards (0.15–1.66 B) — all clean (no OOM). + +Multi-shard inputs are profiled (`ix profile`) and sharded RAM-aware +(`ix shard --max-ram`), then proved per leaf (`zisk-host --gpu --shard-plan +--only-shard k`). Also salvaged: `data/zisk/multishard_failed.csv` (see below). + +## Result (`python3 -m benchstats zisk-prove-validate`) + +![shard.rs model vs real GPU proves](../figures/zisk_prove_validation.png) + +Over all 31 leaves: **RAM `50+33·B` R² = 0.88**, **time `54+158·B` R² = 0.95** +(single-subject leaves alone, with the 4.69 B anchor, give 0.92 / 0.98). + +- **Peak RAM.** The **slope is dead-on**: single-subject envelope is + `(194.3 − 40.0)/(4.689 − 0.055) ≈ 33.3 GiB/Bstep` vs the model's `33`. The gap + is a constant **~12 GiB on the base** (model `50`, measured ≈ `38`) — a safe + upper bound, ~18 % loose on the ~1 B sharded cluster, within ~5 % by 4.7 B. The + deliberate `45+32 → 50+33` bump in `shard.rs` (after an Init shard sized for + 200 GB used 225 GB) is why the base is conservative. +- **Prove time.** The `158 s/Bstep` slope tracks closely from ~0.6 B up. The high + MAPE is **entirely the `54 s` intercept**: a cold-start cost, but warm leaves + prove in 12–30 s, so the model over-predicts small leaves while nailing large + ones (4.69 B: model 795 s vs real 809 s). Wall time (incl. setup) sits between. + +### `--max-witness-stored 10` adds a flat ~3 GiB, independent of leaf size + +Re-proving **all 8 single-subject envs** (and 3 mergesort shards) at witness=10 +raised peak RAM by a **constant ≈ 3.3 GiB** — the linear fit of the witness delta +is `Δ ≈ 3.3 + 0.0·Bsteps`, i.e. **no size dependence**. The decisive point is the +4.69 B `natfoldsucc` leaf: **194.3 GiB at w5 → 196.7 GiB at w10 (+2.4)**. The +witness queue depth is not the RAM driver — base trace memory is — so `50+33·B` +covers witness=10 within its ~12 GiB conservative base and **no recalibration is +needed**. (Leaf prove time is if anything marginally *lower* at w10 — witness +batching.) On the plot the w10 markers sit on top of the w5 ones. + +### Failure modes (`multishard_failed.csv`) — library envs don't shard cleanly + +Sharding the **whole initStd env** (the Vectors `initStd` target) is *not* clean: +its closure contains single atomic `Muts` blocks that can't be split: + +* `hb=83063` (~13.5 B predicted steps): **crashes the prover** during EXECUTE + (too large for one leaf — exceeds the MT-trace ceiling). +* `hb=14807` (~2.4 B predicted): EXECUTE ok, but proving **OOM'd at >235 GB** at + witness=10 — the model predicts ~129 GB, **under-predicting ~2×** for this + block (heartbeats badly underestimate its true trace). + +So heavy *library* constants (the `.heavy` rows in `Vectors.lean`) can't be +cleanly multi-shard-proved on this box; the clean multi-shard data comes from the +baked programs (`mergesort`, `rbmap`, `binsearch`). + +**Takeaway:** both load-bearing slopes (`33 GiB`, `158 s` per B-step) are +validated by real proves — across single-subject and sharded leaves, at +witness=5 and witness=10 — on the calibration hardware. The RAM base is ~12 GiB +conservative; the time intercept is a cold-start artifact; and the model +under-predicts pathological single-atomic-block shards (a planning limit, not a +slope error). `num_shards` and aggregation overhead are not separately measured. diff --git a/Benchmarks/Statistics/figures/aiur_exec_vs_fft.png b/Benchmarks/Statistics/figures/aiur_exec_vs_fft.png new file mode 100644 index 00000000..30c0463c Binary files /dev/null and b/Benchmarks/Statistics/figures/aiur_exec_vs_fft.png differ diff --git a/Benchmarks/Statistics/figures/aiur_features_vs_fft.png b/Benchmarks/Statistics/figures/aiur_features_vs_fft.png new file mode 100644 index 00000000..0dd2a365 Binary files /dev/null and b/Benchmarks/Statistics/figures/aiur_features_vs_fft.png differ diff --git a/Benchmarks/Statistics/figures/aiur_prove_vs_fft.png b/Benchmarks/Statistics/figures/aiur_prove_vs_fft.png new file mode 100644 index 00000000..b1a17966 Binary files /dev/null and b/Benchmarks/Statistics/figures/aiur_prove_vs_fft.png differ diff --git a/Benchmarks/Statistics/figures/zisk_multi_features_vs_cycles.png b/Benchmarks/Statistics/figures/zisk_multi_features_vs_cycles.png new file mode 100644 index 00000000..c9008235 Binary files /dev/null and b/Benchmarks/Statistics/figures/zisk_multi_features_vs_cycles.png differ diff --git a/Benchmarks/Statistics/figures/zisk_prove_validation.png b/Benchmarks/Statistics/figures/zisk_prove_validation.png new file mode 100644 index 00000000..9d8906df Binary files /dev/null and b/Benchmarks/Statistics/figures/zisk_prove_validation.png differ diff --git a/Benchmarks/Statistics/figures/zisk_prove_vs_cycles.png b/Benchmarks/Statistics/figures/zisk_prove_vs_cycles.png new file mode 100644 index 00000000..09497b71 Binary files /dev/null and b/Benchmarks/Statistics/figures/zisk_prove_vs_cycles.png differ diff --git a/Benchmarks/Statistics/figures/zisk_single_features_vs_cycles.png b/Benchmarks/Statistics/figures/zisk_single_features_vs_cycles.png new file mode 100644 index 00000000..3b46b92e Binary files /dev/null and b/Benchmarks/Statistics/figures/zisk_single_features_vs_cycles.png differ diff --git a/flake.nix b/flake.nix index 3188ce4c..ea134608 100644 --- a/flake.nix +++ b/flake.nix @@ -229,6 +229,8 @@ lean.lean-all cargo-deny valgrind + # Python + matplotlib for Benchmarks/Statistics (model & plot bench data) + (python3.withPackages (ps: with ps; [matplotlib])) ]; };