chore: cherry-pick bench/bench.py + make_csv.py + ulp_precision.csv from master

peng.li24 · peng.li24 · commit 4314ec82e5c2 · 2026-06-07T13:58:25.000+08:00
These three files existed only in the master branch (which diverged at 49647b7 and took a different direction — dropping the SVML bridge entirely): bench/bench.py — throughput (GB/s) + max-ULP benchmark, numpycpp vs numpy; tests/make_csv.py — generates tests/ulp_precision.csv via a ULP distance scan; tests/ulp_precision.csv — per-function ULP accuracy data (std backend data). Everything else in master is either: • already superseded by bit-exact's implementation (headers, pycpp, CI) • inferior (152 tests vs 220, no advanced indexing, no SVML bridge) • stale (pycpp/pyproject.toml — already removed). master will be deleted after this commit.
diff --git a/bench/bench.py b/bench/bench.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+"""
+bench/bench.py — numpycpp vs numpy: throughput (GB/s) and ULP accuracy.
+
+CSV columns (stdout):
+  func, dtype, N, numpycpp_ms, numpy_ms, speedup_x, numpycpp_GBps, numpy_GBps, max_ulp_vs_numpy
+
+Usage:
+  # build first (from project root):
+  #   cmake -S tests -B tests/build && cmake --build tests/build
+
+  PYTHONPATH=tests/build python3 bench/bench.py
+  PYTHONPATH=tests/build python3 bench/bench.py > results.csv
+"""
+
+import sys
+import csv
+import timeit
+import numpy as np
+
+try:
+    import numpycpp
+except ImportError:
+    sys.exit("numpycpp not found — set PYTHONPATH=tests/build (or wherever the .so lives)")
+
+# ---------------------------------------------------------------------------
+# Function table: (name, numpy_fn, numpycpp_fn, input_lo, input_hi)
+#
+# `name` is the label written to the CSV "func" column.  `input_lo` and
+# `input_hi` are handed to `_make`, which uses them as the bounds of the
+# uniform random sample fed to both implementations.
+# ---------------------------------------------------------------------------
+FUNCS = [
+    ("sqrt",   np.sqrt,   numpycpp.sqrt,   0.1, 10.0),
+    ("abs",    np.abs,    numpycpp.abs,   -5.0,  5.0),
+    ("exp",    np.exp,    numpycpp.exp,    0.1,  5.0),
+    ("log",    np.log,    numpycpp.log,    0.1, 10.0),
+    ("sin",    np.sin,    numpycpp.sin,    0.1,  5.0),
+    ("cos",    np.cos,    numpycpp.cos,    0.1,  5.0),
+    ("tan",    np.tan,    numpycpp.tan,    0.1,  1.0),
+    ("arcsin", np.arcsin, numpycpp.arcsin,-0.9,  0.9),
+    ("arccos", np.arccos, numpycpp.arccos,-0.9,  0.9),
+    ("arctan", np.arctan, numpycpp.arctan, 0.1,  5.0),
+    ("cbrt",   np.cbrt,   numpycpp.cbrt,   0.1, 10.0),
+    ("expm1",  np.expm1,  numpycpp.expm1,  0.1,  1.0),
+    ("log1p",  np.log1p,  numpycpp.log1p,  0.1, 10.0),
+    ("log10",  np.log10,  numpycpp.log10,  0.1, 10.0),
+    ("log2",   np.log2,   numpycpp.log2,   0.1, 10.0),
+]
+
+# Element types benchmarked for every function in FUNCS.
+DTYPES  = ["float64", "float32"]
+# Array lengths: every third power of two from 2^10 to 2^19 (1K … 512K elements).
+SIZES   = [1 << k for k in range(10, 20, 3)]   # 1024, 8192, 65536, 524288
+REPS    = 50    # timeit repeats; the minimum is kept to damp OS scheduling noise
+NUMBER  = 5     # calls per repeat (each repeat's time is divided by this)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+_rng = np.random.default_rng(42)
+
+
+def _make(lo: float, hi: float, N: int, dtype: str) -> np.ndarray:
+    """Build a C-contiguous 1-D benchmark input of ``N`` uniform samples.
+
+    Values are drawn between ``lo`` and ``hi`` from the shared module-level
+    generator (seeded with 42) and then converted to ``dtype``, so repeated
+    calls return different but reproducible data.
+    """
+    samples = _rng.uniform(low=lo, high=hi, size=N)
+    converted = samples.astype(dtype)
+    return np.ascontiguousarray(converted)
+
+def _bench_ms(fn, arr: np.ndarray) -> float:
+    """Time ``fn(arr)`` and return the fastest per-call duration in milliseconds.
+
+    ``REPS`` batches of ``NUMBER`` calls are timed; only the quickest batch is
+    kept and averaged over its calls.
+    """
+    timer = timeit.Timer(lambda: fn(arr))
+    batch_seconds = timer.repeat(repeat=REPS, number=NUMBER)
+    fastest_call_seconds = min(batch_seconds) / NUMBER
+    return fastest_call_seconds * 1e3
+
+def _ordered_bits(x: np.ndarray) -> np.ndarray:
+    """Map IEEE-754 floats to unsigned integers that sort like the floats.
+
+    The raw bit pattern of a float is sign-magnitude, so it is not monotonic
+    across zero.  Here a value with magnitude bits ``m`` becomes ``BIAS + m``
+    when positive and ``BIAS - m`` when negative (``BIAS`` = the sign bit), so
+    -0.0 and +0.0 share one key and adjacent floats always differ by one.
+    """
+    if x.dtype == np.float64:
+        uint, nbits = np.uint64, 64
+    elif x.dtype == np.float32:
+        uint, nbits = np.uint32, 32
+    else:
+        raise TypeError(f"unsupported dtype for ULP comparison: {x.dtype}")
+    bits = x.view(uint)
+    sign = uint(1 << (nbits - 1))
+    magnitude = bits & (sign - uint(1))
+    # Both branches stay inside the unsigned range: magnitude < sign.
+    return np.where(bits & sign, sign - magnitude, sign + magnitude)
+
+
+def _max_ulp(ref: np.ndarray, got: np.ndarray) -> int:
+    """Return the largest ULP distance between ``ref`` and ``got``.
+
+    Args:
+        ref: Reference results (float64 or float32).
+        got: Results under test; converted to ``ref``'s dtype before comparing.
+
+    Returns:
+        The maximum element-wise distance, counted in representable values of
+        the dtype.  Returns 0 for empty input.  NaNs are compared by their bit
+        patterns, so a NaN against a finite value shows up as a very large
+        distance rather than being ignored.
+
+    Raises:
+        TypeError: If ``ref`` is neither float64 nor float32.
+    """
+    ref = np.asarray(ref)
+    got = np.asarray(got, dtype=ref.dtype)
+    if ref.size == 0:
+        return 0
+    ref_keys = _ordered_bits(ref)
+    got_keys = _ordered_bits(got)
+    # max - min on unsigned keys cannot wrap, unlike a signed subtraction.
+    distance = np.maximum(ref_keys, got_keys) - np.minimum(ref_keys, got_keys)
+    return int(np.max(distance))
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+def main() -> None:
+    """Benchmark numpycpp against numpy and stream the results as CSV to stdout.
+
+    For every entry in ``FUNCS``, every dtype in ``DTYPES`` and every length in
+    ``SIZES`` this times both implementations on the same random input, then
+    writes one CSV row with the per-call times (ms), the speed-up factor, the
+    throughput in GB/s and the maximum ULP difference between the two results.
+
+    Throughput counts the input array's bytes only (``N * itemsize``); output
+    traffic is not included.
+    """
+    w = csv.writer(sys.stdout)
+    w.writerow([
+        "func", "dtype", "N",
+        "numpycpp_ms", "numpy_ms", "speedup_x",
+        "numpycpp_GBps", "numpy_GBps",
+        "max_ulp_vs_numpy",
+    ])
+
+    for fn_name, np_fn, cpp_fn, lo, hi in FUNCS:
+        for dtype in DTYPES:
+            # Take the element width from the dtype itself so that adding a
+            # new entry to DTYPES cannot silently skew the GB/s figures.
+            itemsize = np.dtype(dtype).itemsize
+            for N in SIZES:
+                arr = _make(lo, hi, N, dtype)
+
+                # Warm-up: run each implementation once before timing so that
+                # first-call costs and cold caches do not land in the samples.
+                cpp_fn(arr)
+                np_fn(arr)
+
+                t_cpp = _bench_ms(cpp_fn, arr)
+                t_np  = _bench_ms(np_fn,  arr)
+
+                # bytes / ms / 1e6  ==  bytes / s / 1e9  ==  GB/s
+                gb_cpp  = N * itemsize / t_cpp / 1e6
+                gb_np   = N * itemsize / t_np  / 1e6
+                speedup = t_np / t_cpp
+                ulp     = _max_ulp(np_fn(arr), cpp_fn(arr))
+
+                w.writerow([
+                    fn_name, dtype, N,
+                    f"{t_cpp:.4f}", f"{t_np:.4f}",
+                    f"{speedup:.3f}",
+                    f"{gb_cpp:.2f}", f"{gb_np:.2f}",
+                    ulp,
+                ])
+                sys.stdout.flush()   # stream CSV line-by-line
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/make_csv.py b/tests/make_csv.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+"""Generate tests/ulp_precision.csv — ULP differences: numpycpp vs numpy.
+
+Usage:
+    make csv          # from tests/ directory
+    python3 tests/make_csv.py   # from repo root
+"""
+
+import os, sys, struct, csv
+import numpy as np
+import importlib
+
+# Ensure the tests directory is on sys.path so we can import the C++ module
+_here = os.path.dirname(os.path.abspath(__file__))
+if _here not in sys.path:
+    sys.path.insert(0, _here)
+cpp = importlib.import_module("numpycpp")
+
+
+def ulp_f64(a: float, b: float) -> int:
+    """Return the absolute ULP distance between two float64 values.
+
+    The distance is the number of representable float64 values separating
+    ``a`` and ``b``; it is never negative.
+
+    Args:
+        a: First value.
+        b: Second value.
+
+    Returns:
+        0 when the values compare equal (including -0.0 vs +0.0), the sentinel
+        ``2**63`` when either value is NaN, otherwise the ULP distance.
+    """
+    if a == b:
+        return 0
+    if np.isnan(a) or np.isnan(b):
+        return 2**63  # sentinel
+    # Fixed-size little-endian formats: reinterpret the 8 float bytes as int64.
+    pa = struct.unpack("<q", struct.pack("<d", float(a)))[0]
+    pb = struct.unpack("<q", struct.pack("<d", float(b)))[0]
+    # Float bit patterns are sign-magnitude.  Turn a negative pattern into
+    # minus its magnitude so the integers are ordered exactly like the floats,
+    # which keeps the distance correct when a and b have opposite signs.
+    if pa < 0:
+        pa = -(pa & 0x7FFFFFFFFFFFFFFF)
+    if pb < 0:
+        pb = -(pb & 0x7FFFFFFFFFFFFFFF)
+    return abs(pa - pb)
+
+
+def ulp_f32(a: float, b: float) -> int:
+    """Return the absolute ULP distance between two values rounded to float32.
+
+    Both arguments are first converted to ``np.float32``; the distance is the
+    number of representable float32 values separating the converted values.
+
+    Args:
+        a: First value.
+        b: Second value.
+
+    Returns:
+        0 when the float32 values compare equal (including -0.0 vs +0.0), the
+        sentinel ``2**31`` when either is NaN, otherwise the ULP distance.
+    """
+    fa, fb = np.float32(a), np.float32(b)
+    if fa == fb:
+        return 0
+    if np.isnan(fa) or np.isnan(fb):
+        return 2**31  # sentinel
+    # Fixed-size little-endian formats: reinterpret the 4 float bytes as int32.
+    pa = struct.unpack("<i", struct.pack("<f", float(fa)))[0]
+    pb = struct.unpack("<i", struct.pack("<f", float(fb)))[0]
+    # Float bit patterns are sign-magnitude.  Turn a negative pattern into
+    # minus its magnitude so the integers are ordered exactly like the floats,
+    # which keeps the distance correct when a and b have opposite signs.
+    if pa < 0:
+        pa = -(pa & 0x7FFFFFFF)
+    if pb < 0:
+        pb = -(pb & 0x7FFFFFFF)
+    return abs(pa - pb)
+
+
+def measure_unary(cpp_fn, np_fn, prep, dt, ulf, rng, n=100_000):
+    """Compare one unary numpycpp function against its numpy counterpart.
+
+    Draws ``n`` standard-normal samples from ``rng``, converts them to ``dt``
+    and passes them through ``prep`` before evaluating both implementations.
+
+    Args:
+        cpp_fn: Name of the attribute to look up on the ``cpp`` module.
+        np_fn: Reference numpy callable.
+        prep: Callable applied to the raw samples before evaluation.
+        dt: Element type the samples are converted to.
+        ulf: Scalar ULP-distance function taking (got, ref).
+        rng: Random source providing ``randn``.
+        n: Number of samples.
+
+    Returns:
+        ``(max_ulp, n_diff)``: the largest ULP distance seen and the number of
+        elements whose results differ.
+    """
+    samples = prep(rng.randn(n).astype(dt))
+    got = np.asarray(getattr(cpp, cpp_fn)(samples))
+    ref = np_fn(samples)
+
+    got_flat, ref_flat = got.flat, ref.flat
+    # One distance per mismatching element, in index order.
+    distances = [
+        ulf(got_flat[k], ref_flat[k])
+        for k in range(got.size)
+        if got_flat[k] != ref_flat[k]
+    ]
+    # Seed with 0 so an all-equal comparison reports a maximum of 0.
+    return max([0] + distances), len(distances)
+
+
+def main():
+    """Measure ULP differences for every covered function and write the CSV.
+
+    Three groups are scanned for float64 and float32: transcendental unary
+    functions, element-wise unary functions and binary functions.  Each
+    (function, dtype) pair yields one row holding the maximum ULP distance,
+    the number of differing elements and the sample count.  The rows are
+    written to ``ulp_precision.csv`` next to this script.
+
+    All samples come from a single ``RandomState(42)``, so the inputs each
+    function sees depend on the order in which the loops below run.
+    """
+    rng = np.random.RandomState(42)
+    N = 100_000
+    # Constant reference columns repeated on every row: 2**-52 and 2**-23
+    # formatted in scientific notation.
+    ULP_F64 = f"{2**-52:.2e}"
+    ULP_F32 = f"{2**-23:.2e}"
+
+    header = [
+        "function", "dtype", "max_ulp", "n_diff", "total",
+        "category", "ulp_value_f64", "ulp_value_f32",
+    ]
+    rows = []
+
+    # --- Transcendental unary ---
+    # Each entry: (numpycpp attribute name, numpy reference, input transform).
+    # The transform reshapes standard-normal samples before evaluation.
+    TRANS = [
+        ("exp",    np.exp,    lambda a: a),
+        ("log",    np.log,    lambda a: np.abs(a) + 0.1),
+        ("sin",    np.sin,    lambda a: a),
+        ("cos",    np.cos,    lambda a: a),
+        ("tan",    np.tan,    lambda a: a * 0.5),
+        ("cbrt",   np.cbrt,   lambda a: a),
+        ("expm1",  np.expm1,  lambda a: a * 2.0),
+        ("log1p",  np.log1p,  lambda a: np.abs(a) + 0.1),
+        ("log10",  np.log10,  lambda a: np.abs(a) + 0.1),
+        ("log2",   np.log2,   lambda a: np.abs(a) + 0.1),
+        ("arcsin", np.arcsin, lambda a: np.clip(a * 0.5, -1, 1)),
+        ("arccos", np.arccos, lambda a: np.clip(a * 0.5, -1, 1)),
+        ("arctan", np.arctan, lambda a: a),
+    ]
+
+    for cfn, nfn, prep in TRANS:
+        for dt, name, ulf in [
+            (np.float64, "float64", ulp_f64),
+            (np.float32, "float32", ulp_f32),
+        ]:
+            mu, nd = measure_unary(cfn, nfn, prep, dt, ulf, rng, N)
+            rows.append([cfn, name, mu, nd, N, "transcendental", ULP_F64, ULP_F32])
+
+    # --- Element-wise (should be bit-exact) ---
+    # Same tuple layout as TRANS.
+    ELEM = [
+        ("sqrt",    np.sqrt,    lambda a: np.abs(a)),
+        ("abs",     np.abs,     lambda a: a),
+        ("sign",    np.sign,    lambda a: a),
+        ("round",   np.round,   lambda a: a * 100),
+        ("floor",   np.floor,   lambda a: a * 100),
+        ("ceil",    np.ceil,    lambda a: a * 100),
+        ("degrees", np.degrees, lambda a: a),
+        ("radians", np.radians, lambda a: a),
+    ]
+
+    for cfn, nfn, prep in ELEM:
+        for dt, name, ulf in [
+            (np.float64, "float64", ulp_f64),
+            (np.float32, "float32", ulp_f32),
+        ]:
+            mu, nd = measure_unary(cfn, nfn, prep, dt, ulf, rng, N)
+            rows.append([cfn, name, mu, nd, N, "element-wise", ULP_F64, ULP_F32])
+
+    # --- Binary ---
+    # Each entry: (numpycpp attribute name, numpy reference, description).
+    # The description documents the second operand and is not used by the loop.
+    BIN = [
+        ("power",   np.power,   "scalar exponent 2.0"),
+        ("arctan2", np.arctan2,  "scalar 1.0 denominator"),
+        ("hypot",   np.hypot,    "two arrays"),
+    ]
+
+    for cfn, nfn, _desc in BIN:
+        for dt, name, ulf in [
+            (np.float64, "float64", ulp_f64),
+            (np.float32, "float32", ulp_f32),
+        ]:
+            a = rng.randn(N).astype(dt)
+            if cfn == "hypot":
+                # Second operand is a full array of positive values.
+                b = np.abs(rng.randn(N).astype(dt)) + dt(0.1)
+            elif cfn == "power":
+                b = dt(2.0)
+                a = np.abs(a) + dt(0.01)  # keep the base strictly positive (the exponent here is 2.0)
+            else:
+                b = dt(1.0)
+
+            # Same comparison as measure_unary, inlined for two operands.
+            cr = np.asarray(getattr(cpp, cfn)(a, b))
+            pr = nfn(a, b)
+            max_u, n_diff = 0, 0
+            for i in range(cr.size):
+                if cr.flat[i] != pr.flat[i]:
+                    u = ulf(cr.flat[i], pr.flat[i])
+                    if u > max_u:
+                        max_u = u
+                    n_diff += 1
+            rows.append([cfn, name, max_u, n_diff, N, "binary", ULP_F64, ULP_F32])
+
+    # Write next to this script, regardless of the current working directory.
+    csv_path = os.path.join(_here, "ulp_precision.csv")
+    with open(csv_path, "w", newline="") as f:
+        w = csv.writer(f)
+        w.writerow(header)
+        w.writerows(rows)
+
+    print(f"Wrote {len(rows)} rows to {csv_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/ulp_precision.csv b/tests/ulp_precision.csv
@@ -0,0 +1,49 @@
+function,dtype,max_ulp,n_diff,total,category,ulp_value_f64,ulp_value_f32
+exp,float64,1,23643,100000,transcendental,2.22e-16,1.19e-07
+exp,float32,2,39255,100000,transcendental,2.22e-16,1.19e-07
+log,float64,2,25951,100000,transcendental,2.22e-16,1.19e-07
+log,float32,3,26542,100000,transcendental,2.22e-16,1.19e-07
+sin,float64,3,60621,100000,transcendental,2.22e-16,1.19e-07
+sin,float32,1,6407,100000,transcendental,2.22e-16,1.19e-07
+cos,float64,3,65269,100000,transcendental,2.22e-16,1.19e-07
+cos,float32,1,13667,100000,transcendental,2.22e-16,1.19e-07
+tan,float64,3,34341,100000,transcendental,2.22e-16,1.19e-07
+tan,float32,2,23404,100000,transcendental,2.22e-16,1.19e-07
+cbrt,float64,4,57895,100000,transcendental,2.22e-16,1.19e-07
+cbrt,float32,2,35846,100000,transcendental,2.22e-16,1.19e-07
+expm1,float64,2,17840,100000,transcendental,2.22e-16,1.19e-07
+expm1,float32,2,17505,100000,transcendental,2.22e-16,1.19e-07
+log1p,float64,2,29458,100000,transcendental,2.22e-16,1.19e-07
+log1p,float32,2,16940,100000,transcendental,2.22e-16,1.19e-07
+log10,float64,3,28309,100000,transcendental,2.22e-16,1.19e-07
+log10,float32,3,43486,100000,transcendental,2.22e-16,1.19e-07
+log2,float64,2,17853,100000,transcendental,2.22e-16,1.19e-07
+log2,float32,2,17312,100000,transcendental,2.22e-16,1.19e-07
+arcsin,float64,2,19041,100000,transcendental,2.22e-16,1.19e-07
+arcsin,float32,3,19097,100000,transcendental,2.22e-16,1.19e-07
+arccos,float64,2,26077,100000,transcendental,2.22e-16,1.19e-07
+arccos,float32,2,33656,100000,transcendental,2.22e-16,1.19e-07
+arctan,float64,2,25147,100000,transcendental,2.22e-16,1.19e-07
+arctan,float32,2,21313,100000,transcendental,2.22e-16,1.19e-07
+sqrt,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
+sqrt,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
+abs,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
+abs,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
+sign,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
+sign,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
+round,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
+round,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
+floor,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
+floor,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
+ceil,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
+ceil,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
+degrees,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
+degrees,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
+radians,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
+radians,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
+power,float64,0,0,100000,binary,2.22e-16,1.19e-07
+power,float32,0,0,100000,binary,2.22e-16,1.19e-07
+arctan2,float64,0,0,100000,binary,2.22e-16,1.19e-07
+arctan2,float32,0,0,100000,binary,2.22e-16,1.19e-07
+hypot,float64,0,0,100000,binary,2.22e-16,1.19e-07
+hypot,float32,0,0,100000,binary,2.22e-16,1.19e-07