Skip to content

Commit 4314ec8

Browse files
author
peng.li24
committed
chore: cherry-pick bench/bench.py + make_csv.py + ulp_precision.csv from master
These three files existed only in the master branch (which diverged at 49647b7 and took a different direction — dropping SVML bridge entirely). bench/bench.py — throughput (GB/s) + max-ULP benchmark: numpycpp vs numpy tests/make_csv.py — generates tests/ulp_precision.csv via ULP distance scan tests/ulp_precision.csv — per-function ULP accuracy data (std backend data) Everything else in master is either: • already superseded by bit-exact's implementation (headers, pycpp, CI) • inferior (152 tests vs 220, no advanced indexing, no SVML bridge) • stale (pycpp/pyproject.toml — already removed) master will be deleted after this commit.
1 parent fa67477 commit 4314ec8

3 files changed

Lines changed: 321 additions & 0 deletions

File tree

bench/bench.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#!/usr/bin/env python3
2+
"""
3+
bench/bench.py — numpycpp vs numpy: throughput (GB/s) and ULP accuracy.
4+
5+
CSV columns (stdout):
6+
func, dtype, N, numpycpp_ms, numpy_ms, speedup_x, numpycpp_GBps, numpy_GBps, max_ulp_vs_numpy
7+
8+
Usage:
9+
# build first (from project root):
10+
# cmake -S tests -B tests/build && cmake --build tests/build
11+
12+
PYTHONPATH=tests/build python3 bench/bench.py
13+
PYTHONPATH=tests/build python3 bench/bench.py > results.csv
14+
"""
15+
16+
import sys
17+
import csv
18+
import timeit
19+
import numpy as np
20+
21+
try:
22+
import numpycpp
23+
except ImportError:
24+
sys.exit("numpycpp not found — set PYTHONPATH=tests/build (or wherever the .so lives)")
25+
26+
# ---------------------------------------------------------------------------
27+
# Function table: (name, numpy_fn, numpycpp_fn, input_lo, input_hi)
28+
# ---------------------------------------------------------------------------
29+
FUNCS = [
30+
("sqrt", np.sqrt, numpycpp.sqrt, 0.1, 10.0),
31+
("abs", np.abs, numpycpp.abs, -5.0, 5.0),
32+
("exp", np.exp, numpycpp.exp, 0.1, 5.0),
33+
("log", np.log, numpycpp.log, 0.1, 10.0),
34+
("sin", np.sin, numpycpp.sin, 0.1, 5.0),
35+
("cos", np.cos, numpycpp.cos, 0.1, 5.0),
36+
("tan", np.tan, numpycpp.tan, 0.1, 1.0),
37+
("arcsin", np.arcsin, numpycpp.arcsin,-0.9, 0.9),
38+
("arccos", np.arccos, numpycpp.arccos,-0.9, 0.9),
39+
("arctan", np.arctan, numpycpp.arctan, 0.1, 5.0),
40+
("cbrt", np.cbrt, numpycpp.cbrt, 0.1, 10.0),
41+
("expm1", np.expm1, numpycpp.expm1, 0.1, 1.0),
42+
("log1p", np.log1p, numpycpp.log1p, 0.1, 10.0),
43+
("log10", np.log10, numpycpp.log10, 0.1, 10.0),
44+
("log2", np.log2, numpycpp.log2, 0.1, 10.0),
45+
]
46+
47+
DTYPES = ["float64", "float32"]
48+
# sizes from 2^10 to 2^19 (1K … 512K elements)
49+
SIZES = [1 << k for k in range(10, 20, 3)] # 1024, 8192, 65536, 524288
50+
REPS = 50 # timeit repeats (take min — eliminates OS scheduling noise)
51+
NUMBER = 5 # calls per repeat
52+
53+
# ---------------------------------------------------------------------------
54+
# Helpers
55+
# ---------------------------------------------------------------------------
56+
_rng = np.random.default_rng(42)
57+
58+
def _make(lo: float, hi: float, N: int, dtype: str) -> np.ndarray:
59+
return np.ascontiguousarray(_rng.uniform(lo, hi, N).astype(dtype))
60+
61+
def _bench_ms(fn, arr: np.ndarray) -> float:
62+
"""Return minimum wall-time per call in milliseconds."""
63+
t = timeit.repeat(lambda: fn(arr), repeat=REPS, number=NUMBER)
64+
return min(t) / NUMBER * 1e3
65+
66+
def _max_ulp(ref: np.ndarray, got: np.ndarray) -> int:
67+
"""Max absolute ULP difference between two same-dtype arrays."""
68+
if ref.dtype == np.float64:
69+
return int(np.max(np.abs(ref.view(np.int64) - got.view(np.int64))))
70+
return int(np.max(np.abs(ref.view(np.int32) - got.view(np.int32))))
71+
72+
# ---------------------------------------------------------------------------
73+
# Main
74+
# ---------------------------------------------------------------------------
75+
def main() -> None:
76+
w = csv.writer(sys.stdout)
77+
w.writerow([
78+
"func", "dtype", "N",
79+
"numpycpp_ms", "numpy_ms", "speedup_x",
80+
"numpycpp_GBps", "numpy_GBps",
81+
"max_ulp_vs_numpy",
82+
])
83+
84+
for fn_name, np_fn, cpp_fn, lo, hi in FUNCS:
85+
for dtype in DTYPES:
86+
itemsize = 8 if dtype == "float64" else 4
87+
for N in SIZES:
88+
arr = _make(lo, hi, N, dtype)
89+
90+
# warm-up — fill caches, JIT the pybind11 dispatch path
91+
cpp_fn(arr); np_fn(arr)
92+
93+
t_cpp = _bench_ms(cpp_fn, arr)
94+
t_np = _bench_ms(np_fn, arr)
95+
96+
gb_cpp = N * itemsize / t_cpp / 1e6
97+
gb_np = N * itemsize / t_np / 1e6
98+
speedup = t_np / t_cpp
99+
ulp = _max_ulp(np_fn(arr), cpp_fn(arr))
100+
101+
w.writerow([
102+
fn_name, dtype, N,
103+
f"{t_cpp:.4f}", f"{t_np:.4f}",
104+
f"{speedup:.3f}",
105+
f"{gb_cpp:.2f}", f"{gb_np:.2f}",
106+
ulp,
107+
])
108+
sys.stdout.flush() # stream CSV line-by-line
109+
110+
if __name__ == "__main__":
111+
main()

tests/make_csv.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env python3
2+
"""Generate tests/ulp_precision.csv — ULP differences: numpycpp vs numpy.
3+
4+
Usage:
5+
make csv # from tests/ directory
6+
python3 tests/make_csv.py # from repo root
7+
"""
8+
9+
import os, sys, struct, csv
10+
import numpy as np
11+
import importlib
12+
13+
# Ensure the tests directory is on sys.path so we can import the C++ module
14+
_here = os.path.dirname(os.path.abspath(__file__))
15+
if _here not in sys.path:
16+
sys.path.insert(0, _here)
17+
cpp = importlib.import_module("numpycpp")
18+
19+
20+
def ulp_f64(a: float, b: float) -> int:
21+
"""Signed ULP distance between two float64 values."""
22+
if a == b:
23+
return 0
24+
if np.isnan(a) or np.isnan(b):
25+
return 2**63 # sentinel
26+
pa = struct.unpack("q", struct.pack("d", float(a)))[0]
27+
pb = struct.unpack("q", struct.pack("d", float(b)))[0]
28+
if pa < 0: pa = (-pa) ^ 0x7FFFFFFFFFFFFFFF
29+
if pb < 0: pb = (-pb) ^ 0x7FFFFFFFFFFFFFFF
30+
return abs(pa - pb)
31+
32+
33+
def ulp_f32(a: float, b: float) -> int:
34+
"""Signed ULP distance between two float32 values."""
35+
fa, fb = np.float32(a), np.float32(b)
36+
if fa == fb:
37+
return 0
38+
if np.isnan(fa) or np.isnan(fb):
39+
return 2**31 # sentinel
40+
pa = struct.unpack("i", struct.pack("f", float(fa)))[0]
41+
pb = struct.unpack("i", struct.pack("f", float(fb)))[0]
42+
if pa < 0: pa = (-pa) ^ 0x7FFFFFFF
43+
if pb < 0: pb = (-pb) ^ 0x7FFFFFFF
44+
return abs(pa - pb)
45+
46+
47+
def measure_unary(cpp_fn, np_fn, prep, dt, ulf, rng, n=100_000):
48+
a = rng.randn(n).astype(dt)
49+
a = prep(a)
50+
cr = np.asarray(getattr(cpp, cpp_fn)(a))
51+
pr = np_fn(a)
52+
max_u, n_diff = 0, 0
53+
for i in range(cr.size):
54+
if cr.flat[i] != pr.flat[i]:
55+
u = ulf(cr.flat[i], pr.flat[i])
56+
if u > max_u:
57+
max_u = u
58+
n_diff += 1
59+
return max_u, n_diff
60+
61+
62+
def main():
63+
rng = np.random.RandomState(42)
64+
N = 100_000
65+
ULP_F64 = f"{2**-52:.2e}"
66+
ULP_F32 = f"{2**-23:.2e}"
67+
68+
header = [
69+
"function", "dtype", "max_ulp", "n_diff", "total",
70+
"category", "ulp_value_f64", "ulp_value_f32",
71+
]
72+
rows = []
73+
74+
# --- Transcendental unary ---
75+
TRANS = [
76+
("exp", np.exp, lambda a: a),
77+
("log", np.log, lambda a: np.abs(a) + 0.1),
78+
("sin", np.sin, lambda a: a),
79+
("cos", np.cos, lambda a: a),
80+
("tan", np.tan, lambda a: a * 0.5),
81+
("cbrt", np.cbrt, lambda a: a),
82+
("expm1", np.expm1, lambda a: a * 2.0),
83+
("log1p", np.log1p, lambda a: np.abs(a) + 0.1),
84+
("log10", np.log10, lambda a: np.abs(a) + 0.1),
85+
("log2", np.log2, lambda a: np.abs(a) + 0.1),
86+
("arcsin", np.arcsin, lambda a: np.clip(a * 0.5, -1, 1)),
87+
("arccos", np.arccos, lambda a: np.clip(a * 0.5, -1, 1)),
88+
("arctan", np.arctan, lambda a: a),
89+
]
90+
91+
for cfn, nfn, prep in TRANS:
92+
for dt, name, ulf in [
93+
(np.float64, "float64", ulp_f64),
94+
(np.float32, "float32", ulp_f32),
95+
]:
96+
mu, nd = measure_unary(cfn, nfn, prep, dt, ulf, rng, N)
97+
rows.append([cfn, name, mu, nd, N, "transcendental", ULP_F64, ULP_F32])
98+
99+
# --- Element-wise (should be bit-exact) ---
100+
ELEM = [
101+
("sqrt", np.sqrt, lambda a: np.abs(a)),
102+
("abs", np.abs, lambda a: a),
103+
("sign", np.sign, lambda a: a),
104+
("round", np.round, lambda a: a * 100),
105+
("floor", np.floor, lambda a: a * 100),
106+
("ceil", np.ceil, lambda a: a * 100),
107+
("degrees", np.degrees, lambda a: a),
108+
("radians", np.radians, lambda a: a),
109+
]
110+
111+
for cfn, nfn, prep in ELEM:
112+
for dt, name, ulf in [
113+
(np.float64, "float64", ulp_f64),
114+
(np.float32, "float32", ulp_f32),
115+
]:
116+
mu, nd = measure_unary(cfn, nfn, prep, dt, ulf, rng, N)
117+
rows.append([cfn, name, mu, nd, N, "element-wise", ULP_F64, ULP_F32])
118+
119+
# --- Binary ---
120+
BIN = [
121+
("power", np.power, "scalar exponent 2.0"),
122+
("arctan2", np.arctan2, "scalar 1.0 denominator"),
123+
("hypot", np.hypot, "two arrays"),
124+
]
125+
126+
for cfn, nfn, _desc in BIN:
127+
for dt, name, ulf in [
128+
(np.float64, "float64", ulp_f64),
129+
(np.float32, "float32", ulp_f32),
130+
]:
131+
a = rng.randn(N).astype(dt)
132+
if cfn == "hypot":
133+
b = np.abs(rng.randn(N).astype(dt)) + dt(0.1)
134+
elif cfn == "power":
135+
b = dt(2.0)
136+
a = np.abs(a) + dt(0.01) # keep positive for fractional exponent
137+
else:
138+
b = dt(1.0)
139+
140+
cr = np.asarray(getattr(cpp, cfn)(a, b))
141+
pr = nfn(a, b)
142+
max_u, n_diff = 0, 0
143+
for i in range(cr.size):
144+
if cr.flat[i] != pr.flat[i]:
145+
u = ulf(cr.flat[i], pr.flat[i])
146+
if u > max_u:
147+
max_u = u
148+
n_diff += 1
149+
rows.append([cfn, name, max_u, n_diff, N, "binary", ULP_F64, ULP_F32])
150+
151+
csv_path = os.path.join(_here, "ulp_precision.csv")
152+
with open(csv_path, "w", newline="") as f:
153+
w = csv.writer(f)
154+
w.writerow(header)
155+
w.writerows(rows)
156+
157+
print(f"Wrote {len(rows)} rows to {csv_path}")
158+
159+
160+
if __name__ == "__main__":
161+
main()

tests/ulp_precision.csv

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
function,dtype,max_ulp,n_diff,total,category,ulp_value_f64,ulp_value_f32
2+
exp,float64,1,23643,100000,transcendental,2.22e-16,1.19e-07
3+
exp,float32,2,39255,100000,transcendental,2.22e-16,1.19e-07
4+
log,float64,2,25951,100000,transcendental,2.22e-16,1.19e-07
5+
log,float32,3,26542,100000,transcendental,2.22e-16,1.19e-07
6+
sin,float64,3,60621,100000,transcendental,2.22e-16,1.19e-07
7+
sin,float32,1,6407,100000,transcendental,2.22e-16,1.19e-07
8+
cos,float64,3,65269,100000,transcendental,2.22e-16,1.19e-07
9+
cos,float32,1,13667,100000,transcendental,2.22e-16,1.19e-07
10+
tan,float64,3,34341,100000,transcendental,2.22e-16,1.19e-07
11+
tan,float32,2,23404,100000,transcendental,2.22e-16,1.19e-07
12+
cbrt,float64,4,57895,100000,transcendental,2.22e-16,1.19e-07
13+
cbrt,float32,2,35846,100000,transcendental,2.22e-16,1.19e-07
14+
expm1,float64,2,17840,100000,transcendental,2.22e-16,1.19e-07
15+
expm1,float32,2,17505,100000,transcendental,2.22e-16,1.19e-07
16+
log1p,float64,2,29458,100000,transcendental,2.22e-16,1.19e-07
17+
log1p,float32,2,16940,100000,transcendental,2.22e-16,1.19e-07
18+
log10,float64,3,28309,100000,transcendental,2.22e-16,1.19e-07
19+
log10,float32,3,43486,100000,transcendental,2.22e-16,1.19e-07
20+
log2,float64,2,17853,100000,transcendental,2.22e-16,1.19e-07
21+
log2,float32,2,17312,100000,transcendental,2.22e-16,1.19e-07
22+
arcsin,float64,2,19041,100000,transcendental,2.22e-16,1.19e-07
23+
arcsin,float32,3,19097,100000,transcendental,2.22e-16,1.19e-07
24+
arccos,float64,2,26077,100000,transcendental,2.22e-16,1.19e-07
25+
arccos,float32,2,33656,100000,transcendental,2.22e-16,1.19e-07
26+
arctan,float64,2,25147,100000,transcendental,2.22e-16,1.19e-07
27+
arctan,float32,2,21313,100000,transcendental,2.22e-16,1.19e-07
28+
sqrt,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
29+
sqrt,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
30+
abs,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
31+
abs,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
32+
sign,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
33+
sign,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
34+
round,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
35+
round,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
36+
floor,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
37+
floor,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
38+
ceil,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
39+
ceil,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
40+
degrees,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
41+
degrees,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
42+
radians,float64,0,0,100000,element-wise,2.22e-16,1.19e-07
43+
radians,float32,0,0,100000,element-wise,2.22e-16,1.19e-07
44+
power,float64,0,0,100000,binary,2.22e-16,1.19e-07
45+
power,float32,0,0,100000,binary,2.22e-16,1.19e-07
46+
arctan2,float64,0,0,100000,binary,2.22e-16,1.19e-07
47+
arctan2,float32,0,0,100000,binary,2.22e-16,1.19e-07
48+
hypot,float64,0,0,100000,binary,2.22e-16,1.19e-07
49+
hypot,float32,0,0,100000,binary,2.22e-16,1.19e-07

0 commit comments

Comments
 (0)