Skip to content

Commit be9d660

Browse files
author
peng.li24
committed
bench: add bench/bench.py (numpycpp vs numpy CSV report); clean tests/CMakeLists.txt
bench/bench.py: - Benchmarks all 15 unary transcendentals (sqrt, abs, exp, log, sin, cos, tan, arcsin, arccos, arctan, cbrt, expm1, log1p, log10, log2) - Both float32 and float64 - Sizes: 1K, 8K, 64K, 512K elements - Reports CSV to stdout: func, dtype, N, numpycpp_ms, numpy_ms, speedup_x, numpycpp_GBps, numpy_GBps, max_ulp_vs_numpy - Usage: PYTHONPATH=tests/build python3 bench/bench.py [> results.csv] tests/CMakeLists.txt: - Remove SVML/npy_* linkage (no longer needed: core.h uses std:: only) - Remove -DNUMPYCPP_USE_NUMPY_MATH, NUMPY_CORE_DIR discovery, umath .so linking, and RPATH — the build is now portable across OS and arch
1 parent c06fd43 commit be9d660

2 files changed

Lines changed: 113 additions & 58 deletions

File tree

bench/bench.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#!/usr/bin/env python3
2+
"""
3+
bench/bench.py — numpycpp vs numpy: throughput (GB/s) and ULP accuracy.
4+
5+
CSV columns (stdout):
6+
func, dtype, N, numpycpp_ms, numpy_ms, speedup_x, numpycpp_GBps, numpy_GBps, max_ulp_vs_numpy
7+
8+
Usage:
9+
# build first (from project root):
10+
# cmake -S tests -B tests/build && cmake --build tests/build
11+
12+
PYTHONPATH=tests/build python3 bench/bench.py
13+
PYTHONPATH=tests/build python3 bench/bench.py > results.csv
14+
"""
15+
16+
import sys
17+
import csv
18+
import timeit
19+
import numpy as np
20+
21+
try:
22+
import numpycpp
23+
except ImportError:
24+
sys.exit("numpycpp not found — set PYTHONPATH=tests/build (or wherever the .so lives)")
25+
26+
# ---------------------------------------------------------------------------
27+
# Function table: (name, numpy_fn, numpycpp_fn, input_lo, input_hi)
28+
# ---------------------------------------------------------------------------
29+
# Each entry pairs a NumPy function with the numpycpp attribute of the same
# name, followed by the (input_lo, input_hi) bounds handed to _make() when
# generating benchmark inputs.
# All intervals sit inside the real domain of the corresponding function
# (positive for sqrt/log*, within [-1, 1] for arcsin/arccos, below pi/2 for
# tan), presumably so that outputs stay finite and the ULP comparison is
# meaningful — confirm before widening any range.
FUNCS = [
    ("sqrt", np.sqrt, numpycpp.sqrt, 0.1, 10.0),
    ("abs", np.abs, numpycpp.abs, -5.0, 5.0),  # only table entry with mixed-sign inputs besides arcsin/arccos
    ("exp", np.exp, numpycpp.exp, 0.1, 5.0),
    ("log", np.log, numpycpp.log, 0.1, 10.0),
    ("sin", np.sin, numpycpp.sin, 0.1, 5.0),
    ("cos", np.cos, numpycpp.cos, 0.1, 5.0),
    ("tan", np.tan, numpycpp.tan, 0.1, 1.0),  # upper bound 1.0 < pi/2, so no pole in range
    ("arcsin", np.arcsin, numpycpp.arcsin,-0.9, 0.9),
    ("arccos", np.arccos, numpycpp.arccos,-0.9, 0.9),
    ("arctan", np.arctan, numpycpp.arctan, 0.1, 5.0),
    ("cbrt", np.cbrt, numpycpp.cbrt, 0.1, 10.0),
    ("expm1", np.expm1, numpycpp.expm1, 0.1, 1.0),
    ("log1p", np.log1p, numpycpp.log1p, 0.1, 10.0),
    ("log10", np.log10, numpycpp.log10, 0.1, 10.0),
    ("log2", np.log2, numpycpp.log2, 0.1, 10.0),
]
46+
47+
# Element types benchmarked for every function, in reporting order.
DTYPES = ["float64", "float32"]

# Array lengths 2^10, 2^13, 2^16 and 2^19 (1K, 8K, 64K, 512K elements).
SIZES = [2 ** exponent for exponent in (10, 13, 16, 19)]

# timeit settings: each of the REPS repeats times NUMBER back-to-back calls;
# the minimum over the repeats is reported, which suppresses most (not all)
# OS scheduling noise.
REPS = 50
NUMBER = 5
52+
53+
# ---------------------------------------------------------------------------
54+
# Helpers
55+
# ---------------------------------------------------------------------------
56+
# Single seeded generator shared by every _make() call, so a full run draws
# the same sequence of inputs each time it is executed.
_rng = np.random.default_rng(42)


def _make(lo: float, hi: float, N: int, dtype: str) -> np.ndarray:
    """Build one benchmark input array.

    Draws ``N`` samples from ``_rng.uniform(lo, hi)`` (generated in NumPy's
    default float type), converts them to ``dtype`` and returns the result
    as a C-contiguous 1-D array.

    Args:
        lo: Lower bound passed to the uniform sampler.
        hi: Upper bound passed to the uniform sampler.
        N: Number of elements to generate.
        dtype: NumPy dtype name of the returned array (e.g. ``"float32"``).

    Returns:
        A contiguous ``ndarray`` of shape ``(N,)`` and the requested dtype.
    """
    samples = _rng.uniform(lo, hi, N)
    converted = samples.astype(dtype)
    return np.ascontiguousarray(converted)
60+
61+
def _bench_ms(fn, arr: np.ndarray) -> float:
    """Return the best observed wall-time of ``fn(arr)`` in milliseconds.

    ``timeit.repeat`` is run with ``REPS`` repeats of ``NUMBER`` calls each;
    it yields one total (in seconds) per repeat.  The smallest total is
    divided by ``NUMBER`` to get a per-call time and scaled to milliseconds.

    Args:
        fn: Callable taking the array as its only argument.
        arr: Input array handed to ``fn`` on every call.

    Returns:
        Minimum per-call duration in milliseconds.
    """
    def _call():
        return fn(arr)

    totals = timeit.repeat(_call, repeat=REPS, number=NUMBER)
    fastest_total = min(totals)
    return fastest_total / NUMBER * 1e3
65+
66+
def _max_ulp(ref: np.ndarray, got: np.ndarray) -> int:
    """Max absolute ULP difference between two same-dtype float arrays.

    The IEEE-754 bit pattern of each element is reinterpreted as a signed
    integer and mapped onto a monotonic integer line (negative floats are
    mirrored so that ``-0.0`` and ``+0.0`` both map to 0).  The ULP distance
    is the absolute difference of those mapped integers.  Subtracting the raw
    bit patterns directly is only valid when both values have the same sign
    and can overflow otherwise, so the difference is taken in ``uint64``.

    NaNs are compared by bit pattern like any other value.

    Args:
        ref: Reference results (``float32`` or ``float64``).
        got: Results under test; must have the same dtype as ``ref``.

    Returns:
        The largest element-wise ULP distance as a Python ``int``
        (0 for empty input).

    Raises:
        ValueError: If ``ref`` and ``got`` have different dtypes.
        TypeError: If the dtype is neither ``float32`` nor ``float64``.
    """
    ref = np.atleast_1d(ref)
    got = np.atleast_1d(got)

    if ref.dtype != got.dtype:
        raise ValueError(
            f"dtype mismatch: ref is {ref.dtype}, got is {got.dtype}"
        )
    if ref.dtype == np.float64:
        int_type = np.int64
    elif ref.dtype == np.float32:
        int_type = np.int32
    else:
        raise TypeError(
            f"unsupported dtype {ref.dtype}; expected float32 or float64"
        )
    if ref.size == 0 or got.size == 0:
        return 0

    # All bits except the sign bit.
    magnitude_mask = int_type(np.iinfo(int_type).max)

    def _ordered(values: np.ndarray) -> np.ndarray:
        # Floats are sign-magnitude: for negative values, strip the sign bit
        # and negate the magnitude so integer order matches float order.
        bits = values.view(int_type)
        return np.where(bits < 0, -(bits & magnitude_mask), bits)

    a = _ordered(ref)
    b = _ordered(got)
    # upper >= lower element-wise, and the true distance is < 2**64, so the
    # modular uint64 subtraction yields the exact value without overflow.
    upper = np.maximum(a, b).astype(np.uint64)
    lower = np.minimum(a, b).astype(np.uint64)
    return int(np.max(upper - lower))
71+
72+
# ---------------------------------------------------------------------------
73+
# Main
74+
# ---------------------------------------------------------------------------
75+
def main() -> None:
    """Benchmark every (function, dtype, size) combination and emit CSV.

    For each entry of ``FUNCS``, each dtype in ``DTYPES`` and each length in
    ``SIZES`` this generates an input array, times the numpycpp and NumPy
    implementations with ``_bench_ms``, derives speedup and throughput, and
    measures the maximum ULP deviation of numpycpp from NumPy.  One CSV row
    per combination is written to stdout and flushed immediately.

    Throughput counts only the input bytes (``N * itemsize``) per call.
    """
    header = [
        "func", "dtype", "N",
        "numpycpp_ms", "numpy_ms", "speedup_x",
        "numpycpp_GBps", "numpy_GBps",
        "max_ulp_vs_numpy",
    ]
    out = csv.writer(sys.stdout)
    out.writerow(header)

    for label, numpy_impl, cpp_impl, low, high in FUNCS:
        for dtype_name in DTYPES:
            if dtype_name == "float64":
                bytes_per_elem = 8
            else:
                bytes_per_elem = 4

            for count in SIZES:
                data = _make(low, high, count, dtype_name)

                # Warm-up: one untimed call of each implementation so caches
                # and any first-call setup cost are not part of the timings.
                cpp_impl(data)
                numpy_impl(data)

                cpp_ms = _bench_ms(cpp_impl, data)
                numpy_ms = _bench_ms(numpy_impl, data)

                # bytes / ms / 1e6 == GB/s
                cpp_gbps = count * bytes_per_elem / cpp_ms / 1e6
                numpy_gbps = count * bytes_per_elem / numpy_ms / 1e6
                ratio = numpy_ms / cpp_ms
                worst_ulp = _max_ulp(numpy_impl(data), cpp_impl(data))

                row = [
                    label, dtype_name, count,
                    f"{cpp_ms:.4f}", f"{numpy_ms:.4f}",
                    f"{ratio:.3f}",
                    f"{cpp_gbps:.2f}", f"{numpy_gbps:.2f}",
                    worst_ulp,
                ]
                out.writerow(row)
                sys.stdout.flush()  # stream CSV line-by-line


if __name__ == "__main__":
    main()

tests/CMakeLists.txt

Lines changed: 2 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,6 @@
44
# cmake -S tests -B tests/build
55
# cmake --build tests/build
66
# cmake --build tests/build --target pytest
7-
#
8-
# Precision strategy (no dlopen / no /proc tricks):
9-
# float32 → npy_*f scalar from numpy's _multiarray_umath.so → ≤1 ULP
10-
# float64 → __svml_*8 SVML from numpy's _multiarray_umath.so → 0 ULP
11-
# Both symbol sets are resolved at link-time via -l:<umath.so>.
127

138
cmake_minimum_required(VERSION 3.18)
149
project(numpycpp_tests LANGUAGES CXX)
@@ -35,32 +30,6 @@ find_package(pybind11 REQUIRED CONFIG HINTS "${_PYBIND11_CMAKE_DIR}")
3530
find_package(Eigen3 REQUIRED NO_MODULE)
3631
find_package(OpenMP)
3732

38-
# ---------------------------------------------------------------------------
39-
# Locate numpy's _multiarray_umath.so
40-
# This .so bundles Intel SVML (__svml_exp8 / __svml_sin8 / …) and exports
41-
# numpy's scalar polynomial helpers (npy_expf / npy_sinf / …).
42-
# We link against it directly so core.h's extern "C" declarations resolve at
43-
# link time — no dlopen, no runtime symbol lookup.
44-
# ---------------------------------------------------------------------------
45-
execute_process(
46-
COMMAND "${Python3_EXECUTABLE}" -c
47-
"import os, numpy.core; print(os.path.dirname(numpy.core.__file__))"
48-
OUTPUT_VARIABLE NUMPY_CORE_DIR
49-
OUTPUT_STRIP_TRAILING_WHITESPACE
50-
COMMAND_ERROR_IS_FATAL ANY
51-
)
52-
execute_process(
53-
COMMAND "${Python3_EXECUTABLE}" -c
54-
"import os, glob, numpy.core
55-
d = os.path.dirname(numpy.core.__file__)
56-
print(os.path.basename(glob.glob(os.path.join(d, '_multiarray_umath*.so'))[0]))"
57-
OUTPUT_VARIABLE NUMPY_UMATH_FILENAME
58-
OUTPUT_STRIP_TRAILING_WHITESPACE
59-
COMMAND_ERROR_IS_FATAL ANY
60-
)
61-
message(STATUS "NumPy core : ${NUMPY_CORE_DIR}")
62-
message(STATUS "NumPy umath : ${NUMPY_UMATH_FILENAME}")
63-
6433
# ---------------------------------------------------------------------------
6534
# Python extension module: numpycpp
6635
# ---------------------------------------------------------------------------
@@ -75,11 +44,10 @@ target_include_directories(numpycpp PRIVATE
7544

7645
target_compile_options(numpycpp PRIVATE
7746
-O3
78-
-march=native # enables __AVX512F__ on this machine
47+
-march=native
7948
-fno-math-errno
8049
-fno-trapping-math
81-
-ffp-contract=off # no FMA contraction (keeps results reproducible)
82-
-DNUMPYCPP_USE_NUMPY_MATH # activates npy_* / SVML specialisations in core.h
50+
-ffp-contract=off
8351
)
8452

8553
if(Eigen3_FOUND)
@@ -90,30 +58,6 @@ if(OpenMP_CXX_FOUND)
9058
target_link_libraries(numpycpp PRIVATE OpenMP::OpenMP_CXX)
9159
endif()
9260

93-
# Link against numpy's umath .so to resolve __svml_* and npy_* symbols.
94-
#
95-
# Naming constraints:
96-
# • The file is _multiarray_umath*.so — no "lib" prefix.
97-
# • Passing the absolute path makes CMake generate "-l_multiarray_umath…"
98-
# (strips dir + .so) which ld cannot find.
99-
# • LINK_FLAGS / target_link_options put flags BEFORE object files;
100-
# with GNU ld --as-needed (Ubuntu default) the library is then skipped
101-
# because no symbols are referenced yet → runtime undefined symbols.
102-
#
103-
# Solution: items starting with "-l" inside target_link_libraries() are:
104-
# (a) passed verbatim (no CMake name mangling), and
105-
# (b) placed AFTER object files in the linker command (the link-libraries
106-
# slot, not the link-flags slot) — satisfying --as-needed ordering.
107-
# target_link_directories() adds -L<dir> (safe before objects).
108-
target_link_directories(numpycpp PRIVATE "${NUMPY_CORE_DIR}")
109-
target_link_libraries(numpycpp PRIVATE "-l:${NUMPY_UMATH_FILENAME}")
110-
111-
# Embed the numpy core directory as RPATH so the loader finds the .so at runtime.
112-
set_target_properties(numpycpp PROPERTIES
113-
BUILD_RPATH "${NUMPY_CORE_DIR}"
114-
INSTALL_RPATH "${NUMPY_CORE_DIR}"
115-
)
116-
11761
# ---------------------------------------------------------------------------
11862
# Test targets
11963
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)