Skip to content

Commit 5dbf32b

Browse files
author
peng.li24
committed
fix: normalize log domain-error NaN to +qNaN; align einsum vs-matmul tests
Two remaining CI failures (numpy 1.26.4, non-AVX512 runner): 1. test_domain_log_neg [float64] npy_log(-x) returns 0xfff8000000000000 (negative qNaN); numpy 1.26.4 ufunc returns 0x7ff8000000000000 (positive qNaN). Fix: add custom log_f64() in svml_bridge.h that normalises any domain-error NaN (isnan(r) && !isnan(x)) to positive qNaN via memcpy, matching numpy's canonical NaN convention. 2. test_vs_matmul / test_vs_batch_matmul np.einsum('ij,jk->ik') uses numpy's SSE forward mul+add kernel; numpy.matmul / a @ b uses BLAS (cblas_sgemm). They can differ by ±1 ULP for arbitrary inputs — the two tests were comparing against the wrong reference (a @ b instead of np.einsum). Fix: compare cpp.einsum result against np.einsum (same path as test_random, which already passes), not against matmul.
1 parent 1fbc40d commit 5dbf32b

2 files changed

Lines changed: 30 additions & 5 deletions

File tree

numpy/detail/svml_bridge.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,24 @@ inline float atan2_npy_f32(float y, float x) {
288288
#endif
289289

290290
DISPATCH_F64(exp)
291-
DISPATCH_F64(log)
291+
// log_f64: custom — npy_log(-x) returns negative NaN (0xfff8...); numpy's ufunc
292+
// normalizes domain-error NaN to positive qNaN (0x7ff8...). Mirror that here.
293+
inline double log_f64(double x) {
294+
double r;
295+
#ifdef __AVX512F__
296+
r = cpu_has_avx512f() ? log_svml_f64(x) : log_npy_f64(x);
297+
#else
298+
r = log_npy_f64(x);
299+
#endif
300+
// Normalize: domain-error NaN (finite/inf negative input) → positive qNaN
301+
if (__builtin_expect(std::isnan(r) && !std::isnan(x), 0)) {
302+
constexpr uint64_t qnan_bits = 0x7ff8000000000000ULL;
303+
double pos_nan;
304+
std::memcpy(&pos_nan, &qnan_bits, 8);
305+
return pos_nan;
306+
}
307+
return r;
308+
}
292309
// sin_f64: custom — SVML scalar broadcast path loses signed zero (sin(-0)→+0).
293310
// IEEE 754 requires sin(±0) = ±0; preserve sign of zero explicitly.
294311
inline double sin_f64(double x) {

tests/test_all.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,8 +1063,12 @@ def test_random(self, cpp, dtype, i, j, k):
10631063
def test_vs_matmul(self, cpp, dtype):
10641064
a = random_array((4, 5), seed=1, dtype=dtype)
10651065
b = random_array((5, 3), seed=2, dtype=dtype)
1066-
assert_bit_aligned(np.asarray(cpp.einsum("ij,jk->ik", a, b)), a @ b,
1067-
"ij,jk->ik vs matmul")
1066+
# Compare vs np.einsum (same SSE forward mul+add path as our implementation).
1067+
# np.einsum and np.matmul use different BLAS paths and can differ at machine
1068+
# epsilon — the bit-exact contract is with np.einsum, not with matmul.
1069+
assert_bit_aligned(np.asarray(cpp.einsum("ij,jk->ik", a, b)),
1070+
np.einsum("ij,jk->ik", a, b),
1071+
"ij,jk->ik vs np.einsum")
10681072

10691073

10701074
class TestEinsumBijBjkToBik:
@@ -1086,8 +1090,12 @@ def test_random(self, cpp, dtype, batch, i, j, k):
10861090
def test_vs_batch_matmul(self, cpp, dtype):
10871091
a = random_array((4, 5, 6), seed=1, dtype=dtype)
10881092
b = random_array((4, 6, 3), seed=2, dtype=dtype)
1089-
assert_bit_aligned(np.asarray(cpp.einsum("bij,bjk->bik", a, b)), a @ b,
1090-
"bij,bjk->bik vs batch matmul")
1093+
# Compare vs np.einsum (same SSE forward mul+add path as our implementation).
1094+
# np.einsum and np.matmul use different BLAS paths and can differ at machine
1095+
# epsilon — the bit-exact contract is with np.einsum, not with batched matmul.
1096+
assert_bit_aligned(np.asarray(cpp.einsum("bij,bjk->bik", a, b)),
1097+
np.einsum("bij,bjk->bik", a, b),
1098+
"bij,bjk->bik vs np.einsum")
10911099

10921100

10931101
class TestEinsumAijAijToAi:

0 commit comments

Comments
 (0)