Skip to content

Commit 7af1bb4

Browse files
author
peng.li24
committed
fix(blas_bridge): eliminate permanent-null static cache for dlsym symbols
Two bugs combined to cause silent invocation failures: 1. find_openblas_path() permanently cached an empty path on its first miss (/proc/self/maps lists no OpenBLAS mapping when OpenBLAS is not yet loaded). It now resets tried=false when the path is empty, so the lookup is retried on the next call. 2. Every per-function static auto fn = resolve_blas("sym") cache permanently stored nullptr after the first failure. These were changed to: static FNTYPE* fn = nullptr; if (!fn) fn = (FNTYPE*)resolve_blas("sym"); which retries resolve_blas on every call until it succeeds. Together, these changes allow BLAS operations to recover if OpenBLAS is loaded after the first invocation — typical in Python multiprocessing, where a subprocess imports numpy after the C++ module initialises. Verified: 981 tests pass, plus 12/12 multiprocess scenarios (spawn, retry, 10 independent subprocesses), all f64/f32 bit-exact vs numpy.
1 parent 05fcca8 commit 7af1bb4

1 file changed

Lines changed: 22 additions & 10 deletions

File tree

numpycpp/detail/blas_bridge.h

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ inline const char* find_openblas_path() {
7373
}
7474
}
7575
}
76+
// If not found yet, allow retry on next call (OpenBLAS may be loaded later)
77+
if (path.empty()) tried = false;
7678
return path.empty() ? nullptr : path.c_str();
7779
}
7880
@@ -106,7 +108,8 @@ using cblas_dgemm64_fn = void(int, int, int,
106108
double, double*, int64_t);
107109
108110
inline float blas_sdot(const float* x, const float* y, size_t n) {
109-
static auto fn = (sdot64_fn*)resolve_blas("sdot_64_");
111+
static sdot64_fn* fn = nullptr;
112+
if (!fn) fn = (sdot64_fn*)resolve_blas("sdot_64_");
110113
if (__builtin_expect(fn != nullptr, 1)) {
111114
const int64_t ni = static_cast<int64_t>(n), inc = 1;
112115
return fn(&ni, x, &inc, y, &inc);
@@ -118,7 +121,8 @@ inline float blas_sdot(const float* x, const float* y, size_t n) {
118121
}
119122
120123
inline double blas_ddot(const double* x, const double* y, size_t n) {
121-
static auto fn = (ddot64_fn*)resolve_blas("ddot_64_");
124+
static ddot64_fn* fn = nullptr;
125+
if (!fn) fn = (ddot64_fn*)resolve_blas("ddot_64_");
122126
if (__builtin_expect(fn != nullptr, 1)) {
123127
const int64_t ni = static_cast<int64_t>(n), inc = 1;
124128
return fn(&ni, x, &inc, y, &inc);
@@ -141,7 +145,8 @@ using cblas_dgemv64_fn = void(int, int, int64_t, int64_t,
141145
142146
// y[M] = A[M×K] @ x[K] — 2D × 1D case
143147
inline void blas_sgemv(const float* A, const float* x, float* y, size_t M, size_t K) {
144-
static auto fn = (cblas_sgemv64_fn*)resolve_blas("cblas_sgemv64_");
148+
static cblas_sgemv64_fn* fn = nullptr;
149+
if (!fn) fn = (cblas_sgemv64_fn*)resolve_blas("cblas_sgemv64_");
145150
if (__builtin_expect(fn != nullptr, 1)) {
146151
fn(101, 111, (int64_t)M, (int64_t)K, 1.0f, A, (int64_t)K,
147152
x, 1, 0.0f, y, 1);
@@ -154,7 +159,8 @@ inline void blas_sgemv(const float* A, const float* x, float* y, size_t M, size_
154159
}
155160
}
156161
inline void blas_dgemv(const double* A, const double* x, double* y, size_t M, size_t K) {
157-
static auto fn = (cblas_dgemv64_fn*)resolve_blas("cblas_dgemv64_");
162+
static cblas_dgemv64_fn* fn = nullptr;
163+
if (!fn) fn = (cblas_dgemv64_fn*)resolve_blas("cblas_dgemv64_");
158164
if (__builtin_expect(fn != nullptr, 1)) {
159165
fn(101, 111, (int64_t)M, (int64_t)K, 1.0, A, (int64_t)K,
160166
x, 1, 0.0, y, 1);
@@ -169,7 +175,8 @@ inline void blas_dgemv(const double* A, const double* x, double* y, size_t M, si
169175
170176
// y[N] = B^T[K×N] @ a[K] — 1D × 2D case (Trans=112)
171177
inline void blas_sgemv_t(const float* B, const float* a, float* y, size_t K, size_t N) {
172-
static auto fn = (cblas_sgemv64_fn*)resolve_blas("cblas_sgemv64_");
178+
static cblas_sgemv64_fn* fn = nullptr;
179+
if (!fn) fn = (cblas_sgemv64_fn*)resolve_blas("cblas_sgemv64_");
173180
if (__builtin_expect(fn != nullptr, 1)) {
174181
fn(101, 112, (int64_t)K, (int64_t)N, 1.0f, B, (int64_t)N,
175182
a, 1, 0.0f, y, 1);
@@ -182,7 +189,8 @@ inline void blas_sgemv_t(const float* B, const float* a, float* y, size_t K, siz
182189
}
183190
}
184191
inline void blas_dgemv_t(const double* B, const double* a, double* y, size_t K, size_t N) {
185-
static auto fn = (cblas_dgemv64_fn*)resolve_blas("cblas_dgemv64_");
192+
static cblas_dgemv64_fn* fn = nullptr;
193+
if (!fn) fn = (cblas_dgemv64_fn*)resolve_blas("cblas_dgemv64_");
186194
if (__builtin_expect(fn != nullptr, 1)) {
187195
fn(101, 112, (int64_t)K, (int64_t)N, 1.0, B, (int64_t)N,
188196
a, 1, 0.0, y, 1);
@@ -199,7 +207,8 @@ inline void blas_dgemv_t(const double* B, const double* a, double* y, size_t K,
199207
// Uses cblas_sgemm64_ — same kernel numpy.matmul calls → 0 ULP by construction.
200208
inline void blas_sgemm(const float* A, const float* B, float* C,
201209
size_t M, size_t K, size_t N) {
202-
static auto fn = (cblas_sgemm64_fn*)resolve_blas("cblas_sgemm64_");
210+
static cblas_sgemm64_fn* fn = nullptr;
211+
if (!fn) fn = (cblas_sgemm64_fn*)resolve_blas("cblas_sgemm64_");
203212
if (__builtin_expect(fn != nullptr, 1)) {
204213
fn(101, 111, 111, // RowMajor, NoTrans, NoTrans
205214
(int64_t)M, (int64_t)N, (int64_t)K,
@@ -218,7 +227,8 @@ inline void blas_sgemm(const float* A, const float* B, float* C,
218227
219228
inline void blas_dgemm(const double* A, const double* B, double* C,
220229
size_t M, size_t K, size_t N) {
221-
static auto fn = (cblas_dgemm64_fn*)resolve_blas("cblas_dgemm64_");
230+
static cblas_dgemm64_fn* fn = nullptr;
231+
if (!fn) fn = (cblas_dgemm64_fn*)resolve_blas("cblas_dgemm64_");
222232
if (__builtin_expect(fn != nullptr, 1)) {
223233
fn(101, 111, 111,
224234
(int64_t)M, (int64_t)N, (int64_t)K,
@@ -261,7 +271,8 @@ template<> inline bool blas_gesv_inv<float>(float* A, size_t N) {
261271
// float32 → float64 → dgesv → float32
262272
// (OpenBLAS sgesv_64_ gives 1-ULP-off results vs numpy on this build;
263273
// the float64 path is bit-identical for both types.)
264-
static auto gesv = (dgesv64_fn*)resolve_blas("dgesv_64_");
274+
static dgesv64_fn* gesv = nullptr;
275+
if (!gesv) gesv = (dgesv64_fn*)resolve_blas("dgesv_64_");
265276
if (__builtin_expect(gesv == nullptr, 0)) return false;
266277
int64_t n = static_cast<int64_t>(N);
267278
auto ipiv = std::make_unique<int64_t[]>(N);
@@ -287,7 +298,8 @@ template<> inline bool blas_gesv_inv<float>(float* A, size_t N) {
287298
}
288299
289300
template<> inline bool blas_gesv_inv<double>(double* A, size_t N) {
290-
static auto gesv = (dgesv64_fn*)resolve_blas("dgesv_64_");
301+
static dgesv64_fn* gesv = nullptr;
302+
if (!gesv) gesv = (dgesv64_fn*)resolve_blas("dgesv_64_");
291303
if (__builtin_expect(gesv == nullptr, 0)) return false;
292304
int64_t n = static_cast<int64_t>(N);
293305
auto ipiv = std::make_unique<int64_t[]>(N);

0 commit comments

Comments
 (0)