From 70e491c10b0a6aac53440c9d3b77c092db928453 Mon Sep 17 00:00:00 2001 From: "Viral B. Shah" Date: Mon, 22 Jun 2026 23:41:57 +0000 Subject: [PATCH] ld80: make powl() thread-safe (#222) powl() in ld80/e_powl.c kept its working scratch values (z, w, W, Wa, Wb, ya, yb, u) in file-scope `static` variables. Two threads calling powl() concurrently clobbered each other's scratch, so a call could return arbitrary garbage -- e.g. the reproducer from the issue, powl(0x8.779021e7c2f81b2p+14982L, -0x8.0021b03e1f821c9p-15097L), which is exactly 1, would instead come back as huge values or inf. Move that scratch to local automatic variables inside powl(). The `volatile` on z is preserved (as a local volatile) because it is load-bearing for the rounding/underflow behaviour, exactly like the twom10000 constant. The math is unchanged. reducl() and powil() already used local automatics and were unaffected. Add test/regression/test-222.c: it spawns 8 threads, each repeatedly computing a different power with a known-exact result, and fails if any call ever deviates. Before the fix this fails essentially every run; after it, it is solid. It skips (77) when long double is not the 80-bit type or when threads cannot be started. The regression Makefile rule gains -pthread (harmless for the single-threaded tests). Co-Authored-By: Claude Opus 4.8 (1M context) --- ld80/e_powl.c | 9 ++- test/Makefile | 4 +- test/regression/test-222.c | 126 +++++++++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 test/regression/test-222.c diff --git a/ld80/e_powl.c b/ld80/e_powl.c index dcb2b455..a376be6a 100644 --- a/ld80/e_powl.c +++ b/ld80/e_powl.c @@ -189,8 +189,6 @@ static long double R[] = { static const long double MAXLOGL = 1.1356523406294143949492E4L; static const long double MINLOGL = -1.13994985314888605586758E4L; static const long double LOGE2L = 6.9314718055994530941723E-1L; -static volatile long double z; -static long double w, W, Wa, Wb, ya, yb, u; static const long double huge = 0x1p10000L; #if 0 /* XXX Prevent gcc from erroneously constant folding this. */ static const long double twom10000 = 0x1p-10000L; @@ -208,6 +206,13 @@ powl(long double x, long double y) int i, nflg, iyflg, yoddint; long e; +/* Local scratch (formerly file-scope statics, which made powl() + * non-reentrant and unsafe to call from multiple threads). The + * volatile on z is load-bearing: it forces the rounding/underflow + * behaviour of the final products, just as twom10000 does below. */ +volatile long double z; +long double w, W, Wa, Wb, ya, yb, u; + if( y == 0.0L ) return( 1.0L ); diff --git a/test/Makefile b/test/Makefile index c5d64e7d..1665f52f 100644 --- a/test/Makefile +++ b/test/Makefile @@ -21,8 +21,10 @@ REGRESSION_BINS := $(REGRESSION_SRCS:.c=) regression-build: $(REGRESSION_BINS) +# -pthread lets thread-safety regressions (e.g. test-222) spawn threads. It is +# harmless for the single-threaded tests and is a no-op stub on glibc >= 2.34. regression/%: regression/%.c regression/regress-util.h - $(CC) $(CPPFLAGS) $(CFLAGS) $(CFLAGS_add) $(LDFLAGS) $(LDFLAGS_arch) $< -D__BSD_VISIBLE -I ../include -I../src $(OPENLIBM_LIB) -o $@ + $(CC) $(CPPFLAGS) $(CFLAGS) $(CFLAGS_add) $(LDFLAGS) $(LDFLAGS_arch) $< -D__BSD_VISIBLE -I ../include -I../src $(OPENLIBM_LIB) -pthread -o $@ bench: bench-syslibm bench-openlibm diff --git a/test/regression/test-222.c b/test/regression/test-222.c new file mode 100644 index 00000000..cebb8ece --- /dev/null +++ b/test/regression/test-222.c @@ -0,0 +1,126 @@ +/* + * Regression test for issue #222: powl() is not thread-safe. + * https://github.com/JuliaMath/openlibm/issues/222 + * + * The ld80 implementation (ld80/e_powl.c) kept its working scratch values + * (z, w, W, Wa, Wb, ya, yb, u) in file-scope `static` variables. When two + * threads ran powl() at the same time they clobbered each other's scratch, + * so a call could return arbitrary garbage -- e.g. the reproducer below, + * powl(0x8.779021e7c2f81b2p+14982L, -0x8.0021b03e1f821c9p-15097L), which is + * exactly 1, would instead come back as huge values or inf. + * + * The fix moves that scratch to local automatic variables. This test drives + * several threads, each repeatedly computing a *different* power whose result + * is known exactly, and fails if any call ever deviates. With the bug the + * cross-thread contention is detected essentially every run; after the fix it + * is rock solid. + * + * Skips (exit 77) where it cannot apply: when long double is not the 80-bit + * extended type this file targets, or when threads cannot be started. + */ +#include +#include + +#include "regress-util.h" + +/* + * ld80-specific: the race was in ld80/e_powl.c, which openlibm builds only on + * x86. Restrict to the 80-bit format so every other arch compiles to a bare + * skip with no reference to powl (it may be absent, breaking the link) and the + * pthread code is only built where the test actually runs (natively, never + * under cross-qemu). + */ +#if LDBL_MANT_DIG != 64 + +int +main(void) +{ + return REGRESS_SKIP; /* not the 80-bit long double powl */ +} + +#else + +#include + +#define NTHREADS 8 +#define NITERS 20000 + +/* + * Each job is a (base, exponent, expected) triple chosen so the result is + * representable exactly in 80-bit long double and goes through the full + * mantissa/log path of powl() (rather than an early special-case return), + * which is where the shared scratch lived. Mixing distinct values across + * threads is what makes the old race observable. + */ +struct job { + long double x; + long double y; + long double expect; +}; + +static const struct job jobs[NTHREADS] = { + /* the exact reproducer from the issue: result is 1 */ + { 0x8.779021e7c2f81b2p+14982L, -0x8.0021b03e1f821c9p-15097L, 1.0L }, + { 2.0L, 10.0L, 1024.0L }, + { 3.0L, 7.0L, 2187.0L }, + { 1.5L, 4.0L, 5.0625L }, + { 10.0L, 3.0L, 1000.0L }, + { 7.0L, 2.0L, 49.0L }, + { 5.0L, 3.0L, 125.0L }, + { 2.0L, -3.0L, 0.125L }, +}; + +static volatile int failed; + +static void * +worker(void *arg) +{ + const struct job *j = (const struct job *)arg; + long i; + + for (i = 0; i < NITERS; i++) { + long double r = powl(j->x, j->y); + if (r != j->expect) { + fprintf(stderr, + " powl(%.20Lg, %.20Lg) = %.20Lg, expected %.20Lg\n", + j->x, j->y, r, j->expect); + failed = 1; + return NULL; + } + } + return NULL; +} + +int +main(void) +{ + pthread_t t[NTHREADS]; + int i, started; + + /* Sanity: every job must be correct single-threaded first. */ + for (i = 0; i < NTHREADS; i++) + CHECK(powl(jobs[i].x, jobs[i].y) == jobs[i].expect); + + started = 0; + for (i = 0; i < NTHREADS; i++) { + if (pthread_create(&t[i], NULL, worker, + (void *)&jobs[i]) != 0) + break; + started++; + } + + if (started < 2) { + /* Could not get real concurrency; nothing meaningful to test. */ + for (i = 0; i < started; i++) + pthread_join(t[i], NULL); + return REGRESS_SKIP; + } + + for (i = 0; i < started; i++) + pthread_join(t[i], NULL); + + CHECK(failed == 0); + return 0; +} + +#endif /* LDBL_MANT_DIG */