From c21d8822f5ecbee350433c08e6b4f8deb099afc5 Mon Sep 17 00:00:00 2001
From: Max042004 <sau525@gmail.com>
Date: Sat, 6 Jun 2026 12:11:34 +0800
Subject: [PATCH 1/2] Fix cross-process signal delivery to EL0-preempted guests

test-fork's phase-2 signal child spins in `while (!got_usr1) usleep()`
waiting for a SIGUSR1 sent cross-process by its parent. The signal was
delivered only ~35% of the time and lost the rest, so `make check` hung at
test-fork until the 60s per-test timeout -- often longer, as leaked
`elfuse --fork-child` orphans kept the driver's stdout pipe open.

Two complementary defects:

1. vcpu_run_loop treated HV_EXIT_REASON_UNKNOWN as fatal. A host SIGUSR2
   (the cross-process guest-signal transport) that interrupts hv_vcpu_run
   mid-execution aborts the run with UNKNOWN rather than the clean CANCELED
   that hv_vcpus_exit() produces for a vCPU caught between runs. Route
   UNKNOWN through the same cancellation handling so the already-queued
   guest signal is delivered instead of crashing the child.

2. signal_deliver redirected to the handler only via ELR_EL1, which takes
   effect solely on an ERET from EL1 (the syscall-return path, gated by the
   shim's X8==2 exec_drop_frame marker). When the signal is delivered from
   the cancellation branch -- i.e. the vCPU was preempted while running EL0
   code (cross-process SIGUSR2, or SIGALRM in a tight loop) -- there is no
   pending ERET, the resume uses HV_REG_PC, and the ELR_EL1 write is a
   no-op: the handler never runs and only the X0=signum clobber lands,
   re-running the interrupted nanosleep with a bogus arg and spinning
   forever. Detect EL0 preemption from the live PSTATE (CPSR M[3:0]==0),
   save the interrupted PC from HV_REG_PC instead of the stale ELR_EL1, and
   redirect HV_REG_PC/CPSR directly; skip the X8==2 marker since there is no
   shim frame to drop.

test-fork now passes 20/20 (was ~7/20); `make check` is green with no hang.
---
 src/syscall/proc.c   | 13 ++++++++++++-
 src/syscall/signal.c | 42 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/src/syscall/proc.c b/src/syscall/proc.c
index 647bab8..f79720f 100644
--- a/src/syscall/proc.c
+++ b/src/syscall/proc.c
@@ -1941,11 +1941,22 @@ int vcpu_run_loop(hv_vcpu_t vcpu,
                 exit_code = 128;
                 running = false;
             }
-        } else if (vexit->reason == HV_EXIT_REASON_CANCELED) {
+        } else if (vexit->reason == HV_EXIT_REASON_CANCELED ||
+                   vexit->reason == HV_EXIT_REASON_UNKNOWN) {
             /* Canceled by hv_vcpus_exit(). Can be: alarm timeout,
              * exit_group from another thread, or signal preemption
              * (signal_queue called hv_vcpus_exit to deliver a signal
              * while the guest was in a tight loop).
+             *
+             * HV_EXIT_REASON_UNKNOWN is the same event seen from the other
+             * side of a race: when a host signal (e.g. the SIGUSR2 used by the
+             * cross-process guest-signal transport) is delivered to this thread
+             * while it is actively executing guest code inside hv_vcpu_run, the
+             * run aborts with UNKNOWN instead of the clean CANCELED that
+             * hv_vcpus_exit() produces for a vCPU caught between runs. The
+             * pending guest signal has already been drained and queued, so it
+             * is fully deliverable -- fall through to the same handling and
+             * resume rather than treating it as a fatal unexpected exit.
              */
             if (is_main && g_timed_out) {
                 /* Timeout already handled above the exception switch --
diff --git a/src/syscall/signal.c b/src/syscall/signal.c
index 5e4c820..5a25194 100644
--- a/src/syscall/signal.c
+++ b/src/syscall/signal.c
@@ -1386,14 +1386,35 @@ int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code)
 
     /* Deliver to user handler: build rt_sigframe on guest stack */
 
-    /* 1. Save current vCPU state */
+    /* 1. Save current vCPU state.
+     *
+     * ELR_EL1/SPSR_EL1 hold the interrupted EL0 return state only while the
+     * guest is unwinding a syscall (it is at EL1 in the shim, about to ERET).
+     * When the vCPU was preempted while executing EL0 code -- a tight compute
+     * loop interrupted by SIGALRM, or the cross-process guest-signal transport
+     * (SIGUSR2) firing mid-execution -- the live interrupted state is in
+     * HV_REG_PC / HV_REG_CPSR and ELR_EL1 is stale from the previous syscall.
+     * Redirecting via ELR_EL1 alone is then a no-op because the resume uses
+     * HV_REG_PC, so the handler never runs and the X0..X2 writes below clobber
+     * the interrupted registers instead. Detect the EL0-preemption case from
+     * the live PSTATE (M[3:0]==0 => EL0t) and use PC for both save and
+     * redirect.
+     */
     uint64_t saved_regs[31];
     uint64_t saved_sp, saved_pc, saved_pstate;
+    uint64_t cur_cpsr = 0;
+    hv_vcpu_get_reg(vcpu, HV_REG_CPSR, &cur_cpsr);
+    bool el0_preempt = (cur_cpsr & 0xfULL) == 0;
 
     vcpu_snapshot_gprs(vcpu, saved_regs);
     saved_sp = vcpu_get_sysreg(vcpu, HV_SYS_REG_SP_EL0);
-    saved_pc = vcpu_get_sysreg(vcpu, HV_SYS_REG_ELR_EL1);
-    saved_pstate = vcpu_get_sysreg(vcpu, HV_SYS_REG_SPSR_EL1);
+    if (el0_preempt) {
+        hv_vcpu_get_reg(vcpu, HV_REG_PC, &saved_pc);
+        saved_pstate = cur_cpsr;
+    } else {
+        saved_pc = vcpu_get_sysreg(vcpu, HV_SYS_REG_ELR_EL1);
+        saved_pstate = vcpu_get_sysreg(vcpu, HV_SYS_REG_SPSR_EL1);
+    }
 
     /* 1b. rseq abort: if the thread is in a restartable sequence critical
      * section, abort it. Linux does this on every signal delivery.
@@ -1549,6 +1570,16 @@ int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code)
     /* SPSR_EL1: EL0t (user mode) */
     hv_vcpu_set_sys_reg(vcpu, HV_SYS_REG_SPSR_EL1, 0);
 
+    /* EL0-preemption delivery: the resume runs from HV_REG_PC, not via an
+     * ERET that consumes ELR_EL1, so redirect the live PC/PSTATE directly.
+     * The ELR_EL1/SPSR_EL1 writes above still cover the rt_sigreturn path,
+     * which unwinds back to EL0 through the shim ERET.
+     */
+    if (el0_preempt) {
+        hv_vcpu_set_reg(vcpu, HV_REG_PC, act->sa_handler);
+        hv_vcpu_set_reg(vcpu, HV_REG_CPSR, 0); /* EL0t */
+    }
+
     /* X0 = signal number */
     hv_vcpu_set_reg(vcpu, HV_REG_X0, (uint64_t) signum);
 
@@ -1590,8 +1621,11 @@ int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code)
      * shim still has the interrupted syscall frame on its EL1 stack. Tell it
      * to drop that frame so the handler PC/SP/LR/args installed above are not
      * overwritten before ERET. Fault/BRK delivery paths ignore this marker.
+     * The EL0-preemption path resumes straight into the handler at EL0 with
+     * no shim frame to drop, so the marker is neither needed nor consulted.
      */
-    hv_vcpu_set_reg(vcpu, HV_REG_X8, 2);
+    if (!el0_preempt)
+        hv_vcpu_set_reg(vcpu, HV_REG_X8, 2);
 
     pthread_mutex_unlock(&sig_lock);
     return 1;

From 23b23073bb2fa03a023dbe655629b8c7aa4f5b39 Mon Sep 17 00:00:00 2001
From: Max042004 <sau525@gmail.com>
Date: Sat, 6 Jun 2026 21:09:41 +0800
Subject: [PATCH 2/2] Deliver synchronous fault signals to the faulting thread

A guest exception (SIGSEGV/SIGBUS/SIGILL/SIGFPE/SIGTRAP) was delivered by
calling signal_set_fault_info() (which records si_code/si_addr in a
_Thread_local slot) followed by signal_queue() + signal_deliver(). But
signal_queue() sets a bit in the process-wide pending mask, and every vCPU
thread runs signal_deliver() at its own poll points. So a fault raised by
thread A could be dequeued and delivered by thread B, whose thread-local
fault info is empty -- the siginfo then degrades to si_code=SI_USER with no
si_addr instead of SEGV_MAPERR/si_addr.

A JVM treats a SIGSEGV with si_code=SI_USER as a fatal external signal
rather than a recoverable implicit null check, so javac (heavily
multi-threaded, many null-check faults) crashed with a fatal error report.
Two threads faulting on the same signal also collapsed into a single
bitmask bit, dropping one fault entirely.

Add signal_deliver_fault(), which delivers a given signal directly to the
current thread using its thread-local fault info and never touches the
process-wide pending set. signal_deliver()'s frame-building tail is
factored into deliver_signal_locked() and shared. Route all four
synchronous-fault sites in the vCPU loop (SEGV_ACCERR, BRK->SIGTRAP,
SIGILL, data-abort SIGSEGV) through it.

Add test-fault-signal-mt: N threads each take recoverable SIGSEGVs in a
loop and assert every delivery is SEGV_MAPERR with the right si_addr on the
faulting thread. It fails deterministically (rc=2, fault delivered to a
non-faulting thread) on the old queue-based path and passes on the fix.

(cherry picked from commit 201b4fa0d2ffb2218e252be71930e1cfb9e360a5)
---
 Makefile                     |   6 ++
 src/syscall/proc.c           |  19 +++---
 src/syscall/signal.c         |  89 +++++++++++++++++--------
 src/syscall/signal.h         |  10 +++
 tests/manifest.txt           |   1 +
 tests/test-fault-signal-mt.c | 124 +++++++++++++++++++++++++++++++++++
 6 files changed, 215 insertions(+), 34 deletions(-)
 create mode 100644 tests/test-fault-signal-mt.c

diff --git a/Makefile b/Makefile
index e382cc4..671be28 100644
--- a/Makefile
+++ b/Makefile
@@ -177,6 +177,12 @@ $(BUILD_DIR)/test-scm-creds: tests/test-scm-creds.c | $(BUILD_DIR)
 	@echo "  CROSS   $< (with -lpthread)"
 	$(Q)$(CROSS_COMPILE)gcc -D_GNU_SOURCE -static -O2 -o $@ $< -lpthread
 
+# test-fault-signal-mt spawns pthreads that each take recoverable SIGSEGVs to
+# stress synchronous-fault delivery routing in a multi-threaded guest.
+$(BUILD_DIR)/test-fault-signal-mt: tests/test-fault-signal-mt.c | $(BUILD_DIR)
+	@echo "  CROSS   $< (with -lpthread)"
+	$(Q)$(CROSS_COMPILE)gcc -D_GNU_SOURCE -static -O2 -o $@ $< -lpthread
+
 # test-shim-cred-race spawns a pthread reader while the main thread
 # toggles setresuid; the reader spins on the identity fast path.
 $(BUILD_DIR)/test-shim-cred-race: tests/test-shim-cred-race.c | $(BUILD_DIR)
diff --git a/src/syscall/proc.c b/src/syscall/proc.c
index f79720f..f4defae 100644
--- a/src/syscall/proc.c
+++ b/src/syscall/proc.c
@@ -1454,8 +1454,9 @@ int vcpu_run_loop(hv_vcpu_t vcpu,
                             uint64_t esr;
                             hv_vcpu_get_sys_reg(vcpu, HV_SYS_REG_ESR_EL1, &esr);
                             signal_set_fault_info(LINUX_SEGV_ACCERR, far, esr);
-                            signal_queue(LINUX_SIGSEGV);
-                            int sig_ret = signal_deliver(vcpu, g, &exit_code);
+                            int sig_ret =
+                                signal_deliver_fault(vcpu, g, LINUX_SIGSEGV,
+                                                     &exit_code);
                             if (sig_ret < 0)
                                 running = false;
                             break;
@@ -1538,7 +1539,6 @@ int vcpu_run_loop(hv_vcpu_t vcpu,
                         hv_vcpu_get_sys_reg(vcpu, HV_SYS_REG_ESR_EL1, &brk_esr);
                         signal_set_fault_info(LINUX_TRAP_BRKPT, brk_pc,
                                               brk_esr);
-                        signal_queue(LINUX_SIGTRAP);
                         if (verbose) {
                             uint64_t thread_blocked =
                                 current_thread ? current_thread->blocked
@@ -1550,7 +1550,9 @@ int vcpu_run_loop(hv_vcpu_t vcpu,
                                 (unsigned long long) signal_get_state()
                                     ->pending);
                         }
-                        int sig_ret = signal_deliver(vcpu, g, &exit_code);
+                        int sig_ret =
+                            signal_deliver_fault(vcpu, g, LINUX_SIGTRAP,
+                                                 &exit_code);
                         if (verbose)
                             log_debug("%s: signal_deliver returned %d", prefix,
                                       sig_ret);
@@ -1612,8 +1614,9 @@ int vcpu_run_loop(hv_vcpu_t vcpu,
                                 prefix, (unsigned long long) elr_addr,
                                 (unsigned long long) esr, fault_ec);
                         signal_set_fault_info(LINUX_ILL_ILLOPC, elr_addr, esr);
-                        signal_queue(LINUX_SIGILL);
-                        int sig_ret = signal_deliver(vcpu, g, &exit_code);
+                        int sig_ret =
+                            signal_deliver_fault(vcpu, g, LINUX_SIGILL,
+                                                 &exit_code);
                         /* HVC #11 consumes X8 as the post-fault TLBI opcode.
                          * signal_deliver() may leave it unchanged when no
                          * handler is materialized, or set the syscall-path
@@ -1681,8 +1684,8 @@ int vcpu_run_loop(hv_vcpu_t vcpu,
                             (unsigned long long) esr, fsc, code_name);
                     }
                     signal_set_fault_info(si_code, far_addr, esr);
-                    signal_queue(LINUX_SIGSEGV);
-                    int sig_ret = signal_deliver(vcpu, g, &exit_code);
+                    int sig_ret =
+                        signal_deliver_fault(vcpu, g, LINUX_SIGSEGV, &exit_code);
                     /* HVC #11 consumes X8 as the post-fault TLBI opcode.
                      * signal_deliver() may leave it unchanged when no
                      * handler is materialized, or set the syscall-path
diff --git a/src/syscall/signal.c b/src/syscall/signal.c
index 5a25194..7c917c4 100644
--- a/src/syscall/signal.c
+++ b/src/syscall/signal.c
@@ -1317,37 +1317,27 @@ static void build_sigcontext_reserved(uint8_t *reserved,
     memset(reserved + off, 0, 8);
 }
 
-int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code)
+/* Build and install the rt_sigframe for `signum` on the current thread, with
+ * sig_lock held on entry and released on every return path. Shared by
+ * signal_deliver() (signal selected from the process-wide pending set) and
+ * signal_deliver_fault() (synchronous fault forced onto the faulting thread).
+ * rt_info supplies si_code/si_pid/sigval when no thread-local pending_fault is
+ * set; the pending_fault is consumed (one-shot) when valid. Returns 1 if a
+ * handler frame was installed, 0 if the signal was ignored, and -1 (with
+ * *exit_code set) when the default disposition terminates the guest.
+ */
+static int deliver_signal_locked(hv_vcpu_t vcpu,
+                                 guest_t *g,
+                                 int signum,
+                                 signal_rt_info_t rt_info,
+                                 int *exit_code)
 {
-    pthread_mutex_lock(&sig_lock);
     uint64_t *blocked = thread_blocked_ptr();
     uint64_t *saved_ptr = thread_saved_blocked_ptr();
     bool *valid_ptr = thread_saved_valid_ptr();
-    uint64_t deliverable = sig_state.pending & ~*blocked;
-    if (deliverable == 0) {
-        pthread_mutex_unlock(&sig_lock);
-        return 0;
-    }
 
-    /* Find lowest pending unblocked signal */
-    int signum = bit_ctz64(deliverable) + 1;
-    signal_rt_info_t rt_info = signal_default_info(signum);
-
-    /* Dequeue: for RT signals, decrement count and only clear the
-     * pending bit when the queue is empty. Standard signals are
-     * always cleared (single instance, bitmask semantics).
-     */
-    if (signum >= LINUX_SIGRTMIN) {
-        signal_rt_dequeue_locked(signum, &rt_info);
-    } else {
-        rt_info = signal_standard_peek_locked(signum);
-        sig_state.std_info_valid[signum - 1] = false;
-        sig_state.pending &= ~sig_bit(signum);
-    }
-
-    /* signum is bit_ctz64(deliverable) + 1, bounded 1..64 by the 64-bit
-     * pending mask. The static analyzer cannot see the bound, so gate the
-     * array access defensively.
+    /* signum is 1..64 from the caller; the static analyzer cannot see the
+     * bound, so gate the array access defensively.
      */
     int idx = signum - 1;
     if (!RANGE_CHECK(idx, 0, LINUX_NSIG)) {
@@ -1631,6 +1621,53 @@ int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code)
     return 1;
 }
 
+int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code)
+{
+    pthread_mutex_lock(&sig_lock);
+    uint64_t *blocked = thread_blocked_ptr();
+    uint64_t deliverable = sig_state.pending & ~*blocked;
+    if (deliverable == 0) {
+        pthread_mutex_unlock(&sig_lock);
+        return 0;
+    }
+
+    /* Find lowest pending unblocked signal */
+    int signum = bit_ctz64(deliverable) + 1;
+    signal_rt_info_t rt_info = signal_default_info(signum);
+
+    /* Dequeue: for RT signals, decrement count and only clear the
+     * pending bit when the queue is empty. Standard signals are
+     * always cleared (single instance, bitmask semantics).
+     */
+    if (signum >= LINUX_SIGRTMIN) {
+        signal_rt_dequeue_locked(signum, &rt_info);
+    } else {
+        rt_info = signal_standard_peek_locked(signum);
+        sig_state.std_info_valid[signum - 1] = false;
+        sig_state.pending &= ~sig_bit(signum);
+    }
+
+    return deliver_signal_locked(vcpu, g, signum, rt_info, exit_code);
+}
+
+int signal_deliver_fault(hv_vcpu_t vcpu, guest_t *g, int signum, int *exit_code)
+{
+    /* Synchronous faults (SIGSEGV/SIGBUS/SIGILL/SIGFPE/SIGTRAP) are specific to
+     * the thread that triggered them and must be delivered to that thread with
+     * the thread-local fault info set by signal_set_fault_info(). Routing them
+     * through the process-wide pending bitmask (signal_queue + signal_deliver)
+     * is racy: another vCPU thread can dequeue the bit and deliver it with no
+     * fault info (si_code becomes SI_USER, which makes a JVM treat a recoverable
+     * implicit null-check as a fatal external signal), and two threads faulting
+     * on the same signal collapse into one bit so one fault is lost. Deliver
+     * directly here, never touching sig_state.pending. The blocked mask is
+     * intentionally ignored: a synchronous fault cannot be postponed.
+     */
+    pthread_mutex_lock(&sig_lock);
+    signal_rt_info_t rt_info = signal_default_info(signum);
+    return deliver_signal_locked(vcpu, g, signum, rt_info, exit_code);
+}
+
 /* rt_sigreturn. */
 
 int signal_rt_sigreturn(hv_vcpu_t vcpu, guest_t *g)
diff --git a/src/syscall/signal.h b/src/syscall/signal.h
index f5792f5..927e1aa 100644
--- a/src/syscall/signal.h
+++ b/src/syscall/signal.h
@@ -274,6 +274,16 @@ void signal_set_shim_globals_guest(guest_t *g);
  */
 int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code);
 
+/* Deliver a synchronous fault signal directly to the faulting (current) thread,
+ * bypassing the process-wide pending set. The caller must have set the fault
+ * info via signal_set_fault_info() immediately before. Same return convention
+ * as signal_deliver(). Use this for SIGSEGV/SIGBUS/SIGILL/SIGFPE/SIGTRAP raised
+ * from a guest exception, never signal_queue()+signal_deliver(): a queued fault
+ * can be stolen by another vCPU thread (delivered as SI_USER, no si_addr) or
+ * coalesced with another thread's fault into one bitmask bit.
+ */
+int signal_deliver_fault(hv_vcpu_t vcpu, guest_t *g, int signum, int *exit_code);
+
 /* Handle rt_sigreturn (SYS 139): restore registers from rt_sigframe on
  * the guest stack. Returns SYSCALL_EXEC_HAPPENED to skip X0 writeback.
  */
diff --git a/tests/manifest.txt b/tests/manifest.txt
index 7273505..7255ad6 100644
--- a/tests/manifest.txt
+++ b/tests/manifest.txt
@@ -87,6 +87,7 @@ test-negative                  # diff=skip
 
 [section] Signal + thread tests
 test-signal-thread
+test-fault-signal-mt           # diff=skip
 
 [section] Fork edge cases
 test-clone3                    # diff=skip
diff --git a/tests/test-fault-signal-mt.c b/tests/test-fault-signal-mt.c
new file mode 100644
index 0000000..49a857c
--- /dev/null
+++ b/tests/test-fault-signal-mt.c
@@ -0,0 +1,124 @@
+/* Multi-threaded synchronous-fault delivery test
+ *
+ * Copyright 2026 elfuse contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Regression lock-in for synchronous fault (SIGSEGV) delivery in a
+ * multi-threaded guest. Faults were routed through the process-wide pending
+ * bitmask (signal_queue + signal_deliver), so a fault raised by one vCPU
+ * thread could be dequeued and delivered by another: the other thread had no
+ * thread-local fault info, so the kernel-visible siginfo became si_code=SI_USER
+ * with no si_addr instead of SEGV_MAPERR. A JVM treats such a SIGSEGV as a
+ * fatal external signal rather than a recoverable implicit null check, so javac
+ * crashed. Two threads faulting at once also collapsed into a single bitmask
+ * bit, dropping one fault entirely.
+ *
+ * Each thread repeatedly takes a recoverable SIGSEGV (read from a known bad
+ * address) and the handler asserts the siginfo is the correct synchronous-fault
+ * shape. Under the bug, a fault delivered to the wrong thread is seen either as
+ * SI_USER or on a thread with no armed recovery point, which this test flags.
+ *
+ * Syscalls exercised: rt_sigaction, clone (pthreads), the fault delivery path.
+ */
+
+#include <pthread.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test-harness.h"
+
+int passes = 0, fails = 0;
+
+#define NTHREADS 4
+#define NFAULTS 4000
+
+/* The bad address every thread reads. Page zero is never mapped, so the read
+ * faults with a translation fault -> SEGV_MAPERR and si_addr == FAULT_ADDR. */
+#define FAULT_ADDR ((volatile int *) (uintptr_t) 0x8)
+
+static _Thread_local sigjmp_buf recover;
+static _Thread_local volatile int armed;
+
+/* Set by the handler when it observes a malformed delivery. async-signal-safe
+ * stores to sig_atomic_t/int flags only. */
+static volatile sig_atomic_t bad_si_code = 0;
+static volatile sig_atomic_t bad_si_addr = 0;
+static volatile sig_atomic_t unarmed_delivery = 0;
+
+static void segv_handler(int sig, siginfo_t *info, void *uc)
+{
+    (void) sig;
+    (void) uc;
+    /* A synchronous fault must report a fault si_code (SEGV_MAPERR for an
+     * unmapped address), never SI_USER (0), which is what a stolen/cross-thread
+     * delivery produced. */
+    if (info->si_code != SEGV_MAPERR && info->si_code != SEGV_ACCERR)
+        bad_si_code = 1;
+    if (info->si_addr != (void *) FAULT_ADDR)
+        bad_si_addr = 1;
+    if (!armed) {
+        /* Delivered to a thread that is not currently faulting: the fault was
+         * misrouted. Cannot recover (no jmp target), so fail and exit. */
+        unarmed_delivery = 1;
+        _exit(2);
+    }
+    siglongjmp(recover, 1);
+}
+
+static void *fault_loop(void *arg)
+{
+    (void) arg;
+    for (int i = 0; i < NFAULTS; i++) {
+        armed = 1;
+        if (sigsetjmp(recover, 1) == 0) {
+            /* Trigger the fault. volatile so the read is not optimized away. */
+            volatile int v = *FAULT_ADDR;
+            (void) v;
+        }
+        armed = 0;
+    }
+    return NULL;
+}
+
+int main(void)
+{
+    printf("test-fault-signal-mt: multi-threaded SIGSEGV delivery\n\n");
+
+    struct sigaction sa;
+    sa.sa_sigaction = segv_handler;
+    sa.sa_flags = SA_SIGINFO | SA_NODEFER;
+    sigemptyset(&sa.sa_mask);
+    if (sigaction(SIGSEGV, &sa, NULL) != 0) {
+        printf("  %-34s FAIL: sigaction (errno=%d)\n", "setup", errno);
+        return 1;
+    }
+
+    TEST("concurrent recoverable SIGSEGV faults");
+    pthread_t th[NTHREADS];
+    int spawned = 0;
+    for (int i = 0; i < NTHREADS; i++) {
+        if (pthread_create(&th[i], NULL, fault_loop, NULL) != 0)
+            break;
+        spawned++;
+    }
+    for (int i = 0; i < spawned; i++)
+        pthread_join(th[i], NULL);
+
+    if (spawned != NTHREADS)
+        FAIL("could not spawn all threads");
+    else if (bad_si_code)
+        FAIL("fault delivered with si_code != SEGV_MAPERR/ACCERR (SI_USER?)");
+    else if (bad_si_addr)
+        FAIL("fault delivered with wrong si_addr");
+    else if (unarmed_delivery)
+        FAIL("fault delivered to a non-faulting thread");
+    else
+        PASS();
+
+    SUMMARY("test-fault-signal-mt");
+    return fails ? 1 : 0;
+}