From c21d8822f5ecbee350433c08e6b4f8deb099afc5 Mon Sep 17 00:00:00 2001 From: Max042004 Date: Sat, 6 Jun 2026 12:11:34 +0800 Subject: [PATCH] Fix cross-process signal delivery to EL0-preempted guests test-fork's phase-2 signal child spins in `while (!got_usr1) usleep()` waiting for a SIGUSR1 sent cross-process by its parent. The signal was delivered only ~35% of the time and was lost the rest of the time, so `make check` hung at test-fork until the 60s per-test timeout -- often longer, as leaked `elfuse --fork-child` orphans kept the driver's stdout pipe open. Two complementary defects: 1. vcpu_run_loop treated HV_EXIT_REASON_UNKNOWN as fatal. A host SIGUSR2 (the cross-process guest-signal transport) that interrupts hv_vcpu_run mid-execution aborts the run with UNKNOWN rather than the clean CANCELED that hv_vcpus_exit() produces for a vCPU caught between runs. Route UNKNOWN through the same cancellation handling so the already-queued guest signal is delivered instead of crashing the child. 2. signal_deliver redirected to the handler only via ELR_EL1, which takes effect solely on an ERET from EL1 (the syscall-return path, gated by the shim's X8==2 exec_drop_frame marker). When the signal is delivered from the cancellation branch -- i.e. the vCPU was preempted while running EL0 code (cross-process SIGUSR2, or SIGALRM in a tight loop) -- there is no pending ERET, the resume uses HV_REG_PC, and the ELR_EL1 write is a no-op: the handler never runs and only the X0=signum clobber lands, re-running the interrupted nanosleep with a bogus argument and spinning forever. Detect EL0 preemption from the live PSTATE (CPSR M[3:0]==0), save the interrupted PC from HV_REG_PC instead of the stale ELR_EL1, and redirect HV_REG_PC/CPSR directly; skip the X8==2 marker since there is no shim frame to drop. test-fork now passes 20/20 (was ~7/20); `make check` is green with no hang. 
--- src/syscall/proc.c | 13 ++++++++++++- src/syscall/signal.c | 42 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/syscall/proc.c b/src/syscall/proc.c index 647bab8..f79720f 100644 --- a/src/syscall/proc.c +++ b/src/syscall/proc.c @@ -1941,11 +1941,22 @@ int vcpu_run_loop(hv_vcpu_t vcpu, exit_code = 128; running = false; } - } else if (vexit->reason == HV_EXIT_REASON_CANCELED) { + } else if (vexit->reason == HV_EXIT_REASON_CANCELED || + vexit->reason == HV_EXIT_REASON_UNKNOWN) { /* Canceled by hv_vcpus_exit(). Can be: alarm timeout, * exit_group from another thread, or signal preemption * (signal_queue called hv_vcpus_exit to deliver a signal * while the guest was in a tight loop). + * + * HV_EXIT_REASON_UNKNOWN is the same event seen from the other + * side of a race: when a host signal (e.g. the SIGUSR2 used by the + * cross-process guest-signal transport) is delivered to this thread + * while it is actively executing guest code inside hv_vcpu_run, the + * run aborts with UNKNOWN instead of the clean CANCELED that + * hv_vcpus_exit() produces for a vCPU caught between runs. The + * pending guest signal has already been drained and queued, so it + * is fully deliverable -- fall through to the same handling and + * resume rather than treating it as a fatal unexpected exit. */ if (is_main && g_timed_out) { /* Timeout already handled above the exception switch -- diff --git a/src/syscall/signal.c b/src/syscall/signal.c index 5e4c820..5a25194 100644 --- a/src/syscall/signal.c +++ b/src/syscall/signal.c @@ -1386,14 +1386,35 @@ int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code) /* Deliver to user handler: build rt_sigframe on guest stack */ - /* 1. Save current vCPU state */ + /* 1. Save current vCPU state. + * + * ELR_EL1/SPSR_EL1 hold the interrupted EL0 return state only while the + * guest is unwinding a syscall (it is at EL1 in the shim, about to ERET). 
+ * When the vCPU was preempted while executing EL0 code -- a tight compute + * loop interrupted by SIGALRM, or the cross-process guest-signal transport + * (SIGUSR2) firing mid-execution -- the live interrupted state is in + * HV_REG_PC / HV_REG_CPSR and ELR_EL1 is stale from the previous syscall. + * Redirecting via ELR_EL1 alone is then a no-op because the resume uses + * HV_REG_PC, so the handler never runs and the X0..X2 writes below clobber + * the interrupted registers instead. Detect the EL0-preemption case from + * the live PSTATE (M[3:0]==0 => EL0t) and use PC for both save and + * redirect. + */ uint64_t saved_regs[31]; uint64_t saved_sp, saved_pc, saved_pstate; + uint64_t cur_cpsr = 0; + hv_vcpu_get_reg(vcpu, HV_REG_CPSR, &cur_cpsr); + bool el0_preempt = (cur_cpsr & 0xfULL) == 0; vcpu_snapshot_gprs(vcpu, saved_regs); saved_sp = vcpu_get_sysreg(vcpu, HV_SYS_REG_SP_EL0); - saved_pc = vcpu_get_sysreg(vcpu, HV_SYS_REG_ELR_EL1); - saved_pstate = vcpu_get_sysreg(vcpu, HV_SYS_REG_SPSR_EL1); + if (el0_preempt) { + hv_vcpu_get_reg(vcpu, HV_REG_PC, &saved_pc); + saved_pstate = cur_cpsr; + } else { + saved_pc = vcpu_get_sysreg(vcpu, HV_SYS_REG_ELR_EL1); + saved_pstate = vcpu_get_sysreg(vcpu, HV_SYS_REG_SPSR_EL1); + } /* 1b. rseq abort: if the thread is in a restartable sequence critical * section, abort it. Linux does this on every signal delivery. @@ -1549,6 +1570,16 @@ int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code) /* SPSR_EL1: EL0t (user mode) */ hv_vcpu_set_sys_reg(vcpu, HV_SYS_REG_SPSR_EL1, 0); + /* EL0-preemption delivery: the resume runs from HV_REG_PC, not via an + * ERET that consumes ELR_EL1, so redirect the live PC/PSTATE directly. + * The ELR_EL1/SPSR_EL1 writes above still cover the rt_sigreturn path, + * which unwinds back to EL0 through the shim ERET. 
+ */ + if (el0_preempt) { + hv_vcpu_set_reg(vcpu, HV_REG_PC, act->sa_handler); + hv_vcpu_set_reg(vcpu, HV_REG_CPSR, 0); /* EL0t */ + } + /* X0 = signal number */ hv_vcpu_set_reg(vcpu, HV_REG_X0, (uint64_t) signum); @@ -1590,8 +1621,11 @@ int signal_deliver(hv_vcpu_t vcpu, guest_t *g, int *exit_code) * shim still has the interrupted syscall frame on its EL1 stack. Tell it * to drop that frame so the handler PC/SP/LR/args installed above are not * overwritten before ERET. Fault/BRK delivery paths ignore this marker. + * The EL0-preemption path resumes straight into the handler at EL0 with + * no shim frame to drop, so the marker is neither needed nor consulted. */ - hv_vcpu_set_reg(vcpu, HV_REG_X8, 2); + if (!el0_preempt) + hv_vcpu_set_reg(vcpu, HV_REG_X8, 2); pthread_mutex_unlock(&sig_lock); return 1;