From b944983bdb377e96117c6016db6654941b87a953 Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 21:51:48 +0000 Subject: [PATCH 01/37] feat: add PIC emulation, OutBAction variants, and guest halt mechanism - Add PvTimerConfig (port 107) and Halt (port 108) to OutBAction enum - Add userspace 8259A PIC emulation for MSHV/WHP (KVM uses in-kernel PIC) - Add halt() function in guest exit module using Halt port instead of HLT - Add default no-op IRQ handler at IDT vector 0x20 for PIC-remapped IRQ0 - Update dispatch epilogue to use Halt port before cli+hlt fallback - Add hw-interrupts feature flag to hyperlight-host Signed-off-by: danbugs --- src/hyperlight_common/src/outb.rs | 11 + src/hyperlight_guest/src/exit.rs | 28 +++ .../src/arch/amd64/dispatch.rs | 3 + .../src/arch/amd64/exception/entry.rs | 26 ++ src/hyperlight_host/Cargo.toml | 1 + src/hyperlight_host/src/hypervisor/mod.rs | 3 + src/hyperlight_host/src/hypervisor/pic.rs | 228 ++++++++++++++++++ src/hyperlight_host/src/sandbox/outb.rs | 5 + 8 files changed, 305 insertions(+) create mode 100644 src/hyperlight_host/src/hypervisor/pic.rs diff --git a/src/hyperlight_common/src/outb.rs b/src/hyperlight_common/src/outb.rs index b060ae5cf..9cba7f4a7 100644 --- a/src/hyperlight_common/src/outb.rs +++ b/src/hyperlight_common/src/outb.rs @@ -104,6 +104,15 @@ pub enum OutBAction { TraceMemoryAlloc = 105, #[cfg(feature = "mem_profile")] TraceMemoryFree = 106, + /// IO port for PV timer configuration. The guest writes a 32-bit + /// LE value representing the desired timer period in microseconds. + /// A value of 0 disables the timer. + PvTimerConfig = 107, + /// IO port the guest writes to signal "I'm done" to the host. + /// This replaces the `hlt` instruction for halt signaling so that + /// KVM's in-kernel LAPIC (which absorbs HLT exits) does not interfere + /// with hyperlight's halt-based guest-host protocol. + Halt = 108, } impl TryFrom for OutBAction { @@ -120,6 +129,8 @@ impl TryFrom for OutBAction { 105 => Ok(OutBAction::TraceMemoryAlloc), #[cfg(feature = "mem_profile")] 106 => Ok(OutBAction::TraceMemoryFree), + 107 => Ok(OutBAction::PvTimerConfig), + 108 => Ok(OutBAction::Halt), _ => Err(anyhow::anyhow!("Invalid OutBAction value: {}", val)), } } diff --git a/src/hyperlight_guest/src/exit.rs b/src/hyperlight_guest/src/exit.rs index e8d58e24e..785e10a19 100644 --- a/src/hyperlight_guest/src/exit.rs +++ b/src/hyperlight_guest/src/exit.rs @@ -18,6 +18,34 @@ use core::arch::asm; use core::ffi::{CStr, c_char}; use hyperlight_common::outb::OutBAction; +use tracing::instrument; + +/// Halt the execution of the guest and returns control to the host. +/// Halt is generally called for a successful completion of the guest's work, +/// this means we can instrument it as a trace point because the trace state +/// shall not be locked at this point (we are not in an exception context). +#[inline(never)] +#[instrument(skip_all, level = "Trace")] +pub fn halt() { + #[cfg(all(feature = "trace_guest", target_arch = "x86_64"))] + { + // Send data before halting + // If there is no data, this doesn't do anything + // The reason we do this here instead of in `hlt` asm function + // is to avoid allocating before halting, which leaks memory + // because the guest is not expected to resume execution after halting. + hyperlight_guest_tracing::flush(); + } + + // Signal "I'm done" to the host via an IO port write. This causes a + // VM exit on all backends (KVM, MSHV, WHP). The subsequent cli+hlt + // is dead code—hyperlight never re-enters the guest after seeing + // the Halt port—but serves as a safety fallback. + unsafe { + out32(OutBAction::Halt as u16, 0); + asm!("cli", "hlt", options(nostack)); + } +} /// Exits the VM with an Abort OUT action and code 0. #[unsafe(no_mangle)] diff --git a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs index bb80b30ed..e904eeb6e 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs @@ -68,6 +68,9 @@ core::arch::global_asm!(" mov cr4, rdi flush_done: call {internal_dispatch_function}\n + mov dx, 108\n + out dx, al\n + cli\n hlt\n .cfi_endproc ", internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function); diff --git a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs index 87f89f15c..ca72e62d8 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs @@ -172,6 +172,24 @@ global_asm!( hl_exception_handler = sym super::handle::hl_exception_handler, ); +// Default no-op IRQ handler for hardware interrupts (vectors 0x20-0x2F). +// Sends a non-specific EOI to the master PIC and returns. +// This prevents unhandled-interrupt faults when the in-kernel PIT fires +// before a guest has installed its own IRQ handler. +global_asm!( + ".globl _default_irq_handler", + "_default_irq_handler:", + "push rax", + "mov al, 0x20", + "out 0x20, al", // PIC EOI + "pop rax", + "iretq", +); + +unsafe extern "C" { + fn _default_irq_handler(); +} + pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { let idt = unsafe { &raw mut (*pc).idt }; let set_idt_entry = |idx, handler: unsafe extern "C" fn()| { @@ -203,6 +221,14 @@ pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { set_idt_entry(Exception::VirtualizationException, _do_excp20); // Virtualization Exception set_idt_entry(Exception::SecurityException, _do_excp30); // Security Exception + // Install a default no-op IRQ handler at vector 0x20 (IRQ0 after PIC remapping). + // This ensures HLT can wake when an in-kernel PIT is active, even before the + // guest installs its own IRQ handler. + let irq_handler_addr = _default_irq_handler as *const () as u64; + unsafe { + (&raw mut (*idt).entries[0x20]).write_volatile(IdtEntry::new(irq_handler_addr)); + } + let idtr = IdtPointer { limit: (core::mem::size_of::() - 1) as u16, base: idt as u64, diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index 9352aef1b..80b232538 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -133,6 +133,7 @@ trace_guest = ["dep:opentelemetry", "dep:tracing-opentelemetry", "dep:hyperlight mem_profile = [ "trace_guest", "dep:framehop", "dep:fallible-iterator", "hyperlight-common/mem_profile" ] kvm = ["dep:kvm-bindings", "dep:kvm-ioctls"] mshv3 = ["dep:mshv-bindings", "dep:mshv-ioctls"] +hw-interrupts = [] # This enables easy debug in the guest gdb = ["dep:gdbstub", "dep:gdbstub_arch"] fuzzing = ["hyperlight-common/fuzzing"] diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 72a9d85d4..b90468d97 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -38,6 +38,9 @@ pub(crate) mod crashdump; pub(crate) mod hyperlight_vm; +#[cfg(all(feature = "hw-interrupts", any(mshv3, target_os = "windows")))] +pub(crate) mod pic; + use std::fmt::Debug; #[cfg(any(kvm, mshv3))] use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; diff --git a/src/hyperlight_host/src/hypervisor/pic.rs b/src/hyperlight_host/src/hypervisor/pic.rs new file mode 100644 index 000000000..7256ec3e9 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/pic.rs @@ -0,0 +1,228 @@ +/* +Copyright 2026 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Minimal userspace PIC (8259A) emulation for MSHV/WHP. +//! KVM provides in-kernel PIC via `create_irq_chip()`. + +// PIC I/O port constants +const PIC_MASTER_CMD: u16 = 0x20; +const PIC_MASTER_DATA: u16 = 0x21; +const PIC_SLAVE_CMD: u16 = 0xA0; +const PIC_SLAVE_DATA: u16 = 0xA1; + +/// Tracks where we are in the ICW (Initialization Command Word) sequence. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PicInitState { + /// Not in an init sequence; normal operation. + Ready, + /// Expecting ICW2 (vector base). + Icw2, + /// Expecting ICW3 (cascade info). + Icw3, + /// Expecting ICW4 (mode). + Icw4, +} + +/// Minimal 8259A PIC state for one chip (master or slave). +#[derive(Debug)] +pub(crate) struct Pic { + master_init_state: PicInitState, + master_vector_base: u8, + master_imr: u8, + + slave_init_state: PicInitState, + slave_vector_base: u8, + slave_imr: u8, +} + +impl Pic { + /// Create a new PIC pair with default state. + pub(crate) fn new() -> Self { + Self { + master_init_state: PicInitState::Ready, + master_vector_base: 0, + master_imr: 0xFF, // all masked + + slave_init_state: PicInitState::Ready, + slave_vector_base: 0, + slave_imr: 0xFF, // all masked + } + } + + /// Handle an I/O OUT to a PIC port. Returns `true` if the port was handled. + pub(crate) fn handle_io_out(&mut self, port: u16, data: u8) -> bool { + match port { + PIC_MASTER_CMD => { + if data & 0x10 != 0 { + // ICW1 -- start init sequence + self.master_init_state = PicInitState::Icw2; + } + // else: OCW (e.g. EOI) -- we accept but ignore + true + } + PIC_MASTER_DATA => { + match self.master_init_state { + PicInitState::Icw2 => { + self.master_vector_base = data; + self.master_init_state = PicInitState::Icw3; + } + PicInitState::Icw3 => { + self.master_init_state = PicInitState::Icw4; + } + PicInitState::Icw4 => { + self.master_init_state = PicInitState::Ready; + } + PicInitState::Ready => { + // IMR write + self.master_imr = data; + } + } + true + } + PIC_SLAVE_CMD => { + if data & 0x10 != 0 { + self.slave_init_state = PicInitState::Icw2; + } + true + } + PIC_SLAVE_DATA => { + match self.slave_init_state { + PicInitState::Icw2 => { + self.slave_vector_base = data; + self.slave_init_state = PicInitState::Icw3; + } + PicInitState::Icw3 => { + self.slave_init_state = PicInitState::Icw4; + } + PicInitState::Icw4 => { + self.slave_init_state = PicInitState::Ready; + } + PicInitState::Ready => { + self.slave_imr = data; + } + } + true + } + _ => false, + } + } + + /// Handle an I/O IN from a PIC port. Returns `Some(value)` if handled. + pub(crate) fn handle_io_in(&self, port: u16) -> Option { + match port { + PIC_MASTER_DATA => Some(self.master_imr), + PIC_SLAVE_DATA => Some(self.slave_imr), + PIC_MASTER_CMD | PIC_SLAVE_CMD => Some(0), // ISR reads return 0 + _ => None, + } + } + + /// Returns the master PIC vector base. + pub(crate) fn master_vector_base(&self) -> u8 { + self.master_vector_base + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn pic_initial_state() { + let pic = Pic::new(); + assert_eq!(pic.master_vector_base, 0); + assert_eq!(pic.slave_vector_base, 0); + assert_eq!(pic.master_imr, 0xFF); + assert_eq!(pic.slave_imr, 0xFF); + } + + #[test] + fn pic_master_icw_sequence() { + let mut pic = Pic::new(); + // ICW1 + assert!(pic.handle_io_out(PIC_MASTER_CMD, 0x11)); + // ICW2: vector base 0x20 + assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x20)); + assert_eq!(pic.master_vector_base(), 0x20); + // ICW3 + assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x04)); + // ICW4 + assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x01)); + // Should be back to Ready -- IMR write + assert!(pic.handle_io_out(PIC_MASTER_DATA, 0xFE)); // unmask IRQ0 + assert_eq!(pic.master_imr, 0xFE); + } + + #[test] + fn pic_slave_icw_sequence() { + let mut pic = Pic::new(); + assert!(pic.handle_io_out(PIC_SLAVE_CMD, 0x11)); + assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x28)); + assert_eq!(pic.slave_vector_base, 0x28); + assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x02)); + assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x01)); + assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0xFF)); + assert_eq!(pic.slave_imr, 0xFF); + } + + #[test] + fn pic_eoi_accepted() { + let mut pic = Pic::new(); + // EOI (OCW2, non-specific EOI = 0x20) + assert!(pic.handle_io_out(PIC_MASTER_CMD, 0x20)); + } + + #[test] + fn pic_unhandled_port() { + let mut pic = Pic::new(); + assert!(!pic.handle_io_out(0x60, 0x00)); + assert_eq!(pic.handle_io_in(0x60), None); + } + + #[test] + fn pic_reinitialize_master() { + let mut pic = Pic::new(); + // First init + pic.handle_io_out(PIC_MASTER_CMD, 0x11); + pic.handle_io_out(PIC_MASTER_DATA, 0x20); + pic.handle_io_out(PIC_MASTER_DATA, 0x04); + pic.handle_io_out(PIC_MASTER_DATA, 0x01); + assert_eq!(pic.master_vector_base(), 0x20); + // Reinit + pic.handle_io_out(PIC_MASTER_CMD, 0x11); + pic.handle_io_out(PIC_MASTER_DATA, 0x30); + pic.handle_io_out(PIC_MASTER_DATA, 0x04); + pic.handle_io_out(PIC_MASTER_DATA, 0x01); + assert_eq!(pic.master_vector_base(), 0x30); + } + + #[test] + fn pic_imr_preserved_across_reinit() { + let mut pic = Pic::new(); + // Set IMR before init + pic.handle_io_out(PIC_MASTER_CMD, 0x11); + pic.handle_io_out(PIC_MASTER_DATA, 0x20); + pic.handle_io_out(PIC_MASTER_DATA, 0x04); + pic.handle_io_out(PIC_MASTER_DATA, 0x01); + pic.handle_io_out(PIC_MASTER_DATA, 0xFE); + assert_eq!(pic.master_imr, 0xFE); + // Reinit -- IMR is set during ICW sequence, not explicitly preserved + pic.handle_io_out(PIC_MASTER_CMD, 0x11); + // After ICW1, IMR should still read as whatever it was -- we only + // change it on Ready state writes. + assert_eq!(pic.handle_io_in(PIC_MASTER_DATA), Some(0xFE)); + } +} diff --git a/src/hyperlight_host/src/sandbox/outb.rs b/src/hyperlight_host/src/sandbox/outb.rs index 76ec53cee..5d5bd91d8 100644 --- a/src/hyperlight_host/src/sandbox/outb.rs +++ b/src/hyperlight_host/src/sandbox/outb.rs @@ -233,6 +233,11 @@ pub(crate) fn handle_outb( OutBAction::TraceMemoryAlloc => trace_info.handle_trace_mem_alloc(regs, mem_mgr), #[cfg(feature = "mem_profile")] OutBAction::TraceMemoryFree => trace_info.handle_trace_mem_free(regs, mem_mgr), + // PvTimerConfig and Halt are handled at the hypervisor level + // (in run_vcpu) and should never reach this handler. + OutBAction::PvTimerConfig | OutBAction::Halt => Err(HandleOutbError::InvalidPort(format!( + "port {port:#x} should be handled at hypervisor level" + ))), } } #[cfg(test)] From 64184b02ad171bcab0f8290784855c2cc340a674 Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 21:53:15 +0000 Subject: [PATCH 02/37] feat(kvm): add hardware interrupt support with in-kernel IRQ chip - Create IRQ chip (PIC + IOAPIC + LAPIC) and PIT before vCPU creation - Add hw-interrupts run loop that handles HLT re-entry, Halt port, and PvTimerConfig port (ignored since in-kernel PIT handles scheduling) - Non-hw-interrupts path also recognizes Halt port for compatibility Signed-off-by: danbugs --- .../hypervisor/virtual_machine/kvm/x86_64.rs | 72 +++++++++++++++++++ .../src/hypervisor/virtual_machine/mod.rs | 4 +- 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index db7b235e1..d4c80092c 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -16,8 +16,11 @@ limitations under the License. use std::sync::LazyLock; +use hyperlight_common::outb::OutBAction; #[cfg(gdb)] use kvm_bindings::kvm_guest_debug; +#[cfg(feature = "hw-interrupts")] +use kvm_bindings::{KVM_PIT_SPEAKER_DUMMY, kvm_pit_config}; use kvm_bindings::{ kvm_debugregs, kvm_fpu, kvm_regs, kvm_sregs, kvm_userspace_memory_region, kvm_xsave, }; @@ -106,6 +109,26 @@ impl KvmVm { let vm_fd = hv .create_vm_with_type(0) .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + + // When hw-interrupts is enabled, create the in-kernel IRQ chip + // (PIC + IOAPIC + LAPIC) and PIT before creating the vCPU so the + // per-vCPU LAPIC is initialised in virtual-wire mode (LINT0 = ExtINT). + // The guest programs the PIC remap and PIT frequency via standard + // IO port writes, which the in-kernel devices handle transparently. + #[cfg(feature = "hw-interrupts")] + { + vm_fd + .create_irq_chip() + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + let pit_config = kvm_pit_config { + flags: KVM_PIT_SPEAKER_DUMMY, + ..Default::default() + }; + vm_fd + .create_pit2(pit_config) + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + } + let vcpu_fd = vm_fd .create_vcpu(0) .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; @@ -171,8 +194,57 @@ impl VirtualMachine for KvmVm { // it sets the guest span, no other traces or spans must be setup in between these calls. #[cfg(feature = "trace_guest")] tc.setup_guest_trace(Span::current().context()); + + // When hw-interrupts is enabled, the in-kernel PIC + PIT + LAPIC handle + // timer interrupts natively. The guest signals "I'm done" by writing to + // OutBAction::Halt (an IO port exit) instead of using HLT, because the in-kernel + // LAPIC absorbs HLT (never returns VcpuExit::Hlt to userspace). + #[cfg(feature = "hw-interrupts")] + loop { + match self.vcpu_fd.run() { + Ok(VcpuExit::Hlt) => { + // The in-kernel LAPIC normally handles HLT internally. + // If we somehow get here, just re-enter the guest. + continue; + } + Ok(VcpuExit::IoOut(port, data)) => { + if port == OutBAction::Halt as u16 { + return Ok(VmExit::Halt()); + } + if port == OutBAction::PvTimerConfig as u16 { + // Ignore: the in-kernel PIT handles timer scheduling. + // The guest writes here for MSHV/WHP compatibility. + continue; + } + return Ok(VmExit::IoOut(port, data.to_vec())); + } + Ok(VcpuExit::MmioRead(addr, _)) => return Ok(VmExit::MmioRead(addr)), + Ok(VcpuExit::MmioWrite(addr, _)) => return Ok(VmExit::MmioWrite(addr)), + #[cfg(gdb)] + Ok(VcpuExit::Debug(debug_exit)) => { + return Ok(VmExit::Debug { + dr6: debug_exit.dr6, + exception: debug_exit.exception, + }); + } + Err(e) => match e.errno() { + libc::EINTR => return Ok(VmExit::Cancelled()), + libc::EAGAIN => continue, + _ => return Err(RunVcpuError::Unknown(e.into())), + }, + Ok(other) => { + return Ok(VmExit::Unknown(format!( + "Unknown KVM VCPU exit: {:?}", + other + ))); + } + } + } + + #[cfg(not(feature = "hw-interrupts"))] match self.vcpu_fd.run() { Ok(VcpuExit::Hlt) => Ok(VmExit::Halt()), + Ok(VcpuExit::IoOut(port, _)) if port == OutBAction::Halt as u16 => Ok(VmExit::Halt()), Ok(VcpuExit::IoOut(port, data)) => Ok(VmExit::IoOut(port, data.to_vec())), Ok(VcpuExit::MmioRead(addr, _)) => Ok(VmExit::MmioRead(addr)), Ok(VcpuExit::MmioWrite(addr, _)) => Ok(VmExit::MmioWrite(addr)), diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index b3bed769e..cb1e253a9 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -141,10 +141,10 @@ pub(crate) enum VmExit { Unknown(String), /// The operation should be retried, for example this can happen on Linux where a call to run the CPU can return EAGAIN #[cfg_attr( - target_os = "windows", + any(target_os = "windows", feature = "hw-interrupts"), expect( dead_code, - reason = "Retry() is never constructed on Windows, but it is still matched on (which dead_code lint ignores)" + reason = "Retry() is never constructed on Windows or with hw-interrupts (EAGAIN causes continue instead)" ) )] Retry(), From 7f5d0bc290b6b16ca031549bd2acb0a260babcd1 Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 21:55:35 +0000 Subject: [PATCH 03/37] feat(mshv): add hardware interrupt support with SynIC timer - Enable LAPIC in partition flags for SynIC direct-mode timer delivery - Configure LAPIC (SVR, TPR, LINT0/1, LVT Timer) during VM creation - Install MSR intercept on IA32_APIC_BASE to prevent guest from disabling the LAPIC globally - Add SynIC STIMER0 configuration via PvTimerConfig IO port - Add userspace PIC emulation integration for MSHV - Restructure run_vcpu into a loop for HLT re-entry and hw IO handling - Bridge PIC EOI to LAPIC EOI for SynIC timer interrupt acknowledgment - Handle PIT/speaker/debug IO ports in userspace Signed-off-by: danbugs --- .../hypervisor/virtual_machine/mshv/x86_64.rs | 534 +++++++++++++++--- 1 file changed, 452 insertions(+), 82 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 6ebc59057..cfec009e3 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -18,16 +18,32 @@ limitations under the License. use std::fmt::Debug; use std::sync::LazyLock; +use hyperlight_common::outb::OutBAction; +#[cfg(feature = "hw-interrupts")] +use mshv_bindings::LapicState; #[cfg(gdb)] use mshv_bindings::{DebugRegisters, hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT}; use mshv_bindings::{ FloatingPointUnit, SpecialRegisters, StandardRegisters, XSave, hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, - hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, - hv_partition_synthetic_processor_features, hv_register_assoc, - hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_user_mem_region, + hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, hv_register_assoc, + hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_create_partition_v2, + mshv_user_mem_region, }; +#[cfg(feature = "hw-interrupts")] +use mshv_bindings::{ + HV_INTERCEPT_ACCESS_MASK_WRITE, hv_intercept_parameters, + hv_intercept_type_HV_INTERCEPT_TYPE_X64_MSR_INDEX, hv_message_type_HVMSG_X64_MSR_INTERCEPT, + mshv_install_intercept, +}; +#[cfg(feature = "hw-interrupts")] +use mshv_bindings::{ + hv_register_name_HV_REGISTER_SCONTROL, hv_register_name_HV_REGISTER_STIMER0_CONFIG, + hv_register_name_HV_REGISTER_STIMER0_COUNT, hv_register_name_HV_X64_REGISTER_RAX, +}; +#[cfg(feature = "hw-interrupts")] +use mshv_ioctls::make_default_synthetic_features_mask; use mshv_ioctls::{Mshv, VcpuFd, VmFd}; use tracing::{Span, instrument}; #[cfg(feature = "trace_guest")] @@ -35,6 +51,8 @@ use tracing_opentelemetry::OpenTelemetrySpanExt; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::pic::Pic; use crate::hypervisor::regs::{ CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, @@ -67,33 +85,71 @@ pub(crate) fn is_hypervisor_present() -> bool { pub(crate) struct MshvVm { vm_fd: VmFd, vcpu_fd: VcpuFd, + #[cfg(feature = "hw-interrupts")] + pic: Pic, + #[cfg(feature = "hw-interrupts")] + synic_timer_active: bool, } static MSHV: LazyLock> = LazyLock::new(|| Mshv::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into()))); +/// Write a u32 to a LAPIC register at the given APIC offset. +#[cfg(feature = "hw-interrupts")] +fn write_lapic_u32(regs: &mut [::std::os::raw::c_char; 1024], offset: usize, val: u32) { + let bytes = val.to_le_bytes(); + regs[offset] = bytes[0] as _; + regs[offset + 1] = bytes[1] as _; + regs[offset + 2] = bytes[2] as _; + regs[offset + 3] = bytes[3] as _; +} + +/// Read a u32 from a LAPIC register at the given APIC offset. +#[cfg(feature = "hw-interrupts")] +fn read_lapic_u32(regs: &[::std::os::raw::c_char; 1024], offset: usize) -> u32 { + u32::from_le_bytes([ + regs[offset] as u8, + regs[offset + 1] as u8, + regs[offset + 2] as u8, + regs[offset + 3] as u8, + ]) +} + impl MshvVm { /// Create a new instance of a MshvVm #[instrument(skip_all, parent = Span::current(), level = "Trace")] pub(crate) fn new() -> std::result::Result { let mshv = MSHV.as_ref().map_err(|e| e.clone())?; - let pr = Default::default(); - // It's important to avoid create_vm() and explicitly use - // create_vm_with_args() with an empty arguments structure - // here, because otherwise the partition is set up with a SynIC. + #[allow(unused_mut)] + let mut pr: mshv_create_partition_v2 = Default::default(); + // Enable LAPIC for hw-interrupts — required for SynIC direct-mode + // timer delivery via APIC vector. MSHV_PT_BIT_LAPIC = bit 0. + #[cfg(feature = "hw-interrupts")] + { + pr.pt_flags = 1u64; // LAPIC + } let vm_fd = mshv .create_vm_with_args(&pr) .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; let vcpu_fd = { - let features: hv_partition_synthetic_processor_features = Default::default(); + // Use the full default synthetic features mask (includes SynIC, + // synthetic timers, direct mode, hypercall regs, etc.) when + // hw-interrupts is enabled. Without hw-interrupts, leave all + // features at zero. + #[cfg(feature = "hw-interrupts")] + let feature_val = make_default_synthetic_features_mask(); + #[cfg(not(feature = "hw-interrupts"))] + let feature_val = 0u64; + vm_fd .set_partition_property( hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, - unsafe { features.as_uint64[0] }, + feature_val, ) .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + vm_fd .initialize() .map_err(|e| CreateVmError::InitializeVm(e.into()))?; @@ -103,7 +159,62 @@ impl MshvVm { .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))? }; - Ok(Self { vm_fd, vcpu_fd }) + // Initialize the virtual LAPIC when hw-interrupts is enabled. + // LAPIC defaults to disabled (SVR bit 8 = 0), which means no APIC + // interrupts (including SynIC direct-mode timer) can be delivered. + #[cfg(feature = "hw-interrupts")] + { + let mut lapic: LapicState = vcpu_fd + .get_lapic() + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + + // SVR (offset 0xF0): bit 8 = enable APIC, bits 0-7 = spurious vector + write_lapic_u32(&mut lapic.regs, 0xF0, 0x1FF); + // TPR (offset 0x80): 0 = accept all interrupt priorities + write_lapic_u32(&mut lapic.regs, 0x80, 0); + // DFR (offset 0xE0): 0xFFFFFFFF = flat model + write_lapic_u32(&mut lapic.regs, 0xE0, 0xFFFF_FFFF); + // LDR (offset 0xD0): set logical APIC ID for flat model + write_lapic_u32(&mut lapic.regs, 0xD0, 1 << 24); + // LINT0 (offset 0x350): masked — we don't forward PIC through LAPIC; + // our PIC is emulated in userspace and not wired to LINT0. + write_lapic_u32(&mut lapic.regs, 0x350, 0x0001_0000); + // LINT1 (offset 0x360): NMI delivery, not masked + write_lapic_u32(&mut lapic.regs, 0x360, 0x400); + // LVT Timer (offset 0x320): masked — we use SynIC timer instead + write_lapic_u32(&mut lapic.regs, 0x320, 0x0001_0000); + // LVT Error (offset 0x370): masked + write_lapic_u32(&mut lapic.regs, 0x370, 0x0001_0000); + + vcpu_fd + .set_lapic(&lapic) + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + + // Install MSR intercept for IA32_APIC_BASE (MSR 0x1B) to prevent + // the guest from globally disabling the LAPIC. The Nanvix kernel + // disables the APIC when no I/O APIC is detected, but we need + // the LAPIC enabled for SynIC direct-mode timer delivery. + // + // This may fail with AccessDenied on some kernel versions; in + // that case we fall back to re-enabling the LAPIC in the timer + // setup path (handle_hw_io_out). + let _ = vm_fd.install_intercept(mshv_install_intercept { + access_type_mask: HV_INTERCEPT_ACCESS_MASK_WRITE, + intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_X64_MSR_INDEX, + intercept_parameter: hv_intercept_parameters { + msr_index: 0x1B, // IA32_APIC_BASE + }, + }); + } + + Ok(Self { + vm_fd, + vcpu_fd, + #[cfg(feature = "hw-interrupts")] + pic: Pic::new(), + #[cfg(feature = "hw-interrupts")] + synic_timer_active: false, + }) } } @@ -128,6 +239,7 @@ impl VirtualMachine for MshvVm { .map_err(|e| UnmapMemoryError::Hypervisor(e.into())) } + #[cfg_attr(not(feature = "hw-interrupts"), allow(clippy::never_loop))] fn run_vcpu( &mut self, #[cfg(feature = "trace_guest")] tc: &mut SandboxTraceContext, @@ -137,6 +249,8 @@ impl VirtualMachine for MshvVm { hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT; const UNMAPPED_GPA_MESSAGE: hv_message_type = hv_message_type_HVMSG_UNMAPPED_GPA; const INVALID_GPA_ACCESS_MESSAGE: hv_message_type = hv_message_type_HVMSG_GPA_INTERCEPT; + #[cfg(feature = "hw-interrupts")] + const MSR_INTERCEPT_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_MSR_INTERCEPT; #[cfg(gdb)] const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT; @@ -144,82 +258,181 @@ impl VirtualMachine for MshvVm { // it sets the guest span, no other traces or spans must be setup in between these calls. #[cfg(feature = "trace_guest")] tc.setup_guest_trace(Span::current().context()); - let exit_reason = self.vcpu_fd.run(); - - let result = match exit_reason { - Ok(m) => match m.header.message_type { - HALT_MESSAGE => VmExit::Halt(), - IO_PORT_INTERCEPT_MESSAGE => { - let io_message = m - .to_ioport_info() - .map_err(|_| RunVcpuError::DecodeIOMessage(m.header.message_type))?; - let port_number = io_message.port_number; - let rip = io_message.header.rip; - let rax = io_message.rax; - let instruction_length = io_message.header.instruction_length() as u64; - - // mshv, unlike kvm, does not automatically increment RIP - self.vcpu_fd - .set_reg(&[hv_register_assoc { - name: hv_register_name_HV_X64_REGISTER_RIP, - value: hv_register_value { - reg64: rip + instruction_length, - }, - ..Default::default() - }]) - .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; - VmExit::IoOut(port_number, rax.to_le_bytes().to_vec()) - } - UNMAPPED_GPA_MESSAGE => { - let mimo_message = m - .to_memory_info() - .map_err(|_| RunVcpuError::DecodeIOMessage(m.header.message_type))?; - let addr = mimo_message.guest_physical_address; - match MemoryRegionFlags::try_from(mimo_message) - .map_err(|_| RunVcpuError::ParseGpaAccessInfo)? - { - MemoryRegionFlags::READ => VmExit::MmioRead(addr), - MemoryRegionFlags::WRITE => VmExit::MmioWrite(addr), - _ => VmExit::Unknown("Unknown MMIO access".to_string()), + + loop { + let exit_reason = self.vcpu_fd.run(); + + match exit_reason { + Ok(m) => { + let msg_type = m.header.message_type; + match msg_type { + HALT_MESSAGE => { + // With SynIC timer active, re-enter the guest. + // The hypervisor will deliver pending timer + // interrupts on the next run(), waking the + // vCPU from HLT. + #[cfg(feature = "hw-interrupts")] + if self.synic_timer_active { + continue; + } + return Ok(VmExit::Halt()); + } + IO_PORT_INTERCEPT_MESSAGE => { + let io_message = m + .to_ioport_info() + .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; + let port_number = io_message.port_number; + let rip = io_message.header.rip; + let rax = io_message.rax; + let instruction_length = io_message.header.instruction_length() as u64; + let is_write = io_message.header.intercept_access_type != 0; + + // mshv, unlike kvm, does not automatically increment RIP + self.vcpu_fd + .set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_RIP, + value: hv_register_value { + reg64: rip + instruction_length, + }, + ..Default::default() + }]) + .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; + + // OutBAction::Halt always means "I'm done", regardless + // of whether a timer is active. + if is_write && port_number == OutBAction::Halt as u16 { + return Ok(VmExit::Halt()); + } + + #[cfg(feature = "hw-interrupts")] + { + if is_write { + let data = rax.to_le_bytes(); + if self.handle_hw_io_out(port_number, &data) { + continue; + } + } else if let Some(val) = self.handle_hw_io_in(port_number) { + self.vcpu_fd + .set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_RAX, + value: hv_register_value { reg64: val }, + ..Default::default() + }]) + .map_err(|e| RunVcpuError::Unknown(e.into()))?; + continue; + } + } + + // Suppress unused variable warning when hw-interrupts is disabled + let _ = is_write; + + return Ok(VmExit::IoOut(port_number, rax.to_le_bytes().to_vec())); + } + UNMAPPED_GPA_MESSAGE => { + let mimo_message = m + .to_memory_info() + .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; + let addr = mimo_message.guest_physical_address; + return match MemoryRegionFlags::try_from(mimo_message) + .map_err(|_| RunVcpuError::ParseGpaAccessInfo)? + { + MemoryRegionFlags::READ => Ok(VmExit::MmioRead(addr)), + MemoryRegionFlags::WRITE => Ok(VmExit::MmioWrite(addr)), + _ => Ok(VmExit::Unknown("Unknown MMIO access".to_string())), + }; + } + INVALID_GPA_ACCESS_MESSAGE => { + let mimo_message = m + .to_memory_info() + .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; + let gpa = mimo_message.guest_physical_address; + let access_info = MemoryRegionFlags::try_from(mimo_message) + .map_err(|_| RunVcpuError::ParseGpaAccessInfo)?; + return match access_info { + MemoryRegionFlags::READ => Ok(VmExit::MmioRead(gpa)), + MemoryRegionFlags::WRITE => Ok(VmExit::MmioWrite(gpa)), + _ => Ok(VmExit::Unknown("Unknown MMIO access".to_string())), + }; + } + #[cfg(feature = "hw-interrupts")] + MSR_INTERCEPT_MESSAGE => { + // Guest is writing to MSR 0x1B (IA32_APIC_BASE). + // Force bit 11 (global APIC enable) to stay set, + // preventing the guest from disabling the LAPIC. + let msr_msg = m + .to_msr_info() + .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; + let rip = msr_msg.header.rip; + let instruction_length = msr_msg.header.instruction_length() as u64; + let msr_val = (msr_msg.rdx << 32) | (msr_msg.rax & 0xFFFF_FFFF); + + // Force APIC global enable (bit 11) to remain set, + // preserving the standard base address. + let forced_val = msr_val | (1 << 11) | 0xFEE00000; + self.vcpu_fd + .set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_RIP, + value: hv_register_value { + reg64: rip + instruction_length, + }, + ..Default::default() + }]) + .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; + + use mshv_bindings::hv_register_name_HV_X64_REGISTER_APIC_BASE; + let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_APIC_BASE, + value: hv_register_value { reg64: forced_val }, + ..Default::default() + }]); + continue; + } + #[cfg(gdb)] + EXCEPTION_INTERCEPT => { + let ex_info = m + .to_exception_info() + .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; + let DebugRegisters { dr6, .. } = self + .vcpu_fd + .get_debug_regs() + .map_err(|e| RunVcpuError::GetDr6(e.into()))?; + return Ok(VmExit::Debug { + dr6, + exception: ex_info.exception_vector as u32, + }); + } + other => { + return Ok(VmExit::Unknown(format!( + "Unknown MSHV VCPU exit: {:?}", + other + ))); + } } } - INVALID_GPA_ACCESS_MESSAGE => { - let mimo_message = m - .to_memory_info() - .map_err(|_| RunVcpuError::DecodeIOMessage(m.header.message_type))?; - let gpa = mimo_message.guest_physical_address; - let access_info = MemoryRegionFlags::try_from(mimo_message) - .map_err(|_| RunVcpuError::ParseGpaAccessInfo)?; - match access_info { - MemoryRegionFlags::READ => VmExit::MmioRead(gpa), - MemoryRegionFlags::WRITE => VmExit::MmioWrite(gpa), - _ => VmExit::Unknown("Unknown MMIO access".to_string()), + Err(e) => match e.errno() { + libc::EINTR => { + // When the SynIC timer is active, EINTR may be + // a spurious signal. Continue the run loop to + // let the hypervisor deliver any pending timer + // interrupt. + #[cfg(feature = "hw-interrupts")] + if self.synic_timer_active { + continue; + } + return Ok(VmExit::Cancelled()); } - } - #[cfg(gdb)] - EXCEPTION_INTERCEPT => { - let ex_info = m - .to_exception_info() - .map_err(|_| RunVcpuError::DecodeIOMessage(m.header.message_type))?; - let DebugRegisters { dr6, .. } = self - .vcpu_fd - .get_debug_regs() - .map_err(|e| RunVcpuError::GetDr6(e.into()))?; - VmExit::Debug { - dr6, - exception: ex_info.exception_vector as u32, + libc::EAGAIN => { + #[cfg(not(feature = "hw-interrupts"))] + { + return Ok(VmExit::Retry()); + } + #[cfg(feature = "hw-interrupts")] + continue; } - } - other => VmExit::Unknown(format!("Unknown MSHV VCPU exit: {:?}", other)), - }, - Err(e) => match e.errno() { - // InterruptHandle::kill() sends a signal (SIGRTMIN+offset) to interrupt the vcpu, which causes EINTR - libc::EINTR => VmExit::Cancelled(), - libc::EAGAIN => VmExit::Retry(), - _ => Err(RunVcpuError::Unknown(e.into()))?, - }, - }; - Ok(result) + _ => return Err(RunVcpuError::Unknown(e.into())), + }, + } + } } fn regs(&self) -> std::result::Result { @@ -463,3 +676,160 @@ impl DebuggableVm for MshvVm { } } } + +#[cfg(feature = "hw-interrupts")] +impl MshvVm { + /// Standard x86 APIC base MSR value: base address 0xFEE00000 + + /// BSP flag (bit 8) + global enable (bit 11). + const APIC_BASE_DEFAULT: u64 = 0xFEE00900; + + /// Perform LAPIC EOI: clear the highest-priority in-service bit. + /// Called when the guest sends PIC EOI, since SynIC direct-mode timer + /// delivers through the LAPIC and the guest only acknowledges via PIC. + fn do_lapic_eoi(&self) { + if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { + // ISR is at offset 0x100, 8 x 32-bit words (one per 16 bytes). + // Scan from highest priority (ISR[7]) to lowest (ISR[0]). + for i in (0u32..8).rev() { + let offset = 0x100 + (i as usize) * 0x10; + let isr_val = read_lapic_u32(&lapic.regs, offset); + if isr_val != 0 { + let bit = 31 - isr_val.leading_zeros(); + write_lapic_u32(&mut lapic.regs, offset, isr_val & !(1u32 << bit)); + let _ = self.vcpu_fd.set_lapic(&lapic); + break; + } + } + } + } + + /// Handle a hardware-interrupt IO IN request. + /// Returns `Some(value)` if the port was handled (PIC or PIT read), + /// `None` if the port should be passed through to the guest handler. + fn handle_hw_io_in(&self, port: u16) -> Option { + if let Some(val) = self.pic.handle_io_in(port) { + return Some(val as u64); + } + // PIT data port read -- return 0 + if port == 0x40 { + return Some(0); + } + None + } + + /// Build SynIC timer config register value. + /// + /// Layout (from Hyper-V TLFS): + /// - Bit 0: enable + /// - Bit 1: periodic + /// - Bit 3: auto_enable + /// - Bits 4-11: apic_vector + /// - Bit 12: direct_mode + fn build_stimer_config(vector: u8) -> u64 { + 1 // enable + | (1 << 1) // periodic + | (1 << 3) // auto_enable + | ((vector as u64) << 4) // apic_vector + | (1 << 12) // direct_mode + } + + /// Convert a timer period in microseconds to the SynIC timer count + /// in 100-nanosecond units (Hyper-V reference time). + fn period_us_to_100ns(period_us: u32) -> u64 { + (period_us as u64) * 10 + } + + fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { + if port == OutBAction::PvTimerConfig as u16 { + if data.len() >= 4 { + let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + if period_us > 0 { + // Re-enable LAPIC if the guest disabled it (via WRMSR + // to MSR 0x1B clearing bit 11). This happens when the + // Nanvix kernel doesn't detect an I/O APIC. + // + // The hypervisor may return 0 for APIC_BASE when the + // APIC is globally disabled, so we always restore the + // standard value (0xFEE00900). + use mshv_bindings::hv_register_name_HV_X64_REGISTER_APIC_BASE; + let mut apic_base_reg = [hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_APIC_BASE, + value: hv_register_value { reg64: 0 }, + ..Default::default() + }]; + if self.vcpu_fd.get_reg(&mut apic_base_reg).is_ok() { + let cur = unsafe { apic_base_reg[0].value.reg64 }; + if cur & (1 << 11) == 0 { + let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_APIC_BASE, + value: hv_register_value { + reg64: Self::APIC_BASE_DEFAULT, + }, + ..Default::default() + }]); + } + } + // Re-initialize LAPIC SVR (may have been zeroed when + // guest disabled the APIC globally) + if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { + let svr = read_lapic_u32(&lapic.regs, 0xF0); + if svr & 0x100 == 0 { + write_lapic_u32(&mut lapic.regs, 0xF0, 0x1FF); + write_lapic_u32(&mut lapic.regs, 0x80, 0); // TPR + let _ = self.vcpu_fd.set_lapic(&lapic); + } + } + + // Enable SynIC on this vCPU (SCONTROL bit 0 = enable) + let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { + name: hv_register_name_HV_REGISTER_SCONTROL, + value: hv_register_value { reg64: 1 }, + ..Default::default() + }]); + + // Arm STIMER0 immediately (the guest needs timer + // interrupts for thread preemption, not just idle + // wakeup). + let vector = self.pic.master_vector_base(); + let config = Self::build_stimer_config(vector); + let count_100ns = Self::period_us_to_100ns(period_us); + + let _ = self.vcpu_fd.set_reg(&[ + hv_register_assoc { + name: hv_register_name_HV_REGISTER_STIMER0_CONFIG, + value: hv_register_value { reg64: config }, + ..Default::default() + }, + hv_register_assoc { + name: hv_register_name_HV_REGISTER_STIMER0_COUNT, + value: hv_register_value { reg64: count_100ns }, + ..Default::default() + }, + ]); + self.synic_timer_active = true; + } + } + return true; + } + if !data.is_empty() && self.pic.handle_io_out(port, data[0]) { + // When the guest sends PIC EOI (port 0x20, OCW2 non-specific EOI), + // also perform LAPIC EOI since SynIC timer delivers via LAPIC and + // the guest only knows about the PIC. + if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.synic_timer_active { + self.do_lapic_eoi(); + } + return true; + } + if port == 0x43 || port == 0x40 { + return true; + } + if port == 0x61 { + return true; + } + if port == 0x80 { + return true; + } + false + } +} + From e0a2dfe0215e32ae0d1a2df2843bb7cc85ffda33 Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 21:58:47 +0000 Subject: [PATCH 04/37] feat(whp): add hardware interrupt support with software timer Add WHP hardware interrupt support using a host-side software timer thread that periodically injects interrupts via WHvRequestInterrupt. Key changes: - Detect LAPIC emulation support via WHvGetCapability - Initialize LAPIC via bulk interrupt-controller state API (WHvGet/SetVirtualProcessorInterruptControllerState2) since individual APIC register writes fail with ACCESS_DENIED - Software timer thread for periodic interrupt injection - LAPIC EOI handling for PIC-only guest acknowledgment - PIC emulation integration for MSHV/WHP shared 8259A - Filter APIC_BASE from set_sregs when LAPIC emulation active - HLT re-entry when timer is active Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/whp.rs | 661 +++++++++++++++--- 1 file changed, 567 insertions(+), 94 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 46f7f44e9..697213ab0 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -15,7 +15,12 @@ limitations under the License. */ use std::os::raw::c_void; +#[cfg(feature = "hw-interrupts")] +use std::sync::Arc; +#[cfg(feature = "hw-interrupts")] +use std::sync::atomic::{AtomicBool, Ordering}; +use hyperlight_common::outb::OutBAction; #[cfg(feature = "trace_guest")] use tracing::Span; #[cfg(feature = "trace_guest")] @@ -29,6 +34,8 @@ use windows_result::HRESULT; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::pic::Pic; use crate::hypervisor::regs::{ Align16, CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, WHP_DEBUG_REGS_NAMES, WHP_DEBUG_REGS_NAMES_LEN, @@ -93,6 +100,20 @@ pub(crate) struct WhpVm { /// Tracks host-side file mappings (view_base, mapping_handle) for /// cleanup on unmap or drop. Only populated for MappedFile regions. file_mappings: Vec<(HandleWrapper, *mut c_void)>, + #[cfg(feature = "hw-interrupts")] + pic: Pic, + /// Whether the WHP host supports LAPIC emulation mode. + #[cfg(feature = "hw-interrupts")] + lapic_emulation: bool, + /// Whether the LAPIC timer is actively delivering periodic interrupts. + #[cfg(feature = "hw-interrupts")] + lapic_timer_active: bool, + /// Signal to stop the software timer thread. + #[cfg(feature = "hw-interrupts")] + timer_stop: Option>, + /// Handle for the software timer thread. + #[cfg(feature = "hw-interrupts")] + timer_thread: Option>, } // Safety: `WhpVm` is !Send because it holds `SurrogateProcess` which contains a raw pointer @@ -107,34 +128,237 @@ unsafe impl Send for WhpVm {} impl WhpVm { pub(crate) fn new() -> Result { const NUM_CPU: u32 = 1; - let partition = unsafe { - let partition = - WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - partition, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - WHvSetupPartition(partition).map_err(|e| CreateVmError::InitializeVm(e.into()))?; - WHvCreateVirtualProcessor(partition, 0, 0) - .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - partition + + // Check whether the WHP host supports LAPIC emulation. + // Bit 1 of WHV_CAPABILITY_FEATURES corresponds to LocalApicEmulation. + // LAPIC emulation is required for the LAPIC timer to deliver interrupts. + #[cfg(feature = "hw-interrupts")] + const LAPIC_EMULATION_BIT: u64 = 1 << 1; + + #[cfg(feature = "hw-interrupts")] + let mut lapic_emulation = { + let mut capability: WHV_CAPABILITY = Default::default(); + let ok = unsafe { + WHvGetCapability( + WHvCapabilityCodeFeatures, + &mut capability as *mut _ as *mut c_void, + std::mem::size_of::() as u32, + None, + ) + }; + if ok.is_ok() { + unsafe { capability.Features.AsUINT64 & LAPIC_EMULATION_BIT != 0 } + } else { + false + } }; - // Create the surrogate process with the total memory size - let mgr = get_surrogate_process_manager() - .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?; - let surrogate_process = mgr - .get_surrogate_process() - .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?; + // Create partition, set CPU count, setup, and create vCPU. + // When hw-interrupts is enabled and LAPIC emulation is supported, + // we create a partition with LAPIC emulation mode. This is + // required for the LAPIC timer. + let partition = unsafe { + #[cfg(feature = "hw-interrupts")] + { + if lapic_emulation { + let p = + WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + + let apic_mode: u32 = 1; // WHvX64LocalApicEmulationModeXApic + let partition_ok = WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeLocalApicEmulationMode, + &apic_mode as *const _ as *const _, + std::mem::size_of_val(&apic_mode) as _, + ) + .is_ok() + && WHvSetupPartition(p).is_ok() + && WHvCreateVirtualProcessor(p, 0, 0).is_ok(); + + if partition_ok { + log::info!("[WHP] LAPIC partition created, running init_lapic_bulk"); + // Initialize the LAPIC via the bulk interrupt-controller + // state API (individual APIC register writes via + // WHvSetVirtualProcessorRegisters fail with ACCESS_DENIED). + if let Err(e) = Self::init_lapic_bulk(p) { + log::warn!( + "[WHP] Bulk LAPIC init failed ({e}); \ + falling back to non-LAPIC mode." + ); + let _ = WHvDeleteVirtualProcessor(p, 0); + let _ = WHvDeletePartition(p); + lapic_emulation = false; + + let p = WHvCreatePartition() + .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + WHvSetupPartition(p) + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; + p + } else { + log::info!("[WHP] Bulk LAPIC init succeeded, lapic_emulation=true"); + p + } + } else { + log::warn!("LAPIC emulation setup failed, falling back to non-LAPIC mode"); + let _ = WHvDeleteVirtualProcessor(p, 0); + let _ = WHvDeletePartition(p); + lapic_emulation = false; + + let p = WHvCreatePartition() + .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; + p + } + } else { + let p = + WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; + p + } + } + + #[cfg(not(feature = "hw-interrupts"))] + { + let p = WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; + p + } + }; - Ok(WhpVm { + let vm = WhpVm { partition, surrogate_process, file_mappings: Vec::new(), - }) + #[cfg(feature = "hw-interrupts")] + pic: Pic::new(), + #[cfg(feature = "hw-interrupts")] + lapic_emulation, + #[cfg(feature = "hw-interrupts")] + lapic_timer_active: false, + #[cfg(feature = "hw-interrupts")] + timer_stop: None, + #[cfg(feature = "hw-interrupts")] + timer_thread: None, + }; + + Ok(vm) + } + + /// Maximum size for the interrupt controller state blob. + const LAPIC_STATE_MAX_SIZE: u32 = 4096; + + /// Initialize the LAPIC via the bulk interrupt-controller state API. + /// Individual APIC register writes via `WHvSetVirtualProcessorRegisters` + /// fail with ACCESS_DENIED on WHP when LAPIC emulation is enabled, + /// so we use `WHvGet/SetVirtualProcessorInterruptControllerState2` + /// to read-modify-write the entire LAPIC register page. + #[cfg(feature = "hw-interrupts")] + unsafe fn init_lapic_bulk(partition: WHV_PARTITION_HANDLE) -> windows_result::Result<()> { + let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; + let mut written: u32 = 0; + + unsafe { + WHvGetVirtualProcessorInterruptControllerState2( + partition, + 0, + state.as_mut_ptr() as *mut c_void, + Self::LAPIC_STATE_MAX_SIZE, + Some(&mut written), + )?; + } + state.truncate(written as usize); + log::info!( + "[WHP] init_lapic_bulk: got {} bytes of LAPIC state", + written + ); + + // SVR (offset 0xF0): enable APIC (bit 8) + spurious vector 0xFF + Self::write_lapic_u32(&mut state, 0xF0, 0x1FF); + // TPR (offset 0x80): 0 = accept all interrupt priorities + Self::write_lapic_u32(&mut state, 0x80, 0); + // DFR (offset 0xE0): 0xFFFFFFFF = flat model + Self::write_lapic_u32(&mut state, 0xE0, 0xFFFF_FFFF); + // LDR (offset 0xD0): set logical APIC ID for flat model + Self::write_lapic_u32(&mut state, 0xD0, 1 << 24); + // LINT0 (offset 0x350): masked + Self::write_lapic_u32(&mut state, 0x350, 0x0001_0000); + // LINT1 (offset 0x360): NMI delivery, not masked + Self::write_lapic_u32(&mut state, 0x360, 0x400); + // LVT Timer (offset 0x320): masked initially (configured on PvTimerConfig) + Self::write_lapic_u32(&mut state, 0x320, 0x0001_0000); + // LVT Error (offset 0x370): masked + Self::write_lapic_u32(&mut state, 0x370, 0x0001_0000); + + unsafe { + WHvSetVirtualProcessorInterruptControllerState2( + partition, + 0, + state.as_ptr() as *const c_void, + state.len() as u32, + )?; + } + + Ok(()) + } + + /// Read a u32 from a LAPIC register page at the given APIC offset. + #[cfg(feature = "hw-interrupts")] + fn read_lapic_u32(state: &[u8], offset: usize) -> u32 { + u32::from_le_bytes([ + state[offset], + state[offset + 1], + state[offset + 2], + state[offset + 3], + ]) + } + + /// Write a u32 to a LAPIC register page at the given APIC offset. + #[cfg(feature = "hw-interrupts")] + fn write_lapic_u32(state: &mut [u8], offset: usize, val: u32) { + state[offset..offset + 4].copy_from_slice(&val.to_le_bytes()); } /// Helper for setting arbitrary registers. Makes sure the same number @@ -272,85 +496,155 @@ impl VirtualMachine for WhpVm { // it sets the guest span, no other traces or spans must be setup in between these calls. #[cfg(feature = "trace_guest")] tc.setup_guest_trace(Span::current().context()); - unsafe { - WHvRunVirtualProcessor( - self.partition, - 0, - &mut exit_context as *mut _ as *mut c_void, - std::mem::size_of::() as u32, - ) - .map_err(|e| RunVcpuError::Unknown(e.into()))?; - } - let result = match exit_context.ExitReason { - WHvRunVpExitReasonX64IoPortAccess => unsafe { - let instruction_length = exit_context.VpContext._bitfield & 0xF; - let rip = exit_context.VpContext.Rip + instruction_length as u64; - self.set_registers(&[( - WHvX64RegisterRip, - Align16(WHV_REGISTER_VALUE { Reg64: rip }), - )]) - .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; - VmExit::IoOut( - exit_context.Anonymous.IoPortAccess.PortNumber, - exit_context + + loop { + unsafe { + WHvRunVirtualProcessor( + self.partition, + 0, + &mut exit_context as *mut _ as *mut c_void, + std::mem::size_of::() as u32, + ) + .map_err(|e| RunVcpuError::Unknown(e.into()))?; + } + + match exit_context.ExitReason { + WHvRunVpExitReasonX64IoPortAccess => unsafe { + let instruction_length = exit_context.VpContext._bitfield & 0xF; + let rip = exit_context.VpContext.Rip + instruction_length as u64; + let port = exit_context.Anonymous.IoPortAccess.PortNumber; + let rax = exit_context.Anonymous.IoPortAccess.Rax; + let is_write = exit_context .Anonymous .IoPortAccess - .Rax - .to_le_bytes() - .to_vec(), - ) - }, - WHvRunVpExitReasonX64Halt => VmExit::Halt(), - WHvRunVpExitReasonMemoryAccess => { - let gpa = unsafe { exit_context.Anonymous.MemoryAccess.Gpa }; - let access_info = unsafe { - WHV_MEMORY_ACCESS_TYPE( - // 2 first bits are the access type, see https://learn.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/memoryaccess#syntax - (exit_context.Anonymous.MemoryAccess.AccessInfo.AsUINT32 & 0b11) as i32, - ) - }; - let access_info = MemoryRegionFlags::try_from(access_info) - .map_err(|_| RunVcpuError::ParseGpaAccessInfo)?; - match access_info { - MemoryRegionFlags::READ => VmExit::MmioRead(gpa), - MemoryRegionFlags::WRITE => VmExit::MmioWrite(gpa), - _ => VmExit::Unknown("Unknown memory access type".to_string()), + .AccessInfo + .Anonymous + ._bitfield + & 1 + != 0; + + self.set_registers(&[( + WHvX64RegisterRip, + Align16(WHV_REGISTER_VALUE { Reg64: rip }), + )]) + .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; + + // OutBAction::Halt always means "I'm done", regardless + // of whether a timer is active. + if is_write && port == OutBAction::Halt as u16 { + return Ok(VmExit::Halt()); + } + + #[cfg(feature = "hw-interrupts")] + { + if is_write { + let data = rax.to_le_bytes(); + if self.handle_hw_io_out(port, &data) { + continue; + } + } else if let Some(val) = self.handle_hw_io_in(port) { + self.set_registers(&[( + WHvX64RegisterRax, + Align16(WHV_REGISTER_VALUE { Reg64: val }), + )]) + .map_err(|e| RunVcpuError::Unknown(e.into()))?; + continue; + } + } + + // Suppress unused variable warnings when hw-interrupts is disabled + let _ = is_write; + + return Ok(VmExit::IoOut(port, rax.to_le_bytes().to_vec())); + }, + WHvRunVpExitReasonX64Halt => { + // With software timer active, re-enter the guest. + // WHvRunVirtualProcessor will block until the timer + // thread injects an interrupt via WHvRequestInterrupt, + // waking the vCPU from HLT. + #[cfg(feature = "hw-interrupts")] + if self.lapic_timer_active { + continue; + } + return Ok(VmExit::Halt()); } - } - // Execution was cancelled by the host. - WHvRunVpExitReasonCanceled => VmExit::Cancelled(), - #[cfg(gdb)] - WHvRunVpExitReasonException => { - let exception = unsafe { exit_context.Anonymous.VpException }; - - // Get the DR6 register to see which breakpoint was hit - let dr6 = { - let names = [WHvX64RegisterDr6]; - let mut out: [Align16; 1] = unsafe { std::mem::zeroed() }; - unsafe { - WHvGetVirtualProcessorRegisters( - self.partition, - 0, - names.as_ptr(), - 1, - out.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + WHvRunVpExitReasonMemoryAccess => { + let gpa = unsafe { exit_context.Anonymous.MemoryAccess.Gpa }; + let access_info = unsafe { + WHV_MEMORY_ACCESS_TYPE( + (exit_context.Anonymous.MemoryAccess.AccessInfo.AsUINT32 & 0b11) as i32, ) - .map_err(|e| RunVcpuError::GetDr6(e.into()))?; + }; + let access_info = MemoryRegionFlags::try_from(access_info) + .map_err(|_| RunVcpuError::ParseGpaAccessInfo)?; + return match access_info { + MemoryRegionFlags::READ => Ok(VmExit::MmioRead(gpa)), + MemoryRegionFlags::WRITE => Ok(VmExit::MmioWrite(gpa)), + _ => Ok(VmExit::Unknown("Unknown memory access type".to_string())), + }; + } + // Execution was cancelled by the host. + WHvRunVpExitReasonCanceled => { + return Ok(VmExit::Cancelled()); + } + #[cfg(gdb)] + WHvRunVpExitReasonException => { + let exception = unsafe { exit_context.Anonymous.VpException }; + + // Get the DR6 register to see which breakpoint was hit + let dr6 = { + let names = [WHvX64RegisterDr6]; + let mut out: [Align16; 1] = + unsafe { std::mem::zeroed() }; + unsafe { + WHvGetVirtualProcessorRegisters( + self.partition, + 0, + names.as_ptr(), + 1, + out.as_mut_ptr() as *mut WHV_REGISTER_VALUE, + ) + .map_err(|e| RunVcpuError::GetDr6(e.into()))?; + } + unsafe { out[0].0.Reg64 } + }; + + return Ok(VmExit::Debug { + dr6, + exception: exception.ExceptionType as u32, + }); + } + WHV_RUN_VP_EXIT_REASON(_) => { + let rip = exit_context.VpContext.Rip; + log::error!( + "WHP unknown exit reason {}: RIP={:#x}", + exit_context.ExitReason.0, + rip, + ); + if let Ok(regs) = self.regs() { + log::error!( + " RAX={:#x} RCX={:#x} RDX={:#x}", + regs.rax, + regs.rcx, + regs.rdx + ); } - unsafe { out[0].0.Reg64 } - }; - - VmExit::Debug { - dr6, - exception: exception.ExceptionType as u32, + if let Ok(sregs) = self.sregs() { + log::error!( + " CR0={:#x} CR4={:#x} EFER={:#x} APIC_BASE={:#x}", + sregs.cr0, + sregs.cr4, + sregs.efer, + sregs.apic_base + ); + } + return Ok(VmExit::Unknown(format!( + "Unknown exit reason '{}' at RIP={:#x}", + exit_context.ExitReason.0, rip + ))); } } - WHV_RUN_VP_EXIT_REASON(_) => VmExit::Unknown(format!( - "Unknown exit reason '{}'", - exit_context.ExitReason.0 - )), - }; - Ok(result) + } } fn regs(&self) -> std::result::Result { @@ -459,6 +753,26 @@ impl VirtualMachine for WhpVm { fn set_sregs(&self, sregs: &CommonSpecialRegisters) -> std::result::Result<(), RegisterError> { let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_SREGS_NAMES_LEN] = sregs.into(); + + // When LAPIC emulation is active, skip writing APIC_BASE. + // The generic CommonSpecialRegisters defaults APIC_BASE to 0 + // which would globally disable the LAPIC. On some WHP hosts, + // host-side APIC register writes are blocked entirely + // (ACCESS_DENIED), so attempting to write even a valid value + // would fail and abort set_sregs. Omitting it lets the VP + // keep its default APIC_BASE and the guest configures it. + #[cfg(feature = "hw-interrupts")] + if self.lapic_emulation { + let filtered: Vec<_> = whp_regs + .iter() + .copied() + .filter(|(name, _)| *name != WHvX64RegisterApicBase) + .collect(); + self.set_registers(&filtered) + .map_err(|e| RegisterError::SetSregs(e.into()))?; + return Ok(()); + } + self.set_registers(&whp_regs) .map_err(|e| RegisterError::SetSregs(e.into()))?; Ok(()) @@ -810,6 +1124,160 @@ impl DebuggableVm for WhpVm { } } +#[cfg(feature = "hw-interrupts")] +impl WhpVm { + /// Get the LAPIC state via the bulk interrupt-controller state API. + fn get_lapic_state(&self) -> windows_result::Result> { + let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; + let mut written: u32 = 0; + + unsafe { + WHvGetVirtualProcessorInterruptControllerState2( + self.partition, + 0, + state.as_mut_ptr() as *mut c_void, + Self::LAPIC_STATE_MAX_SIZE, + Some(&mut written), + )?; + } + state.truncate(written as usize); + Ok(state) + } + + /// Set the LAPIC state via the bulk interrupt-controller state API. + fn set_lapic_state(&self, state: &[u8]) -> windows_result::Result<()> { + unsafe { + WHvSetVirtualProcessorInterruptControllerState2( + self.partition, + 0, + state.as_ptr() as *const c_void, + state.len() as u32, + ) + } + } + + /// Perform LAPIC EOI: clear the highest-priority in-service bit. + /// Called when the guest sends PIC EOI, since the LAPIC timer + /// delivers through the LAPIC and the guest only acknowledges via PIC. + fn do_lapic_eoi(&self) { + if let Ok(mut state) = self.get_lapic_state() { + // ISR is at offset 0x100, 8 x 32-bit words (one per 16 bytes). + // Scan from highest priority (ISR[7]) to lowest (ISR[0]). + for i in (0u32..8).rev() { + let offset = 0x100 + (i as usize) * 0x10; + let isr_val = Self::read_lapic_u32(&state, offset); + if isr_val != 0 { + let bit = 31 - isr_val.leading_zeros(); + Self::write_lapic_u32(&mut state, offset, isr_val & !(1u32 << bit)); + let _ = self.set_lapic_state(&state); + break; + } + } + } + } + + /// Handle a hardware-interrupt IO IN request. + /// Returns `Some(value)` if the port was handled (PIC or PIT read), + /// `None` if the port should be passed through to the guest handler. + fn handle_hw_io_in(&self, port: u16) -> Option { + if let Some(val) = self.pic.handle_io_in(port) { + return Some(val as u64); + } + // PIT data port read -- return 0 + if port == 0x40 { + return Some(0); + } + None + } + + /// Stop the software timer thread if running. + fn stop_timer_thread(&mut self) { + if let Some(stop) = self.timer_stop.take() { + stop.store(true, Ordering::Relaxed); + } + if let Some(handle) = self.timer_thread.take() { + let _ = handle.join(); + } + self.lapic_timer_active = false; + } + + fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { + if port == OutBAction::PvTimerConfig as u16 { + if data.len() >= 4 { + let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + log::info!( + "[WHP] PvTimerConfig: period_us={period_us}, lapic_emulation={}", + self.lapic_emulation + ); + + // Stop any existing timer thread before (re-)configuring. + self.stop_timer_thread(); + + if period_us > 0 && self.lapic_emulation { + // WHP's bulk LAPIC state API does not start the LAPIC + // timer countdown (CurCount stays 0). Instead we use a + // host-side software timer thread that periodically + // injects interrupts via WHvRequestInterrupt. + let partition_raw = self.partition.0; + let vector = self.pic.master_vector_base() as u32; + let stop = Arc::new(AtomicBool::new(false)); + let stop_clone = stop.clone(); + let period = std::time::Duration::from_micros(period_us as u64); + + let handle = std::thread::spawn(move || { + while !stop_clone.load(Ordering::Relaxed) { + std::thread::sleep(period); + if stop_clone.load(Ordering::Relaxed) { + break; + } + let partition = WHV_PARTITION_HANDLE(partition_raw); + let interrupt = WHV_INTERRUPT_CONTROL { + _bitfield: 0, // Type=Fixed, DestMode=Physical, Trigger=Edge + Destination: 0, + Vector: vector, + }; + let _ = unsafe { + WHvRequestInterrupt( + partition, + &interrupt, + std::mem::size_of::() as u32, + ) + }; + } + }); + + self.timer_stop = Some(stop); + self.timer_thread = Some(handle); + self.lapic_timer_active = true; + log::info!( + "[WHP] Software timer started (period={period_us}us, vector={vector:#x})" + ); + } + } + return true; + } + if !data.is_empty() && self.pic.handle_io_out(port, data[0]) { + // When the guest sends PIC EOI (port 0x20, OCW2 non-specific EOI), + // also perform LAPIC EOI since the software timer delivers + // through the LAPIC and the guest only acknowledges via PIC. + if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.lapic_timer_active { + self.do_lapic_eoi(); + } + return true; + } + if port == 0x43 || port == 0x40 { + return true; + } + if port == 0x61 { + return true; + } + if port == 0x80 { + return true; + } + false + } +} + impl Drop for WhpVm { fn drop(&mut self) { // Clean up any remaining file mappings that weren't explicitly unmapped. @@ -817,6 +1285,10 @@ impl Drop for WhpVm { release_file_mapping(view, handle); } + // Stop the software timer thread before tearing down the partition. + #[cfg(feature = "hw-interrupts")] + self.stop_timer_thread(); + // HyperlightVm::drop() calls set_dropped() before this runs. // set_dropped() ensures no WHvCancelRunVirtualProcessor calls are in progress // or will be made in the future, so it's safe to delete the partition. @@ -865,3 +1337,4 @@ unsafe fn try_load_whv_map_gpa_range2() -> windows_result::Result Date: Tue, 3 Mar 2026 22:15:23 +0000 Subject: [PATCH 05/37] test: add hardware interrupt unit and integration tests Signed-off-by: danbugs --- .../src/hypervisor/hyperlight_vm/x86_64.rs | 6 + src/hyperlight_host/src/hypervisor/mod.rs | 1 + .../hypervisor/virtual_machine/kvm/x86_64.rs | 16 ++ .../hypervisor/virtual_machine/mshv/x86_64.rs | 99 ++++++++++ .../src/hypervisor/virtual_machine/whp.rs | 88 +++++++++ src/tests/rust_guests/simpleguest/src/main.rs | 171 +++++++++++++++++- 6 files changed, 378 insertions(+), 3 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs index ce6116c71..e8f21e119 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -1550,6 +1550,7 @@ mod tests { // Tests // ========================================================================== + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_simple() { // push rax; hlt - aligns stack to 16 bytes @@ -1695,6 +1696,7 @@ mod tests { use super::*; + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_regs() { let mut a = CodeAssembler::new(64).unwrap(); @@ -1754,6 +1756,7 @@ mod tests { assert_regs_reset(hyperlight_vm.vm.as_ref()); } + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_fpu() { #[cfg(kvm)] @@ -1885,6 +1888,7 @@ mod tests { } } + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_debug_regs() { let mut a = CodeAssembler::new(64).unwrap(); @@ -1927,6 +1931,7 @@ mod tests { assert_debug_regs_reset(hyperlight_vm.vm.as_ref()); } + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_sregs() { // Build code that modifies special registers and halts @@ -1980,6 +1985,7 @@ mod tests { /// Verifies guest-visible FPU state (via FXSAVE) is properly reset. /// Unlike tests using hypervisor API, this runs actual guest code with FXSAVE. + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_fpu_guest_visible_state() { let mut ctx = hyperlight_vm_with_mem_mgr_fxsave(); diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index b90468d97..5c59f776b 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -472,6 +472,7 @@ pub(crate) mod tests { use crate::sandbox::{SandboxConfiguration, UninitializedSandbox}; use crate::{Result, is_hypervisor_present, new_error}; + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn test_initialise() -> Result<()> { if !is_hypervisor_present() { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index d4c80092c..fa6506dc8 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -500,3 +500,19 @@ impl DebuggableVm for KvmVm { Ok(()) } } + +#[cfg(test)] +#[cfg(feature = "hw-interrupts")] +mod hw_interrupt_tests { + use super::*; + + #[test] + fn halt_port_is_not_standard_device() { + // OutBAction::Halt port must not overlap in-kernel PIC/PIT/speaker ports + const HALT: u16 = OutBAction::Halt as u16; + const _: () = assert!(HALT != 0x20 && HALT != 0x21); + const _: () = assert!(HALT != 0xA0 && HALT != 0xA1); + const _: () = assert!(HALT != 0x40 && HALT != 0x41 && HALT != 0x42 && HALT != 0x43); + const _: () = assert!(HALT != 0x61); + } +} diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index cfec009e3..39f075781 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -833,3 +833,102 @@ impl MshvVm { } } +#[cfg(test)] +#[cfg(feature = "hw-interrupts")] +mod hw_interrupt_tests { + use super::*; + + #[test] + fn write_read_lapic_u32_roundtrip() { + let mut regs = [0i8; 1024]; + write_lapic_u32(&mut regs, 0xF0, 0xDEAD_BEEF); + assert_eq!(read_lapic_u32(®s, 0xF0), 0xDEAD_BEEF); + } + + #[test] + fn write_read_lapic_u32_multiple_offsets() { + let mut regs = [0i8; 1024]; + write_lapic_u32(&mut regs, 0x80, 0x1234_5678); + write_lapic_u32(&mut regs, 0xF0, 0xABCD_EF01); + write_lapic_u32(&mut regs, 0xE0, 0xFFFF_FFFF); + assert_eq!(read_lapic_u32(®s, 0x80), 0x1234_5678); + assert_eq!(read_lapic_u32(®s, 0xF0), 0xABCD_EF01); + assert_eq!(read_lapic_u32(®s, 0xE0), 0xFFFF_FFFF); + } + + #[test] + fn write_read_lapic_u32_zero() { + let mut regs = [0xFFu8 as i8; 1024]; + write_lapic_u32(&mut regs, 0x80, 0); + assert_eq!(read_lapic_u32(®s, 0x80), 0); + } + + #[test] + fn write_read_lapic_u32_does_not_clobber_neighbors() { + let mut regs = [0i8; 1024]; + write_lapic_u32(&mut regs, 0x80, 0xAAAA_BBBB); + // Check that bytes before and after are untouched + assert_eq!(regs[0x7F], 0); + assert_eq!(regs[0x84], 0); + } + + #[test] + fn synic_timer_config_bitfield() { + let vector: u8 = 0x20; + let config = MshvVm::build_stimer_config(vector); + + assert_ne!(config & 1, 0, "enable bit should be set"); + assert_ne!(config & (1 << 1), 0, "periodic bit should be set"); + assert_eq!(config & (1 << 2), 0, "lazy bit should be clear"); + assert_ne!(config & (1 << 3), 0, "auto_enable bit should be set"); + assert_eq!((config >> 4) & 0xFF, 0x20, "apic_vector should be 0x20"); + assert_ne!(config & (1 << 12), 0, "direct_mode bit should be set"); + } + + #[test] + fn synic_timer_config_different_vectors() { + for vector in [0x20u8, 0x30, 0x40, 0xFF] { + let config = MshvVm::build_stimer_config(vector); + assert_eq!( + (config >> 4) & 0xFF, + vector as u64, + "vector mismatch for {:#x}", + vector + ); + } + } + + #[test] + fn timer_count_us_to_100ns() { + assert_eq!(MshvVm::period_us_to_100ns(1000), 10_000); + assert_eq!(MshvVm::period_us_to_100ns(10_000), 100_000); + assert_eq!(MshvVm::period_us_to_100ns(1), 10); + } + + #[test] + fn apic_base_default_value() { + let base = MshvVm::APIC_BASE_DEFAULT; + assert_ne!(base & (1 << 8), 0, "BSP flag should be set"); + assert_ne!(base & (1 << 11), 0, "global enable should be set"); + assert_eq!( + base & 0xFFFFF000, + 0xFEE00000, + "base address should be 0xFEE00000" + ); + } + + #[test] + fn lapic_svr_init_value() { + // SVR = 0x1FF: bit 8 = enable APIC, bits 0-7 = spurious vector 0xFF + let svr: u32 = 0x1FF; + assert_ne!(svr & 0x100, 0, "APIC enable bit should be set"); + assert_eq!(svr & 0xFF, 0xFF, "spurious vector should be 0xFF"); + } + + #[test] + fn lapic_lvt_masked_value() { + // Masked LVT entry: bit 16 = 1 + let masked: u32 = 0x0001_0000; + assert_ne!(masked & (1 << 16), 0, "mask bit should be set"); + } +} diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 697213ab0..0c3e8de85 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -1338,3 +1338,91 @@ unsafe fn try_load_whv_map_gpa_range2() -> windows_result::Result() as u32, + None, + ) + }; + assert!( + result.is_ok(), + "WHvGetCapability(Features) failed: {result:?}" + ); + let raw = unsafe { capability.Features.AsUINT64 }; + let has_lapic = raw & (1 << 1) != 0; // bit 1 = LocalApicEmulation + println!("WHP Features raw: {raw:#018x}"); + println!("WHP LocalApicEmulation supported: {has_lapic}"); + assert!( + has_lapic, + "This host does not support WHP LocalApicEmulation. \ + hw-interrupts LAPIC timer tests will be skipped. \ + Windows 11 22H2+ or a recent Windows Server build is typically required." + ); + } +} diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 6d53c94c6..2b694be42 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -32,7 +32,7 @@ use alloc::{format, vec}; use core::alloc::Layout; use core::ffi::c_char; use core::hint::black_box; -use core::sync::atomic::{AtomicU64, Ordering}; +use core::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType}; use hyperlight_common::flatbuffer_wrappers::function_types::{ @@ -499,6 +499,157 @@ fn outb_with_port(port: u32, value: u32) { } } +// ============================================================================= +// Hardware timer interrupt test infrastructure +// ============================================================================= + +/// Counter incremented by the timer interrupt handler. +static TIMER_IRQ_COUNT: AtomicU32 = AtomicU32::new(0); + +// Timer IRQ handler (vector 0x20 = IRQ0 after PIC remapping). +// Increments the global counter, sends PIC EOI, and returns from interrupt. +// +// This handler is intentionally minimal: it only touches RAX, uses `lock inc` +// for the atomic counter update, and sends a non-specific EOI to the master PIC. +// +// NOTE: global_asm! on x86_64 in Rust defaults to Intel syntax. +core::arch::global_asm!( + ".globl _timer_irq_handler", + "_timer_irq_handler:", + "push rax", + "lock inc dword ptr [rip + {counter}]", + "mov al, 0x20", + "out 0x20, al", + "pop rax", + "iretq", + counter = sym TIMER_IRQ_COUNT, +); + +unsafe extern "C" { + fn _timer_irq_handler(); +} + +/// IDT pointer structure for SIDT/LIDT instructions. +#[repr(C, packed)] +struct IdtPtr { + limit: u16, + base: u64, +} + +/// Test hardware timer interrupt delivery. +/// +/// This function: +/// 1. Initializes the PIC (remaps IRQ0 to vector 0x20) +/// 2. Installs an IDT entry for vector 0x20 pointing to `_timer_irq_handler` +/// 3. Programs PIT channel 0 as a rate generator at the requested period +/// 4. Arms the PV timer by writing the period to OutBAction::PvTimerConfig port (for MSHV/WHP) +/// 5. Enables interrupts (STI) and busy-waits for timer delivery +/// 6. Disables interrupts (CLI) and returns the interrupt count +/// +/// Parameters: +/// - `period_us`: timer period in microseconds (written to OutBAction::PvTimerConfig port) +/// - `max_spin`: maximum busy-wait iterations before giving up +/// +/// Returns the number of timer interrupts received. +#[guest_function("TestTimerInterrupts")] +fn test_timer_interrupts(period_us: i32, max_spin: i32) -> i32 { + // Reset counter + TIMER_IRQ_COUNT.store(0, Ordering::SeqCst); + + // 1) Initialize PIC — remap IRQ0 to vector 0x20 + unsafe { + // Master PIC + core::arch::asm!("out 0x20, al", in("al") 0x11u8, options(nomem, nostack)); // ICW1 + core::arch::asm!("out 0x21, al", in("al") 0x20u8, options(nomem, nostack)); // ICW2: base 0x20 + core::arch::asm!("out 0x21, al", in("al") 0x04u8, options(nomem, nostack)); // ICW3 + core::arch::asm!("out 0x21, al", in("al") 0x01u8, options(nomem, nostack)); // ICW4 + core::arch::asm!("out 0x21, al", in("al") 0xFEu8, options(nomem, nostack)); // IMR: unmask IRQ0 + + // Slave PIC + core::arch::asm!("out 0xA0, al", in("al") 0x11u8, options(nomem, nostack)); // ICW1 + core::arch::asm!("out 0xA1, al", in("al") 0x28u8, options(nomem, nostack)); // ICW2: base 0x28 + core::arch::asm!("out 0xA1, al", in("al") 0x02u8, options(nomem, nostack)); // ICW3 + core::arch::asm!("out 0xA1, al", in("al") 0x01u8, options(nomem, nostack)); // ICW4 + core::arch::asm!("out 0xA1, al", in("al") 0xFFu8, options(nomem, nostack)); // IMR: mask all + } + + // 2) Install IDT entry for vector 0x20 (timer interrupt) + let handler_addr = _timer_irq_handler as *const () as u64; + + // Read current IDT base via SIDT + let mut idtr = IdtPtr { limit: 0, base: 0 }; + unsafe { + core::arch::asm!( + "sidt [{}]", + in(reg) &mut idtr as *mut IdtPtr, + options(nostack, preserves_flags) + ); + } + + // Write a 16-byte IDT entry at vector 0x20 (offset = 0x20 * 16 = 0x200) + let entry_ptr = (idtr.base as usize + 0x20 * 16) as *mut u8; + unsafe { + // offset_low (bits 0-15 of handler) + core::ptr::write_volatile(entry_ptr as *mut u16, handler_addr as u16); + // selector: 0x08 = kernel code segment + core::ptr::write_volatile(entry_ptr.add(2) as *mut u16, 0x08); + // IST=0, reserved=0 + core::ptr::write_volatile(entry_ptr.add(4), 0); + // type_attr: 0x8E = interrupt gate, present, DPL=0 + core::ptr::write_volatile(entry_ptr.add(5), 0x8E); + // offset_mid (bits 16-31) + core::ptr::write_volatile(entry_ptr.add(6) as *mut u16, (handler_addr >> 16) as u16); + // offset_high (bits 32-63) + core::ptr::write_volatile(entry_ptr.add(8) as *mut u32, (handler_addr >> 32) as u32); + // reserved + core::ptr::write_volatile(entry_ptr.add(12) as *mut u32, 0); + } + + // 3) Program PIT channel 0 as rate generator (mode 2). + // Divisor = period_us * 1_193_182 / 1_000_000 (PIT oscillator is 1.193182 MHz). + // On KVM the in-kernel PIT handles these IO writes directly. + // On MSHV/WHP these ports are silently absorbed (timer is set via OutBAction::PvTimerConfig). + let divisor = ((period_us as u64) * 1_193_182 / 1_000_000).clamp(1, 0xFFFF) as u16; + unsafe { + // Command: channel 0, lobyte/hibyte access, mode 2 (rate generator) + core::arch::asm!("out 0x43, al", in("al") 0x34u8, options(nomem, nostack)); + // Channel 0 data: low byte of divisor + core::arch::asm!("out 0x40, al", in("al") (divisor & 0xFF) as u8, options(nomem, nostack)); + // Channel 0 data: high byte of divisor + core::arch::asm!("out 0x40, al", in("al") (divisor >> 8) as u8, options(nomem, nostack)); + } + + // 4) Arm timer: write period_us to OutBAction::PvTimerConfig port + unsafe { + core::arch::asm!( + "out dx, eax", + in("dx") hyperlight_common::outb::OutBAction::PvTimerConfig as u16, + in("eax") period_us as u32, + options(nomem, nostack, preserves_flags) + ); + } + + // 5) Enable interrupts and wait for at least one timer tick + unsafe { + core::arch::asm!("sti", options(nomem, nostack)); + } + + let max = max_spin as u32; + for _ in 0..max { + if TIMER_IRQ_COUNT.load(Ordering::SeqCst) > 0 { + break; + } + core::hint::spin_loop(); + } + + // 6) Disable interrupts and return count + unsafe { + core::arch::asm!("cli", options(nomem, nostack)); + } + + TIMER_IRQ_COUNT.load(Ordering::SeqCst) as i32 +} + static mut COUNTER: i32 = 0; #[guest_function("AddToStatic")] @@ -823,7 +974,14 @@ fn corrupt_output_size_prefix() -> i32 { buf[12..16].copy_from_slice(&[0u8; 4]); buf[16..24].copy_from_slice(&8_u64.to_le_bytes()); - core::arch::asm!("hlt", options(noreturn)); + core::arch::asm!( + "out dx, eax", + "cli", + "hlt", + in("dx") hyperlight_common::outb::OutBAction::Halt as u16, + in("eax") 0u32, + options(noreturn), + ); } } @@ -839,7 +997,14 @@ fn corrupt_output_back_pointer() -> i32 { buf[8..16].copy_from_slice(&[0u8; 8]); buf[16..24].copy_from_slice(&0xDEAD_u64.to_le_bytes()); - core::arch::asm!("hlt", options(noreturn)); + core::arch::asm!( + "out dx, eax", + "cli", + "hlt", + in("dx") hyperlight_common::outb::OutBAction::Halt as u16, + in("eax") 0u32, + options(noreturn), + ); } } From 9b4ed3a89da2cb3cdaaca1e4a49aa982803c533b Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 3 Mar 2026 22:31:18 +0000 Subject: [PATCH 06/37] ci: add hw-interrupts test step to CI and Justfile Signed-off-by: danbugs --- .github/workflows/dep_build_test.yml | 5 +++++ Justfile | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/dep_build_test.yml b/.github/workflows/dep_build_test.yml index 0c72baa66..e4d64fe75 100644 --- a/.github/workflows/dep_build_test.yml +++ b/.github/workflows/dep_build_test.yml @@ -105,6 +105,11 @@ jobs: # with only one driver enabled (kvm/mshv3 features are unix-only, no-op on Windows) just test ${{ inputs.config }} ${{ inputs.hypervisor == 'mshv3' && 'mshv3' || 'kvm' }} + - name: Run Rust tests with hw-interrupts + run: | + # with hw-interrupts feature enabled (+ explicit driver on Linux) + just test ${{ inputs.config }} ${{ runner.os == 'Linux' && (inputs.hypervisor == 'mshv3' && 'mshv3,hw-interrupts' || 'kvm,hw-interrupts') || 'hw-interrupts' }} + - name: Run Rust Gdb tests env: RUST_LOG: debug diff --git a/Justfile b/Justfile index 1a066e2da..0f03663f4 100644 --- a/Justfile +++ b/Justfile @@ -87,6 +87,9 @@ test-like-ci config=default-target hypervisor="kvm": @# with only one driver enabled + build-metadata just test {{config}} build-metadata,{{ if hypervisor == "mshv3" {"mshv3"} else {"kvm"} }} + @# with hw-interrupts enabled (+ explicit driver on Linux) + {{ if os() == "linux" { if hypervisor == "mshv3" { "just test " + config + " mshv3,hw-interrupts" } else { "just test " + config + " kvm,hw-interrupts" } } else { "just test " + config + " hw-interrupts" } }} + @# make sure certain cargo features compile just check @@ -151,6 +154,9 @@ build-test-like-ci config=default-target hypervisor="kvm": @# Run Rust tests with single driver {{ if os() == "linux" { "just test " + config+ " " + if hypervisor == "mshv3" { "mshv3" } else { "kvm" } } else { "" } }} + @# Run Rust tests with hw-interrupts + {{ if os() == "linux" { if hypervisor == "mshv3" { "just test " + config + " mshv3,hw-interrupts" } else { "just test " + config + " kvm,hw-interrupts" } } else { "just test " + config + " hw-interrupts" } }} + @# Run Rust Gdb tests just test-rust-gdb-debugging {{config}} @@ -286,6 +292,7 @@ check: {{ cargo-cmd }} check -p hyperlight-host --features trace_guest,mem_profile {{ target-triple-flag }} {{ cargo-cmd }} check -p hyperlight-host --features nanvix-unstable {{ target-triple-flag }} {{ cargo-cmd }} check -p hyperlight-host --features nanvix-unstable,executable_heap {{ target-triple-flag }} + {{ cargo-cmd }} check -p hyperlight-host --features hw-interrupts {{ target-triple-flag }} fmt-check: (ensure-nightly-fmt) cargo +nightly fmt --all -- --check From 7a0da2277d610a4f2466790960898f2f34e9111a Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 4 Mar 2026 01:14:39 +0000 Subject: [PATCH 07/37] fix: add halt port IO write and restore hw_timer_interrupts test Add Halt outb (port 108) before cli/hlt in guest init and dummyguest so KVM's in-kernel LAPIC does not absorb the HLT exit. Also restore the hw_timer_interrupts integration test that was inadvertently dropped. Signed-off-by: danbugs --- .../src/arch/amd64/init.rs | 3 +++ src/hyperlight_host/tests/integration_test.rs | 25 +++++++++++++++++++ src/tests/rust_guests/dummyguest/src/main.rs | 10 +++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index 09f3c4775..fa8b86c4d 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -173,6 +173,9 @@ core::arch::global_asm!(" mov rsp, r8\n xor ebp, ebp\n call {generic_init}\n + mov dx, 108\n + out dx, al\n + cli\n hlt\n .cfi_endproc\n ", generic_init = sym crate::generic_init); diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index a6e71a24d..879e92f14 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -1848,3 +1848,28 @@ fn memory_region_types_are_publicly_accessible() { }; } } + +/// Test that hardware timer interrupts are delivered to the guest via OutBAction::PvTimerConfig port. +/// +/// The guest function `TestTimerInterrupts`: +/// 1. Initializes the PIC (IRQ0 -> vector 0x20) +/// 2. Installs an IDT entry for vector 0x20 +/// 3. Arms the timer via OutBAction::PvTimerConfig port +/// 4. Enables interrupts and busy-waits +/// 5. Returns the number of timer interrupts received +/// +/// Requires the `hw-interrupts` feature on the host side. +#[test] +#[cfg(feature = "hw-interrupts")] +fn hw_timer_interrupts() { + with_rust_sandbox(|mut sbox| { + // 1ms period, up to 100M spin iterations (should fire well before that) + let count: i32 = sbox + .call("TestTimerInterrupts", (1000_i32, 100_000_000_i32)) + .unwrap(); + assert!( + count > 0, + "Expected at least one timer interrupt, got {count}" + ); + }); +} diff --git a/src/tests/rust_guests/dummyguest/src/main.rs b/src/tests/rust_guests/dummyguest/src/main.rs index 01063dbdb..2d9db910e 100644 --- a/src/tests/rust_guests/dummyguest/src/main.rs +++ b/src/tests/rust_guests/dummyguest/src/main.rs @@ -30,8 +30,16 @@ fn panic(_info: &PanicInfo) -> ! { } fn halt() { + // OutBAction::Halt = 108; using raw constant to avoid pulling in + // anyhow (via hyperlight_common's TryFrom impl) which requires alloc. unsafe { - asm!("hlt"); + asm!( + "out dx, eax", + "cli", + "hlt", + in("dx") 108u16, + in("eax") 0u32, + ); } } From 4b10e0607dba3400e911160f1265161403c8b850 Mon Sep 17 00:00:00 2001 From: danbugs Date: Thu, 5 Mar 2026 01:30:50 +0000 Subject: [PATCH 08/37] experiment: replace in-kernel PIT with irqfd + host timer thread Replace create_pit2() with a host-side timer thread that fires an EventFd registered via register_irqfd for GSI 0 (IRQ0). The in-kernel IRQ chip (PIC + IOAPIC + LAPIC) is kept for proper interrupt routing. When the guest writes to PvTimerConfig (port 107), the host parses the requested period and spawns a timer thread that periodically writes to the EventFd. Guest PIT port writes (0x40-0x43) exit to userspace and are silently ignored. Tested with nanvix hello-c.elf: works correctly. Signed-off-by: danbugs --- .../hypervisor/virtual_machine/kvm/x86_64.rs | 121 +++++++++++++++--- 1 file changed, 103 insertions(+), 18 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index fa6506dc8..50d02e12b 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -15,12 +15,14 @@ limitations under the License. */ use std::sync::LazyLock; +#[cfg(feature = "hw-interrupts")] +use std::sync::atomic::{AtomicBool, Ordering}; +#[cfg(feature = "hw-interrupts")] +use std::sync::Arc; use hyperlight_common::outb::OutBAction; #[cfg(gdb)] use kvm_bindings::kvm_guest_debug; -#[cfg(feature = "hw-interrupts")] -use kvm_bindings::{KVM_PIT_SPEAKER_DUMMY, kvm_pit_config}; use kvm_bindings::{ kvm_debugregs, kvm_fpu, kvm_regs, kvm_sregs, kvm_userspace_memory_region, kvm_xsave, }; @@ -29,6 +31,8 @@ use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd}; use tracing::{Span, instrument}; #[cfg(feature = "trace_guest")] use tracing_opentelemetry::OpenTelemetrySpanExt; +#[cfg(feature = "hw-interrupts")] +use vmm_sys_util::eventfd::EventFd; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; @@ -87,16 +91,35 @@ pub(crate) fn is_hypervisor_present() -> bool { } /// A KVM implementation of a single-vcpu VM -#[derive(Debug)] pub(crate) struct KvmVm { vm_fd: VmFd, vcpu_fd: VcpuFd, + /// EventFd registered via irqfd for GSI 0 (IRQ0). A timer thread + /// writes to this to inject periodic timer interrupts. + #[cfg(feature = "hw-interrupts")] + timer_irq_eventfd: EventFd, + /// Signals the timer thread to stop. + #[cfg(feature = "hw-interrupts")] + timer_stop: Arc, + /// Handle to the background timer thread (if started). + #[cfg(feature = "hw-interrupts")] + timer_thread: Option>, + // KVM, as opposed to mshv/whp, has no get_guest_debug() ioctl, so we must track the state ourselves #[cfg(gdb)] debug_regs: kvm_guest_debug, } +impl std::fmt::Debug for KvmVm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KvmVm") + .field("vm_fd", &self.vm_fd) + .field("vcpu_fd", &self.vcpu_fd) + .finish_non_exhaustive() + } +} + static KVM: LazyLock> = LazyLock::new(|| Kvm::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into()))); @@ -111,23 +134,35 @@ impl KvmVm { .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; // When hw-interrupts is enabled, create the in-kernel IRQ chip - // (PIC + IOAPIC + LAPIC) and PIT before creating the vCPU so the + // (PIC + IOAPIC + LAPIC) before creating the vCPU so the // per-vCPU LAPIC is initialised in virtual-wire mode (LINT0 = ExtINT). - // The guest programs the PIC remap and PIT frequency via standard - // IO port writes, which the in-kernel devices handle transparently. + // The guest programs the PIC remap via standard IO port writes, + // which the in-kernel PIC handles transparently. + // + // Instead of creating an in-kernel PIT (create_pit2), we use a + // host-side timer thread + irqfd to inject IRQ0 at the rate + // requested by the guest via OutBAction::PvTimerConfig (port 107). + // This eliminates the in-kernel PIT device. Guest PIT port writes + // (0x40, 0x43) become no-ops handled in the run loop. #[cfg(feature = "hw-interrupts")] - { + let timer_irq_eventfd = { vm_fd .create_irq_chip() .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - let pit_config = kvm_pit_config { - flags: KVM_PIT_SPEAKER_DUMMY, - ..Default::default() - }; + + // Create an EventFd and register it via irqfd for GSI 0 (IRQ0). + // When the timer thread writes to this EventFd, the in-kernel + // PIC will assert IRQ0, which is delivered as the vector the + // guest configured during PIC remap (typically vector 0x20). + let eventfd = EventFd::new(0) + .map_err(|e| CreateVmError::InitializeVm( + kvm_ioctls::Error::new(e.raw_os_error().unwrap_or(libc::EIO)).into() + ))?; vm_fd - .create_pit2(pit_config) + .register_irqfd(&eventfd, 0) .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - } + eventfd + }; let vcpu_fd = vm_fd .create_vcpu(0) @@ -155,6 +190,12 @@ impl KvmVm { Ok(Self { vm_fd, vcpu_fd, + #[cfg(feature = "hw-interrupts")] + timer_irq_eventfd, + #[cfg(feature = "hw-interrupts")] + timer_stop: Arc::new(AtomicBool::new(false)), + #[cfg(feature = "hw-interrupts")] + timer_thread: None, #[cfg(gdb)] debug_regs: kvm_guest_debug::default(), }) @@ -195,9 +236,11 @@ impl VirtualMachine for KvmVm { #[cfg(feature = "trace_guest")] tc.setup_guest_trace(Span::current().context()); - // When hw-interrupts is enabled, the in-kernel PIC + PIT + LAPIC handle - // timer interrupts natively. The guest signals "I'm done" by writing to - // OutBAction::Halt (an IO port exit) instead of using HLT, because the in-kernel + // When hw-interrupts is enabled, the in-kernel PIC + LAPIC deliver + // interrupts triggered by the host-side timer thread via irqfd. + // There is no in-kernel PIT; guest PIT port writes are no-ops. + // The guest signals "I'm done" by writing to OutBAction::Halt + // (an IO port exit) instead of using HLT, because the in-kernel // LAPIC absorbs HLT (never returns VcpuExit::Hlt to userspace). #[cfg(feature = "hw-interrupts")] loop { @@ -209,11 +252,43 @@ impl VirtualMachine for KvmVm { } Ok(VcpuExit::IoOut(port, data)) => { if port == OutBAction::Halt as u16 { + // Stop the timer thread before returning. + self.timer_stop.store(true, Ordering::Relaxed); + if let Some(h) = self.timer_thread.take() { + let _ = h.join(); + } return Ok(VmExit::Halt()); } if port == OutBAction::PvTimerConfig as u16 { - // Ignore: the in-kernel PIT handles timer scheduling. - // The guest writes here for MSHV/WHP compatibility. + // The guest is configuring the timer period. + // Extract the period in microseconds (LE u32). + if data.len() >= 4 { + let period_us = u32::from_le_bytes( + data[..4].try_into().unwrap(), + ) as u64; + if period_us > 0 && self.timer_thread.is_none() { + let eventfd = self + .timer_irq_eventfd + .try_clone() + .expect("failed to clone timer EventFd"); + let stop = self.timer_stop.clone(); + let period = std::time::Duration::from_micros(period_us); + self.timer_thread = Some(std::thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + std::thread::sleep(period); + if stop.load(Ordering::Relaxed) { + break; + } + let _ = eventfd.write(1); + } + })); + } + } + continue; + } + // PIT ports (0x40-0x43): no in-kernel PIT, so these + // exit to userspace. Silently ignore them. + if (0x40..=0x43).contains(&port) { continue; } return Ok(VmExit::IoOut(port, data.to_vec())); @@ -501,6 +576,16 @@ impl DebuggableVm for KvmVm { } } +#[cfg(feature = "hw-interrupts")] +impl Drop for KvmVm { + fn drop(&mut self) { + self.timer_stop.store(true, Ordering::Relaxed); + if let Some(h) = self.timer_thread.take() { + let _ = h.join(); + } + } +} + #[cfg(test)] #[cfg(feature = "hw-interrupts")] mod hw_interrupt_tests { From f6e86a9ccab597dda11132274205e0bbb2d18754 Mon Sep 17 00:00:00 2001 From: danbugs Date: Thu, 5 Mar 2026 03:45:00 +0000 Subject: [PATCH 09/37] =?UTF-8?q?experiment:=20MSHV=20=E2=80=94=20replace?= =?UTF-8?q?=20SynIC=20timer=20with=20request=5Fvirtual=5Finterrupt=20+=20h?= =?UTF-8?q?ost=20timer=20thread?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the SynIC-based timer (STIMER0_CONFIG/COUNT + SCONTROL) with a host timer thread that periodically calls request_virtual_interrupt() (HVCALL_ASSERT_VIRTUAL_INTERRUPT, hypercall 148). This makes MSHV consistent with the KVM irqfd and WHP software timer patterns — all three platforms now use: (1) start timer on PvTimerConfig, (2) host thread periodically injects interrupt, (3) stop on Halt/Drop. Changes: - Remove SynIC dependency: make_default_synthetic_features_mask, SCONTROL, STIMER0_CONFIG, STIMER0_COUNT imports and usage - Set synthetic_proc_features to 0 (no SynIC features needed) - Wrap vm_fd in Arc for thread-safe sharing - Add timer_stop + timer_thread fields (same pattern as KVM) - PvTimerConfig handler spawns timer thread with request_virtual_interrupt - Add Drop impl for timer cleanup - Remove build_stimer_config() and period_us_to_100ns() helpers - Remove SynIC-related unit tests Kept unchanged: LAPIC init, MSR intercept, PIC emulation, EOI bridging (all still required for interrupt delivery). NOT RUNTIME TESTED — MSHV not available on this system. Signed-off-by: danbugs --- .../hypervisor/virtual_machine/mshv/x86_64.rs | 202 ++++++++---------- 1 file changed, 94 insertions(+), 108 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 39f075781..1da68795b 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -16,7 +16,11 @@ limitations under the License. #[cfg(gdb)] use std::fmt::Debug; +#[cfg(feature = "hw-interrupts")] +use std::sync::Arc; use std::sync::LazyLock; +#[cfg(feature = "hw-interrupts")] +use std::sync::atomic::{AtomicBool, Ordering}; use hyperlight_common::outb::OutBAction; #[cfg(feature = "hw-interrupts")] @@ -39,11 +43,10 @@ use mshv_bindings::{ }; #[cfg(feature = "hw-interrupts")] use mshv_bindings::{ - hv_register_name_HV_REGISTER_SCONTROL, hv_register_name_HV_REGISTER_STIMER0_CONFIG, - hv_register_name_HV_REGISTER_STIMER0_COUNT, hv_register_name_HV_X64_REGISTER_RAX, + hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED, hv_register_name_HV_X64_REGISTER_RAX, }; #[cfg(feature = "hw-interrupts")] -use mshv_ioctls::make_default_synthetic_features_mask; +use mshv_ioctls::InterruptRequest; use mshv_ioctls::{Mshv, VcpuFd, VmFd}; use tracing::{Span, instrument}; #[cfg(feature = "trace_guest")] @@ -81,14 +84,31 @@ pub(crate) fn is_hypervisor_present() -> bool { } /// A MSHV implementation of a single-vcpu VM -#[derive(Debug)] pub(crate) struct MshvVm { + /// VmFd wrapped in Arc so the timer thread can call + /// `request_virtual_interrupt` from a background thread. + #[cfg(feature = "hw-interrupts")] + vm_fd: Arc, + #[cfg(not(feature = "hw-interrupts"))] vm_fd: VmFd, vcpu_fd: VcpuFd, #[cfg(feature = "hw-interrupts")] pic: Pic, + /// Signals the timer thread to stop. #[cfg(feature = "hw-interrupts")] - synic_timer_active: bool, + timer_stop: Arc, + /// Handle to the background timer thread (if started). + #[cfg(feature = "hw-interrupts")] + timer_thread: Option>, +} + +impl std::fmt::Debug for MshvVm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MshvVm") + .field("vm_fd", &self.vm_fd) + .field("vcpu_fd", &self.vcpu_fd) + .finish_non_exhaustive() + } } static MSHV: LazyLock> = @@ -123,8 +143,8 @@ impl MshvVm { #[allow(unused_mut)] let mut pr: mshv_create_partition_v2 = Default::default(); - // Enable LAPIC for hw-interrupts — required for SynIC direct-mode - // timer delivery via APIC vector. MSHV_PT_BIT_LAPIC = bit 0. + // Enable LAPIC for hw-interrupts — required for interrupt delivery + // via request_virtual_interrupt. MSHV_PT_BIT_LAPIC = bit 0. #[cfg(feature = "hw-interrupts")] { pr.pt_flags = 1u64; // LAPIC @@ -134,12 +154,11 @@ impl MshvVm { .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; let vcpu_fd = { - // Use the full default synthetic features mask (includes SynIC, - // synthetic timers, direct mode, hypercall regs, etc.) when - // hw-interrupts is enabled. Without hw-interrupts, leave all - // features at zero. + // No synthetic features needed — timer interrupts are injected + // directly via request_virtual_interrupt(), not through SynIC. + // The LAPIC (enabled via pt_flags) handles interrupt delivery. #[cfg(feature = "hw-interrupts")] - let feature_val = make_default_synthetic_features_mask(); + let feature_val = 0u64; #[cfg(not(feature = "hw-interrupts"))] let feature_val = 0u64; @@ -161,7 +180,7 @@ impl MshvVm { // Initialize the virtual LAPIC when hw-interrupts is enabled. // LAPIC defaults to disabled (SVR bit 8 = 0), which means no APIC - // interrupts (including SynIC direct-mode timer) can be delivered. + // interrupts can be delivered (request_virtual_interrupt would fail). #[cfg(feature = "hw-interrupts")] { let mut lapic: LapicState = vcpu_fd @@ -181,7 +200,7 @@ impl MshvVm { write_lapic_u32(&mut lapic.regs, 0x350, 0x0001_0000); // LINT1 (offset 0x360): NMI delivery, not masked write_lapic_u32(&mut lapic.regs, 0x360, 0x400); - // LVT Timer (offset 0x320): masked — we use SynIC timer instead + // LVT Timer (offset 0x320): masked — we use host timer thread instead write_lapic_u32(&mut lapic.regs, 0x320, 0x0001_0000); // LVT Error (offset 0x370): masked write_lapic_u32(&mut lapic.regs, 0x370, 0x0001_0000); @@ -193,7 +212,7 @@ impl MshvVm { // Install MSR intercept for IA32_APIC_BASE (MSR 0x1B) to prevent // the guest from globally disabling the LAPIC. The Nanvix kernel // disables the APIC when no I/O APIC is detected, but we need - // the LAPIC enabled for SynIC direct-mode timer delivery. + // the LAPIC enabled for request_virtual_interrupt delivery. // // This may fail with AccessDenied on some kernel versions; in // that case we fall back to re-enabling the LAPIC in the timer @@ -208,12 +227,17 @@ impl MshvVm { } Ok(Self { + #[cfg(feature = "hw-interrupts")] + vm_fd: Arc::new(vm_fd), + #[cfg(not(feature = "hw-interrupts"))] vm_fd, vcpu_fd, #[cfg(feature = "hw-interrupts")] pic: Pic::new(), #[cfg(feature = "hw-interrupts")] - synic_timer_active: false, + timer_stop: Arc::new(AtomicBool::new(false)), + #[cfg(feature = "hw-interrupts")] + timer_thread: None, }) } } @@ -267,12 +291,12 @@ impl VirtualMachine for MshvVm { let msg_type = m.header.message_type; match msg_type { HALT_MESSAGE => { - // With SynIC timer active, re-enter the guest. + // With timer thread active, re-enter the guest. // The hypervisor will deliver pending timer // interrupts on the next run(), waking the // vCPU from HLT. #[cfg(feature = "hw-interrupts")] - if self.synic_timer_active { + if self.timer_thread.is_some() { continue; } return Ok(VmExit::Halt()); @@ -301,6 +325,14 @@ impl VirtualMachine for MshvVm { // OutBAction::Halt always means "I'm done", regardless // of whether a timer is active. if is_write && port_number == OutBAction::Halt as u16 { + // Stop the timer thread before returning. + #[cfg(feature = "hw-interrupts")] + { + self.timer_stop.store(true, Ordering::Relaxed); + if let Some(h) = self.timer_thread.take() { + let _ = h.join(); + } + } return Ok(VmExit::Halt()); } @@ -411,12 +443,12 @@ impl VirtualMachine for MshvVm { } Err(e) => match e.errno() { libc::EINTR => { - // When the SynIC timer is active, EINTR may be + // When the timer thread is active, EINTR may be // a spurious signal. Continue the run loop to // let the hypervisor deliver any pending timer // interrupt. #[cfg(feature = "hw-interrupts")] - if self.synic_timer_active { + if self.timer_thread.is_some() { continue; } return Ok(VmExit::Cancelled()); @@ -684,8 +716,9 @@ impl MshvVm { const APIC_BASE_DEFAULT: u64 = 0xFEE00900; /// Perform LAPIC EOI: clear the highest-priority in-service bit. - /// Called when the guest sends PIC EOI, since SynIC direct-mode timer - /// delivers through the LAPIC and the guest only acknowledges via PIC. + /// Called when the guest sends PIC EOI, since the timer thread + /// delivers interrupts through the LAPIC and the guest only + /// acknowledges via PIC. fn do_lapic_eoi(&self) { if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { // ISR is at offset 0x100, 8 x 32-bit words (one per 16 bytes). @@ -717,33 +750,11 @@ impl MshvVm { None } - /// Build SynIC timer config register value. - /// - /// Layout (from Hyper-V TLFS): - /// - Bit 0: enable - /// - Bit 1: periodic - /// - Bit 3: auto_enable - /// - Bits 4-11: apic_vector - /// - Bit 12: direct_mode - fn build_stimer_config(vector: u8) -> u64 { - 1 // enable - | (1 << 1) // periodic - | (1 << 3) // auto_enable - | ((vector as u64) << 4) // apic_vector - | (1 << 12) // direct_mode - } - - /// Convert a timer period in microseconds to the SynIC timer count - /// in 100-nanosecond units (Hyper-V reference time). - fn period_us_to_100ns(period_us: u32) -> u64 { - (period_us as u64) * 10 - } - fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { if port == OutBAction::PvTimerConfig as u16 { if data.len() >= 4 { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); - if period_us > 0 { + if period_us > 0 && self.timer_thread.is_none() { // Re-enable LAPIC if the guest disabled it (via WRMSR // to MSR 0x1B clearing bit 11). This happens when the // Nanvix kernel doesn't detect an I/O APIC. @@ -780,42 +791,40 @@ impl MshvVm { } } - // Enable SynIC on this vCPU (SCONTROL bit 0 = enable) - let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { - name: hv_register_name_HV_REGISTER_SCONTROL, - value: hv_register_value { reg64: 1 }, - ..Default::default() - }]); - - // Arm STIMER0 immediately (the guest needs timer - // interrupts for thread preemption, not just idle - // wakeup). - let vector = self.pic.master_vector_base(); - let config = Self::build_stimer_config(vector); - let count_100ns = Self::period_us_to_100ns(period_us); - - let _ = self.vcpu_fd.set_reg(&[ - hv_register_assoc { - name: hv_register_name_HV_REGISTER_STIMER0_CONFIG, - value: hv_register_value { reg64: config }, - ..Default::default() - }, - hv_register_assoc { - name: hv_register_name_HV_REGISTER_STIMER0_COUNT, - value: hv_register_value { reg64: count_100ns }, - ..Default::default() - }, - ]); - self.synic_timer_active = true; + // Start a host timer thread that periodically injects + // interrupts via request_virtual_interrupt (HVCALL 148). + // This replaces the SynIC timer approach and makes MSHV + // consistent with the KVM irqfd and WHP software timer + // patterns. + let vm_fd = self.vm_fd.clone(); + let vector = self.pic.master_vector_base() as u32; + let stop = self.timer_stop.clone(); + let period = std::time::Duration::from_micros(period_us as u64); + self.timer_thread = Some(std::thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + std::thread::sleep(period); + if stop.load(Ordering::Relaxed) { + break; + } + let _ = vm_fd.request_virtual_interrupt(&InterruptRequest { + interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED, + apic_id: 0, + vector, + level_triggered: false, + logical_destination_mode: false, + long_mode: false, + }); + } + })); } } return true; } if !data.is_empty() && self.pic.handle_io_out(port, data[0]) { // When the guest sends PIC EOI (port 0x20, OCW2 non-specific EOI), - // also perform LAPIC EOI since SynIC timer delivers via LAPIC and - // the guest only knows about the PIC. - if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.synic_timer_active { + // also perform LAPIC EOI since the timer thread delivers via LAPIC + // and the guest only acknowledges via PIC. + if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.timer_thread.is_some() { self.do_lapic_eoi(); } return true; @@ -833,6 +842,16 @@ impl MshvVm { } } +#[cfg(feature = "hw-interrupts")] +impl Drop for MshvVm { + fn drop(&mut self) { + self.timer_stop.store(true, Ordering::Relaxed); + if let Some(h) = self.timer_thread.take() { + let _ = h.join(); + } + } +} + #[cfg(test)] #[cfg(feature = "hw-interrupts")] mod hw_interrupt_tests { @@ -872,39 +891,6 @@ mod hw_interrupt_tests { assert_eq!(regs[0x84], 0); } - #[test] - fn synic_timer_config_bitfield() { - let vector: u8 = 0x20; - let config = MshvVm::build_stimer_config(vector); - - assert_ne!(config & 1, 0, "enable bit should be set"); - assert_ne!(config & (1 << 1), 0, "periodic bit should be set"); - assert_eq!(config & (1 << 2), 0, "lazy bit should be clear"); - assert_ne!(config & (1 << 3), 0, "auto_enable bit should be set"); - assert_eq!((config >> 4) & 0xFF, 0x20, "apic_vector should be 0x20"); - assert_ne!(config & (1 << 12), 0, "direct_mode bit should be set"); - } - - #[test] - fn synic_timer_config_different_vectors() { - for vector in [0x20u8, 0x30, 0x40, 0xFF] { - let config = MshvVm::build_stimer_config(vector); - assert_eq!( - (config >> 4) & 0xFF, - vector as u64, - "vector mismatch for {:#x}", - vector - ); - } - } - - #[test] - fn timer_count_us_to_100ns() { - assert_eq!(MshvVm::period_us_to_100ns(1000), 10_000); - assert_eq!(MshvVm::period_us_to_100ns(10_000), 100_000); - assert_eq!(MshvVm::period_us_to_100ns(1), 10); - } - #[test] fn apic_base_default_value() { let base = MshvVm::APIC_BASE_DEFAULT; From 4a58433fd9ef3a96db74b62ee3d40490b7be8562 Mon Sep 17 00:00:00 2001 From: danbugs Date: Thu, 5 Mar 2026 06:45:10 +0000 Subject: [PATCH 10/37] =?UTF-8?q?experiment:=20eliminate=20PIC=20state=20m?= =?UTF-8?q?achine=20=E2=80=94=20hardcode=20vector=200x20,=20no-op=20PIC=20?= =?UTF-8?q?ports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the Pic struct from both MSHV and WHP. The nanvix guest always remaps IRQ0 to vector 0x20 via PIC ICW2, so the host can hardcode the timer vector instead of tracking the ICW initialization sequence. PIC ports (0x20/0x21/0xA0/0xA1) are now accepted as no-ops. The only PIC-related logic retained is LAPIC EOI bridging: when the guest sends a non-specific EOI on port 0x20, the host clears the LAPIC ISR bit. This is 3 lines of logic vs the ~130-line pic.rs state machine. This addresses the PR 1272 reviewer concern about emulating "a good fraction of the IC controller" — the PIC state machine was the unnecessary complexity, not the timer delivery mechanism. Signed-off-by: danbugs --- .../hypervisor/virtual_machine/mshv/x86_64.rs | 50 +++++++++++-------- .../src/hypervisor/virtual_machine/whp.rs | 50 +++++++++++-------- 2 files changed, 58 insertions(+), 42 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 1da68795b..205caa346 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -54,8 +54,6 @@ use tracing_opentelemetry::OpenTelemetrySpanExt; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; -#[cfg(feature = "hw-interrupts")] -use crate::hypervisor::pic::Pic; use crate::hypervisor::regs::{ CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, @@ -92,8 +90,6 @@ pub(crate) struct MshvVm { #[cfg(not(feature = "hw-interrupts"))] vm_fd: VmFd, vcpu_fd: VcpuFd, - #[cfg(feature = "hw-interrupts")] - pic: Pic, /// Signals the timer thread to stop. #[cfg(feature = "hw-interrupts")] timer_stop: Arc, @@ -233,8 +229,6 @@ impl MshvVm { vm_fd, vcpu_fd, #[cfg(feature = "hw-interrupts")] - pic: Pic::new(), - #[cfg(feature = "hw-interrupts")] timer_stop: Arc::new(AtomicBool::new(false)), #[cfg(feature = "hw-interrupts")] timer_thread: None, @@ -715,6 +709,11 @@ impl MshvVm { /// BSP flag (bit 8) + global enable (bit 11). const APIC_BASE_DEFAULT: u64 = 0xFEE00900; + /// Hardcoded timer interrupt vector. The nanvix guest always + /// remaps IRQ0 to vector 0x20 via PIC ICW2, so we use that same + /// vector directly — no PIC state machine needed. + const TIMER_VECTOR: u32 = 0x20; + /// Perform LAPIC EOI: clear the highest-priority in-service bit. /// Called when the guest sends PIC EOI, since the timer thread /// delivers interrupts through the LAPIC and the guest only @@ -737,17 +736,21 @@ impl MshvVm { } /// Handle a hardware-interrupt IO IN request. - /// Returns `Some(value)` if the port was handled (PIC or PIT read), - /// `None` if the port should be passed through to the guest handler. + /// Returns `Some(value)` if the port was handled, `None` if the + /// port should be passed through to the guest handler. + /// + /// No PIC state machine — PIC data ports return 0xFF (all masked), + /// PIC command ports return 0 (no pending IRQ), PIT returns 0. fn handle_hw_io_in(&self, port: u16) -> Option { - if let Some(val) = self.pic.handle_io_in(port) { - return Some(val as u64); - } - // PIT data port read -- return 0 - if port == 0x40 { - return Some(0); + match port { + // PIC master/slave data ports — return "all masked" + 0x21 | 0xA1 => Some(0xFF), + // PIC master/slave command ports — return 0 (ISR/IRR read) + 0x20 | 0xA0 => Some(0), + // PIT data port read — return 0 + 0x40 => Some(0), + _ => None, } - None } fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { @@ -797,7 +800,7 @@ impl MshvVm { // consistent with the KVM irqfd and WHP software timer // patterns. let vm_fd = self.vm_fd.clone(); - let vector = self.pic.master_vector_base() as u32; + let vector = Self::TIMER_VECTOR; let stop = self.timer_stop.clone(); let period = std::time::Duration::from_micros(period_us as u64); self.timer_thread = Some(std::thread::spawn(move || { @@ -820,11 +823,16 @@ impl MshvVm { } return true; } - if !data.is_empty() && self.pic.handle_io_out(port, data[0]) { - // When the guest sends PIC EOI (port 0x20, OCW2 non-specific EOI), - // also perform LAPIC EOI since the timer thread delivers via LAPIC - // and the guest only acknowledges via PIC. - if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.timer_thread.is_some() { + // PIC ports (0x20, 0x21, 0xA0, 0xA1): accept as no-ops. + // No PIC state machine — we only need LAPIC EOI bridging when + // the guest sends a non-specific EOI on the master PIC command + // port (0x20, OCW2 byte with bits 7:5 = 001). + if port == 0x20 || port == 0x21 || port == 0xA0 || port == 0xA1 { + if port == 0x20 + && !data.is_empty() + && (data[0] & 0xE0) == 0x20 + && self.timer_thread.is_some() + { self.do_lapic_eoi(); } return true; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 0c3e8de85..36a9f151a 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -34,8 +34,6 @@ use windows_result::HRESULT; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebuggableVm}; -#[cfg(feature = "hw-interrupts")] -use crate::hypervisor::pic::Pic; use crate::hypervisor::regs::{ Align16, CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, WHP_DEBUG_REGS_NAMES, WHP_DEBUG_REGS_NAMES_LEN, @@ -100,8 +98,6 @@ pub(crate) struct WhpVm { /// Tracks host-side file mappings (view_base, mapping_handle) for /// cleanup on unmap or drop. Only populated for MappedFile regions. file_mappings: Vec<(HandleWrapper, *mut c_void)>, - #[cfg(feature = "hw-interrupts")] - pic: Pic, /// Whether the WHP host supports LAPIC emulation mode. #[cfg(feature = "hw-interrupts")] lapic_emulation: bool, @@ -273,8 +269,6 @@ impl WhpVm { surrogate_process, file_mappings: Vec::new(), #[cfg(feature = "hw-interrupts")] - pic: Pic::new(), - #[cfg(feature = "hw-interrupts")] lapic_emulation, #[cfg(feature = "hw-interrupts")] lapic_timer_active: false, @@ -1126,6 +1120,11 @@ impl DebuggableVm for WhpVm { #[cfg(feature = "hw-interrupts")] impl WhpVm { + /// Hardcoded timer interrupt vector. The nanvix guest always + /// remaps IRQ0 to vector 0x20 via PIC ICW2, so we use that same + /// vector directly — no PIC state machine needed. + const TIMER_VECTOR: u32 = 0x20; + /// Get the LAPIC state via the bulk interrupt-controller state API. fn get_lapic_state(&self) -> windows_result::Result> { let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; @@ -1177,17 +1176,21 @@ impl WhpVm { } /// Handle a hardware-interrupt IO IN request. - /// Returns `Some(value)` if the port was handled (PIC or PIT read), - /// `None` if the port should be passed through to the guest handler. + /// Returns `Some(value)` if the port was handled, `None` if the + /// port should be passed through to the guest handler. + /// + /// No PIC state machine — PIC data ports return 0xFF (all masked), + /// PIC command ports return 0 (no pending IRQ), PIT returns 0. fn handle_hw_io_in(&self, port: u16) -> Option { - if let Some(val) = self.pic.handle_io_in(port) { - return Some(val as u64); - } - // PIT data port read -- return 0 - if port == 0x40 { - return Some(0); + match port { + // PIC master/slave data ports — return "all masked" + 0x21 | 0xA1 => Some(0xFF), + // PIC master/slave command ports — return 0 (ISR/IRR read) + 0x20 | 0xA0 => Some(0), + // PIT data port read — return 0 + 0x40 => Some(0), + _ => None, } - None } /// Stop the software timer thread if running. @@ -1219,7 +1222,7 @@ impl WhpVm { // host-side software timer thread that periodically // injects interrupts via WHvRequestInterrupt. let partition_raw = self.partition.0; - let vector = self.pic.master_vector_base() as u32; + let vector = Self::TIMER_VECTOR; let stop = Arc::new(AtomicBool::new(false)); let stop_clone = stop.clone(); let period = std::time::Duration::from_micros(period_us as u64); @@ -1256,11 +1259,16 @@ impl WhpVm { } return true; } - if !data.is_empty() && self.pic.handle_io_out(port, data[0]) { - // When the guest sends PIC EOI (port 0x20, OCW2 non-specific EOI), - // also perform LAPIC EOI since the software timer delivers - // through the LAPIC and the guest only acknowledges via PIC. - if port == 0x20 && (data[0] & 0xE0) == 0x20 && self.lapic_timer_active { + // PIC ports (0x20, 0x21, 0xA0, 0xA1): accept as no-ops. + // No PIC state machine — we only need LAPIC EOI bridging when + // the guest sends a non-specific EOI on the master PIC command + // port (0x20, OCW2 byte with bits 7:5 = 001). + if port == 0x20 || port == 0x21 || port == 0xA0 || port == 0xA1 { + if port == 0x20 + && !data.is_empty() + && (data[0] & 0xE0) == 0x20 + && self.lapic_timer_active + { self.do_lapic_eoi(); } return true; From 94806b7bf4afbcde4eb9821690b56498fb3bd47c Mon Sep 17 00:00:00 2001 From: danbugs Date: Fri, 6 Mar 2026 17:25:08 +0000 Subject: [PATCH 11/37] fix: delete unused pic.rs file The PIC state machine was eliminated in the previous experimental commits (MSHV/WHP now hardcode vector 0x20 and no-op PIC ports). Remove the orphaned pic.rs file and its mod declaration. Signed-off-by: danbugs --- src/hyperlight_host/src/hypervisor/mod.rs | 3 - src/hyperlight_host/src/hypervisor/pic.rs | 228 ---------------------- 2 files changed, 231 deletions(-) delete mode 100644 src/hyperlight_host/src/hypervisor/pic.rs diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 5c59f776b..4a508100c 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -38,9 +38,6 @@ pub(crate) mod crashdump; pub(crate) mod hyperlight_vm; -#[cfg(all(feature = "hw-interrupts", any(mshv3, target_os = "windows")))] -pub(crate) mod pic; - use std::fmt::Debug; #[cfg(any(kvm, mshv3))] use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; diff --git a/src/hyperlight_host/src/hypervisor/pic.rs b/src/hyperlight_host/src/hypervisor/pic.rs deleted file mode 100644 index 7256ec3e9..000000000 --- a/src/hyperlight_host/src/hypervisor/pic.rs +++ /dev/null @@ -1,228 +0,0 @@ -/* -Copyright 2026 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -//! Minimal userspace PIC (8259A) emulation for MSHV/WHP. -//! KVM provides in-kernel PIC via `create_irq_chip()`. - -// PIC I/O port constants -const PIC_MASTER_CMD: u16 = 0x20; -const PIC_MASTER_DATA: u16 = 0x21; -const PIC_SLAVE_CMD: u16 = 0xA0; -const PIC_SLAVE_DATA: u16 = 0xA1; - -/// Tracks where we are in the ICW (Initialization Command Word) sequence. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum PicInitState { - /// Not in an init sequence; normal operation. - Ready, - /// Expecting ICW2 (vector base). - Icw2, - /// Expecting ICW3 (cascade info). - Icw3, - /// Expecting ICW4 (mode). - Icw4, -} - -/// Minimal 8259A PIC state for one chip (master or slave). -#[derive(Debug)] -pub(crate) struct Pic { - master_init_state: PicInitState, - master_vector_base: u8, - master_imr: u8, - - slave_init_state: PicInitState, - slave_vector_base: u8, - slave_imr: u8, -} - -impl Pic { - /// Create a new PIC pair with default state. - pub(crate) fn new() -> Self { - Self { - master_init_state: PicInitState::Ready, - master_vector_base: 0, - master_imr: 0xFF, // all masked - - slave_init_state: PicInitState::Ready, - slave_vector_base: 0, - slave_imr: 0xFF, // all masked - } - } - - /// Handle an I/O OUT to a PIC port. Returns `true` if the port was handled. - pub(crate) fn handle_io_out(&mut self, port: u16, data: u8) -> bool { - match port { - PIC_MASTER_CMD => { - if data & 0x10 != 0 { - // ICW1 -- start init sequence - self.master_init_state = PicInitState::Icw2; - } - // else: OCW (e.g. EOI) -- we accept but ignore - true - } - PIC_MASTER_DATA => { - match self.master_init_state { - PicInitState::Icw2 => { - self.master_vector_base = data; - self.master_init_state = PicInitState::Icw3; - } - PicInitState::Icw3 => { - self.master_init_state = PicInitState::Icw4; - } - PicInitState::Icw4 => { - self.master_init_state = PicInitState::Ready; - } - PicInitState::Ready => { - // IMR write - self.master_imr = data; - } - } - true - } - PIC_SLAVE_CMD => { - if data & 0x10 != 0 { - self.slave_init_state = PicInitState::Icw2; - } - true - } - PIC_SLAVE_DATA => { - match self.slave_init_state { - PicInitState::Icw2 => { - self.slave_vector_base = data; - self.slave_init_state = PicInitState::Icw3; - } - PicInitState::Icw3 => { - self.slave_init_state = PicInitState::Icw4; - } - PicInitState::Icw4 => { - self.slave_init_state = PicInitState::Ready; - } - PicInitState::Ready => { - self.slave_imr = data; - } - } - true - } - _ => false, - } - } - - /// Handle an I/O IN from a PIC port. Returns `Some(value)` if handled. - pub(crate) fn handle_io_in(&self, port: u16) -> Option { - match port { - PIC_MASTER_DATA => Some(self.master_imr), - PIC_SLAVE_DATA => Some(self.slave_imr), - PIC_MASTER_CMD | PIC_SLAVE_CMD => Some(0), // ISR reads return 0 - _ => None, - } - } - - /// Returns the master PIC vector base. - pub(crate) fn master_vector_base(&self) -> u8 { - self.master_vector_base - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn pic_initial_state() { - let pic = Pic::new(); - assert_eq!(pic.master_vector_base, 0); - assert_eq!(pic.slave_vector_base, 0); - assert_eq!(pic.master_imr, 0xFF); - assert_eq!(pic.slave_imr, 0xFF); - } - - #[test] - fn pic_master_icw_sequence() { - let mut pic = Pic::new(); - // ICW1 - assert!(pic.handle_io_out(PIC_MASTER_CMD, 0x11)); - // ICW2: vector base 0x20 - assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x20)); - assert_eq!(pic.master_vector_base(), 0x20); - // ICW3 - assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x04)); - // ICW4 - assert!(pic.handle_io_out(PIC_MASTER_DATA, 0x01)); - // Should be back to Ready -- IMR write - assert!(pic.handle_io_out(PIC_MASTER_DATA, 0xFE)); // unmask IRQ0 - assert_eq!(pic.master_imr, 0xFE); - } - - #[test] - fn pic_slave_icw_sequence() { - let mut pic = Pic::new(); - assert!(pic.handle_io_out(PIC_SLAVE_CMD, 0x11)); - assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x28)); - assert_eq!(pic.slave_vector_base, 0x28); - assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x02)); - assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0x01)); - assert!(pic.handle_io_out(PIC_SLAVE_DATA, 0xFF)); - assert_eq!(pic.slave_imr, 0xFF); - } - - #[test] - fn pic_eoi_accepted() { - let mut pic = Pic::new(); - // EOI (OCW2, non-specific EOI = 0x20) - assert!(pic.handle_io_out(PIC_MASTER_CMD, 0x20)); - } - - #[test] - fn pic_unhandled_port() { - let mut pic = Pic::new(); - assert!(!pic.handle_io_out(0x60, 0x00)); - assert_eq!(pic.handle_io_in(0x60), None); - } - - #[test] - fn pic_reinitialize_master() { - let mut pic = Pic::new(); - // First init - pic.handle_io_out(PIC_MASTER_CMD, 0x11); - pic.handle_io_out(PIC_MASTER_DATA, 0x20); - pic.handle_io_out(PIC_MASTER_DATA, 0x04); - pic.handle_io_out(PIC_MASTER_DATA, 0x01); - assert_eq!(pic.master_vector_base(), 0x20); - // Reinit - pic.handle_io_out(PIC_MASTER_CMD, 0x11); - pic.handle_io_out(PIC_MASTER_DATA, 0x30); - pic.handle_io_out(PIC_MASTER_DATA, 0x04); - pic.handle_io_out(PIC_MASTER_DATA, 0x01); - assert_eq!(pic.master_vector_base(), 0x30); - } - - #[test] - fn pic_imr_preserved_across_reinit() { - let mut pic = Pic::new(); - // Set IMR before init - pic.handle_io_out(PIC_MASTER_CMD, 0x11); - pic.handle_io_out(PIC_MASTER_DATA, 0x20); - pic.handle_io_out(PIC_MASTER_DATA, 0x04); - pic.handle_io_out(PIC_MASTER_DATA, 0x01); - pic.handle_io_out(PIC_MASTER_DATA, 0xFE); - assert_eq!(pic.master_imr, 0xFE); - // Reinit -- IMR is set during ICW sequence, not explicitly preserved - pic.handle_io_out(PIC_MASTER_CMD, 0x11); - // After ICW1, IMR should still read as whatever it was -- we only - // change it on Ready state writes. - assert_eq!(pic.handle_io_in(PIC_MASTER_DATA), Some(0xFE)); - } -} From 5135b89e91b9057105978581e8e59a3b21ade939 Mon Sep 17 00:00:00 2001 From: danbugs Date: Fri, 6 Mar 2026 17:26:51 +0000 Subject: [PATCH 12/37] style: rustfmt fixes in kvm.rs Signed-off-by: danbugs --- .../hypervisor/virtual_machine/kvm/x86_64.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 50d02e12b..b118dd47e 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -14,11 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. */ +#[cfg(feature = "hw-interrupts")] +use std::sync::Arc; use std::sync::LazyLock; #[cfg(feature = "hw-interrupts")] use std::sync::atomic::{AtomicBool, Ordering}; -#[cfg(feature = "hw-interrupts")] -use std::sync::Arc; use hyperlight_common::outb::OutBAction; #[cfg(gdb)] @@ -154,10 +154,11 @@ impl KvmVm { // When the timer thread writes to this EventFd, the in-kernel // PIC will assert IRQ0, which is delivered as the vector the // guest configured during PIC remap (typically vector 0x20). - let eventfd = EventFd::new(0) - .map_err(|e| CreateVmError::InitializeVm( - kvm_ioctls::Error::new(e.raw_os_error().unwrap_or(libc::EIO)).into() - ))?; + let eventfd = EventFd::new(0).map_err(|e| { + CreateVmError::InitializeVm( + kvm_ioctls::Error::new(e.raw_os_error().unwrap_or(libc::EIO)).into(), + ) + })?; vm_fd .register_irqfd(&eventfd, 0) .map_err(|e| CreateVmError::InitializeVm(e.into()))?; @@ -263,9 +264,8 @@ impl VirtualMachine for KvmVm { // The guest is configuring the timer period. // Extract the period in microseconds (LE u32). if data.len() >= 4 { - let period_us = u32::from_le_bytes( - data[..4].try_into().unwrap(), - ) as u64; + let period_us = + u32::from_le_bytes(data[..4].try_into().unwrap()) as u64; if period_us > 0 && self.timer_thread.is_none() { let eventfd = self .timer_irq_eventfd From 1bc420ba9f6a438015ad70458032533f9569fc74 Mon Sep 17 00:00:00 2001 From: danbugs Date: Fri, 6 Mar 2026 22:24:20 +0000 Subject: [PATCH 13/37] refactor: address PR 1272 review feedback - Remove _default_irq_handler (timer thread, not PIT, injects IRQs) - Remove cli;hlt after outb port 108 in guest asm and exit.rs - Revert to #[derive(Debug)] for all backends - Revert feature_val to upstream unsafe { features.as_uint64[0] } - Remove MSR intercept entirely from MSHV - Extract shared hw_interrupts module (LAPIC helpers, IO port handling, EOI bridging) to reduce duplication between MSHV and WHP - Replace lapic_timer_active with timer_thread.is_some() - Error on missing LAPIC emulation in WHP instead of fallback path - Remove lapic_emulation field from WhpVm - Remove Nanvix mentions from comments - Add Intel SDM references to LAPIC register writes Signed-off-by: danbugs --- src/hyperlight_guest/src/exit.rs | 18 +- .../src/arch/amd64/dispatch.rs | 2 - .../src/arch/amd64/exception/entry.rs | 26 -- .../src/arch/amd64/init.rs | 2 - .../virtual_machine/hw_interrupts.rs | 253 ++++++++++++ .../hypervisor/virtual_machine/kvm/x86_64.rs | 24 +- .../src/hypervisor/virtual_machine/mod.rs | 4 + .../hypervisor/virtual_machine/mshv/x86_64.rs | 265 ++---------- .../src/hypervisor/virtual_machine/whp.rs | 384 ++++-------------- 9 files changed, 394 insertions(+), 584 deletions(-) create mode 100644 src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs diff --git a/src/hyperlight_guest/src/exit.rs b/src/hyperlight_guest/src/exit.rs index 785e10a19..1926340a4 100644 --- a/src/hyperlight_guest/src/exit.rs +++ b/src/hyperlight_guest/src/exit.rs @@ -20,10 +20,15 @@ use core::ffi::{CStr, c_char}; use hyperlight_common::outb::OutBAction; use tracing::instrument; -/// Halt the execution of the guest and returns control to the host. -/// Halt is generally called for a successful completion of the guest's work, -/// this means we can instrument it as a trace point because the trace state -/// shall not be locked at this point (we are not in an exception context). +/// Signal successful completion of the guest's work and return control +/// to the host. This replaces the previous `hlt`-based exit: under the +/// `hw-interrupts` feature, `hlt` becomes a wait-for-interrupt (the +/// in-kernel IRQ chip wakes the vCPU), so we use an explicit IO-port +/// write (port 108) to trigger a VM exit that the host treats as a +/// clean shutdown. +/// +/// This function never returns — the host does not re-enter the guest +/// after seeing the Halt port. #[inline(never)] #[instrument(skip_all, level = "Trace")] pub fn halt() { @@ -37,13 +42,8 @@ pub fn halt() { hyperlight_guest_tracing::flush(); } - // Signal "I'm done" to the host via an IO port write. This causes a - // VM exit on all backends (KVM, MSHV, WHP). The subsequent cli+hlt - // is dead code—hyperlight never re-enters the guest after seeing - // the Halt port—but serves as a safety fallback. unsafe { out32(OutBAction::Halt as u16, 0); - asm!("cli", "hlt", options(nostack)); } } diff --git a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs index e904eeb6e..a39de9020 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs @@ -70,7 +70,5 @@ core::arch::global_asm!(" call {internal_dispatch_function}\n mov dx, 108\n out dx, al\n - cli\n - hlt\n .cfi_endproc ", internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function); diff --git a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs index ca72e62d8..87f89f15c 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/exception/entry.rs @@ -172,24 +172,6 @@ global_asm!( hl_exception_handler = sym super::handle::hl_exception_handler, ); -// Default no-op IRQ handler for hardware interrupts (vectors 0x20-0x2F). -// Sends a non-specific EOI to the master PIC and returns. -// This prevents unhandled-interrupt faults when the in-kernel PIT fires -// before a guest has installed its own IRQ handler. -global_asm!( - ".globl _default_irq_handler", - "_default_irq_handler:", - "push rax", - "mov al, 0x20", - "out 0x20, al", // PIC EOI - "pop rax", - "iretq", -); - -unsafe extern "C" { - fn _default_irq_handler(); -} - pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { let idt = unsafe { &raw mut (*pc).idt }; let set_idt_entry = |idx, handler: unsafe extern "C" fn()| { @@ -221,14 +203,6 @@ pub(in super::super) fn init_idt(pc: *mut ProcCtrl) { set_idt_entry(Exception::VirtualizationException, _do_excp20); // Virtualization Exception set_idt_entry(Exception::SecurityException, _do_excp30); // Security Exception - // Install a default no-op IRQ handler at vector 0x20 (IRQ0 after PIC remapping). - // This ensures HLT can wake when an in-kernel PIT is active, even before the - // guest installs its own IRQ handler. - let irq_handler_addr = _default_irq_handler as *const () as u64; - unsafe { - (&raw mut (*idt).entries[0x20]).write_volatile(IdtEntry::new(irq_handler_addr)); - } - let idtr = IdtPointer { limit: (core::mem::size_of::() - 1) as u16, base: idt as u64, diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index fa8b86c4d..bfa458059 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -175,7 +175,5 @@ core::arch::global_asm!(" call {generic_init}\n mov dx, 108\n out dx, al\n - cli\n - hlt\n .cfi_endproc\n ", generic_init = sym crate::generic_init); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs new file mode 100644 index 000000000..e20718fce --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs @@ -0,0 +1,253 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Shared helpers for the `hw-interrupts` feature, used by MSHV and WHP +//! backends. KVM uses an in-kernel IRQ chip so it handles most of +//! this transparently. + +/// Hardcoded timer interrupt vector. The guest remaps IRQ0 to +/// vector 0x20 via PIC ICW2, so we use that same vector directly +/// — no PIC state machine needed. +pub(crate) const TIMER_VECTOR: u32 = 0x20; + +/// Handle an IO IN request for hardware-interrupt related ports. +/// Returns `Some(value)` if the port was handled, `None` if the +/// port should be passed through to the guest handler. +/// +/// No PIC state machine — PIC data ports return 0xFF (all masked), +/// PIC command ports return 0 (no pending IRQ), PIT returns 0. +pub(crate) fn handle_io_in(port: u16) -> Option { + match port { + // PIC master/slave data ports — return "all masked" + 0x21 | 0xA1 => Some(0xFF), + // PIC master/slave command ports — return 0 (ISR/IRR read) + 0x20 | 0xA0 => Some(0), + // PIT data port read — return 0 + 0x40 => Some(0), + _ => None, + } +} + +/// Handle IO OUT requests for common legacy hardware ports +/// (PIC, PIT, PC speaker, diagnostic port). +/// +/// Returns `true` if the port was fully handled, `false` otherwise. +/// When the guest sends a non-specific EOI on the master PIC command +/// port (0x20, OCW2 byte with bits 7:5 = 001) and a timer is active, +/// `eoi_callback` is invoked to bridge the PIC EOI to the LAPIC. +pub(crate) fn handle_common_io_out( + port: u16, + data: &[u8], + timer_active: bool, + eoi_callback: impl FnOnce(), +) -> bool { + // PIC ports (0x20, 0x21, 0xA0, 0xA1): accept as no-ops. + // We only need LAPIC EOI bridging when the guest sends a + // non-specific EOI on the master PIC command port. + if port == 0x20 || port == 0x21 || port == 0xA0 || port == 0xA1 { + if port == 0x20 && !data.is_empty() && (data[0] & 0xE0) == 0x20 && timer_active { + eoi_callback(); + } + return true; + } + // PIT ports + if port == 0x43 || port == 0x40 { + return true; + } + // PC speaker + if port == 0x61 { + return true; + } + // Diagnostic port + if port == 0x80 { + return true; + } + false +} + +// --------------------------------------------------------------------------- +// LAPIC register helpers +// +// LAPIC register offsets from Intel SDM Vol. 3A, Table 11-1. +// These operate on a raw byte slice representing the LAPIC register page. +// --------------------------------------------------------------------------- + +/// Write a u32 to a LAPIC register page at the given APIC offset. +pub(crate) fn write_lapic_u32(state: &mut [u8], offset: usize, val: u32) { + state[offset..offset + 4].copy_from_slice(&val.to_le_bytes()); +} + +/// Read a u32 from a LAPIC register page at the given APIC offset. +pub(crate) fn read_lapic_u32(state: &[u8], offset: usize) -> u32 { + u32::from_le_bytes([ + state[offset], + state[offset + 1], + state[offset + 2], + state[offset + 3], + ]) +} + +/// Initialize LAPIC registers in a raw register page to sensible +/// defaults for timer interrupt delivery. +/// +/// Register values from Intel SDM Vol. 3A, Table 11-1: +/// - SVR (0xF0): bit 8 = enable APIC, bits 0-7 = spurious vector 0xFF +/// - TPR (0x80): 0 = accept all interrupt priorities +/// - DFR (0xE0): 0xFFFF_FFFF = flat model +/// - LDR (0xD0): logical APIC ID for flat model +/// - LINT0 (0x350): masked — not wired to PIC +/// - LINT1 (0x360): NMI delivery, not masked +/// - LVT Timer (0x320): masked — host timer thread injects instead +/// - LVT Error (0x370): masked +pub(crate) fn init_lapic_registers(state: &mut [u8]) { + write_lapic_u32(state, 0xF0, 0x1FF); + write_lapic_u32(state, 0x80, 0); + write_lapic_u32(state, 0xE0, 0xFFFF_FFFF); + write_lapic_u32(state, 0xD0, 1 << 24); + write_lapic_u32(state, 0x350, 0x0001_0000); + write_lapic_u32(state, 0x360, 0x400); + write_lapic_u32(state, 0x320, 0x0001_0000); + write_lapic_u32(state, 0x370, 0x0001_0000); +} + +/// Perform LAPIC EOI: clear the highest-priority in-service bit. +/// +/// The ISR is at LAPIC offset 0x100, organized as 8 × 32-bit words +/// (one per 16 bytes). Scans from highest priority (ISR\[7\]) to +/// lowest (ISR\[0\]). +pub(crate) fn lapic_eoi(state: &mut [u8]) { + for i in (0u32..8).rev() { + let offset = 0x100 + (i as usize) * 0x10; + let isr_val = read_lapic_u32(state, offset); + if isr_val != 0 { + let bit = 31 - isr_val.leading_zeros(); + write_lapic_u32(state, offset, isr_val & !(1u32 << bit)); + break; + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn write_read_lapic_roundtrip() { + let mut state = vec![0u8; 1024]; + write_lapic_u32(&mut state, 0xF0, 0xDEAD_BEEF); + assert_eq!(read_lapic_u32(&state, 0xF0), 0xDEAD_BEEF); + } + + #[test] + fn write_read_lapic_multiple_offsets() { + let mut state = vec![0u8; 1024]; + write_lapic_u32(&mut state, 0x80, 0x1234_5678); + write_lapic_u32(&mut state, 0xF0, 0xABCD_EF01); + write_lapic_u32(&mut state, 0xE0, 0xFFFF_FFFF); + assert_eq!(read_lapic_u32(&state, 0x80), 0x1234_5678); + assert_eq!(read_lapic_u32(&state, 0xF0), 0xABCD_EF01); + assert_eq!(read_lapic_u32(&state, 0xE0), 0xFFFF_FFFF); + } + + #[test] + fn write_read_lapic_zero() { + let mut state = vec![0xFFu8; 1024]; + write_lapic_u32(&mut state, 0x80, 0); + assert_eq!(read_lapic_u32(&state, 0x80), 0); + } + + #[test] + fn write_does_not_clobber_neighbors() { + let mut state = vec![0u8; 1024]; + write_lapic_u32(&mut state, 0x80, 0xAAAA_BBBB); + assert_eq!(state[0x7F], 0); + assert_eq!(state[0x84], 0); + } + + #[test] + fn lapic_eoi_clears_highest_isr_bit() { + let mut state = vec![0u8; 1024]; + // Set bit 5 in ISR[0] (offset 0x100) + write_lapic_u32(&mut state, 0x100, 1 << 5); + lapic_eoi(&mut state); + assert_eq!(read_lapic_u32(&state, 0x100), 0); + } + + #[test] + fn lapic_eoi_clears_only_highest() { + let mut state = vec![0u8; 1024]; + // Set bits in ISR[0] and ISR[1] + write_lapic_u32(&mut state, 0x100, 0b11); // bits 0 and 1 + write_lapic_u32(&mut state, 0x110, 1 << 2); // bit 2 in ISR[1] + lapic_eoi(&mut state); + // ISR[1] should be cleared (higher priority), ISR[0] untouched + assert_eq!(read_lapic_u32(&state, 0x110), 0); + assert_eq!(read_lapic_u32(&state, 0x100), 0b11); + } + + #[test] + fn init_lapic_registers_sets_svr() { + let mut state = vec![0u8; 1024]; + init_lapic_registers(&mut state); + let svr = read_lapic_u32(&state, 0xF0); + assert_ne!(svr & 0x100, 0, "APIC enable bit should be set"); + assert_eq!(svr & 0xFF, 0xFF, "spurious vector should be 0xFF"); + } + + #[test] + fn handle_io_in_pic_ports() { + assert_eq!(handle_io_in(0x21), Some(0xFF)); + assert_eq!(handle_io_in(0xA1), Some(0xFF)); + assert_eq!(handle_io_in(0x20), Some(0)); + assert_eq!(handle_io_in(0xA0), Some(0)); + assert_eq!(handle_io_in(0x40), Some(0)); + assert_eq!(handle_io_in(0x42), None); + } + + #[test] + fn handle_common_io_out_pic_eoi() { + let mut eoi_called = false; + // Non-specific EOI (0x20) on master PIC command port + let handled = handle_common_io_out(0x20, &[0x20], true, || eoi_called = true); + assert!(handled); + assert!(eoi_called); + } + + #[test] + fn handle_common_io_out_no_eoi_when_timer_inactive() { + let mut eoi_called = false; + let handled = handle_common_io_out(0x20, &[0x20], false, || eoi_called = true); + assert!(handled); + assert!(!eoi_called); + } + + #[test] + fn handle_common_io_out_pit_ports() { + assert!(handle_common_io_out(0x40, &[], false, || {})); + assert!(handle_common_io_out(0x43, &[], false, || {})); + } + + #[test] + fn handle_common_io_out_speaker_and_diag() { + assert!(handle_common_io_out(0x61, &[], false, || {})); + assert!(handle_common_io_out(0x80, &[], false, || {})); + } + + #[test] + fn handle_common_io_out_unknown_port() { + assert!(!handle_common_io_out(0x100, &[], false, || {})); + } +} diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index b118dd47e..79484aa84 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -40,6 +40,8 @@ use crate::hypervisor::regs::{ CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, }; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::virtual_machine::HypervisorError; #[cfg(all(test, not(feature = "nanvix-unstable")))] use crate::hypervisor::virtual_machine::XSAVE_BUFFER_SIZE; use crate::hypervisor::virtual_machine::{ @@ -91,6 +93,7 @@ pub(crate) fn is_hypervisor_present() -> bool { } /// A KVM implementation of a single-vcpu VM +#[derive(Debug)] pub(crate) struct KvmVm { vm_fd: VmFd, vcpu_fd: VcpuFd, @@ -111,15 +114,6 @@ pub(crate) struct KvmVm { debug_regs: kvm_guest_debug, } -impl std::fmt::Debug for KvmVm { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("KvmVm") - .field("vm_fd", &self.vm_fd) - .field("vcpu_fd", &self.vcpu_fd) - .finish_non_exhaustive() - } -} - static KVM: LazyLock> = LazyLock::new(|| Kvm::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into()))); @@ -263,14 +257,12 @@ impl VirtualMachine for KvmVm { if port == OutBAction::PvTimerConfig as u16 { // The guest is configuring the timer period. // Extract the period in microseconds (LE u32). - if data.len() >= 4 { - let period_us = - u32::from_le_bytes(data[..4].try_into().unwrap()) as u64; + if let Ok(bytes) = data[..4].try_into() { + let period_us = u32::from_le_bytes(bytes) as u64; if period_us > 0 && self.timer_thread.is_none() { - let eventfd = self - .timer_irq_eventfd - .try_clone() - .expect("failed to clone timer EventFd"); + let eventfd = self.timer_irq_eventfd.try_clone().map_err(|e| { + RunVcpuError::Unknown(HypervisorError::KvmError(e.into())) + })?; let stop = self.timer_stop.clone(); let period = std::time::Duration::from_micros(period_us); self.timer_thread = Some(std::thread::spawn(move || { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index cb1e253a9..d142b21e1 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -38,6 +38,10 @@ pub(crate) mod mshv; #[cfg(target_os = "windows")] pub(crate) mod whp; +/// Shared helpers for hardware interrupt support (MSHV and WHP) +#[cfg(feature = "hw-interrupts")] +pub(crate) mod hw_interrupts; + static AVAILABLE_HYPERVISOR: OnceLock> = OnceLock::new(); /// Returns which type of hypervisor is available, if any diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 205caa346..9d238b081 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -31,17 +31,12 @@ use mshv_bindings::{ FloatingPointUnit, SpecialRegisters, StandardRegisters, XSave, hv_message_type, hv_message_type_HVMSG_GPA_INTERCEPT, hv_message_type_HVMSG_UNMAPPED_GPA, hv_message_type_HVMSG_X64_HALT, hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT, - hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, hv_register_assoc, + hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, + hv_partition_synthetic_processor_features, hv_register_assoc, hv_register_name_HV_X64_REGISTER_RIP, hv_register_value, mshv_create_partition_v2, mshv_user_mem_region, }; #[cfg(feature = "hw-interrupts")] -use mshv_bindings::{ - HV_INTERCEPT_ACCESS_MASK_WRITE, hv_intercept_parameters, - hv_intercept_type_HV_INTERCEPT_TYPE_X64_MSR_INDEX, hv_message_type_HVMSG_X64_MSR_INTERCEPT, - mshv_install_intercept, -}; -#[cfg(feature = "hw-interrupts")] use mshv_bindings::{ hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED, hv_register_name_HV_X64_REGISTER_RAX, }; @@ -82,6 +77,7 @@ pub(crate) fn is_hypervisor_present() -> bool { } /// A MSHV implementation of a single-vcpu VM +#[derive(Debug)] pub(crate) struct MshvVm { /// VmFd wrapped in Arc so the timer thread can call /// `request_virtual_interrupt` from a background thread. @@ -98,37 +94,24 @@ pub(crate) struct MshvVm { timer_thread: Option>, } -impl std::fmt::Debug for MshvVm { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MshvVm") - .field("vm_fd", &self.vm_fd) - .field("vcpu_fd", &self.vcpu_fd) - .finish_non_exhaustive() - } -} - static MSHV: LazyLock> = LazyLock::new(|| Mshv::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into()))); -/// Write a u32 to a LAPIC register at the given APIC offset. +/// Cast MSHV `LapicState.regs` (`[c_char; 1024]`) to a `&[u8]` slice +/// for use with the shared LAPIC helpers. #[cfg(feature = "hw-interrupts")] -fn write_lapic_u32(regs: &mut [::std::os::raw::c_char; 1024], offset: usize, val: u32) { - let bytes = val.to_le_bytes(); - regs[offset] = bytes[0] as _; - regs[offset + 1] = bytes[1] as _; - regs[offset + 2] = bytes[2] as _; - regs[offset + 3] = bytes[3] as _; +fn lapic_regs_as_u8(regs: &[::std::os::raw::c_char; 1024]) -> &[u8] { + // Safety: c_char (i8) and u8 have the same size and alignment; + // LAPIC register values are treated as raw bytes. + unsafe { &*(regs as *const [::std::os::raw::c_char; 1024] as *const [u8; 1024]) } } -/// Read a u32 from a LAPIC register at the given APIC offset. +/// Cast MSHV `LapicState.regs` (`[c_char; 1024]`) to a `&mut [u8]` slice +/// for use with the shared LAPIC helpers. #[cfg(feature = "hw-interrupts")] -fn read_lapic_u32(regs: &[::std::os::raw::c_char; 1024], offset: usize) -> u32 { - u32::from_le_bytes([ - regs[offset] as u8, - regs[offset + 1] as u8, - regs[offset + 2] as u8, - regs[offset + 3] as u8, - ]) +fn lapic_regs_as_u8_mut(regs: &mut [::std::os::raw::c_char; 1024]) -> &mut [u8] { + // Safety: same as above. + unsafe { &mut *(regs as *mut [::std::os::raw::c_char; 1024] as *mut [u8; 1024]) } } impl MshvVm { @@ -150,18 +133,12 @@ impl MshvVm { .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; let vcpu_fd = { - // No synthetic features needed — timer interrupts are injected - // directly via request_virtual_interrupt(), not through SynIC. - // The LAPIC (enabled via pt_flags) handles interrupt delivery. - #[cfg(feature = "hw-interrupts")] - let feature_val = 0u64; - #[cfg(not(feature = "hw-interrupts"))] - let feature_val = 0u64; + let features: hv_partition_synthetic_processor_features = Default::default(); vm_fd .set_partition_property( hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, - feature_val, + unsafe { features.as_uint64[0] }, ) .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; @@ -179,47 +156,17 @@ impl MshvVm { // interrupts can be delivered (request_virtual_interrupt would fail). #[cfg(feature = "hw-interrupts")] { + use super::hw_interrupts::init_lapic_registers; + let mut lapic: LapicState = vcpu_fd .get_lapic() .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - // SVR (offset 0xF0): bit 8 = enable APIC, bits 0-7 = spurious vector - write_lapic_u32(&mut lapic.regs, 0xF0, 0x1FF); - // TPR (offset 0x80): 0 = accept all interrupt priorities - write_lapic_u32(&mut lapic.regs, 0x80, 0); - // DFR (offset 0xE0): 0xFFFFFFFF = flat model - write_lapic_u32(&mut lapic.regs, 0xE0, 0xFFFF_FFFF); - // LDR (offset 0xD0): set logical APIC ID for flat model - write_lapic_u32(&mut lapic.regs, 0xD0, 1 << 24); - // LINT0 (offset 0x350): masked — we don't forward PIC through LAPIC; - // our PIC is emulated in userspace and not wired to LINT0. - write_lapic_u32(&mut lapic.regs, 0x350, 0x0001_0000); - // LINT1 (offset 0x360): NMI delivery, not masked - write_lapic_u32(&mut lapic.regs, 0x360, 0x400); - // LVT Timer (offset 0x320): masked — we use host timer thread instead - write_lapic_u32(&mut lapic.regs, 0x320, 0x0001_0000); - // LVT Error (offset 0x370): masked - write_lapic_u32(&mut lapic.regs, 0x370, 0x0001_0000); + init_lapic_registers(lapic_regs_as_u8_mut(&mut lapic.regs)); vcpu_fd .set_lapic(&lapic) .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - - // Install MSR intercept for IA32_APIC_BASE (MSR 0x1B) to prevent - // the guest from globally disabling the LAPIC. The Nanvix kernel - // disables the APIC when no I/O APIC is detected, but we need - // the LAPIC enabled for request_virtual_interrupt delivery. - // - // This may fail with AccessDenied on some kernel versions; in - // that case we fall back to re-enabling the LAPIC in the timer - // setup path (handle_hw_io_out). - let _ = vm_fd.install_intercept(mshv_install_intercept { - access_type_mask: HV_INTERCEPT_ACCESS_MASK_WRITE, - intercept_type: hv_intercept_type_HV_INTERCEPT_TYPE_X64_MSR_INDEX, - intercept_parameter: hv_intercept_parameters { - msr_index: 0x1B, // IA32_APIC_BASE - }, - }); } Ok(Self { @@ -267,8 +214,6 @@ impl VirtualMachine for MshvVm { hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT; const UNMAPPED_GPA_MESSAGE: hv_message_type = hv_message_type_HVMSG_UNMAPPED_GPA; const INVALID_GPA_ACCESS_MESSAGE: hv_message_type = hv_message_type_HVMSG_GPA_INTERCEPT; - #[cfg(feature = "hw-interrupts")] - const MSR_INTERCEPT_MESSAGE: hv_message_type = hv_message_type_HVMSG_X64_MSR_INTERCEPT; #[cfg(gdb)] const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT; @@ -337,7 +282,9 @@ impl VirtualMachine for MshvVm { if self.handle_hw_io_out(port_number, &data) { continue; } - } else if let Some(val) = self.handle_hw_io_in(port_number) { + } else if let Some(val) = + super::hw_interrupts::handle_io_in(port_number) + { self.vcpu_fd .set_reg(&[hv_register_assoc { name: hv_register_name_HV_X64_REGISTER_RAX, @@ -380,39 +327,6 @@ impl VirtualMachine for MshvVm { _ => Ok(VmExit::Unknown("Unknown MMIO access".to_string())), }; } - #[cfg(feature = "hw-interrupts")] - MSR_INTERCEPT_MESSAGE => { - // Guest is writing to MSR 0x1B (IA32_APIC_BASE). - // Force bit 11 (global APIC enable) to stay set, - // preventing the guest from disabling the LAPIC. - let msr_msg = m - .to_msr_info() - .map_err(|_| RunVcpuError::DecodeIOMessage(msg_type))?; - let rip = msr_msg.header.rip; - let instruction_length = msr_msg.header.instruction_length() as u64; - let msr_val = (msr_msg.rdx << 32) | (msr_msg.rax & 0xFFFF_FFFF); - - // Force APIC global enable (bit 11) to remain set, - // preserving the standard base address. - let forced_val = msr_val | (1 << 11) | 0xFEE00000; - self.vcpu_fd - .set_reg(&[hv_register_assoc { - name: hv_register_name_HV_X64_REGISTER_RIP, - value: hv_register_value { - reg64: rip + instruction_length, - }, - ..Default::default() - }]) - .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; - - use mshv_bindings::hv_register_name_HV_X64_REGISTER_APIC_BASE; - let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { - name: hv_register_name_HV_X64_REGISTER_APIC_BASE, - value: hv_register_value { reg64: forced_val }, - ..Default::default() - }]); - continue; - } #[cfg(gdb)] EXCEPTION_INTERCEPT => { let ex_info = m @@ -709,47 +623,14 @@ impl MshvVm { /// BSP flag (bit 8) + global enable (bit 11). const APIC_BASE_DEFAULT: u64 = 0xFEE00900; - /// Hardcoded timer interrupt vector. The nanvix guest always - /// remaps IRQ0 to vector 0x20 via PIC ICW2, so we use that same - /// vector directly — no PIC state machine needed. - const TIMER_VECTOR: u32 = 0x20; - /// Perform LAPIC EOI: clear the highest-priority in-service bit. /// Called when the guest sends PIC EOI, since the timer thread /// delivers interrupts through the LAPIC and the guest only /// acknowledges via PIC. fn do_lapic_eoi(&self) { if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { - // ISR is at offset 0x100, 8 x 32-bit words (one per 16 bytes). - // Scan from highest priority (ISR[7]) to lowest (ISR[0]). - for i in (0u32..8).rev() { - let offset = 0x100 + (i as usize) * 0x10; - let isr_val = read_lapic_u32(&lapic.regs, offset); - if isr_val != 0 { - let bit = 31 - isr_val.leading_zeros(); - write_lapic_u32(&mut lapic.regs, offset, isr_val & !(1u32 << bit)); - let _ = self.vcpu_fd.set_lapic(&lapic); - break; - } - } - } - } - - /// Handle a hardware-interrupt IO IN request. - /// Returns `Some(value)` if the port was handled, `None` if the - /// port should be passed through to the guest handler. - /// - /// No PIC state machine — PIC data ports return 0xFF (all masked), - /// PIC command ports return 0 (no pending IRQ), PIT returns 0. - fn handle_hw_io_in(&self, port: u16) -> Option { - match port { - // PIC master/slave data ports — return "all masked" - 0x21 | 0xA1 => Some(0xFF), - // PIC master/slave command ports — return 0 (ISR/IRR read) - 0x20 | 0xA0 => Some(0), - // PIT data port read — return 0 - 0x40 => Some(0), - _ => None, + super::hw_interrupts::lapic_eoi(lapic_regs_as_u8_mut(&mut lapic.regs)); + let _ = self.vcpu_fd.set_lapic(&lapic); } } @@ -759,8 +640,8 @@ impl MshvVm { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); if period_us > 0 && self.timer_thread.is_none() { // Re-enable LAPIC if the guest disabled it (via WRMSR - // to MSR 0x1B clearing bit 11). This happens when the - // Nanvix kernel doesn't detect an I/O APIC. + // to MSR 0x1B clearing bit 11). Some guests clear + // the global APIC enable when no I/O APIC is detected. // // The hypervisor may return 0 for APIC_BASE when the // APIC is globally disabled, so we always restore the @@ -786,21 +667,18 @@ impl MshvVm { // Re-initialize LAPIC SVR (may have been zeroed when // guest disabled the APIC globally) if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { - let svr = read_lapic_u32(&lapic.regs, 0xF0); + let regs = lapic_regs_as_u8(&lapic.regs); + let svr = super::hw_interrupts::read_lapic_u32(regs, 0xF0); if svr & 0x100 == 0 { - write_lapic_u32(&mut lapic.regs, 0xF0, 0x1FF); - write_lapic_u32(&mut lapic.regs, 0x80, 0); // TPR + let regs_mut = lapic_regs_as_u8_mut(&mut lapic.regs); + super::hw_interrupts::write_lapic_u32(regs_mut, 0xF0, 0x1FF); + super::hw_interrupts::write_lapic_u32(regs_mut, 0x80, 0); // TPR let _ = self.vcpu_fd.set_lapic(&lapic); } } - // Start a host timer thread that periodically injects - // interrupts via request_virtual_interrupt (HVCALL 148). - // This replaces the SynIC timer approach and makes MSHV - // consistent with the KVM irqfd and WHP software timer - // patterns. let vm_fd = self.vm_fd.clone(); - let vector = Self::TIMER_VECTOR; + let vector = super::hw_interrupts::TIMER_VECTOR; let stop = self.timer_stop.clone(); let period = std::time::Duration::from_micros(period_us as u64); self.timer_thread = Some(std::thread::spawn(move || { @@ -823,30 +701,8 @@ impl MshvVm { } return true; } - // PIC ports (0x20, 0x21, 0xA0, 0xA1): accept as no-ops. - // No PIC state machine — we only need LAPIC EOI bridging when - // the guest sends a non-specific EOI on the master PIC command - // port (0x20, OCW2 byte with bits 7:5 = 001). - if port == 0x20 || port == 0x21 || port == 0xA0 || port == 0xA1 { - if port == 0x20 - && !data.is_empty() - && (data[0] & 0xE0) == 0x20 - && self.timer_thread.is_some() - { - self.do_lapic_eoi(); - } - return true; - } - if port == 0x43 || port == 0x40 { - return true; - } - if port == 0x61 { - return true; - } - if port == 0x80 { - return true; - } - false + let timer_active = self.timer_thread.is_some(); + super::hw_interrupts::handle_common_io_out(port, data, timer_active, || self.do_lapic_eoi()) } } @@ -866,37 +722,15 @@ mod hw_interrupt_tests { use super::*; #[test] - fn write_read_lapic_u32_roundtrip() { + fn lapic_regs_conversion_roundtrip() { let mut regs = [0i8; 1024]; - write_lapic_u32(&mut regs, 0xF0, 0xDEAD_BEEF); - assert_eq!(read_lapic_u32(®s, 0xF0), 0xDEAD_BEEF); - } - - #[test] - fn write_read_lapic_u32_multiple_offsets() { - let mut regs = [0i8; 1024]; - write_lapic_u32(&mut regs, 0x80, 0x1234_5678); - write_lapic_u32(&mut regs, 0xF0, 0xABCD_EF01); - write_lapic_u32(&mut regs, 0xE0, 0xFFFF_FFFF); - assert_eq!(read_lapic_u32(®s, 0x80), 0x1234_5678); - assert_eq!(read_lapic_u32(®s, 0xF0), 0xABCD_EF01); - assert_eq!(read_lapic_u32(®s, 0xE0), 0xFFFF_FFFF); - } - - #[test] - fn write_read_lapic_u32_zero() { - let mut regs = [0xFFu8 as i8; 1024]; - write_lapic_u32(&mut regs, 0x80, 0); - assert_eq!(read_lapic_u32(®s, 0x80), 0); - } - - #[test] - fn write_read_lapic_u32_does_not_clobber_neighbors() { - let mut regs = [0i8; 1024]; - write_lapic_u32(&mut regs, 0x80, 0xAAAA_BBBB); - // Check that bytes before and after are untouched - assert_eq!(regs[0x7F], 0); - assert_eq!(regs[0x84], 0); + let bytes = lapic_regs_as_u8_mut(&mut regs); + super::super::hw_interrupts::write_lapic_u32(bytes, 0xF0, 0xDEAD_BEEF); + let bytes = lapic_regs_as_u8(®s); + assert_eq!( + super::super::hw_interrupts::read_lapic_u32(bytes, 0xF0), + 0xDEAD_BEEF + ); } #[test] @@ -910,19 +744,4 @@ mod hw_interrupt_tests { "base address should be 0xFEE00000" ); } - - #[test] - fn lapic_svr_init_value() { - // SVR = 0x1FF: bit 8 = enable APIC, bits 0-7 = spurious vector 0xFF - let svr: u32 = 0x1FF; - assert_ne!(svr & 0x100, 0, "APIC enable bit should be set"); - assert_eq!(svr & 0xFF, 0xFF, "spurious vector should be 0xFF"); - } - - #[test] - fn lapic_lvt_masked_value() { - // Masked LVT entry: bit 16 = 1 - let masked: u32 = 0x0001_0000; - assert_ne!(masked & (1 << 16), 0, "mask bit should be set"); - } } diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 36a9f151a..e4612bccf 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -98,12 +98,6 @@ pub(crate) struct WhpVm { /// Tracks host-side file mappings (view_base, mapping_handle) for /// cleanup on unmap or drop. Only populated for MappedFile regions. file_mappings: Vec<(HandleWrapper, *mut c_void)>, - /// Whether the WHP host supports LAPIC emulation mode. - #[cfg(feature = "hw-interrupts")] - lapic_emulation: bool, - /// Whether the LAPIC timer is actively delivering periodic interrupts. - #[cfg(feature = "hw-interrupts")] - lapic_timer_active: bool, /// Signal to stop the software timer thread. #[cfg(feature = "hw-interrupts")] timer_stop: Option>, @@ -125,126 +119,62 @@ impl WhpVm { pub(crate) fn new() -> Result { const NUM_CPU: u32 = 1; - // Check whether the WHP host supports LAPIC emulation. - // Bit 1 of WHV_CAPABILITY_FEATURES corresponds to LocalApicEmulation. - // LAPIC emulation is required for the LAPIC timer to deliver interrupts. - #[cfg(feature = "hw-interrupts")] - const LAPIC_EMULATION_BIT: u64 = 1 << 1; + let partition = unsafe { + #[cfg(feature = "hw-interrupts")] + { + // Check whether the WHP host supports LAPIC emulation. + // Bit 1 of WHV_CAPABILITY_FEATURES = LocalApicEmulation. + // LAPIC emulation is required for timer interrupt delivery. + const LAPIC_EMULATION_BIT: u64 = 1 << 1; - #[cfg(feature = "hw-interrupts")] - let mut lapic_emulation = { - let mut capability: WHV_CAPABILITY = Default::default(); - let ok = unsafe { - WHvGetCapability( + let mut capability: WHV_CAPABILITY = Default::default(); + let has_lapic = WHvGetCapability( WHvCapabilityCodeFeatures, &mut capability as *mut _ as *mut c_void, std::mem::size_of::() as u32, None, ) - }; - if ok.is_ok() { - unsafe { capability.Features.AsUINT64 & LAPIC_EMULATION_BIT != 0 } - } else { - false - } - }; - - // Create partition, set CPU count, setup, and create vCPU. - // When hw-interrupts is enabled and LAPIC emulation is supported, - // we create a partition with LAPIC emulation mode. This is - // required for the LAPIC timer. - let partition = unsafe { - #[cfg(feature = "hw-interrupts")] - { - if lapic_emulation { - let p = - WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - - let apic_mode: u32 = 1; // WHvX64LocalApicEmulationModeXApic - let partition_ok = WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeLocalApicEmulationMode, - &apic_mode as *const _ as *const _, - std::mem::size_of_val(&apic_mode) as _, - ) - .is_ok() - && WHvSetupPartition(p).is_ok() - && WHvCreateVirtualProcessor(p, 0, 0).is_ok(); - - if partition_ok { - log::info!("[WHP] LAPIC partition created, running init_lapic_bulk"); - // Initialize the LAPIC via the bulk interrupt-controller - // state API (individual APIC register writes via - // WHvSetVirtualProcessorRegisters fail with ACCESS_DENIED). - if let Err(e) = Self::init_lapic_bulk(p) { - log::warn!( - "[WHP] Bulk LAPIC init failed ({e}); \ - falling back to non-LAPIC mode." - ); - let _ = WHvDeleteVirtualProcessor(p, 0); - let _ = WHvDeletePartition(p); - lapic_emulation = false; - - let p = WHvCreatePartition() - .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - WHvSetupPartition(p) - .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - WHvCreateVirtualProcessor(p, 0, 0) - .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - p - } else { - log::info!("[WHP] Bulk LAPIC init succeeded, lapic_emulation=true"); - p - } - } else { - log::warn!("LAPIC emulation setup failed, falling back to non-LAPIC mode"); - let _ = WHvDeleteVirtualProcessor(p, 0); - let _ = WHvDeletePartition(p); - lapic_emulation = false; - - let p = WHvCreatePartition() - .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, + .is_ok() + && (capability.Features.AsUINT64 & LAPIC_EMULATION_BIT != 0); + + if !has_lapic { + return Err(CreateVmError::InitializeVm( + windows_result::Error::new( + HRESULT::from_win32(0x32), // ERROR_NOT_SUPPORTED + "WHP LocalApicEmulation capability is required for hw-interrupts", ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; - WHvCreateVirtualProcessor(p, 0, 0) - .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - p - } - } else { - let p = - WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; - WHvCreateVirtualProcessor(p, 0, 0) - .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - p + .into(), + )); } + + let p = WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + + let apic_mode: u32 = 1; // WHvX64LocalApicEmulationModeXApic + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeLocalApicEmulationMode, + &apic_mode as *const _ as *const _, + std::mem::size_of_val(&apic_mode) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + + WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; + + // Initialize the LAPIC via the bulk interrupt-controller + // state API (individual APIC register writes via + // WHvSetVirtualProcessorRegisters fail with ACCESS_DENIED). + Self::init_lapic_bulk(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + + p } #[cfg(not(feature = "hw-interrupts"))] @@ -269,10 +199,6 @@ impl WhpVm { surrogate_process, file_mappings: Vec::new(), #[cfg(feature = "hw-interrupts")] - lapic_emulation, - #[cfg(feature = "hw-interrupts")] - lapic_timer_active: false, - #[cfg(feature = "hw-interrupts")] timer_stop: None, #[cfg(feature = "hw-interrupts")] timer_thread: None, @@ -304,27 +230,8 @@ impl WhpVm { )?; } state.truncate(written as usize); - log::info!( - "[WHP] init_lapic_bulk: got {} bytes of LAPIC state", - written - ); - // SVR (offset 0xF0): enable APIC (bit 8) + spurious vector 0xFF - Self::write_lapic_u32(&mut state, 0xF0, 0x1FF); - // TPR (offset 0x80): 0 = accept all interrupt priorities - Self::write_lapic_u32(&mut state, 0x80, 0); - // DFR (offset 0xE0): 0xFFFFFFFF = flat model - Self::write_lapic_u32(&mut state, 0xE0, 0xFFFF_FFFF); - // LDR (offset 0xD0): set logical APIC ID for flat model - Self::write_lapic_u32(&mut state, 0xD0, 1 << 24); - // LINT0 (offset 0x350): masked - Self::write_lapic_u32(&mut state, 0x350, 0x0001_0000); - // LINT1 (offset 0x360): NMI delivery, not masked - Self::write_lapic_u32(&mut state, 0x360, 0x400); - // LVT Timer (offset 0x320): masked initially (configured on PvTimerConfig) - Self::write_lapic_u32(&mut state, 0x320, 0x0001_0000); - // LVT Error (offset 0x370): masked - Self::write_lapic_u32(&mut state, 0x370, 0x0001_0000); + super::hw_interrupts::init_lapic_registers(&mut state); unsafe { WHvSetVirtualProcessorInterruptControllerState2( @@ -338,23 +245,6 @@ impl WhpVm { Ok(()) } - /// Read a u32 from a LAPIC register page at the given APIC offset. - #[cfg(feature = "hw-interrupts")] - fn read_lapic_u32(state: &[u8], offset: usize) -> u32 { - u32::from_le_bytes([ - state[offset], - state[offset + 1], - state[offset + 2], - state[offset + 3], - ]) - } - - /// Write a u32 to a LAPIC register page at the given APIC offset. - #[cfg(feature = "hw-interrupts")] - fn write_lapic_u32(state: &mut [u8], offset: usize, val: u32) { - state[offset..offset + 4].copy_from_slice(&val.to_le_bytes()); - } - /// Helper for setting arbitrary registers. Makes sure the same number /// of names and values are passed (at the expense of some performance). fn set_registers( @@ -536,7 +426,7 @@ impl VirtualMachine for WhpVm { if self.handle_hw_io_out(port, &data) { continue; } - } else if let Some(val) = self.handle_hw_io_in(port) { + } else if let Some(val) = super::hw_interrupts::handle_io_in(port) { self.set_registers(&[( WHvX64RegisterRax, Align16(WHV_REGISTER_VALUE { Reg64: val }), @@ -557,7 +447,7 @@ impl VirtualMachine for WhpVm { // thread injects an interrupt via WHvRequestInterrupt, // waking the vCPU from HLT. #[cfg(feature = "hw-interrupts")] - if self.lapic_timer_active { + if self.timer_thread.is_some() { continue; } return Ok(VmExit::Halt()); @@ -610,13 +500,13 @@ impl VirtualMachine for WhpVm { } WHV_RUN_VP_EXIT_REASON(_) => { let rip = exit_context.VpContext.Rip; - log::error!( + tracing::error!( "WHP unknown exit reason {}: RIP={:#x}", exit_context.ExitReason.0, rip, ); if let Ok(regs) = self.regs() { - log::error!( + tracing::error!( " RAX={:#x} RCX={:#x} RDX={:#x}", regs.rax, regs.rcx, @@ -624,7 +514,7 @@ impl VirtualMachine for WhpVm { ); } if let Ok(sregs) = self.sregs() { - log::error!( + tracing::error!( " CR0={:#x} CR4={:#x} EFER={:#x} APIC_BASE={:#x}", sregs.cr0, sregs.cr4, @@ -748,15 +638,13 @@ impl VirtualMachine for WhpVm { let whp_regs: [(WHV_REGISTER_NAME, Align16); WHP_SREGS_NAMES_LEN] = sregs.into(); - // When LAPIC emulation is active, skip writing APIC_BASE. - // The generic CommonSpecialRegisters defaults APIC_BASE to 0 - // which would globally disable the LAPIC. On some WHP hosts, - // host-side APIC register writes are blocked entirely - // (ACCESS_DENIED), so attempting to write even a valid value - // would fail and abort set_sregs. Omitting it lets the VP - // keep its default APIC_BASE and the guest configures it. + // When LAPIC emulation is active (always with hw-interrupts), + // skip writing APIC_BASE. The generic CommonSpecialRegisters + // defaults APIC_BASE to 0 which would globally disable the LAPIC. + // On some WHP hosts, host-side APIC register writes are blocked + // entirely (ACCESS_DENIED). #[cfg(feature = "hw-interrupts")] - if self.lapic_emulation { + { let filtered: Vec<_> = whp_regs .iter() .copied() @@ -764,12 +652,15 @@ impl VirtualMachine for WhpVm { .collect(); self.set_registers(&filtered) .map_err(|e| RegisterError::SetSregs(e.into()))?; - return Ok(()); + Ok(()) } - self.set_registers(&whp_regs) - .map_err(|e| RegisterError::SetSregs(e.into()))?; - Ok(()) + #[cfg(not(feature = "hw-interrupts"))] + { + self.set_registers(&whp_regs) + .map_err(|e| RegisterError::SetSregs(e.into()))?; + Ok(()) + } } fn debug_regs(&self) -> std::result::Result { @@ -1120,11 +1011,6 @@ impl DebuggableVm for WhpVm { #[cfg(feature = "hw-interrupts")] impl WhpVm { - /// Hardcoded timer interrupt vector. The nanvix guest always - /// remaps IRQ0 to vector 0x20 via PIC ICW2, so we use that same - /// vector directly — no PIC state machine needed. - const TIMER_VECTOR: u32 = 0x20; - /// Get the LAPIC state via the bulk interrupt-controller state API. fn get_lapic_state(&self) -> windows_result::Result> { let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; @@ -1160,36 +1046,8 @@ impl WhpVm { /// delivers through the LAPIC and the guest only acknowledges via PIC. fn do_lapic_eoi(&self) { if let Ok(mut state) = self.get_lapic_state() { - // ISR is at offset 0x100, 8 x 32-bit words (one per 16 bytes). - // Scan from highest priority (ISR[7]) to lowest (ISR[0]). - for i in (0u32..8).rev() { - let offset = 0x100 + (i as usize) * 0x10; - let isr_val = Self::read_lapic_u32(&state, offset); - if isr_val != 0 { - let bit = 31 - isr_val.leading_zeros(); - Self::write_lapic_u32(&mut state, offset, isr_val & !(1u32 << bit)); - let _ = self.set_lapic_state(&state); - break; - } - } - } - } - - /// Handle a hardware-interrupt IO IN request. - /// Returns `Some(value)` if the port was handled, `None` if the - /// port should be passed through to the guest handler. - /// - /// No PIC state machine — PIC data ports return 0xFF (all masked), - /// PIC command ports return 0 (no pending IRQ), PIT returns 0. - fn handle_hw_io_in(&self, port: u16) -> Option { - match port { - // PIC master/slave data ports — return "all masked" - 0x21 | 0xA1 => Some(0xFF), - // PIC master/slave command ports — return 0 (ISR/IRR read) - 0x20 | 0xA0 => Some(0), - // PIT data port read — return 0 - 0x40 => Some(0), - _ => None, + super::hw_interrupts::lapic_eoi(&mut state); + let _ = self.set_lapic_state(&state); } } @@ -1201,28 +1059,19 @@ impl WhpVm { if let Some(handle) = self.timer_thread.take() { let _ = handle.join(); } - self.lapic_timer_active = false; } fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { if port == OutBAction::PvTimerConfig as u16 { if data.len() >= 4 { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); - log::info!( - "[WHP] PvTimerConfig: period_us={period_us}, lapic_emulation={}", - self.lapic_emulation - ); // Stop any existing timer thread before (re-)configuring. self.stop_timer_thread(); - if period_us > 0 && self.lapic_emulation { - // WHP's bulk LAPIC state API does not start the LAPIC - // timer countdown (CurCount stays 0). Instead we use a - // host-side software timer thread that periodically - // injects interrupts via WHvRequestInterrupt. + if period_us > 0 { let partition_raw = self.partition.0; - let vector = Self::TIMER_VECTOR; + let vector = super::hw_interrupts::TIMER_VECTOR; let stop = Arc::new(AtomicBool::new(false)); let stop_clone = stop.clone(); let period = std::time::Duration::from_micros(period_us as u64); @@ -1251,38 +1100,12 @@ impl WhpVm { self.timer_stop = Some(stop); self.timer_thread = Some(handle); - self.lapic_timer_active = true; - log::info!( - "[WHP] Software timer started (period={period_us}us, vector={vector:#x})" - ); } } return true; } - // PIC ports (0x20, 0x21, 0xA0, 0xA1): accept as no-ops. - // No PIC state machine — we only need LAPIC EOI bridging when - // the guest sends a non-specific EOI on the master PIC command - // port (0x20, OCW2 byte with bits 7:5 = 001). - if port == 0x20 || port == 0x21 || port == 0xA0 || port == 0xA1 { - if port == 0x20 - && !data.is_empty() - && (data[0] & 0xE0) == 0x20 - && self.lapic_timer_active - { - self.do_lapic_eoi(); - } - return true; - } - if port == 0x43 || port == 0x40 { - return true; - } - if port == 0x61 { - return true; - } - if port == 0x80 { - return true; - } - false + let timer_active = self.timer_thread.is_some(); + super::hw_interrupts::handle_common_io_out(port, data, timer_active, || self.do_lapic_eoi()) } } @@ -1352,59 +1175,11 @@ mod hw_interrupt_tests { use super::*; #[test] - fn lapic_register_helpers() { + fn lapic_register_helpers_delegate() { + use crate::hypervisor::virtual_machine::hw_interrupts; let mut state = vec![0u8; 1024]; - WhpVm::write_lapic_u32(&mut state, 0xF0, 0x1FF); - assert_eq!(WhpVm::read_lapic_u32(&state, 0xF0), 0x1FF); - - WhpVm::write_lapic_u32(&mut state, 0x320, 0x0002_0020); - assert_eq!(WhpVm::read_lapic_u32(&state, 0x320), 0x0002_0020); - } - - #[test] - fn lapic_lvt_timer_periodic() { - // LVT Timer: vector 0x20, periodic (bit 17), unmasked - let vector: u32 = 0x20; - let lvt = vector | (1 << 17); - assert_eq!(lvt & 0xFF, 0x20, "vector should be 0x20"); - assert_ne!(lvt & (1 << 17), 0, "periodic bit should be set"); - assert_eq!(lvt & (1 << 16), 0, "mask bit should be clear"); - } - - #[test] - fn lapic_timer_initial_count() { - // Hyper-V LAPIC timer is 10 MHz with divide-by-1. - // 1 tick = 100 ns, so period_us * 10 = initial count. - let period_us: u32 = 1000; // 1 ms - let initial_count = period_us * 10; - assert_eq!(initial_count, 10_000, "1ms should be 10000 ticks"); - - let period_us: u32 = 100; // 100 us - let initial_count = period_us * 10; - assert_eq!(initial_count, 1_000, "100us should be 1000 ticks"); - } - - #[test] - fn lapic_svr_value() { - let svr: u64 = 0x1FF; - assert_ne!(svr & (1 << 8), 0, "APIC should be enabled"); - assert_eq!(svr & 0xFF, 0xFF, "spurious vector should be 0xFF"); - } - - #[test] - fn apic_base_re_enable_value() { - // When re-enabling: base 0xFEE00000 + BSP (bit 8) + enable (bit 11) - let apic_base: u64 = 0xFEE00900; - assert_ne!(apic_base & (1 << 8), 0, "BSP flag"); - assert_ne!(apic_base & (1 << 11), 0, "global enable"); - assert_eq!(apic_base & 0xFFFFF000, 0xFEE00000, "base address"); - } - - #[test] - fn lapic_divide_config() { - // 0x0B = divide-by-1 - let divide: u32 = 0x0B; - assert_eq!(divide, 0x0B, "divide config should be 0x0B (divide-by-1)"); + hw_interrupts::write_lapic_u32(&mut state, 0xF0, 0x1FF); + assert_eq!(hw_interrupts::read_lapic_u32(&state, 0xF0), 0x1FF); } #[test] @@ -1424,13 +1199,10 @@ mod hw_interrupt_tests { ); let raw = unsafe { capability.Features.AsUINT64 }; let has_lapic = raw & (1 << 1) != 0; // bit 1 = LocalApicEmulation - println!("WHP Features raw: {raw:#018x}"); - println!("WHP LocalApicEmulation supported: {has_lapic}"); assert!( has_lapic, "This host does not support WHP LocalApicEmulation. \ - hw-interrupts LAPIC timer tests will be skipped. \ - Windows 11 22H2+ or a recent Windows Server build is typically required." + hw-interrupts requires Windows 11 22H2+ or a recent Windows Server build." ); } } From fd5f2954feaf8b960c635738dd0c02aeda058998 Mon Sep 17 00:00:00 2001 From: danbugs Date: Sat, 7 Mar 2026 08:32:54 +0000 Subject: [PATCH 14/37] fix: reset timer_stop flag before spawning timer thread During evolve(), the guest init sequence halts via OutBAction::Halt, which sets timer_stop=true. When the guest later configures a timer via PvTimerConfig, the timer thread inherits the already-true stop flag and exits immediately without ever firing. Reset the flag to false right before spawning the timer thread in both KVM and MSHV backends. WHP was not affected because it creates a fresh AtomicBool each time. Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm/x86_64.rs | 3 +++ .../src/hypervisor/virtual_machine/mshv/x86_64.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 79484aa84..e4c556715 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -260,6 +260,9 @@ impl VirtualMachine for KvmVm { if let Ok(bytes) = data[..4].try_into() { let period_us = u32::from_le_bytes(bytes) as u64; if period_us > 0 && self.timer_thread.is_none() { + // Reset the stop flag — a previous halt (e.g. the + // init halt during evolve()) may have set it. + self.timer_stop.store(false, Ordering::Relaxed); let eventfd = self.timer_irq_eventfd.try_clone().map_err(|e| { RunVcpuError::Unknown(HypervisorError::KvmError(e.into())) })?; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 9d238b081..ee1ea7db2 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -639,6 +639,9 @@ impl MshvVm { if data.len() >= 4 { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); if period_us > 0 && self.timer_thread.is_none() { + // Reset the stop flag — a previous halt (e.g. the + // init halt during evolve()) may have set it. + self.timer_stop.store(false, Ordering::Relaxed); // Re-enable LAPIC if the guest disabled it (via WRMSR // to MSR 0x1B clearing bit 11). Some guests clear // the global APIC enable when no I/O APIC is detected. From ff96cecc26e01d01385a1dd7722fd0b874d4485a Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 10 Mar 2026 06:27:46 +0000 Subject: [PATCH 15/37] Address copilot review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - KVM: data[..4] → data.get(..4) to avoid panic on short IO OUT - KVM/MSHV: stop timer thread when period_us == 0 - WHP: check LAPIC state buffer >= 0x374 before init_lapic_registers - hw_interrupts: add bounds checks + panic messages to write/read_lapic_u32 - init.rs/dispatch.rs: use xor eax,eax; out dx,eax; cli; hlt halt sequence - simpleguest: guard IDT vector 0x20 against short IDT limit - simpleguest: early return on period_us <= 0 Signed-off-by: danbugs --- .../src/arch/amd64/dispatch.rs | 5 +- .../src/arch/amd64/init.rs | 5 +- .../virtual_machine/hw_interrupts.rs | 36 ++++++++++++++- .../hypervisor/virtual_machine/kvm/x86_64.rs | 46 +++++++++++-------- .../hypervisor/virtual_machine/mshv/x86_64.rs | 8 +++- .../src/hypervisor/virtual_machine/whp.rs | 10 ++++ src/tests/rust_guests/simpleguest/src/main.rs | 13 +++++- 7 files changed, 99 insertions(+), 24 deletions(-) diff --git a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs index a39de9020..3f851a36c 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs @@ -69,6 +69,9 @@ core::arch::global_asm!(" flush_done: call {internal_dispatch_function}\n mov dx, 108\n - out dx, al\n + xor eax, eax\n + out dx, eax\n + cli\n + hlt\n .cfi_endproc ", internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function); diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index bfa458059..72edb952a 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -174,6 +174,9 @@ core::arch::global_asm!(" xor ebp, ebp\n call {generic_init}\n mov dx, 108\n - out dx, al\n + xor eax, eax\n + out dx, eax\n + cli\n + hlt\n .cfi_endproc\n ", generic_init = sym crate::generic_init); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs index e20718fce..3b03c00f5 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs @@ -86,12 +86,32 @@ pub(crate) fn handle_common_io_out( // --------------------------------------------------------------------------- /// Write a u32 to a LAPIC register page at the given APIC offset. +/// +/// # Panics +/// Panics if `offset + 4 > state.len()`. pub(crate) fn write_lapic_u32(state: &mut [u8], offset: usize, val: u32) { - state[offset..offset + 4].copy_from_slice(&val.to_le_bytes()); + let end = offset + 4; + assert!( + end <= state.len(), + "write_lapic_u32: offset {:#x} out of bounds (state len {:#x})", + offset, + state.len() + ); + state[offset..end].copy_from_slice(&val.to_le_bytes()); } /// Read a u32 from a LAPIC register page at the given APIC offset. +/// +/// # Panics +/// Panics if `offset + 4 > state.len()`. pub(crate) fn read_lapic_u32(state: &[u8], offset: usize) -> u32 { + let end = offset + 4; + assert!( + end <= state.len(), + "read_lapic_u32: offset {:#x} out of bounds (state len {:#x})", + offset, + state.len() + ); u32::from_le_bytes([ state[offset], state[offset + 1], @@ -250,4 +270,18 @@ mod tests { fn handle_common_io_out_unknown_port() { assert!(!handle_common_io_out(0x100, &[], false, || {})); } + + #[test] + #[should_panic(expected = "out of bounds")] + fn write_lapic_u32_panics_on_short_buffer() { + let mut state = vec![0u8; 4]; + write_lapic_u32(&mut state, 2, 0xDEAD); // offset 2 + 4 = 6 > 4 + } + + #[test] + #[should_panic(expected = "out of bounds")] + fn read_lapic_u32_panics_on_short_buffer() { + let state = vec![0u8; 4]; + read_lapic_u32(&state, 2); // offset 2 + 4 = 6 > 4 + } } diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index e4c556715..0ddf0f327 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -257,26 +257,34 @@ impl VirtualMachine for KvmVm { if port == OutBAction::PvTimerConfig as u16 { // The guest is configuring the timer period. // Extract the period in microseconds (LE u32). - if let Ok(bytes) = data[..4].try_into() { - let period_us = u32::from_le_bytes(bytes) as u64; - if period_us > 0 && self.timer_thread.is_none() { - // Reset the stop flag — a previous halt (e.g. the - // init halt during evolve()) may have set it. - self.timer_stop.store(false, Ordering::Relaxed); - let eventfd = self.timer_irq_eventfd.try_clone().map_err(|e| { - RunVcpuError::Unknown(HypervisorError::KvmError(e.into())) - })?; - let stop = self.timer_stop.clone(); - let period = std::time::Duration::from_micros(period_us); - self.timer_thread = Some(std::thread::spawn(move || { - while !stop.load(Ordering::Relaxed) { - std::thread::sleep(period); - if stop.load(Ordering::Relaxed) { - break; - } - let _ = eventfd.write(1); + if let Some(bytes4) = data.get(..4) { + if let Ok(bytes) = bytes4.try_into() { + let period_us = u32::from_le_bytes(bytes) as u64; + if period_us == 0 { + // Stop existing timer if any. + if let Some(thread) = self.timer_thread.take() { + self.timer_stop.store(true, Ordering::Relaxed); + let _ = thread.join(); } - })); + } else if self.timer_thread.is_none() { + // Reset the stop flag — a previous halt (e.g. the + // init halt during evolve()) may have set it. + self.timer_stop.store(false, Ordering::Relaxed); + let eventfd = self.timer_irq_eventfd.try_clone().map_err(|e| { + RunVcpuError::Unknown(HypervisorError::KvmError(e.into())) + })?; + let stop = self.timer_stop.clone(); + let period = std::time::Duration::from_micros(period_us); + self.timer_thread = Some(std::thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + std::thread::sleep(period); + if stop.load(Ordering::Relaxed) { + break; + } + let _ = eventfd.write(1); + } + })); + } } } continue; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index ee1ea7db2..2887c6468 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -638,7 +638,13 @@ impl MshvVm { if port == OutBAction::PvTimerConfig as u16 { if data.len() >= 4 { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); - if period_us > 0 && self.timer_thread.is_none() { + if period_us == 0 { + // Stop existing timer if any. + if let Some(thread) = self.timer_thread.take() { + self.timer_stop.store(true, Ordering::Relaxed); + let _ = thread.join(); + } + } else if self.timer_thread.is_none() { // Reset the stop flag — a previous halt (e.g. the // init halt during evolve()) may have set it. self.timer_stop.store(false, Ordering::Relaxed); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index e4612bccf..7516f1d2e 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -231,6 +231,16 @@ impl WhpVm { } state.truncate(written as usize); + // init_lapic_registers writes up to offset 0x374 (LVT Error at 0x370). + // Bail out if the buffer returned by WHP is too small. + const MIN_LAPIC_STATE_SIZE: usize = 0x374; + if state.len() < MIN_LAPIC_STATE_SIZE { + return Err(windows_result::Error::new( + HRESULT::from_win32(0x32), // ERROR_NOT_SUPPORTED + "WHP LAPIC state buffer is too small for init_lapic_registers", + )); + } + super::hw_interrupts::init_lapic_registers(&mut state); unsafe { diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 2b694be42..1cb21c252 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -586,8 +586,16 @@ fn test_timer_interrupts(period_us: i32, max_spin: i32) -> i32 { ); } + // Vector 0x20 needs bytes at offset 0x200..0x210 (16-byte entry). + // Ensure the IDT is large enough. + const VECTOR: usize = 0x20; + let required_end = (VECTOR + 1) * 16; // byte just past the entry + if (idtr.limit as usize + 1) < required_end { + return -1; // IDT too small + } + // Write a 16-byte IDT entry at vector 0x20 (offset = 0x20 * 16 = 0x200) - let entry_ptr = (idtr.base as usize + 0x20 * 16) as *mut u8; + let entry_ptr = (idtr.base as usize + VECTOR * 16) as *mut u8; unsafe { // offset_low (bits 0-15 of handler) core::ptr::write_volatile(entry_ptr as *mut u16, handler_addr as u16); @@ -609,6 +617,9 @@ fn test_timer_interrupts(period_us: i32, max_spin: i32) -> i32 { // Divisor = period_us * 1_193_182 / 1_000_000 (PIT oscillator is 1.193182 MHz). // On KVM the in-kernel PIT handles these IO writes directly. // On MSHV/WHP these ports are silently absorbed (timer is set via OutBAction::PvTimerConfig). + if period_us <= 0 { + return -1; // invalid period + } let divisor = ((period_us as u64) * 1_193_182 / 1_000_000).clamp(1, 0xFFFF) as u16; unsafe { // Command: channel 0, lobyte/hibyte access, mode 2 (rate generator) From 329dee2d2e477b7d3f9bb0a2a1dadc7354490cb5 Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 10 Mar 2026 15:14:28 +0000 Subject: [PATCH 16/37] Fix formatting (rustfmt nightly) Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm/x86_64.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 0ddf0f327..2c01a227f 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -270,9 +270,12 @@ impl VirtualMachine for KvmVm { // Reset the stop flag — a previous halt (e.g. the // init halt during evolve()) may have set it. self.timer_stop.store(false, Ordering::Relaxed); - let eventfd = self.timer_irq_eventfd.try_clone().map_err(|e| { - RunVcpuError::Unknown(HypervisorError::KvmError(e.into())) - })?; + let eventfd = + self.timer_irq_eventfd.try_clone().map_err(|e| { + RunVcpuError::Unknown(HypervisorError::KvmError( + e.into(), + )) + })?; let stop = self.timer_stop.clone(); let period = std::time::Duration::from_micros(period_us); self.timer_thread = Some(std::thread::spawn(move || { From b954562b55c42b45aea26d38f817dd5c75c8a727 Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 10 Mar 2026 15:32:33 +0000 Subject: [PATCH 17/37] Fix clippy collapsible-if in KVM timer config Signed-off-by: Daniil Baturin Signed-off-by: danbugs --- .../hypervisor/virtual_machine/kvm/x86_64.rs | 57 +++++++++---------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 2c01a227f..35daf75ae 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -257,37 +257,34 @@ impl VirtualMachine for KvmVm { if port == OutBAction::PvTimerConfig as u16 { // The guest is configuring the timer period. // Extract the period in microseconds (LE u32). - if let Some(bytes4) = data.get(..4) { - if let Ok(bytes) = bytes4.try_into() { - let period_us = u32::from_le_bytes(bytes) as u64; - if period_us == 0 { - // Stop existing timer if any. - if let Some(thread) = self.timer_thread.take() { - self.timer_stop.store(true, Ordering::Relaxed); - let _ = thread.join(); - } - } else if self.timer_thread.is_none() { - // Reset the stop flag — a previous halt (e.g. the - // init halt during evolve()) may have set it. - self.timer_stop.store(false, Ordering::Relaxed); - let eventfd = - self.timer_irq_eventfd.try_clone().map_err(|e| { - RunVcpuError::Unknown(HypervisorError::KvmError( - e.into(), - )) - })?; - let stop = self.timer_stop.clone(); - let period = std::time::Duration::from_micros(period_us); - self.timer_thread = Some(std::thread::spawn(move || { - while !stop.load(Ordering::Relaxed) { - std::thread::sleep(period); - if stop.load(Ordering::Relaxed) { - break; - } - let _ = eventfd.write(1); - } - })); + if let Some(bytes4) = data.get(..4) + && let Ok(bytes) = bytes4.try_into() + { + let period_us = u32::from_le_bytes(bytes) as u64; + if period_us == 0 { + // Stop existing timer if any. + if let Some(thread) = self.timer_thread.take() { + self.timer_stop.store(true, Ordering::Relaxed); + let _ = thread.join(); } + } else if self.timer_thread.is_none() { + // Reset the stop flag — a previous halt (e.g. the + // init halt during evolve()) may have set it. + self.timer_stop.store(false, Ordering::Relaxed); + let eventfd = self.timer_irq_eventfd.try_clone().map_err(|e| { + RunVcpuError::Unknown(HypervisorError::KvmError(e.into())) + })?; + let stop = self.timer_stop.clone(); + let period = std::time::Duration::from_micros(period_us); + self.timer_thread = Some(std::thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + std::thread::sleep(period); + if stop.load(Ordering::Relaxed) { + break; + } + let _ = eventfd.write(1); + } + })); } } continue; From 9127f9d602178587b37a29982787cd8cb491fb2c Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 10 Mar 2026 16:30:54 +0000 Subject: [PATCH 18/37] fix: remove disallowed assert! macros from LAPIC helpers The hyperlight-host clippy.toml disallows assert!/assert_eq!/assert_ne! in non-test code. Revert to natural slice bounds checking which already panics on out-of-bounds access. Signed-off-by: danbugs --- .../virtual_machine/hw_interrupts.rs | 20 +++---------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs index 3b03c00f5..2d339af5f 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs @@ -90,14 +90,7 @@ pub(crate) fn handle_common_io_out( /// # Panics /// Panics if `offset + 4 > state.len()`. pub(crate) fn write_lapic_u32(state: &mut [u8], offset: usize, val: u32) { - let end = offset + 4; - assert!( - end <= state.len(), - "write_lapic_u32: offset {:#x} out of bounds (state len {:#x})", - offset, - state.len() - ); - state[offset..end].copy_from_slice(&val.to_le_bytes()); + state[offset..offset + 4].copy_from_slice(&val.to_le_bytes()); } /// Read a u32 from a LAPIC register page at the given APIC offset. @@ -105,13 +98,6 @@ pub(crate) fn write_lapic_u32(state: &mut [u8], offset: usize, val: u32) { /// # Panics /// Panics if `offset + 4 > state.len()`. pub(crate) fn read_lapic_u32(state: &[u8], offset: usize) -> u32 { - let end = offset + 4; - assert!( - end <= state.len(), - "read_lapic_u32: offset {:#x} out of bounds (state len {:#x})", - offset, - state.len() - ); u32::from_le_bytes([ state[offset], state[offset + 1], @@ -272,14 +258,14 @@ mod tests { } #[test] - #[should_panic(expected = "out of bounds")] + #[should_panic] fn write_lapic_u32_panics_on_short_buffer() { let mut state = vec![0u8; 4]; write_lapic_u32(&mut state, 2, 0xDEAD); // offset 2 + 4 = 6 > 4 } #[test] - #[should_panic(expected = "out of bounds")] + #[should_panic] fn read_lapic_u32_panics_on_short_buffer() { let state = vec![0u8; 4]; read_lapic_u32(&state, 2); // offset 2 + 4 = 6 > 4 From 6dbf3e7b23c93b29dd0ce1ad8b6438b845ed518c Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 10 Mar 2026 16:35:48 +0000 Subject: [PATCH 19/37] refactor: extract hw-interrupts and default vCPU run into separate methods Split the two cfg-gated code paths in run_vcpu into run_vcpu_hw_interrupts and run_vcpu_default helper methods on KvmVm, keeping run_vcpu as a thin dispatcher. This makes each path easier to read and reason about independently. Signed-off-by: danbugs --- .../hypervisor/virtual_machine/kvm/x86_64.rs | 99 +++++++++++-------- 1 file changed, 56 insertions(+), 43 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 35daf75ae..280c02cec 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -195,49 +195,17 @@ impl KvmVm { debug_regs: kvm_guest_debug::default(), }) } -} - -impl VirtualMachine for KvmVm { - unsafe fn map_memory( - &mut self, - (slot, region): (u32, &MemoryRegion), - ) -> std::result::Result<(), MapMemoryError> { - let mut kvm_region: kvm_userspace_memory_region = region.into(); - kvm_region.slot = slot; - unsafe { self.vm_fd.set_user_memory_region(kvm_region) } - .map_err(|e| MapMemoryError::Hypervisor(e.into())) - } - - fn unmap_memory( - &mut self, - (slot, region): (u32, &MemoryRegion), - ) -> std::result::Result<(), UnmapMemoryError> { - let mut kvm_region: kvm_userspace_memory_region = region.into(); - kvm_region.slot = slot; - // Setting memory_size to 0 unmaps the slot's region - // From https://docs.kernel.org/virt/kvm/api.html - // > Deleting a slot is done by passing zero for memory_size. - kvm_region.memory_size = 0; - unsafe { self.vm_fd.set_user_memory_region(kvm_region) } - .map_err(|e| UnmapMemoryError::Hypervisor(e.into())) - } - - fn run_vcpu( - &mut self, - #[cfg(feature = "trace_guest")] tc: &mut SandboxTraceContext, - ) -> std::result::Result { - // setup_trace_guest must be called right before vcpu_run.run() call, because - // it sets the guest span, no other traces or spans must be setup in between these calls. - #[cfg(feature = "trace_guest")] - tc.setup_guest_trace(Span::current().context()); - // When hw-interrupts is enabled, the in-kernel PIC + LAPIC deliver - // interrupts triggered by the host-side timer thread via irqfd. - // There is no in-kernel PIT; guest PIT port writes are no-ops. - // The guest signals "I'm done" by writing to OutBAction::Halt - // (an IO port exit) instead of using HLT, because the in-kernel - // LAPIC absorbs HLT (never returns VcpuExit::Hlt to userspace). - #[cfg(feature = "hw-interrupts")] + /// Run the vCPU loop with hardware interrupt support. + /// + /// When hw-interrupts is enabled, the in-kernel PIC + LAPIC deliver + /// interrupts triggered by the host-side timer thread via irqfd. + /// There is no in-kernel PIT; guest PIT port writes are no-ops. + /// The guest signals "I'm done" by writing to OutBAction::Halt + /// (an IO port exit) instead of using HLT, because the in-kernel + /// LAPIC absorbs HLT (never returns VcpuExit::Hlt to userspace). + #[cfg(feature = "hw-interrupts")] + fn run_vcpu_hw_interrupts(&mut self) -> std::result::Result { loop { match self.vcpu_fd.run() { Ok(VcpuExit::Hlt) => { @@ -318,8 +286,11 @@ impl VirtualMachine for KvmVm { } } } + } - #[cfg(not(feature = "hw-interrupts"))] + /// Run the vCPU once without hardware interrupt support (default path). + #[cfg(not(feature = "hw-interrupts"))] + fn run_vcpu_default(&mut self) -> std::result::Result { match self.vcpu_fd.run() { Ok(VcpuExit::Hlt) => Ok(VmExit::Halt()), Ok(VcpuExit::IoOut(port, _)) if port == OutBAction::Halt as u16 => Ok(VmExit::Halt()), @@ -343,6 +314,48 @@ impl VirtualMachine for KvmVm { ))), } } +} + +impl VirtualMachine for KvmVm { + unsafe fn map_memory( + &mut self, + (slot, region): (u32, &MemoryRegion), + ) -> std::result::Result<(), MapMemoryError> { + let mut kvm_region: kvm_userspace_memory_region = region.into(); + kvm_region.slot = slot; + unsafe { self.vm_fd.set_user_memory_region(kvm_region) } + .map_err(|e| MapMemoryError::Hypervisor(e.into())) + } + + fn unmap_memory( + &mut self, + (slot, region): (u32, &MemoryRegion), + ) -> std::result::Result<(), UnmapMemoryError> { + let mut kvm_region: kvm_userspace_memory_region = region.into(); + kvm_region.slot = slot; + // Setting memory_size to 0 unmaps the slot's region + // From https://docs.kernel.org/virt/kvm/api.html + // > Deleting a slot is done by passing zero for memory_size. + kvm_region.memory_size = 0; + unsafe { self.vm_fd.set_user_memory_region(kvm_region) } + .map_err(|e| UnmapMemoryError::Hypervisor(e.into())) + } + + fn run_vcpu( + &mut self, + #[cfg(feature = "trace_guest")] tc: &mut SandboxTraceContext, + ) -> std::result::Result { + // setup_trace_guest must be called right before vcpu_run.run() call, because + // it sets the guest span, no other traces or spans must be setup in between these calls. + #[cfg(feature = "trace_guest")] + tc.setup_guest_trace(Span::current().context()); + + #[cfg(feature = "hw-interrupts")] + return self.run_vcpu_hw_interrupts(); + + #[cfg(not(feature = "hw-interrupts"))] + self.run_vcpu_default() + } fn regs(&self) -> std::result::Result { let kvm_regs = self From 6be5f5d55c63a5efa85817fc595a706220151a7f Mon Sep 17 00:00:00 2001 From: danbugs Date: Tue, 10 Mar 2026 17:29:09 +0000 Subject: [PATCH 20/37] fix: preserve RAX across halt sequence in init and dispatch The host reads RAX after the init halt to obtain the dispatch_function address (stored by generic_init's return value). Similarly, the host reads RAX after dispatch to get the guest function return value. The previous 'xor eax, eax' before 'out dx, eax' zeroed RAX, causing the host to store NextAction::Call(0) and subsequent guest calls to jump to address 0 (triggering Double Fault on MSHV/WHP). Remove the xor; the port-108 halt signal only needs the port number, not a specific data value. Signed-off-by: danbugs --- src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs | 1 - src/hyperlight_guest_bin/src/arch/amd64/init.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs index 3f851a36c..117fa42e6 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs @@ -69,7 +69,6 @@ core::arch::global_asm!(" flush_done: call {internal_dispatch_function}\n mov dx, 108\n - xor eax, eax\n out dx, eax\n cli\n hlt\n diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index 72edb952a..c3037b6aa 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -174,7 +174,6 @@ core::arch::global_asm!(" xor ebp, ebp\n call {generic_init}\n mov dx, 108\n - xor eax, eax\n out dx, eax\n cli\n hlt\n From b7dab7ce361302fe900607c1b0fa3156b2826811 Mon Sep 17 00:00:00 2001 From: danbugs Date: Fri, 13 Mar 2026 19:56:24 +0000 Subject: [PATCH 21/37] refactor: split OutBAction into OutBAction + VmAction, move hw_interrupts to x86_64/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Split OutBAction enum: PvTimerConfig and Halt moved to new VmAction enum. VmAction ports are intercepted at the hypervisor level in run_vcpu and never reach the sandbox outb handler, so the split eliminates unreachable match arms. - Move hw_interrupts.rs to virtual_machine/x86_64/hw_interrupts.rs since it contains x86-64 specific helpers (LAPIC, PIC, PIT). - Remove halt() from hyperlight_guest::exit — all halt sites use inline assembly with options(noreturn) to avoid stack epilogue issues. - Extract handle_pv_timer_config() method in KVM backend. - Remove intermediate variable in WhpVm::new(). - Restore create_vm_with_args() comment in MSHV backend. - Add memory fence after IDT writes in simpleguest for CoW safety. Signed-off-by: danbugs --- src/hyperlight_common/src/outb.rs | 21 ++++- src/hyperlight_guest/src/exit.rs | 28 ------ .../hypervisor/virtual_machine/kvm/x86_64.rs | 85 ++++++++++--------- .../src/hypervisor/virtual_machine/mod.rs | 4 +- .../hypervisor/virtual_machine/mshv/x86_64.rs | 34 +++++--- .../src/hypervisor/virtual_machine/whp.rs | 34 +++++--- .../{ => x86_64}/hw_interrupts.rs | 6 +- .../hypervisor/virtual_machine/x86_64/mod.rs | 18 ++++ src/hyperlight_host/src/sandbox/outb.rs | 5 -- src/hyperlight_host/tests/integration_test.rs | 4 +- src/tests/rust_guests/dummyguest/src/main.rs | 2 +- src/tests/rust_guests/simpleguest/src/main.rs | 17 ++-- 12 files changed, 140 insertions(+), 118 deletions(-) rename src/hyperlight_host/src/hypervisor/virtual_machine/{ => x86_64}/hw_interrupts.rs (98%) create mode 100644 src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/mod.rs diff --git a/src/hyperlight_common/src/outb.rs b/src/hyperlight_common/src/outb.rs index 9cba7f4a7..3bfb99848 100644 --- a/src/hyperlight_common/src/outb.rs +++ b/src/hyperlight_common/src/outb.rs @@ -86,6 +86,7 @@ impl TryFrom for Exception { } /// Supported actions when issuing an OUTB actions by Hyperlight. +/// These are handled by the sandbox-level outb dispatcher. /// - Log: for logging, /// - CallFunction: makes a call to a host function, /// - Abort: aborts the execution of the guest, @@ -104,6 +105,13 @@ pub enum OutBAction { TraceMemoryAlloc = 105, #[cfg(feature = "mem_profile")] TraceMemoryFree = 106, +} + +/// IO-port actions intercepted at the hypervisor level (in `run_vcpu`) +/// before they ever reach the sandbox outb handler. These are split +/// from [`OutBAction`] so the outb handler does not need unreachable +/// match arms for ports it can never see. +pub enum VmAction { /// IO port for PV timer configuration. The guest writes a 32-bit /// LE value representing the desired timer period in microseconds. /// A value of 0 disables the timer. @@ -129,9 +137,18 @@ impl TryFrom for OutBAction { 105 => Ok(OutBAction::TraceMemoryAlloc), #[cfg(feature = "mem_profile")] 106 => Ok(OutBAction::TraceMemoryFree), - 107 => Ok(OutBAction::PvTimerConfig), - 108 => Ok(OutBAction::Halt), _ => Err(anyhow::anyhow!("Invalid OutBAction value: {}", val)), } } } + +impl TryFrom for VmAction { + type Error = anyhow::Error; + fn try_from(val: u16) -> anyhow::Result { + match val { + 107 => Ok(VmAction::PvTimerConfig), + 108 => Ok(VmAction::Halt), + _ => Err(anyhow::anyhow!("Invalid VmAction value: {}", val)), + } + } +} diff --git a/src/hyperlight_guest/src/exit.rs b/src/hyperlight_guest/src/exit.rs index 1926340a4..e8d58e24e 100644 --- a/src/hyperlight_guest/src/exit.rs +++ b/src/hyperlight_guest/src/exit.rs @@ -18,34 +18,6 @@ use core::arch::asm; use core::ffi::{CStr, c_char}; use hyperlight_common::outb::OutBAction; -use tracing::instrument; - -/// Signal successful completion of the guest's work and return control -/// to the host. This replaces the previous `hlt`-based exit: under the -/// `hw-interrupts` feature, `hlt` becomes a wait-for-interrupt (the -/// in-kernel IRQ chip wakes the vCPU), so we use an explicit IO-port -/// write (port 108) to trigger a VM exit that the host treats as a -/// clean shutdown. -/// -/// This function never returns — the host does not re-enter the guest -/// after seeing the Halt port. -#[inline(never)] -#[instrument(skip_all, level = "Trace")] -pub fn halt() { - #[cfg(all(feature = "trace_guest", target_arch = "x86_64"))] - { - // Send data before halting - // If there is no data, this doesn't do anything - // The reason we do this here instead of in `hlt` asm function - // is to avoid allocating before halting, which leaks memory - // because the guest is not expected to resume execution after halting. - hyperlight_guest_tracing::flush(); - } - - unsafe { - out32(OutBAction::Halt as u16, 0); - } -} /// Exits the VM with an Abort OUT action and code 0. #[unsafe(no_mangle)] diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 280c02cec..1dd80d018 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -20,7 +20,7 @@ use std::sync::LazyLock; #[cfg(feature = "hw-interrupts")] use std::sync::atomic::{AtomicBool, Ordering}; -use hyperlight_common::outb::OutBAction; +use hyperlight_common::outb::VmAction; #[cfg(gdb)] use kvm_bindings::kvm_guest_debug; use kvm_bindings::{ @@ -135,7 +135,7 @@ impl KvmVm { // // Instead of creating an in-kernel PIT (create_pit2), we use a // host-side timer thread + irqfd to inject IRQ0 at the rate - // requested by the guest via OutBAction::PvTimerConfig (port 107). + // requested by the guest via VmAction::PvTimerConfig (port 107). // This eliminates the in-kernel PIT device. Guest PIT port writes // (0x40, 0x43) become no-ops handled in the run loop. #[cfg(feature = "hw-interrupts")] @@ -201,7 +201,7 @@ impl KvmVm { /// When hw-interrupts is enabled, the in-kernel PIC + LAPIC deliver /// interrupts triggered by the host-side timer thread via irqfd. /// There is no in-kernel PIT; guest PIT port writes are no-ops. - /// The guest signals "I'm done" by writing to OutBAction::Halt + /// The guest signals "I'm done" by writing to VmAction::Halt /// (an IO port exit) instead of using HLT, because the in-kernel /// LAPIC absorbs HLT (never returns VcpuExit::Hlt to userspace). #[cfg(feature = "hw-interrupts")] @@ -214,7 +214,7 @@ impl KvmVm { continue; } Ok(VcpuExit::IoOut(port, data)) => { - if port == OutBAction::Halt as u16 { + if port == VmAction::Halt as u16 { // Stop the timer thread before returning. self.timer_stop.store(true, Ordering::Relaxed); if let Some(h) = self.timer_thread.take() { @@ -222,39 +222,12 @@ impl KvmVm { } return Ok(VmExit::Halt()); } - if port == OutBAction::PvTimerConfig as u16 { - // The guest is configuring the timer period. - // Extract the period in microseconds (LE u32). - if let Some(bytes4) = data.get(..4) - && let Ok(bytes) = bytes4.try_into() - { - let period_us = u32::from_le_bytes(bytes) as u64; - if period_us == 0 { - // Stop existing timer if any. - if let Some(thread) = self.timer_thread.take() { - self.timer_stop.store(true, Ordering::Relaxed); - let _ = thread.join(); - } - } else if self.timer_thread.is_none() { - // Reset the stop flag — a previous halt (e.g. the - // init halt during evolve()) may have set it. - self.timer_stop.store(false, Ordering::Relaxed); - let eventfd = self.timer_irq_eventfd.try_clone().map_err(|e| { - RunVcpuError::Unknown(HypervisorError::KvmError(e.into())) - })?; - let stop = self.timer_stop.clone(); - let period = std::time::Duration::from_micros(period_us); - self.timer_thread = Some(std::thread::spawn(move || { - while !stop.load(Ordering::Relaxed) { - std::thread::sleep(period); - if stop.load(Ordering::Relaxed) { - break; - } - let _ = eventfd.write(1); - } - })); - } - } + if port == VmAction::PvTimerConfig as u16 { + let data_copy: [u8; 4] = data + .get(..4) + .and_then(|s| s.try_into().ok()) + .unwrap_or([0; 4]); + self.handle_pv_timer_config(&data_copy)?; continue; } // PIT ports (0x40-0x43): no in-kernel PIT, so these @@ -288,12 +261,44 @@ impl KvmVm { } } + #[cfg(feature = "hw-interrupts")] + fn handle_pv_timer_config(&mut self, data: &[u8; 4]) -> std::result::Result<(), RunVcpuError> { + let period_us = u32::from_le_bytes(*data) as u64; + if period_us == 0 { + // Stop existing timer if any. + if let Some(thread) = self.timer_thread.take() { + self.timer_stop.store(true, Ordering::Relaxed); + let _ = thread.join(); + } + } else if self.timer_thread.is_none() { + // Reset the stop flag — a previous halt (e.g. the + // init halt during evolve()) may have set it. + self.timer_stop.store(false, Ordering::Relaxed); + let eventfd = self + .timer_irq_eventfd + .try_clone() + .map_err(|e| RunVcpuError::Unknown(HypervisorError::KvmError(e.into())))?; + let stop = self.timer_stop.clone(); + let period = std::time::Duration::from_micros(period_us); + self.timer_thread = Some(std::thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + std::thread::sleep(period); + if stop.load(Ordering::Relaxed) { + break; + } + let _ = eventfd.write(1); + } + })); + } + Ok(()) + } + /// Run the vCPU once without hardware interrupt support (default path). #[cfg(not(feature = "hw-interrupts"))] fn run_vcpu_default(&mut self) -> std::result::Result { match self.vcpu_fd.run() { Ok(VcpuExit::Hlt) => Ok(VmExit::Halt()), - Ok(VcpuExit::IoOut(port, _)) if port == OutBAction::Halt as u16 => Ok(VmExit::Halt()), + Ok(VcpuExit::IoOut(port, _)) if port == VmAction::Halt as u16 => Ok(VmExit::Halt()), Ok(VcpuExit::IoOut(port, data)) => Ok(VmExit::IoOut(port, data.to_vec())), Ok(VcpuExit::MmioRead(addr, _)) => Ok(VmExit::MmioRead(addr)), Ok(VcpuExit::MmioWrite(addr, _)) => Ok(VmExit::MmioWrite(addr)), @@ -609,8 +614,8 @@ mod hw_interrupt_tests { #[test] fn halt_port_is_not_standard_device() { - // OutBAction::Halt port must not overlap in-kernel PIC/PIT/speaker ports - const HALT: u16 = OutBAction::Halt as u16; + // VmAction::Halt port must not overlap in-kernel PIC/PIT/speaker ports + const HALT: u16 = VmAction::Halt as u16; const _: () = assert!(HALT != 0x20 && HALT != 0x21); const _: () = assert!(HALT != 0xA0 && HALT != 0xA1); const _: () = assert!(HALT != 0x40 && HALT != 0x41 && HALT != 0x42 && HALT != 0x43); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index d142b21e1..9edaa1f87 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -38,9 +38,9 @@ pub(crate) mod mshv; #[cfg(target_os = "windows")] pub(crate) mod whp; -/// Shared helpers for hardware interrupt support (MSHV and WHP) +/// Shared x86-64 helpers for hardware interrupt support (MSHV and WHP) #[cfg(feature = "hw-interrupts")] -pub(crate) mod hw_interrupts; +pub(crate) mod x86_64; static AVAILABLE_HYPERVISOR: OnceLock> = OnceLock::new(); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 2887c6468..96e82be9d 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -22,7 +22,7 @@ use std::sync::LazyLock; #[cfg(feature = "hw-interrupts")] use std::sync::atomic::{AtomicBool, Ordering}; -use hyperlight_common::outb::OutBAction; +use hyperlight_common::outb::VmAction; #[cfg(feature = "hw-interrupts")] use mshv_bindings::LapicState; #[cfg(gdb)] @@ -128,6 +128,8 @@ impl MshvVm { { pr.pt_flags = 1u64; // LAPIC } + // It's important to use create_vm_with_args() (not create_vm()), + // because create_vm() sets up a SynIC partition by default. let vm_fd = mshv .create_vm_with_args(&pr) .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; @@ -156,7 +158,7 @@ impl MshvVm { // interrupts can be delivered (request_virtual_interrupt would fail). #[cfg(feature = "hw-interrupts")] { - use super::hw_interrupts::init_lapic_registers; + use super::super::x86_64::hw_interrupts::init_lapic_registers; let mut lapic: LapicState = vcpu_fd .get_lapic() @@ -261,9 +263,9 @@ impl VirtualMachine for MshvVm { }]) .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; - // OutBAction::Halt always means "I'm done", regardless + // VmAction::Halt always means "I'm done", regardless // of whether a timer is active. - if is_write && port_number == OutBAction::Halt as u16 { + if is_write && port_number == VmAction::Halt as u16 { // Stop the timer thread before returning. #[cfg(feature = "hw-interrupts")] { @@ -283,7 +285,7 @@ impl VirtualMachine for MshvVm { continue; } } else if let Some(val) = - super::hw_interrupts::handle_io_in(port_number) + super::super::x86_64::hw_interrupts::handle_io_in(port_number) { self.vcpu_fd .set_reg(&[hv_register_assoc { @@ -629,13 +631,13 @@ impl MshvVm { /// acknowledges via PIC. fn do_lapic_eoi(&self) { if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { - super::hw_interrupts::lapic_eoi(lapic_regs_as_u8_mut(&mut lapic.regs)); + super::super::x86_64::hw_interrupts::lapic_eoi(lapic_regs_as_u8_mut(&mut lapic.regs)); let _ = self.vcpu_fd.set_lapic(&lapic); } } fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { - if port == OutBAction::PvTimerConfig as u16 { + if port == VmAction::PvTimerConfig as u16 { if data.len() >= 4 { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); if period_us == 0 { @@ -677,17 +679,19 @@ impl MshvVm { // guest disabled the APIC globally) if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { let regs = lapic_regs_as_u8(&lapic.regs); - let svr = super::hw_interrupts::read_lapic_u32(regs, 0xF0); + let svr = super::super::x86_64::hw_interrupts::read_lapic_u32(regs, 0xF0); if svr & 0x100 == 0 { let regs_mut = lapic_regs_as_u8_mut(&mut lapic.regs); - super::hw_interrupts::write_lapic_u32(regs_mut, 0xF0, 0x1FF); - super::hw_interrupts::write_lapic_u32(regs_mut, 0x80, 0); // TPR + super::super::x86_64::hw_interrupts::write_lapic_u32( + regs_mut, 0xF0, 0x1FF, + ); + super::super::x86_64::hw_interrupts::write_lapic_u32(regs_mut, 0x80, 0); // TPR let _ = self.vcpu_fd.set_lapic(&lapic); } } let vm_fd = self.vm_fd.clone(); - let vector = super::hw_interrupts::TIMER_VECTOR; + let vector = super::super::x86_64::hw_interrupts::TIMER_VECTOR; let stop = self.timer_stop.clone(); let period = std::time::Duration::from_micros(period_us as u64); self.timer_thread = Some(std::thread::spawn(move || { @@ -711,7 +715,9 @@ impl MshvVm { return true; } let timer_active = self.timer_thread.is_some(); - super::hw_interrupts::handle_common_io_out(port, data, timer_active, || self.do_lapic_eoi()) + super::super::x86_64::hw_interrupts::handle_common_io_out(port, data, timer_active, || { + self.do_lapic_eoi() + }) } } @@ -734,10 +740,10 @@ mod hw_interrupt_tests { fn lapic_regs_conversion_roundtrip() { let mut regs = [0i8; 1024]; let bytes = lapic_regs_as_u8_mut(&mut regs); - super::super::hw_interrupts::write_lapic_u32(bytes, 0xF0, 0xDEAD_BEEF); + super::super::super::x86_64::hw_interrupts::write_lapic_u32(bytes, 0xF0, 0xDEAD_BEEF); let bytes = lapic_regs_as_u8(®s); assert_eq!( - super::super::hw_interrupts::read_lapic_u32(bytes, 0xF0), + super::super::super::x86_64::hw_interrupts::read_lapic_u32(bytes, 0xF0), 0xDEAD_BEEF ); } diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 7516f1d2e..41799e133 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -20,7 +20,7 @@ use std::sync::Arc; #[cfg(feature = "hw-interrupts")] use std::sync::atomic::{AtomicBool, Ordering}; -use hyperlight_common::outb::OutBAction; +use hyperlight_common::outb::VmAction; #[cfg(feature = "trace_guest")] use tracing::Span; #[cfg(feature = "trace_guest")] @@ -194,7 +194,13 @@ impl WhpVm { } }; - let vm = WhpVm { + let mgr = get_surrogate_process_manager() + .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?; + let surrogate_process = mgr + .get_surrogate_process() + .map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?; + + Ok(WhpVm { partition, surrogate_process, file_mappings: Vec::new(), @@ -202,9 +208,7 @@ impl WhpVm { timer_stop: None, #[cfg(feature = "hw-interrupts")] timer_thread: None, - }; - - Ok(vm) + }) } /// Maximum size for the interrupt controller state blob. @@ -241,7 +245,7 @@ impl WhpVm { )); } - super::hw_interrupts::init_lapic_registers(&mut state); + super::x86_64::hw_interrupts::init_lapic_registers(&mut state); unsafe { WHvSetVirtualProcessorInterruptControllerState2( @@ -423,9 +427,9 @@ impl VirtualMachine for WhpVm { )]) .map_err(|e| RunVcpuError::IncrementRip(e.into()))?; - // OutBAction::Halt always means "I'm done", regardless + // VmAction::Halt always means "I'm done", regardless // of whether a timer is active. - if is_write && port == OutBAction::Halt as u16 { + if is_write && port == VmAction::Halt as u16 { return Ok(VmExit::Halt()); } @@ -436,7 +440,7 @@ impl VirtualMachine for WhpVm { if self.handle_hw_io_out(port, &data) { continue; } - } else if let Some(val) = super::hw_interrupts::handle_io_in(port) { + } else if let Some(val) = super::x86_64::hw_interrupts::handle_io_in(port) { self.set_registers(&[( WHvX64RegisterRax, Align16(WHV_REGISTER_VALUE { Reg64: val }), @@ -1056,7 +1060,7 @@ impl WhpVm { /// delivers through the LAPIC and the guest only acknowledges via PIC. fn do_lapic_eoi(&self) { if let Ok(mut state) = self.get_lapic_state() { - super::hw_interrupts::lapic_eoi(&mut state); + super::x86_64::hw_interrupts::lapic_eoi(&mut state); let _ = self.set_lapic_state(&state); } } @@ -1072,7 +1076,7 @@ impl WhpVm { } fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { - if port == OutBAction::PvTimerConfig as u16 { + if port == VmAction::PvTimerConfig as u16 { if data.len() >= 4 { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); @@ -1081,7 +1085,7 @@ impl WhpVm { if period_us > 0 { let partition_raw = self.partition.0; - let vector = super::hw_interrupts::TIMER_VECTOR; + let vector = super::x86_64::hw_interrupts::TIMER_VECTOR; let stop = Arc::new(AtomicBool::new(false)); let stop_clone = stop.clone(); let period = std::time::Duration::from_micros(period_us as u64); @@ -1115,7 +1119,9 @@ impl WhpVm { return true; } let timer_active = self.timer_thread.is_some(); - super::hw_interrupts::handle_common_io_out(port, data, timer_active, || self.do_lapic_eoi()) + super::x86_64::hw_interrupts::handle_common_io_out(port, data, timer_active, || { + self.do_lapic_eoi() + }) } } @@ -1186,7 +1192,7 @@ mod hw_interrupt_tests { #[test] fn lapic_register_helpers_delegate() { - use crate::hypervisor::virtual_machine::hw_interrupts; + use crate::hypervisor::virtual_machine::x86_64::hw_interrupts; let mut state = vec![0u8; 1024]; hw_interrupts::write_lapic_u32(&mut state, 0xF0, 0x1FF); assert_eq!(hw_interrupts::read_lapic_u32(&state, 0xF0), 0x1FF); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs similarity index 98% rename from src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs rename to src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs index 2d339af5f..57ce3a2f9 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/hw_interrupts.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs @@ -14,9 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. */ -//! Shared helpers for the `hw-interrupts` feature, used by MSHV and WHP -//! backends. KVM uses an in-kernel IRQ chip so it handles most of -//! this transparently. +//! Shared x86-64 helpers for the `hw-interrupts` feature, used by +//! MSHV and WHP backends. KVM uses an in-kernel IRQ chip so it +//! handles most of this transparently. /// Hardcoded timer interrupt vector. The guest remaps IRQ0 to /// vector 0x20 via PIC ICW2, so we use that same vector directly diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/mod.rs new file mode 100644 index 000000000..5b0ae0db9 --- /dev/null +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/mod.rs @@ -0,0 +1,18 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! x86-64 specific helpers for hardware interrupt support. +pub(crate) mod hw_interrupts; diff --git a/src/hyperlight_host/src/sandbox/outb.rs b/src/hyperlight_host/src/sandbox/outb.rs index 5d5bd91d8..76ec53cee 100644 --- a/src/hyperlight_host/src/sandbox/outb.rs +++ b/src/hyperlight_host/src/sandbox/outb.rs @@ -233,11 +233,6 @@ pub(crate) fn handle_outb( OutBAction::TraceMemoryAlloc => trace_info.handle_trace_mem_alloc(regs, mem_mgr), #[cfg(feature = "mem_profile")] OutBAction::TraceMemoryFree => trace_info.handle_trace_mem_free(regs, mem_mgr), - // PvTimerConfig and Halt are handled at the hypervisor level - // (in run_vcpu) and should never reach this handler. - OutBAction::PvTimerConfig | OutBAction::Halt => Err(HandleOutbError::InvalidPort(format!( - "port {port:#x} should be handled at hypervisor level" - ))), } } #[cfg(test)] diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 879e92f14..1f823dc49 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -1849,12 +1849,12 @@ fn memory_region_types_are_publicly_accessible() { } } -/// Test that hardware timer interrupts are delivered to the guest via OutBAction::PvTimerConfig port. +/// Test that hardware timer interrupts are delivered to the guest via VmAction::PvTimerConfig port. /// /// The guest function `TestTimerInterrupts`: /// 1. Initializes the PIC (IRQ0 -> vector 0x20) /// 2. Installs an IDT entry for vector 0x20 -/// 3. Arms the timer via OutBAction::PvTimerConfig port +/// 3. Arms the timer via VmAction::PvTimerConfig port /// 4. Enables interrupts and busy-waits /// 5. Returns the number of timer interrupts received /// diff --git a/src/tests/rust_guests/dummyguest/src/main.rs b/src/tests/rust_guests/dummyguest/src/main.rs index 2d9db910e..924bb686c 100644 --- a/src/tests/rust_guests/dummyguest/src/main.rs +++ b/src/tests/rust_guests/dummyguest/src/main.rs @@ -30,7 +30,7 @@ fn panic(_info: &PanicInfo) -> ! { } fn halt() { - // OutBAction::Halt = 108; using raw constant to avoid pulling in + // VmAction::Halt = 108; using raw constant to avoid pulling in // anyhow (via hyperlight_common's TryFrom impl) which requires alloc. unsafe { asm!( diff --git a/src/tests/rust_guests/simpleguest/src/main.rs b/src/tests/rust_guests/simpleguest/src/main.rs index 1cb21c252..ac876e65f 100644 --- a/src/tests/rust_guests/simpleguest/src/main.rs +++ b/src/tests/rust_guests/simpleguest/src/main.rs @@ -542,12 +542,12 @@ struct IdtPtr { /// 1. Initializes the PIC (remaps IRQ0 to vector 0x20) /// 2. Installs an IDT entry for vector 0x20 pointing to `_timer_irq_handler` /// 3. Programs PIT channel 0 as a rate generator at the requested period -/// 4. Arms the PV timer by writing the period to OutBAction::PvTimerConfig port (for MSHV/WHP) +/// 4. Arms the PV timer by writing the period to VmAction::PvTimerConfig port (for MSHV/WHP) /// 5. Enables interrupts (STI) and busy-waits for timer delivery /// 6. Disables interrupts (CLI) and returns the interrupt count /// /// Parameters: -/// - `period_us`: timer period in microseconds (written to OutBAction::PvTimerConfig port) +/// - `period_us`: timer period in microseconds (written to VmAction::PvTimerConfig port) /// - `max_spin`: maximum busy-wait iterations before giving up /// /// Returns the number of timer interrupts received. @@ -613,10 +613,13 @@ fn test_timer_interrupts(period_us: i32, max_spin: i32) -> i32 { core::ptr::write_volatile(entry_ptr.add(12) as *mut u32, 0); } + // Ensure the IDT writes are visible before enabling interrupts. + core::sync::atomic::fence(core::sync::atomic::Ordering::SeqCst); + // 3) Program PIT channel 0 as rate generator (mode 2). // Divisor = period_us * 1_193_182 / 1_000_000 (PIT oscillator is 1.193182 MHz). // On KVM the in-kernel PIT handles these IO writes directly. - // On MSHV/WHP these ports are silently absorbed (timer is set via OutBAction::PvTimerConfig). + // On MSHV/WHP these ports are silently absorbed (timer is set via VmAction::PvTimerConfig). if period_us <= 0 { return -1; // invalid period } @@ -630,11 +633,11 @@ fn test_timer_interrupts(period_us: i32, max_spin: i32) -> i32 { core::arch::asm!("out 0x40, al", in("al") (divisor >> 8) as u8, options(nomem, nostack)); } - // 4) Arm timer: write period_us to OutBAction::PvTimerConfig port + // 4) Arm timer: write period_us to VmAction::PvTimerConfig port unsafe { core::arch::asm!( "out dx, eax", - in("dx") hyperlight_common::outb::OutBAction::PvTimerConfig as u16, + in("dx") hyperlight_common::outb::VmAction::PvTimerConfig as u16, in("eax") period_us as u32, options(nomem, nostack, preserves_flags) ); @@ -989,7 +992,7 @@ fn corrupt_output_size_prefix() -> i32 { "out dx, eax", "cli", "hlt", - in("dx") hyperlight_common::outb::OutBAction::Halt as u16, + in("dx") hyperlight_common::outb::VmAction::Halt as u16, in("eax") 0u32, options(noreturn), ); @@ -1012,7 +1015,7 @@ fn corrupt_output_back_pointer() -> i32 { "out dx, eax", "cli", "hlt", - in("dx") hyperlight_common::outb::OutBAction::Halt as u16, + in("dx") hyperlight_common::outb::VmAction::Halt as u16, in("eax") 0u32, options(noreturn), ); From d9d6eda207b93b40e7892fe6b2bdfd5a41e77479 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 00:42:45 +0000 Subject: [PATCH 22/37] fix: address PR 1272 review feedback (round 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add trace-level log for unexpected VcpuExit::Hlt in KVM hw-interrupts loop (jsturtevant thread 1) - Fix MSHV EINTR handling: always return Cancelled so InterruptHandle::kill() is honoured even when the timer thread is active (jsturtevant thread 3) - Add min/max bounds (100µs–10s) on guest-provided timer period across all backends to prevent runaway injection or excessive sleep (jsturtevant thread 4) - Stop timer thread in WHP Halt handler before returning, matching KVM/MSHV behaviour (jsturtevant thread 6) - Use MSHV_PT_BIT_LAPIC named constant instead of magic 1u64 for pt_flags (ludfjig nit) Signed-off-by: danbugs --- .../hypervisor/virtual_machine/kvm/x86_64.rs | 6 +++++ .../hypervisor/virtual_machine/mshv/x86_64.rs | 23 ++++++++++--------- .../src/hypervisor/virtual_machine/whp.rs | 9 +++++++- .../virtual_machine/x86_64/hw_interrupts.rs | 8 +++++++ 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 1dd80d018..9db46296f 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -211,6 +211,7 @@ impl KvmVm { Ok(VcpuExit::Hlt) => { // The in-kernel LAPIC normally handles HLT internally. // If we somehow get here, just re-enter the guest. + tracing::trace!("KVM hw-interrupts: unexpected VcpuExit::Hlt, re-entering guest"); continue; } Ok(VcpuExit::IoOut(port, data)) => { @@ -263,6 +264,10 @@ impl KvmVm { #[cfg(feature = "hw-interrupts")] fn handle_pv_timer_config(&mut self, data: &[u8; 4]) -> std::result::Result<(), RunVcpuError> { + use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::{ + MAX_TIMER_PERIOD_US, MIN_TIMER_PERIOD_US, + }; + let period_us = u32::from_le_bytes(*data) as u64; if period_us == 0 { // Stop existing timer if any. @@ -271,6 +276,7 @@ impl KvmVm { let _ = thread.join(); } } else if self.timer_thread.is_none() { + let period_us = period_us.clamp(MIN_TIMER_PERIOD_US, MAX_TIMER_PERIOD_US); // Reset the stop flag — a previous halt (e.g. the // init halt during evolve()) may have set it. self.timer_stop.store(false, Ordering::Relaxed); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 96e82be9d..38e856305 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -123,10 +123,11 @@ impl MshvVm { #[allow(unused_mut)] let mut pr: mshv_create_partition_v2 = Default::default(); // Enable LAPIC for hw-interrupts — required for interrupt delivery - // via request_virtual_interrupt. MSHV_PT_BIT_LAPIC = bit 0. + // via request_virtual_interrupt. #[cfg(feature = "hw-interrupts")] { - pr.pt_flags = 1u64; // LAPIC + use mshv_bindings::MSHV_PT_BIT_LAPIC; + pr.pt_flags = 1u64 << MSHV_PT_BIT_LAPIC; } // It's important to use create_vm_with_args() (not create_vm()), // because create_vm() sets up a SynIC partition by default. @@ -352,15 +353,9 @@ impl VirtualMachine for MshvVm { } } Err(e) => match e.errno() { + // InterruptHandle::kill() sends a signal to interrupt the vcpu, + // which causes EINTR. Always honour it as cancellation. libc::EINTR => { - // When the timer thread is active, EINTR may be - // a spurious signal. Continue the run loop to - // let the hypervisor deliver any pending timer - // interrupt. - #[cfg(feature = "hw-interrupts")] - if self.timer_thread.is_some() { - continue; - } return Ok(VmExit::Cancelled()); } libc::EAGAIN => { @@ -639,6 +634,10 @@ impl MshvVm { fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { if port == VmAction::PvTimerConfig as u16 { if data.len() >= 4 { + use super::super::x86_64::hw_interrupts::{ + MAX_TIMER_PERIOD_US, MIN_TIMER_PERIOD_US, + }; + let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); if period_us == 0 { // Stop existing timer if any. @@ -693,7 +692,9 @@ impl MshvVm { let vm_fd = self.vm_fd.clone(); let vector = super::super::x86_64::hw_interrupts::TIMER_VECTOR; let stop = self.timer_stop.clone(); - let period = std::time::Duration::from_micros(period_us as u64); + let period_us = + (period_us as u64).clamp(MIN_TIMER_PERIOD_US, MAX_TIMER_PERIOD_US); + let period = std::time::Duration::from_micros(period_us); self.timer_thread = Some(std::thread::spawn(move || { while !stop.load(Ordering::Relaxed) { std::thread::sleep(period); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 41799e133..920abb959 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -430,6 +430,9 @@ impl VirtualMachine for WhpVm { // VmAction::Halt always means "I'm done", regardless // of whether a timer is active. if is_write && port == VmAction::Halt as u16 { + // Stop the timer thread before returning. + #[cfg(feature = "hw-interrupts")] + self.stop_timer_thread(); return Ok(VmExit::Halt()); } @@ -1078,17 +1081,21 @@ impl WhpVm { fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { if port == VmAction::PvTimerConfig as u16 { if data.len() >= 4 { + use super::x86_64::hw_interrupts::{MAX_TIMER_PERIOD_US, MIN_TIMER_PERIOD_US}; + let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); // Stop any existing timer thread before (re-)configuring. self.stop_timer_thread(); if period_us > 0 { + let period_us = + (period_us as u64).clamp(MIN_TIMER_PERIOD_US, MAX_TIMER_PERIOD_US); let partition_raw = self.partition.0; let vector = super::x86_64::hw_interrupts::TIMER_VECTOR; let stop = Arc::new(AtomicBool::new(false)); let stop_clone = stop.clone(); - let period = std::time::Duration::from_micros(period_us as u64); + let period = std::time::Duration::from_micros(period_us); let handle = std::thread::spawn(move || { while !stop_clone.load(Ordering::Relaxed) { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs index 57ce3a2f9..21dde4826 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs @@ -23,6 +23,14 @@ limitations under the License. /// — no PIC state machine needed. pub(crate) const TIMER_VECTOR: u32 = 0x20; +/// Minimum allowed timer period in microseconds (100 µs). +/// Prevents runaway interrupt injection from a malicious or buggy guest. +pub(crate) const MIN_TIMER_PERIOD_US: u64 = 100; + +/// Maximum allowed timer period in microseconds (10 seconds). +/// Prevents unreasonably long sleep durations in the timer thread. +pub(crate) const MAX_TIMER_PERIOD_US: u64 = 10_000_000; + /// Handle an IO IN request for hardware-interrupt related ports. /// Returns `Some(value)` if the port was handled, `None` if the /// port should be passed through to the guest handler. From 32f6ec06f80a83dff25b6c503f5ce1c37c3101df Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 06:28:03 +0000 Subject: [PATCH 23/37] refactor: extract TimerThread struct into shared hw_interrupts module Replace per-backend Arc + Option + manual Drop with a shared TimerThread struct in x86_64/hw_interrupts.rs. Each backend now passes a closure for the inject call (eventfd.write on KVM, request_virtual_interrupt on MSHV, WHvRequestInterrupt on WHP). This removes ~60 lines of duplicated timer management logic and ensures consistent start/stop/drop semantics across all backends. Signed-off-by: danbugs --- .../hypervisor/virtual_machine/kvm/x86_64.rs | 53 +++--------- .../hypervisor/virtual_machine/mshv/x86_64.rs | 69 +++++---------- .../src/hypervisor/virtual_machine/whp.rs | 84 +++++++------------ .../virtual_machine/x86_64/hw_interrupts.rs | 60 +++++++++++++ 4 files changed, 124 insertions(+), 142 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 9db46296f..57c297401 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -14,11 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#[cfg(feature = "hw-interrupts")] -use std::sync::Arc; use std::sync::LazyLock; -#[cfg(feature = "hw-interrupts")] -use std::sync::atomic::{AtomicBool, Ordering}; use hyperlight_common::outb::VmAction; #[cfg(gdb)] @@ -42,6 +38,8 @@ use crate::hypervisor::regs::{ }; #[cfg(feature = "hw-interrupts")] use crate::hypervisor::virtual_machine::HypervisorError; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; #[cfg(all(test, not(feature = "nanvix-unstable")))] use crate::hypervisor::virtual_machine::XSAVE_BUFFER_SIZE; use crate::hypervisor::virtual_machine::{ @@ -102,12 +100,9 @@ pub(crate) struct KvmVm { /// writes to this to inject periodic timer interrupts. #[cfg(feature = "hw-interrupts")] timer_irq_eventfd: EventFd, - /// Signals the timer thread to stop. + /// Handle to the background timer (if started). #[cfg(feature = "hw-interrupts")] - timer_stop: Arc, - /// Handle to the background timer thread (if started). - #[cfg(feature = "hw-interrupts")] - timer_thread: Option>, + timer: Option, // KVM, as opposed to mshv/whp, has no get_guest_debug() ioctl, so we must track the state ourselves #[cfg(gdb)] @@ -188,9 +183,7 @@ impl KvmVm { #[cfg(feature = "hw-interrupts")] timer_irq_eventfd, #[cfg(feature = "hw-interrupts")] - timer_stop: Arc::new(AtomicBool::new(false)), - #[cfg(feature = "hw-interrupts")] - timer_thread: None, + timer: None, #[cfg(gdb)] debug_regs: kvm_guest_debug::default(), }) @@ -217,9 +210,8 @@ impl KvmVm { Ok(VcpuExit::IoOut(port, data)) => { if port == VmAction::Halt as u16 { // Stop the timer thread before returning. - self.timer_stop.store(true, Ordering::Relaxed); - if let Some(h) = self.timer_thread.take() { - let _ = h.join(); + if let Some(mut t) = self.timer.take() { + t.stop(); } return Ok(VmExit::Halt()); } @@ -271,29 +263,18 @@ impl KvmVm { let period_us = u32::from_le_bytes(*data) as u64; if period_us == 0 { // Stop existing timer if any. - if let Some(thread) = self.timer_thread.take() { - self.timer_stop.store(true, Ordering::Relaxed); - let _ = thread.join(); + if let Some(mut t) = self.timer.take() { + t.stop(); } - } else if self.timer_thread.is_none() { + } else if self.timer.is_none() { let period_us = period_us.clamp(MIN_TIMER_PERIOD_US, MAX_TIMER_PERIOD_US); - // Reset the stop flag — a previous halt (e.g. the - // init halt during evolve()) may have set it. - self.timer_stop.store(false, Ordering::Relaxed); let eventfd = self .timer_irq_eventfd .try_clone() .map_err(|e| RunVcpuError::Unknown(HypervisorError::KvmError(e.into())))?; - let stop = self.timer_stop.clone(); let period = std::time::Duration::from_micros(period_us); - self.timer_thread = Some(std::thread::spawn(move || { - while !stop.load(Ordering::Relaxed) { - std::thread::sleep(period); - if stop.load(Ordering::Relaxed) { - break; - } - let _ = eventfd.write(1); - } + self.timer = Some(TimerThread::start(period, move || { + let _ = eventfd.write(1); })); } Ok(()) @@ -603,16 +584,6 @@ impl DebuggableVm for KvmVm { } } -#[cfg(feature = "hw-interrupts")] -impl Drop for KvmVm { - fn drop(&mut self) { - self.timer_stop.store(true, Ordering::Relaxed); - if let Some(h) = self.timer_thread.take() { - let _ = h.join(); - } - } -} - #[cfg(test)] #[cfg(feature = "hw-interrupts")] mod hw_interrupt_tests { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 38e856305..eef8f2df9 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -19,8 +19,6 @@ use std::fmt::Debug; #[cfg(feature = "hw-interrupts")] use std::sync::Arc; use std::sync::LazyLock; -#[cfg(feature = "hw-interrupts")] -use std::sync::atomic::{AtomicBool, Ordering}; use hyperlight_common::outb::VmAction; #[cfg(feature = "hw-interrupts")] @@ -60,6 +58,8 @@ use crate::hypervisor::virtual_machine::{ VmExit, XSAVE_MIN_SIZE, }; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; #[cfg(feature = "trace_guest")] use crate::sandbox::trace::TraceContext as SandboxTraceContext; @@ -86,12 +86,9 @@ pub(crate) struct MshvVm { #[cfg(not(feature = "hw-interrupts"))] vm_fd: VmFd, vcpu_fd: VcpuFd, - /// Signals the timer thread to stop. - #[cfg(feature = "hw-interrupts")] - timer_stop: Arc, - /// Handle to the background timer thread (if started). + /// Handle to the background timer (if started). #[cfg(feature = "hw-interrupts")] - timer_thread: Option>, + timer: Option, } static MSHV: LazyLock> = @@ -179,9 +176,7 @@ impl MshvVm { vm_fd, vcpu_fd, #[cfg(feature = "hw-interrupts")] - timer_stop: Arc::new(AtomicBool::new(false)), - #[cfg(feature = "hw-interrupts")] - timer_thread: None, + timer: None, }) } } @@ -238,7 +233,7 @@ impl VirtualMachine for MshvVm { // interrupts on the next run(), waking the // vCPU from HLT. #[cfg(feature = "hw-interrupts")] - if self.timer_thread.is_some() { + if self.timer.as_ref().is_some_and(|t| t.is_active()) { continue; } return Ok(VmExit::Halt()); @@ -270,9 +265,8 @@ impl VirtualMachine for MshvVm { // Stop the timer thread before returning. #[cfg(feature = "hw-interrupts")] { - self.timer_stop.store(true, Ordering::Relaxed); - if let Some(h) = self.timer_thread.take() { - let _ = h.join(); + if let Some(mut t) = self.timer.take() { + t.stop(); } } return Ok(VmExit::Halt()); @@ -641,14 +635,10 @@ impl MshvVm { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); if period_us == 0 { // Stop existing timer if any. - if let Some(thread) = self.timer_thread.take() { - self.timer_stop.store(true, Ordering::Relaxed); - let _ = thread.join(); + if let Some(mut t) = self.timer.take() { + t.stop(); } - } else if self.timer_thread.is_none() { - // Reset the stop flag — a previous halt (e.g. the - // init halt during evolve()) may have set it. - self.timer_stop.store(false, Ordering::Relaxed); + } else if self.timer.is_none() { // Re-enable LAPIC if the guest disabled it (via WRMSR // to MSR 0x1B clearing bit 11). Some guests clear // the global APIC enable when no I/O APIC is detected. @@ -691,47 +681,30 @@ impl MshvVm { let vm_fd = self.vm_fd.clone(); let vector = super::super::x86_64::hw_interrupts::TIMER_VECTOR; - let stop = self.timer_stop.clone(); let period_us = (period_us as u64).clamp(MIN_TIMER_PERIOD_US, MAX_TIMER_PERIOD_US); let period = std::time::Duration::from_micros(period_us); - self.timer_thread = Some(std::thread::spawn(move || { - while !stop.load(Ordering::Relaxed) { - std::thread::sleep(period); - if stop.load(Ordering::Relaxed) { - break; - } - let _ = vm_fd.request_virtual_interrupt(&InterruptRequest { - interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED, - apic_id: 0, - vector, - level_triggered: false, - logical_destination_mode: false, - long_mode: false, - }); - } + self.timer = Some(TimerThread::start(period, move || { + let _ = vm_fd.request_virtual_interrupt(&InterruptRequest { + interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED, + apic_id: 0, + vector, + level_triggered: false, + logical_destination_mode: false, + long_mode: false, + }); })); } } return true; } - let timer_active = self.timer_thread.is_some(); + let timer_active = self.timer.as_ref().is_some_and(|t| t.is_active()); super::super::x86_64::hw_interrupts::handle_common_io_out(port, data, timer_active, || { self.do_lapic_eoi() }) } } -#[cfg(feature = "hw-interrupts")] -impl Drop for MshvVm { - fn drop(&mut self) { - self.timer_stop.store(true, Ordering::Relaxed); - if let Some(h) = self.timer_thread.take() { - let _ = h.join(); - } - } -} - #[cfg(test)] #[cfg(feature = "hw-interrupts")] mod hw_interrupt_tests { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 920abb959..744fed65e 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -15,10 +15,6 @@ limitations under the License. */ use std::os::raw::c_void; -#[cfg(feature = "hw-interrupts")] -use std::sync::Arc; -#[cfg(feature = "hw-interrupts")] -use std::sync::atomic::{AtomicBool, Ordering}; use hyperlight_common::outb::VmAction; #[cfg(feature = "trace_guest")] @@ -48,6 +44,8 @@ use crate::hypervisor::virtual_machine::{ }; use crate::hypervisor::wrappers::HandleWrapper; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType}; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; #[cfg(feature = "trace_guest")] use crate::sandbox::trace::TraceContext as SandboxTraceContext; @@ -98,12 +96,9 @@ pub(crate) struct WhpVm { /// Tracks host-side file mappings (view_base, mapping_handle) for /// cleanup on unmap or drop. Only populated for MappedFile regions. file_mappings: Vec<(HandleWrapper, *mut c_void)>, - /// Signal to stop the software timer thread. + /// Handle to the background timer (if started). #[cfg(feature = "hw-interrupts")] - timer_stop: Option>, - /// Handle for the software timer thread. - #[cfg(feature = "hw-interrupts")] - timer_thread: Option>, + timer: Option, } // Safety: `WhpVm` is !Send because it holds `SurrogateProcess` which contains a raw pointer @@ -205,9 +200,7 @@ impl WhpVm { surrogate_process, file_mappings: Vec::new(), #[cfg(feature = "hw-interrupts")] - timer_stop: None, - #[cfg(feature = "hw-interrupts")] - timer_thread: None, + timer: None, }) } @@ -432,7 +425,9 @@ impl VirtualMachine for WhpVm { if is_write && port == VmAction::Halt as u16 { // Stop the timer thread before returning. #[cfg(feature = "hw-interrupts")] - self.stop_timer_thread(); + if let Some(mut t) = self.timer.take() { + t.stop(); + } return Ok(VmExit::Halt()); } @@ -464,7 +459,7 @@ impl VirtualMachine for WhpVm { // thread injects an interrupt via WHvRequestInterrupt, // waking the vCPU from HLT. #[cfg(feature = "hw-interrupts")] - if self.timer_thread.is_some() { + if self.timer.as_ref().is_some_and(|t| t.is_active()) { continue; } return Ok(VmExit::Halt()); @@ -1068,16 +1063,6 @@ impl WhpVm { } } - /// Stop the software timer thread if running. - fn stop_timer_thread(&mut self) { - if let Some(stop) = self.timer_stop.take() { - stop.store(true, Ordering::Relaxed); - } - if let Some(handle) = self.timer_thread.take() { - let _ = handle.join(); - } - } - fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { if port == VmAction::PvTimerConfig as u16 { if data.len() >= 4 { @@ -1086,46 +1071,37 @@ impl WhpVm { let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); // Stop any existing timer thread before (re-)configuring. - self.stop_timer_thread(); + if let Some(mut t) = self.timer.take() { + t.stop(); + } if period_us > 0 { let period_us = (period_us as u64).clamp(MIN_TIMER_PERIOD_US, MAX_TIMER_PERIOD_US); let partition_raw = self.partition.0; let vector = super::x86_64::hw_interrupts::TIMER_VECTOR; - let stop = Arc::new(AtomicBool::new(false)); - let stop_clone = stop.clone(); let period = std::time::Duration::from_micros(period_us); - let handle = std::thread::spawn(move || { - while !stop_clone.load(Ordering::Relaxed) { - std::thread::sleep(period); - if stop_clone.load(Ordering::Relaxed) { - break; - } - let partition = WHV_PARTITION_HANDLE(partition_raw); - let interrupt = WHV_INTERRUPT_CONTROL { - _bitfield: 0, // Type=Fixed, DestMode=Physical, Trigger=Edge - Destination: 0, - Vector: vector, - }; - let _ = unsafe { - WHvRequestInterrupt( - partition, - &interrupt, - std::mem::size_of::() as u32, - ) - }; - } - }); - - self.timer_stop = Some(stop); - self.timer_thread = Some(handle); + self.timer = Some(TimerThread::start(period, move || { + let partition = WHV_PARTITION_HANDLE(partition_raw); + let interrupt = WHV_INTERRUPT_CONTROL { + _bitfield: 0, // Type=Fixed, DestMode=Physical, Trigger=Edge + Destination: 0, + Vector: vector, + }; + let _ = unsafe { + WHvRequestInterrupt( + partition, + &interrupt, + std::mem::size_of::() as u32, + ) + }; + })); } } return true; } - let timer_active = self.timer_thread.is_some(); + let timer_active = self.timer.as_ref().is_some_and(|t| t.is_active()); super::x86_64::hw_interrupts::handle_common_io_out(port, data, timer_active, || { self.do_lapic_eoi() }) @@ -1141,7 +1117,9 @@ impl Drop for WhpVm { // Stop the software timer thread before tearing down the partition. #[cfg(feature = "hw-interrupts")] - self.stop_timer_thread(); + if let Some(mut t) = self.timer.take() { + t.stop(); + } // HyperlightVm::drop() calls set_dropped() before this runs. // set_dropped() ensures no WHvCancelRunVirtualProcessor calls are in progress diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs index 21dde4826..4f2ccd18b 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs @@ -154,6 +154,66 @@ pub(crate) fn lapic_eoi(state: &mut [u8]) { } } +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::thread::JoinHandle; +use std::time::Duration; + +/// Shared timer thread that periodically calls an inject function. +/// +/// Each backend passes a closure for interrupt injection: +/// - KVM: `eventfd.write(1)` +/// - MSHV: `vm_fd.request_virtual_interrupt(...)` +/// - WHP: `WHvRequestInterrupt(...)` +/// +/// The `Drop` impl stops the thread automatically. +#[derive(Debug)] +pub(crate) struct TimerThread { + stop: Arc, + handle: Option>, +} + +impl TimerThread { + /// Start a timer thread that calls `inject_fn` every `period`. + /// The period is clamped to [`MIN_TIMER_PERIOD_US`, `MAX_TIMER_PERIOD_US`]. + pub(crate) fn start(period: Duration, inject_fn: impl Fn() + Send + 'static) -> Self { + let stop = Arc::new(AtomicBool::new(false)); + let stop_clone = stop.clone(); + let handle = std::thread::spawn(move || { + while !stop_clone.load(Ordering::Relaxed) { + std::thread::sleep(period); + if stop_clone.load(Ordering::Relaxed) { + break; + } + inject_fn(); + } + }); + Self { + stop, + handle: Some(handle), + } + } + + /// Stop the timer thread and join it. + pub(crate) fn stop(&mut self) { + self.stop.store(true, Ordering::Relaxed); + if let Some(h) = self.handle.take() { + let _ = h.join(); + } + } + + /// Returns `true` if the timer thread is running. + pub(crate) fn is_active(&self) -> bool { + self.handle.is_some() + } +} + +impl Drop for TimerThread { + fn drop(&mut self) { + self.stop(); + } +} + #[cfg(test)] mod tests { use super::*; From 38b66651b5900646c78b90e039f7bc89c659dde7 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 06:29:39 +0000 Subject: [PATCH 24/37] refactor: deduplicate WhpVm::new(), use #[cfg] for test exclusion - WhpVm::new(): partition creation sequence now appears once with #[cfg(feature = "hw-interrupts")] around just the LAPIC capability check and LAPIC-specific setup, eliminating ~20 lines of duplication. - Replace #[cfg_attr(feature = "hw-interrupts", ignore)] with #[cfg(not(feature = "hw-interrupts"))] on tests that don't work with hw-interrupts, so they are excluded at compile time rather than at runtime. Signed-off-by: danbugs --- .../src/hypervisor/hyperlight_vm/x86_64.rs | 12 ++--- src/hyperlight_host/src/hypervisor/mod.rs | 2 +- .../src/hypervisor/virtual_machine/whp.rs | 54 ++++++++----------- 3 files changed, 28 insertions(+), 40 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs index e8f21e119..152478e94 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -1550,7 +1550,7 @@ mod tests { // Tests // ========================================================================== - #[cfg_attr(feature = "hw-interrupts", ignore)] + #[cfg(not(feature = "hw-interrupts"))] #[test] fn reset_vcpu_simple() { // push rax; hlt - aligns stack to 16 bytes @@ -1696,7 +1696,7 @@ mod tests { use super::*; - #[cfg_attr(feature = "hw-interrupts", ignore)] + #[cfg(not(feature = "hw-interrupts"))] #[test] fn reset_vcpu_regs() { let mut a = CodeAssembler::new(64).unwrap(); @@ -1756,7 +1756,7 @@ mod tests { assert_regs_reset(hyperlight_vm.vm.as_ref()); } - #[cfg_attr(feature = "hw-interrupts", ignore)] + #[cfg(not(feature = "hw-interrupts"))] #[test] fn reset_vcpu_fpu() { #[cfg(kvm)] @@ -1888,7 +1888,7 @@ mod tests { } } - #[cfg_attr(feature = "hw-interrupts", ignore)] + #[cfg(not(feature = "hw-interrupts"))] #[test] fn reset_vcpu_debug_regs() { let mut a = CodeAssembler::new(64).unwrap(); @@ -1931,7 +1931,7 @@ mod tests { assert_debug_regs_reset(hyperlight_vm.vm.as_ref()); } - #[cfg_attr(feature = "hw-interrupts", ignore)] + #[cfg(not(feature = "hw-interrupts"))] #[test] fn reset_vcpu_sregs() { // Build code that modifies special registers and halts @@ -1985,7 +1985,7 @@ mod tests { /// Verifies guest-visible FPU state (via FXSAVE) is properly reset. /// Unlike tests using hypervisor API, this runs actual guest code with FXSAVE. - #[cfg_attr(feature = "hw-interrupts", ignore)] + #[cfg(not(feature = "hw-interrupts"))] #[test] fn reset_vcpu_fpu_guest_visible_state() { let mut ctx = hyperlight_vm_with_mem_mgr_fxsave(); diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 4a508100c..71fbac57e 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -469,7 +469,7 @@ pub(crate) mod tests { use crate::sandbox::{SandboxConfiguration, UninitializedSandbox}; use crate::{Result, is_hypervisor_present, new_error}; - #[cfg_attr(feature = "hw-interrupts", ignore)] + #[cfg(not(feature = "hw-interrupts"))] #[test] fn test_initialise() -> Result<()> { if !is_hypervisor_present() { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 744fed65e..b3472043a 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -141,16 +141,19 @@ impl WhpVm { .into(), )); } + } - let p = WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + let p = WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; + WHvSetPartitionProperty( + p, + WHvPartitionPropertyCodeProcessorCount, + &NUM_CPU as *const _ as *const _, + std::mem::size_of_val(&NUM_CPU) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + #[cfg(feature = "hw-interrupts")] + { let apic_mode: u32 = 1; // WHvX64LocalApicEmulationModeXApic WHvSetPartitionProperty( p, @@ -159,34 +162,19 @@ impl WhpVm { std::mem::size_of_val(&apic_mode) as _, ) .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + } - WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; - WHvCreateVirtualProcessor(p, 0, 0) - .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - - // Initialize the LAPIC via the bulk interrupt-controller - // state API (individual APIC register writes via - // WHvSetVirtualProcessorRegisters fail with ACCESS_DENIED). - Self::init_lapic_bulk(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; + WHvCreateVirtualProcessor(p, 0, 0) + .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - p - } + // Initialize the LAPIC via the bulk interrupt-controller + // state API (individual APIC register writes via + // WHvSetVirtualProcessorRegisters fail with ACCESS_DENIED). + #[cfg(feature = "hw-interrupts")] + Self::init_lapic_bulk(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; - #[cfg(not(feature = "hw-interrupts"))] - { - let p = WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeProcessorCount, - &NUM_CPU as *const _ as *const _, - std::mem::size_of_val(&NUM_CPU) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; - WHvCreateVirtualProcessor(p, 0, 0) - .map_err(|e| CreateVmError::CreateVcpuFd(e.into()))?; - p - } + p }; let mgr = get_surrogate_process_manager() From ad165790700506ee0b731a3f9dc4955a6fce0e3d Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 07:06:52 +0000 Subject: [PATCH 25/37] style: fix rustfmt in KVM trace macro Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm/x86_64.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 57c297401..f490fd14c 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -204,7 +204,9 @@ impl KvmVm { Ok(VcpuExit::Hlt) => { // The in-kernel LAPIC normally handles HLT internally. // If we somehow get here, just re-enter the guest. - tracing::trace!("KVM hw-interrupts: unexpected VcpuExit::Hlt, re-entering guest"); + tracing::trace!( + "KVM hw-interrupts: unexpected VcpuExit::Hlt, re-entering guest" + ); continue; } Ok(VcpuExit::IoOut(port, data)) => { From f0bd850207c523b559e9bc32b973e970f5de1cbd Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 15:54:59 +0000 Subject: [PATCH 26/37] style: fix rustfmt import ordering for TimerThread Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm/x86_64.rs | 4 ++-- .../src/hypervisor/virtual_machine/mshv/x86_64.rs | 4 ++-- src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index f490fd14c..8b4dda28d 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -38,10 +38,10 @@ use crate::hypervisor::regs::{ }; #[cfg(feature = "hw-interrupts")] use crate::hypervisor::virtual_machine::HypervisorError; -#[cfg(feature = "hw-interrupts")] -use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; #[cfg(all(test, not(feature = "nanvix-unstable")))] use crate::hypervisor::virtual_machine::XSAVE_BUFFER_SIZE; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; use crate::hypervisor::virtual_machine::{ CreateVmError, MapMemoryError, RegisterError, RunVcpuError, UnmapMemoryError, VirtualMachine, VmExit, diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index eef8f2df9..aff3febda 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -53,13 +53,13 @@ use crate::hypervisor::regs::{ }; #[cfg(all(test, not(feature = "nanvix-unstable")))] use crate::hypervisor::virtual_machine::XSAVE_BUFFER_SIZE; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; use crate::hypervisor::virtual_machine::{ CreateVmError, MapMemoryError, RegisterError, RunVcpuError, UnmapMemoryError, VirtualMachine, VmExit, XSAVE_MIN_SIZE, }; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; -#[cfg(feature = "hw-interrupts")] -use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; #[cfg(feature = "trace_guest")] use crate::sandbox::trace::TraceContext as SandboxTraceContext; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index b3472043a..389ad4433 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -38,14 +38,14 @@ use crate::hypervisor::regs::{ }; use crate::hypervisor::surrogate_process::SurrogateProcess; use crate::hypervisor::surrogate_process_manager::get_surrogate_process_manager; +#[cfg(feature = "hw-interrupts")] +use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; use crate::hypervisor::virtual_machine::{ CreateVmError, HypervisorError, MapMemoryError, RegisterError, RunVcpuError, UnmapMemoryError, VirtualMachine, VmExit, XSAVE_MIN_SIZE, }; use crate::hypervisor::wrappers::HandleWrapper; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType}; -#[cfg(feature = "hw-interrupts")] -use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; #[cfg(feature = "trace_guest")] use crate::sandbox::trace::TraceContext as SandboxTraceContext; From a8cc679ccb7e070fe5e1ec99bebce1d9af0d9815 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 16:45:52 +0000 Subject: [PATCH 27/37] fix: revert test exclusion to runtime ignore for hw-interrupts compatibility Signed-off-by: danbugs --- .../src/hypervisor/hyperlight_vm/x86_64.rs | 12 ++++++------ src/hyperlight_host/src/hypervisor/mod.rs | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs index 152478e94..e8f21e119 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -1550,7 +1550,7 @@ mod tests { // Tests // ========================================================================== - #[cfg(not(feature = "hw-interrupts"))] + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_simple() { // push rax; hlt - aligns stack to 16 bytes @@ -1696,7 +1696,7 @@ mod tests { use super::*; - #[cfg(not(feature = "hw-interrupts"))] + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_regs() { let mut a = CodeAssembler::new(64).unwrap(); @@ -1756,7 +1756,7 @@ mod tests { assert_regs_reset(hyperlight_vm.vm.as_ref()); } - #[cfg(not(feature = "hw-interrupts"))] + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_fpu() { #[cfg(kvm)] @@ -1888,7 +1888,7 @@ mod tests { } } - #[cfg(not(feature = "hw-interrupts"))] + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_debug_regs() { let mut a = CodeAssembler::new(64).unwrap(); @@ -1931,7 +1931,7 @@ mod tests { assert_debug_regs_reset(hyperlight_vm.vm.as_ref()); } - #[cfg(not(feature = "hw-interrupts"))] + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_sregs() { // Build code that modifies special registers and halts @@ -1985,7 +1985,7 @@ mod tests { /// Verifies guest-visible FPU state (via FXSAVE) is properly reset. /// Unlike tests using hypervisor API, this runs actual guest code with FXSAVE. - #[cfg(not(feature = "hw-interrupts"))] + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn reset_vcpu_fpu_guest_visible_state() { let mut ctx = hyperlight_vm_with_mem_mgr_fxsave(); diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 71fbac57e..4a508100c 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -469,7 +469,7 @@ pub(crate) mod tests { use crate::sandbox::{SandboxConfiguration, UninitializedSandbox}; use crate::{Result, is_hypervisor_present, new_error}; - #[cfg(not(feature = "hw-interrupts"))] + #[cfg_attr(feature = "hw-interrupts", ignore)] #[test] fn test_initialise() -> Result<()> { if !is_hypervisor_present() { From 1bd28692e03fe4250a430a8aac9b8e3c5d913dc0 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 22:57:12 +0000 Subject: [PATCH 28/37] refactor: extract shared PvTimerConfig handler and fix timer reconfiguration Extract handle_pv_timer_config() into hw_interrupts.rs as a shared function used by KVM, MSHV, and WHP backends. This eliminates duplicated timer-config parsing/clamping/start logic. Fix MSHV timer reconfiguration: previously MSHV only started a timer if self.timer.is_none(), ignoring reconfiguration requests. Now all backends consistently stop any existing timer before starting a new one when period_us > 0. Fix WHP do_lapic_eoi to log errors from set_lapic_state instead of silently discarding them. Fix MSHV request_virtual_interrupt to log errors via tracing::warn instead of silently discarding them with let _. Signed-off-by: danbugs --- .../hypervisor/virtual_machine/kvm/x86_64.rs | 41 ++---- .../hypervisor/virtual_machine/mshv/x86_64.rs | 121 ++++++++---------- .../src/hypervisor/virtual_machine/whp.rs | 59 ++++----- .../virtual_machine/x86_64/hw_interrupts.rs | 26 ++++ 4 files changed, 117 insertions(+), 130 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 8b4dda28d..59ab67bd9 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -36,8 +36,6 @@ use crate::hypervisor::regs::{ CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, }; -#[cfg(feature = "hw-interrupts")] -use crate::hypervisor::virtual_machine::HypervisorError; #[cfg(all(test, not(feature = "nanvix-unstable")))] use crate::hypervisor::virtual_machine::XSAVE_BUFFER_SIZE; #[cfg(feature = "hw-interrupts")] @@ -218,11 +216,7 @@ impl KvmVm { return Ok(VmExit::Halt()); } if port == VmAction::PvTimerConfig as u16 { - let data_copy: [u8; 4] = data - .get(..4) - .and_then(|s| s.try_into().ok()) - .unwrap_or([0; 4]); - self.handle_pv_timer_config(&data_copy)?; + self.handle_pv_timer_config(data); continue; } // PIT ports (0x40-0x43): no in-kernel PIT, so these @@ -257,29 +251,16 @@ impl KvmVm { } #[cfg(feature = "hw-interrupts")] - fn handle_pv_timer_config(&mut self, data: &[u8; 4]) -> std::result::Result<(), RunVcpuError> { - use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::{ - MAX_TIMER_PERIOD_US, MIN_TIMER_PERIOD_US, - }; - - let period_us = u32::from_le_bytes(*data) as u64; - if period_us == 0 { - // Stop existing timer if any. - if let Some(mut t) = self.timer.take() { - t.stop(); - } - } else if self.timer.is_none() { - let period_us = period_us.clamp(MIN_TIMER_PERIOD_US, MAX_TIMER_PERIOD_US); - let eventfd = self - .timer_irq_eventfd - .try_clone() - .map_err(|e| RunVcpuError::Unknown(HypervisorError::KvmError(e.into())))?; - let period = std::time::Duration::from_micros(period_us); - self.timer = Some(TimerThread::start(period, move || { - let _ = eventfd.write(1); - })); - } - Ok(()) + fn handle_pv_timer_config(&mut self, data: &[u8]) { + use super::super::x86_64::hw_interrupts::handle_pv_timer_config; + + let eventfd_clone = self + .timer_irq_eventfd + .try_clone() + .expect("failed to clone eventfd"); + handle_pv_timer_config(&mut self.timer, data, move || { + let _ = eventfd_clone.write(1); + }); } /// Run the vCPU once without hardware interrupt support (default path). diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index aff3febda..5f0ed3e0a 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -627,75 +627,66 @@ impl MshvVm { fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { if port == VmAction::PvTimerConfig as u16 { - if data.len() >= 4 { - use super::super::x86_64::hw_interrupts::{ - MAX_TIMER_PERIOD_US, MIN_TIMER_PERIOD_US, - }; - - let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); - if period_us == 0 { - // Stop existing timer if any. - if let Some(mut t) = self.timer.take() { - t.stop(); + // Re-enable LAPIC if the guest disabled it (via WRMSR + // to MSR 0x1B clearing bit 11). Some guests clear + // the global APIC enable when no I/O APIC is detected. + // + // The hypervisor may return 0 for APIC_BASE when the + // APIC is globally disabled, so we always restore the + // standard value (0xFEE00900). + if self.timer.is_none() { + use mshv_bindings::hv_register_name_HV_X64_REGISTER_APIC_BASE; + let mut apic_base_reg = [hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_APIC_BASE, + value: hv_register_value { reg64: 0 }, + ..Default::default() + }]; + if self.vcpu_fd.get_reg(&mut apic_base_reg).is_ok() { + let cur = unsafe { apic_base_reg[0].value.reg64 }; + if cur & (1 << 11) == 0 { + let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { + name: hv_register_name_HV_X64_REGISTER_APIC_BASE, + value: hv_register_value { + reg64: Self::APIC_BASE_DEFAULT, + }, + ..Default::default() + }]); } - } else if self.timer.is_none() { - // Re-enable LAPIC if the guest disabled it (via WRMSR - // to MSR 0x1B clearing bit 11). Some guests clear - // the global APIC enable when no I/O APIC is detected. - // - // The hypervisor may return 0 for APIC_BASE when the - // APIC is globally disabled, so we always restore the - // standard value (0xFEE00900). - use mshv_bindings::hv_register_name_HV_X64_REGISTER_APIC_BASE; - let mut apic_base_reg = [hv_register_assoc { - name: hv_register_name_HV_X64_REGISTER_APIC_BASE, - value: hv_register_value { reg64: 0 }, - ..Default::default() - }]; - if self.vcpu_fd.get_reg(&mut apic_base_reg).is_ok() { - let cur = unsafe { apic_base_reg[0].value.reg64 }; - if cur & (1 << 11) == 0 { - let _ = self.vcpu_fd.set_reg(&[hv_register_assoc { - name: hv_register_name_HV_X64_REGISTER_APIC_BASE, - value: hv_register_value { - reg64: Self::APIC_BASE_DEFAULT, - }, - ..Default::default() - }]); - } - } - // Re-initialize LAPIC SVR (may have been zeroed when - // guest disabled the APIC globally) - if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { - let regs = lapic_regs_as_u8(&lapic.regs); - let svr = super::super::x86_64::hw_interrupts::read_lapic_u32(regs, 0xF0); - if svr & 0x100 == 0 { - let regs_mut = lapic_regs_as_u8_mut(&mut lapic.regs); - super::super::x86_64::hw_interrupts::write_lapic_u32( - regs_mut, 0xF0, 0x1FF, - ); - super::super::x86_64::hw_interrupts::write_lapic_u32(regs_mut, 0x80, 0); // TPR - let _ = self.vcpu_fd.set_lapic(&lapic); - } + } + // Re-initialize LAPIC SVR (may have been zeroed when + // guest disabled the APIC globally) + if let Ok(mut lapic) = self.vcpu_fd.get_lapic() { + let regs = lapic_regs_as_u8(&lapic.regs); + let svr = super::super::x86_64::hw_interrupts::read_lapic_u32(regs, 0xF0); + if svr & 0x100 == 0 { + let regs_mut = lapic_regs_as_u8_mut(&mut lapic.regs); + super::super::x86_64::hw_interrupts::write_lapic_u32( + regs_mut, 0xF0, 0x1FF, + ); + super::super::x86_64::hw_interrupts::write_lapic_u32(regs_mut, 0x80, 0); // TPR + let _ = self.vcpu_fd.set_lapic(&lapic); } - - let vm_fd = self.vm_fd.clone(); - let vector = super::super::x86_64::hw_interrupts::TIMER_VECTOR; - let period_us = - (period_us as u64).clamp(MIN_TIMER_PERIOD_US, MAX_TIMER_PERIOD_US); - let period = std::time::Duration::from_micros(period_us); - self.timer = Some(TimerThread::start(period, move || { - let _ = vm_fd.request_virtual_interrupt(&InterruptRequest { - interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED, - apic_id: 0, - vector, - level_triggered: false, - logical_destination_mode: false, - long_mode: false, - }); - })); } } + + let vm_fd = Arc::clone(&self.vm_fd); + let vector = super::super::x86_64::hw_interrupts::TIMER_VECTOR; + super::super::x86_64::hw_interrupts::handle_pv_timer_config( + &mut self.timer, + data, + move || { + if let Err(e) = vm_fd.request_virtual_interrupt(&InterruptRequest { + interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED, + apic_id: 0, + vector, + level_triggered: false, + logical_destination_mode: false, + long_mode: false, + }) { + tracing::warn!("MSHV request_virtual_interrupt failed: {e}"); + } + }, + ); return true; } let timer_active = self.timer.as_ref().is_some_and(|t| t.is_active()); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 389ad4433..3eb97d6ac 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -1047,46 +1047,35 @@ impl WhpVm { fn do_lapic_eoi(&self) { if let Ok(mut state) = self.get_lapic_state() { super::x86_64::hw_interrupts::lapic_eoi(&mut state); - let _ = self.set_lapic_state(&state); + if let Err(e) = self.set_lapic_state(&state) { + tracing::warn!("WHP set_lapic_state (EOI) failed: {e}"); + } } } fn handle_hw_io_out(&mut self, port: u16, data: &[u8]) -> bool { if port == VmAction::PvTimerConfig as u16 { - if data.len() >= 4 { - use super::x86_64::hw_interrupts::{MAX_TIMER_PERIOD_US, MIN_TIMER_PERIOD_US}; - - let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); - - // Stop any existing timer thread before (re-)configuring. - if let Some(mut t) = self.timer.take() { - t.stop(); - } - - if period_us > 0 { - let period_us = - (period_us as u64).clamp(MIN_TIMER_PERIOD_US, MAX_TIMER_PERIOD_US); - let partition_raw = self.partition.0; - let vector = super::x86_64::hw_interrupts::TIMER_VECTOR; - let period = std::time::Duration::from_micros(period_us); - - self.timer = Some(TimerThread::start(period, move || { - let partition = WHV_PARTITION_HANDLE(partition_raw); - let interrupt = WHV_INTERRUPT_CONTROL { - _bitfield: 0, // Type=Fixed, DestMode=Physical, Trigger=Edge - Destination: 0, - Vector: vector, - }; - let _ = unsafe { - WHvRequestInterrupt( - partition, - &interrupt, - std::mem::size_of::() as u32, - ) - }; - })); - } - } + let partition_raw = self.partition.0; + let vector = super::x86_64::hw_interrupts::TIMER_VECTOR; + super::x86_64::hw_interrupts::handle_pv_timer_config( + &mut self.timer, + data, + move || { + let partition = WHV_PARTITION_HANDLE(partition_raw); + let interrupt = WHV_INTERRUPT_CONTROL { + _bitfield: 0, // Type=Fixed, DestMode=Physical, Trigger=Edge + Destination: 0, + Vector: vector, + }; + let _ = unsafe { + WHvRequestInterrupt( + partition, + &interrupt, + std::mem::size_of::() as u32, + ) + }; + }, + ); return true; } let timer_active = self.timer.as_ref().is_some_and(|t| t.is_active()); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs index 4f2ccd18b..45de4aafd 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/x86_64/hw_interrupts.rs @@ -159,6 +159,32 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::thread::JoinHandle; use std::time::Duration; +/// Handles PV timer configuration from guest IO out. +/// Parses the timer period from `data`, stops any existing timer, +/// and starts a new TimerThread if `period_us > 0`. +/// Returns `true` if the port was handled, `false` otherwise. +pub(crate) fn handle_pv_timer_config( + timer: &mut Option, + data: &[u8], + inject_fn: impl Fn() + Send + 'static, +) -> bool { + if data.len() >= 4 { + let period_us = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + // Stop existing timer if any. + if let Some(mut t) = timer.take() { + t.stop(); + } + if period_us > 0 { + let clamped = (period_us as u64).clamp(MIN_TIMER_PERIOD_US, MAX_TIMER_PERIOD_US); + let period = Duration::from_micros(clamped); + *timer = Some(TimerThread::start(period, inject_fn)); + } + true + } else { + false + } +} + /// Shared timer thread that periodically calls an inject function. /// /// Each backend passes a closure for interrupt injection: From 1cfa52710f2293fffd1c763011ca165c0d130214 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 22:59:54 +0000 Subject: [PATCH 29/37] refactor: extract hw-interrupts setup from constructors KVM: extract irqfd/eventfd setup into KvmVm::setup_irqfd(), called from new() via Self::setup_irqfd(&vm_fd). MSHV: extract LAPIC initialization into MshvVm::init_lapic(), called from new() via Self::init_lapic(&vcpu_fd). Move lapic_regs_as_u8 and lapic_regs_as_u8_mut helper functions closer to the hw-interrupts impl block for better locality. WHP: extract check_lapic_emulation_support() and enable_lapic_emulation() from inline blocks in new(). Move LAPIC_STATE_MAX_SIZE and init_lapic_bulk into the hw-interrupts impl block since they are only used with that feature. Signed-off-by: danbugs --- .../hypervisor/virtual_machine/kvm/x86_64.rs | 67 ++++--- .../hypervisor/virtual_machine/mshv/x86_64.rs | 64 ++++--- .../src/hypervisor/virtual_machine/whp.rs | 178 +++++++++--------- 3 files changed, 165 insertions(+), 144 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 59ab67bd9..a3093f4a0 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -110,6 +110,42 @@ pub(crate) struct KvmVm { static KVM: LazyLock> = LazyLock::new(|| Kvm::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into()))); +#[cfg(feature = "hw-interrupts")] +impl KvmVm { + /// Create the in-kernel IRQ chip and register an irqfd for GSI 0. + /// + /// When hw-interrupts is enabled, create the in-kernel IRQ chip + /// (PIC + IOAPIC + LAPIC) before creating the vCPU so the + /// per-vCPU LAPIC is initialised in virtual-wire mode (LINT0 = ExtINT). + /// The guest programs the PIC remap via standard IO port writes, + /// which the in-kernel PIC handles transparently. + /// + /// Instead of creating an in-kernel PIT (create_pit2), we use a + /// host-side timer thread + irqfd to inject IRQ0 at the rate + /// requested by the guest via VmAction::PvTimerConfig (port 107). + /// This eliminates the in-kernel PIT device. Guest PIT port writes + /// (0x40, 0x43) become no-ops handled in the run loop. + fn setup_irqfd(vm_fd: &VmFd) -> std::result::Result { + vm_fd + .create_irq_chip() + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + + // Create an EventFd and register it via irqfd for GSI 0 (IRQ0). + // When the timer thread writes to this EventFd, the in-kernel + // PIC will assert IRQ0, which is delivered as the vector the + // guest configured during PIC remap (typically vector 0x20). + let eventfd = EventFd::new(0).map_err(|e| { + CreateVmError::InitializeVm( + kvm_ioctls::Error::new(e.raw_os_error().unwrap_or(libc::EIO)).into(), + ) + })?; + vm_fd + .register_irqfd(&eventfd, 0) + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + Ok(eventfd) + } +} + impl KvmVm { /// Create a new instance of a `KvmVm` #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] @@ -120,37 +156,8 @@ impl KvmVm { .create_vm_with_type(0) .map_err(|e| CreateVmError::CreateVmFd(e.into()))?; - // When hw-interrupts is enabled, create the in-kernel IRQ chip - // (PIC + IOAPIC + LAPIC) before creating the vCPU so the - // per-vCPU LAPIC is initialised in virtual-wire mode (LINT0 = ExtINT). - // The guest programs the PIC remap via standard IO port writes, - // which the in-kernel PIC handles transparently. - // - // Instead of creating an in-kernel PIT (create_pit2), we use a - // host-side timer thread + irqfd to inject IRQ0 at the rate - // requested by the guest via VmAction::PvTimerConfig (port 107). - // This eliminates the in-kernel PIT device. Guest PIT port writes - // (0x40, 0x43) become no-ops handled in the run loop. #[cfg(feature = "hw-interrupts")] - let timer_irq_eventfd = { - vm_fd - .create_irq_chip() - .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - - // Create an EventFd and register it via irqfd for GSI 0 (IRQ0). - // When the timer thread writes to this EventFd, the in-kernel - // PIC will assert IRQ0, which is delivered as the vector the - // guest configured during PIC remap (typically vector 0x20). - let eventfd = EventFd::new(0).map_err(|e| { - CreateVmError::InitializeVm( - kvm_ioctls::Error::new(e.raw_os_error().unwrap_or(libc::EIO)).into(), - ) - })?; - vm_fd - .register_irqfd(&eventfd, 0) - .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - eventfd - }; + let timer_irq_eventfd = Self::setup_irqfd(&vm_fd)?; let vcpu_fd = vm_fd .create_vcpu(0) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 5f0ed3e0a..27268bf3c 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -94,23 +94,6 @@ pub(crate) struct MshvVm { static MSHV: LazyLock> = LazyLock::new(|| Mshv::new().map_err(|e| CreateVmError::HypervisorNotAvailable(e.into()))); -/// Cast MSHV `LapicState.regs` (`[c_char; 1024]`) to a `&[u8]` slice -/// for use with the shared LAPIC helpers. -#[cfg(feature = "hw-interrupts")] -fn lapic_regs_as_u8(regs: &[::std::os::raw::c_char; 1024]) -> &[u8] { - // Safety: c_char (i8) and u8 have the same size and alignment; - // LAPIC register values are treated as raw bytes. - unsafe { &*(regs as *const [::std::os::raw::c_char; 1024] as *const [u8; 1024]) } -} - -/// Cast MSHV `LapicState.regs` (`[c_char; 1024]`) to a `&mut [u8]` slice -/// for use with the shared LAPIC helpers. -#[cfg(feature = "hw-interrupts")] -fn lapic_regs_as_u8_mut(regs: &mut [::std::os::raw::c_char; 1024]) -> &mut [u8] { - // Safety: same as above. - unsafe { &mut *(regs as *mut [::std::os::raw::c_char; 1024] as *mut [u8; 1024]) } -} - impl MshvVm { /// Create a new instance of a MshvVm #[instrument(skip_all, parent = Span::current(), level = "Trace")] @@ -155,19 +138,7 @@ impl MshvVm { // LAPIC defaults to disabled (SVR bit 8 = 0), which means no APIC // interrupts can be delivered (request_virtual_interrupt would fail). #[cfg(feature = "hw-interrupts")] - { - use super::super::x86_64::hw_interrupts::init_lapic_registers; - - let mut lapic: LapicState = vcpu_fd - .get_lapic() - .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - - init_lapic_registers(lapic_regs_as_u8_mut(&mut lapic.regs)); - - vcpu_fd - .set_lapic(&lapic) - .map_err(|e| CreateVmError::InitializeVm(e.into()))?; - } + Self::init_lapic(&vcpu_fd)?; Ok(Self { #[cfg(feature = "hw-interrupts")] @@ -608,12 +579,45 @@ impl DebuggableVm for MshvVm { } } +/// Cast MSHV `LapicState.regs` (`[c_char; 1024]`) to a `&[u8]` slice +/// for use with the shared LAPIC helpers. +#[cfg(feature = "hw-interrupts")] +fn lapic_regs_as_u8(regs: &[::std::os::raw::c_char; 1024]) -> &[u8] { + // Safety: c_char (i8) and u8 have the same size and alignment; + // LAPIC register values are treated as raw bytes. + unsafe { &*(regs as *const [::std::os::raw::c_char; 1024] as *const [u8; 1024]) } +} + +/// Cast MSHV `LapicState.regs` (`[c_char; 1024]`) to a `&mut [u8]` slice +/// for use with the shared LAPIC helpers. +#[cfg(feature = "hw-interrupts")] +fn lapic_regs_as_u8_mut(regs: &mut [::std::os::raw::c_char; 1024]) -> &mut [u8] { + // Safety: same as above. + unsafe { &mut *(regs as *mut [::std::os::raw::c_char; 1024] as *mut [u8; 1024]) } +} + #[cfg(feature = "hw-interrupts")] impl MshvVm { /// Standard x86 APIC base MSR value: base address 0xFEE00000 + /// BSP flag (bit 8) + global enable (bit 11). const APIC_BASE_DEFAULT: u64 = 0xFEE00900; + /// Initialize the virtual LAPIC to sensible defaults. + fn init_lapic(vcpu_fd: &VcpuFd) -> std::result::Result<(), CreateVmError> { + use super::super::x86_64::hw_interrupts::init_lapic_registers; + + let mut lapic: LapicState = vcpu_fd + .get_lapic() + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + + init_lapic_registers(lapic_regs_as_u8_mut(&mut lapic.regs)); + + vcpu_fd + .set_lapic(&lapic) + .map_err(|e| CreateVmError::InitializeVm(e.into()))?; + Ok(()) + } + /// Perform LAPIC EOI: clear the highest-priority in-service bit. /// Called when the guest sends PIC EOI, since the timer thread /// delivers interrupts through the LAPIC and the guest only diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 3eb97d6ac..2441db610 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -116,32 +116,7 @@ impl WhpVm { let partition = unsafe { #[cfg(feature = "hw-interrupts")] - { - // Check whether the WHP host supports LAPIC emulation. - // Bit 1 of WHV_CAPABILITY_FEATURES = LocalApicEmulation. - // LAPIC emulation is required for timer interrupt delivery. - const LAPIC_EMULATION_BIT: u64 = 1 << 1; - - let mut capability: WHV_CAPABILITY = Default::default(); - let has_lapic = WHvGetCapability( - WHvCapabilityCodeFeatures, - &mut capability as *mut _ as *mut c_void, - std::mem::size_of::() as u32, - None, - ) - .is_ok() - && (capability.Features.AsUINT64 & LAPIC_EMULATION_BIT != 0); - - if !has_lapic { - return Err(CreateVmError::InitializeVm( - windows_result::Error::new( - HRESULT::from_win32(0x32), // ERROR_NOT_SUPPORTED - "WHP LocalApicEmulation capability is required for hw-interrupts", - ) - .into(), - )); - } - } + Self::check_lapic_emulation_support()?; let p = WHvCreatePartition().map_err(|e| CreateVmError::CreateVmFd(e.into()))?; WHvSetPartitionProperty( @@ -153,16 +128,7 @@ impl WhpVm { .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; #[cfg(feature = "hw-interrupts")] - { - let apic_mode: u32 = 1; // WHvX64LocalApicEmulationModeXApic - WHvSetPartitionProperty( - p, - WHvPartitionPropertyCodeLocalApicEmulationMode, - &apic_mode as *const _ as *const _, - std::mem::size_of_val(&apic_mode) as _, - ) - .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; - } + Self::enable_lapic_emulation(p)?; WHvSetupPartition(p).map_err(|e| CreateVmError::InitializeVm(e.into()))?; WHvCreateVirtualProcessor(p, 0, 0) @@ -192,54 +158,6 @@ impl WhpVm { }) } - /// Maximum size for the interrupt controller state blob. - const LAPIC_STATE_MAX_SIZE: u32 = 4096; - - /// Initialize the LAPIC via the bulk interrupt-controller state API. - /// Individual APIC register writes via `WHvSetVirtualProcessorRegisters` - /// fail with ACCESS_DENIED on WHP when LAPIC emulation is enabled, - /// so we use `WHvGet/SetVirtualProcessorInterruptControllerState2` - /// to read-modify-write the entire LAPIC register page. - #[cfg(feature = "hw-interrupts")] - unsafe fn init_lapic_bulk(partition: WHV_PARTITION_HANDLE) -> windows_result::Result<()> { - let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; - let mut written: u32 = 0; - - unsafe { - WHvGetVirtualProcessorInterruptControllerState2( - partition, - 0, - state.as_mut_ptr() as *mut c_void, - Self::LAPIC_STATE_MAX_SIZE, - Some(&mut written), - )?; - } - state.truncate(written as usize); - - // init_lapic_registers writes up to offset 0x374 (LVT Error at 0x370). - // Bail out if the buffer returned by WHP is too small. - const MIN_LAPIC_STATE_SIZE: usize = 0x374; - if state.len() < MIN_LAPIC_STATE_SIZE { - return Err(windows_result::Error::new( - HRESULT::from_win32(0x32), // ERROR_NOT_SUPPORTED - "WHP LAPIC state buffer is too small for init_lapic_registers", - )); - } - - super::x86_64::hw_interrupts::init_lapic_registers(&mut state); - - unsafe { - WHvSetVirtualProcessorInterruptControllerState2( - partition, - 0, - state.as_ptr() as *const c_void, - state.len() as u32, - )?; - } - - Ok(()) - } - /// Helper for setting arbitrary registers. Makes sure the same number /// of names and values are passed (at the expense of some performance). fn set_registers( @@ -1011,6 +929,98 @@ impl DebuggableVm for WhpVm { #[cfg(feature = "hw-interrupts")] impl WhpVm { + /// Maximum size for the interrupt controller state blob. + const LAPIC_STATE_MAX_SIZE: u32 = 4096; + + /// Check whether the WHP host supports LAPIC emulation. + /// Bit 1 of WHV_CAPABILITY_FEATURES = LocalApicEmulation. + /// LAPIC emulation is required for timer interrupt delivery. + fn check_lapic_emulation_support() -> Result<(), CreateVmError> { + const LAPIC_EMULATION_BIT: u64 = 1 << 1; + + let mut capability: WHV_CAPABILITY = Default::default(); + let has_lapic = unsafe { + WHvGetCapability( + WHvCapabilityCodeFeatures, + &mut capability as *mut _ as *mut c_void, + std::mem::size_of::() as u32, + None, + ) + .is_ok() + && (capability.Features.AsUINT64 & LAPIC_EMULATION_BIT != 0) + }; + + if !has_lapic { + return Err(CreateVmError::InitializeVm( + windows_result::Error::new( + HRESULT::from_win32(0x32), // ERROR_NOT_SUPPORTED + "WHP LocalApicEmulation capability is required for hw-interrupts", + ) + .into(), + )); + } + Ok(()) + } + + /// Enable LAPIC emulation on the given partition. + fn enable_lapic_emulation(partition: WHV_PARTITION_HANDLE) -> Result<(), CreateVmError> { + let apic_mode: u32 = 1; // WHvX64LocalApicEmulationModeXApic + unsafe { + WHvSetPartitionProperty( + partition, + WHvPartitionPropertyCodeLocalApicEmulationMode, + &apic_mode as *const _ as *const _, + std::mem::size_of_val(&apic_mode) as _, + ) + .map_err(|e| CreateVmError::SetPartitionProperty(e.into()))?; + } + Ok(()) + } + + /// Initialize the LAPIC via the bulk interrupt-controller state API. + /// Individual APIC register writes via `WHvSetVirtualProcessorRegisters` + /// fail with ACCESS_DENIED on WHP when LAPIC emulation is enabled, + /// so we use `WHvGet/SetVirtualProcessorInterruptControllerState2` + /// to read-modify-write the entire LAPIC register page. + unsafe fn init_lapic_bulk(partition: WHV_PARTITION_HANDLE) -> windows_result::Result<()> { + let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; + let mut written: u32 = 0; + + unsafe { + WHvGetVirtualProcessorInterruptControllerState2( + partition, + 0, + state.as_mut_ptr() as *mut c_void, + Self::LAPIC_STATE_MAX_SIZE, + Some(&mut written), + )?; + } + state.truncate(written as usize); + + // init_lapic_registers writes up to offset 0x374 (LVT Error at 0x370). + // Bail out if the buffer returned by WHP is too small. + const MIN_LAPIC_STATE_SIZE: usize = 0x374; + if state.len() < MIN_LAPIC_STATE_SIZE { + return Err(windows_result::Error::new( + HRESULT::from_win32(0x32), // ERROR_NOT_SUPPORTED + "WHP LAPIC state buffer is too small for init_lapic_registers", + )); + } + + super::x86_64::hw_interrupts::init_lapic_registers(&mut state); + + unsafe { + WHvSetVirtualProcessorInterruptControllerState2( + partition, + 0, + state.as_ptr() as *const c_void, + state.len() as u32, + )?; + } + + Ok(()) + } + /// Get the LAPIC state via the bulk interrupt-controller state API. fn get_lapic_state(&self) -> windows_result::Result> { let mut state = vec![0u8; Self::LAPIC_STATE_MAX_SIZE as usize]; From 746f29446988eb102d8101ca62c77af2e842ecd1 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 23:00:39 +0000 Subject: [PATCH 30/37] fix(kvm): return error on unexpected VcpuExit::Hlt in hw-interrupts loop With hw-interrupts enabled, VcpuExit::Hlt should never reach userspace because the in-kernel LAPIC handles HLT internally. Previously this was silently ignored with a trace log and continue. Now it returns RunVcpuError::UnexpectedExit to surface the problem. Add UnexpectedExit(String) variant to RunVcpuError for cases where the vCPU exits in a way that shouldn't happen under normal operation. Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm/x86_64.rs | 9 +++------ .../src/hypervisor/virtual_machine/mod.rs | 2 ++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index a3093f4a0..6886e5579 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -207,12 +207,9 @@ impl KvmVm { loop { match self.vcpu_fd.run() { Ok(VcpuExit::Hlt) => { - // The in-kernel LAPIC normally handles HLT internally. - // If we somehow get here, just re-enter the guest. - tracing::trace!( - "KVM hw-interrupts: unexpected VcpuExit::Hlt, re-entering guest" - ); - continue; + return Err(RunVcpuError::UnexpectedExit( + "KVM hw-interrupts: unexpected VcpuExit::Hlt".to_string(), + )); } Ok(VcpuExit::IoOut(port, data)) => { if port == VmAction::Halt as u16 { diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index 9edaa1f87..ff90376ee 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -202,6 +202,8 @@ pub enum RunVcpuError { IncrementRip(HypervisorError), #[error("Parse GPA access info failed")] ParseGpaAccessInfo, + #[error("Unexpected vCPU exit: {0}")] + UnexpectedExit(String), #[error("Unknown error: {0}")] Unknown(HypervisorError), } From 2bb83cd1f1c1eac05c43cf5ad7d04e583000a11c Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 23:01:26 +0000 Subject: [PATCH 31/37] refactor: use shared constant for halt port in guest assembly Replace hardcoded port number 108 in dispatch_function and pivot_stack inline assembly with a const operand referencing hyperlight_common::outb::VmAction::Halt. This ensures the guest assembly stays in sync with the host-side port definitions. Signed-off-by: danbugs --- src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs | 7 +++++-- src/hyperlight_guest_bin/src/arch/amd64/init.rs | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs index 117fa42e6..2fffae4f0 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/dispatch.rs @@ -68,9 +68,12 @@ core::arch::global_asm!(" mov cr4, rdi flush_done: call {internal_dispatch_function}\n - mov dx, 108\n + mov dx, {halt_port}\n out dx, eax\n cli\n hlt\n .cfi_endproc -", internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function); +", + internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function, + halt_port = const hyperlight_common::outb::VmAction::Halt as u16, +); diff --git a/src/hyperlight_guest_bin/src/arch/amd64/init.rs b/src/hyperlight_guest_bin/src/arch/amd64/init.rs index c3037b6aa..073bd3a2f 100644 --- a/src/hyperlight_guest_bin/src/arch/amd64/init.rs +++ b/src/hyperlight_guest_bin/src/arch/amd64/init.rs @@ -173,9 +173,12 @@ core::arch::global_asm!(" mov rsp, r8\n xor ebp, ebp\n call {generic_init}\n - mov dx, 108\n + mov dx, {halt_port}\n out dx, eax\n cli\n hlt\n .cfi_endproc\n -", generic_init = sym crate::generic_init); +", + generic_init = sym crate::generic_init, + halt_port = const hyperlight_common::outb::VmAction::Halt as u16, +); From a834801af3a66c5a95b239096c06a6ad175963df Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 23:01:54 +0000 Subject: [PATCH 32/37] style: rustfmt Signed-off-by: danbugs --- src/hyperlight_guest/src/error.rs | 3 ++- .../src/hypervisor/virtual_machine/mshv/x86_64.rs | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/hyperlight_guest/src/error.rs b/src/hyperlight_guest/src/error.rs index 463ba6a69..4592dc5a9 100644 --- a/src/hyperlight_guest/src/error.rs +++ b/src/hyperlight_guest/src/error.rs @@ -17,9 +17,10 @@ limitations under the License. use alloc::format; use alloc::string::{String, ToString as _}; +use anyhow; use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; use hyperlight_common::func::Error as FuncError; -use {anyhow, serde_json}; +use serde_json; pub type Result = core::result::Result; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs index 27268bf3c..0a768bd7a 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mshv/x86_64.rs @@ -664,9 +664,7 @@ impl MshvVm { let svr = super::super::x86_64::hw_interrupts::read_lapic_u32(regs, 0xF0); if svr & 0x100 == 0 { let regs_mut = lapic_regs_as_u8_mut(&mut lapic.regs); - super::super::x86_64::hw_interrupts::write_lapic_u32( - regs_mut, 0xF0, 0x1FF, - ); + super::super::x86_64::hw_interrupts::write_lapic_u32(regs_mut, 0xF0, 0x1FF); super::super::x86_64::hw_interrupts::write_lapic_u32(regs_mut, 0x80, 0); // TPR let _ = self.vcpu_fd.set_lapic(&lapic); } From 836cb91fe7c988f8724d6d04e739800e8e2e7527 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 23:02:53 +0000 Subject: [PATCH 33/37] fix(kvm): copy IO data before calling handle_pv_timer_config The data slice borrows from the KVM run buffer (self.vcpu_fd), so calling self.handle_pv_timer_config(data) would create a conflicting mutable borrow. Copy data to a local Vec before calling the method. Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm/x86_64.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 6886e5579..572159a72 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -220,7 +220,8 @@ impl KvmVm { return Ok(VmExit::Halt()); } if port == VmAction::PvTimerConfig as u16 { - self.handle_pv_timer_config(data); + let data_copy = data.to_vec(); + self.handle_pv_timer_config(&data_copy); continue; } // PIT ports (0x40-0x43): no in-kernel PIT, so these From fcbd2b8393d06b9fdd5731177d9dd11d0215385c Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 23:41:06 +0000 Subject: [PATCH 34/37] style: fix rustfmt import grouping in error.rs Restore the combined `use {anyhow, serde_json};` import that was split during rebase, matching the format expected by nightly rustfmt. Signed-off-by: danbugs --- src/hyperlight_guest/src/error.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/hyperlight_guest/src/error.rs b/src/hyperlight_guest/src/error.rs index 4592dc5a9..463ba6a69 100644 --- a/src/hyperlight_guest/src/error.rs +++ b/src/hyperlight_guest/src/error.rs @@ -17,10 +17,9 @@ limitations under the License. use alloc::format; use alloc::string::{String, ToString as _}; -use anyhow; use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; use hyperlight_common::func::Error as FuncError; -use serde_json; +use {anyhow, serde_json}; pub type Result = core::result::Result; From 7ccb80d92591fa873db3dc9eac16851d4fc15e03 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 23:51:44 +0000 Subject: [PATCH 35/37] fix(whp): use WHvX64LocalApicEmulationModeXApic constant Replace hardcoded literal 1 with the named constant from the windows crate for LAPIC emulation mode. Signed-off-by: danbugs --- src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 2441db610..3c6ae5a9d 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -964,7 +964,7 @@ impl WhpVm { /// Enable LAPIC emulation on the given partition. fn enable_lapic_emulation(partition: WHV_PARTITION_HANDLE) -> Result<(), CreateVmError> { - let apic_mode: u32 = 1; // WHvX64LocalApicEmulationModeXApic + let apic_mode = WHvX64LocalApicEmulationModeXApic; unsafe { WHvSetPartitionProperty( partition, From a105ff54ea74eafaf5553d6447da3d1332f592d9 Mon Sep 17 00:00:00 2001 From: danbugs Date: Wed, 18 Mar 2026 23:54:57 +0000 Subject: [PATCH 36/37] fix: resolve clippy expect_used and rustfmt rebase artifacts Replace .expect() with match + tracing::warn on eventfd clone failure in KVM handle_pv_timer_config to satisfy clippy::expect_used lint. Restore combined `use {anyhow, serde_json};` import in error.rs that was split during rebase. Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm/x86_64.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 572159a72..5022e1ac7 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -259,10 +259,13 @@ impl KvmVm { fn handle_pv_timer_config(&mut self, data: &[u8]) { use super::super::x86_64::hw_interrupts::handle_pv_timer_config; - let eventfd_clone = self - .timer_irq_eventfd - .try_clone() - .expect("failed to clone eventfd"); + let eventfd_clone = match self.timer_irq_eventfd.try_clone() { + Ok(fd) => fd, + Err(e) => { + tracing::warn!("failed to clone eventfd for timer config: {e}"); + return; + } + }; handle_pv_timer_config(&mut self.timer, data, move || { let _ = eventfd_clone.write(1); }); From 32eaebf0e126ecda3d460ea040c63c409d34e68d Mon Sep 17 00:00:00 2001 From: danbugs Date: Thu, 19 Mar 2026 01:31:02 +0000 Subject: [PATCH 37/37] fix(kvm): remove UnexpectedExit, let catch-all handle unexpected HLT Remove the dedicated VcpuExit::Hlt match arm and UnexpectedExit error variant in the hw-interrupts run loop. Unexpected HLT now falls through to the Ok(other) catch-all, returning VmExit::Unknown which gets converted to the existing UnexpectedVmExit in hyperlight_vm. Signed-off-by: danbugs --- .../src/hypervisor/virtual_machine/kvm/x86_64.rs | 5 ----- src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs | 2 -- 2 files changed, 7 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs index 5022e1ac7..c29754e6c 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/kvm/x86_64.rs @@ -206,11 +206,6 @@ impl KvmVm { fn run_vcpu_hw_interrupts(&mut self) -> std::result::Result { loop { match self.vcpu_fd.run() { - Ok(VcpuExit::Hlt) => { - return Err(RunVcpuError::UnexpectedExit( - "KVM hw-interrupts: unexpected VcpuExit::Hlt".to_string(), - )); - } Ok(VcpuExit::IoOut(port, data)) => { if port == VmAction::Halt as u16 { // Stop the timer thread before returning. diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index ff90376ee..9edaa1f87 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -202,8 +202,6 @@ pub enum RunVcpuError { IncrementRip(HypervisorError), #[error("Parse GPA access info failed")] ParseGpaAccessInfo, - #[error("Unexpected vCPU exit: {0}")] - UnexpectedExit(String), #[error("Unknown error: {0}")] Unknown(HypervisorError), }