diff --git a/.github/workflows/ValidatePullRequest.yml b/.github/workflows/ValidatePullRequest.yml index e690cefcf..ac06bcd82 100644 --- a/.github/workflows/ValidatePullRequest.yml +++ b/.github/workflows/ValidatePullRequest.yml @@ -89,7 +89,7 @@ jobs: # See: https://github.com/actions/runner/issues/2205 if: ${{ !cancelled() && !failure() }} strategy: - fail-fast: true + fail-fast: false matrix: hypervisor: ['hyperv-ws2025', mshv3, kvm] cpu: [amd, intel] @@ -112,7 +112,7 @@ jobs: # See: https://github.com/actions/runner/issues/2205 if: ${{ !cancelled() && !failure() }} strategy: - fail-fast: true + fail-fast: false matrix: hypervisor: ['hyperv-ws2025', mshv3, kvm] cpu: [amd, intel] diff --git a/Justfile b/Justfile index 0f03663f4..f35a96963 100644 --- a/Justfile +++ b/Justfile @@ -322,6 +322,11 @@ clippy target=default-target: (witguest-wit) clippyw target=default-target: (witguest-wit) {{ cargo-cmd }} clippy --all-targets --all-features --target x86_64-pc-windows-gnu --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings +# Cross-check for linux from a windows host using clippy (no linking needed). +# Only checks lib targets to avoid dev-dependencies (criterion->alloca) that need a C cross-compiler. +clippyl target=default-target: + {{ cargo-cmd }} clippy --lib --all-features --target x86_64-unknown-linux-gnu --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings + clippy-guests target=default-target: (witguest-wit) (ensure-cargo-hyperlight) cd src/tests/rust_guests/simpleguest && cargo hyperlight clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings cd src/tests/rust_guests/witguest && cargo hyperlight clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings diff --git a/src/hyperlight_guest/src/error.rs b/src/hyperlight_guest/src/error.rs index 463ba6a69..4592dc5a9 100644 --- a/src/hyperlight_guest/src/error.rs +++ b/src/hyperlight_guest/src/error.rs @@ -17,9 +17,10 @@ limitations under the License. use alloc::format; use alloc::string::{String, ToString as _}; +use anyhow; use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; use hyperlight_common::func::Error as FuncError; -use {anyhow, serde_json}; +use serde_json; pub type Result = core::result::Result; diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs index e8f21e119..2d1161caa 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -40,9 +40,9 @@ use crate::hypervisor::gdb::{ }; #[cfg(gdb)] use crate::hypervisor::gdb::{DebugError, DebugMemoryAccessError}; -use crate::hypervisor::regs::{ - CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, -}; +#[cfg(not(target_os = "windows"))] +use crate::hypervisor::regs::CommonDebugRegs; +use crate::hypervisor::regs::{CommonFpu, CommonRegisters, CommonSpecialRegisters}; #[cfg(not(gdb))] use crate::hypervisor::virtual_machine::VirtualMachine; #[cfg(kvm)] @@ -329,27 +329,39 @@ impl HyperlightVm { } /// Resets the following vCPU state: - /// - General purpose registers - /// - Debug registers - /// - XSAVE (includes FPU/SSE state with proper FCW and MXCSR defaults) - /// - Special registers (restored from snapshot, with CR3 updated to new page table location) + /// - On Windows: calls WHvResetPartition (resets all per-VP state including + /// GP registers, debug registers, XSAVE, MSRs, APIC, etc.) + /// - On Linux: explicitly resets GP registers, debug registers, and XSAVE + /// + /// This does NOT restore special registers — call `restore_sregs` separately + /// after memory mappings are established. // TODO: check if other state needs to be reset - pub(crate) fn reset_vcpu( + pub(crate) fn reset_vm_state(&mut self) -> std::result::Result<(), RegisterError> { + #[cfg(target_os = "windows")] + self.vm.reset_partition()?; + + #[cfg(not(target_os = "windows"))] + { + self.vm.set_regs(&CommonRegisters { + rflags: 1 << 1, // Reserved bit always set + ..Default::default() + })?; + self.vm.set_debug_regs(&CommonDebugRegs::default())?; + self.vm.reset_xsave()?; + } + + Ok(()) + } + + /// Restores special registers from snapshot with CR3 updated to the + /// new page table location. + pub(crate) fn restore_sregs( &mut self, cr3: u64, sregs: &CommonSpecialRegisters, ) -> std::result::Result<(), RegisterError> { - self.vm.set_regs(&CommonRegisters { - rflags: 1 << 1, // Reserved bit always set - ..Default::default() - })?; - self.vm.set_debug_regs(&CommonDebugRegs::default())?; - self.vm.reset_xsave()?; - #[cfg(not(feature = "nanvix-unstable"))] { - // Restore the full special registers from snapshot, but update CR3 - // to point to the new (relocated) page tables let mut sregs = *sregs; sregs.cr3 = cr3; self.pending_tlb_flush = true; @@ -357,9 +369,7 @@ impl HyperlightVm { } #[cfg(feature = "nanvix-unstable")] { - let _ = (cr3, sregs); // suppress unused warnings - // TODO: This is probably not correct. - // Let's deal with it when we clean up the nanvix-unstable feature + let _ = (cr3, sregs); self.vm .set_sregs(&CommonSpecialRegisters::standard_real_mode_defaults())?; } @@ -367,6 +377,20 @@ impl HyperlightVm { Ok(()) } + /// Combined reset: resets VM state and restores sregs in one call. + /// Used by tests; production code calls reset_vm_state + restore_sregs separately. + #[cfg(test)] + #[allow(dead_code)] + pub(crate) fn reset_vcpu( + &mut self, + cr3: u64, + sregs: &CommonSpecialRegisters, + ) -> std::result::Result<(), RegisterError> { + self.reset_vm_state()?; + self.restore_sregs(cr3, sregs)?; + Ok(()) + } + // Handle a debug exit #[cfg(gdb)] pub(super) fn handle_debug( @@ -879,7 +903,9 @@ mod tests { use super::*; #[cfg(kvm)] use crate::hypervisor::regs::FP_CONTROL_WORD_DEFAULT; - use crate::hypervisor::regs::{CommonSegmentRegister, CommonTableRegister, MXCSR_DEFAULT}; + use crate::hypervisor::regs::{ + CommonDebugRegs, CommonSegmentRegister, CommonTableRegister, MXCSR_DEFAULT, + }; use crate::hypervisor::virtual_machine::VirtualMachine; use crate::mem::layout::SandboxMemoryLayout; use crate::mem::memory_region::{GuestMemoryRegion, MemoryRegionFlags}; @@ -1183,9 +1209,18 @@ mod tests { // Assertion Helpers - Verify vCPU state after reset // ========================================================================== - /// Assert that debug registers are in reset state. - /// Reserved bits in DR6/DR7 are read-only (set by CPU), so we only check - /// that writable bits are cleared to 0 and DR0-DR3 are zeroed. + /// Assert that debug registers are in architectural reset state. + /// + /// On Linux (KVM/MSHV): reset_vcpu explicitly zeroes debug registers. + /// + /// On Windows: WHvResetPartition resets to power-on defaults per + /// Intel SDM Vol. 3, Table 10-1: + /// DR0-DR3 = 0 (breakpoint addresses cleared) + /// DR6 = 0xFFFF0FF0 (reserved bits set, writable bits cleared) + /// DR7 = 0x00000400 (reserved bit 10 set, all enables cleared) + /// + /// Reserved bits in DR6/DR7 are read-only and CPU-dependent, so we only + /// verify that writable bits are cleared to 0 and DR0-DR3 are zeroed. fn assert_debug_regs_reset(vm: &dyn VirtualMachine) { let debug_regs = vm.debug_regs().unwrap(); let expected = CommonDebugRegs { @@ -1200,19 +1235,58 @@ mod tests { } /// Assert that general-purpose registers are in reset state. - /// After reset, all registers should be zeroed except rflags which has - /// reserved bit 1 always set. + /// + /// On Linux (KVM/MSHV): reset_vcpu explicitly zeroes all GP regs and sets + /// rflags = 0x2, so we verify all-zeros. + /// + /// On Windows: WHvResetPartition sets architectural power-on defaults + /// per Intel SDM Vol. 3, Table 10-1: + /// RIP = 0xFFF0 (reset vector) + /// RDX = CPUID signature (CPU-dependent stepping/model/family) + /// RFLAGS = 0x2 (only reserved bit 1 set) + /// All other GP regs = 0 + /// These are overwritten by dispatch_call_from_host before guest execution, + /// but we still verify the power-on state is correct. fn assert_regs_reset(vm: &dyn VirtualMachine) { + let regs = vm.regs().unwrap(); + #[cfg(not(target_os = "windows"))] assert_eq!( - vm.regs().unwrap(), + regs, CommonRegisters { - rflags: 1 << 1, // Reserved bit 1 is always set + rflags: 1 << 1, ..Default::default() } ); + #[cfg(target_os = "windows")] + { + // WHvResetPartition sets x86 power-on reset values + // (Intel SDM Vol. 3, Table 10-1) + let expected = CommonRegisters { + rip: 0xFFF0, // Reset vector + rdx: regs.rdx, // CPUID signature (CPU-dependent) + rflags: 0x2, // Reserved bit 1 + ..Default::default() + }; + assert_ne!( + regs.rdx, 0x4444444444444444, + "RDX should not retain dirty value" + ); + assert_eq!(regs, expected); + } } /// Assert that FPU state is in reset state. + /// + /// On Linux (KVM/MSHV): reset_vcpu calls reset_xsave which zeroes FPU state + /// and sets FCW/MXCSR to defaults. + /// + /// On Windows: WHvResetPartition resets to power-on defaults per + /// Intel SDM Vol. 3, Table 10-1 (FINIT-equivalent state): + /// FCW = 0x037F (all exceptions masked, precision=64-bit, round=nearest) + /// FSW = 0, FTW = 0 (all empty), FOP = 0, FIP = 0, FDP = 0 + /// MXCSR = 0x1F80 (all SIMD exceptions masked, round=nearest) + /// ST0-ST7 = 0, XMM0-XMM15 = 0 + /// /// Handles hypervisor-specific quirks (KVM MXCSR, empty FPU registers). fn assert_fpu_reset(vm: &dyn VirtualMachine) { let fpu = vm.fpu().unwrap(); @@ -1222,8 +1296,14 @@ mod tests { assert_eq!(fpu, expected_fpu); } - /// Assert that special registers are in reset state. - /// Handles hypervisor-specific differences in hidden descriptor cache fields. + /// Assert that special registers match the expected snapshot state. + /// + /// After reset_vcpu, sregs are explicitly restored from the snapshot + /// (with CR3 updated). This verifies they match the expected 64-bit + /// long mode configuration from CommonSpecialRegisters::standard_64bit_defaults. + /// + /// Handles hypervisor-specific differences in hidden descriptor cache fields + /// (unusable, granularity, type_ for unused segments). fn assert_sregs_reset(vm: &dyn VirtualMachine, pml4_addr: u64) { let defaults = CommonSpecialRegisters::standard_64bit_defaults(pml4_addr); let sregs = vm.sregs().unwrap(); diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs index 9edaa1f87..916318945 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/mod.rs @@ -107,7 +107,7 @@ pub(crate) enum HypervisorType { /// Minimum XSAVE buffer size: 512 bytes legacy region + 64 bytes header. /// Only used by MSHV and WHP which use compacted XSAVE format and need to /// validate buffer size before accessing XCOMP_BV. -#[cfg(any(mshv3, target_os = "windows"))] +#[cfg(mshv3)] pub(crate) const XSAVE_MIN_SIZE: usize = 576; /// Standard XSAVE buffer size (4KB) used by KVM and MSHV. @@ -244,6 +244,9 @@ pub enum RegisterError { #[cfg(target_os = "windows")] #[error("Failed to convert WHP registers: {0}")] ConversionFailed(String), + #[cfg(target_os = "windows")] + #[error("Failed to reset partition: {0}")] + ResetPartition(HypervisorError), } /// Map memory error @@ -341,18 +344,32 @@ pub(crate) trait VirtualMachine: Debug + Send { #[allow(dead_code)] fn debug_regs(&self) -> std::result::Result; /// Set the debug registers of the vCPU + #[allow(dead_code)] fn set_debug_regs(&self, drs: &CommonDebugRegs) -> std::result::Result<(), RegisterError>; /// Get xsave #[allow(dead_code)] fn xsave(&self) -> std::result::Result, RegisterError>; /// Reset xsave to default state + #[cfg(not(target_os = "windows"))] fn reset_xsave(&self) -> std::result::Result<(), RegisterError>; /// Set xsave - only used for tests #[cfg(test)] #[cfg(not(feature = "nanvix-unstable"))] fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError>; + /// Reset the partition using WHvResetPartition. + /// + /// Resets all per-VP state to architectural defaults: GP registers, segment + /// registers, control registers, EFER, MSRs, debug registers, full XSAVE + /// state (x87/SSE/AVX/AVX-512/AMX), XCR0, APIC/LAPIC, pending interrupts, + /// and VMCS/VMCB internals. + /// + /// Does NOT reset: GPA memory mappings or contents, partition configuration, + /// notification ports, or VPCI device state. + #[cfg(target_os = "windows")] + fn reset_partition(&self) -> std::result::Result<(), RegisterError>; + /// Get partition handle #[cfg(target_os = "windows")] fn partition_handle(&self) -> windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE; diff --git a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs index 3c6ae5a9d..d9614214c 100644 --- a/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs +++ b/src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs @@ -32,9 +32,8 @@ use windows_result::HRESULT; use crate::hypervisor::gdb::{DebugError, DebuggableVm}; use crate::hypervisor::regs::{ Align16, CommonDebugRegs, CommonFpu, CommonRegisters, CommonSpecialRegisters, - FP_CONTROL_WORD_DEFAULT, MXCSR_DEFAULT, WHP_DEBUG_REGS_NAMES, WHP_DEBUG_REGS_NAMES_LEN, - WHP_FPU_NAMES, WHP_FPU_NAMES_LEN, WHP_REGS_NAMES, WHP_REGS_NAMES_LEN, WHP_SREGS_NAMES, - WHP_SREGS_NAMES_LEN, + WHP_DEBUG_REGS_NAMES, WHP_DEBUG_REGS_NAMES_LEN, WHP_FPU_NAMES, WHP_FPU_NAMES_LEN, + WHP_REGS_NAMES, WHP_REGS_NAMES_LEN, WHP_SREGS_NAMES, WHP_SREGS_NAMES_LEN, }; use crate::hypervisor::surrogate_process::SurrogateProcess; use crate::hypervisor::surrogate_process_manager::get_surrogate_process_manager; @@ -42,7 +41,7 @@ use crate::hypervisor::surrogate_process_manager::get_surrogate_process_manager; use crate::hypervisor::virtual_machine::x86_64::hw_interrupts::TimerThread; use crate::hypervisor::virtual_machine::{ CreateVmError, HypervisorError, MapMemoryError, RegisterError, RunVcpuError, UnmapMemoryError, - VirtualMachine, VmExit, XSAVE_MIN_SIZE, + VirtualMachine, VmExit, }; use crate::hypervisor::wrappers::HandleWrapper; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags, MemoryRegionType}; @@ -666,85 +665,6 @@ impl VirtualMachine for WhpVm { Ok(xsave_buffer) } - fn reset_xsave(&self) -> std::result::Result<(), RegisterError> { - // WHP uses compacted XSAVE format (bit 63 of XCOMP_BV set). - // We cannot just zero out the xsave area, we need to preserve the XCOMP_BV. - - // Get the required buffer size by calling with NULL buffer. - let mut buffer_size_needed: u32 = 0; - - let result = unsafe { - WHvGetVirtualProcessorXsaveState( - self.partition, - 0, - std::ptr::null_mut(), - 0, - &mut buffer_size_needed, - ) - }; - - // Expect insufficient buffer error; any other error is unexpected - if let Err(e) = result - && e.code() != windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER - { - return Err(RegisterError::GetXsaveSize(e.into())); - } - - if buffer_size_needed < XSAVE_MIN_SIZE as u32 { - return Err(RegisterError::XsaveSizeMismatch { - expected: XSAVE_MIN_SIZE as u32, - actual: buffer_size_needed, - }); - } - - // Create a buffer to hold the current state (to get the correct XCOMP_BV) - let mut current_state = vec![0u8; buffer_size_needed as usize]; - let mut written_bytes = 0; - unsafe { - WHvGetVirtualProcessorXsaveState( - self.partition, - 0, - current_state.as_mut_ptr() as *mut std::ffi::c_void, - buffer_size_needed, - &mut written_bytes, - ) - .map_err(|e| RegisterError::GetXsave(e.into()))?; - }; - - // Zero out most of the buffer, preserving only XCOMP_BV (520-528). - // Extended components with XSTATE_BV bit=0 will use their init values. - // - // - Legacy region (0-512): x87 FPU + SSE state - // - XSTATE_BV (512-520): Feature bitmap - // - XCOMP_BV (520-528): Compaction bitmap + format bit (KEEP) - // - Reserved (528-576): Header padding - // - Extended (576+): AVX, AVX-512, MPX, PKRU, AMX, etc. - current_state[0..520].fill(0); - current_state[528..].fill(0); - - // XSAVE area layout from Intel SDM Vol. 1 Section 13.4.1: - // - Bytes 0-1: FCW (x87 FPU Control Word) - // - Bytes 24-27: MXCSR - // - Bytes 512-519: XSTATE_BV (bitmap of valid state components) - current_state[0..2].copy_from_slice(&FP_CONTROL_WORD_DEFAULT.to_le_bytes()); - current_state[24..28].copy_from_slice(&MXCSR_DEFAULT.to_le_bytes()); - // XSTATE_BV = 0x3: bits 0,1 = x87 + SSE valid. Explicitly tell hypervisor - // to apply the legacy region from this buffer for consistent behavior. - current_state[512..520].copy_from_slice(&0x3u64.to_le_bytes()); - - unsafe { - WHvSetVirtualProcessorXsaveState( - self.partition, - 0, - current_state.as_ptr() as *const std::ffi::c_void, - buffer_size_needed, - ) - .map_err(|e| RegisterError::SetXsave(e.into()))?; - } - - Ok(()) - } - #[cfg(test)] #[cfg(not(feature = "nanvix-unstable"))] fn set_xsave(&self, xsave: &[u32]) -> std::result::Result<(), RegisterError> { @@ -791,6 +711,14 @@ impl VirtualMachine for WhpVm { Ok(()) } + fn reset_partition(&self) -> std::result::Result<(), RegisterError> { + unsafe { + WHvResetPartition(self.partition) + .map_err(|e| RegisterError::ResetPartition(e.into()))?; + } + Ok(()) + } + /// Get the partition handle for this VM fn partition_handle(&self) -> WHV_PARTITION_HANDLE { self.partition diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 8f147c11c..3a28bc4ac 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -291,6 +291,15 @@ impl MultiUseSandbox { return Err(SnapshotSandboxMismatch); } + // Reset VM/vCPU state first (WHvResetPartition on Windows, register + // reset on Linux). This must happen before memory restore so that + // the partition subsystem rebuild doesn't disturb freshly established + // GPA mappings. + self.vm.reset_vm_state().map_err(|e| { + self.poisoned = true; + HyperlightVmError::Restore(e) + })?; + let (gsnapshot, gscratch) = self.mem_mgr.restore_snapshot(&snapshot)?; if let Some(gsnapshot) = gsnapshot { self.vm @@ -306,10 +315,10 @@ impl MultiUseSandbox { let sregs = snapshot.sregs().ok_or_else(|| { HyperlightError::Error("snapshot from running sandbox should have sregs".to_string()) })?; - // TODO (ludfjig): Go through the rest of possible errors in this `MultiUseSandbox::restore` function - // and determine if they should also poison the sandbox. + // Restore special registers (CR3 etc.) after memory is in place + // so the page tables are already mapped at the target GPA. self.vm - .reset_vcpu(snapshot.root_pt_gpa(), sregs) + .restore_sregs(snapshot.root_pt_gpa(), sregs) .map_err(|e| { self.poisoned = true; HyperlightVmError::Restore(e)