diff --git a/kernel/src/arch_impl/aarch64/boot.S b/kernel/src/arch_impl/aarch64/boot.S index 25d4e3ee..cdbe010a 100644 --- a/kernel/src/arch_impl/aarch64/boot.S +++ b/kernel/src/arch_impl/aarch64/boot.S @@ -813,15 +813,17 @@ secondary_el1_init: isb // Set up per-CPU boot stack (physical addresses, before MMU) - // Stack top = 0x4100_0000 + (cpu_id + 1) * 0x20_0000 + // Stack top = SMP_STACK_BASE_PHYS + (cpu_id + 1) * 0x20_0000 + // SMP_STACK_BASE_PHYS is set by CPU 0 Rust code to (ram_base + 0x01000000). // This gives each CPU a 2MB stack region: - // CPU 1: 0x4120_0000 (top) .. 0x4100_0000 - // CPU 2: 0x4140_0000 (top) .. 0x4120_0000 - // CPU 3: 0x4160_0000 (top) .. 0x4140_0000 + // CPU 1: base+0x0020_0000 (top) .. base + // CPU 2: base+0x0040_0000 (top) .. base+0x0020_0000 + // CPU 3: base+0x0060_0000 (top) .. base+0x0040_0000 mov x0, x19 // cpu_id add x0, x0, #1 // cpu_id + 1 lsl x0, x0, #21 // * 0x20_0000 (2MB) - ldr x1, =0x41000000 // base of per-CPU stack region + ldr x1, =SMP_STACK_BASE_PHYS + ldr x1, [x1] // x1 = actual stack base (set by CPU 0) add x0, x0, x1 mov sp, x0 @@ -987,6 +989,9 @@ SMP_MAIR_PTR: .global SMP_TCR_PTR SMP_TCR_PTR: .quad SMP_TCR_PHYS +.global SMP_STACK_BASE_PTR +SMP_STACK_BASE_PTR: + .quad SMP_STACK_BASE_PHYS // ----------------------------------------------------------------------------- // Boot-time BSS (low): page tables + boot stack @@ -1043,6 +1048,9 @@ SMP_MAIR_PHYS: .global SMP_TCR_PHYS SMP_TCR_PHYS: .skip 8 +.global SMP_STACK_BASE_PHYS +SMP_STACK_BASE_PHYS: + .skip 8 .balign 16 .global __boot_stack_bottom diff --git a/kernel/src/arch_impl/aarch64/constants.rs b/kernel/src/arch_impl/aarch64/constants.rs index 18168bbc..68cd29de 100644 --- a/kernel/src/arch_impl/aarch64/constants.rs +++ b/kernel/src/arch_impl/aarch64/constants.rs @@ -205,20 +205,25 @@ pub const STACK_GUARD_SIZE: usize = PAGE_SIZE; /// Base address for per-CPU kernel stacks region (ARM64). 
/// Uses a region within the HHDM (higher-half direct map) that is mapped -/// by the boot page tables. Placed at physical 0x4100_0000 (16MB into RAM -/// after kernel) to stay within typical 512MB QEMU RAM configs. +/// by the boot page tables. Placed at ram_base + 0x0100_0000 (16MB into RAM +/// after kernel) to stay within typical 512MB RAM configs. /// -/// QEMU virt RAM layout: physical 0x4000_0000 (1GB mark) for N MB -/// With 512MB RAM: physical 0x4000_0000 to 0x6000_0000 +/// RAM layout (relative to ram_base): +/// - +0x0000_0000 - +0x0100_0000: Kernel image (~16MB) +/// - +0x0100_0000 - +0x0200_0000: Per-CPU stacks (16MB for 8 CPUs) +/// - +0x0200_0000 - end: Heap and dynamic allocations /// -/// Stack layout in RAM: -/// - 0x4000_0000 - 0x4100_0000: Kernel image (~16MB) -/// - 0x4100_0000 - 0x4200_0000: Per-CPU stacks (16MB for 8 CPUs) -/// - 0x4200_0000 - 0x6000_0000: Heap and dynamic allocations -/// -/// Virtual: 0xFFFF_0000_4100_0000 -/// Physical: 0x4100_0000 -pub const PERCPU_STACK_REGION_BASE: u64 = HHDM_BASE + 0x4100_0000; +/// Platform-dependent physical base: +/// - QEMU/Parallels (ram at 0x4000_0000): physical 0x4100_0000 +/// - VMware (ram at 0x8000_0000): physical 0x8100_0000 +#[inline] +pub fn percpu_stack_region_base() -> u64 { + HHDM_BASE + 0x4100_0000 + crate::platform_config::ram_base_offset() +} + +/// Legacy constant for compile-time contexts (diagnostics). Uses the default +/// QEMU/Parallels base. Runtime code should use percpu_stack_region_base(). +pub const PERCPU_STACK_REGION_BASE_DEFAULT: u64 = HHDM_BASE + 0x4100_0000; /// Maximum number of CPUs supported on ARM64. /// Limited to 8 to keep stack region within 512MB RAM constraint. 
diff --git a/kernel/src/arch_impl/aarch64/context_switch.rs b/kernel/src/arch_impl/aarch64/context_switch.rs index 0e4e8d78..ad58a658 100644 --- a/kernel/src/arch_impl/aarch64/context_switch.rs +++ b/kernel/src/arch_impl/aarch64/context_switch.rs @@ -681,7 +681,7 @@ fn setup_idle_return_locked( .and_then(|t| t.kernel_stack_top.map(|v| v.as_u64())) .unwrap_or_else(|| { let cpu_id64 = cpu_id as u64; - 0xFFFF_0000_0000_0000u64 + 0x4100_0000 + (cpu_id64 + 1) * 0x20_0000 + super::constants::percpu_stack_region_base() + (cpu_id64 + 1) * 0x20_0000 }); // Clear all general purpose registers for clean state @@ -1232,7 +1232,7 @@ fn setup_idle_return_arm64(frame: &mut Aarch64ExceptionFrame) { .flatten() .unwrap_or_else(|| { let cpu_id = Aarch64PerCpu::cpu_id() as u64; - let boot_stack_top = 0xFFFF_0000_0000_0000u64 + 0x4100_0000 + (cpu_id + 1) * 0x20_0000; + let boot_stack_top = super::constants::percpu_stack_region_base() + (cpu_id + 1) * 0x20_0000; boot_stack_top }); diff --git a/kernel/src/arch_impl/aarch64/exception.rs b/kernel/src/arch_impl/aarch64/exception.rs index f1c8ad63..48f8cd3f 100644 --- a/kernel/src/arch_impl/aarch64/exception.rs +++ b/kernel/src/arch_impl/aarch64/exception.rs @@ -29,9 +29,9 @@ use crate::arch_impl::traits::PerCpuOps; fn set_idle_stack_for_eret() { use crate::arch_impl::aarch64::percpu::Aarch64PerCpu; - // Boot stack: HHDM_BASE + 0x4100_0000 + (cpu_id + 1) * 0x20_0000 let cpu_id = Aarch64PerCpu::cpu_id() as u64; - let idle_stack = 0xFFFF_0000_0000_0000u64 + 0x4100_0000 + (cpu_id + 1) * 0x20_0000; + let stack_base = super::constants::percpu_stack_region_base(); + let idle_stack = stack_base + (cpu_id + 1) * 0x20_0000; unsafe { Aarch64PerCpu::set_user_rsp_scratch(idle_stack); } @@ -235,14 +235,14 @@ pub extern "C" fn handle_sync_exception(frame: *mut Aarch64ExceptionFrame, esr: // Classify which stack region the frame is on let frame_addr = frame as u64; + let boot_stack_base = super::constants::percpu_stack_region_base(); + let boot_stack_end = 
boot_stack_base + 0x0100_0000; const HHDM_BASE_DIAG: u64 = 0xFFFF_0000_0000_0000; - const BOOT_STACK_BASE: u64 = HHDM_BASE_DIAG + 0x4100_0000; - const BOOT_STACK_END: u64 = HHDM_BASE_DIAG + 0x4200_0000; const KSTACK_BASE: u64 = HHDM_BASE_DIAG + 0x5200_0000; const KSTACK_END: u64 = HHDM_BASE_DIAG + 0x5400_0000; - if frame_addr >= BOOT_STACK_BASE && frame_addr < BOOT_STACK_END { + if frame_addr >= boot_stack_base && frame_addr < boot_stack_end { raw_uart_str("\n STACK=boot_cpu"); - let offset_from_base = frame_addr - BOOT_STACK_BASE; + let offset_from_base = frame_addr - boot_stack_base; let boot_cpu = offset_from_base / 0x20_0000; raw_uart_dec(boot_cpu); } else if frame_addr >= KSTACK_BASE && frame_addr < KSTACK_END { @@ -586,14 +586,14 @@ pub extern "C" fn handle_sync_exception(frame: *mut Aarch64ExceptionFrame, esr: // Stack classification let frame_addr = frame_ref as *const _ as u64; + let boot_stack_base = super::constants::percpu_stack_region_base(); + let boot_stack_end = boot_stack_base + 0x0100_0000; const HHDM_BASE_DIAG: u64 = 0xFFFF_0000_0000_0000; - const BOOT_STACK_BASE: u64 = HHDM_BASE_DIAG + 0x4100_0000; - const BOOT_STACK_END: u64 = HHDM_BASE_DIAG + 0x4200_0000; const KSTACK_BASE: u64 = HHDM_BASE_DIAG + 0x5200_0000; const KSTACK_END: u64 = HHDM_BASE_DIAG + 0x5400_0000; - if frame_addr >= BOOT_STACK_BASE && frame_addr < BOOT_STACK_END { + if frame_addr >= boot_stack_base && frame_addr < boot_stack_end { raw_uart_str("\n STACK=boot_cpu"); - let offset_from_base = frame_addr - BOOT_STACK_BASE; + let offset_from_base = frame_addr - boot_stack_base; let boot_cpu = offset_from_base / 0x20_0000; raw_uart_dec(boot_cpu); } else if frame_addr >= KSTACK_BASE && frame_addr < KSTACK_END { @@ -610,7 +610,7 @@ pub extern "C" fn handle_sync_exception(frame: *mut Aarch64ExceptionFrame, esr: // OUTER FRAME: Read the frame 272 bytes above (if on a valid stack) let outer_frame_addr = frame_addr + 272; - if outer_frame_addr + 272 <= BOOT_STACK_END + if outer_frame_addr + 
272 <= boot_stack_end || (outer_frame_addr >= KSTACK_BASE && outer_frame_addr + 272 <= KSTACK_END) { let outer = outer_frame_addr as *const u64; diff --git a/kernel/src/arch_impl/aarch64/smp.rs b/kernel/src/arch_impl/aarch64/smp.rs index 8ad369c1..54a51d6b 100644 --- a/kernel/src/arch_impl/aarch64/smp.rs +++ b/kernel/src/arch_impl/aarch64/smp.rs @@ -15,8 +15,9 @@ use core::sync::atomic::{AtomicBool, AtomicU64, Ordering}; /// Maximum number of CPUs supported. pub const MAX_CPUS: usize = 8; -/// PSCI function IDs (SMCCC compliant, 64-bit). +/// PSCI function IDs (SMCCC compliant). const PSCI_CPU_ON_64: u64 = 0xC400_0003; +const PSCI_CPU_ON_32: u64 = 0x8400_0003; extern "C" { /// Physical address of secondary_cpu_entry, stored in .rodata by boot.S. @@ -37,6 +38,11 @@ extern "C" { static SMP_TTBR1_PTR: u64; static SMP_MAIR_PTR: u64; static SMP_TCR_PTR: u64; + + /// Pointer to SMP_STACK_BASE_PHYS (in .bss.boot). CPU 0 writes the + /// physical base address of the per-CPU stack region here before PSCI CPU_ON. + /// On QEMU/Parallels: 0x4100_0000; on VMware: 0x8100_0000. + static SMP_STACK_BASE_PTR: u64; } /// Write CPU 0's actual MMU configuration to .bss.boot variables so @@ -122,6 +128,27 @@ pub fn set_uart_phys(addr: u64) { } } +/// Set the physical base address of the per-CPU stack region. +/// Must be called before `release_cpu()`. +/// +/// The stack base is `ram_base + 0x0100_0000` (16MB into RAM). +/// On QEMU/Parallels (ram at 0x40000000): 0x4100_0000. +/// On VMware (ram at 0x80000000): 0x8100_0000. +pub fn set_stack_base_phys(addr: u64) { + unsafe { + let phys = core::ptr::read_volatile(&SMP_STACK_BASE_PTR); + let virt = phys + 0xFFFF_0000_0000_0000u64; + let ptr = virt as *mut u64; + core::ptr::write_volatile(ptr, addr); + core::arch::asm!( + "dc cvac, {addr}", + "dsb ish", + addr = in(reg) ptr, + options(nostack), + ); + } +} + /// Number of CPUs currently online (starts at 1 for the boot CPU). 
static CPUS_ONLINE: AtomicU64 = AtomicU64::new(1); @@ -160,6 +187,38 @@ fn psci_cpu_on(target_cpu: u64, entry_point: u64, context_id: u64) -> i64 { ret } +/// PSCI CPU_ON with 32-bit function ID via HVC. +fn psci_cpu_on_32(target_cpu: u64, entry_point: u64, context_id: u64) -> i64 { + let ret: i64; + unsafe { + core::arch::asm!( + "hvc #0", + inout("x0") PSCI_CPU_ON_32 => ret, + in("x1") target_cpu, + in("x2") entry_point, + in("x3") context_id, + options(nomem, nostack), + ); + } + ret +} + +/// PSCI CPU_ON with 64-bit function ID via SMC (EL3 firmware conduit). +fn psci_cpu_on_smc(target_cpu: u64, entry_point: u64, context_id: u64) -> i64 { + let ret: i64; + unsafe { + core::arch::asm!( + "smc #0", + inout("x0") PSCI_CPU_ON_64 => ret, + in("x1") target_cpu, + in("x2") entry_point, + in("x3") context_id, + options(nomem, nostack), + ); + } + ret +} + /// Release a secondary CPU using PSCI CPU_ON. /// /// The CPU will start executing at `secondary_cpu_entry` in boot.S, @@ -182,12 +241,21 @@ pub fn release_cpu(cpu_id: usize) -> i64 { // Context ID: pass cpu_id so the new CPU knows who it is let context_id = cpu_id as u64; - let ret = psci_cpu_on(target_mpidr, entry_phys, context_id); + // Try 64-bit PSCI CPU_ON via HVC first (standard for ARM64 hypervisors) + let mut ret = psci_cpu_on(target_mpidr, entry_phys, context_id); + + // If 64-bit failed, try 32-bit function ID (some hypervisors only support this) + if ret != 0 { + crate::serial_println!("[smp] CPU {}: HVC64 failed (ret={}), trying HVC32...", cpu_id, ret); + ret = psci_cpu_on_32(target_mpidr, entry_phys, context_id); + } + + // Note: SMC conduit not attempted — on VMware (EL1 guest, no EL3), + // SMC would trap to EL2 and likely fault. HVC is the correct conduit. 
if ret != 0 { - // PSCI error — emit raw UART error indicator - raw_uart_char(b'E'); - raw_uart_char(b'0' + cpu_id as u8); + crate::serial_println!("[smp] PSCI CPU_ON failed for CPU {}: ret={} (MPIDR={:#x} entry={:#x})", + cpu_id, ret, target_mpidr, entry_phys); } ret @@ -233,14 +301,13 @@ pub extern "C" fn secondary_cpu_entry_rust(cpu_id: u64) -> ! { crate::per_cpu_aarch64::init_cpu(cpu_id as usize); // Set kernel stack top for this CPU. - // boot.S sets SP to 0x41000000 + (cpu_id+1)*0x200000 (physical), + // boot.S sets SP to SMP_STACK_BASE_PHYS + (cpu_id+1)*0x200000 (physical), // then adds KERNEL_VIRT_BASE after enabling MMU. // This value is critical: when a user thread runs on this CPU and an // exception occurs, the kernel needs to switch to this stack. - const HHDM_BASE: u64 = 0xFFFF_0000_0000_0000; - const STACK_REGION_BASE: u64 = 0x4100_0000; + let stack_base = super::constants::percpu_stack_region_base(); const STACK_SIZE: u64 = 0x20_0000; // 2MB per CPU - let kernel_stack_top = HHDM_BASE + STACK_REGION_BASE + (cpu_id + 1) * STACK_SIZE; + let kernel_stack_top = stack_base + (cpu_id + 1) * STACK_SIZE; crate::per_cpu_aarch64::set_kernel_stack_top(kernel_stack_top); // Initialize GIC CPU interface (GICC registers are banked per-CPU) @@ -281,12 +348,10 @@ fn create_and_register_idle_thread(cpu_id: usize) { use crate::memory::arch_stub::VirtAddr; // Boot stack addresses — must match boot.S layout. 
- // HHDM_BASE + STACK_REGION_BASE + (cpu_id + 1) * STACK_SIZE - const HHDM_BASE: u64 = 0xFFFF_0000_0000_0000; - const STACK_REGION_BASE: u64 = 0x4100_0000; + let stack_base = super::constants::percpu_stack_region_base(); const STACK_SIZE: u64 = 0x20_0000; // 2MB per CPU - let boot_stack_top = VirtAddr::new(HHDM_BASE + STACK_REGION_BASE + ((cpu_id as u64) + 1) * STACK_SIZE); - let boot_stack_bottom = VirtAddr::new(HHDM_BASE + STACK_REGION_BASE + (cpu_id as u64) * STACK_SIZE); + let boot_stack_top = VirtAddr::new(stack_base + ((cpu_id as u64) + 1) * STACK_SIZE); + let boot_stack_bottom = VirtAddr::new(stack_base + (cpu_id as u64) * STACK_SIZE); let dummy_tls = VirtAddr::zero(); let mut idle_task = Box::new(Thread::new( diff --git a/kernel/src/arch_impl/aarch64/timer_interrupt.rs b/kernel/src/arch_impl/aarch64/timer_interrupt.rs index 34b9011d..abb337a4 100644 --- a/kernel/src/arch_impl/aarch64/timer_interrupt.rs +++ b/kernel/src/arch_impl/aarch64/timer_interrupt.rs @@ -280,9 +280,13 @@ pub extern "C" fn timer_interrupt_handler() { crate::drivers::usb::ehci::poll_keyboard(); // Poll XHCI USB HID events (needed when PCI interrupt routing isn't available) crate::drivers::usb::xhci::poll_hid_events(); - // Poll VirtIO net PCI for incoming packets (PCI INTx routing not wired up) + // Poll network RX for incoming packets (PCI INTx routing not wired up) + // Covers both VirtIO net PCI (Parallels) and e1000 (VMware) // Throttle to every 50th tick (~20Hz at 1000Hz timer) to avoid overhead - if crate::drivers::virtio::net_pci::is_initialized() && _count % 50 == 0 { + if (crate::drivers::virtio::net_pci::is_initialized() + || crate::drivers::e1000::is_initialized()) + && _count % 50 == 0 + { crate::task::softirqd::raise_softirq(crate::task::softirqd::SoftirqType::NetRx); } } diff --git a/kernel/src/drivers/e1000/mod.rs b/kernel/src/drivers/e1000/mod.rs index 40a88f4c..63047277 100644 --- a/kernel/src/drivers/e1000/mod.rs +++ b/kernel/src/drivers/e1000/mod.rs @@ -21,10 +21,14 
@@ use crate::memory::PhysAddrWrapper as PhysAddr; pub use regs::*; -/// Intel 82540EM device ID +/// Intel 82540EM device ID (QEMU default) #[allow(dead_code)] // Used in init() for device detection pub const E1000_DEVICE_ID: u16 = 0x100E; +/// Intel 82574L device ID (VMware e1000e) +#[allow(dead_code)] // Used in init() for device detection +pub const E1000E_DEVICE_ID: u16 = 0x10D3; + /// Number of receive descriptors (must be multiple of 8) const RX_RING_SIZE: usize = 32; /// Number of transmit descriptors (must be multiple of 8) @@ -128,6 +132,8 @@ pub struct E1000 { /// PCI device information #[allow(dead_code)] // Stored for future use (interrupt routing, power management) pci_device: Device, + /// PCI device ID (0x100E = 82540EM, 0x10D3 = 82574L) + device_id: u16, /// Base address of MMIO registers mmio_base: usize, /// Receive descriptor ring @@ -157,37 +163,72 @@ impl E1000 { unsafe { write_volatile((self.mmio_base + reg as usize) as *mut u32, value) } } - /// Read MAC address from EEPROM - fn read_eeprom(&self, addr: u8) -> u16 { - // Write the address and start bit - self.write_reg(REG_EERD, ((addr as u32) << EERD_ADDR_SHIFT) | EERD_START); - - // Wait for completion - loop { - let val = self.read_reg(REG_EERD); - if val & EERD_DONE != 0 { - return ((val >> EERD_DATA_SHIFT) & 0xFFFF) as u16; + /// Read MAC address from EEPROM (with timeout) + /// Returns None if the EEPROM doesn't respond within the timeout. 
+    fn read_eeprom(&self, addr: u8) -> Option<u16> {
+        if self.device_id == E1000E_DEVICE_ID {
+            // 82574L (e1000e): DONE=bit 1, ADDR_SHIFT=2
+            self.write_reg(REG_EERD, ((addr as u32) << 2) | EERD_START);
+            for _ in 0..100_000 {
+                let val = self.read_reg(REG_EERD);
+                if val & (1 << 1) != 0 {
+                    return Some(((val >> EERD_DATA_SHIFT) & 0xFFFF) as u16);
+                }
+                core::hint::spin_loop();
+            }
+        } else {
+            // 82540EM: DONE=bit 4, ADDR_SHIFT=8
+            self.write_reg(REG_EERD, ((addr as u32) << EERD_ADDR_SHIFT) | EERD_START);
+            for _ in 0..100_000 {
+                let val = self.read_reg(REG_EERD);
+                if val & EERD_DONE != 0 {
+                    return Some(((val >> EERD_DATA_SHIFT) & 0xFFFF) as u16);
+                }
+                core::hint::spin_loop();
             }
         }
+        None
     }

-    /// Read MAC address from EEPROM or RAL/RAH registers
-    fn read_mac_address(&self) -> [u8; 6] {
-        // Try to read from EEPROM first
-        let word0 = self.read_eeprom(0);
-        let word1 = self.read_eeprom(1);
-        let word2 = self.read_eeprom(2);
-
+    /// Read MAC address from RAL/RAH registers (works on all Intel NICs)
+    fn read_mac_from_ral(&self) -> [u8; 6] {
+        let ral = self.read_reg(REG_RAL);
+        let rah = self.read_reg(REG_RAH);
         [
-            (word0 & 0xFF) as u8,
-            ((word0 >> 8) & 0xFF) as u8,
-            (word1 & 0xFF) as u8,
-            ((word1 >> 8) & 0xFF) as u8,
-            (word2 & 0xFF) as u8,
-            ((word2 >> 8) & 0xFF) as u8,
+            (ral & 0xFF) as u8,
+            ((ral >> 8) & 0xFF) as u8,
+            ((ral >> 16) & 0xFF) as u8,
+            ((ral >> 24) & 0xFF) as u8,
+            (rah & 0xFF) as u8,
+            ((rah >> 8) & 0xFF) as u8,
         ]
     }

+    /// Read MAC address from EEPROM, falling back to RAL/RAH registers
+    fn read_mac_address(&self) -> [u8; 6] {
+        // Try EEPROM first
+        if let (Some(word0), Some(word1), Some(word2)) = (
+            self.read_eeprom(0),
+            self.read_eeprom(1),
+            self.read_eeprom(2),
+        ) {
+            let mac = [
+                (word0 & 0xFF) as u8,
+                ((word0 >> 8) & 0xFF) as u8,
+                (word1 & 0xFF) as u8,
+                ((word1 >> 8) & 0xFF) as u8,
+                (word2 & 0xFF) as u8,
+                ((word2 >> 8) & 0xFF) as u8,
+            ];
+            // Validate MAC (not all zeros or all FFs)
+            if mac != [0; 6] && mac != [0xFF; 6] {
+                return
mac; + } + } + // Fallback: read from RAL/RAH registers (pre-loaded by hardware/VMware) + self.read_mac_from_ral() + } + /// Get virtual address to physical address (identity mapped for now) fn virt_to_phys(virt: usize) -> u64 { // In Breenix, we use identity mapping for kernel addresses @@ -239,12 +280,16 @@ impl E1000 { // Configure receive control register // Enable receiver, accept broadcast, 2KB buffers, strip CRC + // Note: BSEX is NOT set — it changes buffer size semantics self.write_reg( REG_RCTL, - RCTL_EN | RCTL_BAM | RCTL_SZ_2048 | RCTL_SECRC | RCTL_BSEX, + RCTL_EN | RCTL_BAM | RCTL_SZ_2048 | RCTL_SECRC, ); + #[cfg(target_arch = "x86_64")] log::info!("E1000: RX initialized with {} descriptors", RX_RING_SIZE); + #[cfg(target_arch = "aarch64")] + crate::serial_println!("[e1000] RX initialized with {} descriptors", RX_RING_SIZE); } /// Initialize transmit functionality @@ -273,7 +318,10 @@ impl E1000 { // IPG transmit time: 10 + 8 + 6 (for IEEE 802.3 standard) self.write_reg(REG_TIPG, 10 | (8 << 10) | (6 << 20)); + #[cfg(target_arch = "x86_64")] log::info!("E1000: TX initialized with {} descriptors", TX_RING_SIZE); + #[cfg(target_arch = "aarch64")] + crate::serial_println!("[e1000] TX initialized with {} descriptors", TX_RING_SIZE); } /// Set up the MAC address filter @@ -378,12 +426,18 @@ impl E1000 { } // Check status after timeout - let tdh_after = self.read_reg(REG_TDH); - let tdt_after = self.read_reg(REG_TDT); - let status = unsafe { read_volatile(&self.tx_ring[idx].status) }; + let _tdh_after = self.read_reg(REG_TDH); + let _tdt_after = self.read_reg(REG_TDT); + let _status = unsafe { read_volatile(&self.tx_ring[idx].status) }; + #[cfg(target_arch = "x86_64")] log::warn!( "E1000: TX timeout TDH={} TDT={} desc.status={:#x}", - tdh_after, tdt_after, status + _tdh_after, _tdt_after, _status + ); + #[cfg(target_arch = "aarch64")] + crate::serial_println!( + "[e1000] TX timeout TDH={} TDT={} desc.status={:#x}", + _tdh_after, _tdt_after, _status ); Err("TX 
timeout") @@ -440,21 +494,19 @@ impl E1000 { } /// Handle interrupt + #[cfg(target_arch = "x86_64")] pub fn handle_interrupt(&mut self) { let icr = self.read_reg(REG_ICR); if icr & ICR_RXT0 != 0 { - // Receive timer expired - packets available log::debug!("E1000: RX interrupt"); } if icr & ICR_TXDW != 0 { - // Transmit descriptor written back log::debug!("E1000: TX interrupt"); } if icr & ICR_LSC != 0 { - // Link status change if self.link_up() { log::info!("E1000: Link up at {} Mbps", self.link_speed()); } else { @@ -486,31 +538,50 @@ static E1000_INITIALIZED: AtomicBool = AtomicBool::new(false); /// Initialize the E1000 driver pub fn init() -> Result<(), &'static str> { - // Find the E1000 device on the PCI bus - let device = pci::find_device(INTEL_VENDOR_ID, E1000_DEVICE_ID) + // Find the E1000 device on the PCI bus (try 82540EM then 82574L) + let (device, found_device_id) = pci::find_device(INTEL_VENDOR_ID, E1000_DEVICE_ID) + .map(|d| (d, E1000_DEVICE_ID)) + .or_else(|| pci::find_device(INTEL_VENDOR_ID, E1000E_DEVICE_ID).map(|d| (d, E1000E_DEVICE_ID))) .ok_or("E1000 device not found on PCI bus")?; + #[cfg(target_arch = "x86_64")] log::info!( - "E1000: Found device at {:02x}:{:02x}.{} IRQ={}", + "E1000: Found device {:04x} at {:02x}:{:02x}.{} IRQ={}", + found_device_id, device.bus, device.device, device.function, device.interrupt_line ); + #[cfg(target_arch = "aarch64")] + crate::serial_println!( + "[e1000] Found device {:04x} at {:02x}:{:02x}.{}", + found_device_id, + device.bus, + device.device, + device.function + ); // Get the MMIO BAR let mmio_bar = device.get_mmio_bar().ok_or("E1000: No MMIO BAR found")?; - log::info!( - "E1000: MMIO at {:#x} size {:#x}", - mmio_bar.address, - mmio_bar.size - ); + #[cfg(target_arch = "x86_64")] + log::info!("E1000: MMIO at {:#x} size {:#x}", mmio_bar.address, mmio_bar.size); + #[cfg(target_arch = "aarch64")] + crate::serial_println!("[e1000] MMIO at {:#x} size {:#x}", mmio_bar.address, mmio_bar.size); // Map the MMIO 
region + // x86_64: use map_mmio (allocates VA from MMIO pool, creates page table entries) + // aarch64: use HHDM direct mapping (boot page tables already cover all physical addresses) + #[cfg(target_arch = "x86_64")] let mmio_base = crate::memory::map_mmio(mmio_bar.address, mmio_bar.size as usize)?; + #[cfg(target_arch = "aarch64")] + let mmio_base = (crate::memory::physical_memory_offset().as_u64() + mmio_bar.address) as usize; + #[cfg(target_arch = "x86_64")] log::info!("E1000: Mapped MMIO to {:#x}", mmio_base); + #[cfg(target_arch = "aarch64")] + crate::serial_println!("[e1000] Mapped MMIO to {:#x}", mmio_base); // Enable bus mastering and memory space device.enable_bus_master(); @@ -535,6 +606,7 @@ pub fn init() -> Result<(), &'static str> { // Create driver instance let mut driver = E1000 { pci_device: device, + device_id: found_device_id, mmio_base, rx_ring, rx_buffers, @@ -550,14 +622,17 @@ pub fn init() -> Result<(), &'static str> { // Read MAC address driver.mac_addr = driver.read_mac_address(); + #[cfg(target_arch = "x86_64")] log::info!( "E1000: MAC address {:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", - driver.mac_addr[0], - driver.mac_addr[1], - driver.mac_addr[2], - driver.mac_addr[3], - driver.mac_addr[4], - driver.mac_addr[5] + driver.mac_addr[0], driver.mac_addr[1], driver.mac_addr[2], + driver.mac_addr[3], driver.mac_addr[4], driver.mac_addr[5] + ); + #[cfg(target_arch = "aarch64")] + crate::serial_println!( + "[e1000] MAC address {:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + driver.mac_addr[0], driver.mac_addr[1], driver.mac_addr[2], + driver.mac_addr[3], driver.mac_addr[4], driver.mac_addr[5] ); // Set up MAC address filter @@ -576,20 +651,31 @@ pub fn init() -> Result<(), &'static str> { driver.enable_link(); // Check link status + #[cfg(target_arch = "x86_64")] if driver.link_up() { log::info!("E1000: Link up at {} Mbps", driver.link_speed()); } else { log::info!("E1000: Link down (waiting for link...)"); } + #[cfg(target_arch = "aarch64")] + 
if driver.link_up() { + crate::serial_println!("[e1000] Link up at {} Mbps", driver.link_speed()); + } else { + crate::serial_println!("[e1000] Link down (waiting for link...)"); + } - // Enable interrupts on the device + // Enable interrupts on the device (x86 uses IRQ-driven RX, aarch64 uses polling) + #[cfg(target_arch = "x86_64")] driver.enable_interrupts(); // Store driver instance *E1000_DRIVER.lock() = Some(driver); E1000_INITIALIZED.store(true, Ordering::Release); + #[cfg(target_arch = "x86_64")] log::info!("E1000 driver initialized"); + #[cfg(target_arch = "aarch64")] + crate::serial_println!("[e1000] Driver initialized"); Ok(()) } @@ -646,6 +732,8 @@ pub fn can_receive() -> bool { } /// Handle E1000 interrupt (called from IRQ 11 handler) +/// aarch64 uses polling mode instead of interrupts +#[cfg(target_arch = "x86_64")] pub fn handle_interrupt() { if let Some(driver) = E1000_DRIVER.lock().as_mut() { driver.handle_interrupt(); diff --git a/kernel/src/drivers/mod.rs b/kernel/src/drivers/mod.rs index d0079613..4539b2a1 100644 --- a/kernel/src/drivers/mod.rs +++ b/kernel/src/drivers/mod.rs @@ -5,7 +5,6 @@ #[cfg(target_arch = "aarch64")] pub mod ahci; -#[cfg(target_arch = "x86_64")] pub mod e1000; pub mod fw_cfg; pub mod pci; @@ -130,10 +129,17 @@ pub fn init() -> usize { Err(e) => serial_println!("[drivers] VirtIO GPU (PCI) init failed: {}", e), } - // Initialize VirtIO network driver (PCI transport) + // Initialize VirtIO network driver (PCI transport) — Parallels match virtio::net_pci::init() { Ok(()) => serial_println!("[drivers] VirtIO network (PCI) initialized"), - Err(e) => serial_println!("[drivers] VirtIO network (PCI) init failed: {}", e), + Err(e) => { + serial_println!("[drivers] VirtIO network (PCI) init failed: {}", e); + // No VirtIO net — try Intel e1000/e1000e (VMware Fusion) + match e1000::init() { + Ok(()) => serial_println!("[drivers] Intel e1000 network driver initialized"), + Err(e2) => serial_println!("[drivers] e1000 init also failed: 
{}", e2), + } + } } // Initialize VMware SVGA3 GPU if present (VMware Fusion on ARM64) diff --git a/kernel/src/drivers/usb/hid.rs b/kernel/src/drivers/usb/hid.rs index df14bb41..aeb4999a 100644 --- a/kernel/src/drivers/usb/hid.rs +++ b/kernel/src/drivers/usb/hid.rs @@ -41,19 +41,20 @@ static MOUSE_X: AtomicU32 = AtomicU32::new(0); static MOUSE_Y: AtomicU32 = AtomicU32::new(0); static MOUSE_BUTTONS: AtomicU32 = AtomicU32::new(0); +/// Per-endpoint button state for multi-endpoint devices (e.g., VMware dual HID). +/// When multiple USB HID endpoints report button state independently, one endpoint +/// reporting buttons=0 must not cancel the other's press. We track each endpoint's +/// buttons separately and OR them: MOUSE_BUTTONS = EP0_BUTTONS | EP1_BUTTONS. +static EP_BUTTONS: [AtomicU32; 4] = [ + AtomicU32::new(0), AtomicU32::new(0), + AtomicU32::new(0), AtomicU32::new(0), +]; + /// Latched button presses: bits set when a press transition (0→1) is detected. -/// Sustained for PRESS_SUSTAIN_READS reads via mouse_state_consume() to give -/// userspace time to detect the press and act on it (e.g., start a drag). +/// Cleared atomically by mouse_state_consume() when BWM reads the mouse state. +/// This ensures fast press-release cycles (within one compositor frame) are not lost. static MOUSE_BUTTONS_PRESSED: AtomicU32 = AtomicU32::new(0); -/// Number of mouse_state_consume() reads remaining before the press latch clears. -/// Set to PRESS_SUSTAIN_READS when a press is detected. Decremented on each -/// consume call. While > 0, the latch stays active. -static MOUSE_PRESS_SUSTAIN: AtomicU32 = AtomicU32::new(0); - -/// Sustain a latched press for this many userspace reads (~16ms each = ~80ms total). -const PRESS_SUSTAIN_READS: u32 = 5; - /// Once we see the first absolute tablet report (6+ bytes), latch into tablet mode. /// All subsequent reports are parsed as absolute, regardless of byte[1] value. 
static IS_ABSOLUTE_TABLET: AtomicBool = AtomicBool::new(false); @@ -297,7 +298,12 @@ fn screen_dimensions() -> (u32, u32) { /// Counter for diagnostic logging of first few mouse reports. static MOUSE_LOG_COUNT: AtomicU64 = AtomicU64::new(0); -pub fn process_mouse_report(report: &[u8]) { +/// Process a mouse HID report from a specific endpoint. +/// +/// `ep_idx` identifies the USB endpoint (0-3) so that multi-endpoint devices +/// (like VMware's dual HID mouse) don't race on button state. Each endpoint's +/// buttons are tracked independently; the global MOUSE_BUTTONS is the OR of all. +pub fn process_mouse_report(report: &[u8], ep_idx: u8) { if report.len() < 3 { return; } @@ -360,17 +366,22 @@ pub fn process_mouse_report(report: &[u8]) { } else { report[1] as u32 }; - let prev = MOUSE_BUTTONS.swap(buttons, Ordering::Relaxed); - if buttons != prev { - // Latch press transitions so fast clicks aren't missed by polling - let pressed = buttons & !prev; + // Store this endpoint's buttons, then merge all endpoints + let ei = (ep_idx as usize) & 3; + EP_BUTTONS[ei].store(buttons, Ordering::Relaxed); + let merged = EP_BUTTONS[0].load(Ordering::Relaxed) + | EP_BUTTONS[1].load(Ordering::Relaxed) + | EP_BUTTONS[2].load(Ordering::Relaxed) + | EP_BUTTONS[3].load(Ordering::Relaxed); + let prev = MOUSE_BUTTONS.swap(merged, Ordering::Relaxed); + if merged != prev { + let pressed = merged & !prev; if pressed != 0 { MOUSE_BUTTONS_PRESSED.fetch_or(pressed, Ordering::Relaxed); - MOUSE_PRESS_SUSTAIN.store(PRESS_SUSTAIN_READS, Ordering::Relaxed); } static BTN_LOG: AtomicU64 = AtomicU64::new(0); if BTN_LOG.fetch_add(1, Ordering::Relaxed) < 50 { - crate::serial_println!("[mouse-click] {} -> {}", prev, buttons); + crate::serial_println!("[mouse-click] {} -> {} (ep{})", prev, merged, ep_idx); } } @@ -394,11 +405,16 @@ pub fn process_mouse_report(report: &[u8]) { // Boot protocol relative mouse: 3-4 byte reports // Format: [buttons, dx (i8), dy (i8), wheel (i8)] let new_buttons = report[0] 
as u32; - let old_buttons = MOUSE_BUTTONS.swap(new_buttons, Ordering::Relaxed); - let pressed = new_buttons & !old_buttons; + let ei = (ep_idx as usize) & 3; + EP_BUTTONS[ei].store(new_buttons, Ordering::Relaxed); + let merged = EP_BUTTONS[0].load(Ordering::Relaxed) + | EP_BUTTONS[1].load(Ordering::Relaxed) + | EP_BUTTONS[2].load(Ordering::Relaxed) + | EP_BUTTONS[3].load(Ordering::Relaxed); + let old_buttons = MOUSE_BUTTONS.swap(merged, Ordering::Relaxed); + let pressed = merged & !old_buttons; if pressed != 0 { MOUSE_BUTTONS_PRESSED.fetch_or(pressed, Ordering::Relaxed); - MOUSE_PRESS_SUSTAIN.store(PRESS_SUSTAIN_READS, Ordering::Relaxed); } let dx = report[1] as i8 as i32; let dy = report[2] as i8 as i32; @@ -422,42 +438,39 @@ pub fn mouse_position() -> (u32, u32) { (MOUSE_X.load(Ordering::Relaxed), MOUSE_Y.load(Ordering::Relaxed)) } -/// Get current mouse position and button state (non-consuming peek). +/// Get current mouse position and raw button state (non-consuming peek). /// -/// Returns instantaneous state without consuming latched button presses. -/// Used by compositor_wait for change detection — must not consume the latch -/// because BWM hasn't read it yet. +/// Returns instantaneous hardware state (no latch). Used by compositor_wait for +/// change detection — the latch must NOT be included here, otherwise a sustained +/// latch (buttons|pressed == prev) prevents compositor_wait from detecting the +/// physical release, causing a deadlock where the sustain counter never decrements. pub fn mouse_state() -> (u32, u32, u32) { - let buttons = MOUSE_BUTTONS.load(Ordering::Relaxed); - let pressed = MOUSE_BUTTONS_PRESSED.load(Ordering::Relaxed); ( MOUSE_X.load(Ordering::Relaxed), MOUSE_Y.load(Ordering::Relaxed), - buttons | pressed, + MOUSE_BUTTONS.load(Ordering::Relaxed), ) } +/// Check if there are pending latched button presses (non-consuming peek). 
+/// +/// Used by compositor_wait to detect fast press-release cycles that completed +/// before the compositor had a chance to read the state. When this returns true, +/// compositor_wait should set COMPOSITOR_READY_MOUSE so BWM processes the click. +pub fn has_pending_press() -> bool { + MOUSE_BUTTONS_PRESSED.load(Ordering::Relaxed) != 0 +} + /// Get current mouse position and button state, consuming latched presses. /// /// Button state includes latched presses: if a button was pressed and released -/// between two calls, the press is still reported. The latch is sustained for -/// PRESS_SUSTAIN_READS calls (~80ms at 16ms/frame) so drag gestures work even -/// when the hardware release arrives before userspace processes the press. +/// between two consume calls, the press is still reported once. The latch is +/// cleared atomically on read so the next call returns only live hardware state. /// /// Called from sys_get_mouse_pos (userspace reads). pub fn mouse_state_consume() -> (u32, u32, u32) { let buttons = MOUSE_BUTTONS.load(Ordering::Relaxed); - let pressed = MOUSE_BUTTONS_PRESSED.load(Ordering::Relaxed); - // Decrement sustain counter; clear latch only when it reaches 0 - if pressed != 0 { - let remaining = MOUSE_PRESS_SUSTAIN.load(Ordering::Relaxed); - if remaining > 0 { - MOUSE_PRESS_SUSTAIN.fetch_sub(1, Ordering::Relaxed); - } else { - // Sustain expired — clear the latch - MOUSE_BUTTONS_PRESSED.store(0, Ordering::Relaxed); - } - } + let pressed = MOUSE_BUTTONS_PRESSED.swap(0, Ordering::Relaxed); ( MOUSE_X.load(Ordering::Relaxed), MOUSE_Y.load(Ordering::Relaxed), diff --git a/kernel/src/drivers/usb/xhci.rs b/kernel/src/drivers/usb/xhci.rs index eabd2546..82e03829 100644 --- a/kernel/src/drivers/usb/xhci.rs +++ b/kernel/src/drivers/usb/xhci.rs @@ -3535,7 +3535,7 @@ fn wait_for_command_completion(state: &XhciState) -> Result { let report = &(&(*buf).0)[..8]; if report.iter().any(|&b| b != 0) { GET_REPORT_NONZERO.fetch_add(1, Ordering::Relaxed); - 
super::hid::process_mouse_report(report); + super::hid::process_mouse_report(report, 0); } } } else if cc != completion_code::SUCCESS && cc != completion_code::SHORT_PACKET { @@ -5024,7 +5024,7 @@ pub fn handle_interrupt() { let report_buf = &raw const MOUSE_REPORT_BUF; dma_cache_invalidate((*report_buf).0.as_ptr(), 8); let report = &(*report_buf).0; - super::hid::process_mouse_report(report); + super::hid::process_mouse_report(report, 0); let _ = queue_hid_transfer(state, 1, slot, endpoint); } else if slot == state.mouse_slot && state.mouse_nkro_endpoint != 0 @@ -5034,7 +5034,7 @@ pub fn handle_interrupt() { let report_buf = &raw const MOUSE2_REPORT_BUF; dma_cache_invalidate((*report_buf).0.as_ptr(), 9); let report = &(*report_buf).0; - super::hid::process_mouse_report(report); + super::hid::process_mouse_report(report, 1); let _ = queue_hid_transfer(state, 3, slot, endpoint); } else if slot == state.mouse_slot && endpoint == 1 @@ -5056,7 +5056,7 @@ pub fn handle_interrupt() { LAST_GET_REPORT_U64.store(snap, Ordering::Relaxed); if report.iter().any(|&b| b != 0) { GET_REPORT_NONZERO.fetch_add(1, Ordering::Relaxed); - super::hid::process_mouse_report(report); + super::hid::process_mouse_report(report, 0); } } } else { @@ -5423,7 +5423,7 @@ pub fn poll_hid_events() { let report = &(&(*buf).0)[..8]; if report.iter().any(|&b| b != 0) { GET_REPORT_NONZERO.fetch_add(1, Ordering::Relaxed); - super::hid::process_mouse_report(report); + super::hid::process_mouse_report(report, 0); } } // Event consumed — advance dequeue and continue event loop @@ -5438,7 +5438,7 @@ pub fn poll_hid_events() { let report = &(&(*buf).0)[..8]; if report.iter().any(|&b| b != 0) { GET_REPORT_NONZERO.fetch_add(1, Ordering::Relaxed); - super::hid::process_mouse_report(report); + super::hid::process_mouse_report(report, 0); } } } else if cc == completion_code::SUCCESS || cc == completion_code::SHORT_PACKET { @@ -5496,7 +5496,7 @@ pub fn poll_hid_events() { let report_buf = &raw const 
MOUSE_REPORT_BUF; dma_cache_invalidate((*report_buf).0.as_ptr(), 8); let report = &(*report_buf).0; - super::hid::process_mouse_report(report); + super::hid::process_mouse_report(report, 0); let _ = queue_hid_transfer(state, 1, state.mouse_slot, state.mouse_endpoint); } // Mouse2 interrupt endpoint event (DCI 5) @@ -5507,7 +5507,7 @@ pub fn poll_hid_events() { let report_buf = &raw const MOUSE2_REPORT_BUF; dma_cache_invalidate((*report_buf).0.as_ptr(), 9); let report = &(*report_buf).0; - super::hid::process_mouse_report(report); + super::hid::process_mouse_report(report, 1); let _ = queue_hid_transfer(state, 3, state.mouse_slot, state.mouse_nkro_endpoint); } else { XFER_OTHER_COUNT.fetch_add(1, Ordering::Relaxed); diff --git a/kernel/src/main_aarch64.rs b/kernel/src/main_aarch64.rs index f5e22a70..7eff9d57 100644 --- a/kernel/src/main_aarch64.rs +++ b/kernel/src/main_aarch64.rs @@ -722,18 +722,27 @@ pub extern "C" fn kernel_main(hw_config_ptr: u64) -> ! { // Bring up secondary CPUs via PSCI CPU_ON. // Probe-based: try each CPU ID and let PSCI tell us which exist. - // Works on QEMU, Parallels, and any ARM64 platform with PSCI. - // VMware excluded: RAM starts at 0x80000000, boot.S stacks at 0x41000000 are invalid. - if !kernel::platform_config::is_vmware() { + // Works on QEMU, Parallels, and VMware — all support PSCI via HVC. + { // Tell boot.S the correct UART address for this platform's serial debug output kernel::arch_impl::aarch64::smp::set_uart_phys(kernel::platform_config::uart_base_phys()); + // Write per-CPU stack base address. Platform-dependent: + // QEMU/Parallels (ram at 0x40000000): 0x4100_0000 + // VMware (ram at 0x80000000): 0x8100_0000 + let stack_base_phys = 0x4100_0000u64 + kernel::platform_config::ram_base_offset(); + kernel::arch_impl::aarch64::smp::set_stack_base_phys(stack_base_phys); + // Write CPU 0's actual TTBR0/TTBR1 to .bss.boot so secondary CPUs use // the correct page tables. 
On Parallels, the UEFI loader builds its own // page tables (not boot.S's ttbr0_l0/ttbr1_l0), so we must pass the // real TTBR values to secondary CPUs explicitly. kernel::arch_impl::aarch64::smp::set_smp_ttbrs(); + // Log CPU 0's MPIDR for topology diagnostics + let mpidr: u64; + unsafe { core::arch::asm!("mrs {}, mpidr_el1", out(reg) mpidr, options(nomem, nostack)) }; + serial_println!("[smp] CPU 0 MPIDR={:#x}, stack_base={:#x}", mpidr, stack_base_phys); serial_println!("[smp] Probing secondary CPUs via PSCI..."); let mut launched = 0u64; for cpu in 1..kernel::arch_impl::aarch64::smp::MAX_CPUS { @@ -764,8 +773,6 @@ pub extern "C" fn kernel_main(hw_config_ptr: u64) -> ! { "[smp] {} CPUs online", kernel::arch_impl::aarch64::smp::cpus_online() ); - } else { - serial_println!("[smp] Skipping SMP on VMware (boot.S stacks need RAM base relocation)"); } // Test kthread lifecycle BEFORE creating userspace processes @@ -1058,12 +1065,12 @@ fn init_scheduler() { // On Parallels: UEFI loader sets SP to 0x42000000, then HHDM switch adds HHDM_BASE // Use platform detection to pick the right boot stack address. 
const HHDM_BASE: u64 = 0xFFFF_0000_0000_0000; - let (boot_stack_top, boot_stack_bottom) = if kernel::platform_config::is_qemu() { - const STACK_REGION_BASE: u64 = 0x4100_0000; + let (boot_stack_top, boot_stack_bottom) = if kernel::platform_config::is_qemu() || kernel::platform_config::is_vmware() { + let stack_base = kernel::arch_impl::aarch64::constants::percpu_stack_region_base(); const STACK_SIZE: u64 = 0x20_0000; // 2MB per CPU ( - VirtAddr::new(HHDM_BASE + STACK_REGION_BASE + STACK_SIZE), - VirtAddr::new(HHDM_BASE + STACK_REGION_BASE), + VirtAddr::new(stack_base + STACK_SIZE), + VirtAddr::new(stack_base), ) } else { // Parallels: UEFI loader stack at 0x42000000 (phys), now at HHDM diff --git a/kernel/src/memory/layout.rs b/kernel/src/memory/layout.rs index 3bf8fc4c..c01db28b 100644 --- a/kernel/src/memory/layout.rs +++ b/kernel/src/memory/layout.rs @@ -130,7 +130,7 @@ pub const KERNEL_HIGHER_HALF_BASE: u64 = aarch64_const::KERNEL_HIGHER_HALF_BASE; #[cfg(target_arch = "x86_64")] pub const PERCPU_STACK_REGION_BASE: u64 = 0xffffc90000000000; #[cfg(target_arch = "aarch64")] -pub const PERCPU_STACK_REGION_BASE: u64 = aarch64_const::PERCPU_STACK_REGION_BASE; +pub const PERCPU_STACK_REGION_BASE: u64 = aarch64_const::PERCPU_STACK_REGION_BASE_DEFAULT; /// Size of each per-CPU kernel stack (32 KiB) /// This is sufficient for kernel operations including interrupt handling diff --git a/kernel/src/memory/mod.rs b/kernel/src/memory/mod.rs index 4d93a026..de4d7825 100644 --- a/kernel/src/memory/mod.rs +++ b/kernel/src/memory/mod.rs @@ -320,21 +320,38 @@ impl PhysAddrWrapper { /// are NOT in this region even though they may be numerically greater than phys_offset. 
pub fn from_kernel_virt(virt: usize) -> u64 { let phys_offset = physical_memory_offset(); - let heap_start = crate::memory::heap::HEAP_START; - let heap_end = heap_start + crate::memory::heap::HEAP_SIZE; - // Check if this is a heap address - these are mapped, not direct - let is_heap = (virt as u64) >= heap_start && (virt as u64) < heap_end; + // Check if this is a heap address - these are mapped, not direct on x86_64. + // On aarch64, the heap IS directly mapped via HHDM (boot.S maps all RAM), + // so we can always use the fast subtraction path. + #[cfg(target_arch = "x86_64")] + let is_heap = { + let heap_start = crate::memory::heap::HEAP_START; + let heap_end = heap_start + crate::memory::heap::HEAP_SIZE; + (virt as u64) >= heap_start && (virt as u64) < heap_end + }; + #[cfg(target_arch = "aarch64")] + let is_heap = false; // The direct physical memory map starts at phys_offset. // We can detect if an address is truly in the direct map by checking: // 1. It's >= phys_offset // 2. Subtracting phys_offset gives a reasonable physical address (< 4GB typically) - // 3. It's NOT in a known non-direct-mapped region (heap, MMIO, stack, etc.) + // 3. It's NOT in a known non-direct-mapped region (heap on x86_64, MMIO, stack, etc.) if !is_heap && (virt as u64) >= phys_offset.as_u64() { - let candidate_phys = (virt as u64) - phys_offset.as_u64(); - // Physical RAM is typically < 4GB in our QEMU setup (512MB max) - // If the result is reasonable, it's likely a direct map address + #[allow(unused_mut)] + let mut candidate_phys = (virt as u64) - phys_offset.as_u64(); + // On aarch64 with VMware, RAM starts at 0x80000000 (not 0x40000000). + // The HHDM L1[1] maps VA HHDM+0x40000000 → PA 0x80000000, so addresses + // in the RAM region need the ram_base_offset added. 
+ #[cfg(target_arch = "aarch64")] + { + let offset = crate::platform_config::ram_base_offset(); + if offset > 0 && candidate_phys >= 0x4000_0000 { + candidate_phys += offset; + } + } + // Physical RAM is typically < 4GB in our setup if candidate_phys < 0x1_0000_0000 { return candidate_phys; } diff --git a/kernel/src/net/mod.rs b/kernel/src/net/mod.rs index d19d8b31..eb3cc68f 100644 --- a/kernel/src/net/mod.rs +++ b/kernel/src/net/mod.rs @@ -22,7 +22,7 @@ use alloc::vec::Vec; use spin::Mutex; // Use E1000 on x86_64, VirtIO net on ARM64 (MMIO for QEMU, PCI for Parallels) -#[cfg(target_arch = "x86_64")] +// On VMware ARM64, e1000 is used (Intel 82574L emulation) use crate::drivers::e1000; #[cfg(target_arch = "aarch64")] use crate::drivers::virtio::net_mmio; @@ -70,8 +70,10 @@ fn get_mac_address() -> Option<[u8; 6]> { { e1000::mac_address() } #[cfg(target_arch = "aarch64")] { - // Try MMIO first (QEMU), then PCI (Parallels) - net_mmio::mac_address().or_else(|| net_pci::mac_address()) + // Try VirtIO MMIO (QEMU), VirtIO PCI (Parallels), then e1000 (VMware) + net_mmio::mac_address() + .or_else(|| net_pci::mac_address()) + .or_else(|| e1000::mac_address()) } } @@ -83,6 +85,8 @@ fn driver_transmit(data: &[u8]) -> Result<(), &'static str> { { if net_pci::is_initialized() { net_pci::transmit(data) + } else if e1000::is_initialized() { + e1000::transmit(data) } else { net_mmio::transmit(data) } @@ -129,6 +133,15 @@ pub const PARALLELS_CONFIG: NetConfig = NetConfig { gateway: [10, 211, 55, 1], // Parallels shared network gateway }; +/// Network configuration for VMware Fusion NAT networking +/// VMware NAT (vmnet8) uses 172.16.45.x with gateway at 172.16.45.2 +#[allow(dead_code)] // Used conditionally when e1000 is active on VMware +pub const VMWARE_CONFIG: NetConfig = NetConfig { + ip_addr: [172, 16, 45, 100], // Static guest IP (avoiding DHCP conflicts) + subnet_mask: [255, 255, 255, 0], + gateway: [172, 16, 45, 2], // VMware NAT gateway +}; + /// Select network config based 
on compile-time feature or default to SLIRP /// Use VMNET_CONFIG when BREENIX_NET_MODE=vmnet is set at build time #[cfg(feature = "vmnet")] @@ -216,14 +229,15 @@ pub fn init() { crate::serial_println!("[net] Initializing network stack..."); - // Auto-detect platform: PCI net = Parallels, MMIO net = QEMU + // Auto-detect platform: PCI net = Parallels, e1000 = VMware, MMIO net = QEMU if net_pci::is_initialized() { crate::serial_println!("[net] Using VirtIO net PCI driver (Parallels)"); - // Switch to Parallels network config - { - let mut config = NET_CONFIG.lock(); - *config = PARALLELS_CONFIG; - } + let mut config = NET_CONFIG.lock(); + *config = PARALLELS_CONFIG; + } else if e1000::is_initialized() { + crate::serial_println!("[net] Using Intel e1000 driver (VMware)"); + let mut config = NET_CONFIG.lock(); + *config = VMWARE_CONFIG; } if let Some(mac) = get_mac_address() { @@ -349,7 +363,7 @@ pub fn process_rx() { /// Process incoming packets (ARM64 - polling or interrupt driven) #[cfg(target_arch = "aarch64")] pub fn process_rx() { - // Try PCI driver first (Parallels), then MMIO (QEMU) + // Try PCI driver first (Parallels), then e1000 (VMware), then MMIO (QEMU) if net_pci::is_initialized() { let mut processed = false; while let Some(data) = net_pci::receive() { @@ -359,6 +373,16 @@ pub fn process_rx() { if processed { net_pci::recycle_rx_buffers(); } + } else if e1000::is_initialized() { + let mut buffer = [0u8; 2048]; + while e1000::can_receive() { + match e1000::receive(&mut buffer) { + Ok(len) => { + process_packet(&buffer[..len]); + } + Err(_) => break, + } + } } else { let mut processed = false; while let Some(data) = net_mmio::receive() { diff --git a/kernel/src/syscall/graphics.rs b/kernel/src/syscall/graphics.rs index 9793afa1..d100f269 100644 --- a/kernel/src/syscall/graphics.rs +++ b/kernel/src/syscall/graphics.rs @@ -1108,8 +1108,8 @@ fn handle_compositor_wait(cmd: &FbDrawCmd) -> SyscallResult { // Check non-dirty conditions first (mouse + registry are 
always non-blocking) let mut ready: u64 = 0; - // Bit 1: mouse changed? - if mouse_packed != prev_mouse { + // Bit 1: mouse changed (position, buttons, or pending latched press)? + if mouse_packed != prev_mouse || crate::drivers::usb::hid::has_pending_press() { ready |= 2; } @@ -1181,7 +1181,7 @@ fn handle_compositor_wait(cmd: &FbDrawCmd) -> SyscallResult { let (mx2, my2, mb2) = crate::drivers::usb::hid::mouse_state(); let mouse_packed2 = ((mx2 as u64) << 32) | ((my2 as u64) << 16) | (mb2 as u64); - if mouse_packed2 != prev_mouse { + if mouse_packed2 != prev_mouse || crate::drivers::usb::hid::has_pending_press() { ready_after |= 2; } @@ -1266,6 +1266,14 @@ fn handle_composite_windows(desc_ptr: u64) -> SyscallResult { }); if !any_window_dirty { drop(reg); + // SVGA3 STDU: cursor is drawn in VRAM by software. Update it even when + // no windows are dirty, so mouse movement remains responsive. + if matches!(crate::graphics::compositor_backend(), + crate::graphics::CompositorBackend::Svga3Stdu) { + if crate::drivers::vmware::svga3::update_cursor() { + let _ = crate::drivers::vmware::svga3::present_rect(0, 0, bg_width, bg_height); + } + } return SyscallResult::Ok(0); } drop(reg); diff --git a/libs/libbreenix/src/dns.rs b/libs/libbreenix/src/dns.rs index 564f7ed4..c2a7bd2d 100644 --- a/libs/libbreenix/src/dns.rs +++ b/libs/libbreenix/src/dns.rs @@ -52,6 +52,9 @@ pub const SLIRP_DNS: [u8; 4] = [10, 0, 2, 3]; /// Parallels Desktop shared networking DNS/gateway pub const PARALLELS_DNS: [u8; 4] = [10, 211, 55, 1]; +/// VMware Fusion NAT DNS (vmnet8 gateway) +pub const VMWARE_DNS: [u8; 4] = [172, 16, 45, 2]; + /// Google's public DNS server pub const GOOGLE_DNS: [u8; 4] = [8, 8, 8, 8]; @@ -606,7 +609,7 @@ pub fn resolve(hostname: &str, dns_server: [u8; 4]) -> Result Result { - let servers = [PARALLELS_DNS, SLIRP_DNS, GOOGLE_DNS]; + let servers = [PARALLELS_DNS, VMWARE_DNS, SLIRP_DNS, GOOGLE_DNS]; let mut last_err = DnsError::Timeout; for server in &servers { match 
resolve(hostname, *server) { diff --git a/userspace/programs/src/bwm.rs b/userspace/programs/src/bwm.rs index 68510eee..4a1e686a 100644 --- a/userspace/programs/src/bwm.rs +++ b/userspace/programs/src/bwm.rs @@ -219,7 +219,8 @@ impl Window { } fn bounds(&self) -> (i32, i32, i32, i32) { - (self.x, self.y, self.x + self.width as i32, self.y + self.total_height() as i32) + // +3 accounts for the drop shadow drawn at (x+3, y+3) in draw_window_frame + (self.x, self.y, self.x + self.width as i32 + 3, self.y + self.total_height() as i32 + 3) } fn close_btn_rect(&self) -> (i32, i32, usize, usize) { @@ -805,6 +806,87 @@ fn compose_full_redraw( } } +/// Partial redraw: only update a dirty sub-region of the screen. +/// +/// Used during drag to avoid full-screen VRAM copies. On SVGA3 (VMware), +/// VRAM is uncacheable so writing 9.2MB per frame kills drag performance. +/// Partial redraw limits VRAM writes to just the union of old+new window bounds. +fn compose_partial_redraw( + vram: &mut [u32], + fb: &mut FrameBuf, + shadow: &mut Option<(&mut [u32], FrameBuf)>, + bg: &[u32], + windows: &[Window], + focused: usize, + clock: &[u8], + dx0: usize, dy0: usize, dx1: usize, dy1: usize, +) { + let screen_w = fb.width; + let screen_h = fb.height; + let dx1 = dx1.min(screen_w); + let dy1 = dy1.min(screen_h); + if dx0 >= dx1 || dy0 >= dy1 { return; } + + if let Some((ref mut sbuf, ref mut sfb)) = shadow { + // 1. Restore background in dirty region only + for row in dy0..dy1 { + let start = row * screen_w + dx0; + let end = row * screen_w + dx1; + sbuf[start..end].copy_from_slice(&bg[start..end]); + } + // 2. 
Redraw UI elements that intersect dirty region + if dy0 < TASKBAR_HEIGHT { + draw_taskbar(sfb, clock); + } + for i in 0..windows.len() { + if windows[i].minimized { continue; } + let (wx0, wy0, wx1, wy1) = windows[i].bounds(); + if (wx1 as usize) > dx0 && (wx0 as usize) < dx1 + && (wy1 as usize) > dy0 && (wy0 as usize) < dy1 + { + draw_window_frame(sfb, &windows[i], i == focused); + if windows[i].window_id != 0 { + blit_mapped_pixels(sfb, &windows[i]); + } + } + } + if dy1 > screen_h - APPBAR_HEIGHT { + draw_appbar(sfb, windows, focused); + } + // 3. Copy only dirty region from shadow to VRAM + for row in dy0..dy1 { + let start = row * screen_w + dx0; + let end = row * screen_w + dx1; + vram[start..end].copy_from_slice(&sbuf[start..end]); + } + } else { + // Non-shadow path: restore bg region, redraw affected windows + for row in dy0..dy1 { + let start = row * screen_w + dx0; + let end = row * screen_w + dx1; + vram[start..end].copy_from_slice(&bg[start..end]); + } + if dy0 < TASKBAR_HEIGHT { + draw_taskbar(fb, clock); + } + for i in 0..windows.len() { + if windows[i].minimized { continue; } + let (wx0, wy0, wx1, wy1) = windows[i].bounds(); + if (wx1 as usize) > dx0 && (wx0 as usize) < dx1 + && (wy1 as usize) > dy0 && (wy0 as usize) < dy1 + { + draw_window_frame(fb, &windows[i], i == focused); + if windows[i].window_id != 0 { + blit_mapped_pixels(fb, &windows[i]); + } + } + } + if dy1 > screen_h - APPBAR_HEIGHT { + draw_appbar(fb, windows, focused); + } + } +} + // ─── Main ──────────────────────────────────────────────────────────────────── fn main() { @@ -1010,6 +1092,13 @@ fn main() { } } + // Dirty rect tracking — initialized before mouse processing so drag + // can expand the dirty region. Used by section 5 (client blit) and 6 (composite). + let mut dirty_x0 = i32::MAX; + let mut dirty_y0 = i32::MAX; + let mut dirty_x1 = 0i32; + let mut dirty_y1 = 0i32; + // ── 4. 
Process mouse input (only when mouse changed) ── let mut mouse_moved_this_frame = false; if ready & graphics::COMPOSITOR_READY_MOUSE != 0 { @@ -1028,10 +1117,23 @@ fn main() { // Clamp drag to stay below taskbar let new_y = (mouse_y - off_y).max(TASKBAR_HEIGHT as i32); if new_x != windows[win_idx].x || new_y != windows[win_idx].y { + // Capture old bounds before moving + let (ox0, oy0, ox1, oy1) = windows[win_idx].bounds(); windows[win_idx].x = new_x; windows[win_idx].y = new_y; - compose_full_redraw(composite_buf, &mut fb, &mut shadow_fb, &bg_cache, &windows, focused_win, &clock_text); - full_redraw = true; + // Dirty region = union of old and new bounds + let (nx0, ny0, nx1, ny1) = windows[win_idx].bounds(); + let dr_x0 = ox0.min(nx0).max(0) as usize; + let dr_y0 = oy0.min(ny0).max(0) as usize; + let dr_x1 = ox1.max(nx1) as usize; + let dr_y1 = oy1.max(ny1) as usize; + compose_partial_redraw(composite_buf, &mut fb, &mut shadow_fb, &bg_cache, &windows, focused_win, &clock_text, dr_x0, dr_y0, dr_x1, dr_y1); + // Use partial dirty rect instead of full_redraw + dirty_x0 = dirty_x0.min(dr_x0 as i32); + dirty_y0 = dirty_y0.min(dr_y0 as i32); + dirty_x1 = dirty_x1.max(dr_x1 as i32); + dirty_y1 = dirty_y1.max(dr_y1 as i32); + content_dirty = true; } } else if !windows.is_empty() && focused_win < windows.len() && !windows[focused_win].minimized @@ -1043,7 +1145,9 @@ fn main() { } } - // Release: end drag or route release event + // Release: end drag or route release event. + // Per-endpoint button tracking in the kernel prevents dual USB HID + // endpoints from racing (one endpoint can't cancel the other's press). if (buttons & 1) == 0 && (prev_buttons & 1) != 0 { if dragging.is_some() { dragging = None; @@ -1169,11 +1273,6 @@ fn main() { // ── 5. 
Blit dirty client window pixels (occluded by higher-z windows) ── // Skip entirely if compositor_wait didn't report dirty content - let mut dirty_x0 = i32::MAX; - let mut dirty_y0 = i32::MAX; - let mut dirty_x1 = 0i32; - let mut dirty_y1 = 0i32; - if ready & graphics::COMPOSITOR_READY_DIRTY != 0 { for i in 0..windows.len().min(16) { if windows[i].window_id != 0 && !windows[i].minimized {