diff --git a/docs/manual.md b/docs/manual.md index c45eaae37..21745babe 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -109,6 +109,7 @@ This document attempts to clearly describe all of these terms, however as the co * [interrupt](#irq) * [fault](#fault) * [ioport](#ioport) +* [io address space](#io_address_space) ## System {#system} @@ -242,6 +243,13 @@ The mapping has a number of attributes, which include: * caching attributes (mostly relevant for device memory) * permissions (read, write and execute) +**Note:** On x86, a memory region can also be *mapped* into one or more +IO address spaces. This type of mapping is known as an *iomap*. It supports +a number of attributes, which include: + +* the IO virtual address at which the region is mapped in the IO address space +* permissions (read and write) + **Note:** When a memory region is mapped into multiple protection domains, the attributes used for different mappings may vary. @@ -356,6 +364,12 @@ delivered to another PD, the fault being handled depends on when the parent PD i I/O ports are x86 mechanisms to access certain physical devices (e.g. PC serial ports or PCI) using the `in` and `out` CPU instructions. The system description specifies if a protection domain have access to certain port address ranges. These accesses will be executed by seL4 and the result returned to protection domains. +## IO Address Spaces {#io_address_space} + +IO address spaces provide a way to isolate a device's memory accesses within a fixed virtual address space. The isolation provided by the address space is enforced by the underlying hardware IOMMU or SMMU. + +IO address spaces allow *memory regions* to be mapped at a given base IO virtual address. These IO virtual addresses are translated by the hardware IOMMU or SMMU to the underlying physical memory that backs the memory region. + # SDK {#sdk} Microkit is distributed as a software development kit (SDK). 
@@ -1056,6 +1070,23 @@ See the 'cap_sharing' example packaged in your SDK or [on GitHub](https://github All capability elements (currently) all support the `pd` attribute, the name of the protection domain that the capability is from. For instance, `` will place the TCB of PD 'alpha' in the CSpace of the current PD. +## `io_address_space` + +The `io_address_space` element describes an address space used to isolate a given device. + +It supports the following attributes: +* `name`: A unique name for the io address space +* `peripheral_id`: A unique identifier. This must match the identifier used by the hardware IOMMU or SMMU to identify devices. + +The `io_address_space` element supports the following elements as children: + +* `iomap`: This is used to map a *memory_region* into the io address space. + +The `iomap` element supports the following attributes: +* `mr`: Identifies the memory region to map. +* `iovaddr`: Identifies the io virtual address at which to map the memory region. +* `perms`: Identifies the permissions with which to map the memory region. Can be a combination of `r` (read), and `w` (write). + Defaults to read-write. ### Page sizes by architecture diff --git a/example/x86_64_iommu_dma_test/x86_64_iommu_dma_test.system b/example/x86_64_iommu_dma_test/x86_64_iommu_dma_test.system new file mode 100644 index 000000000..eb35dd81d --- /dev/null +++ b/example/x86_64_iommu_dma_test/x86_64_iommu_dma_test.system @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + diff --git a/tool/microkit/src/capdl/irq.rs b/tool/microkit/src/capdl/irq.rs index b2ddde9fe..99f32ad61 100644 --- a/tool/microkit/src/capdl/irq.rs +++ b/tool/microkit/src/capdl/irq.rs @@ -86,18 +86,14 @@ fn create_irq_obj( }), }), SysIrqKind::MSI { - pci_bus, - pci_dev, - pci_func, - handle, - .. + pci_device, handle, .. 
} => Object::IrqMsi(object::IrqMsi { slots: [].to_vec(), extra: Box::new(object::IrqMsiExtraInfo { handle: Word(handle), - pci_bus: Word(pci_bus), - pci_dev: Word(pci_dev), - pci_func: Word(pci_func), + pci_bus: Word(pci_device.bus as u64), + pci_dev: Word(pci_device.device as u64), + pci_func: Word(pci_device.function as u64), }), }), }; diff --git a/tool/microkit/src/main.rs b/tool/microkit/src/main.rs index a9e29b0bb..2f5acf218 100644 --- a/tool/microkit/src/main.rs +++ b/tool/microkit/src/main.rs @@ -210,6 +210,11 @@ fn main() -> Result<(), String> { _ => false, }; + let iommu = match arch { + Arch::X86_64 => json_str_as_bool(&kernel_config_json, "IOMMU")?, + _ => false, + }; + let arm_pa_size_bits = match arch { Arch::Aarch64 => { if json_str_as_bool(&kernel_config_json, "ARM_PA_SIZE_BITS_40")? { @@ -248,6 +253,7 @@ fn main() -> Result<(), String> { "MAX_NUM_BOOTINFO_UNTYPED_CAPS", )?, hypervisor, + iommu, benchmark: args.config == "benchmark", num_cores: if json_str_as_bool(&kernel_config_json, "ENABLE_SMP_SUPPORT")? { json_str_as_u64(&kernel_config_json, "MAX_NUM_NODES")? diff --git a/tool/microkit/src/sdf.rs b/tool/microkit/src/sdf.rs index 55f40bf8e..10b5bf7e0 100644 --- a/tool/microkit/src/sdf.rs +++ b/tool/microkit/src/sdf.rs @@ -22,9 +22,10 @@ use crate::sel4::{ use crate::util::{get_full_path, ranges_overlap, round_up, str_to_bool}; use crate::MAX_PDS; use std::collections::{HashMap, HashSet}; -use std::fmt::Display; +use std::fmt; use std::fs; use std::path::{Path, PathBuf}; +use std::str::FromStr; /// Events that come through entry points (e.g notified or protected) are given an /// identifier that is used as the badge at runtime. @@ -54,16 +55,17 @@ pub const PD_DEFAULT_STACK_SIZE: u64 = 0x2000; const PD_MIN_STACK_SIZE: u64 = 0x1000; const PD_MAX_STACK_SIZE: u64 = 1024 * 1024 * 16; -/// Maximum values for PCI bus, device, function numbers. Inclusive. 
-const PCI_BUS_MAX: i64 = (1 << 8) - 1; -const PCI_DEV_MAX: i64 = (1 << 5) - 1; -const PCI_FUNC_MAX: i64 = (1 << 3) - 1; - /// Maximum x86 IRQ vector value. Inclusive. /// This value is calculated by the kernel as `irq_user_max - irq_user_min` in /// `src/arch/x86/object/interrupt.c` const X86_IRQ_VECTOR_MAX: i64 = 107; +// In reality the kernel dynamically determines this value. The tool assumes +// at least 512GiB of IO virtual address space. This handles all values reported +// to us by the kernel at runtime except for the 1GiB or no-iommu cases. +// This is currently applied to any iommu mapping independent of the architecture. +const IOMAP_MAX_VADDR: u64 = (1 << 39) - 1; + /// The purpose of this function is to parse an integer that could /// either be in decimal or hex format, unlike the normal parsing /// functionality that the Rust standard library provides. @@ -93,6 +95,154 @@ fn loc_string(xml_sdf: &XmlSystemDescription, pos: roxmltree::TextPos) -> String format!("{}:{}:{}", xml_sdf.filename.display(), pos.row, pos.col) } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct PciDevice { + pub bus: u8, + pub device: u8, + pub function: u8, +} + +impl PciDevice { + /// Maximum values for PCI bus, device, function numbers. Inclusive. 
+ const PCI_BUS_MAX: i64 = (1 << 8) - 1; + const PCI_DEV_MAX: i64 = (1 << 5) - 1; + const PCI_FUNC_MAX: i64 = (1 << 3) - 1; +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PciDeviceParseError { + Malformed, + BusParse, + DeviceParse, + FunctionParse, + BusOutOfRange, + DeviceOutOfRange, + FunctionOutOfRange, +} + +impl fmt::Display for PciDevice { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{:02x}:{:02x}.{:x}", + self.bus, self.device, self.function + ) + } +} + +impl fmt::Display for PciDeviceParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PciDeviceParseError::Malformed => { + write!(f, "expected PCI address in bus:device.function form") + } + PciDeviceParseError::BusParse => write!(f, "failed to parse PCI bus"), + PciDeviceParseError::DeviceParse => write!(f, "failed to parse PCI device"), + PciDeviceParseError::FunctionParse => write!(f, "failed to parse PCI function"), + PciDeviceParseError::BusOutOfRange => { + write!(f, "PCI bus must be within [0..{}]", PciDevice::PCI_BUS_MAX) + } + PciDeviceParseError::DeviceOutOfRange => { + write!( + f, + "PCI device must be within [0..{}]", + PciDevice::PCI_DEV_MAX + ) + } + PciDeviceParseError::FunctionOutOfRange => { + write!( + f, + "PCI function must be within [0..{}]", + PciDevice::PCI_FUNC_MAX + ) + } + } + } +} + +impl FromStr for PciDevice { + type Err = PciDeviceParseError; + + fn from_str(s: &str) -> Result { + let (bus_str, device_function_str) = + s.split_once(':').ok_or(PciDeviceParseError::Malformed)?; + let (device_str, function_str) = device_function_str + .split_once('.') + .ok_or(PciDeviceParseError::Malformed)?; + + let bus = + i64::from_str_radix(bus_str.trim(), 16).map_err(|_| PciDeviceParseError::BusParse)?; + let device = i64::from_str_radix(device_str.trim(), 16) + .map_err(|_| PciDeviceParseError::DeviceParse)?; + let function = i64::from_str_radix(function_str.trim(), 16) + .map_err(|_| 
PciDeviceParseError::FunctionParse)?; + + if !(0..=PciDevice::PCI_BUS_MAX).contains(&bus) { + return Err(PciDeviceParseError::BusOutOfRange); + } + if !(0..=PciDevice::PCI_DEV_MAX).contains(&device) { + return Err(PciDeviceParseError::DeviceOutOfRange); + } + if !(0..=PciDevice::PCI_FUNC_MAX).contains(&function) { + return Err(PciDeviceParseError::FunctionOutOfRange); + } + + Ok(PciDevice { + bus: bus as u8, + device: device as u8, + function: function as u8, + }) + } +} + +// This can be extended in future to support devices on an SMMU enabled Arm device +// or IOMMU enabled RISC-V device. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum IommuDeviceIdentifier { + X86Pci(PciDevice), +} + +#[derive(Clone, PartialEq, Eq)] +pub enum IommuDeviceIdentifierParseError { + UnsupportedArch(Arch), + Pci(PciDeviceParseError), +} + +impl IommuDeviceIdentifier { + fn from_str_for_arch( + config: &Config, + s: &str, + ) -> Result { + match config.arch { + Arch::X86_64 => PciDevice::from_str(s) + .map(IommuDeviceIdentifier::X86Pci) + .map_err(IommuDeviceIdentifierParseError::Pci), + Arch::Aarch64 | Arch::Riscv64 => Err(IommuDeviceIdentifierParseError::UnsupportedArch( + config.arch, + )), + } + } +} + +impl fmt::Display for IommuDeviceIdentifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + IommuDeviceIdentifier::X86Pci(pci_device) => write!(f, "PCI device {pci_device}"), + } + } +} + +impl fmt::Display for IommuDeviceIdentifierParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + IommuDeviceIdentifierParseError::UnsupportedArch(arch) => { + write!(f, "IOMMU device identifiers are not supported on {arch}") + } + IommuDeviceIdentifierParseError::Pci(err) => write!(f, "{err}"), + } + } +} + #[repr(u8)] pub enum SysMapPerms { Read = 1, @@ -100,6 +250,22 @@ pub enum SysMapPerms { Execute = 4, } +impl SysMapPerms { + fn from_str(s: &str) -> Result { + let mut perms = 0; + for c in s.chars() { + match c 
{ + 'r' => perms |= SysMapPerms::Read as u8, + 'w' => perms |= SysMapPerms::Write as u8, + 'x' => perms |= SysMapPerms::Execute as u8, + _ => return Err(()), + } + } + + Ok(perms) + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct SysMap { pub mr: String, @@ -111,6 +277,113 @@ pub struct SysMap { pub text_pos: Option, } +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum SysIOMapPerms { + Read, + Write, + ReadWrite, +} + +impl SysIOMapPerms { + fn from_str(s: &str) -> Result { + let mut read = false; + let mut write = false; + + for c in s.chars() { + match c { + 'r' => read = true, + 'w' => write = true, + _ => return Err(()), + } + } + + match (read, write) { + (true, true) => Ok(SysIOMapPerms::ReadWrite), + (true, false) => Ok(SysIOMapPerms::Read), + (false, true) => Ok(SysIOMapPerms::Write), + (false, false) => Err(()), + } + } + + pub fn read(self) -> bool { + matches!(self, SysIOMapPerms::Read | SysIOMapPerms::ReadWrite) + } + + pub fn write(self) -> bool { + matches!(self, SysIOMapPerms::Write | SysIOMapPerms::ReadWrite) + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct SysIOMap { + pub device: String, + pub mr: String, + pub identifier: IommuDeviceIdentifier, + pub iovaddr: u64, + pub perms: SysIOMapPerms, + pub text_pos: Option, +} + +trait Map { + fn mr_name(&self) -> &str; + fn addr(&self) -> u64; + fn text_pos(&self) -> Option; + fn element(&self) -> &'static str; + fn addr_name(&self) -> &'static str; + fn range_name(&self) -> &'static str; +} + +impl Map for SysMap { + fn mr_name(&self) -> &str { + &self.mr + } + + fn addr(&self) -> u64 { + self.vaddr + } + + fn text_pos(&self) -> Option { + self.text_pos + } + + fn element(&self) -> &'static str { + "map" + } + + fn addr_name(&self) -> &'static str { + "vaddr" + } + + fn range_name(&self) -> &'static str { + "virtual address range" + } +} + +impl Map for SysIOMap { + fn mr_name(&self) -> &str { + &self.mr + } + + fn addr(&self) -> u64 { + self.iovaddr + } + + fn text_pos(&self) 
-> Option { + self.text_pos + } + + fn element(&self) -> &'static str { + "iomap" + } + + fn addr_name(&self) -> &'static str { + "iovaddr" + } + + fn range_name(&self) -> &'static str { + "io address range" + } +} #[derive(Debug, PartialEq, Eq, Clone)] pub enum SysMemoryRegionKind { User, @@ -186,9 +459,7 @@ pub enum SysIrqKind { }, /// x86-64 specific MSI { - pci_bus: u64, - pci_dev: u64, - pci_func: u64, + pci_device: PciDevice, handle: u64, vector: u64, }, @@ -255,7 +526,7 @@ pub struct Channel { #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct CpuCore(pub u8); -impl Display for CpuCore { +impl fmt::Display for CpuCore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!("cpu{:02}", self.0)) } @@ -346,49 +617,6 @@ pub struct VirtualCpu { pub cpu: Option, } -/// To avoid code duplication for handling protection domains -/// and virtual machines, which have a lot in common. -trait ExecutionContext { - fn name(&self) -> &String; - fn kind(&self) -> &'static str; -} - -impl ExecutionContext for ProtectionDomain { - fn name(&self) -> &String { - &self.name - } - - fn kind(&self) -> &'static str { - "protection domain" - } -} - -impl ExecutionContext for VirtualMachine { - fn name(&self) -> &String { - &self.name - } - - fn kind(&self) -> &'static str { - "virtual machine" - } -} - -impl SysMapPerms { - fn from_str(s: &str) -> Result { - let mut perms = 0; - for c in s.chars() { - match c { - 'r' => perms |= SysMapPerms::Read as u8, - 'w' => perms |= SysMapPerms::Write as u8, - 'x' => perms |= SysMapPerms::Execute as u8, - _ => return Err(()), - } - } - - Ok(perms) - } -} - impl SysMap { fn from_xml( xml_sdf: &XmlSystemDescription, @@ -411,7 +639,7 @@ impl SysMap { return Err(value_error( xml_sdf, node, - format!("vaddr (0x{vaddr:x}) must be less than 0x{max_vaddr:x}"), + format!("vaddr ({vaddr:#x}) must be less than {max_vaddr:#x}"), )); } @@ -466,6 +694,137 @@ impl SysMap { } } +impl SysIOMap { + fn 
from_xml( + _config: &Config, + xml_sdf: &XmlSystemDescription, + node: &roxmltree::Node, + device: &str, + identifier: IommuDeviceIdentifier, + ) -> Result { + let attrs = vec!["mr", "iovaddr", "perms"]; + + check_attributes(xml_sdf, node, &attrs)?; + + let mr = checked_lookup(xml_sdf, node, "mr")?.to_string(); + let iovaddr = sdf_parse_number(checked_lookup(xml_sdf, node, "iovaddr")?, node)?; + + if iovaddr > IOMAP_MAX_VADDR { + return Err(value_error( + xml_sdf, + node, + format!( + "iovaddr ({iovaddr:#x}) must be less than {:#x}", + IOMAP_MAX_VADDR + 1 + ), + )); + } + + let perms = if let Some(xml_perms) = node.attribute("perms") { + match SysIOMapPerms::from_str(xml_perms) { + Ok(parsed_perms) => parsed_perms, + Err(()) => { + return Err(value_error( + xml_sdf, + node, + "perms for io mapped memory must only be a combination of 'r' and 'w'" + .to_string(), + )) + } + } + } else { + // Default to read-write + SysIOMapPerms::ReadWrite + }; + + Ok(SysIOMap { + device: device.to_string(), + mr, + identifier, + iovaddr, + perms, + text_pos: Some(xml_sdf.doc.text_pos_at(node.range().start)), + }) + } +} + +// This is implemented in such a way that each device will have its own address space. +// If devices need to share physical memory, this can be done by mapping the same memory_region +// into each address space. 
+struct IOAddressSpace { + iomaps: Vec, +} + +impl IOAddressSpace { + fn from_xml( + config: &Config, + xml_sdf: &XmlSystemDescription, + node: &roxmltree::Node, + device_names: &mut HashSet, + iommu_device_identifiers: &mut HashSet, + ) -> Result { + let pos = xml_sdf.doc.text_pos_at(node.range().start); + if !config.iommu { + return Err(format!( + "Error: io address space requires seL4 to be built with IOMMU support: {}", + loc_string(xml_sdf, pos) + )); + } + + check_attributes(xml_sdf, node, &["name", "peripheral_id"])?; + let device_name = checked_lookup(xml_sdf, node, "name")?; + if !device_names.insert(device_name.to_string()) { + return Err(value_error( + xml_sdf, + node, + format!("duplicate device name '{device_name}'"), + )); + } + + // In the SDF we use peripheral_id as an architecture agnostic way to describe + // how a device is identified in a system. For example on x86 the IOMMU identifies + // devices by the PCI tuple (bus,dev,fn) + let identifier_str = checked_lookup(xml_sdf, node, "peripheral_id")?; + let identifier = + IommuDeviceIdentifier::from_str_for_arch(config, identifier_str).map_err(|err| { + value_error( + xml_sdf, + node, + format!("failed to parse device peripheral_id '{identifier_str}': {err}"), + ) + })?; + if !iommu_device_identifiers.insert(identifier) { + return Err(value_error( + xml_sdf, + node, + format!("duplicate device peripheral_id '{identifier}'"), + )); + } + + let mut iomaps = Vec::new(); + + for child in node.children().filter(|node| node.is_element()) { + match child.tag_name().name() { + "iomap" => { + let iomap = + SysIOMap::from_xml(config, xml_sdf, &child, device_name, identifier)?; + iomaps.push(iomap); + } + _ => { + let pos = xml_sdf.doc.text_pos_at(child.range().start); + return Err(format!( + "Error: invalid XML element '{}': {}", + child.tag_name().name(), + loc_string(xml_sdf, pos) + )); + } + } + } + + Ok(IOAddressSpace { iomaps }) + } +} + impl ProtectionDomain { pub fn needs_ep(&self, self_id: usize, 
channels: &[Channel]) -> bool { self.has_children @@ -623,7 +982,7 @@ impl ProtectionDomain { xml_sdf, node, format!( - "stack size must be between 0x{PD_MIN_STACK_SIZE:x} bytes and 0x{PD_MAX_STACK_SIZE:x} bytes" + "stack size must be between {PD_MIN_STACK_SIZE:#x} bytes and {PD_MAX_STACK_SIZE:#x} bytes" ), )); } @@ -906,94 +1265,8 @@ impl ProtectionDomain { &["id", "setvar_id", "pcidev", "handle", "vector"], )?; - // A "pcidev" attribute is in a form of Bus:Dev.Func - // Split by the colon then the dot. - - // If the input is valid, index 0 contains "Bus", index 1 contains - // "Dev.Func" - let pci_parts_by_colon: Vec<_> = - pcidev_str.split(':').map(str::trim).collect(); - - if pci_parts_by_colon.len() != 2 { - return Err(format!( - "Error: failed to parse PCI address '{}' on element '{}'", - pcidev_str, - child.tag_name().name() - )); - } - - // If the input is valid, index 0 contains "Dev", index 1 contains - // "Func" - let pci_parts_by_dot: Vec<_> = - pci_parts_by_colon[1].split('.').map(str::trim).collect(); - if pci_parts_by_dot.len() != 2 { - return Err(format!( - "Error: failed to parse PCI address '{}' on element '{}'", - pcidev_str, - child.tag_name().name() - )); - } - - let pci_bus_maybe = i64::from_str_radix(pci_parts_by_colon[0], 16); - let pci_dev_maybe = i64::from_str_radix(pci_parts_by_dot[0], 16); - let pci_func_maybe = i64::from_str_radix(pci_parts_by_dot[1], 16); - - match pci_bus_maybe { - Ok(pci_bus_unchecked) => { - if !(0..=PCI_BUS_MAX).contains(&pci_bus_unchecked) { - return Err(value_error( - xml_sdf, - &child, - format!("PCI bus must be within [0..{PCI_BUS_MAX}]"), - )); - } - } - Err(_) => { - return Err(format!( - "Error: failed to parse PCI bus of '{}' on element '{}'", - pcidev_str, - child.tag_name().name() - )) - } - }; - - match pci_dev_maybe { - Ok(pci_dev_unchecked) => { - if !(0..=PCI_DEV_MAX).contains(&pci_dev_unchecked) { - return Err(value_error( - xml_sdf, - &child, - format!("PCI device must be within 
[0..{PCI_DEV_MAX}]"), - )); - } - } - Err(_) => { - return Err(format!( - "Error: failed to parse PCI device of '{}' on element '{}'", - pcidev_str, - child.tag_name().name() - )) - } - }; - - match pci_func_maybe { - Ok(pci_func_unchecked) => { - if !(0..=PCI_FUNC_MAX).contains(&pci_func_unchecked) { - return Err(value_error( - xml_sdf, - &child, - format!("PCI function must be within [0..{PCI_FUNC_MAX}]"), - )); - } - } - Err(_) => { - return Err(format!( - "Error: failed to parse PCI function of '{}' on element '{}'", - pcidev_str, - child.tag_name().name() - )) - } - }; + let pci_device = PciDevice::from_str(pcidev_str) + .map_err(|err| value_error(xml_sdf, &child, err.to_string()))?; let handle = checked_lookup(xml_sdf, &child, "handle")? .parse::() @@ -1020,9 +1293,7 @@ impl ProtectionDomain { let irq = SysIrq { id: id as u64, kind: SysIrqKind::MSI { - pci_bus: pci_bus_maybe.unwrap() as u64, - pci_dev: pci_dev_maybe.unwrap() as u64, - pci_func: pci_func_maybe.unwrap() as u64, + pci_device, handle: handle as u64, vector: vector as u64, }, @@ -1501,7 +1772,7 @@ impl SysMemoryRegion { return Err(value_error( xml_sdf, node, - format!("page size 0x{page_size:x} not supported"), + format!("page size {page_size:#x} not supported"), )); } @@ -1694,58 +1965,112 @@ struct XmlSystemDescription<'a> { pub struct SystemDescription { pub protection_domains: Vec, pub memory_regions: Vec, + pub iomaps: Vec, pub channels: Vec, } -fn check_maps( +fn location_suffix_format( + xml_sdf: &XmlSystemDescription, + text_pos: Option, +) -> String { + text_pos + .map(|pos| format!("@ {}", loc_string(xml_sdf, pos))) + .unwrap_or_default() +} + +// max_end is the first invalid virtual address +fn check_maps<'a, M, I>( xml_sdf: &XmlSystemDescription, mrs: &[SysMemoryRegion], - e: &dyn ExecutionContext, - maps: &[SysMap], -) -> Result<(), String> { - let mut checked_maps = Vec::with_capacity(maps.len()); + maps: I, + address_space: &str, + max_end: u64, +) -> Result<(), String> +where + M: 
Map + 'a, + I: IntoIterator, +{ + let mut checked_maps: Vec<(&str, u64, u64)> = Vec::new(); + for map in maps { - let maybe_mr = mrs.iter().find(|mr| mr.name == map.mr); - let pos = map.text_pos.unwrap(); - match maybe_mr { + let element = map.element(); + match mrs.iter().find(|mr| mr.name == map.mr_name()) { Some(mr) => { - if !map.vaddr.is_multiple_of(mr.page_size_bytes()) { + if !map.addr().is_multiple_of(mr.page_size_bytes()) { return Err(format!( - "Error: invalid vaddr alignment on 'map' @ {}", - loc_string(xml_sdf, pos) + "Error: invalid {} alignment on '{element}' {}", + map.addr_name(), + location_suffix_format(xml_sdf, map.text_pos()) )); } - let map_start = map.vaddr; - let map_end = map.vaddr + mr.size; - for (name, start, end) in &checked_maps { + let map_start = map.addr(); + let Some(map_end) = map_start.checked_add(mr.size) else { + return Err(format!( + "Error: {element} for '{}' has address range that overflows {}", + map.mr_name(), + location_suffix_format(xml_sdf, map.text_pos()) + )); + }; + + if map_end > max_end { + return Err(format!( + "Error: {element} for '{}' has {} [{:#x}..{:#x}) which exceeds valid address space [{:#x}..{:#x}) {}", + map.mr_name(), + map.range_name(), + map_start, + map_end, + 0, + max_end, + location_suffix_format(xml_sdf, map.text_pos()) + )); + } + + for (name, start, end) in checked_maps.iter() { if !(map_start >= *end || map_end <= *start) { - return Err( - format!( - "Error: map for '{}' has virtual address range [0x{:x}..0x{:x}) which overlaps with map for '{}' [0x{:x}..0x{:x}) in {} '{}' @ {}", - map.mr, - map_start, - map_end, - name, - start, - end, - e.kind(), - e.name(), - loc_string(xml_sdf, map.text_pos.unwrap()) - ) - ); + return Err(format!( + "Error: map for '{}' has {} [{:#x}..{:#x}) which overlaps with map for '{}' [{:#x}..{:#x}) in {} {}", + map.mr_name(), + map.range_name(), + map_start, + map_end, + name, + start, + end, + address_space, + location_suffix_format(xml_sdf, map.text_pos()) + )); } } 
- checked_maps.push((&map.mr, map_start, map_end)); + checked_maps.push((map.mr_name(), map_start, map_end)); } None => { return Err(format!( - "Error: invalid memory region name '{}' on 'map' @ {}", - map.mr, - loc_string(xml_sdf, pos) - )) + "Error: invalid memory region name '{}' on '{element}' {}", + map.mr_name(), + location_suffix_format(xml_sdf, map.text_pos()) + )); } - }; + } + } + + Ok(()) +} + +fn check_io_maps( + xml_sdf: &XmlSystemDescription, + mrs: &[SysMemoryRegion], + iomaps: &[SysIOMap], +) -> Result<(), String> { + let mut by_device: HashMap> = HashMap::new(); + + for iomap in iomaps { + by_device.entry(iomap.identifier).or_default().push(iomap); + } + + for (identifier, maps) in by_device { + let address_space = identifier.to_string(); + check_maps(xml_sdf, mrs, maps, &address_space, IOMAP_MAX_VADDR + 1)?; } Ok(()) @@ -1927,6 +2252,9 @@ pub fn parse( let mut root_pds = vec![]; let mut mrs = vec![]; + let mut iomaps = vec![]; + let mut device_names = HashSet::new(); + let mut iommu_device_identifiers = HashSet::new(); let mut channels = vec![]; let system = doc .root() @@ -1959,6 +2287,18 @@ pub fn parse( &child, search_paths, )?), + "io_address_space" => { + iomaps.extend( + IOAddressSpace::from_xml( + config, + &xml_sdf, + &child, + &mut device_names, + &mut iommu_device_identifiers, + )? + .iomaps, + ); + } "virtual_machine" => { let pos = xml_sdf.doc.text_pos_at(child.range().start); return Err(format!( @@ -2080,7 +2420,7 @@ pub fn parse( // expose this footgun to users. 
return Err(format!( "Error: It is not possible for PD '{}' with a bound vCPU to have children on x86_64: {}", - pd.name(), + pd.name, loc_string(&xml_sdf, pd.text_pos.unwrap()))); } } @@ -2234,12 +2574,26 @@ pub fn parse( // Ensure that all maps are correct for pd in &pds { - check_maps(&xml_sdf, &mrs, pd, &pd.maps)?; + check_maps( + &xml_sdf, + &mrs, + pd.maps.iter(), + &format!("protection domain '{}'", pd.name), + config.pd_map_max_vaddr(pd.stack_size), + )?; if let Some(vm) = &pd.virtual_machine { - check_maps(&xml_sdf, &mrs, vm, &vm.maps)?; + check_maps( + &xml_sdf, + &mrs, + vm.maps.iter(), + &format!("virtual machine '{}'", vm.name), + config.vm_map_max_vaddr(), + )?; } } + check_io_maps(&xml_sdf, &mrs, &iomaps)?; + // Ensure that there are no overlapping extra cap maps in the user caps region // and we are not mapping in the same cap from the same source more than once for pd in &pds { @@ -2283,7 +2637,7 @@ pub fn parse( let pos = mr.text_pos.unwrap(); return Err( format!( - "Error: memory region '{}' physical address range [0x{:x}..0x{:x}) overlaps with another memory region '{}' [0x{:x}..0x{:x}) @ {}", + "Error: memory region '{}' physical address range [{:#x}..{:#x}) overlaps with another memory region '{}' [{:#x}..{:#x}) @ {}", mr.name, mr_start, mr_end, @@ -2417,6 +2771,7 @@ pub fn parse( Ok(SystemDescription { protection_domains: pds, memory_regions: mrs, + iomaps, channels, }) } diff --git a/tool/microkit/src/sel4.rs b/tool/microkit/src/sel4.rs index 641fa63c6..e9cb8b834 100644 --- a/tool/microkit/src/sel4.rs +++ b/tool/microkit/src/sel4.rs @@ -289,6 +289,7 @@ pub struct Config { pub cap_address_bits: u64, pub fan_out_limit: u64, pub max_num_bootinfo_untypeds: u64, + pub iommu: bool, pub hypervisor: bool, pub benchmark: bool, pub num_cores: u8, diff --git a/tool/microkit/tests/sdf/iommu_address_overflow.system b/tool/microkit/tests/sdf/iommu_address_overflow.system new file mode 100644 index 000000000..b6f06e890 --- /dev/null +++ 
b/tool/microkit/tests/sdf/iommu_address_overflow.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_duplicate_device.system b/tool/microkit/tests/sdf/iommu_duplicate_device.system new file mode 100644 index 000000000..7641c3d01 --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_duplicate_device.system @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_duplicate_device_identifier.system b/tool/microkit/tests/sdf/iommu_duplicate_device_identifier.system new file mode 100644 index 000000000..5c0f09efc --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_duplicate_device_identifier.system @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_missing_from_config.system b/tool/microkit/tests/sdf/iommu_missing_from_config.system new file mode 100644 index 000000000..e57382d9d --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_missing_from_config.system @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_out_of_bound.system b/tool/microkit/tests/sdf/iommu_out_of_bound.system new file mode 100644 index 000000000..e57382d9d --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_out_of_bound.system @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_overlap_different_devices.system b/tool/microkit/tests/sdf/iommu_overlap_different_devices.system new file mode 100644 index 000000000..ec267eebf --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_overlap_different_devices.system @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_overlap_mixed_page_sizes.system b/tool/microkit/tests/sdf/iommu_overlap_mixed_page_sizes.system new file mode 100644 index 000000000..cb62e8a06 --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_overlap_mixed_page_sizes.system @@ -0,0 +1,17 @@ + + + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_overlap_same_device.system 
b/tool/microkit/tests/sdf/iommu_overlap_same_device.system new file mode 100644 index 000000000..007be877c --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_overlap_same_device.system @@ -0,0 +1,17 @@ + + + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_pci_invalid.system b/tool/microkit/tests/sdf/iommu_pci_invalid.system new file mode 100644 index 000000000..fcd8e7885 --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_pci_invalid.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_pci_negative_bus.system b/tool/microkit/tests/sdf/iommu_pci_negative_bus.system new file mode 100644 index 000000000..f496e8b6c --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_pci_negative_bus.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_pci_negative_device.system b/tool/microkit/tests/sdf/iommu_pci_negative_device.system new file mode 100644 index 000000000..ff1366d20 --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_pci_negative_device.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_pci_negative_function.system b/tool/microkit/tests/sdf/iommu_pci_negative_function.system new file mode 100644 index 000000000..1324c3f8b --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_pci_negative_function.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_perms_execute_invalid.system b/tool/microkit/tests/sdf/iommu_perms_execute_invalid.system new file mode 100644 index 000000000..bffff94be --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_perms_execute_invalid.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_perms_read_write.system b/tool/microkit/tests/sdf/iommu_perms_read_write.system new file mode 100644 index 000000000..af489480d --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_perms_read_write.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git 
a/tool/microkit/tests/sdf/iommu_perms_write_only.system b/tool/microkit/tests/sdf/iommu_perms_write_only.system new file mode 100644 index 000000000..a2706700b --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_perms_write_only.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_range_out_of_bound.system b/tool/microkit/tests/sdf/iommu_range_out_of_bound.system new file mode 100644 index 000000000..d99627331 --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_range_out_of_bound.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_same_mr_different_devices.system b/tool/microkit/tests/sdf/iommu_same_mr_different_devices.system new file mode 100644 index 000000000..c31be4d1c --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_same_mr_different_devices.system @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/iommu_valid_perms_and_bound.system b/tool/microkit/tests/sdf/iommu_valid_perms_and_bound.system new file mode 100644 index 000000000..e7fd642e5 --- /dev/null +++ b/tool/microkit/tests/sdf/iommu_valid_perms_and_bound.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/pd_overlapping_mixed_page_sizes.system b/tool/microkit/tests/sdf/pd_overlapping_mixed_page_sizes.system new file mode 100644 index 000000000..2a604bb10 --- /dev/null +++ b/tool/microkit/tests/sdf/pd_overlapping_mixed_page_sizes.system @@ -0,0 +1,15 @@ + + + + + + + + + + + diff --git a/tool/microkit/tests/sdf/sys_map_range_too_high.system b/tool/microkit/tests/sdf/sys_map_range_too_high.system new file mode 100644 index 000000000..2dbffe5ea --- /dev/null +++ b/tool/microkit/tests/sdf/sys_map_range_too_high.system @@ -0,0 +1,13 @@ + + + + + + + + + diff --git a/tool/microkit/tests/test.rs b/tool/microkit/tests/test.rs index 1c5046ebb..9e9f4a958 100644 --- a/tool/microkit/tests/test.rs +++ b/tool/microkit/tests/test.rs @@ -22,6 +22,7 @@ const DEFAULT_AARCH64_KERNEL_CONFIG: 
sel4::Config = sel4::Config { max_num_bootinfo_untypeds: 230, fan_out_limit: 256, hypervisor: true, + iommu: true, benchmark: false, num_cores: 1, fpu: true, @@ -46,6 +47,7 @@ const DEFAULT_X86_64_KERNEL_CONFIG: sel4::Config = sel4::Config { max_num_bootinfo_untypeds: 230, fan_out_limit: 256, hypervisor: true, + iommu: true, benchmark: false, num_cores: 1, fpu: true, @@ -589,7 +591,7 @@ mod protection_domain { check_error( &DEFAULT_X86_64_KERNEL_CONFIG, "irq_msi_pci_invalid.system", - "Error: failed to parse PCI address '0:0:0' on element 'irq'", + "Error: expected PCI address in bus:device.function form on element 'irq'", ) } @@ -660,6 +662,14 @@ mod protection_domain { ) } + #[test] + fn test_overlapping_mixed_page_size_maps() { + check_error(&DEFAULT_AARCH64_KERNEL_CONFIG, + "pd_overlapping_mixed_page_sizes.system", + "Error: map for 'small_region' has virtual address range [0x201000..0x202000) which overlaps with map for 'large_region' [0x200000..0x400000) in protection domain 'test' @" + ) + } + #[test] fn test_overlapping_x86_io_ports_1() { check_error(&DEFAULT_X86_64_KERNEL_CONFIG, @@ -695,6 +705,221 @@ mod protection_domain { } } +#[cfg(test)] +mod iommu { + use super::*; + + #[test] + fn test_iommu_missing_from_config() { + check_error( + &sel4::Config { + iommu: false, + ..DEFAULT_X86_64_KERNEL_CONFIG + }, + "iommu_missing_from_config.system", + "Error: io address space requires seL4 to be built with IOMMU support: ", + ); + } + + #[test] + fn test_iommu_missing_from_aarch64_config() { + check_error( + &sel4::Config { + iommu: false, + ..DEFAULT_AARCH64_KERNEL_CONFIG + }, + "iommu_missing_from_config.system", + "Error: io address space requires seL4 to be built with IOMMU support: ", + ); + } + + #[test] + fn test_iommu_missing_from_riscv64_config() { + check_error( + &sel4::Config { + arch: sel4::Arch::Riscv64, + iommu: false, + ..DEFAULT_AARCH64_KERNEL_CONFIG + }, + "iommu_missing_from_config.system", + "Error: io address space requires seL4 to be built 
with IOMMU support: ", + ); + } + + #[test] + fn test_iommu_unsupported_on_aarch64() { + check_error( + &sel4::Config { + iommu: true, + ..DEFAULT_AARCH64_KERNEL_CONFIG + }, + "iommu_missing_from_config.system", + "Error: failed to parse device peripheral_id '0:3.0': IOMMU device identifiers are not supported on AArch64 on element 'io_address_space':", + ); + } + + #[test] + fn test_iommu_unsupported_on_riscv64() { + check_error( + &sel4::Config { + arch: sel4::Arch::Riscv64, + iommu: true, + ..DEFAULT_AARCH64_KERNEL_CONFIG + }, + "iommu_missing_from_config.system", + "Error: failed to parse device peripheral_id '0:3.0': IOMMU device identifiers are not supported on RISC-V (64-bit) on element 'io_address_space':", + ); + } + + #[test] + fn test_out_of_bound() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_out_of_bound.system", + "Error: iovaddr (0x8000000000) must be less than 0x8000000000 on element 'iomap': iommu_out_of_bound.system:10:9", + ) + } + + #[test] + fn test_range_out_of_bound() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_range_out_of_bound.system", + "Error: iomap for 'region' has io address range [0x7fff800000..0x8000200000) which exceeds valid address space [0x0..0x8000000000) @", + ) + } + + #[test] + fn test_address_overflow() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_address_overflow.system", + "Error: iomap for 'region' has address range that overflows @", + ) + } + + #[test] + fn test_valid_perms_and_bound() { + check_success( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_valid_perms_and_bound.system", + ) + } + + #[test] + fn test_perms_read_write() { + check_success( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_perms_read_write.system", + ) + } + + #[test] + fn test_perms_write_only() { + check_success( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_perms_write_only.system", + ) + } + + #[test] + fn test_perms_execute_invalid() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + 
"iommu_perms_execute_invalid.system", + "Error: perms for io mapped memory must only be a combination of 'r' and 'w' on element 'iomap':", + ) + } + + #[test] + fn test_overlap_different_devices() { + check_success( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_overlap_different_devices.system", + ) + } + + #[test] + fn test_overlap_same_device() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_overlap_same_device.system", + "Error: map for 'region_b' has io address range [0x0..0x1000) which overlaps with map for 'region_a' [0x0..0x1000) in PCI device 00:03.0 @ iommu_overlap_same_device.system:12:9", + ) + } + + #[test] + fn test_same_mr_different_devices() { + check_success( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_same_mr_different_devices.system", + ) + } + + #[test] + fn test_overlap_mixed_page_sizes() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_overlap_mixed_page_sizes.system", + "Error: map for 'small_region' has io address range [0x1000..0x2000) which overlaps with map for 'large_region' [0x0..0x200000) in PCI device 00:03.0 @", + ) + } + + #[test] + fn test_pci_invalid() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_pci_invalid.system", + "Error: failed to parse device peripheral_id '0:g.0': failed to parse PCI device on element 'io_address_space': iommu_pci_invalid.system:9:5", + ) + } + + #[test] + fn test_pci_negative_bus() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_pci_negative_bus.system", + "Error: failed to parse device peripheral_id '-1:0.0': PCI bus must be within [0..255] on element 'io_address_space':", + ) + } + + #[test] + fn test_pci_negative_device() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_pci_negative_device.system", + "Error: failed to parse device peripheral_id '0:-1.0': PCI device must be within [0..31] on element 'io_address_space':", + ) + } + + #[test] + fn test_pci_negative_function() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + 
"iommu_pci_negative_function.system", + "Error: failed to parse device peripheral_id '0:0.-1': PCI function must be within [0..7] on element 'io_address_space':", + ) + } + + #[test] + fn test_duplicate_device() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_duplicate_device.system", + "Error: duplicate device name 'test_device' on element 'io_address_space':", + ) + } + + #[test] + fn test_duplicate_device_identifier() { + check_error( + &DEFAULT_X86_64_KERNEL_CONFIG, + "iommu_duplicate_device_identifier.system", + "Error: duplicate device peripheral_id 'PCI device 00:03.0' on element 'io_address_space':", + ) + } +} + #[cfg(test)] mod virtual_machine { use super::*; @@ -1011,6 +1236,15 @@ mod system { ) } + #[test] + fn test_map_range_too_high() { + check_error( + &DEFAULT_AARCH64_KERNEL_CONFIG, + "sys_map_range_too_high.system", + "Error: map for 'foo' has virtual address range [0xffffffe000..0x10000000000) which exceeds valid address space [0x0..0xfffffff000) @", + ) + } + #[test] fn test_too_many_pds() { check_error(