From c3c9b14dabc3fabf592237ec5b9bec7fadf0a6a9 Mon Sep 17 00:00:00 2001 From: Shion Tanaka Date: Wed, 6 May 2026 22:56:17 +0900 Subject: [PATCH 1/2] feat(macOS): add vfkit backend for ephemeral and persistent VMs macOS has no KVM/QEMU, so this adds vfkit as the VM backend. Ephemeral VMs use direct kernel boot with SquashFS, persistent VMs use EFI boot. The vfkit/ module mirrors the libvirt/ directory structure, and CLI options match Linux where applicable. Build and run on macOS: cargo build --release codesign -fs - target/release/bcvk Tested on macOS (Apple Silicon) with rootful and rootless podman machine. Assisted-by: Claude Code (Claude Opus 4.6) Signed-off-by: Shion Tanaka --- Cargo.lock | 41 + crates/kit/Cargo.toml | 4 + crates/kit/src/ephemeral_macos.rs | 195 +++++ crates/kit/src/lib.rs | 10 + crates/kit/src/main.rs | 45 +- crates/kit/src/run_ephemeral_macos.rs | 1094 +++++++++++++++++++++++++ crates/kit/src/ssh_options.rs | 136 +++ crates/kit/src/vfkit/inspect.rs | 62 ++ crates/kit/src/vfkit/list.rs | 29 + crates/kit/src/vfkit/mod.rs | 271 ++++++ crates/kit/src/vfkit/rm.rs | 59 ++ crates/kit/src/vfkit/rm_all.rs | 44 + crates/kit/src/vfkit/run.rs | 188 +++++ crates/kit/src/vfkit/ssh.rs | 24 + crates/kit/src/vfkit/start.rs | 115 +++ crates/kit/src/vfkit/stop.rs | 63 ++ 16 files changed, 2370 insertions(+), 10 deletions(-) create mode 100644 crates/kit/src/ephemeral_macos.rs create mode 100644 crates/kit/src/run_ephemeral_macos.rs create mode 100644 crates/kit/src/ssh_options.rs create mode 100644 crates/kit/src/vfkit/inspect.rs create mode 100644 crates/kit/src/vfkit/list.rs create mode 100644 crates/kit/src/vfkit/mod.rs create mode 100644 crates/kit/src/vfkit/rm.rs create mode 100644 crates/kit/src/vfkit/rm_all.rs create mode 100644 crates/kit/src/vfkit/run.rs create mode 100644 crates/kit/src/vfkit/ssh.rs create mode 100644 crates/kit/src/vfkit/start.rs create mode 100644 crates/kit/src/vfkit/stop.rs diff --git a/Cargo.lock b/Cargo.lock index 
47985d760..f41a4da01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -268,6 +268,7 @@ dependencies = [ "xshell", "yaml-rust2", "zlink", + "zstd", ] [[package]] @@ -433,6 +434,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1354349954c6fc9cb0deab020f27f783cf0b604e8bb754dc4658ecf0d29c35f" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] @@ -1598,6 +1601,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.3", + "libc", +] + [[package]] name = "js-sys" version = "0.3.81" @@ -3879,3 +3892,31 @@ dependencies = [ "tokio-stream", "zlink-core", ] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/crates/kit/Cargo.toml b/crates/kit/Cargo.toml index 1913b27c9..96d5b5f5d 100644 --- a/crates/kit/Cargo.toml +++ b/crates/kit/Cargo.toml @@ -58,6 +58,10 @@ zlink = "0.4" futures-util = "0.3" libsystemd = "0.7" +# macOS-only dependencies (vfkit backend) +[target.'cfg(target_os = "macos")'.dependencies] +zstd = "0.13" + 
[dev-dependencies] similar-asserts = "1.5" diff --git a/crates/kit/src/ephemeral_macos.rs b/crates/kit/src/ephemeral_macos.rs new file mode 100644 index 000000000..ca3255247 --- /dev/null +++ b/crates/kit/src/ephemeral_macos.rs @@ -0,0 +1,195 @@ +//! Ephemeral VM management commands for macOS (vfkit backend). + +use std::io::Write; +use std::process::{Command, Stdio}; + +use clap::Subcommand; +use color_eyre::eyre::bail; +use color_eyre::Result; + +use crate::run_ephemeral_macos::{self, EphemeralVmMetadata}; + +/// Options for `ephemeral run-ssh`, combining run options with optional SSH arguments. +#[derive(Debug, clap::Parser)] +pub struct RunSshOpts { + #[command(flatten)] + pub run_opts: run_ephemeral_macos::RunEphemeralOpts, + + /// SSH command to execute (optional, defaults to interactive shell) + #[arg(trailing_var_arg = true)] + pub ssh_args: Vec, +} + +#[derive(Debug, Subcommand)] +pub enum EphemeralCommands { + /// Run bootc containers as ephemeral VMs + #[clap(name = "run")] + Run(run_ephemeral_macos::RunEphemeralOpts), + + /// Run ephemeral VM and SSH into it + #[clap(name = "run-ssh")] + RunSsh(RunSshOpts), + + /// Connect to a running ephemeral VM via SSH + #[clap(name = "ssh")] + Ssh { + /// VM name + name: String, + + /// Additional SSH arguments (e.g. -v, -L, commands to execute) + #[clap(allow_hyphen_values = true)] + args: Vec, + }, + + /// List ephemeral VM containers + #[clap(name = "ps")] + Ps { + /// Output as JSON + #[clap(long)] + json: bool, + }, + + /// Remove all ephemeral VM containers + #[clap(name = "rm-all")] + RmAll { + /// Force removal without confirmation + #[clap(short, long)] + force: bool, + }, +} + +impl EphemeralCommands { + /// Execute the ephemeral subcommand. 
+ pub fn run(self) -> Result<()> { + match self { + EphemeralCommands::Run(opts) => run_ephemeral_macos::run(opts), + EphemeralCommands::RunSsh(mut opts) => { + opts.run_opts.ssh_keygen = true; + if !opts.ssh_args.is_empty() { + let combined = shlex::try_join(opts.ssh_args.iter().map(|s| s.as_str())) + .map_err(|e| color_eyre::eyre::eyre!("failed to escape SSH args: {}", e))?; + opts.run_opts.execute.push(combined); + } + run_ephemeral_macos::run(opts.run_opts) + } + EphemeralCommands::Ssh { name, args } => cmd_ssh(&name, &args), + EphemeralCommands::Ps { json } => cmd_ps(json), + EphemeralCommands::RmAll { force } => cmd_rm_all(force), + } + } +} + +fn cmd_ps(json: bool) -> Result<()> { + let vms = EphemeralVmMetadata::list_all()?; + for vm in &vms { + if !vm.is_alive() { + EphemeralVmMetadata::remove(&vm.name); + } + } + let live: Vec<_> = vms.into_iter().filter(|vm| vm.is_alive()).collect(); + + if json { + println!("{}", serde_json::to_string_pretty(&live)?); + return Ok(()); + } + + if live.is_empty() { + println!("No running ephemeral VMs."); + return Ok(()); + } + + println!("{:<24} {:<50} SSH", "NAME", "IMAGE"); + for vm in &live { + println!( + "{:<24} {:<50} ssh -p {} -i {} root@localhost", + vm.name, vm.image, vm.ssh_port, vm.ssh_key + ); + } + Ok(()) +} + +fn cmd_rm_all(force: bool) -> Result<()> { + let vms = EphemeralVmMetadata::list_all()?; + if vms.is_empty() { + println!("No ephemeral VMs found."); + return Ok(()); + } + + if !force { + println!("Found {} ephemeral VM(s):", vms.len()); + for vm in &vms { + println!( + " {} ({})", + vm.name, + if vm.is_alive() { "running" } else { "stopped" } + ); + } + print!("Remove all ephemeral VMs? 
[y/N]: "); + std::io::stdout().flush()?; + let mut input = String::new(); + std::io::stdin().read_line(&mut input)?; + let input = input.trim().to_lowercase(); + if input != "y" && input != "yes" { + println!("Aborted."); + return Ok(()); + } + } + + for vm in &vms { + if vm.is_alive() { + if let Err(e) = Command::new("kill") + .args([&vm.pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::warn!("failed to kill VM process {}: {}", vm.pid, e); + } + if vm.gvproxy_pid > 0 { + if let Err(e) = Command::new("kill") + .args([&vm.gvproxy_pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::warn!("failed to kill gvproxy {}: {}", vm.gvproxy_pid, e); + } + } + } + EphemeralVmMetadata::remove(&vm.name); + println!("Removed {}", vm.name); + } + Ok(()) +} + +fn cmd_ssh(name: &str, args: &[String]) -> Result<()> { + let vm = EphemeralVmMetadata::load(name)?; + if !vm.is_alive() { + EphemeralVmMetadata::remove(name); + bail!("VM '{}' is not running", name); + } + + // Try to set up SSH port forwarding via VM-specific gvproxy socket + let svc_sock = format!("/private/tmp/bcvk/{}-gvproxy-svc.sock", name); + if std::path::Path::new(&svc_sock).exists() { + if let Err(e) = + run_ephemeral_macos::expose_ssh_port(&svc_sock, "192.168.127.2", vm.ssh_port) + { + tracing::debug!("SSH port forward re-expose: {}", e); + } + } + + let key_path = std::path::Path::new(&vm.ssh_key); + if args.is_empty() { + run_ephemeral_macos::run_ssh_interactive(vm.ssh_port, key_path, "root")?; + } else { + let combined = shlex::try_join(args.iter().map(|s| s.as_str())) + .map_err(|e| color_eyre::eyre::eyre!("failed to escape SSH command: {}", e))?; + let status = + run_ephemeral_macos::run_ssh_command(vm.ssh_port, key_path, "root", &combined)?; + if !status.success() { + std::process::exit(status.code().unwrap_or(1)); + } + } + Ok(()) +} diff --git a/crates/kit/src/lib.rs b/crates/kit/src/lib.rs index 279e5caa5..a3aa51578 
100644 --- a/crates/kit/src/lib.rs +++ b/crates/kit/src/lib.rs @@ -4,6 +4,16 @@ pub mod cpio; pub mod qemu_img; pub mod xml_utils; +// Cross-platform modules +pub mod ssh_options; + // Linux-only modules #[cfg(target_os = "linux")] pub mod kernel; + +// macOS-only modules (vfkit backend) +#[cfg(target_os = "macos")] +pub mod run_ephemeral_macos; + +#[cfg(target_os = "macos")] +pub mod vfkit; diff --git a/crates/kit/src/main.rs b/crates/kit/src/main.rs index de0a3107e..cc4969312 100644 --- a/crates/kit/src/main.rs +++ b/crates/kit/src/main.rs @@ -11,6 +11,7 @@ mod cpio; mod install_options; mod instancetypes; mod qemu_img; +mod ssh_options; mod xml_utils; // Linux-only modules @@ -60,6 +61,14 @@ mod utils; #[cfg(target_os = "linux")] mod varlink_ipc; +// macOS-only modules (vfkit backend) +#[cfg(target_os = "macos")] +mod ephemeral_macos; +#[cfg(target_os = "macos")] +mod run_ephemeral_macos; +#[cfg(target_os = "macos")] +mod vfkit; + /// Default state directory for bcvk container data #[cfg(target_os = "linux")] pub const CONTAINER_STATEDIR: &str = "/var/lib/bcvk"; @@ -104,8 +113,8 @@ enum InternalsCmds { DumpCliJson, } -/// Stub subcommands for macOS (shows error message when run) -#[cfg(not(target_os = "linux"))] +/// Stub subcommands for unsupported platforms +#[cfg(not(any(target_os = "linux", target_os = "macos")))] #[derive(Debug, Subcommand)] pub enum StubEphemeralCommands { /// Run bootc containers as ephemeral VMs @@ -139,9 +148,21 @@ enum Commands { #[clap(subcommand)] Ephemeral(ephemeral::EphemeralCommands), - // macOS stub: ephemeral command exists but errors out - #[cfg(not(target_os = "linux"))] - /// Run bootc images as stateless VMs via QEMU+Podman (not available on this platform) + // macOS: vfkit-based ephemeral VMs + #[cfg(target_os = "macos")] + /// Manage ephemeral VMs for bootc containers (vfkit backend) + #[clap(subcommand)] + Ephemeral(ephemeral_macos::EphemeralCommands), + + // macOS: vfkit-based persistent VMs + #[cfg(target_os = "macos")] 
+ /// Manage persistent VMs (vfkit backend) + #[clap(subcommand)] + Vm(vfkit::VmCommands), + + // Other platforms: stub + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + /// Manage ephemeral VMs for bootc containers (not available on this platform) #[clap(subcommand)] Ephemeral(StubEphemeralCommands), @@ -284,13 +305,17 @@ fn main() -> Result<(), Report> { #[cfg(target_os = "linux")] Commands::Ephemeral(cmd) => cmd.run()?, - // macOS stub: ephemeral command exists but errors out - #[cfg(not(target_os = "linux"))] + #[cfg(target_os = "macos")] + Commands::Ephemeral(cmd) => cmd.run()?, + + #[cfg(target_os = "macos")] + Commands::Vm(cmd) => cmd.run()?, + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] Commands::Ephemeral(_) => { return Err(color_eyre::eyre::eyre!( - "The 'ephemeral' command is not available on macOS.\n\ - bcvk requires Linux with KVM/QEMU for VM operations.\n\ - See https://github.com/bootc-dev/bcvk/issues/21 for more information." + "The 'ephemeral' command is not available on this platform.\n\ + bcvk requires Linux with KVM/QEMU or macOS with vfkit for VM operations." )); } diff --git a/crates/kit/src/run_ephemeral_macos.rs b/crates/kit/src/run_ephemeral_macos.rs new file mode 100644 index 000000000..d7fe9257f --- /dev/null +++ b/crates/kit/src/run_ephemeral_macos.rs @@ -0,0 +1,1094 @@ +//! Ephemeral VM launch flow for macOS using vfkit + SquashFS. +//! +//! Boot flow: +//! 1. Extract kernel + initramfs from container image +//! 2. Create SquashFS rootfs (lz4, cached by digest) +//! 3. Decompress vmlinuz PE+zstd → uncompressed ARM64 Image +//! 4. Append bcvk units CPIO to initramfs (/etc overlay + /var tmpfs + SSH) +//! 5. Launch vfkit with virtio-blk (SquashFS) + virtio-net (gvproxy) +//! +//! Common helpers (gvproxy, SSH, vfkit detection) are pub for reuse by vfkit/ module. 
+ +use std::fs::{self, OpenOptions}; +use std::io::{Seek, SeekFrom, Write}; +use std::os::unix::net::UnixStream; +use std::path::Path; +use std::process::{Command, Stdio}; +use std::time::Duration; + +use color_eyre::{ + eyre::{bail, eyre, Context}, + Result, +}; +use tracing::{debug, info}; + +// --- Data structures --- + +/// Metadata for a running ephemeral VM, persisted as JSON for `ps` and `ssh`. +#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] +#[allow(dead_code)] +pub struct EphemeralVmMetadata { + /// VM name used as identifier for resource isolation. + pub name: String, + /// Container image reference used to boot the VM. + pub image: String, + /// PID of the vfkit process. + pub pid: u32, + /// PID of the gvproxy network proxy process. + pub gvproxy_pid: u32, + /// Host-side SSH port forwarded to the VM. + pub ssh_port: u16, + /// Path to the SSH private key for this VM. + pub ssh_key: String, + /// Path to the serial console log file. + pub serial_log: String, + /// Path to the vfkit process log file. + pub log_path: Option, + /// ISO 8601 timestamp when the VM was created. + pub created: String, +} + +#[allow(dead_code)] +impl EphemeralVmMetadata { + /// Return the directory path for ephemeral VM metadata files. + pub fn vms_dir() -> std::path::PathBuf { + std::path::PathBuf::from("/private/tmp/bcvk/vms") + } + + /// Save metadata to a JSON file in the VMs directory. + pub fn save(&self) -> Result<()> { + let dir = Self::vms_dir(); + fs::create_dir_all(&dir)?; + let path = dir.join(format!("{}.json", self.name)); + fs::write(&path, serde_json::to_string_pretty(self)?)?; + Ok(()) + } + + /// Remove metadata file for the named VM. + pub fn remove(name: &str) { + let path = Self::vms_dir().join(format!("{}.json", name)); + let _ = fs::remove_file(path); + } + + /// Load metadata for the named VM from its JSON file. 
+ pub fn load(name: &str) -> Result { + let path = Self::vms_dir().join(format!("{}.json", name)); + let data = fs::read_to_string(&path)?; + Ok(serde_json::from_str(&data)?) + } + + /// List all ephemeral VM metadata from the VMs directory. + pub fn list_all() -> Result> { + let dir = Self::vms_dir(); + if !dir.exists() { + return Ok(Vec::new()); + } + let mut vms = Vec::new(); + for entry in fs::read_dir(&dir)? { + let path = entry?.path(); + if path.extension().and_then(|e| e.to_str()) != Some("json") { + continue; + } + if let Ok(data) = fs::read_to_string(&path) { + if let Ok(meta) = serde_json::from_str::(&data) { + vms.push(meta); + } + } + } + Ok(vms) + } + + /// Check if the VM process is still alive via kill -0. + pub fn is_alive(&self) -> bool { + Command::new("kill") + .args(["-0", &self.pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .map(|s| s.success()) + .unwrap_or(false) + } +} + +/// Options for launching an ephemeral VM via vfkit. +#[derive(clap::Parser, Debug)] +pub struct RunEphemeralOpts { + /// Container image to boot + pub image: String, + /// Number of vCPUs + #[clap(long)] + pub vcpus: Option, + /// Memory size (e.g. 
"4G", "2048M", or plain number for MB) + #[clap(long, default_value = "4G")] + pub memory: String, + /// Generate a temporary SSH key pair for VM access + #[clap(long = "ssh-keygen", short = 'K')] + pub ssh_keygen: bool, + /// Command(s) to execute via SSH after boot + #[clap(long)] + pub execute: Vec, + /// VM name for identification and resource isolation + #[clap(long)] + pub name: Option, + /// Additional kernel command line arguments + #[clap(long = "karg")] + pub kernel_args: Vec, + /// Display VM console in GUI window + #[clap(long)] + pub gui: bool, + /// Run in background + #[clap(long, short = 'd')] + pub detach: bool, + /// Enable debug mode (reserved for future use) + #[clap(long)] + pub debug: bool, +} + +fn default_vcpus() -> u32 { + std::thread::available_parallelism() + .map(|n| n.get() as u32) + .unwrap_or(2) +} + +/// Parse memory specification string (e.g. "4G", "2048M") to megabytes. +pub fn parse_memory_to_mb(s: &str) -> Result { + let s = s.trim(); + if let Some(n) = s.strip_suffix('G').or_else(|| s.strip_suffix('g')) { + Ok((n.parse::()? * 1024.0) as u32) + } else if let Some(n) = s.strip_suffix('M').or_else(|| s.strip_suffix('m')) { + Ok(n.parse::()? as u32) + } else { + Ok(s.parse::()?) 
+ } +} + +// --- RAII cleanup guard --- + +struct VmCleanup { + vfkit_pid: u32, + gvproxy_pid: u32, + vm_name: String, +} + +impl Drop for VmCleanup { + fn drop(&mut self) { + tracing::debug!("cleaning up VM processes..."); + if let Err(e) = Command::new("kill") + .arg(self.vfkit_pid.to_string()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::warn!("failed to kill vfkit (PID {}): {}", self.vfkit_pid, e); + } + if let Err(e) = Command::new("kill") + .arg(self.gvproxy_pid.to_string()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::warn!("failed to kill gvproxy (PID {}): {}", self.gvproxy_pid, e); + } + EphemeralVmMetadata::remove(&self.vm_name); + } +} + +// --- Main entry point --- + +/// Run an ephemeral VM from a container image using vfkit + SquashFS. +pub fn run(opts: RunEphemeralOpts) -> Result<()> { + if opts.gui && opts.detach { + bail!("--gui and --detach cannot be used together (GUI requires foreground process)"); + } + + if opts.detach { + return run_detached(&opts); + } + + let vfkit_bin = find_vfkit()?; + info!(image = %opts.image, "starting ephemeral VM on macOS (vfkit + SquashFS)"); + + let cache_base = std::path::PathBuf::from("/private/tmp/bcvk"); + fs::create_dir_all(&cache_base)?; + + let machine = detect_machine_name()?; + let rootful = is_machine_rootful(&machine); + debug!( + "podman machine '{}' ({})", + machine, + if rootful { "rootful" } else { "rootless" } + ); + let digest = ensure_image_and_get_digest(&opts.image)?; + let digest_short = &digest[..16.min(digest.len())]; + info!("image digest: {}...", digest_short); + + let vm_name = opts + .name + .clone() + .unwrap_or_else(|| format!("ephemeral-{}", &digest_short[..8])); + let ssh_key_path = cache_base.join(format!("{}-key", vm_name)); + + let boot_dir = cache_base.join(format!("boot-{}", digest_short)); + fs::create_dir_all(&boot_dir)?; + let squashfs_cache = format!("/private/tmp/bcvk/rootfs-{}.squashfs", digest_short); + let 
squashfs_path = format!("/private/tmp/bcvk/{}-rootfs.squashfs", vm_name); + let vmlinuz_path = boot_dir.join("vmlinuz"); + let image_path = boot_dir.join("Image"); + let initramfs_orig = boot_dir.join("initramfs-orig.img"); + let initramfs_path = cache_base.join(format!("{}-initramfs.img", vm_name)); + + // Step 1+2: kernel extract + SquashFS creation (parallel) + let step2_handle = if !Path::new(&squashfs_cache).exists() { + let mc = machine.clone(); + let rf = rootful; + let img = opts.image.clone(); + let sc = squashfs_cache.clone(); + Some(std::thread::spawn(move || -> Result<()> { + info!("creating SquashFS image (lz4)..."); + create_squashfs_image(&mc, rf, &img, &sc) + })) + } else { + info!("using cached SquashFS: {}", squashfs_cache); + None + }; + + if !vmlinuz_path.exists() || !initramfs_orig.exists() { + info!("extracting kernel and initramfs..."); + extract_kernel(&machine, &opts.image, &boot_dir)?; + fs::rename(boot_dir.join("initramfs.img"), &initramfs_orig)?; + } + + // Step 3+4: kernel decompress + CPIO append (parallel after Step 1) + let step3_handle = if !image_path.exists() { + let vp = vmlinuz_path.clone(); + let ip = image_path.clone(); + Some(std::thread::spawn(move || -> Result<()> { + info!("decompressing kernel (vmlinuz → Image)..."); + extract_uncompressed_kernel(&vp, &ip) + })) + } else { + None + }; + + fs::copy(&initramfs_orig, &initramfs_path)?; + { + let cpio_data = crate::cpio::create_initramfs_units_cpio() + .map_err(|e| eyre!("failed to create CPIO: {e}"))?; + let mut f = OpenOptions::new().append(true).open(&initramfs_path)?; + let sz = f.seek(SeekFrom::End(0))?; + let pad = sz.next_multiple_of(4) - sz; + if pad > 0 { + f.write_all(&vec![0u8; pad as usize])?; + } + f.write_all(&cpio_data)?; + + if opts.ssh_keygen || !opts.execute.is_empty() { + info!("generating SSH keypair..."); + let _ = fs::remove_file(&ssh_key_path); + let _ = fs::remove_file(ssh_key_path.with_extension("pub")); + let status = Command::new("ssh-keygen") + 
.args([ + "-t", + "ed25519", + "-f", + &ssh_key_path.to_string_lossy(), + "-N", + "", + "-q", + ]) + .status()?; + if !status.success() { + bail!("ssh-keygen failed (exit code: {:?})", status.code()); + } + let pubkey = fs::read_to_string(ssh_key_path.with_extension("pub"))?; + let ssh_cpio = create_ssh_setup_cpio(pubkey.trim())?; + let pos = f.seek(SeekFrom::End(0))?; + let pad = pos.next_multiple_of(4) - pos; + if pad > 0 { + f.write_all(&vec![0u8; pad as usize])?; + } + f.write_all(&ssh_cpio)?; + } + info!("initramfs prepared"); + } + + if let Some(h) = step3_handle { + h.join() + .map_err(|_| eyre!("kernel decompression thread panicked"))??; + } + if let Some(h) = step2_handle { + h.join() + .map_err(|_| eyre!("squashfs creation thread panicked"))??; + } + + // CoW clone SquashFS for this VM (allows concurrent use of same image) + let _ = fs::remove_file(&squashfs_path); + let clone_status = Command::new("cp") + .args(["-c", &squashfs_cache, &squashfs_path]) + .status() + .context("cloning SquashFS")?; + if !clone_status.success() { + fs::copy(&squashfs_cache, &squashfs_path).context("copying SquashFS")?; + } + + // 5. 
gvproxy + vfkit + let gvproxy_sock = cache_base.join(format!("{}-gvproxy.sock", vm_name)); + let services_sock = cache_base.join(format!("{}-gvproxy-svc.sock", vm_name)); + let gvproxy_sock_str = gvproxy_sock.to_string_lossy().to_string(); + let services_sock_str = services_sock.to_string_lossy().to_string(); + info!("starting gvproxy..."); + let mut gvproxy_child = start_gvproxy(&gvproxy_sock_str, &services_sock_str)?; + + let mut cmdline_parts: Vec<&str> = vec![ + "root=/dev/vda", + "ro", + "rootfstype=squashfs", + "console=tty0", + "console=hvc0", + "loglevel=4", + "selinux=0", + "net.ifnames=0", + "systemd.journald.storage=volatile", + ]; + let user_args: Vec<&str> = opts.kernel_args.iter().map(|s| s.as_str()).collect(); + cmdline_parts.extend(&user_args); + let cmdline = cmdline_parts.join(" "); + + let mac = generate_mac(); + let mac_str = format!( + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] + ); + + let bootloader_arg = format!( + "linux,kernel={},initrd={},cmdline=\"{}\"", + image_path.display(), + initramfs_path.display(), + cmdline + ); + + let vcpus = opts.vcpus.unwrap_or_else(default_vcpus); + let memory_mb = parse_memory_to_mb(&opts.memory)?; + + let mut vfkit_args = vec![ + "--cpus".to_string(), + vcpus.to_string(), + "--memory".to_string(), + memory_mb.to_string(), + "--bootloader".to_string(), + bootloader_arg, + "--device".to_string(), + format!("virtio-blk,path={}", squashfs_path), + "--device".to_string(), + format!( + "virtio-net,unixSocketPath={},mac={}", + gvproxy_sock_str, mac_str + ), + "--device".to_string(), + "virtio-rng".to_string(), + ]; + if opts.gui { + vfkit_args.push("--gui".to_string()); + } + + info!("launching vfkit..."); + let vfkit_log = cache_base.join(format!("{}-vfkit.log", vm_name)); + let vfkit_log_file = fs::File::create(&vfkit_log)?; + let mut vfkit_child = Command::new(&vfkit_bin) + .args(&vfkit_args) + .stdout(vfkit_log_file.try_clone()?) 
+ .stderr(vfkit_log_file) + .spawn() + .context("failed to start vfkit")?; + + let ssh_port = find_available_ssh_port(); + debug!("allocated SSH port: {}", ssh_port); + + let metadata = EphemeralVmMetadata { + name: vm_name.clone(), + image: opts.image.clone(), + pid: vfkit_child.id(), + gvproxy_pid: gvproxy_child.id(), + ssh_port, + ssh_key: ssh_key_path.to_string_lossy().to_string(), + serial_log: String::new(), + log_path: None, + created: chrono::Utc::now().to_rfc3339(), + }; + metadata.save()?; + + let _cleanup = VmCleanup { + vfkit_pid: vfkit_child.id(), + gvproxy_pid: gvproxy_child.id(), + vm_name: vm_name.clone(), + }; + + if opts.ssh_keygen || !opts.execute.is_empty() { + info!("setting up SSH port forwarding..."); + for attempt in 0..15u32 { + match expose_ssh_port(&services_sock_str, "192.168.127.2", ssh_port) { + Ok(_) => { + info!("SSH port {} forwarded", ssh_port); + break; + } + Err(e) if attempt < 14 => { + debug!("SSH port forward attempt {}: {}", attempt, e); + let backoff = 200 * 2u64.pow(attempt.min(4)); + std::thread::sleep(Duration::from_millis(backoff)); + } + Err(e) => bail!("SSH port forward failed: {}", e), + } + } + + wait_for_ssh(ssh_port, &ssh_key_path, "root")?; + + if !opts.execute.is_empty() { + for cmd_str in &opts.execute { + info!("executing: {}", cmd_str); + let status = run_ssh_command(ssh_port, &ssh_key_path, "root", cmd_str)?; + if !status.success() { + bail!("command failed: {}", status); + } + } + return Ok(()); + } + + info!( + "SSH ready: ssh -p {} -i {} root@localhost", + ssh_port, + ssh_key_path.display() + ); + + use std::io::IsTerminal; + if std::io::stdin().is_terminal() { + let status = run_ssh_interactive(ssh_port, &ssh_key_path, "root")?; + let exit_code = status.code().unwrap_or(1); + drop(_cleanup); + std::process::exit(exit_code); + } + } + + // No SSH: wait for vfkit to exit (GUI window closed or VM shutdown) + std::mem::forget(_cleanup); + let status = vfkit_child.wait()?; + info!("vfkit exited: {}", status); 
+ if let Err(e) = gvproxy_child.kill() { + tracing::debug!("failed to kill gvproxy: {}", e); + } + EphemeralVmMetadata::remove(&vm_name); + Ok(()) +} + +fn run_detached(opts: &RunEphemeralOpts) -> Result<()> { + let cache_base = std::path::PathBuf::from("/private/tmp/bcvk"); + fs::create_dir_all(&cache_base)?; + let digest = ensure_image_and_get_digest(&opts.image)?; + let digest_short = &digest[..16.min(digest.len())]; + let vm_name = opts + .name + .clone() + .unwrap_or_else(|| format!("ephemeral-{}", &digest_short[..8])); + let log_path = cache_base.join(format!("bcvk-{}.log", vm_name)); + let log_file = fs::File::create(&log_path)?; + + let exe = std::env::current_exe()?; + let mut args: Vec = std::env::args() + .skip(1) + .filter(|a| a != "--detach" && a != "-d") + .collect(); + if !args.contains(&"-K".to_string()) && !args.contains(&"--ssh-keygen".to_string()) { + args.insert(args.len() - 1, "-K".to_string()); + } + if opts.name.is_none() { + args.insert(args.len() - 1, "--name".to_string()); + args.insert(args.len() - 1, vm_name.clone()); + } + + let child = Command::new(exe) + .args(&args) + .stdin(Stdio::null()) + .stdout(log_file.try_clone()?) 
+ .stderr(log_file) + .spawn()?; + + let metadata = EphemeralVmMetadata { + name: vm_name.clone(), + image: opts.image.clone(), + pid: child.id(), + gvproxy_pid: 0, + ssh_port: 0, + ssh_key: cache_base + .join(format!("{}-key", vm_name)) + .to_string_lossy() + .to_string(), + serial_log: String::new(), + log_path: Some(log_path.to_string_lossy().to_string()), + created: chrono::Utc::now().to_rfc3339(), + }; + metadata.save()?; + println!("{}", vm_name); + Ok(()) +} + +// --- SSH setup CPIO --- + +fn create_ssh_setup_cpio(pubkey: &str) -> Result> { + use cpio::newc::Builder as NewcBuilder; + let mut buf = Vec::new(); + + let script = format!( + "#!/bin/bash\n\ + mkdir -p /sysroot/var/roothome/.ssh\n\ + chmod 700 /sysroot/var/roothome/.ssh\n\ + echo '{}' > /sysroot/var/roothome/.ssh/authorized_keys\n\ + chmod 600 /sysroot/var/roothome/.ssh/authorized_keys\n\ + chown -R 0:0 /sysroot/var/roothome/.ssh\n", + pubkey + ); + + let service = "[Unit]\n\ + Description=Setup SSH authorized_keys for root\n\ + DefaultDependencies=no\n\ + ConditionPathExists=/etc/initrd-release\n\ + Before=initrd-fs.target\n\ + After=bcvk-var-ephemeral.service\n\ + Requires=bcvk-var-ephemeral.service\n\ + \n\ + [Service]\n\ + Type=oneshot\n\ + RemainAfterExit=yes\n\ + ExecStart=/usr/bin/bash /usr/lib/bcvk/setup-ssh.sh\n"; + + let dropin = "[Unit]\nWants=bcvk-ssh-setup.service\n"; + + let write_entry = + |buf: &mut Vec, path: &str, data: &[u8], executable: bool| -> std::io::Result<()> { + let mode = if executable { 0o100755 } else { 0o100644 }; + let builder = NewcBuilder::new(path).mode(mode).uid(0).gid(0); + let mut writer = builder.write(buf, data.len() as u32); + writer.write_all(data)?; + writer.finish()?; + Ok(()) + }; + + let write_dir = |buf: &mut Vec, path: &str| -> std::io::Result<()> { + NewcBuilder::new(path) + .mode(0o040755) + .uid(0) + .gid(0) + .write(buf, 0) + .finish()?; + Ok(()) + }; + + write_dir(&mut buf, "usr/lib/bcvk")?; + write_entry( + &mut buf, + 
"usr/lib/bcvk/setup-ssh.sh", + script.as_bytes(), + true, + )?; + write_entry( + &mut buf, + "usr/lib/systemd/system/bcvk-ssh-setup.service", + service.as_bytes(), + false, + )?; + write_entry( + &mut buf, + "usr/lib/systemd/system/initrd-fs.target.d/bcvk-ssh-setup.conf", + dropin.as_bytes(), + false, + )?; + cpio::newc::trailer(&mut buf).map_err(|e| eyre!("cpio trailer: {e}"))?; + Ok(buf) +} + +// --- vfkit kernel decompression --- + +fn extract_uncompressed_kernel(vmlinuz_path: &Path, output_path: &Path) -> Result<()> { + let data = fs::read(vmlinuz_path)?; + + // Parse zboot header: offset 0x08 = payload_offset (le32), 0x0c = payload_size (le32) + let (pos, payload_end) = if data.len() >= 16 && &data[4..8] == b"zimg" { + let payload_offset = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize; + let payload_size = u32::from_le_bytes(data[12..16].try_into().unwrap()) as usize; + if payload_offset + payload_size > data.len() { + bail!("zboot payload extends beyond file"); + } + info!( + "zboot header: payload at 0x{:x}, size 0x{:x}", + payload_offset, payload_size + ); + (payload_offset, payload_offset + payload_size) + } else { + let magic = [0x28u8, 0xb5, 0x2f, 0xfd]; + let p = data + .windows(4) + .position(|w| w == magic) + .ok_or_else(|| eyre!("zstd magic not found in vmlinuz"))?; + info!("zstd payload at offset 0x{:x} (no zboot header)", p); + (p, data.len()) + }; + + let mut kernel = Vec::new(); + zstd::stream::copy_decode(&data[pos..payload_end], &mut kernel) + .context("decompressing zstd payload from vmlinuz")?; + + if kernel.len() < 0x3c || &kernel[0x38..0x3c] != b"ARMd" { + bail!("decompressed kernel is not a valid ARM64 Image"); + } + fs::write(output_path, &kernel)?; + info!("decompressed kernel: {} bytes (ARM64 Image)", kernel.len()); + Ok(()) +} + +// --- Shared helpers (pub for vfkit/ module) --- + +fn detect_machine_name() -> Result { + let output = Command::new("podman") + .args(["machine", "info", "--format", 
"{{.Host.CurrentMachine}}"]) + .output()?; + let name = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if name.is_empty() { + bail!("no podman machine is running"); + } + Ok(name) +} + +fn ensure_image_and_get_digest(image: &str) -> Result { + let status = Command::new("podman") + .args(["image", "exists", image]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status()?; + if !status.success() { + info!("pulling image {}...", image); + if !Command::new("podman") + .args(["pull", image]) + .status()? + .success() + { + bail!("failed to pull image: {}", image); + } + } + let output = Command::new("podman") + .args(["image", "inspect", "--format", "{{.Digest}}", image]) + .output()?; + let digest = String::from_utf8_lossy(&output.stdout).trim().to_string(); + Ok(digest.trim_start_matches("sha256:").to_string()) +} + +fn extract_kernel(machine: &str, image: &str, boot_dir: &Path) -> Result<()> { + let boot_dir_str = boot_dir.to_string_lossy(); + let script = format!( + "KVER=$(podman run --rm {image} ls /usr/lib/modules/ | head -1) && \ + [ -n \"$KVER\" ] && \ + podman run --rm {image} cat /usr/lib/modules/$KVER/vmlinuz > {boot}/vmlinuz && \ + podman run --rm {image} cat /usr/lib/modules/$KVER/initramfs.img > {boot}/initramfs.img", + image = image, + boot = boot_dir_str + ); + let output = Command::new("podman") + .args(["machine", "ssh", machine, &script]) + .output() + .context("extracting kernel from container image")?; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!( + "No kernel found in image '{}'.\n\ + Checked: /usr/lib/modules//vmlinuz + initramfs.img\n\ + This image may not be a bootable container (bootc) image.\n\ + {}", + image, + stderr.trim() + ); + } + Ok(()) +} + +fn is_machine_rootful(machine: &str) -> bool { + Command::new("podman") + .args(["machine", "ssh", machine, "id", "-u"]) + .output() + .map(|o| String::from_utf8_lossy(&o.stdout).trim() == "0") + .unwrap_or(false) +} + +fn 
create_squashfs_image( + machine: &str, + rootful: bool, + image: &str, + output_path: &str, +) -> Result<()> { + let script = if rootful { + format!( + "MERGED=$(podman image mount {}) && \ + mksquashfs $MERGED {} -noappend -comp lz4 -b 1M -quiet", + image, output_path + ) + } else { + info!("rootless mode: using podman unshare for SquashFS creation"); + format!( + "podman unshare sh -c 'MERGED=$(podman image mount {}) && \ + mksquashfs $MERGED {} -noappend -comp lz4 -b 1M -quiet'", + image, output_path + ) + }; + + let output = Command::new("podman") + .args(["machine", "ssh", machine, &script]) + .output() + .context("running mksquashfs")?; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("mksquashfs failed: {}", stderr.trim()); + } + Ok(()) +} + +/// Clear extended attributes from a file. +/// +/// Apple Virtualization.framework rejects disk images with xattrs like +/// `security.selinux` or `user.containers.override_stat` that are added +/// by podman/buildah when creating images inside containers. +pub fn clear_xattr(path: &Path) { + let _ = Command::new("xattr") + .args(["-c", &path.to_string_lossy()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); +} + +/// Find the vfkit binary, checking PATH and Podman PKG location. +pub fn find_vfkit() -> Result { + if let Ok(path) = which::which("vfkit") { + return Ok(path.to_string_lossy().to_string()); + } + let podman_path = "/opt/podman/bin/vfkit"; + if Path::new(podman_path).exists() { + return Ok(podman_path.to_string()); + } + bail!("vfkit not found. Install: brew install vfkit") +} + +/// Fixed MAC address matching gvproxy's DHCP static lease for 192.168.127.2. +const GVPROXY_STATIC_MAC: [u8; 6] = [0x5a, 0x94, 0xef, 0xe4, 0x0c, 0xee]; + +/// Generate the fixed MAC address for gvproxy DHCP static lease. +pub fn generate_mac() -> [u8; 6] { + GVPROXY_STATIC_MAC +} + +/// Find the gvproxy binary, checking PATH and Podman installation paths. 
+///
+/// Lookup order: `gvproxy` on `PATH` (via `which`), then the Homebrew and
+/// Podman installer locations listed in the function body.
+///
+/// # Errors
+/// Fails when none of the candidates exists.
+fn find_gvproxy() -> Result<String> {
+    if let Ok(path) = which::which("gvproxy") {
+        return Ok(path.to_string_lossy().to_string());
+    }
+    // Not on PATH: fall back to the fixed install locations, first hit wins.
+    let fallback = [
+        "/opt/homebrew/opt/podman/libexec/podman/gvproxy",
+        "/opt/podman/bin/gvproxy",
+    ]
+    .into_iter()
+    .find(|candidate| Path::new(candidate).exists());
+    match fallback {
+        Some(path) => Ok(path.to_string()),
+        None => bail!("gvproxy not found. Ensure Podman is installed (brew install podman)"),
+    }
+}
+
+/// Start a gvproxy instance with the given socket paths.
+///
+/// `gvproxy_sock` is passed as the `-listen-vfkit` unixgram endpoint and
+/// `services_sock` as the `-services` endpoint. Any existing files at those
+/// paths are removed first, then the function polls (50 x 100 ms) until
+/// `gvproxy_sock` exists.
+///
+/// # Errors
+/// Fails if gvproxy cannot be located or spawned, if it exits before creating
+/// its socket, or if the socket does not appear within the polling window. In
+/// the latter case the child is killed and reaped rather than left running.
+pub fn start_gvproxy(gvproxy_sock: &str, services_sock: &str) -> Result<std::process::Child> {
+    let gvproxy_bin = find_gvproxy()?;
+    // A leftover socket file would make the readiness poll below succeed
+    // immediately, so clear both paths (best effort) before spawning.
+    let _ = fs::remove_file(gvproxy_sock);
+    let _ = fs::remove_file(services_sock);
+    let mut child = Command::new(&gvproxy_bin)
+        .args([
+            "-listen-vfkit",
+            &format!("unixgram://{}", gvproxy_sock),
+            "-ssh-port",
+            "-1",
+            "-services",
+            &format!("unix://{}", services_sock),
+        ])
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .spawn()
+        .context("failed to start gvproxy. Ensure gvproxy is installed (included in Podman)")?;
+
+    for _ in 0..50 {
+        if Path::new(gvproxy_sock).exists() {
+            return Ok(child);
+        }
+        // If gvproxy already died there is nothing left to wait for; report it
+        // now instead of sleeping through the rest of the window.
+        if let Ok(Some(status)) = child.try_wait() {
+            bail!("gvproxy exited before creating its socket ({})", status);
+        }
+        std::thread::sleep(Duration::from_millis(100));
+    }
+    // One last look after the final sleep.
+    if Path::new(gvproxy_sock).exists() {
+        return Ok(child);
+    }
+    // Do not leak a half-started proxy: terminate and reap it before failing.
+    let _ = child.kill();
+    let _ = child.wait();
+    bail!("gvproxy socket did not appear");
+}
+
+/// Expose SSH port forwarding via gvproxy's HTTP API.
+///
+/// Sends `POST /services/forwarder/expose` over the Unix socket at
+/// `services_sock`, asking gvproxy to forward local port `host_port` to
+/// `vm_ip:22` over TCP.
+///
+/// # Errors
+/// Fails if the socket cannot be connected or written, if the reply cannot be
+/// read, or if the status code on the reply's first line is not `200`.
+pub fn expose_ssh_port(services_sock: &str, vm_ip: &str, host_port: u16) -> Result<()> {
+    let body = format!(
+        r#"{{"local":":{}","remote":"{}:22","protocol":"tcp"}}"#,
+        host_port, vm_ip
+    );
+    let request = format!(
+        "POST /services/forwarder/expose HTTP/1.1\r\nHost: unix\r\n\
+         Content-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
+        body.len(),
+        body
+    );
+    let mut stream = UnixStream::connect(services_sock)?;
+    std::io::Write::write_all(&mut stream, request.as_bytes())?;
+    std::io::Write::flush(&mut stream)?;
+
+    // Look only at the bytes actually received; the rest of the buffer is
+    // zero padding and must not leak into the error message.
+    let mut response = [0u8; 1024];
+    let read_len = std::io::Read::read(&mut stream, &mut response)
+        .context("reading gvproxy response")?;
+    let response_str = String::from_utf8_lossy(&response[..read_len]);
+    // The status line is `HTTP/<version> <code> <reason>`. Compare the code
+    // token itself: a substring search for "200" would also accept an error
+    // reply that merely contains "200" in a header or in its body.
+    let status_code = response_str
+        .lines()
+        .next()
+        .and_then(|status_line| status_line.split_whitespace().nth(1));
+    if status_code != Some("200") {
+        bail!("gvproxy expose failed: {}", response_str.trim_end());
+    }
+    Ok(())
+}
+
+/// Upper bound on how long `wait_for_ssh` keeps retrying.
+const SSH_TIMEOUT: Duration = Duration::from_secs(240);
+
+/// Find an available TCP port for SSH forwarding in the half-open range 2222..3000.
+///
+/// Tries 100 random ports first, then scans the range in order. A port counts
+/// as available when a listener can be bound to it on 127.0.0.1; that probe
+/// listener is dropped immediately, so the port is not reserved for the caller.
+/// Returns 2222 when no port in the range could be bound.
+pub fn find_available_ssh_port() -> u16 {
+    use rand::Rng;
+    const PORT_RANGE_START: u16 = 2222;
+    const PORT_RANGE_END: u16 = 3000;
+
+    let is_free = |port: u16| std::net::TcpListener::bind(("127.0.0.1", port)).is_ok();
+
+    let mut rng = rand::rng();
+    let random_pick = (0..100)
+        .map(|_| rng.random_range(PORT_RANGE_START..PORT_RANGE_END))
+        .find(|&port| is_free(port));
+
+    random_pick
+        .or_else(|| (PORT_RANGE_START..PORT_RANGE_END).find(|&port| is_free(port)))
+        .unwrap_or(PORT_RANGE_START)
+}
+
+/// Wait for SSH connectivity with stepped backoff (0.5s, 1s, then 2s; 240s timeout).
+///
+/// Repeatedly runs `ssh ... <user>@localhost true` against `port` with the
+/// given private key until one attempt exits successfully. The pause between
+/// attempts is 500 ms for the first two, 1 s for the next two, then 2 s.
+///
+/// # Errors
+/// Fails once more than `SSH_TIMEOUT` has elapsed without a successful attempt.
+pub fn wait_for_ssh(port: u16, key_path: &Path, user: &str) -> Result<()> {
+    use crate::ssh_options::CommonSshOptions;
+
+    let options = CommonSshOptions::default();
+    let target = format!("{}@localhost", user);
+    let port_arg = port.to_string();
+    let key_arg = key_path.to_string_lossy();
+    info!("waiting for SSH on port {} ({}@localhost)...", port, user);
+
+    let began = std::time::Instant::now();
+    let mut attempt = 0u32;
+    loop {
+        if began.elapsed() > SSH_TIMEOUT {
+            bail!("SSH connection timeout ({}s)", SSH_TIMEOUT.as_secs());
+        }
+
+        let mut probe = Command::new("ssh");
+        probe.args(["-p", port_arg.as_str(), "-i", &*key_arg]);
+        options.apply_to_command(&mut probe);
+        probe.args(["-o", "BatchMode=yes", target.as_str(), "true"]);
+        probe.stdout(Stdio::null()).stderr(Stdio::null());
+
+        // A failure to spawn ssh is treated like a failed attempt and retried.
+        if matches!(probe.status(), Ok(status) if status.success()) {
+            info!("SSH connected after {}s", began.elapsed().as_secs());
+            return Ok(());
+        }
+
+        let pause_ms = match attempt {
+            0 | 1 => 500,
+            2 | 3 => 1000,
+            _ => 2000,
+        };
+        std::thread::sleep(Duration::from_millis(pause_ms));
+        attempt += 1;
+    }
+}
+
+/// Execute a command via SSH and return the exit status.
+///
+/// Runs `command` as `user@localhost` on `port` in batch mode, with stdin,
+/// stdout and stderr inherited from this process.
+///
+/// # Errors
+/// Fails only when the `ssh` process itself cannot be run; a non-zero exit
+/// status of the remote command is returned as `Ok`.
+pub fn run_ssh_command(
+    port: u16,
+    key_path: &Path,
+    user: &str,
+    command: &str,
+) -> Result<std::process::ExitStatus> {
+    use crate::ssh_options::CommonSshOptions;
+
+    let port_arg = port.to_string();
+    let key_arg = key_path.to_string_lossy();
+    let target = format!("{}@localhost", user);
+
+    let mut ssh = Command::new("ssh");
+    ssh.args(["-p", port_arg.as_str(), "-i", &*key_arg]);
+    CommonSshOptions::default().apply_to_command(&mut ssh);
+    ssh.args(["-o", "BatchMode=yes", target.as_str(), command])
+        .stdin(Stdio::inherit())
+        .stdout(Stdio::inherit())
+        .stderr(Stdio::inherit());
+
+    match ssh.status() {
+        Ok(status) => Ok(status),
+        Err(e) => Err(eyre!("ssh failed: {}", e)),
+    }
+}
+
+/// Start an interactive SSH session with TTY allocation.
+pub fn run_ssh_interactive( + port: u16, + key_path: &Path, + user: &str, +) -> Result { + use crate::ssh_options::CommonSshOptions; + let ssh_opts = CommonSshOptions::default(); + let user_host = format!("{}@localhost", user); + let mut cmd = Command::new("ssh"); + cmd.args(["-p", &port.to_string(), "-i", &key_path.to_string_lossy()]); + ssh_opts.apply_to_command(&mut cmd); + cmd.args(["-t", &user_host]); + cmd.stdin(Stdio::inherit()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .status() + .map_err(|e| eyre!("ssh failed: {}", e)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_memory_to_mb() { + let cases = [ + ("4G", 4096), + ("4g", 4096), + ("2048M", 2048), + ("2048m", 2048), + ("512", 512), + ("1G", 1024), + ]; + for (input, expected) in &cases { + assert_eq!( + parse_memory_to_mb(input).unwrap(), + *expected, + "parse_memory_to_mb({:?})", + input + ); + } + } + + #[test] + fn test_parse_memory_to_mb_errors() { + assert!(parse_memory_to_mb("").is_err()); + assert!(parse_memory_to_mb("abc").is_err()); + } + + #[test] + fn test_generate_mac() { + let mac = generate_mac(); + assert_eq!(mac, GVPROXY_STATIC_MAC); + } + + #[test] + fn test_default_vcpus() { + let vcpus = default_vcpus(); + assert!(vcpus >= 1); + assert_eq!( + vcpus, + std::thread::available_parallelism() + .map(|n| n.get() as u32) + .unwrap_or(2) + ); + } + + #[test] + fn test_find_available_ssh_port() { + let port = find_available_ssh_port(); + assert!((2222..3000).contains(&port)); + assert!(std::net::TcpListener::bind(("127.0.0.1", port)).is_ok()); + } + + #[test] + fn test_ephemeral_vm_metadata_roundtrip() { + let meta = EphemeralVmMetadata { + name: "test-vm".to_string(), + image: "quay.io/fedora/fedora-bootc:42".to_string(), + pid: 12345, + gvproxy_pid: 12346, + ssh_port: 2222, + ssh_key: "/tmp/test-key".to_string(), + serial_log: "/tmp/test-serial.log".to_string(), + log_path: Some("/tmp/test-vfkit.log".to_string()), + created: 
"2026-01-01T00:00:00Z".to_string(), + }; + let json = serde_json::to_string_pretty(&meta).unwrap(); + let loaded: EphemeralVmMetadata = serde_json::from_str(&json).unwrap(); + assert_eq!(loaded.name, "test-vm"); + assert_eq!(loaded.image, "quay.io/fedora/fedora-bootc:42"); + assert_eq!(loaded.pid, 12345); + assert_eq!(loaded.ssh_port, 2222); + assert_eq!(loaded.log_path.as_deref(), Some("/tmp/test-vfkit.log")); + } + + #[test] + fn test_ephemeral_vm_metadata_save_load_remove() { + let dir = tempfile::tempdir().unwrap(); + let json_path = dir.path().join("roundtrip-vm.json"); + let meta = EphemeralVmMetadata { + name: "roundtrip-vm".to_string(), + image: "localhost/test:latest".to_string(), + pid: 999, + gvproxy_pid: 1000, + ssh_port: 2250, + ssh_key: "/tmp/key".to_string(), + serial_log: "/tmp/serial.log".to_string(), + log_path: None, + created: "2026-05-04T00:00:00Z".to_string(), + }; + fs::write(&json_path, serde_json::to_string_pretty(&meta).unwrap()).unwrap(); + let data = fs::read_to_string(&json_path).unwrap(); + let loaded: EphemeralVmMetadata = serde_json::from_str(&data).unwrap(); + assert_eq!(loaded.name, "roundtrip-vm"); + assert_eq!(loaded.ssh_port, 2250); + assert!(loaded.log_path.is_none()); + fs::remove_file(&json_path).unwrap(); + assert!(!json_path.exists()); + } + + #[test] + fn test_ephemeral_vm_metadata_list_all_from_dir() { + let dir = tempfile::tempdir().unwrap(); + for i in 0..3 { + let meta = EphemeralVmMetadata { + name: format!("vm-{i}"), + image: "test:latest".to_string(), + pid: 100 + i, + gvproxy_pid: 200 + i, + ssh_port: 2222 + (i as u16), + ssh_key: "/tmp/key".to_string(), + serial_log: "/tmp/serial.log".to_string(), + log_path: None, + created: "2026-01-01T00:00:00Z".to_string(), + }; + let path = dir.path().join(format!("vm-{i}.json")); + fs::write(&path, serde_json::to_string(&meta).unwrap()).unwrap(); + } + // Also write a non-json file that should be skipped + fs::write(dir.path().join("README.txt"), "not json").unwrap(); + + 
let mut vms = Vec::new(); + for entry in fs::read_dir(dir.path()).unwrap() { + let path = entry.unwrap().path(); + if path.extension().and_then(|e| e.to_str()) != Some("json") { + continue; + } + if let Ok(data) = fs::read_to_string(&path) { + if let Ok(meta) = serde_json::from_str::(&data) { + vms.push(meta); + } + } + } + assert_eq!(vms.len(), 3); + let mut names: Vec<_> = vms.iter().map(|v| v.name.clone()).collect(); + names.sort(); + assert_eq!(names, vec!["vm-0", "vm-1", "vm-2"]); + } +} diff --git a/crates/kit/src/ssh_options.rs b/crates/kit/src/ssh_options.rs new file mode 100644 index 000000000..8e26be324 --- /dev/null +++ b/crates/kit/src/ssh_options.rs @@ -0,0 +1,136 @@ +//! Cross-platform SSH option types shared between Linux and macOS backends. +//! +//! Extracted from ssh.rs to avoid pulling in Linux-only dependencies on macOS. + +/// Common SSH options that can be shared between different SSH implementations +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct CommonSshOptions { + /// Use strict host key checking + pub strict_host_keys: bool, + /// SSH connection timeout in seconds + pub connect_timeout: u32, + /// Server alive interval in seconds + pub server_alive_interval: u32, + /// SSH log level + pub log_level: String, + /// Additional SSH options as key-value pairs + pub extra_options: Vec<(String, String)>, +} + +impl Default for CommonSshOptions { + fn default() -> Self { + Self { + strict_host_keys: false, + connect_timeout: 1, + server_alive_interval: 60, + log_level: "ERROR".to_string(), + extra_options: vec![], + } + } +} + +impl CommonSshOptions { + /// Apply these options to an SSH command + #[allow(dead_code)] + pub fn apply_to_command(&self, cmd: &mut std::process::Command) { + cmd.args(["-o", "IdentitiesOnly=yes"]); + cmd.args(["-o", "PasswordAuthentication=no"]); + cmd.args(["-o", "KbdInteractiveAuthentication=no"]); + cmd.args(["-o", "GSSAPIAuthentication=no"]); + + cmd.args(["-o", &format!("ConnectTimeout={}", 
self.connect_timeout)]); + cmd.args([ + "-o", + &format!("ServerAliveInterval={}", self.server_alive_interval), + ]); + cmd.args(["-o", &format!("LogLevel={}", self.log_level)]); + + if !self.strict_host_keys { + cmd.args(["-o", "StrictHostKeyChecking=no"]); + cmd.args(["-o", "UserKnownHostsFile=/dev/null"]); + } + + for (key, value) in &self.extra_options { + cmd.args(["-o", &format!("{}={}", key, value)]); + } + } +} + +/// SSH connection configuration options +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct SshConnectionOptions { + /// Common SSH options shared across implementations + pub common: CommonSshOptions, + /// Enable/disable TTY allocation (default: true) + pub allocate_tty: bool, + /// Suppress output to stdout/stderr (default: false) + pub suppress_output: bool, +} + +impl Default for SshConnectionOptions { + fn default() -> Self { + Self { + common: CommonSshOptions::default(), + allocate_tty: true, + suppress_output: false, + } + } +} + +impl SshConnectionOptions { + /// Create options suitable for quick connectivity tests (short timeout, no TTY) + #[allow(dead_code)] + pub fn for_connectivity_test() -> Self { + Self { + common: CommonSshOptions { + strict_host_keys: false, + connect_timeout: 2, + server_alive_interval: 60, + log_level: "ERROR".to_string(), + extra_options: vec![], + }, + allocate_tty: false, + suppress_output: true, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_common_ssh_options_default() { + let opts = CommonSshOptions::default(); + assert!(!opts.strict_host_keys); + assert_eq!(opts.connect_timeout, 1); + assert_eq!(opts.server_alive_interval, 60); + assert_eq!(opts.log_level, "ERROR"); + assert!(opts.extra_options.is_empty()); + } + + #[test] + fn test_connectivity_test_options() { + let opts = SshConnectionOptions::for_connectivity_test(); + assert_eq!(opts.common.connect_timeout, 2); + assert!(!opts.allocate_tty); + assert!(opts.suppress_output); + } + + #[test] + fn 
test_apply_to_command() { + let opts = CommonSshOptions::default(); + let mut cmd = std::process::Command::new("ssh"); + opts.apply_to_command(&mut cmd); + let args: Vec<_> = cmd + .get_args() + .map(|a| a.to_string_lossy().to_string()) + .collect(); + assert!(args.contains(&"IdentitiesOnly=yes".to_string())); + assert!(args.contains(&"PasswordAuthentication=no".to_string())); + assert!(args.contains(&"StrictHostKeyChecking=no".to_string())); + assert!(args.contains(&"ConnectTimeout=1".to_string())); + } +} diff --git a/crates/kit/src/vfkit/inspect.rs b/crates/kit/src/vfkit/inspect.rs new file mode 100644 index 000000000..67a506d7c --- /dev/null +++ b/crates/kit/src/vfkit/inspect.rs @@ -0,0 +1,62 @@ +//! vm inspect — Show detailed VM information. + +use super::VmMetadata; +use color_eyre::Result; + +/// Display detailed metadata for the named VM. +pub fn run(name: &str, json: bool) -> Result<()> { + let meta = VmMetadata::load(name)?; + + if json { + println!("{}", serde_json::to_string_pretty(&meta)?); + return Ok(()); + } + + let state = if meta.is_alive() { + "running" + } else { + "stopped" + }; + + println!("Name: {}", meta.name); + println!("State: {}", state); + println!("Disk: {}", meta.disk_image); + println!("CPUs: {}", meta.cpus); + println!("Memory: {} MiB", meta.memory); + println!("GUI: {}", meta.gui); + println!("Created: {}", meta.created); + println!(); + println!("Processes:"); + if meta.vfkit_pid > 0 { + println!( + " vfkit: PID {} ({})", + meta.vfkit_pid, + if meta.is_alive() { + "running" + } else { + "stopped" + } + ); + } + if meta.gvproxy_pid > 0 { + println!(" gvproxy: PID {}", meta.gvproxy_pid); + } + println!(); + println!("SSH:"); + println!(" Port: {}", meta.ssh_port); + println!(" User: {}", meta.ssh_user); + println!(" Key: {}", meta.ssh_key); + if state == "running" { + println!(); + println!( + " ssh -p {} -i {} {}@localhost", + meta.ssh_port, meta.ssh_key, meta.ssh_user + ); + } + println!(); + println!("Files:"); + println!(" EFI 
store: {}", meta.efi_store); + println!(" Serial log: {}", meta.serial_log); + + Ok(()) +} diff --git a/crates/kit/src/vfkit/list.rs b/crates/kit/src/vfkit/list.rs new file mode 100644 index 000000000..bdda3f295 --- /dev/null +++ b/crates/kit/src/vfkit/list.rs @@ -0,0 +1,29 @@ +//! vm list — List all persistent VMs. + +use super::VmMetadata; +use color_eyre::Result; + +/// List all persistent VMs, optionally as JSON. +pub fn run(json: bool) -> Result<()> { + let vms = VmMetadata::list_all()?; + + if json { + println!("{}", serde_json::to_string_pretty(&vms)?); + return Ok(()); + } + + if vms.is_empty() { + println!("No VMs found."); + return Ok(()); + } + + println!("{:<20} {:<10} {:<30} SSH", "NAME", "STATE", "DISK"); + for vm in &vms { + let state = if vm.is_alive() { "running" } else { "stopped" }; + println!( + "{:<20} {:<10} {:<30} ssh -p {} -i {} {}@localhost", + vm.name, state, vm.disk_image, vm.ssh_port, vm.ssh_key, vm.ssh_user + ); + } + Ok(()) +} diff --git a/crates/kit/src/vfkit/mod.rs b/crates/kit/src/vfkit/mod.rs new file mode 100644 index 000000000..62939254a --- /dev/null +++ b/crates/kit/src/vfkit/mod.rs @@ -0,0 +1,271 @@ +//! Persistent VM management for macOS using vfkit + EFI boot. +//! +//! Subcommands mirror the Linux libvirt/ module structure: +//! run, list, ssh, stop, start, rm, rm-all, inspect + +use std::fs; +use std::path::PathBuf; +use std::process::{Command, Stdio}; + +use clap::Subcommand; +use color_eyre::Result; + +pub mod inspect; +pub mod list; +pub mod rm; +pub mod rm_all; +pub mod run; +pub mod ssh; +pub mod start; +pub mod stop; + +/// Subcommands for persistent VM management via vfkit. 
+#[derive(Debug, Subcommand)] +pub enum VmCommands { + /// Run a persistent VM from a disk image + Run(run::VmRunOpts), + + /// List all persistent VMs + #[clap(name = "list", alias = "ls")] + List { + /// Output in JSON format + #[clap(long)] + json: bool, + }, + + /// SSH into a running VM + Ssh(ssh::VmSshOpts), + + /// Stop a running VM + Stop { + /// VM name + name: String, + }, + + /// Start a stopped VM + Start(start::VmStartOpts), + + /// Remove a VM and its metadata + #[clap(name = "rm")] + Remove(rm::VmRmOpts), + + /// Remove all VMs + #[clap(name = "rm-all")] + RemoveAll { + /// Force removal without confirmation + #[clap(short, long)] + force: bool, + }, + + /// Show detailed VM information + Inspect { + /// VM name + name: String, + /// Output in JSON format + #[clap(long)] + json: bool, + }, +} + +impl VmCommands { + /// Dispatch to the appropriate subcommand handler. + pub fn run(self) -> Result<()> { + match self { + VmCommands::Run(opts) => run::run(opts), + VmCommands::List { json } => list::run(json), + VmCommands::Ssh(opts) => ssh::run(opts), + VmCommands::Stop { name } => stop::run(&name), + VmCommands::Start(opts) => start::run(opts), + VmCommands::Remove(opts) => rm::run(opts), + VmCommands::RemoveAll { force } => rm_all::run(force), + VmCommands::Inspect { name, json } => inspect::run(&name, json), + } + } +} + +// --- VM Metadata --- + +/// Persistent VM metadata, stored as JSON in `~/.local/share/bcvk/vms/`. +#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] +pub struct VmMetadata { + /// VM name used as identifier. + pub name: String, + /// Path to the disk image file. + pub disk_image: String, + /// PID of the vfkit process. + pub vfkit_pid: u32, + /// PID of the gvproxy network proxy process. + pub gvproxy_pid: u32, + /// Host-side SSH port forwarded to the VM. + pub ssh_port: u16, + /// Path to the SSH private key. + pub ssh_key: String, + /// SSH username for connecting to the VM. 
+ pub ssh_user: String, + /// Number of vCPUs allocated. + pub cpus: u32, + /// Memory in megabytes. + pub memory: u32, + /// Path to the EFI variable store file. + pub efi_store: String, + /// Path to the serial console log file. + pub serial_log: String, + /// Whether GUI mode is enabled. + pub gui: bool, + /// ISO 8601 timestamp when the VM was created. + pub created: String, + /// Current VM state (running, stopped). + pub state: String, +} + +impl VmMetadata { + /// Return the directory path for persistent VM metadata files. + pub fn vms_dir() -> PathBuf { + dirs::home_dir() + .expect("cannot determine home directory") + .join(".local/share/bcvk/vms") + } + + /// Save metadata to a JSON file in the VMs directory. + pub fn save(&self) -> Result<()> { + let dir = Self::vms_dir(); + fs::create_dir_all(&dir)?; + let path = dir.join(format!("{}.json", self.name)); + fs::write(&path, serde_json::to_string_pretty(self)?)?; + Ok(()) + } + + /// Load metadata for the named VM from its JSON file. + pub fn load(name: &str) -> Result { + let path = Self::vms_dir().join(format!("{}.json", name)); + let data = fs::read_to_string(&path)?; + Ok(serde_json::from_str(&data)?) + } + + /// Remove metadata file for the named VM. + pub fn remove(name: &str) { + let path = Self::vms_dir().join(format!("{}.json", name)); + let _ = fs::remove_file(path); + } + + /// List all persistent VM metadata from the VMs directory. + pub fn list_all() -> Result> { + let dir = Self::vms_dir(); + if !dir.exists() { + return Ok(Vec::new()); + } + let mut vms = Vec::new(); + for entry in fs::read_dir(&dir)? { + let path = entry?.path(); + if path.extension().and_then(|e| e.to_str()) != Some("json") { + continue; + } + if let Ok(data) = fs::read_to_string(&path) { + if let Ok(meta) = serde_json::from_str::(&data) { + vms.push(meta); + } + } + } + Ok(vms) + } + + /// Check if the VM process is still alive via kill -0. 
+ pub fn is_alive(&self) -> bool { + if self.vfkit_pid == 0 { + return false; + } + Command::new("kill") + .args(["-0", &self.vfkit_pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .map(|s| s.success()) + .unwrap_or(false) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_vm_metadata(name: &str) -> VmMetadata { + VmMetadata { + name: name.to_string(), + disk_image: "/tmp/disk.raw".to_string(), + vfkit_pid: 0, + gvproxy_pid: 0, + ssh_port: 2222, + ssh_key: "/tmp/key".to_string(), + ssh_user: "root".to_string(), + cpus: 2, + memory: 4096, + efi_store: "/tmp/efi.fd".to_string(), + serial_log: "/tmp/serial.log".to_string(), + gui: false, + created: "2026-01-01T00:00:00Z".to_string(), + state: "running".to_string(), + } + } + + #[test] + fn test_vm_metadata_roundtrip() { + let meta = sample_vm_metadata("test-vm"); + let json = serde_json::to_string_pretty(&meta).unwrap(); + let loaded: VmMetadata = serde_json::from_str(&json).unwrap(); + assert_eq!(loaded.name, "test-vm"); + assert_eq!(loaded.disk_image, "/tmp/disk.raw"); + assert_eq!(loaded.cpus, 2); + assert_eq!(loaded.memory, 4096); + assert_eq!(loaded.ssh_user, "root"); + assert_eq!(loaded.state, "running"); + assert!(!loaded.gui); + } + + #[test] + fn test_vm_metadata_save_load_remove() { + let dir = tempfile::tempdir().unwrap(); + let json_path = dir.path().join("myvm.json"); + let meta = sample_vm_metadata("myvm"); + fs::write(&json_path, serde_json::to_string_pretty(&meta).unwrap()).unwrap(); + let data = fs::read_to_string(&json_path).unwrap(); + let loaded: VmMetadata = serde_json::from_str(&data).unwrap(); + assert_eq!(loaded.name, "myvm"); + assert_eq!(loaded.ssh_port, 2222); + fs::remove_file(&json_path).unwrap(); + assert!(!json_path.exists()); + } + + #[test] + fn test_vm_metadata_list_from_dir() { + let dir = tempfile::tempdir().unwrap(); + for i in 0..3 { + let meta = sample_vm_metadata(&format!("vm-{i}")); + let path = 
dir.path().join(format!("vm-{i}.json")); + fs::write(&path, serde_json::to_string(&meta).unwrap()).unwrap(); + } + fs::write(dir.path().join("notes.txt"), "ignored").unwrap(); + + let mut vms = Vec::new(); + for entry in fs::read_dir(dir.path()).unwrap() { + let path = entry.unwrap().path(); + if path.extension().and_then(|e| e.to_str()) != Some("json") { + continue; + } + if let Ok(data) = fs::read_to_string(&path) { + if let Ok(meta) = serde_json::from_str::(&data) { + vms.push(meta); + } + } + } + assert_eq!(vms.len(), 3); + let mut names: Vec<_> = vms.iter().map(|v| v.name.clone()).collect(); + names.sort(); + assert_eq!(names, vec!["vm-0", "vm-1", "vm-2"]); + } + + #[test] + fn test_vm_metadata_is_alive_zero_pid() { + let meta = sample_vm_metadata("dead-vm"); + assert!(!meta.is_alive()); + } +} diff --git a/crates/kit/src/vfkit/rm.rs b/crates/kit/src/vfkit/rm.rs new file mode 100644 index 000000000..ec48044e8 --- /dev/null +++ b/crates/kit/src/vfkit/rm.rs @@ -0,0 +1,59 @@ +//! vm rm — Remove a persistent VM and its metadata. + +use std::fs; + +use clap::Parser; +use color_eyre::Result; +use tracing::info; + +use super::VmMetadata; + +/// Options for `vm rm`. +#[derive(Parser, Debug)] +pub struct VmRmOpts { + /// VM name + pub name: String, + /// Force removal even if running + #[clap(short, long)] + pub force: bool, +} + +/// Remove a persistent VM, optionally force-killing it. +pub fn run(opts: VmRmOpts) -> Result<()> { + let meta = VmMetadata::load(&opts.name)?; + + if meta.is_alive() { + if !opts.force { + color_eyre::eyre::bail!( + "VM '{}' is running. 
Stop it first or use --force", + opts.name + ); + } + info!("force stopping VM '{}'...", opts.name); + crate::vfkit::stop::run(&opts.name)?; + } + + for path in [&meta.efi_store, &meta.serial_log] { + if !path.is_empty() { + if let Err(e) = fs::remove_file(path) { + if e.kind() != std::io::ErrorKind::NotFound { + tracing::debug!("failed to remove {}: {}", path, e); + } + } + } + } + + let vms_dir = VmMetadata::vms_dir(); + for suffix in ["-gvproxy.sock", "-gvproxy-svc.sock"] { + let p = vms_dir.join(format!("{}{}", meta.name, suffix)); + if let Err(e) = fs::remove_file(&p) { + if e.kind() != std::io::ErrorKind::NotFound { + tracing::debug!("failed to remove {}: {}", p.display(), e); + } + } + } + + VmMetadata::remove(&opts.name); + println!("Removed '{}'", opts.name); + Ok(()) +} diff --git a/crates/kit/src/vfkit/rm_all.rs b/crates/kit/src/vfkit/rm_all.rs new file mode 100644 index 000000000..2ed80df66 --- /dev/null +++ b/crates/kit/src/vfkit/rm_all.rs @@ -0,0 +1,44 @@ +//! vm rm-all — Remove all persistent VMs. + +use std::io::Write; + +use super::VmMetadata; +use color_eyre::Result; + +/// Remove all persistent VMs, prompting unless `force` is set. +pub fn run(force: bool) -> Result<()> { + let vms = VmMetadata::list_all()?; + if vms.is_empty() { + println!("No VMs found."); + return Ok(()); + } + + if !force { + println!("Found {} VM(s):", vms.len()); + for vm in &vms { + println!( + " {} ({})", + vm.name, + if vm.is_alive() { "running" } else { "stopped" } + ); + } + print!("Remove all VMs? 
[y/N]: "); + std::io::stdout().flush()?; + let mut input = String::new(); + std::io::stdin().read_line(&mut input)?; + let input = input.trim().to_lowercase(); + if input != "y" && input != "yes" { + println!("Aborted."); + return Ok(()); + } + } + + for vm in &vms { + let opts = super::rm::VmRmOpts { + name: vm.name.clone(), + force: true, + }; + super::rm::run(opts)?; + } + Ok(()) +} diff --git a/crates/kit/src/vfkit/run.rs b/crates/kit/src/vfkit/run.rs new file mode 100644 index 000000000..389aa0ca7 --- /dev/null +++ b/crates/kit/src/vfkit/run.rs @@ -0,0 +1,188 @@ +//! vm run — Start a persistent VM from a disk image using vfkit + EFI boot. + +use std::fs; +use std::path::Path; +use std::process::{Command, Stdio}; + +use clap::Parser; +use color_eyre::{eyre::bail, Result}; +use tracing::info; + +use super::VmMetadata; +use crate::run_ephemeral_macos::{ + clear_xattr, expose_ssh_port, find_available_ssh_port, find_vfkit, generate_mac, start_gvproxy, + wait_for_ssh, +}; + +/// Options for `vm run`. +#[derive(Parser, Debug)] +pub struct VmRunOpts { + /// Disk image path (.raw) + pub disk: String, + /// VM name for identification + #[clap(long)] + pub name: Option, + /// Number of vCPUs + #[clap(long)] + pub vcpus: Option, + /// Memory size (e.g. "4G", "2048M", or plain number for MB) + #[clap(long, default_value = "4G")] + pub memory: String, + /// Path to an existing SSH private key + #[clap(long)] + pub ssh_key: Option, + /// SSH username (default: root) + #[clap(long, default_value = "root")] + pub ssh_user: String, + /// SSH port (default: auto-allocate) + #[clap(long)] + pub ssh_port: Option, + /// Display VM console in GUI window + #[clap(long)] + pub gui: bool, +} + +/// Create and launch a persistent VM from a disk image via vfkit + EFI. 
+pub fn run(opts: VmRunOpts) -> Result<()> { + let vfkit_bin = find_vfkit()?; + + if !Path::new(&opts.disk).exists() { + bail!("disk image not found: {}", opts.disk); + } + clear_xattr(Path::new(&opts.disk)); + + let ssh_key_path = match &opts.ssh_key { + Some(p) => p.clone(), + None => find_ssh_key()?, + }; + if !Path::new(&ssh_key_path).exists() { + bail!( + "SSH key not found: {}. Specify with --ssh-key", + ssh_key_path + ); + } + + let vm_name = opts.name.clone().unwrap_or_else(|| { + Path::new(&opts.disk) + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("vm") + .to_string() + }); + + let vms_dir = VmMetadata::vms_dir(); + fs::create_dir_all(&vms_dir)?; + + let efi_store = vms_dir.join(format!("{}-efi-vars", vm_name)); + let serial_log = vms_dir.join(format!("{}-serial.log", vm_name)); + let gvproxy_sock = vms_dir.join(format!("{}-gvproxy.sock", vm_name)); + let services_sock = vms_dir.join(format!("{}-gvproxy-svc.sock", vm_name)); + + let gvproxy_sock_str = gvproxy_sock.to_string_lossy().to_string(); + let services_sock_str = services_sock.to_string_lossy().to_string(); + + info!("starting gvproxy..."); + let gvproxy_child = start_gvproxy(&gvproxy_sock_str, &services_sock_str)?; + + let mac = generate_mac(); + let mac_str = format!( + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] + ); + + let vcpus = opts.vcpus.unwrap_or(2); + let memory_mb = crate::run_ephemeral_macos::parse_memory_to_mb(&opts.memory)?; + + let mut vfkit_args = vec![ + "--cpus".to_string(), + vcpus.to_string(), + "--memory".to_string(), + memory_mb.to_string(), + "--bootloader".to_string(), + format!("efi,variable-store={},create", efi_store.display()), + "--device".to_string(), + format!("virtio-blk,path={}", opts.disk), + "--device".to_string(), + format!( + "virtio-net,unixSocketPath={},mac={}", + gvproxy_sock_str, mac_str + ), + "--device".to_string(), + format!("virtio-serial,logFilePath={}", serial_log.display()), + 
"--device".to_string(), + "virtio-rng".to_string(), + ]; + if opts.gui { + vfkit_args.push("--gui".to_string()); + } + + info!("launching vfkit (EFI boot)..."); + let vfkit_child = Command::new(&vfkit_bin) + .args(&vfkit_args) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn()?; + + let ssh_port = opts.ssh_port.unwrap_or_else(find_available_ssh_port); + info!("SSH port: {}", ssh_port); + + info!("setting up SSH port forwarding..."); + for attempt in 0..15u32 { + match expose_ssh_port(&services_sock_str, "192.168.127.2", ssh_port) { + Ok(_) => { + info!("SSH port {} forwarded", ssh_port); + break; + } + Err(e) if attempt < 14 => { + tracing::debug!("SSH port forward attempt {}: {}", attempt, e); + let backoff = 200 * 2u64.pow(attempt.min(4)); + std::thread::sleep(std::time::Duration::from_millis(backoff)); + } + Err(e) => bail!("SSH port forward failed: {}", e), + } + } + + let key_path = std::path::Path::new(&ssh_key_path); + wait_for_ssh(ssh_port, key_path, &opts.ssh_user)?; + + let metadata = VmMetadata { + name: vm_name.clone(), + disk_image: opts.disk.clone(), + vfkit_pid: vfkit_child.id(), + gvproxy_pid: gvproxy_child.id(), + ssh_port, + ssh_key: ssh_key_path.clone(), + ssh_user: opts.ssh_user.clone(), + cpus: vcpus, + memory: memory_mb, + efi_store: efi_store.to_string_lossy().to_string(), + serial_log: serial_log.to_string_lossy().to_string(), + gui: opts.gui, + created: chrono::Utc::now().to_rfc3339(), + state: "running".to_string(), + }; + metadata.save()?; + + println!("VM '{}' is running", vm_name); + println!( + " ssh -p {} -i {} {}@localhost", + ssh_port, ssh_key_path, opts.ssh_user + ); + println!(); + println!("To connect: bcvk vm ssh {}", vm_name); + println!("To stop: bcvk vm stop {}", vm_name); + + Ok(()) +} + +fn find_ssh_key() -> Result { + let home = dirs::home_dir() + .ok_or_else(|| color_eyre::eyre::eyre!("cannot determine home directory"))?; + for name in &["id_ed25519", "id_rsa"] { + let path = home.join(".ssh").join(name); + if 
path.exists() { + return Ok(path.to_string_lossy().to_string()); + } + } + bail!("no SSH key found in ~/.ssh/. Generate with: ssh-keygen -t ed25519") +} diff --git a/crates/kit/src/vfkit/ssh.rs b/crates/kit/src/vfkit/ssh.rs new file mode 100644 index 000000000..74af46736 --- /dev/null +++ b/crates/kit/src/vfkit/ssh.rs @@ -0,0 +1,24 @@ +//! vm ssh — SSH into a running persistent VM. + +use super::VmMetadata; +use crate::run_ephemeral_macos::run_ssh_interactive; +use clap::Parser; +use color_eyre::{eyre::bail, Result}; + +/// Options for `vm ssh`. +#[derive(Parser, Debug)] +pub struct VmSshOpts { + /// VM name + pub name: String, +} + +/// Open an interactive SSH session to a running persistent VM. +pub fn run(opts: VmSshOpts) -> Result<()> { + let vm = VmMetadata::load(&opts.name)?; + if !vm.is_alive() { + bail!("VM '{}' is not running", opts.name); + } + let key_path = std::path::Path::new(&vm.ssh_key); + run_ssh_interactive(vm.ssh_port, key_path, &vm.ssh_user)?; + Ok(()) +} diff --git a/crates/kit/src/vfkit/start.rs b/crates/kit/src/vfkit/start.rs new file mode 100644 index 000000000..f2f2a48f3 --- /dev/null +++ b/crates/kit/src/vfkit/start.rs @@ -0,0 +1,115 @@ +//! vm start — Restart a stopped persistent VM. + +use std::process::{Command, Stdio}; + +use clap::Parser; +use color_eyre::{eyre::bail, Result}; +use tracing::info; + +use super::VmMetadata; +use crate::run_ephemeral_macos::{ + clear_xattr, expose_ssh_port, find_vfkit, generate_mac, start_gvproxy, wait_for_ssh, +}; + +/// Options for `vm start`. +#[derive(Parser, Debug)] +pub struct VmStartOpts { + /// VM name + pub name: String, + /// Display VM console in GUI window + #[clap(long)] + pub gui: bool, +} + +/// Restart a stopped persistent VM by re-launching vfkit. 
+pub fn run(opts: VmStartOpts) -> Result<()> { + let mut meta = VmMetadata::load(&opts.name)?; + if meta.is_alive() { + bail!("VM '{}' is already running", opts.name); + } + + if !std::path::Path::new(&meta.disk_image).exists() { + bail!("disk image not found: {}", meta.disk_image); + } + clear_xattr(std::path::Path::new(&meta.disk_image)); + + let vfkit_bin = find_vfkit()?; + let vms_dir = VmMetadata::vms_dir(); + + let gvproxy_sock = vms_dir.join(format!("{}-gvproxy.sock", meta.name)); + let services_sock = vms_dir.join(format!("{}-gvproxy-svc.sock", meta.name)); + let gvproxy_sock_str = gvproxy_sock.to_string_lossy().to_string(); + let services_sock_str = services_sock.to_string_lossy().to_string(); + + info!("starting gvproxy..."); + let gvproxy_child = start_gvproxy(&gvproxy_sock_str, &services_sock_str)?; + + let mac = generate_mac(); + let mac_str = format!( + "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] + ); + + let gui = opts.gui || meta.gui; + let mut vfkit_args = vec![ + "--cpus".to_string(), + meta.cpus.to_string(), + "--memory".to_string(), + meta.memory.to_string(), + "--bootloader".to_string(), + format!("efi,variable-store={},create", meta.efi_store), + "--device".to_string(), + format!("virtio-blk,path={}", meta.disk_image), + "--device".to_string(), + format!( + "virtio-net,unixSocketPath={},mac={}", + gvproxy_sock_str, mac_str + ), + "--device".to_string(), + format!("virtio-serial,logFilePath={}", meta.serial_log), + "--device".to_string(), + "virtio-rng".to_string(), + ]; + if gui { + vfkit_args.push("--gui".to_string()); + } + + info!("launching vfkit (EFI boot)..."); + let vfkit_child = Command::new(&vfkit_bin) + .args(&vfkit_args) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn()?; + + info!("setting up SSH port forwarding..."); + for attempt in 0..15u32 { + match expose_ssh_port(&services_sock_str, "192.168.127.2", meta.ssh_port) { + Ok(_) => { + info!("SSH port {} forwarded", 
meta.ssh_port); + break; + } + Err(e) if attempt < 14 => { + tracing::debug!("SSH port forward attempt {}: {}", attempt, e); + let backoff = 200 * 2u64.pow(attempt.min(4)); + std::thread::sleep(std::time::Duration::from_millis(backoff)); + } + Err(e) => bail!("SSH port forward failed: {}", e), + } + } + + let key_path = std::path::Path::new(&meta.ssh_key); + wait_for_ssh(meta.ssh_port, key_path, &meta.ssh_user)?; + + meta.vfkit_pid = vfkit_child.id(); + meta.gvproxy_pid = gvproxy_child.id(); + meta.state = "running".to_string(); + meta.gui = gui; + meta.save()?; + + println!("Started '{}'", meta.name); + println!( + " ssh -p {} -i {} {}@localhost", + meta.ssh_port, meta.ssh_key, meta.ssh_user + ); + Ok(()) +} diff --git a/crates/kit/src/vfkit/stop.rs b/crates/kit/src/vfkit/stop.rs new file mode 100644 index 000000000..24ea6ceba --- /dev/null +++ b/crates/kit/src/vfkit/stop.rs @@ -0,0 +1,63 @@ +//! vm stop — Stop a running persistent VM. + +use std::process::{Command, Stdio}; +use std::time::Duration; + +use super::VmMetadata; +use color_eyre::{eyre::bail, Result}; +use tracing::info; + +/// Stop a running persistent VM by sending SIGTERM to vfkit. 
+pub fn run(name: &str) -> Result<()> { + let mut meta = VmMetadata::load(name)?; + if !meta.is_alive() { + bail!("VM '{}' is not running", name); + } + + info!("stopping VM '{}'...", name); + + if meta.vfkit_pid > 0 { + if let Err(e) = Command::new("kill") + .args(["-TERM", &meta.vfkit_pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::debug!("failed to SIGTERM vfkit (PID {}): {}", meta.vfkit_pid, e); + } + std::thread::sleep(Duration::from_secs(3)); + if meta.is_alive() { + if let Err(e) = Command::new("kill") + .args(["-KILL", &meta.vfkit_pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::debug!("failed to SIGKILL vfkit (PID {}): {}", meta.vfkit_pid, e); + } + } + } + + if meta.gvproxy_pid > 0 { + if let Err(e) = Command::new("kill") + .args(["-KILL", &meta.gvproxy_pid.to_string()]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + { + tracing::debug!( + "failed to SIGKILL gvproxy (PID {}): {}", + meta.gvproxy_pid, + e + ); + } + } + + meta.state = "stopped".to_string(); + meta.vfkit_pid = 0; + meta.gvproxy_pid = 0; + meta.save()?; + + println!("Stopped '{}'", name); + Ok(()) +} From 4a1c1dddd8a572fb9819a2be9d292dcfe5009ba9 Mon Sep 17 00:00:00 2001 From: Shion Tanaka Date: Tue, 12 May 2026 01:10:31 +0900 Subject: [PATCH 2/2] macOS: add vfkit backend for ephemeral and persistent VMs macOS has no KVM/QEMU, so this adds vfkit as the VM backend. Ephemeral VMs use a custom nbdkit EROFS plugin that dynamically generates rootfs, ESP, and GPT from the container overlay via NBD. Persistent VMs use EFI boot. The vfkit/ module mirrors the libvirt/ directory structure, and CLI options match Linux where applicable. Plugin distribution method is TBD. Build and run on macOS: cargo build --release codesign -fs - target/release/bcvk Tested on macOS (Apple Silicon) with rootful and rootless podman machine. 
Assisted-by: Claude Code (Opus 4.6) Signed-off-by: Shion Tanaka --- Cargo.lock | 17 + crates/kit/Cargo.toml | 1 + crates/kit/src/ephemeral_macos.rs | 54 +- crates/kit/src/lib.rs | 2 + crates/kit/src/main.rs | 2 + crates/kit/src/nbdkit_macos.rs | 186 +++++++ crates/kit/src/run_ephemeral_macos.rs | 478 ++++++----------- crates/kit/src/vfkit/mod.rs | 14 +- crates/kit/src/vfkit/stop.rs | 26 +- crates/nbdkit-erofs-plugin/Cargo.lock | 39 ++ crates/nbdkit-erofs-plugin/Cargo.toml | 13 + crates/nbdkit-erofs-plugin/src/dir_walk.rs | 138 +++++ crates/nbdkit-erofs-plugin/src/erofs.rs | 502 ++++++++++++++++++ crates/nbdkit-erofs-plugin/src/fat32.rs | 548 ++++++++++++++++++++ crates/nbdkit-erofs-plugin/src/gpt.rs | 290 +++++++++++ crates/nbdkit-erofs-plugin/src/initramfs.rs | 182 +++++++ crates/nbdkit-erofs-plugin/src/lib.rs | 389 ++++++++++++++ crates/nbdkit-erofs-plugin/src/regions.rs | 80 +++ 18 files changed, 2595 insertions(+), 366 deletions(-) create mode 100644 crates/kit/src/nbdkit_macos.rs create mode 100644 crates/nbdkit-erofs-plugin/Cargo.lock create mode 100644 crates/nbdkit-erofs-plugin/Cargo.toml create mode 100644 crates/nbdkit-erofs-plugin/src/dir_walk.rs create mode 100644 crates/nbdkit-erofs-plugin/src/erofs.rs create mode 100644 crates/nbdkit-erofs-plugin/src/fat32.rs create mode 100644 crates/nbdkit-erofs-plugin/src/gpt.rs create mode 100644 crates/nbdkit-erofs-plugin/src/initramfs.rs create mode 100644 crates/nbdkit-erofs-plugin/src/lib.rs create mode 100644 crates/nbdkit-erofs-plugin/src/regions.rs diff --git a/Cargo.lock b/Cargo.lock index f41a4da01..3ecf4e8a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -632,6 +632,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -1833,6 +1842,14 @@ 
dependencies = [ "tempfile", ] +[[package]] +name = "nbdkit-erofs-plugin" +version = "0.1.0" +dependencies = [ + "crc32fast", + "libc", +] + [[package]] name = "newtype-uuid" version = "1.3.2" diff --git a/crates/kit/Cargo.toml b/crates/kit/Cargo.toml index 96d5b5f5d..65ca5a1e0 100644 --- a/crates/kit/Cargo.toml +++ b/crates/kit/Cargo.toml @@ -60,6 +60,7 @@ libsystemd = "0.7" # macOS-only dependencies (vfkit backend) [target.'cfg(target_os = "macos")'.dependencies] +rustix = { version = "1", features = ["process"] } zstd = "0.13" [dev-dependencies] diff --git a/crates/kit/src/ephemeral_macos.rs b/crates/kit/src/ephemeral_macos.rs index ca3255247..8d46075f4 100644 --- a/crates/kit/src/ephemeral_macos.rs +++ b/crates/kit/src/ephemeral_macos.rs @@ -137,28 +137,55 @@ fn cmd_rm_all(force: bool) -> Result<()> { for vm in &vms { if vm.is_alive() { - if let Err(e) = Command::new("kill") - .args([&vm.pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let Err(e) = rustix::process::kill_process( + rustix::process::Pid::from_raw(vm.pid as i32).unwrap(), + rustix::process::Signal::TERM, + ) { tracing::warn!("failed to kill VM process {}: {}", vm.pid, e); } if vm.gvproxy_pid > 0 { - if let Err(e) = Command::new("kill") - .args([&vm.gvproxy_pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let Err(e) = rustix::process::kill_process( + rustix::process::Pid::from_raw(vm.gvproxy_pid as i32).unwrap(), + rustix::process::Signal::TERM, + ) { tracing::warn!("failed to kill gvproxy {}: {}", vm.gvproxy_pid, e); } } } + if let Some(ref container) = vm.nbd_container { + crate::nbdkit_macos::stop_nbdkit_container(container); + } EphemeralVmMetadata::remove(&vm.name); println!("Removed {}", vm.name); } + + // Sweep orphaned resources inside podman machine + if let Ok(machine) = run_ephemeral_macos::detect_machine_name() { + // Remove orphaned nbdkit containers + let _ = Command::new("podman") + .args([ + "machine", + 
"ssh", + &machine, + "--", + "podman", + "rm", + "-f", + "--filter", + "name=bcvk-nbd-", + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + // Unmount any remaining container image overlays + let _ = Command::new("podman") + .args([ + "machine", "ssh", &machine, "--", "podman", "image", "umount", "--all", + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + } Ok(()) } @@ -170,7 +197,8 @@ fn cmd_ssh(name: &str, args: &[String]) -> Result<()> { } // Try to set up SSH port forwarding via VM-specific gvproxy socket - let svc_sock = format!("/private/tmp/bcvk/{}-gvproxy-svc.sock", name); + let base = run_ephemeral_macos::ephemeral_base_dir(); + let svc_sock = format!("{}/{}-gvproxy-svc.sock", base.display(), name); if std::path::Path::new(&svc_sock).exists() { if let Err(e) = run_ephemeral_macos::expose_ssh_port(&svc_sock, "192.168.127.2", vm.ssh_port) diff --git a/crates/kit/src/lib.rs b/crates/kit/src/lib.rs index a3aa51578..d7257cb8e 100644 --- a/crates/kit/src/lib.rs +++ b/crates/kit/src/lib.rs @@ -13,6 +13,8 @@ pub mod kernel; // macOS-only modules (vfkit backend) #[cfg(target_os = "macos")] +pub mod nbdkit_macos; +#[cfg(target_os = "macos")] pub mod run_ephemeral_macos; #[cfg(target_os = "macos")] diff --git a/crates/kit/src/main.rs b/crates/kit/src/main.rs index cc4969312..b92d35783 100644 --- a/crates/kit/src/main.rs +++ b/crates/kit/src/main.rs @@ -65,6 +65,8 @@ mod varlink_ipc; #[cfg(target_os = "macos")] mod ephemeral_macos; #[cfg(target_os = "macos")] +mod nbdkit_macos; +#[cfg(target_os = "macos")] mod run_ephemeral_macos; #[cfg(target_os = "macos")] mod vfkit; diff --git a/crates/kit/src/nbdkit_macos.rs b/crates/kit/src/nbdkit_macos.rs new file mode 100644 index 000000000..40c2cc20e --- /dev/null +++ b/crates/kit/src/nbdkit_macos.rs @@ -0,0 +1,186 @@ +//! nbdkit EROFS plugin management for macOS ephemeral VMs. 
+ +use color_eyre::{ + eyre::{bail, Context}, + Result, +}; +use std::process::{Command, Stdio}; +use std::time::Duration; +use tracing::info; + +use crate::run_ephemeral_macos::detect_machine_name; + +/// Path to the nbdkit EROFS plugin shared library inside podman machine. +const NBDKIT_EROFS_PLUGIN_PATH: &str = "/var/tmp/bcvk/libnbdkit_erofs_plugin.so"; + +/// Get the merged overlay path from podman image mount. +pub(crate) fn get_merged_path(machine: &str, rootful: bool, image: &str) -> Result { + let output = if rootful { + Command::new("podman") + .args([ + "machine", "ssh", machine, "--", "podman", "image", "mount", image, + ]) + .output() + .context("podman image mount")? + } else { + Command::new("podman") + .args([ + "machine", "ssh", machine, "--", "podman", "unshare", "podman", "image", "mount", + image, + ]) + .output() + .context("podman image mount")? + }; + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("podman image mount failed: {}", stderr.trim()); + } + Ok(String::from_utf8_lossy(&output.stdout).trim().to_string()) +} + +/// Start nbdkit with the erofs plugin for dynamic EROFS + ESP + GPT generation. 
+pub(crate) fn start_nbdkit_erofs_plugin( + machine: &str, + merged_path: &str, + cmdline: &str, + ssh_pubkey: &str, + nbd_port: u16, + vm_name: &str, +) -> Result { + let container_name = format!("bcvk-nbd-{}", vm_name); + + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + machine, + "--", + "podman", + "rm", + "-f", + &container_name, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + + fn shell_escape(s: &str) -> String { + format!("'{}'", s.replace('\'', "'\\''")) + } + + let cmdline_esc = shell_escape(&format!("cmdline={}", cmdline)); + let dir_esc = shell_escape(&format!("dir={}", merged_path)); + + let mut ssh_param = String::new(); + if !ssh_pubkey.is_empty() { + ssh_param = format!(" {}", shell_escape(&format!("ssh_pubkey={}", ssh_pubkey))); + } + + let podman_cmd = format!( + "podman run -d --name {name} --security-opt label=disable \ + -p {port}:10809 \ + -v {merged}:{merged}:ro \ + -v {plugin}:/plugin.so:ro \ + -v /usr/bin/nbdkit:/usr/bin/nbdkit:ro \ + -v /usr/lib64/nbdkit:/usr/lib64/nbdkit:ro \ + quay.io/fedora/fedora:latest \ + nbdkit -f -p 10809 -r /plugin.so \ + {dir} {cmdline}{ssh}", + name = container_name, + port = nbd_port, + merged = merged_path, + plugin = NBDKIT_EROFS_PLUGIN_PATH, + dir = dir_esc, + cmdline = cmdline_esc, + ssh = ssh_param, + ); + + let output = Command::new("podman") + .args(["machine", "ssh", machine, "--", &podman_cmd]) + .output() + .context("failed to start nbdkit erofs plugin")?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + bail!("failed to start nbdkit erofs plugin: {}", stderr.trim()); + } + + info!("waiting for nbdkit on port {}...", nbd_port); + let deadline = std::time::Instant::now() + Duration::from_secs(30); + loop { + if let Ok(mut stream) = std::net::TcpStream::connect_timeout( + &std::net::SocketAddr::from(([127, 0, 0, 1], nbd_port)), + Duration::from_millis(500), + ) { + use std::io::Read; + 
stream.set_read_timeout(Some(Duration::from_secs(2))).ok(); + let mut buf = [0u8; 8]; + if stream.read_exact(&mut buf).is_ok() && &buf == b"NBDMAGIC" { + break; + } + } + if std::time::Instant::now() > deadline { + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + machine, + "--", + "podman", + "rm", + "-f", + &container_name, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + bail!( + "nbdkit erofs plugin did not become ready on port {}", + nbd_port + ); + } + std::thread::sleep(Duration::from_millis(500)); + } + + Ok(container_name) +} + +/// Find an available TCP port for NBD in range 10800-10900. +pub fn find_available_nbd_port() -> u16 { + use rand::Rng; + let mut rng = rand::rng(); + const PORT_RANGE_START: u16 = 10800; + const PORT_RANGE_END: u16 = 10900; + for _ in 0..100 { + let port = rng.random_range(PORT_RANGE_START..PORT_RANGE_END); + if std::net::TcpListener::bind(("127.0.0.1", port)).is_ok() { + return port; + } + } + for port in PORT_RANGE_START..PORT_RANGE_END { + if std::net::TcpListener::bind(("127.0.0.1", port)).is_ok() { + return port; + } + } + PORT_RANGE_START +} + +/// Stop and remove an nbdkit container (best-effort). +pub fn stop_nbdkit_container(container_name: &str) { + if let Ok(machine) = detect_machine_name() { + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + &machine, + "--", + "podman", + "rm", + "-f", + container_name, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + } +} diff --git a/crates/kit/src/run_ephemeral_macos.rs b/crates/kit/src/run_ephemeral_macos.rs index d7fe9257f..2265aacb7 100644 --- a/crates/kit/src/run_ephemeral_macos.rs +++ b/crates/kit/src/run_ephemeral_macos.rs @@ -1,16 +1,14 @@ -//! Ephemeral VM launch flow for macOS using vfkit + SquashFS. +//! Ephemeral VM launch flow for macOS using vfkit + NBD EROFS plugin. //! -//! Boot flow: -//! 1. Extract kernel + initramfs from container image -//! 2. 
Create SquashFS rootfs (lz4, cached by digest) -//! 3. Decompress vmlinuz PE+zstd → uncompressed ARM64 Image -//! 4. Append bcvk units CPIO to initramfs (/etc overlay + /var tmpfs + SSH) -//! 5. Launch vfkit with virtio-blk (SquashFS) + virtio-net (gvproxy) +//! Boot flow (fully diskless): +//! 1. Mount container image overlay (`podman image mount`) +//! 2. Start nbdkit with erofs plugin (dynamically generates GPT + ESP + EROFS) +//! 3. Launch vfkit with EFI boot via NBD + virtio-net (gvproxy) +//! 4. Wait for SSH and execute commands //! //! Common helpers (gvproxy, SSH, vfkit detection) are pub for reuse by vfkit/ module. -use std::fs::{self, OpenOptions}; -use std::io::{Seek, SeekFrom, Write}; +use std::fs; use std::os::unix::net::UnixStream; use std::path::Path; use std::process::{Command, Stdio}; @@ -22,6 +20,13 @@ use color_eyre::{ }; use tracing::{debug, info}; +/// Base directory for ephemeral VM state on macOS host. +pub fn ephemeral_base_dir() -> std::path::PathBuf { + dirs::home_dir() + .unwrap_or_else(|| std::path::PathBuf::from("/tmp")) + .join(".local/share/bcvk/ephemeral") +} + // --- Data structures --- /// Metadata for a running ephemeral VM, persisted as JSON for `ps` and `ssh`. @@ -46,13 +51,19 @@ pub struct EphemeralVmMetadata { pub log_path: Option, /// ISO 8601 timestamp when the VM was created. pub created: String, + /// Name of the nbdkit podman container serving the rootfs. + #[serde(default)] + pub nbd_container: Option, + /// NBD port allocated for this VM's rootfs. + #[serde(default)] + pub nbd_port: Option, } #[allow(dead_code)] impl EphemeralVmMetadata { /// Return the directory path for ephemeral VM metadata files. pub fn vms_dir() -> std::path::PathBuf { - std::path::PathBuf::from("/private/tmp/bcvk/vms") + ephemeral_base_dir().join("vms") } /// Save metadata to a JSON file in the VMs directory. @@ -98,15 +109,10 @@ impl EphemeralVmMetadata { Ok(vms) } - /// Check if the VM process is still alive via kill -0. 
+ /// Check if the VM process is still alive via kill(pid, 0). pub fn is_alive(&self) -> bool { - Command::new("kill") - .args(["-0", &self.pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - .map(|s| s.success()) - .unwrap_or(false) + rustix::process::test_kill_process(rustix::process::Pid::from_raw(self.pid as i32).unwrap()) + .is_ok() } } @@ -167,35 +173,53 @@ pub fn parse_memory_to_mb(s: &str) -> Result { struct VmCleanup { vfkit_pid: u32, gvproxy_pid: u32, + nbd_container: Option, + image: String, vm_name: String, } impl Drop for VmCleanup { fn drop(&mut self) { tracing::debug!("cleaning up VM processes..."); - if let Err(e) = Command::new("kill") - .arg(self.vfkit_pid.to_string()) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let Some(ref name) = self.nbd_container { + crate::nbdkit_macos::stop_nbdkit_container(name); + } + if let Err(e) = rustix::process::kill_process( + rustix::process::Pid::from_raw(self.vfkit_pid as i32).unwrap(), + rustix::process::Signal::TERM, + ) { tracing::warn!("failed to kill vfkit (PID {}): {}", self.vfkit_pid, e); } - if let Err(e) = Command::new("kill") - .arg(self.gvproxy_pid.to_string()) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let Err(e) = rustix::process::kill_process( + rustix::process::Pid::from_raw(self.gvproxy_pid as i32).unwrap(), + rustix::process::Signal::TERM, + ) { tracing::warn!("failed to kill gvproxy (PID {}): {}", self.gvproxy_pid, e); } + // Release container image overlay mount + if let Ok(machine) = detect_machine_name() { + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + &machine, + "--", + "podman", + "image", + "umount", + &self.image, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); + } EphemeralVmMetadata::remove(&self.vm_name); } } // --- Main entry point --- -/// Run an ephemeral VM from a container image using vfkit + SquashFS. 
+/// Run an ephemeral VM from a container image using vfkit + EROFS over NBD. pub fn run(opts: RunEphemeralOpts) -> Result<()> { if opts.gui && opts.detach { bail!("--gui and --detach cannot be used together (GUI requires foreground process)"); @@ -206,9 +230,9 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { } let vfkit_bin = find_vfkit()?; - info!(image = %opts.image, "starting ephemeral VM on macOS (vfkit + SquashFS)"); + info!(image = %opts.image, "starting ephemeral VM on macOS (vfkit + EROFS)"); - let cache_base = std::path::PathBuf::from("/private/tmp/bcvk"); + let cache_base = ephemeral_base_dir(); fs::create_dir_all(&cache_base)?; let machine = detect_machine_name()?; @@ -228,121 +252,37 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { .unwrap_or_else(|| format!("ephemeral-{}", &digest_short[..8])); let ssh_key_path = cache_base.join(format!("{}-key", vm_name)); - let boot_dir = cache_base.join(format!("boot-{}", digest_short)); - fs::create_dir_all(&boot_dir)?; - let squashfs_cache = format!("/private/tmp/bcvk/rootfs-{}.squashfs", digest_short); - let squashfs_path = format!("/private/tmp/bcvk/{}-rootfs.squashfs", vm_name); - let vmlinuz_path = boot_dir.join("vmlinuz"); - let image_path = boot_dir.join("Image"); - let initramfs_orig = boot_dir.join("initramfs-orig.img"); - let initramfs_path = cache_base.join(format!("{}-initramfs.img", vm_name)); - - // Step 1+2: kernel extract + SquashFS creation (parallel) - let step2_handle = if !Path::new(&squashfs_cache).exists() { - let mc = machine.clone(); - let rf = rootful; - let img = opts.image.clone(); - let sc = squashfs_cache.clone(); - Some(std::thread::spawn(move || -> Result<()> { - info!("creating SquashFS image (lz4)..."); - create_squashfs_image(&mc, rf, &img, &sc) - })) - } else { - info!("using cached SquashFS: {}", squashfs_cache); - None - }; - - if !vmlinuz_path.exists() || !initramfs_orig.exists() { - info!("extracting kernel and initramfs..."); - extract_kernel(&machine, &opts.image, 
&boot_dir)?; - fs::rename(boot_dir.join("initramfs.img"), &initramfs_orig)?; - } - - // Step 3+4: kernel decompress + CPIO append (parallel after Step 1) - let step3_handle = if !image_path.exists() { - let vp = vmlinuz_path.clone(); - let ip = image_path.clone(); - Some(std::thread::spawn(move || -> Result<()> { - info!("decompressing kernel (vmlinuz → Image)..."); - extract_uncompressed_kernel(&vp, &ip) - })) - } else { - None - }; - - fs::copy(&initramfs_orig, &initramfs_path)?; - { - let cpio_data = crate::cpio::create_initramfs_units_cpio() - .map_err(|e| eyre!("failed to create CPIO: {e}"))?; - let mut f = OpenOptions::new().append(true).open(&initramfs_path)?; - let sz = f.seek(SeekFrom::End(0))?; - let pad = sz.next_multiple_of(4) - sz; - if pad > 0 { - f.write_all(&vec![0u8; pad as usize])?; - } - f.write_all(&cpio_data)?; + fs::create_dir_all(&cache_base)?; - if opts.ssh_keygen || !opts.execute.is_empty() { - info!("generating SSH keypair..."); - let _ = fs::remove_file(&ssh_key_path); - let _ = fs::remove_file(ssh_key_path.with_extension("pub")); - let status = Command::new("ssh-keygen") - .args([ - "-t", - "ed25519", - "-f", - &ssh_key_path.to_string_lossy(), - "-N", - "", - "-q", - ]) - .status()?; - if !status.success() { - bail!("ssh-keygen failed (exit code: {:?})", status.code()); - } - let pubkey = fs::read_to_string(ssh_key_path.with_extension("pub"))?; - let ssh_cpio = create_ssh_setup_cpio(pubkey.trim())?; - let pos = f.seek(SeekFrom::End(0))?; - let pad = pos.next_multiple_of(4) - pos; - if pad > 0 { - f.write_all(&vec![0u8; pad as usize])?; - } - f.write_all(&ssh_cpio)?; + // Generate SSH keypair on macOS host + let mut ssh_pubkey = String::new(); + if opts.ssh_keygen || !opts.execute.is_empty() { + info!("generating SSH keypair..."); + let _ = fs::remove_file(&ssh_key_path); + let _ = fs::remove_file(ssh_key_path.with_extension("pub")); + let status = Command::new("ssh-keygen") + .args([ + "-t", + "ed25519", + "-f", + 
&ssh_key_path.to_string_lossy(), + "-N", + "", + "-q", + ]) + .status()?; + if !status.success() { + bail!("ssh-keygen failed"); } - info!("initramfs prepared"); - } - - if let Some(h) = step3_handle { - h.join() - .map_err(|_| eyre!("kernel decompression thread panicked"))??; - } - if let Some(h) = step2_handle { - h.join() - .map_err(|_| eyre!("squashfs creation thread panicked"))??; + ssh_pubkey = fs::read_to_string(ssh_key_path.with_extension("pub"))? + .trim() + .to_string(); } - // CoW clone SquashFS for this VM (allows concurrent use of same image) - let _ = fs::remove_file(&squashfs_path); - let clone_status = Command::new("cp") - .args(["-c", &squashfs_cache, &squashfs_path]) - .status() - .context("cloning SquashFS")?; - if !clone_status.success() { - fs::copy(&squashfs_cache, &squashfs_path).context("copying SquashFS")?; - } - - // 5. gvproxy + vfkit - let gvproxy_sock = cache_base.join(format!("{}-gvproxy.sock", vm_name)); - let services_sock = cache_base.join(format!("{}-gvproxy-svc.sock", vm_name)); - let gvproxy_sock_str = gvproxy_sock.to_string_lossy().to_string(); - let services_sock_str = services_sock.to_string_lossy().to_string(); - info!("starting gvproxy..."); - let mut gvproxy_child = start_gvproxy(&gvproxy_sock_str, &services_sock_str)?; - let mut cmdline_parts: Vec<&str> = vec![ - "root=/dev/vda", + "root=/dev/vda2", "ro", - "rootfstype=squashfs", + "rootfstype=erofs", "console=tty0", "console=hvc0", "loglevel=4", @@ -354,18 +294,39 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { cmdline_parts.extend(&user_args); let cmdline = cmdline_parts.join(" "); + // Get container image merged overlay path + let merged_path = crate::nbdkit_macos::get_merged_path(&machine, rootful, &opts.image)?; + info!("overlay merged: {}", merged_path); + + // Start nbdkit with erofs plugin (dynamic EROFS + ESP + GPT from overlay dir) + let nbd_port = crate::nbdkit_macos::find_available_nbd_port(); + let nbd_container_name = 
crate::nbdkit_macos::start_nbdkit_erofs_plugin( + &machine, + &merged_path, + &cmdline, + &ssh_pubkey, + nbd_port, + &vm_name, + )?; + std::thread::sleep(Duration::from_millis(500)); + info!("nbdkit ready on port {}", nbd_port); + + // gvproxy + vfkit (EFI boot) + let gvproxy_sock = cache_base.join(format!("{}-gvproxy.sock", vm_name)); + let services_sock = cache_base.join(format!("{}-gvproxy-svc.sock", vm_name)); + let gvproxy_sock_str = gvproxy_sock.to_string_lossy().to_string(); + let services_sock_str = services_sock.to_string_lossy().to_string(); + info!("starting gvproxy..."); + let mut gvproxy_child = start_gvproxy(&gvproxy_sock_str, &services_sock_str)?; + let mac = generate_mac(); let mac_str = format!( "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] ); - let bootloader_arg = format!( - "linux,kernel={},initrd={},cmdline=\"{}\"", - image_path.display(), - initramfs_path.display(), - cmdline - ); + let efi_var_store = cache_base.join(format!("{}-efi-vars", vm_name)); + let bootloader_arg = format!("efi,variable-store={},create", efi_var_store.display()); let vcpus = opts.vcpus.unwrap_or_else(default_vcpus); let memory_mb = parse_memory_to_mb(&opts.memory)?; @@ -378,7 +339,10 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { "--bootloader".to_string(), bootloader_arg, "--device".to_string(), - format!("virtio-blk,path={}", squashfs_path), + format!( + "nbd,uri=nbd://127.0.0.1:{}/,readonly,timeout=5000,deviceId=rootfs", + nbd_port + ), "--device".to_string(), format!( "virtio-net,unixSocketPath={},mac={}", @@ -387,6 +351,13 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { "--device".to_string(), "virtio-rng".to_string(), ]; + + let serial_log = cache_base.join(format!("{}-serial.log", vm_name)); + vfkit_args.extend([ + "--device".to_string(), + format!("virtio-serial,logFilePath={}", serial_log.display()), + ]); + if opts.gui { vfkit_args.push("--gui".to_string()); } @@ -411,15 +382,19 @@ pub fn run(opts: 
RunEphemeralOpts) -> Result<()> { gvproxy_pid: gvproxy_child.id(), ssh_port, ssh_key: ssh_key_path.to_string_lossy().to_string(), - serial_log: String::new(), + serial_log: serial_log.to_string_lossy().to_string(), log_path: None, created: chrono::Utc::now().to_rfc3339(), + nbd_container: Some(nbd_container_name.clone()), + nbd_port: Some(nbd_port), }; metadata.save()?; let _cleanup = VmCleanup { vfkit_pid: vfkit_child.id(), gvproxy_pid: gvproxy_child.id(), + nbd_container: Some(nbd_container_name.clone()), + image: opts.image.clone(), vm_name: vm_name.clone(), }; @@ -472,15 +447,31 @@ pub fn run(opts: RunEphemeralOpts) -> Result<()> { std::mem::forget(_cleanup); let status = vfkit_child.wait()?; info!("vfkit exited: {}", status); + crate::nbdkit_macos::stop_nbdkit_container(&nbd_container_name); if let Err(e) = gvproxy_child.kill() { tracing::debug!("failed to kill gvproxy: {}", e); } + // Release container image overlay mount + let _ = Command::new("podman") + .args([ + "machine", + "ssh", + &machine, + "--", + "podman", + "image", + "umount", + &opts.image, + ]) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(); EphemeralVmMetadata::remove(&vm_name); Ok(()) } fn run_detached(opts: &RunEphemeralOpts) -> Result<()> { - let cache_base = std::path::PathBuf::from("/private/tmp/bcvk"); + let cache_base = ephemeral_base_dir(); fs::create_dir_all(&cache_base)?; let digest = ensure_image_and_get_digest(&opts.image)?; let digest_short = &digest[..16.min(digest.len())]; @@ -524,128 +515,18 @@ fn run_detached(opts: &RunEphemeralOpts) -> Result<()> { serial_log: String::new(), log_path: Some(log_path.to_string_lossy().to_string()), created: chrono::Utc::now().to_rfc3339(), + nbd_container: None, + nbd_port: None, }; metadata.save()?; println!("{}", vm_name); Ok(()) } -// --- SSH setup CPIO --- - -fn create_ssh_setup_cpio(pubkey: &str) -> Result> { - use cpio::newc::Builder as NewcBuilder; - let mut buf = Vec::new(); - - let script = format!( - "#!/bin/bash\n\ - 
mkdir -p /sysroot/var/roothome/.ssh\n\ - chmod 700 /sysroot/var/roothome/.ssh\n\ - echo '{}' > /sysroot/var/roothome/.ssh/authorized_keys\n\ - chmod 600 /sysroot/var/roothome/.ssh/authorized_keys\n\ - chown -R 0:0 /sysroot/var/roothome/.ssh\n", - pubkey - ); - - let service = "[Unit]\n\ - Description=Setup SSH authorized_keys for root\n\ - DefaultDependencies=no\n\ - ConditionPathExists=/etc/initrd-release\n\ - Before=initrd-fs.target\n\ - After=bcvk-var-ephemeral.service\n\ - Requires=bcvk-var-ephemeral.service\n\ - \n\ - [Service]\n\ - Type=oneshot\n\ - RemainAfterExit=yes\n\ - ExecStart=/usr/bin/bash /usr/lib/bcvk/setup-ssh.sh\n"; - - let dropin = "[Unit]\nWants=bcvk-ssh-setup.service\n"; - - let write_entry = - |buf: &mut Vec, path: &str, data: &[u8], executable: bool| -> std::io::Result<()> { - let mode = if executable { 0o100755 } else { 0o100644 }; - let builder = NewcBuilder::new(path).mode(mode).uid(0).gid(0); - let mut writer = builder.write(buf, data.len() as u32); - writer.write_all(data)?; - writer.finish()?; - Ok(()) - }; - - let write_dir = |buf: &mut Vec, path: &str| -> std::io::Result<()> { - NewcBuilder::new(path) - .mode(0o040755) - .uid(0) - .gid(0) - .write(buf, 0) - .finish()?; - Ok(()) - }; - - write_dir(&mut buf, "usr/lib/bcvk")?; - write_entry( - &mut buf, - "usr/lib/bcvk/setup-ssh.sh", - script.as_bytes(), - true, - )?; - write_entry( - &mut buf, - "usr/lib/systemd/system/bcvk-ssh-setup.service", - service.as_bytes(), - false, - )?; - write_entry( - &mut buf, - "usr/lib/systemd/system/initrd-fs.target.d/bcvk-ssh-setup.conf", - dropin.as_bytes(), - false, - )?; - cpio::newc::trailer(&mut buf).map_err(|e| eyre!("cpio trailer: {e}"))?; - Ok(buf) -} - -// --- vfkit kernel decompression --- - -fn extract_uncompressed_kernel(vmlinuz_path: &Path, output_path: &Path) -> Result<()> { - let data = fs::read(vmlinuz_path)?; - - // Parse zboot header: offset 0x08 = payload_offset (le32), 0x0c = payload_size (le32) - let (pos, payload_end) = if 
data.len() >= 16 && &data[4..8] == b"zimg" { - let payload_offset = u32::from_le_bytes(data[8..12].try_into().unwrap()) as usize; - let payload_size = u32::from_le_bytes(data[12..16].try_into().unwrap()) as usize; - if payload_offset + payload_size > data.len() { - bail!("zboot payload extends beyond file"); - } - info!( - "zboot header: payload at 0x{:x}, size 0x{:x}", - payload_offset, payload_size - ); - (payload_offset, payload_offset + payload_size) - } else { - let magic = [0x28u8, 0xb5, 0x2f, 0xfd]; - let p = data - .windows(4) - .position(|w| w == magic) - .ok_or_else(|| eyre!("zstd magic not found in vmlinuz"))?; - info!("zstd payload at offset 0x{:x} (no zboot header)", p); - (p, data.len()) - }; - - let mut kernel = Vec::new(); - zstd::stream::copy_decode(&data[pos..payload_end], &mut kernel) - .context("decompressing zstd payload from vmlinuz")?; - - if kernel.len() < 0x3c || &kernel[0x38..0x3c] != b"ARMd" { - bail!("decompressed kernel is not a valid ARM64 Image"); - } - fs::write(output_path, &kernel)?; - info!("decompressed kernel: {} bytes (ARM64 Image)", kernel.len()); - Ok(()) -} - // --- Shared helpers (pub for vfkit/ module) --- -fn detect_machine_name() -> Result { +/// Detect the name of the running podman machine. 
+pub fn detect_machine_name() -> Result { let output = Command::new("podman") .args(["machine", "info", "--format", "{{.Host.CurrentMachine}}"]) .output()?; @@ -679,34 +560,6 @@ fn ensure_image_and_get_digest(image: &str) -> Result { Ok(digest.trim_start_matches("sha256:").to_string()) } -fn extract_kernel(machine: &str, image: &str, boot_dir: &Path) -> Result<()> { - let boot_dir_str = boot_dir.to_string_lossy(); - let script = format!( - "KVER=$(podman run --rm {image} ls /usr/lib/modules/ | head -1) && \ - [ -n \"$KVER\" ] && \ - podman run --rm {image} cat /usr/lib/modules/$KVER/vmlinuz > {boot}/vmlinuz && \ - podman run --rm {image} cat /usr/lib/modules/$KVER/initramfs.img > {boot}/initramfs.img", - image = image, - boot = boot_dir_str - ); - let output = Command::new("podman") - .args(["machine", "ssh", machine, &script]) - .output() - .context("extracting kernel from container image")?; - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - bail!( - "No kernel found in image '{}'.\n\ - Checked: /usr/lib/modules//vmlinuz + initramfs.img\n\ - This image may not be a bootable container (bootc) image.\n\ - {}", - image, - stderr.trim() - ); - } - Ok(()) -} - fn is_machine_rootful(machine: &str) -> bool { Command::new("podman") .args(["machine", "ssh", machine, "id", "-u"]) @@ -715,38 +568,6 @@ fn is_machine_rootful(machine: &str) -> bool { .unwrap_or(false) } -fn create_squashfs_image( - machine: &str, - rootful: bool, - image: &str, - output_path: &str, -) -> Result<()> { - let script = if rootful { - format!( - "MERGED=$(podman image mount {}) && \ - mksquashfs $MERGED {} -noappend -comp lz4 -b 1M -quiet", - image, output_path - ) - } else { - info!("rootless mode: using podman unshare for SquashFS creation"); - format!( - "podman unshare sh -c 'MERGED=$(podman image mount {}) && \ - mksquashfs $MERGED {} -noappend -comp lz4 -b 1M -quiet'", - image, output_path - ) - }; - - let output = Command::new("podman") - 
.args(["machine", "ssh", machine, &script]) - .output() - .context("running mksquashfs")?; - if !output.status.success() { - let stderr = String::from_utf8_lossy(&output.stderr); - bail!("mksquashfs failed: {}", stderr.trim()); - } - Ok(()) -} - /// Clear extended attributes from a file. /// /// Apple Virtualization.framework rejects disk images with xattrs like @@ -1018,12 +839,15 @@ mod tests { serial_log: "/tmp/test-serial.log".to_string(), log_path: Some("/tmp/test-vfkit.log".to_string()), created: "2026-01-01T00:00:00Z".to_string(), + nbd_container: Some("bcvk-nbd-test-vm".to_string()), + nbd_port: Some(10841), }; let json = serde_json::to_string_pretty(&meta).unwrap(); let loaded: EphemeralVmMetadata = serde_json::from_str(&json).unwrap(); assert_eq!(loaded.name, "test-vm"); assert_eq!(loaded.image, "quay.io/fedora/fedora-bootc:42"); assert_eq!(loaded.pid, 12345); + assert_eq!(loaded.nbd_container.as_deref(), Some("bcvk-nbd-test-vm")); assert_eq!(loaded.ssh_port, 2222); assert_eq!(loaded.log_path.as_deref(), Some("/tmp/test-vfkit.log")); } @@ -1042,6 +866,8 @@ mod tests { serial_log: "/tmp/serial.log".to_string(), log_path: None, created: "2026-05-04T00:00:00Z".to_string(), + nbd_container: None, + nbd_port: None, }; fs::write(&json_path, serde_json::to_string_pretty(&meta).unwrap()).unwrap(); let data = fs::read_to_string(&json_path).unwrap(); @@ -1067,6 +893,8 @@ mod tests { serial_log: "/tmp/serial.log".to_string(), log_path: None, created: "2026-01-01T00:00:00Z".to_string(), + nbd_container: Some(format!("bcvk-nbd-vm-{i}")), + nbd_port: Some(10800 + i as u16), }; let path = dir.path().join(format!("vm-{i}.json")); fs::write(&path, serde_json::to_string(&meta).unwrap()).unwrap(); diff --git a/crates/kit/src/vfkit/mod.rs b/crates/kit/src/vfkit/mod.rs index 62939254a..2062851d5 100644 --- a/crates/kit/src/vfkit/mod.rs +++ b/crates/kit/src/vfkit/mod.rs @@ -5,7 +5,6 @@ use std::fs; use std::path::PathBuf; -use std::process::{Command, Stdio}; use 
clap::Subcommand; use color_eyre::Result; @@ -169,18 +168,15 @@ impl VmMetadata { Ok(vms) } - /// Check if the VM process is still alive via kill -0. + /// Check if the VM process is still alive via kill(pid, 0). pub fn is_alive(&self) -> bool { if self.vfkit_pid == 0 { return false; } - Command::new("kill") - .args(["-0", &self.vfkit_pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - .map(|s| s.success()) - .unwrap_or(false) + rustix::process::test_kill_process( + rustix::process::Pid::from_raw(self.vfkit_pid as i32).unwrap(), + ) + .is_ok() } } diff --git a/crates/kit/src/vfkit/stop.rs b/crates/kit/src/vfkit/stop.rs index 24ea6ceba..52c69fb51 100644 --- a/crates/kit/src/vfkit/stop.rs +++ b/crates/kit/src/vfkit/stop.rs @@ -1,6 +1,5 @@ //! vm stop — Stop a running persistent VM. -use std::process::{Command, Stdio}; use std::time::Duration; use super::VmMetadata; @@ -17,34 +16,23 @@ pub fn run(name: &str) -> Result<()> { info!("stopping VM '{}'...", name); if meta.vfkit_pid > 0 { - if let Err(e) = Command::new("kill") - .args(["-TERM", &meta.vfkit_pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + let pid = rustix::process::Pid::from_raw(meta.vfkit_pid as i32).unwrap(); + if let Err(e) = rustix::process::kill_process(pid, rustix::process::Signal::TERM) { tracing::debug!("failed to SIGTERM vfkit (PID {}): {}", meta.vfkit_pid, e); } std::thread::sleep(Duration::from_secs(3)); if meta.is_alive() { - if let Err(e) = Command::new("kill") - .args(["-KILL", &meta.vfkit_pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let Err(e) = rustix::process::kill_process(pid, rustix::process::Signal::KILL) { tracing::debug!("failed to SIGKILL vfkit (PID {}): {}", meta.vfkit_pid, e); } } } if meta.gvproxy_pid > 0 { - if let Err(e) = Command::new("kill") - .args(["-KILL", &meta.gvproxy_pid.to_string()]) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .status() - { + if let 
Err(e) = rustix::process::kill_process( + rustix::process::Pid::from_raw(meta.gvproxy_pid as i32).unwrap(), + rustix::process::Signal::KILL, + ) { tracing::debug!( "failed to SIGKILL gvproxy (PID {}): {}", meta.gvproxy_pid, diff --git a/crates/nbdkit-erofs-plugin/Cargo.lock b/crates/nbdkit-erofs-plugin/Cargo.lock new file mode 100644 index 000000000..b5064fd23 --- /dev/null +++ b/crates/nbdkit-erofs-plugin/Cargo.lock @@ -0,0 +1,39 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cpio" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938e716cb1ade5d6c8f959c13a7248b889c07491fc7e41167c3afe20f8f0de1e" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "nbdkit-erofs-plugin" +version = "0.1.0" +dependencies = [ + "cpio", + "crc32fast", + "libc", +] diff --git a/crates/nbdkit-erofs-plugin/Cargo.toml b/crates/nbdkit-erofs-plugin/Cargo.toml new file mode 100644 index 000000000..0f645c08c --- /dev/null +++ b/crates/nbdkit-erofs-plugin/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "nbdkit-erofs-plugin" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +crate-type = ["cdylib"] + +[dependencies] +libc = "0.2" +cpio = "0.4" +crc32fast = "1.4" diff --git a/crates/nbdkit-erofs-plugin/src/dir_walk.rs b/crates/nbdkit-erofs-plugin/src/dir_walk.rs new file 
/// A regular file found by the walk. Only metadata is captured here; the
/// contents stay on the host at `host_path`.
#[derive(Debug)]
pub struct FileEntry {
    pub host_path: PathBuf,
    pub size: u64,
    pub mode: u32,
    pub uid: u32,
    pub gid: u32,
    pub mtime: u64,
    // Copied verbatim from the host. Every path gets its own `inode_id`, so
    // hard-linked files appear as independent entries with nlink > 1.
    pub nlink: u32,
    pub inode_id: u64,
}

/// A directory found by the walk, with references to its direct children.
#[derive(Debug)]
pub struct DirInfo {
    pub name: OsString,
    pub mode: u32,
    pub uid: u32,
    pub gid: u32,
    pub mtime: u64,
    pub inode_id: u64,
    /// Inode id of the containing directory; the root refers to itself (0).
    pub parent_inode_id: u64,
    /// Direct children, ordered by file name.
    pub children: Vec<ChildRef>,
}

/// A symbolic link found by the walk; `name` and `target` are raw OS bytes.
#[derive(Debug)]
pub struct SymlinkEntry {
    pub name: Vec<u8>,
    pub target: Vec<u8>,
    pub mode: u32,
    pub uid: u32,
    pub gid: u32,
    pub mtime: u64,
    pub inode_id: u64,
}

/// Child entry in a directory: either a file index, dir index, or symlink index
/// into the corresponding vector of [`WalkResult`].
#[derive(Debug, Clone, Copy)]
pub enum ChildRef {
    File(usize),
    Dir(usize),
    Symlink(usize),
}

/// Everything discovered under a root directory. `dirs[0]` is the root itself.
#[derive(Debug)]
pub struct WalkResult {
    pub dirs: Vec<DirInfo>,
    pub files: Vec<FileEntry>,
    pub symlinks: Vec<SymlinkEntry>,
}

/// Walk `root` depth-first and collect its directories, regular files and symlinks.
///
/// Inode ids are handed out sequentially from 0 in visit order (a directory
/// first, then its entries in file-name order, descending into
/// sub-directories as they are met). Symlinks are recorded, never followed.
/// Entries that are neither directory, symlink nor regular file are skipped.
///
/// # Errors
///
/// Returns the first I/O error hit while reading metadata, listing a
/// directory, or reading a link target.
pub fn walk_directory(root: &Path) -> std::io::Result<WalkResult> {
    let mut result = WalkResult {
        dirs: Vec::new(),
        files: Vec::new(),
        symlinks: Vec::new(),
    };
    let mut next_inode: u64 = 0;

    // The root is its own parent (inode 0).
    walk_recursive(root, root, &mut result, &mut next_inode, 0)?;
    Ok(result)
}

/// Record `dir` and everything below it; returns the index of `dir` in `result.dirs`.
fn walk_recursive(
    root: &Path,
    dir: &Path,
    result: &mut WalkResult,
    next_inode: &mut u64,
    parent_inode_id: u64,
) -> std::io::Result<usize> {
    // Hands out the next sequential inode id.
    fn take_inode(next_inode: &mut u64) -> u64 {
        let id = *next_inode;
        *next_inode += 1;
        id
    }
    // Seconds since the epoch; timestamps before 1970 are clamped to 0
    // instead of wrapping around to a huge unsigned value.
    fn mtime_secs(meta: &fs::Metadata) -> u64 {
        u64::try_from(meta.mtime()).unwrap_or(0)
    }

    let meta = fs::symlink_metadata(dir)?;
    let dir_inode = take_inode(next_inode);

    let di = result.dirs.len();
    result.dirs.push(DirInfo {
        name: dir.file_name().unwrap_or_default().to_os_string(),
        mode: meta.mode(),
        uid: meta.uid(),
        gid: meta.gid(),
        mtime: mtime_secs(&meta),
        inode_id: dir_inode,
        parent_inode_id,
        children: Vec::new(),
    });

    // Propagate listing errors: silently dropping an unreadable entry would
    // produce an incomplete tree without any diagnostic.
    let mut entries = fs::read_dir(dir)?
        .map(|entry| entry.map(|e| (e.file_name(), e.path())))
        .collect::<std::io::Result<Vec<(OsString, PathBuf)>>>()?;
    // Names are unique within a directory, so an unstable sort is sufficient.
    entries.sort_unstable_by(|a, b| a.0.cmp(&b.0));

    for (file_name, path) in entries {
        let meta = fs::symlink_metadata(&path)?;
        let ft = meta.file_type();

        let child = if ft.is_dir() {
            ChildRef::Dir(walk_recursive(root, &path, result, next_inode, dir_inode)?)
        } else if ft.is_symlink() {
            let target = fs::read_link(&path)?;
            let si = result.symlinks.len();
            result.symlinks.push(SymlinkEntry {
                name: file_name.into_encoded_bytes(),
                target: target.into_os_string().into_encoded_bytes(),
                mode: meta.mode(),
                uid: meta.uid(),
                gid: meta.gid(),
                mtime: mtime_secs(&meta),
                inode_id: take_inode(next_inode),
            });
            ChildRef::Symlink(si)
        } else if ft.is_file() {
            let fi = result.files.len();
            result.files.push(FileEntry {
                size: meta.len(),
                mode: meta.mode(),
                uid: meta.uid(),
                gid: meta.gid(),
                mtime: mtime_secs(&meta),
                nlink: meta.nlink() as u32,
                inode_id: take_inode(next_inode),
                host_path: path,
            });
            ChildRef::File(fi)
        } else {
            // Sockets, FIFOs and device nodes are not represented.
            continue;
        };
        result.dirs[di].children.push(child);
    }

    Ok(di)
}
struct FileRegion { + pub file_index: usize, + pub offset_in_erofs: u64, + pub size: u64, +} + +#[derive(Debug)] +pub struct ErofsLayout { + pub metadata: Vec, + pub file_regions: Vec, + pub total_size: u64, +} + +struct DirEntryOnDisk { + nid: u64, + file_type: u8, + name: Vec, +} + +pub fn build_erofs(walk: &WalkResult) -> std::io::Result { + let total_inodes = walk.dirs.len() + walk.files.len() + walk.symlinks.len(); + + // Phase 1: Assign inode positions + // Inodes start at block 1 (block 0 has superblock) + let inode_table_offset = BLOCK_SIZE; // block 1 + let inode_size: u64 = 32; // compact inode + let inode_table_size = align_up(total_inodes as u64 * inode_size, BLOCK_SIZE); + + // Phase 2: Build directory entry blocks + let dir_blocks_offset = inode_table_offset + inode_table_size; + let mut dir_data: Vec = Vec::new(); + let mut dir_block_offsets: Vec = Vec::new(); // per-directory offset in dir_data + + for dir in &walk.dirs { + let offset = align_up(dir_data.len() as u64, BLOCK_SIZE); + dir_data.resize(offset as usize, 0); + dir_block_offsets.push(dir_blocks_offset + offset); + + let mut entries = Vec::new(); + + // "." entry + entries.push(DirEntryOnDisk { + nid: dir.inode_id, + file_type: EROFS_FT_DIR, + name: b".".to_vec(), + }); + + // ".." 
entry (root points to self) + let parent_nid = dir.parent_inode_id; + entries.push(DirEntryOnDisk { + nid: parent_nid, + file_type: EROFS_FT_DIR, + name: b"..".to_vec(), + }); + + // children (sorted by name in walk) + for child in &dir.children { + match child { + ChildRef::Dir(di) => { + let child_dir = &walk.dirs[*di]; + entries.push(DirEntryOnDisk { + nid: child_dir.inode_id, + file_type: EROFS_FT_DIR, + name: child_dir.name.as_encoded_bytes().to_vec(), + }); + } + ChildRef::File(fi) => { + let file = &walk.files[*fi]; + entries.push(DirEntryOnDisk { + nid: file.inode_id, + file_type: EROFS_FT_REG_FILE, + name: file + .host_path + .file_name() + .unwrap_or_default() + .as_encoded_bytes() + .to_vec(), + }); + } + ChildRef::Symlink(si) => { + let symlink = &walk.symlinks[*si]; + entries.push(DirEntryOnDisk { + nid: symlink.inode_id, + file_type: EROFS_FT_SYMLINK, + name: symlink.name.clone(), + }); + } + } + } + + // Write EROFS directory blocks (splits at 4096-byte boundaries) + write_dir_blocks(&mut dir_data, &entries); + } + let dir_data_size = align_up(dir_data.len() as u64, BLOCK_SIZE); + dir_data.resize(dir_data_size as usize, 0); + + // Phase 3: Compute data region layout + let data_offset = dir_blocks_offset + dir_data_size; + let mut file_regions = Vec::new(); + let mut current_data_offset = data_offset; + + for (i, file) in walk.files.iter().enumerate() { + if file.size > 0 { + let aligned_offset = align_up(current_data_offset, BLOCK_SIZE); + file_regions.push(FileRegion { + file_index: i, + offset_in_erofs: aligned_offset, + size: file.size, + }); + current_data_offset = aligned_offset + align_up(file.size, BLOCK_SIZE); + } + } + + // Symlink targets also need data blocks + for (si, symlink) in walk.symlinks.iter().enumerate() { + if !symlink.target.is_empty() { + let aligned_offset = align_up(current_data_offset, BLOCK_SIZE); + file_regions.push(FileRegion { + file_index: walk.files.len() + si, // files.len() + symlink index + offset_in_erofs: 
aligned_offset, + size: symlink.target.len() as u64, + }); + current_data_offset = + aligned_offset + align_up(symlink.target.len() as u64, BLOCK_SIZE); + } + } + + let total_size = align_up(current_data_offset, BLOCK_SIZE); + let total_blocks = total_size / BLOCK_SIZE; + + // Phase 4: Build metadata blob + let mut metadata = vec![0u8; (dir_blocks_offset + dir_data_size) as usize]; + + // Write superblock at offset 1024 + write_superblock( + &mut metadata, + total_inodes as u32, + total_blocks as u32, + 0, // root nid + ); + + // Write inodes + // Directories + for (i, dir) in walk.dirs.iter().enumerate() { + let dir_size = compute_dir_size(dir, walk); + let dir_block = (dir_block_offsets[i] - dir_blocks_offset) / BLOCK_SIZE; + write_compact_inode( + &mut metadata, + inode_table_offset as usize + (dir.inode_id as usize * 32), + 0o040000 | (dir.mode & 0o7777), + dir.uid as u16, + dir.gid as u16, + dir_size as u32, + dir.mtime as u32, + 2 + dir + .children + .iter() + .filter(|c| matches!(c, ChildRef::Dir(_))) + .count() as u16, + EROFS_INODE_FLAT_PLAIN, + (dir_blocks_offset / BLOCK_SIZE + dir_block) as u32, + ); + } + + // Regular files + for (i, file) in walk.files.iter().enumerate() { + let data_block = if file.size > 0 { + let fr = file_regions.iter().find(|r| r.file_index == i); + fr.map(|r| (r.offset_in_erofs / BLOCK_SIZE) as u32) + .unwrap_or(0) + } else { + 0 + }; + write_compact_inode( + &mut metadata, + inode_table_offset as usize + (file.inode_id as usize * 32), + 0o100000 | (file.mode & 0o7777), + file.uid as u16, + file.gid as u16, + file.size as u32, + file.mtime as u32, + file.nlink as u16, + EROFS_INODE_FLAT_PLAIN, + data_block, + ); + } + + // Symlinks: FlatPlain with target in data region + // File regions for symlinks start after file regions + let file_region_count = walk.files.iter().filter(|f| f.size > 0).count(); + let mut sym_fr_idx = file_region_count; + for symlink in &walk.symlinks { + let data_block = if !symlink.target.is_empty() { + let 
fr = &file_regions[sym_fr_idx]; + sym_fr_idx += 1; + (fr.offset_in_erofs / BLOCK_SIZE) as u32 + } else { + 0 + }; + + write_compact_inode( + &mut metadata, + inode_table_offset as usize + (symlink.inode_id as usize * 32), + 0o120000 | (symlink.mode & 0o7777), + symlink.uid as u16, + symlink.gid as u16, + symlink.target.len() as u32, + symlink.mtime as u32, + 1, + EROFS_INODE_FLAT_PLAIN, + data_block, + ); + } + + // Write directory data + let dir_start = dir_blocks_offset as usize; + if dir_start + dir_data.len() <= metadata.len() { + metadata[dir_start..dir_start + dir_data.len()].copy_from_slice(&dir_data); + } + + Ok(ErofsLayout { + metadata, + file_regions, + total_size, + }) +} + +fn write_superblock(buf: &mut [u8], inodes: u32, blocks: u32, root_nid: u16) { + let off = SUPERBLOCK_OFFSET as usize; + // magic + buf[off..off + 4].copy_from_slice(&EROFS_MAGIC.to_le_bytes()); + // checksum (unused) + // feature_compat + buf[off + 8..off + 12].copy_from_slice(&0u32.to_le_bytes()); + // blkszbits + buf[off + 12] = BLOCK_BITS; + // sb_extslots + buf[off + 13] = 0; + // root_nid + buf[off + 14..off + 16].copy_from_slice(&root_nid.to_le_bytes()); + // inos + buf[off + 16..off + 24].copy_from_slice(&(inodes as u64).to_le_bytes()); + // build_time + buf[off + 24..off + 32].copy_from_slice(&0u64.to_le_bytes()); + // build_time_nsec + buf[off + 32..off + 36].copy_from_slice(&0u32.to_le_bytes()); + // blocks + buf[off + 36..off + 40].copy_from_slice(&blocks.to_le_bytes()); + // meta_blkaddr (inode table starts at block 1) + buf[off + 40..off + 44].copy_from_slice(&1u32.to_le_bytes()); + // xattr_blkaddr + buf[off + 44..off + 48].copy_from_slice(&0u32.to_le_bytes()); + // uuid (16 bytes) + // volume_name (16 bytes) + // feature_incompat + buf[off + 80..off + 84].copy_from_slice(&0u32.to_le_bytes()); + // available_compr_algs (union with checksum) + // lz4_max_distance +} + +fn write_compact_inode( + buf: &mut [u8], + offset: usize, + mode: u32, + uid: u16, + gid: u16, + 
size: u32, + _mtime: u32, + nlink: u16, + data_layout: u16, + u_field: u32, +) { + if offset + 32 > buf.len() { + return; + } + + // format: layout(compact=0) | data_layout << 1 + let format = (EROFS_INODE_LAYOUT_COMPACT) | (data_layout << 1); + buf[offset..offset + 2].copy_from_slice(&format.to_le_bytes()); + // xattr_icount + buf[offset + 2..offset + 4].copy_from_slice(&0u16.to_le_bytes()); + // mode + buf[offset + 4..offset + 6].copy_from_slice(&(mode as u16).to_le_bytes()); + // nlink + buf[offset + 6..offset + 8].copy_from_slice(&nlink.to_le_bytes()); + // size + buf[offset + 8..offset + 12].copy_from_slice(&size.to_le_bytes()); + // reserved + buf[offset + 12..offset + 16].copy_from_slice(&0u32.to_le_bytes()); + // u (union: raw_blkaddr for FlatPlain) + buf[offset + 16..offset + 20].copy_from_slice(&u_field.to_le_bytes()); + // ino (on-disk inode number, optional) + buf[offset + 20..offset + 24].copy_from_slice(&0u32.to_le_bytes()); + // uid + buf[offset + 24..offset + 26].copy_from_slice(&uid.to_le_bytes()); + // gid + buf[offset + 26..offset + 28].copy_from_slice(&gid.to_le_bytes()); + // reserved2 + buf[offset + 28..offset + 32].copy_from_slice(&0u32.to_le_bytes()); +} + +fn write_dir_blocks(buf: &mut Vec, entries: &[DirEntryOnDisk]) { + // EROFS directories are split into 4096-byte blocks. + // Each block contains: [headers...][names...] + // header = 12 bytes: nid(8) + nameoff(2) + file_type(1) + reserved(1) + // nameoff is relative to block start. 
+ + let mut remaining = entries; + + while !remaining.is_empty() { + // Determine how many entries fit in this block + let mut count = 0; + let mut total_size: usize = 0; + for entry in remaining { + let entry_size = 12 + entry.name.len(); + if total_size + entry_size > BLOCK_SIZE as usize && count > 0 { + break; + } + total_size += entry_size; + count += 1; + } + + let block_entries = &remaining[..count]; + remaining = &remaining[count..]; + + // Write headers + let header_total = 12 * block_entries.len(); + let mut nameoff = header_total as u16; + for entry in block_entries { + buf.extend_from_slice(&(entry.nid as u64).to_le_bytes()); + buf.extend_from_slice(&nameoff.to_le_bytes()); + buf.push(entry.file_type); + buf.push(0); + nameoff += entry.name.len() as u16; + } + + // Write names + for entry in block_entries { + buf.extend_from_slice(&entry.name); + } + + // Pad to block boundary (except last block which is sized by inode.size) + if !remaining.is_empty() { + let written = total_size; + let pad = BLOCK_SIZE as usize - (written % BLOCK_SIZE as usize); + if pad < BLOCK_SIZE as usize { + buf.resize(buf.len() + pad, 0); + } + } + } +} + +fn compute_dir_size(dir: &DirInfo, walk: &WalkResult) -> u64 { + // Build entry list to accurately compute size including block splits + let mut entries = Vec::new(); + entries.push(DirEntryOnDisk { + nid: 0, + file_type: EROFS_FT_DIR, + name: b".".to_vec(), + }); + entries.push(DirEntryOnDisk { + nid: 0, + file_type: EROFS_FT_DIR, + name: b"..".to_vec(), + }); + for child in &dir.children { + let name_len = match child { + ChildRef::Dir(di) => walk.dirs[*di].name.len(), + ChildRef::File(fi) => walk.files[*fi] + .host_path + .file_name() + .unwrap_or_default() + .len(), + ChildRef::Symlink(si) => walk.symlinks[*si].name.len(), + }; + entries.push(DirEntryOnDisk { + nid: 0, + file_type: 0, + name: vec![0; name_len], + }); + } + + // Simulate block splitting to get total size + let mut total = 0u64; + let mut remaining = 
&entries[..]; + while !remaining.is_empty() { + let mut count = 0; + let mut block_size = 0usize; + for entry in remaining { + let entry_size = 12 + entry.name.len(); + if block_size + entry_size > BLOCK_SIZE as usize && count > 0 { + break; + } + block_size += entry_size; + count += 1; + } + remaining = &remaining[count..]; + if remaining.is_empty() { + total += block_size as u64; // last block: actual size + } else { + total += BLOCK_SIZE; // full block + } + } + total +} + +fn align_up(val: u64, align: u64) -> u64 { + (val + align - 1) & !(align - 1) +} + +pub fn build_erofs_regions(layout: &ErofsLayout, walk: &WalkResult) -> Vec { + let files = &walk.files; + let mut regions = Vec::new(); + + // Metadata region (superblock + inode table + dir blocks) + regions.push(Region { + start: 0, + len: layout.metadata.len() as u64, + region_type: RegionType::Data(Arc::new(layout.metadata.clone())), + }); + + // File and symlink data regions + for fr in &layout.file_regions { + // Padding gap + let current_end = regions.last().map(|r| r.start + r.len).unwrap_or(0); + if fr.offset_in_erofs > current_end { + regions.push(Region { + start: current_end, + len: fr.offset_in_erofs - current_end, + region_type: RegionType::Zero, + }); + } + + if fr.file_index < files.len() { + // Regular file: read from host + regions.push(Region { + start: fr.offset_in_erofs, + len: fr.size, + region_type: RegionType::File { + path: files[fr.file_index].host_path.clone(), + }, + }); + } else { + // Symlink target: inline data + let sym_idx = fr.file_index - files.len(); + if sym_idx < walk.symlinks.len() { + // Pad symlink target to fill the block + let mut data = walk.symlinks[sym_idx].target.clone(); + data.resize(fr.size as usize, 0); + regions.push(Region { + start: fr.offset_in_erofs, + len: fr.size, + region_type: RegionType::Data(Arc::new(data)), + }); + } + } + + // Padding to block boundary + let end = fr.offset_in_erofs + fr.size; + let aligned_end = align_up(end, BLOCK_SIZE); + if 
aligned_end > end { + regions.push(Region { + start: end, + len: aligned_end - end, + region_type: RegionType::Zero, + }); + } + } + + // Ensure total size + let last_end = regions.last().map(|r| r.start + r.len).unwrap_or(0); + if last_end < layout.total_size { + regions.push(Region { + start: last_end, + len: layout.total_size - last_end, + region_type: RegionType::Zero, + }); + } + + regions +} diff --git a/crates/nbdkit-erofs-plugin/src/fat32.rs b/crates/nbdkit-erofs-plugin/src/fat32.rs new file mode 100644 index 000000000..ecc6992d5 --- /dev/null +++ b/crates/nbdkit-erofs-plugin/src/fat32.rs @@ -0,0 +1,548 @@ +//! FAT32 ESP generation using the regions pattern. +//! +//! Generates a virtual FAT32 filesystem with boot files for EFI boot. +//! Metadata (BPB, FAT tables, directory entries) are in-memory Data regions. +//! File data uses File regions for lazy pread from source files. + +use crate::regions::{Region, RegionType}; +use std::path::PathBuf; +use std::sync::Arc; + +const SECTOR_SIZE: u64 = 512; +const CLUSTER_SIZE: u64 = 512; +const SECTORS_PER_CLUSTER: u64 = 1; +const RESERVED_SECTORS: u64 = 32; +const NUM_FATS: u64 = 2; +const DIR_ENTRY_SIZE: u64 = 32; + +const FAT32_EOC: u32 = 0x0FFF_FFFF; +const FAT32_MEDIA: u32 = 0x0FFF_FFF8; + +// Fixed cluster assignments for the ESP directory structure. +// Root directory is always cluster 2 per FAT32 spec. 
/// Build the 11-byte FAT short-name field for `name` and `ext`.
///
/// The first 8 bytes hold `name`, the last 3 hold `ext`; each part is
/// truncated to its field width and right-padded with spaces. ASCII letters
/// are upper-cased because short directory entries must not contain
/// lower-case characters; all other bytes are stored unchanged.
fn make_8_3(name: &str, ext: &str) -> [u8; 11] {
    let mut short = [b' '; 11];
    let (base, suffix) = short.split_at_mut(8);
    // `zip` stops at the shorter side, which both truncates over-long input
    // and leaves the space padding in place for short input.
    for (slot, byte) in base.iter_mut().zip(name.bytes()) {
        *slot = byte.to_ascii_uppercase();
    }
    for (slot, byte) in suffix.iter_mut().zip(ext.bytes()) {
        *slot = byte.to_ascii_uppercase();
    }
    short
}
VMLINUZ, INITRD.IMG + // Note: /EFI/BOOT and /boot both use 8.3 name "BOOT" but are in different + // parent directories so there is no conflict in the FAT32 namespace. + let dirs = vec![ + FatDir { + name_8_3: make_8_3("", ""), + cluster: CLUSTER_ROOT, + entries: vec![FatDirChild::Dir(1), FatDirChild::Dir(3)], + }, + FatDir { + name_8_3: make_8_3("EFI", ""), + cluster: CLUSTER_EFI, + entries: vec![FatDirChild::Dir(2)], + }, + FatDir { + name_8_3: make_8_3("BOOT", ""), + cluster: CLUSTER_EFI_BOOT, + entries: vec![FatDirChild::File(0), FatDirChild::File(1)], + }, + FatDir { + name_8_3: make_8_3("BOOT", ""), + cluster: CLUSTER_BOOT, + entries: vec![FatDirChild::File(2), FatDirChild::File(3)], + }, + ]; + + let dir_clusters = dirs.len() as u32; + + // Assign file clusters (starting after directory clusters) + let mut file_start_clusters: Vec = Vec::new(); + let mut next_cluster = 2 + dir_clusters; + for f in &files { + file_start_clusters.push(next_cluster); + next_cluster += clusters_for(f.size) as u32; + } + let total_clusters = next_cluster; + let data_clusters = total_clusters - 2; + + // FAT table + let fat_entries = total_clusters as usize; + let fat_bytes = ((fat_entries * 4 + SECTOR_SIZE as usize - 1) / SECTOR_SIZE as usize) + * SECTOR_SIZE as usize; + let fat_sectors = fat_bytes as u64 / SECTOR_SIZE; + + let mut fat = vec![0u8; fat_bytes]; + // Entry 0: media descriptor + fat[0..4].copy_from_slice(&FAT32_MEDIA.to_le_bytes()); + // Entry 1: EOC + fat[4..8].copy_from_slice(&FAT32_EOC.to_le_bytes()); + + // Directory clusters (each is single-cluster, EOC) + for d in &dirs { + let off = d.cluster as usize * 4; + fat[off..off + 4].copy_from_slice(&FAT32_EOC.to_le_bytes()); + } + + // File cluster chains + for (fi, f) in files.iter().enumerate() { + let start = file_start_clusters[fi]; + let num = clusters_for(f.size) as u32; + for c in 0..num { + let cluster = start + c; + let off = cluster as usize * 4; + if c == num - 1 { + fat[off..off + 
4].copy_from_slice(&FAT32_EOC.to_le_bytes()); + } else { + fat[off..off + 4].copy_from_slice(&(cluster + 1).to_le_bytes()); + } + } + } + + // Data region start (in sectors) + let data_start_sector = RESERVED_SECTORS + NUM_FATS * fat_sectors; + + // Build directory entry blocks + let mut dir_blocks: Vec> = Vec::new(); + for (di, d) in dirs.iter().enumerate() { + let mut block = vec![0u8; CLUSTER_SIZE as usize]; + let mut pos = 0usize; + + // "." and ".." entries for subdirectories + if di > 0 { + write_dir_entry(&mut block, pos, b". ", 0x10, d.cluster, 0); + pos += DIR_ENTRY_SIZE as usize; + // Parent cluster: dirs at index 1 (EFI) and 3 (boot) are children of root (0). + // Dir at index 2 (EFI/BOOT) is a child of EFI (dirs[1]). + debug_assert!(dirs.len() == 4, "directory structure changed"); + let parent_cluster = if di == 1 || di == 3 { + 0u32 + } else { + dirs[1].cluster + }; + write_dir_entry(&mut block, pos, b".. ", 0x10, parent_cluster, 0); + pos += DIR_ENTRY_SIZE as usize; + } + + for child in &d.entries { + match child { + FatDirChild::Dir(idx) => { + let cd = &dirs[*idx]; + write_dir_entry(&mut block, pos, &cd.name_8_3, 0x10, cd.cluster, 0); + } + FatDirChild::File(idx) => { + let cf = &files[*idx]; + write_dir_entry( + &mut block, + pos, + &cf.name_8_3, + 0x20, + file_start_clusters[*idx], + cf.size, + ); + } + } + pos += DIR_ENTRY_SIZE as usize; + } + dir_blocks.push(block); + } + + // Total size of ESP partition + let total_sectors = data_start_sector + data_clusters as u64 * SECTORS_PER_CLUSTER; + let total_size = total_sectors * SECTOR_SIZE; + + // BPB (Boot Parameter Block) + let bpb = build_bpb( + total_sectors as u32, + fat_sectors as u32, + data_clusters as u64, + ); + + // FSInfo + let fsinfo = build_fsinfo( + (data_clusters as u32).saturating_sub( + dir_clusters as u32 + + files + .iter() + .map(|f| clusters_for(f.size) as u32) + .sum::(), + ), + next_cluster, + ); + + // Assemble regions + let mut regions: Vec = Vec::new(); + let mut offset = 
0u64; + + // Sector 0: BPB + regions.push(Region { + start: offset, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(bpb.clone())), + }); + offset += SECTOR_SIZE; + + // Sector 1: FSInfo + regions.push(Region { + start: offset, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(fsinfo.clone())), + }); + offset += SECTOR_SIZE; + + // Sectors 2-5: zero padding + let pad_to_backup = 4 * SECTOR_SIZE; + regions.push(Region { + start: offset, + len: pad_to_backup, + region_type: RegionType::Zero, + }); + offset += pad_to_backup; + + // Sector 6: Backup BPB + regions.push(Region { + start: offset, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(bpb)), + }); + offset += SECTOR_SIZE; + + // Sector 7: Backup FSInfo + regions.push(Region { + start: offset, + len: SECTOR_SIZE, + region_type: RegionType::Data(Arc::new(fsinfo)), + }); + offset += SECTOR_SIZE; + + // Sectors 8-31: zero padding to reserved end + let remaining_reserved = (RESERVED_SECTORS * SECTOR_SIZE) - offset; + if remaining_reserved > 0 { + regions.push(Region { + start: offset, + len: remaining_reserved, + region_type: RegionType::Zero, + }); + offset += remaining_reserved; + } + + // FAT1 + let fat_data = Arc::new(fat.clone()); + regions.push(Region { + start: offset, + len: fat_bytes as u64, + region_type: RegionType::Data(fat_data.clone()), + }); + offset += fat_bytes as u64; + + // FAT2 (copy) + regions.push(Region { + start: offset, + len: fat_bytes as u64, + region_type: RegionType::Data(fat_data), + }); + offset += fat_bytes as u64; + + // Data area: directory clusters + for block in &dir_blocks { + regions.push(Region { + start: offset, + len: CLUSTER_SIZE, + region_type: RegionType::Data(Arc::new(block.clone())), + }); + offset += CLUSTER_SIZE; + } + + // Data area: file clusters + for (_fi, f) in files.iter().enumerate() { + let mut file_offset = 0u64; + + for part in &f.regions { + match part { + FileDataRegion::FromFile { path, len } => { + regions.push(Region { + 
start: offset, + len: *len, + region_type: RegionType::File { path: path.clone() }, + }); + offset += len; + file_offset += len; + } + FileDataRegion::FromData(data) => { + let len = data.len() as u64; + regions.push(Region { + start: offset, + len, + region_type: RegionType::Data(Arc::new(data.clone())), + }); + offset += len; + file_offset += len; + } + FileDataRegion::Zero(len) => { + if *len > 0 { + regions.push(Region { + start: offset, + len: *len, + region_type: RegionType::Zero, + }); + offset += len; + file_offset += len; + } + } + } + } + + // Pad to cluster boundary + let used_in_last = file_offset % CLUSTER_SIZE; + if used_in_last > 0 { + let pad = CLUSTER_SIZE - used_in_last; + regions.push(Region { + start: offset, + len: pad, + region_type: RegionType::Zero, + }); + offset += pad; + } + } + + // Ensure total_size is correct + debug_assert!( + offset <= total_size, + "regions exceeded total_size: {} > {}", + offset, + total_size + ); + if offset < total_size { + regions.push(Region { + start: offset, + len: total_size - offset, + region_type: RegionType::Zero, + }); + } + + (regions, total_size) +} + +/// Build initrd regions: original file + 4-byte alignment + CPIO data. 
+pub fn build_initrd_regions(
+    initrd_path: &std::path::Path,
+    initrd_size: u64,
+    units_cpio: &[u8],
+    ssh_cpio: Option<&[u8]>,
+) -> (Vec<(FileDataRegion, u64)>, u64) {
+    let mut parts = Vec::new();
+    let mut total = 0u64;
+
+    // Original initramfs
+    parts.push((
+        FileDataRegion::FromFile {
+            path: initrd_path.to_path_buf(),
+            len: initrd_size,
+        },
+        initrd_size,
+    ));
+    total += initrd_size;
+
+    // 4-byte alignment padding
+    let pad = ((4 - (initrd_size % 4)) % 4) as u64;
+    if pad > 0 {
+        parts.push((FileDataRegion::Zero(pad), pad));
+        total += pad;
+    }
+
+    // Units CPIO
+    let len = units_cpio.len() as u64;
+    parts.push((FileDataRegion::FromData(units_cpio.to_vec()), len));
+    total += len;
+
+    // SSH CPIO (if provided)
+    if let Some(ssh) = ssh_cpio {
+        let pad2 = ((4 - (total % 4)) % 4) as u64;
+        if pad2 > 0 {
+            parts.push((FileDataRegion::Zero(pad2), pad2));
+            total += pad2;
+        }
+        let len = ssh.len() as u64;
+        parts.push((FileDataRegion::FromData(ssh.to_vec()), len));
+        total += len;
+    }
+
+    (parts, total)
+}
+
+fn write_dir_entry(buf: &mut [u8], pos: usize, name: &[u8; 11], attr: u8, cluster: u32, size: u64) {
+    buf[pos..pos + 11].copy_from_slice(name);
+    buf[pos + 11] = attr;
+    // cluster high
+    buf[pos + 20..pos + 22].copy_from_slice(&((cluster >> 16) as u16).to_le_bytes());
+    // cluster low
+    buf[pos + 26..pos + 28].copy_from_slice(&(cluster as u16).to_le_bytes());
+    // file size (32-bit)
+    buf[pos + 28..pos + 32].copy_from_slice(&(size as u32).to_le_bytes());
+}
+
+fn build_bpb(total_sectors: u32, fat_sectors: u32, _data_clusters: u64) -> Vec<u8> {
+    let mut bpb = vec![0u8; SECTOR_SIZE as usize];
+    // Jump instruction
+    bpb[0] = 0xEB;
+    bpb[1] = 0x58;
+    bpb[2] = 0x90;
+    // OEM name
+    bpb[3..11].copy_from_slice(b"MSWIN4.1");
+    // Bytes per sector
+    bpb[11..13].copy_from_slice(&(SECTOR_SIZE as u16).to_le_bytes());
+    // Sectors per cluster
+    bpb[13] = SECTORS_PER_CLUSTER as u8;
+    // Reserved sectors
+    bpb[14..16].copy_from_slice(&(RESERVED_SECTORS as u16).to_le_bytes());
+    // Number of FATs
+    bpb[16] = NUM_FATS as u8;
+    // Root entry count (0 for FAT32)
+    bpb[17..19].copy_from_slice(&0u16.to_le_bytes());
+    // Total sectors 16 (0 for FAT32)
+    bpb[19..21].copy_from_slice(&0u16.to_le_bytes());
+    // Media type
+    bpb[21] = 0xF8;
+    // Sectors per FAT 16 (0 for FAT32)
+    bpb[22..24].copy_from_slice(&0u16.to_le_bytes());
+    // Sectors per track
+    bpb[24..26].copy_from_slice(&32u16.to_le_bytes());
+    // Number of heads
+    bpb[26..28].copy_from_slice(&64u16.to_le_bytes());
+    // Hidden sectors
+    bpb[28..32].copy_from_slice(&0u32.to_le_bytes());
+    // Total sectors 32
+    bpb[32..36].copy_from_slice(&total_sectors.to_le_bytes());
+    // --- FAT32 specific ---
+    // Sectors per FAT
+    bpb[36..40].copy_from_slice(&fat_sectors.to_le_bytes());
+    // Extended flags
+    bpb[40..42].copy_from_slice(&0u16.to_le_bytes());
+    // FS version
+    bpb[42..44].copy_from_slice(&0u16.to_le_bytes());
+    // Root cluster
+    bpb[44..48].copy_from_slice(&2u32.to_le_bytes());
+    // FSInfo sector
+    bpb[48..50].copy_from_slice(&1u16.to_le_bytes());
+    // Backup boot sector
+    bpb[50..52].copy_from_slice(&6u16.to_le_bytes());
+    // Reserved (12 bytes, already zero)
+    // Drive number
+    bpb[64] = 0x80;
+    // Boot signature
+    bpb[66] = 0x29;
+    // Volume serial number
+    bpb[67..71].copy_from_slice(&0x42424242u32.to_le_bytes());
+    // Volume label
+    bpb[71..82].copy_from_slice(b"BCVK-ESP   ");
+    // Filesystem type
+    bpb[82..90].copy_from_slice(b"FAT32   ");
+    // Boot signature
+    bpb[510] = 0x55;
+    bpb[511] = 0xAA;
+    bpb
+}
+
+fn build_fsinfo(free_clusters: u32, next_free: u32) -> Vec<u8> {
+    let mut fs = vec![0u8; SECTOR_SIZE as usize];
+    // Signature1
+    fs[0..4].copy_from_slice(&0x41615252u32.to_le_bytes());
+    // Signature2
+    fs[484..488].copy_from_slice(&0x61417272u32.to_le_bytes());
+    // Free cluster count
+    fs[488..492].copy_from_slice(&free_clusters.to_le_bytes());
+    // Next free cluster
+    fs[492..496].copy_from_slice(&next_free.to_le_bytes());
+    // Signature3
+    fs[508..512].copy_from_slice(&0xAA550000u32.to_le_bytes());
+    fs
+}
diff --git a/crates/nbdkit-erofs-plugin/src/gpt.rs b/crates/nbdkit-erofs-plugin/src/gpt.rs
new file mode 100644
index 000000000..88e8bcf44
--- /dev/null
+++ b/crates/nbdkit-erofs-plugin/src/gpt.rs
@@ -0,0 +1,320 @@
+//! GPT disk assembly: protective MBR, primary and backup GPT, ESP and EROFS partitions.
+
+use crate::regions::{Region, RegionType};
+use std::sync::Arc;
+
+const SECTOR_SIZE: u64 = 512;
+const GPT_HEADER_SIZE: u64 = 92;
+const GPT_ENTRY_SIZE: u64 = 128;
+const GPT_ENTRIES: u64 = 128;
+
+// EFI System Partition type GUID
+const ESP_TYPE_GUID: [u8; 16] = [
+    0x28, 0x73, 0x2A, 0xC1, 0x1F, 0xF8, 0xD2, 0x11, 0xBA, 0x4B, 0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B,
+];
+
+// Linux filesystem type GUID
+const LINUX_TYPE_GUID: [u8; 16] = [
+    0xAF, 0x3D, 0xC6, 0x0F, 0x83, 0x84, 0x72, 0x47, 0x8E, 0x79, 0x3D, 0x69, 0xD8, 0x47, 0x7D, 0xE4,
+];
+
+/// Regions making up a complete virtual disk, plus the disk size in bytes.
+pub struct DiskLayout {
+    pub regions: Vec<Region>,
+    pub total_size: u64,
+}
+
+/// Assemble a GPT disk holding an ESP followed by an EROFS partition.
+///
+/// `esp_regions` and `erofs_regions` use partition-relative offsets and are
+/// shifted to their absolute position on the disk. `esp_size` and
+/// `erofs_size` are the partition sizes in bytes.
+///
+/// # Errors
+///
+/// Returns `InvalidInput` when either partition is empty: the last LBA of a
+/// zero-length partition would precede its first LBA.
+pub fn build_gpt_disk(
+    esp_regions: Vec<Region>,
+    esp_size: u64,
+    erofs_regions: Vec<Region>,
+    erofs_size: u64,
+) -> std::io::Result<DiskLayout> {
+    // GPT layout:
+    // LBA 0: Protective MBR
+    // LBA 1: GPT Header
+    // LBA 2-33: Partition Table (128 entries * 128 bytes = 16384 bytes = 32 sectors)
+    // LBA 2048: ESP partition (1 MiB aligned; first usable LBA is 34)
+    // After ESP: EROFS partition
+    // End: Backup GPT
+
+    // An empty partition would yield a GPT entry whose last LBA precedes its
+    // first LBA, so reject it up front.
+    if esp_size == 0 || erofs_size == 0 {
+        return Err(std::io::Error::new(
+            std::io::ErrorKind::InvalidInput,
+            "ESP and EROFS partitions must be non-empty",
+        ));
+    }
+
+    let partition_table_sectors = (GPT_ENTRIES * GPT_ENTRY_SIZE + SECTOR_SIZE - 1) / SECTOR_SIZE;
+    let first_usable_lba = 34u64; // standard
+    let esp_start_lba = 2048u64; // 1MB aligned
+    let esp_sectors = (esp_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
+    let erofs_start_lba = esp_start_lba + esp_sectors;
+    // Align to 2048 sectors
+    let erofs_start_lba = (erofs_start_lba + 2047) & !2047;
+    let erofs_sectors = (erofs_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
+    let last_usable_lba = erofs_start_lba + erofs_sectors - 1;
+    let backup_table_lba = last_usable_lba + 1;
+    let backup_header_lba = backup_table_lba + partition_table_sectors;
+    let total_sectors = backup_header_lba + 1;
+    let total_size = total_sectors * SECTOR_SIZE;
+
+    // Build partition table entries
+    let mut partition_table = vec![0u8; (GPT_ENTRIES * GPT_ENTRY_SIZE) as usize];
+
+    // Entry 0: ESP
+    write_gpt_entry(
+        &mut partition_table,
+        0,
+        &ESP_TYPE_GUID,
+        esp_start_lba,
+        esp_start_lba + esp_sectors - 1,
+        b"EFI System",
+    );
+
+    // Entry 1: EROFS rootfs
+    write_gpt_entry(
+        &mut partition_table,
+        1,
+        &LINUX_TYPE_GUID,
+        erofs_start_lba,
+        erofs_start_lba + erofs_sectors - 1,
+        b"root",
+    );
+
+    let partition_table_crc = crc32fast::hash(&partition_table);
+
+    // Build GPT header
+    let mut gpt_header = vec![0u8; SECTOR_SIZE as usize];
+    write_gpt_header(
+        &mut gpt_header,
+        1, // my LBA
+        backup_header_lba,
+        first_usable_lba,
+        last_usable_lba,
+        2, // partition table LBA
+        2, // num entries used
+        partition_table_crc,
+    );
+
+    // Build backup GPT header
+    let mut backup_header = vec![0u8; SECTOR_SIZE as usize];
+    write_gpt_header(
+        &mut backup_header,
+        backup_header_lba,
+        1, // alternate LBA
+        first_usable_lba,
+        last_usable_lba,
+        backup_table_lba,
+        2,
+        partition_table_crc,
+    );
+
+    // Build protective MBR
+    let mut mbr = vec![0u8; SECTOR_SIZE as usize];
+    write_protective_mbr(&mut mbr, total_sectors);
+
+    // Assemble regions
+    let mut regions = Vec::new();
+
+    // MBR
+    regions.push(Region {
+        start: 0,
+        len: SECTOR_SIZE,
+        region_type: RegionType::Data(Arc::new(mbr)),
+    });
+
+    // GPT Header
+    regions.push(Region {
+        start: SECTOR_SIZE,
+        len: SECTOR_SIZE,
+        region_type: RegionType::Data(Arc::new(gpt_header)),
+    });
+
+    // Partition Table
+    regions.push(Region {
+        start: 2 * SECTOR_SIZE,
+        len: partition_table.len() as u64,
+        region_type: RegionType::Data(Arc::new(partition_table.clone())),
+    });
+
+    // Padding to ESP start
+    let pad_start = 2 * SECTOR_SIZE + partition_table.len() as u64;
+    let esp_byte_offset = esp_start_lba * SECTOR_SIZE;
+    if esp_byte_offset > pad_start {
+        regions.push(Region {
+            start: pad_start,
+            len: esp_byte_offset - pad_start,
+            region_type: RegionType::Zero,
+        });
+    }
+
+    // ESP partition (from provided regions, offset-adjusted)
+    for mut r in esp_regions {
+        r.start += esp_byte_offset;
+        regions.push(r);
+    }
+
+    // Padding between ESP and EROFS
+    let esp_end = esp_byte_offset + esp_size;
+    let erofs_byte_offset = erofs_start_lba * SECTOR_SIZE;
+    if erofs_byte_offset > esp_end {
+        regions.push(Region {
+            start: esp_end,
+            len: erofs_byte_offset - esp_end,
+            region_type: RegionType::Zero,
+        });
+    }
+
+    // EROFS partition (offset all regions)
+    for mut r in erofs_regions {
+        r.start += erofs_byte_offset;
+        regions.push(r);
+    }
+
+    // Padding to backup GPT
+    let erofs_end = erofs_byte_offset + erofs_size;
+    let backup_table_offset = backup_table_lba * SECTOR_SIZE;
+    if backup_table_offset > erofs_end {
+        regions.push(Region {
+            start: erofs_end,
+            len: backup_table_offset - erofs_end,
+            region_type: RegionType::Zero,
+        });
+    }
+
+    // Backup partition table
+    regions.push(Region {
+        start: backup_table_offset,
+        len: partition_table.len() as u64,
+        region_type: RegionType::Data(Arc::new(partition_table)),
+    });
+
+    // Backup GPT header
+    regions.push(Region {
+        start: backup_header_lba * SECTOR_SIZE,
+        len: SECTOR_SIZE,
+        region_type: RegionType::Data(Arc::new(backup_header)),
+    });
+
+    Ok(DiskLayout {
+        regions,
+        total_size,
+    })
+}
+
+/// Write partition entry number `index` into `table`.
+///
+/// Each byte of `name` becomes one UTF-16LE code unit; at most 36 are written.
+fn write_gpt_entry(
+    table: &mut [u8],
+    index: usize,
+    type_guid: &[u8; 16],
+    first_lba: u64,
+    last_lba: u64,
+    name: &[u8],
+) {
+    let off = index * GPT_ENTRY_SIZE as usize;
+    // Partition type GUID
+    table[off..off + 16].copy_from_slice(type_guid);
+    // Unique partition GUID (generate simple one from index)
+    let mut unique = [0u8; 16];
+    unique[0] = index as u8 + 1;
+    unique[15] = 0x42;
+    table[off + 16..off + 32].copy_from_slice(&unique);
+    // First LBA
+    table[off + 32..off + 40].copy_from_slice(&first_lba.to_le_bytes());
+    // Last LBA
+    table[off + 40..off + 48].copy_from_slice(&last_lba.to_le_bytes());
+    // Attributes
+    table[off + 48..off + 56].copy_from_slice(&0u64.to_le_bytes());
+    // Name (UTF-16LE)
+    for (i, &b) in name.iter().enumerate().take(36) {
+        table[off + 56 + i * 2] = b;
+        table[off + 56 + i * 2 + 1] = 0;
+    }
+}
+
+/// Fill `buf` with a GPT header and store its CRC32.
+///
+/// `_num_entries` is unused: the header always advertises `GPT_ENTRIES`
+/// partition entries of `GPT_ENTRY_SIZE` bytes each.
+fn write_gpt_header(
+    buf: &mut [u8],
+    my_lba: u64,
+    alternate_lba: u64,
+    first_usable: u64,
+    last_usable: u64,
+    partition_table_lba: u64,
+    _num_entries: u32,
+    partition_crc: u32,
+) {
+    // Signature "EFI PART"
+    buf[0..8].copy_from_slice(b"EFI PART");
+    // Revision 1.0
+    buf[8..12].copy_from_slice(&0x00010000u32.to_le_bytes());
+    // Header size
+    buf[12..16].copy_from_slice(&(GPT_HEADER_SIZE as u32).to_le_bytes());
+    // Header CRC32 (computed after all fields set)
+    // My LBA
+    buf[24..32].copy_from_slice(&my_lba.to_le_bytes());
+    // Alternate LBA
+    buf[32..40].copy_from_slice(&alternate_lba.to_le_bytes());
+    // First usable LBA
+    buf[40..48].copy_from_slice(&first_usable.to_le_bytes());
+    // Last usable LBA
+    buf[48..56].copy_from_slice(&last_usable.to_le_bytes());
+    // Fixed disk GUID for reproducible builds (not security-sensitive)
+    const DISK_GUID: [u8; 16] = [
+        0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xAA, 0xBB,
+        0xCC,
+    ];
+    let disk_guid = DISK_GUID;
+    buf[56..72].copy_from_slice(&disk_guid);
+    // Partition entry start LBA
+    buf[72..80].copy_from_slice(&partition_table_lba.to_le_bytes());
+    // Number of partition entries
+    buf[80..84].copy_from_slice(&(GPT_ENTRIES as u32).to_le_bytes());
+    // Size of partition entry
+    buf[84..88].copy_from_slice(&(GPT_ENTRY_SIZE as u32).to_le_bytes());
+    // Partition table CRC32
+    buf[88..92].copy_from_slice(&partition_crc.to_le_bytes());
+
+    // Compute header CRC32
+    buf[16..20].copy_from_slice(&0u32.to_le_bytes()); // zero CRC field first
+    let crc = crc32fast::hash(&buf[0..GPT_HEADER_SIZE as usize]);
+    buf[16..20].copy_from_slice(&crc.to_le_bytes());
+}
+
+/// Write a protective MBR: a single type-0xEE partition starting at LBA 1.
+fn write_protective_mbr(buf: &mut [u8], total_sectors: u64) {
+    // Partition entry at offset 446
+    buf[446] = 0x00; // not bootable
+    buf[447] = 0x00; // CHS start
+    buf[448] = 0x02;
+    buf[449] = 0x00;
+    buf[450] = 0xEE; // type: GPT protective
+    buf[451] = 0xFF; // CHS end
+    buf[452] = 0xFF;
+    buf[453] = 0xFF;
+    // LBA start
+    buf[454..458].copy_from_slice(&1u32.to_le_bytes());
+    // LBA size
+    let size = std::cmp::min(total_sectors - 1, 0xFFFFFFFF) as u32;
+    buf[458..462].copy_from_slice(&size.to_le_bytes());
+    // Boot signature
+    buf[510] = 0x55;
+    buf[511] = 0xAA;
+}
diff --git a/crates/nbdkit-erofs-plugin/src/initramfs.rs b/crates/nbdkit-erofs-plugin/src/initramfs.rs
new file mode 100644
index 000000000..87d0d7732
--- /dev/null
+++ b/crates/nbdkit-erofs-plugin/src/initramfs.rs
@@ -0,0 +1,197 @@
+//! CPIO newc archive generation for initramfs append.
+
+use std::io::Write;
+
+use cpio::newc::Builder as NewcBuilder;
+use cpio::newc::ModeFileType;
+
+/// Append a directory entry (mode 0755) to the archive in `out`.
+fn write_dir(out: &mut Vec<u8>, path: &str) {
+    NewcBuilder::new(path)
+        .mode(0o755)
+        .set_mode_file_type(ModeFileType::Directory)
+        .write(out, 0)
+        .finish()
+        .unwrap();
+}
+
+/// Append a regular file (mode 0644) with contents `data` to the archive in `out`.
+fn write_file(out: &mut Vec<u8>, path: &str, data: &[u8]) {
+    let mut w = NewcBuilder::new(path)
+        .mode(0o644)
+        .set_mode_file_type(ModeFileType::Regular)
+        .write(out, data.len() as u32);
+    w.write_all(data).unwrap();
+    w.finish().unwrap();
+}
+
+/// Append an executable regular file (mode 0755) with contents `data` to the archive in `out`.
+fn write_file_exec(out: &mut Vec<u8>, path: &str, data: &[u8]) {
+    let mut w = NewcBuilder::new(path)
+        .mode(0o755)
+        .set_mode_file_type(ModeFileType::Regular)
+        .write(out, data.len() as u32);
+    w.write_all(data).unwrap();
+    w.finish().unwrap();
+}
+
+/// Build the CPIO archive of systemd units appended to the initramfs.
+///
+/// The archive adds the `bcvk-var-ephemeral`, `bcvk-etc-overlay`,
+/// `bcvk-copy-units` and `bcvk-journal-stream` services, plus drop-ins that
+/// make `initrd-fs.target` want the first three.
+pub fn build_units_cpio() -> Vec<u8> {
+    let mut out = Vec::with_capacity(32768);
+
+    write_dir(&mut out, "usr");
+    write_dir(&mut out, "usr/lib");
+    write_dir(&mut out, "usr/lib/systemd");
+    write_dir(&mut out, "usr/lib/systemd/system");
+    write_dir(&mut out, "usr/lib/systemd/system/initrd-fs.target.d");
+
+    write_file(
+        &mut out,
+        "usr/lib/systemd/system/bcvk-var-ephemeral.service",
+        b"[Unit]\n\
+          Description=Setup ephemeral /var from image content\n\
+          DefaultDependencies=no\n\
+          ConditionPathExists=/etc/initrd-release\n\
+          Before=initrd-fs.target\n\
+          After=sysroot.mount initrd-parse-etc.service\n\
+          Requires=sysroot.mount\n\
+          \n\
+          [Service]\n\
+          Type=oneshot\n\
+          RemainAfterExit=yes\n\
+          TimeoutStartSec=60\n\
+          ExecStart=/usr/bin/mkdir -p /run/var-ephemeral\n\
+          ExecStart=/usr/bin/cp -a /sysroot/var/. /run/var-ephemeral/\n\
+          ExecStart=/usr/bin/mount --bind /run/var-ephemeral /sysroot/var\n",
+    );
+
+    write_file(
+        &mut out,
+        "usr/lib/systemd/system/bcvk-etc-overlay.service",
+        b"[Unit]\n\
+          Description=Setup ephemeral /etc overlay\n\
+          DefaultDependencies=no\n\
+          ConditionPathExists=/etc/initrd-release\n\
+          Before=initrd-fs.target\n\
+          After=sysroot.mount initrd-parse-etc.service\n\
+          Requires=sysroot.mount\n\
+          \n\
+          [Service]\n\
+          Type=oneshot\n\
+          RemainAfterExit=yes\n\
+          TimeoutStartSec=30\n\
+          ExecStart=/usr/bin/mkdir -p /run/etc-lower /run/etc-upper /run/etc-work\n\
+          ExecStart=/usr/bin/mount --bind /sysroot/etc /run/etc-lower\n\
+          ExecStart=/usr/bin/mount -t overlay overlay -o lowerdir=/run/etc-lower,upperdir=/run/etc-upper,workdir=/run/etc-work,index=off,metacopy=off /sysroot/etc\n",
+    );
+
+    write_file(
+        &mut out,
+        "usr/lib/systemd/system/bcvk-copy-units.service",
+        b"[Unit]\n\
+          Description=Copy bcvk units for post-switch-root on systemd <256\n\
+          DefaultDependencies=no\n\
+          ConditionPathExists=/etc/initrd-release\n\
+          Before=initrd-fs.target\n\
+          \n\
+          [Service]\n\
+          Type=oneshot\n\
+          RemainAfterExit=yes\n\
+          ExecStart=/bin/sh -c 'mkdir -p /run/systemd/system/sysinit.target.wants && cp /usr/lib/systemd/system/bcvk-journal-stream.service /run/systemd/system/ && ln -s ../bcvk-journal-stream.service /run/systemd/system/sysinit.target.wants/'\n",
+    );
+
+    write_file(
+        &mut out,
+        "usr/lib/systemd/system/bcvk-journal-stream.service",
+        b"[Unit]\n\
+          Description=Stream journal to virtio-serial\n\
+          DefaultDependencies=no\n\
+          \n\
+          [Service]\n\
+          Type=simple\n\
+          ExecStart=/bin/sh -c 'journalctl -f --no-hostname -o short-monotonic > /dev/hvc1 2>&1 || true'\n",
+    );
+
+    write_file(
+        &mut out,
+        "usr/lib/systemd/system/initrd-fs.target.d/bcvk-var-ephemeral.conf",
+        b"[Unit]\nWants=bcvk-var-ephemeral.service\n",
+    );
+    write_file(
+        &mut out,
+        "usr/lib/systemd/system/initrd-fs.target.d/bcvk-etc-overlay.conf",
+        b"[Unit]\nWants=bcvk-etc-overlay.service\n",
+    );
+    write_file(
+        &mut out,
+        "usr/lib/systemd/system/initrd-fs.target.d/bcvk-copy-units.conf",
+        b"[Unit]\nWants=bcvk-copy-units.service\n",
+    );
+
+    cpio::newc::trailer(out).unwrap()
+}
+
+/// Build the CPIO archive that installs `pubkey` as root's `authorized_keys`.
+///
+/// The archive carries a setup script, the `bcvk-ssh-setup` service that runs
+/// it, and a drop-in making `initrd-fs.target` want that service.
+pub fn build_ssh_cpio(pubkey: &str) -> Vec<u8> {
+    let mut out = Vec::with_capacity(4096);
+
+    write_dir(&mut out, "usr");
+    write_dir(&mut out, "usr/lib");
+    write_dir(&mut out, "usr/lib/bcvk");
+    write_dir(&mut out, "usr/lib/systemd");
+    write_dir(&mut out, "usr/lib/systemd/system");
+    write_dir(&mut out, "usr/lib/systemd/system/initrd-fs.target.d");
+
+    // The key is embedded in a single-quoted shell string: close, escape and
+    // reopen the quote around every `'` so the key cannot inject commands.
+    let quoted_key = pubkey.replace('\'', "'\\''");
+    let setup_script = format!(
+        "#!/bin/bash\n\
+         mkdir -p /sysroot/var/roothome /sysroot/var/empty /sysroot/var/log /sysroot/var/tmp\n\
+         chmod 700 /sysroot/var/roothome\n\
+         chmod 711 /sysroot/var/empty\n\
+         mkdir -p /sysroot/var/roothome/.ssh\n\
+         chmod 700 /sysroot/var/roothome/.ssh\n\
+         echo '{}' > /sysroot/var/roothome/.ssh/authorized_keys\n\
+         chmod 600 /sysroot/var/roothome/.ssh/authorized_keys\n\
+         chown -R 0:0 /sysroot/var/roothome/.ssh\n",
+        quoted_key
+    );
+    write_file_exec(
+        &mut out,
+        "usr/lib/bcvk/setup-ssh.sh",
+        setup_script.as_bytes(),
+    );
+
+    write_file(
+        &mut out,
+        "usr/lib/systemd/system/bcvk-ssh-setup.service",
+        b"[Unit]\n\
+          Description=Setup SSH authorized_keys for root\n\
+          DefaultDependencies=no\n\
+          ConditionPathExists=/etc/initrd-release\n\
+          Before=initrd-fs.target\n\
+          After=bcvk-var-ephemeral.service\n\
+          Requires=bcvk-var-ephemeral.service\n\
+          \n\
+          [Service]\n\
+          Type=oneshot\n\
+          RemainAfterExit=yes\n\
+          ExecStart=/usr/bin/bash /usr/lib/bcvk/setup-ssh.sh\n",
+    );
+
+    write_file(
+        &mut out,
+        "usr/lib/systemd/system/initrd-fs.target.d/bcvk-ssh-setup.conf",
+        b"[Unit]\nWants=bcvk-ssh-setup.service\n",
+    );
+
+    cpio::newc::trailer(out).unwrap()
+}
diff --git a/crates/nbdkit-erofs-plugin/src/lib.rs b/crates/nbdkit-erofs-plugin/src/lib.rs
new file mode 100644
index 000000000..b2cd4075c
--- /dev/null
+++ b/crates/nbdkit-erofs-plugin/src/lib.rs
@@ -0,0 +1,437 @@
+mod dir_walk;
+mod erofs;
+mod fat32;
+mod gpt;
+mod initramfs;
+mod regions;
+
+use std::ffi::{c_char, c_int, c_void, CStr, CString};
+use std::path::PathBuf;
+use std::sync::Mutex;
+
+use regions::Region;
+
+/// Global plugin state; `None` until the first `plugin_config` call.
+static PLUGIN_STATE: Mutex<Option<PluginState>> = Mutex::new(None);
+
+/// Configuration parameters and the disk layout built by `plugin_get_ready`.
+struct PluginState {
+    dir: PathBuf,
+    cmdline: Option<String>,
+    ssh_pubkey: Option<String>,
+    regions: Vec<Region>,
+    total_size: u64,
+}
+
+// --- nbdkit C FFI ---
+
+extern "C" {
+    fn nbdkit_error(fmt: *const c_char, ...);
+}
+
+/// Report `msg` through `nbdkit_error`.
+fn log_error(msg: &str) {
+    let c = CString::new(msg).unwrap_or_default();
+    // SAFETY: the format string is a NUL-terminated literal with a single `%s`,
+    // and `c` is a valid C string that outlives the call.
+    unsafe { nbdkit_error(b"%s\0".as_ptr() as *const c_char, c.as_ptr()) };
+}
+
+// --- Plugin callbacks ---
+
+/// nbdkit `.config` callback: record one `key=value` parameter.
+#[no_mangle]
+pub extern "C" fn plugin_config(key: *const c_char, value: *const c_char) -> c_int {
+    if key.is_null() || value.is_null() {
+        log_error("config key and value must not be null");
+        return -1;
+    }
+    // SAFETY: both pointers are non-null (checked above); this relies on nbdkit
+    // passing NUL-terminated strings that remain valid during the call.
+    let (key, value) = unsafe { (CStr::from_ptr(key), CStr::from_ptr(value)) };
+    let Ok(key) = key.to_str() else {
+        log_error("parameter name is not valid UTF-8");
+        return -1;
+    };
+    // Reject rather than substitute "": an empty value would only surface
+    // later as a misleading "parameter is required" error.
+    let Ok(value) = value.to_str() else {
+        log_error(&format!("value of parameter {} is not valid UTF-8", key));
+        return -1;
+    };
+
+    let mut state = PLUGIN_STATE.lock().unwrap();
+    let state = state.get_or_insert_with(|| PluginState {
+        dir: PathBuf::new(),
+        cmdline: None,
+        ssh_pubkey: None,
+        regions: Vec::new(),
+        total_size: 0,
+    });
+
+    match key {
+        "dir" => state.dir = PathBuf::from(value),
+        "cmdline" => state.cmdline = Some(value.to_string()),
+        "ssh_pubkey" => state.ssh_pubkey = Some(value.to_string()),
+        _ => {
+            log_error(&format!("unknown parameter: {}", key));
+            return -1;
+        }
+    }
+    0
+}
+
+/// nbdkit `.config_complete` callback: check that `dir` and `cmdline` were given.
+#[no_mangle]
+pub extern "C" fn plugin_config_complete() -> c_int {
+    let state = PLUGIN_STATE.lock().unwrap();
+    let state = match state.as_ref() {
+        Some(s) => s,
+        None => {
+            log_error("dir parameter is required");
+            return -1;
+        }
+    };
+
+    if state.dir.as_os_str().is_empty() {
+        log_error("dir parameter is required");
+        return -1;
+    }
+
+    if state.cmdline.is_none() {
+        log_error("cmdline parameter is required");
+        return -1;
+    }
+
+    0
+}
+
+/// Find a kernel directory under `dir/usr/lib/modules` containing both
+/// `vmlinuz` and `initramfs.img`, returning the two paths.
+fn find_kernel_dir(dir: &std::path::Path) -> Option<(PathBuf, PathBuf)> {
+    let modules = dir.join("usr/lib/modules");
+    if let Ok(entries) = std::fs::read_dir(&modules) {
+        for entry in entries.flatten() {
+            let kdir = entry.path();
+            let vmlinuz = kdir.join("vmlinuz");
+            let initramfs = kdir.join("initramfs.img");
+            if vmlinuz.exists() && initramfs.exists() {
+                return Some((vmlinuz, initramfs));
+            }
+        }
+    }
+    None
+}
+
+/// Search `dir/usr/lib` recursively for `grubaa64.efi`.
+fn find_grub(dir: &std::path::Path) -> Option<PathBuf> {
+    fn walk(path: &std::path::Path, target: &str) -> Option<PathBuf> {
+        if let Ok(entries) = std::fs::read_dir(path) {
+            for entry in entries.flatten() {
+                let p = entry.path();
+                if p.is_file() && p.file_name().map(|n| n == target).unwrap_or(false) {
+                    return Some(p);
+                }
+                if p.is_dir() {
+                    if let Some(found) = walk(&p, target) {
+                        return Some(found);
+                    }
+                }
+            }
+        }
+        None
+    }
+    walk(&dir.join("usr/lib"), "grubaa64.efi")
+}
+
+/// nbdkit `.get_ready` callback: build the virtual disk layout.
+///
+/// Composes the EROFS image of `dir`, an ESP holding GRUB, the kernel and the
+/// initramfs (with the bcvk CPIO archives appended), and wraps both in a GPT.
+#[no_mangle]
+pub extern "C" fn plugin_get_ready() -> c_int {
+    let mut state_guard = PLUGIN_STATE.lock().unwrap();
+    let state = match state_guard.as_mut() {
+        Some(s) => s,
+        None => return -1,
+    };
+
+    // Walk directory for EROFS
+    let walk = match dir_walk::walk_directory(&state.dir) {
+        Ok(w) => w,
+        Err(e) => {
+            log_error(&format!("failed to walk directory: {}", e));
+            return -1;
+        }
+    };
+
+    let erofs_layout = match erofs::build_erofs(&walk) {
+        Ok(l) => l,
+        Err(e) => {
+            log_error(&format!("failed to build EROFS: {}", e));
+            return -1;
+        }
+    };
+
+    let erofs_regions = erofs::build_erofs_regions(&erofs_layout, &walk);
+
+    // Discover boot files from dir
+    let (kernel_path, initrd_path) = match find_kernel_dir(&state.dir) {
+        Some(paths) => paths,
+        None => {
+            log_error("kernel/initramfs not found in dir/usr/lib/modules/");
+            return -1;
+        }
+    };
+
+    let grub_path = match find_grub(&state.dir) {
+        Some(p) => p,
+        None => {
+            log_error("grubaa64.efi not found in dir/usr/lib/");
+            return -1;
+        }
+    };
+
+    fn file_size(path: &std::path::Path) -> Option<u64> {
+        match std::fs::metadata(path) {
+            Ok(m) => Some(m.len()),
+            Err(e) => {
+                log_error(&format!("cannot stat {:?}: {}", path, e));
+                None
+            }
+        }
+    }
+
+    let Some(kernel_size) = file_size(&kernel_path) else {
+        return -1;
+    };
+    let Some(initrd_size) = file_size(&initrd_path) else {
+        return -1;
+    };
+    let Some(grub_size) = file_size(&grub_path) else {
+        return -1;
+    };
+
+    let cmdline = state.cmdline.as_deref().unwrap_or("");
+
+    // Generate grub.cfg
+    let grub_cfg = format!(
+        "set timeout=0\nset default=0\nmenuentry \"bcvk\" {{\n linux /boot/vmlinuz {}\n initrd /boot/initrd.img\n}}\n",
+        cmdline
+    );
+
+    // Generate CPIO archives
+    let units_cpio = initramfs::build_units_cpio();
+    let ssh_cpio = state.ssh_pubkey.as_deref().map(initramfs::build_ssh_cpio);
+
+    // Build initrd regions (original file + padding + CPIO)
+    let (initrd_parts, initrd_total) =
+        fat32::build_initrd_regions(&initrd_path, initrd_size, &units_cpio, ssh_cpio.as_deref());
+
+    // Build ESP regions
+    let (esp_regions, esp_size) = fat32::build_esp_regions(
+        &grub_path,
+        grub_size,
+        grub_cfg.as_bytes(),
+        &kernel_path,
+        kernel_size,
+        initrd_parts,
+        initrd_total,
+    );
+
+    // Build GPT disk with ESP + EROFS
+    match gpt::build_gpt_disk(
+        esp_regions,
+        esp_size,
+        erofs_regions,
+        erofs_layout.total_size,
+    ) {
+        Ok(disk) => {
+            state.regions = disk.regions;
+            state.total_size = disk.total_size;
+        }
+        Err(e) => {
+            log_error(&format!("failed to build GPT disk: {}", e));
+            return -1;
+        }
+    }
+
+    0
+}
+
+/// nbdkit `.open` callback: all state is global, so return a dummy non-null handle.
+#[no_mangle]
+pub extern "C" fn plugin_open(_readonly: c_int) -> *mut c_void {
+    1 as *mut c_void
+}
+
+/// nbdkit `.close` callback: nothing to release.
+#[no_mangle]
+pub extern "C" fn plugin_close(_handle: *mut c_void) {}
+
+/// nbdkit `.get_size` callback: disk size in bytes, or -1 if unconfigured.
+#[no_mangle]
+pub extern "C" fn plugin_get_size(_handle: *mut c_void) -> i64 {
+    let state = PLUGIN_STATE.lock().unwrap();
+    state.as_ref().map(|s| s.total_size as i64).unwrap_or(-1)
+}
+
+/// nbdkit `.can_multi_conn` callback: always reports support.
+#[no_mangle]
+pub extern "C" fn plugin_can_multi_conn(_handle: *mut c_void) -> c_int {
+    1
+}
+
+/// nbdkit `.pread` callback: copy `count` bytes at `offset` into `buf`.
+#[no_mangle]
+pub extern "C" fn plugin_pread(
+    _handle: *mut c_void,
+    buf: *mut c_void,
+    count: u32,
+    offset: u64,
+    _flags: u32,
+) -> c_int {
+    let state = PLUGIN_STATE.lock().unwrap();
+    let state = match state.as_ref() {
+        Some(s) => s,
+        None => return -1,
+    };
+
+    // A zero-length read needs no buffer, and `from_raw_parts_mut` requires a
+    // non-null pointer even for an empty slice.
+    if count == 0 {
+        return 0;
+    }
+    if buf.is_null() {
+        log_error("pread called with a null buffer");
+        return -1;
+    }
+    // SAFETY: `buf` is non-null; this relies on nbdkit supplying a buffer of at
+    // least `count` writable bytes.
+    let buf = unsafe { std::slice::from_raw_parts_mut(buf as *mut u8, count as usize) };
+
+    match regions::pread(&state.regions, buf, offset) {
+        Ok(()) => 0,
+        Err(e) => {
+            log_error(&format!("pread error at offset {}: {}", offset, e));
+            -1
+        }
+    }
+}
+
+// --- Plugin registration ---
+
+/// Rust mirror of the C `struct nbdkit_plugin`, up to the `after_fork` field.
+#[repr(C)]
+pub struct NbdkitPlugin {
+    _struct_size: u64,
+    _api_version: c_int,
+    _thread_model: c_int,
+    name: *const c_char,
+    longname: *const c_char,
+    version: *const c_char,
+    description: *const c_char,
+    load: Option<extern "C" fn()>,
+    unload: Option<extern "C" fn()>,
+    config: Option<extern "C" fn(*const c_char, *const c_char) -> c_int>,
+    config_complete: Option<extern "C" fn() -> c_int>,
+    config_help: *const c_char,
+    open: Option<extern "C" fn(c_int) -> *mut c_void>,
+    close: Option<extern "C" fn(*mut c_void)>,
+    get_size: Option<extern "C" fn(*mut c_void) -> i64>,
+    can_write: Option<extern "C" fn(*mut c_void) -> c_int>,
+    can_flush: Option<extern "C" fn(*mut c_void) -> c_int>,
+    is_rotational: Option<extern "C" fn(*mut c_void) -> c_int>,
+    can_trim: Option<extern "C" fn(*mut c_void) -> c_int>,
+    _pread_v1: Option<extern "C" fn(*mut c_void, *mut c_void, u32, u64) -> c_int>,
+    _pwrite_v1: Option<extern "C" fn(*mut c_void, *const c_void, u32, u64) -> c_int>,
+    _flush_v1: Option<extern "C" fn(*mut c_void) -> c_int>,
+    _trim_v1: Option<extern "C" fn(*mut c_void, u32, u64) -> c_int>,
+    _zero_v1: Option<extern "C" fn(*mut c_void, u32, u64, c_int) -> c_int>,
+    errno_is_preserved: c_int,
+    dump_plugin: Option<extern "C" fn()>,
+    can_zero: Option<extern "C" fn(*mut c_void) -> c_int>,
+    can_fua: Option<extern "C" fn(*mut c_void) -> c_int>,
+    pread: Option<extern "C" fn(*mut c_void, *mut c_void, u32, u64, u32) -> c_int>,
+    pwrite: Option<extern "C" fn(*mut c_void, *const c_void, u32, u64, u32) -> c_int>,
+    flush: Option<extern "C" fn(*mut c_void, u32) -> c_int>,
+    trim: Option<extern "C" fn(*mut c_void, u32, u64, u32) -> c_int>,
+    zero: Option<extern "C" fn(*mut c_void, u32, u64, u32) -> c_int>,
+    magic_config_key: *const c_char,
+    can_multi_conn: Option<extern "C" fn(*mut c_void) -> c_int>,
+    can_extents: Option<extern "C" fn(*mut c_void) -> c_int>,
+    extents: Option<extern "C" fn(*mut c_void, u32, u64, u32, *mut c_void) -> c_int>,
+    can_cache: Option<extern "C" fn(*mut c_void) -> c_int>,
+    cache: Option<extern "C" fn(*mut c_void, u32, u64, u32) -> c_int>,
+    thread_model: Option<extern "C" fn() -> c_int>,
+    can_fast_zero: Option<extern "C" fn(*mut c_void) -> c_int>,
+    preconnect: Option<extern "C" fn(c_int) -> c_int>,
+    get_ready: Option<extern "C" fn() -> c_int>,
+    after_fork: Option<extern "C" fn() -> c_int>,
+    // Fields after after_fork (list_exports, default_export, export_description,
+    // cleanup, block_size) are omitted. nbdkit uses _struct_size to determine
+    // which fields are present, so omitting trailing fields is safe.
+}
+
+// SAFETY: the only instance holds pointers to immutable statics and plain function pointers.
+unsafe impl Sync for NbdkitPlugin {}
+
+static PLUGIN_NAME: &[u8] = b"erofs\0";
+static PLUGIN_LONGNAME: &[u8] = b"nbdkit EROFS plugin\0";
+static PLUGIN_VERSION: &[u8] = b"0.2.0\0";
+static PLUGIN_DESCRIPTION: &[u8] = b"Create virtual EROFS+ESP disk from directory\0";
+static PLUGIN_CONFIG_HELP: &[u8] = b"dir= (required) Container overlay merged directory\ncmdline= (required) Kernel command line for grub.cfg\nssh_pubkey= SSH public key for root access\0";
+static PLUGIN_MAGIC_KEY: &[u8] = b"dir\0";
+
+static PLUGIN: NbdkitPlugin = NbdkitPlugin {
+    _struct_size: std::mem::size_of::<NbdkitPlugin>() as u64,
+    _api_version: 2,
+    _thread_model: 0,
+    name: PLUGIN_NAME.as_ptr() as *const c_char,
+    longname: PLUGIN_LONGNAME.as_ptr() as *const c_char,
+    version: PLUGIN_VERSION.as_ptr() as *const c_char,
+    description: PLUGIN_DESCRIPTION.as_ptr() as *const c_char,
+    load: None,
+    unload: None,
+    config: Some(plugin_config),
+    config_complete: Some(plugin_config_complete),
+    config_help: PLUGIN_CONFIG_HELP.as_ptr() as *const c_char,
+    open: Some(plugin_open),
+    close: Some(plugin_close),
+    get_size: Some(plugin_get_size),
+    can_write: None,
+    can_flush: None,
+    is_rotational: None,
+    can_trim: None,
+    _pread_v1: None,
+    _pwrite_v1: None,
+    _flush_v1: None,
+    _trim_v1: None,
+    _zero_v1: None,
+    errno_is_preserved: 1,
+    dump_plugin: None,
+    can_zero: None,
+    can_fua: None,
+    pread: Some(plugin_pread),
+    pwrite: None,
+    flush: None,
+    trim: None,
+    zero: None,
+    magic_config_key: PLUGIN_MAGIC_KEY.as_ptr() as *const c_char,
+    can_multi_conn: Some(plugin_can_multi_conn),
+    can_extents: None,
+    extents: None,
+    can_cache: None,
+    cache: None,
+    thread_model: None,
+    can_fast_zero: None,
+    preconnect: None,
+    get_ready: Some(plugin_get_ready),
+    after_fork: None,
+};
+
+/// nbdkit entry point: return the plugin description table.
+#[no_mangle]
+pub extern "C" fn plugin_init() -> *const NbdkitPlugin {
+    &PLUGIN
+}
diff --git a/crates/nbdkit-erofs-plugin/src/regions.rs b/crates/nbdkit-erofs-plugin/src/regions.rs
new file mode 100644
index 000000000..16268d623
--- /dev/null
+++ b/crates/nbdkit-erofs-plugin/src/regions.rs
@@ -0,0 +1,97 @@
+//! Region-based virtual block device composition.
+//! Inspired by the regions pattern in nbdkit's floppy plugin (BSD-3-Clause).
+
+use std::path::PathBuf;
+use std::sync::Arc;
+
+/// Backing source for the bytes of a [`Region`].
+#[derive(Debug, Clone)]
+pub enum RegionType {
+    /// Bytes held in memory.
+    Data(Arc<Vec<u8>>),
+    /// Bytes read from the file at `path`, starting at file offset 0.
+    File { path: PathBuf },
+    /// Zero bytes.
+    Zero,
+}
+
+/// A byte range of the virtual disk and the source of its contents.
+#[derive(Debug, Clone)]
+pub struct Region {
+    pub start: u64,
+    pub len: u64,
+    pub region_type: RegionType,
+}
+
+impl Region {
+    /// Offset one past the last byte of the region.
+    pub fn end(&self) -> u64 {
+        self.start + self.len
+    }
+}
+
+/// Find the region containing `offset`.
+///
+/// Uses binary search, so `regions` must be sorted by `start` and must not
+/// overlap.
+pub fn find_region(regions: &[Region], offset: u64) -> Option<&Region> {
+    regions
+        .binary_search_by(|r| {
+            if offset < r.start {
+                std::cmp::Ordering::Greater
+            } else if offset >= r.end() {
+                std::cmp::Ordering::Less
+            } else {
+                std::cmp::Ordering::Equal
+            }
+        })
+        .ok()
+        .map(|i| &regions[i])
+}
+
+/// Fill `buf` with the disk contents starting at `offset`.
+///
+/// # Errors
+///
+/// Returns `InvalidInput` if any requested byte is not covered by a region,
+/// and propagates I/O errors from file-backed regions.
+pub fn pread(regions: &[Region], buf: &mut [u8], offset: u64) -> std::io::Result<()> {
+    let mut remaining = buf.len();
+    let mut buf_offset = 0;
+    let mut disk_offset = offset;
+
+    while remaining > 0 {
+        let region = find_region(regions, disk_offset).ok_or_else(|| {
+            std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                format!("offset {} outside disk", disk_offset),
+            )
+        })?;
+
+        let region_offset = disk_offset - region.start;
+        let avail = (region.len - region_offset) as usize;
+        let len = remaining.min(avail);
+
+        match &region.region_type {
+            RegionType::Data(data) => {
+                let start = region_offset as usize;
+                buf[buf_offset..buf_offset + len].copy_from_slice(&data[start..start + len]);
+            }
+            RegionType::File { path } => {
+                use std::os::unix::fs::FileExt;
+                let f = std::fs::File::open(path)?;
+                // The region maps the file from its start, so the region offset is the file offset.
+                f.read_exact_at(&mut buf[buf_offset..buf_offset + len], region_offset)?;
+            }
+            RegionType::Zero => {
+                buf[buf_offset..buf_offset + len].fill(0);
+            }
+        }
+
+        remaining -= len;
+        buf_offset += len;
+        disk_offset += len as u64;
+    }
+
+    Ok(())
+}