Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 80 additions & 11 deletions Sources/ContainerizationEXT4/EXT4+Formatter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ extension EXT4 {
blockSize / groupDescriptorSize
}

private var blockCount: UInt32 {
// internally accessed by journal setup
var blockCount: UInt32 {
((size - 1) / blockSize) + 1
}

Expand All @@ -62,15 +63,17 @@ extension EXT4 {
///
/// - Parameters:
/// - devicePath: The path to the block device where the ext4 filesystem will be created.
/// - blockSize: The filesystem block size.
/// - minDiskSize: The minimum disk size required for the formatted filesystem.
/// - journal: The JBD2 journal size and mode, or nil for an unjournalled filesystem.
///
/// - Note: This ext4 formatter is designed for creating block devices out of container images and does not support all the
/// features and options available in the full ext4 filesystem implementation. It focuses
/// on the core functionality required for formatting a block device with ext4.
///
/// - Important: Ensure that the destination block device is accessible and has sufficient permissions
/// for formatting. The formatting process will erase all existing data on the device.
public init(_ devicePath: FilePath, minDiskSize: UInt64 = 256.kib()) throws {
public init(_ devicePath: FilePath, blockSize: UInt32 = 4096, minDiskSize: UInt64 = 256.kib(), journal: JournalConfig? = nil) throws {
/// The constructor performs the following steps:
///
/// 1. Creates the first 10 inodes:
Expand Down Expand Up @@ -122,6 +125,7 @@ extension EXT4 {
}
// step #2
self.tree = FileTree(EXT4.RootInode, "/")
self.journalConfig = journal
// skip past the superblock and block descriptor table
try self.seek(block: self.groupDescriptorBlocks + 1)
// lost+found directory is required for e2fsck to pass
Expand Down Expand Up @@ -604,6 +608,19 @@ extension EXT4 {
}
breadthWiseChildTree.append(contentsOf: child.pointee.children.map { (child, $0) })
}

// Generate UUID once; shared by filesystem superblock and JBD2 superblock.
let filesystemUUID = UUID().uuid

// Journal init MUST precede optimizeBlockGroupLayout() and commitInodeTable().
// Reason 1: optimizeBlockGroupLayout reads self.currentBlock — journal blocks
// must already be written to be counted in the layout calculation.
// Reason 2: commitInodeTable writes inode 8 to disk — setupJournalInode must
// have updated self.inodes[7] in memory first.
if let config = journalConfig {
try initializeJournal(config: config, filesystemUUID: filesystemUUID)
}

let blockGroupSize = optimizeBlockGroupLayout(blocks: self.currentBlock, inodes: UInt32(self.inodes.count))
let inodeTableOffset = try self.commitInodeTable(
blockGroups: blockGroupSize.blockGroups,
Expand Down Expand Up @@ -861,39 +878,85 @@ extension EXT4 {
superblock.firstInode = EXT4.FirstInode
superblock.lpfInode = EXT4.LostAndFoundInode
superblock.inodeSize = UInt16(EXT4.InodeSize)
superblock.featureCompat = CompatFeature.sparseSuper2 | CompatFeature.extAttr
superblock.featureIncompat =
IncompatFeature.filetype | IncompatFeature.extents | IncompatFeature.flexBg
superblock.featureRoCompat =
RoCompatFeature.largeFile | RoCompatFeature.hugeFile | RoCompatFeature.extraIsize
superblock.minExtraIsize = EXT4.ExtraIsize
superblock.wantExtraIsize = EXT4.ExtraIsize
superblock.logGroupsPerFlex = 31
superblock.uuid = UUID().uuid
superblock.uuid = filesystemUUID
var compatFeatures: UInt32 = CompatFeature.sparseSuper2 | CompatFeature.extAttr
if let config = journalConfig {
compatFeatures |= CompatFeature.hasJournal.rawValue
superblock.journalInum = EXT4.JournalInode
superblock.journalUUID = filesystemUUID
superblock.journalBlocks = journalInodeBlockBackup()
superblock.journalBackupType = 1 // s_jnl_backup_type: 1 = s_jnl_blocks[] holds a valid inode backup
if let mode = config.defaultMode {
switch mode {
case .writeback: superblock.defaultMountOpts = DefaultMountOpts.journalWriteback
case .ordered: superblock.defaultMountOpts = DefaultMountOpts.journalOrdered
case .journal: superblock.defaultMountOpts = DefaultMountOpts.journalData
}
}
}
superblock.featureCompat = compatFeatures

// Fields intentionally left at zero:
// s_r_blocks_count_lo: no blocks reserved for root
// s_mtime / s_wtime: never mounted/written; kernel updates on first access
// s_mnt_count / s_max_mnt_count: no forced-fsck-after-N-mounts policy
// s_lastcheck / s_checkinterval: no time-based fsck scheduling
// s_def_resuid / s_def_resgid: reserved blocks owned by uid/gid 0 (root)
// s_block_group_nr: this superblock resides in group 0
// s_volume_name: no volume label
// s_last_mounted: no recorded prior mount path
// s_algorithm_usage_bitmap: obsolete compression field, not used
// s_prealloc_blocks / s_prealloc_dir_blocks: block preallocation not enabled
// s_reserved_gdt_blocks: online resize not supported
// s_journal_dev: journal is internal (inode 8), not on an external device
// s_last_orphan: fresh filesystem, no pending orphan cleanup
// s_hash_seed / s_def_hash_version: kernel initialises htree hash seed at first mount
// s_first_meta_bg: meta block group feature not enabled
// s_mkfs_time: creation timestamp not recorded
// s_raid_stride / s_mmp_interval / s_mmp_block / s_raid_stripe_width: no RAID or MMP
// s_checksum_type / s_checksum_seed: metadata checksums not enabled (no csum feature bit)
// s_snapshot_*: snapshot feature not enabled
// s_error_count / s_first_error_* / s_last_error_*: fresh filesystem, no recorded errors
// s_usr_quota_inum / s_grp_quota_inum / s_prj_quota_inum: quotas not enabled
// s_overhead_clusters: kernel computes dynamically; zero is always safe
// s_backup_bgs: sparse_super2 active but no secondary backup groups requested
// s_encrypt_algos / s_encrypt_pw_salt: encryption not enabled
// s_checksum: superblock checksum not enabled (no metadata_csum feature bit)

try withUnsafeLittleEndianBytes(of: superblock) { bytes in
try self.handle.write(contentsOf: bytes)
}
try self.handle.write(contentsOf: Array<UInt8>.init(repeating: 0, count: 2048))
}

// MARK: Private Methods and Properties
private var handle: FileHandle
private var inodes: [Ptr<Inode>]
// MARK: Private and internal methods and properties
private var tree: FileTree
private var deletedBlocks: [(start: UInt32, end: UInt32)] = []

private var pos: UInt64 {
// internally accessed by journal setup
var handle: FileHandle
var inodes: [Ptr<Inode>]
let journalConfig: JournalConfig?

var pos: UInt64 {
guard let offset = try? self.handle.offset() else {
return 0
}
return offset
}

private var currentBlock: UInt32 {
var currentBlock: UInt32 {
self.pos / self.blockSize
}

private func seek(block: UInt32) throws {
func seek(block: UInt32) throws {
try self.handle.seek(toOffset: UInt64(block) * blockSize)
}

Expand Down Expand Up @@ -1022,7 +1085,7 @@ extension EXT4 {
}
}

private func writeExtents(_ inode: Inode, _ blocks: (start: UInt32, end: UInt32)) throws -> Inode {
func writeExtents(_ inode: Inode, _ blocks: (start: UInt32, end: UInt32)) throws -> Inode {
var inode = inode
// rest of code assumes that extents MUST go into a new block
if self.pos % self.blockSize != 0 {
Expand Down Expand Up @@ -1226,6 +1289,8 @@ extension EXT4 {
case cannotTruncateFile(_ path: FilePath)
case cannotCreateSparseFile(_ path: FilePath)
case cannotResizeFS(_ size: UInt64)
case journalTooSmall(_ size: UInt64)
case journalTooLarge(_ size: UInt64)
public var description: String {
switch self {
case .notDirectory(let path):
Expand Down Expand Up @@ -1258,6 +1323,10 @@ extension EXT4 {
return "cannot create sparse file at \(path)"
case .cannotResizeFS(let size):
return "cannot resize fs to \(size) bytes"
case .journalTooSmall(let size):
return "requested journal size \(size) bytes is too small; minimum is \(EXT4.MinJournalBlocks) blocks (JBD2_MIN_JOURNAL_BLOCKS)"
case .journalTooLarge(let size):
return "requested journal size \(size) bytes exceeds half the filesystem size"
}
}
}
Expand Down
196 changes: 196 additions & 0 deletions Sources/ContainerizationEXT4/EXT4+Journal.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
//===----------------------------------------------------------------------===//
// Copyright © 2026 Apple Inc. and the Containerization project authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//===----------------------------------------------------------------------===//

import ContainerizationOS
import Foundation

// JBD2 on-disk format reference:
// https://www.kernel.org/doc/html/latest/filesystems/ext4/journal.html

extension EXT4.Formatter {
/// Lays out the JBD2 journal at the current write position; called from close() when journaling is enabled.
///
/// Steps, in order: size the journal, round the write position up to a block
/// boundary, write the journal superblock, zero-fill the remaining journal
/// blocks, and finally populate the journal inode with the resulting block range.
///
/// - Parameters:
///   - config: Journal settings; only `config.size` is read here.
///   - filesystemUUID: The 16 UUID bytes forwarded to the journal superblock writer.
/// - Throws: Any error raised by the sizing, seek, write, or inode-setup helpers.
func initializeJournal(
    config: EXT4.JournalConfig,
    filesystemUUID: (
        UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8,
        UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8
    )
) throws {
    let totalJournalBlocks = try calculateJournalSize(requestedSize: config.size, totalBlocks: blockCount)

    // The journal has to begin on a block boundary: when the write position is
    // mid-block, skip ahead to the start of the next block before recording it.
    let isBlockAligned = self.pos % self.blockSize == 0
    if !isBlockAligned {
        try self.seek(block: self.currentBlock + 1)
    }
    let firstJournalBlock = self.currentBlock

    // The first journal block holds the JBD2 superblock; every other block is zeroed.
    try writeJournalSuperblock(journalBlocks: totalJournalBlocks, filesystemUUID: filesystemUUID)
    try zeroJournalBlocks(count: totalJournalBlocks - 1)

    // Record the journal's block range in its inode only after the blocks are on disk.
    try setupJournalInode(startBlock: firstJournalBlock, blockCount: totalJournalBlocks)
}

// MARK: - Private helpers

private func calculateJournalSize(requestedSize: UInt64?, totalBlocks: UInt32) throws -> UInt32 {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JournalSize needs to account for optimizeBlockGroupLayout

The totalBlocks value passed in when this gets called is an estimate based on blockSize and diskSize. However, the layout changes depending on how much data has been written (this happens right after close() gets called).

The fix would be to either defer the size validation until after close() knows the real final block count, or to have calculateJournalSize use the post-optimizeBlockGroupLayout block count (i.e., account for the fact that close() will expand the image to fit). The totalBlocks / 2 guard should also be applied consistently to both paths.

if let size = requestedSize {
let blocks = size / UInt64(self.blockSize)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to check that blocks > 0 here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the numerator and denominator are both UInt64 (self.blockSize is UInt32) so that check would be dead code

Copy link
Copy Markdown
Contributor

@dkovba dkovba Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

blocks would be 0 if size was less than self.blockSize.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My brain is fried, I thought I was seeing "do we need to check for negative?"

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually this needs to be bounded at 1024 blocks minimum...

// JBD2_MIN_JOURNAL_BLOCKS: the kernel refuses to mount with fewer.
// blocks == 0 would also cause a UInt32 underflow in the caller.
guard blocks >= EXT4.MinJournalBlocks else {
throw EXT4.Formatter.Error.journalTooSmall(size)
}
guard blocks <= UInt64(totalBlocks) / 2 else {
throw EXT4.Formatter.Error.journalTooLarge(size)
}
// Safe: blocks ≤ totalBlocks / 2 ≤ UInt32.max / 2, so narrowing cannot trap.
return UInt32(blocks)
}
// Default sizing: scale with the filesystem, with a floor determined by JBD2_MIN_JOURNAL_BLOCKS
// and a ceiling that follows e2fsprogs convention: 128 MiB for filesystems up to 128 GiB,
// and 1 GiB for larger filesystems. The larger ceiling was introduced in e2fsprogs 1.43.2:
// https://e2fsprogs.sourceforge.net/e2fsprogs-release.html#1.43.2
let fsBytes = UInt64(totalBlocks) * UInt64(self.blockSize)
let scaledBytes = fsBytes / 64 // 1/64th of the filesystem, matching e2fsprogs defaults
let minBytes: UInt64 = UInt64(EXT4.MinJournalBlocks) * UInt64(self.blockSize)
let maxBytes: UInt64 = fsBytes > 128.gib() ? 1.gib() : 128.mib()
let clampedBytes = min(max(scaledBytes, minBytes), maxBytes)
// Safe: clampedBytes ≤ 1 GiB and blockSize ≥ 1, so the quotient fits in UInt32.
return UInt32(clampedBytes / UInt64(self.blockSize))
}

/// Builds one block-sized JBD2 superblock in memory and writes it at the current file position.
///
/// All multi-byte fields are stored big-endian. Any field not listed below is
/// left at zero because the buffer starts zero-filled.
///
/// - Parameters:
///   - journalBlocks: Total journal length in blocks, stored as `s_maxlen`.
///   - filesystemUUID: The 16 UUID bytes stored as `s_uuid` and as the first `s_users` entry.
/// - Throws: Any error raised by the underlying file write.
private func writeJournalSuperblock(
    journalBlocks: UInt32,
    filesystemUUID: (
        UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8,
        UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8
    )
) throws {
    // blockSize is widened to Int to size the buffer.
    var block = [UInt8](repeating: 0, count: Int(self.blockSize))

    // Stores `value` at `offset`, most significant byte first.
    func putBigEndian(_ value: UInt32, at offset: Int) {
        for (index, shift) in [UInt32(24), 16, 8, 0].enumerated() {
            block[offset + index] = UInt8(truncatingIfNeeded: value >> shift)
        }
    }

    // s_max_transaction and s_max_trans_data share one value: a quarter of the
    // journal, capped at 32768 blocks.
    let transactionLimit = min(journalBlocks / 4, 32768)

    // JBD2 block header and superblock body:
    // https://www.kernel.org/doc/html/latest/filesystems/ext4/journal.html#block-header
    // https://www.kernel.org/doc/html/latest/filesystems/ext4/journal.html#super-block
    let fields: [(offset: Int, value: UInt32)] = [
        (0x00, EXT4.JournalMagic),  // h_magic
        (0x04, 4),  // h_blocktype = superblock v2
        (0x08, 1),  // h_sequence
        (0x0C, self.blockSize),  // s_blocksize
        (0x10, journalBlocks),  // s_maxlen
        (0x14, 1),  // s_first (first usable block)
        (0x18, 1),  // s_sequence
        (0x40, 1),  // s_nr_users
        (0x48, transactionLimit),  // s_max_transaction
        (0x4C, transactionLimit),  // s_max_trans_data
    ]
    for field in fields {
        putBigEndian(field.value, at: field.offset)
    }
    // Deliberately left zero:
    //   0x1C s_start            — zero means "journal empty, begin at s_first"
    //   0x20 s_errno            — no prior abort error
    //   0x24 s_feature_compat   — no optional features
    //   0x28 s_feature_incompat — unrecognised flags would cause mount refusal
    //   0x2C s_feature_ro_compat

    // The UUID tuple is 16 contiguous bytes; flatten it into an array once.
    let uuid = withUnsafeBytes(of: filesystemUUID) { [UInt8]($0) }
    block.replaceSubrange(0x30..<0x40, with: uuid)  // s_uuid
    block.replaceSubrange(0x100..<0x110, with: uuid)  // s_users[0], first entry of the users array

    try self.handle.write(contentsOf: block)
}

/// Writes `count` blocks of zero bytes at the current file position.
///
/// The zeros are emitted in chunks of at most 1 MiB so that a large journal
/// does not require an equally large in-memory buffer.
///
/// - Parameter count: Number of blocks to zero; a count of 0 writes nothing.
/// - Throws: Any error raised by the underlying file write.
private func zeroJournalBlocks(count: UInt32) throws {
    guard count > 0 else { return }
    let chunkSize = 1.mib()
    // Byte arithmetic is done in UInt64: the product of two UInt32 values is at
    // most (2^32 - 1)^2, which always fits in UInt64 but can exceed Int.max, so
    // multiplying as Int could trap for extreme inputs.
    let totalBytes = UInt64(count) * UInt64(self.blockSize)
    // The buffer never exceeds one chunk (1 MiB), so narrowing to Int is safe.
    let bufferSize = Int(min(UInt64(chunkSize), totalBytes))
    let zeroBuf = [UInt8](repeating: 0, count: bufferSize)
    var remaining = totalBytes
    while remaining > 0 {
        // The final chunk may be shorter than the buffer.
        let toWrite = Int(min(UInt64(zeroBuf.count), remaining))
        try self.handle.write(contentsOf: zeroBuf[0..<toWrite])
        remaining -= UInt64(toWrite)
    }
}

/// Fills in the journal inode so that it describes the blocks just written for the journal.
///
/// The inode is a root-owned regular file with mode 0o600, sized to
/// `blockCount` blocks, with all four timestamps set to the current time. It
/// is stored in the in-memory inode array at index `EXT4.JournalInode - 1`.
///
/// - Parameters:
///   - startBlock: First block of the journal.
///   - blockCount: Journal length in blocks.
/// - Throws: Any error raised by `writeExtents`.
private func setupJournalInode(startBlock: UInt32, blockCount: UInt32) throws {
    let byteLength = UInt64(blockCount) * UInt64(self.blockSize)
    // One timestamp is shared by all four time fields.
    let stamp = Date().fs()

    var inode = EXT4.Inode()

    // Ownership and type.
    inode.mode = EXT4.Inode.Mode(.S_IFREG, 0o600)
    inode.uid = 0
    inode.gid = 0
    inode.linksCount = 1
    inode.extraIsize = UInt16(EXT4.ExtraIsize)
    inode.flags = EXT4.InodeFlag.extents.rawValue | EXT4.InodeFlag.hugeFile.rawValue

    // Size, split into its low and high halves.
    inode.sizeLow = byteLength.lo
    inode.sizeHigh = byteLength.hi

    // Access, change, modification and creation times.
    inode.atime = stamp.lo
    inode.atimeExtra = stamp.hi
    inode.ctime = stamp.lo
    inode.ctimeExtra = stamp.hi
    inode.mtime = stamp.lo
    inode.mtimeExtra = stamp.hi
    inode.crtime = stamp.lo
    inode.crtimeExtra = stamp.hi

    // The journal is one contiguous run of blocks. `startBlock + blockCount`
    // traps if it overflows UInt32.
    // NOTE(review): the previous comment assumed the range always maps to a single
    // extent stored inline in the inode, so that writeExtents performs no extra I/O —
    // confirm against writeExtents for journals longer than one extent can describe.
    let journalRange = (start: startBlock, end: startBlock + blockCount)
    inode = try self.writeExtents(inode, journalRange)

    // Inode numbers are 1-based; the array is 0-based.
    let slot = Int(EXT4.JournalInode) - 1
    self.inodes[slot].initialize(to: inode)
}

/// Produces the 17-word backup of the journal inode that is stored in the superblock's `journalBlocks`.
///
/// Layout (s_jnl_blocks): words 0–14 are the first 60 bytes of the journal
/// inode's `block` field, word 15 is the high half of the inode size and
/// word 16 is the low half.
///
/// - Returns: The 17 words as a tuple.
func journalInodeBlockBackup() -> (
    UInt32, UInt32, UInt32, UInt32, UInt32, UInt32, UInt32, UInt32,
    UInt32, UInt32, UInt32, UInt32, UInt32, UInt32, UInt32, UInt32,
    UInt32
) {
    let ji = self.inodes[Int(EXT4.JournalInode) - 1].pointee
    let blockWordCount = 15
    var words = [UInt32](repeating: 0, count: 17)
    // Copy the raw bytes instead of calling load(fromByteOffset:as:) per word:
    // load requires a UInt32-aligned address, which the storage of `ji.block`
    // is not guaranteed to provide. A byte copy yields the same words without
    // that requirement, and bounding it by the source length prevents reading
    // past the end of the field.
    withUnsafeBytes(of: ji.block) { source in
        let byteCount = min(source.count, blockWordCount * MemoryLayout<UInt32>.size)
        words.withUnsafeMutableBytes { destination in
            destination.copyMemory(from: UnsafeRawBufferPointer(rebasing: source[0..<byteCount]))
        }
    }
    words[15] = ji.sizeHigh  // i_size_high (16th element per spec)
    words[16] = ji.sizeLow  // i_size (17th element per spec)
    return (
        words[0], words[1], words[2], words[3],
        words[4], words[5], words[6], words[7],
        words[8], words[9], words[10], words[11],
        words[12], words[13], words[14], words[15],
        words[16]
    )
}
}
10 changes: 10 additions & 0 deletions Sources/ContainerizationEXT4/EXT4+Types.swift
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,16 @@ extension EXT4 {
public var checksum: UInt32 = 0
}

/// Magic number written as `h_magic` at the start of the JBD2 journal superblock.
static let JournalMagic: UInt32 = 0xC03B_3998
/// Inode number of the journal: recorded in the superblock's `journalInum` and,
/// minus one, used to index the formatter's in-memory inode array.
static let JournalInode: InodeNumber = 8
/// Smallest journal, in blocks: an explicitly requested size below this is rejected,
/// and the default size is never smaller than this.
static let MinJournalBlocks: UInt32 = 1024 // JBD2_MIN_JOURNAL_BLOCKS

/// Journaling data-mode values stored in the superblock's `defaultMountOpts` field.
struct DefaultMountOpts {
    /// data=writeback
    static let journalWriteback: UInt32 = 0x60
    /// data=ordered
    static let journalOrdered: UInt32 = 0x40
    /// data=journal
    static let journalData: UInt32 = 0x20
}

struct CompatFeature {
let rawValue: UInt32

Expand Down
Loading
Loading