From 63e7a9331263a0ebdb422bea784164c29ff5511f Mon Sep 17 00:00:00 2001
From: Max042004
Date: Sat, 6 Jun 2026 17:54:18 +0800
Subject: [PATCH] Translate fcntl flock l_type between Linux and macOS

The F_GETLK/F_SETLK/F_SETLKW handler copied the guest's l_type field
straight into the host struct flock, on the assumption (stated in a
comment) that the lock-type constants match. They do not:

  Linux aarch64: F_RDLCK=0, F_WRLCK=1, F_UNLCK=2
  macOS/BSD:     F_RDLCK=1, F_UNLCK=2, F_WRLCK=3

A Linux F_RDLCK (0) is not a valid type on macOS, so the host fcntl()
rejected it with EINVAL. POSIX-locking databases take a shared read lock
first -- SQLite, for example, fails to open any on-disk database with
"disk I/O error" because its initial F_RDLCK on the pending byte never
succeeds.

Map l_type Linux->macOS on input for all three commands, rejecting
unknown guest types with EINVAL, and map it back macOS->Linux when
writing the F_GETLK result (any host value other than F_RDLCK/F_WRLCK is
reported as F_UNLCK). Add test-flock, which locks around SQLite's 1GiB
pending-byte offsets; it fails with errno 22 before this change and
passes after.
---
Reviewer note (not part of the commit): this patch was recovered from a
whitespace-collapsed copy. Indentation, blank context lines, and the five
system #include targets in tests/test-flock.c are reconstructed and must
be confirmed against the upstream commit before applying. The author
e-mail address is missing from the From: header.

 src/syscall/fs.c   | 39 +++++++++++++++++-
 tests/manifest.txt |  1 +
 tests/test-flock.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+), 2 deletions(-)
 create mode 100644 tests/test-flock.c

diff --git a/src/syscall/fs.c b/src/syscall/fs.c
index 2465ca4..3de2e1b 100644
--- a/src/syscall/fs.c
+++ b/src/syscall/fs.c
@@ -655,11 +655,33 @@ int64_t sys_fcntl(guest_t *g, int fd, int cmd, uint64_t arg)
         memcpy(&l_start, lflock + 8, 8); /* offset 8 due to padding */
         memcpy(&l_len, lflock + 16, 8);
 
+        /* l_type constants differ between Linux and macOS/BSD:
+         *   Linux: F_RDLCK=0, F_WRLCK=1, F_UNLCK=2
+         *   macOS: F_RDLCK=1, F_UNLCK=2, F_WRLCK=3
+         * Passing the Linux value straight through makes a Linux F_RDLCK (0)
+         * an invalid type on macOS, which fcntl() rejects with EINVAL. This is
+         * the lock POSIX databases (e.g. SQLite) take first, so it must map. */
+        short mac_type;
+        switch (l_type) {
+        case 0: /* LINUX_F_RDLCK */
+            mac_type = F_RDLCK;
+            break;
+        case 1: /* LINUX_F_WRLCK */
+            mac_type = F_WRLCK;
+            break;
+        case 2: /* LINUX_F_UNLCK */
+            mac_type = F_UNLCK;
+            break;
+        default:
+            host_fd_ref_close(&host_ref);
+            return -LINUX_EINVAL;
+        }
+
         struct flock mac_fl = {
             .l_start = l_start,
             .l_len = l_len,
             .l_pid = 0,
-            .l_type = l_type, /* F_RDLCK=0, F_WRLCK=1, F_UNLCK=2 same on both */
+            .l_type = mac_type,
             .l_whence = l_whence, /* SEEK_SET=0, SEEK_CUR=1, SEEK_END=2 same */
         };
 
@@ -671,7 +693,20 @@ int64_t sys_fcntl(guest_t *g, int fd, int cmd, uint64_t arg)
 
         /* For F_GETLK, write back the result */
         if (cmd == 5) {
-            int16_t rt = mac_fl.l_type, rw = mac_fl.l_whence;
+            /* Map macOS l_type back to Linux constants (see above). */
+            int16_t rt;
+            switch (mac_fl.l_type) {
+            case F_RDLCK:
+                rt = 0; /* LINUX_F_RDLCK */
+                break;
+            case F_WRLCK:
+                rt = 1; /* LINUX_F_WRLCK */
+                break;
+            default:
+                rt = 2; /* LINUX_F_UNLCK */
+                break;
+            }
+            int16_t rw = mac_fl.l_whence;
             int64_t rs = mac_fl.l_start, rl = mac_fl.l_len;
             int32_t rp = mac_fl.l_pid;
             memset(lflock, 0, sizeof(lflock));
diff --git a/tests/manifest.txt b/tests/manifest.txt
index ff9631b..dc81e24 100644
--- a/tests/manifest.txt
+++ b/tests/manifest.txt
@@ -42,6 +42,7 @@ test-socket
 
 [section] Syscall coverage tests
 test-file-ops
+test-flock
 test-sysinfo
 test-io-opt
 test-syscall-smoke
diff --git a/tests/test-flock.c b/tests/test-flock.c
new file mode 100644
index 0000000..28b83ba
--- /dev/null
+++ b/tests/test-flock.c
@@ -0,0 +1,99 @@
+/* Test POSIX advisory record locking via fcntl(F_SETLK/F_GETLK/F_SETLKW)
+ *
+ * Copyright 2026 elfuse contributors
+ * Copyright 2025 Moritz Angermann, zw3rk pte. ltd.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Regression coverage for the Linux<->macOS struct flock translation. The
+ * l_type constants differ between the two ABIs (Linux F_RDLCK=0/F_WRLCK=1,
+ * macOS F_RDLCK=1/F_WRLCK=3), so passing the guest value straight through to
+ * the host made the very first lock SQLite takes (a shared F_RDLCK) fail with
+ * EINVAL and surface as "disk I/O error". The byte offsets below mirror the
+ * ones SQLite locks around its 1GiB "pending byte".
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "test-harness.h"
+
+#define PENDING_BYTE 0x40000000L
+#define RESERVED_BYTE (PENDING_BYTE + 1)
+#define SHARED_FIRST (PENDING_BYTE + 2)
+#define SHARED_SIZE 510
+
+static int set_lock(int fd, short type, off_t start, off_t len)
+{
+    struct flock fl = {
+        .l_type = type,
+        .l_whence = SEEK_SET,
+        .l_start = start,
+        .l_len = len,
+    };
+    return fcntl(fd, F_SETLK, &fl);
+}
+
+int main(void)
+{
+    int passes = 0, fails = 0;
+    const char *path = "/tmp/elfuse-test-flock.db";
+
+    int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644);
+    if (fd < 0) {
+        perror("open");
+        return 1;
+    }
+
+    /* Shared read lock -- this is the call that regressed to EINVAL. */
+    TEST("F_SETLK F_RDLCK (shared)");
+    EXPECT_EQ(set_lock(fd, F_RDLCK, SHARED_FIRST, SHARED_SIZE), 0,
+              "shared read lock rejected");
+
+    /* Write-lock the pending and reserved bytes (held until close). */
+    TEST("F_SETLK F_WRLCK (pending)");
+    EXPECT_EQ(set_lock(fd, F_WRLCK, PENDING_BYTE, 1), 0,
+              "pending write lock rejected");
+
+    TEST("F_SETLK F_WRLCK (reserved)");
+    EXPECT_EQ(set_lock(fd, F_WRLCK, RESERVED_BYTE, 1), 0,
+              "reserved write lock rejected");
+
+    TEST("F_SETLK F_UNLCK (release shared)");
+    EXPECT_EQ(set_lock(fd, F_UNLCK, SHARED_FIRST, SHARED_SIZE), 0,
+              "unlock rejected");
+
+    /* Blocking variant must take the same translation path. */
+    TEST("F_SETLKW F_WRLCK");
+    struct flock wfl = {
+        .l_type = F_WRLCK,
+        .l_whence = SEEK_SET,
+        .l_start = 0,
+        .l_len = 16,
+    };
+    EXPECT_EQ(fcntl(fd, F_SETLKW, &wfl), 0, "F_SETLKW rejected");
+
+    /* F_GETLK on a region this process already write-locks must report back a
+     * Linux l_type. Linux reports F_UNLCK for locks held by the *same* owner,
+     * so the only thing we can assert portably is that the type round-trips to
+     * a valid Linux constant and the call succeeds. */
+    TEST("F_GETLK round-trips l_type");
+    struct flock gfl = {
+        .l_type = F_WRLCK,
+        .l_whence = SEEK_SET,
+        .l_start = 0,
+        .l_len = 16,
+    };
+    int gr = fcntl(fd, F_GETLK, &gfl);
+    EXPECT_TRUE(gr == 0 && (gfl.l_type == F_UNLCK || gfl.l_type == F_RDLCK ||
+                            gfl.l_type == F_WRLCK),
+                "F_GETLK returned an invalid l_type");
+
+    close(fd);
+    unlink(path);
+
+    SUMMARY("test-flock");
+    return fails == 0 ? 0 : 1;
+}