From 26cd4678a8f976cf80242392d3890b66208716f5 Mon Sep 17 00:00:00 2001 From: Martin Vogel Date: Sat, 4 Jul 2026 21:33:58 +0200 Subject: [PATCH] fix(mem): use real /proc RSS on Linux (mimalloc current_rss undercounts, blinding backpressure) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Distilled from #776's 132460f5. This project sets arena_eager_commit=0 + purge_decommits=1 + purge_delay=0 in cbm_mem_init to reduce upfront memory, so mimalloc's committed-page counter reads low. On Linux mi_process_info() never sets current_rss (vendored/mimalloc/src/prim/unix/prim.c only fills peak_rss), so current_rss defaults to that low committed counter — cbm_mem_rss() returned a few MB while true RSS was multiple GB, leaving cbm_mem_over_budget() backpressure, the memory ceiling, and the host RAM tier blind on Linux. Prefer os_rss() (/proc/self/statm) as the primary source on Linux; macOS and Windows are unchanged (their mi_process_info current_rss is accurate via task_info / GetProcessMemoryInfo). cbm_mem_peak_rss() is untouched. Complements #752 by making the RAM tiers actually bite on Linux. Reproduce-first: mem_rss_reflects_external_resident_memory pins mimalloc's committed counter low with a live mi_malloc, then grows true RSS via a raw 256MB mmap (invisible to mimalloc); unfixed Linux returns the ~few-MB committed counter (RED), fixed Linux returns /proc RSS (GREEN). macOS/Windows pass either way, so the RED manifests on the Linux CI leg. 
Co-authored-by: petercoxphoto Signed-off-by: Martin Vogel --- src/foundation/mem.c | 26 ++++++++++++++++- tests/test_mem.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/src/foundation/mem.c b/src/foundation/mem.c index 46494aad2..c55bfe05b 100644 --- a/src/foundation/mem.c +++ b/src/foundation/mem.c @@ -151,13 +151,37 @@ void cbm_mem_init(double ram_fraction) { } size_t cbm_mem_rss(void) { +#if defined(__linux__) + /* Linux: mimalloc's _mi_prim_process_info() (vendored/mimalloc/src/prim/ + * unix/prim.c) never sets pinfo->current_rss on Linux — it only sets + * peak_rss (from getrusage's ru_maxrss). current_rss therefore keeps + * mi_process_info()'s default of pinfo.current_commit: mimalloc's OWN + * committed-page counter, which this project deliberately tunes low via + * mi_option_arena_eager_commit=0 + purge_decommits=1 + purge_delay=0 + * (cbm_mem_init) to reduce upfront memory. So on Linux "current_rss" is a + * low-biased mimalloc-internal metric, not true RSS: under concurrent + * large-file parsing it can read a few MB while real RSS is multiple GB, + * silently blinding cbm_mem_over_budget()'s backpressure to real memory + * pressure (small-but-nonzero, so the `current_rss > 0` guard below never + * catches it). os_rss() reads /proc/self/statm — authoritative OS RSS, + * unaffected by mimalloc's accounting — so it is the PRIMARY source on + * Linux, not a last-resort fallback. macOS/Windows are unaffected: + * mimalloc sets current_rss correctly there via task_info / + * GetProcessMemoryInfo. */ + size_t proc_rss = os_rss(); + if (proc_rss > 0) { + return proc_rss; + } + /* Extremely unlikely (/proc unavailable) — fall through to mimalloc. 
*/ +#endif size_t current_rss = 0; size_t peak_rss = 0; mi_process_info(NULL, NULL, NULL, &current_rss, &peak_rss, NULL, NULL, NULL); if (current_rss > 0) { return current_rss; } - /* Fallback for ASan builds (MI_OVERRIDE=0) */ + /* Fallback for ASan builds (MI_OVERRIDE=0) and any platform where + * mimalloc's current_rss is unavailable/zero. */ return os_rss(); } diff --git a/tests/test_mem.c b/tests/test_mem.c index 26bf04f1b..2ed8ac01d 100644 --- a/tests/test_mem.c +++ b/tests/test_mem.c @@ -16,6 +16,10 @@ #include #include +#include +#ifndef _WIN32 +#include +#endif /* ASan detection — mimalloc MI_OVERRIDE=0 under ASan, mi_process_info * may return 0 for RSS. Tests that depend on accurate RSS must skip. */ @@ -183,6 +187,69 @@ TEST(mem_collect_no_crash) { PASS(); } +/* Reproduce-first guard for the Linux cbm_mem_rss() undercount (distilled + * from #776's 132460f5). + * + * On Linux, mimalloc's mi_process_info() never sets current_rss + * (vendored/mimalloc/src/prim/unix/prim.c only fills peak_rss from + * getrusage's ru_maxrss); current_rss silently keeps mi_process_info()'s + * default of pinfo.current_commit — mimalloc's OWN committed-page counter + * (stats.c:555). The UNFIXED cbm_mem_rss() returns that counter whenever it is + * nonzero, so on Linux it reports mimalloc-committed bytes, NOT true RSS. The + * FIXED code reads /proc/self/statm (os_rss) as the primary source → true RSS. + * + * The guard makes the two quantities DIVERGE deterministically: + * 1. mi_malloc() a small block (kept live) so mimalloc's committed counter is + * a small POSITIVE value — this both defeats the UNFIXED `current_rss > 0` + * fallback guard AND pins the reported value low. mi_malloc always routes + * through mimalloc regardless of MI_OVERRIDE, so this works in the ASan + * test-runner (MI_OVERRIDE=0) too. + * 2. Grow TRUE process RSS by ~256MB via a raw anonymous mmap — memory + * mimalloc's committed counter never sees, but /proc/self/statm does. 
+ * On UNFIXED Linux, cbm_mem_rss() then returns the ~few-MB committed counter + * (< 128MB) → this assertion FAILS (RED). On FIXED Linux it returns the /proc + * RSS (>= 256MB) → GREEN. + * + * macOS/Windows set current_rss from task_info/GetProcessMemoryInfo, which DO + * include the mapped+touched region, so cbm_mem_rss() is accurate there both + * before and after the fix — this passes on those platforms either way. The + * RED therefore manifests only on the Linux CI leg, which is exactly where the + * production undercount bit (backpressure/ceiling blinded). */ +TEST(mem_rss_reflects_external_resident_memory) { + cbm_mem_init(0.5); + + /* (1) Pin mimalloc's committed-page counter to a small positive value. */ + const size_t warm = (size_t)1 * 1024 * 1024; /* 1 MB via mimalloc */ + void *mi_buf = mi_malloc(warm); + ASSERT_NOT_NULL(mi_buf); + memset(mi_buf, 0x11, warm); + + const size_t region = (size_t)256 * 1024 * 1024; /* 256 MB true RSS */ + const size_t threshold = (size_t)128 * 1024 * 1024; /* generous half */ + +#ifdef _WIN32 + /* Windows current_rss (WorkingSetSize) is accurate; a plain resident + * allocation is reflected regardless of allocator. No Linux undercount. */ + void *big = malloc(region); + ASSERT_NOT_NULL(big); + memset(big, 0x5A, region); + size_t rss = cbm_mem_rss(); + ASSERT_GTE(rss, threshold); + free(big); +#else + /* (2) Raw mmap bypasses mimalloc entirely: its committed counter does NOT + * grow, but the true RSS does — this is what exposes the Linux undercount. 
*/ + void *big = mmap(NULL, region, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_TRUE(big != MAP_FAILED); + memset(big, 0x5A, region); /* fault every page in → resident */ + size_t rss = cbm_mem_rss(); + ASSERT_GTE(rss, threshold); + munmap(big, region); +#endif + mi_free(mi_buf); + PASS(); +} + TEST(mem_collect_rss_still_positive) { cbm_mem_init(0.5); cbm_mem_collect(); @@ -803,6 +870,7 @@ SUITE(mem) { RUN_TEST(mem_rss_positive); RUN_TEST(mem_peak_rss_gte_rss); RUN_TEST(mem_rss_increases_after_alloc); + RUN_TEST(mem_rss_reflects_external_resident_memory); RUN_TEST(mem_collect_no_crash); RUN_TEST(mem_collect_rss_still_positive); /* Memory pressure simulation */