From 1db86704a3ffb2c3e0c9ee9239d5aea10dcf3bf5 Mon Sep 17 00:00:00 2001 From: Damian Dimanov Date: Fri, 20 Feb 2026 15:38:23 +0200 Subject: [PATCH] Show multiple user PF's in the output --- sigsegv-monitor.bpf.c | 117 ++++++++++++++++++++++++++++++++---------- sigsegv-monitor.c | 16 ++++-- sigsegv-monitor.h | 11 +++- 3 files changed, 112 insertions(+), 32 deletions(-) diff --git a/sigsegv-monitor.bpf.c b/sigsegv-monitor.bpf.c index 49f28e8..a5f0e56 100644 --- a/sigsegv-monitor.bpf.c +++ b/sigsegv-monitor.bpf.c @@ -21,12 +21,56 @@ struct trace_event_raw_page_fault_user { char __data[0]; }; +struct cr2_stat { + __u64 cr2; + __u64 err; + __u64 tai; +}; + +struct cr2_stats { + struct cr2_stat stat[MAX_USER_PF_ENTRIES]; + __u64 head; + __u64 count; +}; + struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 1024); __type(key, u32); - __type(value, u64); + __type(value, struct cr2_stats); } tgid_cr2 SEC(".maps"); + +static inline void cr2stats_init(struct cr2_stats* stats) { + stats->head = 0; + stats->count = 0; +} + +static inline void cr2stats_push(struct cr2_stats* stats, struct cr2_stat* value) { + if (stats->head < MAX_USER_PF_ENTRIES) { + stats->stat[stats->head] = *value; + + if (++stats->head >= MAX_USER_PF_ENTRIES) + stats->head -= MAX_USER_PF_ENTRIES; + + if (++stats->count > MAX_USER_PF_ENTRIES) + stats->count = MAX_USER_PF_ENTRIES; + } +} + +static inline struct cr2_stat* cr2stats_get(struct cr2_stats* stats, u32 index) { + if (stats->count == MAX_USER_PF_ENTRIES) { + index += stats->head; + if (index >= MAX_USER_PF_ENTRIES) { + index -= MAX_USER_PF_ENTRIES; + } + } + + if (index < MAX_USER_PF_ENTRIES) { + return stats->stat + index; + } + + return NULL; +} #endif // Output map (for user space) @@ -75,24 +119,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) { bpf_probe_read_kernel_str(&event->tgleader_comm, sizeof(event->tgleader_comm), &task->group_leader->comm); // TODO: can the acquisition of pidns_tgid, pidns_pid be made more robust / simplified?
{ - struct pid const* thread_pid = task->thread_pid; - unsigned int const level = thread_pid->level; - // thread_pid->numbers is a size-one flexible array member (type numbers[1]) - // => cannot perform bounds-check against BTF information - // => need bpf_probe_read_kernel to read from indices potentially > 1 - struct upid const* upid_inv = &thread_pid->numbers[level]; - event->pidns_pid = BPF_CORE_READ(upid_inv, nr); // we already have implicit CO-RE, but we need the probe function call - } - { - struct pid const* tgid_pid = task->signal->pids[PIDTYPE_TGID]; - unsigned int const level = tgid_pid->level; - struct upid const* tgid_upid_inv = &tgid_pid->numbers[level]; - // TODO: doesn't this return the pid in the NS of the tg leader, instead of the pid in the NS of the current thread? - // TODO: don't we need RCU here? - event->pidns_tgid = BPF_CORE_READ(tgid_upid_inv, nr); - } - - event->regs.trapno = task->thread.trap_nr; // TODO: also copy the other fields like cr2 and error_code + struct pid const* thread_pid = task->thread_pid; + unsigned int const level = thread_pid->level; + // thread_pid->numbers is a size-one flexible array member (type numbers[1]) + // => cannot perform bounds-check against BTF information + // => need bpf_probe_read_kernel to read from indices potentially > 1 + struct upid const* upid_inv = &thread_pid->numbers[level]; + event->pidns_pid = BPF_CORE_READ(upid_inv, nr); // we already have implicit CO-RE, but we need the probe function call + } + { + struct pid const* tgid_pid = task->signal->pids[PIDTYPE_TGID]; + unsigned int const level = tgid_pid->level; + struct upid const* tgid_upid_inv = &tgid_pid->numbers[level]; + // TODO: doesn't this return the pid in the NS of the tg leader, instead of the pid in the NS of the current thread? + // TODO: don't we need RCU here? 
+ event->pidns_tgid = BPF_CORE_READ(tgid_upid_inv, nr); + } + + event->regs.trapno = task->thread.trap_nr; event->regs.err = task->thread.error_code; // TODO: how are these regs acquired? @@ -119,14 +163,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) { event->regs.flags = regs->flags; event->regs.cr2 = task->thread.cr2; - event->regs.cr2_fault = -1; + event->cr2_userpf_entry_count = 0; #ifdef TRACE_PF_CR2 u32 tgid = task->tgid; - u64 *cr2 = bpf_map_lookup_elem(&tgid_cr2, &tgid); + struct cr2_stats *cr2stats = bpf_map_lookup_elem(&tgid_cr2, &tgid); + + if (cr2stats) { + for (u32 i = 0; i < cr2stats->count && i < MAX_USER_PF_ENTRIES; i++) { + struct cr2_stat* stat = cr2stats_get(cr2stats, i); + if (stat) { + event->regs.cr2_faults[i] = stat->cr2; + event->regs.cr2_errors[i] = stat->err; + event->cr2_tai[i] = stat->tai; + + ++event->cr2_userpf_entry_count; + } + } - if (cr2) { - event->regs.cr2_fault = *cr2; bpf_map_delete_elem(&tgid_cr2, &tgid); } #endif @@ -149,13 +203,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) { #ifdef TRACE_PF_CR2 SEC("tracepoint/exceptions/page_fault_user") int trace_page_fault(struct trace_event_raw_page_fault_user *ctx) { - u64 cr2; + struct cr2_stat stat; u32 tgid; - cr2 = ctx->address; + stat.cr2 = ctx->address; + stat.err = ctx->error_code; + stat.tai = bpf_ktime_get_tai_ns(); tgid = bpf_get_current_pid_tgid() >> 32; - bpf_map_update_elem(&tgid_cr2, &tgid, &cr2, BPF_ANY); + struct cr2_stats *cr2stats = bpf_map_lookup_elem(&tgid_cr2, &tgid); + if (cr2stats) { + cr2stats_push(cr2stats, &stat); + } else { + struct cr2_stats new_stats = {0}; /* zero every slot: the whole struct is copied into the map, so no stack byte may stay uninitialized */ + cr2stats_init(&new_stats); + cr2stats_push(&new_stats, &stat); + + bpf_map_update_elem(&tgid_cr2, &tgid, &new_stats, BPF_ANY); + } return 0; } diff --git a/sigsegv-monitor.c b/sigsegv-monitor.c index d8a7ac9..83457a0 100644 --- a/sigsegv-monitor.c +++ b/sigsegv-monitor.c @@ -69,6 +69,9 @@ void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) {
printf("{\"cpu\":%d,", cpu); printf("\"tai\":%llu,", e->tai); + for (u32 i = 0; i < e->cr2_userpf_entry_count; i++) { + printf("\"cr2_tai_%u\":%llu,", i, e->cr2_tai[i]); + } printf("\"process\":{\"rootns_pid\":%d,\"ns_pid\":%d,\"comm\":\"%s\"},", e->tgid, e->pidns_tgid, e->tgleader_comm); printf("\"thread\":{\"rootns_tid\":%d,\"ns_tid\":%d,\"comm\":\"%s\"},", e->pid, e->pidns_pid, e->comm); printf("\"si_code\":%d,", e->si_code); @@ -94,10 +97,15 @@ void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) { printf("\"trapno\":\"0x%016llx\",", e->regs.trapno); printf("\"err\":\"0x%016llx\",", e->regs.err); printf("\"cr2\":\"0x%016llx\",", e->regs.cr2); - if (e->regs.cr2_fault != (u64)-1) - printf("\"cr2_fault\":\"0x%016llx\"", e->regs.cr2_fault); - else - printf("\"cr2_fault\":null"); + for (u32 i = 0; i < e->cr2_userpf_entry_count; i++) + { + printf("\"cr2_fault_%u\":\"0x%016llx\",", i, e->regs.cr2_faults[i]); + printf("\"cr2_err_%u\":\"0x%016llx\",", i, e->regs.cr2_errors[i]); + } + + // Always close the object with a key that carries no trailing comma, so the + // JSON stays valid even when no user page fault was recorded (count is 0). + printf("\"cr2_fault_count\":%u", e->cr2_userpf_entry_count); printf("},"); printf("\"lbr\":["); diff --git a/sigsegv-monitor.h b/sigsegv-monitor.h index 1465aba..e369a8c 100644 --- a/sigsegv-monitor.h +++ b/sigsegv-monitor.h @@ -1,8 +1,11 @@ #pragma once - #define MAX_LBR_ENTRIES 32 +// Must be pow2 +#define MAX_USER_PF_ENTRIES 16 + + struct user_regs_t { u64 rip; u64 rsp; @@ -25,13 +28,16 @@ struct user_regs_t { u64 trapno; u64 err; u64 cr2; - u64 cr2_fault; + u64 cr2_faults[MAX_USER_PF_ENTRIES]; + u64 cr2_errors[MAX_USER_PF_ENTRIES]; }; // WARNING: this is for the SENDING process (e.g. pid) of the signal! struct event_t { int si_code; + u32 cr2_userpf_entry_count; + u32 tgid; // the PROCESS id! u32 pidns_tgid; // the PROCESS id within the innermost pid namespace of the process char tgleader_comm[16]; // the PROCESS name @@ -45,4 +51,5 @@ struct event_t { struct perf_branch_entry lbr[MAX_LBR_ENTRIES]; u64 tai; // time atomic international + u64 cr2_tai[MAX_USER_PF_ENTRIES]; };