Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 91 additions & 26 deletions sigsegv-monitor.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,56 @@ struct trace_event_raw_page_fault_user {
char __data[0];
};

// One recorded user-space page fault, as captured by trace_page_fault.
struct cr2_stat {
__u64 cr2; // faulting address (copied from the tracepoint's `address` field)
__u64 err; // page-fault error code (copied from the tracepoint's `error_code` field)
__u64 tai; // bpf_ktime_get_tai_ns() timestamp taken when the fault was recorded
};

// Fixed-capacity ring buffer holding the most recent user page faults.
// Instances are looked up per tgid in the tgid_cr2 map and are maintained
// through cr2stats_init / cr2stats_push / cr2stats_get.
struct cr2_stats {
struct cr2_stat stat[MAX_USER_PF_ENTRIES]; // entry storage; oldest entry is overwritten once full
__u64 head; // slot the next push writes to; wraps back to 0 at MAX_USER_PF_ENTRIES
__u64 count; // number of entries stored so far, saturating at MAX_USER_PF_ENTRIES
};

struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 1024);
__type(key, u32);
__type(value, u64);
__type(value, struct cr2_stats);
} tgid_cr2 SEC(".maps");

/*
 * Reset a page-fault ring buffer to the empty state.
 *
 * Only the bookkeeping fields (head, count) are cleared; the stat[] slots are
 * left untouched and must not be read until they have been pushed to.
 *
 * Declared `static inline` rather than plain `inline`: in C99 and later a
 * plain `inline` definition does not provide an external definition, so any
 * call the compiler decides not to inline would reference a symbol that does
 * not exist.
 *
 * @param stats  ring buffer to reset; must not be NULL
 */
static inline void cr2stats_init(struct cr2_stats* stats) {
    stats->head = 0;
    stats->count = 0;
}

/*
 * Append one page-fault record to the ring buffer.
 *
 * The record is copied into the slot at `head`, then `head` advances and
 * wraps to 0 at MAX_USER_PF_ENTRIES. `count` grows with every push until it
 * saturates at MAX_USER_PF_ENTRIES; from then on each push overwrites the
 * oldest record.
 *
 * If `head` is already out of range the push is silently dropped, so the
 * array is never indexed out of bounds (presumably this explicit check is
 * also what lets the BPF verifier bound the index -- TODO confirm).
 *
 * Declared `static inline` rather than plain `inline`: a plain `inline`
 * definition provides no external definition in C99 and later, so a call that
 * is not inlined would fail to resolve.
 *
 * @param stats  ring buffer to append to; must not be NULL
 * @param value  record to copy in; must not be NULL
 */
static inline void cr2stats_push(struct cr2_stats* stats, struct cr2_stat* value) {
    if (stats->head < MAX_USER_PF_ENTRIES) {
        stats->stat[stats->head] = *value;

        /* Advance the write position, wrapping at the end of the array. */
        if (++stats->head >= MAX_USER_PF_ENTRIES) {
            stats->head -= MAX_USER_PF_ENTRIES;
        }

        /* Saturate: the buffer can never hold more than its capacity. */
        if (++stats->count > MAX_USER_PF_ENTRIES) {
            stats->count = MAX_USER_PF_ENTRIES;
        }
    }
}

/*
 * Return the record at logical position `index`, where 0 is the oldest
 * record still held in the ring buffer.
 *
 * While the buffer is not yet full, records sit at stat[0..count-1] in
 * insertion order. Once it is full, `head` points at the oldest record, so
 * the logical index is rotated by `head` to find the physical slot.
 *
 * Indices at or beyond `count` are rejected up front. This keeps callers from
 * being handed a slot that was never written, and it guarantees that the
 * rotation below cannot wrap around in 32-bit arithmetic and alias a valid
 * slot for a wildly out-of-range index.
 *
 * Declared `static inline` rather than plain `inline`: a plain `inline`
 * definition provides no external definition in C99 and later, so a call that
 * is not inlined would fail to resolve.
 *
 * @param stats  ring buffer to read from; must not be NULL
 * @param index  logical position, 0 = oldest stored record
 * @return pointer to the record inside `stats`, or NULL if `index` does not
 *         refer to a stored record
 */
static inline struct cr2_stat* cr2stats_get(struct cr2_stats* stats, u32 index) {
    if (index >= stats->count || index >= MAX_USER_PF_ENTRIES) {
        return NULL;
    }

    if (stats->count == MAX_USER_PF_ENTRIES) {
        /* Full buffer: the oldest record lives at `head`. */
        index += stats->head;
        if (index >= MAX_USER_PF_ENTRIES) {
            index -= MAX_USER_PF_ENTRIES;
        }
    }

    /* Final bound check right before the access keeps the array index provably in range. */
    if (index < MAX_USER_PF_ENTRIES) {
        return stats->stat + index;
    }

    return NULL;
}
#endif

// Output map (for user space)
Expand Down Expand Up @@ -75,24 +119,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) {
bpf_probe_read_kernel_str(&event->tgleader_comm, sizeof(event->tgleader_comm), &task->group_leader->comm);
// TODO: can the acquisition of pidns_tgid, pidns_pid be made more robust / simplified?
{
struct pid const* thread_pid = task->thread_pid;
unsigned int const level = thread_pid->level;
// thread_pid->numbers is a size-one flexible array member (type numbers[1])
// => cannot perform bounds-check against BTF information
// => need bpf_probe_read_kernel to read from indices potentially > 1
struct upid const* upid_inv = &thread_pid->numbers[level];
event->pidns_pid = BPF_CORE_READ(upid_inv, nr); // we already have implicit CO-RE, but we need the probe function call
}
{
struct pid const* tgid_pid = task->signal->pids[PIDTYPE_TGID];
unsigned int const level = tgid_pid->level;
struct upid const* tgid_upid_inv = &tgid_pid->numbers[level];
// TODO: doesn't this return the pid in the NS of the tg leader, instead of the pid in the NS of the current thread?
// TODO: don't we need RCU here?
event->pidns_tgid = BPF_CORE_READ(tgid_upid_inv, nr);
}

event->regs.trapno = task->thread.trap_nr; // TODO: also copy the other fields like cr2 and error_code
struct pid const* thread_pid = task->thread_pid;
unsigned int const level = thread_pid->level;
// thread_pid->numbers is a size-one flexible array member (type numbers[1])
// => cannot perform bounds-check against BTF information
// => need bpf_probe_read_kernel to read from indices potentially > 1
struct upid const* upid_inv = &thread_pid->numbers[level];
event->pidns_pid = BPF_CORE_READ(upid_inv, nr); // we already have implicit CO-RE, but we need the probe function call
}
{
struct pid const* tgid_pid = task->signal->pids[PIDTYPE_TGID];
unsigned int const level = tgid_pid->level;
struct upid const* tgid_upid_inv = &tgid_pid->numbers[level];
// TODO: doesn't this return the pid in the NS of the tg leader, instead of the pid in the NS of the current thread?
// TODO: don't we need RCU here?
event->pidns_tgid = BPF_CORE_READ(tgid_upid_inv, nr);
}

event->regs.trapno = task->thread.trap_nr;
event->regs.err = task->thread.error_code;

// TODO: how are these regs acquired?
Expand All @@ -119,14 +163,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) {
event->regs.flags = regs->flags;

event->regs.cr2 = task->thread.cr2;
event->regs.cr2_fault = -1;
event->cr2_userpf_entry_count = 0;

#ifdef TRACE_PF_CR2
u32 tgid = task->tgid;
u64 *cr2 = bpf_map_lookup_elem(&tgid_cr2, &tgid);
struct cr2_stats *cr2stats = bpf_map_lookup_elem(&tgid_cr2, &tgid);

if (cr2stats) {
for (u32 i = 0; i < cr2stats->count && i < MAX_USER_PF_ENTRIES; i++) {
struct cr2_stat* stat = cr2stats_get(cr2stats, i);
if (stat) {
event->regs.cr2_faults[i] = stat->cr2;
event->regs.cr2_errors[i] = stat->err;
event->cr2_tai[i] = stat->tai;

++event->cr2_userpf_entry_count;
}
}

if (cr2) {
event->regs.cr2_fault = *cr2;
bpf_map_delete_elem(&tgid_cr2, &tgid);
}
#endif
Expand All @@ -149,13 +203,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) {
#ifdef TRACE_PF_CR2
SEC("tracepoint/exceptions/page_fault_user")
int trace_page_fault(struct trace_event_raw_page_fault_user *ctx) {
u64 cr2;
struct cr2_stat stat;
u32 tgid;

cr2 = ctx->address;
stat.cr2 = ctx->address;
stat.err = ctx->error_code;
stat.tai = bpf_ktime_get_tai_ns();
tgid = bpf_get_current_pid_tgid() >> 32;

bpf_map_update_elem(&tgid_cr2, &tgid, &cr2, BPF_ANY);
struct cr2_stats *cr2stats = bpf_map_lookup_elem(&tgid_cr2, &tgid);
if (cr2stats) {
cr2stats_push(cr2stats, &stat);
} else {
struct cr2_stats new_stats;
cr2stats_init(&new_stats);
cr2stats_push(&new_stats, &stat);

bpf_map_update_elem(&tgid_cr2, &tgid, &new_stats, BPF_ANY);
}

return 0;
}
Expand Down
16 changes: 12 additions & 4 deletions sigsegv-monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) {

printf("{\"cpu\":%d,", cpu);
printf("\"tai\":%llu,", e->tai);
for (u32 i = 0; i < e->cr2_userpf_entry_count; i++) {
printf("\"cr2_tai_%u\":%llu,", i, e->cr2_tai[i]);
}
printf("\"process\":{\"rootns_pid\":%d,\"ns_pid\":%d,\"comm\":\"%s\"},", e->tgid, e->pidns_tgid, e->tgleader_comm);
printf("\"thread\":{\"rootns_tid\":%d,\"ns_tid\":%d,\"comm\":\"%s\"},", e->pid, e->pidns_pid, e->comm);
printf("\"si_code\":%d,", e->si_code);
Expand All @@ -94,10 +97,15 @@ void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) {
printf("\"trapno\":\"0x%016llx\",", e->regs.trapno);
printf("\"err\":\"0x%016llx\",", e->regs.err);
printf("\"cr2\":\"0x%016llx\",", e->regs.cr2);
if (e->regs.cr2_fault != (u64)-1)
printf("\"cr2_fault\":\"0x%016llx\"", e->regs.cr2_fault);
else
printf("\"cr2_fault\":null");
for (u32 i = 0; i < e->cr2_userpf_entry_count; i++)
{
printf("\"cr2_fault_%u\":\"0x%016llx\",", i, e->regs.cr2_faults[i]);
printf("\"cr2_err_%u\":\"0x%016llx\"", i, e->regs.cr2_errors[i]);

if (i + 1 != e->cr2_userpf_entry_count) {
printf(",");
}
}
printf("},");

printf("\"lbr\":[");
Expand Down
11 changes: 9 additions & 2 deletions sigsegv-monitor.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#pragma once


#define MAX_LBR_ENTRIES 32

// Must be a power of two
#define MAX_USER_PF_ENTRIES 16


struct user_regs_t {
u64 rip;
u64 rsp;
Expand All @@ -25,13 +28,16 @@ struct user_regs_t {
u64 trapno;
u64 err;
u64 cr2;
u64 cr2_fault;
u64 cr2_faults[MAX_USER_PF_ENTRIES];
u64 cr2_errors[MAX_USER_PF_ENTRIES];
};

// WARNING: this is for the SENDING process (e.g. pid) of the signal!
struct event_t {
int si_code;

u32 cr2_userpf_entry_count;

u32 tgid; // the PROCESS id!
u32 pidns_tgid; // the PROCESS id within the innermost pid namespace of the process
char tgleader_comm[16]; // the PROCESS name
Expand All @@ -45,4 +51,5 @@ struct event_t {
struct perf_branch_entry lbr[MAX_LBR_ENTRIES];

u64 tai; // time atomic international
u64 cr2_tai[MAX_USER_PF_ENTRIES];
};