diff --git a/ddprof-lib/src/main/cpp/ctimer_linux.cpp b/ddprof-lib/src/main/cpp/ctimer_linux.cpp index 0a7758629..c951a4388 100644 --- a/ddprof-lib/src/main/cpp/ctimer_linux.cpp +++ b/ddprof-lib/src/main/cpp/ctimer_linux.cpp @@ -20,6 +20,7 @@ #include "guards.h" #include "ctimer.h" #include "debugSupport.h" +#include "jvmThread.h" #include "libraries.h" #include "profiler.h" #include "threadState.inline.h" @@ -156,6 +157,16 @@ void CTimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) { int tid = 0; ProfiledThread *current = ProfiledThread::currentSignalSafe(); assert(current == nullptr || !current->isDeepCrashHandler()); + // Guard against the race window between Profiler::registerThread() and + // thread_native_entry setting JVM TLS (PROF-13072): skip at most one signal + // per thread. Pure native threads (where JVMThread::current() is always null) + // are allowed through once the one-shot window expires. + if (current != nullptr && JVMThread::isInitialized() && JVMThread::current() == nullptr + && current->inInitWindow()) { + current->tickInitWindow(); + errno = saved_errno; + return; + } if (current != NULL) { current->noteCPUSample(Profiler::instance()->recordingEpoch()); tid = current->tid(); diff --git a/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp b/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp index 1b00a5a2d..731a9b21f 100644 --- a/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp +++ b/ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp @@ -72,6 +72,17 @@ static void init_thread_tls() { ProfiledThread::initCurrentThread(); } +// Arm the CPU timer with profiling signals blocked and open the init window +// (PROF-13072). Kept noinline for the same stack-protector reason as +// delete_routine_info: SignalBlocker's sigset_t must not appear in +// start_routine_wrapper_spec's own stack frame on musl/aarch64. 
+__attribute__((noinline)) +static void start_window_and_register(int tid) { + SignalBlocker blocker; + if (ProfiledThread *self = ProfiledThread::currentSignalSafe()) self->startInitWindow(); // null-checked, as both signal handlers do + Profiler::registerThread(tid); +} + // Wrapper around the real start routine. // The wrapper: // 1. Register the newly created thread to profiler @@ -87,7 +98,7 @@ static void* start_routine_wrapper_spec(void* args) { delete_routine_info(thr); init_thread_tls(); int tid = ProfiledThread::currentTid(); - Profiler::registerThread(tid); + start_window_and_register(tid); void* result = routine(params); Profiler::unregisterThread(tid); ProfiledThread::release(); @@ -126,6 +137,7 @@ static void* start_routine_wrapper(void* args) { RoutineInfo* thr = (RoutineInfo*)args; func_start_routine routine; void* params; + int tid; { // Block profiling signals while accessing and freeing RoutineInfo // and during TLS initialization. Under ASAN, new/delete/ @@ -133,14 +145,20 @@ static void* start_routine_wrapper(void* args) { // allocator lock. A profiling signal during any of these calls // runs ASAN-instrumented code that tries to acquire the same // lock, causing deadlock. + // registerThread is also kept inside the blocker so that the CPU + // timer is armed while SIGPROF/SIGVTALRM are masked. Any pending + // signal fires only after signals are re-enabled (when the blocker + // scope exits), at which point JVMThread::current() is still null + // and the guard in CTimer::signalHandler discards the sample safely. 
SignalBlocker blocker; routine = thr->routine(); params = thr->args(); delete thr; ProfiledThread::initCurrentThread(); + tid = ProfiledThread::currentTid(); + if (ProfiledThread *self = ProfiledThread::currentSignalSafe()) self->startInitWindow(); // null-checked, as both signal handlers do + Profiler::registerThread(tid); } - int tid = ProfiledThread::currentTid(); - Profiler::registerThread(tid); void* result = nullptr; // Handle pthread_exit() bypass - the thread calls pthread_exit() // instead of normal termination diff --git a/ddprof-lib/src/main/cpp/thread.h b/ddprof-lib/src/main/cpp/thread.h index d0866e6c3..f037ad0e4 100644 --- a/ddprof-lib/src/main/cpp/thread.h +++ b/ddprof-lib/src/main/cpp/thread.h @@ -65,6 +65,7 @@ class ProfiledThread : public ThreadLocalData { u32 _recording_epoch; u32 _misc_flags; int _filter_slot_id; // Slot ID for thread filtering + uint8_t _init_window; // Countdown for JVM thread init race window (PROF-13072) UnwindFailures _unwind_failures; bool _otel_ctx_initialized; bool _crash_protection_active; @@ -77,7 +78,8 @@ class ProfiledThread : public ThreadLocalData { ProfiledThread(int buffer_pos, int tid) : ThreadLocalData(), _pc(0), _sp(0), _span_id(0), _crash_depth(0), _buffer_pos(buffer_pos), _tid(tid), _cpu_epoch(0), - _wall_epoch(0), _call_trace_id(0), _recording_epoch(0), _misc_flags(0), _filter_slot_id(-1), _otel_ctx_initialized(false), _crash_protection_active(false), + _wall_epoch(0), _call_trace_id(0), _recording_epoch(0), _misc_flags(0), _filter_slot_id(-1), _init_window(0), + _otel_ctx_initialized(false), _crash_protection_active(false), _otel_ctx_record{}, _otel_tag_encodings{}, _otel_local_root_span_id(0) {}; virtual ~ProfiledThread() { } @@ -176,7 +178,16 @@ class ProfiledThread : public ThreadLocalData { int filterSlotId() { return _filter_slot_id; } void setFilterSlotId(int slotId) { _filter_slot_id = slotId; } - + + // JVM thread init race window (PROF-13072): skip at most one signal that fires + // between Profiler::registerThread() and the JVM's pd_set_thread() call. 
+ // Threads that never attach to the JVM (e.g. NativeThreadCreator) observe a + // null JVMThread::current() as well; because the counter starts at one, the + // window closes after a single skipped signal and later samples get through. + inline bool inInitWindow() const { return _init_window != 0; } + inline void startInitWindow() { _init_window = 1; } + inline void tickInitWindow() { if (inInitWindow()) _init_window--; } + // Signal handler reentrancy protection bool tryEnterCriticalSection() { // Uses GCC atomic builtin (no malloc, async-signal-safe) diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index 5b8d0fa91..f5a16e67f 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -11,6 +11,7 @@ #include "context.h" #include "context_api.h" #include "debugSupport.h" +#include "jvmThread.h" #include "libraries.h" #include "log.h" #include "profiler.h" @@ -65,6 +66,15 @@ void WallClockASGCT::signalHandler(int signo, siginfo_t *siginfo, void *ucontext return; // Another critical section is active, defer profiling } ProfiledThread *current = ProfiledThread::currentSignalSafe(); + // Guard against the race window between Profiler::registerThread() and + // thread_native_entry setting JVM TLS (PROF-13072): skip at most one signal + // per thread. Pure native threads (where JVMThread::current() is always null) + // are allowed through once the one-shot window expires. + if (current != nullptr && JVMThread::isInitialized() && JVMThread::current() == nullptr + && current->inInitWindow()) { + current->tickInitWindow(); + return; + } int tid = current != NULL ? current->tid() : OS::threadId(); Shims::instance().setSighandlerTid(tid); u64 call_trace_id = 0;