Skip to content

Commit eafc113

Browse files
committed
fix: exclude paused sections via callgrind collection toggling
Replace the per-pause CALLGRIND_STOP/START_INSTRUMENTATION calls with CALLGRIND_TOGGLE_COLLECT. Toggling instrumentation flushes callgrind's simulated cache, so every benchmark measured an artificial cold-cache warmup phase and the cache-warming repetition in RunAnalysis was wasted, regressing 76 benchmarks. Toggling collection excludes paused sections without touching simulator state. Gate the toggles to explicit user PauseTiming()/ResumeTiming() calls: TOGGLE_COLLECT is parity-based and collection starts enabled, so the implicit StartKeepRunning()/FinishKeepRunning() bracket must not toggle or it would invert the collection state for the whole run. Bump instrument-hooks for the new callgrind_toggle_collect helper. Fixes COD-2033
1 parent 696eaa1 commit eafc113

4 files changed

Lines changed: 35 additions & 4 deletions

File tree

core/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ include(FetchContent)
1515
FetchContent_Declare(
1616
instrument_hooks_repo
1717
GIT_REPOSITORY https://github.com/CodSpeedHQ/instrument-hooks
18-
GIT_TAG 89fb72a076ec71c9eca6eee9bca98bada4b4dfb4
18+
GIT_TAG b6c4a75ecd81462bfc4d975e13060217799cd6ef
1919
)
2020
FetchContent_MakeAvailable(instrument_hooks_repo)
2121
FetchContent_GetProperties(instrument_hooks_repo)

google_benchmark/include/benchmark/benchmark.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,15 @@ class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State {
949949
#if defined(CODSPEED_ANALYSIS) || defined(CODSPEED_WALLTIME)
950950
codspeed::CodSpeed* codspeed_;
951951
#endif
952+
#ifdef CODSPEED_ANALYSIS
953+
// True between the implicit ResumeTiming() in StartKeepRunning() and the
954+
// implicit PauseTiming() in FinishKeepRunning(). Only explicit user
955+
// PauseTiming()/ResumeTiming() calls (inside the benchmark loop) may
956+
// toggle callgrind collection: CALLGRIND_TOGGLE_COLLECT is parity-based,
957+
// and collection starts enabled, so the implicit bracket calls must not
958+
// toggle or they would invert the collection state for the whole run.
959+
bool codspeed_in_benchmark_loop_;
960+
#endif
952961
#ifdef CODSPEED_WALLTIME
953962
uint64_t resume_timestamp_;
954963
#endif

google_benchmark/src/benchmark.cc

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ State::State(std::string name, IterationCount max_iters,
199199
#if defined(CODSPEED_ANALYSIS) || defined(CODSPEED_WALLTIME)
200200
codspeed_(codspeed),
201201
#endif
202+
#ifdef CODSPEED_ANALYSIS
203+
codspeed_in_benchmark_loop_(false),
204+
#endif
202205
#ifdef CODSPEED_WALLTIME
203206
resume_timestamp_(0),
204207
#endif
@@ -273,7 +276,13 @@ void State::PauseTiming() {
273276
uint64_t pause_timestamp = measurement_current_timestamp();
274277
#endif
275278
#ifdef CODSPEED_ANALYSIS
276-
callgrind_stop_instrumentation();
279+
// Suspend callgrind cost collection for the paused section. Toggling
280+
// collection (unlike stopping instrumentation) does not flush the
281+
// simulated cache, so it adds no artificial cold-cache cost to the
282+
// measured region.
283+
if (codspeed_in_benchmark_loop_) {
284+
callgrind_toggle_collect();
285+
}
277286
#endif
278287

279288
// Add in time accumulated so far
@@ -314,7 +323,10 @@ void State::ResumeTiming() {
314323
resume_timestamp_ = measurement_current_timestamp();
315324
#endif
316325
#ifdef CODSPEED_ANALYSIS
317-
callgrind_start_instrumentation();
326+
// Re-enable callgrind cost collection after a paused section.
327+
if (codspeed_in_benchmark_loop_) {
328+
callgrind_toggle_collect();
329+
}
318330
#endif
319331
}
320332

@@ -367,12 +379,22 @@ void State::StartKeepRunning() {
367379
manager_->StartStopBarrier();
368380
if (!skipped()) {
369381
ResumeTiming();
382+
#ifdef CODSPEED_ANALYSIS
383+
// Arm collection toggling only after the implicit ResumeTiming() above,
384+
// so that only explicit user pauses inside the loop toggle collection.
385+
codspeed_in_benchmark_loop_ = true;
386+
#endif
370387
}
371388
}
372389

373390
void State::FinishKeepRunning() {
374391
BM_CHECK(started_ && (!finished_ || skipped()));
375392
if (!skipped()) {
393+
#ifdef CODSPEED_ANALYSIS
394+
// Disarm before the implicit PauseTiming() below so it does not toggle
395+
// collection off for the rest of the process.
396+
codspeed_in_benchmark_loop_ = false;
397+
#endif
376398
PauseTiming();
377399
}
378400
// Total iterations has now wrapped around past 0. Fix this.

0 commit comments

Comments
 (0)