From 0207720c7a6564602a8d5458b5f37ad18142252a Mon Sep 17 00:00:00 2001 From: Matt Frank Date: Wed, 27 May 2026 11:38:31 -0500 Subject: [PATCH 1/4] Add --jackknife flag to rcp_viewer --jackknife GBS restricts output to the single real (non-interpolated) RCP at the given global batch size, validating it against the full measured set (so pruned-out batch sizes are still accepted), and also prints the benchmark's submission_runs count. Co-Authored-By: Claude Opus 4.7 --- .../visualization_scripts/rcp_viewer.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py index 28e999d..4447295 100755 --- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py +++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py @@ -68,7 +68,10 @@ def main(): help='specify an RCP json file to use') parser.add_argument('--interpolate', action='store_true', help='generate interpolated rcp min/mean for all batch sizes') - + parser.add_argument('--jackknife', type=int, metavar='GBS', + help='restrict output to the single real (non-interpolated) RCP at this ' + 'global batch size, and also print the benchmark submission_runs') + args = parser.parse_args() rcp_pass_arg='pruned_rcps' @@ -80,7 +83,14 @@ def main(): if not args.no_header: print("BS,Mean,Min") - if not args.interpolate: + if args.jackknife is not None: + record = checker._find_rcp(args.jackknife, 'full_rcps') + if record is None: + sys.exit(f"Error: GBS {args.jackknife} is not a measured " + f"(non-interpolated) RCP batch size for {args.benchmark}") + print_rcp_record(record) + print(f"submission_runs: {checker.submission_runs}") + elif not args.interpolate: data=checker._get_rcp_data(rcp_pass_arg) for key, record in data.items(): print_rcp_record(record) From 8157b5d08bb9de2e8180fd05cc9dac01143e194e Mon Sep 17 00:00:00 2001 From: Matt Frank Date: Wed, 27 May 2026 11:56:10 -0500 Subject: [PATCH 2/4] Add jackknife bootstrap histogram to rcp_viewer When --jackknife is given, resample the reference convergence runs 1000 times (drawing submission_runs values with replacement), take a trimmed mean (trim ceil(10%) from each end), and print an ASCII histogram of the resulting score distribution. Add --seed for reproducible output. Co-Authored-By: Claude Opus 4.7 --- .../visualization_scripts/rcp_viewer.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py index 4447295..8ecc0f5 100755 --- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py +++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py @@ -7,6 +7,8 @@ import sys import os import argparse +import math +import numpy as np #Add the project root directory (assumed to be 3 levels up) to sys.path sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))) @@ -16,6 +18,31 @@ def print_rcp_record(record): print(f"{record['BS']},{record['RCP Mean']},{record['Min Epochs']}") +def jackknife_scores(samples, num_runs, iterations=1000, rng=None): + '''Bootstrap submission-sized trimmed-mean scores from the reference runs. + + Draw num_runs values with replacement from samples, trim k=ceil(10%) from + each end, and take the mean. Repeat iterations times, returning the scores. + ''' + rng = rng if rng is not None else np.random.default_rng() + arr = np.asarray(samples, dtype=float) + k = math.ceil(0.10 * num_runs) + if num_runs - 2 * k <= 0: + sys.exit(f"Error: trimming {k} from each end of {num_runs} runs leaves no samples") + scores = np.empty(iterations) + for i in range(iterations): + draw = np.sort(rng.choice(arr, size=num_runs, replace=True)) + scores[i] = draw[k:num_runs - k].mean() + return scores + +def print_histogram(scores, bar_width=50): + '''Print an ASCII text-bar histogram of scores using numpy auto-binning.''' + counts, edges = np.histogram(scores, bins='auto') + max_count = counts.max() if len(counts) else 0 + for i, c in enumerate(counts): + bar = '#' * (round(bar_width * c / max_count) if max_count else 0) + print(f"{edges[i]:.1f}-{edges[i+1]:.1f} | {bar} ({c})") + # this should be a method of rcp_checker.RCP_Checker, but it's missing. # Instead we derived it from _find_min_rcp() def find_max_rcp(checker, rcp_pass_arg='pruned_rcps'): @@ -71,6 +98,8 @@ def main(): parser.add_argument('--jackknife', type=int, metavar='GBS', help='restrict output to the single real (non-interpolated) RCP at this ' 'global batch size, and also print the benchmark submission_runs') + parser.add_argument('--seed', type=int, default=None, + help='seed the RNG for reproducible --jackknife output') args = parser.parse_args() @@ -90,6 +119,10 @@ def main(): f"(non-interpolated) RCP batch size for {args.benchmark}") print_rcp_record(record) print(f"submission_runs: {checker.submission_runs}") + scores = jackknife_scores(record['Epochs to converge'], + checker.submission_runs, + rng=np.random.default_rng(args.seed)) + print_histogram(scores) elif not args.interpolate: data=checker._get_rcp_data(rcp_pass_arg) for key, record in data.items(): From 05c97fb29c3f7b2618eb25b99c0374877065f4fb Mon Sep 17 00:00:00 2001 From: Matt Frank Date: Wed, 27 May 2026 12:00:12 -0500 Subject: [PATCH 3/4] Rename --jackknife to --bootstrap in rcp_viewer The resampling draws with replacement, which is a bootstrap, not a jackknife, so name it accurately. Rewrite the flag help to lead with its real purpose (producing the score histogram) rather than the output restriction, and increase the resample count from 1000 to 10000 for a smoother distribution. Co-Authored-By: Claude Opus 4.7 --- .../visualization_scripts/rcp_viewer.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py index 8ecc0f5..af17983 100755 --- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py +++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py @@ -18,7 +18,7 @@ def print_rcp_record(record): print(f"{record['BS']},{record['RCP Mean']},{record['Min Epochs']}") -def jackknife_scores(samples, num_runs, iterations=1000, rng=None): +def bootstrap_scores(samples, num_runs, iterations=10000, rng=None): '''Bootstrap submission-sized trimmed-mean scores from the reference runs. Draw num_runs values with replacement from samples, trim k=ceil(10%) from @@ -95,11 +95,11 @@ def main(): help='specify an RCP json file to use') parser.add_argument('--interpolate', action='store_true', help='generate interpolated rcp min/mean for all batch sizes') - parser.add_argument('--jackknife', type=int, metavar='GBS', - help='restrict output to the single real (non-interpolated) RCP at this ' - 'global batch size, and also print the benchmark submission_runs') + parser.add_argument('--bootstrap', type=int, metavar='GBS', + help='print a histogram of bootstrapped, submission-sized trimmed-mean ' + 'scores for the real (non-interpolated) RCP at the given global batch size (GBS)') parser.add_argument('--seed', type=int, default=None, - help='seed the RNG for reproducible --jackknife output') + help='seed the RNG for reproducible --bootstrap output') args = parser.parse_args() @@ -112,14 +112,14 @@ def main(): if not args.no_header: print("BS,Mean,Min") - if args.jackknife is not None: - record = checker._find_rcp(args.jackknife, 'full_rcps') + if args.bootstrap is not None: + record = checker._find_rcp(args.bootstrap, 'full_rcps') if record is None: - sys.exit(f"Error: GBS {args.jackknife} is not a measured " + sys.exit(f"Error: GBS {args.bootstrap} is not a measured " f"(non-interpolated) RCP batch size for {args.benchmark}") print_rcp_record(record) print(f"submission_runs: {checker.submission_runs}") - scores = jackknife_scores(record['Epochs to converge'], + scores = bootstrap_scores(record['Epochs to converge'], checker.submission_runs, rng=np.random.default_rng(args.seed)) print_histogram(scores) From f1fbb5c0987eff25da455afb936be8d439e5d142 Mon Sep 17 00:00:00 2001 From: Matt Frank Date: Wed, 27 May 2026 12:14:55 -0500 Subject: [PATCH 4/4] Print max speedup and tail probability in --bootstrap Add two summary lines to --bootstrap output: max_speedup (RCP mean / RCP min, the largest score ratio achievable from lucky-fast convergence) and P(score < min), the measured fraction of bootstrap scores falling below the RCP min. Co-Authored-By: Claude Opus 4.7 --- .../rcp_checker/visualization_scripts/rcp_viewer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py index af17983..223bbb2 100755 --- a/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py +++ b/mlperf_logging/rcp_checker/visualization_scripts/rcp_viewer.py @@ -119,9 +119,13 @@ def main(): f"(non-interpolated) RCP batch size for {args.benchmark}") print_rcp_record(record) print(f"submission_runs: {checker.submission_runs}") + max_speedup = record['RCP Mean'] / record['Min Epochs'] + print(f"max_speedup (mean/min): {max_speedup}") scores = bootstrap_scores(record['Epochs to converge'], checker.submission_runs, rng=np.random.default_rng(args.seed)) + prob_below_min = np.mean(scores < record['Min Epochs']) + print(f"P(score < min): {prob_below_min}") print_histogram(scores) elif not args.interpolate: data=checker._get_rcp_data(rcp_pass_arg)