-
Notifications
You must be signed in to change notification settings - Fork 31
Open
Description
Results summary
- achieved_bw_1d/achieved_bw_2d=68GB/s
- achieved_bw_3d=43GB/s
all_to_all_1d
benchmarks:
- benchmark_name: all_to_all
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x8", ici_size_range: 32, sharding_strategy: "1x1x8", op_dimension: 1, num_runs: 5} # Non Parallel Replica Groups
trace_dir: "../microbenchmarks/all_to_all_1d"
csv_path: "../microbenchmarks/all_to_all_1d"
xlml_metrics_dir: "../microbenchmarks/all_to_all_1d"
xla_dump_dir: "../microbenchmarks/all_to_all_1d/hlo_graphs"
| iteration | op_type | replica_group_type | rank | mesh_shape | op_dimension | sharding_strategy | input_num_elements | matrix_shape | transferred_data (GB) | dtype_bytes | hlo_input_shape | hlo_output_shape | hlo_replica_groups | step_time_ms_p50 | step_time_ms_p90 | step_time_ms_p95 | step_time_ms_p99 | step_time_ms_avg | step_time_ms_max | step_time_ms_num_runs | step_time_ms_min | achieved_bw (GB/s)_p50 | achieved_bw (GB/s)_p90 | achieved_bw (GB/s)_p95 | achieved_bw (GB/s)_p99 | achieved_bw (GB/s)_avg | achieved_bw (GB/s)_max | achieved_bw (GB/s)_num_runs | achieved_bw (GB/s)_min |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 8192 | ((8, 8, 128)) | 2.46E-05 | 4 | f32[8,8,128] | f32[8,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.008611044 | 0.0086439376 | 0.008652100800000001 | 0.00865863136 | 0.0085675868 | 0.008660264 | 5 | 0.008460984 | 2.8540093396340795 | 2.9011751774001655 | 2.9029009366511014 | 2.9042815440518504 | 2.8687298763576714 | 2.9046266959020377 | 5 | 2.8377887787254523 |
| 16 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 16384 | ((16, 8, 128)) | 4.92E-05 | 4 | f32[16,8,128] | f32[16,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.008716687 | 0.0088573828 | 0.0088662664 | 0.00887337328 | 0.008748859600000002 | 0.00887515 | 5 | 0.008632653 | 5.638839618768003 | 5.6789415140169 | 5.686336025425934 | 5.692251634553162 | 5.618700910331254 | 5.693730536834969 | 5 | 5.538159918423915 |
| 32 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 32768 | ((32, 8, 128)) | 9.83E-05 | 4 | f32[32,8,128] | f32[32,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.009296519 | 0.009470828 | 0.009508523 | 0.009538679 | 0.009323889600000001 | 0.009546218 | 5 | 0.009152461 | 10.574280545223434 | 10.687841873483267 | 10.714279794321033 | 10.735430130991245 | 10.545262852131069 | 10.740717715158798 | 5 | 10.297690666607448 |
| 64 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 65536 | ((64, 8, 128)) | 0.00019660800000000003 | 4 | f32[64,8,128] | f32[64,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.010689076 | 0.0107200478 | 0.0107291714 | 0.01073647028 | 0.0106840336 | 0.010738295 | 5 | 0.010648259 | 18.393357854317813 | 18.461365962403796 | 18.462614698865792 | 18.463613688035387 | 18.40221127502451 | 18.463863435327788 | 5 | 18.309051855997627 |
| 128 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 131072 | ((128, 8, 128)) | 0.00039321600000000005 | 4 | f32[128,8,128] | f32[128,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.013545018 | 0.0137250902 | 0.0137406966 | 0.013753181719999999 | 0.013599039599999998 | 0.013756303 | 5 | 0.013496999 | 29.03030472163271 | 29.114960671163793 | 29.12427403542584 | 29.13172472683548 | 28.916565977041564 | 29.13358739968789 | 5 | 28.584424172686518 |
| 256 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 262144 | ((256, 8, 128)) | 0.0007864320000000001 | 4 | f32[256,8,128] | f32[256,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.018891957 | 0.018996638399999998 | 0.0190139252 | 0.01902775464 | 0.018865306 | 0.019031212 | 5 | 0.018728691 | 41.62787370307905 | 41.98968464649197 | 41.99022315365213 | 41.99065395938026 | 41.68835756148295 | 41.99076166081229 | 5 | 41.32327462906725 |
| 512 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 524288 | ((512, 8, 128)) | 0.0015728640000000002 | 4 | f32[512,8,128] | f32[512,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.030623049 | 0.0306439374 | 0.0306460982 | 0.03064782684 | 0.0306050418 | 0.030648259 | 5 | 0.030540216 | 51.36209656980924 | 51.47713969107542 | 51.48927180520952 | 51.498977496516794 | 51.3924075433177 | 51.50140391934361 | 5 | 51.31984821715322 |
| 1024 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 1048576 | ((1024, 8, 128)) | 0.0031457280000000004 | 4 | f32[1024,8,128] | f32[1024,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.052677071 | 0.0527111644 | 0.0527181272 | 0.05272369744 | 0.05267010800000001 | 0.05272509 | 5 | 0.052584634 | 59.71721548451319 | 59.78183404976464 | 59.80201234599838 | 59.81815498298537 | 59.72515663368107 | 59.822190642232115 | 5 | 59.6628284560538 |
| 2048 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 2097152 | ((2048, 8, 128)) | 0.006291456000000001 | 4 | f32[2048,8,128] | f32[2048,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.097835534 | 0.0979282112 | 0.0979346936 | 0.09793987952000001 | 0.0978175268 | 0.097941176 | 5 | 0.097668667 | 64.30645127362418 | 64.3992281271826 | 64.40777351354059 | 64.41460982262699 | 64.31836091336609 | 64.41631889989858 | 5 | 64.237088596935 |
| 4096 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 4194304 | ((4096, 8, 128)) | 0.012582912000000002 | 4 | f32[4096,8,128] | f32[4096,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.187986795 | 0.1880893156 | 0.18809987979999998 | 0.18810833115999998 | 0.18800864339999998 | 0.188110444 | 5 | 0.187920768 | 66.9350844563311 | 66.95193124379138 | 66.95526685591364 | 66.95793534561145 | 66.92731453563388 | 66.9586024680359 | 5 | 66.89108660016774 |
| 8192 | A2A | non-parallel | 8 | 2x2x8 | 1 | 1x1x8 | 8388608 | ((8192, 8, 128)) | 0.025165824000000003 | 4 | f32[8192,8,128] | f32[8192,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.368803121 | 0.3688235292 | 0.36882953160000004 | 0.36883433352 | 0.3687771908 | 0.368835534 | 5 | 0.368639856 | 68.23647243484147 | 68.25469378450728 | 68.26069355892552 | 68.26549337846012 | 68.24127285875251 | 68.26669333334377 | 5 | 68.2304758629899 |
all_to_all_2d
benchmarks:
- benchmark_name: all_to_all
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "4x8", ici_size_range: 32, sharding_strategy: "1x8", op_dimension: 2, num_runs: 5} # Non Parallel Replica Groups
trace_dir: "../microbenchmarks/all_to_all_2d"
csv_path: "../microbenchmarks/all_to_all_2d"
xlml_metrics_dir: "../microbenchmarks/all_to_all_2d"
xla_dump_dir: "../microbenchmarks/all_to_all_2d/hlo_graphs"
| iteration | op_type | replica_group_type | rank | mesh_shape | op_dimension | sharding_strategy | input_num_elements | matrix_shape | transferred_data (GB) | dtype_bytes | hlo_input_shape | hlo_output_shape | hlo_replica_groups | step_time_ms_p50 | step_time_ms_p90 | step_time_ms_p95 | step_time_ms_p99 | step_time_ms_avg | step_time_ms_max | step_time_ms_num_runs | step_time_ms_min | achieved_bw (GB/s)_p50 | achieved_bw (GB/s)_p90 | achieved_bw (GB/s)_p95 | achieved_bw (GB/s)_p99 | achieved_bw (GB/s)_avg | achieved_bw (GB/s)_max | achieved_bw (GB/s)_num_runs | achieved_bw (GB/s)_min |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 8192 | ((8, 8, 128)) | 2.46E-05 | 4 | f32[8,8,128] | f32[8,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.008563025 | 0.008734933799999999 | 0.0087918364 | 0.00883735848 | 0.008537815 | 0.008848739 | 5 | 0.008272509 | 2.8700138093722725 | 2.947142047540046 | 2.9589728528886137 | 2.9684374971674674 | 2.8798846636937085 | 2.970803658237181 | 5 | 2.777344884960445 |
| 16 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 16384 | ((16, 8, 128)) | 4.92E-05 | 4 | f32[16,8,128] | f32[16,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.0087491 | 0.0088256904 | 0.0088348142 | 0.008842113240000001 | 0.00874958 | 0.008843938 | 5 | 0.008639856 | 5.617949274782551 | 5.668926070677588 | 5.678954887980783 | 5.686977941823339 | 5.6179998946065774 | 5.688983705283978 | 5 | 5.55770517613308 |
| 32 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 32768 | ((32, 8, 128)) | 9.83E-05 | 4 | f32[32,8,128] | f32[32,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.009254502 | 0.009404802 | 0.009412485 | 0.0094186314 | 0.009276590599999999 | 0.009420168 | 5 | 0.009130852 | 10.62228956242054 | 10.7357777581096 | 10.750957184181201 | 10.763100725038482 | 10.59847213318989 | 10.766136610252802 | 5 | 10.4354826792898 |
| 64 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 65536 | ((64, 8, 128)) | 0.00019660800000000003 | 4 | f32[64,8,128] | f32[64,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.010660264 | 0.0107322934 | 0.0107442982 | 0.01075390204 | 0.010629532 | 0.010756303 | 5 | 0.010483794 | 18.443070453039443 | 18.705720804781432 | 18.72961847298997 | 18.7487366075568 | 18.497997026276316 | 18.753516141198507 | 5 | 18.27839918604004 |
| 128 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 131072 | ((128, 8, 128)) | 0.00039321600000000005 | 4 | f32[128,8,128] | f32[128,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.013602641 | 0.0136981992 | 0.0137164466 | 0.013731044520000001 | 0.013605522200000001 | 0.013734694 | 5 | 0.013506603 | 28.90732762850979 | 29.0839631046883 | 29.098417393391674 | 29.109980824354377 | 28.9022087992601 | 29.11287168209505 | 5 | 28.6293964758152 |
| 256 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 262144 | ((256, 8, 128)) | 0.0007864320000000001 | 4 | f32[256,8,128] | f32[256,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.018792317 | 0.018972629 | 0.019007923 | 0.0190361582 | 0.018821848800000002 | 0.019043217 | 5 | 0.018619448 | 41.848591634549386 | 42.085991701888126 | 42.16155962361874 | 42.22201396100324 | 41.785145728040824 | 42.23712754534937 | 5 | 41.2972240982183 |
| 512 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 524288 | ((512, 8, 128)) | 0.0015728640000000002 | 4 | f32[512,8,128] | f32[512,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.03054982 | 0.030619688 | 0.030627370999999997 | 0.030633517399999997 | 0.0305560626 | 0.030635054 | 5 | 0.030462185 | 51.485213333499196 | 51.58298787134468 | 51.60815810470684 | 51.628294291396564 | 51.47488373996223 | 51.633328338069 | 5 | 51.34196923563446 |
| 1024 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 1048576 | ((1024, 8, 128)) | 0.0031457280000000004 | 4 | f32[1024,8,128] | f32[1024,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.052659064 | 0.05272509 | 0.052732293 | 0.0527380554 | 0.052642017 | 0.052739496 | 5 | 0.052541417 | 59.737636050652185 | 59.85991101808027 | 59.86565366503011 | 59.870247782589985 | 59.757107524934476 | 59.87139631197995 | 5 | 59.64653132066337 |
| 2048 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 2097152 | ((2048, 8, 128)) | 0.006291456000000001 | 4 | f32[2048,8,128] | f32[2048,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.097747899 | 0.0979452578 | 0.0979918364 | 0.09802909928 | 0.0977875148 | 0.098038415 | 5 | 0.097648259 | 64.36410464433615 | 64.41679769761421 | 64.4232896437367 | 64.42848320063467 | 64.33815347050663 | 64.42978158985918 | 5 | 64.17337530395612 |
| 4096 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 4194304 | ((4096, 8, 128)) | 0.012582912000000002 | 4 | f32[4096,8,128] | f32[4096,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.188061224 | 0.188235054 | 0.188262185 | 0.18828388980000002 | 0.1880792316 | 0.188289316 | 5 | 0.187936375 | 66.90859355461816 | 66.95030520692961 | 66.95167358031136 | 66.95276827901677 | 66.90221981343015 | 66.95304195369312 | 5 | 66.8275410804509 |
| 8192 | A2A | non-parallel | 8 | 4x8 | 2 | 1x8 | 8388608 | ((8192, 8, 128)) | 0.025165824000000003 | 4 | f32[8192,8,128] | f32[8192,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7},{8,9,10,11,12,13,14,15},{16,17,18,19,20,21,22,23},{24,25,26,27,28,29,30,31}} | 0.368863145 | 0.3691212486 | 0.36916206479999997 | 0.36919471775999996 | 0.3688938776 | 0.369202881 | 5 | 0.368669868 | 68.22536851709596 | 68.25633569983937 | 68.25873585175053 | 68.26065597327946 | 68.21970299159307 | 68.26113600366169 | 5 | 68.16258836290068 |
all_to_all_3d
benchmarks:
- benchmark_name: all_to_all
benchmark_sweep_params:
- {matrix_dim_range: {start: 2, end: 8192, multiplier: 2}, dtype: "float32", mesh_shape: "2x2x8", ici_size_range: 32, sharding_strategy: "2x2x8", op_dimension: 3, num_runs: 5} # Non Parallel Replica Groups
trace_dir: "../microbenchmarks/all_to_all_3d"
csv_path: "../microbenchmarks/all_to_all_3d"
xlml_metrics_dir: "../microbenchmarks/all_to_all_3d"
xla_dump_dir: "../microbenchmarks/all_to_all_3d/hlo_graphs"
| iteration | op_type | replica_group_type | rank | mesh_shape | op_dimension | sharding_strategy | input_num_elements | matrix_shape | transferred_data (GB) | dtype_bytes | hlo_input_shape | hlo_output_shape | hlo_replica_groups | step_time_ms_p50 | step_time_ms_p90 | step_time_ms_p95 | step_time_ms_p99 | step_time_ms_avg | step_time_ms_max | step_time_ms_num_runs | step_time_ms_min | achieved_bw (GB/s)_p50 | achieved_bw (GB/s)_p90 | achieved_bw (GB/s)_p95 | achieved_bw (GB/s)_p99 | achieved_bw (GB/s)_avg | achieved_bw (GB/s)_max | achieved_bw (GB/s)_num_runs | achieved_bw (GB/s)_min |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 32 | A2A | non-parallel | 32 | 2x2x8 | 3 | 2x2x8 | 32768 | ((32, 8, 128)) | 0.00012288000000000002 | 4 | f32[32,8,128] | f32[32,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}} | 0.012151261 | 0.0121591836 | 0.012159423800000001 | 0.01215961596 | 0.0121342138 | 0.012159664 | 5 | 0.012072029 | 10.112530707718319 | 10.159559674225536 | 10.169230831639041 | 10.176967757569845 | 10.126812378684606 | 10.178901989052546 | 5 | 10.105542389987093 |
| 64 | A2A | non-parallel | 32 | 2x2x8 | 3 | 2x2x8 | 65536 | ((64, 8, 128)) | 0.00024576000000000003 | 4 | f32[64,8,128] | f32[64,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}} | 0.014726291 | 0.014879231599999999 | 0.0148993998 | 0.01491553436 | 0.0147654262 | 0.014919568 | 5 | 0.014648259 | 16.688519872383345 | 16.747306504967447 | 16.762363473950998 | 16.77440904913784 | 16.64496464732585 | 16.777420442934552 | 5 | 16.472326812679835 |
| 128 | A2A | non-parallel | 32 | 2x2x8 | 3 | 2x2x8 | 131072 | ((128, 8, 128)) | 0.0004915200000000001 | 4 | f32[128,8,128] | f32[128,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}} | 0.020040816 | 0.0200741894 | 0.0200785112 | 0.02008196864 | 0.019980072 | 0.020082833 | 5 | 0.019726291 | 24.525947446451287 | 24.785913469866404 | 24.851456916239464 | 24.90389167333791 | 24.60157262312181 | 24.91700036261252 | 5 | 24.474634629486786 |
| 256 | A2A | non-parallel | 32 | 2x2x8 | 3 | 2x2x8 | 262144 | ((256, 8, 128)) | 0.0009830400000000001 | 4 | f32[256,8,128] | f32[256,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}} | 0.031109244 | 0.0311798316 | 0.0311829528 | 0.03118544976 | 0.031123169200000002 | 0.031186074 | 5 | 0.031056423 | 31.599610713780127 | 31.638202014918498 | 31.645778793242886 | 31.6518402159024 | 31.58554873921394 | 31.65335557156728 | 5 | 31.52176192488994 |
| 512 | A2A | non-parallel | 32 | 2x2x8 | 3 | 2x2x8 | 524288 | ((512, 8, 128)) | 0.0019660800000000003 | 4 | f32[512,8,128] | f32[512,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}} | 0.05367587 | 0.053797838800000004 | 0.053799279400000004 | 0.053800431880000006 | 0.0536489794 | 0.05380072 | 5 | 0.053444178 | 36.628749566611596 | 36.76378324795936 | 36.775663158832366 | 36.78516708753077 | 36.647365421214516 | 36.78754306970537 | 5 | 36.5437488568926 |
| 1024 | A2A | non-parallel | 32 | 2x2x8 | 3 | 2x2x8 | 1048576 | ((1024, 8, 128)) | 0.0039321600000000005 | 4 | f32[1024,8,128] | f32[1024,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}} | 0.098608643 | 0.0986607444 | 0.09866770720000001 | 0.09867327744000001 | 0.0985812724 | 0.09867467 | 5 | 0.09845018 | 39.87642340844302 | 39.92717689659808 | 39.933892210059554 | 39.93926446082874 | 39.88752152176063 | 39.94060752352104 | 5 | 39.849740566652 |
| 2048 | A2A | non-parallel | 32 | 2x2x8 | 3 | 2x2x8 | 2097152 | ((2048, 8, 128)) | 0.007864320000000001 | 4 | f32[2048,8,128] | f32[2048,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}} | 0.188404562 | 0.1885515006 | 0.1885692678 | 0.18858348156 | 0.18844681859999998 | 0.188587035 | 5 | 0.188345738 | 41.741664408317256 | 41.750018454446625 | 41.7523597940835 | 41.754232865793 | 41.732313023456484 | 41.754701133720374 | 5 | 41.70127601825863 |
| 4096 | A2A | non-parallel | 32 | 2x2x8 | 3 | 2x2x8 | 4194304 | ((4096, 8, 128)) | 0.015728640000000002 | 4 | f32[4096,8,128] | f32[4096,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}} | 0.368291717 | 0.3685068432 | 0.3685553426 | 0.36859414212 | 0.368291957 | 0.368603842 | 5 | 0.368027611 | 42.707015319597865 | 42.73080687995496 | 42.73423492148282 | 42.73697735470511 | 42.70699917383609 | 42.737662963010685 | 5 | 42.670852030891204 |
| 8192 | A2A | non-parallel | 32 | 2x2x8 | 3 | 2x2x8 | 8388608 | ((8192, 8, 128)) | 0.031457280000000004 | 4 | f32[8192,8,128] | f32[8192,8,128]{2,1,0:T(8,128) | {{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}} | 0.727989196 | 0.7280333732 | 0.7280472986 | 0.72805843892 | 0.7279903962000001 | 0.728061224 | 5 | 0.727941176 | 43.21119073311083 | 43.213385593207654 | 43.213713416632615 | 43.21397567537258 | 43.21111962202942 | 43.21404124005757 | 5 | 43.2069157964111 |
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels