From 65031d6b7ea6aee39b1792e157a227253a8cd26c Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sat, 13 Jun 2026 20:42:10 -0400 Subject: [PATCH] src: make compile cache zstd dictionary reproducible Add tools/train_compile_cache_dict.mjs, a maintainer tool that regenerates src/compile_cache_zstd.dict end to end, and replace the embedded dictionary with its byte-for-byte reproducible output. The script walks a fixed in-tree corpus, harvests a V8 code cache from each module via vm.compileFunction with produceCachedData (the same shape the CommonJS loader produces at runtime), feeds the blobs to `zstd --train`, and writes the 16 KiB dictionary in place. The output is byte-for-byte stable when node is run with --predictable (the script re-execs itself with it, since V8 otherwise randomizes the string hash seed and that seed leaks into cachedData), the corpus and its sorted order are fixed, and the node build is fixed. This documents and makes reproducible exactly how the embedded dictionary was created, so a future maintainer can regenerate it (e.g. after a V8 or corpus change) and review the resulting diff. 
Refs: https://github.com/nodejs/node/pull/63861 --- src/compile_cache_zstd.dict | Bin 16384 -> 16384 bytes tools/train_compile_cache_dict.mjs | 168 +++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 tools/train_compile_cache_dict.mjs diff --git a/src/compile_cache_zstd.dict b/src/compile_cache_zstd.dict index b64455d45b1d82ca5b0c2a4dc0c41a6c1220fc68..ef9c5cb52a23bf7db76f029909a3e834c5eee6d3 100644 GIT binary patch literal 16384 zcmc(G3w%`7o&UX)lbMi&FnJIXh!~y&0!es@1X{ymAW$SR;iaIt^SFV5$;>b_LE=g` zAl3)i@b?OLst+R8!zw~PPp_ndntlYr9y?)I}M zpF8*5bAG?y`JLbS{eI{5%Y;2uCo}I|)uX}fp5+_wTw9d)v%8-yy=DH`#Qt#~KYG{4 zALmW_Myc>G)NX&{YdcSF`(ajW!iq7kzIpK6k#{G4^3w3178mVVQ}D=zCwBM$g@5<& zjJMyN_@_1Re)Y3|8D(bw*s0~_W_t(T(Er14mTE#I9lWf zvUDwfjAKmIcumvRKl1gBGal*f-}Aw+@46cftT@)OGMB$G1t{Qn-q4e zx#ND4YicqR*F}S^VUsvuyTnzh`YKvT-DDdS9T2^me z?DF;y+r(3yTv9*uAh~qOVY>{MJ<_=(%V}If6+5}?uo$UCS2DF1ZqAUr+sv@i;(gp^ zUxt)bTFWWksoh8%h)iu@%pziq4=X`B+Lbig({?Tv2gQ5hEpbwu77w^a=ZP1g)}5Um zPMTlXY4$TUG#sbY(0oy+$FOmvGqYisyB%G+Om3|%56E!!(3=VP5`T~esaNW^#>!Qu zN;)&0T`so@vxbs1wKDGyla^B}TYRevrA#Kg!b>quo)qf7VG(%cBf>1>ivFiHp*Mfmc*P;A}O&A z{<5R&o@39scL?G0N8`yE>?3O}w=)YFy{JC$(RJUg1x;;PG8tIqbFP<eCQW>_)4_;<7Oh-8V{I_CPu|T znKllO>efBk9~Ae&S92)h3qwI6#RfBm0aBt<;4=ZRPEy@*XjCJ#6%vm|Op6zMDlmFw zHoc=`Qw~)ml$hp=1k7;Q3=&2GU4e|EwfE@{zR!t+{hwF&GAiPexu4AHjl3Gd>1MMTkDEa>&=N|dhLRSg7D5QWp+FlI&lifQQIx5`TGu~h(V$leDBY`q3 zpm$#SeNWnHDA54@^7-xQSOQfO2;*}6;H=X$>=lhksoJt~1S`SQ`vv{)p;LkOOZ>(p zaxK%sQ7JjAxd8xZS6!$v9-$7VDMDR*nlG@-RAynIPP-ltyKt)c>HV~t7D(+PtN99n zz(B{w1Miaoj`$_ytoqC0qIz^}{B5{D@_~3Xkx+&O$w-T_aF>~Z6&BazlWbgA{pk`V zq^aSkKQ-KnGL?-&>exGZ6^#PM6-q2Kg9h^21UwjDaGAI8zU$~x;>xC{wL-%pNvJWw zkPQA1pj}cczW?KEIh7C`eo<8u1l8cE|J^@ZK#p4D;aQ0}(SR?^I*ZC6WC%3yj$I?D z09KPzeZhJtmIXwQMWCUf^Pf<}p@h_B%X%%KiKxKT2KF)+WM)^6-ba7b z)TDHCDprW_#u9hn&^sO|kaY<&sn&cjF<)yKyf9IE?=Sx0pbX?AbX!*0zHJk3Vp%er zNR{;;ch#@RgSiUQOEisExL>`E{6QE^qw&UMIYF>uXkbYnfdZJZq`DM&?e)*Dm|W!7$VI&CW!-C>gGKIz7*chjN)+jHT(f#*Y5KUj71^SU;a(=>$ 
zCn|$svjrnsIc-5><CO>E!TGZ5pS-i7K$wUIxEHwD0KFSTyVAqSXfUM|Q zt{%OFF(4z?^@J|F!y$GEr&hS@r}_X_OFpX})g^+CDxt|PGHa&|$Q;09uz(|k_2!rK z;It}!e{rG?caAI;Z6_oTq)r(yiOFb?1rVZJ8NnDjdZd!gvOi0#{;Wok>d$QhGRn{& zDWzd3b*#^DY7^=U7ZolkD67z}+@$xfA2_amZQtqrmK3+^6ZYr@Wz|~0r0Q>#-Cv4a zzlMEzZ{-x^RiaumfghO+>%ARJx(+*CzjXRovdt=rDHTzCvzHba$Pf z;MAUqpqU6%EVWz!#cZeO+^u zoy!rPjGRo5Bggrev((YoIY`%D8{6S<94*Z_QtCLo>}a!WW`kg(Wb5>F6k$<#b#+iQ3 z)BVo>hk3R`6y_Z;*f=$Dn_lEP5#ag)UD2FHHCm6NdM$H~GrQd?!%N%l7hoHdWv4i< z<82KS_em8SX{lHdv82g7H6X(c74s9H5_N3s>4JjwsmdOlm(!c-h2d>pV?S!2EdjeF+k;%dGu>JF zawWL@LuLDD@XSM$U9#N%F_Qzd{4^QKuJe z4^aM0hsU2APwJ^;WepZc;)_6>-KRm4|ABZvXc4bbUEkz2z6ZM1Sg#wXRd`D*rfj^y zsCX#7-?>106e!n2Pdm|!Ef6<5W8)10-nYBsEd|c^2BhN!x9sd9=Al5_)dfus&e2TqcTk$_8m+?4Sql-CZy@dyNsmnKCFHFT7QuQRqaYkAB!ft1<8)kGJ{t(YYSw%%nGVlmBB`+^$0Y3ShR8W>7(7TCqs)(XO;^ZwdbyY^|MRMU0 z`BoXljKo*k`IdDSLjNR&Sdi8_fOD}JZIxLaQlEIDjiyVYV|9n1FBE8y;?f3`qZsQN{3aS z`n#h!rzU3Jc1%tVl92Q$MS9qEqV09iq^txdGd^zXT~vdz8dWCin71Cl$T~N^OH`Q? zFk5wJNqg$a$TnuzsIqoP_H&w}xV=3y7f-RV)-m+QnzQ7@+8eaos;ZoJ-BE0ec@BB? za4>j~%|D)NW>YmdD4%u^dSQ8}%|B~X<$MVF&YP0?aNMiHjy0D)+0T-6Txy(3R69@MX0FPPlt zEu}|l)mLV(`l!zONUr)`T&X{Y51ClXq^noz7c9wL)MKL-ucaO&uuo^D!5?mH($;?T z70n+GYp1B16P4Io@Q@=xNcfYB*KyR*OKu6$ruGVRHII=J#ySB13Xg$D&LZBXNF`fK zM0vYbWAK9xPfn4vJEk1#_9tdR%WbAff9Fh7+izeT&bvL1LjC9paaQ}+vF98Pk7EN& zbYi9uX=`VwPwsYH;V5QAQMqhpN6Jz;Ez=sJR&&x)X|0|1`d)6Qy)LsHG%z%(`dya2 zzQ%94q=vl?hP^H&!~{GXl)7y?>~&IcNxxT_8DYI{ckF2>))L>>^U4MWi($}(8}s_l z={v0Ngg(oxLA=ejXP$)WlhrOxufc~*wmoOq1=brZOPjm-&J`1A&oi*$W)OOkYisi`G(gvDr$HJATX!5Qbi8(Fh8TV%M^v8?nf)@( zn&;06m)eB@*dPKY8-19y^=)SflhQMl=kGMDUe?^^|mTu@0wuT>ji&-hrOt z%qP@B?~lXbI6gJwYBV<=+!_2*vQtZYwoZyP!h0-=0g|`-hO8!C!v-)g+_Ax-I}R8u zB$XX0M}=_#fBeN|qo3$K@I}gWqkfBWi73@;O0a~p_BiCZ;z#;8Zj^1F?Q=-uA@KWt z7oNGRpcrhXIbQ~B(oznaIuARzDUgR9+YJs6Cwh6=mUP?9w!bKwM)O;4OkDg}Y4@ZIMiq81Lb^=rp@vZWWV_mx1HK zAf)iT+q^qBagaW-@@SXD=6ej={#zVO7L7r**)|4VK z423#8yiV3w-VUbG<6v! 
zX=s|&z(9Pm0eXdVT^s0hD1%WcS{(N2D8*PYIS_4$wI6g+zU0!tsn>xd~;f z^fMFFMWSqVdby@u7?8&QPI13Og>dCSi~kwc$^~Xs7tHNmW9mRn`!gN`m8##?2`Y_- z){5I->-<#Hc5IWZu!;wtupDi4cY z*BvhcoL$4y@kS}U&VV(Zl#K6=_c(BV1EVxhX?399-*?^eW3-9588&?h6E~nMSZ#Me zdL1yrbJVN56Ra4aX^)RgCy3dlx?Xf-0{RwuPn%F@k9f{8p*x{xK;#Ed!;3r+#F6ee zx%R&%9r~~*oI6sv0P~JEB_8*+%}U4@QF;6O<2=nA^vyaiG zb%SY5ER!Ib%giR&OH+=lr{>Gs=4rlIvNaAb=?koY_Y6StggJi2%*8af8Lbf+*^_FK zx5_9d%#0ClchJAhZEf+J@mYyRgzC&fT+dQ#?jy{q2Y*$`w(9)L5h8@O9%3&*D~6-k z5KlY^vmp1$({u`#fRR=~2mk)Ark~U8D43aMvhJ3MMW<~#uB_srSd!2D3Sg&Lb2itoj4mLvQ%taG|VQ>Rla65%#V&14Nf9^5ov%+18=fmHEA+?Cfx6G;`rAlrIu&E`K+*4~DOrWsS_G~Up( zV9K2N-lhicZ11#rv+AtLRr0CIZO;Z|XHqE=C-iu-hbL8hw|5~=$ z4MtKW40!IVFB`)JAL-Q1pVw43qp4xa+?igE{a1s7By+*pvT?VY0Et1mA+lhVXPVhA>4H;fG?WHyyjbS&$AReH)FP07$gwQ!{V> zGWE}@vbSBn)`_=}*Sro10Fk(u@S6GLD8ISk2617jB&#dxtaJJacu@O90#D zq0V{u;44yO7Q2-QijkO$sG!IG((ofXGMkA?#^IUq%H@eBggDB@tJF}jgyQJ`_m9q@ zimpH~SA-%0{Wnj&c#!m0!0(xetYa7<>9dPhZ5i`C-Aal&$EK1qh4O2{5JA%T^v$J1 zsbXE=EE>N(b1PDTvhwRBspSNqg0j;Ir&Ss%6XOY+V+2 zkwW$#PmSUzBtz&aNFc}^BYESeZ;j`oB!wGO(U6r%Ut&cQxU{AMu?Yd^_lXC@F1C2} z*eQdkgo|%p()xE*Rb#5wjFOoN-ZeL&SSf1euP<;OPf-paY%u9Vv=t%H{!_RC(^L0{ zwB5hO{ozJx8Pa`NJz4jMC+xWGt5ucU|5{o3A}>sw^Sh^eHS1c@qN`=0lQ^7SnIV^N zv?r!4m>=$QWf{kmDGN&E`UJ63b~HB^Xj2VV^#IIT%{8E>p1VaB&?Q+_;0C)7Nr?`J zDGS11tJ(K(v;n3pC;^CA|1qGeLJj|bqUp7+-sEuiMp7g<5p5!uZtUWVd`R~F4}alR_8VL{rU_H>gOElawC99plLYN#oci&H_Gbr zl5cuUChi^~3D+*j%C$?1*0l?=#LwOQc^%JoWC-(#koCMdzgyq51qx)fOwD7Wh?z)K z90KN1JTH^Xhx~a1&p|xf@I1qUSMeM|op?hW+__m^I^>_x&jH)1*-~LB(D<^G2nBMu zJsEC~aC8x>x2Koe<92)U-JSxsr_k*wa(jBa zJ;iQMAGfE(?J0G8`no;++@3PG=L)x{zuR-A+f(lL3~+k}x;=y3p22R<5VvQj+v9P2 zhPge%-JYx5o)J9S^o(@#-&W>DSC|pYtmsG_&-&%s0%7%7({ch)5UhJF@Vhk>N|HEm zx>z#EM6V?Rd%Va%AvI{r|lLJPeb$ z=n@bYpK!JR`%#aVzXXhRKYwA=dor^)dm2Bgy{qOS%hm0e01JQ@*t249)h70TB!vl z?h-h?$9=%#`$y9CS(QKUnsi^r_trYj)j{ji?*^SIWarueoI~ z2&869rIbzhpSa4-Y*T;%?f}!oFfme8LE2~X41SX-z2qx0&wlXM0GfF7r^dUiy0!v8 zNUnRGuI)}ecdNlKiBzPJDAk!RFD9UR-;qtFC-{v;5y3S5a~-;LtK;A^?{n?iyj8l( 
z)^cE^-_d?u%aw7HmvENShjsc(-qUz`Zpd&rjtvqT!X`5vo{X_)ox&pqxtvD}vtCIrc0Zu50Yso&}z)^?5}@JlX4e^&a9uRqmbc;^D8A z_Nea2?~`-Bx}%^pr(7x<5{=-#`?nS7&jE}x_?0QWiv=pm&#H%3_QIYGa3-QVz z0^;DgDW)#z(t#V-QpW#xvFn2u8f20SozDs7s893a1e2j0r?0lo>$;wztJ`_>D3vxK z$K!K@B^(79?q3> z{v0aH*80?3sy|f5o5O2B_gA(9{gP=j zZ5nAe-dya&7r#1*d*tbjjozu!Ag1!4`cexbgTNG`eS3#)%VaN#)8xV1J64z5^04o> zy~aK?*B>6)ATG&?_1jb&EiVLkeMi2o=z@s(Y!6V%(-%Fo-@LnPYbjf-%3?nJGmk}x z>wm^0)9?51PACQ+b2bI(2J=E8&3?>+R|?Q_Dq9*UgrzHVaalQ;fwN?&Vh?|EfMDX%l7wJ8N3PCbesAl;d;`ksFjg$)9?AW;)>F$*h%5MiPr`B6wSE))p(;%eK)DU|Yo^1R{l{Hmn%VMQ?kr<_F6CaUW_M+J>46cv^? zD}@tl@T(qUZY!KLpt!uFe}_&}CSV3!lYpb}uK<6l;u3ESZ+`Q~C;yx;r__b{k~Pj# zLKL8!Uz(`L`^EM`#f^@-l$r?yRg_sv@IWdqV)~Z*l|wC$ojmZSu2e=kZn>l0|6ljL zT_Pd)RA+|^20cGA`Ta8UUeAninl__gx#uCZo23!_T$)??sye7nSUPp>gbM%NYVp80 ze4F_GSGD+vd&1JHr_0Lp2^BM!&-{nUN7bSwWtBzC%jSvSm1XZuC{-pX#Rt{WC6gB8 z<8@W!@3ZscZm3~HNHaXwjh|7_lu7d@&%-y9WQ*>w1sxN5=dQX*cTsC4Q(`%ax&62Mzwmky$};g- zGLr303nhho6u%KnEL{dNc&IKe@4PXn3xqsfb=3fiPnXW+%pwKx))tujIkw^+ps6n8Ow62w_mw9nV-fh_#&-bRM-! 
zWGiV+_ivh&2e&Ss?QF3!sRXqv;RrMH%s>3F4BstQEV!j7M~b?UAVPz>i+}Tjic?|M z^mn(X5-nCwA`%tiOnQ^XpE-wZuF0mcA>`<-=}E-14!!BoU=`QkD44aWhE$)G4n#8c z;_y5*mJ0WQ$u~|e2Dr2C5-Bm|qWfq6qGHD6k#%+8N?kl*HAk|YV8ynRr18p*hil3I zn3b)KWFwNh$g}SHxmO+GWDQ*tjK-Q)s$;?J84Sisd*5>}yHDcq)7|DJkosRVTdpDwI$pS7h{y~bCo}#5M*&q;XEj_HKUz{|or|0Eq zI4YV>du4qIs5w178!IbDTIb_OMRT{>IhES>CRMx!eThqex$mU;v?Rz zO7RVMt3}Hv*8oqhz#3--mZC`Qk?TZdy-3t=OkBeG30)TJ_3~X$e99iDt*d(DX-nD; zGvB&~tWR1(s1(mXj-iUZ{+5S-!KIDeKS5ID>=TduWh(_Tl-wLi#5;_1thd`rW`pT; zN=zkTnr?XXJ}!blDw)KpTO04TQoX| z(N4J#Xrcsu^elzEJJR27HKo$sXnm_?w~%XHQG~Fq1t8PoColaUy^q>(e|wFaq(|0L z0%s0Q+Dl1Krjo0B)?_1T$vwqHnKBbUwX%8R+@i@l&%t+JI-ZTRCoHT|E7IKvfv$_E zGud@0wpuV3Rwf%t)+OSxPD#ZCj$+`qL-75APbO@jDJ3^cTU7-ji9~xO+J$lxKZQ$| z9b0xm*EPA0i56IE1I@yRI=eY`iR;W5rEz4(?0r@srGv=~ly?h^yMDXH7K?h+>Trmy z#s@lB(QGxnd&T0}pG6Myluf4+38{E6SWIQO-y8-rV-mN0e+ zVe&ycfr9lu3&dhwm+xS2-X)1#f<(~zzBqlkP(g<>#|pc!mTl5jbTKp zgMmmqxWec%YQq7;-w-wcp5e)hPFX^=MJLGTa zt2P4tMs3JwJ>vsIEWPSi2!m54?~OJkRI^Ht?Eap~i670NmDq5zG27GJq2|{2`-d zMQt>Qk+D6L1dL$Fs7s@db-<4qE3#~_(;r)D7}bDSgE0j#p7u?b87op*!|>PFhQS|< zt0h(qdVT?K(~*5`1rH4Lt>36aKaASOP=MnNZwLlg_XSh`v#}aYMqi)A3mfE9gBVX7 zbeQk#BYi*Uw}y-+|M@LTQ3j~$V2ER{ZCDxzRM%f-tO*8cSG7ty8yg89Mf)-GtWNNZ zw72+;0N@&`frGgU8v)F1?J6U*Botd&R~sg$Vu3Kkg%ZZ(g?ieC9r6jcG%0C(F?r8| z5qw5*gx|Ierg(icEBxP*kdB4r#aTu(Gy%z_#N<62*tbe-?r2P2tn;0Uc4mQqHtf$>KpyLNtg*2yGrSuyy9o80M)0(Ig(=#Bb{>K9syhf_gL?n}uW+?$=&(6UI zsG?`D4EmEa3mReO!Elq+WTR*P7!xl=_-uU8+y+@iq{}9esm7|LDg)`+coKz3CLf$k4!9_)4UsEdp-dC{kuyk4lqr-ZcBAy!6;9Yso>Rr)`QkQYkmQkXFlhc3l6(;B&txgjJPtu}oCJGP z>8!(Xn;_dFK`-8yj3)R9h+TroH&H21kXfLquyd$!ehxHJzH~I236!6JpovO}Ru43u zJ86gf0%&ZtCLhf?b#i>tHlCyAox&-AuA>c;lxg#%vFo{Ue3V_L@HuIU>-o~9yg6&%UwVf)stBVbxEXe=tT*{0$r^RRZu>MTYn&$qohfTm z3}N!MCEEE?eK>aKtFSAziKc+WL!9&-Odg+@v>jr`QH!%f$)2Ej7=!%?Ka;g$T(-Q} z2d>IQJcDII|DdAm9TeD9Kb&6bIE`|TB;^GJU1)M4ekMyvP^-1kD6yPg%)e4N8f^(s z?jM0x12pL#lV}uv_h__7fb!`Gv@2~ic8;xamOZ1|O+O#+Qpnum5$)~`nzsP+7-Z%Y?0piM3xmicQeqmy=j;IPZ z>DQJWhYJtR=R3arV3jY@-xg8AUxyQBF^*lfI<5 zDn2dNdzbo;_Bn> 
zyGo`M99eu^^Zw&d$>I}*MR&LtpKzBPoVe3dI{(Nnui{#EykOk4<0p!bOG;zLADLG0 z@ih0bX(#?*9sfitD}C5x2gGW)TAk=Q8s(OXRj0V&M;?}YAim8nO%wByQii$8U_e6C&a zm0wh-Y5A<2sFv)Ha0PL&+Qdphd?@Eufx|=8Y`2?}&hMP}dwapKSLM*KgecUE%Wbd9 zxg_p*zbC@05=DWq!Rb{AEFE@k_e?%2OiSrCndf*-$bPlj_L>|bG82^rlQT&&2m1lt z;4HoJxVMrT!OBD1R0*uZ9ad!P}fpf}G z5ltoGLd*s_BgvSRS({pK6GxDv!DEf{D!~Y#MkFq=Vaal;Zb$Fcij}4iv02F!jKeMc z?EQo*68+RHvqXszb2d8GwQoCr0p)KNEC*3-Q)3UO9`M{p`J0uME*#t4B4R-VBh2SN zH}D;oTih~eyvf9w5}?U5P%cJQ9F>-h$sDn0YZ znWWFX1)-tc;~<4l)JT{>k=ECnd@llzJpKBL;Y_byCo-3^=5x9yR4TIX0vH?i!f}H6 z+fm810rR>;@kCr#s15rgj2SWI+Hj6dVyvhjmgm+I6EEc)WQLDsMDf$`cD;0yFuWlJ zbe74T!NKPcI7yq&o7+}|??^|7&01ub=feSJUkIGM=I{w_KGIdRCKc_H2MGa81g5uO zG7QVCuuNFY4s-o7Y4wmY7C>-qoicBxs9_Ak5pGrYyrG&Ljxf|)OH_To$WpXDzbbUH zlp|L=G>-yd1fm_?gUc4dI~57zUfCY;qq#CzL9udC~Yjhcp`=ax!f8 zfRkaPgQsosO*B^qj+uZEP43<^vORWkk<*^GT#k2N(EJ_Pa}slrL)5DGfK%)sT}ceL za3aoeP#r!6%?d2Rt(cf$%q4sFk3b~n7ubll%`p@-ZwAhrn66>Gr8XYN$_A;^7TS0N zN{s-dxCmWOgPe+xfaLnwHlTK&_C4*8r?rH$GGgAV+egp)*F!)=D&JkM&C5-F(0mhk z9j?5tQ6rpvviEIj1k7$D0tcCFDrwONoXJMIX(4D6_lZ$cMY2-`cpr2c5bVOBuLIQ= zM^k<|@w4qb_JJ}9u1?2eh&~UKKCn;poW_nb+BM&A$=oqaD~E|tSy(=E zfvV1PECXkukpWDcMgiivP3sxbt{yrtC^qCI4Z-{k`DR6{_yilW?kmuJ+Aa*iSGWs7 zwC8pez-F4IDTm7npXvJenCtN3kKNici;sESKG))7UazlU@yBC|eA?oVi%Oo~SzJ2* znc@@6junk7{&d-~lJUjImmM2hR($ND!qPLu;mSn2_JSO*ACN~=3ODUzI^(~54(q{1 zlh2&QNtQ~(lg!;=gJ}cW4W4adF($97eND|NZHh4h8Bp3@TlqK~`QIt+JJ~qWO=v(H zt48?kC`+DfVXKWOqqeY>``1;^7o9F%#L6P#s*R{16O)G-*n|y!8 zh0ju1#52OLf=El0(ZHzH5butQ2nqu$po8u7pMUU`rDTPS1(sZ_#>Bt_lXXS}|ZeR1mQdXrIJfMGwsviHf?$Ump9Ntonv3^)Jc zYqj}xG9WAK=Dzco$>WlPw>l{!(4^U3|H5VYcuwd+w5P3~m*3A2uIR)NHD%qKmG4)u zpKL~+wYo<89I>)9$6m6z{M#}9YT7`K# zmg)!tIXE-W56hsN09(}~VO zfJ;zaA`b$_05T(zv+z`VrLITMJ-sC+fsT!|_wzkGIH~BK=hQuBCGaY-pkN*?RNI{; z`)l`KJZ`0iL_{+Aq`|XCdk>tV;^d0ufcd`093lB*>#+}wzhaT9es>DBCD7e zptV-?(qxV_gF^=dMK(@~oYn!G;wfG=amM3LGT+6W0|!!1TQ+URHb+CG-Y0{%b{Q8mPP`VpNG5g|mn;*5|5 z!>iUH4i!X{2GKkj;X#C|c0-_6#PI4Em11N}wjrW$L4R#sAEIuet`m{HKto@!4-qzh 
z%&6PKC>tU|2BLhT?bU%$ZA-WsZ6Ruw2*-`;)+R)}7)5I_4D^Y854JY>(VuY3W%ac! zOW7AjzYuLgUu3i?5Z>6e9%bQ>vC?0Mvf3b{O^uD0_hlGyGQttW4_jBn{EQ-kenZ&E zB9geJHZ1ys2;WLS$AxGg+P0&8T`SfFw{AfEihKzpn$<^oF(bBe5!&8tto8fr>g)V< z!41JMqIM|D8i7u~(Ym3@Xh~swOZ|;q^$8=mq0tD-Iat*itZRv7E-(V;e>NZDr{zN+ zY#?&jg}&e&GK|%Av5+4l3;OM!^kH(Nqz{u~dhnm{K$0qAchKyB1zQh0-()F;+cD~? z7hn!nrn+*@P)Z{Sk>H)930(^!ye(s*QWCPgt5SKv8=u9Er3^2qP)n5lFB-)zNP|aJjTeh@Jn& zXhK^-RAhu1IUMe0C z$c)W4;TjW}k)|$9N#`69x503@dQ|(Vb_A}$*Wnucvo=Phe{hPb%}}+4!trN_4YB&jZ zHKp$6B;;^tA+WxdJ#ziJwis6S$l68D{bWGZYYl{Z|H*982snER96Lj6R&Q~RM>9c> zi6dpA{FBCOug;iNPc4BfLc@27k+)C995qFcz1@Ey9!NqSxHAXMzDgXj=aNa>nMsG~ zXLa9bIPK0%qq6D2=boTeNhM`q&u)kvVnYCKyT_jXQ)wPI<4|MKe2baVC}Dru+(l( zcwf|}WgqN*b&9dCT;W+Fk|t@ostk4RhWy*I*#aH=l%*H`1NA%wb|Ts5x)Ea}~Y&QnC>z z+!0sNkHCI)uA+D5$pdXs$F7?PtW%xwT;DFuQ(dY|HqTb3eW*>SL`0X)G3N?ng=2$L z(QttkZ9VJXG4+&KLZQqvcAUvC8lA$YjkkmzGVv+P^;N^)pwj;Qr7i4fyJhtdhLj&I zSyLu^FXI5`t4`ai2(dx^toxG?-x2BI*8+Gs)`p^HntsWfZtg21 zuMFV!Ebgcgj4(G3bfqSp0uxHw7e)lko(ipnT5lwsMiF0g=*<1%%Bl&cz%=(}Itd}$ z@-N61Ovd*}I)pKLdJ_Gjt;>Eecq?aYB+Qyb-1cgDZY*NA&puvtkVBJaQ`{1aCwWsE zKCo;*iUn&fiGKYy>Qf1`%IeRs26Z^Yw?u#b4);@82TXYjyFQXx-IvTGL>6zHKkyaO zX|XmYa9&?w--8kJ`EL^&eo0llCbAL#d(tDyUNiUI<&@!yw}K6mUG{5l>g@8Ud(pX} zv0HO*$X(R{Bpa_=z4j?K-nKCTKGh{6F=UXq*NI3^{@V#heAb(hA*Ro&cghdt!39lX z(8jG1UtBm^IsCM+1wP{HyV5>YZ9#=xea$lpDi8{@P@r&4x^@(}l?$88)|Xv5p`uDT zYfzouJaftP`l+?kuADTWUU*QQP_ayzmX%AQ+F21Cwz_=uiP%~DT!~Dkcw%dX;`Kb| z_Ppd4YIvVrycX}gA>1zJC2(Epy`HvfxiM_Y2&C1CC@R0bPcU+!VUD%)O=;`Rz zy#Vw4$`#n}xo!AV>_i~AuF#6A?=z`QxPOQUgv@gD56ALGMa)fNXs9GCYt5hAq)^KxQR= zf?J$k2_dXaqSNR-yjzC;AG6Vg*|NZ%Z_2Wq?-{N{{DZw*kre{3vaE{Bxxb6>Vm zRqcc-IN>Y-zVn1dcpaR{AG<_S)Z(h(G}Zr$k4wYriF{Vc|A_f2TKW-wCb8JJ-P*Y> zUTAXROz2Yn{e^3VN&mwUmt+u*O7Y_;5rch1HJjTWEvfV8mn5d)l%5gV7uwjBtZWyZEmkkG6^bV)?8i z43AFS_fI*-RB}tWcm5amO+t@yhZqbExA8NJXd+{`vh8DW*5bj3|AeSDXwBMMZM}A- zy42B1TyspcNSLHnlHWCQ7KgOi=2-<$5)d7eH9`;vW$26fhGv(*-+w-H;F8HFDd(I-ZM6>Au2Z*FpDUm#W<1-UX}l`B?>wNom@% zWra`gbi3#OY+AuF>ywW)Pw7Pko-t+KXS%dw#fQ62d^{G{O~x=cj=Ax^g0dp!%9uL? 
zF|T4oyNW+0(ykKRMkyS98E{Zs>Y?$&&hZ~a_YM?#2joA2Dm0KeK5fY<*6<#i&-UGv z;s5!$%`zUjK}EZtYB`)26AnYwS5?whUq=IFk;y3eQk=IOrqy6|;+IGw^g5ilCwu$KnZ^N5Ry0p&m@4iH)WD@4hbd159-O(Ms@- ztN(QWA~ti;{$-rpZt>r^K!gTqd2*(>@!!_A5pn+E|H5Tm``0esLG{(v*6|ttpu2RP giF)Bw6&mx@f6%>h^wnV8@G9HaYzK1w?tjqz|3Pq%!vFvP diff --git a/tools/train_compile_cache_dict.mjs b/tools/train_compile_cache_dict.mjs new file mode 100644 index 00000000000000..bce24bdeede3bf --- /dev/null +++ b/tools/train_compile_cache_dict.mjs @@ -0,0 +1,168 @@ +#!/usr/bin/env node +// ============================================================================= +// train_compile_cache_dict.mjs +// +// Maintainer tool that regenerates src/compile_cache_zstd.dict, the zstd +// dictionary embedded in the node binary and used to shrink compile-cache +// entries on disk (see src/compile_cache.cc). +// +// ----------------------------------------------------------------------------- +// What it does +// ----------------------------------------------------------------------------- +// The node compile cache stores V8 code caches (the bytecode/metadata blob V8 +// produces when it compiles a module) so later runs can skip recompilation. +// These blobs share a lot of structure across files, so we zstd-compress them +// with a trained dictionary to cut the cache's on-disk footprint. +// +// This script builds that dictionary end to end: +// +// 1. Walks a fixed in-tree corpus (CORPUS below) in sorted order. +// 2. For each .js file, harvests a V8 code cache via vm.compileFunction with +// produceCachedData — the same shape the CommonJS loader produces at +// runtime — and writes each blob to a temp directory. +// 3. Feeds all the harvested blobs to `zstd --train`, capping the result at +// MAXDICT (16 KiB) bytes. +// 4. Overwrites src/compile_cache_zstd.dict with the trained dictionary. 
+// +// The shipped src/compile_cache_zstd.dict is the source of truth; this script +// exists to document and reproduce exactly how that file was made, so a future +// maintainer can regenerate it (e.g. after a V8/corpus change) and review the +// resulting diff. +// +// ----------------------------------------------------------------------------- +// Usage +// ----------------------------------------------------------------------------- +// node tools/train_compile_cache_dict.mjs +// +// Run from anywhere (paths are resolved relative to this file). The output is +// written in place to src/compile_cache_zstd.dict; inspect the diff afterwards +// and commit it if it looks right. Progress is printed to stderr. +// +// Prerequisites: +// * the `zstd` CLI on PATH, version REQUIRED_ZSTD (matching deps/zstd). +// * a built node on PATH (this is the node you invoke it with). +// +// Note: --predictable is added automatically (see below) — you do not need to +// pass it yourself. +// +// ----------------------------------------------------------------------------- +// Reproducibility +// ----------------------------------------------------------------------------- +// The output is byte-for-byte stable only if all of the following are pinned: +// +// * node is run with --predictable. V8 otherwise randomizes the string hash +// seed per process, and that seed leaks into vm.compileFunction cachedData, +// so every harvest (and therefore every trained dictionary) would differ +// run-to-run even on the same machine. This script re-executes itself with +// --predictable if needed. +// * the corpus and its order are fixed (CORPUS below, walked sorted). +// * the node build is fixed: cachedData embeds the V8 version and build +// flags, so a different node produces a different (still valid) dictionary. +// * the zstd CLI matches deps/zstd (currently 1.5.7). ZDICT training defaults +// change between zstd releases; this script refuses to run on a mismatch. 
//
// Training on --predictable caches is fine even though the runtime consumes
// non-predictable caches: the dictionary only supplies shared substrings, and
// Persist() keeps min(plain, dict) per entry, so a less-than-ideal dictionary
// can never make any entry larger.
// =============================================================================

import { spawnSync } from 'node:child_process';
import { readFileSync, writeFileSync, mkdtempSync, readdirSync,
         rmSync } from 'node:fs';
import { join, relative, dirname } from 'node:path';
import { tmpdir } from 'node:os';
import { fileURLToPath } from 'node:url';

const ROOT = join(dirname(fileURLToPath(import.meta.url)), '..');
const OUT = join(ROOT, 'src', 'compile_cache_zstd.dict');
const MAXDICT = 16384;
const REQUIRED_ZSTD = '1.5.7';  // keep in sync with deps/zstd/lib/zstd.h

// Fixed corpus, relative to the repo root. Chosen to be diverse, always
// present in a checkout, and disjoint from the held-out measurement corpora
// (e.g. test/parallel) used to report size numbers.
const CORPUS = ['lib', 'tools', 'deps/npm/node_modules'];

// V8 randomizes the hash seed per process; re-exec under --predictable so the
// harvested caches — and the trained dictionary — are deterministic.
if (!process.execArgv.includes('--predictable')) {
  const child = spawnSync(
    process.execPath,
    ['--predictable', fileURLToPath(import.meta.url), ...process.argv.slice(2)],
    { stdio: 'inherit' });
  // `status` is null when the child was killed by a signal or could not be
  // spawned; report that as a generic failure.
  process.exit(child.status ?? 1);
}

/**
 * Checks that `zstd --version` runs successfully and reports exactly
 * REQUIRED_ZSTD. On any mismatch an error is printed to stderr and the
 * process exits with status 1.
 */
function checkZstd() {
  const r = spawnSync('zstd', ['--version'], { encoding: 'utf8' });
  // A failed spawn leaves `status` null, so this also covers a missing binary.
  if (r.status !== 0) {
    console.error('error: zstd CLI not found on PATH');
    process.exit(1);
  }
  const m = r.stdout.match(/v(\d+\.\d+\.\d+)/);
  if (!m || m[1] !== REQUIRED_ZSTD) {
    console.error(`error: zstd ${REQUIRED_ZSTD} required (matching deps/zstd), ` +
                  `found ${m ? m[1] : 'unknown'}`);
    process.exit(1);
  }
}

// Parameter names of the function each corpus file is compiled as.
const PARAMS = ['exports', 'require', 'module', '__filename', '__dirname'];

/**
 * Recursively yields the paths of regular files ending in `.js` below `dir`,
 * visiting the entries of each directory in ascending name order.
 *
 * A directory that cannot be listed is skipped silently; main() separately
 * rejects a corpus root that ends up contributing no files.
 *
 * @param {string} dir Directory to walk.
 * @yields {string} Path of a `.js` file (`dir` joined with the entry names).
 */
function* walk(dir) {
  let ents;
  try {
    ents = readdirSync(dir, { withFileTypes: true });
  } catch {
    return;
  }
  // Plain code-unit comparison: independent of the locale.
  ents.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
  for (const e of ents) {
    const p = join(dir, e.name);
    if (e.isDirectory()) yield* walk(p);
    else if (e.isFile() && p.endsWith('.js')) yield p;
  }
}

/**
 * Harvests one V8 code cache per corpus file into a temporary directory,
 * trains a zstd dictionary of at most MAXDICT bytes from those samples and
 * writes it to OUT. The temporary directory is always removed.
 *
 * Exits with status 1 if `zstd --train` fails; throws (reported by the caller
 * below) if a corpus root yields no files, if two corpus files map to the
 * same sample name, or if a sample cannot be written.
 */
async function main() {
  checkZstd();
  const { default: vm } = await import('node:vm');

  const files = [];
  for (const root of CORPUS) {
    const before = files.length;
    for (const f of walk(join(ROOT, root))) files.push(f);
    // walk() hides readdir errors, so a missing root would otherwise just
    // shrink the corpus and silently change the trained dictionary.
    if (files.length === before) {
      throw new Error(
        `corpus root '${root}' is missing or contains no .js files`);
    }
  }
  files.sort();

  const samples = mkdtempSync(join(tmpdir(), 'cc-dict-'));
  const sampleFiles = [];
  const seenNames = new Set();
  let r;
  try {
    for (const f of files) {
      let code;
      try { code = readFileSync(f, 'utf8'); } catch { continue; }

      // Only the compilation is best-effort. Errors from writing the sample
      // below must propagate, otherwise a failed write would quietly drop a
      // sample and change the output.
      let cd;
      try {
        // NOTE(review): `filename` is the absolute path of the file. If V8
        // records the script name inside cachedData, the output would depend
        // on where the checkout lives — confirm.
        cd = vm.compileFunction(code, PARAMS,
                                { filename: f, produceCachedData: true })
          .cachedData;
      } catch {
        continue;  // skip modules V8 can't compile standalone
      }
      if (!cd || cd.length === 0) continue;

      // Flattening the separators is not injective ('a/b.js' and 'a_b.js'
      // both become 'a_b.js.cache'). A collision would overwrite one sample
      // and hand the same path to zstd twice, so refuse to continue.
      const name = relative(ROOT, f).replace(/[\\/]/g, '_') + '.cache';
      if (seenNames.has(name)) {
        throw new Error(
          `sample name collision: '${name}' is produced by more than one ` +
          `corpus file (last: ${relative(ROOT, f)})`);
      }
      seenNames.add(name);

      const out = join(samples, name);
      writeFileSync(out, cd);
      sampleFiles.push(out);
    }
    sampleFiles.sort();
    console.error(`harvested ${sampleFiles.length}/${files.length} code caches from ` +
                  `${CORPUS.join(', ')}`);

    // One sample path per file is spread on argv; the fixed corpus (~1.4k
    // files, a few hundred KB of paths) stays well under ARG_MAX.
    r = spawnSync('zstd',
                  ['--train', ...sampleFiles, `--maxdict=${MAXDICT}`, '-f', '-o', OUT],
                  { stdio: ['ignore', 'ignore', 'inherit'] });
  } finally {
    rmSync(samples, { recursive: true, force: true });
  }
  if (r.status !== 0) {
    console.error('error: zstd --train failed');
    process.exit(1);
  }

  const size = readFileSync(OUT).length;
  console.error(`wrote ${relative(ROOT, OUT)} (${size} bytes)`);
}

main().catch((e) => { console.error(e); process.exit(1); });