From d1f9074ef54ba475a5b25832c75a2a885fc15cd7 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 11 Jun 2026 20:17:56 -0400 Subject: [PATCH] src: embed zstd dictionary for further compile cache size wins Builds on the zstd compression in #63861 by embedding a small zstd dictionary trained on a diverse corpus of real modules, so each small/medium compile-cache entry compresses better. Per entry, we keep the smaller of the plain and dictionary-assisted frames, so the dictionary only ever helps. - Add src/compile_cache_zstd.dict (16 KiB). It is trained on V8 code caches harvested (via vm.compileFunction, the same shape the CJS loader produces) from a diverse corpus: bundled npm packages, lib/, tools/ and a few deps. - Add tools/generate_compile_cache_dict.py and a node.gyp action that generates compile_cache_zstd_dict.h into SHARED_INTERMEDIATE_DIR at build time; no generated header is checked in. libnode's include_dirs are updated to pick it up. - Prepare the CDict/DDict once per process (shared across all handlers and Workers, matching the lazy-context approach from #63861) and use them in Persist() and ReadCacheFile(). Persist() compresses the plain and dict frames into separate buffers and selects the smaller one, so the written bytes and recorded size always agree. The dictionary is only tried for entries up to 256 KiB; larger blobs never benefit, so the second compression is skipped to avoid wasted work. Falls back to plain zstd if dictionary preparation fails. - The dictionary is embedded in the binary because the compile cache must be usable early, portably, and without extra filesystem state. - No on-disk format change: dict-assisted frames carry the dictID, plain frames carry none, and a single DDict decompresses both. 
- Size, measured on data held out from training (per-entry min policy): diverse modules go from ~1.87x (plain zstd) to ~2.44x with the dictionary (~24% smaller on disk); on test/parallel, which is not in the training corpus at all, ~1.74x -> ~2.22x (~22% smaller). A real end-to-end run (npm --version, ~70 modules) is ~15% smaller. Read time is unchanged and the extra write-time work is negligible. - Add a multi-module write/read roundtrip test and a startup benchmark (standard createBenchmark harness). --- benchmark/misc/compile-cache-timing.js | 72 ++++++++++++++++++ node.gyp | 17 +++++ src/compile_cache.cc | 72 +++++++++++++++++- src/compile_cache_zstd.dict | Bin 0 -> 16384 bytes test/parallel/test-compile-cache-success.js | 80 ++++++++++++++++++++ tools/generate_compile_cache_dict.py | 37 +++++++++ 6 files changed, 274 insertions(+), 4 deletions(-) create mode 100644 benchmark/misc/compile-cache-timing.js create mode 100644 src/compile_cache_zstd.dict create mode 100644 tools/generate_compile_cache_dict.py diff --git a/benchmark/misc/compile-cache-timing.js b/benchmark/misc/compile-cache-timing.js new file mode 100644 index 00000000000000..bdeae8c5cd2e93 --- /dev/null +++ b/benchmark/misc/compile-cache-timing.js @@ -0,0 +1,72 @@ +'use strict'; + +// Startup benchmark for the compile cache (including the zstd dictionary). +// Compares no-cache / cold-cache / warm-cache for two workloads: +// big - one large module (the typescript.js fixture) +// many - many small modules (generated here, side-effect-free) +// The modules are generated into a temp dir so the benchmark is self-contained +// and reproducible, and never executes unrelated code. 
+ +const common = require('../common.js'); +const { spawnSync } = require('child_process'); +const fs = require('fs'); +const os = require('os'); +const path = require('path'); + +const bench = common.createBenchmark(main, { + workload: ['big', 'many'], + cache: ['none', 'cold', 'warm'], + n: [30], +}); + +const BIG = path.resolve(__dirname, '../../test/fixtures/snapshot/typescript.js'); + +// Generate `count` small, side-effect-free modules and return the require() +// code that loads them all in one child. +function makeManyModules(dir, count) { + fs.mkdirSync(dir, { recursive: true }); + const reqs = []; + for (let i = 0; i < count; i++) { + const file = path.join(dir, `mod-${i}.js`); + fs.writeFileSync( + file, + `'use strict';\n` + + `module.exports = function value${i}(a, b) {\n` + + ` const sum = a + b + ${i};\n` + + ` return { id: ${i}, sum, label: 'module-${i}' };\n` + + `};\n`); + reqs.push(`require(${JSON.stringify(file)});`); + } + return reqs.join(''); +} + +function run(cmd, args, cacheDir) { + const env = { ...process.env }; + if (cacheDir) env.NODE_COMPILE_CACHE = cacheDir; + else delete env.NODE_COMPILE_CACHE; + const child = spawnSync(cmd, args, { env, stdio: 'ignore' }); + if (child.error) throw child.error; +} + +function main({ n, workload, cache }) { + const cmd = process.execPath || process.argv[0]; + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'cc-bench-')); + const args = workload === 'big' ? + [BIG] : + ['-e', makeManyModules(path.join(tmp, 'mods'), 120)]; + const cacheDir = cache === 'none' ? 
null : path.join(tmp, 'cache'); + + try { + if (cache === 'warm') run(cmd, args, cacheDir); // populate once + bench.start(); + for (let i = 0; i < n; i++) { + if (cache === 'cold' && cacheDir) { + fs.rmSync(cacheDir, { recursive: true, force: true }); + } + run(cmd, args, cacheDir); + } + bench.end(n); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } +} diff --git a/node.gyp b/node.gyp index d2dbce19992b10..ab54cc27437e72 100644 --- a/node.gyp +++ b/node.gyp @@ -1110,6 +1110,22 @@ '<@(linked_module_files)', ], }, + { + 'action_name': 'generate_compile_cache_zstd_dict', + 'inputs': [ + 'src/compile_cache_zstd.dict', + 'tools/generate_compile_cache_dict.py', + ], + 'outputs': [ + '<(SHARED_INTERMEDIATE_DIR)/compile_cache_zstd_dict.h', + ], + 'action': [ + '<(python)', + 'tools/generate_compile_cache_dict.py', + 'src/compile_cache_zstd.dict', + '<@(_outputs)', + ], + }, ], }, # node_base { @@ -1123,6 +1139,7 @@ 'src', 'deps/v8/include', 'deps/uv/include', + '<(SHARED_INTERMEDIATE_DIR)', # for compile_cache_zstd_dict.h etc. ], 'dependencies': [ diff --git a/src/compile_cache.cc b/src/compile_cache.cc index 9e11793aa3388f..35c4b02cc70d9a 100644 --- a/src/compile_cache.cc +++ b/src/compile_cache.cc @@ -11,6 +11,10 @@ #include "util.h" #include "zlib.h" #include "zstd.h" +// kCompileCacheZstdDict + kCompileCacheZstdDictSize come from the header +// generated at build time by the GYP action (from src/compile_cache_zstd.dict). +// The include directory (SHARED_INTERMEDIATE_DIR) is added by node.gyp. +#include "compile_cache_zstd_dict.h" #ifdef NODE_IMPLEMENTS_POSIX_CREDENTIALS #include // getuid @@ -28,6 +32,29 @@ using v8::ScriptCompiler; using v8::String; namespace { +// The compile-cache zstd dictionary is immutable and embedded in the binary, +// so the prepared CDict/DDict are created once and shared across all handlers +// (and all Environments/Workers) instead of per handler. They live for the +// lifetime of the process. 
Returns nullptr if preparation fails, in which +// case callers fall back to plain (dictionary-less) zstd. +ZSTD_CDict* GetCompileCacheCDict() { + static ZSTD_CDict* cdict = + ZSTD_createCDict(kCompileCacheZstdDict, kCompileCacheZstdDictSize, 1); + return cdict; +} + +ZSTD_DDict* GetCompileCacheDDict() { + static ZSTD_DDict* ddict = + ZSTD_createDDict(kCompileCacheZstdDict, kCompileCacheZstdDictSize); + return ddict; +} + +// The dictionary only helps small/medium caches; for larger inputs zstd's own +// adaptive model dominates and the dictionary never wins, so we skip the +// (otherwise wasted) second compression above this raw size. Decompression is +// unaffected: a single DDict decodes both dict-assisted and plain frames. +constexpr uint32_t kCompileCacheDictMaxRawSize = 256 * 1024; + std::string Uint32ToHex(uint32_t crc) { std::string str; str.reserve(8); @@ -266,10 +293,20 @@ void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) { Debug("failed to create zstd context\n"); return; } - // Decompress directly into the buffer handed to V8. + // Decompress directly into the buffer handed to V8. The embedded + // dictionary is referenced via a shared, prepared DDict; plain frames + // (which carry no dictID) decompress correctly with it as well. 
std::unique_ptr raw_data(new uint8_t[raw_size]); - size_t decompressed_size = ZSTD_decompressDCtx( - zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size); + ZSTD_DDict* ddict = GetCompileCacheDDict(); + size_t decompressed_size; + if (ddict != nullptr) { + decompressed_size = ZSTD_decompress_usingDDict( + zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size, + ddict); + } else { + decompressed_size = ZSTD_decompressDCtx( + zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size); + } if (ZSTD_isError(decompressed_size)) { Debug("decompression failed: %s\n", ZSTD_getErrorName(decompressed_size)); return; @@ -508,16 +545,43 @@ void CompileCacheHandler::Persist() { // shutdown and should add as little overhead as possible. If the data // is not compressible, store it uncompressed, which is indicated by // the cache size being equal to the uncompressed size in the headers. + // + // We also try the embedded trained dictionary and keep whichever frame is + // smaller (still subject to the "only store if < raw" policy). The + // dictionary mainly helps the small/medium caches that dominate real + // compile cache usage; for inputs where plain zstd already wins we keep + // the plain frame. char* cache_ptr = raw_ptr; uint32_t cache_size = raw_size; std::unique_ptr compressed; + std::unique_ptr compressed_dict; if (cctx != nullptr || (cctx = ZSTD_createCCtx()) != nullptr) { size_t compressed_bound = ZSTD_compressBound(raw_size); compressed.reset(new uint8_t[compressed_bound]); size_t compressed_size = ZSTD_compressCCtx( cctx, compressed.get(), compressed_bound, raw_ptr, raw_size, 1); + char* best_ptr = reinterpret_cast(compressed.get()); + // Only attempt the dictionary for small/medium entries (see + // kCompileCacheDictMaxRawSize); for large blobs it never wins and the + // extra compression would be wasted work. + ZSTD_CDict* cdict = raw_size <= kCompileCacheDictMaxRawSize + ? 
GetCompileCacheCDict() + : nullptr; + if (cdict != nullptr) { + // Compress into a separate buffer so the selected frame's bytes and + // size always stay in sync (the plain buffer is left untouched). + compressed_dict.reset(new uint8_t[compressed_bound]); + size_t dict_size = ZSTD_compress_usingCDict( + cctx, compressed_dict.get(), compressed_bound, raw_ptr, raw_size, + cdict); + if (!ZSTD_isError(dict_size) && + (ZSTD_isError(compressed_size) || dict_size < compressed_size)) { + compressed_size = dict_size; + best_ptr = reinterpret_cast(compressed_dict.get()); + } + } if (!ZSTD_isError(compressed_size) && compressed_size < raw_size) { - cache_ptr = reinterpret_cast(compressed.get()); + cache_ptr = best_ptr; cache_size = static_cast(compressed_size); } } diff --git a/src/compile_cache_zstd.dict b/src/compile_cache_zstd.dict new file mode 100644 index 0000000000000000000000000000000000000000..b64455d45b1d82ca5b0c2a4dc0c41a6c1220fc68 GIT binary patch literal 16384 zcmcJ04}4VBmH&H_mzg06;Xx1rL=1li6hahKWKr{GG9d#=$OIyw?&eJ+b2bI(2J=E8&3?>+R|?Q_Dq9*UgrzHVaalQ;fwN?&Vh?|EfMDX%l7wJ8N3PCbesAl;d;`ksFjg$)9?AW;)>F$*h%5MiPr`B6wSE))p(;%eK)DU|Yo^1R{l{Hmn%VMQ?kr<_F6CaUW_M+J>46cv^? zD}@tl@T(qUZY!KLpt!uFe}_&}CSV3!lYpb}uK<6l;u3ESZ+`Q~C;yx;r__b{k~Pj# zLKL8!Uz(`L`^EM`#f^@-l$r?yRg_sv@IWdqV)~Z*l|wC$ojmZSu2e=kZn>l0|6ljL zT_Pd)RA+|^20cGA`Ta8UUeAninl__gx#uCZo23!_T$)??sye7nSUPp>gbM%NYVp80 ze4F_GSGD+vd&1JHr_0Lp2^BM!&-{nUN7bSwWtBzC%jSvSm1XZuC{-pX#Rt{WC6gB8 z<8@W!@3ZscZm3~HNHaXwjh|7_lu7d@&%-y9WQ*>w1sxN5=dQX*cTsC4Q(`%ax&62Mzwmky$};g- zGLr303nhho6u%KnEL{dNc&IKe@4PXn3xqsfb=3fiPnXW+%pwKx))tujIkw^+ps6n8Ow62w_mw9nV-fh_#&-bRM-! 
zWGiV+_ivh&2e&Ss?QF3!sRXqv;RrMH%s>3F4BstQEV!j7M~b?UAVPz>i+}Tjic?|M z^mn(X5-nCwA`%tiOnQ^XpE-wZuF0mcA>`<-=}E-14!!BoU=`QkD44aWhE$)G4n#8c z;_y5*mJ0WQ$u~|e2Dr2C5-Bm|qWfq6qGHD6k#%+8N?kl*HAk|YV8ynRr18p*hil3I zn3b)KWFwNh$g}SHxmO+GWDQ*tjK-Q)s$;?J84Sisd*5>}yHDcq)7|DJkosRVTdpDwI$pS7h{y~bCo}#5M*&q;XEj_HKUz{|or|0Eq zI4YV>du4qIs5w178!IbDTIb_OMRT{>IhES>CRMx!eThqex$mU;v?Rz zO7RVMt3}Hv*8oqhz#3--mZC`Qk?TZdy-3t=OkBeG30)TJ_3~X$e99iDt*d(DX-nD; zGvB&~tWR1(s1(mXj-iUZ{+5S-!KIDeKS5ID>=TduWh(_Tl-wLi#5;_1thd`rW`pT; zN=zkTnr?XXJ}!blDw)KpTO04TQoX| z(N4J#Xrcsu^elzEJJR27HKo$sXnm_?w~%XHQG~Fq1t8PoColaUy^q>(e|wFaq(|0L z0%s0Q+Dl1Krjo0B)?_1T$vwqHnKBbUwX%8R+@i@l&%t+JI-ZTRCoHT|E7IKvfv$_E zGud@0wpuV3Rwf%t)+OSxPD#ZCj$+`qL-75APbO@jDJ3^cTU7-ji9~xO+J$lxKZQ$| z9b0xm*EPA0i56IE1I@yRI=eY`iR;W5rEz4(?0r@srGv=~ly?h^yMDXH7K?h+>Trmy z#s@lB(QGxnd&T0}pG6Myluf4+38{E6SWIQO-y8-rV-mN0e+ zVe&ycfr9lu3&dhwm+xS2-X)1#f<(~zzBqlkP(g<>#|pc!mTl5jbTKp zgMmmqxWec%YQq7;-w-wcp5e)hPFX^=MJLGTa zt2P4tMs3JwJ>vsIEWPSi2!m54?~OJkRI^Ht?Eap~i670NmDq5zG27GJq2|{2`-d zMQt>Qk+D6L1dL$Fs7s@db-<4qE3#~_(;r)D7}bDSgE0j#p7u?b87op*!|>PFhQS|< zt0h(qdVT?K(~*5`1rH4Lt>36aKaASOP=MnNZwLlg_XSh`v#}aYMqi)A3mfE9gBVX7 zbeQk#BYi*Uw}y-+|M@LTQ3j~$V2ER{ZCDxzRM%f-tO*8cSG7ty8yg89Mf)-GtWNNZ zw72+;0N@&`frGgU8v)F1?J6U*Botd&R~sg$Vu3Kkg%ZZ(g?ieC9r6jcG%0C(F?r8| z5qw5*gx|Ierg(icEBxP*kdB4r#aTu(Gy%z_#N<62*tbe-?r2P2tn;0Uc4mQqHtf$>KpyLNtg*2yGrSuyy9o80M)0(Ig(=#Bb{>K9syhf_gL?n}uW+?$=&(6UI zsG?`D4EmEa3mReO!Elq+WTR*P7!xl=_-uU8+y+@iq{}9esm7|LDg)`+coKz3CLf$k4!9_)4UsEdp-dC{kuyk4lqr-ZcBAy!6;9Yso>Rr)`QkQYkmQkXFlhc3l6(;B&txgjJPtu}oCJGP z>8!(Xn;_dFK`-8yj3)R9h+TroH&H21kXfLquyd$!ehxHJzH~I236!6JpovO}Ru43u zJ86gf0%&ZtCLhf?b#i>tHlCyAox&-AuA>c;lxg#%vFo{Ue3V_L@HuIU>-o~9yg6&%UwVf)stBVbxEXe=tT*{0$r^RRZu>MTYn&$qohfTm z3}N!MCEEE?eK>aKtFSAziKc+WL!9&-Odg+@v>jr`QH!%f$)2Ej7=!%?Ka;g$T(-Q} z2d>IQJcDII|DdAm9TeD9Kb&6bIE`|TB;^GJU1)M4ekMyvP^-1kD6yPg%)e4N8f^(s z?jM0x12pL#lV}uv_h__7fb!`Gv@2~ic8;xamOZ1|O+O#+Qpnum5$)~`nzsP+7-Z%Y?0piM3xmicQeqmy=j;IPZ z>DQJWhYJtR=R3arV3jY@-xg8AUxyQBF^*lfI<5 zDn2dNdzbo;_Bn> 
zyGo`M99eu^^Zw&d$>I}*MR&LtpKzBPoVe3dI{(Nnui{#EykOk4<0p!bOG;zLADLG0 z@ih0bX(#?*9sfitD}C5x2gGW)TAk=Q8s(OXRj0V&M;?}YAim8nO%wByQii$8U_e6C&a zm0wh-Y5A<2sFv)Ha0PL&+Qdphd?@Eufx|=8Y`2?}&hMP}dwapKSLM*KgecUE%Wbd9 zxg_p*zbC@05=DWq!Rb{AEFE@k_e?%2OiSrCndf*-$bPlj_L>|bG82^rlQT&&2m1lt z;4HoJxVMrT!OBD1R0*uZ9ad!P}fpf}G z5ltoGLd*s_BgvSRS({pK6GxDv!DEf{D!~Y#MkFq=Vaal;Zb$Fcij}4iv02F!jKeMc z?EQo*68+RHvqXszb2d8GwQoCr0p)KNEC*3-Q)3UO9`M{p`J0uME*#t4B4R-VBh2SN zH}D;oTih~eyvf9w5}?U5P%cJQ9F>-h$sDn0YZ znWWFX1)-tc;~<4l)JT{>k=ECnd@llzJpKBL;Y_byCo-3^=5x9yR4TIX0vH?i!f}H6 z+fm810rR>;@kCr#s15rgj2SWI+Hj6dVyvhjmgm+I6EEc)WQLDsMDf$`cD;0yFuWlJ zbe74T!NKPcI7yq&o7+}|??^|7&01ub=feSJUkIGM=I{w_KGIdRCKc_H2MGa81g5uO zG7QVCuuNFY4s-o7Y4wmY7C>-qoicBxs9_Ak5pGrYyrG&Ljxf|)OH_To$WpXDzbbUH zlp|L=G>-yd1fm_?gUc4dI~57zUfCY;qq#CzL9udC~Yjhcp`=ax!f8 zfRkaPgQsosO*B^qj+uZEP43<^vORWkk<*^GT#k2N(EJ_Pa}slrL)5DGfK%)sT}ceL za3aoeP#r!6%?d2Rt(cf$%q4sFk3b~n7ubll%`p@-ZwAhrn66>Gr8XYN$_A;^7TS0N zN{s-dxCmWOgPe+xfaLnwHlTK&_C4*8r?rH$GGgAV+egp)*F!)=D&JkM&C5-F(0mhk z9j?5tQ6rpvviEIj1k7$D0tcCFDrwONoXJMIX(4D6_lZ$cMY2-`cpr2c5bVOBuLIQ= zM^k<|@w4qb_JJ}9u1?2eh&~UKKCn;poW_nb+BM&A$=oqaD~E|tSy(=E zfvV1PECXkukpWDcMgiivP3sxbt{yrtC^qCI4Z-{k`DR6{_yilW?kmuJ+Aa*iSGWs7 zwC8pez-F4IDTm7npXvJenCtN3kKNici;sESKG))7UazlU@yBC|eA?oVi%Oo~SzJ2* znc@@6junk7{&d-~lJUjImmM2hR($ND!qPLu;mSn2_JSO*ACN~=3ODUzI^(~54(q{1 zlh2&QNtQ~(lg!;=gJ}cW4W4adF($97eND|NZHh4h8Bp3@TlqK~`QIt+JJ~qWO=v(H zt48?kC`+DfVXKWOqqeY>``1;^7o9F%#L6P#s*R{16O)G-*n|y!8 zh0ju1#52OLf=El0(ZHzH5butQ2nqu$po8u7pMUU`rDTPS1(sZ_#>Bt_lXXS}|ZeR1mQdXrIJfMGwsviHf?$Ump9Ntonv3^)Jc zYqj}xG9WAK=Dzco$>WlPw>l{!(4^U3|H5VYcuwd+w5P3~m*3A2uIR)NHD%qKmG4)u zpKL~+wYo<89I>)9$6m6z{M#}9YT7`K# zmg)!tIXE-W56hsN09(}~VO zfJ;zaA`b$_05T(zv+z`VrLITMJ-sC+fsT!|_wzkGIH~BK=hQuBCGaY-pkN*?RNI{; z`)l`KJZ`0iL_{+Aq`|XCdk>tV;^d0ufcd`093lB*>#+}wzhaT9es>DBCD7e zptV-?(qxV_gF^=dMK(@~oYn!G;wfG=amM3LGT+6W0|!!1TQ+URHb+CG-Y0{%b{Q8mPP`VpNG5g|mn;*5|5 z!>iUH4i!X{2GKkj;X#C|c0-_6#PI4Em11N}wjrW$L4R#sAEIuet`m{HKto@!4-qzh 
z%&6PKC>tU|2BLhT?bU%$ZA-WsZ6Ruw2*-`;)+R)}7)5I_4D^Y854JY>(VuY3W%ac! zOW7AjzYuLgUu3i?5Z>6e9%bQ>vC?0Mvf3b{O^uD0_hlGyGQttW4_jBn{EQ-kenZ&E zB9geJHZ1ys2;WLS$AxGg+P0&8T`SfFw{AfEihKzpn$<^oF(bBe5!&8tto8fr>g)V< z!41JMqIM|D8i7u~(Ym3@Xh~swOZ|;q^$8=mq0tD-Iat*itZRv7E-(V;e>NZDr{zN+ zY#?&jg}&e&GK|%Av5+4l3;OM!^kH(Nqz{u~dhnm{K$0qAchKyB1zQh0-()F;+cD~? z7hn!nrn+*@P)Z{Sk>H)930(^!ye(s*QWCPgt5SKv8=u9Er3^2qP)n5lFB-)zNP|aJjTeh@Jn& zXhK^-RAhu1IUMe0C z$c)W4;TjW}k)|$9N#`69x503@dQ|(Vb_A}$*Wnucvo=Phe{hPb%}}+4!trN_4YB&jZ zHKp$6B;;^tA+WxdJ#ziJwis6S$l68D{bWGZYYl{Z|H*982snER96Lj6R&Q~RM>9c> zi6dpA{FBCOug;iNPc4BfLc@27k+)C995qFcz1@Ey9!NqSxHAXMzDgXj=aNa>nMsG~ zXLa9bIPK0%qq6D2=boTeNhM`q&u)kvVnYCKyT_jXQ)wPI<4|MKe2baVC}Dru+(l( zcwf|}WgqN*b&9dCT;W+Fk|t@ostk4RhWy*I*#aH=l%*H`1NA%wb|Ts5x)Ea}~Y&QnC>z z+!0sNkHCI)uA+D5$pdXs$F7?PtW%xwT;DFuQ(dY|HqTb3eW*>SL`0X)G3N?ng=2$L z(QttkZ9VJXG4+&KLZQqvcAUvC8lA$YjkkmzGVv+P^;N^)pwj;Qr7i4fyJhtdhLj&I zSyLu^FXI5`t4`ai2(dx^toxG?-x2BI*8+Gs)`p^HntsWfZtg21 zuMFV!Ebgcgj4(G3bfqSp0uxHw7e)lko(ipnT5lwsMiF0g=*<1%%Bl&cz%=(}Itd}$ z@-N61Ovd*}I)pKLdJ_Gjt;>Eecq?aYB+Qyb-1cgDZY*NA&puvtkVBJaQ`{1aCwWsE zKCo;*iUn&fiGKYy>Qf1`%IeRs26Z^Yw?u#b4);@82TXYjyFQXx-IvTGL>6zHKkyaO zX|XmYa9&?w--8kJ`EL^&eo0llCbAL#d(tDyUNiUI<&@!yw}K6mUG{5l>g@8Ud(pX} zv0HO*$X(R{Bpa_=z4j?K-nKCTKGh{6F=UXq*NI3^{@V#heAb(hA*Ro&cghdt!39lX z(8jG1UtBm^IsCM+1wP{HyV5>YZ9#=xea$lpDi8{@P@r&4x^@(}l?$88)|Xv5p`uDT zYfzouJaftP`l+?kuADTWUU*QQP_ayzmX%AQ+F21Cwz_=uiP%~DT!~Dkcw%dX;`Kb| z_Ppd4YIvVrycX}gA>1zJC2(Epy`HvfxiM_Y2&C1CC@R0bPcU+!VUD%)O=;`Rz zy#Vw4$`#n}xo!AV>_i~AuF#6A?=z`QxPOQUgv@gD56ALGMa)fNXs9GCYt5hAq)^KxQR= zf?J$k2_dXaqSNR-yjzC;AG6Vg*|NZ%Z_2Wq?-{N{{DZw*kre{3vaE{Bxxb6>Vm zRqcc-IN>Y-zVn1dcpaR{AG<_S)Z(h(G}Zr$k4wYriF{Vc|A_f2TKW-wCb8JJ-P*Y> zUTAXROz2Yn{e^3VN&mwUmt+u*O7Y_;5rch1HJjTWEvfV8mn5d)l%5gV7uwjBtZWyZEmkkG6^bV)?8i z43AFS_fI*-RB}tWcm5amO+t@yhZqbExA8NJXd+{`vh8DW*5bj3|AeSDXwBMMZM}A- zy42B1TyspcNSLHnlHWCQ7KgOi=2-<$5)d7eH9`;vW$26fhGv(*-+w-H;F8HFDd(I-ZM6>Au2Z*FpDUm#W<1-UX}l`B?>wNom@% zWra`gbi3#OY+AuF>ywW)Pw7Pko-t+KXS%dw#fQ62d^{G{O~x=cj=Ax^g0dp!%9uL? 
zF|T4oyNW+0(ykKRMkyS98E{Zs>Y?$&&hZ~a_YM?#2joA2Dm0KeK5fY<*6<#i&-UGv z;s5!$%`zUjK}EZtYB`)26AnYwS5?whUq=IFk;y3eQk=IOrqy6|;+IGw^g5ilCwu$KnZ^N5Ry0p&m@4iH)WD@4hbd159-O(Ms@- ztN(QWA~ti;{$-rpZt>r^K!gTqd2*(>@!!_A5pn+E|H5Tm``0esLG{(v*6|ttpu2RP giF)Bw6&mx@f6%>h^wnV8@G9HaYzK1w?tjqz|3Pq%!vFvP literal 0 HcmV?d00001 diff --git a/test/parallel/test-compile-cache-success.js b/test/parallel/test-compile-cache-success.js index c02a6243286972..9417b6bf63c490 100644 --- a/test/parallel/test-compile-cache-success.js +++ b/test/parallel/test-compile-cache-success.js @@ -64,3 +64,83 @@ const path = require('path'); } }); } + +// Exercise the dictionary-compressed path (added on top of #63861) for many +// small modules, which is where the embedded dictionary helps most. We write +// the cache, then read it back and assert every entry is accepted - this +// proves each dict-compressed frame decompresses to exactly the bytes that +// were persisted. +{ + tmpdir.refresh(); + const dir = tmpdir.resolve('.compile_cache_dir'); + + // Generate a handful of small modules so the dictionary path is exercised. + const count = 8; + const modules = []; + for (let i = 0; i < count; i++) { + const file = tmpdir.resolve(`mod-${i}.js`); + fs.writeFileSync( + file, + `'use strict';\n` + + `module.exports = function value${i}(a, b) {\n` + + ` const sum = a + b + ${i};\n` + + ` return { id: ${i}, sum, label: 'module-${i}' };\n` + + `};\n`); + modules.push(file); + } + const reqCode = modules.map((m) => `require(${JSON.stringify(m)});`).join(''); + + // First run writes the cache for every module. 
+ spawnSyncAndAssert( + process.execPath, + ['-e', reqCode], + { + env: { + ...process.env, + NODE_DEBUG_NATIVE: 'COMPILE_CACHE', + NODE_COMPILE_CACHE: dir + }, + cwd: tmpdir.path + }, + { + stderr(output) { + for (const m of modules) { + const name = path.basename(m).replace(/[.]/g, '\\.'); + assert.match(output, new RegExp(`writing cache for .*${name}.*success`)); + } + return true; + } + }); + + const cacheDirs = fs.readdirSync(dir); + assert.strictEqual(cacheDirs.length, 1); + // At least one entry per module (the `-e` runner is cached too). + const entries = fs.readdirSync(path.join(dir, cacheDirs[0])); + assert(entries.length >= count, `expected >= ${count} entries, got ${entries.length}`); + + // Second run reads every cached entry back; "was accepted" only happens when + // the decompressed bytes match the freshly produced in-memory cache, so this + // is a full roundtrip check of the dictionary-compressed entries. + spawnSyncAndAssert( + process.execPath, + ['-e', reqCode], + { + env: { + ...process.env, + NODE_DEBUG_NATIVE: 'COMPILE_CACHE', + NODE_COMPILE_CACHE: dir + }, + cwd: tmpdir.path + }, + { + stderr(output) { + for (const m of modules) { + const name = path.basename(m).replace(/[.]/g, '\\.'); + assert.match( + output, + new RegExp(`cache for .*${name} was accepted, keeping the in-memory entry`)); + } + return true; + } + }); +} diff --git a/tools/generate_compile_cache_dict.py b/tools/generate_compile_cache_dict.py new file mode 100644 index 00000000000000..8794889775bb63 --- /dev/null +++ b/tools/generate_compile_cache_dict.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +"""Generate compile_cache_zstd_dict.h from a trained zstd .dict file. + +Invoked by the GYP action in node.gyp at build time. Only the small binary +.dict (src/compile_cache_zstd.dict) is checked into the repository; the C +array it produces is generated into SHARED_INTERMEDIATE_DIR. 
+""" +import os +import sys + + +def main(dict_path, out_path): + with open(dict_path, 'rb') as f: + data = f.read() + + lines = [ + '// Generated by tools/generate_compile_cache_dict.py', + '// from %s' % os.path.basename(dict_path), + '// The .dict file is the source of truth; do not edit by hand.', + '', + 'static const unsigned char kCompileCacheZstdDict[] = {', + ] + for i in range(0, len(data), 12): + chunk = data[i:i + 12] + lines.append(' %s,' % ', '.join('0x%02x' % b for b in chunk)) + lines.append('};') + lines.append('static const size_t kCompileCacheZstdDictSize = %d;' % + len(data)) + + with open(out_path, 'w') as f: + f.write('\n'.join(lines) + '\n') + + +if __name__ == '__main__': + if len(sys.argv) != 3: + sys.exit('Usage: %s ' % sys.argv[0]) + main(sys.argv[1], sys.argv[2])