Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions benchmark/misc/compile-cache-timing.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
'use strict';

// Startup benchmark for the compile cache (including the zstd dictionary).
// Compares no-cache / cold-cache / warm-cache for two workloads:
// big - one large module (the typescript.js fixture)
// many - many small modules (generated here, side-effect-free)
// The modules are generated into a temp dir so the benchmark is self-contained
// and reproducible, and never executes unrelated code.

const common = require('../common.js');
const { spawnSync } = require('child_process');
const fs = require('fs');
const os = require('os');
const path = require('path');

// Parameter lists handed to common.createBenchmark(); main() receives one
// value of each:
// workload - which script the child runs: 'big' loads the single large
// fixture (BIG), 'many' loads the generated small modules.
// cache - 'none' runs with NODE_COMPILE_CACHE unset, 'cold' wipes the
// cache directory before every child, 'warm' populates the cache
// once before timing starts.
// n - number of timed child-process launches.
const bench = common.createBenchmark(main, {
workload: ['big', 'many'],
cache: ['none', 'cold', 'warm'],
n: [30],
});

// Absolute path of the large single-module script used by the 'big' workload.
const BIG = path.resolve(__dirname, '../../test/fixtures/snapshot/typescript.js');

// Generate `count` small, side-effect-free modules and return the require()
// code that loads them all in one child.
/**
 * Writes `count` tiny CommonJS modules (`mod-0.js` ... `mod-<count-1>.js`)
 * into `dir`, creating the directory if needed.
 * @param {string} dir Directory that receives the generated files.
 * @param {number} count Number of modules to generate.
 * @returns {string} One `require(<path>);` statement per generated module,
 *   concatenated without separators (empty string when `count` is 0).
 */
function makeManyModules(dir, count) {
  fs.mkdirSync(dir, { recursive: true });

  let script = '';
  let index = 0;
  while (index < count) {
    const modulePath = path.join(dir, `mod-${index}.js`);
    // Each module only exports a pure function, so loading it has no side
    // effects beyond compilation.
    const moduleSource = [
      `'use strict';`,
      `module.exports = function value${index}(a, b) {`,
      ` const sum = a + b + ${index};`,
      ` return { id: ${index}, sum, label: 'module-${index}' };`,
      `};`,
      '',
    ].join('\n');
    fs.writeFileSync(modulePath, moduleSource);
    script += `require(${JSON.stringify(modulePath)});`;
    index += 1;
  }
  return script;
}

/**
 * Synchronously runs `cmd` with `args` in a child process, with all stdio
 * ignored, and the compile cache configured through the environment.
 * @param {string} cmd Executable to spawn.
 * @param {string[]} args Arguments passed to the executable.
 * @param {string|null} cacheDir Value for NODE_COMPILE_CACHE; when falsy the
 *   variable is removed so an inherited setting cannot leak into the child.
 * @throws {Error} If the child cannot be spawned, exits with a non-zero code,
 *   or is terminated by a signal. A failed child would otherwise be timed as
 *   if it were a valid sample.
 */
function run(cmd, args, cacheDir) {
  const env = { ...process.env };
  if (cacheDir) {
    env.NODE_COMPILE_CACHE = cacheDir;
  } else {
    delete env.NODE_COMPILE_CACHE;
  }

  const child = spawnSync(cmd, args, { env, stdio: 'ignore' });
  if (child.error) throw child.error;
  // `status` is null when the child was killed by a signal, so this check
  // covers both abnormal termination and non-zero exit codes.
  if (child.status !== 0) {
    const reason = child.signal ?
      `signal ${child.signal}` :
      `exit code ${child.status}`;
    throw new Error(`Child process failed with ${reason}: ${cmd}`);
  }
}

/**
 * Benchmark body: times `n` child-process launches for one configuration.
 * @param {object} conf
 * @param {number} conf.n Number of timed child launches.
 * @param {string} conf.workload 'big' runs the BIG script; anything else runs
 *   120 generated small modules through `-e`.
 * @param {string} conf.cache 'none' leaves the compile cache disabled, 'cold'
 *   removes the cache directory before every launch, 'warm' populates it once
 *   before timing starts.
 */
function main({ n, workload, cache }) {
  const cmd = process.execPath || process.argv[0];
  const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'cc-bench-'));

  // Everything that touches `tmp` lives inside the try block so the temp
  // directory is removed even if module generation fails.
  try {
    const args = workload === 'big' ?
      [BIG] :
      ['-e', makeManyModules(path.join(tmp, 'mods'), 120)];
    const cacheDir = cache === 'none' ? null : path.join(tmp, 'cache');

    if (cache === 'warm') run(cmd, args, cacheDir); // populate once

    bench.start();
    for (let i = 0; i < n; i++) {
      if (cache === 'cold') {
        // Every iteration must start from an empty cache directory.
        fs.rmSync(cacheDir, { recursive: true, force: true });
      }
      run(cmd, args, cacheDir);
    }
    bench.end(n);
  } finally {
    fs.rmSync(tmp, { recursive: true, force: true });
  }
}
17 changes: 17 additions & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -1110,6 +1110,22 @@
'<@(linked_module_files)',
],
},
# Build-time action: runs tools/generate_compile_cache_dict.py to turn the
# checked-in binary dictionary (src/compile_cache_zstd.dict) into the header
# compile_cache_zstd_dict.h under SHARED_INTERMEDIATE_DIR.
{
'action_name': 'generate_compile_cache_zstd_dict',
# Both the dictionary and the generator script are listed as inputs.
'inputs': [
'src/compile_cache_zstd.dict',
'tools/generate_compile_cache_dict.py',
],
'outputs': [
'<(SHARED_INTERMEDIATE_DIR)/compile_cache_zstd_dict.h',
],
# Command line: <python> <script> <input .dict> <output header>.
'action': [
'<(python)',
'tools/generate_compile_cache_dict.py',
'src/compile_cache_zstd.dict',
'<@(_outputs)',
],
},
],
}, # node_base
{
Expand All @@ -1123,6 +1139,7 @@
'src',
'deps/v8/include',
'deps/uv/include',
'<(SHARED_INTERMEDIATE_DIR)', # for compile_cache_zstd_dict.h etc.
],

'dependencies': [
Expand Down
72 changes: 68 additions & 4 deletions src/compile_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#include "util.h"
#include "zlib.h"
#include "zstd.h"
// kCompileCacheZstdDict + kCompileCacheZstdDictSize come from the header
// generated at build time by the GYP action (from src/compile_cache_zstd.dict).
// The include directory (SHARED_INTERMEDIATE_DIR) is added by node.gyp.
#include "compile_cache_zstd_dict.h"

#ifdef NODE_IMPLEMENTS_POSIX_CREDENTIALS
#include <unistd.h> // getuid
Expand All @@ -28,6 +32,29 @@ using v8::ScriptCompiler;
using v8::String;

namespace {
// The compile-cache zstd dictionary is immutable and embedded in the binary,
// so the prepared CDict/DDict are created once and shared across all handlers
// (and all Environments/Workers) instead of per handler. They live for the
// lifetime of the process. Returns nullptr if preparation fails, in which
// case callers fall back to plain (dictionary-less) zstd.
ZSTD_CDict* GetCompileCacheCDict() {
  // Function-local static: the compression dictionary is built from the
  // embedded bytes on the first call only, and the same pointer (possibly
  // nullptr if creation failed) is returned on every later call.
  static ZSTD_CDict* const shared_cdict = [] {
    constexpr int kDictCompressionLevel = 1;
    return ZSTD_createCDict(kCompileCacheZstdDict,
                            kCompileCacheZstdDictSize,
                            kDictCompressionLevel);
  }();
  return shared_cdict;
}

ZSTD_DDict* GetCompileCacheDDict() {
  // Function-local static: the decompression dictionary is built from the
  // embedded bytes on the first call only, and the same pointer (possibly
  // nullptr if creation failed) is returned on every later call.
  static ZSTD_DDict* const shared_ddict = [] {
    return ZSTD_createDDict(kCompileCacheZstdDict,
                            kCompileCacheZstdDictSize);
  }();
  return shared_ddict;
}

// The dictionary only helps small/medium caches; for larger inputs zstd's own
// adaptive model dominates and the dictionary never wins, so we skip the
// (otherwise wasted) second compression above this raw size. Decompression is
// unaffected: a single DDict decodes both dict-assisted and plain frames.
constexpr uint32_t kCompileCacheDictMaxRawSize = 256 * 1024;

std::string Uint32ToHex(uint32_t crc) {
std::string str;
str.reserve(8);
Expand Down Expand Up @@ -266,10 +293,20 @@ void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
Debug("failed to create zstd context\n");
return;
}
// Decompress directly into the buffer handed to V8.
// Decompress directly into the buffer handed to V8. The embedded
// dictionary is referenced via a shared, prepared DDict; plain frames
// (which carry no dictID) decompress correctly with it as well.
std::unique_ptr<uint8_t[]> raw_data(new uint8_t[raw_size]);
size_t decompressed_size = ZSTD_decompressDCtx(
zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size);
ZSTD_DDict* ddict = GetCompileCacheDDict();
size_t decompressed_size;
if (ddict != nullptr) {
decompressed_size = ZSTD_decompress_usingDDict(
zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size,
ddict);
} else {
decompressed_size = ZSTD_decompressDCtx(
zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size);
}
if (ZSTD_isError(decompressed_size)) {
Debug("decompression failed: %s\n", ZSTD_getErrorName(decompressed_size));
return;
Expand Down Expand Up @@ -508,16 +545,43 @@ void CompileCacheHandler::Persist() {
// shutdown and should add as little overhead as possible. If the data
// is not compressible, store it uncompressed, which is indicated by
// the cache size being equal to the uncompressed size in the headers.
//
// We also try the embedded trained dictionary and keep whichever frame is
// smaller (still subject to the "only store if < raw" policy). The
// dictionary mainly helps the small/medium caches that dominate real
// compile cache usage; for inputs where plain zstd already wins we keep
// the plain frame.
char* cache_ptr = raw_ptr;
uint32_t cache_size = raw_size;
std::unique_ptr<uint8_t[]> compressed;
std::unique_ptr<uint8_t[]> compressed_dict;
if (cctx != nullptr || (cctx = ZSTD_createCCtx()) != nullptr) {
size_t compressed_bound = ZSTD_compressBound(raw_size);
compressed.reset(new uint8_t[compressed_bound]);
size_t compressed_size = ZSTD_compressCCtx(
cctx, compressed.get(), compressed_bound, raw_ptr, raw_size, 1);
char* best_ptr = reinterpret_cast<char*>(compressed.get());
// Only attempt the dictionary for small/medium entries (see
// kCompileCacheDictMaxRawSize); for large blobs it never wins and the
// extra compression would be wasted work.
ZSTD_CDict* cdict = raw_size <= kCompileCacheDictMaxRawSize
? GetCompileCacheCDict()
: nullptr;
if (cdict != nullptr) {
// Compress into a separate buffer so the selected frame's bytes and
// size always stay in sync (the plain buffer is left untouched).
compressed_dict.reset(new uint8_t[compressed_bound]);
size_t dict_size = ZSTD_compress_usingCDict(
cctx, compressed_dict.get(), compressed_bound, raw_ptr, raw_size,
cdict);
if (!ZSTD_isError(dict_size) &&
(ZSTD_isError(compressed_size) || dict_size < compressed_size)) {
compressed_size = dict_size;
best_ptr = reinterpret_cast<char*>(compressed_dict.get());
}
}
if (!ZSTD_isError(compressed_size) && compressed_size < raw_size) {
cache_ptr = reinterpret_cast<char*>(compressed.get());
cache_ptr = best_ptr;
cache_size = static_cast<uint32_t>(compressed_size);
}
}
Expand Down
Binary file added src/compile_cache_zstd.dict
Binary file not shown.
80 changes: 80 additions & 0 deletions test/parallel/test-compile-cache-success.js
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,83 @@ const path = require('path');
}
});
}

// Exercise the dictionary-compressed path (added on top of #63861) for many
// small modules, which is where the embedded dictionary helps most. We write
// the cache, then read it back and assert every entry is accepted - this
// proves each dict-compressed frame decompresses to exactly the bytes that
// were persisted.
{
  tmpdir.refresh();
  const cacheRoot = tmpdir.resolve('.compile_cache_dir');

  // Generate a handful of small modules so the dictionary path is exercised.
  const moduleCount = 8;
  const modulePaths = Array.from({ length: moduleCount }, (_, index) => {
    const modulePath = tmpdir.resolve(`mod-${index}.js`);
    const moduleSource = [
      `'use strict';`,
      `module.exports = function value${index}(a, b) {`,
      ` const sum = a + b + ${index};`,
      ` return { id: ${index}, sum, label: 'module-${index}' };`,
      `};`,
      '',
    ].join('\n');
    fs.writeFileSync(modulePath, moduleSource);
    return modulePath;
  });
  const requireAll = modulePaths
    .map((modulePath) => `require(${JSON.stringify(modulePath)});`)
    .join('');

  // Both child runs share the same environment: compile cache enabled and
  // native debug logging turned on so stderr can be inspected.
  const childOptions = () => ({
    env: {
      ...process.env,
      NODE_DEBUG_NATIVE: 'COMPILE_CACHE',
      NODE_COMPILE_CACHE: cacheRoot
    },
    cwd: tmpdir.path
  });

  // Builds a stderr checker asserting that the pattern produced by
  // `patternFor` (given the regex-escaped basename) matches for every module.
  const expectForEveryModule = (patternFor) => ({
    stderr(output) {
      for (const modulePath of modulePaths) {
        const escapedName = path.basename(modulePath).replace(/[.]/g, '\\.');
        assert.match(output, patternFor(escapedName));
      }
      return true;
    }
  });

  // First run writes the cache for every module.
  spawnSyncAndAssert(
    process.execPath,
    ['-e', requireAll],
    childOptions(),
    expectForEveryModule(
      (name) => new RegExp(`writing cache for .*${name}.*success`)));

  const cacheDirs = fs.readdirSync(cacheRoot);
  assert.strictEqual(cacheDirs.length, 1);
  // The cache directory must hold at least one entry per generated module.
  const entries = fs.readdirSync(path.join(cacheRoot, cacheDirs[0]));
  assert(
    entries.length >= moduleCount,
    `expected >= ${moduleCount} entries, got ${entries.length}`);

  // Second run reads the cached entries back and must log that each module's
  // cache was accepted.
  spawnSyncAndAssert(
    process.execPath,
    ['-e', requireAll],
    childOptions(),
    expectForEveryModule(
      (name) => new RegExp(
        `cache for .*${name} was accepted, keeping the in-memory entry`)));
}
37 changes: 37 additions & 0 deletions tools/generate_compile_cache_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python
"""Generate compile_cache_zstd_dict.h from a trained zstd .dict file.

Invoked by the GYP action in node.gyp at build time. Only the small binary
.dict (src/compile_cache_zstd.dict) is checked into the repository; the C
array it produces is generated into SHARED_INTERMEDIATE_DIR.
"""
import os
import sys


def main(dict_path, out_path):
    """Write a C/C++ header embedding the bytes of a zstd dictionary.

    The header defines ``kCompileCacheZstdDict`` (the raw bytes) and
    ``kCompileCacheZstdDictSize`` (their count). It is self-contained: it
    includes <stddef.h> for ``size_t`` and is wrapped in an include guard.

    Args:
        dict_path: Path of the binary .dict file to read.
        out_path: Path of the header file to (over)write.

    Raises:
        ValueError: If the dictionary file is empty. An empty initializer
            would otherwise only fail later, in the C++ compile, with a far
            less helpful message.
    """
    with open(dict_path, 'rb') as f:
        # bytearray yields ints when iterated, whatever bytes type f.read()
        # returns.
        data = bytearray(f.read())

    if not data:
        raise ValueError('%s is empty; cannot generate a dictionary header'
                         % dict_path)

    lines = [
        '// Generated by tools/generate_compile_cache_dict.py',
        '// from %s' % os.path.basename(dict_path),
        '// The .dict file is the source of truth; do not edit by hand.',
        '',
        '#ifndef COMPILE_CACHE_ZSTD_DICT_H_',
        '#define COMPILE_CACHE_ZSTD_DICT_H_',
        '',
        '#include <stddef.h>  // size_t',
        '',
        'static const unsigned char kCompileCacheZstdDict[] = {',
    ]
    # Emit 12 bytes per row to keep the generated lines short.
    for i in range(0, len(data), 12):
        chunk = data[i:i + 12]
        lines.append(' %s,' % ', '.join('0x%02x' % b for b in chunk))
    lines.append('};')
    lines.append('static const size_t kCompileCacheZstdDictSize = %d;' %
                 len(data))
    lines.append('')
    lines.append('#endif  // COMPILE_CACHE_ZSTD_DICT_H_')

    with open(out_path, 'w') as f:
        f.write('\n'.join(lines) + '\n')


if __name__ == '__main__':
    # Exactly two positional arguments are expected: the input .dict file and
    # the output header path. Any other count prints the usage line and exits.
    try:
        _, dict_arg, header_arg = sys.argv
    except ValueError:
        sys.exit('Usage: %s <input.dict> <output.h>' % sys.argv[0])
    main(dict_arg, header_arg)