diff --git a/benchmark/misc/compile-cache-timing.js b/benchmark/misc/compile-cache-timing.js
new file mode 100644
index 00000000000000..bdeae8c5cd2e93
--- /dev/null
+++ b/benchmark/misc/compile-cache-timing.js
@@ -0,0 +1,72 @@
+'use strict';
+
+// Startup benchmark for the compile cache (including the zstd dictionary).
+// Compares no-cache / cold-cache / warm-cache for two workloads:
+//   big  - one large module (the typescript.js fixture)
+//   many - many small modules (generated here, side-effect-free)
+// The modules are generated into a temp dir so the benchmark is self-contained
+// and reproducible, and never executes unrelated code.
+
+const common = require('../common.js');
+const { spawnSync } = require('child_process');
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+
+const bench = common.createBenchmark(main, {
+  workload: ['big', 'many'],  // One large module vs. many generated small ones.
+  cache: ['none', 'cold', 'warm'],  // Disabled / emptied per run / pre-populated.
+  n: [30],  // Timed child-process runs per configuration.
+});
+
+const BIG = path.resolve(__dirname, '../../test/fixtures/snapshot/typescript.js');
+
+// Writes `count` tiny modules without side effects into `dir` (created on
+// demand) and returns one source string that require()s each of them.
+function makeManyModules(dir, count) {
+  fs.mkdirSync(dir, { recursive: true });
+  let loader = '';
+  for (let index = 0; index < count; index += 1) {
+    const target = path.join(dir, `mod-${index}.js`);
+    fs.writeFileSync(target, [
+      `'use strict';`,
+      `module.exports = function value${index}(a, b) {`,
+      `  const sum = a + b + ${index};`,
+      `  return { id: ${index}, sum, label: 'module-${index}' };`,
+      `};`,
+      '',
+    ].join('\n'));
+    loader += `require(${JSON.stringify(target)});`;
+  }
+  return loader;
+}
+
+// Runs one child and throws if it fails to spawn or exits abnormally. A falsy
+// `cacheDir` unsets NODE_COMPILE_CACHE (`undefined` env values are ignored).
+function run(cmd, args, cacheDir) {
+  const env = { ...process.env, NODE_COMPILE_CACHE: cacheDir || undefined };
+  const { error, status, signal } = spawnSync(cmd, args, { env, stdio: 'ignore' });
+  if (error || status !== 0) throw error ?? new Error(`child failed: ${status ?? signal}`);
+}
+
+// Times `n` child startups for the given workload and cache state.
+function main({ n, workload, cache }) {
+  const cmd = process.execPath || process.argv[0];
+  const tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'cc-bench-'));
+  try {  // Also covers module generation, so `tmp` is always removed.
+    const args = workload === 'big' ?
+      [BIG] :
+      ['-e', makeManyModules(path.join(tmp, 'mods'), 120)];
+    // 'cold' uses a new, not-yet-existing directory for every iteration so
+    // that no cache deletion has to run (and be timed) inside the loop.
+    const cacheDirFor = (i) => {
+      if (cache === 'none') return null;
+      return path.join(tmp, cache === 'cold' ? `cache-${i}` : 'cache');
+    };
+    if (cache === 'warm') run(cmd, args, cacheDirFor(0));  // populate once
+    bench.start();
+    for (let i = 0; i < n; i++) run(cmd, args, cacheDirFor(i));
+    bench.end(n);
+  } finally {
+    fs.rmSync(tmp, { recursive: true, force: true });
+  }
+}
diff --git a/node.gyp b/node.gyp
index d2dbce19992b10..ab54cc27437e72 100644
--- a/node.gyp
+++ b/node.gyp
@@ -1110,6 +1110,22 @@
             '<@(linked_module_files)',
           ],
         },
+        {
+          'action_name': 'generate_compile_cache_zstd_dict',
+          'inputs': [
+            'src/compile_cache_zstd.dict',
+            'tools/generate_compile_cache_dict.py',
+          ],
+          'outputs': [
+            '<(SHARED_INTERMEDIATE_DIR)/compile_cache_zstd_dict.h',
+          ],
+          'action': [
+            '<(python)',
+            'tools/generate_compile_cache_dict.py',
+            'src/compile_cache_zstd.dict',
+            '<@(_outputs)',
+          ],
+        },
       ],
     }, # node_base
     {
@@ -1123,6 +1139,7 @@
         'src',
         'deps/v8/include',
         'deps/uv/include',
+        '<(SHARED_INTERMEDIATE_DIR)',  # for compile_cache_zstd_dict.h etc.
       ],
 
       'dependencies': [
diff --git a/src/compile_cache.cc b/src/compile_cache.cc
index 9e11793aa3388f..35c4b02cc70d9a 100644
--- a/src/compile_cache.cc
+++ b/src/compile_cache.cc
@@ -11,6 +11,10 @@
 #include "util.h"
 #include "zlib.h"
 #include "zstd.h"
+// kCompileCacheZstdDict + kCompileCacheZstdDictSize come from the header
+// generated at build time by the GYP action (from src/compile_cache_zstd.dict).
+// The include directory (SHARED_INTERMEDIATE_DIR) is added by node.gyp.
+#include "compile_cache_zstd_dict.h"
 
 #ifdef NODE_IMPLEMENTS_POSIX_CREDENTIALS
 #include <unistd.h>  // getuid
@@ -28,6 +32,29 @@ using v8::ScriptCompiler;
 using v8::String;
 
 namespace {
+// The zstd dictionary for the compile cache is embedded in the binary and
+// never changes, so its prepared forms (a CDict for compression, a DDict for
+// decompression) are built lazily, once per process, and then shared by all
+// callers. They are deliberately never freed. Either getter returns nullptr
+// when preparation fails; callers then use plain (dictionary-less) zstd.
+ZSTD_CDict* GetCompileCacheCDict() {
+  static ZSTD_CDict* const shared_cdict = ZSTD_createCDict(
+      kCompileCacheZstdDict, kCompileCacheZstdDictSize, /* level */ 1);
+  return shared_cdict;
+}
+
+ZSTD_DDict* GetCompileCacheDDict() {
+  static ZSTD_DDict* const shared_ddict =
+      ZSTD_createDDict(kCompileCacheZstdDict, kCompileCacheZstdDictSize);
+  return shared_ddict;
+}
+
+// The dictionary only helps small/medium caches; for larger inputs zstd's own
+// adaptive model dominates and the dictionary never wins, so we skip the
+// (otherwise wasted) second compression above this raw size. Decompression is
+// unaffected: a single DDict decodes both dict-assisted and plain frames.
+constexpr uint32_t kCompileCacheDictMaxRawSize = 256 * 1024;
+
 std::string Uint32ToHex(uint32_t crc) {
   std::string str;
   str.reserve(8);
@@ -266,10 +293,20 @@ void CompileCacheHandler::ReadCacheFile(CompileCacheEntry* entry) {
       Debug("failed to create zstd context\n");
       return;
     }
-    // Decompress directly into the buffer handed to V8.
+    // Decompress directly into the buffer handed to V8. The embedded
+    // dictionary is referenced via a shared, prepared DDict; plain frames
+    // (which carry no dictID) decompress correctly with it as well.
     std::unique_ptr<uint8_t[]> raw_data(new uint8_t[raw_size]);
-    size_t decompressed_size = ZSTD_decompressDCtx(
-        zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size);
+    ZSTD_DDict* ddict = GetCompileCacheDDict();
+    size_t decompressed_size;
+    if (ddict != nullptr) {
+      decompressed_size = ZSTD_decompress_usingDDict(
+          zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size,
+          ddict);
+    } else {
+      decompressed_size = ZSTD_decompressDCtx(
+          zstd_dctx_, raw_data.get(), raw_size, disk_data.get(), cache_size);
+    }
     if (ZSTD_isError(decompressed_size)) {
       Debug("decompression failed: %s\n", ZSTD_getErrorName(decompressed_size));
       return;
@@ -508,16 +545,43 @@ void CompileCacheHandler::Persist() {
     // shutdown and should add as little overhead as possible. If the data
     // is not compressible, store it uncompressed, which is indicated by
     // the cache size being equal to the uncompressed size in the headers.
+    //
+    // We also try the embedded trained dictionary and keep whichever frame is
+    // smaller (still subject to the "only store if < raw" policy). The
+    // dictionary mainly helps the small/medium caches that dominate real
+    // compile cache usage; for inputs where plain zstd already wins we keep
+    // the plain frame.
     char* cache_ptr = raw_ptr;
     uint32_t cache_size = raw_size;
     std::unique_ptr<uint8_t[]> compressed;
+    std::unique_ptr<uint8_t[]> compressed_dict;
     if (cctx != nullptr || (cctx = ZSTD_createCCtx()) != nullptr) {
       size_t compressed_bound = ZSTD_compressBound(raw_size);
       compressed.reset(new uint8_t[compressed_bound]);
       size_t compressed_size = ZSTD_compressCCtx(
           cctx, compressed.get(), compressed_bound, raw_ptr, raw_size, 1);
+      char* best_ptr = reinterpret_cast<char*>(compressed.get());
+      // Only attempt the dictionary for small/medium entries (see
+      // kCompileCacheDictMaxRawSize); for large blobs it never wins and the
+      // extra compression would be wasted work.
+      ZSTD_CDict* cdict = raw_size <= kCompileCacheDictMaxRawSize
+                              ? GetCompileCacheCDict()
+                              : nullptr;
+      if (cdict != nullptr) {
+        // Compress into a separate buffer so the selected frame's bytes and
+        // size always stay in sync (the plain buffer is left untouched).
+        compressed_dict.reset(new uint8_t[compressed_bound]);
+        size_t dict_size = ZSTD_compress_usingCDict(
+            cctx, compressed_dict.get(), compressed_bound, raw_ptr, raw_size,
+            cdict);
+        if (!ZSTD_isError(dict_size) &&
+            (ZSTD_isError(compressed_size) || dict_size < compressed_size)) {
+          compressed_size = dict_size;
+          best_ptr = reinterpret_cast<char*>(compressed_dict.get());
+        }
+      }
       if (!ZSTD_isError(compressed_size) && compressed_size < raw_size) {
-        cache_ptr = reinterpret_cast<char*>(compressed.get());
+        cache_ptr = best_ptr;
         cache_size = static_cast<uint32_t>(compressed_size);
       }
     }
diff --git a/src/compile_cache_zstd.dict b/src/compile_cache_zstd.dict
new file mode 100644
index 00000000000000..b64455d45b1d82
Binary files /dev/null and b/src/compile_cache_zstd.dict differ
diff --git a/test/parallel/test-compile-cache-success.js b/test/parallel/test-compile-cache-success.js
index c02a6243286972..9417b6bf63c490 100644
--- a/test/parallel/test-compile-cache-success.js
+++ b/test/parallel/test-compile-cache-success.js
@@ -64,3 +64,83 @@ const path = require('path');
       }
     });
 }
+
+// Round-trip check for the dictionary-compressed path (added on top of
+// #63861). Many small modules are where the embedded dictionary helps most,
+// so a handful of them is generated here. A first child writes the cache, a
+// second child reads it back, and every module's entry must be reported as
+// accepted on that second run.
+{
+  tmpdir.refresh();
+  const cacheRoot = tmpdir.resolve('.compile_cache_dir');
+  const moduleCount = 8;
+
+  // Write the small modules and remember where each one lives.
+  const modulePaths = Array.from({ length: moduleCount }, (_, index) => {
+    const target = tmpdir.resolve(`mod-${index}.js`);
+    fs.writeFileSync(target, [
+      `'use strict';`,
+      `module.exports = function value${index}(a, b) {`,
+      `  const sum = a + b + ${index};`,
+      `  return { id: ${index}, sum, label: 'module-${index}' };`,
+      `};`,
+      '',
+    ].join('\n'));
+    return target;
+  });
+
+  // A single `-e` script that require()s every generated module.
+  const loaderSource =
+    modulePaths.map((file) => `require(${JSON.stringify(file)});`).join('');
+
+  // Both children are spawned with identical arguments and options; the
+  // native COMPILE_CACHE debug output on stderr is what gets inspected.
+  const childArgs = ['-e', loaderSource];
+  const childOptions = {
+    env: {
+      ...process.env,
+      NODE_DEBUG_NATIVE: 'COMPILE_CACHE',
+      NODE_COMPILE_CACHE: cacheRoot
+    },
+    cwd: tmpdir.path
+  };
+
+  // Builds the expectations object for one run: stderr has to match
+  // `toPattern(name)` once per module, where `name` is the module's basename
+  // with its dots escaped for use inside a regular expression.
+  const expectForEachModule = (toPattern) => ({
+    stderr(output) {
+      for (const file of modulePaths) {
+        const name = path.basename(file).replace(/[.]/g, '\\.');
+        assert.match(output, new RegExp(toPattern(name)));
+      }
+      return true;
+    }
+  });
+
+  // Run 1: a cache entry is written for every module.
+  spawnSyncAndAssert(
+    process.execPath,
+    childArgs,
+    childOptions,
+    expectForEachModule((name) => `writing cache for .*${name}.*success`));
+
+  // The cache root must now hold exactly one sub-directory, which in turn
+  // contains at least one entry per generated module.
+  const cacheDirs = fs.readdirSync(cacheRoot);
+  assert.strictEqual(cacheDirs.length, 1);
+  const entries = fs.readdirSync(path.join(cacheRoot, cacheDirs[0]));
+  assert(
+    entries.length >= moduleCount,
+    `expected >= ${moduleCount} entries, got ${entries.length}`);
+
+  // Run 2: every cached entry is read back. The "was accepted" message is
+  // required for each module, which makes this a round-trip check of the
+  // entries persisted by run 1.
+  spawnSyncAndAssert(
+    process.execPath,
+    childArgs,
+    childOptions,
+    expectForEachModule(
+      (name) => `cache for .*${name} was accepted, keeping the in-memory entry`));
+}
diff --git a/tools/generate_compile_cache_dict.py b/tools/generate_compile_cache_dict.py
new file mode 100644
index 00000000000000..8794889775bb63
--- /dev/null
+++ b/tools/generate_compile_cache_dict.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+"""Generate compile_cache_zstd_dict.h from a trained zstd .dict file.
+
+Invoked by the GYP action in node.gyp at build time. Only the small binary
+.dict (src/compile_cache_zstd.dict) is checked into the repository; the C
+array it produces is generated into SHARED_INTERMEDIATE_DIR.
+"""
+import os
+import sys
+
+
+def main(dict_path, out_path):
+    """Write the bytes of dict_path to out_path as a C array plus its size."""
+    with open(dict_path, 'rb') as f:
+        data = bytearray(f.read())  # Yields ints on Python 2 and 3 alike.
+    if not data:  # `= {}` for an unsized array would not compile.
+        raise ValueError('%s is empty' % dict_path)
+
+    lines = [
+        '// Generated by tools/generate_compile_cache_dict.py',
+        '// from %s' % os.path.basename(dict_path),
+        '// The .dict file is the source of truth; do not edit by hand.',
+        '#include <cstddef>  // size_t',
+        'static const unsigned char kCompileCacheZstdDict[] = {',
+    ]
+    lines += ['  %s,' % ', '.join('0x%02x' % b for b in data[i:i + 12])
+              for i in range(0, len(data), 12)]
+    lines += ['};',
+              'static const size_t kCompileCacheZstdDictSize = %d;' % len(data)]
+    with open(out_path, 'w') as f:
+        f.write('\n'.join(lines) + '\n')
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:  # Exactly <input.dict> and <output.h> are expected.
+        sys.exit('Usage: %s <input.dict> <output.h>' % sys.argv[0])
+    main(*sys.argv[1:])