diff --git a/ChangeLog.md b/ChangeLog.md index 0793baa832527..8e23aba024c47 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -20,6 +20,10 @@ See docs/process.md for more on how version tagging works. 6.0.1 (in development) ---------------------- +- New experimental `-sCROSS_ORIGIN_STORAGE` linker flag integrating the + proposed [Cross-Origin Storage browser API](https://github.com/WICG/cross-origin-storage) + as a progressive enhancement for Wasm loading on the web target. See + `site/source/docs/compiling/CrossOriginStorage.rst` for details. (#27066) - The installed versions of the compiler-rt library now follow the upstream naming convetion of `libclang_rt..a`. (#27089) - Dynamic linking now explicitly requires asynchronous Wasm compilation. The diff --git a/site/source/docs/compiling/CrossOriginStorage.rst b/site/source/docs/compiling/CrossOriginStorage.rst new file mode 100644 index 0000000000000..7e204a31b83f7 --- /dev/null +++ b/site/source/docs/compiling/CrossOriginStorage.rst @@ -0,0 +1,384 @@ +.. _CrossOriginStorage: + +========================== +Cross-Origin Storage (COS) +========================== + +.. note:: + This feature is **experimental**. The underlying `Cross-Origin Storage + browser API <https://github.com/WICG/cross-origin-storage>`_ is a WICG + proposal that has not yet shipped in any browser. Emscripten's support is + provided as a progressive enhancement — the runtime falls back to the + standard ``fetch()`` path automatically when the browser does not expose + the API. + +Overview +======== + +The **Cross-Origin Storage (COS)** API is a proposed browser standard that +allows web applications on different origins to share large cached files, +identified by their cryptographic hashes. A file stored in COS by one site +can be retrieved by any other site using the same hash, eliminating redundant +downloads. + +Emscripten's :ref:`CROSS_ORIGIN_STORAGE` flag integrates this into the +standard Wasm loading path. At build time, Emscripten computes the SHA-256 +hash of the final ``.wasm`` binary. 
At runtime, the generated JavaScript +tries to retrieve the compiled Wasm module from COS before falling back to +a normal network fetch. If the module is not yet in COS it is stored there +after download, making it available to other origins immediately. + +When to use this flag +--------------------- + +COS only delivers a benefit when the ``.wasm`` binary is **byte-identical +across many different origins** — that is, a popular library whose compiled +binary is loaded by many independent sites. If every visitor to every site +downloads the exact same bytes, COS means they only download it once, ever. + +Good candidates are libraries or toolkits that are: + +- popular enough that many independent sites load the same binary, +- distributed as a stable, version-pinned ``.wasm`` file, and +- a **single primary** ``.wasm`` file (COS only covers the binary that + Emscripten compiles; any additional Wasm files loaded at runtime are not + covered). + +**Do not** enable this flag for application-specific Wasm code built for +your own site. That binary is unique to you; no other origin will ever have +the same hash, so it will never get a COS cache hit. The normal HTTP cache +already handles per-origin caching efficiently. + +The exception is a Wasm binary that you deploy across **multiple origins you +own** — for example, the same library shared between ``https://app.example.com`` +and ``https://api.example.com``. In that case COS can eliminate the redundant +download between your own origins. Use :ref:`CROSS_ORIGIN_STORAGE_ORIGINS` to +restrict access to only those origins rather than opening the cache entry to +the world. 
+ +Usage +===== + +Pass :ref:`CROSS_ORIGIN_STORAGE` at link time:: + + emcc hello.cpp -o hello.js -sCROSS_ORIGIN_STORAGE + +Controlling which origins can read the cached file +-------------------------------------------------- + +The :ref:`CROSS_ORIGIN_STORAGE_ORIGINS` setting controls the ``origins`` field +passed to ``requestFileHandles()`` on the write (cache-miss) path. It has no +effect on the read (cache-hit) path. Three modes are available: + +**Globally available** (default, no explicit setting needed) — any origin +can retrieve the file. This is applied automatically when +:ref:`CROSS_ORIGIN_STORAGE` is used without specifying +:ref:`CROSS_ORIGIN_STORAGE_ORIGINS`: + +.. code-block:: bash + + emcc hello.cpp -o hello.js -sCROSS_ORIGIN_STORAGE + +Use this for popular binaries loaded by many independent origins. This is +the recommended mode for resources where global COS cache hits are expected. + +**Restricted to a specific set of origins** — only the listed origins can +retrieve the file: + +.. code-block:: bash + + emcc hello.cpp -o hello.js \ + -sCROSS_ORIGIN_STORAGE \ + -sCROSS_ORIGIN_STORAGE_ORIGINS=https://app.example.com,https://api.example.com + +Use this for proprietary resources shared across a controlled set of related +sites. Each entry must be a valid serialized HTTPS origin (scheme + host + +optional port, no path). Mixing ``'*'`` with explicit origins is a +**link-time error**. + +**Same-site only** — pass an explicit empty list to omit the ``origins`` +field, making the file available only to same-site origins: + +.. code-block:: bash + + emcc hello.cpp -o hello.js \ + -sCROSS_ORIGIN_STORAGE \ + -sCROSS_ORIGIN_STORAGE_ORIGINS=[] + +Use this for resources that should be shared across subdomains of a single +site but not beyond. + +.. note:: + The COS spec defines a **visibility upgrade** rule: a resource's + availability can be widened but never narrowed. 
If a resource is already + stored as globally available (``'*'``), any subsequent attempt to store it + with a more restrictive ``origins`` list is ignored by the browser. + + This rule also has a security implication: because storing always requires + writing the actual bytes of the resource, no third party can probe the + cache to determine whether a restricted-origin entry was previously stored + by another origin. A cache hit is only possible after an explicit write + that provided the content, so COS cannot be used as a timing oracle to + detect the presence of a resource that the probing origin cannot access. + +Requirements and restrictions +------------------------------ + +- The flag produces a **hard link-time error** when the target environment does not include + the web (``-sENVIRONMENT=node``, ``-sENVIRONMENT=shell``): + ``navigator.crossOriginStorage`` is a browser API and is never available + in those environments. +- It produces a **hard link-time error** in **SINGLE_FILE** mode + (``-sSINGLE_FILE``): the Wasm binary is embedded directly into the JS + output and has no standalone ``.wasm`` file or fetchable URL to key the + hash on. +- It produces a **hard link-time error** with ``-sWASM_ASYNC_COMPILATION=0``: + the synchronous instantiation path bypasses ``instantiateAsync()`` entirely, + so the COS code can never be reached. +- It covers **only the primary** ``.wasm`` **file**. Secondary files produced by + ``-sSPLIT_MODULE`` (``.deferred.wasm``) and side modules loaded at runtime + via ``dlopen`` in ``-sMAIN_MODULE`` builds are fetched through the normal + network path and are not stored in or retrieved from COS. A warning is + emitted for both of these combinations. +- The COS API is a progressive enhancement. Browsers without the API + continue to load the Wasm module via the normal ``fetch()`` and + ``WebAssembly.instantiateStreaming()`` path without any error. 
+ +How it works +============ + +Build time +---------- + +After all optimizations — including any ``wasm-opt`` passes run by Binaryen +— Emscripten reads the final ``.wasm`` binary and hashes it. The hash +object is embedded in the generated JavaScript glue as a build-time +constant (currently SHA-256):: + + Module['wasmHash'] = { algorithm: 'SHA-256', value: 'a3f2...c9d1' }; + +No extra files are produced; the hash is part of the regular ``.js`` output. + +.. warning:: + The hash is computed over the ``.wasm`` binary **as emcc produces it**, + after emcc's own internal Binaryen/``wasm-opt`` pass. If your build + pipeline runs additional wasm post-processing tools *after* emcc exits — + for example, an external ``wasm-strip`` or ``wasm-opt`` invocation in a + Makefile or CI script — those tools change the binary and **invalidate the + embedded hash**. + + In that case you must recompute the hash of the final ``.wasm`` and + patch the value string in the generated ``.js`` yourself before shipping. + A minimal shell snippet for doing so (SHA-256): + + .. code-block:: bash + + # After all post-processing is complete: + final_hash=$(sha256sum hello.wasm | awk '{print $1}') + sed -i "s/'[0-9a-f]\{64\}'/'${final_hash}'/g" hello.js + + On macOS, use ``shasum -a 256`` in place of ``sha256sum``, and install + GNU sed (``brew install gnu-sed``) or adapt the ``sed`` command for BSD + sed syntax. + +Runtime (web only) +------------------ + +When the page loads, the generated JavaScript follows this logic: + +1. **Feature detection** — check ``'crossOriginStorage' in navigator``. + If the API is absent, skip to the normal fetch path immediately. + +2. **Cache hit** — call + ``navigator.crossOriginStorage.requestFileHandles([cosHash])``. + If the handle is returned (the module is already in COS), read it with + ``handle.getFile()`` → ``.arrayBuffer()`` and pass the bytes to + ``WebAssembly.instantiate()``. + Then invoke ``Module['onCOSCacheHit'](hash)`` if defined. + +3. 
**Cache miss** — if a ``NotFoundError`` is thrown, fetch the ``.wasm`` + over the network as usual, invoke ``Module['onCOSCacheMiss'](hash, url)`` if + defined, call ``WebAssembly.instantiate()`` immediately so the page loads + without delay, and then write the bytes into COS in the background + (fire-and-forget) using the ``origins`` value controlled by + :ref:`CROSS_ORIGIN_STORAGE_ORIGINS` (``'*'`` by default). + Once the write completes, invoke ``Module['onCOSStore'](hash)`` if defined. + +4. **Fallback** — any unexpected error (``NotAllowedError`` from the browser, + network failure during the miss path, etc.) is logged with ``err()`` and + the runtime falls through to the standard streaming-instantiation path + below. The page always loads. + +Instrumentation callbacks +------------------------- + +Three optional ``Module`` properties let you observe COS events at runtime. +They are **opt-in**: to include the callback code in the output, list them in +``INCOMING_MODULE_JS_API`` at link time:: + + emcc hello.cpp -o hello.js -sCROSS_ORIGIN_STORAGE \ + -sINCOMING_MODULE_JS_API=onCOSCacheHit,onCOSCacheMiss,onCOSStore + +.. code-block:: javascript + + var Module = { + // Called when the Wasm binary was served from the cross-origin cache. + onCOSCacheHit: (hash) => { + console.log('Cache hit, SHA-256:', hash); + }, + + // Called when the Wasm binary was not in COS and was fetched over the + // network. |hash| is the hash that missed; |url| is the fallback URL. + onCOSCacheMiss: (hash, url) => { + console.log('Cache miss, SHA-256:', hash, 'fetched from:', url); + }, + + // Called after the Wasm binary has been successfully written to COS. 
+ onCOSStore: (hash) => { + console.log('Stored in COS, SHA-256:', hash); + }, + }; + +Testing with the extension polyfill +==================================== + +Because no browser ships the COS API natively yet, you can experiment using +the `Cross-Origin Storage extension +`_, +which injects a ``navigator.crossOriginStorage`` polyfill on every page. + +Manual testing +-------------- + +1. Install the extension in Chrome. +2. Build your project with ``-sCROSS_ORIGIN_STORAGE -sENVIRONMENT=web``. +3. Serve the output over HTTP (e.g. with ``emrun`` or ``python3 -m http.server``). +4. Open the page — on the first load the Wasm binary is fetched and stored in + COS. Open the same page in a second tab or from a different origin: the + module is loaded from COS without a network request. + +Automated browser testing +-------------------------- + +The Emscripten browser test suite includes COS tests that run against the +polyfill extension. The extension must be available as an **unpacked** +directory (containing ``manifest.json``). A helper script downloads and +unpacks it automatically:: + + python3 test/setup_cos_extension.py + +Then run the tests, passing the printed path as ``EMTEST_COS_EXTENSION_PATH``:: + + EMTEST_COS_EXTENSION_PATH=$(python3 test/setup_cos_extension.py --quiet) \ + python3 test/runner.py \ + browser.test_cross_origin_storage_fallback \ + browser.test_cross_origin_storage_miss_then_hit + +``test_cross_origin_storage_fallback`` does not require the extension and +verifies that a ``-sCROSS_ORIGIN_STORAGE`` build loads correctly on browsers +where the COS API is absent. ``test_cross_origin_storage_miss_then_hit`` +requires the extension and exercises both the cache-miss store and cache-hit +paths in sequence. + +Verifying the embedded hash +============================ + +You can confirm that the hash embedded in the ``.js`` output matches the +actual ``.wasm`` file using standard tools: + +.. 
code-block:: bash + + # SHA-256 of the wasm file + sha256sum hello.wasm + + # Extract the hash embedded in the JS + grep -oP "value: '\K[0-9a-f]{64}" hello.js + +Both values must be identical. The Emscripten test suite checks this +automatically via ``test_cross_origin_storage`` in +``test/test_other.py``. + +Custom ``Module['instantiateWasm']`` implementations +===================================================== + +The COS fetch logic described above lives inside ``instantiateAsync()``, which +is the standard Emscripten wasm loading path. When a program provides its own +``Module['instantiateWasm']`` callback, Emscripten calls that callback directly +and **skips** ``instantiateAsync()`` entirely, so the built-in COS code is never +reached. + +To support COS in a custom loader, Emscripten exposes the build-time SHA-256 +hash as a named Module property: + +.. code-block:: javascript + + Module['wasmHash'] // { algorithm: 'SHA-256', value: '<64 hex chars>' } + +This property is set by the generated JavaScript before +``Module['instantiateWasm']`` is called, so it is always available inside the +callback. ``Module`` in this context is the config object passed to the module +factory — whatever variable you use when calling ``new Module(config)`` or the +equivalent factory function. A custom loader can read ``Module['wasmHash']`` +via a reference to that config object: + +.. code-block:: javascript + + var Module = { + instantiateWasm(imports, onSuccess) { + // `this` inside the callback is Emscripten's internal Module object; + // read the hash via the outer Module reference instead. 
+ const cosHash = Module['wasmHash']; + if (cosHash?.value && globalThis.navigator?.crossOriginStorage) { + navigator.crossOriginStorage.requestFileHandles([cosHash]) + .then(handles => handles[0].getFile()) + .then(f => f.arrayBuffer()) + .then(bytes => WebAssembly.instantiate(bytes, imports)) + .then(({instance, module}) => onSuccess(instance, module)) + .catch(err => { + if (err.name !== 'NotFoundError') throw err; + // cache miss — fetch normally and store in the background + fetch('hello.wasm') + .then(r => r.arrayBuffer()) + .then(bytes => { + WebAssembly.instantiate(bytes, imports) + .then(({instance, module}) => onSuccess(instance, module)); + // fire-and-forget store + navigator.crossOriginStorage + .requestFileHandles([cosHash], { create: true, origins: '*' }) + .then(wh => wh[0].createWritable()) + .then(w => w.write(new Blob([bytes], {type:'application/wasm'})) + .then(() => w.close())); + }); + }); + return; // async; onSuccess called above + } + // fallback — normal streaming instantiation + WebAssembly.instantiateStreaming(fetch('hello.wasm'), imports) + .then(({instance, module}) => onSuccess(instance, module)); + }, + }; + +``Module['wasmHash']`` is only present in builds compiled with +:ref:`CROSS_ORIGIN_STORAGE`. Always guard on its truthiness before using it, +as shown above, so the same loader code works in builds compiled without the +flag. + +Relationship to other caching mechanisms +========================================== + +COS is a complement to, not a replacement for, existing browser caches: + +- **HTTP cache / Service Worker cache** — still used for per-origin caching. + COS adds cross-origin sharing on top. +- ``NODE_CODE_CACHING`` — a Node.js-specific V8 bytecode cache; unrelated + to COS. +- **IndexedDB / OPFS** — per-origin storage; COS shares across origins. 
+ +See also +======== + +- `WICG Cross-Origin Storage explainer <https://github.com/WICG/cross-origin-storage>`_ +- `COS browser extension (Chrome Web Store) `_ +- `COS browser extension (source code) <https://github.com/web-ai-community/cross-origin-storage-extension>`_ +- :ref:`settings-reference` — ``CROSS_ORIGIN_STORAGE`` entry +- :ref:`WebAssembly` — general guide to building Wasm with Emscripten diff --git a/site/source/docs/compiling/index.rst b/site/source/docs/compiling/index.rst index e8b03becc832d..6aa22ae9c5a70 100644 --- a/site/source/docs/compiling/index.rst +++ b/site/source/docs/compiling/index.rst @@ -13,6 +13,7 @@ This section contains topics about building projects and running the output. - :ref:`Deploying-Pages` covers topics related to hosting Emscripten compiled web pages on a CDN. - :ref:`GitLab` explains how to build and test projects on GitLab. - :ref:`Contrib-Ports` contains information about contrib ports. +- :ref:`CrossOriginStorage` explains how to enable the experimental Cross-Origin Storage integration for sharing Wasm modules across origins. .. toctree:: @@ -26,3 +27,4 @@ This section contains topics about building projects and running the output. Deploying-Pages GitLab Contrib-Ports + CrossOriginStorage diff --git a/site/source/docs/tools_reference/settings_reference.rst b/site/source/docs/tools_reference/settings_reference.rst index 82323a63792ce..3a0d6c1b990e9 100644 --- a/site/source/docs/tools_reference/settings_reference.rst +++ b/site/source/docs/tools_reference/settings_reference.rst @@ -3362,6 +3362,49 @@ indirectly using `importScripts` Default value: false +.. _cross_origin_storage: + +CROSS_ORIGIN_STORAGE +==================== + +Enables Cross-Origin Storage (COS) API support for Wasm +loading on the Web target. At link time Emscripten computes the SHA-256 +hash of the final ``.wasm`` binary and embeds it in the generated JS. 
+At runtime the COS API is used as a progressive enhancement: the binary is +fetched from the shared cross-origin cache on a hit, or stored there after +a network fetch on a miss; when the API is absent or errors the runtime +falls through to the standard fetch path. + +Requires the Web environment; using it without ``-sENVIRONMENT=web`` is a +hard link-time error. Incompatible with SINGLE_FILE and +WASM_ASYNC_COMPILATION=0 (both produce hard link-time errors). + +See :ref:`CrossOriginStorage` for the full guide. + +.. note:: This is an experimental setting + +Default value: false + +.. _cross_origin_storage_origins: + +CROSS_ORIGIN_STORAGE_ORIGINS +============================ + +Controls which origins may read the Wasm binary from the COS cache. Only +meaningful when ``-sCROSS_ORIGIN_STORAGE`` is set. Applied only during the +write (cache-miss) path, not the read (cache-hit) path. + +``['*']`` (default) — any origin can retrieve the file. +Explicit HTTPS origin list — restricted to those origins only:: + + -sCROSS_ORIGIN_STORAGE_ORIGINS=https://app.example.com,https://api.example.com + +``[]`` — same-site only (omits the ``origins`` field entirely). + +Mixing ``'*'`` with explicit origins is a link-time error. + +Default value: ['*'] + .. _fake_dylibs: FAKE_DYLIBS diff --git a/src/preamble.js b/src/preamble.js index 97f6366910344..1719bf4496c5a 100644 --- a/src/preamble.js +++ b/src/preamble.js @@ -626,6 +626,68 @@ async function instantiateArrayBuffer(binaryFile, imports) { async function instantiateAsync(binary, binaryFile, imports) { #if !SINGLE_FILE +#if CROSS_ORIGIN_STORAGE + // Cross-Origin Storage (COS) progressive enhancement. + // https://github.com/WICG/cross-origin-storage + // Any error (NotAllowedError, network failure, …) falls through to the + // standard Emscripten streaming path so the page always loads. 
+ if (globalThis.navigator?.crossOriginStorage) { + var cosHash = Module['wasmHash']; + try { + var cosHandles = await navigator.crossOriginStorage.requestFileHandles([cosHash]); + // Cache hit — read the Blob and instantiate from its ArrayBuffer. + var cosFile = await cosHandles[0].getFile(); + var cosBytes = await cosFile.arrayBuffer(); +#if expectToReceiveOnModule('onCOSCacheHit') + Module['onCOSCacheHit']?.(cosHash.value); +#endif + return WebAssembly.instantiate(cosBytes, imports); + } catch (cosErr) { + if (cosErr.name === 'NotFoundError') { + // Cache miss — fetch normally, then store in COS for future consumers. + try { + var networkResponse = await fetch(binaryFile, {{{ makeModuleReceiveExpr('fetchSettings', "{ credentials: 'same-origin' }") }}}); + var wasmBytes = await networkResponse.arrayBuffer(); +#if expectToReceiveOnModule('onCOSCacheMiss') + Module['onCOSCacheMiss']?.(cosHash.value, binaryFile); +#endif + // Fire-and-forget store; never block instantiation on the write. + (async () => { + try { + var writeHandles = await navigator.crossOriginStorage.requestFileHandles( + [cosHash], +#if CROSS_ORIGIN_STORAGE_ORIGINS.length === 1 && CROSS_ORIGIN_STORAGE_ORIGINS[0] === '*' + { create: true, origins: '*' }, +#elif CROSS_ORIGIN_STORAGE_ORIGINS.length + { create: true, origins: {{{ JSON.stringify(CROSS_ORIGIN_STORAGE_ORIGINS) }}} }, +#else + { create: true }, +#endif + ); + var writable = await writeHandles[0].createWritable(); + await writable.write(new Blob([wasmBytes], { type: 'application/wasm' })); + await writable.close(); +#if expectToReceiveOnModule('onCOSStore') + Module['onCOSStore']?.(cosHash.value); +#endif + } catch (storeErr) { + err(`COS store failed: ${storeErr}`); + } + })(); + return WebAssembly.instantiate(wasmBytes, imports); + } catch (fetchErr) { + // Network fetch failed; fall through to the standard path below. 
+ err(`COS fallback fetch failed: ${fetchErr}`); + } + } else if (cosErr.name === 'NotAllowedError') { + err(`COS: permission denied.`); + } else { + err(`Cross-Origin Storage lookup failed: ${cosErr}`); + } + // Fall through to the standard streaming path below. + } + } +#endif // CROSS_ORIGIN_STORAGE if (!binary #if MIN_SAFARI_VERSION < 150000 // See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/WebAssembly/instantiateStreaming @@ -825,6 +887,12 @@ function getWasmImports() { var info = getWasmImports(); +#if CROSS_ORIGIN_STORAGE + // Expose the build-time hash so that custom Module['instantiateWasm'] + // callbacks can implement their own COS-aware loading path. + Module['wasmHash'] = { algorithm: 'SHA-256', value: '<<< WASM_HASH_VALUE >>>' }; +#endif + #if expectToReceiveOnModule('instantiateWasm') // User shell pages can write their own Module.instantiateWasm = function(imports, successCallback) callback // to manually instantiate the Wasm module themselves. This allows pages to diff --git a/src/settings.js b/src/settings.js index f953afaa3e52c..109ab40948cef 100644 --- a/src/settings.js +++ b/src/settings.js @@ -2206,6 +2206,39 @@ var GROWABLE_ARRAYBUFFERS = false; // indirectly using `importScripts` var CROSS_ORIGIN = false; +// Enables Cross-Origin Storage (COS) API support for Wasm +// loading on the Web target. At link time Emscripten computes the SHA-256 +// hash of the final ``.wasm`` binary and embeds it in the generated JS. +// At runtime the COS API is used as a progressive enhancement: the binary is +// fetched from the shared cross-origin cache on a hit, or stored there after +// a network fetch on a miss; when the API is absent or errors the runtime +// falls through to the standard fetch path. +// +// Requires the Web environment; using it without ``-sENVIRONMENT=web`` is a +// hard link-time error. Incompatible with SINGLE_FILE and +// WASM_ASYNC_COMPILATION=0 (both produce hard link-time errors). 
+// +// See :ref:`CrossOriginStorage` for the full guide. +// +// [link] +// [experimental] +var CROSS_ORIGIN_STORAGE = false; + +// Controls which origins may read the Wasm binary from the COS cache. Only +// meaningful when ``-sCROSS_ORIGIN_STORAGE`` is set. Applied only during the +// write (cache-miss) path, not the read (cache-hit) path. +// +// ``['*']`` (default) — any origin can retrieve the file. +// Explicit HTTPS origin list — restricted to those origins only:: +// +// -sCROSS_ORIGIN_STORAGE_ORIGINS=https://app.example.com,https://api.example.com +// +// ``[]`` — same-site only (omits the ``origins`` field entirely). +// +// Mixing ``'*'`` with explicit origins is a link-time error. +// [link] +var CROSS_ORIGIN_STORAGE_ORIGINS = ['*']; + // This setting changes the behaviour of the ``-shared`` flag. When set to true // you get the old emscripten behaviour where the ``-shared`` flag actually // produces a normal object file (i.e. ``ld -r``). When set to true (the diff --git a/test/browser_common.py b/test/browser_common.py index 97d0a55d5f2e1..7ce6694aaaf8a 100644 --- a/test/browser_common.py +++ b/test/browser_common.py @@ -66,6 +66,16 @@ EMTEST_HEADLESS = None EMTEST_CAPTURE_STDIO = int(os.getenv('EMTEST_CAPTURE_STDIO', '0')) +# Path to an unpacked Chrome extension implementing the Cross-Origin Storage +# polyfill. When set, the extension is loaded via --load-extension when +# launching a Chromium-based browser, enabling the COS browser test paths. +# Point this at a local clone of: +# https://github.com/web-ai-community/cross-origin-storage-extension +# (the directory that contains manifest.json). +# TODO: Remove this once Chromium ships COS natively (even behind a flag), +# and update the browser test to use that flag instead. +EMTEST_COS_EXTENSION_PATH = os.getenv('EMTEST_COS_EXTENSION_PATH', '') + # Triggers the browser to restart after every given number of tests. # 0: Disabled (reuse the browser instance to run all tests. 
Default) # 1: Restart a fresh browser instance for every browser test. @@ -365,6 +375,8 @@ def configure_test_browser(): EMTEST_BROWSER += ' ' + ' '.join(config.default_flags) if EMTEST_HEADLESS == 1: EMTEST_BROWSER += f" {config.headless_flags}" + if EMTEST_COS_EXTENSION_PATH and is_chrome(): + EMTEST_BROWSER += f' --load-extension="{EMTEST_COS_EXTENSION_PATH}"' # Create a server and a web page. When a test runs, we tell the server about it, diff --git a/test/setup_cos_extension.py b/test/setup_cos_extension.py new file mode 100755 index 0000000000000..970ffdf40b7c1 --- /dev/null +++ b/test/setup_cos_extension.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +# Copyright 2026 The Emscripten Authors. All rights reserved. +# Emscripten is available under two separate licenses, the MIT license and the +# University of Illinois/NCSA Open Source License. Both these licenses can be +# found in the LICENSE file. + +# SPDX-License-Identifier: Apache-2.0 + +r"""Download the Cross-Origin Storage Chrome extension for COS browser tests. + +The COS extension polyfills navigator.crossOriginStorage in Chrome so that +automated browser tests can exercise the cache-miss and cache-hit paths without +requiring a native browser implementation of the API. 
+ +Usage +----- + +Run once to download and unpack the extension:: + + python3 test/setup_cos_extension.py + +Then pass the printed path as EMTEST_COS_EXTENSION_PATH when running the +browser tests:: + + EMTEST_COS_EXTENSION_PATH=$(python3 test/setup_cos_extension.py) \ + python3 test/runner.py browser.test_cross_origin_storage_miss_then_hit + +Or, to also silence the informational messages on stderr so that only the path +is printed (suitable for shell variable assignment), use --quiet:: + + ext=$(python3 test/setup_cos_extension.py --quiet) + EMTEST_COS_EXTENSION_PATH=$ext python3 test/runner.py \ + browser.test_cross_origin_storage_fallback \ + browser.test_cross_origin_storage_miss_then_hit + +The extension is downloaded from its GitHub source repository and unpacked into +out/cos_extension/ (relative to the Emscripten root). Re-run with --force to +refresh an existing download. + +Source: https://github.com/web-ai-community/cross-origin-storage-extension +""" + +import argparse +import io +import os +import sys +import urllib.request +import zipfile + +# Archive of the main branch of the COS extension source repository. +EXTENSION_ARCHIVE_URL = ( + 'https://github.com/web-ai-community/cross-origin-storage-extension' + '/archive/refs/heads/main.zip' +) + +# Default destination relative to the Emscripten root (i.e. the parent of the +# test/ directory that contains this script). +_SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_DEST = os.path.join(_SCRIPT_DIR, '..', 'out', 'cos_extension') + + +def _find_manifest(root): + """Return the directory containing manifest.json, searching up to 2 levels. + + Returns None if root does not exist or no manifest.json is found. 
+ """ + if not os.path.isdir(root): + return None + if os.path.exists(os.path.join(root, 'manifest.json')): + return root + for entry in os.scandir(root): + if entry.is_dir(): + candidate = os.path.join(entry.path, 'manifest.json') + if os.path.exists(candidate): + return entry.path + return None + + +def download_and_unpack(dest_dir, quiet=False): + """Download the extension archive from GitHub and unpack it into dest_dir. + + Returns the path of the directory that contains manifest.json. + """ + if not quiet: + print('Downloading COS extension from GitHub...', file=sys.stderr) + req = urllib.request.Request( + EXTENSION_ARCHIVE_URL, + headers={'User-Agent': 'emscripten-test-setup'}, + ) + with urllib.request.urlopen(req) as response: + data = response.read() + + if not quiet: + print(f'Unpacking to {dest_dir} ...', file=sys.stderr) + os.makedirs(dest_dir, exist_ok=True) + + with zipfile.ZipFile(io.BytesIO(data)) as zf: + # GitHub archives wrap everything in a top-level directory, e.g. + # "cross-origin-storage-extension-main/". Strip that prefix. 
+ names = zf.namelist() + prefix = names[0].split('/')[0] + '/' if names else '' + for member in names: + if member == prefix: + continue + rel = member[len(prefix):] + if not rel: + continue + target = os.path.join(dest_dir, rel) + if member.endswith('/'): + os.makedirs(target, exist_ok=True) + else: + os.makedirs(os.path.dirname(target), exist_ok=True) + with zf.open(member) as src, open(target, 'wb') as dst: + dst.write(src.read()) + + extension_dir = _find_manifest(dest_dir) + if extension_dir is None: + print( + f'ERROR: manifest.json not found anywhere under {dest_dir}.\n' + 'The extension repository structure may have changed.\n' + 'Please report this at https://github.com/emscripten-core/emscripten/issues', + file=sys.stderr, + ) + sys.exit(1) + + return extension_dir + + +def main(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + '--dest', + default=DEFAULT_DEST, + help='Directory to unpack the extension into (default: out/cos_extension)', + ) + parser.add_argument( + '--force', + action='store_true', + help='Re-download even if the extension is already present', + ) + parser.add_argument( + '--quiet', + action='store_true', + help='Suppress informational messages; print only the extension path', + ) + args = parser.parse_args() + + dest = os.path.realpath(args.dest) + + extension_dir = _find_manifest(dest) + if extension_dir and not args.force: + if not args.quiet: + print( + f'Extension already present at {extension_dir} ' + f'(use --force to re-download)', + file=sys.stderr, + ) + else: + extension_dir = download_and_unpack(dest, quiet=args.quiet) + if not args.quiet: + print(f'Extension ready at {extension_dir}', file=sys.stderr) + + # Always print the path as the last line so callers can capture it with + # $(...) or --quiet. 
+ print(extension_dir) + + +if __name__ == '__main__': + main() diff --git a/test/test_browser.py b/test/test_browser.py index e71c9a8f94293..3be485c39c4b8 100644 --- a/test/test_browser.py +++ b/test/test_browser.py @@ -24,6 +24,7 @@ import common from browser_common import ( CHROMIUM_BASED_BROWSERS, + EMTEST_COS_EXTENSION_PATH, BrowserCore, HttpServerThread, Reporting, @@ -5696,6 +5697,61 @@ def test_binary_encode(self, extra): def test_shell_minimal(self, args): self.btest_exit('browser_test_hello_world.c', cflags=['--shell-file', path_from_root('html/shell_minimal.html')] + args) + def test_cross_origin_storage_fallback(self): + if not is_chrome(): + self.skipTest('cross-origin storage tests require a Chromium-based browser') + self.btest_exit('browser_test_hello_world.c', + cflags=['-O2', '-sCROSS_ORIGIN_STORAGE', '-Wno-experimental']) + + def test_cross_origin_storage_miss_then_hit(self): + if not is_chrome(): + self.skipTest('cross-origin storage tests require a Chromium-based browser') + if not EMTEST_COS_EXTENSION_PATH: + self.skipTest( + 'set EMTEST_COS_EXTENSION_PATH to the COS extension directory; ' + 'run test/setup_cos_extension.py to download it automatically. ' + 'Note: --load-extension requires Chromium or Chrome for Testing, ' + 'not the official Google Chrome release.', + ) + + # Restart the browser with a fresh user-data-dir so the extension starts + # with empty storage. Without this, a wasm entry written by a previous + # test run (or a retry of this test) would cause the first page load below + # to be a cache-hit instead of the expected cache-miss. + self.browser_restart() + + # A pre-js that reports via the callbacks instead of from C. + # onCOSStore fires after writable.close() completes, so the data is + # durably written before the window closes and the second load begins. 
def test_cross_origin_storage(self):
  """Check that -sCROSS_ORIGIN_STORAGE embeds the wasm SHA-256 in the JS output.

  First build: with the flag enabled, the generated ``hello.js`` must contain
  an ``algorithm: 'SHA-256', value: '<64 hex chars>'`` entry whose value equals
  the SHA-256 digest of the emitted ``hello.wasm``.

  Second build: without the flag, the generated JS must not mention
  ``crossOriginStorage`` or ``Module['wasmHash']`` at all.
  """
  self.run_process([EMCC, test_file('hello_world.c'),
                    '-sCROSS_ORIGIN_STORAGE',
                    '-sENVIRONMENT=web',
                    '-o', 'hello.js'])
  js = read_file('hello.js')
  m = re.search(r"algorithm:\s*'SHA-256',\s*value:\s*'([0-9a-f]{64})'", js)
  self.assertIsNotNone(m, 'could not find a 64-char hex hash value in JS output')
  embedded_hash = m.group(1)

  # Read the binary through a context manager so the handle is closed
  # deterministically instead of being left for the garbage collector.
  with open('hello.wasm', 'rb') as wasm_file:
    expected_hash = hashlib.sha256(wasm_file.read()).hexdigest()
  self.assertEqual(embedded_hash, expected_hash,
                   'embedded wasm hash does not match actual .wasm SHA-256')

  # Rebuild without the flag: none of the COS glue may leak into the output.
  self.run_process([EMCC, test_file('hello_world.c'),
                    '-sENVIRONMENT=web',
                    '-o', 'hello.js'])
  js = read_file('hello.js')
  self.assertNotContained('crossOriginStorage', js)
  self.assertNotContained("Module['wasmHash']", js)
def setup_cross_origin_storage():
  """Validate the settings that -sCROSS_ORIGIN_STORAGE relies on.

  Calls ``exit_with_error`` when:

  - the build cannot target the web (``settings.ENVIRONMENT_MAY_BE_WEB`` is
    false),
  - ``settings.CROSS_ORIGIN_STORAGE_ORIGINS`` mixes ``'*'`` with other
    entries, or
  - an entry other than ``'*'`` is not of the form ``https://host`` or
    ``https://host:port``.
  """
  if not settings.ENVIRONMENT_MAY_BE_WEB:
    exit_with_error('CROSS_ORIGIN_STORAGE requires a web environment (navigator.crossOriginStorage is not available outside the browser)')

  origins = settings.CROSS_ORIGIN_STORAGE_ORIGINS
  if '*' in origins and len(origins) > 1:
    exit_with_error("CROSS_ORIGIN_STORAGE_ORIGINS: '*' must not be mixed with explicit origins")

  for o in origins:
    if o == '*':
      continue
    # The host is either a bracketed IPv6 literal or a run of characters that
    # cannot begin a port, path, query, fragment or userinfo component. A
    # looser class such as `[^/]+` would swallow the ':' itself, making the
    # port group meaningless and letting through values like
    # 'https://host:notaport', 'https://host?x=1' or 'https://user@host'.
    if not re.fullmatch(r'https://(?:\[[0-9A-Fa-f:.]+\]|[^\s/?#@:\[\]]+)(?::\d+)?', o):
      exit_with_error(f"CROSS_ORIGIN_STORAGE_ORIGINS: {o!r} is not a valid HTTPS origin (expected 'https://host' or 'https://host:port')")
phase_linker_setup(options, linker_args): # noqa: C901, PLR0912, PLR0915 """Future modifications should consider refactoring to reduce complexity. @@ -1210,6 +1224,9 @@ def limit_incoming_module_api(): if settings.WASM == 2 and settings.SINGLE_FILE: exit_with_error('cannot have both WASM=2 and SINGLE_FILE enabled at the same time') + if settings.CROSS_ORIGIN_STORAGE: + setup_cross_origin_storage() + if settings.MINIMAL_RUNTIME_STREAMING_WASM_COMPILATION and options.oformat != OFormat.HTML: exit_with_error('MINIMAL_RUNTIME_STREAMING_WASM_COMPILATION is only compatible with html output') @@ -1904,6 +1921,15 @@ def phase_post_link(options, in_wasm, wasm_target, target, js_syms, base_metadat phase_binaryen(target, options, wasm_target) + # Compute the SHA-256 hash of the final wasm (after binaryen) and substitute + # the <<< WASM_HASH_VALUE >>> placeholder that preamble.js left in the JS. + if final_js and settings.CROSS_ORIGIN_STORAGE: + if os.path.exists(wasm_target): + wasm_hash_value = hashlib.sha256(utils.read_binary(wasm_target)).hexdigest() + logger.debug(f'CROSS_ORIGIN_STORAGE: wasm SHA-256 = {wasm_hash_value}') + js_content = do_replace(read_file(final_js), '<<< WASM_HASH_VALUE >>>', wasm_hash_value) + write_file(final_js, js_content) + # If we are not emitting any JS then we are all done now if options.oformat != OFormat.WASM: phase_final_emitting(options, target, js_target, wasm_target) diff --git a/tools/settings.py b/tools/settings.py index 3f09a6cc0e863..dfb977721cf77 100644 --- a/tools/settings.py +++ b/tools/settings.py @@ -149,12 +149,16 @@ ('LEGACY_VM_SUPPORT', 'MEMORY64', None), ('CROSS_ORIGIN', 'NO_DYNAMIC_EXECUTION', None), ('CROSS_ORIGIN', 'NO_PTHREADS', None), + ('CROSS_ORIGIN_STORAGE', 'SINGLE_FILE', 'the .wasm binary is inlined directly into the JS output and has no fetchable URL to key the hash on'), + ('CROSS_ORIGIN_STORAGE', 'NO_WASM_ASYNC_COMPILATION', 'synchronous instantiation does not use the COS fetch path'), + ('CROSS_ORIGIN_STORAGE', 
'SIDE_MODULE', 'no JS glue is emitted to carry the hash or perform the COS lookup'), ] EXPERIMENTAL_SETTINGS = { 'SPLIT_MODULE': '-sSPLIT_MODULE is experimental and subject to change', 'SOURCE_PHASE_IMPORTS': '-sSOURCE_PHASE_IMPORTS is experimental and not yet supported in browsers', 'JS_BASE64_API': '-sJS_BASE64_API is experimental and not yet supported in browsers', + 'CROSS_ORIGIN_STORAGE': '-sCROSS_ORIGIN_STORAGE is experimental; the underlying browser API is not yet shipped in any browser', 'GROWABLE_ARRAYBUFFERS': '-sGROWABLE_ARRAYBUFFERS is still experimental and has only recently become available in browsers', 'SUPPORT_BIG_ENDIAN': '-sSUPPORT_BIG_ENDIAN is experimental, not all features are fully supported.', 'WASM_ESM_INTEGRATION': '-sWASM_ESM_INTEGRATION is still experimental and not yet supported in browsers',