diff --git a/test/codesize/test_codesize_cxx_lto.json b/test/codesize/test_codesize_cxx_lto.json index 81891056f9694..488fdb5e0e149 100644 --- a/test/codesize/test_codesize_cxx_lto.json +++ b/test/codesize/test_codesize_cxx_lto.json @@ -1,40 +1,37 @@ { - "a.out.js": 18662, - "a.out.js.gz": 7691, - "a.out.nodebug.wasm": 102186, - "a.out.nodebug.wasm.gz": 39572, - "total": 120848, - "total_gz": 47263, + "a.out.js": 18357, + "a.out.js.gz": 7566, + "a.out.nodebug.wasm": 101980, + "a.out.nodebug.wasm.gz": 39445, + "total": 120337, + "total_gz": 47011, "sent": [ "a (emscripten_resize_heap)", - "b (_setitimer_js)", - "c (_emscripten_runtime_keepalive_clear)", - "d (_abort_js)", - "e (proc_exit)", - "f (fd_write)", - "g (fd_seek)", - "h (fd_read)", - "i (fd_close)", - "j (environ_sizes_get)", - "k (environ_get)" + "b (_emscripten_runtime_keepalive_clear)", + "c (_abort_js)", + "d (proc_exit)", + "e (fd_write)", + "f (fd_seek)", + "g (fd_read)", + "h (fd_close)", + "i (environ_sizes_get)", + "j (environ_get)" ], "imports": [ "a (emscripten_resize_heap)", - "b (_setitimer_js)", - "c (_emscripten_runtime_keepalive_clear)", - "d (_abort_js)", - "e (proc_exit)", - "f (fd_write)", - "g (fd_seek)", - "h (fd_read)", - "i (fd_close)", - "j (environ_sizes_get)", - "k (environ_get)" + "b (_emscripten_runtime_keepalive_clear)", + "c (_abort_js)", + "d (proc_exit)", + "e (fd_write)", + "f (fd_seek)", + "g (fd_read)", + "h (fd_close)", + "i (environ_sizes_get)", + "j (environ_get)" ], "exports": [ - "l (memory)", - "m (__wasm_call_ctors)", - "n (main)", - "o (_emscripten_timeout)" + "k (memory)", + "l (__wasm_call_ctors)", + "m (main)" ] } diff --git a/test/js_optimizer/emitDCEGraph-output.js b/test/js_optimizer/emitDCEGraph-output.js index 2d7801b575d30..6cbab15f8bca4 100644 --- a/test/js_optimizer/emitDCEGraph-output.js +++ b/test/js_optimizer/emitDCEGraph-output.js @@ -1,8 +1,18 @@ [ + { + "name": "emcc$defun$_bad", + "reaches": [] + }, { "name": "emcc$defun$applySignatureConversions", "reaches": [] }, + { + "name": "emcc$defun$func", + "reaches": [ + "emcc$defun$usedFromDeep2" + ] + }, { "name": "emcc$defun$rootedFunc1", "reaches": [], @@ -48,8 +58,7 @@ }, { "name": "emcc$defun$usedFromDeep2", - "reaches": [], - "root": true + "reaches": [] }, { "name": "emcc$defun$user", diff --git a/test/js_optimizer/emitDCEGraph-scopes-output.js b/test/js_optimizer/emitDCEGraph-scopes-output.js index 28962c848bd78..55d90d0e917ff 100644 --- a/test/js_optimizer/emitDCEGraph-scopes-output.js +++ b/test/js_optimizer/emitDCEGraph-scopes-output.js @@ -1,8 +1,14 @@ [ + { + "name": "emcc$defun$arrow", + "reaches": [ + "emcc$defun$arrowed", + "emcc$defun$caller" + ] + }, { "name": "emcc$defun$arrowed", - "reaches": [], - "root": true + "reaches": [] }, { "name": "emcc$defun$bar", diff --git a/test/js_optimizer/emitDCEGraph-vardefs-output.js b/test/js_optimizer/emitDCEGraph-vardefs-output.js new file mode 100644 index 0000000000000..2c9fb28799c83 --- /dev/null +++ b/test/js_optimizer/emitDCEGraph-vardefs-output.js @@ -0,0 +1,75 @@ +[ + { + "name": "emcc$defun$__setitimer_js", + "reaches": [ + "emcc$export$__emscripten_timeout" + ] + }, + { + "name": "emcc$defun$helped", + "reaches": [] + }, + { + "name": "emcc$defun$helper", + "reaches": [ + "emcc$defun$helped" + ] + }, + { + "name": "emcc$defun$namedFE", + "reaches": [ + "emcc$defun$recur" + ] + }, + { + "name": "emcc$defun$reassigned", + "reaches": [ + "emcc$defun$usedByOriginal" + ], + "root": true + }, + { + "name": "emcc$defun$recur", + "reaches": [] + }, + { + "name": "emcc$defun$rootedByShadow", + "reaches": [], + "root": true + }, + { + "name": "emcc$defun$rootedFromMethod", + "reaches": [], + "root": true + }, + { + "name": "emcc$defun$usedByOriginal", + "reaches": [] + }, + { + "name": "emcc$defun$usedByReplacement", + "reaches": [], + "root": true + }, + { + "name": "emcc$export$__emscripten_timeout", + "export": "_emscripten_timeout", + "reaches": [] + }, + { + "name": "emcc$export$_expD1", + "export": "expD1", + "reaches": [], + "root": true + }, + { + "name": "emcc$import$__setitimer_js", + "import": [ + "env", + "setitimer_js" + ], + "reaches": [ + "emcc$defun$__setitimer_js" + ] + } +] diff --git a/test/js_optimizer/emitDCEGraph-vardefs.js b/test/js_optimizer/emitDCEGraph-vardefs.js new file mode 100644 index 0000000000000..404c446d24699 --- /dev/null +++ b/test/js_optimizer/emitDCEGraph-vardefs.js @@ -0,0 +1,69 @@ +// Functions defined by assignment of an arrow function or function expression +// to a top-level variable are tracked like function declarations. This is the +// form in which JS library functions are emitted, so it is what allows +// breaking JS<->wasm dependency cycles (see issue #26038). + +// The JS implementation of a wasm import references a wasm export, forming a +// JS<->wasm cycle. Nothing else uses either, so none of these may be rooted: +// metadce must be able to remove the entire cycle. +var __setitimer_js = (which, timeout) => { + __emscripten_timeout(which, timeout); +}; + +// A function expression is tracked just like an arrow function. +function helped() { +} +var helper = function() { + helped(); +}; + +// A named function expression: the inner reference is to the expression +// itself, but we conservatively attribute it to the same-named top-level +// function, keeping it alive. +function recur() { +} +var namedFE = function recur() { + recur(); +}; + +// Reassigning a tracked name roots it: the assignment target is an identifier +// use, and the replacement body is treated as top-level code. +function usedByOriginal() { +} +function usedByReplacement() { +} +var reassigned = () => { + usedByOriginal(); +}; +reassigned = () => { + usedByReplacement(); +}; + +// A variable whose name was already saved as a wasm export is not tracked +// (re-mapping the name would misattribute references to the export), so its +// contents are treated as top-level code, which roots them. +var _expD1 = wasmExports['expD1']; +function rootedByShadow() { +} +var _expD1 = () => { + rootedByShadow(); +}; + +// A variable function nested inside an untracked scope (an object method) is +// not tracked either; its contents are treated as top-level code. +function rootedFromMethod() { +} +var obj = { + method() { + var innerArrow = () => { + rootedFromMethod(); + }; + }, +}; + +// wasm exports received in the usual way. +var __emscripten_timeout = wasmExports['_emscripten_timeout']; + +var wasmImports = { + setitimer_js: __setitimer_js, +}; diff --git a/test/test_other.py b/test/test_other.py index c90480d7bc4c2..a8a9c41f0a066 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -2988,6 +2988,7 @@ def test_extern_prepost(self): 'emitDCEGraph-sig': (['emitDCEGraph', '--no-print'],), 'emitDCEGraph-prefixing': (['emitDCEGraph', '--no-print'],), 'emitDCEGraph-scopes': (['emitDCEGraph', '--no-print'],), + 'emitDCEGraph-vardefs': (['emitDCEGraph', '--no-print'],), 'minimal-runtime-applyDCEGraphRemovals': (['applyDCEGraphRemovals'],), 'applyDCEGraphRemovals': (['applyDCEGraphRemovals'],), 'applyImportAndExportNameChanges': (['applyImportAndExportNameChanges'],), diff --git a/tools/acorn-optimizer.mjs b/tools/acorn-optimizer.mjs index e1b5604981450..a034d6e398356 100755 --- a/tools/acorn-optimizer.mjs +++ b/tools/acorn-optimizer.mjs @@ -621,10 +621,13 @@ function emitDCEGraph(ast) { } // We track defined functions very carefully, so that we can remove them and - // the things they call, but other function scopes (like arrow functions and - // object methods) are trickier to track (object methods require knowing what - // object a function name is called on), so we do not track those. We consider - // all content inside them as top-level, which means it is used. + // the things they call. That includes functions defined by assignment of a + // function expression or arrow function to a top-level variable (which is + // how JS library functions are emitted). Other function scopes (like object + // methods, or arrow functions not directly assigned to a variable) are + // trickier to track (object methods require knowing what object a function + // name is called on), so we do not track those. We consider all content + // inside them as top-level, which means it is used. var specialScopes = 0; fullWalk( @@ -699,6 +702,31 @@ function emitDCEGraph(ast) { emptyOut(node); } } + } else if ( + value && + (value.type === 'ArrowFunctionExpression' || value.type === 'FunctionExpression') && + !specialScopes && + // If the name already maps to a graph node (a saved export, or an + // earlier same-named function) do not track this one: re-mapping + // the name would misattribute references to the wrong node, which + // could lead to unsound removals. Left untracked, the contents + // are treated as top-level code, which is conservative. + !nameToGraphName.hasOwnProperty(name) + ) { + // this is a function defined by assignment to a variable, e.g. + // var x = () => { .. }; + // var x = function() { .. }; + // We track it just like a function declaration, which allows DCE + // of wasm exports that are only used inside such functions (e.g. + // JS library functions, which are emitted in this form). + // References through the name are attributed to this function. + // That is precise only if the name is never reassigned (generated + // code never reassigns these); if it were, the assignment would + // root this node (the assignment target is an identifier use), so + // this remains conservative. + defuns.push({id: {name}, body: value.body}); + nameToGraphName[name] = getGraphName(name, 'defun'); + emptyOut(node); // ignore this in the second pass; we scan defuns separately } } // A variable declaration that has no initial values can be ignored in