Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
8c3c10e
codex: seed faster-python-1 optimization plan
FabioLuporini Mar 30, 2026
cc446ae
compiler: Augment caching and memoization
FabioLuporini Mar 24, 2026
24ea6a0
codex: Update plan
FabioLuporini Apr 2, 2026
2b3da4e
compiler: Augment caching and tweak memoization heuristics
FabioLuporini Apr 2, 2026
4b7cf36
codex: raise timeout for heavy MPI adjoint tests
FabioLuporini Apr 2, 2026
afcb9ff
codex: update optimization plan summary
FabioLuporini Apr 2, 2026
046a1da
codex: cache TimedAccess instances
FabioLuporini Apr 3, 2026
5720f8a
codex: update plan after TimedAccess caching
FabioLuporini Apr 3, 2026
f29c218
codex: Update plan
FabioLuporini Apr 7, 2026
07b14c4
compiler: Add heuristics to improve fusion lowering turnaround
FabioLuporini Apr 7, 2026
f94c3f4
compiler: Improve Cluster fusion implementation
FabioLuporini Apr 8, 2026
7252c88
Update plan
FabioLuporini Apr 8, 2026
6249431
compiler: Avoid rebuilding IET if unnecessary
FabioLuporini Apr 8, 2026
7985efa
compiler: Fix propagation of transitive IET arg updates
FabioLuporini Apr 8, 2026
954fb3a
compiler: cache CGen printers by settings
FabioLuporini Apr 8, 2026
6e9d7b0
compiler: Enhance Scope to improve DDA turnaround
FabioLuporini Apr 9, 2026
30eb228
compiler: Exploit the new Scope API
FabioLuporini Apr 9, 2026
0b2e7ed
compiler: include ClusterGroup ispace in equality semantics
FabioLuporini Apr 9, 2026
b3bccea
compiler: Add update_args= contract to spare compilation time
FabioLuporini Apr 9, 2026
0cea087
compiler: Add heuristic for topofuse='maximal'
FabioLuporini Apr 10, 2026
16f4322
misc: Patch NVIDIA_VISIBLE_DEVICES and DeviceID
FabioLuporini Apr 10, 2026
00c74c9
Update plan
FabioLuporini Apr 11, 2026
2df56b4
Update plan
FabioLuporini Apr 14, 2026
467d2c0
tools: Add DefaultFrozenDict
FabioLuporini Apr 15, 2026
11432ff
compiler: Remove dead NodesExprs.dspace
FabioLuporini Apr 15, 2026
2273fe8
Update plan
FabioLuporini Apr 17, 2026
d987066
Update plan
FabioLuporini Apr 21, 2026
f98f167
Update plan
FabioLuporini Apr 22, 2026
bafb599
Update plan
FabioLuporini Apr 24, 2026
6605d09
tools: Add reuse_if_unchanged and exploit it
FabioLuporini Apr 24, 2026
61a5929
compiler: Split into EqBlock and Cluster
FabioLuporini Apr 27, 2026
a30bf99
compiler: Stash hash where essential for compilation performance
FabioLuporini Apr 28, 2026
e4ea78d
Update plan
FabioLuporini Apr 28, 2026
c0c0755
compiler: Exploit cached_hash
FabioLuporini Apr 28, 2026
3a9b1ea
compiler: Retain original objects whenever possible
FabioLuporini Apr 28, 2026
99a70ee
compiler: Minimize reconstructions everywhere
FabioLuporini Apr 29, 2026
408fb08
compiler: Memoize Fusion._key
FabioLuporini Apr 29, 2026
c05496d
compiler: Avoid rebuilding Nodes when possible
FabioLuporini Apr 29, 2026
685b01f
compiler: Memoize IET visitors
FabioLuporini Apr 29, 2026
749f904
compiler: Memoize IET engine
FabioLuporini Apr 30, 2026
c00b248
Update plan
FabioLuporini Apr 30, 2026
4a24b05
compiler: Avoid reconstructions in IET visitors
FabioLuporini Apr 30, 2026
c6fef66
compiler: Memoize FindNodes
FabioLuporini Apr 30, 2026
93ff555
Update plan
FabioLuporini May 4, 2026
acfbcba
Update plan
FabioLuporini May 4, 2026
392fc8a
compiler: Call finalize_args once at the end of the lowering
FabioLuporini May 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
667 changes: 667 additions & 0 deletions compier_optimization_plan.md

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def parallel(item, m):
raise ValueError(f"Can't run test: unexpected mode `{m}`")

env_vars = {'DEVITO_MPI': scheme}
timeout = item.get_closest_marker("parallel").kwargs.get('timeout', 300)

pyversion = sys.executable
testname = get_testname(item)
Expand All @@ -197,7 +198,7 @@ def parallel(item, m):
# OpenMPI requires an explicit flag for oversubscription. We need it as some
# of the MPI tests will spawn lots of processes
if mpi_distro == 'OpenMPI':
call = [mpi_exec, '--oversubscribe', '--timeout', '300'] + args
call = [mpi_exec, '--oversubscribe', '--timeout', str(timeout)] + args
else:
call = [mpi_exec] + args

Expand Down Expand Up @@ -228,7 +229,7 @@ def pytest_configure(config):
"""Register an additional marker."""
config.addinivalue_line(
"markers",
"parallel(mode): mark test to run in parallel"
"parallel(mode, timeout=300): mark test to run in parallel"
)
config.addinivalue_line(
"markers",
Expand Down
1 change: 1 addition & 0 deletions devito/arch/archinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,7 @@ def parse_product_arch():
def get_visible_devices():
device_vars = (
'CUDA_VISIBLE_DEVICES',
'NVIDIA_VISIBLE_DEVICES',
'ROCR_VISIBLE_DEVICES',
'HIP_VISIBLE_DEVICES'
)
Expand Down
5 changes: 4 additions & 1 deletion devito/finite_differences/derivative.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

import sympy

from devito.tools import Pickable, as_mapper, as_tuple, frozendict, is_integer
from devito.tools import (
Pickable, as_mapper, as_tuple, frozendict, is_integer, memoized_func
)
from devito.types.dimension import Dimension
from devito.types.utils import DimensionTuple
from devito.warnings import warn
Expand Down Expand Up @@ -546,6 +548,7 @@ def _evaluate(self, **kwargs):
def _eval_deriv(self):
return self._eval_fd(self.expr)

@memoized_func(scope='build')
def _eval_fd(self, expr, **kwargs):
"""
Evaluate the finite-difference approximation of the Derivative.
Expand Down
4 changes: 4 additions & 0 deletions devito/finite_differences/differentiable.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,10 @@ def compare(self, other):
def base(self):
return self.expr.func(*[a for a in self.expr.args if a is not self.weights])

@cached_property
def pivot(self):
return self.base.subs({d: 0 for d in self.dimensions})

@property
def weights(self):
return self._weights
Expand Down
5 changes: 3 additions & 2 deletions devito/finite_differences/finite_difference.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,14 +170,15 @@ def make_derivative(expr, dim, fd_order, deriv_order, side, matvec, x0, coeffici
# `coefficients` method (`taylor` or `symbolic`)
if weights is None:
weights = fd_weights_registry[coefficients](expr, deriv_order, indices, x0)
if isinstance(weights, Iterable) and len(weights) != len(indices):
_, wdim, _ = process_weights(weights, expr, dim)
elif isinstance(weights, Iterable) and len(weights) != len(indices):
warning(f"Number of weights ({len(weights)}) does not match "
f"number of indices ({len(indices)}), reverting to Taylor")
scale = False
wdim = None
weights = fd_weights_registry['taylor'](expr, deriv_order, indices, x0)

# Did fd_weights_registry return a new Function/Expression instead of a values?
_, wdim, _ = process_weights(weights, expr, dim)
if wdim is not None:
weights = [weights._subs(wdim, i) for i in range(len(indices))]

Expand Down
6 changes: 5 additions & 1 deletion devito/finite_differences/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,14 @@ def make_stencil_dimension(expr, _min, _max):


@cacheit
def numeric_weights(function, deriv_order, indices, x0):
def _numeric_weights(deriv_order, indices, x0):
return finite_diff_weights(deriv_order, indices, x0)[-1][-1]


def numeric_weights(function, deriv_order, indices, x0):
return _numeric_weights(deriv_order, indices, x0)


fd_weights_registry = {'taylor': numeric_weights, 'standard': numeric_weights,
'symbolic': numeric_weights} # Backward compat for 'symbolic'
coeff_priority = {'taylor': 1, 'standard': 1}
Expand Down
31 changes: 26 additions & 5 deletions devito/ir/cgen/printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
from devito.tools import ctypes_to_cstr, ctypes_vector_mapper, dtype_to_ctype
from devito.types.basic import AbstractFunction

__all__ = ['BasePrinter', 'ccode']
__all__ = ['BasePrinter', 'ccode', 'get_printer']

_preset_dtypes = (np.float32, np.float64, np.complex64, np.complex128)
_printer_registry = {}


class BasePrinter(CodePrinter):
Expand Down Expand Up @@ -449,15 +452,33 @@ def _print_Fallback(self, expr):
sympy.printing.str.StrPrinter._print_Add = BasePrinter._print_Add


def ccode(expr, printer=None, **settings):
def get_printer(printer, dtype=None):
    """
    Return a cached instance of ``printer`` configured for ``dtype``.

    Instances are memoized in the module-level ``_printer_registry``, keyed
    first by printer class and then by dtype, so repeated calls with the same
    arguments reuse one printer object instead of constructing a new one.

    Parameters
    ----------
    printer : type
        The printer class. It is called with no arguments to build the
        default instance, and with ``settings={'dtype': ...}`` otherwise.
    dtype : data-type, optional
        The dtype the returned printer is configured with. If None, the
        instance built with the printer's default settings is returned.

    Returns
    -------
    An instance of ``printer``.
    """
    try:
        registry = _printer_registry[printer]
    except KeyError:
        default = printer()
        # The default instance serves both `dtype=None` and its own dtype
        registry = {None: default, default.dtype: default}
        # Pre-populate the preset dtypes. An explicit membership test is used
        # instead of `setdefault`, since the latter would evaluate its
        # argument -- that is, build a printer -- even for a dtype that is
        # already served by the default instance
        for i in _preset_dtypes:
            if i not in registry:
                registry[i] = printer(settings={'dtype': i})
        _printer_registry[printer] = registry

    try:
        return registry[dtype]
    except KeyError:
        # First request for a non-preset dtype: build once, then reuse
        handle = registry[dtype] = printer(settings={'dtype': dtype})
        return handle


def ccode(expr, printer=None, dtype=None):
"""Generate C++ code from an expression.

Parameters
----------
expr : expr-like
The expression to be printed.
settings : dict
Options for code printing.
dtype : data-type, optional
Data type used by the printer.

Returns
-------
Expand All @@ -468,4 +489,4 @@ def ccode(expr, printer=None, **settings):
if printer is None:
from devito.passes.iet.languages.C import CPrinter
printer = CPrinter
return printer(settings=settings).doprint(expr, None)
return get_printer(printer, dtype).doprint(expr, None)
6 changes: 1 addition & 5 deletions devito/ir/clusters/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def _callback(self, clusters, dim, prefix):
is_parallel_atomic = False

scope = Scope(flatten(c.exprs for c in clusters))
for dep in scope.d_all_gen():
for dep in scope.d_all_gen(writes=scope.writes_tensor):
test00 = dep.is_indep(dim) and not dep.is_storage_related(dim)
test01 = all(dep.is_reduce_atmost(i) for i in prev)
if test00 and test01:
Expand All @@ -112,10 +112,6 @@ def _callback(self, clusters, dim, prefix):
is_parallel_indep &= (dep.distance_mapper.get(dim.root) == 0)
continue

if dep.function in scope.initialized:
# False alarm, the dependence is over a locally-defined symbol
continue

if dep.is_reduction:
is_parallel_atomic = True
continue
Expand Down
Loading
Loading