From d09f1a3b38c0aaf8f306a6046d9d9d2c4d42b24f Mon Sep 17 00:00:00 2001 From: lmoresi Date: Thu, 14 May 2026 20:39:38 +1000 Subject: [PATCH 1/2] Fix SL trace-back FE overshoot + add monotone limiter to SemiLagrangian DDt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SL trace-back in SemiLagrangian.update_pre_solve was calling uw.function.global_evaluate(...) at two sites (velocity midpoint and psi_star endpoint) without propagating its enclosing evalf kwarg, so the trace-back always used FE Lagrange shape-function interpolation regardless of any user-facing RBF flag. P3 Lagrange overshoots at non-nodal upstream points in cells with sharp gradients (Runge-like local non-monotonicity), producing psi_star values outside the data range that feed the implicit SNES and ignite catastrophic ringing on free-surface convection at high Ra. Two changes in ddt.py SemiLagrangian: 1. Plumb evalf through both global_evaluate calls (velocity midpoint and psi_star endpoint). When evalf=True the trace-back now routes through Shepard 3-NN inverse-distance interpolation (bounded by neighbour values), as the calling solver could already request via AdvDiffusionSLCN.solve(_evalf=True). 2. Add monotone_mode instance attribute + per-call kwarg with two limiter options applied after the trace-back evaluation: "clamp" (B.2): clip the trace-back result to [nbr_min, nbr_max] of the k=dim+1 nearest psi_star DOFs. Bit-identical to FE in smooth regions; saves the run at the catastrophe step. Cost: one extra kdtree query per update_pre_solve. "pick" (B.1): keep the FE result if it lies within nbr bounds, else re-evaluate via RBF at out-of-bounds DOFs. More conservative than clamp; doubles trace-back cost when triggered. Default (None) is unchanged behaviour (pure FE, can overshoot). Settable via ``adv_diff.DuDt.monotone_mode = "clamp"`` (same pattern as ``theta``). Tested against the RK4-full free-surface convection canonical case (Ra=1e5, ρg=1e5, P3 T, CN diffusion, 35 steps; default FE rings catastrophically at step 35 with post_raw=[-1.12, +2.56], 432 bad DOFs). With ``monotone_mode = "clamp"`` the run is bit-identical to FE through step 30 (no overshoots, no DOFs out of nbr bounds; the clamp is a no-op), then at step 35 it produces a clean physical solution: post_raw=[0, 1.000], Nu=143 (vs FE artefact 245), vrms=81, h_max=0.151. Wall time per step unchanged (~19s). Underworld development team with AI support from Claude Code --- src/underworld3/systems/ddt.py | 81 ++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/underworld3/systems/ddt.py b/src/underworld3/systems/ddt.py index 39bf5692..4d3f0052 100644 --- a/src/underworld3/systems/ddt.py +++ b/src/underworld3/systems/ddt.py @@ -1172,6 +1172,7 @@ def __init__( smoothing=0.0, preserve_moments=False, with_forcing_history: bool = False, + monotone_mode: Optional[str] = None, ): super().__init__() @@ -1185,6 +1186,20 @@ def __init__( self.order = order self.preserve_moments = preserve_moments self.with_forcing_history = with_forcing_history + # Monotonicity limiter for the SL trace-back result. Bound + # the FE-interpolated upstream sample to the local data range + # of psi_star at each trace-back point. Cures the FE Lagrange + # overshoot pattern in cells with sharp gradients while + # preserving FE accuracy elsewhere. + # None → pure FE (legacy; can overshoot at non-nodal + # points in cells with sharp gradients) + # "clamp" → B.2: clip FE result to [nbr_min, nbr_max] of + # the k=dim+1 nearest psi_star DOFs + # "pick" → B.1: keep FE if in nbr bounds, else re-evaluate + # via RBF (Shepard) at out-of-bounds DOFs + # Settable after construction: + # ``adv_diff.DuDt.monotone_mode = "clamp"`` + self.monotone_mode = monotone_mode # Forcing-history storage. Allocated only if requested. Populated # each step via update_forcing_history(forcing_fn) — used by ETD-2 @@ -1636,6 +1651,7 @@ def update_pre_solve( verbose: Optional[bool] = False, dt_physical: Optional[float] = None, store_result: Optional[bool] = True, + monotone_mode: Optional[str] = "__instance__", ): """Sample upstream values along characteristics before solve. @@ -1651,10 +1667,19 @@ def update_pre_solve( existing psi_star levels upstream. Used by VE_Stokes where psi_star[0] already contains the projected actual stress from the previous solve. + monotone_mode : str or None, optional + Override the instance ``self.monotone_mode`` for this call. + Default ``"__instance__"`` (sentinel) means "use whatever is + on the instance". Pass ``None``, ``"clamp"``, or ``"pick"`` + to force a particular mode for one call. """ self._dt = dt + # Resolve monotone_mode: explicit kwarg overrides instance attr. + if monotone_mode == "__instance__": + monotone_mode = getattr(self, "monotone_mode", None) + if not self._history_initialised: self.initialise_history() @@ -1903,6 +1928,7 @@ def update_pre_solve( v_mid_result = uw.function.global_evaluate( self.V_fn, mid_pt_coords, + evalf=evalf, ) # CRITICAL: Preserve UnitAwareArray through slicing @@ -1967,9 +1993,16 @@ def update_pre_solve( # Evaluate psi_star at upstream coordinates # global_evaluate now returns dimensional results (gateway fix 2025-11-28) + # When evalf=True, route through RBF (Shepard, bounded by + # neighbour values) instead of FE shape functions. FE + # Lagrange P3 can overshoot at non-nodal upstream points + # in cells with sharp gradients — observed as the 'pepper' + # DOF scatter that ignites catastrophic ringing on free- + # surface convection at high Ra. value_at_end_points = uw.function.global_evaluate( expr_to_evaluate, end_pt_coords, + evalf=evalf, ) # CRITICAL FIX (2025-11-27): If psi_star has units, ensure the assigned @@ -1978,6 +2011,54 @@ def update_pre_solve( if psi_star_units is not None and not isinstance(value_at_end_points, UnitAwareArray): value_at_end_points = UnitAwareArray(value_at_end_points, units=psi_star_units) + # Monotonicity limiter (B.1 / B.2). Bound the FE/RBF + # trace-back result to the local data range of psi_star + # near each upstream coord. + # monotone_mode = "clamp" → B.2: clip to [nbr_min, nbr_max] + # monotone_mode = "pick" → B.1: keep FE in bounds, else RBF + if monotone_mode in ("clamp", "pick"): + # Plain-numpy coords for kdtree (handle pint/unit-aware) + if hasattr(end_pt_coords, "magnitude"): + epc_nd = np.asarray(end_pt_coords.magnitude) + else: + epc_nd = np.asarray(end_pt_coords) + psi_coords_nd = np.asarray(self.psi_star[i].coords_nd) + if hasattr(psi_coords_nd, "magnitude"): + psi_coords_nd = np.asarray(psi_coords_nd.magnitude) + nnn = self.mesh.dim + 1 + kdt = uw.kdtree.KDTree( + np.ascontiguousarray(psi_coords_nd)) + _, idxs = kdt.query( + np.ascontiguousarray(epc_nd), k=nnn, + sqr_dists=False) + psi_data = np.asarray(self.psi_star[i].data) + # Flatten trailing dims to compute per-coord nbr stats, + # then restore the original shape afterwards. + psi_data_flat = psi_data.reshape(psi_data.shape[0], -1) + nbr_vals = psi_data_flat[idxs] + nbr_min = nbr_vals.min(axis=1) + nbr_max = nbr_vals.max(axis=1) + veep_np = np.asarray(value_at_end_points) + orig_shape = veep_np.shape + veep_flat = veep_np.reshape(nbr_min.shape) + if monotone_mode == "clamp": + veep_lim = np.clip(veep_flat, nbr_min, nbr_max) + else: + # B.1 "pick": re-evaluate via RBF where FE was out of bounds. + value_rbf = uw.function.global_evaluate( + expr_to_evaluate, end_pt_coords, evalf=True) + vrbf_flat = np.asarray(value_rbf).reshape(nbr_min.shape) + out_of_bounds = ((veep_flat < nbr_min) + | (veep_flat > nbr_max)) + veep_lim = np.where(out_of_bounds, vrbf_flat, veep_flat) + value_at_end_points = veep_lim.reshape(orig_shape) + # Re-wrap units after numpy ops, if needed. + if (psi_star_units is not None + and not isinstance(value_at_end_points, + UnitAwareArray)): + value_at_end_points = UnitAwareArray( + value_at_end_points, units=psi_star_units) + self.psi_star[i].array[...] = value_at_end_points # disable this for now - Compute moments before update From 3a4af8e26eae41a9dc8b6b742fd62f38cb8d762d Mon Sep 17 00:00:00 2001 From: lmoresi Date: Thu, 14 May 2026 21:32:52 +1000 Subject: [PATCH 2/2] Address Copilot review on PR #186 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Non-dimensionalise end_pt_coords for the kdtree query. In unit-aware runs end_pt_coords is dimensional (e.g. metres) while psi_star[i].coords_nd is [0,1] non-dimensional, so without explicit conversion the kdtree picks wrong neighbours and the limiter would clamp/pick against unrelated DOFs. Match the same conversion pattern used earlier in the function for psi_star_0_coords → psi_star_0_coords_nd. 2. In pick mode, only evaluate the RBF for the subset of upstream coords whose FE result is out-of-bounds. The previous code unconditionally ran a second global_evaluate(..., evalf=True) over ALL coords, paying ~2x trace-back cost on every call even when zero DOFs needed re-evaluation. Now the cost is dominated by the cheap FE pass when most DOFs are in-bounds. Underworld development team with AI support from Claude Code --- src/underworld3/systems/ddt.py | 47 +++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/src/underworld3/systems/ddt.py b/src/underworld3/systems/ddt.py index 4d3f0052..25b7fbd7 100644 --- a/src/underworld3/systems/ddt.py +++ b/src/underworld3/systems/ddt.py @@ -2017,9 +2017,23 @@ def update_pre_solve( # monotone_mode = "clamp" → B.2: clip to [nbr_min, nbr_max] # monotone_mode = "pick" → B.1: keep FE in bounds, else RBF if monotone_mode in ("clamp", "pick"): - # Plain-numpy coords for kdtree (handle pint/unit-aware) + # Convert end_pt_coords to ND space matching the + # psi_star DOF coords so the kdtree query compares + # like-with-like. Same pattern as the + # psi_star_0_coords → psi_star_0_coords_nd conversion + # above. In unit-aware runs `end_pt_coords` is + # dimensional (e.g. metres) while + # `psi_star[i].coords_nd` is [0,1] non-dimensional, + # so without this conversion the kdtree picks wrong + # neighbours. if hasattr(end_pt_coords, "magnitude"): - epc_nd = np.asarray(end_pt_coords.magnitude) + epc_raw = uw.non_dimensionalise(end_pt_coords) + if isinstance(epc_raw, UnitAwareArray): + epc_nd = np.array(epc_raw) + elif hasattr(epc_raw, "magnitude"): + epc_nd = epc_raw.magnitude + else: + epc_nd = np.asarray(epc_raw) else: epc_nd = np.asarray(end_pt_coords) psi_coords_nd = np.asarray(self.psi_star[i].coords_nd) @@ -2044,13 +2058,32 @@ def update_pre_solve( if monotone_mode == "clamp": veep_lim = np.clip(veep_flat, nbr_min, nbr_max) else: - # B.1 "pick": re-evaluate via RBF where FE was out of bounds. - value_rbf = uw.function.global_evaluate( - expr_to_evaluate, end_pt_coords, evalf=True) - vrbf_flat = np.asarray(value_rbf).reshape(nbr_min.shape) + # B.1 "pick": re-evaluate via RBF only at the + # subset of upstream coords whose FE result is + # out-of-bounds. Keeps pick-mode cost dominated + # by the cheap FE pass when most DOFs are + # in-bounds; the RBF eval is the expensive part + # so we don't want it for points we're going to + # keep anyway. out_of_bounds = ((veep_flat < nbr_min) | (veep_flat > nbr_max)) - veep_lim = np.where(out_of_bounds, vrbf_flat, veep_flat) + oob_mask = out_of_bounds.any(axis=tuple( + range(1, out_of_bounds.ndim))) + veep_lim = veep_flat.copy() + if oob_mask.any(): + oob_coords = end_pt_coords[oob_mask] + value_rbf_oob = uw.function.global_evaluate( + expr_to_evaluate, oob_coords, evalf=True) + vrbf_flat = np.asarray(value_rbf_oob).reshape( + (-1,) + veep_flat.shape[1:]) + # Within the oob subset, only overwrite + # entries that are individually out of bounds + # (multi-component case). + sub_oob = out_of_bounds[oob_mask] + veep_sub = veep_lim[oob_mask] + veep_sub = np.where( + sub_oob, vrbf_flat, veep_sub) + veep_lim[oob_mask] = veep_sub value_at_end_points = veep_lim.reshape(orig_shape) # Re-wrap units after numpy ops, if needed. if (psi_star_units is not None