From d09f1a3b38c0aaf8f306a6046d9d9d2c4d42b24f Mon Sep 17 00:00:00 2001
From: lmoresi <louis.moresi@anu.edu.au>
Date: Thu, 14 May 2026 20:39:38 +1000
Subject: [PATCH 1/2] Fix SL trace-back FE overshoot + add monotone limiter to
 SemiLagrangian DDt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SL trace-back in SemiLagrangian.update_pre_solve was calling
uw.function.global_evaluate(...) at two sites (velocity midpoint and
psi_star endpoint) without propagating its enclosing evalf kwarg, so
the trace-back always used FE Lagrange shape-function interpolation
regardless of any user-facing RBF flag. P3 Lagrange overshoots at
non-nodal upstream points in cells with sharp gradients (Runge-like
local non-monotonicity), producing psi_star values outside the data
range that feed the implicit SNES and ignite catastrophic ringing on
free-surface convection at high Ra.

Two changes in ddt.py SemiLagrangian:

1. Plumb evalf through both global_evaluate calls (velocity midpoint
   and psi_star endpoint). When evalf=True the trace-back now routes
   through Shepard 3-NN inverse-distance interpolation (bounded by
   neighbour values), as the calling solver could already request via
   AdvDiffusionSLCN.solve(_evalf=True).

2. Add monotone_mode instance attribute + per-call kwarg with two
   limiter options applied after the trace-back evaluation:
     "clamp" (B.2): clip the trace-back result to [nbr_min, nbr_max]
                    of the k=dim+1 nearest psi_star DOFs. Bit-identical
                    to FE in smooth regions; saves the run at the
                    catastrophe step. Cost: one extra kdtree query per
                    update_pre_solve.
     "pick"  (B.1): keep the FE result if it lies within nbr bounds,
                    else re-evaluate via RBF at out-of-bounds DOFs.
                    More conservative than clamp; doubles trace-back
                    cost when triggered.
   Default (None) is unchanged behaviour (pure FE, can overshoot).
   Settable via ``adv_diff.DuDt.monotone_mode = "clamp"`` (same
   pattern as ``theta``).

Tested against the RK4-full free-surface convection canonical case
(Ra=1e5, ρg=1e5, P3 T, CN diffusion, 35 steps; default FE rings
catastrophically at step 35 with post_raw=[-1.12, +2.56], 432 bad
DOFs). With ``monotone_mode = "clamp"`` the run is bit-identical to
FE through step 30 (no overshoots, no DOFs out of nbr bounds; the
clamp is a no-op), then at step 35 it produces a clean physical
solution: post_raw=[0, 1.000], Nu=143 (vs FE artefact 245), vrms=81,
h_max=0.151. Wall time per step unchanged (~19s).

Underworld development team with AI support from Claude Code
---
 src/underworld3/systems/ddt.py | 81 ++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/src/underworld3/systems/ddt.py b/src/underworld3/systems/ddt.py
index 39bf5692..4d3f0052 100644
--- a/src/underworld3/systems/ddt.py
+++ b/src/underworld3/systems/ddt.py
@@ -1172,6 +1172,7 @@ def __init__(
         smoothing=0.0,
         preserve_moments=False,
         with_forcing_history: bool = False,
+        monotone_mode: Optional[str] = None,
     ):
         super().__init__()
 
@@ -1185,6 +1186,20 @@ def __init__(
         self.order = order
         self.preserve_moments = preserve_moments
         self.with_forcing_history = with_forcing_history
+        # Monotonicity limiter for the SL trace-back result. Bound
+        # the FE-interpolated upstream sample to the local data range
+        # of psi_star at each trace-back point. Cures the FE Lagrange
+        # overshoot pattern in cells with sharp gradients while
+        # preserving FE accuracy elsewhere.
+        #   None    → pure FE (legacy; can overshoot at non-nodal
+        #             points in cells with sharp gradients)
+        #   "clamp" → B.2: clip FE result to [nbr_min, nbr_max] of
+        #             the k=dim+1 nearest psi_star DOFs
+        #   "pick"  → B.1: keep FE if in nbr bounds, else re-evaluate
+        #             via RBF (Shepard) at out-of-bounds DOFs
+        # Settable after construction:
+        #   ``adv_diff.DuDt.monotone_mode = "clamp"``
+        self.monotone_mode = monotone_mode
 
         # Forcing-history storage. Allocated only if requested. Populated
         # each step via update_forcing_history(forcing_fn) — used by ETD-2
@@ -1636,6 +1651,7 @@ def update_pre_solve(
         verbose: Optional[bool] = False,
         dt_physical: Optional[float] = None,
         store_result: Optional[bool] = True,
+        monotone_mode: Optional[str] = "__instance__",
     ):
         """Sample upstream values along characteristics before solve.
 
@@ -1651,10 +1667,19 @@ def update_pre_solve(
             existing psi_star levels upstream. Used by VE_Stokes where
             psi_star[0] already contains the projected actual stress from
             the previous solve.
+        monotone_mode : str or None, optional
+            Override the instance ``self.monotone_mode`` for this call.
+            Default ``"__instance__"`` (sentinel) means "use whatever is
+            on the instance". Pass ``None``, ``"clamp"``, or ``"pick"``
+            to force a particular mode for one call.
         """
 
         self._dt = dt
 
+        # Resolve monotone_mode: explicit kwarg overrides instance attr.
+        if monotone_mode == "__instance__":
+            monotone_mode = getattr(self, "monotone_mode", None)
+
         if not self._history_initialised:
             self.initialise_history()
 
@@ -1903,6 +1928,7 @@ def update_pre_solve(
             v_mid_result = uw.function.global_evaluate(
                 self.V_fn,
                 mid_pt_coords,
+                evalf=evalf,
             )
 
             # CRITICAL: Preserve UnitAwareArray through slicing
@@ -1967,9 +1993,16 @@ def update_pre_solve(
 
             # Evaluate psi_star at upstream coordinates
             # global_evaluate now returns dimensional results (gateway fix 2025-11-28)
+            # When evalf=True, route through RBF (Shepard, bounded by
+            # neighbour values) instead of FE shape functions. FE
+            # Lagrange P3 can overshoot at non-nodal upstream points
+            # in cells with sharp gradients — observed as the 'pepper'
+            # DOF scatter that ignites catastrophic ringing on free-
+            # surface convection at high Ra.
             value_at_end_points = uw.function.global_evaluate(
                 expr_to_evaluate,
                 end_pt_coords,
+                evalf=evalf,
             )
 
             # CRITICAL FIX (2025-11-27): If psi_star has units, ensure the assigned
@@ -1978,6 +2011,54 @@ def update_pre_solve(
             if psi_star_units is not None and not isinstance(value_at_end_points, UnitAwareArray):
                 value_at_end_points = UnitAwareArray(value_at_end_points, units=psi_star_units)
 
+            # Monotonicity limiter (B.1 / B.2). Bound the FE/RBF
+            # trace-back result to the local data range of psi_star
+            # near each upstream coord.
+            #   monotone_mode = "clamp" → B.2: clip to [nbr_min, nbr_max]
+            #   monotone_mode = "pick"  → B.1: keep FE in bounds, else RBF
+            if monotone_mode in ("clamp", "pick"):
+                # Plain-numpy coords for kdtree (handle pint/unit-aware)
+                if hasattr(end_pt_coords, "magnitude"):
+                    epc_nd = np.asarray(end_pt_coords.magnitude)
+                else:
+                    epc_nd = np.asarray(end_pt_coords)
+                psi_coords_nd = np.asarray(self.psi_star[i].coords_nd)
+                if hasattr(psi_coords_nd, "magnitude"):
+                    psi_coords_nd = np.asarray(psi_coords_nd.magnitude)
+                nnn = self.mesh.dim + 1
+                kdt = uw.kdtree.KDTree(
+                    np.ascontiguousarray(psi_coords_nd))
+                _, idxs = kdt.query(
+                    np.ascontiguousarray(epc_nd), k=nnn,
+                    sqr_dists=False)
+                psi_data = np.asarray(self.psi_star[i].data)
+                # Flatten trailing dims to compute per-coord nbr stats,
+                # then restore the original shape afterwards.
+                psi_data_flat = psi_data.reshape(psi_data.shape[0], -1)
+                nbr_vals = psi_data_flat[idxs]
+                nbr_min = nbr_vals.min(axis=1)
+                nbr_max = nbr_vals.max(axis=1)
+                veep_np = np.asarray(value_at_end_points)
+                orig_shape = veep_np.shape
+                veep_flat = veep_np.reshape(nbr_min.shape)
+                if monotone_mode == "clamp":
+                    veep_lim = np.clip(veep_flat, nbr_min, nbr_max)
+                else:
+                    # B.1 "pick": re-evaluate via RBF where FE was out of bounds.
+                    value_rbf = uw.function.global_evaluate(
+                        expr_to_evaluate, end_pt_coords, evalf=True)
+                    vrbf_flat = np.asarray(value_rbf).reshape(nbr_min.shape)
+                    out_of_bounds = ((veep_flat < nbr_min)
+                                     | (veep_flat > nbr_max))
+                    veep_lim = np.where(out_of_bounds, vrbf_flat, veep_flat)
+                value_at_end_points = veep_lim.reshape(orig_shape)
+                # Re-wrap units after numpy ops, if needed.
+                if (psi_star_units is not None
+                        and not isinstance(value_at_end_points,
+                                           UnitAwareArray)):
+                    value_at_end_points = UnitAwareArray(
+                        value_at_end_points, units=psi_star_units)
+
             self.psi_star[i].array[...] = value_at_end_points
 
             # disable this for now - Compute moments before update

From 3a4af8e26eae41a9dc8b6b742fd62f38cb8d762d Mon Sep 17 00:00:00 2001
From: lmoresi <louis.moresi@anu.edu.au>
Date: Thu, 14 May 2026 21:32:52 +1000
Subject: [PATCH 2/2] Address Copilot review on PR #186
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. Non-dimensionalise end_pt_coords for the kdtree query. In unit-aware
   runs end_pt_coords is dimensional (e.g. metres) while
   psi_star[i].coords_nd is [0,1] non-dimensional, so without explicit
   conversion the kdtree picks wrong neighbours and the limiter would
   clamp/pick against unrelated DOFs. Match the same conversion pattern
   used earlier in the function for psi_star_0_coords → psi_star_0_coords_nd.

2. In pick mode, only evaluate the RBF for the subset of upstream coords
   whose FE result is out-of-bounds. The previous code unconditionally
   ran a second global_evaluate(..., evalf=True) over ALL coords, paying
   ~2x trace-back cost on every call even when zero DOFs needed
   re-evaluation. Now the cost is dominated by the cheap FE pass when
   most DOFs are in-bounds.

Underworld development team with AI support from Claude Code
---
 src/underworld3/systems/ddt.py | 47 +++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/src/underworld3/systems/ddt.py b/src/underworld3/systems/ddt.py
index 4d3f0052..25b7fbd7 100644
--- a/src/underworld3/systems/ddt.py
+++ b/src/underworld3/systems/ddt.py
@@ -2017,9 +2017,23 @@ def update_pre_solve(
             #   monotone_mode = "clamp" → B.2: clip to [nbr_min, nbr_max]
             #   monotone_mode = "pick"  → B.1: keep FE in bounds, else RBF
             if monotone_mode in ("clamp", "pick"):
-                # Plain-numpy coords for kdtree (handle pint/unit-aware)
+                # Convert end_pt_coords to ND space matching the
+                # psi_star DOF coords so the kdtree query compares
+                # like-with-like. Same pattern as the
+                # psi_star_0_coords → psi_star_0_coords_nd conversion
+                # above. In unit-aware runs `end_pt_coords` is
+                # dimensional (e.g. metres) while
+                # `psi_star[i].coords_nd` is [0,1] non-dimensional,
+                # so without this conversion the kdtree picks wrong
+                # neighbours.
                 if hasattr(end_pt_coords, "magnitude"):
-                    epc_nd = np.asarray(end_pt_coords.magnitude)
+                    epc_raw = uw.non_dimensionalise(end_pt_coords)
+                    if isinstance(epc_raw, UnitAwareArray):
+                        epc_nd = np.array(epc_raw)
+                    elif hasattr(epc_raw, "magnitude"):
+                        epc_nd = epc_raw.magnitude
+                    else:
+                        epc_nd = np.asarray(epc_raw)
                 else:
                     epc_nd = np.asarray(end_pt_coords)
                 psi_coords_nd = np.asarray(self.psi_star[i].coords_nd)
@@ -2044,13 +2058,32 @@ def update_pre_solve(
                 if monotone_mode == "clamp":
                     veep_lim = np.clip(veep_flat, nbr_min, nbr_max)
                 else:
-                    # B.1 "pick": re-evaluate via RBF where FE was out of bounds.
-                    value_rbf = uw.function.global_evaluate(
-                        expr_to_evaluate, end_pt_coords, evalf=True)
-                    vrbf_flat = np.asarray(value_rbf).reshape(nbr_min.shape)
+                    # B.1 "pick": re-evaluate via RBF only at the
+                    # subset of upstream coords whose FE result is
+                    # out-of-bounds. Keeps pick-mode cost dominated
+                    # by the cheap FE pass when most DOFs are
+                    # in-bounds; the RBF eval is the expensive part
+                    # so we don't want it for points we're going to
+                    # keep anyway.
                     out_of_bounds = ((veep_flat < nbr_min)
                                      | (veep_flat > nbr_max))
-                    veep_lim = np.where(out_of_bounds, vrbf_flat, veep_flat)
+                    oob_mask = out_of_bounds.any(axis=tuple(
+                        range(1, out_of_bounds.ndim)))
+                    veep_lim = veep_flat.copy()
+                    if oob_mask.any():
+                        oob_coords = end_pt_coords[oob_mask]
+                        value_rbf_oob = uw.function.global_evaluate(
+                            expr_to_evaluate, oob_coords, evalf=True)
+                        vrbf_flat = np.asarray(value_rbf_oob).reshape(
+                            (-1,) + veep_flat.shape[1:])
+                        # Within the oob subset, only overwrite
+                        # entries that are individually out of bounds
+                        # (multi-component case).
+                        sub_oob = out_of_bounds[oob_mask]
+                        veep_sub = veep_lim[oob_mask]
+                        veep_sub = np.where(
+                            sub_oob, vrbf_flat, veep_sub)
+                        veep_lim[oob_mask] = veep_sub
                 value_at_end_points = veep_lim.reshape(orig_shape)
                 # Re-wrap units after numpy ops, if needed.
                 if (psi_star_units is not None