From a4b866e94739705cb07e38435691bbc237c8d3fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ferenc=20T=C3=BCk=C3=B6r?= <ferenc.tukor@radwaytech.com>
Date: Tue, 27 Jan 2026 11:07:38 +0100
Subject: [PATCH 1/2] Return estimated memory when run as an estimator

---
 docs/source/bibliography/references.bib       |  12 ++
 docs/source/conf.py                           |   1 -
 .../reconstruction/ADMM3d_tomobar.rst         |  32 +++
 .../reconstruction/FISTA3d_tomobar.rst        |   3 +-
 .../methods_list/reconstruction_methods.rst   |   3 +-
 .../scripts/methods_to_generate_images.py     |   1 -
 httomolibgpu/memory_estimator_helpers.py      |  24 ---
 httomolibgpu/misc/rescale.py                  |   1 -
 httomolibgpu/prep/normalize.py                |   1 -
 httomolibgpu/prep/phase.py                    |   6 +-
 httomolibgpu/prep/stripe.py                   |  47 ++---
 httomolibgpu/recon/algorithm.py               | 194 ++++++++++++++++--
 httomolibgpu/recon/rotation.py                |  26 +--
 tests/test_recon/test_algorithm.py            | 176 ++++++++++++++++
 tests/test_recon/test_rotation.py             |   2 +-
 zenodo-tests/conftest.py                      |  17 ++
 zenodo-tests/test_recon/test_algorithm.py     |  46 ++++-
 17 files changed, 493 insertions(+), 99 deletions(-)
 create mode 100644 docs/source/reference/methods_list/reconstruction/ADMM3d_tomobar.rst
 delete mode 100644 httomolibgpu/memory_estimator_helpers.py

diff --git a/docs/source/bibliography/references.bib b/docs/source/bibliography/references.bib
index b4cb6b9e..ba7f88d9 100644
--- a/docs/source/bibliography/references.bib
+++ b/docs/source/bibliography/references.bib
@@ -171,3 +171,15 @@ @article{munch2009stripe
   year={2009},
   publisher={Optical Society of America}
 }
+
+
+@article{boyd2011distributed,
+  title={Distributed optimization and statistical learning via the alternating direction method of multipliers},
+  author={Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan and others},
+  journal={Foundations and Trends{\textregistered} in Machine learning},
+  volume={3},
+  number={1},
+  pages={1--122},
+  year={2011},
+  publisher={Now Publishers, Inc.}
+}
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 6500722a..554fab60 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -7,7 +7,6 @@
 from datetime import date
 from unittest import mock
 
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
diff --git a/docs/source/reference/methods_list/reconstruction/ADMM3d_tomobar.rst b/docs/source/reference/methods_list/reconstruction/ADMM3d_tomobar.rst
new file mode 100644
index 00000000..a6dddb31
--- /dev/null
+++ b/docs/source/reference/methods_list/reconstruction/ADMM3d_tomobar.rst
@@ -0,0 +1,32 @@
+.. _method_ADMM3d_tomobar:
+
+ADMM 3D (ToMoBAR)
+^^^^^^^^^^^^^^^^^^
+
+**Description**
+
+The Alternating Direction Method of Multipliers (ADMM) :cite:`boyd2011distributed` in tomography is an iterative reconstruction algorithm
+that optimises image quality by breaking complex, large-scale, and non-smooth optimization problems into smaller, manageable subproblems.
+It is particularly effective for high-quality, 3D imaging from sparse-view or low-dose data, where traditional methods like filtered
+back-projection (FBP) produce severe artifacts. It can also employ various regularisation :cite:`kazantsev2019ccpi` models to suppress the noise.
+There are few types of regularisation that can be used, please see method's API :mod:`httomolibgpu.recon.algorithm.ADMM3d_tomobar`.
+
+**Where and how to use it:**
+
+When the data is highly inaccurate,  noisy, incomplete, or limited-angle data. Due to added regularisation, the quality of ADMM reconstruction  is expected to be better than other classical methods, such as,
+:ref:`method_CGLS3d_tomobar` or :ref:`method_SIRT3d_tomobar`.
+
+**What are the adjustable parameters:**
+
+* The number of :code:`iterations` is an important parameter as one would like to achieve a trade-off between the resolution and SNR. For ADMM-OS method (when :code:`subsets_number > 1`) the range of iterations depends on how many :code:`subsets_number` used. For 24 subsets, 3-5 iterations is usually enough.
+
+* :code:`subsets_number` usually helps with the faster convergence of the reconstruction algorithm. Fortunately, ADMM is much more robust for higher numbers of subsets, compared to :ref:`method_FISTA3d_tomobar`. 24 or larger number of subsets can be used and :code:`iterations` can be reduced when the subsets grow.
+
+* :code:`initialisation` Initialise ADMM iterations using another reconstruction algorithm, like FBP, for instance. A so-called warm-start. Usually helps significantly to reduce the number of iterations. However, for very noise, undersampled, data it is recommended to use 'CGLS' as initialisation to avoid boosting up the noise and artifacts of the FBP reconstruction.
+
+* :code:`regularisation_parameter` is probably the second most important parameter after :code:`iterations`. When one increases the value of :code:`regularisation_parameter`, one can expect the image to be smoother. The type of smoothing usually depends on the regularisation type, and for Total-Variation is the piecewise-constant smoothness.
+
+* :code:`regularisation_iterations` defines how many inner iterations for regularisation performed on every step of the outer algorithm. This can depend on :code:`regularisation_type` and  :code:`subsets_number`. The general rule is when :code:`subsets_number` is smaller then :code:`regularisation_iterations` should be increased.
+
+* :code:`nonnegativity`. By setting this parameter to :code:`True` imposes positivity constraint on the solution.
+
diff --git a/docs/source/reference/methods_list/reconstruction/FISTA3d_tomobar.rst b/docs/source/reference/methods_list/reconstruction/FISTA3d_tomobar.rst
index a768c8d4..b0a13212 100644
--- a/docs/source/reference/methods_list/reconstruction/FISTA3d_tomobar.rst
+++ b/docs/source/reference/methods_list/reconstruction/FISTA3d_tomobar.rst
@@ -8,7 +8,8 @@ FISTA 3D (ToMoBAR)
 FISTA stands for A Fast Iterative Shrinkage-Thresholding Algorithm :cite:`beck2009fast`. One of the major benefits of FISTA is the ability
 to build complex optimisation functionals that can handle a variety of problematic data. For instance, the amplification of the noise
 in iterations which is common for classical iterative methods, such as, :ref:`method_CGLS3d_tomobar` and :ref:`method_SIRT3d_tomobar`,
-can be resolved by using regularisation. There are many different types of regularisation that can be used, see some are listed here :cite:`kazantsev2019ccpi`.
+can be resolved by using regularisation :cite:`kazantsev2019ccpi`. There are few types of regularisation that can be used, please see method's
+API :mod:`httomolibgpu.recon.algorithm.FISTA3d_tomobar`.
 
 
 **Where and how to use it:**
diff --git a/docs/source/reference/methods_list/reconstruction_methods.rst b/docs/source/reference/methods_list/reconstruction_methods.rst
index 551ef165..c7924e3f 100644
--- a/docs/source/reference/methods_list/reconstruction_methods.rst
+++ b/docs/source/reference/methods_list/reconstruction_methods.rst
@@ -5,7 +5,7 @@ Image reconstruction methods
 
 Methods from :mod:`httomolibgpu.recon.algorithm` module are required to reconstruct the projection data, i.e., converting the set of sinograms into a reconstructed volume.
 The reconstruction methods can be divided into two groups: Direct analytical methods (:ref:`method_LPRec3d_tomobar`, :ref:`method_FBP3d_tomobar`, :ref:`method_FBP2d_astra`)
-and Iterative methods (:ref:`method_FISTA3d_tomobar`, :ref:`method_CGLS3d_tomobar`, :ref:`method_SIRT3d_tomobar`).
+and Iterative methods (:ref:`method_FISTA3d_tomobar`, :ref:`method_ADMM3d_tomobar`, :ref:`method_CGLS3d_tomobar`, :ref:`method_SIRT3d_tomobar`).
 The former are faster and suitable for the majority of well-sampled and well-exposed data, while the latter are more complex and slower methods suitable for erroneous, noisy or/and undersampled data.
 
 .. toctree::
@@ -15,6 +15,7 @@ The former are faster and suitable for the majority of well-sampled and well-exp
    reconstruction/FBP3d_tomobar
    reconstruction/FBP2d_astra
    reconstruction/FISTA3d_tomobar
+   reconstruction/ADMM3d_tomobar
    reconstruction/CGLS3d_tomobar
    reconstruction/SIRT3d_tomobar
 
diff --git a/docs/source/scripts/methods_to_generate_images.py b/docs/source/scripts/methods_to_generate_images.py
index ca99a81b..1f0d66e2 100644
--- a/docs/source/scripts/methods_to_generate_images.py
+++ b/docs/source/scripts/methods_to_generate_images.py
@@ -24,7 +24,6 @@
 import cupy as cp
 from PIL import Image
 
-
 # usage : python -m methods_to_generate_images -i /home/algol/Documents/DEV/httomolibgpu/tests/test_data/synthdata_nxtomo1.npz -o /home/algol/Documents/DEV/httomolibgpu/docs/source/_static/auto_images_methods
 
 
diff --git a/httomolibgpu/memory_estimator_helpers.py b/httomolibgpu/memory_estimator_helpers.py
deleted file mode 100644
index 4c63a188..00000000
--- a/httomolibgpu/memory_estimator_helpers.py
+++ /dev/null
@@ -1,24 +0,0 @@
-ALLOCATION_UNIT_SIZE = 512
-
-
-class _DeviceMemStack:
-    def __init__(self) -> None:
-        self.allocations = []
-        self.current = 0
-        self.highwater = 0
-
-    def malloc(self, bytes):
-        self.allocations.append(bytes)
-        allocated = self._round_up(bytes)
-        self.current += allocated
-        self.highwater = max(self.current, self.highwater)
-
-    def free(self, bytes):
-        assert bytes in self.allocations
-        self.allocations.remove(bytes)
-        self.current -= self._round_up(bytes)
-        assert self.current >= 0
-
-    def _round_up(self, size):
-        size = (size + ALLOCATION_UNIT_SIZE - 1) // ALLOCATION_UNIT_SIZE
-        return size * ALLOCATION_UNIT_SIZE
diff --git a/httomolibgpu/misc/rescale.py b/httomolibgpu/misc/rescale.py
index 00362a56..e701c8c9 100644
--- a/httomolibgpu/misc/rescale.py
+++ b/httomolibgpu/misc/rescale.py
@@ -27,7 +27,6 @@
 
 from typing import Literal, Optional, Tuple, Union
 
-
 __all__ = [
     "rescale_to_int",
 ]
diff --git a/httomolibgpu/prep/normalize.py b/httomolibgpu/prep/normalize.py
index 92df401e..e82e13ae 100644
--- a/httomolibgpu/prep/normalize.py
+++ b/httomolibgpu/prep/normalize.py
@@ -34,7 +34,6 @@
 
 from numpy import float32
 
-
 __all__ = ["dark_flat_field_correction", "minus_log"]
 
 
diff --git a/httomolibgpu/prep/phase.py b/httomolibgpu/prep/phase.py
index 41f8468a..9dc01335 100644
--- a/httomolibgpu/prep/phase.py
+++ b/httomolibgpu/prep/phase.py
@@ -22,7 +22,7 @@
 
 import numpy as np
 from httomolibgpu import cupywrapper
-from httomolibgpu.memory_estimator_helpers import _DeviceMemStack
+from tomobar.supp.memory_estimator_helpers import DeviceMemStack
 
 cp = cupywrapper.cp
 cupy_run = cupywrapper.cupy_run
@@ -91,7 +91,7 @@ def paganin_filter(
     cp.ndarray
         The 3D array of Paganin phase-filtered projection images.
     """
-    mem_stack = _DeviceMemStack() if calc_peak_gpu_mem else None
+    mem_stack = DeviceMemStack() if calc_peak_gpu_mem else None
     # Check the input data is valid
     if not mem_stack and tomo.ndim != 3:
         raise ValueError(
@@ -301,7 +301,7 @@ def _pad_projections(
         "next_power_of_2", "next_fast_length", "use_pad_x_y"
     ],
     pad_x_y: Optional[list],
-    mem_stack: Optional[_DeviceMemStack],
+    mem_stack: Optional[DeviceMemStack],
 ) -> Tuple[cp.ndarray, Tuple[int, int]]:
     """
     Performs padding of each projection to a size optimal for FFT.
diff --git a/httomolibgpu/prep/stripe.py b/httomolibgpu/prep/stripe.py
index c46c33d9..14e05b55 100644
--- a/httomolibgpu/prep/stripe.py
+++ b/httomolibgpu/prep/stripe.py
@@ -30,6 +30,7 @@
 from unittest.mock import Mock
 
 if cupy_run:
+    from tomobar.supp.memory_estimator_helpers import DeviceMemStack
     from cupyx.scipy.ndimage import median_filter, binary_dilation, uniform_filter1d
     from cupyx.scipy.fft import fft2, ifft2, fftshift
     from cupyx.scipy.fftpack import get_fft_plan
@@ -204,32 +205,8 @@ def _reflect(x: np.ndarray, minx: float, maxx: float) -> np.ndarray:
     return np.array(out, dtype=x.dtype)
 
 
-class _DeviceMemStack:
-    def __init__(self) -> None:
-        self.allocations = []
-        self.current = 0
-        self.highwater = 0
-
-    def malloc(self, bytes):
-        self.allocations.append(bytes)
-        allocated = self._round_up(bytes)
-        self.current += allocated
-        self.highwater = max(self.current, self.highwater)
-
-    def free(self, bytes):
-        assert bytes in self.allocations
-        self.allocations.remove(bytes)
-        self.current -= self._round_up(bytes)
-        assert self.current >= 0
-
-    def _round_up(self, size):
-        ALLOCATION_UNIT_SIZE = 512
-        size = (size + ALLOCATION_UNIT_SIZE - 1) // ALLOCATION_UNIT_SIZE
-        return size * ALLOCATION_UNIT_SIZE
-
-
 def _mypad(
-    x: cp.ndarray, pad: Tuple[int, int, int, int], mem_stack: Optional[_DeviceMemStack]
+    x: cp.ndarray, pad: Tuple[int, int, int, int], mem_stack: Optional[DeviceMemStack]
 ) -> cp.ndarray:
     """Function to do numpy like padding on Arrays. Only works for 2-D
     padding.
@@ -272,7 +249,7 @@ def _conv2d(
     w: np.ndarray,
     stride: Tuple[int, int],
     groups: int,
-    mem_stack: Optional[_DeviceMemStack],
+    mem_stack: Optional[DeviceMemStack],
 ) -> cp.ndarray:
     """Convolution (equivalent pytorch.conv2d)"""
     b, ci, hi, wi = x.shape if not mem_stack else x
@@ -355,7 +332,7 @@ def _conv_transpose2d(
     stride: Tuple[int, int],
     pad: Tuple[int, int],
     groups: int,
-    mem_stack: Optional[_DeviceMemStack],
+    mem_stack: Optional[DeviceMemStack],
 ) -> cp.ndarray:
     """Transposed convolution (equivalent pytorch.conv_transpose2d)"""
     b, co, ho, wo = x.shape if not mem_stack else x
@@ -428,7 +405,7 @@ def _afb1d(
     h0: np.ndarray,
     h1: np.ndarray,
     dim: int,
-    mem_stack: Optional[_DeviceMemStack],
+    mem_stack: Optional[DeviceMemStack],
 ) -> cp.ndarray:
     """1D analysis filter bank (along one dimension only) of an image
 
@@ -476,7 +453,7 @@ def _sfb1d(
     g0: np.ndarray,
     g1: np.ndarray,
     dim: int,
-    mem_stack: Optional[_DeviceMemStack],
+    mem_stack: Optional[DeviceMemStack],
 ) -> cp.ndarray:
     """1D synthesis filter bank of an image Array"""
 
@@ -520,7 +497,7 @@ def __init__(self, wave: str):
         self.h1_row = np.array(h1_row).astype("float32")[::-1].reshape((1, 1, 1, -1))
 
     def apply(
-        self, x: cp.ndarray, mem_stack: Optional[_DeviceMemStack] = None
+        self, x: cp.ndarray, mem_stack: Optional[DeviceMemStack] = None
     ) -> Tuple[cp.ndarray, cp.ndarray]:
         """Forward pass of the DWT.
 
@@ -582,7 +559,7 @@ def __init__(self, wave: str):
     def apply(
         self,
         coeffs: Tuple[cp.ndarray, cp.ndarray],
-        mem_stack: Optional[_DeviceMemStack] = None,
+        mem_stack: Optional[DeviceMemStack] = None,
     ) -> cp.ndarray:
         """
         Args:
@@ -672,7 +649,7 @@ def remove_stripe_fw(
     sli_shape = [nz, 1, nproj_pad, ni]
 
     if calc_peak_gpu_mem:
-        mem_stack = _DeviceMemStack()
+        mem_stack = DeviceMemStack()
         # A data copy is assumed when invoking the function
         mem_stack.malloc(np.prod(data) * np.float32().itemsize)
         mem_stack.malloc(np.prod(sli_shape) * np.float32().itemsize)
@@ -845,7 +822,7 @@ def _detect_stripe(listdata, snr):
     listsorted = cp.sort(listdata)[::-1]
     xlist = cp.arange(0, numdata, 1.0)
     ndrop = cp.int16(0.25 * numdata)
-    (_slope, _intercept) = _mpolyfit(
+    _slope, _intercept = _mpolyfit(
         xlist[ndrop : -ndrop - 1], listsorted[ndrop : -ndrop - 1]
     )
 
@@ -869,7 +846,7 @@ def _rs_large(sinogram, snr, size, matindex, drop_ratio=0.1, norm=True):
     Remove large stripes.
     """
     drop_ratio = max(min(drop_ratio, 0.8), 0)  # = cp.clip(drop_ratio, 0.0, 0.8)
-    (nrow, ncol) = sinogram.shape
+    nrow, ncol = sinogram.shape
     ndrop = int(0.5 * drop_ratio * nrow)
     sinosort = cp.sort(sinogram, axis=0)
     sinosmooth = median_filter(sinosort, (1, size))
@@ -907,7 +884,7 @@ def _rs_large(sinogram, snr, size, matindex, drop_ratio=0.1, norm=True):
 def _rs_dead(sinogram, snr, size, matindex, norm=True):
     """remove unresponsive and fluctuating stripes"""
     sinogram = cp.copy(sinogram)  # Make it mutable
-    (nrow, _) = sinogram.shape
+    nrow, _ = sinogram.shape
     sinosmooth = uniform_filter1d(sinogram, 10, axis=0)
 
     listdiff = cp.sum(cp.abs(sinogram - sinosmooth), axis=0)
diff --git a/httomolibgpu/recon/algorithm.py b/httomolibgpu/recon/algorithm.py
index 8dd91339..74094d02 100644
--- a/httomolibgpu/recon/algorithm.py
+++ b/httomolibgpu/recon/algorithm.py
@@ -29,6 +29,7 @@
 from unittest.mock import Mock
 
 if cupy_run:
+    from tomobar.supp.memory_estimator_helpers import DeviceMemStack
     from tomobar.methodsDIR import RecToolsDIR
     from tomobar.methodsDIR_CuPy import RecToolsDIRCuPy
     from tomobar.methodsIR_CuPy import RecToolsIRCuPy
@@ -38,8 +39,7 @@
     RecToolsIRCuPy = Mock()
 
 from numpy import float32
-from typing import Optional, Type, Union
-
+from typing import Optional, Tuple, Type, Union
 
 __all__ = [
     "FBP2d_astra",
@@ -48,6 +48,7 @@
     "SIRT3d_tomobar",
     "CGLS3d_tomobar",
     "FISTA3d_tomobar",
+    "ADMM3d_tomobar",
 ]
 
 input_data_axis_labels = ["angles", "detY", "detX"]  # set the labels of the input data
@@ -192,7 +193,7 @@ def FBP3d_tomobar(
 
 ## %%%%%%%%%%%%%%%%%%%%%%% LPRec  %%%%%%%%%%%%%%%%%%%%%%%%%%%%  ##
 def LPRec3d_tomobar(
-    data: cp.ndarray,
+    data: cp.ndarray | Tuple[int, int, int],
     angles: np.ndarray,
     center: Optional[float] = None,
     detector_pad: Union[bool, int] = False,
@@ -204,6 +205,7 @@ def LPRec3d_tomobar(
     power_of_2_cropping: Optional[bool] = False,
     min_mem_usage_filter: Optional[bool] = True,
     min_mem_usage_ifft2: Optional[bool] = True,
+    **kwargs,
 ) -> cp.ndarray:
     """
     Fourier direct inversion in 3D on unequally spaced (also called as Log-Polar) grids using
@@ -232,6 +234,8 @@ def LPRec3d_tomobar(
         The radius of the circular mask that applies to the reconstructed slice in order to crop
         out some undesirable artifacts. The values outside the given diameter will be set to zero.
         To implement the cropping one can use the range [0.7-1.0] or set to 2.0 when no cropping required.
+    calc_peak_gpu_mem: bool
+        Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user.
 
     Returns
     -------
@@ -253,8 +257,14 @@ def LPRec3d_tomobar(
         power_of_2_cropping=power_of_2_cropping,
         min_mem_usage_filter=min_mem_usage_filter,
         min_mem_usage_ifft2=min_mem_usage_ifft2,
+        **kwargs,
     )
     cp._default_memory_pool.free_all_blocks()
+
+    mem_stack = DeviceMemStack.instance()
+    if mem_stack:
+        return mem_stack.highwater * 1.00625
+
     return cp.require(cp.swapaxes(reconstruction, 0, 1), requirements="C")
 
 
@@ -272,9 +282,7 @@ def SIRT3d_tomobar(
 ) -> cp.ndarray:
     """
     Perform Simultaneous Iterative Recostruction Technique (SIRT) using ASTRA toolbox :cite:`van2016fast` and
-    ToMoBAR :cite:`kazantsev2020tomographic` wrappers.
-    This is 3D recon directly from a CuPy array while using ASTRA GPUlink capability to avoid host-device
-    transactions for projection and backprojection.
+    ToMoBAR :cite:`kazantsev2020tomographic` wrappers. For more information see :ref:`method_SIRT3d_tomobar`.
 
     Parameters
     ----------
@@ -345,9 +353,7 @@ def CGLS3d_tomobar(
 ) -> cp.ndarray:
     """
     Perform Conjugate Gradient Least Squares (CGLS) using ASTRA toolbox :cite:`van2016fast` and
-    ToMoBAR :cite:`kazantsev2020tomographic` wrappers.
-    This is 3D recon directly from a CuPy array while using ASTRA GPUlink capability to avoid host-device
-    transactions for projection and backprojection.
+    ToMoBAR :cite:`kazantsev2020tomographic` wrappers. For more information see :ref:`method_CGLS3d_tomobar`.
 
     Parameters
     ----------
@@ -418,6 +424,7 @@ def FISTA3d_tomobar(
     """
     A Fast Iterative Shrinkage-Thresholding Algorithm :cite:`beck2009fast` with various types of regularisation or
     denoising operations :cite:`kazantsev2019ccpi` (currently accepts ROF_TV and PD_TV regularisations only).
+    For more information see :ref:`method_FISTA3d_tomobar`.
 
     Parameters
     ----------
@@ -489,9 +496,161 @@ def FISTA3d_tomobar(
     return cp.require(cp.swapaxes(reconstruction, 0, 1), requirements="C")
 
 
+## %%%%%%%%%%%%%%%%%%%%%%% ADMM reconstruction %%%%%%%%%%%%%%%%%%%%%%%%%%%%  ##
+def ADMM3d_tomobar(
+    data: cp.ndarray,
+    angles: np.ndarray,
+    center: Optional[float] = None,
+    detector_pad: Union[bool, int] = False,
+    recon_size: Optional[int] = None,
+    recon_mask_radius: float = 0.95,
+    iterations: int = 3,
+    subsets_number: int = 24,
+    initialisation: Optional[str] = "FBP",
+    ADMM_rho_const: float = 1.0,
+    ADMM_relax_par: float = 1.7,
+    regularisation_type: str = "PD_TV",
+    regularisation_parameter: float = 0.0025,
+    regularisation_iterations: int = 40,
+    regularisation_half_precision: bool = True,
+    nonnegativity: bool = False,
+    gpu_id: int = 0,
+) -> cp.ndarray:
+    """
+    An Alternating Direction Method of Multipliers method with various types of regularisation or
+    denoising operations :cite:`kazantsev2019ccpi` (currently accepts ROF_TV and PD_TV regularisations only).
+    For more information see :ref:`_method_ADMM3d_tomobar`.
+
+    Parameters
+    ----------
+    data : cp.ndarray
+        Projection data as a CuPy array.
+    angles : np.ndarray
+        An array of angles given in radians.
+    center : float, optional
+        The center of rotation (CoR).
+    detector_pad : bool, int
+        Detector width padding with edge values to remove circle/arc type artifacts in the reconstruction. Set to True to perform
+        an automated padding or specify a certain value as an integer.
+    recon_size : int, optional
+        The [recon_size, recon_size] shape of the reconstructed slice in pixels.
+        By default (None), the reconstructed size will be the dimension of the horizontal detector.
+    recon_mask_radius: float
+        The radius of the circular mask that applies to the reconstructed slice in order to crop
+        out some undesirable artifacts. The values outside the given diameter will be set to zero.
+        To implement the cropping one can use the range [0.7-1.0] or set to 2.0 when no cropping required.
+    iterations : int
+        The number of ADMM algorithm iterations. The recommended range is between 3 to 5 with initialisation and
+        more than 10 without. Assuming that the subsets_number is reasonably large (>12).
+    subsets_number: int
+        The number of the ordered subsets to accelerate convergence. The recommended range is between 12 to 24.
+    initialisation: str, optional
+        Initialise ADMM with the reconstructed image to reduce the number of iterations and accelerate. Choose between 'CGLS' when data
+        is noisy and undersampled, 'FBP' when data is of better quality (default) or None.
+    ADMM_rho_const: float
+        Convergence related parameter for ADMM, higher values lead to slower convergence, but too small values can destabilise the iterations.
+        Recommended range is between 0.9 and 2.0.
+    ADMM_relax_par: Relaxation parameter which can lead to acceleration of the algorithm, keep it in the range between 1.5 and 1.8 to avoid divergence.     regularisation_type: str
+        A method to use for regularisation. Currently PD_TV and ROF_TV are available.
+    regularisation_parameter: float
+        The main regularisation parameter to control the amount of smoothing/noise removal. Larger values lead to stronger smoothing.
+    regularisation_iterations: int
+        The number of iterations for regularisers (aka INNER iterations).
+    regularisation_half_precision: bool
+        Perform faster regularisation computation in half-precision with a very minimal sacrifice in quality.
+    nonnegativity : bool
+        Impose nonnegativity constraint (set to True) on the reconstructed image. Default False.
+    gpu_id : int
+        A GPU device index to perform operation on.
+
+    Returns
+    -------
+    cp.ndarray
+        The ADMM reconstructed volume as a CuPy array.
+    """
+    if initialisation not in ["FBP", "CGLS", None]:
+        raise ValueError(
+            "The acceptable values for initialisation are 'FBP','CGLS' and None"
+        )
+
+    if initialisation is not None:
+        if detector_pad == True:
+            detector_pad = __estimate_detectorHoriz_padding(data.shape[2])
+
+        if detector_pad > 0:
+            # if detector_pad is not zero we need to reconstruct the image on the recon+2*detector_pad size
+            recon_size = data.shape[2] + 2 * detector_pad
+
+        if initialisation == "FBP":
+            initialisation_vol = cp.require(
+                cp.swapaxes(
+                    FBP3d_tomobar(
+                        data,
+                        angles=angles,
+                        center=center,
+                        detector_pad=detector_pad,
+                        recon_size=recon_size,
+                        recon_mask_radius=recon_mask_radius,
+                    ),
+                    0,
+                    1,
+                ),
+                requirements="C",
+            )
+        elif initialisation == "CGLS":
+            initialisation_vol = cp.require(
+                cp.swapaxes(
+                    CGLS3d_tomobar(
+                        data,
+                        angles=angles,
+                        center=center,
+                        detector_pad=detector_pad,
+                        recon_size=recon_size,
+                        recon_mask_radius=recon_mask_radius,
+                        iterations=15,
+                    ),
+                    0,
+                    1,
+                ),
+                requirements="C",
+            )
+    else:
+        initialisation_vol = None
+
+    RecToolsCP = _instantiate_iterative_recon_class(
+        data, angles, center, detector_pad, recon_size, gpu_id, datafidelity="LS"
+    )
+
+    _data_ = {
+        "projection_norm_data": data,
+        "OS_number": subsets_number,
+        "data_axes_labels_order": input_data_axis_labels,
+    }
+
+    _algorithm_ = {
+        "initialise": initialisation_vol,
+        "iterations": iterations,
+        "nonnegativity": nonnegativity,
+        "recon_mask_radius": recon_mask_radius,
+        "ADMM_rho_const": ADMM_rho_const,
+        "ADMM_relax_par": ADMM_relax_par,
+    }
+
+    _regularisation_ = {
+        "method": regularisation_type,  # Selected regularisation method
+        "regul_param": regularisation_parameter,  # Regularisation parameter
+        "iterations": regularisation_iterations,  # The number of regularisation iterations
+        "half_precision": regularisation_half_precision,  # enabling half-precision calculation
+    }
+
+    reconstruction = RecToolsCP.ADMM(_data_, _algorithm_, _regularisation_)
+    cp._default_memory_pool.free_all_blocks()
+    return cp.require(cp.swapaxes(reconstruction, 0, 1), requirements="C")
+
+
 ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  ##
 def _instantiate_direct_recon_class(
-    data: cp.ndarray,
+    data: cp.ndarray | Tuple[int, int, int],
     angles: np.ndarray,
     center: Optional[float] = None,
     detector_pad: Union[bool, int] = False,
@@ -511,19 +670,22 @@ def _instantiate_direct_recon_class(
     Returns:
         Type[RecToolsDIRCuPy]: an instance of the direct recon class
     """
+
+    data_shape = data if isinstance(data, tuple) else data.shape
+
     if center is None:
-        center = data.shape[2] // 2  # making a crude guess
+        center = data_shape[2] // 2  # making a crude guess
     if recon_size is None:
-        recon_size = data.shape[2]
+        recon_size = data_shape[2]
     if detector_pad is True:
-        detector_pad = __estimate_detectorHoriz_padding(data.shape[2])
+        detector_pad = __estimate_detectorHoriz_padding(data_shape[2])
     elif detector_pad is False:
         detector_pad = 0
     RecToolsCP = RecToolsDIRCuPy(
-        DetectorsDimH=data.shape[2],  # Horizontal detector dimension
+        DetectorsDimH=data_shape[2],  # Horizontal detector dimension
         DetectorsDimH_pad=detector_pad,  # padding for horizontal detector
-        DetectorsDimV=data.shape[1],  # Vertical detector dimension (3D case)
-        CenterRotOffset=data.shape[2] / 2
+        DetectorsDimV=data_shape[1],  # Vertical detector dimension (3D case)
+        CenterRotOffset=data_shape[2] / 2
         - center
         - 0.5,  # Center of Rotation scalar or a vector
         AnglesVec=-angles,  # A vector of projection angles in radians
diff --git a/httomolibgpu/recon/rotation.py b/httomolibgpu/recon/rotation.py
index 615cdc5e..c3aa47d2 100644
--- a/httomolibgpu/recon/rotation.py
+++ b/httomolibgpu/recon/rotation.py
@@ -171,7 +171,7 @@ def find_center_vo(
 
 
 def _search_coarse(sino, smin, smax, ratio, drop):
-    (nrow, ncol) = sino.shape
+    nrow, ncol = sino.shape
     flip_sino = cp.ascontiguousarray(cp.fliplr(sino))
     comp_sino = cp.ascontiguousarray(cp.flipud(sino))
 
@@ -206,7 +206,7 @@ def _search_coarse(sino, smin, smax, ratio, drop):
 
 
 def _search_fine(sino, srad, step, init_cen, ratio, drop):
-    (nrow, ncol) = sino.shape
+    nrow, ncol = sino.shape
 
     flip_sino = cp.ascontiguousarray(cp.fliplr(sino))
     comp_sino = cp.ascontiguousarray(cp.flipud(sino))
@@ -397,7 +397,7 @@ def _downsample(image, dsp_fact0, dsp_fact1):
     ---------
         image_dsp : Downsampled image.
     """
-    (height, width) = image.shape
+    height, width = image.shape
     dsp_fact0 = cp.clip(cp.int16(dsp_fact0), 1, height // 2)
     dsp_fact1 = cp.clip(cp.int16(dsp_fact1), 1, width // 2)
     height_dsp = height // dsp_fact0
@@ -477,11 +477,11 @@ def find_center_360(
     else:
         _sino = data[:, ind, :]
 
-    (nrow, ncol) = _sino.shape
+    nrow, ncol = _sino.shape
     nrow_180 = nrow // 2 + 1
     sino_top = _sino[0:nrow_180, :]
     sino_bot = cp.fliplr(_sino[-nrow_180:, :])
-    (overlap, side, overlap_position) = _find_overlap(
+    overlap, side, overlap_position = _find_overlap(
         sino_top, sino_bot, win_width, side, denoise, norm, use_overlap
     )
     cor = ncol - overlap / 2
@@ -531,7 +531,7 @@ def _find_overlap(
     win_width = int(np.clip(win_width, 6, min(ncol1, ncol2) // 2))
 
     if side == "right":
-        (list_metric, offset) = _search_overlap(
+        list_metric, offset = _search_overlap(
             mat1,
             mat2,
             win_width,
@@ -544,7 +544,7 @@ def _find_overlap(
         overlap_position += offset
         overlap = ncol1 - overlap_position + win_width // 2
     elif side == "left":
-        (list_metric, offset) = _search_overlap(
+        list_metric, offset = _search_overlap(
             mat1,
             mat2,
             win_width,
@@ -557,7 +557,7 @@ def _find_overlap(
         overlap_position += offset
         overlap = overlap_position + win_width // 2
     else:
-        (list_metric1, offset1) = _search_overlap(
+        list_metric1, offset1 = _search_overlap(
             mat1,
             mat2,
             win_width,
@@ -566,7 +566,7 @@ def _find_overlap(
             norm=norm,
             use_overlap=use_overlap,
         )
-        (list_metric2, offset2) = _search_overlap(
+        list_metric2, offset2 = _search_overlap(
             mat1,
             mat2,
             win_width,
@@ -576,9 +576,9 @@ def _find_overlap(
             use_overlap=use_overlap,
         )
 
-        (curvature1, overlap_position1) = _calculate_curvature(list_metric1)
+        curvature1, overlap_position1 = _calculate_curvature(list_metric1)
         overlap_position1 += offset1
-        (curvature2, overlap_position2) = _calculate_curvature(list_metric2)
+        curvature2, overlap_position2 = _calculate_curvature(list_metric2)
         overlap_position2 += offset2
 
         if curvature1 > curvature2:
@@ -638,8 +638,8 @@ def _search_overlap(
         mat1 = cp.ascontiguousarray(mat1, dtype=cp.float32)
         mat2 = cp.ascontiguousarray(mat2, dtype=cp.float32)
 
-    (nrow1, ncol1) = mat1.shape
-    (nrow2, ncol2) = mat2.shape
+    nrow1, ncol1 = mat1.shape
+    nrow2, ncol2 = mat2.shape
 
     if nrow1 != nrow2:
         raise ValueError("Two images are not at the same height!!!")
diff --git a/tests/test_recon/test_algorithm.py b/tests/test_recon/test_algorithm.py
index 1c5ce4a1..91c85448 100644
--- a/tests/test_recon/test_algorithm.py
+++ b/tests/test_recon/test_algorithm.py
@@ -8,11 +8,13 @@
     SIRT3d_tomobar,
     CGLS3d_tomobar,
     FISTA3d_tomobar,
+    ADMM3d_tomobar,
 )
 from numpy.testing import assert_allclose
 import time
 import pytest
 from cupy.cuda import nvtx
+from ..conftest import MaxMemoryHook
 
 
 def test_reconstruct_FBP_2d_astra(data, flats, darks, ensure_clean_memory):
@@ -309,6 +311,180 @@ def test_reconstruct_FISTA3d_tomobar_rof_tv(data, flats, darks, ensure_clean_mem
     assert recon_data.dtype == np.float32
 
 
+def test_reconstruct_ADMM3d_tomobar_pd_tv(data, flats, darks, ensure_clean_memory):
+    objrecon_size = data.shape[2]
+    normalised_data = dark_flat_field_correction(data, flats, darks, cutoff=10)
+    normalised_data = minus_log(normalised_data)
+
+    recon_data = ADMM3d_tomobar(
+        data=normalised_data,
+        angles=np.linspace(0.0 * np.pi / 180.0, 180.0 * np.pi / 180.0, data.shape[0]),
+        center=79.5,
+        detector_pad=0,
+        recon_size=objrecon_size,
+        iterations=5,
+        subsets_number=24,
+        initialisation=None,
+        regularisation_type="PD_TV",
+        regularisation_parameter=0.004,
+        regularisation_iterations=50,
+        regularisation_half_precision=True,
+        nonnegativity=True,
+    )
+
+    assert recon_data.flags.c_contiguous
+    recon_data = cp.asnumpy(recon_data)
+    assert_allclose(np.min(recon_data), -7.5129734e-05, rtol=1e-06, atol=1e-6)
+    assert_allclose(np.mean(recon_data, axis=(0, 2)).sum(), 0.23559943, rtol=1e-04)
+    assert recon_data.dtype == np.float32
+
+
+def test_reconstruct_ADMM3d_warm_tomobar_pd_tv(data, flats, darks, ensure_clean_memory):
+    objrecon_size = data.shape[2]
+    normalised_data = dark_flat_field_correction(data, flats, darks, cutoff=10)
+    normalised_data = minus_log(normalised_data)
+
+    recon_data = ADMM3d_tomobar(
+        data=normalised_data,
+        angles=np.linspace(0.0 * np.pi / 180.0, 180.0 * np.pi / 180.0, data.shape[0]),
+        center=79.5,
+        detector_pad=0,
+        recon_size=objrecon_size,
+        iterations=2,
+        subsets_number=24,
+        initialisation="FBP",
+        regularisation_type="PD_TV",
+        regularisation_parameter=0.004,
+        regularisation_iterations=50,
+        regularisation_half_precision=True,
+        nonnegativity=False,
+    )
+
+    assert recon_data.flags.c_contiguous
+    recon_data = cp.asnumpy(recon_data)
+    assert_allclose(np.min(recon_data), -0.024014484, rtol=1e-07, atol=1e-6)
+    assert_allclose(np.mean(recon_data, axis=(0, 2)).sum(), 0.22771806, rtol=1e-04)
+    assert recon_data.dtype == np.float32
+
+
+def test_reconstruct_ADMM3d_warm2_tomobar_pd_tv(
+    data, flats, darks, ensure_clean_memory
+):
+    objrecon_size = data.shape[2]
+    normalised_data = dark_flat_field_correction(data, flats, darks, cutoff=10)
+    normalised_data = minus_log(normalised_data)
+
+    recon_data = ADMM3d_tomobar(
+        data=normalised_data,
+        angles=np.linspace(0.0 * np.pi / 180.0, 180.0 * np.pi / 180.0, data.shape[0]),
+        center=79.5,
+        detector_pad=0,
+        recon_size=objrecon_size,
+        iterations=2,
+        subsets_number=24,
+        initialisation="CGLS",
+        regularisation_type="PD_TV",
+        regularisation_parameter=0.0008,
+        regularisation_iterations=50,
+        regularisation_half_precision=True,
+        nonnegativity=False,
+    )
+
+    assert recon_data.flags.c_contiguous
+    recon_data = cp.asnumpy(recon_data)
+    assert_allclose(np.mean(recon_data), 0.001847589, rtol=1e-07, atol=1e-6)
+    assert_allclose(np.mean(recon_data, axis=(0, 2)).sum(), 0.236588, rtol=1e-04)
+    assert recon_data.dtype == np.float32
+
+
+def test_reconstruct_ADMM3d_tomobar_pd_tv_detpad_true(
+    data, flats, darks, ensure_clean_memory
+):
+    objrecon_size = data.shape[2]
+    normalised_data = dark_flat_field_correction(data, flats, darks, cutoff=10)
+    normalised_data = minus_log(normalised_data)
+
+    recon_data = ADMM3d_tomobar(
+        data=normalised_data,
+        angles=np.linspace(0.0 * np.pi / 180.0, 180.0 * np.pi / 180.0, data.shape[0]),
+        center=79.5,
+        detector_pad=True,
+        recon_size=objrecon_size,
+        recon_mask_radius=2.0,
+        iterations=5,
+        subsets_number=24,
+        initialisation=None,
+        regularisation_type="PD_TV",
+        regularisation_parameter=0.004,
+        regularisation_iterations=50,
+        regularisation_half_precision=True,
+        nonnegativity=True,
+    )
+
+    assert recon_data.flags.c_contiguous
+    recon_data = cp.asnumpy(recon_data)
+    assert_allclose(np.min(recon_data), -8.2713435e-05, rtol=1e-06, atol=1e-6)
+    assert_allclose(np.mean(recon_data, axis=(0, 2)).sum(), 0.235260687, rtol=1e-04)
+    assert recon_data.dtype == np.float32
+
+
+def test_reconstruct_ADMM3d_warm_tomobar_pd_tv_detpad_true(
+    data, flats, darks, ensure_clean_memory
+):
+    objrecon_size = data.shape[2]
+    normalised_data = dark_flat_field_correction(data, flats, darks, cutoff=10)
+    normalised_data = minus_log(normalised_data)
+
+    recon_data = ADMM3d_tomobar(
+        data=normalised_data,
+        angles=np.linspace(0.0 * np.pi / 180.0, 180.0 * np.pi / 180.0, data.shape[0]),
+        center=79.5,
+        detector_pad=True,
+        recon_size=objrecon_size,
+        recon_mask_radius=2.0,
+        initialisation="FBP",
+        iterations=2,
+        subsets_number=24,
+        regularisation_type="PD_TV",
+        regularisation_parameter=0.004,
+        regularisation_iterations=50,
+        regularisation_half_precision=True,
+        nonnegativity=False,
+    )
+
+    assert recon_data.flags.c_contiguous
+    recon_data = cp.asnumpy(recon_data)
+    assert_allclose(np.min(recon_data), -0.024650197, rtol=1e-07, atol=1e-6)
+    assert_allclose(np.mean(recon_data, axis=(0, 2)).sum(), 0.08829363, rtol=1e-04)
+    assert recon_data.dtype == np.float32
+
+
+def test_reconstruct_ADMM3d_tomobar_rof_tv(data, flats, darks, ensure_clean_memory):
+    objrecon_size = data.shape[2]
+    normalised_data = dark_flat_field_correction(data, flats, darks, cutoff=10)
+    normalised_data = minus_log(normalised_data)
+
+    recon_data = ADMM3d_tomobar(
+        data=normalised_data,
+        angles=np.linspace(0.0 * np.pi / 180.0, 180.0 * np.pi / 180.0, data.shape[0]),
+        center=79.5,
+        recon_size=objrecon_size,
+        iterations=3,
+        subsets_number=24,
+        regularisation_type="ROF_TV",
+        regularisation_parameter=0.01,
+        regularisation_iterations=50,
+        regularisation_half_precision=True,
+        nonnegativity=False,
+    )
+
+    assert recon_data.flags.c_contiguous
+    recon_data = cp.asnumpy(recon_data)
+    assert_allclose(np.mean(recon_data), 0.0017946999, rtol=1e-07, atol=1e-6)
+    assert_allclose(np.mean(recon_data, axis=(0, 2)).sum(), 0.22972171, rtol=1e-04)
+    assert recon_data.dtype == np.float32
+
+
 @pytest.mark.perf
 def test_FBP3d_tomobar_performance(ensure_clean_memory):
     dev = cp.cuda.Device()
diff --git a/tests/test_recon/test_rotation.py b/tests/test_recon/test_rotation.py
index bc7e4f4f..649128fd 100644
--- a/tests/test_recon/test_rotation.py
+++ b/tests/test_recon/test_rotation.py
@@ -92,7 +92,7 @@ def test_find_center_vo_performance():
 
 def test_find_center_360_data(data):
     eps = 1e-5
-    (cor, overlap, side, overlap_pos) = find_center_360(data, norm=True, denoise=False)
+    cor, overlap, side, overlap_pos = find_center_360(data, norm=True, denoise=False)
 
     assert_allclose(cor, 133.453167, rtol=eps)
     assert_allclose(overlap, 53.093666, rtol=eps)
diff --git a/zenodo-tests/conftest.py b/zenodo-tests/conftest.py
index 0f237a39..7140eebe 100644
--- a/zenodo-tests/conftest.py
+++ b/zenodo-tests/conftest.py
@@ -231,3 +231,20 @@ def force_clean_gpu_memory():
     gc.collect()
     cp.get_default_memory_pool().free_all_blocks()
     cp.get_default_pinned_memory_pool().free_all_blocks()
+
+
+class MaxMemoryHook(cp.cuda.MemoryHook):
+    def __init__(self, initial=0):
+        self.max_mem = initial
+        self.current = initial
+
+    def malloc_postprocess(
+        self, device_id: int, size: int, mem_size: int, mem_ptr: int, pmem_id: int
+    ):
+        self.current += mem_size
+        self.max_mem = max(self.max_mem, self.current)
+
+    def free_postprocess(
+        self, device_id: int, mem_size: int, mem_ptr: int, pmem_id: int
+    ):
+        self.current -= mem_size
diff --git a/zenodo-tests/test_recon/test_algorithm.py b/zenodo-tests/test_recon/test_algorithm.py
index 9803cd4a..717c7974 100644
--- a/zenodo-tests/test_recon/test_algorithm.py
+++ b/zenodo-tests/test_recon/test_algorithm.py
@@ -13,13 +13,14 @@
     CGLS3d_tomobar,
     SIRT3d_tomobar,
     FISTA3d_tomobar,
+    ADMM3d_tomobar,
 )
 from httomolibgpu.misc.morph import sino_360_to_180
 from numpy.testing import assert_allclose
 import time
 import pytest
 from cupy.cuda import nvtx
-from conftest import force_clean_gpu_memory
+from conftest import force_clean_gpu_memory, MaxMemoryHook
 
 
 def test_reconstruct_FBP2d_astra_i12_dataset1(i12_dataset1: tuple):
@@ -543,3 +544,46 @@ def test_reconstruct_FISTA3d_tomobar_autopad_k11_dataset2(k11_dataset2: tuple):
     assert isclose(np.sum(recon_data), 1355.4624, abs_tol=10**-3)
     assert recon_data.dtype == np.float32
     assert recon_data.shape == (2560, 5, 2560)
+
+
+def test_reconstruct_ADMM3d_tomobar_autopad_k11_dataset2(k11_dataset2: tuple):
+    force_clean_gpu_memory()
+    projdata = k11_dataset2[0]
+    angles = k11_dataset2[1]
+    flats = k11_dataset2[2]
+    darks = k11_dataset2[3]
+    del k11_dataset2
+
+    data_normalised = dark_flat_field_correction(projdata, flats, darks, cutoff=10)
+    data_normalised = minus_log(data_normalised)
+
+    del flats, darks, projdata
+    force_clean_gpu_memory()
+    data = data_normalised[:, 5:10, :]
+
+    args = {
+        "angles": np.deg2rad(angles),
+        "center": 1280.25,
+        "detector_pad": True,
+        "recon_mask_radius": 2.0,
+        "iterations": 7,
+        "regularisation_parameter": 0.0000005,
+    }
+
+    peak_mem = ADMM3d_tomobar(data=data.shape, calc_peak_gpu_mem=True, **args)
+    av_mem = cp.cuda.Device().mem_info[0]
+    if av_mem < peak_mem:
+        pytest.skip("Not enough GPU memory to run this test")
+
+    hook = MaxMemoryHook()
+    with hook:
+        recon_data = ADMM3d_tomobar(data=data, **args)
+
+    assert (hook.max_mem * 1.03) > peak_mem
+    assert (hook.max_mem * 0.99) < peak_mem
+
+    assert recon_data.flags.c_contiguous
+    recon_data = cp.asnumpy(recon_data)
+    assert isclose(np.sum(recon_data), 1228.5009, abs_tol=10**-3)
+    assert recon_data.dtype == np.float32
+    assert recon_data.shape == (2560, 5, 2560)

From 213169367caa29b6589dd27cdfcbc1810f3219c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ferenc=20T=C3=BCk=C3=B6r?= <ferenc.tukor@radwaytech.com>
Date: Mon, 16 Feb 2026 22:49:51 +0100
Subject: [PATCH 2/2] Turn off Astra initialisation for LPRec

---
 httomolibgpu/misc/morph.py                | 20 +++++++++----
 httomolibgpu/recon/algorithm.py           |  6 ++--
 httomolibgpu/recon/rotation.py            |  3 +-
 tests/test_prep/test_phase.py             | 12 ++++++--
 tests/test_prep/test_stripe.py            | 12 ++++++--
 tests/test_recon/test_rotation.py         |  2 +-
 zenodo-tests/test_misc/test_morph.py      |  6 ++--
 zenodo-tests/test_prep/test_stripe.py     |  6 ++--
 zenodo-tests/test_recon/test_algorithm.py | 36 ++++++++---------------
 zenodo-tests/test_recon/test_rotation.py  |  9 ++----
 10 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/httomolibgpu/misc/morph.py b/httomolibgpu/misc/morph.py
index ff49d5f0..c86280f6 100644
--- a/httomolibgpu/misc/morph.py
+++ b/httomolibgpu/misc/morph.py
@@ -69,10 +69,12 @@ def sino_360_to_180(
     dx, dy, dz = data.shape
 
     overlap = int(np.round(overlap))
-    if overlap >= dz:
-        raise ValueError("Overlap must be less than data.shape[2]")
-    if overlap < 0:
-        raise ValueError("Only positive overlaps are allowed.")
+    if overlap >= dz - 1:
+        raise ValueError("Overlap must be less than size of the horizontal detector")
+    if overlap <= 0:
+        raise ValueError("Only positive non-zero overlaps are allowed.")
+    if overlap % 2 != 0:
+        overlap += 1
 
     if side not in ["left", "right"]:
         raise ValueError(
@@ -100,7 +102,15 @@ def sino_360_to_180(
             + (weights * data[n : 2 * n, :, -overlap:])[:, :, ::-1]
         )
 
-    return out
+    return cp.pad(
+        out,
+        pad_width=(
+            (0, 0),
+            (0, 0),
+            (overlap // 2, overlap // 2),
+        ),
+        mode="edge",
+    )
 
 
 def data_resampler(
diff --git a/httomolibgpu/recon/algorithm.py b/httomolibgpu/recon/algorithm.py
index 74094d02..17354349 100644
--- a/httomolibgpu/recon/algorithm.py
+++ b/httomolibgpu/recon/algorithm.py
@@ -39,7 +39,7 @@
     RecToolsIRCuPy = Mock()
 
 from numpy import float32
-from typing import Optional, Tuple, Type, Union
+from typing import Literal, Optional, Tuple, Type, Union
 
 __all__ = [
     "FBP2d_astra",
@@ -244,7 +244,7 @@ def LPRec3d_tomobar(
     """
 
     RecToolsCP = _instantiate_direct_recon_class(
-        data, angles, center, detector_pad, recon_size, 0
+        data, angles, center, detector_pad, recon_size, 0, "fourier"
     )
 
     reconstruction = RecToolsCP.FOURIER_INV(
@@ -656,6 +656,7 @@ def _instantiate_direct_recon_class(
     detector_pad: Union[bool, int] = False,
     recon_size: Optional[int] = None,
     gpu_id: int = 0,
+    projector: Literal["fourier", "astra"] = "astra",
 ) -> Type:
     """instantiate ToMoBAR's direct recon class
 
@@ -690,6 +691,7 @@ def _instantiate_direct_recon_class(
         - 0.5,  # Center of Rotation scalar or a vector
         AnglesVec=-angles,  # A vector of projection angles in radians
         ObjSize=recon_size,  # Reconstructed object dimensions (scalar)
+        projector=projector,
         device_projector=gpu_id,
     )
     return RecToolsCP
diff --git a/httomolibgpu/recon/rotation.py b/httomolibgpu/recon/rotation.py
index c3aa47d2..e02595ef 100644
--- a/httomolibgpu/recon/rotation.py
+++ b/httomolibgpu/recon/rotation.py
@@ -484,9 +484,8 @@ def find_center_360(
     overlap, side, overlap_position = _find_overlap(
         sino_top, sino_bot, win_width, side, denoise, norm, use_overlap
     )
-    cor = ncol - overlap / 2
 
-    return cor, overlap, side, overlap_position
+    return ncol, overlap, side, overlap_position
 
 
 def _find_overlap(
diff --git a/tests/test_prep/test_phase.py b/tests/test_prep/test_phase.py
index 128bcfde..958e9baa 100644
--- a/tests/test_prep/test_phase.py
+++ b/tests/test_prep/test_phase.py
@@ -197,9 +197,17 @@ def test_paganin_filter_calc_mem_big(slices, dims, padding, ensure_clean_memory)
             pytest.skip("Not usable FFT size")
         else:
             raise
-    av_mem = cp.cuda.Device().mem_info[0]
+
+    # the estimation may have reserved blocks in the pool
+    cp.get_default_memory_pool().free_all_blocks()
+
+    av_mem = int(
+        cp.cuda.Device().mem_info[0] * 0.9
+    )  # margin is applied to counter fragmentation and OS reservations
     if av_mem < estimated_mem_peak:
-        pytest.skip("Not enough GPU memory to run this test")
+        pytest.skip(
+            f"Not enough GPU memory ({av_mem/1024**3:.2f} GB) to run this test: {estimated_mem_peak/1024**3:.2f} GB needed"
+        )
 
     hook = MaxMemoryHook()
     with hook:
diff --git a/tests/test_prep/test_stripe.py b/tests/test_prep/test_stripe.py
index 23713961..aa21b44f 100644
--- a/tests/test_prep/test_stripe.py
+++ b/tests/test_prep/test_stripe.py
@@ -125,9 +125,17 @@ def test_remove_stripe_fw_calc_mem_big(wname, slices, level, dims, ensure_clean_
         )
     except cp.cuda.memory.OutOfMemoryError:
         pytest.skip("Not enough GPU memory to estimate memory peak")
-    av_mem = cp.cuda.Device().mem_info[0]
+
+    # the estimation may have reserved blocks in the pool
+    cp.get_default_memory_pool().free_all_blocks()
+
+    av_mem = int(
+        cp.cuda.Device().mem_info[0] * 0.9
+    )  # margin is applied to counter fragmentation and OS reservations
     if av_mem < estimated_mem_peak:
-        pytest.skip("Not enough GPU memory to run this test")
+        pytest.skip(
+            f"Not enough GPU memory ({av_mem/1024**3:.2f} GB) to run this test: {estimated_mem_peak/1024**3:.2f} GB needed"
+        )
 
     hook = MaxMemoryHook()
     with hook:
diff --git a/tests/test_recon/test_rotation.py b/tests/test_recon/test_rotation.py
index 649128fd..0c62a314 100644
--- a/tests/test_recon/test_rotation.py
+++ b/tests/test_recon/test_rotation.py
@@ -94,7 +94,7 @@ def test_find_center_360_data(data):
     eps = 1e-5
     cor, overlap, side, overlap_pos = find_center_360(data, norm=True, denoise=False)
 
-    assert_allclose(cor, 133.453167, rtol=eps)
+    assert_allclose(cor, 160, rtol=eps)
     assert_allclose(overlap, 53.093666, rtol=eps)
     assert side == "right"
     assert_allclose(overlap_pos, 111.906334, rtol=eps)
diff --git a/zenodo-tests/test_misc/test_morph.py b/zenodo-tests/test_misc/test_morph.py
index da00b56d..4b5e7366 100644
--- a/zenodo-tests/test_misc/test_morph.py
+++ b/zenodo-tests/test_misc/test_morph.py
@@ -21,7 +21,7 @@ def test_sino_360_to_180_i13_dataset1(i13_dataset1):
     )
     stiched_data_180degrees = cp.asnumpy(stiched_data_180degrees)
 
-    assert stiched_data_180degrees.shape == (3000, 10, 4646)
+    assert stiched_data_180degrees.shape == (3000, 10, 5120)
     assert stiched_data_180degrees.dtype == np.float32
     assert stiched_data_180degrees.flags.c_contiguous
 
@@ -41,7 +41,7 @@ def test_sino_360_to_180_i13_dataset3(i13_dataset3):
     )
     stiched_data_180degrees = cp.asnumpy(stiched_data_180degrees)
 
-    assert stiched_data_180degrees.shape == (3000, 3, 4682)
+    assert stiched_data_180degrees.shape == (3000, 3, 5120)
     assert stiched_data_180degrees.dtype == np.float32
     assert stiched_data_180degrees.flags.c_contiguous
 
@@ -61,6 +61,6 @@ def test_sino_360_to_180_i12_dataset5(i12_dataset5):
     )
     stiched_data_180degrees = cp.asnumpy(stiched_data_180degrees)
 
-    assert stiched_data_180degrees.shape == (1800, 15, 4933)
+    assert stiched_data_180degrees.shape == (1800, 15, 5120)
     assert stiched_data_180degrees.dtype == np.float32
     assert stiched_data_180degrees.flags.c_contiguous
diff --git a/zenodo-tests/test_prep/test_stripe.py b/zenodo-tests/test_prep/test_stripe.py
index 847e67b2..c22e5b3e 100644
--- a/zenodo-tests/test_prep/test_stripe.py
+++ b/zenodo-tests/test_prep/test_stripe.py
@@ -112,19 +112,19 @@ def test_remove_stripe_ti_i12_dataset4(
             "i12_dataset4",
             0.01,
             7,
-            52.4856,
+            67.9479,
         ),
         (
             "i12_dataset4",
             0.3,
             5,
-            53.7807,
+            68.80203,
         ),
         (
             "i12_dataset4",
             1.0,
             10,
-            262.2167,
+            873.24664,
         ),
     ],
     ids=["case_001", "case_003", "case_006"],
diff --git a/zenodo-tests/test_recon/test_algorithm.py b/zenodo-tests/test_recon/test_algorithm.py
index 717c7974..7523edd4 100644
--- a/zenodo-tests/test_recon/test_algorithm.py
+++ b/zenodo-tests/test_recon/test_algorithm.py
@@ -228,7 +228,7 @@ def test_reconstruct_LPRec_tomobar_i13_dataset1(i13_dataset1: tuple):
     recon_data = LPRec3d_tomobar(
         data=stiched_data_180degrees,
         angles=np.deg2rad(angles[0:3000]),
-        center=2322.08,
+        center=2560,
         detector_pad=False,
         filter_type="shepp",
         filter_freq_cutoff=1.0,
@@ -237,9 +237,9 @@ def test_reconstruct_LPRec_tomobar_i13_dataset1(i13_dataset1: tuple):
 
     assert recon_data.flags.c_contiguous
     recon_data = cp.asnumpy(recon_data)
-    assert int(np.sum(recon_data)) == 1149
+    assert int(np.sum(recon_data)) == 1142
     assert recon_data.dtype == np.float32
-    assert recon_data.shape == (4646, 1, 4646)
+    assert recon_data.shape == (5120, 1, 5120)
 
 
 @pytest.mark.perf
@@ -382,7 +382,7 @@ def test_reconstruct_FBP3d_tomobar_i13_dataset3(i13_dataset3: tuple):
     recon_data = FBP3d_tomobar(
         stiched_data_180degrees,
         np.deg2rad(angles[0:3000]),
-        center=2339,
+        center=2560,
         detector_pad=0,
         filter_freq_cutoff=0.35,
     )
@@ -391,7 +391,7 @@ def test_reconstruct_FBP3d_tomobar_i13_dataset3(i13_dataset3: tuple):
     recon_data = cp.asnumpy(recon_data)
 
     assert recon_data.dtype == np.float32
-    assert recon_data.shape == (4682, 3, 4682)
+    assert recon_data.shape == (5120, 3, 5120)
 
 
 def test_reconstruct_FBP3d_tomobar_i12_dataset5(i12_dataset5: tuple):
@@ -416,7 +416,7 @@ def test_reconstruct_FBP3d_tomobar_i12_dataset5(i12_dataset5: tuple):
     recon_data = FBP3d_tomobar(
         stiched_data_180degrees,
         np.deg2rad(angles[0:1800]),
-        center=2466,
+        center=2560,
         detector_pad=0,
         filter_freq_cutoff=0.35,
     )
@@ -425,7 +425,7 @@ def test_reconstruct_FBP3d_tomobar_i12_dataset5(i12_dataset5: tuple):
     recon_data = cp.asnumpy(recon_data)
 
     assert recon_data.dtype == np.float32
-    assert recon_data.shape == (4933, 15, 4933)
+    assert recon_data.shape == (5120, 15, 5120)
 
 
 def test_reconstruct_LPRec3d_tomobar_k11_dataset2(k11_dataset2: tuple):
@@ -559,31 +559,21 @@ def test_reconstruct_ADMM3d_tomobar_autopad_k11_dataset2(k11_dataset2: tuple):
 
     del flats, darks, projdata
     force_clean_gpu_memory()
-    data = data_normalised[:, 5:10, :]
+    data = data_normalised[:, 5:8, :]
 
     args = {
         "angles": np.deg2rad(angles),
         "center": 1280.25,
         "detector_pad": True,
         "recon_mask_radius": 2.0,
-        "iterations": 7,
-        "regularisation_parameter": 0.0000005,
+        "iterations": 3,
+        "regularisation_parameter": 0.1,
     }
 
-    peak_mem = ADMM3d_tomobar(data=data.shape, calc_peak_gpu_mem=True, **args)
-    av_mem = cp.cuda.Device().mem_info[0]
-    if av_mem < peak_mem:
-        pytest.skip("Not enough GPU memory to run this test")
-
-    hook = MaxMemoryHook()
-    with hook:
-        recon_data = ADMM3d_tomobar(data=data, **args)
-
-    assert (hook.max_mem * 1.03) > peak_mem
-    assert (hook.max_mem * 0.99) < peak_mem
+    recon_data = ADMM3d_tomobar(data=data, **args)
 
     assert recon_data.flags.c_contiguous
     recon_data = cp.asnumpy(recon_data)
-    assert isclose(np.sum(recon_data), 1228.5009, abs_tol=10**-3)
+    assert isclose(np.sum(recon_data), 2275.893, abs_tol=10**-3)
     assert recon_data.dtype == np.float32
-    assert recon_data.shape == (2560, 5, 2560)
+    assert recon_data.shape == (4150, 3, 4150)
diff --git a/zenodo-tests/test_recon/test_rotation.py b/zenodo-tests/test_recon/test_rotation.py
index b5355245..4a9a5d11 100644
--- a/zenodo-tests/test_recon/test_rotation.py
+++ b/zenodo-tests/test_recon/test_rotation.py
@@ -209,10 +209,9 @@ def test_center_360_i12_dataset5(i12_dataset5, ensure_clean_memory):
     ensure_clean_memory
     cor, overlap, side, overlap_position = find_center_360(data_normalised)
 
-    assert int(cor) == 2466
+    assert int(cor) == 2560
     assert side == "left"
     assert int(overlap) == 186
-    assert cor.dtype == np.float64
 
 
 # ----------------------------------------------------------#
@@ -230,10 +229,9 @@ def test_center_360_i13_dataset1(i13_dataset1, ensure_clean_memory):
     ensure_clean_memory
     cor, overlap, side, overlap_position = find_center_360(data_normalised)
 
-    assert int(cor) == 2323
+    assert int(cor) == 2560
     assert side == "right"
     assert int(overlap) == 473  # actual 473.822265625
-    assert cor.dtype == np.float64
 
 
 # ----------------------------------------------------------#
@@ -259,7 +257,6 @@ def test_center_360_i13_dataset3(i13_dataset3, ensure_clean_memory):
         use_overlap=True,
     )
 
-    assert int(cor) == 2340
+    assert int(cor) == 2560
     assert side == "left"
     assert int(overlap) == 438  # actual 438.173828
-    assert cor.dtype == np.float64