diff --git a/httomolibgpu/memory_estimator_helpers.py b/httomolibgpu/memory_estimator_helpers.py deleted file mode 100644 index 4c63a188..00000000 --- a/httomolibgpu/memory_estimator_helpers.py +++ /dev/null @@ -1,24 +0,0 @@ -ALLOCATION_UNIT_SIZE = 512 - - -class _DeviceMemStack: - def __init__(self) -> None: - self.allocations = [] - self.current = 0 - self.highwater = 0 - - def malloc(self, bytes): - self.allocations.append(bytes) - allocated = self._round_up(bytes) - self.current += allocated - self.highwater = max(self.current, self.highwater) - - def free(self, bytes): - assert bytes in self.allocations - self.allocations.remove(bytes) - self.current -= self._round_up(bytes) - assert self.current >= 0 - - def _round_up(self, size): - size = (size + ALLOCATION_UNIT_SIZE - 1) // ALLOCATION_UNIT_SIZE - return size * ALLOCATION_UNIT_SIZE diff --git a/httomolibgpu/prep/phase.py b/httomolibgpu/prep/phase.py index 41f8468a..9dc01335 100644 --- a/httomolibgpu/prep/phase.py +++ b/httomolibgpu/prep/phase.py @@ -22,7 +22,7 @@ import numpy as np from httomolibgpu import cupywrapper -from httomolibgpu.memory_estimator_helpers import _DeviceMemStack +from tomobar.supp.memory_estimator_helpers import DeviceMemStack cp = cupywrapper.cp cupy_run = cupywrapper.cupy_run @@ -91,7 +91,7 @@ def paganin_filter( cp.ndarray The 3D array of Paganin phase-filtered projection images. """ - mem_stack = _DeviceMemStack() if calc_peak_gpu_mem else None + mem_stack = DeviceMemStack() if calc_peak_gpu_mem else None # Check the input data is valid if not mem_stack and tomo.ndim != 3: raise ValueError( @@ -301,7 +301,7 @@ def _pad_projections( "next_power_of_2", "next_fast_length", "use_pad_x_y" ], pad_x_y: Optional[list], - mem_stack: Optional[_DeviceMemStack], + mem_stack: Optional[DeviceMemStack], ) -> Tuple[cp.ndarray, Tuple[int, int]]: """ Performs padding of each projection to a size optimal for FFT. diff --git a/httomolibgpu/prep/stripe.py b/httomolibgpu/prep/stripe.py index 3418b89a..14e05b55 100644 --- a/httomolibgpu/prep/stripe.py +++ b/httomolibgpu/prep/stripe.py @@ -30,6 +30,7 @@ from unittest.mock import Mock if cupy_run: + from tomobar.supp.memory_estimator_helpers import DeviceMemStack from cupyx.scipy.ndimage import median_filter, binary_dilation, uniform_filter1d from cupyx.scipy.fft import fft2, ifft2, fftshift from cupyx.scipy.fftpack import get_fft_plan @@ -204,32 +205,8 @@ def _reflect(x: np.ndarray, minx: float, maxx: float) -> np.ndarray: return np.array(out, dtype=x.dtype) -class _DeviceMemStack: - def __init__(self) -> None: - self.allocations = [] - self.current = 0 - self.highwater = 0 - - def malloc(self, bytes): - self.allocations.append(bytes) - allocated = self._round_up(bytes) - self.current += allocated - self.highwater = max(self.current, self.highwater) - - def free(self, bytes): - assert bytes in self.allocations - self.allocations.remove(bytes) - self.current -= self._round_up(bytes) - assert self.current >= 0 - - def _round_up(self, size): - ALLOCATION_UNIT_SIZE = 512 - size = (size + ALLOCATION_UNIT_SIZE - 1) // ALLOCATION_UNIT_SIZE - return size * ALLOCATION_UNIT_SIZE - - def _mypad( - x: cp.ndarray, pad: Tuple[int, int, int, int], mem_stack: Optional[_DeviceMemStack] + x: cp.ndarray, pad: Tuple[int, int, int, int], mem_stack: Optional[DeviceMemStack] ) -> cp.ndarray: """Function to do numpy like padding on Arrays. Only works for 2-D padding. @@ -272,7 +249,7 @@ def _conv2d( w: np.ndarray, stride: Tuple[int, int], groups: int, - mem_stack: Optional[_DeviceMemStack], + mem_stack: Optional[DeviceMemStack], ) -> cp.ndarray: """Convolution (equivalent pytorch.conv2d)""" b, ci, hi, wi = x.shape if not mem_stack else x @@ -355,7 +332,7 @@ def _conv_transpose2d( stride: Tuple[int, int], pad: Tuple[int, int], groups: int, - mem_stack: Optional[_DeviceMemStack], + mem_stack: Optional[DeviceMemStack], ) -> cp.ndarray: """Transposed convolution (equivalent pytorch.conv_transpose2d)""" b, co, ho, wo = x.shape if not mem_stack else x @@ -428,7 +405,7 @@ def _afb1d( h0: np.ndarray, h1: np.ndarray, dim: int, - mem_stack: Optional[_DeviceMemStack], + mem_stack: Optional[DeviceMemStack], ) -> cp.ndarray: """1D analysis filter bank (along one dimension only) of an image @@ -476,7 +453,7 @@ def _sfb1d( g0: np.ndarray, g1: np.ndarray, dim: int, - mem_stack: Optional[_DeviceMemStack], + mem_stack: Optional[DeviceMemStack], ) -> cp.ndarray: """1D synthesis filter bank of an image Array""" @@ -520,7 +497,7 @@ def __init__(self, wave: str): self.h1_row = np.array(h1_row).astype("float32")[::-1].reshape((1, 1, 1, -1)) def apply( - self, x: cp.ndarray, mem_stack: Optional[_DeviceMemStack] = None + self, x: cp.ndarray, mem_stack: Optional[DeviceMemStack] = None ) -> Tuple[cp.ndarray, cp.ndarray]: """Forward pass of the DWT. @@ -582,7 +559,7 @@ def __init__(self, wave: str): def apply( self, coeffs: Tuple[cp.ndarray, cp.ndarray], - mem_stack: Optional[_DeviceMemStack] = None, + mem_stack: Optional[DeviceMemStack] = None, ) -> cp.ndarray: """ Args: @@ -672,7 +649,7 @@ def remove_stripe_fw( sli_shape = [nz, 1, nproj_pad, ni] if calc_peak_gpu_mem: - mem_stack = _DeviceMemStack() + mem_stack = DeviceMemStack() # A data copy is assumed when invoking the function mem_stack.malloc(np.prod(data) * np.float32().itemsize) mem_stack.malloc(np.prod(sli_shape) * np.float32().itemsize) diff --git a/httomolibgpu/recon/algorithm.py b/httomolibgpu/recon/algorithm.py index 83c9c6c4..7298ed2f 100644 --- a/httomolibgpu/recon/algorithm.py +++ b/httomolibgpu/recon/algorithm.py @@ -29,6 +29,7 @@ from unittest.mock import Mock if cupy_run: + from tomobar.supp.memory_estimator_helpers import DeviceMemStack from tomobar.methodsDIR import RecToolsDIR from tomobar.methodsDIR_CuPy import RecToolsDIRCuPy from tomobar.methodsIR_CuPy import RecToolsIRCuPy @@ -38,7 +39,7 @@ RecToolsIRCuPy = Mock() from numpy import float32 -from typing import Optional, Type, Union +from typing import Literal, Optional, Tuple, Type, Union __all__ = [ "FBP2d_astra", @@ -192,7 +193,7 @@ def FBP3d_tomobar( ## %%%%%%%%%%%%%%%%%%%%%%% LPRec %%%%%%%%%%%%%%%%%%%%%%%%%%%% ## def LPRec3d_tomobar( - data: cp.ndarray, + data: cp.ndarray | Tuple[int, int, int], angles: np.ndarray, center: Optional[float] = None, detector_pad: Union[bool, int] = False, @@ -204,6 +205,7 @@ def LPRec3d_tomobar( power_of_2_cropping: Optional[bool] = False, min_mem_usage_filter: Optional[bool] = True, min_mem_usage_ifft2: Optional[bool] = True, + **kwargs, ) -> cp.ndarray: """ Fourier direct inversion in 3D on unequally spaced (also called as Log-Polar) grids using @@ -232,6 +234,8 @@ def LPRec3d_tomobar( The radius of the circular mask that applies to the reconstructed slice in order to crop out some undesirable artifacts. The values outside the given diameter will be set to zero. To implement the cropping one can use the range [0.7-1.0] or set to 2.0 when no cropping required. + calc_peak_gpu_mem: bool + Parameter to support memory estimation in HTTomo. Irrelevant to the method itself and can be ignored by user. Returns ------- @@ -240,7 +244,7 @@ def LPRec3d_tomobar( """ RecToolsCP = _instantiate_direct_recon_class( - data, angles, center, detector_pad, recon_size, 0 + data, angles, center, detector_pad, recon_size, 0, "fourier" ) reconstruction = RecToolsCP.FOURIER_INV( @@ -253,8 +257,14 @@ def LPRec3d_tomobar( power_of_2_cropping=power_of_2_cropping, min_mem_usage_filter=min_mem_usage_filter, min_mem_usage_ifft2=min_mem_usage_ifft2, + **kwargs, ) cp._default_memory_pool.free_all_blocks() + + mem_stack = DeviceMemStack.instance() + if mem_stack: + return mem_stack.highwater * 1.00625 + return cp.require(cp.swapaxes(reconstruction, 0, 1), requirements="C") @@ -645,7 +655,160 @@ def _instantiate_direct_recon_class( center: Optional[float] = None, detector_pad: Union[bool, int] = False, recon_size: Optional[int] = None, + recon_mask_radius: float = 0.95, + iterations: int = 3, + subsets_number: int = 24, + initialisation: Optional[str] = "FBP", + ADMM_rho_const: float = 1.0, + ADMM_relax_par: float = 1.7, + regularisation_type: str = "PD_TV", + regularisation_parameter: float = 0.0025, + regularisation_iterations: int = 40, + regularisation_half_precision: bool = True, + nonnegativity: bool = False, gpu_id: int = 0, +) -> cp.ndarray: + """ + An Alternating Direction Method of Multipliers method with various types of regularisation or + denoising operations :cite:`kazantsev2019ccpi` (currently accepts ROF_TV and PD_TV regularisations only). + For more information see :ref:`_method_ADMM3d_tomobar`. + + Parameters + ---------- + data : cp.ndarray + Projection data as a CuPy array. + angles : np.ndarray + An array of angles given in radians. + center : float, optional + The center of rotation (CoR). + detector_pad : bool, int + Detector width padding with edge values to remove circle/arc type artifacts in the reconstruction. Set to True to perform + an automated padding or specify a certain value as an integer. + recon_size : int, optional + The [recon_size, recon_size] shape of the reconstructed slice in pixels. + By default (None), the reconstructed size will be the dimension of the horizontal detector. + recon_mask_radius: float + The radius of the circular mask that applies to the reconstructed slice in order to crop + out some undesirable artifacts. The values outside the given diameter will be set to zero. + To implement the cropping one can use the range [0.7-1.0] or set to 2.0 when no cropping required. + iterations : int + The number of ADMM algorithm iterations. The recommended range is between 3 to 5 with initialisation and + more than 10 without. Assuming that the subsets_number is reasonably large (>12). + subsets_number: int + The number of the ordered subsets to accelerate convergence. The recommended range is between 12 to 24. + initialisation: str, optional + Initialise ADMM with the reconstructed image to reduce the number of iterations and accelerate. Choose between 'CGLS' when data + is noisy and undersampled, 'FBP' when data is of better quality (default) or None. + ADMM_rho_const: float + Convergence related parameter for ADMM, higher values lead to slower convergence, but too small values can destabilise the iterations. + Recommended range is between 0.9 and 2.0. + ADMM_relax_par: Relaxation parameter which can lead to acceleration of the algorithm, keep it in the range between 1.5 and 1.8 to avoid divergence. regularisation_type: str + A method to use for regularisation. Currently PD_TV and ROF_TV are available. + regularisation_parameter: float + The main regularisation parameter to control the amount of smoothing/noise removal. Larger values lead to stronger smoothing. + regularisation_iterations: int + The number of iterations for regularisers (aka INNER iterations). + regularisation_half_precision: bool + Perform faster regularisation computation in half-precision with a very minimal sacrifice in quality. + nonnegativity : bool + Impose nonnegativity constraint (set to True) on the reconstructed image. Default False. + gpu_id : int + A GPU device index to perform operation on. + + Returns + ------- + cp.ndarray + The ADMM reconstructed volume as a CuPy array. + """ + if initialisation not in ["FBP", "CGLS", None]: + raise ValueError( + "The acceptable values for initialisation are 'FBP','CGLS' and None" + ) + + if initialisation is not None: + if detector_pad == True: + detector_pad = __estimate_detectorHoriz_padding(data.shape[2]) + + if detector_pad > 0: + # if detector_pad is not zero we need to reconstruct the image on the recon+2*detector_pad size + recon_size = data.shape[2] + 2 * detector_pad + + if initialisation == "FBP": + initialisation_vol = cp.require( + cp.swapaxes( + FBP3d_tomobar( + data, + angles=angles, + center=center, + detector_pad=detector_pad, + recon_size=recon_size, + recon_mask_radius=recon_mask_radius, + ), + 0, + 1, + ), + requirements="C", + ) + elif initialisation == "CGLS": + initialisation_vol = cp.require( + cp.swapaxes( + CGLS3d_tomobar( + data, + angles=angles, + center=center, + detector_pad=detector_pad, + recon_size=recon_size, + recon_mask_radius=recon_mask_radius, + iterations=15, + ), + 0, + 1, + ), + requirements="C", + ) + else: + initialisation_vol = None + + RecToolsCP = _instantiate_iterative_recon_class( + data, angles, center, detector_pad, recon_size, gpu_id, datafidelity="LS" + ) + + _data_ = { + "projection_norm_data": data, + "OS_number": subsets_number, + "data_axes_labels_order": input_data_axis_labels, + } + + _algorithm_ = { + "initialise": initialisation_vol, + "iterations": iterations, + "nonnegativity": nonnegativity, + "recon_mask_radius": recon_mask_radius, + "ADMM_rho_const": ADMM_rho_const, + "ADMM_relax_par": ADMM_relax_par, + } + + _regularisation_ = { + "method": regularisation_type, # Selected regularisation method + "regul_param": regularisation_parameter, # Regularisation parameter + "iterations": regularisation_iterations, # The number of regularisation iterations + "half_precision": regularisation_half_precision, # enabling half-precision calculation + } + + reconstruction = RecToolsCP.ADMM(_data_, _algorithm_, _regularisation_) + cp._default_memory_pool.free_all_blocks() + return cp.require(cp.swapaxes(reconstruction, 0, 1), requirements="C") + + +## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## +def _instantiate_direct_recon_class( + data: cp.ndarray | Tuple[int, int, int], + angles: np.ndarray, + center: Optional[float] = None, + detector_pad: Union[bool, int] = False, + recon_size: Optional[int] = None, + gpu_id: int = 0, + projector: Literal["fourier", "astra"] = "astra", ) -> Type: """instantiate ToMoBAR's direct recon class @@ -660,23 +823,27 @@ def _instantiate_direct_recon_class( Returns: Type[RecToolsDIRCuPy]: an instance of the direct recon class """ + + data_shape = data if isinstance(data, tuple) else data.shape + if center is None: - center = data.shape[2] // 2 # making a crude guess + center = data_shape[2] // 2 # making a crude guess if recon_size is None: - recon_size = data.shape[2] + recon_size = data_shape[2] if detector_pad is True: - detector_pad = __estimate_detectorHoriz_padding(data.shape[2]) + detector_pad = __estimate_detectorHoriz_padding(data_shape[2]) elif detector_pad is False: detector_pad = 0 RecToolsCP = RecToolsDIRCuPy( - DetectorsDimH=data.shape[2], # Horizontal detector dimension + DetectorsDimH=data_shape[2], # Horizontal detector dimension DetectorsDimH_pad=detector_pad, # padding for horizontal detector - DetectorsDimV=data.shape[1], # Vertical detector dimension (3D case) - CenterRotOffset=data.shape[2] / 2 + DetectorsDimV=data_shape[1], # Vertical detector dimension (3D case) + CenterRotOffset=data_shape[2] / 2 - center - 0.5, # Center of Rotation scalar or a vector AnglesVec=-angles, # A vector of projection angles in radians ObjSize=recon_size, # Reconstructed object dimensions (scalar) + projector=projector, device_projector=gpu_id, ) return RecToolsCP