def extract_year(filename):
    """Return the year encoded in an ERA5 file name such as ``sst_y2011m07.nc``.

    Only the final path component is searched, so a parent directory whose
    name happens to contain a ``yYYYYmMM`` token cannot be mistaken for the
    year of the file itself.

    Parameters
    ----------
    filename : str
        File name or path containing a ``yYYYYmMM`` token.

    Returns
    -------
    int or None
        The four-digit year, or ``None`` when the file name has no such token.
    """
    basename = re.split(r"[\\/]", filename)[-1]
    match = re.search(r"y(\d{4})m\d{2}", basename)
    return int(match.group(1)) if match else None


def preprocess(ds):
    """Rename a ``valid_time`` dimension to ``time`` so all files share one axis.

    Passed to ``xr.open_mfdataset`` and applied to each file before combining.
    Datasets without a ``valid_time`` dimension are returned unchanged.
    """
    if "valid_time" in ds.dims:
        ds = ds.rename({"valid_time": "time"})
    return ds


def main(start_year, end_year, data_path="./", output="sst_climatology.nc"):
    """Compute a day-of-year climatology of the ``sst`` variable and save it.

    Daily files matching ``sst_y????m??_daily.nc`` in ``data_path`` whose year
    lies in ``[start_year, end_year]`` are combined, grouped by day of year,
    and reduced to mean, variance, 10th/90th percentile, minimum and maximum.

    Parameters
    ----------
    start_year, end_year : int
        Inclusive range of years to include.
    data_path : str
        Directory containing the daily SST files.
    output : str
        Path of the NetCDF file to write.

    Raises
    ------
    ValueError
        If no file in ``data_path`` falls inside the requested year range.
    """
    # Select input files before starting any workers, so an empty selection
    # fails fast without leaving a cluster behind.
    files = sorted(glob.glob(f"{data_path}/sst_y????m??_daily.nc"))
    selected_files = []
    for path in files:
        year = extract_year(path)
        if year is not None and start_year <= year <= end_year:
            selected_files.append(path)

    if not selected_files:
        raise ValueError("No files found in the specified year range.")

    print(f"Using {len(selected_files)} files from {start_year} to {end_year}")

    # The context manager shuts the client (and the local cluster it created)
    # down even if the computation or the write fails.
    with Client(n_workers=16, threads_per_worker=1) as client:
        print(client, flush=True)

        # Open multiple files
        ds = xr.open_mfdataset(
            selected_files,
            preprocess=preprocess,
            combine="by_coords",
            parallel=True,
            chunks={"time": 31, "latitude": 721, "longitude": 360},
        )

        # A single chunk along time is required for the quantile reductions;
        # small spatial tiles keep each task's memory footprint bounded.
        ds = ds.chunk({"time": -1, "latitude": 100, "longitude": 100})

        # Compute daily climatology (day of year)
        g_sst = ds["sst"].groupby("time.dayofyear")

        mean = g_sst.mean("time").persist()
        var = g_sst.var("time").persist()
        p10 = g_sst.quantile(0.10, dim="time").persist()
        p90 = g_sst.quantile(0.90, dim="time").persist()
        minimum = g_sst.min("time").persist()
        maximum = g_sst.max("time").persist()

        # Build output dataset
        clim = xr.Dataset()
        clim["sst_mean"] = mean
        clim["sst_variance"] = var
        clim["sst_p10"] = p10.astype(np.float32)
        clim["sst_p90"] = p90.astype(np.float32)
        clim["sst_minimum"] = minimum
        clim["sst_maximum"] = maximum

        clim = clim.chunk({"dayofyear": 30, "latitude": 721, "longitude": 1440})

        # Save output while the workers are still alive.
        clim.to_netcdf(output)

    print(f"Climatology saved to {output}")
def main(filepath: str, outpath: str) -> None:
    """Reduce an hourly ERA5 file to daily mean, min, max and variance.

    Parameters
    ----------
    filepath : str
        Path to the hourly ERA5 NetCDF file to read.
    outpath : str
        Path of the daily NetCDF file to write. For every input variable
        ``v`` the output holds ``v`` (daily mean), ``v_min``, ``v_max`` and
        ``v_var``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Calculate Daily Mean, Min, Max, and Variance ========== #
    # Log the input path rather than module-level ``year``/``month``: those
    # names only exist when this file is run as a script, so referencing them
    # here raised NameError whenever ``main`` was imported and called.
    logging.info(f"In Progress: Calculating ERA5 SST daily mean, min, max and variance from {filepath}...")
    ds = xr.open_dataset(filepath, chunks={"time": -1, "latitude": -1, "longitude": -1})
    logging.info(f"Completed: Read ERA5 Hourly SST data from {filepath}.")

    # Experimental: see https://confluence.ecmwf.int/pages/viewpage.action?pageId=173385064
    if "expver" in ds.dims:
        logging.info(f"Dimension 'expver' present in {filepath}")
        # Collapse the experiment-version axis. ``min_count=1`` keeps cells
        # that are missing in every version as NaN; a plain nansum would turn
        # them into 0 and fabricate values over masked (e.g. land) points.
        ds = ds.sum(dim="expver", min_count=1)

    # Catch and rename the time dimension for consistency
    if "valid_time" in ds.dims:
        ds = ds.rename({"valid_time": "time"})

    ds2 = ds.resample(time="1D").mean()

    for var in ds.data_vars:
        daily = ds[var].resample(time="1D")
        ds2[f"{var}_min"] = daily.min()
        ds2[f"{var}_max"] = daily.max()
        ds2[f"{var}_var"] = daily.var()

    # Compress every 3-D output field, however many input variables there
    # were (the chunk shape below is only valid for 3-D variables).
    z_chunks = {
        name: {"chunksizes": (1, 24, 24), "zlib": True, "complevel": 1}
        for name, field in ds2.data_vars.items()
        if field.ndim == 3
    }

    logging.info(f"In Progress: Writing ERA5 Daily SST data to {outpath}...")
    ds2.to_netcdf(outpath, encoding=z_chunks)
    logging.info(f"Completed: ERA5 Daily SST data saved to {outpath}.")
def main(filepath: str, outpath: str, var_out: str) -> None:
    """Reduce an hourly ERA5 file to monthly mean, min, max and variance.

    Parameters
    ----------
    filepath : str
        Path to the hourly ERA5 NetCDF file to read.
    outpath : str
        Path of the monthly NetCDF file to write. For every input variable
        ``v`` the output holds ``v`` (monthly mean), ``v_min``, ``v_max`` and
        ``v_var``.
    var_out : str
        Short variable label used in log messages only.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Calculate Monthly Mean, Min, Max, and Variance ========== #
    # Log the input path rather than module-level ``year``/``month``: those
    # names only exist when this file is run as a script, so referencing them
    # here raised NameError whenever ``main`` was imported and called.
    logging.info(f"In Progress: Calculating ERA5 {var_out} monthly mean, min, max and variance from {filepath}...")
    ds = xr.open_dataset(filepath, chunks={"time": -1, "latitude": -1, "longitude": -1})
    logging.info(f"Completed: Read ERA5 Hourly {var_out} data from {filepath}.")

    # Experimental: see https://confluence.ecmwf.int/pages/viewpage.action?pageId=173385064
    if "expver" in ds.dims:
        logging.info(f"Dimension 'expver' present in {filepath}")
        # Collapse the experiment-version axis. ``min_count=1`` keeps cells
        # that are missing in every version as NaN; a plain nansum would turn
        # them into 0 and fabricate values over masked (e.g. land) points.
        ds = ds.sum(dim="expver", min_count=1)

    # Catch and rename the time dimension for consistency
    if "valid_time" in ds.dims:
        ds = ds.rename({"valid_time": "time"})

    ds2 = ds.resample(time="1ME").mean()

    for var in ds.data_vars:
        monthly = ds[var].resample(time="1ME")
        ds2[f"{var}_min"] = monthly.min()
        ds2[f"{var}_max"] = monthly.max()
        ds2[f"{var}_var"] = monthly.var()

    # Compress every 3-D output field, however many input variables there
    # were (the chunk shape below is only valid for 3-D variables).
    z_chunks = {
        name: {"chunksizes": (1, 24, 24), "zlib": True, "complevel": 1}
        for name, field in ds2.data_vars.items()
        if field.ndim == 3
    }

    logging.info(f"In Progress: Writing ERA5 Monthly {var_out} data to {outpath}...")
    ds2.to_netcdf(outpath, encoding=z_chunks)
    logging.info(f"Completed: ERA5 Monthly {var_out} data saved to {outpath}.")
#!/bin/bash
#SBATCH --job-name=era5_daily_climatology
#SBATCH --time=12:00:00
#SBATCH --partition=compute
#SBATCH --nodes=1
#SBATCH --mem=0
#SBATCH --exclusive
##SBATCH --mem-per-cpu=4G
##SBATCH --ntasks-per-node=64
##SBATCH --ntasks-per-socket=32
##SBATCH --ntasks-per-core=1

module load NEMO/prg-env
#============================
export I_MPI_SHM=icx

source /dssgfs01/working/atb299/miniforge3/bin/activate
conda activate Sci

# ==============================================================
# run_create_ERA5_daily_climatology.slurm
#
# Description: SLURM script to create the ERA5 daily
# climatology datasets.
#
# Created By: Adam Blaker (atb299@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================

#============================
TIME1=$(date +%s)

dpath="/dssgfs01/scratch/atb299/ERA5_daily/ERA5_daily_fields/"

# Climatology period: 30 years starting at Ystart (inclusive bounds).
Ystart=1996
Yend=$((Ystart+29))

ofile="/dssgfs01/scratch/otooth/npd_data/observations/ERA5/climatology/ERA5_sst_climatology_${Ystart}-${Yend}.nc"

echo "Start year : " "$Ystart"
echo "End year : " "$Yend"
echo "Output file : " "$ofile"

# The script path is relative, so it is resolved against the job's working
# directory. NOTE(review): presumably the job is submitted from the directory
# holding create_ERA5_daily_climatology.py - confirm.
python create_ERA5_daily_climatology.py "$Ystart" "$Yend" --data_path "$dpath" --output "$ofile"
# Remember the Python exit status: without this the job's status would be
# that of the last echo below and a failed run would be reported as success.
status=$?

#============================
# Job timing

TIME2=$(date +%s)
DIFFSEC=$((TIME2 - TIME1))
echo "Took ${DIFFSEC} seconds."
echo "Took $(date +%H:%M:%S -ud "@${DIFFSEC}")"

exit "$status"
#!/bin/bash
#SBATCH --job-name=send_era5_daily
#SBATCH --partition=compute
#SBATCH --time=03:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_send_ERA5_daily_to_os.slurm
#
# Description: SLURM script to send the ERA5 daily
# time-series dataset to Icechunk repository.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================
# Abort on the first failing command, unset variable, or failed pipeline
# stage, so the "Completed" message below is only printed when the Python
# script exits successfully.
set -euo pipefail

# -- Python Environment -- #
# Activate the conda environment from the miniforge3 installation:
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods

# -- Send ERA5 daily time-series datasets to JASMIN OS -- #
echo "In Progress: Sending ERA5 daily time-series to Icechunk..."

# Relative path: resolved against the job's working directory.
# NOTE(review): presumably submitted from the directory containing
# send_ERA5_daily_to_os.py - confirm.
python3 send_ERA5_daily_to_os.py

echo "Completed: Sent ERA5 daily time-series to Icechunk."
#!/bin/bash
#SBATCH --job-name=send_era5_monthly_to_os
#SBATCH --partition=compute
#SBATCH --time=03:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_send_ERA5_monthly_to_os.slurm
#
# Description: SLURM script to send the ERA5 monthly
# time-series dataset to Icechunk repository.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================
# Abort on the first failing command, unset variable, or failed pipeline
# stage, so the "Completed" message below is only printed when the Python
# script exits successfully.
set -euo pipefail

# -- Python Environment -- #
# Activate the conda environment from the miniforge3 installation:
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods

# -- Send ERA5 monthly time-series datasets to JASMIN OS -- #
echo "In Progress: Sending ERA5 monthly time-series to Icechunk..."

# Relative path: resolved against the job's working directory.
# NOTE(review): presumably submitted from the directory containing
# send_ERA5_monthly_to_os.py - confirm.
python3 send_ERA5_monthly_to_os.py

echo "Completed: Sent ERA5 monthly time-series to Icechunk."
#!/bin/bash
#SBATCH --job-name=update_era5_monthly_to_os
#SBATCH --partition=compute
#SBATCH --time=03:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_update_ERA5_monthly_to_os.slurm
#
# Description: SLURM script to update the ERA5 monthly
# time-series dataset in the Icechunk repository.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================
# Abort on the first failing command, unset variable, or failed pipeline
# stage, so the "Completed" message below is only printed when the Python
# script exits successfully.
set -euo pipefail

# -- Python Environment -- #
# Activate the conda environment from the miniforge3 installation:
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods

# -- Update ERA5 monthly time-series datasets in JASMIN OS -- #
echo "In Progress: Updating ERA5 monthly time-series in Icechunk..."

# Relative path: resolved against the job's working directory.
# NOTE(review): presumably submitted from the directory containing
# update_ERA5_monthly_to_os.py - confirm.
python3 update_ERA5_monthly_to_os.py

echo "Completed: Updated ERA5 monthly time-series in Icechunk."
def main():
    """Prepare the ERA5 SST daily climatology and send it to Icechunk.

    Reads the climatology NetCDF file for the configured period, standardises
    coordinates (``day`` axis, longitudes in [-180, 180)), converts the
    temperature fields from Kelvin to degrees Celsius, renames ``sst*`` to
    ``tos*``, adds ancillary grid variables and climatological time bounds,
    sets global attributes and compression, and hands the dataset to
    ``send_to_icechunk``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Send to Icechunk Repository ========== #
    bucket = "era5"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    variable_commits = True

    # Define climatology period:
    start_yr = 1996
    end_yr = 2025

    logging.info(f"In Progress: Sending ERA5 daily climatology for {start_yr}-{end_yr} to Icechunk...")
    # Open ERA5 dataset:
    filepath = f"/dssgfs01/scratch/otooth/npd_data/observations/ERA5/climatology/ERA5_sst_climatology_{start_yr}-{end_yr}.nc"
    ds = xr.open_dataset(filepath)

    # Standardise coordinate dimension names:
    ds = ds.rename({"dayofyear": "day"})

    # Update longitude coordinates to be in the range [-180, 180]:
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    ds = ds.sortby("longitude")

    # Add day of year coordinate (1..N, N = 366 when a leap day is present);
    # sized from the data so a 365-day climatology does not raise.
    ds = ds.assign_coords(
        day=np.arange(1, ds["day"].size + 1)
    )

    # Update variable names, units, and attributes:
    # ``quantile`` is a leftover scalar coordinate; tolerate its absence.
    ds = ds.drop_vars(["quantile"], errors="ignore")
    renames = {}
    # Iterate over a snapshot of the names because ``ds`` is modified below.
    for var in list(ds.data_vars):
        if "sst" in var:
            # Transform units degK -> degC. A variance is unchanged by a
            # constant offset, so it must not have 273.15 subtracted;
            # subtracting 0.0 keeps the attrs/encoding handling identical
            # for every field.
            is_variance = var.endswith(("_var", "_variance"))
            ds[var] = ds[var] - (0.0 if is_variance else 273.15)
            # Add standard names and units (K2 is numerically equal to degC2):
            ds[var].attrs["standard_name"] = "sea_surface_temperature"
            ds[var].attrs["units"] = "K2" if is_variance else "degC"
            # Rename variables to standard names:
            renames[var] = var.replace("sst", "tos")
    ds = ds.rename(renames)

    # Update variable long names:
    ds["tos_mean"].attrs["long_name"] = "Daily Mean Sea Surface Temperature Climatology"
    ds["tos_variance"].attrs["long_name"] = "Daily Variance Sea Surface Temperature Climatology"
    ds["tos_p10"].attrs["long_name"] = "Daily 10th Percentile Sea Surface Temperature Climatology"
    ds["tos_p90"].attrs["long_name"] = "Daily 90th Percentile Sea Surface Temperature Climatology"
    ds["tos_minimum"].attrs["long_name"] = "Daily Minimum Sea Surface Temperature Climatology"
    ds["tos_maximum"].attrs["long_name"] = "Daily Maximum Sea Surface Temperature Climatology"

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos_mean'].isel(day=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update time bounds to reflect climatological period: column 0 is the
    # day counted from 1 January of the first year, column 1 the same offset
    # from 1 January of the last year.
    # NOTE(review): when end_yr is not a leap year the last offset falls on
    # 1 January of the following year - confirm this is intended.
    day_offsets = np.timedelta64(1, 'D') * np.arange(ds['day'].size)
    bounds = np.stack(
        [
            np.datetime64(f'{start_yr}-01-01', 'D') + day_offsets,
            np.datetime64(f'{end_yr}-01-01', 'D') + day_offsets,
        ],
        axis=1,
    ).astype('datetime64[ns]')
    ds['time_bnds'] = xr.DataArray(
        bounds,
        dims=('day', 'bnds'),
        coords={'day': ds['day']},
    )

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.7",
        "title": f"ERA-5 Daily Climatology ({start_yr}-{end_yr})",
        "description": f"ERA-5 Sea Surface Temperature Daily Climatology ({start_yr}-{end_yr}).",
        "source": "Numerical models: IFS Cy41r2 and 4D-Var data assimilation with prescribed sea surface temperature and sea ice concentration. Satellite observations: HadISST2.1.1.0, OSTIA, OSI SAF.",
        "dataset_type": "reanalysis",
        "product_type": "climatology",
        "product_version": "1.0",
        "institution": "European Centre for Medium-Range Weather Forecasts (ECMWF)",
        "citation": "Copernicus Climate Change Service, Climate Data Store, (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: 10.24381/cds.adbb2d47 (Accessed on 20-05-2026).",
        "references": "Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47.",
        "acknowledgement": "Generated using or contains modified Copernicus Climate Change Service information . Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.",
        "license": "ERA5 data were obtained from https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels and are provided under a Creative Commons CC-BY-4.0 License https://creativecommons.org/licenses/by/4.0/",
        "doi": "10.24381/cds.adbb2d47",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "31 km",
        "aggregation": "mean",
        "aggregation_frequency": "daily",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for spatial analysis:
    ds = ds.chunk({'day': 5, 'latitude': 721, 'longitude': 1440})

    # Update variable encodings:
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message based on climatology period:
    prefix = f"era5_{start_yr}_{end_yr}_daily_climatology"
    commit_message = f"Added ERA5 SST Daily Climatology ({start_yr}-{end_yr})."

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
        "local_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/"
    }
    cluster_kwargs = {
        "n_workers" : 20,
        "threads_per_worker" : 1,
        "memory_limit":"2GB"
    }

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='day',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )
def main():
    """Prepare the ERA5 daily SST time series and send it to Icechunk.

    Opens the daily files matching the configured glob, shifts longitudes to
    [-180, 180), converts the temperature fields from Kelvin to degrees
    Celsius, renames ``sst*`` to ``tos*``, adds ancillary grid variables,
    sets global attributes and compression, and hands the dataset to
    ``send_to_icechunk``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Send to Icechunk Repository ========== #
    bucket = "era5"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    variable_commits = True

    logging.info("In Progress: Sending ERA5 daily data to Icechunk...")
    # Open ERA5 dataset:
    filepath = "/dssgfs01/scratch/atb299/ERA5_daily/ERA5_daily_fields/sst_y198?m??_daily.nc"
    ds = xr.open_mfdataset(
        filepath,
        combine="by_coords",
        data_vars="all",
        engine="h5netcdf",
        chunks={"time": -1, "latitude": -1, "longitude": -1},
    )

    # Update longitude coordinates to be in the range [-180, 180]:
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    ds = ds.sortby("longitude")

    # Update variable names, units, and attributes:
    if "number" in ds.data_vars:
        ds = ds.drop_vars(["number"])
    renames = {}
    # Iterate over a snapshot of the names because ``ds`` is modified below.
    for var in list(ds.data_vars):
        if "sst" in var:
            # Transform units degK -> degC. A variance is unchanged by a
            # constant offset, so it must not have 273.15 subtracted;
            # subtracting 0.0 keeps the attrs/encoding handling identical
            # for every field.
            is_variance = var.endswith(("_var", "_variance"))
            ds[var] = ds[var] - (0.0 if is_variance else 273.15)
            # Add standard names and units (K2 is numerically equal to degC2):
            ds[var].attrs["standard_name"] = "sea_surface_temperature"
            ds[var].attrs["units"] = "K2" if is_variance else "degC"
            # Rename variables to standard names:
            renames[var] = var.replace("sst", "tos")
    ds = ds.rename(renames)

    # Update variable long names:
    ds["tos"].attrs["long_name"] = "Daily Mean Sea Surface Temperature"
    ds["tos_var"].attrs["long_name"] = "Daily Variance Sea Surface Temperature"
    ds["tos_min"].attrs["long_name"] = "Daily Minimum Sea Surface Temperature"
    ds["tos_max"].attrs["long_name"] = "Daily Maximum Sea Surface Temperature"

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos'].isel(time=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.7",
        "title": "ERA5 Sea Surface Daily Timeseries",
        "description": "ERA5 daily sea surface temperature timeseries.",
        "source": "Numerical models: IFS Cy41r2 and 4D-Var data assimilation with prescribed sea surface temperature and sea ice concentration. Satellite observations: HadISST2.1.1.0, OSTIA, OSI SAF.",
        "dataset_type": "reanalysis",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "European Centre for Medium-Range Weather Forecasts (ECMWF)",
        "citation": "Copernicus Climate Change Service, Climate Data Store, (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: 10.24381/cds.adbb2d47 (Accessed on 20-05-2026).",
        "references": "Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47.",
        "acknowledgement": "Generated using or contains modified Copernicus Climate Change Service information . Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.",
        "license": "ERA5 data were obtained from https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels and are provided under a Creative Commons CC-BY-4.0 License https://creativecommons.org/licenses/by/4.0/",
        "doi": "10.24381/cds.adbb2d47",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "31 km",
        "aggregation": "mean, variance, min, max",
        "aggregation_frequency": "daily",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for time-series analysis:
    ds = ds.chunk({'time': ds['time'].size, 'latitude': 50, 'longitude': 50})

    # Update variable encodings:
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message for the time-series:
    prefix = "era5_daily_timeseries"
    commit_message = "Added ERA5 Sea Surface Daily Timeseries (1980-01-1989-12)."

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
        "local_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/"
    }
    cluster_kwargs = {
        "n_workers" : 25,
        "threads_per_worker" : 1,
        "memory_limit":"4GB"
    }

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )
def main():
    """Send ERA5 monthly SST and sea ice concentration to an Icechunk repository.

    All inputs (file patterns, bucket, credentials, Dask settings) are
    hard-coded below. The function:

    1. Opens the monthly ``sst_y198?m??`` and ``siconc_y198?m??`` NetCDF files.
    2. Wraps longitudes into [-180, 180) and sorts by longitude.
    3. Converts SST mean/min/max from Kelvin to degrees Celsius, renames
       ``sst*`` variables to ``tos*`` and sets CF attributes.
    4. Merges both datasets, adds mask / dx / dy / cell_area, replaces the
       global attributes, and sets chunking and compression.
    5. Passes the dataset to ``send_to_icechunk`` with ``exists=False``.

    Takes no arguments and returns ``None``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Send to Icechunk Repository ========== #
    bucket = "era5"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    variable_commits = True

    logging.info("In Progress: Sending ERA5 monthly data to Icechunk...")
    # Open ERA5 datasets (SST and sea ice concentration share the same options):
    open_kwargs = {
        "combine": "by_coords",
        "data_vars": "all",
        "engine": "h5netcdf",
        "chunks": {"time": -1, "latitude": -1, "longitude": -1},
    }
    filepath = "/dssgfs01/scratch/otooth/npd_data/observations/ERA5/monthly/sst_y198?m??_monthly.nc"
    ds_sst = xr.open_mfdataset(filepath, **open_kwargs)

    filepath = "/dssgfs01/scratch/otooth/npd_data/observations/ERA5/monthly/siconc_y198?m??_monthly.nc"
    ds_si = xr.open_mfdataset(filepath, **open_kwargs)

    # SST: Update longitude coordinates to be in the range [-180, 180):
    ds_sst = ds_sst.assign_coords(
        longitude=((ds_sst["longitude"] + 180) % 360) - 180
    )
    ds_sst = ds_sst.sortby("longitude")

    # SIC: Update longitude coordinates to be in the range [-180, 180):
    ds_si = ds_si.assign_coords(
        longitude=((ds_si["longitude"] + 180) % 360) - 180
    )
    ds_si = ds_si.sortby("longitude")

    # SST: Update variable names, units, and attributes:
    if "number" in ds_sst.data_vars:
        ds_sst = ds_sst.drop_vars(["number"])
    # Iterate over a snapshot of the names because ds_sst is rebound by rename().
    for var in list(ds_sst.data_vars):
        if "sst" not in var:
            continue
        if var.endswith("_var"):
            # A variance is unchanged by a constant offset, so the Kelvin ->
            # Celsius shift must NOT be applied; only the units are recorded.
            # NOTE(review): data already written with the offset applied to
            # the variance needs regenerating.
            ds_sst[var].attrs["units"] = "K2"
        else:
            # Transform units degK -> degC:
            ds_sst[var] = ds_sst[var] - 273.15
            ds_sst[var].attrs["units"] = "degC"
        # Add standard name:
        ds_sst[var].attrs["standard_name"] = "sea_surface_temperature"
        # Rename variables to standard names:
        ds_sst = ds_sst.rename({var: var.replace("sst", "tos")})

    # SIC: Update variable names, units, and attributes:
    if "number" in ds_si.data_vars:
        ds_si = ds_si.drop_vars(["number"])
    for var in ds_si.data_vars:
        if "siconc" in var:
            # Add standard names and units:
            ds_si[var].attrs["standard_name"] = "sea_ice_area_fraction"
            ds_si[var].attrs["units"] = "1"

    # Update variable long names. These are monthly statistics, so they are
    # labelled "Monthly" (the previous "Daily" labels were copied from the
    # daily script).
    statistics = (("", "Mean"), ("_var", "Variance"), ("_min", "Minimum"), ("_max", "Maximum"))
    for suffix, statistic in statistics:
        ds_sst[f"tos{suffix}"].attrs["long_name"] = f"Monthly {statistic} Sea Surface Temperature"
        ds_si[f"siconc{suffix}"].attrs["long_name"] = f"Monthly {statistic} Sea Ice Area Fraction"

    # Merge SST and SIC datasets:
    ds = xr.merge([ds_sst, ds_si], compat="override", join="override")

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos'].isel(time=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.7",
        "title": "ERA5 Sea Surface Monthly Timeseries",
        "description": "ERA5 monthly sea surface temperature and sea ice area fraction timeseries.",
        "source": "Numerical models: IFS Cy41r2 and 4D-Var data assimilation with prescribed sea surface temperature and sea ice concentration. Satellite observations: HadISST2.1.1.0, OSTIA, OSI SAF.",
        "dataset_type": "reanalysis",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "European Centre for Medium-Range Weather Forecasts (ECMWF)",
        "citation": "Copernicus Climate Change Service, Climate Data Store, (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: 10.24381/cds.adbb2d47 (Accessed on 20-05-2026).",
        "references": "Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47.",
        "acknowledgement": "Generated using or contains modified Copernicus Climate Change Service information . Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.",
        "license": "ERA5 data were obtained from https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels and are provided under a Creative Commons CC-BY-4.0 License https://creativecommons.org/licenses/by/4.0/",
        "doi": "10.24381/cds.adbb2d47",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "31 km",
        "aggregation": "mean, variance, min, max",
        "aggregation_frequency": "monthly",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for spatial analysis (one full map per time step):
    ds = ds.chunk({'time': 1, 'latitude': 721, 'longitude': 1440})

    # Update variable encodings:
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message for the timeseries period being sent:
    prefix = "era5_monthly_timeseries"
    commit_message = "Added ERA5 Sea Surface Monthly Timeseries (1980-01-1989-12)."

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
    }
    cluster_kwargs = {
        "n_workers": 25,
        "threads_per_worker": 1,
        "memory_limit": "4GB",
    }

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )
def main():
    """Append ERA5 daily SST data to the existing Icechunk daily timeseries.

    All inputs (years, paths, bucket, credentials, Dask settings) are
    hard-coded below. The function:

    1. Opens the daily ``sst_y{year}m??_daily.nc`` files for the years in
       ``range(2026, 2027)``.
    2. Wraps longitudes into [-180, 180) and sorts by longitude.
    3. Converts SST mean/min/max from Kelvin to degrees Celsius, renames
       ``sst*`` variables to ``tos*`` and sets CF attributes.
    4. Adds mask / dx / dy / cell_area, replaces the global attributes, and
       sets chunking and compression.
    5. Passes the dataset to ``update_icechunk``, appending along ``time``.

    Takes no arguments and returns ``None``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Update Icechunk Repository ========== #
    bucket = "era5"
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"

    logging.info("In Progress: Sending ERA5 daily data to Icechunk...")
    # Collect the daily files year by year, sorted within each year:
    base = Path("/dssgfs01/scratch/otooth/npd_data/observations/ERA5/daily")
    filepath = [
        nc_file
        for year in range(2026, 2027)
        for nc_file in sorted(base.glob(f"sst_y{year}m??_daily.nc"))
    ]
    ds = xr.open_mfdataset(
        filepath,
        combine="by_coords",
        data_vars="all",
        engine="h5netcdf",
        chunks={"time": -1, "latitude": -1, "longitude": -1},
    )

    # Update longitude coordinates to be in the range [-180, 180):
    ds = ds.assign_coords(
        longitude=((ds["longitude"] + 180) % 360) - 180
    )
    ds = ds.sortby("longitude")

    # Update variable names, units, and attributes:
    if "number" in ds.data_vars:
        ds = ds.drop_vars(["number"])
    # Iterate over a snapshot of the names because ds is rebound by rename().
    for var in list(ds.data_vars):
        if "sst" not in var:
            continue
        if var.endswith("_var"):
            # A variance is unchanged by a constant offset, so the Kelvin ->
            # Celsius shift must NOT be applied; only the units are recorded.
            # NOTE(review): variance data already in the store that was
            # written with the offset applied needs regenerating.
            ds[var].attrs["units"] = "K2"
        else:
            # Transform units degK -> degC:
            ds[var] = ds[var] - 273.15
            ds[var].attrs["units"] = "degC"
        # Add standard name:
        ds[var].attrs["standard_name"] = "sea_surface_temperature"
        # Rename variables to standard names:
        ds = ds.rename({var: var.replace("sst", "tos")})

    # Update variable long names:
    statistics = (("", "Mean"), ("_var", "Variance"), ("_min", "Minimum"), ("_max", "Maximum"))
    for suffix, statistic in statistics:
        ds[f"tos{suffix}"].attrs["long_name"] = f"Daily {statistic} Sea Surface Temperature"

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos'].isel(time=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.7",
        "title": "ERA5 Sea Surface Daily Timeseries",
        "description": "ERA5 daily sea surface temperature timeseries.",
        "source": "Numerical models: IFS Cy41r2 and 4D-Var data assimilation with prescribed sea surface temperature and sea ice concentration. Satellite observations: HadISST2.1.1.0, OSTIA, OSI SAF.",
        "dataset_type": "reanalysis",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "European Centre for Medium-Range Weather Forecasts (ECMWF)",
        "citation": "Copernicus Climate Change Service, Climate Data Store, (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: 10.24381/cds.adbb2d47 (Accessed on 20-05-2026).",
        "references": "Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47.",
        "acknowledgement": "Generated using or contains modified Copernicus Climate Change Service information . Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.",
        "license": "ERA5 data were obtained from https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels and are provided under a Creative Commons CC-BY-4.0 License https://creativecommons.org/licenses/by/4.0/",
        "doi": "10.24381/cds.adbb2d47",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "31 km",
        "aggregation": "mean, variance, min, max",
        "aggregation_frequency": "daily",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for time-series analysis (whole time axis per tile):
    ds = ds.chunk({'time': ds['time'].size, 'latitude': 50, 'longitude': 50})

    # Update variable encodings:
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message for the timeseries period being appended:
    prefix = "era5_daily_timeseries"
    commit_message = "Added ERA5 Sea Surface Daily Timeseries (2026-01-2026-06)."

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
    }
    cluster_kwargs = {
        "n_workers": 20,
        "threads_per_worker": 1,
        "memory_limit": "5GB",
    }

    update_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )
def main():
    """Append ERA5 monthly SST and sea ice concentration to the Icechunk store.

    All inputs (file patterns, bucket, credentials, Dask settings) are
    hard-coded below. The function:

    1. Opens the monthly ``sst_y20??m??`` and ``siconc_y20??m??`` NetCDF files.
    2. Wraps longitudes into [-180, 180) and sorts by longitude.
    3. Converts SST mean/min/max from Kelvin to degrees Celsius, renames
       ``sst*`` variables to ``tos*`` and sets CF attributes.
    4. Merges both datasets, adds mask / dx / dy / cell_area, replaces the
       global attributes, and sets chunking and compression.
    5. Passes the dataset to ``update_icechunk``, appending along ``time``.

    Takes no arguments and returns ``None``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Update Icechunk Repository ========== #
    bucket = "era5"
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"

    logging.info("In Progress: Updating ERA5 monthly data in Icechunk...")
    # Open ERA5 datasets (SST and sea ice concentration share the same options):
    open_kwargs = {
        "combine": "by_coords",
        "data_vars": "all",
        "engine": "h5netcdf",
        "chunks": {"time": -1, "latitude": -1, "longitude": -1},
    }
    filepath = "/dssgfs01/scratch/otooth/npd_data/observations/ERA5/monthly/sst_y20??m??_monthly.nc"
    ds_sst = xr.open_mfdataset(filepath, **open_kwargs)

    filepath = "/dssgfs01/scratch/otooth/npd_data/observations/ERA5/monthly/siconc_y20??m??_monthly.nc"
    ds_si = xr.open_mfdataset(filepath, **open_kwargs)

    # SST: Update longitude coordinates to be in the range [-180, 180):
    ds_sst = ds_sst.assign_coords(
        longitude=((ds_sst["longitude"] + 180) % 360) - 180
    )
    ds_sst = ds_sst.sortby("longitude")

    # SIC: Update longitude coordinates to be in the range [-180, 180):
    ds_si = ds_si.assign_coords(
        longitude=((ds_si["longitude"] + 180) % 360) - 180
    )
    ds_si = ds_si.sortby("longitude")

    # SST: Update variable names, units, and attributes:
    if "number" in ds_sst.data_vars:
        ds_sst = ds_sst.drop_vars(["number"])
    # Iterate over a snapshot of the names because ds_sst is rebound by rename().
    for var in list(ds_sst.data_vars):
        if "sst" not in var:
            continue
        if var.endswith("_var"):
            # A variance is unchanged by a constant offset, so the Kelvin ->
            # Celsius shift must NOT be applied; only the units are recorded.
            # NOTE(review): variance data already in the store that was
            # written with the offset applied needs regenerating.
            ds_sst[var].attrs["units"] = "K2"
        else:
            # Transform units degK -> degC:
            ds_sst[var] = ds_sst[var] - 273.15
            ds_sst[var].attrs["units"] = "degC"
        # Add standard name:
        ds_sst[var].attrs["standard_name"] = "sea_surface_temperature"
        # Rename variables to standard names:
        ds_sst = ds_sst.rename({var: var.replace("sst", "tos")})

    # SIC: Update variable names, units, and attributes:
    if "number" in ds_si.data_vars:
        ds_si = ds_si.drop_vars(["number"])
    for var in ds_si.data_vars:
        if "siconc" in var:
            # Add standard names and units:
            ds_si[var].attrs["standard_name"] = "sea_ice_area_fraction"
            ds_si[var].attrs["units"] = "1"

    # Update variable long names. These are monthly statistics, so they are
    # labelled "Monthly" (the previous "Daily" labels were copied from the
    # daily script).
    statistics = (("", "Mean"), ("_var", "Variance"), ("_min", "Minimum"), ("_max", "Maximum"))
    for suffix, statistic in statistics:
        ds_sst[f"tos{suffix}"].attrs["long_name"] = f"Monthly {statistic} Sea Surface Temperature"
        ds_si[f"siconc{suffix}"].attrs["long_name"] = f"Monthly {statistic} Sea Ice Area Fraction"

    # Merge SST and SIC datasets:
    ds = xr.merge([ds_sst, ds_si], compat="override", join="override")

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos'].isel(time=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Update global attributes:
    ds.attrs.clear()
    ds = ds.assign_attrs({
        "Conventions": "CF-1.7",
        "title": "ERA5 Sea Surface Monthly Timeseries",
        "description": "ERA5 monthly sea surface temperature and sea ice area fraction timeseries.",
        "source": "Numerical models: IFS Cy41r2 and 4D-Var data assimilation with prescribed sea surface temperature and sea ice concentration. Satellite observations: HadISST2.1.1.0, OSTIA, OSI SAF.",
        "dataset_type": "reanalysis",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "European Centre for Medium-Range Weather Forecasts (ECMWF)",
        "citation": "Copernicus Climate Change Service, Climate Data Store, (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). DOI: 10.24381/cds.adbb2d47 (Accessed on 20-05-2026).",
        "references": "Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 hourly data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.adbb2d47.",
        "acknowledgement": "Generated using or contains modified Copernicus Climate Change Service information . Neither the European Commission nor ECMWF is responsible for any use that may be made of the Copernicus information or data it contains.",
        "license": "ERA5 data were obtained from https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels and are provided under a Creative Commons CC-BY-4.0 License https://creativecommons.org/licenses/by/4.0/",
        "doi": "10.24381/cds.adbb2d47",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "31 km",
        "aggregation": "mean, variance, min, max",
        "aggregation_frequency": "monthly",
        "status": "completed",
        "update_frequency": "None",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Optimise chunk sizes for spatial analysis (one full map per time step):
    ds = ds.chunk({'time': 1, 'latitude': 721, 'longitude': 1440})

    # Update variable encodings:
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    # Define prefix and commit message for the timeseries period being appended:
    prefix = "era5_monthly_timeseries"
    commit_message = "Add ERA5 Sea Surface Monthly Timeseries (2000-01-2026-05)."

    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/ERA5/",
    }
    cluster_kwargs = {
        "n_workers": 25,
        "threads_per_worker": 1,
        "memory_limit": "4GB",
    }

    update_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )
#!/bin/bash

# ----------------------------------------------------------------
# download_HadISST1_data.sh
#
# This script downloads the HadISST1 dataset from the Met Office
# Hadley Centre HadISST website. The files to be downloaded are
# HadISST_sst.nc.gz & HadISST_ice.nc.gz.
#
# Files are downloaded into, and unzipped in, $output_dir (set below).
# Re-running the script replaces any previously downloaded copies.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-05-27
# ----------------------------------------------------------------
set -euo pipefail

# --- Inputs --- #
# Output directory for downloaded files:
output_dir="/dssgfs01/scratch/otooth/npd_data/observations/HadISST"

# Define base URL to HadISST1 dataset:
url="https://www.metoffice.gov.uk/hadobs/hadisst/data"

# Compressed files to fetch from $url:
files=("HadISST_sst.nc.gz" "HadISST_ice.nc.gz")

# --- Main Script --- #
echo "==================================================="
echo " Downloading HadISST1 Dataset"
echo " v0.1.0"
echo " Oliver J. Tooth, NOC"
echo "==================================================="
echo "In Progress: Downloading HadISST1 dataset..."

# wget -O does not create missing directories, so make sure it exists:
mkdir -p "$output_dir"

# Download the HadISST1 dataset:
echo "-> Downloading HadISST_sst.nc.gz & HadISST_ice.nc.gz..."
for file in "${files[@]}"; do
    # -O overwrites a stale or partial archive instead of saving "<name>.1".
    wget -O "$output_dir/$file" "$url/$file"
done

# Unzip the files:
echo "-> Unzipping HadISST1 dataset..."
for file in "${files[@]}"; do
    # -f replaces an existing .nc from an earlier run instead of failing.
    gunzip -f "$output_dir/$file"
done

# Update users via stdout:
echo "...Completed: HadISST1 dataset downloaded and unzipped."
#!/bin/bash
#SBATCH --job-name=hadisst1_monthly
#SBATCH --partition=test
#SBATCH --time=00:20:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_send_HadISST1_monthly_to_os.slurm
#
# Description: SLURM script to send the HadISST1 monthly
# time-series dataset to Icechunk repository.
#
# Resources requested above: one node on the "test" partition
# for at most 20 minutes.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-24
#
# ==============================================================
# Abort on any failed command, unset variable, or failed pipeline stage:
set -euo pipefail

# -- Python Environment -- #
# Activate the miniforge3 installation, then the env_ods conda environment:
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods

# -- Send HadISST1 monthly time-series datasets to JASMIN OS -- #
echo "In Progress: Sending HadISST1 monthly time-series to Icechunk..."

# The Python script path is relative, so it is resolved against the job's
# working directory.
python3 send_HadISST1_monthly_to_os.py

echo "Completed: Sent HadISST1 monthly time-series to Icechunk."
def main():
    """Send the HadISST1 monthly SST and sea ice dataset to an Icechunk repository.

    All inputs (paths, bucket, credentials, Dask settings) are hard-coded
    below. The function:

    1. Opens ``HadISST_sst.nc`` and ``HadISST_ice.nc`` and combines them into
       one dataset with variables ``tos`` and ``siconc``.
    2. Replaces the ``-1000`` SST fill value with NaN.
    3. Sets variable and global attributes and adds mask / dx / dy /
       cell_area plus hemispheric sea ice area timeseries.
    4. Sets chunking and compression and passes the dataset to
       ``send_to_icechunk`` with ``exists=False``.

    Takes no arguments and returns ``None``.
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # ========== Prepare Data ========== #
    # Open HadISST1 monthly dataset:
    filedir = "/dssgfs01/scratch/otooth/npd_data/observations/HadISST"
    ds = xr.open_dataset(f"{filedir}/HadISST_sst.nc", engine="netcdf4")
    ds_si = xr.open_dataset(f"{filedir}/HadISST_ice.nc", engine="netcdf4")

    # Add sea ice concentration to single dataset:
    ds['sic'] = ds_si['sic']

    # Rename variables to standard names:
    ds = ds.rename({"sst": "tos", "sic": "siconc"})
    # Fill missing sea surface temperature values (-1000) with NaNs.
    # DataArray.where() keeps the variable's attributes (e.g. units), which
    # the top-level xr.where() does not do by default on all xarray versions.
    ds['tos'] = ds['tos'].where(ds['tos'] != -1000)

    # Update variable attributes:
    ds["tos"].attrs.update({
        "long_name": "Sea Surface Temperature",
    })
    ds["siconc"].attrs.update({
        "long_name": "Sea Ice Area Fraction",
    })

    # Update global attributes:
    ds.attrs.clear()

    ds = ds.assign_attrs({
        "Conventions": "CF-1.0",
        "title": "Hadley Centre Sea Ice and Sea Surface Temperature (HadISST) monthly timeseries.",
        "description": "HadISST v1.1 monthly averages of sea surface temperature and sea ice concentration.",
        "source": "Numerical models: Reduced Space Optimal Interpolation. In-situ observations: Met Office Marine Data Bank (MDB), Comprehensive Ocean-Atmosphere Data Set (COADS). Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).",
        "dataset_type": "observation",
        "product_type": "timeseries",
        "product_version": "1.1",
        "institution": "Met Office, UK",
        "citation": "Rayner, N. A., Parker, D. E., Horton, E. B., Folland, C. K., Alexander, L. V., Rowell, D. P., Kent, E. C., Kaplan, A. Global analyses of sea surface temperature, sea ice, and night marine air temperature since the late nineteenth century J. Geophys. Res.Vol. 108, No. D14, 4407 10.1029/2002JD002670.",
        "references": "Rayner, N. A., Parker, D. E., Horton, E. B., Folland, C. K., Alexander, L. V., Rowell, D. P., Kent, E. C., Kaplan, A. Global analyses of sea surface temperature, sea ice, and night marine air temperature since the late nineteenth century J. Geophys. Res.Vol. 108, No. D14, 4407 10.1029/2002JD002670.",
        "acknowledgement": "None",
        "license": "HadISST1.1 data were obtained from https://www.metoffice.gov.uk/hadobs/hadisst/ and are © Crown Copyright, Met Office, [2026], provided under a Non-Commercial Government Licence http://www.nationalarchives.gov.uk/doc/non-commercial-government-licence/version/2/.",
        "doi": "None",
        "platform": "gr",
        "horizontal_grid_type": "regular rectilinear",
        "horizontal_grid_resolution": "1 degree",
        "aggregation": "mean",
        "aggregation_frequency": "monthly",
        "status": "ongoing",
        "update_frequency": "quarterly",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
    })

    # Add ancillary variables:
    ds['mask'] = compute_land_sea_mask(ds['tos'].isel(time=0))
    ds['dx'] = compute_dx(ds)
    ds['dy'] = compute_dy(ds)
    ds['cell_area'] = compute_cell_area(ds)

    # Add Northern and Southern Hemisphere sea ice area timeseries
    # (concentration weighted by cell area, summed over each hemisphere):
    ds['siarea_NH'] = (ds['siconc'].where(ds['latitude'] > 0) * ds['cell_area']).sum(dim=['latitude', 'longitude'])
    ds['siarea_NH'].attrs = {'long_name': 'Total Northern Hemisphere Sea Ice Area', 'standard_name': 'sea_ice_area', 'units': 'm2'}

    ds['siarea_SH'] = (ds['siconc'].where(ds['latitude'] < 0) * ds['cell_area']).sum(dim=['latitude', 'longitude'])
    ds['siarea_SH'].attrs = {'long_name': 'Total Southern Hemisphere Sea Ice Area', 'standard_name': 'sea_ice_area', 'units': 'm2'}

    # ========== Send to Icechunk Repository ========== #
    bucket = "hadisst"
    prefix = "hadisst_v1.1_monthly"
    exists = False
    store_credentials_json = ".../credentials/jasmin_os_credentials.json"
    branch = "main"
    commit_message = "Added HadISST1 sea surface temperature and sea ice concentration monthly (1870-01-2026-04)."
    variable_commits = True
    # Dask LocalCluster configuration:
    config_kwargs = {
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/HadISST/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/HadISST/",
    }
    cluster_kwargs = {
        "n_workers": 15,
        "threads_per_worker": 1,
        "memory_limit": "3GB",
    }

    # Optimise chunk sizes for spatial analysis:
    ds = ds.chunk({'time': 30, 'latitude': 180, 'longitude': 360})

    # Update variable encodings:
    blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle)
    for var in list(ds.data_vars) + list(ds.coords):
        ds[var].encoding['compressors'] = [blosccodec]

    send_to_icechunk(
        file=ds,
        bucket=bucket,
        object_prefix=prefix,
        store_credentials_json=store_credentials_json,
        exists=exists,
        append_dim='time',
        branch=branch,
        commit_message=commit_message,
        variable_commits=variable_commits,
        dask_config_kwargs=config_kwargs,
        dask_cluster_kwargs=cluster_kwargs,
    )
def main(
    year_start=1991,
    year_end=2020,
    data_path="/dssgfs01/scratch/otooth/npd_data/observations/OISST/daily/",
    output="./sst.daily.climatology.nc",
    dask_cluster_kwargs={
        "n_workers": 8,
        "threads_per_worker": 1,
        "memory_limit": "10GB",
    },
    dask_config_kwargs={
        "temporary_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/OISST/",
        "local_directory": "/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/OISST/",
    },
):
    """Compute the OISST day-of-year SST climatology and write it to NetCDF.

    For every day of the year the mean, 10th percentile and 90th percentile
    of ``sst`` are computed over all selected years and stored as
    ``sst_mean``, ``sst_p10`` and ``sst_p90``.

    Parameters
    ----------
    year_start, year_end : int
        Inclusive range of years; files are selected by the four-digit year
        in their ``sst.day.mean.YYYY.nc`` name.
    data_path : str
        Directory containing the yearly OISST files.
    output : str
        Path of the NetCDF file to write (overwritten if present).
    dask_cluster_kwargs : dict
        Keyword arguments forwarded to ``dask.distributed.LocalCluster``.
        The default dict is only read, never mutated.
    dask_config_kwargs : dict or None
        Settings passed to ``dask.config.set``; ``None`` skips this step.

    Raises
    ------
    ValueError
        If no input file falls inside ``[year_start, year_end]``.
    """
    xr.set_options(use_flox=True)

    if dask_config_kwargs is not None:
        dask.config.set(dask_config_kwargs)

    # Find all SST files and keep those inside the requested year range.
    # This is done before starting the cluster so a bad range fails fast.
    files = sorted(glob.glob(f"{data_path}/sst.day.mean.????.nc"))
    selected_files = [
        file for file in files
        if year_start <= int(file[-7:-3]) <= year_end  # "...YYYY.nc" -> YYYY
    ]
    if not selected_files:
        raise ValueError("No files found in the specified year range.")

    with LocalCluster(**dask_cluster_kwargs) as cluster, Client(cluster) as client:
        n_workers = dask_cluster_kwargs.get("n_workers", "default")
        print(f"Created LocalCluster with {n_workers} workers @ Client: {client.dashboard_link}")
        print(f"Selected files for climatology computation: {selected_files}", flush=True)

        # Open multiple files, keeping only the SST variable. The chunk keys
        # use the dataset's own dimension names (lat / lon, as in the
        # rechunk below).
        ds = xr.open_mfdataset(
            selected_files,
            combine="by_coords",
            parallel=True,
            engine='h5netcdf',
            chunks={"time": 31, "lat": 720, "lon": 360},
            preprocess=lambda ds: ds[['sst']],
        )

        # Rechunk so each spatial tile holds the full time axis, which the
        # per-day-of-year reductions need.
        ds = ds.chunk({
            "time": -1,
            "lat": 100,
            "lon": 100,
        })
        # Compute daily climatology (day of year):
        g_sst = ds["sst"].groupby("time.dayofyear")  # Group once for readability

        mean = g_sst.mean("time")
        mean = mean.persist()

        p10 = g_sst.quantile(0.10, dim="time")
        p10 = p10.persist()

        p90 = g_sst.quantile(0.90, dim="time")
        p90 = p90.persist()

        # Build output dataset:
        clim = xr.Dataset()
        clim["sst_mean"] = mean
        clim["sst_p10"] = p10.astype(np.float32)
        clim["sst_p90"] = p90.astype(np.float32)

        # Save output:
        print(f"In Progress: Saving Climatology to {output}")
        clim.to_netcdf(output, engine='h5netcdf', mode='w')
        print(f"Completed: Climatology saved to {output}", flush=True)
#!/bin/bash

# ----------------------------------------------------------------
# download_oisstv2_data.sh
#
# Description: Download the OISSTv2 dataset from the
# NOAA website:
# https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html
#
# Usage: download_oisstv2_data.sh [first_year [last_year]]
#        (defaults: 2012 2026)
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-24
# ----------------------------------------------------------------
set -euo pipefail

# --- Inputs --- #
output_dir="/dssgfs01/scratch/otooth/npd_data/observations/OISST/daily/"

# Inclusive range of years to download (optional positional arguments):
year_first="${1:-2012}"
year_last="${2:-2026}"

# -- Defaults -- #
base_url="https://downloads.psl.noaa.gov//Datasets/noaa.oisst.v2.highres"

# --- Main Script --- #
echo "==================================================="
echo " Downloading OISSTv2 Data"
echo " v0.1.0"
echo " Oliver J. Tooth, NOC"
echo "==================================================="
echo "In Progress: Downloading OISSTv2 dataset..."

# wget -O does not create missing directories, so make sure it exists:
mkdir -p "$output_dir"

# Iterate over years:
for ((yr = year_first; yr <= year_last; yr++)); do
    # Construct URL for current year:
    url="$base_url/sst.day.mean.${yr}.nc"

    # Download file if not in output directory:
    filepath="$output_dir/$(basename "$url")"
    if [ ! -f "$filepath" ]; then
        # Download under a temporary name and rename on success, so an
        # interrupted transfer is never mistaken for a complete file by the
        # existence check above on the next run.
        wget -O "${filepath}.part" "$url"
        mv "${filepath}.part" "$filepath"
        echo "-> Completed: Downloaded $filepath."
    else
        echo "-> Skipping Download: NetCDF file for ${yr} already exists in $output_dir."
    fi
done

echo "==================================================="
#!/bin/bash
#SBATCH --job-name=oisstv2_daily_climatology
#SBATCH --partition=compute
#SBATCH --time=05:00:00
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=64
#SBATCH --ntasks-per-socket=32
#SBATCH --nodes=1

# ==============================================================
# run_create_OISSTv2_daily_climatology.slurm
#
# Description: SLURM script to create the OISSTv2.1 daily
# climatology datasets.
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# Created On: 2026-06-25
#
# ==============================================================
set -euo pipefail
TIME1=$(date +%s)

# -- Python Environment -- #
# Activate the miniforge3 installation, then the env_ods conda environment:
source /dssgfs01/working/otooth/miniforge3/bin/activate
conda activate env_ods

# -- Create OISSTv2.1 daily climatology datasets -- #
dpath="/dssgfs01/scratch/otooth/npd_data/observations/OISST/daily/"
opath="/dssgfs01/scratch/otooth/npd_data/observations/OISST/climatology"
year_start=1996
year_end=2025
# Single definition of the output file, so the path reported below is the
# path actually written by the Python script:
outfile="$opath/oisst_climatology_${year_start}-${year_end}.nc"

# Make sure the output directory exists before the long computation starts:
mkdir -p "$opath"

echo "Start year : " "$year_start"
echo "End year : " "$year_end"
echo "Output file : " "$outfile"

python3 create_OISSTv2_daily_climatology.py --year_start "$year_start" --year_end "$year_end" --data_path "$dpath" --output "$outfile"

# -- Report Job Timing -- #
TIME2=$(date +%s)
DIFFSEC=$((TIME2 - TIME1))
echo "Job Completed in: ${DIFFSEC} seconds."
echo "Job Took: $(date +%H:%M:%S -ud "@${DIFFSEC}")"
+echo Job Took: `date +%H:%M:%S -ud @${DIFFSEC}` diff --git a/OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm b/OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm index aa9181cc..625a2b66 100755 --- a/OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm +++ b/OceanDataStore/data/OISST/run_send_OISSTv2_daily_climatology_to_os.slurm @@ -21,7 +21,7 @@ set -euo pipefail # -- Python Environment -- # # Activate miniconda environment: -source .../miniforge3/bin/activate +source /dssgfs01/working/otooth/miniforge3/bin/activate conda activate env_ods # -- Send OISSTv2.1 daily climatology datasets to JASMIN OS -- # diff --git a/OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm b/OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm new file mode 100755 index 00000000..ebbf6fe2 --- /dev/null +++ b/OceanDataStore/data/OISST/run_send_OISSTv2_daily_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=send_oisstv2_daily +#SBATCH --partition=compute +#SBATCH --time=03:00:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_send_OISSTv2_daily_to_os.slurm +# +# Description: SLURM script to send the OISSTv2.1 daily +# time-series dataset to Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-06-09 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source /dssgfs01/working/otooth/miniforge3/bin/activate +conda activate env_ods + +# -- Send OISSTv2.1 daily time-series datasets to JASMIN OS -- # +echo "In Progress: Sending OISSTv2.1 Daily time-series to Icechunk..." + +python3 send_OISSTv2_daily_to_os.py + +echo "Completed: Sent OISSTv2.1 Daily time-series to Icechunk." 
diff --git a/OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm b/OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm new file mode 100755 index 00000000..086c0b25 --- /dev/null +++ b/OceanDataStore/data/OISST/run_update_OISSTv2_daily_to_os.slurm @@ -0,0 +1,32 @@ +#!/bin/bash +#SBATCH --job-name=update_oisstv2_daily +#SBATCH --partition=compute +#SBATCH --time=03:00:00 +#SBATCH --ntasks-per-core=1 +#SBATCH --ntasks-per-node=64 +#SBATCH --ntasks-per-socket=32 +#SBATCH --nodes=1 + +# ============================================================== +# run_update_OISSTv2_daily_to_os.slurm +# +# Description: SLURM script to update the OISSTv2.1 daily +# time-series dataset in Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# Created On: 2026-06-09 +# +# ============================================================== +set -euo pipefail + +# -- Python Environment -- # +# Activate miniconda environment: +source /dssgfs01/working/otooth/miniforge3/bin/activate +conda activate env_ods + +# -- Update OISSTv2.1 daily time-series datasets in JASMIN OS -- # +echo "In Progress: Updating OISSTv2.1 Daily time-series in Icechunk..." + +python3 update_OISSTv2_daily_to_os.py + +echo "Completed: Updated OISSTv2.1 Daily time-series in Icechunk." 
diff --git a/OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py b/OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py index 671a1da6..c97793e6 100755 --- a/OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py +++ b/OceanDataStore/data/OISST/send_OISSTv2_daily_climatology_to_os.py @@ -40,10 +40,9 @@ def main(): logging.info(f"In Progress: Sending OISSTv2.1 daily climatology for {start_yr}-{end_yr} to Icechunk...") # Open OISSTv2 dataset: - filepaths = [f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/icec.day.mean.ltm.{start_yr}-{end_yr}.nc", - f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/sst.day.mean.ltm.{start_yr}-{end_yr}.nc" - ] - ds = xr.merge([xr.open_dataset(filepath, decode_times=False).drop_vars("valid_yr_count") for filepath in filepaths], compat="no_conflicts") + filepath = f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/climatology/oisst_climatology_{start_yr}-{end_yr}.nc" + ds = xr.open_dataset(filepath, engine="h5netcdf") + # Open OISSTv2 land-sea mask dataset: ds_mask = xr.open_dataset("http://psl.noaa.gov/thredds/dodsC/Datasets/noaa.oisst.v2.highres/lsmask.oisst.nc", decode_times=False) ds_mask = ds_mask.squeeze(drop=True).rename({"lon": "longitude", "lat": "latitude", "lsmask": "mask"}) @@ -52,7 +51,7 @@ def main(): ) # Standardise coordinate dimension names: - ds = ds.rename({"lon": "longitude", "lat": "latitude", "time": "day"}) + ds = ds.rename({"lon": "longitude", "lat": "latitude", "dayofyear": "day"}) # Update longitude coordinates to be in the range [-180, 180]: ds = ds.assign_coords( @@ -60,21 +59,21 @@ def main(): ) ds = ds.sortby("longitude") - # Add day of year coordinate (1-365): + # Add day of year coordinate (1-366): ds = ds.assign_coords( - day=np.arange(1, 366) + day=np.arange(1, 367) ) # Rename variables to standard names: - ds = ds.rename({"sst": "tos", - "icec": "siconc", - "climatology_bounds": "time_bnds", + ds = ds.rename({"sst_mean": "tos_mean", + "sst_p10": 
"tos_p10", + "sst_p90": "tos_p90", }) # Add standard names and units: - ds["tos"].attrs["standard_name"] = "sea_surface_temperature" - ds["siconc"].attrs["standard_name"] = "sea_ice_area_fraction" - ds["siconc"].attrs["units"] = "1" + ds["tos_mean"].attrs["long_name"] = "Daily Mean Sea Surface Temperature Climatology" + ds["tos_p10"].attrs["long_name"] = "Daily 10th Percentile Sea Surface Temperature Climatology" + ds["tos_p90"].attrs["long_name"] = "Daily 90th Percentile Sea Surface Temperature Climatology" # Add OISSTv2 land mask: ds["mask"] = ds_mask["mask"] @@ -90,7 +89,11 @@ def main(): ds['cell_area'] = compute_cell_area(ds) # Update time bounds to reflect climatological period: - ds['time_bnds'] = ds['time_bnds'].astype('datetime64[ns]') + ds['time_bnds'] = xr.DataArray( + np.zeros((ds['day'].size, 2), dtype='datetime64[ns]'), + dims=('day', 'bnds'), + coords={'day': ds['day']}, + ) ds['time_bnds'].data[:, 0] = (np.datetime64(f'{start_yr}-01', 'M') + (np.timedelta64(1, 'D') * np.arange(ds['day'].size))).astype('datetime64[ns]') ds['time_bnds'].data[:, 1] = (np.datetime64(f'{end_yr}-01', 'M') + (np.timedelta64(1, 'D') * np.arange(ds['day'].size))).astype('datetime64[ns]') ds.time_bnds.attrs.clear() @@ -100,7 +103,7 @@ def main(): ds = ds.assign_attrs({ "Conventions": "CF-1.5", "title": f"NOAA OISSTv2.1 Daily Climatology ({start_yr}-{end_yr})", - "description": f"NOAA 1/4° Daily Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 daily sea surface temperature and sea ice fraction climatology ({start_yr}-{end_yr}).", + "description": f"NOAA 1/4° Daily Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 daily sea surface temperature climatology ({start_yr}-{end_yr}).", "source": "Numerical models: Optimal Interpolation. In-situ observations: ICOADS-D R3.0.2, Argo GDAC. 
Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).", "dataset_type": "observation", "product_type": "climatology", diff --git a/OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py b/OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py new file mode 100755 index 00000000..671a1da6 --- /dev/null +++ b/OceanDataStore/data/OISST/send_OISSTv2_daily_ltm_climatology_to_os.py @@ -0,0 +1,151 @@ +# ========================================================= +# send_OISSTv2_daily_ltm_climatology_to_os.py +# +# Script to write OISST v2.1 long-term daily climatologies +# to Icechunk repositories in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import numpy as np +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, send_to_icechunk +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, +) + + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Send to Icechunk Repository ========== # + bucket = "oisst" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + variable_commits = True + + # Define climatology period: + start_yr = 1991 + end_yr = 2020 + + logging.info(f"In Progress: Sending OISSTv2.1 daily climatology for {start_yr}-{end_yr} to Icechunk...") + # Open OISSTv2 dataset: + filepaths = [f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/icec.day.mean.ltm.{start_yr}-{end_yr}.nc", + f"/dssgfs01/scratch/otooth/npd_data/observations/OISST/sst.day.mean.ltm.{start_yr}-{end_yr}.nc" + ] + ds = xr.merge([xr.open_dataset(filepath, decode_times=False).drop_vars("valid_yr_count") for filepath in filepaths], compat="no_conflicts") + # Open OISSTv2 land-sea mask dataset: + ds_mask = 
xr.open_dataset("http://psl.noaa.gov/thredds/dodsC/Datasets/noaa.oisst.v2.highres/lsmask.oisst.nc", decode_times=False) + ds_mask = ds_mask.squeeze(drop=True).rename({"lon": "longitude", "lat": "latitude", "lsmask": "mask"}) + ds_mask = ds_mask.assign_coords( + longitude=((ds_mask["longitude"] + 180) % 360) - 180 + ) + + # Standardise coordinate dimension names: + ds = ds.rename({"lon": "longitude", "lat": "latitude", "time": "day"}) + + # Update longitude coordinates to be in the range [-180, 180]: + ds = ds.assign_coords( + longitude=((ds["longitude"] + 180) % 360) - 180 + ) + ds = ds.sortby("longitude") + + # Add day of year coordinate (1-365): + ds = ds.assign_coords( + day=np.arange(1, 366) + ) + + # Rename variables to standard names: + ds = ds.rename({"sst": "tos", + "icec": "siconc", + "climatology_bounds": "time_bnds", + }) + + # Add standard names and units: + ds["tos"].attrs["standard_name"] = "sea_surface_temperature" + ds["siconc"].attrs["standard_name"] = "sea_ice_area_fraction" + ds["siconc"].attrs["units"] = "1" + + # Add OISSTv2 land mask: + ds["mask"] = ds_mask["mask"] + ds["mask"].attrs.clear() + ds["mask"] = ds["mask"].assign_attrs({'long_name': "Land-Sea Binary Mask", + "standard_name": "sea_binary_mask", + "comment": "1 = sea, 0 = land" + }) + + # Add horizontal grid cell area: + ds['dx'] = compute_dx(ds) + ds['dy'] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + + # Update time bounds to reflect climatological period: + ds['time_bnds'] = ds['time_bnds'].astype('datetime64[ns]') + ds['time_bnds'].data[:, 0] = (np.datetime64(f'{start_yr}-01', 'M') + (np.timedelta64(1, 'D') * np.arange(ds['day'].size))).astype('datetime64[ns]') + ds['time_bnds'].data[:, 1] = (np.datetime64(f'{end_yr}-01', 'M') + (np.timedelta64(1, 'D') * np.arange(ds['day'].size))).astype('datetime64[ns]') + ds.time_bnds.attrs.clear() + + # Update global attributes: + ds.attrs.clear() + ds = ds.assign_attrs({ + "Conventions": "CF-1.5", + "title": f"NOAA OISSTv2.1 
Daily Climatology ({start_yr}-{end_yr})", + "description": f"NOAA 1/4° Daily Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 daily sea surface temperature and sea ice fraction climatology ({start_yr}-{end_yr}).", + "source": "Numerical models: Optimal Interpolation. In-situ observations: ICOADS-D R3.0.2, Argo GDAC. Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).", + "dataset_type": "observation", + "product_type": "climatology", + "product_version": "2.1", + "institution": "NOAA National Centers for Environmental Information (NCEI)", + "citation": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2021: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1", + "references": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2020: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1. Banzon, V., Smith, T. M., Chin, T. M., Liu, C., and Hankins, W., 2016: A long-term record of blended satellite and in situ sea-surface temperature for climate monitoring, modeling and environmental studies. Earth Syst. Sci. Data, 8, 165-176, doi:10.5194/essd-8-165-2016. Reynolds, R. W., T. M. Smith, C. Liu, D. B. Chelton, K. S. Casey, and M. G. Schlax, 2007: Daily high-resolution-blended analyses for sea surface temperature. 
Journal of Climate, 20, 5473-5496, doi:10.1175/JCLI-D-14-00293.1", + "acknowledgement": "NOAA OI SST V2 High Resolution Dataset data provided by the NOAA PSL, Boulder, Colorado, USA, from their website at https://psl.noaa.gov.", + "license": "OISST v2.1 data were obtained from https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/", + "doi": "10.1175/JCLI-D-20-0166.1", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.25 degree", + "aggregation": "mean", + "aggregation_frequency": "daily", + "status": "completed", + "update_frequency": "None", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + # Optimise chunk sizes for spatial analysis: + ds = ds.chunk({'day': 5, 'latitude': 720, 'longitude': 1440}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding['compressors'] = [blosccodec] + + # Define prefix and commit message based on climatology period: + prefix = f"oisst_v2.1_{start_yr}_{end_yr}_daily_climatology" + commit_message = f"Added OISSTv2.1 Sea Surface Temperature Climatology ({start_yr}-{end_yr})." 
+ + send_to_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='day', + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=None, + dask_cluster_kwargs=None, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py b/OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py new file mode 100755 index 00000000..95e40598 --- /dev/null +++ b/OceanDataStore/data/OISST/send_OISSTv2_daily_to_os.py @@ -0,0 +1,142 @@ +# ========================================================= +# send_OISSTv2_daily_to_os.py +# +# Script to write OISST v2.1 daily mean time series +# to Icechunk repositories in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging + +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, send_to_icechunk +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, +) + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Send to Icechunk Repository ========== # + bucket = "oisst" + exists = False + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + variable_commits = True + config_kwargs = { + "temporary_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/OISST/", + "local_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/OISST/" + } + cluster_kwargs = { + "n_workers" : 15, + "threads_per_worker" : 1, + "memory_limit":"6GB" + } + + logging.info("In Progress: Sending OISSTv2.1 daily mean time series to Icechunk...") + # Open OISSTv2 dataset: + filepaths = 
"/dssgfs01/scratch/otooth/npd_data/observations/OISST/daily/sst.day.mean.198?.nc" + ds = xr.open_mfdataset(filepaths, + combine="by_coords", + data_vars="all", + engine="h5netcdf", + ) + + # Open OISSTv2 land-sea mask dataset: + ds_mask = xr.open_dataset("http://psl.noaa.gov/thredds/dodsC/Datasets/noaa.oisst.v2.highres/lsmask.oisst.nc", decode_times=False) + ds_mask = ds_mask.squeeze(drop=True).rename({"lon": "longitude", "lat": "latitude", "lsmask": "mask"}) + ds_mask = ds_mask.assign_coords( + longitude=((ds_mask["longitude"] + 180) % 360) - 180 + ) + + # Standardise coordinate dimension names: + ds = ds.rename({"lon": "longitude", "lat": "latitude"}) + + # Update longitude coordinates to be in the range [-180, 180]: + ds = ds.assign_coords( + longitude=((ds["longitude"] + 180) % 360) - 180 + ) + ds = ds.sortby("longitude") + + # Rename variables to standard names: + ds = ds.rename({"sst": "tos"}) + + # Add standard names and units: + ds["tos"].attrs["standard_name"] = "sea_surface_temperature" + + # Add OISSTv2 land mask: + ds["mask"] = ds_mask["mask"] + ds["mask"].attrs.clear() + ds["mask"] = ds["mask"].assign_attrs({"long_name": "Land-Sea Binary Mask", + "standard_name": "sea_binary_mask", + "comment": "1 = sea, 0 = land" + }) + + # Add horizontal grid cell area: + ds["dx"] = compute_dx(ds) + ds["dy"] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + + # Update global attributes: + ds.attrs.clear() + ds = ds.assign_attrs({ + "Conventions": "CF-1.5", + "title": "NOAA OISSTv2.1 Daily Timeseries", + "description": "NOAA 1/4° Daily Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 daily sea surface temperature timeseries.", + "source": "Numerical models: Optimal Interpolation. In-situ observations: ICOADS-D R3.0.2, Argo GDAC. 
Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).", + "dataset_type": "observation", + "product_type": "timeseries", + "product_version": "2.1", + "institution": "NOAA National Centers for Environmental Information (NCEI)", + "citation": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2021: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1", + "references": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2020: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1. Banzon, V., Smith, T. M., Chin, T. M., Liu, C., and Hankins, W., 2016: A long-term record of blended satellite and in situ sea-surface temperature for climate monitoring, modeling and environmental studies. Earth Syst. Sci. Data, 8, 165-176, doi:10.5194/essd-8-165-2016. Reynolds, R. W., T. M. Smith, C. Liu, D. B. Chelton, K. S. Casey, and M. G. Schlax, 2007: Daily high-resolution-blended analyses for sea surface temperature. 
Journal of Climate, 20, 5473-5496, doi:10.1175/JCLI-D-14-00293.1", + "acknowledgement": "NOAA OI SST V2 High Resolution Dataset data provided by the NOAA PSL, Boulder, Colorado, USA, from their website at https://psl.noaa.gov.", + "license": "OISST v2.1 data were obtained from https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/", + "doi": "10.1175/JCLI-D-20-0166.1", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.25 degree", + "aggregation": "mean", + "aggregation_frequency": "daily", + "status": "ongoing", + "update_frequency": "quarterly", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + # Optimise chunk sizes for time-series analysis: + ds = ds.chunk({'time': ds['time'].size, 'latitude': 50, 'longitude': 50}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding.clear() + ds[var].encoding['compressors'] = [blosccodec] + + # Define prefix and commit message based on climatology period: + prefix = "oisst_v2.1_daily" + commit_message = "Added OISSTv2.1 Sea Surface Temperature Daily Timeseries (1981-09-1989-12)." 
+ + send_to_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + exists=exists, + append_dim='time', + branch=branch, + commit_message=commit_message, + variable_commits=variable_commits, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py b/OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py new file mode 100755 index 00000000..1909cd5d --- /dev/null +++ b/OceanDataStore/data/OISST/update_OISSTv2_daily_to_os.py @@ -0,0 +1,142 @@ +# ========================================================= +# update_OISSTv2_daily_to_os.py +# +# Script to write OISST v2.1 daily mean timeseries +# to Icechunk repositories in JASMIN cloud object storage. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +import logging +from pathlib import Path + +import xarray as xr +import zarr + +from OceanDataStore.cli import initialise_logging, update_icechunk +from OceanDataStore.data.utils import ( + compute_cell_area, + compute_dx, + compute_dy, +) + +logger = logging.getLogger(__name__) + + +def main(): + # ========== Initialise OceanDataStore Logging ========== # + initialise_logging() + + # ========== Update Icechunk Repository ========== # + bucket = "oisst" + store_credentials_json = ".../credentials/jasmin_os_credentials.json" + branch = "main" + config_kwargs = { + "temporary_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/OISST/", + "local_directory":"/dssgfs01/working/otooth/Software/OceanDataStore/OceanDataStore/data/OISST/" + } + cluster_kwargs = { + "n_workers" : 15, + "threads_per_worker" : 1, + "memory_limit":"6GB" + } + + logging.info("In Progress: Updating OISSTv2.1 daily mean time series in Icechunk...") + # Open OISSTv2 dataset: + filepaths = [] + base = 
Path("/dssgfs01/scratch/otooth/npd_data/observations/OISST/daily/") + for year in range(2026, 2027): + filepaths.extend(sorted(base.glob(f"sst.day.mean.{year}.nc"))) + ds = xr.open_mfdataset(filepaths, + combine="by_coords", + data_vars="all", + engine="h5netcdf", + ) + + # Open OISSTv2 land-sea mask dataset: + ds_mask = xr.open_dataset("http://psl.noaa.gov/thredds/dodsC/Datasets/noaa.oisst.v2.highres/lsmask.oisst.nc", decode_times=False) + ds_mask = ds_mask.squeeze(drop=True).rename({"lon": "longitude", "lat": "latitude", "lsmask": "mask"}) + ds_mask = ds_mask.assign_coords( + longitude=((ds_mask["longitude"] + 180) % 360) - 180 + ) + + # Standardise coordinate dimension names: + ds = ds.rename({"lon": "longitude", "lat": "latitude"}) + + # Update longitude coordinates to be in the range [-180, 180]: + ds = ds.assign_coords( + longitude=((ds["longitude"] + 180) % 360) - 180 + ) + ds = ds.sortby("longitude") + + # Rename variables to standard names: + ds = ds.rename({"sst": "tos"}) + + # Add standard names and units: + ds["tos"].attrs["standard_name"] = "sea_surface_temperature" + + # Add OISSTv2 land mask: + ds["mask"] = ds_mask["mask"] + ds["mask"].attrs.clear() + ds["mask"] = ds["mask"].assign_attrs({"long_name": "Land-Sea Binary Mask", + "standard_name": "sea_binary_mask", + "comment": "1 = sea, 0 = land" + }) + + # Add horizontal grid cell area: + ds["dx"] = compute_dx(ds) + ds["dy"] = compute_dy(ds) + ds['cell_area'] = compute_cell_area(ds) + + # Update global attributes: + ds.attrs.clear() + ds = ds.assign_attrs({ + "Conventions": "CF-1.5", + "title": "NOAA OISSTv2.1 Daily Timeseries", + "description": "NOAA 1/4° Daily Optimum Interpolation Sea Surface Temperature (OISST) version 2.1 daily sea surface temperature timeseries.", + "source": "Numerical models: Optimal Interpolation. In-situ observations: ICOADS-D R3.0.2, Argo GDAC. 
Satellite observations: Advanced Very High Resolution Radiometer (AVHRR).", + "dataset_type": "observation", + "product_type": "timeseries", + "product_version": "2.1", + "institution": "NOAA National Centers for Environmental Information (NCEI)", + "citation": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2021: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1", + "references": "Huang, B., C. Liu, V. Banzon, E. Freeman, G. Graham, B. Hankins, T. Smith, and H.-M. Zhang, 2020: Improvements of the Daily Optimum Interpolation Sea Surface Temperature (DOISST) Version 2.1, Journal of Climate, 34, 2923-2939. doi: 10.1175/JCLI-D-20-0166.1. Banzon, V., Smith, T. M., Chin, T. M., Liu, C., and Hankins, W., 2016: A long-term record of blended satellite and in situ sea-surface temperature for climate monitoring, modeling and environmental studies. Earth Syst. Sci. Data, 8, 165-176, doi:10.5194/essd-8-165-2016. Reynolds, R. W., T. M. Smith, C. Liu, D. B. Chelton, K. S. Casey, and M. G. Schlax, 2007: Daily high-resolution-blended analyses for sea surface temperature. 
Journal of Climate, 20, 5473-5496, doi:10.1175/JCLI-D-14-00293.1", + "acknowledgement": "NOAA OI SST V2 High Resolution Dataset data provided by the NOAA PSL, Boulder, Colorado, USA, from their website at https://psl.noaa.gov.", + "license": "OISST v2.1 data were obtained from https://psl.noaa.gov/data/gridded/data.noaa.oisst.v2.highres.html and are provided under a Creative Commons CC0 1.0 Universal License https://creativecommons.org/publicdomain/zero/1.0/", + "doi": "10.1175/JCLI-D-20-0166.1", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.25 degree", + "aggregation": "mean", + "aggregation_frequency": "daily", + "status": "ongoing", + "update_frequency": "quarterly", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + }) + + # Optimise chunk sizes for time-series analysis: + ds = ds.chunk({'time': ds['time'].size, 'latitude': 50, 'longitude': 50}) + + # Update variable encodings: + blosccodec = zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.shuffle) + for var in list(ds.data_vars) + list(ds.coords): + ds[var].encoding.clear() + ds[var].encoding['compressors'] = [blosccodec] + + # Define prefix and commit message based on climatology period: + prefix = "oisst_v2.1_daily" + commit_message = "Added OISSTv2.1 Sea Surface Temperature Daily Timeseries (2026-01-2026-06)." 
+ + update_icechunk( + file=ds, + bucket=bucket, + object_prefix=prefix, + store_credentials_json=store_credentials_json, + append_dim='time', + branch=branch, + commit_message=commit_message, + dask_config_kwargs=config_kwargs, + dask_cluster_kwargs=cluster_kwargs, + ) + +if __name__ == "__main__": + main() diff --git a/OceanDataStore/data/update_icechunk_repo_attrs.py b/OceanDataStore/data/update_icechunk_repo_attrs.py new file mode 100644 index 00000000..63ef0e89 --- /dev/null +++ b/OceanDataStore/data/update_icechunk_repo_attrs.py @@ -0,0 +1,76 @@ +# ========================================================= +# update_icechunk_repo_attrs.py +# +# Script to update global and variable attributes in an +# Icechunk repository. +# +# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk) +# ========================================================= +from OceanDataStore.data.utils import ( + update_icechunk_global_attrs, + update_icechunk_variable_attrs, +) + +# ========= Shared Inputs ========= # +# Define credential to write to JASMIN OS: +credentials_filepath = '.../credentials/jasmin_os_credentials.json' + +# Define time period for climatology: +start_year = 1991 +end_year = 2020 + +# Define bucket and prefix to Icechunk repository: +bucket = "armor3d" +prefix = f"armor3d_global_my_{start_year}_{end_year}_monthly_climatogy" + +# ========= Update variable attributes ========= # +vars = ["month"] +attrs = [{"units": "1", "long_name": "Month of Year"}] +message = f"Updated ARMOR3D Monthly Climatology ({start_year}-{end_year}) variable attributes. 
-> ['month']" + +update_icechunk_variable_attrs( + credentials_filepath=credentials_filepath, + bucket=bucket, + prefix=prefix, + vars=vars, + attrs=attrs, + commit_message=message, + ) + +# ========= Update global attributes ========= # +attrs = { + "Conventions": "CF-1.0", + "title": f"Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD monthly climatology ({start_year}-{end_year}).", + "description": f"Multi Observation Global Ocean ARMOR3D multi-year reprocessed temperature salinity, sea surface height, geostrophic current and mixed layer depth climatology on 1/8 degree regular grid and 50 depth levels ({start_year}-{end_year}).", + "source": "Numerical models: Multiple Linear Regression, Optimal Interpolation. In-situ observations: Copernicus In Situ TAC (including Argo, XBT, CTD and moorings) Copernicus Sea Level TAC, CNES-CLS22 Mean Dynamic Topography, OSTIA Sea Surface Temperature Analysis, Copernicus MOB TAC (Sea Surface Salinity), and World Ocean Atlas 2018 (WOA18).", + "dataset_type": "observation", + "product_type": "climatology", + "product_version": "2.0", + "institution": "Copernicus Marine Service, Mercator Ocean International, France", + "citation": "Multi Observation Global Ocean 3D Temperature Salinity Height Geostrophic Current and MLD. E.U. Copernicus Marine Service Information (CMEMS). Marine Data Store (MDS). DOI: 10.48670/moi-00052 (Accessed on 21 04 2026).", + "references": "Guinehut S., A.-L. Dhomps, G. Larnicol and P.-Y. Le Traon, 2012: High resolution 3D temperature and salinity fields derived from in situ and satellite observations. Ocean Sci., 8(5):845-857. Mulet, S., M.-H. Rio, A. Mignot, S. Guinehut and R. Morrow, 2012: A new estimate of the global 3D geostrophic ocean circulation based on satellite data and in-situ measurements. Deep Sea Research Part II : Topical Studies in Oceanography, 77-80(0):70-81.", + "acknowledgement": "Generated using E.U. 
Copernicus Marine Service Information; https://doi.org/10.48670/moi-00052.", + "license": "ARMOR3D data were obtained from https://doi.org/10.48670/moi-00052, and are provided under the Copernicus Marine Environment Monitoring Service Service Level Agreement (SLA) https://marine.copernicus.eu/user-corner/service-commitments-and-licence?pk_vid=42ac3e352be888641780994034c3bb6e", + "doi": "10.48670/moi-00052", + "platform": "gr", + "horizontal_grid_type": "regular rectilinear", + "horizontal_grid_resolution": "0.125 degree", + "vertical_grid_type": "z", + "vertical_grid_coordinate": "depth", + "vertical_grid_levels": 50, + "aggregation": "mean", + "aggregation_frequency": "monthly", + "status": "completed", + "update_frequency": "None", + "bbox": "[-180.0, 180.0, -90.0, 90.0]", + } + +message = f"Updated ARMOR3D Monthly Climatology ({start_year}-{end_year}) -> root group attributes." + +update_icechunk_global_attrs( + credentials_filepath=credentials_filepath, + bucket=bucket, + prefix=prefix, + attrs=attrs, + commit_message=message, + ) \ No newline at end of file diff --git a/OceanDataStore/data/update_noc_npd_era5v1_attrs.py b/OceanDataStore/data/update_noc_npd_era5v1_attrs.py new file mode 100755 index 00000000..2f1682f6 --- /dev/null +++ b/OceanDataStore/data/update_noc_npd_era5v1_attrs.py @@ -0,0 +1,172 @@ +# ========================================================= +# update_noc_npd_era5v1_attrs.py +# +# Script to update global and variable attributes in NOC +# Near-Present Day ERA5v1 Icechunk repositories. 
#
# Created By: Ollie Tooth (oliver.tooth@noc.ac.uk)
# =========================================================
import logging

from OceanDataStore.cli import initialise_logging
from OceanDataStore.data.utils import update_icechunk_global_attrs


# Ordered (token, value) lookup tables. Each token is matched as a substring
# and the FIRST match wins, so the order of the entries is significant.
_AGGREGATION_FREQUENCIES = (
    ("1y", "annual"),
    ("1m", "monthly"),
    ("5d", "5-daily"),
)

_DIMENSIONALITIES = (
    ("_3d", "3-dimensional"),
    ("_4d", "4-dimensional"),
)

# Values are (grid label, variable type). "S" prefixes carry no grid label.
_GRID_TYPES = (
    ("T", ("T-grid", "scalar variables")),
    ("U", ("U-grid", "vector variables")),
    ("V", ("V-grid", "vector variables")),
    ("W", ("W-grid", "vector variables")),
    ("S", ("", "scalar variables")),
    ("I", ("T-grid", "sea-ice variables")),
)

# "eORCA12" must be tested before "eORCA1", which is a substring of it.
_HORIZONTAL_GRID_RESOLUTIONS = (
    ("eORCA12", "1/12 degree"),
    ("eORCA025", "1/4 degree"),
    ("eORCA1", "1 degree"),
)


def _first_match(text, table):
    """Return the value paired with the first token of ``table`` found in ``text``.

    Returns ``None`` when no token is a substring of ``text``.
    """
    for token, value in table:
        if token in text:
            return value
    return None


def _horizontal_grid_resolution(nemo_config_name):
    """Return the horizontal resolution label for a NEMO configuration name.

    Raises
    ------
    ValueError
        If the name contains none of the known eORCA configuration tokens.
    """
    resolution = _first_match(nemo_config_name, _HORIZONTAL_GRID_RESOLUTIONS)
    if resolution is None:
        raise ValueError(f"Unable to determine horizontal grid resolution from NEMO configuration name: {nemo_config_name}")
    return resolution


def _build_global_attrs(prefix, nemo_config_name, platform, agg, horizontal_grid_resolution):
    """Build the root-group attribute dictionary for a single repository prefix.

    The aggregation frequency, dimensionality, grid label and variable type
    are all derived from substrings of ``prefix``.

    Raises
    ------
    ValueError
        If the aggregation frequency or the grid type cannot be derived from
        ``prefix``.
    """
    # Define aggregation frequency from prefix:
    agg_freq = _first_match(prefix, _AGGREGATION_FREQUENCIES)
    if agg_freq is None:
        raise ValueError(f"Unable to determine aggregation frequency from prefix: {prefix}")

    # Define dimensionality from prefix (optional, empty when not encoded):
    dimensionality = _first_match(prefix, _DIMENSIONALITIES) or ""

    # Define grid type from prefix:
    grid_info = _first_match(prefix, _GRID_TYPES)
    if grid_info is None:
        raise ValueError(f"Unable to determine grid type from prefix: {prefix}")
    grid, variable_type = grid_info

    # Join only the non-empty fragments so that a missing dimensionality or
    # grid label does not leave a double space or a dangling " ." behind.
    description_parts = (
        f"NOC Near-Present Day {agg_freq} {agg} global ocean physics & sea-ice hindcast forced using bias-corrected ERA5 atmospheric reanalysis",
        dimensionality,
        f"{variable_type} stored on the native {nemo_config_name} curvilinear NEMO model",
        grid,
    )
    description = " ".join(part for part in description_parts if part) + "."

    return {
        "Conventions": "CF-1.6",
        "title": f"National Oceanography Centre Near-Present Day (NPD) {horizontal_grid_resolution} global ocean physics & sea-ice hindcast.",
        "description": description,
        "dataset_type": "model",
        "product_type": "timeseries",
        "product_version": "1.0",
        "institution": "National Oceanography Centre, UK",
        "citation": "Blaker, A. T., Tooth, O. J., Palmiéri, J., Coward, A. C., and Mecking, J. (2025). NOC-MSM/NOC_Near_Present_Day: v0.9.0 (v0.9.0). Zenodo. https://doi.org/10.5281/zenodo.15310354.",
        "references": "Blaker, A.T., Tooth, O.J., Palmiéri, J., Coward, A.C., & Mecking, J. (2025). NOC-MSM/NOC_Near_Present_Day: v0.9.0 (v0.9.0). Zenodo. https://doi.org/10.5281/zenodo.15310354. Guiavarc'h, C., Storkey, D., Blaker, A. T., Blockley, E., Megann, A., Hewitt, H., Bell, M. J., Calvert, D., Copsey, D., Sinha, B., Moreton, S., Mathiot, P., and An, B.: GOSI9: UK Global Ocean and Sea Ice configurations, Geosci. Model Dev., 18, 377-403, https://doi.org/10.5194/gmd-18-377-2025, 2025.",
        "acknowledgement": "NOC Near-Present Day Documentation available at: https://noc-msm.github.io/NOC_Near_Present_Day/",
        "license": "UK Open Government License v3.0",
        "doi": "pending",
        "platform": platform,
        "horizontal_grid_type": "curvilinear",
        "horizontal_grid_resolution": horizontal_grid_resolution,
        "vertical_grid_type": "zps",
        "vertical_grid_coordinate": "depth with partial step topography",
        "vertical_grid_levels": 75,
        "aggregation": agg,
        "aggregation_frequency": agg_freq,
        "status": "ongoing",
        "update_frequency": "quarterly",
        "bbox": "[-180.0, 180.0, -90.0, 90.0]",
        "ocean_component": "NEMO v4.2.2",
        "sea_ice_component": "SI3 v4.0",
        "biogeochemistry_component": "None",
        "atmospheric_component": "None",
        "atmospheric_forcing": "ERA5 v1",
        "variant": "r1i1c1f1",
    }


def main(credentials_filepath: str,
         bucket: str,
         config_name: str,
         nemo_config_name: str,
         platform: str,
         agg: str,
         prefix_list: list
         ) -> None:
    """Update the root-group attributes of each repository prefix in a bucket.

    For every entry of ``prefix_list`` an attribute dictionary is built from
    the prefix and the shared inputs, then passed to
    ``update_icechunk_global_attrs`` together with a commit message.

    Parameters
    ----------
    credentials_filepath : str
        Path forwarded to ``update_icechunk_global_attrs``.
    bucket : str
        Bucket name forwarded to ``update_icechunk_global_attrs``.
    config_name : str
        Configuration label used in log and commit messages.
    nemo_config_name : str
        NEMO configuration name; must contain "eORCA12", "eORCA025" or
        "eORCA1".
    platform : str
        Value stored in the ``platform`` attribute.
    agg : str
        Value stored in the ``aggregation`` attribute (e.g. "mean").
    prefix_list : list
        Prefix strings (e.g. "T1m_3d") to update, processed in order.

    Raises
    ------
    ValueError
        If the horizontal resolution cannot be derived from
        ``nemo_config_name`` (raised before any repository is updated), or if
        the aggregation frequency or grid type cannot be derived from a
        prefix (raised when that prefix is reached).
    """
    # ========== Initialise OceanDataStore Logging ========== #
    initialise_logging()

    # The resolution depends only on the configuration name, so resolve it once
    # up front rather than once per prefix.
    horizontal_grid_resolution = _horizontal_grid_resolution(nemo_config_name)

    # ========= Update global attributes ========= #
    for prefix in prefix_list:
        logging.info(f"In Progress: Updating global attributes for {config_name} {prefix}...")

        attrs = _build_global_attrs(
            prefix=prefix,
            nemo_config_name=nemo_config_name,
            platform=platform,
            agg=agg,
            horizontal_grid_resolution=horizontal_grid_resolution,
        )
        agg_freq = attrs["aggregation_frequency"]

        message = f"Updated {config_name} {agg_freq} {agg} -> root group attributes."

        update_icechunk_global_attrs(
            credentials_filepath=credentials_filepath,
            bucket=bucket,
            prefix=prefix,
            attrs=attrs,
            commit_message=message,
        )

        logging.info(f"Completed: Updated global attributes for {config_name} {prefix}.")


if __name__ == "__main__":
    # ========= Define Shared Inputs ========= #
    # Define credential to write to JASMIN OS:
    credentials_filepath = '.../credentials/jasmin_os_credentials.json'

    # Define NPD configuration properties:
    bucket = "npd-eorca12-era5v1"
    config_name = "NPD eORCA12 ERA5v1"
    nemo_config_name = "eORCA12"
    agg = "mean"
    platform = "gn"

    # -- eORCA1 --- #
    # prefix_list = ["I1m", "I1y",
    #                "S1m", "S1y",
    #                "T1m", "T1y",
    #                "U1m", "U1y",
    #                "V1m", "V1y",
    #                "W1m", "W1y",
    #                ]

    # -- eORCA025 --- #
    # prefix_list = ["I1m_3d", "I1y_3d", "I5d_3d",
    #                "S1m_1d", "S1y_1d", "S5d_1d",
    #                "T1m_3d", "T1m_4d", "T1y_3d", "T1y_4d", "T5d_3d", "T5d_4d",
    #                "U1m_3d", "U1m_4d", "U1y_3d", "U1y_4d", "U5d_3d", "U5d_4d",
    #                "V1m_3d", "V1m_4d", "V1y_3d", "V1y_4d", "V5d_3d", "V5d_4d",
    #                "W1m_4d", "W1y_4d", "W5d_4d"
    #                ]

    # -- eORCA12 --- #
    prefix_list = ["I1m_3d", "I1y_3d",
                   "S1m_1d", "S1y_1d",
                   "T1m_3d", "T1m_4d", "T1y_3d", "T1y_4d",
                   "U1m_3d", "U1m_4d", "U1y_3d", "U1y_4d",
                   "V1m_3d", "V1m_4d", "V1y_3d", "V1y_4d",
                   "W1m_4d", "W1y_4d",
                   ]

    # ========= Run Main Function ========= #
    main(credentials_filepath=credentials_filepath,
         bucket=bucket,
         config_name=config_name,
         nemo_config_name=nemo_config_name,
         platform=platform,
         agg=agg,
         prefix_list=prefix_list
         )