From a8a8f3e67bc290ab5c6b2a5fb817c6a89f0fc22c Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 29 Jan 2026 19:04:35 +0000 Subject: [PATCH 1/2] Fix xarray compatibility issues and improve empty dataset handling - Fix datetime extraction from xarray DataArray using .item() method (xarray datetime accessor now returns DataArrays, not scalars) - Replace deprecated Dataset.dims[] with Dataset.sizes[] for dimension lengths to avoid FutureWarning in xarray >= 2024.10 - Add missing datetime import in mimic_lma.py - Handle empty datasets gracefully in get_lutevents() by returning early with an empty lutevent_id dimension - Use assert_allclose instead of assert_equal for floating point comparisons in test_event_lut.py to account for precision limits https://claude.ai/code/session_01J4PoNVYX96Tdw2uaDGgptc --- glmtools/io/glm.py | 20 +++++++++++++++----- glmtools/io/mimic_lma.py | 6 ++++-- glmtools/io/traversal.py | 4 ++-- glmtools/test/test_event_lut.py | 10 ++++++++-- glmtools/test/test_flash_grouping.py | 2 +- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/glmtools/io/glm.py b/glmtools/io/glm.py index ac65fe6..bad6701 100644 --- a/glmtools/io/glm.py +++ b/glmtools/io/glm.py @@ -255,9 +255,9 @@ def __init_parent_child_data(self): """ Calculate implied parameters that are useful for analyses of GLM data. """ - if ((self.dataset.dims['number_of_flashes'] == 0) | - (self.dataset.dims['number_of_groups'] == 0) | - (self.dataset.dims['number_of_events'] == 0)): + if ((self.dataset.sizes['number_of_flashes'] == 0) | + (self.dataset.sizes['number_of_groups'] == 0) | + (self.dataset.sizes['number_of_events'] == 0)): no_data = True log.warning('File {0} has no data, skipping it'.format( self._filename)) @@ -552,8 +552,8 @@ def __init_fixed_grid_data(self): if ellipse_rev < 0: log.info("Inferring lightning ellipsoid from GLM product time") pt = self.dataset.product_time.dt - date = datetime(pt.year, pt.month, pt.day, - pt.hour, pt.minute, pt.second) + date = datetime(pt.year.item(), pt.month.item(), pt.day.item(), + pt.hour.item(), pt.minute.item(), pt.second.item()) ellipse_rev = ltg_ellpse_rev(date) log.info("Using lightning ellipsoid rev {0}".format(ellipse_rev)) @@ -622,6 +622,16 @@ def get_lutevents(dataset, scale_factor=28e-6, event_dim='number_of_events', # xarray copys are shallow/cheap, and the xarray docs promote returning new # datasets http://xarray.pydata.org/en/stable/combining.html dataset = dataset.copy() + + # Handle empty datasets - groupby fails on empty DataArrays in xarray > 0.13 + if dataset.sizes.get(event_dim, 0) == 0: + log.debug("Empty dataset, skipping lut event calculation") + # Add empty lutevent dimension to maintain consistent interface + dataset['lutevent_id'] = xr.DataArray(np.array([], dtype='u8'), + dims=['lutevent_id']) + dataset = dataset.set_coords('lutevent_id') + return dataset + event_x, event_y = dataset.event_x.data, dataset.event_y.data event_energy = dataset.event_energy.data product_time = dataset.product_time.data diff --git a/glmtools/io/mimic_lma.py b/glmtools/io/mimic_lma.py index f4bd3d5..1748818 100644 --- a/glmtools/io/mimic_lma.py +++ b/glmtools/io/mimic_lma.py @@ -1,3 +1,5 @@ +from datetime import datetime + import numpy as np import xarray as xr @@ -392,8 +394,8 @@ def read_flash_chunk(flash_data, glm=None, target=None, base_date=None, nadir_lo if fixed_grid: pt = flash_data.product_time.dt - date = datetime(pt.year, pt.month, pt.day, - pt.hour, pt.minute, pt.second) + date = datetime(pt.year.item(), pt.month.item(), pt.day.item(), + pt.hour.item(), pt.minute.item(), pt.second.item()) x_lut, y_lut, corner_lut = load_pixel_corner_lookup(corner_pickle) # Convert from microradians to radians diff --git a/glmtools/io/traversal.py b/glmtools/io/traversal.py index 6c0e0e3..180115a 100644 --- a/glmtools/io/traversal.py +++ b/glmtools/io/traversal.py @@ -72,7 +72,7 @@ def __init__(self, dataset, entity_id_vars, parent_id_vars): self.child_to_parent = collections.OrderedDict() self.parent_to_child = collections.OrderedDict() for (entity_var, parent_var) in self._descend(): - if dataset.dims[dataset[entity_var].dims[0]] == 0: + if dataset.sizes[dataset[entity_var].dims[0]] == 0: # No data, so groupby will fail in xarray > 0.13 entity_grouper = None else: @@ -81,7 +81,7 @@ def __init__(self, dataset, entity_id_vars, parent_id_vars): if parent_var is None: parent_grouper = None else: - if dataset.dims[dataset[parent_var].dims[0]] == 0: + if dataset.sizes[dataset[parent_var].dims[0]] == 0: # No data, so groupby will fail in xarray > 0.13 parent_grouper = None else: diff --git a/glmtools/test/test_event_lut.py b/glmtools/test/test_event_lut.py index 2179472..bfc9890 100644 --- a/glmtools/test/test_event_lut.py +++ b/glmtools/test/test_event_lut.py @@ -1,5 +1,5 @@ import numpy as np -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_allclose from glmtools.io.glm import get_lutevents @@ -32,6 +32,11 @@ def check_flash_dataset(fls): # Direcly sum all event energy and get event count fls = get_lutevents(fls) print(fls) + + # Handle empty datasets - no events to check + if fls.sizes.get('number_of_events', 0) == 0: + return fls + total_energy = fls.event_energy.data.sum() total_count = fls.event_id.shape[0] @@ -50,7 +55,8 @@ def check_flash_dataset(fls): total_energy_lut += lut_row.lutevent_energy.data total_count_lut += lut_row.lutevent_count.data - assert_equal(total_energy, total_energy_lut) + # Use assert_allclose for floating point comparison due to precision limits + assert_allclose(total_energy, total_energy_lut, rtol=1e-5) assert_equal(total_count, total_count_lut) return fls diff --git a/glmtools/test/test_flash_grouping.py b/glmtools/test/test_flash_grouping.py index 79c9ffc..1269c5b 100644 --- a/glmtools/test/test_flash_grouping.py +++ b/glmtools/test/test_flash_grouping.py @@ -28,7 +28,7 @@ def test_flash_ids_for_events(): flash_ids_for_events = glm.dataset['event_parent_flash_id'].data - n_events = glm.dataset.dims['number_of_events'] + n_events = glm.dataset.sizes['number_of_events'] assert_equal(flash_ids_for_events.shape[0], n_events) unq_fl_ids = np.unique(flash_ids_for_events) From 57a30bfcb1c63ff97ae4bd0e05d3c2298650633e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 29 Jan 2026 23:43:35 +0000 Subject: [PATCH 2/2] Add test_data to gitignore Ignore downloaded GLM test data files https://claude.ai/code/session_01J4PoNVYX96Tdw2uaDGgptc --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3697d61..e90f92f 100644 --- a/.gitignore +++ b/.gitignore @@ -90,3 +90,4 @@ ENV/ # Mac filesystem detritus .DS_store +test_data/