Fix iotools tests for pandas 3 (#2730)

kandersolar · web-flow · commit 7198fe6fc054 · 2026-04-02T12:02:11.000-04:00
* fix iotools.crn tests

* fix iotools.midc tests

* fix iotools.psm4 tests

* fix iotools.sodapro tests

* fix iotools.tmy tests

* address pandas 4 deprecation warnings

* fix one lint issue
diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py
@@ -322,7 +322,7 @@ def _parse_bsrn(fbuf, logical_records=('0100',)):
         LR_0100 = LR_0100.reindex(sorted(LR_0100.columns), axis='columns')
         LR_0100.columns = BSRN_LR0100_COLUMNS
         # Set datetime index
-        LR_0100.index = (start_date+pd.to_timedelta(LR_0100['day']-1, unit='d')
+        LR_0100.index = (start_date+pd.to_timedelta(LR_0100['day']-1, unit='D')
                          + pd.to_timedelta(LR_0100['minute'], unit='minutes'))
         # Drop empty, minute, and day columns
         LR_0100 = LR_0100.drop(columns=['empty', 'day', 'minute'])
@@ -336,7 +336,7 @@ def _parse_bsrn(fbuf, logical_records=('0100',)):
                               na_values=[-999.0, -99.9],
                               colspecs=BSRN_LR0300_COL_SPECS,
                               names=BSRN_LR0300_COLUMNS)
-        LR_0300.index = (start_date+pd.to_timedelta(LR_0300['day']-1, unit='d')
+        LR_0300.index = (start_date+pd.to_timedelta(LR_0300['day']-1, unit='D')
                          + pd.to_timedelta(LR_0300['minute'], unit='minutes'))
         LR_0300 = LR_0300.drop(columns=['day', 'minute']).astype(float)
         dfs.append(LR_0300)
@@ -353,13 +353,13 @@ def _parse_bsrn(fbuf, logical_records=('0100',)):
         # Sort columns to match original order and assign column names
         LR_0500 = LR_0500.reindex(sorted(LR_0500.columns), axis='columns')
         LR_0500.columns = BSRN_LR0500_COLUMNS
-        LR_0500.index = (start_date+pd.to_timedelta(LR_0500['day']-1, unit='d')
+        LR_0500.index = (start_date+pd.to_timedelta(LR_0500['day']-1, unit='D')
                          + pd.to_timedelta(LR_0500['minute'], unit='minutes'))
         LR_0500 = LR_0500.drop(columns=['empty', 'day', 'minute'])
         dfs.append(LR_0500)
 
     if len(dfs):
-        data = pd.concat(dfs, axis='columns')
+        data = pd.concat(dfs, axis='columns', sort=False)
     else:
         data = _empty_dataframe_from_logical_records(logical_records)
         metadata = {}
diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py
@@ -195,7 +195,7 @@ def get_solrad(station, start, end,
     end = pd.to_datetime(end)
 
     # Generate list of filenames
-    dates = pd.date_range(start.floor('d'), end, freq='d')
+    dates = pd.date_range(start.floor('D'), end, freq='D')
     station = station.lower()
     filenames = [
         f"{station}/{d.year}/{station}{d.strftime('%y')}{d.dayofyear:03}.dat"
diff --git a/tests/iotools/test_crn.py b/tests/iotools/test_crn.py
@@ -33,9 +33,11 @@ def columns_unmapped():
 
 @pytest.fixture
 def dtypes():
+    # None indicates string, which is dtype("O") for pandas 2 and StringDtype
+    # for pandas 3
     return [
         dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
-        dtype('int64'), dtype('O'), dtype('float64'), dtype('float64'),
+        dtype('int64'), None, dtype('float64'), dtype('float64'),
         dtype('float64'), dtype('float64'), dtype('float64'),
         dtype('int64'), dtype('float64'), dtype('O'), dtype('int64'),
         dtype('float64'), dtype('int64'), dtype('float64'),
@@ -70,7 +72,10 @@ def test_read_crn(testfile, columns_mapped, dtypes):
          0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]])
     expected = pd.DataFrame(values, columns=columns_mapped, index=index)
     for (col, _dtype) in zip(expected.columns, dtypes):
-        expected[col] = expected[col].astype(_dtype)
+        # use inferred types for strings, to cover both pandas 2 and 3
+        if _dtype is not None:
+            expected[col] = expected[col].astype(_dtype)
+
     out = crn.read_crn(testfile)
     assert_frame_equal(out, expected)
 
@@ -94,6 +99,8 @@ def test_read_crn_problems(testfile_problems, columns_mapped, dtypes):
          1.64, 0]])
     expected = pd.DataFrame(values, columns=columns_mapped, index=index)
     for (col, _dtype) in zip(expected.columns, dtypes):
-        expected[col] = expected[col].astype(_dtype)
+        # use inferred types for strings, to cover both pandas 2 and 3
+        if _dtype is not None:
+            expected[col] = expected[col].astype(_dtype)
     out = crn.read_crn(testfile_problems)
     assert_frame_equal(out, expected)
diff --git a/tests/iotools/test_midc.py b/tests/iotools/test_midc.py
@@ -43,7 +43,7 @@ def test_midc__format_index_tz_conversion():
     data = pd.read_csv(MIDC_TESTFILE)
     data = data.rename(columns={'MST': 'PST'})
     data = midc._format_index(data)
-    assert data.index[0].tz == pytz.timezone('Etc/GMT+8')
+    assert str(data.index[0].tz) == 'Etc/GMT+8'
 
 
 def test_midc__format_index_raw():
diff --git a/tests/iotools/test_psm4.py b/tests/iotools/test_psm4.py
@@ -49,7 +49,7 @@ def assert_psm4_equal(data, metadata, expected):
     for mf in METADATA_FIELDS:
         assert mf in metadata
     # check timezone
-    assert (data.index.tzinfo.zone == 'Etc/GMT%+d' % -metadata['Time Zone'])
+    assert (str(data.index.tzinfo) == 'Etc/GMT%+d' % -metadata['Time Zone'])
 
 
 @pytest.mark.remote_data
diff --git a/tests/iotools/test_sodapro.py b/tests/iotools/test_sodapro.py
@@ -26,24 +26,28 @@
 
 
 dtypes_mcclear_verbose = [
-    'object', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
+    # None indicates string, which differs between pandas 2 and 3
+    None, 'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
     'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
     'float64', 'float64', 'float64', 'float64', 'float64', 'int64', 'float64',
     'float64', 'float64', 'float64']
 
 dtypes_mcclear = [
-    'object', 'float64', 'float64', 'float64', 'float64', 'float64']
+    # None indicates string, which differs between pandas 2 and 3
+    None, 'float64', 'float64', 'float64', 'float64', 'float64']
 
 dtypes_radiation_verbose = [
-    'object', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
+    # None indicates string, which differs between pandas 2 and 3
+    None, 'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
     'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
     'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
     'float64', 'float64', 'float64', 'float64', 'int64', 'float64', 'float64',
     'float64', 'float64', 'float64', 'int64', 'int64', 'float64', 'float64',
     'float64', 'float64']
 
 dtypes_radiation = [
-    'object', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
+    # None indicates string, which differs between pandas 2 and 3
+    None, 'float64', 'float64', 'float64', 'float64', 'float64', 'float64',
     'float64', 'float64', 'float64', 'float64']
 
 
@@ -154,7 +158,9 @@ def generate_expected_dataframe(values, columns, index, dtypes):
     expected = pd.DataFrame(values, columns=columns, index=index)
     expected.index.freq = None
     for (col, _dtype) in zip(expected.columns, dtypes):
-        expected[col] = expected[col].astype(_dtype)
+        if _dtype is not None:
+            # for None (string), use inferred type for pandas 2/3 compat
+            expected[col] = expected[col].astype(_dtype)
     return expected
 
 
diff --git a/tests/iotools/test_tmy.py b/tests/iotools/test_tmy.py
@@ -93,7 +93,7 @@ def test_gh865_read_tmy3_feb_leapyear_hr24():
     assert all(data.index[:-1].year == 1990)
     assert data.index[-1].year == 1991
     # let's do a quick sanity check, are the indices monotonically increasing?
-    assert all(np.diff(data.index.view(np.int64)) == 3600000000000)
+    assert all(np.diff(data.index) == pd.Timedelta(hours=1))
     # according to the TMY3 manual, each record corresponds to the previous
     # hour so check that the 1st hour is 1AM and the last hour is midnite
     assert data.index[0].hour == 1