diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0425452de8d..d0fffdef307 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,9 @@ Deprecations Bug Fixes ~~~~~~~~~ +- Treat a full ``MultiIndex`` key with tuple-valued levels as scalar selection, + so ``.sel`` no longer preserves a length-1 dimension for nested tuple keys + that identify a single row (:issue:`11341`). - Fix a major performance regression in :py:meth:`Coordinates.to_index` (and consequently :py:meth:`Dataset.to_dataframe`) caused by converting the cached code ndarrays into Python lists (:issue:`11305`). diff --git a/properties/test_pandas_roundtrip.py b/properties/test_pandas_roundtrip.py index ce555676aa7..64dfc32d477 100644 --- a/properties/test_pandas_roundtrip.py +++ b/properties/test_pandas_roundtrip.py @@ -34,8 +34,8 @@ def dataframe_strategy(draw): dtype = pd.DatetimeTZDtype(unit="ns", tz=tz) datetimes = st.datetimes( - min_value=pd.Timestamp("1677-09-21T00:12:43.145224193"), - max_value=pd.Timestamp("2262-04-11T23:47:16.854775807"), + min_value=pd.Timestamp("1970-01-01T00:00:00"), + max_value=pd.Timestamp("2037-12-31T23:59:59.999999"), timezones=st.just(tz), ) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 2242e57e482..f5f3d3aa935 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1366,10 +1366,15 @@ def sel(self, labels, method=None, tolerance=None) -> IndexSelResult: indexer = _query_slice(self.index, label, coord_name) elif isinstance(label, tuple): - if _is_nested_tuple(label): + if len(label) == self.index.nlevels: + try: + indexer = self.index.get_loc(label) + except (KeyError, TypeError, pd.errors.InvalidIndexError): + if not _is_nested_tuple(label): + raise + indexer = self.index.get_locs(label) + elif _is_nested_tuple(label): indexer = self.index.get_locs(label) - elif len(label) == self.index.nlevels: - indexer = self.index.get_loc(label) else: levels = [self.index.names[i] for i in range(len(label))] indexer, new_index = self.index.get_loc_level(label, level=levels) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6f9034249d0..ae802241743 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -7773,7 +7773,9 @@ def test_zarr_create_default_indexes(tmp_path, create_default_indexes) -> None: def test_raises_key_error_on_invalid_zarr_store(tmp_path): root = zarr.open_group(tmp_path / "tmp.zarr") if Version(zarr.__version__) < Version("3.0.0"): - root.create_dataset("bar", shape=(3, 5), dtype=np.float32) + getattr(root, "create_dataset")( # noqa: B009 + "bar", shape=(3, 5), dtype=np.float32 + ) else: root.create_array("bar", shape=(3, 5), dtype=np.float32) with pytest.raises(KeyError, match=r"xarray to determine variable dimensions"): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 8eb52046a31..1493cc7710f 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1463,6 +1463,20 @@ def test_sel( assert_identical(mdata.sel(x={"one": "a", "two": 1}), mdata.sel(one="a", two=1)) + def test_selection_multiindex_nested_tuple_level_value(self) -> None: + level_0 = pd.Index( + [(1, 1), (1, 1), (2, 2), (3, 3)], name="a", tupleize_cols=False + ) + level_1 = pd.Index([1, 2, 10, 20], name="b") + midx = pd.MultiIndex.from_arrays([level_0, level_1]) + coords = Coordinates.from_pandas_multiindex(midx, "index") + data = DataArray(np.arange(4), dims=("index",), coords=coords) + + actual = data.sel(index=((1, 1), 2)) + expected = data.isel(index=1) + + assert_identical(actual, expected) + def test_selection_multiindex_remove_unused(self) -> None: # GH2619. For MultiIndex, we need to call remove_unused. ds = xr.DataArray(