diff --git a/doc/whats-new.rst b/doc/whats-new.rst index df28837dcfa..9b78d046148 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -58,6 +58,9 @@ Bug fixes indexed variable (:issue:`3252`). By `David Huard <https://github.com/huard>`_. + +- Fix use of multi-index with categorical values (:issue:`3674`). + By `Matthieu Ancellin <https://github.com/mancellin>`_. - Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`). By `Deepak Cherian <https://github.com/dcherian>`_. - Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 06bf08cefd2..dea1767d50c 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -22,6 +22,8 @@ def remove_unused_levels_categories(index): for i, level in enumerate(index.levels): if isinstance(level, pd.CategoricalIndex): level = level[index.codes[i]].remove_unused_categories() + else: + level = level[index.codes[i]] levels.append(level) index = pd.MultiIndex.from_arrays(levels, names=index.names) elif isinstance(index, pd.CategoricalIndex): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d2e8c6b7609..6a6c496591a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1458,6 +1458,17 @@ def test_categorical_reindex(self): actual = ds.reindex(cat=["foo"])["cat"].values assert (actual == np.array(["foo"])).all() + def test_categorical_multiindex(self): + i1 = pd.Series([0, 0]) + cat = pd.CategoricalDtype(categories=["foo", "baz", "bar"]) + i2 = pd.Series(["baz", "bar"], dtype=cat) + + df = pd.DataFrame({"i1": i1, "i2": i2, "values": [1, 2]}).set_index( + ["i1", "i2"] + ) + actual = df.to_xarray() + assert actual["values"].shape == (1, 2) + def test_sel_drop(self): data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]}) expected = Dataset({"foo": 1})