Merge remote-tracking branch 'upstream/main' into vectorize-groupby-binary

dcherian · dcherian · commit f0e0f92fc4ac · 2022-03-21T10:20:00.000+05:30
* upstream/main:
  Fix concat with scalar coordinate (pydata#6385)
  isel: convert IndexVariable to Variable if index is dropped (pydata#6388)
  fix dataset groupby combine dataarray func (pydata#6386)
  fix concat with variable or dataarray as dim (pydata#6387)
  pydata#6367 Fix for time units checking could produce "unhashable type" error (pydata#6368)
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -48,6 +48,8 @@ Bug fixes
 - Many bugs fixed by the explicit indexes refactor, mainly related to multi-index (virtual)
   coordinates. See the corresponding pull-request on GitHub for more details. (:pull:`5692`).
   By `Benoît Bovy <https://github.com/benbovy>`_.
+- Fixed "unhashable type" error when reading a NetCDF file containing a variable whose ``units``
+  attribute is not a ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`). By `Oleh Khoma <https://github.com/okhoma>`_.
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/coding/times.py b/xarray/coding/times.py
@@ -695,7 +695,8 @@ def encode(self, variable, name=None):
     def decode(self, variable, name=None):
         dims, data, attrs, encoding = unpack_for_decoding(variable)
 
-        if "units" in attrs and attrs["units"] in TIME_UNITS:
+        units = attrs.get("units")
+        if isinstance(units, str) and units in TIME_UNITS:
             units = pop_to(attrs, encoding, "units")
             transform = partial(decode_cf_timedelta, units=units)
             dtype = np.dtype("timedelta64[ns]")
diff --git a/xarray/core/concat.py b/xarray/core/concat.py
@@ -429,6 +429,7 @@ def _dataset_concat(
     """
     Concatenate a sequence of datasets along a new or existing dimension
     """
+    from .dataarray import DataArray
     from .dataset import Dataset
 
     datasets = list(datasets)
@@ -438,6 +439,13 @@ def _dataset_concat(
             "The elements in the input list need to be either all 'Dataset's or all 'DataArray's"
         )
 
+    if isinstance(dim, DataArray):
+        dim_var = dim.variable
+    elif isinstance(dim, Variable):
+        dim_var = dim
+    else:
+        dim_var = None
+
     dim, index = _calc_concat_dim_index(dim)
 
     # Make sure we're working on a copy (we'll be loading variables)
@@ -524,7 +532,7 @@ def get_indexes(name):
             elif name == dim:
                 var = ds._variables[name]
                 if not var.dims:
-                    yield PandasIndex([var.values], dim)
+                    yield PandasIndex([var.values.item()], dim)
 
     # stack up each variable and/or index to fill-out the dataset (in order)
     # n.b. this loop preserves variable order, needed for groupby.
@@ -582,7 +590,11 @@ def get_indexes(name):
 
     if index is not None:
         # add concat index / coordinate last to ensure that its in the final Dataset
-        result[dim] = index.create_variables()[dim]
+        if dim_var is not None:
+            index_vars = index.create_variables({dim: dim_var})
+        else:
+            index_vars = index.create_variables()
+        result[dim] = index_vars[dim]
         result_indexes[dim] = index
 
     # TODO: add indexes at Dataset creation (when it is supported)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -2262,6 +2262,8 @@ def _isel_fancy(
                     new_var = var.isel(indexers=var_indexers)
                 else:
                     new_var = var.copy(deep=False)
+                if name not in indexes:
+                    new_var = new_var.to_base_variable()
             variables[name] = new_var
 
         coord_names = self._coord_names & variables.keys()
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
@@ -1041,7 +1041,7 @@ def _combine(self, applied):
         if coord is not None and dim not in applied_example.dims:
             index, index_vars = create_default_index_implicit(coord)
             indexes = {k: index for k in index_vars}
-            combined = combined._overwrite_indexes(indexes, variables=index_vars)
+            combined = combined._overwrite_indexes(indexes, index_vars)
         combined = self._maybe_restore_empty_groups(combined)
         combined = self._maybe_unstack(combined)
         return combined
diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
@@ -440,6 +440,22 @@ def test_concat_promote_shape(self) -> None:
         expected = Dataset({"z": (("x", "y"), [[-1], [1]])}, {"x": [0, 1], "y": [0]})
         assert_identical(actual, expected)
 
+        # regression GH6384
+        objs = [
+            Dataset({}, {"x": pd.Interval(-1, 0, closed="right")}),
+            Dataset({"x": [pd.Interval(0, 1, closed="right")]}),
+        ]
+        actual = concat(objs, "x")
+        expected = Dataset(
+            {
+                "x": [
+                    pd.Interval(-1, 0, closed="right"),
+                    pd.Interval(0, 1, closed="right"),
+                ]
+            }
+        )
+        assert_identical(actual, expected)
+
     def test_concat_do_not_promote(self) -> None:
         # GH438
         objs = [
@@ -459,8 +475,15 @@ def test_concat_do_not_promote(self) -> None:
 
     def test_concat_dim_is_variable(self) -> None:
         objs = [Dataset({"x": 0}), Dataset({"x": 1})]
-        coord = Variable("y", [3, 4])
-        expected = Dataset({"x": ("y", [0, 1]), "y": [3, 4]})
+        coord = Variable("y", [3, 4], attrs={"foo": "bar"})
+        expected = Dataset({"x": ("y", [0, 1]), "y": coord})
+        actual = concat(objs, coord)
+        assert_identical(actual, expected)
+
+    def test_concat_dim_is_dataarray(self) -> None:
+        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
+        coord = DataArray([3, 4], dims="y", attrs={"foo": "bar"})
+        expected = Dataset({"x": ("y", [0, 1]), "y": coord})
         actual = concat(objs, coord)
         assert_identical(actual, expected)
 
diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
@@ -416,3 +416,10 @@ def test_encoding_kwarg(self) -> None:
     def test_encoding_kwarg_fixed_width_string(self) -> None:
         # CFEncodedInMemoryStore doesn't support explicit string encodings.
         pass
+
+
+class TestDecodeCFVariableWithArrayUnits:
+    def test_decode_cf_variable_with_array_units(self) -> None:
+        v = Variable(["t"], [1, 2, 3], {"units": np.array(["foobar"], dtype=object)})
+        v_decoded = conventions.decode_cf_variable("test2", v)
+        assert_identical(v, v_decoded)
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
@@ -1262,6 +1262,15 @@ def test_isel_dataarray(self):
         with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
             actual = data.isel(dim2=indexing_ds["dim2"])
 
+    def test_isel_fancy_convert_index_variable(self) -> None:
+        # select index variable "x" with a DataArray of dim "z"
+        # -> drop index and convert index variable to base variable
+        ds = xr.Dataset({"foo": ("x", [1, 2, 3])}, coords={"x": [0, 1, 2]})
+        idxr = xr.DataArray([1], dims="z", name="x")
+        actual = ds.isel(x=idxr)
+        assert "x" not in actual.xindexes
+        assert not isinstance(actual.x.variable, IndexVariable)
+
     def test_sel(self):
         data = create_test_data()
         int_slicers = {"dim1": slice(None, None, 2), "dim2": slice(2), "dim3": slice(3)}
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
@@ -945,6 +945,14 @@ def test_groupby_dataset_assign():
     assert_identical(actual, expected)
 
 
+def test_groupby_dataset_map_dataarray_func():
+    # regression GH6379
+    ds = xr.Dataset({"foo": ("x", [1, 2, 3, 4])}, coords={"x": [0, 0, 1, 1]})
+    actual = ds.groupby("x").map(lambda grp: grp.foo.mean())
+    expected = xr.DataArray([1.5, 3.5], coords={"x": [0, 1]}, dims="x", name="foo")
+    assert_identical(actual, expected)
+
+
 class TestDataArrayGroupBy:
     @pytest.fixture(autouse=True)
     def setup(self):