Skip to content

Commit f0e0f92

Browse files
committed
Merge remote-tracking branch 'upstream/main' into vectorize-groupby-binary
* upstream/main:
  - Fix concat with scalar coordinate (pydata#6385)
  - isel: convert IndexVariable to Variable if index is dropped (pydata#6388)
  - fix dataset groupby combine dataarray func (pydata#6386)
  - fix concat with variable or dataarray as dim (pydata#6387)
  - pydata#6367 Fix for time units checking could produce "unhashable type" error (pydata#6368)
2 parents bae15d5 + 83f238a commit f0e0f92

File tree

9 files changed

+70
-6
lines changed

9 files changed

+70
-6
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ Bug fixes
4848
- Many bugs fixed by the explicit indexes refactor, mainly related to multi-index (virtual)
4949
coordinates. See the corresponding pull-request on GitHub for more details. (:pull:`5692`).
5050
By `Benoît Bovy <https://github.com/benbovy>`_.
51+
- Fixed "unhashable type" error trying to read NetCDF file with variable having its 'units'
52+
attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`). By `Oleh Khoma <https://github.com/okhoma>`_.
5153

5254
Documentation
5355
~~~~~~~~~~~~~

xarray/coding/times.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,8 @@ def encode(self, variable, name=None):
695695
def decode(self, variable, name=None):
696696
dims, data, attrs, encoding = unpack_for_decoding(variable)
697697

698-
if "units" in attrs and attrs["units"] in TIME_UNITS:
698+
units = attrs.get("units")
699+
if isinstance(units, str) and units in TIME_UNITS:
699700
units = pop_to(attrs, encoding, "units")
700701
transform = partial(decode_cf_timedelta, units=units)
701702
dtype = np.dtype("timedelta64[ns]")

xarray/core/concat.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,7 @@ def _dataset_concat(
429429
"""
430430
Concatenate a sequence of datasets along a new or existing dimension
431431
"""
432+
from .dataarray import DataArray
432433
from .dataset import Dataset
433434

434435
datasets = list(datasets)
@@ -438,6 +439,13 @@ def _dataset_concat(
438439
"The elements in the input list need to be either all 'Dataset's or all 'DataArray's"
439440
)
440441

442+
if isinstance(dim, DataArray):
443+
dim_var = dim.variable
444+
elif isinstance(dim, Variable):
445+
dim_var = dim
446+
else:
447+
dim_var = None
448+
441449
dim, index = _calc_concat_dim_index(dim)
442450

443451
# Make sure we're working on a copy (we'll be loading variables)
@@ -524,7 +532,7 @@ def get_indexes(name):
524532
elif name == dim:
525533
var = ds._variables[name]
526534
if not var.dims:
527-
yield PandasIndex([var.values], dim)
535+
yield PandasIndex([var.values.item()], dim)
528536

529537
# stack up each variable and/or index to fill-out the dataset (in order)
530538
# n.b. this loop preserves variable order, needed for groupby.
@@ -582,7 +590,11 @@ def get_indexes(name):
582590

583591
if index is not None:
584592
# add concat index / coordinate last to ensure that its in the final Dataset
585-
result[dim] = index.create_variables()[dim]
593+
if dim_var is not None:
594+
index_vars = index.create_variables({dim: dim_var})
595+
else:
596+
index_vars = index.create_variables()
597+
result[dim] = index_vars[dim]
586598
result_indexes[dim] = index
587599

588600
# TODO: add indexes at Dataset creation (when it is supported)

xarray/core/dataset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2262,6 +2262,8 @@ def _isel_fancy(
22622262
new_var = var.isel(indexers=var_indexers)
22632263
else:
22642264
new_var = var.copy(deep=False)
2265+
if name not in indexes:
2266+
new_var = new_var.to_base_variable()
22652267
variables[name] = new_var
22662268

22672269
coord_names = self._coord_names & variables.keys()

xarray/core/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,7 @@ def _combine(self, applied):
10411041
if coord is not None and dim not in applied_example.dims:
10421042
index, index_vars = create_default_index_implicit(coord)
10431043
indexes = {k: index for k in index_vars}
1044-
combined = combined._overwrite_indexes(indexes, variables=index_vars)
1044+
combined = combined._overwrite_indexes(indexes, index_vars)
10451045
combined = self._maybe_restore_empty_groups(combined)
10461046
combined = self._maybe_unstack(combined)
10471047
return combined

xarray/tests/test_concat.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,22 @@ def test_concat_promote_shape(self) -> None:
440440
expected = Dataset({"z": (("x", "y"), [[-1], [1]])}, {"x": [0, 1], "y": [0]})
441441
assert_identical(actual, expected)
442442

443+
# regression GH6384
444+
objs = [
445+
Dataset({}, {"x": pd.Interval(-1, 0, closed="right")}),
446+
Dataset({"x": [pd.Interval(0, 1, closed="right")]}),
447+
]
448+
actual = concat(objs, "x")
449+
expected = Dataset(
450+
{
451+
"x": [
452+
pd.Interval(-1, 0, closed="right"),
453+
pd.Interval(0, 1, closed="right"),
454+
]
455+
}
456+
)
457+
assert_identical(actual, expected)
458+
443459
def test_concat_do_not_promote(self) -> None:
444460
# GH438
445461
objs = [
@@ -459,8 +475,15 @@ def test_concat_do_not_promote(self) -> None:
459475

460476
def test_concat_dim_is_variable(self) -> None:
461477
objs = [Dataset({"x": 0}), Dataset({"x": 1})]
462-
coord = Variable("y", [3, 4])
463-
expected = Dataset({"x": ("y", [0, 1]), "y": [3, 4]})
478+
coord = Variable("y", [3, 4], attrs={"foo": "bar"})
479+
expected = Dataset({"x": ("y", [0, 1]), "y": coord})
480+
actual = concat(objs, coord)
481+
assert_identical(actual, expected)
482+
483+
def test_concat_dim_is_dataarray(self) -> None:
484+
objs = [Dataset({"x": 0}), Dataset({"x": 1})]
485+
coord = DataArray([3, 4], dims="y", attrs={"foo": "bar"})
486+
expected = Dataset({"x": ("y", [0, 1]), "y": coord})
464487
actual = concat(objs, coord)
465488
assert_identical(actual, expected)
466489

xarray/tests/test_conventions.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,3 +416,10 @@ def test_encoding_kwarg(self) -> None:
416416
def test_encoding_kwarg_fixed_width_string(self) -> None:
417417
# CFEncodedInMemoryStore doesn't support explicit string encodings.
418418
pass
419+
420+
421+
class TestDecodeCFVariableWithArrayUnits:
422+
def test_decode_cf_variable_with_array_units(self) -> None:
423+
v = Variable(["t"], [1, 2, 3], {"units": np.array(["foobar"], dtype=object)})
424+
v_decoded = conventions.decode_cf_variable("test2", v)
425+
assert_identical(v, v_decoded)

xarray/tests/test_dataset.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,6 +1262,15 @@ def test_isel_dataarray(self):
12621262
with pytest.raises(IndexError, match=r"dimension coordinate 'dim2'"):
12631263
actual = data.isel(dim2=indexing_ds["dim2"])
12641264

1265+
def test_isel_fancy_convert_index_variable(self) -> None:
1266+
# select index variable "x" with a DataArray of dim "z"
1267+
# -> drop index and convert index variable to base variable
1268+
ds = xr.Dataset({"foo": ("x", [1, 2, 3])}, coords={"x": [0, 1, 2]})
1269+
idxr = xr.DataArray([1], dims="z", name="x")
1270+
actual = ds.isel(x=idxr)
1271+
assert "x" not in actual.xindexes
1272+
assert not isinstance(actual.x.variable, IndexVariable)
1273+
12651274
def test_sel(self):
12661275
data = create_test_data()
12671276
int_slicers = {"dim1": slice(None, None, 2), "dim2": slice(2), "dim3": slice(3)}

xarray/tests/test_groupby.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,14 @@ def test_groupby_dataset_assign():
945945
assert_identical(actual, expected)
946946

947947

948+
def test_groupby_dataset_map_dataarray_func():
949+
# regression GH6379
950+
ds = xr.Dataset({"foo": ("x", [1, 2, 3, 4])}, coords={"x": [0, 0, 1, 1]})
951+
actual = ds.groupby("x").map(lambda grp: grp.foo.mean())
952+
expected = xr.DataArray([1.5, 3.5], coords={"x": [0, 1]}, dims="x", name="foo")
953+
assert_identical(actual, expected)
954+
955+
948956
class TestDataArrayGroupBy:
949957
@pytest.fixture(autouse=True)
950958
def setup(self):

0 commit comments

Comments
 (0)