Skip to content

Fix recombination in groupby when changing size along the grouped dimension #3807

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,10 @@ Bug fixes
- Fix :py:meth:`Dataset.interp` when indexing array shares coordinates with the
indexed variable (:issue:`3252`).
By `David Huard <https://github.com/huard>`_.


- Fix recombination of groups in :py:meth:`Dataset.groupby` and
:py:meth:`DataArray.groupby` when performing an operation that changes the
size of the groups along the grouped dimension. By `Eric Jansen
<https://github.com/ej81>`_.
- Fix use of multi-index with categorical values (:issue:`3674`).
By `Matthieu Ancellin <https://github.com/mancellin>`_.
- Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`).
Expand Down
8 changes: 5 additions & 3 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ def assign_coords(self, coords=None, **coords_kwargs):
def _maybe_reorder(xarray_obj, dim, positions):
order = _inverse_permutation_indices(positions)

if order is None:
if order is None or len(order) != xarray_obj.sizes[dim]:
return xarray_obj
else:
return xarray_obj[{dim: order}]
Expand Down Expand Up @@ -838,7 +838,8 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False):
if isinstance(combined, type(self._obj)):
# only restore dimension order for arrays
combined = self._restore_dim_order(combined)
if coord is not None:
# assign coord when the applied function does not return that coord
if coord is not None and dim not in applied_example.dims:
if shortcut:
coord_var = as_variable(coord)
combined._coords[coord.name] = coord_var
Expand Down Expand Up @@ -954,7 +955,8 @@ def _combine(self, applied):
coord, dim, positions = self._infer_concat_args(applied_example)
combined = concat(applied, dim)
combined = _maybe_reorder(combined, dim, positions)
if coord is not None:
# assign coord when the applied function does not return that coord
if coord is not None and dim not in applied_example.dims:
combined[coord.name] = coord
combined = self._maybe_restore_empty_groups(combined)
combined = self._maybe_unstack(combined)
Expand Down
33 changes: 33 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,39 @@ def test_groupby_input_mutation():
assert_identical(array, array_copy) # should not modify inputs


@pytest.mark.parametrize(
"obj",
[
xr.DataArray([1, 2, 3, 4, 5, 6], [("x", [1, 1, 1, 2, 2, 2])]),
xr.Dataset({"foo": ("x", [1, 2, 3, 4, 5, 6])}, {"x": [1, 1, 1, 2, 2, 2]}),
],
)
def test_groupby_map_shrink_groups(obj):
expected = obj.isel(x=[0, 1, 3, 4])
actual = obj.groupby("x").map(lambda f: f.isel(x=[0, 1]))
assert_identical(expected, actual)


@pytest.mark.parametrize(
"obj",
[
xr.DataArray([1, 2, 3], [("x", [1, 2, 2])]),
xr.Dataset({"foo": ("x", [1, 2, 3])}, {"x": [1, 2, 2]}),
],
)
def test_groupby_map_change_group_size(obj):
def func(group):
if group.sizes["x"] == 1:
result = group.isel(x=[0, 0])
else:
result = group.isel(x=[0])
return result

expected = obj.isel(x=[0, 0, 1])
actual = obj.groupby("x").map(func)
assert_identical(expected, actual)


def test_da_groupby_map_func_args():
def func(arg1, arg2, arg3=0):
return arg1 + arg2 + arg3
Expand Down