From 4b78b6807bc663ed9007fd91f57d7d2c6cfb588b Mon Sep 17 00:00:00 2001 From: Gregory Gundersen Date: Sun, 14 Jul 2019 18:21:31 +0100 Subject: [PATCH 1/8] Support for keyword argument-based dropping. --- xarray/core/dataset.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 94d6ec7651e..8bbea0dae81 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3135,7 +3135,7 @@ def _assert_all_in_dataset(self, names, virtual_okay=False): raise ValueError('One or more of the specified variables ' 'cannot be found in this dataset') - def drop(self, labels, dim=None, *, errors='raise'): + def drop(self, labels=None, dim=None, *, errors='raise', **dim_kwargs): """Drop variables or index labels from this dataset. Parameters @@ -3155,6 +3155,17 @@ def drop(self, labels, dim=None, *, errors='raise'): ------- dropped : Dataset """ + if len(dim_kwargs) > 0: + if labels is not None or dim is not None: + raise ValueError('cannot specify both `dim` and `labels` along' + 'with keyword arguments') + if len(dim_kwargs) > 1: + raise ValueError('cannot specify multiple dimensions') + dim = list(dim_kwargs.keys())[0] + labels = list(dim_kwargs.values()) + if utils.is_list_like(labels[0]): + labels = labels[0] + if errors not in ['raise', 'ignore']: raise ValueError('errors must be either "raise" or "ignore"') if utils.is_scalar(labels): From 553e548a4fc319bd1f47b62ca3a095addfbe40fe Mon Sep 17 00:00:00 2001 From: Gregory Gundersen Date: Fri, 16 Aug 2019 07:51:36 +0100 Subject: [PATCH 2/8] Cherry picked changes. --- xarray/core/dataset.py | 92 +++++++++++++++++++++--------------- xarray/core/utils.py | 4 ++ xarray/tests/test_dataset.py | 25 ++++++++++ 3 files changed, 84 insertions(+), 37 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 47f076ce80d..337a2ee2807 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3444,12 +3444,6 @@ def _assert_all_in_dataset( if virtual_okay: bad_names -= self.virtual_variables if bad_names: -<<<<<<< HEAD - raise ValueError('One or more of the specified variables ' - 'cannot be found in this dataset') - - def drop(self, labels=None, dim=None, *, errors='raise', **dim_kwargs): -======= raise ValueError( "One or more of the specified variables " "cannot be found in this dataset" @@ -3469,8 +3463,7 @@ def drop( ) -> "Dataset": ... - def drop(self, labels, dim=None, *, errors="raise"): # noqa: F811 ->>>>>>> master + def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs): """Drop variables or index labels from this dataset. Parameters @@ -3486,49 +3479,74 @@ def drop(self, labels, dim=None, *, errors="raise"): # noqa: F811 any of the variable or index labels passed are not in the dataset. If 'ignore', any given labels that are in the dataset are dropped and no error is raised. + **labels_kwargs : {dim: label, ...}, optional + The keyword arguments form of ``dim`` and ``labels``. Returns ------- dropped : Dataset + + Examples + -------- + >>> data = np.random.randn(2, 3) + >>> labels = ['a', 'b', 'c'] + >>> ds = xr.Dataset({'A': (['x', 'y'], data), 'y': labels}) + >>> ds.drop(y=['a', 'c']) + + Dimensions: (x: 2, y: 1) + Coordinates: + * y (y) >> ds.drop(y='b') + + Dimensions: (x: 2, y: 2) + Coordinates: + * y (y) 0: - if labels is not None or dim is not None: - raise ValueError('cannot specify both `dim` and `labels` along' - 'with keyword arguments') - if len(dim_kwargs) > 1: - raise ValueError('cannot specify multiple dimensions') - dim = list(dim_kwargs.keys())[0] - labels = list(dim_kwargs.values()) - if utils.is_list_like(labels[0]): - labels = labels[0] - - if errors not in ['raise', 'ignore']: -======= if errors not in ["raise", "ignore"]: ->>>>>>> master raise ValueError('errors must be either "raise" or "ignore"') - if dim is None: + if labels_kwargs or utils.is_dict_like(labels): + labels_kwargs = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop") + if dim is not None: + raise ValueError("cannot specify dim amd dict-like " "arguments.") + ds = self + for dim, labels in labels_kwargs.items(): + ds = ds._drop_labels(labels, dim, errors=errors) + return ds + elif dim is None: if isinstance(labels, str) or not isinstance(labels, Iterable): labels = {labels} else: labels = set(labels) - return self._drop_vars(labels, errors=errors) else: - # Don't cast to set, as it would harm performance when labels - # is a large numpy array - if utils.is_scalar(labels): - labels = [labels] - labels = np.asarray(labels) - - try: - index = self.indexes[dim] - except KeyError: - raise ValueError("dimension %r does not have coordinate labels" % dim) - new_index = index.drop(labels, errors=errors) - return self.loc[{dim: new_index}] + if utils.is_list_like(labels): + warnings.warn( + "dropping dimensions using list-like labels is deprecated; " + "use dict-like arguments.", + DeprecationWarning, + stacklevel=2, + ) + return self._drop_labels(labels, dim, errors=errors) + + def _drop_labels(self, labels=None, dim=None, errors="raise"): + # Don't cast to set, as it would harm performance when labels + # is a large numpy array + if utils.is_scalar(labels): + labels = [labels] + labels = np.asarray(labels) + try: + index = self.indexes[dim] + except KeyError: + raise ValueError("dimension %r does not have coordinate labels" % dim) + new_index = index.drop(labels, errors=errors) + return self.loc[{dim: new_index}] def _drop_vars(self, names: set, errors: str = "raise") -> "Dataset": if errors == "raise": diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 66a7adfc95e..ba478686d61 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -250,6 +250,10 @@ def is_full_slice(value: Any) -> bool: return isinstance(value, slice) and value == slice(None) +def is_list_like(value: Any) -> bool: + return isinstance(value, list) or isinstance(value, tuple) + + def either_dict_or_kwargs( pos_kwargs: Optional[Mapping[Hashable, T]], kw_kwargs: Mapping[str, T], diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 75325a77b36..55c44f1048d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2161,6 +2161,31 @@ def test_drop_index_labels(self): with raises_regex(ValueError, "does not have coordinate labels"): data.drop(1, "y") + def test_drop_labels_by_keyword(self): + # Tests for #2910: Support for a additional `drop()` API. + data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]}) + # Basic functionality. + assert len(data.coords["x"]) == 2 + + with pytest.warns(DeprecationWarning): + ds1 = data.drop(["a"], dim="x") + + ds2 = data.drop(x="a") + ds3 = data.drop(x=["a"]) + ds4 = data.drop(x=["a", "b"]) + assert len(ds1.coords["x"]) == 1 + assert len(ds2.coords["x"]) == 1 + assert len(ds3.coords["x"]) == 1 + assert len(ds4.coords["x"]) == 0 + + # Error handling if user tries both approaches. + with pytest.raises(ValueError): + data.drop(labels=["a"], x="a") + with pytest.raises(ValueError): + data.drop(dim="x", x="a") + with pytest.raises(ValueError): + data.drop(labels=["a"], dim="x", x="a") + def test_drop_dims(self): data = xr.Dataset( { From 5aa8dc1b248f0fa48e9cb7d77c87384c2898ead0 Mon Sep 17 00:00:00 2001 From: Gregory Gundersen Date: Sat, 17 Aug 2019 10:25:51 +0100 Subject: [PATCH 3/8] Moved noqa to correct location; added check for when labels are coordinates. --- xarray/core/dataset.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 337a2ee2807..bce7b0eee0a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -54,6 +54,7 @@ ) from .coordinates import ( DatasetCoordinates, + DataArrayCoordinates, LevelCoordinatesSource, assert_coordinate_consistent, remap_label_indexers, @@ -3450,7 +3451,7 @@ def _assert_all_in_dataset( ) # Drop variables - @overload + @overload # noqa: F811 def drop( self, labels: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" ) -> "Dataset": @@ -3463,7 +3464,9 @@ def drop( ) -> "Dataset": ... - def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs): + def drop( # noqa: F811 + self, labels=None, dim=None, *, errors="raise", **labels_kwargs + ): """Drop variables or index labels from this dataset. Parameters @@ -3511,7 +3514,8 @@ def drop(self, labels=None, dim=None, *, errors="raise", **labels_kwargs): if errors not in ["raise", "ignore"]: raise ValueError('errors must be either "raise" or "ignore"') - if labels_kwargs or utils.is_dict_like(labels): + labels_are_coords = isinstance(labels, DataArrayCoordinates) + if labels_kwargs or (utils.is_dict_like(labels) and not labels_are_coords): labels_kwargs = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop") if dim is not None: raise ValueError("cannot specify dim amd dict-like " "arguments.") From 49f7d8358b02b1dd6c0c406d786dcb58b925d57e Mon Sep 17 00:00:00 2001 From: Gregory Gundersen Date: Sat, 17 Aug 2019 21:54:56 +0100 Subject: [PATCH 4/8] Added okwarning to drop docs. --- doc/indexing.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/indexing.rst b/doc/indexing.rst index 9af9db227bc..4c5b93db0b4 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -236,6 +236,7 @@ The :py:meth:`~xarray.Dataset.drop` method returns a new object with the listed index labels along a dimension dropped: .. ipython:: python + :okwarning: ds.drop(['IN', 'IL'], dim='space') From e3f051cbfe9fad086765ce1d95abde80eb41fe6d Mon Sep 17 00:00:00 2001 From: Gregory Gundersen Date: Sat, 17 Aug 2019 22:38:10 +0100 Subject: [PATCH 5/8] Made unit tests more explicit. Added test for dropping along multiple dimensions. --- xarray/tests/test_dataset.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 55c44f1048d..f4cdebf7c10 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2163,20 +2163,28 @@ def test_drop_index_labels(self): def test_drop_labels_by_keyword(self): # Tests for #2910: Support for a additional `drop()` API. - data = Dataset({"A": (["x", "y"], np.random.randn(2, 3)), "x": ["a", "b"]}) + data = Dataset({ + "A": (["x", "y"], np.random.randn(2, 6)), + "x": ["a", "b"], + "y": range(6) + }) # Basic functionality. assert len(data.coords["x"]) == 2 + # This API is allowed but deprecated. with pytest.warns(DeprecationWarning): ds1 = data.drop(["a"], dim="x") - ds2 = data.drop(x="a") ds3 = data.drop(x=["a"]) ds4 = data.drop(x=["a", "b"]) - assert len(ds1.coords["x"]) == 1 - assert len(ds2.coords["x"]) == 1 - assert len(ds3.coords["x"]) == 1 - assert len(ds4.coords["x"]) == 0 + ds5 = data.drop(x=["a", "b"], y=range(0, 6, 2)) + + assert_array_equal(ds1.coords["x"], ["b"]) + assert_array_equal(ds2.coords["x"], ["b"]) + assert_array_equal(ds3.coords["x"], ["b"]) + assert(ds4.coords["x"].size == 0) + assert(ds5.coords["x"].size == 0) + assert_array_equal(ds5.coords["y"], [1, 3, 5]) # Error handling if user tries both approaches. with pytest.raises(ValueError): From c150197b089b1824c9e06d619299885ba7957c53 Mon Sep 17 00:00:00 2001 From: Gregory Gundersen Date: Sat, 17 Aug 2019 22:43:18 +0100 Subject: [PATCH 6/8] Used black. --- xarray/tests/test_dataset.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f4cdebf7c10..fb671ab1fa3 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2163,11 +2163,9 @@ def test_drop_index_labels(self): def test_drop_labels_by_keyword(self): # Tests for #2910: Support for a additional `drop()` API. - data = Dataset({ - "A": (["x", "y"], np.random.randn(2, 6)), - "x": ["a", "b"], - "y": range(6) - }) + data = Dataset( + {"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)} + ) # Basic functionality. assert len(data.coords["x"]) == 2 @@ -2182,8 +2180,8 @@ def test_drop_labels_by_keyword(self): assert_array_equal(ds1.coords["x"], ["b"]) assert_array_equal(ds2.coords["x"], ["b"]) assert_array_equal(ds3.coords["x"], ["b"]) - assert(ds4.coords["x"].size == 0) - assert(ds5.coords["x"].size == 0) + assert ds4.coords["x"].size == 0 + assert ds5.coords["x"].size == 0 assert_array_equal(ds5.coords["y"], [1, 3, 5]) # Error handling if user tries both approaches. From 2222f0b05575558d2fdfe2aada2595aed5f73248 Mon Sep 17 00:00:00 2001 From: Gregory Gundersen Date: Sun, 18 Aug 2019 08:05:17 +0100 Subject: [PATCH 7/8] Fixed typo. Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bce7b0eee0a..d25f24c1e28 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3518,7 +3518,7 @@ def drop( # noqa: F811 if labels_kwargs or (utils.is_dict_like(labels) and not labels_are_coords): labels_kwargs = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop") if dim is not None: - raise ValueError("cannot specify dim amd dict-like " "arguments.") + raise ValueError("cannot specify dim and dict-like arguments.") ds = self for dim, labels in labels_kwargs.items(): ds = ds._drop_labels(labels, dim, errors=errors) From 4fc46ba7f1372d1b2baa4f0dc3b59215fe05989c Mon Sep 17 00:00:00 2001 From: Gregory Gundersen Date: Sun, 18 Aug 2019 08:19:54 +0100 Subject: [PATCH 8/8] Docs for amended drop API. --- doc/whats-new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ae5dc1989cc..a8421d89e74 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -57,12 +57,17 @@ Enhancements - Added ``join='override'``. This only checks that index sizes are equal among objects and skips checking indexes for equality. By `Deepak Cherian `_. + - :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg. It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian `_. + - In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if ``append_dim`` is set, as it will automatically be set to ``'a'`` internally. By `David Brochart `_. +- :py:meth:`~xarray.Dataset.drop` now supports keyword arguments; dropping index labels by specifying both ``dim`` and ``labels`` is deprecated (:issue:`2910`). + By `Gregory Gundersen `_. + Bug fixes ~~~~~~~~~ - Fix regression introduced in v0.12.2 where ``copy(deep=True)`` would convert