-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Support keyword API for Dataset.drop
#3128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4b78b68
1de442e
553e548
5aa8dc1
49f7d83
e3f051c
c150197
2222f0b
a225b80
4fc46ba
6074550
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -54,6 +54,7 @@ | |||||
) | ||||||
from .coordinates import ( | ||||||
DatasetCoordinates, | ||||||
DataArrayCoordinates, | ||||||
LevelCoordinatesSource, | ||||||
assert_coordinate_consistent, | ||||||
remap_label_indexers, | ||||||
|
@@ -3450,7 +3451,7 @@ def _assert_all_in_dataset( | |||||
) | ||||||
|
||||||
# Drop variables | ||||||
@overload | ||||||
@overload # noqa: F811 | ||||||
def drop( | ||||||
self, labels: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" | ||||||
) -> "Dataset": | ||||||
|
@@ -3463,7 +3464,9 @@ def drop( | |||||
) -> "Dataset": | ||||||
... | ||||||
|
||||||
def drop(self, labels, dim=None, *, errors="raise"): # noqa: F811 | ||||||
def drop( # noqa: F811 | ||||||
self, labels=None, dim=None, *, errors="raise", **labels_kwargs | ||||||
): | ||||||
"""Drop variables or index labels from this dataset. | ||||||
|
||||||
Parameters | ||||||
|
@@ -3479,34 +3482,75 @@ def drop(self, labels, dim=None, *, errors="raise"): # noqa: F811 | |||||
any of the variable or index labels passed are not | ||||||
in the dataset. If 'ignore', any given labels that are in the | ||||||
dataset are dropped and no error is raised. | ||||||
**labels_kwargs : {dim: label, ...}, optional | ||||||
The keyword arguments form of ``dim`` and ``labels``. | ||||||
|
||||||
Returns | ||||||
------- | ||||||
dropped : Dataset | ||||||
gwgundersen marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
Examples | ||||||
-------- | ||||||
>>> data = np.random.randn(2, 3) | ||||||
>>> labels = ['a', 'b', 'c'] | ||||||
>>> ds = xr.Dataset({'A': (['x', 'y'], data), 'y': labels}) | ||||||
>>> ds.drop(y=['a', 'c']) | ||||||
<xarray.Dataset> | ||||||
Dimensions: (x: 2, y: 1) | ||||||
Coordinates: | ||||||
* y (y) <U1 'b' | ||||||
Dimensions without coordinates: x | ||||||
Data variables: | ||||||
A (x, y) float64 -0.3454 0.1734 | ||||||
>>> ds.drop(y='b') | ||||||
<xarray.Dataset> | ||||||
Dimensions: (x: 2, y: 2) | ||||||
Coordinates: | ||||||
* y (y) <U1 'a' 'c' | ||||||
Dimensions without coordinates: x | ||||||
Data variables: | ||||||
A (x, y) float64 -0.3944 -1.418 1.423 -1.041 | ||||||
""" | ||||||
if errors not in ["raise", "ignore"]: | ||||||
raise ValueError('errors must be either "raise" or "ignore"') | ||||||
|
||||||
if dim is None: | ||||||
labels_are_coords = isinstance(labels, DataArrayCoordinates) | ||||||
if labels_kwargs or (utils.is_dict_like(labels) and not labels_are_coords): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I wonder if it would be more predictable to special case the […]
Suggested change
More generally, I get that writing […]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Yeah, I thought needing the check for […]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
That's right, we would rather always use this new rule for dict-like inputs. |
||||||
labels_kwargs = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop") | ||||||
if dim is not None: | ||||||
raise ValueError("cannot specify dim and dict-like arguments.") | ||||||
ds = self | ||||||
for dim, labels in labels_kwargs.items(): | ||||||
ds = ds._drop_labels(labels, dim, errors=errors) | ||||||
return ds | ||||||
elif dim is None: | ||||||
if isinstance(labels, str) or not isinstance(labels, Iterable): | ||||||
labels = {labels} | ||||||
else: | ||||||
labels = set(labels) | ||||||
|
||||||
return self._drop_vars(labels, errors=errors) | ||||||
else: | ||||||
# Don't cast to set, as it would harm performance when labels | ||||||
# is a large numpy array | ||||||
if utils.is_scalar(labels): | ||||||
labels = [labels] | ||||||
labels = np.asarray(labels) | ||||||
|
||||||
try: | ||||||
index = self.indexes[dim] | ||||||
except KeyError: | ||||||
raise ValueError("dimension %r does not have coordinate labels" % dim) | ||||||
new_index = index.drop(labels, errors=errors) | ||||||
return self.loc[{dim: new_index}] | ||||||
if utils.is_list_like(labels): | ||||||
warnings.warn( | ||||||
"dropping dimensions using list-like labels is deprecated; " | ||||||
"use dict-like arguments.", | ||||||
DeprecationWarning, | ||||||
stacklevel=2, | ||||||
) | ||||||
return self._drop_labels(labels, dim, errors=errors) | ||||||
|
||||||
def _drop_labels(self, labels=None, dim=None, errors="raise"): | ||||||
# Don't cast to set, as it would harm performance when labels | ||||||
# is a large numpy array | ||||||
if utils.is_scalar(labels): | ||||||
labels = [labels] | ||||||
labels = np.asarray(labels) | ||||||
try: | ||||||
index = self.indexes[dim] | ||||||
except KeyError: | ||||||
raise ValueError("dimension %r does not have coordinate labels" % dim) | ||||||
new_index = index.drop(labels, errors=errors) | ||||||
return self.loc[{dim: new_index}] | ||||||
|
||||||
def _drop_vars(self, names: set, errors: str = "raise") -> "Dataset": | ||||||
if errors == "raise": | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this is deprecated, we should fix the docs to use
ds.drop(space=['IN', 'IL'])
as an example of the preferred syntax going forward.