From 9cfce1a0168dad6445434ffc00a3ae2113f59b66 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 31 Mar 2020 22:03:48 +0100 Subject: [PATCH 1/6] Add missing_dims argument allowing isel() to ignore missing dimensions --- xarray/core/dataarray.py | 30 +++++++++++++++++++-- xarray/core/dataset.py | 17 +++++++----- xarray/core/utils.py | 49 ++++++++++++++++++++++++++++++++++ xarray/core/variable.py | 14 ++++++---- xarray/tests/test_dataarray.py | 13 +++++++++ xarray/tests/test_variable.py | 15 +++++++++-- 6 files changed, 123 insertions(+), 15 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 232fb86144e..93fbb471c74 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1004,25 +1004,51 @@ def isel( self, indexers: Mapping[Hashable, Any] = None, drop: bool = False, + missing_dims: str = "exception", **indexers_kwargs: Any, ) -> "DataArray": """Return a new DataArray whose data is given by integer indexing along the specified dimension(s). + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and values given + by integers, slice objects or arrays. + indexer can be a integer, slice, array-like or DataArray. + If DataArrays are passed as indexers, xarray-style indexing will be + carried out. See :ref:`indexing` for the details. + One of indexers or indexers_kwargs must be provided. + drop : bool, optional + If ``drop=True``, drop coordinates variables indexed by integers + instead of making them scalar. + missing_dims : {"exception", "warning", "ignore"}, default "exception" + What to do if dimensions that should be selected from are not present in the + DataArray: + - "exception": raise an exception + - "warning": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions + **indexers_kwargs : {dim: indexer, ...}, optional + The keyword arguments form of ``indexers``. 
+ See Also -------- Dataset.isel DataArray.sel """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") + if any(is_fancy_indexer(idx) for idx in indexers.values()): - ds = self._to_temp_dataset()._isel_fancy(indexers, drop=drop) + ds = self._to_temp_dataset()._isel_fancy( + indexers, drop=drop, missing_dims=missing_dims + ) return self._from_temp_dataset(ds) # Much faster algorithm for when all indexers are ints, slices, one-dimensional # lists, or zero or one-dimensional np.ndarray's - variable = self._variable.isel(indexers) + variable = self._variable.isel(indexers, missing_dims=missing_dims) coords = {} for coord_name, coord_value in self._coords.items(): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 6f96e4f469c..5b40e6e56f7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -85,6 +85,7 @@ _check_inplace, _default, decode_numpy_dict_values, + drop_dims_from_indexers, either_dict_or_kwargs, hashable, infix_dims, @@ -1765,7 +1766,7 @@ def maybe_chunk(name, var, chunks): return self._replace(variables) def _validate_indexers( - self, indexers: Mapping[Hashable, Any] + self, indexers: Mapping[Hashable, Any], missing_dims: str = "exception", ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: """ Here we make sure + indexer has a valid keys @@ -1775,9 +1776,7 @@ def _validate_indexers( """ from .dataarray import DataArray - invalid = indexers.keys() - self.dims.keys() - if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) # all indexers should be int, slice, np.ndarrays, or Variable for k, v in indexers.items(): @@ -1956,10 +1955,16 @@ def isel( file_obj=self._file_obj, ) - def _isel_fancy(self, indexers: Mapping[Hashable, Any], *, drop: bool) -> "Dataset": + def _isel_fancy( + self, + indexers: Mapping[Hashable, Any], + *, + drop: bool, + missing_dims: str = "exception", + ) -> "Dataset": # Note: we 
need to preserve the original indexers variable in order to merge the # coords below - indexers_list = list(self._validate_indexers(indexers)) + indexers_list = list(self._validate_indexers(indexers, missing_dims)) variables: Dict[Hashable, Variable] = {} indexes: Dict[Hashable, pd.Index] = {} diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 5570f9e9a80..493232f3b9d 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -24,6 +24,7 @@ Sequence, Tuple, TypeVar, + Union, cast, ) @@ -741,6 +742,54 @@ def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: return new_dim +def drop_dims_from_indexers( + indexers: Mapping[Hashable, Any], + dims: Union[list, Mapping[Hashable, int]], + missing_dims: str, +) -> Mapping[Hashable, Any]: + """ Depending on the setting of missing_dims, drop any dimensions from indexers that + are not present in dims. + + Parameters + ---------- + indexers : dict + dims : sequence + missing_dims : {"exception", "warning", "ignore"} + """ + + if missing_dims == "exception": + invalid = indexers.keys() - set(dims) + if invalid: + raise ValueError( + f"dimensions {invalid} do not exist. Expected one or more of {dims}" + ) + + return indexers + + elif missing_dims == "warning": + + # don't modify input + indexers = dict(indexers) + + invalid = indexers.keys() - set(dims) + if invalid: + warnings.warn( + f"dimensions {invalid} do not exist. 
Expected one or more of {dims}" + ) + for key in invalid: + indexers.pop(key) + + return indexers + + elif missing_dims == "ignore": + return {key: val for key, val in indexers.items() if key in dims} + + else: + raise ValueError( + f"Unrecognised option {missing_dims} for missing_dims argument" + ) + + # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): token = 0 diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c9addeefb04..88eb3aad6b7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -28,6 +28,7 @@ OrderedSet, _default, decode_numpy_dict_values, + drop_dims_from_indexers, either_dict_or_kwargs, ensure_us_time_resolution, infix_dims, @@ -1030,6 +1031,7 @@ def _to_dense(self): def isel( self: VariableType, indexers: Mapping[Hashable, Any] = None, + missing_dims: str = "exception", **indexers_kwargs: Any, ) -> VariableType: """Return a new array indexed along the specified dimension(s). @@ -1039,6 +1041,12 @@ def isel( **indexers : {dim: indexer, ...} Keyword arguments with names matching dimensions and values given by integers, slice objects or arrays. + missing_dims : {"exception", "warning", "ignore"}, default "exception" + What to do if dimensions that should be selected from are not present in the + DataArray: + - "exception": raise an exception + - "warning": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions Returns ------- @@ -1050,11 +1058,7 @@ def isel( """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") - invalid = indexers.keys() - set(self.dims) - if invalid: - raise ValueError( - f"dimensions {invalid} do not exist. 
Expected one or more of {self.dims}" - ) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) key = tuple(indexers.get(dim, slice(None)) for dim in self.dims) return self[key] diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 4f19dc2a9cf..4c5556883dd 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -780,6 +780,19 @@ def test_isel(self): assert_identical(self.dv, self.dv.isel(x=slice(None))) assert_identical(self.dv[:3], self.dv.isel(x=slice(3))) assert_identical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5))) + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected " + r"one or more of \('x', 'y'\)", + ): + self.dv.isel(not_a_dim=0) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. " + r"Expected one or more of \('x', 'y'\)", + ): + self.dv.isel(not_a_dim=0, missing_dims="warning") + assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore")) def test_isel_types(self): # regression test for #1405 diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 116466e112d..1f5c5d6030f 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1254,8 +1254,19 @@ def test_isel(self): assert_identical(v.isel(x=0), v[:, 0]) assert_identical(v.isel(x=[0, 2]), v[:, [0, 2]]) assert_identical(v.isel(time=[]), v[[]]) - with raises_regex(ValueError, "do not exist"): - v.isel(not_a_dim=0) + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected one or more of " + r"\('x', 'y'\)", + ): + self.dv.isel(not_a_dim=0) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. 
Expected one or more of " + r"\('x', 'y'\)", + ): + self.v.isel(not_a_dim=0, missing_dims="warning") + assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore")) def test_index_0d_numpy_string(self): # regression test to verify our work around for indexing 0d strings From 19133804f199c46eb4ba48a925a138bd94c4de72 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 31 Mar 2020 23:20:29 +0100 Subject: [PATCH 2/6] Add missing_dims to whats-new.rst --- doc/whats-new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4515f552812..b88aabc070c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,6 +28,11 @@ New Features :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) By `John Omotani `_ +- 'missing_dims' argument to :py:meth:`DataArray.isel` and + :py:meth:`Variable.isel` to allow replacing the exception when a dimension + passed to ``isel`` is not present with a warning, or just ignore the + dimension. (:issue:`3866`, :pull:`3923`) + By `John Omotani `_ Bug fixes From 8097b9437dbc2c6802d07f4e560b4b2675a04394 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 2 Apr 2020 22:51:20 +0100 Subject: [PATCH 3/6] Fix typos in TestVariable.test_isel() --- xarray/tests/test_variable.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 1f5c5d6030f..4b34da03a32 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1257,16 +1257,16 @@ def test_isel(self): with raises_regex( ValueError, r"dimensions {'not_a_dim'} do not exist. Expected one or more of " - r"\('x', 'y'\)", + r"\('time', 'x'\)", ): - self.dv.isel(not_a_dim=0) + v.isel(not_a_dim=0) with pytest.warns( UserWarning, match=r"dimensions {'not_a_dim'} do not exist. 
Expected one or more of " - r"\('x', 'y'\)", + r"\('time', 'x'\)", ): - self.v.isel(not_a_dim=0, missing_dims="warning") - assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore")) + v.isel(not_a_dim=0, missing_dims="warning") + assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore")) def test_index_0d_numpy_string(self): # regression test to verify our work around for indexing 0d strings From 2657ba67690fdaa61736e9b3527a5c05a2ba4e4b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 3 Apr 2020 10:21:02 +0100 Subject: [PATCH 4/6] Change values for missing_dims argument to {'raise', 'warn', 'ignore'} Matches the possible values used elsewhere for drop_vars arguments. --- xarray/core/dataarray.py | 4 ++-- xarray/core/dataset.py | 4 ++-- xarray/core/utils.py | 6 +++--- xarray/core/variable.py | 4 ++-- xarray/tests/test_dataarray.py | 2 +- xarray/tests/test_variable.py | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 18485883603..63cba53b689 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1007,7 +1007,7 @@ def isel( self, indexers: Mapping[Hashable, Any] = None, drop: bool = False, - missing_dims: str = "exception", + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> "DataArray": """Return a new DataArray whose data is given by integer indexing @@ -1025,7 +1025,7 @@ def isel( drop : bool, optional If ``drop=True``, drop coordinates variables indexed by integers instead of making them scalar. 
- missing_dims : {"exception", "warning", "ignore"}, default "exception" + missing_dims : {"raise", "warn", "ignore"}, default "raise" What to do if dimensions that should be selected from are not present in the DataArray: - "exception": raise an exception diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b54e12f721e..e7f8df660c7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1768,7 +1768,7 @@ def maybe_chunk(name, var, chunks): return self._replace(variables) def _validate_indexers( - self, indexers: Mapping[Hashable, Any], missing_dims: str = "exception", + self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise", ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: """ Here we make sure + indexer has a valid keys @@ -1962,7 +1962,7 @@ def _isel_fancy( indexers: Mapping[Hashable, Any], *, drop: bool, - missing_dims: str = "exception", + missing_dims: str = "raise", ) -> "Dataset": # Note: we need to preserve the original indexers variable in order to merge the # coords below diff --git a/xarray/core/utils.py b/xarray/core/utils.py index d50ce04061f..1126cf3037f 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -751,10 +751,10 @@ def drop_dims_from_indexers( ---------- indexers : dict dims : sequence - missing_dims : {"exception", "warning", "ignore"} + missing_dims : {"raise", "warn", "ignore"} """ - if missing_dims == "exception": + if missing_dims == "raise": invalid = indexers.keys() - set(dims) if invalid: raise ValueError( @@ -763,7 +763,7 @@ def drop_dims_from_indexers( return indexers - elif missing_dims == "warning": + elif missing_dims == "warn": # don't modify input indexers = dict(indexers) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 88eb3aad6b7..68e823ca426 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1031,7 +1031,7 @@ def _to_dense(self): def isel( self: VariableType, indexers: Mapping[Hashable, Any] = None, - 
missing_dims: str = "exception", + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> VariableType: """Return a new array indexed along the specified dimension(s). @@ -1041,7 +1041,7 @@ def isel( **indexers : {dim: indexer, ...} Keyword arguments with names matching dimensions and values given by integers, slice objects or arrays. - missing_dims : {"exception", "warning", "ignore"}, default "exception" + missing_dims : {"raise", "warn", "ignore"}, default "raise" What to do if dimensions that should be selected from are not present in the DataArray: - "exception": raise an exception diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index cebd314cef6..cf31182ed30 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -792,7 +792,7 @@ def test_isel(self): match=r"dimensions {'not_a_dim'} do not exist. " r"Expected one or more of \('x', 'y'\)", ): - self.dv.isel(not_a_dim=0, missing_dims="warning") + self.dv.isel(not_a_dim=0, missing_dims="warn") assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore")) def test_isel_types(self): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 4b34da03a32..78e3848b8fb 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1265,7 +1265,7 @@ def test_isel(self): match=r"dimensions {'not_a_dim'} do not exist. 
Expected one or more of " + r"\('time', 'x'\)", ): - v.isel(not_a_dim=0, missing_dims="warning") + v.isel(not_a_dim=0, missing_dims="warn") assert_identical(v, v.isel(not_a_dim=0, missing_dims="ignore")) def test_index_0d_numpy_string(self): From f061bf48bd1a955c861cde62de895dbfd02499bd Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 3 Apr 2020 10:30:02 +0100 Subject: [PATCH 5/6] Add missing_dims argument Dataset.isel() --- xarray/core/dataset.py | 13 +++++++++---- xarray/tests/test_dataset.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e7f8df660c7..97b3caf2b6e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1874,6 +1874,7 @@ def isel( self, indexers: Mapping[Hashable, Any] = None, drop: bool = False, + missing_dims: str = "raise", **indexers_kwargs: Any, ) -> "Dataset": """Returns a new dataset with each array indexed along the specified @@ -1895,6 +1896,12 @@ def isel( drop : bool, optional If ``drop=True``, drop coordinates variables indexed by integers instead of making them scalar. + missing_dims : {"raise", "warn", "ignore"}, default "raise" + What to do if dimensions that should be selected from are not present in the + Dataset: + - "raise": raise an exception + - "warn": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions **indexers_kwargs : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. 
@@ -1917,13 +1924,11 @@ def isel( """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") if any(is_fancy_indexer(idx) for idx in indexers.values()): - return self._isel_fancy(indexers, drop=drop) + return self._isel_fancy(indexers, drop=drop, missing_dims=missing_dims) # Much faster algorithm for when all indexers are ints, slices, one-dimensional # lists, or zero or one-dimensional np.ndarray's - invalid = indexers.keys() - self.dims.keys() - if invalid: - raise ValueError("dimensions %r do not exist" % invalid) + indexers = drop_dims_from_indexers(indexers, self.dims, missing_dims) variables = {} dims: Dict[Hashable, Tuple[int, ...]] = {} diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 237c315583c..a1cb7361e77 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1023,6 +1023,21 @@ def test_isel(self): with pytest.raises(ValueError): data.isel(not_a_dim=slice(0, 2)) + with raises_regex( + ValueError, + r"dimensions {'not_a_dim'} do not exist. Expected " + r"one or more of " + r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*", + ): + data.isel(not_a_dim=slice(0, 2)) + with pytest.warns( + UserWarning, + match=r"dimensions {'not_a_dim'} do not exist. 
" + r"Expected one or more of " + r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*", + ): + data.isel(not_a_dim=slice(0, 2), missing_dims="warn") + assert_identical(data, data.isel(not_a_dim=slice(0, 2), missing_dims="ignore")) ret = data.isel(dim1=0) assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims From f98bc1dd59d336454d15844a80d0d3bf3c6cf64f Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 3 Apr 2020 11:10:11 +0100 Subject: [PATCH 6/6] Mention Dataset.isel in whats-new.rst description of missing_dims --- doc/whats-new.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d6166bdb85d..46a67f9ae4f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,10 +35,10 @@ New Features :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) By `John Omotani `_ -- 'missing_dims' argument to :py:meth:`DataArray.isel` and - :py:meth:`Variable.isel` to allow replacing the exception when a dimension - passed to ``isel`` is not present with a warning, or just ignore the - dimension. (:issue:`3866`, :pull:`3923`) +- 'missing_dims' argument to :py:meth:`Dataset.isel`, + :py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing + the exception when a dimension passed to ``isel`` is not present with a + warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`) By `John Omotani `_ - Limited the length of array items with long string reprs to a reasonable width (:pull:`3900`)