From b9942262ff17fe84ed4b2d59da1a1da0d93dbcc3 Mon Sep 17 00:00:00 2001 From: Gerardo Rivera Tello Date: Tue, 3 Sep 2019 15:13:21 -0500 Subject: [PATCH 1/5] Add head, tail and thin methods --- xarray/core/dataarray.py | 49 +++++++++++++++++++++ xarray/core/dataset.py | 84 ++++++++++++++++++++++++++++++++++++ xarray/tests/test_dataset.py | 32 ++++++++++++++ 3 files changed, 165 insertions(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e5d53b1943a..fba322603c5 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1040,6 +1040,55 @@ def sel( ) return self._from_temp_dataset(ds) + def head( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by the the first `n` + values along the specified dimension(s). + + See Also + -------- + Dataset.head + DataArray.tail + DataArray.thin + """ + + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") + ds = self._to_temp_dataset().head(indexers=indexers) + return self._from_temp_dataset(ds) + + def tail( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by the the last `n` + values along the specified dimension(s). + + See Also + -------- + Dataset.tail + DataArray.head + DataArray.thin + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") + ds = self._to_temp_dataset().tail(indexers=indexers) + return self._from_temp_dataset(ds) + + def thin( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by each `n` value + along the specified dimension(s). 
+ + See Also + -------- + Dataset.thin + DataArray.head + DataArray.tail + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") + ds = self._to_temp_dataset().thin(indexers=indexers) + return self._from_temp_dataset(ds) + def broadcast_like( self, other: Union["DataArray", Dataset], exclude: Iterable[Hashable] = None ) -> "DataArray": diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f3ad4650b38..c85b5e9bf80 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2008,6 +2008,90 @@ def sel( result = self.isel(indexers=pos_indexers, drop=drop) return result._overwrite_indexes(new_indexes) + def head( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with the first `n` values of each array + for the specified dimension(s). + + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and integer values `n`. + One of indexers or indexers_kwargs must be provided. + **indexers_kwarg : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. + + + See Also + -------- + Dataset.tail + Dataset.thin + DataArray.head + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") + indexers = {k: slice(val) for k, val in indexers.items()} + return self.isel(indexers) + + def tail( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with the last `n` values of each array + for the specified dimension(s). + + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and integer values `n`. + One of indexers or indexers_kwargs must be provided. + **indexers_kwarg : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. 
+ + + See Also + -------- + Dataset.head + Dataset.thin + DataArray.tail + """ + + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") + indexers = { + k: slice(-val, None) if val != 0 else slice(val) + for k, val in indexers.items() + } + return self.isel(indexers) + + def thin( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with each array indexed along every `n`th + value for the specified dimension(s) + + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and integer values `n`. + One of indexers or indexers_kwargs must be provided. + **indexers_kwarg : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. + + + See Also + -------- + Dataset.head + Dataset.tail + DataArray.thin + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") + if 0 in indexers.values(): + raise ValueError("step cannot be zero") + indexers = {k: slice(None, None, val) for k, val in indexers.items()} + return self.isel(indexers) + def broadcast_like( self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None ) -> "Dataset": diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3953e6c4146..d9f0284969e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1411,6 +1411,38 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) + def test_head(self): + data = create_test_data() + + expected = data.isel(time=slice(5), dim2=slice(6)) + actual = data.head(time=5, dim2=6) + assert_equal(expected, actual) + + expected = data.isel(time=slice(0)) + actual = data.head(time=0) + assert_equal(expected, actual) + + def test_tail(self): + data = create_test_data() + + expected = data.isel(time=slice(-5, None), dim2=slice(-6, None)) + actual = data.tail(time=5, dim2=6) + 
assert_equal(expected, actual) + + expected = data.isel(dim1=slice(0)) + actual = data.tail(dim1=0) + assert_equal(expected, actual) + + def test_thin(self): + data = create_test_data() + + expected = data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6)) + actual = data.thin(time=5, dim2=6) + assert_equal(expected, actual) + + with raises_regex(ValueError, "cannot be zero"): + data.thin(time=0) + @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_sel_fancy(self): data = create_test_data() From 6dd98d929348d8091a27b7149965a6d48c2876eb Mon Sep 17 00:00:00 2001 From: Gerardo Rivera Tello Date: Tue, 3 Sep 2019 15:33:27 -0500 Subject: [PATCH 2/5] Update api and whats-new --- doc/api.rst | 6 ++++++ doc/whats-new.rst | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 872e7786e1b..fb6e037a4f2 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -117,6 +117,9 @@ Indexing Dataset.loc Dataset.isel Dataset.sel + Dataset.head + Dataset.tail + Dataset.thin Dataset.squeeze Dataset.interp Dataset.interp_like @@ -279,6 +282,9 @@ Indexing DataArray.loc DataArray.isel DataArray.sel + DataArray.head + DataArray.tail + DataArray.thin DataArray.squeeze DataArray.interp DataArray.interp_like diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8796c79da4c..1e5855df51f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -87,6 +87,9 @@ New functions/methods Currently only :py:meth:`Dataset.plot.scatter` is implemented. By `Yohai Bar Sinai `_ and `Deepak Cherian `_ +- Added `head`, `tail` and `thin` methods to `Dataset` and `DataArray`. (:issue:`319`) + By `Gerardo Rivera `_. + Enhancements ~~~~~~~~~~~~ @@ -102,7 +105,7 @@ Enhancements - Added the ability to initialize an empty or full DataArray with a single value. (:issue:`277`) - By `Gerardo Rivera `_. + By `Gerardo Rivera `_. - :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used with ``engine="h5netcdf"``.
It is passed to :py:func:`h5netcdf.File`. From d8809e32d83c9533d4092bff06469d6615ed5d48 Mon Sep 17 00:00:00 2001 From: Gerardo Rivera Tello Date: Tue, 3 Sep 2019 15:43:55 -0500 Subject: [PATCH 3/5] Fix pep8 issues --- xarray/core/dataarray.py | 4 ++-- xarray/core/dataset.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index fba322603c5..8660fa952b1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1045,7 +1045,7 @@ def head( ) -> "DataArray": """Return a new DataArray whose data is given by the the first `n` values along the specified dimension(s). - + See Also -------- Dataset.head @@ -1076,7 +1076,7 @@ def tail( def thin( self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any ) -> "DataArray": - """Return a new DataArray whose data is given by each `n` value + """Return a new DataArray whose data is given by each `n` value along the specified dimension(s). See Also diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c85b5e9bf80..bf21dd54d6f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2037,7 +2037,7 @@ def head( def tail( self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any ) -> "Dataset": - """Returns a new dataset with the last `n` values of each array + """Returns a new dataset with the last `n` values of each array for the specified dimension(s). Parameters From c6e12098913a3a7a1bd0fb34b0a7adeed03cbb3a Mon Sep 17 00:00:00 2001 From: Gerardo Rivera Tello Date: Wed, 4 Sep 2019 12:16:55 -0500 Subject: [PATCH 4/5] Fix typo --- xarray/core/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bf21dd54d6f..1476c1ba646 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2019,7 +2019,7 @@ def head( indexers : dict, optional A dict with keys matching dimensions and integer values `n`. 
One of indexers or indexers_kwargs must be provided. - **indexers_kwarg : {dim: n, ...}, optional + **indexers_kwargs : {dim: n, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -2045,7 +2045,7 @@ def tail( indexers : dict, optional A dict with keys matching dimensions and integer values `n`. One of indexers or indexers_kwargs must be provided. - **indexers_kwarg : {dim: n, ...}, optional + **indexers_kwargs : {dim: n, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. @@ -2075,7 +2075,7 @@ def thin( indexers : dict, optional A dict with keys matching dimensions and integer values `n`. One of indexers or indexers_kwargs must be provided. - **indexers_kwarg : {dim: n, ...}, optional + **indexers_kwargs : {dim: n, ...}, optional The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. From db841dac2723b431698a17851069d3c29122b730 Mon Sep 17 00:00:00 2001 From: Gerardo Rivera Tello Date: Wed, 4 Sep 2019 14:28:14 -0500 Subject: [PATCH 5/5] Tests for DataArray --- xarray/tests/test_dataarray.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2fc86d777aa..27e6ab92f71 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1002,6 +1002,19 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) + def test_head(self): + assert_equal(self.dv.isel(x=slice(5)), self.dv.head(x=5)) + assert_equal(self.dv.isel(x=slice(0)), self.dv.head(x=0)) + + def test_tail(self): + assert_equal(self.dv.isel(x=slice(-5, None)), self.dv.tail(x=5)) + assert_equal(self.dv.isel(x=slice(0)), self.dv.tail(x=0)) + + def test_thin(self): + assert_equal(self.dv.isel(x=slice(None, None, 5)), self.dv.thin(x=5)) + with raises_regex(ValueError, "cannot be zero"): + 
self.dv.thin(time=0) + def test_loc(self): self.ds["x"] = ("x", np.array(list("abcdefghij"))) da = self.ds["foo"]