diff --git a/doc/api.rst b/doc/api.rst index 872e7786e1b..fb6e037a4f2 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -117,6 +117,9 @@ Indexing Dataset.loc Dataset.isel Dataset.sel + Dataset.head + Dataset.tail + Dataset.thin Dataset.squeeze Dataset.interp Dataset.interp_like @@ -279,6 +282,9 @@ Indexing DataArray.loc DataArray.isel DataArray.sel + DataArray.head + DataArray.tail + DataArray.thin DataArray.squeeze DataArray.interp DataArray.interp_like diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8796c79da4c..1e5855df51f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -87,6 +87,9 @@ New functions/methods Currently only :py:meth:`Dataset.plot.scatter` is implemented. By `Yohai Bar Sinai `_ and `Deepak Cherian `_ +- Added `head`, `tail` and `thin` methods to `Dataset` and `DataArray`. (:issue:`319`) + By `Gerardo Rivera `_. + Enhancements ~~~~~~~~~~~~ @@ -102,7 +105,7 @@ Enhancements - Added the ability to initialize an empty or full DataArray with a single value. (:issue:`277`) - By `Gerardo Rivera `_. + By `Gerardo Rivera `_. - :py:func:`~xarray.Dataset.to_netcdf()` now supports the ``invalid_netcdf`` kwarg when used with ``engine="h5netcdf"``. It is passed to :py:func:`h5netcdf.File`. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e5d53b1943a..8660fa952b1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1040,6 +1040,55 @@ def sel( ) return self._from_temp_dataset(ds) + def head( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by the first `n` + values along the specified dimension(s). 
+ + See Also + -------- + Dataset.head + DataArray.tail + DataArray.thin + """ + + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") + ds = self._to_temp_dataset().head(indexers=indexers) + return self._from_temp_dataset(ds) + + def tail( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by the last `n` + values along the specified dimension(s). + + See Also + -------- + Dataset.tail + DataArray.head + DataArray.thin + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") + ds = self._to_temp_dataset().tail(indexers=indexers) + return self._from_temp_dataset(ds) + + def thin( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "DataArray": + """Return a new DataArray whose data is given by every `n`th value + along the specified dimension(s). + + See Also + -------- + Dataset.thin + DataArray.head + DataArray.tail + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") + ds = self._to_temp_dataset().thin(indexers=indexers) + return self._from_temp_dataset(ds) + def broadcast_like( self, other: Union["DataArray", Dataset], exclude: Iterable[Hashable] = None ) -> "DataArray": diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f3ad4650b38..1476c1ba646 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2008,6 +2008,90 @@ def sel( result = self.isel(indexers=pos_indexers, drop=drop) return result._overwrite_indexes(new_indexes) + def head( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with the first `n` values of each array + for the specified dimension(s). + + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and integer values `n`. + One of indexers or indexers_kwargs must be provided. 
+ **indexers_kwargs : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. + + + See Also + -------- + Dataset.tail + Dataset.thin + DataArray.head + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "head") + indexers = {k: slice(val) for k, val in indexers.items()} + return self.isel(indexers) + + def tail( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with the last `n` values of each array + for the specified dimension(s). + + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and integer values `n`. + One of indexers or indexers_kwargs must be provided. + **indexers_kwargs : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. + + + See Also + -------- + Dataset.head + Dataset.thin + DataArray.tail + """ + + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "tail") + indexers = { + k: slice(-val, None) if val != 0 else slice(val) + for k, val in indexers.items() + } + return self.isel(indexers) + + def thin( + self, indexers: Mapping[Hashable, Any] = None, **indexers_kwargs: Any + ) -> "Dataset": + """Returns a new dataset with each array indexed along every `n`th + value for the specified dimension(s). + + Parameters + ---------- + indexers : dict, optional + A dict with keys matching dimensions and integer values `n`. + One of indexers or indexers_kwargs must be provided. + **indexers_kwargs : {dim: n, ...}, optional + The keyword arguments form of ``indexers``. + One of indexers or indexers_kwargs must be provided. 
+ + + See Also + -------- + Dataset.head + Dataset.tail + DataArray.thin + """ + indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "thin") + if 0 in indexers.values(): + raise ValueError("step cannot be zero") + indexers = {k: slice(None, None, val) for k, val in indexers.items()} + return self.isel(indexers) + def broadcast_like( self, other: Union["Dataset", "DataArray"], exclude: Iterable[Hashable] = None ) -> "Dataset": diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 2fc86d777aa..27e6ab92f71 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1002,6 +1002,19 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) + def test_head(self): + assert_equal(self.dv.isel(x=slice(5)), self.dv.head(x=5)) + assert_equal(self.dv.isel(x=slice(0)), self.dv.head(x=0)) + + def test_tail(self): + assert_equal(self.dv.isel(x=slice(-5, None)), self.dv.tail(x=5)) + assert_equal(self.dv.isel(x=slice(0)), self.dv.tail(x=0)) + + def test_thin(self): + assert_equal(self.dv.isel(x=slice(None, None, 5)), self.dv.thin(x=5)) + with raises_regex(ValueError, "cannot be zero"): + self.dv.thin(time=0) + def test_loc(self): self.ds["x"] = ("x", np.array(list("abcdefghij"))) da = self.ds["foo"] diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 3953e6c4146..d9f0284969e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1411,6 +1411,38 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) + def test_head(self): + data = create_test_data() + + expected = data.isel(time=slice(5), dim2=slice(6)) + actual = data.head(time=5, dim2=6) + assert_equal(expected, actual) + + expected = data.isel(time=slice(0)) + actual = data.head(time=0) + assert_equal(expected, actual) + + def test_tail(self): + data = create_test_data() + + expected = data.isel(time=slice(-5, None), 
dim2=slice(-6, None)) + actual = data.tail(time=5, dim2=6) + assert_equal(expected, actual) + + expected = data.isel(dim1=slice(0)) + actual = data.tail(dim1=0) + assert_equal(expected, actual) + + def test_thin(self): + data = create_test_data() + + expected = data.isel(time=slice(None, None, 5), dim2=slice(None, None, 6)) + actual = data.thin(time=5, dim2=6) + assert_equal(expected, actual) + + with raises_regex(ValueError, "cannot be zero"): + data.thin(time=0) + @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_sel_fancy(self): data = create_test_data()