Skip to content

.resample now supports loffset. #2608

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ Enhancements
"dayofyear" and "dayofweek" accessors (:issue:`2597`). By `Spencer Clark
<https://github.com/spencerkclark>`_.
- Support Dask ``HighLevelGraphs`` by `Matthew Rocklin <https://matthewrocklin.com>`_.
- :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now support the
``loffset`` kwarg just like Pandas.
By `Deepak Cherian <https://github.com/dcherian>`_.


Bug fixes
Expand Down
9 changes: 7 additions & 2 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
center=center)

def resample(self, indexer=None, skipna=None, closed=None, label=None,
base=0, keep_attrs=None, **indexer_kwargs):
base=0, keep_attrs=None, loffset=None, **indexer_kwargs):
"""Returns a Resample object for performing resampling operations.

Handles both downsampling and upsampling. If any intervals contain no
Expand All @@ -612,6 +612,9 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for '24H' frequency, base could
range from 0 through 23.
loffset : timedelta or str, optional
Offset used to adjust the resampled time labels. Some pandas date
offset strings are supported.
keep_attrs : bool, optional
If True, the object's attributes (`attrs`) will be copied from
the original object to the new one. If False (default), the new
Expand Down Expand Up @@ -700,7 +703,9 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None,

group = DataArray(dim_coord, coords=dim_coord.coords,
dims=dim_coord.dims, name=RESAMPLE_DIM)
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base)
# TODO: to_offset() call required for pandas==0.19.2
grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base,
loffset=pd.tseries.frequencies.to_offset(loffset))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this explicit cast with pd.tseries.frequencies.to_offset? I guess it's safe to use since it's listed in the pandas API docs explicitly, but I get a little nervous about digging into internals in other projects.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's needed for pandas 0.19.2, which is our minimum version. On 0.23, pandas does the cast automatically.

resampler = self._resample_cls(self, group=group, dim=dim_name,
grouper=grouper,
resample_dim=RESAMPLE_DIM)
Expand Down
28 changes: 28 additions & 0 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import functools
import warnings

import datetime
import numpy as np
import pandas as pd

Expand Down Expand Up @@ -154,6 +155,32 @@ def _unique_and_monotonic(group):
return index.is_unique and index.is_monotonic


def _apply_loffset(grouper, result):
    """Shift the index of a resample result by the grouper's ``loffset``.

    Adapted from pandas. This is NOT an idempotent routine: the grouper's
    ``loffset`` is reset to None before returning, so the offset is applied
    at most once to the result.

    Parameters
    ----------
    grouper : object with a ``loffset`` attribute (e.g. ``pandas.Grouper``)
        Source of the label offset. Modified in place: its ``loffset`` is
        set to None on return, whether or not an offset was applied.
    result : Series or DataFrame
        The result of resample. Its index is replaced by the shifted index
        when it is a non-empty ``DatetimeIndex`` and ``loffset`` is a
        ``DateOffset``, ``datetime.timedelta`` or ``numpy.timedelta64``.
    """
    loffset = grouper.loffset
    # np.timedelta64 is accepted alongside DateOffset/timedelta so that a
    # NumPy offset is applied instead of being silently discarded when the
    # attribute is cleared below.
    offset_types = (pd.DateOffset, datetime.timedelta, np.timedelta64)

    needs_offset = (
        isinstance(loffset, offset_types)
        and isinstance(result.index, pd.DatetimeIndex)
        and len(result.index) > 0
    )

    if needs_offset:
        result.index = result.index + loffset

    # Clear the offset so a later call cannot shift the labels a second time.
    grouper.loffset = None


class GroupBy(SupportsArithmetic):
"""A object that implements the split-apply-combine pattern.

Expand Down Expand Up @@ -235,6 +262,7 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None,
raise ValueError('index must be monotonic for resampling')
s = pd.Series(np.arange(index.size), index)
first_items = s.groupby(grouper).first()
_apply_loffset(grouper, first_items)
full_index = first_items.index
if first_items.isnull().any():
first_items = first_items.dropna()
Expand Down
5 changes: 5 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2273,6 +2273,11 @@ def test_resample(self):
actual = array.resample(time='24H').reduce(np.mean)
assert_identical(expected, actual)

actual = array.resample(time='24H', loffset='-12H').mean()
expected = DataArray(array.to_series().resample('24H', loffset='-12H')
.mean())
assert_identical(expected, actual)

with raises_regex(ValueError, 'index must be monotonic'):
array[[2, 0, 1]].resample(time='1D')

Expand Down
12 changes: 12 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2804,6 +2804,18 @@ def test_resample_by_mean_with_keep_attrs(self):
expected = ds.attrs
assert expected == actual

def test_resample_loffset(self):
    """Resampled time labels shifted by ``loffset`` must match pandas."""
    time_index = pd.date_range('2000-01-01', freq='6H', periods=10)
    variables = {
        'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),
        'bar': ('time', np.random.randn(10), {'meta': 'data'}),
        'time': time_index,
    }
    ds = Dataset(variables)
    ds.attrs['dsmeta'] = 'dsdata'

    # Reference labels: let pandas resample the 1-D variable directly.
    pandas_mean = ds.bar.to_series().resample('24H', loffset='-12H').mean()
    expected = xr.DataArray(pandas_mean).time

    # Only the time coordinate is compared, not the averaged values.
    resampled = ds.resample(time='24H', loffset='-12H').mean('time')
    actual = resampled.time
    assert_identical(expected, actual)

def test_resample_by_mean_discarding_attrs(self):
times = pd.date_range('2000-01-01', freq='6H', periods=10)
ds = Dataset({'foo': (['time', 'x', 'y'], np.random.randn(10, 5, 3)),
Expand Down