diff --git a/doc/time-series.rst b/doc/time-series.rst index 1ced1ac30f6..b881045e28d 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -197,11 +197,20 @@ resampling group: ds.resample(time='6H').reduce(np.mean) -For upsampling, xarray provides four methods: ``asfreq``, ``ffill``, ``bfill``, -and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d`` and -supports all of its schemes. All of these resampling operations work on both +For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``, +``nearest`` and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d`` +and supports all of its schemes. All of these resampling operations work on both Dataset and DataArray objects with an arbitrary number of dimensions. +In order to limit the scope of the methods ``ffill``, ``bfill``, ``pad`` and +``nearest``, the ``tolerance`` argument can be set in coordinate units. +Data whose indices lie outside the given ``tolerance`` are set to ``NaN``. + +.. ipython:: python + + ds.resample(time='1H').nearest(tolerance='1H') + + For more examples of using grouped operations on a time dimension, see :ref:`toy weather data`. diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c408306ffdb..6c08d857172 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,6 +43,9 @@ Enhancements report showing what exactly differs between the two objects (dimensions / coordinates / variables / attributes) (:issue:`1507`). By `Benoit Bovy <https://github.com/benbovy>`_. +- Add ``tolerance`` option to ``resample()`` methods ``bfill``, ``pad``, + ``nearest``. (:issue:`2695`) + By `Hauke Schulz <https://github.com/observingClouds>`_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index d272115f492..c49a9782016 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -713,6 +713,13 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None, array([ 0. , 0.032258, 0.064516, ..., 10.935484, 10.967742, 11. 
]) Coordinates: * time (time) datetime64[ns] 1999-12-15 1999-12-16 1999-12-17 ... + + Limit scope of upsampling method + >>> da.resample(time='1D').nearest(tolerance='1D') + <xarray.DataArray (time: 337)> + array([ 0., 0., nan, ..., nan, 11., 11.]) + Coordinates: + * time (time) datetime64[ns] 1999-12-15 1999-12-16 ... 2000-11-15 References ---------- diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 886303db345..c28a0033566 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -73,23 +73,53 @@ def asfreq(self): """ return self._upsample('asfreq') - def pad(self): + def pad(self, tolerance=None): """Forward fill new values at up-sampled frequency. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN``. """ - return self._upsample('pad') + return self._upsample('pad', tolerance=tolerance) ffill = pad - def backfill(self): + def backfill(self, tolerance=None): """Backward fill new values at up-sampled frequency. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN``. """ - return self._upsample('backfill') + return self._upsample('backfill', tolerance=tolerance) bfill = backfill - def nearest(self): + def nearest(self, tolerance=None): """Take new values from nearest original coordinate to up-sampled frequency coordinates. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. 
+ Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN``. """ - return self._upsample('nearest') + return self._upsample('nearest', tolerance=tolerance) def interpolate(self, kind='linear'): """Interpolate up-sampled data using the original data diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 8995fca2f95..86a6a87cbb9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2493,6 +2493,30 @@ def test_upsample_nd(self): ('x', 'y', 'time')) assert_identical(expected, actual) + def test_upsample_tolerance(self): + # Test tolerance keyword for upsample methods bfill, pad, nearest + times = pd.date_range('2000-01-01', freq='1D', periods=2) + times_upsampled = pd.date_range('2000-01-01', freq='6H', periods=5) + array = DataArray(np.arange(2), [('time', times)]) + + # Forward fill + actual = array.resample(time='6H').ffill(tolerance='12H') + expected = DataArray([0., 0., 0., np.nan, 1.], + [('time', times_upsampled)]) + assert_identical(expected, actual) + + # Backward fill + actual = array.resample(time='6H').bfill(tolerance='12H') + expected = DataArray([0., np.nan, 1., 1., 1.], + [('time', times_upsampled)]) + assert_identical(expected, actual) + + # Nearest + actual = array.resample(time='6H').nearest(tolerance='6H') + expected = DataArray([0, 0, np.nan, 1, 1], + [('time', times_upsampled)]) + assert_identical(expected, actual) + @requires_scipy def test_upsample_interpolate(self): from scipy.interpolate import interp1d