From 765f109b1810fe7da805ff7dfbd99928af896128 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Thu, 20 Jun 2019 18:15:32 +0300 Subject: [PATCH 1/2] Raise an error when doing rolling window operations with dask xref GH2940 --- doc/whats-new.rst | 4 ++++ xarray/core/dask_array_ops.py | 7 +++++++ xarray/tests/test_dataarray.py | 10 +++++++--- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e62c7e87d44..3fdb6b4ae82 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -63,6 +63,10 @@ Enhancements Bug fixes ~~~~~~~~~ +- Rolling operations on xarray objects containing dask arrays could silently + compute the incorrect result or use large amounts of memory (:issue:`2940`). + For now, these operations have been disabled with an explicit error. + By `Stephan Hoyer <https://github.com/shoyer>`_. - NetCDF4 output: variables with unlimited dimensions must be chunked (not contiguous) on output. (:issue:`1849`) By `James McCreight <https://github.com/jmccreight>`_. diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 7e72c93da27..7e1b898f437 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -20,6 +20,13 @@ def dask_rolling_wrapper(moving_func, a, window, min_count=None, axis=-1): '''wrapper to apply bottleneck moving window funcs on dask arrays''' + + raise NotImplementedError( + 'rolling operations on xarray objects backed by dask arrays have not ' + 'been implemented yet (https://github.com/pydata/xarray/issues/2940). 
' + 'For now, load your arrays into memory for rolling window operations ' + 'by calling .compute().') + dtype, fill_value = dtypes.maybe_promote(a.dtype) a = a.astype(dtype) # inputs for overlap diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index fd9076e7f65..c66840f129d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3724,9 +3724,13 @@ def test_rolling_wrapped_dask_nochunk(center): da_day_clim = xr.DataArray(np.arange(1, 367), coords=[np.arange(1, 367)], dims='dayofyear') - expected = da_day_clim.rolling(dayofyear=31, center=center).mean() - actual = da_day_clim.chunk().rolling(dayofyear=31, center=center).mean() - assert_allclose(actual, expected) + expected = da_day_clim.rolling(dayofyear=31, center=center).mean() # noqa + with pytest.raises(NotImplementedError): + actual = da_day_clim.chunk().rolling( # noqa + dayofyear=31, center=center).mean() + # TODO: uncomment this assertion once we fix rolling window operations with + # dask (https://github.com/pydata/xarray/issues/2940) + # assert_allclose(actual, expected) @pytest.mark.parametrize('center', (True, False)) From 982004741262b929239cc8e01952364aa3cc5e1d Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 23 Jun 2019 14:31:29 +0300 Subject: [PATCH 2/2] Temporary test change to see appveyor failure --- xarray/tests/test_dataarray.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index c66840f129d..c8bfa65b7d5 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3725,6 +3725,9 @@ def test_rolling_wrapped_dask_nochunk(center): da_day_clim = xr.DataArray(np.arange(1, 367), coords=[np.arange(1, 367)], dims='dayofyear') expected = da_day_clim.rolling(dayofyear=31, center=center).mean() # noqa + actual = da_day_clim.chunk().rolling( # noqa + dayofyear=31, center=center).mean() + print(actual) with pytest.raises(NotImplementedError): actual = 
da_day_clim.chunk().rolling( # noqa dayofyear=31, center=center).mean()