From 76d2c5c8cd47cc7673fec84f5c704f3df28bfc4f Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 22 Feb 2021 08:25:41 -0700 Subject: [PATCH 01/17] Add sliding_window_view functions --- xarray/core/dask_array_compat.py | 64 ++++++++++++++++ xarray/core/duck_array_ops.py | 11 +++ xarray/core/npcompat.py | 123 +++++++++++++++++++++++++++++++ 3 files changed, 198 insertions(+) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index ce15e01fb12..c210ca27bf7 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -93,3 +93,67 @@ def nanmedian(a, axis=None, keepdims=False): ) return result + + +def sliding_window_view(x, window_shape, axis=None): + from dask.array.overlap import ensure_minimum_chunksize, map_overlap + from numpy.core.numeric import normalize_axis_tuple + + from .npcompat import sliding_window_view as _np_sliding_window_view + + window_shape = tuple(window_shape) if np.iterable(window_shape) else (window_shape,) + + window_shape_array = np.array(window_shape) + if np.any(window_shape_array < 0): + raise ValueError("`window_shape` cannot contain negative values") + + if axis is None: + axis = tuple(range(x.ndim)) + if len(window_shape) != len(axis): + raise ValueError( + f"Since axis is `None`, must provide " + f"window_shape for all dimensions of `x`; " + f"got {len(window_shape)} window_shape elements " + f"and `x.ndim` is {x.ndim}." + ) + else: + axis = normalize_axis_tuple(axis, x.ndim, allow_duplicate=True) + if len(window_shape) != len(axis): + raise ValueError( + f"Must provide matching length window_shape and " + f"axis; got {len(window_shape)} window_shape " + f"elements and {len(axis)} axes elements." + ) + + depths = [0] * x.ndim + for ax, window in zip(axis, window_shape): + depths[ax] += window - 1 + + # Ensure that each chunk is big enough to leave at least a size-1 chunk + # after windowing (this is only really necessary for the last chunk). + safe_chunks = tuple( + ensure_minimum_chunksize(d + 1, c) for d, c in zip(depths, x.chunks) + ) + x = x.rechunk(safe_chunks) + + # result.shape = x_shape_trimmed + window_shape, + # where x_shape_trimmed is x.shape with every entry + # reduced by one less than the corresponding window size. + # trim chunks to match x_shape_trimmed + newchunks = tuple(c[:-1] + (c[-1] - d,) for d, c in zip(depths, x.chunks)) + tuple( + (window,) for window in window_shape + ) + + return map_overlap( + _np_sliding_window_view, + x, + depth=tuple((0, d) for d in depths), # Overlap on +ve side only + boundary="none", + meta=x._meta, + new_axis=range(x.ndim, x.ndim + len(axis)), + chunks=newchunks, + trim=False, + align_arrays=False, + window_shape=window_shape, + axis=axis, + ) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 9c8c42d0491..a9ee618e0a7 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -614,6 +614,17 @@ def last(values, axis, skipna=None): return take(values, -1, axis=axis) +def sliding_window_view(array, window_shape, axis): + """ + Make an ndarray with a rolling window of axis-th dimension. + The rolling dimension will be placed at the last dimension. + """ + if is_duck_dask_array(array): + return dask_array_compat.sliding_window_view(array, window_shape, axis) + else: + return npcompat.sliding_window_view(array, window_shape, axis) + + def rolling_window(array, axis, window, center, fill_value): """ Make an ndarray with a rolling window of axis-th dimension. diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 25c103374b8..47d744d06d4 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -30,6 +30,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import builtins import operator +from distutils.version import LooseVersion from typing import Union import numpy as np @@ -96,3 +97,125 @@ def __array_function__(self, *args, **kwargs): IS_NEP18_ACTIVE = _is_nep18_active() + + +if LooseVersion(np.__version__) >= "1.20.0": + sliding_window_view = np.lib.stride_tricks.sliding_window_view +else: + from numpy.core.numeric import normalize_axis_tuple + from numpy.lib.stride_tricks import as_strided + + def sliding_window_view( + x, window_shape, axis=None, *, subok=False, writeable=False + ): + """ + Create a sliding window view into the array with the given window shape. + + Also known as rolling or moving window, the window slides across all + dimensions of the array and extracts subsets of the array at all window + positions. + + .. versionadded:: 1.20.0 + + Parameters + ---------- + x : array_like + Array to create the sliding window view from. + window_shape : int or tuple of int + Size of window over each axis that takes part in the sliding window. + If `axis` is not present, must have same length as the number of input + array dimensions. Single integers `i` are treated as if they were the + tuple `(i,)`. + axis : int or tuple of int, optional + Axis or axes along which the sliding window is applied. + By default, the sliding window is applied to all axes and + `window_shape[i]` will refer to axis `i` of `x`. + If `axis` is given as a `tuple of int`, `window_shape[i]` will refer to + the axis `axis[i]` of `x`. + Single integers `i` are treated as if they were the tuple `(i,)`. + subok : bool, optional + If True, sub-classes will be passed-through, otherwise the returned + array will be forced to be a base-class array (default). + writeable : bool, optional + When true, allow writing to the returned view. The default is false, + as this should be used with caution: the returned view contains the + same memory location multiple times, so writing to one location will + cause others to change. + + Returns + ------- + view : ndarray + Sliding window view of the array. The sliding window dimensions are + inserted at the end, and the original dimensions are trimmed as + required by the size of the sliding window. + That is, ``view.shape = x_shape_trimmed + window_shape``, where + ``x_shape_trimmed`` is ``x.shape`` with every entry reduced by one less + than the corresponding window size. + + See Also + -------- + lib.stride_tricks.as_strided: A lower-level and less safe routine for + creating arbitrary views from custom shape and strides. + broadcast_to: broadcast an array to a given shape. + + Notes + ----- + For many applications using a sliding window view can be convenient, but + potentially very slow. Often specialized solutions exist, for example: + + - `scipy.signal.fftconvolve` + + - filtering functions in `scipy.ndimage` + + - moving window functions provided by + `bottleneck `_. + + As a rough estimate, a sliding window approach with an input size of `N` + and a window size of `W` will scale as `O(N*W)` where frequently a special + algorithm can achieve `O(N)`. That means that the sliding window variant + for a window size of 100 can be a 100 times slower than a more specialized + version. + + Nevertheless, for small window sizes, when no custom algorithm exists, or + as a prototyping and developing tool, this function can be a good solution. + """ + window_shape = ( + tuple(window_shape) if np.iterable(window_shape) else (window_shape,) + ) + # first convert input to array, possibly keeping subclass + x = np.array(x, copy=False, subok=subok) + + window_shape_array = np.array(window_shape) + if np.any(window_shape_array < 0): + raise ValueError("`window_shape` cannot contain negative values") + + if axis is None: + axis = tuple(range(x.ndim)) + if len(window_shape) != len(axis): + raise ValueError( + f"Since axis is `None`, must provide " + f"window_shape for all dimensions of `x`; " + f"got {len(window_shape)} window_shape elements " + f"and `x.ndim` is {x.ndim}." + ) + else: + axis = normalize_axis_tuple(axis, x.ndim, allow_duplicate=True) + if len(window_shape) != len(axis): + raise ValueError( + f"Must provide matching length window_shape and " + f"axis; got {len(window_shape)} window_shape " + f"elements and {len(axis)} axes elements." + ) + + out_strides = x.strides + tuple(x.strides[ax] for ax in axis) + + # note: same axis can be windowed repeatedly + x_shape_trimmed = list(x.shape) + for ax, dim in zip(axis, window_shape): + if x_shape_trimmed[ax] < dim: + raise ValueError("window shape cannot be larger than input array shape") + x_shape_trimmed[ax] -= dim - 1 + out_shape = tuple(x_shape_trimmed) + window_shape + return as_strided( + x, strides=out_strides, shape=out_shape, subok=subok, writeable=writeable + ) From 979d366386e1ae2c3e15c7ccc7422c8ba229f131 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 22 Feb 2021 08:35:26 -0700 Subject: [PATCH 02/17] Variable.rolling_window uses Variable.pad & sliding_window_view --- xarray/core/variable.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9b70f721689..3a05126bf66 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2078,10 +2078,10 @@ def rolling_window( """ if fill_value is dtypes.NA: # np.nan is passed dtype, fill_value = dtypes.maybe_promote(self.dtype) - array = self.astype(dtype, copy=False).data + var = self.astype(dtype, copy=False) else: dtype = self.dtype - array = self.data + var = self if isinstance(dim, list): assert len(dim) == len(window) @@ -2092,12 +2092,23 @@ def rolling_window( window = [window] window_dim = [window_dim] center = [center] + + pads = {} + for d, win, cent in zip(dim, window, center): + if cent: + start = int(win / 2) # 10 -> 5, 9 -> 4 + end = win - 1 - start + pads[d] = (start, end) + else: + pads[d] = (win - 1, 0) + + padded = var.pad(pads, mode="constant", constant_values=fill_value) axis = [self.get_axis_num(d) for d in dim] new_dims = self.dims + tuple(window_dim) return Variable( new_dims, - duck_array_ops.rolling_window( - array, axis=axis, window=window, center=center, fill_value=fill_value + duck_array_ops.sliding_window_view( + padded.data, window_shape=window, axis=axis ), ) From d654942878650e03b6aabce6dc751f81c89b3cd7 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 22 Feb 2021 08:48:59 -0700 Subject: [PATCH 03/17] Delete old code --- xarray/core/dask_array_ops.py | 113 +--------------------------------- xarray/core/duck_array_ops.py | 11 ---- xarray/core/nputils.py | 75 ---------------------- xarray/core/rolling.py | 12 ++-- 4 files changed, 5 insertions(+), 206 deletions(-) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 15641506e4e..e323191234c 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -1,115 +1,4 @@ -import numpy as np - -from . import dtypes, nputils - - -def dask_rolling_wrapper(moving_func, a, window, min_count=None, axis=-1): - """Wrapper to apply bottleneck moving window funcs on dask arrays""" - import dask.array as da - - dtype, fill_value = dtypes.maybe_promote(a.dtype) - a = a.astype(dtype) - # inputs for overlap - if axis < 0: - axis = a.ndim + axis - depth = {d: 0 for d in range(a.ndim)} - depth[axis] = (window + 1) // 2 - boundary = {d: fill_value for d in range(a.ndim)} - # Create overlap array. - ag = da.overlap.overlap(a, depth=depth, boundary=boundary) - # apply rolling func - out = da.map_blocks( - moving_func, ag, window, min_count=min_count, axis=axis, dtype=a.dtype - ) - # trim array - result = da.overlap.trim_internal(out, depth) - return result - - -def rolling_window(a, axis, window, center, fill_value): - """Dask's equivalence to np.utils.rolling_window""" - import dask.array as da - - if not hasattr(axis, "__len__"): - axis = [axis] - window = [window] - center = [center] - - orig_shape = a.shape - depth = {d: 0 for d in range(a.ndim)} - offset = [0] * a.ndim - drop_size = [0] * a.ndim - pad_size = [0] * a.ndim - for ax, win, cent in zip(axis, window, center): - if ax < 0: - ax = a.ndim + ax - depth[ax] = int(win / 2) - # For evenly sized window, we need to crop the first point of each block. - offset[ax] = 1 if win % 2 == 0 else 0 - - if depth[ax] > min(a.chunks[ax]): - raise ValueError( - "For window size %d, every chunk should be larger than %d, " - "but the smallest chunk size is %d. Rechunk your array\n" - "with a larger chunk size or a chunk size that\n" - "more evenly divides the shape of your array." - % (win, depth[ax], min(a.chunks[ax])) - ) - - # Although da.overlap pads values to boundaries of the array, - # the size of the generated array is smaller than what we want - # if center == False. - if cent: - start = int(win / 2) # 10 -> 5, 9 -> 4 - end = win - 1 - start - else: - start, end = win - 1, 0 - pad_size[ax] = max(start, end) + offset[ax] - depth[ax] - drop_size[ax] = 0 - # pad_size becomes more than 0 when the overlapped array is smaller than - # needed. In this case, we need to enlarge the original array by padding - # before overlapping. - if pad_size[ax] > 0: - if pad_size[ax] < depth[ax]: - # overlapping requires each chunk larger than depth. If pad_size is - # smaller than the depth, we enlarge this and truncate it later. - drop_size[ax] = depth[ax] - pad_size[ax] - pad_size[ax] = depth[ax] - - # TODO maybe following two lines can be summarized. - a = da.pad( - a, [(p, 0) for p in pad_size], mode="constant", constant_values=fill_value - ) - boundary = {d: fill_value for d in range(a.ndim)} - - # create overlap arrays - ag = da.overlap.overlap(a, depth=depth, boundary=boundary) - - def func(x, window, axis): - x = np.asarray(x) - index = [slice(None)] * x.ndim - for ax, win in zip(axis, window): - x = nputils._rolling_window(x, win, ax) - index[ax] = slice(offset[ax], None) - return x[tuple(index)] - - chunks = list(a.chunks) + window - new_axis = [a.ndim + i for i in range(len(axis))] - out = da.map_blocks( - func, - ag, - dtype=a.dtype, - new_axis=new_axis, - chunks=chunks, - window=window, - axis=axis, - ) - - # crop boundary. - index = [slice(None)] * a.ndim - for ax in axis: - index[ax] = slice(drop_size[ax], drop_size[ax] + orig_shape[ax]) - return out[tuple(index)] +from . import nputils def least_squares(lhs, rhs, rcond=None, skipna=False): diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index a9ee618e0a7..bd75f989cc1 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -625,17 +625,6 @@ def sliding_window_view(array, window_shape, axis): return npcompat.sliding_window_view(array, window_shape, axis) -def rolling_window(array, axis, window, center, fill_value): - """ - Make an ndarray with a rolling window of axis-th dimension. - The rolling dimension will be placed at the last dimension. - """ - if is_duck_dask_array(array): - return dask_array_ops.rolling_window(array, axis, window, center, fill_value) - else: # np.ndarray - return nputils.rolling_window(array, axis, window, center, fill_value) - - def least_squares(lhs, rhs, rcond=None, skipna=False): """Return the coefficients and residuals of a least-squares fit.""" if is_duck_dask_array(rhs): diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 926f7691ed7..3ca5dd700c9 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -131,81 +131,6 @@ def __setitem__(self, key, value): self._array[key] = np.moveaxis(value, vindex_positions, mixed_positions) -def rolling_window(a, axis, window, center, fill_value): - """ rolling window with padding. """ - pads = [(0, 0) for s in a.shape] - if not hasattr(axis, "__len__"): - axis = [axis] - window = [window] - center = [center] - - for ax, win, cent in zip(axis, window, center): - if cent: - start = int(win / 2) # 10 -> 5, 9 -> 4 - end = win - 1 - start - pads[ax] = (start, end) - else: - pads[ax] = (win - 1, 0) - a = np.pad(a, pads, mode="constant", constant_values=fill_value) - for ax, win in zip(axis, window): - a = _rolling_window(a, win, ax) - return a - - -def _rolling_window(a, window, axis=-1): - """ - Make an ndarray with a rolling window along axis. - - Parameters - ---------- - a : array_like - Array to add rolling window to - axis : int - axis position along which rolling window will be applied. - window : int - Size of rolling window - - Returns - ------- - Array that is a view of the original array with a added dimension - of size w. - - Examples - -------- - >>> x = np.arange(10).reshape((2, 5)) - >>> _rolling_window(x, 3, axis=-1) - array([[[0, 1, 2], - [1, 2, 3], - [2, 3, 4]], - - [[5, 6, 7], - [6, 7, 8], - [7, 8, 9]]]) - - Calculate rolling mean of last dimension: - >>> np.mean(_rolling_window(x, 3, axis=-1), -1) - array([[1., 2., 3.], - [6., 7., 8.]]) - - This function is taken from https://github.com/numpy/numpy/pull/31 - but slightly modified to accept axis option. - """ - axis = normalize_axis_index(axis, a.ndim) - a = np.swapaxes(a, axis, -1) - - if window < 1: - raise ValueError(f"`window` must be at least 1. Given : {window}") - if window > a.shape[-1]: - raise ValueError(f"`window` is too long. Given : {window}") - - shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) - strides = a.strides + (a.strides[-1],) - rolling = np.lib.stride_tricks.as_strided( - a, shape=shape, strides=strides, writeable=False - ) - return np.swapaxes(rolling, -2, axis) - - def _create_bottleneck_method(name, npmodule=np): def f(values, axis=None, **kwargs): dtype = kwargs.get("dtype", None) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index dbdd9595069..7e3605b1bf2 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -5,7 +5,6 @@ import numpy as np from . import dtypes, duck_array_ops, utils -from .dask_array_ops import dask_rolling_wrapper from .ops import inject_reduce_methods from .options import _get_keep_attrs from .pycompat import is_duck_dask_array @@ -502,13 +501,10 @@ def _bottleneck_reduce(self, func, keep_attrs, **kwargs): if is_duck_dask_array(padded.data): raise AssertionError("should not be reachable") - values = dask_rolling_wrapper( - func, padded.data, window=self.window[0], min_count=min_count, axis=axis - ) - else: - values = func( - padded.data, window=self.window[0], min_count=min_count, axis=axis - ) + + values = func( + padded.data, window=self.window[0], min_count=min_count, axis=axis + ) if self.center[0]: values = values[valid] From a292c2ace4fcdf082e385a6f8ca7a068f6868996 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 25 Feb 2021 15:46:35 -0700 Subject: [PATCH 04/17] Update tests --- xarray/tests/test_duck_array_ops.py | 26 -------------- xarray/tests/test_variable.py | 54 +++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 90e742dee62..c50b610f07c 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -531,32 +531,6 @@ def test_isnull_with_dask(): assert_equal(da.isnull().load(), da.load().isnull()) -@pytest.mark.skipif(not has_dask, reason="This is for dask.") -@pytest.mark.parametrize("axis", [0, -1]) -@pytest.mark.parametrize("window", [3, 8, 11]) -@pytest.mark.parametrize("center", [True, False]) -def test_dask_rolling(axis, window, center): - import dask.array as da - - x = np.array(np.random.randn(100, 40), dtype=float) - dx = da.from_array(x, chunks=[(6, 30, 30, 20, 14), 8]) - - expected = rolling_window( - x, axis=axis, window=window, center=center, fill_value=np.nan - ) - actual = rolling_window( - dx, axis=axis, window=window, center=center, fill_value=np.nan - ) - assert isinstance(actual, da.Array) - assert_array_equal(actual, expected) - assert actual.shape == expected.shape - - # we need to take care of window size if chunk size is small - # window/2 should be smaller than the smallest chunk size. - with pytest.raises(ValueError): - rolling_window(dx, axis=axis, window=100, center=center, fill_value=np.nan) - - @pytest.mark.skipif(not has_dask, reason="This is for dask.") @pytest.mark.parametrize("axis", [0, -1, 1]) @pytest.mark.parametrize("edge_order", [1, 2]) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 7cb62b4d85f..f5e5da31ec3 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -32,6 +32,7 @@ assert_array_equal, assert_equal, assert_identical, + raise_if_dask_computes, raises_regex, requires_dask, requires_sparse, @@ -873,26 +874,26 @@ def test_pad_constant_values(self, xr_arg, np_arg): ) assert_array_equal(actual, expected) - def test_rolling_window(self): + @pytest.mark.parametrize("d, w", (("x", 3), ("y", 5))) + def test_rolling_window(self, d, w): # Just a working test. See test_nputils for the algorithm validation v = self.cls(["x", "y", "z"], np.arange(40 * 30 * 2).reshape(40, 30, 2)) - for (d, w) in [("x", 3), ("y", 5)]: - v_rolling = v.rolling_window(d, w, d + "_window") - assert v_rolling.dims == ("x", "y", "z", d + "_window") - assert v_rolling.shape == v.shape + (w,) + v_rolling = v.rolling_window(d, w, d + "_window") + assert v_rolling.dims == ("x", "y", "z", d + "_window") + assert v_rolling.shape == v.shape + (w,) - v_rolling = v.rolling_window(d, w, d + "_window", center=True) - assert v_rolling.dims == ("x", "y", "z", d + "_window") - assert v_rolling.shape == v.shape + (w,) + v_rolling = v.rolling_window(d, w, d + "_window", center=True) + assert v_rolling.dims == ("x", "y", "z", d + "_window") + assert v_rolling.shape == v.shape + (w,) - # dask and numpy result should be the same - v_loaded = v.load().rolling_window(d, w, d + "_window", center=True) - assert_array_equal(v_rolling, v_loaded) + # dask and numpy result should be the same + v_loaded = v.load().rolling_window(d, w, d + "_window", center=True) + assert_array_equal(v_rolling, v_loaded) - # numpy backend should not be over-written - if isinstance(v._data, np.ndarray): - with pytest.raises(ValueError): - v_loaded[0] = 1.0 + # numpy backend should not be over-written + if isinstance(v._data, np.ndarray): + with pytest.raises(ValueError): + v_loaded[0] = 1.0 class TestVariable(VariableSubclassobjects): @@ -2010,6 +2011,29 @@ def test_getitem_with_mask_nd_indexer(self): self.cls(("x", "y"), [[0, -1], [-1, 2]]), ) + @pytest.mark.parametrize("dim", ["x", "y"]) + @pytest.mark.parametrize("window", [3, 8, 11]) + @pytest.mark.parametrize("center", [True, False]) + def test_dask_rolling(self, dim, window, center): + import dask + import dask.array as da + + dask.config.set(scheduler="single-threaded") + + x = Variable(("x", "y"), np.array(np.random.randn(100, 40), dtype=float)) + dx = Variable(("x", "y"), da.from_array(x, chunks=[(6, 30, 30, 20, 14), 8])) + + expected = x.rolling_window( + dim, window, "window", center=center, fill_value=np.nan + ) + with raise_if_dask_computes(): + actual = dx.rolling_window( + dim, window, "window", center=center, fill_value=np.nan + ) + assert isinstance(actual.data, da.Array) + assert actual.shape == expected.shape + assert_equal(actual, expected) + @requires_sparse class TestVariableWithSparse: From b60617b65836589d8d2e740a5461bc75a38a8eb1 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 1 Mar 2021 08:38:03 -0700 Subject: [PATCH 05/17] update duckarrays doc --- doc/duckarrays.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/duckarrays.rst b/doc/duckarrays.rst index ba13d5160ae..5d54bf64326 100644 --- a/doc/duckarrays.rst +++ b/doc/duckarrays.rst @@ -44,8 +44,6 @@ the code will still cast to ``numpy`` arrays: addition to not supporting duck arrays in dimension coordinates * :py:meth:`Dataset.rolling_exp` and :py:meth:`DataArray.rolling_exp` (uses ``numbagg``) - * :py:meth:`Dataset.rolling` and :py:meth:`DataArray.rolling` (uses internal functions - of ``numpy``) * :py:meth:`Dataset.interpolate_na` and :py:meth:`DataArray.interpolate_na` (uses :py:class:`numpy.vectorize`) * :py:func:`apply_ufunc` with ``vectorize=True`` (uses :py:class:`numpy.vectorize`) From 5a9f9ef3cae9dc465aba665230b58fdc36df3ddf Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 1 Mar 2021 08:59:13 -0700 Subject: [PATCH 06/17] convert more nputils tests to Variable tests --- xarray/core/variable.py | 14 ++++--- xarray/tests/test_dataarray.py | 9 +++++ xarray/tests/test_duck_array_ops.py | 1 - xarray/tests/test_nputils.py | 37 +---------------- xarray/tests/test_variable.py | 61 +++++++++++++++++++++++++++++ 5 files changed, 79 insertions(+), 43 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 3a05126bf66..684319b390c 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2035,7 +2035,7 @@ def rolling_window( For nd-rolling, should be list of integers. window_dim : str New name of the window dimension. - For nd-rolling, should be list of integers. + For nd-rolling, should be list of strings. center : bool, default: False If True, pad fill_value for both ends. Otherwise, pad in the head of the axis. @@ -2083,15 +2083,17 @@ def rolling_window( dtype = self.dtype var = self - if isinstance(dim, list): - assert len(dim) == len(window) - assert len(dim) == len(window_dim) - assert len(dim) == len(center) - else: + if utils.is_scalar(dim): + for arg in [window, window_dim, center]: + assert utils.is_scalar(arg) dim = [dim] window = [window] window_dim = [window_dim] center = [center] + else: + assert len(dim) == len(window) + assert len(dim) == len(window_dim) + assert len(dim) == len(center) pads = {} for d, win, cent in zip(dim, window, center): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b28a53023ed..089785f5a12 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6435,6 +6435,15 @@ def test_rolling_repr(da): assert repr(rolling_obj) == "DataArrayRolling [time->7(center),x->3(center)]" +@requires_dask +def test_repeated_rolling_rechunks(): + + # regression test for GH3277, GH2514 + dat = DataArray(np.random.rand(7653, 300), dims=("day", "item")) + dat_chunk = dat.chunk({"item": 20}) + dat_chunk.rolling(day=10).mean().rolling(day=250).std() + + def test_rolling_doc(da): rolling_obj = da.rolling(time=7) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index c50b610f07c..e030b9d2e42 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -21,7 +21,6 @@ np_timedelta64_to_float, pd_timedelta_to_float, py_timedelta_to_float, - rolling_window, stack, timedelta_to_numeric, where, diff --git a/xarray/tests/test_nputils.py b/xarray/tests/test_nputils.py index ccb825dc7e9..25b77a0da49 100644 --- a/xarray/tests/test_nputils.py +++ b/xarray/tests/test_nputils.py @@ -2,7 +2,7 @@ import pytest from numpy.testing import assert_array_equal -from xarray.core.nputils import NumpyVIndexAdapter, _is_contiguous, rolling_window +from xarray.core.nputils import NumpyVIndexAdapter, _is_contiguous def test_is_contiguous(): @@ -29,38 +29,3 @@ def test_vindex(): vindex[[0, 1], [0, 1], :] = vindex[[0, 1], [0, 1], :] vindex[[0, 1], :, [0, 1]] = vindex[[0, 1], :, [0, 1]] vindex[:, [0, 1], [0, 1]] = vindex[:, [0, 1], [0, 1]] - - -def test_rolling(): - x = np.array([1, 2, 3, 4], dtype=float) - - actual = rolling_window(x, axis=-1, window=3, center=True, fill_value=np.nan) - expected = np.array( - [[np.nan, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, np.nan]], dtype=float - ) - assert_array_equal(actual, expected) - - actual = rolling_window(x, axis=-1, window=3, center=False, fill_value=0.0) - expected = np.array([[0, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], dtype=float) - assert_array_equal(actual, expected) - - x = np.stack([x, x * 1.1]) - actual = rolling_window(x, axis=-1, window=3, center=False, fill_value=0.0) - expected = np.stack([expected, expected * 1.1], axis=0) - assert_array_equal(actual, expected) - - -@pytest.mark.parametrize("center", [[True, True], [False, False]]) -@pytest.mark.parametrize("axis", [(0, 1), (1, 2), (2, 0)]) -def test_nd_rolling(center, axis): - x = np.arange(7 * 6 * 8).reshape(7, 6, 8).astype(float) - window = [3, 3] - actual = rolling_window( - x, axis=axis, window=window, center=center, fill_value=np.nan - ) - expected = x - for ax, win, cent in zip(axis, window, center): - expected = rolling_window( - expected, axis=ax, window=win, center=cent, fill_value=np.nan - ) - assert_array_equal(actual, expected) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index f5e5da31ec3..fa07714bba6 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -895,6 +895,59 @@ def test_rolling_window(self, d, w): with pytest.raises(ValueError): v_loaded[0] = 1.0 + def test_rolling_1d(self): + x = self.cls("x", np.array([1, 2, 3, 4], dtype=float)) + + kwargs = dict(dim="x", window=3, window_dim="xw") + actual = x.rolling_window(**kwargs, center=True, fill_value=np.nan) + expected = Variable( + ("x", "xw"), + np.array( + [[np.nan, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, np.nan]], dtype=float + ), + ) + assert_equal(actual, expected) + + actual = x.rolling_window(**kwargs, center=False, fill_value=0.0) + expected = self.cls( + ("x", "xw"), + np.array([[0, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], dtype=float), + ) + assert_equal(actual, expected) + + x = self.cls(("y", "x"), np.stack([x, x * 1.1])) + actual = x.rolling_window(**kwargs, center=False, fill_value=0.0) + expected = self.cls( + ("y", "x", "xw"), np.stack([expected.data, expected.data * 1.1], axis=0) + ) + assert_equal(actual, expected) + + @pytest.mark.parametrize("center", [[True, True], [False, False]]) + @pytest.mark.parametrize("dims", [("x", "y"), ("y", "z"), ("z", "x")]) + def test_nd_rolling(self, center, dims): + x = self.cls( + ("x", "y", "z"), + np.arange(7 * 6 * 8).reshape(7, 6, 8).astype(float), + ) + window = [3, 3] + actual = x.rolling_window( + dim=dims, + window=window, + window_dim=[f"{k}w" for k in dims], + center=center, + fill_value=np.nan, + ) + expected = x + for dim, win, cent in zip(dims, window, center): + expected = expected.rolling_window( + dim=dim, + window=win, + window_dim=f"{dim}w", + center=cent, + fill_value=np.nan, + ) + assert_equal(actual, expected) + class TestVariable(VariableSubclassobjects): cls = staticmethod(Variable) @@ -2189,6 +2242,14 @@ def test_pad_constant_values(self, xr_arg, np_arg): def test_rolling_window(self): super().test_rolling_window() + @pytest.mark.xfail + def test_rolling_1d(self): + super().test_rolling_1d() + + @pytest.mark.xfail + def test_nd_rolling(self): + super().test_nd_rolling() + @pytest.mark.xfail def test_coarsen_2d(self): super().test_coarsen_2d() From 06fb8d7cead7aded429a42d8e45d33af8dbb394f Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 1 Mar 2021 10:43:59 -0700 Subject: [PATCH 07/17] Undo deletion of bottleneck + dask path --- xarray/core/dask_array_ops.py | 25 ++++++++++++++++++++++++- xarray/core/rolling.py | 12 ++++++++---- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index e323191234c..25f082ec3c5 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -1,4 +1,27 @@ -from . import nputils +from . import dtypes, nputils + + +def dask_rolling_wrapper(moving_func, a, window, min_count=None, axis=-1): + """Wrapper to apply bottleneck moving window funcs on dask arrays""" + import dask.array as da + + dtype, fill_value = dtypes.maybe_promote(a.dtype) + a = a.astype(dtype) + # inputs for overlap + if axis < 0: + axis = a.ndim + axis + depth = {d: 0 for d in range(a.ndim)} + depth[axis] = (window + 1) // 2 + boundary = {d: fill_value for d in range(a.ndim)} + # Create overlap array. + ag = da.overlap.overlap(a, depth=depth, boundary=boundary) + # apply rolling func + out = da.map_blocks( + moving_func, ag, window, min_count=min_count, axis=axis, dtype=a.dtype + ) + # trim array + result = da.overlap.trim_internal(out, depth) + return result def least_squares(lhs, rhs, rcond=None, skipna=False): diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 7e3605b1bf2..dbdd9595069 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -5,6 +5,7 @@ import numpy as np from . import dtypes, duck_array_ops, utils +from .dask_array_ops import dask_rolling_wrapper from .ops import inject_reduce_methods from .options import _get_keep_attrs from .pycompat import is_duck_dask_array @@ -501,10 +502,13 @@ def _bottleneck_reduce(self, func, keep_attrs, **kwargs): if is_duck_dask_array(padded.data): raise AssertionError("should not be reachable") - - values = func( - padded.data, window=self.window[0], min_count=min_count, axis=axis - ) + values = dask_rolling_wrapper( + func, padded.data, window=self.window[0], min_count=min_count, axis=axis + ) + else: + values = func( + padded.data, window=self.window[0], min_count=min_count, axis=axis + ) if self.center[0]: values = values[valid] From bfaf076cb302df21ce0c7e5480e2327df7b0848b Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 1 Mar 2021 10:48:40 -0700 Subject: [PATCH 08/17] lint --- xarray/tests/test_nputils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_nputils.py b/xarray/tests/test_nputils.py index 25b77a0da49..3c9c92ae2ba 100644 --- a/xarray/tests/test_nputils.py +++ b/xarray/tests/test_nputils.py @@ -1,5 +1,4 @@ import numpy as np -import pytest from numpy.testing import assert_array_equal from xarray.core.nputils import NumpyVIndexAdapter, _is_contiguous From 9f873a04b42dc411265cd406a97b456c95a60b51 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 1 Mar 2021 11:06:51 -0700 Subject: [PATCH 09/17] vendor ensure minimum chunksize --- xarray/core/dask_array_compat.py | 58 +++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index c210ca27bf7..0a867851a3a 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -95,8 +95,64 @@ def nanmedian(a, axis=None, keepdims=False): return result +if LooseVersion(dask_version) > LooseVersion("2.30.0"): + ensure_minimum_chunksize = da.overlap.ensure_minimum_chunksize +else: + + def ensure_minimum_chunksize(size, chunks): + """Determine new chunks to ensure that every chunk >= size + + Parameters + ---------- + size: int + The maximum size of any chunk. + chunks: tuple + Chunks along one axis, e.g. ``(3, 3, 2)`` + + Examples + -------- + >>> ensure_minimum_chunksize(10, (20, 20, 1)) + (20, 11, 10) + >>> ensure_minimum_chunksize(3, (1, 1, 3)) + (5,) + + See Also + -------- + overlap + """ + if size <= min(chunks): + return chunks + + # add too-small chunks to chunks before them + output = [] + new = 0 + for c in chunks: + if c < size: + if new > size + (size - c): + output.append(new - (size - c)) + new = size + else: + new += c + if new >= size: + output.append(new) + new = 0 + if c >= size: + new += c + if new >= size: + output.append(new) + elif len(output) >= 1: + output[-1] += new + else: + raise ValueError( + f"The overlapping depth {size} is larger than your " + f"array {sum(chunks)}." + ) + + return tuple(output) + + def sliding_window_view(x, window_shape, axis=None): - from dask.array.overlap import ensure_minimum_chunksize, map_overlap + from dask.array.overlap import map_overlap from numpy.core.numeric import normalize_axis_tuple from .npcompat import sliding_window_view as _np_sliding_window_view From f25d16abf81f54ff674a1b24a61426ccafa88c96 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 1 Mar 2021 13:48:09 -0700 Subject: [PATCH 10/17] support older map_overlap signature --- xarray/core/dask_array_compat.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 0a867851a3a..de334d1e539 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -200,9 +200,7 @@ def sliding_window_view(x, window_shape, axis=None): (window,) for window in window_shape ) - return map_overlap( - _np_sliding_window_view, - x, + kwargs = dict( depth=tuple((0, d) for d in depths), # Overlap on +ve side only boundary="none", meta=x._meta, @@ -213,3 +211,8 @@ def sliding_window_view(x, window_shape, axis=None): window_shape=window_shape, axis=axis, ) + # map_overlap's signature changed in https://github.com/dask/dask/pull/6165 + if LooseVersion(dask_version) > "2.18.0": + return map_overlap(_np_sliding_window_view, x, **kwargs) + else: + return map_overlap(x, _np_sliding_window_view, **kwargs) From bd9bcef262590dbfafc541be72b3161ec2f72658 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 1 Mar 2021 14:07:17 -0700 Subject: [PATCH 11/17] more backcompat --- xarray/core/dask_array_compat.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index de334d1e539..91177f8446a 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -207,12 +207,11 @@ def sliding_window_view(x, window_shape, axis=None): new_axis=range(x.ndim, x.ndim + len(axis)), chunks=newchunks, trim=False, - align_arrays=False, window_shape=window_shape, axis=axis, ) # map_overlap's signature changed in https://github.com/dask/dask/pull/6165 if LooseVersion(dask_version) > "2.18.0": - return map_overlap(_np_sliding_window_view, x, **kwargs) + return map_overlap(_np_sliding_window_view, x, align_arrays=False, **kwargs) else: return map_overlap(x, _np_sliding_window_view, **kwargs) From 5f86df0858012c290cb5bb51b41c585eae717fdb Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 5 Mar 2021 07:07:23 -0700 Subject: [PATCH 12/17] review feedback --- doc/duckarrays.rst | 1 + xarray/core/npcompat.py | 27 --------------------------- xarray/core/variable.py | 11 +++++++---- 3 files changed, 8 insertions(+), 31 deletions(-) diff --git a/doc/duckarrays.rst b/doc/duckarrays.rst index 5d54bf64326..97da968b84a 100644 --- a/doc/duckarrays.rst +++ b/doc/duckarrays.rst @@ -42,6 +42,7 @@ the code will still cast to ``numpy`` arrays: :py:meth:`DataArray.interp` and :py:meth:`DataArray.interp_like` (uses ``scipy``): duck arrays in data variables and non-dimension coordinates will be casted in addition to not supporting duck arrays in dimension coordinates + * :py:meth:`Dataset.rolling` and :py:meth:`DataArray.rolling` (requires ``numpy>=1.20``) * :py:meth:`Dataset.rolling_exp` and :py:meth:`DataArray.rolling_exp` (uses ``numbagg``) * :py:meth:`Dataset.interpolate_na` and :py:meth:`DataArray.interpolate_na` (uses diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 47d744d06d4..1e51b89fc1b 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -151,33 +151,6 @@ def sliding_window_view( That is, ``view.shape = x_shape_trimmed + window_shape``, where ``x_shape_trimmed`` is ``x.shape`` with every entry reduced by one less than the corresponding window size. - - See Also - -------- - lib.stride_tricks.as_strided: A lower-level and less safe routine for - creating arbitrary views from custom shape and strides. - broadcast_to: broadcast an array to a given shape. - - Notes - ----- - For many applications using a sliding window view can be convenient, but - potentially very slow. Often specialized solutions exist, for example: - - - `scipy.signal.fftconvolve` - - - filtering functions in `scipy.ndimage` - - - moving window functions provided by - `bottleneck `_. - - As a rough estimate, a sliding window approach with an input size of `N` - and a window size of `W` will scale as `O(N*W)` where frequently a special - algorithm can achieve `O(N)`. That means that the sliding window variant - for a window size of 100 can be a 100 times slower than a more specialized - version. - - Nevertheless, for small window sizes, when no custom algorithm exists, or - as a prototyping and developing tool, this function can be a good solution. """ window_shape = ( tuple(window_shape) if np.iterable(window_shape) else (window_shape,) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 684319b390c..d6ef206e7b5 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2091,14 +2091,17 @@ def rolling_window( window_dim = [window_dim] center = [center] else: - assert len(dim) == len(window) - assert len(dim) == len(window_dim) - assert len(dim) == len(center) + if len(dim) != len(window): + raise ValueError( + "'dim', 'window', 'window_dim', and 'center' must be the same length. " + f"Received dim={dim!r}, window={window!r}, window_dim={window_dim!r}," + f" and center={center!r}." + ) pads = {} for d, win, cent in zip(dim, window, center): if cent: - start = int(win / 2) # 10 -> 5, 9 -> 4 + start = win // 2 # 10 -> 5, 9 -> 4 end = win - 1 - start pads[d] = (start, end) else: From 61c5e43a750eef4afb4431c87a043e87e9972fba Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 5 Mar 2021 07:24:21 -0700 Subject: [PATCH 13/17] Add some extra tests --- xarray/core/variable.py | 38 ++++++++++++++++++++--------- xarray/tests/test_variable.py | 46 ++++++++++++++++++++++++++--------- 2 files changed, 61 insertions(+), 23 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index d6ef206e7b5..52bcb2c161c 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2084,19 +2084,33 @@ def rolling_window( var = self if utils.is_scalar(dim): - for arg in [window, window_dim, center]: - assert utils.is_scalar(arg) + for name, arg in zip( + ["window", "window_dim", "center"], [window, window_dim, center] + ): + if not utils.is_scalar(arg): + raise ValueError( + f"Expected {name}={arg!r} to be a scalar like 'dim'." + ) dim = [dim] - window = [window] - window_dim = [window_dim] - center = [center] - else: - if len(dim) != len(window): - raise ValueError( - "'dim', 'window', 'window_dim', and 'center' must be the same length. " - f"Received dim={dim!r}, window={window!r}, window_dim={window_dim!r}," - f" and center={center!r}." - ) + + # dim is now a list + nroll = len(dim) + if utils.is_scalar(window): + window = [window] * nroll + if utils.is_scalar(window_dim): + window_dim = [window_dim] * nroll + if utils.is_scalar(center): + center = [center] * nroll + if ( + len(dim) != len(window) + or len(dim) != len(window_dim) + or len(dim) != len(center) + ): + raise ValueError( + "'dim', 'window', 'window_dim', and 'center' must be the same length. " + f"Received dim={dim!r}, window={window!r}, window_dim={window_dim!r}," + f" and center={center!r}." + ) pads = {} for d, win, cent in zip(dim, window, center): diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index fa07714bba6..1aaaff9b2d4 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -948,6 +948,27 @@ def test_nd_rolling(self, center, dims): ) assert_equal(actual, expected) + @pytest.mark.parametrize( + ("dim, window, window_dim, center"), + [ + ("x", [3, 3], "x_w", True), + ("x", 3, ("x_w", "x_w"), True), + ("x", 3, "x_w", [True, True]), + ], + ) + def test_rolling_window_errors(self, dim, window, window_dim, center): + x = self.cls( + ("x", "y", "z"), + np.arange(7 * 6 * 8).reshape(7, 6, 8).astype(float), + ) + with pytest.raises(ValueError): + x.rolling_window( + dim=dim, + window=window, + window_dim=window_dim, + center=center, + ) + class TestVariable(VariableSubclassobjects): cls = staticmethod(Variable) @@ -2198,23 +2219,23 @@ def test_datetime64(self): # These tests make use of multi-dimensional variables, which are not valid # IndexVariable objects: - @pytest.mark.xfail + @pytest.mark.skip def test_getitem_error(self): super().test_getitem_error() - @pytest.mark.xfail + @pytest.mark.skip def test_getitem_advanced(self): super().test_getitem_advanced() - @pytest.mark.xfail + @pytest.mark.skip def test_getitem_fancy(self): super().test_getitem_fancy() - @pytest.mark.xfail + @pytest.mark.skip def test_getitem_uint(self): super().test_getitem_fancy() - @pytest.mark.xfail + @pytest.mark.skip @pytest.mark.parametrize( "mode", [ @@ -2233,24 +2254,27 @@ def test_getitem_uint(self): def test_pad(self, mode, xr_arg, np_arg): super().test_pad(mode, xr_arg, np_arg) - @pytest.mark.xfail - @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + @pytest.mark.skip def test_pad_constant_values(self, xr_arg, np_arg): super().test_pad_constant_values(xr_arg, np_arg) - @pytest.mark.xfail + @pytest.mark.skip def test_rolling_window(self): super().test_rolling_window() - @pytest.mark.xfail + @pytest.mark.skip def test_rolling_1d(self): super().test_rolling_1d() - @pytest.mark.xfail + @pytest.mark.skip def test_nd_rolling(self): super().test_nd_rolling() - @pytest.mark.xfail + @pytest.mark.skip + def test_rolling_window_errors(self): + super().test_rolling_window_errors() + + @pytest.mark.skip def test_coarsen_2d(self): super().test_coarsen_2d() From a008aabc2ff9d7bca481c50b35b0218c7abd11d7 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 13 Mar 2021 13:42:47 -0700 Subject: [PATCH 14/17] Apply suggestions from code review Co-authored-by: Mathias Hauser --- xarray/core/dask_array_compat.py | 3 ++- xarray/core/npcompat.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 91177f8446a..cee71a16d95 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -99,6 +99,7 @@ def nanmedian(a, axis=None, keepdims=False): ensure_minimum_chunksize = da.overlap.ensure_minimum_chunksize else: + # copied from dask def ensure_minimum_chunksize(size, chunks): """Determine new chunks to ensure that every chunk >= size @@ -153,7 +154,7 @@ def ensure_minimum_chunksize(size, chunks): def sliding_window_view(x, window_shape, axis=None): from dask.array.overlap import map_overlap - from numpy.core.numeric import normalize_axis_tuple + from numpy.core.numeric import normalize_axis_tuple # type: ignore from .npcompat import sliding_window_view as _np_sliding_window_view diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 1e51b89fc1b..c46f6d5a9bc 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -105,6 +105,7 @@ def __array_function__(self, *args, **kwargs): from numpy.core.numeric import normalize_axis_tuple from numpy.lib.stride_tricks import as_strided + # copied from numpy.lib.stride_tricks def sliding_window_view( x, window_shape, axis=None, *, subok=False, writeable=False ): From e41b9a777da389149423b8f36dd2d54b7922462b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 15 Mar 2021 06:00:59 -0600 Subject: [PATCH 15/17] Update xarray/core/npcompat.py Co-authored-by: Mathias Hauser --- xarray/core/npcompat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index c46f6d5a9bc..4016fe44ca6 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -102,7 +102,7 @@ def __array_function__(self, *args, **kwargs): if LooseVersion(np.__version__) >= "1.20.0": sliding_window_view = np.lib.stride_tricks.sliding_window_view else: - from numpy.core.numeric import normalize_axis_tuple + from numpy.core.numeric import normalize_axis_tuple # type: ignore from numpy.lib.stride_tricks import as_strided # copied from numpy.lib.stride_tricks From cdefb15090cf6f00a8df653239d2768f10607186 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 18 Mar 2021 10:29:59 -0600 Subject: [PATCH 16/17] sync with dask PR --- xarray/core/dask_array_compat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 91177f8446a..5c52c65c755 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -160,8 +160,8 @@ def sliding_window_view(x, window_shape, axis=None): window_shape = tuple(window_shape) if np.iterable(window_shape) else (window_shape,) window_shape_array = np.array(window_shape) - if np.any(window_shape_array < 0): - raise ValueError("`window_shape` cannot contain negative values") + if np.any(window_shape_array <= 0): + raise ValueError("`window_shape` must contain positive values") if axis is None: axis = tuple(range(x.ndim)) From 39f9e28e2261304f5017012e72b9bf9844a1f1ed Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 18 Mar 2021 10:56:36 -0600 Subject: [PATCH 17/17] [test-upstream] Actually use dask_array_compat. --- xarray/core/dask_array_compat.py | 124 ++++++++++++++++--------------- 1 file changed, 65 insertions(+), 59 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index d9aae98f331..de180faf233 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -152,67 +152,73 @@ def ensure_minimum_chunksize(size, chunks): return tuple(output) -def sliding_window_view(x, window_shape, axis=None): - from dask.array.overlap import map_overlap - from numpy.core.numeric import normalize_axis_tuple # type: ignore +if LooseVersion(dask_version) > LooseVersion("2021.03.0"): + sliding_window_view = da.lib.stride_tricks.sliding_window_view +else: - from .npcompat import sliding_window_view as _np_sliding_window_view + def sliding_window_view(x, window_shape, axis=None): + from dask.array.overlap import map_overlap + from numpy.core.numeric import normalize_axis_tuple # type: ignore - window_shape = tuple(window_shape) if np.iterable(window_shape) else (window_shape,) + from .npcompat import sliding_window_view as _np_sliding_window_view - window_shape_array = np.array(window_shape) - if np.any(window_shape_array <= 0): - raise ValueError("`window_shape` must contain positive values") + window_shape = ( + tuple(window_shape) if np.iterable(window_shape) else (window_shape,) + ) - if axis is None: - axis = tuple(range(x.ndim)) - if len(window_shape) != len(axis): - raise ValueError( - f"Since axis is `None`, must provide " - f"window_shape for all dimensions of `x`; " - f"got {len(window_shape)} window_shape elements " - f"and `x.ndim` is {x.ndim}." - ) - else: - axis = normalize_axis_tuple(axis, x.ndim, allow_duplicate=True) - if len(window_shape) != len(axis): - raise ValueError( - f"Must provide matching length window_shape and " - f"axis; got {len(window_shape)} window_shape " - f"elements and {len(axis)} axes elements." - ) + window_shape_array = np.array(window_shape) + if np.any(window_shape_array <= 0): + raise ValueError("`window_shape` must contain positive values") - depths = [0] * x.ndim - for ax, window in zip(axis, window_shape): - depths[ax] += window - 1 - - # Ensure that each chunk is big enough to leave at least a size-1 chunk - # after windowing (this is only really necessary for the last chunk). - safe_chunks = tuple( - ensure_minimum_chunksize(d + 1, c) for d, c in zip(depths, x.chunks) - ) - x = x.rechunk(safe_chunks) - - # result.shape = x_shape_trimmed + window_shape, - # where x_shape_trimmed is x.shape with every entry - # reduced by one less than the corresponding window size. - # trim chunks to match x_shape_trimmed - newchunks = tuple(c[:-1] + (c[-1] - d,) for d, c in zip(depths, x.chunks)) + tuple( - (window,) for window in window_shape - ) - - kwargs = dict( - depth=tuple((0, d) for d in depths), # Overlap on +ve side only - boundary="none", - meta=x._meta, - new_axis=range(x.ndim, x.ndim + len(axis)), - chunks=newchunks, - trim=False, - window_shape=window_shape, - axis=axis, - ) - # map_overlap's signature changed in https://github.com/dask/dask/pull/6165 - if LooseVersion(dask_version) > "2.18.0": - return map_overlap(_np_sliding_window_view, x, align_arrays=False, **kwargs) - else: - return map_overlap(x, _np_sliding_window_view, **kwargs) + if axis is None: + axis = tuple(range(x.ndim)) + if len(window_shape) != len(axis): + raise ValueError( + f"Since axis is `None`, must provide " + f"window_shape for all dimensions of `x`; " + f"got {len(window_shape)} window_shape elements " + f"and `x.ndim` is {x.ndim}." + ) + else: + axis = normalize_axis_tuple(axis, x.ndim, allow_duplicate=True) + if len(window_shape) != len(axis): + raise ValueError( + f"Must provide matching length window_shape and " + f"axis; got {len(window_shape)} window_shape " + f"elements and {len(axis)} axes elements." + ) + + depths = [0] * x.ndim + for ax, window in zip(axis, window_shape): + depths[ax] += window - 1 + + # Ensure that each chunk is big enough to leave at least a size-1 chunk + # after windowing (this is only really necessary for the last chunk). + safe_chunks = tuple( + ensure_minimum_chunksize(d + 1, c) for d, c in zip(depths, x.chunks) + ) + x = x.rechunk(safe_chunks) + + # result.shape = x_shape_trimmed + window_shape, + # where x_shape_trimmed is x.shape with every entry + # reduced by one less than the corresponding window size. + # trim chunks to match x_shape_trimmed + newchunks = tuple( + c[:-1] + (c[-1] - d,) for d, c in zip(depths, x.chunks) + ) + tuple((window,) for window in window_shape) + + kwargs = dict( + depth=tuple((0, d) for d in depths), # Overlap on +ve side only + boundary="none", + meta=x._meta, + new_axis=range(x.ndim, x.ndim + len(axis)), + chunks=newchunks, + trim=False, + window_shape=window_shape, + axis=axis, + ) + # map_overlap's signature changed in https://github.com/dask/dask/pull/6165 + if LooseVersion(dask_version) > "2.18.0": + return map_overlap(_np_sliding_window_view, x, align_arrays=False, **kwargs) + else: + return map_overlap(x, _np_sliding_window_view, **kwargs)