diff --git a/doc/user-guide/duckarrays.rst b/doc/user-guide/duckarrays.rst index ba13d5160ae..97da968b84a 100644 --- a/doc/user-guide/duckarrays.rst +++ b/doc/user-guide/duckarrays.rst @@ -42,10 +42,9 @@ the code will still cast to ``numpy`` arrays: :py:meth:`DataArray.interp` and :py:meth:`DataArray.interp_like` (uses ``scipy``): duck arrays in data variables and non-dimension coordinates will be casted in addition to not supporting duck arrays in dimension coordinates + * :py:meth:`Dataset.rolling` and :py:meth:`DataArray.rolling` (requires ``numpy>=1.20``) * :py:meth:`Dataset.rolling_exp` and :py:meth:`DataArray.rolling_exp` (uses ``numbagg``) - * :py:meth:`Dataset.rolling` and :py:meth:`DataArray.rolling` (uses internal functions - of ``numpy``) * :py:meth:`Dataset.interpolate_na` and :py:meth:`DataArray.interpolate_na` (uses :py:class:`numpy.vectorize`) * :py:func:`apply_ufunc` with ``vectorize=True`` (uses :py:class:`numpy.vectorize`) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index ce15e01fb12..de180faf233 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -93,3 +93,132 @@ def nanmedian(a, axis=None, keepdims=False): ) return result + + +if LooseVersion(dask_version) > LooseVersion("2.30.0"): + ensure_minimum_chunksize = da.overlap.ensure_minimum_chunksize +else: + + # copied from dask + def ensure_minimum_chunksize(size, chunks): + """Determine new chunks to ensure that every chunk >= size + + Parameters + ---------- + size: int + The maximum size of any chunk. + chunks: tuple + Chunks along one axis, e.g. ``(3, 3, 2)`` + + Examples + -------- + >>> ensure_minimum_chunksize(10, (20, 20, 1)) + (20, 11, 10) + >>> ensure_minimum_chunksize(3, (1, 1, 3)) + (5,) + + See Also + -------- + overlap + """ + if size <= min(chunks): + return chunks + + # add too-small chunks to chunks before them + output = [] + new = 0 + for c in chunks: + if c < size: + if new > size + (size - c): + output.append(new - (size - c)) + new = size + else: + new += c + if new >= size: + output.append(new) + new = 0 + if c >= size: + new += c + if new >= size: + output.append(new) + elif len(output) >= 1: + output[-1] += new + else: + raise ValueError( + f"The overlapping depth {size} is larger than your " + f"array {sum(chunks)}." + ) + + return tuple(output) + + +if LooseVersion(dask_version) > LooseVersion("2021.03.0"): + sliding_window_view = da.lib.stride_tricks.sliding_window_view +else: + + def sliding_window_view(x, window_shape, axis=None): + from dask.array.overlap import map_overlap + from numpy.core.numeric import normalize_axis_tuple # type: ignore + + from .npcompat import sliding_window_view as _np_sliding_window_view + + window_shape = ( + tuple(window_shape) if np.iterable(window_shape) else (window_shape,) + ) + + window_shape_array = np.array(window_shape) + if np.any(window_shape_array <= 0): + raise ValueError("`window_shape` must contain positive values") + + if axis is None: + axis = tuple(range(x.ndim)) + if len(window_shape) != len(axis): + raise ValueError( + f"Since axis is `None`, must provide " + f"window_shape for all dimensions of `x`; " + f"got {len(window_shape)} window_shape elements " + f"and `x.ndim` is {x.ndim}." + ) + else: + axis = normalize_axis_tuple(axis, x.ndim, allow_duplicate=True) + if len(window_shape) != len(axis): + raise ValueError( + f"Must provide matching length window_shape and " + f"axis; got {len(window_shape)} window_shape " + f"elements and {len(axis)} axes elements." + ) + + depths = [0] * x.ndim + for ax, window in zip(axis, window_shape): + depths[ax] += window - 1 + + # Ensure that each chunk is big enough to leave at least a size-1 chunk + # after windowing (this is only really necessary for the last chunk). + safe_chunks = tuple( + ensure_minimum_chunksize(d + 1, c) for d, c in zip(depths, x.chunks) + ) + x = x.rechunk(safe_chunks) + + # result.shape = x_shape_trimmed + window_shape, + # where x_shape_trimmed is x.shape with every entry + # reduced by one less than the corresponding window size. + # trim chunks to match x_shape_trimmed + newchunks = tuple( + c[:-1] + (c[-1] - d,) for d, c in zip(depths, x.chunks) + ) + tuple((window,) for window in window_shape) + + kwargs = dict( + depth=tuple((0, d) for d in depths), # Overlap on +ve side only + boundary="none", + meta=x._meta, + new_axis=range(x.ndim, x.ndim + len(axis)), + chunks=newchunks, + trim=False, + window_shape=window_shape, + axis=axis, + ) + # map_overlap's signature changed in https://github.com/dask/dask/pull/6165 + if LooseVersion(dask_version) > "2.18.0": + return map_overlap(_np_sliding_window_view, x, align_arrays=False, **kwargs) + else: + return map_overlap(x, _np_sliding_window_view, **kwargs) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 15641506e4e..25f082ec3c5 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -1,5 +1,3 @@ -import numpy as np - from . import dtypes, nputils @@ -26,92 +24,6 @@ def dask_rolling_wrapper(moving_func, a, window, min_count=None, axis=-1): return result -def rolling_window(a, axis, window, center, fill_value): - """Dask's equivalence to np.utils.rolling_window""" - import dask.array as da - - if not hasattr(axis, "__len__"): - axis = [axis] - window = [window] - center = [center] - - orig_shape = a.shape - depth = {d: 0 for d in range(a.ndim)} - offset = [0] * a.ndim - drop_size = [0] * a.ndim - pad_size = [0] * a.ndim - for ax, win, cent in zip(axis, window, center): - if ax < 0: - ax = a.ndim + ax - depth[ax] = int(win / 2) - # For evenly sized window, we need to crop the first point of each block. - offset[ax] = 1 if win % 2 == 0 else 0 - - if depth[ax] > min(a.chunks[ax]): - raise ValueError( - "For window size %d, every chunk should be larger than %d, " - "but the smallest chunk size is %d. Rechunk your array\n" - "with a larger chunk size or a chunk size that\n" - "more evenly divides the shape of your array." - % (win, depth[ax], min(a.chunks[ax])) - ) - - # Although da.overlap pads values to boundaries of the array, - # the size of the generated array is smaller than what we want - # if center == False. - if cent: - start = int(win / 2) # 10 -> 5, 9 -> 4 - end = win - 1 - start - else: - start, end = win - 1, 0 - pad_size[ax] = max(start, end) + offset[ax] - depth[ax] - drop_size[ax] = 0 - # pad_size becomes more than 0 when the overlapped array is smaller than - # needed. In this case, we need to enlarge the original array by padding - # before overlapping. - if pad_size[ax] > 0: - if pad_size[ax] < depth[ax]: - # overlapping requires each chunk larger than depth. If pad_size is - # smaller than the depth, we enlarge this and truncate it later. - drop_size[ax] = depth[ax] - pad_size[ax] - pad_size[ax] = depth[ax] - - # TODO maybe following two lines can be summarized. - a = da.pad( - a, [(p, 0) for p in pad_size], mode="constant", constant_values=fill_value - ) - boundary = {d: fill_value for d in range(a.ndim)} - - # create overlap arrays - ag = da.overlap.overlap(a, depth=depth, boundary=boundary) - - def func(x, window, axis): - x = np.asarray(x) - index = [slice(None)] * x.ndim - for ax, win in zip(axis, window): - x = nputils._rolling_window(x, win, ax) - index[ax] = slice(offset[ax], None) - return x[tuple(index)] - - chunks = list(a.chunks) + window - new_axis = [a.ndim + i for i in range(len(axis))] - out = da.map_blocks( - func, - ag, - dtype=a.dtype, - new_axis=new_axis, - chunks=chunks, - window=window, - axis=axis, - ) - - # crop boundary. - index = [slice(None)] * a.ndim - for ax in axis: - index[ax] = slice(drop_size[ax], drop_size[ax] + orig_shape[ax]) - return out[tuple(index)] - - def least_squares(lhs, rhs, rcond=None, skipna=False): import dask.array as da diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 9c8c42d0491..bd75f989cc1 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -614,15 +614,15 @@ def last(values, axis, skipna=None): return take(values, -1, axis=axis) -def rolling_window(array, axis, window, center, fill_value): +def sliding_window_view(array, window_shape, axis): """ Make an ndarray with a rolling window of axis-th dimension. The rolling dimension will be placed at the last dimension. """ if is_duck_dask_array(array): - return dask_array_ops.rolling_window(array, axis, window, center, fill_value) - else: # np.ndarray - return nputils.rolling_window(array, axis, window, center, fill_value) + return dask_array_compat.sliding_window_view(array, window_shape, axis) + else: + return npcompat.sliding_window_view(array, window_shape, axis) def least_squares(lhs, rhs, rcond=None, skipna=False): diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 25c103374b8..4016fe44ca6 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -30,6 +30,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import builtins import operator +from distutils.version import LooseVersion from typing import Union import numpy as np @@ -96,3 +97,99 @@ def __array_function__(self, *args, **kwargs): IS_NEP18_ACTIVE = _is_nep18_active() + + +if LooseVersion(np.__version__) >= "1.20.0": + sliding_window_view = np.lib.stride_tricks.sliding_window_view +else: + from numpy.core.numeric import normalize_axis_tuple # type: ignore + from numpy.lib.stride_tricks import as_strided + + # copied from numpy.lib.stride_tricks + def sliding_window_view( + x, window_shape, axis=None, *, subok=False, writeable=False + ): + """ + Create a sliding window view into the array with the given window shape. + + Also known as rolling or moving window, the window slides across all + dimensions of the array and extracts subsets of the array at all window + positions. + + .. versionadded:: 1.20.0 + + Parameters + ---------- + x : array_like + Array to create the sliding window view from. + window_shape : int or tuple of int + Size of window over each axis that takes part in the sliding window. + If `axis` is not present, must have same length as the number of input + array dimensions. Single integers `i` are treated as if they were the + tuple `(i,)`. + axis : int or tuple of int, optional + Axis or axes along which the sliding window is applied. + By default, the sliding window is applied to all axes and + `window_shape[i]` will refer to axis `i` of `x`. + If `axis` is given as a `tuple of int`, `window_shape[i]` will refer to + the axis `axis[i]` of `x`. + Single integers `i` are treated as if they were the tuple `(i,)`. + subok : bool, optional + If True, sub-classes will be passed-through, otherwise the returned + array will be forced to be a base-class array (default). + writeable : bool, optional + When true, allow writing to the returned view. The default is false, + as this should be used with caution: the returned view contains the + same memory location multiple times, so writing to one location will + cause others to change. + + Returns + ------- + view : ndarray + Sliding window view of the array. The sliding window dimensions are + inserted at the end, and the original dimensions are trimmed as + required by the size of the sliding window. + That is, ``view.shape = x_shape_trimmed + window_shape``, where + ``x_shape_trimmed`` is ``x.shape`` with every entry reduced by one less + than the corresponding window size. + """ + window_shape = ( + tuple(window_shape) if np.iterable(window_shape) else (window_shape,) + ) + # first convert input to array, possibly keeping subclass + x = np.array(x, copy=False, subok=subok) + + window_shape_array = np.array(window_shape) + if np.any(window_shape_array < 0): + raise ValueError("`window_shape` cannot contain negative values") + + if axis is None: + axis = tuple(range(x.ndim)) + if len(window_shape) != len(axis): + raise ValueError( + f"Since axis is `None`, must provide " + f"window_shape for all dimensions of `x`; " + f"got {len(window_shape)} window_shape elements " + f"and `x.ndim` is {x.ndim}." + ) + else: + axis = normalize_axis_tuple(axis, x.ndim, allow_duplicate=True) + if len(window_shape) != len(axis): + raise ValueError( + f"Must provide matching length window_shape and " + f"axis; got {len(window_shape)} window_shape " + f"elements and {len(axis)} axes elements." + ) + + out_strides = x.strides + tuple(x.strides[ax] for ax in axis) + + # note: same axis can be windowed repeatedly + x_shape_trimmed = list(x.shape) + for ax, dim in zip(axis, window_shape): + if x_shape_trimmed[ax] < dim: + raise ValueError("window shape cannot be larger than input array shape") + x_shape_trimmed[ax] -= dim - 1 + out_shape = tuple(x_shape_trimmed) + window_shape + return as_strided( + x, strides=out_strides, shape=out_shape, subok=subok, writeable=writeable + ) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 926f7691ed7..3ca5dd700c9 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -131,81 +131,6 @@ def __setitem__(self, key, value): self._array[key] = np.moveaxis(value, vindex_positions, mixed_positions) -def rolling_window(a, axis, window, center, fill_value): - """ rolling window with padding. """ - pads = [(0, 0) for s in a.shape] - if not hasattr(axis, "__len__"): - axis = [axis] - window = [window] - center = [center] - - for ax, win, cent in zip(axis, window, center): - if cent: - start = int(win / 2) # 10 -> 5, 9 -> 4 - end = win - 1 - start - pads[ax] = (start, end) - else: - pads[ax] = (win - 1, 0) - a = np.pad(a, pads, mode="constant", constant_values=fill_value) - for ax, win in zip(axis, window): - a = _rolling_window(a, win, ax) - return a - - -def _rolling_window(a, window, axis=-1): - """ - Make an ndarray with a rolling window along axis. - - Parameters - ---------- - a : array_like - Array to add rolling window to - axis : int - axis position along which rolling window will be applied. - window : int - Size of rolling window - - Returns - ------- - Array that is a view of the original array with a added dimension - of size w. - - Examples - -------- - >>> x = np.arange(10).reshape((2, 5)) - >>> _rolling_window(x, 3, axis=-1) - array([[[0, 1, 2], - [1, 2, 3], - [2, 3, 4]], - - [[5, 6, 7], - [6, 7, 8], - [7, 8, 9]]]) - - Calculate rolling mean of last dimension: - >>> np.mean(_rolling_window(x, 3, axis=-1), -1) - array([[1., 2., 3.], - [6., 7., 8.]]) - - This function is taken from https://github.com/numpy/numpy/pull/31 - but slightly modified to accept axis option. - """ - axis = normalize_axis_index(axis, a.ndim) - a = np.swapaxes(a, axis, -1) - - if window < 1: - raise ValueError(f"`window` must be at least 1. Given : {window}") - if window > a.shape[-1]: - raise ValueError(f"`window` is too long. Given : {window}") - - shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) - strides = a.strides + (a.strides[-1],) - rolling = np.lib.stride_tricks.as_strided( - a, shape=shape, strides=strides, writeable=False - ) - return np.swapaxes(rolling, -2, axis) - - def _create_bottleneck_method(name, npmodule=np): def f(values, axis=None, **kwargs): dtype = kwargs.get("dtype", None) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index debc53e0bf3..272d7c355ea 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2028,7 +2028,7 @@ def rolling_window( For nd-rolling, should be list of integers. window_dim : str New name of the window dimension. - For nd-rolling, should be list of integers. + For nd-rolling, should be list of strings. center : bool, default: False If True, pad fill_value for both ends. Otherwise, pad in the head of the axis. @@ -2071,26 +2071,56 @@ def rolling_window( """ if fill_value is dtypes.NA: # np.nan is passed dtype, fill_value = dtypes.maybe_promote(self.dtype) - array = self.astype(dtype, copy=False).data + var = self.astype(dtype, copy=False) else: dtype = self.dtype - array = self.data + var = self - if isinstance(dim, list): - assert len(dim) == len(window) - assert len(dim) == len(window_dim) - assert len(dim) == len(center) - else: + if utils.is_scalar(dim): + for name, arg in zip( + ["window", "window_dim", "center"], [window, window_dim, center] + ): + if not utils.is_scalar(arg): + raise ValueError( + f"Expected {name}={arg!r} to be a scalar like 'dim'." + ) dim = [dim] - window = [window] - window_dim = [window_dim] - center = [center] + + # dim is now a list + nroll = len(dim) + if utils.is_scalar(window): + window = [window] * nroll + if utils.is_scalar(window_dim): + window_dim = [window_dim] * nroll + if utils.is_scalar(center): + center = [center] * nroll + if ( + len(dim) != len(window) + or len(dim) != len(window_dim) + or len(dim) != len(center) + ): + raise ValueError( + "'dim', 'window', 'window_dim', and 'center' must be the same length. " + f"Received dim={dim!r}, window={window!r}, window_dim={window_dim!r}," + f" and center={center!r}." + ) + + pads = {} + for d, win, cent in zip(dim, window, center): + if cent: + start = win // 2 # 10 -> 5, 9 -> 4 + end = win - 1 - start + pads[d] = (start, end) + else: + pads[d] = (win - 1, 0) + + padded = var.pad(pads, mode="constant", constant_values=fill_value) axis = [self.get_axis_num(d) for d in dim] new_dims = self.dims + tuple(window_dim) return Variable( new_dims, - duck_array_ops.rolling_window( - array, axis=axis, window=window, center=center, fill_value=fill_value + duck_array_ops.sliding_window_view( + padded.data, window_shape=window, axis=axis ), ) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index c38c3656eaf..20e357543d6 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6510,6 +6510,15 @@ def test_rolling_repr(da): assert repr(rolling_obj) == "DataArrayRolling [time->7(center),x->3(center)]" +@requires_dask +def test_repeated_rolling_rechunks(): + + # regression test for GH3277, GH2514 + dat = DataArray(np.random.rand(7653, 300), dims=("day", "item")) + dat_chunk = dat.chunk({"item": 20}) + dat_chunk.rolling(day=10).mean().rolling(day=250).std() + + def test_rolling_doc(da): rolling_obj = da.rolling(time=7) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 90e742dee62..e030b9d2e42 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -21,7 +21,6 @@ np_timedelta64_to_float, pd_timedelta_to_float, py_timedelta_to_float, - rolling_window, stack, timedelta_to_numeric, where, @@ -531,32 +530,6 @@ def test_isnull_with_dask(): assert_equal(da.isnull().load(), da.load().isnull()) -@pytest.mark.skipif(not has_dask, reason="This is for dask.") -@pytest.mark.parametrize("axis", [0, -1]) -@pytest.mark.parametrize("window", [3, 8, 11]) -@pytest.mark.parametrize("center", [True, False]) -def test_dask_rolling(axis, window, center): - import dask.array as da - - x = np.array(np.random.randn(100, 40), dtype=float) - dx = da.from_array(x, chunks=[(6, 30, 30, 20, 14), 8]) - - expected = rolling_window( - x, axis=axis, window=window, center=center, fill_value=np.nan - ) - actual = rolling_window( - dx, axis=axis, window=window, center=center, fill_value=np.nan - ) - assert isinstance(actual, da.Array) - assert_array_equal(actual, expected) - assert actual.shape == expected.shape - - # we need to take care of window size if chunk size is small - # window/2 should be smaller than the smallest chunk size. - with pytest.raises(ValueError): - rolling_window(dx, axis=axis, window=100, center=center, fill_value=np.nan) - - @pytest.mark.skipif(not has_dask, reason="This is for dask.") @pytest.mark.parametrize("axis", [0, -1, 1]) @pytest.mark.parametrize("edge_order", [1, 2]) diff --git a/xarray/tests/test_nputils.py b/xarray/tests/test_nputils.py index ccb825dc7e9..3c9c92ae2ba 100644 --- a/xarray/tests/test_nputils.py +++ b/xarray/tests/test_nputils.py @@ -1,8 +1,7 @@ import numpy as np -import pytest from numpy.testing import assert_array_equal -from xarray.core.nputils import NumpyVIndexAdapter, _is_contiguous, rolling_window +from xarray.core.nputils import NumpyVIndexAdapter, _is_contiguous def test_is_contiguous(): @@ -29,38 +28,3 @@ def test_vindex(): vindex[[0, 1], [0, 1], :] = vindex[[0, 1], [0, 1], :] vindex[[0, 1], :, [0, 1]] = vindex[[0, 1], :, [0, 1]] vindex[:, [0, 1], [0, 1]] = vindex[:, [0, 1], [0, 1]] - - -def test_rolling(): - x = np.array([1, 2, 3, 4], dtype=float) - - actual = rolling_window(x, axis=-1, window=3, center=True, fill_value=np.nan) - expected = np.array( - [[np.nan, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, np.nan]], dtype=float - ) - assert_array_equal(actual, expected) - - actual = rolling_window(x, axis=-1, window=3, center=False, fill_value=0.0) - expected = np.array([[0, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], dtype=float) - assert_array_equal(actual, expected) - - x = np.stack([x, x * 1.1]) - actual = rolling_window(x, axis=-1, window=3, center=False, fill_value=0.0) - expected = np.stack([expected, expected * 1.1], axis=0) - assert_array_equal(actual, expected) - - -@pytest.mark.parametrize("center", [[True, True], [False, False]]) -@pytest.mark.parametrize("axis", [(0, 1), (1, 2), (2, 0)]) -def test_nd_rolling(center, axis): - x = np.arange(7 * 6 * 8).reshape(7, 6, 8).astype(float) - window = [3, 3] - actual = rolling_window( - x, axis=axis, window=window, center=center, fill_value=np.nan - ) - expected = x - for ax, win, cent in zip(axis, window, center): - expected = rolling_window( - expected, axis=ax, window=win, center=cent, fill_value=np.nan - ) - assert_array_equal(actual, expected) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 9259862c86d..c951e7d3118 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -32,6 +32,7 @@ assert_array_equal, assert_equal, assert_identical, + raise_if_dask_computes, raises_regex, requires_dask, requires_sparse, @@ -873,26 +874,100 @@ def test_pad_constant_values(self, xr_arg, np_arg): ) assert_array_equal(actual, expected) - def test_rolling_window(self): + @pytest.mark.parametrize("d, w", (("x", 3), ("y", 5))) + def test_rolling_window(self, d, w): # Just a working test. See test_nputils for the algorithm validation v = self.cls(["x", "y", "z"], np.arange(40 * 30 * 2).reshape(40, 30, 2)) - for (d, w) in [("x", 3), ("y", 5)]: - v_rolling = v.rolling_window(d, w, d + "_window") - assert v_rolling.dims == ("x", "y", "z", d + "_window") - assert v_rolling.shape == v.shape + (w,) + v_rolling = v.rolling_window(d, w, d + "_window") + assert v_rolling.dims == ("x", "y", "z", d + "_window") + assert v_rolling.shape == v.shape + (w,) + + v_rolling = v.rolling_window(d, w, d + "_window", center=True) + assert v_rolling.dims == ("x", "y", "z", d + "_window") + assert v_rolling.shape == v.shape + (w,) + + # dask and numpy result should be the same + v_loaded = v.load().rolling_window(d, w, d + "_window", center=True) + assert_array_equal(v_rolling, v_loaded) + + # numpy backend should not be over-written + if isinstance(v._data, np.ndarray): + with pytest.raises(ValueError): + v_loaded[0] = 1.0 + + def test_rolling_1d(self): + x = self.cls("x", np.array([1, 2, 3, 4], dtype=float)) + + kwargs = dict(dim="x", window=3, window_dim="xw") + actual = x.rolling_window(**kwargs, center=True, fill_value=np.nan) + expected = Variable( + ("x", "xw"), + np.array( + [[np.nan, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, np.nan]], dtype=float + ), + ) + assert_equal(actual, expected) - v_rolling = v.rolling_window(d, w, d + "_window", center=True) - assert v_rolling.dims == ("x", "y", "z", d + "_window") - assert v_rolling.shape == v.shape + (w,) + actual = x.rolling_window(**kwargs, center=False, fill_value=0.0) + expected = self.cls( + ("x", "xw"), + np.array([[0, 0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], dtype=float), + ) + assert_equal(actual, expected) - # dask and numpy result should be the same - v_loaded = v.load().rolling_window(d, w, d + "_window", center=True) - assert_array_equal(v_rolling, v_loaded) + x = self.cls(("y", "x"), np.stack([x, x * 1.1])) + actual = x.rolling_window(**kwargs, center=False, fill_value=0.0) + expected = self.cls( + ("y", "x", "xw"), np.stack([expected.data, expected.data * 1.1], axis=0) + ) + assert_equal(actual, expected) - # numpy backend should not be over-written - if isinstance(v._data, np.ndarray): - with pytest.raises(ValueError): - v_loaded[0] = 1.0 + @pytest.mark.parametrize("center", [[True, True], [False, False]]) + @pytest.mark.parametrize("dims", [("x", "y"), ("y", "z"), ("z", "x")]) + def test_nd_rolling(self, center, dims): + x = self.cls( + ("x", "y", "z"), + np.arange(7 * 6 * 8).reshape(7, 6, 8).astype(float), + ) + window = [3, 3] + actual = x.rolling_window( + dim=dims, + window=window, + window_dim=[f"{k}w" for k in dims], + center=center, + fill_value=np.nan, + ) + expected = x + for dim, win, cent in zip(dims, window, center): + expected = expected.rolling_window( + dim=dim, + window=win, + window_dim=f"{dim}w", + center=cent, + fill_value=np.nan, + ) + assert_equal(actual, expected) + + @pytest.mark.parametrize( + ("dim, window, window_dim, center"), + [ + ("x", [3, 3], "x_w", True), + ("x", 3, ("x_w", "x_w"), True), + ("x", 3, "x_w", [True, True]), + ], + ) + def test_rolling_window_errors(self, dim, window, window_dim, center): + x = self.cls( + ("x", "y", "z"), + np.arange(7 * 6 * 8).reshape(7, 6, 8).astype(float), + ) + with pytest.raises(ValueError): + x.rolling_window( + dim=dim, + window=window, + window_dim=window_dim, + center=center, + ) class TestVariable(VariableSubclassobjects): @@ -2010,6 +2085,29 @@ def test_getitem_with_mask_nd_indexer(self): self.cls(("x", "y"), [[0, -1], [-1, 2]]), ) + @pytest.mark.parametrize("dim", ["x", "y"]) + @pytest.mark.parametrize("window", [3, 8, 11]) + @pytest.mark.parametrize("center", [True, False]) + def test_dask_rolling(self, dim, window, center): + import dask + import dask.array as da + + dask.config.set(scheduler="single-threaded") + + x = Variable(("x", "y"), np.array(np.random.randn(100, 40), dtype=float)) + dx = Variable(("x", "y"), da.from_array(x, chunks=[(6, 30, 30, 20, 14), 8])) + + expected = x.rolling_window( + dim, window, "window", center=center, fill_value=np.nan + ) + with raise_if_dask_computes(): + actual = dx.rolling_window( + dim, window, "window", center=center, fill_value=np.nan + ) + assert isinstance(actual.data, da.Array) + assert actual.shape == expected.shape + assert_equal(actual, expected) + @requires_sparse class TestVariableWithSparse: @@ -2121,23 +2219,23 @@ def test_datetime64(self): # These tests make use of multi-dimensional variables, which are not valid # IndexVariable objects: - @pytest.mark.xfail + @pytest.mark.skip def test_getitem_error(self): super().test_getitem_error() - @pytest.mark.xfail + @pytest.mark.skip def test_getitem_advanced(self): super().test_getitem_advanced() - @pytest.mark.xfail + @pytest.mark.skip def test_getitem_fancy(self): super().test_getitem_fancy() - @pytest.mark.xfail + @pytest.mark.skip def test_getitem_uint(self): super().test_getitem_fancy() - @pytest.mark.xfail + @pytest.mark.skip @pytest.mark.parametrize( "mode", [ @@ -2156,16 +2254,27 @@ def test_getitem_uint(self): def test_pad(self, mode, xr_arg, np_arg): super().test_pad(mode, xr_arg, np_arg) - @pytest.mark.xfail - @pytest.mark.parametrize("xr_arg, np_arg", _PAD_XR_NP_ARGS) + @pytest.mark.skip def test_pad_constant_values(self, xr_arg, np_arg): super().test_pad_constant_values(xr_arg, np_arg) - @pytest.mark.xfail + @pytest.mark.skip def test_rolling_window(self): super().test_rolling_window() - @pytest.mark.xfail + @pytest.mark.skip + def test_rolling_1d(self): + super().test_rolling_1d() + + @pytest.mark.skip + def test_nd_rolling(self): + super().test_nd_rolling() + + @pytest.mark.skip + def test_rolling_window_errors(self): + super().test_rolling_window_errors() + + @pytest.mark.skip def test_coarsen_2d(self): super().test_coarsen_2d()