diff --git a/doc/api.rst b/doc/api.rst
index 9a00630f88e..e1f70cfbdea 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -147,6 +147,7 @@ Computation
    Dataset.groupby
    Dataset.groupby_bins
    Dataset.rolling
+   Dataset.coarsen
    Dataset.resample
    Dataset.diff
    Dataset.quantile
@@ -312,6 +313,7 @@ Computation
    DataArray.groupby
    DataArray.groupby_bins
    DataArray.rolling
+   DataArray.coarsen
    DataArray.dt
    DataArray.resample
    DataArray.get_axis_num
diff --git a/doc/computation.rst b/doc/computation.rst
index f1d1450a6dc..412f24eee6a 100644
--- a/doc/computation.rst
+++ b/doc/computation.rst
@@ -199,6 +199,47 @@ You can also use ``construct`` to compute a weighted rolling sum:
 
 To avoid this, use ``skipna=False`` as the above example.
 
+.. _comput.coarsen:
+
+Coarsen large arrays
+====================
+
+``DataArray`` and ``Dataset`` objects include
+:py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen`
+methods, which support block aggregation along multiple dimensions,
+
+.. ipython:: python
+
+    x = np.linspace(0, 10, 300)
+    t = pd.date_range('15/12/1999', periods=364)
+    da = xr.DataArray(np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]),
+                      dims=['time', 'x'], coords={'time': t, 'x': x})
+    da
+
+To take a block mean over every 7 days along the ``time`` dimension and
+every 2 points along the ``x`` dimension,
+
+.. ipython:: python
+
+    da.coarsen(time=7, x=2).mean()
+
+:py:meth:`~xarray.DataArray.coarsen` raises a ``ValueError`` if the data
+length is not a multiple of the corresponding window size.
+You can choose the ``boundary='trim'`` or ``boundary='pad'`` options to trim
+the excess entries or to pad the last block with ``nan``,
+
+.. ipython:: python
+
+    da.coarsen(time=30, x=2, boundary='trim').mean()
+
+If you want to apply a specific function to a coordinate, you can pass the
+function or method name to the ``coord_func`` option,
+
+.. ipython:: python
+
+    da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean()
+
+
 Computation using Coordinates
 =============================
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index bfe6e57e3bc..b50df2af10e 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -28,6 +28,11 @@ Breaking changes
 
 Enhancements
 ~~~~~~~~~~~~
+- :py:meth:`~xarray.DataArray.coarsen` and
+  :py:meth:`~xarray.Dataset.coarsen` have been added.
+  See :ref:`comput.coarsen` for details.
+  (:issue:`2525`)
+  By `Keisuke Fujii `_.
 - Upsampling an array via interpolation with resample is now dask-compatible,
   as long as the array is not chunked along the resampling dimension.
   By `Spencer Clark `_.
@@ -76,8 +81,8 @@ Breaking changes
 - Minimum rasterio version increased from 0.36 to 1.0 (for ``open_rasterio``)
 - Time bounds variables are now also decoded according to CF conventions
   (:issue:`2565`). The previous behavior was to decode them only if they
-  had specific time attributes, now these attributes are copied
-  automatically from the corresponding time coordinate. This might
+  had specific time attributes, now these attributes are copied
+  automatically from the corresponding time coordinate. This might
   brake downstream code that was relying on these variables to be not
   decoded. By `Fabien Maussion `_.
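The block aggregation documented above boils down to a reshape followed by a reduction over the newly created window axes. A minimal NumPy sketch of that idea (illustration only, not part of the patch; it assumes the ``boundary='exact'`` case, i.e. every dimension size is an exact multiple of its window):

    import numpy as np

    # Block-mean a (6, 12) array with windows {axis 0: 3, axis 1: 4}: reshape
    # each coarsened axis into (n_blocks, window) and reduce over the window axes.
    arr = np.arange(6 * 12, dtype=float).reshape(6, 12)
    windows = {0: 3, 1: 4}  # axis -> window size; sizes assumed to divide evenly

    shape, window_axes = [], []
    for axis, size in enumerate(arr.shape):
        if axis in windows:
            shape.extend([size // windows[axis], windows[axis]])
            window_axes.append(len(shape) - 1)
        else:
            shape.append(size)

    coarse = arr.reshape(shape).mean(axis=tuple(window_axes))
    print(coarse.shape)  # -> (2, 3)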
diff --git a/xarray/core/common.py b/xarray/core/common.py
index 674c3b19b06..923d30aad11 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -590,6 +590,66 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
         return self._rolling_cls(self, dim, min_periods=min_periods,
                                  center=center)
 
+    def coarsen(self, dim=None, boundary='exact', side='left',
+                coord_func='mean', **dim_kwargs):
+        """
+        Coarsen object.
+
+        Parameters
+        ----------
+        dim : dict, optional
+            Mapping from the dimension name to the window size
+            (e.g. ``{'time': 7}``).
+        boundary : 'exact' | 'trim' | 'pad'
+            If 'exact', a ValueError will be raised if dimension size is not a
+            multiple of the window size. If 'trim', the excess entries are
+            dropped. If 'pad', NA will be padded.
+        side : 'left' or 'right' or mapping from dimension to 'left' or 'right'
+        coord_func : function (name) that is applied to the coordinates,
+            or a mapping from coordinate name to function (name).
+        **dim_kwargs : optional
+            The keyword arguments form of ``dim``.
+
+        Returns
+        -------
+        Coarsen object (core.rolling.DataArrayCoarsen for DataArray,
+        core.rolling.DatasetCoarsen for Dataset)
+
+        Examples
+        --------
+        Coarsen the long time series by averaging over every three days.
+
+        >>> da = xr.DataArray(np.linspace(0, 364, num=364),
+        ...                   dims='time',
+        ...                   coords={'time': pd.date_range(
+        ...                       '15/12/1999', periods=364)})
+        >>> da
+        <xarray.DataArray (time: 364)>
+        array([  0.      ,   1.002755,   2.00551 , ..., 361.99449 , 362.997245,
+               364.      ])
+        Coordinates:
+          * time     (time) datetime64[ns] 1999-12-15 1999-12-16 ... 2000-12-12
+        >>>
+        >>> da.coarsen(time=3, boundary='trim').mean()
+        <xarray.DataArray (time: 121)>
+        array([  1.002755,   4.011019,   7.019284, ..., 358.986226,
+               361.99449 ])
+        Coordinates:
+          * time     (time) datetime64[ns] 1999-12-16 1999-12-19 ... 2000-12-10
+        >>>
+
+        See Also
+        --------
+        core.rolling.DataArrayCoarsen
+        core.rolling.DatasetCoarsen
+        """
+        dim = either_dict_or_kwargs(dim, dim_kwargs, 'coarsen')
+        return self._coarsen_cls(
+            self, dim, boundary=boundary, side=side,
+            coord_func=coord_func)
+
     def resample(self, indexer=None, skipna=None, closed=None, label=None,
                  base=0, keep_attrs=None, loffset=None, **indexer_kwargs):
         """Returns a Resample object for performing resampling operations.
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 38aa1b42b92..a63b63b45bf 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -161,6 +161,7 @@ class DataArray(AbstractArray, DataWithCoords):
     """
     _groupby_cls = groupby.DataArrayGroupBy
     _rolling_cls = rolling.DataArrayRolling
+    _coarsen_cls = rolling.DataArrayCoarsen
     _resample_cls = resample.DataArrayResample
 
     dt = property(DatetimeAccessor)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 21f98e24c1f..29178c9b13c 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -317,6 +317,7 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords,
     """
     _groupby_cls = groupby.DatasetGroupBy
     _rolling_cls = rolling.DatasetRolling
+    _coarsen_cls = rolling.DatasetCoarsen
     _resample_cls = resample.DatasetResample
 
     def __init__(self, data_vars=None, coords=None, attrs=None,
diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
index ef89dba2ab8..b02eb4e899b 100644
--- a/xarray/core/duck_array_ops.py
+++ b/xarray/core/duck_array_ops.py
@@ -13,7 +13,7 @@
 import numpy as np
 import pandas as pd
 
-from . import dask_array_ops, dtypes, npcompat, nputils
+from . 
import dask_array_ops, dtypes, npcompat, nputils, utils from .nputils import nanfirst, nanlast from .pycompat import dask_array_type @@ -261,8 +261,6 @@ def f(values, axis=None, skipna=None, **kwargs): sum = _create_nan_agg_method('sum') sum.numeric_only = True sum.available_min_count = True -mean = _create_nan_agg_method('mean') -mean.numeric_only = True std = _create_nan_agg_method('std') std.numeric_only = True var = _create_nan_agg_method('var') @@ -278,6 +276,25 @@ def f(values, axis=None, skipna=None, **kwargs): cumsum_1d.numeric_only = True +_mean = _create_nan_agg_method('mean') + + +def mean(array, axis=None, skipna=None, **kwargs): + """ inhouse mean that can handle datatime dtype """ + array = asarray(array) + if array.dtype.kind == 'M': + offset = min(array) + # xarray always uses datetime[ns] for datetime + dtype = 'timedelta64[ns]' + return _mean(utils.datetime_to_numeric(array, offset), axis=axis, + skipna=skipna, **kwargs).astype(dtype) + offset + else: + return _mean(array, axis=axis, skipna=skipna, **kwargs) + + +mean.numeric_only = True + + def _nd_cum_func(cum_func, array, axis, **kwargs): array = asarray(array) if axis is None: diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 3f4e0fc3ac9..5624d9b5092 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -7,7 +7,7 @@ import numpy as np import pandas as pd -from . import rolling +from . import utils from .common import _contains_datetime_like_objects from .computation import apply_ufunc from .duck_array_ops import dask_array_type @@ -370,7 +370,7 @@ def _get_valid_fill_mask(arr, dim, limit): None''' kw = {dim: limit + 1} # we explicitly use construct method to avoid copy. - new_dim = rolling._get_new_dimname(arr.dims, '_window') + new_dim = utils.get_temp_dimname(arr.dims, '_window') return (arr.isnull().rolling(min_periods=1, **kw) .construct(new_dim, fill_value=False) .sum(new_dim, skipna=False)) <= limit diff --git a/xarray/core/ops.py b/xarray/core/ops.py index a0dd2212a8f..272a4eaf2f1 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -122,6 +122,20 @@ New {da_or_ds} object with `{name}` applied along its rolling dimnension. """ +_COARSEN_REDUCE_DOCSTRING_TEMPLATE = """\ +Coarsen this object by applying `{name}` along its dimensions. + +Parameters +---------- +**kwargs : dict + Additional keyword arguments passed on to `{name}`. + +Returns +------- +reduced : DataArray or Dataset + New object with `{name}` applied along its coasen dimnensions. +""" + def fillna(data, other, join="left", dataset_join="left"): """Fill missing values in this object with data from the other object. @@ -378,3 +392,15 @@ def inject_datasetrolling_methods(cls): func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format( name=func.__name__, da_or_ds='Dataset') setattr(cls, 'count', func) + + +def inject_coarsen_methods(cls): + # standard numpy reduce methods + methods = [(name, getattr(duck_array_ops, name)) + for name in NAN_REDUCE_METHODS] + for name, f in methods: + func = cls._reduce_method(f) + func.__name__ = name + func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format( + name=func.__name__) + setattr(cls, name, func) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 09b632e47a6..57463ef5987 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -5,32 +5,14 @@ import numpy as np -from . import dtypes +from . 
import dtypes, duck_array_ops, utils from .dask_array_ops import dask_rolling_wrapper from .ops import ( - bn, has_bottleneck, inject_bottleneck_rolling_methods, - inject_datasetrolling_methods) + bn, has_bottleneck, inject_coarsen_methods, + inject_bottleneck_rolling_methods, inject_datasetrolling_methods) from .pycompat import OrderedDict, dask_array_type, zip -def _get_new_dimname(dims, new_dim): - """ Get an new dimension name based on new_dim, that is not used in dims. - If the same name exists, we add an underscore(s) in the head. - - Example1: - dims: ['a', 'b', 'c'] - new_dim: ['_rolling'] - -> ['_rolling'] - Example2: - dims: ['a', 'b', 'c', '_rolling'] - new_dim: ['_rolling'] - -> ['__rolling'] - """ - while new_dim in dims: - new_dim = '_' + new_dim - return new_dim - - class Rolling(object): """A object that implements the moving window pattern. @@ -231,7 +213,7 @@ def reduce(self, func, **kwargs): reduced : DataArray Array with summarized data. """ - rolling_dim = _get_new_dimname(self.obj.dims, '_rolling_dim') + rolling_dim = utils.get_temp_dimname(self.obj.dims, '_rolling_dim') windows = self.construct(rolling_dim) result = windows.reduce(func, dim=rolling_dim, **kwargs) @@ -242,7 +224,7 @@ def reduce(self, func, **kwargs): def _counts(self): """ Number of non-nan entries in each rolling window. """ - rolling_dim = _get_new_dimname(self.obj.dims, '_rolling_dim') + rolling_dim = utils.get_temp_dimname(self.obj.dims, '_rolling_dim') # We use False as the fill_value instead of np.nan, since boolean # array is faster to be reduced than object array. # The use of skipna==False is also faster since it does not need to @@ -454,5 +436,121 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA): **{self.dim: slice(None, None, stride)}) +class Coarsen(object): + """A object that implements the coarsen. + + See Also + -------- + Dataset.coarsen + DataArray.coarsen + """ + + _attributes = ['windows', 'side', 'trim_excess'] + + def __init__(self, obj, windows, boundary, side, coord_func): + """ + Moving window object. + + Parameters + ---------- + obj : Dataset or DataArray + Object to window. + windows : A mapping from a dimension name to window size + dim : str + Name of the dimension to create the rolling iterator + along (e.g., `time`). + window : int + Size of the moving window. + boundary : 'exact' | 'trim' | 'pad' + If 'exact', a ValueError will be raised if dimension size is not a + multiple of window size. If 'trim', the excess indexes are trimed. + If 'pad', NA will be padded. + side : 'left' or 'right' or mapping from dimension to 'left' or 'right' + coord_func: mapping from coordinate name to func. + + Returns + ------- + coarsen + """ + self.obj = obj + self.windows = windows + self.side = side + self.boundary = boundary + + if not utils.is_dict_like(coord_func): + coord_func = {d: coord_func for d in self.obj.dims} + for c in self.obj.coords: + if c not in coord_func: + coord_func[c] = duck_array_ops.mean + self.coord_func = coord_func + + def __repr__(self): + """provide a nice str repr of our coarsen object""" + + attrs = ["{k}->{v}".format(k=k, v=getattr(self, k)) + for k in self._attributes + if getattr(self, k, None) is not None] + return "{klass} [{attrs}]".format(klass=self.__class__.__name__, + attrs=','.join(attrs)) + + +class DataArrayCoarsen(Coarsen): + @classmethod + def _reduce_method(cls, func): + """ + Return a wrapped function for injecting numpy methods. 
+ see ops.inject_coarsen_methods + """ + def wrapped_func(self, **kwargs): + from .dataarray import DataArray + + reduced = self.obj.variable.coarsen( + self.windows, func, self.boundary, self.side) + coords = {} + for c, v in self.obj.coords.items(): + if c == self.obj.name: + coords[c] = reduced + else: + if any(d in self.windows for d in v.dims): + coords[c] = v.variable.coarsen( + self.windows, self.coord_func[c], + self.boundary, self.side) + else: + coords[c] = v + return DataArray(reduced, dims=self.obj.dims, coords=coords) + + return wrapped_func + + +class DatasetCoarsen(Coarsen): + @classmethod + def _reduce_method(cls, func): + """ + Return a wrapped function for injecting numpy methods. + see ops.inject_coarsen_methods + """ + def wrapped_func(self, **kwargs): + from .dataset import Dataset + + reduced = OrderedDict() + for key, da in self.obj.data_vars.items(): + reduced[key] = da.variable.coarsen( + self.windows, func, self.boundary, self.side) + + coords = {} + for c, v in self.obj.coords.items(): + if any(d in self.windows for d in v.dims): + coords[c] = v.variable.coarsen( + self.windows, self.coord_func[c], + self.boundary, self.side) + else: + coords[c] = v.variable + return Dataset(reduced, coords=coords) + + return wrapped_func + + inject_bottleneck_rolling_methods(DataArrayRolling) inject_datasetrolling_methods(DatasetRolling) +inject_coarsen_methods(DataArrayCoarsen) +inject_coarsen_methods(DatasetCoarsen) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index fbda658c23f..e961426195e 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -622,10 +622,36 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): ------- array """ + from . import duck_array_ops + if offset is None: offset = array.min() array = array - offset if datetime_unit: - return (array / np.timedelta64(1, datetime_unit)).astype(dtype) - return array.astype(dtype) + array = array / np.timedelta64(1, datetime_unit) + # convert np.NaT to np.nan + if array.dtype.kind in 'mM': + if hasattr(array, 'isnull'): + return np.where(array.isnull(), np.nan, array.astype(dtype)) + return np.where(duck_array_ops.isnull(array), np.nan, + array.astype(dtype)) + return array + + +def get_temp_dimname(dims, new_dim): + """ Get an new dimension name based on new_dim, that is not used in dims. + If the same name exists, we add an underscore(s) in the head. 
+ + Example1: + dims: ['a', 'b', 'c'] + new_dim: ['_rolling'] + -> ['_rolling'] + Example2: + dims: ['a', 'b', 'c', '_rolling'] + new_dim: ['_rolling'] + -> ['__rolling'] + """ + while new_dim in dims: + new_dim = '_' + new_dim + return new_dim diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 243487db034..48acc8edff9 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1019,7 +1019,7 @@ def pad_with_fill_value(self, pad_widths=None, fill_value=dtypes.NA, pad_widths = either_dict_or_kwargs(pad_widths, pad_widths_kwargs, 'pad') - if fill_value is dtypes.NA: # np.nan is passed + if fill_value is dtypes.NA: dtype, fill_value = dtypes.maybe_promote(self.dtype) else: dtype = self.dtype @@ -1641,6 +1641,85 @@ def rolling_window(self, dim, window, window_dim, center=False, array, axis=self.get_axis_num(dim), window=window, center=center, fill_value=fill_value)) + def coarsen(self, windows, func, boundary='exact', side='left'): + """ + Apply + """ + windows = {k: v for k, v in windows.items() if k in self.dims} + if not windows: + return self.copy() + + reshaped, axes = self._coarsen_reshape(windows, boundary, side) + if isinstance(func, basestring): + name = func + func = getattr(duck_array_ops, name, None) + if func is None: + raise NameError('{} is not a valid method.'.format(name)) + return type(self)(self.dims, func(reshaped, axis=axes), self._attrs) + + def _coarsen_reshape(self, windows, boundary, side): + """ + Construct a reshaped-array for corsen + """ + if not utils.is_dict_like(boundary): + boundary = {d: boundary for d in windows.keys()} + + if not utils.is_dict_like(side): + side = {d: side for d in windows.keys()} + + # remove unrelated dimensions + boundary = {k: v for k, v in boundary.items() if k in windows} + side = {k: v for k, v in side.items() if k in windows} + + for d, window in windows.items(): + if window <= 0: + raise ValueError('window must be > 0. Given {}'.format(window)) + + variable = self + for d, window in windows.items(): + # trim or pad the object + size = variable.shape[self._get_axis_num(d)] + n = int(size / window) + if boundary[d] == 'exact': + if n * window != size: + raise ValueError( + 'Could not coarsen a dimension of size {} with ' + 'window {}'.format(size, window)) + elif boundary[d] == 'trim': + if side[d] == 'left': + variable = variable.isel({d: slice(0, window * n)}) + else: + excess = size - window * n + variable = variable.isel({d: slice(excess, None)}) + elif boundary[d] == 'pad': # pad + pad = window * n - size + if pad < 0: + pad += window + if side[d] == 'left': + pad_widths = {d: (0, pad)} + else: + pad_widths = {d: (pad, 0)} + variable = variable.pad_with_fill_value(pad_widths) + else: + raise TypeError( + "{} is invalid for boundary. 
Valid option is 'exact', " + "'trim' and 'pad'".format(boundary[d])) + + shape = [] + axes = [] + axis_count = 0 + for i, d in enumerate(variable.dims): + if d in windows: + size = variable.shape[i] + shape.append(int(size / windows[d])) + shape.append(windows[d]) + axis_count += 1 + axes.append(i + axis_count) + else: + shape.append(variable.shape[i]) + + return variable.data.reshape(shape), tuple(axes) + @property def real(self): return type(self)(self.dims, self.data.real, self._attrs) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 6f6287efcac..e7e091efa4c 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4432,6 +4432,48 @@ def ds(request): 'y': range(2)}) +@pytest.mark.parametrize('dask', [True, False]) +@pytest.mark.parametrize(('boundary', 'side'), [ + ('trim', 'left'), ('pad', 'right')]) +def test_coarsen(ds, dask, boundary, side): + if dask and has_dask: + ds = ds.chunk({'x': 4}) + + actual = ds.coarsen(time=2, x=3, boundary=boundary, side=side).max() + assert_equal( + actual['z1'], + ds['z1'].coarsen(time=2, x=3, boundary=boundary, side=side).max()) + # coordinate should be mean by default + assert_equal(actual['time'], ds['time'].coarsen( + time=2, x=3, boundary=boundary, side=side).mean()) + + +@pytest.mark.parametrize('dask', [True, False]) +def test_coarsen_coords(ds, dask): + if dask and has_dask: + ds = ds.chunk({'x': 4}) + + # check if coord_func works + actual = ds.coarsen(time=2, x=3, boundary='trim', + coord_func={'time': 'max'}).max() + assert_equal(actual['z1'], + ds['z1'].coarsen(time=2, x=3, boundary='trim').max()) + assert_equal(actual['time'], + ds['time'].coarsen(time=2, x=3, boundary='trim').max()) + + # raise if exact + with pytest.raises(ValueError): + ds.coarsen(x=3).mean() + # should be no error + ds.isel(x=slice(0, 3 * (len(ds['x']) // 3))).coarsen(x=3).mean() + + # working test with pd.time + da = xr.DataArray( + np.linspace(0, 365, num=364), dims='time', + coords={'time': pd.date_range('15/12/1999', periods=364)}) + actual = da.coarsen(time=2).mean() + + def test_rolling_properties(ds): # catching invalid args with pytest.raises(ValueError) as exception: diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 5ea5b3d2a42..2a6a957e10f 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -251,6 +251,26 @@ def assert_dask_array(da, dask): assert isinstance(da.data, dask_array_type) +@pytest.mark.parametrize('dask', [False, True]) +def test_datetime_reduce(dask): + time = np.array(pd.date_range('15/12/1999', periods=11)) + time[8: 11] = np.nan + da = DataArray( + np.linspace(0, 365, num=11), dims='time', coords={'time': time}) + + if dask and has_dask: + chunks = {'time': 5} + da = da.chunk(chunks) + + actual = da['time'].mean() + assert not pd.isnull(actual) + actual = da['time'].mean(skipna=False) + assert pd.isnull(actual) + + # test for a 0d array + assert da['time'][0].mean() == da['time'][:1].mean() + + @pytest.mark.parametrize('dim_num', [1, 2]) @pytest.mark.parametrize('dtype', [float, int, np.float32, np.bool_]) @pytest.mark.parametrize('dask', [False, True]) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 08cab4b3541..6dd50e11fd3 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1684,6 +1684,58 @@ def assert_assigned_2d(array, key_x, key_y, values): expected = Variable(['x', 'y'], [[2, 3], [3, 4], [4, 5]]) assert_identical(v, expected) + def 
test_coarsen(self): + v = self.cls(['x'], [0, 1, 2, 3, 4]) + actual = v.coarsen({'x': 2}, boundary='pad', func='mean') + expected = self.cls(['x'], [0.5, 2.5, 4]) + assert_identical(actual, expected) + + actual = v.coarsen({'x': 2}, func='mean', boundary='pad', + side='right') + expected = self.cls(['x'], [0, 1.5, 3.5]) + assert_identical(actual, expected) + + actual = v.coarsen({'x': 2}, func=np.mean, side='right', + boundary='trim') + expected = self.cls(['x'], [1.5, 3.5]) + assert_identical(actual, expected) + + # working test + v = self.cls(['x', 'y', 'z'], + np.arange(40 * 30 * 2).reshape(40, 30, 2)) + for windows, func, side, boundary in [ + ({'x': 2}, np.mean, 'left', 'trim'), + ({'x': 2}, np.median, {'x': 'left'}, 'pad'), + ({'x': 2, 'y': 3}, np.max, 'left', {'x': 'pad', 'y': 'trim'})]: + v.coarsen(windows, func, boundary, side) + + def test_coarsen_2d(self): + # 2d-mean should be the same with the successive 1d-mean + v = self.cls(['x', 'y'], np.arange(6 * 12).reshape(6, 12)) + actual = v.coarsen({'x': 3, 'y': 4}, func='mean') + expected = v.coarsen({'x': 3}, func='mean').coarsen( + {'y': 4}, func='mean') + assert_equal(actual, expected) + + v = self.cls(['x', 'y'], np.arange(7 * 12).reshape(7, 12)) + actual = v.coarsen({'x': 3, 'y': 4}, func='mean', boundary='trim') + expected = v.coarsen({'x': 3}, func='mean', boundary='trim').coarsen( + {'y': 4}, func='mean', boundary='trim') + assert_equal(actual, expected) + + # if there is nan, the two should be different + v = self.cls(['x', 'y'], 1.0 * np.arange(6 * 12).reshape(6, 12)) + v[2, 4] = np.nan + v[3, 5] = np.nan + actual = v.coarsen({'x': 3, 'y': 4}, func='mean', boundary='trim') + expected = v.coarsen({'x': 3}, func='sum', boundary='trim').coarsen( + {'y': 4}, func='sum', boundary='trim') / 12 + assert not actual.equals(expected) + # adjusting the nan count + expected[0, 1] *= 12 / 11 + expected[1, 1] *= 12 / 11 + assert_allclose(actual, expected) + @requires_dask class TestVariableWithDask(VariableSubclassobjects): @@ -1838,6 +1890,10 @@ def test_pad(self): def test_rolling_window(self): super(TestIndexVariable, self).test_rolling_window() + @pytest.mark.xfail + def test_coarsen_2d(self): + super(TestIndexVariable, self).test_coarsen_2d() + class TestAsCompatibleData(object): def test_unchanged_types(self):
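
For completeness, a short, hypothetical usage sketch of the API introduced by this patch (based only on the documentation and tests above; names and values are illustrative, and the exact output format is not checked here):

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Hypothetical smoke test of the coarsen API: daily data coarsened into
    # 7-day blocks, trimming the one day that does not fill a complete block
    # and labelling each block with its first timestamp.
    da = xr.DataArray(
        np.random.rand(365),
        dims='time',
        coords={'time': pd.date_range('2000-01-01', periods=365)},
    )
    weekly = da.coarsen(time=7, boundary='trim', coord_func={'time': 'min'}).mean()
    assert weekly.sizes['time'] == 52  # 365 days -> 52 complete 7-day blocks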