diff --git a/ci/requirements-py37.yml b/ci/requirements-py37.yml index fe5afd589c8..723ad24d24d 100644 --- a/ci/requirements-py37.yml +++ b/ci/requirements-py37.yml @@ -30,3 +30,4 @@ dependencies: - pydap - pip: - mypy==0.650 + - numbagg diff --git a/doc/api.rst b/doc/api.rst index 33c8d9d3ceb..811e3241438 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -148,6 +148,7 @@ Computation Dataset.groupby Dataset.groupby_bins Dataset.rolling + Dataset.rolling_exp Dataset.coarsen Dataset.resample Dataset.diff @@ -315,6 +316,7 @@ Computation DataArray.groupby DataArray.groupby_bins DataArray.rolling + DataArray.rolling_exp DataArray.coarsen DataArray.dt DataArray.resample @@ -535,6 +537,7 @@ Rolling objects core.rolling.DatasetRolling core.rolling.DatasetRolling.construct core.rolling.DatasetRolling.reduce + core.rolling_exp.RollingExp Resample objects ================ diff --git a/doc/computation.rst b/doc/computation.rst index 3100925a7d3..b06d7959504 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -190,6 +190,22 @@ We can also manually iterate through ``Rolling`` objects: for label, arr_window in r: # arr_window is a view of x +.. _comput.rolling_exp: + +While ``rolling`` provides a simple moving average, ``DataArray`` also supports +an exponential moving average with :py:meth:`~xarray.DataArray.rolling_exp`. +This is similiar to pandas' ``ewm`` method. numbagg_ is required. + +.. _numbagg: https://github.com/shoyer/numbagg + +.. code:: python + + arr.rolling_exp(y=3).mean() + +The ``rolling_exp`` method takes a ``window_type`` kwarg, which can be ``'alpha'``, +``'com'`` (for ``center-of-mass``), ``'span'``, and ``'halflife'``. The default is +``span``. + Finally, the rolling object has a ``construct`` method which returns a view of the original ``DataArray`` with the windowed dimension in the last position. diff --git a/doc/installing.rst b/doc/installing.rst index f624da18611..b9d1b4d0ba4 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -45,6 +45,8 @@ For accelerating xarray - `bottleneck `__: speeds up NaN-skipping and rolling window aggregations by a large factor (1.1 or later) +- `numbagg `_: for exponential rolling + window operations For parallel computing ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/whats-new.rst b/doc/whats-new.rst index ca50856a25e..c5b9a01a09e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,6 +30,11 @@ Enhancements - Add ``fill_value`` argument for reindex, align, and merge operations to enable custom fill values. (:issue:`2876`) By `Zach Griffith `_. +- :py:meth:`~xarray.DataArray.rolling_exp` and + :py:meth:`~xarray.Dataset.rolling_exp` added, similar to pandas' + ``pd.DataFrame.ewm`` method. Calling ``.mean`` on the resulting object + will return an exponentially weighted moving average. + By `Maximilian Roos `_. - Character arrays' character dimension name decoding and encoding handled by ``var.encoding['char_dim_name']`` (:issue:`2895`) By `James McCreight `_. @@ -188,6 +193,7 @@ Other enhancements - Upsampling an array via interpolation with resample is now dask-compatible, as long as the array is not chunked along the resampling dimension. By `Spencer Clark `_. + - :py:func:`xarray.testing.assert_equal` and :py:func:`xarray.testing.assert_identical` now provide a more detailed report showing what exactly differs between the two objects (dimensions / @@ -737,7 +743,7 @@ Enhancements arguments in ``data_vars`` to indexes set explicitly in ``coords``, where previously an error would be raised. (:issue:`674`) - By `Maximilian Roos `_. + By `Maximilian Roos `_. - :py:meth:`~DataArray.sel`, :py:meth:`~DataArray.isel` & :py:meth:`~DataArray.reindex`, (and their :py:class:`Dataset` counterparts) now support supplying a ``dict`` @@ -745,12 +751,12 @@ Enhancements of supplying `kwargs`. This allows for more robust behavior of dimension names which conflict with other keyword names, or are not strings. - By `Maximilian Roos `_. + By `Maximilian Roos `_. - :py:meth:`~DataArray.rename` now supports supplying ``**kwargs``, as an alternative to the existing approach of supplying a ``dict`` as the first argument. - By `Maximilian Roos `_. + By `Maximilian Roos `_. - :py:meth:`~DataArray.cumsum` and :py:meth:`~DataArray.cumprod` now support aggregation over multiple dimensions at the same time. This is the default @@ -915,7 +921,7 @@ Enhancements which test each value in the array for whether it is contained in the supplied list, returning a bool array. See :ref:`selecting values with isin` for full details. Similar to the ``np.isin`` function. - By `Maximilian Roos `_. + By `Maximilian Roos `_. - Some speed improvement to construct :py:class:`~xarray.DataArrayRolling` object (:issue:`1993`) By `Keisuke Fujii `_. @@ -2110,7 +2116,7 @@ Enhancements ~~~~~~~~~~~~ - New documentation on :ref:`panel transition`. By - `Maximilian Roos `_. + `Maximilian Roos `_. - New ``Dataset`` and ``DataArray`` methods :py:meth:`~xarray.Dataset.to_dict` and :py:meth:`~xarray.Dataset.from_dict` to allow easy conversion between dictionaries and xarray objects (:issue:`432`). See @@ -2131,9 +2137,9 @@ Bug fixes (:issue:`953`). By `Stephan Hoyer `_. - ``Dataset.__dir__()`` (i.e. the method python calls to get autocomplete options) failed if one of the dataset's keys was not a string (:issue:`852`). - By `Maximilian Roos `_. + By `Maximilian Roos `_. - ``Dataset`` constructor can now take arbitrary objects as values - (:issue:`647`). By `Maximilian Roos `_. + (:issue:`647`). By `Maximilian Roos `_. - Clarified ``copy`` argument for :py:meth:`~xarray.DataArray.reindex` and :py:func:`~xarray.align`, which now consistently always return new xarray objects (:issue:`927`). diff --git a/setup.cfg b/setup.cfg index cdfe2ec3e36..bfa49118d84 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,8 +9,11 @@ filterwarnings = ignore:Using a non-tuple sequence for multidimensional indexing is deprecated:FutureWarning env = UVCDAT_ANONYMOUS_LOG=no +markers = + flaky: flaky tests + network: tests requiring a network connection + slow: slow tests -# This should be kept in sync with .pep8speaks.yml [flake8] max-line-length=79 ignore= @@ -23,10 +26,6 @@ ignore= F401 exclude= doc -markers = - flaky: flaky tests - network: tests requiring a network connection - slow: slow tests [isort] default_section=THIRDPARTY @@ -62,6 +61,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-nc_time_axis.*] ignore_missing_imports = True +[mypy-numbagg.*] +ignore_missing_imports = True [mypy-numpy.*] ignore_missing_imports = True [mypy-netCDF4.*] diff --git a/xarray/core/common.py b/xarray/core/common.py index 4e5133fd8c6..0195be62500 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -12,6 +12,7 @@ from .arithmetic import SupportsArithmetic from .options import _get_keep_attrs from .pycompat import dask_array_type +from .rolling_exp import RollingExp from .utils import Frozen, ReprObject, SortedKeysDict, either_dict_or_kwargs # Used as a sentinel value to indicate a all dimensions @@ -86,6 +87,7 @@ def wrapped_func(self, dim=None, **kwargs): # type: ignore class AbstractArray(ImplementsArrayReduce): """Shared base class for DataArray and Variable. """ + def __bool__(self: Any) -> bool: return bool(self.values) @@ -249,6 +251,8 @@ def get_squeeze_dims(xarray_obj, class DataWithCoords(SupportsArithmetic, AttrAccessMixin): """Shared base class for Dataset and DataArray.""" + _rolling_exp_cls = RollingExp + def squeeze(self, dim: Union[Hashable, Iterable[Hashable], None] = None, drop: bool = False, axis: Union[int, Iterable[int], None] = None): @@ -553,7 +557,7 @@ def groupby_bins(self, group, bins, right: bool = True, labels=None, def rolling(self, dim: Optional[Mapping[Hashable, int]] = None, min_periods: Optional[int] = None, center: bool = False, - **dim_kwargs: int): + **window_kwargs: int): """ Rolling window object. @@ -568,9 +572,9 @@ def rolling(self, dim: Optional[Mapping[Hashable, int]] = None, setting min_periods equal to the size of the window. center : boolean, default False Set the labels at the center of the window. - **dim_kwargs : optional + **window_kwargs : optional The keyword arguments form of ``dim``. - One of dim or dim_kwargs must be provided. + One of dim or window_kwargs must be provided. Returns ------- @@ -609,15 +613,54 @@ def rolling(self, dim: Optional[Mapping[Hashable, int]] = None, core.rolling.DataArrayRolling core.rolling.DatasetRolling """ # noqa - dim = either_dict_or_kwargs(dim, dim_kwargs, 'rolling') + dim = either_dict_or_kwargs(dim, window_kwargs, 'rolling') return self._rolling_cls(self, dim, min_periods=min_periods, center=center) + def rolling_exp( + self, + window: Optional[Mapping[Hashable, int]] = None, + window_type: str = 'span', + **window_kwargs + ): + """ + Exponentially-weighted moving window. + Similar to EWM in pandas + + Requires the optional Numbagg dependency. + + Parameters + ---------- + window : A single mapping from a dimension name to window value, + optional + dim : str + Name of the dimension to create the rolling exponential window + along (e.g., `time`). + window : int + Size of the moving window. The type of this is specified in + `window_type` + window_type : str, one of ['span', 'com', 'halflife', 'alpha'], + default 'span' + The format of the previously supplied window. Each is a simple + numerical transformation of the others. Described in detail: + https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.ewm.html + **window_kwargs : optional + The keyword arguments form of ``window``. + One of window or window_kwargs must be provided. + + See Also + -------- + core.rolling_exp.RollingExp + """ + window = either_dict_or_kwargs(window, window_kwargs, 'rolling_exp') + + return self._rolling_exp_cls(self, window, window_type) + def coarsen(self, dim: Optional[Mapping[Hashable, int]] = None, boundary: str = 'exact', side: Union[str, Mapping[Hashable, str]] = 'left', coord_func: str = 'mean', - **dim_kwargs: int): + **window_kwargs: int): """ Coarsen object. @@ -671,7 +714,7 @@ def coarsen(self, dim: Optional[Mapping[Hashable, int]] = None, core.rolling.DataArrayCoarsen core.rolling.DatasetCoarsen """ - dim = either_dict_or_kwargs(dim, dim_kwargs, 'coarsen') + dim = either_dict_or_kwargs(dim, window_kwargs, 'coarsen') return self._coarsen_cls( self, dim, boundary=boundary, side=side, coord_func=coord_func) diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py new file mode 100644 index 00000000000..ff6baef5c3a --- /dev/null +++ b/xarray/core/rolling_exp.py @@ -0,0 +1,106 @@ +import numpy as np + +from .pycompat import dask_array_type + + +def _get_alpha(com=None, span=None, halflife=None, alpha=None): + # pandas defines in terms of com (converting to alpha in the algo) + # so use its function to get a com and then convert to alpha + + com = _get_center_of_mass(com, span, halflife, alpha) + return 1 / (1 + com) + + +def move_exp_nanmean(array, *, axis, alpha): + if isinstance(array, dask_array_type): + raise TypeError("rolling_exp is not currently support for dask arrays") + import numbagg + if axis == (): + return array.astype(np.float64) + else: + return numbagg.move_exp_nanmean( + array, axis=axis, alpha=alpha) + + +def _get_center_of_mass(comass, span, halflife, alpha): + """ + Vendored from pandas.core.window._get_center_of_mass + + See licenses/PANDAS_LICENSE for the function's license + """ + from pandas.core import common as com + valid_count = com.count_not_none(comass, span, halflife, alpha) + if valid_count > 1: + raise ValueError("comass, span, halflife, and alpha " + "are mutually exclusive") + + # Convert to center of mass; domain checks ensure 0 < alpha <= 1 + if comass is not None: + if comass < 0: + raise ValueError("comass must satisfy: comass >= 0") + elif span is not None: + if span < 1: + raise ValueError("span must satisfy: span >= 1") + comass = (span - 1) / 2. + elif halflife is not None: + if halflife <= 0: + raise ValueError("halflife must satisfy: halflife > 0") + decay = 1 - np.exp(np.log(0.5) / halflife) + comass = 1 / decay - 1 + elif alpha is not None: + if alpha <= 0 or alpha > 1: + raise ValueError("alpha must satisfy: 0 < alpha <= 1") + comass = (1.0 - alpha) / alpha + else: + raise ValueError("Must pass one of comass, span, halflife, or alpha") + + return float(comass) + + +class RollingExp: + """ + Exponentially-weighted moving window object. + Similar to EWM in pandas + + Parameters + ---------- + obj : Dataset or DataArray + Object to window. + windows : A single mapping from a single dimension name to window value + dim : str + Name of the dimension to create the rolling exponential window + along (e.g., `time`). + window : int + Size of the moving window. The type of this is specified in + `window_type` + window_type : str, one of ['span', 'com', 'halflife', 'alpha'], default 'span' + The format of the previously supplied window. Each is a simple + numerical transformation of the others. Described in detail: + https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.ewm.html + + Returns + ------- + RollingExp : type of input argument + """ # noqa + + def __init__(self, obj, windows, window_type='span'): + self.obj = obj + dim, window = next(iter(windows.items())) + self.dim = dim + self.alpha = _get_alpha(**{window_type: window}) + + def mean(self): + """ + Exponentially weighted moving average + + Examples + -------- + >>> da = xr.DataArray([1,1,2,2,2], dims='x') + >>> da.rolling_exp(x=2, window_type='span').mean() + + array([1. , 1. , 1.692308, 1.9 , 1.966942]) + Dimensions without coordinates: x + """ + + return self.obj.reduce( + move_exp_nanmean, dim=self.dim, alpha=self.alpha) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index dc8b26e4524..81bb1a1e18d 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -74,6 +74,7 @@ def LooseVersion(vstring): has_np113, requires_np113 = _importorskip('numpy', minversion='1.13.0') has_iris, requires_iris = _importorskip('iris') has_cfgrib, requires_cfgrib = _importorskip('cfgrib') +has_numbagg, requires_numbagg = _importorskip('numbagg') # some special cases has_h5netcdf07, requires_h5netcdf07 = _importorskip('h5netcdf', diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index a8825055479..beb77f343fa 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -20,7 +20,7 @@ LooseVersion, ReturnItem, assert_allclose, assert_array_equal, assert_equal, assert_identical, raises_regex, requires_bottleneck, requires_cftime, requires_dask, requires_iris, requires_np113, - requires_scipy, source_ndarray) + requires_numbagg, requires_scipy, source_ndarray) class TestDataArray: @@ -3919,14 +3919,14 @@ def test_to_and_from_iris(self): assert coord.var_name == original_coord.name assert_array_equal( coord.points, CFDatetimeCoder().encode(original_coord).values) - assert (actual.coord_dims(coord) == - original.get_axis_num( + assert (actual.coord_dims(coord) + == original.get_axis_num( original.coords[coord.var_name].dims)) - assert (actual.coord('distance2').attributes['foo'] == - original.coords['distance2'].attrs['foo']) - assert (actual.coord('distance').units == - cf_units.Unit(original.coords['distance'].units)) + assert (actual.coord('distance2').attributes['foo'] + == original.coords['distance2'].attrs['foo']) + assert (actual.coord('distance').units + == cf_units.Unit(original.coords['distance'].units)) assert actual.attributes['baz'] == original.attrs['baz'] assert actual.standard_name == original.attrs['standard_name'] @@ -3984,14 +3984,14 @@ def test_to_and_from_iris_dask(self): assert coord.var_name == original_coord.name assert_array_equal( coord.points, CFDatetimeCoder().encode(original_coord).values) - assert (actual.coord_dims(coord) == - original.get_axis_num( + assert (actual.coord_dims(coord) + == original.get_axis_num( original.coords[coord.var_name].dims)) assert (actual.coord('distance2').attributes['foo'] == original.coords[ 'distance2'].attrs['foo']) - assert (actual.coord('distance').units == - cf_units.Unit(original.coords['distance'].units)) + assert (actual.coord('distance').units + == cf_units.Unit(original.coords['distance'].units)) assert actual.attributes['baz'] == original.attrs['baz'] assert actual.standard_name == original.attrs['standard_name'] @@ -4087,3 +4087,30 @@ def test_fallback_to_iris_AuxCoord(self, coord_values): expected = Cube(data, aux_coords_and_dims=[ (AuxCoord(coord_values, var_name='space'), 0)]) assert result == expected + + +@requires_numbagg +@pytest.mark.parametrize('dim', ['time', 'x']) +@pytest.mark.parametrize('window_type, window', [ + ['span', 5], + ['alpha', 0.5], + ['com', 0.5], + ['halflife', 5], +]) +def test_rolling_exp(da, dim, window_type, window): + da = da.isel(a=0) + da = da.where(da > 0.2) + + result = da.rolling_exp(window_type=window_type, **{dim: window}).mean() + assert isinstance(result, DataArray) + + pandas_array = da.to_pandas() + assert pandas_array.index.name == 'time' + if dim == 'x': + pandas_array = pandas_array.T + expected = ( + xr.DataArray(pandas_array.ewm(**{window_type: window}).mean()) + .transpose(*da.dims) + ) + + assert_allclose(expected.variable, result.variable) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 8cd129e35de..afbdd51b464 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -23,7 +23,7 @@ InaccessibleArray, UnexpectedDataAccess, assert_allclose, assert_array_equal, assert_equal, assert_identical, has_cftime, has_dask, raises_regex, requires_bottleneck, requires_cftime, requires_dask, - requires_scipy, source_ndarray) + requires_numbagg, requires_scipy, source_ndarray) try: import dask.array as da @@ -4809,6 +4809,13 @@ def test_rolling_wrapped_bottleneck(ds, name, center, min_periods, key): assert_equal(actual, ds['time']) +@requires_numbagg +def test_rolling_exp(ds): + + result = ds.rolling_exp(time=10, window_type='span').mean() + assert isinstance(result, Dataset) + + @pytest.mark.parametrize('center', (True, False)) @pytest.mark.parametrize('min_periods', (None, 1, 2, 3)) @pytest.mark.parametrize('window', (1, 2, 3, 4)) diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 50389df85cb..c34faa7487b 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -108,6 +108,7 @@ def show_versions(as_json=False): ("matplotlib", lambda mod: mod.__version__), ("cartopy", lambda mod: mod.__version__), ("seaborn", lambda mod: mod.__version__), + ("numbagg", lambda mod: mod.__version__), # xarray setup/test ("setuptools", lambda mod: mod.__version__), ("pip", lambda mod: mod.__version__),