pydata · shoyer · Sep 21, 2018 · Sep 4, 2018 · Sep 4, 2018 · Sep 4, 2018
diff --git a/doc/api.rst b/doc/api.rst
@@ -150,6 +150,7 @@ Computation
    Dataset.resample
    Dataset.diff
    Dataset.quantile
+   Dataset.differentiate
 
 **Aggregation**:
 :py:attr:`~Dataset.all`
@@ -317,6 +318,7 @@ Computation
    DataArray.diff
    DataArray.dot
    DataArray.quantile
+   DataArray.differentiate
 
 **Aggregation**:
 :py:attr:`~DataArray.all`

diff --git a/doc/computation.rst b/doc/computation.rst
@@ -200,6 +200,26 @@ You can also use ``construct`` to compute a weighted rolling sum:
   To avoid this, use ``skipna=False`` as the above example.
 
 
+Computation using Coordinates
+=============================
+
+Xarray objects have some handy methods for the computation with their
+coordinates. :py:meth:`~xarray.DataArray.differentiate` computes derivatives by
+central finite differences using their coordinates,
+
+.. ipython:: python
+    a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[0.1, 0.11, 0.2, 0.3])
+    a
+    a.differentiate('x')
+
+This method can be used also for multidimensional arrays,
+
+.. ipython:: python
+    a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'],
+                     coords=[0.1, 0.11, 0.2, 0.3])
+    a.differentiate('x')
+
+
 .. _compute.broadcasting:
 
 Broadcasting by dimension name

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -36,6 +36,10 @@ Documentation
 Enhancements
 ~~~~~~~~~~~~
 
+- :py:meth:`~xarray.DataArray.differentiate` and
+  :py:meth:`~xarray.Dataset.differentiate` are newly added.
+  (:issue:`1332`)
+  By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 - Default colormap for sequential and divergent data can now be set via
   :py:func:`~xarray.set_options()`
   (:issue:`2394`)

diff --git a/xarray/__init__.py b/xarray/__init__.py
@@ -10,7 +10,7 @@
 from .core.alignment import align, broadcast, broadcast_arrays
 from .core.common import full_like, zeros_like, ones_like
 from .core.combine import concat, auto_combine
-from .core.computation import apply_ufunc, where, dot
+from .core.computation import apply_ufunc, dot, where
 from .core.extensions import (register_dataarray_accessor,
                               register_dataset_accessor)
 from .core.variable import as_variable, Variable, IndexVariable, Coordinate

diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py
@@ -1,6 +1,9 @@
 from __future__ import absolute_import, division, print_function
 
+from distutils.version import LooseVersion
+
 import numpy as np
+from dask import __version__ as dask_version
 import dask.array as da
 
 try:
@@ -30,3 +33,124 @@ def isin(element, test_elements, assume_unique=False, invert=False):
         if invert:
             result = ~result
         return result
+
+
+if LooseVersion(dask_version) > LooseVersion('1.19.2'):
+    gradient = da.gradient
+
+else:  # pragma: no cover
+    # Copied from dask v0.19.2
+    # Used under the terms of Dask's license, see licenses/DASK_LICENSE.
+    import math
+    from numbers import Integral, Real
+
+    AxisError = np.AxisError
+
+    def validate_axis(axis, ndim):
+        """ Validate an input to axis= keywords """
+        if isinstance(axis, (tuple, list)):
+            return tuple(validate_axis(ax, ndim) for ax in axis)
+        if not isinstance(axis, Integral):
+            raise TypeError("Axis value must be an integer, got %s" % axis)
+        if axis < -ndim or axis >= ndim:
+            raise AxisError("Axis %d is out of bounds for array of dimension "
+                            "%d" % (axis, ndim))
+        if axis < 0:
+            axis += ndim
+        return axis
+
+    def _gradient_kernel(x, block_id, coord, axis, array_locs, grad_kwargs):
+        """
+        x: nd-array
+            array of one block
+        coord: 1d-array or scalar
+            coordinate along which the gradient is computed.
+        axis: int
+            axis along which the gradient is computed
+        array_locs:
+            actual location along axis. None if coordinate is scalar
+        grad_kwargs:
+            keyword to be passed to np.gradient
+        """
+        block_loc = block_id[axis]
+        if array_locs is not None:
+            coord = coord[array_locs[0][block_loc]:array_locs[1][block_loc]]
+        grad = np.gradient(x, coord, axis=axis, **grad_kwargs)
+        return grad
+
+    def gradient(f, *varargs, **kwargs):
+        f = da.asarray(f)
+
+        kwargs["edge_order"] = math.ceil(kwargs.get("edge_order", 1))
+        if kwargs["edge_order"] > 2:
+            raise ValueError("edge_order must be less than or equal to 2.")
+
+        drop_result_list = False
+        axis = kwargs.pop("axis", None)
+        if axis is None:
+            axis = tuple(range(f.ndim))
+        elif isinstance(axis, Integral):
+            drop_result_list = True
+            axis = (axis,)
+
+        axis = validate_axis(axis, f.ndim)
+
+        if len(axis) != len(set(axis)):
+            raise ValueError("duplicate axes not allowed")
+
+        axis = tuple(ax % f.ndim for ax in axis)
+
+        if varargs == ():
+            varargs = (1,)
+        if len(varargs) == 1:
+            varargs = len(axis) * varargs
+        if len(varargs) != len(axis):
+            raise TypeError(
+                "Spacing must either be a single scalar, or a scalar / "
+                "1d-array per axis"
+            )
+
+        if issubclass(f.dtype.type, (np.bool8, Integral)):
+            f = f.astype(float)
+        elif issubclass(f.dtype.type, Real) and f.dtype.itemsize < 4:
+            f = f.astype(float)
+
+        results = []
+        for i, ax in enumerate(axis):
+            for c in f.chunks[ax]:
+                if np.min(c) < kwargs["edge_order"] + 1:
+                    raise ValueError(
+                        'Chunk size must be larger than edge_order + 1. '
+                        'Minimum chunk for aixs {} is {}. Rechunk to '
+                        'proceed.'.format(np.min(c), ax))
+
+            if np.isscalar(varargs[i]):
+                array_locs = None
+            else:
+                if isinstance(varargs[i], da.Array):
+                    raise NotImplementedError(
+                        'dask array coordinated is not supported.')
+                # coordinate position for each block taking overlap into
+                # account
+                chunk = np.array(f.chunks[ax])
+                array_loc_stop = np.cumsum(chunk) + 1
+                array_loc_start = array_loc_stop - chunk - 2
+                array_loc_stop[-1] -= 1
+                array_loc_start[0] = 0
+                array_locs = (array_loc_start, array_loc_stop)
+
+            results.append(f.map_overlap(
+                _gradient_kernel,
+                dtype=f.dtype,
+                depth={j: 1 if j == ax else 0 for j in range(f.ndim)},
+                boundary="none",
+                coord=varargs[i],
+                axis=ax,
+                array_locs=array_locs,
+                grad_kwargs=kwargs,
+            ))
+
+        if drop_result_list:
+            results = results[0]
+
+        return results
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -2010,6 +2010,9 @@ def diff(self, dim, n=1, label='upper'):
         Coordinates:
         * x        (x) int64 3 4
 
+        See Also
+        --------
+        DataArray.differentiate
         """
         ds = self._to_temp_dataset().diff(n=n, dim=dim, label=label)
         return self._from_temp_dataset(ds)
@@ -2289,6 +2292,57 @@ def rank(self, dim, pct=False, keep_attrs=False):
         ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs)
         return self._from_temp_dataset(ds)
 
+    def differentiate(self, coord, edge_order=1, datetime_unit=None):
+        """ Differentiate the array with the second order accurate central
+        differences.
+
+        Parameters
+        ----------
+        coord: str
+            The coordinate to be used to compute the gradient.
+        edge_order: 1 or 2. Default 1
+            N-th order accurate differences at the boundaries.
+        datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
+            'us', 'ns', 'ps', 'fs', 'as'}
+            Unit to compute gradient. Only valid for datetime coordinate.
+
+        Returns
+        -------
+        differentiated: DataArray
+
+        See also
+        --------
+        numpy.gradient: corresponding numpy function
+
+        Examples
+        --------
+
+        >>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'],
+        ...                   coords={'x': [0, 0.1, 1.1, 1.2]})
+        >>> da
+        <xarray.DataArray (x: 4, y: 3)>
+        array([[ 0,  1,  2],
+               [ 3,  4,  5],
+               [ 6,  7,  8],
+               [ 9, 10, 11]])
+        Coordinates:
+          * x        (x) float64 0.0 0.1 1.1 1.2
+        Dimensions without coordinates: y
+        >>>
+        >>> da.differentiate('x')
+        <xarray.DataArray (x: 4, y: 3)>
+        array([[30.      , 30.      , 30.      ],
+               [27.545455, 27.545455, 27.545455],
+               [27.545455, 27.545455, 27.545455],
+               [30.      , 30.      , 30.      ]])
+        Coordinates:
+          * x        (x) float64 0.0 0.1 1.1 1.2
+        Dimensions without coordinates: y
+        """
+        ds = self._to_temp_dataset().differentiate(
+            coord, edge_order, datetime_unit)
+        return self._from_temp_dataset(ds)
+
 
 # priority most be higher than Variable to properly work with binary ufuncs
 ops.inject_all_ops_and_reduce_methods(DataArray, priority=60)
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -13,8 +13,8 @@
 import xarray as xr
 
 from . import (
-    alignment, duck_array_ops, formatting, groupby, indexing, ops, resample,
-    rolling, utils)
+    alignment, computation, duck_array_ops, formatting, groupby, indexing, ops,
+    resample, rolling, utils)
 from .. import conventions
 from .alignment import align
 from .common import (
@@ -31,7 +31,7 @@
     OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
 from .utils import (
     Frozen, SortedKeysDict, either_dict_or_kwargs, decode_numpy_dict_values,
-    ensure_us_time_resolution, hashable, maybe_wrap_array)
+    ensure_us_time_resolution, hashable, maybe_wrap_array, to_numeric)
 from .variable import IndexVariable, Variable, as_variable, broadcast_variables
 
 # list of attributes of pd.DatetimeIndex that are ndarrays of time info
@@ -3313,6 +3313,9 @@ def diff(self, dim, n=1, label='upper'):
         Data variables:
         foo      (x) int64 1 -1
 
+        See Also
+        --------
+        Dataset.differentiate
         """
         if n == 0:
             return self
@@ -3663,6 +3666,58 @@ def rank(self, dim, pct=False, keep_attrs=False):
         attrs = self.attrs if keep_attrs else None
         return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
 
+    def differentiate(self, coord, edge_order=1, datetime_unit=None):
+        """ Differentiate with the second order accurate central
+        differences.
+
+        Parameters
+        ----------
+        coord: str
+            The coordinate to be used to compute the gradient.
+        edge_order: 1 or 2. Default 1
+            N-th order accurate differences at the boundaries.
+        datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
+            'us', 'ns', 'ps', 'fs', 'as'}
+            Unit to compute gradient. Only valid for datetime coordinate.
+
+        Returns
+        -------
+        differentiated: Dataset
+
+        See also
+        --------
+        numpy.gradient: corresponding numpy function
+        """
+        from .variable import Variable
+
+        if coord not in self.variables and coord not in self.dims:
+            raise ValueError('Coordinate {} does not exist.'.format(coord))
+
+        coord_var = self[coord].variable
+        if coord_var.ndim != 1:
+            raise ValueError('Coordinate {} must be 1 dimensional but is {}'
+                             ' dimensional'.format(coord, coord_var.ndim))
+
+        dim = coord_var.dims[0]
+        coord_data = coord_var.data
+        if coord_data.dtype.kind in 'mM':
+            if datetime_unit is None:
+                datetime_unit, _ = np.datetime_data(coord_data.dtype)
+            coord_data = to_numeric(coord_data, datetime_unit=datetime_unit)
+
+        variables = OrderedDict()
+        for k, v in self.variables.items():
+            if (k in self.data_vars and dim in v.dims and
+                    k not in self.coords):
+                v = to_numeric(v, datetime_unit=datetime_unit)
+                grad = duck_array_ops.gradient(
+                    v.data, coord_data, edge_order=edge_order,
+                    axis=v.get_axis_num(dim))
+                variables[k] = Variable(v.dims, grad)
+            else:
+                variables[k] = v
+        return self._replace_vars_and_dims(variables)
+
     @property
     def real(self):
         return self._unary_op(lambda x: x.real, keep_attrs=True)(self)
@@ -3676,7 +3731,7 @@ def filter_by_attrs(self, **kwargs):
 
         Can pass in ``key=value`` or ``key=callable``.  A Dataset is returned
         containing only the variables for which all the filter tests pass.
-        These tests are either ``key=value`` for which the attribute ``key`` 
+        These tests are either ``key=value`` for which the attribute ``key``
         has the exact value ``value`` or the callable passed into
         ``key=callable`` returns True. The callable will be passed a single
         value, either the value of the attribute ``key`` or ``None`` if the

diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
@@ -93,6 +93,14 @@ def isnull(data):
 einsum = _dask_or_eager_func('einsum', array_args=slice(1, None),
                              requires_dask='0.17.3')
 
+
+def gradient(x, coord, axis, edge_order):
+    if isinstance(x, dask_array_type):
+        return dask_array_compat.gradient(
+            x, coord, axis=axis, edge_order=edge_order)
+    return npcompat.gradient(x, coord, axis=axis, edge_order=edge_order)
+
+
 masked_invalid = _dask_or_eager_func(
     'masked_invalid', eager_module=np.ma,
     dask_module=getattr(dask_array, 'ma', None))

diff --git a/xarray/core/missing.py b/xarray/core/missing.py
@@ -11,7 +11,7 @@
 from . import rolling
 from .computation import apply_ufunc
 from .pycompat import iteritems
-from .utils import is_scalar, OrderedSet
+from .utils import is_scalar, OrderedSet, to_numeric
 from .variable import Variable, broadcast_variables
 from .duck_array_ops import dask_array_type
 
@@ -414,8 +414,8 @@ def _floatize_x(x, new_x):
             # offset (min(x)) and the variation (x - min(x)) can be
             # represented by float.
             xmin = np.min(x[i])
-            x[i] = (x[i] - xmin).astype(np.float64)
-            new_x[i] = (new_x[i] - xmin).astype(np.float64)
+            x[i] = to_numeric(x[i], offset=xmin, dtype=np.float64)
+            new_x[i] = to_numeric(new_x[i], offset=xmin, dtype=np.float64)
     return x, new_x