Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ Computation
Dataset.resample
Dataset.diff
Dataset.quantile
Dataset.differentiate

**Aggregation**:
:py:attr:`~Dataset.all`
Expand Down Expand Up @@ -317,6 +318,7 @@ Computation
DataArray.diff
DataArray.dot
DataArray.quantile
DataArray.differentiate

**Aggregation**:
:py:attr:`~DataArray.all`
Expand Down
20 changes: 20 additions & 0 deletions doc/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,26 @@ You can also use ``construct`` to compute a weighted rolling sum:
To avoid this, use ``skipna=False`` as the above example.


Computation using Coordinates
=============================

Xarray objects have some handy methods for the computation with their
coordinates. :py:meth:`~xarray.DataArray.differentiate` computes derivatives by
central finite differences using their coordinates,

.. ipython:: python
a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[0.1, 0.11, 0.2, 0.3])
a
a.differentiate('x')

This method can be used also for multidimensional arrays,

.. ipython:: python
a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'],
coords=[0.1, 0.11, 0.2, 0.3])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This example raises an error when you run it -- I think you need to supply two dimension names.

a.differentiate('x')


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a place where we could mention the limitations of differentiate and gradient for non-cartesian geometry.

.. _compute.broadcasting:

Broadcasting by dimension name
Expand Down
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ Documentation
Enhancements
~~~~~~~~~~~~

- :py:meth:`~xarray.DataArray.differentiate` and
:py:meth:`~xarray.Dataset.differentiate` are newly added.
(:issue:`1332`)
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- Default colormap for sequential and divergent data can now be set via
:py:func:`~xarray.set_options()`
(:issue:`2394`)
Expand Down
2 changes: 1 addition & 1 deletion xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .core.alignment import align, broadcast, broadcast_arrays
from .core.common import full_like, zeros_like, ones_like
from .core.combine import concat, auto_combine
from .core.computation import apply_ufunc, where, dot
from .core.computation import apply_ufunc, dot, where
from .core.extensions import (register_dataarray_accessor,
register_dataset_accessor)
from .core.variable import as_variable, Variable, IndexVariable, Coordinate
Expand Down
124 changes: 124 additions & 0 deletions xarray/core/dask_array_compat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import absolute_import, division, print_function

from distutils.version import LooseVersion

import numpy as np
from dask import __version__ as dask_version
import dask.array as da

try:
Expand Down Expand Up @@ -30,3 +33,124 @@ def isin(element, test_elements, assume_unique=False, invert=False):
if invert:
result = ~result
return result


if LooseVersion(dask_version) > LooseVersion('1.19.2'):
gradient = da.gradient

else: # pragma: no cover
# Copied from dask v0.19.2
# Used under the terms of Dask's license, see licenses/DASK_LICENSE.
import math
from numbers import Integral, Real

AxisError = np.AxisError

def validate_axis(axis, ndim):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

E303 too many blank lines (2)

""" Validate an input to axis= keywords """
if isinstance(axis, (tuple, list)):
return tuple(validate_axis(ax, ndim) for ax in axis)
if not isinstance(axis, Integral):
raise TypeError("Axis value must be an integer, got %s" % axis)
if axis < -ndim or axis >= ndim:
raise AxisError("Axis %d is out of bounds for array of dimension "
"%d" % (axis, ndim))
if axis < 0:
axis += ndim
return axis

def _gradient_kernel(x, block_id, coord, axis, array_locs, grad_kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

E303 too many blank lines (2)

"""
x: nd-array
array of one block
coord: 1d-array or scalar
coordinate along which the gradient is computed.
axis: int
axis along which the gradient is computed
array_locs:
actual location along axis. None if coordinate is scalar
grad_kwargs:
keyword to be passed to np.gradient
"""
block_loc = block_id[axis]
if array_locs is not None:
coord = coord[array_locs[0][block_loc]:array_locs[1][block_loc]]
grad = np.gradient(x, coord, axis=axis, **grad_kwargs)
return grad

def gradient(f, *varargs, **kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

E303 too many blank lines (2)

f = da.asarray(f)

kwargs["edge_order"] = math.ceil(kwargs.get("edge_order", 1))
if kwargs["edge_order"] > 2:
raise ValueError("edge_order must be less than or equal to 2.")

drop_result_list = False
axis = kwargs.pop("axis", None)
if axis is None:
axis = tuple(range(f.ndim))
elif isinstance(axis, Integral):
drop_result_list = True
axis = (axis,)

axis = validate_axis(axis, f.ndim)

if len(axis) != len(set(axis)):
raise ValueError("duplicate axes not allowed")

axis = tuple(ax % f.ndim for ax in axis)

if varargs == ():
varargs = (1,)
if len(varargs) == 1:
varargs = len(axis) * varargs
if len(varargs) != len(axis):
raise TypeError(
"Spacing must either be a single scalar, or a scalar / "
"1d-array per axis"
)

if issubclass(f.dtype.type, (np.bool8, Integral)):
f = f.astype(float)
elif issubclass(f.dtype.type, Real) and f.dtype.itemsize < 4:
f = f.astype(float)

results = []
for i, ax in enumerate(axis):
for c in f.chunks[ax]:
if np.min(c) < kwargs["edge_order"] + 1:
raise ValueError(
'Chunk size must be larger than edge_order + 1. '
'Minimum chunk for aixs {} is {}. Rechunk to '
'proceed.'.format(np.min(c), ax))

if np.isscalar(varargs[i]):
array_locs = None
else:
if isinstance(varargs[i], da.Array):
raise NotImplementedError(
'dask array coordinated is not supported.')
# coordinate position for each block taking overlap into
# account
chunk = np.array(f.chunks[ax])
array_loc_stop = np.cumsum(chunk) + 1
array_loc_start = array_loc_stop - chunk - 2
array_loc_stop[-1] -= 1
array_loc_start[0] = 0
array_locs = (array_loc_start, array_loc_stop)

results.append(f.map_overlap(
_gradient_kernel,
dtype=f.dtype,
depth={j: 1 if j == ax else 0 for j in range(f.ndim)},
boundary="none",
coord=varargs[i],
axis=ax,
array_locs=array_locs,
grad_kwargs=kwargs,
))

if drop_result_list:
results = results[0]

return results
54 changes: 54 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2010,6 +2010,9 @@ def diff(self, dim, n=1, label='upper'):
Coordinates:
* x (x) int64 3 4

See Also
--------
DataArray.differentiate
"""
ds = self._to_temp_dataset().diff(n=n, dim=dim, label=label)
return self._from_temp_dataset(ds)
Expand Down Expand Up @@ -2289,6 +2292,57 @@ def rank(self, dim, pct=False, keep_attrs=False):
ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs)
return self._from_temp_dataset(ds)

def differentiate(self, coord, edge_order=1, datetime_unit=None):
""" Differentiate the array with the second order accurate central
differences.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and here


Parameters
----------
coord: str
The coordinate to be used to compute the gradient.
edge_order: 1 or 2. Default 1
N-th order accurate differences at the boundaries.
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
'us', 'ns', 'ps', 'fs', 'as'}
Unit to compute gradient. Only valid for datetime coordinate.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needs time_unit here

Returns
-------
differentiated: DataArray

See also
--------
numpy.gradient: corresponding numpy function

Examples
--------

>>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'],
... coords={'x': [0, 0.1, 1.1, 1.2]})
>>> da
<xarray.DataArray (x: 4, y: 3)>
array([[ 0, 1, 2],
[ 3, 4, 5],
[ 6, 7, 8],
[ 9, 10, 11]])
Coordinates:
* x (x) float64 0.0 0.1 1.1 1.2
Dimensions without coordinates: y
>>>
>>> da.differentiate('x')
<xarray.DataArray (x: 4, y: 3)>
array([[30. , 30. , 30. ],
[27.545455, 27.545455, 27.545455],
[27.545455, 27.545455, 27.545455],
[30. , 30. , 30. ]])
Coordinates:
* x (x) float64 0.0 0.1 1.1 1.2
Dimensions without coordinates: y
"""
ds = self._to_temp_dataset().differentiate(
coord, edge_order, datetime_unit)
return self._from_temp_dataset(ds)


# priority most be higher than Variable to properly work with binary ufuncs
ops.inject_all_ops_and_reduce_methods(DataArray, priority=60)
63 changes: 59 additions & 4 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
import xarray as xr

from . import (
alignment, duck_array_ops, formatting, groupby, indexing, ops, resample,
rolling, utils)
alignment, computation, duck_array_ops, formatting, groupby, indexing, ops,
resample, rolling, utils)
from .. import conventions
from .alignment import align
from .common import (
Expand All @@ -31,7 +31,7 @@
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
from .utils import (
Frozen, SortedKeysDict, either_dict_or_kwargs, decode_numpy_dict_values,
ensure_us_time_resolution, hashable, maybe_wrap_array)
ensure_us_time_resolution, hashable, maybe_wrap_array, to_numeric)
from .variable import IndexVariable, Variable, as_variable, broadcast_variables

# list of attributes of pd.DatetimeIndex that are ndarrays of time info
Expand Down Expand Up @@ -3313,6 +3313,9 @@ def diff(self, dim, n=1, label='upper'):
Data variables:
foo (x) int64 1 -1

See Also
--------
Dataset.differentiate
"""
if n == 0:
return self
Expand Down Expand Up @@ -3663,6 +3666,58 @@ def rank(self, dim, pct=False, keep_attrs=False):
attrs = self.attrs if keep_attrs else None
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)

def differentiate(self, coord, edge_order=1, datetime_unit=None):
""" Differentiate with the second order accurate central
differences.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and here


Parameters
----------
coord: str
The coordinate to be used to compute the gradient.
edge_order: 1 or 2. Default 1
N-th order accurate differences at the boundaries.
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms',
'us', 'ns', 'ps', 'fs', 'as'}
Unit to compute gradient. Only valid for datetime coordinate.

Returns
-------
differentiated: Dataset

See also
--------
numpy.gradient: corresponding numpy function
"""
from .variable import Variable

if coord not in self.variables and coord not in self.dims:
raise ValueError('Coordinate {} does not exist.'.format(coord))

coord_var = self[coord].variable
if coord_var.ndim != 1:
raise ValueError('Coordinate {} must be 1 dimensional but is {}'
' dimensional'.format(coord, coord_var.ndim))

dim = coord_var.dims[0]
coord_data = coord_var.data
if coord_data.dtype.kind in 'mM':
if datetime_unit is None:
datetime_unit, _ = np.datetime_data(coord_data.dtype)
coord_data = to_numeric(coord_data, datetime_unit=datetime_unit)

variables = OrderedDict()
for k, v in self.variables.items():
if (k in self.data_vars and dim in v.dims and
k not in self.coords):
v = to_numeric(v, datetime_unit=datetime_unit)
grad = duck_array_ops.gradient(
v.data, coord_data, edge_order=edge_order,
axis=v.get_axis_num(dim))
variables[k] = Variable(v.dims, grad)
else:
variables[k] = v
return self._replace_vars_and_dims(variables)

@property
def real(self):
return self._unary_op(lambda x: x.real, keep_attrs=True)(self)
Expand All @@ -3676,7 +3731,7 @@ def filter_by_attrs(self, **kwargs):

Can pass in ``key=value`` or ``key=callable``. A Dataset is returned
containing only the variables for which all the filter tests pass.
These tests are either ``key=value`` for which the attribute ``key``
These tests are either ``key=value`` for which the attribute ``key``
has the exact value ``value`` or the callable passed into
``key=callable`` returns True. The callable will be passed a single
value, either the value of the attribute ``key`` or ``None`` if the
Expand Down
8 changes: 8 additions & 0 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ def isnull(data):
einsum = _dask_or_eager_func('einsum', array_args=slice(1, None),
requires_dask='0.17.3')


def gradient(x, coord, axis, edge_order):
if isinstance(x, dask_array_type):
return dask_array_compat.gradient(
x, coord, axis=axis, edge_order=edge_order)
return npcompat.gradient(x, coord, axis=axis, edge_order=edge_order)


masked_invalid = _dask_or_eager_func(
'masked_invalid', eager_module=np.ma,
dask_module=getattr(dask_array, 'ma', None))
Expand Down
6 changes: 3 additions & 3 deletions xarray/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from . import rolling
from .computation import apply_ufunc
from .pycompat import iteritems
from .utils import is_scalar, OrderedSet
from .utils import is_scalar, OrderedSet, to_numeric
from .variable import Variable, broadcast_variables
from .duck_array_ops import dask_array_type

Expand Down Expand Up @@ -414,8 +414,8 @@ def _floatize_x(x, new_x):
# offset (min(x)) and the variation (x - min(x)) can be
# represented by float.
xmin = np.min(x[i])
x[i] = (x[i] - xmin).astype(np.float64)
new_x[i] = (new_x[i] - xmin).astype(np.float64)
x[i] = to_numeric(x[i], offset=xmin, dtype=np.float64)
new_x[i] = to_numeric(new_x[i], offset=xmin, dtype=np.float64)
return x, new_x


Expand Down
Loading