-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
implement Gradient #2398
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
implement Gradient #2398
Changes from 26 commits
59ff688
0adfc68
d665b3c
e0fa5fd
218e62d
888b924
a0ab4c2
c581513
d6be041
a083460
267694d
bf2f35e
2a71b62
b504da8
fb356c5
1694d3c
7a0b57f
e93b926
4c656e0
74acc81
8817306
4112cd9
1c2c88c
cbfecb4
2e8db19
c31539e
528bcab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -200,6 +200,26 @@ You can also use ``construct`` to compute a weighted rolling sum: | |
To avoid this, use ``skipna=False`` as the above example. | ||
|
||
|
||
Computation using Coordinates | ||
============================= | ||
|
||
Xarray objects have some handy methods for the computation with their | ||
coordinates. :py:meth:`~xarray.DataArray.differentiate` computes derivatives by | ||
central finite differences using their coordinates, | ||
|
||
.. ipython:: python | ||
a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[0.1, 0.11, 0.2, 0.3]) | ||
a | ||
a.differentiate('x') | ||
|
||
This method can be used also for multidimensional arrays, | ||
|
||
.. ipython:: python | ||
a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'], | ||
coords=[0.1, 0.11, 0.2, 0.3]) | ||
a.differentiate('x') | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a place where we could mention the limitations of |
||
.. _compute.broadcasting: | ||
|
||
Broadcasting by dimension name | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
from __future__ import absolute_import, division, print_function | ||
|
||
from distutils.version import LooseVersion | ||
|
||
import numpy as np | ||
from dask import __version__ as dask_version | ||
import dask.array as da | ||
|
||
try: | ||
|
@@ -30,3 +33,124 @@ def isin(element, test_elements, assume_unique=False, invert=False): | |
if invert: | ||
result = ~result | ||
return result | ||
|
||
|
||
if LooseVersion(dask_version) > LooseVersion('1.19.2'): | ||
gradient = da.gradient | ||
|
||
else: # pragma: no cover | ||
# Copied from dask v0.19.2 | ||
# Used under the terms of Dask's license, see licenses/DASK_LICENSE. | ||
import math | ||
from numbers import Integral, Real | ||
|
||
AxisError = np.AxisError | ||
|
||
def validate_axis(axis, ndim): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. E303 too many blank lines (2) |
||
""" Validate an input to axis= keywords """ | ||
if isinstance(axis, (tuple, list)): | ||
return tuple(validate_axis(ax, ndim) for ax in axis) | ||
if not isinstance(axis, Integral): | ||
raise TypeError("Axis value must be an integer, got %s" % axis) | ||
if axis < -ndim or axis >= ndim: | ||
raise AxisError("Axis %d is out of bounds for array of dimension " | ||
"%d" % (axis, ndim)) | ||
if axis < 0: | ||
axis += ndim | ||
return axis | ||
|
||
def _gradient_kernel(x, block_id, coord, axis, array_locs, grad_kwargs): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. E303 too many blank lines (2) |
||
""" | ||
x: nd-array | ||
array of one block | ||
coord: 1d-array or scalar | ||
coordinate along which the gradient is computed. | ||
axis: int | ||
axis along which the gradient is computed | ||
array_locs: | ||
actual location along axis. None if coordinate is scalar | ||
grad_kwargs: | ||
keyword to be passed to np.gradient | ||
""" | ||
block_loc = block_id[axis] | ||
if array_locs is not None: | ||
coord = coord[array_locs[0][block_loc]:array_locs[1][block_loc]] | ||
grad = np.gradient(x, coord, axis=axis, **grad_kwargs) | ||
return grad | ||
|
||
def gradient(f, *varargs, **kwargs): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. E303 too many blank lines (2) |
||
f = da.asarray(f) | ||
|
||
kwargs["edge_order"] = math.ceil(kwargs.get("edge_order", 1)) | ||
if kwargs["edge_order"] > 2: | ||
raise ValueError("edge_order must be less than or equal to 2.") | ||
|
||
drop_result_list = False | ||
axis = kwargs.pop("axis", None) | ||
if axis is None: | ||
axis = tuple(range(f.ndim)) | ||
elif isinstance(axis, Integral): | ||
drop_result_list = True | ||
axis = (axis,) | ||
|
||
axis = validate_axis(axis, f.ndim) | ||
|
||
if len(axis) != len(set(axis)): | ||
raise ValueError("duplicate axes not allowed") | ||
|
||
axis = tuple(ax % f.ndim for ax in axis) | ||
|
||
if varargs == (): | ||
varargs = (1,) | ||
if len(varargs) == 1: | ||
varargs = len(axis) * varargs | ||
if len(varargs) != len(axis): | ||
raise TypeError( | ||
"Spacing must either be a single scalar, or a scalar / " | ||
"1d-array per axis" | ||
) | ||
|
||
if issubclass(f.dtype.type, (np.bool8, Integral)): | ||
f = f.astype(float) | ||
elif issubclass(f.dtype.type, Real) and f.dtype.itemsize < 4: | ||
f = f.astype(float) | ||
|
||
results = [] | ||
for i, ax in enumerate(axis): | ||
for c in f.chunks[ax]: | ||
if np.min(c) < kwargs["edge_order"] + 1: | ||
raise ValueError( | ||
'Chunk size must be larger than edge_order + 1. ' | ||
'Minimum chunk for aixs {} is {}. Rechunk to ' | ||
'proceed.'.format(np.min(c), ax)) | ||
|
||
if np.isscalar(varargs[i]): | ||
array_locs = None | ||
else: | ||
if isinstance(varargs[i], da.Array): | ||
raise NotImplementedError( | ||
'dask array coordinated is not supported.') | ||
# coordinate position for each block taking overlap into | ||
# account | ||
chunk = np.array(f.chunks[ax]) | ||
array_loc_stop = np.cumsum(chunk) + 1 | ||
array_loc_start = array_loc_stop - chunk - 2 | ||
array_loc_stop[-1] -= 1 | ||
array_loc_start[0] = 0 | ||
array_locs = (array_loc_start, array_loc_stop) | ||
|
||
results.append(f.map_overlap( | ||
_gradient_kernel, | ||
dtype=f.dtype, | ||
depth={j: 1 if j == ax else 0 for j in range(f.ndim)}, | ||
boundary="none", | ||
coord=varargs[i], | ||
axis=ax, | ||
array_locs=array_locs, | ||
grad_kwargs=kwargs, | ||
)) | ||
|
||
if drop_result_list: | ||
results = results[0] | ||
|
||
return results |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2010,6 +2010,9 @@ def diff(self, dim, n=1, label='upper'): | |
Coordinates: | ||
* x (x) int64 3 4 | ||
|
||
See Also | ||
-------- | ||
DataArray.differentiate | ||
""" | ||
ds = self._to_temp_dataset().diff(n=n, dim=dim, label=label) | ||
return self._from_temp_dataset(ds) | ||
|
@@ -2289,6 +2292,57 @@ def rank(self, dim, pct=False, keep_attrs=False): | |
ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs) | ||
return self._from_temp_dataset(ds) | ||
|
||
def differentiate(self, coord, edge_order=1, datetime_unit=None): | ||
""" Differentiate the array with the second order accurate central | ||
differences. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and here |
||
|
||
Parameters | ||
---------- | ||
coord: str | ||
The coordinate to be used to compute the gradient. | ||
edge_order: 1 or 2. Default 1 | ||
N-th order accurate differences at the boundaries. | ||
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', | ||
'us', 'ns', 'ps', 'fs', 'as'} | ||
Unit to compute gradient. Only valid for datetime coordinate. | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. needs |
||
Returns | ||
------- | ||
differentiated: DataArray | ||
|
||
See also | ||
-------- | ||
numpy.gradient: corresponding numpy function | ||
|
||
Examples | ||
-------- | ||
|
||
>>> da = xr.DataArray(np.arange(12).reshape(4, 3), dims=['x', 'y'], | ||
... coords={'x': [0, 0.1, 1.1, 1.2]}) | ||
>>> da | ||
<xarray.DataArray (x: 4, y: 3)> | ||
array([[ 0, 1, 2], | ||
[ 3, 4, 5], | ||
[ 6, 7, 8], | ||
[ 9, 10, 11]]) | ||
Coordinates: | ||
* x (x) float64 0.0 0.1 1.1 1.2 | ||
Dimensions without coordinates: y | ||
>>> | ||
>>> da.differentiate('x') | ||
<xarray.DataArray (x: 4, y: 3)> | ||
array([[30. , 30. , 30. ], | ||
[27.545455, 27.545455, 27.545455], | ||
[27.545455, 27.545455, 27.545455], | ||
[30. , 30. , 30. ]]) | ||
Coordinates: | ||
* x (x) float64 0.0 0.1 1.1 1.2 | ||
Dimensions without coordinates: y | ||
""" | ||
ds = self._to_temp_dataset().differentiate( | ||
coord, edge_order, datetime_unit) | ||
return self._from_temp_dataset(ds) | ||
|
||
|
||
# priority most be higher than Variable to properly work with binary ufuncs | ||
ops.inject_all_ops_and_reduce_methods(DataArray, priority=60) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,8 +13,8 @@ | |
import xarray as xr | ||
|
||
from . import ( | ||
alignment, duck_array_ops, formatting, groupby, indexing, ops, resample, | ||
rolling, utils) | ||
alignment, computation, duck_array_ops, formatting, groupby, indexing, ops, | ||
resample, rolling, utils) | ||
from .. import conventions | ||
from .alignment import align | ||
from .common import ( | ||
|
@@ -31,7 +31,7 @@ | |
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range) | ||
from .utils import ( | ||
Frozen, SortedKeysDict, either_dict_or_kwargs, decode_numpy_dict_values, | ||
ensure_us_time_resolution, hashable, maybe_wrap_array) | ||
ensure_us_time_resolution, hashable, maybe_wrap_array, to_numeric) | ||
from .variable import IndexVariable, Variable, as_variable, broadcast_variables | ||
|
||
# list of attributes of pd.DatetimeIndex that are ndarrays of time info | ||
|
@@ -3313,6 +3313,9 @@ def diff(self, dim, n=1, label='upper'): | |
Data variables: | ||
foo (x) int64 1 -1 | ||
|
||
See Also | ||
-------- | ||
Dataset.differentiate | ||
""" | ||
if n == 0: | ||
return self | ||
|
@@ -3663,6 +3666,58 @@ def rank(self, dim, pct=False, keep_attrs=False): | |
attrs = self.attrs if keep_attrs else None | ||
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs) | ||
|
||
def differentiate(self, coord, edge_order=1, datetime_unit=None): | ||
""" Differentiate with the second order accurate central | ||
differences. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and here |
||
|
||
Parameters | ||
---------- | ||
coord: str | ||
The coordinate to be used to compute the gradient. | ||
edge_order: 1 or 2. Default 1 | ||
N-th order accurate differences at the boundaries. | ||
datetime_unit: None or any of {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', | ||
'us', 'ns', 'ps', 'fs', 'as'} | ||
Unit to compute gradient. Only valid for datetime coordinate. | ||
|
||
Returns | ||
------- | ||
differentiated: Dataset | ||
|
||
See also | ||
-------- | ||
numpy.gradient: corresponding numpy function | ||
""" | ||
from .variable import Variable | ||
|
||
if coord not in self.variables and coord not in self.dims: | ||
raise ValueError('Coordinate {} does not exist.'.format(coord)) | ||
|
||
coord_var = self[coord].variable | ||
if coord_var.ndim != 1: | ||
raise ValueError('Coordinate {} must be 1 dimensional but is {}' | ||
' dimensional'.format(coord, coord_var.ndim)) | ||
|
||
dim = coord_var.dims[0] | ||
coord_data = coord_var.data | ||
if coord_data.dtype.kind in 'mM': | ||
if datetime_unit is None: | ||
datetime_unit, _ = np.datetime_data(coord_data.dtype) | ||
coord_data = to_numeric(coord_data, datetime_unit=datetime_unit) | ||
|
||
variables = OrderedDict() | ||
for k, v in self.variables.items(): | ||
if (k in self.data_vars and dim in v.dims and | ||
k not in self.coords): | ||
v = to_numeric(v, datetime_unit=datetime_unit) | ||
grad = duck_array_ops.gradient( | ||
v.data, coord_data, edge_order=edge_order, | ||
axis=v.get_axis_num(dim)) | ||
variables[k] = Variable(v.dims, grad) | ||
else: | ||
variables[k] = v | ||
return self._replace_vars_and_dims(variables) | ||
|
||
@property | ||
def real(self): | ||
return self._unary_op(lambda x: x.real, keep_attrs=True)(self) | ||
|
@@ -3676,7 +3731,7 @@ def filter_by_attrs(self, **kwargs): | |
|
||
Can pass in ``key=value`` or ``key=callable``. A Dataset is returned | ||
containing only the variables for which all the filter tests pass. | ||
These tests are either ``key=value`` for which the attribute ``key`` | ||
These tests are either ``key=value`` for which the attribute ``key`` | ||
has the exact value ``value`` or the callable passed into | ||
``key=callable`` returns True. The callable will be passed a single | ||
value, either the value of the attribute ``key`` or ``None`` if the | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This example raises an error when you run it -- I think you need to supply two dimension names.