pydata · fujiisoup · Mar 12, 2018 · Mar 6, 2018 · Mar 6, 2018 · Mar 7, 2018
diff --git a/doc/api.rst b/doc/api.rst
@@ -24,6 +24,7 @@ Top-level functions
    full_like
    zeros_like
    ones_like
+   dot
 
 Dataset
 =======

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -38,6 +38,10 @@ Documentation
 Enhancements
 ~~~~~~~~~~~~
 
+- Added :py:func:`~xarray.dot`, an equivalent of ``np.einsum`` that works
+  on dimension names. Also, :py:meth:`~xarray.DataArray.dot` now supports
+  the ``dims`` option, which specifies the dimensions to be summed over.
+  (:issue:`1951`)
 - Support lazy vectorized-indexing. After this change, flexible indexing such
   as orthogonal/vectorized indexing, becomes possible for all the backend
   arrays. Also, lazy ``transpose`` is now also supported. (:issue:`1897`)

diff --git a/xarray/__init__.py b/xarray/__init__.py
@@ -6,7 +6,7 @@
 from .core.alignment import align, broadcast, broadcast_arrays
 from .core.common import full_like, zeros_like, ones_like
 from .core.combine import concat, auto_combine
-from .core.computation import apply_ufunc, where
+from .core.computation import apply_ufunc, where, dot
 from .core.extensions import (register_dataarray_accessor,
                               register_dataset_accessor)
 from .core.variable import as_variable, Variable, IndexVariable, Coordinate

diff --git a/xarray/core/computation.py b/xarray/core/computation.py
@@ -9,10 +9,10 @@
 
 import numpy as np
 
-from . import duck_array_ops, utils
+from . import duck_array_ops, utils, dtypes
 from .alignment import deep_align
 from .merge import expand_and_merge_variables
-from .pycompat import OrderedDict, dask_array_type
+from .pycompat import OrderedDict, dask_array_type, basestring
 from .utils import is_dict_like
 
 _DEFAULT_FROZEN_SET = frozenset()
@@ -926,6 +926,103 @@ def earth_mover_distance(first_samples,
         return apply_array_ufunc(func, *args, dask=dask)
 
 
+def dot(*arrays, **kwargs):
+    """ dot(*arrays, *, dims=None)
+
+    Generalized dot product for xarray objects.
+
+    This is ``np.einsum`` for xarray objects, but with a simpler interface
+    based on dimension names instead of subscript strings.
+
+    Parameters
+    ----------
+    arrays : DataArray or Variable
+        Two or more arrays to multiply.
+    dims : str or sequence of str, optional
+        Dimensions to be summed over.
+        If not specified, all the dimensions common to every array are
+        summed over.
+
+    Returns
+    -------
+    dot : the object returned by ``apply_ufunc`` for the given inputs,
+        with the summed dimensions removed.
+
+    Raises
+    ------
+    TypeError
+        If fewer than two arrays are given, if any array is neither a
+        ``DataArray`` nor a ``Variable``, or if a keyword argument other
+        than ``dims`` is passed.
+    ValueError
+        If the einsum path is needed and the arrays have more distinct
+        dimensions than there are einsum labels available.
+
+    Examples
+    --------
+
+    >>> da_a = xr.DataArray(np.arange(3 * 4).reshape(3, 4), dims=['a', 'b'])
+    >>> da_b = xr.DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5),
+    ...                     dims=['a', 'b', 'c'])
+    >>> da_c = xr.DataArray(np.arange(5 * 6).reshape(5, 6), dims=['c', 'd'])
+
+    >>> dot(da_a, da_b, dims=['a', 'b']).dims
+    ('c',)
+    >>> dot(da_a, da_b, dims=['a']).dims
+    ('b', 'c')
+    >>> dot(da_a, da_b, da_c, dims=['b', 'c']).dims
+    ('a', 'd')
+    """
+    from .dataarray import DataArray
+    from .variable import Variable
+
+    dims = kwargs.pop('dims', None)
+    # ``dims`` is keyword-only; do not silently swallow misspelled options.
+    if kwargs:
+        raise TypeError('Invalid keyword arguments {} are given'.format(
+            list(kwargs)))
+
+    if len(arrays) < 2:
+        raise TypeError('More than one arrays must be provided')
+
+    if any(not isinstance(arr, (DataArray, Variable)) for arr in arrays):
+        raise TypeError('Only xr.DataArray and xr.Variable are supported.')
+
+    if isinstance(dims, basestring):
+        dims = [dims]
+
+    # dimensions shared by *every* array
+    common_dims = set(arrays[0].dims)
+    for arr in arrays[1:]:
+        common_dims.intersection_update(arr.dims)
+
+    # every dimension, in order of first appearance
+    all_dims = []
+    for arr in arrays:
+        all_dims += [d for d in arr.dims if d not in all_dims]
+
+    if dims is None:
+        dims = common_dims
+    # Normalize to a duplicate-free list in a deterministic order. This
+    # also accepts tuples, which previously failed on list concatenation.
+    sum_dims = [d for d in all_dims if d in dims]
+
+    # common dimensions that are kept are looped over by apply_ufunc
+    broadcast_dims = [d for d in all_dims
+                      if d in common_dims and d not in sum_dims]
+
+    input_core_dims = [[d for d in arr.dims if d not in broadcast_dims]
+                       for arr in arrays]
+    output_core_dims = [[d for d in all_dims
+                         if d not in sum_dims and d not in broadcast_dims]]
+
+    # we use tensordot if available, because it is more efficient for dask.
+    # tensordot can only contract axes that exist in both operands, so a
+    # summed dimension found in only one array must go through einsum.
+    if (not broadcast_dims and len(arrays) == 2 and
+            all(d in common_dims for d in sum_dims)):
+        # Without broadcast dimensions every axis is a core dimension, so
+        # the position in ``input_core_dims`` is the axis number. Both
+        # lists follow the order of ``sum_dims`` so that tensordot pairs
+        # the axes of the same dimension even if the two arrays store
+        # their dimensions in a different order.
+        axes = [[core_dims.index(d) for d in sum_dims]
+                for core_dims in input_core_dims]
+        return apply_ufunc(duck_array_ops.tensordot, *arrays, dask='allowed',
+                           input_core_dims=input_core_dims,
+                           output_core_dims=output_core_dims,
+                           kwargs={'axes': axes})
+
+    einsum_axes = 'abcdefghijklmnopqrstuvwxyz'
+    if len(all_dims) > len(einsum_axes):
+        raise ValueError('dot supports at most {} distinct dimensions, '
+                         'but {} are given.'.format(len(einsum_axes),
+                                                    len(all_dims)))
+    dim_map = dict(zip(all_dims, einsum_axes))
+
+    # the leading ellipsis stands for the broadcast dimensions
+    subscripts = ','.join('...' + ''.join(dim_map[d] for d in core_dims)
+                          for core_dims in input_core_dims)
+    subscripts += '->...' + ''.join(dim_map[d] for d in output_core_dims[0])
+
+    # dtype estimation is necessary for dask='parallelized'
+    out_dtype = dtypes.result_type(*arrays)
+
+    # subscripts should be passed as arg, instead of kwargs. We need
+    # to pass a partial function especially for parallelized computation.
+    func = functools.partial(np.einsum, subscripts)
+    result = apply_ufunc(func, *arrays,
+                         input_core_dims=input_core_dims,
+                         output_core_dims=output_core_dims,
+                         dask='parallelized', output_dtypes=[out_dtype])
+    return result.transpose(*[d for d in all_dims if d in result.dims])
+
+
 def where(cond, x, y):
     """Return elements from `x` or `y` depending on `cond`.
 

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pandas as pd
 
-from . import duck_array_ops, groupby, indexing, ops, resample, rolling, utils
+from . import computation, groupby, indexing, ops, resample, rolling, utils
 from ..plot.plot import _PlotMethods
 from .accessors import DatetimeAccessor
 from .alignment import align, reindex_like_indexers
@@ -1926,7 +1926,7 @@ def real(self):
     def imag(self):
         return self._replace(self.variable.imag)
 
-    def dot(self, other):
+    def dot(self, other, dims=None):
         """Perform dot product of two DataArrays along their shared dims.
 
         Equivalent to taking taking tensordot over all shared dims.
@@ -1935,6 +1935,9 @@ def dot(self, other):
         ----------
         other : DataArray
             The other array with which the dot product is performed.
+        dims : str or list of str, optional
+            Dimensions to be summed over. By default, all the dimensions
+            common to both arrays are summed over.
 
         Returns
         -------
@@ -1943,6 +1946,7 @@ def dot(self, other):
 
         See also
         --------
+        dot
         numpy.tensordot
 
         Examples
@@ -1968,23 +1972,7 @@ def dot(self, other):
         if not isinstance(other, DataArray):
             raise TypeError('dot only operates on DataArrays.')
 
-        # sum over the common dims
-        dims = set(self.dims) & set(other.dims)
-        if len(dims) == 0:
-            raise ValueError('DataArrays have no shared dimensions over which '
-                             'to perform dot.')
-
-        self, other = align(self, other, join='inner', copy=False)
-
-        axes = (self.get_axis_num(dims), other.get_axis_num(dims))
-        new_data = duck_array_ops.tensordot(self.data, other.data, axes=axes)
-
-        new_coords = self.coords.merge(other.coords)
-        new_coords = new_coords.drop([d for d in dims if d in new_coords])
-        new_dims = ([d for d in self.dims if d not in dims] +
-                    [d for d in other.dims if d not in dims])
-
-        return type(self)(new_data, new_coords.variables, new_dims)
+        return computation.dot(self, other, dims=dims)
 
     def sortby(self, variables, ascending=True):
         """

diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py
@@ -13,8 +13,9 @@
     _UFuncSignature, apply_ufunc, broadcast_compat_data, collect_dict_values,
     join_dict_keys, ordered_set_intersection, ordered_set_union, result_name,
     unified_dim_sizes)
+from xarray.core.pycompat import dask_array_type
 
-from . import raises_regex, requires_dask
+from . import raises_regex, requires_dask, has_dask
 
 
 def assert_identical(a, b):
@@ -744,6 +745,64 @@ def test_vectorize_dask():
     assert_identical(expected, actual)
 
 
+@pytest.mark.parametrize('dask', [True, False])
+def test_dot(dask):
+    """Compare ``xr.dot`` with ``np.einsum`` for numpy and dask inputs."""
+    # A bare ``pytest.mark.skipif(...)`` expression inside a test body only
+    # creates a mark object and never skips, so skip explicitly.
+    if dask and not has_dask:
+        pytest.skip('test for dask.')
+
+    a = np.arange(30 * 4).reshape(30, 4)
+    b = np.arange(30 * 4 * 5).reshape(30, 4, 5)
+    c = np.arange(5 * 60).reshape(5, 60)
+    da_a = xr.DataArray(a, dims=['a', 'b'])
+    da_b = xr.DataArray(b, dims=['a', 'b', 'c'])
+    da_c = xr.DataArray(c, dims=['c', 'e'])
+    if dask:
+        da_a = da_a.chunk({'a': 3})
+        da_b = da_b.chunk({'a': 3})
+        da_c = da_c.chunk({'c': 3})
+
+    # explicit dims: every common dimension is summed (tensordot path)
+    actual = xr.dot(da_a, da_b, dims=['a', 'b'])
+    assert actual.dims == ('c', )
+    assert (actual.data == np.einsum('ij,ijk->k', a, b)).all()
+    assert isinstance(actual.variable.data, type(da_a.variable.data))
+
+    # default dims: same result as summing over all the common dimensions
+    actual = xr.dot(da_a, da_b)
+    assert actual.dims == ('c', )
+    assert (actual.data == np.einsum('ij,ijk->k', a, b)).all()
+    assert isinstance(actual.variable.data, type(da_a.variable.data))
+
+    if dask:
+        # 'a' is kept as a broadcast dimension; only 'a' is chunked, so the
+        # core dimensions each consist of a single chunk.
+        actual = xr.dot(da_a, da_b, dims=['b'])
+        assert actual.dims == ('a', 'c')
+        assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()
+        assert isinstance(actual.variable.data, type(da_a.variable.data))
+
+        pytest.skip('dot for dask array requires rechunking for core '
+                    'dimensions.')
+
+    # following requires rechunking
+    actual = xr.dot(da_a, da_b, dims=['b'])
+    assert actual.dims == ('a', 'c')
+    assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()
+
+    # a single dimension may be given as a plain string
+    actual = xr.dot(da_a, da_b, dims='b')
+    assert actual.dims == ('a', 'c')
+    assert (actual.data == np.einsum('ij,ijk->ik', a, b)).all()
+
+    actual = xr.dot(da_a, da_b, dims='a')
+    assert actual.dims == ('b', 'c')
+    assert (actual.data == np.einsum('ij,ijk->jk', a, b)).all()
+
+    # summing over a dimension present in only one of the arrays
+    actual = xr.dot(da_a, da_b, dims='c')
+    assert actual.dims == ('a', 'b')
+    assert (actual.data == np.einsum('ij,ijk->ij', a, b)).all()
+
+    # more than two arrays
+    actual = xr.dot(da_a, da_b, da_c, dims=['a', 'b'])
+    assert actual.dims == ('c', 'e')
+    assert (actual.data == np.einsum('ij,ijk,kl->kl', a, b, c)).all()
+
+
 def test_where():
     cond = xr.DataArray([True, False], dims='x')
     actual = xr.where(cond, 1, 0)

diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
@@ -3200,8 +3200,6 @@ def test_dot(self):
             da.dot(dm.to_dataset(name='dm'))
         with pytest.raises(TypeError):
             da.dot(dm.values)
-        with raises_regex(ValueError, 'no shared dimensions'):
-            da.dot(DataArray(1))
 
     def test_binary_op_join_setting(self):
         dim = 'x'
-Original file line number
+Diff line change
@@ Expand Up / @@ -24,6 +24,7 @@ Top-level functions @@
        full_like
        zeros_like
        ones_like
+       dot
     Dataset
     =======
@@ Expand Down @@