diff --git a/test/test_data_array.py b/test/test_data_array.py index cbbab21774b..f8c16a0861d 100644 --- a/test/test_data_array.py +++ b/test/test_data_array.py @@ -2,16 +2,21 @@ import pandas as pd from copy import deepcopy from textwrap import dedent +from collections import OrderedDict -from xray import Dataset, DataArray, Variable, align +from xray import Dataset, DataArray, Variable, align, utils from xray.pycompat import iteritems from . import TestCase, ReturnItem, source_ndarray +_attrs = {'units': 'test', 'long_name': 'testing'} + + class TestDataArray(TestCase): def setUp(self): self.x = np.random.random((10, 20)) self.v = Variable(['x', 'y'], self.x) + self.va = Variable(['x', 'y'], self.x, _attrs) self.ds = Dataset({'foo': self.v}) self.dv = self.ds['foo'] @@ -262,6 +267,18 @@ def test_reduce(self): # needs more... # should check which extra dimensions are dropped + def test_reduce_keep_attrs(self): + + # Test dropped attrs + vm = self.va.mean() + self.assertEqual(len(vm.attrs), 0) + self.assertTrue(utils.dict_equal(vm.attrs, OrderedDict())) + + # Test kept attrs + vm = self.va.mean(keep_attrs=True) + self.assertEqual(len(vm.attrs), len(_attrs)) + self.assertTrue(utils.dict_equal(vm.attrs, _attrs)) + def test_unselect(self): with self.assertRaisesRegexp(ValueError, 'cannot unselect the name'): self.dv.unselect('foo') diff --git a/test/test_dataset.py b/test/test_dataset.py index 9f259d81d45..cb8fd7ce402 100644 --- a/test/test_dataset.py +++ b/test/test_dataset.py @@ -20,6 +20,7 @@ 'var2': ['dim1', 'dim2'], 'var3': ['dim3', 'dim1'], } +_attrs = {'attr1': 'value1', 'attr2': 2929} _testvar = sorted(_vars.keys())[0] _testdim = sorted(_dims.keys())[0] @@ -681,6 +682,21 @@ def test_reduce(self): self.assertDatasetEqual(data.mean(dimension=[]), data) + def test_reduce_keep_attrs(self): + data = create_test_data() + attrs = OrderedDict(_attrs) + data.attrs = attrs + + # Test dropped attrs + ds = data.mean() + self.assertEqual(len(ds.attrs), 0) + 
self.assertTrue(utils.dict_equal(ds.attrs, OrderedDict())) + + # Test kept attrs + ds = data.mean(keep_attrs=True) + self.assertEqual(len(ds.attrs), len(_attrs)) + self.assertTrue(utils.dict_equal(ds.attrs, attrs)) + def test_reduce_bad_dimension(self): data = create_test_data() with self.assertRaisesRegexp(ValueError, 'Dataset does not contain'): diff --git a/test/test_variable.py b/test/test_variable.py index 632db73a0a0..11469b20f65 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -6,13 +6,15 @@ import numpy as np import pandas as pd -from xray import Variable, Dataset, DataArray, indexing +from xray import Variable, Dataset, DataArray, indexing, utils from xray.variable import (Coordinate, as_variable, NumpyArrayAdapter, PandasIndexAdapter, _as_compatible_data) from xray.pycompat import PY3 from . import TestCase, source_ndarray +_attrs = {'units': 'test', 'long_name': 'testing'} + class VariableSubclassTestCases(object): def test_properties(self): @@ -530,6 +532,19 @@ def test_reduce(self): with self.assertRaisesRegexp(ValueError, 'cannot supply both'): v.mean(dimension='x', axis=0) + def test_reduce_keep_attrs(self): + v = Variable(['x', 'y'], self.d, _attrs) + + # Test dropped attrs + vm = v.mean() + self.assertEqual(len(vm.attrs), 0) + self.assertTrue(utils.dict_equal(vm.attrs, OrderedDict())) + + # Test kept attrs + vm = v.mean(keep_attrs=True) + self.assertEqual(len(vm.attrs), len(_attrs)) + self.assertTrue(utils.dict_equal(vm.attrs, _attrs)) + class TestCoordinate(TestCase, VariableSubclassTestCases): cls = staticmethod(Coordinate) diff --git a/xray/data_array.py b/xray/data_array.py index 2a393cee75e..b8fbd81656a 100644 --- a/xray/data_array.py +++ b/xray/data_array.py @@ -465,7 +465,8 @@ def squeeze(self, dimension=None): ds = self.dataset.squeeze(dimension) return ds[self.name] - def reduce(self, func, dimension=None, axis=None, **kwargs): + def reduce(self, func, dimension=None, axis=None, keep_attrs=False, + **kwargs): """Reduce 
this array by applying `func` along some dimension(s). Parameters @@ -481,6 +482,10 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): 'dimension' and 'axis' arguments can be supplied. If neither are supplied, then the reduction is calculated over the flattened array (by calling `f(x)` without an axis argument). + keep_attrs : bool, optional + If True, the variable's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to `func`. @@ -490,7 +495,7 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): DataArray with this object's array replaced with an array with summarized data and the indicated dimension(s) removed. """ - var = self.variable.reduce(func, dimension, axis, **kwargs) + var = self.variable.reduce(func, dimension, axis, keep_attrs, **kwargs) drop = set(self.dimensions) - set(var.dimensions) # For now, take an aggressive strategy of removing all variables # associated with any dropped dimensions diff --git a/xray/dataset.py b/xray/dataset.py index 27a80c4cd0f..ceca39f3428 100644 --- a/xray/dataset.py +++ b/xray/dataset.py @@ -1005,7 +1005,7 @@ def func(self, dimension=None, **kwargs): cls=cls.__name__) return func - def reduce(self, func, dimension=None, **kwargs): + def reduce(self, func, dimension=None, keep_attrs=False, **kwargs): """Reduce this dataset by applying `func` along some dimension(s). Parameters @@ -1019,6 +1019,10 @@ def reduce(self, func, dimension=None, **kwargs): applied over all dimensions. **kwargs : dict Additional keyword arguments passed on to `func`. + keep_attrs : bool, optional + If True, the dataset's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. 
Returns ------- @@ -1027,6 +1031,11 @@ def reduce(self, func, dimension=None, **kwargs): of summarized data and the indicated dimension(s) removed. """ + if keep_attrs: + attrs = self.attrs + else: + attrs = OrderedDict() + if isinstance(dimension, basestring): dims = set([dimension]) elif dimension is None: @@ -1052,7 +1061,7 @@ def reduce(self, func, dimension=None, **kwargs): pass else: variables[name] = var - return Dataset(variables=variables) + return Dataset(variables=variables, attributes=attrs) @classmethod def concat(cls, datasets, dimension='concat_dimension', indexers=None, diff --git a/xray/variable.py b/xray/variable.py index 97b43e0ebb5..0894b4c7ea1 100644 --- a/xray/variable.py +++ b/xray/variable.py @@ -457,7 +457,8 @@ def squeeze(self, dimension=None): dimensions = dict(zip(self.dimensions, self.shape)) return utils.squeeze(self, dimensions, dimension) - def reduce(self, func, dimension=None, axis=None, **kwargs): + def reduce(self, func, dimension=None, axis=None, keep_attrs=False, + **kwargs): """Reduce this array by applying `func` along some dimension(s). Parameters @@ -473,6 +474,10 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): and 'axis' arguments can be supplied. If neither are supplied, then the reduction is calculated over the flattened array (by calling `func(x)` without an axis argument). + keep_attrs : bool, optional + If True, the variable's attributes (`attrs`) will be copied from + the original object to the new one. If False (default), the new + object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to `func`. 
@@ -485,6 +490,10 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): if dimension is not None and axis is not None: raise ValueError("cannot supply both 'axis' and 'dimension' " "arguments") + if keep_attrs: + attrs = self.attrs + else: + attrs = OrderedDict() if dimension is not None: axis = self.get_axis_num(dimension) @@ -495,7 +504,7 @@ def reduce(self, func, dimension=None, axis=None, **kwargs): dims = [dim for n, dim in enumerate(self.dimensions) if n not in removed_axes] - return Variable(dims, data) + return Variable(dims, data, attributes=attrs) @classmethod def concat(cls, variables, dimension='stacked_dimension',