class ScipyVariable(variable.Variable):
    """Variable whose fields are taken from an existing variable object.

    NOTE(review): the class name suggests ``scipy_var`` is a scipy netCDF
    variable -- confirm against the data store that creates these objects.
    """

    def __init__(self, scipy_var):
        """Fill in the private Variable fields from ``scipy_var``.

        ``Variable.__init__`` is not called; ``_dimensions``, ``_data`` and
        ``_attributes`` are assigned directly from the wrapped object.

        Parameters
        ----------
        scipy_var : object
            Must provide ``dimensions``, ``data`` and ``_attributes``.
        """
        # Normalise to an immutable tuple so the dimension names cannot be
        # mutated through this object, whatever sequence type the wrapped
        # variable uses.
        self._dimensions = tuple(scipy_var.dimensions)
        self._data = scipy_var.data
        # The wrapped object's attribute mapping is shared, not copied.
        self._attributes = scipy_var._attributes
def _expand_key(key, ndim):
    """Expand an ndarray indexing key to one entry per dimension.

    Parameters
    ----------
    key : object or tuple
        Anything accepted for getting an item from an ndarray. A non-tuple
        key is treated as a one-element tuple.
    ndim : int
        Number of dimensions of the array being indexed.

    Returns
    -------
    tuple
        An equivalent key in which entry ``i`` applies to dimension ``i``.
        Dimensions not mentioned in ``key`` get ``slice(None)``. A single
        ``Ellipsis`` is replaced by as many ``slice(None)`` entries as are
        needed to reach ``ndim``, so entries written after it line up with
        the trailing dimensions.

    Notes
    -----
    A key with more entries than ``ndim`` is returned unpadded, and a key
    holding several ``Ellipsis`` objects is left as given; in both cases the
    subsequent indexing operation is left to report the problem.
    ``None`` entries receive no special treatment.
    """
    if not isinstance(key, tuple):
        key = (key,)

    # Identity test on purpose: entries may be arrays, for which ``==``
    # (and therefore ``in`` / ``list.index``) is elementwise.
    ellipsis_at = [i for i, k in enumerate(key) if k is Ellipsis]
    if len(ellipsis_at) == 1:
        pos = ellipsis_at[0]
        # The ellipsis stands for every dimension the other entries leave
        # unaddressed (possibly none at all).
        n_fill = max(ndim - (len(key) - 1), 0)
        key = key[:pos] + (slice(None),) * n_fill + key[pos + 1:]

    new_key = [slice(None)] * ndim
    new_key[:len(key)] = key
    return tuple(new_key)
def __getitem__(self, key):
    """Return a new Variable built from ``self._data[key]``.

    Parameters
    ----------
    key : object or tuple
        Index expression; it is first expanded by ``_expand_key`` to one
        entry per dimension of this variable.

    Returns
    -------
    Variable
        Holds the indexed data, the names of the dimensions that were not
        indexed by an integer, and this variable's attributes.
    """
    key = _expand_key(key, self.ndim)
    # Indexing an axis with an integer removes that axis from the result,
    # so its name must be dropped as well. NumPy integer scalars (for
    # example values read out of an index array) behave like Python ints
    # when indexing but are not ``int`` instances, hence the explicit
    # ``np.integer`` check.
    dimensions = [dim for k, dim in zip(key, self.dimensions)
                  if not isinstance(k, (int, np.integer))]
    return Variable(dimensions, self._data[key], self.attributes)
def __eq__(self, other):
    """Return True when dimensions, data and attributes all match.

    Data is compared with ``np.array_equal``: arrays of different shapes
    are unequal (no broadcasting takes place) and a plain ``bool`` is
    returned. An ``other`` that lacks ``dimensions``, ``data`` or
    ``attributes`` compares unequal instead of raising.
    """
    try:
        return bool(self.dimensions == other.dimensions
                    and np.array_equal(self.data, other.data)
                    and self.attributes == other.attributes)
    except AttributeError:
        # ``other`` does not look like a Variable
        return False
- - For example, imagine you have some variable that was - derived from an opendap dataset, 'nc'. - - var = nc['massive_variable'] - - if you wanted to check the data type of var: - - var.data.dtype - - you would find that it might involve downloading all - of the actual data, then inspecting the resulting - numpy array. But with this wrapper calling: - - nc['large_variable'].data.someattribute - - will first inspect the Variable object to see if it has - the desired attribute and only then will it suck down the - actual numpy array and request 'someattribute'. - """ - def __init__(self, lazy_variable): - self.lazyvar = lazy_variable - - def __eq__(self, other): - return self.lazyvar[:] == other - - def __ne__(self, other): - return self.lazyvar[:] != other - - def __getitem__(self, key): - return self.lazyvar[key] - - def __setitem__(self, key, value): - if not isinstance(self.lazyvar, Variable): - self.lazyvar = Variable(self.lazyvar.dimensions, - data = self.lazyvar[:], - dtype = self.lazyvar.dtype, - shape = self.lazyvar.shape, - attributes = self.lazyvar.attributes) - self.lazyvar.__setitem__(key, value) - - def __getattr__(self, attr): - """__getattr__ is overloaded to selectively expose some of the - attributes of the underlying lazy variable""" - if hasattr(self.lazyvar, attr): - return getattr(self.lazyvar, attr) - else: - return getattr(self.lazyvar[:], attr) \ No newline at end of file + axis = self.dimensions.index(dim) + # take only works on actual numpy arrays + data = self.data.take(indices, axis=axis) + return Variable(self.dimensions, data, self.attributes) diff --git a/test/test_data.py b/test/test_data.py index fc9023ab954..83eeea1a0d9 100644 --- a/test/test_data.py +++ b/test/test_data.py @@ -1,7 +1,6 @@ import unittest import os.path import numpy as np -import scipy.interpolate from copy import deepcopy from cStringIO import StringIO @@ -93,8 +92,8 @@ def test_variable(self): a.create_variable(name='bar', dims=('time', 'x',), data=d) # order of 
class TestVariable(unittest.TestCase):
    """Exercises polyglot.Variable: data access, properties, indexing, views."""

    def setUp(self):
        # every test starts from a fresh random 10 x 3 array
        self.d = np.random.random((10, 3))

    def _time_x_variable(self, *extra):
        """Build a ('time', 'x') Variable over self.d; extra args pass through."""
        return polyglot.Variable(['time', 'x'], self.d, *extra)

    def test_data(self):
        """data hands back the stored array and rejects wrongly shaped input."""
        var = self._time_x_variable({'foo': 'bar'})
        self.assertIs(var.data, self.d)
        with self.assertRaises(ValueError):
            # wrong size
            var.data = np.random.random(5)
        replacement = np.random.random((10, 3))
        var.data = replacement
        self.assertIs(var.data, replacement)

    def test_properties(self):
        """The array-like properties reflect the wrapped data."""
        var = self._time_x_variable({'foo': 'bar'})
        self.assertEqual(var.dimensions, ('time', 'x'))
        self.assertEqual(var.dtype, float)
        self.assertEqual(var.shape, (10, 3))
        self.assertEqual(var.size, 30)
        self.assertEqual(var.ndim, 2)
        self.assertEqual(len(var), 10)
        self.assertEqual(var.attributes, {'foo': u'bar'})

    def test_items(self):
        """Indexing yields Variables; iteration walks the first axis."""
        var = self._time_x_variable()
        self.assertEqual(var, var[:])
        self.assertEqual(var, var[...])
        first_row = polyglot.Variable(['x'], self.d[0])
        self.assertEqual(first_row, var[0])
        first_column = polyglot.Variable(['time'], self.d[:, 0])
        self.assertEqual(first_column, var[:, 0])
        corner = polyglot.Variable(['time', 'x'], self.d[:3, :2])
        self.assertEqual(corner, var[:3, :2])
        rows = [polyglot.Variable(['x'], self.d[i]) for i in range(10)]
        self.assertItemsEqual(rows, var)
        var.data[:] = 0
        self.assertTrue(np.all(var.data == 0))

    def test_views(self):
        """views() by dimension name agrees with positional indexing."""
        var = self._time_x_variable()
        self.assertEqual(var.views({'time': slice(None)}), var)
        self.assertEqual(var.views({'time': 0}), var[0])
        self.assertEqual(var.views({'time': slice(0, 3)}), var[:3])
        self.assertEqual(var.views({'x': 0}), var[:, 0])