-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
xarray to and from iris #814
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
f7842b3
e0498c5
c261845
edae053
cd92bca
44930af
6bed306
cd06a2e
15fbbba
e7f9cb1
03e9076
877d06f
f48de5a
e42aeb2
338ef6b
46f68ff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,24 +3,36 @@ | |
import numpy as np | ||
|
||
from .core.dataarray import DataArray | ||
from .core.pycompat import OrderedDict | ||
from .conventions import ( | ||
maybe_encode_timedelta, maybe_encode_datetime, decode_cf) | ||
|
||
ignored_attrs = set(['name', 'tileIndex']) | ||
cdms2_ignored_attrs = {'name', 'tileIndex'} | ||
iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', | ||
'calendar', 'leap_month', 'leap_year', 'month_lengths', | ||
'coordinates', 'grid_mapping', 'climatology', | ||
'cell_methods', 'formula_terms', 'compress', | ||
'missing_value', 'add_offset', 'scale_factor', | ||
'valid_max', 'valid_min', 'valid_range', '_FillValue'} | ||
|
||
|
||
def encode(var): | ||
return maybe_encode_timedelta(maybe_encode_datetime(var.variable)) | ||
|
||
|
||
def filter_attrs(_attrs, ignored_attrs): | ||
return dict((k, v) for k, v in _attrs.items() if k not in ignored_attrs) | ||
|
||
|
||
def from_cdms2(variable): | ||
"""Convert a cdms2 variable into an DataArray | ||
""" | ||
def get_cdms2_attrs(var): | ||
return dict((k, v) for k, v in var.attributes.items() | ||
if k not in ignored_attrs) | ||
|
||
values = np.asarray(variable) | ||
name = variable.id | ||
coords = [(v.id, np.asarray(v), get_cdms2_attrs(v)) | ||
coords = [(v.id, np.asarray(v), | ||
filter_attrs(v.attributes, cdms2_ignored_attrs)) | ||
for v in variable.getAxisList()] | ||
attrs = get_cdms2_attrs(variable) | ||
attrs = filter_attrs(variable.attributes, cdms2_ignored_attrs) | ||
dataarray = DataArray(values, coords=coords, name=name, attrs=attrs) | ||
return decode_cf(dataarray.to_dataset())[dataarray.name] | ||
|
||
|
@@ -31,12 +43,9 @@ def to_cdms2(dataarray): | |
# we don't want cdms2 to be a hard dependency | ||
import cdms2 | ||
|
||
def encode(var): | ||
return maybe_encode_timedelta(maybe_encode_datetime(var.variable)) | ||
|
||
def set_cdms2_attrs(var, attrs): | ||
def set_cdms2_attrs(_var, attrs): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a reason you needed to change this variable name? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't like variable shadowing, so I prefer to add the underscore; that's because I have had to deal with some really badly written academic code. Here it's clear enough, so I can change it back. |
||
for k, v in attrs.items(): | ||
setattr(var, k, v) | ||
setattr(_var, k, v) | ||
|
||
axes = [] | ||
for dim in dataarray.dims: | ||
|
@@ -49,3 +58,81 @@ def set_cdms2_attrs(var, attrs): | |
cdms2_var = cdms2.createVariable(var.values, axes=axes, id=dataarray.name) | ||
set_cdms2_attrs(cdms2_var, var.attrs) | ||
return cdms2_var | ||
|
||
|
||
# TODO: Add converting bounds from xarray to Iris and back | ||
# TODO: Cell methods are not converted between Iris and xarray | ||
def to_iris(dataarray): | ||
"""Convert a DataArray into a Iris Cube | ||
""" | ||
# Iris not a hard dependency | ||
import iris | ||
# iris.unit is deprecated in Iris v1.9 | ||
import cf_units | ||
|
||
def check_attrs(attrs, keys): | ||
return dict((k, v) for k, v in attrs.items() if k in keys) | ||
|
||
def get_args(attrs): | ||
_args = {'attributes': filter_attrs(attrs, iris_forbidden_keys)} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No need to preface variables inside a function with an underscore -- they are already scoped to only the internal helper function. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It was because there was an outer args and I wanted to make it clear this was not that. I have seen people write helper functions and make use of outer scope variables. Again, maybe it's clear enough here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Another option is to make some private helper functions (preface the names On Wed, May 11, 2016 at 1:30 PM, Neil Parley [email protected]
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that's a good idea. I will add those changes. |
||
_args.update(check_attrs(attrs, ('standard_name', 'long_name',))) | ||
_unit_args = check_attrs(coord.attrs, ('calendar',)) | ||
if attrs.has_key('units'): | ||
_args['units'] = cf_units.Unit(attrs['units'], **_unit_args) | ||
return _args | ||
|
||
dim_coords = [] | ||
aux_coords = [] | ||
|
||
for coord_name in dataarray.coords: | ||
coord = encode(dataarray.coords[coord_name]) | ||
coord_args = get_args(coord.attrs) | ||
coord_args['var_name'] = coord_name | ||
iris_coord = iris.coords.DimCoord(coord.values, **coord_args) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are probably going to need to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I had that thought too last night. I started off with the code only working for dimensional coordinates and then extended to auxiliary later. It's because |
||
axis = None | ||
if coord.dims: | ||
axis = dataarray.get_axis_num(coord.dims) | ||
if coord_name in dataarray.dims: | ||
dim_coords.append((iris_coord, axis)) | ||
else: | ||
aux_coords.append((iris_coord, axis)) | ||
|
||
args = get_args(dataarray.attrs) | ||
args['var_name'] = dataarray.name | ||
args['dim_coords_and_dims'] = dim_coords | ||
args['aux_coords_and_dims'] = aux_coords | ||
|
||
cube = iris.cube.Cube(dataarray.to_masked_array(), **args) | ||
return cube | ||
|
||
|
||
def from_iris(cube): | ||
"""Convert a Iris cube into an DataArray | ||
""" | ||
def get_attr(_obj): | ||
attrs = {'standard_name': _obj.standard_name, | ||
'long_name': _obj.long_name} | ||
if _obj.units.calendar: | ||
attrs['calendar'] = _obj.units.calendar | ||
if _obj.units.origin != '1': | ||
attrs['units'] = _obj.units.origin | ||
attrs.update(_obj.attributes) | ||
return dict((k, v) for k, v in attrs.items() if v is not None) | ||
|
||
name = cube.var_name | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it possible for a cube not to have a variable name? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this could possibly be None. It's the CF variable name, but I think the cube could have been made without reading in a file, so that the name would be None. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A quick search in the Iris source gives this: @property
def var_name(self):
"""The CF variable name for the object."""
return self._var_name
@var_name.setter
def var_name(self, name):
if name is not None:
if not name:
raise ValueError('An empty string is not a valid CF variable '
'name.')
elif set(name).intersection(string.whitespace):
raise ValueError('{!r} is not a valid CF variable name because'
' it contains whitespace.'.format(name))
self._var_name = name So I would conclude that it is either None or a non-empty string (never an empty string) |
||
dims = [dim.var_name for dim in cube.dim_coords] | ||
coords = OrderedDict() | ||
|
||
for coord in cube.coords(): | ||
coord_attrs = get_attr(coord) | ||
coord_dims = [cube.coords()[i].var_name for i in cube.coord_dims(coord)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's probably a better idea to calculate There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I should actually be able to use |
||
if coord_dims: | ||
coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's also possible to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there an xarray exception that might make sense for the error? Or just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We usually just raise ValueError for cases like this where we cannot handle On Wed, Aug 10, 2016 at 2:01 PM, Neil Parley [email protected]
|
||
else: | ||
coords[coord.var_name] = ((), | ||
np.asscalar(coord.points), coord_attrs) | ||
|
||
array_attrs = get_attr(cube) | ||
dataarray = DataArray(cube.data, coords=coords, name=name, | ||
attrs=array_attrs, dims=dims) | ||
return decode_cf(dataarray.to_dataset())[dataarray.name] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1639,6 +1639,50 @@ def test_to_and_from_cdms2(self): | |
roundtripped = DataArray.from_cdms2(actual) | ||
self.assertDataArrayIdentical(original, roundtripped) | ||
|
||
def test_to_and_from_iris(self): | ||
try: | ||
import iris | ||
except ImportError: | ||
raise unittest.SkipTest('iris not installed') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would it be better to use a |
||
|
||
coord_dict = OrderedDict() | ||
coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'}) | ||
coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) | ||
coord_dict['height'] = 10 | ||
coord_dict['distance2'] = ('distance', [0, 1]) | ||
|
||
original = DataArray(np.arange(6).reshape(2, 3), coord_dict, | ||
name='Temperature', attrs={'baz': 123, | ||
'units': 'Kelvin', | ||
'standard_name': | ||
'fire_temperature', | ||
'long_name': | ||
'Fire Temperature'}, | ||
dims=('distance', 'time')) | ||
|
||
expected_coords = [Coordinate('distance', [-2, 2]), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe better to just pull these out of |
||
Coordinate('time', [0, 1, 2]), | ||
Coordinate('height', [10]), | ||
Coordinate('distance2', [0, 1])] | ||
|
||
actual = original.to_iris() | ||
self.assertArrayEqual(actual.data, original.data) | ||
self.assertEqual(actual.var_name, original.name) | ||
self.assertItemsEqual([d.var_name for d in actual.dim_coords], | ||
original.dims) | ||
|
||
for coord, expected_coord in zip((actual.coords()), expected_coords): | ||
self.assertEqual(coord.var_name, expected_coord.name) | ||
self.assertArrayEqual(coord.points, expected_coord.values) | ||
self.assertEqual(actual.coord_dims(coord), | ||
original.get_axis_num | ||
(original.coords[coord.var_name].dims)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe add a check for coordinate attributes? Might be worth checking:
|
||
self.assertEqual(actual.attributes['baz'], original.attrs['baz']) | ||
self.assertEqual(actual.standard_name, original.attrs['standard_name']) | ||
|
||
roundtripped = DataArray.from_iris(actual) | ||
self.assertDataArrayIdentical(original, roundtripped) | ||
|
||
def test_to_dataset_whole(self): | ||
unnamed = DataArray([1, 2], dims='x') | ||
with self.assertRaisesRegexp(ValueError, 'unable to convert unnamed'): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently we support Python 2.6, so set literals will cause test failures.
I think we can change this for the next major release of xarray though -- I just sent out a mailing list post about this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can change these; I wasn't doing dictionary comprehension with the new syntax for this reason but forgot about sets. It would not surprise me if quite a few scientists were still using Python 2.6. Why did this not get picked up in the travis py26 tests? Actually, I can answer my own question here: I guess it's because no tests hit
xarray/convert.py
if Iris or cdms are not installed.