Skip to content

xarray to and from iris #814

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ matrix:
- python: 2.7
env: CONDA_ENV=py27-min
- python: 2.7
env: CONDA_ENV=py27-cdat+pynio
env: CONDA_ENV=py27-cdat+iris+pynio
- python: 3.4
env: CONDA_ENV=py34
- python: 3.5
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ name: test_env
channels:
- ajdawson # cdat
- dbrown # pynio
- scitools # iris
dependencies:
- python=2.7
- cdat-lite
Expand All @@ -11,6 +12,7 @@ dependencies:
- pandas>=0.15.0
- pynio
- scipy
- iris
- pip:
- coveralls
- pytest-cov
Expand Down
145 changes: 133 additions & 12 deletions xarray/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,40 @@
import numpy as np

from .core.dataarray import DataArray
from .core.pycompat import OrderedDict
from .conventions import (
maybe_encode_timedelta, maybe_encode_datetime, decode_cf)

ignored_attrs = set(['name', 'tileIndex'])
cdms2_ignored_attrs = set(['name', 'tileIndex'])
iris_forbidden_keys = set(
['standard_name', 'long_name', 'units', 'bounds', 'axis',
'calendar', 'leap_month', 'leap_year', 'month_lengths',
'coordinates', 'grid_mapping', 'climatology',
'cell_methods', 'formula_terms', 'compress',
'missing_value', 'add_offset', 'scale_factor',
'valid_max', 'valid_min', 'valid_range', '_FillValue'])
cell_methods_strings = set(['point', 'sum', 'maximum', 'median', 'mid_range',
'minimum', 'mean', 'mode', 'standard_deviation',
'variance'])


def encode(var):
    """Return ``var.variable`` with datetime and timedelta encoding applied.

    ``var.variable`` is passed through ``maybe_encode_datetime`` first and
    the result is then passed through ``maybe_encode_timedelta``.

    Parameters
    ----------
    var : object exposing a ``variable`` attribute
        The object whose underlying variable should be encoded.

    Returns
    -------
    The value returned by ``maybe_encode_timedelta``.
    """
    return maybe_encode_timedelta(maybe_encode_datetime(var.variable))


def filter_attrs(_attrs, ignored_attrs):
    """Return a new dict with the ignored keys removed from ``_attrs``.

    Parameters
    ----------
    _attrs : mapping
        Attribute mapping to filter. It is only read (via ``.items()``) and
        is never modified.
    ignored_attrs : container
        Keys to leave out. Only membership tests (``in``) are performed on it.

    Returns
    -------
    dict
        Every ``(key, value)`` pair of ``_attrs`` whose key is not in
        ``ignored_attrs``.
    """
    return {k: v for k, v in _attrs.items() if k not in ignored_attrs}


def from_cdms2(variable):
"""Convert a cdms2 variable into an DataArray
"""
def get_cdms2_attrs(var):
return dict((k, v) for k, v in var.attributes.items()
if k not in ignored_attrs)

values = np.asarray(variable)
name = variable.id
coords = [(v.id, np.asarray(v), get_cdms2_attrs(v))
coords = [(v.id, np.asarray(v),
filter_attrs(v.attributes, cdms2_ignored_attrs))
for v in variable.getAxisList()]
attrs = get_cdms2_attrs(variable)
attrs = filter_attrs(variable.attributes, cdms2_ignored_attrs)
dataarray = DataArray(values, coords=coords, name=name, attrs=attrs)
return decode_cf(dataarray.to_dataset())[dataarray.name]

Expand All @@ -31,12 +47,9 @@ def to_cdms2(dataarray):
# we don't want cdms2 to be a hard dependency
import cdms2

def encode(var):
return maybe_encode_timedelta(maybe_encode_datetime(var.variable))

def set_cdms2_attrs(var, attrs):
def set_cdms2_attrs(_var, attrs):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason you needed to change this variable name?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like variable shadowing, so I prefer to add the underscore; the habit comes from having had to deal with some really badly written academic code. Here it's clear enough, so I can change it back.

for k, v in attrs.items():
setattr(var, k, v)
setattr(_var, k, v)

axes = []
for dim in dataarray.dims:
Expand All @@ -49,3 +62,111 @@ def set_cdms2_attrs(var, attrs):
cdms2_var = cdms2.createVariable(var.values, axes=axes, id=dataarray.name)
set_cdms2_attrs(cdms2_var, var.attrs)
return cdms2_var


# TODO: Add converting bounds from xarray to Iris and back
def to_iris(dataarray):
"""Convert a DataArray into a Iris Cube
"""
# Iris not a hard dependency
import iris
try:
from iris.fileformats.netcdf import parse_cell_methods
except ImportError:
# prior to v1.10
from iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \
import _parse_cell_methods as parse_cell_methods

# iris.unit is deprecated in Iris v1.9
import cf_units

def check_attrs(attrs, keys):
return dict((k, v) for k, v in attrs.items() if k in keys)

def get_args(attrs):
_args = {'attributes': filter_attrs(attrs, iris_forbidden_keys)}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no need to preface variables inside a function with an underscore -- they are already scoped to only the internal helper function.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was because there was an outer args and I wanted to make it clear this was not that. I have seen people write helper functions and make use of outer scope variables. Again maybe it's clear enough here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another option is to make some private helper functions (preface the names
with _) in this module instead of nesting them inside the convert function.
That can often be significantly cleaner.

On Wed, May 11, 2016 at 1:30 PM, Neil Parley [email protected]
wrote:

In xarray/convert.py
#814 (comment):

  • import iris
  • try:
  •    from iris.fileformats.netcdf import parse_cell_methods
    
  • except ImportError:
  •    # prior to v1.10
    
  •    from iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \
    
  •        import _parse_cell_methods as parse_cell_methods
    
  • iris.unit is deprecated in Iris v1.9

  • import cf_units
  • def check_attrs(attrs, keys):
  •    return dict((k, v) for k, v in attrs.items() if k in keys)
    
  • def get_args(attrs):
  •    _args = {'attributes': filter_attrs(attrs, iris_forbidden_keys)}
    

It was because there was an outer args and I wanted to make it clear this
was not that. I have seen people write helper functions and make use of
outer scope variables. Again maybe it's clear enough here.


You are receiving this because you were mentioned.
Reply to this email directly or view it on GitHub
https://github.com/pydata/xarray/pull/814/files/cd06a2e23e77fff6c96ad00c6285a5966bc63b1f#r62921013

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's a good idea. I will add those changes.

_args.update(check_attrs(attrs, ('standard_name', 'long_name',)))
_unit_args = check_attrs(coord.attrs, ('calendar',))
if 'units' in attrs:
_args['units'] = cf_units.Unit(attrs['units'], **_unit_args)
return _args

dim_coords = []
aux_coords = []

for coord_name in dataarray.coords:
coord = encode(dataarray.coords[coord_name])
coord_args = get_args(coord.attrs)
coord_args['var_name'] = coord_name
axis = None
if coord.dims:
axis = dataarray.get_axis_num(coord.dims)
if coord_name in dataarray.dims:
iris_coord = iris.coords.DimCoord(coord.values, **coord_args)
dim_coords.append((iris_coord, axis))
else:
iris_coord = iris.coords.AuxCoord(coord.values, **coord_args)
aux_coords.append((iris_coord, axis))

args = get_args(dataarray.attrs)
args['var_name'] = dataarray.name
args['dim_coords_and_dims'] = dim_coords
args['aux_coords_and_dims'] = aux_coords
if 'cell_methods' in dataarray.attrs:
args['cell_methods'] = \
parse_cell_methods(dataarray.name, dataarray.attrs['cell_methods'])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoid line breaks with \ if possible, per PEP8. I would prefer:

        args['cell_methods'] = parse_cell_methods(
            dataarray.name, dataarray.attrs['cell_methods'])


cube = iris.cube.Cube(dataarray.to_masked_array(), **args)
return cube


def from_iris(cube):
"""Convert a Iris cube into an DataArray
"""

def get_attr(_obj):
attrs = {'standard_name': _obj.standard_name,
'long_name': _obj.long_name}
if _obj.units.calendar:
attrs['calendar'] = _obj.units.calendar
if _obj.units.origin != '1':
attrs['units'] = _obj.units.origin
attrs.update(_obj.attributes)
return dict((k, v) for k, v in attrs.items() if v is not None)

def get_cell_methods(cell_methods_obj):
_cell_methods = []
for cell_method in cell_methods_obj:
names = ''.join(['{}: '.format(n) for n in cell_method.coord_names])
intervals = ' '.join(['interval: {}'.format(interval)
for interval in cell_method.intervals])
comments = ' '.join(['comment: {}'.format(comment)
for comment in cell_method.comments])
extra = ' '.join([intervals, comments]).strip()
if extra:
extra = ' ({})'.format(extra)
_cell_methods.append(names + cell_method.method + extra)
return ' '.join(_cell_methods)

name = cube.var_name
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible for a cube not to have a variable name?

Copy link
Author

@nparley nparley Aug 10, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this could possibly be None. It's the CF variable name, but I think the cube could have been made without reading in a file, in which case the name would be None.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A quick search in the Iris source gives this:

    @property
    def var_name(self):
        """The CF variable name for the object."""
        return self._var_name

    @var_name.setter
    def var_name(self, name):
        if name is not None:
            if not name:
                raise ValueError('An empty string is not a valid CF variable '
                                 'name.')
            elif set(name).intersection(string.whitespace):
                raise ValueError('{!r} is not a valid CF variable name because'
                                 ' it contains whitespace.'.format(name))
        self._var_name = name

So I would conclude that it's either None or a non-empty string (one that contains no whitespace).

dims = [dim.var_name for dim in cube.dim_coords]
if not dims:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this logic is quite right. If I remember correctly, a cube can have any number of dimensions without coordinates. This only works if the cube has no dimension coords, but doesn't cover the mixed case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a sane/standard way to figure out which dimensions on the cube are missing from dim_coords, given that it may not have length equal to cube.ndim?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good spot, I'll investigate

dims = ["dim{}".format(i) for i in range(cube.data.ndim)]
coords = OrderedDict()

for coord in cube.coords():
coord_attrs = get_attr(coord)
coord_dims = [dims[i] for i in cube.coord_dims(coord)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to have coord_dims(coord) return a tuple that does not have length equal to its number of dimensions?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

coord_dims(coord):

Returns a tuple of the data dimensions relevant to the given coordinate.

So I think it's ok.

if coord_dims:
coords[coord.var_name] = (coord_dims, coord.points, coord_attrs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's also possible for coord.var_name to be None. We should probably raise an error in that case.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there an xarray exception that might make sense for the error? Or just nameerror?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We usually just raise ValueError for cases like this where we cannot handle
the input.

On Wed, Aug 10, 2016 at 2:01 PM, Neil Parley [email protected]
wrote:

In xarray/convert.py
#814 (comment):

  •    try:
    
  •        dim_coord = cube.coord(dim_coords=True, dimensions=(i,))
    
  •        dims.append(dim_coord.var_name)
    
  •    except iris.exceptions.CoordinateNotFoundError:
    
  •        dims.append("dim_{}".format(i))
    
  • dims = [dim.var_name for dim in cube.dim_coords]

  • if not dims:

  • dims = ["dim{}".format(i) for i in range(cube.data.ndim)]

  • coords = OrderedDict()
  • for coord in cube.coords():
  •    coord_attrs = _iris_obj_to_attrs(coord)
    
  •    coord_dims = [dims[i] for i in cube.coord_dims(coord)]
    
  •    if coord_dims:
    
  •        coords[coord.var_name] = (coord_dims, coord.points, coord_attrs)
    

Is there an xarray exception that might make sense for the error? Or just
nameerror?


You are receiving this because you were mentioned.
Reply to this email directly, view it on GitHub
https://github.com/pydata/xarray/pull/814/files/338ef6b7242c06d5107d2224c1df4cdb2a4b951c#r74329887,
or mute the thread
https://github.com/notifications/unsubscribe-auth/ABKS1vRRJWIawQDcDJYZxXjQEcbQJIjkks5qejw5gaJpZM4H9jGH
.

else:
coords[coord.var_name] = ((),
np.asscalar(coord.points), coord_attrs)

array_attrs = get_attr(cube)
cell_methods = get_cell_methods(cube.cell_methods)
if cell_methods:
array_attrs['cell_methods'] = cell_methods
dataarray = DataArray(cube.data, coords=coords, name=name,
attrs=array_attrs, dims=dims)
return decode_cf(dataarray.to_dataset())[dataarray.name]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that to_dataset() will fail if you don't have a name (other than None) on the DataArray.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the above, I think name could be None but otherwise will not be empty. Is this OK?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately not -- you need to provide a name other than None to make this work.

Let's use the private _to_temp_dataset() and _from_temp_dataset() methods from DataArray that already handle this:

decoded_ds = decode_cf(dataarray._to_temp_dataset())
return dataarray._from_temp_dataset(decoded_ds)

Or once I merge #959:

decoded_ds = decode_cf(dataarray._to_temp_dataset())
return DataArray._from_temp_dataset(decoded_ds)

13 changes: 13 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,19 @@ def from_cdms2(cls, variable):
from ..convert import from_cdms2
return from_cdms2(variable)

def to_iris(self):
    """Convert this array into an iris.cube.Cube.

    Delegates to ``to_iris`` from the package's ``convert`` module, passing
    this object, and returns whatever that function returns.
    """
    # The converter is imported inside the method rather than at module level.
    from ..convert import to_iris
    return to_iris(self)

@classmethod
def from_iris(cls, cube):
    """Convert an iris.cube.Cube into an xarray.DataArray.

    Delegates to ``from_iris`` from the package's ``convert`` module and
    returns its result. ``cls`` is not used by this method.

    Parameters
    ----------
    cube : iris.cube.Cube
        The cube to convert.
    """
    # The converter is imported inside the method rather than at module level.
    from ..convert import from_iris
    return from_iris(cube)

def _all_compat(self, other, compat_str):
"""Helper function for equals and identical"""
def compat(x, y):
Expand Down
63 changes: 59 additions & 4 deletions xarray/test/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Coordinate, Variable)
from xarray.core.pycompat import iteritems, OrderedDict
from xarray.core.common import _full_like
from xarray.conventions import maybe_encode_datetime
from . import (TestCase, ReturnItem, source_ndarray, unittest, requires_dask,
requires_bottleneck)

Expand Down Expand Up @@ -1621,15 +1622,15 @@ def test_to_and_from_cdms2(self):
[('distance', [-2, 2], {'units': 'meters'}),
('time', pd.date_range('2000-01-01', periods=3))],
name='foo', attrs={'baz': 123})
expected_coords = [Coordinate('distance', [-2, 2]),
Coordinate('time', [0, 1, 2])]

actual = original.to_cdms2()
self.assertArrayEqual(actual, original)
self.assertEqual(actual.id, original.name)
self.assertItemsEqual(actual.getAxisIds(), original.dims)
for axis, coord in zip(actual.getAxisList(), expected_coords):
for axis, coord_key in zip(actual.getAxisList(), original.coords):
coord = original.coords[coord_key]
self.assertEqual(axis.id, coord.name)
self.assertArrayEqual(axis, coord.values)
self.assertArrayEqual(axis, maybe_encode_datetime(coord).values)
self.assertEqual(actual.baz, original.attrs['baz'])

component_times = actual.getAxis(1).asComponentTime()
Expand All @@ -1639,6 +1640,60 @@ def test_to_and_from_cdms2(self):
roundtripped = DataArray.from_cdms2(actual)
self.assertDataArrayIdentical(original, roundtripped)

def test_to_and_from_iris(self):
try:
import iris
import cf_units
except ImportError:
raise unittest.SkipTest('iris not installed')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it be better to use a @requires_iris decorator?


coord_dict = OrderedDict()
coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'})
coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3))
coord_dict['height'] = 10
coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'})
coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]])

original = DataArray(np.arange(6).reshape(2, 3), coord_dict,
name='Temperature', attrs={'baz': 123,
'units': 'Kelvin',
'standard_name':
'fire_temperature',
'long_name':
'Fire Temperature'},
dims=('distance', 'time'))

original.attrs['cell_methods'] = 'height: mean (comment: A cell method)'
actual = original.to_iris()
self.assertArrayEqual(actual.data, original.data)
self.assertEqual(actual.var_name, original.name)
self.assertItemsEqual([d.var_name for d in actual.dim_coords],
original.dims)
self.assertEqual(actual.cell_methods,
(iris.coords.CellMethod(method='mean',
coords=('height',),
intervals=(),
comments=('A cell method',)),))

for coord, orginal_key in zip((actual.coords()), original.coords):
original_coord = original.coords[orginal_key]
self.assertEqual(coord.var_name, original_coord.name)
self.assertArrayEqual(coord.points,
maybe_encode_datetime(original_coord).values)
self.assertEqual(actual.coord_dims(coord),
original.get_axis_num
(original.coords[coord.var_name].dims))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a check for coordinate attributes?

Might be worth checking:

  • that units get converted properly to/from attributes into cf_units
  • that random attributes are preserved as attributes


self.assertEqual(actual.coord('distance2').attributes['foo'],
original.coords['distance2'].attrs['foo'])
self.assertEqual(actual.coord('distance').units,
cf_units.Unit(original.coords['distance'].units))
self.assertEqual(actual.attributes['baz'], original.attrs['baz'])
self.assertEqual(actual.standard_name, original.attrs['standard_name'])

roundtripped = DataArray.from_iris(actual)
self.assertDataArrayIdentical(original, roundtripped)

def test_to_dataset_whole(self):
unnamed = DataArray([1, 2], dims='x')
with self.assertRaisesRegexp(ValueError, 'unable to convert unnamed'):
Expand Down