Skip to content

deepcopying variable raises TypeError: h5py objects cannot be pickled (Dataset.sortby) #4425

Closed
@kmuehlbauer

Description

@kmuehlbauer

What happened:

When using xr.open_dataset with H5NetCDFDataStore and opened h5py.File handle deepcopy in Dataset.sortby/align leads to TypeError: h5py objects cannot be pickled.

What you expected to happen:

While applying Dataset.sortby no error should be raised.

Minimal Complete Verifiable Example:

# create hdf5 file
import h5py
f = h5py.File('myfile.h5','w')
dset = f.create_dataset("data", (360, 1000))
f.close()

import h5netcdf
import xarray as xr
import numpy as np

f = h5netcdf.File("myfile.h5", "r", phony_dims="access")
s0 = xr.backends.H5NetCDFStore(f)
ds = xr.open_dataset(s0, engine="h5netcdf", chunks=None)
ds = ds.assign_coords({"phony_dim_0": np.arange(ds.dims['phony_dim_0'], 0, -1)})
ds.sortby('phony_dim_0')
ds.close()
Error Traceback
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-0e1377169cb3> in <module>
      5 ds = xr.open_dataset(s0, engine="h5netcdf", chunks=None)
      6 ds = ds.assign_coords({"phony_dim_0": np.arange(ds.dims['phony_dim_0'], 0, -1)})
----> 7 ds.sortby('phony_dim_0')
      8 ds.close()

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/xarray-0.16.1.dev86+g264fdb29-py3.8.egg/xarray/core/dataset.py in sortby(self, variables, ascending)
   5293             variables = variables
   5294         variables = [v if isinstance(v, DataArray) else self[v] for v in variables]
-> 5295         aligned_vars = align(self, *variables, join="left")
   5296         aligned_self = aligned_vars[0]
   5297         aligned_other_vars = aligned_vars[1:]

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/xarray-0.16.1.dev86+g264fdb29-py3.8.egg/xarray/core/alignment.py in align(join, copy, indexes, exclude, fill_value, *objects)
    336         if not valid_indexers:
    337             # fast path for no reindexing necessary
--> 338             new_obj = obj.copy(deep=copy)
    339         else:
    340             new_obj = obj.reindex(copy=copy, fill_value=fill_value, **valid_indexers)

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/xarray-0.16.1.dev86+g264fdb29-py3.8.egg/xarray/core/dataset.py in copy(self, deep, data)
   1076         """
   1077         if data is None:
-> 1078             variables = {k: v.copy(deep=deep) for k, v in self._variables.items()}
   1079         elif not utils.is_dict_like(data):
   1080             raise ValueError("Data must be dict-like")

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/xarray-0.16.1.dev86+g264fdb29-py3.8.egg/xarray/core/dataset.py in <dictcomp>(.0)
   1076         """
   1077         if data is None:
-> 1078             variables = {k: v.copy(deep=deep) for k, v in self._variables.items()}
   1079         elif not utils.is_dict_like(data):
   1080             raise ValueError("Data must be dict-like")

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/xarray-0.16.1.dev86+g264fdb29-py3.8.egg/xarray/core/variable.py in copy(self, deep, data)
    938 
    939             if deep:
--> 940                 data = copy.deepcopy(data)
    941 
    942         else:

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    170                     y = x
    171                 else:
--> 172                     y = _reconstruct(x, memo, *rv)
    173 
    174     # If is its own copy, don't memoize.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
    268     if state is not None:
    269         if deep:
--> 270             state = deepcopy(state, memo)
    271         if hasattr(y, '__setstate__'):
    272             y.__setstate__(state)

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in <listcomp>(.0)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    170                     y = x
    171                 else:
--> 172                     y = _reconstruct(x, memo, *rv)
    173 
    174     # If is its own copy, don't memoize.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
    268     if state is not None:
    269         if deep:
--> 270             state = deepcopy(state, memo)
    271         if hasattr(y, '__setstate__'):
    272             y.__setstate__(state)

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in <listcomp>(.0)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    170                     y = x
    171                 else:
--> 172                     y = _reconstruct(x, memo, *rv)
    173 
    174     # If is its own copy, don't memoize.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
    268     if state is not None:
    269         if deep:
--> 270             state = deepcopy(state, memo)
    271         if hasattr(y, '__setstate__'):
    272             y.__setstate__(state)

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in <listcomp>(.0)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    170                     y = x
    171                 else:
--> 172                     y = _reconstruct(x, memo, *rv)
    173 
    174     # If is its own copy, don't memoize.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
    268     if state is not None:
    269         if deep:
--> 270             state = deepcopy(state, memo)
    271         if hasattr(y, '__setstate__'):
    272             y.__setstate__(state)

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in <listcomp>(.0)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    170                     y = x
    171                 else:
--> 172                     y = _reconstruct(x, memo, *rv)
    173 
    174     # If is its own copy, don't memoize.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
    268     if state is not None:
    269         if deep:
--> 270             state = deepcopy(state, memo)
    271         if hasattr(y, '__setstate__'):
    272             y.__setstate__(state)

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in <listcomp>(.0)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    170                     y = x
    171                 else:
--> 172                     y = _reconstruct(x, memo, *rv)
    173 
    174     # If is its own copy, don't memoize.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
    268     if state is not None:
    269         if deep:
--> 270             state = deepcopy(state, memo)
    271         if hasattr(y, '__setstate__'):
    272             y.__setstate__(state)

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    170                     y = x
    171                 else:
--> 172                     y = _reconstruct(x, memo, *rv)
    173 
    174     # If is its own copy, don't memoize.

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
    268     if state is not None:
    269         if deep:
--> 270             state = deepcopy(state, memo)
    271         if hasattr(y, '__setstate__'):
    272             y.__setstate__(state)

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    159                     reductor = getattr(x, "__reduce_ex__", None)
    160                     if reductor is not None:
--> 161                         rv = reductor(4)
    162                     else:
    163                         reductor = getattr(x, "__reduce__", None)

/home/kai/miniconda/envs/wradlib_devel/lib/python3.8/site-packages/h5py/_hl/base.py in __getnewargs__(self)
    306         limitations, look at the h5pickle project on PyPI.
    307         """
--> 308         raise TypeError("h5py objects cannot be pickled")
    309 
    310     def __getstate__(self):

TypeError: h5py objects cannot be pickled

Anything else we need to know?:

When invoked with chunks={} it works as well as if the following code is used:

ds = xr.open_dataset('myfile.h5', group='/', engine='h5netcdf', backend_kwargs=dict(phony_dims='access'))
ds = ds.assign_coords({"phony_dim_0": np.arange(ds.dims['phony_dim_0'], 0, -1)})
ds.sortby('phony_dim_0')
ds.close()

This was introduced by #4221, see

if deep:
data = copy.deepcopy(data)

Before:

            if deep and (
                hasattr(data, "__array_function__")
                or isinstance(data, dask_array_type)
                or (not IS_NEP18_ACTIVE and isinstance(data, np.ndarray))
            ):
                data = copy.deepcopy(data)

All three of the above tests return False in my case, so deepcopy should never be used here.

Environment:

Output of xr.show_versions()

INSTALLED VERSIONS

commit: None
python: 3.8.5 | packaged by conda-forge | (default, Aug 29 2020, 01:22:49)
[GCC 7.5.0]
python-bits: 64
OS: Linux
OS-release: 4.12.14-lp151.28.67-default
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: de_DE.UTF-8
LOCALE: de_DE.UTF-8
libhdf5: 1.10.6
libnetcdf: 4.7.4

xarray: 0.16.1.dev86+g264fdb29
pandas: 1.1.1
numpy: 1.19.1
scipy: 1.5.0
netCDF4: 1.5.4
pydap: None
h5netcdf: 0.8.0
h5py: 2.10.0
Nio: None
zarr: 2.4.0
cftime: 1.2.1
nc_time_axis: None
PseudoNetCDF: None
rasterio: None
cfgrib: 0.9.8.4
iris: None
bottleneck: 1.3.2
dask: 2.19.0
distributed: 2.25.0
matplotlib: 3.3.1
cartopy: 0.18.0
seaborn: None
numbagg: None
pint: None
setuptools: 49.6.0.post20200814
pip: 20.2.2
conda: 4.8.3
pytest: 5.4.3
IPython: 7.18.1
sphinx: None

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions