diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 2cadf6ff478..ad57825504a 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -40,6 +40,7 @@ Bug fixes
 - Variables which are chunked using dask in larger (but aligned) chunks than the
   target zarr chunk size can now be stored using `to_zarr()` (:pull:`6258`) By
   `Tobias Kölling <https://github.com/d70-t>`_.
+- Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`). By `Martin Bergemann <https://github.com/antarcticrainforest>`_.
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py
index ac6904d4e31..8f9d19d7897 100644
--- a/xarray/coding/cftimeindex.py
+++ b/xarray/coding/cftimeindex.py
@@ -310,7 +310,7 @@ class CFTimeIndex(pd.Index):
         )
     date_type = property(get_date_type)
 
-    def __new__(cls, data, name=None):
+    def __new__(cls, data, name=None, **kwargs):
         assert_all_valid_date_type(data)
         if name is None and hasattr(data, "name"):
             name = data.name
diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py
index 94f0cf4c2a5..c70fd53038b 100644
--- a/xarray/tests/test_cftimeindex.py
+++ b/xarray/tests/test_cftimeindex.py
@@ -1,3 +1,4 @@
+import pickle
 from datetime import timedelta
 from textwrap import dedent
 
@@ -1289,3 +1290,12 @@ def test_infer_freq(freq, calendar):
     indx = xr.cftime_range("2000-01-01", periods=3, freq=freq, calendar=calendar)
     out = xr.infer_freq(indx)
     assert out == freq
+
+
+@requires_cftime
+@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
+def test_pickle_cftimeindex(calendar):
+
+    idx = xr.cftime_range("2000-01-01", periods=3, freq="D", calendar=calendar)
+    idx_pkl = pickle.loads(pickle.dumps(idx))
+    assert (idx == idx_pkl).all()
diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py
index f70e1c7958e..a6ea792b5ac 100644
--- a/xarray/tests/test_distributed.py
+++ b/xarray/tests/test_distributed.py
@@ -1,5 +1,8 @@
 """ isort:skip_file """
+import os
 import pickle
+import numpy as np
+import tempfile
 
 import pytest
 
@@ -23,12 +26,15 @@ from . import (
     assert_allclose,
+    assert_identical,
     has_h5netcdf,
     has_netCDF4,
     requires_rasterio,
     has_scipy,
     requires_zarr,
     requires_cfgrib,
+    requires_cftime,
+    requires_netCDF4,
 )
 
 # this is to stop isort throwing errors. May have been easier to just use
 
@@ -105,6 +111,23 @@ def test_dask_distributed_netcdf_roundtrip(
     assert_allclose(original, computed)
 
 
+@requires_cftime
+@requires_netCDF4
+def test_open_mfdataset_can_open_files_with_cftime_index():
+    T = xr.cftime_range("20010101", "20010501", calendar="360_day")
+    Lon = np.arange(100)
+    data = np.random.random((T.size, Lon.size))
+    da = xr.DataArray(data, coords={"time": T, "Lon": Lon}, name="test")
+    with cluster() as (s, [a, b]):
+        with Client(s["address"]):
+            with tempfile.TemporaryDirectory() as td:
+                data_file = os.path.join(td, "test.nc")
+                da.to_netcdf(data_file)
+                for parallel in (False, True):
+                    with xr.open_mfdataset(data_file, parallel=parallel) as tf:
+                        assert_identical(tf["test"], da)
+
+
 @pytest.mark.parametrize("engine,nc_format", ENGINES_AND_FORMATS)
 def test_dask_distributed_read_netcdf_integration_test(
     loop, tmp_netcdf_filename, engine, nc_format