From 35ed378f201ecf7c8cd80a7a6180583296781ed7 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 1 May 2022 10:57:36 -0600 Subject: [PATCH 1/4] Support dask arrays in datetime_to_numeric --- xarray/core/duck_array_ops.py | 14 +++++++-- xarray/tests/test_duck_array_ops.py | 49 +++++++++++++++++++++++------ 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index b85d0e1645e..b41170b2444 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -517,10 +517,20 @@ def pd_timedelta_to_float(value, datetime_unit): return np_timedelta64_to_float(value, datetime_unit) +def _timedelta_to_seconds(array): + return np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 + + def py_timedelta_to_float(array, datetime_unit): """Convert a timedelta object to a float, possibly at a loss of resolution.""" - array = np.asarray(array) - array = np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 + if not is_duck_array(array): + array = np.asarray(array) + if is_duck_dask_array(array): + array = array.map_blocks( + _timedelta_to_seconds, meta=np.array([], dtype=np.float64) + ) + else: + array = _timedelta_to_seconds(array) conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) return conversion_factor * array diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index c329bc50c56..392f1b91914 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -675,39 +675,68 @@ def test_multiple_dims(dtype, dask, skipna, func): assert_allclose(actual, expected) -def test_datetime_to_numeric_datetime64(): +@pytest.mark.parametrize("dask", [True, False]) +def test_datetime_to_numeric_datetime64(dask): + if dask and not has_dask: + pytest.skip("requires dask") + times = pd.date_range("2000", periods=5, freq="7D").values - result = 
duck_array_ops.datetime_to_numeric(times, datetime_unit="h") + if dask: + import dask.array + + times = dask.array.from_array(times, chunks=-1) + + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") expected = 24 * np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) offset = times[1] - result = duck_array_ops.datetime_to_numeric(times, offset=offset, datetime_unit="h") + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="h" + ) expected = 24 * np.arange(-7, 28, 7) np.testing.assert_array_equal(result, expected) dtype = np.float32 - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, datetime_unit="h", dtype=dtype + ) expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) @requires_cftime -def test_datetime_to_numeric_cftime(): +@pytest.mark.parametrize("dask", [True, False]) +def test_datetime_to_numeric_cftime(dask): + if dask and not has_dask: + pytest.skip("requires dask") + times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values - result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int) + if dask: + import dask.array + + times = dask.array.from_array(times, chunks=-1) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int) expected = 24 * np.arange(0, 35, 7) np.testing.assert_array_equal(result, expected) offset = times[1] - result = duck_array_ops.datetime_to_numeric( - times, offset=offset, datetime_unit="h", dtype=int - ) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, offset=offset, datetime_unit="h", dtype=int + ) expected = 24 * np.arange(-7, 28, 7) np.testing.assert_array_equal(result, expected) dtype = np.float32 - 
result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype) + with raise_if_dask_computes(): + result = duck_array_ops.datetime_to_numeric( + times, datetime_unit="h", dtype=dtype + ) expected = 24 * np.arange(0, 35, 7).astype(dtype) np.testing.assert_array_equal(result, expected) From dfe200d1502a62ca105b301053a4254c9286f20e Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 1 May 2022 11:04:03 -0600 Subject: [PATCH 2/4] Minor cleanup --- xarray/core/duck_array_ops.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index b41170b2444..455ff96d38c 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -523,8 +523,7 @@ def _timedelta_to_seconds(array): def py_timedelta_to_float(array, datetime_unit): """Convert a timedelta object to a float, possibly at a loss of resolution.""" - if not is_duck_array(array): - array = np.asarray(array) + array = asarray(array) if is_duck_dask_array(array): array = array.map_blocks( _timedelta_to_seconds, meta=np.array([], dtype=np.float64) From 398f1b660233402fdf7e631f0ee5f863cae564a2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 20 May 2022 08:47:56 -0600 Subject: [PATCH 3/4] Backward compatibility dask --- xarray/core/duck_array_ops.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index a8d7476f1ab..033c238b959 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -431,7 +431,14 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): # Compute timedelta object. # For np.datetime64, this can silently yield garbage due to overflow. # One option is to enforce 1970-01-01 as the universal offset. - array = array - offset + + # This map_blocks call is for backwards compatibility. 
+ # dask == 2021.04.1 does not support subtracting object arrays + # which is required for cftime + if is_duck_dask_array(array): + array = array.map_blocks(lambda a, b: a - b, offset) + else: + array = array - offset # Scalar is converted to 0d-array if not hasattr(array, "dtype"): From 193b69514604a2db2dabd99e8b9773cdba694b3c Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 27 May 2022 20:01:37 -0600 Subject: [PATCH 4/4] Better fix --- xarray/core/duck_array_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 033c238b959..e5a659a9ec9 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -435,8 +435,8 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): # This map_blocks call is for backwards compatibility. # dask == 2021.04.1 does not support subtracting object arrays # which is required for cftime - if is_duck_dask_array(array): - array = array.map_blocks(lambda a, b: a - b, offset) + if is_duck_dask_array(array) and np.issubdtype(array.dtype, object): + array = array.map_blocks(lambda a, b: a - b, offset, meta=array._meta) else: array = array - offset