Skip to content

Restore dask arrays rather than editing encoding #8439

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ v2023.10.1 (19 Oct, 2023)
This release updates our minimum numpy version in ``pyproject.toml`` to 1.22,
consistent with our documentation below.

Bug fixes
~~~~~~~~~

- Fix bug where :py:meth:`Dataset.to_zarr` would modify chunks of datetime-like variables (:issue:`8230`, :pull:`8253`).
By `Mattia Almansi <https://github.com/malmans2>`_.


.. _whats-new.2023.10.0:

v2023.10.0 (19 Oct, 2023)
Expand Down
55 changes: 35 additions & 20 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,34 @@ def __getitem__(self, key):
# could possibly have a work-around for 0d data here


def _squeeze_var_chunks(
var_chunks: tuple[tuple[int, ...], ...], name=None
) -> tuple[int, ...]:
"""
Normalize chunks to tuple of integers.

zarr chunks needs to be uniform for each array
http://zarr.readthedocs.io/en/latest/spec/v1.html#chunks
while dask chunks can be variable sized
http://dask.pydata.org/en/latest/array-design.html#chunks
"""
if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
raise ValueError(
"Zarr requires uniform chunk sizes except for final chunk. "
f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. "
"Consider rechunking using `chunk()`."
)
if any((chunks[0] < chunks[-1]) for chunks in var_chunks):
raise ValueError(
"Final chunk of Zarr array must be the same size or smaller "
f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}."
"Consider either rechunking using `chunk()` or instead deleting "
"or modifying `encoding['chunks']`."
)
# return the first chunk for each dimension
return tuple(chunk[0] for chunk in var_chunks)


def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):
"""
Given encoding chunks (possibly None or []) and variable chunks
Expand All @@ -123,26 +151,8 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks):

# if there are no chunks in encoding but there are dask chunks, we try to
# use the same chunks in zarr
# However, zarr chunks needs to be uniform for each array
# http://zarr.readthedocs.io/en/latest/spec/v1.html#chunks
# while dask chunks can be variable sized
# http://dask.pydata.org/en/latest/array-design.html#chunks
if var_chunks and not enc_chunks:
if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks):
raise ValueError(
"Zarr requires uniform chunk sizes except for final chunk. "
f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. "
"Consider rechunking using `chunk()`."
)
if any((chunks[0] < chunks[-1]) for chunks in var_chunks):
raise ValueError(
"Final chunk of Zarr array must be the same size or smaller "
f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}."
"Consider either rechunking using `chunk()` or instead deleting "
"or modifying `encoding['chunks']`."
)
# return the first chunk for each dimension
return tuple(chunk[0] for chunk in var_chunks)
return _squeeze_var_chunks(var_chunks, name=name)

# from here on, we are dealing with user-specified chunks in encoding
# zarr allows chunks to be an integer, in which case it uses the same chunk
Expand Down Expand Up @@ -286,7 +296,8 @@ def extract_zarr_variable_encoding(


# Function below is copied from conventions.encode_cf_variable.
# The only change is to raise an error for object dtypes.
# The only change is to raise an error for object dtypes, and
# add chunks to the encoding when dask arrays are converted to np.
def encode_zarr_variable(var, needs_copy=True, name=None):
"""
Converts an Variable into an Variable which follows some
Expand All @@ -307,6 +318,7 @@ def encode_zarr_variable(var, needs_copy=True, name=None):
out : Variable
A variable which has been encoded as described above.
"""
original_chunks = var.chunks

var = conventions.encode_cf_variable(var, name=name)

Expand All @@ -317,6 +329,9 @@ def encode_zarr_variable(var, needs_copy=True, name=None):
var = coder.encode(var, name=name)
var = coding.strings.ensure_fixed_length_bytes(var)

if original_chunks and not var.chunks and "chunks" not in var.encoding:
var = var.chunk(original_chunks)
# var.encoding["chunks"] = _squeeze_var_chunks(original_chunks, name=name)
return var


Expand Down
8 changes: 8 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2709,6 +2709,14 @@ def test_attributes(self, obj) -> None:
with pytest.raises(TypeError, match=r"Invalid attribute in Dataset.attrs."):
ds.to_zarr(store_target, **self.version_kwargs)

@requires_dask
def test_chunked_datetime64(self) -> None:
    # Chunked datetime64 data must keep its dask chunking through a
    # zarr round trip instead of being silently rechunked (:issue:`8230`).
    original = create_test_data().astype("datetime64[ns]").chunk(1)
    with self.roundtrip(original, open_kwargs={"chunks": {}}) as actual:
        for var_name in actual.variables:
            assert original[var_name].chunks == actual.variables[var_name].chunks
        assert original.chunks == actual.chunks

def test_vectorized_indexing_negative_step(self) -> None:
if not has_dask:
pytest.xfail(
Expand Down