From f4c2b3f07ead8e2d635611e61843f96534bd0878 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 11 Nov 2020 05:29:35 +0100 Subject: [PATCH 01/21] refactor apiv2.open_dataset --- xarray/backends/apiv2.py | 86 +++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 32 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 7e4605c42ce..ee967cbb007 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -10,37 +10,33 @@ ) -def dataset_from_backend_dataset( - ds, +def _get_mtime(filename_or_obj): + # if passed an actual file path, augment the token with + # the file modification time + if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj): + mtime = os.path.getmtime(filename_or_obj) + else: + mtime = None + return mtime + + +def _chunk_ds( + backend_ds, filename_or_obj, engine, chunks, - cache, overwrite_encoded_chunks, **extra_tokens, ): - if not (isinstance(chunks, (int, dict)) or chunks is None): - if chunks != "auto": - raise ValueError( - "chunks must be an int, dict, 'auto', or None. " - "Instead found %s. " % chunks - ) - - _protect_dataset_variables_inplace(ds, cache) - if chunks is not None and engine != "zarr": + if engine != "zarr": from dask.base import tokenize - # if passed an actual file path, augment the token with - # the file modification time - if isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj): - mtime = os.path.getmtime(filename_or_obj) - else: - mtime = None + mtime = _get_mtime(filename_or_obj) token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) name_prefix = "open_dataset-%s" % token - ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) + ds = backend_ds.chunk(chunks, name_prefix=name_prefix, token=token) - elif engine == "zarr": + else: if chunks == "auto": try: @@ -48,28 +44,53 @@ def dataset_from_backend_dataset( except ImportError: chunks = None - if chunks is None: - return ds - if isinstance(chunks, int): - chunks = dict.fromkeys(ds.dims, chunks) + chunks = dict.fromkeys(backend_ds.dims, chunks) variables = { k: zarr.ZarrStore.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) - for k, v in ds.variables.items() + for k, v in backend_ds.variables.items() } - ds2 = ds._replace(variables) + ds = backend_ds._replace(variables) + ds._file_obj = backend_ds._file_obj + return ds + +def dataset_from_backend_dataset( + backend_ds, + filename_or_obj, + engine, + chunks, + cache, + overwrite_encoded_chunks, + **extra_tokens, +): + if not (isinstance(chunks, (int, dict)) or chunks is None): + if chunks != "auto": + raise ValueError( + "chunks must be an int, dict, 'auto', or None. " + "Instead found %s. 
" % chunks + ) + + _protect_dataset_variables_inplace(backend_ds, cache) + if chunks is None: + ds = backend_ds else: - ds2 = ds - ds2._file_obj = ds._file_obj + ds = _chunk_ds( + backend_ds, + filename_or_obj, + engine, + chunks, + overwrite_encoded_chunks, + **extra_tokens, + ) # Ensure source filename always stored in dataset object (GH issue #2550) if "source" not in ds.encoding: if isinstance(filename_or_obj, str): - ds2.encoding["source"] = filename_or_obj + ds.encoding["source"] = filename_or_obj - return ds2 + return ds def resolve_decoders_kwargs(decode_cf, engine, **decoders): @@ -236,12 +257,13 @@ def open_dataset( open_backend_dataset = _get_backend_cls(engine, engines=plugins.ENGINES)[ "open_dataset" ] + filtered_kwargs = {k: v for k, v in kwargs.items() if v is not None} backend_ds = open_backend_dataset( filename_or_obj, drop_variables=drop_variables, **decoders, **backend_kwargs, - **{k: v for k, v in kwargs.items() if v is not None}, + **filtered_kwargs ) ds = dataset_from_backend_dataset( backend_ds, @@ -253,7 +275,7 @@ def open_dataset( drop_variables=drop_variables, **decoders, **backend_kwargs, - **kwargs, + **filtered_kwargs, ) return ds From 35663927dbf692b102d1c9797a53657211bafcff Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 11 Nov 2020 06:14:32 +0100 Subject: [PATCH 02/21] move maybe_chunks and get_chunks in dataset.py --- xarray/backends/api.py | 4 ++-- xarray/backends/apiv2.py | 45 ++++++++++++++++++-------------------- xarray/backends/zarr.py | 47 ---------------------------------------- xarray/core/dataset.py | 34 +++++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 73 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 0b9b5046cb9..f1d58813958 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -26,7 +26,7 @@ combine_by_coords, ) from ..core.dataarray import DataArray -from ..core.dataset import Dataset, _maybe_chunk +from ..core.dataset import Dataset, _get_chunk, _maybe_chunk from ..core.utils import close_on_error, is_grib_path, is_remote_uri from .common import AbstractDataStore, ArrayWriter from .locks import _get_scheduler @@ -536,7 +536,7 @@ def maybe_decode_store(store, chunks): k: _maybe_chunk( k, v, - store.get_chunk(k, v, chunks), + _get_chunk(k, v, chunks), overwrite_encoded_chunks=overwrite_encoded_chunks, ) for k, v in ds.variables.items() diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index ee967cbb007..3f43684b892 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -1,7 +1,8 @@ import os +from ..core.dataset import _get_chunk, _maybe_chunk from ..core.utils import is_remote_uri -from . import plugins, zarr +from . 
import plugins from .api import ( _autodetect_engine, _get_backend_cls, @@ -28,29 +29,25 @@ def _chunk_ds( overwrite_encoded_chunks, **extra_tokens, ): - if engine != "zarr": - from dask.base import tokenize - - mtime = _get_mtime(filename_or_obj) - token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) - name_prefix = "open_dataset-%s" % token - ds = backend_ds.chunk(chunks, name_prefix=name_prefix, token=token) - - else: - - if chunks == "auto": - try: - import dask.array # noqa - except ImportError: - chunks = None - - if isinstance(chunks, int): - chunks = dict.fromkeys(backend_ds.dims, chunks) - - variables = { - k: zarr.ZarrStore.maybe_chunk(k, v, chunks, overwrite_encoded_chunks) - for k, v in backend_ds.variables.items() - } + from dask.base import tokenize + + mtime = _get_mtime(filename_or_obj) + token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) + name_prefix = "open_dataset-%s" % token + if isinstance(chunks, int): + chunks = dict.fromkeys(backend_ds.dims, chunks) + + variables = {} + for name, var in backend_ds.variables.items(): + var_chunks = _get_chunk(name, var, chunks) + variables[name] = _maybe_chunk( + name, + var, + var_chunks, + overwrite_encoded_chunks=overwrite_encoded_chunks, + name_prefix=name_prefix, + token=token, + ) ds = backend_ds._replace(variables) ds._file_obj = backend_ds._file_obj return ds diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 9827c345239..22cf2b1fa7e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -368,53 +368,6 @@ def encode_variable(self, variable): def encode_attribute(self, a): return encode_zarr_attr_value(a) - @staticmethod - def get_chunk(name, var, chunks): - chunk_spec = dict(zip(var.dims, var.encoding.get("chunks"))) - - # Coordinate labels aren't chunked - if var.ndim == 1 and var.dims[0] == name: - return chunk_spec - - if chunks == "auto": - return chunk_spec - - for dim in var.dims: - if dim in chunks: - spec = chunks[dim] - if isinstance(spec, int): - spec = (spec,) - if isinstance(spec, (tuple, list)) and chunk_spec[dim]: - if any(s % chunk_spec[dim] for s in spec): - warnings.warn( - "Specified Dask chunks %r would " - "separate Zarr chunk shape %r for " - "dimension %r. This significantly " - "degrades performance. Consider " - "rechunking after loading instead." - % (chunks[dim], chunk_spec[dim], dim), - stacklevel=2, - ) - chunk_spec[dim] = chunks[dim] - return chunk_spec - - @classmethod - def maybe_chunk(cls, name, var, chunks, overwrite_encoded_chunks): - chunk_spec = cls.get_chunk(name, var, chunks) - - if (var.ndim > 0) and (chunk_spec is not None): - from dask.base import tokenize - - # does this cause any data to be read? 
- token2 = tokenize(name, var._data, chunks) - name2 = f"xarray-{name}-{token2}" - var = var.chunk(chunk_spec, name=name2, lock=None) - if overwrite_encoded_chunks and var.chunks is not None: - var.encoding["chunks"] = tuple(x[0] for x in var.chunks) - return var - else: - return var - def store( self, variables, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 04974c58113..86aa531e2a7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -359,6 +359,40 @@ def _assert_empty(args: tuple, msg: str = "%s") -> None: raise ValueError(msg % args) +def _check_chunks_compatibility(dim, chunks, chunk_spec): + spec = chunks[dim] + if isinstance(spec, int): + spec = (spec,) + if any(s % chunk_spec[dim] for s in spec): + warnings.warn( + "Specified Dask chunks %r would " + "separate on disks chunk shape %r for " + "dimension %r. This could " + "degrades performance. Consider " + "rechunking after loading instead." % (chunks[dim], chunk_spec[dim], dim), + stacklevel=2, + ) + + +def _get_chunk(name, var, chunks): + if chunks == "auto": + chunks = {} + + preferred_chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) + if var.ndim == 1 and var.dims[0] == name: + return preferred_chunks + + output_chunks = {} + if chunks is not None: + for dim in preferred_chunks: + if dim in chunks: + _check_chunks_compatibility(dim, chunks, preferred_chunks) + output_chunks[dim] = chunks[dim] + else: + output_chunks[dim] = preferred_chunks[dim] + return output_chunks + + def _maybe_chunk( name, var, From 7264648d1075cbfca162df9ac332f5bcc6f3d251 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 11 Nov 2020 09:30:11 +0100 Subject: [PATCH 03/21] - modify key for zarr autochunking from chunks='auto' to chunks={} --- xarray/backends/zarr.py | 2 +- xarray/core/dataset.py | 4 ++-- xarray/tests/test_backends.py | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 22cf2b1fa7e..97fe2b9b1fd 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -509,7 +509,7 @@ def open_zarr( store, group=None, synchronizer=None, - chunks="auto", + chunks={}, decode_cf=True, mask_and_scale=True, decode_times=True, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 86aa531e2a7..cd127e1206d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -376,7 +376,7 @@ def _check_chunks_compatibility(dim, chunks, chunk_spec): def _get_chunk(name, var, chunks): if chunks == "auto": - chunks = {} + return chunks preferred_chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) if var.ndim == 1 and var.dims[0] == name: @@ -396,7 +396,7 @@ def _get_chunk(name, var, chunks): def _maybe_chunk( name, var, - chunks=None, + chunks, token=None, lock=None, name_prefix="xarray-", diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 43bf2de245b..c4b7cea8926 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1567,7 +1567,7 @@ def roundtrip( if save_kwargs is None: save_kwargs = {} if open_kwargs is None: - open_kwargs = {"chunks": "auto"} + open_kwargs = {"chunks": {}} with self.create_zarr_target() as store_target: self.save(data, store_target, **save_kwargs) with self.open(store_target, **open_kwargs) as ds: @@ -1604,7 +1604,7 @@ def test_auto_chunk(self): # there should be no chunks assert v.chunks is None - with self.roundtrip(original, open_kwargs={"chunks": "auto"}) as actual: + with self.roundtrip(original, 
open_kwargs={"chunks": {}}) as actual: for k, v in actual.variables.items(): # only index variables should be in memory assert v._in_memory == (k in actual.dims) @@ -1701,7 +1701,7 @@ def test_deprecate_auto_chunk(self): def test_write_uneven_dask_chunks(self): # regression for GH#2225 original = create_test_data().chunk({"dim1": 3, "dim2": 4, "dim3": 3}) - with self.roundtrip(original, open_kwargs={"chunks": "auto"}) as actual: + with self.roundtrip(original, open_kwargs={"chunks": {}}) as actual: for k, v in actual.data_vars.items(): print(k) assert v.chunks == actual[k].chunks @@ -1851,7 +1851,7 @@ def test_write_persistence_modes(self, group): ds_to_append.to_zarr(store_target, append_dim="time", group=group) original = xr.concat([ds, ds_to_append], dim="time") actual = xr.open_dataset( - store_target, group=group, chunks="auto", engine="zarr" + store_target, group=group, chunks={}, engine="zarr" ) assert_identical(original, actual) @@ -1941,11 +1941,11 @@ def test_check_encoding_is_consistent_after_append(self): encoding = {"da": {"compressor": compressor}} ds.to_zarr(store_target, mode="w", encoding=encoding) ds_to_append.to_zarr(store_target, append_dim="time") - actual_ds = xr.open_dataset(store_target, chunks="auto", engine="zarr") + actual_ds = xr.open_dataset(store_target, chunks={}, engine="zarr") actual_encoding = actual_ds["da"].encoding["compressor"] assert actual_encoding.get_config() == compressor.get_config() assert_identical( - xr.open_dataset(store_target, chunks="auto", engine="zarr").compute(), + xr.open_dataset(store_target, chunks={}, engine="zarr").compute(), xr.concat([ds, ds_to_append], dim="time"), ) @@ -1961,7 +1961,7 @@ def test_append_with_new_variable(self): combined = xr.concat([ds, ds_to_append], dim="time") combined["new_var"] = ds_with_new_var["new_var"] assert_identical( - combined, xr.open_dataset(store_target, chunks="auto", engine="zarr") + combined, xr.open_dataset(store_target, chunks={}, engine="zarr") ) @requires_dask From 1d927140df5d47179e58b8c9bb69ce2304bab50b Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Fri, 13 Nov 2020 15:57:56 +0100 Subject: [PATCH 04/21] deprecated None for chunks value in ds.chunks and var.chunks --- xarray/core/dataarray.py | 4 ++-- xarray/core/dataset.py | 24 +++++++++++++++--------- xarray/core/variable.py | 15 ++++++++++----- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b95f681bc79..f65acca7f56 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1010,12 +1010,11 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: def chunk( self, chunks: Union[ - None, Number, Tuple[Number, ...], Tuple[Tuple[Number, ...], ...], Mapping[Hashable, Union[None, Number, Tuple[Number, ...]]], - ] = None, + ] = {}, name_prefix: str = "xarray-", token: str = None, lock: bool = False, @@ -1047,6 +1046,7 @@ def chunk( ------- chunked : xarray.DataArray """ + if isinstance(chunks, (tuple, list)): chunks = dict(zip(self.dims, chunks)) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index cd127e1206d..5581df6fea1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1853,11 +1853,10 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: def chunk( self, chunks: Union[ - None, Number, str, Mapping[Hashable, Union[None, Number, str, Tuple[Number, ...]]], - ] = None, + ] = {}, name_prefix: str = "xarray-", token: str = None, lock: bool = False, @@ -1889,17 +1888,24 @@ def chunk( ------- chunked : 
xarray.Dataset """ + if chunks is None: + warnings.warn( + "None value for 'chunks' is deprecated. " + "It will raise an error in the future. Use instead '{}'", + category=FutureWarning, + stacklevel=2, + ) + chunks = {} if isinstance(chunks, (Number, str)): chunks = dict.fromkeys(self.dims, chunks) - if chunks is not None: - bad_dims = chunks.keys() - self.dims.keys() - if bad_dims: - raise ValueError( - "some chunks keys are not dimensions on this " - "object: %s" % bad_dims - ) + bad_dims = chunks.keys() - self.dims.keys() + if bad_dims: + raise ValueError( + "some chunks keys are not dimensions on this " + "object: %s" % bad_dims + ) variables = { k: _maybe_chunk(k, v, chunks, token, lock, name_prefix) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a3876cb0077..5bf83b830e7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -986,7 +986,7 @@ def chunks(self): _array_counter = itertools.count() - def chunk(self, chunks=None, name=None, lock=False): + def chunk(self, chunks={}, name=None, lock=False): """Coerce this array's data into a dask arrays with the given chunks. If this variable is a non-dask array, it will be converted to dask @@ -1015,13 +1015,18 @@ def chunk(self, chunks=None, name=None, lock=False): """ import dask import dask.array as da + if chunks is None: + warnings.warn( + "None value for 'chunks' is deprecated. " + "It will raise an error in the future. Use instead '{}'", + category=FutureWarning, + stacklevel=1 + ) + chunks = {} if utils.is_dict_like(chunks): chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} - if chunks is None: - chunks = self.chunks or self.shape - data = self._data if is_duck_dask_array(data): data = data.rechunk(chunks) @@ -2368,7 +2373,7 @@ def values(self, values): f"Please use DataArray.assign_coords, Dataset.assign_coords or Dataset.assign as appropriate." ) - def chunk(self, chunks=None, name=None, lock=False): + def chunk(self, chunks={}, name=None, lock=False): # Dummy - do not chunk. This method is invoked e.g. by Dataset.chunk() return self.copy(deep=False) From e4180ba73ef114a8cbdae6ec923c998966adc10b Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 18 Nov 2020 19:13:55 +0100 Subject: [PATCH 05/21] bugfix in get_chunks (wrong chunking when on-disk chunks not available) --- xarray/core/dataset.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5581df6fea1..24157592a50 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -360,17 +360,22 @@ def _assert_empty(args: tuple, msg: str = "%s") -> None: def _check_chunks_compatibility(dim, chunks, chunk_spec): - spec = chunks[dim] - if isinstance(spec, int): - spec = (spec,) - if any(s % chunk_spec[dim] for s in spec): + if dim not in chunks or (dim not in chunk_spec): + return + + chunk_spec_dim = chunk_spec.get(dim) + chunks_dim = chunks.get(dim) + + if isinstance(chunks_dim, int): + chunks_dim = (chunks_dim,) + if any(s % chunk_spec_dim for s in chunks_dim): warnings.warn( "Specified Dask chunks %r would " "separate on disks chunk shape %r for " "dimension %r. This could " "degrades performance. Consider " - "rechunking after loading instead." % (chunks[dim], chunk_spec[dim], dim), - stacklevel=2, + "rechunking after loading instead." 
% (chunks_dim, chunk_spec_dim, dim), + stacklevel=3, ) @@ -384,12 +389,11 @@ def _get_chunk(name, var, chunks): output_chunks = {} if chunks is not None: - for dim in preferred_chunks: - if dim in chunks: - _check_chunks_compatibility(dim, chunks, preferred_chunks) - output_chunks[dim] = chunks[dim] - else: - output_chunks[dim] = preferred_chunks[dim] + for dim in var.dims: + _check_chunks_compatibility(dim, chunks, preferred_chunks) + preferred_chunks_dim = preferred_chunks.get(dim, None) + output_chunks[dim] = chunks.get(dim, preferred_chunks_dim) + return output_chunks From f4e485369504df70ac677391025bfc705c4b9c71 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 18 Nov 2020 20:49:12 +0100 Subject: [PATCH 06/21] bugfix - it uses backend preferred_chunking for inspiration in "auto" dask chunking (to align with ds.chunk that use the previuos chunking) - it uses check on IndexVariables to skip warnings instead of check on variable name and number of dimension --- xarray/backends/api.py | 2 +- xarray/backends/apiv2.py | 4 +-- xarray/core/dataset.py | 66 ++++++++++++++++++++++++---------------- 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index f1d58813958..5f9fc248e84 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -536,7 +536,7 @@ def maybe_decode_store(store, chunks): k: _maybe_chunk( k, v, - _get_chunk(k, v, chunks), + _get_chunk(v, chunks), overwrite_encoded_chunks=overwrite_encoded_chunks, ) for k, v in ds.variables.items() diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 3f43684b892..17dc648496e 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -34,12 +34,12 @@ def _chunk_ds( mtime = _get_mtime(filename_or_obj) token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) name_prefix = "open_dataset-%s" % token - if isinstance(chunks, int): + if isinstance(chunks, int) or (chunks == 'auto'): chunks = dict.fromkeys(backend_ds.dims, chunks) variables = {} for name, var in backend_ds.variables.items(): - var_chunks = _get_chunk(name, var, chunks) + var_chunks = _get_chunk(var, chunks) variables[name] = _maybe_chunk( name, var, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 24157592a50..097df45e0d1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -359,40 +359,52 @@ def _assert_empty(args: tuple, msg: str = "%s") -> None: raise ValueError(msg % args) -def _check_chunks_compatibility(dim, chunks, chunk_spec): - if dim not in chunks or (dim not in chunk_spec): - return +def _check_chunks_compatibility(var, chunks, chunk_spec): + for dim in var.dims: + if dim not in chunks or (dim not in chunk_spec): + return - chunk_spec_dim = chunk_spec.get(dim) - chunks_dim = chunks.get(dim) + chunk_spec_dim = chunk_spec.get(dim) + chunks_dim = chunks.get(dim) - if isinstance(chunks_dim, int): - chunks_dim = (chunks_dim,) - if any(s % chunk_spec_dim for s in chunks_dim): - warnings.warn( - "Specified Dask chunks %r would " - "separate on disks chunk shape %r for " - "dimension %r. This could " - "degrades performance. Consider " - "rechunking after loading instead." % (chunks_dim, chunk_spec_dim, dim), - stacklevel=3, - ) + if isinstance(chunks_dim, int): + chunks_dim = (chunks_dim,) + if any(s % chunk_spec_dim for s in chunks_dim): + warnings.warn( + "Specified Dask chunks %r would " + "separate on disks chunk shape %r for " + "dimension %r. This could " + "degrades performance. 
Consider " + "rechunking after loading instead." % (chunks_dim, chunk_spec_dim, dim), + stacklevel=3, + ) -def _get_chunk(name, var, chunks): - if chunks == "auto": - return chunks +def _get_chunk(var, chunks): + # chunks need to be explicity computed to take correctly into accout + # backend preferred chunking + import dask.array as da + preferred_chunks_list = var.encoding.get("chunks", {}) preferred_chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) - if var.ndim == 1 and var.dims[0] == name: - return preferred_chunks + if isinstance(var, IndexVariable): + return {} + + chunks_list = [] + for dim in var.dims: + chunks_dim = chunks.get(dim, None) + preferred_chunks_dim = preferred_chunks.get(dim, None) + chunks_list.append(chunks_dim or preferred_chunks_dim) + + output_chunks_list = da.core.normalize_chunks( + chunks_list, + shape=var.shape, + dtype=var.dtype, + previous_chunks=preferred_chunks_list + ) - output_chunks = {} - if chunks is not None: - for dim in var.dims: - _check_chunks_compatibility(dim, chunks, preferred_chunks) - preferred_chunks_dim = preferred_chunks.get(dim, None) - output_chunks[dim] = chunks.get(dim, preferred_chunks_dim) + output_chunks = dict(zip(var.dims, output_chunks_list)) + _check_chunks_compatibility(var, output_chunks, preferred_chunks) return output_chunks From 60d692396cc52c5e16b4204b358f70dad7a642ec Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 18 Nov 2020 23:45:55 +0100 Subject: [PATCH 07/21] add open_dataset zarr chunking tests --- xarray/core/dataset.py | 2 +- xarray/tests/test_backends.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 097df45e0d1..313d61f13f2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -376,7 +376,7 @@ def _check_chunks_compatibility(var, chunks, chunk_spec): "dimension %r. This could " "degrades performance. Consider " "rechunking after loading instead." 
% (chunks_dim, chunk_spec_dim, dim), - stacklevel=3, + stacklevel=2, ) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c4b7cea8926..469e62cbdd2 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4803,3 +4803,33 @@ def test_load_single_value_h5netcdf(tmp_path): ds.to_netcdf(tmp_path / "test.nc") with xr.open_dataset(tmp_path / "test.nc", engine="h5netcdf") as ds2: ds2["test"][0].load() + + +@requires_zarr +@requires_dask +@pytest.mark.parametrize("chunks", ["auto", -1, {}]) +def test_open_dataset_chunking_zarr(chunks, tmp_path): + encoded_chunks = 100 + ds = xr.Dataset( + { + 'test': xr.DataArray( + np.ones((500, 500), dtype="float64"), + dims=("x", "y"), + ) + } + ) + ds['test'].encoding['chunks'] = encoded_chunks + ds.to_zarr(tmp_path / "test.zarr") + + ds = ds.chunk(encoded_chunks) + dask_arr = ds['test'].data + + with dask.config.set({"array.chunk-size": "1MiB"}): + dask_arr_chunked = dask_arr.rechunk(chunks) + expected = dask_arr_chunked.chunks + + dataset_chunked = xr.open_dataset(tmp_path / "test.zarr", engine="zarr", chunks=chunks) + actual = dataset_chunked['test'].chunks + + assert actual == expected + From 74a52897a5c5a15cffa266c3dc5fa2ef034dd128 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 19 Nov 2020 10:17:25 +0100 Subject: [PATCH 08/21] add few tests in test_open_dataset_chunking_zarr --- xarray/tests/test_backends.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 469e62cbdd2..88bc5aa403b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4807,7 +4807,10 @@ def test_load_single_value_h5netcdf(tmp_path): @requires_zarr @requires_dask -@pytest.mark.parametrize("chunks", ["auto", -1, {}]) +@pytest.mark.parametrize( + "chunks", + ["auto", -1, {}, {"x": "auto"}, {"x": -1}, {"x": "auto", "y": -1}] +) def test_open_dataset_chunking_zarr(chunks, tmp_path): encoded_chunks = 100 ds = xr.Dataset( @@ -4825,7 +4828,13 @@ def test_open_dataset_chunking_zarr(chunks, tmp_path): dask_arr = ds['test'].data with dask.config.set({"array.chunk-size": "1MiB"}): - dask_arr_chunked = dask_arr.rechunk(chunks) + xr2dask_key = {'x': 0, 'y': 1} + if isinstance(chunks, dict): + dask_chunks = {xr2dask_key[k]: chunks[k] for k in chunks} + else: + dask_chunks = chunks + + dask_arr_chunked = dask_arr.rechunk(dask_chunks) expected = dask_arr_chunked.chunks dataset_chunked = xr.open_dataset(tmp_path / "test.zarr", engine="zarr", chunks=chunks) From df4bf3b5aeec16036f630c003a4a64692e6ab21b Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 19 Nov 2020 10:57:33 +0100 Subject: [PATCH 09/21] black isort --- xarray/backends/apiv2.py | 18 +++++++++--------- xarray/backends/zarr.py | 2 -- xarray/core/dataset.py | 5 ++--- xarray/core/variable.py | 1 + xarray/tests/test_backends.py | 18 +++++++++--------- 5 files changed, 21 insertions(+), 23 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 17dc648496e..586e151f02c 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -34,7 +34,7 @@ def _chunk_ds( mtime = _get_mtime(filename_or_obj) token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) name_prefix = "open_dataset-%s" % token - if isinstance(chunks, int) or (chunks == 'auto'): + if isinstance(chunks, int) or (chunks == "auto"): chunks = dict.fromkeys(backend_ds.dims, chunks) variables = {} @@ -48,7 +48,7 @@ def _chunk_ds( 
name_prefix=name_prefix, token=token, ) - ds = backend_ds._replace(variables) + ds = backend_ds._replace(variables) ds._file_obj = backend_ds._file_obj return ds @@ -74,12 +74,12 @@ def dataset_from_backend_dataset( ds = backend_ds else: ds = _chunk_ds( - backend_ds, - filename_or_obj, - engine, - chunks, - overwrite_encoded_chunks, - **extra_tokens, + backend_ds, + filename_or_obj, + engine, + chunks, + overwrite_encoded_chunks, + **extra_tokens, ) # Ensure source filename always stored in dataset object (GH issue #2550) @@ -260,7 +260,7 @@ def open_dataset( drop_variables=drop_variables, **decoders, **backend_kwargs, - **filtered_kwargs + **filtered_kwargs, ) ds = dataset_from_backend_dataset( backend_ds, diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 97fe2b9b1fd..5cfe2d1b0bf 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1,5 +1,3 @@ -import warnings - import numpy as np from .. import coding, conventions diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 313d61f13f2..90b94436b40 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -400,7 +400,7 @@ def _get_chunk(var, chunks): chunks_list, shape=var.shape, dtype=var.dtype, - previous_chunks=preferred_chunks_list + previous_chunks=preferred_chunks_list, ) output_chunks = dict(zip(var.dims, output_chunks_list)) @@ -1919,8 +1919,7 @@ def chunk( bad_dims = chunks.keys() - self.dims.keys() if bad_dims: raise ValueError( - "some chunks keys are not dimensions on this " - "object: %s" % bad_dims + "some chunks keys are not dimensions on this " "object: %s" % bad_dims ) variables = { diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 5bf83b830e7..ee993963843 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1015,6 +1015,7 @@ def chunk(self, chunks={}, name=None, lock=False): """ import dask import dask.array as da + if chunks is None: warnings.warn( "None value for 'chunks' is deprecated. 
" diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 88bc5aa403b..61b94c0d421 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4808,27 +4808,26 @@ def test_load_single_value_h5netcdf(tmp_path): @requires_zarr @requires_dask @pytest.mark.parametrize( - "chunks", - ["auto", -1, {}, {"x": "auto"}, {"x": -1}, {"x": "auto", "y": -1}] + "chunks", ["auto", -1, {}, {"x": "auto"}, {"x": -1}, {"x": "auto", "y": -1}] ) def test_open_dataset_chunking_zarr(chunks, tmp_path): encoded_chunks = 100 ds = xr.Dataset( { - 'test': xr.DataArray( + "test": xr.DataArray( np.ones((500, 500), dtype="float64"), dims=("x", "y"), ) } ) - ds['test'].encoding['chunks'] = encoded_chunks + ds["test"].encoding["chunks"] = encoded_chunks ds.to_zarr(tmp_path / "test.zarr") ds = ds.chunk(encoded_chunks) - dask_arr = ds['test'].data + dask_arr = ds["test"].data with dask.config.set({"array.chunk-size": "1MiB"}): - xr2dask_key = {'x': 0, 'y': 1} + xr2dask_key = {"x": 0, "y": 1} if isinstance(chunks, dict): dask_chunks = {xr2dask_key[k]: chunks[k] for k in chunks} else: @@ -4837,8 +4836,9 @@ def test_open_dataset_chunking_zarr(chunks, tmp_path): dask_arr_chunked = dask_arr.rechunk(dask_chunks) expected = dask_arr_chunked.chunks - dataset_chunked = xr.open_dataset(tmp_path / "test.zarr", engine="zarr", chunks=chunks) - actual = dataset_chunked['test'].chunks + dataset_chunked = xr.open_dataset( + tmp_path / "test.zarr", engine="zarr", chunks=chunks + ) + actual = dataset_chunked["test"].chunks assert actual == expected - From 736ad78827ea4296bf25061cc69ab32dc774cdb0 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 19 Nov 2020 11:25:32 +0100 Subject: [PATCH 10/21] fix stacklevel in warnings --- xarray/core/dataarray.py | 1 - xarray/core/dataset.py | 1 - xarray/core/variable.py | 1 - 3 files changed, 3 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f65acca7f56..4fcef4beb7f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1046,7 +1046,6 @@ def chunk( ------- chunked : xarray.DataArray """ - if isinstance(chunks, (tuple, list)): chunks = dict(zip(self.dims, chunks)) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 90b94436b40..4d027378794 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1909,7 +1909,6 @@ def chunk( "None value for 'chunks' is deprecated. " "It will raise an error in the future. Use instead '{}'", category=FutureWarning, - stacklevel=2, ) chunks = {} diff --git a/xarray/core/variable.py b/xarray/core/variable.py index ee993963843..a604a81cfa1 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1021,7 +1021,6 @@ def chunk(self, chunks={}, name=None, lock=False): "None value for 'chunks' is deprecated. " "It will raise an error in the future. 
Use instead '{}'", category=FutureWarning, - stacklevel=1 ) chunks = {} From b3ba50c40d805acadec74a82bc6c618bcae264c1 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 19 Nov 2020 15:07:21 +0100 Subject: [PATCH 11/21] fix: move piece of chunks computing from open_dataset in get_chunks --- xarray/backends/apiv2.py | 2 -- xarray/core/dataset.py | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 586e151f02c..3f162d899b4 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -34,8 +34,6 @@ def _chunk_ds( mtime = _get_mtime(filename_or_obj) token = tokenize(filename_or_obj, mtime, engine, chunks, **extra_tokens) name_prefix = "open_dataset-%s" % token - if isinstance(chunks, int) or (chunks == "auto"): - chunks = dict.fromkeys(backend_ds.dims, chunks) variables = {} for name, var in backend_ds.variables.items(): diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4d027378794..0f4102e5d89 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -384,6 +384,8 @@ def _get_chunk(var, chunks): # chunks need to be explicity computed to take correctly into accout # backend preferred chunking import dask.array as da + if isinstance(chunks, int) or (chunks == "auto"): + chunks = dict.fromkeys(var.dims, chunks) preferred_chunks_list = var.encoding.get("chunks", {}) preferred_chunks = dict(zip(var.dims, var.encoding.get("chunks", {}))) From 97a7bc135d27bcdc8fa01bb1e951153fc2f94259 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 19 Nov 2020 15:08:10 +0100 Subject: [PATCH 12/21] update tests chunking in test_backends.py --- xarray/tests/test_backends.py | 49 +++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 61b94c0d421..c8f8da7163c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4812,10 +4812,11 @@ def test_load_single_value_h5netcdf(tmp_path): ) def test_open_dataset_chunking_zarr(chunks, tmp_path): encoded_chunks = 100 + dask_arr = da.from_array(np.ones((500, 500), dtype="float64"), chunks=encoded_chunks) ds = xr.Dataset( { "test": xr.DataArray( - np.ones((500, 500), dtype="float64"), + dask_arr, dims=("x", "y"), ) } @@ -4823,22 +4824,42 @@ def test_open_dataset_chunking_zarr(chunks, tmp_path): ds["test"].encoding["chunks"] = encoded_chunks ds.to_zarr(tmp_path / "test.zarr") - ds = ds.chunk(encoded_chunks) - dask_arr = ds["test"].data - with dask.config.set({"array.chunk-size": "1MiB"}): - xr2dask_key = {"x": 0, "y": 1} - if isinstance(chunks, dict): - dask_chunks = {xr2dask_key[k]: chunks[k] for k in chunks} - else: - dask_chunks = chunks + expected = ds.chunk(chunks) + actual = xr.open_dataset( + tmp_path / "test.zarr", engine="zarr", chunks=chunks + ) + assert actual == expected + - dask_arr_chunked = dask_arr.rechunk(dask_chunks) - expected = dask_arr_chunked.chunks +@requires_zarr +@requires_dask +@pytest.mark.parametrize( + "chunks", ["auto", -1, {}, {"x": "auto"}, {"x": -1}, {"x": "auto", "y": -1}] +) +def test_chunking_consintency(chunks, tmp_path): + encoded_chunks = {} + dask_arr = da.from_array(np.ones((500, 500), dtype="float64"), chunks=encoded_chunks) + ds = xr.Dataset( + { + "test": xr.DataArray( + dask_arr, + dims=("x", "y"), + ) + } + ) + ds["test"].encoding["chunks"] = encoded_chunks + ds.to_zarr(tmp_path / "test.zarr") + ds.to_netcdf(tmp_path / "test.nc") - dataset_chunked = xr.open_dataset( + 
with dask.config.set({"array.chunk-size": "1MiB"}): + expected = ds.chunk(chunks) + actual = xr.open_dataset( tmp_path / "test.zarr", engine="zarr", chunks=chunks ) - actual = dataset_chunked["test"].chunks + xr.testing.assert_chunks_equal(actual, expected) - assert actual == expected + actual = xr.open_dataset( + tmp_path / "test.nc", chunks=chunks + ) + xr.testing.assert_chunks_equal(actual, expected) From 48a7131b8631ece5857d051ae30c708f7c6681fa Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Thu, 19 Nov 2020 15:29:54 +0100 Subject: [PATCH 13/21] black --- xarray/core/dataset.py | 1 + xarray/tests/test_backends.py | 20 +++++++++----------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0f4102e5d89..39ee5325247 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -384,6 +384,7 @@ def _get_chunk(var, chunks): # chunks need to be explicity computed to take correctly into accout # backend preferred chunking import dask.array as da + if isinstance(chunks, int) or (chunks == "auto"): chunks = dict.fromkeys(var.dims, chunks) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c8f8da7163c..31df99b79c6 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4812,7 +4812,9 @@ def test_load_single_value_h5netcdf(tmp_path): ) def test_open_dataset_chunking_zarr(chunks, tmp_path): encoded_chunks = 100 - dask_arr = da.from_array(np.ones((500, 500), dtype="float64"), chunks=encoded_chunks) + dask_arr = da.from_array( + np.ones((500, 500), dtype="float64"), chunks=encoded_chunks + ) ds = xr.Dataset( { "test": xr.DataArray( @@ -4826,9 +4828,7 @@ def test_open_dataset_chunking_zarr(chunks, tmp_path): with dask.config.set({"array.chunk-size": "1MiB"}): expected = ds.chunk(chunks) - actual = xr.open_dataset( - tmp_path / "test.zarr", engine="zarr", chunks=chunks - ) + actual = xr.open_dataset(tmp_path / "test.zarr", engine="zarr", chunks=chunks) assert actual == expected @@ -4839,7 +4839,9 @@ def test_open_dataset_chunking_zarr(chunks, tmp_path): ) def test_chunking_consintency(chunks, tmp_path): encoded_chunks = {} - dask_arr = da.from_array(np.ones((500, 500), dtype="float64"), chunks=encoded_chunks) + dask_arr = da.from_array( + np.ones((500, 500), dtype="float64"), chunks=encoded_chunks + ) ds = xr.Dataset( { "test": xr.DataArray( @@ -4854,12 +4856,8 @@ def test_chunking_consintency(chunks, tmp_path): with dask.config.set({"array.chunk-size": "1MiB"}): expected = ds.chunk(chunks) - actual = xr.open_dataset( - tmp_path / "test.zarr", engine="zarr", chunks=chunks - ) + actual = xr.open_dataset(tmp_path / "test.zarr", engine="zarr", chunks=chunks) xr.testing.assert_chunks_equal(actual, expected) - actual = xr.open_dataset( - tmp_path / "test.nc", chunks=chunks - ) + actual = xr.open_dataset(tmp_path / "test.nc", chunks=chunks) xr.testing.assert_chunks_equal(actual, expected) From 300b4e7ba4d26b243226917d1ebccaab621b1c00 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 25 Nov 2020 14:43:44 +0100 Subject: [PATCH 14/21] open_zarr behaviour is now unchanged --- xarray/backends/zarr.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 5cfe2d1b0bf..c4c4d467a0b 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -507,7 +507,7 @@ def open_zarr( store, group=None, synchronizer=None, - chunks={}, + chunks="auto", decode_cf=True, mask_and_scale=True, 
decode_times=True, @@ -611,6 +611,13 @@ def open_zarr( """ from .api import open_dataset + if chunks == "auto": + try: + import dask.array # noqa + chunks = {} + except ImportError: + chunks = None + if kwargs: raise TypeError( "open_zarr() got unexpected keyword arguments " + ",".join(kwargs.keys()) From 1bba7cf9e35b845bde01a430f2601b83eac0134f Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 25 Nov 2020 14:45:24 +0100 Subject: [PATCH 15/21] fix zarr test: remove "auto" replaced with None or {} --- xarray/tests/test_backends.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 31df99b79c6..6049272a266 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1567,7 +1567,7 @@ def roundtrip( if save_kwargs is None: save_kwargs = {} if open_kwargs is None: - open_kwargs = {"chunks": {}} + open_kwargs = {} with self.create_zarr_target() as store_target: self.save(data, store_target, **save_kwargs) with self.open(store_target, **open_kwargs) as ds: @@ -1851,7 +1851,7 @@ def test_write_persistence_modes(self, group): ds_to_append.to_zarr(store_target, append_dim="time", group=group) original = xr.concat([ds, ds_to_append], dim="time") actual = xr.open_dataset( - store_target, group=group, chunks={}, engine="zarr" + store_target, group=group, engine="zarr" ) assert_identical(original, actual) @@ -1941,11 +1941,11 @@ def test_check_encoding_is_consistent_after_append(self): encoding = {"da": {"compressor": compressor}} ds.to_zarr(store_target, mode="w", encoding=encoding) ds_to_append.to_zarr(store_target, append_dim="time") - actual_ds = xr.open_dataset(store_target, chunks={}, engine="zarr") + actual_ds = xr.open_dataset(store_target, engine="zarr") actual_encoding = actual_ds["da"].encoding["compressor"] assert actual_encoding.get_config() == compressor.get_config() assert_identical( - xr.open_dataset(store_target, chunks={}, engine="zarr").compute(), + xr.open_dataset(store_target, engine="zarr").compute(), xr.concat([ds, ds_to_append], dim="time"), ) @@ -1961,7 +1961,7 @@ def test_append_with_new_variable(self): combined = xr.concat([ds, ds_to_append], dim="time") combined["new_var"] = ds_with_new_var["new_var"] assert_identical( - combined, xr.open_dataset(store_target, chunks={}, engine="zarr") + combined, xr.open_dataset(store_target, engine="zarr") ) @requires_dask From df6fa7f9248312a5113a4809caccfb3037cf0d3b Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Wed, 25 Nov 2020 14:46:50 +0100 Subject: [PATCH 16/21] black --- xarray/backends/zarr.py | 1 + xarray/tests/test_backends.py | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c4c4d467a0b..abf0c7372eb 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -614,6 +614,7 @@ def open_zarr( if chunks == "auto": try: import dask.array # noqa + chunks = {} except ImportError: chunks = None diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6049272a266..a556cea67f3 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1850,9 +1850,7 @@ def test_write_persistence_modes(self, group): ds.to_zarr(store_target, mode="w", group=group) ds_to_append.to_zarr(store_target, append_dim="time", group=group) original = xr.concat([ds, ds_to_append], dim="time") - actual = xr.open_dataset( - store_target, group=group, engine="zarr" - ) + actual = 
xr.open_dataset(store_target, group=group, engine="zarr") assert_identical(original, actual) def test_compressor_encoding(self): @@ -1960,9 +1958,7 @@ def test_append_with_new_variable(self): ds_with_new_var.to_zarr(store_target, mode="a") combined = xr.concat([ds, ds_to_append], dim="time") combined["new_var"] = ds_with_new_var["new_var"] - assert_identical( - combined, xr.open_dataset(store_target, engine="zarr") - ) + assert_identical(combined, xr.open_dataset(store_target, engine="zarr")) @requires_dask def test_to_zarr_compute_false_roundtrip(self): From 44d38d603f2dd9873ed5a0719af5f0488d3393a6 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Mon, 30 Nov 2020 11:39:46 +0100 Subject: [PATCH 17/21] fix test_vectorized_indexing in test_backends.py --- xarray/tests/test_backends.py | 51 ++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a556cea67f3..1d10e0720d8 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -298,6 +298,18 @@ def open(self, path, **kwargs): with open_dataset(path, engine=self.engine, **kwargs) as ds: yield ds + def check_multiple_indexing(self, indexers, in_memory): + # make sure a sequence of lazy indexings certainly works. + with self.roundtrip(in_memory) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(**ind) + expected = expected.isel(**ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + def test_zero_dimensional_variable(self): expected = create_test_data() expected["float_var"] = ([], 1.0e9, {"units": "units of awesome"}) @@ -608,10 +620,6 @@ def test_orthogonal_indexing(self): actual = on_disk.isel(**indexers) assert_identical(expected, actual) - @pytest.mark.xfail( - not has_dask, - reason="the code for indexing without dask handles negative steps in slices incorrectly", - ) def test_vectorized_indexing(self): in_memory = create_test_data() with self.roundtrip(in_memory) as on_disk: @@ -629,18 +637,6 @@ def test_vectorized_indexing(self): actual = on_disk.isel(**indexers) assert_identical(expected, actual) - def multiple_indexing(indexers): - # make sure a sequence of lazy indexings certainly works. 
- with self.roundtrip(in_memory) as on_disk: - actual = on_disk["var3"] - expected = in_memory["var3"] - for ind in indexers: - actual = actual.isel(**ind) - expected = expected.isel(**ind) - # make sure the array is not yet loaded into memory - assert not actual.variable._in_memory - assert_identical(expected, actual.load()) - # two-staged vectorized-indexing indexers = [ { @@ -649,7 +645,7 @@ def multiple_indexing(indexers): }, {"a": DataArray([0, 1], dims=["c"]), "b": DataArray([0, 1], dims=["c"])}, ] - multiple_indexing(indexers) + self.check_multiple_indexing(indexers, in_memory) # vectorized-slice mixed indexers = [ @@ -658,7 +654,7 @@ def multiple_indexing(indexers): "dim3": slice(None, 10), } ] - multiple_indexing(indexers) + self.check_multiple_indexing(indexers, in_memory) # vectorized-integer mixed indexers = [ @@ -666,7 +662,7 @@ def multiple_indexing(indexers): {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, {"a": slice(None, None, 2)}, ] - multiple_indexing(indexers) + self.check_multiple_indexing(indexers, in_memory) # vectorized-integer mixed indexers = [ @@ -674,7 +670,14 @@ def multiple_indexing(indexers): {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, {"a": 1, "b": 0}, ] - multiple_indexing(indexers) + self.check_multiple_indexing(indexers, in_memory) + + @pytest.mark.xfail( + not has_dask, + reason="the code for indexing without dask handles negative steps in slices incorrectly", + ) + def test_vectorized_indexing_negative_step_slice(self): + in_memory = create_test_data() # with negative step slice. indexers = [ @@ -683,7 +686,7 @@ def multiple_indexing(indexers): "dim3": slice(-1, 1, -1), } ] - multiple_indexing(indexers) + self.check_multiple_indexing(indexers, in_memory) # with negative step slice. indexers = [ @@ -692,7 +695,7 @@ def multiple_indexing(indexers): "dim3": slice(-1, 1, -2), } ] - multiple_indexing(indexers) + self.check_multiple_indexing(indexers, in_memory) def test_isel_dataarray(self): # Make sure isel works lazily. 
GH:issue:1688 @@ -2168,6 +2171,10 @@ def test_open_zarr_use_cftime(self): ds_b = xr.open_zarr(store_target, consolidated=True, use_cftime=True) assert xr.coding.times.contains_cftime_datetimes(ds_b.time) + @requires_dask + def test_vectorized_indexing_negative_step_slice(self): + super().test_vectorized_indexing_negative_step_slice + @requires_zarr class TestZarrDictStore(ZarrBase): From 6457b7477f11192cc497ec6805cdc21876095a06 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Mon, 30 Nov 2020 11:43:01 +0100 Subject: [PATCH 18/21] removed not used xfails --- xarray/tests/test_backends.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1d10e0720d8..7fba8489cf7 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -672,10 +672,6 @@ def test_vectorized_indexing(self): ] self.check_multiple_indexing(indexers, in_memory) - @pytest.mark.xfail( - not has_dask, - reason="the code for indexing without dask handles negative steps in slices incorrectly", - ) def test_vectorized_indexing_negative_step_slice(self): in_memory = create_test_data() From ce6f08fe54ee4f93a2b61ed51ad6ae0ee67414b0 Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Mon, 30 Nov 2020 15:44:47 +0100 Subject: [PATCH 19/21] reverted not necessary change --- xarray/tests/test_backends.py | 48 ++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 7fba8489cf7..d9802458f21 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -298,18 +298,6 @@ def open(self, path, **kwargs): with open_dataset(path, engine=self.engine, **kwargs) as ds: yield ds - def check_multiple_indexing(self, indexers, in_memory): - # make sure a sequence of lazy indexings certainly works. - with self.roundtrip(in_memory) as on_disk: - actual = on_disk["var3"] - expected = in_memory["var3"] - for ind in indexers: - actual = actual.isel(**ind) - expected = expected.isel(**ind) - # make sure the array is not yet loaded into memory - assert not actual.variable._in_memory - assert_identical(expected, actual.load()) - def test_zero_dimensional_variable(self): expected = create_test_data() expected["float_var"] = ([], 1.0e9, {"units": "units of awesome"}) @@ -637,6 +625,18 @@ def test_vectorized_indexing(self): actual = on_disk.isel(**indexers) assert_identical(expected, actual) + def multiple_indexing(indexers): + # make sure a sequence of lazy indexings certainly works. 
+ with self.roundtrip(in_memory) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(**ind) + expected = expected.isel(**ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + # two-staged vectorized-indexing indexers = [ { @@ -645,7 +645,7 @@ def test_vectorized_indexing(self): }, {"a": DataArray([0, 1], dims=["c"]), "b": DataArray([0, 1], dims=["c"])}, ] - self.check_multiple_indexing(indexers, in_memory) + multiple_indexing(indexers) # vectorized-slice mixed indexers = [ @@ -654,7 +654,7 @@ def test_vectorized_indexing(self): "dim3": slice(None, 10), } ] - self.check_multiple_indexing(indexers, in_memory) + multiple_indexing(indexers) # vectorized-integer mixed indexers = [ @@ -662,7 +662,7 @@ def test_vectorized_indexing(self): {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, {"a": slice(None, None, 2)}, ] - self.check_multiple_indexing(indexers, in_memory) + multiple_indexing(indexers) # vectorized-integer mixed indexers = [ @@ -670,11 +670,23 @@ def test_vectorized_indexing(self): {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, {"a": 1, "b": 0}, ] - self.check_multiple_indexing(indexers, in_memory) + multiple_indexing(indexers) def test_vectorized_indexing_negative_step_slice(self): in_memory = create_test_data() + def multiple_indexing(indexers): + # make sure a sequence of lazy indexings certainly works. + with self.roundtrip(in_memory) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(**ind) + expected = expected.isel(**ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + # with negative step slice. indexers = [ { @@ -682,7 +694,7 @@ def test_vectorized_indexing_negative_step_slice(self): "dim3": slice(-1, 1, -1), } ] - self.check_multiple_indexing(indexers, in_memory) + multiple_indexing(indexers) # with negative step slice. indexers = [ @@ -691,7 +703,7 @@ def test_vectorized_indexing_negative_step_slice(self): "dim3": slice(-1, 1, -2), } ] - self.check_multiple_indexing(indexers, in_memory) + multiple_indexing(indexers) def test_isel_dataarray(self): # Make sure isel works lazily. GH:issue:1688 From dda2010f3244ae53e94841bf9f4cfbbec8f35e65 Mon Sep 17 00:00:00 2001 From: Alessandro Amici Date: Mon, 30 Nov 2020 19:21:04 +0100 Subject: [PATCH 20/21] Fix method override and enable dask for zarr --- xarray/tests/test_backends.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d9802458f21..8d8ead147d9 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -672,12 +672,12 @@ def multiple_indexing(indexers): ] multiple_indexing(indexers) - def test_vectorized_indexing_negative_step_slice(self): + def test_vectorized_indexing_negative_step_slice(self, open_kwargs=None): in_memory = create_test_data() def multiple_indexing(indexers): # make sure a sequence of lazy indexings certainly works. 
- with self.roundtrip(in_memory) as on_disk: + with self.roundtrip(in_memory, open_kwargs=open_kwargs) as on_disk: actual = on_disk["var3"] expected = in_memory["var3"] for ind in indexers: @@ -2181,7 +2181,7 @@ def test_open_zarr_use_cftime(self): @requires_dask def test_vectorized_indexing_negative_step_slice(self): - super().test_vectorized_indexing_negative_step_slice + super().test_vectorized_indexing_negative_step_slice(open_kwargs={"chunks": {}}) @requires_zarr From e21820cabef71804c9335d0b54412051b627ce4e Mon Sep 17 00:00:00 2001 From: Aureliana Barghini Date: Tue, 1 Dec 2020 10:33:08 +0100 Subject: [PATCH 21/21] revert to chunks=None ds.chunk default. revert style fix --- xarray/core/dataarray.py | 3 ++- xarray/core/dataset.py | 24 ++++++++++-------------- xarray/core/variable.py | 15 +++++---------- 3 files changed, 17 insertions(+), 25 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4fcef4beb7f..b95f681bc79 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1010,11 +1010,12 @@ def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]: def chunk( self, chunks: Union[ + None, Number, Tuple[Number, ...], Tuple[Tuple[Number, ...], ...], Mapping[Hashable, Union[None, Number, Tuple[Number, ...]]], - ] = {}, + ] = None, name_prefix: str = "xarray-", token: str = None, lock: bool = False, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 39ee5325247..269f58aca92 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -415,7 +415,7 @@ def _get_chunk(var, chunks): def _maybe_chunk( name, var, - chunks, + chunks=None, token=None, lock=None, name_prefix="xarray-", @@ -1872,10 +1872,11 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: def chunk( self, chunks: Union[ + None, Number, str, Mapping[Hashable, Union[None, Number, str, Tuple[Number, ...]]], - ] = {}, + ] = None, name_prefix: str = "xarray-", token: str = None, lock: bool = False, @@ -1907,22 +1908,17 @@ def chunk( ------- chunked : xarray.Dataset """ - if chunks is None: - warnings.warn( - "None value for 'chunks' is deprecated. " - "It will raise an error in the future. Use instead '{}'", - category=FutureWarning, - ) - chunks = {} if isinstance(chunks, (Number, str)): chunks = dict.fromkeys(self.dims, chunks) - bad_dims = chunks.keys() - self.dims.keys() - if bad_dims: - raise ValueError( - "some chunks keys are not dimensions on this " "object: %s" % bad_dims - ) + if chunks is not None: + bad_dims = chunks.keys() - self.dims.keys() + if bad_dims: + raise ValueError( + "some chunks keys are not dimensions on this " + "object: %s" % bad_dims + ) variables = { k: _maybe_chunk(k, v, chunks, token, lock, name_prefix) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a604a81cfa1..a3876cb0077 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -986,7 +986,7 @@ def chunks(self): _array_counter = itertools.count() - def chunk(self, chunks={}, name=None, lock=False): + def chunk(self, chunks=None, name=None, lock=False): """Coerce this array's data into a dask arrays with the given chunks. If this variable is a non-dask array, it will be converted to dask @@ -1016,17 +1016,12 @@ def chunk(self, chunks={}, name=None, lock=False): import dask import dask.array as da - if chunks is None: - warnings.warn( - "None value for 'chunks' is deprecated. " - "It will raise an error in the future. 
Use instead '{}'", - category=FutureWarning, - ) - chunks = {} - if utils.is_dict_like(chunks): chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} + if chunks is None: + chunks = self.chunks or self.shape + data = self._data if is_duck_dask_array(data): data = data.rechunk(chunks) @@ -2373,7 +2368,7 @@ def values(self, values): f"Please use DataArray.assign_coords, Dataset.assign_coords or Dataset.assign as appropriate." ) - def chunk(self, chunks={}, name=None, lock=False): + def chunk(self, chunks=None, name=None, lock=False): # Dummy - do not chunk. This method is invoked e.g. by Dataset.chunk() return self.copy(deep=False)