From c703ebc74f73e3fdc33ea302425bbb689f6204f6 Mon Sep 17 00:00:00 2001
From: Benoit Bovy
Date: Mon, 7 Aug 2023 12:03:39 +0200
Subject: [PATCH 01/11] add set_indexes parameter to open_dataset

---
 xarray/backends/api.py    | 16 ++++++++++++++++
 xarray/backends/common.py |  1 +
 2 files changed, 17 insertions(+)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index e35d85a1e2f..254ed54a3a0 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -403,6 +403,7 @@ def open_dataset(
     concat_characters: bool | None = None,
     decode_coords: Literal["coordinates", "all"] | bool | None = None,
     drop_variables: str | Iterable[str] | None = None,
+    set_indexes: bool = True,
     inline_array: bool = False,
     chunked_array_type: str | None = None,
     from_array_kwargs: dict[str, Any] | None = None,
@@ -492,6 +493,12 @@
         A variable or list of variables to exclude from being parsed from the
         dataset. This may be useful to drop variables with problems or
         inconsistent values.
+    set_indexes : bool, optional
+        If True (default), create new indexes from coordinates. Both the number and
+        the type(s) of those indexes depend on the backend used to open the dataset.
+        For most common backends this creates a pandas index for each
+        :term:`Dimension coordinate`, which loads the coordinate data fully in memory.
+        Set it to False if you want to avoid loading data into memory.
     inline_array: bool, default: False
         How to include the array in the dask task graph.
         By default(``inline_array=False``) the array is included in a task by
@@ -570,6 +577,7 @@
     backend_ds = backend.open_dataset(
         filename_or_obj,
         drop_variables=drop_variables,
+        set_indexes=set_indexes,
         **decoders,
         **kwargs,
     )
@@ -604,6 +612,7 @@ def open_dataarray(
     concat_characters: bool | None = None,
     decode_coords: Literal["coordinates", "all"] | bool | None = None,
     drop_variables: str | Iterable[str] | None = None,
+    set_indexes: bool = True,
     inline_array: bool = False,
     chunked_array_type: str | None = None,
     from_array_kwargs: dict[str, Any] | None = None,
@@ -695,6 +704,12 @@
         A variable or list of variables to exclude from being parsed from the
         dataset. This may be useful to drop variables with problems or
         inconsistent values.
+    set_indexes : bool, optional
+        If True (default), create new indexes from coordinates. Both the number and
+        the type(s) of those indexes depend on the backend used to open the dataset.
+        For most common backends this creates a pandas index for each
+        :term:`Dimension coordinate`, which loads the coordinate data fully in memory.
+        Set it to False if you want to avoid loading data into memory.
     inline_array: bool, default: False
         How to include the array in the dask task graph.
         By default(``inline_array=False``) the array is included in a task by
@@ -752,6 +767,7 @@ def open_dataarray(
         chunks=chunks,
         cache=cache,
         drop_variables=drop_variables,
+        set_indexes=set_indexes,
         inline_array=inline_array,
         chunked_array_type=chunked_array_type,
         from_array_kwargs=from_array_kwargs,
diff --git a/xarray/backends/common.py b/xarray/backends/common.py
index 1ac988c6b4f..f12057bd4af 100644
--- a/xarray/backends/common.py
+++ b/xarray/backends/common.py
@@ -490,6 +490,7 @@ def open_dataset(
         filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
         *,
         drop_variables: str | Iterable[str] | None = None,
+        set_indexes: bool = True,
         **kwargs: Any,
     ) -> Dataset:
         """

From 6f54cd52e8030cf79eac0c4b1b647c08de901131 Mon Sep 17 00:00:00 2001
From: Benoit Bovy
Date: Mon, 7 Aug 2023 12:04:50 +0200
Subject: [PATCH 02/11] implement set_indexes in (zarr) backend store

---
 xarray/backends/store.py | 20 ++++++++++++++++++--
 xarray/backends/zarr.py  |  8 ++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/xarray/backends/store.py b/xarray/backends/store.py
index a507ee37470..e15e6b08c0f 100644
--- a/xarray/backends/store.py
+++ b/xarray/backends/store.py
@@ -9,6 +9,7 @@
     AbstractDataStore,
     BackendEntrypoint,
 )
+from xarray.core.coordinates import Coordinates
 from xarray.core.dataset import Dataset

 if TYPE_CHECKING:
@@ -35,6 +36,7 @@ def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporti
         concat_characters=True,
         decode_coords=True,
         drop_variables: str | Iterable[str] | None = None,
+        set_indexes: bool = True,
         use_cftime=None,
         decode_timedelta=None,
     ) -> Dataset:
@@ -55,8 +57,22 @@ def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporti
             decode_timedelta=decode_timedelta,
         )

-        ds = Dataset(vars, attrs=attrs)
-        ds = ds.set_coords(coord_names.intersection(vars))
+        # split data and coordinate variables (promote dimension coordinates)
+        data_vars = {}
+        coord_vars = {}
+        for name, var in vars.items():
+            if name in coord_names or var.dims == (name,):
+                coord_vars[name] = var
+            else:
+                data_vars[name] = var
+
+        if set_indexes:
+            coords = coord_vars
+        else:
+            # explicit Coordinates object with no index passed
+            coords = Coordinates(coord_vars)
+
+        ds = Dataset(data_vars, coords=coords, attrs=attrs)
         ds.set_close(filename_or_obj.close)
         ds.encoding = encoding
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index f88523422bb..2de008b8c72 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -757,6 +757,7 @@ def open_zarr(
     zarr_version=None,
     chunked_array_type: str | None = None,
     from_array_kwargs: dict[str, Any] | None = None,
+    set_indexes=True,
     **kwargs,
 ):
     """Load and decode a dataset from a Zarr store.
@@ -850,6 +851,10 @@
        chunked arrays, via whichever chunk manager is specified through the
        `chunked_array_type` kwarg. Defaults to {'manager': 'dask'}, meaning
        additional kwargs will be passed eventually to
        :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
+    set_indexes : bool, optional
+        If True (default), create a default (pandas) index for each
+        :term:`Dimension coordinate`. Set it to False if the dataset contains
+        dimension coordinate arrays that are too large to load fully in memory.

     Returns
     -------
@@ -906,6 +911,7 @@
         engine="zarr",
         chunks=chunks,
         drop_variables=drop_variables,
+        set_indexes=set_indexes,
         chunked_array_type=chunked_array_type,
         from_array_kwargs=from_array_kwargs,
         backend_kwargs=backend_kwargs,
@@ -950,6 +956,7 @@ def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporti
         concat_characters=True,
         decode_coords=True,
         drop_variables: str | Iterable[str] | None = None,
+        set_indexes: bool = True,
         use_cftime=None,
         decode_timedelta=None,
         group=None,
@@ -986,6 +993,7 @@ def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporti
             drop_variables=drop_variables,
             use_cftime=use_cftime,
             decode_timedelta=decode_timedelta,
+            set_indexes=set_indexes,
         )
         return ds
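
The two patches above thread ``set_indexes`` from the user-facing functions down
to the (zarr) store entrypoint. A minimal usage sketch of the behaviour they
target (the store path is hypothetical)::

    import xarray as xr

    # Ask the backend to skip building in-memory pandas indexes for the
    # dimension coordinates, so their data can stay lazy.
    ds = xr.open_zarr("store.zarr", set_indexes=False)
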
From 145ae1c44c4d2bdbcf74c9107172e4119a969d30 Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Mon, 30 Jun 2025 23:03:21 +0200
Subject: [PATCH 03/11] replace `set_indexes` with `create_default_indexes`

---
 xarray/backends/api.py    | 51 +++++++++++++++++++++++++--------------
 xarray/backends/common.py |  2 --
 xarray/backends/store.py  |  7 ++----
 xarray/backends/zarr.py   | 17 +++++++------
 4 files changed, 44 insertions(+), 33 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index e4fcfe5eeb6..1b5d8e1304b 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -36,6 +36,7 @@
 from xarray.backends.locks import _get_scheduler
 from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder
 from xarray.core import indexing
+from xarray.core.coordinates import Coordinates
 from xarray.core.dataarray import DataArray
 from xarray.core.dataset import Dataset
 from xarray.core.datatree import DataTree
@@ -389,6 +390,7 @@ def _dataset_from_backend_dataset(
     inline_array,
     chunked_array_type,
     from_array_kwargs,
+    create_default_indexes,
     **extra_tokens,
 ):
     if not isinstance(chunks, int | dict) and chunks not in {None, "auto"}:
@@ -397,11 +399,22 @@ def _dataset_from_backend_dataset(
     )

     _protect_dataset_variables_inplace(backend_ds, cache)
+
+    if create_default_indexes:
+        to_index = {
+            name: coord.variable
+            for name, coord in backend_ds.coords.items()
+            if coord.dims == (name,) and name not in backend_ds.xindexes
+        }
+        indexed = backend_ds.assign_coords(Coordinates(to_index))
+    else:
+        indexed = backend_ds
+
     if chunks is None:
-        ds = backend_ds
+        ds = indexed
     else:
         ds = _chunk_ds(
-            backend_ds,
+            indexed,
             filename_or_obj,
             engine,
             chunks,
@@ -497,7 +510,7 @@ def open_dataset(
     concat_characters: bool | Mapping[str, bool] | None = None,
     decode_coords: Literal["coordinates", "all"] | bool | None = None,
     drop_variables: str | Iterable[str] | None = None,
-    set_indexes: bool = True,
+    create_default_indexes: bool = True,
     inline_array: bool = False,
     chunked_array_type: str | None = None,
     from_array_kwargs: dict[str, Any] | None = None,
@@ -611,12 +624,13 @@ def open_dataset(
         A variable or list of variables to exclude from being parsed from the
         dataset. This may be useful to drop variables with problems or
         inconsistent values.
-    set_indexes : bool, optional
-        If True (default), create new indexes from coordinates. Both the number and
-        the type(s) of those indexes depend on the backend used to open the dataset.
-        For most common backends this creates a pandas index for each
-        :term:`Dimension coordinate`, which loads the coordinate data fully in memory.
-        Set it to False if you want to avoid loading data into memory.
+    create_default_indexes : bool, default: True
+        If True, create pandas indexes for :term:`dimension coordinates <dimension coordinate>`,
+        which loads the coordinate data into memory. Set it to False if you want to avoid loading
+        data into memory.
+
+        Note that backends can still choose to create other indexes. If you want to control that,
+        please refer to the backend's documentation.
     inline_array: bool, default: False
         How to include the array in the dask task graph.
         By default(``inline_array=False``) the array is included in a task by
@@ -695,7 +709,6 @@ def open_dataset(
     backend_ds = backend.open_dataset(
         filename_or_obj,
         drop_variables=drop_variables,
-        set_indexes=set_indexes,
         **decoders,
         **kwargs,
     )
@@ -710,6 +723,7 @@ def open_dataset(
         chunked_array_type,
         from_array_kwargs,
         drop_variables=drop_variables,
+        create_default_indexes=create_default_indexes,
         **decoders,
         **kwargs,
     )
@@ -733,7 +747,7 @@ def open_dataarray(
     concat_characters: bool | None = None,
     decode_coords: Literal["coordinates", "all"] | bool | None = None,
     drop_variables: str | Iterable[str] | None = None,
-    set_indexes: bool = True,
+    create_default_indexes: bool = True,
     inline_array: bool = False,
     chunked_array_type: str | None = None,
     from_array_kwargs: dict[str, Any] | None = None,
@@ -842,12 +856,13 @@ def open_dataarray(
         A variable or list of variables to exclude from being parsed from the
         dataset. This may be useful to drop variables with problems or
         inconsistent values.
-    set_indexes : bool, optional
-        If True (default), create new indexes from coordinates. Both the number and
-        the type(s) of those indexes depend on the backend used to open the dataset.
-        For most common backends this creates a pandas index for each
-        :term:`Dimension coordinate`, which loads the coordinate data fully in memory.
-        Set it to False if you want to avoid loading data into memory.
+    create_default_indexes : bool, default: True
+        If True, create pandas indexes for :term:`dimension coordinates <dimension coordinate>`,
+        which loads the coordinate data into memory. Set it to False if you want to avoid loading
+        data into memory.
+
+        Note that backends can still choose to create other indexes. If you want to control that,
+        please refer to the backend's documentation.
     inline_array: bool, default: False
         How to include the array in the dask task graph.
         By default(``inline_array=False``) the array is included in a task by
@@ -905,7 +920,7 @@ def open_dataarray(
         chunks=chunks,
         cache=cache,
         drop_variables=drop_variables,
-        set_indexes=set_indexes,
+        create_default_indexes=create_default_indexes,
         inline_array=inline_array,
         chunked_array_type=chunked_array_type,
         from_array_kwargs=from_array_kwargs,
diff --git a/xarray/backends/common.py b/xarray/backends/common.py
index 9ef0da81659..e1f8dc5cecd 100644
--- a/xarray/backends/common.py
+++ b/xarray/backends/common.py
@@ -697,8 +697,6 @@ def open_dataset(
         filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
         *,
         drop_variables: str | Iterable[str] | None = None,
-        set_indexes: bool = True,
-        **kwargs: Any,
     ) -> Dataset:
         """
         Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`.
diff --git a/xarray/backends/store.py b/xarray/backends/store.py
index 0b34ea7ed8c..7edfbd1c4e0 100644
--- a/xarray/backends/store.py
+++ b/xarray/backends/store.py
@@ -67,11 +67,8 @@ def open_dataset(
             else:
                 data_vars[name] = var

-        if set_indexes:
-            coords = coord_vars
-        else:
-            # explicit Coordinates object with no index passed
-            coords = Coordinates(coord_vars)
+        # explicit Coordinates object with no index passed
+        coords = Coordinates(coord_vars)

         ds = Dataset(data_vars, coords=coords, attrs=attrs)
         ds.set_close(filename_or_obj.close)
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index f8ff1fa6b7f..dc78194dcd2 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -1347,7 +1347,7 @@ def open_zarr(
     use_zarr_fill_value_as_mask=None,
     chunked_array_type: str | None = None,
     from_array_kwargs: dict[str, Any] | None = None,
-    set_indexes=True,
+    create_default_indexes=True,
     **kwargs,
 ):
     """Load and decode a dataset from a Zarr store.
@@ -1458,10 +1458,13 @@ def open_zarr(
        chunked arrays, via whichever chunk manager is specified through the
        ``chunked_array_type`` kwarg. Defaults to ``{'manager': 'dask'}``, meaning
        additional kwargs will be passed eventually to
        :py:func:`dask.array.from_array`. Experimental API that should not be relied upon.
-    set_indexes : bool, optional
-        If True (default), create a default (pandas) index for each
-        :term:`Dimension coordinate`. Set it to False if the dataset contains
-        dimension coordinate arrays that are too large to load fully in memory.
+    create_default_indexes : bool, default: True
+        If True, create pandas indexes for :term:`dimension coordinates <dimension coordinate>`,
+        which loads the coordinate data into memory. Set it to False if you want to avoid loading
+        data into memory.
+
+        Note that backends can still choose to create other indexes. If you want to control that,
+        please refer to the backend's documentation.

     Returns
     -------
@@ -1518,7 +1521,7 @@ def open_zarr(
         engine="zarr",
         chunks=chunks,
         drop_variables=drop_variables,
-        set_indexes=set_indexes,
+        create_default_indexes=create_default_indexes,
         chunked_array_type=chunked_array_type,
         from_array_kwargs=from_array_kwargs,
         backend_kwargs=backend_kwargs,
@@ -1564,7 +1567,6 @@ def open_dataset(
         concat_characters=True,
         decode_coords=True,
         drop_variables: str | Iterable[str] | None = None,
-        set_indexes: bool = True,
         use_cftime=None,
         decode_timedelta=None,
         group=None,
@@ -1608,7 +1610,6 @@ def open_dataset(
             drop_variables=drop_variables,
             use_cftime=use_cftime,
             decode_timedelta=decode_timedelta,
-            set_indexes=set_indexes,
         )
         return ds
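
Patch 03 moves default index creation out of the individual backends and into
``_dataset_from_backend_dataset``. The selection logic it adds is easy to
reproduce standalone; a self-contained sketch of the same idea (names are
illustrative)::

    import xarray as xr

    # a dataset with a dimension coordinate "x" but no indexes at all
    coords = xr.Coordinates({"x": ("x", [10, 20, 30])}, indexes={})
    ds = xr.Dataset({"a": ("x", [1, 2, 3])}, coords=coords)
    assert "x" not in ds.xindexes

    # pick the dimension coordinates that are not indexed yet ...
    to_index = {
        name: coord.variable
        for name, coord in ds.coords.items()
        if coord.dims == (name,) and name not in ds.xindexes
    }
    # ... and build default (pandas) indexes for exactly those
    indexed = ds.assign_coords(xr.Coordinates(to_index))
    assert "x" in indexed.xindexes
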
From 192c367f47c14a1d4e1837e61702eedda07c4a8a Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Mon, 30 Jun 2025 23:20:55 +0200
Subject: [PATCH 04/11] make sure indexes set by the backend survive

---
 xarray/tests/test_backends_api.py | 36 +++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py
index 9342423b727..885b8cb8a46 100644
--- a/xarray/tests/test_backends_api.py
+++ b/xarray/tests/test_backends_api.py
@@ -201,3 +201,39 @@ def test_join_chunks(self, shape, pref_chunks, req_chunks):
             chunks=dict(zip(initial[self.var_name].dims, req_chunks, strict=True)),
         )
         self.check_dataset(initial, final, explicit_chunks(req_chunks, shape))
+
+    @pytest.mark.parametrize("create_default_indexes", [True, False])
+    def test_default_indexes(self, create_default_indexes):
+        """Create default indexes if the backend does not create them."""
+        coords = xr.Coordinates({"x": ("x", [0, 1]), "y": list("abc")}, indexes={})
+        initial = xr.Dataset({"a": ("x", [1, 2])}, coords=coords)
+
+        with assert_no_warnings():
+            final = xr.open_dataset(
+                initial,
+                engine=PassThroughBackendEntrypoint,
+                create_default_indexes=create_default_indexes,
+            )
+
+        if create_default_indexes:
+            assert all(name in final.xindexes for name in ["x", "y"])
+        else:
+            assert not final.xindexes
+
+    @pytest.mark.parametrize("create_default_indexes", [True, False])
+    def test_default_indexes_passthrough(self, create_default_indexes):
+        """Allow creating indexes in the backend."""
+
+        initial = xr.Dataset(
+            {"a": (["x", "y"], [[1, 2, 3], [4, 5, 6]])},
+            coords={"x": ("x", [0, 1]), "y": ("y", list("abc"))},
+        ).stack(z=["x", "y"])
+
+        with assert_no_warnings():
+            final = xr.open_dataset(
+                initial,
+                engine=PassThroughBackendEntrypoint,
+                create_default_indexes=create_default_indexes,
+            )
+
+        assert initial.coords.equals(final.coords)
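
Both tests rely on the ``PassThroughBackendEntrypoint`` defined earlier in
``test_backends_api.py``, which hands the object given as ``filename_or_obj``
straight back to ``open_dataset``. A rough sketch of such an entrypoint (not
the verbatim test helper)::

    import xarray as xr

    class PassThroughBackendEntrypoint(xr.backends.BackendEntrypoint):
        # Return the Dataset unchanged, so the test controls exactly
        # which indexes the "backend" did or did not create.
        def open_dataset(self, dataset, *, drop_variables=None):
            return dataset
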
From f5823a73107643ee1206023cbdb2086f1c375437 Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Mon, 30 Jun 2025 23:27:01 +0200
Subject: [PATCH 05/11] also add the parameter to `open_datatree`

---
 xarray/backends/api.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 1b5d8e1304b..e3582370cb4 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -1206,6 +1206,7 @@ def open_groups(
     concat_characters: bool | Mapping[str, bool] | None = None,
     decode_coords: Literal["coordinates", "all"] | bool | None = None,
     drop_variables: str | Iterable[str] | None = None,
+    create_default_indexes: bool = True,
     inline_array: bool = False,
     chunked_array_type: str | None = None,
     from_array_kwargs: dict[str, Any] | None = None,
@@ -1317,6 +1318,13 @@ def open_groups(
         A variable or list of variables to exclude from being parsed from the
         dataset. This may be useful to drop variables with problems or
         inconsistent values.
+    create_default_indexes : bool, default: True
+        If True, create pandas indexes for :term:`dimension coordinates <dimension coordinate>`,
+        which loads the coordinate data into memory. Set it to False if you want to avoid loading
+        data into memory.
+
+        Note that backends can still choose to create other indexes. If you want to control that,
+        please refer to the backend's documentation.
     inline_array: bool, default: False
         How to include the array in the dask task graph.
         By default(``inline_array=False``) the array is included in a task by
@@ -1412,6 +1420,7 @@ def open_groups(
         chunked_array_type,
         from_array_kwargs,
         drop_variables=drop_variables,
+        create_default_indexes=create_default_indexes,
         **decoders,
         **kwargs,
     )
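
``open_groups`` returns a mapping from group paths to datasets, so the keyword
applies uniformly to every group. A hypothetical sketch (the file name is
illustrative, and it assumes the backend creates no indexes of its own)::

    import xarray as xr

    groups = xr.open_groups("nested-groups.nc", create_default_indexes=False)
    for path, group_ds in groups.items():
        assert len(group_ds.xindexes) == 0
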
From 2ff8402192a3afd443e155e5a3c10d592475dce7 Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Mon, 30 Jun 2025 23:37:23 +0200
Subject: [PATCH 06/11] share the implementation of the default indexes creation

---
 xarray/backends/api.py | 34 ++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index e3582370cb4..0ac1a4d9503 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -380,6 +380,18 @@ def _chunk_ds(
     return backend_ds._replace(variables)


+def _create_default_indexes(ds, create_default_indexes):
+    if not create_default_indexes:
+        return ds
+
+    to_index = {
+        name: coord.variable
+        for name, coord in ds.coords.items()
+        if coord.dims == (name,) and name not in ds.xindexes
+    }
+    return ds.assign_coords(Coordinates(to_index))
+
+
 def _dataset_from_backend_dataset(
     backend_ds,
     filename_or_obj,
@@ -400,15 +412,7 @@ def _dataset_from_backend_dataset(

     _protect_dataset_variables_inplace(backend_ds, cache)

-    if create_default_indexes:
-        to_index = {
-            name: coord.variable
-            for name, coord in backend_ds.coords.items()
-            if coord.dims == (name,) and name not in backend_ds.xindexes
-        }
-        indexed = backend_ds.assign_coords(Coordinates(to_index))
-    else:
-        indexed = backend_ds
+    indexed = _create_default_indexes(backend_ds, create_default_indexes)

     if chunks is None:
         ds = indexed
@@ -447,6 +451,7 @@ def _datatree_from_backend_datatree(
     inline_array,
     chunked_array_type,
     from_array_kwargs,
+    create_default_indexes,
     **extra_tokens,
 ):
     if not isinstance(chunks, int | dict) and chunks not in {None, "auto"}:
@@ -461,7 +466,7 @@ def _datatree_from_backend_datatree(
     tree = DataTree.from_dict(
         {
             path: _chunk_ds(
-                node.dataset,
+                node.dataset.pipe(_create_default_indexes, create_default_indexes),
                 filename_or_obj,
                 engine,
                 chunks,
@@ -977,6 +982,7 @@ def open_datatree(
     concat_characters: bool | Mapping[str, bool] | None = None,
     decode_coords: Literal["coordinates", "all"] | bool | None = None,
     drop_variables: str | Iterable[str] | None = None,
+    create_default_indexes: bool = True,
     inline_array: bool = False,
     chunked_array_type: str | None = None,
     from_array_kwargs: dict[str, Any] | None = None,
@@ -1086,6 +1092,13 @@ def open_datatree(
         A variable or list of variables to exclude from being parsed from the
         dataset. This may be useful to drop variables with problems or
         inconsistent values.
+    create_default_indexes : bool, default: True
+        If True, create pandas indexes for :term:`dimension coordinates <dimension coordinate>`,
+        which loads the coordinate data into memory. Set it to False if you want to avoid loading
+        data into memory.
+
+        Note that backends can still choose to create other indexes. If you want to control that,
+        please refer to the backend's documentation.
     inline_array: bool, default: False
         How to include the array in the dask task graph.
         By default(``inline_array=False``) the array is included in a task by
@@ -1179,6 +1192,7 @@ def open_datatree(
         chunked_array_type,
         from_array_kwargs,
         drop_variables=drop_variables,
+        create_default_indexes=create_default_indexes,
         **decoders,
         **kwargs,
     )
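
For datatree nodes the shared helper is applied through ``Dataset.pipe``, which
simply forwards the dataset as the first positional argument; the two
expressions below are equivalent::

    node.dataset.pipe(_create_default_indexes, create_default_indexes)
    _create_default_indexes(node.dataset, create_default_indexes)
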
From 294b2f712425ca9424a4d6ad62912d653087beb5 Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Wed, 2 Jul 2025 00:00:11 +0200
Subject: [PATCH 07/11] check that the store backend entrypoint does not
 create default indexes

---
 xarray/tests/test_backends.py     | 61 +++++++++++++++++++++++++++++++
 xarray/tests/test_backends_api.py |  2 +-
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 733188dde1e..92617b5d7b7 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -55,6 +55,7 @@
 from xarray.coding.variables import SerializationWarning
 from xarray.conventions import encode_dataset_coordinates
 from xarray.core import indexing
+from xarray.core.indexes import PandasIndex
 from xarray.core.options import set_options
 from xarray.core.utils import module_available
 from xarray.namedarray.pycompat import array_type
@@ -2050,6 +2051,26 @@ def test_encoding_enum__error_multiple_variable_with_changing_enum(self):
             with self.roundtrip(original):
                 pass

+    @pytest.mark.parametrize("create_default_indexes", [True, False])
+    def test_create_default_indexes(self, tmp_path, create_default_indexes) -> None:
+        store_path = tmp_path / "tmp.nc"
+        original_ds = xr.Dataset(
+            {"data": ("x", np.arange(3))}, coords={"x": [-1, 0, 1]}
+        )
+        original_ds.to_netcdf(store_path, engine=self.engine, mode="w")
+
+        with open_dataset(
+            store_path,
+            engine=self.engine,
+            create_default_indexes=create_default_indexes,
+        ) as loaded_ds:
+            if create_default_indexes:
+                assert list(loaded_ds.xindexes) == ["x"] and isinstance(
+                    loaded_ds.xindexes["x"], PandasIndex
+                )
+            else:
+                assert len(loaded_ds.xindexes) == 0
+

 @requires_netCDF4
 class TestNetCDF4Data(NetCDF4Base):
@@ -4009,6 +4030,26 @@ def test_pickle(self) -> None:
     def test_pickle_dataarray(self) -> None:
         pass

+    @pytest.mark.parametrize("create_default_indexes", [True, False])
+    def test_create_default_indexes(self, tmp_path, create_default_indexes) -> None:
+        store_path = tmp_path / "tmp.nc"
+        original_ds = xr.Dataset(
+            {"data": ("x", np.arange(3))}, coords={"x": [-1, 0, 1]}
+        )
+        original_ds.to_netcdf(store_path, engine=self.engine, mode="w")
+
+        with open_dataset(
+            store_path,
+            engine=self.engine,
+            create_default_indexes=create_default_indexes,
+        ) as loaded_ds:
+            if create_default_indexes:
+                assert list(loaded_ds.xindexes) == ["x"] and isinstance(
+                    loaded_ds.xindexes["x"], PandasIndex
+                )
+            else:
+                assert len(loaded_ds.xindexes) == 0
+

 @requires_scipy
 class TestScipyFilePath(CFEncodedBase, NetCDF3Only):
@@ -6378,6 +6419,26 @@ def test_zarr_closing_internal_zip_store():
     assert_identical(original_da, loaded_da)


+@requires_zarr
+@pytest.mark.parametrize("create_default_indexes", [True, False])
+def test_zarr_create_default_indexes(tmp_path, create_default_indexes) -> None:
+    from xarray.core.indexes import PandasIndex
+
+    store_path = tmp_path / "tmp.zarr"
+    original_ds = xr.Dataset({"data": ("x", np.arange(3))}, coords={"x": [-1, 0, 1]})
+    original_ds.to_zarr(store_path, mode="w")
+
+    with open_dataset(
+        store_path, engine="zarr", create_default_indexes=create_default_indexes
+    ) as loaded_ds:
+        if create_default_indexes:
+            assert list(loaded_ds.xindexes) == ["x"] and isinstance(
+                loaded_ds.xindexes["x"], PandasIndex
+            )
+        else:
+            assert len(loaded_ds.xindexes) == 0
+
+
 @requires_zarr
 @pytest.mark.usefixtures("default_zarr_format")
 def test_raises_key_error_on_invalid_zarr_store(tmp_path):
diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py
index 885b8cb8a46..778e800ec67 100644
--- a/xarray/tests/test_backends_api.py
+++ b/xarray/tests/test_backends_api.py
@@ -218,7 +218,7 @@ def test_default_indexes(self, create_default_indexes):
         if create_default_indexes:
             assert all(name in final.xindexes for name in ["x", "y"])
         else:
-            assert not final.xindexes
+            assert len(final.xindexes) == 0

     @pytest.mark.parametrize("create_default_indexes", [True, False])
     def test_default_indexes_passthrough(self, create_default_indexes):
From 5c3a8437839ef22d2cdd9c907b00e7bfb1c775bd Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Wed, 2 Jul 2025 00:01:50 +0200
Subject: [PATCH 08/11] actually do not create default indexes in the backends

---
 xarray/backends/store.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/backends/store.py b/xarray/backends/store.py
index 7edfbd1c4e0..de52aa193ed 100644
--- a/xarray/backends/store.py
+++ b/xarray/backends/store.py
@@ -68,7 +68,7 @@ def open_dataset(
                 data_vars[name] = var

         # explicit Coordinates object with no index passed
-        coords = Coordinates(coord_vars)
+        coords = Coordinates(coord_vars, indexes={})

         ds = Dataset(data_vars, coords=coords, attrs=attrs)
         ds.set_close(filename_or_obj.close)
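
This one-line fix matters because ``Coordinates`` builds default indexes
whenever its ``indexes`` argument is omitted; only an explicit empty mapping
suppresses them. A quick demonstration of the difference::

    import xarray as xr

    coord_vars = {"x": ("x", [0, 1, 2])}
    # `indexes` omitted: a default pandas index is created for "x"
    assert list(xr.Coordinates(coord_vars).xindexes) == ["x"]
    # explicit empty mapping: no indexes at all
    assert len(xr.Coordinates(coord_vars, indexes={}).xindexes) == 0
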
From 08939dee3857913711098f1ddce52fc93ad43e47 Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Wed, 2 Jul 2025 00:02:56 +0200
Subject: [PATCH 09/11] rename the helper

---
 xarray/backends/api.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 0ac1a4d9503..38f96270c99 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -380,7 +380,7 @@ def _chunk_ds(
     return backend_ds._replace(variables)


-def _create_default_indexes(ds, create_default_indexes):
+def _maybe_create_default_indexes(ds, create_default_indexes):
     if not create_default_indexes:
         return ds

@@ -412,7 +412,7 @@ def _dataset_from_backend_dataset(

     _protect_dataset_variables_inplace(backend_ds, cache)

-    indexed = _create_default_indexes(backend_ds, create_default_indexes)
+    indexed = _maybe_create_default_indexes(backend_ds, create_default_indexes)

     if chunks is None:
         ds = indexed
@@ -466,7 +466,9 @@ def _datatree_from_backend_datatree(
     tree = DataTree.from_dict(
         {
             path: _chunk_ds(
-                node.dataset.pipe(_create_default_indexes, create_default_indexes),
+                node.dataset.pipe(
+                    _maybe_create_default_indexes, create_default_indexes
+                ),
                 filename_or_obj,
                 engine,
                 chunks,
From 95dbf8e6e787d15f1a649eafa492e9e7e6c6c4e8 Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Thu, 3 Jul 2025 19:08:37 +0200
Subject: [PATCH 10/11] move the handling of `create_default_indexes` up the
 call stack

---
 xarray/backends/api.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 38f96270c99..cb4ef3fa813 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -380,10 +380,7 @@ def _chunk_ds(
     return backend_ds._replace(variables)


-def _maybe_create_default_indexes(ds, create_default_indexes):
-    if not create_default_indexes:
-        return ds
-
+def _maybe_create_default_indexes(ds):
     to_index = {
         name: coord.variable
         for name, coord in ds.coords.items()
@@ -412,13 +409,14 @@ def _dataset_from_backend_dataset(

     _protect_dataset_variables_inplace(backend_ds, cache)

-    indexed = _maybe_create_default_indexes(backend_ds, create_default_indexes)
+    if create_default_indexes:
+        backend_ds = _maybe_create_default_indexes(backend_ds)

     if chunks is None:
-        ds = indexed
+        ds = backend_ds
     else:
         ds = _chunk_ds(
-            indexed,
+            backend_ds,
             filename_or_obj,
             engine,
             chunks,
@@ -466,8 +464,10 @@ def _datatree_from_backend_datatree(
     tree = DataTree.from_dict(
         {
             path: _chunk_ds(
-                node.dataset.pipe(
-                    _maybe_create_default_indexes, create_default_indexes
+                (
+                    _maybe_create_default_indexes(node.dataset)
+                    if create_default_indexes
+                    else node.dataset
                 ),
                 filename_or_obj,
                 engine,
From d7e6daa7f706234133a4bf160583668622cfec1c Mon Sep 17 00:00:00 2001
From: Justus Magin
Date: Thu, 3 Jul 2025 19:12:06 +0200
Subject: [PATCH 11/11] what's new

---
 doc/whats-new.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index ad83cfac531..8c0f5cf635c 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -12,7 +12,8 @@ v2025.07.1 (unreleased)

 New Features
 ~~~~~~~~~~~~
-
+- Allow skipping the creation of default indexes when opening datasets (:pull:`8051`).
+  By `Benoit Bovy <https://github.com/benbovy>`_ and `Justus Magin <https://github.com/keewis>`_.

 Breaking changes
 ~~~~~~~~~~~~~~~~
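
Taken together, the series lets every opening entry point skip default index
creation. An end-to-end sketch of the resulting behaviour (the file path is
hypothetical)::

    import xarray as xr

    # default: each dimension coordinate gets a pandas index, which loads
    # the coordinate data into memory
    ds = xr.open_dataset("data.nc")

    # opt out: no default indexes, so coordinate data can stay lazy;
    # label-based selection then requires setting an index explicitly,
    # e.g. with Dataset.set_xindex
    ds_lazy = xr.open_dataset("data.nc", create_default_indexes=False)
    assert len(ds_lazy.xindexes) == 0
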