diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst
index 313428c29d2..5542e488143 100644
--- a/doc/api-hidden.rst
+++ b/doc/api-hidden.rst
@@ -9,8 +9,6 @@
    .. autosummary::
       :toctree: generated/

-   auto_combine
-
    Dataset.nbytes
    Dataset.chunks

diff --git a/doc/api.rst b/doc/api.rst
index bb0edd0dfa5..603e3e8f6cf 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -21,7 +21,6 @@ Top-level functions
    broadcast
    concat
    merge
-   auto_combine
    combine_by_coords
    combine_nested
    where
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 4b5bb1e491f..c8150ac2057 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -33,6 +33,13 @@ Breaking changes
   `_. (:pull:`3274`)
   By `Elliott Sales de Andrade `_

+- The old :py:func:`auto_combine` function has been removed in
+  favour of :py:func:`combine_by_coords` and
+  :py:func:`combine_nested`. The default value of the ``combine``
+  argument to :py:func:`open_mfdataset` has also changed to
+  ``'by_coords'``. (:issue:`2616`, :pull:`3926`)
+  By `Tom Nicholas `_.
+
 Enhancements
 ~~~~~~~~~~~~

diff --git a/xarray/__init__.py b/xarray/__init__.py
index cb4824d188d..3886edc60e6 100644
--- a/xarray/__init__.py
+++ b/xarray/__init__.py
@@ -16,7 +16,7 @@
 from .coding.frequencies import infer_freq
 from .conventions import SerializationWarning, decode_cf
 from .core.alignment import align, broadcast
-from .core.combine import auto_combine, combine_by_coords, combine_nested
+from .core.combine import combine_by_coords, combine_nested
 from .core.common import ALL_DIMS, full_like, ones_like, zeros_like
 from .core.computation import apply_ufunc, corr, cov, dot, polyval, where
 from .core.concat import concat
@@ -47,7 +47,6 @@
     "align",
     "apply_ufunc",
     "as_variable",
-    "auto_combine",
     "broadcast",
     "cftime_range",
     "combine_by_coords",
diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 0919d2a582b..71afa846c90 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -4,7 +4,6 @@
 from io import BytesIO
 from numbers import Number
 from pathlib import Path
-from textwrap import dedent
 from typing import (
     TYPE_CHECKING,
     Callable,
@@ -23,7 +22,6 @@
 from ..core.combine import (
     _infer_concat_order_from_positions,
     _nested_combine,
-    auto_combine,
     combine_by_coords,
 )
 from ..core.dataarray import DataArray
@@ -726,14 +724,14 @@ def close(self):
 def open_mfdataset(
     paths,
     chunks=None,
-    concat_dim="_not_supplied",
+    concat_dim=None,
     compat="no_conflicts",
     preprocess=None,
     engine=None,
     lock=None,
     data_vars="all",
     coords="different",
-    combine="_old_auto",
+    combine="by_coords",
     autoclose=None,
     parallel=False,
     join="outer",
@@ -746,9 +744,8 @@
     the datasets into one before returning the result, and if combine='nested' then
     ``combine_nested`` is used. The filepaths must be structured according to which
     combining function is used, the details of which are given in the documentation for
-    ``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated)
-    ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or
-    ``combine='nested'`` in future. Requires dask to be installed. See documentation for
+    ``combine_by_coords`` and ``combine_nested``. By default ``combine='by_coords'``
+    will be used. Requires dask to be installed. See documentation for
     details on dask [1]_.

     Global attributes from the ``attrs_file`` are used for the combined dataset.
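For illustration, a minimal sketch of the call patterns after this change (not part of the patch; the file and dimension names are hypothetical):

    import xarray as xr

    # New default: combine='by_coords' orders the datasets by their
    # coordinate values, so the order of the supplied paths is irrelevant.
    ds = xr.open_mfdataset(["air_2019.nc", "air_2020.nc"])

    # Equivalent to the explicit form:
    ds = xr.open_mfdataset(["air_2019.nc", "air_2020.nc"], combine="by_coords")

    # To combine in the order the files are given (closest to the removed
    # auto_combine), use combine='nested' and name the concatenation dimension:
    ds = xr.open_mfdataset(
        ["air_2019.nc", "air_2020.nc"], combine="nested", concat_dim="time"
    )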
@@ -758,7 +755,7 @@ def open_mfdataset(
         Either a string glob in the form ``"path/to/my/files/*.nc"`` or an explicit list of
         files to open. Paths can be given as strings or as pathlib Paths. If
         concatenation along more than one dimension is desired, then ``paths`` must be a
-        nested list-of-lists (see ``manual_combine`` for details). (A string glob will
+        nested list-of-lists (see ``combine_nested`` for details). (A string glob will
         be expanded to a 1-dimensional list.)
     chunks : int or dict, optional
         Dictionary with keys given by dimension names and values given by chunk sizes.
@@ -768,15 +765,16 @@
         see the full documentation for more details [2]_.
     concat_dim : str, or list of str, DataArray, Index or None, optional
         Dimensions to concatenate files along. You only need to provide this argument
-        if any of the dimensions along which you want to concatenate is not a dimension
-        in the original datasets, e.g., if you want to stack a collection of 2D arrays
-        along a third dimension. Set ``concat_dim=[..., None, ...]`` explicitly to
-        disable concatenation along a particular dimension.
+        if ``combine='nested'``, and if any of the dimensions along which you want to
+        concatenate is not a dimension in the original datasets, e.g., if you want to
+        stack a collection of 2D arrays along a third dimension. Set
+        ``concat_dim=[..., None, ...]`` explicitly to disable concatenation along a
+        particular dimension. Default is None, which for a 1D list of filepaths is
+        equivalent to opening the files separately and then merging them with
+        ``xarray.merge``.
     combine : {'by_coords', 'nested'}, optional
         Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is used to
-        combine all the data. If this argument is not provided, `xarray.auto_combine` is
-        used, but in the future this behavior will switch to use
-        `xarray.combine_by_coords` by default.
+        combine all the data. Default is to use ``xarray.combine_by_coords``.
     compat : {'identical', 'equals', 'broadcast_equals',
               'no_conflicts', 'override'}, optional
         String indicating how to compare variables of the same name for
@@ -869,7 +867,6 @@
     --------
     combine_by_coords
     combine_nested
-    auto_combine
     open_dataset

     References
@@ -897,11 +894,8 @@
     # If combine='nested' then this creates a flat list which is easier to
     # iterate over, while saving the originally-supplied structure as "ids"
     if combine == "nested":
-        if str(concat_dim) == "_not_supplied":
-            raise ValueError("Must supply concat_dim when using " "combine='nested'")
-        else:
-            if isinstance(concat_dim, (str, DataArray)) or concat_dim is None:
-                concat_dim = [concat_dim]
+        if isinstance(concat_dim, (str, DataArray)) or concat_dim is None:
+            concat_dim = [concat_dim]

     combined_ids_paths = _infer_concat_order_from_positions(paths)
     ids, paths = (list(combined_ids_paths.keys()), list(combined_ids_paths.values()))
@@ -933,30 +927,7 @@
     # Combine all datasets, closing them in case of a ValueError
     try:
-        if combine == "_old_auto":
-            # Use the old auto_combine for now
-            # Remove this after deprecation cycle from #2616 is complete
-            basic_msg = dedent(
-                """\
-            In xarray version 0.15 the default behaviour of `open_mfdataset`
-            will change. To retain the existing behavior, pass
-            combine='nested'. To use future default behavior, pass
-            combine='by_coords'. See
-            http://xarray.pydata.org/en/stable/combining.html#combining-multi
-            """
-            )
-            warnings.warn(basic_msg, FutureWarning, stacklevel=2)
-
-            combined = auto_combine(
-                datasets,
-                concat_dim=concat_dim,
-                compat=compat,
-                data_vars=data_vars,
-                coords=coords,
-                join=join,
-                from_openmfds=True,
-            )
-        elif combine == "nested":
+        if combine == "nested":
             # Combined nested list by successive concat and merge operations
             # along each dimension, using structure given by "ids"
             combined = _nested_combine(
diff --git a/xarray/core/combine.py b/xarray/core/combine.py
index 1f990457798..58bd7178fa2 100644
--- a/xarray/core/combine.py
+++ b/xarray/core/combine.py
@@ -1,7 +1,5 @@
 import itertools
-import warnings
 from collections import Counter
-from textwrap import dedent

 import pandas as pd

@@ -762,272 +760,3 @@ def combine_by_coords(
         join=join,
         combine_attrs=combine_attrs,
     )
-
-
-# Everything beyond here is only needed until the deprecation cycle in #2616
-# is completed
-
-
-_CONCAT_DIM_DEFAULT = "__infer_concat_dim__"
-
-
-def auto_combine(
-    datasets,
-    concat_dim="_not_supplied",
-    compat="no_conflicts",
-    data_vars="all",
-    coords="different",
-    fill_value=dtypes.NA,
-    join="outer",
-    from_openmfds=False,
-):
-    """
-    Attempt to auto-magically combine the given datasets into one.
-
-    This entire function is deprecated in favour of ``combine_nested`` and
-    ``combine_by_coords``.
-
-    This method attempts to combine a list of datasets into a single entity by
-    inspecting metadata and using a combination of concat and merge.
-    It does not concatenate along more than one dimension or sort data under
-    any circumstances. It does align coordinates, but different variables on
-    datasets can cause it to fail under some scenarios. In complex cases, you
-    may need to clean up your data and use ``concat``/``merge`` explicitly.
-    ``auto_combine`` works well if you have N years of data and M data
-    variables, and each combination of a distinct time period and set of data
-    variables is saved its own dataset.
-
-    Parameters
-    ----------
-    datasets : sequence of xarray.Dataset
-        Dataset objects to merge.
-    concat_dim : str or DataArray or Index, optional
-        Dimension along which to concatenate variables, as used by
-        :py:func:`xarray.concat`. You only need to provide this argument if
-        the dimension along which you want to concatenate is not a dimension
-        in the original datasets, e.g., if you want to stack a collection of
-        2D arrays along a third dimension.
-        By default, xarray attempts to infer this argument by examining
-        component files. Set ``concat_dim=None`` explicitly to disable
-        concatenation.
-    compat : {'identical', 'equals', 'broadcast_equals',
-              'no_conflicts', 'override'}, optional
-        String indicating how to compare variables of the same name for
-        potential conflicts:
-
-        - 'broadcast_equals': all values must be equal when variables are
-          broadcast against each other to ensure common dimensions.
-        - 'equals': all values and dimensions must be the same.
-        - 'identical': all values, dimensions and attributes must be the
-          same.
-        - 'no_conflicts': only values which are not null in both datasets
-          must be equal. The returned dataset then contains the combination
-          of all non-null values.
-        - 'override': skip comparing and pick variable from first dataset
-    data_vars : {'minimal', 'different', 'all' or list of str}, optional
-        Details are in the documentation of concat
-    coords : {'minimal', 'different', 'all' o list of str}, optional
-        Details are in the documentation of concat
-    fill_value : scalar, optional
-        Value to use for newly missing values
-    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
-        String indicating how to combine differing indexes
-        (excluding concat_dim) in objects
-
-        - 'outer': use the union of object indexes
-        - 'inner': use the intersection of object indexes
-        - 'left': use indexes from the first object with each dimension
-        - 'right': use indexes from the last object with each dimension
-        - 'exact': instead of aligning, raise `ValueError` when indexes to be
-          aligned are not equal
-        - 'override': if indexes are of same size, rewrite indexes to be
-          those of the first object with that dimension. Indexes for the same
-          dimension must have the same size in all objects.
-
-    Returns
-    -------
-    combined : xarray.Dataset
-
-    See also
-    --------
-    concat
-    Dataset.merge
-    """
-
-    if not from_openmfds:
-        basic_msg = dedent(
-            """\
-        In xarray version 0.15 `auto_combine` will be deprecated. See
-        http://xarray.pydata.org/en/stable/combining.html#combining-multi"""
-        )
-        warnings.warn(basic_msg, FutureWarning, stacklevel=2)
-
-    if concat_dim == "_not_supplied":
-        concat_dim = _CONCAT_DIM_DEFAULT
-        message = ""
-    else:
-        message = dedent(
-            """\
-        Also `open_mfdataset` will no longer accept a `concat_dim` argument.
-        To get equivalent behaviour from now on please use the new
-        `combine_nested` function instead (or the `combine='nested'` option to
-        `open_mfdataset`)."""
-        )
-
-    if _dimension_coords_exist(datasets):
-        message += dedent(
-            """\
-        The datasets supplied have global dimension coordinates. You may want
-        to use the new `combine_by_coords` function (or the
-        `combine='by_coords'` option to `open_mfdataset`) to order the datasets
-        before concatenation. Alternatively, to continue concatenating based
-        on the order the datasets are supplied in future, please use the new
-        `combine_nested` function (or the `combine='nested'` option to
-        open_mfdataset)."""
-        )
-    else:
-        message += dedent(
-            """\
-        The datasets supplied do not have global dimension coordinates. In
-        future, to continue concatenating without supplying dimension
-        coordinates, please use the new `combine_nested` function (or the
-        `combine='nested'` option to open_mfdataset."""
-        )
-
-    if _requires_concat_and_merge(datasets):
-        manual_dims = [concat_dim].append(None)
-        message += dedent(
-            """\
-        The datasets supplied require both concatenation and merging. From
-        xarray version 0.15 this will operation will require either using the
-        new `combine_nested` function (or the `combine='nested'` option to
-        open_mfdataset), with a nested list structure such that you can combine
-        along the dimensions {}. Alternatively if your datasets have global
-        dimension coordinates then you can use the new `combine_by_coords`
-        function.""".format(
-                manual_dims
-            )
-        )
-
-    warnings.warn(message, FutureWarning, stacklevel=2)
-
-    return _old_auto_combine(
-        datasets,
-        concat_dim=concat_dim,
-        compat=compat,
-        data_vars=data_vars,
-        coords=coords,
-        fill_value=fill_value,
-        join=join,
-    )
-
-
-def _dimension_coords_exist(datasets):
-    """
-    Check if the datasets have consistent global dimension coordinates
-    which would in future be used by `auto_combine` for concatenation ordering.
- """ - - # Group by data vars - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) - - # Simulates performing the multidimensional combine on each group of data - # variables before merging back together - try: - for vars, datasets_with_same_vars in grouped_by_vars: - _infer_concat_order_from_coords(list(datasets_with_same_vars)) - return True - except ValueError: - # ValueError means datasets don't have global dimension coordinates - # Or something else went wrong in trying to determine them - return False - - -def _requires_concat_and_merge(datasets): - """ - Check if the datasets require the use of both xarray.concat and - xarray.merge, which in future might require the user to use - `manual_combine` instead. - """ - # Group by data vars - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) - - return len(list(grouped_by_vars)) > 1 - - -def _old_auto_combine( - datasets, - concat_dim=_CONCAT_DIM_DEFAULT, - compat="no_conflicts", - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", -): - if concat_dim is not None: - dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim - - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped = itertools.groupby(sorted_datasets, key=vars_as_keys) - - concatenated = [ - _auto_concat( - list(datasets), - dim=dim, - data_vars=data_vars, - coords=coords, - compat=compat, - fill_value=fill_value, - join=join, - ) - for vars, datasets in grouped - ] - else: - concatenated = datasets - merged = merge(concatenated, compat=compat, fill_value=fill_value, join=join) - return merged - - -def _auto_concat( - datasets, - dim=None, - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", - compat="no_conflicts", -): - if len(datasets) == 1 and dim is None: - # There is nothing more to combine, so kick out early. 
-        return datasets[0]
-    else:
-        if dim is None:
-            ds0 = datasets[0]
-            ds1 = datasets[1]
-            concat_dims = set(ds0.dims)
-            if ds0.dims != ds1.dims:
-                dim_tuples = set(ds0.dims.items()) - set(ds1.dims.items())
-                concat_dims = {i for i, _ in dim_tuples}
-            if len(concat_dims) > 1:
-                concat_dims = {d for d in concat_dims if not ds0[d].equals(ds1[d])}
-            if len(concat_dims) > 1:
-                raise ValueError(
-                    "too many different dimensions to " "concatenate: %s" % concat_dims
-                )
-            elif len(concat_dims) == 0:
-                raise ValueError(
-                    "cannot infer dimension to concatenate: "
-                    "supply the ``concat_dim`` argument "
-                    "explicitly"
-                )
-            (dim,) = concat_dims
-        return concat(
-            datasets,
-            dim=dim,
-            data_vars=data_vars,
-            coords=coords,
-            fill_value=fill_value,
-            compat=compat,
-        )
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 177435fa864..0654f39c14c 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2947,16 +2947,6 @@ def test_open_mfdataset_auto_combine(self):
                 with open_mfdataset([tmp2, tmp1], combine="by_coords") as actual:
                     assert_identical(original, actual)

-    def test_open_mfdataset_combine_nested_no_concat_dim(self):
-        original = Dataset({"foo": ("x", np.random.randn(10)), "x": np.arange(10)})
-        with create_tmp_file() as tmp1:
-            with create_tmp_file() as tmp2:
-                original.isel(x=slice(5)).to_netcdf(tmp1)
-                original.isel(x=slice(5, 10)).to_netcdf(tmp2)
-
-                with raises_regex(ValueError, "Must supply concat_dim"):
-                    open_mfdataset([tmp2, tmp1], combine="nested")
-
     @pytest.mark.xfail(reason="mfdataset loses encoding currently.")
     def test_encoding_mfdataset(self):
         original = Dataset(
@@ -3050,6 +3040,15 @@ def test_open_mfdataset_concat_dim_none(self):
             ) as actual:
                 assert_identical(data, actual)

+    def test_open_mfdataset_concat_dim_default_none(self):
+        with create_tmp_file() as tmp1:
+            with create_tmp_file() as tmp2:
+                data = Dataset({"x": 0})
+                data.to_netcdf(tmp1)
+                Dataset({"x": np.nan}).to_netcdf(tmp2)
+                with open_mfdataset([tmp1, tmp2], combine="nested") as actual:
+                    assert_identical(data, actual)
+
     def test_open_dataset(self):
         original = Dataset({"foo": ("x", np.random.randn(10))})
         with create_tmp_file() as tmp:
@@ -3173,73 +3172,6 @@ def test_load_dataarray(self):
             ds.to_netcdf(tmp)


-@requires_scipy_or_netCDF4
-@requires_dask
-class TestOpenMFDataSetDeprecation:
-    """
-    Set of tests to check that FutureWarnings are correctly raised until the
-    deprecation cycle is complete. #2616
-    """
-
-    def test_open_mfdataset_default(self):
-        ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]})
-        with create_tmp_file() as tmp1:
-            with create_tmp_file() as tmp2:
-                ds1.to_netcdf(tmp1)
-                ds2.to_netcdf(tmp2)
-
-                with pytest.warns(
-                    FutureWarning, match="default behaviour of" " `open_mfdataset`"
-                ):
-                    open_mfdataset([tmp1, tmp2])
-
-    def test_open_mfdataset_with_concat_dim(self):
-        ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]})
-        with create_tmp_file() as tmp1:
-            with create_tmp_file() as tmp2:
-                ds1.to_netcdf(tmp1)
-                ds2.to_netcdf(tmp2)
-
-                with pytest.warns(FutureWarning, match="`concat_dim`"):
-                    open_mfdataset([tmp1, tmp2], concat_dim="x")
-
-    def test_auto_combine_with_merge_and_concat(self):
-        ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]})
-        ds3 = Dataset({"z": ((), 99)})
-        with create_tmp_file() as tmp1:
-            with create_tmp_file() as tmp2:
-                with create_tmp_file() as tmp3:
-                    ds1.to_netcdf(tmp1)
-                    ds2.to_netcdf(tmp2)
-                    ds3.to_netcdf(tmp3)
-
-                    with pytest.warns(
-                        FutureWarning, match="require both concatenation"
-                    ):
-                        open_mfdataset([tmp1, tmp2, tmp3])
-
-    def test_auto_combine_with_coords(self):
-        ds1 = Dataset({"foo": ("x", [0])}, coords={"x": ("x", [0])})
-        ds2 = Dataset({"foo": ("x", [1])}, coords={"x": ("x", [1])})
-        with create_tmp_file() as tmp1:
-            with create_tmp_file() as tmp2:
-                ds1.to_netcdf(tmp1)
-                ds2.to_netcdf(tmp2)
-
-                with pytest.warns(FutureWarning, match="supplied have global"):
-                    open_mfdataset([tmp1, tmp2])
-
-    def test_auto_combine_without_coords(self):
-        ds1, ds2 = Dataset({"foo": ("x", [0])}), Dataset({"foo": ("x", [1])})
-        with create_tmp_file() as tmp1:
-            with create_tmp_file() as tmp2:
-                ds1.to_netcdf(tmp1)
-                ds2.to_netcdf(tmp2)
-
-                with pytest.warns(FutureWarning, match="supplied do not have global"):
-                    open_mfdataset([tmp1, tmp2])
-
-
 @requires_scipy_or_netCDF4
 @requires_pydap
 @pytest.mark.filterwarnings("ignore:The binary mode of fromstring is deprecated")
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index c3f981f10d1..59f61f59722 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -4,14 +4,7 @@
 import numpy as np
 import pytest

-from xarray import (
-    DataArray,
-    Dataset,
-    auto_combine,
-    combine_by_coords,
-    combine_nested,
-    concat,
-)
+from xarray import DataArray, Dataset, combine_by_coords, combine_nested, concat
 from xarray.core import dtypes
 from xarray.core.combine import (
     _check_shape_tile_ids,
@@ -818,173 +811,6 @@ def test_combine_by_coords_incomplete_hypercube(self):
         combine_by_coords([x1, x2, x3], fill_value=None)


-@pytest.mark.filterwarnings(
-    "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated"
-)
-@pytest.mark.filterwarnings("ignore:Also `open_mfdataset` will no longer")
-@pytest.mark.filterwarnings("ignore:The datasets supplied")
-class TestAutoCombineOldAPI:
-    """
-    Set of tests which check that old 1-dimensional auto_combine behaviour is
-    still satisfied. #2616
-    """
-
-    def test_auto_combine(self):
-        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
-        actual = auto_combine(objs)
-        expected = Dataset({"x": [0, 1]})
-        assert_identical(expected, actual)
-
-        actual = auto_combine([actual])
-        assert_identical(expected, actual)
-
-        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
-        actual = auto_combine(objs)
-        expected = Dataset({"x": [0, 1, 2]})
-        assert_identical(expected, actual)
-
-        # ensure auto_combine handles non-sorted variables
-        objs = [
-            Dataset({"x": ("a", [0]), "y": ("a", [0])}),
-            Dataset({"y": ("a", [1]), "x": ("a", [1])}),
-        ]
-        actual = auto_combine(objs)
-        expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])})
-        assert_identical(expected, actual)
-
-        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
-        with raises_regex(ValueError, "too many .* dimensions"):
-            auto_combine(objs)
-
-        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
-        with raises_regex(ValueError, "cannot infer dimension"):
-            auto_combine(objs)
-
-        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
-        with raises_regex(ValueError, "'y' is not present in all datasets"):
-            auto_combine(objs)
-
-    def test_auto_combine_previously_failed(self):
-        # In the above scenario, one file is missing, containing the data for
-        # one year's data for one variable.
-        datasets = [
-            Dataset({"a": ("x", [0]), "x": [0]}),
-            Dataset({"b": ("x", [0]), "x": [0]}),
-            Dataset({"a": ("x", [1]), "x": [1]}),
-        ]
-        expected = Dataset({"a": ("x", [0, 1]), "b": ("x", [0, np.nan])}, {"x": [0, 1]})
-        actual = auto_combine(datasets)
-        assert_identical(expected, actual)
-
-        # Your data includes "time" and "station" dimensions, and each year's
-        # data has a different set of stations.
-        datasets = [
-            Dataset({"a": ("x", [2, 3]), "x": [1, 2]}),
-            Dataset({"a": ("x", [1, 2]), "x": [0, 1]}),
-        ]
-        expected = Dataset(
-            {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])}, {"x": [0, 1, 2]}
-        )
-        actual = auto_combine(datasets, concat_dim="t")
-        assert_identical(expected, actual)
-
-    def test_auto_combine_with_new_variables(self):
-        datasets = [Dataset({"x": 0}, {"y": 0}), Dataset({"x": 1}, {"y": 1, "z": 1})]
-        actual = auto_combine(datasets, "y")
-        expected = Dataset({"x": ("y", [0, 1])}, {"y": [0, 1], "z": 1})
-        assert_identical(expected, actual)
-
-    def test_auto_combine_no_concat(self):
-        objs = [Dataset({"x": 0}), Dataset({"y": 1})]
-        actual = auto_combine(objs)
-        expected = Dataset({"x": 0, "y": 1})
-        assert_identical(expected, actual)
-
-        objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})]
-        actual = auto_combine(objs)
-        expected = Dataset({"x": 0, "y": 1, "z": 2})
-        assert_identical(expected, actual)
-
-        data = Dataset({"x": 0})
-        actual = auto_combine([data, data, data], concat_dim=None)
-        assert_identical(data, actual)
-
-        # Single object, with a concat_dim explicitly provided
-        # Test the issue reported in GH #1988
-        objs = [Dataset({"x": 0, "y": 1})]
-        dim = DataArray([100], name="baz", dims="baz")
-        actual = auto_combine(objs, concat_dim=dim)
-        expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]})
-        assert_identical(expected, actual)
-
-        # Just making sure that auto_combine is doing what is
-        # expected for non-scalar values, too.
-        objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})]
-        dim = DataArray([100], name="baz", dims="baz")
-        actual = auto_combine(objs, concat_dim=dim)
-        expected = Dataset(
-            {"x": (("baz", "z"), [[0, 1]]), "y": (("baz", "z"), [[1, 2]])},
-            {"baz": [100]},
-        )
-        assert_identical(expected, actual)
-
-    def test_auto_combine_order_by_appearance_not_coords(self):
-        objs = [
-            Dataset({"foo": ("x", [0])}, coords={"x": ("x", [1])}),
-            Dataset({"foo": ("x", [1])}, coords={"x": ("x", [0])}),
-        ]
-        actual = auto_combine(objs)
-        expected = Dataset({"foo": ("x", [0, 1])}, coords={"x": ("x", [1, 0])})
-        assert_identical(expected, actual)
-
-    @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0])
-    def test_auto_combine_fill_value(self, fill_value):
-        datasets = [
-            Dataset({"a": ("x", [2, 3]), "x": [1, 2]}),
-            Dataset({"a": ("x", [1, 2]), "x": [0, 1]}),
-        ]
-        if fill_value == dtypes.NA:
-            # if we supply the default, we expect the missing value for a
-            # float array
-            fill_value = np.nan
-        expected = Dataset(
-            {"a": (("t", "x"), [[fill_value, 2, 3], [1, 2, fill_value]])},
-            {"x": [0, 1, 2]},
-        )
-        actual = auto_combine(datasets, concat_dim="t", fill_value=fill_value)
-        assert_identical(expected, actual)
-
-
-class TestAutoCombineDeprecation:
-    """
-    Set of tests to check that FutureWarnings are correctly raised until the
-    deprecation cycle is complete. #2616
-    """
-
-    def test_auto_combine_with_concat_dim(self):
-        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
-        with pytest.warns(FutureWarning, match="`concat_dim`"):
-            auto_combine(objs, concat_dim="x")
-
-    def test_auto_combine_with_merge_and_concat(self):
-        objs = [Dataset({"x": [0]}), Dataset({"x": [1]}), Dataset({"z": ((), 99)})]
-        with pytest.warns(FutureWarning, match="require both concatenation"):
-            auto_combine(objs)
-
-    def test_auto_combine_with_coords(self):
-        objs = [
-            Dataset({"foo": ("x", [0])}, coords={"x": ("x", [0])}),
-            Dataset({"foo": ("x", [1])}, coords={"x": ("x", [1])}),
-        ]
-        with pytest.warns(FutureWarning, match="supplied have global"):
-            auto_combine(objs)
-
-    def test_auto_combine_without_coords(self):
-        objs = [Dataset({"foo": ("x", [0])}), Dataset({"foo": ("x", [1])})]
-        with pytest.warns(FutureWarning, match="supplied do not have global"):
-            auto_combine(objs)
-
-
 @requires_cftime
 def test_combine_by_coords_distant_cftime_dates():
     # Regression test for https://github.com/pydata/xarray/issues/3535
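To summarise the migration implied by this diff, a minimal sketch of replacing an ``auto_combine`` call with the two retained functions (an in-memory example, assumed to behave as described in the ``combine_by_coords`` and ``combine_nested`` docstrings):

    import xarray as xr

    ds1 = xr.Dataset({"foo": ("x", [1, 2])}, coords={"x": [0, 1]})
    ds2 = xr.Dataset({"foo": ("x", [3, 4])}, coords={"x": [2, 3]})

    # Before this change: combined = xr.auto_combine([ds2, ds1])

    # When the datasets carry dimension coordinates, combine_by_coords
    # inspects the 'x' coordinate to order the inputs, so list order is
    # irrelevant.
    combined = xr.combine_by_coords([ds2, ds1])
    assert list(combined.x.values) == [0, 1, 2, 3]

    # When the ordering must come from the list structure itself,
    # combine_nested concatenates along the named dimension in the order given.
    combined = xr.combine_nested([ds1, ds2], concat_dim="x")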