From 101ad8d6170dfaa83d321aca90cb42557114a2fd Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Apr 2020 04:01:53 +0100 Subject: [PATCH 01/10] Removed auto_combine function and argument to open_mfdataset --- xarray/__init__.py | 2 +- xarray/backends/api.py | 58 +++------ xarray/core/combine.py | 269 ----------------------------------------- 3 files changed, 16 insertions(+), 313 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index 0fead57e5fb..06f3cd634b1 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -15,7 +15,7 @@ from .coding.cftimeindex import CFTimeIndex from .conventions import SerializationWarning, decode_cf from .core.alignment import align, broadcast -from .core.combine import auto_combine, combine_by_coords, combine_nested +from .core.combine import combine_by_coords, combine_nested from .core.common import ALL_DIMS, full_like, ones_like, zeros_like from .core.computation import apply_ufunc, dot, polyval, where from .core.concat import concat diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c7481e22b59..1a1ddfc33b4 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -23,7 +23,6 @@ from ..core.combine import ( _infer_concat_order_from_positions, _nested_combine, - auto_combine, combine_by_coords, ) from ..core.dataarray import DataArray @@ -717,7 +716,7 @@ def open_mfdataset( lock=None, data_vars="all", coords="different", - combine="_old_auto", + combine="by_coords", autoclose=None, parallel=False, join="outer", @@ -730,9 +729,8 @@ def open_mfdataset( the datasets into one before returning the result, and if combine='nested' then ``combine_nested`` is used. The filepaths must be structured according to which combining function is used, the details of which are given in the documentation for - ``combine_by_coords`` and ``combine_nested``. 
By default the old (now deprecated) - ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or - ``combine='nested'`` in future. Requires dask to be installed. See documentation for + ``combine_by_coords`` and ``combine_nested``. By default ``combine='by_coords'`` + will be used. Requires dask to be installed. See documentation for details on dask [1]_. Global attributes from the ``attrs_file`` are used for the combined dataset. @@ -742,7 +740,7 @@ def open_mfdataset( Either a string glob in the form ``"path/to/my/files/*.nc"`` or an explicit list of files to open. Paths can be given as strings or as pathlib Paths. If concatenation along more than one dimension is desired, then ``paths`` must be a - nested list-of-lists (see ``manual_combine`` for details). (A string glob will + nested list-of-lists (see ``combine_nested`` for details). (A string glob will be expanded to a 1-dimensional list.) chunks : int or dict, optional Dictionary with keys given by dimension names and values given by chunk sizes. @@ -752,15 +750,16 @@ def open_mfdataset( see the full documentation for more details [2]_. concat_dim : str, or list of str, DataArray, Index or None, optional Dimensions to concatenate files along. You only need to provide this argument - if any of the dimensions along which you want to concatenate is not a dimension - in the original datasets, e.g., if you want to stack a collection of 2D arrays - along a third dimension. Set ``concat_dim=[..., None, ...]`` explicitly to - disable concatenation along a particular dimension. + if ``combine='nested'``, and if any of the dimensions along which you want to + concatenate is not a dimension in the original datasets, e.g., if you want to + stack a collection of 2D arrays along a third dimension. Set + ``concat_dim=[..., None, ...]`` explicitly to disable concatenation along a + particular dimension. 
Default is None, which for a 1D list of filepaths is + equivalent to opening the files separately and then merging them with + ``xarray.merge``. combine : {'by_coords', 'nested'}, optional Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is used to - combine all the data. If this argument is not provided, `xarray.auto_combine` is - used, but in the future this behavior will switch to use - `xarray.combine_by_coords` by default. + combine all the data. Default is to use ``xarray.combine_by_coords``. compat : {'identical', 'equals', 'broadcast_equals', 'no_conflicts', 'override'}, optional String indicating how to compare variables of the same name for @@ -853,7 +852,6 @@ def open_mfdataset( -------- combine_by_coords combine_nested - auto_combine open_dataset References @@ -881,11 +879,8 @@ def open_mfdataset( # If combine='nested' then this creates a flat list which is easier to # iterate over, while saving the originally-supplied structure as "ids" if combine == "nested": - if str(concat_dim) == "_not_supplied": - raise ValueError("Must supply concat_dim when using " "combine='nested'") - else: - if isinstance(concat_dim, (str, DataArray)) or concat_dim is None: - concat_dim = [concat_dim] + if isinstance(concat_dim, (str, DataArray)) or concat_dim is None: + concat_dim = [concat_dim] combined_ids_paths = _infer_concat_order_from_positions(paths) ids, paths = (list(combined_ids_paths.keys()), list(combined_ids_paths.values())) @@ -917,30 +912,7 @@ def open_mfdataset( # Combine all datasets, closing them in case of a ValueError try: - if combine == "_old_auto": - # Use the old auto_combine for now - # Remove this after deprecation cycle from #2616 is complete - basic_msg = dedent( - """\ - In xarray version 0.15 the default behaviour of `open_mfdataset` - will change. To retain the existing behavior, pass - combine='nested'. To use future default behavior, pass - combine='by_coords'. 
See - http://xarray.pydata.org/en/stable/combining.html#combining-multi - """ - ) - warnings.warn(basic_msg, FutureWarning, stacklevel=2) - - combined = auto_combine( - datasets, - concat_dim=concat_dim, - compat=compat, - data_vars=data_vars, - coords=coords, - join=join, - from_openmfds=True, - ) - elif combine == "nested": + if combine == "nested": # Combined nested list by successive concat and merge operations # along each dimension, using structure given by "ids" combined = _nested_combine( diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 1f990457798..da2c8654ce2 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -1,7 +1,5 @@ import itertools -import warnings from collections import Counter -from textwrap import dedent import pandas as pd @@ -764,270 +762,3 @@ def combine_by_coords( ) -# Everything beyond here is only needed until the deprecation cycle in #2616 -# is completed - - -_CONCAT_DIM_DEFAULT = "__infer_concat_dim__" - - -def auto_combine( - datasets, - concat_dim="_not_supplied", - compat="no_conflicts", - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", - from_openmfds=False, -): - """ - Attempt to auto-magically combine the given datasets into one. - - This entire function is deprecated in favour of ``combine_nested`` and - ``combine_by_coords``. - - This method attempts to combine a list of datasets into a single entity by - inspecting metadata and using a combination of concat and merge. - It does not concatenate along more than one dimension or sort data under - any circumstances. It does align coordinates, but different variables on - datasets can cause it to fail under some scenarios. In complex cases, you - may need to clean up your data and use ``concat``/``merge`` explicitly. - ``auto_combine`` works well if you have N years of data and M data - variables, and each combination of a distinct time period and set of data - variables is saved its own dataset. 
- - Parameters - ---------- - datasets : sequence of xarray.Dataset - Dataset objects to merge. - concat_dim : str or DataArray or Index, optional - Dimension along which to concatenate variables, as used by - :py:func:`xarray.concat`. You only need to provide this argument if - the dimension along which you want to concatenate is not a dimension - in the original datasets, e.g., if you want to stack a collection of - 2D arrays along a third dimension. - By default, xarray attempts to infer this argument by examining - component files. Set ``concat_dim=None`` explicitly to disable - concatenation. - compat : {'identical', 'equals', 'broadcast_equals', - 'no_conflicts', 'override'}, optional - String indicating how to compare variables of the same name for - potential conflicts: - - - 'broadcast_equals': all values must be equal when variables are - broadcast against each other to ensure common dimensions. - - 'equals': all values and dimensions must be the same. - - 'identical': all values, dimensions and attributes must be the - same. - - 'no_conflicts': only values which are not null in both datasets - must be equal. The returned dataset then contains the combination - of all non-null values. 
- - 'override': skip comparing and pick variable from first dataset - data_vars : {'minimal', 'different', 'all' or list of str}, optional - Details are in the documentation of concat - coords : {'minimal', 'different', 'all' o list of str}, optional - Details are in the documentation of concat - fill_value : scalar, optional - Value to use for newly missing values - join : {'outer', 'inner', 'left', 'right', 'exact'}, optional - String indicating how to combine differing indexes - (excluding concat_dim) in objects - - - 'outer': use the union of object indexes - - 'inner': use the intersection of object indexes - - 'left': use indexes from the first object with each dimension - - 'right': use indexes from the last object with each dimension - - 'exact': instead of aligning, raise `ValueError` when indexes to be - aligned are not equal - - 'override': if indexes are of same size, rewrite indexes to be - those of the first object with that dimension. Indexes for the same - dimension must have the same size in all objects. - - Returns - ------- - combined : xarray.Dataset - - See also - -------- - concat - Dataset.merge - """ - - if not from_openmfds: - basic_msg = dedent( - """\ - In xarray version 0.15 `auto_combine` will be deprecated. See - http://xarray.pydata.org/en/stable/combining.html#combining-multi""" - ) - warnings.warn(basic_msg, FutureWarning, stacklevel=2) - - if concat_dim == "_not_supplied": - concat_dim = _CONCAT_DIM_DEFAULT - message = "" - else: - message = dedent( - """\ - Also `open_mfdataset` will no longer accept a `concat_dim` argument. - To get equivalent behaviour from now on please use the new - `combine_nested` function instead (or the `combine='nested'` option to - `open_mfdataset`).""" - ) - - if _dimension_coords_exist(datasets): - message += dedent( - """\ - The datasets supplied have global dimension coordinates. 
You may want - to use the new `combine_by_coords` function (or the - `combine='by_coords'` option to `open_mfdataset`) to order the datasets - before concatenation. Alternatively, to continue concatenating based - on the order the datasets are supplied in future, please use the new - `combine_nested` function (or the `combine='nested'` option to - open_mfdataset).""" - ) - else: - message += dedent( - """\ - The datasets supplied do not have global dimension coordinates. In - future, to continue concatenating without supplying dimension - coordinates, please use the new `combine_nested` function (or the - `combine='nested'` option to open_mfdataset.""" - ) - - if _requires_concat_and_merge(datasets): - manual_dims = [concat_dim].append(None) - message += dedent( - """\ - The datasets supplied require both concatenation and merging. From - xarray version 0.15 this will operation will require either using the - new `combine_nested` function (or the `combine='nested'` option to - open_mfdataset), with a nested list structure such that you can combine - along the dimensions {}. Alternatively if your datasets have global - dimension coordinates then you can use the new `combine_by_coords` - function.""".format( - manual_dims - ) - ) - - warnings.warn(message, FutureWarning, stacklevel=2) - - return _old_auto_combine( - datasets, - concat_dim=concat_dim, - compat=compat, - data_vars=data_vars, - coords=coords, - fill_value=fill_value, - join=join, - ) - - -def _dimension_coords_exist(datasets): - """ - Check if the datasets have consistent global dimension coordinates - which would in future be used by `auto_combine` for concatenation ordering. 
- """ - - # Group by data vars - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) - - # Simulates performing the multidimensional combine on each group of data - # variables before merging back together - try: - for vars, datasets_with_same_vars in grouped_by_vars: - _infer_concat_order_from_coords(list(datasets_with_same_vars)) - return True - except ValueError: - # ValueError means datasets don't have global dimension coordinates - # Or something else went wrong in trying to determine them - return False - - -def _requires_concat_and_merge(datasets): - """ - Check if the datasets require the use of both xarray.concat and - xarray.merge, which in future might require the user to use - `manual_combine` instead. - """ - # Group by data vars - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) - - return len(list(grouped_by_vars)) > 1 - - -def _old_auto_combine( - datasets, - concat_dim=_CONCAT_DIM_DEFAULT, - compat="no_conflicts", - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", -): - if concat_dim is not None: - dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim - - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped = itertools.groupby(sorted_datasets, key=vars_as_keys) - - concatenated = [ - _auto_concat( - list(datasets), - dim=dim, - data_vars=data_vars, - coords=coords, - compat=compat, - fill_value=fill_value, - join=join, - ) - for vars, datasets in grouped - ] - else: - concatenated = datasets - merged = merge(concatenated, compat=compat, fill_value=fill_value, join=join) - return merged - - -def _auto_concat( - datasets, - dim=None, - data_vars="all", - coords="different", - fill_value=dtypes.NA, - join="outer", - compat="no_conflicts", -): - if len(datasets) == 1 and dim is None: - # There is nothing more to combine, so kick out early. 
- return datasets[0] - else: - if dim is None: - ds0 = datasets[0] - ds1 = datasets[1] - concat_dims = set(ds0.dims) - if ds0.dims != ds1.dims: - dim_tuples = set(ds0.dims.items()) - set(ds1.dims.items()) - concat_dims = {i for i, _ in dim_tuples} - if len(concat_dims) > 1: - concat_dims = {d for d in concat_dims if not ds0[d].equals(ds1[d])} - if len(concat_dims) > 1: - raise ValueError( - "too many different dimensions to " "concatenate: %s" % concat_dims - ) - elif len(concat_dims) == 0: - raise ValueError( - "cannot infer dimension to concatenate: " - "supply the ``concat_dim`` argument " - "explicitly" - ) - (dim,) = concat_dims - return concat( - datasets, - dim=dim, - data_vars=data_vars, - coords=coords, - fill_value=fill_value, - compat=compat, - ) From bc49e6bad86829b79d60e8724efe0ed55b31c48d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Apr 2020 04:02:13 +0100 Subject: [PATCH 02/10] Removed corresponding tests --- xarray/tests/test_backends.py | 76 --------------- xarray/tests/test_combine.py | 168 ---------------------------------- 2 files changed, 244 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 82fe1b38149..2ba7331c367 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2908,15 +2908,6 @@ def test_open_mfdataset_auto_combine(self): with open_mfdataset([tmp2, tmp1], combine="by_coords") as actual: assert_identical(original, actual) - def test_open_mfdataset_combine_nested_no_concat_dim(self): - original = Dataset({"foo": ("x", np.random.randn(10)), "x": np.arange(10)}) - with create_tmp_file() as tmp1: - with create_tmp_file() as tmp2: - original.isel(x=slice(5)).to_netcdf(tmp1) - original.isel(x=slice(5, 10)).to_netcdf(tmp2) - - with raises_regex(ValueError, "Must supply concat_dim"): - open_mfdataset([tmp2, tmp1], combine="nested") @pytest.mark.xfail(reason="mfdataset loses encoding currently.") def test_encoding_mfdataset(self): @@ -3134,73 +3125,6 @@ def 
test_load_dataarray(self): ds.to_netcdf(tmp) -@requires_scipy_or_netCDF4 -@requires_dask -class TestOpenMFDataSetDeprecation: - """ - Set of tests to check that FutureWarnings are correctly raised until the - deprecation cycle is complete. #2616 - """ - - def test_open_mfdataset_default(self): - ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]}) - with create_tmp_file() as tmp1: - with create_tmp_file() as tmp2: - ds1.to_netcdf(tmp1) - ds2.to_netcdf(tmp2) - - with pytest.warns( - FutureWarning, match="default behaviour of" " `open_mfdataset`" - ): - open_mfdataset([tmp1, tmp2]) - - def test_open_mfdataset_with_concat_dim(self): - ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]}) - with create_tmp_file() as tmp1: - with create_tmp_file() as tmp2: - ds1.to_netcdf(tmp1) - ds2.to_netcdf(tmp2) - - with pytest.warns(FutureWarning, match="`concat_dim`"): - open_mfdataset([tmp1, tmp2], concat_dim="x") - - def test_auto_combine_with_merge_and_concat(self): - ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]}) - ds3 = Dataset({"z": ((), 99)}) - with create_tmp_file() as tmp1: - with create_tmp_file() as tmp2: - with create_tmp_file() as tmp3: - ds1.to_netcdf(tmp1) - ds2.to_netcdf(tmp2) - ds3.to_netcdf(tmp3) - - with pytest.warns( - FutureWarning, match="require both concatenation" - ): - open_mfdataset([tmp1, tmp2, tmp3]) - - def test_auto_combine_with_coords(self): - ds1 = Dataset({"foo": ("x", [0])}, coords={"x": ("x", [0])}) - ds2 = Dataset({"foo": ("x", [1])}, coords={"x": ("x", [1])}) - with create_tmp_file() as tmp1: - with create_tmp_file() as tmp2: - ds1.to_netcdf(tmp1) - ds2.to_netcdf(tmp2) - - with pytest.warns(FutureWarning, match="supplied have global"): - open_mfdataset([tmp1, tmp2]) - - def test_auto_combine_without_coords(self): - ds1, ds2 = Dataset({"foo": ("x", [0])}), Dataset({"foo": ("x", [1])}) - with create_tmp_file() as tmp1: - with create_tmp_file() as tmp2: - ds1.to_netcdf(tmp1) - ds2.to_netcdf(tmp2) - - with pytest.warns(FutureWarning, 
match="supplied do not have global"): - open_mfdataset([tmp1, tmp2]) - - @requires_scipy_or_netCDF4 @requires_pydap @pytest.mark.filterwarnings("ignore:The binary mode of fromstring is deprecated") diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index c3f981f10d1..c7ac678693c 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -7,7 +7,6 @@ from xarray import ( DataArray, Dataset, - auto_combine, combine_by_coords, combine_nested, concat, @@ -818,173 +817,6 @@ def test_combine_by_coords_incomplete_hypercube(self): combine_by_coords([x1, x2, x3], fill_value=None) -@pytest.mark.filterwarnings( - "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated" -) -@pytest.mark.filterwarnings("ignore:Also `open_mfdataset` will no longer") -@pytest.mark.filterwarnings("ignore:The datasets supplied") -class TestAutoCombineOldAPI: - """ - Set of tests which check that old 1-dimensional auto_combine behaviour is - still satisfied. #2616 - """ - - def test_auto_combine(self): - objs = [Dataset({"x": [0]}), Dataset({"x": [1]})] - actual = auto_combine(objs) - expected = Dataset({"x": [0, 1]}) - assert_identical(expected, actual) - - actual = auto_combine([actual]) - assert_identical(expected, actual) - - objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})] - actual = auto_combine(objs) - expected = Dataset({"x": [0, 1, 2]}) - assert_identical(expected, actual) - - # ensure auto_combine handles non-sorted variables - objs = [ - Dataset({"x": ("a", [0]), "y": ("a", [0])}), - Dataset({"y": ("a", [1]), "x": ("a", [1])}), - ] - actual = auto_combine(objs) - expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])}) - assert_identical(expected, actual) - - objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})] - with raises_regex(ValueError, "too many .* dimensions"): - auto_combine(objs) - - objs = [Dataset({"x": 0}), Dataset({"x": 1})] - with raises_regex(ValueError, "cannot infer dimension"): - 
auto_combine(objs) - - objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})] - with raises_regex(ValueError, "'y' is not present in all datasets"): - auto_combine(objs) - - def test_auto_combine_previously_failed(self): - # In the above scenario, one file is missing, containing the data for - # one year's data for one variable. - datasets = [ - Dataset({"a": ("x", [0]), "x": [0]}), - Dataset({"b": ("x", [0]), "x": [0]}), - Dataset({"a": ("x", [1]), "x": [1]}), - ] - expected = Dataset({"a": ("x", [0, 1]), "b": ("x", [0, np.nan])}, {"x": [0, 1]}) - actual = auto_combine(datasets) - assert_identical(expected, actual) - - # Your data includes "time" and "station" dimensions, and each year's - # data has a different set of stations. - datasets = [ - Dataset({"a": ("x", [2, 3]), "x": [1, 2]}), - Dataset({"a": ("x", [1, 2]), "x": [0, 1]}), - ] - expected = Dataset( - {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])}, {"x": [0, 1, 2]} - ) - actual = auto_combine(datasets, concat_dim="t") - assert_identical(expected, actual) - - def test_auto_combine_with_new_variables(self): - datasets = [Dataset({"x": 0}, {"y": 0}), Dataset({"x": 1}, {"y": 1, "z": 1})] - actual = auto_combine(datasets, "y") - expected = Dataset({"x": ("y", [0, 1])}, {"y": [0, 1], "z": 1}) - assert_identical(expected, actual) - - def test_auto_combine_no_concat(self): - objs = [Dataset({"x": 0}), Dataset({"y": 1})] - actual = auto_combine(objs) - expected = Dataset({"x": 0, "y": 1}) - assert_identical(expected, actual) - - objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})] - actual = auto_combine(objs) - expected = Dataset({"x": 0, "y": 1, "z": 2}) - assert_identical(expected, actual) - - data = Dataset({"x": 0}) - actual = auto_combine([data, data, data], concat_dim=None) - assert_identical(data, actual) - - # Single object, with a concat_dim explicitly provided - # Test the issue reported in GH #1988 - objs = [Dataset({"x": 0, "y": 1})] - dim = DataArray([100], name="baz", 
dims="baz") - actual = auto_combine(objs, concat_dim=dim) - expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]}) - assert_identical(expected, actual) - - # Just making sure that auto_combine is doing what is - # expected for non-scalar values, too. - objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})] - dim = DataArray([100], name="baz", dims="baz") - actual = auto_combine(objs, concat_dim=dim) - expected = Dataset( - {"x": (("baz", "z"), [[0, 1]]), "y": (("baz", "z"), [[1, 2]])}, - {"baz": [100]}, - ) - assert_identical(expected, actual) - - def test_auto_combine_order_by_appearance_not_coords(self): - objs = [ - Dataset({"foo": ("x", [0])}, coords={"x": ("x", [1])}), - Dataset({"foo": ("x", [1])}, coords={"x": ("x", [0])}), - ] - actual = auto_combine(objs) - expected = Dataset({"foo": ("x", [0, 1])}, coords={"x": ("x", [1, 0])}) - assert_identical(expected, actual) - - @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0]) - def test_auto_combine_fill_value(self, fill_value): - datasets = [ - Dataset({"a": ("x", [2, 3]), "x": [1, 2]}), - Dataset({"a": ("x", [1, 2]), "x": [0, 1]}), - ] - if fill_value == dtypes.NA: - # if we supply the default, we expect the missing value for a - # float array - fill_value = np.nan - expected = Dataset( - {"a": (("t", "x"), [[fill_value, 2, 3], [1, 2, fill_value]])}, - {"x": [0, 1, 2]}, - ) - actual = auto_combine(datasets, concat_dim="t", fill_value=fill_value) - assert_identical(expected, actual) - - -class TestAutoCombineDeprecation: - """ - Set of tests to check that FutureWarnings are correctly raised until the - deprecation cycle is complete. 
#2616 - """ - - def test_auto_combine_with_concat_dim(self): - objs = [Dataset({"x": [0]}), Dataset({"x": [1]})] - with pytest.warns(FutureWarning, match="`concat_dim`"): - auto_combine(objs, concat_dim="x") - - def test_auto_combine_with_merge_and_concat(self): - objs = [Dataset({"x": [0]}), Dataset({"x": [1]}), Dataset({"z": ((), 99)})] - with pytest.warns(FutureWarning, match="require both concatenation"): - auto_combine(objs) - - def test_auto_combine_with_coords(self): - objs = [ - Dataset({"foo": ("x", [0])}, coords={"x": ("x", [0])}), - Dataset({"foo": ("x", [1])}, coords={"x": ("x", [1])}), - ] - with pytest.warns(FutureWarning, match="supplied have global"): - auto_combine(objs) - - def test_auto_combine_without_coords(self): - objs = [Dataset({"foo": ("x", [0])}), Dataset({"foo": ("x", [1])})] - with pytest.warns(FutureWarning, match="supplied do not have global"): - auto_combine(objs) - - @requires_cftime def test_combine_by_coords_distant_cftime_dates(): # Regression test for https://github.com/pydata/xarray/issues/3535 From 41f2f7b7840dfb6827087f481b5fe49716afcdf2 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Apr 2020 04:11:32 +0100 Subject: [PATCH 03/10] Code formatting --- xarray/core/combine.py | 2 -- xarray/tests/test_backends.py | 1 - xarray/tests/test_combine.py | 8 +------- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index da2c8654ce2..58bd7178fa2 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -760,5 +760,3 @@ def combine_by_coords( join=join, combine_attrs=combine_attrs, ) - - diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 2ba7331c367..8841a97652b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2908,7 +2908,6 @@ def test_open_mfdataset_auto_combine(self): with open_mfdataset([tmp2, tmp1], combine="by_coords") as actual: assert_identical(original, actual) - 
@pytest.mark.xfail(reason="mfdataset loses encoding currently.") def test_encoding_mfdataset(self): original = Dataset( diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index c7ac678693c..59f61f59722 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -4,13 +4,7 @@ import numpy as np import pytest -from xarray import ( - DataArray, - Dataset, - combine_by_coords, - combine_nested, - concat, -) +from xarray import DataArray, Dataset, combine_by_coords, combine_nested, concat from xarray.core import dtypes from xarray.core.combine import ( _check_shape_tile_ids, From 6a7e0f735d3df64a1f40fbf609c2bd74804fac54 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Apr 2020 04:21:40 +0100 Subject: [PATCH 04/10] updated what's new --- doc/whats-new.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b9c81ad3474..a4ee1fe8765 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,13 @@ Breaking changes `_. (:pull:`3274`) By `Elliott Sales de Andrade `_ +- The old :py:func:`xarray.auto_combine` function is now fully deprecated in + favour of the :py:func:`xarray.combine_by_coords` and + :py:func:`xarray.combine_nested` functions. This also means that + the default behaviour of :py:func:`xarray.open_mfdataset` has changed to use + ``combine='by_coords'`` as the default argument value. (:issue:`2616`, :pull:`3926`) + By `Tom Nicholas `_. 
+ New Features ~~~~~~~~~~~~ From 3e88561988bdb5f14ab0521659da79a42241b8f6 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Apr 2020 04:28:11 +0100 Subject: [PATCH 05/10] PEP8 fixes --- xarray/__init__.py | 1 - xarray/backends/api.py | 1 - 2 files changed, 2 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index 06f3cd634b1..4dff3c58bb6 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -46,7 +46,6 @@ "align", "apply_ufunc", "as_variable", - "auto_combine", "broadcast", "cftime_range", "combine_by_coords", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 1a1ddfc33b4..b0310f542d7 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -4,7 +4,6 @@ from io import BytesIO from numbers import Number from pathlib import Path -from textwrap import dedent from typing import ( TYPE_CHECKING, Callable, From a624b0fd4e30cf3e09eb0d47e4dfe1dbcecb6bf1 Mon Sep 17 00:00:00 2001 From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com> Date: Thu, 2 Apr 2020 18:26:56 +0100 Subject: [PATCH 06/10] Update doc/whats-new.rst `:py:func:` links fixed Co-Authored-By: keewis --- doc/whats-new.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a4ee1fe8765..7e465099b3c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,10 +26,10 @@ Breaking changes `_. (:pull:`3274`) By `Elliott Sales de Andrade `_ -- The old :py:func:`xarray.auto_combine` function is now fully deprecated in - favour of the :py:func:`xarray.combine_by_coords` and - :py:func:`xarray.combine_nested` functions. This also means that - the default behaviour of :py:func:`xarray.open_mfdataset` has changed to use +- The old :py:func:`auto_combine` function is now fully deprecated in + favour of the :py:func:`combine_by_coords` and + :py:func:`combine_nested` functions. 
This also means that + the default behaviour of :py:func:`open_mfdataset` has changed to use ``combine='by_coords'`` as the default argument value. (:issue:`2616`, :pull:`3926`) By `Tom Nicholas `_. From 2a7fd86ff380ccaebf3967a4ed85f8c948c1dabe Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Apr 2020 18:29:34 +0100 Subject: [PATCH 07/10] removed auto_combine from API docs --- doc/api-hidden.rst | 2 -- doc/api.rst | 1 - 2 files changed, 3 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index cc9517a98ba..f96c0b52794 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -9,8 +9,6 @@ .. autosummary:: :toctree: generated/ - auto_combine - Dataset.nbytes Dataset.chunks diff --git a/doc/api.rst b/doc/api.rst index b37c84e7a81..4cbb58f3dfc 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -21,7 +21,6 @@ Top-level functions broadcast concat merge - auto_combine combine_by_coords combine_nested where From 90dee352d83b5f12c6e9f8feeea7df5c496cbb9d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Apr 2020 18:58:04 +0100 Subject: [PATCH 08/10] clarify that auto_combine is completely removed --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7e465099b3c..5ec097b02d1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,7 +26,7 @@ Breaking changes `_. (:pull:`3274`) By `Elliott Sales de Andrade `_ -- The old :py:func:`auto_combine` function is now fully deprecated in +- The old :py:func:`auto_combine` function has now been removed in favour of the :py:func:`combine_by_coords` and :py:func:`combine_nested` functions. 
This also means that the default behaviour of :py:func:`open_mfdataset` has changed to use From c14f9d1ca9620bcb8790e87b0531730578466690 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 7 May 2020 23:11:40 +0100 Subject: [PATCH 09/10] concat_dim=None by default for combine='nested' --- xarray/backends/api.py | 2 +- xarray/tests/test_backends.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index b0310f542d7..75790c81f6a 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -708,7 +708,7 @@ def close(self): def open_mfdataset( paths, chunks=None, - concat_dim="_not_supplied", + concat_dim=None, compat="no_conflicts", preprocess=None, engine=None, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 8841a97652b..f818467b33f 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3001,6 +3001,17 @@ def test_open_mfdataset_concat_dim_none(self): ) as actual: assert_identical(data, actual) + def test_open_mfdataset_concat_dim_default_none(self): + with create_tmp_file() as tmp1: + with create_tmp_file() as tmp2: + data = Dataset({"x": 0}) + data.to_netcdf(tmp1) + Dataset({"x": np.nan}).to_netcdf(tmp2) + with open_mfdataset( + [tmp1, tmp2], combine="nested" + ) as actual: + assert_identical(data, actual) + def test_open_dataset(self): original = Dataset({"foo": ("x", np.random.randn(10))}) with create_tmp_file() as tmp: From 68566f31f6b8c836ca32d896bbbbccbda1d5ee0d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 13 May 2020 16:49:19 +0100 Subject: [PATCH 10/10] fix black formatting --- xarray/tests/test_backends.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f818467b33f..7f8e861c3be 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3007,9 +3007,7 @@ def 
test_open_mfdataset_concat_dim_default_none(self): data = Dataset({"x": 0}) data.to_netcdf(tmp1) Dataset({"x": np.nan}).to_netcdf(tmp2) - with open_mfdataset( - [tmp1, tmp2], combine="nested" - ) as actual: + with open_mfdataset([tmp1, tmp2], combine="nested") as actual: assert_identical(data, actual) def test_open_dataset(self):