From c972e9740dec14b69468510f931278dd2dfbb6ac Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 17 Sep 2022 06:04:17 +0200 Subject: [PATCH 01/38] update dim typing --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index 7234f8826..cf3b0c731 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -57,7 +57,7 @@ def xarray_reduce( expected_groups=None, isbin: bool | Sequence[bool] = False, sort: bool = True, - dim: Hashable = None, + dim: str | Iterable[Hashable] | Ellipsis | None = None, split_out: int = 1, fill_value=None, method: str = "map-reduce", From 64c7d771bd1fa421e6c8a00534f92d134974f511 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 19 Sep 2022 22:05:33 +0200 Subject: [PATCH 02/38] Fix mypy errors in xarray.py --- flox/core.py | 4 +- flox/xarray.py | 103 +++++++++++++++++++++++++------------------------ 2 files changed, 54 insertions(+), 53 deletions(-) diff --git a/flox/core.py b/flox/core.py index 943fd029e..b1d26795f 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1282,11 +1282,11 @@ def _assert_by_is_aligned(shape, by): def _convert_expected_groups_to_index( - expected_groups: tuple, isbin: bool, sort: bool + expected_groups: tuple, isbin: Sequence[bool], sort: bool ) -> pd.Index | None: out = [] for ex, isbin_ in zip(expected_groups, isbin): - if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin): + if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): if sort: ex = ex.sort_values() out.append(ex) diff --git a/flox/xarray.py b/flox/xarray.py index 39e30e5aa..d8a66cb85 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Hashable, Iterable, Sequence +from typing import TYPE_CHECKING, Hashable, Iterable, Union, Sequence import numpy as np import pandas as pd @@ -19,8 +19,10 @@ from .xrutils import _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric if TYPE_CHECKING: - from xarray import DataArray, Dataset, Resample + from xarray import DataArray, Dataset # TODO: Use T_DataArray, T_Dataset? + from xarray.core.resample import Resample + Dims = Union[str, Iterable[Hashable], None] def _get_input_core_dims(group_names, dim, ds, grouper_dims): input_core_dims = [[], []] @@ -52,12 +54,12 @@ def lookup_order(dimension): def xarray_reduce( obj: Dataset | DataArray, - *by: DataArray | Iterable[str] | Iterable[DataArray], + *by: DataArray | Hashable, func: str | Aggregation, expected_groups=None, isbin: bool | Sequence[bool] = False, sort: bool = True, - dim: str | Iterable[Hashable] | Ellipsis | None = None, + dim: Dims | ellipsis = None, split_out: int = 1, fill_value=None, method: str = "map-reduce", @@ -206,8 +208,10 @@ def xarray_reduce( if keep_attrs is None: keep_attrs = True - if isinstance(isbin, bool): - isbin = (isbin,) * len(by) + if isinstance(isbin, Sequence): + isbins = isbin + else: + isbins = (isbin,) * len(by) if expected_groups is None: expected_groups = (None,) * len(by) if isinstance(expected_groups, (np.ndarray, list)): # TODO: test for list @@ -220,17 +224,17 @@ def xarray_reduce( raise NotImplementedError # eventually drop the variables we are grouping by - maybe_drop = [b for b in by if isinstance(b, str)] + maybe_drop = [b for b in by if isinstance(b, Hashable)] unindexed_dims = tuple( b - for b, isbin_ in zip(by, isbin) - if isinstance(b, str) and not isbin_ and b in obj.dims and b not in obj.indexes + for b, isbin_ in zip(by, isbins) + if isinstance(b, Hashable) and not isbin_ and b in obj.dims and b not in obj.indexes ) - by: tuple[DataArray] = tuple(obj[g] if isinstance(g, str) else g for g in by) # type: ignore + by_da = tuple(obj[g] if isinstance(g, Hashable) else g for g in by) grouper_dims = [] - for g in by: + for g in by_da: for d in g.dims: if d not in grouper_dims: grouper_dims.append(d) @@ -243,53 +247,50 @@ def xarray_reduce( ds = ds.drop_vars([var for var in maybe_drop if var in ds.variables]) if dim is Ellipsis: - dim = tuple(obj.dims) - if by[0].name in ds.dims and not isbin[0]: - dim = tuple(d for d in dim if d != by[0].name) + dim_tuple = tuple(obj.dims) + if by_da[0].name in ds.dims and not isbins[0]: + dim_tuple = tuple(d for d in dim_tuple if d != by_da[0].name) elif dim is not None: - dim = _atleast_1d(dim) + dim_tuple = _atleast_1d(dim) else: - dim = tuple() + dim_tuple = tuple() # broadcast all variables against each other along all dimensions in `by` variables # don't exclude `dim` because it need not be a dimension in any of the `by` variables! # in the case where dim is Ellipsis, and by.ndim < obj.ndim # then we also broadcast `by` to all `obj.dims` # TODO: avoid this broadcasting - exclude_dims = tuple(d for d in ds.dims if d not in grouper_dims and d not in dim) - ds, *by = xr.broadcast(ds, *by, exclude=exclude_dims) + exclude_dims = tuple(d for d in ds.dims if d not in grouper_dims and d not in dim_tuple) + ds_broad, *by_broad = xr.broadcast(ds, *by_da, exclude=exclude_dims) - if not dim: - dim = tuple(by[0].dims) + if not dim_tuple: + dim_tuple = tuple(by_broad[0].dims) - if any(d not in grouper_dims and d not in obj.dims for d in dim): - raise ValueError(f"Cannot reduce over absent dimensions {dim}.") + if any(d not in grouper_dims and d not in obj.dims for d in dim_tuple): + raise ValueError(f"Cannot reduce over absent dimensions {dim_tuple}.") - dims_not_in_groupers = tuple(d for d in dim if d not in grouper_dims) - if dims_not_in_groupers == tuple(dim) and not any(isbin): + dims_not_in_groupers = tuple(d for d in dim_tuple if d not in grouper_dims) + if dims_not_in_groupers == dim_tuple and not any(isbins): # reducing along a dimension along which groups do not vary # This is really just a normal reduction. # This is not right when binning so we exclude. - if skipna and isinstance(func, str): - dsfunc = func[3:] - else: - dsfunc = func + if isinstance(func, str): + dsfunc = func[3:] if skipna else func # TODO: skipna needs test - result = getattr(ds, dsfunc)(dim=dim, skipna=skipna) + result = getattr(ds_broad, dsfunc)(dim=dim_tuple, skipna=skipna) if isinstance(obj, xr.DataArray): return obj._from_temp_dataset(result) else: return result - axis = tuple(range(-len(dim), 0)) - group_names = tuple(g.name if not binned else f"{g.name}_bins" for g, binned in zip(by, isbin)) + axis = tuple(range(-len(dim_tuple), 0)) + group_names = tuple(g.name if not binned else f"{g.name}_bins" for g, binned in zip(by_broad, isbins)) - group_shape = [None] * len(by) expected_groups = list(expected_groups) # Set expected_groups and convert to index since we need coords, sizes # for output xarray objects - for idx, (b, expect, isbin_) in enumerate(zip(by, expected_groups, isbin)): + for idx, (b_, expect, isbin_) in enumerate(zip(by_broad, expected_groups, isbins)): if isbin_ and isinstance(expect, int): raise NotImplementedError( "flox does not support binning into an integer number of bins yet." @@ -300,9 +301,9 @@ def xarray_reduce( f"Please provided bin edges for group variable {idx} " f"named {group_names[idx]} in expected_groups." ) - expected_groups[idx] = _get_expected_groups(b.data, sort=sort, raise_if_dask=True) + expected_groups[idx] = _get_expected_groups(b_.data, sort=sort, raise_if_dask=True) - expected_groups = _convert_expected_groups_to_index(expected_groups, isbin, sort=sort) + expected_groups = _convert_expected_groups_to_index(expected_groups, isbins, sort=sort) group_shape = tuple(len(e) for e in expected_groups) group_sizes = dict(zip(group_names, group_shape)) @@ -350,20 +351,20 @@ def wrapper(array, *by, func, skipna, **kwargs): if isinstance(obj, xr.Dataset): # broadcasting means the group dim gets added to ds, so we check the original obj for k, v in obj.data_vars.items(): - is_missing_dim = not (any(d in v.dims for d in dim)) + is_missing_dim = not (any(d in v.dims for d in dim_tuple)) if is_missing_dim: missing_dim[k] = v - input_core_dims = _get_input_core_dims(group_names, dim, ds, grouper_dims) - input_core_dims += [input_core_dims[-1]] * (len(by) - 1) + input_core_dims = _get_input_core_dims(group_names, dim_tuple, ds_broad, grouper_dims) + input_core_dims += [input_core_dims[-1]] * (len(by_broad) - 1) actual = xr.apply_ufunc( wrapper, - ds.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims), - *by, + ds_broad.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims), + *by_broad, input_core_dims=input_core_dims, # for xarray's test_groupby_duplicate_coordinate_labels - exclude_dims=set(dim), + exclude_dims=set(dim_tuple), output_core_dims=[group_names], dask="allowed", dask_gufunc_kwargs=dict(output_sizes=group_sizes), @@ -380,18 +381,18 @@ def wrapper(array, *by, func, skipna, **kwargs): "engine": engine, "reindex": reindex, "expected_groups": tuple(expected_groups), - "isbin": isbin, + "isbin": isbins, "finalize_kwargs": finalize_kwargs, }, ) # restore non-dim coord variables without the core dimension # TODO: shouldn't apply_ufunc handle this? - for var in set(ds.variables) - set(ds.dims): - if all(d not in ds[var].dims for d in dim): - actual[var] = ds[var] + for var in set(ds_broad.variables) - set(ds_broad.dims): + if all(d not in ds_broad[var].dims for d in dim_tuple): + actual[var] = ds_broad[var] - for name, expect, by_ in zip(group_names, expected_groups, by): + for name, expect, by_ in zip(group_names, expected_groups, by_broad): # Can't remove this till xarray handles IntervalIndex if isinstance(expect, pd.IntervalIndex): expect = expect.to_numpy() @@ -399,8 +400,8 @@ def wrapper(array, *by, func, skipna, **kwargs): actual = actual.drop_vars(name) # When grouping by MultiIndex, expect is an pd.Index wrapping # an object array of tuples - if name in ds.indexes and isinstance(ds.indexes[name], pd.MultiIndex): - levelnames = ds.indexes[name].names + if name in ds_broad.indexes and isinstance(ds_broad.indexes[name], pd.MultiIndex): + levelnames = ds_broad.indexes[name].names expect = pd.MultiIndex.from_tuples(expect.values, names=levelnames) actual[name] = expect if Version(xr.__version__) > Version("2022.03.0"): @@ -413,19 +414,19 @@ def wrapper(array, *by, func, skipna, **kwargs): if unindexed_dims: actual = actual.drop_vars(unindexed_dims) - if len(by) == 1: + if len(by_broad) == 1: for var in actual: if isinstance(obj, xr.DataArray): template = obj else: template = obj[var] if actual[var].ndim > 1: - actual[var] = _restore_dim_order(actual[var], template, by[0]) + actual[var] = _restore_dim_order(actual[var], template, by_broad[0]) if missing_dim: for k, v in missing_dim.items(): missing_group_dims = { - dim: size for dim, size in group_sizes.items() if dim not in v.dims + dim_: size for dim_, size in group_sizes.items() if dim_ not in v.dims } # The expand_dims is for backward compat with xarray's questionable behaviour if missing_group_dims: From b3d698a3be3d8ad7b2fba9f08af2ac6c5fc2c4ab Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Sep 2022 20:06:19 +0000 Subject: [PATCH 03/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index d8a66cb85..b1dbc53e2 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Hashable, Iterable, Union, Sequence +from typing import TYPE_CHECKING, Hashable, Iterable, Sequence, Union import numpy as np import pandas as pd @@ -19,11 +19,12 @@ from .xrutils import _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric if TYPE_CHECKING: - from xarray import DataArray, Dataset # TODO: Use T_DataArray, T_Dataset? + from xarray import DataArray, Dataset # TODO: Use T_DataArray, T_Dataset? from xarray.core.resample import Resample Dims = Union[str, Iterable[Hashable], None] + def _get_input_core_dims(group_names, dim, ds, grouper_dims): input_core_dims = [[], []] for g in group_names: @@ -284,7 +285,9 @@ def xarray_reduce( return result axis = tuple(range(-len(dim_tuple), 0)) - group_names = tuple(g.name if not binned else f"{g.name}_bins" for g, binned in zip(by_broad, isbins)) + group_names = tuple( + g.name if not binned else f"{g.name}_bins" for g, binned in zip(by_broad, isbins) + ) expected_groups = list(expected_groups) From 6e4db03986b84021be4bd4f53eac6194dbb79b06 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 19 Sep 2022 22:07:31 +0200 Subject: [PATCH 04/38] start mypy ci --- .github/workflows/ci-additional.yaml | 80 ++++++++++++++-------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 65b790d96..c326429bb 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -73,46 +73,46 @@ jobs: run: | python -m pytest --doctest-modules flox --ignore flox/tests - # mypy: - # name: Mypy - # runs-on: "ubuntu-latest" - # needs: detect-ci-trigger - # if: needs.detect-ci-trigger.outputs.triggered == 'false' - # defaults: - # run: - # shell: bash -l {0} - # env: - # CONDA_ENV_FILE: ci/environment.yml - # PYTHON_VERSION: "3.10" + mypy: + name: Mypy + runs-on: "ubuntu-latest" + needs: detect-ci-trigger + if: needs.detect-ci-trigger.outputs.triggered == 'false' + defaults: + run: + shell: bash -l {0} + env: + CONDA_ENV_FILE: ci/environment.yml + PYTHON_VERSION: "3.10" - # steps: - # - uses: actions/checkout@v3 - # with: - # fetch-depth: 0 # Fetch all history for all branches and tags. + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. - # - name: set environment variables - # run: | - # echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV - # - name: Setup micromamba - # uses: mamba-org/provision-with-micromamba@34071ca7df4983ccd272ed0d3625818b27b70dcc - # with: - # environment-file: ${{env.CONDA_ENV_FILE}} - # environment-name: xarray-tests - # extra-specs: | - # python=${{env.PYTHON_VERSION}} - # cache-env: true - # cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" - # - name: Install xarray - # run: | - # python -m pip install --no-deps -e . - # - name: Version info - # run: | - # conda info -a - # conda list - # - name: Install mypy - # run: | - # python -m pip install mypy + - name: set environment variables + run: | + echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + - name: Setup micromamba + uses: mamba-org/provision-with-micromamba@34071ca7df4983ccd272ed0d3625818b27b70dcc + with: + environment-file: ${{env.CONDA_ENV_FILE}} + environment-name: xarray-tests + extra-specs: | + python=${{env.PYTHON_VERSION}} + cache-env: true + cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" + - name: Install xarray + run: | + python -m pip install --no-deps -e . + - name: Version info + run: | + conda info -a + conda list + - name: Install mypy + run: | + python -m pip install mypy - # - name: Run mypy - # run: | - # python -m mypy --install-types --non-interactive + - name: Run mypy + run: | + python -m mypy --install-types --non-interactive From ed752dd04917d39c6cb5dbb0f54b31b2a180e04a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 19 Sep 2022 22:31:29 +0200 Subject: [PATCH 05/38] Use T_DataArray and T_Dataset --- flox/xarray.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index b1dbc53e2..fb02db2af 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -19,7 +19,7 @@ from .xrutils import _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric if TYPE_CHECKING: - from xarray import DataArray, Dataset # TODO: Use T_DataArray, T_Dataset? + from xarray.core.types import T_DataArray, T_Dataset from xarray.core.resample import Resample Dims = Union[str, Iterable[Hashable], None] @@ -54,8 +54,8 @@ def lookup_order(dimension): def xarray_reduce( - obj: Dataset | DataArray, - *by: DataArray | Hashable, + obj: T_Dataset | T_DataArray, + *by: T_DataArray | Hashable, func: str | Aggregation, expected_groups=None, isbin: bool | Sequence[bool] = False, @@ -240,10 +240,10 @@ def xarray_reduce( if d not in grouper_dims: grouper_dims.append(d) - if isinstance(obj, xr.DataArray): - ds = obj._to_temp_dataset() - else: + if isinstance(obj, xr.Dataset): ds = obj + else: + ds = obj._to_temp_dataset() ds = ds.drop_vars([var for var in maybe_drop if var in ds.variables]) @@ -419,10 +419,10 @@ def wrapper(array, *by, func, skipna, **kwargs): if len(by_broad) == 1: for var in actual: - if isinstance(obj, xr.DataArray): - template = obj - else: + if isinstance(obj, xr.Dataset): template = obj[var] + else: + template = obj if actual[var].ndim > 1: actual[var] = _restore_dim_order(actual[var], template, by_broad[0]) @@ -444,9 +444,9 @@ def wrapper(array, *by, func, skipna, **kwargs): def rechunk_for_cohorts( - obj: DataArray | Dataset, + obj: T_DataArray | T_Dataset, dim: str, - labels: DataArray, + labels: T_DataArray, force_new_chunk_at, chunksize: int | None = None, ignore_old_chunks: bool = False, @@ -491,7 +491,7 @@ def rechunk_for_cohorts( ) -def rechunk_for_blockwise(obj: DataArray | Dataset, dim: str, labels: DataArray): +def rechunk_for_blockwise(obj: T_DataArray | T_Dataset, dim: str, labels: T_DataArray): """ Rechunks array so that group boundaries line up with chunk boundaries, allowing embarassingly parallel group reductions. From 6303f4a29346b14e8cda7c4d6951cfc449c587ec Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Sep 2022 20:32:33 +0000 Subject: [PATCH 06/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index fb02db2af..8dcaf7a9c 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -19,8 +19,8 @@ from .xrutils import _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric if TYPE_CHECKING: - from xarray.core.types import T_DataArray, T_Dataset from xarray.core.resample import Resample + from xarray.core.types import T_DataArray, T_Dataset Dims = Union[str, Iterable[Hashable], None] From ae8953af98cff45d3c0e3708ae313e2f69439b37 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 19 Sep 2022 22:36:21 +0200 Subject: [PATCH 07/38] Add mypy ignores --- setup.cfg | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/setup.cfg b/setup.cfg index f254a2f19..4748f0e77 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,3 +57,80 @@ per-file-ignores = exclude= .eggs doc + +[mypy] +exclude = properties|asv_bench|doc +files = . +show_error_codes = True + +# Most of the numerical computing stack doesn't have type annotations yet. +[mypy-affine.*] +ignore_missing_imports = True +[mypy-bottleneck.*] +ignore_missing_imports = True +[mypy-cartopy.*] +ignore_missing_imports = True +[mypy-cdms2.*] +ignore_missing_imports = True +[mypy-cf_units.*] +ignore_missing_imports = True +[mypy-cfgrib.*] +ignore_missing_imports = True +[mypy-cftime.*] +ignore_missing_imports = True +[mypy-cupy.*] +ignore_missing_imports = True +[mypy-dask.*] +ignore_missing_imports = True +[mypy-distributed.*] +ignore_missing_imports = True +[mypy-fsspec.*] +ignore_missing_imports = True +[mypy-h5netcdf.*] +ignore_missing_imports = True +[mypy-h5py.*] +ignore_missing_imports = True +[mypy-importlib_metadata.*] +ignore_missing_imports = True +[mypy-iris.*] +ignore_missing_imports = True +[mypy-matplotlib.*] +ignore_missing_imports = True +[mypy-Nio.*] +ignore_missing_imports = True +[mypy-nc_time_axis.*] +ignore_missing_imports = True +[mypy-numbagg.*] +ignore_missing_imports = True +[mypy-numpy.*] +ignore_missing_imports = True +[mypy-netCDF4.*] +ignore_missing_imports = True +[mypy-netcdftime.*] +ignore_missing_imports = True +[mypy-pandas.*] +ignore_missing_imports = True +[mypy-pint.*] +ignore_missing_imports = True +[mypy-pooch.*] +ignore_missing_imports = True +[mypy-PseudoNetCDF.*] +ignore_missing_imports = True +[mypy-pydap.*] +ignore_missing_imports = True +[mypy-pytest.*] +ignore_missing_imports = True +[mypy-rasterio.*] +ignore_missing_imports = True +[mypy-scipy.*] +ignore_missing_imports = True +[mypy-seaborn.*] +ignore_missing_imports = True +[mypy-setuptools] +ignore_missing_imports = True +[mypy-sparse.*] +ignore_missing_imports = True +[mypy-toolz.*] +ignore_missing_imports = True +[mypy-zarr.*] +ignore_missing_imports = True \ No newline at end of file From ae5561d6ca17e1dca995aaa997063959210a3a15 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Sep 2022 20:36:57 +0000 Subject: [PATCH 08/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 4748f0e77..0845b63a9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -133,4 +133,4 @@ ignore_missing_imports = True [mypy-toolz.*] ignore_missing_imports = True [mypy-zarr.*] -ignore_missing_imports = True \ No newline at end of file +ignore_missing_imports = True From 5145dc204b968041d8a04fa5f0f291e1236c504b Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 19 Sep 2022 22:50:59 +0200 Subject: [PATCH 09/38] correct typing a bit --- flox/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flox/core.py b/flox/core.py index b1d26795f..e29bc6dcb 100644 --- a/flox/core.py +++ b/flox/core.py @@ -5,7 +5,7 @@ import operator from collections import namedtuple from functools import partial, reduce -from typing import TYPE_CHECKING, Any, Callable, Dict, Mapping, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, Mapping, Sequence, Union import numpy as np import numpy_groupies as npg @@ -1282,8 +1282,8 @@ def _assert_by_is_aligned(shape, by): def _convert_expected_groups_to_index( - expected_groups: tuple, isbin: Sequence[bool], sort: bool -) -> pd.Index | None: + expected_groups: Iterable, isbin: Sequence[bool], sort: bool +) -> tuple[pd.Index | None]: out = [] for ex, isbin_ in zip(expected_groups, isbin): if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): From 05893a2a59776a2efb96c3020ae37a9aacd15051 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 19 Sep 2022 20:51:55 +0000 Subject: [PATCH 10/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/core.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index e29bc6dcb..ee38822c9 100644 --- a/flox/core.py +++ b/flox/core.py @@ -5,7 +5,16 @@ import operator from collections import namedtuple from functools import partial, reduce -from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, Mapping, Sequence, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + Mapping, + Sequence, + Union, +) import numpy as np import numpy_groupies as npg From 375c31b31414efce997510161ce201e94bd61f20 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 19 Sep 2022 22:57:21 +0200 Subject: [PATCH 11/38] test newer flake8 if ellipsis passes there --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1d4aaa3c1..ad579e142 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: black - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 + rev: 5.0.4 hooks: - id: flake8 From 170467be9f655416ce6fa0999446633d1861be56 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 19 Sep 2022 23:25:44 +0200 Subject: [PATCH 12/38] Allow ellipsis in flake8 --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index 0845b63a9..ab38c5c5a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,6 +57,8 @@ per-file-ignores = exclude= .eggs doc +builtins = + ellipsis [mypy] exclude = properties|asv_bench|doc From a3d63a2a5174472377616a9fbbfa14b9d1920ad3 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 19 Sep 2022 23:43:30 +0200 Subject: [PATCH 13/38] Update core.py --- flox/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/core.py b/flox/core.py index ee38822c9..58b89bf17 100644 --- a/flox/core.py +++ b/flox/core.py @@ -1295,7 +1295,7 @@ def _convert_expected_groups_to_index( ) -> tuple[pd.Index | None]: out = [] for ex, isbin_ in zip(expected_groups, isbin): - if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): + if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin): if sort: ex = ex.sort_values() out.append(ex) From cf0d6cd195565656f6db5b8879795bfbaa686c71 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 18:19:43 +0200 Subject: [PATCH 14/38] Update xarray.py --- flox/xarray.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 8dcaf7a9c..84fac443c 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -275,8 +275,7 @@ def xarray_reduce( # reducing along a dimension along which groups do not vary # This is really just a normal reduction. # This is not right when binning so we exclude. - if isinstance(func, str): - dsfunc = func[3:] if skipna else func + dsfunc = func[3:] if skipna and isinstance(func, str) else func # TODO: skipna needs test result = getattr(ds_broad, dsfunc)(dim=dim_tuple, skipna=skipna) if isinstance(obj, xr.DataArray): From 3728858245c4d469a132b47d24da92d24c36dfb9 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 18:41:22 +0200 Subject: [PATCH 15/38] Update setup.cfg --- setup.cfg | 77 ------------------------------------------------------- 1 file changed, 77 deletions(-) diff --git a/setup.cfg b/setup.cfg index ab38c5c5a..3645e5bc7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -59,80 +59,3 @@ exclude= doc builtins = ellipsis - -[mypy] -exclude = properties|asv_bench|doc -files = . -show_error_codes = True - -# Most of the numerical computing stack doesn't have type annotations yet. -[mypy-affine.*] -ignore_missing_imports = True -[mypy-bottleneck.*] -ignore_missing_imports = True -[mypy-cartopy.*] -ignore_missing_imports = True -[mypy-cdms2.*] -ignore_missing_imports = True -[mypy-cf_units.*] -ignore_missing_imports = True -[mypy-cfgrib.*] -ignore_missing_imports = True -[mypy-cftime.*] -ignore_missing_imports = True -[mypy-cupy.*] -ignore_missing_imports = True -[mypy-dask.*] -ignore_missing_imports = True -[mypy-distributed.*] -ignore_missing_imports = True -[mypy-fsspec.*] -ignore_missing_imports = True -[mypy-h5netcdf.*] -ignore_missing_imports = True -[mypy-h5py.*] -ignore_missing_imports = True -[mypy-importlib_metadata.*] -ignore_missing_imports = True -[mypy-iris.*] -ignore_missing_imports = True -[mypy-matplotlib.*] -ignore_missing_imports = True -[mypy-Nio.*] -ignore_missing_imports = True -[mypy-nc_time_axis.*] -ignore_missing_imports = True -[mypy-numbagg.*] -ignore_missing_imports = True -[mypy-numpy.*] -ignore_missing_imports = True -[mypy-netCDF4.*] -ignore_missing_imports = True -[mypy-netcdftime.*] -ignore_missing_imports = True -[mypy-pandas.*] -ignore_missing_imports = True -[mypy-pint.*] -ignore_missing_imports = True -[mypy-pooch.*] -ignore_missing_imports = True -[mypy-PseudoNetCDF.*] -ignore_missing_imports = True -[mypy-pydap.*] -ignore_missing_imports = True -[mypy-pytest.*] -ignore_missing_imports = True -[mypy-rasterio.*] -ignore_missing_imports = True -[mypy-scipy.*] -ignore_missing_imports = True -[mypy-seaborn.*] -ignore_missing_imports = True -[mypy-setuptools] -ignore_missing_imports = True -[mypy-sparse.*] -ignore_missing_imports = True -[mypy-toolz.*] -ignore_missing_imports = True -[mypy-zarr.*] -ignore_missing_imports = True From 657496da99ee4435d53e9611acb9ef00bdf2d349 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 18:41:51 +0200 Subject: [PATCH 16/38] Update xarray.py --- flox/xarray.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index a0dad227f..a1489d80f 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -271,7 +271,10 @@ def xarray_reduce( # reducing along a dimension along which groups do not vary # This is really just a normal reduction. # This is not right when binning so we exclude. - dsfunc = func[3:] if skipna and isinstance(func, str) else func + if isinstance(func, str): + dsfunc = func[3:] if skipna else func + else: + raise ValueError("func must be a string") # TODO: skipna needs test result = getattr(ds_broad, dsfunc)(dim=dim_tuple, skipna=skipna) if isinstance(obj, xr.DataArray): From 68ac24272774d85444e5d88959befcd0995bb03c Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 19:05:37 +0200 Subject: [PATCH 17/38] Update xarray.py --- flox/xarray.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flox/xarray.py b/flox/xarray.py index a1489d80f..3166dfdb9 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -287,6 +287,7 @@ def xarray_reduce( g.name if not binned else f"{g.name}_bins" for g, binned in zip(by_broad, isbins) ) + group_shape = [None] * len(by) expected_groups = list(expected_groups) # Set expected_groups and convert to index since we need coords, sizes From c306099233ecde433aea6acf943d096445901ebf Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 19:16:04 +0200 Subject: [PATCH 18/38] Update xarray.py --- flox/xarray.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 3166dfdb9..6992a9ab4 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -205,10 +205,12 @@ def xarray_reduce( if keep_attrs is None: keep_attrs = True - if isinstance(isbin, Sequence): - isbins = isbin - else: - isbins = (isbin,) * len(by) + if isinstance(isbin, bool): + isbin = (isbin,) * len(by) + # if isinstance(isbin, Sequence): + # isbins = isbin + # else: + # isbins = (isbin,) * len(by) if expected_groups is None: expected_groups = (None,) * len(by) if isinstance(expected_groups, (np.ndarray, list)): # TODO: test for list @@ -236,10 +238,14 @@ def xarray_reduce( if d not in grouper_dims: grouper_dims.append(d) - if isinstance(obj, xr.Dataset): - ds = obj - else: + if isinstance(obj, xr.DataArray): ds = obj._to_temp_dataset() + else: + ds = obj + # if isinstance(obj, xr.Dataset): + # ds = obj + # else: + # ds = obj._to_temp_dataset() ds = ds.drop_vars([var for var in maybe_drop if var in ds.variables]) From 90b01495e3febc9e8df0afa680eeceb8a6b93096 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 Sep 2022 17:17:03 +0000 Subject: [PATCH 19/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 6992a9ab4..bff64e0da 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -208,9 +208,9 @@ def xarray_reduce( if isinstance(isbin, bool): isbin = (isbin,) * len(by) # if isinstance(isbin, Sequence): - # isbins = isbin + # isbins = isbin # else: - # isbins = (isbin,) * len(by) + # isbins = (isbin,) * len(by) if expected_groups is None: expected_groups = (None,) * len(by) if isinstance(expected_groups, (np.ndarray, list)): # TODO: test for list @@ -243,9 +243,9 @@ def xarray_reduce( else: ds = obj # if isinstance(obj, xr.Dataset): - # ds = obj + # ds = obj # else: - # ds = obj._to_temp_dataset() + # ds = obj._to_temp_dataset() ds = ds.drop_vars([var for var in maybe_drop if var in ds.variables]) From 332caf9d1df505ffc1829f746e7df96ee3192e55 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 19:18:33 +0200 Subject: [PATCH 20/38] Update xarray.py --- flox/xarray.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index bff64e0da..a94982950 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -206,7 +206,9 @@ def xarray_reduce( keep_attrs = True if isinstance(isbin, bool): - isbin = (isbin,) * len(by) + isbins = (isbin,) * len(by) + else: + isbins = isbin # if isinstance(isbin, Sequence): # isbins = isbin # else: From 9740009520cd65d0c4ce67ebd0b40147ceff2b5a Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 19:28:40 +0200 Subject: [PATCH 21/38] Update pyproject.toml --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 1259704e3..32e55d712 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,8 +39,12 @@ show_error_codes = true [[tool.mypy.overrides]] module=[ + "cachey", + "cftime", "dask.*", + "importlib_metadata", "numpy_groupies", + "matplotlib.*", "pandas", "setuptools", "toolz" From 5c0811491abe3cf872709b64216cc117aa7f5988 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 19:32:02 +0200 Subject: [PATCH 22/38] Update xarray.py --- flox/xarray.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index a94982950..8dfa82e90 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -426,10 +426,14 @@ def wrapper(array, *by, func, skipna, **kwargs): if len(by_broad) == 1: for var in actual: - if isinstance(obj, xr.Dataset): - template = obj[var] - else: + # if isinstance(obj, xr.Dataset): + # template = obj[var] + # else: + # template = obj + if isinstance(obj, xr.DataArray): template = obj + else: + template = obj[var] if actual[var].ndim > 1: actual[var] = _restore_dim_order(actual[var], template, by_broad[0]) From d5409ef2d6eb2272e024c6df321fb95837d33de8 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 20:09:20 +0200 Subject: [PATCH 23/38] Update xarray.py --- flox/xarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 8dfa82e90..8acc48be7 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -225,14 +225,14 @@ def xarray_reduce( raise NotImplementedError # eventually drop the variables we are grouping by - maybe_drop = [b for b in by if isinstance(b, Hashable)] + maybe_drop = [b for b in by if isinstance(b, str)] unindexed_dims = tuple( b for b, isbin_ in zip(by, isbins) - if isinstance(b, Hashable) and not isbin_ and b in obj.dims and b not in obj.indexes + if isinstance(b, str) and not isbin_ and b in obj.dims and b not in obj.indexes ) - by_da = tuple(obj[g] if isinstance(g, Hashable) else g for g in by) + by_da = tuple(obj[g] if isinstance(g, str) else g for g in by) grouper_dims = [] for g in by_da: From 1accd73141b2c11a0b4c5bdc6a4a4d3ff39d2d7c Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 23:12:25 +0200 Subject: [PATCH 24/38] hopefully no more pytest errors. --- flox/xarray.py | 67 ++++++++++++++++++++------------------------------ 1 file changed, 27 insertions(+), 40 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 8acc48be7..394fc6f8f 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -205,14 +205,11 @@ def xarray_reduce( if keep_attrs is None: keep_attrs = True - if isinstance(isbin, bool): - isbins = (isbin,) * len(by) - else: + if isinstance(isbin, Sequence): isbins = isbin - # if isinstance(isbin, Sequence): - # isbins = isbin - # else: - # isbins = (isbin,) * len(by) + else: + isbins = (isbin,) * len(by) + if expected_groups is None: expected_groups = (None,) * len(by) if isinstance(expected_groups, (np.ndarray, list)): # TODO: test for list @@ -225,14 +222,14 @@ def xarray_reduce( raise NotImplementedError # eventually drop the variables we are grouping by - maybe_drop = [b for b in by if isinstance(b, str)] + maybe_drop = [b for b in by if isinstance(b, Hashable)] unindexed_dims = tuple( b for b, isbin_ in zip(by, isbins) - if isinstance(b, str) and not isbin_ and b in obj.dims and b not in obj.indexes + if isinstance(b, Hashable) and not isbin_ and b in obj.dims and b not in obj.indexes ) - by_da = tuple(obj[g] if isinstance(g, str) else g for g in by) + by_da= tuple(obj[g] if isinstance(g, Hashable) else g for g in by) grouper_dims = [] for g in by_da: @@ -240,14 +237,10 @@ def xarray_reduce( if d not in grouper_dims: grouper_dims.append(d) - if isinstance(obj, xr.DataArray): - ds = obj._to_temp_dataset() + if isinstance(obj, xr.Dataset): + ds = obj else: - ds = obj - # if isinstance(obj, xr.Dataset): - # ds = obj - # else: - # ds = obj._to_temp_dataset() + ds = obj._to_temp_dataset() ds = ds.drop_vars([var for var in maybe_drop if var in ds.variables]) @@ -266,16 +259,16 @@ def xarray_reduce( # then we also broadcast `by` to all `obj.dims` # TODO: avoid this broadcasting exclude_dims = tuple(d for d in ds.dims if d not in grouper_dims and d not in dim_tuple) - ds_broad, *by_broad = xr.broadcast(ds, *by_da, exclude=exclude_dims) + ds, *by_broad = xr.broadcast(ds, *by_da, exclude=exclude_dims) if not dim_tuple: dim_tuple = tuple(by_broad[0].dims) if any(d not in grouper_dims and d not in obj.dims for d in dim_tuple): - raise ValueError(f"Cannot reduce over absent dimensions {dim_tuple}.") + raise ValueError(f"Cannot reduce over absent dimensions {dim}.") dims_not_in_groupers = tuple(d for d in dim_tuple if d not in grouper_dims) - if dims_not_in_groupers == dim_tuple and not any(isbins): + if dims_not_in_groupers == tuple(dim_tuple) and not any(isbins): # reducing along a dimension along which groups do not vary # This is really just a normal reduction. # This is not right when binning so we exclude. @@ -284,18 +277,15 @@ def xarray_reduce( else: raise ValueError("func must be a string") # TODO: skipna needs test - result = getattr(ds_broad, dsfunc)(dim=dim_tuple, skipna=skipna) + result = getattr(ds, dsfunc)(dim=dim_tuple, skipna=skipna) if isinstance(obj, xr.DataArray): return obj._from_temp_dataset(result) else: return result axis = tuple(range(-len(dim_tuple), 0)) - group_names = tuple( - g.name if not binned else f"{g.name}_bins" for g, binned in zip(by_broad, isbins) - ) + group_names = tuple(g.name if not binned else f"{g.name}_bins" for g, binned in zip(by_broad, isbins)) - group_shape = [None] * len(by) expected_groups = list(expected_groups) # Set expected_groups and convert to index since we need coords, sizes @@ -365,12 +355,12 @@ def wrapper(array, *by, func, skipna, **kwargs): if is_missing_dim: missing_dim[k] = v - input_core_dims = _get_input_core_dims(group_names, dim_tuple, ds_broad, grouper_dims) + input_core_dims = _get_input_core_dims(group_names, dim_tuple, ds, grouper_dims) input_core_dims += [input_core_dims[-1]] * (len(by_broad) - 1) actual = xr.apply_ufunc( wrapper, - ds_broad.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims), + ds.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims), *by_broad, input_core_dims=input_core_dims, # for xarray's test_groupby_duplicate_coordinate_labels @@ -398,9 +388,9 @@ def wrapper(array, *by, func, skipna, **kwargs): # restore non-dim coord variables without the core dimension # TODO: shouldn't apply_ufunc handle this? - for var in set(ds_broad.variables) - set(ds_broad.dims): - if all(d not in ds_broad[var].dims for d in dim_tuple): - actual[var] = ds_broad[var] + for var in set(ds.variables) - set(ds.dims): + if all(d not in ds[var].dims for d in dim_tuple): + actual[var] = ds[var] for name, expect, by_ in zip(group_names, expected_groups, by_broad): # Can't remove this till xarray handles IntervalIndex @@ -410,8 +400,8 @@ def wrapper(array, *by, func, skipna, **kwargs): actual = actual.drop_vars(name) # When grouping by MultiIndex, expect is an pd.Index wrapping # an object array of tuples - if name in ds_broad.indexes and isinstance(ds_broad.indexes[name], pd.MultiIndex): - levelnames = ds_broad.indexes[name].names + if name in ds.indexes and isinstance(ds.indexes[name], pd.MultiIndex): + levelnames = ds.indexes[name].names expect = pd.MultiIndex.from_tuples(expect.values, names=levelnames) actual[name] = expect if Version(xr.__version__) > Version("2022.03.0"): @@ -426,21 +416,18 @@ def wrapper(array, *by, func, skipna, **kwargs): if len(by_broad) == 1: for var in actual: - # if isinstance(obj, xr.Dataset): - # template = obj[var] - # else: - # template = obj - if isinstance(obj, xr.DataArray): - template = obj - else: + if isinstance(obj, xr.Dataset): template = obj[var] + else: + template = obj + if actual[var].ndim > 1: actual[var] = _restore_dim_order(actual[var], template, by_broad[0]) if missing_dim: for k, v in missing_dim.items(): missing_group_dims = { - dim_: size for dim_, size in group_sizes.items() if dim_ not in v.dims + d: size for d, size in group_sizes.items() if d not in v.dims } # The expand_dims is for backward compat with xarray's questionable behaviour if missing_group_dims: From a50bb6b590cd55d96ca4ad5c11d252cd12f23b8a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 Sep 2022 21:13:04 +0000 Subject: [PATCH 25/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 394fc6f8f..77ba48965 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -229,7 +229,7 @@ def xarray_reduce( if isinstance(b, Hashable) and not isbin_ and b in obj.dims and b not in obj.indexes ) - by_da= tuple(obj[g] if isinstance(g, Hashable) else g for g in by) + by_da = tuple(obj[g] if isinstance(g, Hashable) else g for g in by) grouper_dims = [] for g in by_da: @@ -238,9 +238,9 @@ def xarray_reduce( grouper_dims.append(d) if isinstance(obj, xr.Dataset): - ds = obj + ds = obj else: - ds = obj._to_temp_dataset() + ds = obj._to_temp_dataset() ds = ds.drop_vars([var for var in maybe_drop if var in ds.variables]) @@ -284,7 +284,9 @@ def xarray_reduce( return result axis = tuple(range(-len(dim_tuple), 0)) - group_names = tuple(g.name if not binned else f"{g.name}_bins" for g, binned in zip(by_broad, isbins)) + group_names = tuple( + g.name if not binned else f"{g.name}_bins" for g, binned in zip(by_broad, isbins) + ) expected_groups = list(expected_groups) @@ -426,9 +428,7 @@ def wrapper(array, *by, func, skipna, **kwargs): if missing_dim: for k, v in missing_dim.items(): - missing_group_dims = { - d: size for d, size in group_sizes.items() if d not in v.dims - } + missing_group_dims = {d: size for d, size in group_sizes.items() if d not in v.dims} # The expand_dims is for backward compat with xarray's questionable behaviour if missing_group_dims: actual[k] = v.expand_dims(missing_group_dims).variable From 50c2ac2835464955960b790df84e7d4285995cad Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 20 Sep 2022 23:39:09 +0200 Subject: [PATCH 26/38] make sure expected_groups doesn't have None --- flox/xarray.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index 394fc6f8f..b1265b8e3 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -304,7 +304,9 @@ def xarray_reduce( expected_groups[idx] = _get_expected_groups(b_.data, sort=sort, raise_if_dask=True) expected_groups = _convert_expected_groups_to_index(expected_groups, isbins, sort=sort) - group_shape = tuple(len(e) for e in expected_groups) + # TODO: _convert_expected_groups_to_index can return None which is not good + # since len(None) doesn't work. Create similar function with cleaner return type?: + group_shape = tuple(len(e) for e in expected_groups if e is not None) group_sizes = dict(zip(group_names, group_shape)) def wrapper(array, *by, func, skipna, **kwargs): From 1921938f91a8bf27efe1ab2a65c77feb0ac739fc Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 21 Sep 2022 00:04:57 +0200 Subject: [PATCH 27/38] Update flox/xarray.py Co-authored-by: Deepak Cherian --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index 0368450f2..bc17e97b7 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -275,7 +275,7 @@ def xarray_reduce( if isinstance(func, str): dsfunc = func[3:] if skipna else func else: - raise ValueError("func must be a string") + raise NotImplementdError("func must be a string when reducing along a dimension not present in `by`") # TODO: skipna needs test result = getattr(ds, dsfunc)(dim=dim_tuple, skipna=skipna) if isinstance(obj, xr.DataArray): From 3cac4b07d37c67deda89c556c629d8850a466dcf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 20 Sep 2022 22:05:18 +0000 Subject: [PATCH 28/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index bc17e97b7..dca6039a8 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -275,7 +275,9 @@ def xarray_reduce( if isinstance(func, str): dsfunc = func[3:] if skipna else func else: - raise NotImplementdError("func must be a string when reducing along a dimension not present in `by`") + raise NotImplementdError( + "func must be a string when reducing along a dimension not present in `by`" + ) # TODO: skipna needs test result = getattr(ds, dsfunc)(dim=dim_tuple, skipna=skipna) if isinstance(obj, xr.DataArray): From 43dabff10a56d9743f043f7aa31681cea4efd82c Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 21 Sep 2022 01:07:08 +0200 Subject: [PATCH 29/38] ds_broad and longer comment --- flox/xarray.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index dca6039a8..c8d631a39 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -259,7 +259,7 @@ def xarray_reduce( # then we also broadcast `by` to all `obj.dims` # TODO: avoid this broadcasting exclude_dims = tuple(d for d in ds.dims if d not in grouper_dims and d not in dim_tuple) - ds, *by_broad = xr.broadcast(ds, *by_da, exclude=exclude_dims) + ds_broad, *by_broad = xr.broadcast(ds, *by_da, exclude=exclude_dims) if not dim_tuple: dim_tuple = tuple(by_broad[0].dims) @@ -275,11 +275,11 @@ def xarray_reduce( if isinstance(func, str): dsfunc = func[3:] if skipna else func else: - raise NotImplementdError( + raise NotImplementedError( "func must be a string when reducing along a dimension not present in `by`" ) # TODO: skipna needs test - result = getattr(ds, dsfunc)(dim=dim_tuple, skipna=skipna) + result = getattr(ds_broad, dsfunc)(dim=dim_tuple, skipna=skipna) if isinstance(obj, xr.DataArray): return obj._from_temp_dataset(result) else: @@ -308,8 +308,11 @@ def xarray_reduce( expected_groups[idx] = _get_expected_groups(b_.data, sort=sort, raise_if_dask=True) expected_groups = _convert_expected_groups_to_index(expected_groups, isbins, sort=sort) - # TODO: _convert_expected_groups_to_index can return None which is not good - # since len(None) doesn't work. Create similar function with cleaner return type?: + # TODO: _convert_expected_groups_to_index can return None according to the + # type hints which is not good since len(None) then will not work. + # Narrow down by adding 'if e is not None' in group_shape loop, shouldn't be + # necessary though since this has already been checked in a previous for loop. + # Create similar function with a simpler return type? Maybe use yield?: group_shape = tuple(len(e) for e in expected_groups if e is not None) group_sizes = dict(zip(group_names, group_shape)) @@ -361,12 +364,12 @@ def wrapper(array, *by, func, skipna, **kwargs): if is_missing_dim: missing_dim[k] = v - input_core_dims = _get_input_core_dims(group_names, dim_tuple, ds, grouper_dims) + input_core_dims = _get_input_core_dims(group_names, dim_tuple, ds_broad, grouper_dims) input_core_dims += [input_core_dims[-1]] * (len(by_broad) - 1) actual = xr.apply_ufunc( wrapper, - ds.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims), + ds_broad.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims), *by_broad, input_core_dims=input_core_dims, # for xarray's test_groupby_duplicate_coordinate_labels @@ -394,9 +397,9 @@ def wrapper(array, *by, func, skipna, **kwargs): # restore non-dim coord variables without the core dimension # TODO: shouldn't apply_ufunc handle this? - for var in set(ds.variables) - set(ds.dims): - if all(d not in ds[var].dims for d in dim_tuple): - actual[var] = ds[var] + for var in set(ds_broad.variables) - set(ds_broad.dims): + if all(d not in ds_broad[var].dims for d in dim_tuple): + actual[var] = ds_broad[var] for name, expect, by_ in zip(group_names, expected_groups, by_broad): # Can't remove this till xarray handles IntervalIndex @@ -406,8 +409,8 @@ def wrapper(array, *by, func, skipna, **kwargs): actual = actual.drop_vars(name) # When grouping by MultiIndex, expect is an pd.Index wrapping # an object array of tuples - if name in ds.indexes and isinstance(ds.indexes[name], pd.MultiIndex): - levelnames = ds.indexes[name].names + if name in ds_broad.indexes and isinstance(ds_broad.indexes[name], pd.MultiIndex): + levelnames = ds_broad.indexes[name].names expect = pd.MultiIndex.from_tuples(expect.values, names=levelnames) actual[name] = expect if Version(xr.__version__) > Version("2022.03.0"): From e73f6e83df5e38f611a6fd6099a63f1b8f2c1b81 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 21 Sep 2022 23:19:48 +0200 Subject: [PATCH 30/38] Use same for loop for similar things. --- flox/xarray.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index c8d631a39..92fa16d34 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Hashable, Iterable, Sequence, Union +from typing import TYPE_CHECKING, Any, Hashable, Iterable, Sequence, Union import numpy as np import pandas as pd @@ -286,15 +286,16 @@ def xarray_reduce( return result axis = tuple(range(-len(dim_tuple), 0)) - group_names = tuple( - g.name if not binned else f"{g.name}_bins" for g, binned in zip(by_broad, isbins) - ) - - expected_groups = list(expected_groups) # Set expected_groups and convert to index since we need coords, sizes # for output xarray objects + expected_groups = list(expected_groups) + group_names: tuple[Any, ...] = () + group_sizes: dict[Any, int] = {} for idx, (b_, expect, isbin_) in enumerate(zip(by_broad, expected_groups, isbins)): + group_name = b_.name if not isbin_ else f"{b_.name}_bins" + group_names += (group_name, ) + if isbin_ and isinstance(expect, int): raise NotImplementedError( "flox does not support binning into an integer number of bins yet." @@ -303,18 +304,20 @@ def xarray_reduce( if isbin_: raise ValueError( f"Please provided bin edges for group variable {idx} " - f"named {group_names[idx]} in expected_groups." + f"named {group_name} in expected_groups." ) - expected_groups[idx] = _get_expected_groups(b_.data, sort=sort, raise_if_dask=True) - - expected_groups = _convert_expected_groups_to_index(expected_groups, isbins, sort=sort) - # TODO: _convert_expected_groups_to_index can return None according to the - # type hints which is not good since len(None) then will not work. - # Narrow down by adding 'if e is not None' in group_shape loop, shouldn't be - # necessary though since this has already been checked in a previous for loop. - # Create similar function with a simpler return type? Maybe use yield?: - group_shape = tuple(len(e) for e in expected_groups if e is not None) - group_sizes = dict(zip(group_names, group_shape)) + expect_ = _get_expected_groups(b_.data, sort=sort, raise_if_dask=True) + else: + expect_ = expect + expect_index = _convert_expected_groups_to_index((expect_,), (isbin_,), sort=sort)[0] + + # The if-check is for type hinting mainly, it narrows down the return + # type of _convert_expected_groups_to_index to pure pd.Index: + if expect_index is not None: + expected_groups[idx] = expect_index + group_sizes[group_name] = len(expect_index) + else: + raise ValueError("expect_index cannot be None") def wrapper(array, *by, func, skipna, **kwargs): # Handle skipna here because I need to know dtype to make a good default choice. From 2d62748369672734090f7865d75fce28510b1584 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 21 Sep 2022 21:20:12 +0000 Subject: [PATCH 31/38] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index 92fa16d34..db23b35ea 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -294,7 +294,7 @@ def xarray_reduce( group_sizes: dict[Any, int] = {} for idx, (b_, expect, isbin_) in enumerate(zip(by_broad, expected_groups, isbins)): group_name = b_.name if not isbin_ else f"{b_.name}_bins" - group_names += (group_name, ) + group_names += (group_name,) if isbin_ and isinstance(expect, int): raise NotImplementedError( From 41e97e9560783427d6f0f4d5403f4e22657be4e6 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 21 Sep 2022 23:30:35 +0200 Subject: [PATCH 32/38] fix xrutils.py --- flox/xrutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xrutils.py b/flox/xrutils.py index 17ad2d71d..3e6edd89e 100644 --- a/flox/xrutils.py +++ b/flox/xrutils.py @@ -19,7 +19,7 @@ dask_array_type = dask.array.Array except ImportError: - dask_array_type = () + dask_array_type = () # type: ignore def asarray(data, xp=np): From fc362116fad6b5a62100d00fde25e8e9efe55f94 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 21 Sep 2022 23:34:26 +0200 Subject: [PATCH 33/38] fix errors in cache.py --- flox/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/cache.py b/flox/cache.py index eaac3f360..4f8de8b59 100644 --- a/flox/cache.py +++ b/flox/cache.py @@ -8,4 +8,4 @@ cache = cachey.Cache(1e6) memoize = partial(cache.memoize, key=dask.base.tokenize) except ImportError: - memoize = lambda x: x + memoize = lambda x: x # type: ignore From bfb9c6ee158180ac437ff802ff4b9504cd76562d Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Wed, 21 Sep 2022 23:48:39 +0200 Subject: [PATCH 34/38] Turn off mypy check --- .github/workflows/ci-additional.yaml | 80 ++++++++++++++-------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index c326429bb..65b790d96 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -73,46 +73,46 @@ jobs: run: | python -m pytest --doctest-modules flox --ignore flox/tests - mypy: - name: Mypy - runs-on: "ubuntu-latest" - needs: detect-ci-trigger - if: needs.detect-ci-trigger.outputs.triggered == 'false' - defaults: - run: - shell: bash -l {0} - env: - CONDA_ENV_FILE: ci/environment.yml - PYTHON_VERSION: "3.10" + # mypy: + # name: Mypy + # runs-on: "ubuntu-latest" + # needs: detect-ci-trigger + # if: needs.detect-ci-trigger.outputs.triggered == 'false' + # defaults: + # run: + # shell: bash -l {0} + # env: + # CONDA_ENV_FILE: ci/environment.yml + # PYTHON_VERSION: "3.10" - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 # Fetch all history for all branches and tags. + # steps: + # - uses: actions/checkout@v3 + # with: + # fetch-depth: 0 # Fetch all history for all branches and tags. - - name: set environment variables - run: | - echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV - - name: Setup micromamba - uses: mamba-org/provision-with-micromamba@34071ca7df4983ccd272ed0d3625818b27b70dcc - with: - environment-file: ${{env.CONDA_ENV_FILE}} - environment-name: xarray-tests - extra-specs: | - python=${{env.PYTHON_VERSION}} - cache-env: true - cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" - - name: Install xarray - run: | - python -m pip install --no-deps -e . - - name: Version info - run: | - conda info -a - conda list - - name: Install mypy - run: | - python -m pip install mypy + # - name: set environment variables + # run: | + # echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + # - name: Setup micromamba + # uses: mamba-org/provision-with-micromamba@34071ca7df4983ccd272ed0d3625818b27b70dcc + # with: + # environment-file: ${{env.CONDA_ENV_FILE}} + # environment-name: xarray-tests + # extra-specs: | + # python=${{env.PYTHON_VERSION}} + # cache-env: true + # cache-env-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" + # - name: Install xarray + # run: | + # python -m pip install --no-deps -e . + # - name: Version info + # run: | + # conda info -a + # conda list + # - name: Install mypy + # run: | + # python -m pip install mypy - - name: Run mypy - run: | - python -m mypy --install-types --non-interactive + # - name: Run mypy + # run: | + # python -m mypy --install-types --non-interactive From 7260660f0cf4582fbe661109f8c33cfb4108a983 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 22 Sep 2022 18:06:43 +0200 Subject: [PATCH 35/38] Update flox/xarray.py Co-authored-by: Deepak Cherian --- flox/xarray.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/flox/xarray.py b/flox/xarray.py index db23b35ea..7204a1d6a 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -261,6 +261,8 @@ def xarray_reduce( exclude_dims = tuple(d for d in ds.dims if d not in grouper_dims and d not in dim_tuple) ds_broad, *by_broad = xr.broadcast(ds, *by_da, exclude=exclude_dims) + # all members of by_broad have the same dimensions + # so we just pull by_broad[0].dims if dim is None if not dim_tuple: dim_tuple = tuple(by_broad[0].dims) From b34c26897fbe5c6eb79b805ced100f92e02bbaeb Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 22 Sep 2022 18:06:53 +0200 Subject: [PATCH 36/38] Update flox/xarray.py Co-authored-by: Deepak Cherian --- flox/xarray.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flox/xarray.py b/flox/xarray.py index 7204a1d6a..36d9fb6d7 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -319,6 +319,7 @@ def xarray_reduce( expected_groups[idx] = expect_index group_sizes[group_name] = len(expect_index) else: + # This will never be reached raise ValueError("expect_index cannot be None") def wrapper(array, *by, func, skipna, **kwargs): From eaf93d2c9f985f49c5d48d657bf1320e5557d797 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 22 Sep 2022 18:13:46 +0200 Subject: [PATCH 37/38] Use if else format to avoid tuple creation --- flox/xarray.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/flox/xarray.py b/flox/xarray.py index 36d9fb6d7..ac112271b 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -245,9 +245,11 @@ def xarray_reduce( ds = ds.drop_vars([var for var in maybe_drop if var in ds.variables]) if dim is Ellipsis: - dim_tuple = tuple(obj.dims) - if by_da[0].name in ds.dims and not isbins[0]: - dim_tuple = tuple(d for d in dim_tuple if d != by_da[0].name) + name_ = by_da[0].name + if name_ in ds.dims and not isbins[0]: + dim_tuple = tuple(d for d in dim_tuple if d != name_) + else: + dim_tuple = tuple(obj.dims) elif dim is not None: dim_tuple = _atleast_1d(dim) else: From 9486184e7962c200591ddfbb8bbbaa34b22d2614 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 22 Sep 2022 18:34:55 +0200 Subject: [PATCH 38/38] Update xarray.py --- flox/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flox/xarray.py b/flox/xarray.py index ac112271b..0bf0e46cb 100644 --- a/flox/xarray.py +++ b/flox/xarray.py @@ -247,7 +247,7 @@ def xarray_reduce( if dim is Ellipsis: name_ = by_da[0].name if name_ in ds.dims and not isbins[0]: - dim_tuple = tuple(d for d in dim_tuple if d != name_) + dim_tuple = tuple(d for d in obj.dims if d != name_) else: dim_tuple = tuple(obj.dims) elif dim is not None: