From 35a972ac736ba76b3a551a6de201c0db030162ad Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 28 Apr 2023 09:11:48 -0600 Subject: [PATCH 1/4] Optimize nanmean with engine="flox" --- flox/aggregate_flox.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py index 4df3f77a4..165d30feb 100644 --- a/flox/aggregate_flox.py +++ b/flox/aggregate_flox.py @@ -99,7 +99,11 @@ def nansum_of_squares(group_idx, array, *, axis=-1, size=None, fill_value=None, def nanlen(group_idx, array, *args, **kwargs): - return sum(group_idx, (~isnull(array)).astype(int), *args, **kwargs) + if np.issubdtype(array.dtype, bool): + array = ~array + else: + array = ~isnull(array) + return sum(group_idx, array.view(np.int8), *args, **kwargs) def mean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): @@ -107,14 +111,16 @@ def mean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): fill_value = 0 out = sum(group_idx, array, axis=axis, size=size, dtype=dtype, fill_value=fill_value) with np.errstate(invalid="ignore", divide="ignore"): - out /= nanlen(group_idx, array, size=size, axis=axis, fill_value=0) + out /= nanlen(group_idx, array, size=size, axis=axis, fill_value=0, dtype=np.intp) return out def nanmean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): if fill_value is None: fill_value = 0 - out = nansum(group_idx, array, size=size, axis=axis, dtype=dtype, fill_value=fill_value) + mask = isnull(array) + masked = np.where(mask, 0, array) + out = sum(group_idx, masked, size=size, axis=axis, dtype=dtype, fill_value=fill_value) with np.errstate(invalid="ignore", divide="ignore"): - out /= nanlen(group_idx, array, size=size, axis=axis, fill_value=0) + out /= nanlen(group_idx, mask, size=size, axis=axis, fill_value=0, dtype=np.intp) return out From f6d9deb42863827e3dadb55c4b3f258b7f823842 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 28 Apr 2023 09:27:22 -0600 Subject: [PATCH 2/4] Cleanup --- flox/aggregate_flox.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py index 165d30feb..9f25322bf 100644 --- a/flox/aggregate_flox.py +++ b/flox/aggregate_flox.py @@ -99,11 +99,7 @@ def nansum_of_squares(group_idx, array, *, axis=-1, size=None, fill_value=None, def nanlen(group_idx, array, *args, **kwargs): - if np.issubdtype(array.dtype, bool): - array = ~array - else: - array = ~isnull(array) - return sum(group_idx, array.view(np.int8), *args, **kwargs) + return sum(group_idx, (~isnull(array)).view(np.int8), *args, **kwargs) def mean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): @@ -122,5 +118,5 @@ def nanmean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None masked = np.where(mask, 0, array) out = sum(group_idx, masked, size=size, axis=axis, dtype=dtype, fill_value=fill_value) with np.errstate(invalid="ignore", divide="ignore"): - out /= nanlen(group_idx, mask, size=size, axis=axis, fill_value=0, dtype=np.intp) + out /= sum(group_idx, mask.view(np.int8), size=size, axis=axis, fill_value=0, dtype=np.intp) return out From 75e3466eab7c68f35d1a638550f601cd2f42b74e Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 28 Apr 2023 09:36:55 -0600 Subject: [PATCH 3/4] fix --- flox/aggregate_flox.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py index 9f25322bf..47cf31331 100644 --- a/flox/aggregate_flox.py +++ b/flox/aggregate_flox.py @@ -118,5 +118,7 @@ def nanmean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None masked = np.where(mask, 0, array) out = sum(group_idx, masked, size=size, axis=axis, dtype=dtype, fill_value=fill_value) with np.errstate(invalid="ignore", divide="ignore"): - out /= sum(group_idx, mask.view(np.int8), size=size, axis=axis, fill_value=0, dtype=np.intp) + out /= sum( + group_idx, (~mask).view(np.int8), size=size, axis=axis, fill_value=0, dtype=np.intp + ) return out From d5b3b82898497a324a073a65ce5b2abca85c2e2b Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 3 May 2023 21:19:50 -0600 Subject: [PATCH 4/4] tweak --- flox/aggregate_flox.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/flox/aggregate_flox.py b/flox/aggregate_flox.py index 47cf31331..0663f6ef7 100644 --- a/flox/aggregate_flox.py +++ b/flox/aggregate_flox.py @@ -114,11 +114,9 @@ def mean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): def nanmean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): if fill_value is None: fill_value = 0 - mask = isnull(array) - masked = np.where(mask, 0, array) + mask = ~isnull(array) + masked = np.where(mask, array, 0) out = sum(group_idx, masked, size=size, axis=axis, dtype=dtype, fill_value=fill_value) with np.errstate(invalid="ignore", divide="ignore"): - out /= sum( - group_idx, (~mask).view(np.int8), size=size, axis=axis, fill_value=0, dtype=np.intp - ) + out /= sum(group_idx, mask.view(np.int8), size=size, axis=axis, fill_value=0, dtype=np.intp) return out