-
-
Notifications
You must be signed in to change notification settings - Fork 19.4k
[ArrayManager] GroupBy cython aggregations (no fallback) #39885
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
df70d2d
9cbbf97
692175e
e8e108b
a5fb361
a7bf71e
8c1b8a2
06b6f3f
244152b
32bf7d1
b44804e
50fb97f
1d63f72
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -235,16 +235,19 @@ def shape(self) -> Shape: | |
| def ndim(self) -> int: | ||
| return len(self.axes) | ||
|
|
||
| def set_axis(self, axis: int, new_labels: Index) -> None: | ||
| def set_axis( | ||
| self, axis: int, new_labels: Index, verify_integrity: bool = True | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I added a |
||
| ) -> None: | ||
| # Caller is responsible for ensuring we have an Index object. | ||
| old_len = len(self.axes[axis]) | ||
| new_len = len(new_labels) | ||
| if verify_integrity: | ||
| old_len = len(self.axes[axis]) | ||
| new_len = len(new_labels) | ||
|
|
||
| if new_len != old_len: | ||
| raise ValueError( | ||
| f"Length mismatch: Expected axis has {old_len} elements, new " | ||
| f"values have {new_len} elements" | ||
| ) | ||
| if new_len != old_len: | ||
| raise ValueError( | ||
| f"Length mismatch: Expected axis has {old_len} elements, new " | ||
| f"values have {new_len} elements" | ||
| ) | ||
|
|
||
| self.axes[axis] = new_labels | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,7 +41,10 @@ | |
| "max", | ||
| ], | ||
| ) | ||
| def test_cythonized_aggers(op_name): | ||
| def test_cythonized_aggers(op_name, using_array_manager): | ||
| if using_array_manager and op_name in {"count", "sem"}: | ||
| # TODO(ArrayManager) groupby count/sem | ||
| pytest.skip("ArrayManager groupby count/sem not yet implemented") | ||
|
||
| data = { | ||
| "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan], | ||
| "B": ["A", "B"] * 6, | ||
|
|
@@ -281,7 +284,7 @@ def test_read_only_buffer_source_agg(agg): | |
| "species": ["setosa", "setosa", "setosa", "setosa", "setosa"], | ||
| } | ||
| ) | ||
| df._mgr.blocks[0].values.flags.writeable = False | ||
| df._mgr.arrays[0].flags.writeable = False | ||
|
|
||
| result = df.groupby(["species"]).agg({"sepal_length": agg}) | ||
| expected = df.copy().groupby(["species"]).agg({"sepal_length": agg}) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could combine this with previous check as
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That would be nice, but the problem is that we still need to keep DatetimeArray intact for DatetimeTZBlock. So we would still need the
`if hasattr(arr, "tz") and arr.tz is None` check as well, in which case it doesn't necessarily become more readable to combine both checks. Edit: the diff would be:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
instead of
`and getattr(arr, "tz", None) is None`, how about `isinstance(arr.dtype, np.dtype)`? Either way works, I guess.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That still gives the same length of the
`if` check as in my diff example above, which I don't find an improvement in readability.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yah the only possible difference is for mypy