From 236d3f3c3a366485037bb3579951c58354104930 Mon Sep 17 00:00:00 2001 From: Thomas H Date: Thu, 28 Mar 2024 08:01:52 -0400 Subject: [PATCH 1/8] Reorder slice and hashable in __getitem__ --- pandas/core/frame.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b218dd899c8f8..bc265df5c8304 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3849,6 +3849,11 @@ def __getitem__(self, key): key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) + # Do we have a slicer (on rows)? + # As of Python 3.12, slice is hashable so check it first + if isinstance(key, slice): + return self._getitem_slice(key) + if is_hashable(key) and not is_iterator(key): # is_iterator to exclude generator e.g. test_getitem_listlike # shortcut if the key is in columns @@ -3865,10 +3870,6 @@ def __getitem__(self, key): elif is_mi and self.columns.is_unique and key in self.columns: return self._getitem_multilevel(key) - # Do we have a slicer (on rows)? - if isinstance(key, slice): - return self._getitem_slice(key) - # Do we have a (boolean) DataFrame? if isinstance(key, DataFrame): return self.where(key) From 19efbbd85d91fd26d7bb2989a1485adfabc99716 Mon Sep 17 00:00:00 2001 From: Thomas H Date: Thu, 28 Mar 2024 09:19:39 -0400 Subject: [PATCH 2/8] Add unit test --- pandas/tests/frame/indexing/test_indexing.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 49e5c4aff5afe..da8416a86d535 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -524,6 +524,22 @@ def test_loc_setitem_boolean_mask_allfalse(self): result.loc[result.b.isna(), "a"] = result.a.copy() tm.assert_frame_equal(result, df) + def test_getitem_slice_empty(self): + df = DataFrame( + [[1]], + columns=pd.MultiIndex.from_product([["A"], ["a"]]) + ) + result = df[:] + + expected = DataFrame( + [[1]], + columns=pd.MultiIndex.from_product([["A"], ["a"]]) + ) + + tm.assert_frame_equal(result, expected) + # Ensure df[:] returns a view of df, not the same object + assert result is not expected + def test_getitem_fancy_slice_integers_step(self): df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) From ade1f854478f4af1bb565c5dc89077ed5371030f Mon Sep 17 00:00:00 2001 From: Thomas H Date: Thu, 28 Mar 2024 13:27:47 +0000 Subject: [PATCH 3/8] Fix test and formatting --- pandas/tests/frame/indexing/test_indexing.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index da8416a86d535..5a6fe07aa007b 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -525,20 +525,14 @@ def test_loc_setitem_boolean_mask_allfalse(self): tm.assert_frame_equal(result, df) def test_getitem_slice_empty(self): - df = DataFrame( - [[1]], - columns=pd.MultiIndex.from_product([["A"], ["a"]]) - ) + df = DataFrame([[1]], columns=MultiIndex.from_product([["A"], ["a"]])) result = df[:] - expected = DataFrame( - [[1]], - columns=pd.MultiIndex.from_product([["A"], ["a"]]) - ) + expected = DataFrame([[1]], columns=MultiIndex.from_product([["A"], ["a"]])) tm.assert_frame_equal(result, expected) # Ensure df[:] returns a view of df, not the same object - assert result is not expected + assert result is not df def test_getitem_fancy_slice_integers_step(self): df = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) From a85f1d7d185a2af6fef3c2529f91baa37d9b5ce9 Mon Sep 17 00:00:00 2001 From: Thomas H Date: Thu, 28 Mar 2024 13:31:10 +0000 Subject: [PATCH 4/8] Update whatsnew --- doc/source/whatsnew/v2.2.2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index d0f8951ac07ad..abf3b97b00c23 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -25,6 +25,7 @@ Bug fixes - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the column's type was nullable boolean (:issue:`55332`) - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`) - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`) +- :meth:`DataFrame.__getitem__` was returning modified columns when called with ``slice`` on Python 3.12 (:issue:`57500`) .. --------------------------------------------------------------------------- .. _whatsnew_222.other: From 8a0043818bb52d36f71cffef585702335cfacaa8 Mon Sep 17 00:00:00 2001 From: Thomas H Date: Mon, 15 Apr 2024 13:14:32 +0000 Subject: [PATCH 5/8] Restore original flow ordering --- pandas/core/frame.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2d2947714e125..cd4812c3f78ae 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3855,13 +3855,10 @@ def __getitem__(self, key): key = lib.item_from_zerodim(key) key = com.apply_if_callable(key, self) - # Do we have a slicer (on rows)? - # As of Python 3.12, slice is hashable so check it first - if isinstance(key, slice): - return self._getitem_slice(key) - - if is_hashable(key) and not is_iterator(key): + if is_hashable(key) and not is_iterator(key) and not isinstance(key, slice): # is_iterator to exclude generator e.g. test_getitem_listlike + # As of Python 3.12, slice is hashable which breaks MultiIndex (GH#57500) + # shortcut if the key is in columns is_mi = isinstance(self.columns, MultiIndex) # GH#45316 Return view if key is not duplicated @@ -3876,6 +3873,10 @@ def __getitem__(self, key): elif is_mi and self.columns.is_unique and key in self.columns: return self._getitem_multilevel(key) + # Do we have a slicer (on rows)? + if isinstance(key, slice): + return self._getitem_slice(key) + # Do we have a (boolean) DataFrame? if isinstance(key, DataFrame): return self.where(key) From 8dda6416836ae0bbaa53fbc1bc6540a394e0d237 Mon Sep 17 00:00:00 2001 From: Thomas H Date: Mon, 15 Apr 2024 13:18:15 +0000 Subject: [PATCH 6/8] Move whatsnew entry to 3.0.0 --- doc/source/whatsnew/v2.2.2.rst | 1 - doc/source/whatsnew/v3.0.0.rst | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst index e204141fb3423..72a2f84c4aaee 100644 --- a/doc/source/whatsnew/v2.2.2.rst +++ b/doc/source/whatsnew/v2.2.2.rst @@ -41,7 +41,6 @@ Bug fixes - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when the column's type was nullable boolean (:issue:`55332`) - :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`) - :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) ``'string[pyarrow]'`` without missing values (:issue:`57761`) -- :meth:`DataFrame.__getitem__` was returning modified columns when called with ``slice`` on Python 3.12 (:issue:`57500`) - :meth:`DataFrame.to_sql` was failing to find the right table when using the schema argument (:issue:`57539`) .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f709bec842c86..8534e5310eb08 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -340,6 +340,7 @@ Bug fixes ~~~~~~~~~ - Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) - Fixed bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`) +- Fixed bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` on Python 3.12 (:issue:`57500`) - Fixed bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`) - Fixed bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) @@ -352,6 +353,7 @@ Bug fixes - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) - Fixed bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) + Categorical ^^^^^^^^^^^ - From 603a20925d00a1222a06d1ca74d84d9b0f92bc91 Mon Sep 17 00:00:00 2001 From: Thomas H Date: Mon, 15 Apr 2024 17:24:51 +0000 Subject: [PATCH 7/8] Move whatsnew entry to Indexing --- doc/source/whatsnew/v3.0.0.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 8534e5310eb08..f0f029ab77146 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -340,7 +340,6 @@ Bug fixes ~~~~~~~~~ - Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) - Fixed bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`) -- Fixed bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` on Python 3.12 (:issue:`57500`) - Fixed bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`) - Fixed bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) @@ -353,7 +352,6 @@ Bug fixes - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) - Fixed bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) - Categorical ^^^^^^^^^^^ - @@ -397,7 +395,7 @@ Interval Indexing ^^^^^^^^ -- +- Fixed bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` on Python 3.12 (:issue:`57500`) - Missing From 8d221fe345890dcd93a4f1b72fdf8df1f79b519c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:15:45 -0700 Subject: [PATCH 8/8] Update doc/source/whatsnew/v3.0.0.rst --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f0f029ab77146..27e7abe333d6f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -395,7 +395,7 @@ Interval Indexing ^^^^^^^^ -- Fixed bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` on Python 3.12 (:issue:`57500`) +- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Missing