Skip to content

ARROW-3953: [Python] Compat with pandas 0.24 rename of MultiIndex labels -> codes #3120

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions python/pyarrow/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,14 @@ def _pandas_type_to_numpy_type(pandas_type):
return np.dtype(pandas_type)


def _get_multiindex_codes(mi):
    """Return the codes of `mi` if it is a pandas MultiIndex, else None.

    pandas 0.24 renamed ``MultiIndex.labels`` to ``MultiIndex.codes``; this
    reads whichever of the two attributes the object provides.
    """
    if not isinstance(mi, pd.MultiIndex):
        return None
    # Prefer the new attribute name; fall back to the pre-0.24 spelling.
    attr_name = 'codes' if hasattr(mi, 'codes') else 'labels'
    return getattr(mi, attr_name)


def _reconstruct_columns_from_metadata(columns, column_indexes):
"""Construct a pandas MultiIndex from `columns` and column index metadata
in `column_indexes`.
Expand All @@ -752,7 +760,7 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):
# Get levels and labels, and provide sane defaults if the index has a
# single level to avoid if/else spaghetti.
levels = getattr(columns, 'levels', None) or [columns]
labels = getattr(columns, 'labels', None) or [
labels = _get_multiindex_codes(columns) or [
pd.RangeIndex(len(level)) for level in levels
]

Expand All @@ -779,7 +787,7 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):

new_levels.append(level)

return pd.MultiIndex(levels=new_levels, labels=labels, names=columns.names)
return pd.MultiIndex(new_levels, labels, names=columns.names)


def _table_to_blocks(options, block_table, memory_pool, categories):
Expand All @@ -796,7 +804,7 @@ def _table_to_blocks(options, block_table, memory_pool, categories):
def _flatten_single_level_multiindex(index):
if isinstance(index, pd.MultiIndex) and index.nlevels == 1:
levels, = index.levels
labels, = index.labels
labels, = _get_multiindex_codes(index)

# Cheaply check that we do not somehow have duplicate column names
if not index.is_unique:
Expand Down
10 changes: 10 additions & 0 deletions python/pyarrow/tests/test_convert_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,16 @@ def test_multiindex_columns_unicode(self):
df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')], columns=columns)
_check_pandas_roundtrip(df, preserve_index=True)

def test_multiindex_doesnt_warn(self):
    """Check that round-tripping MultiIndex columns emits no warnings.

    ARROW-3953: pandas 0.24 rename of MultiIndex labels to codes.
    """
    # Local import so the block does not depend on the file's import list.
    import warnings

    columns = pd.MultiIndex.from_arrays([['one', 'two'], ['X', 'Y']])
    df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')], columns=columns)

    # ``pytest.warns(None)`` is deprecated since pytest 7 and rejected by
    # later releases, so record warnings with the standard library instead.
    with warnings.catch_warnings(record=True) as record:
        # Record every warning, even ones already reported once elsewhere.
        warnings.simplefilter('always')
        _check_pandas_roundtrip(df, preserve_index=True)

    assert len(record) == 0

def test_integer_index_column(self):
df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')])
_check_pandas_roundtrip(df, preserve_index=True)
Expand Down