diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index d114f26788f00..43f517fe5a02f 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -178,6 +178,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :func:`to_datetime` returning pd.NaT for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`) +- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`) - Timedelta diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 5e327adfb8905..22cce5c614d5a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -348,22 +348,17 @@ def ndarray_to_mgr( # on the entire block; this is to convert if we have datetimelike's # embedded in an object type if dtype is None and is_object_dtype(values.dtype): - - if values.ndim == 2 and values.shape[0] != 1: - # transpose and separate blocks - - dtlike_vals = [maybe_infer_to_datetimelike(row) for row in values] - dvals_list = [ensure_block_shape(dval, 2) for dval in dtlike_vals] - - # TODO: What about re-joining object columns? + obj_columns = list(values) + maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns] + # don't convert (and copy) the objects if no type inference occurs + if any(x is not y for x, y in zip(obj_columns, maybe_datetime)): + dvals_list = [ensure_block_shape(dval, 2) for dval in maybe_datetime] block_values = [ new_block(dvals_list[n], placement=n, ndim=2) for n in range(len(dvals_list)) ] - else: - datelike_vals = maybe_infer_to_datetimelike(values) - nb = new_block(datelike_vals, placement=slice(len(columns)), ndim=2) + nb = new_block(values, placement=slice(len(columns)), ndim=2) block_values = [nb] else: nb = new_block(values, placement=slice(len(columns)), ndim=2) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1d286e379da86..c992606cc88af 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -253,6 +253,20 @@ def test_constructor_dtype_nocast_view_2d_array(self): should_be_view[0][0] = 97 assert df.values[0, 0] == 97 + @td.skip_array_manager_invalid_test + def test_1d_object_array_does_not_copy(self): + # https://github.com/pandas-dev/pandas/issues/39272 + arr = np.array(["a", "b"], dtype="object") + df = DataFrame(arr) + assert np.shares_memory(df.values, arr) + + @td.skip_array_manager_invalid_test + def test_2d_object_array_does_not_copy(self): + # https://github.com/pandas-dev/pandas/issues/39272 + arr = np.array([["a", "b"], ["c", "d"]], dtype="object") + df = DataFrame(arr) + assert np.shares_memory(df.values, arr) + def test_constructor_dtype_list_data(self): df = DataFrame([[1, "2"], [None, "a"]], dtype=object) assert df.loc[1, 0] is None