diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 28819c522c696..609c14ad92fba 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -128,3 +128,6 @@ Bug Fixes - Bug in the link-time error caused by C ``inline`` functions on FreeBSD 10+ (with ``clang``) (:issue:`10510`) - Bug in ``DataFrame.to_csv`` in passing through arguments for formatting ``MultiIndexes``, including ``date_format`` (:issue:`7791`) - Bug in ``DataFrame.join()`` with ``how='right'`` producing a ``TypeError`` (:issue:`11519`) +- Bug in ``Series.quantile`` where passing an empty list returned a result whose ``Index`` had ``object`` dtype (:issue:`11588`) +- Bug in ``pd.merge`` results in empty ``Int64Index`` rather than ``Index(dtype=object)`` when the merge result is empty (:issue:`11588`) + diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 99ee50a9ae7fb..929e07eea98c4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1779,7 +1779,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, avoid duplicating data method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}, optional method to use for filling holes in reindexed DataFrame. - Please note: this is only applicable to DataFrames/Series with a + Please note: this is only applicable to DataFrames/Series with a monotonically increasing/decreasing index. * default: don't fill gaps * pad / ffill: propagate last valid observation forward to next valid @@ -1822,7 +1822,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, Create a new index and reindex the dataframe. By default values in the new index that do not have corresponding - records in the dataframe are assigned ``NaN``. + records in the dataframe are assigned ``NaN``. >>> new_index= ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', ... 
'Chrome'] @@ -1836,8 +1836,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, We can fill in the missing values by passing a value to the keyword ``fill_value``. Because the index is not monotonically - increasing or decreasing, we cannot use arguments to the keyword - ``method`` to fill the ``NaN`` values. + increasing or decreasing, we cannot use arguments to the keyword + ``method`` to fill the ``NaN`` values. >>> df.reindex(new_index, fill_value=0) http_status response_time @@ -1855,8 +1855,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, IE10 404 0.08 Chrome 200 0.02 - To further illustrate the filling functionality in - ``reindex``, we will create a dataframe with a + To further illustrate the filling functionality in + ``reindex``, we will create a dataframe with a monotonically increasing index (for example, a sequence of dates). @@ -1873,7 +1873,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, 2010-01-06 88 Suppose we decide to expand the dataframe to cover a wider - date range. + date range. >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D') >>> df2.reindex(date_index2) @@ -1890,10 +1890,10 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, 2010-01-07 NaN The index entries that did not have a value in the original data frame - (for example, '2009-12-29') are by default filled with ``NaN``. + (for example, '2009-12-29') are by default filled with ``NaN``. If desired, we can fill in the missing values using one of several - options. - + options. + For example, to backpropagate the last valid value to fill the ``NaN`` values, pass ``bfill`` as an argument to the ``method`` keyword. 
@@ -1911,7 +1911,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, 2010-01-07 NaN Please note that the ``NaN`` value present in the original dataframe - (at index value 2010-01-03) will not be filled by any of the + (at index value 2010-01-03) will not be filled by any of the value propagation schemes. This is because filling while reindexing does not look at dataframe values, but only compares the original and desired indexes. If you do want to fill in the ``NaN`` values present diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0f3795fcad0c3..fd4e680fef431 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -524,7 +524,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): Parameters ---------- indexer : tuple, slice, scalar - The indexer used to get the locations that will be set to + The indexer used to get the locations that will be set to `ser` ser : pd.Series @@ -532,7 +532,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): multiindex_indexer : boolean, optional Defaults to False. Should be set to True if `indexer` was from - a `pd.MultiIndex`, to avoid unnecessary broadcasting. + a `pd.MultiIndex`, to avoid unnecessary broadcasting. 
Returns: diff --git a/pandas/core/series.py b/pandas/core/series.py index b12a31d64eaf7..000e8c4c2dab8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -27,7 +27,7 @@ _maybe_box_datetimelike, ABCDataFrame, _dict_compat) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, - _ensure_index) + Float64Index, _ensure_index) from pandas.core.indexing import check_bool_indexer, maybe_convert_indices from pandas.core import generic, base from pandas.core.internals import SingleBlockManager @@ -1271,6 +1271,8 @@ def quantile(self, q=0.5): def multi(values, qs): if com.is_list_like(qs): values = [_quantile(values, x*100) for x in qs] + # force a Float64Index so that an empty result does not get an object-dtype index + qs = Float64Index(qs) return self._constructor(values, index=qs, name=self.name) else: return _quantile(values, qs*100) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index a03e54f8078a7..27e607870cebc 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -846,7 +846,8 @@ def test_int_types(self): # test with convert_float=False comes back as float float_frame = frame.astype(float) recons = read_excel(path, 'test1', convert_float=False) - tm.assert_frame_equal(recons, float_frame) + tm.assert_frame_equal(recons, float_frame, + check_index_type=False, check_column_type=False) def test_float_types(self): _skip_if_no_xlrd() @@ -1186,9 +1187,11 @@ def test_to_excel_output_encoding(self): _skip_if_no_xlrd() ext = self.ext filename = '__tmp_to_excel_float_format__.' 
+ ext - df = DataFrame([[u('\u0192'), u('\u0193'), u('\u0194')], - [u('\u0195'), u('\u0196'), u('\u0197')]], - index=[u('A\u0192'), 'B'], columns=[u('X\u0193'), 'Y', 'Z']) + + # avoid mixed inferred_type + df = DataFrame([[u'\u0192', u'\u0193', u'\u0194'], + [u'\u0195', u'\u0196', u'\u0197']], + index=[u'A\u0192', u'B'], columns=[u'X\u0193', u'Y', u'Z']) with ensure_clean(filename) as filename: df.to_excel(filename, sheet_name='TestSheet', encoding='utf8') diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 40cdc8fe8478c..5f41a803538e6 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -134,7 +134,8 @@ def _check(df): def test_frame_from_json_to_json(self): def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_dtype=True, raise_ok=None, - sort=None): + sort=None, check_index_type=True, + check_column_type=True): if sort is not None: df = df.sort_values(sort) else: @@ -191,20 +192,29 @@ def _check_orient(df, orient, dtype=None, numpy=False, assert_almost_equal(df.values, unser.values) else: if convert_axes: - assert_frame_equal(df, unser, check_dtype=check_dtype) + assert_frame_equal(df, unser, check_dtype=check_dtype, + check_index_type=check_index_type, + check_column_type=check_column_type) else: assert_frame_equal(df, unser, check_less_precise=False, check_dtype=check_dtype) - def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None, sort=None): + def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None, + sort=None, check_index_type=True, + check_column_type=True): # numpy=False if convert_axes: - _check_orient(df, "columns", dtype=dtype, sort=sort) - _check_orient(df, "records", dtype=dtype, sort=sort) - _check_orient(df, "split", dtype=dtype, sort=sort) - _check_orient(df, "index", dtype=dtype, sort=sort) - _check_orient(df, "values", dtype=dtype, sort=sort) + _check_orient(df, "columns", dtype=dtype, 
sort=sort, + check_index_type=False, check_column_type=False) + _check_orient(df, "records", dtype=dtype, sort=sort, + check_index_type=False, check_column_type=False) + _check_orient(df, "split", dtype=dtype, sort=sort, + check_index_type=False, check_column_type=False) + _check_orient(df, "index", dtype=dtype, sort=sort, + check_index_type=False, check_column_type=False) + _check_orient(df, "values", dtype=dtype, sort=sort, + check_index_type=False, check_column_type=False) _check_orient(df, "columns", dtype=dtype, convert_axes=False, sort=sort) _check_orient(df, "records", dtype=dtype, convert_axes=False, sort=sort) @@ -215,15 +225,20 @@ def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None, sort=No # numpy=True and raise_ok might be not None, so ignore the error if convert_axes: _check_orient(df, "columns", dtype=dtype, numpy=True, - raise_ok=raise_ok, sort=sort) + raise_ok=raise_ok, sort=sort, + check_index_type=False, check_column_type=False) _check_orient(df, "records", dtype=dtype, numpy=True, - raise_ok=raise_ok, sort=sort) + raise_ok=raise_ok, sort=sort, + check_index_type=False, check_column_type=False) _check_orient(df, "split", dtype=dtype, numpy=True, - raise_ok=raise_ok, sort=sort) + raise_ok=raise_ok, sort=sort, + check_index_type=False, check_column_type=False) _check_orient(df, "index", dtype=dtype, numpy=True, - raise_ok=raise_ok, sort=sort) + raise_ok=raise_ok, sort=sort, + check_index_type=False, check_column_type=False) _check_orient(df, "values", dtype=dtype, numpy=True, - raise_ok=raise_ok, sort=sort) + raise_ok=raise_ok, sort=sort, + check_index_type=False, check_column_type=False) _check_orient(df, "columns", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok, sort=sort) @@ -250,7 +265,7 @@ def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None, sort=No biggie = DataFrame(np.zeros((200, 4)), columns=[str(i) for i in range(4)], index=[str(i) for i in range(200)]) - 
_check_all_orients(biggie,dtype=False,convert_axes=False) + _check_all_orients(biggie,dtype=False, convert_axes=False) # dtypes _check_all_orients(DataFrame(biggie, dtype=np.float64), @@ -264,7 +279,8 @@ def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None, sort=No _check_all_orients(self.categorical, sort='sort', raise_ok=ValueError) # empty - _check_all_orients(self.empty_frame) + _check_all_orients(self.empty_frame, check_index_type=False, + check_column_type=False) # time series data _check_all_orients(self.tsframe) @@ -354,14 +370,16 @@ def test_frame_to_json_except(self): def test_frame_empty(self): df = DataFrame(columns=['jim', 'joe']) self.assertFalse(df._is_mixed_type) - assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df) + assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df, + check_index_type=False) def test_frame_empty_mixedtype(self): # mixed type df = DataFrame(columns=['jim', 'joe']) df['joe'] = df['joe'].astype('i8') self.assertTrue(df._is_mixed_type) - assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df) + assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df, + check_index_type=False) def test_frame_mixedtype_orient(self): # GH10289 vals = [[10, 1, 'foo', .1, .01], @@ -457,7 +475,8 @@ def test_series_non_unique_index(self): def test_series_from_json_to_json(self): - def _check_orient(series, orient, dtype=None, numpy=False): + def _check_orient(series, orient, dtype=None, numpy=False, + check_index_type=True): series = series.sort_index() unser = read_json(series.to_json(orient=orient), typ='series', orient=orient, numpy=numpy, @@ -467,22 +486,33 @@ def _check_orient(series, orient, dtype=None, numpy=False): assert_almost_equal(series.values, unser.values) else: if orient == "split": - assert_series_equal(series, unser) + assert_series_equal(series, unser, + check_index_type=check_index_type) else: - assert_series_equal(series, unser, check_names=False) - - 
def _check_all_orients(series, dtype=None): - _check_orient(series, "columns", dtype=dtype) - _check_orient(series, "records", dtype=dtype) - _check_orient(series, "split", dtype=dtype) - _check_orient(series, "index", dtype=dtype) + assert_series_equal(series, unser, check_names=False, + check_index_type=check_index_type) + + def _check_all_orients(series, dtype=None, check_index_type=True): + _check_orient(series, "columns", dtype=dtype, + check_index_type=check_index_type) + _check_orient(series, "records", dtype=dtype, + check_index_type=check_index_type) + _check_orient(series, "split", dtype=dtype, + check_index_type=check_index_type) + _check_orient(series, "index", dtype=dtype, + check_index_type=check_index_type) _check_orient(series, "values", dtype=dtype) - _check_orient(series, "columns", dtype=dtype, numpy=True) - _check_orient(series, "records", dtype=dtype, numpy=True) - _check_orient(series, "split", dtype=dtype, numpy=True) - _check_orient(series, "index", dtype=dtype, numpy=True) - _check_orient(series, "values", dtype=dtype, numpy=True) + _check_orient(series, "columns", dtype=dtype, numpy=True, + check_index_type=check_index_type) + _check_orient(series, "records", dtype=dtype, numpy=True, + check_index_type=check_index_type) + _check_orient(series, "split", dtype=dtype, numpy=True, + check_index_type=check_index_type) + _check_orient(series, "index", dtype=dtype, numpy=True, + check_index_type=check_index_type) + _check_orient(series, "values", dtype=dtype, numpy=True, + check_index_type=check_index_type) # basic _check_all_orients(self.series) @@ -493,7 +523,12 @@ def _check_all_orients(series, dtype=None): index=self.objSeries.index, name=self.objSeries.name) _check_all_orients(objSeries, dtype=False) - _check_all_orients(self.empty_series) + + # empty_series has an empty index with object dtype, + # which cannot be recovered on a round trip + self.assertEqual(self.empty_series.index.dtype, np.object_) + _check_all_orients(self.empty_series, check_index_type=False) 
+ _check_all_orients(self.ts) # dtype @@ -508,25 +543,30 @@ def test_series_to_json_except(self): def test_series_from_json_precise_float(self): s = Series([4.56, 4.56, 4.56]) result = read_json(s.to_json(), typ='series', precise_float=True) - assert_series_equal(result, s) + assert_series_equal(result, s, check_index_type=False) def test_frame_from_json_precise_float(self): df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]]) result = read_json(df.to_json(), precise_float=True) - assert_frame_equal(result, df) + assert_frame_equal(result, df, check_index_type=False, check_column_type=False) def test_typ(self): s = Series(lrange(6), index=['a','b','c','d','e','f'], dtype='int64') result = read_json(s.to_json(),typ=None) - assert_series_equal(result,s) + assert_series_equal(result, s) def test_reconstruction_index(self): df = DataFrame([[1, 2, 3], [4, 5, 6]]) result = read_json(df.to_json()) - # the index is serialized as strings....correct? + self.assertEqual(result.index.dtype, np.float64) + self.assertEqual(result.columns.dtype, np.float64) + assert_frame_equal(result, df, check_index_type=False, check_column_type=False) + + df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['A', 'B', 'C']) + result = read_json(df.to_json()) assert_frame_equal(result, df) def test_path(self): @@ -691,7 +731,7 @@ def test_misc_example(self): \\[left\\]: Index\\(\\[u?'a', u?'b'\\], dtype='object'\\) \\[right\\]: Int64Index\\(\\[0, 1\\], dtype='int64'\\)""" with tm.assertRaisesRegexp(AssertionError, error_msg): - assert_frame_equal(result, expected) + assert_frame_equal(result, expected, check_index_type=False) result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]') expected = DataFrame([[1,2], [1,2]], columns=['a','b']) @@ -717,13 +757,19 @@ def test_timedelta(self): converter = lambda x: pd.to_timedelta(x,unit='ms') s = Series([timedelta(23), timedelta(seconds=5)]) - self.assertEqual(s.dtype,'timedelta64[ns]') - assert_series_equal(s, 
pd.read_json(s.to_json(),typ='series').apply(converter)) + self.assertEqual(s.dtype, 'timedelta64[ns]') + # index will be float dtype + assert_series_equal(s, pd.read_json(s.to_json(),typ='series').apply(converter), + check_index_type=False) + + s = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1], dtype=float)) + self.assertEqual(s.dtype, 'timedelta64[ns]') + assert_series_equal(s, pd.read_json(s.to_json(), typ='series').apply(converter)) frame = DataFrame([timedelta(23), timedelta(seconds=5)]) self.assertEqual(frame[0].dtype,'timedelta64[ns]') - assert_frame_equal( - frame, pd.read_json(frame.to_json()).apply(converter)) + assert_frame_equal(frame, pd.read_json(frame.to_json()).apply(converter), + check_index_type=False, check_column_type=False) frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)], 'b': [1, 2], @@ -732,7 +778,7 @@ def test_timedelta(self): result = pd.read_json(frame.to_json(date_unit='ns')) result['a'] = pd.to_timedelta(result.a, unit='ns') result['c'] = pd.to_datetime(result.c) - assert_frame_equal(frame, result) + assert_frame_equal(frame, result, check_index_type=False) def test_mixed_timedelta_datetime(self): frame = DataFrame({'a': [timedelta(23), pd.Timestamp('20130101')]}, @@ -742,14 +788,14 @@ def test_mixed_timedelta_datetime(self): pd.Timestamp(frame.a[1]).value]}) result = pd.read_json(frame.to_json(date_unit='ns'), dtype={'a': 'int64'}) - assert_frame_equal(result, expected) + assert_frame_equal(result, expected, check_index_type=False) def test_default_handler(self): value = object() frame = DataFrame({'a': ['a', value]}) expected = frame.applymap(str) result = pd.read_json(frame.to_json(default_handler=str)) - assert_frame_equal(expected, result) + assert_frame_equal(expected, result, check_index_type=False) def test_default_handler_raises(self): def my_handler_raises(obj): diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 799c573b13c8b..87450ddde636e 100755 --- 
a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -2313,14 +2313,14 @@ def test_emtpy_with_multiindex(self): result = self.read_csv(StringIO(data), index_col=['x', 'y']) expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y'])) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_empty_with_reversed_multiindex(self): data = 'x,y,z' result = self.read_csv(StringIO(data), index_col=[1, 0]) expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x'])) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_empty_index_col_scenarios(self): data = 'x,y,z' @@ -2352,28 +2352,26 @@ def test_empty_index_col_scenarios(self): # list of int index_col, expected = [0, 1], DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y'])) - tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected) + tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected, + check_index_type=False) # list of str - index_col, expected = ( - ['x', 'y'], - DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y'])) - ) - tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected) + index_col = ['x', 'y'] + expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y'])) + tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected, + check_index_type=False) # list of int, reversed sequence - index_col, expected = ( - [1, 0], - DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x'])) - ) - tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected) + index_col = [1, 0] + expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, 
names=['y', 'x'])) + tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected, + check_index_type=False) # list of str, reversed sequence - index_col, expected = ( - ['y', 'x'], - DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x'])) - ) - tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected) + index_col = ['y', 'x'] + expected = DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x'])) + tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected, + check_index_type=False) def test_empty_with_index_col_false(self): # GH 10413 @@ -2434,11 +2432,11 @@ def test_empty_with_nrows_chunksize(self): result = pd.read_csv(StringIO('foo,bar\n'), nrows=10, as_recarray=True) result = pd.DataFrame(result[2], columns=result[1], index=result[0]) - tm.assert_frame_equal(pd.DataFrame.from_records(result), expected) + tm.assert_frame_equal(pd.DataFrame.from_records(result), expected, check_index_type=False) result = next(iter(pd.read_csv(StringIO('foo,bar\n'), chunksize=10, as_recarray=True))) result = pd.DataFrame(result[2], columns=result[1], index=result[0]) - tm.assert_frame_equal(pd.DataFrame.from_records(result), expected) + tm.assert_frame_equal(pd.DataFrame.from_records(result), expected, check_index_type=False) def test_eof_states(self): # GH 10728 and 10548 @@ -3697,7 +3695,7 @@ def test_empty_pass_dtype(self): expected = DataFrame({'one': np.empty(0, dtype='u1'), 'two': np.empty(0, dtype=np.object)}) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_empty_with_index_pass_dtype(self): data = 'one,two' @@ -3706,38 +3704,37 @@ def test_empty_with_index_pass_dtype(self): expected = DataFrame({'two': np.empty(0, dtype='f')}, index=Index([], dtype='u1', name='one')) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) 
def test_empty_with_multiindex_pass_dtype(self): data = 'one,two,three' result = self.read_csv(StringIO(data), index_col=['one', 'two'], dtype={'one': 'u1', 1: 'f8'}) - expected = DataFrame({'three': np.empty(0, dtype=np.object)}, index=MultiIndex.from_arrays( - [np.empty(0, dtype='u1'), np.empty(0, dtype='O')], - names=['one', 'two']) - ) - tm.assert_frame_equal(result, expected) + exp_idx = MultiIndex.from_arrays([np.empty(0, dtype='u1'), np.empty(0, dtype='O')], + names=['one', 'two']) + expected = DataFrame({'three': np.empty(0, dtype=np.object)}, index=exp_idx) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_empty_with_mangled_column_pass_dtype_by_names(self): data = 'one,one' result = self.read_csv(StringIO(data), dtype={'one': 'u1', 'one.1': 'f'}) expected = DataFrame({'one': np.empty(0, dtype='u1'), 'one.1': np.empty(0, dtype='f')}) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_empty_with_mangled_column_pass_dtype_by_indexes(self): data = 'one,one' result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'}) expected = DataFrame({'one': np.empty(0, dtype='u1'), 'one.1': np.empty(0, dtype='f')}) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_empty_with_dup_column_pass_dtype_by_names(self): data = 'one,one' result = self.read_csv(StringIO(data), mangle_dupe_cols=False, dtype={'one': 'u1'}) expected = pd.concat([Series([], name='one', dtype='u1')] * 2, axis=1) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_empty_with_dup_column_pass_dtype_by_indexes(self): ### FIXME in GH9424 @@ -3747,7 +3744,7 @@ def test_empty_with_dup_column_pass_dtype_by_indexes(self): result = self.read_csv(StringIO(data), mangle_dupe_cols=False, dtype={0: 'u1', 1: 'f'}) expected = pd.concat([Series([], name='one', dtype='u1'), Series([], name='one', 
dtype='f')], axis=1) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_usecols_dtypes(self): data = """\ diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 6c78f9cf3937c..afe76ee1746da 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -2506,7 +2506,7 @@ def test_series(self): ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)) - self._check_roundtrip(ts3, tm.assert_series_equal) + self._check_roundtrip(ts3, tm.assert_series_equal, check_index_type=False) def test_sparse_series(self): @@ -3049,7 +3049,7 @@ def test_select_dtypes(self): result = store.select( 'df4', where='values>2.0') tm.assert_frame_equal(expected, result) - + # test selection with comparison against numpy scalar # GH 11283 with ensure_clean_store(self.path) as store: diff --git a/pandas/io/tests/test_sas.py b/pandas/io/tests/test_sas.py index 0e08252fdce97..8d1041229bf3c 100644 --- a/pandas/io/tests/test_sas.py +++ b/pandas/io/tests/test_sas.py @@ -60,17 +60,17 @@ def test1_index(self): # Read full file data = XportReader(self.file01, index="SEQN").read() - tm.assert_frame_equal(data, data_csv) + tm.assert_frame_equal(data, data_csv, check_index_type=False) # Test incremental read with `read` method. reader = XportReader(self.file01, index="SEQN") data = reader.read(10) - tm.assert_frame_equal(data, data_csv.iloc[0:10, :]) + tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False) # Test incremental read with `get_chunk` method. 
reader = XportReader(self.file01, index="SEQN", chunksize=10) data = reader.get_chunk() - tm.assert_frame_equal(data, data_csv.iloc[0:10, :]) + tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False) def test1_incremental(self): @@ -85,7 +85,7 @@ def test1_incremental(self): all_data = [x for x in reader] data = pd.concat(all_data, axis=0) - tm.assert_frame_equal(data, data_csv) + tm.assert_frame_equal(data, data_csv, check_index_type=False) def test2(self): diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index aced92ec8abd0..3736bcecfca9f 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1480,7 +1480,7 @@ def test_get_schema_create_table(self): self.drop_table(tbl) self.conn.execute(create_sql) returned_df = sql.read_sql_table(tbl, self.conn) - tm.assert_frame_equal(returned_df, blank_test_df) + tm.assert_frame_equal(returned_df, blank_test_df, check_index_type=False) self.drop_table(tbl) def test_dtype(self): diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index aff9cd6c558e2..86dfbc8f76a9b 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -300,7 +300,7 @@ def test_write_dta6(self): original.to_stata(path, None) written_and_read_again = self.read_dta(path) tm.assert_frame_equal(written_and_read_again.set_index('index'), - original) + original, check_index_type=False) def test_read_write_dta10(self): original = DataFrame(data=[["string", "object", 1, 1.1, @@ -315,8 +315,9 @@ def test_read_write_dta10(self): with tm.ensure_clean() as path: original.to_stata(path, {'datetime': 'tc'}) written_and_read_again = self.read_dta(path) + # original.index is np.int32; the index read back is np.int64 tm.assert_frame_equal(written_and_read_again.set_index('index'), - original, check_index_type=False) def test_stata_doc_examples(self): with tm.ensure_clean() as path: diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 
fb255f300ebdd..948c7a524f266 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -577,7 +577,7 @@ def test_value_counts_inferred(self): s = klass({}) expected = Series([], dtype=np.int64) - tm.assert_series_equal(s.value_counts(), expected) + tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) self.assert_numpy_array_equal(s.unique(), np.array([])) self.assertEqual(s.nunique(), 0) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 1d143236e285b..812951b126320 100755 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -434,64 +434,57 @@ def test_categories_none(self): def test_describe(self): # string type desc = self.factor.describe() - expected = DataFrame.from_dict(dict(counts=[3, 2, 3], - freqs=[3/8., 2/8., 3/8.], - categories=['a', 'b', 'c']) - ).set_index('categories') + expected = DataFrame({'counts': [3, 2, 3], + 'freqs': [3/8., 2/8., 3/8.]}, + index=pd.CategoricalIndex(['a', 'b', 'c'], name='categories')) tm.assert_frame_equal(desc, expected) # check unused categories cat = self.factor.copy() cat.set_categories(["a","b","c","d"], inplace=True) desc = cat.describe() - expected = DataFrame.from_dict(dict(counts=[3, 2, 3, 0], - freqs=[3/8., 2/8., 3/8., 0], - categories=['a', 'b', 'c', 'd']) - ).set_index('categories') + expected = DataFrame({'counts': [3, 2, 3, 0], + 'freqs': [3/8., 2/8., 3/8., 0]}, + index=pd.CategoricalIndex(['a', 'b', 'c', 'd'], name='categories')) tm.assert_frame_equal(desc, expected) # check an integer one desc = Categorical([1,2,3,1,2,3,3,2,1,1,1]).describe() - expected = DataFrame.from_dict(dict(counts=[5, 3, 3], - freqs=[5/11., 3/11., 3/11.], - categories=[1,2,3] - ) - ).set_index('categories') + expected = DataFrame({'counts': [5, 3, 3], + 'freqs': [5/11., 3/11., 3/11.]}, + index=pd.CategoricalIndex([1, 2, 3], name='categories')) tm.assert_frame_equal(desc, expected) # https://github.com/pydata/pandas/issues/3678 # describe 
should work with NaN cat = pd.Categorical([np.nan,1, 2, 2]) desc = cat.describe() - expected = DataFrame.from_dict(dict(counts=[1, 2, 1], - freqs=[1/4., 2/4., 1/4.], - categories=Categorical([1,2,np.nan], - [1, 2]) - ) - ).set_index('categories') + expected = DataFrame({'counts': [1, 2, 1], + 'freqs': [1/4., 2/4., 1/4.]}, + index=pd.CategoricalIndex([1, 2, np.nan], categories=[1, 2], + name='categories')) tm.assert_frame_equal(desc, expected) # NA as a category with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a","c","c",np.nan], categories=["b","a","c",np.nan]) + cat = pd.Categorical(["a", "c", "c", np.nan], categories=["b", "a", "c", np.nan]) result = cat.describe() expected = DataFrame([[0,0],[1,0.25],[2,0.5],[1,0.25]], columns=['counts','freqs'], - index=Index(['b','a','c',np.nan],name='categories')) + index=pd.CategoricalIndex(['b', 'a', 'c', np.nan], name='categories')) tm.assert_frame_equal(result,expected) # NA as an unused category with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a","c","c"], categories=["b","a","c",np.nan]) + cat = pd.Categorical(["a", "c", "c"], categories=["b", "a", "c", np.nan]) result = cat.describe() - expected = DataFrame([[0,0],[1,1/3.],[2,2/3.],[0,0]], - columns=['counts','freqs'], - index=Index(['b','a','c',np.nan],name='categories')) + exp_idx = pd.CategoricalIndex(['b', 'a', 'c', np.nan], name='categories') + expected = DataFrame([[0, 0], [1, 1/3.], [2, 2/3.], [0, 0]], + columns=['counts', 'freqs'], index=exp_idx) tm.assert_frame_equal(result,expected) - def test_print(self): expected = ["[a, b, b, a, a, c, c, c]", "Categories (3, object): [a < b < c]"] @@ -2373,6 +2366,7 @@ def test_groupby_sort(self): res = self.cat.groupby(['value_group'])['value_group'].count() exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] + exp.index = pd.CategoricalIndex(exp.index, name=exp.index.name) tm.assert_series_equal(res, exp) def test_min_max(self): @@ -2423,10 +2417,10 @@ def 
test_value_counts(self): s = pd.Series(pd.Categorical(["a","b","c","c","c","b"], categories=["c","a","b","d"])) res = s.value_counts(sort=False) - exp = Series([3,1,2,0], index=["c","a","b","d"]) + exp = Series([3,1,2,0], index=pd.CategoricalIndex(["c","a","b","d"])) tm.assert_series_equal(res, exp) res = s.value_counts(sort=True) - exp = Series([3,2,1,0], index=["c","b","a","d"]) + exp = Series([3,2,1,0], index=pd.CategoricalIndex(["c","b","a","d"])) tm.assert_series_equal(res, exp) def test_value_counts_with_nan(self): @@ -2435,42 +2429,42 @@ def test_value_counts_with_nan(self): s = pd.Series(["a", "b", "a"], dtype="category") tm.assert_series_equal( s.value_counts(dropna=True), - pd.Series([2, 1], index=["a", "b"])) + pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))) tm.assert_series_equal( s.value_counts(dropna=False), - pd.Series([2, 1], index=["a", "b"])) + pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))) s = pd.Series(["a", "b", None, "a", None, None], dtype="category") tm.assert_series_equal( s.value_counts(dropna=True), - pd.Series([2, 1], index=["a", "b"])) + pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))) tm.assert_series_equal( s.value_counts(dropna=False), - pd.Series([3, 2, 1], index=[np.nan, "a", "b"])) + pd.Series([3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"]))) # When we aren't sorting by counts, and np.nan isn't a # category, it should be last. 
tm.assert_series_equal( s.value_counts(dropna=False, sort=False), - pd.Series([2, 1, 3], index=["a", "b", np.nan])) + pd.Series([2, 1, 3], index=pd.CategoricalIndex(["a", "b", np.nan]))) with tm.assert_produces_warning(FutureWarning): s = pd.Series(pd.Categorical(["a", "b", "a"], categories=["a", "b", np.nan])) tm.assert_series_equal( s.value_counts(dropna=True), - pd.Series([2, 1], index=["a", "b"])) + pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))) tm.assert_series_equal( s.value_counts(dropna=False), - pd.Series([2, 1, 0], index=["a", "b", np.nan])) + pd.Series([2, 1, 0], index=pd.CategoricalIndex(["a", "b", np.nan]))) with tm.assert_produces_warning(FutureWarning): s = pd.Series(pd.Categorical(["a", "b", None, "a", None, None], categories=["a", "b", np.nan])) tm.assert_series_equal( s.value_counts(dropna=True), - pd.Series([2, 1], index=["a", "b"])) + pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))) tm.assert_series_equal( s.value_counts(dropna=False), - pd.Series([3, 2, 1], index=[np.nan, "a", "b"])) + pd.Series([3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"]))) def test_groupby(self): @@ -2478,7 +2472,7 @@ def test_groupby(self): data = DataFrame({"a":[1,1,1,2,2,2,3,4,5], "b":cats}) expected = DataFrame({'a': Series([1, 2, 4, np.nan], - index=Index(['a', 'b', 'c', 'd'], name='b'))}) + index=pd.CategoricalIndex(['a', 'b', 'c', 'd'], name='b'))}) result = data.groupby("b").mean() tm.assert_frame_equal(result, expected) @@ -2488,7 +2482,8 @@ def test_groupby(self): # single grouper gb = df.groupby("A") - expected = DataFrame({ 'values' : Series([3,7,np.nan],index=Index(['a','b','z'],name='A')) }) + exp_idx = pd.CategoricalIndex(['a', 'b', 'z'], name='A') + expected = DataFrame({'values': Series([3, 7, np.nan], index=exp_idx)}) result = gb.sum() tm.assert_frame_equal(result, expected) @@ -2568,7 +2563,7 @@ def f(x): df = pd.DataFrame({'a': [1, 0, 0, 0]}) c = pd.cut(df.a, [0, 1, 2, 3, 4]) result = df.groupby(c).apply(len) - expected = 
pd.Series([1, 0, 0, 0], index=c.values.categories) + expected = pd.Series([1, 0, 0, 0], index=pd.CategoricalIndex(c.values.categories)) expected.index.name = 'a' tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 8a01b1ca17373..8fab5151f9c9a 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1429,7 +1429,12 @@ def test_getitem_setitem_float_labels(self): self.assertEqual(len(result), 4) result = df.ix[4:5] - expected = df.reindex([4, 5]) + expected = df.reindex([4, 5]) # reindex with int + assert_frame_equal(result, expected, check_index_type=False) + self.assertEqual(len(result), 2) + + result = df.ix[4:5] + expected = df.reindex([4.0, 5.0]) # reindex with float assert_frame_equal(result, expected) self.assertEqual(len(result), 2) @@ -1978,30 +1983,34 @@ def test_reindex_level(self): from itertools import permutations icol = ['jim', 'joe', 'jolie'] - def verify_first_level(df, level, idx): + def verify_first_level(df, level, idx, check_index_type=True): f = lambda val: np.nonzero(df[level] == val)[0] i = np.concatenate(list(map(f, idx))) left = df.set_index(icol).reindex(idx, level=level) right = df.iloc[i].set_index(icol) - assert_frame_equal(left, right) + assert_frame_equal(left, right, check_index_type=check_index_type) - def verify(df, level, idx, indexer): + def verify(df, level, idx, indexer, check_index_type=True): left = df.set_index(icol).reindex(idx, level=level) right = df.iloc[indexer].set_index(icol) - assert_frame_equal(left, right) + assert_frame_equal(left, right, check_index_type=check_index_type) df = pd.DataFrame({'jim':list('B' * 4 + 'A' * 2 + 'C' * 3), 'joe':list('abcdeabcd')[::-1], 'jolie':[10, 20, 30] * 3, 'joline': np.random.randint(0, 1000, 9)}) - target = [['C', 'B', 'A'], ['F', 'C', 'A', 'D'], ['A'], ['D', 'F'], + target = [['C', 'B', 'A'], ['F', 'C', 'A', 'D'], ['A'], ['A', 'B', 'C'], ['C', 'A', 'B'], ['C', 'B'], ['C', 'A'], - ['A', 'B'], ['B', 
'A', 'C'], ['A', 'C', 'B']] + ['A', 'B'], ['B', 'A', 'C']] for idx in target: verify_first_level(df, 'jim', idx) + # reindex by these causes different MultiIndex levels + for idx in [['D', 'F'], ['A', 'C', 'B']]: + verify_first_level(df, 'jim', idx, check_index_type=False) + verify(df, 'joe', list('abcde'), [3, 2, 1, 0, 5, 4, 8, 7, 6]) verify(df, 'joe', list('abcd'), [3, 2, 1, 0, 5, 8, 7, 6]) verify(df, 'joe', list('abc'), [3, 2, 1, 8, 7, 6]) @@ -2009,7 +2018,7 @@ def verify(df, level, idx, indexer): verify(df, 'joe', list('edc'), [0, 1, 4, 5, 6]) verify(df, 'joe', list('eadbc'), [3, 0, 2, 1, 4, 5, 8, 7, 6]) verify(df, 'joe', list('edwq'), [0, 4, 5]) - verify(df, 'joe', list('wq'), []) + verify(df, 'joe', list('wq'), [], check_index_type=False) df = DataFrame({'jim':['mid'] * 5 + ['btm'] * 8 + ['top'] * 7, 'joe':['3rd'] * 2 + ['1st'] * 3 + ['2nd'] * 3 + @@ -10205,10 +10214,10 @@ def test_xs_corner(self): xs = df.xs(0) assert_almost_equal(xs, [1., 'foo', 2., 'bar', 3.]) - # no columns but index + # no columns but Index(dtype=object) df = DataFrame(index=['a', 'b', 'c']) result = df.xs('a') - expected = Series([], name='a') + expected = Series([], name='a', index=pd.Index([], dtype=object)) assert_series_equal(result, expected) def test_xs_duplicates(self): @@ -10394,7 +10403,7 @@ def test_reindex_nan(self): tm.assert_frame_equal(df.reindex(i), df.iloc[j]) df.index = df.index.astype('object') - tm.assert_frame_equal(df.reindex(i), df.iloc[j]) + tm.assert_frame_equal(df.reindex(i), df.iloc[j], check_index_type=False) # GH10388 df = pd.DataFrame({'other':['a', 'b', np.nan, 'c'], @@ -11527,13 +11536,13 @@ def test_apply_empty(self): result = self.empty.apply(x.append, axis=1, reduce=False) assert_frame_equal(result, self.empty) result = self.empty.apply(x.append, axis=1, reduce=True) - assert_series_equal(result, Series([])) + assert_series_equal(result, Series([], index=pd.Index([], dtype=object))) empty_with_cols = DataFrame(columns=['a', 'b', 'c']) result = 
empty_with_cols.apply(x.append, axis=1, reduce=False) assert_frame_equal(result, empty_with_cols) result = empty_with_cols.apply(x.append, axis=1, reduce=True) - assert_series_equal(result, Series([])) + assert_series_equal(result, Series([], index=pd.Index([], dtype=object))) # Ensure that x.append hasn't been called self.assertEqual(x, []) @@ -11592,7 +11601,7 @@ def test_apply_mixed_dtype_corner(self): result = df[:0].apply(np.mean, axis=1) # the result here is actually kind of ambiguous, should it be a Series # or a DataFrame? - expected = Series(np.nan, index=[]) + expected = Series(np.nan, index=pd.Index([], dtype=int)) assert_series_equal(result, expected) df = DataFrame({'A': ['foo'], @@ -11881,7 +11890,7 @@ def test_filter(self): # regex with ints in column names # from PR #10384 df = DataFrame(0., index=[0, 1, 2], columns=['A1', 1, 'B', 2, 'C']) - expected = DataFrame(0., index=[0, 1, 2], columns=[1, 2]) + expected = DataFrame(0., index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)) filtered = df.filter(regex='^[0-9]+$') assert_frame_equal(filtered, expected) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 22a1c0573d45a..7d41ba060717a 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -612,26 +612,26 @@ def test_rename_mi(self): def test_get_numeric_data_preserve_dtype(self): # get the numeric data - o = Series([1,2,3]) + o = Series([1, 2, 3]) result = o._get_numeric_data() self._compare(result, o) - o = Series([1,'2',3.]) + o = Series([1, '2', 3.]) result = o._get_numeric_data() - expected = Series([],dtype=object) + expected = Series([], dtype=object, index=pd.Index([], dtype=object)) self._compare(result, expected) - o = Series([True,False,True]) + o = Series([True, False, True]) result = o._get_numeric_data() self._compare(result, o) - o = Series([True,False,True]) + o = Series([True, False, True]) result = o._get_bool_data() self._compare(result, o) o = 
Series(date_range('20130101',periods=3)) result = o._get_numeric_data() - expected = Series([],dtype='M8[ns]') + expected = Series([],dtype='M8[ns]', index=pd.Index([], dtype=object)) self._compare(result, expected) def test_nonzero_single_element(self): diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 7e6aaa8213667..cace8aed24107 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -10,6 +10,7 @@ from datetime import datetime, date +import pandas as pd from pandas import (Series, DataFrame, MultiIndex, PeriodIndex, date_range, bdate_range) from pandas.compat import (range, lrange, StringIO, lmap, lzip, u, zip, @@ -1482,6 +1483,14 @@ def test_unsorted_index(self): l = ax.get_lines()[0] rs = l.get_xydata() rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name='y') + tm.assert_series_equal(rs, df.y, check_index_type=False) + tm.close() + + df.index = pd.Index(np.arange(99, -1, -1), dtype=np.float64) + ax = df.plot() + l = ax.get_lines()[0] + rs = l.get_xydata() + rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name='y') tm.assert_series_equal(rs, df.y) @slow diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 46026a4c887a6..ff2dd63b01a98 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -129,7 +129,8 @@ def checkit(dtype): assert_series_equal(transformed, expected) value_grouped = data.groupby(data) - assert_series_equal(value_grouped.aggregate(np.mean), agged) + assert_series_equal(value_grouped.aggregate(np.mean), agged, + check_index_type=False) # complex agg agged = grouped.aggregate([np.mean, np.std]) @@ -390,6 +391,9 @@ def test_grouper_multilevel_freq(self): # Check string level expected = df.reset_index().groupby([pd.Grouper(key='foo', freq='W'), pd.Grouper(key='bar', freq='W')]).sum() + # reset index changes columns dtype to object + expected.columns = pd.Index([0], dtype=int) + result = df.groupby([pd.Grouper(level='foo', freq='W'), 
pd.Grouper(level='bar', freq='W')]).sum() assert_frame_equal(result, expected) @@ -746,14 +750,18 @@ def test_get_group_grouped_by_tuple(self): def test_agg_apply_corner(self): # nothing to group, all NA grouped = self.ts.groupby(self.ts * np.nan) + self.assertEqual(self.ts.dtype, np.float64) - assert_series_equal(grouped.sum(), Series([])) - assert_series_equal(grouped.agg(np.sum), Series([])) - assert_series_equal(grouped.apply(np.sum), Series([])) + # groupby float64 values results in Float64Index + exp = Series([], dtype=np.float64, index=pd.Index([], dtype=np.float64)) + assert_series_equal(grouped.sum(), exp) + assert_series_equal(grouped.agg(np.sum), exp) + assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False) # DataFrame grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan) - exp_df = DataFrame(columns=self.tsframe.columns, dtype=float) + exp_df = DataFrame(columns=self.tsframe.columns, dtype=float, + index=pd.Index([], dtype=np.float64)) assert_frame_equal(grouped.sum(), exp_df, check_names=False) assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False) assert_frame_equal(grouped.apply(np.sum), DataFrame({}, dtype=float)) @@ -1831,7 +1839,8 @@ def test_groupby_head_tail(self): assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1)) assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) - empty_not_as = DataFrame(columns=df.columns) + empty_not_as = DataFrame(columns=df.columns, index=pd.Index([], + dtype=df.index.dtype)) empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype) empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype) assert_frame_equal(empty_not_as, g_not_as.head(0)) @@ -2972,9 +2981,12 @@ def test_groupby_aggregation_mixed_dtype(self): def test_groupby_dtype_inference_empty(self): # GH 6733 df = DataFrame({'x': [], 'range': np.arange(0,dtype='int64')}) + self.assertEqual(df['x'].dtype, np.float64) + result = df.groupby('x').first() - expected = DataFrame({'range' : 
Series([],index=Index([],name='x'),dtype='int64') }) - assert_frame_equal(result,expected,by_blocks=True) + exp_index = Index([], name='x', dtype=np.float64) + expected = DataFrame({'range' : Series([], index=exp_index, dtype='int64')}) + assert_frame_equal(result,expected, by_blocks=True) def test_groupby_list_infer_array_like(self): result = self.df.groupby(list(self.df['A'])).mean() @@ -3535,33 +3547,27 @@ def test_groupby_sort_categorical(self): ['(2.5, 5]', 4, 50], ['(0, 2.5]', 1, 60], ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar']) - df['range'] = Categorical(df['range'],ordered=True) - index = Index(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], dtype='object') - index.name = 'range' - result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) - result_sort.index = index - index = Index(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]', '(0, 2.5]'], dtype='object') - index.name = 'range' - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], index=index, columns=['foo', 'bar']) - result_nosort.index = index + df['range'] = Categorical(df['range'], ordered=True) + index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], name='range') + result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], + columns=['foo', 'bar'], index=index) col = 'range' assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) # when categories is ordered, group is ordered by category's order assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) - df['range'] = Categorical(df['range'],ordered=False) - index = Index(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], dtype='object') - index.name = 'range' - result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) - result_sort.index = index - index = Index(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]', '(0, 2.5]'], dtype='object') - index.name = 'range' - result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], 
index=index, columns=['foo', 'bar']) - result_nosort.index = index + df['range'] = Categorical(df['range'], ordered=False) + index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], name='range') + result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], + columns=['foo', 'bar'], index=index) - col = 'range' + index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]', '(0, 2.5]'], + name='range') + result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], + index=index, columns=['foo', 'bar']) + col = 'range' #### this is an unordered categorical, but we allow this #### assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) @@ -3644,7 +3650,8 @@ def test_groupby_categorical(self): result = data.groupby(cats).mean() expected = data.groupby(np.asarray(cats)).mean() - expected = expected.reindex(levels) + exp_idx = CategoricalIndex(levels, ordered=True) + expected = expected.reindex(exp_idx) assert_frame_equal(result, expected) @@ -3654,7 +3661,7 @@ def test_groupby_categorical(self): idx = cats.codes.argsort() ord_labels = np.asarray(cats).take(idx) ord_data = data.take(idx) - expected = ord_data.groupby(ord_labels, sort=False).describe() + expected = ord_data.groupby(Categorical(ord_labels), sort=False).describe() expected.index.names = [None, None] assert_frame_equal(desc_result, expected) @@ -4026,8 +4033,12 @@ def test_groupby_levels_and_columns(self): df = pd.DataFrame(np.arange(12).reshape(-1, 3), index=idx) by_levels = df.groupby(level=idx_names).mean() + # reset_index changes columns dtype to object by_columns = df.reset_index().groupby(idx_names).mean() + tm.assert_frame_equal(by_levels, by_columns, check_column_type=False) + + by_columns.columns = pd.Index(by_columns.columns, dtype=np.int64) tm.assert_frame_equal(by_levels, by_columns) def test_gb_apply_list_of_unequal_len_arrays(self): @@ -5443,7 +5454,7 @@ def 
test_groupby_categorical_two_columns(self): groups_single_key = test.groupby("cat") res = groups_single_key.agg('mean') exp = DataFrame({"ints":[1.5,1.5,np.nan], "val":[20,30,np.nan]}, - index=pd.Index(["a", "b", "c"], name="cat")) + index=pd.CategoricalIndex(["a", "b", "c"], name="cat")) tm.assert_frame_equal(res, exp) # Grouping on two columns diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 36e825924995a..e5b8aee75ad17 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1880,7 +1880,8 @@ def test_ix_general(self): # this is ok df.sortlevel(inplace=True) res = df.ix[key] - index = MultiIndex.from_arrays([[4] * 3, [2012] * 3], + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3], names=['col', 'year']) expected = DataFrame({'amount': [222, 333, 444]}, index=index) tm.assert_frame_equal(res, expected) @@ -2592,26 +2593,28 @@ def test_dups_fancy_indexing(self): result = dfnu.ix[['E']] assert_frame_equal(result, expected) + # ToDo: check_index_type can be True after GH 11497 + # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) - result = df.ix[[0,8,0]] - expected = DataFrame({"A": [0, np.nan, 0]},index=[0,8,0]) - assert_frame_equal(result,expected) + result = df.ix[[0, 8, 0]] + expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) + assert_frame_equal(result, expected, check_index_type=False) df = DataFrame({"A": list('abc')}) result = df.ix[[0,8,0]] - expected = DataFrame({"A": ['a', np.nan, 'a']},index=[0,8,0]) - assert_frame_equal(result,expected) + expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0]) + assert_frame_equal(result, expected, check_index_type=False) # non unique with non unique selector - df = DataFrame({'test': [5,7,9,11]}, index=['A','A','B','C']) - expected = DataFrame({'test' : [5,7,5,7,np.nan]},index=['A','A','A','A','E']) - result = df.ix[['A','A','E']] + df = 
DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C']) + expected = DataFrame({'test' : [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E']) + result = df.ix[['A', 'A', 'E']] assert_frame_equal(result, expected) # GH 5835 # dups on index and missing values - df = DataFrame(np.random.randn(5,5),columns=['A','B','B','B','A']) + df = DataFrame(np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A']) expected = pd.concat([df.ix[:,['A','B']],DataFrame(np.nan,columns=['C'],index=df.index)],axis=1) result = df.ix[:,['A','B','C']] @@ -3168,7 +3171,7 @@ def test_iloc_non_unique_indexing(self): expected = DataFrame(new_list) expected = pd.concat([ expected, DataFrame(index=idx[idx>sidx.max()]) ]) result = df2.loc[idx] - assert_frame_equal(result, expected) + assert_frame_equal(result, expected, check_index_type=False) def test_mi_access(self): @@ -3504,7 +3507,7 @@ def test_loc_setitem_datetime(self): df.loc[conv(dt1),'one'] = 100 df.loc[conv(dt2),'one'] = 200 - expected = DataFrame({'one' : [100.0,200.0]},index=[dt1,dt2]) + expected = DataFrame({'one' : [100.0, 200.0]},index=[dt1, dt2]) assert_frame_equal(df, expected) def test_series_partial_set(self): @@ -3512,42 +3515,44 @@ def test_series_partial_set(self): # Regression from GH4825 ser = Series([0.1, 0.2], index=[1, 2]) + # ToDo: check_index_type can be True after GH 11497 + # loc expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) result = ser.loc[[3, 2, 3]] - assert_series_equal(result, expected) + assert_series_equal(result, expected, check_index_type=False) # raises as nothing in in the index self.assertRaises(KeyError, lambda : ser.loc[[3, 3, 3]]) expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) result = ser.loc[[2, 2, 3]] - assert_series_equal(result, expected) + assert_series_equal(result, expected, check_index_type=False) expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) - result = Series([0.1, 0.2, 0.3], index=[1,2,3]).loc[[3,4,4]] - assert_series_equal(result, expected) + result 
= Series([0.1, 0.2, 0.3], index=[1, 2, 3]).loc[[3, 4, 4]] + assert_series_equal(result, expected, check_index_type=False) expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) - result = Series([0.1, 0.2, 0.3, 0.4], index=[1,2,3,4]).loc[[5,3,3]] - assert_series_equal(result, expected) + result = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]).loc[[5, 3, 3]] + assert_series_equal(result, expected, check_index_type=False) expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) - result = Series([0.1, 0.2, 0.3, 0.4], index=[1,2,3,4]).loc[[5,4,4]] - assert_series_equal(result, expected) + result = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]).loc[[5, 4, 4]] + assert_series_equal(result, expected, check_index_type=False) expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) - result = Series([0.1, 0.2, 0.3, 0.4], index=[4,5,6,7]).loc[[7,2,2]] - assert_series_equal(result, expected) + result = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]).loc[[7, 2, 2]] + assert_series_equal(result, expected, check_index_type=False) expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) - result = Series([0.1, 0.2, 0.3, 0.4], index=[1,2,3,4]).loc[[4,5,5]] - assert_series_equal(result, expected) + result = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]).loc[[4, 5, 5]] + assert_series_equal(result, expected, check_index_type=False) # iloc - expected = Series([0.2,0.2,0.1,0.1], index=[2,2,1,1]) - result = ser.iloc[[1,1,0,0]] - assert_series_equal(result, expected) + expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) + result = ser.iloc[[1, 1, 0, 0]] + assert_series_equal(result, expected, check_index_type=False) def test_partial_set_invalid(self): @@ -3617,7 +3622,7 @@ def f(): # these work as they don't really change # anything but the index # GH5632 - expected = DataFrame(columns=['foo']) + expected = DataFrame(columns=['foo'], index=pd.Index([], dtype=int)) def f(): df = DataFrame() df['foo'] = Series([], dtype='object') @@ -3634,7 +3639,7 @@ def f(): return df 
assert_frame_equal(f(), expected) - expected = DataFrame(columns=['foo']) + expected = DataFrame(columns=['foo'], index=pd.Index([], dtype=int)) expected['foo'] = expected['foo'].astype('float64') def f(): df = DataFrame() @@ -3654,16 +3659,16 @@ def f(): df = DataFrame() df2 = DataFrame() - df2[1] = Series([1],index=['foo']) - df.loc[:,1] = Series([1],index=['foo']) - assert_frame_equal(df,DataFrame([[1]],index=['foo'],columns=[1])) + df2[1] = Series([1], index=['foo']) + df.loc[:,1] = Series([1], index=['foo']) + assert_frame_equal(df,DataFrame([[1]], index=['foo'], columns=[1])) assert_frame_equal(df,df2) # no index to start - expected = DataFrame({ 0 : Series(1,index=range(4)) },columns=['A','B',0]) + expected = DataFrame({ 0 : Series(1,index=range(4)) }, columns=['A','B',0]) df = DataFrame(columns=['A','B']) - df[0] = Series(1,index=range(4)) + df[0] = Series(1, index=range(4)) df.dtypes str(df) assert_frame_equal(df,expected) @@ -3676,28 +3681,28 @@ def f(): # GH5720, GH5744 # don't create rows when empty - expected = DataFrame(columns=['A','B','New']) + expected = DataFrame(columns=['A', 'B', 'New'], index=pd.Index([], dtype=int)) expected['A'] = expected['A'].astype('int64') expected['B'] = expected['B'].astype('float64') expected['New'] = expected['New'].astype('float64') df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] y['New'] = np.nan - assert_frame_equal(y,expected) + assert_frame_equal(y, expected) #assert_frame_equal(y,expected) - expected = DataFrame(columns=['a','b','c c','d']) + expected = DataFrame(columns=['a', 'b', 'c c', 'd']) expected['d'] = expected['d'].astype('int64') df = DataFrame(columns=['a', 'b', 'c c']) df['d'] = 3 - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) assert_series_equal(df['c c'],Series(name='c c',dtype=object)) # reindex columns is ok df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] result = y.reindex(columns=['A','B','C']) - expected = 
DataFrame(columns=['A','B','C']) + expected = DataFrame(columns=['A','B','C'], index=pd.Index([], dtype=int)) expected['A'] = expected['A'].astype('int64') expected['B'] = expected['B'].astype('float64') expected['C'] = expected['C'].astype('float64') @@ -4140,7 +4145,13 @@ def test_floating_index(self): # fancy floats/integers create the correct entry (as nan) # fancy tests expected = Series([2, 0], index=Float64Index([5.0, 0.0])) - for fancy_idx in [[5.0, 0.0], [5, 0], np.array([5.0, 0.0]), np.array([5, 0])]: + for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float + assert_series_equal(s[fancy_idx], expected) + assert_series_equal(s.loc[fancy_idx], expected) + assert_series_equal(s.ix[fancy_idx], expected) + + expected = Series([2, 0], index=Index([5, 0], dtype=int)) + for fancy_idx in [[5, 0], np.array([5, 0])]: #int assert_series_equal(s[fancy_idx], expected) assert_series_equal(s.loc[fancy_idx], expected) assert_series_equal(s.ix[fancy_idx], expected) @@ -4778,10 +4789,12 @@ def test_loc_listlike(self): expected = self.df.iloc[[4,0,1,5]] assert_frame_equal(result, expected) + # ToDo: check_index_type can be True after GH XXX + result = self.df2.loc[['a','b','e']] - expected = DataFrame({'A' : [0,1,5,2,3,np.nan], - 'B' : Series(list('aaabbe')).astype('category',categories=list('cabe')) }).set_index('B') - assert_frame_equal(result, expected) + exp_index = pd.CategoricalIndex(list('aaabbe'), categories=list('cabe'), name='B') + expected = DataFrame({'A' : [0,1,5,2,3,np.nan]}, index=exp_index) + assert_frame_equal(result, expected, check_index_type=False) # element in the categories but not in the values self.assertRaises(KeyError, lambda : self.df2.loc['e']) @@ -4790,15 +4803,15 @@ def test_loc_listlike(self): df = self.df2.copy() df.loc['e'] = 20 result = df.loc[['a','b','e']] - expected = DataFrame({'A' : [0,1,5,2,3,20], - 'B' : Series(list('aaabbe')).astype('category',categories=list('cabe')) }).set_index('B') + exp_index = 
pd.CategoricalIndex(list('aaabbe'), categories=list('cabe'), name='B') + expected = DataFrame({'A' : [0,1,5,2,3,20]}, index=exp_index) assert_frame_equal(result, expected) df = self.df2.copy() result = df.loc[['a','b','e']] expected = DataFrame({'A' : [0,1,5,2,3,np.nan], 'B' : Series(list('aaabbe')).astype('category',categories=list('cabe')) }).set_index('B') - assert_frame_equal(result, expected) + assert_frame_equal(result, expected, check_index_type=False) # not all labels in the categories diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 1c8cbac60e7c7..2720435a20c01 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -471,11 +471,20 @@ def f(): # only 1 left, del, add, del s = Series(1) del s[0] - assert_series_equal(s, Series(dtype='int64')) + assert_series_equal(s, Series(dtype='int64', index=Index([], dtype='int64'))) s[0] = 1 assert_series_equal(s, Series(1)) del s[0] - assert_series_equal(s, Series(dtype='int64')) + assert_series_equal(s, Series(dtype='int64', index=Index([], dtype='int64'))) + + # Index(dtype=object) + s = Series(1, index=['a']) + del s['a'] + assert_series_equal(s, Series(dtype='int64', index=Index([], dtype='object'))) + s['a'] = 1 + assert_series_equal(s, Series(1, index=['a'])) + del s['a'] + assert_series_equal(s, Series(dtype='int64', index=Index([], dtype='object'))) def test_getitem_preserve_name(self): result = self.ts[self.ts > 0] @@ -755,7 +764,7 @@ def test_constructor(self): def test_constructor_empty(self): empty = Series() empty2 = Series([]) - assert_series_equal(empty, empty2) + assert_series_equal(empty, empty2, check_index_type=False) empty = Series(index=lrange(10)) empty2 = Series(np.nan, index=lrange(10)) @@ -1448,7 +1457,7 @@ def test_getitem_boolean_empty(self): assert_series_equal(result, expected) s = Series(['A', 'B']) - expected = Series(dtype=object) + expected = Series(dtype=object, index=Index([], dtype=int)) result = s[Series([], dtype=object)] 
assert_series_equal(result, expected) @@ -3031,7 +3040,7 @@ def test_quantile_multi(self): assert_series_equal(result, expected) result = self.ts.quantile([]) - expected = pd.Series([], name=self.ts.name) + expected = pd.Series([], name=self.ts.name, index=Index([], dtype=float)) assert_series_equal(result, expected) def test_append(self): @@ -4751,12 +4760,23 @@ def f(x): # compress # GH 6658 - s = Series([0,1.,-1],index=list('abc')) - result = np.compress(s>0,s) - assert_series_equal(result, Series([1.],index=['b'])) + s = Series([0, 1., -1], index=list('abc')) + result = np.compress(s > 0, s) + assert_series_equal(result, Series([1.], index=['b'])) + + result = np.compress(s < -1, s) + # result empty Index(dtype=object) as the same as original + exp = Series([], dtype='float64', index=Index([], dtype='object')) + assert_series_equal(result, exp) - result = np.compress(s<-1,s) - assert_series_equal(result, Series([],dtype='float64')) + s = Series([0, 1., -1], index=[.1, .2, .3]) + result = np.compress(s > 0, s) + assert_series_equal(result, Series([1.], index=[.2])) + + result = np.compress(s < -1, s) + # result empty Float64Index as the same as original + exp = Series([], dtype='float64', index=Index([], dtype='float64')) + assert_series_equal(result, exp) def test_complexx(self): @@ -7031,7 +7051,8 @@ def test_reindex_nan(self): assert_series_equal(ts.reindex(i), ts.iloc[j]) ts.index = ts.index.astype('object') - assert_series_equal(ts.reindex(i), ts.iloc[j]) + # reindex coerces index.dtype to float, loc/iloc doesn't + assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False) def test_reindex_corner(self): # (don't forget to fix this) I think it's fixed diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 722ce439722c9..9399f537191e7 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -304,6 +304,7 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): def _get_join_info(self): left_ax = 
self.left._data.axes[self.axis] right_ax = self.right._data.axes[self.axis] + if self.left_index and self.right_index: join_index, left_indexer, right_indexer = \ left_ax.join(right_ax, how=self.how, return_indexers=True) @@ -321,7 +322,6 @@ def _get_join_info(self): right_indexer) = _get_join_indexers(self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how) - if self.right_index: if len(self.left) > 0: join_index = self.left.index.take(left_indexer) @@ -337,6 +337,8 @@ def _get_join_info(self): else: join_index = Index(np.arange(len(left_indexer))) + if len(join_index) == 0: + join_index = join_index.astype(object) return join_index, left_indexer, right_indexer def _get_merge_data(self): diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 0f920fc5aa5bc..6db2d2e15f699 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -762,6 +762,7 @@ def test_merge_left_empty_right_empty(self): right = pd.DataFrame([], columns=['x', 'y', 'z']) exp_in = pd.DataFrame([], columns=['a', 'b', 'c', 'x', 'y', 'z'], + index=pd.Index([], dtype=object), dtype=object) for kwarg in [dict(left_index=True, right_index=True), @@ -792,6 +793,8 @@ def test_merge_left_empty_right_notempty(self): 'z': [3, 6, 9]}, columns=['a', 'b', 'c', 'x', 'y', 'z']) exp_in = exp_out[0:0] # make empty DataFrame keeping dtype + # result will have object dtype + exp_in.index = exp_in.index.astype(object) for kwarg in [dict(left_index=True, right_index=True), dict(left_index=True, right_on='x'), @@ -822,6 +825,8 @@ def test_merge_left_notempty_right_empty(self): 'z': np.array([np.nan]*3, dtype=object)}, columns=['a', 'b', 'c', 'x', 'y', 'z']) exp_in = exp_out[0:0] # make empty DataFrame keeping dtype + # result will have object dtype + exp_in.index = exp_in.index.astype(object) for kwarg in [dict(left_index=True, right_index=True), dict(left_index=True, right_on='x'), diff --git a/pandas/util/testing.py b/pandas/util/testing.py 
index be8b0df73593f..a50700813d2ea 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -628,7 +628,12 @@ def _check_types(l, r, obj='Index'): msg = '{0} classes are different'.format(obj) raise_assert_detail(obj, msg, l, r) assert_attr_equal('dtype', l, r, obj=obj) - assert_attr_equal('inferred_type', l, r, obj=obj) + + # allow string-like to have different inferred_types + if l.inferred_type in ('string', 'unicode'): + assertIn(r.inferred_type, ('string', 'unicode')) + else: + assert_attr_equal('inferred_type', l, r, obj=obj) def _get_ilevel_values(index, level): # accept level number only @@ -865,8 +870,8 @@ def assert_numpy_array_equal(left, right, # This could be refactored to use the NDFrame.equals method def assert_series_equal(left, right, check_dtype=True, - check_index_type=False, - check_series_type=False, + check_index_type=True, + check_series_type=True, check_less_precise=False, check_names=True, check_exact=False, @@ -947,9 +952,9 @@ def assert_series_equal(left, right, check_dtype=True, # This could be refactored to use the NDFrame.equals method def assert_frame_equal(left, right, check_dtype=True, - check_index_type=False, - check_column_type=False, - check_frame_type=False, + check_index_type=True, + check_column_type=True, + check_frame_type=True, check_less_precise=False, check_names=True, by_blocks=False,