From c2cf2697cdeb27322ef38aa3ed66c54485df9142 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 6 Nov 2018 23:00:58 -0500 Subject: [PATCH 01/53] ENH - first pass at modifying set operations on indexes. Dont ignore empty indexes, and allow more cross index operaions --- pandas/core/indexes/base.py | 8 +++----- pandas/core/indexes/interval.py | 9 ++++++++- pandas/core/indexes/period.py | 7 +++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 51c84d6e28cb4..89ee9f3b2d9ff 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2762,12 +2762,9 @@ def union(self, other): self._assert_can_do_setop(other) other = ensure_index(other) - if len(other) == 0 or self.equals(other): + if self.equals(other): return self._get_consensus_name(other) - if len(self) == 0: - return other._get_consensus_name(self) - # TODO: is_dtype_union_equal is a hack around # 1. buggy set ops with duplicates (GH #13432) # 2. 
CategoricalIndex lacking setops (GH #10186) @@ -2775,7 +2772,8 @@ def union(self, other): if not is_dtype_union_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') - return this.union(other) + # force object dtype, for empty index cases + return this.union(other).astype('O') # TODO(EA): setops-refactor, clean all this up if is_period_dtype(self) or is_datetime64tz_dtype(self): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 25d4dd0cbcc81..0598124853082 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1030,7 +1030,14 @@ def equals(self, other): def _setop(op_name): def func(self, other): - other = self._as_like_interval_index(other) + try: + other = self._as_like_interval_index(other) + except (TypeError, ValueError): + # Currently this will cause difference operations to return + # object dtype as opposed to IntervalIndex, unlike other Index + # objects that return the same type when using `difference` on + # mismatched types + return getattr(self.astype('O'), op_name)(other) # GH 19016: ensure set op will not return a prohibited dtype subtypes = [self.dtype.subtype, other.dtype.subtype] diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f452a57e82725..b0a5f85e3d109 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -660,10 +660,9 @@ def join(self, other, how='left', level=None, return_indexers=False, def _assert_can_do_setop(self, other): super(PeriodIndex, self)._assert_can_do_setop(other) - if not isinstance(other, PeriodIndex): - raise ValueError('can only call with other PeriodIndex-ed objects') - - if self.freq != other.freq: + # *Can't* use PeriodIndexes of different freqs + # *Can* use PeriodIndex/DatetimeIndex + if isinstance(other, PeriodIndex) and self.freq != other.freq: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) From 
4922fd3fdf04e3eca301140da0bade774ab9da6d Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Thu, 8 Nov 2018 00:20:58 -0500 Subject: [PATCH 02/53] BUG - account for empty index + non-monotonic index, and dont try to coerce mismatched closings on interval indexes --- pandas/core/indexes/base.py | 9 +++++++++ pandas/core/indexes/interval.py | 5 ++++- pandas/core/indexes/period.py | 5 +++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 654805df46587..499884042f8e6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2783,6 +2783,15 @@ def union(self, other): if self.equals(other): return self._get_reconciled_name_object(other) + # Don't allow empty index to move on. If `other` is not monotonic-incr + # ...will fail + if is_dtype_equal(self.dtype, other.dtype): + if len(self) == 0: + return other._get_reconciled_name_object(self) + elif len(other) == 0: + return self._get_reconciled_name_object(other) + + # TODO: is_dtype_union_equal is a hack around # 1. buggy set ops with duplicates (GH #13432) # 2. CategoricalIndex lacking setops (GH #10186) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 4cc198ba03602..f7917e004ca0d 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1040,7 +1040,9 @@ def _setop(op_name): def func(self, other): try: other = self._as_like_interval_index(other) - except (TypeError, ValueError): + # allow ValueError from this method to raise to catch mixed closed + # except only Non-Interval index mismatches. 
+ except TypeError: # Currently this will cause difference operations to return # object dtype as opposed to IntervalIndex, unlike other Index # objects that return the same type when using `difference` on @@ -1066,6 +1068,7 @@ def func(self, other): return type(self).from_tuples(result, closed=self.closed, name=result_name) + return func union = _setop('union') diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a50c4a93372d9..8feab89871590 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -835,6 +835,11 @@ def join(self, other, how='left', level=None, return_indexers=False, """ self._assert_can_do_setop(other) + if not isinstance(other, PeriodIndex): + return self.astype('O').join(other, how=how, level=level, + return_indexers=return_indexers, + sort=sort) + result = Int64Index.join(self, other, how=how, level=level, return_indexers=return_indexers, sort=sort) From 5e528a1972698e37580adf999dc6480f7174fd95 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Thu, 8 Nov 2018 00:23:10 -0500 Subject: [PATCH 03/53] TST - update existing tests to account for cross type index joins being cast to dtype object --- pandas/tests/indexes/common.py | 24 ++++--------------- .../tests/indexes/datetimes/test_datetime.py | 7 +++--- pandas/tests/indexes/datetimes/test_setops.py | 4 ++-- .../tests/indexes/interval/test_interval.py | 11 +++++---- pandas/tests/indexes/period/test_setops.py | 6 ++--- pandas/tests/reshape/test_concat.py | 6 +++-- pandas/tests/series/test_combine_concat.py | 1 + pandas/tests/series/test_missing.py | 14 +++++++---- 8 files changed, 33 insertions(+), 40 deletions(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index c5cbaea23df76..aae8f1dbfa30e 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -626,11 +626,7 @@ def test_intersection_base(self): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - 
if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.intersection(case) - elif isinstance(idx, CategoricalIndex): + if isinstance(idx, CategoricalIndex): pass else: result = first.intersection(case) @@ -653,11 +649,7 @@ def test_union_base(self): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.union(case) - elif isinstance(idx, CategoricalIndex): + if isinstance(idx, CategoricalIndex): pass else: result = first.union(case) @@ -684,11 +676,7 @@ def test_difference_base(self): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.difference(case) - elif isinstance(idx, CategoricalIndex): + if isinstance(idx, CategoricalIndex): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): assert result.__class__ == answer.__class__ @@ -718,11 +706,7 @@ def test_symmetric_difference(self): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assert_raises_regex(ValueError, msg): - result = first.symmetric_difference(case) - elif isinstance(idx, CategoricalIndex): + if isinstance(idx, CategoricalIndex): pass else: result = first.symmetric_difference(case) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index cea56bf803083..b2c9ee42bf064 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -303,10 +303,9 @@ def 
test_join_with_period_index(self, join_type): c_idx_type='p', r_idx_type='dt') s = df.iloc[:5, 0] - with tm.assert_raises_regex(ValueError, - 'can only call with other ' - 'PeriodIndex-ed objects'): - df.columns.join(s.index, how=join_type) + expected = df.columns.astype('O').join(s.index, how=join_type) + result = df.columns.join(s.index, how=join_type) + tm.assert_index_equal(expected, result) def test_factorize(self): idx1 = DatetimeIndex(['2014-01', '2014-01', '2014-02', '2014-02', diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index d72bf275463ac..eb9fc12579b6c 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -256,11 +256,11 @@ def test_datetimeindex_union_join_empty(self): empty = Index([]) result = dti.union(empty) - assert isinstance(result, DatetimeIndex) - assert result is result + tm.assert_index_equal(result, dti.astype('O')) result = dti.join(empty) assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, dti) def test_join_nonunique(self): idx1 = to_datetime(['2012-11-06 16:00:11.477563', diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index ac0446373a6a1..d2f350018943c 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -835,15 +835,16 @@ def test_symmetric_difference(self, closed): @pytest.mark.parametrize('op_name', [ 'union', 'intersection', 'difference', 'symmetric_difference']) - def test_set_operation_errors(self, closed, op_name): + def test_set_incompatible_types(self, closed, op_name): index = self.create_index(closed=closed) set_op = getattr(index, op_name) # non-IntervalIndex - msg = ('the other index needs to be an IntervalIndex too, but ' - 'was type Int64Index') - with tm.assert_raises_regex(TypeError, msg): - set_op(Index([1, 2, 3])) + expected = getattr(index.astype('O'), 
op_name)(Index([1, 2, 3])) + result = set_op(Index([1, 2, 3])) + tm.assert_index_equal(result, expected) + + # Come back to mixed interval types # mixed closed msg = ('can only do set operations between two IntervalIndex objects ' diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index c4dd23b1708db..db1c306476f65 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -105,9 +105,9 @@ def test_union_misc(self): with pytest.raises(period.IncompatibleFrequency): index.union(index2) - msg = 'can only call with other PeriodIndex-ed objects' - with tm.assert_raises_regex(ValueError, msg): - index.join(index.to_timestamp()) + # msg = 'can only call with other PeriodIndex-ed objects' + # with tm.assert_raises_regex(ValueError, msg): + # index.join(index.to_timestamp()) index3 = period_range('1/1/2000', '1/20/2000', freq='2D') with pytest.raises(period.IncompatibleFrequency): diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 673658c29fe75..703be970f1676 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2051,7 +2051,8 @@ def test_concat_empty_series(self): s1 = pd.Series([1, 2, 3], name='x') s2 = pd.Series(name='y') res = pd.concat([s1, s2], axis=1) - exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]}) + exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]}, + index=pd.Index([0, 1, 2], dtype='O')) tm.assert_frame_equal(res, exp) s1 = pd.Series([1, 2, 3], name='x') @@ -2066,7 +2067,8 @@ def test_concat_empty_series(self): s2 = pd.Series(name=None) res = pd.concat([s1, s2], axis=1) exp = pd.DataFrame({'x': [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, - columns=['x', 0]) + columns=['x', 0], + index=pd.Index([0, 1, 2], dtype='O')) tm.assert_frame_equal(res, exp) @pytest.mark.parametrize('tz', [None, 'UTC']) diff --git a/pandas/tests/series/test_combine_concat.py 
b/pandas/tests/series/test_combine_concat.py index a685eb7e9fbd3..2f35572ca8347 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -102,6 +102,7 @@ def test_combine_first(self): # corner case s = Series([1., 2, 3], index=[0, 1, 2]) result = s.combine_first(Series([], index=[])) + s.index = s.index.astype('O') assert_series_equal(s, result) def test_update(self): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index c38b7c0083a21..a2578ac1bc2d8 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -867,7 +867,7 @@ def test_interpolate_pchip(self): # interpolate at new_index new_index = ser.index.union(Index([49.25, 49.5, 49.75, 50.25, 50.5, - 50.75])) + 50.75])).astype(float) interp_s = ser.reindex(new_index).interpolate(method='pchip') # does not blow up, GH5977 interp_s[49:51] @@ -883,7 +883,9 @@ def test_interpolate_akima(self): index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])) # interpolate at new_index - new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])) + new_index = ser.index.union( + Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]) + ).astype(float) interp_s = ser.reindex(new_index).interpolate(method='akima') assert_series_equal(interp_s[1:3], expected) @@ -896,7 +898,9 @@ def test_interpolate_piecewise_polynomial(self): index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])) # interpolate at new_index - new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])) + new_index = ser.index.union( + Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]) + ).astype(float) interp_s = ser.reindex(new_index).interpolate( method='piecewise_polynomial') assert_series_equal(interp_s[1:3], expected) @@ -910,7 +914,9 @@ def test_interpolate_from_derivatives(self): index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])) # interpolate at new_index - new_index = ser.index.union(Index([1.25, 1.5, 
1.75, 2.25, 2.5, 2.75])) + new_index = ser.index.union( + Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]) + ).astype(float) interp_s = ser.reindex(new_index).interpolate( method='from_derivatives') assert_series_equal(interp_s[1:3], expected) From cdaa5b0fb56a4bcee2803f96029841b080858386 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Thu, 8 Nov 2018 23:07:46 -0500 Subject: [PATCH 04/53] ENH - incompatibility checks and incompatible type unions --- pandas/core/indexes/base.py | 38 ++++++++++++++++++++----------- pandas/core/indexes/category.py | 8 +++++++ pandas/core/indexes/datetimes.py | 14 ++++++++++++ pandas/core/indexes/numeric.py | 11 +++++++++ pandas/core/indexes/range.py | 11 +++++++++ pandas/core/indexes/timedeltas.py | 3 +++ 6 files changed, 72 insertions(+), 13 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 499884042f8e6..40946313a3122 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2756,6 +2756,15 @@ def _get_reconciled_name_object(self, other): return self._shallow_copy(name=name) return self + def _union_inconsistent_dtypes(self, other): + this = self.astype('O') + other = Index(other).astype('O') + return Index.union(this, other).astype('O') + + def _is_inconsistent(self, other): + return (type(self) is not type(other) + or not is_dtype_equal(self.dtype, other.dtype)) + def union(self, other): """ Form the union of two Index objects and sorts if possible. @@ -2780,27 +2789,30 @@ def union(self, other): self._assert_can_do_setop(other) other = ensure_index(other) - if self.equals(other): + if self._is_inconsistent(other): + return self._union_inconsistent_dtypes(other) + + # if is_dtype_equal(self.dtype, other.dtype): + if len(self) == 0: + return other._get_reconciled_name_object(self) + elif len(other) == 0: return self._get_reconciled_name_object(other) - # Don't allow empty index to move on. 
If `other` is not monotonic-incr - # ...will fail - if is_dtype_equal(self.dtype, other.dtype): - if len(self) == 0: - return other._get_reconciled_name_object(self) - elif len(other) == 0: - return self._get_reconciled_name_object(other) + if self.equals(other): + return self._get_reconciled_name_object(other) # TODO: is_dtype_union_equal is a hack around # 1. buggy set ops with duplicates (GH #13432) # 2. CategoricalIndex lacking setops (GH #10186) # Once those are fixed, this workaround can be removed - if not is_dtype_union_equal(self.dtype, other.dtype): - this = self.astype('O') - other = other.astype('O') - # force object dtype, for empty index cases - return this.union(other).astype('O') + + # remove? this might be too lenient + # if not is_dtype_union_equal(self.dtype, other.dtype): + # this = self.astype('O') + # other = other.astype('O') + # # force object dtype, for empty index cases + # return this.union(other).astype('O') # TODO(EA): setops-refactor, clean all this up if is_period_dtype(self) or is_datetime64tz_dtype(self): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 6e2f0b00fcd6e..1a5b7a49da96e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -872,6 +872,14 @@ def _delegate_method(self, name, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) + def _is_inconsistent(self, other): + if type(self) is not type(other): + return True + elif type(self.dtype) is not type(other.dtype): + return True + else: + return False + CategoricalIndex._add_numeric_methods_add_sub_disabled() CategoricalIndex._add_numeric_methods_disabled() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3a2f9986760d3..a4a55021d11c0 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -578,6 +578,17 @@ def unique(self, level=None): result = super(DatetimeIndex, naive).unique(level=level) return 
self._shallow_copy(result.values) + def _is_inconsistent(self, other): + is_inconsistent = super(DatetimeIndex, self)._is_inconsistent(other) + if is_inconsistent: + if hasattr(other, 'dtype'): + # If same base, consider consistent, let UTC logic takeover + return self.dtype.base != other.dtype.base + else: + return True + else: + return is_inconsistent + def union(self, other): """ Specialized union for DatetimeIndex objects. If combine @@ -594,6 +605,9 @@ def union(self, other): """ self._assert_can_do_setop(other) + if self._is_inconsistent(other): + return self._union_inconsistent_dtypes(other) + if len(other) == 0 or self.equals(other) or len(self) == 0: return super(DatetimeIndex, self).union(other) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 795ffeefa1794..da5eccecf6513 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -228,6 +228,17 @@ def _assert_safe_casting(cls, data, subarr): raise TypeError('Unsafe NumPy casting, you must ' 'explicitly cast') + def _is_inconsistent(self, other): + from pandas.core.indexes.range import RangeIndex + is_inconsistent = super(Int64Index, self)._is_inconsistent(other) + if is_inconsistent: + if type(self) is Int64Index and isinstance(other, RangeIndex): + return False + else: + return True + else: + return is_inconsistent + Int64Index._add_numeric_methods() Int64Index._add_logical_methods() diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index d1b5645928921..fd2ed94a1829a 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -416,6 +416,13 @@ def _extended_gcd(self, a, b): old_t, t = t, old_t - quotient * t return old_r, old_s, old_t + def _is_inconsistent(self, other): + is_inconsistent = super(RangeIndex, self)._is_inconsistent(other) + if is_inconsistent: + return not isinstance(other, Int64Index) + else: + return is_inconsistent + def union(self, other): """ Form the union of two Index objects and 
sorts if possible @@ -429,6 +436,10 @@ def union(self, other): union : Index """ self._assert_can_do_setop(other) + + if self._is_inconsistent(other): + return self._union_inconsistent_dtypes(other) + if len(other) == 0 or self.equals(other) or len(self) == 0: return super(RangeIndex, self).union(other) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5b077a6984114..f34ea714c0600 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -283,6 +283,9 @@ def union(self, other): """ self._assert_can_do_setop(other) + if self._is_inconsistent(other): + return self._union_inconsistent_dtypes(other) + if len(other) == 0 or self.equals(other) or len(self) == 0: return super(TimedeltaIndex, self).union(other) From 40d57ec1e04c9ee21efffe660acb2fb3d1b2bf5b Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Thu, 8 Nov 2018 23:08:58 -0500 Subject: [PATCH 05/53] TST - update datetime union tets, add tests for inconsistent unions --- pandas/tests/indexes/datetimes/test_setops.py | 3 +- pandas/tests/indexes/test_setops.py | 65 +++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/indexes/test_setops.py diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index eb9fc12579b6c..aa55a0a5f8668 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -29,10 +29,11 @@ def test_union2(self): assert tm.equalContents(union, everything) # GH 10149 + expected = first.astype('O').union(pd.Index(second.values, dtype='O')).astype('O') cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: result = first.union(case) - assert tm.equalContents(result, everything) + assert tm.equalContents(result, expected) @pytest.mark.parametrize("tz", tz) def test_union(self, tz): diff --git a/pandas/tests/indexes/test_setops.py 
b/pandas/tests/indexes/test_setops.py new file mode 100644 index 0000000000000..45657dca22faf --- /dev/null +++ b/pandas/tests/indexes/test_setops.py @@ -0,0 +1,65 @@ +''' +The tests in this package are to ensure the proper resultant dtypes of +set operations. +''' +import itertools as it +import numpy as np +import pytest + +import pandas as pd +import pandas.util.testing as tm +from pandas.core.dtypes.dtypes import PeriodDtype, CategoricalDtype, \ + IntervalDtype + + +def makeEmptyIndex(_=None): + return pd.Index([]) + + +INDEXES = dict( + unicodeIndex=(tm.makeUnicodeIndex, np.dtype('O')), + strIndex=(tm.makeStringIndex, np.dtype('O')), + dateIndex=(tm.makeDateIndex, np.dtype(' Date: Sat, 10 Nov 2018 19:40:53 -0500 Subject: [PATCH 06/53] CLN - refactor union -> _union --- pandas/core/indexes/base.py | 4 +++ pandas/core/indexes/datetimes.py | 6 ++--- pandas/core/indexes/range.py | 7 +++--- pandas/core/indexes/timedeltas.py | 6 ++--- pandas/tests/indexes/test_setops.py | 38 +++++++++++++---------------- 5 files changed, 28 insertions(+), 33 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 40946313a3122..1fd4ab3733406 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2792,6 +2792,10 @@ def union(self, other): if self._is_inconsistent(other): return self._union_inconsistent_dtypes(other) + return self._union(other) + + def _union(self, other): + # if is_dtype_equal(self.dtype, other.dtype): if len(self) == 0: return other._get_reconciled_name_object(self) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index a4a55021d11c0..e854736f6c974 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -603,11 +603,9 @@ def union(self, other): ------- y : Index or DatetimeIndex """ - self._assert_can_do_setop(other) - - if self._is_inconsistent(other): - return self._union_inconsistent_dtypes(other) + return super(DatetimeIndex, 
self).union(other) + def _union(self, other): if len(other) == 0 or self.equals(other) or len(self) == 0: return super(DatetimeIndex, self).union(other) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index fd2ed94a1829a..9c76ee78ec99e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -419,7 +419,7 @@ def _extended_gcd(self, a, b): def _is_inconsistent(self, other): is_inconsistent = super(RangeIndex, self)._is_inconsistent(other) if is_inconsistent: - return not isinstance(other, Int64Index) + return not type(other) is Int64Index else: return is_inconsistent @@ -435,11 +435,10 @@ def union(self, other): ------- union : Index """ - self._assert_can_do_setop(other) + return super(RangeIndex, self).union(other) - if self._is_inconsistent(other): - return self._union_inconsistent_dtypes(other) + def _union(self, other): if len(other) == 0 or self.equals(other) or len(self) == 0: return super(RangeIndex, self).union(other) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index f34ea714c0600..69f9f41fac1de 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -281,11 +281,9 @@ def union(self, other): ------- y : Index or TimedeltaIndex """ - self._assert_can_do_setop(other) - - if self._is_inconsistent(other): - return self._union_inconsistent_dtypes(other) + return super(TimedeltaIndex, self).union(other) + def _union(self, other): if len(other) == 0 or self.equals(other) or len(self) == 0: return super(TimedeltaIndex, self).union(other) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 45657dca22faf..cd89947dedb70 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -17,18 +17,18 @@ def makeEmptyIndex(_=None): INDEXES = dict( - unicodeIndex=(tm.makeUnicodeIndex, np.dtype('O')), - strIndex=(tm.makeStringIndex, np.dtype('O')), - dateIndex=(tm.makeDateIndex, 
np.dtype(' Date: Sat, 10 Nov 2018 22:13:19 -0500 Subject: [PATCH 07/53] TST - add tests for categrorical index, and compatible inconsistent pairs --- pandas/tests/indexes/test_setops.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index cd89947dedb70..a72823b2c5194 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -49,7 +49,7 @@ def test_union_same_types(idxType): @pytest.mark.parametrize('idxType1,idxType2', - list(it.combinations([x for x in INDEXES if x != 'catIndex'], 2)) + list(it.combinations(INDEXES, 2)) ) def test_union_different_types(idxType1, idxType2): if tuple(sorted([idxType1, idxType2])) in COMPATIBLE_INCONSISTENT_PAIRS: @@ -57,5 +57,27 @@ def test_union_different_types(idxType1, idxType2): idx1 = INDEXES[idxType1](10) idx2 = INDEXES[idxType2](20) + + # A union with a CategoricalIndex (even as dtype('O')) and a + # non-CategoricalIndex can only be made if both indices are monotonic. + # This is true before this PR as well. 
+ if idxType1 == 'catIndex' or idxType2 == 'catIndex': + idx1 = idx1.sort_values() + idx2 = idx2.sort_values() + assert idx1.union(idx2).dtype == np.dtype('O') assert idx2.union(idx1).dtype == np.dtype('O') + + +@pytest.mark.parametrize('idxType1,idxType2', + COMPATIBLE_INCONSISTENT_PAIRS +) +def test_compatible_inconsistent_pairs(idxType1, idxType2): + idx1 = INDEXES[idxType1](10) + idx2 = INDEXES[idxType2](20) + + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + + assert res1.dtype in (idx1.dtype, idx2.dtype) + assert res2.dtype in (idx1.dtype, idx2.dtype) From 8364c2e4ff241a799190a73ff1a557e208269e34 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sun, 11 Nov 2018 00:33:14 -0500 Subject: [PATCH 08/53] BUG - union -> _union in overriden _union methods --- pandas/core/indexes/base.py | 3 ++- pandas/core/indexes/datetimes.py | 4 ++-- pandas/core/indexes/range.py | 4 ++-- pandas/core/indexes/timedeltas.py | 4 ++-- pandas/tests/indexes/test_base.py | 5 +++++ 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1fd4ab3733406..8080157f7ddd5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2758,6 +2758,7 @@ def _get_reconciled_name_object(self, other): def _union_inconsistent_dtypes(self, other): this = self.astype('O') + # call Index for when `other` is list-like other = Index(other).astype('O') return Index.union(this, other).astype('O') @@ -2787,11 +2788,11 @@ def union(self, other): """ self._assert_can_do_setop(other) - other = ensure_index(other) if self._is_inconsistent(other): return self._union_inconsistent_dtypes(other) + other = ensure_index(other) return self._union(other) def _union(self, other): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e854736f6c974..3d0f3ec24a242 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -607,7 +607,7 @@ def union(self, other): def 
_union(self, other): if len(other) == 0 or self.equals(other) or len(self) == 0: - return super(DatetimeIndex, self).union(other) + return super(DatetimeIndex, self)._union(other) if not isinstance(other, DatetimeIndex): try: @@ -620,7 +620,7 @@ def _union(self, other): if this._can_fast_union(other): return this._fast_union(other) else: - result = Index.union(this, other) + result = Index._union(this, other) if isinstance(result, DatetimeIndex): result._tz = timezones.tz_standardize(this.tz) if (result.freq is None and diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9c76ee78ec99e..d2f51ad074518 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -440,7 +440,7 @@ def union(self, other): def _union(self, other): if len(other) == 0 or self.equals(other) or len(self) == 0: - return super(RangeIndex, self).union(other) + return super(RangeIndex, self)._union(other) if isinstance(other, RangeIndex): start_s, step_s = self._start, self._step @@ -479,7 +479,7 @@ def _union(self, other): (end_s - step_o <= end_o)): return RangeIndex(start_r, end_r + step_o, step_o) - return self._int64index.union(other) + return self._int64index._union(other) @Appender(_index_shared_docs['join']) def join(self, other, how='left', level=None, return_indexers=False, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 69f9f41fac1de..fccd124e5b92d 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -285,7 +285,7 @@ def union(self, other): def _union(self, other): if len(other) == 0 or self.equals(other) or len(self) == 0: - return super(TimedeltaIndex, self).union(other) + return super(TimedeltaIndex, self)._union(other) if not isinstance(other, TimedeltaIndex): try: @@ -297,7 +297,7 @@ def _union(self, other): if this._can_fast_union(other): return this._fast_union(other) else: - result = Index.union(this, other) + result = Index._union(this, other) if 
isinstance(result, TimedeltaIndex): if result.freq is None: result.freq = to_offset(result.inferred_freq) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 724dffc49dd3b..b7ac785ba224f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -876,7 +876,12 @@ def test_union_identity(self): union = first.union(first) assert union is first + # This should no longer be the same object, since [] is not consistent, + # both objects will be recast to dtype('O') union = first.union([]) + assert union.equals(first) + + union = first.union(pd.Index([])) assert union is first union = Index([]).union(first) From ab329a916e586ee1c2466bd37e678f9c4b4eab34 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sun, 11 Nov 2018 01:18:30 -0500 Subject: [PATCH 09/53] TST - update test_operator raised exception --- pandas/core/indexes/range.py | 1 - pandas/tests/indexes/datetimes/test_setops.py | 4 +++- pandas/tests/indexes/test_setops.py | 22 ++++++++----------- pandas/tests/series/test_operators.py | 7 +++--- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index d2f51ad074518..cfb1b6ee9a580 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -437,7 +437,6 @@ def union(self, other): """ return super(RangeIndex, self).union(other) - def _union(self, other): if len(other) == 0 or self.equals(other) or len(self) == 0: return super(RangeIndex, self)._union(other) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index aa55a0a5f8668..68647f0929630 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -29,7 +29,9 @@ def test_union2(self): assert tm.equalContents(union, everything) # GH 10149 - expected = first.astype('O').union(pd.Index(second.values, dtype='O')).astype('O') + expected = 
first.astype('O').union( + pd.Index(second.values, dtype='O') + ).astype('O') cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: result = first.union(case) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index a72823b2c5194..d637c36d4f11d 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -8,8 +8,8 @@ import pandas as pd import pandas.util.testing as tm -from pandas.core.dtypes.dtypes import PeriodDtype, CategoricalDtype, \ - IntervalDtype +from pandas.core.dtypes.dtypes import ( + PeriodDtype, CategoricalDtype, IntervalDtype) def makeEmptyIndex(_=None): @@ -29,7 +29,7 @@ def makeEmptyIndex(_=None): catIndex=tm.makeCategoricalIndex, emptyIndex=makeEmptyIndex, intervalIndex=tm.makeIntervalIndex, -) +) COMPATIBLE_INCONSISTENT_PAIRS = { @@ -37,9 +37,7 @@ def makeEmptyIndex(_=None): } -@pytest.mark.parametrize('idxType', - INDEXES.keys() -) +@pytest.mark.parametrize('idxType', INDEXES.keys()) def test_union_same_types(idxType): idx1 = INDEXES[idxType](10) idx2 = INDEXES[idxType](20) @@ -48,17 +46,16 @@ def test_union_same_types(idxType): # Note: catIndex reflects only left dtype, should it reflect both? -@pytest.mark.parametrize('idxType1,idxType2', - list(it.combinations(INDEXES, 2)) -) +@pytest.mark.parametrize('idxType1,idxType2', + list(it.combinations(INDEXES, 2))) def test_union_different_types(idxType1, idxType2): if tuple(sorted([idxType1, idxType2])) in COMPATIBLE_INCONSISTENT_PAIRS: - return + return idx1 = INDEXES[idxType1](10) idx2 = INDEXES[idxType2](20) - # A union with a CategoricalIndex (even as dtype('O')) and a + # A union with a CategoricalIndex (even as dtype('O')) and a # non-CategoricalIndex can only be made if both indices are monotonic. # This is true before this PR as well. 
if idxType1 == 'catIndex' or idxType2 == 'catIndex': @@ -70,8 +67,7 @@ def test_union_different_types(idxType1, idxType2): @pytest.mark.parametrize('idxType1,idxType2', - COMPATIBLE_INCONSISTENT_PAIRS -) + COMPATIBLE_INCONSISTENT_PAIRS) def test_compatible_inconsistent_pairs(idxType1, idxType2): idx1 = INDEXES[idxType1](10) idx2 = INDEXES[idxType2](20) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 35bd99ff2eda8..6472e13a8cd8f 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -15,6 +15,7 @@ from pandas import ( Categorical, DataFrame, Index, NaT, Series, bdate_range, date_range, isna) from pandas.core import ops +from pandas.core.indexes.base import InvalidIndexError import pandas.core.nanops as nanops import pandas.util.testing as tm from pandas.util.testing import ( @@ -197,9 +198,9 @@ def test_scalar_na_logical_ops_corners(self): raises=AssertionError, strict=True)), pytest.param(ops.ror_, - marks=pytest.mark.xfail(reason="GH#22092 Index " - "implementation raises", - raises=ValueError, strict=True)), + marks=pytest.mark.xfail(reason="Index.get_indexer " + "with non unique index", + raises=InvalidIndexError, strict=True)), pytest.param(ops.rxor, marks=pytest.mark.xfail(reason="GH#22092 Index " "implementation raises", From 93486ada8364131813be3469de1e32d121c884e9 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sun, 11 Nov 2018 12:23:22 -0500 Subject: [PATCH 10/53] CLN - pep8 line adherence --- pandas/tests/series/test_operators.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 6472e13a8cd8f..aafda1fe94708 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -200,7 +200,8 @@ def test_scalar_na_logical_ops_corners(self): pytest.param(ops.ror_, marks=pytest.mark.xfail(reason="Index.get_indexer " "with non unique index", 
- raises=InvalidIndexError, strict=True)), + raises=InvalidIndexError, + strict=True)), pytest.param(ops.rxor, marks=pytest.mark.xfail(reason="GH#22092 Index " "implementation raises", From e435e4c02a7aeca89d83e842e6e27db9a8ba4dd1 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Mon, 12 Nov 2018 22:09:35 -0500 Subject: [PATCH 11/53] ENH - reverse polarity of compatibility check and add docstrings --- pandas/core/indexes/base.py | 41 ++++++++++++++++---------------- pandas/core/indexes/category.py | 9 ++----- pandas/core/indexes/datetimes.py | 16 +++++-------- pandas/core/indexes/numeric.py | 18 +++++++------- pandas/core/indexes/range.py | 12 +++++----- 5 files changed, 43 insertions(+), 53 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8080157f7ddd5..99dcec6d2837f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2756,15 +2756,24 @@ def _get_reconciled_name_object(self, other): return self._shallow_copy(name=name) return self - def _union_inconsistent_dtypes(self, other): + def _union_incompatible_dtypes(self, other): + """ + Casts this and other index to object dtype to allow the formation + of a union between incompatible types. + """ this = self.astype('O') # call Index for when `other` is list-like other = Index(other).astype('O') return Index.union(this, other).astype('O') - def _is_inconsistent(self, other): - return (type(self) is not type(other) - or not is_dtype_equal(self.dtype, other.dtype)) + def _is_compatible_with_other(self, other): + """ + Check whether this and the other dtype are compatible with each other. + Meaning a union can be formed between them without needing to be cast + to dtype object. 
+ """ + return (type(self) is type(other) + and is_dtype_equal(self.dtype, other.dtype)) def union(self, other): """ @@ -2789,15 +2798,20 @@ def union(self, other): """ self._assert_can_do_setop(other) - if self._is_inconsistent(other): - return self._union_inconsistent_dtypes(other) + if not self._is_compatible_with_other(other): + return self._union_incompatible_dtypes(other) + # This line needs to be after _union_incompatible_dtypes to ensure + # the original type of other is not lost after being cast to Index other = ensure_index(other) return self._union(other) def _union(self, other): + """ + Specific union logic should go here. In subclasses union behavior + should be overwritten here rather than in `self.union` + """ - # if is_dtype_equal(self.dtype, other.dtype): if len(self) == 0: return other._get_reconciled_name_object(self) elif len(other) == 0: @@ -2806,19 +2820,6 @@ def _union(self, other): if self.equals(other): return self._get_reconciled_name_object(other) - - # TODO: is_dtype_union_equal is a hack around - # 1. buggy set ops with duplicates (GH #13432) - # 2. CategoricalIndex lacking setops (GH #10186) - # Once those are fixed, this workaround can be removed - - # remove? 
this might be too lenient - # if not is_dtype_union_equal(self.dtype, other.dtype): - # this = self.astype('O') - # other = other.astype('O') - # # force object dtype, for empty index cases - # return this.union(other).astype('O') - # TODO(EA): setops-refactor, clean all this up if is_period_dtype(self) or is_datetime64tz_dtype(self): lvals = self._ndarray_values diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 1a5b7a49da96e..edc76e85748e0 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -872,13 +872,8 @@ def _delegate_method(self, name, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) - def _is_inconsistent(self, other): - if type(self) is not type(other): - return True - elif type(self.dtype) is not type(other.dtype): - return True - else: - return False + def _is_compatible_with_other(self, other): + return type(self) is type(other) and type(self.dtype) is type(self.dtype) CategoricalIndex._add_numeric_methods_add_sub_disabled() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3d0f3ec24a242..ffc26ed2c91db 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -578,16 +578,12 @@ def unique(self, level=None): result = super(DatetimeIndex, naive).unique(level=level) return self._shallow_copy(result.values) - def _is_inconsistent(self, other): - is_inconsistent = super(DatetimeIndex, self)._is_inconsistent(other) - if is_inconsistent: - if hasattr(other, 'dtype'): - # If same base, consider consistent, let UTC logic takeover - return self.dtype.base != other.dtype.base - else: - return True - else: - return is_inconsistent + def _is_compatible_with_other(self, other): + is_compat = super(DatetimeIndex, self)._is_compatible_with_other(other) + if not is_compat: + is_compat = (hasattr(other, 'dtype') + and self.dtype.base == other.dtype.base) + return is_compat def union(self, other): """ diff --git 
a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index da5eccecf6513..d50a965d501a6 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -228,16 +228,14 @@ def _assert_safe_casting(cls, data, subarr): raise TypeError('Unsafe NumPy casting, you must ' 'explicitly cast') - def _is_inconsistent(self, other): - from pandas.core.indexes.range import RangeIndex - is_inconsistent = super(Int64Index, self)._is_inconsistent(other) - if is_inconsistent: - if type(self) is Int64Index and isinstance(other, RangeIndex): - return False - else: - return True - else: - return is_inconsistent + def _is_compatible_with_other(self, other): + from pandas.core.dtypes.generic import ABCRangeIndex + is_compat = super(Int64Index, self)._is_compatible_with_other(other) + if not is_compat: + is_compat = (type(self) is Int64Index + and isinstance(other, ABCRangeIndex)) + return is_compat + Int64Index._add_numeric_methods() diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index cfb1b6ee9a580..b0fe5174a00f8 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -416,12 +416,12 @@ def _extended_gcd(self, a, b): old_t, t = t, old_t - quotient * t return old_r, old_s, old_t - def _is_inconsistent(self, other): - is_inconsistent = super(RangeIndex, self)._is_inconsistent(other) - if is_inconsistent: - return not type(other) is Int64Index - else: - return is_inconsistent + + def _is_compatible_with_other(self, other): + is_compat = super(RangeIndex, self)._is_compatible_with_other(other) + if not is_compat: + is_compat = type(other) is Int64Index + return is_compat def union(self, other): """ From 2241b65f871f468c714783048cf8973e84b7b7c9 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Mon, 12 Nov 2018 23:30:57 -0500 Subject: [PATCH 12/53] TST - add test fixture for index factories and use in test_setops --- pandas/tests/indexes/conftest.py | 26 ++++++++++++ pandas/tests/indexes/test_setops.py | 63 
++++++++++++++++++++--------- 2 files changed, 70 insertions(+), 19 deletions(-) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index e82cce873e75c..0feeb3655466d 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -8,6 +8,7 @@ import pandas.util.testing as tm +# add inteval index? @pytest.fixture(params=[tm.makeUnicodeIndex(100), tm.makeStringIndex(100), tm.makeDateIndex(100), @@ -28,6 +29,31 @@ def indices(request): return request.param +def _make_repeating_index(x=10): + # x should be > 1 + return Index(sorted([i for i in range(x//2 + 1)] * 2)[:x]) + + +@pytest.fixture(params=[tm.makeUnicodeIndex, + tm.makeStringIndex, + tm.makeDateIndex, + tm.makePeriodIndex, + tm.makeTimedeltaIndex, + tm.makeIntIndex, + tm.makeUIntIndex, + tm.makeRangeIndex, + tm.makeFloatIndex, + lambda x=10: Index(np.random.choice([True, False], x)), + tm.makeCategoricalIndex, + lambda x=None: Index([]), + tm.makeMultiIndex, + _make_repeating_index, + tm.makeIntervalIndex], + ids=lambda x: type(x).__name__) +def index_factory(request): + return request.param + + @pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) def one(request): # zero-dim integer array behaves like an integer diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index d637c36d4f11d..d1bfd3e289388 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -8,9 +8,13 @@ import pandas as pd import pandas.util.testing as tm +from pandas import Int64Index, RangeIndex +from pandas.core.dtypes.common import is_dtype_equal from pandas.core.dtypes.dtypes import ( PeriodDtype, CategoricalDtype, IntervalDtype) +from pandas.tests.indexes.conftest import index_factory + def makeEmptyIndex(_=None): return pd.Index([]) @@ -33,44 +37,65 @@ def makeEmptyIndex(_=None): COMPATIBLE_INCONSISTENT_PAIRS = { - ('intIndex', 'rangeIndex') + (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex) } 
-@pytest.mark.parametrize('idxType', INDEXES.keys()) -def test_union_same_types(idxType): - idx1 = INDEXES[idxType](10) - idx2 = INDEXES[idxType](20) +# @pytest.mark.parametrize('idxType', INDEXES.keys()) +# def test_union_same_types(idxType): +# idx1 = INDEXES[idxType](10) +# idx2 = INDEXES[idxType](20) +# assert idx1.union(idx2).dtype == idx1.dtype + +# # Note: catIndex reflects only left dtype, should it reflect both? + + +def test_union_same_types(index_factory): + # Union with a non-unique, non-monotonic index raises error + # Only needed for bool index factory + idx1 = index_factory(10).sort_values() + idx2 = index_factory(20).sort_values() assert idx1.union(idx2).dtype == idx1.dtype # Note: catIndex reflects only left dtype, should it reflect both? -@pytest.mark.parametrize('idxType1,idxType2', - list(it.combinations(INDEXES, 2))) -def test_union_different_types(idxType1, idxType2): - if tuple(sorted([idxType1, idxType2])) in COMPATIBLE_INCONSISTENT_PAIRS: +@pytest.mark.parametrize( + 'idxfactory1,idxfactory2', + list(it.combinations(index_factory._pytestfixturefunction.params, 2)) +) +def test_union_different_types(idxfactory1, idxfactory2): + idx1 = idxfactory1(10) + idx2 = idxfactory2(20) + + if (tuple(sorted([type(idx1), type(idx2)], key=lambda x: str(x))) + in COMPATIBLE_INCONSISTENT_PAIRS): + return + + if any(isinstance(idx, pd.MultiIndex) for idx in [idx1, idx2]): return - idx1 = INDEXES[idxType1](10) - idx2 = INDEXES[idxType2](20) + if is_dtype_equal(idx1.dtype, idx2.dtype): + return # A union with a CategoricalIndex (even as dtype('O')) and a # non-CategoricalIndex can only be made if both indices are monotonic. # This is true before this PR as well. 
- if idxType1 == 'catIndex' or idxType2 == 'catIndex': - idx1 = idx1.sort_values() - idx2 = idx2.sort_values() + # + # Union with a non-unique, non-monotonic index raises error + # This applies to the boolean index + idx1 = idx1.sort_values() + idx2 = idx2.sort_values() assert idx1.union(idx2).dtype == np.dtype('O') assert idx2.union(idx1).dtype == np.dtype('O') -@pytest.mark.parametrize('idxType1,idxType2', - COMPATIBLE_INCONSISTENT_PAIRS) -def test_compatible_inconsistent_pairs(idxType1, idxType2): - idx1 = INDEXES[idxType1](10) - idx2 = INDEXES[idxType2](20) +@pytest.mark.parametrize('idx_fact1,idx_fact2', + COMPATIBLE_INCONSISTENT_PAIRS.values()) +def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2): + idx1 = idx_fact1(10) + idx2 = idx_fact2(20) res1 = idx1.union(idx2) res2 = idx2.union(idx1) From 4daf3602c25fa100b61296c68684076ee7081669 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 13 Nov 2018 20:16:17 -0500 Subject: [PATCH 13/53] ENH - cast difference result to original dtype to match other index behavior --- pandas/core/indexes/interval.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index f9f67647a32c5..c8c06093aec7c 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1040,14 +1040,13 @@ def _setop(op_name): def func(self, other): try: other = self._as_like_interval_index(other) - # allow ValueError from this method to raise to catch mixed closed - # except only Non-Interval index mismatches. + # dont catch ValueError so that mixed closed interval indexes raise + # only catch Non-Interval index mismatches. 
except TypeError: - # Currently this will cause difference operations to return - # object dtype as opposed to IntervalIndex, unlike other Index - # objects that return the same type when using `difference` on - # mismatched types - return getattr(self.astype('O'), op_name)(other) + result = getattr(self.astype('O'), op_name)(other) + if op_name in ('difference'): + result = result.astype(self.dtype) + return result # GH 19016: ensure set op will not return a prohibited dtype subtypes = [self.dtype.subtype, other.dtype.subtype] From 6e5a52b3cd91b5e12a2cb69c32707e89d08dac00 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 13 Nov 2018 20:17:34 -0500 Subject: [PATCH 14/53] TST - update interval setop test to account for difference now returning original dtype --- pandas/tests/indexes/interval/test_interval.py | 9 +++++---- pandas/tests/indexes/period/test_setops.py | 4 ---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 206c788e39a5c..54a4396791694 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -839,13 +839,14 @@ def test_set_incompatible_types(self, closed, op_name): index = self.create_index(closed=closed) set_op = getattr(index, op_name) - # non-IntervalIndexf - expected = getattr(index.astype('O'), op_name)(Index([1, 2, 3])) + # non-IntervalIndex + if op_name == 'difference': + expected = index + else: + expected = getattr(index.astype('O'), op_name)(Index([1, 2, 3])) result = set_op(Index([1, 2, 3])) tm.assert_index_equal(result, expected) - # Come back to mixed interval types - # mixed closed msg = ('can only do set operations between two IntervalIndex objects ' 'that are closed on the same side') diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index db1c306476f65..cf6d62a9597fb 100644 --- 
a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -105,10 +105,6 @@ def test_union_misc(self): with pytest.raises(period.IncompatibleFrequency): index.union(index2) - # msg = 'can only call with other PeriodIndex-ed objects' - # with tm.assert_raises_regex(ValueError, msg): - # index.join(index.to_timestamp()) - index3 = period_range('1/1/2000', '1/20/2000', freq='2D') with pytest.raises(period.IncompatibleFrequency): index.join(index3) From d344e11a3d19d7d6499aab64de2535c431c81c99 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 14 Nov 2018 18:49:55 -0500 Subject: [PATCH 15/53] CLN - remove unnecceary code from test --- pandas/tests/indexes/test_setops.py | 33 +++-------------------------- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index d1bfd3e289388..d6a0e0b00efdd 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -16,40 +16,11 @@ from pandas.tests.indexes.conftest import index_factory -def makeEmptyIndex(_=None): - return pd.Index([]) - - -INDEXES = dict( - unicodeIndex=tm.makeUnicodeIndex, - strIndex=tm.makeStringIndex, - dateIndex=tm.makeDateIndex, - periodIndex=tm.makePeriodIndex, - tdIndex=tm.makeTimedeltaIndex, - intIndex=tm.makeIntIndex, - uintIndex=tm.makeUIntIndex, - rangeIndex=tm.makeRangeIndex, - floatIndex=tm.makeFloatIndex, - catIndex=tm.makeCategoricalIndex, - emptyIndex=makeEmptyIndex, - intervalIndex=tm.makeIntervalIndex, -) - - COMPATIBLE_INCONSISTENT_PAIRS = { (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex) } -# @pytest.mark.parametrize('idxType', INDEXES.keys()) -# def test_union_same_types(idxType): -# idx1 = INDEXES[idxType](10) -# idx2 = INDEXES[idxType](20) -# assert idx1.union(idx2).dtype == idx1.dtype - -# # Note: catIndex reflects only left dtype, should it reflect both? 
- - def test_union_same_types(index_factory): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory @@ -65,6 +36,7 @@ def test_union_same_types(index_factory): list(it.combinations(index_factory._pytestfixturefunction.params, 2)) ) def test_union_different_types(idxfactory1, idxfactory2): + # GH 23525 idx1 = idxfactory1(10) idx2 = idxfactory2(20) @@ -81,7 +53,7 @@ def test_union_different_types(idxfactory1, idxfactory2): # A union with a CategoricalIndex (even as dtype('O')) and a # non-CategoricalIndex can only be made if both indices are monotonic. # This is true before this PR as well. - # + # Union with a non-unique, non-monotonic index raises error # This applies to the boolean index idx1 = idx1.sort_values() @@ -94,6 +66,7 @@ def test_union_different_types(idxfactory1, idxfactory2): @pytest.mark.parametrize('idx_fact1,idx_fact2', COMPATIBLE_INCONSISTENT_PAIRS.values()) def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2): + # GH 23525 idx1 = idx_fact1(10) idx2 = idx_fact2(20) From b339bd195af4dd738452110b9fb746ac78e4188b Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 14 Nov 2018 18:52:12 -0500 Subject: [PATCH 16/53] CLN - reorganize some code to make it more readable --- pandas/core/indexes/base.py | 4 +--- pandas/core/indexes/datetimes.py | 1 + pandas/core/indexes/numeric.py | 12 ++++-------- pandas/core/indexes/range.py | 7 ------- 4 files changed, 6 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e4160d1700589..f25a522107db0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2811,13 +2811,11 @@ def _union(self, other): Specific union logic should go here. 
In subclasses union behavior should be overwritten here rather than in `self.union` """ - if len(self) == 0: return other._get_reconciled_name_object(self) elif len(other) == 0: return self._get_reconciled_name_object(other) - - if self.equals(other): + elif self.equals(other): return self._get_reconciled_name_object(other) # TODO(EA): setops-refactor, clean all this up diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1c2ffefa7f9bc..562cce7e50b64 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -565,6 +565,7 @@ def unique(self, level=None): def _is_compatible_with_other(self, other): is_compat = super(DatetimeIndex, self)._is_compatible_with_other(other) if not is_compat: + # Allow mismatched timezones - UTC logic will take over is_compat = (hasattr(other, 'dtype') and self.dtype.base == other.dtype.base) return is_compat diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index ea022c3a2e1c4..23d1daf768f79 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -16,6 +16,7 @@ from pandas import compat from pandas.core import algorithms import pandas.core.common as com +from pandas.core.dtypes.generic import ABCRangeIndex from pandas.core.indexes.base import ( Index, InvalidIndexError, _index_shared_docs) from pandas.util._decorators import Appender, cache_readonly @@ -229,14 +230,9 @@ def _assert_safe_casting(cls, data, subarr): 'explicitly cast') def _is_compatible_with_other(self, other): - from pandas.core.dtypes.generic import ABCRangeIndex - is_compat = super(Int64Index, self)._is_compatible_with_other(other) - if not is_compat: - is_compat = (type(self) is Int64Index - and isinstance(other, ABCRangeIndex)) - return is_compat - - + return (super(Int64Index, self)._is_compatible_with_other(other) + and isinstance(self, (Int64Index, ABCRangeIndex)) + and isintance(type(other), (Int64Index, ABCRangeIndex))) 
Int64Index._add_numeric_methods() Int64Index._add_logical_methods() diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index fb7447456a49c..48b83b4355f7f 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -416,13 +416,6 @@ def _extended_gcd(self, a, b): old_t, t = t, old_t - quotient * t return old_r, old_s, old_t - - def _is_compatible_with_other(self, other): - is_compat = super(RangeIndex, self)._is_compatible_with_other(other) - if not is_compat: - is_compat = type(other) is Int64Index - return is_compat - def union(self, other): """ Form the union of two Index objects and sorts if possible From 85e2db7eeb701aae21c1aa13e71afbecfeadbbf2 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 28 Nov 2018 21:26:22 -0500 Subject: [PATCH 17/53] CLN - pep8 adherence --- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/category.py | 3 ++- pandas/core/indexes/datetimes.py | 2 +- pandas/tests/indexes/conftest.py | 2 +- pandas/tests/indexes/test_setops.py | 4 ++-- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f25a522107db0..6ee3fdc24b0c7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2772,7 +2772,7 @@ def _is_compatible_with_other(self, other): Meaning a union can be formed between them without needing to be cast to dtype object. 
""" - return (type(self) is type(other) + return (type(self) is type(other) and is_dtype_equal(self.dtype, other.dtype)) def union(self, other): @@ -2801,7 +2801,7 @@ def union(self, other): if not self._is_compatible_with_other(other): return self._union_incompatible_dtypes(other) - # This line needs to be after _union_incompatible_dtypes to ensure + # This line needs to be after _union_incompatible_dtypes to ensure # the original type of other is not lost after being cast to Index other = ensure_index(other) return self._union(other) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index edc76e85748e0..20ae7d9c248b0 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -873,7 +873,8 @@ def _delegate_method(self, name, *args, **kwargs): return CategoricalIndex(res, name=self.name) def _is_compatible_with_other(self, other): - return type(self) is type(other) and type(self.dtype) is type(self.dtype) + return (type(self) is type(other) + and type(self.dtype) is type(self.dtype)) CategoricalIndex._add_numeric_methods_add_sub_disabled() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 562cce7e50b64..e1cda79fd0c9e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -566,7 +566,7 @@ def _is_compatible_with_other(self, other): is_compat = super(DatetimeIndex, self)._is_compatible_with_other(other) if not is_compat: # Allow mismatched timezones - UTC logic will take over - is_compat = (hasattr(other, 'dtype') + is_compat = (hasattr(other, 'dtype') and self.dtype.base == other.dtype.base) return is_compat diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 0feeb3655466d..bb02b112a7f5d 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -31,7 +31,7 @@ def indices(request): def _make_repeating_index(x=10): # x should be > 1 - return Index(sorted([i for i in range(x//2 + 
1)] * 2)[:x]) + return Index(sorted([i for i in range(x // 2 + 1)] * 2)[:x]) @pytest.fixture(params=[tm.makeUnicodeIndex, diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index d6a0e0b00efdd..5e11d1513316d 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -40,8 +40,8 @@ def test_union_different_types(idxfactory1, idxfactory2): idx1 = idxfactory1(10) idx2 = idxfactory2(20) - if (tuple(sorted([type(idx1), type(idx2)], key=lambda x: str(x))) - in COMPATIBLE_INCONSISTENT_PAIRS): + pair = tuple(sorted([type(idx1), type(idx2)], key=lambda x: str(x))) + if pair in COMPATIBLE_INCONSISTENT_PAIRS: return if any(isinstance(idx, pd.MultiIndex) for idx in [idx1, idx2]): From cf349602029d0d376b4ff0f0b35f7442a1b132f4 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 28 Nov 2018 21:39:46 -0500 Subject: [PATCH 18/53] CLN - pep8 adherence --- pandas/core/indexes/category.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 20ae7d9c248b0..1cb851732f17c 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -873,7 +873,7 @@ def _delegate_method(self, name, *args, **kwargs): return CategoricalIndex(res, name=self.name) def _is_compatible_with_other(self, other): - return (type(self) is type(other) + return (type(self) is type(other) and type(self.dtype) is type(self.dtype)) From 7150c228f4fc78285e7ce19c688cbb35ac5371e5 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 28 Nov 2018 22:28:18 -0500 Subject: [PATCH 19/53] BUG - fix function name --- pandas/core/indexes/numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 23d1daf768f79..de00948ce508c 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -232,7 +232,7 @@ def _assert_safe_casting(cls, data, 
subarr): def _is_compatible_with_other(self, other): return (super(Int64Index, self)._is_compatible_with_other(other) and isinstance(self, (Int64Index, ABCRangeIndex)) - and isintance(type(other), (Int64Index, ABCRangeIndex))) + and isinstance(type(other), (Int64Index, ABCRangeIndex))) Int64Index._add_numeric_methods() Int64Index._add_logical_methods() From 5aa41f6adcdbf9b17c75326c1f1586c273b6a13c Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sat, 1 Dec 2018 12:29:40 -0500 Subject: [PATCH 20/53] BUG - fix numeric index compatibility --- pandas/core/indexes/numeric.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index ed91dc0438020..e5a574cc18fc6 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -227,8 +227,8 @@ def _assert_safe_casting(cls, data, subarr): def _is_compatible_with_other(self, other): return (super(Int64Index, self)._is_compatible_with_other(other) - and isinstance(self, (Int64Index, ABCRangeIndex)) - and isinstance(type(other), (Int64Index, ABCRangeIndex))) + or (type(self) in (Int64Index, ABCRangeIndex) + and type(other) in (Int64Index, ABCRangeIndex))) Int64Index._add_numeric_methods() Int64Index._add_logical_methods() From 02d7a3baa4d2c049e6a73691eefecc5caa587ae4 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sat, 1 Dec 2018 12:45:51 -0500 Subject: [PATCH 21/53] BUG - actually fix numeric compatibilty check, with passing index tests --- pandas/core/indexes/numeric.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index e5a574cc18fc6..da0bb8d0d438c 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -226,9 +226,14 @@ def _assert_safe_casting(cls, data, subarr): 'explicitly cast') def _is_compatible_with_other(self, other): - return (super(Int64Index, self)._is_compatible_with_other(other) - or 
(type(self) in (Int64Index, ABCRangeIndex) - and type(other) in (Int64Index, ABCRangeIndex))) + return ( + super(Int64Index, self)._is_compatible_with_other(other) + or ( + (type(self) is Int64Index or isinstance(self, ABCRangeIndex)) + and (type(other) is Int64Index or isinstance(other, + ABCRangeIndex)) + ) + ) Int64Index._add_numeric_methods() Int64Index._add_logical_methods() From 558e1824ece108b59e1faba0724f129c773c0ea2 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sun, 2 Dec 2018 17:12:47 -0500 Subject: [PATCH 22/53] DOC - initial whatsnew --- doc/source/whatsnew/v0.24.0.rst | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index fc505128a2e20..a160dd8b6276a 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1065,6 +1065,36 @@ Datetimelike API Changes - :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) - :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`) +.. _whatsnew_0240.api.incompatible_index_unions + +Incompatible Index Type Unions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When performing an :func:`Index.union` operation, between objects of incompatible dtypes, the result will be a base :class:`Index` of dtype `object`. +This behavior holds true for unions between :class:`Index` objects that previously would have been prohibited. +The dtype of empty :class:`Index` objects will now be evaluated before performing the union as well, instead of simply returning the other :class:`Index` object. + + +Previous Behavior: + +.. 
code-block:: ipython + + In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + ValueError: can only call with other PeriodIndex-ed objects + + In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) + Out[11]: Int64Index([1, 2, 3], dtype='int64') + +Current Behavior: + +.. code-block:: ipython + + In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + Index([1991-09-05, 1991-09-06, 1, 2, 3], dtype='object') + + In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) + Out[11]: Index([1, 2, 3], dtype='object') + .. _whatsnew_0240.api.other: Other API Changes From 706f973e1d62ae6d71808a4d61e696442cd420f4 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Mon, 3 Dec 2018 22:16:06 -0500 Subject: [PATCH 23/53] ENH - no longer consider category indexes containing different categories explicitly compatible, same goes for datetimeindexes with different timezones --- pandas/core/indexes/category.py | 4 ---- pandas/core/indexes/datetimes.py | 10 +--------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index ac842153211eb..91c7648d5cf2e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -880,10 +880,6 @@ def _delegate_method(self, name, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) - def _is_compatible_with_other(self, other): - return (type(self) is type(other) - and type(self.dtype) is type(self.dtype)) - CategoricalIndex._add_numeric_methods_add_sub_disabled() CategoricalIndex._add_numeric_methods_disabled() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 7a53c2e3b6457..f5af812370b90 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -470,14 +470,6 @@ def _formatter_func(self): # -------------------------------------------------------------------- # Set Operation Methods - def 
_is_compatible_with_other(self, other): - is_compat = super(DatetimeIndex, self)._is_compatible_with_other(other) - if not is_compat: - # Allow mismatched timezones - UTC logic will take over - is_compat = (hasattr(other, 'dtype') - and self.dtype.base == other.dtype.base) - return is_compat - def union(self, other): """ Specialized union for DatetimeIndex objects. If combine @@ -495,7 +487,7 @@ def union(self, other): return super(DatetimeIndex, self).union(other) def _union(self, other): - if len(other) == 0 or self.equals(other) or len(self) == 0: + if not len(other) or self.equals(other) or not len(self): return super(DatetimeIndex, self)._union(other) if not isinstance(other, DatetimeIndex): From 2ccab59b95f3529bf61fd5a52a14a5afacd10c71 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Mon, 3 Dec 2018 22:17:15 -0500 Subject: [PATCH 24/53] TST/CLN - no longer need new index_factory fixture and make code more pythonic --- pandas/core/indexes/base.py | 17 ++++-------- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/period.py | 6 ++--- pandas/core/indexes/range.py | 2 +- pandas/tests/indexes/conftest.py | 26 +------------------ pandas/tests/indexes/datetimes/test_setops.py | 3 ++- .../tests/indexes/datetimes/test_timezones.py | 5 +++- pandas/tests/indexes/test_setops.py | 17 +++++------- pandas/tests/series/test_operators.py | 22 ++++++++-------- 9 files changed, 35 insertions(+), 65 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a4c722fe5cf10..46987af54d08d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2251,10 +2251,10 @@ def _union_incompatible_dtypes(self, other): Casts this and other index to object dtype to allow the formation of a union between incompatible types. 
""" - this = self.astype('O') + this = self.astype(object) # call Index for when `other` is list-like - other = Index(other).astype('O') - return Index.union(this, other).astype('O') + other = Index(other).astype(object) + return Index.union(this, other).astype(object) def _is_compatible_with_other(self, other): """ @@ -2300,20 +2300,13 @@ def _union(self, other): Specific union logic should go here. In subclasses union behavior should be overwritten here rather than in `self.union` """ - if len(self) == 0: - return other._get_reconciled_name_object(self) - elif len(other) == 0: - return self._get_reconciled_name_object(other) - elif self.equals(other): - return self._get_reconciled_name_object(other) - self._assert_can_do_setop(other) other = ensure_index(other) - if len(other) == 0 or self.equals(other): + if not len(other) or self.equals(other): return self._get_reconciled_name_object(other) - if len(self) == 0: + if not len(self): return other._get_reconciled_name_object(self) # TODO(EA): setops-refactor, clean all this up diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 24cc1c9669273..7ca0379be64ee 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1098,7 +1098,7 @@ def func(self, other, sort=True): # dont catch ValueError so that mixed closed interval indexes raise # only catch Non-Interval index mismatches. 
except TypeError: - result = getattr(self.astype('O'), op_name)(other) + result = getattr(self.astype(object), op_name)(other) if op_name in ('difference'): result = result.astype(self.dtype) return result diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 65e510bfa0d87..4c09bfdc9e301 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -830,9 +830,9 @@ def join(self, other, how='left', level=None, return_indexers=False, self._assert_can_do_setop(other) if not isinstance(other, PeriodIndex): - return self.astype('O').join(other, how=how, level=level, - return_indexers=return_indexers, - sort=sort) + return self.astype(object).join(other, how=how, level=level, + return_indexers=return_indexers, + sort=sort) result = Int64Index.join(self, other, how=how, level=level, return_indexers=return_indexers, diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ad3252c2ac20b..d1b7403d0fada 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -440,7 +440,7 @@ def union(self, other): return super(RangeIndex, self).union(other) def _union(self, other): - if len(other) == 0 or self.equals(other) or len(self) == 0: + if not len(other) or self.equals(other) or not len(self): return super(RangeIndex, self)._union(other) if isinstance(other, RangeIndex): diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index bb02b112a7f5d..42280153b971a 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -20,6 +20,7 @@ tm.makeFloatIndex(100), Index([True, False]), tm.makeCategoricalIndex(100), + tm.makeIntervalIndex(100), Index([]), MultiIndex.from_tuples(lzip( ['foo', 'bar', 'baz'], [1, 2, 3])), @@ -29,31 +30,6 @@ def indices(request): return request.param -def _make_repeating_index(x=10): - # x should be > 1 - return Index(sorted([i for i in range(x // 2 + 1)] * 2)[:x]) - - -@pytest.fixture(params=[tm.makeUnicodeIndex, - 
tm.makeStringIndex, - tm.makeDateIndex, - tm.makePeriodIndex, - tm.makeTimedeltaIndex, - tm.makeIntIndex, - tm.makeUIntIndex, - tm.makeRangeIndex, - tm.makeFloatIndex, - lambda x=10: Index(np.random.choice([True, False], x)), - tm.makeCategoricalIndex, - lambda x=None: Index([]), - tm.makeMultiIndex, - _make_repeating_index, - tm.makeIntervalIndex], - ids=lambda x: type(x).__name__) -def index_factory(request): - return request.param - - @pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) def one(request): # zero-dim integer array behaves like an integer diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 17b1d204013f2..7eefe0cbd3e78 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -267,7 +267,8 @@ def test_datetimeindex_union_join_empty(self): empty = Index([]) result = dti.union(empty) - tm.assert_index_equal(result, dti.astype('O')) + expected = dti.astype('O') + tm.assert_index_equal(result, expected) result = dti.join(empty) assert isinstance(result, DatetimeIndex) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 8c7d20684fd8c..a94895fe336f9 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1037,7 +1037,10 @@ def test_dti_union_aware(self): tz="US/Eastern") result = rng.union(rng2) - assert result.tz.zone == 'UTC' + expected = rng.astype('O').union(rng2.astype('O')) + tm.assert_index_equal(result, expected) + assert result[0].tz.zone == 'US/Central' + assert result[-1].tz.zone == 'US/Eastern' @pytest.mark.parametrize('tz', [None, 'UTC', "US/Central", dateutil.tz.tzoffset(None, -28800)]) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 5e11d1513316d..1f6fde22d6023 100644 --- a/pandas/tests/indexes/test_setops.py +++ 
b/pandas/tests/indexes/test_setops.py @@ -13,7 +13,7 @@ from pandas.core.dtypes.dtypes import ( PeriodDtype, CategoricalDtype, IntervalDtype) -from pandas.tests.indexes.conftest import index_factory +from pandas.tests.indexes.conftest import indices COMPATIBLE_INCONSISTENT_PAIRS = { @@ -21,25 +21,22 @@ } -def test_union_same_types(index_factory): +def test_union_same_types(indices): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory - idx1 = index_factory(10).sort_values() - idx2 = index_factory(20).sort_values() + idx1 = indices.sort_values() + idx2 = indices.sort_values() assert idx1.union(idx2).dtype == idx1.dtype # Note: catIndex reflects only left dtype, should it reflect both? @pytest.mark.parametrize( - 'idxfactory1,idxfactory2', - list(it.combinations(index_factory._pytestfixturefunction.params, 2)) + 'idx1,idx2', + list(it.combinations(indices._pytestfixturefunction.params, 2)) ) -def test_union_different_types(idxfactory1, idxfactory2): +def test_union_different_types(idx1, idx2): # GH 23525 - idx1 = idxfactory1(10) - idx2 = idxfactory2(20) - pair = tuple(sorted([type(idx1), type(idx2)], key=lambda x: str(x))) if pair in COMPATIBLE_INCONSISTENT_PAIRS: return diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 3e791db32649f..1ea3c11adf10a 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -190,17 +190,17 @@ def test_scalar_na_logical_ops_corners(self): operator.and_, operator.or_, operator.xor, - # pytest.param(ops.rand_, - # marks=pytest.mark.xfail(reason="GH#22092 Index " - # "implementation returns " - # "Index", - # raises=AssertionError, - # strict=True)), - # pytest.param(ops.ror_, - # marks=pytest.mark.xfail(reason="Index.get_indexer " - # "with non unique index", - # raises=InvalidIndexError, - # strict=True)), + pytest.param(ops.rand_, + marks=pytest.mark.xfail(reason="GH#22092 Index " + "implementation returns 
" + "Index", + raises=AssertionError, + strict=True)), + pytest.param(ops.ror_, + marks=pytest.mark.xfail(reason="Index.get_indexer " + "with non unique index", + raises=InvalidIndexError, + strict=True)), # pytest.param(ops.rxor, # marks=pytest.mark.xfail(reason="GH#22092 Index " # "implementation raises", From c70f1c051769f496df479952a94544c8b0896cdc Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 4 Dec 2018 22:38:36 -0500 Subject: [PATCH 25/53] CLN - make code more readable --- pandas/core/indexes/base.py | 14 +++++++++++--- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/numeric.py | 8 ++------ pandas/core/indexes/range.py | 14 -------------- 4 files changed, 14 insertions(+), 24 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 46987af54d08d..4c13380a723ae 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2252,9 +2252,9 @@ def _union_incompatible_dtypes(self, other): of a union between incompatible types. """ this = self.astype(object) - # call Index for when `other` is list-like - other = Index(other).astype(object) - return Index.union(this, other).astype(object) + # cast to Index for when `other` is list-like + other = Index(other, dtype=object, copy=False) + return Index._union(this, other).astype(object) def _is_compatible_with_other(self, other): """ @@ -2269,6 +2269,9 @@ def union(self, other): """ Form the union of two Index objects and sorts if possible. + If the Index objects are incompatible, both Index objects will be + cast to dtype('O') first. 
+ Parameters ---------- other : Index or array-like @@ -2284,6 +2287,11 @@ def union(self, other): >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.union(idx2) Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') + + >>> idx1 = pd.Index(['a', 'b', 'c', 'd']) + >>> idx2 = pd.Index([1, 2, 3, 4]) + >>> idx1.union(idx2) + Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') """ self._assert_can_do_setop(other) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 7ca0379be64ee..45281c8b938dc 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1099,7 +1099,7 @@ def func(self, other, sort=True): # only catch Non-Interval index mismatches. except TypeError: result = getattr(self.astype(object), op_name)(other) - if op_name in ('difference'): + if op_name in ('difference',): result = result.astype(self.dtype) return result diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index da0bb8d0d438c..016645766a881 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -14,7 +14,7 @@ from pandas.core import algorithms import pandas.core.common as com -from pandas.core.dtypes.generic import ABCRangeIndex +from pandas.core.dtypes.generic import ABCRangeIndex, ABCInt64Index import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, InvalidIndexError, _index_shared_docs) @@ -228,11 +228,7 @@ def _assert_safe_casting(cls, data, subarr): def _is_compatible_with_other(self, other): return ( super(Int64Index, self)._is_compatible_with_other(other) - or ( - (type(self) is Int64Index or isinstance(self, ABCRangeIndex)) - and (type(other) is Int64Index or isinstance(other, - ABCRangeIndex)) - ) + or all([isinstance(type(obj), (ABCInt64Index, ABCRangeIndex)) for obj in [self, other]]) ) Int64Index._add_numeric_methods() diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index d1b7403d0fada..404e98e7739cf 100644 --- 
a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -425,20 +425,6 @@ def _extended_gcd(self, a, b): old_t, t = t, old_t - quotient * t return old_r, old_s, old_t - def union(self, other): - """ - Form the union of two Index objects and sorts if possible - - Parameters - ---------- - other : Index or array-like - - Returns - ------- - union : Index - """ - return super(RangeIndex, self).union(other) - def _union(self, other): if not len(other) or self.equals(other) or not len(self): return super(RangeIndex, self)._union(other) From edb7e9c737ee964b86eb83f5ffd8327580a2a6b8 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 4 Dec 2018 22:39:25 -0500 Subject: [PATCH 26/53] CLN - pep8 adherence --- pandas/core/indexes/numeric.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 016645766a881..8e10103cdf400 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -228,7 +228,8 @@ def _assert_safe_casting(cls, data, subarr): def _is_compatible_with_other(self, other): return ( super(Int64Index, self)._is_compatible_with_other(other) - or all([isinstance(type(obj), (ABCInt64Index, ABCRangeIndex)) for obj in [self, other]]) + or all([isinstance(type(obj), (ABCInt64Index, ABCRangeIndex)) + for obj in [self, other]]) ) Int64Index._add_numeric_methods() From aba75fe31045f97027b733393383f1f9da3451b4 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 4 Dec 2018 22:51:48 -0500 Subject: [PATCH 27/53] DOC - fix whatsnew entry --- doc/source/whatsnew/v0.24.0.rst | 62 +++++++++++++++++---------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 71dd5f7fc22a4..c885136aaf51d 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -731,6 +731,38 @@ Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with 
df.to_dict(orient='index') +.. _whatsnew_0240.api_breaking.incompatible_index_unions + +Incompatible Index Type Unions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When performing an :func:`Index.union` operation, between objects of incompatible dtypes, the result will be a base :class:`Index` of dtype `object`. +This behavior holds true for unions between :class:`Index` objects that previously would have been prohibited. +The dtype of empty :class:`Index` objects will now be evaluated before performing the union as well, instead of simply returning the other :class:`Index` object. + + +Previous Behavior: + +.. code-block:: ipython + + + In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + ... + ValueError: can only call with other PeriodIndex-ed objects + + In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) + Out[2]: Int64Index([1, 2, 3], dtype='int64') + +Current Behavior: + +.. code-block:: ipython + + In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + Out[1]: Index([1991-09-05, 1991-09-06, 1, 2, 3], dtype='object') + + In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) + Out[2]: Index([1, 2, 3], dtype='object') + .. _whatsnew_0240.api.datetimelike.normalize: Tick DateOffset Normalize Restrictions @@ -1068,36 +1100,6 @@ Datetimelike API Changes - :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) - :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`) -.. _whatsnew_0240.api.incompatible_index_unions - -Incompatible Index Type Unions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When performing an :func:`Index.union` operation, between objects of incompatible dtypes, the result will be a base :class:`Index` of dtype `object`. 
-This behavior holds true for unions between :class:`Index` objects that previously would have been prohibited. -The dtype of empty :class:`Index` objects will now be evaluated before performing the union as well, instead of simply returning the other :class:`Index` object. - - -Previous Behavior: - -.. code-block:: ipython - - In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) - ValueError: can only call with other PeriodIndex-ed objects - - In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) - Out[11]: Int64Index([1, 2, 3], dtype='int64') - -Current Behavior: - -.. code-block:: ipython - - In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) - Index([1991-09-05, 1991-09-06, 1, 2, 3], dtype='object') - - In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) - Out[11]: Index([1, 2, 3], dtype='object') - .. _whatsnew_0240.api.other: Other API Changes From fc9f138003505c67bce19dccfcf576fdb3eeed29 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 4 Dec 2018 23:42:38 -0500 Subject: [PATCH 28/53] BUG - change object dtype index construction --- pandas/core/indexes/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5c04839ac8b9a..861ee1e60bbc3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2253,8 +2253,8 @@ def _union_incompatible_dtypes(self, other): """ this = self.astype(object) # cast to Index for when `other` is list-like - other = Index(other, dtype=object, copy=False) - return Index._union(this, other).astype(object) + other = Index(other).astype(object) + return Index.union(this, other).astype(object) def _is_compatible_with_other(self, other): """ From fdfc7d7b998148990ea29abc638ef15af631a085 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 5 Dec 2018 22:23:37 -0500 Subject: [PATCH 29/53] CLN/BUG - clean according to failed pandas-dev style checks ---
pandas/core/indexes/base.py | 2 +- pandas/core/indexes/numeric.py | 5 +++-- pandas/tests/indexes/test_setops.py | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index caa700b5f7569..fedf38ea36f7c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -19,7 +19,7 @@ ensure_categorical, ensure_int64, ensure_object, ensure_platform_int, is_bool, is_bool_dtype, is_categorical, is_categorical_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype, is_dtype_equal, - is_dtype_union_equal, is_extension_array_dtype, is_float, is_float_dtype, + is_extension_array_dtype, is_float, is_float_dtype, is_hashable, is_integer, is_integer_dtype, is_interval_dtype, is_iterator, is_list_like, is_object_dtype, is_period_dtype, is_scalar, is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 8e10103cdf400..e152ee197aa6a 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -228,10 +228,11 @@ def _assert_safe_casting(cls, data, subarr): def _is_compatible_with_other(self, other): return ( super(Int64Index, self)._is_compatible_with_other(other) - or all([isinstance(type(obj), (ABCInt64Index, ABCRangeIndex)) - for obj in [self, other]]) + or all(isinstance(type(obj), (ABCInt64Index, ABCRangeIndex)) + for obj in [self, other]) ) + Int64Index._add_numeric_methods() Int64Index._add_logical_methods() diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 1f6fde22d6023..27e9975a122d1 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -10,8 +10,6 @@ import pandas.util.testing as tm from pandas import Int64Index, RangeIndex from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.dtypes import ( - PeriodDtype, CategoricalDtype, IntervalDtype) from 
pandas.tests.indexes.conftest import indices From 42ca70e0eb6ff9096d230ddffffd8485dbbc29b6 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Thu, 6 Dec 2018 21:21:37 -0500 Subject: [PATCH 30/53] CLN - fix imports with isort --- pandas/core/indexes/base.py | 8 ++++---- pandas/core/indexes/numeric.py | 2 +- pandas/tests/indexes/test_setops.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fedf38ea36f7c..b4996e9ca9819 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -19,10 +19,10 @@ ensure_categorical, ensure_int64, ensure_object, ensure_platform_int, is_bool, is_bool_dtype, is_categorical, is_categorical_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype, is_dtype_equal, - is_extension_array_dtype, is_float, is_float_dtype, - is_hashable, is_integer, is_integer_dtype, is_interval_dtype, is_iterator, - is_list_like, is_object_dtype, is_period_dtype, is_scalar, - is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype) + is_extension_array_dtype, is_float, is_float_dtype, is_hashable, + is_integer, is_integer_dtype, is_interval_dtype, is_iterator, is_list_like, + is_object_dtype, is_period_dtype, is_scalar, is_signed_integer_dtype, + is_timedelta64_dtype, is_unsigned_integer_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDateOffset, ABCDatetimeIndex, ABCIndexClass, diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index e152ee197aa6a..3b371f5d6c847 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -10,11 +10,11 @@ is_bool, is_bool_dtype, is_dtype_equal, is_float, is_integer_dtype, is_scalar, needs_i8_conversion, pandas_dtype) import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.generic import ABCInt64Index, ABCRangeIndex from pandas.core.dtypes.missing import isna from pandas.core import 
algorithms import pandas.core.common as com -from pandas.core.dtypes.generic import ABCRangeIndex, ABCInt64Index import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( Index, InvalidIndexError, _index_shared_docs) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 27e9975a122d1..a25142878b6a5 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -3,16 +3,16 @@ set operations. ''' import itertools as it + import numpy as np import pytest -import pandas as pd -import pandas.util.testing as tm -from pandas import Int64Index, RangeIndex from pandas.core.dtypes.common import is_dtype_equal +import pandas as pd +from pandas import Int64Index, RangeIndex from pandas.tests.indexes.conftest import indices - +import pandas.util.testing as tm COMPATIBLE_INCONSISTENT_PAIRS = { (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex) From 5b2564595815c46c9c8a4c6016d30300eec665b6 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Fri, 7 Dec 2018 23:09:41 -0500 Subject: [PATCH 31/53] CLN - refactor tests and remove overridden public union methods --- pandas/core/indexes/datetimes.py | 16 ----------- pandas/core/indexes/timedeltas.py | 16 ----------- pandas/tests/series/test_operators.py | 39 +++++++++++++++++++-------- 3 files changed, 28 insertions(+), 43 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 923449497e8cf..208c54a85cde0 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -408,22 +408,6 @@ def _formatter_func(self): # -------------------------------------------------------------------- # Set Operation Methods - def union(self, other): - """ - Specialized union for DatetimeIndex objects.
If combine - overlapping ranges with the same DateOffset, will be much - faster than Index.union - - Parameters - ---------- - other : DatetimeIndex or array-like - - Returns - ------- - y : Index or DatetimeIndex - """ - return super(DatetimeIndex, self).union(other) - def _union(self, other): if not len(other) or self.equals(other) or not len(self): return super(DatetimeIndex, self)._union(other) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 04040152b6bb8..886c888cc38d8 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -293,22 +293,6 @@ def astype(self, dtype, copy=True): return Index(result.astype('i8'), name=self.name) return super(TimedeltaIndex, self).astype(dtype, copy=copy) - def union(self, other): - """ - Specialized union for TimedeltaIndex objects. If combine - overlapping ranges with the same DateOffset, will be much - faster than Index.union - - Parameters - ---------- - other : TimedeltaIndex or array-like - - Returns - ------- - y : Index or TimedeltaIndex - """ - return super(TimedeltaIndex, self).union(other) - def _union(self, other): if len(other) == 0 or self.equals(other) or len(self) == 0: return super(TimedeltaIndex, self)._union(other) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 1ea3c11adf10a..23c3b6618c2c0 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -18,7 +18,8 @@ import pandas.core.nanops as nanops import pandas.util.testing as tm from pandas.util.testing import ( - assert_almost_equal, assert_frame_equal, assert_series_equal) + assert_almost_equal, assert_frame_equal, assert_index_equal, + assert_series_equal) from .common import TestData @@ -190,6 +191,25 @@ def test_scalar_na_logical_ops_corners(self): operator.and_, operator.or_, operator.xor, + ]) + def test_logical_ops_with_index(self, op): + # GH#22092, GH#19792 + ser = Series([True, True, 
False, False]) + idx1 = Index([True, False, True, False]) + idx2 = Index([1, 0, 1, 0]) + + expected = Series([op(ser[n], idx1[n]) for n in range(len(ser))]) + + result = op(ser, idx1) + assert_series_equal(result, expected) + + expected = Series([op(ser[n], idx2[n]) for n in range(len(ser))], + dtype=bool) + + result = op(ser, idx2) + assert_series_equal(result, expected) + + @pytest.mark.parametrize('op', [ pytest.param(ops.rand_, marks=pytest.mark.xfail(reason="GH#22092 Index " "implementation returns " @@ -201,27 +221,24 @@ def test_scalar_na_logical_ops_corners(self): "with non unique index", raises=InvalidIndexError, strict=True)), - # pytest.param(ops.rxor, - # marks=pytest.mark.xfail(reason="GH#22092 Index " - # "implementation raises", - # raises=TypeError, strict=True)) + ops.rxor, ]) - def test_logical_ops_with_index(self, op): + def test_reversed_logical_ops_with_index(self, op): # GH#22092, GH#19792 ser = Series([True, True, False, False]) idx1 = Index([True, False, True, False]) idx2 = Index([1, 0, 1, 0]) - expected = Series([op(ser[n], idx1[n]) for n in range(len(ser))]) + # symmetric_difference is only for rxor, but other 2 should fail + expected = idx1.symmetric_difference(ser) result = op(ser, idx1) - assert_series_equal(result, expected) + assert_index_equal(result, expected) - expected = Series([op(ser[n], idx2[n]) for n in range(len(ser))], - dtype=bool) + expected = idx2.symmetric_difference(ser) result = op(ser, idx2) - assert_series_equal(result, expected) + assert_index_equal(result, expected) @pytest.mark.parametrize("op, expected", [ (ops.rand_, pd.Index([False, True])), From fdf9b719cc50e8fc60c444ab4c14fe0f96be65a7 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sat, 8 Dec 2018 17:15:25 -0500 Subject: [PATCH 32/53] CLN - make code more efficient and cleanup whatsnew --- doc/source/whatsnew/v0.24.0.rst | 8 +++++--- pandas/core/indexes/base.py | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git 
a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 3eef2990f3ecb..09293bce4416f 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -736,9 +736,11 @@ Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with Incompatible Index Type Unions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When performing an :func:`Index.union` operation, between objects of incompatible dtypes, the result will be a base :class:`Index` of dtype `object`. -This behavior holds true for unions between :class:`Index` objects that previously would have been prohibited. -The dtype of empty :class:`Index` objects will now be evaluated before performing the union as well, instead of simply returning the other :class:`Index` object. +When performing an :func:`Index.union` operation, between objects of incompatible dtypes, +the result will be a base :class:`Index` of dtype `object`. This behavior holds true for +unions between :class:`Index` objects that previously would have been prohibited. The dtype +of empty :class:`Index` objects will now be evaluated before performing union operations +rather than simply returning the other :class:`Index` object (:issue:`23525`). Previous Behavior: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index de74aea3150ff..175be0b023ba3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2251,10 +2251,10 @@ def _union_incompatible_dtypes(self, other): Casts this and other index to object dtype to allow the formation of a union between incompatible types. 
""" - this = self.astype(object) + this = self.astype(object, copy=False) # cast to Index for when `other` is list-like - other = Index(other).astype(object) - return Index.union(this, other).astype(object) + other = Index(other).astype(object, copy=False) + return Index.union(this, other).astype(object, copy=False) def _is_compatible_with_other(self, other): """ From 8ed10937e5c18033f05f6f1b2f24cce06dc26b23 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 1 Jan 2019 17:32:34 -0500 Subject: [PATCH 33/53] DOC - fix ipython code block --- doc/source/whatsnew/v0.24.0.rst | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index e2e1b5c39f92c..9d044455f27ba 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -812,25 +812,19 @@ rather than simply returning the other :class:`Index` object (:issue:`23525`). Previous Behavior: -.. code-block:: ipython +.. ipython:: python - In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) - ... - ValueError: can only call with other PeriodIndex-ed objects In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) - Out[2]: Int64Index([1, 2, 3], dtype='int64') Current Behavior: -.. code-block:: ipython +.. ipython:: python In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) - Out[1]: Index([1991-09-05, 1991-09-06, 1, 2, 3], dtype='object') In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) - Out[2]: Index([1, 2, 3], dtype='object') .. 
_whatsnew_0240.api.datetimelike.normalize: From 77ca3a32c2cd52b92b579c36dee1c68e139cd2f6 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 1 Jan 2019 21:30:09 -0500 Subject: [PATCH 34/53] DOC - fix whatsnew code blocks again --- doc/source/whatsnew/v0.24.0.rst | 14 ++++++++------ pandas/tests/indexes/test_setops.py | 2 -- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 9d044455f27ba..2710c10775e1b 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -810,21 +810,23 @@ of empty :class:`Index` objects will now be evaluated before performing union op rather than simply returning the other :class:`Index` object (:issue:`23525`). -Previous Behavior: +*Previous Behavior*: -.. ipython:: python +.. code-block:: ipython In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + ... + ValueError: can only call with other PeriodIndex-ed objects In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) + Out[2]: Int64Index([1, 2, 3], dtype='int64') -Current Behavior: +*Current Behavior*: .. ipython:: python - In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) - - In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) + pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) .. _whatsnew_0240.api.datetimelike.normalize: diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index a25142878b6a5..308d4b2cf52fd 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -26,8 +26,6 @@ def test_union_same_types(indices): idx2 = indices.sort_values() assert idx1.union(idx2).dtype == idx1.dtype - # Note: catIndex reflects only left dtype, should it reflect both? 
- @pytest.mark.parametrize( 'idx1,idx2', From 5921038eba97ba20175b07233d15d1b311b9c3f5 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 2 Jan 2019 22:12:42 -0500 Subject: [PATCH 35/53] CLN - clean up some code, tests and docs --- doc/source/whatsnew/v0.24.0.rst | 3 ++- pandas/core/indexes/base.py | 1 - pandas/tests/indexes/conftest.py | 37 +++++++++++++++-------------- pandas/tests/indexes/test_setops.py | 4 ++-- 4 files changed, 23 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 2710c10775e1b..7b8d3b1c72573 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -807,7 +807,8 @@ When performing an :func:`Index.union` operation, between objects of incompatibl the result will be a base :class:`Index` of dtype `object`. This behavior holds true for unions between :class:`Index` objects that previously would have been prohibited. The dtype of empty :class:`Index` objects will now be evaluated before performing union operations -rather than simply returning the other :class:`Index` object (:issue:`23525`). +rather than simply returning the other :class:`Index` object. :func:`Index.union` can now be +considered commutative, such that A.union(B) == B.union(A) (:issue:`23525`). 
*Previous Behavior*: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 84ed875a8db8b..456cc65dfdac5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2298,7 +2298,6 @@ def _union(self, other): should be overwritten here rather than in `self.union` """ self._assert_can_do_setop(other) - other = ensure_index(other) if not len(other) or self.equals(other): return self._get_reconciled_name_object(other) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 42280153b971a..fce395a50a312 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -8,24 +8,25 @@ import pandas.util.testing as tm -# add inteval index? -@pytest.fixture(params=[tm.makeUnicodeIndex(100), - tm.makeStringIndex(100), - tm.makeDateIndex(100), - tm.makePeriodIndex(100), - tm.makeTimedeltaIndex(100), - tm.makeIntIndex(100), - tm.makeUIntIndex(100), - tm.makeRangeIndex(100), - tm.makeFloatIndex(100), - Index([True, False]), - tm.makeCategoricalIndex(100), - tm.makeIntervalIndex(100), - Index([]), - MultiIndex.from_tuples(lzip( - ['foo', 'bar', 'baz'], [1, 2, 3])), - Index([0, 0, 1, 1, 2, 2])], - ids=lambda x: type(x).__name__) +indices_list = [tm.makeUnicodeIndex(100), + tm.makeStringIndex(100), + tm.makeDateIndex(100), + tm.makePeriodIndex(100), + tm.makeTimedeltaIndex(100), + tm.makeIntIndex(100), + tm.makeUIntIndex(100), + tm.makeRangeIndex(100), + tm.makeFloatIndex(100), + Index([True, False]), + tm.makeCategoricalIndex(100), + tm.makeIntervalIndex(100), + Index([]), + MultiIndex.from_tuples(lzip( + ['foo', 'bar', 'baz'], [1, 2, 3])), + Index([0, 0, 1, 1, 2, 2])] + + +@pytest.fixture(params=indices_list, ids=lambda x: type(x).__name__) def indices(request): return request.param diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 308d4b2cf52fd..359addf33498c 100644 --- a/pandas/tests/indexes/test_setops.py +++ 
b/pandas/tests/indexes/test_setops.py @@ -11,7 +11,7 @@ import pandas as pd from pandas import Int64Index, RangeIndex -from pandas.tests.indexes.conftest import indices +from pandas.tests.indexes.conftest import indices_list import pandas.util.testing as tm COMPATIBLE_INCONSISTENT_PAIRS = { @@ -29,7 +29,7 @@ def test_union_same_types(indices): @pytest.mark.parametrize( 'idx1,idx2', - list(it.combinations(indices._pytestfixturefunction.params, 2)) + list(it.combinations(indices_list, 2)) ) def test_union_different_types(idx1, idx2): # GH 23525 From 3b94e3b4c7d94f07bf75fd71da4e3075de0e8c53 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 8 Jan 2019 21:10:13 -0500 Subject: [PATCH 36/53] CLN - reorganize some code and add TODOs --- pandas/core/indexes/base.py | 1 + pandas/core/indexes/interval.py | 25 ++++++------------- .../tests/indexes/interval/test_interval.py | 1 + 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 456cc65dfdac5..539ce1d85b24f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2350,6 +2350,7 @@ def _union(self, other): def _wrap_setop_result(self, other, result): return self._constructor(result, name=get_op_result_name(self, other)) + # TODO: standardize return type of non-union setops type(self vs other) def intersection(self, other): """ Form the intersection of two Index objects. 
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index feccdf7190610..06c1bb79d222b 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -964,19 +964,6 @@ def insert(self, loc, item): new_right = self.right.insert(loc, right_insert) return self._shallow_copy(new_left, new_right) - def _as_like_interval_index(self, other): - self._assert_can_do_setop(other) - other = ensure_index(other) - if not isinstance(other, IntervalIndex): - msg = ('the other index needs to be an IntervalIndex too, but ' - 'was type {}').format(other.__class__.__name__) - raise TypeError(msg) - elif self.closed != other.closed: - msg = ('can only do set operations between two IntervalIndex ' - 'objects that are closed on the same side') - raise ValueError(msg) - return other - def _concat_same_dtype(self, to_concat, name): """ assert that we all have the same .closed @@ -1092,15 +1079,17 @@ def overlaps(self, other): def _setop(op_name): def func(self, other, sort=True): - try: - other = self._as_like_interval_index(other) - # dont catch ValueError so that mixed closed interval indexes raise - # only catch Non-Interval index mismatches. 
- except TypeError: + self._assert_can_do_setop(other) + other = ensure_index(other) + if not isinstance(other, IntervalIndex): result = getattr(self.astype(object), op_name)(other) if op_name in ('difference',): result = result.astype(self.dtype) return result + elif self.closed != other.closed: + msg = ('can only do set operations between two IntervalIndex ' + 'objects that are closed on the same side') + raise ValueError(msg) # GH 19016: ensure set op will not return a prohibited dtype subtypes = [self.dtype.subtype, other.dtype.subtype] diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 3b9d187ad8aee..3b1551d17b272 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -877,6 +877,7 @@ def test_set_incompatible_types(self, closed, op_name): index = self.create_index(closed=closed) set_op = getattr(index, op_name) + # TODO: standardize return type of non-union setops type(self vs other) # non-IntervalIndex if op_name == 'difference': expected = index From fd4510ef5e088ce05b9e5225d79f67b324b43be8 Mon Sep 17 00:00:00 2001 From: ArtinSarraf Date: Sun, 13 Jan 2019 23:09:02 -0500 Subject: [PATCH 37/53] CLN - remove trailing whitespace --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 7b8d3b1c72573..48a03593196ea 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -808,7 +808,7 @@ the result will be a base :class:`Index` of dtype `object`. This behavior holds unions between :class:`Index` objects that previously would have been prohibited. The dtype of empty :class:`Index` objects will now be evaluated before performing union operations rather than simply returning the other :class:`Index` object. 
:func:`Index.union` can now be -considered commutative, such that A.union(B) == B.union(A) (:issue:`23525`). +considered commutative, such that A.union(B) == B.union(A) (:issue:`23525`). *Previous Behavior*: From 5de3d57d4ca13d7e68abb00af11a2b99aa5b9382 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Mon, 14 Jan 2019 20:28:02 -0500 Subject: [PATCH 38/53] CLN - fix import order --- pandas/tests/indexes/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index fce395a50a312..acd2497ceda75 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -7,7 +7,6 @@ from pandas.core.indexes.api import Index, MultiIndex import pandas.util.testing as tm - indices_list = [tm.makeUnicodeIndex(100), tm.makeStringIndex(100), tm.makeDateIndex(100), From 6d82621493c811469a873c3b28373e2e3decf17c Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 16 Jan 2019 19:45:14 -0500 Subject: [PATCH 39/53] CLN - code cleanup, remove unneccesary operations --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/core/indexes/base.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 49c863b925710..10a5a444d0f77 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -852,7 +852,7 @@ the result will be a base :class:`Index` of dtype `object`. This behavior holds unions between :class:`Index` objects that previously would have been prohibited. The dtype of empty :class:`Index` objects will now be evaluated before performing union operations rather than simply returning the other :class:`Index` object. :func:`Index.union` can now be -considered commutative, such that A.union(B) == B.union(A) (:issue:`23525`). +considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`). 
*Previous Behavior*: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 23f5d4146d75c..988b350fa0c01 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2264,7 +2264,7 @@ def union(self, other): Form the union of two Index objects and sorts if possible. If the Index objects are incompatible, both Index objects will be - cast to dtype('O') first. + cast to dtype('object') first. Parameters ---------- @@ -2292,9 +2292,6 @@ def union(self, other): if not self._is_compatible_with_other(other): return self._union_incompatible_dtypes(other) - # This line needs to be after _union_incompatible_dtypes to ensure - # the original type of other is not lost after being cast to Index - other = ensure_index(other) return self._union(other) def _union(self, other): @@ -2302,7 +2299,6 @@ def _union(self, other): Specific union logic should go here. In subclasses union behavior should be overwritten here rather than in `self.union` """ - self._assert_can_do_setop(other) if not len(other) or self.equals(other): return self._get_reconciled_name_object(other) From 5a87715d89577cc96145e7edf4d87495918c6e71 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Mon, 21 Jan 2019 14:03:04 -0500 Subject: [PATCH 40/53] CLN - apply error messages to both statements --- pandas/tests/reshape/test_concat.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index b3ba07fff330f..272209bce9e57 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -961,22 +961,23 @@ def test_append_different_columns_types_raises( df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append) ser = pd.Series([7, 8, 9], index=index_cannot_append_with_other, name=2) - msg = ("the other index needs to be an IntervalIndex too, but was" + msg = (r"unorderable types: (Interval|int)\(\) > " + r"(int|long|float|str)\(\)|" + 
r"Expected tuple, got (int|long|float|str)|" + r"Cannot compare type 'Timestamp' with type '(int|long)'|" + r"'>' not supported between instances of 'int' and 'str'|" + r"'<' not supported between instances of 'int' and 'str'|" + r"the other index needs to be an IntervalIndex too, but was" r" type {}|" r"object of type '(int|long|float|Timestamp)' has no len\(\)|" "Expected tuple, got str") - with pytest.raises(TypeError, match=msg.format( - index_can_append.__class__.__name__)): + with pytest.raises(TypeError, match=msg): df.append(ser) df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_cannot_append_with_other) ser = pd.Series([7, 8, 9], index=index_can_append, name=2) - msg = (r"unorderable types: (Interval|int)\(\) > " - r"(int|long|float|str)\(\)|" - r"Expected tuple, got (int|long|float|str)|" - r"Cannot compare type 'Timestamp' with type '(int|long)'|" - r"'>' not supported between instances of 'int' and 'str'") + with pytest.raises(TypeError, match=msg): df.append(ser) From a4f9e78815d801951ef5a3eab0896120bf2ae0d5 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 22 Jan 2019 19:42:46 -0500 Subject: [PATCH 41/53] TST - add regex queries --- pandas/tests/reshape/test_concat.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 272209bce9e57..103ea3b687698 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -961,12 +961,11 @@ def test_append_different_columns_types_raises( df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append) ser = pd.Series([7, 8, 9], index=index_cannot_append_with_other, name=2) - msg = (r"unorderable types: (Interval|int)\(\) > " + msg = (r"unorderable types: (Interval|int)\(\) (<|>) " r"(int|long|float|str)\(\)|" r"Expected tuple, got (int|long|float|str)|" r"Cannot compare type 'Timestamp' with type '(int|long)'|" - r"'>' not supported between instances of 'int' and 'str'|" - 
r"'<' not supported between instances of 'int' and 'str'|" + r"'(<|>)' not supported between instances of 'int' and 'str'|" r"the other index needs to be an IntervalIndex too, but was" r" type {}|" r"object of type '(int|long|float|Timestamp)' has no len\(\)|" From 0bcbdf45b1f95d1d4538238a6681f6f2fe412040 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sun, 10 Feb 2019 23:14:28 -0500 Subject: [PATCH 42/53] BUG - fix default sort arg --- pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d8db4593f0b21..b0a93de8aa019 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2314,6 +2314,7 @@ def union(self, other, sort=None): >>> idx1.union(idx2) Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') """ + print('HIHIHIHI') self._validate_sort_keyword(sort) self._assert_can_do_setop(other) @@ -2381,7 +2382,7 @@ def _wrap_setop_result(self, other, result): return self._constructor(result, name=get_op_result_name(self, other)) # TODO: standardize return type of non-union setops type(self vs other) - def intersection(self, other, sort=True): + def intersection(self, other, sort=False): """ Form the intersection of two Index objects. 
From c410625ea79f0a654957daa7fbbd99f5341e2026 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Sun, 10 Feb 2019 23:44:27 -0500 Subject: [PATCH 43/53] BUG - remove print --- pandas/core/indexes/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b0a93de8aa019..6846e75258f21 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2314,7 +2314,6 @@ def union(self, other, sort=None): >>> idx1.union(idx2) Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') """ - print('HIHIHIHI') self._validate_sort_keyword(sort) self._assert_can_do_setop(other) From 6bb054fef835bc9f8660f700a7d572c80064bff4 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Mon, 11 Feb 2019 22:35:16 -0500 Subject: [PATCH 44/53] TST/DOC - move to new whatsnew and use local fixture for tests --- doc/source/whatsnew/v0.24.0.rst | 31 --------------------- doc/source/whatsnew/v0.25.0.rst | 31 +++++++++++++++++++++ pandas/core/indexes/base.py | 42 +++++++++++++++++++++++++++-- pandas/tests/indexes/test_setops.py | 25 +++++++++-------- 4 files changed, 85 insertions(+), 44 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 98909471bbe50..a49ea2cf493a6 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -859,37 +859,6 @@ Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with df.to_dict(orient='index') -.. _whatsnew_0240.api_breaking.incompatible_index_unions - -Incompatible Index Type Unions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When performing an :func:`Index.union` operation, between objects of incompatible dtypes, -the result will be a base :class:`Index` of dtype `object`. This behavior holds true for -unions between :class:`Index` objects that previously would have been prohibited. 
The dtype -of empty :class:`Index` objects will now be evaluated before performing union operations -rather than simply returning the other :class:`Index` object. :func:`Index.union` can now be -considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`). - - -*Previous Behavior*: - -.. code-block:: ipython - - In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) - ... - ValueError: can only call with other PeriodIndex-ed objects - - In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) - Out[2]: Int64Index([1, 2, 3], dtype='int64') - -*Current Behavior*: - -.. ipython:: python - - pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) - pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) - .. _whatsnew_0240.api.datetimelike.normalize: Tick DateOffset Normalize Restrictions diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 4032dc20b2e19..6260fcb72ead4 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -28,6 +28,37 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_0240.api_breaking.incompatible_index_unions + +Incompatible Index Type Unions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When performing an :func:`Index.union` operation, between objects of incompatible dtypes, +the result will be a base :class:`Index` of dtype `object`. This behavior holds true for +unions between :class:`Index` objects that previously would have been prohibited. The dtype +of empty :class:`Index` objects will now be evaluated before performing union operations +rather than simply returning the other :class:`Index` object. :func:`Index.union` can now be +considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`). + + +*Previous Behavior*: + +.. code-block:: ipython + + In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + ... 
+ ValueError: can only call with other PeriodIndex-ed objects + + In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) + Out[2]: Int64Index([1, 2, 3], dtype='int64') + +*Current Behavior*: + +.. ipython:: python + + pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) + .. _whatsnew_0250.api.other: Other API Changes diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6846e75258f21..74a582e83e054 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2248,6 +2248,20 @@ def _union_incompatible_dtypes(self, other, sort): """ Casts this and other index to object dtype to allow the formation of a union between incompatible types. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. + + Returns + ------- + Index """ this = self.astype(object, copy=False) # cast to Index for when `other` is list-like @@ -2259,6 +2273,14 @@ def _is_compatible_with_other(self, other): Check whether this and the other dtype are compatible with each other. Meaning a union can be formed between them without needing to be cast to dtype object. + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + bool """ return (type(self) is type(other) and is_dtype_equal(self.dtype, other.dtype)) @@ -2275,6 +2297,8 @@ def union(self, other, sort=None): If the Index objects are incompatible, both Index objects will be cast to dtype('object') first. + .. versionchanged :: 0.25.0 + Parameters ---------- other : Index or array-like @@ -2324,8 +2348,22 @@ def union(self, other, sort=None): def _union(self, other, sort): """ - Specific union logic should go here. 
In subclasses union behavior - should be overwritten here rather than in `self.union` + Specific union logic should go here. In subclasses, union behavior + should be overwritten here rather than in `self.union`. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. + + Returns + ------- + Index """ if not len(other) or self.equals(other): diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 359addf33498c..cbf31267b3db0 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -19,6 +19,12 @@ } +@pytest.fixture(params=list(it.combinations(indices_list, 2)), + ids=lambda x: type(x[0]).__name__ + type(x[1]).__name__) +def index_pair(request): + return request.param + + def test_union_same_types(indices): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory @@ -27,21 +33,18 @@ def test_union_same_types(indices): assert idx1.union(idx2).dtype == idx1.dtype -@pytest.mark.parametrize( - 'idx1,idx2', - list(it.combinations(indices_list, 2)) -) -def test_union_different_types(idx1, idx2): +def test_union_different_types(index_pair): # GH 23525 - pair = tuple(sorted([type(idx1), type(idx2)], key=lambda x: str(x))) - if pair in COMPATIBLE_INCONSISTENT_PAIRS: - return + idx1, idx2 = index_pair + type_pair = tuple(sorted([type(idx1), type(idx2)], key=lambda x: str(x))) + if type_pair in COMPATIBLE_INCONSISTENT_PAIRS: + pytest.xfail('This test only considers non compatible indexes.') - if any(isinstance(idx, pd.MultiIndex) for idx in [idx1, idx2]): - return + if any(isinstance(idx, pd.MultiIndex) for idx in index_pair): + pytest.xfail('This test doesn\'t consider multiindixes.') if is_dtype_equal(idx1.dtype, idx2.dtype): - return + 
pytest.xfail('This test only considers non matching dtypes.') # A union with a CategoricalIndex (even as dtype('O')) and a # non-CategoricalIndex can only be made if both indices are monotonic. From aea731c63937ee8c2f7aa78b2920a11965e635df Mon Sep 17 00:00:00 2001 From: ArtinSarraf Date: Wed, 13 Feb 2019 18:54:00 -0500 Subject: [PATCH 45/53] DOC - minor update to get tests to rerun --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 6260fcb72ead4..12ee2fc5dceee 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -33,7 +33,7 @@ Backwards incompatible API changes Incompatible Index Type Unions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When performing an :func:`Index.union` operation, between objects of incompatible dtypes, +When performing :func:`Index.union` operations between objects of incompatible dtypes, the result will be a base :class:`Index` of dtype `object`. This behavior holds true for unions between :class:`Index` objects that previously would have been prohibited. The dtype of empty :class:`Index` objects will now be evaluated before performing union operations From 32037b598d15255551cdc50611d5d1c90de9a37d Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Fri, 1 Mar 2019 19:22:02 -0500 Subject: [PATCH 46/53] DOC - fix docstrings and whatsnew --- doc/source/whatsnew/v0.25.0.rst | 8 ++++---- pandas/core/indexes/base.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 75ee7513f3b12..6b255faf907e1 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -64,13 +64,13 @@ is respected in indexing. (:issue:`24076`, :issue:`16785`) df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00'] -.. 
_whatsnew_0240.api_breaking.incompatible_index_unions +.. _whatsnew_0250.api_breaking.incompatible_index_unions Incompatible Index Type Unions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ When performing :func:`Index.union` operations between objects of incompatible dtypes, -the result will be a base :class:`Index` of dtype `object`. This behavior holds true for +the result will be a base :class:`Index` of dtype ``object``. This behavior holds true for unions between :class:`Index` objects that previously would have been prohibited. The dtype of empty :class:`Index` objects will now be evaluated before performing union operations rather than simply returning the other :class:`Index` object. :func:`Index.union` can now be @@ -82,7 +82,7 @@ considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`). ... ValueError: can only call with other PeriodIndex-ed objects - In [2]: pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) + In [2]: pd.Index([], dtype=object).union(pd.Index([1, 2, 3])) Out[2]: Int64Index([1, 2, 3], dtype='int64') *New Behavior*: @@ -90,7 +90,7 @@ considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`). .. ipython:: python pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) - pd.Index([]).astype(str).union(pd.Index([1, 2, 3])) + pd.Index([], dtype=object).union(pd.Index([1, 2, 3])) .. _whatsnew_0250.api_breaking.deps: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d6fe0f63ed025..295b40dbf826f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2299,7 +2299,7 @@ def union(self, other, sort=None): If the Index objects are incompatible, both Index objects will be cast to dtype('object') first. - .. versionchanged :: 0.25.0 + .. 
versionchanged:: 0.25.0 Parameters ---------- From 1d12bc9d71182810d1cc317d68c5a366612771a1 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Mon, 11 Mar 2019 19:28:54 -0400 Subject: [PATCH 47/53] DOC - update docstring --- pandas/core/indexes/base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3e60168b8bf39..f494048bf293b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2330,11 +2330,15 @@ def union(self, other, sort=None): Examples -------- + Union matching dtypes + >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.union(idx2) Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') + Union mismatched dtypes + >>> idx1 = pd.Index(['a', 'b', 'c', 'd']) >>> idx2 = pd.Index([1, 2, 3, 4]) >>> idx1.union(idx2) From 92f6707a7eab556e4dc91b2c73cb60d8b2e68210 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Mon, 11 Mar 2019 20:48:39 -0400 Subject: [PATCH 48/53] TST - use tm.assert_index_equal --- pandas/tests/indexes/datetimes/test_setops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 1cd0ac3f6ca65..50055013f318f 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -35,8 +35,9 @@ def test_union2(self, sort): ).astype('O') cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: + print('hi') result = first.union(case, sort=sort) - assert tm.equalContents(result, expected) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tz", tz) @pytest.mark.parametrize("sort", [None, False]) From 38d9f7414dab4c576f29fefcfcd539c72c7d3683 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 20 Mar 2019 21:32:49 -0400 Subject: [PATCH 49/53] TST - parametrize union tests --- pandas/tests/indexes/datetimes/test_setops.py | 15 ++++++++++----- 
1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 50055013f318f..d40c06ff1dc32 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -29,15 +29,20 @@ def test_union2(self, sort): union = first.union(second, sort=sort) tm.assert_index_equal(union, everything) + @pytest.mark.parametrize("klass", [np.array, Series, list]) + @pytest.mark.parametrize("sort", [None, False]) + def test_union3(self, sort, klass): + everything = tm.makeDateIndex(10) + first = everything[:5] + second = everything[5:] + # GH 10149 expected = first.astype('O').union( pd.Index(second.values, dtype='O') ).astype('O') - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - print('hi') - result = first.union(case, sort=sort) - tm.assert_index_equal(result, expected) + case = klass(second.values) + result = first.union(case, sort=sort) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tz", tz) @pytest.mark.parametrize("sort", [None, False]) From 69aaa930521fd3f2484eab56c46c0ac61680b1ce Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Thu, 21 Mar 2019 19:52:25 -0400 Subject: [PATCH 50/53] DOC - add docstring --- pandas/tests/indexes/test_setops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index cbf31267b3db0..7cd006cc14396 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -22,6 +22,9 @@ @pytest.fixture(params=list(it.combinations(indices_list, 2)), ids=lambda x: type(x[0]).__name__ + type(x[1]).__name__) def index_pair(request): + ''' + Create all combinations of 2 index types. 
+ ''' return request.param From 54898c1a981e0e46ca3a87d24ab1ac9352457d74 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Tue, 14 May 2019 23:41:30 -0400 Subject: [PATCH 51/53] CLN/TST - fix super method calls and add error msg --- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/range.py | 2 +- pandas/tests/reshape/test_concat.py | 3 ++- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 4e97b77913014..c43508113402a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -453,7 +453,7 @@ def _formatter_func(self): def _union(self, other, sort): if not len(other) or self.equals(other) or not len(self): - return super(DatetimeIndex, self)._union(other, sort=sort) + return super()._union(other, sort=sort) if len(other) == 0 or self.equals(other) or len(self) == 0: return super().union(other, sort=sort) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index b0e31849b3ad2..b6c8ba588f9d6 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -224,7 +224,7 @@ def _assert_safe_casting(cls, data, subarr): def _is_compatible_with_other(self, other): return ( - super(Int64Index, self)._is_compatible_with_other(other) + super()._is_compatible_with_other(other) or all(isinstance(type(obj), (ABCInt64Index, ABCRangeIndex)) for obj in [self, other]) ) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 3b5ef6fa708ab..ea14a4c789cd3 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -491,7 +491,7 @@ def _union(self, other, sort): union : Index """ if not len(other) or self.equals(other) or not len(self): - return super(RangeIndex, self)._union(other, sort=sort) + return super()._union(other, sort=sort) if isinstance(other, RangeIndex) and sort is None: start_s, step_s = self._start, self._step diff --git 
a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index e4bf56dfb4d68..fe77b7e545697 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -964,7 +964,8 @@ def test_append_different_columns_types_raises( r"(int|long|float|str)\(\)|" r"Expected tuple, got (int|long|float|str)|" r"Cannot compare type 'Timestamp' with type '(int|long)'|" - r"'(<|>)' not supported between instances of 'int' and 'str'|" + r"'(<|>)' not supported between instances of 'int' " + r"and '(str|Timestamp)'|" r"the other index needs to be an IntervalIndex too, but was" r" type {}|" r"object of type '(int|float|Timestamp)' has no len\(\)|" From fa839a98bc365ed2663b6869aca3348d40bd67b1 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 15 May 2019 19:37:33 -0400 Subject: [PATCH 52/53] TST - add Timestamp to regexand fix import sorting --- pandas/tests/indexes/conftest.py | 1 - pandas/tests/reshape/test_concat.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 902e3449ff515..83f1f22b158b1 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -5,7 +5,6 @@ from pandas.core.indexes.api import Index, MultiIndex import pandas.util.testing as tm - indices_list = [tm.makeUnicodeIndex(100), tm.makeStringIndex(100), tm.makeDateIndex(100), diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index fe77b7e545697..ecd62380d8c65 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -961,7 +961,7 @@ def test_append_different_columns_types_raises( ser = pd.Series([7, 8, 9], index=index_cannot_append_with_other, name=2) msg = (r"unorderable types: (Interval|int)\(\) (<|>) " - r"(int|long|float|str)\(\)|" + r"(int|long|float|str|Timestamp)\(\)|" r"Expected tuple, got (int|long|float|str)|" r"Cannot compare type 'Timestamp' with type '(int|long)'|" 
r"'(<|>)' not supported between instances of 'int' " From a36f475c44a07c36cf94a6fe9dcb0043bc885114 Mon Sep 17 00:00:00 2001 From: Artin Sarraf Date: Wed, 15 May 2019 20:18:17 -0400 Subject: [PATCH 53/53] CLN - minor style updates --- pandas/tests/indexes/datetimes/test_setops.py | 6 +++--- pandas/tests/indexes/test_setops.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index af592433593d7..fd666f3d56c9d 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -29,9 +29,9 @@ def test_union2(self, sort): union = first.union(second, sort=sort) tm.assert_index_equal(union, everything) - @pytest.mark.parametrize("klass", [np.array, Series, list]) + @pytest.mark.parametrize("box", [np.array, Series, list]) @pytest.mark.parametrize("sort", [None, False]) - def test_union3(self, sort, klass): + def test_union3(self, sort, box): everything = tm.makeDateIndex(10) first = everything[:5] second = everything[5:] @@ -40,7 +40,7 @@ def test_union3(self, sort, klass): expected = first.astype('O').union( pd.Index(second.values, dtype='O') ).astype('O') - case = klass(second.values) + case = box(second.values) result = first.union(case, sort=sort) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 7cd006cc14396..b626ced2ccb1b 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -22,9 +22,9 @@ @pytest.fixture(params=list(it.combinations(indices_list, 2)), ids=lambda x: type(x[0]).__name__ + type(x[1]).__name__) def index_pair(request): - ''' + """ Create all combinations of 2 index types. - ''' + """ return request.param