From 89dc29d7df766f76ae1ac309f310b3eb0f6df594 Mon Sep 17 00:00:00 2001 From: Chang She Date: Wed, 16 May 2012 18:03:31 -0400 Subject: [PATCH 1/2] ENH: flex comparison operators on DataFrame #652 --- pandas/core/frame.py | 69 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2ebdcac82138b..7bc19e8f5b61e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -222,6 +222,57 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return f +def flex_comp_method(op, name, default_axis='columns'): + + @Appender('Wrapper for flexible comparison methods %s' % name) + def f(self, other, axis=default_axis, level=None): + if isinstance(other, DataFrame): # Another DataFrame + return self._flex_compare_frame(other, op, level) + + elif isinstance(other, Series): + try: + return self._combine_series(other, op, None, axis, level) + except Exception: + return self._combine_series_infer(other, op) + + elif isinstance(other, (list, tuple)): + if axis is not None and self._get_axis_name(axis) == 'index': + casted = Series(other, index=self.index) + else: + casted = Series(other, index=self.columns) + + try: + return self._combine_series(casted, op, None, axis, level) + except Exception: + return self._combine_series_infer(casted, op) + + elif isinstance(other, np.ndarray): + if other.ndim == 1: + if axis is not None and self._get_axis_name(axis) == 'index': + casted = Series(other, index=self.index) + else: + casted = Series(other, index=self.columns) + + try: + return self._combine_series(casted, op, None, axis, level) + except Exception: + return self._combine_series_infer(casted, op) + + elif other.ndim == 2: + casted = DataFrame(other, index=self.index, + columns=self.columns) + return self._flex_compare_frame(casted, op, level) + + else: # pragma: no cover + raise ValueError("Bad argument shape") + + else: + return self._combine_const(other, op) + + f.__name__ = name + + return f + def comp_method(func, name): @Appender('Wrapper for comparison method %s' % name) @@ -622,6 +673,13 @@ def __neg__(self): __le__ = comp_method(operator.le, '__le__') __ge__ = comp_method(operator.ge, '__ge__') + eq = flex_comp_method(operator.eq, 'eq') + ne = flex_comp_method(operator.ne, 'ne') + gt = flex_comp_method(operator.gt, 'gt') + lt = flex_comp_method(operator.lt, 'lt') + ge = flex_comp_method(operator.ge, 'ge') + le = flex_comp_method(operator.le, 'le') + def dot(self, other): """ Matrix multiplication with DataFrame objects. Does no data alignment @@ -2911,6 +2969,17 @@ def _compare_frame(self, other, func): return self._constructor(data=new_data, index=self.index, columns=self.columns, copy=False) + def _flex_compare_frame(self, other, func, level): + if not self._indexed_same(other): + self, other = self.align(other, 'outer', level=level) + + new_data = {} + for col in self.columns: + new_data[col] = func(self[col], other[col]) + + return self._constructor(data=new_data, index=self.index, + columns=self.columns, copy=False) + def combine(self, other, func, fill_value=None): """ Add two DataFrame objects and do not propagate NaN values, so if for a From f89977eaef76d2290585caea63f7332963e5c118 Mon Sep 17 00:00:00 2001 From: Chang She Date: Mon, 21 May 2012 21:06:27 -0400 Subject: [PATCH 2/2] TST: tests for flex compare #652 --- pandas/core/frame.py | 71 ++++++++++++++++-------- pandas/tests/test_frame.py | 111 +++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+), 24 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7bc19e8f5b61e..cbe0ba382d197 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -222,18 +222,40 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return f -def flex_comp_method(op, name, default_axis='columns'): +def _flex_comp_method(op, name, default_axis='columns'): + + def na_op(x, y): + try: + result = op(x, y) + except TypeError: + xrav = x.ravel() + result = np.empty(x.size, dtype=x.dtype) + if isinstance(y, np.ndarray): + yrav = y.ravel() + mask = notnull(xrav) & notnull(yrav) + result[mask] = op(xrav[mask], yrav[mask]) + else: + mask = notnull(xrav) + result[mask] = op(xrav[mask], y) + + if op == operator.ne: + np.putmask(result, -mask, False) + else: + np.putmask(result, -mask, False) + result = result.reshape(x.shape) + + return result @Appender('Wrapper for flexible comparison methods %s' % name) def f(self, other, axis=default_axis, level=None): if isinstance(other, DataFrame): # Another DataFrame - return self._flex_compare_frame(other, op, level) + return self._flex_compare_frame(other, na_op, level) elif isinstance(other, Series): try: - return self._combine_series(other, op, None, axis, level) + return self._combine_series(other, na_op, None, axis, level) except Exception: - return self._combine_series_infer(other, op) + return self._combine_series_infer(other, na_op) elif isinstance(other, (list, tuple)): if axis is not None and self._get_axis_name(axis) == 'index': @@ -242,9 +264,9 @@ def f(self, other, axis=default_axis, level=None): casted = Series(other, index=self.columns) try: - return self._combine_series(casted, op, None, axis, level) + return self._combine_series(casted, na_op, None, axis, level) except Exception: - return self._combine_series_infer(casted, op) + return self._combine_series_infer(casted, na_op) elif isinstance(other, np.ndarray): if other.ndim == 1: @@ -254,27 +276,28 @@ def f(self, other, axis=default_axis, level=None): casted = Series(other, index=self.columns) try: - return self._combine_series(casted, op, None, axis, level) + return self._combine_series(casted, na_op, None, axis, + level) except Exception: - return self._combine_series_infer(casted, op) + return self._combine_series_infer(casted, na_op) elif other.ndim == 2: casted = DataFrame(other, index=self.index, columns=self.columns) - return self._flex_compare_frame(casted, op, level) + return self._flex_compare_frame(casted, na_op, level) else: # pragma: no cover raise ValueError("Bad argument shape") else: - return self._combine_const(other, op) + return self._combine_const(other, na_op) f.__name__ = name return f -def comp_method(func, name): +def _comp_method(func, name): @Appender('Wrapper for comparison method %s' % name) def f(self, other): if isinstance(other, DataFrame): # Another DataFrame @@ -666,19 +689,19 @@ def __neg__(self): return self._wrap_array(arr, self.axes, copy=False) # Comparison methods - __eq__ = comp_method(operator.eq, '__eq__') - __ne__ = comp_method(operator.ne, '__ne__') - __lt__ = comp_method(operator.lt, '__lt__') - __gt__ = comp_method(operator.gt, '__gt__') - __le__ = comp_method(operator.le, '__le__') - __ge__ = comp_method(operator.ge, '__ge__') - - eq = flex_comp_method(operator.eq, 'eq') - ne = flex_comp_method(operator.ne, 'ne') - gt = flex_comp_method(operator.gt, 'gt') - lt = flex_comp_method(operator.lt, 'lt') - ge = flex_comp_method(operator.ge, 'ge') - le = flex_comp_method(operator.le, 'le') + __eq__ = _comp_method(operator.eq, '__eq__') + __ne__ = _comp_method(operator.ne, '__ne__') + __lt__ = _comp_method(operator.lt, '__lt__') + __gt__ = _comp_method(operator.gt, '__gt__') + __le__ = _comp_method(operator.le, '__le__') + __ge__ = _comp_method(operator.ge, '__ge__') + + eq = _flex_comp_method(operator.eq, 'eq') + ne = _flex_comp_method(operator.ne, 'ne') + gt = _flex_comp_method(operator.gt, 'gt') + lt = _flex_comp_method(operator.lt, 'lt') + ge = _flex_comp_method(operator.ge, 'ge') + le = _flex_comp_method(operator.le, 'le') def dot(self, other): """ diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 0b353ea8eba73..6a144c264650a 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2447,6 +2447,117 @@ def test_arith_flex_frame(self): result = self.frame[:0].add(self.frame) assert_frame_equal(result, self.frame * np.nan) + def test_bool_flex_frame(self): + data = np.random.randn(5, 3) + other_data = np.random.randn(5, 3) + df = DataFrame(data) + other = DataFrame(other_data) + + # No NAs + + # DataFrame + self.assert_(df.eq(df).values.all()) + self.assert_(not df.ne(df).values.any()) + + assert_frame_equal((df == other), df.eq(other)) + assert_frame_equal((df != other), df.ne(other)) + assert_frame_equal((df > other), df.gt(other)) + assert_frame_equal((df < other), df.lt(other)) + assert_frame_equal((df >= other), df.ge(other)) + assert_frame_equal((df <= other), df.le(other)) + + # Unaligned + def _check_unaligned_frame(meth, op, df, other, default=False): + part_o = other.ix[3:, 1:].copy() + rs = meth(df, part_o) + xp = op(df, part_o.reindex(index=df.index, columns=df.columns)) + assert_frame_equal(rs, xp) + + _check_unaligned_frame(DataFrame.eq, operator.eq, df, other) + _check_unaligned_frame(DataFrame.ne, operator.ne, df, other, + default=True) + _check_unaligned_frame(DataFrame.gt, operator.gt, df, other) + _check_unaligned_frame(DataFrame.lt, operator.lt, df, other) + _check_unaligned_frame(DataFrame.ge, operator.ge, df, other) + _check_unaligned_frame(DataFrame.le, operator.le, df, other) + + # Series + def _test_seq(df, idx_ser, col_ser): + idx_eq = df.eq(idx_ser, axis=0) + col_eq = df.eq(col_ser) + idx_ne = df.ne(idx_ser, axis=0) + col_ne = df.ne(col_ser) + assert_frame_equal(col_eq, df == Series(col_ser)) + assert_frame_equal(col_eq, -col_ne) + assert_frame_equal(idx_eq, -idx_ne) + assert_frame_equal(idx_eq, df.T.eq(idx_ser).T) + + idx_gt = df.gt(idx_ser, axis=0) + col_gt = df.gt(col_ser) + idx_le = df.le(idx_ser, axis=0) + col_le = df.le(col_ser) + + assert_frame_equal(col_gt, df > Series(col_ser)) + assert_frame_equal(col_gt, -col_le) + assert_frame_equal(idx_gt, -idx_le) + assert_frame_equal(idx_gt, df.T.gt(idx_ser).T) + + idx_ge = df.ge(idx_ser, axis=0) + col_ge = df.ge(col_ser) + idx_lt = df.lt(idx_ser, axis=0) + col_lt = df.lt(col_ser) + assert_frame_equal(col_ge, df >= Series(col_ser)) + assert_frame_equal(col_ge, -col_lt) + assert_frame_equal(idx_ge, -idx_lt) + assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) + + idx_ser = Series(np.random.randn(5)) + col_ser = Series(np.random.randn(3)) + _test_seq(df, idx_ser, col_ser) + + # ndarray + + assert_frame_equal((df == other.values), df.eq(other.values)) + assert_frame_equal((df != other.values), df.ne(other.values)) + assert_frame_equal((df > other.values), df.gt(other.values)) + assert_frame_equal((df < other.values), df.lt(other.values)) + assert_frame_equal((df >= other.values), df.ge(other.values)) + assert_frame_equal((df <= other.values), df.le(other.values)) + + # list/tuple + _test_seq(df, idx_ser.values, col_ser.values) + + # NA + df.ix[0, 0] = np.nan + rs = df.eq(df) + self.assert_(not rs.ix[0, 0]) + rs = df.ne(df) + self.assert_(rs.ix[0, 0]) + rs = df.gt(df) + self.assert_(not rs.ix[0, 0]) + rs = df.lt(df) + self.assert_(not rs.ix[0, 0]) + rs = df.ge(df) + self.assert_(not rs.ix[0, 0]) + rs = df.le(df) + self.assert_(not rs.ix[0, 0]) + + + # scalar + assert_frame_equal(df.eq(0), df == 0) + assert_frame_equal(df.ne(0), df != 0) + assert_frame_equal(df.gt(0), df > 0) + assert_frame_equal(df.lt(0), df < 0) + assert_frame_equal(df.ge(0), df >= 0) + assert_frame_equal(df.le(0), df <= 0) + + assert_frame_equal(df.eq(np.nan), df == np.nan) + assert_frame_equal(df.ne(np.nan), df != np.nan) + assert_frame_equal(df.gt(np.nan), df > np.nan) + assert_frame_equal(df.lt(np.nan), df < np.nan) + assert_frame_equal(df.ge(np.nan), df >= np.nan) + assert_frame_equal(df.le(np.nan), df <= np.nan) + def test_arith_flex_series(self): df = self.simple