From 95aa6b5c78348b9f9d71cd58a3f1460b85edd2e6 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Mon, 15 Aug 2016 22:25:50 +0900 Subject: [PATCH] BUG: DatetimeTZBlock can't assign values near dst boundary --- doc/source/whatsnew/v0.19.0.txt | 2 +- pandas/core/internals.py | 19 +++--- pandas/tests/indexing/test_coercion.py | 4 +- pandas/tests/series/test_indexing.py | 83 +++++++++++++++++++++++++ pandas/tests/series/test_misc_api.py | 8 ++- pandas/tests/series/test_missing.py | 84 +++++++++++++++++--------- 6 files changed, 158 insertions(+), 42 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index a3e8f0c314352..1fd135f1436da 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1559,7 +1559,7 @@ Bug Fixes - Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`) - Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`) - Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`) - +- Bug in assignment to datetime with timezone data, which may not work if the data contains a datetime near a DST boundary (:issue:`14146`) - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`) - Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index da72309b8eae1..11721a5bdac29 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1487,7 +1487,10 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, ------- a new block(s), the result of the putmask """ - new_values = self.values if inplace else self.values.copy() + + # use block's copy logic. 
+ # .values may be an Index which does shallow copy by default + new_values = self.values if inplace else self.copy().values new_values, _, new, _ = self._try_coerce_args(new_values, new) if isinstance(new, np.ndarray) and len(new) == len(mask): @@ -2314,7 +2317,7 @@ def __init__(self, values, placement, ndim=2, **kwargs): if dtype is not None: if isinstance(dtype, compat.string_types): dtype = DatetimeTZDtype.construct_from_string(dtype) - values = values.tz_localize('UTC').tz_convert(dtype.tz) + values = values._shallow_copy(tz=dtype.tz) if values.tz is None: raise ValueError("cannot create a DatetimeTZBlock without a tz") @@ -2381,12 +2384,14 @@ def _try_coerce_args(self, values, other): base-type values, values mask, base-type other, other mask """ values_mask = _block_shape(isnull(values), ndim=self.ndim) - values = _block_shape(values.tz_localize(None).asi8, ndim=self.ndim) + # asi8 is a view, needs copy + values = _block_shape(values.asi8, ndim=self.ndim) other_mask = False if isinstance(other, ABCSeries): other = self._holder(other) other_mask = isnull(other) + if isinstance(other, bool): raise TypeError elif is_null_datelike_scalar(other): @@ -2395,7 +2400,7 @@ def _try_coerce_args(self, values, other): elif isinstance(other, self._holder): if other.tz != self.values.tz: raise ValueError("incompatible or non tz-aware value") - other = other.tz_localize(None).asi8 + other = other.asi8 other_mask = isnull(other) elif isinstance(other, (np.datetime64, datetime, date)): other = lib.Timestamp(other) @@ -2405,7 +2410,7 @@ def _try_coerce_args(self, values, other): if tz is None or str(tz) != str(self.values.tz): raise ValueError("incompatible or non tz-aware value") other_mask = isnull(other) - other = other.tz_localize(None).value + other = other.value return values, values_mask, other, other_mask @@ -2415,12 +2420,12 @@ def _try_coerce_result(self, result): if result.dtype.kind in ['i', 'f', 'O']: result = result.astype('M8[ns]') elif isinstance(result, 
(np.integer, np.float, np.datetime64)): - result = lib.Timestamp(result).tz_localize(self.values.tz) + result = lib.Timestamp(result, tz=self.values.tz) if isinstance(result, np.ndarray): # allow passing of > 1dim if its trivial if result.ndim > 1: result = result.reshape(len(result)) - result = self._holder(result).tz_localize(self.values.tz) + result = self.values._shallow_copy(result) return result diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 5fbaea6c5efcb..0cfa7258461f1 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -229,7 +229,7 @@ def test_setitem_series_datetime64tz(self): # datetime64 + int -> object # ToDo: The result must be object exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp(1).tz_localize(tz), + pd.Timestamp(1, tz=tz), pd.Timestamp('2011-01-03', tz=tz), pd.Timestamp('2011-01-04', tz=tz)]) self._assert_setitem_series_conversion(obj, 1, exp, @@ -1038,7 +1038,7 @@ def test_fillna_series_datetime64tz(self): # datetime64tz + int => datetime64tz # ToDo: must be object exp = pd.Series([pd.Timestamp('2011-01-01', tz=tz), - pd.Timestamp(1).tz_localize(tz=tz), + pd.Timestamp(1, tz=tz), pd.Timestamp('2011-01-03', tz=tz), pd.Timestamp('2011-01-04', tz=tz)]) self._assert_fillna_conversion(obj, 1, exp, diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 5eef06bacfcb0..7c16fd060b181 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -776,6 +776,89 @@ def test_ix_getitem_iterator(self): result = self.series.ix[idx] assert_series_equal(result, self.series[:10]) + def test_setitem_with_tz(self): + for tz in ['US/Eastern', 'UTC', 'Asia/Tokyo']: + orig = pd.Series(pd.date_range('2016-01-01', freq='H', periods=3, + tz=tz)) + self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz)) + + # scalar + s = orig.copy() + s[1] = pd.Timestamp('2011-01-01', tz=tz) + 
exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz), + pd.Timestamp('2011-01-01 00:00', tz=tz), + pd.Timestamp('2016-01-01 02:00', tz=tz)]) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[1] = pd.Timestamp('2011-01-01', tz=tz) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz) + tm.assert_series_equal(s, exp) + + # vector + vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2]) + self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz)) + + s[[1, 2]] = vals + exp = pd.Series([pd.Timestamp('2016-01-01 00:00', tz=tz), + pd.Timestamp('2011-01-01 00:00', tz=tz), + pd.Timestamp('2012-01-01 00:00', tz=tz)]) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + def test_setitem_with_tz_dst(self): + # GH 14146 + tz = 'US/Eastern' + orig = pd.Series(pd.date_range('2016-11-06', freq='H', periods=3, + tz=tz)) + self.assertEqual(orig.dtype, 'datetime64[ns, {0}]'.format(tz)) + + # scalar + s = orig.copy() + s[1] = pd.Timestamp('2011-01-01', tz=tz) + exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz), + pd.Timestamp('2011-01-01 00:00', tz=tz), + pd.Timestamp('2016-11-06 02:00', tz=tz)]) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[1] = pd.Timestamp('2011-01-01', tz=tz) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[1] = pd.Timestamp('2011-01-01', tz=tz) + tm.assert_series_equal(s, exp) + + # vector + vals = pd.Series([pd.Timestamp('2011-01-01', tz=tz), + pd.Timestamp('2012-01-01', tz=tz)], index=[1, 2]) + self.assertEqual(vals.dtype, 'datetime64[ns, {0}]'.format(tz)) + + s[[1, 2]] = vals + exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz), + pd.Timestamp('2011-01-01 00:00', tz=tz), + pd.Timestamp('2012-01-01 00:00', tz=tz)]) + tm.assert_series_equal(s, exp) + + s = orig.copy() + 
s.loc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + def test_where(self): s = Series(np.random.randn(5)) cond = s > 0 diff --git a/pandas/tests/series/test_misc_api.py b/pandas/tests/series/test_misc_api.py index d74966738909d..61bdc59cd500d 100644 --- a/pandas/tests/series/test_misc_api.py +++ b/pandas/tests/series/test_misc_api.py @@ -241,7 +241,6 @@ def test_copy(self): self.assertTrue(np.isnan(s2[0])) self.assertFalse(np.isnan(s[0])) else: - # we DID modify the original Series self.assertTrue(np.isnan(s2[0])) self.assertTrue(np.isnan(s[0])) @@ -252,6 +251,7 @@ def test_copy(self): expected2 = Series([Timestamp('1999/01/01', tz='UTC')]) for deep in [None, False, True]: + s = Series([Timestamp('2012/01/01', tz='UTC')]) if deep is None: @@ -263,11 +263,13 @@ def test_copy(self): # default deep is True if deep is None or deep is True: - assert_series_equal(s, expected) + # Did not modify original Series assert_series_equal(s2, expected2) + assert_series_equal(s, expected) else: - assert_series_equal(s, expected2) + # we DID modify the original Series assert_series_equal(s2, expected2) + assert_series_equal(s, expected2) def test_axis_alias(self): s = Series([1, 2, np.nan]) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index ed10f5b0a7af3..4e6c58df54dfd 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -130,49 +130,66 @@ def test_datetime64_fillna(self): def test_datetime64_tz_fillna(self): for tz in ['US/Eastern', 'Asia/Tokyo']: # DatetimeBlock - s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp( - '2011-01-03 10:00'), pd.NaT]) + s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, + Timestamp('2011-01-03 10:00'), pd.NaT]) + null_loc = pd.Series([False, True, False, True]) + result = s.fillna(pd.Timestamp('2011-01-02 10:00')) - expected = Series([Timestamp('2011-01-01 10:00'), Timestamp( - 
'2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp( - '2011-01-02 10:00')]) + expected = Series([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00'), + Timestamp('2011-01-03 10:00'), + Timestamp('2011-01-02 10:00')]) self.assert_series_equal(expected, result) + # check s is not changed + self.assert_series_equal(pd.isnull(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) - expected = Series([Timestamp('2011-01-01 10:00'), Timestamp( - '2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'), - Timestamp('2011-01-02 10:00', tz=tz)]) + expected = Series([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz=tz), + Timestamp('2011-01-03 10:00'), + Timestamp('2011-01-02 10:00', tz=tz)]) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) result = s.fillna('AAA') expected = Series([Timestamp('2011-01-01 10:00'), 'AAA', Timestamp('2011-01-03 10:00'), 'AAA'], dtype=object) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00')}) - expected = Series([Timestamp('2011-01-01 10:00'), Timestamp( - '2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'), - Timestamp('2011-01-04 10:00')]) + expected = Series([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz=tz), + Timestamp('2011-01-03 10:00'), + Timestamp('2011-01-04 10:00')]) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'), 3: pd.Timestamp('2011-01-04 10:00')}) - expected = Series([Timestamp('2011-01-01 10:00'), Timestamp( - '2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp( - '2011-01-04 10:00')]) + expected = Series([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00'), + Timestamp('2011-01-03 10:00'), + Timestamp('2011-01-04 10:00')]) 
self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) # DatetimeBlockTZ idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT, '2011-01-03 10:00', pd.NaT], tz=tz) s = pd.Series(idx) + self.assertEqual(s.dtype, 'datetime64[ns, {0}]'.format(tz)) + self.assert_series_equal(pd.isnull(s), null_loc) + result = s.fillna(pd.Timestamp('2011-01-02 10:00')) - expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp( - '2011-01-02 10:00'), Timestamp('2011-01-03 10:00', tz=tz), - Timestamp('2011-01-02 10:00')]) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), + Timestamp('2011-01-02 10:00'), + Timestamp('2011-01-03 10:00', tz=tz), + Timestamp('2011-01-02 10:00')]) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00', @@ -180,42 +197,50 @@ def test_datetime64_tz_fillna(self): tz=tz) expected = Series(idx) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) - result = s.fillna(pd.Timestamp( - '2011-01-02 10:00', tz=tz).to_pydatetime()) + result = s.fillna(pd.Timestamp('2011-01-02 10:00', + tz=tz).to_pydatetime()) idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00', '2011-01-03 10:00', '2011-01-02 10:00'], tz=tz) expected = Series(idx) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) result = s.fillna('AAA') expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA', Timestamp('2011-01-03 10:00', tz=tz), 'AAA'], dtype=object) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00')}) - expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp( - '2011-01-02 10:00', tz=tz), Timestamp( - '2011-01-03 10:00', tz=tz), 
Timestamp('2011-01-04 10:00')]) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), + Timestamp('2011-01-02 10:00', tz=tz), + Timestamp('2011-01-03 10:00', tz=tz), + Timestamp('2011-01-04 10:00')]) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00', tz=tz)}) - expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp( - '2011-01-02 10:00', tz=tz), Timestamp( - '2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00', - tz=tz)]) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), + Timestamp('2011-01-02 10:00', tz=tz), + Timestamp('2011-01-03 10:00', tz=tz), + Timestamp('2011-01-04 10:00', tz=tz)]) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) # filling with a naive/other zone, coerce to object result = s.fillna(Timestamp('20130101')) - expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp( - '2013-01-01'), Timestamp('2011-01-03 10:00', tz=tz), Timestamp( - '2013-01-01')]) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), + Timestamp('2013-01-01'), + Timestamp('2011-01-03 10:00', tz=tz), + Timestamp('2013-01-01')]) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) result = s.fillna(Timestamp('20130101', tz='US/Pacific')) expected = Series([Timestamp('2011-01-01 10:00', tz=tz), @@ -223,6 +248,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2013-01-01', tz='US/Pacific')]) self.assert_series_equal(expected, result) + self.assert_series_equal(pd.isnull(s), null_loc) def test_fillna_int(self): s = Series(np.random.randint(-100, 100, 50))