From f0fbcbb873b63b2a6ee0c47cee6694c5e9cdc256 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 26 Nov 2014 16:47:30 -0500 Subject: [PATCH 1/4] BUG/ENH: cleanup for Timestamp arithmetic Fixes GH8865 (Timestamp - Timestamp -> Timedelta) This PR cleans up and extends `Timestamp` arithmetic similarly to the treatment for `Timedelta` in GH8884. It includes a new `to_datetime64()` method, and arithmetic now works between Timestamp and ndarrays. I also ensured comparison operations work properly between all of (Timestamp, Timedelta, NaT) and ndarrays. Implementation notes: wide use of the `NotImplemented` singleton let me clean up many of these complex cases. I also strove to reduce the tight coupling of `Timestamp`/`Timedelta` to pandas itself by removing use of the `_typ` property in tslib (I honestly don't quite understand why it needs to exist) and by not treating series/index any differently from any other ndarray-like object. --- doc/source/whatsnew/v0.15.2.txt | 7 +- pandas/tseries/base.py | 10 ++- pandas/tseries/tests/test_timedeltas.py | 27 ++++++ pandas/tseries/tests/test_timeseries.py | 24 +++++ pandas/tseries/tests/test_tslib.py | 69 ++++++++++++-- pandas/tslib.pyx | 115 +++++++++++------------- 6 files changed, 173 insertions(+), 79 deletions(-) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index d6d36fd8d14ba..f681ee1ddc002 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -66,9 +66,9 @@ Enhancements - Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on `Timestamp` class (:issue:`5351`). - Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See :ref:`here`. - Added flag ``order_categoricals`` to ``StataReader`` and ``read_stata`` to select whether to order imported categorical data (:issue:`8836`). See :ref:`here ` for more information on importing categorical variables from Stata data files. 
-- ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`). -- ``Timedelta`` now supports arithemtic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`). -- Added ``Timedelta.to_timedelta64`` method to the public API (:issue:`8884`). +- ``Timestamp`` and ``Timedelta`` arithmetic and comparisons return ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`, :issue:`TBD`). +- ``Timestamp`` and ``Timedelta`` now support arithmetic and comparisons with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`, :issue:`TBD`). +- Added ``Timestamp.to_datetime64`` and ``Timedelta.to_timedelta64`` methods to the public API (:issue:`8884`, :issue:`TBD`). .. _whatsnew_0152.performance: @@ -93,6 +93,7 @@ Bug Fixes - ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`). - ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`). - Fixed several outstanding bugs for ``Timedelta`` arithmetic and comparisons (:issue:`8813`, :issue:`5963`, :issue:`5436`). +- The difference of two ``Timestamp`` objects is now a ``pandas.Timedelta`` rather than only a ``datetime.timedelta`` (:issue:`8865`). 
- ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`) - ``slice`` string method now takes step into account (:issue:`8754`) - Bug in ``BlockManager`` where setting values with different type would break block integrity (:issue:`8850`) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index b523fb1d56290..5bc2565ff3db6 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -316,7 +316,7 @@ def __add__(self, other): return self._add_delta(other) elif com.is_integer(other): return self.shift(other) - elif isinstance(other, (tslib.Timestamp, datetime)): + elif isinstance(other, (tslib.Timestamp, datetime, np.datetime64)): return self._add_datelike(other) else: # pragma: no cover return NotImplemented @@ -339,14 +339,18 @@ def __sub__(self, other): return self._add_delta(-other) elif com.is_integer(other): return self.shift(-other) - elif isinstance(other, (tslib.Timestamp, datetime)): + elif isinstance(other, (tslib.Timestamp, datetime, np.datetime64)): return self._sub_datelike(other) else: # pragma: no cover return NotImplemented cls.__sub__ = __sub__ def __rsub__(self, other): - return -self + other + from pandas.tseries.tdi import TimedeltaIndex + if isinstance(self, TimedeltaIndex): + return -self + other + else: + return -(self - other) cls.__rsub__ = __rsub__ cls.__iadd__ = __add__ diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 494a9cc95dc49..e726f66320af7 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -3,6 +3,7 @@ from __future__ import division from datetime import datetime, timedelta, time import nose +import operator from distutils.version import LooseVersion import numpy as np @@ -288,6 +289,30 @@ def test_compare_timedelta_series(self): expected = pd.Series([False, True]) tm.assert_series_equal(actual, expected) + def test_compare_timedelta_ndarray(self): + lhs = pd.to_timedelta(['1 
day', '3 days']).values + rhs = Timedelta('2 day') + + nat = Timedelta('nat') + expected_nat = np.array([False, False]) + + ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', + 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + expected = left_f(lhs, rhs) + + result = right_f(rhs, lhs) + self.assert_numpy_array_equal(result, expected) + + expected = ~expected_nat if left == 'ne' else expected_nat + result = left_f(lhs, nat) + self.assert_numpy_array_equal(result, expected) + result = right_f(nat, lhs) + self.assert_numpy_array_equal(result, expected) + def test_ops_notimplemented(self): class Other: pass @@ -299,6 +324,8 @@ class Other: self.assertTrue(td.__truediv__(other) is NotImplemented) self.assertTrue(td.__mul__(other) is NotImplemented) self.assertTrue(td.__floordiv__(td) is NotImplemented) + self.assertTrue(td.__lt__(other) is NotImplemented) + self.assertTrue(td.__eq__(other) is NotImplemented) def test_fields(self): rng = to_timedelta('1 days, 10:11:12') diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 436f9f3b9c9b3..5bfcb7a09978a 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3680,6 +3680,30 @@ def test_timestamp_compare_series(self): result = right_f(Timestamp('nat'), s_nat) tm.assert_series_equal(result, expected) + def test_timestamp_compare_ndarray(self): + lhs = pd.to_datetime(['1999-12-31', '2000-01-02']).values + rhs = Timestamp('2000-01-01') + + nat = Timestamp('nat') + expected_nat = np.array([False, False]) + + ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', + 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + expected = left_f(lhs, rhs) + + result = right_f(rhs, lhs) + self.assert_numpy_array_equal(result, expected) + + expected = ~expected_nat if left == 'ne' 
else expected_nat + result = left_f(lhs, nat) + self.assert_numpy_array_equal(result, expected) + result = right_f(nat, lhs) + self.assert_numpy_array_equal(result, expected) + class TestSlicing(tm.TestCase): diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 9adcbb4ea4a41..21facabefb5ec 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -1,14 +1,14 @@ +from distutils.version import LooseVersion +import datetime import nose - import numpy as np from pandas import tslib -import datetime - -from pandas.core.api import Timestamp, Series +from pandas.core.api import Timestamp, Timedelta, Series from pandas.tslib import period_asfreq, period_ordinal from pandas.tseries.index import date_range from pandas.tseries.frequencies import get_freq +import pandas as pd import pandas.tseries.offsets as offsets import pandas.util.testing as tm from pandas.util.testing import assert_series_equal @@ -136,6 +136,20 @@ def test_constructor_with_stringoffset(self): self.assertEqual(repr(result), expected_repr) self.assertEqual(result, eval(repr(result))) + def test_conversion(self): + ts = Timestamp('2000-01-01') + + result = ts.to_pydatetime() + expected = datetime.datetime(2000, 1, 1) + self.assertEqual(result, expected) + self.assertEqual(type(result), type(expected)) + + result = ts.to_datetime64() + expected = np.datetime64(ts.value, 'ns') + self.assertEqual(result, expected) + self.assertEqual(type(result), type(expected)) + self.assertEqual(result.dtype, expected.dtype) + def test_repr(self): dates = ['2014-03-07', '2014-01-01 09:00', '2014-01-01 00:00:00.000000001'] timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/America/Los_Angeles'] @@ -232,13 +246,13 @@ def test_tz(self): conv = local.tz_convert('US/Eastern') self.assertEqual(conv.nanosecond, 5) self.assertEqual(conv.hour, 19) - + def test_tz_localize_ambiguous(self): - + ts = Timestamp('2014-11-02 01:00') ts_dst = 
ts.tz_localize('US/Eastern', ambiguous=True) ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) - + rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern') self.assertEqual(rng[1], ts_dst) self.assertEqual(rng[2], ts_no_dst) @@ -675,8 +689,8 @@ def test_addition_subtraction_types(self): self.assertEqual(type(timestamp_instance + 1), Timestamp) self.assertEqual(type(timestamp_instance - 1), Timestamp) - # Timestamp + datetime not supported, though subtraction is supported and yields timedelta - self.assertEqual(type(timestamp_instance - datetime_instance), datetime.timedelta) + # Timestamp + datetime not supported, though subtraction is supported and yields Timedelta + self.assertEqual(type(timestamp_instance - datetime_instance), Timedelta) self.assertEqual(type(timestamp_instance + timedelta_instance), Timestamp) self.assertEqual(type(timestamp_instance - timedelta_instance), Timestamp) @@ -686,6 +700,43 @@ def test_addition_subtraction_types(self): self.assertEqual(type(timestamp_instance + timedelta64_instance), Timestamp) self.assertEqual(type(timestamp_instance - timedelta64_instance), Timestamp) + def test_ops_ndarray(self): + ts = Timestamp('2000-01-01') + + # timedelta operations + other = pd.to_timedelta(['1 day']).values + expected = pd.to_datetime(['2000-01-02']).values + self.assert_numpy_array_equal(ts + other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other + ts, expected) + self.assertRaises(TypeError, lambda: ts + np.array([1])) + self.assertRaises(TypeError, lambda: np.array([1]) + ts) + + expected = pd.to_datetime(['1999-12-31']).values + self.assert_numpy_array_equal(ts - other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(-other + ts, expected) + self.assertRaises(TypeError, lambda: ts - np.array([1])) + self.assertRaises(TypeError, lambda: np.array([1]) - ts) + + # datetime operations + other = pd.to_datetime(['1999-12-31']).values + 
expected = pd.to_timedelta(['1 days']).values + self.assert_numpy_array_equal(ts - other, expected) + if LooseVersion(np.__version__) >= '1.8': + self.assert_numpy_array_equal(other - ts, -expected) + + def test_ops_notimplemented(self): + class Other: + pass + other = Other() + + ts = Timestamp('2000-01-01') + self.assertTrue(ts.__add__(other) is NotImplemented) + self.assertTrue(ts.__sub__(other) is NotImplemented) + self.assertTrue(ts.__lt__(other) is NotImplemented) + self.assertTrue(ts.__eq__(other) is NotImplemented) + def test_addition_subtraction_preserve_frequency(self): timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0] timedelta_instance = datetime.timedelta(days=1) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 8efc174d6890b..dc3c5fecdec20 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -531,11 +531,14 @@ class Timestamp(_Timestamp): self.nanosecond/3600.0/1e+9 )/24.0) + # cython extension types like _Timestamp don't use reversed operators like + # __radd__ or __rsub__, so define them here instead def __radd__(self, other): - # __radd__ on cython extension types like _Timestamp is not used, so - # define it here instead return self + other + def __rsub__(self, other): + return -(self - other) + _nat_strings = set(['NaT','nat','NAT','nan','NaN','NAN']) class NaTType(_NaT): @@ -756,30 +759,13 @@ cdef class _Timestamp(datetime): except ValueError: return self._compare_outside_nanorange(other, op) else: - ndim = getattr(other, _NDIM_STRING, -1) - - if ndim != -1: - if ndim == 0: - if isinstance(other, np.datetime64): - other = Timestamp(other) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - - # only allow ==, != ops - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, - type(other).__name__)) - return PyObject_RichCompare(other, self, _reverse_ops[op]) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - raise TypeError('Cannot compare type 
%r with type %r' % - (type(self).__name__, type(other).__name__)) + if hasattr(other, 'dtype'): + self_arg = self + if self.tz is None and self.offset is None: + # allow comparison to ndarrays with appropriate dtype + self_arg = self.to_datetime64() + return PyObject_RichCompare(other, self_arg, _reverse_ops[op]) + return NotImplemented self._assert_tzawareness_compat(other) return _cmp_scalar(self.value, ots.value, op) @@ -825,6 +811,13 @@ cdef class _Timestamp(datetime): dts.hour, dts.min, dts.sec, dts.us, ts.tzinfo) + cpdef to_datetime64(self): + """ Returns a numpy.datetime64 object with 'ns' precision """ + return np.datetime64(self.value, 'ns') + + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + def __add__(self, other): cdef int64_t other_int @@ -845,9 +838,14 @@ cdef class _Timestamp(datetime): result = Timestamp(normalize_date(result)) return result - # index/series like - elif hasattr(other, '_typ'): - return other + self + elif hasattr(other, 'dtype'): + if self.tz is None and self.offset is None: + if other.dtype.kind not in ['m', 'M']: + # raise rather than letting numpy return wrong answer + raise TypeError('cannot add operand with type %r to ' + 'Timestamp' % other.dtype) + return self.to_datetime64() + other + return NotImplemented result = datetime.__add__(self, other) if isinstance(result, datetime): @@ -861,17 +859,23 @@ cdef class _Timestamp(datetime): neg_other = -other return self + neg_other - # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex - elif getattr(other,'_typ',None) == 'datetimeindex': - return -other.__sub__(self) - - # a Timestamp-TimedeltaIndex -> yields a negative TimedeltaIndex - elif getattr(other,'_typ',None) == 'timedeltaindex': - return (-other).__add__(self) + if hasattr(other, 'dtype'): + if self.tz is None and self.offset is None: + if other.dtype.kind not in ['m', 'M']: + # raise rather than letting numpy return wrong answer + raise TypeError('cannot subtract operand with type %r ' + 
'from Timestamp' % other.dtype) + return self.to_datetime64() - other + return NotImplemented elif other is NaT: return NaT - return datetime.__sub__(self, other) + + result = datetime.__sub__(self, other) + if isinstance(result, timedelta): + result = Timedelta(result) + # TODO: handle ns precision? + return result cpdef _get_field(self, field): out = get_date_field(np.array([self.value], dtype=np.int64), field) @@ -907,6 +911,8 @@ cdef class _NaT(_Timestamp): # py3k needs this defined here return hash(self.value) + __array_priority__ = 0 + def __richcmp__(_NaT self, object other, int op): cdef int ndim = getattr(other, 'ndim', -1) @@ -917,8 +923,7 @@ cdef class _NaT(_Timestamp): if isinstance(other, np.datetime64): other = Timestamp(other) else: - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, type(other).__name__)) + return NotImplemented return PyObject_RichCompare(other, self, _reverse_ops[op]) def __add__(self, other): @@ -1515,30 +1520,10 @@ cdef class _Timedelta(timedelta): elif isinstance(other, timedelta): ots = Timedelta(other) else: - ndim = getattr(other, _NDIM_STRING, -1) - - if ndim != -1: - if ndim == 0: - if isinstance(other, np.timedelta64): - other = Timedelta(other) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - - # only allow ==, != ops - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, - type(other).__name__)) - return PyObject_RichCompare(other, self, _reverse_ops[op]) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - raise TypeError('Cannot compare type %r with type %r' % - (type(self).__name__, type(other).__name__)) + if hasattr(other, 'dtype'): + return PyObject_RichCompare(other, self.to_timedelta64(), + _reverse_ops[op]) + return NotImplemented return _cmp_scalar(self.value, ots.value, op) @@ -1925,7 +1910,9 @@ class Timedelta(_Timedelta): if hasattr(other, 'dtype'): if other.dtype.kind not in ['m', 'M']: # raise 
rathering than letting numpy return wrong answer - return NotImplemented + raise TypeError('cannot calculate %s between Timedelta ' + 'and array with dtype %r' + % (name, other.dtype)) return op(self.to_timedelta64(), other) if not self._validate_ops_compat(other): From dc72e046b8dbdb27cc805f47f84b2a09e964aa86 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 28 Nov 2014 11:58:35 -0500 Subject: [PATCH 2/4] replace TBD with PR number (and retrigger Travis) --- doc/source/whatsnew/v0.15.2.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index f681ee1ddc002..b13f762d0fd27 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -66,9 +66,9 @@ Enhancements - Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on `Timestamp` class (:issue:`5351`). - Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See :ref:`here`. - Added flag ``order_categoricals`` to ``StataReader`` and ``read_stata`` to select whether to order imported categorical data (:issue:`8836`). See :ref:`here ` for more information on importing categorical variables from Stata data files. -- ``Timestamp`` and ``Timedelta`` arithmetic and comparisons return ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`, :issue:`TBD`). -- ``Timestamp`` and ``Timedelta`` now support arithmetic and comparisons with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`, :issue:`TBD`). -- Added ``Timestamp.to_datetime64`` and ``Timedelta.to_timedelta64`` methods to the public API (:issue:`8884`, :issue:`TBD`). +- ``Timestamp`` and ``Timedelta`` arithmetic and comparisons return ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`, :issue:`8916`). 
+- ``Timestamp`` and ``Timedelta`` now support arithmetic and comparisons with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`, :issue:`8916`). +- Added ``Timestamp.to_datetime64`` and ``Timedelta.to_timedelta64`` methods to the public API (:issue:`8884`, :issue:`8916`). .. _whatsnew_0152.performance: From 29ebfa8bb34b463d67778a007528c02144b601f6 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 28 Nov 2014 23:19:24 -0500 Subject: [PATCH 3/4] fixes for Timestamp/Timedelta comparison ops --- pandas/tslib.pyx | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index dc3c5fecdec20..cabd0c4f097b2 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -758,13 +758,18 @@ cdef class _Timestamp(datetime): ots = Timestamp(other) except ValueError: return self._compare_outside_nanorange(other, op) + elif isinstance(other, np.datetime64): + return PyObject_RichCompareBool(self, Timestamp(other), op) + elif hasattr(other, 'dtype'): + if self.tz is None and self.offset is None: + # allow comparison to ndarrays; use the reverse op because it's + # necessary when comparing to pd.Series + return PyObject_RichCompare(other, self.to_datetime64(), + _reverse_ops[op]) + # TODO: somehow trigger normal numpy broadcasting rules even though + # we set __array_priority__ > ndarray.__array_priority__ + return NotImplemented else: - if hasattr(other, 'dtype'): - self_arg = self - if self.tz is None and self.offset is None: - # allow comparison to ndarrays with appropriate dtype - self_arg = self.to_datetime64() - return PyObject_RichCompare(other, self_arg, _reverse_ops[op]) return NotImplemented self._assert_tzawareness_compat(other) @@ -911,7 +916,8 @@ cdef class _NaT(_Timestamp): # py3k needs this defined here return hash(self.value) - __array_priority__ = 0 + # less than np.ndarray + __array_priority__ = -1 def __richcmp__(_NaT self, object other, int 
op): cdef int ndim = getattr(other, 'ndim', -1) @@ -1519,12 +1525,13 @@ cdef class _Timedelta(timedelta): ots = other elif isinstance(other, timedelta): ots = Timedelta(other) - else: - if hasattr(other, 'dtype'): - return PyObject_RichCompare(other, self.to_timedelta64(), - _reverse_ops[op]) - return NotImplemented - + elif isinstance(other, np.timedelta64): + return PyObject_RichCompareBool(self, Timedelta(other), op) + elif hasattr(other, 'dtype'): + # allow comparison to ndarrays; use the reverse op because it's + # necessary when comparing to pd.Series + return PyObject_RichCompare(other, self.to_datetime64(), + _reverse_ops[op]) return _cmp_scalar(self.value, ots.value, op) def _ensure_components(_Timedelta self): From ef97b8dc8f75f5c6d88eec3a5d7606810e43b35e Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sat, 6 Dec 2014 13:27:45 -0800 Subject: [PATCH 4/4] WIP: terrible hack to attempt to fix dt comparisons --- pandas/tests/test_frame.py | 31 +++++++++++++-------- pandas/tseries/tests/test_tslib.py | 5 ++++ pandas/tslib.pyx | 44 ++++++++++++++++++++---------- 3 files changed, 54 insertions(+), 26 deletions(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index fc031afe728dc..ece2a4ee4dded 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5004,32 +5004,39 @@ def check(df,df2): df = DataFrame(np.random.randint(10, size=(10, 2)), columns=['a', 'b']) df2 = DataFrame({'a': date_range('20010101', periods=len(df)), 'b': date_range('20100101', periods=len(df))}) - check(df,df2) + check(df, df2) + # check(df, pd.Timestamp('2000-01-01')) + # check(df2, 123) def test_timestamp_compare(self): # make sure we can compare Timestamps on the right AND left hand side # GH4982 df = DataFrame({'dates1': date_range('20010101', periods=10), - 'dates2': date_range('20010102', periods=10), - 'intcol': np.random.randint(1000000000, size=10), - 'floatcol': np.random.randn(10), - 'stringcol': list(tm.rands(10))}) - 
df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT + 'dates2': date_range('20010101', periods=10)}) + df.loc[::2, 'dates2'] = pd.NaT ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} for left, right in ops.items(): left_f = getattr(operator, left) right_f = getattr(operator, right) + nat_cmp_value = True if left != 'ne' else False + # no nats - expected = left_f(df, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), df) - tm.assert_frame_equal(result, expected) + ts = Timestamp('20010109') + expected = DataFrame(left_f(df.values, ts), columns=df.columns) + left_result = left_f(df, ts) + right_result = right_f(ts, df) + tm.assert_frame_equal(left_result, expected) + tm.assert_frame_equal(right_result, expected) # nats - expected = left_f(df, Timestamp('nat')) - result = right_f(Timestamp('nat'), df) - tm.assert_frame_equal(result, expected) + values = (np.zeros if left != 'ne' else np.ones)((10, 2), bool) + expected = DataFrame(values, columns=df.columns) + left_result = left_f(df, Timestamp('nat')) + right_result = right_f(Timestamp('nat'), df) + tm.assert_frame_equal(left_result, expected) + tm.assert_frame_equal(right_result, expected) def test_modulo(self): diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 21facabefb5ec..7d9427ce0ae00 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -726,6 +726,11 @@ def test_ops_ndarray(self): if LooseVersion(np.__version__) >= '1.8': self.assert_numpy_array_equal(other - ts, -expected) + tsz = Timestamp('2000-01-01', tz='EST') + self.assertRaises(ValueError, lambda: ts > tsz) + self.assertRaises(ValueError, + lambda: pd.to_datetime(['2000-01-02']).values > tsz) + def test_ops_notimplemented(self): class Other: pass diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index cabd0c4f097b2..e23832b04a5d6 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -724,8 +724,6 @@ _reverse_ops[Py_GT] = Py_LT 
_reverse_ops[Py_GE] = Py_LE -cdef str _NDIM_STRING = "ndim" - # This is PITA. Because we inherit from datetime, which has very specific # construction requirements, we need to do object instantiation in python # (see Timestamp class above). This will serve as a C extension type that @@ -746,10 +744,12 @@ cdef class _Timestamp(datetime): int ndim if isinstance(other, _Timestamp): + # print '_timestamp' if isinstance(other, _NaT): return _cmp_nat_dt(other, self, _reverse_ops[op]) ots = other elif isinstance(other, datetime): + # print 'datetime' if self.nanosecond == 0: val = self.to_datetime() return PyObject_RichCompareBool(val, other, op) @@ -759,17 +759,33 @@ cdef class _Timestamp(datetime): except ValueError: return self._compare_outside_nanorange(other, op) elif isinstance(other, np.datetime64): - return PyObject_RichCompareBool(self, Timestamp(other), op) + # print 'convert dt64' + return PyObject_RichCompare(self, Timestamp(other), op) elif hasattr(other, 'dtype'): + # print 'dtype', type(other), other.dtype, other if self.tz is None and self.offset is None: # allow comparison to ndarrays; use the reverse op because it's # necessary when comparing to pd.Series return PyObject_RichCompare(other, self.to_datetime64(), _reverse_ops[op]) - # TODO: somehow trigger normal numpy broadcasting rules even though - # we set __array_priority__ > ndarray.__array_priority__ - return NotImplemented + # This terrible hack lets us invoke normal numpy broadcasting rules + # even though we set __array_priority__ > + # ndarray.__array_priority__ (for the benefit of arithmetic) + # return NotImplemented + elif self.__array_priority__ == 0: + # print 'priority == 0' + return NotImplemented + else: + # print 'priority set to 0' + # print(self.__array_priority__) + new_obj = Timestamp(self.value, self.offset, self.tz) + new_obj.__array_priority__ = 0 + new_obj._allow_cmp_int_dtype = True + return PyObject_RichCompare(other, new_obj, _reverse_ops[op]) + elif hasattr(self, 
'_allow_cmp_int_dtype') and isinstance(other, long): + ots = other = Timestamp(other) else: + # print 'not implemented', type(other), other, self.__array_priority__ return NotImplemented self._assert_tzawareness_compat(other) @@ -917,7 +933,7 @@ cdef class _NaT(_Timestamp): return hash(self.value) # less than np.ndarray - __array_priority__ = -1 + __array_priority__ = 0 def __richcmp__(_NaT self, object other, int op): cdef int ndim = getattr(other, 'ndim', -1) @@ -1519,19 +1535,19 @@ cdef class _Timedelta(timedelta): _Timedelta ots int ndim - if isinstance(other, _Timedelta): - if isinstance(other, _NaT): - return _cmp_nat_dt(other, self, _reverse_ops[op]) + if isinstance(other, _NaT): + return NotImplemented + elif isinstance(other, _Timedelta): ots = other - elif isinstance(other, timedelta): - ots = Timedelta(other) - elif isinstance(other, np.timedelta64): + elif isinstance(other, (timedelta, np.timedelta64)): return PyObject_RichCompareBool(self, Timedelta(other), op) elif hasattr(other, 'dtype'): # allow comparison to ndarrays; use the reverse op because it's # necessary when comparing to pd.Series - return PyObject_RichCompare(other, self.to_datetime64(), + return PyObject_RichCompare(other, self.to_timedelta64(), _reverse_ops[op]) + else: + return NotImplemented return _cmp_scalar(self.value, ots.value, op) def _ensure_components(_Timedelta self):