Skip to content

BUG/ENH: cleanup for Timestamp arithmetic #8916

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions doc/source/whatsnew/v0.15.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ Enhancements
- Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on `Timestamp` class (:issue:`5351`).
- Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See :ref:`here<remote_data.ga>`.
- Added flag ``order_categoricals`` to ``StataReader`` and ``read_stata`` to select whether to order imported categorical data (:issue:`8836`). See :ref:`here <io.stata-categorical>` for more information on importing categorical variables from Stata data files.
- ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`).
- ``Timedelta`` now supports arithmetic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`).
- Added ``Timedelta.to_timedelta64`` method to the public API (:issue:`8884`).
- ``Timestamp`` and ``Timedelta`` arithmetic and comparisons return ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`, :issue:`8916`).
- ``Timestamp`` and ``Timedelta`` now support arithmetic and comparisons with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`, :issue:`8916`).
- Added ``Timestamp.to_datetime64`` and ``Timedelta.to_timedelta64`` methods to the public API (:issue:`8884`, :issue:`8916`).

.. _whatsnew_0152.performance:

Expand All @@ -93,6 +93,7 @@ Bug Fixes
- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`).
- ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`).
- Fixed several outstanding bugs for ``Timedelta`` arithmetic and comparisons (:issue:`8813`, :issue:`5963`, :issue:`5436`).
- The difference of two ``Timestamp`` objects is now a ``pandas.Timedelta`` rather than only a ``datetime.timedelta`` (:issue:`8865`).
- ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`)
- ``slice`` string method now takes step into account (:issue:`8754`)
- Bug in ``BlockManager`` where setting values with different type would break block integrity (:issue:`8850`)
Expand Down
31 changes: 19 additions & 12 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5004,32 +5004,39 @@ def check(df,df2):

df = DataFrame(np.random.randint(10, size=(10, 2)), columns=['a', 'b'])
df2 = DataFrame({'a': date_range('20010101', periods=len(df)), 'b': date_range('20100101', periods=len(df))})
check(df,df2)
check(df, df2)
# check(df, pd.Timestamp('2000-01-01'))
# check(df2, 123)

def test_timestamp_compare(self):
# make sure we can compare Timestamps on the right AND left hand side
# GH4982
df = DataFrame({'dates1': date_range('20010101', periods=10),
'dates2': date_range('20010102', periods=10),
'intcol': np.random.randint(1000000000, size=10),
'floatcol': np.random.randn(10),
'stringcol': list(tm.rands(10))})
df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT
'dates2': date_range('20010101', periods=10)})
df.loc[::2, 'dates2'] = pd.NaT
ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
'ne': 'ne'}
for left, right in ops.items():
left_f = getattr(operator, left)
right_f = getattr(operator, right)

nat_cmp_value = True if left != 'ne' else False

# no nats
expected = left_f(df, Timestamp('20010109'))
result = right_f(Timestamp('20010109'), df)
tm.assert_frame_equal(result, expected)
ts = Timestamp('20010109')
expected = DataFrame(left_f(df.values, ts), columns=df.columns)
left_result = left_f(df, ts)
right_result = right_f(ts, df)
tm.assert_frame_equal(left_result, expected)
tm.assert_frame_equal(right_result, expected)

# nats
expected = left_f(df, Timestamp('nat'))
result = right_f(Timestamp('nat'), df)
tm.assert_frame_equal(result, expected)
values = (np.zeros if left != 'ne' else np.ones)((10, 2), bool)
expected = DataFrame(values, columns=df.columns)
left_result = left_f(df, Timestamp('nat'))
right_result = right_f(Timestamp('nat'), df)
tm.assert_frame_equal(left_result, expected)
tm.assert_frame_equal(right_result, expected)

def test_modulo(self):

Expand Down
10 changes: 7 additions & 3 deletions pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def __add__(self, other):
return self._add_delta(other)
elif com.is_integer(other):
return self.shift(other)
elif isinstance(other, (tslib.Timestamp, datetime)):
elif isinstance(other, (tslib.Timestamp, datetime, np.datetime64)):
return self._add_datelike(other)
else: # pragma: no cover
return NotImplemented
Expand All @@ -339,14 +339,18 @@ def __sub__(self, other):
return self._add_delta(-other)
elif com.is_integer(other):
return self.shift(-other)
elif isinstance(other, (tslib.Timestamp, datetime)):
elif isinstance(other, (tslib.Timestamp, datetime, np.datetime64)):
return self._sub_datelike(other)
else: # pragma: no cover
return NotImplemented
cls.__sub__ = __sub__

def __rsub__(self, other):
return -self + other
from pandas.tseries.tdi import TimedeltaIndex
if isinstance(self, TimedeltaIndex):
return -self + other
else:
return -(self - other)
cls.__rsub__ = __rsub__

cls.__iadd__ = __add__
Expand Down
27 changes: 27 additions & 0 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import division
from datetime import datetime, timedelta, time
import nose
import operator

from distutils.version import LooseVersion
import numpy as np
Expand Down Expand Up @@ -288,6 +289,30 @@ def test_compare_timedelta_series(self):
expected = pd.Series([False, True])
tm.assert_series_equal(actual, expected)

def test_compare_timedelta_ndarray(self):
# Exercise all six comparison operators between a Timedelta scalar and
# the array obtained from ``.values``, with the scalar on either side.
# NOTE(review): indentation is not preserved in this view; the extent of
# the ``for`` body below is presumed to run to the end of the method.
lhs = pd.to_timedelta(['1 day', '3 days']).values
rhs = Timedelta('2 day')

nat = Timedelta('nat')
# Baseline result for comparisons against NaT; inverted for 'ne' below.
expected_nat = np.array([False, False])

# Operator name -> the mirrored operator to use once the operands are swapped.
ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
'ne': 'ne'}

for left, right in ops.items():
left_f = getattr(operator, left)
right_f = getattr(operator, right)
# The array-on-the-left result serves as the reference value.
expected = left_f(lhs, rhs)

# Scalar-on-the-left with the mirrored operator must agree with it.
result = right_f(rhs, lhs)
self.assert_numpy_array_equal(result, expected)

# NaT on either side: all False, except 'ne' which is all True.
expected = ~expected_nat if left == 'ne' else expected_nat
result = left_f(lhs, nat)
self.assert_numpy_array_equal(result, expected)
result = right_f(nat, lhs)
self.assert_numpy_array_equal(result, expected)

def test_ops_notimplemented(self):
class Other:
pass
Expand All @@ -299,6 +324,8 @@ class Other:
self.assertTrue(td.__truediv__(other) is NotImplemented)
self.assertTrue(td.__mul__(other) is NotImplemented)
self.assertTrue(td.__floordiv__(td) is NotImplemented)
self.assertTrue(td.__lt__(other) is NotImplemented)
self.assertTrue(td.__eq__(other) is NotImplemented)

def test_fields(self):
rng = to_timedelta('1 days, 10:11:12')
Expand Down
24 changes: 24 additions & 0 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -3680,6 +3680,30 @@ def test_timestamp_compare_series(self):
result = right_f(Timestamp('nat'), s_nat)
tm.assert_series_equal(result, expected)

def test_timestamp_compare_ndarray(self):
# Exercise all six comparison operators between a Timestamp scalar and
# the array obtained from ``.values``, with the scalar on either side.
# NOTE(review): indentation is not preserved in this view; the extent of
# the ``for`` body below is presumed to run to the end of the method.
lhs = pd.to_datetime(['1999-12-31', '2000-01-02']).values
rhs = Timestamp('2000-01-01')

nat = Timestamp('nat')
# Baseline result for comparisons against NaT; inverted for 'ne' below.
expected_nat = np.array([False, False])

# Operator name -> the mirrored operator to use once the operands are swapped.
ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
'ne': 'ne'}

for left, right in ops.items():
left_f = getattr(operator, left)
right_f = getattr(operator, right)
# The array-on-the-left result serves as the reference value.
expected = left_f(lhs, rhs)

# Scalar-on-the-left with the mirrored operator must agree with it.
result = right_f(rhs, lhs)
self.assert_numpy_array_equal(result, expected)

# NaT on either side: all False, except 'ne' which is all True.
expected = ~expected_nat if left == 'ne' else expected_nat
result = left_f(lhs, nat)
self.assert_numpy_array_equal(result, expected)
result = right_f(nat, lhs)
self.assert_numpy_array_equal(result, expected)


class TestSlicing(tm.TestCase):

Expand Down
74 changes: 65 additions & 9 deletions pandas/tseries/tests/test_tslib.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from distutils.version import LooseVersion
import datetime
import nose

import numpy as np

from pandas import tslib
import datetime

from pandas.core.api import Timestamp, Series
from pandas.core.api import Timestamp, Timedelta, Series
from pandas.tslib import period_asfreq, period_ordinal
from pandas.tseries.index import date_range
from pandas.tseries.frequencies import get_freq
import pandas as pd
import pandas.tseries.offsets as offsets
import pandas.util.testing as tm
from pandas.util.testing import assert_series_equal
Expand Down Expand Up @@ -136,6 +136,20 @@ def test_constructor_with_stringoffset(self):
self.assertEqual(repr(result), expected_repr)
self.assertEqual(result, eval(repr(result)))

def test_conversion(self):
# Verify both the value and the exact type produced by Timestamp's
# conversion methods.
ts = Timestamp('2000-01-01')

# to_pydatetime(): must equal a plain datetime.datetime and share its type.
result = ts.to_pydatetime()
expected = datetime.datetime(2000, 1, 1)
self.assertEqual(result, expected)
self.assertEqual(type(result), type(expected))

# to_datetime64(): must equal a nanosecond np.datetime64 built from
# ts.value, and share its type and dtype.
result = ts.to_datetime64()
expected = np.datetime64(ts.value, 'ns')
self.assertEqual(result, expected)
self.assertEqual(type(result), type(expected))
self.assertEqual(result.dtype, expected.dtype)

def test_repr(self):
dates = ['2014-03-07', '2014-01-01 09:00', '2014-01-01 00:00:00.000000001']
timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern', 'dateutil/America/Los_Angeles']
Expand Down Expand Up @@ -232,13 +246,13 @@ def test_tz(self):
conv = local.tz_convert('US/Eastern')
self.assertEqual(conv.nanosecond, 5)
self.assertEqual(conv.hour, 19)

def test_tz_localize_ambiguous(self):

ts = Timestamp('2014-11-02 01:00')
ts_dst = ts.tz_localize('US/Eastern', ambiguous=True)
ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False)

rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern')
self.assertEqual(rng[1], ts_dst)
self.assertEqual(rng[2], ts_no_dst)
Expand Down Expand Up @@ -675,8 +689,8 @@ def test_addition_subtraction_types(self):
self.assertEqual(type(timestamp_instance + 1), Timestamp)
self.assertEqual(type(timestamp_instance - 1), Timestamp)

# Timestamp + datetime not supported, though subtraction is supported and yields timedelta
self.assertEqual(type(timestamp_instance - datetime_instance), datetime.timedelta)
# Timestamp + datetime not supported, though subtraction is supported and yields Timedelta
self.assertEqual(type(timestamp_instance - datetime_instance), Timedelta)

self.assertEqual(type(timestamp_instance + timedelta_instance), Timestamp)
self.assertEqual(type(timestamp_instance - timedelta_instance), Timestamp)
Expand All @@ -686,6 +700,48 @@ def test_addition_subtraction_types(self):
self.assertEqual(type(timestamp_instance + timedelta64_instance), Timestamp)
self.assertEqual(type(timestamp_instance - timedelta64_instance), Timestamp)

def test_ops_ndarray(self):
# Arithmetic between a Timestamp scalar and the arrays obtained from
# ``.values``, in both operand orders, plus tz-aware/naive comparisons.
# NOTE(review): indentation is not preserved in this view; which
# statements sit under each numpy-version check cannot be confirmed here.
ts = Timestamp('2000-01-01')

# timedelta operations
other = pd.to_timedelta(['1 day']).values
expected = pd.to_datetime(['2000-01-02']).values
self.assert_numpy_array_equal(ts + other, expected)
# The array-on-the-left form is gated on numpy >= 1.8.
if LooseVersion(np.__version__) >= '1.8':
self.assert_numpy_array_equal(other + ts, expected)
# Adding a plain integer array is expected to raise TypeError.
self.assertRaises(TypeError, lambda: ts + np.array([1]))
self.assertRaises(TypeError, lambda: np.array([1]) + ts)

expected = pd.to_datetime(['1999-12-31']).values
self.assert_numpy_array_equal(ts - other, expected)
if LooseVersion(np.__version__) >= '1.8':
self.assert_numpy_array_equal(-other + ts, expected)
# Subtracting with a plain integer array is expected to raise TypeError.
self.assertRaises(TypeError, lambda: ts - np.array([1]))
self.assertRaises(TypeError, lambda: np.array([1]) - ts)

# datetime operations
other = pd.to_datetime(['1999-12-31']).values
expected = pd.to_timedelta(['1 days']).values
self.assert_numpy_array_equal(ts - other, expected)
if LooseVersion(np.__version__) >= '1.8':
self.assert_numpy_array_equal(other - ts, -expected)

# Comparing against a tz-aware Timestamp (naive scalar or array on the
# left) is expected to raise ValueError.
tsz = Timestamp('2000-01-01', tz='EST')
self.assertRaises(ValueError, lambda: ts > tsz)
self.assertRaises(ValueError,
lambda: pd.to_datetime(['2000-01-02']).values > tsz)

def test_ops_notimplemented(self):
# Calling Timestamp's arithmetic and comparison dunder methods directly
# with an unrelated object is expected to return NotImplemented.
class Other:
pass
other = Other()

ts = Timestamp('2000-01-01')
self.assertTrue(ts.__add__(other) is NotImplemented)
self.assertTrue(ts.__sub__(other) is NotImplemented)
self.assertTrue(ts.__lt__(other) is NotImplemented)
self.assertTrue(ts.__eq__(other) is NotImplemented)

def test_addition_subtraction_preserve_frequency(self):
timestamp_instance = date_range('2014-03-05', periods=1, freq='D')[0]
timedelta_instance = datetime.timedelta(days=1)
Expand Down
Loading