diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 5a553264e828b..5e26c0ac4433a 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -53,7 +53,10 @@ Strings ^^^^^^^ - Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue: `21078`) -- + +Timedelta +^^^^^^^^^ +- Bug in :class:`Timedelta` where passing a float with a unit would prematurely round the float precision (:issue:`14156`) Categorical ^^^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f7bb6c1dbb304..22f9d3327f575 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -202,22 +202,22 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: if unit == 'D' or unit == 'd': m = 1000000000L * 86400 - p = 6 + p = 9 elif unit == 'h': m = 1000000000L * 3600 - p = 6 + p = 9 elif unit == 'm': m = 1000000000L * 60 - p = 6 + p = 9 elif unit == 's': m = 1000000000L - p = 6 + p = 9 elif unit == 'ms': m = 1000000L - p = 3 + p = 6 elif unit == 'us': m = 1000L - p = 0 + p = 3 elif unit == 'ns' or unit is None: m = 1L p = 0 @@ -231,10 +231,10 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? 
-1: # cast the unit, multiply base/frace separately # to avoid precision issues from float -> int base = ts - frac = ts -base + frac = ts - base if p: frac = round(frac, p) - return (base *m) + (frac *m) + return (base * m) + (frac * m) cdef inline _decode_if_necessary(object ts): diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 45be3974dad63..8b0514764b0c0 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -650,6 +650,14 @@ def test_unit_mixed(self, cache): with pytest.raises(ValueError): pd.to_datetime(arr, errors='raise', cache=cache) + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_rounding(self, cache): + # GH 14156: argument will incur floating point errors but no + # premature rounding + result = pd.to_datetime(1434743731.8770001, unit='s', cache=cache) + expected = pd.Timestamp('2015-06-19 19:55:31.877000093') + assert result == expected + @pytest.mark.parametrize('cache', [True, False]) def test_dataframe(self, cache): diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 5da347e47957c..b80263021c269 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -182,6 +182,8 @@ def test_date_time(): fname = os.path.join(dirpath, "datetime.csv") df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime', 'DateTimeHi', 'Taiw']) + # GH 19732: Timestamps imported from sas will incur floating point errors + df.iloc[:, 3] = df.iloc[:, 3].dt.round('us') tm.assert_frame_equal(df, df0) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 3fdc2aa71bfc0..205fdf49d3e91 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -106,6 +106,16 @@ def test_compare_timedelta_ndarray(self): class TestTimedeltas(object): + 
@pytest.mark.parametrize("unit, value, expected", [ + ('us', 9.999, 9999), ('ms', 9.999999, 9999999), + ('s', 9.999999999, 9999999999)]) + def test_rounding_on_int_unit_construction(self, unit, value, expected): + # GH 12690 + result = Timedelta(value, unit=unit) + assert result.value == expected + result = Timedelta(str(value) + unit) + assert result.value == expected + def test_total_seconds_scalar(self): # see gh-10939 rng = Timedelta('1 days, 10:11:12.100123456') diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index b022b327de57c..ab87d98fca8eb 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -621,10 +621,51 @@ def test_basics_nanos(self): assert stamp.microsecond == 145224 assert stamp.nanosecond == 192 - def test_unit(self): - - def check(val, unit=None, h=1, s=1, us=0): - stamp = Timestamp(val, unit=unit) + @pytest.mark.parametrize('value, check_kwargs', [ + [946688461000000000, {}], + [946688461000000000 / long(1000), dict(unit='us')], + [946688461000000000 / long(1000000), dict(unit='ms')], + [946688461000000000 / long(1000000000), dict(unit='s')], + [10957, dict(unit='D', h=0)], + pytest.param((946688461000000000 + 500000) / long(1000000000), + dict(unit='s', us=499, ns=964), + marks=pytest.mark.skipif(not PY3, + reason='using truediv, so these' + ' are like floats')), + pytest.param((946688461000000000 + 500000000) / long(1000000000), + dict(unit='s', us=500000), + marks=pytest.mark.skipif(not PY3, + reason='using truediv, so these' + ' are like floats')), + pytest.param((946688461000000000 + 500000) / long(1000000), + dict(unit='ms', us=500), + marks=pytest.mark.skipif(not PY3, + reason='using truediv, so these' + ' are like floats')), + pytest.param((946688461000000000 + 500000) / long(1000000000), + dict(unit='s'), + marks=pytest.mark.skipif(PY3, + reason='get chopped in py2')), + pytest.param((946688461000000000 + 
500000000) / long(1000000000), + dict(unit='s'), + marks=pytest.mark.skipif(PY3, + reason='get chopped in py2')), + pytest.param((946688461000000000 + 500000) / long(1000000), + dict(unit='ms'), + marks=pytest.mark.skipif(PY3, + reason='get chopped in py2')), + [(946688461000000000 + 500000) / long(1000), dict(unit='us', us=500)], + [(946688461000000000 + 500000000) / long(1000000), + dict(unit='ms', us=500000)], + [946688461000000000 / 1000.0 + 5, dict(unit='us', us=5)], + [946688461000000000 / 1000.0 + 5000, dict(unit='us', us=5000)], + [946688461000000000 / 1000000.0 + 0.5, dict(unit='ms', us=500)], + [946688461000000000 / 1000000.0 + 0.005, dict(unit='ms', us=5, ns=5)], + [946688461000000000 / 1000000000.0 + 0.5, dict(unit='s', us=500000)], + [10957 + 0.5, dict(unit='D', h=12)]]) + def test_unit(self, value, check_kwargs): + def check(value, unit=None, h=1, s=1, us=0, ns=0): + stamp = Timestamp(value, unit=unit) assert stamp.year == 2000 assert stamp.month == 1 assert stamp.day == 1 @@ -637,41 +678,9 @@ def check(val, unit=None, h=1, s=1, us=0): assert stamp.minute == 0 assert stamp.second == 0 assert stamp.microsecond == 0 - assert stamp.nanosecond == 0 - - ts = Timestamp('20000101 01:01:01') - val = ts.value - days = (ts - Timestamp('1970-01-01')).days - - check(val) - check(val / long(1000), unit='us') - check(val / long(1000000), unit='ms') - check(val / long(1000000000), unit='s') - check(days, unit='D', h=0) + assert stamp.nanosecond == ns - # using truediv, so these are like floats - if PY3: - check((val + 500000) / long(1000000000), unit='s', us=500) - check((val + 500000000) / long(1000000000), unit='s', us=500000) - check((val + 500000) / long(1000000), unit='ms', us=500) - - # get chopped in py2 - else: - check((val + 500000) / long(1000000000), unit='s') - check((val + 500000000) / long(1000000000), unit='s') - check((val + 500000) / long(1000000), unit='ms') - - # ok - check((val + 500000) / long(1000), unit='us', us=500) - check((val + 500000000) 
/ long(1000000), unit='ms', us=500000) - - # floats - check(val / 1000.0 + 5, unit='us', us=5) - check(val / 1000.0 + 5000, unit='us', us=5000) - check(val / 1000000.0 + 0.5, unit='ms', us=500) - check(val / 1000000.0 + 0.005, unit='ms', us=5) - check(val / 1000000000.0 + 0.5, unit='s', us=500000) - check(days + 0.5, unit='D', h=12) + check(value, **check_kwargs) def test_roundtrip(self):