diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 6f7e9bce0a3a6..b09ca81d2572f 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -54,6 +54,9 @@ Removal of prior version deprecations/changes Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- 4x improvement in ``timedelta`` string parsing (:issue:`6755`) +- 8x improvement in ``timedelta64`` and ``datetime64`` ops (:issue:`6755`) + .. _whatsnew_0170.bug_fixes: Bug Fixes diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 948a0be91b276..565760b545961 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -109,6 +109,9 @@ def test_construction(self): # currently invalid as it has a - on the hhmmdd part (only allowed on the days) self.assertRaises(ValueError, lambda : Timedelta('-10 days -1 h 1.5m 1s 3us')) + # only leading neg signs are allowed + self.assertRaises(ValueError, lambda : Timedelta('10 days -1 h 1.5m 1s 3us')) + # roundtripping both for string and value for v in ['1s', '-1s', @@ -151,7 +154,7 @@ def test_construction(self): "cannot construct a TimeDelta", lambda : Timedelta()) tm.assertRaisesRegexp(ValueError, - "cannot create timedelta string convert", + "unit abbreviation w/o a number", lambda : Timedelta('foo')) tm.assertRaisesRegexp(ValueError, "cannot construct a TimeDelta from the passed arguments, allowed keywords are ", diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 624981c5536f5..60005ef6f2d6f 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -34,22 +34,13 @@ def _convert_listlike(arg, box, unit): if isinstance(arg, (list,tuple)) or ((hasattr(arg,'__iter__') and not hasattr(arg,'dtype'))): arg = np.array(list(arg), dtype='O') + # these are shortcutable if is_timedelta64_dtype(arg): value = arg.astype('timedelta64[ns]') elif is_integer_dtype(arg): - - # these are shortcutable - value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]') + value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]', copy=False) else: - try: - value = tslib.array_to_timedelta64(_ensure_object(arg), unit=unit, coerce=coerce) - except: - - # try to process strings fast; may need to fallback - try: - value = np.array([ _get_string_converter(r, unit=unit)() for r in arg ],dtype='m8[ns]') - except: - value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit, coerce=coerce) for r in arg ]) + value = tslib.array_to_timedelta64(_ensure_object(arg), unit=unit, coerce=coerce) value = value.astype('timedelta64[ns]', copy=False) if box: @@ -95,15 +86,6 @@ def _convert_listlike(arg, box, unit): 'NS' : 'ns', 'ns' : 'ns', } -_unit_scale = { - 'd' : 86400*1e9, - 'h' : 3600*1e9, - 'm' : 60*1e9, - 's' : 1e9, - 'ms' : 1e6, - 'us' : 1e3, - 'ns' : 1, - } def _validate_timedelta_unit(arg): """ provide validation / translation for timedelta short units """ @@ -114,150 +96,11 @@ def _validate_timedelta_unit(arg): return 'ns' raise ValueError("invalid timedelta unit {0} provided".format(arg)) -_short_search = re.compile( - "^\s*(?P-?)\s*(?P\d*\.?\d*)\s*(?Pd|s|ms|us|ns)?\s*$",re.IGNORECASE) -_full_search = re.compile( - "^\s*(?P-?)\s*(?P\d*?\.?\d*?)?\s*(days|d|day)?,?\s*\+?(?P