diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d10cb28a3f588..4e0c1ae01fdf3 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -336,6 +336,7 @@ Deprecations - :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`) - Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`) - :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`) +- Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index e21526a8f69e4..45f32d92c7a74 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1,4 +1,5 @@ import collections +import warnings import cython @@ -466,6 +467,15 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): try: unit = ''.join(unit) + + if unit in ["M", "Y", "y"]: + warnings.warn( + "Units 'M', 'Y' and 'y' do not represent unambiguous " + "timedelta values and will be removed in a future version", + FutureWarning, + stacklevel=2, + ) + if unit == 'M': # To parse ISO 8601 string, 'M' should be treated as minute, # not month @@ -634,9 +644,11 @@ cdef inline int64_t parse_iso_format_string(str ts) except? 
-1: else: neg = 1 elif c in ['W', 'D', 'H', 'M']: - unit.append(c) if c in ['H', 'M'] and len(number) > 2: raise ValueError(err_msg) + if c == 'M': + c = 'min' + unit.append(c) r = timedelta_from_spec(number, '0', unit) result += timedelta_as_neg(r, neg) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 372eac29bad9e..e8faebd6b2542 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -25,10 +25,12 @@ def to_timedelta(arg, unit=None, errors="raise"): Parameters ---------- arg : str, timedelta, list-like or Series - The data to be converted to timedelta. The character M by itself, - e.g. '1M', is treated as minute, not month. The characters Y and y - are treated as the mean length of the Gregorian calendar year - - 365.2425 days or 365 days 5 hours 49 minutes 12 seconds. + The data to be converted to timedelta. + + .. deprecated:: 1.2 + Strings with units 'M', 'Y' and 'y' do not represent + unambiguous timedelta values and will be removed in a future version + unit : str, optional Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``. 
diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 1fbbb12b64dc5..67e031b53e44e 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -86,7 +86,7 @@ def test_properties(self, closed): [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]), - pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]), + pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5H", "6D"]), ], ) def test_length(self, closed, breaks): diff --git a/pandas/tests/scalar/interval/test_interval.py b/pandas/tests/scalar/interval/test_interval.py index 8ad9a2c7a9c70..5071c5cdec6c8 100644 --- a/pandas/tests/scalar/interval/test_interval.py +++ b/pandas/tests/scalar/interval/test_interval.py @@ -79,8 +79,8 @@ def test_hash(self, interval): (-np.inf, np.inf, np.inf), (Timedelta("0 days"), Timedelta("5 days"), Timedelta("5 days")), (Timedelta("10 days"), Timedelta("10 days"), Timedelta("0 days")), - (Timedelta("1H10M"), Timedelta("5H5M"), Timedelta("3H55M")), - (Timedelta("5S"), Timedelta("1H"), Timedelta("59M55S")), + (Timedelta("1H10min"), Timedelta("5H5min"), Timedelta("3H55min")), + (Timedelta("5S"), Timedelta("1H"), Timedelta("59min55S")), ], ) def test_length(self, left, right, expected): diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index a01921bd6c4c2..89b45b7266daa 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -160,108 +160,117 @@ def test_nat_converters(self): assert result.astype("int64") == iNaT @pytest.mark.parametrize( - "units, np_unit", - [ - (["W", "w"], "W"), - (["D", "d", "days", "day", "Days", "Day"], "D"), - ( - ["m", "minute", "min", "minutes", "t", "Minute", "Min", "Minutes", "T"], + "unit, np_unit", + 
[(value, "W") for value in ["W", "w"]] + + [(value, "D") for value in ["D", "d", "days", "day", "Days", "Day"]] + + [ + (value, "m") + for value in [ "m", - ), - (["s", "seconds", "sec", "second", "S", "Seconds", "Sec", "Second"], "s"), - ( - [ - "ms", - "milliseconds", - "millisecond", - "milli", - "millis", - "l", - "MS", - "Milliseconds", - "Millisecond", - "Milli", - "Millis", - "L", - ], + "minute", + "min", + "minutes", + "t", + "Minute", + "Min", + "Minutes", + "T", + ] + ] + + [ + (value, "s") + for value in [ + "s", + "seconds", + "sec", + "second", + "S", + "Seconds", + "Sec", + "Second", + ] + ] + + [ + (value, "ms") + for value in [ "ms", - ), - ( - [ - "us", - "microseconds", - "microsecond", - "micro", - "micros", - "u", - "US", - "Microseconds", - "Microsecond", - "Micro", - "Micros", - "U", - ], + "milliseconds", + "millisecond", + "milli", + "millis", + "l", + "MS", + "Milliseconds", + "Millisecond", + "Milli", + "Millis", + "L", + ] + ] + + [ + (value, "us") + for value in [ "us", - ), - ( - [ - "ns", - "nanoseconds", - "nanosecond", - "nano", - "nanos", - "n", - "NS", - "Nanoseconds", - "Nanosecond", - "Nano", - "Nanos", - "N", - ], + "microseconds", + "microsecond", + "micro", + "micros", + "u", + "US", + "Microseconds", + "Microsecond", + "Micro", + "Micros", + "U", + ] + ] + + [ + (value, "ns") + for value in [ "ns", - ), + "nanoseconds", + "nanosecond", + "nano", + "nanos", + "n", + "NS", + "Nanoseconds", + "Nanosecond", + "Nano", + "Nanos", + "N", + ] ], ) @pytest.mark.parametrize("wrapper", [np.array, list, pd.Index]) - def test_unit_parser(self, units, np_unit, wrapper): + def test_unit_parser(self, unit, np_unit, wrapper): # validate all units, GH 6855, GH 21762 - for unit in units: - # array-likes - expected = TimedeltaIndex( - [np.timedelta64(i, np_unit) for i in np.arange(5).tolist()] - ) - result = to_timedelta(wrapper(range(5)), unit=unit) - tm.assert_index_equal(result, expected) - result = TimedeltaIndex(wrapper(range(5)), 
unit=unit) - tm.assert_index_equal(result, expected) - - if unit == "M": - # M is treated as minutes in string repr - expected = TimedeltaIndex( - [np.timedelta64(i, "m") for i in np.arange(5).tolist()] - ) - - str_repr = [f"{x}{unit}" for x in np.arange(5)] - result = to_timedelta(wrapper(str_repr)) - tm.assert_index_equal(result, expected) - result = TimedeltaIndex(wrapper(str_repr)) - tm.assert_index_equal(result, expected) - - # scalar - expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]")) - - result = to_timedelta(2, unit=unit) - assert result == expected - result = Timedelta(2, unit=unit) - assert result == expected - - if unit == "M": - expected = Timedelta(np.timedelta64(2, "m").astype("timedelta64[ns]")) - - result = to_timedelta(f"2{unit}") - assert result == expected - result = Timedelta(f"2{unit}") - assert result == expected + # array-likes + expected = TimedeltaIndex( + [np.timedelta64(i, np_unit) for i in np.arange(5).tolist()] + ) + result = to_timedelta(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + + str_repr = [f"{x}{unit}" for x in np.arange(5)] + result = to_timedelta(wrapper(str_repr)) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(str_repr)) + tm.assert_index_equal(result, expected) + + # scalar + expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]")) + result = to_timedelta(2, unit=unit) + assert result == expected + result = Timedelta(2, unit=unit) + assert result == expected + + result = to_timedelta(f"2{unit}") + assert result == expected + result = Timedelta(f"2{unit}") + assert result == expected @pytest.mark.parametrize("unit", ["Y", "y", "M"]) def test_unit_m_y_raises(self, unit): diff --git a/pandas/tests/series/methods/test_shift.py b/pandas/tests/series/methods/test_shift.py index 4b23820caeeb4..d38d70abba923 100644 ---
a/pandas/tests/series/methods/test_shift.py +++ b/pandas/tests/series/methods/test_shift.py @@ -32,7 +32,7 @@ def test_shift_always_copy(self, ser, shift_size): # GH22397 assert ser.shift(shift_size) is not ser - @pytest.mark.parametrize("move_by_freq", [pd.Timedelta("1D"), pd.Timedelta("1M")]) + @pytest.mark.parametrize("move_by_freq", [pd.Timedelta("1D"), pd.Timedelta("1min")]) def test_datetime_shift_always_copy(self, move_by_freq): # GH#22397 ser = Series(range(5), index=date_range("2017", periods=5)) diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index f68d83f7f4d58..8e48295c533cc 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -121,6 +121,27 @@ def test_to_timedelta_invalid(self): invalid_data, to_timedelta(invalid_data, errors="ignore") ) + @pytest.mark.parametrize( + "val, warning", + [ + ("1M", FutureWarning), + ("1 M", FutureWarning), + ("1Y", FutureWarning), + ("1 Y", FutureWarning), + ("1y", FutureWarning), + ("1 y", FutureWarning), + ("1m", None), + ("1 m", None), + ("1 day", None), + ("2day", None), + ], + ) + def test_unambiguous_timedelta_values(self, val, warning): + # GH36666 Deprecate use of strings denoting units with 'M', 'Y' or 'y' + # in pd.to_timedelta + with tm.assert_produces_warning(warning, check_stacklevel=False): + to_timedelta(val) + def test_to_timedelta_via_apply(self): # GH 5458 expected = Series([np.timedelta64(1, "s")])