From a40ceb8d7f61c68df92f3521a609d2ffbd69c3bd Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Sun, 12 Apr 2020 18:32:57 +0200 Subject: [PATCH 01/17] BUG: add support of loffset for timedelta --- pandas/core/resample.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 06751d9c35fab..52df8f7ca15b6 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1502,6 +1502,8 @@ def _get_time_delta_bins(self, ax): # Addresses GH #10530 if self.base > 0: labels += type(self.freq)(self.base) + if self.loffset: + labels += self.loffset return binner, bins, labels From cab76185fe615a8deb3b5c94d432a7f4458b37b0 Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Sun, 12 Apr 2020 18:34:37 +0200 Subject: [PATCH 02/17] BUG: fix #30353 invalid end --- pandas/core/arrays/timedeltas.py | 10 ++++++---- pandas/tests/resample/test_base.py | 11 ++--------- pandas/tests/resample/test_timedelta.py | 24 ++++++++++++++++++++++-- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a62f94b1a3665..092491122061a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -33,6 +33,7 @@ from pandas.core import nanops from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays._ranges import _generate_range_overflow_safe import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.ops.common import unpack_zerodim_and_defer @@ -1054,14 +1055,15 @@ def _generate_regular_range(start, end, periods, offset): stride = offset.nanos if periods is None: b = Timedelta(start).value - e = Timedelta(end).value - e += stride - e % stride + # cannot just use e = Timestamp(end) + 1 because arange breaks when + # stride is too large, see GH 10887 & GH 30353 + e = b + (Timedelta(end).value - b) // stride * stride + stride // 2 + 1 elif start is not None: b = Timedelta(start).value - e = b + periods * stride + e = _generate_range_overflow_safe(b, periods, stride, side="start") elif end is not None: e = Timedelta(end).value + stride - b = e - periods * stride + b = _generate_range_overflow_safe(e, periods, stride, side="end") else: raise ValueError( "at least 'start' or 'end' should be specified if a 'period' is given." diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 6384c5f19c898..d0559923fec51 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -10,7 +10,7 @@ from pandas.core.groupby.grouper import Grouper from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import PeriodIndex, period_range -from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range +from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.resample import _asfreq_compat # a fixture value can be overridden by the test parameter value. Note that the @@ -182,7 +182,6 @@ def test_resample_size_empty_dataframe(freq, empty_frame_dti): @pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0)) @pytest.mark.parametrize("dtype", [np.float, np.int, np.object, "datetime64[ns]"]) def test_resample_empty_dtypes(index, dtype, resample_method): - # Empty series were sometimes causing a segfault (for the functions # with Cython bounds-checking disabled) or an IndexError. We just run # them to ensure they no longer do. (GH #10228) @@ -215,13 +214,7 @@ def test_resample_loffset_arg_type(frame, create_index, arg): if isinstance(arg, list): expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) - # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex - if isinstance(expected.index, TimedeltaIndex): - msg = "DataFrame are different" - with pytest.raises(AssertionError, match=msg): - tm.assert_frame_equal(result_agg, expected) - else: - tm.assert_frame_equal(result_agg, expected) + tm.assert_frame_equal(result_agg, expected) @all_ts diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 9fc355a45b656..96e5641f663ed 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -1,6 +1,7 @@ from datetime import timedelta import numpy as np +import pytest import pandas as pd from pandas import DataFrame, Series @@ -114,10 +115,10 @@ def test_resample_timedelta_values(): # check that timedelta dtype is preserved when NaT values are # introduced by the resampling - times = timedelta_range("1 day", "4 day", freq="4D") + times = timedelta_range("1 day", "6 day", freq="4D") df = DataFrame({"time": times}, index=times) - times2 = timedelta_range("1 day", "4 day", freq="2D") + times2 = timedelta_range("1 day", "6 day", freq="2D") exp = Series(times2, index=times2, name="time") exp.iloc[1] = pd.NaT @@ -125,3 +126,22 @@ def test_resample_timedelta_values(): tm.assert_series_equal(res, exp) res = df["time"].resample("2D").first() tm.assert_series_equal(res, exp) + + +@pytest.mark.parametrize( + "freq, resample_freq, start, periods, expected_resample_end", + [("10S", "3H", "8H", 5040, "20H")], +) +def test_resample_timedelta_end_already_included_in_bins( + freq, resample_freq, start, periods, expected_resample_end, +): + # GH 30353 + # check that the timedelta bins does not contains an extra bin + idx = pd.timedelta_range(start=start, freq=freq, periods=periods) + s = pd.Series(np.arange(periods), index=idx) + result = s.resample(resample_freq).min() + expected_index = pd.timedelta_range( + freq=resample_freq, start=start, end=expected_resample_end + ) + tm.assert_index_equal(result.index, expected_index) + assert not np.isnan(result[-1]) From 42caac6e3cc76af350980230aca3ac6650551057 Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Sun, 12 Apr 2020 18:50:42 +0200 Subject: [PATCH 03/17] TST: add test of large_stride to timedelta_range --- pandas/tests/resample/test_timedelta.py | 27 +++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 96e5641f663ed..c391e074f6e2c 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -145,3 +145,30 @@ def test_resample_timedelta_end_already_included_in_bins( ) tm.assert_index_equal(result.index, expected_index) assert not np.isnan(result[-1]) + + +@pytest.mark.parametrize( + "freq, start, end", [("1day", "10day", "2D")], +) +def test_timedelta_range_large_stride(start, end, freq): + # GH 30353 + + def mock_timedelta_range( + start=None, end=None, periods=None, freq=None, name=None, closed=None + ): + epoch = pd.Timestamp(0) + if start is not None: + start = epoch + pd.Timedelta(start) + if end is not None: + end = epoch + pd.Timedelta(end) + res = pd.date_range( + start=start, end=end, periods=periods, freq=freq, name=name, closed=closed + ) + res -= epoch + res.freq = freq + return res + + res = pd.timedelta_range("1day", "10day", freq="2D") + exp = mock_timedelta_range("1day", "10day", freq="2D") + + tm.assert_index_equal(res, exp) From 95c9e2b48f3e40119998da0e990f885511330351 Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Sun, 12 Apr 2020 19:04:44 +0200 Subject: [PATCH 04/17] CLN: refactor core/arrays/_range to support timedeltas --- pandas/core/arrays/_ranges.py | 110 ++++++++++++++++++------------- pandas/core/arrays/datetimes.py | 4 +- pandas/core/arrays/timedeltas.py | 26 +------- 3 files changed, 70 insertions(+), 70 deletions(-) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 471bfa736d4b9..862db806daeab 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -7,12 +7,12 @@ import numpy as np -from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp +from pandas._libs.tslibs import OutOfBoundsDatetime, Timedelta, Timestamp from pandas.tseries.offsets import DateOffset, Tick, generate_range -def generate_regular_range( +def generate_timestamps_range( start: Timestamp, end: Timestamp, periods: int, freq: DateOffset ) -> Tuple[np.ndarray, str]: """ @@ -32,57 +32,79 @@ def generate_regular_range( Returns ------- - ndarray[np.int64] representing nanosecond unix timestamps + (tuple): containing: + + values : ndarray[np.int64] representing nanosecond unix timestamps + tz : the timezone of the range """ if isinstance(freq, Tick): - stride = freq.nanos - if periods is None: - b = Timestamp(start).value - # cannot just use e = Timestamp(end) + 1 because arange breaks when - # stride is too large, see GH10887 - e = b + (Timestamp(end).value - b) // stride * stride + stride // 2 + 1 - # end.tz == start.tz by this point due to _generate implementation - tz = start.tz - elif start is not None: - b = Timestamp(start).value - e = _generate_range_overflow_safe(b, periods, stride, side="start") - tz = start.tz - elif end is not None: - e = Timestamp(end).value + stride - b = _generate_range_overflow_safe(e, periods, stride, side="end") - tz = end.tz - else: - raise ValueError( - "at least 'start' or 'end' should be specified " - "if a 'period' is given." - ) - - with np.errstate(over="raise"): - # If the range is sufficiently large, np.arange may overflow - # and incorrectly return an empty array if not caught. - try: - values = np.arange(b, e, stride, dtype=np.int64) - except FloatingPointError: - xdr = [b] - while xdr[-1] != e: - xdr.append(xdr[-1] + stride) - values = np.array(xdr[:-1], dtype=np.int64) - + start_value = Timestamp(start).value if start is not None else None + end_value = Timestamp(end).value if end is not None else None + values = _generate_regular_range(start_value, end_value, periods, freq.nanos) else: - tz = None - # start and end should have the same timezone by this point - if start is not None: - tz = start.tz - elif end is not None: - tz = end.tz - xdr = generate_range(start=start, end=end, periods=periods, offset=freq) - values = np.array([x.value for x in xdr], dtype=np.int64) + tz = start.tz if start is not None else end.tz return values, tz +def generate_timedeltas_range( + start: Timedelta, end: Timedelta, periods: int, freq: DateOffset +): + """ + Generate a range of dates with the spans between dates described by + the given `freq` DateOffset. + + Parameters + ---------- + start : Timedelta or None + first point of produced date range + end : Timedelta or None + last point of produced date range + periods : int + number of periods in produced date range + freq : DateOffset + describes space between dates in produced date range + + Returns + ------- + ndarray[np.int64] representing nanosecond timedeltas + """ + start_value = Timedelta(start).value if start is not None else None + end_value = Timedelta(end).value if end is not None else None + return _generate_regular_range(start_value, end_value, periods, freq.nanos) + + +def _generate_regular_range(start: int, end: int, periods: int, stride: int): + b = start + if periods is None: + # cannot just use e = Timestamp(end) + 1 because arange breaks when + # stride is too large, see GH10887 + e = b + (end - b) // stride * stride + stride // 2 + 1 + elif start is not None: + e = _generate_range_overflow_safe(b, periods, stride, side="start") + elif end is not None: + e = end + stride + b = _generate_range_overflow_safe(e, periods, stride, side="end") + else: + raise ValueError( + "at least 'start' or 'end' should be specified if a 'period' is given." + ) + + with np.errstate(over="raise"): + # If the range is sufficiently large, np.arange may overflow + # and incorrectly return an empty array if not caught. + try: + values = np.arange(b, e, stride, dtype=np.int64) + except FloatingPointError: + xdr = [b] + while xdr[-1] != e: + xdr.append(xdr[-1] + stride) + values = np.array(xdr[:-1], dtype=np.int64) + return values + + def _generate_range_overflow_safe( endpoint: int, periods: int, stride: int, side: str = "start" ) -> int: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8a1cacfe304ca..c3d1ff0655946 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -44,7 +44,7 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays._ranges import generate_regular_range +from pandas.core.arrays._ranges import generate_timestamps_range import pandas.core.common as com from pandas.tseries.frequencies import get_period_alias, to_offset @@ -408,7 +408,7 @@ def _generate_range( if end is not None: end = end.tz_localize(None) - values, _tz = generate_regular_range(start, end, periods, freq) + values, _tz = generate_timestamps_range(start, end, periods, freq) index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz)) if tz is not None and index.tz is None: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 092491122061a..cc88cd82d0339 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -33,7 +33,7 @@ from pandas.core import nanops from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays._ranges import _generate_range_overflow_safe +from pandas.core.arrays._ranges import generate_timedeltas_range import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.ops.common import unpack_zerodim_and_defer @@ -265,7 +265,7 @@ def _generate_range(cls, start, end, periods, freq, closed=None): left_closed, right_closed = dtl.validate_endpoints(closed) if freq is not None: - index = _generate_regular_range(start, end, periods, freq) + index = generate_timedeltas_range(start, end, periods, freq) else: index = np.linspace(start.value, end.value, periods).astype("i8") if len(index) >= 2: @@ -1049,25 +1049,3 @@ def _validate_td64_dtype(dtype): raise ValueError(f"dtype {dtype} cannot be converted to timedelta64[ns]") return dtype - - -def _generate_regular_range(start, end, periods, offset): - stride = offset.nanos - if periods is None: - b = Timedelta(start).value - # cannot just use e = Timestamp(end) + 1 because arange breaks when - # stride is too large, see GH 10887 & GH 30353 - e = b + (Timedelta(end).value - b) // stride * stride + stride // 2 + 1 - elif start is not None: - b = Timedelta(start).value - e = _generate_range_overflow_safe(b, periods, stride, side="start") - elif end is not None: - e = Timedelta(end).value + stride - b = _generate_range_overflow_safe(e, periods, stride, side="end") - else: - raise ValueError( - "at least 'start' or 'end' should be specified if a 'period' is given." - ) - - data = np.arange(b, e, stride, dtype=np.int64) - return data From 7fa229b2aa7f710e070bb29be94354a166b3f8bb Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Sun, 12 Apr 2020 19:51:39 +0200 Subject: [PATCH 05/17] DOC: add whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 95cb4ccbbb796..337730b79c7ad 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -574,6 +574,7 @@ Timedelta - Timedeltas now understand ``µs`` as identifier for microsecond (:issue:`32899`) - :class:`Timedelta` string representation now includes nanoseconds, when nanoseconds are non-zero (:issue:`9309`) - Bug in comparing a :class:`Timedelta`` object against a ``np.ndarray`` with ``timedelta64`` dtype incorrectly viewing all entries as unequal (:issue:`33441`) +- Bug in :meth:`timedelta_range` that produced an extra point on a edge case (:issue:`30353`, :issue:`33498`) Timezones ^^^^^^^^^ From 614394aa36f88eec7ec5baff287eff9c0c46e588 Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Mon, 13 Apr 2020 01:09:50 +0200 Subject: [PATCH 06/17] TST: add more edges cases to test timedelta_range (tests that fails on master) --- pandas/tests/resample/test_timedelta.py | 49 ++++++++++++------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index c391e074f6e2c..c45e47493d7e2 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -129,46 +129,45 @@ def test_resample_timedelta_values(): @pytest.mark.parametrize( - "freq, resample_freq, start, periods, expected_resample_end", - [("10S", "3H", "8H", 5040, "20H")], + "start, end, freq, resample_freq", + [ + ("8H", "21h59min", "10S", "3H"), + ("3H", "22H", "1H", "5H"), + ("527D", "5006D", "3D", "10D") + ], ) -def test_resample_timedelta_end_already_included_in_bins( - freq, resample_freq, start, periods, expected_resample_end, -): +def test_resample_timedelta_edge_case(start, end, freq, resample_freq,): # GH 30353 # check that the timedelta bins does not contains an extra bin - idx = pd.timedelta_range(start=start, freq=freq, periods=periods) - s = pd.Series(np.arange(periods), index=idx) + idx = pd.timedelta_range(start=start, end=end, freq=freq) + s = pd.Series(np.arange(len(idx)), index=idx) result = s.resample(resample_freq).min() - expected_index = pd.timedelta_range( - freq=resample_freq, start=start, end=expected_resample_end - ) + expected_index = pd.timedelta_range(freq=resample_freq, start=start, end=end) tm.assert_index_equal(result.index, expected_index) assert not np.isnan(result[-1]) @pytest.mark.parametrize( - "freq, start, end", [("1day", "10day", "2D")], + "start, end, freq", [ + ("1day", "10day", "2D"), + ("2day", "30day", "3D"), + ("2s", "50s", "5s") + ], ) -def test_timedelta_range_large_stride(start, end, freq): - # GH 30353 +def test_timedelta_range_freq_divide_end(start, end, freq): + # GH 30353 only the cases where `(end % freq) == 0` used to fail - def mock_timedelta_range( - start=None, end=None, periods=None, freq=None, name=None, closed=None - ): + def mock_timedelta_range(start=None, end=None, **kwargs): epoch = pd.Timestamp(0) if start is not None: start = epoch + pd.Timedelta(start) if end is not None: end = epoch + pd.Timedelta(end) - res = pd.date_range( - start=start, end=end, periods=periods, freq=freq, name=name, closed=closed - ) - res -= epoch - res.freq = freq - return res + result = pd.date_range(start=start, end=end, **kwargs) - epoch + result.freq = freq + return result - res = pd.timedelta_range("1day", "10day", freq="2D") - exp = mock_timedelta_range("1day", "10day", freq="2D") + res = pd.timedelta_range(start=start, end=end, freq=freq) + exp = mock_timedelta_range(start=start, end=end, freq=freq) - tm.assert_index_equal(res, exp) + tm.assert_index_equal(res, exp) \ No newline at end of file From 8f8b963802d6f9ad04bf5db0b8db3142d74349b6 Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Mon, 13 Apr 2020 01:13:13 +0200 Subject: [PATCH 07/17] CLN: fix lint --- pandas/tests/resample/test_timedelta.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index c45e47493d7e2..b9a8a4ec6fc6d 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -133,10 +133,10 @@ def test_resample_timedelta_values(): [ ("8H", "21h59min", "10S", "3H"), ("3H", "22H", "1H", "5H"), - ("527D", "5006D", "3D", "10D") + ("527D", "5006D", "3D", "10D"), ], ) -def test_resample_timedelta_edge_case(start, end, freq, resample_freq,): +def test_resample_timedelta_edge_case(start, end, freq, resample_freq): # GH 30353 # check that the timedelta bins does not contains an extra bin idx = pd.timedelta_range(start=start, end=end, freq=freq) @@ -148,11 +148,8 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq,): @pytest.mark.parametrize( - "start, end, freq", [ - ("1day", "10day", "2D"), - ("2day", "30day", "3D"), - ("2s", "50s", "5s") - ], + "start, end, freq", + [("1day", "10day", "2D"), ("2day", "30day", "3D"), ("2s", "50s", "5s")], ) def test_timedelta_range_freq_divide_end(start, end, freq): # GH 30353 only the cases where `(end % freq) == 0` used to fail @@ -170,4 +167,4 @@ def mock_timedelta_range(start=None, end=None, **kwargs): res = pd.timedelta_range(start=start, end=end, freq=freq) exp = mock_timedelta_range(start=start, end=end, freq=freq) - tm.assert_index_equal(res, exp) \ No newline at end of file + tm.assert_index_equal(res, exp) From b73b537283c8957482258b48a68ad6a9452224be Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Mon, 13 Apr 2020 11:51:41 +0200 Subject: [PATCH 08/17] TST: improve timedelta tests --- pandas/tests/resample/test_timedelta.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index b9a8a4ec6fc6d..7403d5b01ae9e 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -131,13 +131,18 @@ def test_resample_timedelta_values(): @pytest.mark.parametrize( "start, end, freq, resample_freq", [ - ("8H", "21h59min", "10S", "3H"), + ("8H", "21h59min50s", "10S", "3H"), # GH 30353 example ("3H", "22H", "1H", "5H"), ("527D", "5006D", "3D", "10D"), + ("1D", "10D", "1D", "2D"), # GH 13022 example + # tests that worked before GH 33498: + ("8H", "21h59min50s", "10S", "2H"), + ("0H", "21h59min50s", "10S", "3H"), + ("10D", "85D", "D", "2D"), ], ) def test_resample_timedelta_edge_case(start, end, freq, resample_freq): - # GH 30353 + # GH 33498 # check that the timedelta bins does not contains an extra bin idx = pd.timedelta_range(start=start, end=end, freq=freq) s = pd.Series(np.arange(len(idx)), index=idx) @@ -149,10 +154,17 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq): @pytest.mark.parametrize( "start, end, freq", - [("1day", "10day", "2D"), ("2day", "30day", "3D"), ("2s", "50s", "5s")], + [ + ("1D", "10D", "2D"), + ("2D", "30D", "3D"), + ("2s", "50s", "5s"), + # tests that worked before GH 33498: + ("4D", "16D", "3D"), + ("8D", "16D", "40s"), + ], ) def test_timedelta_range_freq_divide_end(start, end, freq): - # GH 30353 only the cases where `(end % freq) == 0` used to fail + # GH 33498 only the cases where `(end % freq) == 0` used to fail def mock_timedelta_range(start=None, end=None, **kwargs): epoch = pd.Timestamp(0) From bd7e802c8d596743d51fb05ac1349b83d03153af Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Thu, 16 Apr 2020 16:36:48 +0200 Subject: [PATCH 09/17] CLN: clean timedelta_range and date_range --- pandas/core/arrays/_ranges.py | 67 ++++++++------------------------ pandas/core/arrays/datetimes.py | 31 +++++++-------- pandas/core/arrays/timedeltas.py | 10 +---- 3 files changed, 31 insertions(+), 77 deletions(-) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 862db806daeab..48254072f42cf 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -3,64 +3,30 @@ (and possibly TimedeltaArray/PeriodArray) """ -from typing import Tuple +from typing import Union import numpy as np from pandas._libs.tslibs import OutOfBoundsDatetime, Timedelta, Timestamp -from pandas.tseries.offsets import DateOffset, Tick, generate_range +from pandas.tseries.offsets import DateOffset -def generate_timestamps_range( - start: Timestamp, end: Timestamp, periods: int, freq: DateOffset -) -> Tuple[np.ndarray, str]: - """ - Generate a range of dates with the spans between dates described by - the given `freq` DateOffset. - - Parameters - ---------- - start : Timestamp or None - first point of produced date range - end : Timestamp or None - last point of produced date range - periods : int - number of periods in produced date range - freq : DateOffset - describes space between dates in produced date range - - Returns - ------- - (tuple): containing: - - values : ndarray[np.int64] representing nanosecond unix timestamps - tz : the timezone of the range - """ - if isinstance(freq, Tick): - start_value = Timestamp(start).value if start is not None else None - end_value = Timestamp(end).value if end is not None else None - values = _generate_regular_range(start_value, end_value, periods, freq.nanos) - else: - xdr = generate_range(start=start, end=end, periods=periods, offset=freq) - values = np.array([x.value for x in xdr], dtype=np.int64) - - tz = start.tz if start is not None else end.tz - return values, tz - - -def generate_timedeltas_range( - start: Timedelta, end: Timedelta, periods: int, freq: DateOffset +def generate_regular_range( + start: Union[Timestamp, Timedelta], + end: Union[Timestamp, Timedelta], + periods: int, + freq: DateOffset, ): """ - Generate a range of dates with the spans between dates described by - the given `freq` DateOffset. + Generate a range of dates or timestamps with the spans between dates + described by the given `freq` DateOffset. Parameters ---------- - start : Timedelta or None + start : Timedelta, Timestamp or None first point of produced date range - end : Timedelta or None + start : Timedelta, Timestamp or None last point of produced date range periods : int number of periods in produced date range @@ -69,14 +35,13 @@ def generate_timedeltas_range( Returns ------- - ndarray[np.int64] representing nanosecond timedeltas + ndarray[np.int64] + Representing nanosecond unix timestamps. """ - start_value = Timedelta(start).value if start is not None else None - end_value = Timedelta(end).value if end is not None else None - return _generate_regular_range(start_value, end_value, periods, freq.nanos) - + start = start.value if start is not None else None + end = end.value if end is not None else None + stride = freq.nanos -def _generate_regular_range(start: int, end: int, periods: int, stride: int): b = start if periods is None: # cannot just use e = Timestamp(end) + 1 because arange breaks when diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c3d1ff0655946..3134ffab2ea5a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -44,11 +44,11 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays._ranges import generate_timestamps_range +from pandas.core.arrays._ranges import generate_regular_range import pandas.core.common as com from pandas.tseries.frequencies import get_period_alias, to_offset -from pandas.tseries.offsets import Day, Tick +from pandas.tseries.offsets import Day, Tick, generate_range _midnight = time(0, 0) @@ -370,33 +370,22 @@ def _generate_range( if end is not None: end = Timestamp(end) - if start is None and end is None: - if closed is not None: - raise ValueError( - "Closed has to be None if not both of start and end are defined" - ) if start is NaT or end is NaT: raise ValueError("Neither `start` nor `end` can be NaT") left_closed, right_closed = dtl.validate_endpoints(closed) - start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) - tz = _infer_tz_from_endpoints(start, end, tz) if tz is not None: # Localize the start and end arguments + start_tz = None if start is None else start.tz + end_tz = None if end is None else end.tz start = _maybe_localize_point( - start, - getattr(start, "tz", None), - start, - freq, - tz, - ambiguous, - nonexistent, + start, start_tz, start, freq, tz, ambiguous, nonexistent ) end = _maybe_localize_point( - end, getattr(end, "tz", None), end, freq, tz, ambiguous, nonexistent + end, end_tz, end, freq, tz, ambiguous, nonexistent ) if freq is not None: # We break Day arithmetic (fixed 24 hour) here and opt for @@ -408,7 +397,13 @@ def _generate_range( if end is not None: end = end.tz_localize(None) - values, _tz = generate_timestamps_range(start, end, periods, freq) + if isinstance(freq, Tick): + values = generate_regular_range(start, end, periods, freq) + else: + xdr = generate_range(start=start, end=end, periods=periods, offset=freq) + values = np.array([x.value for x in xdr], dtype=np.int64) + + _tz = start.tz if start is not None else end.tz index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz)) if tz is not None and index.tz is None: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index cc88cd82d0339..8cd4b874d10ee 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -33,7 +33,7 @@ from pandas.core import nanops from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays._ranges import generate_timedeltas_range +from pandas.core.arrays._ranges import generate_regular_range import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.ops.common import unpack_zerodim_and_defer @@ -256,16 +256,10 @@ def _generate_range(cls, start, end, periods, freq, closed=None): if end is not None: end = Timedelta(end) - if start is None and end is None: - if closed is not None: - raise ValueError( - "Closed has to be None if not both of start and end are defined" - ) - left_closed, right_closed = dtl.validate_endpoints(closed) if freq is not None: - index = generate_timedeltas_range(start, end, periods, freq) + index = generate_regular_range(start, end, periods, freq) else: index = np.linspace(start.value, end.value, periods).astype("i8") if len(index) >= 2: From 5a7d10141c8a28e200248f9d00b1177e9eb00ef8 Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Thu, 16 Apr 2020 16:45:08 +0200 Subject: [PATCH 10/17] DOC: add bug fixes in whatsnew --- doc/source/whatsnew/v1.1.0.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 337730b79c7ad..79f78471922bc 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -574,7 +574,9 @@ Timedelta - Timedeltas now understand ``µs`` as identifier for microsecond (:issue:`32899`) - :class:`Timedelta` string representation now includes nanoseconds, when nanoseconds are non-zero (:issue:`9309`) - Bug in comparing a :class:`Timedelta`` object against a ``np.ndarray`` with ``timedelta64`` dtype incorrectly viewing all entries as unequal (:issue:`33441`) -- Bug in :meth:`timedelta_range` that produced an extra point on a edge case (:issue:`30353`, :issue:`33498`) +- Bug in :func:`timedelta_range` that produced an extra point on a edge case (:issue:`30353`, :issue:`33498`) +- Bug in :meth:`DataFrame.resample` that produced an extra point on a edge case (:issue:`30353`, :issue:`13022`, :issue:`33498`) +- Bug in :meth:`DataFrame.resample` that ignored the ``loffset`` argument when dealing with timedelta (:issue:`7687`, :issue:`33498`) Timezones ^^^^^^^^^ From 2959ce6be259fc5f92c7f8028e6b5885e57d522d Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Thu, 16 Apr 2020 16:49:18 +0200 Subject: [PATCH 11/17] CLN: fix typo --- pandas/core/arrays/_ranges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 48254072f42cf..783abbf869aa7 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -26,7 +26,7 @@ def generate_regular_range( ---------- start : Timedelta, Timestamp or None first point of produced date range - start : Timedelta, Timestamp or None + end : Timedelta, Timestamp or None last point of produced date range periods : int number of periods in produced date range From b706a0a2226dae146c66cb822fef9f83f422e279 Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Thu, 16 Apr 2020 17:15:40 +0200 Subject: [PATCH 12/17] CLN: fix a mypy issue --- pandas/core/arrays/_ranges.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 783abbf869aa7..4c0d579e75c9f 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -42,12 +42,13 @@ def generate_regular_range( end = end.value if end is not None else None stride = freq.nanos - b = start if periods is None: + b = start # cannot just use e = Timestamp(end) + 1 because arange breaks when # stride is too large, see GH10887 e = b + (end - b) // stride * stride + stride // 2 + 1 elif start is not None: + b = start e = _generate_range_overflow_safe(b, periods, stride, side="start") elif end is not None: e = end + stride From af00e90bd859c5afa4397d7011cbc1e502b66c5b Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Thu, 16 Apr 2020 20:19:40 +0200 Subject: [PATCH 13/17] CLN: rename generate_regular_range into generate_time_range --- pandas/core/arrays/_ranges.py | 2 +- pandas/core/arrays/datetimes.py | 4 ++-- pandas/core/arrays/timedeltas.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 4c0d579e75c9f..7424f3f6a55b1 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -12,7 +12,7 @@ from pandas.tseries.offsets import DateOffset -def generate_regular_range( +def generate_time_range( start: Union[Timestamp, Timedelta], end: Union[Timestamp, Timedelta], periods: int, diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 3134ffab2ea5a..11246d3f5d73d 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -44,7 +44,7 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays._ranges import generate_regular_range +from pandas.core.arrays._ranges import generate_time_range import pandas.core.common as com from pandas.tseries.frequencies import get_period_alias, to_offset @@ -398,7 +398,7 @@ def _generate_range( end = end.tz_localize(None) if isinstance(freq, Tick): - values = generate_regular_range(start, end, periods, freq) + values = generate_time_range(start, end, periods, freq) else: xdr = generate_range(start=start, end=end, periods=periods, offset=freq) values = np.array([x.value for x in xdr], dtype=np.int64) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 8cd4b874d10ee..17e961d7f026d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -33,7 +33,7 @@ from pandas.core import nanops from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays._ranges import generate_regular_range +from pandas.core.arrays._ranges import generate_time_range import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.ops.common import unpack_zerodim_and_defer @@ -259,7 +259,7 @@ def _generate_range(cls, start, end, periods, freq, closed=None): left_closed, right_closed = dtl.validate_endpoints(closed) if freq is not None: - index = generate_regular_range(start, end, periods, freq) + index = generate_time_range(start, end, periods, freq) else: index = np.linspace(start.value, end.value, periods).astype("i8") if len(index) >= 2: From c0bbbc509f40853b53e7233681e6b66ca93a185a Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Thu, 16 Apr 2020 21:05:18 +0200 Subject: [PATCH 14/17] CLN: fix review --- pandas/core/arrays/_ranges.py | 14 ++++---- pandas/core/arrays/datetimes.py | 4 +-- pandas/core/arrays/timedeltas.py | 4 +-- .../timedeltas/test_timedelta_range.py | 32 ++++++++++++++++++- pandas/tests/resample/test_timedelta.py | 31 +----------------- 5 files changed, 43 insertions(+), 42 deletions(-) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 7424f3f6a55b1..9265b52239316 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -12,7 +12,7 @@ from pandas.tseries.offsets import DateOffset -def generate_time_range( +def generate_regular_range( start: Union[Timestamp, Timedelta], end: Union[Timestamp, Timedelta], periods: int, @@ -25,18 +25,18 @@ def generate_time_range( Parameters ---------- start : Timedelta, Timestamp or None - first point of produced date range + First point of produced date range. end : Timedelta, Timestamp or None - last point of produced date range + Last point of produced date range. periods : int - number of periods in produced date range + Number of periods in produced date range. freq : DateOffset - describes space between dates in produced date range + Describes space between dates in produced date range. + It should be an instance of Tick. Returns ------- - ndarray[np.int64] - Representing nanosecond unix timestamps. + ndarray[np.int64] Representing nanoseconds. """ start = start.value if start is not None else None end = end.value if end is not None else None diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 11246d3f5d73d..3134ffab2ea5a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -44,7 +44,7 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays._ranges import generate_time_range +from pandas.core.arrays._ranges import generate_regular_range import pandas.core.common as com from pandas.tseries.frequencies import get_period_alias, to_offset @@ -398,7 +398,7 @@ def _generate_range( end = end.tz_localize(None) if isinstance(freq, Tick): - values = generate_time_range(start, end, periods, freq) + values = generate_regular_range(start, end, periods, freq) else: xdr = generate_range(start=start, end=end, periods=periods, offset=freq) values = np.array([x.value for x in xdr], dtype=np.int64) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 17e961d7f026d..8cd4b874d10ee 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -33,7 +33,7 @@ from pandas.core import nanops from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays._ranges import generate_time_range +from pandas.core.arrays._ranges import generate_regular_range import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.ops.common import unpack_zerodim_and_defer @@ -259,7 +259,7 @@ def _generate_range(cls, start, end, periods, freq, closed=None): left_closed, right_closed = dtl.validate_endpoints(closed) if freq is not None: - index = generate_time_range(start, end, periods, freq) + index = generate_regular_range(start, end, periods, freq) else: index = np.linspace(start.value, end.value, periods).astype("i8") if len(index) >= 2: diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index c07a6471c732f..e64787981eed1 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import timedelta_range, to_timedelta +from pandas import Timedelta, Timestamp, date_range, timedelta_range, to_timedelta import pandas._testing as tm from pandas.tseries.offsets import Day, Second @@ -61,3 +61,33 @@ def test_errors(self): # too many params with pytest.raises(ValueError, match=msg): timedelta_range(start="0 days", end="5 days", periods=10, freq="H") + + @pytest.mark.parametrize( + "start, end, freq", + [ + ("1D", "10D", "2D"), + ("2D", "30D", "3D"), + ("2s", "50s", "5s"), + # tests that worked before GH 33498: + ("4D", "16D", "3D"), + ("8D", "16D", "40s"), + ], + ) + def test_timedelta_range_freq_divide_end(self, start, end, freq): + # GH 33498 only the cases where `(end % freq) == 0` used to fail + + def mock_timedelta_range(start=None, end=None, **kwargs): + epoch = Timestamp(0) + if start is not None: + start = epoch + Timedelta(start) + if end is not None: + end = epoch + Timedelta(end) + result = date_range(start=start, end=end, **kwargs) - epoch + result.freq = freq + return result + + res = timedelta_range(start=start, end=end, freq=freq) + exp = mock_timedelta_range(start=start, end=end, freq=freq) + + tm.assert_index_equal(res, exp) + assert res.freq == exp.freq diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 7403d5b01ae9e..1b4a625f078c9 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -149,34 +149,5 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq): result = s.resample(resample_freq).min() expected_index = pd.timedelta_range(freq=resample_freq, start=start, end=end) tm.assert_index_equal(result.index, expected_index) + assert result.index.freq == expected_index.freq assert not np.isnan(result[-1]) - - -@pytest.mark.parametrize( - "start, end, freq", - [ - ("1D", "10D", "2D"), - ("2D", "30D", "3D"), - ("2s", "50s", "5s"), - # tests that worked before GH 33498: - ("4D", "16D", "3D"), - ("8D", "16D", "40s"), - ], -) -def test_timedelta_range_freq_divide_end(start, end, freq): - # GH 33498 only the cases where `(end % freq) == 0` used to fail - - def mock_timedelta_range(start=None, end=None, **kwargs): - epoch = pd.Timestamp(0) - if start is not None: - start = epoch + pd.Timedelta(start) - if end is not None: - end = epoch + pd.Timedelta(end) - result = pd.date_range(start=start, end=end, **kwargs) - epoch - result.freq = freq - return result - - res = pd.timedelta_range(start=start, end=end, freq=freq) - exp = mock_timedelta_range(start=start, end=end, freq=freq) - - tm.assert_index_equal(res, exp) From 58dfae08826d9fc10d84152822036a6f93f256c7 Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Thu, 16 Apr 2020 21:23:19 +0200 Subject: [PATCH 15/17] DOC: change the type requirement of generate_regular_range --- pandas/core/arrays/_ranges.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 9265b52239316..3b090ca458d88 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -30,9 +30,8 @@ def generate_regular_range( Last point of produced date range. periods : int Number of periods in produced date range. - freq : DateOffset + freq : Tick Describes space between dates in produced date range. - It should be an instance of Tick. Returns ------- From 2469986fab224c633bea464ac615c97575444926 Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Thu, 16 Apr 2020 23:10:11 +0200 Subject: [PATCH 16/17] CLN: add GH related issue --- pandas/core/resample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 52df8f7ca15b6..6d79ae070c103 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1499,10 +1499,11 @@ def _get_time_delta_bins(self, ax): end_stamps = labels + self.freq bins = ax.searchsorted(end_stamps, side="left") - # Addresses GH #10530 if self.base > 0: + # GH #10530 labels += type(self.freq)(self.base) if self.loffset: + # GH #33498 labels += self.loffset return binner, bins, labels From 71438d6f7509191d532b60dbb6359b4c0bb87f24 Mon Sep 17 00:00:00 2001 From: Mathis FELARDOS Date: Fri, 1 May 2020 15:37:16 +0200 Subject: [PATCH 17/17] TST: remove the mock of timedelta_range with date_range --- .../timedeltas/test_timedelta_range.py | 34 ++++++------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index e64787981eed1..7d78fbf9ff190 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import Timedelta, Timestamp, date_range, timedelta_range, to_timedelta +from pandas import Timedelta, timedelta_range, to_timedelta import pandas._testing as tm from pandas.tseries.offsets import Day, Second @@ -63,31 +63,19 @@ def test_errors(self): timedelta_range(start="0 days", end="5 days", periods=10, freq="H") @pytest.mark.parametrize( - "start, end, freq", + "start, end, freq, expected_periods", [ - ("1D", "10D", "2D"), - ("2D", "30D", "3D"), - ("2s", "50s", "5s"), + ("1D", "10D", "2D", (10 - 1) // 2 + 1), + ("2D", "30D", "3D", (30 - 2) // 3 + 1), + ("2s", "50s", "5s", (50 - 2) // 5 + 1), # tests that worked before GH 33498: - ("4D", "16D", "3D"), - ("8D", "16D", "40s"), + ("4D", "16D", "3D", (16 - 4) // 3 + 1), + ("8D", "16D", "40s", (16 * 3600 * 24 - 8 * 3600 * 24) // 40 + 1), ], ) - def test_timedelta_range_freq_divide_end(self, start, end, freq): + def test_timedelta_range_freq_divide_end(self, start, end, freq, expected_periods): # GH 33498 only the cases where `(end % freq) == 0` used to fail - - def mock_timedelta_range(start=None, end=None, **kwargs): - epoch = Timestamp(0) - if start is not None: - start = epoch + Timedelta(start) - if end is not None: - end = epoch + Timedelta(end) - result = date_range(start=start, end=end, **kwargs) - epoch - result.freq = freq - return result - res = timedelta_range(start=start, end=end, freq=freq) - exp = mock_timedelta_range(start=start, end=end, freq=freq) - - tm.assert_index_equal(res, exp) - assert res.freq == exp.freq + assert Timedelta(start) == res[0] + assert Timedelta(end) >= res[-1] + assert len(res) == expected_periods