diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index 3aa50ad609064..d111dc34995d2 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -145,7 +145,8 @@ Bug Fixes - +- Bug in resample that causes a ValueError when resampling across multiple days + and the last offset is not calculated from the start of the range (:issue:`8683`) - Bug in `pd.infer_freq`/`DataFrame.inferred_freq` that prevented proper sub-daily frequency inference diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index b362c55b156a4..95d3ff015394a 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -411,15 +411,19 @@ def _get_range_edges(first, last, offset, closed='left', base=0): def _adjust_dates_anchored(first, last, offset, closed='right', base=0): from pandas.tseries.tools import normalize_date + # First and last offsets should be calculated from the start day to fix an + # error caused by resampling across multiple days when a one day period is + # not a multiple of the frequency. 
+ # + # See https://github.com/pydata/pandas/issues/8683 + start_day_nanos = Timestamp(normalize_date(first)).value - last_day_nanos = Timestamp(normalize_date(last)).value base_nanos = (base % offset.n) * offset.nanos // offset.n start_day_nanos += base_nanos - last_day_nanos += base_nanos foffset = (first.value - start_day_nanos) % offset.nanos - loffset = (last.value - last_day_nanos) % offset.nanos + loffset = (last.value - start_day_nanos) % offset.nanos if closed == 'right': if foffset > 0: diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index bd6c1766cfd61..42b09b699b919 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -705,6 +705,29 @@ def test_resample_anchored_monthstart(self): for freq in freqs: result = ts.resample(freq, how='mean') + def test_resample_anchored_multiday(self): + # When resampling a range spanning multiple days, ensure that the + # start date gets used to determine the offset. Fixes issue where + # a one day period is not a multiple of the frequency. + # + # See: https://github.com/pydata/pandas/issues/8683 + + s = pd.Series(np.random.randn(5), + index=pd.date_range('2014-10-14 23:06:23.206', + periods=3, freq='400L') + | pd.date_range('2014-10-15 23:00:00', + periods=2, freq='2200L')) + + # Ensure left closing works + result = s.resample('2200L', 'mean') + self.assertEqual(result.index[-1], + pd.Timestamp('2014-10-15 23:00:02.000')) + + # Ensure right labeling works + result = s.resample('2200L', 'mean', label='right') + self.assertEqual(result.index[-1], + pd.Timestamp('2014-10-15 23:00:04.200')) + def test_corner_cases(self): # miscellaneous test coverage