From b487d128900a035ca55f5ff685a7c7fce963fed9 Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Sat, 9 Oct 2021 23:54:22 +0200 Subject: [PATCH 01/15] [FIX] rolling now respect duplicate datetime indices on the right bound of centered windows. --- pandas/_libs/window/indexers.pyx | 1 - pandas/tests/window/test_rolling.py | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 3782b55bd19b3..59889cb58c3d5 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -122,7 +122,6 @@ def calculate_variable_window_bounds( elif ((index[j] - end_bound) * index_growth_sign == 0 and right_closed): end[i] = j + 1 - break elif (index[j] - end_bound) * index_growth_sign >= 0: end[i] = j break diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index d88ce2ccb54cc..654518594be3e 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -250,6 +250,33 @@ def test_datetimelike_centered_offset_covers_all( tm.assert_equal(result, expected) +@pytest.mark.parametrize( + "window,closed,expected", + [ + ("2D", "right", [4, 4, 4, 4, 4, 4, 2, 2]), + ("2D", "left", [2, 2, 4, 4, 4, 4, 4, 4]), + ("2D", "both", [4, 4, 6, 6, 6, 6, 4, 4]), + ("2D", "neither", [2, 2, 2, 2, 2, 2, 2, 2]), + ], +) +def test_datetimelike_nonunique_index_centering( + window, closed, expected, frame_or_series +): + index = DatetimeIndex([ + '2020-01-01', '2020-01-01', + '2020-01-02', '2020-01-02', + '2020-01-03', '2020-01-03', + '2020-01-04', '2020-01-04', + ]) + + df = frame_or_series([1]*8, index=index, dtype=float) + expected = frame_or_series(expected, index=index, dtype=float) + + result = df.rolling(window, center=True, closed=closed).sum() + + tm.assert_equal(result, expected) + + def test_even_number_window_alignment(): # see discussion in GH 38780 s = Series(range(3), index=date_range(start="2020-01-01", 
freq="D", periods=3)) From 927a68c14ced8a4457b044e697b683c550020245 Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Sun, 10 Oct 2021 00:45:25 +0200 Subject: [PATCH 02/15] blackified --- pandas/tests/window/test_rolling.py | 30 +++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 654518594be3e..6028e89bea374 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -253,23 +253,29 @@ def test_datetimelike_centered_offset_covers_all( @pytest.mark.parametrize( "window,closed,expected", [ - ("2D", "right", [4, 4, 4, 4, 4, 4, 2, 2]), - ("2D", "left", [2, 2, 4, 4, 4, 4, 4, 4]), - ("2D", "both", [4, 4, 6, 6, 6, 6, 4, 4]), + ("2D", "right", [4, 4, 4, 4, 4, 4, 2, 2]), + ("2D", "left", [2, 2, 4, 4, 4, 4, 4, 4]), + ("2D", "both", [4, 4, 6, 6, 6, 6, 4, 4]), ("2D", "neither", [2, 2, 2, 2, 2, 2, 2, 2]), ], ) def test_datetimelike_nonunique_index_centering( - window, closed, expected, frame_or_series + window, closed, expected, frame_or_series ): - index = DatetimeIndex([ - '2020-01-01', '2020-01-01', - '2020-01-02', '2020-01-02', - '2020-01-03', '2020-01-03', - '2020-01-04', '2020-01-04', - ]) - - df = frame_or_series([1]*8, index=index, dtype=float) + index = DatetimeIndex( + [ + "2020-01-01", + "2020-01-01", + "2020-01-02", + "2020-01-02", + "2020-01-03", + "2020-01-03", + "2020-01-04", + "2020-01-04", + ] + ) + + df = frame_or_series([1] * 8, index=index, dtype=float) expected = frame_or_series(expected, index=index, dtype=float) result = df.rolling(window, center=True, closed=closed).sum() From f73a531638bf56a2ea500f37c8aaae52480ca0a8 Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Sun, 10 Oct 2021 00:56:14 +0200 Subject: [PATCH 03/15] added whatsnew entry --- doc/source/whatsnew/v1.4.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 
e638a24f830ef..48fd388922a4b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -503,6 +503,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`, :issue:`43515`) - Bug in :meth:`GroupBy.mean` failing with ``complex`` dtype (:issue:`43701`) - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and index is decreasing (:issue:`43927`) +- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contains duplicates (:issue:`43944`) Reshaping ^^^^^^^^^ From d00aa034176e033b7fc5eeaab85f4433c0268773 Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Mon, 11 Oct 2021 17:51:28 +0200 Subject: [PATCH 04/15] added step-keyword to rolling, which can be numeric on numeric windows or datetime-like on datetime-like windows. 
--- pandas/_libs/window/indexers.pyi | 1 + pandas/_libs/window/indexers.pyx | 50 ++++++++++++++++++-------------- pandas/core/generic.py | 2 ++ pandas/core/indexers/objects.py | 27 +++++++++++++++++ pandas/core/window/rolling.py | 44 +++++++++++++++++++++------- 5 files changed, 92 insertions(+), 32 deletions(-) diff --git a/pandas/_libs/window/indexers.pyi b/pandas/_libs/window/indexers.pyi index c9bc64be34ac9..e7630684a2380 100644 --- a/pandas/_libs/window/indexers.pyi +++ b/pandas/_libs/window/indexers.pyi @@ -6,6 +6,7 @@ def calculate_variable_window_bounds( num_values: int, # int64_t window_size: int, # int64_t min_periods, + step_size: int, center: bool, closed: str | None, index: np.ndarray, # const int64_t[:] diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 59889cb58c3d5..6015d9c6af986 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -14,6 +14,7 @@ def calculate_variable_window_bounds( int64_t num_values, int64_t window_size, object min_periods, # unused but here to match get_window_bounds signature + int64_t step_size, bint center, str closed, const int64_t[:] index @@ -32,6 +33,9 @@ def calculate_variable_window_bounds( min_periods : object ignored, exists for compatibility + step_size : int64 + (minimum) step size of the moving window + center : bint center the rolling window on the current observation @@ -49,8 +53,8 @@ def calculate_variable_window_bounds( bint left_closed = False bint right_closed = False ndarray[int64_t, ndim=1] start, end - int64_t start_bound, end_bound, index_growth_sign = 1 - Py_ssize_t i, j + int64_t start_bound, end_bound, step_bound, index_growth_sign = 1 + Py_ssize_t i, j, last_valid # default is 'right' if closed is None: @@ -89,11 +93,23 @@ def calculate_variable_window_bounds( end[0] = j break + step_bound = index[0] + index_growth_sign * step_size + last_valid = 0 + with nogil: # start is start of slice interval (including) # end is end of slice 
interval (not including) for i in range(1, num_values): + + if index[i] - index_growth_sign * step_bound < 0: + start[i] = end[i-1] + end[i] = end[i-1] + continue + + else: + step_bound = index[i] + step_size + if center: end_bound = index[i] + index_growth_sign * window_size / 2 start_bound = index[i] - index_growth_sign * window_size / 2 @@ -108,31 +124,21 @@ def calculate_variable_window_bounds( # advance the start bound until we are # within the constraint start[i] = i - for j in range(start[i - 1], i): + for j in range(start[last_valid], i): if (index[j] - start_bound) * index_growth_sign > 0: start[i] = j break # for centered window advance the end bound until we are # outside the constraint - if center: - for j in range(end[i - 1], num_values + 1): - if j == num_values: - end[i] = j - elif ((index[j] - end_bound) * index_growth_sign == 0 and - right_closed): - end[i] = j + 1 - elif (index[j] - end_bound) * index_growth_sign >= 0: - end[i] = j - break - # end bound is previous end - # or current index - elif (index[end[i - 1]] - end_bound) * index_growth_sign <= 0: - end[i] = i + 1 - else: - end[i] = end[i - 1] + end[i] = num_values + for j in range(max(end[last_valid], i), num_values): + if (index[j] - end_bound) * index_growth_sign == 0 and right_closed: + end[i] = j + 1 + elif (index[j] - end_bound) * index_growth_sign >= 0: + end[i] = j + break + + last_valid = i - # right endpoint is open - if not right_closed and not center: - end[i] -= 1 return start, end diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b235f120d98c8..50cea7b0929a7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10965,6 +10965,7 @@ def rolling( axis: Axis = 0, closed: str | None = None, method: str = "single", + step: int | timedelta | BaseOffset | None = None, ): axis = self._get_axis_number(axis) @@ -10991,6 +10992,7 @@ def rolling( axis=axis, closed=closed, method=method, + step=step, ) @final diff --git a/pandas/core/indexers/objects.py 
b/pandas/core/indexers/objects.py index cef023a647d7f..b95244f1c4f9f 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -70,6 +70,16 @@ def get_window_bounds( class FixedWindowIndexer(BaseIndexer): """Creates window boundaries that are of fixed length.""" + def __init__( + self, + index_array: np.ndarray | None = None, + window_size: int = 0, + step: int = 1, + **kwargs + ): + super().__init__(index_array, window_size, **kwargs) + self.step = step + @Appender(get_window_bounds_doc) def get_window_bounds( self, @@ -94,12 +104,28 @@ def get_window_bounds( end = np.clip(end, 0, num_values) start = np.clip(start, 0, num_values) + # apply step, the resulting window will have zero length + if self.step > 1: + mask = np.full_like(start, True, dtype=bool) + mask[::self.step] = False + start[mask] = end[mask] + return start, end class VariableWindowIndexer(BaseIndexer): """Creates window boundaries that are of variable length, namely for time series.""" + def __init__( + self, + index_array: np.ndarray | None = None, + window_size: int = 0, + step: int = 1, + **kwargs + ): + super().__init__(index_array, window_size, **kwargs) + self.step = step + @Appender(get_window_bounds_doc) def get_window_bounds( self, @@ -117,6 +143,7 @@ def get_window_bounds( num_values, self.window_size, min_periods, + self.step, center, # type: ignore[arg-type] closed, self.index_array, # type: ignore[arg-type] diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 2b8ed3c97d026..1db47abf90bfa 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -127,6 +127,7 @@ def __init__( on: str | Index | None = None, closed: str | None = None, method: str = "single", + step=None, *, selection=None, ): @@ -140,6 +141,7 @@ def __init__( self._win_type = win_type self.axis = obj._get_axis_number(axis) if axis is not None else None self.method = method + self.step = step self._win_freq_i8 = None if self.on is None: if 
self.axis == 0: @@ -398,8 +400,12 @@ def _get_window_indexer(self) -> BaseIndexer: index_array=self._index_array, window_size=self._win_freq_i8, center=self.center, + step=self.step, ) - return FixedWindowIndexer(window_size=self.window) + return FixedWindowIndexer( + window_size=self.window, + step=self.step, + ) def _apply_series( self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None @@ -1578,6 +1584,17 @@ class Rolling(RollingAndExpandingMixin): "method", ] + def _validate_freq(self, attr_name): + # this will raise ValueError on non-fixed freqs + attr = getattr(self, attr_name) + try: + return to_offset(attr) + except (TypeError, ValueError) as err: + raise ValueError( + f"passed {attr_name} {attr} is not " + "compatible with a datetimelike index" + ) from err + def _validate(self): super()._validate() @@ -1588,15 +1605,8 @@ def _validate(self): ) and isinstance(self.window, (str, BaseOffset, timedelta)): self._validate_monotonic() + freq = self._validate_freq('window') - # this will raise ValueError on non-fixed freqs - try: - freq = to_offset(self.window) - except (TypeError, ValueError) as err: - raise ValueError( - f"passed window {self.window} is not " - "compatible with a datetimelike index" - ) from err if isinstance(self._on, PeriodIndex): self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) else: @@ -1607,11 +1617,25 @@ def _validate(self): self.min_periods = 1 elif isinstance(self.window, BaseIndexer): - # Passed BaseIndexer subclass should handle all other rolling kwargs + # Passed BaseIndexer subclass should handle all other rolling kwargs, return elif not is_integer(self.window) or self.window < 0: raise ValueError("window must be an integer 0 or greater") + if self.step is None: + self.step = 0 + + elif self._win_freq_i8 is not None: # datetimelike index + step = self._validate_freq('step') + + if isinstance(self._on, PeriodIndex): + self.step = step.nanos / (self._on.freq.nanos / self._on.freq.n) + else: + 
self.step = step.nanos + + elif not is_integer(self.step) or self.step < 0: + raise ValueError("step must be an integer 0 or greater") + def _validate_monotonic(self): """ Validate monotonic (increasing or decreasing). From 1c2589c9a0c3190d39c52cc73a85e3d4c75d7e3a Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Thu, 14 Oct 2021 02:02:05 +0200 Subject: [PATCH 05/15] undo unneccessary changes --- pandas/core/window/rolling.py | 37 +++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 1db47abf90bfa..b7a1464b5cebc 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1584,28 +1584,25 @@ class Rolling(RollingAndExpandingMixin): "method", ] - def _validate_freq(self, attr_name): - # this will raise ValueError on non-fixed freqs - attr = getattr(self, attr_name) - try: - return to_offset(attr) - except (TypeError, ValueError) as err: - raise ValueError( - f"passed {attr_name} {attr} is not " - "compatible with a datetimelike index" - ) from err - def _validate(self): super()._validate() # we allow rolling on a datetimelike index if ( - self.obj.empty - or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex)) + self.obj.empty + or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex)) ) and isinstance(self.window, (str, BaseOffset, timedelta)): self._validate_monotonic() - freq = self._validate_freq('window') + + # this will raise ValueError on non-fixed freqs + try: + freq = to_offset(self.window) + except (TypeError, ValueError) as err: + raise ValueError( + f"passed window {self.window} is not " + "compatible with a datetimelike index" + ) from err if isinstance(self._on, PeriodIndex): self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) @@ -1617,16 +1614,22 @@ def _validate(self): self.min_periods = 1 elif isinstance(self.window, BaseIndexer): - # Passed BaseIndexer subclass should 
handle all other rolling kwargs, + # Passed BaseIndexer subclass should handle all other rolling kwargs return elif not is_integer(self.window) or self.window < 0: raise ValueError("window must be an integer 0 or greater") if self.step is None: self.step = 0 + elif self._win_freq_i8 is not None: - elif self._win_freq_i8 is not None: # datetimelike index - step = self._validate_freq('step') + try: + step = to_offset(self.step) + except (TypeError, ValueError) as err: + raise ValueError( + f"passed step {self.step} is not " + "compatible with a datetimelike window" + ) from err if isinstance(self._on, PeriodIndex): self.step = step.nanos / (self._on.freq.nanos / self._on.freq.n) From b48a8d972aaeaccb29aa85a701be94a5abe83250 Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Thu, 14 Oct 2021 02:20:14 +0200 Subject: [PATCH 06/15] added step to groupby_rolling --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b7a1464b5cebc..5551b02be648f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2513,7 +2513,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: GroupbyIndexer """ rolling_indexer: type[BaseIndexer] - indexer_kwargs: dict[str, Any] | None = None + indexer_kwargs: dict[str, Any] | None = dict(step=self.step) index_array = self._index_array if isinstance(self.window, BaseIndexer): rolling_indexer = type(self.window) From fe0f4a858be534236e0ddd6bb9e36e0ae904f8b3 Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Thu, 14 Oct 2021 02:37:49 +0200 Subject: [PATCH 07/15] undo whitespace chages --- pandas/core/window/rolling.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 5551b02be648f..e11d97f2defea 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1589,8 +1589,8 @@ def _validate(self): # we allow rolling 
on a datetimelike index if ( - self.obj.empty - or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex)) + self.obj.empty + or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex)) ) and isinstance(self.window, (str, BaseOffset, timedelta)): self._validate_monotonic() @@ -1603,7 +1603,6 @@ def _validate(self): f"passed window {self.window} is not " "compatible with a datetimelike index" ) from err - if isinstance(self._on, PeriodIndex): self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) else: @@ -1630,7 +1629,6 @@ def _validate(self): f"passed step {self.step} is not " "compatible with a datetimelike window" ) from err - if isinstance(self._on, PeriodIndex): self.step = step.nanos / (self._on.freq.nanos / self._on.freq.n) else: From d04d3636195b90bab4b30fec6b715c6dd0be357d Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Thu, 14 Oct 2021 16:33:36 +0200 Subject: [PATCH 08/15] fixed step=None called double raises Exception bug. --- pandas/core/window/rolling.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e11d97f2defea..86bc03edf4e41 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -142,6 +142,7 @@ def __init__( self.axis = obj._get_axis_number(axis) if axis is not None else None self.method = method self.step = step + self._step_size = 0 self._win_freq_i8 = None if self.on is None: if self.axis == 0: @@ -400,11 +401,11 @@ def _get_window_indexer(self) -> BaseIndexer: index_array=self._index_array, window_size=self._win_freq_i8, center=self.center, - step=self.step, + step=self._step_size, ) return FixedWindowIndexer( window_size=self.window, - step=self.step, + step=self._step_size, ) def _apply_series( @@ -1582,6 +1583,7 @@ class Rolling(RollingAndExpandingMixin): "on", "closed", "method", + "step", ] def _validate(self): @@ -1619,7 +1621,7 @@ def _validate(self): raise 
ValueError("window must be an integer 0 or greater") if self.step is None: - self.step = 0 + self._step_size = 0 elif self._win_freq_i8 is not None: try: @@ -1630,9 +1632,9 @@ def _validate(self): "compatible with a datetimelike window" ) from err if isinstance(self._on, PeriodIndex): - self.step = step.nanos / (self._on.freq.nanos / self._on.freq.n) + self._step_size = step.nanos / (self._on.freq.nanos / self._on.freq.n) else: - self.step = step.nanos + self._step_size = step.nanos elif not is_integer(self.step) or self.step < 0: raise ValueError("step must be an integer 0 or greater") @@ -2511,7 +2513,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: GroupbyIndexer """ rolling_indexer: type[BaseIndexer] - indexer_kwargs: dict[str, Any] | None = dict(step=self.step) + indexer_kwargs: dict[str, Any] | None = dict(step=self._step_size) index_array = self._index_array if isinstance(self.window, BaseIndexer): rolling_indexer = type(self.window) From 48fa41e6a8546f5871002dea5a7af0184e9dbb9d Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Thu, 14 Oct 2021 16:35:09 +0200 Subject: [PATCH 09/15] rolling - on non centered windows take first of duplicate indices on right bound --- pandas/_libs/window/indexers.pyx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 6dc275210fb33..f712cd174a968 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -132,6 +132,11 @@ def calculate_variable_window_bounds( for j in range(max(end[last_valid], i), num_values): if (index[j] - end_bound) * index_growth_sign == 0 and right_closed: end[i] = j + 1 + # for duplicate indices on non-centered windows + # we want the first of the identical indices + # see Gh 43944 and GH 20712 + if not center: + break elif (index[j] - end_bound) * index_growth_sign >= 0: end[i] = j break From 0bbfd98c5680384923402fd589281a1e5e63c395 Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Thu, 14 Oct 2021 
17:06:44 +0200 Subject: [PATCH 10/15] undo whitespace changes --- pandas/core/indexers/objects.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index b95244f1c4f9f..6230d6b2890e8 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -71,11 +71,11 @@ class FixedWindowIndexer(BaseIndexer): """Creates window boundaries that are of fixed length.""" def __init__( - self, - index_array: np.ndarray | None = None, - window_size: int = 0, - step: int = 1, - **kwargs + self, + index_array: np.ndarray | None = None, + window_size: int = 0, + step: int = 1, + **kwargs, ): super().__init__(index_array, window_size, **kwargs) self.step = step @@ -107,7 +107,7 @@ def get_window_bounds( # apply step, the resulting window will have zero length if self.step > 1: mask = np.full_like(start, True, dtype=bool) - mask[::self.step] = False + mask[:: self.step] = False start[mask] = end[mask] return start, end @@ -117,11 +117,11 @@ class VariableWindowIndexer(BaseIndexer): """Creates window boundaries that are of variable length, namely for time series.""" def __init__( - self, - index_array: np.ndarray | None = None, - window_size: int = 0, - step: int = 1, - **kwargs + self, + index_array: np.ndarray | None = None, + window_size: int = 0, + step: int = 1, + **kwargs, ): super().__init__(index_array, window_size, **kwargs) self.step = step From 61dd193ca3902c03a00b049f9da66e49dc7bcaa3 Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Thu, 14 Oct 2021 21:49:31 +0200 Subject: [PATCH 11/15] now support class Window --- pandas/core/generic.py | 1 + pandas/core/window/rolling.py | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 50cea7b0929a7..bf0183b2a32a7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10980,6 +10980,7 @@ def rolling( axis=axis, closed=closed, 
method=method, + step=step, ) return Rolling( diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 86bc03edf4e41..6a3a7d1ff0597 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1014,6 +1014,7 @@ class Window(BaseWindow): "on", "closed", "method", + "step", ] def _validate(self): @@ -1038,6 +1039,10 @@ def _validate(self): if self.method != "single": raise NotImplementedError("'single' is the only supported method type.") + if self.step is not None and not is_integer(self.step): + raise ValueError("step must be an integer 0 or greater") + self._step_size = self.step or 0 + def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: """ Center the result in the window for weighted rolling aggregations. @@ -1100,6 +1105,11 @@ def calc(x): if self.center: result = self._center_window(result, offset) + if self._step_size > 1: + mask = np.full_like(result, True, dtype=bool) + mask[:: self._step_size] = False + result[mask] = np.nan + return result return self._apply_blockwise(homogeneous_func, name) From e98dac4090532fbb8f8e0b21b40f77e646617e69 Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Thu, 14 Oct 2021 21:49:48 +0200 Subject: [PATCH 12/15] added docu --- pandas/core/window/rolling.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 6a3a7d1ff0597..7cfbdb40158f4 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -884,6 +884,11 @@ class Window(BaseWindow): .. versionadded:: 1.3.0 + step : int or offset, default None + Minimum step size the window is shifted and have a value + (otherwise result is NA). For a window that is specified by an offset, + `step` also needs to be an offset. Otherwise, `step` must be an integer. 
+ Returns ------- a Window or Rolling sub-classed for the particular operation From 6aa8b0df6526cade82c99f68f0dd4e414b237a56 Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Thu, 14 Oct 2021 22:11:46 +0200 Subject: [PATCH 13/15] flak8tyfied --- pandas/core/window/rolling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 7cfbdb40158f4..591013c22a426 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2519,6 +2519,7 @@ class RollingGroupby(BaseWindowGroupby, Rolling): _attributes = Rolling._attributes + BaseWindowGroupby._attributes + @property def _get_window_indexer(self) -> GroupbyIndexer: """ Return an indexer class that will compute the window start and end bounds @@ -2528,7 +2529,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: GroupbyIndexer """ rolling_indexer: type[BaseIndexer] - indexer_kwargs: dict[str, Any] | None = dict(step=self._step_size) + indexer_kwargs = {'step': self._step_size} index_array = self._index_array if isinstance(self.window, BaseIndexer): rolling_indexer = type(self.window) From 898086125f59229e0670eae747dd17d24df310ca Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Fri, 15 Oct 2021 14:51:16 +0200 Subject: [PATCH 14/15] undo change that wassnt on purpose --- pandas/core/window/rolling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 591013c22a426..a4ab626c38cad 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2519,7 +2519,6 @@ class RollingGroupby(BaseWindowGroupby, Rolling): _attributes = Rolling._attributes + BaseWindowGroupby._attributes - @property def _get_window_indexer(self) -> GroupbyIndexer: """ Return an indexer class that will compute the window start and end bounds From 8a54fc11d0a9df7dfec2a4518acaaf943588b44c Mon Sep 17 00:00:00 2001 From: Bert Palm Date: Fri, 15 Oct 2021 15:05:24 +0200 Subject: 
[PATCH 15/15] blackified --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a4ab626c38cad..0e6392edf37b6 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2528,7 +2528,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: GroupbyIndexer """ rolling_indexer: type[BaseIndexer] - indexer_kwargs = {'step': self._step_size} + indexer_kwargs = {"step": self._step_size} index_array = self._index_array if isinstance(self.window, BaseIndexer): rolling_indexer = type(self.window)