From 975794d0543ce8f3b5153fb75d419b2428e4f2ec Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 13 May 2022 10:49:43 -0700 Subject: [PATCH 1/4] Fix ewm times, decay validation --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/window/ewm.py | 10 ++++------ pandas/tests/window/test_ewm.py | 8 ++++++++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9f1c4755bc54f..330fc51450ecb 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -736,6 +736,7 @@ Groupby/resample/rolling - Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`) - Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`) - Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`) +- Bug in :class:`.ExponentialMovingWindow` where ``alpha``, ``com``, or ``span`` were incorrectly allowed when ``times`` and ``halflife`` were passed (:issue:`47003`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 32cb4938344c4..c8674492256a0 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -396,12 +396,10 @@ def __init__( if isna(self.times).any(): raise ValueError("Cannot convert NaT values to integer") self._deltas = _calculate_deltas(self.times, self.halflife) - # Halflife is no longer applicable when calculating COM - # But allow COM to still be calculated if the user passes other decay args - if common.count_not_none(self.com, self.span, self.alpha) > 0: - self._com = get_center_of_mass(self.com, self.span, None, self.alpha) - else: - self._com = 1.0 + # GH 47003 + # get_center_of_mass will validate and raise if the user has also + # passed in com, span or alpha (1.0 is a placeholder value) + self._com = get_center_of_mass(self.com, self.span, 1.0, 
self.alpha) else: if self.halflife is not None and isinstance( self.halflife, (str, datetime.timedelta) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index b1e8b43258750..9acc34c67243a 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -666,3 +666,11 @@ def test_ewm_pairwise_cov_corr(func, frame): result.index = result.index.droplevel(1) expected = getattr(frame[1].ewm(span=10, min_periods=5), func)(frame[5]) tm.assert_series_equal(result, expected, check_names=False) + + +@pytest.mark.parametrize("decay", ["alpha", "com", "span"]) +def test_validate_times_halflife_with_other_decay(decay): + ser = Series([1, 2]) + msg = "comass, span, halflife, and alpha are mutually exclusive" + with pytest.raises(ValueError, match=msg): + ser.ewm(**{decay: 1}, halflife="1 Day", times=DatetimeIndex(["2021", "2022"])) From 8909e94c61e8b3855f7fb72c6604a2681bf64468 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 13 May 2022 13:07:15 -0700 Subject: [PATCH 2/4] Add np.timedelta64 support --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/window/ewm.py | 4 ++-- pandas/tests/window/conftest.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 330fc51450ecb..0f1b71c690ce0 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -150,6 +150,7 @@ Other enhancements - Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`) - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`) - Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`) +- ``halflife`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` when ``times`` is specified 
(:issue:`47003`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index c8674492256a0..8a6f7b4b798ee 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -389,7 +389,7 @@ def __init__( raise ValueError("times must be datetime64[ns] dtype.") if len(self.times) != len(obj): raise ValueError("times must be the same length as the object.") - if not isinstance(self.halflife, (str, datetime.timedelta)): + if not isinstance(self.halflife, (str, datetime.timedelta, np.timedelta64)): raise ValueError( "halflife must be a string or datetime.timedelta object" ) @@ -402,7 +402,7 @@ def __init__( self._com = get_center_of_mass(self.com, self.span, 1.0, self.alpha) else: if self.halflife is not None and isinstance( - self.halflife, (str, datetime.timedelta) + self.halflife, (str, datetime.timedelta, np.timedelta64) ): raise ValueError( "halflife can only be a timedelta convertible argument if " diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index f42a1a5449c5c..8977d1a0d9d1b 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -102,7 +102,7 @@ def engine_and_raw(request): return request.param -@pytest.fixture(params=["1 day", timedelta(days=1)]) +@pytest.fixture(params=["1 day", timedelta(days=1), np.timedelta64(1, "D")]) def halflife_with_times(request): """Halflife argument for EWM when times is specified.""" return request.param From bdb64972984b763793417255d50077de76bc0337 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 14 May 2022 18:40:28 -0700 Subject: [PATCH 3/4] Revert "Fix ewm times, decay validation" This reverts commit 975794d0543ce8f3b5153fb75d419b2428e4f2ec. 
--- doc/source/whatsnew/v1.5.0.rst | 1 - pandas/core/window/ewm.py | 10 ++++++---- pandas/tests/window/test_ewm.py | 8 -------- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 0f1b71c690ce0..4681257dcfca0 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -737,7 +737,6 @@ Groupby/resample/rolling - Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`) - Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`) - Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`) -- Bug in :class:`.ExponentialMovingWindow` where ``alpha``, ``com``, or ``span`` were incorrectly allowed when ``times`` and ``halflife`` were passed (:issue:`47003`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 8a6f7b4b798ee..cb918b5ea06e0 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -396,10 +396,12 @@ def __init__( if isna(self.times).any(): raise ValueError("Cannot convert NaT values to integer") self._deltas = _calculate_deltas(self.times, self.halflife) - # GH 47003 - # get_center_of_mass will validate and raise if the user has also - # passed in com, span or alpha (1.0 is a placeholder value) - self._com = get_center_of_mass(self.com, self.span, 1.0, self.alpha) + # Halflife is no longer applicable when calculating COM + # But allow COM to still be calculated if the user passes other decay args + if common.count_not_none(self.com, self.span, self.alpha) > 0: + self._com = get_center_of_mass(self.com, self.span, None, self.alpha) + else: + self._com = 1.0 else: if self.halflife is not None and isinstance( self.halflife, (str, datetime.timedelta, np.timedelta64) diff --git a/pandas/tests/window/test_ewm.py 
b/pandas/tests/window/test_ewm.py index 9acc34c67243a..b1e8b43258750 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -666,11 +666,3 @@ def test_ewm_pairwise_cov_corr(func, frame): result.index = result.index.droplevel(1) expected = getattr(frame[1].ewm(span=10, min_periods=5), func)(frame[5]) tm.assert_series_equal(result, expected, check_names=False) - - -@pytest.mark.parametrize("decay", ["alpha", "com", "span"]) -def test_validate_times_halflife_with_other_decay(decay): - ser = Series([1, 2]) - msg = "comass, span, halflife, and alpha are mutually exclusive" - with pytest.raises(ValueError, match=msg): - ser.ewm(**{decay: 1}, halflife="1 Day", times=DatetimeIndex(["2021", "2022"])) From 7f07b4def6f27b3a12ab39803991c14726c9cd12 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 14 May 2022 18:47:34 -0700 Subject: [PATCH 4/4] Clarify documentation --- pandas/core/window/ewm.py | 11 +++++------ pandas/tests/window/test_ewm.py | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index cb918b5ea06e0..922d194f04c55 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -134,8 +134,9 @@ class ExponentialMovingWindow(BaseWindow): r""" Provide exponentially weighted (EW) calculations. - Exactly one parameter: ``com``, ``span``, ``halflife``, or ``alpha`` must be - provided. + Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be + provided if ``times`` is not provided. If ``times`` is provided, + ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided. Parameters ---------- @@ -155,7 +156,7 @@ class ExponentialMovingWindow(BaseWindow): :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for :math:`halflife > 0`. - If ``times`` is specified, the time unit (str or timedelta) over which an + If ``times`` is specified, a timedelta convertible unit over which an observation decays to half its value. 
Only applicable to ``mean()``, and halflife value will not apply to the other functions. @@ -390,9 +391,7 @@ def __init__( if len(self.times) != len(obj): raise ValueError("times must be the same length as the object.") if not isinstance(self.halflife, (str, datetime.timedelta, np.timedelta64)): - raise ValueError( - "halflife must be a string or datetime.timedelta object" - ) + raise ValueError("halflife must be a timedelta convertible object") if isna(self.times).any(): raise ValueError("Cannot convert NaT values to integer") self._deltas = _calculate_deltas(self.times, self.halflife) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index b1e8b43258750..66cd36d121750 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -90,7 +90,7 @@ def test_ewma_times_not_same_length(): def test_ewma_halflife_not_correct_type(): - msg = "halflife must be a string or datetime.timedelta object" + msg = "halflife must be a timedelta convertible object" with pytest.raises(ValueError, match=msg): Series(range(5)).ewm(halflife=1, times=np.arange(5).astype("datetime64[ns]"))