diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 73e31d9e52fa7..fb1f0194a19a4 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -162,6 +162,7 @@ Deprecations - Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`) - Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`) - Deprecated the 'kind' argument in :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer`, :meth:`Index.slice_locs`; in a future version passing 'kind' will raise (:issue:`42857`) +- Deprecated dropping of nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`EWM` aggregations (:issue:`42738`) - Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`) - diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 01e11ff4b008d..d4c0eb946505d 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -32,6 +32,7 @@ from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_float64, @@ -436,6 +437,18 @@ def hfunc2d(values: ArrayLike) -> ArrayLike: new_mgr = mgr.apply_2d(hfunc2d, ignore_failures=True) else: new_mgr = mgr.apply(hfunc, ignore_failures=True) + + if 0 != len(new_mgr.items) != len(mgr.items): + # GH#42738 ignore_failures dropped nuisance columns + dropped = mgr.items.difference(new_mgr.items) + warnings.warn( + "Dropping of nuisance columns in rolling operations " + "is deprecated; in a future version this will raise TypeError. " + "Select only valid columns before calling the operation. " + f"Dropped columns were {dropped}", + FutureWarning, + stacklevel=find_stack_level(), + ) out = obj._constructor(new_mgr) return self._resolve_output(out, obj) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index e70d079739003..7a5fcebfd23d7 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -68,7 +68,10 @@ def tests_skip_nuisance(): def test_skip_sum_object_raises(): df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) r = df.rolling(window=3) - result = r.sum() + msg = r"nuisance columns.*Dropped columns were Index\(\['C'\], dtype='object'\)" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#42738 + result = r.sum() expected = DataFrame( {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, columns=list("AB"), diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 977ce281c4b33..d2a3be88eb27b 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -116,8 +116,10 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods): data = np.arange(10.0) data[::2] = np.nan df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)}) - result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean() - expected = df.ewm(halflife=1.0, min_periods=min_periods).mean() + with tm.assert_produces_warning(FutureWarning, match="nuisance columns"): + # GH#42738 + result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean() + expected = df.ewm(halflife=1.0, min_periods=min_periods).mean() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 03b43026c9a6c..2523ec585a491 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -923,7 +923,12 @@ def test_methods(self, method, expected_data): ) tm.assert_frame_equal(result, expected) - expected = df.groupby("A").apply(lambda x: getattr(x.ewm(com=1.0), method)()) + with tm.assert_produces_warning(FutureWarning, match="nuisance"): + # GH#42738 + expected = df.groupby("A").apply( + lambda x: getattr(x.ewm(com=1.0), method)() + ) + # There may be a bug in the above statement; not returning the correct index tm.assert_frame_equal(result.reset_index(drop=True), expected) @@ -955,7 +960,9 @@ def test_pairwise_methods(self, method, expected_data): def test_times(self, times_frame): # GH 40951 halflife = "23 days" - result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() + with tm.assert_produces_warning(FutureWarning, match="nuisance"): + # GH#42738 + result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() expected = DataFrame( { "B": [ @@ -992,22 +999,23 @@ def test_times(self, times_frame): def test_times_vs_apply(self, times_frame): # GH 40951 halflife = "23 days" - result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() - expected = ( - times_frame.groupby("A") - .apply(lambda x: x.ewm(halflife=halflife, times="C").mean()) - .iloc[[0, 3, 6, 9, 1, 4, 7, 2, 5, 8]] - .reset_index(drop=True) - ) + with tm.assert_produces_warning(FutureWarning, match="nuisance"): + # GH#42738 + result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() + expected = ( + times_frame.groupby("A") + .apply(lambda x: x.ewm(halflife=halflife, times="C").mean()) + .iloc[[0, 3, 6, 9, 1, 4, 7, 2, 5, 8]] + .reset_index(drop=True) + ) tm.assert_frame_equal(result.reset_index(drop=True), expected) def test_times_array(self, times_frame): # GH 40951 halflife = "23 days" - result = times_frame.groupby("A").ewm(halflife=halflife, times="C").mean() - expected = ( - times_frame.groupby("A") - .ewm(halflife=halflife, times=times_frame["C"].values) - .mean() - ) + gb = times_frame.groupby("A") + with tm.assert_produces_warning(FutureWarning, match="nuisance"): + # GH#42738 + result = gb.ewm(halflife=halflife, times="C").mean() + expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index a8ec9086e6b02..f507b6a465f5b 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -170,26 +170,39 @@ def test_invalid_engine_kwargs(self, grouper): engine="cython", engine_kwargs={"nopython": True} ) - @pytest.mark.parametrize( - "grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"] - ) + @pytest.mark.parametrize("grouper", ["None", "groupby"]) def test_cython_vs_numba( self, grouper, nogil, parallel, nopython, ignore_na, adjust ): + if grouper == "None": + grouper = lambda x: x + warn = FutureWarning + else: + grouper = lambda x: x.groupby("A") + warn = None + df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)}) ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na) engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs) - expected = ewm.mean(engine="cython") + with tm.assert_produces_warning(warn, match="nuisance"): + # GH#42738 + result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs) + expected = ewm.mean(engine="cython") tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( - "grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"] - ) + @pytest.mark.parametrize("grouper", ["None", "groupby"]) def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na): # GH 40951 + + if grouper == "None": + grouper = lambda x: x + warn = FutureWarning + else: + grouper = lambda x: x.groupby("A") + warn = None + halflife = "23 days" times = to_datetime( [ @@ -207,8 +220,11 @@ def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_ ) engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs) - expected = ewm.mean(engine="cython") + + with tm.assert_produces_warning(warn, match="nuisance"): + # GH#42738 + result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs) + expected = ewm.mean(engine="cython") tm.assert_frame_equal(result, expected)