diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index d65a1a39e8bc7..7e94763f3f293 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -292,7 +292,7 @@ class Groupby: ["sum", "median", "mean", "max", "min", "kurt", "sum"], [ ("rolling", {"window": 2}), - ("rolling", {"window": "30s", "on": "C"}), + ("rolling", {"window": "30s"}), ("expanding", {}), ], ) @@ -304,9 +304,10 @@ def setup(self, method, window_kwargs): { "A": [str(i) for i in range(N)] * 10, "B": list(range(N)) * 10, - "C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10), } ) + if isinstance(kwargs.get("window", None), str): + df.index = pd.date_range(start="1900-01-01", freq="1min", periods=N * 10) self.groupby_window = getattr(df.groupby("A"), window)(**kwargs) def time_method(self, method, window_kwargs): diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 4efcbb08580db..d52366797f4a5 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -728,6 +728,7 @@ Removal of prior version deprecations/changes - Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`) - Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`) - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`) +- Enforced deprecation of silently dropping nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. 
This will now raise a :class:`.errors.DataError` (:issue:`42834`) - Changed behavior in setting values with ``df.loc[:, foo] = bar`` or ``df.iloc[:, foo] = bar``, these now always attempt to set values inplace before falling back to casting (:issue:`45333`) - Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`) - Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 989b82f45339f..ef0524e48f9e2 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -18,7 +18,6 @@ Sized, cast, ) -import warnings import numpy as np @@ -37,7 +36,6 @@ from pandas.compat._optional import import_optional_dependency from pandas.errors import DataError from pandas.util._decorators import doc -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_float64, @@ -473,10 +471,6 @@ def _apply_blockwise( obj = notna(obj).astype(int) obj._mgr = obj._mgr.consolidate() - def hfunc(values: ArrayLike) -> ArrayLike: - values = self._prep_values(values) - return homogeneous_func(values) - if self.axis == 1: obj = obj.T @@ -484,13 +478,16 @@ def hfunc(values: ArrayLike) -> ArrayLike: res_values = [] for i, arr in enumerate(obj._iter_column_arrays()): # GH#42736 operate column-wise instead of block-wise + # As of 2.0, nuisance columns that _prep_values rejects raise DataError instead of being dropped try: - res = hfunc(arr) - except (TypeError, NotImplementedError): - pass - else: - res_values.append(res) - taker.append(i) + arr = self._prep_values(arr) + except (TypeError, NotImplementedError) as err: + raise DataError( + f"Cannot aggregate non-numeric type: {arr.dtype}" + ) from err + res = homogeneous_func(arr) + res_values.append(res) + taker.append(i) index = self._slice_axis_for_step( obj.index, res_values[0] if len(res_values) > 0 else None @@ -505,18
+502,6 @@ def hfunc(values: ArrayLike) -> ArrayLike: if self.axis == 1: df = df.T - if 0 != len(res_values) != len(obj.columns): - # GH#42738 ignore_failures dropped nuisance columns - dropped = obj.columns.difference(obj.columns.take(taker)) - warnings.warn( - "Dropping of nuisance columns in rolling operations " - "is deprecated; in a future version this will raise TypeError. " - "Select only valid columns before calling the operation. " - f"Dropped columns were {dropped}", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._resolve_output(df, obj) def _apply_tablewise( diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index bc723b8ed36b8..6180d4a5f8e17 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas.errors import SpecificationError +from pandas.errors import ( + DataError, + SpecificationError, +) from pandas import ( DataFrame, @@ -66,18 +69,12 @@ def tests_skip_nuisance(step): tm.assert_frame_equal(result, expected) -def test_skip_sum_object_raises(step): +def test_sum_object_str_raises(step): df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) r = df.rolling(window=3, step=step) - msg = r"nuisance columns.*Dropped columns were Index\(\['C'\], dtype='object'\)" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#42738 - result = r.sum() - expected = DataFrame( - {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, - columns=list("AB"), - )[::step] - tm.assert_frame_equal(result, expected) + with pytest.raises(DataError, match="Cannot aggregate non-numeric type: object"): + # GH#42738, enforced in 2.0 + r.sum() def test_agg(step): diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index 52011a2d5f760..b975a28273337 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -165,7 +165,7 @@ def 
test_dataframe_dtypes(method, expected_data, dtypes, min_periods, step): rolled = df.rolling(2, min_periods=min_periods, step=step) if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count": - msg = "No numeric types to aggregate" + msg = "Cannot aggregate non-numeric type" with pytest.raises(DataError, match=msg): getattr(rolled, method)() else: diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index f88c20f2f78c6..205a02dcb051b 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -98,11 +98,9 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods): halflife = halflife_with_times data = np.arange(10.0) data[::2] = np.nan - df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)}) - with tm.assert_produces_warning(FutureWarning, match="nuisance columns"): - # GH#42738 - result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean() - expected = df.ewm(halflife=1.0, min_periods=min_periods).mean() + df = DataFrame({"A": data}) + result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean() + expected = df.ewm(halflife=1.0, min_periods=min_periods).mean() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 3da14bce6facd..41b2ee70d7987 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -1125,13 +1125,6 @@ def test_methods(self, method, expected_data): ) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="nuisance"): - # GH#42738 - expected = df.groupby("A", group_keys=True).apply( - lambda x: getattr(x.ewm(com=1.0), method)() - ) - tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( "method, expected_data", [["corr", [np.nan, 1.0, 1.0, 1]], ["cov", [np.nan, 0.5, 0.928571, 1.385714]]], @@ -1160,13 +1153,9 @@ def 
test_pairwise_methods(self, method, expected_data): def test_times(self, times_frame): # GH 40951 halflife = "23 days" - with tm.assert_produces_warning(FutureWarning, match="nuisance"): - # GH#42738 - result = ( - times_frame.groupby("A") - .ewm(halflife=halflife, times=times_frame["C"]) - .mean() - ) + # GH#42738 + times = times_frame.pop("C") + result = times_frame.groupby("A").ewm(halflife=halflife, times=times).mean() expected = DataFrame( { "B": [ @@ -1200,29 +1189,13 @@ def test_times(self, times_frame): ) tm.assert_frame_equal(result, expected) - def test_times_vs_apply(self, times_frame): - # GH 40951 - halflife = "23 days" - with tm.assert_produces_warning(FutureWarning, match="nuisance"): - # GH#42738 - result = ( - times_frame.groupby("A") - .ewm(halflife=halflife, times=times_frame["C"]) - .mean() - ) - expected = times_frame.groupby("A", group_keys=True).apply( - lambda x: x.ewm(halflife=halflife, times=x["C"]).mean() - ) - tm.assert_frame_equal(result, expected) - def test_times_array(self, times_frame): # GH 40951 halflife = "23 days" + times = times_frame.pop("C") gb = times_frame.groupby("A") - with tm.assert_produces_warning(FutureWarning, match="nuisance"): - # GH#42738 - result = gb.ewm(halflife=halflife, times=times_frame["C"]).mean() - expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean() + result = gb.ewm(halflife=halflife, times=times).mean() + expected = gb.ewm(halflife=halflife, times=times.values).mean() tm.assert_frame_equal(result, expected) def test_dont_mutate_obj_after_slicing(self): diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 1c78a186e9d37..cca0ab3a0a9bb 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -253,22 +253,19 @@ def test_invalid_engine_kwargs(self, grouper, method): def test_cython_vs_numba( self, grouper, method, nogil, parallel, nopython, ignore_na, adjust ): + df = DataFrame({"B": range(4)}) if grouper == "None": 
grouper = lambda x: x - warn = FutureWarning else: + df["A"] = ["a", "b", "a", "b"] grouper = lambda x: x.groupby("A") - warn = None if method == "sum": adjust = True - df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)}) ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na) engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - with tm.assert_produces_warning(warn, match="nuisance"): - # GH#42738 - result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs) - expected = getattr(ewm, method)(engine="cython") + result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs) + expected = getattr(ewm, method)(engine="cython") tm.assert_frame_equal(result, expected) @@ -276,12 +273,12 @@ def test_cython_vs_numba( def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na): # GH 40951 + df = DataFrame({"B": [0, 0, 1, 1, 2, 2]}) if grouper == "None": grouper = lambda x: x - warn = FutureWarning else: grouper = lambda x: x.groupby("A") - warn = None + df["A"] = ["a", "b", "a", "b", "b", "a"] halflife = "23 days" times = to_datetime( @@ -294,17 +291,14 @@ def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_ "2020-01-03", ] ) - df = DataFrame({"A": ["a", "b", "a", "b", "b", "a"], "B": [0, 0, 1, 1, 2, 2]}) ewm = grouper(df).ewm( halflife=halflife, adjust=True, ignore_na=ignore_na, times=times ) engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - with tm.assert_produces_warning(warn, match="nuisance"): - # GH#42738 - result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs) - expected = ewm.mean(engine="cython") + result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs) + expected = ewm.mean(engine="cython") tm.assert_frame_equal(result, expected)