From d3624eeb903b6f77586aa6d3308c7fd9f37c679d Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 16 Jul 2023 09:51:35 -0700 Subject: [PATCH 1/6] DEPR: argmax/min with all-NAs or any-NAs and skipna=False --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/base.py | 16 +++++++ pandas/core/indexes/base.py | 14 +++++++ pandas/tests/extension/base/methods.py | 7 +++- pandas/tests/reductions/test_reductions.py | 49 +++++++++++++++------- 5 files changed, 72 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index bfddccc812571..016995c453b4d 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -367,6 +367,7 @@ Deprecations - Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`) - Deprecated the use of non-supported datetime64 and timedelta64 resolutions with :func:`pandas.array`. Supported resolutions are: "s", "ms", "us", "ns" resolutions (:issue:`53058`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) +- Deprecated the behavior of :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`Series.argmax`, :meth:`Series.argmin` with either all-NAs and skipna=True or any-NAs and skipna=False; in a future version this will raise ``ValueError`` (:issue:`??`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/base.py b/pandas/core/base.py index 057b381bbdb58..f37ab777e23c5 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -14,6 +14,7 @@ final, overload, ) +import warnings import numpy as np @@ -36,6 +37,7 @@ cache_readonly, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import can_hold_element from pandas.core.dtypes.common import ( @@ -735,6 +737,13 @@ def argmax( if isinstance(delegate, ExtensionArray): if not skipna and delegate.isna().any(): + warnings.warn( + f"The behavior of {type(self).__name__}.argmax/argmin " + "with skipna=False and NAs, or with all-NAs is deprecated. " + "In a future version this will raise ValueError.", + FutureWarning, + stacklevel=find_stack_level(), + ) return -1 else: return delegate.argmax() @@ -755,6 +764,13 @@ def argmin( if isinstance(delegate, ExtensionArray): if not skipna and delegate.isna().any(): + warnings.warn( + f"The behavior of {type(self).__name__}.argmax/argmin " + "with skipna=False and NAs, or with all-NAs is deprecated. " + "In a future version this will raise ValueError.", + FutureWarning, + stacklevel=find_stack_level(), + ) return -1 else: return delegate.argmin() diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4b6b59898c199..04a2a800c52eb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7268,6 +7268,13 @@ def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: # Take advantage of cache mask = self._isnan if not skipna or mask.all(): + warnings.warn( + f"The behavior of {type(self).__name__}.argmax/argmin " + "with skipna=False and NAs, or with all-NAs is deprecated. " + "In a future version this will raise ValueError.", + FutureWarning, + stacklevel=find_stack_level(), + ) return -1 return super().argmin(skipna=skipna) @@ -7280,6 +7287,13 @@ def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: # Take advantage of cache mask = self._isnan if not skipna or mask.all(): + warnings.warn( + f"The behavior of {type(self).__name__}.argmax/argmin " + "with skipna=False and NAs, or with all-NAs is deprecated. " + "In a future version this will raise ValueError.", + FutureWarning, + stacklevel=find_stack_level(), + ) return -1 return super().argmax(skipna=skipna) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 9c699f01944e5..bd0e3472138e2 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -160,8 +160,13 @@ def test_argreduce_series( self, data_missing_for_sorting, op_name, skipna, expected ): # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. + warn = None + if not isinstance(expected, int) or expected < 0: + warn = FutureWarning + msg = "The behavior of Series.argmax/argmin with skipna=False and NAs" ser = pd.Series(data_missing_for_sorting) - result = getattr(ser, op_name)(skipna=skipna) + with tm.assert_produces_warning(warn, match=msg): + result = getattr(ser, op_name)(skipna=skipna) tm.assert_almost_equal(result, expected) def test_argmax_argmin_no_skipna_notimplemented(self, data_missing_for_sorting): diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 83b9a83c0a6a2..475b575313f82 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -115,7 +115,13 @@ def test_nanargminmax(self, opname, index_or_series): obj = klass([NaT, datetime(2011, 11, 1)]) assert getattr(obj, arg_op)() == 1 - result = getattr(obj, arg_op)(skipna=False) + + msg = ( + "The behavior of (DatetimeIndex|Series).argmax/argmin with " + "skipna=False and NAs" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = getattr(obj, arg_op)(skipna=False) if klass is Series: assert np.isnan(result) else: @@ -124,7 +130,8 @@ def test_nanargminmax(self, opname, index_or_series): obj = klass([NaT, datetime(2011, 11, 1), NaT]) # check DatetimeIndex non-monotonic path assert getattr(obj, arg_op)() == 1 - result = getattr(obj, arg_op)(skipna=False) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = getattr(obj, arg_op)(skipna=False) if klass is Series: assert np.isnan(result) else: @@ -154,26 +161,40 @@ def test_argminmax(self): obj = Index([np.nan, 1, np.nan, 2]) assert obj.argmin() == 1 assert obj.argmax() == 3 - assert obj.argmin(skipna=False) == -1 - assert obj.argmax(skipna=False) == -1 + msg = "The behavior of Index.argmax/argmin with skipna=False and NAs" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmin(skipna=False) == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmax(skipna=False) == -1 obj = Index([np.nan]) - assert obj.argmin() == -1 - assert obj.argmax() == -1 - assert obj.argmin(skipna=False) == -1 - assert obj.argmax(skipna=False) == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmin() == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmax() == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmin(skipna=False) == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmax(skipna=False) == -1 + msg = "The behavior of DatetimeIndex.argmax/argmin with skipna=False and NAs" obj = Index([NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), NaT]) assert obj.argmin() == 1 assert obj.argmax() == 2 - assert obj.argmin(skipna=False) == -1 - assert obj.argmax(skipna=False) == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmin(skipna=False) == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmax(skipna=False) == -1 obj = Index([NaT]) - assert obj.argmin() == -1 - assert obj.argmax() == -1 - assert obj.argmin(skipna=False) == -1 - assert obj.argmax(skipna=False) == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmin() == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmax() == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmin(skipna=False) == -1 + with tm.assert_produces_warning(FutureWarning, match=msg): + assert obj.argmax(skipna=False) == -1 @pytest.mark.parametrize("op, expected_col", [["max", "a"], ["min", "b"]]) def test_same_tz_min_max_axis_1(self, op, expected_col): From 9467e4306f0cd3dc2cc9f63b1ec4a3ccd2ca5f1e Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 Jul 2023 14:21:20 -0700 Subject: [PATCH 2/6] Warn in numpy cases too --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/base.py | 22 ++++++++++++++++++++-- pandas/core/series.py | 12 ++++++++++-- pandas/tests/extension/base/methods.py | 7 +------ pandas/tests/reductions/test_reductions.py | 5 +++-- 5 files changed, 35 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 016995c453b4d..4ce662cc9b856 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -367,7 +367,7 @@ Deprecations - Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`) - Deprecated the use of non-supported datetime64 and timedelta64 resolutions with :func:`pandas.array`. Supported resolutions are: "s", "ms", "us", "ns" resolutions (:issue:`53058`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) -- Deprecated the behavior of :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`Series.argmax`, :meth:`Series.argmin` with either all-NAs and skipna=True or any-NAs and skipna=False; in a future version this will raise ``ValueError`` (:issue:`??`) +- Deprecated the behavior of :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`Series.argmax`, :meth:`Series.argmin` with either all-NAs and skipna=True or any-NAs and skipna=False; in a future version this will raise ``ValueError`` (:issue:`33941`, :issue:`33942`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/base.py b/pandas/core/base.py index f37ab777e23c5..934508cb59182 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -750,9 +750,18 @@ def argmax( else: # error: Incompatible return value type (got "Union[int, ndarray]", expected # "int") - return nanops.nanargmax( # type: ignore[return-value] + result = nanops.nanargmax( # type: ignore[return-value] delegate, skipna=skipna ) + if result == -1: + warnings.warn( + f"The behavior of {type(self).__name__}.argmax/argmin " + "with skipna=False and NAs, or with all-NAs is deprecated. " + "In a future version this will raise ValueError.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return result @doc(argmax, op="min", oppose="max", value="smallest") def argmin( @@ -777,9 +786,18 @@ def argmin( else: # error: Incompatible return value type (got "Union[int, ndarray]", expected # "int") - return nanops.nanargmin( # type: ignore[return-value] + result = nanops.nanargmin( # type: ignore[return-value] delegate, skipna=skipna ) + if result == -1: + warnings.warn( + f"The behavior of {type(self).__name__}.argmax/argmin " + "with skipna=False and NAs, or with all-NAs is deprecated. " + "In a future version this will raise ValueError.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return result def tolist(self): """ diff --git a/pandas/core/series.py b/pandas/core/series.py index f17a633259816..47c73f2566e96 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2530,7 +2530,11 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab nan """ axis = self._get_axis_number(axis) - i = self.argmin(axis, skipna, *args, **kwargs) + with warnings.catch_warnings(): + # ignore warning produced by argmin since we will issue a different + # warning for idxmin + warnings.simplefilter("ignore") + i = self.argmin(axis, skipna, *args, **kwargs) if i == -1: return np.nan return self.index[i] @@ -2600,7 +2604,11 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab nan """ axis = self._get_axis_number(axis) - i = self.argmax(axis, skipna, *args, **kwargs) + with warnings.catch_warnings(): + # ignore warning produced by argmax since we will issue a different + # warning for argmax + warnings.simplefilter("ignore") + i = self.argmax(axis, skipna, *args, **kwargs) if i == -1: return np.nan return self.index[i] diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index bd0e3472138e2..9c699f01944e5 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -160,13 +160,8 @@ def test_argreduce_series( self, data_missing_for_sorting, op_name, skipna, expected ): # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. - warn = None - if not isinstance(expected, int) or expected < 0: - warn = FutureWarning - msg = "The behavior of Series.argmax/argmin with skipna=False and NAs" ser = pd.Series(data_missing_for_sorting) - with tm.assert_produces_warning(warn, match=msg): - result = getattr(ser, op_name)(skipna=skipna) + result = getattr(ser, op_name)(skipna=skipna) tm.assert_almost_equal(result, expected) def test_argmax_argmin_no_skipna_notimplemented(self, data_missing_for_sorting): diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 475b575313f82..71db57ca2c222 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -112,6 +112,7 @@ def test_nanargminmax(self, opname, index_or_series): # GH#7261 klass = index_or_series arg_op = "arg" + opname if klass is Index else "idx" + opname + warn = FutureWarning if klass is Index else None obj = klass([NaT, datetime(2011, 11, 1)]) assert getattr(obj, arg_op)() == 1 @@ -120,7 +121,7 @@ def test_nanargminmax(self, opname, index_or_series): "The behavior of (DatetimeIndex|Series).argmax/argmin with " "skipna=False and NAs" ) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): result = getattr(obj, arg_op)(skipna=False) if klass is Series: assert np.isnan(result) @@ -130,7 +131,7 @@ def test_nanargminmax(self, opname, index_or_series): obj = klass([NaT, datetime(2011, 11, 1), NaT]) # check DatetimeIndex non-monotonic path assert getattr(obj, arg_op)() == 1 - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): result = getattr(obj, arg_op)(skipna=False) if klass is Series: assert np.isnan(result) From bd6bdc573a25499e4c28a78a0a33ced3ead186b2 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 Jul 2023 20:38:27 -0700 Subject: [PATCH 3/6] update test_argreduce_series --- pandas/tests/extension/base/methods.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 9c699f01944e5..bc79db3d18ad7 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -160,8 +160,13 @@ def test_argreduce_series( self, data_missing_for_sorting, op_name, skipna, expected ): # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. + warn = None + if op_name.startswith("arg") and expected == -1: + warn = FutureWarning + msg = "The behavior of Series.argmax/argmin" ser = pd.Series(data_missing_for_sorting) - result = getattr(ser, op_name)(skipna=skipna) + with tm.assert_produces_warning(warn, match=msg): + result = getattr(ser, op_name)(skipna=skipna) tm.assert_almost_equal(result, expected) def test_argmax_argmin_no_skipna_notimplemented(self, data_missing_for_sorting): From 97e5286c432a283b55c631e59396d04744ca21d0 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 18 Jul 2023 09:23:05 -0700 Subject: [PATCH 4/6] mypy fixup --- pandas/core/base.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 934508cb59182..a61e38b269a8d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -748,11 +748,7 @@ def argmax( else: return delegate.argmax() else: - # error: Incompatible return value type (got "Union[int, ndarray]", expected - # "int") - result = nanops.nanargmax( # type: ignore[return-value] - delegate, skipna=skipna - ) + result = nanops.nanargmax(delegate, skipna=skipna) if result == -1: warnings.warn( f"The behavior of {type(self).__name__}.argmax/argmin " @@ -761,7 +757,9 @@ def argmax( FutureWarning, stacklevel=find_stack_level(), ) - return result + # error: Incompatible return value type (got "Union[int, ndarray]", expected + # "int") + return result # type: ignore[return-value] @doc(argmax, op="min", oppose="max", value="smallest") def argmin( @@ -784,11 +782,7 @@ def argmin( else: return delegate.argmin() else: - # error: Incompatible return value type (got "Union[int, ndarray]", expected - # "int") - result = nanops.nanargmin( # type: ignore[return-value] - delegate, skipna=skipna - ) + result = nanops.nanargmin(delegate, skipna=skipna) if result == -1: warnings.warn( f"The behavior of {type(self).__name__}.argmax/argmin " @@ -797,7 +791,9 @@ def argmin( FutureWarning, stacklevel=find_stack_level(), ) - return result + # error: Incompatible return value type (got "Union[int, ndarray]", expected + # "int") + return result # type: ignore[return-value] def tolist(self): """ From f34ceaccde99db54137732ce804b43c5f36463d7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 19 Jul 2023 09:55:57 -0700 Subject: [PATCH 5/6] Update doc/source/whatsnew/v2.1.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6f3ce652709d9..f8c6955f8f47c 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -368,7 +368,7 @@ Deprecations - Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`) - Deprecated the use of non-supported datetime64 and timedelta64 resolutions with :func:`pandas.array`. Supported resolutions are: "s", "ms", "us", "ns" resolutions (:issue:`53058`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) -- Deprecated the behavior of :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`Series.argmax`, :meth:`Series.argmin` with either all-NAs and skipna=True or any-NAs and skipna=False; in a future version this will raise ``ValueError`` (:issue:`33941`, :issue:`33942`) +- Deprecated the behavior of :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`Series.argmax`, :meth:`Series.argmin` with either all-NAs and skipna=True or any-NAs and skipna=False returning -1; in a future version this will raise ``ValueError`` (:issue:`33941`, :issue:`33942`) - .. --------------------------------------------------------------------------- From ded2ff0ba97755dc8d6eed687aac0527e5a0f911 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 19 Jul 2023 15:24:23 -0700 Subject: [PATCH 6/6] comment --- pandas/core/series.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index a1cd0c54b7774..b7445bf158b90 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2531,6 +2531,7 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab """ axis = self._get_axis_number(axis) with warnings.catch_warnings(): + # TODO(3.0): this catching/filtering can be removed # ignore warning produced by argmin since we will issue a different # warning for idxmin warnings.simplefilter("ignore") @@ -2606,6 +2607,7 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab """ axis = self._get_axis_number(axis) with warnings.catch_warnings(): + # TODO(3.0): this catching/filtering can be removed # ignore warning produced by argmax since we will issue a different # warning for argmax warnings.simplefilter("ignore")