From d98e262a58a5eaeb108231487be421d6260c33dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Mon, 21 Feb 2022 11:24:40 -0500 Subject: [PATCH 01/22] BUG: Change numeric_only default to True --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/frame.py | 4 ++-- pandas/tests/frame/methods/test_quantile.py | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index dfa87e3cd4574..3a76b4f662c98 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -352,6 +352,7 @@ Numeric - Bug in operations with array-likes with ``dtype="boolean"`` and :attr:`NA` incorrectly altering the array in-place (:issue:`45421`) - Bug in division, ``pow`` and ``mod`` operations on array-likes with ``dtype="boolean"`` not being like their ``np.bool_`` counterparts (:issue:`46063`) - Bug in multiplying a :class:`Series` with ``IntegerDtype`` or ``FloatingDtype`` by an array-like with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`45622`) +- Bug in :meth:`DataFrame.quantile` attribute ``numeric_only"`` should default False (:issue:`7308`) - Conversion diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a554537896ab..430d72e7d249b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10558,7 +10558,7 @@ def quantile( self, q=0.5, axis: Axis = 0, - numeric_only: bool = True, + numeric_only: bool = False, interpolation: str = "linear", ): """ @@ -10570,7 +10570,7 @@ def quantile( Value between 0 <= q <= 1, the quantile(s) to compute. axis : {0, 1, 'index', 'columns'}, default 0 Equals 0 or 'index' for row-wise, 1 or 'columns' for column-wise. - numeric_only : bool, default True + numeric_only : bool, default False If False, the quantile of datetime and timedelta data will be computed as well. 
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 040b981c41593..0215d462f642c 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -14,6 +14,23 @@ class TestDataFrameQuantile: + def test_numeric_only_default_false(self): + # GH #7308 + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}) + df["C"] = pd.date_range("2014-01-01", periods=3, freq="m") + + df_expected_num_only_false = Series( + [2.0, 3.0, Timestamp("2014-02-28 00:00:00")], + index=["A", "B", "C"], + name=0.5, + ) + tm.assert_series_equal(df.quantile(0.5), df_expected_num_only_false) + + df_expected_num_only_true = Series([2.0, 3.0], index=["A", "B"], name=0.5) + tm.assert_series_equal( + df.quantile(0.5, numeric_only=True), df_expected_num_only_true + ) + @pytest.mark.parametrize( "df,expected", [ From 0db8e52d9d8c93b783520184d8367583765d2335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Tue, 22 Feb 2022 20:36:38 -0500 Subject: [PATCH 02/22] Add future warning numeric_only in DataFrame.quantile --- pandas/core/frame.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 430d72e7d249b..ef92274007a5a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -122,6 +122,7 @@ is_object_dtype, is_scalar, is_sequence, + is_timedelta64_dtype, needs_i8_conversion, pandas_dtype, ) @@ -10558,7 +10559,7 @@ def quantile( self, q=0.5, axis: Axis = 0, - numeric_only: bool = False, + numeric_only: bool = True, interpolation: str = "linear", ): """ @@ -10642,6 +10643,18 @@ def quantile( return res.astype(dtype) return res + has_any_datetime_or_timestamp = any( + is_datetime64_any_dtype(x) or is_timedelta64_dtype(x) for x in self.dtypes + ) + if numeric_only and has_any_datetime_or_timestamp: + warnings.warn( + "In future 
versions of pandas, numeric_only will be set to " + "False by default, and the datetime/timedelta columns will " + "be considered in the results. To not consider these columns" + "specify numeric_only=True and ignore this warning.", + FutureWarning, + ) + q = Index(q, dtype=np.float64) data = self._get_numeric_data() if numeric_only else self From c427795943f8ac8f1814be625e0bf7a746e29cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Tue, 22 Feb 2022 21:51:42 -0500 Subject: [PATCH 03/22] Add deprecations.quantile_datetime_timedelta_columns in whatsnew --- doc/source/whatsnew/v1.5.0.rst | 42 +++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 3a76b4f662c98..37db7527caef1 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -244,6 +244,47 @@ use ``series.loc[i:j]``. Slicing on a :class:`DataFrame` will not be affected. +.. _whatsnew_150.deprecations.quantile_datetime_timedelta_columns: + + +In a future version, method :meth:`DataFrame.quantile` attribute ``numeric_only`` will default False. Including datetime/timedelta columns in the result (:issue:`7308`). + + +For example: +.. ipython:: python + + In [2]: df = pd.DataFrame( + ...: {"A": [1, 2, 3], "B": pd.date_range("2014-01-01", periods=3, freq="m")} + ...: ) + + +In the old behavior, datetime/timedelta columns are dropped: + +*Old behavior*: + +.. code-block:: ipython + + In [3]: df.quantile(0.5) + Out[3]: + A 2.0 + Name: 0.5, dtype: float64 + + +In a future version, these columns will be included in the result: + +*Future behavior*: + +.. code-block:: ipython + + In [4]: df.quantile(0.5) + Out[4]: + A 2.0 + B 2014-02-28 00:00:00 + Name: 0.5, dtype: object + +To retain the old behavior, use ``df.quantile(q, numeric_only=False)``. + + ..
_whatsnew_150.deprecations.excel_writer_attributes: :class:`ExcelWriter` attributes @@ -352,7 +393,6 @@ Numeric - Bug in operations with array-likes with ``dtype="boolean"`` and :attr:`NA` incorrectly altering the array in-place (:issue:`45421`) - Bug in division, ``pow`` and ``mod`` operations on array-likes with ``dtype="boolean"`` not being like their ``np.bool_`` counterparts (:issue:`46063`) - Bug in multiplying a :class:`Series` with ``IntegerDtype`` or ``FloatingDtype`` by an array-like with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`45622`) -- Bug in :meth:`DataFrame.quantile` attribute ``numeric_only"`` should default False (:issue:`7308`) - Conversion From fcf37a823446ed38297819c070dbf1bf025e362a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Wed, 23 Feb 2022 09:19:57 -0500 Subject: [PATCH 04/22] Add stacklevel to warning in DataFrame.quantile --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ef92274007a5a..cc7eb4d27d1bc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10653,6 +10653,7 @@ def quantile( "be considered in the results. 
To not consider these columns" "specify numeric_only=True and ignore this warning.", FutureWarning, + stacklevel=find_stack_level(), ) q = Index(q, dtype=np.float64) From 7e1d2869063efc60fe044cf2fad0d0a40e0a8284 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Wed, 23 Feb 2022 09:21:57 -0500 Subject: [PATCH 05/22] Modify test to expect warning in test_quantile.py --- pandas/tests/frame/methods/test_quantile.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 0215d462f642c..74f68fa19b1c3 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -14,22 +14,18 @@ class TestDataFrameQuantile: - def test_numeric_only_default_false(self): + def test_numeric_only_default_false_warning(self): # GH #7308 df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}) df["C"] = pd.date_range("2014-01-01", periods=3, freq="m") - df_expected_num_only_false = Series( - [2.0, 3.0, Timestamp("2014-02-28 00:00:00")], - index=["A", "B", "C"], + df_expected_num_only_true = Series( + [2.0, 3.0], + index=["A", "B"], name=0.5, ) - tm.assert_series_equal(df.quantile(0.5), df_expected_num_only_false) - - df_expected_num_only_true = Series([2.0, 3.0], index=["A", "B"], name=0.5) - tm.assert_series_equal( - df.quantile(0.5, numeric_only=True), df_expected_num_only_true - ) + with tm.assert_produces_warning(FutureWarning, match="numeric_only"): + tm.assert_series_equal(df.quantile(0.5), df_expected_num_only_true) @pytest.mark.parametrize( "df,expected", From a5e6defe3e6a10525b6d8c5713383b31af255da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Wed, 23 Feb 2022 09:35:25 -0500 Subject: [PATCH 06/22] Ignore FutureWarning at test_quantile.py --- pandas/tests/frame/methods/test_quantile.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git 
a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 74f68fa19b1c3..c30c9dab1bb3c 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -52,6 +52,7 @@ def test_quantile_sparse(self, df, expected): tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile(self, datetime_frame): from numpy import percentile @@ -157,6 +158,7 @@ def test_quantile_axis_parameter(self): with pytest.raises(ValueError, match=msg): df.quantile(0.1, axis="column") + @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_interpolation(self): # see gh-10174 @@ -258,6 +260,7 @@ def test_quantile_multi(self): ) tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_datetime(self): df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]}) @@ -334,6 +337,7 @@ def test_quantile_invalid(self, datetime_frame): with pytest.raises(ValueError, match=msg): datetime_frame.quantile(invalid) + @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_box(self): df = DataFrame( { @@ -457,6 +461,7 @@ def test_quantile_box(self): ) tm.assert_frame_equal(res, exp) + @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_nan(self): # GH 14357 - float block where some cols have missing values @@ -525,6 +530,7 @@ def test_quantile_nat(self): ) tm.assert_frame_equal(res, exp) + @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_empty_no_rows_floats(self): # floats @@ -576,6 +582,7 @@ def test_quantile_empty_no_rows_dt64(self): exp = exp.astype(df["b"].dtype) tm.assert_series_equal(res, exp) + @pytest.mark.filterwarnings("ignore:In future versions of pandas, 
numeric_only") def test_quantile_empty_no_columns(self): # GH#23925 _get_numeric_data may drop all columns df = DataFrame(pd.date_range("1/1/18", periods=5)) From 108bb4548e16eb52d400fc7ff2935ab59d0f7483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Thu, 24 Feb 2022 14:41:46 -0500 Subject: [PATCH 07/22] Correct documentation numeric_only in DataFrame.quantile --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cc7eb4d27d1bc..5d00c155cbd6f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10571,7 +10571,7 @@ def quantile( Value between 0 <= q <= 1, the quantile(s) to compute. axis : {0, 1, 'index', 'columns'}, default 0 Equals 0 or 'index' for row-wise, 1 or 'columns' for column-wise. - numeric_only : bool, default False + numeric_only : bool, default True If False, the quantile of datetime and timedelta data will be computed as well. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} From 93c5f65c3ed9778a3d17d482ed2fb7d6676d778f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Tue, 1 Mar 2022 18:59:09 -0500 Subject: [PATCH 08/22] DEPR: Specify nodefault for numeric_only default --- pandas/core/frame.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5d00c155cbd6f..a46cba7499515 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -122,7 +122,6 @@ is_object_dtype, is_scalar, is_sequence, - is_timedelta64_dtype, needs_i8_conversion, pandas_dtype, ) @@ -10559,7 +10558,7 @@ def quantile( self, q=0.5, axis: Axis = 0, - numeric_only: bool = True, + numeric_only: bool | lib.NoDefault = no_default, interpolation: str = "linear", ): """ @@ -10630,6 +10629,17 @@ def quantile( validate_percentile(q) axis = self._get_axis_number(axis) + if numeric_only is no_default: + warnings.warn( + "In
future versions of pandas, numeric_only will be set to " + "False by default, and the datetime/timedelta columns will " + "be considered in the results. To not consider these columns" + "specify numeric_only=True and ignore this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + numeric_only = True + if not is_list_like(q): # BlockManager.quantile expects listlike, so we wrap and unwrap here res_df = self.quantile( @@ -10643,19 +10653,6 @@ def quantile( return res.astype(dtype) return res - has_any_datetime_or_timestamp = any( - is_datetime64_any_dtype(x) or is_timedelta64_dtype(x) for x in self.dtypes - ) - if numeric_only and has_any_datetime_or_timestamp: - warnings.warn( - "In future versions of pandas, numeric_only will be set to " - "False by default, and the datetime/timedelta columns will " - "be considered in the results. To not consider these columns" - "specify numeric_only=True and ignore this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - q = Index(q, dtype=np.float64) data = self._get_numeric_data() if numeric_only else self From 0f37c305318a9bd5e45c307a61575b8b4bca61d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Tue, 1 Mar 2022 19:04:34 -0500 Subject: [PATCH 09/22] DEPR: Update whatsnew #7308 --- doc/source/whatsnew/v1.5.0.rst | 42 +--------------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 37db7527caef1..01a50b5951bed 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -244,47 +244,6 @@ use ``series.loc[i:j]``. Slicing on a :class:`DataFrame` will not be affected. -.. _whatsnew_150.deprecations.quantile_datetime_timedelta_columns: - - -In a future version, method :meth:`DataFrame.quantile` attribute ``numeric_only`` will default False. Including datetime/timedelta columns in the result (:issue:`7308`). - - -For example: -.. 
ipython:: python - - In [2]: df = pd.DataFrame( - ...: {"A": [1, 2, 3], "B": pd.date_range("2014-01-01", periods=3, freq="m")} - ...: ) - - -In the old behavior, datetime/timedelta columns are dropped: - -*Old behavior*: - -.. code-block:: ipython - - In [3]: df.quantile(0.5) - Out[3]: - A 2.0 - Name: 0.5, dtype: float64 - - -In a future version, these columns will be included in the result: - -*Future behavior*: - -.. code-block:: ipython - - In [4]: df.quantile(0.5) - Out[4]: - A 2.0 - B 2014-02-28 00:00:00 - Name: 0.5, dtype: object - -To retain the old behavior, use ``df.quantile(q, numeric_only=False)``. - - .. _whatsnew_150.deprecations.excel_writer_attributes: :class:`ExcelWriter` attributes @@ -338,6 +297,7 @@ Other Deprecations - Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`) - Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`) - Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`) +- In a future version, method :meth:`DataFrame.quantile` attribute ``numeric_only`` will default False. Including datetime/timedelta columns in the result (:issue:`7308`). - .. 
--------------------------------------------------------------------------- From a324dc9b2e8709f72602f8564d02758106a1cabd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Wed, 2 Mar 2022 15:09:54 -0500 Subject: [PATCH 10/22] DEPR: Correct frame.quantile tests to specify numeric_only --- pandas/tests/frame/methods/test_quantile.py | 94 +++++++++++---------- 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index c30c9dab1bb3c..a8b87b3957729 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -48,41 +48,40 @@ def test_numeric_only_default_false_warning(self): def test_quantile_sparse(self, df, expected): # GH#17198 # GH#24600 - result = df.quantile() + result = df.quantile(numeric_only=True) tm.assert_series_equal(result, expected) - @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile(self, datetime_frame): from numpy import percentile df = datetime_frame - q = df.quantile(0.1, axis=0) + q = df.quantile(0.1, axis=0, numeric_only=True) assert q["A"] == percentile(df["A"], 10) tm.assert_index_equal(q.index, df.columns) - q = df.quantile(0.9, axis=1) + q = df.quantile(0.9, axis=1, numeric_only=True) assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90) tm.assert_index_equal(q.index, df.index) # test degenerate case - q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0) + q = DataFrame({"x": [], "y": []}).quantile(0.1, numeric_only=True, axis=0) assert np.isnan(q["x"]) and np.isnan(q["y"]) # non-numeric exclusion df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) - rs = df.quantile(0.5) + rs = df.quantile(0.5, numeric_only=True) with tm.assert_produces_warning(FutureWarning, match="Select only valid"): xp = df.median().rename(0.5) tm.assert_series_equal(rs, xp) # axis df = DataFrame({"A": [1, 2, 3], "B": [2, 
3, 4]}, index=[1, 2, 3]) - result = df.quantile(0.5, axis=1) + result = df.quantile(0.5, axis=1, numeric_only=True) expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) tm.assert_series_equal(result, expected) - result = df.quantile([0.5, 0.75], axis=1) + result = df.quantile([0.5, 0.75], numeric_only=True, axis=1) expected = DataFrame( {1: [1.5, 1.75], 2: [2.5, 2.75], 3: [3.5, 3.75]}, index=[0.5, 0.75] ) @@ -92,7 +91,7 @@ def test_quantile(self, datetime_frame): # so that we exclude non-numeric along the same axis # See GH #7312 df = DataFrame([[1, 2, 3], ["a", "b", 4]]) - result = df.quantile(0.5, axis=1) + result = df.quantile(0.5, numeric_only=True, axis=1) expected = Series([3.0, 4.0], index=[0, 1], name=0.5) tm.assert_series_equal(result, expected) @@ -121,7 +120,7 @@ def test_quantile_axis_mixed(self): "D": ["foo", "bar", "baz"], } ) - result = df.quantile(0.5, axis=1) + result = df.quantile(0.5, numeric_only=True, axis=1) expected = Series([1.5, 2.5, 3.5], name=0.5) tm.assert_series_equal(result, expected) @@ -135,36 +134,35 @@ def test_quantile_axis_parameter(self): df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - result = df.quantile(0.5, axis=0) + result = df.quantile(0.5, axis=0, numeric_only=True) expected = Series([2.0, 3.0], index=["A", "B"], name=0.5) tm.assert_series_equal(result, expected) - expected = df.quantile(0.5, axis="index") + expected = df.quantile(0.5, axis="index", numeric_only=True) tm.assert_series_equal(result, expected) - result = df.quantile(0.5, axis=1) + result = df.quantile(0.5, axis=1, numeric_only=True) expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) tm.assert_series_equal(result, expected) - result = df.quantile(0.5, axis="columns") + result = df.quantile(0.5, axis="columns", numeric_only=True) tm.assert_series_equal(result, expected) msg = "No axis named -1 for object type DataFrame" with pytest.raises(ValueError, match=msg): - df.quantile(0.1, axis=-1) + df.quantile(0.1, axis=-1, 
numeric_only=True) msg = "No axis named column for object type DataFrame" with pytest.raises(ValueError, match=msg): - df.quantile(0.1, axis="column") + df.quantile(0.1, axis="column", numeric_only=True) - @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_interpolation(self): # see gh-10174 # interpolation method other than default linear df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - result = df.quantile(0.5, axis=1, interpolation="nearest") + result = df.quantile(0.5, axis=1, numeric_only=True, interpolation="nearest") expected = Series([1, 2, 3], index=[1, 2, 3], name=0.5) tm.assert_series_equal(result, expected) @@ -180,7 +178,7 @@ def test_quantile_interpolation(self): # float df = DataFrame({"A": [1.0, 2.0, 3.0], "B": [2.0, 3.0, 4.0]}, index=[1, 2, 3]) - result = df.quantile(0.5, axis=1, interpolation="nearest") + result = df.quantile(0.5, axis=1, numeric_only=True, interpolation="nearest") expected = Series([1.0, 2.0, 3.0], index=[1, 2, 3], name=0.5) tm.assert_series_equal(result, expected) exp = np.percentile( @@ -193,7 +191,9 @@ def test_quantile_interpolation(self): tm.assert_series_equal(result, expected) # axis - result = df.quantile([0.5, 0.75], axis=1, interpolation="lower") + result = df.quantile( + [0.5, 0.75], axis=1, numeric_only=True, interpolation="lower" + ) expected = DataFrame( {1: [1.0, 1.0], 2: [2.0, 2.0], 3: [3.0, 3.0]}, index=[0.5, 0.75] ) @@ -201,12 +201,12 @@ def test_quantile_interpolation(self): # test degenerate case df = DataFrame({"x": [], "y": []}) - q = df.quantile(0.1, axis=0, interpolation="higher") + q = df.quantile(0.1, axis=0, numeric_only=True, interpolation="higher") assert np.isnan(q["x"]) and np.isnan(q["y"]) # multi df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) - result = df.quantile([0.25, 0.5], interpolation="midpoint") + result = df.quantile([0.25, 0.5], numeric_only=True, interpolation="midpoint") # 
https://github.com/numpy/numpy/issues/7163 expected = DataFrame( @@ -221,7 +221,7 @@ def test_quantile_interpolation_datetime(self, datetime_frame): # interpolation = linear (default case) df = datetime_frame - q = df.quantile(0.1, axis=0, interpolation="linear") + q = df.quantile(0.1, axis=0, numeric_only=True, interpolation="linear") assert q["A"] == np.percentile(df["A"], 10) def test_quantile_interpolation_int(self, int_frame): @@ -229,17 +229,17 @@ def test_quantile_interpolation_int(self, int_frame): df = int_frame # interpolation = linear (default case) - q = df.quantile(0.1) + q = df.quantile(0.1, numeric_only=True) assert q["A"] == np.percentile(df["A"], 10) # test with and without interpolation keyword - q1 = df.quantile(0.1, axis=0, interpolation="linear") + q1 = df.quantile(0.1, axis=0, numeric_only=True, interpolation="linear") assert q1["A"] == np.percentile(df["A"], 10) tm.assert_series_equal(q, q1) def test_quantile_multi(self): df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) - result = df.quantile([0.25, 0.5]) + result = df.quantile([0.25, 0.5], numeric_only=True) expected = DataFrame( [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], index=[0.25, 0.5], @@ -248,13 +248,15 @@ def test_quantile_multi(self): tm.assert_frame_equal(result, expected) # axis = 1 - result = df.quantile([0.25, 0.5], axis=1) + result = df.quantile([0.25, 0.5], numeric_only=True, axis=1) expected = DataFrame( [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], index=[0.25, 0.5], columns=[0, 1, 2] ) # empty - result = DataFrame({"x": [], "y": []}).quantile([0.1, 0.9], axis=0) + result = DataFrame({"x": [], "y": []}).quantile( + [0.1, 0.9], axis=0, numeric_only=True + ) expected = DataFrame( {"x": [np.nan, np.nan], "y": [np.nan, np.nan]}, index=[0.1, 0.9] ) @@ -265,7 +267,7 @@ def test_quantile_datetime(self): df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]}) # exclude datetime - result = df.quantile(0.5) + result = df.quantile(0.5, numeric_only=True) expected = 
Series([2.5], index=["b"]) # datetime @@ -301,11 +303,11 @@ def test_quantile_datetime(self): tm.assert_frame_equal(result, expected) # empty when numeric_only=True - result = df[["a", "c"]].quantile(0.5) + result = df[["a", "c"]].quantile(0.5, numeric_only=True) expected = Series([], index=[], dtype=np.float64, name=0.5) tm.assert_series_equal(result, expected) - result = df[["a", "c"]].quantile([0.5]) + result = df[["a", "c"]].quantile([0.5], numeric_only=True) expected = DataFrame(index=[0.5]) tm.assert_frame_equal(result, expected) @@ -468,30 +470,30 @@ def test_quantile_nan(self): df = DataFrame({"a": np.arange(1, 6.0), "b": np.arange(1, 6.0)}) df.iloc[-1, 1] = np.nan - res = df.quantile(0.5) + res = df.quantile(0.5, numeric_only=True) exp = Series([3.0, 2.5], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5, 0.75]) + res = df.quantile([0.5, 0.75], numeric_only=True) exp = DataFrame({"a": [3.0, 4.0], "b": [2.5, 3.25]}, index=[0.5, 0.75]) tm.assert_frame_equal(res, exp) - res = df.quantile(0.5, axis=1) + res = df.quantile(0.5, axis=1, numeric_only=True) exp = Series(np.arange(1.0, 6.0), name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5, 0.75], axis=1) + res = df.quantile([0.5, 0.75], axis=1, numeric_only=True) exp = DataFrame([np.arange(1.0, 6.0)] * 2, index=[0.5, 0.75]) tm.assert_frame_equal(res, exp) # full-nan column df["b"] = np.nan - res = df.quantile(0.5) + res = df.quantile(0.5, numeric_only=True) exp = Series([3.0, np.nan], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5, 0.75]) + res = df.quantile([0.5, 0.75], numeric_only=True) exp = DataFrame({"a": [3.0, 4.0], "b": [np.nan, np.nan]}, index=[0.5, 0.75]) tm.assert_frame_equal(res, exp) @@ -536,19 +538,19 @@ def test_quantile_empty_no_rows_floats(self): # floats df = DataFrame(columns=["a", "b"], dtype="float64") - res = df.quantile(0.5) + res = df.quantile(0.5, numeric_only=True) exp = Series([np.nan, np.nan], 
index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5]) + res = df.quantile([0.5], numeric_only=True) exp = DataFrame([[np.nan, np.nan]], columns=["a", "b"], index=[0.5]) tm.assert_frame_equal(res, exp) - res = df.quantile(0.5, axis=1) + res = df.quantile(0.5, axis=1, numeric_only=True) exp = Series([], index=[], dtype="float64", name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5], axis=1) + res = df.quantile([0.5], axis=1, numeric_only=True) exp = DataFrame(columns=[], index=[0.5]) tm.assert_frame_equal(res, exp) @@ -556,7 +558,7 @@ def test_quantile_empty_no_rows_ints(self): # ints df = DataFrame(columns=["a", "b"], dtype="int64") - res = df.quantile(0.5) + res = df.quantile(0.5, numeric_only=True) exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) @@ -587,12 +589,12 @@ def test_quantile_empty_no_columns(self): # GH#23925 _get_numeric_data may drop all columns df = DataFrame(pd.date_range("1/1/18", periods=5)) df.columns.name = "captain tightpants" - result = df.quantile(0.5) + result = df.quantile(0.5, numeric_only=True) expected = Series([], index=[], name=0.5, dtype=np.float64) expected.index.name = "captain tightpants" tm.assert_series_equal(result, expected) - result = df.quantile([0.5]) + result = df.quantile([0.5], numeric_only=True) expected = DataFrame([], index=[0.5], columns=[]) expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) @@ -743,7 +745,7 @@ def test_quantile_ea_scalar(self, obj, index): def test_empty_numeric(self, dtype, expected_data, expected_index, axis): # GH 14564 df = DataFrame(columns=["a", "b"], dtype=dtype) - result = df.quantile(0.5, axis=axis) + result = df.quantile(0.5, axis=axis, numeric_only=True) expected = Series( expected_data, name=0.5, index=Index(expected_index), dtype="float64" ) @@ -783,7 +785,7 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis): "c": pd.to_datetime(["2011", 
"2012"]), } ) - result = df[["a", "c"]].quantile(0.5, axis=axis) + result = df[["a", "c"]].quantile(0.5, axis=axis, numeric_only=True) expected = Series( expected_data, name=0.5, index=Index(expected_index), dtype=np.float64 ) From 82db984381037ff6e0e81d9a3880341c0f9fda91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Wed, 2 Mar 2022 15:11:26 -0500 Subject: [PATCH 11/22] DEPR: Correct message error frame.quantile --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a46cba7499515..3b7d19810691c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10634,7 +10634,7 @@ def quantile( "In future versions of pandas, numeric_only will be set to " "False by default, and the datetime/timedelta columns will " "be considered in the results. To not consider these columns" - "specify numeric_only=True and ignore this warning.", + "specify numeric_only=True.", FutureWarning, stacklevel=find_stack_level(), ) From 9e9b7a93a9d75b7c32ef3397ff033017d274a561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Thu, 3 Mar 2022 08:18:33 -0500 Subject: [PATCH 12/22] DEPR: Remove warning filtering DataFrame.quantile --- pandas/tests/frame/methods/test_quantile.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index a8b87b3957729..2d071859225b0 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -262,7 +262,6 @@ def test_quantile_multi(self): ) tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_datetime(self): df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]}) @@ -339,7 +338,6 @@ def test_quantile_invalid(self, datetime_frame): with pytest.raises(ValueError, match=msg): 
datetime_frame.quantile(invalid) - @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_box(self): df = DataFrame( { @@ -463,7 +461,6 @@ def test_quantile_box(self): ) tm.assert_frame_equal(res, exp) - @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_nan(self): # GH 14357 - float block where some cols have missing values @@ -532,7 +529,6 @@ def test_quantile_nat(self): ) tm.assert_frame_equal(res, exp) - @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_empty_no_rows_floats(self): # floats @@ -584,7 +580,6 @@ def test_quantile_empty_no_rows_dt64(self): exp = exp.astype(df["b"].dtype) tm.assert_series_equal(res, exp) - @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_empty_no_columns(self): # GH#23925 _get_numeric_data may drop all columns df = DataFrame(pd.date_range("1/1/18", periods=5)) From 67f2cfa8e818e937a1cb27ac4be72dbb28a58c40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Thu, 3 Mar 2022 08:20:47 -0500 Subject: [PATCH 13/22] DEPR: Update whatsnew doc about numeric_only attribute --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 01a50b5951bed..6808a4629bd4a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -297,7 +297,7 @@ Other Deprecations - Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`) - Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`) - Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`) -- In a future version, method :meth:`DataFrame.quantile` attribute ``numeric_only`` 
will default False. Including datetime/timedelta columns in the result (:issue:`7308`). +- Deprecated the current default of ``numeric_only`` in :meth:`DataFrame.quantile`; in a future version it will default to ``False``, so datetime/timedelta columns will be included in the result (:issue:`7308`). - .. --------------------------------------------------------------------------- From 42e7df427b408ae09a130c49b2151896c1e6ea50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Thu, 3 Mar 2022 08:22:28 -0500 Subject: [PATCH 14/22] DEPR: Update Examples in docs frame.quantile --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3b7d19810691c..4c8b2d8353acd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10603,11 +10603,11 @@ def quantile( -------- >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]), ... columns=['a', 'b']) - >>> df.quantile(.1) + >>> df.quantile(.1, numeric_only=True) a 1.3 b 3.7 Name: 0.1, dtype: float64 - >>> df.quantile([.1, .5]) + >>> df.quantile([.1, .5], numeric_only=True) a b 0.1 1.3 3.7 0.5 2.5 55.0 From d0a12214377637c350451a0b6e2d42b9a32f86ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Thu, 3 Mar 2022 09:37:20 -0500 Subject: [PATCH 15/22] DEPR: Correct test finalize DataFrame.quantile --- pandas/tests/generic/test_finalize.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index cf92cd55a720e..431029c407afc 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -248,14 +248,26 @@ marks=not_implemented_mark, ), pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("quantile")), + ( + pd.DataFrame, + frame_data, + operator.methodcaller("quantile", numeric_only=True), + ), marks=not_implemented_mark, ), pytest.param( - (pd.DataFrame,
frame_data, operator.methodcaller("quantile", q=[0.25, 0.75])), + ( + pd.DataFrame, + frame_data, + operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True), + ), ), pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("quantile")), + ( + pd.DataFrame, + frame_data, + operator.methodcaller("quantile", numeric_only=True), + ), marks=not_implemented_mark, ), ( From 377cc54bc9bdde6e2d7eac45b781d37d8334effb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Tue, 8 Mar 2022 11:43:08 -0500 Subject: [PATCH 16/22] Revert "DEPR: Correct frame.quantile tests to specify numeric_only" This reverts commit 48ccac6a86a30651b067b8b07c2a332afea60233. --- pandas/tests/frame/methods/test_quantile.py | 94 ++++++++++----------- 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 2d071859225b0..98c88a2074fae 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -48,40 +48,41 @@ def test_numeric_only_default_false_warning(self): def test_quantile_sparse(self, df, expected): # GH#17198 # GH#24600 - result = df.quantile(numeric_only=True) + result = df.quantile() tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile(self, datetime_frame): from numpy import percentile df = datetime_frame - q = df.quantile(0.1, axis=0, numeric_only=True) + q = df.quantile(0.1, axis=0) assert q["A"] == percentile(df["A"], 10) tm.assert_index_equal(q.index, df.columns) - q = df.quantile(0.9, axis=1, numeric_only=True) + q = df.quantile(0.9, axis=1) assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90) tm.assert_index_equal(q.index, df.index) # test degenerate case - q = DataFrame({"x": [], "y": []}).quantile(0.1, numeric_only=True, axis=0) + q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0) assert 
np.isnan(q["x"]) and np.isnan(q["y"]) # non-numeric exclusion df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) - rs = df.quantile(0.5, numeric_only=True) + rs = df.quantile(0.5) with tm.assert_produces_warning(FutureWarning, match="Select only valid"): xp = df.median().rename(0.5) tm.assert_series_equal(rs, xp) # axis df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - result = df.quantile(0.5, axis=1, numeric_only=True) + result = df.quantile(0.5, axis=1) expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) tm.assert_series_equal(result, expected) - result = df.quantile([0.5, 0.75], numeric_only=True, axis=1) + result = df.quantile([0.5, 0.75], axis=1) expected = DataFrame( {1: [1.5, 1.75], 2: [2.5, 2.75], 3: [3.5, 3.75]}, index=[0.5, 0.75] ) @@ -91,7 +92,7 @@ def test_quantile(self, datetime_frame): # so that we exclude non-numeric along the same axis # See GH #7312 df = DataFrame([[1, 2, 3], ["a", "b", 4]]) - result = df.quantile(0.5, numeric_only=True, axis=1) + result = df.quantile(0.5, axis=1) expected = Series([3.0, 4.0], index=[0, 1], name=0.5) tm.assert_series_equal(result, expected) @@ -120,7 +121,7 @@ def test_quantile_axis_mixed(self): "D": ["foo", "bar", "baz"], } ) - result = df.quantile(0.5, numeric_only=True, axis=1) + result = df.quantile(0.5, axis=1) expected = Series([1.5, 2.5, 3.5], name=0.5) tm.assert_series_equal(result, expected) @@ -134,35 +135,36 @@ def test_quantile_axis_parameter(self): df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - result = df.quantile(0.5, axis=0, numeric_only=True) + result = df.quantile(0.5, axis=0) expected = Series([2.0, 3.0], index=["A", "B"], name=0.5) tm.assert_series_equal(result, expected) - expected = df.quantile(0.5, axis="index", numeric_only=True) + expected = df.quantile(0.5, axis="index") tm.assert_series_equal(result, expected) - result = df.quantile(0.5, axis=1, numeric_only=True) + result = df.quantile(0.5, axis=1) expected = 
Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) tm.assert_series_equal(result, expected) - result = df.quantile(0.5, axis="columns", numeric_only=True) + result = df.quantile(0.5, axis="columns") tm.assert_series_equal(result, expected) msg = "No axis named -1 for object type DataFrame" with pytest.raises(ValueError, match=msg): - df.quantile(0.1, axis=-1, numeric_only=True) + df.quantile(0.1, axis=-1) msg = "No axis named column for object type DataFrame" with pytest.raises(ValueError, match=msg): - df.quantile(0.1, axis="column", numeric_only=True) + df.quantile(0.1, axis="column") + @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_interpolation(self): # see gh-10174 # interpolation method other than default linear df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - result = df.quantile(0.5, axis=1, numeric_only=True, interpolation="nearest") + result = df.quantile(0.5, axis=1, interpolation="nearest") expected = Series([1, 2, 3], index=[1, 2, 3], name=0.5) tm.assert_series_equal(result, expected) @@ -178,7 +180,7 @@ def test_quantile_interpolation(self): # float df = DataFrame({"A": [1.0, 2.0, 3.0], "B": [2.0, 3.0, 4.0]}, index=[1, 2, 3]) - result = df.quantile(0.5, axis=1, numeric_only=True, interpolation="nearest") + result = df.quantile(0.5, axis=1, interpolation="nearest") expected = Series([1.0, 2.0, 3.0], index=[1, 2, 3], name=0.5) tm.assert_series_equal(result, expected) exp = np.percentile( @@ -191,9 +193,7 @@ def test_quantile_interpolation(self): tm.assert_series_equal(result, expected) # axis - result = df.quantile( - [0.5, 0.75], axis=1, numeric_only=True, interpolation="lower" - ) + result = df.quantile([0.5, 0.75], axis=1, interpolation="lower") expected = DataFrame( {1: [1.0, 1.0], 2: [2.0, 2.0], 3: [3.0, 3.0]}, index=[0.5, 0.75] ) @@ -201,12 +201,12 @@ def test_quantile_interpolation(self): # test degenerate case df = DataFrame({"x": [], "y": []}) - q = df.quantile(0.1, axis=0, 
numeric_only=True, interpolation="higher") + q = df.quantile(0.1, axis=0, interpolation="higher") assert np.isnan(q["x"]) and np.isnan(q["y"]) # multi df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) - result = df.quantile([0.25, 0.5], numeric_only=True, interpolation="midpoint") + result = df.quantile([0.25, 0.5], interpolation="midpoint") # https://github.com/numpy/numpy/issues/7163 expected = DataFrame( @@ -221,7 +221,7 @@ def test_quantile_interpolation_datetime(self, datetime_frame): # interpolation = linear (default case) df = datetime_frame - q = df.quantile(0.1, axis=0, numeric_only=True, interpolation="linear") + q = df.quantile(0.1, axis=0, interpolation="linear") assert q["A"] == np.percentile(df["A"], 10) def test_quantile_interpolation_int(self, int_frame): @@ -229,17 +229,17 @@ def test_quantile_interpolation_int(self, int_frame): df = int_frame # interpolation = linear (default case) - q = df.quantile(0.1, numeric_only=True) + q = df.quantile(0.1) assert q["A"] == np.percentile(df["A"], 10) # test with and without interpolation keyword - q1 = df.quantile(0.1, axis=0, numeric_only=True, interpolation="linear") + q1 = df.quantile(0.1, axis=0, interpolation="linear") assert q1["A"] == np.percentile(df["A"], 10) tm.assert_series_equal(q, q1) def test_quantile_multi(self): df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) - result = df.quantile([0.25, 0.5], numeric_only=True) + result = df.quantile([0.25, 0.5]) expected = DataFrame( [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], index=[0.25, 0.5], @@ -248,15 +248,13 @@ def test_quantile_multi(self): tm.assert_frame_equal(result, expected) # axis = 1 - result = df.quantile([0.25, 0.5], numeric_only=True, axis=1) + result = df.quantile([0.25, 0.5], axis=1) expected = DataFrame( [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], index=[0.25, 0.5], columns=[0, 1, 2] ) # empty - result = DataFrame({"x": [], "y": []}).quantile( - [0.1, 0.9], axis=0, numeric_only=True - ) + result = 
DataFrame({"x": [], "y": []}).quantile([0.1, 0.9], axis=0) expected = DataFrame( {"x": [np.nan, np.nan], "y": [np.nan, np.nan]}, index=[0.1, 0.9] ) @@ -266,7 +264,7 @@ def test_quantile_datetime(self): df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]}) # exclude datetime - result = df.quantile(0.5, numeric_only=True) + result = df.quantile(0.5) expected = Series([2.5], index=["b"]) # datetime @@ -302,11 +300,11 @@ def test_quantile_datetime(self): tm.assert_frame_equal(result, expected) # empty when numeric_only=True - result = df[["a", "c"]].quantile(0.5, numeric_only=True) + result = df[["a", "c"]].quantile(0.5) expected = Series([], index=[], dtype=np.float64, name=0.5) tm.assert_series_equal(result, expected) - result = df[["a", "c"]].quantile([0.5], numeric_only=True) + result = df[["a", "c"]].quantile([0.5]) expected = DataFrame(index=[0.5]) tm.assert_frame_equal(result, expected) @@ -467,30 +465,30 @@ def test_quantile_nan(self): df = DataFrame({"a": np.arange(1, 6.0), "b": np.arange(1, 6.0)}) df.iloc[-1, 1] = np.nan - res = df.quantile(0.5, numeric_only=True) + res = df.quantile(0.5) exp = Series([3.0, 2.5], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5, 0.75], numeric_only=True) + res = df.quantile([0.5, 0.75]) exp = DataFrame({"a": [3.0, 4.0], "b": [2.5, 3.25]}, index=[0.5, 0.75]) tm.assert_frame_equal(res, exp) - res = df.quantile(0.5, axis=1, numeric_only=True) + res = df.quantile(0.5, axis=1) exp = Series(np.arange(1.0, 6.0), name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5, 0.75], axis=1, numeric_only=True) + res = df.quantile([0.5, 0.75], axis=1) exp = DataFrame([np.arange(1.0, 6.0)] * 2, index=[0.5, 0.75]) tm.assert_frame_equal(res, exp) # full-nan column df["b"] = np.nan - res = df.quantile(0.5, numeric_only=True) + res = df.quantile(0.5) exp = Series([3.0, np.nan], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5, 0.75], 
numeric_only=True) + res = df.quantile([0.5, 0.75]) exp = DataFrame({"a": [3.0, 4.0], "b": [np.nan, np.nan]}, index=[0.5, 0.75]) tm.assert_frame_equal(res, exp) @@ -534,19 +532,19 @@ def test_quantile_empty_no_rows_floats(self): # floats df = DataFrame(columns=["a", "b"], dtype="float64") - res = df.quantile(0.5, numeric_only=True) + res = df.quantile(0.5) exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5], numeric_only=True) + res = df.quantile([0.5]) exp = DataFrame([[np.nan, np.nan]], columns=["a", "b"], index=[0.5]) tm.assert_frame_equal(res, exp) - res = df.quantile(0.5, axis=1, numeric_only=True) + res = df.quantile(0.5, axis=1) exp = Series([], index=[], dtype="float64", name=0.5) tm.assert_series_equal(res, exp) - res = df.quantile([0.5], axis=1, numeric_only=True) + res = df.quantile([0.5], axis=1) exp = DataFrame(columns=[], index=[0.5]) tm.assert_frame_equal(res, exp) @@ -554,7 +552,7 @@ def test_quantile_empty_no_rows_ints(self): # ints df = DataFrame(columns=["a", "b"], dtype="int64") - res = df.quantile(0.5, numeric_only=True) + res = df.quantile(0.5) exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5) tm.assert_series_equal(res, exp) @@ -584,12 +582,12 @@ def test_quantile_empty_no_columns(self): # GH#23925 _get_numeric_data may drop all columns df = DataFrame(pd.date_range("1/1/18", periods=5)) df.columns.name = "captain tightpants" - result = df.quantile(0.5, numeric_only=True) + result = df.quantile(0.5) expected = Series([], index=[], name=0.5, dtype=np.float64) expected.index.name = "captain tightpants" tm.assert_series_equal(result, expected) - result = df.quantile([0.5], numeric_only=True) + result = df.quantile([0.5]) expected = DataFrame([], index=[0.5], columns=[]) expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) @@ -740,7 +738,7 @@ def test_quantile_ea_scalar(self, obj, index): def test_empty_numeric(self, dtype, expected_data, 
expected_index, axis): # GH 14564 df = DataFrame(columns=["a", "b"], dtype=dtype) - result = df.quantile(0.5, axis=axis, numeric_only=True) + result = df.quantile(0.5, axis=axis) expected = Series( expected_data, name=0.5, index=Index(expected_index), dtype="float64" ) @@ -780,7 +778,7 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis): "c": pd.to_datetime(["2011", "2012"]), } ) - result = df[["a", "c"]].quantile(0.5, axis=axis, numeric_only=True) + result = df[["a", "c"]].quantile(0.5, axis=axis) expected = Series( expected_data, name=0.5, index=Index(expected_index), dtype=np.float64 ) From b460aa295a60c1d4387de318d268e0739195de23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Tue, 8 Mar 2022 15:20:43 -0500 Subject: [PATCH 17/22] DEPR: Update tests of quantile with non num cols" --- pandas/tests/frame/methods/test_quantile.py | 28 ++++++++++----------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 98c88a2074fae..f154f72b63d4e 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -52,26 +52,25 @@ def test_quantile_sparse(self, df, expected): tm.assert_series_equal(result, expected) - @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile(self, datetime_frame): from numpy import percentile df = datetime_frame - q = df.quantile(0.1, axis=0) + q = df.quantile(0.1, axis=0, numeric_only=True) assert q["A"] == percentile(df["A"], 10) tm.assert_index_equal(q.index, df.columns) - q = df.quantile(0.9, axis=1) + q = df.quantile(0.9, axis=1, numeric_only=True) assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90) tm.assert_index_equal(q.index, df.index) # test degenerate case - q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0) + q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0, 
numeric_only=True) assert np.isnan(q["x"]) and np.isnan(q["y"]) # non-numeric exclusion df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) - rs = df.quantile(0.5) + rs = df.quantile(0.5, numeric_only=True) with tm.assert_produces_warning(FutureWarning, match="Select only valid"): xp = df.median().rename(0.5) tm.assert_series_equal(rs, xp) @@ -92,7 +91,7 @@ def test_quantile(self, datetime_frame): # so that we exclude non-numeric along the same axis # See GH #7312 df = DataFrame([[1, 2, 3], ["a", "b", 4]]) - result = df.quantile(0.5, axis=1) + result = df.quantile(0.5, axis=1, numeric_only=True) expected = Series([3.0, 4.0], index=[0, 1], name=0.5) tm.assert_series_equal(result, expected) @@ -121,7 +120,7 @@ def test_quantile_axis_mixed(self): "D": ["foo", "bar", "baz"], } ) - result = df.quantile(0.5, axis=1) + result = df.quantile(0.5, axis=1, numeric_only=True) expected = Series([1.5, 2.5, 3.5], name=0.5) tm.assert_series_equal(result, expected) @@ -158,7 +157,6 @@ def test_quantile_axis_parameter(self): with pytest.raises(ValueError, match=msg): df.quantile(0.1, axis="column") - @pytest.mark.filterwarnings("ignore:In future versions of pandas, numeric_only") def test_quantile_interpolation(self): # see gh-10174 @@ -221,7 +219,7 @@ def test_quantile_interpolation_datetime(self, datetime_frame): # interpolation = linear (default case) df = datetime_frame - q = df.quantile(0.1, axis=0, interpolation="linear") + q = df.quantile(0.1, axis=0, numeric_only=True, interpolation="linear") assert q["A"] == np.percentile(df["A"], 10) def test_quantile_interpolation_int(self, int_frame): @@ -264,7 +262,7 @@ def test_quantile_datetime(self): df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]}) # exclude datetime - result = df.quantile(0.5) + result = df.quantile(0.5, numeric_only=True) expected = Series([2.5], index=["b"]) # datetime @@ -300,11 +298,11 @@ def test_quantile_datetime(self): tm.assert_frame_equal(result, expected) # empty when 
numeric_only=True - result = df[["a", "c"]].quantile(0.5) + result = df[["a", "c"]].quantile(0.5, numeric_only=True) expected = Series([], index=[], dtype=np.float64, name=0.5) tm.assert_series_equal(result, expected) - result = df[["a", "c"]].quantile([0.5]) + result = df[["a", "c"]].quantile([0.5], numeric_only=True) expected = DataFrame(index=[0.5]) tm.assert_frame_equal(result, expected) @@ -582,12 +580,12 @@ def test_quantile_empty_no_columns(self): # GH#23925 _get_numeric_data may drop all columns df = DataFrame(pd.date_range("1/1/18", periods=5)) df.columns.name = "captain tightpants" - result = df.quantile(0.5) + result = df.quantile(0.5, numeric_only=True) expected = Series([], index=[], name=0.5, dtype=np.float64) expected.index.name = "captain tightpants" tm.assert_series_equal(result, expected) - result = df.quantile([0.5]) + result = df.quantile([0.5], numeric_only=True) expected = DataFrame([], index=[0.5], columns=[]) expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) @@ -778,7 +776,7 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis): "c": pd.to_datetime(["2011", "2012"]), } ) - result = df[["a", "c"]].quantile(0.5, axis=axis) + result = df[["a", "c"]].quantile(0.5, axis=axis, numeric_only=True) expected = Series( expected_data, name=0.5, index=Index(expected_index), dtype=np.float64 ) From 7194d13957088ae94cc384c629318bc4ed916d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Tue, 8 Mar 2022 15:22:48 -0500 Subject: [PATCH 18/22] DEPR: Raise warning frame.quantile with numeric_only --- pandas/core/frame.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4c8b2d8353acd..0aa4d395ebe2e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -119,6 +119,7 @@ is_integer_dtype, is_iterator, is_list_like, + is_numeric_dtype, is_object_dtype, is_scalar, is_sequence, @@ -10628,8 +10629,8 @@ 
def quantile( """ validate_percentile(q) axis = self._get_axis_number(axis) - - if numeric_only is no_default: + any_not_numeric = any(not is_numeric_dtype(x) for x in self.dtypes) + if numeric_only is no_default and any_not_numeric: warnings.warn( "In future versions of pandas, numeric_only will be set to " "False by default, and the datetime/timedelta columns will " From f5f7a3e9dd0ea34849caf43580f34e091a46e4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Tue, 8 Mar 2022 15:24:32 -0500 Subject: [PATCH 19/22] DEPR: Update doctests quantile, numeric only --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0aa4d395ebe2e..46cfa745e2e5d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10604,11 +10604,11 @@ def quantile( -------- >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]), ... columns=['a', 'b']) - >>> df.quantile(.1, numeric_only=True) + >>> df.quantile(.1) a 1.3 b 3.7 Name: 0.1, dtype: float64 - >>> df.quantile([.1, .5], numeric_only=True) + >>> df.quantile([.1, .5]) a b 0.1 1.3 3.7 0.5 2.5 55.0 From 16e5fc24b5e03f62aa35ce1189e1d9159eb3be55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Thu, 10 Mar 2022 08:46:56 -0500 Subject: [PATCH 20/22] DEPR: Correct test_numeric_only_default_false_warning --- pandas/tests/frame/methods/test_quantile.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index f154f72b63d4e..8dec5ea3ba325 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -19,13 +19,14 @@ def test_numeric_only_default_false_warning(self): df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}) df["C"] = pd.date_range("2014-01-01", periods=3, freq="m") - df_expected_num_only_true = Series( + expected = Series( 
[2.0, 3.0], index=["A", "B"], name=0.5, ) with tm.assert_produces_warning(FutureWarning, match="numeric_only"): - tm.assert_series_equal(df.quantile(0.5), df_expected_num_only_true) + result = df.quantile(0.5) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "df,expected", From b88b196ca7c93cb3ff1a436047d38fce7b42a456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Thu, 10 Mar 2022 08:53:27 -0500 Subject: [PATCH 21/22] DEPR: Add non numeric test to numeric_only warning --- pandas/tests/frame/methods/test_quantile.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 8dec5ea3ba325..3567faa0402f9 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -14,10 +14,18 @@ class TestDataFrameQuantile: - def test_numeric_only_default_false_warning(self): + @pytest.mark.parametrize( + "non_num_col", + [ + pd.date_range("2014-01-01", periods=3, freq="m"), + ["a", "b", "c"], + [DataFrame, Series, Timestamp], + ], + ) + def test_numeric_only_default_false_warning(self, non_num_col): # GH #7308 df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}) - df["C"] = pd.date_range("2014-01-01", periods=3, freq="m") + df["C"] = non_num_col expected = Series( [2.0, 3.0], From db68bf24b17f89d8554e17746d45efd3be1d2a43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Osorio=20L=C3=B3pez?= Date: Thu, 10 Mar 2022 14:03:54 -0500 Subject: [PATCH 22/22] DERP: correct_test_produces_warning in frame.quantile --- pandas/tests/frame/methods/test_quantile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 3567faa0402f9..20f190fcdfd4d 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -34,7 +34,7 @@ def 
test_numeric_only_default_false_warning(self, non_num_col): ) with tm.assert_produces_warning(FutureWarning, match="numeric_only"): result = df.quantile(0.5) - tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "df,expected",