From 4a3dee0b7b0298d5c76aeb5c11bdf5cb50575c1a Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 28 Nov 2023 23:15:15 -0500 Subject: [PATCH 1/5] DEPR: Default of observed=False in DataFrame.pivot_table --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/frame.py | 2 +- pandas/core/reshape/pivot.py | 20 +++++++++++++++++--- pandas/tests/reshape/test_pivot.py | 29 +++++++++++++++++++++-------- 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index dce776755ad7e..c60e7092ca41c 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -401,6 +401,7 @@ Other Deprecations - Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`) - Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`) - Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`) +- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; will be ``True`` in a future version (:issue:`56236`) - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`) - Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`) - Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6110f694a6700..b2aae4babc580 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9372,7 +9372,7 @@ def pivot_table( margins: bool = False, dropna: bool = True, margins_name: Level = "All", - observed: bool = False, + observed: bool | lib.NoDefault = lib.no_default, sort: bool = True, ) -> DataFrame: from pandas.core.reshape.pivot import pivot_table diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index eba4f72b5eb8f..82718d4c43a65 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -10,6 +10,7 @@ Literal, cast, ) +import warnings import numpy as np @@ -18,6 +19,7 @@ Appender, Substitution, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( @@ -68,7 +70,7 @@ def pivot_table( margins: bool = False, dropna: bool = True, margins_name: Hashable = "All", - observed: bool = False, + observed: bool | lib.NoDefault = lib.no_default, sort: bool = True, ) -> DataFrame: index = _convert_by(index) @@ -123,7 +125,7 @@ def __internal_pivot_table( margins: bool, dropna: bool, margins_name: Hashable, - observed: bool, + observed: bool | lib.NoDefault, sort: bool, ) -> DataFrame: """ @@ -166,7 +168,18 @@ def __internal_pivot_table( pass values = list(values) - grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna) + observed_bool = False if observed is lib.no_default else observed + grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna) + if observed is lib.no_default and any( + ping._passed_categorical for ping in grouped.grouper.groupings + ): + warnings.warn( + "The default value of observed=False is deprecated and will change " + "to observed=True in a future version of pandas. Specify " + "observed=False to silence this warning and retain the current behavior", + category=FutureWarning, + stacklevel=find_stack_level(), + ) agged = grouped.agg(aggfunc) if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): @@ -719,6 +732,7 @@ def crosstab( margins=margins, margins_name=margins_name, dropna=dropna, + observed=False, **kwargs, # type: ignore[arg-type] ) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f8ececf6c0540..885057cb9642f 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -220,7 +220,9 @@ def test_pivot_table_dropna_categoricals(self, dropna): ) df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False)) - result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna) + msg = "The default value of observed=False is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna) expected_columns = Series(["a", "b", "c"], name="A") expected_columns = expected_columns.astype( CategoricalDtype(categories, ordered=False) @@ -250,7 +252,9 @@ def test_pivot_with_non_observable_dropna(self, dropna): } ) - result = df.pivot_table(index="A", values="B", dropna=dropna) + msg = "The default value of observed=False is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.pivot_table(index="A", values="B", dropna=dropna) if dropna: values = [2.0, 3.0] codes = [0, 1] @@ -283,7 +287,9 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna): } ) - result = df.pivot_table(index="A", values="B", dropna=dropna) + msg = "The default value of observed=False is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.pivot_table(index="A", values="B", dropna=dropna) expected = DataFrame( {"B": [2.0, 3.0, 0.0]}, index=Index( @@ -301,7 +307,10 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna): def test_pivot_with_interval_index(self, interval_values, dropna): # GH 25814 df = DataFrame({"A": interval_values, "B": 1}) - result = df.pivot_table(index="A", values="B", dropna=dropna) + + msg = "The default value of observed=False is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.pivot_table(index="A", values="B", dropna=dropna) expected = DataFrame( {"B": 1.0}, index=Index(interval_values.unique(), name="A") ) @@ -1825,7 +1834,9 @@ def test_categorical_margins_category(self, observed): df.y = df.y.astype("category") df.z = df.z.astype("category") - table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) + msg = "The default value of observed=False is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) tm.assert_frame_equal(table, expected) def test_margins_casted_to_float(self): @@ -1887,9 +1898,11 @@ def test_categorical_aggfunc(self, observed): {"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]} ) df["C1"] = df["C1"].astype("category") - result = df.pivot_table( - "V", index="C1", columns="C2", dropna=observed, aggfunc="count" - ) + msg = "The default value of observed=False is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.pivot_table( + "V", index="C1", columns="C2", dropna=observed, aggfunc="count" + ) expected_index = pd.CategoricalIndex( ["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1" From 60540c74d40699ab2d5cc9dcd31fc67e30c9a5a3 Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 28 Nov 2023 23:55:19 -0500 Subject: [PATCH 2/5] Finish up --- doc/source/user_guide/categorical.rst | 2 +- doc/source/whatsnew/v0.23.0.rst | 2 ++ pandas/core/frame.py | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index 34d04745ccdb5..8fb991dca02db 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -647,7 +647,7 @@ Pivot tables: raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"]) df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]}) - pd.pivot_table(df, values="values", index=["A", "B"]) + pd.pivot_table(df, values="values", index=["A", "B"], observed=False) Data munging ------------ diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index cdffc6968a170..be6a2a2a7d2c1 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -278,6 +278,7 @@ To show only observed values: For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword: .. ipython:: python + :okwarning: cat1 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True) @@ -287,6 +288,7 @@ For pivoting operations, this behavior is *already* controlled by the ``dropna`` df .. ipython:: python + :okwarning: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b2aae4babc580..f4d84445fbead 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9262,6 +9262,11 @@ def pivot( If True: only show observed values for categorical groupers. If False: show all values for categorical groupers. + .. deprecated:: 2.2.0 + + The default value of ``False`` is deprecated and will change to + ``True`` in a future version of pandas. + sort : bool, default True Specifies if the result should be sorted. From 4b7c56ab656b158573d6fcca917447df5d4c2bf5 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 29 Nov 2023 22:10:05 -0500 Subject: [PATCH 3/5] fixup --- pandas/tests/reshape/test_pivot.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 885057cb9642f..86ccc04725019 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -201,7 +201,9 @@ def test_pivot_table_categorical(self): ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True ) df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) - result = pivot_table(df, values="values", index=["A", "B"], dropna=True) + msg = "The default value of observed=False is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = pivot_table(df, values="values", index=["A", "B"], dropna=True) exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index) @@ -331,9 +333,11 @@ def test_pivot_with_interval_index_margins(self): } ) - pivot_tab = pivot_table( - df, index="C", columns="B", values="A", aggfunc="sum", margins=True - ) + msg = "The default value of observed=False is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + pivot_tab = pivot_table( + df, index="C", columns="B", values="A", aggfunc="sum", margins=True + ) result = pivot_tab["All"] expected = Series( From fece731a5299ae67b3e185c674dfa62e22cc5fd8 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 2 Dec 2023 15:45:29 -0500 Subject: [PATCH 4/5] Convert to code-block --- doc/source/whatsnew/v0.23.0.rst | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index be6a2a2a7d2c1..808741ccf4475 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -278,7 +278,6 @@ To show only observed values: For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword: .. ipython:: python - :okwarning: cat1 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True) @@ -287,13 +286,33 @@ For pivoting operations, this behavior is *already* controlled by the ``dropna`` df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) df -.. ipython:: python - :okwarning: - pd.pivot_table(df, values='values', index=['A', 'B'], - dropna=True) - pd.pivot_table(df, values='values', index=['A', 'B'], - dropna=False) +.. code-block:: ipython + + In [1]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True) + + Out[1]: + values + A B + a c 1.0 + d 2.0 + b c 3.0 + d 4.0 + + In [2]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=False) + + Out[2]: + values + A B + a c 1.0 + d 2.0 + y NaN + b c 3.0 + d 4.0 + y NaN + z c NaN + d NaN + y NaN .. _whatsnew_0230.enhancements.window_raw: From a95d6ce13d1696bd68adeebe85f8f49b18fdbbcf Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 2 Dec 2023 15:47:26 -0500 Subject: [PATCH 5/5] Kickoff builds