pandas-dev · rhshadrach · Aug 2, 2023 · Jul 26, 2023 · Jul 26, 2023 · Jul 26, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -640,6 +640,7 @@ Other
 - Bug in :func:`api.interchange.from_dataframe` when converting an empty DataFrame object (:issue:`53155`)
 - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`)
 - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`)
+- Bug in :meth:`DataFrame.pivot_table` with casting the mean of ints back to an int (:issue:`16676`)
 - Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`)
 - Bug in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`DataFrameGroupBy.shift` when passing both "freq" and "fill_value" silently ignoring "fill_value" instead of raising ``ValueError`` (:issue:`53832`)
 - Bug in :meth:`DataFrame.shift` with ``axis=1`` on a :class:`DataFrame` with a single :class:`ExtensionDtype` column giving incorrect results (:issue:`53832`)
@@ -650,7 +651,6 @@ Other
 - Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`)
 - Bug in :meth:`period_range` the default behavior when freq was not passed as an argument was incorrect(:issue:`53687`)
 - Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`)
--
 
 .. ***DO NOT USE THIS SECTION***
 

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -20,7 +20,6 @@
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
-    is_integer_dtype,
     is_list_like,
     is_nested_list_like,
     is_scalar,
@@ -172,28 +171,6 @@ def __internal_pivot_table(
     if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
         agged = agged.dropna(how="all")
 
-        # gh-21133
-        # we want to down cast if
-        # the original values are ints
-        # as we grouped with a NaN value
-        # and then dropped, coercing to floats
-        for v in values:
-            if (
-                v in data
-                and is_integer_dtype(data[v])
-                and v in agged
-                and not is_integer_dtype(agged[v])
-            ):
-                if not isinstance(agged[v], ABCDataFrame) and isinstance(
-                    data[v].dtype, np.dtype
-                ):
-                    # exclude DataFrame case bc maybe_downcast_to_dtype expects
-                    #  ArrayLike
-                    # e.g. test_pivot_table_multiindex_columns_doctest_case
-                    #  agged.columns is a MultiIndex and 'v' is indexing only
-                    #  on its first level.
-                    agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)
-
     table = agged
 
     # GH17038, this check should only happen if index is defined (not None)

diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py
@@ -185,10 +185,7 @@ def test_drop_multiindex_not_lexsorted(self):
         not_lexsorted_df = not_lexsorted_df.reset_index()
         assert not not_lexsorted_df.columns._is_lexsorted()
 
-        # compare the results
-        tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
-
-        expected = lexsorted_df.drop("a", axis=1)
+        expected = lexsorted_df.drop("a", axis=1).astype(float)
         with tm.assert_produces_warning(PerformanceWarning):
             result = not_lexsorted_df.drop("a", axis=1)
 

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1789,9 +1789,6 @@ def test_groupby_multiindex_not_lexsorted():
     not_lexsorted_df = not_lexsorted_df.reset_index()
     assert not not_lexsorted_df.columns._is_lexsorted()
 
-    # compare the results
-    tm.assert_frame_equal(lexsorted_df, not_lexsorted_df)
-
     expected = lexsorted_df.groupby("a").mean()
     with tm.assert_produces_warning(PerformanceWarning):
         result = not_lexsorted_df.groupby("a").mean()

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -204,7 +204,7 @@ def test_pivot_table_categorical(self):
         result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
 
         exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
-        expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index)
+        expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
         tm.assert_frame_equal(result, expected)
 
     def test_pivot_table_dropna_categoricals(self, dropna):
@@ -225,7 +225,7 @@ def test_pivot_table_dropna_categoricals(self, dropna):
         expected_columns = expected_columns.astype(CDT(categories, ordered=False))
         expected_index = Series([1, 2, 3], name="B")
         expected = DataFrame(
-            [[0, 3, 6], [1, 4, 7], [2, 5, 8]],
+            [[0.0, 3.0, 6.0], [1.0, 4.0, 7.0], [2.0, 5.0, 8.0]],
             index=expected_index,
             columns=expected_columns,
         )
@@ -283,7 +283,7 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
 
         result = df.pivot_table(index="A", values="B", dropna=dropna)
         expected = DataFrame(
-            {"B": [2, 3, 0]},
+            {"B": [2.0, 3.0, 0.0]},
             index=Index(
                 Categorical.from_codes(
                     [0, 1, 2], categories=["low", "high", "left"], ordered=True
@@ -300,7 +300,9 @@ def test_pivot_with_interval_index(self, interval_values, dropna):
         # GH 25814
         df = DataFrame({"A": interval_values, "B": 1})
         result = df.pivot_table(index="A", values="B", dropna=dropna)
-        expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A"))
+        expected = DataFrame(
+            {"B": 1.0}, index=Index(interval_values.unique(), name="A")
+        )
         if not dropna:
             expected = expected.astype(float)
         tm.assert_frame_equal(result, expected)
@@ -444,7 +446,7 @@ def test_pivot_no_values(self):
             index=Grouper(freq="A"), columns=Grouper(key="dt", freq="M")
         )
         exp = DataFrame(
-            [3], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns
+            [3.0], index=pd.DatetimeIndex(["2011-12-31"], freq="A"), columns=exp_columns
         )
         tm.assert_frame_equal(res, exp)
 
@@ -1059,7 +1061,7 @@ def test_pivot_table_multiindex_only(self, cols):
 
         result = df2.pivot_table(values="v", columns=cols)
         expected = DataFrame(
-            [[4, 5, 6]],
+            [[4.0, 5.0, 6.0]],
             columns=MultiIndex.from_tuples([(1, 1), (2, 2), (3, 3)], names=cols),
             index=Index(["v"]),
         )
@@ -1558,7 +1560,9 @@ def test_pivot_datetime_tz(self):
         exp_col1 = Index(["value1", "value1"])
         exp_col2 = Index(["a", "b"], name="label")
         exp_col = MultiIndex.from_arrays([exp_col1, exp_col2])
-        expected = DataFrame([[0, 3], [1, 4], [2, 5]], index=exp_idx, columns=exp_col)
+        expected = DataFrame(
+            [[0.0, 3.0], [1.0, 4.0], [2.0, 5.0]], index=exp_idx, columns=exp_col
+        )
         result = pivot_table(df, index=["dt1"], columns=["label"], values=["value1"])
         tm.assert_frame_equal(result, expected)
 
@@ -1570,18 +1574,35 @@ def test_pivot_datetime_tz(self):
             name="dt2",
         )
         exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3])
-        expected = DataFrame(
+        expected1 = DataFrame(
             np.array(
                 [
-                    [0, 3, 1, 2, 0, 3, 1, 2],
-                    [1, 4, 2, 1, 1, 4, 2, 1],
-                    [2, 5, 1, 2, 2, 5, 1, 2],
+                    [
+                        0,
+                        3,
+                        1,
+                        2,
+                    ],
+                    [1, 4, 2, 1],
+                    [2, 5, 1, 2],
                 ],
                 dtype="int64",
             ),
             index=exp_idx,
-            columns=exp_col,
+            columns=exp_col[:4],
+        )
+        expected2 = DataFrame(
+            np.array(
+                [
+                    [0.0, 3.0, 1.0, 2.0],
+                    [1.0, 4.0, 2.0, 1.0],
+                    [2.0, 5.0, 1.0, 2.0],
+                ],
+            ),
+            index=exp_idx,
+            columns=exp_col[4:],
         )
+        expected = concat([expected1, expected2], axis=1)
 
         result = pivot_table(
             df,
@@ -1628,7 +1649,7 @@ def test_pivot_dtaccessor(self):
 
         exp_idx = Index(["a", "b"], name="label")
         expected = DataFrame(
-            {7: [0, 3], 8: [1, 4], 9: [2, 5]},
+            {7: [0.0, 3.0], 8: [1.0, 4.0], 9: [2.0, 5.0]},
             index=exp_idx,
             columns=Index([7, 8, 9], dtype=np.int32, name="dt1"),
         )
@@ -1639,7 +1660,7 @@ def test_pivot_dtaccessor(self):
         )
 
         expected = DataFrame(
-            {7: [0, 3], 8: [1, 4], 9: [2, 5]},
+            {7: [0.0, 3.0], 8: [1.0, 4.0], 9: [2.0, 5.0]},
             index=Index([1, 2], dtype=np.int32, name="dt2"),
             columns=Index([7, 8, 9], dtype=np.int32, name="dt1"),
         )
@@ -1660,7 +1681,7 @@ def test_pivot_dtaccessor(self):
             names=["dt1", "dt2"],
         )
         expected = DataFrame(
-            np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"),
+            np.array([[0.0, 3.0, 1.0, 4.0, 2.0, 5.0]]),
             index=Index([2013], dtype=np.int32),
             columns=exp_col,
         )
@@ -1764,13 +1785,7 @@ def test_pivot_table_margins_name_with_aggfunc_list(self):
         expected = DataFrame(table.values, index=ix, columns=cols)
         tm.assert_frame_equal(table, expected)
 
-    def test_categorical_margins(self, observed, request):
-        if observed:
-            request.node.add_marker(
-                pytest.mark.xfail(
-                    reason="GH#17035 (np.mean of ints is casted back to ints)"
-                )
-            )
+    def test_categorical_margins(self, observed):
         # GH 10989
         df = DataFrame(
             {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2}
@@ -1783,13 +1798,7 @@ def test_categorical_margins(self, observed, request):
         table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
         tm.assert_frame_equal(table, expected)
 
-    def test_categorical_margins_category(self, observed, request):
-        if observed:
-            request.node.add_marker(
-                pytest.mark.xfail(
-                    reason="GH#17035 (np.mean of ints is casted back to ints)"
-                )
-            )
+    def test_categorical_margins_category(self, observed):
         df = DataFrame(
             {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2}
         )
@@ -1816,7 +1825,7 @@ def test_margins_casted_to_float(self):
 
         result = pivot_table(df, index="D", margins=True)
         expected = DataFrame(
-            {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]},
+            {"A": [3.0, 7.0, 5], "B": [2.5, 6.5, 4.5], "C": [2.0, 5.0, 3.5]},
             index=Index(["X", "Y", "All"], name="D"),
         )
         tm.assert_frame_equal(result, expected)
@@ -2249,7 +2258,7 @@ def test_pivot_table_sort_false_with_multiple_values(self):
             index=["lastname", "firstname"], values=["height", "age"], sort=False
         )
         expected = DataFrame(
-            [[173, 47], [182, 33]],
+            [[173.0, 47.0], [182.0, 33.0]],
             columns=["height", "age"],
             index=MultiIndex.from_tuples(
                 [("Foo", "John"), ("Bar", "Michael")],