Skip to content

BUG: Support dictionaries in SeriesGroupBy.agg #50687

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,7 @@ Groupby/resample/rolling
- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`)
- Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`)
- Bug in :meth:`.SeriesGroupBy.value_counts` did not respect ``sort=False`` (:issue:`50482`)
- Bug in :meth:`.SeriesGroupBy.agg` would incorrectly raise on any dictionary; nested renamers (a dictionary containing dictionary values) are still not supported (:issue:`50684`)
-

Reshaping
Expand Down
19 changes: 8 additions & 11 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,18 +292,15 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
agg = aggregate

def _aggregate_multiple_funcs(self, arg) -> DataFrame:
if isinstance(arg, dict):

# show the deprecation, but only if we
# have not shown a higher level one
# GH 15931
raise SpecificationError("nested renamer is not supported")

if any(isinstance(x, (tuple, list)) for x in arg):
if is_dict_like(arg):
# GH#50684
# Series is dict-like but doesn't have a `.values()`; use `.items()`
if any(is_dict_like(value) for _, value in arg.items()):
raise SpecificationError("nested renamer is not supported")
arg = list(arg.items())

elif any(isinstance(x, (tuple, list)) for x in arg):
arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg]

# indicated column order
columns = next(zip(*arg))
else:
# list of functions / function names
columns = []
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,9 +434,12 @@ def numpystd(x):

# this uses column selection & renaming
msg = r"nested renamer is not supported"
d = {"C": np.mean, "D": {"foo": np.mean, "bar": np.std}}
with pytest.raises(SpecificationError, match=msg):
d = {"C": np.mean, "D": {"foo": np.mean, "bar": np.std}}
grouped.aggregate(d)
with pytest.raises(SpecificationError, match=msg):
# GH#50684
grouped["B"].aggregate(d)

# But without renaming, these functions are OK
d = {"C": [np.mean], "D": [numpymean, numpystd]}
Expand Down
34 changes: 22 additions & 12 deletions pandas/tests/groupby/aggregate/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,10 @@ def test_agg_dict_renaming_deprecation():
with pytest.raises(KeyError, match=msg):
df.groupby("A")[["B", "C"]].agg({"ma": "max"})

msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
df.groupby("A").B.agg({"foo": "count"})
# GH#50684
result = df.groupby("A").B.agg({"foo": "count"})
expected = DataFrame({"foo": df.groupby("A").B.count()})
tm.assert_frame_equal(result, expected)


def test_agg_compat():
Expand All @@ -246,12 +247,17 @@ def test_agg_compat():

g = df.groupby(["A", "B"])

msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
g["D"].agg({"C": ["sum", "std"]})
result = g["D"].agg({"C": ["sum", "std"]})
expected = g["D"].agg(["sum", "std"])
expected.columns = MultiIndex(
levels=[["C"], ["sum", "std"]],
codes=[[0, 0], [0, 1]],
)
tm.assert_frame_equal(result, expected)

with pytest.raises(SpecificationError, match=msg):
g["D"].agg({"C": "sum", "D": "std"})
result = g["D"].agg({"C": "sum", "D": "std"})
expected = g["D"].agg(["sum", "std"]).rename(columns={"sum": "C", "std": "D"})
tm.assert_frame_equal(result, expected)


def test_agg_nested_dicts():
Expand All @@ -276,11 +282,15 @@ def test_agg_nested_dicts():

# same name as the original column
# GH9052
with pytest.raises(SpecificationError, match=msg):
g["D"].agg({"result1": np.sum, "result2": np.mean})
result = g["D"].agg({"result1": np.sum, "result2": np.mean})
expected = g["D"].agg(["sum", "mean"])
expected.columns = ["result1", "result2"]
tm.assert_frame_equal(result, expected)

with pytest.raises(SpecificationError, match=msg):
g["D"].agg({"D": np.sum, "result2": np.mean})
result = g["D"].agg({"D": np.sum, "result2": np.mean})
expected = g["D"].agg(["sum", "mean"])
expected.columns = ["D", "result2"]
tm.assert_frame_equal(result, expected)


def test_agg_item_by_item_raise_typeerror():
Expand Down
31 changes: 18 additions & 13 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
import pytest

from pandas.compat import IS64
from pandas.errors import (
PerformanceWarning,
SpecificationError,
)
from pandas.errors import PerformanceWarning

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -71,9 +68,10 @@ def test_basic(dtype):
# complex agg
agged = grouped.aggregate([np.mean, np.std])

msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
grouped.aggregate({"one": np.mean, "two": np.std})
# GH#50684
result = grouped.aggregate({"one": np.mean, "two": np.std})
expected = agged.rename(columns={"mean": "one", "std": "two"})
tm.assert_frame_equal(result, expected)

group_constants = {0: 10, 1: 20, 2: 30}
agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
Expand Down Expand Up @@ -452,9 +450,15 @@ def test_frame_set_name_single(df):
result = grouped["C"].agg([np.mean, np.std])
assert result.index.name == "A"

msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
grouped["C"].agg({"foo": np.mean, "bar": np.std})
# GH#50684
result = grouped["C"].agg({"foo": np.mean, "bar": np.std})
expected = DataFrame(
{
"foo": grouped["C"].mean(),
"bar": grouped["C"].std(),
}
)
tm.assert_frame_equal(result, expected)


def test_multi_func(df):
Expand Down Expand Up @@ -677,9 +681,10 @@ def test_groupby_as_index_agg(df):

grouped = df.groupby("A", as_index=True)

msg = r"nested renamer is not supported"
with pytest.raises(SpecificationError, match=msg):
grouped["C"].agg({"Q": np.sum})
# GH#50684
result = grouped["C"].agg({"Q": np.sum})
expected = DataFrame({"Q": grouped["C"].sum()})
tm.assert_frame_equal(result, expected)

# multi-key

Expand Down
22 changes: 15 additions & 7 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,15 +550,23 @@ def test_agg_misc():
result = t[["A", "B"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
tm.assert_frame_equal(result, expected, check_like=True)

msg = "nested renamer is not supported"

# series like aggs
# series like aggs - GH#50684
for t in cases:
with pytest.raises(pd.errors.SpecificationError, match=msg):
t["A"].agg({"A": ["sum", "std"]})
result = t["A"].agg({"A": ["sum", "std"]})
expected = t["A"].agg(["sum", "std"])
expected.columns = pd.MultiIndex(
levels=[["A"], ["sum", "std"]],
codes=[[0, 0], [0, 1]],
)
tm.assert_frame_equal(result, expected)

with pytest.raises(pd.errors.SpecificationError, match=msg):
t["A"].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
result = t["A"].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
expected = t["A"].agg(["sum", "std", "mean", "std"])
expected.columns = pd.MultiIndex(
levels=[["A", "B"], ["sum", "std", "mean"]],
codes=[[0, 0, 1, 1], [0, 1, 2, 1]],
)
tm.assert_frame_equal(result, expected)

# errors
# invalid names in the agg specification
Expand Down