From dc17dec0ff4618f488dad9351662d77b122a96e9 Mon Sep 17 00:00:00 2001 From: Patrick O'Melveny Date: Tue, 23 May 2017 09:04:56 -0700 Subject: [PATCH 1/3] BUG: Allow non-callable attributes in aggregate function. Fixes GH16405 --- doc/source/whatsnew/v0.20.2.txt | 1 + pandas/core/base.py | 9 +++++-- pandas/tests/frame/test_apply.py | 43 ++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 1f71710d19e44..d61c09b48bca1 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -89,6 +89,7 @@ Reshaping - Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) - Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. (:issue:`16395`) +- Bug in ``DataFrame.agg`` and ``Series.agg`` with aggregating on non-callable attributes (:issue:`16404`) Numeric diff --git a/pandas/core/base.py b/pandas/core/base.py index a3ef24c80f883..fb4fd9b362360 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -378,7 +378,7 @@ def aggregate(self, func, *args, **kwargs): def _try_aggregate_string_function(self, arg, *args, **kwargs): """ if arg is a string, then try to operate on it: - - try to find a function on ourselves + - try to find an attribute on ourselves - try to find a numpy function - raise @@ -387,7 +387,12 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs): f = getattr(self, arg, None) if f is not None: - return f(*args, **kwargs) + if callable(f): + return f(*args, **kwargs) + # people may try to aggregate on a non-callable attribute + # but don't let them think they can pass args to it + assert len(args) == 0 and len(kwargs) == 0 + return f f = getattr(np, arg, None) if f is not None: diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index aa7c7a7120c1b..8fde5b8b6acd4 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -635,3 +635,46 @@ def test_nuiscance_columns(self): expected = DataFrame([[6, 6., 'foobarbaz']], index=['sum'], columns=['A', 'B', 'C']) assert_frame_equal(result, expected) + + def test_non_callable_aggregates(self): + + # GH 16405 + df = DataFrame({'A': [None, 2, 3], + 'B': [1.0, np.nan, 3.0], + 'C': ['foo', None, 'bar']}) + + # Function aggregate + result = df.agg({'A': 'count'}) + expected = pd.Series({'A': 2}) + + assert_series_equal(result, expected) + + # Non-function aggregate + result = df.agg({'A': 'size'}) + expected = pd.Series({'A': 3}) + + assert_series_equal(result, expected) + + # Mix function and non-function aggs + result1 = df.agg(['count', 'size']) + result2 = df.agg({'A': ['count', 'size'], + 'B': ['count', 'size'], + 'C': ['count', 'size']}) + expected = pd.DataFrame({'A': {'count': 2, 'size': 3}, + 'B': {'count': 2, 'size': 3}, + 'C': {'count': 2, 'size': 3}}) + + assert_frame_equal(result1.reindex_like(expected), + result2.reindex_like(expected)) + assert_frame_equal(result2.reindex_like(expected), expected) + + # Just functional string arg is same as calling df.arg() + result = df.agg('count') + expected = df.count() + + assert_series_equal(result, expected) + + # Trying to to the same w/ non-function tries to pass args + # which we explicitly forbid + with pytest.raises(AssertionError): + result = df.agg('size') From a117022f15744abaf0a38f9f711ce922ab60f3ee Mon Sep 17 00:00:00 2001 From: Patrick O'Melveny Date: Wed, 24 May 2017 12:30:29 -0700 Subject: [PATCH 2/3] Address changes requested in PR --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/core/base.py | 7 +++++-- pandas/tests/frame/test_apply.py | 14 +++++++------- pandas/tests/series/test_apply.py | 16 ++++++++++++++++ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index d61c09b48bca1..60978c4f7da9c 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -89,7 +89,7 @@ Reshaping - Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) - Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. (:issue:`16395`) -- Bug in ``DataFrame.agg`` and ``Series.agg`` with aggregating on non-callable attributes (:issue:`16404`) +- Bug in ``DataFrame.agg`` and ``Series.agg`` with aggregating on non-callable attributes (:issue:`16405`) Numeric diff --git a/pandas/core/base.py b/pandas/core/base.py index fb4fd9b362360..97c4c8626dcbb 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -378,7 +378,7 @@ def aggregate(self, func, *args, **kwargs): def _try_aggregate_string_function(self, arg, *args, **kwargs): """ if arg is a string, then try to operate on it: - - try to find an attribute on ourselves + - try to find a function (or attribute) on ourselves - try to find a numpy function - raise @@ -389,9 +389,12 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs): if f is not None: if callable(f): return f(*args, **kwargs) + # people may try to aggregate on a non-callable attribute # but don't let them think they can pass args to it - assert len(args) == 0 and len(kwargs) == 0 + assert len(args) == 0 + assert len([kwarg for kwarg in kwargs + if kwarg not in ['axis', '_level']]) == 0 return f f = getattr(np, arg, None) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 8fde5b8b6acd4..7694c23e354cf 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -664,9 +664,8 @@ def test_non_callable_aggregates(self): 'B': {'count': 2, 'size': 3}, 'C': {'count': 2, 'size': 3}}) - assert_frame_equal(result1.reindex_like(expected), - result2.reindex_like(expected)) - assert_frame_equal(result2.reindex_like(expected), expected) + assert_frame_equal(result1, result2, check_like=True) + assert_frame_equal(result2, expected, check_like=True) # Just functional string arg is same as calling df.arg() result = df.agg('count') @@ -674,7 +673,8 @@ def test_non_callable_aggregates(self): assert_series_equal(result, expected) - # Trying to to the same w/ non-function tries to pass args - # which we explicitly forbid - with pytest.raises(AssertionError): - result = df.agg('size') + # Just a string attribute arg same as calling df.arg + result = df.agg('size') + expected = df.size + + assert result == expected diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index c273d3161fff5..2c5f0d7772cc2 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -306,6 +306,22 @@ def test_reduce(self): name=self.series.name) assert_series_equal(result, expected) + def test_non_callable_aggregates(self): + # test agg using non-callable series attributes + s = Series([1, 2, None]) + + # Calling agg w/ just a string arg same as calling s.arg + result = s.agg('size') + expected = s.size + assert result == expected + + # test when mixed w/ callable reducers + result = s.agg(['size', 'count', 'mean']) + expected = Series(OrderedDict({'size': 3.0, + 'count': 2.0, + 'mean': 1.5})) + assert_series_equal(result[expected.index], expected) + class TestSeriesMap(TestData): From 0a367c6ba13ffc71021fd56c925a64f9a57df17f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 1 Jun 2017 06:34:19 -0400 Subject: [PATCH 3/3] minor doc corrections --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/tests/frame/test_apply.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 60978c4f7da9c..ecbfee862353a 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -89,7 +89,7 @@ Reshaping - Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) - Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. (:issue:`16395`) -- Bug in ``DataFrame.agg`` and ``Series.agg`` with aggregating on non-callable attributes (:issue:`16405`) +- Bug in ``DataFrame.agg()`` and ``Series.agg()`` with aggregating on non-callable attributes (:issue:`16405`) Numeric diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 7694c23e354cf..a6f39cabb60ed 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -639,6 +639,8 @@ def test_nuiscance_columns(self): def test_non_callable_aggregates(self): # GH 16405 + # 'size' is a property of frame/series + # validate that this is working df = DataFrame({'A': [None, 2, 3], 'B': [1.0, np.nan, 3.0], 'C': ['foo', None, 'bar']})