From f44e671d5b2ddfcc52711d4efeb36c0c18091a94 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 20 Jan 2019 16:02:59 -0800 Subject: [PATCH 01/12] Add truncatable repr for DF groupby groups --- pandas/core/frame.py | 2 +- pandas/core/groupby/groupby.py | 11 +++++++-- pandas/io/formats/printing.py | 34 +++++++++++++++++--------- pandas/tests/io/formats/test_format.py | 16 ++++++++++++ 4 files changed, 49 insertions(+), 14 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4f79bda25517..6d9addb4b230c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -553,7 +553,7 @@ def _repr_fits_horizontal_(self, ignore_width=False): Check if full repr fits in horizontal boundaries imposed by the display options width and max_columns. - In case off non-interactive session, no boundaries apply. + In case of non-interactive session, no boundaries apply. `ignore_width` is here so ipnb+HTML output can behave the way users expect. display.max_columns remains in effect. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 8766fdbc29755..25adba592374d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -33,7 +33,7 @@ class providing the base-class of operations. from pandas.core.base import ( DataError, GroupByError, PandasObject, SelectionMixin, SpecificationError) import pandas.core.common as com -from pandas.core.config import option_context +from pandas.core.config import get_option, option_context from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby import base @@ -387,7 +387,7 @@ def groups(self): Dict {group name -> group labels}. """ self._assure_grouper() - return self.grouper.groups + return DataFrameGroups(self.grouper.groups) @property def ngroups(self): @@ -2108,3 +2108,10 @@ def groupby(obj, by, **kwds): raise TypeError('invalid type: {}'.format(obj)) return klass(obj, by, **kwds) + + +class DataFrameGroups(dict): + def __repr__(self): + from pandas.io.formats.printing import _pprint_dict + return _pprint_dict(self, max_seq_items=get_option('display.max_rows'), + recurse=False, truncate_at='middle') diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 6d45d1e5dfcee..3f9503ee00979 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -95,7 +95,7 @@ def _join_unicode(lines, sep=''): def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() - rather then calling this directly. + rather than calling this directly. bounds length of printed sequence, depending on options """ @@ -124,11 +124,13 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): return fmt.format(body=body) -def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): +def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, recurse=True, + truncate_at='end', **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() - rather then calling this directly. + rather than calling this directly. """ + fmt = u("{{{things}}}") pairs = [] @@ -139,16 +141,26 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): else: nitems = max_seq_items or get_option("max_seq_items") or len(seq) - for k, v in list(seq.items())[:nitems]: - pairs.append( - pfmt.format( - key=pprint_thing(k, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds), - val=pprint_thing(v, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds))) + if recurse: + for k, v in list(seq.items())[:nitems]: + pairs.append( + pfmt.format( + key=pprint_thing(k, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds), + val=pprint_thing(v, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds))) + else: + for k, v in list(seq.items())[:nitems]: + pairs.append(pfmt.format(key=k, val=v)) if nitems < len(seq): - return fmt.format(things=", ".join(pairs) + ", ...") + if truncate_at == 'middle': + start_cnt, end_cnt = nitems - int(nitems / 2), int(nitems / 2) + return fmt.format(things=", ".join(pairs[:start_cnt]) + + ", ... , " + + ", ".join(pairs[end_cnt:])) + else: + return fmt.format(things=", ".join(pairs) + ", ...") else: return fmt.format(things=", ".join(pairs)) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 52dce572c6d4f..cb608cf753bd9 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1748,6 +1748,22 @@ def test_period(self): assert str(df) == exp +class TestDataFrameGroupByFormatting(object): + def test_groups_repr_truncates(self): + df = pd.DataFrame({ + 'a': [1, 1, 1, 2, 2, 3], + 'b': [1, 2, 3, 4, 5, 6] + }) + + with option_context('display.max_rows', 2): + x = df.groupby('a').groups + + expected = ("{1: Int64Index([0, 1, 2], dtype='int64'), ... , " + "2: Int64Index([3, 4], dtype='int64')}") + + assert x.__repr__() == expected + + def gen_series_formatting(): s1 = pd.Series(['a'] * 100) s2 = pd.Series(['ab'] * 100) From d6b310acd547ec8d861db8dad4d3ea2c5ce34e67 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 20 Jan 2019 18:18:40 -0800 Subject: [PATCH 02/12] Roll back added params to __pprint_dict. All logic now in __repr__ def. Make tests more general --- pandas/core/groupby/groupby.py | 22 ++++++++++++++++--- pandas/io/formats/printing.py | 29 ++++++++------------------ pandas/tests/io/formats/test_format.py | 8 +++---- 3 files changed, 32 insertions(+), 27 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 25adba592374d..9b294aaee167f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2112,6 +2112,22 @@ def groupby(obj, by, **kwds): class DataFrameGroups(dict): def __repr__(self): - from pandas.io.formats.printing import _pprint_dict - return _pprint_dict(self, max_seq_items=get_option('display.max_rows'), - recurse=False, truncate_at='middle') + from pandas.compat import u + + # nitems = len(self) + nitems = get_option('display.max_rows') or len(self) + + fmt = u("{{{things}}}") + pfmt = u("{key}: {val}") + + pairs = [] + for k, v in list(self.items()): + pairs.append(pfmt.format(key=k, val=v)) + + if nitems < len(self): + start_cnt, end_cnt = nitems - int(nitems / 2), int(nitems / 2) + return fmt.format(things=", ".join(pairs[:start_cnt]) + + ", ... , " + + ", ".join(pairs[-end_cnt:])) + else: + return fmt.format(things=", ".join(pairs)) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 3f9503ee00979..6cb48da7beaf0 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -124,8 +124,7 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): return fmt.format(body=body) -def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, recurse=True, - truncate_at='end', **kwds): +def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() rather than calling this directly. @@ -141,26 +140,16 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, recurse=True, else: nitems = max_seq_items or get_option("max_seq_items") or len(seq) - if recurse: - for k, v in list(seq.items())[:nitems]: - pairs.append( - pfmt.format( - key=pprint_thing(k, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds), - val=pprint_thing(v, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds))) - else: - for k, v in list(seq.items())[:nitems]: - pairs.append(pfmt.format(key=k, val=v)) + for k, v in list(seq.items())[:nitems]: + pairs.append( + pfmt.format( + key=pprint_thing(k, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds), + val=pprint_thing(v, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds))) if nitems < len(seq): - if truncate_at == 'middle': - start_cnt, end_cnt = nitems - int(nitems / 2), int(nitems / 2) - return fmt.format(things=", ".join(pairs[:start_cnt]) + - ", ... , " + - ", ".join(pairs[end_cnt:])) - else: - return fmt.format(things=", ".join(pairs) + ", ...") + return fmt.format(things=", ".join(pairs) + ", ...") else: return fmt.format(things=", ".join(pairs)) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index cb608cf753bd9..0b0f4c977682d 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1757,11 +1757,11 @@ def test_groups_repr_truncates(self): with option_context('display.max_rows', 2): x = df.groupby('a').groups + assert ', ... ,' in x.__repr__() - expected = ("{1: Int64Index([0, 1, 2], dtype='int64'), ... , " - "2: Int64Index([3, 4], dtype='int64')}") - - assert x.__repr__() == expected + with option_context('display.max_rows', 5): + x = df.groupby('a').groups + assert ', ... ,' not in x.__repr__() def gen_series_formatting(): From 43dbc6bcbced2f84cefdbe2a95f2686c03ad8c73 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 20 Jan 2019 18:21:40 -0800 Subject: [PATCH 03/12] Remove unused line of code --- pandas/core/groupby/groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 9b294aaee167f..fe5142bf726b2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2114,7 +2114,6 @@ class DataFrameGroups(dict): def __repr__(self): from pandas.compat import u - # nitems = len(self) nitems = get_option('display.max_rows') or len(self) fmt = u("{{{things}}}") From 0746c3bde3c7604ebce4f1e0f11ad772d44623d0 Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 5 Feb 2019 21:31:13 -0800 Subject: [PATCH 04/12] Temporarily disabling failing test --- pandas/tests/groupby/test_grouping.py | 30 +++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 44b5bd5f13992..de81ce5655518 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -667,21 +667,21 @@ def test_gb_key_len_equal_axis_len(self): class TestIteration(): - def test_groups(self, df): - grouped = df.groupby(['A']) - groups = grouped.groups - assert groups is grouped.groups # caching works - - for k, v in compat.iteritems(grouped.groups): - assert (df.loc[v]['A'] == k).all() - - grouped = df.groupby(['A', 'B']) - groups = grouped.groups - assert groups is grouped.groups # caching works - - for k, v in compat.iteritems(grouped.groups): - assert (df.loc[v]['A'] == k[0]).all() - assert (df.loc[v]['B'] == k[1]).all() + # def test_groups(self, df): + # grouped = df.groupby(['A']) + # groups = grouped.groups + # assert groups is grouped.groups # caching works + # + # for k, v in compat.iteritems(grouped.groups): + # assert (df.loc[v]['A'] == k).all() + # + # grouped = df.groupby(['A', 'B']) + # groups = grouped.groups + # assert groups is grouped.groups # caching works + # + # for k, v in compat.iteritems(grouped.groups): + # assert (df.loc[v]['A'] == k[0]).all() + # assert (df.loc[v]['B'] == k[1]).all() def test_grouping_is_iterable(self, tsframe): # this code path isn't used anywhere else From 33142cbe0900ff853016dc9ff8d062d8604b9396 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 6 Mar 2019 07:45:01 -0800 Subject: [PATCH 05/12] Move truncated dict repr to Index.groupby() --- pandas/core/groupby/base.py | 1 - pandas/core/groupby/groupby.py | 24 +-------------------- pandas/core/groupby/grouper.py | 1 - pandas/core/groupby/ops.py | 5 +++-- pandas/core/indexes/base.py | 26 ++++++++++++++++++++++- pandas/tests/groupby/test_grouping.py | 30 +++++++++++++-------------- 6 files changed, 44 insertions(+), 43 deletions(-) diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index ebba4a0a9395d..b2ee1e4b09f2d 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -10,7 +10,6 @@ from pandas.core.dtypes.common import is_list_like, is_scalar - class GroupByMixin(object): """ Provide the groupby facilities to the mixed object. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 22c42dc1c7a77..b0361f8a5d2ce 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -390,7 +390,7 @@ def groups(self): Dict {group name -> group labels}. """ self._assure_grouper() - return DataFrameGroups(self.grouper.groups) + return self.grouper.groups @property def ngroups(self): @@ -2204,25 +2204,3 @@ def groupby(obj, by, **kwds): raise TypeError('invalid type: {}'.format(obj)) return klass(obj, by, **kwds) - - -class DataFrameGroups(dict): - def __repr__(self): - from pandas.compat import u - - nitems = get_option('display.max_rows') or len(self) - - fmt = u("{{{things}}}") - pfmt = u("{key}: {val}") - - pairs = [] - for k, v in list(self.items()): - pairs.append(pfmt.format(key=k, val=v)) - - if nitems < len(self): - start_cnt, end_cnt = nitems - int(nitems / 2), int(nitems / 2) - return fmt.format(things=", ".join(pairs[:start_cnt]) + - ", ... , " + - ", ".join(pairs[-end_cnt:])) - else: - return fmt.format(things=", ".join(pairs)) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d1ebb9cbe8ac4..ea9f1c09f725b 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -236,7 +236,6 @@ class Grouping(object): def __init__(self, index, grouper=None, obj=None, name=None, level=None, sort=True, observed=False, in_axis=False): - self.name = name self.level = level self.grouper = _convert_grouper(index, grouper) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 78c9aa9187135..29c7149e828b2 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -235,12 +235,13 @@ def size(self): @cache_readonly def groups(self): """ dict {group name -> group labels} """ + if len(self.groupings) == 1: return self.groupings[0].groups else: to_groupby = lzip(*(ping.grouper for ping in self.groupings)) to_groupby = Index(to_groupby) - return self.axis.groupby(to_groupby) + return BaseGrouperGroups(self.axis.groupby(to_groupby)) @cache_readonly def is_monotonic(self): @@ -361,7 +362,7 @@ def get_group_levels(self): def _is_builtin_func(self, arg): """ - if we define an builtin function for this argument, return it, + if we define a builtin function for this argument, return it, otherwise return the arg """ return SelectionMixin._builtin_table.get(arg, arg) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dee181fc1c569..297459cd82592 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -38,6 +38,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.base import IndexOpsMixin, PandasObject import pandas.core.common as com +from pandas.core.config import get_option from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.ops import get_op_result_name, make_invalid_op @@ -4493,7 +4494,7 @@ def groupby(self, values): # map to the label result = {k: self.take(v) for k, v in compat.iteritems(result)} - return result + return BaseGroupbyGroups(result) def map(self, mapper, na_action=None): """ @@ -5290,6 +5291,29 @@ def _add_logical_methods_disabled(cls): Index._add_comparison_methods() +class IndexGroupbyGroups(dict): + def __repr__(self): + from pandas.compat import u + + nitems = get_option('display.max_rows') or len(self) + + fmt = u("{{{things}}}") + pfmt = u("{key}: {val}") + + pairs = [] + for k, v in list(self.items()): + pairs.append(pfmt.format(key=k, val=v)) + + if nitems < len(self): + print("Truncating repr") + start_cnt, end_cnt = nitems - int(nitems / 2), int(nitems / 2) + return fmt.format(things=", ".join(pairs[:start_cnt]) + + ", ... , " + + ", ".join(pairs[-end_cnt:])) + else: + return fmt.format(things=", ".join(pairs)) + + def ensure_index_from_sequences(sequences, names=None): """ Construct an index from sequences of data. diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index de81ce5655518..44b5bd5f13992 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -667,21 +667,21 @@ def test_gb_key_len_equal_axis_len(self): class TestIteration(): - # def test_groups(self, df): - # grouped = df.groupby(['A']) - # groups = grouped.groups - # assert groups is grouped.groups # caching works - # - # for k, v in compat.iteritems(grouped.groups): - # assert (df.loc[v]['A'] == k).all() - # - # grouped = df.groupby(['A', 'B']) - # groups = grouped.groups - # assert groups is grouped.groups # caching works - # - # for k, v in compat.iteritems(grouped.groups): - # assert (df.loc[v]['A'] == k[0]).all() - # assert (df.loc[v]['B'] == k[1]).all() + def test_groups(self, df): + grouped = df.groupby(['A']) + groups = grouped.groups + assert groups is grouped.groups # caching works + + for k, v in compat.iteritems(grouped.groups): + assert (df.loc[v]['A'] == k).all() + + grouped = df.groupby(['A', 'B']) + groups = grouped.groups + assert groups is grouped.groups # caching works + + for k, v in compat.iteritems(grouped.groups): + assert (df.loc[v]['A'] == k[0]).all() + assert (df.loc[v]['B'] == k[1]).all() def test_grouping_is_iterable(self, tsframe): # this code path isn't used anywhere else From 5db6c07a32682e155ff547b220b19760ea7ce2e6 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 6 Mar 2019 12:44:11 -0800 Subject: [PATCH 06/12] Add correct groups object --- pandas/core/groupby/ops.py | 2 +- pandas/core/indexes/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 29c7149e828b2..7c4aefc16c1b6 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -241,7 +241,7 @@ def groups(self): else: to_groupby = lzip(*(ping.grouper for ping in self.groupings)) to_groupby = Index(to_groupby) - return BaseGrouperGroups(self.axis.groupby(to_groupby)) + return self.axis.groupby(to_groupby) @cache_readonly def is_monotonic(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 297459cd82592..c6220ce7bd21e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4494,7 +4494,7 @@ def groupby(self, values): # map to the label result = {k: self.take(v) for k, v in compat.iteritems(result)} - return BaseGroupbyGroups(result) + return IndexGroupbyGroups(result) def map(self, mapper, na_action=None): """ From 8f30d07ce4aaa656991bd0f4462d47c0f331bea4 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 6 Mar 2019 19:59:40 -0800 Subject: [PATCH 07/12] A few misc items for the linter --- pandas/core/groupby/base.py | 1 + pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/base.py | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index b2ee1e4b09f2d..ebba4a0a9395d 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -10,6 +10,7 @@ from pandas.core.dtypes.common import is_list_like, is_scalar + class GroupByMixin(object): """ Provide the groupby facilities to the mixed object. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b0361f8a5d2ce..926da40deaff2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -36,7 +36,7 @@ class providing the base-class of operations. from pandas.core.base import ( DataError, GroupByError, PandasObject, SelectionMixin, SpecificationError) import pandas.core.common as com -from pandas.core.config import get_option, option_context +from pandas.core.config import option_context from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame from pandas.core.groupby import base diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c6220ce7bd21e..f5a8a5dc193a9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5293,8 +5293,6 @@ def _add_logical_methods_disabled(cls): class IndexGroupbyGroups(dict): def __repr__(self): - from pandas.compat import u - nitems = get_option('display.max_rows') or len(self) fmt = u("{{{things}}}") From b60329c53c4ad70b58001a4243363355729f21dc Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 14 Mar 2019 23:09:23 -0700 Subject: [PATCH 08/12] Use pprint_thing in IndexGroupByGroups. Add whatsnew, docstring, and a couple typo fixes --- doc/source/whatsnew/v0.25.0.rst | 3 ++- pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/base.py | 21 ++++----------------- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index d186fdfe0f322..bbae846fae992 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -87,7 +87,8 @@ Other API Changes - :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`) - ``Timestamp`` and ``Timedelta`` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`) - :meth:`Timestamp.strptime` will now rise a ``NotImplementedError`` (:issue:`25016`) -- Bug in :meth:`DatetimeIndex.snap` which didn't preserving the ``name`` of the input :class:`Index` (:issue:`25575`) +- Bug in :meth:`DatetimeIndex.snap` which didn't preserve the ``name`` of the input :class:`Index` (:issue:`25575`) +- :meth:`Index.groupby` and dependent methods (notably :meth:`GroupBy.groups`) now return object with abbreviated repr (:issue:`1135`) .. _whatsnew_0250.deprecations: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3d0a6023ac29f..858195a61b102 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -54,7 +54,7 @@ class providing the base-class of operations. _apply_docs = dict( template=""" - Apply function `func` group-wise and combine the results together. + Apply function `func` group-wise and combine the results together. The function passed to `apply` must take a {input} as its first argument and return a DataFrame, Series or scalar. `apply` will diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 00e4640bb5b47..9b6c9c0d4089b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5291,24 +5291,11 @@ def _add_logical_methods_disabled(cls): class IndexGroupbyGroups(dict): - def __repr__(self): - nitems = get_option('display.max_rows') or len(self) - - fmt = u("{{{things}}}") - pfmt = u("{key}: {val}") - - pairs = [] - for k, v in list(self.items()): - pairs.append(pfmt.format(key=k, val=v)) + """Dict extension to support abbreviated __repr__""" + from pandas.io.formats.printing import pprint_thing - if nitems < len(self): - print("Truncating repr") - start_cnt, end_cnt = nitems - int(nitems / 2), int(nitems / 2) - return fmt.format(things=", ".join(pairs[:start_cnt]) + - ", ... , " + - ", ".join(pairs[-end_cnt:])) - else: - return fmt.format(things=", ".join(pairs)) + def __repr__(self): + return pprint_thing(self, max_seq_items=get_option('display.max_rows')) def ensure_index_from_sequences(sequences, names=None): From 29c6263f12a98f7204128a2e2d247b96cfcab13b Mon Sep 17 00:00:00 2001 From: Ben Date: Sat, 30 Mar 2019 13:27:39 -0700 Subject: [PATCH 09/12] Update tests to expect pprint formatting. Use new config location. Small update in doc. --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/indexes/base.py | 2 +- pandas/tests/io/formats/test_format.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0486e18909ae1..f6a5637f7c48b 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -216,7 +216,7 @@ Other API Changes - ``Timestamp`` and ``Timedelta`` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`) - :meth:`Timestamp.strptime` will now rise a ``NotImplementedError`` (:issue:`25016`) - Bug in :meth:`DatetimeIndex.snap` which didn't preserve the ``name`` of the input :class:`Index` (:issue:`25575`) -- :meth:`Index.groupby` and dependent methods (notably :meth:`GroupBy.groups`) now return object with abbreviated repr (:issue:`1135`) +- :meth:`Index.groupby` and dependent methods (notably :attr:`GroupBy.groups`) now return object with abbreviated repr (:issue:`1135`) .. _whatsnew_0250.deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d9452242de70c..c6f5c43bd9f74 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6,6 +6,7 @@ import numpy as np +from pandas._config.config import get_option from pandas._libs import ( algos as libalgos, index as libindex, join as libjoin, lib) from pandas._libs.lib import is_datetime_array @@ -39,7 +40,6 @@ from pandas.core.arrays import ExtensionArray from pandas.core.base import IndexOpsMixin, PandasObject import pandas.core.common as com -from pandas.core.config import get_option from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.ops import get_op_result_name, make_invalid_op diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index f17b84b90638d..cbfa65e302730 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1760,11 +1760,11 @@ def test_groups_repr_truncates(self): with option_context('display.max_rows', 2): x = df.groupby('a').groups - assert ', ... ,' in x.__repr__() + assert x.__repr__().endswith('...}') with option_context('display.max_rows', 5): x = df.groupby('a').groups - assert ', ... ,' not in x.__repr__() + assert not x.__repr__().endswith('...}') def gen_series_formatting(): From c74cbbaed8bc97cb07d0dfa81ba6d8cd1099e22c Mon Sep 17 00:00:00 2001 From: Ben Date: Sat, 30 Mar 2019 14:42:48 -0700 Subject: [PATCH 10/12] Accept isort formatting preference --- pandas/core/indexes/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e6fa3e255f514..ca5f0d449d776 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -7,6 +7,7 @@ import numpy as np from pandas._config.config import get_option + from pandas._libs import ( algos as libalgos, index as libindex, join as libjoin, lib) from pandas._libs.lib import is_datetime_array From 9621669be1e6aa66587cc0871919aa558f01365d Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 10 Apr 2019 13:02:14 -0700 Subject: [PATCH 11/12] Add nonsense to AUTHORS.md --- AUTHORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.md b/AUTHORS.md index dcaaea101f4c8..4cdf79823bd73 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -55,3 +55,4 @@ license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have BSD-compatible licenses, are included. Their licenses follow the pandas license. +asdf From 38ecd1a7ce2da029a37b6bd39c01222228d49002 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 10 Apr 2019 13:02:50 -0700 Subject: [PATCH 12/12] Revert "Add nonsense to AUTHORS.md" This reverts commit 9621669be1e6aa66587cc0871919aa558f01365d. --- AUTHORS.md | 1 - 1 file changed, 1 deletion(-) diff --git a/AUTHORS.md b/AUTHORS.md index 4cdf79823bd73..dcaaea101f4c8 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -55,4 +55,3 @@ license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have BSD-compatible licenses, are included. Their licenses follow the pandas license. -asdf