From ba50ba78fea13a1b4b45a31132ad63e574dcc653 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Oct 2019 08:23:23 -0700 Subject: [PATCH 1/9] Removed Series / DataFrame from concat --- pandas/core/reshape/concat.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 60bab817705e3..b3db2c26b4175 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -6,13 +6,14 @@ import numpy as np -from pandas import DataFrame, Index, MultiIndex, Series +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries + +from pandas import Index, MultiIndex from pandas.core import common as com from pandas.core.arrays.categorical import ( _factorize_from_iterable, _factorize_from_iterables, ) -from pandas.core.generic import NDFrame from pandas.core.index import ( _all_indexes_same, _get_consensus_names, @@ -275,7 +276,7 @@ def __init__( copy=True, sort=False, ): - if isinstance(objs, (NDFrame, str)): + if isinstance(objs, (ABCDataFrame, ABCSeries, str)): raise TypeError( "first argument must be an iterable of pandas " "objects, you passed an object of type " @@ -322,7 +323,7 @@ def __init__( # consolidate data & figure out what our result ndim is going to be ndims = set() for obj in objs: - if not isinstance(obj, (Series, DataFrame)): + if not isinstance(obj, (ABCSeries, ABCDataFrame)): msg = ( "cannot concatenate object of type '{}';" " only Series and DataFrame objs are valid".format(type(obj)) @@ -348,7 +349,7 @@ def __init__( # filter out the empties if we have not multi-index possibilities # note to keep empty Series as it affect to result columns / name non_empties = [ - obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, Series) + obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries) ] if len(non_empties) and ( @@ -362,17 +363,22 @@ def __init__( self.objs = objs # Standardize axis parameter to int - if isinstance(sample, Series): + # TODO: Should this really require a class import? + """ + if isinstance(sample, ABCSeries): axis = DataFrame._get_axis_number(axis) else: axis = sample._get_axis_number(axis) + """ + if not isinstance(axis, int): + axis = {"index": 0, "columns": 1}[axis] # Need to flip BlockManager axis in the DataFrame special case - self._is_frame = isinstance(sample, DataFrame) + self._is_frame = isinstance(sample, ABCDataFrame) if self._is_frame: axis = 1 if axis == 0 else 0 - self._is_series = isinstance(sample, Series) + self._is_series = isinstance(sample, ABCSeries) if not 0 <= axis <= sample.ndim: raise AssertionError( "axis must be between 0 and {ndim}, input was" @@ -545,7 +551,7 @@ def _get_concat_axis(self): num = 0 has_names = False for i, x in enumerate(self.objs): - if not isinstance(x, Series): + if not isinstance(x, ABCSeries): raise TypeError( "Cannot concatenate type 'Series' " "with object of type {type!r}".format(type=type(x).__name__) From 6fe04ddc79eb5785359e05b64b15e2f7df618dac Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Oct 2019 08:29:21 -0700 Subject: [PATCH 2/9] Resolved more circ dependencies --- pandas/core/reshape/merge.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 7bfc8153da568..940be533a9d17 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -44,6 +44,7 @@ import pandas.core.common as com from pandas.core.frame import _merge_doc from pandas.core.internals import _transform_index, concatenate_block_managers +from pandas.core.reshape.concat import concat import pandas.core.sorting as sorting from pandas.core.sorting import is_int64_overflow_possible @@ -156,8 +157,6 @@ def _groupby_and_merge(by, on, left, right, _merge_pieces, check_duplicates=True # preserve the original order # if we have a missing piece this can be reset - from pandas.core.reshape.concat import concat - result = concat(pieces, ignore_index=True) result = result.reindex(columns=pieces[0].columns, copy=False) return result, lby From 33d75cae938d551f1c7f998274310bda2c7808ff Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Oct 2019 08:48:13 -0700 Subject: [PATCH 3/9] Resolved circular imports in core.reshape --- pandas/core/reshape/concat.py | 3 ++- pandas/core/reshape/reshape.py | 7 +------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index b3db2c26b4175..79329fe98cd53 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -8,7 +8,6 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -from pandas import Index, MultiIndex from pandas.core import common as com from pandas.core.arrays.categorical import ( _factorize_from_iterable, @@ -19,6 +18,8 @@ _get_consensus_names, _get_objs_combined_axis, ensure_index, + Index, + MultiIndex, ) import pandas.core.indexes.base as ibase from pandas.core.internals import concatenate_block_managers diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 340e964d7c14f..72725d76b0899 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -26,6 +26,7 @@ from pandas.core.construction import extract_array from pandas.core.frame import DataFrame from pandas.core.index import Index, MultiIndex +from pandas.core.reshape.concat import concat from pandas.core.series import Series from pandas.core.sorting import ( compress_group_index, @@ -470,8 +471,6 @@ def _unstack_extension_series(series, level, fill_value): # 2. Followup with a columnwise take. # We use the dummy take to discover newly-created missing values # introduced by the reshape. - from pandas.core.reshape.concat import concat - dummy_arr = np.arange(len(series)) # fill_value=-1, since we will do a series.values.take later result = _Unstacker( @@ -856,8 +855,6 @@ def get_dummies( 1 0.0 1.0 0.0 2 0.0 0.0 1.0 """ - from pandas.core.reshape.concat import concat - dtypes_to_encode = ["object", "category"] if isinstance(data, DataFrame): @@ -945,8 +942,6 @@ def _get_dummies_1d( drop_first=False, dtype=None, ): - from pandas.core.reshape.concat import concat - # Series avoids inconsistent NaN handling codes, levels = _factorize_from_iterable(Series(data)) From ee0495787ed23155d9708cd47886b85d70fc3218 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Oct 2019 08:50:20 -0700 Subject: [PATCH 4/9] Resolved circular imports in series and groupby --- pandas/core/groupby/generic.py | 15 +-------------- pandas/core/groupby/groupby.py | 5 +---- pandas/core/series.py | 3 +-- 3 files changed, 3 insertions(+), 20 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a78857423e7e0..d03284a5679c1 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -66,6 +66,7 @@ from pandas.core.index import Index, MultiIndex, _all_indexes_same import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager, make_block +from pandas.core.reshape.concat import concat from pandas.core.series import Series from pandas.plotting import boxplot_frame_groupby @@ -274,8 +275,6 @@ def aggregate(self, func=None, *args, **kwargs): # _level handled at higher if not _level and isinstance(ret, dict): - from pandas import concat - ret = concat(ret, axis=1) return ret @@ -442,8 +441,6 @@ def transform(self, func, *args, **kwargs): # check for empty "results" to avoid concat ValueError if results: - from pandas.core.reshape.concat import concat - result = concat(results).sort_index() else: result = Series() @@ -1223,8 +1220,6 @@ def first_not_none(values): # still a series # path added as of GH 5545 elif all_indexed_same: - from pandas.core.reshape.concat import concat - return concat(values) if not all_indexed_same: @@ -1259,8 +1254,6 @@ def first_not_none(values): else: # GH5788 instead of stacking; concat gets the # dtypes correct - from pandas.core.reshape.concat import concat - result = concat( values, keys=key_index, @@ -1305,8 +1298,6 @@ def first_not_none(values): return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) def _transform_general(self, func, *args, **kwargs): - from pandas.core.reshape.concat import concat - applied = [] obj = self._obj_with_exclusions gen = self.grouper.get_iterator(obj, axis=self.axis) @@ -1655,8 +1646,6 @@ def _iterate_column_groupbys(self): ) def _apply_to_column_groupbys(self, func): - from pandas.core.reshape.concat import concat - return concat( (func(col_groupby) for _, col_groupby in self._iterate_column_groupbys()), keys=self._selected_obj.columns, @@ -1745,8 +1734,6 @@ def groupby_series(obj, col=None): if isinstance(obj, Series): results = groupby_series(obj) else: - from pandas.core.reshape.concat import concat - results = [groupby_series(obj[col], col) for col in obj.columns] results = concat(results, axis=1) results.columns.names = obj.columns.names diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f622480cfe4b7..812f67fcbfea0 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -51,6 +51,7 @@ class providing the base-class of operations. from pandas.core.generic import NDFrame from pandas.core.groupby import base from pandas.core.index import CategoricalIndex, Index, MultiIndex +from pandas.core.reshape.concat import concat from pandas.core.series import Series from pandas.core.sorting import get_group_index_sorter @@ -922,8 +923,6 @@ def _python_agg_general(self, func, *args, **kwargs): return self._wrap_aggregated_output(output) def _concat_objects(self, keys, values, not_indexed_same=False): - from pandas.core.reshape.concat import concat - def reset_identity(values): # reset the identities of the components # of the values to prevent aliasing @@ -1852,8 +1851,6 @@ def quantile(self, q=0.5, interpolation="linear"): a 2.0 b 3.0 """ - from pandas import concat - def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: if is_object_dtype(vals): raise TypeError( diff --git a/pandas/core/series.py b/pandas/core/series.py index ea48b3603623a..33a647e8a8d70 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -75,6 +75,7 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager +from pandas.core.reshape.concat import concat from pandas.core.strings import StringMethods from pandas.core.tools.datetimes import to_datetime @@ -2709,8 +2710,6 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): ... ValueError: Indexes have overlapping values: [0, 1, 2] """ - from pandas.core.reshape.concat import concat - if isinstance(to_append, (list, tuple)): to_concat = [self] to_concat.extend(to_append) From 230cf5f8880171b67e90ffeaafffbac58361593b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Oct 2019 08:53:57 -0700 Subject: [PATCH 5/9] Fixed window and frame --- pandas/core/frame.py | 8 +------- pandas/core/window/common.py | 4 ++-- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c90bf4ba7151f..d52ad0e7e24c8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -118,6 +118,7 @@ to_arrays, ) from pandas.core.ops.missing import dispatch_fill_zeros +from pandas.core.reshape.concat import concat from pandas.core.series import Series from pandas.io.formats import console, format as fmt @@ -6995,8 +6996,6 @@ def append(self, other, ignore_index=False, verify_integrity=False, sort=None): if (self.columns.get_indexer(other.columns) >= 0).all(): other = other.reindex(columns=self.columns) - from pandas.core.reshape.concat import concat - if isinstance(other, (list, tuple)): to_concat = [self] + other else: @@ -7134,7 +7133,6 @@ def _join_compat( self, other, on=None, how="left", lsuffix="", rsuffix="", sort=False ): from pandas.core.reshape.merge import merge - from pandas.core.reshape.concat import concat if isinstance(other, Series): if other.name is None: @@ -7295,8 +7293,6 @@ def round(self, decimals=0, *args, **kwargs): 2 0.7 0.0 3 0.2 0.0 """ - from pandas.core.reshape.concat import concat - def _dict_round(df, decimals): for col, vals in df.items(): try: @@ -8303,8 +8299,6 @@ def isin(self, values): dog False False """ if isinstance(values, dict): - from pandas.core.reshape.concat import concat - values = collections.defaultdict(list, values) return concat( ( diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 0f2920b3558c9..2b686290bfe2c 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -11,6 +11,8 @@ from pandas.core.generic import _shared_docs from pandas.core.groupby.base import GroupByMixin from pandas.core.index import MultiIndex +from pandas.core.reshape.concat import concat + _shared_docs = dict(**_shared_docs) _doc_template = """ @@ -128,8 +130,6 @@ def dataframe_from_int_dict(data, frame_template): *_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]) ) - from pandas import concat - result_index = arg1.index.union(arg2.index) if len(result_index): From 5fc32ccbe0ba57f918cc846bf0138043415a28a1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Oct 2019 08:56:10 -0700 Subject: [PATCH 6/9] Fixed formats --- pandas/io/formats/format.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b8c40e3f62221..9977900aeba7b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -70,6 +70,7 @@ from pandas.core.index import Index, ensure_index from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex +from pandas.core.reshape.concat import concat from pandas.io.common import _stringify_path from pandas.io.formats.printing import adjoin, justify, pprint_thing @@ -259,8 +260,6 @@ def __init__( self._chk_truncate() def _chk_truncate(self) -> None: - from pandas.core.reshape.concat import concat - min_rows = self.min_rows max_rows = self.max_rows # truncation determined by max_rows, actual truncated number of rows @@ -608,8 +607,6 @@ def _chk_truncate(self) -> None: Checks whether the frame should be truncated. If so, slices the frame up. """ - from pandas.core.reshape.concat import concat - # Cut the data to the information actually printed max_cols = self.max_cols max_rows = self.max_rows From f61baecaa7511546e248240c74ace06b06321ad3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 21 Oct 2019 09:06:14 -0700 Subject: [PATCH 7/9] isort and black --- pandas/core/frame.py | 1 + pandas/core/groupby/groupby.py | 1 + pandas/core/reshape/concat.py | 4 ++-- pandas/core/window/common.py | 1 - 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d52ad0e7e24c8..60882bed61d13 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7293,6 +7293,7 @@ def round(self, decimals=0, *args, **kwargs): 2 0.7 0.0 3 0.2 0.0 """ + def _dict_round(df, decimals): for col, vals in df.items(): try: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 812f67fcbfea0..cb57e04d0e7d9 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1851,6 +1851,7 @@ def quantile(self, q=0.5, interpolation="linear"): a 2.0 b 3.0 """ + def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: if is_object_dtype(vals): raise TypeError( diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 79329fe98cd53..b35298227060b 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -14,12 +14,12 @@ _factorize_from_iterables, ) from pandas.core.index import ( + Index, + MultiIndex, _all_indexes_same, _get_consensus_names, _get_objs_combined_axis, ensure_index, - Index, - MultiIndex, ) import pandas.core.indexes.base as ibase from pandas.core.internals import concatenate_block_managers diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 2b686290bfe2c..4e2fe56e74483 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -13,7 +13,6 @@ from pandas.core.index import MultiIndex from pandas.core.reshape.concat import concat - _shared_docs = dict(**_shared_docs) _doc_template = """ Returns From 56229f659e2748735232032f5be1d614264f1a67 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 22 Oct 2019 17:48:43 -0700 Subject: [PATCH 8/9] axis validation hack --- pandas/core/reshape/concat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index b35298227060b..337b4806c7d33 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -371,8 +371,9 @@ def __init__( else: axis = sample._get_axis_number(axis) """ + # TODO: implement universal axis validation if not isinstance(axis, int): - axis = {"index": 0, "columns": 1}[axis] + axis = {"index": 0, "rows": 0, "columns": 1}[axis] # Need to flip BlockManager axis in the DataFrame special case self._is_frame = isinstance(sample, ABCDataFrame) From 6e76bfcdef951128a9a05f5a60a140596374e4ee Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 23 Oct 2019 07:34:59 -0700 Subject: [PATCH 9/9] Fixed KeyError message --- pandas/core/reshape/concat.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 337b4806c7d33..25768c481ce51 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -371,9 +371,12 @@ def __init__( else: axis = sample._get_axis_number(axis) """ - # TODO: implement universal axis validation + # TODO: implement universal axis validation; align with core.generic if not isinstance(axis, int): - axis = {"index": 0, "rows": 0, "columns": 1}[axis] + try: + axis = {"index": 0, "rows": 0, "columns": 1}[axis] + except KeyError: + raise ValueError("No axis named {}".format(axis)) # Need to flip BlockManager axis in the DataFrame special case self._is_frame = isinstance(sample, ABCDataFrame)