Skip to content

Reduce Circular Imports with pandas.core.reshape.concat #29133

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
7 changes: 1 addition & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
to_arrays,
)
from pandas.core.ops.missing import dispatch_fill_zeros
from pandas.core.reshape.concat import concat
from pandas.core.series import Series

from pandas.io.formats import console, format as fmt
Expand Down Expand Up @@ -6991,8 +6992,6 @@ def append(self, other, ignore_index=False, verify_integrity=False, sort=None):
if (self.columns.get_indexer(other.columns) >= 0).all():
other = other.reindex(columns=self.columns)

from pandas.core.reshape.concat import concat

if isinstance(other, (list, tuple)):
to_concat = [self] + other
else:
Expand Down Expand Up @@ -7130,7 +7129,6 @@ def _join_compat(
self, other, on=None, how="left", lsuffix="", rsuffix="", sort=False
):
from pandas.core.reshape.merge import merge
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can the merge import also be moved up?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was hoping so, but it doesn't appear to be the case. I think it's intertwined with some of the Categorical stuff, so it might make sense if / when I can resolve that space

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great.

Categorical is used in a lot of places where I think/hope a less high-powered tool could be used, and doing so could simplify the dependency structure of the code base quite a bit.

from pandas.core.reshape.concat import concat

if isinstance(other, Series):
if other.name is None:
Expand Down Expand Up @@ -7291,7 +7289,6 @@ def round(self, decimals=0, *args, **kwargs):
2 0.7 0.0
3 0.2 0.0
"""
from pandas.core.reshape.concat import concat

def _dict_round(df, decimals):
for col, vals in df.items():
Expand Down Expand Up @@ -8297,8 +8294,6 @@ def isin(self, values):
dog False False
"""
if isinstance(values, dict):
from pandas.core.reshape.concat import concat

values = collections.defaultdict(list, values)
return concat(
(
Expand Down
15 changes: 1 addition & 14 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
from pandas.core.index import Index, MultiIndex, _all_indexes_same
import pandas.core.indexes.base as ibase
from pandas.core.internals import BlockManager, make_block
from pandas.core.reshape.concat import concat
from pandas.core.series import Series

from pandas.plotting import boxplot_frame_groupby
Expand Down Expand Up @@ -275,8 +276,6 @@ def aggregate(self, func=None, *args, **kwargs):

# _level handled at higher
if not _level and isinstance(ret, dict):
from pandas import concat

ret = concat(ret, axis=1)
return ret

Expand Down Expand Up @@ -443,8 +442,6 @@ def transform(self, func, *args, **kwargs):

# check for empty "results" to avoid concat ValueError
if results:
from pandas.core.reshape.concat import concat

result = concat(results).sort_index()
else:
result = Series()
Expand Down Expand Up @@ -1221,8 +1218,6 @@ def first_not_none(values):
# still a series
# path added as of GH 5545
elif all_indexed_same:
from pandas.core.reshape.concat import concat

return concat(values)

if not all_indexed_same:
Expand Down Expand Up @@ -1257,8 +1252,6 @@ def first_not_none(values):
else:
# GH5788 instead of stacking; concat gets the
# dtypes correct
from pandas.core.reshape.concat import concat

result = concat(
values,
keys=key_index,
Expand Down Expand Up @@ -1303,8 +1296,6 @@ def first_not_none(values):
return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)

def _transform_general(self, func, *args, **kwargs):
from pandas.core.reshape.concat import concat

applied = []
obj = self._obj_with_exclusions
gen = self.grouper.get_iterator(obj, axis=self.axis)
Expand Down Expand Up @@ -1653,8 +1644,6 @@ def _iterate_column_groupbys(self):
)

def _apply_to_column_groupbys(self, func):
from pandas.core.reshape.concat import concat

return concat(
(func(col_groupby) for _, col_groupby in self._iterate_column_groupbys()),
keys=self._selected_obj.columns,
Expand Down Expand Up @@ -1741,8 +1730,6 @@ def groupby_series(obj, col=None):
if isinstance(obj, Series):
results = groupby_series(obj)
else:
from pandas.core.reshape.concat import concat

results = [groupby_series(obj[col], col) for col in obj.columns]
results = concat(results, axis=1)
results.columns.names = obj.columns.names
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ class providing the base-class of operations.
from pandas.core.generic import NDFrame
from pandas.core.groupby import base
from pandas.core.index import CategoricalIndex, Index, MultiIndex
from pandas.core.reshape.concat import concat
from pandas.core.series import Series
from pandas.core.sorting import get_group_index_sorter

Expand Down Expand Up @@ -922,8 +923,6 @@ def _python_agg_general(self, func, *args, **kwargs):
return self._wrap_aggregated_output(output)

def _concat_objects(self, keys, values, not_indexed_same=False):
from pandas.core.reshape.concat import concat

def reset_identity(values):
# reset the identities of the components
# of the values to prevent aliasing
Expand Down Expand Up @@ -1852,7 +1851,6 @@ def quantile(self, q=0.5, interpolation="linear"):
a 2.0
b 3.0
"""
from pandas import concat

def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
if is_object_dtype(vals):
Expand Down
29 changes: 20 additions & 9 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@

import numpy as np

from pandas import DataFrame, Index, MultiIndex, Series
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

from pandas.core import common as com
from pandas.core.arrays.categorical import (
_factorize_from_iterable,
_factorize_from_iterables,
)
from pandas.core.generic import NDFrame
from pandas.core.index import (
Index,
MultiIndex,
_all_indexes_same,
_get_consensus_names,
_get_objs_combined_axis,
Expand Down Expand Up @@ -275,7 +277,7 @@ def __init__(
copy=True,
sort=False,
):
if isinstance(objs, (NDFrame, str)):
if isinstance(objs, (ABCDataFrame, ABCSeries, str)):
raise TypeError(
"first argument must be an iterable of pandas "
"objects, you passed an object of type "
Expand Down Expand Up @@ -322,7 +324,7 @@ def __init__(
# consolidate data & figure out what our result ndim is going to be
ndims = set()
for obj in objs:
if not isinstance(obj, (Series, DataFrame)):
if not isinstance(obj, (ABCSeries, ABCDataFrame)):
msg = (
"cannot concatenate object of type '{}';"
" only Series and DataFrame objs are valid".format(type(obj))
Expand All @@ -348,7 +350,7 @@ def __init__(
# filter out the empties if we have not multi-index possibilities
# note to keep empty Series as it affect to result columns / name
non_empties = [
obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, Series)
obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries)
]

if len(non_empties) and (
Expand All @@ -362,17 +364,26 @@ def __init__(
self.objs = objs

# Standardize axis parameter to int
if isinstance(sample, Series):
# TODO: Should this really require a class import?
"""
if isinstance(sample, ABCSeries):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is maybe a relic of the old panel days but as of now it seems a little overkill to import the generic class definition to resolve "index" and "columns" to their respective axis numbers. For now I just did this in the function body directly below commented code, but this probably belongs somewhere else as a utility function?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my intuition is that you're right here. A lot of _axis_reversed stuff feels leftover

axis = DataFrame._get_axis_number(axis)
else:
axis = sample._get_axis_number(axis)
"""
# TODO: implement universal axis validation; align with core.generic
if not isinstance(axis, int):
try:
axis = {"index": 0, "rows": 0, "columns": 1}[axis]
except KeyError:
raise ValueError("No axis named {}".format(axis))

# Need to flip BlockManager axis in the DataFrame special case
self._is_frame = isinstance(sample, DataFrame)
self._is_frame = isinstance(sample, ABCDataFrame)
if self._is_frame:
axis = 1 if axis == 0 else 0

self._is_series = isinstance(sample, Series)
self._is_series = isinstance(sample, ABCSeries)
if not 0 <= axis <= sample.ndim:
raise AssertionError(
"axis must be between 0 and {ndim}, input was"
Expand Down Expand Up @@ -545,7 +556,7 @@ def _get_concat_axis(self):
num = 0
has_names = False
for i, x in enumerate(self.objs):
if not isinstance(x, Series):
if not isinstance(x, ABCSeries):
raise TypeError(
"Cannot concatenate type 'Series' "
"with object of type {type!r}".format(type=type(x).__name__)
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import pandas.core.common as com
from pandas.core.frame import _merge_doc
from pandas.core.internals import _transform_index, concatenate_block_managers
from pandas.core.reshape.concat import concat
import pandas.core.sorting as sorting
from pandas.core.sorting import is_int64_overflow_possible

Expand Down Expand Up @@ -156,8 +157,6 @@ def _groupby_and_merge(by, on, left, right, _merge_pieces, check_duplicates=True

# preserve the original order
# if we have a missing piece this can be reset
from pandas.core.reshape.concat import concat

result = concat(pieces, ignore_index=True)
result = result.reindex(columns=pieces[0].columns, copy=False)
return result, lby
Expand Down
7 changes: 1 addition & 6 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from pandas.core.construction import extract_array
from pandas.core.frame import DataFrame
from pandas.core.index import Index, MultiIndex
from pandas.core.reshape.concat import concat
from pandas.core.series import Series
from pandas.core.sorting import (
compress_group_index,
Expand Down Expand Up @@ -469,8 +470,6 @@ def _unstack_extension_series(series, level, fill_value):
# 2. Followup with a columnwise take.
# We use the dummy take to discover newly-created missing values
# introduced by the reshape.
from pandas.core.reshape.concat import concat

dummy_arr = np.arange(len(series))
# fill_value=-1, since we will do a series.values.take later
result = _Unstacker(
Expand Down Expand Up @@ -855,8 +854,6 @@ def get_dummies(
1 0.0 1.0 0.0
2 0.0 0.0 1.0
"""
from pandas.core.reshape.concat import concat

dtypes_to_encode = ["object", "category"]

if isinstance(data, DataFrame):
Expand Down Expand Up @@ -946,8 +943,6 @@ def _get_dummies_1d(
drop_first=False,
dtype=None,
):
from pandas.core.reshape.concat import concat

# Series avoids inconsistent NaN handling
codes, levels = _factorize_from_iterable(Series(data))

Expand Down
3 changes: 1 addition & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.indexing import check_bool_indexer
from pandas.core.internals import SingleBlockManager
from pandas.core.reshape.concat import concat
from pandas.core.strings import StringMethods
from pandas.core.tools.datetimes import to_datetime

Expand Down Expand Up @@ -2705,8 +2706,6 @@ def append(self, to_append, ignore_index=False, verify_integrity=False):
...
ValueError: Indexes have overlapping values: [0, 1, 2]
"""
from pandas.core.reshape.concat import concat

if isinstance(to_append, (list, tuple)):
to_concat = [self]
to_concat.extend(to_append)
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/window/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pandas.core.generic import _shared_docs
from pandas.core.groupby.base import GroupByMixin
from pandas.core.index import MultiIndex
from pandas.core.reshape.concat import concat

_shared_docs = dict(**_shared_docs)
_doc_template = """
Expand Down Expand Up @@ -128,8 +129,6 @@ def dataframe_from_int_dict(data, frame_template):
*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])
)

from pandas import concat

result_index = arg1.index.union(arg2.index)
if len(result_index):

Expand Down
5 changes: 1 addition & 4 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
from pandas.core.index import Index, ensure_index
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.reshape.concat import concat

from pandas.io.common import _stringify_path
from pandas.io.formats.printing import adjoin, justify, pprint_thing
Expand Down Expand Up @@ -260,8 +261,6 @@ def __init__(
self._chk_truncate()

def _chk_truncate(self) -> None:
from pandas.core.reshape.concat import concat

min_rows = self.min_rows
max_rows = self.max_rows
# truncation determined by max_rows, actual truncated number of rows
Expand Down Expand Up @@ -609,8 +608,6 @@ def _chk_truncate(self) -> None:
Checks whether the frame should be truncated. If so, slices
the frame up.
"""
from pandas.core.reshape.concat import concat

# Cut the data to the information actually printed
max_cols = self.max_cols
max_rows = self.max_rows
Expand Down