diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e5de9b428e2d5..e57b88f1be040 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -135,6 +135,7 @@ sanitize_index, to_arrays, ) +from pandas.core.reshape.melt import melt from pandas.core.series import Series from pandas.core.sorting import ensure_key_mapped @@ -7069,104 +7070,6 @@ def unstack(self, level=-1, fill_value=None): return unstack(self, level, fill_value) - _shared_docs[ - "melt" - ] = """ - Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. - - This function is useful to massage a DataFrame into a format where one - or more columns are identifier variables (`id_vars`), while all other - columns, considered measured variables (`value_vars`), are "unpivoted" to - the row axis, leaving just two non-identifier columns, 'variable' and - 'value'. - %(versionadded)s - Parameters - ---------- - id_vars : tuple, list, or ndarray, optional - Column(s) to use as identifier variables. - value_vars : tuple, list, or ndarray, optional - Column(s) to unpivot. If not specified, uses all columns that - are not set as `id_vars`. - var_name : scalar - Name to use for the 'variable' column. If None it uses - ``frame.columns.name`` or 'variable'. - value_name : scalar, default 'value' - Name to use for the 'value' column. - col_level : int or str, optional - If columns are a MultiIndex then use this level to melt. - - Returns - ------- - DataFrame - Unpivoted DataFrame. - - See Also - -------- - %(other)s : Identical method. - pivot_table : Create a spreadsheet-style pivot table as a DataFrame. - DataFrame.pivot : Return reshaped DataFrame organized - by given index / column values. - DataFrame.explode : Explode a DataFrame from list-like - columns to long format. - - Examples - -------- - >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, - ... 'B': {0: 1, 1: 3, 2: 5}, - ... 'C': {0: 2, 1: 4, 2: 6}}) - >>> df - A B C - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> %(caller)sid_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - 3 a C 2 - 4 b C 4 - 5 c C 6 - - The names of 'variable' and 'value' columns can be customized: - - >>> %(caller)sid_vars=['A'], value_vars=['B'], - ... var_name='myVarname', value_name='myValname') - A myVarname myValname - 0 a B 1 - 1 b B 3 - 2 c B 5 - - If you have multi-index columns: - - >>> df.columns = [list('ABC'), list('DEF')] - >>> df - A B C - D E F - 0 a 1 2 - 1 b 3 4 - 2 c 5 6 - - >>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) - A variable value - 0 a B 1 - 1 b B 3 - 2 c B 5 - - >>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) - (A, D) variable_0 variable_1 value - 0 a B E 1 - 1 b B E 3 - 2 c B E 5 - """ - @Appender( _shared_docs["melt"] % dict( @@ -7183,7 +7086,6 @@ def melt( value_name="value", col_level=None, ) -> "DataFrame": - from pandas.core.reshape.melt import melt return melt( self, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 61361c3331d5e..a459f2f403550 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -97,6 +97,7 @@ from pandas.core.internals import BlockManager from pandas.core.missing import find_valid_index from pandas.core.ops import _align_method_FRAME +from pandas.core.shared_docs import _shared_docs from pandas.io.formats import format as fmt from pandas.io.formats.format import DataFrameFormatter, format_percentiles @@ -109,7 +110,6 @@ # goal is to be able to define the docs close to function, while still being # able to share -_shared_docs: Dict[str, str] = dict() _shared_doc_kwargs = dict( axes="keywords for axes", klass="Series/DataFrame", diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index db7e9265ac21d..299b68c6e71e0 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -3,7 +3,7 @@ """ from collections import abc -from typing import Iterable, List, Mapping, Union, overload +from typing import TYPE_CHECKING, Iterable, List, Mapping, Union, overload import numpy as np @@ -12,14 +12,14 @@ from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -from pandas import DataFrame, Index, MultiIndex, Series from pandas.core.arrays.categorical import ( factorize_from_iterable, factorize_from_iterables, ) import pandas.core.common as com -from pandas.core.generic import NDFrame from pandas.core.indexes.api import ( + Index, + MultiIndex, all_indexes_same, ensure_index, get_consensus_names, @@ -28,6 +28,9 @@ import pandas.core.indexes.base as ibase from pandas.core.internals import concatenate_block_managers +if TYPE_CHECKING: + from pandas import DataFrame + # --------------------------------------------------------------------- # Concatenate DataFrame objects @@ -291,7 +294,7 @@ class _Concatenator: def __init__( self, - objs, + objs: Union[Iterable[FrameOrSeries], Mapping[Label, FrameOrSeries]], axis=0, join: str = "outer", keys=None, @@ -302,7 +305,7 @@ def __init__( copy: bool = True, sort=False, ): - if isinstance(objs, (NDFrame, str)): + if isinstance(objs, (ABCSeries, ABCDataFrame, str)): raise TypeError( "first argument must be an iterable of pandas " f'objects, you passed an object of type "{type(objs).__name__}"' @@ -348,7 +351,7 @@ def __init__( # consolidate data & figure out what our result ndim is going to be ndims = set() for obj in objs: - if not isinstance(obj, (Series, DataFrame)): + if not isinstance(obj, (ABCSeries, ABCDataFrame)): msg = ( f"cannot concatenate object of type '{type(obj)}'; " "only Series and DataFrame objs are valid" @@ -374,7 +377,7 @@ def __init__( # filter out the empties if we have not multi-index possibilities # note to keep empty Series as it affect to result columns / name non_empties = [ - obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, Series) + obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries) ] if len(non_empties) and ( @@ -388,15 +391,15 @@ def __init__( self.objs = objs # Standardize axis parameter to int - if isinstance(sample, Series): - axis = DataFrame._get_axis_number(axis) + if isinstance(sample, ABCSeries): + axis = sample._constructor_expanddim._get_axis_number(axis) else: axis = sample._get_axis_number(axis) # Need to flip BlockManager axis in the DataFrame special case self._is_frame = isinstance(sample, ABCDataFrame) if self._is_frame: - axis = DataFrame._get_block_manager_axis(axis) + axis = sample._get_block_manager_axis(axis) self._is_series = isinstance(sample, ABCSeries) if not 0 <= axis <= sample.ndim: @@ -543,7 +546,7 @@ def _get_concat_axis(self) -> Index: num = 0 has_names = False for i, x in enumerate(self.objs): - if not isinstance(x, Series): + if not isinstance(x, ABCSeries): raise TypeError( f"Cannot concatenate type 'Series' with " f"object of type '{type(x).__name__}'" diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 845f6b67693f4..cd0619738677d 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -11,13 +11,13 @@ from pandas.core.arrays import Categorical import pandas.core.common as com -from pandas.core.frame import DataFrame, _shared_docs from pandas.core.indexes.api import Index, MultiIndex from pandas.core.reshape.concat import concat +from pandas.core.shared_docs import _shared_docs from pandas.core.tools.numeric import to_numeric if TYPE_CHECKING: - from pandas import Series # noqa: F401 + from pandas import DataFrame, Series # noqa: F401 @Appender( @@ -25,13 +25,13 @@ % dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt") ) def melt( - frame: DataFrame, + frame: "DataFrame", id_vars=None, value_vars=None, var_name=None, value_name="value", col_level=None, -) -> DataFrame: +) -> "DataFrame": # TODO: what about the existing index? # If multiindex, gather names of columns on all level for checking presence # of `id_vars` and `value_vars` @@ -125,7 +125,7 @@ def melt( @deprecate_kwarg(old_arg_name="label", new_arg_name=None) -def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame: +def lreshape(data: "DataFrame", groups, dropna: bool = True, label=None) -> "DataFrame": """ Reshape long-format data to wide. Generalized inverse of DataFrame.pivot @@ -195,8 +195,8 @@ def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFr def wide_to_long( - df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+" -) -> DataFrame: + df: "DataFrame", stubnames, i, j, sep: str = "", suffix: str = r"\d+" +) -> "DataFrame": r""" Wide panel to long format. Less flexible but more user-friendly than melt. diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py new file mode 100644 index 0000000000000..1894f551afea5 --- /dev/null +++ b/pandas/core/shared_docs.py @@ -0,0 +1,102 @@ +from typing import Dict + +_shared_docs: Dict[str, str] = dict() + + +_shared_docs[ + "melt" +] = """ + Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. + + This function is useful to massage a DataFrame into a format where one + or more columns are identifier variables (`id_vars`), while all other + columns, considered measured variables (`value_vars`), are "unpivoted" to + the row axis, leaving just two non-identifier columns, 'variable' and + 'value'. + %(versionadded)s + Parameters + ---------- + id_vars : tuple, list, or ndarray, optional + Column(s) to use as identifier variables. + value_vars : tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. + var_name : scalar + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. + value_name : scalar, default 'value' + Name to use for the 'value' column. + col_level : int or str, optional + If columns are a MultiIndex then use this level to melt. + + Returns + ------- + DataFrame + Unpivoted DataFrame. + + See Also + -------- + %(other)s : Identical method. + pivot_table : Create a spreadsheet-style pivot table as a DataFrame. + DataFrame.pivot : Return reshaped DataFrame organized + by given index / column values. + DataFrame.explode : Explode a DataFrame from list-like + columns to long format. + + Examples + -------- + >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, + ... 'B': {0: 1, 1: 3, 2: 5}, + ... 'C': {0: 2, 1: 4, 2: 6}}) + >>> df + A B C + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> %(caller)sid_vars=['A'], value_vars=['B']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + 3 a C 2 + 4 b C 4 + 5 c C 6 + + The names of 'variable' and 'value' columns can be customized: + + >>> %(caller)sid_vars=['A'], value_vars=['B'], + ... var_name='myVarname', value_name='myValname') + A myVarname myValname + 0 a B 1 + 1 b B 3 + 2 c B 5 + + If you have multi-index columns: + + >>> df.columns = [list('ABC'), list('DEF')] + >>> df + A B C + D E F + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) + (A, D) variable_0 variable_1 value + 0 a B E 1 + 1 b B E 3 + 2 c B E 5 + """