Skip to content

REF: implement _shared_docs to de-circularize dependencies #34837

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 1 addition & 99 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
sanitize_index,
to_arrays,
)
from pandas.core.reshape.melt import melt
from pandas.core.series import Series
from pandas.core.sorting import ensure_key_mapped

Expand Down Expand Up @@ -7069,104 +7070,6 @@ def unstack(self, level=-1, fill_value=None):

return unstack(self, level, fill_value)

_shared_docs[
"melt"
] = """
Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.

This function is useful to massage a DataFrame into a format where one
or more columns are identifier variables (`id_vars`), while all other
columns, considered measured variables (`value_vars`), are "unpivoted" to
the row axis, leaving just two non-identifier columns, 'variable' and
'value'.
%(versionadded)s
Parameters
----------
id_vars : tuple, list, or ndarray, optional
Column(s) to use as identifier variables.
value_vars : tuple, list, or ndarray, optional
Column(s) to unpivot. If not specified, uses all columns that
are not set as `id_vars`.
var_name : scalar
Name to use for the 'variable' column. If None it uses
``frame.columns.name`` or 'variable'.
value_name : scalar, default 'value'
Name to use for the 'value' column.
col_level : int or str, optional
If columns are a MultiIndex then use this level to melt.

Returns
-------
DataFrame
Unpivoted DataFrame.

See Also
--------
%(other)s : Identical method.
pivot_table : Create a spreadsheet-style pivot table as a DataFrame.
DataFrame.pivot : Return reshaped DataFrame organized
by given index / column values.
DataFrame.explode : Explode a DataFrame from list-like
columns to long format.

Examples
--------
>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
... 'B': {0: 1, 1: 3, 2: 5},
... 'C': {0: 2, 1: 4, 2: 6}})
>>> df
A B C
0 a 1 2
1 b 3 4
2 c 5 6

>>> %(caller)sid_vars=['A'], value_vars=['B'])
A variable value
0 a B 1
1 b B 3
2 c B 5

>>> %(caller)sid_vars=['A'], value_vars=['B', 'C'])
A variable value
0 a B 1
1 b B 3
2 c B 5
3 a C 2
4 b C 4
5 c C 6

The names of 'variable' and 'value' columns can be customized:

>>> %(caller)sid_vars=['A'], value_vars=['B'],
... var_name='myVarname', value_name='myValname')
A myVarname myValname
0 a B 1
1 b B 3
2 c B 5

If you have multi-index columns:

>>> df.columns = [list('ABC'), list('DEF')]
>>> df
A B C
D E F
0 a 1 2
1 b 3 4
2 c 5 6

>>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B'])
A variable value
0 a B 1
1 b B 3
2 c B 5

>>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')])
(A, D) variable_0 variable_1 value
0 a B E 1
1 b B E 3
2 c B E 5
"""

@Appender(
_shared_docs["melt"]
% dict(
Expand All @@ -7183,7 +7086,6 @@ def melt(
value_name="value",
col_level=None,
) -> "DataFrame":
from pandas.core.reshape.melt import melt

return melt(
self,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
from pandas.core.internals import BlockManager
from pandas.core.missing import find_valid_index
from pandas.core.ops import _align_method_FRAME
from pandas.core.shared_docs import _shared_docs

from pandas.io.formats import format as fmt
from pandas.io.formats.format import DataFrameFormatter, format_percentiles
Expand All @@ -109,7 +110,6 @@

# The goal is to be able to define the docs close to the function, while
# still being able to share them.
_shared_docs: Dict[str, str] = dict()
_shared_doc_kwargs = dict(
axes="keywords for axes",
klass="Series/DataFrame",
Expand Down
25 changes: 14 additions & 11 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

from collections import abc
from typing import Iterable, List, Mapping, Union, overload
from typing import TYPE_CHECKING, Iterable, List, Mapping, Union, overload

import numpy as np

Expand All @@ -12,14 +12,14 @@
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

from pandas import DataFrame, Index, MultiIndex, Series
from pandas.core.arrays.categorical import (
factorize_from_iterable,
factorize_from_iterables,
)
import pandas.core.common as com
from pandas.core.generic import NDFrame
from pandas.core.indexes.api import (
Index,
MultiIndex,
all_indexes_same,
ensure_index,
get_consensus_names,
Expand All @@ -28,6 +28,9 @@
import pandas.core.indexes.base as ibase
from pandas.core.internals import concatenate_block_managers

if TYPE_CHECKING:
from pandas import DataFrame

# ---------------------------------------------------------------------
# Concatenate DataFrame objects

Expand Down Expand Up @@ -291,7 +294,7 @@ class _Concatenator:

def __init__(
self,
objs,
objs: Union[Iterable[FrameOrSeries], Mapping[Label, FrameOrSeries]],
axis=0,
join: str = "outer",
keys=None,
Expand All @@ -302,7 +305,7 @@ def __init__(
copy: bool = True,
sort=False,
):
if isinstance(objs, (NDFrame, str)):
if isinstance(objs, (ABCSeries, ABCDataFrame, str)):
raise TypeError(
"first argument must be an iterable of pandas "
f'objects, you passed an object of type "{type(objs).__name__}"'
Expand Down Expand Up @@ -348,7 +351,7 @@ def __init__(
# consolidate data & figure out what our result ndim is going to be
ndims = set()
for obj in objs:
if not isinstance(obj, (Series, DataFrame)):
if not isinstance(obj, (ABCSeries, ABCDataFrame)):
msg = (
f"cannot concatenate object of type '{type(obj)}'; "
"only Series and DataFrame objs are valid"
Expand All @@ -374,7 +377,7 @@ def __init__(
# filter out the empties if we have no multi-index possibilities
# note: keep empty Series, as they affect the result columns / name
non_empties = [
obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, Series)
obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, ABCSeries)
]

if len(non_empties) and (
Expand All @@ -388,15 +391,15 @@ def __init__(
self.objs = objs

# Standardize axis parameter to int
if isinstance(sample, Series):
axis = DataFrame._get_axis_number(axis)
if isinstance(sample, ABCSeries):
axis = sample._constructor_expanddim._get_axis_number(axis)
else:
axis = sample._get_axis_number(axis)

# Need to flip BlockManager axis in the DataFrame special case
self._is_frame = isinstance(sample, ABCDataFrame)
if self._is_frame:
axis = DataFrame._get_block_manager_axis(axis)
axis = sample._get_block_manager_axis(axis)

self._is_series = isinstance(sample, ABCSeries)
if not 0 <= axis <= sample.ndim:
Expand Down Expand Up @@ -543,7 +546,7 @@ def _get_concat_axis(self) -> Index:
num = 0
has_names = False
for i, x in enumerate(self.objs):
if not isinstance(x, Series):
if not isinstance(x, ABCSeries):
raise TypeError(
f"Cannot concatenate type 'Series' with "
f"object of type '{type(x).__name__}'"
Expand Down
14 changes: 7 additions & 7 deletions pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,27 @@

from pandas.core.arrays import Categorical
import pandas.core.common as com
from pandas.core.frame import DataFrame, _shared_docs
from pandas.core.indexes.api import Index, MultiIndex
from pandas.core.reshape.concat import concat
from pandas.core.shared_docs import _shared_docs
from pandas.core.tools.numeric import to_numeric

if TYPE_CHECKING:
from pandas import Series # noqa: F401
from pandas import DataFrame, Series # noqa: F401


@Appender(
_shared_docs["melt"]
% dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt")
)
def melt(
frame: DataFrame,
frame: "DataFrame",
id_vars=None,
value_vars=None,
var_name=None,
value_name="value",
col_level=None,
) -> DataFrame:
) -> "DataFrame":
# TODO: what about the existing index?
# If multiindex, gather names of columns on all level for checking presence
# of `id_vars` and `value_vars`
Expand Down Expand Up @@ -125,7 +125,7 @@ def melt(


@deprecate_kwarg(old_arg_name="label", new_arg_name=None)
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame:
def lreshape(data: "DataFrame", groups, dropna: bool = True, label=None) -> "DataFrame":
"""
Reshape long-format data to wide. Generalized inverse of DataFrame.pivot

Expand Down Expand Up @@ -195,8 +195,8 @@ def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFr


def wide_to_long(
df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"
) -> DataFrame:
df: "DataFrame", stubnames, i, j, sep: str = "", suffix: str = r"\d+"
) -> "DataFrame":
r"""
Wide panel to long format. Less flexible but more user-friendly than melt.

Expand Down
102 changes: 102 additions & 0 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from typing import Dict

# Registry of docstring text shared between modules, keyed by a short name.
# Entries may contain %-style placeholders; a user fills them in with
# ``_shared_docs[name] % dict(...)`` before attaching the result to a function.
# Keeping the registry in its own module lets other modules import it without
# importing each other.
_shared_docs: Dict[str, str] = dict()


# Docstring template for ``melt``, reused by the DataFrame method and the
# module-level function.  Placeholders each user must supply:
#   %(versionadded)s - text inserted just before the "Parameters" section
#                      (may be an empty string)
#   %(other)s        - name of the counterpart callable, listed under "See Also"
#   %(caller)s       - call prefix used in the examples, e.g. "pd.melt(df, "
_shared_docs[
"melt"
] = """
Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.

This function is useful to massage a DataFrame into a format where one
or more columns are identifier variables (`id_vars`), while all other
columns, considered measured variables (`value_vars`), are "unpivoted" to
the row axis, leaving just two non-identifier columns, 'variable' and
'value'.
%(versionadded)s
Parameters
----------
id_vars : tuple, list, or ndarray, optional
Column(s) to use as identifier variables.
value_vars : tuple, list, or ndarray, optional
Column(s) to unpivot. If not specified, uses all columns that
are not set as `id_vars`.
var_name : scalar
Name to use for the 'variable' column. If None it uses
``frame.columns.name`` or 'variable'.
value_name : scalar, default 'value'
Name to use for the 'value' column.
col_level : int or str, optional
If columns are a MultiIndex then use this level to melt.

Returns
-------
DataFrame
Unpivoted DataFrame.

See Also
--------
%(other)s : Identical method.
pivot_table : Create a spreadsheet-style pivot table as a DataFrame.
DataFrame.pivot : Return reshaped DataFrame organized
by given index / column values.
DataFrame.explode : Explode a DataFrame from list-like
columns to long format.

Examples
--------
>>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
... 'B': {0: 1, 1: 3, 2: 5},
... 'C': {0: 2, 1: 4, 2: 6}})
>>> df
A B C
0 a 1 2
1 b 3 4
2 c 5 6

>>> %(caller)sid_vars=['A'], value_vars=['B'])
A variable value
0 a B 1
1 b B 3
2 c B 5

>>> %(caller)sid_vars=['A'], value_vars=['B', 'C'])
A variable value
0 a B 1
1 b B 3
2 c B 5
3 a C 2
4 b C 4
5 c C 6

The names of 'variable' and 'value' columns can be customized:

>>> %(caller)sid_vars=['A'], value_vars=['B'],
... var_name='myVarname', value_name='myValname')
A myVarname myValname
0 a B 1
1 b B 3
2 c B 5

If you have multi-index columns:

>>> df.columns = [list('ABC'), list('DEF')]
>>> df
A B C
D E F
0 a 1 2
1 b 3 4
2 c 5 6

>>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B'])
A variable value
0 a B 1
1 b B 3
2 c B 5

>>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')])
(A, D) variable_0 variable_1 value
0 a B E 1
1 b B E 3
2 c B E 5
"""