Skip to content

Commit 6f4e7ca

Browse files
Merge remote-tracking branch 'upstream/master' into GH36666
2 parents 2abd0ed + f0ecfd8 commit 6f4e7ca

File tree

105 files changed

+3548
-3434
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+3548
-3434
lines changed

.pre-commit-config.yaml

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,44 @@ repos:
5656
- id: incorrect-sphinx-directives
5757
name: Check for incorrect Sphinx directives
5858
language: pygrep
59-
entry: >-
60-
\.\. (autosummary|contents|currentmodule|deprecated
61-
|function|image|important|include|ipython|literalinclude
62-
|math|module|note|raw|seealso|toctree|versionadded
63-
|versionchanged|warning):[^:]
59+
entry: |
60+
(?x)
61+
# Check for cases of e.g. .. warning: instead of .. warning::
62+
\.\.\ (
63+
autosummary|contents|currentmodule|deprecated|
64+
function|image|important|include|ipython|literalinclude|
65+
math|module|note|raw|seealso|toctree|versionadded|
66+
versionchanged|warning
67+
):[^:]
6468
files: \.(py|pyx|rst)$
69+
- id: non-standard-imports
70+
name: Check for non-standard imports
71+
language: pygrep
72+
entry: |
73+
(?x)
74+
# Check for imports from pandas.core.common instead of `import pandas.core.common as com`
75+
from\ pandas\.core\.common\ import|
76+
from\ pandas\.core\ import\ common|
77+
78+
# Check for imports from collections.abc instead of `from collections import abc`
79+
from\ collections\.abc\ import|
80+
81+
from\ numpy\ import\ nan
82+
types: [python]
83+
- id: non-standard-imports-in-tests
84+
name: Check for non-standard imports in test suite
85+
language: pygrep
86+
entry: |
87+
(?x)
88+
# Check for imports from pandas._testing instead of `import pandas._testing as tm`
89+
from\ pandas\._testing\ import|
90+
from\ pandas\ import\ _testing\ as\ tm|
91+
92+
# No direct imports from conftest
93+
conftest\ import|
94+
import\ conftest
95+
types: [python]
96+
files: ^pandas/tests/
6597
- id: incorrect-code-directives
6698
name: Check for incorrect code block or IPython directives
6799
language: pygrep

ci/code_checks.sh

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -110,31 +110,6 @@ fi
110110
### PATTERNS ###
111111
if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
112112

113-
# Check for imports from pandas.core.common instead of `import pandas.core.common as com`
114-
# Check for imports from collections.abc instead of `from collections import abc`
115-
MSG='Check for non-standard imports' ; echo $MSG
116-
invgrep -R --include="*.py*" -E "from pandas.core.common import" pandas
117-
RET=$(($RET + $?)) ; echo $MSG "DONE"
118-
invgrep -R --include="*.py*" -E "from pandas.core import common" pandas
119-
RET=$(($RET + $?)) ; echo $MSG "DONE"
120-
invgrep -R --include="*.py*" -E "from collections.abc import" pandas
121-
RET=$(($RET + $?)) ; echo $MSG "DONE"
122-
invgrep -R --include="*.py*" -E "from numpy import nan" pandas
123-
RET=$(($RET + $?)) ; echo $MSG "DONE"
124-
125-
# Checks for test suite
126-
# Check for imports from pandas._testing instead of `import pandas._testing as tm`
127-
invgrep -R --include="*.py*" -E "from pandas._testing import" pandas/tests
128-
RET=$(($RET + $?)) ; echo $MSG "DONE"
129-
invgrep -R --include="*.py*" -E "from pandas import _testing as tm" pandas/tests
130-
RET=$(($RET + $?)) ; echo $MSG "DONE"
131-
132-
# No direct imports from conftest
133-
invgrep -R --include="*.py*" -E "conftest import" pandas/tests
134-
RET=$(($RET + $?)) ; echo $MSG "DONE"
135-
invgrep -R --include="*.py*" -E "import conftest" pandas/tests
136-
RET=$(($RET + $?)) ; echo $MSG "DONE"
137-
138113
MSG='Check for use of exec' ; echo $MSG
139114
invgrep -R --include="*.py*" -E "[^a-zA-Z0-9_]exec\(" pandas
140115
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -203,10 +178,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
203178
RET=$(($RET + $?)) ; echo $MSG "DONE"
204179

205180
MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
206-
check_namespace "Series"
207-
RET=$(($RET + $?))
208-
check_namespace "DataFrame"
209-
RET=$(($RET + $?))
181+
for class in "Series" "DataFrame" "Index"; do
182+
check_namespace ${class}
183+
RET=$(($RET + $?))
184+
done
210185
echo $MSG "DONE"
211186
fi
212187

doc/source/whatsnew/v1.1.4.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Fixed regressions
2323
- Fixed regression in :meth:`DataFrame.resample(...).apply(...)` raising ``AttributeError`` when input was a :class:`DataFrame` and only a :class:`Series` was evaluated (:issue:`36951`)
2424
- Fixed regression in :class:`PeriodDtype` comparing both equal and unequal to its string representation (:issue:`37265`)
2525
- Fixed regression in certain offsets (:meth:`pd.offsets.Day() <pandas.tseries.offsets.Day>` and below) no longer being hashable (:issue:`37267`)
26+
- Fixed regression in :class:`StataReader` which required ``chunksize`` to be manually set when using an iterator to read a dataset (:issue:`37280`)
2627

2728
.. ---------------------------------------------------------------------------
2829

doc/source/whatsnew/v1.2.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ Datetimelike
371371
- Bug in :meth:`DatetimeIndex.slice_locs` where ``datetime.date`` objects were not accepted (:issue:`34077`)
372372
- Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with ``datetime64``, ``timedelta64`` or ``Period`` dtype placement of ``NaT`` values being inconsistent with ``NumPy`` (:issue:`36176`, :issue:`36254`)
373373
- Inconsistency in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` setitem casting arrays of strings to datetimelike scalars but not scalar strings (:issue:`36261`)
374+
- Bug in :meth:`DatetimeArray.take` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37356`)
374375
- Bug in :meth:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`)
375376
- :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`)
376377
- Bug in :meth:`DatetimeIndex.equals` and :meth:`TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`)
@@ -433,6 +434,7 @@ Indexing
433434
- Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`)
434435
- Bug in :meth:`DataFrame.__getitem__` and :meth:`DataFrame.loc.__getitem__` with :class:`IntervalIndex` columns and a numeric indexer (:issue:`26490`)
435436
- Bug in :meth:`Series.loc.__getitem__` with a non-unique :class:`MultiIndex` and an empty-list indexer (:issue:`13691`)
437+
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` with a level named "0" (:issue:`37194`)
436438

437439
Missing
438440
^^^^^^^
@@ -505,6 +507,7 @@ Reshaping
505507
- Bug in :func:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
506508
- Bug in :meth:`DataFrame.agg` with ``func={'name':<FUNC>}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`)
507509
- Bug in :meth:`Series.transform` giving incorrect results or raising when the argument ``func`` was a dictionary (:issue:`35811`)
510+
- Bug in :func:`join` returning a non-deterministic level order for the resulting :class:`MultiIndex` (:issue:`36910`)
508511
-
509512

510513
Sparse

pandas/core/aggregation.py

Lines changed: 15 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
from pandas.core.dtypes.cast import is_nested_object
3333
from pandas.core.dtypes.common import is_dict_like, is_list_like
34-
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
34+
from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries
3535

3636
from pandas.core.base import DataError, SpecificationError
3737
import pandas.core.common as com
@@ -621,58 +621,27 @@ def aggregate(obj, arg: AggFuncType, *args, **kwargs):
621621
# set the final keys
622622
keys = list(arg.keys())
623623

624-
# combine results
625-
626-
def is_any_series() -> bool:
627-
# return a boolean if we have *any* nested series
628-
return any(isinstance(r, ABCSeries) for r in results.values())
629-
630-
def is_any_frame() -> bool:
631-
# return a boolean if we have *any* nested series
632-
return any(isinstance(r, ABCDataFrame) for r in results.values())
633-
634-
if isinstance(results, list):
635-
return concat(results, keys=keys, axis=1, sort=True), True
636-
637-
elif is_any_frame():
638-
# we have a dict of DataFrames
639-
# return a MI DataFrame
624+
# Avoid making two isinstance calls in all and any below
625+
is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
640626

627+
# combine results
628+
if all(is_ndframe):
641629
keys_to_use = [k for k in keys if not results[k].empty]
642630
# Have to check, if at least one DataFrame is not empty.
643631
keys_to_use = keys_to_use if keys_to_use != [] else keys
644-
return (
645-
concat([results[k] for k in keys_to_use], keys=keys_to_use, axis=1),
646-
True,
632+
axis = 0 if isinstance(obj, ABCSeries) else 1
633+
result = concat({k: results[k] for k in keys_to_use}, axis=axis)
634+
elif any(is_ndframe):
635+
# There is a mix of NDFrames and scalars
636+
raise ValueError(
637+
"cannot perform both aggregation "
638+
"and transformation operations "
639+
"simultaneously"
647640
)
641+
else:
642+
from pandas import Series
648643

649-
elif isinstance(obj, ABCSeries) and is_any_series():
650-
651-
# we have a dict of Series
652-
# return a MI Series
653-
try:
654-
result = concat(results)
655-
except TypeError as err:
656-
# we want to give a nice error here if
657-
# we have non-same sized objects, so
658-
# we don't automatically broadcast
659-
660-
raise ValueError(
661-
"cannot perform both aggregation "
662-
"and transformation operations "
663-
"simultaneously"
664-
) from err
665-
666-
return result, True
667-
668-
# fall thru
669-
from pandas import DataFrame, Series
670-
671-
try:
672-
result = DataFrame(results)
673-
except ValueError:
674644
# we have a dict of scalars
675-
676645
# GH 36212 use name only if obj is a series
677646
if obj.ndim == 1:
678647
obj = cast("Series", obj)

pandas/core/arrays/datetimelike.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ def _validate_fill_value(self, fill_value):
480480
fill_value = self._validate_scalar(fill_value)
481481
except TypeError as err:
482482
raise ValueError(msg) from err
483-
return self._unbox(fill_value)
483+
return self._unbox(fill_value, setitem=True)
484484

485485
def _validate_shift_value(self, fill_value):
486486
# TODO(2.0): once this deprecation is enforced, use _validate_fill_value

pandas/core/arrays/datetimes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,7 @@ def _check_compatible_with(self, other, setitem: bool = False):
473473
if setitem:
474474
# Stricter check for setitem vs comparison methods
475475
if not timezones.tz_compare(self.tz, other.tz):
476-
raise ValueError(f"Timezones don't match. '{self.tz} != {other.tz}'")
476+
raise ValueError(f"Timezones don't match. '{self.tz}' != '{other.tz}'")
477477

478478
def _maybe_clear_freq(self):
479479
self._freq = None

pandas/core/dtypes/generic.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def _check(cls, inst) -> bool:
5353
},
5454
)
5555

56+
ABCNDFrame = create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe"))
5657
ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))
5758
ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))
5859

pandas/core/frame.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7442,9 +7442,9 @@ def _gotitem(
74427442
74437443
>>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
74447444
A B
7445-
max NaN 8.0
7446-
min 1.0 2.0
74477445
sum 12.0 NaN
7446+
min 1.0 2.0
7447+
max NaN 8.0
74487448
74497449
Aggregate different functions over the columns and rename the index of the resulting
74507450
DataFrame.
@@ -8547,6 +8547,7 @@ def count(self, axis=0, level=None, numeric_only=False):
85478547
See Also
85488548
--------
85498549
Series.count: Number of non-NA elements in a Series.
8550+
DataFrame.value_counts: Count unique combinations of columns.
85508551
DataFrame.shape: Number of DataFrame rows and columns (including NA
85518552
elements).
85528553
DataFrame.isna: Boolean same-sized DataFrame showing places of NA

pandas/core/generic.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3684,7 +3684,9 @@ class animal locomotion
36843684
index = self.index
36853685
if isinstance(index, MultiIndex):
36863686
try:
3687-
loc, new_index = self.index.get_loc_level(key, drop_level=drop_level)
3687+
loc, new_index = self.index._get_loc_level(
3688+
key, level=0, drop_level=drop_level
3689+
)
36883690
except TypeError as e:
36893691
raise TypeError(f"Expected label or tuple of labels, got {key}") from e
36903692
else:

pandas/core/indexes/base.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ def _outer_indexer(self, left, right):
220220

221221
_typ = "index"
222222
_data: Union[ExtensionArray, np.ndarray]
223-
_id: _Identity
223+
_id: Optional[_Identity] = None
224224
_name: Label = None
225225
# MultiIndex.levels previously allowed setting the index name. We
226226
# don't allow this anymore, and raise if it happens rather than
@@ -541,10 +541,14 @@ def is_(self, other) -> bool:
541541
--------
542542
Index.identical : Works like ``Index.is_`` but also checks metadata.
543543
"""
544-
try:
545-
return self._id is other._id
546-
except AttributeError:
544+
if self is other:
545+
return True
546+
elif not hasattr(other, "_id"):
547547
return False
548+
elif com.any_none(self._id, other._id):
549+
return False
550+
else:
551+
return self._id is other._id
548552

549553
def _reset_identity(self) -> None:
550554
"""
@@ -1552,12 +1556,19 @@ def droplevel(self, level=0):
15521556

15531557
levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
15541558

1555-
if len(level) == 0:
1559+
return self._drop_level_numbers(levnums)
1560+
1561+
def _drop_level_numbers(self, levnums: List[int]):
1562+
"""
1563+
Drop MultiIndex levels by level _number_, not name.
1564+
"""
1565+
1566+
if len(levnums) == 0:
15561567
return self
1557-
if len(level) >= self.nlevels:
1568+
if len(levnums) >= self.nlevels:
15581569
raise ValueError(
1559-
f"Cannot remove {len(level)} levels from an index with {self.nlevels} "
1560-
"levels: at least one level must be left."
1570+
f"Cannot remove {len(levnums)} levels from an index with "
1571+
f"{self.nlevels} levels: at least one level must be left."
15611572
)
15621573
# The two checks above guarantee that here self is a MultiIndex
15631574
self = cast("MultiIndex", self)
@@ -3586,8 +3597,12 @@ def _join_multi(self, other, how, return_indexers=True):
35863597
from pandas.core.reshape.merge import restore_dropped_levels_multijoin
35873598

35883599
# figure out join names
3589-
self_names = set(com.not_none(*self.names))
3590-
other_names = set(com.not_none(*other.names))
3600+
self_names_list = list(com.not_none(*self.names))
3601+
other_names_list = list(com.not_none(*other.names))
3602+
self_names_order = self_names_list.index
3603+
other_names_order = other_names_list.index
3604+
self_names = set(self_names_list)
3605+
other_names = set(other_names_list)
35913606
overlap = self_names & other_names
35923607

35933608
# need at least 1 in common
@@ -3597,8 +3612,8 @@ def _join_multi(self, other, how, return_indexers=True):
35973612
if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
35983613

35993614
# Drop the non-matching levels from left and right respectively
3600-
ldrop_names = list(self_names - overlap)
3601-
rdrop_names = list(other_names - overlap)
3615+
ldrop_names = sorted(self_names - overlap, key=self_names_order)
3616+
rdrop_names = sorted(other_names - overlap, key=other_names_order)
36023617

36033618
# if only the order differs
36043619
if not len(ldrop_names + rdrop_names):

pandas/core/indexes/category.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -235,20 +235,23 @@ def _shallow_copy(self, values=None, name: Label = no_default):
235235

236236
return super()._shallow_copy(values=values, name=name)
237237

238-
def _is_dtype_compat(self, other) -> bool:
238+
def _is_dtype_compat(self, other) -> Categorical:
239239
"""
240240
*this is an internal non-public method*
241241
242242
provide a comparison between the dtype of self and other (coercing if
243243
needed)
244244
245+
Returns
246+
-------
247+
Categorical
248+
245249
Raises
246250
------
247251
TypeError if the dtypes are not compatible
248252
"""
249253
if is_categorical_dtype(other):
250-
if isinstance(other, CategoricalIndex):
251-
other = other._values
254+
other = extract_array(other)
252255
if not other.is_dtype_equal(self):
253256
raise TypeError(
254257
"categories must match existing categories when appending"
@@ -263,6 +266,7 @@ def _is_dtype_compat(self, other) -> bool:
263266
raise TypeError(
264267
"cannot append a non-category item to a CategoricalIndex"
265268
)
269+
other = other._values
266270

267271
return other
268272

0 commit comments

Comments
 (0)