pandas-dev
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 37 additions & 5 deletions
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
Lines changed: 4 additions & 29 deletions
diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst
Lines changed: 1 addition & 0 deletions
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
Lines changed: 3 additions & 0 deletions
diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py
Lines changed: 15 additions & 46 deletions
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py
Lines changed: 1 addition & 0 deletions
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
Lines changed: 3 additions & 2 deletions
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
Lines changed: 3 additions & 1 deletion
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
Lines changed: 27 additions & 12 deletions
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
Lines changed: 7 additions & 3 deletions
@@ -56,12 +56,44 @@ repos:
     -   id: incorrect-sphinx-directives
         name: Check for incorrect Sphinx directives
         language: pygrep
-        entry: >-
-            \.\. (autosummary|contents|currentmodule|deprecated
-            |function|image|important|include|ipython|literalinclude
-            |math|module|note|raw|seealso|toctree|versionadded
-            |versionchanged|warning):[^:]
+        entry: |
+            (?x)
+            # Check for cases of e.g. .. warning: instead of .. warning::
+            \.\.\ (
+                autosummary|contents|currentmodule|deprecated|
+                function|image|important|include|ipython|literalinclude|
+                math|module|note|raw|seealso|toctree|versionadded|
+                versionchanged|warning
+            ):[^:]
         files: \.(py|pyx|rst)$
+    -   id: non-standard-imports
+        name: Check for non-standard imports
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
+            from\ pandas\.core\.common\ import|
+            from\ pandas\.core\ import\ common|
+
+            # Check for imports from collections.abc instead of `from collections import abc`
+            from\ collections\.abc\ import|
+
+            from\ numpy\ import\ nan
+        types: [python]
+    -   id: non-standard-imports-in-tests
+        name: Check for non-standard imports in test suite
+        language: pygrep
+        entry: |
+            (?x)
+            # Check for imports from pandas._testing instead of `import pandas._testing as tm`
+            from\ pandas\._testing\ import|
+            from\ pandas\ import\ _testing\ as\ tm|
+
+            # No direct imports from conftest
+            conftest\ import|
+            import\ conftest
+        types: [python]
+        files: ^pandas/tests/
     -   id: incorrect-code-directives
         name: Check for incorrect code block or IPython directives
         language: pygrep
 
@@ -110,31 +110,6 @@ fi
 ### PATTERNS ###
 if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
 
-    # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
-    # Check for imports from collections.abc instead of `from collections import abc`
-    MSG='Check for non-standard imports' ; echo $MSG
-    invgrep -R --include="*.py*" -E "from pandas.core.common import" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "from pandas.core import common" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "from collections.abc import" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "from numpy import nan" pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # Checks for test suite
-    # Check for imports from pandas._testing instead of `import pandas._testing as tm`
-    invgrep -R --include="*.py*" -E "from pandas._testing import" pandas/tests
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "from pandas import _testing as tm" pandas/tests
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # No direct imports from conftest
-    invgrep -R --include="*.py*" -E "conftest import" pandas/tests
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-    invgrep -R --include="*.py*" -E "import conftest" pandas/tests
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Check for use of exec' ; echo $MSG
     invgrep -R --include="*.py*" -E "[^a-zA-Z0-9_]exec\(" pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -203,10 +178,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
-    check_namespace "Series"
-    RET=$(($RET + $?))
-    check_namespace "DataFrame"
-    RET=$(($RET + $?))
+    for class in "Series" "DataFrame" "Index"; do
+        check_namespace ${class}
+        RET=$(($RET + $?))
+    done
     echo $MSG "DONE"
 fi
 
 
@@ -23,6 +23,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.resample(...).apply(...)` raising ``AttributeError`` when input was a :class:`DataFrame` and only a :class:`Series` was evaluated (:issue:`36951`)
 - Fixed regression in :class:`PeriodDtype` comparing both equal and unequal to its string representation (:issue:`37265`)
 - Fixed regression in certain offsets (:meth:`pd.offsets.Day() <pandas.tseries.offsets.Day>` and below) no longer being hashable (:issue:`37267`)
+- Fixed regression in :class:`StataReader` which required ``chunksize`` to be manually set when using an iterator to read a dataset (:issue:`37280`)
 
 .. ---------------------------------------------------------------------------
 
 
@@ -371,6 +371,7 @@ Datetimelike
 - Bug in :meth:`DatetimeIndex.slice_locs` where ``datetime.date`` objects were not accepted (:issue:`34077`)
 - Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with ``datetime64``, ``timedelta64`` or ``Period`` dtype placement of ``NaT`` values being inconsistent with ``NumPy`` (:issue:`36176`, :issue:`36254`)
 - Inconsistency in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray`  setitem casting arrays of strings to datetimelike scalars but not scalar strings (:issue:`36261`)
+- Bug in :meth:`DatetimeArray.take` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37356`)
 - Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`)
 - :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`)
 - Bug in :meth:`DatetimeIndex.equals` and :meth:`TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`)
@@ -433,6 +434,7 @@ Indexing
 - Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`)
 - Bug in :meth:`DataFrame.__getitem__` and :meth:`DataFrame.loc.__getitem__` with :class:`IntervalIndex` columns and a numeric indexer (:issue:`26490`)
 - Bug in :meth:`Series.loc.__getitem__` with a non-unique :class:`MultiIndex` and an empty-list indexer (:issue:`13691`)
+- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` with a level named "0" (:issue:`37194`)
 
 Missing
 ^^^^^^^
@@ -505,6 +507,7 @@ Reshaping
 - Bug in :func:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
 - Bug in :meth:`DataFrame.agg` with ``func={'name':<FUNC>}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`)
 - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`)
+- Bug in :func:`join` returning a non-deterministic level order for the resulting :class:`MultiIndex` (:issue:`36910`)
 -
 
 Sparse
 
@@ -31,7 +31,7 @@
 
 from pandas.core.dtypes.cast import is_nested_object
 from pandas.core.dtypes.common import is_dict_like, is_list_like
-from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
+from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries
 
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
@@ -621,58 +621,27 @@ def aggregate(obj, arg: AggFuncType, *args, **kwargs):
         # set the final keys
         keys = list(arg.keys())
 
-        # combine results
-
-        def is_any_series() -> bool:
-            # return a boolean if we have *any* nested series
-            return any(isinstance(r, ABCSeries) for r in results.values())
-
-        def is_any_frame() -> bool:
-            # return a boolean if we have *any* nested series
-            return any(isinstance(r, ABCDataFrame) for r in results.values())
-
-        if isinstance(results, list):
-            return concat(results, keys=keys, axis=1, sort=True), True
-
-        elif is_any_frame():
-            # we have a dict of DataFrames
-            # return a MI DataFrame
+        # Avoid making two isinstance calls in all and any below
+        is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
 
+        # combine results
+        if all(is_ndframe):
             keys_to_use = [k for k in keys if not results[k].empty]
             # Have to check, if at least one DataFrame is not empty.
             keys_to_use = keys_to_use if keys_to_use != [] else keys
-            return (
-                concat([results[k] for k in keys_to_use], keys=keys_to_use, axis=1),
-                True,
+            axis = 0 if isinstance(obj, ABCSeries) else 1
+            result = concat({k: results[k] for k in keys_to_use}, axis=axis)
+        elif any(is_ndframe):
+            # There is a mix of NDFrames and scalars
+            raise ValueError(
+                "cannot perform both aggregation "
+                "and transformation operations "
+                "simultaneously"
             )
+        else:
+            from pandas import Series
 
-        elif isinstance(obj, ABCSeries) and is_any_series():
-
-            # we have a dict of Series
-            # return a MI Series
-            try:
-                result = concat(results)
-            except TypeError as err:
-                # we want to give a nice error here if
-                # we have non-same sized objects, so
-                # we don't automatically broadcast
-
-                raise ValueError(
-                    "cannot perform both aggregation "
-                    "and transformation operations "
-                    "simultaneously"
-                ) from err
-
-            return result, True
-
-        # fall thru
-        from pandas import DataFrame, Series
-
-        try:
-            result = DataFrame(results)
-        except ValueError:
             # we have a dict of scalars
-
             # GH 36212 use name only if obj is a series
             if obj.ndim == 1:
                 obj = cast("Series", obj)
 
@@ -480,7 +480,7 @@ def _validate_fill_value(self, fill_value):
             fill_value = self._validate_scalar(fill_value)
         except TypeError as err:
             raise ValueError(msg) from err
-        return self._unbox(fill_value)
+        return self._unbox(fill_value, setitem=True)
 
     def _validate_shift_value(self, fill_value):
         # TODO(2.0): once this deprecation is enforced, use _validate_fill_value
 
@@ -473,7 +473,7 @@ def _check_compatible_with(self, other, setitem: bool = False):
         if setitem:
             # Stricter check for setitem vs comparison methods
             if not timezones.tz_compare(self.tz, other.tz):
-                raise ValueError(f"Timezones don't match. '{self.tz} != {other.tz}'")
+                raise ValueError(f"Timezones don't match. '{self.tz}' != '{other.tz}'")
 
     def _maybe_clear_freq(self):
         self._freq = None
 
@@ -53,6 +53,7 @@ def _check(cls, inst) -> bool:
     },
 )
 
+ABCNDFrame = create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe"))
 ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))
 ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))
 
 
@@ -7442,9 +7442,9 @@ def _gotitem(
 
     >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})
             A    B
-    max   NaN  8.0
-    min   1.0  2.0
     sum  12.0  NaN
+    min   1.0  2.0
+    max   NaN  8.0
 
     Aggregate different functions over the columns and rename the index of the resulting
     DataFrame.
@@ -8547,6 +8547,7 @@ def count(self, axis=0, level=None, numeric_only=False):
         See Also
         --------
         Series.count: Number of non-NA elements in a Series.
+        DataFrame.value_counts: Count unique combinations of columns.
         DataFrame.shape: Number of DataFrame rows and columns (including NA
             elements).
         DataFrame.isna: Boolean same-sized DataFrame showing places of NA
 
@@ -3684,7 +3684,9 @@ class   animal   locomotion
         index = self.index
         if isinstance(index, MultiIndex):
             try:
-                loc, new_index = self.index.get_loc_level(key, drop_level=drop_level)
+                loc, new_index = self.index._get_loc_level(
+                    key, level=0, drop_level=drop_level
+                )
             except TypeError as e:
                 raise TypeError(f"Expected label or tuple of labels, got {key}") from e
         else:
 
@@ -220,7 +220,7 @@ def _outer_indexer(self, left, right):
 
     _typ = "index"
     _data: Union[ExtensionArray, np.ndarray]
-    _id: _Identity
+    _id: Optional[_Identity] = None
     _name: Label = None
     # MultiIndex.levels previously allowed setting the index name. We
     # don't allow this anymore, and raise if it happens rather than
@@ -541,10 +541,14 @@ def is_(self, other) -> bool:
         --------
         Index.identical : Works like ``Index.is_`` but also checks metadata.
         """
-        try:
-            return self._id is other._id
-        except AttributeError:
+        if self is other:
+            return True
+        elif not hasattr(other, "_id"):
             return False
+        elif com.any_none(self._id, other._id):
+            return False
+        else:
+            return self._id is other._id
 
     def _reset_identity(self) -> None:
         """
@@ -1552,12 +1556,19 @@ def droplevel(self, level=0):
 
         levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
 
-        if len(level) == 0:
+        return self._drop_level_numbers(levnums)
+
+    def _drop_level_numbers(self, levnums: List[int]):
+        """
+        Drop MultiIndex levels by level _number_, not name.
+        """
+
+        if len(levnums) == 0:
             return self
-        if len(level) >= self.nlevels:
+        if len(levnums) >= self.nlevels:
             raise ValueError(
-                f"Cannot remove {len(level)} levels from an index with {self.nlevels} "
-                "levels: at least one level must be left."
+                f"Cannot remove {len(levnums)} levels from an index with "
+                f"{self.nlevels} levels: at least one level must be left."
             )
         # The two checks above guarantee that here self is a MultiIndex
         self = cast("MultiIndex", self)
@@ -3586,8 +3597,12 @@ def _join_multi(self, other, how, return_indexers=True):
         from pandas.core.reshape.merge import restore_dropped_levels_multijoin
 
         # figure out join names
-        self_names = set(com.not_none(*self.names))
-        other_names = set(com.not_none(*other.names))
+        self_names_list = list(com.not_none(*self.names))
+        other_names_list = list(com.not_none(*other.names))
+        self_names_order = self_names_list.index
+        other_names_order = other_names_list.index
+        self_names = set(self_names_list)
+        other_names = set(other_names_list)
         overlap = self_names & other_names
 
         # need at least 1 in common
@@ -3597,8 +3612,8 @@ def _join_multi(self, other, how, return_indexers=True):
         if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
 
             # Drop the non-matching levels from left and right respectively
-            ldrop_names = list(self_names - overlap)
-            rdrop_names = list(other_names - overlap)
+            ldrop_names = sorted(self_names - overlap, key=self_names_order)
+            rdrop_names = sorted(other_names - overlap, key=other_names_order)
 
             # if only the order differs
             if not len(ldrop_names + rdrop_names):
 
@@ -235,20 +235,23 @@ def _shallow_copy(self, values=None, name: Label = no_default):
 
         return super()._shallow_copy(values=values, name=name)
 
-    def _is_dtype_compat(self, other) -> bool:
+    def _is_dtype_compat(self, other) -> Categorical:
         """
         *this is an internal non-public method*
 
         provide a comparison between the dtype of self and other (coercing if
         needed)
 
+        Returns
+        -------
+        Categorical
+
         Raises
         ------
         TypeError if the dtypes are not compatible
         """
         if is_categorical_dtype(other):
-            if isinstance(other, CategoricalIndex):
-                other = other._values
+            other = extract_array(other)
             if not other.is_dtype_equal(self):
                 raise TypeError(
                     "categories must match existing categories when appending"
@@ -263,6 +266,7 @@ def _is_dtype_compat(self, other) -> bool:
                 raise TypeError(
                     "cannot append a non-category item to a CategoricalIndex"
                 )
+            other = other._values
 
         return other
Original file line number	Diff line number	Diff line change
`@@ -53,6 +53,7 @@ def _check(cls, inst) -> bool:`
`53`	`53`	`},`
`54`	`54`	`)`
`55`	`55`
	`56`	`+ABCNDFrame = create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe"))`
`56`	`57`	`ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))`
`57`	`58`	`ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))`
`58`	`59`