From 1ba93e4c91eaf8ed8f7829ddce8df7a067e60f1d Mon Sep 17 00:00:00 2001
From: Patrick Hoefler
Date: Fri, 30 Dec 2022 10:00:40 +0100
Subject: [PATCH 1/7] BUG: DataFrame constructor not tracking reference if
 called with df or mgr

---
 doc/source/whatsnew/v2.0.0.rst              |  4 ++++
 pandas/core/frame.py                        |  3 +++
 pandas/tests/copy_view/test_constructors.py | 25 +++++++++++++++++++++
 3 files changed, 32 insertions(+)
 create mode 100644 pandas/tests/copy_view/test_constructors.py

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index b1387e9717079..ca1b4bb093dd3 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -878,6 +878,10 @@ Indexing
 - Bug in :meth:`DataFrame.compare` does not recognize differences when comparing ``NA`` with value in nullable dtypes (:issue:`48939`)
 -
 
+Copy on write
+^^^^^^^^^^^^^
+- Bug in :class:`DataFrame` constructor not tracking reference if called with another :class:`DataFrame` (:issue:``)
+
 Missing
 ^^^^^^^
 - Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e671f45216968..f1e119b81a6d5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -205,6 +205,7 @@
     to_arrays,
     treat_as_nested,
 )
+from pandas.core.internals.managers import _using_copy_on_write
 from pandas.core.reshape.melt import melt
 from pandas.core.series import Series
 from pandas.core.shared_docs import _shared_docs
@@ -643,6 +644,8 @@ def __init__(
         # -> use fastpath (without checking Manager type)
         if index is None and columns is None and dtype is None and not copy:
             # GH#33357 fastpath
+            if _using_copy_on_write():
+                data = data.copy(deep=False)
             NDFrame.__init__(self, data)
             return
 
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
new file mode 100644
index 0000000000000..e169bef916528
--- /dev/null
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -0,0 +1,25 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+from pandas.tests.copy_view.util import get_array
+
+
+@pytest.mark.parametrize("columns", [None, ["a"]])
+@pytest.mark.parametrize("func", [lambda x: x, lambda x: x._mgr])
+def test_dataframe_constructor_mgr(using_copy_on_write, func, columns):
+    df = DataFrame({"a": [1, 2, 3]})
+    df_orig = df.copy()
+
+    new_df = DataFrame(func(df))
+
+    assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
+    new_df.iloc[0] = 100
+
+    if using_copy_on_write:
+        assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
+        tm.assert_frame_equal(df, df_orig)
+    else:
+        assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
+        tm.assert_frame_equal(df, new_df)

From 24912bda509955c489ad90090175e48f11133e31 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler
Date: Fri, 30 Dec 2022 10:02:13 +0100
Subject: [PATCH 2/7] Add gh ref

---
 doc/source/whatsnew/v2.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index ca1b4bb093dd3..7a87ea0b3872b 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -880,7 +880,7 @@ Indexing
 
 Copy on write
 ^^^^^^^^^^^^^
-- Bug in :class:`DataFrame` constructor not tracking reference if called with another :class:`DataFrame` (:issue:``)
+- Bug in :class:`DataFrame` constructor not tracking reference if called with another :class:`DataFrame` (:issue:`50499`)
 
 Missing
 ^^^^^^^

From 11a1db854d28396eca29c734c72bb72f23d8f764 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler
Date: Fri, 30 Dec 2022 11:37:18 +0100
Subject: [PATCH 3/7] Fix tests

---
 pandas/tests/frame/methods/test_align.py | 7 +++++--
 pandas/tests/indexing/test_iloc.py       | 6 ++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py
index 88963dcc4b0f7..b1941dce53bf4 100644
--- a/pandas/tests/frame/methods/test_align.py
+++ b/pandas/tests/frame/methods/test_align.py
@@ -40,12 +40,15 @@ def test_frame_align_aware(self):
         assert new1.index.tz is timezone.utc
         assert new2.index.tz is timezone.utc
 
-    def test_align_float(self, float_frame):
+    def test_align_float(self, float_frame, using_copy_on_write):
         af, bf = float_frame.align(float_frame)
         assert af._mgr is not float_frame._mgr
 
         af, bf = float_frame.align(float_frame, copy=False)
-        assert af._mgr is float_frame._mgr
+        if using_copy_on_write:
+            assert not (af._mgr is float_frame._mgr)
+        else:
+            assert af._mgr is float_frame._mgr
 
         # axis = 0
         other = float_frame.iloc[:-5, :3]

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 0f85cb4515e13..91f9ad3244f20 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -76,7 +76,9 @@ class TestiLocBaseIndependent:
         ],
     )
     @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
-    def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager):
+    def test_iloc_setitem_fullcol_categorical(
+        self, indexer, key, using_array_manager, using_copy_on_write
+    ):
         frame = DataFrame({0: range(3)}, dtype=object)
         cat = Categorical(["alpha", "beta", "gamma"])
 
@@ -90,7 +92,7 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
         indexer(df)[key, 0] = cat
 
         expected = DataFrame({0: cat}).astype(object)
-        if not using_array_manager:
+        if not using_array_manager and not using_copy_on_write:
             assert np.shares_memory(df[0].values, orig_vals)
 
         tm.assert_frame_equal(df, expected)

From bd304fc3f33b061a02290a8f882501cca68bdabf Mon Sep 17 00:00:00 2001
From: Patrick Hoefler
Date: Fri, 30 Dec 2022 12:09:35 +0100
Subject: [PATCH 4/7] Restrict to dataframes, not managers

---
 pandas/core/frame.py                        | 4 ++--
 pandas/core/generic.py                      | 8 +++++++-
 pandas/tests/copy_view/test_constructors.py | 5 ++---
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f1e119b81a6d5..3f8f02aa657ed 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -638,14 +638,14 @@ def __init__(
 
         if isinstance(data, DataFrame):
             data = data._mgr
+            if not copy and _using_copy_on_write():
+                data = data.copy(deep=False)
 
         if isinstance(data, (BlockManager, ArrayManager)):
             # first check if a Manager is passed without any other arguments
             # -> use fastpath (without checking Manager type)
             if index is None and columns is None and dtype is None and not copy:
                 # GH#33357 fastpath
-                if _using_copy_on_write():
-                    data = data.copy(deep=False)
                 NDFrame.__init__(self, data)
                 return
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c893e9ce3d9a9..9a0bb55debe10 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -159,6 +159,7 @@
     SingleArrayManager,
 )
 from pandas.core.internals.construction import mgr_to_mgr
+from pandas.core.internals.managers import _using_copy_on_write
 from pandas.core.missing import (
     clean_fill_method,
     clean_reindex_fill_method,
@@ -5285,7 +5286,12 @@ def _reindex_with_indexers(
             # If we've made a copy once, no need to make another one
             copy = False
 
-        if (copy or copy is None) and new_data is self._mgr:
+        if (
+            (copy or copy is None)
+            and new_data is self._mgr
+            or not copy
+            and _using_copy_on_write()
+        ):
             new_data = new_data.copy(deep=copy)
 
         return self._constructor(new_data).__finalize__(self)

diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
index e169bef916528..bc4c9d91aee18 100644
--- a/pandas/tests/copy_view/test_constructors.py
+++ b/pandas/tests/copy_view/test_constructors.py
@@ -7,12 +7,11 @@
 
 
 @pytest.mark.parametrize("columns", [None, ["a"]])
-@pytest.mark.parametrize("func", [lambda x: x, lambda x: x._mgr])
-def test_dataframe_constructor_mgr(using_copy_on_write, func, columns):
+def test_dataframe_constructor_mgr(using_copy_on_write, columns):
     df = DataFrame({"a": [1, 2, 3]})
     df_orig = df.copy()
 
-    new_df = DataFrame(func(df))
+    new_df = DataFrame(df)
 
     assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a"))
     new_df.iloc[0] = 100

From c51a2046ff09c3499ff0a9a6e9f2c995d8f109c1 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler
Date: Fri, 30 Dec 2022 14:03:38 +0100
Subject: [PATCH 5/7] Fix for now

---
 pandas/tests/frame/methods/test_align.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py
index b1941dce53bf4..89da9017e43af 100644
--- a/pandas/tests/frame/methods/test_align.py
+++ b/pandas/tests/frame/methods/test_align.py
@@ -40,13 +40,13 @@ def test_frame_align_aware(self):
         assert new1.index.tz is timezone.utc
         assert new2.index.tz is timezone.utc
 
-    def test_align_float(self, float_frame, using_copy_on_write):
+    def test_align_float(self, float_frame, using_copy_on_write, using_array_manager):
         af, bf = float_frame.align(float_frame)
         assert af._mgr is not float_frame._mgr
 
         af, bf = float_frame.align(float_frame, copy=False)
-        if using_copy_on_write:
-            assert not (af._mgr is float_frame._mgr)
+        if using_copy_on_write or using_array_manager:
+            assert af._mgr is not float_frame._mgr
         else:
             assert af._mgr is float_frame._mgr

From f221e0c3ca0498348f472508c2b5fb2aef365262 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler
Date: Sat, 7 Jan 2023 22:53:47 +0100
Subject: [PATCH 6/7] Merge remote-tracking branch 'upstream/main' into
 cow_mgr_constructor

# Conflicts:
#	pandas/core/generic.py
---
 .github/workflows/codeql.yml | 3 +
 .github/workflows/macos-windows.yml | 2 +-
 .github/workflows/ubuntu.yml | 11 +-
 .github/workflows/wheels.yml | 3 +
 .pre-commit-config.yaml | 140 ++++-
 asv_bench/benchmarks/pandas_vb_common.py | 2 +-
 asv_bench/benchmarks/rolling.py | 5 +-
 asv_bench/benchmarks/series_methods.py | 19 +
 ci/deps/actions-310.yaml | 2 +-
 ci/deps/actions-38-downstream_compat.yaml | 2 +-
 ci/deps/actions-38.yaml | 2 +-
 ci/deps/actions-39.yaml | 2 +-
 ci/deps/circle-38-arm64.yaml | 2 +-
 ci/run_tests.sh | 7 +
 doc/scripts/eval_performance.py | 3 +-
 .../development/contributing_codebase.rst | 2 +-
 doc/source/reference/arrays.rst | 31 ++
 doc/source/user_guide/io.rst | 17 +-
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 doc/source/whatsnew/v1.5.0.rst | 46 ++
 doc/source/whatsnew/v2.0.0.rst | 24 +-
 environment.yml | 3 +-
 pandas/__init__.py | 2 +-
 pandas/_libs/algos.pyx | 53 +-
 pandas/_libs/lib.pyi | 3 +
 pandas/_libs/lib.pyx | 31 +-
 pandas/_libs/ops.pyx | 2 +-
 pandas/_libs/tslibs/np_datetime.pxd | 6 +
 pandas/_libs/tslibs/np_datetime.pyx | 10 +-
 pandas/_libs/tslibs/offsets.pyx | 9 +-
 pandas/_libs/tslibs/parsing.pyx | 10 +-
 .../tslibs/src/datetime/np_datetime_strings.c | 147 ++++--
 .../tslibs/src/datetime/np_datetime_strings.h | 17 +-
 pandas/_libs/tslibs/strptime.pyx | 68 +--
 pandas/_libs/tslibs/timestamps.pyx | 7 +-
 pandas/_libs/tslibs/tzconversion.pyx | 26 +-
 pandas/_testing/__init__.py | 2 +-
 pandas/_typing.py | 5 +
 pandas/compat/__init__.py | 8 +-
 .../{_compressors.py => compressors.py} | 0
 pandas/core/arrays/arrow/array.py | 40 +-
 pandas/core/arrays/base.py | 2 +-
 pandas/core/arrays/datetimelike.py | 133 +++--
 pandas/core/arrays/datetimes.py | 25 +-
 pandas/core/arrays/interval.py | 135 +++--
 pandas/core/arrays/period.py | 4 +-
 pandas/core/arrays/string_.py | 24 +-
 pandas/core/arrays/string_arrow.py | 6 +-
 pandas/core/base.py | 19 +-
 pandas/core/dtypes/common.py | 2 +
 pandas/core/groupby/generic.py | 155 +++++-
 pandas/core/groupby/groupby.py | 88 ++--
 pandas/core/groupby/grouper.py | 35 +-
 pandas/core/groupby/ops.py | 6 +-
 pandas/core/indexes/base.py | 6 +-
 pandas/core/indexes/datetimelike.py | 12 +-
 pandas/core/indexes/multi.py | 8 +-
 pandas/core/indexes/range.py | 7 +-
 pandas/core/interchange/dataframe.py | 8 +-
 pandas/core/{ => methods}/describe.py | 0
 pandas/core/resample.py | 6 +
 pandas/core/series.py | 13 +-
 pandas/core/shared_docs.py | 8 +-
 pandas/core/strings/base.py | 2 +-
 pandas/core/strings/object_array.py | 2 +-
 pandas/core/tools/datetimes.py | 2 +-
 pandas/core/window/doc.py | 8 +-
 pandas/core/window/ewm.py | 2 +-
 pandas/core/window/rolling.py | 33 +-
 pandas/io/_util.py | 23 +
 pandas/io/common.py | 8 +-
 pandas/io/formats/printing.py | 8 +-
 pandas/io/json/_json.py | 4 +-
 pandas/io/orc.py | 24 +-
 pandas/io/parquet.py | 19 +-
 pandas/io/xml.py | 28 +-
 pandas/plotting/_matplotlib/hist.py | 17 +-
 pandas/tests/api/test_api.py | 2 +-
 pandas/tests/arithmetic/test_timedelta64.py | 10 +-
 pandas/tests/arrays/boolean/test_function.py | 2 +-
 pandas/tests/copy_view/test_methods.py | 244 +++++++++
 pandas/tests/dtypes/test_common.py | 18 +
 pandas/tests/extension/base/setitem.py | 8 +
 pandas/tests/extension/test_arrow.py | 45 +-
 pandas/tests/extension/test_string.py | 17 +
 pandas/tests/frame/indexing/test_mask.py | 11 +
 pandas/tests/frame/indexing/test_where.py | 6 +-
 pandas/tests/frame/methods/test_asfreq.py | 3 +-
 pandas/tests/frame/methods/test_asof.py | 8 +-
 .../tests/frame/methods/test_combine_first.py | 6 +-
 pandas/tests/frame/methods/test_equals.py | 3 +-
 pandas/tests/frame/methods/test_isetitem.py | 37 ++
 pandas/tests/frame/methods/test_truncate.py | 6 -
 pandas/tests/frame/test_query_eval.py | 6 +-
 pandas/tests/frame/test_reductions.py | 4 +
 pandas/tests/frame/test_stack_unstack.py | 6 +
 .../tests/groupby/aggregate/test_aggregate.py | 15 +
 .../tests/groupby/test_frame_value_counts.py | 38 ++
 pandas/tests/groupby/test_groupby.py | 10 +
 pandas/tests/groupby/test_timegrouper.py | 2 +
 pandas/tests/groupby/test_value_counts.py | 9 +-
 .../tests/groupby/transform/test_transform.py | 2 -
 .../indexes/datetimes/test_constructors.py | 9 +-
 .../indexing/multiindex/test_multiindex.py | 78 +--
 pandas/tests/io/formats/style/test_style.py | 4 +-
 .../json/test_json_table_schema_ext_dtype.py | 10 +-
 pandas/tests/io/parser/test_parse_dates.py | 2 +-
 pandas/tests/io/pytables/test_round_trip.py | 9 +-
 pandas/tests/io/pytables/test_store.py | 2 +-
 pandas/tests/io/test_common.py | 4 +-
 pandas/tests/io/test_compression.py | 2 +-
 pandas/tests/io/test_orc.py | 58 ++-
 pandas/tests/io/test_pickle.py | 4 +-
 pandas/tests/io/xml/test_xml.py | 82 ++-
 pandas/tests/libs/test_lib.py | 21 +
pandas/tests/plotting/test_hist_method.py | 30 ++ .../tests/resample/test_resampler_grouper.py | 22 + .../scalar/timestamp/test_constructors.py | 5 +- .../tests/scalar/timestamp/test_timestamp.py | 9 + pandas/tests/series/methods/test_replace.py | 3 +- pandas/tests/series/methods/test_to_numpy.py | 17 + .../tests/series/methods/test_tz_localize.py | 10 +- pandas/tests/tools/test_to_datetime.py | 54 ++ .../tseries/offsets/test_business_hour.py | 12 + pandas/tests/tslibs/test_parsing.py | 3 +- pandas/tests/window/test_api.py | 19 +- pandas/tests/window/test_dtypes.py | 2 +- pandas/tests/window/test_ewm.py | 8 +- pandas/tests/window/test_groupby.py | 39 +- pandas/tests/window/test_numba.py | 22 +- requirements-dev.txt | 3 +- ...check_for_inconsistent_pandas_namespace.py | 142 +++++ scripts/sync_flake8_versions.py | 13 +- .../test_inconsistent_namespace_check.py | 61 +++ scripts/tests/test_sync_flake8_versions.py | 3 - .../tests/test_validate_unwanted_patterns.py | 419 +++++++++++++++ scripts/validate_unwanted_patterns.py | 488 ++++++++++++++++++ setup.cfg | 16 - 138 files changed, 3156 insertions(+), 705 deletions(-) rename pandas/compat/{_compressors.py => compressors.py} (100%) rename pandas/core/{ => methods}/describe.py (100%) create mode 100644 pandas/io/_util.py create mode 100644 pandas/tests/frame/methods/test_isetitem.py create mode 100644 pandas/tests/series/methods/test_to_numpy.py create mode 100644 scripts/check_for_inconsistent_pandas_namespace.py create mode 100644 scripts/tests/test_inconsistent_namespace_check.py create mode 100644 scripts/tests/test_validate_unwanted_patterns.py create mode 100755 scripts/validate_unwanted_patterns.py diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 05a5d003c1dd1..23609f692df7c 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -8,6 +8,9 @@ concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} cancel-in-progress: true +permissions: + contents: read + jobs: analyze: runs-on: ubuntu-22.04 diff --git a/.github/workflows/macos-windows.yml b/.github/workflows/macos-windows.yml index 5efc1aa67b4cd..d762e20db196a 100644 --- a/.github/workflows/macos-windows.yml +++ b/.github/workflows/macos-windows.yml @@ -16,7 +16,7 @@ env: PANDAS_CI: 1 PYTEST_TARGET: pandas PATTERN: "not slow and not db and not network and not single_cpu" - TEST_ARGS: "-W error:::pandas" + ERROR_ON_WARNINGS: "1" permissions: diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 7dbf74278d433..9c93725ea15ec 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -38,7 +38,7 @@ jobs: - name: "Minimum Versions" env_file: actions-38-minimum_versions.yaml pattern: "not slow and not network and not single_cpu" - test_args: "" + error_on_warnings: "0" - name: "Locale: it_IT" env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" @@ -63,20 +63,22 @@ jobs: env_file: actions-310.yaml pattern: "not slow and not network and not single_cpu" pandas_copy_on_write: "1" - test_args: "" + error_on_warnings: "0" - name: "Data Manager" env_file: actions-38.yaml pattern: "not slow and not network and not single_cpu" pandas_data_manager: "array" - test_args: "" + error_on_warnings: "0" - name: "Pypy" env_file: actions-pypy-38.yaml pattern: "not slow and not network and not single_cpu" test_args: "--max-worker-restart 0" + error_on_warnings: "0" - name: "Numpy Dev" env_file: actions-310-numpydev.yaml pattern: "not slow 
and not network and not single_cpu" test_args: "-W error::DeprecationWarning:numpy -W error::FutureWarning:numpy" + error_on_warnings: "0" exclude: - env_file: actions-38.yaml pyarrow_version: "7" @@ -96,11 +98,12 @@ jobs: ENV_FILE: ci/deps/${{ matrix.env_file }} PATTERN: ${{ matrix.pattern }} EXTRA_APT: ${{ matrix.extra_apt || '' }} + ERROR_ON_WARNINGS: ${{ matrix.error_on_warnings || '1' }} LANG: ${{ matrix.lang || '' }} LC_ALL: ${{ matrix.lc_all || '' }} PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }} PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} - TEST_ARGS: ${{ matrix.test_args || '-W error:::pandas' }} + TEST_ARGS: ${{ matrix.test_args || '' }} PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }} PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} IS_PYPY: ${{ contains(matrix.env_file, 'pypy') }} diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 49d29c91f86cd..0e347b166e425 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -30,6 +30,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true +permissions: + contents: read + jobs: build_wheels: name: Build wheel for ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6b531215813d3..82043f79643e4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: types_or: [python, rst, markdown] additional_dependencies: [tomli] - repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.9.1 + rev: v0.10.1 hooks: - id: cython-lint - id: double-quote-cython-strings @@ -63,25 +63,23 @@ repos: '--extensions=c,h', '--headers=h', --recursive, - '--filter=-readability/casting,-runtime/int,-build/include_subdir' + --linelength=88, + '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' ] - repo: https://github.com/PyCQA/flake8 rev: 6.0.0 hooks: - id: flake8 - # Need to patch os.remove rule in pandas-dev-flaker - exclude: ^ci/fix_wheels.py additional_dependencies: &flake8_dependencies - flake8==6.0.0 - flake8-bugbear==22.7.1 - - pandas-dev-flaker==0.5.0 - repo: https://github.com/pycqa/pylint - rev: v2.15.6 + rev: v2.15.9 hooks: - id: pylint stages: [manual] - repo: https://github.com/pycqa/pylint - rev: v2.15.6 + rev: v2.15.9 hooks: - id: pylint alias: redefined-outer-name @@ -94,15 +92,14 @@ repos: |^pandas/util/_test_decorators\.py # keep excluded |^pandas/_version\.py # keep excluded |^pandas/conftest\.py # keep excluded - |^pandas/core/generic\.py args: [--disable=all, --enable=redefined-outer-name] stages: [manual] - repo: https://github.com/PyCQA/isort - rev: 5.10.1 + rev: 5.11.4 hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v3.2.2 + rev: v3.3.1 hooks: - id: pyupgrade args: [--py38-plus] @@ -183,6 +180,21 @@ repos: types: [rst] args: [--filename=*.rst] additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9] + - id: inconsistent-namespace-usage + name: 'Check for inconsistent use of pandas namespace' + entry: python scripts/check_for_inconsistent_pandas_namespace.py + exclude: ^pandas/core/interchange/ + language: python + types: [python] + - id: no-os-remove + name: Check code for instances of os.remove + entry: os\.remove + language: pygrep + types: [python] + files: ^pandas/tests/ + exclude: | + (?x)^ + pandas/tests/io/pytables/test_store\.py$ - id: unwanted-patterns 
name: Unwanted patterns language: pygrep @@ -192,6 +204,20 @@ repos: \#\ type:\ (?!ignore) |\#\ type:\s?ignore(?!\[) + # foo._class__ instead of type(foo) + |\.__class__ + + # np.bool/np.object instead of np.bool_/np.object_ + |np\.bool[^_8`] + |np\.object[^_8`] + + # imports from collections.abc instead of `from collections import abc` + |from\ collections\.abc\ import + + # Numpy + |from\ numpy\ import\ random + |from\ numpy\.random\ import + # Incorrect code-block / IPython directives |\.\.\ code-block\ :: |\.\.\ ipython\ :: @@ -200,7 +226,17 @@ repos: # Check for deprecated messages without sphinx directive |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.) + + # {foo!r} instead of {repr(foo)} + |!r} + + # builtin filter function + |(?obj`, not ` obj` language: pygrep @@ -231,6 +267,58 @@ repos: files: ^pandas/tests/extension/base types: [python] exclude: ^pandas/tests/extension/base/base\.py + - id: unwanted-patterns-in-tests + name: Unwanted patterns in tests + language: pygrep + entry: | + (?x) + # pytest.xfail instead of pytest.mark.xfail + pytest\.xfail + + # imports from pandas._testing instead of `import pandas._testing as tm` + |from\ pandas\._testing\ import + |from\ pandas\ import\ _testing\ as\ tm + + # No direct imports from conftest + |conftest\ import + |import\ conftest + + # pandas.testing instead of tm + |pd\.testing\. + + # pd.api.types instead of from pandas.api.types import ... + |(pd|pandas)\.api\.types\. + + # np.testing, np.array_equal + |(numpy|np)(\.testing|\.array_equal) + + # unittest.mock (use pytest builtin monkeypatch fixture instead) + |(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch) + + # pytest raises without context + |\s\ pytest.raises + + # pytest.warns (use tm.assert_produces_warning instead) + |pytest\.warns + files: ^pandas/tests/ + types_or: [python, cython, rst] + - id: unwanted-patterns-in-ea-tests + name: Unwanted patterns in EA tests + language: pygrep + entry: | + (?x) + tm.assert_(series|frame)_equal + files: ^pandas/tests/extension/base/ + exclude: ^pandas/tests/extension/base/base\.py$ + types_or: [python, cython, rst] + - id: unwanted-patterns-in-cython + name: Unwanted patterns in Cython code + language: pygrep + entry: | + (?x) + # `obj` as opposed to ` obj` + [a-zA-Z0-9*]>[ ] + types: [cython] - id: pip-to-conda name: Generate pip dependency from conda language: python @@ -251,6 +339,38 @@ repos: language: python types: [rst] files: ^doc/source/(development|reference)/ + - id: unwanted-patterns-bare-pytest-raises + name: Check for use of bare pytest raises + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises" + types: [python] + files: ^pandas/tests/ + exclude: ^pandas/tests/extension/ + - id: unwanted-patterns-private-function-across-module + name: Check for use of private functions across modules + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" + types: [python] + exclude: ^(asv_bench|pandas/tests|doc)/ + - id: unwanted-patterns-private-import-across-module + name: Check for import of private attributes across modules + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" + types: [python] + exclude: | + (?x) + ^(asv_bench|pandas/tests|doc)/ + |scripts/validate_min_versions_in_sync\.py$ + - id: unwanted-patterns-strings-to-concatenate + name: Check for use of not concatenated strings + language: python + entry: python 
scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate" + types_or: [python, cython] + - id: unwanted-patterns-strings-with-misplaced-whitespace + name: Check for strings with misplaced spaces + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" + types_or: [python, cython] - id: use-pd_array-in-core name: Import pandas.array as pd_array in core language: python diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index d3168bde0a783..97d91111e833a 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -70,7 +70,7 @@ class BaseIO: def remove(self, f): """Remove created files""" try: - os.remove(f) # noqa: PDF008 + os.remove(f) except OSError: # On Windows, attempting to remove a file that is in use # causes an exception to be raised diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index d65a1a39e8bc7..7e94763f3f293 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -292,7 +292,7 @@ class Groupby: ["sum", "median", "mean", "max", "min", "kurt", "sum"], [ ("rolling", {"window": 2}), - ("rolling", {"window": "30s", "on": "C"}), + ("rolling", {"window": "30s"}), ("expanding", {}), ], ) @@ -304,9 +304,10 @@ def setup(self, method, window_kwargs): { "A": [str(i) for i in range(N)] * 10, "B": list(range(N)) * 10, - "C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10), } ) + if isinstance(kwargs.get("window", None), str): + df.index = pd.date_range(start="1900-01-01", freq="1min", periods=N * 10) self.groupby_window = getattr(df.groupby("A"), window)(**kwargs) def time_method(self, method, window_kwargs): diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index dc86352082cca..a0dd52e9f17e4 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -382,4 +382,23 @@ def time_iter(self, dtype): pass +class ToNumpy: + def setup(self): + N = 1_000_000 + self.ser = Series( + np.random.randn( + N, + ) + ) + + def time_to_numpy(self): + self.ser.to_numpy() + + def time_to_numpy_double_copy(self): + self.ser.to_numpy(dtype="float64", copy=True) + + def time_to_numpy_copy(self): + self.ser.to_numpy(copy=True) + + from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index d787571d9d112..79457cd503876 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -48,7 +48,7 @@ dependencies: - pyxlsb - s3fs>=2021.08.0 - scipy - - sqlalchemy + - sqlalchemy<1.4.46 - tabulate - tzdata>=2022a - xarray diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml index 95ec98d72ebcc..6955baa282274 100644 --- a/ci/deps/actions-38-downstream_compat.yaml +++ b/ci/deps/actions-38-downstream_compat.yaml @@ -48,7 +48,7 @@ dependencies: - pyxlsb - s3fs>=2021.08.0 - scipy - - sqlalchemy + - sqlalchemy<1.4.46 - tabulate - xarray - xlrd diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml index f7de8bbee7d8a..004ef93606457 100644 --- a/ci/deps/actions-38.yaml +++ b/ci/deps/actions-38.yaml @@ -48,7 +48,7 @@ dependencies: - pyxlsb - s3fs>=2021.08.0 - scipy - - sqlalchemy + - sqlalchemy<1.4.46 - tabulate - xarray - xlrd diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index 821ec9c5d4234..ec7ffebde964f 100644 --- 
a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -48,7 +48,7 @@ dependencies: - pyxlsb - s3fs>=2021.08.0 - scipy - - sqlalchemy + - sqlalchemy<1.4.46 - tabulate - tzdata>=2022a - xarray diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml index c94ce79ea2ff8..b4171710564bf 100644 --- a/ci/deps/circle-38-arm64.yaml +++ b/ci/deps/circle-38-arm64.yaml @@ -49,7 +49,7 @@ dependencies: - pyxlsb - s3fs>=2021.08.0 - scipy - - sqlalchemy + - sqlalchemy<1.4.46 - tabulate - xarray - xlrd diff --git a/ci/run_tests.sh b/ci/run_tests.sh index e6de5caf955fc..a48d6c1ad6580 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -30,6 +30,13 @@ if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" fi +if [[ "$ERROR_ON_WARNINGS" == "1" ]]; then + for pth in $(find pandas -name '*.py' -not -path "pandas/tests/*" | sed -e 's/\.py//g' -e 's/\/__init__//g' -e 's/\//./g'); + do + PYTEST_CMD="$PYTEST_CMD -W error:::$pth" + done +fi + echo $PYTEST_CMD sh -c "$PYTEST_CMD" diff --git a/doc/scripts/eval_performance.py b/doc/scripts/eval_performance.py index 85d9ce4ad01e9..f6087e02a9330 100644 --- a/doc/scripts/eval_performance.py +++ b/doc/scripts/eval_performance.py @@ -6,8 +6,7 @@ from pandas import DataFrame setup_common = """from pandas import DataFrame -from numpy.random import randn -df = DataFrame(randn(%d, 3), columns=list('abc')) +df = DataFrame(np.random.randn(%d, 3), columns=list('abc')) %s""" setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'" diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index b05f026bbbb44..449b6de36cd24 100644 --- a/doc/source/development/contributing_codebase.rst +++ b/doc/source/development/contributing_codebase.rst @@ -43,7 +43,7 @@ Pre-commit ---------- Additionally, :ref:`Continuous Integration ` will run code formatting checks -like ``black``, ``flake8`` (including a `pandas-dev-flaker `_ plugin), +like ``black``, ``flake8``, ``isort``, and ``cpplint`` and more using `pre-commit hooks `_ Any warnings from these checks will cause the :ref:`Continuous Integration ` to fail; therefore, it is helpful to run the check yourself before submitting code. This diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 5b41de4e12e6f..aeaca7caea25d 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -60,6 +60,37 @@ is an :class:`ArrowDtype`. `Pyarrow `__ provides similar array and `data type `__ support as NumPy including first-class nullability support for all data types, immutability and more. +The table below shows the equivalent pyarrow-backed (``pa``), pandas extension, and numpy (``np``) types that are recognized by pandas. +Pyarrow-backed types below need to be passed into :class:`ArrowDtype` to be recognized by pandas e.g. 
``pd.ArrowDtype(pa.bool_())`` + +=============================================== ========================== =================== +PyArrow type pandas extension type NumPy type +=============================================== ========================== =================== +:external+pyarrow:py:func:`pyarrow.bool_` :class:`BooleanDtype` ``np.bool_`` +:external+pyarrow:py:func:`pyarrow.int8` :class:`Int8Dtype` ``np.int8`` +:external+pyarrow:py:func:`pyarrow.int16` :class:`Int16Dtype` ``np.int16`` +:external+pyarrow:py:func:`pyarrow.int32` :class:`Int32Dtype` ``np.int32`` +:external+pyarrow:py:func:`pyarrow.int64` :class:`Int64Dtype` ``np.int64`` +:external+pyarrow:py:func:`pyarrow.uint8` :class:`UInt8Dtype` ``np.uint8`` +:external+pyarrow:py:func:`pyarrow.uint16` :class:`UInt16Dtype` ``np.uint16`` +:external+pyarrow:py:func:`pyarrow.uint32` :class:`UInt32Dtype` ``np.uint32`` +:external+pyarrow:py:func:`pyarrow.uint64` :class:`UInt64Dtype` ``np.uint64`` +:external+pyarrow:py:func:`pyarrow.float32` :class:`Float32Dtype` ``np.float32`` +:external+pyarrow:py:func:`pyarrow.float64` :class:`Float64Dtype` ``np.float64`` +:external+pyarrow:py:func:`pyarrow.time32` (none) (none) +:external+pyarrow:py:func:`pyarrow.time64` (none) (none) +:external+pyarrow:py:func:`pyarrow.timestamp` :class:`DatetimeTZDtype` ``np.datetime64`` +:external+pyarrow:py:func:`pyarrow.date32` (none) (none) +:external+pyarrow:py:func:`pyarrow.date64` (none) (none) +:external+pyarrow:py:func:`pyarrow.duration` (none) ``np.timedelta64`` +:external+pyarrow:py:func:`pyarrow.binary` (none) (none) +:external+pyarrow:py:func:`pyarrow.string` :class:`StringDtype` ``np.str_`` +:external+pyarrow:py:func:`pyarrow.decimal128` (none) (none) +:external+pyarrow:py:func:`pyarrow.list_` (none) (none) +:external+pyarrow:py:func:`pyarrow.map_` (none) (none) +:external+pyarrow:py:func:`pyarrow.dictionary` :class:`CategoricalDtype` (none) +=============================================== ========================== =================== + .. note:: For string types (``pyarrow.string()``, ``string[pyarrow]``), PyArrow support is still facilitated diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 677be7bf29479..dc21b9f35d272 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -1255,6 +1255,21 @@ The bad line will be a list of strings that was split by the ``sep``: .. versionadded:: 1.4.0 +Note that the callable function will handle only a line with too many fields. +Bad lines caused by other errors will be silently skipped. + +For example: + +.. code-block:: ipython + + def bad_lines_func(line): + print(line) + + data = 'name,type\nname a,a is of type a\nname b,"b\" is of type b"' + data + pd.read_csv(data, on_bad_lines=bad_lines_func, engine="python") + +The line was not processed in this case, as a "bad line" here is caused by an escape character. You can also use the ``usecols`` parameter to eliminate extraneous column data that appear in some lines but not others: @@ -3833,7 +3848,7 @@ OpenDocument Spreadsheets The io methods for `Excel files`_ also support reading and writing OpenDocument spreadsheets using the `odfpy `__ module. The semantics and features for reading and writing OpenDocument spreadsheets match what can be done for `Excel files`_ using -``engine='odf'``. +``engine='odf'``. The optional dependency 'odfpy' needs to be installed. 
The :func:`~pandas.read_excel` method can read OpenDocument spreadsheets diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 5895a06792ffb..9dbe450261e54 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -320,7 +320,7 @@ Null-values are no longer coerced to NaN-value in value_counts and mode ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :meth:`Series.value_counts` and :meth:`Series.mode` no longer coerce ``None``, -``NaT`` and other null-values to a NaN-value for ``np.object``-dtype. This +``NaT`` and other null-values to a NaN-value for ``np.object_``-dtype. This behavior is now consistent with ``unique``, ``isin`` and others (:issue:`42688`). diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a1c374db91f8b..b61547d1523cf 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -290,6 +290,52 @@ and attributes without holding entire tree in memory (:issue:`45442`). .. _`lxml's iterparse`: https://lxml.de/3.2/parsing.html#iterparse-and-iterwalk .. _`etree's iterparse`: https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse +.. _whatsnew_150.enhancements.copy_on_write: + +Copy on Write +^^^^^^^^^^^^^ + +A new feature ``copy_on_write`` was added (:issue:`46958`). Copy on write ensures that +any DataFrame or Series derived from another in any way always behaves as a copy. +Copy on write disallows updating any other object than the object the method +was applied to. + +Copy on write can be enabled through: + +.. code-block:: python + + pd.set_option("mode.copy_on_write", True) + pd.options.mode.copy_on_write = True + +Alternatively, copy on write can be enabled locally through: + +.. code-block:: python + + with pd.option_context("mode.copy_on_write", True): + ... + +Without copy on write, the parent :class:`DataFrame` is updated when updating a child +:class:`DataFrame` that was derived from this :class:`DataFrame`. + +.. ipython:: python + + df = pd.DataFrame({"foo": [1, 2, 3], "bar": 1}) + view = df["foo"] + view.iloc[0] + df + +With copy on write enabled, df won't be updated anymore: + +.. ipython:: python + + with pd.option_context("mode.copy_on_write", True): + df = pd.DataFrame({"foo": [1, 2, 3], "bar": 1}) + view = df["foo"] + view.iloc[0] + df + +A more detailed explanation can be found `here `_. + .. _whatsnew_150.enhancements.other: Other enhancements diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7a87ea0b3872b..ddcf516076a49 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -39,9 +39,11 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following * :func:`read_fwf` * :func:`read_excel` * :func:`read_html` +* :func:`read_xml` * :func:`read_sql` * :func:`read_sql_query` * :func:`read_sql_table` +* :func:`read_orc` Additionally a new global configuration, ``mode.dtype_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions to select the nullable dtypes implementation. @@ -49,6 +51,7 @@ to select the nullable dtypes implementation. 
* :func:`read_csv` (with ``engine="pyarrow"`` or ``engine="python"``) * :func:`read_excel` * :func:`read_html` +* :func:`read_xml` * :func:`read_parquet` * :func:`read_orc` @@ -82,7 +85,7 @@ be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` ( Other enhancements ^^^^^^^^^^^^^^^^^^ - :func:`read_sas` now supports using ``encoding='infer'`` to correctly read and use the encoding specified by the sas file. (:issue:`48048`) -- :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` now preserve nullable dtypes instead of casting to numpy dtypes (:issue:`37493`) +- :meth:`.DataFrameGroupBy.quantile`, :meth:`.SeriesGroupBy.quantile` and :meth:`.DataFrameGroupBy.std` now preserve nullable dtypes instead of casting to numpy dtypes (:issue:`37493`) - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`) - :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`) - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) @@ -103,6 +106,7 @@ Other enhancements - :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`) - Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`) - Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`) +- Performance improvement in :func:`to_datetime` when format is given or can be inferred (:issue:`50465`) - .. --------------------------------------------------------------------------- @@ -461,7 +465,7 @@ to each element individually, e.g. :: Other API changes ^^^^^^^^^^^^^^^^^ -- The ``freq``, ``tz``, ``nanosecond``, and ``unit`` keywords in the :class:`Timestamp` constructor are now keyword-only (:issue:`45307`) +- The ``freq``, ``tz``, ``nanosecond``, and ``unit`` keywords in the :class:`Timestamp` constructor are now keyword-only (:issue:`45307`, :issue:`32526`) - Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`) - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` of now raises ``ParserError`` instead of ``IndexError`` when using the c parser. - Default value of ``dtype`` in :func:`get_dummies` is changed to ``bool`` from ``uint8`` (:issue:`45848`) @@ -725,6 +729,7 @@ Removal of prior version deprecations/changes - Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`) - Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`) - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`) +- Enforced deprecation of silently dropping nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. 
This will now raise a :class:`.errors.DataError` (:issue:`42834`) - Changed behavior in setting values with ``df.loc[:, foo] = bar`` or ``df.iloc[:, foo] = bar``, these now always attempt to set values inplace before falling back to casting (:issue:`45333`) - Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`) - Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`) @@ -754,6 +759,7 @@ Performance improvements - Performance improvement in :meth:`MultiIndex.putmask` (:issue:`49830`) - Performance improvement in :meth:`Index.union` and :meth:`MultiIndex.union` when index contains duplicates (:issue:`48900`) - Performance improvement in :meth:`Series.rank` for pyarrow-backed dtypes (:issue:`50264`) +- Performance improvement in :meth:`Series.searchsorted` for pyarrow-backed dtypes (:issue:`50447`) - Performance improvement in :meth:`Series.fillna` for extension array dtypes (:issue:`49722`, :issue:`50078`) - Performance improvement in :meth:`Index.join`, :meth:`Index.intersection` and :meth:`Index.union` for masked dtypes when :class:`Index` is monotonic (:issue:`50310`) - Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`) @@ -768,6 +774,7 @@ Performance improvements - Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`) - Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`) - Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` when key is a null slice (:issue:`50248`) +- Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`) - Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`) - Performance improvement in :meth:`DataFrame.join` when joining on a subset of a :class:`MultiIndex` (:issue:`48611`) - Performance improvement for :meth:`MultiIndex.intersection` (:issue:`48604`) @@ -775,6 +782,7 @@ Performance improvements - Performance improvement when iterating over pyarrow and nullable dtypes (:issue:`49825`, :issue:`49851`) - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`) - Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`) +- Performance improvement in :meth:`Series.to_numpy` if ``copy=True`` by avoiding copying twice (:issue:`24345`) - Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`) - Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``observed=False`` (:issue:`49596`) - Performance improvement in :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default). 
Now the index will be a :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49745`) @@ -799,6 +807,7 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :func:`pandas.infer_freq`, raising ``TypeError`` when inferred on :class:`RangeIndex` (:issue:`47084`) +- Bug in :func:`to_datetime` incorrectly raising ``OverflowError`` with string arguments corresponding to large integers (:issue:`50533`) - Bug in :func:`to_datetime` was raising on invalid offsets with ``errors='coerce'`` and ``infer_datetime_format=True`` (:issue:`48633`) - Bug in :class:`DatetimeIndex` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``dtype`` or data (:issue:`48659`) - Bug in subtracting a ``datetime`` scalar from :class:`DatetimeIndex` failing to retain the original ``freq`` attribute (:issue:`48818`) @@ -818,7 +827,7 @@ Datetimelike - Bug in :func:`to_datetime` was throwing ``ValueError`` when parsing dates with ISO8601 format where some values were not zero-padded (:issue:`21422`) - Bug in :func:`to_datetime` was giving incorrect results when using ``format='%Y%m%d'`` and ``errors='ignore'`` (:issue:`26493`) - Bug in :func:`to_datetime` was failing to parse date strings ``'today'`` and ``'now'`` if ``format`` was not ISO8601 (:issue:`50359`) -- +- Bug in :func:`Timestamp.utctimetuple` raising a ``TypeError`` (:issue:`32174`) Timedelta ^^^^^^^^^ @@ -849,6 +858,7 @@ Conversion - Bug where any :class:`ExtensionDtype` subclass with ``kind="M"`` would be interpreted as a timezone type (:issue:`34986`) - Bug in :class:`.arrays.ArrowExtensionArray` that would raise ``NotImplementedError`` when passed a sequence of strings or binary (:issue:`49172`) - Bug in :meth:`Series.astype` raising ``pyarrow.ArrowInvalid`` when converting from a non-pyarrow string dtype to a pyarrow numeric type (:issue:`50430`) +- Bug in :meth:`Series.to_numpy` converting to NumPy array before applying ``na_value`` (:issue:`48951`) - Bug in :func:`to_datetime` was not respecting ``exact`` argument when ``format`` was an ISO8601 format (:issue:`12649`) - Bug in :meth:`TimedeltaArray.astype` raising ``TypeError`` when converting to a pyarrow duration type (:issue:`49795`) - @@ -871,11 +881,14 @@ Indexing - Bug in :meth:`DataFrame.loc` coercing dtypes when setting values with a list indexer (:issue:`49159`) - Bug in :meth:`Series.loc` raising error for out of bounds end of slice indexer (:issue:`50161`) - Bug in :meth:`DataFrame.loc` raising ``ValueError`` with ``bool`` indexer and :class:`MultiIndex` (:issue:`47687`) +- Bug in :meth:`DataFrame.loc` raising ``IndexError`` when setting values for a pyarrow-backed column with a non-scalar indexer (:issue:`50085`) - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when right hand side is :class:`DataFrame` with :class:`MultiIndex` columns (:issue:`49121`) - Bug in :meth:`DataFrame.reindex` casting dtype to ``object`` when :class:`DataFrame` has single extension array column when re-indexing ``columns`` and ``index`` (:issue:`48190`) - Bug in :meth:`DataFrame.iloc` raising ``IndexError`` when indexer is a :class:`Series` with numeric extension array dtype (:issue:`49521`) - Bug in :func:`~DataFrame.describe` when formatting percentiles in the resulting index showed more decimals than needed (:issue:`46362`) - Bug in :meth:`DataFrame.compare` does not recognize differences when comparing ``NA`` with value in nullable dtypes (:issue:`48939`) +- Bug in :meth:`DataFrame.isetitem` coercing extension array dtypes in :class:`DataFrame` to 
object (:issue:`49922`) +- Bug in :class:`BusinessHour` would cause creation of :class:`DatetimeIndex` to fail when no opening hour was included in the index (:issue:`49835`) - Copy on write @@ -929,6 +942,7 @@ Period Plotting ^^^^^^^^ +- Bug in :meth:`DataFrame.plot.hist`, not dropping elements of ``weights`` corresponding to ``NaN`` values in ``data`` (:issue:`48884`) - ``ax.set_xlim`` was sometimes raising ``UserWarning`` which users couldn't address due to ``set_xlim`` not accepting parsing arguments - the converter now uses :func:`Timestamp` instead (:issue:`49148`) - @@ -948,6 +962,9 @@ Groupby/resample/rolling - Bug in :meth:`.SeriesGroupBy.nunique` would incorrectly raise when the grouper was an empty categorical and ``observed=True`` (:issue:`21334`) - Bug in :meth:`.SeriesGroupBy.nth` would raise when grouper contained NA values after subsetting from a :class:`DataFrameGroupBy` (:issue:`26454`) - Bug in :meth:`DataFrame.groupby` would not include a :class:`.Grouper` specified by ``key`` in the result when ``as_index=False`` (:issue:`50413`) +- Bug in :meth:`.DataFrameGrouBy.value_counts` would raise when used with a :class:`.TimeGrouper` (:issue:`50486`) +- Bug in :meth:`Resampler.size` caused a wide :class:`DataFrame` to be returned instead of a :class:`Series` with :class:`MultiIndex` (:issue:`46826`) +- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`) - Reshaping @@ -976,6 +993,7 @@ ExtensionArray - Bug in :meth:`Series.round` for pyarrow-backed dtypes raising ``AttributeError`` (:issue:`50437`) - Bug when concatenating an empty DataFrame with an ExtensionDtype to another DataFrame with the same ExtensionDtype, the resulting dtype turned into object (:issue:`48510`) - Bug in :meth:`array.PandasArray.to_numpy` raising with ``NA`` value when ``na_value`` is specified (:issue:`40638`) +- Bug in :meth:`api.types.is_numeric_dtype` where a custom :class:`ExtensionDtype` would not return ``True`` if ``_is_numeric`` returned ``True`` (:issue:`50563`) Styler ^^^^^^ diff --git a/environment.yml b/environment.yml index b6b8f7d6af1ba..96753f0f1c9b3 100644 --- a/environment.yml +++ b/environment.yml @@ -51,7 +51,7 @@ dependencies: - pyxlsb - s3fs>=2021.08.0 - scipy - - sqlalchemy + - sqlalchemy<1.4.46 - tabulate - tzdata>=2022a - xarray @@ -90,7 +90,6 @@ dependencies: - gitdb - natsort # DataFrame.sort_values doctest - numpydoc - - pandas-dev-flaker=0.5.0 - pydata-sphinx-theme<0.11 - pytest-cython # doctest - sphinx diff --git a/pandas/__init__.py b/pandas/__init__.py index 951cb38656d0b..048d20f0de72f 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -135,7 +135,7 @@ ) from pandas import api, arrays, errors, io, plotting, tseries -from pandas import testing # noqa:PDF015 +from pandas import testing from pandas.util._print_versions import show_versions from pandas.io.api import ( diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 7fcba58772ac4..77876d0c55337 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -647,40 +647,37 @@ def pad_2d_inplace(numeric_object_t[:, :] values, uint8_t[:, :] mask, limit=None val = values[j, i] -""" -Backfilling logic for generating fill vector - -Diagram of what's going on - -Old New Fill vector Mask - . 0 1 - . 0 1 - . 0 1 -A A 0 1 - . 1 1 - . 1 1 - . 1 1 - . 1 1 - . 1 1 -B B 1 1 - . 2 1 - . 2 1 - . 2 1 -C C 2 1 - . 0 - . 
0 -D -""" - - @cython.boundscheck(False) @cython.wraparound(False) def backfill( ndarray[numeric_object_t] old, ndarray[numeric_object_t] new, limit=None -) -> ndarray: - # -> ndarray[intp_t, ndim=1] +) -> ndarray: # -> ndarray[intp_t, ndim=1] + """ + Backfilling logic for generating fill vector + + Diagram of what's going on + + Old New Fill vector Mask + . 0 1 + . 0 1 + . 0 1 + A A 0 1 + . 1 1 + . 1 1 + . 1 1 + . 1 1 + . 1 1 + B B 1 1 + . 2 1 + . 2 1 + . 2 1 + C C 2 1 + . 0 + . 0 + D + """ cdef: Py_ssize_t i, j, nleft, nright ndarray[intp_t, ndim=1] indexer diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 9bc02e90ebb9e..2439082bf7413 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -240,3 +240,6 @@ def get_reverse_indexer( ) -> npt.NDArray[np.intp]: ... def is_bool_list(obj: list) -> bool: ... def dtypes_all_equal(types: list[DtypeObj]) -> bool: ... +def array_equal_fast( + left: np.ndarray, right: np.ndarray # np.ndarray[np.int64, ndim=1] +) -> bool: ... diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index bc7b876cb5de8..89e02ac0fa86d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -50,6 +50,7 @@ from numpy cimport ( complex128_t, flatiter, float64_t, + int32_t, int64_t, intp_t, ndarray, @@ -642,6 +643,34 @@ def array_equivalent_object(ndarray left, ndarray right) -> bool: return True +ctypedef fused int6432_t: + int64_t + int32_t + + +@cython.wraparound(False) +@cython.boundscheck(False) +def array_equal_fast( + ndarray[int6432_t, ndim=1] left, ndarray[int6432_t, ndim=1] right, +) -> bool: + """ + Perform an element by element comparison on 1-d integer arrays, meant for indexer + comparisons + """ + cdef: + Py_ssize_t i, n = left.size + + if left.size != right.size: + return False + + for i in range(n): + + if left[i] != right[i]: + return False + + return True + + ctypedef fused ndarr_object: ndarray[object, ndim=1] ndarray[object, ndim=2] @@ -1482,7 +1511,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str: return val if values.descr.type_num != NPY_OBJECT: - # i.e. values.dtype != np.object + # i.e. values.dtype != np.object_ # This should not be reached values = values.astype(object) diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 478e7eaee90c1..9154e836b3477 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -292,7 +292,7 @@ def maybe_convert_bool(ndarray[object] arr, result[i] = 1 elif val in false_vals: result[i] = 0 - elif is_nan(val): + elif is_nan(val) or val is None: mask[i] = 1 result[i] = 0 # Value here doesn't matter, will be replaced w/ nan has_na = True diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index de81c611c9ee9..492f45af09e80 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -120,3 +120,9 @@ cdef int64_t convert_reso( NPY_DATETIMEUNIT to_reso, bint round_ok, ) except? 
-1 + +cdef extern from "src/datetime/np_datetime_strings.h": + ctypedef enum FormatRequirement: + PARTIAL_MATCH + EXACT_MATCH + INFER_FORMAT diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 9db3f7cb4648e..b1e4022527437 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -53,7 +53,8 @@ cdef extern from "src/datetime/np_datetime_strings.h": npy_datetimestruct *out, NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset, - const char *format, int format_len, int exact) + const char *format, int format_len, + FormatRequirement exact) # ---------------------------------------------------------------------- @@ -286,17 +287,20 @@ cdef int string_to_dts( const char* buf Py_ssize_t format_length const char* format_buf + FormatRequirement format_requirement buf = get_c_string_buf_and_size(val, &length) if format is None: format_buf = b"" format_length = 0 - exact = False + format_requirement = INFER_FORMAT else: format_buf = get_c_string_buf_and_size(format, &format_length) + format_requirement = exact return parse_iso_8601_datetime(buf, length, want_exc, dts, out_bestunit, out_local, out_tzoffset, - format_buf, format_length, exact) + format_buf, format_length, + format_requirement) cpdef ndarray astype_overflowsafe( diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 0bc9751694e9f..b59d17321d8bf 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1847,15 +1847,20 @@ cdef class BusinessHour(BusinessMixin): earliest_start = self.start[0] latest_start = self.start[-1] + if self.n == 0: + is_same_sign = sign > 0 + else: + is_same_sign = self.n * sign >= 0 + if not self.next_bday.is_on_offset(other): # today is not business day other = other + sign * self.next_bday - if self.n * sign >= 0: + if is_same_sign: hour, minute = earliest_start.hour, earliest_start.minute else: hour, minute = latest_start.hour, latest_start.minute else: - if self.n * sign >= 0: + if is_same_sign: if latest_start < other.time(): # current time is after latest starting time in today other = other + sign * self.next_bday diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index aa95febfc9721..9e05640723929 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -53,6 +53,7 @@ from pandas._libs.tslibs.nattype cimport ( c_NaT as NaT, c_nat_strings as nat_strings, ) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, npy_datetimestruct, @@ -298,6 +299,12 @@ def parse_datetime_string( # following may be raised from dateutil # TypeError: 'NoneType' object is not iterable raise ValueError(f'Given date string "{date_string}" not likely a datetime') + except OverflowError as err: + # with e.g. "08335394550" dateutil raises when trying to pass + # year=8335394550 to datetime.replace + raise OutOfBoundsDatetime( + f'Parsing "{date_string}" to datetime overflows' + ) from err return dt @@ -1005,7 +1012,8 @@ cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst): ) if (day_index < month_index) and not dayfirst: warnings.warn( - f"Parsing dates in {format} format when dayfirst=False was specified. " + f"Parsing dates in {format} format when dayfirst=False (the default) " + "was specified. 
" "Pass `dayfirst=True` or specify a format to silence this warning.", UserWarning, stacklevel=find_stack_level(), diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c index 7bb94012fad0c..f1f03e6467eac 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -67,42 +67,54 @@ This file implements string parsing and creation for NumPy datetime. * Returns 0 on success, -1 on failure. */ +typedef enum { + COMPARISON_SUCCESS, + COMPLETED_PARTIAL_MATCH, + COMPARISON_ERROR +} DatetimePartParseResult; // This function will advance the pointer on format // and decrement characters_remaining by n on success -// On failure will return -1 without incrementing -static int compare_format(const char **format, int *characters_remaining, - const char *compare_to, int n, const int exact) { +// On failure will return COMPARISON_ERROR without incrementing +// If `format_requirement` is PARTIAL_MATCH, and the `format` string has +// been exhausted, then return COMPLETED_PARTIAL_MATCH. +static DatetimePartParseResult compare_format( + const char **format, + int *characters_remaining, + const char *compare_to, + int n, + const FormatRequirement format_requirement +) { + if (format_requirement == INFER_FORMAT) { + return COMPARISON_SUCCESS; + } + if (*characters_remaining < 0) { + return COMPARISON_ERROR; + } + if (format_requirement == PARTIAL_MATCH && *characters_remaining == 0) { + return COMPLETED_PARTIAL_MATCH; + } if (*characters_remaining < n) { - if (exact) { - // TODO(pandas-dev): in the future we should set a PyErr here - // to be very clear about what went wrong - return -1; - } else if (*characters_remaining) { - // TODO(pandas-dev): same return value in this function as - // above branch, but stub out a future where - // we have a better error message - return -1; - } else { - return 0; - } + // TODO(pandas-dev): PyErr to differentiate what went wrong + return COMPARISON_ERROR; } else { if (strncmp(*format, compare_to, n)) { // TODO(pandas-dev): PyErr to differentiate what went wrong - return -1; + return COMPARISON_ERROR; } else { *format += n; *characters_remaining -= n; - return 0; + return COMPARISON_SUCCESS; } } - return 0; + return COMPARISON_SUCCESS; } int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, NPY_DATETIMEUNIT *out_bestunit, int *out_local, int *out_tzoffset, - const char* format, int format_len, int exact) { + const char* format, int format_len, + FormatRequirement format_requirement) { if (len < 0 || format_len < 0) goto parse_error; int year_leap = 0; @@ -110,6 +122,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, const char *substr; int sublen; NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC; + DatetimePartParseResult comparison; /* If year-month-day are separated by a valid separator, * months/days without leading zeroes will be parsed @@ -139,8 +152,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; - if (compare_format(&format, &format_len, " ", 1, exact)) { + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } } @@ -155,8 +171,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE YEAR (4 digits) */ - 
if (compare_format(&format, &format_len, "%Y", 2, exact)) { + comparison = compare_format(&format, &format_len, "%Y", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } out->year = 0; @@ -202,8 +221,12 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, ++substr; --sublen; - if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) { + comparison = compare_format(&format, &format_len, &ymd_sep, 1, + format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } /* Cannot have trailing separator */ if (sublen == 0 || !isdigit(*substr)) { @@ -212,8 +235,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MONTH */ - if (compare_format(&format, &format_len, "%m", 2, exact)) { + comparison = compare_format(&format, &format_len, "%m", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } /* First digit required */ out->month = (*substr - '0'); @@ -258,14 +284,21 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } ++substr; --sublen; - if (compare_format(&format, &format_len, &ymd_sep, 1, exact)) { + comparison = compare_format(&format, &format_len, &ymd_sep, 1, + format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } } /* PARSE THE DAY */ - if (compare_format(&format, &format_len, "%d", 2, exact)) { + comparison = compare_format(&format, &format_len, "%d", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } /* First digit required */ if (!isdigit(*substr)) { @@ -306,15 +339,21 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if ((*substr != 'T' && *substr != ' ') || sublen == 1) { goto parse_error; } - if (compare_format(&format, &format_len, substr, 1, exact)) { - goto parse_error; - } + comparison = compare_format(&format, &format_len, substr, 1, format_requirement); + if (comparison == COMPARISON_ERROR) { + goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; + } ++substr; --sublen; /* PARSE THE HOURS */ - if (compare_format(&format, &format_len, "%H", 2, exact)) { + comparison = compare_format(&format, &format_len, "%H", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } /* First digit required */ if (!isdigit(*substr)) { @@ -359,8 +398,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (sublen == 0 || !isdigit(*substr)) { goto parse_error; } - if (compare_format(&format, &format_len, ":", 1, exact)) { + comparison = compare_format(&format, &format_len, ":", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } } else if (!isdigit(*substr)) { if (!hour_was_2_digits) { @@ -370,8 +412,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MINUTES */ - if (compare_format(&format, &format_len, "%M", 2, exact)) { + comparison = compare_format(&format, &format_len, "%M", 2, format_requirement); + if (comparison == COMPARISON_ERROR) 
{ goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } /* First digit required */ out->min = (*substr - '0'); @@ -405,8 +450,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, /* If we make it through this condition block, then the next * character is a digit. */ if (has_hms_sep && *substr == ':') { - if (compare_format(&format, &format_len, ":", 1, exact)) { + comparison = compare_format(&format, &format_len, ":", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } ++substr; --sublen; @@ -420,8 +468,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE SECONDS */ - if (compare_format(&format, &format_len, "%S", 2, exact)) { + comparison = compare_format(&format, &format_len, "%S", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } /* First digit required */ out->sec = (*substr - '0'); @@ -448,8 +499,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, if (sublen > 0 && *substr == '.') { ++substr; --sublen; - if (compare_format(&format, &format_len, ".", 1, exact)) { + comparison = compare_format(&format, &format_len, ".", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } } else { bestunit = NPY_FR_s; @@ -457,8 +511,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, } /* PARSE THE MICROSECONDS (0 to 6 digits) */ - if (compare_format(&format, &format_len, "%f", 2, exact)) { + comparison = compare_format(&format, &format_len, "%f", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } numdigits = 0; for (i = 0; i < 6; ++i) { @@ -524,8 +581,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; - if (compare_format(&format, &format_len, " ", 1, exact)) { + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } } @@ -539,8 +599,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, /* UTC specifier */ if (*substr == 'Z') { - if (compare_format(&format, &format_len, "%z", 2, exact)) { + comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } /* "Z" should be equivalent to tz offset "+00:00" */ if (out_local != NULL) { @@ -561,8 +624,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, --sublen; } } else if (*substr == '-' || *substr == '+') { - if (compare_format(&format, &format_len, "%z", 2, exact)) { + comparison = compare_format(&format, &format_len, "%z", 2, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } /* Time zone offset */ int offset_neg = 0, offset_hour = 0, offset_minute = 0; @@ -647,8 +713,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc, while (sublen > 0 && isspace(*substr)) { ++substr; --sublen; - if 
(compare_format(&format, &format_len, " ", 1, exact)) { + comparison = compare_format(&format, &format_len, " ", 1, format_requirement); + if (comparison == COMPARISON_ERROR) { goto parse_error; + } else if (comparison == COMPLETED_PARTIAL_MATCH) { + goto finish; } } diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h index 734f7daceba05..a635192d70809 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -26,6 +26,21 @@ This file implements string parsing and creation for NumPy datetime. #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #endif // NPY_NO_DEPRECATED_API +/* 'format_requirement' can be one of three values: + * * PARTIAL_MATCH : Only require a partial match with 'format'. + * For example, if the string is '2020-01-01 05:00:00' and + * 'format' is '%Y-%m-%d', then parse '2020-01-01'; + * * EXACT_MATCH : require an exact match with 'format'. If the + * string is '2020-01-01', then the only format which will + * be able to parse it without error is '%Y-%m-%d'; + * * INFER_FORMAT: parse without comparing 'format' (i.e. infer it). + */ +typedef enum { + PARTIAL_MATCH, + EXACT_MATCH, + INFER_FORMAT +} FormatRequirement; + /* * Parses (almost) standard ISO 8601 date strings. The differences are: * @@ -61,7 +76,7 @@ parse_iso_8601_datetime(const char *str, int len, int want_exc, int *out_tzoffset, const char* format, int format_len, - int exact); + FormatRequirement format_requirement); /* * Provides a string length to use for converting datetime diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 27e99706137b6..69878625295d6 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -1,4 +1,18 @@ """Strptime-related classes and functions. + +TimeRE, _calc_julian_from_U_or_W are vendored +from the standard library, see +https://github.com/python/cpython/blob/main/Lib/_strptime.py +The original module-level docstring follows. + +Strptime-related classes and functions. +CLASSES: + LocaleTime -- Discovers and stores locale-specific time information + TimeRE -- Creates regexes for pattern matching a string of text containing + time information +FUNCTIONS: + _getlang -- Figure out what language is being used for the locale + strptime -- Calculates the time struct represented by the passed-in string """ from datetime import timezone @@ -10,10 +24,16 @@ from cpython.datetime cimport ( timedelta, tzinfo, ) +from _strptime import ( + TimeRE as _TimeRE, + _getlang, +) +from _strptime import LocaleTime # no-cython-lint import_datetime() from _thread import allocate_lock as _thread_allocate_lock +import re import numpy as np import pytz @@ -50,6 +70,7 @@ from pandas._libs.util cimport ( is_float_object, is_integer_object, ) + from pandas._libs.tslibs.timestamps import Timestamp cnp.import_array() @@ -60,15 +81,23 @@ cdef bint format_is_iso(f: str): Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different but must be consistent. Leading 0s in dates and times are optional. """ + iso_regex = re.compile( + r""" + ^ # start of string + %Y # Year + (?:([-/ \\.]?)%m # month with or without separators + (?: \1%d # day with same separator as for year-month + (?:[ T]%H # hour with separator + (?:\:%M # minute with separator + (?:\:%S # second with separator + (?:%z|\.%f(?:%z)? # timezone or fractional second + )?)?)?)?)?)? 
# optional + $ # end of string + """, + re.VERBOSE, + ) excluded_formats = ["%Y%m"] - - for date_sep in [" ", "/", "\\", "-", ".", ""]: - for time_sep in [" ", "T"]: - for micro_or_tz in ["", "%z", ".%f", ".%f%z"]: - iso_fmt = f"%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}" - if iso_fmt.startswith(f) and f not in excluded_formats: - return True - return False + return re.match(iso_regex, f) is not None and f not in excluded_formats def _test_format_is_iso(f: str) -> bool: @@ -487,29 +516,6 @@ def array_strptime( return result, result_timezone.base -""" -TimeRE, _calc_julian_from_U_or_W are vendored -from the standard library, see -https://github.com/python/cpython/blob/main/Lib/_strptime.py -The original module-level docstring follows. - -Strptime-related classes and functions. -CLASSES: - LocaleTime -- Discovers and stores locale-specific time information - TimeRE -- Creates regexes for pattern matching a string of text containing - time information -FUNCTIONS: - _getlang -- Figure out what language is being used for the locale - strptime -- Calculates the time struct represented by the passed-in string -""" - -from _strptime import ( - TimeRE as _TimeRE, - _getlang, -) -from _strptime import LocaleTime # no-cython-lint - - class TimeRE(_TimeRE): """ Handle conversion from format directives to regexes. diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 0cef0ad128aee..b57f2ce5bd953 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1525,16 +1525,11 @@ class Timestamp(_Timestamp): elif is_integer_object(year): # User passed positional arguments: # Timestamp(year, month, day[, hour[, minute[, second[, - # microsecond[, nanosecond[, tzinfo]]]]]]) + # microsecond[, tzinfo]]]]]) ts_input = datetime(ts_input, year, month, day or 0, hour or 0, minute or 0, second or 0, fold=fold or 0) unit = None - if nanosecond is None: - # nanosecond was not passed as a keyword, but may have been - # passed positionally see test_constructor_nanosecond - nanosecond = microsecond - if getattr(ts_input, "tzinfo", None) is not None and tz is not None: raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " "the tz parameter. 
Use tz_convert instead.") diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index a54050fdf3cf2..547286bd40b64 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -240,6 +240,8 @@ timedelta-like} str stamp Localizer info = Localizer(tz, creso=creso) int64_t pph = periods_per_day(creso) // 24 + int64_t pps = periods_per_second(creso) + npy_datetimestruct dts # Vectorized version of DstTzInfo.localize if info.use_utc: @@ -388,14 +390,26 @@ timedelta-like} new_local = val - remaining_mins - 1 if is_zi: - raise NotImplementedError( - "nonexistent shifting is not implemented with ZoneInfo tzinfos" - ) + # use the same construction as in _get_utc_bounds_zoneinfo + pandas_datetime_to_datetimestruct(new_local, creso, &dts) + extra = (dts.ps // 1000) * (pps // 1_000_000_000) + + dt = datetime_new(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, None) - delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans) + if shift_forward or shift_delta > 0: + dt = dt.replace(tzinfo=tz, fold=1) + else: + dt = dt.replace(tzinfo=tz, fold=0) + dt = dt.astimezone(utc_stdlib) + dt = dt.replace(tzinfo=None) + result[i] = pydatetime_to_dt64(dt, &dts, creso) + extra + + else: + delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans) - delta_idx = delta_idx - delta_idx_offset - result[i] = new_local - info.deltas[delta_idx] + delta_idx = delta_idx - delta_idx_offset + result[i] = new_local - info.deltas[delta_idx] elif fill_nonexist: result[i] = NPY_NAT else: diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 43020ae471f10..eb2905751a9b4 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -886,7 +886,7 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager """ import pytest - return pytest.raises(expected_exception, match=None) # noqa: PDF010 + return pytest.raises(expected_exception, match=None) cython_table = pd.core.common._cython_table.items() diff --git a/pandas/_typing.py b/pandas/_typing.py index 1ba5be8b5b0ed..8d3044a978291 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -44,6 +44,10 @@ from pandas.core.dtypes.dtypes import ExtensionDtype from pandas import Interval + from pandas.arrays import ( + DatetimeArray, + TimedeltaArray, + ) from pandas.core.arrays.base import ExtensionArray from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame @@ -88,6 +92,7 @@ ArrayLike = Union["ExtensionArray", np.ndarray] AnyArrayLike = Union[ArrayLike, "Index", "Series"] +TimeArrayLike = Union["DatetimeArray", "TimedeltaArray"] # scalars diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 085a2a80ca8ec..b59b9632913e4 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -14,7 +14,6 @@ import sys from pandas._typing import F -import pandas.compat._compressors from pandas.compat._constants import ( IS64, PY39, @@ -22,6 +21,7 @@ PY311, PYPY, ) +import pandas.compat.compressors from pandas.compat.numpy import ( is_numpy_dev, np_version_under1p21, @@ -131,7 +131,7 @@ def is_ci_environment() -> bool: return os.environ.get("PANDAS_CI", "0") == "1" -def get_lzma_file() -> type[pandas.compat._compressors.LZMAFile]: +def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]: """ Importing the `LZMAFile` class from the `lzma` module. 
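A minimal usage sketch of the helper above after the rename (the pickle file name is hypothetical; assumes Python was built with ``lzma``)::

    from pandas.compat import get_lzma_file

    # Resolves to pandas.compat.compressors.LZMAFile after this patch;
    # raises RuntimeError when the lzma module is unavailable.
    LZMAFile = get_lzma_file()
    with LZMAFile("frame.pkl.xz", "rb") as fh:  # hypothetical file
        payload = fh.read()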
@@ -145,13 +145,13 @@ def get_lzma_file() -> type[pandas.compat._compressors.LZMAFile]: RuntimeError If the `lzma` module was not imported correctly, or didn't exist. """ - if not pandas.compat._compressors.has_lzma: + if not pandas.compat.compressors.has_lzma: raise RuntimeError( "lzma module not available. " "A Python re-install with the proper dependencies, " "might be required to solve this issue." ) - return pandas.compat._compressors.LZMAFile + return pandas.compat.compressors.LZMAFile __all__ = [ diff --git a/pandas/compat/_compressors.py b/pandas/compat/compressors.py similarity index 100% rename from pandas/compat/_compressors.py rename to pandas/compat/compressors.py diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0076b3d2b64c3..de85ed67e7e8c 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -3,6 +3,7 @@ from typing import ( TYPE_CHECKING, Any, + Literal, TypeVar, cast, ) @@ -116,6 +117,11 @@ def floordiv_compat( } if TYPE_CHECKING: + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + from pandas import Series ArrowExtensionArrayT = TypeVar("ArrowExtensionArrayT", bound="ArrowExtensionArray") @@ -406,8 +412,14 @@ def _cmp_method(self, other, op): f"{op.__name__} not implemented for {type(other)}" ) - result = result.to_numpy() - return BooleanArray._from_sequence(result) + if result.null_count > 0: + # GH50524: avoid conversion to object for better perf + values = pc.fill_null(result, False).to_numpy() + mask = result.is_null().to_numpy() + else: + values = result.to_numpy() + mask = np.zeros(len(values), dtype=np.bool_) + return BooleanArray(values, mask) def _evaluate_op_method(self, other, op, arrow_funcs): pc_func = arrow_funcs[op.__name__] @@ -687,6 +699,23 @@ def round( """ return type(self)(pc.round(self._data, ndigits=decimals)) + @doc(ExtensionArray.searchsorted) + def searchsorted( + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + if self._hasna: + raise ValueError( + "searchsorted requires array to be sorted, which is impossible " + "with NAs present." + ) + if isinstance(value, ExtensionArray): + value = value.astype(object) + # Base class searchsorted would cast to object, which is *much* slower. + return self.to_numpy().searchsorted(value, side=side, sorter=sorter) + def take( self, indices: TakeIndexer, @@ -977,7 +1006,7 @@ def pyarrow_meth(data, skip_nulls, **kwargs): return self.dtype.na_value return result.as_py() - def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: + def __setitem__(self, key, value) -> None: """Set one or more values inplace. 
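As a quick illustration of the ``ArrowExtensionArray.searchsorted`` guard added above, a minimal sketch (assumes pyarrow is installed; values are illustrative)::

    import pandas as pd

    arr = pd.array([1, None, 3], dtype="int64[pyarrow]")
    try:
        arr.searchsorted(2)
    except ValueError:
        # NAs leave the sort order undefined, so the guard refuses up front
        pass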
         Parameters
@@ -998,6 +1027,10 @@ def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
         -------
         None
         """
+        # GH50085: unwrap 1D indexers
+        if isinstance(key, tuple) and len(key) == 1:
+            key = key[0]
+
         key = check_array_indexer(self, key)
 
         value = self._maybe_convert_setitem_value(value)
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 6954c97007d23..422b9effeface 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -348,7 +348,7 @@ def __getitem__(
         """
         raise AbstractMethodError(self)
 
-    def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
+    def __setitem__(self, key, value) -> None:
         """
         Set one or more values inplace.
 
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index adf6522f76a1a..d7d28eed16f8b 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -190,10 +190,9 @@ class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray):
 
     Assumes that __new__/__init__ defines:
         _ndarray
-        _freq
 
-    and that the inheriting class has methods:
-        _generate_range
+    and that inheriting subclass implements:
+        freq
     """
 
     # _infer_matches -> which infer_dtype strings are close enough to our own
@@ -201,6 +200,7 @@ class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray):
     _is_recognized_dtype: Callable[[DtypeObj], bool]
     _recognized_scalars: tuple[type, ...]
     _ndarray: np.ndarray
+    freq: BaseOffset | None
 
     @cache_readonly
     def _can_hold_na(self) -> bool:
@@ -407,7 +407,4 @@ def _get_getitem_freq(self, key) -> BaseOffset | None:
-    # error: Argument 1 of "__setitem__" is incompatible with supertype
-    # "ExtensionArray"; supertype defines the argument type as "Union[int,
-    # ndarray]"
-    def __setitem__(  # type: ignore[override]
+    def __setitem__(
         self,
         key: int | Sequence[int] | Sequence[bool] | slice,
         value: NaTType | Any | Sequence[Any],
@@ -896,24 +896,6 @@ def _maybe_mask_results(
 
     # ------------------------------------------------------------------
     # Frequency Properties/Methods
 
-    @property
-    def freq(self):
-        """
-        Return the frequency object if it is set, otherwise None.
- """ - return self._freq - - @freq.setter - def freq(self, value) -> None: - if value is not None: - value = to_offset(value) - self._validate_frequency(self, value) - - if self.ndim > 1: - raise ValueError("Cannot set freq with ndim > 1") - - self._freq = value - @property def freqstr(self) -> str | None: """ @@ -955,51 +937,6 @@ def resolution(self) -> str: # error: Item "None" of "Optional[Any]" has no attribute "attrname" return self._resolution_obj.attrname # type: ignore[union-attr] - @classmethod - def _validate_frequency(cls, index, freq, **kwargs): - """ - Validate that a frequency is compatible with the values of a given - Datetime Array/Index or Timedelta Array/Index - - Parameters - ---------- - index : DatetimeIndex or TimedeltaIndex - The index on which to determine if the given frequency is valid - freq : DateOffset - The frequency to validate - """ - # TODO: this is not applicable to PeriodArray, move to correct Mixin - inferred = index.inferred_freq - if index.size == 0 or inferred == freq.freqstr: - return None - - try: - on_freq = cls._generate_range( - start=index[0], end=None, periods=len(index), freq=freq, **kwargs - ) - if not np.array_equal(index.asi8, on_freq.asi8): - raise ValueError - except ValueError as e: - if "non-fixed" in str(e): - # non-fixed frequencies are not meaningful for timedelta64; - # we retain that error message - raise e - # GH#11587 the main way this is reached is if the `np.array_equal` - # check above is False. This can also be reached if index[0] - # is `NaT`, in which case the call to `cls._generate_range` will - # raise a ValueError, which we re-raise with a more targeted - # message. - raise ValueError( - f"Inferred frequency {inferred} from passed values " - f"does not conform to passed frequency {freq.freqstr}" - ) from e - - @classmethod - def _generate_range( - cls: type[DatetimeLikeArrayT], start, end, periods, freq, *args, **kwargs - ) -> DatetimeLikeArrayT: - raise AbstractMethodError(cls) - # monotonicity/uniqueness properties are called via frequencies.infer_freq, # see GH#23789 @@ -1953,6 +1890,68 @@ def __init__( def _validate_dtype(cls, values, dtype): raise AbstractMethodError(cls) + @property + def freq(self): + """ + Return the frequency object if it is set, otherwise None. + """ + return self._freq + + @freq.setter + def freq(self, value) -> None: + if value is not None: + value = to_offset(value) + self._validate_frequency(self, value) + + if self.ndim > 1: + raise ValueError("Cannot set freq with ndim > 1") + + self._freq = value + + @classmethod + def _validate_frequency(cls, index, freq, **kwargs): + """ + Validate that a frequency is compatible with the values of a given + Datetime Array/Index or Timedelta Array/Index + + Parameters + ---------- + index : DatetimeIndex or TimedeltaIndex + The index on which to determine if the given frequency is valid + freq : DateOffset + The frequency to validate + """ + inferred = index.inferred_freq + if index.size == 0 or inferred == freq.freqstr: + return None + + try: + on_freq = cls._generate_range( + start=index[0], end=None, periods=len(index), freq=freq, **kwargs + ) + if not np.array_equal(index.asi8, on_freq.asi8): + raise ValueError + except ValueError as err: + if "non-fixed" in str(err): + # non-fixed frequencies are not meaningful for timedelta64; + # we retain that error message + raise err + # GH#11587 the main way this is reached is if the `np.array_equal` + # check above is False. 
This can also be reached if index[0] + # is `NaT`, in which case the call to `cls._generate_range` will + # raise a ValueError, which we re-raise with a more targeted + # message. + raise ValueError( + f"Inferred frequency {inferred} from passed values " + f"does not conform to passed frequency {freq.freqstr}" + ) from err + + @classmethod + def _generate_range( + cls: type[DatetimeLikeArrayT], start, end, periods, freq, *args, **kwargs + ) -> DatetimeLikeArrayT: + raise AbstractMethodError(cls) + # -------------------------------------------------------------- @cache_readonly diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 608b38765621b..01584a66f424b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -50,10 +50,7 @@ TimeNonexistent, npt, ) -from pandas.errors import ( - OutOfBoundsDatetime, - PerformanceWarning, -) +from pandas.errors import PerformanceWarning from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_inclusive @@ -2154,18 +2151,14 @@ def objects_to_datetime64ns( flags = data.flags order: Literal["F", "C"] = "F" if flags.f_contiguous else "C" - try: - result, tz_parsed = tslib.array_to_datetime( - data.ravel("K"), - errors=errors, - utc=utc, - dayfirst=dayfirst, - yearfirst=yearfirst, - ) - result = result.reshape(data.shape, order=order) - except OverflowError as err: - # Exception is raised when a part of date is greater than 32 bit signed int - raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err + result, tz_parsed = tslib.array_to_datetime( + data.ravel("K"), + errors=errors, + utc=utc, + dayfirst=dayfirst, + yearfirst=yearfirst, + ) + result = result.reshape(data.shape, order=order) if tz_parsed is not None: # We can take a shortcut since the datetime64 numpy array diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index f7107a1f7c83c..2f13f199f9744 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -39,6 +39,7 @@ ScalarIndexer, SequenceIndexer, SortKind, + TimeArrayLike, npt, ) from pandas.compat.numpy import function as nv @@ -82,6 +83,8 @@ ExtensionArray, _extension_array_shared_docs, ) +from pandas.core.arrays.datetimes import DatetimeArray +from pandas.core.arrays.timedeltas import TimedeltaArray import pandas.core.common as com from pandas.core.construction import ( array as pd_array, @@ -102,6 +105,7 @@ IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray") +IntervalSideT = Union[TimeArrayLike, np.ndarray] IntervalOrNA = Union[Interval, float] _interval_shared_docs: dict[str, str] = {} @@ -123,8 +127,8 @@ Parameters ---------- data : array-like (1-dimensional) - Array-like containing Interval objects from which to build the - %(klass)s. + Array-like (ndarray, :class:`DateTimeArray`, :class:`TimeDeltaArray`) containing + Interval objects from which to build the %(klass)s. closed : {'left', 'right', 'both', 'neither'}, default 'right' Whether the intervals are closed on the left-side, right-side, both or neither. 
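For example, both plain ndarrays and datetime-backed arrays are accepted as interval sides (a small sketch against this branch)::

    import pandas as pd

    ints = pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3], closed="right")
    dts = pd.arrays.IntervalArray.from_arrays(
        pd.date_range("2023-01-01", periods=2),  # DatetimeArray-backed left side
        pd.date_range("2023-01-02", periods=2),  # right side, same dtype
    )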
@@ -213,8 +217,8 @@ def ndim(self) -> Literal[1]: return 1 # To make mypy recognize the fields - _left: np.ndarray - _right: np.ndarray + _left: IntervalSideT + _right: IntervalSideT _dtype: IntervalDtype # --------------------------------------------------------------------- @@ -232,9 +236,10 @@ def __new__( data = extract_array(data, extract_numpy=True) if isinstance(data, cls): - left = data._left - right = data._right + left: IntervalSideT = data._left + right: IntervalSideT = data._right closed = closed or data.closed + dtype = IntervalDtype(left.dtype, closed=closed) else: # don't allow scalars @@ -255,37 +260,57 @@ def __new__( right = lib.maybe_convert_objects(right) closed = closed or infer_closed + left, right, dtype = cls._ensure_simple_new_inputs( + left, + right, + closed=closed, + copy=copy, + dtype=dtype, + ) + + if verify_integrity: + cls._validate(left, right, dtype=dtype) + return cls._simple_new( left, right, - closed, - copy=copy, dtype=dtype, - verify_integrity=verify_integrity, ) @classmethod def _simple_new( cls: type[IntervalArrayT], + left: IntervalSideT, + right: IntervalSideT, + dtype: IntervalDtype, + ) -> IntervalArrayT: + result = IntervalMixin.__new__(cls) + result._left = left + result._right = right + result._dtype = dtype + + return result + + @classmethod + def _ensure_simple_new_inputs( + cls, left, right, closed: IntervalClosedType | None = None, copy: bool = False, dtype: Dtype | None = None, - verify_integrity: bool = True, - ) -> IntervalArrayT: - result = IntervalMixin.__new__(cls) + ) -> tuple[IntervalSideT, IntervalSideT, IntervalDtype]: + """Ensure correctness of input parameters for cls._simple_new.""" + from pandas.core.indexes.base import ensure_index + + left = ensure_index(left, copy=copy) + right = ensure_index(right, copy=copy) if closed is None and isinstance(dtype, IntervalDtype): closed = dtype.closed closed = closed or "right" - from pandas.core.indexes.base import ensure_index - - left = ensure_index(left, copy=copy) - right = ensure_index(right, copy=copy) - if dtype is not None: # GH 19262: dtype must be an IntervalDtype to override inferred dtype = pandas_dtype(dtype) @@ -346,13 +371,8 @@ def _simple_new( right = right.copy() dtype = IntervalDtype(left.dtype, closed=closed) - result._dtype = dtype - result._left = left - result._right = right - if verify_integrity: - result._validate() - return result + return left, right, dtype @classmethod def _from_sequence( @@ -512,9 +532,16 @@ def from_arrays( left = _maybe_convert_platform_interval(left) right = _maybe_convert_platform_interval(right) - return cls._simple_new( - left, right, closed, copy=copy, dtype=dtype, verify_integrity=True + left, right, dtype = cls._ensure_simple_new_inputs( + left, + right, + closed=closed, + copy=copy, + dtype=dtype, ) + cls._validate(left, right, dtype=dtype) + + return cls._simple_new(left, right, dtype=dtype) _interval_shared_docs["from_tuples"] = textwrap.dedent( """ @@ -599,32 +626,33 @@ def from_tuples( return cls.from_arrays(left, right, closed, copy=False, dtype=dtype) - def _validate(self): + @classmethod + def _validate(cls, left, right, dtype: IntervalDtype) -> None: """ Verify that the IntervalArray is valid. 
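The validation, now a classmethod, still raises the familiar errors; a quick sketch (the message is the one used below)::

    import pandas as pd

    try:
        pd.arrays.IntervalArray.from_arrays([2], [1])  # inverted bounds
    except ValueError as err:
        assert "left side of interval must be <= right side" in str(err)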
Checks that - * closed is valid + * dtype is correct * left and right match lengths * left and right have the same missing values * left is always below right """ - if self.closed not in VALID_CLOSED: - msg = f"invalid option for 'closed': {self.closed}" + if not isinstance(dtype, IntervalDtype): + msg = f"invalid dtype: {dtype}" raise ValueError(msg) - if len(self._left) != len(self._right): + if len(left) != len(right): msg = "left and right must have the same length" raise ValueError(msg) - left_mask = notna(self._left) - right_mask = notna(self._right) + left_mask = notna(left) + right_mask = notna(right) if not (left_mask == right_mask).all(): msg = ( "missing values must be missing in the same " "location both left and right sides" ) raise ValueError(msg) - if not (self._left[left_mask] <= self._right[left_mask]).all(): + if not (left[left_mask] <= right[left_mask]).all(): msg = "left side of interval must be <= right side" raise ValueError(msg) @@ -639,7 +667,11 @@ def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT: right : Index Values to be used for the right-side of the intervals. """ - return self._simple_new(left, right, closed=self.closed, verify_integrity=False) + dtype = IntervalDtype(left.dtype, closed=self.closed) + left, right, dtype = self._ensure_simple_new_inputs(left, right, dtype=dtype) + self._validate(left, right, dtype=dtype) + + return self._simple_new(left, right, dtype=dtype) # --------------------------------------------------------------------- # Descriptive @@ -988,7 +1020,10 @@ def _concat_same_type( left = np.concatenate([interval.left for interval in to_concat]) right = np.concatenate([interval.right for interval in to_concat]) - return cls._simple_new(left, right, closed=closed, copy=False) + + left, right, dtype = cls._ensure_simple_new_inputs(left, right, closed=closed) + + return cls._simple_new(left, right, dtype=dtype) def copy(self: IntervalArrayT) -> IntervalArrayT: """ @@ -1000,9 +1035,8 @@ def copy(self: IntervalArrayT) -> IntervalArrayT: """ left = self._left.copy() right = self._right.copy() - closed = self.closed - # TODO: Could skip verify_integrity here. 
- return type(self).from_arrays(left, right, closed=closed) + dtype = self.dtype + return self._simple_new(left, right, dtype=dtype) def isna(self) -> np.ndarray: return isna(self._left) @@ -1402,9 +1436,9 @@ def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArra msg = f"invalid option for 'closed': {closed}" raise ValueError(msg) - return type(self)._simple_new( - left=self._left, right=self._right, closed=closed, verify_integrity=False - ) + left, right = self._left, self._right + dtype = IntervalDtype(left.dtype, closed=closed) + return self._simple_new(left, right, dtype=dtype) _interval_shared_docs[ "is_non_overlapping_monotonic" @@ -1546,9 +1580,11 @@ def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: if isinstance(self._left, np.ndarray): np.putmask(self._left, mask, value_left) + assert isinstance(self._right, np.ndarray) np.putmask(self._right, mask, value_right) else: self._left._putmask(mask, value_left) + assert not isinstance(self._right, np.ndarray) self._right._putmask(mask, value_right) def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT: @@ -1576,9 +1612,11 @@ def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT: def delete(self: IntervalArrayT, loc) -> IntervalArrayT: if isinstance(self._left, np.ndarray): new_left = np.delete(self._left, loc) + assert isinstance(self._right, np.ndarray) new_right = np.delete(self._right, loc) else: new_left = self._left.delete(loc) + assert not isinstance(self._right, np.ndarray) new_right = self._right.delete(loc) return self._shallow_copy(left=new_left, right=new_right) @@ -1679,7 +1717,7 @@ def isin(self, values) -> npt.NDArray[np.bool_]: return isin(self.astype(object), values.astype(object)) @property - def _combined(self) -> ArrayLike: + def _combined(self) -> IntervalSideT: left = self.left._values.reshape(-1, 1) right = self.right._values.reshape(-1, 1) if needs_i8_conversion(left.dtype): @@ -1696,15 +1734,12 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray: dtype = self._left.dtype if needs_i8_conversion(dtype): - # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" - new_left = type(self._left)._from_sequence( # type: ignore[attr-defined] - nc[:, 0], dtype=dtype - ) - # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence" - new_right = type(self._right)._from_sequence( # type: ignore[attr-defined] - nc[:, 1], dtype=dtype - ) + assert isinstance(self._left, (DatetimeArray, TimedeltaArray)) + new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype) + assert isinstance(self._right, (DatetimeArray, TimedeltaArray)) + new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype) else: + assert isinstance(dtype, np.dtype) new_left = nc[:, 0].view(dtype) new_right = nc[:, 1].view(dtype) return self._shallow_copy(left=new_left, right=new_right) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 859bb53b6489a..e6682b0dea814 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -353,8 +353,8 @@ def _check_compatible_with(self, other) -> None: def dtype(self) -> PeriodDtype: return self._dtype - # error: Read-only property cannot override read-write property - @property # type: ignore[misc] + # error: Cannot override writeable attribute with read-only property + @property # type: ignore[override] def freq(self) -> BaseOffset: """ Return the frequency object for this PeriodArray. 
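A small sketch of the user-facing split this preserves: the datetime/timedelta ``freq`` stays settable, while the period ``freq`` stays read-only (outputs shown as comments)::

    import pandas as pd

    dti = pd.date_range("2023-01-01", periods=4, freq="D")
    dti.freq = None                  # clearing the cached frequency is allowed

    pi = pd.period_range("2023-01", periods=3, freq="M")
    print(pi.freq)                   # <MonthEnd>; read-only for periods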
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index e5fb3fc3ff836..9b26db07fc28f 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1,6 +1,9 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Literal, +) import numpy as np @@ -54,6 +57,11 @@ if TYPE_CHECKING: import pyarrow + from pandas._typing import ( + NumpySorter, + NumpyValueArrayLike, + ) + from pandas import Series @@ -492,6 +500,20 @@ def memory_usage(self, deep: bool = False) -> int: return result + lib.memory_usage_of_objects(self._ndarray) return result + @doc(ExtensionArray.searchsorted) + def searchsorted( + self, + value: NumpyValueArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp] | np.intp: + if self._hasna: + raise ValueError( + "searchsorted requires array to be sorted, which is impossible " + "with NAs present." + ) + return super().searchsorted(value=value, side=side, sorter=sorter) + def _cmp_method(self, other, op): from pandas.arrays import BooleanArray diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 97262b1f4bb21..fb081d0e63c96 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -1,8 +1,10 @@ from __future__ import annotations -from collections.abc import Callable # noqa: PDF001 import re -from typing import Union +from typing import ( + Callable, + Union, +) import numpy as np diff --git a/pandas/core/base.py b/pandas/core/base.py index e5e0ac4e121ae..23121b7075fe1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -531,12 +531,19 @@ def to_numpy( f"to_numpy() got an unexpected keyword argument '{bad_keys}'" ) - result = np.asarray(self._values, dtype=dtype) - # TODO(GH-24345): Avoid potential double copy - if copy or na_value is not lib.no_default: - result = result.copy() - if na_value is not lib.no_default: - result[np.asanyarray(self.isna())] = na_value + if na_value is not lib.no_default: + values = self._values.copy() + values[np.asanyarray(self.isna())] = na_value + else: + values = self._values + + result = np.asarray(values, dtype=dtype) + + if copy and na_value is lib.no_default: + if np.shares_memory(self._values[:2], result[:2]): + # Take slices to improve performance of check + result = result.copy() + return result @final diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4735731e8d6d9..aae815bb68e05 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1200,6 +1200,8 @@ def is_numeric_dtype(arr_or_dtype) -> bool: """ return _is_dtype_type( arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) + ) or _is_dtype( + arr_or_dtype, lambda typ: isinstance(typ, ExtensionDtype) and typ._is_numeric ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 955f65585963d..8cd1a2543e23a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -95,7 +95,6 @@ ) from pandas.core.indexes.category import CategoricalIndex from pandas.core.series import Series -from pandas.core.shared_docs import _shared_docs from pandas.core.util.numba_ import maybe_use_numba from pandas.plotting import boxplot_frame_groupby @@ -1848,17 +1847,82 @@ def nunique(self, dropna: bool = True) -> DataFrame: return results - @doc( - _shared_docs["idxmax"], - numeric_only_default="False", - ) def idxmax( self, - axis: Axis = 0, + axis: 
Axis | None = None,
         skipna: bool = True,
         numeric_only: bool = False,
     ) -> DataFrame:
-        axis = DataFrame._get_axis_number(axis)
+        """
+        Return index of first occurrence of maximum over requested axis.
+
+        NA/null values are excluded.
+
+        Parameters
+        ----------
+        axis : {0 or 'index', 1 or 'columns'}, default None
+            The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for
+            column-wise. If axis is not provided, the grouper's axis is used.
+
+            .. versionchanged:: 2.0.0
+
+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
+        numeric_only : bool, default False
+            Include only `float`, `int` or `boolean` data.
+
+            .. versionadded:: 1.5.0
+
+        Returns
+        -------
+        Series
+            Indexes of maxima along the specified axis.
+
+        Raises
+        ------
+        ValueError
+            * If the row/column is empty
+
+        See Also
+        --------
+        Series.idxmax : Return index of the maximum element.
+
+        Notes
+        -----
+        This method is the DataFrame version of ``ndarray.argmax``.
+
+        Examples
+        --------
+        Consider a dataset containing food consumption in Argentina.
+
+        >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48],
+        ...                    'co2_emissions': [37.2, 19.66, 1712]},
+        ...                   index=['Pork', 'Wheat Products', 'Beef'])
+
+        >>> df
+                        consumption  co2_emissions
+        Pork                  10.51         37.20
+        Wheat Products       103.11         19.66
+        Beef                  55.48       1712.00
+
+        By default, it returns the index for the maximum value in each column.
+
+        >>> df.idxmax()
+        consumption     Wheat Products
+        co2_emissions             Beef
+        dtype: object
+
+        To return the index for the maximum value in each row, use ``axis="columns"``.
+
+        >>> df.idxmax(axis="columns")
+        Pork              co2_emissions
+        Wheat Products      consumption
+        Beef              co2_emissions
+        dtype: object
+        """
+        if axis is None:
+            axis = self.axis
 
         def func(df):
             res = df._reduce(
@@ -1879,17 +1943,82 @@ def func(df):
             )
             return result
 
-    @doc(
-        _shared_docs["idxmin"],
-        numeric_only_default="False",
-    )
     def idxmin(
         self,
-        axis: Axis = 0,
+        axis: Axis | None = None,
         skipna: bool = True,
         numeric_only: bool = False,
     ) -> DataFrame:
-        axis = DataFrame._get_axis_number(axis)
+        """
+        Return index of first occurrence of minimum over requested axis.
+
+        NA/null values are excluded.
+
+        Parameters
+        ----------
+        axis : {0 or 'index', 1 or 'columns'}, default None
+            The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for
+            column-wise. If axis is not provided, the grouper's axis is used.
+
+            .. versionchanged:: 2.0.0
+
+        skipna : bool, default True
+            Exclude NA/null values. If an entire row/column is NA, the result
+            will be NA.
+        numeric_only : bool, default False
+            Include only `float`, `int` or `boolean` data.
+
+            .. versionadded:: 1.5.0
+
+        Returns
+        -------
+        Series
+            Indexes of minima along the specified axis.
+
+        Raises
+        ------
+        ValueError
+            * If the row/column is empty
+
+        See Also
+        --------
+        Series.idxmin : Return index of the minimum element.
+
+        Notes
+        -----
+        This method is the DataFrame version of ``ndarray.argmin``.
+
+        Examples
+        --------
+        Consider a dataset containing food consumption in Argentina.
+
+        >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48],
+        ...                    'co2_emissions': [37.2, 19.66, 1712]},
+        ...                   index=['Pork', 'Wheat Products', 'Beef'])
+
+        >>> df
+                        consumption  co2_emissions
+        Pork                  10.51         37.20
+        Wheat Products       103.11         19.66
+        Beef                  55.48       1712.00
+
+        By default, it returns the index for the minimum value in each column.
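A groupby-flavored sketch of the new ``axis=None`` default (data and labels are illustrative)::

    import pandas as pd

    df = pd.DataFrame({"grp": ["a", "a", "b"], "val": [1, 3, 2]},
                      index=["r1", "r2", "r3"])
    print(df.groupby("grp")["val"].idxmax())   # axis falls back to the grouper's axis
    # grp
    # a    r2
    # b    r3
    # Name: val, dtype: object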
+ + >>> df.idxmin() + consumption Pork + co2_emissions Wheat Products + dtype: object + + To return the index for the minimum value in each row, use ``axis="columns"``. + + >>> df.idxmin(axis="columns") + Pork consumption + Wheat Products co2_emissions + Beef consumption + dtype: object + """ + if axis is None: + axis = self.axis def func(df): res = df._reduce( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b823a7a51943e..d1d361c4e8bee 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1806,8 +1806,6 @@ def result_to_bool( libgroupby.group_any_all, numeric_only=False, cython_dtype=np.dtype(np.int8), - needs_mask=True, - needs_nullable=True, pre_processing=objs_to_bool, post_processing=result_to_bool, val_test=val_test, @@ -2084,13 +2082,24 @@ def std( f"{type(self).__name__}.std called with " f"numeric_only={numeric_only} and dtype {self.obj.dtype}" ) + + def _postprocessing( + vals, inference, nullable: bool = False, mask=None + ) -> ArrayLike: + if nullable: + if mask.ndim == 2: + mask = mask[:, 0] + return FloatingArray(np.sqrt(vals), mask.view(np.bool_)) + return np.sqrt(vals) + result = self._get_cythonized_result( libgroupby.group_var, cython_dtype=np.dtype(np.float64), numeric_only=numeric_only, needs_counts=True, - post_processing=lambda vals, inference: np.sqrt(vals), + post_processing=_postprocessing, ddof=ddof, + how="std", ) return result @@ -3174,6 +3183,9 @@ def ngroup(self, ascending: bool = True): would be seen when iterating over the groupby object, not the order they are first observed. + Groups with missing keys (where `pd.isna()` is True) will be labeled with `NaN` + and will be skipped from the count. + Parameters ---------- ascending : bool, default True @@ -3190,38 +3202,38 @@ def ngroup(self, ascending: bool = True): Examples -------- - >>> df = pd.DataFrame({"A": list("aaabba")}) + >>> df = pd.DataFrame({"color": ["red", None, "red", "blue", "blue", "red"]}) >>> df - A - 0 a - 1 a - 2 a - 3 b - 4 b - 5 a - >>> df.groupby('A').ngroup() - 0 0 - 1 0 - 2 0 - 3 1 - 4 1 - 5 0 - dtype: int64 - >>> df.groupby('A').ngroup(ascending=False) + color + 0 red + 1 None + 2 red + 3 blue + 4 blue + 5 red + >>> df.groupby("color").ngroup() + 0 1.0 + 1 NaN + 2 1.0 + 3 0.0 + 4 0.0 + 5 1.0 + dtype: float64 + >>> df.groupby("color", dropna=False).ngroup() 0 1 - 1 1 + 1 2 2 1 3 0 4 0 5 1 dtype: int64 - >>> df.groupby(["A", [1,1,2,3,2,1]]).ngroup() - 0 0 + >>> df.groupby("color", dropna=False).ngroup(ascending=False) + 0 1 1 0 2 1 - 3 3 + 3 2 4 2 - 5 0 + 5 1 dtype: int64 """ with self._group_selection_context(): @@ -3495,10 +3507,9 @@ def _get_cythonized_result( cython_dtype: np.dtype, numeric_only: bool = False, needs_counts: bool = False, - needs_nullable: bool = False, - needs_mask: bool = False, pre_processing=None, post_processing=None, + how: str = "any_all", **kwargs, ): """ @@ -3513,12 +3524,6 @@ def _get_cythonized_result( Whether only numeric datatypes should be computed needs_counts : bool, default False Whether the counts should be a part of the Cython call - needs_mask : bool, default False - Whether boolean mask needs to be part of the Cython call - signature - needs_nullable : bool, default False - Whether a bool specifying if the input is nullable is part - of the Cython call signature pre_processing : function, default None Function to be applied to `values` prior to passing to Cython. 
Function should return a tuple where the first element is the @@ -3533,6 +3538,8 @@ def _get_cythonized_result( second argument, i.e. the signature should be (ndarray, Type). If `needs_nullable=True`, a third argument should be `nullable`, to allow for processing specific to nullable values. + how : str, default any_all + Determines if any/all cython interface or std interface is used. **kwargs : dict Extra arguments to be passed back to Cython funcs @@ -3576,16 +3583,20 @@ def blk_func(values: ArrayLike) -> ArrayLike: vals = vals.reshape((-1, 1)) func = partial(func, values=vals) - if needs_mask: + if how != "std" or isinstance(values, BaseMaskedArray): mask = isna(values).view(np.uint8) if mask.ndim == 1: mask = mask.reshape(-1, 1) func = partial(func, mask=mask) - if needs_nullable: + if how != "std": is_nullable = isinstance(values, BaseMaskedArray) func = partial(func, nullable=is_nullable) + else: + result_mask = np.zeros(result.shape, dtype=np.bool_) + func = partial(func, result_mask=result_mask) + func(**kwargs) # Call func to modify indexer values in place if values.ndim == 1: @@ -3593,9 +3604,10 @@ def blk_func(values: ArrayLike) -> ArrayLike: result = result[:, 0] if post_processing: - pp_kwargs = {} - if needs_nullable: - pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray) + pp_kwargs: dict[str, bool | np.ndarray] = {} + pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray) + if how == "std": + pp_kwargs["mask"] = result_mask result = post_processing(result, inferences, **pp_kwargs) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e323877a512b0..66ad1b3ea7196 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -425,14 +425,22 @@ class Grouping: If we are a Categorical, use the observed values in_axis : if the Grouping is a column in self.obj and hence among Groupby.exclusions list + dropna : bool, default True + Whether to drop NA groups. + uniques : Array-like, optional + When specified, will be used for unique values. Enables including empty groups + in the result for a BinGrouper. Must not contain duplicates. 
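The ``uniques`` hook exists so a ``BinGrouper`` can surface empty bins, e.g. in resampling (a sketch; values are illustrative)::

    import pandas as pd

    s = pd.Series([1.0, 2.0],
                  index=pd.to_datetime(["2023-01-01", "2023-01-03"]))
    print(s.resample("D").size())   # the empty 2023-01-02 bin is included with count 0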
-    Returns
-    -------
-    **Attributes**:
-    * indices : dict of {group -> index_list}
-    * codes : ndarray, group codes
-    * group_index : unique groups
-    * groups : dict of {group -> label_list}
+    Attributes
+    ----------
+    indices : dict
+        Mapping of {group -> index_list}
+    codes : ndarray
+        Group codes
+    group_index : Index or None
+        Unique groups
+    groups : dict
+        Mapping of {group -> label_list}
     """
 
     _codes: npt.NDArray[np.signedinteger] | None = None
@@ -452,6 +460,7 @@ def __init__(
         observed: bool = False,
         in_axis: bool = False,
         dropna: bool = True,
+        uniques: ArrayLike | None = None,
     ) -> None:
         self.level = level
         self._orig_grouper = grouper
@@ -464,6 +473,7 @@ def __init__(
         self._observed = observed
         self.in_axis = in_axis
         self._dropna = dropna
+        self._uniques = uniques
 
         self._passed_categorical = False
 
@@ -653,6 +663,7 @@ def group_index(self) -> Index:
 
     @cache_readonly
     def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
+        uniques: ArrayLike
         if self._passed_categorical:
             # we make a CategoricalIndex out of the cat grouper
             # preserving the categories / ordered attributes;
@@ -697,11 +708,13 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
         elif isinstance(self.grouping_vector, ops.BaseGrouper):
             # we have a list of groupers
             codes = self.grouping_vector.codes_info
-            # error: Incompatible types in assignment (expression has type "Union
-            # [ExtensionArray, ndarray[Any, Any]]", variable has type "Categorical")
-            uniques = (
-                self.grouping_vector.result_index._values  # type: ignore[assignment]
-            )
+            uniques = self.grouping_vector.result_index._values
+        elif self._uniques is not None:
+            # GH#50486 Code grouping_vector using _uniques; allows
+            # including uniques that are not present in grouping_vector.
+            cat = Categorical(self.grouping_vector, categories=self._uniques)
+            codes = cat.codes
+            uniques = self._uniques
         else:
             # GH35667, replace dropna=False with use_na_sentinel=False
             # error: Incompatible types in assignment (expression has type "Union[
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index c20fe34a178f5..ea902800cf7e0 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -1214,7 +1214,11 @@ def names(self) -> list[Hashable]:
     @property
     def groupings(self) -> list[grouper.Grouping]:
         lev = self.binlabels
-        ping = grouper.Grouping(lev, lev, in_axis=False, level=None)
+        codes = self.group_info[0]
+        labels = lev.take(codes)
+        ping = grouper.Grouping(
+            labels, labels, in_axis=False, level=None, uniques=lev._values
+        )
         return [ping]
 
     def _aggregate_series_fast(self, obj: Series, func: Callable) -> NoReturn:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 3dc6aed56fa24..775d137523d2b 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -601,7 +601,9 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):
     # See each method's docstring.
 
     @classmethod
-    def _simple_new(cls: type[_IndexT], values, name: Hashable = None) -> _IndexT:
+    def _simple_new(
+        cls: type[_IndexT], values: ArrayLike, name: Hashable = None
+    ) -> _IndexT:
         """
         We require that we have a dtype compat for the values. If we are passed
         a non-dtype compat, then coerce using the constructor.
@@ -1961,7 +1963,7 @@ def droplevel(self, level: IndexLabel = 0):
         Return index with requested level(s) removed.
 
         If resulting index has only 1 level left, the result will be
-        of Index type, not MultiIndex.
+        of Index type, not MultiIndex. The original index is not modified in place.
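For example (a small sketch of the behavior documented above)::

    import pandas as pd

    mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["x", "y"])
    flat = mi.droplevel("y")    # new Index(['a', 'b'], name='x')
    assert mi.nlevels == 2      # the original MultiIndex is untouched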
Parameters ---------- diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1119b6e3b83ad..fde000f84e581 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -82,7 +82,7 @@ DatetimeLikeArrayMixin, cache=True, ) -@inherit_names(["mean", "freq", "freqstr"], DatetimeLikeArrayMixin) +@inherit_names(["mean", "freqstr"], DatetimeLikeArrayMixin) class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex): """ Common ops mixin to support a unified interface datetimelike Index. @@ -90,10 +90,18 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex): _can_hold_strings = False _data: DatetimeArray | TimedeltaArray | PeriodArray - freq: BaseOffset | None freqstr: str | None _resolution_obj: Resolution + @property + def freq(self) -> BaseOffset | None: + return self._data.freq + + @freq.setter + def freq(self, value) -> None: + # error: Property "freq" defined in "PeriodArray" is read-only [misc] + self._data.freq = value # type: ignore[misc] + @property def asi8(self) -> npt.NDArray[np.int64]: return self._data.asi8 diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 48cf6000d100d..3d149eccc746b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3503,12 +3503,8 @@ def equal_levels(self, other: MultiIndex) -> bool: def _union(self, other, sort) -> MultiIndex: other, result_names = self._convert_can_do_setop(other) - if ( - any(-1 in code for code in self.codes) - and any(-1 in code for code in other.codes) - or other.has_duplicates - ): - # This is only necessary if both sides have nans or other has dups, + if other.has_duplicates: + # This is only necessary if other has dupes, # otherwise difference is faster result = super()._union(other, sort) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 1937cd4254790..a9b35b99e4b51 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -167,8 +167,13 @@ def from_range( cls._validate_dtype(dtype) return cls._simple_new(data, name=name) + # error: Argument 1 of "_simple_new" is incompatible with supertype "Index"; + # supertype defines the argument type as + # "Union[ExtensionArray, ndarray[Any, Any]]" [override] @classmethod - def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex: + def _simple_new( # type: ignore[override] + cls, values: range, name: Hashable = None + ) -> RangeIndex: result = object.__new__(cls) assert isinstance(values, range) diff --git a/pandas/core/interchange/dataframe.py b/pandas/core/interchange/dataframe.py index 9139cb41e3af7..0de9b130f0aab 100644 --- a/pandas/core/interchange/dataframe.py +++ b/pandas/core/interchange/dataframe.py @@ -7,8 +7,10 @@ from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg if TYPE_CHECKING: - import pandas as pd - from pandas import Index + from pandas import ( + DataFrame, + Index, + ) class PandasDataFrameXchg(DataFrameXchg): @@ -21,7 +23,7 @@ class PandasDataFrameXchg(DataFrameXchg): """ def __init__( - self, df: pd.DataFrame, nan_as_null: bool = False, allow_copy: bool = True + self, df: DataFrame, nan_as_null: bool = False, allow_copy: bool = True ) -> None: """ Constructor - an instance of this (private) class is returned from diff --git a/pandas/core/describe.py b/pandas/core/methods/describe.py similarity index 100% rename from pandas/core/describe.py rename to pandas/core/methods/describe.py diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 
4af4be20a056e..d6cba824767b5 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -989,6 +989,12 @@ def var( @doc(GroupBy.size) def size(self): result = self._downsample("size") + + # If the result is a non-empty DataFrame we stack to get a Series + # GH 46826 + if isinstance(result, ABCDataFrame) and not result.empty: + result = result.stack() + if not len(self.ax): from pandas import Series diff --git a/pandas/core/series.py b/pandas/core/series.py index 873ebd16ac80b..b5d73373f061e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4070,7 +4070,9 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: dtype: object""" ), ) - def swaplevel(self, i: Level = -2, j: Level = -1, copy: bool = True) -> Series: + def swaplevel( + self, i: Level = -2, j: Level = -1, copy: bool | None = None + ) -> Series: """ Swap levels i and j in a :class:`MultiIndex`. @@ -4090,10 +4092,9 @@ def swaplevel(self, i: Level = -2, j: Level = -1, copy: bool = True) -> Series: {examples} """ assert isinstance(self.index, MultiIndex) - new_index = self.index.swaplevel(i, j) - return self._constructor(self._values, index=new_index, copy=copy).__finalize__( - self, method="swaplevel" - ) + result = self.copy(deep=copy) + result.index = self.index.swaplevel(i, j) + return result def reorder_levels(self, order: Sequence[Level]) -> Series: """ @@ -4113,7 +4114,7 @@ def reorder_levels(self, order: Sequence[Level]) -> Series: if not isinstance(self.index, MultiIndex): # pragma: no cover raise Exception("Can only reorder levels on a hierarchical axis.") - result = self.copy() + result = self.copy(deep=None) assert isinstance(result.index, MultiIndex) result.index = result.index.reorder_levels(order) return result diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 147fa622fdedc..486fab62d93e7 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -798,8 +798,8 @@ Consider a dataset containing food consumption in Argentina. >>> df = pd.DataFrame({{'consumption': [10.51, 103.11, 55.48], - ... 'co2_emissions': [37.2, 19.66, 1712]}}, - ... index=['Pork', 'Wheat Products', 'Beef']) + ... 'co2_emissions': [37.2, 19.66, 1712]}}, + ... index=['Pork', 'Wheat Products', 'Beef']) >>> df consumption co2_emissions @@ -865,8 +865,8 @@ Consider a dataset containing food consumption in Argentina. >>> df = pd.DataFrame({{'consumption': [10.51, 103.11, 55.48], - ... 'co2_emissions': [37.2, 19.66, 1712]}}, - ... index=['Pork', 'Wheat Products', 'Beef']) + ... 'co2_emissions': [37.2, 19.66, 1712]}}, + ... 
index=['Pork', 'Wheat Products', 'Beef']) >>> df consumption co2_emissions diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index b5618207ab9d8..c96e5a1abcf86 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -1,10 +1,10 @@ from __future__ import annotations import abc -from collections.abc import Callable # noqa: PDF001 import re from typing import ( TYPE_CHECKING, + Callable, Literal, ) diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 2d77cd0da816f..508ac122d67af 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -1,12 +1,12 @@ from __future__ import annotations -from collections.abc import Callable # noqa: PDF001 import functools import re import sys import textwrap from typing import ( TYPE_CHECKING, + Callable, Literal, ) import unicodedata diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index e7be2db293527..27328809e23d8 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -139,7 +139,7 @@ def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str return guessed_format warnings.warn( "Could not infer format, so each element will be parsed " - "individually by `dateutil`. To ensure parsing is " + "individually, falling back to `dateutil`. To ensure parsing is " "consistent and as-expected, please specify a format.", UserWarning, stacklevel=find_stack_level(), diff --git a/pandas/core/window/doc.py b/pandas/core/window/doc.py index b1ff53e9d1a44..6e188531a0502 100644 --- a/pandas/core/window/doc.py +++ b/pandas/core/window/doc.py @@ -24,10 +24,10 @@ def create_section_header(header: str) -> str: template_see_also = dedent( """ - pandas.Series.{window_method} : Calling {window_method} with Series data. - pandas.DataFrame.{window_method} : Calling {window_method} with DataFrames. - pandas.Series.{agg_method} : Aggregating {agg_method} for Series. - pandas.DataFrame.{agg_method} : Aggregating {agg_method} for DataFrame.\n + Series.{window_method} : Calling {window_method} with Series data. + DataFrame.{window_method} : Calling {window_method} with DataFrames. + Series.{agg_method} : Aggregating {agg_method} for Series. 
+    DataFrame.{agg_method} : Aggregating {agg_method} for DataFrame.\n
     """
 ).replace("\n", "", 1)

diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py
index a6c32133803d4..c0a7b2b7cc361 100644
--- a/pandas/core/window/ewm.py
+++ b/pandas/core/window/ewm.py
@@ -26,7 +26,7 @@
 )
 from pandas.core.dtypes.missing import isna

-from pandas.core import common  # noqa: PDF018
+from pandas.core import common
 from pandas.core.indexers.objects import (
     BaseIndexer,
     ExponentialMovingWindowIndexer,
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index 989b82f45339f..ef0524e48f9e2 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -18,7 +18,6 @@
     Sized,
     cast,
 )
-import warnings

 import numpy as np

@@ -37,7 +36,6 @@
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import DataError
 from pandas.util._decorators import doc
-from pandas.util._exceptions import find_stack_level

 from pandas.core.dtypes.common import (
     ensure_float64,
@@ -473,10 +471,6 @@ def _apply_blockwise(
             obj = notna(obj).astype(int)
             obj._mgr = obj._mgr.consolidate()

-        def hfunc(values: ArrayLike) -> ArrayLike:
-            values = self._prep_values(values)
-            return homogeneous_func(values)
-
         if self.axis == 1:
             obj = obj.T

@@ -484,13 +478,16 @@ def hfunc(values: ArrayLike) -> ArrayLike:
         res_values = []
         for i, arr in enumerate(obj._iter_column_arrays()):
             # GH#42736 operate column-wise instead of block-wise
+            # As of 2.0, nuisance columns raise a DataError instead of being dropped
             try:
-                res = hfunc(arr)
-            except (TypeError, NotImplementedError):
-                pass
-            else:
-                res_values.append(res)
-                taker.append(i)
+                arr = self._prep_values(arr)
+            except (TypeError, NotImplementedError) as err:
+                raise DataError(
+                    f"Cannot aggregate non-numeric type: {arr.dtype}"
+                ) from err
+            res = homogeneous_func(arr)
+            res_values.append(res)
+            taker.append(i)

         index = self._slice_axis_for_step(
             obj.index, res_values[0] if len(res_values) > 0 else None
@@ -505,18 +502,6 @@ def hfunc(values: ArrayLike) -> ArrayLike:
         if self.axis == 1:
             df = df.T

-        if 0 != len(res_values) != len(obj.columns):
-            # GH#42738 ignore_failures dropped nuisance columns
-            dropped = obj.columns.difference(obj.columns.take(taker))
-            warnings.warn(
-                "Dropping of nuisance columns in rolling operations "
-                "is deprecated; in a future version this will raise TypeError. "
-                "Select only valid columns before calling the operation. 
" - f"Dropped columns were {dropped}", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._resolve_output(df, obj) def _apply_tablewise( diff --git a/pandas/io/_util.py b/pandas/io/_util.py new file mode 100644 index 0000000000000..d2a001f0cf925 --- /dev/null +++ b/pandas/io/_util.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from pandas.compat._optional import import_optional_dependency + +import pandas as pd + + +def _arrow_dtype_mapping() -> dict: + pa = import_optional_dependency("pyarrow") + return { + pa.int8(): pd.Int8Dtype(), + pa.int16(): pd.Int16Dtype(), + pa.int32(): pd.Int32Dtype(), + pa.int64(): pd.Int64Dtype(), + pa.uint8(): pd.UInt8Dtype(), + pa.uint16(): pd.UInt16Dtype(), + pa.uint32(): pd.UInt32Dtype(), + pa.uint64(): pd.UInt64Dtype(), + pa.bool_(): pd.BooleanDtype(), + pa.string(): pd.StringDtype(), + pa.float32(): pd.Float32Dtype(), + pa.float64(): pd.Float64Dtype(), + } diff --git a/pandas/io/common.py b/pandas/io/common.py index 4dae46c8f39f6..6deaf40f00c69 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -55,8 +55,8 @@ WriteBuffer, ) from pandas.compat import get_lzma_file -from pandas.compat._compressors import BZ2File as _BZ2File from pandas.compat._optional import import_optional_dependency +from pandas.compat.compressors import BZ2File as _BZ2File from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level @@ -478,7 +478,7 @@ def file_path_to_url(path: str) -> str: return urljoin("file:", pathname2url(path)) -_extension_to_compression = { +extension_to_compression = { ".tar": "tar", ".tar.gz": "tar", ".tar.bz2": "tar", @@ -489,7 +489,7 @@ def file_path_to_url(path: str) -> str: ".xz": "xz", ".zst": "zstd", } -_supported_compressions = set(_extension_to_compression.values()) +_supported_compressions = set(extension_to_compression.values()) def get_compression_method( @@ -565,7 +565,7 @@ def infer_compression( return None # Infer compression from the filename/URL extension - for extension, compression in _extension_to_compression.items(): + for extension, compression in extension_to_compression.items(): if filepath_or_buffer.lower().endswith(extension): return compression return None diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index a12e62223c1f1..6cc343703d00c 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -259,10 +259,14 @@ def enable_data_resource_formatter(enable: bool) -> None: if mimetype not in formatters: # define tableschema formatter from IPython.core.formatters import BaseFormatter + from traitlets import ObjectName class TableSchemaFormatter(BaseFormatter): - print_method = "_repr_data_resource_" - _return_type = (dict,) + print_method = ObjectName("_repr_data_resource_") + # Incompatible types in assignment (expression has type + # "Tuple[Type[Dict[Any, Any]]]", base class "BaseFormatter" + # defined the type as "Type[str]") + _return_type = (dict,) # type: ignore[assignment] # register it: formatters[mimetype] = TableSchemaFormatter() diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index f2780d5fa6832..88974f3ab4afa 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -57,7 +57,7 @@ from pandas.io.common import ( IOHandles, - _extension_to_compression, + extension_to_compression, file_exists, get_handle, is_fsspec_url, @@ -854,7 +854,7 @@ def _get_data_from_filepath(self, filepath_or_buffer): elif ( isinstance(filepath_or_buffer, str) and filepath_or_buffer.lower().endswith( - 
(".json",) + tuple(f".json{c}" for c in _extension_to_compression) + (".json",) + tuple(f".json{c}" for c in extension_to_compression) ) and not file_exists(filepath_or_buffer) ): diff --git a/pandas/io/orc.py b/pandas/io/orc.py index cfa02de9bbcb3..169cb5d16da8d 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -91,18 +91,20 @@ def read_orc( pa_table = orc_file.read(columns=columns, **kwargs) if use_nullable_dtypes: dtype_backend = get_option("mode.dtype_backend") - if dtype_backend != "pyarrow": - raise NotImplementedError( - f"mode.dtype_backend set to {dtype_backend} is not implemented." + if dtype_backend == "pyarrow": + df = DataFrame( + { + col_name: ArrowExtensionArray(pa_col) + for col_name, pa_col in zip( + pa_table.column_names, pa_table.itercolumns() + ) + } ) - df = DataFrame( - { - col_name: ArrowExtensionArray(pa_col) - for col_name, pa_col in zip( - pa_table.column_names, pa_table.itercolumns() - ) - } - ) + else: + from pandas.io._util import _arrow_dtype_mapping + + mapping = _arrow_dtype_mapping() + df = pa_table.to_pandas(types_mapper=mapping.get) return df else: return pa_table.to_pandas() diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 568747685a36e..67e00dde5498b 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -225,24 +225,13 @@ def read( dtype_backend = get_option("mode.dtype_backend") to_pandas_kwargs = {} if use_nullable_dtypes: - import pandas as pd if dtype_backend == "pandas": - mapping = { - self.api.int8(): pd.Int8Dtype(), - self.api.int16(): pd.Int16Dtype(), - self.api.int32(): pd.Int32Dtype(), - self.api.int64(): pd.Int64Dtype(), - self.api.uint8(): pd.UInt8Dtype(), - self.api.uint16(): pd.UInt16Dtype(), - self.api.uint32(): pd.UInt32Dtype(), - self.api.uint64(): pd.UInt64Dtype(), - self.api.bool_(): pd.BooleanDtype(), - self.api.string(): pd.StringDtype(), - self.api.float32(): pd.Float32Dtype(), - self.api.float64(): pd.Float64Dtype(), - } + from pandas.io._util import _arrow_dtype_mapping + + mapping = _arrow_dtype_mapping() to_pandas_kwargs["types_mapper"] = mapping.get + manager = get_option("mode.data_manager") if manager == "array": to_pandas_kwargs["split_blocks"] = True # type: ignore[assignment] diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 4f61455826286..6ffa3356cc9de 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -46,10 +46,7 @@ if TYPE_CHECKING: from xml.etree.ElementTree import Element - from lxml.etree import ( - _Element, - _XSLTResultTree, - ) + from lxml import etree from pandas import DataFrame @@ -417,7 +414,7 @@ def _validate_names(self) -> None: def _parse_doc( self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str] - ) -> Element | _Element: + ) -> Element | etree._Element: """ Build tree from path_or_buffer. @@ -625,7 +622,7 @@ def _validate_names(self) -> None: def _parse_doc( self, raw_doc: FilePath | ReadBuffer[bytes] | ReadBuffer[str] - ) -> _Element: + ) -> etree._Element: from lxml.etree import ( XMLParser, fromstring, @@ -656,7 +653,7 @@ def _parse_doc( return document - def _transform_doc(self) -> _XSLTResultTree: + def _transform_doc(self) -> etree._XSLTResultTree: """ Transform original tree using stylesheet. 
@@ -774,6 +771,7 @@ def _parse( iterparse: dict[str, list[str]] | None, compression: CompressionOptions, storage_options: StorageOptions, + use_nullable_dtypes: bool = False, **kwargs, ) -> DataFrame: """ @@ -843,6 +841,7 @@ def _parse( dtype=dtype, converters=converters, parse_dates=parse_dates, + use_nullable_dtypes=use_nullable_dtypes, **kwargs, ) @@ -869,6 +868,7 @@ def read_xml( iterparse: dict[str, list[str]] | None = None, compression: CompressionOptions = "infer", storage_options: StorageOptions = None, + use_nullable_dtypes: bool = False, ) -> DataFrame: r""" Read XML document into a ``DataFrame`` object. @@ -980,6 +980,19 @@ def read_xml( {storage_options} + use_nullable_dtypes : bool = False + Whether or not to use nullable dtypes as default when reading data. If + set to True, nullable dtypes are used for all dtypes that have a nullable + implementation, even if no nulls are present. + + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + + .. versionadded:: 2.0 + Returns ------- df @@ -1113,4 +1126,5 @@ def read_xml( iterparse=iterparse, compression=compression, storage_options=storage_options, + use_nullable_dtypes=use_nullable_dtypes, ) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 1add485e03760..956390f739481 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -146,14 +146,23 @@ def _make_plot(self) -> None: kwds["label"] = self.columns kwds.pop("color") - y = reformat_hist_y_given_by(y, self.by) - # We allow weights to be a multi-dimensional array, e.g. a (10, 2) array, # and each sub-array (10,) will be called in each iteration. 
If users only # provide 1D array, we assume the same weights is used for all iterations weights = kwds.get("weights", None) - if weights is not None and np.ndim(weights) != 1: - kwds["weights"] = weights[:, i] + if weights is not None: + if np.ndim(weights) != 1 and np.shape(weights)[-1] != 1: + try: + weights = weights[:, i] + except IndexError as err: + raise ValueError( + "weights must have the same shape as data, " + "or be a single column" + ) from err + weights = weights[~isna(y)] + kwds["weights"] = weights + + y = reformat_hist_y_given_by(y, self.by) artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 995b1668046d2..e448e1bce9146 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -250,7 +250,7 @@ class TestTesting(Base): ] def test_testing(self): - from pandas import testing # noqa: PDF015 + from pandas import testing self.check(testing, self.funcs) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 4fb63d3c4b97b..538110396e063 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1756,12 +1756,18 @@ def test_td64arr_floordiv_td64arr_with_nat( # columns without missing values expected[[0, 1]] = expected[[0, 1]].astype("int64") - result = left // right + with tm.maybe_produces_warning( + RuntimeWarning, box is pd.array, check_stacklevel=False + ): + result = left // right tm.assert_equal(result, expected) # case that goes through __rfloordiv__ with arraylike - result = np.asarray(left) // right + with tm.maybe_produces_warning( + RuntimeWarning, box is pd.array, check_stacklevel=False + ): + result = np.asarray(left) // right tm.assert_equal(result, expected) @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning") diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 8e9112b531fad..b484dc39cf23b 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -67,7 +67,7 @@ def test_ufuncs_unary(ufunc): def test_ufunc_numeric(): - # np.sqrt on np.bool returns float16, which we upcast to Float32 + # np.sqrt on np.bool_ returns float16, which we upcast to Float32 # bc we do not have Float16 arr = pd.array([True, False, None], dtype="boolean") diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 878f1d8089d33..7deb5e50464d5 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -3,8 +3,10 @@ from pandas import ( DataFrame, + Index, MultiIndex, Series, + date_range, ) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -113,6 +115,53 @@ def test_rename_columns_modify_parent(using_copy_on_write): tm.assert_frame_equal(df2, df2_orig) +def test_pipe(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": 1.5}) + df_orig = df.copy() + + def testfunc(df): + return df + + df2 = df.pipe(testfunc) + + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + # mutating df2 triggers a copy-on-write for that column + df2.iloc[0, 0] = 0 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + expected = DataFrame({"a": [0, 2, 3], "b": 1.5}) + tm.assert_frame_equal(df, expected) + + assert 
np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + + +def test_pipe_modify_df(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": 1.5}) + df_orig = df.copy() + + def testfunc(df): + df.iloc[0, 0] = 100 + return df + + df2 = df.pipe(testfunc) + + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + expected = DataFrame({"a": [100, 2, 3], "b": 1.5}) + tm.assert_frame_equal(df, expected) + + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + + def test_reindex_columns(using_copy_on_write): # Case: reindexing the column returns a new dataframe # + afterwards modifying the result @@ -172,6 +221,27 @@ def test_select_dtypes(using_copy_on_write): tm.assert_frame_equal(df, df_orig) +def test_pop(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + view_original = df[:] + result = df.pop("a") + + assert np.shares_memory(result.values, get_array(view_original, "a")) + assert np.shares_memory(get_array(df, "b"), get_array(view_original, "b")) + + if using_copy_on_write: + result.iloc[0] = 0 + assert not np.shares_memory(result.values, get_array(view_original, "a")) + df.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df, "b"), get_array(view_original, "b")) + tm.assert_frame_equal(view_original, df_orig) + else: + expected = DataFrame({"a": [1, 2, 3], "b": [0, 5, 6], "c": [0.1, 0.2, 0.3]}) + tm.assert_frame_equal(view_original, expected) + + @pytest.mark.parametrize( "func", [ @@ -369,6 +439,30 @@ def test_head_tail(method, using_copy_on_write): tm.assert_frame_equal(df, df_orig) +@pytest.mark.parametrize( + "kwargs", + [ + {"before": "a", "after": "b", "axis": 1}, + {"before": 0, "after": 1, "axis": 0}, + ], +) +def test_truncate(using_copy_on_write, kwargs): + df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 2}) + df_orig = df.copy() + df2 = df.truncate(**kwargs) + df2._mgr._verify_integrity() + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + @pytest.mark.parametrize("method", ["assign", "drop_duplicates"]) def test_assign_drop_duplicates(using_copy_on_write, method): df = DataFrame({"a": [1, 2, 3]}) @@ -405,6 +499,23 @@ def test_reindex_like(using_copy_on_write): tm.assert_frame_equal(df, df_orig) +def test_sort_index(using_copy_on_write): + # GH 49473 + ser = Series([1, 2, 3]) + ser_orig = ser.copy() + ser2 = ser.sort_index() + + if using_copy_on_write: + assert np.shares_memory(ser.values, ser2.values) + else: + assert not np.shares_memory(ser.values, ser2.values) + + # mutating ser triggers a copy-on-write for the column / block + ser2.iloc[0] = 0 + assert not np.shares_memory(ser2.values, ser.values) + tm.assert_series_equal(ser, ser_orig) + + def test_reorder_levels(using_copy_on_write): index = MultiIndex.from_tuples( [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"] @@ -424,6 +535,43 @@ def test_reorder_levels(using_copy_on_write): tm.assert_frame_equal(df, df_orig) +def 
test_series_reorder_levels(using_copy_on_write): + index = MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"] + ) + ser = Series([1, 2, 3, 4], index=index) + ser_orig = ser.copy() + ser2 = ser.reorder_levels(order=["two", "one"]) + + if using_copy_on_write: + assert np.shares_memory(ser2.values, ser.values) + else: + assert not np.shares_memory(ser2.values, ser.values) + + ser2.iloc[0] = 0 + if using_copy_on_write: + assert not np.shares_memory(ser2.values, ser.values) + tm.assert_series_equal(ser, ser_orig) + + +@pytest.mark.parametrize("obj", [Series([1, 2, 3]), DataFrame({"a": [1, 2, 3]})]) +def test_swaplevel(using_copy_on_write, obj): + index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"]) + obj.index = index + obj_orig = obj.copy() + obj2 = obj.swaplevel() + + if using_copy_on_write: + assert np.shares_memory(obj2.values, obj.values) + else: + assert not np.shares_memory(obj2.values, obj.values) + + obj2.iloc[0] = 0 + if using_copy_on_write: + assert not np.shares_memory(obj2.values, obj.values) + tm.assert_equal(obj, obj_orig) + + def test_frame_set_axis(using_copy_on_write): # GH 49473 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) @@ -456,3 +604,99 @@ def test_series_set_axis(using_copy_on_write): ser2.iloc[0] = 0 assert not np.shares_memory(ser2, ser) tm.assert_series_equal(ser, ser_orig) + + +def test_set_flags(using_copy_on_write): + ser = Series([1, 2, 3]) + ser_orig = ser.copy() + ser2 = ser.set_flags(allows_duplicate_labels=False) + + assert np.shares_memory(ser, ser2) + + # mutating ser triggers a copy-on-write for the column / block + ser2.iloc[0] = 0 + if using_copy_on_write: + assert not np.shares_memory(ser2, ser) + tm.assert_series_equal(ser, ser_orig) + else: + assert np.shares_memory(ser2, ser) + expected = Series([0, 2, 3]) + tm.assert_series_equal(ser, expected) + + +@pytest.mark.parametrize("copy_kwargs", [{"copy": True}, {}]) +@pytest.mark.parametrize("kwargs", [{"mapper": "test"}, {"index": "test"}]) +def test_rename_axis(using_copy_on_write, kwargs, copy_kwargs): + df = DataFrame({"a": [1, 2, 3, 4]}, index=Index([1, 2, 3, 4], name="a")) + df_orig = df.copy() + df2 = df.rename_axis(**kwargs, **copy_kwargs) + + if using_copy_on_write and not copy_kwargs: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +@pytest.mark.parametrize( + "func, tz", [("tz_convert", "Europe/Berlin"), ("tz_localize", None)] +) +def test_tz_convert_localize(using_copy_on_write, func, tz): + # GH 49473 + ser = Series( + [1, 2], index=date_range(start="2014-08-01 09:00", freq="H", periods=2, tz=tz) + ) + ser_orig = ser.copy() + ser2 = getattr(ser, func)("US/Central") + + if using_copy_on_write: + assert np.shares_memory(ser.values, ser2.values) + else: + assert not np.shares_memory(ser.values, ser2.values) + + # mutating ser triggers a copy-on-write for the column / block + ser2.iloc[0] = 0 + assert not np.shares_memory(ser2.values, ser.values) + tm.assert_series_equal(ser, ser_orig) + + +def test_droplevel(using_copy_on_write): + # GH 49473 + index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"]) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=index) + df_orig = df.copy() + df2 = df.droplevel(0) + + if 
using_copy_on_write: + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + else: + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + + # mutating df2 triggers a copy-on-write for that column / block + df2.iloc[0, 0] = 0 + + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + tm.assert_frame_equal(df, df_orig) + + +def test_squeeze(using_copy_on_write): + df = DataFrame({"a": [1, 2, 3]}) + df_orig = df.copy() + series = df.squeeze() + + # Should share memory regardless of CoW since squeeze is just an iloc + assert np.shares_memory(series.values, get_array(df, "a")) + + # mutating squeezed df triggers a copy-on-write for that column/block + series.iloc[0] = 0 + if using_copy_on_write: + assert not np.shares_memory(series.values, get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + else: + # Without CoW the original will be modified + assert np.shares_memory(series.values, get_array(df, "a")) + assert df.loc[0, "a"] == 0 diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index c08514900af7c..ce900ff649eec 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -556,6 +556,24 @@ def test_is_numeric_dtype(): assert com.is_numeric_dtype(pd.Series([1, 2])) assert com.is_numeric_dtype(pd.Index([1, 2.0])) + class MyNumericDType(ExtensionDtype): + @property + def type(self): + return str + + @property + def name(self): + raise NotImplementedError + + @classmethod + def construct_array_type(cls): + raise NotImplementedError + + def _is_numeric(self) -> bool: + return True + + assert com.is_numeric_dtype(MyNumericDType()) + def test_is_float_dtype(): assert not com.is_float_dtype(str) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index f52abb9349578..73445a96f4a03 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -418,3 +418,11 @@ def test_setitem_invalid(self, data, invalid_scalar): with pytest.raises((ValueError, TypeError), match=msg): data[:] = invalid_scalar + + def test_setitem_2d_values(self, data): + # GH50085 + original = data.copy() + df = pd.DataFrame({"a": data, "b": data}) + df.loc[[0, 1], :] = df.loc[[1, 0], :].values + assert (df.loc[0, :] == original[1]).all() + assert (df.loc[1, :] == original[0]).all() diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index c1785591f41a9..8bb82bf644680 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -37,7 +37,10 @@ import pandas as pd import pandas._testing as tm -from pandas.api.types import is_bool_dtype +from pandas.api.types import ( + is_bool_dtype, + is_numeric_dtype, +) from pandas.tests.extension import base pa = pytest.importorskip("pyarrow", minversion="1.0.1") @@ -550,16 +553,6 @@ def test_groupby_extension_apply( ): super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) - def test_in_numeric_groupby(self, data_for_grouping, request): - pa_dtype = data_for_grouping.dtype.pyarrow_dtype - if pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - reason="ArrowExtensionArray doesn't support .sum() yet.", - ) - ) - super().test_in_numeric_groupby(data_for_grouping) - @pytest.mark.parametrize("as_index", [True, False]) def test_groupby_extension_agg(self, as_index, data_for_grouping, request): pa_dtype = data_for_grouping.dtype.pyarrow_dtype @@ -1446,6 
+1439,19 @@ def test_is_bool_dtype():
     tm.assert_series_equal(result, expected)


+def test_is_numeric_dtype(data):
+    # GH 50563
+    pa_type = data.dtype.pyarrow_dtype
+    if (
+        pa.types.is_floating(pa_type)
+        or pa.types.is_integer(pa_type)
+        or pa.types.is_decimal(pa_type)
+    ):
+        assert is_numeric_dtype(data)
+    else:
+        assert not is_numeric_dtype(data)
+
+
 def test_pickle_roundtrip(data):
     # GH 42600
     expected = pd.Series(data)
@@ -1553,3 +1559,20 @@ def test_round():
     result = ser.round(-1)
     expected = pd.Series([120.0, pd.NA, 60.0], dtype=dtype)
     tm.assert_series_equal(result, expected)
+
+
+def test_searchsorted_with_na_raises(data_for_sorting, as_series):
+    # GH50447
+    b, c, a = data_for_sorting
+    arr = data_for_sorting.take([2, 0, 1])  # to get [a, b, c]
+    arr[-1] = pd.NA
+
+    if as_series:
+        arr = pd.Series(arr)
+
+    msg = (
+        "searchsorted requires array to be sorted, "
+        "which is impossible with NAs present."
+    )
+    with pytest.raises(ValueError, match=msg):
+        arr.searchsorted(b)
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index de7967a8578b5..3e865947aa968 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -420,3 +420,20 @@ def arrow_not_supported(self, data, request):
                 reason="2D support not implemented for ArrowStringArray"
             )
             request.node.add_marker(mark)
+
+
+def test_searchsorted_with_na_raises(data_for_sorting, as_series):
+    # GH50447
+    b, c, a = data_for_sorting
+    arr = data_for_sorting.take([2, 0, 1])  # to get [a, b, c]
+    arr[-1] = pd.NA
+
+    if as_series:
+        arr = pd.Series(arr)
+
+    msg = (
+        "searchsorted requires array to be sorted, "
+        "which is impossible with NAs present."
+    )
+    with pytest.raises(ValueError, match=msg):
+        arr.searchsorted(b)
diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py
index e8a49ab868425..23458b096a140 100644
--- a/pandas/tests/frame/indexing/test_mask.py
+++ b/pandas/tests/frame/indexing/test_mask.py
@@ -7,6 +7,7 @@
 from pandas import (
     NA,
     DataFrame,
+    Float64Dtype,
     Series,
     StringDtype,
     Timedelta,
@@ -130,3 +131,13 @@ def test_mask_where_dtype_timedelta():
         [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")]
     )
     tm.assert_frame_equal(df.where(df > Timedelta(2, unit="d")), expected)
+
+
+def test_mask_return_dtype():
+    # GH#50488
+    ser = Series([0.0, 1.0, 2.0, 3.0], dtype=Float64Dtype())
+    cond = ~ser.isna()
+    other = Series([True, False, True, False])
+    expected = Series([1.0, 0.0, 1.0, 0.0], dtype=ser.dtype)
+    result = ser.mask(cond, other)
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
index f65f3a311b403..e37c881472b65 100644
--- a/pandas/tests/frame/indexing/test_where.py
+++ b/pandas/tests/frame/indexing/test_where.py
@@ -550,7 +550,8 @@ def test_where_axis_multiple_dtypes(self):

         # DataFrame vs DataFrame
         d1 = df.copy().drop(1, axis=0)
-        expected = df.copy()
+        # Explicit cast to avoid implicit cast when setting value to np.nan
+        expected = df.copy().astype("float")
         expected.loc[1, :] = np.nan

         result = df.where(mask, d1)
@@ -669,7 +670,8 @@ def test_where_categorical_filtering(self):
         df["b"] = df["b"].astype("category")

         result = df.where(df["a"] > 0)
-        expected = df.copy()
+        # Explicitly cast to 'float' to avoid implicit cast when setting np.nan
+        expected = df.copy().astype({"a": "float"})
         expected.loc[0, :] = np.nan

         tm.assert_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_asfreq.py 
b/pandas/tests/frame/methods/test_asfreq.py index 07eacb5e89e3a..5b3e1614e1ada 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -159,7 +159,8 @@ def test_asfreq_fillvalue(self): # setup rng = date_range("1/1/2016", periods=10, freq="2S") - ts = Series(np.arange(len(rng)), index=rng) + # Explicit cast to 'float' to avoid implicit cast when setting None + ts = Series(np.arange(len(rng)), index=rng, dtype="float") df = DataFrame({"one": ts}) # insert pre-existing missing value diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index 0b27fe591f794..a08f8bf5f502e 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -29,7 +29,8 @@ def date_range_frame(): class TestFrameAsof: def test_basic(self, date_range_frame): - df = date_range_frame + # Explicitly cast to float to avoid implicit cast when setting np.nan + df = date_range_frame.astype({"A": "float"}) N = 50 df.loc[df.index[15:30], "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") @@ -50,7 +51,8 @@ def test_basic(self, date_range_frame): def test_subset(self, date_range_frame): N = 10 - df = date_range_frame.iloc[:N].copy() + # explicitly cast to float to avoid implicit upcast when setting to np.nan + df = date_range_frame.iloc[:N].copy().astype({"A": "float"}) df.loc[df.index[4:8], "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") @@ -163,7 +165,7 @@ def test_time_zone_aware_index(self, stamp, expected): def test_is_copy(self, date_range_frame): # GH-27357, GH-30784: ensure the result of asof is an actual copy and # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings - df = date_range_frame + df = date_range_frame.astype({"A": "float"}) N = 50 df.loc[df.index[15:30], "A"] = np.nan dates = date_range("1/1/1990", periods=N * 3, freq="25s") diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index ad6122501bc19..f7da28a43590d 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -398,11 +398,7 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype): pa_version_under7p0 and nullable_string_dtype == "string[pyarrow]", ): df2.set_index(["a", "b"], inplace=True) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and nullable_string_dtype == "string[pyarrow]", - ): - result = df.combine_first(df2) + result = df.combine_first(df2) with tm.maybe_produces_warning( PerformanceWarning, pa_version_under7p0 and nullable_string_dtype == "string[pyarrow]", diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py index dddd6c6d2eaf2..beec3e965d542 100644 --- a/pandas/tests/frame/methods/test_equals.py +++ b/pandas/tests/frame/methods/test_equals.py @@ -36,7 +36,8 @@ def test_equals(self): df1["start"] = date_range("2000-1-1", periods=10, freq="T") df1["end"] = date_range("2000-1-1", periods=10, freq="D") df1["diff"] = df1["end"] - df1["start"] - df1["bool"] = np.arange(10) % 3 == 0 + # Explicitly cast to object, to avoid implicit cast when setting np.nan + df1["bool"] = (np.arange(10) % 3 == 0).astype(object) df1.loc[::2] = np.nan df2 = df1.copy() assert df1["text"].equals(df2["text"]) diff --git a/pandas/tests/frame/methods/test_isetitem.py b/pandas/tests/frame/methods/test_isetitem.py new file mode 100644 index 
0000000000000..59328aafefefb --- /dev/null +++ b/pandas/tests/frame/methods/test_isetitem.py @@ -0,0 +1,37 @@ +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +class TestDataFrameSetItem: + def test_isetitem_ea_df(self): + # GH#49922 + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + rhs = DataFrame([[11, 12], [13, 14]], dtype="Int64") + + df.isetitem([0, 1], rhs) + expected = DataFrame( + { + 0: Series([11, 13], dtype="Int64"), + 1: Series([12, 14], dtype="Int64"), + 2: [3, 6], + } + ) + tm.assert_frame_equal(df, expected) + + def test_isetitem_ea_df_scalar_indexer(self): + # GH#49922 + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + rhs = DataFrame([[11], [13]], dtype="Int64") + + df.isetitem(2, rhs) + expected = DataFrame( + { + 0: [1, 4], + 1: [2, 5], + 2: Series([11, 13], dtype="Int64"), + } + ) + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py index bfee3edc085d8..21f0664707ebe 100644 --- a/pandas/tests/frame/methods/test_truncate.py +++ b/pandas/tests/frame/methods/test_truncate.py @@ -66,12 +66,6 @@ def test_truncate(self, datetime_frame, frame_or_series): before=ts.index[-1] - ts.index.freq, after=ts.index[0] + ts.index.freq ) - def test_truncate_copy(self, datetime_frame): - index = datetime_frame.index - truncated = datetime_frame.truncate(index[5], index[10]) - truncated.values[:] = 5.0 - assert not (datetime_frame.values[5:11] == 5).any() - def test_truncate_nonsortedindex(self, frame_or_series): # GH#17935 diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index e81837898c927..159dab04e7da6 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -448,7 +448,8 @@ def test_date_index_query(self): def test_date_index_query_with_NaT(self): engine, parser = self.engine, self.parser n = 10 - df = DataFrame(np.random.randn(n, 3)) + # Cast to object to avoid implicit cast when setting entry to pd.NaT below + df = DataFrame(np.random.randn(n, 3)).astype({0: object}) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) df.iloc[0, 0] = pd.NaT @@ -808,7 +809,8 @@ def test_date_index_query(self): def test_date_index_query_with_NaT(self): engine, parser = self.engine, self.parser n = 10 - df = DataFrame(np.random.randn(n, 3)) + # Cast to object to avoid implicit cast when setting entry to pd.NaT below + df = DataFrame(np.random.randn(n, 3)).astype({0: object}) df["dates1"] = date_range("1/1/2012", periods=n) df["dates3"] = date_range("1/1/2014", periods=n) df.iloc[0, 0] = pd.NaT diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index a3cd3e4afdda1..2e0aa5fd0cf40 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -449,10 +449,14 @@ def test_var_std(self, datetime_frame): def test_numeric_only_flag(self, meth): # GH 9201 df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + # Cast to object to avoid implicit cast when setting entry to "100" below + df1 = df1.astype({"foo": object}) # set one entry to a number in str format df1.loc[0, "foo"] = "100" df2 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + # Cast to object to avoid implicit cast when setting entry to "a" below + df2 = df2.astype({"foo": object}) # set one entry to a non-number str df2.loc[0, "foo"] = "a" diff --git a/pandas/tests/frame/test_stack_unstack.py 
b/pandas/tests/frame/test_stack_unstack.py
index 1b6b158cc61f5..2be2a052401ed 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -857,6 +857,8 @@ def cast(val):
     def test_unstack_nan_index2(self):
         # GH7403
         df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)})
+        # Explicit cast to avoid implicit cast when setting to np.NaN
+        df = df.astype({"B": "float"})
         df.iloc[3, 1] = np.NaN

         left = df.set_index(["A", "B"]).unstack(0)
@@ -874,6 +876,8 @@ def test_unstack_nan_index2(self):
         tm.assert_frame_equal(left, right)

         df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)})
+        # Explicit cast to avoid implicit cast when setting to np.NaN
+        df = df.astype({"B": "float"})
         df.iloc[2, 1] = np.NaN

         left = df.set_index(["A", "B"]).unstack(0)
@@ -886,6 +890,8 @@ def test_unstack_nan_index2(self):
         tm.assert_frame_equal(left, right)

         df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)})
+        # Explicit cast to avoid implicit cast when setting to np.NaN
+        df = df.astype({"B": "float"})
         df.iloc[3, 1] = np.NaN

         left = df.set_index(["A", "B"]).unstack(0)
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 40ae37cfaba2d..461ae05aedb82 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -210,6 +210,21 @@ def test_aggregate_str_func(tsframe, groupbyfunc):
     tm.assert_frame_equal(result, expected)


+def test_std_masked_dtype(any_numeric_ea_dtype):
+    # GH#35516
+    df = DataFrame(
+        {
+            "a": [2, 1, 1, 1, 2, 2, 1],
+            "b": Series([pd.NA, 1, 2, 1, 1, 1, 2], dtype=any_numeric_ea_dtype),
+        }
+    )
+    result = df.groupby("a").std()
+    expected = DataFrame(
+        {"b": [0.57735, 0]}, index=Index([1, 2], name="a"), dtype="Float64"
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 def test_agg_str_with_kwarg_axis_1_raises(df, reduction_func):
     gb = df.groupby(level=0)
     if reduction_func in ("idxmax", "idxmin"):
diff --git a/pandas/tests/groupby/test_frame_value_counts.py b/pandas/tests/groupby/test_frame_value_counts.py
index 8255fbab40dce..56aa121cd48c2 100644
--- a/pandas/tests/groupby/test_frame_value_counts.py
+++ b/pandas/tests/groupby/test_frame_value_counts.py
@@ -4,9 +4,11 @@
 from pandas import (
     CategoricalIndex,
     DataFrame,
+    Grouper,
     Index,
     MultiIndex,
     Series,
+    to_datetime,
 )
 import pandas._testing as tm

@@ -781,3 +783,39 @@ def test_subset_duplicate_columns():
         ),
     )
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("utc", [True, False])
+def test_value_counts_time_grouper(utc):
+    # GH#50486
+    df = DataFrame(
+        {
+            "Timestamp": [
+                1565083561,
+                1565083561 + 86400,
+                1565083561 + 86500,
+                1565083561 + 86400 * 2,
+                1565083561 + 86400 * 3,
+                1565083561 + 86500 * 3,
+                1565083561 + 86400 * 4,
+            ],
+            "Food": ["apple", "apple", "banana", "banana", "orange", "orange", "pear"],
+        }
+    ).drop([3])
+
+    df["Datetime"] = to_datetime(
+        df["Timestamp"].apply(lambda t: str(t)), utc=utc, unit="s"
+    )
+    gb = df.groupby(Grouper(freq="1D", key="Datetime"))
+    result = gb.value_counts()
+    dates = to_datetime(
+        ["2019-08-06", "2019-08-07", "2019-08-09", "2019-08-10"], utc=utc
+    )
+    timestamps = df["Timestamp"].unique()
+    index = MultiIndex(
+        levels=[dates, timestamps, ["apple", "banana", "orange", "pear"]],
+        codes=[[0, 1, 1, 2, 2, 3], range(6), [0, 0, 1, 2, 2, 3]],
+        names=["Datetime", "Timestamp", "Food"],
+    )
+    expected = Series(1, index=index)
+    tm.assert_series_equal(result, expected)
diff --git 
a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index b2fc60b76fdf6..a59c2853fa50b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2828,3 +2828,13 @@ def test_groupby_index_name_in_index_content(val_in, index, val_out): result = series.to_frame().groupby("blah").sum() expected = expected.to_frame() tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("n", [1, 10, 32, 100, 1000]) +def test_sum_of_booleans(n): + # GH 50347 + df = DataFrame({"groupby_col": 1, "bool": [True] * n}) + df["bool"] = df["bool"].eq(True) + result = df.groupby("groupby_col").sum() + expected = DataFrame({"bool": [n]}, index=Index([1], name="groupby_col")) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 4a707d8875db3..f16cf4dd27016 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -103,6 +103,8 @@ def test_groupby_with_timegrouper(self): "20130901", "20131205", freq="5D", name="Date", inclusive="left" ), ) + # Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl" + expected = expected.astype({"Buyer": object}) expected.iloc[0, 0] = "CarlCarlCarl" expected.iloc[6, 0] = "CarlCarl" expected.iloc[18, 0] = "Joe" diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 577a72d3f5090..1c4e83abc55f7 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -56,6 +56,8 @@ def seed_df(seed_nans, n, m): ) if seed_nans: + # Explicitly cast to float to avoid implicit cast when setting nan + frame["3rd"] = frame["3rd"].astype("float") frame.loc[1::11, "1st"] = np.nan frame.loc[3::17, "2nd"] = np.nan frame.loc[7::19, "3rd"] = np.nan @@ -114,7 +116,8 @@ def rebuild_index(df): tm.assert_series_equal(left.sort_index(), right.sort_index()) -def test_series_groupby_value_counts_with_grouper(): +@pytest.mark.parametrize("utc", [True, False]) +def test_series_groupby_value_counts_with_grouper(utc): # GH28479 df = DataFrame( { @@ -131,7 +134,9 @@ def test_series_groupby_value_counts_with_grouper(): } ).drop([3]) - df["Datetime"] = to_datetime(df["Timestamp"].apply(lambda t: str(t)), unit="s") + df["Datetime"] = to_datetime( + df["Timestamp"].apply(lambda t: str(t)), utc=utc, unit="s" + ) dfg = df.groupby(Grouper(freq="1D", key="Datetime")) # have to sort on index because of unstable sort on values xref GH9212 diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 4c6f172b00a58..8f1d52c2ea03d 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -185,8 +185,6 @@ def test_transform_axis_1_reducer(request, reduction_func): # GH#45715 if reduction_func in ( "corrwith", - "idxmax", - "idxmin", "ngroup", "nth", ): diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index f962a552d9009..e1ada9f10c261 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -880,15 +880,8 @@ def test_constructor_with_ambiguous_keyword_arg(self): result = date_range(end=end, periods=2, ambiguous=False) tm.assert_index_equal(result, expected) - def test_constructor_with_nonexistent_keyword_arg(self, warsaw, request): + def 
test_constructor_with_nonexistent_keyword_arg(self, warsaw): # GH 35297 - if type(warsaw).__name__ == "ZoneInfo": - mark = pytest.mark.xfail( - reason="nonexistent-shift not yet implemented for ZoneInfo", - raises=NotImplementedError, - ) - request.node.add_marker(mark) - timezone = warsaw # nonexistent keyword in start diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index f5f58e7e818d9..8e507212976ec 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -151,57 +151,35 @@ def test_rename_multiindex_with_duplicates(self): expected = DataFrame(index=mi2) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize( - "data_result, data_expected", - [ - ( - [ - [(81.0, np.nan), (np.nan, np.nan)], - [(np.nan, np.nan), (82.0, np.nan)], - [1, 2], - [1, 2], - ], - [ - [[81, 82.0, np.nan], Series([np.nan, np.nan, np.nan])], - [[81, 82.0, np.nan], Series([np.nan, np.nan, np.nan])], - [1, np.nan, 2], - [np.nan, 2, 1], - ], - ), - ( - [ - [(81.0, np.nan), (np.nan, np.nan)], - [(np.nan, np.nan), (81.0, np.nan)], - [1, 2], - [1, 2], - ], - [ - [[81.0, np.nan], Series([np.nan, np.nan])], - [[81.0, np.nan], Series([np.nan, np.nan])], - [1, 2], - [2, 1], - ], - ), - ], - ) - def test_subtracting_two_series_with_unordered_index_and_all_nan_index( - self, data_result, data_expected - ): + def test_series_align_multiindex_with_nan_overlap_only(self): + # GH 38439 + mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]]) + mi2 = MultiIndex.from_arrays([[np.nan, 82.0], [np.nan, np.nan]]) + ser1 = Series([1, 2], index=mi1) + ser2 = Series([1, 2], index=mi2) + result1, result2 = ser1.align(ser2) + + mi = MultiIndex.from_arrays([[81.0, 82.0, np.nan], [np.nan, np.nan, np.nan]]) + expected1 = Series([1.0, np.nan, 2.0], index=mi) + expected2 = Series([np.nan, 2.0, 1.0], index=mi) + + tm.assert_series_equal(result1, expected1) + tm.assert_series_equal(result2, expected2) + + def test_series_align_multiindex_with_nan(self): # GH 38439 - # TODO: Refactor. 
This is impossible to understand GH#49443 - a_index_result = MultiIndex.from_tuples(data_result[0]) - b_index_result = MultiIndex.from_tuples(data_result[1]) - a_series_result = Series(data_result[2], index=a_index_result) - b_series_result = Series(data_result[3], index=b_index_result) - result = a_series_result.align(b_series_result) - - a_index_expected = MultiIndex.from_arrays(data_expected[0]) - b_index_expected = MultiIndex.from_arrays(data_expected[1]) - a_series_expected = Series(data_expected[2], index=a_index_expected) - b_series_expected = Series(data_expected[3], index=b_index_expected) - - tm.assert_series_equal(result[0], a_series_expected) - tm.assert_series_equal(result[1], b_series_expected) + mi1 = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]]) + mi2 = MultiIndex.from_arrays([[np.nan, 81.0], [np.nan, np.nan]]) + ser1 = Series([1, 2], index=mi1) + ser2 = Series([1, 2], index=mi2) + result1, result2 = ser1.align(ser2) + + mi = MultiIndex.from_arrays([[81.0, np.nan], [np.nan, np.nan]]) + expected1 = Series([1, 2], index=mi) + expected2 = Series([2, 1], index=mi) + + tm.assert_series_equal(result1, expected1) + tm.assert_series_equal(result2, expected2) def test_nunique_smoke(self): # GH 34019 diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 32ab0336aa93f..46fb614d96633 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -707,13 +707,13 @@ def test_applymap_subset_multiindex(self, slice_): and isinstance(slice_[-1][-1], list) and "C" in slice_[-1][-1] ): - ctx = pytest.raises(KeyError, match="C") # noqa: PDF010 + ctx = pytest.raises(KeyError, match="C") elif ( isinstance(slice_[0], tuple) and isinstance(slice_[0][1], list) and 3 in slice_[0][1] ): - ctx = pytest.raises(KeyError, match="3") # noqa: PDF010 + ctx = pytest.raises(KeyError, match="3") else: ctx = contextlib.nullcontext() diff --git a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py index cf521aafdc241..d9232a6bddf61 100644 --- a/pandas/tests/io/json/test_json_table_schema_ext_dtype.py +++ b/pandas/tests/io/json/test_json_table_schema_ext_dtype.py @@ -48,7 +48,7 @@ def test_build_table_schema(self): "fields": [ {"name": "index", "type": "integer"}, {"name": "A", "type": "any", "extDtype": "DateDtype"}, - {"name": "B", "type": "any", "extDtype": "decimal"}, + {"name": "B", "type": "number", "extDtype": "decimal"}, {"name": "C", "type": "any", "extDtype": "string"}, {"name": "D", "type": "integer", "extDtype": "Int64"}, ], @@ -82,10 +82,10 @@ def test_as_json_table_type_ext_date_dtype(self): ], ) def test_as_json_table_type_ext_decimal_array_dtype(self, decimal_data): - assert as_json_table_type(decimal_data.dtype) == "any" + assert as_json_table_type(decimal_data.dtype) == "number" def test_as_json_table_type_ext_decimal_dtype(self): - assert as_json_table_type(DecimalDtype()) == "any" + assert as_json_table_type(DecimalDtype()) == "number" @pytest.mark.parametrize( "string_data", @@ -180,7 +180,7 @@ def test_build_decimal_series(self, dc): fields = [ {"name": "id", "type": "integer"}, - {"name": "a", "type": "any", "extDtype": "decimal"}, + {"name": "a", "type": "number", "extDtype": "decimal"}, ] schema = {"fields": fields, "primaryKey": ["id"]} @@ -257,7 +257,7 @@ def test_to_json(self, df): fields = [ OrderedDict({"name": "idx", "type": "integer"}), OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}), - 
OrderedDict({"name": "B", "type": "any", "extDtype": "decimal"}), + OrderedDict({"name": "B", "type": "number", "extDtype": "decimal"}), OrderedDict({"name": "C", "type": "any", "extDtype": "string"}), OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}), ] diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index ee9314c8779dd..6c05f5defe4fb 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -2052,7 +2052,7 @@ def test_parse_dot_separated_dates(all_parsers): name="a", ) warn = UserWarning - msg = "when dayfirst=False was specified" + msg = r"when dayfirst=False \(the default\) was specified" result = parser.read_csv_check_warnings( warn, msg, StringIO(data), parse_dates=True, index_col=0 ) diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 53be06cd491ef..5c7c4f9ce0b75 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -349,7 +349,7 @@ def test_index_types(setup_path): _check_roundtrip(ser, func, path=setup_path) -def test_timeseries_preepoch(setup_path): +def test_timeseries_preepoch(setup_path, request): dr = bdate_range("1/1/1940", "1/1/1960") ts = Series(np.random.randn(len(dr)), index=dr) @@ -357,9 +357,10 @@ def test_timeseries_preepoch(setup_path): _check_roundtrip(ts, tm.assert_series_equal, path=setup_path) except OverflowError: if is_platform_windows(): - pytest.xfail("known failure on some windows platforms") - else: - raise + request.node.add_marker( + pytest.mark.xfail("known failure on some windows platforms") + ) + raise @pytest.mark.parametrize( diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 1263d61b55cd5..2664c7df59223 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -911,7 +911,7 @@ def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): os.close(fd) except (OSError, ValueError): pass - os.remove(new_f) # noqa: PDF008 + os.remove(new_f) # new table df = tm.makeDataFrame() diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index f74d268690a5b..2172a4bf839fb 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -317,7 +317,7 @@ def test_read_expands_user_home_dir( ), ], ) - @pytest.mark.filterwarnings( # pytables np.object usage + @pytest.mark.filterwarnings( # pytables np.object_ usage "ignore:`np.object` is a deprecated alias:DeprecationWarning" ) def test_read_fspath_all(self, reader, module, path, datapath): @@ -372,7 +372,7 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module): expected = f_path.read() assert result == expected - @pytest.mark.filterwarnings( # pytables np.object usage + @pytest.mark.filterwarnings( # pytables np.object_ usage "ignore:`np.object` is a deprecated alias:DeprecationWarning" ) def test_write_fspath_hdf5(self): diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 782753177f245..fc15ff3488ce9 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -19,7 +19,7 @@ import pandas.io.common as icom _compression_to_extension = { - value: key for key, value in icom._extension_to_compression.items() + value: key for key, value in icom.extension_to_compression.items() } diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index 
87f648bb5acd6..d5c03dcc85a0d 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -11,6 +11,7 @@ import pandas as pd from pandas import read_orc import pandas._testing as tm +from pandas.core.arrays import StringArray pytest.importorskip("pyarrow.orc") @@ -305,16 +306,6 @@ def test_orc_writer_dtypes_not_supported(df_not_supported): df_not_supported.to_orc() -def test_orc_use_nullable_dtypes_pandas_backend_not_supported(dirpath): - input_file = os.path.join(dirpath, "TestOrcFile.emptyFile.orc") - with pytest.raises( - NotImplementedError, - match="mode.dtype_backend set to pandas is not implemented.", - ): - with pd.option_context("mode.dtype_backend", "pandas"): - read_orc(input_file, use_nullable_dtypes=True) - - @td.skip_if_no("pyarrow", min_version="7.0.0") def test_orc_use_nullable_dtypes_pyarrow_backend(): df = pd.DataFrame( @@ -336,13 +327,60 @@ def test_orc_use_nullable_dtypes_pyarrow_backend(): ], } ) + bytes_data = df.copy().to_orc() with pd.option_context("mode.dtype_backend", "pyarrow"): result = read_orc(BytesIO(bytes_data), use_nullable_dtypes=True) + expected = pd.DataFrame( { col: pd.arrays.ArrowExtensionArray(pa.array(df[col], from_pandas=True)) for col in df.columns } ) + + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("pyarrow", min_version="7.0.0") +def test_orc_use_nullable_dtypes_pandas_backend(): + # GH#50503 + df = pd.DataFrame( + { + "string": list("abc"), + "string_with_nan": ["a", np.nan, "c"], + "string_with_none": ["a", None, "c"], + "int": list(range(1, 4)), + "int_with_nan": pd.Series([1, pd.NA, 3], dtype="Int64"), + "na_only": pd.Series([pd.NA, pd.NA, pd.NA], dtype="Int64"), + "float": np.arange(4.0, 7.0, dtype="float64"), + "float_with_nan": [2.0, np.nan, 3.0], + "bool": [True, False, True], + "bool_with_na": [True, False, None], + } + ) + + bytes_data = df.copy().to_orc() + with pd.option_context("mode.dtype_backend", "pandas"): + result = read_orc(BytesIO(bytes_data), use_nullable_dtypes=True) + + expected = pd.DataFrame( + { + "string": StringArray(np.array(["a", "b", "c"], dtype=np.object_)), + "string_with_nan": StringArray( + np.array(["a", pd.NA, "c"], dtype=np.object_) + ), + "string_with_none": StringArray( + np.array(["a", pd.NA, "c"], dtype=np.object_) + ), + "int": pd.Series([1, 2, 3], dtype="Int64"), + "int_with_nan": pd.Series([1, pd.NA, 3], dtype="Int64"), + "na_only": pd.Series([pd.NA, pd.NA, pd.NA], dtype="Int64"), + "float": pd.Series([4.0, 5.0, 6.0], dtype="Float64"), + "float_with_nan": pd.Series([2.0, pd.NA, 3.0], dtype="Float64"), + "bool": pd.Series([True, False, True], dtype="boolean"), + "bool_with_na": pd.Series([True, False, pd.NA], dtype="boolean"), + } + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 3dafe6fe61b35..07a028a19d7f9 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -37,8 +37,8 @@ get_lzma_file, is_platform_little_endian, ) -from pandas.compat._compressors import flatten_buffer from pandas.compat._optional import import_optional_dependency +from pandas.compat.compressors import flatten_buffer import pandas.util._test_decorators as td import pandas as pd @@ -255,7 +255,7 @@ def get_random_path(): class TestCompression: - _extension_to_compression = icom._extension_to_compression + _extension_to_compression = icom.extension_to_compression def compress_file(self, src_path, dest_path, compression): if compression is None: diff --git a/pandas/tests/io/xml/test_xml.py 
b/pandas/tests/io/xml/test_xml.py
index aeaf2d3b7edbf..d65b9b8af4365 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -21,8 +21,17 @@
 )
 import pandas.util._test_decorators as td
 
-from pandas import DataFrame
+import pandas as pd
+from pandas import (
+    NA,
+    DataFrame,
+    Series,
+)
 import pandas._testing as tm
+from pandas.core.arrays import (
+    ArrowStringArray,
+    StringArray,
+)
 
 from pandas.io.common import get_handle
 from pandas.io.xml import read_xml
@@ -1702,3 +1711,74 @@ def test_s3_parser_consistency():
     )
 
     tm.assert_frame_equal(df_lxml, df_etree)
+
+
+@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
+def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend):
+    # GH#50500
+    if string_storage == "pyarrow" or dtype_backend == "pyarrow":
+        pa = pytest.importorskip("pyarrow")
+    data = """<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <a>x</a>
+    <b>1</b>
+    <c>4.0</c>
+    <d>x</d>
+    <e>2</e>
+    <f>4.0</f>
+    <g></g>
+    <h>True</h>
+    <i>False</i>
+  </row>
+  <row>
+    <a>y</a>
+    <b>2</b>
+    <c>5.0</c>
+    <d></d>
+    <e></e>
+    <f></f>
+    <g></g>
+    <h>False</h>
+    <i></i>
+  </row>
+</data>"""
+
+    if string_storage == "python":
+        string_array = StringArray(np.array(["x", "y"], dtype=np.object_))
+        string_array_na = StringArray(np.array(["x", NA], dtype=np.object_))
+
+    else:
+        string_array = ArrowStringArray(pa.array(["x", "y"]))
+        string_array_na = ArrowStringArray(pa.array(["x", None]))
+
+    with pd.option_context("mode.string_storage", string_storage):
+        with pd.option_context("mode.dtype_backend", dtype_backend):
+            result = read_xml(data, parser=parser, use_nullable_dtypes=True)
+
+    expected = DataFrame(
+        {
+            "a": string_array,
+            "b": Series([1, 2], dtype="Int64"),
+            "c": Series([4.0, 5.0], dtype="Float64"),
+            "d": string_array_na,
+            "e": Series([2, NA], dtype="Int64"),
+            "f": Series([4.0, NA], dtype="Float64"),
+            "g": Series([NA, NA], dtype="Int64"),
+            "h": Series([True, False], dtype="boolean"),
+            "i": Series([False, NA], dtype="boolean"),
+        }
+    )
+
+    if dtype_backend == "pyarrow":
+        from pandas.arrays import ArrowExtensionArray
+
+        expected = DataFrame(
+            {
+                col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
+                for col in expected.columns
+            }
+        )
+        expected["g"] = ArrowExtensionArray(pa.array([None, None]))
+
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py
index fd7c47d47112f..e352250dc748d 100644
--- a/pandas/tests/libs/test_lib.py
+++ b/pandas/tests/libs/test_lib.py
@@ -243,6 +243,27 @@ def test_get_reverse_indexer(self):
         expected = np.array([4, 2, 3, 6, 7], dtype=np.intp)
         tm.assert_numpy_array_equal(result, expected)
 
+    @pytest.mark.parametrize("dtype", ["int64", "int32"])
+    def test_array_equal_fast(self, dtype):
+        # GH#50592
+        left = np.arange(1, 100, dtype=dtype)
+        right = np.arange(1, 100, dtype=dtype)
+        assert lib.array_equal_fast(left, right)
+
+    @pytest.mark.parametrize("dtype", ["int64", "int32"])
+    def test_array_equal_fast_not_equal(self, dtype):
+        # GH#50592
+        left = np.array([1, 2], dtype=dtype)
+        right = np.array([2, 2], dtype=dtype)
+        assert not lib.array_equal_fast(left, right)
+
+    @pytest.mark.parametrize("dtype", ["int64", "int32"])
+    def test_array_equal_fast_not_equal_shape(self, dtype):
+        # GH#50592
+        left = np.array([1, 2, 3], dtype=dtype)
+        right = np.array([2, 2], dtype=dtype)
+        assert not lib.array_equal_fast(left, right)
+
 
 def test_cache_readonly_preserve_docstrings():
     # GH18197
diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py
index 9d90f2e405803..797aae7eaba3a 100644
--- a/pandas/tests/plotting/test_hist_method.py
+++ b/pandas/tests/plotting/test_hist_method.py
@@ 
-560,6 +560,36 @@ def test_hist_secondary_legend(self): assert ax.get_yaxis().get_visible() tm.close() + @td.skip_if_no_mpl + def test_hist_with_nans_and_weights(self): + # GH 48884 + df = DataFrame( + [[np.nan, 0.2, 0.3], [0.4, np.nan, np.nan], [0.7, 0.8, 0.9]], + columns=list("abc"), + ) + weights = np.array([0.25, 0.3, 0.45]) + no_nan_df = DataFrame([[0.4, 0.2, 0.3], [0.7, 0.8, 0.9]], columns=list("abc")) + no_nan_weights = np.array([[0.3, 0.25, 0.25], [0.45, 0.45, 0.45]]) + + from matplotlib.patches import Rectangle + + _, ax0 = self.plt.subplots() + df.plot.hist(ax=ax0, weights=weights) + rects = [x for x in ax0.get_children() if isinstance(x, Rectangle)] + heights = [rect.get_height() for rect in rects] + _, ax1 = self.plt.subplots() + no_nan_df.plot.hist(ax=ax1, weights=no_nan_weights) + no_nan_rects = [x for x in ax1.get_children() if isinstance(x, Rectangle)] + no_nan_heights = [rect.get_height() for rect in no_nan_rects] + assert all(h0 == h1 for h0, h1 in zip(heights, no_nan_heights)) + + idxerror_weights = np.array([[0.3, 0.25], [0.45, 0.45]]) + + msg = "weights must have the same shape as data, or be a single column" + with pytest.raises(ValueError, match=msg): + _, ax2 = self.plt.subplots() + no_nan_df.plot.hist(ax=ax2, weights=idxerror_weights) + @td.skip_if_no_mpl class TestDataFrameGroupByPlots(TestPlotBase): diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 0432cf397067d..a521e24aa6022 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -515,3 +515,25 @@ def test_resample_empty_Dataframe(keys): expected.index.name = keys[0] tm.assert_frame_equal(result, expected) + + +def test_groupby_resample_size_all_index_same(): + # GH 46826 + df = DataFrame( + {"A": [1] * 3 + [2] * 3 + [1] * 3 + [2] * 3, "B": np.arange(12)}, + index=date_range("31/12/2000 18:00", freq="H", periods=12), + ) + result = df.groupby("A").resample("D").size() + expected = Series( + 3, + index=pd.MultiIndex.from_tuples( + [ + (1, Timestamp("2000-12-31")), + (1, Timestamp("2001-01-01")), + (2, Timestamp("2000-12-31")), + (2, Timestamp("2001-01-01")), + ], + names=["A", None], + ), + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 604429e7c8d78..4143e52bbb7ed 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -415,12 +415,13 @@ def test_constructor_fromordinal(self): nanosecond=1, tz="UTC", ), - Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, None), - Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, pytz.UTC), + Timestamp(2000, 1, 2, 3, 4, 5, 6, None, nanosecond=1), + Timestamp(2000, 1, 2, 3, 4, 5, 6, tz=pytz.UTC, nanosecond=1), ], ) def test_constructor_nanosecond(self, result): # GH 18898 + # As of 2.0 (GH 49416), nanosecond should not be accepted positionally expected = Timestamp(datetime(2000, 1, 2, 3, 4, 5, 6), tz=result.tz) expected = expected + Timedelta(nanoseconds=1) assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index de41ea9021453..70f9f7c924844 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -7,6 +7,7 @@ timezone, ) import locale +import time import unicodedata from dateutil.tz import tzutc @@ -1095,3 +1096,11 @@ def test_delimited_date(): 
result = Timestamp("13-01-2000") expected = Timestamp(2000, 1, 13) assert result == expected + + +def test_utctimetuple(): + # GH 32174 + ts = Timestamp("2000-01-01", tz="UTC") + result = ts.utctimetuple() + expected = time.struct_time((2000, 1, 1, 0, 0, 0, 5, 1, 0)) + assert result == expected diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 59afe22e40f7a..18ad275083022 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -16,7 +16,8 @@ def test_replace_explicit_none(self): expected = pd.Series([0, 0, None], dtype=object) tm.assert_series_equal(result, expected) - df = pd.DataFrame(np.zeros((3, 3))) + # Cast column 2 to object to avoid implicit cast when setting entry to "" + df = pd.DataFrame(np.zeros((3, 3))).astype({2: object}) df.iloc[2, 2] = "" result = df.replace("", None) expected = pd.DataFrame( diff --git a/pandas/tests/series/methods/test_to_numpy.py b/pandas/tests/series/methods/test_to_numpy.py new file mode 100644 index 0000000000000..487489e8c0b0c --- /dev/null +++ b/pandas/tests/series/methods/test_to_numpy.py @@ -0,0 +1,17 @@ +import numpy as np +import pytest + +from pandas import ( + NA, + Series, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("dtype", ["int64", "float64"]) +def test_to_numpy_na_value(dtype): + # GH#48951 + ser = Series([1, 2, NA, 4]) + result = ser.to_numpy(dtype=dtype, na_value=0) + expected = np.array([1, 2, 0, 4], dtype=dtype) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/series/methods/test_tz_localize.py b/pandas/tests/series/methods/test_tz_localize.py index b71e7ed5500c3..6b096a7fcf3eb 100644 --- a/pandas/tests/series/methods/test_tz_localize.py +++ b/pandas/tests/series/methods/test_tz_localize.py @@ -64,6 +64,7 @@ def test_series_tz_localize_matching_index(self): "method, exp", [ ["shift_forward", "2015-03-29 03:00:00"], + ["shift_backward", "2015-03-29 01:59:59.999999999"], ["NaT", NaT], ["raise", None], ["foo", "invalid"], @@ -99,15 +100,6 @@ def test_tz_localize_nonexistent(self, warsaw, method, exp): with pytest.raises(ValueError, match=msg): df.tz_localize(tz, nonexistent=method) - elif method == "shift_forward" and type(tz).__name__ == "ZoneInfo": - msg = "nonexistent shifting is not implemented with ZoneInfo tzinfos" - with pytest.raises(NotImplementedError, match=msg): - ser.tz_localize(tz, nonexistent=method) - with pytest.raises(NotImplementedError, match=msg): - df.tz_localize(tz, nonexistent=method) - with pytest.raises(NotImplementedError, match=msg): - dti.tz_localize(tz, nonexistent=method) - else: result = ser.tz_localize(tz, nonexistent=method) expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz)) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 389e32f7f193f..d6e862ed11d36 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -366,6 +366,37 @@ def test_to_datetime_with_non_exact(self, cache): ) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "format, expected", + [ + ("%Y-%m-%d", Timestamp(2000, 1, 3)), + ("%Y-%d-%m", Timestamp(2000, 3, 1)), + ("%Y-%m-%d %H", Timestamp(2000, 1, 3, 12)), + ("%Y-%d-%m %H", Timestamp(2000, 3, 1, 12)), + ("%Y-%m-%d %H:%M", Timestamp(2000, 1, 3, 12, 34)), + ("%Y-%d-%m %H:%M", Timestamp(2000, 3, 1, 12, 34)), + ("%Y-%m-%d %H:%M:%S", Timestamp(2000, 1, 3, 12, 34, 56)), + ("%Y-%d-%m %H:%M:%S", Timestamp(2000, 3, 1, 12, 
34, 56)),
+            ("%Y-%m-%d %H:%M:%S.%f", Timestamp(2000, 1, 3, 12, 34, 56, 123456)),
+            ("%Y-%d-%m %H:%M:%S.%f", Timestamp(2000, 3, 1, 12, 34, 56, 123456)),
+            (
+                "%Y-%m-%d %H:%M:%S.%f%z",
+                Timestamp(2000, 1, 3, 12, 34, 56, 123456, tz="UTC+01:00"),
+            ),
+            (
+                "%Y-%d-%m %H:%M:%S.%f%z",
+                Timestamp(2000, 3, 1, 12, 34, 56, 123456, tz="UTC+01:00"),
+            ),
+        ],
+    )
+    def test_non_exact_doesnt_parse_whole_string(self, cache, format, expected):
+        # https://github.com/pandas-dev/pandas/issues/50412
+        # the formats alternate between ISO8601 and non-ISO8601 to check both paths
+        result = to_datetime(
+            "2000-01-03 12:34:56.123456+01:00", format=format, exact=False
+        )
+        assert result == expected
+
     @pytest.mark.parametrize(
         "arg",
         [
@@ -511,6 +542,29 @@ def test_to_datetime_parse_timezone_keeps_name(self):
 
 
 class TestToDatetime:
+    @pytest.mark.filterwarnings("ignore:Could not infer format")
+    def test_to_datetime_overflow(self):
+        # we should get an OutOfBoundsDatetime, NOT OverflowError
+        # TODO: Timestamp raises ValueError("could not convert string to Timestamp")
+        #  can we make these more consistent?
+        arg = "08335394550"
+        msg = 'Parsing "08335394550" to datetime overflows, at position 0'
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            to_datetime(arg)
+
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            to_datetime([arg])
+
+        res = to_datetime(arg, errors="coerce")
+        assert res is NaT
+        res = to_datetime([arg], errors="coerce")
+        tm.assert_index_equal(res, Index([NaT]))
+
+        res = to_datetime(arg, errors="ignore")
+        assert isinstance(res, str) and res == arg
+        res = to_datetime([arg], errors="ignore")
+        tm.assert_index_equal(res, Index([arg], dtype=object))
+
     def test_to_datetime_mixed_datetime_and_string(self):
         # GH#47018 adapted old doctest with new behavior
         d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1)))
diff --git a/pandas/tests/tseries/offsets/test_business_hour.py b/pandas/tests/tseries/offsets/test_business_hour.py
index 79e7a5ff67010..319cc053d5d7d 100644
--- a/pandas/tests/tseries/offsets/test_business_hour.py
+++ b/pandas/tests/tseries/offsets/test_business_hour.py
@@ -241,6 +241,12 @@ def test_sub(self, dt, offset2, _offset):
 
         assert dt - offset2 == dt + _offset(-3)
 
+    def test_multiply_by_zero(self, dt, offset1, offset2):
+        assert dt - 0 * offset1 == dt
+        assert dt + 0 * offset1 == dt
+        assert dt - 0 * offset2 == dt
+        assert dt + 0 * offset2 == dt
+
     def testRollback1(
         self,
         dt,
@@ -972,6 +978,12 @@ def test_datetimeindex(self):
         for idx in [idx1, idx2, idx3]:
             tm.assert_index_equal(idx, expected)
 
+    def test_short_datetimeindex_creation(self):
+        # gh-49835
+        idx4 = date_range(start="2014-07-01 10:00", freq="BH", periods=1)
+        expected4 = DatetimeIndex(["2014-07-01 10:00"], freq="BH")
+        tm.assert_index_equal(idx4, expected4)
+
     def test_bday_ignores_timedeltas(self):
         idx = date_range("2010/02/01", "2010/02/10", freq="12H")
         t1 = idx + BDay(offset=Timedelta(3, unit="H"))
diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py
index 558c802fd70f6..a1a14674400c6 100644
--- a/pandas/tests/tslibs/test_parsing.py
+++ b/pandas/tests/tslibs/test_parsing.py
@@ -262,7 +262,8 @@ def test_guess_datetime_format_wrong_type_inputs(invalid_type_dt):
 def test_guess_datetime_format_no_padding(string, fmt, dayfirst, warning):
     # see gh-11142
     msg = (
-        f"Parsing dates in {fmt} format when dayfirst=False was specified. "
+        rf"Parsing dates in {fmt} format when dayfirst=False \(the default\) "
+        "was specified. 
" "Pass `dayfirst=True` or specify a format to silence this warning." ) with tm.assert_produces_warning(warning, match=msg): diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index bc723b8ed36b8..6180d4a5f8e17 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas.errors import SpecificationError +from pandas.errors import ( + DataError, + SpecificationError, +) from pandas import ( DataFrame, @@ -66,18 +69,12 @@ def tests_skip_nuisance(step): tm.assert_frame_equal(result, expected) -def test_skip_sum_object_raises(step): +def test_sum_object_str_raises(step): df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) r = df.rolling(window=3, step=step) - msg = r"nuisance columns.*Dropped columns were Index\(\['C'\], dtype='object'\)" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#42738 - result = r.sum() - expected = DataFrame( - {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, - columns=list("AB"), - )[::step] - tm.assert_frame_equal(result, expected) + with pytest.raises(DataError, match="Cannot aggregate non-numeric type: object"): + # GH#42738, enforced in 2.0 + r.sum() def test_agg(step): diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index 52011a2d5f760..b975a28273337 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -165,7 +165,7 @@ def test_dataframe_dtypes(method, expected_data, dtypes, min_periods, step): rolled = df.rolling(2, min_periods=min_periods, step=step) if dtypes in ("m8[ns]", "M8[ns]", "datetime64[ns, UTC]") and method != "count": - msg = "No numeric types to aggregate" + msg = "Cannot aggregate non-numeric type" with pytest.raises(DataError, match=msg): getattr(rolled, method)() else: diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index f88c20f2f78c6..205a02dcb051b 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -98,11 +98,9 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods): halflife = halflife_with_times data = np.arange(10.0) data[::2] = np.nan - df = DataFrame({"A": data, "time_col": date_range("2000", freq="D", periods=10)}) - with tm.assert_produces_warning(FutureWarning, match="nuisance columns"): - # GH#42738 - result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean() - expected = df.ewm(halflife=1.0, min_periods=min_periods).mean() + df = DataFrame({"A": data}) + result = df.ewm(halflife=halflife, min_periods=min_periods, times=times).mean() + expected = df.ewm(halflife=1.0, min_periods=min_periods).mean() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 3da14bce6facd..41b2ee70d7987 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -1125,13 +1125,6 @@ def test_methods(self, method, expected_data): ) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="nuisance"): - # GH#42738 - expected = df.groupby("A", group_keys=True).apply( - lambda x: getattr(x.ewm(com=1.0), method)() - ) - tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( "method, expected_data", [["corr", [np.nan, 1.0, 1.0, 1]], ["cov", [np.nan, 0.5, 0.928571, 1.385714]]], @@ -1160,13 +1153,9 @@ def test_pairwise_methods(self, method, expected_data): def 
test_times(self, times_frame): # GH 40951 halflife = "23 days" - with tm.assert_produces_warning(FutureWarning, match="nuisance"): - # GH#42738 - result = ( - times_frame.groupby("A") - .ewm(halflife=halflife, times=times_frame["C"]) - .mean() - ) + # GH#42738 + times = times_frame.pop("C") + result = times_frame.groupby("A").ewm(halflife=halflife, times=times).mean() expected = DataFrame( { "B": [ @@ -1200,29 +1189,13 @@ def test_times(self, times_frame): ) tm.assert_frame_equal(result, expected) - def test_times_vs_apply(self, times_frame): - # GH 40951 - halflife = "23 days" - with tm.assert_produces_warning(FutureWarning, match="nuisance"): - # GH#42738 - result = ( - times_frame.groupby("A") - .ewm(halflife=halflife, times=times_frame["C"]) - .mean() - ) - expected = times_frame.groupby("A", group_keys=True).apply( - lambda x: x.ewm(halflife=halflife, times=x["C"]).mean() - ) - tm.assert_frame_equal(result, expected) - def test_times_array(self, times_frame): # GH 40951 halflife = "23 days" + times = times_frame.pop("C") gb = times_frame.groupby("A") - with tm.assert_produces_warning(FutureWarning, match="nuisance"): - # GH#42738 - result = gb.ewm(halflife=halflife, times=times_frame["C"]).mean() - expected = gb.ewm(halflife=halflife, times=times_frame["C"].values).mean() + result = gb.ewm(halflife=halflife, times=times).mean() + expected = gb.ewm(halflife=halflife, times=times.values).mean() tm.assert_frame_equal(result, expected) def test_dont_mutate_obj_after_slicing(self): diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 1c78a186e9d37..cca0ab3a0a9bb 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -253,22 +253,19 @@ def test_invalid_engine_kwargs(self, grouper, method): def test_cython_vs_numba( self, grouper, method, nogil, parallel, nopython, ignore_na, adjust ): + df = DataFrame({"B": range(4)}) if grouper == "None": grouper = lambda x: x - warn = FutureWarning else: + df["A"] = ["a", "b", "a", "b"] grouper = lambda x: x.groupby("A") - warn = None if method == "sum": adjust = True - df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)}) ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na) engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - with tm.assert_produces_warning(warn, match="nuisance"): - # GH#42738 - result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs) - expected = getattr(ewm, method)(engine="cython") + result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs) + expected = getattr(ewm, method)(engine="cython") tm.assert_frame_equal(result, expected) @@ -276,12 +273,12 @@ def test_cython_vs_numba( def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_na): # GH 40951 + df = DataFrame({"B": [0, 0, 1, 1, 2, 2]}) if grouper == "None": grouper = lambda x: x - warn = FutureWarning else: grouper = lambda x: x.groupby("A") - warn = None + df["A"] = ["a", "b", "a", "b", "b", "a"] halflife = "23 days" times = to_datetime( @@ -294,17 +291,14 @@ def test_cython_vs_numba_times(self, grouper, nogil, parallel, nopython, ignore_ "2020-01-03", ] ) - df = DataFrame({"A": ["a", "b", "a", "b", "b", "a"], "B": [0, 0, 1, 1, 2, 2]}) ewm = grouper(df).ewm( halflife=halflife, adjust=True, ignore_na=ignore_na, times=times ) engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} - with tm.assert_produces_warning(warn, match="nuisance"): - # GH#42738 - result = ewm.mean(engine="numba", 
engine_kwargs=engine_kwargs) - expected = ewm.mean(engine="cython") + result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs) + expected = ewm.mean(engine="cython") tm.assert_frame_equal(result, expected) diff --git a/requirements-dev.txt b/requirements-dev.txt index 4f2a80d932fd0..975783a83d1f6 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -40,7 +40,7 @@ python-snappy pyxlsb s3fs>=2021.08.0 scipy -sqlalchemy +sqlalchemy<1.4.46 tabulate tzdata>=2022.1 xarray @@ -65,7 +65,6 @@ gitpython gitdb natsort numpydoc -pandas-dev-flaker==0.5.0 pydata-sphinx-theme<0.11 pytest-cython sphinx diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py new file mode 100644 index 0000000000000..3c21821e794a9 --- /dev/null +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -0,0 +1,142 @@ +""" +Check that test suite file doesn't use the pandas namespace inconsistently. + +We check for cases of ``Series`` and ``pd.Series`` appearing in the same file +(likewise for other pandas objects). + +This is meant to be run as a pre-commit hook - to run it manually, you can do: + + pre-commit run inconsistent-namespace-usage --all-files + +To automatically fixup a given file, you can pass `--replace`, e.g. + + python scripts/check_for_inconsistent_pandas_namespace.py test_me.py --replace + +though note that you may need to manually fixup some imports and that you will also +need the additional dependency `tokenize-rt` (which is left out from the pre-commit +hook so that it uses the same virtualenv as the other local ones). + +The general structure is similar to that of some plugins from +https://github.com/asottile/pyupgrade . +""" + +import argparse +import ast +import sys +from typing import ( + MutableMapping, + NamedTuple, + Optional, + Sequence, + Set, +) + +ERROR_MESSAGE = ( + "{path}:{lineno}:{col_offset}: " + "Found both '{prefix}.{name}' and '{name}' in {path}" +) + + +class OffsetWithNamespace(NamedTuple): + lineno: int + col_offset: int + namespace: str + + +class Visitor(ast.NodeVisitor): + def __init__(self) -> None: + self.pandas_namespace: MutableMapping[OffsetWithNamespace, str] = {} + self.imported_from_pandas: Set[str] = set() + + def visit_Attribute(self, node: ast.Attribute) -> None: + if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}: + offset_with_namespace = OffsetWithNamespace( + node.lineno, node.col_offset, node.value.id + ) + self.pandas_namespace[offset_with_namespace] = node.attr + self.generic_visit(node) + + def visit_ImportFrom(self, node: ast.ImportFrom) -> None: + if node.module is not None and "pandas" in node.module: + self.imported_from_pandas.update(name.name for name in node.names) + self.generic_visit(node) + + +def replace_inconsistent_pandas_namespace(visitor: Visitor, content: str) -> str: + from tokenize_rt import ( + reversed_enumerate, + src_to_tokens, + tokens_to_src, + ) + + tokens = src_to_tokens(content) + for n, i in reversed_enumerate(tokens): + offset_with_namespace = OffsetWithNamespace(i.offset[0], i.offset[1], i.src) + if ( + offset_with_namespace in visitor.pandas_namespace + and visitor.pandas_namespace[offset_with_namespace] + in visitor.imported_from_pandas + ): + # Replace `pd` + tokens[n] = i._replace(src="") + # Replace `.` + tokens[n + 1] = tokens[n + 1]._replace(src="") + + new_src: str = tokens_to_src(tokens) + return new_src + + +def check_for_inconsistent_pandas_namespace( + content: str, path: str, *, replace: bool +) -> 
Optional[str]: + tree = ast.parse(content) + + visitor = Visitor() + visitor.visit(tree) + + inconsistencies = visitor.imported_from_pandas.intersection( + visitor.pandas_namespace.values() + ) + + if not inconsistencies: + # No inconsistent namespace usage, nothing to replace. + return None + + if not replace: + inconsistency = inconsistencies.pop() + lineno, col_offset, prefix = next( + key for key, val in visitor.pandas_namespace.items() if val == inconsistency + ) + msg = ERROR_MESSAGE.format( + lineno=lineno, + col_offset=col_offset, + prefix=prefix, + name=inconsistency, + path=path, + ) + sys.stdout.write(msg) + sys.exit(1) + + return replace_inconsistent_pandas_namespace(visitor, content) + + +def main(argv: Optional[Sequence[str]] = None) -> None: + parser = argparse.ArgumentParser() + parser.add_argument("paths", nargs="*") + parser.add_argument("--replace", action="store_true") + args = parser.parse_args(argv) + + for path in args.paths: + with open(path, encoding="utf-8") as fd: + content = fd.read() + new_content = check_for_inconsistent_pandas_namespace( + content, path, replace=args.replace + ) + if not args.replace or new_content is None: + continue + with open(path, "w", encoding="utf-8") as fd: + fd.write(new_content) + + +if __name__ == "__main__": + main() diff --git a/scripts/sync_flake8_versions.py b/scripts/sync_flake8_versions.py index 8852634c5d796..0d513d5937dbe 100644 --- a/scripts/sync_flake8_versions.py +++ b/scripts/sync_flake8_versions.py @@ -1,5 +1,5 @@ """ -Check that the flake8 (and pandas-dev-flaker) pins are the same in: +Check that the flake8 pins are the same in: - environment.yml - .pre-commit-config.yaml, in the flake8 hook @@ -103,17 +103,13 @@ def get_revisions( precommit_config: YamlMapping, environment: YamlMapping ) -> tuple[Revisions, Revisions]: flake8_revisions = Revisions(name="flake8") - pandas_dev_flaker_revisions = Revisions(name="pandas-dev-flaker") repos = precommit_config["repos"] flake8_repo, flake8_hook = _get_repo_hook(repos, "flake8") flake8_revisions.pre_commit = Revision("flake8", "==", flake8_repo["rev"]) flake8_additional_dependencies = [] for dep in _process_dependencies(flake8_hook.get("additional_dependencies", [])): - if dep.name == "pandas-dev-flaker": - pandas_dev_flaker_revisions.pre_commit = dep - else: - flake8_additional_dependencies.append(dep) + flake8_additional_dependencies.append(dep) environment_dependencies = environment["dependencies"] environment_additional_dependencies = [] @@ -121,8 +117,6 @@ def get_revisions( if dep.name == "flake8": flake8_revisions.environment = dep environment_additional_dependencies.append(dep) - elif dep.name == "pandas-dev-flaker": - pandas_dev_flaker_revisions.environment = dep else: environment_additional_dependencies.append(dep) @@ -131,8 +125,7 @@ def get_revisions( environment_additional_dependencies, ) - for revisions in flake8_revisions, pandas_dev_flaker_revisions: - _validate_revisions(revisions) + _validate_revisions(flake8_revisions) if __name__ == "__main__": diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py new file mode 100644 index 0000000000000..eb995158d8cb4 --- /dev/null +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -0,0 +1,61 @@ +import pytest + +from ..check_for_inconsistent_pandas_namespace import ( + check_for_inconsistent_pandas_namespace, +) + +BAD_FILE_0 = ( + "from pandas import Categorical\n" + "cat_0 = Categorical()\n" + "cat_1 = pd.Categorical()" +) +BAD_FILE_1 = ( + "from 
pandas import Categorical\n" + "cat_0 = pd.Categorical()\n" + "cat_1 = Categorical()" +) +BAD_FILE_2 = ( + "from pandas import Categorical\n" + "cat_0 = pandas.Categorical()\n" + "cat_1 = Categorical()" +) +GOOD_FILE_0 = ( + "from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = Categorical()" +) +GOOD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = pd.Categorical()" +GOOD_FILE_2 = "from array import array\nimport pandas as pd\narr = pd.array([])" +PATH = "t.py" + + +@pytest.mark.parametrize( + "content, expected", + [ + (BAD_FILE_0, "t.py:3:8: Found both 'pd.Categorical' and 'Categorical' in t.py"), + (BAD_FILE_1, "t.py:2:8: Found both 'pd.Categorical' and 'Categorical' in t.py"), + ( + BAD_FILE_2, + "t.py:2:8: Found both 'pandas.Categorical' and 'Categorical' in t.py", + ), + ], +) +def test_inconsistent_usage(content, expected, capsys): + with pytest.raises(SystemExit): + check_for_inconsistent_pandas_namespace(content, PATH, replace=False) + result, _ = capsys.readouterr() + assert result == expected + + +@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1, GOOD_FILE_2]) +@pytest.mark.parametrize("replace", [True, False]) +def test_consistent_usage(content, replace): + # should not raise + check_for_inconsistent_pandas_namespace(content, PATH, replace=replace) + + +@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1, BAD_FILE_2]) +def test_inconsistent_usage_with_replace(content): + result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True) + expected = ( + "from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = Categorical()" + ) + assert result == expected diff --git a/scripts/tests/test_sync_flake8_versions.py b/scripts/tests/test_sync_flake8_versions.py index 743ece34e0b56..2349a4f5d8d1c 100644 --- a/scripts/tests/test_sync_flake8_versions.py +++ b/scripts/tests/test_sync_flake8_versions.py @@ -87,7 +87,6 @@ def test_get_revisions_no_failure(capsys): { "id": "flake8", "additional_dependencies": [ - "pandas-dev-flaker==0.4.0", "flake8-bugs==1.1.1", ], } @@ -101,7 +100,6 @@ def test_get_revisions_no_failure(capsys): "id": "yesqa", "additional_dependencies": [ "flake8==0.1.1", - "pandas-dev-flaker==0.4.0", "flake8-bugs==1.1.1", ], } @@ -116,7 +114,6 @@ def test_get_revisions_no_failure(capsys): { "pip": [ "git+https://github.com/pydata/pydata-sphinx-theme.git@master", - "pandas-dev-flaker==0.4.0", ] }, ] diff --git a/scripts/tests/test_validate_unwanted_patterns.py b/scripts/tests/test_validate_unwanted_patterns.py new file mode 100644 index 0000000000000..ef93fd1d21981 --- /dev/null +++ b/scripts/tests/test_validate_unwanted_patterns.py @@ -0,0 +1,419 @@ +import io + +import pytest + +from .. import validate_unwanted_patterns + + +class TestBarePytestRaises: + @pytest.mark.parametrize( + "data", + [ + ( + """ + with pytest.raises(ValueError, match="foo"): + pass + """ + ), + ( + """ + # with pytest.raises(ValueError, match="foo"): + # pass + """ + ), + ( + """ + # with pytest.raises(ValueError): + # pass + """ + ), + ( + """ + with pytest.raises( + ValueError, + match="foo" + ): + pass + """ + ), + ], + ) + def test_pytest_raises(self, data): + fd = io.StringIO(data.strip()) + result = list(validate_unwanted_patterns.bare_pytest_raises(fd)) + assert result == [] + + @pytest.mark.parametrize( + "data, expected", + [ + ( + ( + """ + with pytest.raises(ValueError): + pass + """ + ), + [ + ( + 1, + ( + "Bare pytests raise have been found. " + "Please pass in the argument 'match' " + "as well the exception." 
+ ), + ), + ], + ), + ( + ( + """ + with pytest.raises(ValueError, match="foo"): + with pytest.raises(ValueError): + pass + pass + """ + ), + [ + ( + 2, + ( + "Bare pytests raise have been found. " + "Please pass in the argument 'match' " + "as well the exception." + ), + ), + ], + ), + ( + ( + """ + with pytest.raises(ValueError): + with pytest.raises(ValueError, match="foo"): + pass + pass + """ + ), + [ + ( + 1, + ( + "Bare pytests raise have been found. " + "Please pass in the argument 'match' " + "as well the exception." + ), + ), + ], + ), + ( + ( + """ + with pytest.raises( + ValueError + ): + pass + """ + ), + [ + ( + 1, + ( + "Bare pytests raise have been found. " + "Please pass in the argument 'match' " + "as well the exception." + ), + ), + ], + ), + ( + ( + """ + with pytest.raises( + ValueError, + # match = "foo" + ): + pass + """ + ), + [ + ( + 1, + ( + "Bare pytests raise have been found. " + "Please pass in the argument 'match' " + "as well the exception." + ), + ), + ], + ), + ], + ) + def test_pytest_raises_raises(self, data, expected): + fd = io.StringIO(data.strip()) + result = list(validate_unwanted_patterns.bare_pytest_raises(fd)) + assert result == expected + + +@pytest.mark.parametrize( + "data, expected", + [ + ( + 'msg = ("bar " "baz")', + [ + ( + 1, + ( + "String unnecessarily split in two by black. " + "Please merge them manually." + ), + ) + ], + ), + ( + 'msg = ("foo " "bar " "baz")', + [ + ( + 1, + ( + "String unnecessarily split in two by black. " + "Please merge them manually." + ), + ), + ( + 1, + ( + "String unnecessarily split in two by black. " + "Please merge them manually." + ), + ), + ], + ), + ], +) +def test_strings_to_concatenate(data, expected): + fd = io.StringIO(data.strip()) + result = list(validate_unwanted_patterns.strings_to_concatenate(fd)) + assert result == expected + + +class TestStringsWithWrongPlacedWhitespace: + @pytest.mark.parametrize( + "data", + [ + ( + """ + msg = ( + "foo\n" + " bar" + ) + """ + ), + ( + """ + msg = ( + "foo" + " bar" + "baz" + ) + """ + ), + ( + """ + msg = ( + f"foo" + " bar" + ) + """ + ), + ( + """ + msg = ( + "foo" + f" bar" + ) + """ + ), + ( + """ + msg = ( + "foo" + rf" bar" + ) + """ + ), + ], + ) + def test_strings_with_wrong_placed_whitespace(self, data): + fd = io.StringIO(data.strip()) + result = list( + validate_unwanted_patterns.strings_with_wrong_placed_whitespace(fd) + ) + assert result == [] + + @pytest.mark.parametrize( + "data, expected", + [ + ( + ( + """ + msg = ( + "foo" + " bar" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ) + ], + ), + ( + ( + """ + msg = ( + f"foo" + " bar" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ) + ], + ), + ( + ( + """ + msg = ( + "foo" + f" bar" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ) + ], + ), + ( + ( + """ + msg = ( + f"foo" + f" bar" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ) + ], + ), + ( + ( + """ + msg = ( + "foo" + rf" bar" + " baz" + ) + """ + ), + [ + ( + 3, + ( + "String has a space at the beginning instead " + "of the end of the previous string." + ), + ), + ( + 4, + ( + "String has a space at the beginning instead " + "of the end of the previous string." 
+                        ),
+                    ),
+                ],
+            ),
+            (
+                (
+                    """
+    msg = (
+        "foo"
+        " bar"
+        rf" baz"
+    )
+    """
+                ),
+                [
+                    (
+                        3,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    ),
+                    (
+                        4,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    ),
+                ],
+            ),
+            (
+                (
+                    """
+    msg = (
+        "foo"
+        rf" bar"
+        rf" baz"
+    )
+    """
+                ),
+                [
+                    (
+                        3,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    ),
+                    (
+                        4,
+                        (
+                            "String has a space at the beginning instead "
+                            "of the end of the previous string."
+                        ),
+                    ),
+                ],
+            ),
+        ],
+    )
+    def test_strings_with_wrong_placed_whitespace_raises(self, data, expected):
+        fd = io.StringIO(data.strip())
+        result = list(
+            validate_unwanted_patterns.strings_with_wrong_placed_whitespace(fd)
+        )
+        assert result == expected
diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py
new file mode 100755
index 0000000000000..5bc2a915a1c0e
--- /dev/null
+++ b/scripts/validate_unwanted_patterns.py
@@ -0,0 +1,488 @@
+#!/usr/bin/env python3
+"""
+Unwanted patterns test cases.
+
+The reason this file exists despite the fact we already have
+`ci/code_checks.sh`,
+(see https://github.com/pandas-dev/pandas/blob/master/ci/code_checks.sh)
+
+is that some of the test cases are more complex/impossible to validate via regex.
+So this file is somewhat an extension to `ci/code_checks.sh`
+"""
+
+import argparse
+import ast
+import sys
+import token
+import tokenize
+from typing import (
+    IO,
+    Callable,
+    Iterable,
+    List,
+    Set,
+    Tuple,
+)
+
+PRIVATE_IMPORTS_TO_IGNORE: Set[str] = {
+    "_extension_array_shared_docs",
+    "_index_shared_docs",
+    "_interval_shared_docs",
+    "_merge_doc",
+    "_shared_docs",
+    "_apply_docs",
+    "_new_Index",
+    "_new_PeriodIndex",
+    "_doc_template",
+    "_agg_template",
+    "_pipe_template",
+    "__main__",
+    "_transform_template",
+    "_flex_comp_doc_FRAME",
+    "_op_descriptions",
+    "_IntegerDtype",
+    "_use_inf_as_na",
+    "_get_plot_backend",
+    "_matplotlib",
+    "_arrow_utils",
+    "_registry",
+    "_get_offset",  # TODO: remove after get_offset deprecation enforced
+    "_test_parse_iso8601",
+    "_json_normalize",  # TODO: remove after deprecation is enforced
+    "_testing",
+    "_test_decorators",
+    "__version__",  # check np.__version__ in compat.numpy.function
+    "_arrow_dtype_mapping",
+}
+
+
+def _get_literal_string_prefix_len(token_string: str) -> int:
+    """
+    Getting the length of the literal string prefix.
+
+    Parameters
+    ----------
+    token_string : str
+        String to check.
+
+    Returns
+    -------
+    int
+        Length of the literal string prefix.
+
+    Examples
+    --------
+    >>> example_string = "'Hello world'"
+    >>> _get_literal_string_prefix_len(example_string)
+    0
+    >>> example_string = "r'Hello world'"
+    >>> _get_literal_string_prefix_len(example_string)
+    1
+    """
+    try:
+        return min(
+            token_string.find(quote)
+            for quote in (r"'", r'"')
+            if token_string.find(quote) >= 0
+        )
+    except ValueError:
+        return 0
+
+
+def bare_pytest_raises(file_obj: IO[str]) -> Iterable[Tuple[int, str]]:
+    """
+    Test Case for bare pytest raises.
+
+    For example, this is wrong:
+
+    >>> with pytest.raises(ValueError):
+    ...     # Some code that raises ValueError
+
+    And this is what we want instead:
+
+    >>> with pytest.raises(ValueError, match="foo"):
+    ...     # Some code that raises ValueError
+
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+ + Yields + ------ + line_number : int + Line number of unconcatenated string. + msg : str + Explanation of the error. + + Notes + ----- + GH #23922 + """ + contents = file_obj.read() + tree = ast.parse(contents) + + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + + try: + if not (node.func.value.id == "pytest" and node.func.attr == "raises"): + continue + except AttributeError: + continue + + if not node.keywords: + yield ( + node.lineno, + "Bare pytests raise have been found. " + "Please pass in the argument 'match' as well the exception.", + ) + else: + # Means that there are arguments that are being passed in, + # now we validate that `match` is one of the passed in arguments + if not any(keyword.arg == "match" for keyword in node.keywords): + yield ( + node.lineno, + "Bare pytests raise have been found. " + "Please pass in the argument 'match' as well the exception.", + ) + + +PRIVATE_FUNCTIONS_ALLOWED = {"sys._getframe"} # no known alternative + + +def private_function_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: + """ + Checking that a private function is not used across modules. + Parameters + ---------- + file_obj : IO + File-like object containing the Python code to validate. + Yields + ------ + line_number : int + Line number of the private function that is used across modules. + msg : str + Explanation of the error. + """ + contents = file_obj.read() + tree = ast.parse(contents) + + imported_modules: Set[str] = set() + + for node in ast.walk(tree): + if isinstance(node, (ast.Import, ast.ImportFrom)): + for module in node.names: + module_fqdn = module.name if module.asname is None else module.asname + imported_modules.add(module_fqdn) + + if not isinstance(node, ast.Call): + continue + + try: + module_name = node.func.value.id + function_name = node.func.attr + except AttributeError: + continue + + # Exception section # + + # (Debatable) Class case + if module_name[0].isupper(): + continue + # (Debatable) Dunder methods case + elif function_name.startswith("__") and function_name.endswith("__"): + continue + elif module_name + "." + function_name in PRIVATE_FUNCTIONS_ALLOWED: + continue + + if module_name in imported_modules and function_name.startswith("_"): + yield (node.lineno, f"Private function '{module_name}.{function_name}'") + + +def private_import_across_module(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: + """ + Checking that a private function is not imported across modules. + Parameters + ---------- + file_obj : IO + File-like object containing the Python code to validate. + Yields + ------ + line_number : int + Line number of import statement, that imports the private function. + msg : str + Explanation of the error. + """ + contents = file_obj.read() + tree = ast.parse(contents) + + for node in ast.walk(tree): + if not isinstance(node, (ast.Import, ast.ImportFrom)): + continue + + for module in node.names: + module_name = module.name.split(".")[-1] + if module_name in PRIVATE_IMPORTS_TO_IGNORE: + continue + + if module_name.startswith("_"): + yield (node.lineno, f"Import of internal function {repr(module_name)}") + + +def strings_to_concatenate(file_obj: IO[str]) -> Iterable[Tuple[int, str]]: + """ + This test case is necessary after 'Black' (https://github.com/psf/black), + is formatting strings over multiple lines. + + For example, when this: + + >>> foo = ( + ... "bar " + ... "baz" + ... 
)
+
+    Is becoming this:
+
+    >>> foo = ("bar " "baz")
+
+    'Black' is not considering this as an
+    issue (see https://github.com/psf/black/issues/1051),
+    so we are checking it here instead.
+
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+
+    Yields
+    ------
+    line_number : int
+        Line number of unconcatenated string.
+    msg : str
+        Explanation of the error.
+
+    Notes
+    -----
+    GH #30454
+    """
+    tokens: List = list(tokenize.generate_tokens(file_obj.readline))
+
+    for current_token, next_token in zip(tokens, tokens[1:]):
+        if current_token.type == next_token.type == token.STRING:
+            yield (
+                current_token.start[0],
+                (
+                    "String unnecessarily split in two by black. "
+                    "Please merge them manually."
+                ),
+            )
+
+
+def strings_with_wrong_placed_whitespace(
+    file_obj: IO[str],
+) -> Iterable[Tuple[int, str]]:
+    """
+    Test case for leading spaces in concatenated strings.
+
+    For example:
+
+    >>> rule = (
+    ...     "We want the space at the end of the line, "
+    ...     "not at the beginning"
+    ... )
+
+    Instead of:
+
+    >>> rule = (
+    ...     "We want the space at the end of the line,"
+    ...     " not at the beginning"
+    ... )
+
+    Parameters
+    ----------
+    file_obj : IO
+        File-like object containing the Python code to validate.
+
+    Yields
+    ------
+    line_number : int
+        Line number of unconcatenated string.
+    msg : str
+        Explanation of the error.
+    """
+
+    def has_wrong_whitespace(first_line: str, second_line: str) -> bool:
+        """
+        Checking if the two lines are matching the unwanted pattern.
+
+        Parameters
+        ----------
+        first_line : str
+            First line to check.
+        second_line : str
+            Second line to check.
+
+        Returns
+        -------
+        bool
+            True if the two received strings match an unwanted pattern.
+
+        Notes
+        -----
+        The unwanted pattern that we are trying to catch is if the spaces in
+        a string that is concatenated over multiple lines are placed at the
+        end of each string, unless this string is ending with a
+        newline character (\n).
+
+        For example, this is bad:
+
+        >>> rule = (
+        ...     "We want the space at the end of the line,"
+        ...     " not at the beginning"
+        ... )
+
+        And what we want is:
+
+        >>> rule = (
+        ...     "We want the space at the end of the line, "
+        ...     "not at the beginning"
+        ... )
+
+        And if the string is ending with a new line character (\n) we
+        do not want any trailing whitespaces after it.
+
+        For example, this is bad:
+
+        >>> rule = (
+        ...     "We want the space at the beginning of "
+        ...     "the line if the previous line is ending with a \n "
+        ...     "not at the end, like always"
+        ... )
+
+        And what we do want is:
+
+        >>> rule = (
+        ...     "We want the space at the beginning of "
+        ...     "the line if the previous line is ending with a \n"
+        ...     " not at the end, like always"
+        ... 
)
+        """
+        if first_line.endswith(r"\n"):
+            return False
+        elif first_line.startswith("  ") or second_line.startswith("  "):
+            return False
+        elif first_line.endswith("  ") or second_line.endswith("  "):
+            return False
+        elif (not first_line.endswith(" ")) and second_line.startswith(" "):
+            return True
+        return False
+
+    tokens: List = list(tokenize.generate_tokens(file_obj.readline))
+
+    for first_token, second_token, third_token in zip(tokens, tokens[1:], tokens[2:]):
+        # Checking if we are in a block of concatenated strings
+        if (
+            first_token.type == third_token.type == token.STRING
+            and second_token.type == token.NL
+        ):
+            # Stripping the quotes, with the string literal prefix
+            first_string: str = first_token.string[
+                _get_literal_string_prefix_len(first_token.string) + 1 : -1
+            ]
+            second_string: str = third_token.string[
+                _get_literal_string_prefix_len(third_token.string) + 1 : -1
+            ]
+
+            if has_wrong_whitespace(first_string, second_string):
+                yield (
+                    third_token.start[0],
+                    (
+                        "String has a space at the beginning instead "
+                        "of the end of the previous string."
+                    ),
+                )
+
+
+def main(
+    function: Callable[[IO[str]], Iterable[Tuple[int, str]]],
+    source_path: str,
+    output_format: str,
+) -> bool:
+    """
+    Main entry point of the script.
+
+    Parameters
+    ----------
+    function : Callable
+        Function to execute for the specified validation type.
+    source_path : str
+        Source path representing path to a file/directory.
+    output_format : str
+        Output format of the error message.
+
+    Returns
+    -------
+    bool
+        True if any patterns related to the given function are found.
+
+    Raises
+    ------
+    ValueError
+        If the `source_path` is not pointing to existing file/directory.
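+
+    Notes
+    -----
+    A typical invocation, matching the argument parser defined at the bottom
+    of this file (the test file path below is an illustrative placeholder):
+
+        python scripts/validate_unwanted_patterns.py \
+            --validation-type="bare_pytest_raises" \
+            pandas/tests/some_test_file.py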
+ """ + is_failed: bool = False + + for file_path in source_path: + with open(file_path, encoding="utf-8") as file_obj: + for line_number, msg in function(file_obj): + is_failed = True + print( + output_format.format( + source_path=file_path, line_number=line_number, msg=msg + ) + ) + + return is_failed + + +if __name__ == "__main__": + available_validation_types: List[str] = [ + "bare_pytest_raises", + "private_function_across_module", + "private_import_across_module", + "strings_to_concatenate", + "strings_with_wrong_placed_whitespace", + ] + + parser = argparse.ArgumentParser(description="Unwanted patterns checker.") + + parser.add_argument("paths", nargs="*", help="Source paths of files to check.") + parser.add_argument( + "--format", + "-f", + default="{source_path}:{line_number}:{msg}", + help="Output format of the error message.", + ) + parser.add_argument( + "--validation-type", + "-vt", + choices=available_validation_types, + required=True, + help="Validation test case to check.", + ) + + args = parser.parse_args() + + sys.exit( + main( + function=globals().get(args.validation_type), + source_path=args.paths, + output_format=args.format, + ) + ) diff --git a/setup.cfg b/setup.cfg index ef84dd7f9ce85..562ae70fd73ef 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,10 +35,6 @@ ignore = B023 # Functions defined inside a loop must not use variables redefined in the loop B301, - # single-letter variables - PDF023, - # "use 'pandas._testing' instead" in non-test code - PDF025, # If test must be a simple comparison against sys.platform or sys.version_info Y002, # Use "_typeshed.Self" instead of class-bound TypeVar @@ -59,18 +55,6 @@ exclude = versioneer.py, # exclude asv benchmark environments from linting env -per-file-ignores = - # private import across modules - pandas/tests/*:PDF020 - # pytest.raises without match= - pandas/tests/extension/*:PDF009 - # os.remove - doc/make.py:PDF008 - # import from pandas._testing - pandas/testing.py:PDF014 - # can't use fixtures in asv - asv_bench/*:PDF016 - [flake8-rst] max-line-length = 84 From a4343408aa27a53120ea111fc2d53f9a800de92d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 7 Jan 2023 22:56:11 +0100 Subject: [PATCH 7/7] Fix test --- pandas/tests/frame/methods/test_align.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 89da9017e43af..30b7ed963e792 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -11,6 +11,7 @@ date_range, ) import pandas._testing as tm +from pandas.core.internals.managers import using_copy_on_write class TestDataFrameAlign: @@ -40,12 +41,12 @@ def test_frame_align_aware(self): assert new1.index.tz is timezone.utc assert new2.index.tz is timezone.utc - def test_align_float(self, float_frame, using_copy_on_write, using_array_manager): + def test_align_float(self, float_frame): af, bf = float_frame.align(float_frame) assert af._mgr is not float_frame._mgr af, bf = float_frame.align(float_frame, copy=False) - if using_copy_on_write or using_array_manager: + if using_copy_on_write(): assert af._mgr is not float_frame._mgr else: assert af._mgr is float_frame._mgr
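
For reference, a minimal sketch of the behavior the adjusted align() assertion
in PATCH 7/7 encodes; this assumes a pandas build where copy-on-write is
opted in via the "mode.copy_on_write" option, and the frame contents are
placeholders:

    import pandas as pd

    pd.set_option("mode.copy_on_write", True)

    df = pd.DataFrame({"a": [1.0, 2.0, 3.0]})
    left, right = df.align(df, copy=False)

    # Under copy-on-write, align(copy=False) still returns a shallow copy
    # backed by a new manager that merely references df's blocks, so manager
    # identity with df._mgr no longer holds -- which is what the test asserts.
    assert left._mgr is not df._mgr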