diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index bcdd9c6fa5717..65167e6467fd5 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -11,6 +11,7 @@
     date_range,
     isnull,
     period_range,
+    timedelta_range,
 )
 
 from .pandas_vb_common import tm
@@ -355,15 +356,42 @@ def time_isnull_obj(self):
 
 class Fillna:
 
-    params = ([True, False], ["pad", "bfill"])
-    param_names = ["inplace", "method"]
-
-    def setup(self, inplace, method):
-        values = np.random.randn(10000, 100)
-        values[::2] = np.nan
-        self.df = DataFrame(values)
-
-    def time_frame_fillna(self, inplace, method):
+    params = (
+        [True, False],
+        ["pad", "bfill"],
+        [
+            "float64",
+            "float32",
+            "object",
+            "Int64",
+            "Float64",
+            "datetime64[ns]",
+            "datetime64[ns, tz]",  # label only; setup maps it to tz-aware data
+            "timedelta64[ns]",
+        ],
+    )
+    param_names = ["inplace", "method", "dtype"]
+
+    def setup(self, inplace, method, dtype):
+        N, M = 10000, 100  # rows, columns
+        if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"):
+            data = {
+                "datetime64[ns]": date_range("2011-01-01", freq="H", periods=N),
+                "datetime64[ns, tz]": date_range(
+                    "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+                ),
+                "timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"),
+            }
+            self.df = DataFrame({f"col_{i}": data[dtype] for i in range(M)})
+            self.df[::2] = None  # blank out every other row
+        else:
+            values = np.random.randn(N, M)
+            values[::2] = np.nan  # blank out every other row
+            if dtype == "Int64":
+                values = values.round()  # whole numbers only
+            self.df = DataFrame(values, dtype=dtype)
+
+    def time_frame_fillna(self, inplace, method, dtype):
         self.df.fillna(inplace=inplace, method=method)
 
 
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index e99963c6ad56b..ecb9830024900 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -375,6 +375,7 @@ Performance improvements
 - Performance improvement in :meth:`IntervalIndex.isin` (:issue:`38353`)
 - Performance improvement in :meth:`Series.mean` for nullable data types (:issue:`34814`)
 - Performance improvement in :meth:`Series.isin` for nullable data types (:issue:`38340`)
+- Performance improvement in :meth:`DataFrame.fillna` with ``method="pad"`` or ``method="backfill"`` for nullable floating and nullable integer dtypes (:issue:`39953`)
 - Performance improvement in :meth:`DataFrame.corr` for method=kendall (:issue:`28329`)
 - Performance improvement in :meth:`core.window.rolling.Rolling.corr` and :meth:`core.window.rolling.Rolling.cov` (:issue:`39388`)
 - Performance improvement in :meth:`core.window.rolling.RollingGroupby.corr`, :meth:`core.window.expanding.ExpandingGroupby.corr`, :meth:`core.window.expanding.ExpandingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.cov` (:issue:`39591`)
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 1a1b263ae356e..5783d3c2353aa 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -597,10 +597,11 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
+def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
     cdef:
         Py_ssize_t i, N
         algos_t val
+        uint8_t prev_mask
         int lim, fill_count = 0
 
     N = len(values)
@@ -612,15 +613,18 @@ def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
     lim = validate_limit(N, limit)
 
     val = values[0]
+    prev_mask = mask[0]
     for i in range(N):
         if mask[i]:
             if fill_count >= lim:
                 continue
             fill_count += 1
             values[i] = val
+            mask[i] = prev_mask
         else:
             fill_count = 0
             val = values[i]
+            prev_mask = mask[i]
 
 
 @cython.boundscheck(False)
@@ -739,10 +743,11 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
+def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
     cdef:
         Py_ssize_t i, N
         algos_t val
+        uint8_t prev_mask
         int lim, fill_count = 0
 
     N = len(values)
@@ -754,15 +759,18 @@ def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None):
     lim = validate_limit(N, limit)
 
     val = values[N - 1]
+    prev_mask = mask[N - 1]
     for i in range(N - 1, -1, -1):
         if mask[i]:
             if fill_count >= lim:
                 continue
             fill_count += 1
             values[i] = val
+            mask[i] = prev_mask
         else:
             fill_count = 0
             val = values[i]
+            prev_mask = mask[i]
 
 
 @cython.boundscheck(False)
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index ad0bf76b0556b..4615cb4ec7abd 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -279,7 +279,7 @@ def fillna(
         if mask.any():
             if method is not None:
                 func = missing.get_fill_func(method)
-                new_values = func(self._ndarray.copy(), limit=limit, mask=mask)
+                new_values, _ = func(self._ndarray.copy(), limit=limit, mask=mask)
                 # TODO: PandasArray didn't used to copy, need tests for this
                 new_values = self._from_backing_data(new_values)
             else:
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 3b80c0b189108..86a1bcf24167c 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -702,7 +702,7 @@ def fillna(self, value=None, method=None, limit=None):
         if mask.any():
             if method is not None:
                 func = missing.get_fill_func(method)
-                new_values = func(self.astype(object), limit=limit, mask=mask)
+                new_values, _ = func(self.astype(object), limit=limit, mask=mask)
                 new_values = self._from_sequence(new_values, dtype=self.dtype)
             else:
                 # fill with value
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 8cf876fa32d7b..eff06a5c62894 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -28,6 +28,7 @@
     cache_readonly,
     doc,
 )
+from pandas.util._validators import validate_fillna_kwargs
 
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.common import (
@@ -38,12 +39,16 @@
     is_string_dtype,
     pandas_dtype,
 )
+from pandas.core.dtypes.inference import is_array_like
 from pandas.core.dtypes.missing import (
     isna,
     notna,
 )
 
-from pandas.core import nanops
+from pandas.core import (
+    missing,
+    nanops,
+)
 from pandas.core.algorithms import (
     factorize_array,
     isin,
@@ -144,6 +149,39 @@ def __getitem__(
 
         return type(self)(self._data[item], self._mask[item])
 
+    @doc(ExtensionArray.fillna)
+    def fillna(
+        self: BaseMaskedArrayT, value=None, method=None, limit=None
+    ) -> BaseMaskedArrayT:
+        value, method = validate_fillna_kwargs(value, method)
+
+        mask = self._mask
+
+        if is_array_like(value):
+            if len(value) != len(self):
+                raise ValueError(
+                    f"Length of 'value' does not match. Got ({len(value)}) "
+                    f"expected {len(self)}"
+                )
+            value = value[mask]  # keep only entries aligned with missing slots
+
+        if mask.any():
+            if method is not None:
+                func = missing.get_fill_func(method)
+                new_values, new_mask = func(
+                    self._data.copy(),  # copies: the fill runs in place
+                    limit=limit,
+                    mask=mask.copy(),
+                )
+                return type(self)(new_values, new_mask.view(np.bool_))
+            else:
+                # fill with value
+                new_values = self.copy()
+                new_values[mask] = value
+        else:
+            new_values = self.copy()  # nothing missing; still return a copy
+        return new_values
+
     def _coerce_to_array(self, values) -> Tuple[np.ndarray, np.ndarray]:
         raise AbstractMethodError(self)
 
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 26fe6338118b6..e003efeabcb66 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -400,7 +400,7 @@ def fillna(self, value=None, method=None, limit=None):
         if mask.any():
             if method is not None:
                 func = missing.get_fill_func(method)
-                new_values = func(self.to_numpy(object), limit=limit, mask=mask)
+                new_values, _ = func(self.to_numpy(object), limit=limit, mask=mask)
                 new_values = self._from_sequence(new_values)
             else:
                 # fill with value
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 597023cb5b000..2e1a14104c16c 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1727,16 +1727,13 @@ def _slice(self, slicer):
     def fillna(
         self, value, limit=None, inplace: bool = False, downcast=None
     ) -> List[Block]:
-        values = self.values if inplace else self.values.copy()
-        values = values.fillna(value=value, limit=limit)
+        values = self.values.fillna(value=value, limit=limit)  # `inplace` is not used
         return [self.make_block_same_class(values=values)]
 
     def interpolate(
         self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs
     ):
-
-        values = self.values if inplace else self.values.copy()
-        new_values = values.fillna(value=fill_value, method=method, limit=limit)
+        new_values = self.values.fillna(value=fill_value, method=method, limit=limit)
         return self.make_block_same_class(new_values)
 
     def diff(self, n: int, axis: int = 1) -> List[Block]:
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index d1597b23cf577..1b5a7237b5287 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -660,9 +660,9 @@ def interpolate_2d(
     method = clean_fill_method(method)
     tvalues = transf(values)
     if method == "pad":
-        result = _pad_2d(tvalues, limit=limit)
+        result, _ = _pad_2d(tvalues, limit=limit)
     else:
-        result = _backfill_2d(tvalues, limit=limit)
+        result, _ = _backfill_2d(tvalues, limit=limit)
 
     result = transf(result)
     # reshape back
@@ -698,8 +698,8 @@ def new_func(values, limit=None, mask=None):
                 # This needs to occur before casting to int64
                 mask = isna(values)
 
-            result = func(values.view("i8"), limit=limit, mask=mask)
-            return result.view(values.dtype)
+            result, mask = func(values.view("i8"), limit=limit, mask=mask)
+            return result.view(values.dtype), mask
 
         return func(values, limit=limit, mask=mask)
 
@@ -707,17 +707,25 @@ def new_func(values, limit=None, mask=None):
 
 
 @_datetimelike_compat
-def _pad_1d(values, limit=None, mask=None):
+def _pad_1d(
+    values: np.ndarray,
+    limit: int | None = None,
+    mask: np.ndarray | None = None,
+) -> tuple[np.ndarray, np.ndarray]:
     mask = _fillna_prep(values, mask)
     algos.pad_inplace(values, mask, limit=limit)
-    return values
+    return values, mask  # pad_inplace wrote to both arrays
 
 
 @_datetimelike_compat
-def _backfill_1d(values, limit=None, mask=None):
+def _backfill_1d(
+    values: np.ndarray,
+    limit: int | None = None,
+    mask: np.ndarray | None = None,
+) -> tuple[np.ndarray, np.ndarray]:
     mask = _fillna_prep(values, mask)
     algos.backfill_inplace(values, mask, limit=limit)
-    return values
+    return values, mask  # backfill_inplace wrote to both arrays
 
 
 @_datetimelike_compat
@@ -729,7 +737,7 @@ def _pad_2d(values, limit=None, mask=None):
     else:
         # for test coverage
         pass
-    return values
+    return values, mask
 
 
 @_datetimelike_compat
@@ -741,7 +749,7 @@ def _backfill_2d(values, limit=None, mask=None):
     else:
         # for test coverage
         pass
-    return values
+    return values, mask
 
 
 _fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 5a5d1c44b312c..e1a6c6884e003 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4529,7 +4529,7 @@ def _replace_single(self, to_replace, method: str, inplace: bool, limit):
         fill_f = missing.get_fill_func(method)
 
         mask = missing.mask_missing(result.values, to_replace)
-        values = fill_f(result.values, limit=limit, mask=mask)
+        values, _ = fill_f(result.values, limit=limit, mask=mask)
 
         if values.dtype == orig_dtype and inplace:
             return
diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
index 0cf03533915f2..c501694a7c2d5 100644
--- a/pandas/tests/extension/base/missing.py
+++ b/pandas/tests/extension/base/missing.py
@@ -69,6 +69,18 @@ def test_fillna_limit_backfill(self, data_missing):
         expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
         self.assert_series_equal(result, expected)
 
+    def test_fillna_no_op_returns_copy(self, data):
+        """Filling data that has no missing values must yield an equal copy."""
+        data = data[~data.isna()]
+
+        filled_with_scalar = data.fillna(data[0])
+        assert filled_with_scalar is not data
+        self.assert_extension_array_equal(filled_with_scalar, data)
+        # the method-based path must behave the same way
+        filled_with_method = data.fillna(method="backfill")
+        assert filled_with_method is not data
+        self.assert_extension_array_equal(filled_with_method, data)
+
     def test_fillna_series(self, data_missing):
         fill_value = data_missing[1]
         ser = pd.Series(data_missing)
diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py
index 1bc06ee4b6397..24c0d619e2b1a 100644
--- a/pandas/tests/extension/test_interval.py
+++ b/pandas/tests/extension/test_interval.py
@@ -132,6 +132,10 @@ def test_fillna_series_method(self):
     def test_fillna_limit_backfill(self):
         pass
 
+    @unsupported_fill
+    def test_fillna_no_op_returns_copy(self):
+        pass  # placeholder body, same as the other unsupported_fill tests here
+
     @unsupported_fill
     def test_fillna_series(self):
         pass
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
index e8995bc654428..718ef087e47d3 100644
--- a/pandas/tests/extension/test_numpy.py
+++ b/pandas/tests/extension/test_numpy.py
@@ -309,6 +309,11 @@ def test_fillna_scalar(self, data_missing):
         # Non-scalar "scalar" values.
         super().test_fillna_scalar(data_missing)
 
+    @skip_nested
+    def test_fillna_no_op_returns_copy(self, data):
+        # Non-scalar "scalar" values (presumably why skip_nested applies).
+        super().test_fillna_no_op_returns_copy(data)
+
     @skip_nested
     def test_fillna_series(self, data_missing):
         # Non-scalar "scalar" values.
diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
index 067fada5edcae..a49e1b4a367fd 100644
--- a/pandas/tests/extension/test_sparse.py
+++ b/pandas/tests/extension/test_sparse.py
@@ -221,6 +221,13 @@ def test_fillna_limit_backfill(self, data_missing):
         with tm.assert_produces_warning(PerformanceWarning):
             super().test_fillna_limit_backfill(data_missing)
 
+    def test_fillna_no_op_returns_copy(self, data, request):
+        if np.isnan(data.fill_value):  # NaN fill_value: base test expected to fail
+            request.node.add_marker(
+                pytest.mark.xfail(reason="returns array with different fill value")
+            )
+        super().test_fillna_no_op_returns_copy(data)
+
     def test_fillna_series_method(self, data_missing):
         with tm.assert_produces_warning(PerformanceWarning):
             super().test_fillna_limit_backfill(data_missing)