diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 5783d3c2353aa..ad08bd1c10c68 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -597,43 +597,12 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
-    cdef:
-        Py_ssize_t i, N
-        algos_t val
-        uint8_t prev_mask
-        int lim, fill_count = 0
-
-    N = len(values)
-
-    # GH#2778
-    if N == 0:
-        return
-
-    lim = validate_limit(N, limit)
-
-    val = values[0]
-    prev_mask = mask[0]
-    for i in range(N):
-        if mask[i]:
-            if fill_count >= lim:
-                continue
-            fill_count += 1
-            values[i] = val
-            mask[i] = prev_mask
-        else:
-            fill_count = 0
-            val = values[i]
-            prev_mask = mask[i]
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
+def pad_2d_inplace(algos_t[:, :] values, uint8_t[:, :] mask, limit=None):
     cdef:
         Py_ssize_t i, j, N, K
         algos_t val
         int lim, fill_count = 0
+        uint8_t prev_mask
 
     K, N = (values).shape
 
@@ -646,15 +615,18 @@ def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
     for j in range(K):
         fill_count = 0
         val = values[j, 0]
+        prev_mask = mask[j, 0]
         for i in range(N):
             if mask[j, i]:
                 if fill_count >= lim:
                     continue
                 fill_count += 1
                 values[j, i] = val
+                mask[j, i] = prev_mask
             else:
                 fill_count = 0
                 val = values[j, i]
+                prev_mask = mask[j, i]
 
 
 """
@@ -741,70 +713,6 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
     return indexer
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
-    cdef:
-        Py_ssize_t i, N
-        algos_t val
-        uint8_t prev_mask
-        int lim, fill_count = 0
-
-    N = len(values)
-
-    # GH#2778
-    if N == 0:
-        return
-
-    lim = validate_limit(N, limit)
-
-    val = values[N - 1]
-    prev_mask = mask[N - 1]
-    for i in range(N - 1, -1, -1):
-        if mask[i]:
-            if fill_count >= lim:
-                continue
-            fill_count += 1
-            values[i] = val
-            mask[i] = prev_mask
-        else:
-            fill_count = 0
-            val = values[i]
-            prev_mask = mask[i]
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def backfill_2d_inplace(algos_t[:, :] values,
-                        const uint8_t[:, :] mask,
-                        limit=None):
-    cdef:
-        Py_ssize_t i, j, N, K
-        algos_t val
-        int lim, fill_count = 0
-
-    K, N = (values).shape
-
-    # GH#2778
-    if N == 0:
-        return
-
-    lim = validate_limit(N, limit)
-
-    for j in range(K):
-        fill_count = 0
-        val = values[j, N - 1]
-        for i in range(N - 1, -1, -1):
-            if mask[j, i]:
-                if fill_count >= lim:
-                    continue
-                fill_count += 1
-                values[j, i] = val
-            else:
-                fill_count = 0
-                val = values[j, i]
-
-
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike):
diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index 4615cb4ec7abd..fa05d655918ce 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -278,8 +278,9 @@ def fillna(
 
         if mask.any():
             if method is not None:
-                func = missing.get_fill_func(method)
-                new_values, _ = func(self._ndarray.copy(), limit=limit, mask=mask)
+                new_values, _ = missing.interpolate_2d(
+                    self._ndarray.copy(), method=method, limit=limit, mask=mask
+                )
                 # TODO: PandasArray didn't used to copy, need tests for this
                 new_values = self._from_backing_data(new_values)
             else:
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 86a1bcf24167c..424392402342b 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -701,8 +701,9 @@ def fillna(self, value=None, method=None, limit=None):
 
         if mask.any():
             if method is not None:
-                func = missing.get_fill_func(method)
-                new_values, _ = func(self.astype(object), limit=limit, mask=mask)
+                new_values, _ = missing.interpolate_2d(
+                    self.astype(object), method=method, limit=limit, mask=mask
+                )
                 new_values = self._from_sequence(new_values, dtype=self.dtype)
             else:
                 # fill with value
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 7777cb4bf674e..bb6d04193acff 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1774,10 +1774,8 @@ def fillna(self, value=None, method=None, limit=None):
             # pad / bfill
 
             # TODO: dispatch when self.categories is EA-dtype
-            values = np.asarray(self).reshape(-1, len(self))
-            values = interpolate_2d(values, method, 0, None).astype(
-                self.categories.dtype
-            )[0]
+            values, _ = interpolate_2d(np.asarray(self), method=method)
+            values = values.astype(self.categories.dtype)
             codes = _get_codes_for_values(values, self.categories)
 
         else:
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index eff06a5c62894..03a7ef817af95 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -39,7 +39,6 @@
     is_string_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.inference import is_array_like
 from pandas.core.dtypes.missing import (
     isna,
     notna,
@@ -156,24 +155,17 @@ def fillna(
         value, method = validate_fillna_kwargs(value, method)
 
         mask = self._mask
-
-        if is_array_like(value):
-            if len(value) != len(self):
-                raise ValueError(
-                    f"Length of 'value' does not match. Got ({len(value)}) "
-                    f" expected {len(self)}"
-                )
-            value = value[mask]
+        value = missing.check_value_size(value, mask, len(self))
 
         if mask.any():
             if method is not None:
-                func = missing.get_fill_func(method)
-                new_values, new_mask = func(
+                new_values, new_mask = missing.interpolate_2d(
                     self._data.copy(),
+                    method=method,
                     limit=limit,
                     mask=mask.copy(),
                 )
-                return type(self)(new_values, new_mask.view(np.bool_))
+                return type(self)(new_values, new_mask)
             else:
                 # fill with value
                 new_values = self.copy()
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index a209037f9a9a6..e8023d3b33fa3 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -670,7 +670,7 @@ def fillna(self, value=None, method=None, limit=None):
         elif method is not None:
             msg = "fillna with 'method' requires high memory usage."
             warnings.warn(msg, PerformanceWarning)
-            filled = interpolate_2d(np.asarray(self), method=method, limit=limit)
+            filled, _ = interpolate_2d(np.asarray(self), method=method, limit=limit)
             return type(self)(filled, fill_value=self.fill_value)
 
         else:
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index e003efeabcb66..d741710108cd8 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -399,8 +399,9 @@ def fillna(self, value=None, method=None, limit=None):
 
         if mask.any():
             if method is not None:
-                func = missing.get_fill_func(method)
-                new_values, _ = func(self.to_numpy(object), limit=limit, mask=mask)
+                new_values, _ = missing.interpolate_2d(
+                    self.to_numpy(object), method=method, limit=limit, mask=mask
+                )
                 new_values = self._from_sequence(new_values)
             else:
                 # fill with value
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index b92ef3ec3b367..509579eab1cd4 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1208,7 +1208,7 @@ def _interpolate_with_fill(
 
         values = self.values if inplace else self.values.copy()
 
-        values = missing.interpolate_2d(
+        values, _ = missing.interpolate_2d(
             values,
             method=method,
             axis=axis,
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 1b5a7237b5287..1ed28d99f68da 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -3,18 +3,15 @@
 """
 from __future__ import annotations
 
-from functools import (
-    partial,
-    wraps,
-)
+from functools import partial
 from typing import (
     TYPE_CHECKING,
     Any,
     List,
     Optional,
     Set,
+    Tuple,
     Union,
-    cast,
 )
 
 import numpy as np
@@ -26,7 +23,6 @@
 from pandas._typing import (
     ArrayLike,
     Axis,
-    F,
 )
 from pandas.compat._optional import import_optional_dependency
 
@@ -589,7 +585,7 @@ def _interpolate_with_limit_area(
         first = find_valid_index(values, "first")
         last = find_valid_index(values, "last")
 
-        values = interpolate_2d(
+        values, _ = interpolate_2d(
             values,
             method=method,
             limit=limit,
@@ -607,11 +603,13 @@ def _interpolate_with_limit_area(
 
 def interpolate_2d(
     values,
+    *,
     method: str = "pad",
     axis: Axis = 0,
     limit: Optional[int] = None,
     limit_area: Optional[str] = None,
-):
+    mask: Optional[np.ndarray] = None,
+) -> Tuple[np.ndarray, np.ndarray]:
     """
     Perform an actual interpolation of values, values will be make 2-d if
     needed fills inplace, returns the result.
@@ -620,144 +618,65 @@ def interpolate_2d(
     ----------
     values: array-like
         Input array.
-    method: str, default "pad"
-        Interpolation method. Could be "bfill" or "pad"
+    method: {"pad", "backfill", "ffill", "bfill"}, default "pad"
+        Interpolation method.
     axis: 0 or 1
         Interpolation axis
     limit: int, optional
         Index limit on interpolation.
     limit_area: str, optional
-        Limit area for interpolation. Can be "inside" or "outside"
+        Limit area for interpolation. Can be "inside" or "outside".
+    mask: numpy array, optional
+        boolean array of values to be interpolated.
 
     Returns
     -------
-    values: array-like
-        Interpolated array.
+    tuple of values: array-like, mask: array or None
+        Interpolated array and updated mask.
     """
     if limit_area is not None:
-        return np.apply_along_axis(
-            partial(
-                _interpolate_with_limit_area,
-                method=method,
-                limit=limit,
-                limit_area=limit_area,
+        return (
+            np.apply_along_axis(
+                partial(
+                    _interpolate_with_limit_area,
+                    method=method,
+                    limit=limit,
+                    limit_area=limit_area,
+                ),
+                axis,
+                values,
             ),
-            axis,
-            values,
+            mask,
         )
 
-    orig_values = values
-
-    transf = (lambda x: x) if axis == 0 else (lambda x: x.T)
+    if not np.all(values.shape):
+        return values, mask
 
-    # reshape a 1 dim if needed
-    ndim = values.ndim
-    if values.ndim == 1:
-        if axis != 0:  # pragma: no cover
-            raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
-        values = values.reshape(tuple((1,) + values.shape))
-
-    method = clean_fill_method(method)
-    tvalues = transf(values)
-    if method == "pad":
-        result, _ = _pad_2d(tvalues, limit=limit)
-    else:
-        result, _ = _backfill_2d(tvalues, limit=limit)
-
-    result = transf(result)
-    # reshape back
-    if ndim == 1:
-        result = result[0]
-
-    if orig_values.dtype.kind in ["m", "M"]:
-        # convert float back to datetime64/timedelta64
-        result = result.view(orig_values.dtype)
-
-    return result
-
-
-def _fillna_prep(values, mask=None):
-    # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d
+    orig_values, orig_mask = values, mask
 
     if mask is None:
         mask = isna(values)
 
-    mask = mask.view(np.uint8)
-    return mask
+    if axis == 1:
+        values, mask = values.T, mask.T
 
+    # reshape a 1 dim if needed
+    if values.ndim == 1:
+        if axis != 0:
+            raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0")
+        values, mask = values[np.newaxis, :], mask[np.newaxis, :]
 
-def _datetimelike_compat(func: F) -> F:
-    """
-    Wrapper to handle datetime64 and timedelta64 dtypes.
-    """
-
-    @wraps(func)
-    def new_func(values, limit=None, mask=None):
-        if needs_i8_conversion(values.dtype):
-            if mask is None:
-                # This needs to occur before casting to int64
-                mask = isna(values)
-
-            result, mask = func(values.view("i8"), limit=limit, mask=mask)
-            return result.view(values.dtype), mask
-
-        return func(values, limit=limit, mask=mask)
-
-    return cast(F, new_func)
-
-
-@_datetimelike_compat
-def _pad_1d(
-    values: np.ndarray,
-    limit: int | None = None,
-    mask: np.ndarray | None = None,
-) -> tuple[np.ndarray, np.ndarray]:
-    mask = _fillna_prep(values, mask)
-    algos.pad_inplace(values, mask, limit=limit)
-    return values, mask
-
-
-@_datetimelike_compat
-def _backfill_1d(
-    values: np.ndarray,
-    limit: int | None = None,
-    mask: np.ndarray | None = None,
-) -> tuple[np.ndarray, np.ndarray]:
-    mask = _fillna_prep(values, mask)
-    algos.backfill_inplace(values, mask, limit=limit)
-    return values, mask
-
-
-@_datetimelike_compat
-def _pad_2d(values, limit=None, mask=None):
-    mask = _fillna_prep(values, mask)
-
-    if np.all(values.shape):
-        algos.pad_2d_inplace(values, mask, limit=limit)
-    else:
-        # for test coverage
-        pass
-    return values, mask
-
-
-@_datetimelike_compat
-def _backfill_2d(values, limit=None, mask=None):
-    mask = _fillna_prep(values, mask)
-
-    if np.all(values.shape):
-        algos.backfill_2d_inplace(values, mask, limit=limit)
-    else:
-        # for test coverage
-        pass
-    return values, mask
-
+    # reverse stride for backfill
+    method = clean_fill_method(method)
+    if method == "backfill":
+        values, mask = values[:, ::-1], mask[:, ::-1]
 
-_fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d}
+    if needs_i8_conversion(values.dtype):
+        values = values.view("i8")
 
+    algos.pad_2d_inplace(values, mask.view(np.uint8), limit=limit)
 
-def get_fill_func(method):
-    method = clean_fill_method(method)
-    return _fill_methods[method]
+    return orig_values, orig_mask
 
 
 def clean_reindex_fill_method(method):
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 25b46d2cbd278..a864c16e29b0c 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4529,10 +4529,11 @@ def _replace_single(self, to_replace, method: str, inplace: bool, limit):
 
         orig_dtype = self.dtype
         result = self if inplace else self.copy()
-        fill_f = missing.get_fill_func(method)
 
         mask = missing.mask_missing(result.values, to_replace)
-        values, _ = fill_f(result.values, limit=limit, mask=mask)
+        values, _ = missing.interpolate_2d(
+            result.values, method=method, limit=limit, mask=mask
+        )
 
         if values.dtype == orig_dtype and inplace:
             return