diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f9059054ba59f..1a51101bc8db8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -11,6 +11,7 @@ from pandas._libs import Timestamp, algos, hashtable as htable, lib from pandas._libs.tslib import iNaT +from pandas._typing import AnyArrayLike from pandas.util._decorators import doc from pandas.core.dtypes.cast import ( @@ -45,10 +46,14 @@ is_unsigned_integer_dtype, needs_i8_conversion, ) -from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCIndexClass, + ABCSeries, +) from pandas.core.dtypes.missing import isna, na_value_for_dtype -import pandas.core.common as com from pandas.core.construction import array, extract_array from pandas.core.indexers import validate_indices @@ -384,7 +389,7 @@ def unique(values): unique1d = unique -def isin(comps, values) -> np.ndarray: +def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: """ Compute the isin boolean array. @@ -409,15 +414,14 @@ def isin(comps, values) -> np.ndarray: f"to isin(), you passed a [{type(values).__name__}]" ) - if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): + if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)): values = construct_1d_object_array_from_listlike(list(values)) + comps = extract_array(comps, extract_numpy=True) if is_categorical_dtype(comps): # TODO(extension) # handle categoricals - return comps._values.isin(values) - - comps = com.values_from_object(comps) + return comps.isin(values) # type: ignore comps, dtype = _ensure_data(comps) values, _ = _ensure_data(values, dtype=dtype) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 40a169d03f39c..c6e6acfd47e9a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1738,12 +1738,17 @@ def fillna(self, value=None, method=None, limit=None): # If value is a dict or a Series (a dict value has already # been converted to a Series) - if isinstance(value, ABCSeries): - if not value[~value.isin(self.categories)].isna().all(): + if isinstance(value, (np.ndarray, Categorical, ABCSeries)): + # We get ndarray or Categorical if called via Series.fillna, + # where it will unwrap another aligned Series before getting here + + mask = ~algorithms.isin(value, self.categories) + if not isna(value[mask]).all(): raise ValueError("fill value must be in categories") values_codes = _get_codes_for_values(value, self.categories) indexer = np.where(codes == -1) + codes = codes.copy() codes[indexer] = values_codes[indexer] # If value is not a dict or Series it should be a scalar diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f0147859cae97..d34b7175c5a86 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5988,6 +5988,8 @@ def fillna( value = create_series_with_explicit_dtype( value, dtype_if_empty=object ) + value = value.reindex(self.index, copy=False) + value = value._values elif not is_list_like(value): pass else: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f6e79a0f2045d..842e55d9abaa8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -396,6 +396,7 @@ def apply(self, f, filter=None, **kwargs) -> "BlockManager": BlockManager """ result_blocks = [] + # fillna: Series/DataFrame is responsible for making sure value is aligned # filter kwarg is used in replace-* family of methods if filter is not None: @@ -420,11 +421,6 @@ def apply(self, f, filter=None, **kwargs) -> "BlockManager": align_keys = ["new", "mask"] else: align_keys = ["mask"] - elif f == "fillna": - # fillna internally does putmask, maybe it's better to do this - # at mgr, not block level? - align_copy = False - align_keys = ["value"] else: align_keys = [] diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 8889f45a84237..9eb3c8b3a8c48 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -82,3 +82,18 @@ def test_fillna_iterable_category(self, named): expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)]) tm.assert_categorical_equal(result, expected) + + def test_fillna_array(self): + # accept Categorical or ndarray value if it holds appropriate values + cat = Categorical(["A", "B", "C", None, None]) + + other = cat.fillna("C") + result = cat.fillna(other) + tm.assert_categorical_equal(result, other) + assert isna(cat[-1]) # didnt modify original inplace + + other = np.array(["A", "B", "C", "B", "A"]) + result = cat.fillna(other) + expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype) + tm.assert_categorical_equal(result, expected) + assert isna(cat[-1]) # didnt modify original inplace diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index a1de9c435c9ba..ad7028702ec8c 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -760,6 +760,16 @@ def test_categorical_from_codes(self): result = algos.isin(Sd, St) tm.assert_numpy_array_equal(expected, result) + def test_categorical_isin(self): + vals = np.array([0, 1, 2, 0]) + cats = ["a", "b", "c"] + cat = Categorical(1).from_codes(vals, cats) + other = Categorical(1).from_codes(np.array([0, 1]), cats) + + expected = np.array([True, True, False, True]) + result = algos.isin(cat, other) + tm.assert_numpy_array_equal(expected, result) + def test_same_nan_is_in(self): # GH 22160 # nan is special, because from " a is b" doesn't follow "a == b"