diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index bc77553924dfa..11a29fdf90f93 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -110,6 +110,7 @@ Categorical ^^^^^^^^^^^ - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) +- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) - - diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 5929a8d51fe43..9e0a68f764b0a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -520,6 +520,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: if dtype == self.dtype: return self return self._set_dtype(dtype) + if is_integer_dtype(dtype) and self.isna().any(): + msg = "Cannot convert float NaN to integer" + raise ValueError(msg) return np.array(self, dtype=dtype, copy=copy) @cache_readonly diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2dbd592fc6787..5199c6c6869a1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4713,13 +4713,13 @@ def set_value(self, arr, key, value): @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) def get_indexer_non_unique(self, target): target = ensure_index(target) - if is_categorical(target): - target = target.astype(target.dtype.categories.dtype) pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: return pself.get_indexer_non_unique(ptarget) - if self.is_all_dates: + if is_categorical(target): + tgt_values = np.asarray(target) + elif self.is_all_dates: tgt_values = target.asi8 else: tgt_values = target._ndarray_values @@ -4731,7 +4731,7 @@ def get_indexer_for(self, target, **kwargs): """ Guaranteed return of an indexer even when non-unique. - This dispatches to get_indexer or get_indexer_nonunique + This dispatches to get_indexer or get_indexer_non_unique as appropriate. Returns diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 0c0e8b0123c03..c342777b0ebc4 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -19,7 +19,7 @@ import pytest import pandas as pd -from pandas import Categorical +from pandas import Categorical, CategoricalIndex, Timestamp from pandas.api.types import CategoricalDtype from pandas.tests.extension import base import pandas.util.testing as tm @@ -197,7 +197,15 @@ def test_searchsorted(self, data_for_sorting): class TestCasting(base.BaseCastingTests): - pass + @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) + @pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), pd.NaT]]) + def test_cast_nan_to_int(self, cls, values): + # GH 28406 + s = cls(values) + + msg = "Cannot (cast|convert)" + with pytest.raises((ValueError, TypeError), match=msg): + s.astype(int) class TestArithmeticOps(base.BaseArithmeticOpsTests):