From 9dd2dbe47a3836f4cfe00547f91ab0e4bdbae987 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Thu, 12 Sep 2019 18:54:33 -0400 Subject: [PATCH 01/18] Don't cast categorical nan to int --- pandas/core/arrays/categorical.py | 3 +++ pandas/tests/extension/test_categorical.py | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 5929a8d51fe43..b4bd2933abc52 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -520,6 +520,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: if dtype == self.dtype: return self return self._set_dtype(dtype) + if is_integer_dtype(dtype) and self.isin([np.nan, -np.inf, np.inf]).any(): + msg = "Cannot cast to int." + raise ValueError(msg) return np.array(self, dtype=dtype, copy=copy) @cache_readonly diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 0c0e8b0123c03..ef3ab4ac4754a 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -197,7 +197,14 @@ def test_searchsorted(self, data_for_sorting): class TestCasting(base.BaseCastingTests): - pass + def test_cast_nan_to_int(self): + s1 = pd.Series([0, 1, np.nan], dtype="category") + s2 = pd.Series([0, 1, np.inf], dtype="category") + + with pytest.raises(ValueError): + s1.astype(int) + with pytest.raises(ValueError): + s2.astype(int) class TestArithmeticOps(base.BaseArithmeticOpsTests): From 952114f2387025a81a3b35b2e942cdd73d3848ca Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Fri, 13 Sep 2019 19:03:58 -0400 Subject: [PATCH 02/18] Parametrize test --- pandas/tests/extension/test_categorical.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index ef3ab4ac4754a..a5ba6c1ed03cd 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -197,14 +197,12 @@ def test_searchsorted(self, data_for_sorting): class TestCasting(base.BaseCastingTests): - def test_cast_nan_to_int(self): - s1 = pd.Series([0, 1, np.nan], dtype="category") - s2 = pd.Series([0, 1, np.inf], dtype="category") + @pytest.mark.parametrize("value", [np.nan, -np.inf, np.inf]) + def test_cast_nan_to_int(self, value): + s = pd.Series([0, 1, value], dtype="category") with pytest.raises(ValueError): - s1.astype(int) - with pytest.raises(ValueError): - s2.astype(int) + s.astype(int) class TestArithmeticOps(base.BaseArithmeticOpsTests): From ffed8a09a1d743d56f4c56d1e93ac7792b2fb2e4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Fri, 13 Sep 2019 20:06:54 -0400 Subject: [PATCH 03/18] Add CategoricalIndex test --- pandas/tests/extension/test_categorical.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index a5ba6c1ed03cd..ef4a0f43e7b8d 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -19,7 +19,7 @@ import pytest import pandas as pd -from pandas import Categorical +from pandas import Categorical, CategoricalIndex from pandas.api.types import CategoricalDtype from pandas.tests.extension import base import pandas.util.testing as tm @@ -197,9 +197,10 @@ def test_searchsorted(self, data_for_sorting): class TestCasting(base.BaseCastingTests): + @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) @pytest.mark.parametrize("value", [np.nan, -np.inf, np.inf]) - def test_cast_nan_to_int(self, value): - s = pd.Series([0, 1, value], dtype="category") + def test_cast_nan_to_int(self, cls, value): + s = cls([0, 1, value]) with pytest.raises(ValueError): s.astype(int) From 504be9018e548d62937cce2466e5b2f1cb71f759 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Fri, 13 Sep 2019 20:49:23 -0400 Subject: [PATCH 04/18] Use isfinite --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b4bd2933abc52..090b7b52756cb 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -520,7 +520,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: if dtype == self.dtype: return self return self._set_dtype(dtype) - if is_integer_dtype(dtype) and self.isin([np.nan, -np.inf, np.inf]).any(): + if is_integer_dtype(dtype) and np.isfinite(self.__array__()).all(): msg = "Cannot cast to int." raise ValueError(msg) return np.array(self, dtype=dtype, copy=copy) From 1290cb2fc5fb2d2740b2e824d63ed79e850034f3 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Fri, 13 Sep 2019 20:50:39 -0400 Subject: [PATCH 05/18] Fix --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 090b7b52756cb..f423d31820330 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -520,7 +520,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: if dtype == self.dtype: return self return self._set_dtype(dtype) - if is_integer_dtype(dtype) and np.isfinite(self.__array__()).all(): + if is_integer_dtype(dtype) and not np.isfinite(self.__array__()).all(): msg = "Cannot cast to int." raise ValueError(msg) return np.array(self, dtype=dtype, copy=copy) From ab217631d6d555cd46502d3862ca838707df41df Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Fri, 13 Sep 2019 21:41:45 -0400 Subject: [PATCH 06/18] Check TypeError as well for now Categorical raises a ValueError at the moment, but CategoricalIndex ends up raising a TypeError because this happens during the handling of the ValueError --- pandas/tests/extension/test_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index ef4a0f43e7b8d..38b1bcdfc59c5 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -202,7 +202,7 @@ class TestCasting(base.BaseCastingTests): def test_cast_nan_to_int(self, cls, value): s = cls([0, 1, value]) - with pytest.raises(ValueError): + with pytest.raises((ValueError, TypeError)): s.astype(int) From 858ff0638ec57c4f1f18f3b054def5f5492e01fc Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sun, 15 Sep 2019 08:59:14 -0400 Subject: [PATCH 07/18] Check error message --- pandas/tests/extension/test_categorical.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 38b1bcdfc59c5..959bb9512b264 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -200,9 +200,10 @@ class TestCasting(base.BaseCastingTests): @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) @pytest.mark.parametrize("value", [np.nan, -np.inf, np.inf]) def test_cast_nan_to_int(self, cls, value): + # GH 28406 s = cls([0, 1, value]) - with pytest.raises((ValueError, TypeError)): + with pytest.raises((ValueError, TypeError), match="Cannot cast"): s.astype(int) From 88874dcc048d53d5ad969cb998b6a01efe530430 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sun, 15 Sep 2019 09:00:30 -0400 Subject: [PATCH 08/18] Fix doc typo --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2dbd592fc6787..6124a4b7e3cef 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4731,7 +4731,7 @@ def get_indexer_for(self, target, **kwargs): """ Guaranteed return of an indexer even when non-unique. - This dispatches to get_indexer or get_indexer_nonunique + This dispatches to get_indexer or get_indexer_non_unique as appropriate. Returns From eb76e1f73a7a6ec21b847af133d7ce5198cc5653 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sun, 15 Sep 2019 09:47:28 -0400 Subject: [PATCH 09/18] Revert "Use isfinite" This reverts commit 504be9018e548d62937cce2466e5b2f1cb71f759. --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f423d31820330..b4bd2933abc52 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -520,7 +520,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: if dtype == self.dtype: return self return self._set_dtype(dtype) - if is_integer_dtype(dtype) and not np.isfinite(self.__array__()).all(): + if is_integer_dtype(dtype) and self.isin([np.nan, -np.inf, np.inf]).any(): msg = "Cannot cast to int." raise ValueError(msg) return np.array(self, dtype=dtype, copy=copy) From 2a8186add6b930cd02fd4773f54f103f5f63fcc0 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sun, 15 Sep 2019 10:38:23 -0400 Subject: [PATCH 10/18] Extract array directly --- pandas/core/indexes/base.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6124a4b7e3cef..16cf5ddffa5cd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4713,13 +4713,10 @@ def set_value(self, arr, key, value): @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) def get_indexer_non_unique(self, target): target = ensure_index(target) - if is_categorical(target): - target = target.astype(target.dtype.categories.dtype) - pself, ptarget = self._maybe_promote(target) - if pself is not self or ptarget is not target: - return pself.get_indexer_non_unique(ptarget) - if self.is_all_dates: + if is_categorical(target): + tgt_values = target.__array__() + elif self.is_all_dates: tgt_values = target.asi8 else: tgt_values = target._ndarray_values From afacbe3925a29b1b7fe61c3ef14e3b7447f53303 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sun, 15 Sep 2019 11:21:28 -0400 Subject: [PATCH 11/18] Keep _maybe_promote --- pandas/core/indexes/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 16cf5ddffa5cd..1484ed144f622 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4713,6 +4713,9 @@ def set_value(self, arr, key, value): @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) def get_indexer_non_unique(self, target): target = ensure_index(target) + pself, ptarget = self._maybe_promote(target) + if pself is not self or ptarget is not target: + return pself.get_indexer_non_unique(ptarget) if is_categorical(target): tgt_values = target.__array__() From dbff36f4cf9ba7145e8e5d88ba0df917186bb822 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sun, 15 Sep 2019 11:47:12 -0400 Subject: [PATCH 12/18] Add note --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index bc77553924dfa..840022f1f615f 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -110,6 +110,7 @@ Categorical ^^^^^^^^^^^ - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) +- Bug in :meth:`astype` where ``nans`` were handled incorrectly when casting to int (:issue:`28406`) - - From 075ba332e6bf20333b12b9c224ef2298f5d051c5 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Sun, 15 Sep 2019 14:12:14 -0400 Subject: [PATCH 13/18] Fix typo --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 840022f1f615f..6afebecdce38b 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -110,7 +110,7 @@ Categorical ^^^^^^^^^^^ - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) -- Bug in :meth:`astype` where ``nans`` were handled incorrectly when casting to int (:issue:`28406`) +- Bug in :meth:`astype` where ``nans`` were handled incorrectly when casting to int (:issue:`28406`) - - From 5aeb8b67a1c09a6c4f327a53b3baca4fe4239c81 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Mon, 16 Sep 2019 13:15:21 -0400 Subject: [PATCH 14/18] Update doc/source/whatsnew/v1.0.0.rst Co-Authored-By: Tom Augspurger --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 6afebecdce38b..11a29fdf90f93 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -110,7 +110,7 @@ Categorical ^^^^^^^^^^^ - Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`) -- Bug in :meth:`astype` where ``nans`` were handled incorrectly when casting to int (:issue:`28406`) +- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) - - From af3ff15c502ee9918deaead44beff43bcb9054d4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Mon, 16 Sep 2019 14:06:54 -0400 Subject: [PATCH 15/18] Use np.asarray --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1484ed144f622..5199c6c6869a1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4718,7 +4718,7 @@ def get_indexer_non_unique(self, target): return pself.get_indexer_non_unique(ptarget) if is_categorical(target): - tgt_values = target.__array__() + tgt_values = np.asarray(target) elif self.is_all_dates: tgt_values = target.asi8 else: From e9cc1fa2f9c4be40d7387178b1a9f9ac39e93301 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Mon, 16 Sep 2019 19:51:42 -0400 Subject: [PATCH 16/18] Only check NaN --- pandas/core/arrays/categorical.py | 2 +- pandas/tests/extension/test_categorical.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b4bd2933abc52..1599ffa384021 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -520,7 +520,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: if dtype == self.dtype: return self return self._set_dtype(dtype) - if is_integer_dtype(dtype) and self.isin([np.nan, -np.inf, np.inf]).any(): + if is_integer_dtype(dtype) and self.isna().any(): msg = "Cannot cast to int." raise ValueError(msg) return np.array(self, dtype=dtype, copy=copy) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 959bb9512b264..19e08cfb7bb53 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -198,10 +198,9 @@ def test_searchsorted(self, data_for_sorting): class TestCasting(base.BaseCastingTests): @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) - @pytest.mark.parametrize("value", [np.nan, -np.inf, np.inf]) - def test_cast_nan_to_int(self, cls, value): + def test_cast_nan_to_int(self, cls): # GH 28406 - s = cls([0, 1, value]) + s = cls([0, 1, np.nan]) with pytest.raises((ValueError, TypeError), match="Cannot cast"): s.astype(int) From ab7b3ce463e9583f60026862fd177b372d8d5f68 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Tue, 17 Sep 2019 09:09:07 -0400 Subject: [PATCH 17/18] Change error message --- pandas/core/arrays/categorical.py | 2 +- pandas/tests/extension/test_categorical.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 1599ffa384021..9e0a68f764b0a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -521,7 +521,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: return self return self._set_dtype(dtype) if is_integer_dtype(dtype) and self.isna().any(): - msg = "Cannot cast to int." + msg = "Cannot convert float NaN to integer" raise ValueError(msg) return np.array(self, dtype=dtype, copy=copy) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 19e08cfb7bb53..6bc8f7de70d50 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -202,7 +202,8 @@ def test_cast_nan_to_int(self, cls): # GH 28406 s = cls([0, 1, np.nan]) - with pytest.raises((ValueError, TypeError), match="Cannot cast"): + msg = "Cannot (cast|convert)" + with pytest.raises((ValueError, TypeError), match=msg): s.astype(int) From 754a3edcd632fe9b17adeaf05680be7eeae31bcd Mon Sep 17 00:00:00 2001 From: Daniel Saxton <> Date: Tue, 17 Sep 2019 13:21:00 -0400 Subject: [PATCH 18/18] Add to test cases --- pandas/tests/extension/test_categorical.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 6bc8f7de70d50..c342777b0ebc4 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -19,7 +19,7 @@ import pytest import pandas as pd -from pandas import Categorical, CategoricalIndex +from pandas import Categorical, CategoricalIndex, Timestamp from pandas.api.types import CategoricalDtype from pandas.tests.extension import base import pandas.util.testing as tm @@ -198,9 +198,10 @@ def test_searchsorted(self, data_for_sorting): class TestCasting(base.BaseCastingTests): @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) - def test_cast_nan_to_int(self, cls): + @pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), pd.NaT]]) + def test_cast_nan_to_int(self, cls, values): # GH 28406 - s = cls([0, 1, np.nan]) + s = cls(values) msg = "Cannot (cast|convert)" with pytest.raises((ValueError, TypeError), match=msg):