From 4abdc9a835a827d5a0fdd83e244bdb4d755ccfb0 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 Apr 2023 15:48:16 -0700 Subject: [PATCH 1/4] BUG: FloatingArray.__contains__(nan) --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/arrays/arrow/array.py | 2 +- pandas/core/arrays/masked.py | 8 ++++++++ pandas/tests/arrays/floating/test_contains.py | 12 ++++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/arrays/floating/test_contains.py diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index efc8bc695df85..67ba740f1cb4f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -419,6 +419,7 @@ Other - Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`) - Bug in :meth:`Series.map` when giving a callable to an empty series, the returned series had ``object`` dtype. It now keeps the original dtype (:issue:`52384`) - Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`) +- Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are presnet (:issue:`??`) - .. ***DO NOT USE THIS SECTION*** diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 539b6bf749210..aff0df8e7b713 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -557,7 +557,7 @@ def __len__(self) -> int: def __contains__(self, key) -> bool: # https://github.com/pandas-dev/pandas/pull/51307#issuecomment-1426372604 if isna(key) and key is not self.dtype.na_value: - if self.dtype.kind == "f" and lib.is_float(key) and isna(key): + if self.dtype.kind == "f" and lib.is_float(key): return pc.any(pc.is_nan(self._pa_array)).as_py() # e.g. 
date or timestamp types we do not allow None here to match pd.NA diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index f1df86788ac44..dba3049575866 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -236,6 +236,14 @@ def __setitem__(self, key, value) -> None: self._data[key] = value self._mask[key] = mask + def __contains__(self, key) -> bool: + # https://github.com/pandas-dev/pandas/pull/51307#issuecomment-1426372604 + if isna(key) and key is not self.dtype.na_value: + if self._data.dtype.kind == "f" and lib.is_float(key): + return (np.isnan(self._data) & ~self._mask).any() + + return super().__contains__(key) + def __iter__(self) -> Iterator: if self.ndim == 1: if not self._hasna: diff --git a/pandas/tests/arrays/floating/test_contains.py b/pandas/tests/arrays/floating/test_contains.py new file mode 100644 index 0000000000000..231ec069f5255 --- /dev/null +++ b/pandas/tests/arrays/floating/test_contains.py @@ -0,0 +1,12 @@ +import numpy as np + +import pandas as pd + + +def test_contains_nan(): + # GH#51378 + arr = pd.array(range(5)) / 0 + + assert np.isnan(arr._data[0]) + assert not arr.isna()[0] + assert np.nan in arr From e505d0d6f760ac2619fad088b23480b6cd4cc5a4 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 Apr 2023 15:49:47 -0700 Subject: [PATCH 2/4] GH ref --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/arrays/masked.py | 2 +- pandas/tests/arrays/floating/test_contains.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 67ba740f1cb4f..57c79f5adf48a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -414,12 +414,12 @@ Styler Other ^^^^^ +- Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are present (:issue:`52840`) - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets 
(:issue:`51727`) - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`) - Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`) - Bug in :meth:`Series.map` when giving a callable to an empty series, the returned series had ``object`` dtype. It now keeps the original dtype (:issue:`52384`) - Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`) -- Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are presnet (:issue:`??`) - .. ***DO NOT USE THIS SECTION*** diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index dba3049575866..61d11da58cf27 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -237,8 +237,8 @@ def __setitem__(self, key, value) -> None: self._mask[key] = mask def __contains__(self, key) -> bool: - # https://github.com/pandas-dev/pandas/pull/51307#issuecomment-1426372604 if isna(key) and key is not self.dtype.na_value: + # GH#52840 if self._data.dtype.kind == "f" and lib.is_float(key): return (np.isnan(self._data) & ~self._mask).any() diff --git a/pandas/tests/arrays/floating/test_contains.py b/pandas/tests/arrays/floating/test_contains.py index 231ec069f5255..956642697bf32 100644 --- a/pandas/tests/arrays/floating/test_contains.py +++ b/pandas/tests/arrays/floating/test_contains.py @@ -4,7 +4,7 @@ def test_contains_nan(): - # GH#51378 + # GH#52840 arr = pd.array(range(5)) / 0 assert np.isnan(arr._data[0]) From 57c5723704876dc9c13b8c369689941b3555e313 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 Apr 2023 17:23:31 -0700 Subject: [PATCH 3/4] mypy fixup --- pandas/core/arrays/masked.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 61d11da58cf27..cc447d82aec5f 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -240,7 +240,7 @@ def __contains__(self, key) -> bool: if isna(key) and key is not self.dtype.na_value: # GH#52840 if self._data.dtype.kind == "f" and lib.is_float(key): - return (np.isnan(self._data) & ~self._mask).any() + return bool((np.isnan(self._data) & ~self._mask).any()) return super().__contains__(key) From e6f39b5c106d684a8f9d41cf39544ecec8067610 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 29 Apr 2023 08:52:20 -0700 Subject: [PATCH 4/4] mypy fixup --- pandas/core/arrays/masked.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index cc447d82aec5f..88e9e84817ff3 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -242,7 +242,7 @@ def __contains__(self, key) -> bool: if self._data.dtype.kind == "f" and lib.is_float(key): return bool((np.isnan(self._data) & ~self._mask).any()) - return super().__contains__(key) + return bool(super().__contains__(key)) def __iter__(self) -> Iterator: if self.ndim == 1: