diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 6642f5855f4fe..6f7004589ad7e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -858,11 +858,13 @@ ExtensionArray - Bug in :class:`Categorical` when constructing with an :class:`Index` with :class:`ArrowDtype` (:issue:`60563`) - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`) - Bug in :meth:`ArrowExtensionArray.factorize` where NA values were dropped when input was dictionary-encoded even when dropna was set to False(:issue:`60567`) +- Bug in :meth:`Series.map` and :meth:`Series.apply` where applying a function to a :class:`Series` with :class:`Int32Dtype` or another masked (nullable) dtype would convert the elements to float and ``pd.NA`` to ``np.nan`` instead of preserving the original values (:issue:`60766`) - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) - Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`) - Bug in constructing pandas data structures when passing into ``dtype`` a string of the type followed by ``[pyarrow]`` while PyArrow is not installed would raise ``NameError`` rather than ``ImportError`` (:issue:`57928`) - Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`) + Styler ^^^^^^ - Bug in :meth:`Styler.to_latex` where styling column headers when combined with a hidden index or hidden index-levels is fixed. 
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index e7a6b207363c3..4f8d651bd94dc 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1325,7 +1325,7 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): return self._wrap_reduction_result("max", result, skipna=skipna, axis=axis) def map(self, mapper, na_action: Literal["ignore"] | None = None): - return map_array(self.to_numpy(), mapper, na_action=na_action) + return map_array(self, mapper, na_action=na_action) @overload def any( diff --git a/pandas/tests/arrays/masked/test_basemaskedarray_map.py b/pandas/tests/arrays/masked/test_basemaskedarray_map.py new file mode 100644 index 0000000000000..29306a426e575 --- /dev/null +++ b/pandas/tests/arrays/masked/test_basemaskedarray_map.py @@ -0,0 +1,18 @@ +import pandas as pd + + +def test_basemaskedarray_map(): + for dtype, data, expected_data in [ + ("Int32", [1, 2, None, 4], [2, 3, pd.NA, 5]), + ]: + s = pd.Series(data, dtype=dtype) + + def transform(x): + if x is None: + return x + return x + 1 + + result = s.map(transform) + expected = pd.Series(expected_data, dtype=result.dtype) + + assert result.tolist() == expected.tolist() diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index 3b9079d06e231..e262e3714f502 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -171,6 +171,12 @@ class TestMaskedArrays(base.ExtensionTests): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data_missing, na_action): result = data_missing.map(lambda x: x, na_action=na_action) + if data_missing.dtype.kind != "b": + for i in range(len(result)): + if result[i] is pd.NA: + result[i] = "nan" + result = result.astype("float64") + if data_missing.dtype == Float32Dtype(): # map roundtrips through objects, which converts to float64 expected = data_missing.to_numpy(dtype="float64", na_value=np.nan) @@ -181,10 
+187,15 @@ def test_map(self, data_missing, na_action): def test_map_na_action_ignore(self, data_missing_for_sorting): zero = data_missing_for_sorting[2] result = data_missing_for_sorting.map(lambda x: zero, na_action="ignore") + if data_missing_for_sorting.dtype.kind == "b": expected = np.array([False, pd.NA, False], dtype=object) else: expected = np.array([zero, np.nan, zero]) + for i in range(len(result)): + if result[i] is pd.NA: + result[i] = "nan" + result = result.astype("float64") tm.assert_numpy_array_equal(result, expected) def _get_expected_exception(self, op_name, obj, other):