diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index a64aef64ab49f..1f5d1f16370b5 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -75,6 +75,7 @@ isin, unique, ) +from pandas.core.construction import array as pd_array from pandas.core.sorting import ( nargminmax, nargsort, @@ -535,21 +536,9 @@ def nbytes(self) -> int: # Additional Methods # ------------------------------------------------------------------------ - @overload - def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: - ... - - @overload - def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: - ... - - @overload - def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: - ... - - def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + def astype(self, dtype: AstypeArg, copy: bool = True) -> ExtensionArray: """ - Cast to a NumPy array or ExtensionArray with 'dtype'. + Cast to an ExtensionArray with 'dtype'. Parameters ---------- @@ -562,9 +551,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: Returns ------- - array : np.ndarray or ExtensionArray - An ExtensionArray if dtype is ExtensionDtype, - Otherwise a NumPy ndarray with 'dtype' for its dtype. + ExtensionArray + ExtensionArray subclass, depending on `dtype`. If `dtype` is a + numpy dtype, this returns a PandasArray. """ dtype = pandas_dtype(dtype) @@ -578,7 +567,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: cls = dtype.construct_array_type() return cls._from_sequence(self, dtype=dtype, copy=copy) - return np.array(self, dtype=dtype, copy=copy) + return pd_array(self, dtype=dtype, copy=copy) def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll: """ @@ -757,7 +746,7 @@ def fillna( if mask.any(): if method is not None: func = missing.get_fill_func(method) - new_values, _ = func(self.astype(object), limit=limit, mask=mask) + new_values, _ = func(self.to_numpy(object), limit=limit, mask=mask) new_values = self._from_sequence(new_values, dtype=self.dtype) else: # fill with value @@ -836,7 +825,7 @@ def unique(self: ExtensionArrayT) -> ExtensionArrayT: ------- uniques : ExtensionArray """ - uniques = unique(self.astype(object)) + uniques = unique(self.to_numpy(object)) return self._from_sequence(uniques, dtype=self.dtype) def searchsorted( @@ -888,9 +877,9 @@ def searchsorted( # 1. Values outside the range of the `data_for_sorting` fixture # 2. Values between the values in the `data_for_sorting` fixture # 3. Missing values. - arr = self.astype(object) + arr = self.to_numpy(object) if isinstance(value, ExtensionArray): - value = value.astype(object) + value = value.to_numpy(object) return arr.searchsorted(value, side=side, sorter=sorter) def equals(self, other: object) -> bool: @@ -965,7 +954,7 @@ def _values_for_factorize(self) -> tuple[np.ndarray, Any]: The values returned by this method are also used in :func:`pandas.util.hash_pandas_object`. """ - return self.astype(object), np.nan + return self.to_numpy(object), np.nan def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]: """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 58e7abbbe1ddd..b642e8d70b4d2 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,10 +1,7 @@ from __future__ import annotations import numbers -from typing import ( - TYPE_CHECKING, - overload, -) +from typing import TYPE_CHECKING import warnings import numpy as np @@ -14,11 +11,9 @@ missing as libmissing, ) from pandas._typing import ( - ArrayLike, AstypeArg, Dtype, DtypeObj, - npt, type_t, ) from pandas.compat.numpy import function as nv @@ -44,6 +39,7 @@ BaseMaskedArray, BaseMaskedDtype, ) +from pandas.core.arrays.numpy_ import PandasArray if TYPE_CHECKING: import pyarrow @@ -367,22 +363,10 @@ def map_string(s): def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: return coerce_to_array(value) - @overload - def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: - ... - - @overload - def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: - ... - - @overload - def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: - ... - - def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + def astype(self, dtype: AstypeArg, copy: bool = True) -> ExtensionArray: """ - Cast to a NumPy array or ExtensionArray with 'dtype'. + Cast to an ExtensionArray with 'dtype'. Parameters ---------- @@ -395,8 +379,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: Returns ------- - ndarray or ExtensionArray - NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. + ExtensionArray + ExtensionArray subclass, depending on `dtype`. Raises ------ @@ -426,7 +410,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: if is_float_dtype(dtype): na_value = np.nan # coerce - return self.to_numpy(dtype=dtype, na_value=na_value, copy=False) + arr = self.to_numpy(dtype=dtype, na_value=na_value, copy=False) + return PandasArray(arr) def _values_for_argsort(self) -> np.ndarray: """ diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 6d6cc03a1c83e..076cc3f03e5fc 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -1,6 +1,5 @@ from __future__ import annotations -from typing import overload import warnings import numpy as np @@ -10,10 +9,8 @@ missing as libmissing, ) from pandas._typing import ( - ArrayLike, AstypeArg, DtypeObj, - npt, ) from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly @@ -39,6 +36,7 @@ NumericArray, NumericDtype, ) +from pandas.core.arrays.numpy_ import PandasArray from pandas.core.ops import invalid_comparison from pandas.core.tools.numeric import to_numeric @@ -278,21 +276,9 @@ def _from_sequence_of_strings( def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) - @overload - def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: - ... - - @overload - def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: - ... - - @overload - def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: - ... - - def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + def astype(self, dtype: AstypeArg, copy: bool = True) -> ExtensionArray: """ - Cast to a NumPy array or ExtensionArray with 'dtype'. + Cast to an ExtensionArray with 'dtype'. Parameters ---------- @@ -305,9 +291,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: Returns ------- - ndarray or ExtensionArray - NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with - 'dtype' for its dtype. + ExtensionArray + ExtensionArray subclass, depending on `dtype`. Raises ------ @@ -334,7 +319,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: # error: Argument 2 to "to_numpy" of "BaseMaskedArray" has incompatible # type "**Dict[str, float]"; expected "bool" data = self.to_numpy(dtype=dtype, **kwargs) # type: ignore[arg-type] - return astype_nansafe(data, dtype, copy=False) + arr = astype_nansafe(data, dtype, copy=False) + return PandasArray(arr) def _values_for_argsort(self) -> np.ndarray: return self._data diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index d8b7bf2b86d2c..3100fbdee5499 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,6 +1,5 @@ from __future__ import annotations -from typing import overload import warnings import numpy as np @@ -11,11 +10,9 @@ missing as libmissing, ) from pandas._typing import ( - ArrayLike, AstypeArg, Dtype, DtypeObj, - npt, ) from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly @@ -46,6 +43,7 @@ NumericArray, NumericDtype, ) +from pandas.core.arrays.numpy_ import PandasArray from pandas.core.ops import invalid_comparison from pandas.core.tools.numeric import to_numeric @@ -345,21 +343,9 @@ def _from_sequence_of_strings( def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]: return coerce_to_array(value, dtype=self.dtype) - @overload - def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: - ... - - @overload - def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: - ... - - @overload - def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: - ... - - def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + def astype(self, dtype: AstypeArg, copy: bool = True) -> ExtensionArray: """ - Cast to a NumPy array or ExtensionArray with 'dtype'. + Cast to an ExtensionArray with 'dtype'. Parameters ---------- @@ -372,8 +358,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: Returns ------- - ndarray or ExtensionArray - NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. + ExtensionArray + ExtensionArray subclass, depending on `dtype`. Raises ------ @@ -397,7 +383,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: else: na_value = lib.no_default - return self.to_numpy(dtype=dtype, na_value=na_value, copy=False) + arr = self.to_numpy(dtype=dtype, na_value=na_value, copy=False) + return PandasArray(arr) def _values_for_argsort(self) -> np.ndarray: """ diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 568f3484e78e4..f5887af5ba9ec 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -15,7 +15,6 @@ missing as libmissing, ) from pandas._typing import ( - ArrayLike, AstypeArg, NpDtype, PositionalIndexer, @@ -361,19 +360,7 @@ def to_numpy( data = self._data.astype(dtype, copy=copy) return data - @overload - def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: - ... - - @overload - def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: - ... - - @overload - def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: - ... - - def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: + def astype(self, dtype: AstypeArg, copy: bool = True) -> ExtensionArray: dtype = pandas_dtype(dtype) if is_dtype_equal(dtype, self.dtype): diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 5a8e5f488fbf2..0284f4be3cd00 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -443,7 +443,7 @@ def astype(self, dtype, copy=True): arr[mask] = 0 values = arr.astype(dtype) values[mask] = np.nan - return values + return PandasArray(values) return super().astype(dtype, copy) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6d5162f3fe3a4..551910bedc6e4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1250,7 +1250,10 @@ def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> Arra if not isinstance(values, np.ndarray): # i.e. ExtensionArray - values = values.astype(dtype, copy=copy) + if isinstance(dtype, ExtensionDtype): + values = values.astype(dtype, copy=copy) + else: + values = values.to_numpy(dtype, copy=False) else: values = astype_nansafe(values, dtype, copy=copy) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 4987751f31dac..c3e41f6d0cb64 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -5,6 +5,7 @@ import pandas.util._test_decorators as td import pandas as pd +from pandas.core.arrays import ExtensionArray from pandas.core.internals import ObjectBlock from pandas.tests.extension.base.base import BaseExtensionTests @@ -12,6 +13,23 @@ class BaseCastingTests(BaseExtensionTests): """Casting to and from ExtensionDtypes""" + def test_extension_astype_returns_extension_array(self, all_data): + # Base test to ensure that EA.astype always returns an EA + # https://github.com/pandas-dev/pandas/issues/24877 + + # test for some dtype strings and objects that would map to numpy dtypes + for dtype_str in ["object", "int64", "float64"]: + for dtype in [dtype_str, np.dtype(dtype_str)]: + try: + # only taking first two elements to have one case without NAs + res = all_data[:2].astype(dtype) + except (TypeError, ValueError): + # not all casts will be possible, so ignore TypeError/ValueErrors + # and only check the result if there is a return value + pass + else: + assert isinstance(res, ExtensionArray) + def test_astype_object_series(self, all_data): ser = pd.Series(all_data, name="A") result = ser.astype(object) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index befbf3b445d62..e51aa50924bb4 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -203,7 +203,8 @@ def astype(self, dtype, copy=True): value = self.astype(str) # numpy doesn'y like nested dicts return dtype.construct_array_type()._from_sequence(value, copy=False) - return np.array([dict(x) for x in self], dtype=dtype, copy=copy) + arr = np.array([dict(x) for x in self], dtype=dtype, copy=copy) + return pd.array(arr) def unique(self): # Parent method doesn't work since np.array will try to infer