From b10a642a43645beb19680c355f71f9b89e30fc64 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 4 Oct 2022 18:16:02 +0100 Subject: [PATCH 1/5] to_numpy defaults to dtype.type --- doc/source/whatsnew/v1.6.0.rst | 34 ++++++++++++++++ pandas/_testing/asserters.py | 14 +++---- pandas/core/arrays/masked.py | 18 ++++++--- pandas/io/formats/format.py | 2 +- .../tests/arrays/boolean/test_construction.py | 40 +++++++++++-------- pandas/tests/arrays/floating/test_to_numpy.py | 24 ++++++++--- .../tests/arrays/integer/test_construction.py | 2 +- pandas/tests/arrays/integer/test_dtypes.py | 15 +++++-- pandas/tests/arrays/masked_shared.py | 2 +- 9 files changed, 111 insertions(+), 40 deletions(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 3c7a80f096844..402c40fbb5233 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -111,6 +111,40 @@ Optional libraries below the lowest tested version may still work, but are not c See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. +Nullable types get converted to their respective NumPy types in ``to_numpy`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, for nullable data types, :meth:`Series.to_numpy` would always convert to ``object`` type: + +*Old Behavior* + +.. code-block:: ipython + + In [1]: pd.Series([1, 2, 3], dtype="Float64").to_numpy() + Out[1]: array([1.0, 2.0, 3.0], dtype=object) + +Now, the above :class:`Series` gets converted to ``float64``: + +*New Behavior* + +.. ipython:: python + + pd.Series([1, 2, 3], dtype="Float64").to_numpy() + +If a :class:`Series` contains missing values (``pd.NA``), then when converting to ``float64``, +they will be converted to ``np.nan``: + +.. 
ipython:: python + + pd.Series([1, 2, pd.NA], dtype="Float64").to_numpy() + +If converting to a dtype that cannot hold missing values (such as an integer or boolean dtype), +you need to specify an ``na_value`` compatible with that ``dtype``, for example: + +.. ipython:: python + + pd.Series([1, 2, pd.NA], dtype="Float64").to_numpy("int64", na_value=-1) + .. _whatsnew_160.api_breaking.other: Other API changes diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 9b8e171413b57..22414fdd48555 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1051,7 +1051,7 @@ def assert_series_equal( left_values, right_values, check_dtype=check_dtype, - index_values=np.asarray(left.index), + index_values=np.asarray(left.index, dtype=object), ) else: assert_numpy_array_equal( @@ -1059,7 +1059,7 @@ def assert_series_equal( right_values, check_dtype=check_dtype, obj=str(obj), - index_values=np.asarray(left.index), + index_values=np.asarray(left.index, dtype=object), ) elif check_datetimelike_compat and ( needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype) @@ -1088,7 +1088,7 @@ def assert_series_equal( atol=atol, check_dtype=bool(check_dtype), obj=str(obj), - index_values=np.asarray(left.index), + index_values=np.asarray(left.index, dtype=object), ) elif is_extension_array_dtype(left.dtype) and is_extension_array_dtype(right.dtype): assert_extension_array_equal( @@ -1097,7 +1097,7 @@ def assert_series_equal( rtol=rtol, atol=atol, check_dtype=check_dtype, - index_values=np.asarray(left.index), + index_values=np.asarray(left.index, dtype=object), ) elif is_extension_array_dtype_and_needs_i8_conversion( left.dtype, right.dtype @@ -1106,7 +1106,7 @@ def assert_series_equal( left._values, right._values, check_dtype=check_dtype, - index_values=np.asarray(left.index), + index_values=np.asarray(left.index, dtype=object), ) elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype): # DatetimeArray or TimedeltaArray @@ -1114,7 +1114,7 @@ def 
assert_series_equal( left._values, right._values, check_dtype=check_dtype, - index_values=np.asarray(left.index), + index_values=np.asarray(left.index, dtype=object), ) else: _testing.assert_almost_equal( @@ -1124,7 +1124,7 @@ def assert_series_equal( atol=atol, check_dtype=bool(check_dtype), obj=str(obj), - index_values=np.asarray(left.index), + index_values=np.asarray(left.index, dtype=object), ) # metadata comparison diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 043e0baf3ec0e..ca353a71dc4b6 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -401,10 +401,14 @@ def to_numpy( >>> a.to_numpy(dtype="bool", na_value=False) array([ True, False, False]) """ - if na_value is lib.no_default: - na_value = libmissing.NA if dtype is None: - dtype = object + dtype = self.dtype.type + + if na_value is lib.no_default and is_float_dtype(dtype): + na_value = np.nan + elif na_value is lib.no_default: + na_value = libmissing.NA + if self._hasna: if ( not is_object_dtype(dtype) @@ -413,8 +417,12 @@ def to_numpy( ): raise ValueError( f"cannot convert to '{dtype}'-dtype NumPy array " - "with missing values. Specify an appropriate 'na_value' " - "for this dtype." 
+ "with missing values.\n" + "Please either:\n" + "- convert to 'float'\n" + "- convert to 'object'\n" + "- specify an appropriate 'na_value' for this dtype\n" + "for this dtype.\n" ) # don't pass copy to astype -> always need a copy since we are mutating data = self._data.astype(dtype) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index cdc21f04da43a..ed98f18c3ea2c 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1657,7 +1657,7 @@ def _format_strings(self) -> list[str]: # Categorical is special for now, so that we can preserve tzinfo array = values._internal_get_values() else: - array = np.asarray(values) + array = np.asarray(values, dtype=object) fmt_values = format_array( array, diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index d26eea19c06e9..5bf829ae9b574 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -214,16 +214,17 @@ def test_coerce_to_array_from_boolean_array(): def test_coerce_to_numpy_array(): - # with missing values -> object dtype + # with missing values -> tries but fails to convert arr = pd.array([True, False, None], dtype="boolean") - result = np.array(arr) - expected = np.array([True, False, pd.NA], dtype="object") - tm.assert_numpy_array_equal(result, expected) + with pytest.raises( + ValueError, match=r"specify an appropriate 'na_value' for this dtype" + ): + result = np.array(arr) - # also with no missing values -> object dtype + # also with no missing values -> successfully converts to bool arr = pd.array([True, False, True], dtype="boolean") result = np.array(arr) - expected = np.array([True, False, True], dtype="object") + expected = np.array([True, False, True], dtype="bool") tm.assert_numpy_array_equal(result, expected) # force bool dtype @@ -233,8 +234,12 @@ def test_coerce_to_numpy_array(): # with missing values will raise error arr = 
pd.array([True, False, None], dtype="boolean") msg = ( - "cannot convert to 'bool'-dtype NumPy array with missing values. " - "Specify an appropriate 'na_value' for this dtype." + "^cannot convert to 'bool'-dtype NumPy array with missing values.\n" + "Please either:\n" + "- convert to 'float'\n" + "- convert to 'object'\n" + "- specify an appropriate 'na_value' for this dtype\n" + "for this dtype.\n$" ) with pytest.raises(ValueError, match=msg): np.array(arr, dtype="bool") @@ -260,16 +265,17 @@ def test_to_boolean_array_from_strings_invalid_string(): @pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) def test_to_numpy(box): con = pd.Series if box else pd.array - # default (with or without missing values) -> object dtype + # default (with or without missing values) -> bool dtype arr = con([True, False, True], dtype="boolean") result = arr.to_numpy() - expected = np.array([True, False, True], dtype="object") + expected = np.array([True, False, True], dtype="bool") tm.assert_numpy_array_equal(result, expected) arr = con([True, False, None], dtype="boolean") - result = arr.to_numpy() - expected = np.array([True, False, pd.NA], dtype="object") - tm.assert_numpy_array_equal(result, expected) + with pytest.raises( + ValueError, match="specify an appropriate 'na_value' for this dtype" + ): + arr.to_numpy() arr = con([True, False, None], dtype="boolean") result = arr.to_numpy(dtype="str") @@ -304,11 +310,13 @@ def test_to_numpy(box): expected = np.array([1, 0, np.nan], dtype="float64") tm.assert_numpy_array_equal(result, expected) - # converting to int or float without specifying na_value raises + # converting to int without specifying na_value raises with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"): arr.to_numpy(dtype="int64") - with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"): - arr.to_numpy(dtype="float64") + # converting to float without specifying na_value converts NA to nan + result = 
arr.to_numpy(dtype="float64") + expected = np.array([1, 0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) def test_to_numpy_copy(): diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py index 2ed52439adf53..61994d0dec3a3 100644 --- a/pandas/tests/arrays/floating/test_to_numpy.py +++ b/pandas/tests/arrays/floating/test_to_numpy.py @@ -10,15 +10,15 @@ def test_to_numpy(box): con = pd.Series if box else pd.array - # default (with or without missing values) -> object dtype + # default (with or without missing values) -> float64 dtype arr = con([0.1, 0.2, 0.3], dtype="Float64") result = arr.to_numpy() - expected = np.array([0.1, 0.2, 0.3], dtype="object") + expected = np.array([0.1, 0.2, 0.3], dtype="float64") tm.assert_numpy_array_equal(result, expected) arr = con([0.1, 0.2, None], dtype="Float64") result = arr.to_numpy() - expected = np.array([0.1, 0.2, pd.NA], dtype="object") + expected = np.array([0.1, 0.2, np.nan], dtype="float64") tm.assert_numpy_array_equal(result, expected) @@ -33,8 +33,9 @@ def test_to_numpy_float(box): tm.assert_numpy_array_equal(result, expected) arr = con([0.1, 0.2, None], dtype="Float64") - with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"): - result = arr.to_numpy(dtype="float64") + result = arr.to_numpy(dtype="float64") + expected = np.array([0.1, 0.2, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) # need to explicitly specify na_value result = arr.to_numpy(dtype="float64", na_value=np.nan) @@ -100,7 +101,18 @@ def test_to_numpy_dtype(box, dtype): tm.assert_numpy_array_equal(result, expected) -@pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"]) +@pytest.mark.parametrize("dtype", ["float64", "float32"]) +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy_na_doesnt_raise(box, dtype): + # https://github.com/pandas-dev/pandas/issues/48891 + con 
= pd.Series if box else pd.array + arr = con([0.0, 1.0, None], dtype="Float64") + result = arr.to_numpy(dtype=dtype) + expected = np.array([0.0, 1.0, np.nan], dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["int32", "int64", "bool"]) @pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) def test_to_numpy_na_raises(box, dtype): con = pd.Series if box else pd.array diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py index 43ef46ddfb581..6c6dc52d7362a 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -37,7 +37,7 @@ def test_from_dtype_from_float(data): # from int / list expected = pd.Series(data) - result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + result = pd.Series(np.array(data, dtype=object).tolist(), dtype=str(dtype)) tm.assert_series_equal(result, expected) # from int / array diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 1566476c32989..fb38e484efbd2 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -89,7 +89,7 @@ def test_astype_index(all_data, dropna): other = all_data dtype = all_data.dtype - idx = pd.Index._with_infer(np.array(other)) + idx = pd.Index._with_infer(np.array(other, dtype=object)) assert isinstance(idx, ABCIndex) result = idx.astype(dtype) @@ -143,7 +143,7 @@ def test_astype(all_data): # coerce to object s = pd.Series(mixed) result = s.astype("object") - expected = pd.Series(np.asarray(mixed)) + expected = pd.Series(np.asarray(mixed, dtype=object)) tm.assert_series_equal(result, expected) @@ -274,13 +274,22 @@ def test_to_numpy_dtype(dtype, in_series): tm.assert_numpy_array_equal(result, expected) -@pytest.mark.parametrize("dtype", ["float64", "int64", "bool"]) +@pytest.mark.parametrize("dtype", ["int64", "bool"]) def 
test_to_numpy_na_raises(dtype): a = pd.array([0, 1, None], dtype="Int64") with pytest.raises(ValueError, match=dtype): a.to_numpy(dtype=dtype) +@pytest.mark.parametrize("dtype", ["float64"]) +def test_to_numpy_na_doesnt_raise(dtype): + # https://github.com/pandas-dev/pandas/issues/48891 + a = pd.array([0, 1, None], dtype="Int64") + result = a.to_numpy(dtype=dtype) + expected = np.array([0.0, 1.0, np.nan]) + tm.assert_numpy_array_equal(result, expected) + + def test_astype_str(): a = pd.array([1, 2, None], dtype="Int64") expected = np.array(["1", "2", ""], dtype=f"{tm.ENDIAN}U21") diff --git a/pandas/tests/arrays/masked_shared.py b/pandas/tests/arrays/masked_shared.py index 6174ae0a3c19b..aa02490630b1c 100644 --- a/pandas/tests/arrays/masked_shared.py +++ b/pandas/tests/arrays/masked_shared.py @@ -130,7 +130,7 @@ def test_ufunc_with_out(self, dtype): # result |= mask worked because mask could be cast losslessly to # boolean ndarray. mask2 can't, so this raises result = np.zeros(3, dtype=bool) - msg = "Specify an appropriate 'na_value' for this dtype" + msg = "specify an appropriate 'na_value' for this dtype" with pytest.raises(ValueError, match=msg): result |= mask2 From dd3581fe9c60cd7fe9cdf8b9372f96ce06583ace Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 5 Oct 2022 09:19:26 +0100 Subject: [PATCH 2/5] fix test --- pandas/tests/extension/base/interface.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 3e8a754c8c527..3f6c3c492cbf9 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes.dtypes import ExtensionDtype @@ -60,8 +61,8 @@ def test_memory_usage(self, data): assert result == s.nbytes def test_array_interface(self, data): - result = np.array(data) - assert 
result[0] == data[0] + with pytest.raises(ValueError, match="specify an appropriate 'na_value' for this dtype"): + np.array(data) result = np.array(data, dtype=object) expected = np.array(list(data), dtype=object) From 37c39910e2e920a0c4027614337bf0d46e0ad086 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 5 Oct 2022 11:04:03 +0100 Subject: [PATCH 3/5] fixup some more tests --- pandas/core/dtypes/missing.py | 2 +- pandas/core/internals/managers.py | 2 +- pandas/tests/base/test_conversion.py | 23 ++++++++++++++++++----- pandas/tests/extension/base/casting.py | 6 +++--- pandas/tests/extension/base/interface.py | 5 +---- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 375d05bdf11ff..e3f046de1ff65 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -588,7 +588,7 @@ def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bo return False else: try: - if np.any(np.asarray(left_value != right_value)): + if np.any(np.asarray(left_value != right_value, dtype=object)): return False except TypeError as err: if "boolean value of NA is ambiguous" in str(err): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5a398533e1510..1183d6a313ae6 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1742,7 +1742,7 @@ def as_array( # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no # attribute "to_numpy" arr = blk.values.to_numpy( # type: ignore[union-attr] - dtype=dtype, + dtype=dtype or 'object', na_value=na_value, ).reshape(blk.shape) else: diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 599aaae4d3527..8936b6eb9d4e9 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -288,7 +288,6 @@ def test_array_multiindex_raises(): pd.core.arrays.period_array(["2000", "2001"], 
freq="D"), np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]), ), - (pd.array([0, np.nan], dtype="Int64"), np.array([0, pd.NA], dtype=object)), ( IntervalArray.from_breaks([0, 1, 2]), np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object), @@ -340,10 +339,6 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request): with tm.assert_produces_warning(warn): thing = box(arr) - if arr.dtype.name == "int64" and box is pd.array: - mark = pytest.mark.xfail(reason="thing is Int64 and to_numpy() returns object") - request.node.add_marker(mark) - result = thing.to_numpy() tm.assert_numpy_array_equal(result, expected) @@ -351,6 +346,24 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request): tm.assert_numpy_array_equal(result, expected) +def test_to_numpy_with_error(index_or_series_or_array): + # https://github.com/pandas-dev/pandas/issues/48891 + arr, expected = (pd.array([0, np.nan], dtype="Int64"), np.array([0, pd.NA], dtype=object)) + box = index_or_series_or_array + + warn = None + if index_or_series_or_array is pd.Index and isinstance(arr, SparseArray): + warn = FutureWarning + with tm.assert_produces_warning(warn): + thing = box(arr) + + with pytest.raises(ValueError, match="specify an appropriate 'na_value' for this dtype"): + thing.to_numpy() + + with pytest.raises(ValueError, match="specify an appropriate 'na_value' for this dtype"): + np.asarray(thing) + + @pytest.mark.parametrize("as_series", [True, False]) @pytest.mark.parametrize( "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)] diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index 0eb8123e6bdb8..5fd98ce2b60ef 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -63,12 +63,12 @@ def test_astype_string(self, data, nullable_string_dtype): self.assert_series_equal(result, expected) def test_to_numpy(self, data): - expected = 
np.asarray(data) + expected = np.asarray(data, dtype=object) - result = data.to_numpy() + result = data.to_numpy(dtype=object) self.assert_equal(result, expected) - result = pd.Series(data).to_numpy() + result = pd.Series(data).to_numpy(dtype=object) self.assert_equal(result, expected) def test_astype_empty_dataframe(self, dtype): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 3f6c3c492cbf9..64939558a85cf 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -61,10 +61,7 @@ def test_memory_usage(self, data): assert result == s.nbytes def test_array_interface(self, data): - with pytest.raises(ValueError, match="specify an appropriate 'na_value' for this dtype"): - np.array(data) - - result = np.array(data, dtype=object) + result = np.array(data) expected = np.array(list(data), dtype=object) tm.assert_numpy_array_equal(result, expected) From b3a44e851c9c68cb4dcf4532c3205c38eb48c25a Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 5 Oct 2022 12:16:55 +0100 Subject: [PATCH 4/5] fixup tests, temp workaround for .where --- pandas/core/generic.py | 6 ++++-- pandas/core/reshape/merge.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3df5d2aaf9896..ac5132e95477b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9734,10 +9734,12 @@ def _where( other = other._values if axis == 0: other = np.reshape(other, (-1, 1)) + ind = np.arange(other.shape[1]).repeat(self.shape[1]) + other = other.take(ind, axis=1) elif axis == 1: other = np.reshape(other, (1, -1)) - - other = np.broadcast_to(other, self.shape) + ind = np.arange(other.shape[0]).repeat(self.shape[0]) + other = other.take(ind, axis=0) # slice me out of the other else: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index b05340fcb96a5..803eef6e2370c 100644 --- a/pandas/core/reshape/merge.py 
+++ b/pandas/core/reshape/merge.py @@ -2043,7 +2043,7 @@ def injection(obj): # np.array([0, 255, 255], dtype=np.uint8) # and the non-injectivity should make a difference somehow # shouldn't it? - return np.asarray(obj) + return np.asarray(obj, dtype=object) xs = [injection(x) for x in xs] labels = list(string.ascii_lowercase[: len(xs)]) From 1005378fe2b65ed27e81827552bd98a94731faec Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 5 Oct 2022 13:30:22 +0100 Subject: [PATCH 5/5] change behaviour for DataFrame.to_numpy as well --- pandas/core/internals/managers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 1183d6a313ae6..65eaba8cf2638 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1778,14 +1778,13 @@ def _interleave( dtype = interleaved_dtype( # type: ignore[assignment] [blk.dtype for blk in self.blocks] ) - # TODO: https://github.com/pandas-dev/pandas/issues/22791 # Give EAs some input on what happens here. Sparse needs this. if isinstance(dtype, SparseDtype): dtype = dtype.subtype dtype = cast(np.dtype, dtype) elif isinstance(dtype, ExtensionDtype): - dtype = np.dtype("object") + dtype = dtype.type elif is_dtype_equal(dtype, str): dtype = np.dtype("object")