From fcd29edbe5c6c6257f9650676e4b4e94ecbecebb Mon Sep 17 00:00:00 2001 From: Shantanu Gontia Date: Wed, 15 May 2019 23:51:37 +0530 Subject: [PATCH 1/7] bugfix 26390 assigning pandas array to column works --- pandas/core/frame.py | 7 ++++++- pandas/core/internals/blocks.py | 1 + pandas/tests/internals/test_internals.py | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6ec36c62f0be8..a6230127f1be1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -70,7 +70,7 @@ is_sequence, is_named_tuple) from pandas.core.dtypes.generic import ( - ABCSeries, ABCDataFrame, ABCIndexClass, ABCMultiIndex) + ABCSeries, ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCPandasArray) from pandas.core.dtypes.missing import isna, notna from pandas.core import algorithms @@ -3620,6 +3620,11 @@ def reindexer(value): value = cast_scalar_to_array(len(self.index), value) value = maybe_cast_to_datetime(value, infer_dtype) + # convert pandas array to numpy array + if isinstance(value, ABCPandasArray): + value = value.to_numpy() + return np.atleast_2d(np.asarray(value)) + # return internal types directly if is_extension_type(value) or is_extension_array_dtype(value): return value diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0c49ebb55acdd..b810e5f5c0fc4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -3035,6 +3035,7 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None, # For now, blocks should be backed by ndarrays when possible. 
if isinstance(values, ABCPandasArray): values = values.to_numpy() + if isinstance(dtype, PandasDtype): dtype = dtype.numpy_dtype diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index fbd821f8ec342..85c268da8ed2d 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1310,3 +1310,12 @@ def test_make_block_no_pandas_array(): result = make_block(arr.to_numpy(), slice(len(arr)), dtype=arr.dtype) assert result.is_integer is True assert result.is_extension is False + + +def test_add_column_with_pandas_array(): + # GH 26390 + df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd']}) + df['c'] = pd.array([1, 2, None, 3]) + df2 = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd'], + 'c': pd.array([1, 2, None, 3])}) + assert_frame_equal(df, df2) From a4f6e89df93e3a5491a4bcda0794ac291642d4b4 Mon Sep 17 00:00:00 2001 From: Shantanu Gontia Date: Thu, 16 May 2019 00:50:32 +0530 Subject: [PATCH 2/7] put conversion to ndarray in make_block --- pandas/core/frame.py | 5 ----- pandas/core/internals/blocks.py | 2 ++ pandas/tests/internals/test_internals.py | 5 ++++- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a6230127f1be1..09ebda89061f3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3620,11 +3620,6 @@ def reindexer(value): value = cast_scalar_to_array(len(self.index), value) value = maybe_cast_to_datetime(value, infer_dtype) - # convert pandas array to numpy array - if isinstance(value, ABCPandasArray): - value = value.to_numpy() - return np.atleast_2d(np.asarray(value)) - # return internal types directly if is_extension_type(value) or is_extension_array_dtype(value): return value diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b810e5f5c0fc4..8e47467538dd4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -3035,6 
+3035,8 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None, # For now, blocks should be backed by ndarrays when possible. if isinstance(values, ABCPandasArray): values = values.to_numpy() + if ndim and ndim > 1: + values = np.atleast_2d(values) if isinstance(dtype, PandasDtype): dtype = dtype.numpy_dtype diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 85c268da8ed2d..479b01571f909 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -17,7 +17,8 @@ SparseArray) import pandas.core.algorithms as algos from pandas.core.arrays import DatetimeArray, TimedeltaArray -from pandas.core.internals import BlockManager, SingleBlockManager, make_block +from pandas.core.internals import ( + BlockManager, ObjectBlock, SingleBlockManager, make_block) import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal, randn) @@ -1318,4 +1319,6 @@ def test_add_column_with_pandas_array(): df['c'] = pd.array([1, 2, None, 3]) df2 = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd'], 'c': pd.array([1, 2, None, 3])}) + assert(df2['c']._data.blocks[0].__class__ == ObjectBlock) + assert(df['c']._data.blocks[0].__class__ == ObjectBlock) assert_frame_equal(df, df2) From b3da29f973ba6d96237f8b1cc445b45d021daf93 Mon Sep 17 00:00:00 2001 From: Shantanu Gontia Date: Thu, 16 May 2019 01:23:06 +0530 Subject: [PATCH 3/7] fixed imports --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 09ebda89061f3..6ec36c62f0be8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -70,7 +70,7 @@ is_sequence, is_named_tuple) from pandas.core.dtypes.generic import ( - ABCSeries, ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCPandasArray) + ABCSeries, ABCDataFrame, ABCIndexClass, ABCMultiIndex) from pandas.core.dtypes.missing 
import isna, notna from pandas.core import algorithms From 8a4c87f66972825eb7a4f0dcf26af83f95be6499 Mon Sep 17 00:00:00 2001 From: Shantanu Gontia Date: Thu, 16 May 2019 22:05:17 +0530 Subject: [PATCH 4/7] added release note message --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index d38ee7b8b589a..70e8a6baad337 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -475,7 +475,7 @@ Other - Removed unused C functions from vendored UltraJSON implementation (:issue:`26198`) - Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). - Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions. - +- Fixed bug where assigning a :class:`pandas.core.arrays.numpy_.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`) .. _whatsnew_0.250.contributors: From cd1d38bce5f1d94826ea478cde713105c0a14016 Mon Sep 17 00:00:00 2001 From: Shantanu Gontia Date: Sun, 19 May 2019 07:17:51 +0530 Subject: [PATCH 5/7] changed test location to block_internals --- pandas/tests/frame/test_block_internals.py | 11 +++++++++++ pandas/tests/internals/test_internals.py | 13 +------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index a506e9ccf21d0..6fbc884829784 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -10,6 +10,7 @@ Categorical, DataFrame, Series, Timestamp, compat, date_range, option_context) from pandas.core.arrays import IntervalArray, integer_array +from pandas.core.internals import ObjectBlock from pandas.core.internals.blocks import IntBlock import pandas.util.testing as tm from pandas.util.testing import ( @@ -584,3 +585,13 @@ def 
test_constructor_no_pandas_array(self): expected = pd.DataFrame({"A": [1, 2, 3]}) tm.assert_frame_equal(result, expected) assert isinstance(result._data.blocks[0], IntBlock) + + def test_add_column_with_pandas_array(self): + # GH 26390 + df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd']}) + df['c'] = pd.array([1, 2, None, 3]) + df2 = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd'], + 'c': pd.array([1, 2, None, 3])}) + assert type(df['c']._data.blocks[0]) == ObjectBlock + assert type(df2['c']._data.blocks[0]) == ObjectBlock + assert_frame_equal(df, df2) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 479b01571f909..b9f221b19d044 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -18,7 +18,7 @@ import pandas.core.algorithms as algos from pandas.core.arrays import DatetimeArray, TimedeltaArray from pandas.core.internals import ( - BlockManager, ObjectBlock, SingleBlockManager, make_block) + BlockManager, SingleBlockManager, make_block) import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal, randn) @@ -1311,14 +1311,3 @@ def test_make_block_no_pandas_array(): result = make_block(arr.to_numpy(), slice(len(arr)), dtype=arr.dtype) assert result.is_integer is True assert result.is_extension is False - - -def test_add_column_with_pandas_array(): - # GH 26390 - df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd']}) - df['c'] = pd.array([1, 2, None, 3]) - df2 = pd.DataFrame({'a': [1, 2, 3, 4], 'b': ['a', 'b', 'c', 'd'], - 'c': pd.array([1, 2, None, 3])}) - assert(df2['c']._data.blocks[0].__class__ == ObjectBlock) - assert(df['c']._data.blocks[0].__class__ == ObjectBlock) - assert_frame_equal(df, df2) From f1e30982c9b42598167ef3192c25ed8386293ffb Mon Sep 17 00:00:00 2001 From: Shantanu Gontia Date: Sun, 19 May 2019 07:21:34 +0530 Subject: [PATCH 6/7] changed 
section of release note --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 70e8a6baad337..f58b62c6e99b1 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -376,6 +376,7 @@ Indexing - Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` where ``KeyError`` was not raised for a ``MultiIndex`` when the key was less than or equal to the number of levels in the :class:`MultiIndex` (:issue:`14885`). - Bug in which :meth:`DataFrame.append` produced an erroneous warning indicating that a ``KeyError`` will be thrown in the future when the data to be appended contains new columns (:issue:`22252`). - Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`). +- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise an error (:issue:`26390`). Missing @@ -475,7 +476,6 @@ Other - Removed unused C functions from vendored UltraJSON implementation (:issue:`26198`) - Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). - Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions. -- Fixed bug where assigning a :class:`pandas.core.arrays.numpy_.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`) .. 
_whatsnew_0.250.contributors: From 997c0a57d01356ea773569caa04196329ad03e87 Mon Sep 17 00:00:00 2001 From: Shantanu Gontia Date: Sun, 19 May 2019 07:33:26 +0530 Subject: [PATCH 7/7] fixed import sort order --- pandas/tests/internals/test_internals.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index b9f221b19d044..fbd821f8ec342 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -17,8 +17,7 @@ SparseArray) import pandas.core.algorithms as algos from pandas.core.arrays import DatetimeArray, TimedeltaArray -from pandas.core.internals import ( - BlockManager, SingleBlockManager, make_block) +from pandas.core.internals import BlockManager, SingleBlockManager, make_block import pandas.util.testing as tm from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, assert_series_equal, randn)