From b75fe0a785fbaeaa7bd2334c58c250f349703357 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Aug 2020 16:13:20 -0700 Subject: [PATCH 1/3] BUG: df.shift(n, axis=1) with multiple blocks --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/internals/managers.py | 25 ++++++++++++++++++++-- pandas/tests/frame/methods/test_shift.py | 27 ++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b16ca0a80c5b4..a83f0b86ba50b 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -157,7 +157,7 @@ ExtensionArray Other ^^^^^ -- +- Bug in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 895385b170c91..c4671c84a2944 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -551,6 +551,24 @@ def interpolate(self, **kwargs) -> "BlockManager": return self.apply("interpolate", **kwargs) def shift(self, periods: int, axis: int, fill_value) -> "BlockManager": + if axis == 0 and self.ndim == 2 and self.nblocks > 1: + # GH#35488 we need to watch out for multi-block cases + ncols = self.shape[0] + if periods > 0: + indexer = [-1] * periods + list(range(ncols - periods)) + else: + nper = abs(periods) + indexer = list(range(nper, ncols)) + [-1] * nper + result = self.reindex_indexer( + self.items, + indexer, + axis=0, + fill_value=fill_value, + allow_dups=True, + consolidate=False, + ) + return result + return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) def fillna(self, value, limit, inplace: bool, downcast) -> "BlockManager": @@ -1213,6 +1231,7 @@ def reindex_indexer( fill_value=None, allow_dups: bool = False, copy: bool = True, + consolidate: bool = True, ) -> T: """ Parameters @@ -1223,7 +1242,8 @@ def reindex_indexer( fill_value : object, default None allow_dups : bool, default False copy : bool, default True - + consolidate: bool, default True + Whether to consolidate inplace before reindexing. pandas-indexer with -1's only. """ @@ -1236,7 +1256,8 @@ def reindex_indexer( result.axes[axis] = new_axis return result - self._consolidate_inplace() + if consolidate: + self._consolidate_inplace() # some axes don't allow reindexing with dups if not allow_dups: diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index 9ec029a6c4304..8f6902eca816f 100644 --- a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -145,6 +145,33 @@ def test_shift_duplicate_columns(self): tm.assert_frame_equal(shifted[0], shifted[1]) tm.assert_frame_equal(shifted[0], shifted[2]) + def test_shift_axis1_multiple_blocks(self): + # GH#35488 + df1 = pd.DataFrame(np.random.randint(1000, size=(5, 3))) + df2 = pd.DataFrame(np.random.randint(1000, size=(5, 2))) + df3 = pd.concat([df1, df2], axis=1) + assert len(df3._mgr.blocks) == 2 + + result = df3.shift(2, axis=1) + + expected = df3.take([-1, -1, 0, 1, 2], axis=1) + expected.iloc[:, :2] = np.nan + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + + # Case with periods < 0 + # rebuild df3 because `take` call above consolidated + df3 = pd.concat([df1, df2], axis=1) + assert len(df3._mgr.blocks) == 2 + result = df3.shift(-2, axis=1) + + expected = df3.take([2, 3, 4, -1, -1], axis=1) + expected.iloc[:, -2:] = np.nan + expected.columns = df3.columns + + tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning") def test_tshift(self, datetime_frame): # TODO: remove this test when tshift deprecation is enforced From fa536d133ec23a6aa5382b9071fda4ccfde7b5ce Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Aug 2020 09:00:05 -0700 Subject: [PATCH 2/3] move whatsnew --- doc/source/whatsnew/v1.1.1.rst | 3 +++ doc/source/whatsnew/v1.2.0.rst | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst index 6a327a4fc732f..675261861828c 100644 --- a/doc/source/whatsnew/v1.1.1.rst +++ b/doc/source/whatsnew/v1.1.1.rst @@ -51,6 +51,9 @@ Categorical - +**Other** +- Bug in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`) + .. --------------------------------------------------------------------------- .. _whatsnew_111.contributors: diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a83f0b86ba50b..a5c2a1fbd5339 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -157,7 +157,8 @@ ExtensionArray Other ^^^^^ -- Bug in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`) + +- - .. --------------------------------------------------------------------------- From ca0b21d77d761062be1abb4f7888395f8fc09cb1 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Aug 2020 09:00:43 -0700 Subject: [PATCH 3/3] move note to regressions section --- doc/source/whatsnew/v1.1.1.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst index 675261861828c..3d585b67953b0 100644 --- a/doc/source/whatsnew/v1.1.1.rst +++ b/doc/source/whatsnew/v1.1.1.rst @@ -16,7 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`). -- +- Fixed regression in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`) - .. --------------------------------------------------------------------------- @@ -51,8 +51,6 @@ Categorical - -**Other** -- Bug in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`) .. ---------------------------------------------------------------------------