diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 689a067e1c211..b65043be6fda6 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -796,7 +796,6 @@ def replace( It is used in ObjectBlocks. It is here for API compatibility. """ inplace = validate_bool_kwarg(inplace, "inplace") - original_to_replace = to_replace if not self._can_hold_element(to_replace): # We cannot hold `to_replace`, so we know immediately that @@ -814,9 +813,20 @@ def replace( return [self] if inplace else [self.copy()] if not self._can_hold_element(value): - blk = self.astype(object) + if self.ndim == 2 and self.shape[0] > 1: + # split so that we only upcast where necessary + nbs = self._split() + res_blocks = extend_blocks( + [ + blk.replace(to_replace, value, inplace=inplace, regex=regex) + for blk in nbs + ] + ) + return res_blocks + + blk = self.coerce_to_target_dtype(value) return blk.replace( - to_replace=original_to_replace, + to_replace=to_replace, value=value, inplace=True, regex=regex, @@ -824,7 +834,7 @@ def replace( blk = self if inplace else self.copy() putmask_inplace(blk.values, mask, value) - blocks = blk.convert(numeric=False, copy=not inplace) + blocks = blk.convert(numeric=False, copy=False) return blocks @final @@ -867,11 +877,7 @@ def _replace_regex( replace_regex(new_values, rx, value, mask) block = self.make_block(new_values) - if convert: - nbs = block.convert(numeric=False) - else: - nbs = [block] - return nbs + return [block] @final def _replace_list( diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 58016be82c405..564481d01abc8 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -265,12 +265,13 @@ def test_fillna_dtype_conversion(self): expected = DataFrame("nan", index=range(3), columns=["A", "B"]) tm.assert_frame_equal(result, expected) - # equiv of replace + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) object upcasting + @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0]) + def test_fillna_dtype_conversion_equiv_replace(self, val): df = DataFrame({"A": [1, np.nan], "B": [1.0, 2.0]}) - for v in ["", 1, np.nan, 1.0]: - expected = df.replace(np.nan, v) - result = df.fillna(v) - tm.assert_frame_equal(result, expected) + expected = df.replace(np.nan, val) + result = df.fillna(val) + tm.assert_frame_equal(result, expected) @td.skip_array_manager_invalid_test def test_fillna_datetime_columns(self): diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 9ae5bb151b685..6d1e90e2f9646 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -783,6 +783,8 @@ def test_replace_mixed(self, float_string_frame): tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame) + def test_replace_mixed_int_block_upcasting(self): + # int block upcasting df = DataFrame( { @@ -803,6 +805,8 @@ def test_replace_mixed(self, float_string_frame): assert return_value is None tm.assert_frame_equal(df, expected) + def test_replace_mixed_int_block_splitting(self): + # int block splitting df = DataFrame( { @@ -821,6 +825,8 @@ def test_replace_mixed(self, float_string_frame): result = df.replace(0, 0.5) tm.assert_frame_equal(result, expected) + def test_replace_mixed2(self): + # to object block upcasting df = DataFrame( { @@ -846,6 +852,7 @@ def test_replace_mixed(self, float_string_frame): result = df.replace([1, 2], ["foo", "bar"]) tm.assert_frame_equal(result, expected) + def test_replace_mixed3(self): # test case from df = DataFrame( {"A": Series([3, 0], dtype="int64"), "B": Series([0, 3], dtype="int64")}