diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 94bb265c32e4c..311f240d5f08b 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -51,7 +51,7 @@ For example: Other enhancements ^^^^^^^^^^^^^^^^^^ - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) -- +- :meth:`DataFrame.fillna` can fill NA values column-wise with a dictionary or :class:`Series` (:issue:`4514`) - diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 520023050d49d..70d714e813085 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6134,20 +6134,25 @@ def fillna( ) elif isinstance(value, (dict, ABCSeries)): + temp_data = self if inplace else self.copy() + if axis == 1: - raise NotImplementedError( - "Currently only can fill " - "with dict/Series column " - "by column" - ) + for i, item in enumerate(temp_data.items()): + label, content = item + temp_data.iloc[:, i] = content.fillna( + value, limit=limit, inplace=False, downcast=downcast + ) + else: + for i, item in enumerate(temp_data.items()): + label, content = item + if label not in value: + continue + temp_data.iloc[:, i] = content.fillna( + value[label], limit=limit, inplace=False, downcast=downcast + ) - result = self if inplace else self.copy() - for k, v in value.items(): - if k not in result: - continue - obj = result[k] - obj.fillna(v, limit=limit, inplace=True, downcast=downcast) - return result if not inplace else None + temp_data = temp_data.infer_objects() + new_data = temp_data._mgr elif not is_list_like(value): new_data = self._mgr.fillna( diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 9bf5d24085697..045f00a17933f 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -618,10 +618,6 @@ def test_fillna_dict_series(self): expected = df.fillna(df.max().to_dict()) tm.assert_frame_equal(result, expected) - # disable this for now - with pytest.raises(NotImplementedError, match="column by column"): - df.fillna(df.max(1), axis=1) - def test_fillna_dataframe(self): # GH 8377 df = DataFrame( @@ -710,3 +706,94 @@ def test_fill_corner(self, float_frame, float_string_frame): # TODO(wesm): unused? result = empty_float.fillna(value=0) # noqa + + @pytest.mark.parametrize( + "expected,fill_value", + [ + ( + DataFrame( + [[100, 100], [200, 4], [5, 6]], columns=list("AB"), dtype="float64" + ), + Series([100, 200, 300]), + ), + ( + DataFrame( + [[100, 100], [np.nan, 4], [5, 6]], + columns=list("AB"), + dtype="float64", + ), + {0: 100, 2: 300, 3: 400}, + ), + ], + ) + def test_fillna_column_wise(self, expected, fill_value): + # GH 4514 + df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB")) + result = df.fillna(fill_value, axis=1) + tm.assert_frame_equal(expected, result) + + def test_fillna_column_wise_downcast(self): + # GH 4514 + df = DataFrame([[np.nan, 2], [3, np.nan], [np.nan, np.nan]], columns=list("AB")) + s = Series([100, 200, 300]) + + expected = DataFrame( + [[100, 2], [3, 200], [300, 300]], columns=list("AB"), dtype="int64" + ) + result = df.fillna(s, axis=1, downcast="infer") + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "fill_value", [Series([100, 200, 300]), {0: 100, 2: 300, 3: 400}] + ) + def test_fillna_column_wise_inplace(self, fill_value): + # GH 4514 + df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB")) + expected = df.fillna(fill_value, axis=1, inplace=False) + df.fillna(fill_value, axis=1, inplace=True) + tm.assert_frame_equal(expected, df) + + @pytest.mark.parametrize( + "fill_value", + [Series([100, 200, 300], index=[0, 1, 2]), {0: 100, 1: 200, 2: 300}], + ) + def test_fillna_column_wise_duplicated_with_series_dict(self, fill_value): + # GH 4514 + df = DataFrame( + [[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]], + columns=list("ABB"), + index=[0, 0, 1], + ) + expected = DataFrame( + [[100, 100, 3], [100, 5, 100], [7, 200, 200]], + columns=list("ABB"), + index=[0, 0, 1], + dtype="float64", + ) + + result = df.fillna(fill_value, axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "fill_value", + [ + Series([100, 200, 300], index=["A", "B", "C"]), + {"A": 100, "B": 200, "C": 300}, + ], + ) + def test_fillna_duplicated_with_series_dict(self, fill_value): + # GH 4514 + df = DataFrame( + [[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]], + columns=list("ABB"), + index=[0, 0, 1], + ) + expected = DataFrame( + [[100, 200, 3], [100, 5, 200], [7, 200, 200]], + columns=list("ABB"), + index=[0, 0, 1], + dtype="float64", + ) + + result = df.fillna(fill_value) + tm.assert_frame_equal(result, expected)