Skip to content

Commit 5b3c363

Browse files
committed
ENH: column-wise DataFrame.fillna and duplicated DataFrame.fillna with Series and Dict (#30922)
1 parent 7673357 commit 5b3c363

File tree

3 files changed

+109
-16
lines changed

3 files changed

+109
-16
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ Other enhancements
8787
- Positional slicing on a :class:`IntervalIndex` now supports slices with ``step > 1`` (:issue:`31658`)
8888
- :class:`Series.str` now has a ``fullmatch`` method that matches a regular expression against the entire string in each row of the series, similar to ``re.fullmatch`` (:issue:`32806`).
8989
- :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`)
90+
- :meth:`DataFrame.fillna` can now fill NA values column-wise (``axis=1``) with a dictionary or :class:`Series` (:issue:`4514`)
9091
-
9192

9293
.. ---------------------------------------------------------------------------

pandas/core/generic.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6087,20 +6087,25 @@ def fillna(
60876087
)
60886088

60896089
elif isinstance(value, (dict, ABCSeries)):
6090+
temp_data = self if inplace else self.copy()
6091+
60906092
if axis == 1:
6091-
raise NotImplementedError(
6092-
"Currently only can fill "
6093-
"with dict/Series column "
6094-
"by column"
6095-
)
6093+
for i, item in enumerate(temp_data.items()):
6094+
label, content = item
6095+
temp_data.iloc[:, i] = content.fillna(
6096+
value, limit=limit, inplace=False, downcast=downcast
6097+
)
6098+
else:
6099+
for i, item in enumerate(temp_data.items()):
6100+
label, content = item
6101+
if label not in value:
6102+
continue
6103+
temp_data.iloc[:, i] = content.fillna(
6104+
value[label], limit=limit, inplace=False, downcast=downcast
6105+
)
60966106

6097-
result = self if inplace else self.copy()
6098-
for k, v in value.items():
6099-
if k not in result:
6100-
continue
6101-
obj = result[k]
6102-
obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
6103-
return result if not inplace else None
6107+
temp_data = temp_data.infer_objects()
6108+
new_data = temp_data._mgr
61046109

61056110
elif not is_list_like(value):
61066111
new_data = self._data.fillna(

pandas/tests/frame/test_missing.py

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -602,10 +602,6 @@ def test_fillna_dict_series(self):
602602
expected = df.fillna(df.max().to_dict())
603603
tm.assert_frame_equal(result, expected)
604604

605-
# disable this for now
606-
with pytest.raises(NotImplementedError, match="column by column"):
607-
df.fillna(df.max(1), axis=1)
608-
609605
def test_fillna_dataframe(self):
610606
# GH 8377
611607
df = DataFrame(
@@ -694,3 +690,94 @@ def test_fill_corner(self, float_frame, float_string_frame):
694690

695691
# TODO(wesm): unused?
696692
result = empty_float.fillna(value=0) # noqa
693+
694+
@pytest.mark.parametrize(
695+
"expected,fill_value",
696+
[
697+
(
698+
DataFrame(
699+
[[100, 100], [200, 4], [5, 6]], columns=list("AB"), dtype="float64"
700+
),
701+
Series([100, 200, 300]),
702+
),
703+
(
704+
DataFrame(
705+
[[100, 100], [np.nan, 4], [5, 6]],
706+
columns=list("AB"),
707+
dtype="float64",
708+
),
709+
{0: 100, 2: 300, 3: 400},
710+
),
711+
],
712+
)
713+
def test_fillna_column_wise(self, expected, fill_value):
714+
# GH 4514
715+
df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB"))
716+
result = df.fillna(fill_value, axis=1)
717+
tm.assert_frame_equal(expected, result)
718+
719+
def test_fillna_column_wise_downcast(self):
720+
# GH 4514
721+
df = DataFrame([[np.nan, 2], [3, np.nan], [np.nan, np.nan]], columns=list("AB"))
722+
s = Series([100, 200, 300])
723+
724+
expected = DataFrame(
725+
[[100, 2], [3, 200], [300, 300]], columns=list("AB"), dtype="int64"
726+
)
727+
result = df.fillna(s, axis=1, downcast="infer")
728+
tm.assert_frame_equal(expected, result)
729+
730+
@pytest.mark.parametrize(
731+
"fill_value", [Series([100, 200, 300]), {0: 100, 2: 300, 3: 400}]
732+
)
733+
def test_fillna_column_wise_inplace(self, fill_value):
734+
# GH 4514
735+
df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB"))
736+
expected = df.fillna(fill_value, axis=1, inplace=False)
737+
df.fillna(fill_value, axis=1, inplace=True)
738+
tm.assert_frame_equal(expected, df)
739+
740+
@pytest.mark.parametrize(
741+
"fill_value",
742+
[Series([100, 200, 300], index=[0, 1, 2]), {0: 100, 1: 200, 2: 300}],
743+
)
744+
def test_fillna_column_wise_duplicated_with_series_dict(self, fill_value):
745+
# GH 4514
746+
df = DataFrame(
747+
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]],
748+
columns=list("ABB"),
749+
index=[0, 0, 1],
750+
)
751+
expected = DataFrame(
752+
[[100, 100, 3], [100, 5, 100], [7, 200, 200]],
753+
columns=list("ABB"),
754+
index=[0, 0, 1],
755+
dtype="float64",
756+
)
757+
758+
result = df.fillna(fill_value, axis=1)
759+
tm.assert_frame_equal(result, expected)
760+
761+
@pytest.mark.parametrize(
762+
"fill_value",
763+
[
764+
Series([100, 200, 300], index=["A", "B", "C"]),
765+
{"A": 100, "B": 200, "C": 300},
766+
],
767+
)
768+
def test_fillna_duplicated_with_series_dict(self, fill_value):
769+
# GH 4514
770+
df = DataFrame(
771+
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]],
772+
columns=list("ABB"),
773+
index=[0, 0, 1],
774+
)
775+
expected = DataFrame(
776+
[[100, 200, 3], [100, 5, 200], [7, 200, 200]],
777+
columns=list("ABB"),
778+
index=[0, 0, 1],
779+
dtype="float64",
780+
)
781+
782+
result = df.fillna(fill_value)
783+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)