Skip to content

Commit ec012e9

Browse files
committed
ENH: column-wise DataFrame.fillna, and DataFrame.fillna on frames with duplicated labels, with Series and dict (#30922)
1 parent ab56348 commit ec012e9

File tree

3 files changed

+109
-14
lines changed

3 files changed

+109
-14
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ Other enhancements
4343

4444
- :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`)
4545
- When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`)
46-
-
46+
- :meth:`DataFrame.fillna` can now fill NA values column-wise (``axis=1``) with a dictionary or :class:`Series`, matching each fill value to a row by its index label (:issue:`4514`)
4747
-
4848

4949
.. ---------------------------------------------------------------------------

pandas/core/generic.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6001,19 +6001,25 @@ def fillna(
60016001
)
60026002

60036003
elif isinstance(value, (dict, ABCSeries)):
6004+
result = self if inplace else self.copy()
6005+
60046006
if axis == 1:
6005-
raise NotImplementedError(
6006-
"Currently only can fill "
6007-
"with dict/Series column "
6008-
"by column"
6009-
)
6007+
# Transpose so each original row becomes a column for the per-column fill below (transposed back afterwards)
6008+
result = result.T
60106009

6011-
result = self if inplace else self.copy()
6012-
for k, v in value.items():
6013-
if k not in result:
6010+
for i in range(result.columns.size):
6011+
label = result.columns[i]
6012+
6013+
if label not in value.keys():
60146014
continue
6015-
obj = result[k]
6016-
obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
6015+
6016+
result.iloc[:, i] = result.iloc[:, i].fillna(
6017+
value[label], limit=limit, inplace=False, downcast=downcast
6018+
)
6019+
6020+
if axis == 1:
6021+
result = result.T
6022+
60176023
return result if not inplace else None
60186024

60196025
elif not is_list_like(value):

pandas/tests/frame/test_missing.py

Lines changed: 92 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -602,9 +602,15 @@ def test_fillna_dict_series(self):
602602
expected = df.fillna(df.max().to_dict())
603603
tm.assert_frame_equal(result, expected)
604604

605-
# disable this for now
606-
with pytest.raises(NotImplementedError, match="column by column"):
607-
df.fillna(df.max(1), axis=1)
605+
expected = DataFrame(
606+
{
607+
"a": [1.0, 1.0, 2.0, 3.0, 4.0],
608+
"b": [1.0, 2.0, 3.0, 3.0, 4.0],
609+
"c": [1.0, 1.0, 2.0, 3.0, 4.0],
610+
}
611+
)
612+
result = df.fillna(df.max(1), axis=1)
613+
tm.assert_frame_equal(expected, result)
608614

609615
def test_fillna_dataframe(self):
610616
# GH 8377
@@ -703,3 +709,86 @@ def test_fill_value_when_combine_const(self):
703709
exp = df.fillna(0).add(2)
704710
res = df.add(2, fill_value=0)
705711
tm.assert_frame_equal(res, exp)
712+
713+
@pytest.mark.parametrize(
714+
"expected,fill_value",
715+
[
716+
(
717+
DataFrame(
718+
[[100, 100], [200, 4], [5, 6]], columns=list("AB"), dtype="float64"
719+
),
720+
Series([100, 200, 300]),
721+
),
722+
(
723+
DataFrame(
724+
[[100, 100], [np.nan, 4], [5, 6]],
725+
columns=list("AB"),
726+
dtype="float64",
727+
),
728+
{0: 100, 2: 300, 3: 400},
729+
),
730+
],
731+
)
732+
def test_fillna_column_wise(self, expected, fill_value):
733+
# GH 4514
734+
df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB"))
735+
result = df.fillna(fill_value, axis=1)
736+
tm.assert_frame_equal(expected, result)
737+
738+
df.fillna(fill_value, axis=1, inplace=True)
739+
tm.assert_frame_equal(expected, df)
740+
741+
def test_fillna_column_wise_downcast(self):
742+
df = DataFrame([[np.nan, 2], [3, np.nan], [np.nan, np.nan]], columns=list("AB"))
743+
s = Series([100, 200, 300])
744+
745+
expected = DataFrame(
746+
[[100, 2], [3, 200], [300, 300]], columns=list("AB"), dtype="int64"
747+
)
748+
result = df.fillna(s, axis=1, downcast="infer")
749+
tm.assert_frame_equal(expected, result)
750+
751+
@pytest.mark.parametrize(
752+
"fill_value",
753+
[Series([100, 200, 300], index=[0, 1, 2]), {0: 100, 1: 200, 2: 300}],
754+
)
755+
def test_fillna_column_wise_duplicated_with_series_dict(self, fill_value):
756+
# GH 4514
757+
df = DataFrame(
758+
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]],
759+
columns=list("ABB"),
760+
index=[0, 0, 1],
761+
)
762+
expected = DataFrame(
763+
[[100, 100, 3], [100, 5, 100], [7, 200, 200]],
764+
columns=list("ABB"),
765+
index=[0, 0, 1],
766+
dtype="float64",
767+
)
768+
769+
result = df.fillna(fill_value, axis=1)
770+
tm.assert_frame_equal(result, expected)
771+
772+
@pytest.mark.parametrize(
773+
"fill_value",
774+
[
775+
Series([100, 200, 300], index=["A", "B", "C"]),
776+
{"A": 100, "B": 200, "C": 300},
777+
],
778+
)
779+
def test_fillna_duplicated_with_series_dict(self, fill_value):
780+
# GH 4514
781+
df = DataFrame(
782+
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]],
783+
columns=list("ABB"),
784+
index=[0, 0, 1],
785+
)
786+
expected = DataFrame(
787+
[[100, 200, 3], [100, 5, 200], [7, 200, 200]],
788+
columns=list("ABB"),
789+
index=[0, 0, 1],
790+
dtype="float64",
791+
)
792+
793+
result = df.fillna(fill_value)
794+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)