Skip to content

Commit a3245f9

Browse files
committed
ENH: column-wise DataFrame.fillna and duplicated DataFrame.fillna with Series and Dict (#30922)
1 parent e6bd49f commit a3245f9

File tree

3 files changed

+108
-15
lines changed

3 files changed

+108
-15
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ Other enhancements
6666
- :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`)
6767
- When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`)
6868
- ``OptionError`` is now exposed in ``pandas.errors`` (:issue:`27553`)
69+
- :meth:`DataFrame.fillna` can fill NA values column-wise with a dictionary or :class:`Series` (:issue:`4514`)
6970
-
7071

7172
.. ---------------------------------------------------------------------------

pandas/core/generic.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6005,20 +6005,25 @@ def fillna(
60056005
)
60066006

60076007
elif isinstance(value, (dict, ABCSeries)):
6008+
new_data = self.copy()
6009+
60086010
if axis == 1:
6009-
raise NotImplementedError(
6010-
"Currently only can fill "
6011-
"with dict/Series column "
6012-
"by column"
6013-
)
6011+
# Transpose so that the index labels can be accessed as columns
6012+
new_data = new_data.T
60146013

6015-
result = self if inplace else self.copy()
6016-
for k, v in value.items():
6017-
if k not in result:
6014+
for i in range(new_data.columns.size):
6015+
label = new_data.columns[i]
6016+
if label not in value:
60186017
continue
6019-
obj = result[k]
6020-
obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
6021-
return result if not inplace else None
6018+
new_data.iloc[:, i] = new_data.iloc[:, i].fillna(
6019+
value[label], limit=limit, inplace=False, downcast=downcast
6020+
)
6021+
6022+
if axis == 1:
6023+
# Transpose back to restore the original index and columns
6024+
new_data = new_data.T
6025+
6026+
new_data = new_data.infer_objects()
60226027

60236028
elif not is_list_like(value):
60246029
new_data = self._data.fillna(

pandas/tests/frame/test_missing.py

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -602,10 +602,6 @@ def test_fillna_dict_series(self):
602602
expected = df.fillna(df.max().to_dict())
603603
tm.assert_frame_equal(result, expected)
604604

605-
# disable this for now
606-
with pytest.raises(NotImplementedError, match="column by column"):
607-
df.fillna(df.max(1), axis=1)
608-
609605
def test_fillna_dataframe(self):
610606
# GH 8377
611607
df = DataFrame(
@@ -703,3 +699,94 @@ def test_fill_value_when_combine_const(self):
703699
exp = df.fillna(0).add(2)
704700
res = df.add(2, fill_value=0)
705701
tm.assert_frame_equal(res, exp)
702+
703+
@pytest.mark.parametrize(
704+
"expected,fill_value",
705+
[
706+
(
707+
DataFrame(
708+
[[100, 100], [200, 4], [5, 6]], columns=list("AB"), dtype="float64"
709+
),
710+
Series([100, 200, 300]),
711+
),
712+
(
713+
DataFrame(
714+
[[100, 100], [np.nan, 4], [5, 6]],
715+
columns=list("AB"),
716+
dtype="float64",
717+
),
718+
{0: 100, 2: 300, 3: 400},
719+
),
720+
],
721+
)
722+
def test_fillna_column_wise(self, expected, fill_value):
723+
# GH 4514
724+
df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB"))
725+
result = df.fillna(fill_value, axis=1)
726+
tm.assert_frame_equal(expected, result)
727+
728+
def test_fillna_column_wise_downcast(self):
729+
# GH 4514
730+
df = DataFrame([[np.nan, 2], [3, np.nan], [np.nan, np.nan]], columns=list("AB"))
731+
s = Series([100, 200, 300])
732+
733+
expected = DataFrame(
734+
[[100, 2], [3, 200], [300, 300]], columns=list("AB"), dtype="int64"
735+
)
736+
result = df.fillna(s, axis=1, downcast="infer")
737+
tm.assert_frame_equal(expected, result)
738+
739+
@pytest.mark.parametrize(
740+
"fill_value", [Series([100, 200, 300]), {0: 100, 2: 300, 3: 400}]
741+
)
742+
def test_fillna_column_wise_inplace(self, fill_value):
743+
# GH 4514
744+
df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB"))
745+
expected = df.fillna(fill_value, axis=1, inplace=False)
746+
df.fillna(fill_value, axis=1, inplace=True)
747+
tm.assert_frame_equal(expected, df)
748+
749+
@pytest.mark.parametrize(
750+
"fill_value",
751+
[Series([100, 200, 300], index=[0, 1, 2]), {0: 100, 1: 200, 2: 300}],
752+
)
753+
def test_fillna_column_wise_duplicated_with_series_dict(self, fill_value):
754+
# GH 4514
755+
df = DataFrame(
756+
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]],
757+
columns=list("ABB"),
758+
index=[0, 0, 1],
759+
)
760+
expected = DataFrame(
761+
[[100, 100, 3], [100, 5, 100], [7, 200, 200]],
762+
columns=list("ABB"),
763+
index=[0, 0, 1],
764+
dtype="float64",
765+
)
766+
767+
result = df.fillna(fill_value, axis=1)
768+
tm.assert_frame_equal(result, expected)
769+
770+
@pytest.mark.parametrize(
771+
"fill_value",
772+
[
773+
Series([100, 200, 300], index=["A", "B", "C"]),
774+
{"A": 100, "B": 200, "C": 300},
775+
],
776+
)
777+
def test_fillna_duplicated_with_series_dict(self, fill_value):
778+
# GH 4514
779+
df = DataFrame(
780+
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]],
781+
columns=list("ABB"),
782+
index=[0, 0, 1],
783+
)
784+
expected = DataFrame(
785+
[[100, 200, 3], [100, 5, 200], [7, 200, 200]],
786+
columns=list("ABB"),
787+
index=[0, 0, 1],
788+
dtype="float64",
789+
)
790+
791+
result = df.fillna(fill_value)
792+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)