-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH:column-wise DataFrame.fillna and duplicated DataFrame.fillna with Series and Dict #30922
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6134,20 +6134,25 @@ def fillna( | |
) | ||
|
||
elif isinstance(value, (dict, ABCSeries)): | ||
temp_data = self if inplace else self.copy() | ||
|
||
if axis == 1: | ||
raise NotImplementedError( | ||
"Currently only can fill " | ||
"with dict/Series column " | ||
"by column" | ||
) | ||
for i, item in enumerate(temp_data.items()): | ||
label, content = item | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This doesn't make sense with the axis here; you are updating the same column whether axis == 0 or 1. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jreback |
||
temp_data.iloc[:, i] = content.fillna( | ||
value, limit=limit, inplace=False, downcast=downcast | ||
) | ||
else: | ||
for i, item in enumerate(temp_data.items()): | ||
label, content = item | ||
if label not in value: | ||
continue | ||
temp_data.iloc[:, i] = content.fillna( | ||
value[label], limit=limit, inplace=False, downcast=downcast | ||
) | ||
|
||
result = self if inplace else self.copy() | ||
for k, v in value.items(): | ||
if k not in result: | ||
continue | ||
obj = result[k] | ||
obj.fillna(v, limit=limit, inplace=True, downcast=downcast) | ||
return result if not inplace else None | ||
temp_data = temp_data.infer_objects() | ||
new_data = temp_data._mgr | ||
|
||
elif not is_list_like(value): | ||
new_data = self._mgr.fillna( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -618,10 +618,6 @@ def test_fillna_dict_series(self): | |
expected = df.fillna(df.max().to_dict()) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
# disable this for now | ||
with pytest.raises(NotImplementedError, match="column by column"): | ||
df.fillna(df.max(1), axis=1) | ||
|
||
def test_fillna_dataframe(self): | ||
# GH 8377 | ||
df = DataFrame( | ||
|
@@ -710,3 +706,94 @@ def test_fill_corner(self, float_frame, float_string_frame): | |
|
||
# TODO(wesm): unused? | ||
result = empty_float.fillna(value=0) # noqa | ||
|
||
@pytest.mark.parametrize( | ||
"expected,fill_value", | ||
[ | ||
( | ||
DataFrame( | ||
[[100, 100], [200, 4], [5, 6]], columns=list("AB"), dtype="float64" | ||
), | ||
Series([100, 200, 300]), | ||
), | ||
( | ||
DataFrame( | ||
[[100, 100], [np.nan, 4], [5, 6]], | ||
columns=list("AB"), | ||
dtype="float64", | ||
), | ||
{0: 100, 2: 300, 3: 400}, | ||
), | ||
], | ||
) | ||
def test_fillna_column_wise(self, expected, fill_value): | ||
# GH 4514 | ||
df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB")) | ||
result = df.fillna(fill_value, axis=1) | ||
tm.assert_frame_equal(expected, result) | ||
|
||
def test_fillna_column_wise_downcast(self): | ||
# GH 4514 | ||
df = DataFrame([[np.nan, 2], [3, np.nan], [np.nan, np.nan]], columns=list("AB")) | ||
s = Series([100, 200, 300]) | ||
|
||
expected = DataFrame( | ||
[[100, 2], [3, 200], [300, 300]], columns=list("AB"), dtype="int64" | ||
) | ||
result = df.fillna(s, axis=1, downcast="infer") | ||
tm.assert_frame_equal(expected, result) | ||
|
||
@pytest.mark.parametrize( | ||
"fill_value", [Series([100, 200, 300]), {0: 100, 2: 300, 3: 400}] | ||
) | ||
def test_fillna_column_wise_inplace(self, fill_value): | ||
# GH 4514 | ||
df = DataFrame([[np.nan, np.nan], [np.nan, 4], [5, 6]], columns=list("AB")) | ||
expected = df.fillna(fill_value, axis=1, inplace=False) | ||
df.fillna(fill_value, axis=1, inplace=True) | ||
tm.assert_frame_equal(expected, df) | ||
|
||
@pytest.mark.parametrize( | ||
"fill_value", | ||
[Series([100, 200, 300], index=[0, 1, 2]), {0: 100, 1: 200, 2: 300}], | ||
) | ||
def test_fillna_column_wise_duplicated_with_series_dict(self, fill_value): | ||
# GH 4514 | ||
df = DataFrame( | ||
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]], | ||
columns=list("ABB"), | ||
index=[0, 0, 1], | ||
) | ||
expected = DataFrame( | ||
[[100, 100, 3], [100, 5, 100], [7, 200, 200]], | ||
columns=list("ABB"), | ||
index=[0, 0, 1], | ||
dtype="float64", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should be integers, no? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @WillAyd |
||
) | ||
|
||
result = df.fillna(fill_value, axis=1) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
@pytest.mark.parametrize( | ||
"fill_value", | ||
[ | ||
Series([100, 200, 300], index=["A", "B", "C"]), | ||
{"A": 100, "B": 200, "C": 300}, | ||
], | ||
) | ||
def test_fillna_duplicated_with_series_dict(self, fill_value): | ||
# GH 4514 | ||
df = DataFrame( | ||
[[np.nan, np.nan, 3], [np.nan, 5, np.nan], [7, np.nan, np.nan]], | ||
columns=list("ABB"), | ||
index=[0, 0, 1], | ||
) | ||
expected = DataFrame( | ||
[[100, 200, 3], [100, 5, 200], [7, 200, 200]], | ||
columns=list("ABB"), | ||
index=[0, 0, 1], | ||
dtype="float64", | ||
) | ||
|
||
result = df.fillna(fill_value) | ||
tm.assert_frame_equal(result, expected) |
Uh oh!
There was an error while loading. Please reload this page.