Skip to content

BUG: Preserve sparse dtype when reindexing #26183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4502,7 +4502,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
fill_value=fill_value, copy=copy)

def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
allow_dups=False):
allow_dups=False, preserve_dtype=False):
"""allow_dups indicates an internal call here """

# reindex doing multiple operations on different axes if indicated
Expand All @@ -4527,7 +4527,8 @@ def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False,
if copy and new_data is self._data:
new_data = new_data.copy()

return self._constructor(new_data).__finalize__(self)
kwargs = {'dtype': self._data.dtype} if preserve_dtype else {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if you always just pass the dtype of the original in?

return self._constructor(new_data, **kwargs).__finalize__(self)

def filter(self, items=None, like=None, regex=None, axis=None):
"""
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/sparse/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,10 +464,20 @@ def copy(self, deep=True):
def reindex(self, index=None, method=None, copy=True, limit=None,
            **kwargs):
    """
    Reindex, falling back to this object's own ``fill_value``.

    A ``fill_value`` keyword is taken out of ``kwargs``; when it is absent
    or ``None``, ``self.fill_value`` is used in its place. All other
    arguments are forwarded unchanged to the parent class ``reindex``.
    """
    # TODO: remove?
    requested_fill = kwargs.pop('fill_value', None)
    fill_value = (self.fill_value if requested_fill is None
                  else requested_fill)
    parent = super(SparseSeries, self)
    return parent.reindex(index=index, method=method,
                          fill_value=fill_value,
                          copy=copy, limit=limit,
                          **kwargs)

def _reindex_with_indexers(self, *args, **kwargs):
    """
    Call the parent ``_reindex_with_indexers`` with ``preserve_dtype=True``.

    Positional and keyword arguments are passed through as given.
    ``preserve_dtype`` is always supplied here, so a caller that also
    passes it in ``kwargs`` gets a duplicate-keyword ``TypeError``.
    """
    parent = super(SparseSeries, self)
    return parent._reindex_with_indexers(
        *args, preserve_dtype=True, **kwargs)

def sparse_reindex(self, new_index):
"""
Conform sparse values to new SparseIndex
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/sparse/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import pandas as pd
from pandas import (
DataFrame, Series, SparseDtype, SparseSeries, bdate_range, isna)
DataFrame, Series, SparseDtype, SparseSeries, bdate_range)
from pandas.core.reshape.util import cartesian_product
import pandas.core.sparse.frame as spf
from pandas.tests.series.test_api import SharedWithSparse
Expand Down Expand Up @@ -309,7 +309,7 @@ def test_constructor_scalar(self):
sp = SparseSeries(data, np.arange(100))
sp = sp.reindex(np.arange(200))
assert (sp.loc[:99] == data).all()
assert isna(sp.loc[100:]).all()
assert sp.loc[100:].isin((0,)).all()

data = np.nan
sp = SparseSeries(data, np.arange(100))
Expand Down Expand Up @@ -1031,6 +1031,11 @@ def test_memory_usage_deep(self, deep, fill_value):

assert sparse_usage < dense_usage

# The builtin ``bool`` is used instead of ``np.bool``: the latter was only an
# alias of the builtin, was deprecated in NumPy 1.20 and removed in 1.24.
@pytest.mark.parametrize('dtype', (np.float32, np.int32, bool))
def test_resize_preserves_dtype(self, dtype):
    """Reindexing onto a longer index must not change the series dtype."""
    s = pd.SparseSeries([1, 0], dtype=dtype)
    # Label 2 is not in the original index, so the result gains a new row
    # that has to be filled.
    assert s.dtype == s.reindex([0, 1, 2]).dtype


class TestSparseHandlingMultiIndexes:

Expand Down